From 620d0b49a40e24465472b667f19b5fb0c63a6f0c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 8 Feb 2022 05:54:29 +0300 Subject: [PATCH 01/34] common-user/host/sparc64: Fix safe_syscall_base Use the "retl" instead of "ret" instruction alias, since we do not allocate a register window in this function. Fix the offset to the first stacked parameter, which lies beyond the register window save area. Fixes: 95c021dac835 ("linux-user/host/sparc64: Add safe-syscall.inc.S") Signed-off-by: Richard Henderson --- common-user/host/sparc64/safe-syscall.inc.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common-user/host/sparc64/safe-syscall.inc.S b/common-user/host/sparc64/safe-syscall.inc.S index a2f2b9c967..c7be8f2d25 100644 --- a/common-user/host/sparc64/safe-syscall.inc.S +++ b/common-user/host/sparc64/safe-syscall.inc.S @@ -24,7 +24,8 @@ .type safe_syscall_end, @function #define STACK_BIAS 2047 -#define PARAM(N) STACK_BIAS + N*8 +#define WINDOW_SIZE 16 * 8 +#define PARAM(N) STACK_BIAS + WINDOW_SIZE + N * 8 /* * This is the entry point for making a system call. The calling @@ -74,7 +75,7 @@ safe_syscall_end: /* code path for having successfully executed the syscall */ bcs,pn %xcc, 1f nop - ret + retl nop /* code path when we didn't execute the syscall */ From c8c89a6a30be0e6f24e6a56d4ef181ec0e4dd064 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 8 Feb 2022 09:30:42 +0300 Subject: [PATCH 02/34] linux-user: Introduce host_signal_mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not directly access the uc_sigmask member. This is preparation for a sparc64 fix. Reviewed-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- linux-user/include/host/aarch64/host-signal.h | 5 +++++ linux-user/include/host/alpha/host-signal.h | 5 +++++ linux-user/include/host/arm/host-signal.h | 5 +++++ linux-user/include/host/i386/host-signal.h | 5 +++++ .../include/host/loongarch64/host-signal.h | 5 +++++ linux-user/include/host/mips/host-signal.h | 5 +++++ linux-user/include/host/ppc/host-signal.h | 5 +++++ linux-user/include/host/riscv/host-signal.h | 5 +++++ linux-user/include/host/s390/host-signal.h | 5 +++++ linux-user/include/host/sparc/host-signal.h | 5 +++++ linux-user/include/host/x86_64/host-signal.h | 5 +++++ linux-user/signal.c | 18 ++++++++---------- 12 files changed, 63 insertions(+), 10 deletions(-) diff --git a/linux-user/include/host/aarch64/host-signal.h b/linux-user/include/host/aarch64/host-signal.h index 9770b36dc1..76ab078069 100644 --- a/linux-user/include/host/aarch64/host-signal.h +++ b/linux-user/include/host/aarch64/host-signal.h @@ -40,6 +40,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) uc->uc_mcontext.pc = pc; } +static inline void *host_signal_mask(ucontext_t *uc) +{ + return &uc->uc_sigmask; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { struct _aarch64_ctx *hdr; diff --git a/linux-user/include/host/alpha/host-signal.h b/linux-user/include/host/alpha/host-signal.h index f4c942948a..a44d670f2b 100644 --- a/linux-user/include/host/alpha/host-signal.h +++ b/linux-user/include/host/alpha/host-signal.h @@ -21,6 +21,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) uc->uc_mcontext.sc_pc = pc; } +static inline void *host_signal_mask(ucontext_t *uc) +{ + return &uc->uc_sigmask; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { uint32_t *pc = (uint32_t *)host_signal_pc(uc); diff --git a/linux-user/include/host/arm/host-signal.h b/linux-user/include/host/arm/host-signal.h index 6c095773c0..bbeb4ffefb 100644 --- a/linux-user/include/host/arm/host-signal.h +++ b/linux-user/include/host/arm/host-signal.h @@ -21,6 +21,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) uc->uc_mcontext.arm_pc = pc; } +static inline void *host_signal_mask(ucontext_t *uc) +{ + return &uc->uc_sigmask; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { /* diff --git a/linux-user/include/host/i386/host-signal.h b/linux-user/include/host/i386/host-signal.h index abe1ece5c9..fd36f06bda 100644 --- a/linux-user/include/host/i386/host-signal.h +++ b/linux-user/include/host/i386/host-signal.h @@ -21,6 +21,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) uc->uc_mcontext.gregs[REG_EIP] = pc; } +static inline void *host_signal_mask(ucontext_t *uc) +{ + return &uc->uc_sigmask; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { return uc->uc_mcontext.gregs[REG_TRAPNO] == 0xe diff --git a/linux-user/include/host/loongarch64/host-signal.h b/linux-user/include/host/loongarch64/host-signal.h index 7effa24251..a9dfe0c688 100644 --- a/linux-user/include/host/loongarch64/host-signal.h +++ b/linux-user/include/host/loongarch64/host-signal.h @@ -21,6 +21,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) uc->uc_mcontext.__pc = pc; } +static inline void *host_signal_mask(ucontext_t *uc) +{ + return &uc->uc_sigmask; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { const uint32_t *pinsn = (const uint32_t *)host_signal_pc(uc); diff --git a/linux-user/include/host/mips/host-signal.h b/linux-user/include/host/mips/host-signal.h index c666ed8c3f..ff840dd491 100644 --- a/linux-user/include/host/mips/host-signal.h +++ b/linux-user/include/host/mips/host-signal.h @@ -21,6 +21,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) uc->uc_mcontext.pc = pc; } +static inline void *host_signal_mask(ucontext_t *uc) +{ + return &uc->uc_sigmask; +} + #if defined(__misp16) || defined(__mips_micromips) #error "Unsupported encoding" #endif diff --git a/linux-user/include/host/ppc/host-signal.h b/linux-user/include/host/ppc/host-signal.h index 1d8e658ff7..730a321d98 100644 --- a/linux-user/include/host/ppc/host-signal.h +++ b/linux-user/include/host/ppc/host-signal.h @@ -21,6 +21,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) uc->uc_mcontext.regs->nip = pc; } +static inline void *host_signal_mask(ucontext_t *uc) +{ + return &uc->uc_sigmask; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { return uc->uc_mcontext.regs->trap != 0x400 diff --git a/linux-user/include/host/riscv/host-signal.h b/linux-user/include/host/riscv/host-signal.h index a4f170efb0..aceae544f2 100644 --- a/linux-user/include/host/riscv/host-signal.h +++ b/linux-user/include/host/riscv/host-signal.h @@ -21,6 +21,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) uc->uc_mcontext.__gregs[REG_PC] = pc; } +static inline void *host_signal_mask(ucontext_t *uc) +{ + return &uc->uc_sigmask; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { /* diff --git a/linux-user/include/host/s390/host-signal.h b/linux-user/include/host/s390/host-signal.h index a524f2ab00..e454cea54a 100644 --- a/linux-user/include/host/s390/host-signal.h +++ b/linux-user/include/host/s390/host-signal.h @@ -21,6 +21,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) uc->uc_mcontext.psw.addr = pc; } +static inline void *host_signal_mask(ucontext_t *uc) +{ + return &uc->uc_sigmask; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { uint16_t *pinsn = (uint16_t *)host_signal_pc(uc); diff --git a/linux-user/include/host/sparc/host-signal.h b/linux-user/include/host/sparc/host-signal.h index 7342936071..158918f2ec 100644 --- a/linux-user/include/host/sparc/host-signal.h +++ b/linux-user/include/host/sparc/host-signal.h @@ -29,6 +29,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) #endif } +static inline void *host_signal_mask(ucontext_t *uc) +{ + return &uc->uc_sigmask; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { uint32_t insn = *(uint32_t *)host_signal_pc(uc); diff --git a/linux-user/include/host/x86_64/host-signal.h b/linux-user/include/host/x86_64/host-signal.h index c71d597eb2..d64d076625 100644 --- a/linux-user/include/host/x86_64/host-signal.h +++ b/linux-user/include/host/x86_64/host-signal.h @@ -20,6 +20,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) uc->uc_mcontext.gregs[REG_RIP] = pc; } +static inline void *host_signal_mask(ucontext_t *uc) +{ + return &uc->uc_sigmask; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { return uc->uc_mcontext.gregs[REG_TRAPNO] == 0xe diff --git a/linux-user/signal.c b/linux-user/signal.c index 32854bb375..0c61459d4a 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -820,6 +820,7 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) int guest_sig; uintptr_t pc = 0; bool sync_sig = false; + void *sigmask = host_signal_mask(uc); /* * Non-spoofed SIGSEGV and SIGBUS are synchronous, and need special @@ -849,8 +850,7 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) if (info->si_code == SEGV_ACCERR && h2g_valid(host_addr)) { /* If this was a write to a TB protected page, restart. */ if (is_write && - handle_sigsegv_accerr_write(cpu, &uc->uc_sigmask, - pc, guest_addr)) { + handle_sigsegv_accerr_write(cpu, sigmask, pc, guest_addr)) { return; } @@ -865,10 +865,10 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) } } - sigprocmask(SIG_SETMASK, &uc->uc_sigmask, NULL); + sigprocmask(SIG_SETMASK, sigmask, NULL); cpu_loop_exit_sigsegv(cpu, guest_addr, access_type, maperr, pc); } else { - sigprocmask(SIG_SETMASK, &uc->uc_sigmask, NULL); + sigprocmask(SIG_SETMASK, sigmask, NULL); if (info->si_code == BUS_ADRALN) { cpu_loop_exit_sigbus(cpu, guest_addr, access_type, pc); } @@ -909,17 +909,15 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) * now and it getting out to the main loop. Signals will be * unblocked again in process_pending_signals(). * - * WARNING: we cannot use sigfillset() here because the uc_sigmask + * WARNING: we cannot use sigfillset() here because the sigmask * field is a kernel sigset_t, which is much smaller than the * libc sigset_t which sigfillset() operates on. Using sigfillset() * would write 0xff bytes off the end of the structure and trash * data on the struct. - * We can't use sizeof(uc->uc_sigmask) either, because the libc - * headers define the struct field with the wrong (too large) type. */ - memset(&uc->uc_sigmask, 0xff, SIGSET_T_SIZE); - sigdelset(&uc->uc_sigmask, SIGSEGV); - sigdelset(&uc->uc_sigmask, SIGBUS); + memset(sigmask, 0xff, SIGSET_T_SIZE); + sigdelset(sigmask, SIGSEGV); + sigdelset(sigmask, SIGBUS); /* interrupt the virtual CPU as soon as possible */ cpu_exit(thread_cpu); From 9940799bdea5d456fdbc11d10f355755843063e9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 8 Feb 2022 09:40:00 +0300 Subject: [PATCH 03/34] linux-user: Introduce host_sigcontext MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not directly access ucontext_t as the third signal parameter. This is preparation for a sparc64 fix. Reviewed-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- linux-user/include/host/aarch64/host-signal.h | 13 ++++++++----- linux-user/include/host/alpha/host-signal.h | 11 +++++++---- linux-user/include/host/arm/host-signal.h | 11 +++++++---- linux-user/include/host/i386/host-signal.h | 11 +++++++---- linux-user/include/host/loongarch64/host-signal.h | 11 +++++++---- linux-user/include/host/mips/host-signal.h | 11 +++++++---- linux-user/include/host/ppc/host-signal.h | 11 +++++++---- linux-user/include/host/riscv/host-signal.h | 11 +++++++---- linux-user/include/host/s390/host-signal.h | 11 +++++++---- linux-user/include/host/sparc/host-signal.h | 11 +++++++---- linux-user/include/host/x86_64/host-signal.h | 11 +++++++---- linux-user/signal.c | 4 ++-- 12 files changed, 80 insertions(+), 47 deletions(-) diff --git a/linux-user/include/host/aarch64/host-signal.h b/linux-user/include/host/aarch64/host-signal.h index 76ab078069..be079684a2 100644 --- a/linux-user/include/host/aarch64/host-signal.h +++ b/linux-user/include/host/aarch64/host-signal.h @@ -11,6 +11,9 @@ #ifndef AARCH64_HOST_SIGNAL_H #define AARCH64_HOST_SIGNAL_H +/* The third argument to a SA_SIGINFO handler is ucontext_t. */ +typedef ucontext_t host_sigcontext; + /* Pre-3.16 kernel headers don't have these, so provide fallback definitions */ #ifndef ESR_MAGIC #define ESR_MAGIC 0x45535201 @@ -20,7 +23,7 @@ struct esr_context { }; #endif -static inline struct _aarch64_ctx *first_ctx(ucontext_t *uc) +static inline struct _aarch64_ctx *first_ctx(host_sigcontext *uc) { return (struct _aarch64_ctx *)&uc->uc_mcontext.__reserved; } @@ -30,22 +33,22 @@ static inline struct _aarch64_ctx *next_ctx(struct _aarch64_ctx *hdr) return (struct _aarch64_ctx *)((char *)hdr + hdr->size); } -static inline uintptr_t host_signal_pc(ucontext_t *uc) +static inline uintptr_t host_signal_pc(host_sigcontext *uc) { return uc->uc_mcontext.pc; } -static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc) { uc->uc_mcontext.pc = pc; } -static inline void *host_signal_mask(ucontext_t *uc) +static inline void *host_signal_mask(host_sigcontext *uc) { return &uc->uc_sigmask; } -static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc) { struct _aarch64_ctx *hdr; uint32_t insn; diff --git a/linux-user/include/host/alpha/host-signal.h b/linux-user/include/host/alpha/host-signal.h index a44d670f2b..4f9e2abc4b 100644 --- a/linux-user/include/host/alpha/host-signal.h +++ b/linux-user/include/host/alpha/host-signal.h @@ -11,22 +11,25 @@ #ifndef ALPHA_HOST_SIGNAL_H #define ALPHA_HOST_SIGNAL_H -static inline uintptr_t host_signal_pc(ucontext_t *uc) +/* The third argument to a SA_SIGINFO handler is ucontext_t. */ +typedef ucontext_t host_sigcontext; + +static inline uintptr_t host_signal_pc(host_sigcontext *uc) { return uc->uc_mcontext.sc_pc; } -static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc) { uc->uc_mcontext.sc_pc = pc; } -static inline void *host_signal_mask(ucontext_t *uc) +static inline void *host_signal_mask(host_sigcontext *uc) { return &uc->uc_sigmask; } -static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc) { uint32_t *pc = (uint32_t *)host_signal_pc(uc); uint32_t insn = *pc; diff --git a/linux-user/include/host/arm/host-signal.h b/linux-user/include/host/arm/host-signal.h index bbeb4ffefb..faba496d24 100644 --- a/linux-user/include/host/arm/host-signal.h +++ b/linux-user/include/host/arm/host-signal.h @@ -11,22 +11,25 @@ #ifndef ARM_HOST_SIGNAL_H #define ARM_HOST_SIGNAL_H -static inline uintptr_t host_signal_pc(ucontext_t *uc) +/* The third argument to a SA_SIGINFO handler is ucontext_t. */ +typedef ucontext_t host_sigcontext; + +static inline uintptr_t host_signal_pc(host_sigcontext *uc) { return uc->uc_mcontext.arm_pc; } -static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc) { uc->uc_mcontext.arm_pc = pc; } -static inline void *host_signal_mask(ucontext_t *uc) +static inline void *host_signal_mask(host_sigcontext *uc) { return &uc->uc_sigmask; } -static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc) { /* * In the FSR, bit 11 is WnR, assuming a v6 or diff --git a/linux-user/include/host/i386/host-signal.h b/linux-user/include/host/i386/host-signal.h index fd36f06bda..e2b64f077f 100644 --- a/linux-user/include/host/i386/host-signal.h +++ b/linux-user/include/host/i386/host-signal.h @@ -11,22 +11,25 @@ #ifndef I386_HOST_SIGNAL_H #define I386_HOST_SIGNAL_H -static inline uintptr_t host_signal_pc(ucontext_t *uc) +/* The third argument to a SA_SIGINFO handler is ucontext_t. */ +typedef ucontext_t host_sigcontext; + +static inline uintptr_t host_signal_pc(host_sigcontext *uc) { return uc->uc_mcontext.gregs[REG_EIP]; } -static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc) { uc->uc_mcontext.gregs[REG_EIP] = pc; } -static inline void *host_signal_mask(ucontext_t *uc) +static inline void *host_signal_mask(host_sigcontext *uc) { return &uc->uc_sigmask; } -static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc) { return uc->uc_mcontext.gregs[REG_TRAPNO] == 0xe && (uc->uc_mcontext.gregs[REG_ERR] & 0x2); diff --git a/linux-user/include/host/loongarch64/host-signal.h b/linux-user/include/host/loongarch64/host-signal.h index a9dfe0c688..d33c3fc03e 100644 --- a/linux-user/include/host/loongarch64/host-signal.h +++ b/linux-user/include/host/loongarch64/host-signal.h @@ -11,22 +11,25 @@ #ifndef LOONGARCH64_HOST_SIGNAL_H #define LOONGARCH64_HOST_SIGNAL_H -static inline uintptr_t host_signal_pc(ucontext_t *uc) +/* The third argument to a SA_SIGINFO handler is ucontext_t. */ +typedef ucontext_t host_sigcontext; + +static inline uintptr_t host_signal_pc(host_sigcontext *uc) { return uc->uc_mcontext.__pc; } -static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc) { uc->uc_mcontext.__pc = pc; } -static inline void *host_signal_mask(ucontext_t *uc) +static inline void *host_signal_mask(host_sigcontext *uc) { return &uc->uc_sigmask; } -static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc) { const uint32_t *pinsn = (const uint32_t *)host_signal_pc(uc); uint32_t insn = pinsn[0]; diff --git a/linux-user/include/host/mips/host-signal.h b/linux-user/include/host/mips/host-signal.h index ff840dd491..0dbc5cecfd 100644 --- a/linux-user/include/host/mips/host-signal.h +++ b/linux-user/include/host/mips/host-signal.h @@ -11,17 +11,20 @@ #ifndef MIPS_HOST_SIGNAL_H #define MIPS_HOST_SIGNAL_H -static inline uintptr_t host_signal_pc(ucontext_t *uc) +/* The third argument to a SA_SIGINFO handler is ucontext_t. */ +typedef ucontext_t host_sigcontext; + +static inline uintptr_t host_signal_pc(host_sigcontext *uc) { return uc->uc_mcontext.pc; } -static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc) { uc->uc_mcontext.pc = pc; } -static inline void *host_signal_mask(ucontext_t *uc) +static inline void *host_signal_mask(host_sigcontext *uc) { return &uc->uc_sigmask; } @@ -30,7 +33,7 @@ static inline void *host_signal_mask(ucontext_t *uc) #error "Unsupported encoding" #endif -static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc) { uint32_t insn = *(uint32_t *)host_signal_pc(uc); diff --git a/linux-user/include/host/ppc/host-signal.h b/linux-user/include/host/ppc/host-signal.h index 730a321d98..b80384d135 100644 --- a/linux-user/include/host/ppc/host-signal.h +++ b/linux-user/include/host/ppc/host-signal.h @@ -11,22 +11,25 @@ #ifndef PPC_HOST_SIGNAL_H #define PPC_HOST_SIGNAL_H -static inline uintptr_t host_signal_pc(ucontext_t *uc) +/* The third argument to a SA_SIGINFO handler is ucontext_t. */ +typedef ucontext_t host_sigcontext; + +static inline uintptr_t host_signal_pc(host_sigcontext *uc) { return uc->uc_mcontext.regs->nip; } -static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc) { uc->uc_mcontext.regs->nip = pc; } -static inline void *host_signal_mask(ucontext_t *uc) +static inline void *host_signal_mask(host_sigcontext *uc) { return &uc->uc_sigmask; } -static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc) { return uc->uc_mcontext.regs->trap != 0x400 && (uc->uc_mcontext.regs->dsisr & 0x02000000); diff --git a/linux-user/include/host/riscv/host-signal.h b/linux-user/include/host/riscv/host-signal.h index aceae544f2..decacb2325 100644 --- a/linux-user/include/host/riscv/host-signal.h +++ b/linux-user/include/host/riscv/host-signal.h @@ -11,22 +11,25 @@ #ifndef RISCV_HOST_SIGNAL_H #define RISCV_HOST_SIGNAL_H -static inline uintptr_t host_signal_pc(ucontext_t *uc) +/* The third argument to a SA_SIGINFO handler is ucontext_t. */ +typedef ucontext_t host_sigcontext; + +static inline uintptr_t host_signal_pc(host_sigcontext *uc) { return uc->uc_mcontext.__gregs[REG_PC]; } -static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc) { uc->uc_mcontext.__gregs[REG_PC] = pc; } -static inline void *host_signal_mask(ucontext_t *uc) +static inline void *host_signal_mask(host_sigcontext *uc) { return &uc->uc_sigmask; } -static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc) { /* * Detect store by reading the instruction at the program counter. diff --git a/linux-user/include/host/s390/host-signal.h b/linux-user/include/host/s390/host-signal.h index e454cea54a..6f191e64d7 100644 --- a/linux-user/include/host/s390/host-signal.h +++ b/linux-user/include/host/s390/host-signal.h @@ -11,22 +11,25 @@ #ifndef S390_HOST_SIGNAL_H #define S390_HOST_SIGNAL_H -static inline uintptr_t host_signal_pc(ucontext_t *uc) +/* The third argument to a SA_SIGINFO handler is ucontext_t. */ +typedef ucontext_t host_sigcontext; + +static inline uintptr_t host_signal_pc(host_sigcontext *uc) { return uc->uc_mcontext.psw.addr; } -static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc) { uc->uc_mcontext.psw.addr = pc; } -static inline void *host_signal_mask(ucontext_t *uc) +static inline void *host_signal_mask(host_sigcontext *uc) { return &uc->uc_sigmask; } -static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc) { uint16_t *pinsn = (uint16_t *)host_signal_pc(uc); diff --git a/linux-user/include/host/sparc/host-signal.h b/linux-user/include/host/sparc/host-signal.h index 158918f2ec..871b6bb269 100644 --- a/linux-user/include/host/sparc/host-signal.h +++ b/linux-user/include/host/sparc/host-signal.h @@ -11,7 +11,10 @@ #ifndef SPARC_HOST_SIGNAL_H #define SPARC_HOST_SIGNAL_H -static inline uintptr_t host_signal_pc(ucontext_t *uc) +/* FIXME: the third argument to a SA_SIGINFO handler is *not* ucontext_t. */ +typedef ucontext_t host_sigcontext; + +static inline uintptr_t host_signal_pc(host_sigcontext *uc) { #ifdef __arch64__ return uc->uc_mcontext.mc_gregs[MC_PC]; @@ -20,7 +23,7 @@ static inline uintptr_t host_signal_pc(ucontext_t *uc) #endif } -static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc) { #ifdef __arch64__ uc->uc_mcontext.mc_gregs[MC_PC] = pc; @@ -29,12 +32,12 @@ static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) #endif } -static inline void *host_signal_mask(ucontext_t *uc) +static inline void *host_signal_mask(host_sigcontext *uc) { return &uc->uc_sigmask; } -static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc) { uint32_t insn = *(uint32_t *)host_signal_pc(uc); diff --git a/linux-user/include/host/x86_64/host-signal.h b/linux-user/include/host/x86_64/host-signal.h index d64d076625..5a7627fedc 100644 --- a/linux-user/include/host/x86_64/host-signal.h +++ b/linux-user/include/host/x86_64/host-signal.h @@ -10,22 +10,25 @@ #ifndef X86_64_HOST_SIGNAL_H #define X86_64_HOST_SIGNAL_H -static inline uintptr_t host_signal_pc(ucontext_t *uc) +/* The third argument to a SA_SIGINFO handler is ucontext_t. */ +typedef ucontext_t host_sigcontext; + +static inline uintptr_t host_signal_pc(host_sigcontext *uc) { return uc->uc_mcontext.gregs[REG_RIP]; } -static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc) { uc->uc_mcontext.gregs[REG_RIP] = pc; } -static inline void *host_signal_mask(ucontext_t *uc) +static inline void *host_signal_mask(host_sigcontext *uc) { return &uc->uc_sigmask; } -static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) +static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc) { return uc->uc_mcontext.gregs[REG_TRAPNO] == 0xe && (uc->uc_mcontext.gregs[REG_ERR] & 0x2); diff --git a/linux-user/signal.c b/linux-user/signal.c index 0c61459d4a..27a0ff30e9 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -800,7 +800,7 @@ void queue_signal(CPUArchState *env, int sig, int si_type, /* Adjust the signal context to rewind out of safe-syscall if we're in it */ static inline void rewind_if_in_safe_syscall(void *puc) { - ucontext_t *uc = (ucontext_t *)puc; + host_sigcontext *uc = (host_sigcontext *)puc; uintptr_t pcreg = host_signal_pc(uc); if (pcreg > (uintptr_t)safe_syscall_start @@ -815,7 +815,7 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) CPUState *cpu = env_cpu(env); TaskState *ts = cpu->opaque; target_siginfo_t tinfo; - ucontext_t *uc = puc; + host_sigcontext *uc = puc; struct emulated_sigtable *k; int guest_sig; uintptr_t pc = 0; From 238b32de39b77b01ef4f205e09b01fda8a3a9216 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 8 Feb 2022 09:42:13 +0300 Subject: [PATCH 04/34] linux-user: Move sparc/host-signal.h to sparc64/host-signal.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do not support sparc32 as a host, so there's no point in sparc64 redirecting to sparc. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- linux-user/include/host/sparc/host-signal.h | 71 ------------------- linux-user/include/host/sparc64/host-signal.h | 64 ++++++++++++++++- 2 files changed, 63 insertions(+), 72 deletions(-) delete mode 100644 linux-user/include/host/sparc/host-signal.h diff --git a/linux-user/include/host/sparc/host-signal.h b/linux-user/include/host/sparc/host-signal.h deleted file mode 100644 index 871b6bb269..0000000000 --- a/linux-user/include/host/sparc/host-signal.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * host-signal.h: signal info dependent on the host architecture - * - * Copyright (c) 2003-2005 Fabrice Bellard - * Copyright (c) 2021 Linaro Limited - * - * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. - * See the COPYING file in the top-level directory. - */ - -#ifndef SPARC_HOST_SIGNAL_H -#define SPARC_HOST_SIGNAL_H - -/* FIXME: the third argument to a SA_SIGINFO handler is *not* ucontext_t. */ -typedef ucontext_t host_sigcontext; - -static inline uintptr_t host_signal_pc(host_sigcontext *uc) -{ -#ifdef __arch64__ - return uc->uc_mcontext.mc_gregs[MC_PC]; -#else - return uc->uc_mcontext.gregs[REG_PC]; -#endif -} - -static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc) -{ -#ifdef __arch64__ - uc->uc_mcontext.mc_gregs[MC_PC] = pc; -#else - uc->uc_mcontext.gregs[REG_PC] = pc; -#endif -} - -static inline void *host_signal_mask(host_sigcontext *uc) -{ - return &uc->uc_sigmask; -} - -static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc) -{ - uint32_t insn = *(uint32_t *)host_signal_pc(uc); - - if ((insn >> 30) == 3) { - switch ((insn >> 19) & 0x3f) { - case 0x05: /* stb */ - case 0x15: /* stba */ - case 0x06: /* sth */ - case 0x16: /* stha */ - case 0x04: /* st */ - case 0x14: /* sta */ - case 0x07: /* std */ - case 0x17: /* stda */ - case 0x0e: /* stx */ - case 0x1e: /* stxa */ - case 0x24: /* stf */ - case 0x34: /* stfa */ - case 0x27: /* stdf */ - case 0x37: /* stdfa */ - case 0x26: /* stqf */ - case 0x36: /* stqfa */ - case 0x25: /* stfsr */ - case 0x3c: /* casa */ - case 0x3e: /* casxa */ - return true; - } - } - return false; -} - -#endif diff --git a/linux-user/include/host/sparc64/host-signal.h b/linux-user/include/host/sparc64/host-signal.h index 1191fe2d40..f8a8a4908d 100644 --- a/linux-user/include/host/sparc64/host-signal.h +++ b/linux-user/include/host/sparc64/host-signal.h @@ -1 +1,63 @@ -#include "../sparc/host-signal.h" +/* + * host-signal.h: signal info dependent on the host architecture + * + * Copyright (c) 2003-2005 Fabrice Bellard + * Copyright (c) 2021 Linaro Limited + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef SPARC64_HOST_SIGNAL_H +#define SPARC64_HOST_SIGNAL_H + +/* FIXME: the third argument to a SA_SIGINFO handler is *not* ucontext_t. */ +typedef ucontext_t host_sigcontext; + +static inline uintptr_t host_signal_pc(host_sigcontext *uc) +{ + return uc->uc_mcontext.mc_gregs[MC_PC]; +} + +static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc) +{ + uc->uc_mcontext.mc_gregs[MC_PC] = pc; +} + +static inline void *host_signal_mask(host_sigcontext *uc) +{ + return &uc->uc_sigmask; +} + +static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc) +{ + uint32_t insn = *(uint32_t *)host_signal_pc(uc); + + if ((insn >> 30) == 3) { + switch ((insn >> 19) & 0x3f) { + case 0x05: /* stb */ + case 0x15: /* stba */ + case 0x06: /* sth */ + case 0x16: /* stha */ + case 0x04: /* st */ + case 0x14: /* sta */ + case 0x07: /* std */ + case 0x17: /* stda */ + case 0x0e: /* stx */ + case 0x1e: /* stxa */ + case 0x24: /* stf */ + case 0x34: /* stfa */ + case 0x27: /* stdf */ + case 0x37: /* stdfa */ + case 0x26: /* stqf */ + case 0x36: /* stqfa */ + case 0x25: /* stfsr */ + case 0x3c: /* casa */ + case 0x3e: /* casxa */ + return true; + } + } + return false; +} + +#endif From 4f152ef27e26d82905244b7cbe344929630d8fae Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 8 Feb 2022 09:49:12 +0300 Subject: [PATCH 05/34] linux-user/include/host/sparc64: Fix host_sigcontext Sparc64 is unique on linux in *not* passing ucontext_t as the third argument to a SA_SIGINFO handler. It passes the old struct sigcontext instead. Set both pc and npc in host_signal_set_pc. Fixes: 8b5bd461935b ("linux-user/host/sparc: Populate host_signal.h") Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- linux-user/include/host/sparc64/host-signal.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/linux-user/include/host/sparc64/host-signal.h b/linux-user/include/host/sparc64/host-signal.h index f8a8a4908d..64957c2bca 100644 --- a/linux-user/include/host/sparc64/host-signal.h +++ b/linux-user/include/host/sparc64/host-signal.h @@ -11,22 +11,23 @@ #ifndef SPARC64_HOST_SIGNAL_H #define SPARC64_HOST_SIGNAL_H -/* FIXME: the third argument to a SA_SIGINFO handler is *not* ucontext_t. */ -typedef ucontext_t host_sigcontext; +/* The third argument to a SA_SIGINFO handler is struct sigcontext. */ +typedef struct sigcontext host_sigcontext; -static inline uintptr_t host_signal_pc(host_sigcontext *uc) +static inline uintptr_t host_signal_pc(host_sigcontext *sc) { - return uc->uc_mcontext.mc_gregs[MC_PC]; + return sc->sigc_regs.tpc; } -static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc) +static inline void host_signal_set_pc(host_sigcontext *sc, uintptr_t pc) { - uc->uc_mcontext.mc_gregs[MC_PC] = pc; + sc->sigc_regs.tpc = pc; + sc->sigc_regs.tnpc = pc + 4; } -static inline void *host_signal_mask(host_sigcontext *uc) +static inline void *host_signal_mask(host_sigcontext *sc) { - return &uc->uc_sigmask; + return &sc->sigc_mask; } static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc) From cfc2a2d69d59f02b32df3098ce17e10ab86d43c6 Mon Sep 17 00:00:00 2001 From: Idan Horowitz Date: Mon, 10 Jan 2022 18:47:53 +0200 Subject: [PATCH 06/34] accel/tcg: Optimize jump cache flush during tlb range flush When the length of the range is large enough, clearing the whole cache is faster than iterating over the (possibly extremely large) set of pages contained in the range. This mimics the pre-existing similar optimization done on the flush of the tlb itself. Signed-off-by: Idan Horowitz Message-Id: <20220110164754.1066025-1-idan.horowitz@gmail.com> Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 5e0d0eebc3..926d9a9192 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -783,6 +783,15 @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu, } qemu_spin_unlock(&env_tlb(env)->c.lock); + /* + * If the length is larger than the jump cache size, then it will take + * longer to clear each entry individually than it will to clear it all. + */ + if (d.len >= (TARGET_PAGE_SIZE * TB_JMP_CACHE_SIZE)) { + cpu_tb_jmp_cache_clear(cpu); + return; + } + for (target_ulong i = 0; i < d.len; i += TARGET_PAGE_SIZE) { tb_flush_jmp_cache(cpu, d.addr + i); } From 25e82fb769eddb83e0b68487b8b08d9426704d54 Mon Sep 17 00:00:00 2001 From: Idan Horowitz Date: Fri, 14 Jan 2022 02:43:57 +0200 Subject: [PATCH 07/34] softmmu/cpus: Check if the cpu work list is empty atomically Instead of taking the lock of the cpu work list in order to check if it's empty, we can just read the head pointer atomically. This decreases cpu_work_list_empty's share from 5% to 1.3% in a profile of icount-enabled aarch64-softmmu. Signed-off-by: Idan Horowitz Message-Id: <20220114004358.299534-1-idan.horowitz@gmail.com> Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson --- softmmu/cpus.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/softmmu/cpus.c b/softmmu/cpus.c index 23bca46b07..035395ae13 100644 --- a/softmmu/cpus.c +++ b/softmmu/cpus.c @@ -73,12 +73,7 @@ bool cpu_is_stopped(CPUState *cpu) bool cpu_work_list_empty(CPUState *cpu) { - bool ret; - - qemu_mutex_lock(&cpu->work_mutex); - ret = QSIMPLEQ_EMPTY(&cpu->work_list); - qemu_mutex_unlock(&cpu->work_mutex); - return ret; + return QSIMPLEQ_EMPTY_ATOMIC(&cpu->work_list); } bool cpu_thread_is_idle(CPUState *cpu) From c3e97f640642d6ea11cddc1ed9d4d68d180eb18a Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Mon, 31 Jan 2022 14:25:40 +0300 Subject: [PATCH 08/34] replay: use CF_NOIRQ for special exception-replaying TB Commit aff0e204cb1f1c036a496c94c15f5dfafcd9b4b4 introduced CF_NOIRQ usage, but one case was forgotten. Record/replay uses one special TB which is not really executed, but used to cause a correct exception in replay mode. This patch adds CF_NOIRQ flag for such block. Signed-off-by: Pavel Dovgalyuk Reviewed-by: Richard Henderson Message-Id: <164362834054.1754532.7678416881159817273.stgit@pasha-ThinkPad-X280> Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 8b4cd6c59d..8da6a55593 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -648,7 +648,8 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) if (replay_has_exception() && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) { /* Execute just one insn to trigger exception pending in the log */ - cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT) | 1; + cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT) + | CF_NOIRQ | 1; } #endif return false; From 7b17a475404c970de1b192d026e00058cea980a6 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Mon, 7 Feb 2022 00:21:06 +0800 Subject: [PATCH 09/34] tcg/loongarch64: Fix fallout from recent MO_Q renaming Apparently we were left behind; just renaming MO_Q to MO_UQ is enough. Fixes: fc313c64345453c7 ("exec/memop: Adding signedness to quad definitions") Signed-off-by: WANG Xuerui Message-Id: <20220206162106.1092364-1-i.qemu@xen0n.name> Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target.c.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 9cd46c9be3..d31a0e5991 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -871,7 +871,7 @@ static void tcg_out_qemu_ld_indexed(TCGContext *s, TCGReg rd, TCGReg rj, case MO_SL: tcg_out_opc_ldx_w(s, rd, rj, rk); break; - case MO_Q: + case MO_UQ: tcg_out_opc_ldx_d(s, rd, rj, rk); break; default: From b1ee3c67253a40f8e62a799eb5b5a0d2954769f4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 27 Jul 2021 19:42:35 -1000 Subject: [PATCH 10/34] tcg/i386: Support raising sigbus for user-only Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.c.inc | 103 ++++++++++++++++++++++++++++++++++++-- tcg/i386/tcg-target.h | 2 - 2 files changed, 98 insertions(+), 7 deletions(-) diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 4dab09f265..faa15eecab 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -22,6 +22,7 @@ * THE SOFTWARE. */ +#include "../tcg-ldst.c.inc" #include "../tcg-pool.c.inc" #ifdef CONFIG_DEBUG_TCG @@ -421,8 +422,9 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define OPC_VZEROUPPER (0x77 | P_EXT) #define OPC_XCHG_ax_r32 (0x90) -#define OPC_GRP3_Ev (0xf7) -#define OPC_GRP5 (0xff) +#define OPC_GRP3_Eb (0xf6) +#define OPC_GRP3_Ev (0xf7) +#define OPC_GRP5 (0xff) #define OPC_GRP14 (0x73 | P_EXT | P_DATA16) /* Group 1 opcode extensions for 0x80-0x83. @@ -444,6 +446,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define SHIFT_SAR 7 /* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */ +#define EXT3_TESTi 0 #define EXT3_NOT 2 #define EXT3_NEG 3 #define EXT3_MUL 4 @@ -1606,8 +1609,6 @@ static void tcg_out_nopn(TCGContext *s, int n) } #if defined(CONFIG_SOFTMMU) -#include "../tcg-ldst.c.inc" - /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * int mmu_idx, uintptr_t ra) */ @@ -1916,7 +1917,84 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); return true; } -#elif TCG_TARGET_REG_BITS == 32 +#else + +static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo, + TCGReg addrhi, unsigned a_bits) +{ + unsigned a_mask = (1 << a_bits) - 1; + TCGLabelQemuLdst *label; + + /* + * We are expecting a_bits to max out at 7, so we can usually use testb. + * For i686, we have to use testl for %esi/%edi. + */ + if (a_mask <= 0xff && (TCG_TARGET_REG_BITS == 64 || addrlo < 4)) { + tcg_out_modrm(s, OPC_GRP3_Eb | P_REXB_RM, EXT3_TESTi, addrlo); + tcg_out8(s, a_mask); + } else { + tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_TESTi, addrlo); + tcg_out32(s, a_mask); + } + + /* jne slow_path */ + tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); + + label = new_ldst_label(s); + label->is_ld = is_ld; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + label->raddr = tcg_splitwx_to_rx(s->code_ptr + 4); + label->label_ptr[0] = s->code_ptr; + + s->code_ptr += 4; +} + +static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) +{ + /* resolve label address */ + tcg_patch32(l->label_ptr[0], s->code_ptr - l->label_ptr[0] - 4); + + if (TCG_TARGET_REG_BITS == 32) { + int ofs = 0; + + tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs); + ofs += 4; + + tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs); + ofs += 4; + if (TARGET_LONG_BITS == 64) { + tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs); + ofs += 4; + } + + tcg_out_pushi(s, (uintptr_t)l->raddr); + } else { + tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1], + l->addrlo_reg); + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RAX, (uintptr_t)l->raddr); + tcg_out_push(s, TCG_REG_RAX); + } + + /* "Tail call" to the helper, with the return address back inline. */ + tcg_out_jmp(s, (const void *)(l->is_ld ? helper_unaligned_ld + : helper_unaligned_st)); + return true; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +#if TCG_TARGET_REG_BITS == 32 # define x86_guest_base_seg 0 # define x86_guest_base_index -1 # define x86_guest_base_offset guest_base @@ -1950,6 +2028,7 @@ static inline int setup_guest_base_seg(void) return 0; } # endif +#endif #endif /* SOFTMMU */ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, @@ -2059,6 +2138,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) #if defined(CONFIG_SOFTMMU) int mem_index; tcg_insn_unit *label_ptr[2]; +#else + unsigned a_bits; #endif datalo = *args++; @@ -2081,6 +2162,11 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) add_qemu_ldst_label(s, true, is64, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); + } + tcg_out_qemu_ld_direct(s, datalo, datahi, addrlo, x86_guest_base_index, x86_guest_base_offset, x86_guest_base_seg, is64, opc); @@ -2148,6 +2234,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) #if defined(CONFIG_SOFTMMU) int mem_index; tcg_insn_unit *label_ptr[2]; +#else + unsigned a_bits; #endif datalo = *args++; @@ -2170,6 +2258,11 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) add_qemu_ldst_label(s, false, is64, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits); + } + tcg_out_qemu_st_direct(s, datalo, datahi, addrlo, x86_guest_base_index, x86_guest_base_offset, x86_guest_base_seg, opc); #endif diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index b00a6da293..3b2c9437a0 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -232,9 +232,7 @@ static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, #define TCG_TARGET_HAS_MEMORY_BSWAP have_movbe -#ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS -#endif #define TCG_TARGET_NEED_POOL_LABELS #endif From f85ab3d2e51e45e7fa33b539bc7e1350bdf64dde Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 3 Aug 2021 20:07:57 +0000 Subject: [PATCH 11/34] tcg/aarch64: Support raising sigbus for user-only Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 91 +++++++++++++++++++++++++++++------- tcg/aarch64/tcg-target.h | 2 - 2 files changed, 74 insertions(+), 19 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index a8db553287..077fc51401 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -10,6 +10,7 @@ * See the COPYING file in the top-level directory for details. */ +#include "../tcg-ldst.c.inc" #include "../tcg-pool.c.inc" #include "qemu/bitops.h" @@ -443,6 +444,7 @@ typedef enum { I3404_ANDI = 0x12000000, I3404_ORRI = 0x32000000, I3404_EORI = 0x52000000, + I3404_ANDSI = 0x72000000, /* Move wide immediate instructions. */ I3405_MOVN = 0x12800000, @@ -1328,8 +1330,9 @@ static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target) if (offset == sextract64(offset, 0, 26)) { tcg_out_insn(s, 3206, B, offset); } else { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); - tcg_out_insn(s, 3207, BR, TCG_REG_TMP); + /* Choose X9 as a call-clobbered non-LR temporary. */ + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target); + tcg_out_insn(s, 3207, BR, TCG_REG_X9); } } @@ -1541,9 +1544,14 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, } } -#ifdef CONFIG_SOFTMMU -#include "../tcg-ldst.c.inc" +static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) +{ + ptrdiff_t offset = tcg_pcrel_diff(s, target); + tcg_debug_assert(offset == sextract64(offset, 0, 21)); + tcg_out_insn(s, 3406, ADR, rd, offset); +} +#ifdef CONFIG_SOFTMMU /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * MemOpIdx oi, uintptr_t ra) */ @@ -1577,13 +1585,6 @@ static void * const qemu_st_helpers[MO_SIZE + 1] = { #endif }; -static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target) -{ - ptrdiff_t offset = tcg_pcrel_diff(s, target); - tcg_debug_assert(offset == sextract64(offset, 0, 21)); - tcg_out_insn(s, 3406, ADR, rd, offset); -} - static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { MemOpIdx oi = lb->oi; @@ -1714,15 +1715,58 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); } +#else +static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg, + unsigned a_bits) +{ + unsigned a_mask = (1 << a_bits) - 1; + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->addrlo_reg = addr_reg; + + /* tst addr, #mask */ + tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask); + + label->label_ptr[0] = s->code_ptr; + + /* b.ne slow_path */ + tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); + + label->raddr = tcg_splitwx_to_rx(s->code_ptr); +} + +static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) +{ + if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { + return false; + } + + tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); + + /* "Tail call" to the helper, with the return address back inline. */ + tcg_out_adr(s, TCG_REG_LR, l->raddr); + tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld + : helper_unaligned_st)); + return true; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} #endif /* CONFIG_SOFTMMU */ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, TCGReg data_r, TCGReg addr_r, TCGType otype, TCGReg off_r) { - /* Byte swapping is left to middle-end expansion. */ - tcg_debug_assert((memop & MO_BSWAP) == 0); - switch (memop & MO_SSIZE) { case MO_UB: tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r); @@ -1756,9 +1800,6 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, TCGReg data_r, TCGReg addr_r, TCGType otype, TCGReg off_r) { - /* Byte swapping is left to middle-end expansion. */ - tcg_debug_assert((memop & MO_BSWAP) == 0); - switch (memop & MO_SIZE) { case MO_8: tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r); @@ -1782,6 +1823,10 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, { MemOp memop = get_memop(oi); const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; + + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((memop & MO_BSWAP) == 0); + #ifdef CONFIG_SOFTMMU unsigned mem_index = get_mmuidx(oi); tcg_insn_unit *label_ptr; @@ -1792,6 +1837,10 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ + unsigned a_bits = get_alignment_bits(memop); + if (a_bits) { + tcg_out_test_alignment(s, true, addr_reg, a_bits); + } if (USE_GUEST_BASE) { tcg_out_qemu_ld_direct(s, memop, ext, data_reg, TCG_REG_GUEST_BASE, otype, addr_reg); @@ -1807,6 +1856,10 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, { MemOp memop = get_memop(oi); const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; + + /* Byte swapping is left to middle-end expansion. */ + tcg_debug_assert((memop & MO_BSWAP) == 0); + #ifdef CONFIG_SOFTMMU unsigned mem_index = get_mmuidx(oi); tcg_insn_unit *label_ptr; @@ -1817,6 +1870,10 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64, data_reg, addr_reg, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ + unsigned a_bits = get_alignment_bits(memop); + if (a_bits) { + tcg_out_test_alignment(s, false, addr_reg, a_bits); + } if (USE_GUEST_BASE) { tcg_out_qemu_st_direct(s, memop, data_reg, TCG_REG_GUEST_BASE, otype, addr_reg); diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 7a93ac8023..876af589ce 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -151,9 +151,7 @@ typedef enum { void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); -#ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS -#endif #define TCG_TARGET_NEED_POOL_LABELS #endif /* AARCH64_TCG_TARGET_H */ From 8605cbcdeedb5f24a20dd1341a7a08bd9f38be71 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 3 Aug 2021 22:08:55 +0000 Subject: [PATCH 12/34] tcg/ppc: Support raising sigbus for user-only Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c.inc | 98 ++++++++++++++++++++++++++++++++++++---- tcg/ppc/tcg-target.h | 2 - 2 files changed, 90 insertions(+), 10 deletions(-) diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 9e79a7edee..dea24f23c4 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -24,6 +24,7 @@ #include "elf.h" #include "../tcg-pool.c.inc" +#include "../tcg-ldst.c.inc" /* * Standardize on the _CALL_FOO symbols used by GCC: @@ -1881,7 +1882,8 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, } } -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) +static void tcg_out_call_int(TCGContext *s, int lk, + const tcg_insn_unit *target) { #ifdef _CALL_AIX /* Look through the descriptor. If the branch is in range, and we @@ -1892,7 +1894,7 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) if (in_range_b(diff) && toc == (uint32_t)toc) { tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc); - tcg_out_b(s, LK, tgt); + tcg_out_b(s, lk, tgt); } else { /* Fold the low bits of the constant into the addresses below. */ intptr_t arg = (intptr_t)target; @@ -1907,7 +1909,7 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs); tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP); - tcg_out32(s, BCCTR | BO_ALWAYS | LK); + tcg_out32(s, BCCTR | BO_ALWAYS | lk); } #elif defined(_CALL_ELF) && _CALL_ELF == 2 intptr_t diff; @@ -1921,16 +1923,21 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) diff = tcg_pcrel_diff(s, target); if (in_range_b(diff)) { - tcg_out_b(s, LK, target); + tcg_out_b(s, lk, target); } else { tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR); - tcg_out32(s, BCCTR | BO_ALWAYS | LK); + tcg_out32(s, BCCTR | BO_ALWAYS | lk); } #else - tcg_out_b(s, LK, target); + tcg_out_b(s, lk, target); #endif } +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) +{ + tcg_out_call_int(s, LK, target); +} + static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = { [MO_UB] = LBZX, [MO_UW] = LHZX, @@ -1960,8 +1967,6 @@ static const uint32_t qemu_exts_opc[4] = { }; #if defined (CONFIG_SOFTMMU) -#include "../tcg-ldst.c.inc" - /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, * int mmu_idx, uintptr_t ra) */ @@ -2227,6 +2232,71 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_b(s, 0, lb->raddr); return true; } +#else + +static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo, + TCGReg addrhi, unsigned a_bits) +{ + unsigned a_mask = (1 << a_bits) - 1; + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + + /* We are expecting a_bits to max out at 7, much lower than ANDI. */ + tcg_debug_assert(a_bits < 16); + tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, a_mask)); + + label->label_ptr[0] = s->code_ptr; + tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK); + + label->raddr = tcg_splitwx_to_rx(s->code_ptr); +} + +static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) +{ + if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { + return false; + } + + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + TCGReg arg = TCG_REG_R4; +#ifdef TCG_TARGET_CALL_ALIGN_ARGS + arg |= 1; +#endif + if (l->addrlo_reg != arg) { + tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg); + tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg); + } else if (l->addrhi_reg != arg + 1) { + tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg); + tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg); + } else { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, arg); + tcg_out_mov(s, TCG_TYPE_I32, arg, arg + 1); + tcg_out_mov(s, TCG_TYPE_I32, arg + 1, TCG_REG_R0); + } + } else { + tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R4, l->addrlo_reg); + } + tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, TCG_AREG0); + + /* "Tail call" to the helper, with the return address back inline. */ + tcg_out_call_int(s, 0, (const void *)(l->is_ld ? helper_unaligned_ld + : helper_unaligned_st)); + return true; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + #endif /* SOFTMMU */ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) @@ -2238,6 +2308,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) #ifdef CONFIG_SOFTMMU int mem_index; tcg_insn_unit *label_ptr; +#else + unsigned a_bits; #endif datalo = *args++; @@ -2258,6 +2330,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); + } rbase = guest_base ? TCG_GUEST_BASE_REG : 0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); @@ -2313,6 +2389,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) #ifdef CONFIG_SOFTMMU int mem_index; tcg_insn_unit *label_ptr; +#else + unsigned a_bits; #endif datalo = *args++; @@ -2333,6 +2411,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits); + } rbase = guest_base ? TCG_GUEST_BASE_REG : 0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 0943192cde..c775c97b61 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -182,9 +182,7 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 -#ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS -#endif #define TCG_TARGET_NEED_POOL_LABELS #endif From a3fb7c99c05659c98f4de301a932c70991382e26 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 5 Aug 2021 07:20:12 -1000 Subject: [PATCH 13/34] tcg/riscv: Support raising sigbus for user-only Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target.c.inc | 63 ++++++++++++++++++++++++++++++++++++-- tcg/riscv/tcg-target.h | 2 -- 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index e9488f7093..6409d9c3d5 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -27,6 +27,7 @@ * THE SOFTWARE. */ +#include "../tcg-ldst.c.inc" #include "../tcg-pool.c.inc" #ifdef CONFIG_DEBUG_TCG @@ -847,8 +848,6 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) */ #if defined(CONFIG_SOFTMMU) -#include "../tcg-ldst.c.inc" - /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * MemOpIdx oi, uintptr_t ra) */ @@ -1053,6 +1052,54 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_goto(s, l->raddr); return true; } +#else + +static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg, + unsigned a_bits) +{ + unsigned a_mask = (1 << a_bits) - 1; + TCGLabelQemuLdst *l = new_ldst_label(s); + + l->is_ld = is_ld; + l->addrlo_reg = addr_reg; + + /* We are expecting a_bits to max out at 7, so we can always use andi. */ + tcg_debug_assert(a_bits < 12); + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask); + + l->label_ptr[0] = s->code_ptr; + tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0); + + l->raddr = tcg_splitwx_to_rx(s->code_ptr); +} + +static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) +{ + /* resolve label address */ + if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { + return false; + } + + tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); + + /* tail call, with the return address back inline. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (uintptr_t)l->raddr); + tcg_out_call_int(s, (const void *)(l->is_ld ? helper_unaligned_ld + : helper_unaligned_st), true); + return true; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + #endif /* CONFIG_SOFTMMU */ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, @@ -1108,6 +1155,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) MemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[1]; +#else + unsigned a_bits; #endif TCGReg base = TCG_REG_TMP0; @@ -1130,6 +1179,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) tcg_out_ext32u(s, base, addr_regl); addr_regl = base; } + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, true, addr_regl, a_bits); + } if (guest_base != 0) { tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl); } @@ -1174,6 +1227,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) MemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[1]; +#else + unsigned a_bits; #endif TCGReg base = TCG_REG_TMP0; @@ -1196,6 +1251,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) tcg_out_ext32u(s, base, addr_regl); addr_regl = base; } + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, false, addr_regl, a_bits); + } if (guest_base != 0) { tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl); } diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index ef78b99e98..11c9b3e4f4 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -165,9 +165,7 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); #define TCG_TARGET_DEFAULT_MO (0) -#ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS -#endif #define TCG_TARGET_NEED_POOL_LABELS #define TCG_TARGET_HAS_MEMORY_BSWAP 0 From 1cd49868d47ceceaa0302582bf024936836392a6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 4 Aug 2021 00:08:57 +0000 Subject: [PATCH 14/34] tcg/s390x: Support raising sigbus for user-only Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/s390x/tcg-target.c.inc | 59 ++++++++++++++++++++++++++++++++++++-- tcg/s390x/tcg-target.h | 2 -- 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index b12fbfda63..d56c1e51e4 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -29,6 +29,7 @@ #error "unsupported code generation mode" #endif +#include "../tcg-ldst.c.inc" #include "../tcg-pool.c.inc" #include "elf.h" @@ -136,6 +137,7 @@ typedef enum S390Opcode { RI_OIHL = 0xa509, RI_OILH = 0xa50a, RI_OILL = 0xa50b, + RI_TMLL = 0xa701, RIE_CGIJ = 0xec7c, RIE_CGRJ = 0xec64, @@ -1804,8 +1806,6 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data, } #if defined(CONFIG_SOFTMMU) -#include "../tcg-ldst.c.inc" - /* We're expecting to use a 20-bit negative offset on the tlb memory ops. */ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19)); @@ -1942,6 +1942,53 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) return true; } #else +static void tcg_out_test_alignment(TCGContext *s, bool is_ld, + TCGReg addrlo, unsigned a_bits) +{ + unsigned a_mask = (1 << a_bits) - 1; + TCGLabelQemuLdst *l = new_ldst_label(s); + + l->is_ld = is_ld; + l->addrlo_reg = addrlo; + + /* We are expecting a_bits to max out at 7, much lower than TMLL. */ + tcg_debug_assert(a_bits < 16); + tcg_out_insn(s, RI, TMLL, addrlo, a_mask); + + tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */ + l->label_ptr[0] = s->code_ptr; + s->code_ptr += 1; + + l->raddr = tcg_splitwx_to_rx(s->code_ptr); +} + +static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) +{ + if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL, + (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) { + return false; + } + + tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, l->addrlo_reg); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); + + /* "Tail call" to the helper, with the return address back inline. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R14, (uintptr_t)l->raddr); + tgen_gotoi(s, S390_CC_ALWAYS, (const void *)(l->is_ld ? helper_unaligned_ld + : helper_unaligned_st)); + return true; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, TCGReg *index_reg, tcg_target_long *disp) { @@ -1980,7 +2027,11 @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, #else TCGReg index_reg; tcg_target_long disp; + unsigned a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, true, addr_reg, a_bits); + } tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp); #endif @@ -2007,7 +2058,11 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, #else TCGReg index_reg; tcg_target_long disp; + unsigned a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, false, addr_reg, a_bits); + } tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp); #endif diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h index 527ada0f63..69217d995b 100644 --- a/tcg/s390x/tcg-target.h +++ b/tcg/s390x/tcg-target.h @@ -178,9 +178,7 @@ static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, /* no need to flush icache explicitly */ } -#ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS -#endif #define TCG_TARGET_NEED_POOL_LABELS #endif From fe1bee3a0a97585e3b59dc1db154c28bc8005882 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 3 Aug 2021 14:56:51 -1000 Subject: [PATCH 15/34] tcg/tci: Support raising sigbus for user-only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/tci.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tcg/tci.c b/tcg/tci.c index 336af5945a..fe92b5d084 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -292,11 +292,11 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr, MemOpIdx oi, const void *tb_ptr) { - MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE); + MemOp mop = get_memop(oi); uintptr_t ra = (uintptr_t)tb_ptr; #ifdef CONFIG_SOFTMMU - switch (mop) { + switch (mop & (MO_BSWAP | MO_SSIZE)) { case MO_UB: return helper_ret_ldub_mmu(env, taddr, oi, ra); case MO_SB: @@ -326,10 +326,14 @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr, } #else void *haddr = g2h(env_cpu(env), taddr); + unsigned a_mask = (1u << get_alignment_bits(mop)) - 1; uint64_t ret; set_helper_retaddr(ra); - switch (mop) { + if (taddr & a_mask) { + helper_unaligned_ld(env, taddr); + } + switch (mop & (MO_BSWAP | MO_SSIZE)) { case MO_UB: ret = ldub_p(haddr); break; @@ -377,11 +381,11 @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr, static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val, MemOpIdx oi, const void *tb_ptr) { - MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE); + MemOp mop = get_memop(oi); uintptr_t ra = (uintptr_t)tb_ptr; #ifdef CONFIG_SOFTMMU - switch (mop) { + switch (mop & (MO_BSWAP | MO_SIZE)) { case MO_UB: helper_ret_stb_mmu(env, taddr, val, oi, ra); break; @@ -408,9 +412,13 @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val, } #else void *haddr = g2h(env_cpu(env), taddr); + unsigned a_mask = (1u << get_alignment_bits(mop)) - 1; set_helper_retaddr(ra); - switch (mop) { + if (taddr & a_mask) { + helper_unaligned_st(env, taddr); + } + switch (mop & (MO_BSWAP | MO_SIZE)) { case MO_UB: stb_p(haddr, val); break; From 6f78c7b0828660f6733528dab7bedf027d4958f6 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 6 Jan 2022 21:42:38 +0800 Subject: [PATCH 16/34] tcg/loongarch64: Support raising sigbus for user-only Signed-off-by: WANG Xuerui Reviewed-by: Richard Henderson Message-Id: <20220106134238.3936163-1-git@xen0n.name> Signed-off-by: Richard Henderson --- tcg/loongarch64/tcg-target.c.inc | 71 +++++++++++++++++++++++++++++++- tcg/loongarch64/tcg-target.h | 2 - 2 files changed, 69 insertions(+), 4 deletions(-) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index d31a0e5991..a3debf6da7 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -29,6 +29,8 @@ * THE SOFTWARE. */ +#include "../tcg-ldst.c.inc" + #ifdef CONFIG_DEBUG_TCG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "zero", @@ -642,8 +644,6 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, */ #if defined(CONFIG_SOFTMMU) -#include "../tcg-ldst.c.inc" - /* * helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * MemOpIdx oi, uintptr_t ra) @@ -825,6 +825,61 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) return tcg_out_goto(s, l->raddr); } +#else + +/* + * Alignment helpers for user-mode emulation + */ + +static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg, + unsigned a_bits) +{ + TCGLabelQemuLdst *l = new_ldst_label(s); + + l->is_ld = is_ld; + l->addrlo_reg = addr_reg; + + /* + * Without micro-architecture details, we don't know which of bstrpick or + * andi is faster, so use bstrpick as it's not constrained by imm field + * width. (Not to say alignments >= 2^12 are going to happen any time + * soon, though) + */ + tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1); + + l->label_ptr[0] = s->code_ptr; + tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0); + + l->raddr = tcg_splitwx_to_rx(s->code_ptr); +} + +static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) +{ + /* resolve label address */ + if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { + return false; + } + + tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); + + /* tail call, with the return address back inline. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (uintptr_t)l->raddr); + tcg_out_call_int(s, (const void *)(l->is_ld ? helper_unaligned_ld + : helper_unaligned_st), true); + return true; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + #endif /* CONFIG_SOFTMMU */ /* @@ -887,6 +942,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGType type) MemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[1]; +#else + unsigned a_bits; #endif TCGReg base; @@ -903,6 +960,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGType type) data_regl, addr_regl, s->code_ptr, label_ptr); #else + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, true, addr_regl, a_bits); + } base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0); TCGReg guest_base_reg = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO; tcg_out_qemu_ld_indexed(s, data_regl, base, guest_base_reg, opc, type); @@ -941,6 +1002,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) MemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[1]; +#else + unsigned a_bits; #endif TCGReg base; @@ -958,6 +1021,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) data_regl, addr_regl, s->code_ptr, label_ptr); #else + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, false, addr_regl, a_bits); + } base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0); TCGReg guest_base_reg = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO; tcg_out_qemu_st_indexed(s, data_regl, base, guest_base_reg, opc); diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index 05010805e7..d58a6162f2 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -171,9 +171,7 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); #define TCG_TARGET_DEFAULT_MO (0) -#ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS -#endif #define TCG_TARGET_HAS_MEMORY_BSWAP 0 From 01dfc0ed7f2c5f8dbab65f31228a2888c7b85a07 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 2 Jan 2022 17:42:07 -0800 Subject: [PATCH 17/34] tcg/arm: Drop support for armv4 and armv5 hosts Support for unaligned accesses is difficult for pre-v6 hosts. While debian still builds for armv4, we cannot use a compile time test, so test the architecture at runtime and error out. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 5345c4e39c..29d63e98a8 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -2474,6 +2474,11 @@ static void tcg_target_init(TCGContext *s) if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') { arm_arch = pl[1] - '0'; } + + if (arm_arch < 6) { + error_report("TCG: ARMv%d is unsupported; exiting", arm_arch); + exit(EXIT_FAILURE); + } } tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS; From 6cef13940cc57124bdef9dffbf03f25784f7a51c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 2 Jan 2022 18:30:13 -0800 Subject: [PATCH 18/34] tcg/arm: Remove use_armv5t_instructions This is now always true, since we require armv6. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 35 ++++++----------------------------- tcg/arm/tcg-target.h | 3 +-- 2 files changed, 7 insertions(+), 31 deletions(-) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 29d63e98a8..f3b635063f 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -596,11 +596,7 @@ static void tcg_out_b_reg(TCGContext *s, ARMCond cond, TCGReg rn) * Unless the C portion of QEMU is compiled as thumb, we don't need * true BX semantics; merely a branch to an address held in a register. */ - if (use_armv5t_instructions) { - tcg_out_bx_reg(s, cond, rn); - } else { - tcg_out_mov_reg(s, cond, TCG_REG_PC, rn); - } + tcg_out_bx_reg(s, cond, rn); } static void tcg_out_dat_imm(TCGContext *s, ARMCond cond, ARMInsn opc, @@ -1247,14 +1243,7 @@ static void tcg_out_goto(TCGContext *s, ARMCond cond, const tcg_insn_unit *addr) } /* LDR is interworking from v5t. */ - if (arm_mode || use_armv5t_instructions) { - tcg_out_movi_pool(s, cond, TCG_REG_PC, addri); - return; - } - - /* else v4t */ - tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri); - tcg_out_bx_reg(s, COND_AL, TCG_REG_TMP); + tcg_out_movi_pool(s, cond, TCG_REG_PC, addri); } /* @@ -1270,26 +1259,14 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr) if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) { if (arm_mode) { tcg_out_bl_imm(s, COND_AL, disp); - return; - } - if (use_armv5t_instructions) { + } else { tcg_out_blx_imm(s, disp); - return; } + return; } - if (use_armv5t_instructions) { - tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri); - tcg_out_blx_reg(s, COND_AL, TCG_REG_TMP); - } else if (arm_mode) { - /* ??? Know that movi_pool emits exactly 1 insn. */ - tcg_out_mov_reg(s, COND_AL, TCG_REG_R14, TCG_REG_PC); - tcg_out_movi_pool(s, COND_AL, TCG_REG_PC, addri); - } else { - tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri); - tcg_out_mov_reg(s, COND_AL, TCG_REG_R14, TCG_REG_PC); - tcg_out_bx_reg(s, COND_AL, TCG_REG_TMP); - } + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri); + tcg_out_blx_reg(s, COND_AL, TCG_REG_TMP); } static void tcg_out_goto_label(TCGContext *s, ARMCond cond, TCGLabel *l) diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index f41b809554..5c9ba5feea 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -28,7 +28,6 @@ extern int arm_arch; -#define use_armv5t_instructions (__ARM_ARCH >= 5 || arm_arch >= 5) #define use_armv6_instructions (__ARM_ARCH >= 6 || arm_arch >= 6) #define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7) @@ -109,7 +108,7 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 -#define TCG_TARGET_HAS_clz_i32 use_armv5t_instructions +#define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions #define TCG_TARGET_HAS_ctpop_i32 0 #define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions From bde2cdb59bcd3b0965a80597e72c835fcf5ef7f4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 2 Jan 2022 20:54:57 -0800 Subject: [PATCH 19/34] tcg/arm: Remove use_armv6_instructions This is now always true, since we require armv6. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 192 ++++++--------------------------------- tcg/arm/tcg-target.h | 1 - 2 files changed, 27 insertions(+), 166 deletions(-) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index f3b635063f..9eb43407ea 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -923,17 +923,6 @@ static void tcg_out_dat_rIN(TCGContext *s, ARMCond cond, ARMInsn opc, static void tcg_out_mul32(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn, TCGReg rm) { - /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */ - if (!use_armv6_instructions && rd == rn) { - if (rd == rm) { - /* rd == rn == rm; copy an input to tmp first. */ - tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); - rm = rn = TCG_REG_TMP; - } else { - rn = rm; - rm = rd; - } - } /* mul */ tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn); } @@ -941,17 +930,6 @@ static void tcg_out_mul32(TCGContext *s, ARMCond cond, TCGReg rd, static void tcg_out_umull32(TCGContext *s, ARMCond cond, TCGReg rd0, TCGReg rd1, TCGReg rn, TCGReg rm) { - /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */ - if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { - if (rd0 == rm || rd1 == rm) { - tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); - rn = TCG_REG_TMP; - } else { - TCGReg t = rn; - rn = rm; - rm = t; - } - } /* umull */ tcg_out32(s, (cond << 28) | 0x00800090 | (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); @@ -960,17 +938,6 @@ static void tcg_out_umull32(TCGContext *s, ARMCond cond, TCGReg rd0, static void tcg_out_smull32(TCGContext *s, ARMCond cond, TCGReg rd0, TCGReg rd1, TCGReg rn, TCGReg rm) { - /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */ - if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { - if (rd0 == rm || rd1 == rm) { - tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); - rn = TCG_REG_TMP; - } else { - TCGReg t = rn; - rn = rm; - rm = t; - } - } /* smull */ tcg_out32(s, (cond << 28) | 0x00c00090 | (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); @@ -990,15 +957,8 @@ static void tcg_out_udiv(TCGContext *s, ARMCond cond, static void tcg_out_ext8s(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn) { - if (use_armv6_instructions) { - /* sxtb */ - tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rn, SHIFT_IMM_LSL(24)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rd, SHIFT_IMM_ASR(24)); - } + /* sxtb */ + tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn); } static void __attribute__((unused)) @@ -1009,113 +969,37 @@ tcg_out_ext8u(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn) static void tcg_out_ext16s(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn) { - if (use_armv6_instructions) { - /* sxth */ - tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rn, SHIFT_IMM_LSL(16)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rd, SHIFT_IMM_ASR(16)); - } + /* sxth */ + tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn); } static void tcg_out_ext16u(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn) { - if (use_armv6_instructions) { - /* uxth */ - tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rn, SHIFT_IMM_LSL(16)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rd, SHIFT_IMM_LSR(16)); - } + /* uxth */ + tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn); } static void tcg_out_bswap16(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn, int flags) { - if (use_armv6_instructions) { - if (flags & TCG_BSWAP_OS) { - /* revsh */ - tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn); - return; - } - - /* rev16 */ - tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); - if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - /* uxth */ - tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rd); - } + if (flags & TCG_BSWAP_OS) { + /* revsh */ + tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn); return; } - if (flags == 0) { - /* - * For stores, no input or output extension: - * rn = xxAB - * lsr tmp, rn, #8 tmp = 0xxA - * and tmp, tmp, #0xff tmp = 000A - * orr rd, tmp, rn, lsl #8 rd = xABA - */ - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8)); - tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff); - tcg_out_dat_reg(s, cond, ARITH_ORR, - rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8)); - return; + /* rev16 */ + tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); + if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + /* uxth */ + tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rd); } - - /* - * Byte swap, leaving the result at the top of the register. - * We will then shift down, zero or sign-extending. - */ - if (flags & TCG_BSWAP_IZ) { - /* - * rn = 00AB - * ror tmp, rn, #8 tmp = B00A - * orr tmp, tmp, tmp, lsl #16 tmp = BA00 - */ - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, rn, SHIFT_IMM_ROR(8)); - tcg_out_dat_reg(s, cond, ARITH_ORR, - TCG_REG_TMP, TCG_REG_TMP, TCG_REG_TMP, - SHIFT_IMM_LSL(16)); - } else { - /* - * rn = xxAB - * and tmp, rn, #0xff00 tmp = 00A0 - * lsl tmp, tmp, #8 tmp = 0A00 - * orr tmp, tmp, rn, lsl #24 tmp = BA00 - */ - tcg_out_dat_rI(s, cond, ARITH_AND, TCG_REG_TMP, rn, 0xff00, 1); - tcg_out_dat_reg(s, cond, ARITH_MOV, - TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSL(8)); - tcg_out_dat_reg(s, cond, ARITH_ORR, - TCG_REG_TMP, TCG_REG_TMP, rn, SHIFT_IMM_LSL(24)); - } - tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, TCG_REG_TMP, - (flags & TCG_BSWAP_OS - ? SHIFT_IMM_ASR(8) : SHIFT_IMM_LSR(8))); } static void tcg_out_bswap32(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn) { - if (use_armv6_instructions) { - /* rev */ - tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn); - } else { - tcg_out_dat_reg(s, cond, ARITH_EOR, - TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16)); - tcg_out_dat_imm(s, cond, ARITH_BIC, - TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800); - tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, rn, SHIFT_IMM_ROR(8)); - tcg_out_dat_reg(s, cond, ARITH_EOR, - rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8)); - } + /* rev */ + tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn); } static void tcg_out_deposit(TCGContext *s, ARMCond cond, TCGReg rd, @@ -1283,7 +1167,7 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) { if (use_armv7_instructions) { tcg_out32(s, INSN_DMB_ISH); - } else if (use_armv6_instructions) { + } else { tcg_out32(s, INSN_DMB_MCR); } } @@ -1489,8 +1373,7 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg, if (argreg & 1) { argreg++; } - if (use_armv6_instructions && argreg >= 4 - && (arglo & 1) == 0 && arghi == arglo + 1) { + if (argreg >= 4 && (arglo & 1) == 0 && arghi == arglo + 1) { tcg_out_strd_8(s, COND_AL, arglo, TCG_REG_CALL_STACK, (argreg - 4) * 4); return argreg + 2; @@ -1520,8 +1403,6 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read) : offsetof(CPUTLBEntry, addr_write)); int fast_off = TLB_MASK_TABLE_OFS(mem_index); - int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); - int table_off = fast_off + offsetof(CPUTLBDescFast, table); unsigned s_bits = opc & MO_SIZE; unsigned a_bits = get_alignment_bits(opc); @@ -1534,12 +1415,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, } /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */ - if (use_armv6_instructions) { - tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off); - } else { - tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R0, TCG_AREG0, mask_off); - tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R1, TCG_AREG0, table_off); - } + tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off); /* Extract the tlb index from the address into R0. */ tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo, @@ -1550,7 +1426,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, * Load the tlb comparator into R2/R3 and the fast path addend into R1. */ if (cmp_off == 0) { - if (use_armv6_instructions && TARGET_LONG_BITS == 64) { + if (TARGET_LONG_BITS == 64) { tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0); } else { tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0); @@ -1558,15 +1434,12 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, } else { tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0); - if (use_armv6_instructions && TARGET_LONG_BITS == 64) { + if (TARGET_LONG_BITS == 64) { tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off); } else { tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off); } } - if (!use_armv6_instructions && TARGET_LONG_BITS == 64) { - tcg_out_ld32_12(s, COND_AL, TCG_REG_R3, TCG_REG_R1, cmp_off + 4); - } /* Load the tlb addend. */ tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1, @@ -1631,7 +1504,6 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) TCGReg argreg, datalo, datahi; MemOpIdx oi = lb->oi; MemOp opc = get_memop(oi); - void *func; if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { return false; @@ -1646,18 +1518,8 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) argreg = tcg_out_arg_imm32(s, argreg, oi); argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); - /* For armv6 we can use the canonical unsigned helpers and minimize - icache usage. For pre-armv6, use the signed helpers since we do - not have a single insn sign-extend. */ - if (use_armv6_instructions) { - func = qemu_ld_helpers[opc & MO_SIZE]; - } else { - func = qemu_ld_helpers[opc & MO_SSIZE]; - if (opc & MO_SIGN) { - opc = MO_UL; - } - } - tcg_out_call(s, func); + /* Use the canonical unsigned helpers and minimize icache usage. */ + tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]); datalo = lb->datalo_reg; datahi = lb->datahi_reg; @@ -1760,7 +1622,7 @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, break; case MO_UQ: /* Avoid ldrd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions + if (USING_SOFTMMU && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend); } else if (datalo != addend) { @@ -1803,7 +1665,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo, break; case MO_UQ: /* Avoid ldrd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions + if (USING_SOFTMMU && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0); } else if (datalo == addrlo) { @@ -1880,7 +1742,7 @@ static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc, break; case MO_64: /* Avoid strd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions + if (USING_SOFTMMU && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_strd_r(s, cond, datalo, addrlo, addend); } else { @@ -1912,7 +1774,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo, break; case MO_64: /* Avoid strd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions + if (USING_SOFTMMU && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0); } else { diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 5c9ba5feea..1dd4cd5377 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -28,7 +28,6 @@ extern int arm_arch; -#define use_armv6_instructions (__ARM_ARCH >= 6 || arm_arch >= 6) #define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7) #undef TCG_TARGET_STACK_GROWSUP From 367d43d85b8a0f6262125ccbad8720d02416265e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 2 Jan 2022 21:26:17 -0800 Subject: [PATCH 20/34] tcg/arm: Check alignment for ldrd and strd We will shortly allow the use of unaligned memory accesses, and these require proper alignment. Use get_alignment_bits to verify and remove USING_SOFTMMU. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 9eb43407ea..4b0b4f4c2f 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -34,13 +34,6 @@ bool use_idiv_instructions; bool use_neon_instructions; #endif -/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined. */ -#ifdef CONFIG_SOFTMMU -# define USING_SOFTMMU 1 -#else -# define USING_SOFTMMU 0 -#endif - #ifdef CONFIG_DEBUG_TCG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", @@ -1621,8 +1614,8 @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend); break; case MO_UQ: - /* Avoid ldrd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU + /* LDRD requires alignment; double-check that. */ + if (get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend); } else if (datalo != addend) { @@ -1664,8 +1657,8 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo, tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0); break; case MO_UQ: - /* Avoid ldrd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU + /* LDRD requires alignment; double-check that. */ + if (get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0); } else if (datalo == addrlo) { @@ -1741,8 +1734,8 @@ static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc, tcg_out_st32_r(s, cond, datalo, addrlo, addend); break; case MO_64: - /* Avoid strd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU + /* STRD requires alignment; double-check that. */ + if (get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_strd_r(s, cond, datalo, addrlo, addend); } else { @@ -1773,8 +1766,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo, tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); break; case MO_64: - /* Avoid strd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU + /* STRD requires alignment; double-check that. */ + if (get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0); } else { From 8821ec2323dd8793d840fd455c5e20e144bddc9b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 7 Aug 2021 18:19:14 -1000 Subject: [PATCH 21/34] tcg/arm: Support unaligned access for softmmu From armv6, the architecture supports unaligned accesses. All we need to do is perform the correct alignment check in tcg_out_tlb_read. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 41 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 4b0b4f4c2f..d290b4556c 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1396,16 +1396,9 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read) : offsetof(CPUTLBEntry, addr_write)); int fast_off = TLB_MASK_TABLE_OFS(mem_index); - unsigned s_bits = opc & MO_SIZE; - unsigned a_bits = get_alignment_bits(opc); - - /* - * We don't support inline unaligned acceses, but we can easily - * support overalignment checks. - */ - if (a_bits < s_bits) { - a_bits = s_bits; - } + unsigned s_mask = (1 << (opc & MO_SIZE)) - 1; + unsigned a_mask = (1 << get_alignment_bits(opc)) - 1; + TCGReg t_addr; /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off); @@ -1440,27 +1433,35 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, /* * Check alignment, check comparators. - * Do this in no more than 3 insns. Use MOVW for v7, if possible, + * Do this in 2-4 insns. Use MOVW for v7, if possible, * to reduce the number of sequential conditional instructions. * Almost all guests have at least 4k pages, which means that we need * to clear at least 9 bits even for an 8-byte memory, which means it * isn't worth checking for an immediate operand for BIC. + * + * For unaligned accesses, test the page of the last unit of alignment. + * This leaves the least significant alignment bits unchanged, and of + * course must be zero. */ + t_addr = addrlo; + if (a_mask < s_mask) { + t_addr = TCG_REG_R0; + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr, + addrlo, s_mask - a_mask); + } if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) { - tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1)); - - tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask); + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask)); tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP, - addrlo, TCG_REG_TMP, 0); + t_addr, TCG_REG_TMP, 0); tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0); } else { - if (a_bits) { - tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, - (1 << a_bits) - 1); + if (a_mask) { + tcg_debug_assert(a_mask <= 0xff); + tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask); } - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, addrlo, + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr, SHIFT_IMM_LSR(TARGET_PAGE_BITS)); - tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP, + tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); } From 4bb802073f212b5a7e7cffcd4456484932911b91 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 9 Aug 2021 17:44:50 -1000 Subject: [PATCH 22/34] tcg/arm: Reserve a register for guest_base Reserve a register for the guest_base using aarch64 for reference. By doing so, we do not have to recompute it for every memory load. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index d290b4556c..7eebbfaf02 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -84,6 +84,9 @@ static const int tcg_target_call_oarg_regs[2] = { #define TCG_REG_TMP TCG_REG_R12 #define TCG_VEC_TMP TCG_REG_Q15 +#ifndef CONFIG_SOFTMMU +#define TCG_REG_GUEST_BASE TCG_REG_R11 +#endif typedef enum { COND_EQ = 0x0, @@ -1593,7 +1596,8 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addend) + TCGReg addrlo, TCGReg addend, + bool scratch_addend) { /* Byte swapping is left to middle-end expansion. */ tcg_debug_assert((opc & MO_BSWAP) == 0); @@ -1619,7 +1623,7 @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, if (get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend); - } else if (datalo != addend) { + } else if (scratch_addend) { tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo); tcg_out_ld32_12(s, COND_AL, datahi, addend, 4); } else { @@ -1703,14 +1707,14 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) label_ptr = s->code_ptr; tcg_out_bl_imm(s, COND_NE, 0); - tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend); + tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend, true); add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ if (guest_base) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base); - tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP); + tcg_out_qemu_ld_index(s, opc, datalo, datahi, + addrlo, TCG_REG_GUEST_BASE, false); } else { tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo); } @@ -1719,7 +1723,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc, TCGReg datalo, TCGReg datahi, - TCGReg addrlo, TCGReg addend) + TCGReg addrlo, TCGReg addend, + bool scratch_addend) { /* Byte swapping is left to middle-end expansion. */ tcg_debug_assert((opc & MO_BSWAP) == 0); @@ -1739,9 +1744,14 @@ static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc, if (get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_strd_r(s, cond, datalo, addrlo, addend); - } else { + } else if (scratch_addend) { tcg_out_st32_rwb(s, cond, datalo, addend, addrlo); tcg_out_st32_12(s, cond, datahi, addend, 4); + } else { + tcg_out_dat_reg(s, cond, ARITH_ADD, TCG_REG_TMP, + addend, addrlo, SHIFT_IMM_LSL(0)); + tcg_out_st32_12(s, cond, datalo, TCG_REG_TMP, 0); + tcg_out_st32_12(s, cond, datahi, TCG_REG_TMP, 4); } break; default: @@ -1804,7 +1814,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) mem_index = get_mmuidx(oi); addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0); - tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend); + tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, + addrlo, addend, true); /* The conditional call must come last, as we're going to return here. */ label_ptr = s->code_ptr; @@ -1814,9 +1825,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ if (guest_base) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base); - tcg_out_qemu_st_index(s, COND_AL, opc, datalo, - datahi, addrlo, TCG_REG_TMP); + tcg_out_qemu_st_index(s, COND_AL, opc, datalo, datahi, + addrlo, TCG_REG_GUEST_BASE, false); } else { tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo); } @@ -2958,6 +2968,13 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); +#ifndef CONFIG_SOFTMMU + if (guest_base) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); + } +#endif + tcg_out_b_reg(s, COND_AL, tcg_target_call_iarg_regs[1]); /* From 0c90fa5dce29243c06841d7b07ff2bd97c27c1f4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 9 Aug 2021 19:18:27 -1000 Subject: [PATCH 23/34] tcg/arm: Support raising sigbus for user-only Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 83 +++++++++++++++++++++++++++++++++++++++- tcg/arm/tcg-target.h | 2 - 2 files changed, 81 insertions(+), 4 deletions(-) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 7eebbfaf02..e1ea69669c 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -23,6 +23,7 @@ */ #include "elf.h" +#include "../tcg-ldst.c.inc" #include "../tcg-pool.c.inc" int arm_arch = __ARM_ARCH; @@ -1289,8 +1290,6 @@ static void tcg_out_vldst(TCGContext *s, ARMInsn insn, } #ifdef CONFIG_SOFTMMU -#include "../tcg-ldst.c.inc" - /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * int mmu_idx, uintptr_t ra) */ @@ -1592,6 +1591,74 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]); return true; } +#else + +static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo, + TCGReg addrhi, unsigned a_bits) +{ + unsigned a_mask = (1 << a_bits) - 1; + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + + /* We are expecting a_bits to max out at 7, and can easily support 8. */ + tcg_debug_assert(a_mask <= 0xff); + /* tst addr, #mask */ + tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask); + + /* blne slow_path */ + label->label_ptr[0] = s->code_ptr; + tcg_out_bl_imm(s, COND_NE, 0); + + label->raddr = tcg_splitwx_to_rx(s->code_ptr); +} + +static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) +{ + if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { + return false; + } + + if (TARGET_LONG_BITS == 64) { + /* 64-bit target address is aligned into R2:R3. */ + if (l->addrhi_reg != TCG_REG_R2) { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg); + } else if (l->addrlo_reg != TCG_REG_R3) { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg); + } else { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, TCG_REG_R2); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, TCG_REG_R3); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, TCG_REG_R1); + } + } else { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg); + } + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_AREG0); + + /* + * Tail call to the helper, with the return address back inline, + * just for the clarity of the debugging traceback -- the helper + * cannot return. We have used BLNE to arrive here, so LR is + * already set. + */ + tcg_out_goto(s, COND_AL, (const void *) + (l->is_ld ? helper_unaligned_ld : helper_unaligned_st)); + return true; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} #endif /* SOFTMMU */ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, @@ -1689,6 +1756,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) int mem_index; TCGReg addend; tcg_insn_unit *label_ptr; +#else + unsigned a_bits; #endif datalo = *args++; @@ -1712,6 +1781,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits); + } if (guest_base) { tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_GUEST_BASE, false); @@ -1801,6 +1874,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) int mem_index; TCGReg addend; tcg_insn_unit *label_ptr; +#else + unsigned a_bits; #endif datalo = *args++; @@ -1824,6 +1899,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ + a_bits = get_alignment_bits(opc); + if (a_bits) { + tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits); + } if (guest_base) { tcg_out_qemu_st_index(s, COND_AL, opc, datalo, datahi, addrlo, TCG_REG_GUEST_BASE, false); diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 1dd4cd5377..27c27a1f14 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -151,9 +151,7 @@ extern bool use_neon_instructions; /* not defined -- call should be eliminated at compile time */ void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); -#ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS -#endif #define TCG_TARGET_NEED_POOL_LABELS #endif From 23a79c113ed2ae693d882d109862f4a759fbf10e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 6 Aug 2021 05:49:16 -1000 Subject: [PATCH 24/34] tcg/mips: Support unaligned access for user-only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is kinda sorta the opposite of the other tcg hosts, where we get (normal) alignment checks for free with host SIGBUS and need to add code to support unaligned accesses. Fortunately, the ISA contains pairs of instructions that are used to implement unaligned memory accesses. Use them. Tested-by: Jiaxun Yang Reviewed-by: Jiaxun Yang Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 334 +++++++++++++++++++++++++++++++++++++- tcg/mips/tcg-target.h | 2 - 2 files changed, 328 insertions(+), 8 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 27b020e66c..2c94ac2ed6 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -24,6 +24,8 @@ * THE SOFTWARE. */ +#include "../tcg-ldst.c.inc" + #ifdef HOST_WORDS_BIGENDIAN # define MIPS_BE 1 #else @@ -230,16 +232,26 @@ typedef enum { OPC_ORI = 015 << 26, OPC_XORI = 016 << 26, OPC_LUI = 017 << 26, + OPC_BNEL = 025 << 26, + OPC_BNEZALC_R6 = 030 << 26, OPC_DADDIU = 031 << 26, + OPC_LDL = 032 << 26, + OPC_LDR = 033 << 26, OPC_LB = 040 << 26, OPC_LH = 041 << 26, + OPC_LWL = 042 << 26, OPC_LW = 043 << 26, OPC_LBU = 044 << 26, OPC_LHU = 045 << 26, + OPC_LWR = 046 << 26, OPC_LWU = 047 << 26, OPC_SB = 050 << 26, OPC_SH = 051 << 26, + OPC_SWL = 052 << 26, OPC_SW = 053 << 26, + OPC_SDL = 054 << 26, + OPC_SDR = 055 << 26, + OPC_SWR = 056 << 26, OPC_LD = 067 << 26, OPC_SD = 077 << 26, @@ -1015,8 +1027,6 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) } #if defined(CONFIG_SOFTMMU) -#include "../tcg-ldst.c.inc" - static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = { [MO_UB] = helper_ret_ldub_mmu, [MO_SB] = helper_ret_ldsb_mmu, @@ -1324,7 +1334,82 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); return true; } -#endif + +#else + +static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo, + TCGReg addrhi, unsigned a_bits) +{ + unsigned a_mask = (1 << a_bits) - 1; + TCGLabelQemuLdst *l = new_ldst_label(s); + + l->is_ld = is_ld; + l->addrlo_reg = addrlo; + l->addrhi_reg = addrhi; + + /* We are expecting a_bits to max out at 7, much lower than ANDI. */ + tcg_debug_assert(a_bits < 16); + tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask); + + l->label_ptr[0] = s->code_ptr; + if (use_mips32r6_instructions) { + tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0); + } else { + tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO); + tcg_out_nop(s); + } + + l->raddr = tcg_splitwx_to_rx(s->code_ptr); +} + +static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) +{ + void *target; + + if (!reloc_pc16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) { + return false; + } + + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + /* A0 is env, A1 is skipped, A2:A3 is the uint64_t address. */ + TCGReg a2 = MIPS_BE ? l->addrhi_reg : l->addrlo_reg; + TCGReg a3 = MIPS_BE ? l->addrlo_reg : l->addrhi_reg; + + if (a3 != TCG_REG_A2) { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, a2); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, a3); + } else if (a2 != TCG_REG_A3) { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, a3); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, a2); + } else { + tcg_out_mov(s, TCG_TYPE_I32, TCG_TMP0, TCG_REG_A2); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, TCG_REG_A3); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, TCG_TMP0); + } + } else { + tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg); + } + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0); + + /* + * Tail call to the helper, with the return address back inline. + * We have arrived here via BNEL, so $31 is already set. + */ + target = (l->is_ld ? helper_unaligned_ld : helper_unaligned_st); + tcg_out_call_int(s, target, true); + return true; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + return tcg_out_fail_alignment(s, l); +} +#endif /* SOFTMMU */ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc, bool is_64) @@ -1430,6 +1515,127 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, } } +static void __attribute__((unused)) +tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, + TCGReg base, MemOp opc, bool is_64) +{ + const MIPSInsn lw1 = MIPS_BE ? OPC_LWL : OPC_LWR; + const MIPSInsn lw2 = MIPS_BE ? OPC_LWR : OPC_LWL; + const MIPSInsn ld1 = MIPS_BE ? OPC_LDL : OPC_LDR; + const MIPSInsn ld2 = MIPS_BE ? OPC_LDR : OPC_LDL; + + bool sgn = (opc & MO_SIGN); + + switch (opc & (MO_SSIZE | MO_BSWAP)) { + case MO_SW | MO_BE: + case MO_UW | MO_BE: + tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 0); + tcg_out_opc_imm(s, OPC_LBU, lo, base, 1); + if (use_mips32r2_instructions) { + tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8); + } else { + tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8); + tcg_out_opc_reg(s, OPC_OR, lo, TCG_TMP0, TCG_TMP1); + } + break; + + case MO_SW | MO_LE: + case MO_UW | MO_LE: + if (use_mips32r2_instructions && lo != base) { + tcg_out_opc_imm(s, OPC_LBU, lo, base, 0); + tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 1); + tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8); + } else { + tcg_out_opc_imm(s, OPC_LBU, TCG_TMP0, base, 0); + tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP1, base, 1); + tcg_out_opc_sa(s, OPC_SLL, TCG_TMP1, TCG_TMP1, 8); + tcg_out_opc_reg(s, OPC_OR, lo, TCG_TMP0, TCG_TMP1); + } + break; + + case MO_SL: + case MO_UL: + tcg_out_opc_imm(s, lw1, lo, base, 0); + tcg_out_opc_imm(s, lw2, lo, base, 3); + if (TCG_TARGET_REG_BITS == 64 && is_64 && !sgn) { + tcg_out_ext32u(s, lo, lo); + } + break; + + case MO_UL | MO_BSWAP: + case MO_SL | MO_BSWAP: + if (use_mips32r2_instructions) { + tcg_out_opc_imm(s, lw1, lo, base, 0); + tcg_out_opc_imm(s, lw2, lo, base, 3); + tcg_out_bswap32(s, lo, lo, + TCG_TARGET_REG_BITS == 64 && is_64 + ? (sgn ? TCG_BSWAP_OS : TCG_BSWAP_OZ) : 0); + } else { + const tcg_insn_unit *subr = + (TCG_TARGET_REG_BITS == 64 && is_64 && !sgn + ? bswap32u_addr : bswap32_addr); + + tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0); + tcg_out_bswap_subr(s, subr); + /* delay slot */ + tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 3); + tcg_out_mov(s, is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, lo, TCG_TMP3); + } + break; + + case MO_UQ: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_opc_imm(s, ld1, lo, base, 0); + tcg_out_opc_imm(s, ld2, lo, base, 7); + } else { + tcg_out_opc_imm(s, lw1, MIPS_BE ? hi : lo, base, 0 + 0); + tcg_out_opc_imm(s, lw2, MIPS_BE ? hi : lo, base, 0 + 3); + tcg_out_opc_imm(s, lw1, MIPS_BE ? lo : hi, base, 4 + 0); + tcg_out_opc_imm(s, lw2, MIPS_BE ? lo : hi, base, 4 + 3); + } + break; + + case MO_UQ | MO_BSWAP: + if (TCG_TARGET_REG_BITS == 64) { + if (use_mips32r2_instructions) { + tcg_out_opc_imm(s, ld1, lo, base, 0); + tcg_out_opc_imm(s, ld2, lo, base, 7); + tcg_out_bswap64(s, lo, lo); + } else { + tcg_out_opc_imm(s, ld1, TCG_TMP0, base, 0); + tcg_out_bswap_subr(s, bswap64_addr); + /* delay slot */ + tcg_out_opc_imm(s, ld2, TCG_TMP0, base, 7); + tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3); + } + } else if (use_mips32r2_instructions) { + tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0); + tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3); + tcg_out_opc_imm(s, lw1, TCG_TMP1, base, 4 + 0); + tcg_out_opc_imm(s, lw2, TCG_TMP1, base, 4 + 3); + tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0); + tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1); + tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16); + tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16); + } else { + tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0); + tcg_out_bswap_subr(s, bswap32_addr); + /* delay slot */ + tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3); + tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 4 + 0); + tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3); + tcg_out_bswap_subr(s, bswap32_addr); + /* delay slot */ + tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 4 + 3); + tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? hi : lo, TCG_TMP3); + } + break; + + default: + g_assert_not_reached(); + } +} + static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) { TCGReg addr_regl, addr_regh __attribute__((unused)); @@ -1438,6 +1644,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) MemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[2]; +#else + unsigned a_bits, s_bits; #endif TCGReg base = TCG_REG_A0; @@ -1467,7 +1675,27 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) } else { tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, addr_regl); } - tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64); + a_bits = get_alignment_bits(opc); + s_bits = opc & MO_SIZE; + /* + * R6 removes the left/right instructions but requires the + * system to support misaligned memory accesses. + */ + if (use_mips32r6_instructions) { + if (a_bits) { + tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits); + } + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64); + } else { + if (a_bits && a_bits != s_bits) { + tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits); + } + if (a_bits >= s_bits) { + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64); + } else { + tcg_out_qemu_ld_unalign(s, data_regl, data_regh, base, opc, is_64); + } + } #endif } @@ -1532,6 +1760,79 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, } } +static void __attribute__((unused)) +tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi, + TCGReg base, MemOp opc) +{ + const MIPSInsn sw1 = MIPS_BE ? OPC_SWL : OPC_SWR; + const MIPSInsn sw2 = MIPS_BE ? OPC_SWR : OPC_SWL; + const MIPSInsn sd1 = MIPS_BE ? OPC_SDL : OPC_SDR; + const MIPSInsn sd2 = MIPS_BE ? OPC_SDR : OPC_SDL; + + /* Don't clutter the code below with checks to avoid bswapping ZERO. */ + if ((lo | hi) == 0) { + opc &= ~MO_BSWAP; + } + + switch (opc & (MO_SIZE | MO_BSWAP)) { + case MO_16 | MO_BE: + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8); + tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 0); + tcg_out_opc_imm(s, OPC_SB, lo, base, 1); + break; + + case MO_16 | MO_LE: + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8); + tcg_out_opc_imm(s, OPC_SB, lo, base, 0); + tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 1); + break; + + case MO_32 | MO_BSWAP: + tcg_out_bswap32(s, TCG_TMP3, lo, 0); + lo = TCG_TMP3; + /* fall through */ + case MO_32: + tcg_out_opc_imm(s, sw1, lo, base, 0); + tcg_out_opc_imm(s, sw2, lo, base, 3); + break; + + case MO_64 | MO_BSWAP: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_bswap64(s, TCG_TMP3, lo); + lo = TCG_TMP3; + } else if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? hi : lo); + tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? lo : hi); + tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16); + tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16); + hi = MIPS_BE ? TCG_TMP0 : TCG_TMP1; + lo = MIPS_BE ? TCG_TMP1 : TCG_TMP0; + } else { + tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0); + tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 0 + 0); + tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 0 + 3); + tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo, 0); + tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 4 + 0); + tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 4 + 3); + break; + } + /* fall through */ + case MO_64: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_opc_imm(s, sd1, lo, base, 0); + tcg_out_opc_imm(s, sd2, lo, base, 7); + } else { + tcg_out_opc_imm(s, sw1, MIPS_BE ? hi : lo, base, 0 + 0); + tcg_out_opc_imm(s, sw2, MIPS_BE ? hi : lo, base, 0 + 3); + tcg_out_opc_imm(s, sw1, MIPS_BE ? lo : hi, base, 4 + 0); + tcg_out_opc_imm(s, sw2, MIPS_BE ? lo : hi, base, 4 + 3); + } + break; + + default: + tcg_abort(); + } +} static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) { TCGReg addr_regl, addr_regh __attribute__((unused)); @@ -1540,6 +1841,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) MemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[2]; +#else + unsigned a_bits, s_bits; #endif TCGReg base = TCG_REG_A0; @@ -1558,7 +1861,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) data_regl, data_regh, addr_regl, addr_regh, s->code_ptr, label_ptr); #else - base = TCG_REG_A0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { tcg_out_ext32u(s, base, addr_regl); addr_regl = base; @@ -1570,7 +1872,27 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) } else { tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, addr_regl); } - tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); + a_bits = get_alignment_bits(opc); + s_bits = opc & MO_SIZE; + /* + * R6 removes the left/right instructions but requires the + * system to support misaligned memory accesses. + */ + if (use_mips32r6_instructions) { + if (a_bits) { + tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits); + } + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); + } else { + if (a_bits && a_bits != s_bits) { + tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits); + } + if (a_bits >= s_bits) { + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); + } else { + tcg_out_qemu_st_unalign(s, data_regl, data_regh, base, opc); + } + } #endif } diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index c366fdf74b..7669213175 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -207,8 +207,6 @@ extern bool use_mips32r2_instructions; void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t) QEMU_ERROR("code path is reachable"); -#ifdef CONFIG_SOFTMMU #define TCG_TARGET_NEED_LDST_LABELS -#endif #endif From d9e52834656ffa766113f1fbae8efe37aaac1df2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 6 Aug 2021 06:55:14 -1000 Subject: [PATCH 25/34] tcg/mips: Support unaligned access for softmmu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can use the routines just added for user-only to emit unaligned accesses in softmmu mode too. Tested-by: Jiaxun Yang Reviewed-by: Jiaxun Yang Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 91 ++++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 40 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 2c94ac2ed6..993149d18a 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1134,8 +1134,10 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, tcg_insn_unit *label_ptr[2], bool is_load) { MemOp opc = get_memop(oi); - unsigned s_bits = opc & MO_SIZE; unsigned a_bits = get_alignment_bits(opc); + unsigned s_bits = opc & MO_SIZE; + unsigned a_mask = (1 << a_bits) - 1; + unsigned s_mask = (1 << s_bits) - 1; int mem_index = get_mmuidx(oi); int fast_off = TLB_MASK_TABLE_OFS(mem_index); int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); @@ -1143,7 +1145,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, int add_off = offsetof(CPUTLBEntry, addend); int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read) : offsetof(CPUTLBEntry, addr_write)); - target_ulong mask; + target_ulong tlb_mask; /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off); @@ -1157,27 +1159,13 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */ tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1); - /* We don't currently support unaligned accesses. - We could do so with mips32r6. */ - if (a_bits < s_bits) { - a_bits = s_bits; - } - - /* Mask the page bits, keeping the alignment bits to compare against. */ - mask = (target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1); - /* Load the (low-half) tlb comparator. */ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF); - tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, mask); + tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF); } else { tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW), TCG_TMP0, TCG_TMP3, cmp_off); - tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, mask); - /* No second compare is required here; - load the tlb addend for the fast path. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off); } /* Zero extend a 32-bit guest address for a 64-bit host. */ @@ -1185,7 +1173,25 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, tcg_out_ext32u(s, base, addrl); addrl = base; } - tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl); + + /* + * Mask the page bits, keeping the alignment bits to compare against. + * For unaligned accesses, compare against the end of the access to + * verify that it does not cross a page boundary. + */ + tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask; + tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask); + if (a_mask >= s_mask) { + tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl); + } else { + tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrl, s_mask - a_mask); + tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2); + } + + if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) { + /* Load the tlb addend for the fast path. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off); + } label_ptr[0] = s->code_ptr; tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0); @@ -1193,7 +1199,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, /* Load and test the high half tlb comparator. */ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { /* delay slot */ - tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF); + tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF); /* Load the tlb addend for the fast path. */ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off); @@ -1515,8 +1521,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, } } -static void __attribute__((unused)) -tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, +static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc, bool is_64) { const MIPSInsn lw1 = MIPS_BE ? OPC_LWL : OPC_LWR; @@ -1645,8 +1650,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[2]; #else - unsigned a_bits, s_bits; #endif + unsigned a_bits, s_bits; TCGReg base = TCG_REG_A0; data_regl = *args++; @@ -1655,10 +1660,20 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); oi = *args++; opc = get_memop(oi); + a_bits = get_alignment_bits(opc); + s_bits = opc & MO_SIZE; + /* + * R6 removes the left/right instructions but requires the + * system to support misaligned memory accesses. + */ #if defined(CONFIG_SOFTMMU) tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 1); - tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64); + if (use_mips32r6_instructions || a_bits >= s_bits) { + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64); + } else { + tcg_out_qemu_ld_unalign(s, data_regl, data_regh, base, opc, is_64); + } add_qemu_ldst_label(s, 1, oi, (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32), data_regl, data_regh, addr_regl, addr_regh, @@ -1675,12 +1690,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) } else { tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, addr_regl); } - a_bits = get_alignment_bits(opc); - s_bits = opc & MO_SIZE; - /* - * R6 removes the left/right instructions but requires the - * system to support misaligned memory accesses. - */ if (use_mips32r6_instructions) { if (a_bits) { tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits); @@ -1760,8 +1769,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, } } -static void __attribute__((unused)) -tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi, +static void tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc) { const MIPSInsn sw1 = MIPS_BE ? OPC_SWL : OPC_SWR; @@ -1841,9 +1849,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) MemOp opc; #if defined(CONFIG_SOFTMMU) tcg_insn_unit *label_ptr[2]; -#else - unsigned a_bits, s_bits; #endif + unsigned a_bits, s_bits; TCGReg base = TCG_REG_A0; data_regl = *args++; @@ -1852,10 +1859,20 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); oi = *args++; opc = get_memop(oi); + a_bits = get_alignment_bits(opc); + s_bits = opc & MO_SIZE; + /* + * R6 removes the left/right instructions but requires the + * system to support misaligned memory accesses. + */ #if defined(CONFIG_SOFTMMU) tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 0); - tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); + if (use_mips32r6_instructions || a_bits >= s_bits) { + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); + } else { + tcg_out_qemu_st_unalign(s, data_regl, data_regh, base, opc); + } add_qemu_ldst_label(s, 0, oi, (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32), data_regl, data_regh, addr_regl, addr_regh, @@ -1872,12 +1889,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) } else { tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, addr_regl); } - a_bits = get_alignment_bits(opc); - s_bits = opc & MO_SIZE; - /* - * R6 removes the left/right instructions but requires the - * system to support misaligned memory accesses. - */ if (use_mips32r6_instructions) { if (a_bits) { tcg_out_test_alignment(s, true, addr_regl, addr_regh, a_bits); From 414399b6b8a02f03d3b0cba5cfa9205dff618a9b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 8 Feb 2022 10:15:58 +1100 Subject: [PATCH 26/34] tcg/sparc: Use tcg_out_movi_imm13 in tcg_out_addsub2_i64 When BH is constant, it is constrained to 11 bits for use in MOVCC. For the cases in which we must load the constant BH into a register, we do not need the full logic of tcg_out_movi; we can use the simpler function for emitting a 13 bit constant. This eliminates the only case in which TCG_REG_T2 was passed to tcg_out_movi, which will shortly become invalid. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index 0c062c60eb..8d5992ef29 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -795,7 +795,7 @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, if (use_vis3_instructions && !is_sub) { /* Note that ADDXC doesn't accept immediates. */ if (bhconst && bh != 0) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh); + tcg_out_movi_imm13(s, TCG_REG_T2, bh); bh = TCG_REG_T2; } tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC); @@ -811,9 +811,13 @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0); } } else { - /* Otherwise adjust BH as if there is carry into T2 ... */ + /* + * Otherwise adjust BH as if there is carry into T2. + * Note that constant BH is constrained to 11 bits for the MOVCC, + * so the adjustment fits 12 bits. + */ if (bhconst) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh + (is_sub ? -1 : 1)); + tcg_out_movi_imm13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1)); } else { tcg_out_arithi(s, TCG_REG_T2, bh, 1, is_sub ? ARITH_SUB : ARITH_ADD); From c71929c345d08fa70edb1eeaa9babaaf5d6a9bf7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 8 Feb 2022 10:46:40 +1100 Subject: [PATCH 27/34] tcg/sparc: Split out tcg_out_movi_imm32 Handle 32-bit constants with a separate function, so that tcg_out_movi_int does not need to recurse. This slightly rearranges the order of tests for small constants, but produces the same output. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index 8d5992ef29..2f7c8dcb0a 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -413,15 +413,30 @@ static void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg) tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR); } +static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg) +{ + if (check_fit_i32(arg, 13)) { + /* A 13-bit constant sign-extended to 64-bits. */ + tcg_out_movi_imm13(s, ret, arg); + } else { + /* A 32-bit constant zero-extended to 64 bits. */ + tcg_out_sethi(s, ret, arg); + if (arg & 0x3ff) { + tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR); + } + } +} + static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg, bool in_prologue) { tcg_target_long hi, lo = (int32_t)arg; tcg_target_long test, lsb; - /* Make sure we test 32-bit constants for imm13 properly. */ - if (type == TCG_TYPE_I32) { - arg = lo; + /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */ + if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) { + tcg_out_movi_imm32(s, ret, arg); + return; } /* A 13-bit constant sign-extended to 64-bits. */ @@ -439,15 +454,6 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, } } - /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */ - if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) { - tcg_out_sethi(s, ret, arg); - if (arg & 0x3ff) { - tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR); - } - return; - } - /* A 32-bit constant sign-extended to 64-bits. */ if (arg == lo) { tcg_out_sethi(s, ret, ~arg); @@ -471,13 +477,13 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, /* A 64-bit constant decomposed into 2 32-bit pieces. */ if (check_fit_i32(lo, 13)) { hi = (arg - lo) >> 32; - tcg_out_movi(s, TCG_TYPE_I32, ret, hi); + tcg_out_movi_imm32(s, ret, hi); tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); tcg_out_arithi(s, ret, ret, lo, ARITH_ADD); } else { hi = arg >> 32; - tcg_out_movi(s, TCG_TYPE_I32, ret, hi); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo); + tcg_out_movi_imm32(s, ret, hi); + tcg_out_movi_imm32(s, TCG_REG_T2, lo); tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR); } From 92840d06faeed2038b90d5b168c18d73ca3a44b8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 5 Aug 2021 02:16:05 +0300 Subject: [PATCH 28/34] tcg/sparc: Add scratch argument to tcg_out_movi_int This will allow us to control exactly what scratch register is used for loading the constant. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index 2f7c8dcb0a..7a8f20ee9a 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -428,7 +428,8 @@ static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg) } static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, - tcg_target_long arg, bool in_prologue) + tcg_target_long arg, bool in_prologue, + TCGReg scratch) { tcg_target_long hi, lo = (int32_t)arg; tcg_target_long test, lsb; @@ -483,16 +484,17 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, } else { hi = arg >> 32; tcg_out_movi_imm32(s, ret, hi); - tcg_out_movi_imm32(s, TCG_REG_T2, lo); + tcg_out_movi_imm32(s, scratch, lo); tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); - tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR); + tcg_out_arith(s, ret, ret, scratch, ARITH_OR); } } static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg) { - tcg_out_movi_int(s, type, ret, arg, false); + tcg_debug_assert(ret != TCG_REG_T2); + tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T2); } static void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1, @@ -847,7 +849,7 @@ static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest, } else { uintptr_t desti = (uintptr_t)dest; tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1, - desti & ~0xfff, in_prologue); + desti & ~0xfff, in_prologue, TCG_REG_O7); tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL); } } @@ -1023,7 +1025,8 @@ static void tcg_target_qemu_prologue(TCGContext *s) #ifndef CONFIG_SOFTMMU if (guest_base != 0) { - tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true); + tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, + guest_base, true, TCG_REG_T1); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } #endif From 684db2a0b04ff024d0a275de85982fe5892185d3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 5 Aug 2021 04:09:15 +0300 Subject: [PATCH 29/34] tcg/sparc: Improve code gen for shifted 32-bit constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We had code for checking for 13 and 21-bit shifted constants, but we can do better and allow 32-bit shifted constants. This is still 2 insns shorter than the full 64-bit sequence. Reviewed-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index 7a8f20ee9a..ed2f4ecc40 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -462,17 +462,17 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, return; } - /* A 21-bit constant, shifted. */ + /* A 32-bit constant, shifted. */ lsb = ctz64(arg); test = (tcg_target_long)arg >> lsb; - if (check_fit_tl(test, 13)) { - tcg_out_movi_imm13(s, ret, test); - tcg_out_arithi(s, ret, ret, lsb, SHIFT_SLLX); - return; - } else if (lsb > 10 && test == extract64(test, 0, 21)) { + if (lsb > 10 && test == extract64(test, 0, 21)) { tcg_out_sethi(s, ret, test << 10); tcg_out_arithi(s, ret, ret, lsb - 10, SHIFT_SLLX); return; + } else if (test == (uint32_t)test || test == (int32_t)test) { + tcg_out_movi_int(s, TCG_TYPE_I64, ret, test, in_prologue, scratch); + tcg_out_arithi(s, ret, ret, lsb, SHIFT_SLLX); + return; } /* A 64-bit constant decomposed into 2 32-bit pieces. */ From 6a6bfa3c60ba70e870f8355b9d3508915c90c13a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 5 Feb 2022 08:12:30 +0300 Subject: [PATCH 30/34] tcg/sparc: Convert patch_reloc to return bool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 7ecd02a06f8, if patch_reloc fails we restart translation with a smaller TB. SPARC had its function signature changed, but not the logic. Replace assert with return false. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index ed2f4ecc40..213aba4be6 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -323,12 +323,16 @@ static bool patch_reloc(tcg_insn_unit *src_rw, int type, switch (type) { case R_SPARC_WDISP16: - assert(check_fit_ptr(pcrel >> 2, 16)); + if (!check_fit_ptr(pcrel >> 2, 16)) { + return false; + } insn &= ~INSN_OFF16(-1); insn |= INSN_OFF16(pcrel); break; case R_SPARC_WDISP19: - assert(check_fit_ptr(pcrel >> 2, 19)); + if (!check_fit_ptr(pcrel >> 2, 19)) { + return false; + } insn &= ~INSN_OFF19(-1); insn |= INSN_OFF19(pcrel); break; From c834b8d81b0ee597d185a4d8127dfc76230307c8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 5 Aug 2021 04:34:20 +0300 Subject: [PATCH 31/34] tcg/sparc: Use the constant pool for 64-bit constants Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index 213aba4be6..e78945d153 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -336,6 +336,13 @@ static bool patch_reloc(tcg_insn_unit *src_rw, int type, insn &= ~INSN_OFF19(-1); insn |= INSN_OFF19(pcrel); break; + case R_SPARC_13: + if (!check_fit_ptr(value, 13)) { + return false; + } + insn &= ~INSN_IMM13(-1); + insn |= INSN_IMM13(value); + break; default: g_assert_not_reached(); } @@ -479,6 +486,14 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, return; } + /* Use the constant pool, if possible. */ + if (!in_prologue && USE_REG_TB) { + new_pool_label(s, arg, R_SPARC_13, s->code_ptr, + tcg_tbrel_diff(s, NULL)); + tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(TCG_REG_TB)); + return; + } + /* A 64-bit constant decomposed into 2 32-bit pieces. */ if (check_fit_i32(lo, 13)) { hi = (arg - lo) >> 32; From e01d60f211251388a5d9ae7e02d0b4500af26966 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 5 Aug 2021 02:23:05 +0300 Subject: [PATCH 32/34] tcg/sparc: Add tcg_out_jmpl_const for better tail calls Due to mapping changes, we now rarely place the code_gen_buffer near the main executable. Which means that direct calls will now rarely be in range. So, always use indirect calls for tail calls, which allows us to avoid clobbering %o7, and therefore we need not save and restore it. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index e78945d153..646bb462c3 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -858,6 +858,19 @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, tcg_out_mov(s, TCG_TYPE_I64, rl, tmp); } +static void tcg_out_jmpl_const(TCGContext *s, const tcg_insn_unit *dest, + bool in_prologue, bool tail_call) +{ + uintptr_t desti = (uintptr_t)dest; + + /* Be careful not to clobber %o7 for a tail call. */ + tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1, + desti & ~0xfff, in_prologue, + tail_call ? TCG_REG_G2 : TCG_REG_O7); + tcg_out_arithi(s, tail_call ? TCG_REG_G0 : TCG_REG_O7, + TCG_REG_T1, desti & 0xfff, JMPL); +} + static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest, bool in_prologue) { @@ -866,10 +879,7 @@ static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest, if (disp == (int32_t)disp) { tcg_out32(s, CALL | (uint32_t)disp >> 2); } else { - uintptr_t desti = (uintptr_t)dest; - tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1, - desti & ~0xfff, in_prologue, TCG_REG_O7); - tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL); + tcg_out_jmpl_const(s, dest, in_prologue, false); } } @@ -960,11 +970,10 @@ static void build_trampolines(TCGContext *s) /* Set the retaddr operand. */ tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); - /* Set the env operand. */ - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); /* Tail call. */ - tcg_out_call_nodelay(s, qemu_ld_helpers[i], true); - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); + tcg_out_jmpl_const(s, qemu_ld_helpers[i], true, true); + /* delay slot -- set the env argument */ + tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0); } for (i = 0; i < ARRAY_SIZE(qemu_st_helpers); ++i) { @@ -1006,14 +1015,14 @@ static void build_trampolines(TCGContext *s) if (ra >= TCG_REG_O6) { tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK, TCG_TARGET_CALL_STACK_OFFSET); - ra = TCG_REG_G1; + } else { + tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); } - tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); - /* Set the env operand. */ - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); + /* Tail call. */ - tcg_out_call_nodelay(s, qemu_st_helpers[i], true); - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); + tcg_out_jmpl_const(s, qemu_st_helpers[i], true, true); + /* delay slot -- set the env argument */ + tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0); } } #endif From 321dbde33a6aa8e7780a3b6b4746628d215a1fec Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 5 Aug 2021 03:54:30 +0300 Subject: [PATCH 33/34] tcg/sparc: Support unaligned access for user-only This is kinda sorta the opposite of the other tcg hosts, where we get (normal) alignment checks for free with host SIGBUS and need to add code to support unaligned accesses. This inline code expansion is somewhat large, but it takes quite a few instructions to make a function call to a helper anyway. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 219 +++++++++++++++++++++++++++++++++++-- 1 file changed, 211 insertions(+), 8 deletions(-) diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index 646bb462c3..72d9552fd0 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -211,6 +211,7 @@ static const int tcg_target_call_oarg_regs[] = { #define ARITH_ADD (INSN_OP(2) | INSN_OP3(0x00)) #define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10)) #define ARITH_AND (INSN_OP(2) | INSN_OP3(0x01)) +#define ARITH_ANDCC (INSN_OP(2) | INSN_OP3(0x11)) #define ARITH_ANDN (INSN_OP(2) | INSN_OP3(0x05)) #define ARITH_OR (INSN_OP(2) | INSN_OP3(0x02)) #define ARITH_ORCC (INSN_OP(2) | INSN_OP3(0x12)) @@ -1025,6 +1026,38 @@ static void build_trampolines(TCGContext *s) tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0); } } +#else +static const tcg_insn_unit *qemu_unalign_ld_trampoline; +static const tcg_insn_unit *qemu_unalign_st_trampoline; + +static void build_trampolines(TCGContext *s) +{ + for (int ld = 0; ld < 2; ++ld) { + void *helper; + + while ((uintptr_t)s->code_ptr & 15) { + tcg_out_nop(s); + } + + if (ld) { + helper = helper_unaligned_ld; + qemu_unalign_ld_trampoline = tcg_splitwx_to_rx(s->code_ptr); + } else { + helper = helper_unaligned_st; + qemu_unalign_st_trampoline = tcg_splitwx_to_rx(s->code_ptr); + } + + if (!SPARC64 && TARGET_LONG_BITS == 64) { + /* Install the high part of the address. */ + tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX); + } + + /* Tail call. */ + tcg_out_jmpl_const(s, helper, true, true); + /* delay slot -- set the env argument */ + tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0); + } +} #endif /* Generate global QEMU prologue and epilogue code */ @@ -1075,9 +1108,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* delay slot */ tcg_out_movi_imm13(s, TCG_REG_O0, 0); -#ifdef CONFIG_SOFTMMU build_trampolines(s); -#endif } static void tcg_out_nop_fill(tcg_insn_unit *p, int count) @@ -1162,18 +1193,22 @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, static const int qemu_ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = { [MO_UB] = LDUB, [MO_SB] = LDSB, + [MO_UB | MO_LE] = LDUB, + [MO_SB | MO_LE] = LDSB, [MO_BEUW] = LDUH, [MO_BESW] = LDSH, [MO_BEUL] = LDUW, [MO_BESL] = LDSW, [MO_BEUQ] = LDX, + [MO_BESQ] = LDX, [MO_LEUW] = LDUH_LE, [MO_LESW] = LDSH_LE, [MO_LEUL] = LDUW_LE, [MO_LESL] = LDSW_LE, [MO_LEUQ] = LDX_LE, + [MO_LESQ] = LDX_LE, }; static const int qemu_st_opc[(MO_SIZE | MO_BSWAP) + 1] = { @@ -1192,11 +1227,12 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, MemOpIdx oi, bool is_64) { MemOp memop = get_memop(oi); + tcg_insn_unit *label_ptr; + #ifdef CONFIG_SOFTMMU unsigned memi = get_mmuidx(oi); TCGReg addrz, param; const tcg_insn_unit *func; - tcg_insn_unit *label_ptr; addrz = tcg_out_tlb_load(s, addr, memi, memop, offsetof(CPUTLBEntry, addr_read)); @@ -1260,13 +1296,99 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); #else + TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0); + unsigned a_bits = get_alignment_bits(memop); + unsigned s_bits = memop & MO_SIZE; + unsigned t_bits; + if (SPARC64 && TARGET_LONG_BITS == 32) { tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); addr = TCG_REG_T1; } - tcg_out_ldst_rr(s, data, addr, - (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0), + + /* + * Normal case: alignment equal to access size. + */ + if (a_bits == s_bits) { + tcg_out_ldst_rr(s, data, addr, index, + qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); + return; + } + + /* + * Test for at least natural alignment, and assume most accesses + * will be aligned -- perform a straight load in the delay slot. + * This is required to preserve atomicity for aligned accesses. + */ + t_bits = MAX(a_bits, s_bits); + tcg_debug_assert(t_bits < 13); + tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC); + + /* beq,a,pt %icc, label */ + label_ptr = s->code_ptr; + tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0); + /* delay slot */ + tcg_out_ldst_rr(s, data, addr, index, qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); + + if (a_bits >= s_bits) { + /* + * Overalignment: A successful alignment test will perform the memory + * operation in the delay slot, and failure need only invoke the + * handler for SIGBUS. + */ + TCGReg arg_low = TCG_REG_O1 + (!SPARC64 && TARGET_LONG_BITS == 64); + tcg_out_call_nodelay(s, qemu_unalign_ld_trampoline, false); + /* delay slot -- move to low part of argument reg */ + tcg_out_mov_delay(s, arg_low, addr); + } else { + /* Underalignment: load by pieces of minimum alignment. */ + int ld_opc, a_size, s_size, i; + + /* + * Force full address into T1 early; avoids problems with + * overlap between @addr and @data. + */ + tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD); + + a_size = 1 << a_bits; + s_size = 1 << s_bits; + if ((memop & MO_BSWAP) == MO_BE) { + ld_opc = qemu_ld_opc[a_bits | MO_BE | (memop & MO_SIGN)]; + tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc); + ld_opc = qemu_ld_opc[a_bits | MO_BE]; + for (i = a_size; i < s_size; i += a_size) { + tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc); + tcg_out_arithi(s, data, data, a_size, SHIFT_SLLX); + tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR); + } + } else if (a_bits == 0) { + ld_opc = LDUB; + tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc); + for (i = a_size; i < s_size; i += a_size) { + if ((memop & MO_SIGN) && i == s_size - a_size) { + ld_opc = LDSB; + } + tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc); + tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX); + tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR); + } + } else { + ld_opc = qemu_ld_opc[a_bits | MO_LE]; + tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, ld_opc); + for (i = a_size; i < s_size; i += a_size) { + tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD); + if ((memop & MO_SIGN) && i == s_size - a_size) { + ld_opc = qemu_ld_opc[a_bits | MO_LE | MO_SIGN]; + } + tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, ld_opc); + tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX); + tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR); + } + } + } + + *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); #endif /* CONFIG_SOFTMMU */ } @@ -1274,11 +1396,12 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, MemOpIdx oi) { MemOp memop = get_memop(oi); + tcg_insn_unit *label_ptr; + #ifdef CONFIG_SOFTMMU unsigned memi = get_mmuidx(oi); TCGReg addrz, param; const tcg_insn_unit *func; - tcg_insn_unit *label_ptr; addrz = tcg_out_tlb_load(s, addr, memi, memop, offsetof(CPUTLBEntry, addr_write)); @@ -1315,13 +1438,93 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); #else + TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0); + unsigned a_bits = get_alignment_bits(memop); + unsigned s_bits = memop & MO_SIZE; + unsigned t_bits; + if (SPARC64 && TARGET_LONG_BITS == 32) { tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); addr = TCG_REG_T1; } - tcg_out_ldst_rr(s, data, addr, - (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0), + + /* + * Normal case: alignment equal to access size. + */ + if (a_bits == s_bits) { + tcg_out_ldst_rr(s, data, addr, index, + qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); + return; + } + + /* + * Test for at least natural alignment, and assume most accesses + * will be aligned -- perform a straight store in the delay slot. + * This is required to preserve atomicity for aligned accesses. + */ + t_bits = MAX(a_bits, s_bits); + tcg_debug_assert(t_bits < 13); + tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC); + + /* beq,a,pt %icc, label */ + label_ptr = s->code_ptr; + tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0); + /* delay slot */ + tcg_out_ldst_rr(s, data, addr, index, qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); + + if (a_bits >= s_bits) { + /* + * Overalignment: A successful alignment test will perform the memory + * operation in the delay slot, and failure need only invoke the + * handler for SIGBUS. + */ + TCGReg arg_low = TCG_REG_O1 + (!SPARC64 && TARGET_LONG_BITS == 64); + tcg_out_call_nodelay(s, qemu_unalign_st_trampoline, false); + /* delay slot -- move to low part of argument reg */ + tcg_out_mov_delay(s, arg_low, addr); + } else { + /* Underalignment: store by pieces of minimum alignment. */ + int st_opc, a_size, s_size, i; + + /* + * Force full address into T1 early; avoids problems with + * overlap between @addr and @data. + */ + tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD); + + a_size = 1 << a_bits; + s_size = 1 << s_bits; + if ((memop & MO_BSWAP) == MO_BE) { + st_opc = qemu_st_opc[a_bits | MO_BE]; + for (i = 0; i < s_size; i += a_size) { + TCGReg d = data; + int shift = (s_size - a_size - i) * 8; + if (shift) { + d = TCG_REG_T2; + tcg_out_arithi(s, d, data, shift, SHIFT_SRLX); + } + tcg_out_ldst(s, d, TCG_REG_T1, i, st_opc); + } + } else if (a_bits == 0) { + tcg_out_ldst(s, data, TCG_REG_T1, 0, STB); + for (i = 1; i < s_size; i++) { + tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX); + tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, STB); + } + } else { + /* Note that ST*A with immediate asi must use indexed address. */ + st_opc = qemu_st_opc[a_bits + MO_LE]; + tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, st_opc); + for (i = a_size; i < s_size; i += a_size) { + tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX); + tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD); + tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, st_opc); + } + } + } + + *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); #endif /* CONFIG_SOFTMMU */ } From 5c1a101ef6b85537a4ade93c39ea81cadd5c246e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 28 Jul 2021 11:32:24 -1000 Subject: [PATCH 34/34] tests/tcg/multiarch: Add sigbus.c A mostly generic test for unaligned access raising SIGBUS. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tests/tcg/multiarch/sigbus.c | 68 ++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 tests/tcg/multiarch/sigbus.c diff --git a/tests/tcg/multiarch/sigbus.c b/tests/tcg/multiarch/sigbus.c new file mode 100644 index 0000000000..8134c5fd56 --- /dev/null +++ b/tests/tcg/multiarch/sigbus.c @@ -0,0 +1,68 @@ +#define _GNU_SOURCE 1 + +#include +#include +#include +#include + + +unsigned long long x = 0x8877665544332211ull; +void * volatile p = (void *)&x + 1; + +void sigbus(int sig, siginfo_t *info, void *uc) +{ + assert(sig == SIGBUS); + assert(info->si_signo == SIGBUS); +#ifdef BUS_ADRALN + assert(info->si_code == BUS_ADRALN); +#endif + assert(info->si_addr == p); + exit(EXIT_SUCCESS); +} + +int main() +{ + struct sigaction sa = { + .sa_sigaction = sigbus, + .sa_flags = SA_SIGINFO + }; + int allow_fail = 0; + int tmp; + + tmp = sigaction(SIGBUS, &sa, NULL); + assert(tmp == 0); + + /* + * Select an operation that's likely to enforce alignment. + * On many guests that support unaligned accesses by default, + * this is often an atomic operation. + */ +#if defined(__aarch64__) + asm volatile("ldxr %w0,[%1]" : "=r"(tmp) : "r"(p) : "memory"); +#elif defined(__alpha__) + asm volatile("ldl_l %0,0(%1)" : "=r"(tmp) : "r"(p) : "memory"); +#elif defined(__arm__) + asm volatile("ldrex %0,[%1]" : "=r"(tmp) : "r"(p) : "memory"); +#elif defined(__powerpc__) + asm volatile("lwarx %0,0,%1" : "=r"(tmp) : "r"(p) : "memory"); +#elif defined(__riscv_atomic) + asm volatile("lr.w %0,(%1)" : "=r"(tmp) : "r"(p) : "memory"); +#else + /* No insn known to fault unaligned -- try for a straight load. */ + allow_fail = 1; + tmp = *(volatile int *)p; +#endif + + assert(allow_fail); + + /* + * We didn't see a signal. + * We might as well validate the unaligned load worked. + */ + if (BYTE_ORDER == LITTLE_ENDIAN) { + assert(tmp == 0x55443322); + } else { + assert(tmp == 0x77665544); + } + return EXIT_SUCCESS; +}