diff --git a/MAINTAINERS b/MAINTAINERS index 180695f5d3..827d6445d3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -266,7 +266,8 @@ S: Maintained F: target/riscv/ F: hw/riscv/ F: include/hw/riscv/ -F: disas/riscv.c +F: linux-user/host/riscv32/ +F: linux-user/host/riscv64/ S390 M: Richard Henderson @@ -2172,6 +2173,15 @@ S: Odd Fixes F: tcg/ppc/ F: disas/ppc.c +RISC-V +M: Michael Clark +M: Palmer Dabbelt +M: Alistair Francis +L: qemu-riscv@nongnu.org +S: Maintained +F: tcg/riscv/ +F: disas/riscv.c + S390 target M: Richard Henderson S: Maintained diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index cd75829cf2..941295ea49 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -571,6 +571,81 @@ int cpu_signal_handler(int host_signum, void *pinfo, return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); } +#elif defined(__riscv) + +int cpu_signal_handler(int host_signum, void *pinfo, + void *puc) +{ + siginfo_t *info = pinfo; + ucontext_t *uc = puc; + greg_t pc = uc->uc_mcontext.__gregs[REG_PC]; + uint32_t insn = *(uint32_t *)pc; + int is_write = 0; + + /* Detect a store by reading the instruction at the program + counter. Note: TCG currently only generates 32-bit instructions, + so those are the common case; compressed stores are checked + below as well */ + switch (((insn >> 0) & 0b11)) { + case 3: + switch (((insn >> 2) & 0b11111)) { + case 8: + switch (((insn >> 12) & 0b111)) { + case 0: /* sb */ + case 1: /* sh */ + case 2: /* sw */ + case 3: /* sd */ + case 4: /* sq */ + is_write = 1; + break; + default: + break; + } + break; + case 9: + switch (((insn >> 12) & 0b111)) { + case 2: /* fsw */ + case 3: /* fsd */ + case 4: /* fsq */ + is_write = 1; + break; + default: + break; + } + break; + default: + break; + } + } + + /* Check for compressed instructions */ + switch (((insn >> 13) & 0b111)) { + case 7: + switch (insn & 0b11) { + case 0: /* c.sd */ + case 2: /* c.sdsp */ + is_write = 1; + break; + default: + break; + } + break; + case 6: + switch (insn & 0b11) { + case 0: /* c.sw */ + case 2: /* c.swsp */ + is_write = 1; + break; + default: + break; + } + break; + default: + break; + } + + return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); +} + #else #error host CPU specific signal handler needed diff --git a/configure b/configure index 224d3071ac..79375affc1 100755 --- a/configure +++ b/configure @@ -710,6 +710,12 @@ elif check_define __s390__ ; then else cpu="s390" fi +elif check_define __riscv ; then + if check_define _LP64 ; then + cpu="riscv64" + else + cpu="riscv32" + fi elif check_define __arm__ ; then cpu="arm" elif check_define __aarch64__ ; then @@ -722,7 +728,7 @@ ARCH= # Normalise host CPU name and set ARCH. # Note that this case should only have supported host CPUs, not guests.
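# Each accepted name below also records $host_cc as that cpu's cross compiler via the cross_cc_${cpu} eval, so the new riscv32/riscv64 entries pick one up automatically.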
case "$cpu" in - ppc|ppc64|s390|s390x|sparc64|x32) + ppc|ppc64|s390|s390x|sparc64|x32|riscv32|riscv64) cpu="$cpu" supported_cpu="yes" eval "cross_cc_${cpu}=\$host_cc" @@ -6937,6 +6943,8 @@ elif test "$ARCH" = "x86_64" -o "$ARCH" = "x32" ; then QEMU_INCLUDES="-iquote \$(SRC_PATH)/tcg/i386 $QEMU_INCLUDES" elif test "$ARCH" = "ppc64" ; then QEMU_INCLUDES="-iquote \$(SRC_PATH)/tcg/ppc $QEMU_INCLUDES" +elif test "$ARCH" = "riscv32" -o "$ARCH" = "riscv64" ; then + QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/riscv $QEMU_INCLUDES" else QEMU_INCLUDES="-iquote \$(SRC_PATH)/tcg/\$(ARCH) $QEMU_INCLUDES" fi @@ -7433,7 +7441,7 @@ for i in $ARCH $TARGET_BASE_ARCH ; do ppc*) disas_config "PPC" ;; - riscv) + riscv*) disas_config "RISCV" ;; s390*) diff --git a/disas.c b/disas.c index f9c517b358..d9aa713a40 100644 --- a/disas.c +++ b/disas.c @@ -522,8 +522,14 @@ void disas(FILE *out, void *code, unsigned long size) # ifdef _ARCH_PPC64 s.info.cap_mode = CS_MODE_64; # endif -#elif defined(__riscv__) - print_insn = print_insn_riscv; +#elif defined(__riscv) && defined(CONFIG_RISCV_DIS) +#if defined(_ILP32) || (__riscv_xlen == 32) + print_insn = print_insn_riscv32; +#elif defined(_LP64) + print_insn = print_insn_riscv64; +#else +#error unsupported RISC-V ABI +#endif #elif defined(__aarch64__) && defined(CONFIG_ARM_A64_DIS) print_insn = print_insn_arm_a64; s.info.cap_arch = CS_ARCH_ARM64; diff --git a/disas/microblaze.c b/disas/microblaze.c index 598ecbc89d..c23605043a 100644 --- a/disas/microblaze.c +++ b/disas/microblaze.c @@ -176,7 +176,6 @@ enum microblaze_instr_type { #define REG_TLBSX 36869 /* MMU: TLB Search Index reg */ /* alternate names for gen purpose regs */ -#define REG_SP 1 /* stack pointer */ #define REG_ROSDP 2 /* read-only small data pointer */ #define REG_RWSDP 13 /* read-write small data pointer */ diff --git a/include/elf.h b/include/elf.h index c151164b63..0ac7911b7b 100644 --- a/include/elf.h +++ b/include/elf.h @@ -1338,6 +1338,61 @@ typedef struct { #define R_IA64_DTPREL64LSB 0xb7 /* @dtprel(sym + add), data8 LSB */ #define R_IA64_LTOFF_DTPREL22 0xba /* @ltoff(@dtprel(s+a)), imm22 */ +/* RISC-V relocations. 
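These values follow the RISC-V ELF psABI numbering. Of them, only R_RISCV_BRANCH, R_RISCV_JAL and R_RISCV_CALL are actually emitted by the TCG backend added below; the rest are listed for completeness.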
*/ +#define R_RISCV_NONE 0 +#define R_RISCV_32 1 +#define R_RISCV_64 2 +#define R_RISCV_RELATIVE 3 +#define R_RISCV_COPY 4 +#define R_RISCV_JUMP_SLOT 5 +#define R_RISCV_TLS_DTPMOD32 6 +#define R_RISCV_TLS_DTPMOD64 7 +#define R_RISCV_TLS_DTPREL32 8 +#define R_RISCV_TLS_DTPREL64 9 +#define R_RISCV_TLS_TPREL32 10 +#define R_RISCV_TLS_TPREL64 11 +#define R_RISCV_BRANCH 16 +#define R_RISCV_JAL 17 +#define R_RISCV_CALL 18 +#define R_RISCV_CALL_PLT 19 +#define R_RISCV_GOT_HI20 20 +#define R_RISCV_TLS_GOT_HI20 21 +#define R_RISCV_TLS_GD_HI20 22 +#define R_RISCV_PCREL_HI20 23 +#define R_RISCV_PCREL_LO12_I 24 +#define R_RISCV_PCREL_LO12_S 25 +#define R_RISCV_HI20 26 +#define R_RISCV_LO12_I 27 +#define R_RISCV_LO12_S 28 +#define R_RISCV_TPREL_HI20 29 +#define R_RISCV_TPREL_LO12_I 30 +#define R_RISCV_TPREL_LO12_S 31 +#define R_RISCV_TPREL_ADD 32 +#define R_RISCV_ADD8 33 +#define R_RISCV_ADD16 34 +#define R_RISCV_ADD32 35 +#define R_RISCV_ADD64 36 +#define R_RISCV_SUB8 37 +#define R_RISCV_SUB16 38 +#define R_RISCV_SUB32 39 +#define R_RISCV_SUB64 40 +#define R_RISCV_GNU_VTINHERIT 41 +#define R_RISCV_GNU_VTENTRY 42 +#define R_RISCV_ALIGN 43 +#define R_RISCV_RVC_BRANCH 44 +#define R_RISCV_RVC_JUMP 45 +#define R_RISCV_RVC_LUI 46 +#define R_RISCV_GPREL_I 47 +#define R_RISCV_GPREL_S 48 +#define R_RISCV_TPREL_I 49 +#define R_RISCV_TPREL_S 50 +#define R_RISCV_RELAX 51 +#define R_RISCV_SUB6 52 +#define R_RISCV_SET6 53 +#define R_RISCV_SET8 54 +#define R_RISCV_SET16 55 +#define R_RISCV_SET32 56 + typedef struct elf32_rel { Elf32_Addr r_offset; Elf32_Word r_info; diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h index 276dd5afce..ab4f8b6623 100644 --- a/include/exec/helper-head.h +++ b/include/exec/helper-head.h @@ -108,6 +108,19 @@ #define dh_is_signed_env dh_is_signed_ptr #define dh_is_signed(t) dh_is_signed_##t +#define dh_callflag_i32 0 +#define dh_callflag_s32 0 +#define dh_callflag_int 0 +#define dh_callflag_i64 0 +#define dh_callflag_s64 0 +#define dh_callflag_f16 0 +#define dh_callflag_f32 0 +#define dh_callflag_f64 0 +#define dh_callflag_ptr 0 +#define dh_callflag_void 0 +#define dh_callflag_noreturn TCG_CALL_NO_RETURN +#define dh_callflag(t) glue(dh_callflag_, dh_alias(t)) + #define dh_sizemask(t, n) \ ((dh_is_64bit(t) << (n*2)) | (dh_is_signed(t) << (n*2+1))) diff --git a/include/exec/helper-tcg.h b/include/exec/helper-tcg.h index b3bdb0c399..268e0f804b 100644 --- a/include/exec/helper-tcg.h +++ b/include/exec/helper-tcg.h @@ -11,36 +11,43 @@ #define str(s) #s #define DEF_HELPER_FLAGS_0(NAME, FLAGS, ret) \ - { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ .sizemask = dh_sizemask(ret, 0) }, #define DEF_HELPER_FLAGS_1(NAME, FLAGS, ret, t1) \ - { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) }, #define DEF_HELPER_FLAGS_2(NAME, FLAGS, ret, t1, t2) \ - { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | dh_sizemask(t2, 2) }, #define DEF_HELPER_FLAGS_3(NAME, FLAGS, ret, t1, t2, t3) \ - { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | 
dh_sizemask(t2, 2) | dh_sizemask(t3, 3) }, #define DEF_HELPER_FLAGS_4(NAME, FLAGS, ret, t1, t2, t3, t4) \ - { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) }, #define DEF_HELPER_FLAGS_5(NAME, FLAGS, ret, t1, t2, t3, t4, t5) \ - { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ | dh_sizemask(t5, 5) }, #define DEF_HELPER_FLAGS_6(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6) \ - { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ + { .func = HELPER(NAME), .name = str(NAME), \ + .flags = FLAGS | dh_callflag(ret), \ .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) }, diff --git a/include/exec/poison.h b/include/exec/poison.h index 32d53789f8..ecdc83c147 100644 --- a/include/exec/poison.h +++ b/include/exec/poison.h @@ -79,6 +79,7 @@ #pragma GCC poison CONFIG_MOXIE_DIS #pragma GCC poison CONFIG_NIOS2_DIS #pragma GCC poison CONFIG_PPC_DIS +#pragma GCC poison CONFIG_RISCV_DIS #pragma GCC poison CONFIG_S390_DIS #pragma GCC poison CONFIG_SH4_DIS #pragma GCC poison CONFIG_SPARC_DIS diff --git a/linux-user/host/riscv32/hostdep.h b/linux-user/host/riscv32/hostdep.h new file mode 100644 index 0000000000..adf9edbf2d --- /dev/null +++ b/linux-user/host/riscv32/hostdep.h @@ -0,0 +1,11 @@ +/* + * hostdep.h : things which are dependent on the host architecture + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef RISCV32_HOSTDEP_H +#define RISCV32_HOSTDEP_H + +#endif diff --git a/linux-user/host/riscv64/hostdep.h b/linux-user/host/riscv64/hostdep.h new file mode 100644 index 0000000000..865f0fb9ff --- /dev/null +++ b/linux-user/host/riscv64/hostdep.h @@ -0,0 +1,34 @@ +/* + * hostdep.h : things which are dependent on the host architecture + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef RISCV64_HOSTDEP_H +#define RISCV64_HOSTDEP_H + +/* We have a safe-syscall.inc.S */ +#define HAVE_SAFE_SYSCALL + +#ifndef __ASSEMBLER__ + +/* These are defined by the safe-syscall.inc.S file */ +extern char safe_syscall_start[]; +extern char safe_syscall_end[]; + +/* Adjust the signal context to rewind out of safe-syscall if we're in it */ +static inline void rewind_if_in_safe_syscall(void *puc) +{ + ucontext_t *uc = puc; + unsigned long *pcreg = &uc->uc_mcontext.__gregs[REG_PC]; + + if (*pcreg > (uintptr_t)safe_syscall_start + && *pcreg < (uintptr_t)safe_syscall_end) { + *pcreg = (uintptr_t)safe_syscall_start; + } +} + +#endif /* __ASSEMBLER__ */ + +#endif diff --git a/linux-user/host/riscv64/safe-syscall.inc.S b/linux-user/host/riscv64/safe-syscall.inc.S new file mode 100644 index 0000000000..9ca3fbfd1e --- /dev/null +++ b/linux-user/host/riscv64/safe-syscall.inc.S @@ -0,0 +1,77 @@ +/* + * safe-syscall.inc.S : host-specific assembly fragment + * to handle signals occurring at the same time as system calls. 
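* The danger window is the gap between the final check of the signal_pending flag and the syscall instruction itself; the rewind trick described below lets the signal handler make that window effectively atomic.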
* This is intended to be included by linux-user/safe-syscall.S + * + * Written by Richard Henderson + * Copyright (C) 2018 Linaro, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + + .global safe_syscall_base + .global safe_syscall_start + .global safe_syscall_end + .type safe_syscall_base, @function + .type safe_syscall_start, @function + .type safe_syscall_end, @function + + /* + * This is the entry point for making a system call. The calling + * convention here is that of a C varargs function with the + * first argument an 'int *' to the signal_pending flag, the + * second one the system call number (as a 'long'), and all further + * arguments being syscall arguments (also 'long'). + * We return a long which is the syscall's return value, which + * may be negative-errno on failure. Conversion to the + * -1-and-errno-set convention is done by the calling wrapper. + */ +safe_syscall_base: + .cfi_startproc + /* + * The syscall calling convention is nearly the same as C: + * we enter with a0 == &signal_pending + * a1 == syscall number + * a2 ... a7 == syscall arguments + * and return the result in a0 + * and the syscall instruction needs + * a7 == syscall number + * a0 ... a5 == syscall arguments + * and returns the result in a0 + * Shuffle everything around appropriately. + */ + mv t0, a0 /* signal_pending pointer */ + mv t1, a1 /* syscall number */ + mv a0, a2 /* syscall arguments */ + mv a1, a3 + mv a2, a4 + mv a3, a5 + mv a4, a6 + mv a5, a7 + mv a7, t1 + + /* + * This next sequence of code works in conjunction with the + * rewind_if_in_safe_syscall() in hostdep.h. If a signal is taken + * and the interrupted PC is anywhere between 'safe_syscall_start' + * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. + * The code sequence must therefore be able to cope with this, and + * the syscall instruction must be the final one in the sequence. + */ +safe_syscall_start: + /* If signal_pending is non-zero, don't do the call */ + lw t1, 0(t0) + bnez t1, 0f + scall +safe_syscall_end: + /* code path for having successfully executed the syscall */ + ret + +0: + /* code path when we didn't execute the syscall */ + li a0, -TARGET_ERESTARTSYS + ret + .cfi_endproc + + .size safe_syscall_base, .-safe_syscall_base diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h new file mode 100644 index 0000000000..60918cacb4 --- /dev/null +++ b/tcg/riscv/tcg-target.h @@ -0,0 +1,177 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2018 SiFive, Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef RISCV_TCG_TARGET_H +#define RISCV_TCG_TARGET_H + +#if __riscv_xlen == 32 +# define TCG_TARGET_REG_BITS 32 +#elif __riscv_xlen == 64 +# define TCG_TARGET_REG_BITS 64 +#endif + +#define TCG_TARGET_INSN_UNIT_SIZE 4 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 20 +#define TCG_TARGET_NB_REGS 32 + +typedef enum { + TCG_REG_ZERO, + TCG_REG_RA, + TCG_REG_SP, + TCG_REG_GP, + TCG_REG_TP, + TCG_REG_T0, + TCG_REG_T1, + TCG_REG_T2, + TCG_REG_S0, + TCG_REG_S1, + TCG_REG_A0, + TCG_REG_A1, + TCG_REG_A2, + TCG_REG_A3, + TCG_REG_A4, + TCG_REG_A5, + TCG_REG_A6, + TCG_REG_A7, + TCG_REG_S2, + TCG_REG_S3, + TCG_REG_S4, + TCG_REG_S5, + TCG_REG_S6, + TCG_REG_S7, + TCG_REG_S8, + TCG_REG_S9, + TCG_REG_S10, + TCG_REG_S11, + TCG_REG_T3, + TCG_REG_T4, + TCG_REG_T5, + TCG_REG_T6, + + /* aliases */ + TCG_AREG0 = TCG_REG_S0, + TCG_GUEST_BASE_REG = TCG_REG_S1, + TCG_REG_TMP0 = TCG_REG_T6, + TCG_REG_TMP1 = TCG_REG_T5, + TCG_REG_TMP2 = TCG_REG_T4, +} TCGReg; + +/* used for function call generation */ +#define TCG_REG_CALL_STACK TCG_REG_SP +#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_TARGET_CALL_ALIGN_ARGS 1 +#define TCG_TARGET_CALL_STACK_OFFSET 0 + +/* optional instructions */ +#define TCG_TARGET_HAS_goto_ptr 1 +#define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_div_i32 1 +#define TCG_TARGET_HAS_rem_i32 1 +#define TCG_TARGET_HAS_div2_i32 0 +#define TCG_TARGET_HAS_rot_i32 0 +#define TCG_TARGET_HAS_deposit_i32 0 +#define TCG_TARGET_HAS_extract_i32 0 +#define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_add2_i32 1 +#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_mulu2_i32 0 +#define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 (TCG_TARGET_REG_BITS == 32) +#define TCG_TARGET_HAS_mulsh_i32 (TCG_TARGET_REG_BITS == 32) +#define TCG_TARGET_HAS_ext8s_i32 1 +#define TCG_TARGET_HAS_ext16s_i32 1 +#define TCG_TARGET_HAS_ext8u_i32 1 +#define TCG_TARGET_HAS_ext16u_i32 1 +#define TCG_TARGET_HAS_bswap16_i32 0 +#define TCG_TARGET_HAS_bswap32_i32 0 +#define TCG_TARGET_HAS_not_i32 1 +#define TCG_TARGET_HAS_neg_i32 1 +#define TCG_TARGET_HAS_andc_i32 0 +#define TCG_TARGET_HAS_orc_i32 0 +#define TCG_TARGET_HAS_eqv_i32 0 +#define TCG_TARGET_HAS_nand_i32 0 +#define TCG_TARGET_HAS_nor_i32 0 +#define TCG_TARGET_HAS_clz_i32 0 +#define TCG_TARGET_HAS_ctz_i32 0 +#define TCG_TARGET_HAS_ctpop_i32 0 +#define TCG_TARGET_HAS_direct_jump 0 +#define TCG_TARGET_HAS_brcond2 1 +#define TCG_TARGET_HAS_setcond2 1 + +#if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_HAS_movcond_i64 0 +#define TCG_TARGET_HAS_div_i64 1 +#define TCG_TARGET_HAS_rem_i64 1 +#define TCG_TARGET_HAS_div2_i64 0 +#define TCG_TARGET_HAS_rot_i64 0 +#define TCG_TARGET_HAS_deposit_i64 0 +#define TCG_TARGET_HAS_extract_i64 0 +#define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extrl_i64_i32 1 +#define TCG_TARGET_HAS_extrh_i64_i32 1 +#define TCG_TARGET_HAS_ext8s_i64 1 +#define TCG_TARGET_HAS_ext16s_i64 1 +#define TCG_TARGET_HAS_ext32s_i64 1 +#define TCG_TARGET_HAS_ext8u_i64 1 +#define TCG_TARGET_HAS_ext16u_i64 1 +#define TCG_TARGET_HAS_ext32u_i64 1 +#define TCG_TARGET_HAS_bswap16_i64 0 +#define TCG_TARGET_HAS_bswap32_i64 0 +#define TCG_TARGET_HAS_bswap64_i64 0 +#define TCG_TARGET_HAS_not_i64 1 +#define TCG_TARGET_HAS_neg_i64 1 +#define TCG_TARGET_HAS_andc_i64 
0 +#define TCG_TARGET_HAS_orc_i64 0 +#define TCG_TARGET_HAS_eqv_i64 0 +#define TCG_TARGET_HAS_nand_i64 0 +#define TCG_TARGET_HAS_nor_i64 0 +#define TCG_TARGET_HAS_clz_i64 0 +#define TCG_TARGET_HAS_ctz_i64 0 +#define TCG_TARGET_HAS_ctpop_i64 0 +#define TCG_TARGET_HAS_add2_i64 1 +#define TCG_TARGET_HAS_sub2_i64 1 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 1 +#define TCG_TARGET_HAS_mulsh_i64 1 +#endif + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + __builtin___clear_cache((char *)start, (char *)stop); +} + +/* not defined -- call should be eliminated at compile time */ +void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); + +#define TCG_TARGET_DEFAULT_MO (0) + +#ifdef CONFIG_SOFTMMU +#define TCG_TARGET_NEED_LDST_LABELS +#endif +#define TCG_TARGET_NEED_POOL_LABELS + +#define TCG_TARGET_HAS_MEMORY_BSWAP 0 + +#endif diff --git a/tcg/riscv/tcg-target.inc.c b/tcg/riscv/tcg-target.inc.c new file mode 100644 index 0000000000..6cf8de32b5 --- /dev/null +++ b/tcg/riscv/tcg-target.inc.c @@ -0,0 +1,1949 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2018 SiFive, Inc + * Copyright (c) 2008-2009 Arnaud Patard + * Copyright (c) 2009 Aurelien Jarno + * Copyright (c) 2008 Fabrice Bellard + * + * Based on i386/tcg-target.c and mips/tcg-target.c + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
*/ + +#include "tcg-pool.inc.c" + +#ifdef CONFIG_DEBUG_TCG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "zero", + "ra", + "sp", + "gp", + "tp", + "t0", + "t1", + "t2", + "s0", + "s1", + "a0", + "a1", + "a2", + "a3", + "a4", + "a5", + "a6", + "a7", + "s2", + "s3", + "s4", + "s5", + "s6", + "s7", + "s8", + "s9", + "s10", + "s11", + "t3", + "t4", + "t5", + "t6" +}; +#endif + +static const int tcg_target_reg_alloc_order[] = { + /* Call saved registers */ + /* TCG_REG_S0 reserved for TCG_AREG0 */ + TCG_REG_S1, + TCG_REG_S2, + TCG_REG_S3, + TCG_REG_S4, + TCG_REG_S5, + TCG_REG_S6, + TCG_REG_S7, + TCG_REG_S8, + TCG_REG_S9, + TCG_REG_S10, + TCG_REG_S11, + + /* Call clobbered registers */ + TCG_REG_T0, + TCG_REG_T1, + TCG_REG_T2, + TCG_REG_T3, + TCG_REG_T4, + TCG_REG_T5, + TCG_REG_T6, + + /* Argument registers */ + TCG_REG_A0, + TCG_REG_A1, + TCG_REG_A2, + TCG_REG_A3, + TCG_REG_A4, + TCG_REG_A5, + TCG_REG_A6, + TCG_REG_A7, +}; + +static const int tcg_target_call_iarg_regs[] = { + TCG_REG_A0, + TCG_REG_A1, + TCG_REG_A2, + TCG_REG_A3, + TCG_REG_A4, + TCG_REG_A5, + TCG_REG_A6, + TCG_REG_A7, +}; + +static const int tcg_target_call_oarg_regs[] = { + TCG_REG_A0, + TCG_REG_A1, +}; + +#define TCG_CT_CONST_ZERO 0x100 +#define TCG_CT_CONST_S12 0x200 +#define TCG_CT_CONST_N12 0x400 +#define TCG_CT_CONST_M12 0x800 + +static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len) +{ + if (TCG_TARGET_REG_BITS == 32) { + return sextract32(val, pos, len); + } else { + return sextract64(val, pos, len); + } +} + +/* parse target specific constraints */ +static const char *target_parse_constraint(TCGArgConstraint *ct, + const char *ct_str, TCGType type) +{ + switch (*ct_str++) { + case 'r': + ct->ct |= TCG_CT_REG; + ct->u.regs = 0xffffffff; + break; + case 'L': + /* qemu_ld/qemu_st constraint */ + ct->ct |= TCG_CT_REG; + ct->u.regs = 0xffffffff; + /* qemu_ld/qemu_st slow paths call helpers, so avoid their argument registers */ +#if defined(CONFIG_SOFTMMU) + tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]); + tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]); + tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]); + tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[3]); + tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[4]); +#endif + break; + case 'I': + ct->ct |= TCG_CT_CONST_S12; + break; + case 'N': + ct->ct |= TCG_CT_CONST_N12; + break; + case 'M': + ct->ct |= TCG_CT_CONST_M12; + break; + case 'Z': + /* we can use a zero immediate as a zero register argument. 
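That is, an operand constrained with 'Z' whose value turns out to be the constant 0 is encoded directly as register x0 (e.g. store data or brcond operands), saving the tcg_out_movi that would otherwise materialise it.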
*/ + ct->ct |= TCG_CT_CONST_ZERO; + break; + default: + return NULL; + } + return ct_str; +} + +/* test if a constant matches the constraint */ +static int tcg_target_const_match(tcg_target_long val, TCGType type, + const TCGArgConstraint *arg_ct) +{ + int ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { + return 1; + } + if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return 1; + } + if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) { + return 1; + } + if ((ct & TCG_CT_CONST_N12) && -val == sextreg(-val, 0, 12)) { + return 1; + } + if ((ct & TCG_CT_CONST_M12) && val >= -0xfff && val <= 0xfff) { + return 1; + } + return 0; +} + +/* + * RISC-V Base ISA opcodes (IM) + */ + +typedef enum { + OPC_ADD = 0x33, + OPC_ADDI = 0x13, + OPC_AND = 0x7033, + OPC_ANDI = 0x7013, + OPC_AUIPC = 0x17, + OPC_BEQ = 0x63, + OPC_BGE = 0x5063, + OPC_BGEU = 0x7063, + OPC_BLT = 0x4063, + OPC_BLTU = 0x6063, + OPC_BNE = 0x1063, + OPC_DIV = 0x2004033, + OPC_DIVU = 0x2005033, + OPC_JAL = 0x6f, + OPC_JALR = 0x67, + OPC_LB = 0x3, + OPC_LBU = 0x4003, + OPC_LD = 0x3003, + OPC_LH = 0x1003, + OPC_LHU = 0x5003, + OPC_LUI = 0x37, + OPC_LW = 0x2003, + OPC_LWU = 0x6003, + OPC_MUL = 0x2000033, + OPC_MULH = 0x2001033, + OPC_MULHSU = 0x2002033, + OPC_MULHU = 0x2003033, + OPC_OR = 0x6033, + OPC_ORI = 0x6013, + OPC_REM = 0x2006033, + OPC_REMU = 0x2007033, + OPC_SB = 0x23, + OPC_SD = 0x3023, + OPC_SH = 0x1023, + OPC_SLL = 0x1033, + OPC_SLLI = 0x1013, + OPC_SLT = 0x2033, + OPC_SLTI = 0x2013, + OPC_SLTIU = 0x3013, + OPC_SLTU = 0x3033, + OPC_SRA = 0x40005033, + OPC_SRAI = 0x40005013, + OPC_SRL = 0x5033, + OPC_SRLI = 0x5013, + OPC_SUB = 0x40000033, + OPC_SW = 0x2023, + OPC_XOR = 0x4033, + OPC_XORI = 0x4013, + +#if TCG_TARGET_REG_BITS == 64 + OPC_ADDIW = 0x1b, + OPC_ADDW = 0x3b, + OPC_DIVUW = 0x200503b, + OPC_DIVW = 0x200403b, + OPC_MULW = 0x200003b, + OPC_REMUW = 0x200703b, + OPC_REMW = 0x200603b, + OPC_SLLIW = 0x101b, + OPC_SLLW = 0x103b, + OPC_SRAIW = 0x4000501b, + OPC_SRAW = 0x4000503b, + OPC_SRLIW = 0x501b, + OPC_SRLW = 0x503b, + OPC_SUBW = 0x4000003b, +#else + /* Simplify code throughout by defining aliases for RV32. 
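On RV32 the *W forms do not exist, but the plain ops already have the 32-bit semantics the shared i32 code paths expect, so e.g. OPC_ADDW simply degrades to OPC_ADD.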
*/ + OPC_ADDIW = OPC_ADDI, + OPC_ADDW = OPC_ADD, + OPC_DIVUW = OPC_DIVU, + OPC_DIVW = OPC_DIV, + OPC_MULW = OPC_MUL, + OPC_REMUW = OPC_REMU, + OPC_REMW = OPC_REM, + OPC_SLLIW = OPC_SLLI, + OPC_SLLW = OPC_SLL, + OPC_SRAIW = OPC_SRAI, + OPC_SRAW = OPC_SRA, + OPC_SRLIW = OPC_SRLI, + OPC_SRLW = OPC_SRL, + OPC_SUBW = OPC_SUB, +#endif + + OPC_FENCE = 0x0000000f, +} RISCVInsn; + +/* + * RISC-V immediate and instruction encoders (excludes 16-bit RVC) + */ + +/* Type-R */ + +static int32_t encode_r(RISCVInsn opc, TCGReg rd, TCGReg rs1, TCGReg rs2) +{ + return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20; +} + +/* Type-I */ + +static int32_t encode_imm12(uint32_t imm) +{ + return (imm & 0xfff) << 20; +} + +static int32_t encode_i(RISCVInsn opc, TCGReg rd, TCGReg rs1, uint32_t imm) +{ + return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | encode_imm12(imm); +} + +/* Type-S */ + +static int32_t encode_simm12(uint32_t imm) +{ + int32_t ret = 0; + + ret |= (imm & 0xFE0) << 20; + ret |= (imm & 0x1F) << 7; + + return ret; +} + +static int32_t encode_s(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm) +{ + return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_simm12(imm); +} + +/* Type-SB */ + +static int32_t encode_sbimm12(uint32_t imm) +{ + int32_t ret = 0; + + ret |= (imm & 0x1000) << 19; + ret |= (imm & 0x7e0) << 20; + ret |= (imm & 0x1e) << 7; + ret |= (imm & 0x800) >> 4; + + return ret; +} + +static int32_t encode_sb(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm) +{ + return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_sbimm12(imm); +} + +/* Type-U */ + +static int32_t encode_uimm20(uint32_t imm) +{ + return imm & 0xfffff000; +} + +static int32_t encode_u(RISCVInsn opc, TCGReg rd, uint32_t imm) +{ + return opc | (rd & 0x1f) << 7 | encode_uimm20(imm); +} + +/* Type-UJ */ + +static int32_t encode_ujimm20(uint32_t imm) +{ + int32_t ret = 0; + + ret |= (imm & 0x0007fe) << (21 - 1); + ret |= (imm & 0x000800) << (20 - 11); + ret |= (imm & 0x0ff000) << (12 - 12); + ret |= (imm & 0x100000) << (31 - 20); + + return ret; +} + +static int32_t encode_uj(RISCVInsn opc, TCGReg rd, uint32_t imm) +{ + return opc | (rd & 0x1f) << 7 | encode_ujimm20(imm); +} + +/* + * RISC-V instruction emitters + */ + +static void tcg_out_opc_reg(TCGContext *s, RISCVInsn opc, + TCGReg rd, TCGReg rs1, TCGReg rs2) +{ + tcg_out32(s, encode_r(opc, rd, rs1, rs2)); +} + +static void tcg_out_opc_imm(TCGContext *s, RISCVInsn opc, + TCGReg rd, TCGReg rs1, TCGArg imm) +{ + tcg_out32(s, encode_i(opc, rd, rs1, imm)); +} + +static void tcg_out_opc_store(TCGContext *s, RISCVInsn opc, + TCGReg rs1, TCGReg rs2, uint32_t imm) +{ + tcg_out32(s, encode_s(opc, rs1, rs2, imm)); +} + +static void tcg_out_opc_branch(TCGContext *s, RISCVInsn opc, + TCGReg rs1, TCGReg rs2, uint32_t imm) +{ + tcg_out32(s, encode_sb(opc, rs1, rs2, imm)); +} + +static void tcg_out_opc_upper(TCGContext *s, RISCVInsn opc, + TCGReg rd, uint32_t imm) +{ + tcg_out32(s, encode_u(opc, rd, imm)); +} + +static void tcg_out_opc_jump(TCGContext *s, RISCVInsn opc, + TCGReg rd, uint32_t imm) +{ + tcg_out32(s, encode_uj(opc, rd, imm)); +} + +static void tcg_out_nop_fill(tcg_insn_unit *p, int count) +{ + int i; + for (i = 0; i < count; ++i) { + p[i] = encode_i(OPC_ADDI, TCG_REG_ZERO, TCG_REG_ZERO, 0); + } +} + +/* + * Relocations + */ + +static bool reloc_sbimm12(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ + intptr_t offset = (intptr_t)target - (intptr_t)code_ptr; + + if (offset == sextreg(offset, 1, 12) << 1) { + code_ptr[0] |= 
encode_sbimm12(offset); + return true; + } + + return false; +} + +static bool reloc_jimm20(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ + intptr_t offset = (intptr_t)target - (intptr_t)code_ptr; + + if (offset == sextreg(offset, 1, 20) << 1) { + code_ptr[0] |= encode_ujimm20(offset); + return true; + } + + return false; +} + +static bool reloc_call(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ + intptr_t offset = (intptr_t)target - (intptr_t)code_ptr; + int32_t lo = sextreg(offset, 0, 12); + int32_t hi = offset - lo; + + if (offset == hi + lo) { + code_ptr[0] |= encode_uimm20(hi); + code_ptr[1] |= encode_imm12(lo); + return true; + } + + return false; +} + +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + uint32_t insn = *code_ptr; + intptr_t diff; + bool short_jmp; + + tcg_debug_assert(addend == 0); + + switch (type) { + case R_RISCV_BRANCH: + diff = value - (uintptr_t)code_ptr; + short_jmp = diff == sextreg(diff, 0, 12); + if (short_jmp) { + return reloc_sbimm12(code_ptr, (tcg_insn_unit *)value); + } else { + /* Invert the condition */ + insn = insn ^ (1 << 12); + /* Clear the offset */ + insn &= 0x01fff07f; + /* Set the offset to the PC + 8 */ + insn |= encode_sbimm12(8); + + /* Move forward */ + code_ptr[0] = insn; + + /* Overwrite the NOP with jal x0,value */ + diff = value - (uintptr_t)(code_ptr + 1); + insn = encode_uj(OPC_JAL, TCG_REG_ZERO, diff); + code_ptr[1] = insn; + + return true; + } + break; + case R_RISCV_JAL: + return reloc_jimm20(code_ptr, (tcg_insn_unit *)value); + break; + case R_RISCV_CALL: + return reloc_call(code_ptr, (tcg_insn_unit *)value); + break; + default: + tcg_abort(); + } +} + +/* + * TCG intrinsics + */ + +static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + if (ret == arg) { + return; + } + switch (type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0); + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, + tcg_target_long val) +{ + tcg_target_long lo, hi, tmp; + int shift, ret; + + if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { + val = (int32_t)val; + } + + lo = sextreg(val, 0, 12); + if (val == lo) { + tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, lo); + return; + } + + hi = val - lo; + if (TCG_TARGET_REG_BITS == 32 || val == (int32_t)val) { + tcg_out_opc_upper(s, OPC_LUI, rd, hi); + if (lo != 0) { + tcg_out_opc_imm(s, OPC_ADDIW, rd, rd, lo); + } + return; + } + + /* We can only be here if TCG_TARGET_REG_BITS != 32 */ + tmp = tcg_pcrel_diff(s, (void *)val); + if (tmp == (int32_t)tmp) { + tcg_out_opc_upper(s, OPC_AUIPC, rd, 0); + tcg_out_opc_imm(s, OPC_ADDI, rd, rd, 0); + ret = reloc_call(s->code_ptr - 2, (tcg_insn_unit *)val); + tcg_debug_assert(ret == true); + return; + } + + /* Look for a single 20-bit section. */ + shift = ctz64(val); + tmp = val >> shift; + if (tmp == sextreg(tmp, 0, 20)) { + tcg_out_opc_upper(s, OPC_LUI, rd, tmp << 12); + if (shift > 12) { + tcg_out_opc_imm(s, OPC_SLLI, rd, rd, shift - 12); + } else { + tcg_out_opc_imm(s, OPC_SRAI, rd, rd, 12 - shift); + } + return; + } + + /* Look for a few high zero bits, with lots of bits set in the middle. 
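For example val == 0x0000fffffffff000: clz64(val) == 16 and val << 16 == 0xfffffffff0000000, which a single LUI produces (LUI sign-extends on RV64); SRLI by 16 then restores val.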
*/ + shift = clz64(val); + tmp = val << shift; + if (tmp == sextreg(tmp, 12, 20) << 12) { + tcg_out_opc_upper(s, OPC_LUI, rd, tmp); + tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift); + return; + } else if (tmp == sextreg(tmp, 0, 12)) { + tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, tmp); + tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift); + return; + } + + /* Drop into the constant pool. */ + new_pool_label(s, val, R_RISCV_CALL, s->code_ptr, 0); + tcg_out_opc_upper(s, OPC_AUIPC, rd, 0); + tcg_out_opc_imm(s, OPC_LD, rd, rd, 0); +} + +static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_imm(s, OPC_ANDI, ret, arg, 0xff); +} + +static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16); + tcg_out_opc_imm(s, OPC_SRLIW, ret, ret, 16); +} + +static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_imm(s, OPC_SLLI, ret, arg, 32); + tcg_out_opc_imm(s, OPC_SRLI, ret, ret, 32); +} + +static void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 24); + tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 24); +} + +static void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16); + tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 16); +} + +static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_out_opc_imm(s, OPC_ADDIW, ret, arg, 0); +} + +static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data, + TCGReg addr, intptr_t offset) +{ + intptr_t imm12 = sextreg(offset, 0, 12); + + if (offset != imm12) { + intptr_t diff = offset - (uintptr_t)s->code_ptr; + + if (addr == TCG_REG_ZERO && diff == (int32_t)diff) { + imm12 = sextreg(diff, 0, 12); + tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP2, diff - imm12); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12); + if (addr != TCG_REG_ZERO) { + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, addr); + } + } + addr = TCG_REG_TMP2; + } + + switch (opc) { + case OPC_SB: + case OPC_SH: + case OPC_SW: + case OPC_SD: + tcg_out_opc_store(s, opc, addr, data, imm12); + break; + case OPC_LB: + case OPC_LBU: + case OPC_LH: + case OPC_LHU: + case OPC_LW: + case OPC_LWU: + case OPC_LD: + tcg_out_opc_imm(s, opc, data, addr, imm12); + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + bool is32bit = (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32); + tcg_out_ldst(s, is32bit ? OPC_LW : OPC_LD, arg, arg1, arg2); +} + +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + bool is32bit = (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32); + tcg_out_ldst(s, is32bit ? OPC_SW : OPC_SD, arg, arg1, arg2); +} + +static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) +{ + if (val == 0) { + tcg_out_st(s, type, TCG_REG_ZERO, base, ofs); + return true; + } + return false; +} + +static void tcg_out_addsub2(TCGContext *s, + TCGReg rl, TCGReg rh, + TCGReg al, TCGReg ah, + TCGArg bl, TCGArg bh, + bool cbl, bool cbh, bool is_sub, bool is32bit) +{ + const RISCVInsn opc_add = is32bit ? OPC_ADDW : OPC_ADD; + const RISCVInsn opc_addi = is32bit ? OPC_ADDIW : OPC_ADDI; + const RISCVInsn opc_sub = is32bit ? OPC_SUBW : OPC_SUB; + TCGReg th = TCG_REG_TMP1; + + /* If we have a negative constant such that negating it would + make the high part zero, we can (usually) eliminate one insn. 
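For example adding the 64-bit constant -5, i.e. the pair (bl, bh) == (-5, -1), becomes subtracting (5, 0), and the zero high part then normally costs no instruction.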
*/ + if (cbl && cbh && bh == -1 && bl != 0) { + bl = -bl; + bh = 0; + is_sub = !is_sub; + } + + /* By operating on the high part first, we get to use the final + carry operation to move back from the temporary. */ + if (!cbh) { + tcg_out_opc_reg(s, (is_sub ? opc_sub : opc_add), th, ah, bh); + } else if (bh != 0 || ah == rl) { + tcg_out_opc_imm(s, opc_addi, th, ah, (is_sub ? -bh : bh)); + } else { + th = ah; + } + + /* Note that tcg optimization should eliminate the bl == 0 case. */ + if (is_sub) { + if (cbl) { + tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, al, bl); + tcg_out_opc_imm(s, opc_addi, rl, al, -bl); + } else { + tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0, al, bl); + tcg_out_opc_reg(s, opc_sub, rl, al, bl); + } + tcg_out_opc_reg(s, opc_sub, rh, th, TCG_REG_TMP0); + } else { + if (cbl) { + tcg_out_opc_imm(s, opc_addi, rl, al, bl); + tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, rl, bl); + } else if (rl == al && rl == bl) { + tcg_out_opc_imm(s, OPC_SLTI, TCG_REG_TMP0, al, 0); + tcg_out_opc_reg(s, opc_add, rl, al, bl); + } else { + tcg_out_opc_reg(s, opc_add, rl, al, bl); + tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0, + rl, (rl == bl ? al : bl)); + } + tcg_out_opc_reg(s, opc_add, rh, th, TCG_REG_TMP0); + } +} + +static const struct { + RISCVInsn op; + bool swap; +} tcg_brcond_to_riscv[] = { + [TCG_COND_EQ] = { OPC_BEQ, false }, + [TCG_COND_NE] = { OPC_BNE, false }, + [TCG_COND_LT] = { OPC_BLT, false }, + [TCG_COND_GE] = { OPC_BGE, false }, + [TCG_COND_LE] = { OPC_BGE, true }, + [TCG_COND_GT] = { OPC_BLT, true }, + [TCG_COND_LTU] = { OPC_BLTU, false }, + [TCG_COND_GEU] = { OPC_BGEU, false }, + [TCG_COND_LEU] = { OPC_BGEU, true }, + [TCG_COND_GTU] = { OPC_BLTU, true } +}; + +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, + TCGReg arg2, TCGLabel *l) +{ + RISCVInsn op = tcg_brcond_to_riscv[cond].op; + + tcg_debug_assert(op != 0); + + if (tcg_brcond_to_riscv[cond].swap) { + TCGReg t = arg1; + arg1 = arg2; + arg2 = t; + } + + if (l->has_value) { + intptr_t diff = tcg_pcrel_diff(s, l->u.value_ptr); + if (diff == sextreg(diff, 0, 12)) { + tcg_out_opc_branch(s, op, arg1, arg2, diff); + } else { + /* Invert the conditional branch. 
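Flipping bit 12 of the opcode negates the condition (BEQ<->BNE, BLT<->BGE, BLTU<->BGEU), so the short inverted branch skips the JAL emitted after it, and the JAL provides the full +-1MiB reach.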
*/ + tcg_out_opc_branch(s, op ^ (1 << 12), arg1, arg2, 8); + tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, diff - 4); + } + } else { + tcg_out_reloc(s, s->code_ptr, R_RISCV_BRANCH, l, 0); + tcg_out_opc_branch(s, op, arg1, arg2, 0); + /* NOP to allow patching later */ + tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_ZERO, TCG_REG_ZERO, 0); + } +} + +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, TCGReg arg2) +{ + switch (cond) { + case TCG_COND_EQ: + tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2); + tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1); + break; + case TCG_COND_NE: + tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2); + tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret); + break; + case TCG_COND_LT: + tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); + break; + case TCG_COND_GE: + tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); + tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + break; + case TCG_COND_LE: + tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1); + tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + break; + case TCG_COND_GT: + tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1); + break; + case TCG_COND_LTU: + tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); + break; + case TCG_COND_GEU: + tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); + tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + break; + case TCG_COND_LEU: + tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1); + tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + break; + case TCG_COND_GTU: + tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1); + break; + default: + g_assert_not_reached(); + break; + } +} + +static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGReg bl, TCGReg bh, TCGLabel *l) +{ + /* todo */ + g_assert_not_reached(); +} + +static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh) +{ + /* todo */ + g_assert_not_reached(); +} + +static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target) +{ + ptrdiff_t offset = tcg_pcrel_diff(s, target); + tcg_debug_assert(offset == sextreg(offset, 1, 20) << 1); + tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, offset); +} + +static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) +{ + TCGReg link = tail ? 
TCG_REG_ZERO : TCG_REG_RA; + ptrdiff_t offset = tcg_pcrel_diff(s, arg); + int ret; + + if (offset == sextreg(offset, 1, 20) << 1) { + /* short jump: -1048576 to 1048574 */ + tcg_out_opc_jump(s, OPC_JAL, link, offset); + } else if (TCG_TARGET_REG_BITS == 32 || + offset == sextreg(offset, 1, 31) << 1) { + /* long jump: -2147483648 to 2147483646 */ + tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0); + tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0); + ret = reloc_call(s->code_ptr - 2, arg); + tcg_debug_assert(ret == true); + } else if (TCG_TARGET_REG_BITS == 64) { + /* far jump: 64-bit */ + tcg_target_long imm = sextreg((tcg_target_long)arg, 0, 12); + tcg_target_long base = (tcg_target_long)arg - imm; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base); + tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, imm); + } else { + g_assert_not_reached(); + } +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +{ + tcg_out_call_int(s, arg, false); +} + +static void tcg_out_mb(TCGContext *s, TCGArg a0) +{ + tcg_insn_unit insn = OPC_FENCE; + + if (a0 & TCG_MO_LD_LD) { + insn |= 0x02200000; + } + if (a0 & TCG_MO_ST_LD) { + insn |= 0x01200000; + } + if (a0 & TCG_MO_LD_ST) { + insn |= 0x02100000; + } + if (a0 & TCG_MO_ST_ST) { + insn |= 0x01100000; + } + tcg_out32(s, insn); +} + +/* + * Load/store and TLB + */ + +#if defined(CONFIG_SOFTMMU) +#include "tcg-ldst.inc.c" + +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, + * TCGMemOpIdx oi, uintptr_t ra) + */ +static void * const qemu_ld_helpers[16] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, +#if TCG_TARGET_REG_BITS == 64 + [MO_LESL] = helper_le_ldsl_mmu, +#endif + [MO_LEQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, +#if TCG_TARGET_REG_BITS == 64 + [MO_BESL] = helper_be_ldsl_mmu, +#endif + [MO_BEQ] = helper_be_ldq_mmu, +}; + +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, + * uintxx_t val, TCGMemOpIdx oi, + * uintptr_t ra) + */ +static void * const qemu_st_helpers[16] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEQ] = helper_be_stq_mmu, +}; + +static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, + TCGReg addrh, TCGMemOpIdx oi, + tcg_insn_unit **label_ptr, bool is_load) +{ + TCGMemOp opc = get_memop(oi); + unsigned s_bits = opc & MO_SIZE; + unsigned a_bits = get_alignment_bits(opc); + target_ulong mask; + int mem_index = get_mmuidx(oi); + int cmp_off + = (is_load + ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + RISCVInsn load_cmp_op = (TARGET_LONG_BITS == 64 ? OPC_LD : + TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW); + RISCVInsn load_add_op = TCG_TARGET_REG_BITS == 64 ? OPC_LD : OPC_LW; + TCGReg base = TCG_AREG0; + + /* We don't support oversize guests */ + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + g_assert_not_reached(); + } + + /* We don't support unaligned accesses. */ + if (a_bits < s_bits) { + a_bits = s_bits; + } + mask = (target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1); + + + /* Compensate for very large offsets. 
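With a large mem_index the tlb_table offsets within CPUArchState can exceed the +-2KiB reach of a 12-bit immediate, so the excess is folded into TCG_REG_TMP2 once and both the comparator and addend loads keep short offsets.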
*/ + if (add_off >= 0x1000) { + int adj; + base = TCG_REG_TMP2; + if (cmp_off <= 2 * 0xfff) { + adj = 0xfff; + tcg_out_opc_imm(s, OPC_ADDI, base, TCG_AREG0, adj); + } else { + adj = cmp_off - sextreg(cmp_off, 0, 12); + tcg_debug_assert(add_off - adj >= -0x1000 + && add_off - adj < 0x1000); + + tcg_out_opc_upper(s, OPC_LUI, base, adj); + tcg_out_opc_reg(s, OPC_ADD, base, base, TCG_AREG0); + } + add_off -= adj; + cmp_off -= adj; + } + + /* Extract the page index. */ + if (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS < 12) { + tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP0, addrl, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP0, TCG_REG_TMP0, + MAKE_64BIT_MASK(CPU_TLB_ENTRY_BITS, CPU_TLB_BITS)); + } else if (TARGET_PAGE_BITS >= 12) { + tcg_out_opc_upper(s, OPC_LUI, TCG_REG_TMP0, + MAKE_64BIT_MASK(TARGET_PAGE_BITS, CPU_TLB_BITS)); + tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP0, TCG_REG_TMP0, addrl); + tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP0, TCG_REG_TMP0, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + } else { + tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP0, addrl, TARGET_PAGE_BITS); + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP0, TCG_REG_TMP0, + MAKE_64BIT_MASK(0, CPU_TLB_BITS)); + tcg_out_opc_imm(s, OPC_SLLI, TCG_REG_TMP0, TCG_REG_TMP0, + CPU_TLB_ENTRY_BITS); + } + + /* Add that to the base address to index the tlb. */ + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, base, TCG_REG_TMP0); + base = TCG_REG_TMP2; + + /* Load the tlb comparator and the addend. */ + tcg_out_ldst(s, load_cmp_op, TCG_REG_TMP0, base, cmp_off); + tcg_out_ldst(s, load_add_op, TCG_REG_TMP2, base, add_off); + + /* Clear the non-page, non-alignment bits from the address. */ + if (mask == sextreg(mask, 0, 12)) { + tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addrl, mask); + } else { + tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, mask); + tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addrl); + } + + /* Compare masked address with the TLB entry. */ + label_ptr[0] = s->code_ptr; + tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0); + /* NOP to allow patching later */ + tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_ZERO, TCG_REG_ZERO, 0); + /* TODO: Move this out of line + * see: + * https://lists.nongnu.org/archive/html/qemu-devel/2018-11/msg02234.html + */ + + /* TLB Hit - translate address using addend. 
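For a 32-bit guest on a 64-bit host the guest address is first zero-extended so the addition of the host addend operates on a clean 64-bit value.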
*/ + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, TCG_REG_TMP0, addrl); + addrl = TCG_REG_TMP0; + } + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addrl); +} + +static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi, + TCGType ext, + TCGReg datalo, TCGReg datahi, + TCGReg addrlo, TCGReg addrhi, + void *raddr, tcg_insn_unit **label_ptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->oi = oi; + label->type = ext; + label->datalo_reg = datalo; + label->datahi_reg = datahi; + label->addrlo_reg = addrlo; + label->addrhi_reg = addrhi; + label->raddr = raddr; + label->label_ptr[0] = label_ptr[0]; +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOpIdx oi = l->oi; + TCGMemOp opc = get_memop(oi); + TCGReg a0 = tcg_target_call_iarg_regs[0]; + TCGReg a1 = tcg_target_call_iarg_regs[1]; + TCGReg a2 = tcg_target_call_iarg_regs[2]; + TCGReg a3 = tcg_target_call_iarg_regs[3]; + + /* We don't support oversize guests */ + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + g_assert_not_reached(); + } + + /* resolve label address */ + patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, (intptr_t) s->code_ptr, 0); + + /* call load helper */ + tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0); + tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg); + tcg_out_movi(s, TCG_TYPE_PTR, a2, oi); + tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr); + + tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); + tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0); + + tcg_out_goto(s, l->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + TCGMemOpIdx oi = l->oi; + TCGMemOp opc = get_memop(oi); + TCGMemOp s_bits = opc & MO_SIZE; + TCGReg a0 = tcg_target_call_iarg_regs[0]; + TCGReg a1 = tcg_target_call_iarg_regs[1]; + TCGReg a2 = tcg_target_call_iarg_regs[2]; + TCGReg a3 = tcg_target_call_iarg_regs[3]; + TCGReg a4 = tcg_target_call_iarg_regs[4]; + + /* We don't support oversize guests */ + if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { + g_assert_not_reached(); + } + + /* resolve label address */ + patch_reloc(l->label_ptr[0], R_RISCV_BRANCH, (intptr_t) s->code_ptr, 0); + + /* call store helper */ + tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0); + tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg); + tcg_out_mov(s, TCG_TYPE_PTR, a2, l->datalo_reg); + switch (s_bits) { + case MO_8: + tcg_out_ext8u(s, a2, a2); + break; + case MO_16: + tcg_out_ext16u(s, a2, a2); + break; + default: + break; + } + tcg_out_movi(s, TCG_TYPE_PTR, a3, oi); + tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr); + + tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SSIZE)]); + + tcg_out_goto(s, l->raddr); +} +#endif /* CONFIG_SOFTMMU */ + +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, + TCGReg base, TCGMemOp opc, bool is_64) +{ + const TCGMemOp bswap = opc & MO_BSWAP; + + /* We don't yet handle byteswapping, assert */ + g_assert(!bswap); + + switch (opc & (MO_SSIZE)) { + case MO_UB: + tcg_out_opc_imm(s, OPC_LBU, lo, base, 0); + break; + case MO_SB: + tcg_out_opc_imm(s, OPC_LB, lo, base, 0); + break; + case MO_UW: + tcg_out_opc_imm(s, OPC_LHU, lo, base, 0); + break; + case MO_SW: + tcg_out_opc_imm(s, OPC_LH, lo, base, 0); + break; + case MO_UL: + if (TCG_TARGET_REG_BITS == 64 && is_64) { + tcg_out_opc_imm(s, OPC_LWU, lo, base, 0); + break; + } + /* FALLTHRU */ + case MO_SL: + tcg_out_opc_imm(s, OPC_LW, lo, base, 0); + break; + case MO_Q: + /* Prefer 
to load from offset 0 first, but allow for overlap. */ + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_opc_imm(s, OPC_LD, lo, base, 0); + } else if (lo != base) { + tcg_out_opc_imm(s, OPC_LW, lo, base, 0); + tcg_out_opc_imm(s, OPC_LW, hi, base, 4); + } else { + tcg_out_opc_imm(s, OPC_LW, hi, base, 4); + tcg_out_opc_imm(s, OPC_LW, lo, base, 0); + } + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) +{ + TCGReg addr_regl, addr_regh __attribute__((unused)); + TCGReg data_regl, data_regh; + TCGMemOpIdx oi; + TCGMemOp opc; +#if defined(CONFIG_SOFTMMU) + tcg_insn_unit *label_ptr[1]; +#endif + TCGReg base = TCG_REG_TMP0; + + data_regl = *args++; + data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); + addr_regl = *args++; + addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); + oi = *args++; + opc = get_memop(oi); + +#if defined(CONFIG_SOFTMMU) + tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 1); + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64); + add_qemu_ldst_label(s, 1, oi, + (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32), + data_regl, data_regh, addr_regl, addr_regh, + s->code_ptr, label_ptr); +#else + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, base, addr_regl); + addr_regl = base; + } + + if (guest_base == 0) { + tcg_out_opc_reg(s, OPC_ADD, base, addr_regl, TCG_REG_ZERO); + } else { + tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl); + } + tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64); +#endif +} + +static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, + TCGReg base, TCGMemOp opc) +{ + const TCGMemOp bswap = opc & MO_BSWAP; + + /* We don't yet handle byteswapping, assert */ + g_assert(!bswap); + + switch (opc & (MO_SSIZE)) { + case MO_8: + tcg_out_opc_store(s, OPC_SB, base, lo, 0); + break; + case MO_16: + tcg_out_opc_store(s, OPC_SH, base, lo, 0); + break; + case MO_32: + tcg_out_opc_store(s, OPC_SW, base, lo, 0); + break; + case MO_64: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_opc_store(s, OPC_SD, base, lo, 0); + } else { + tcg_out_opc_store(s, OPC_SW, base, lo, 0); + tcg_out_opc_store(s, OPC_SW, base, hi, 4); + } + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) +{ + TCGReg addr_regl, addr_regh __attribute__((unused)); + TCGReg data_regl, data_regh; + TCGMemOpIdx oi; + TCGMemOp opc; +#if defined(CONFIG_SOFTMMU) + tcg_insn_unit *label_ptr[1]; +#endif + TCGReg base = TCG_REG_TMP0; + + data_regl = *args++; + data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); + addr_regl = *args++; + addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); + oi = *args++; + opc = get_memop(oi); + +#if defined(CONFIG_SOFTMMU) + tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 0); + tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); + add_qemu_ldst_label(s, 0, oi, + (is_64 ? 
TCG_TYPE_I64 : TCG_TYPE_I32),
+                        data_regl, data_regh, addr_regl, addr_regh,
+                        s->code_ptr, label_ptr);
+#else
+    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+        tcg_out_ext32u(s, base, addr_regl);
+        addr_regl = base;
+    }
+
+    if (guest_base == 0) {
+        tcg_out_opc_reg(s, OPC_ADD, base, addr_regl, TCG_REG_ZERO);
+    } else {
+        tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl);
+    }
+    tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
+#endif
+}
+
+static tcg_insn_unit *tb_ret_addr;
+
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+                       const TCGArg *args, const int *const_args)
+{
+    TCGArg a0 = args[0];
+    TCGArg a1 = args[1];
+    TCGArg a2 = args[2];
+    int c2 = const_args[2];
+
+    switch (opc) {
+    case INDEX_op_exit_tb:
+        /* Reuse the zeroing that exists for goto_ptr.  */
+        if (a0 == 0) {
+            tcg_out_call_int(s, s->code_gen_epilogue, true);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0);
+            tcg_out_call_int(s, tb_ret_addr, true);
+        }
+        break;
+
+    case INDEX_op_goto_tb:
+        assert(s->tb_jmp_insn_offset == 0);
+        /* indirect jump method */
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
+                   (uintptr_t)(s->tb_jmp_target_addr + a0));
+        tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+        set_jmp_reset_offset(s, a0);
+        break;
+
+    case INDEX_op_goto_ptr:
+        tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, a0, 0);
+        break;
+
+    case INDEX_op_br:
+        tcg_out_reloc(s, s->code_ptr, R_RISCV_JAL, arg_label(a0), 0);
+        tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
+        break;
+
+    case INDEX_op_ld8u_i32:
+    case INDEX_op_ld8u_i64:
+        tcg_out_ldst(s, OPC_LBU, a0, a1, a2);
+        break;
+    case INDEX_op_ld8s_i32:
+    case INDEX_op_ld8s_i64:
+        tcg_out_ldst(s, OPC_LB, a0, a1, a2);
+        break;
+    case INDEX_op_ld16u_i32:
+    case INDEX_op_ld16u_i64:
+        tcg_out_ldst(s, OPC_LHU, a0, a1, a2);
+        break;
+    case INDEX_op_ld16s_i32:
+    case INDEX_op_ld16s_i64:
+        tcg_out_ldst(s, OPC_LH, a0, a1, a2);
+        break;
+    case INDEX_op_ld32u_i64:
+        tcg_out_ldst(s, OPC_LWU, a0, a1, a2);
+        break;
+    case INDEX_op_ld_i32:
+    case INDEX_op_ld32s_i64:
+        tcg_out_ldst(s, OPC_LW, a0, a1, a2);
+        break;
+    case INDEX_op_ld_i64:
+        tcg_out_ldst(s, OPC_LD, a0, a1, a2);
+        break;
+
+    case INDEX_op_st8_i32:
+    case INDEX_op_st8_i64:
+        tcg_out_ldst(s, OPC_SB, a0, a1, a2);
+        break;
+    case INDEX_op_st16_i32:
+    case INDEX_op_st16_i64:
+        tcg_out_ldst(s, OPC_SH, a0, a1, a2);
+        break;
+    case INDEX_op_st_i32:
+    case INDEX_op_st32_i64:
+        tcg_out_ldst(s, OPC_SW, a0, a1, a2);
+        break;
+    case INDEX_op_st_i64:
+        tcg_out_ldst(s, OPC_SD, a0, a1, a2);
+        break;
+
+    case INDEX_op_add_i32:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_ADDW, a0, a1, a2);
+        }
+        break;
+    case INDEX_op_add_i64:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_ADDI, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_ADD, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_sub_i32:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, -a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SUBW, a0, a1, a2);
+        }
+        break;
+    case INDEX_op_sub_i64:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_ADDI, a0, a1, -a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SUB, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_and_i32:
+    case INDEX_op_and_i64:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_AND, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_or_i32:
+    case INDEX_op_or_i64:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_OR, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_xor_i32:
+    case INDEX_op_xor_i64:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_XOR, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_not_i32:
+    case INDEX_op_not_i64:
+        tcg_out_opc_imm(s, OPC_XORI, a0, a1, -1);
+        break;
+
+    case INDEX_op_neg_i32:
+        tcg_out_opc_reg(s, OPC_SUBW, a0, TCG_REG_ZERO, a1);
+        break;
+    case INDEX_op_neg_i64:
+        tcg_out_opc_reg(s, OPC_SUB, a0, TCG_REG_ZERO, a1);
+        break;
+
+    case INDEX_op_mul_i32:
+        tcg_out_opc_reg(s, OPC_MULW, a0, a1, a2);
+        break;
+    case INDEX_op_mul_i64:
+        tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2);
+        break;
+
+    case INDEX_op_div_i32:
+        tcg_out_opc_reg(s, OPC_DIVW, a0, a1, a2);
+        break;
+    case INDEX_op_div_i64:
+        tcg_out_opc_reg(s, OPC_DIV, a0, a1, a2);
+        break;
+
+    case INDEX_op_divu_i32:
+        tcg_out_opc_reg(s, OPC_DIVUW, a0, a1, a2);
+        break;
+    case INDEX_op_divu_i64:
+        tcg_out_opc_reg(s, OPC_DIVU, a0, a1, a2);
+        break;
+
+    case INDEX_op_rem_i32:
+        tcg_out_opc_reg(s, OPC_REMW, a0, a1, a2);
+        break;
+    case INDEX_op_rem_i64:
+        tcg_out_opc_reg(s, OPC_REM, a0, a1, a2);
+        break;
+
+    case INDEX_op_remu_i32:
+        tcg_out_opc_reg(s, OPC_REMUW, a0, a1, a2);
+        break;
+    case INDEX_op_remu_i64:
+        tcg_out_opc_reg(s, OPC_REMU, a0, a1, a2);
+        break;
+
+    case INDEX_op_shl_i32:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLLW, a0, a1, a2);
+        }
+        break;
+    case INDEX_op_shl_i64:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLL, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_shr_i32:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SRLW, a0, a1, a2);
+        }
+        break;
+    case INDEX_op_shr_i64:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SRL, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_sar_i32:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SRAW, a0, a1, a2);
+        }
+        break;
+    case INDEX_op_sar_i64:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SRA, a0, a1, a2);
+        }
+        break;
+
+    case INDEX_op_add2_i32:
+        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
+                        const_args[4], const_args[5], false, true);
+        break;
+    case INDEX_op_add2_i64:
+        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
+                        const_args[4], const_args[5], false, false);
+        break;
+    case INDEX_op_sub2_i32:
+        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
+                        const_args[4], const_args[5], true, true);
+        break;
+    case INDEX_op_sub2_i64:
+        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
+                        const_args[4], const_args[5], true, false);
+        break;
+
+    case INDEX_op_brcond_i32:
+    case INDEX_op_brcond_i64:
+        tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
+        break;
+    case INDEX_op_brcond2_i32:
+        tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], arg_label(args[5]));
+        break;
+
+    case INDEX_op_setcond_i32:
+    case INDEX_op_setcond_i64:
+        tcg_out_setcond(s, args[3], a0, a1, a2);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]);
+        break;
+
+    case INDEX_op_qemu_ld_i32:
+        tcg_out_qemu_ld(s, args, false);
+        break;
+    case INDEX_op_qemu_ld_i64:
+        tcg_out_qemu_ld(s, args, true);
+        break;
+    case INDEX_op_qemu_st_i32:
+        tcg_out_qemu_st(s, args, false);
+        break;
+    case INDEX_op_qemu_st_i64:
+        tcg_out_qemu_st(s, args, true);
+        break;
+
+    case INDEX_op_ext8u_i32:
+    case INDEX_op_ext8u_i64:
+        tcg_out_ext8u(s, a0, a1);
+        break;
+
+    case INDEX_op_ext16u_i32:
+    case INDEX_op_ext16u_i64:
+        tcg_out_ext16u(s, a0, a1);
+        break;
+
+    case INDEX_op_ext32u_i64:
+    case INDEX_op_extu_i32_i64:
+        tcg_out_ext32u(s, a0, a1);
+        break;
+
+    case INDEX_op_ext8s_i32:
+    case INDEX_op_ext8s_i64:
+        tcg_out_ext8s(s, a0, a1);
+        break;
+
+    case INDEX_op_ext16s_i32:
+    case INDEX_op_ext16s_i64:
+        tcg_out_ext16s(s, a0, a1);
+        break;
+
+    case INDEX_op_ext32s_i64:
+    case INDEX_op_extrl_i64_i32:
+    case INDEX_op_ext_i32_i64:
+        tcg_out_ext32s(s, a0, a1);
+        break;
+
+    case INDEX_op_extrh_i64_i32:
+        tcg_out_opc_imm(s, OPC_SRAI, a0, a1, 32);
+        break;
+
+    case INDEX_op_mulsh_i32:
+    case INDEX_op_mulsh_i64:
+        tcg_out_opc_reg(s, OPC_MULH, a0, a1, a2);
+        break;
+
+    case INDEX_op_muluh_i32:
+    case INDEX_op_muluh_i64:
+        tcg_out_opc_reg(s, OPC_MULHU, a0, a1, a2);
+        break;
+
+    case INDEX_op_mb:
+        tcg_out_mb(s, a0);
+        break;
+
+    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
+    case INDEX_op_mov_i64:
+    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
+    case INDEX_op_movi_i64:
+    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+{
+    static const TCGTargetOpDef r
+        = { .args_ct_str = { "r" } };
+    static const TCGTargetOpDef r_r
+        = { .args_ct_str = { "r", "r" } };
+    static const TCGTargetOpDef rZ_r
+        = { .args_ct_str = { "rZ", "r" } };
+    static const TCGTargetOpDef rZ_rZ
+        = { .args_ct_str = { "rZ", "rZ" } };
+    static const TCGTargetOpDef rZ_rZ_rZ_rZ
+        = { .args_ct_str = { "rZ", "rZ", "rZ", "rZ" } };
+    static const TCGTargetOpDef r_r_ri
+        = { .args_ct_str = { "r", "r", "ri" } };
+    static const TCGTargetOpDef r_r_rI
+        = { .args_ct_str = { "r", "r", "rI" } };
+    static const TCGTargetOpDef r_rZ_rN
+        = { .args_ct_str = { "r", "rZ", "rN" } };
+    static const TCGTargetOpDef r_rZ_rZ
+        = { .args_ct_str = { "r", "rZ", "rZ" } };
+    static const TCGTargetOpDef r_rZ_rZ_rZ_rZ
+        = { .args_ct_str = { "r", "rZ", "rZ", "rZ", "rZ" } };
+    static const TCGTargetOpDef r_L
+        = { .args_ct_str = { "r", "L" } };
+    static const TCGTargetOpDef r_r_L
+        = { .args_ct_str = { "r", "r", "L" } };
+    static const TCGTargetOpDef r_L_L
+        = { .args_ct_str = { "r", "L", "L" } };
+    static const TCGTargetOpDef r_r_L_L
+        = { .args_ct_str = { "r", "r", "L", "L" } };
+    static const TCGTargetOpDef LZ_L
+        = { .args_ct_str = { "LZ", "L" } };
+    static const TCGTargetOpDef LZ_L_L
+        = { .args_ct_str = { "LZ", "L", "L" } };
+    static const TCGTargetOpDef LZ_LZ_L
+        = { .args_ct_str = { "LZ", "LZ", "L" } };
+    static const TCGTargetOpDef LZ_LZ_L_L
+        = { .args_ct_str = { "LZ", "LZ", "L", "L" } };
+    static const TCGTargetOpDef r_r_rZ_rZ_rM_rM
+        = { .args_ct_str = { "r", "r", "rZ", "rZ", "rM", "rM" } };
+
+    switch (op) {
+    case INDEX_op_goto_ptr:
+        return &r;
+
+    case INDEX_op_ld8u_i32:
+    case INDEX_op_ld8s_i32:
+    case INDEX_op_ld16u_i32:
+    case INDEX_op_ld16s_i32:
+    case INDEX_op_ld_i32:
+    case INDEX_op_not_i32:
+    case INDEX_op_neg_i32:
+    case INDEX_op_ld8u_i64:
+    case INDEX_op_ld8s_i64:
+    case INDEX_op_ld16u_i64:
+    case INDEX_op_ld16s_i64:
+    case INDEX_op_ld32s_i64:
+    case INDEX_op_ld32u_i64:
+    case INDEX_op_ld_i64:
+    case INDEX_op_not_i64:
+    case INDEX_op_neg_i64:
+    case INDEX_op_ext8u_i32:
+    case INDEX_op_ext8u_i64:
+    case INDEX_op_ext16u_i32:
+    case INDEX_op_ext16u_i64:
+    case INDEX_op_ext32u_i64:
+    case INDEX_op_extu_i32_i64:
+    case INDEX_op_ext8s_i32:
+    case INDEX_op_ext8s_i64:
+    case INDEX_op_ext16s_i32:
+    case INDEX_op_ext16s_i64:
+    case INDEX_op_ext32s_i64:
+    case INDEX_op_extrl_i64_i32:
+    case INDEX_op_extrh_i64_i32:
+    case INDEX_op_ext_i32_i64:
+        return &r_r;
+
+    case INDEX_op_st8_i32:
+    case INDEX_op_st16_i32:
+    case INDEX_op_st_i32:
+    case INDEX_op_st8_i64:
+    case INDEX_op_st16_i64:
+    case INDEX_op_st32_i64:
+    case INDEX_op_st_i64:
+        return &rZ_r;
+
+    case INDEX_op_add_i32:
+    case INDEX_op_and_i32:
+    case INDEX_op_or_i32:
+    case INDEX_op_xor_i32:
+    case INDEX_op_add_i64:
+    case INDEX_op_and_i64:
+    case INDEX_op_or_i64:
+    case INDEX_op_xor_i64:
+        return &r_r_rI;
+
+    case INDEX_op_sub_i32:
+    case INDEX_op_sub_i64:
+        return &r_rZ_rN;
+
+    case INDEX_op_mul_i32:
+    case INDEX_op_mulsh_i32:
+    case INDEX_op_muluh_i32:
+    case INDEX_op_div_i32:
+    case INDEX_op_divu_i32:
+    case INDEX_op_rem_i32:
+    case INDEX_op_remu_i32:
+    case INDEX_op_setcond_i32:
+    case INDEX_op_mul_i64:
+    case INDEX_op_mulsh_i64:
+    case INDEX_op_muluh_i64:
+    case INDEX_op_div_i64:
+    case INDEX_op_divu_i64:
+    case INDEX_op_rem_i64:
+    case INDEX_op_remu_i64:
+    case INDEX_op_setcond_i64:
+        return &r_rZ_rZ;
+
+    case INDEX_op_shl_i32:
+    case INDEX_op_shr_i32:
+    case INDEX_op_sar_i32:
+    case INDEX_op_shl_i64:
+    case INDEX_op_shr_i64:
+    case INDEX_op_sar_i64:
+        return &r_r_ri;
+
+    case INDEX_op_brcond_i32:
+    case INDEX_op_brcond_i64:
+        return &rZ_rZ;
+
+    case INDEX_op_add2_i32:
+    case INDEX_op_add2_i64:
+    case INDEX_op_sub2_i32:
+    case INDEX_op_sub2_i64:
+        return &r_r_rZ_rZ_rM_rM;
+
+    case INDEX_op_brcond2_i32:
+        return &rZ_rZ_rZ_rZ;
+
+    case INDEX_op_setcond2_i32:
+        return &r_rZ_rZ_rZ_rZ;
+
+    case INDEX_op_qemu_ld_i32:
+        return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
+    case INDEX_op_qemu_st_i32:
+        return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &LZ_L : &LZ_L_L;
+    case INDEX_op_qemu_ld_i64:
+        return TCG_TARGET_REG_BITS == 64 ? &r_L
+               : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L
+               : &r_r_L_L;
+    case INDEX_op_qemu_st_i64:
+        return TCG_TARGET_REG_BITS == 64 ? &LZ_L
+               : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &LZ_LZ_L
+               : &LZ_LZ_L_L;
+
+    default:
+        return NULL;
+    }
+}
+
+static const int tcg_target_callee_save_regs[] = {
+    TCG_REG_S0,       /* used for the global env (TCG_AREG0) */
+    TCG_REG_S1,
+    TCG_REG_S2,
+    TCG_REG_S3,
+    TCG_REG_S4,
+    TCG_REG_S5,
+    TCG_REG_S6,
+    TCG_REG_S7,
+    TCG_REG_S8,
+    TCG_REG_S9,
+    TCG_REG_S10,
+    TCG_REG_S11,
+    TCG_REG_RA,       /* should be last for ABI compliance */
+};
+
+/* Stack frame parameters.  */
+#define REG_SIZE   (TCG_TARGET_REG_BITS / 8)
+#define SAVE_SIZE  ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE)
+#define TEMP_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
+#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \
+                     + TCG_TARGET_STACK_ALIGN - 1) \
+                    & -TCG_TARGET_STACK_ALIGN)
+#define SAVE_OFS   (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE)
+
+/* We're expecting to be able to use an immediate for frame allocation.  */
+QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff);
+
+/* Generate global QEMU prologue and epilogue code */
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+    int i;
+
+    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);
+
+    /* TB prologue */
+    tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
+    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
+    }
+
+#if !defined(CONFIG_SOFTMMU)
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
+    tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+#endif
+
+    /* Call generated code */
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0);
+
+    /* Return path for goto_ptr. Set return value to 0 */
+    s->code_gen_epilogue = s->code_ptr;
+    tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO);
+
+    /* TB epilogue */
+    tb_ret_addr = s->code_ptr;
+    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
+    }
+
+    tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
+    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_RA, 0);
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
+    }
+
+    tcg_target_call_clobber_regs = -1u;
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S7);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S10);
+    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S11);
+
+    s->reserved_regs = 0;
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
+}
+
+typedef struct {
+    DebugFrameHeader h;
+    uint8_t fde_def_cfa[4];
+    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
+} DebugFrame;
+
+#define ELF_HOST_MACHINE EM_RISCV
+
+static const DebugFrame debug_frame = {
+    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
+    .h.cie.id = -1,
+    .h.cie.version = 1,
+    .h.cie.code_align = 1,
+    .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
+    .h.cie.return_column = TCG_REG_RA,
+
+    /* Total FDE size does not include the "len" member.  */
+    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+    .fde_def_cfa = {
+        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
+        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
+        (FRAME_SIZE >> 7)
+    },
+    .fde_reg_ofs = {
+        0x80 + 9,  12,                  /* DW_CFA_offset, s1,  -96 */
+        0x80 + 18, 11,                  /* DW_CFA_offset, s2,  -88 */
+        0x80 + 19, 10,                  /* DW_CFA_offset, s3,  -80 */
+        0x80 + 20, 9,                   /* DW_CFA_offset, s4,  -72 */
+        0x80 + 21, 8,                   /* DW_CFA_offset, s5,  -64 */
+        0x80 + 22, 7,                   /* DW_CFA_offset, s6,  -56 */
+        0x80 + 23, 6,                   /* DW_CFA_offset, s7,  -48 */
+        0x80 + 24, 5,                   /* DW_CFA_offset, s8,  -40 */
+        0x80 + 25, 4,                   /* DW_CFA_offset, s9,  -32 */
+        0x80 + 26, 3,                   /* DW_CFA_offset, s10, -24 */
+        0x80 + 27, 2,                   /* DW_CFA_offset, s11, -16 */
+        0x80 + 1 , 1,                   /* DW_CFA_offset, ra,  -8 */
+    }
+};
+
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 38652db32c..1bd7ef24af 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -240,6 +240,7 @@ void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l)
     if (cond == TCG_COND_ALWAYS) {
         tcg_gen_br(l);
     } else if (cond != TCG_COND_NEVER) {
+        l->refs++;
         tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l));
     }
 }
@@ -1405,6 +1406,7 @@ void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
     if (cond == TCG_COND_ALWAYS) {
         tcg_gen_br(l);
     } else if (cond != TCG_COND_NEVER) {
+        l->refs++;
         if (TCG_TARGET_REG_BITS == 32) {
             tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1),
                               TCGV_HIGH(arg1), TCGV_LOW(arg2),
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index db4e9188f4..7007ec0d4d 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -260,6 +260,7 @@ static inline void gen_set_label(TCGLabel *l)
 
 static inline void tcg_gen_br(TCGLabel *l)
 {
+    l->refs++;
     tcg_gen_op1(INDEX_op_br, label_arg(l));
 }
 
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index e3a43aabb6..7a8a3edb5b 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -191,9 +191,10 @@ DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64))
 /* QEMU specific */
 DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS,
     TCG_OPF_NOT_PRESENT)
-DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END)
-DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END)
-DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_END | IMPL(TCG_TARGET_HAS_goto_ptr))
+DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
+DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
+DEF(goto_ptr, 0, 1, 0,
+    TCG_OPF_BB_EXIT | TCG_OPF_BB_END | IMPL(TCG_TARGET_HAS_goto_ptr))
 
 DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1,
     TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 963cb37892..c54b119020 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1887,7 +1887,21 @@ static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
 };
 
-void tcg_dump_ops(TCGContext *s)
+static inline bool tcg_regset_single(TCGRegSet d)
+{
+    return (d & (d - 1)) == 0;
+}
+
+static inline TCGReg tcg_regset_first(TCGRegSet d)
+{
+    if (TCG_TARGET_NB_REGS <= 32) {
+        return ctz32(d);
+    } else {
+        return ctz64(d);
+    }
+}
+
+static void tcg_dump_ops(TCGContext *s, bool have_prefs)
 {
     char buf[128];
     TCGOp *op;
@@ -1902,6 +1916,7 @@ void tcg_dump_ops(TCGContext *s)
         def = &tcg_op_defs[c];
 
         if (c == INDEX_op_insn_start) {
+            nb_oargs = 0;
             col += qemu_log("\n ----");
 
             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
@@ -2021,12 +2036,15 @@
                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
             }
         }
-        if (op->life) {
-            unsigned life = op->life;
-
-            for (; col < 48; ++col) {
+        if (have_prefs || op->life) {
+            for (; col < 40; ++col) {
                 putc(' ', qemu_logfile);
             }
+        }
+
+        if (op->life) {
+            unsigned life = op->life;
 
             if (life & (SYNC_ARG * 3)) {
                 qemu_log("  sync:");
@@ -2046,6 +2064,33 @@
                 }
             }
         }
+
+        if (have_prefs) {
+            for (i = 0; i < nb_oargs; ++i) {
+                TCGRegSet set = op->output_pref[i];
+
+                if (i == 0) {
+                    qemu_log(" pref=");
+                } else {
+                    qemu_log(",");
+                }
+                if (set == 0) {
+                    qemu_log("none");
+                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
+                    qemu_log("all");
+#ifdef CONFIG_DEBUG_TCG
+                } else if (tcg_regset_single(set)) {
+                    TCGReg reg = tcg_regset_first(set);
+                    qemu_log("%s", tcg_target_reg_names[reg]);
+#endif
+                } else if (TCG_TARGET_NB_REGS <= 32) {
+                    qemu_log("%#x", (uint32_t)set);
+                } else {
+                    qemu_log("%#" PRIx64, (uint64_t)set);
+                }
+            }
+        }
+
         qemu_log("\n");
     }
 }
@@ -2171,6 +2216,26 @@ static void process_op_defs(TCGContext *s)
 
 void tcg_op_remove(TCGContext *s, TCGOp *op)
 {
+    TCGLabel *label;
+
+    switch (op->opc) {
+    case INDEX_op_br:
+        label = arg_label(op->args[0]);
+        label->refs--;
+        break;
+    case INDEX_op_brcond_i32:
+    case INDEX_op_brcond_i64:
+        label = arg_label(op->args[3]);
+        label->refs--;
+        break;
+    case INDEX_op_brcond2_i32:
+        label = arg_label(op->args[5]);
+        label->refs--;
+        break;
+    default:
+        break;
+    }
+
     QTAILQ_REMOVE(&s->ops, op, link);
     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
     s->nb_ops--;
@@ -2219,43 +2284,181 @@ TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
     return new_op;
 }
 
+/* Reachable analysis : remove unreachable code.  */
+static void reachable_code_pass(TCGContext *s)
+{
+    TCGOp *op, *op_next;
+    bool dead = false;
+
+    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
+        bool remove = dead;
+        TCGLabel *label;
+        int call_flags;
+
+        switch (op->opc) {
+        case INDEX_op_set_label:
+            label = arg_label(op->args[0]);
+            if (label->refs == 0) {
+                /*
+                 * While there is an occasional backward branch, virtually
+                 * all branches generated by the translators are forward.
+                 * Which means that generally we will have already removed
+                 * all references to the label that will be, and there is
+                 * little to be gained by iterating.
+                 */
+                remove = true;
+            } else {
+                /* Once we see a label, insns become live again.  */
+                dead = false;
+                remove = false;
+
+                /*
+                 * Optimization can fold conditional branches to unconditional.
+                 * If we find a label with one reference which is preceded by
+                 * an unconditional branch to it, remove both.  This needed to
+                 * wait until the dead code in between them was removed.
+                 */
+                if (label->refs == 1) {
+                    TCGOp *op_prev = QTAILQ_PREV(op, TCGOpHead, link);
+                    if (op_prev->opc == INDEX_op_br &&
+                        label == arg_label(op_prev->args[0])) {
+                        tcg_op_remove(s, op_prev);
+                        remove = true;
+                    }
+                }
+            }
+            break;
+
+        case INDEX_op_br:
+        case INDEX_op_exit_tb:
+        case INDEX_op_goto_ptr:
+            /* Unconditional branches; everything following is dead.  */
+            dead = true;
+            break;
+
+        case INDEX_op_call:
+            /* Notice noreturn helper calls, raising exceptions.  */
+            call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
+            if (call_flags & TCG_CALL_NO_RETURN) {
+                dead = true;
+            }
+            break;
+
+        case INDEX_op_insn_start:
+            /* Never remove -- we need to keep these for unwind.  */
+            remove = false;
+            break;
+
+        default:
+            break;
+        }
+
+        if (remove) {
+            tcg_op_remove(s, op);
+        }
+    }
+}
+
 #define TS_DEAD  1
 #define TS_MEM   2
 
 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
 
+/* For liveness_pass_1, the register preferences for a given temp.  */
+static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
+{
+    return ts->state_ptr;
+}
+
+/* For liveness_pass_1, reset the preferences for a given temp to the
+ * maximal regset for its type.
+ */
+static inline void la_reset_pref(TCGTemp *ts)
+{
+    *la_temp_pref(ts)
+        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
+}
+
 /* liveness analysis: end of function: all temps are dead, and globals
    should be in memory. */
-static void tcg_la_func_end(TCGContext *s)
+static void la_func_end(TCGContext *s, int ng, int nt)
 {
-    int ng = s->nb_globals;
-    int nt = s->nb_temps;
     int i;
 
     for (i = 0; i < ng; ++i) {
         s->temps[i].state = TS_DEAD | TS_MEM;
+        la_reset_pref(&s->temps[i]);
     }
     for (i = ng; i < nt; ++i) {
         s->temps[i].state = TS_DEAD;
+        la_reset_pref(&s->temps[i]);
     }
 }
 
 /* liveness analysis: end of basic block: all temps are dead, globals
    and local temps should be in memory. */
-static void tcg_la_bb_end(TCGContext *s)
+static void la_bb_end(TCGContext *s, int ng, int nt)
 {
-    int ng = s->nb_globals;
-    int nt = s->nb_temps;
     int i;
 
     for (i = 0; i < ng; ++i) {
         s->temps[i].state = TS_DEAD | TS_MEM;
+        la_reset_pref(&s->temps[i]);
     }
     for (i = ng; i < nt; ++i) {
         s->temps[i].state = (s->temps[i].temp_local
                              ? TS_DEAD | TS_MEM
                              : TS_DEAD);
+        la_reset_pref(&s->temps[i]);
+    }
+}
+
+/* liveness analysis: sync globals back to memory.  */
+static void la_global_sync(TCGContext *s, int ng)
+{
+    int i;
+
+    for (i = 0; i < ng; ++i) {
+        int state = s->temps[i].state;
+        s->temps[i].state = state | TS_MEM;
+        if (state == TS_DEAD) {
+            /* If the global was previously dead, reset prefs.  */
+            la_reset_pref(&s->temps[i]);
+        }
+    }
+}
+
+/* liveness analysis: sync globals back to memory and kill.  */
+static void la_global_kill(TCGContext *s, int ng)
+{
+    int i;
+
+    for (i = 0; i < ng; i++) {
+        s->temps[i].state = TS_DEAD | TS_MEM;
+        la_reset_pref(&s->temps[i]);
+    }
+}
+
+/* liveness analysis: note live globals crossing calls.  */
+static void la_cross_call(TCGContext *s, int nt)
+{
+    TCGRegSet mask = ~tcg_target_call_clobber_regs;
+    int i;
+
+    for (i = 0; i < nt; i++) {
+        TCGTemp *ts = &s->temps[i];
+        if (!(ts->state & TS_DEAD)) {
+            TCGRegSet *pset = la_temp_pref(ts);
+            TCGRegSet set = *pset;
+
+            set &= mask;
+            /* If the combination is not possible, restart.  */
+            if (set == 0) {
+                set = tcg_target_available_regs[ts->type] & mask;
+            }
+            *pset = set;
+        }
     }
 }
 
@@ -2265,16 +2468,25 @@ static void tcg_la_bb_end(TCGContext *s)
 static void liveness_pass_1(TCGContext *s)
 {
     int nb_globals = s->nb_globals;
+    int nb_temps = s->nb_temps;
     TCGOp *op, *op_prev;
+    TCGRegSet *prefs;
+    int i;
 
-    tcg_la_func_end(s);
+    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
+    for (i = 0; i < nb_temps; ++i) {
+        s->temps[i].state_ptr = prefs + i;
+    }
+
+    /* ??? Should be redundant with the exit_tb that ends the TB.  */
+    la_func_end(s, nb_globals, nb_temps);
 
     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, TCGOpHead, link, op_prev) {
-        int i, nb_iargs, nb_oargs;
+        int nb_iargs, nb_oargs;
         TCGOpcode opc_new, opc_new2;
         bool have_opc_new2;
         TCGLifeData arg_life = 0;
-        TCGTemp *arg_ts;
+        TCGTemp *ts;
         TCGOpcode opc = op->opc;
         const TCGOpDef *def = &tcg_op_defs[opc];
 
@@ -2282,6 +2494,7 @@ static void liveness_pass_1(TCGContext *s)
         case INDEX_op_call:
             {
                 int call_flags;
+                int nb_call_regs;
 
                 nb_oargs = TCGOP_CALLO(op);
                 nb_iargs = TCGOP_CALLI(op);
@@ -2290,53 +2503,74 @@
                 /* pure functions can be removed if their result is unused */
                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                     for (i = 0; i < nb_oargs; i++) {
-                        arg_ts = arg_temp(op->args[i]);
-                        if (arg_ts->state != TS_DEAD) {
+                        ts = arg_temp(op->args[i]);
+                        if (ts->state != TS_DEAD) {
                             goto do_not_remove_call;
                         }
                     }
                     goto do_remove;
-                } else {
-                do_not_remove_call:
+                }
+            do_not_remove_call:
 
-                    /* output args are dead */
-                    for (i = 0; i < nb_oargs; i++) {
-                        arg_ts = arg_temp(op->args[i]);
-                        if (arg_ts->state & TS_DEAD) {
-                            arg_life |= DEAD_ARG << i;
-                        }
-                        if (arg_ts->state & TS_MEM) {
-                            arg_life |= SYNC_ARG << i;
-                        }
-                        arg_ts->state = TS_DEAD;
+                /* Output args are dead.  */
+                for (i = 0; i < nb_oargs; i++) {
+                    ts = arg_temp(op->args[i]);
+                    if (ts->state & TS_DEAD) {
+                        arg_life |= DEAD_ARG << i;
                     }
+                    if (ts->state & TS_MEM) {
+                        arg_life |= SYNC_ARG << i;
+                    }
+                    ts->state = TS_DEAD;
+                    la_reset_pref(ts);
 
-                    if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
-                                        TCG_CALL_NO_READ_GLOBALS))) {
-                        /* globals should go back to memory */
-                        for (i = 0; i < nb_globals; i++) {
-                            s->temps[i].state = TS_DEAD | TS_MEM;
-                        }
-                    } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
-                        /* globals should be synced to memory */
-                        for (i = 0; i < nb_globals; i++) {
-                            s->temps[i].state |= TS_MEM;
-                        }
-                    }
+                    /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
+                    op->output_pref[i] = 0;
+                }
 
-                    /* record arguments that die in this helper */
-                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
-                        arg_ts = arg_temp(op->args[i]);
-                        if (arg_ts && arg_ts->state & TS_DEAD) {
-                            arg_life |= DEAD_ARG << i;
-                        }
+                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
+                                    TCG_CALL_NO_READ_GLOBALS))) {
+                    la_global_kill(s, nb_globals);
+                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
+                    la_global_sync(s, nb_globals);
+                }
+
+                /* Record arguments that die in this helper.  */
+                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+                    ts = arg_temp(op->args[i]);
+                    if (ts && ts->state & TS_DEAD) {
+                        arg_life |= DEAD_ARG << i;
                     }
-                    /* input arguments are live for preceding opcodes */
-                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
-                        arg_ts = arg_temp(op->args[i]);
-                        if (arg_ts) {
-                            arg_ts->state &= ~TS_DEAD;
-                        }
+                }
+
+                /* For all live registers, remove call-clobbered prefs.  */
+                la_cross_call(s, nb_temps);
+
+                nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
+
+                /* Input arguments are live for preceding opcodes.  */
+                for (i = 0; i < nb_iargs; i++) {
+                    ts = arg_temp(op->args[i + nb_oargs]);
+                    if (ts && ts->state & TS_DEAD) {
+                        /* For those arguments that die, and will be allocated
+                         * in registers, clear the register set for that arg,
+                         * to be filled in below.  For args that will be on
+                         * the stack, reset to any available reg.
+                         */
+                        *la_temp_pref(ts)
+                            = (i < nb_call_regs ? 0 :
+                               tcg_target_available_regs[ts->type]);
+                        ts->state &= ~TS_DEAD;
+                    }
+                }
+
+                /* For each input argument, add its input register to prefs.
+                   If a temp is used once, this produces a single set bit.  */
+                for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
+                    ts = arg_temp(op->args[i + nb_oargs]);
+                    if (ts) {
+                        tcg_regset_set_reg(*la_temp_pref(ts),
+                                           tcg_target_call_iarg_regs[i]);
                     }
                 }
             }
@@ -2345,7 +2579,9 @@
             break;
         case INDEX_op_discard:
             /* mark the temporary as dead */
-            arg_temp(op->args[0])->state = TS_DEAD;
+            ts = arg_temp(op->args[0]);
+            ts->state = TS_DEAD;
+            la_reset_pref(ts);
             break;
 
         case INDEX_op_add2_i32:
@@ -2440,43 +2676,96 @@
                     goto do_not_remove;
                 }
             }
-        do_remove:
-            tcg_op_remove(s, op);
-        } else {
-        do_not_remove:
-            /* output args are dead */
-            for (i = 0; i < nb_oargs; i++) {
-                arg_ts = arg_temp(op->args[i]);
-                if (arg_ts->state & TS_DEAD) {
-                    arg_life |= DEAD_ARG << i;
-                }
-                if (arg_ts->state & TS_MEM) {
-                    arg_life |= SYNC_ARG << i;
-                }
-                arg_ts->state = TS_DEAD;
-            }
+            goto do_remove;
+        }
+        goto do_not_remove;
 
-            /* if end of basic block, update */
-            if (def->flags & TCG_OPF_BB_END) {
-                tcg_la_bb_end(s);
-            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
-                /* globals should be synced to memory */
-                for (i = 0; i < nb_globals; i++) {
-                    s->temps[i].state |= TS_MEM;
-                }
-            }
+    do_remove:
+        tcg_op_remove(s, op);
+        break;
 
-            /* record arguments that die in this opcode */
+    do_not_remove:
+        for (i = 0; i < nb_oargs; i++) {
+            ts = arg_temp(op->args[i]);
+
+            /* Remember the preference of the uses that followed.  */
+            op->output_pref[i] = *la_temp_pref(ts);
+
+            /* Output args are dead.  */
+            if (ts->state & TS_DEAD) {
+                arg_life |= DEAD_ARG << i;
+            }
+            if (ts->state & TS_MEM) {
+                arg_life |= SYNC_ARG << i;
+            }
+            ts->state = TS_DEAD;
+            la_reset_pref(ts);
+        }
+
+        /* If end of basic block, update.  */
+        if (def->flags & TCG_OPF_BB_EXIT) {
+            la_func_end(s, nb_globals, nb_temps);
+        } else if (def->flags & TCG_OPF_BB_END) {
+            la_bb_end(s, nb_globals, nb_temps);
+        } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+            la_global_sync(s, nb_globals);
+            if (def->flags & TCG_OPF_CALL_CLOBBER) {
+                la_cross_call(s, nb_temps);
+            }
+        }
+
+        /* Record arguments that die in this opcode.  */
+        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+            ts = arg_temp(op->args[i]);
+            if (ts->state & TS_DEAD) {
+                arg_life |= DEAD_ARG << i;
+            }
+        }
+
+        /* Input arguments are live for preceding opcodes.  */
+        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+            ts = arg_temp(op->args[i]);
+            if (ts->state & TS_DEAD) {
+                /* For operands that were dead, initially allow
+                   all regs for the type.  */
+                *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
+                ts->state &= ~TS_DEAD;
+            }
+        }
+
+        /* Incorporate constraints for this operand.  */
+        switch (opc) {
+        case INDEX_op_mov_i32:
+        case INDEX_op_mov_i64:
+            /* Note that these are TCG_OPF_NOT_PRESENT and do not
+               have proper constraints.  That said, special case
+               moves to propagate preferences backward.  */
+            if (IS_DEAD_ARG(1)) {
+                *la_temp_pref(arg_temp(op->args[0]))
+                    = *la_temp_pref(arg_temp(op->args[1]));
+            }
+            break;
+
+        default:
             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
-                arg_ts = arg_temp(op->args[i]);
-                if (arg_ts->state & TS_DEAD) {
-                    arg_life |= DEAD_ARG << i;
+                const TCGArgConstraint *ct = &def->args_ct[i];
+                TCGRegSet set, *pset;
+
+                ts = arg_temp(op->args[i]);
+                pset = la_temp_pref(ts);
+                set = *pset;
+
+                set &= ct->u.regs;
+                if (ct->ct & TCG_CT_IALIAS) {
+                    set &= op->output_pref[ct->alias_index];
                 }
+                /* If the combination is not possible, restart.  */
+                if (set == 0) {
+                    set = ct->u.regs;
+                }
+                *pset = set;
             }
-            /* input arguments are live for preceding opcodes */
-            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
-                arg_temp(op->args[i])->state &= ~TS_DEAD;
-            }
+            break;
         }
         break;
     }
@@ -2727,7 +3016,7 @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
     s->current_frame_offset += sizeof(tcg_target_long);
 }
 
-static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet);
+static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
 
 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative, mark it
    free; otherwise mark it dead.  */
@@ -2755,8 +3044,8 @@
    registers needs to be allocated to store a constant.  If 'free_or_dead'
    is non-zero, subsequently release the temporary; if it is positive, the
    temp is dead; if it is negative, the temp is free.  */
-static void temp_sync(TCGContext *s, TCGTemp *ts,
-                      TCGRegSet allocated_regs, int free_or_dead)
+static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
+                      TCGRegSet preferred_regs, int free_or_dead)
 {
     if (ts->fixed_reg) {
         return;
@@ -2776,7 +3065,7 @@ static void temp_sync(TCGContext *s, TCGTemp *ts,
             break;
         }
         temp_load(s, ts, tcg_target_available_regs[ts->type],
-                  allocated_regs);
+                  allocated_regs, preferred_regs);
         /* fallthrough */
 
     case TEMP_VAL_REG:
@@ -2803,35 +3092,76 @@ static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
 {
     TCGTemp *ts = s->reg_to_temp[reg];
     if (ts != NULL) {
-        temp_sync(s, ts, allocated_regs, -1);
+        temp_sync(s, ts, allocated_regs, 0, -1);
     }
 }
 
-/* Allocate a register belonging to reg1 & ~reg2 */
-static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
-                            TCGRegSet allocated_regs, bool rev)
+/**
+ * tcg_reg_alloc:
+ * @required_regs: Set of registers in which we must allocate.
+ * @allocated_regs: Set of registers which must be avoided.
+ * @preferred_regs: Set of registers we should prefer.
+ * @rev: True if we search the registers in "indirect" order.
+ *
+ * The allocated register must be in @required_regs & ~@allocated_regs,
+ * but if we can put it in @preferred_regs we may save a move later.
+ */
+static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
+                            TCGRegSet allocated_regs,
+                            TCGRegSet preferred_regs, bool rev)
 {
-    int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
+    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
+    TCGRegSet reg_ct[2];
     const int *order;
-    TCGReg reg;
-    TCGRegSet reg_ct;
 
-    reg_ct = desired_regs & ~allocated_regs;
+    reg_ct[1] = required_regs & ~allocated_regs;
+    tcg_debug_assert(reg_ct[1] != 0);
+    reg_ct[0] = reg_ct[1] & preferred_regs;
+
+    /* Skip the preferred_regs option if it cannot be satisfied,
+       or if the preference made no difference.  */
+    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
+
     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
 
-    /* first try free registers */
-    for(i = 0; i < n; i++) {
-        reg = order[i];
-        if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
-            return reg;
+    /* Try free registers, preferences first.  */
+    for (j = f; j < 2; j++) {
+        TCGRegSet set = reg_ct[j];
+
+        if (tcg_regset_single(set)) {
+            /* One register in the set.  */
+            TCGReg reg = tcg_regset_first(set);
+            if (s->reg_to_temp[reg] == NULL) {
+                return reg;
+            }
+        } else {
+            for (i = 0; i < n; i++) {
+                TCGReg reg = order[i];
+                if (s->reg_to_temp[reg] == NULL &&
+                    tcg_regset_test_reg(set, reg)) {
+                    return reg;
+                }
+            }
+        }
     }
 
-    /* XXX: do better spill choice */
-    for(i = 0; i < n; i++) {
-        reg = order[i];
-        if (tcg_regset_test_reg(reg_ct, reg)) {
+    /* We must spill something.  */
+    for (j = f; j < 2; j++) {
+        TCGRegSet set = reg_ct[j];
+
+        if (tcg_regset_single(set)) {
+            /* One register in the set.  */
+            TCGReg reg = tcg_regset_first(set);
             tcg_reg_free(s, reg, allocated_regs);
             return reg;
+        } else {
+            for (i = 0; i < n; i++) {
+                TCGReg reg = order[i];
+                if (tcg_regset_test_reg(set, reg)) {
+                    tcg_reg_free(s, reg, allocated_regs);
+                    return reg;
+                }
+            }
         }
     }
 
@@ -2841,7 +3171,7 @@ static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
 /* Make sure the temporary is in a register.  If needed, allocate the register
    from DESIRED while avoiding ALLOCATED.  */
 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
-                      TCGRegSet allocated_regs)
+                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
 {
     TCGReg reg;
 
@@ -2849,12 +3179,14 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
     case TEMP_VAL_REG:
         return;
     case TEMP_VAL_CONST:
-        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
+        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
+                            preferred_regs, ts->indirect_base);
        tcg_out_movi(s, ts->type, reg, ts->val);
         ts->mem_coherent = 0;
         break;
     case TEMP_VAL_MEM:
-        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
+        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
+                            preferred_regs, ts->indirect_base);
         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
         ts->mem_coherent = 1;
         break;
@@ -2924,7 +3256,8 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
 }
 
 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
-                                  tcg_target_ulong val, TCGLifeData arg_life)
+                                  tcg_target_ulong val, TCGLifeData arg_life,
+                                  TCGRegSet preferred_regs)
 {
     if (ots->fixed_reg) {
         /* For fixed registers, we do not do any constant propagation.  */
@@ -2940,7 +3273,7 @@ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
     ots->val = val;
     ots->mem_coherent = 0;
     if (NEED_SYNC_ARG(0)) {
-        temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0));
+        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
     } else if (IS_DEAD_ARG(0)) {
         temp_dead(s, ots);
     }
@@ -2951,17 +3284,18 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
     TCGTemp *ots = arg_temp(op->args[0]);
     tcg_target_ulong val = op->args[1];
 
-    tcg_reg_alloc_do_movi(s, ots, val, op->life);
+    tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
 }
 
 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
 {
     const TCGLifeData arg_life = op->life;
-    TCGRegSet allocated_regs;
+    TCGRegSet allocated_regs, preferred_regs;
     TCGTemp *ts, *ots;
     TCGType otype, itype;
 
     allocated_regs = s->reserved_regs;
+    preferred_regs = op->output_pref[0];
     ots = arg_temp(op->args[0]);
     ts = arg_temp(op->args[1]);
 
@@ -2975,7 +3309,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
         if (IS_DEAD_ARG(1)) {
             temp_dead(s, ts);
         }
-        tcg_reg_alloc_do_movi(s, ots, val, arg_life);
+        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
         return;
     }
 
@@ -2984,7 +3318,8 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
        the SOURCE value into its own register first, that way we
       don't have to reload SOURCE the next time it is used. */
     if (ts->val_type == TEMP_VAL_MEM) {
-        temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs);
+        temp_load(s, ts, tcg_target_available_regs[itype],
+                  allocated_regs, preferred_regs);
     }
 
     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
@@ -3014,7 +3349,8 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
                input one. */
             tcg_regset_set_reg(allocated_regs, ts->reg);
             ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
-                                     allocated_regs, ots->indirect_base);
+                                     allocated_regs, preferred_regs,
+                                     ots->indirect_base);
         }
         tcg_out_mov(s, otype, ots->reg, ts->reg);
     }
@@ -3022,7 +3358,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
         ots->mem_coherent = 0;
         s->reg_to_temp[ots->reg] = ots;
         if (NEED_SYNC_ARG(0)) {
-            temp_sync(s, ots, allocated_regs, 0);
+            temp_sync(s, ots, allocated_regs, 0, 0);
         }
     }
 }
@@ -3054,6 +3390,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
 
     /* satisfy input constraints */
     for (k = 0; k < nb_iargs; k++) {
+        TCGRegSet i_preferred_regs, o_preferred_regs;
+
         i = def->sorted_args[nb_oargs + k];
         arg = op->args[i];
         arg_ct = &def->args_ct[i];
@@ -3064,17 +3402,18 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
             /* constant is OK for instruction */
             const_args[i] = 1;
             new_args[i] = ts->val;
-            goto iarg_end;
+            continue;
         }
 
-        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs);
-
+        i_preferred_regs = o_preferred_regs = 0;
         if (arg_ct->ct & TCG_CT_IALIAS) {
+            o_preferred_regs = op->output_pref[arg_ct->alias_index];
             if (ts->fixed_reg) {
                 /* if fixed register, we must allocate a new register
                    if the alias is not the same register */
-                if (arg != op->args[arg_ct->alias_index])
+                if (arg != op->args[arg_ct->alias_index]) {
                     goto allocate_in_reg;
+                }
             } else {
                 /* if the input is aliased to an output and if it is
                    not dead after the instruction, we must allocate
@@ -3082,33 +3421,42 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
                 if (!IS_DEAD_ARG(i)) {
                     goto allocate_in_reg;
                 }
+
                 /* check if the current register has already been allocated
                    for another input aliased to an output */
-                int k2, i2;
-                for (k2 = 0 ; k2 < k ; k2++) {
-                    i2 = def->sorted_args[nb_oargs + k2];
-                    if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
-                        (new_args[i2] == ts->reg)) {
-                        goto allocate_in_reg;
+                if (ts->val_type == TEMP_VAL_REG) {
+                    int k2, i2;
+                    reg = ts->reg;
+                    for (k2 = 0 ; k2 < k ; k2++) {
+                        i2 = def->sorted_args[nb_oargs + k2];
+                        if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
+                            reg == new_args[i2]) {
+                            goto allocate_in_reg;
+                        }
                     }
                 }
+                i_preferred_regs = o_preferred_regs;
             }
         }
+
+        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
         reg = ts->reg;
+
         if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
            /* nothing to do : the constraint is satisfied */
         } else {
        allocate_in_reg:
            /* allocate a new register matching the constraint
               and move the temporary register into it */
+            temp_load(s, ts, tcg_target_available_regs[ts->type],
+                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
-                                ts->indirect_base);
+                                o_preferred_regs, ts->indirect_base);
            tcg_out_mov(s, ts->type, reg, ts->reg);
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
-    iarg_end: ;
     }
 
     /* mark dead temporaries and free the associated registers */
@@ -3147,7 +3495,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
         } else if (arg_ct->ct & TCG_CT_NEWREG) {
             reg = tcg_reg_alloc(s, arg_ct->u.regs,
                                 i_allocated_regs | o_allocated_regs,
-                                ts->indirect_base);
+                                op->output_pref[k], ts->indirect_base);
         } else {
             /* if fixed register, we try to use it */
             reg = ts->reg;
@@ -3156,7 +3504,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
                 goto oarg_end;
             }
             reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
-                                ts->indirect_base);
+                                op->output_pref[k], ts->indirect_base);
         }
         tcg_regset_set_reg(o_allocated_regs, reg);
         /* if a fixed register is used, then a move will be done afterwards */
@@ -3192,7 +3540,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
             tcg_out_mov(s, ts->type, ts->reg, reg);
         }
         if (NEED_SYNC_ARG(i)) {
-            temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i));
+            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
         } else if (IS_DEAD_ARG(i)) {
             temp_dead(s, ts);
         }
@@ -3248,7 +3596,7 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
         if (arg != TCG_CALL_DUMMY_ARG) {
             ts = arg_temp(arg);
             temp_load(s, ts, tcg_target_available_regs[ts->type],
-                      s->reserved_regs);
+                      s->reserved_regs, 0);
             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
         }
 #ifndef TCG_TARGET_STACK_GROWSUP
@@ -3263,17 +3611,18 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
         if (arg != TCG_CALL_DUMMY_ARG) {
             ts = arg_temp(arg);
             reg = tcg_target_call_iarg_regs[i];
-            tcg_reg_free(s, reg, allocated_regs);
 
             if (ts->val_type == TEMP_VAL_REG) {
                 if (ts->reg != reg) {
+                    tcg_reg_free(s, reg, allocated_regs);
                     tcg_out_mov(s, ts->type, reg, ts->reg);
                 }
             } else {
                 TCGRegSet arg_set = 0;
 
+                tcg_reg_free(s, reg, allocated_regs);
                 tcg_regset_set_reg(arg_set, reg);
-                temp_load(s, ts, arg_set, allocated_regs);
+                temp_load(s, ts, arg_set, allocated_regs, 0);
             }
 
             tcg_regset_set_reg(allocated_regs, reg);
@@ -3326,7 +3675,7 @@
         ts->mem_coherent = 0;
         s->reg_to_temp[reg] = ts;
         if (NEED_SYNC_ARG(i)) {
-            temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i));
+            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
         } else if (IS_DEAD_ARG(i)) {
             temp_dead(s, ts);
         }
@@ -3476,7 +3825,7 @@
         && qemu_log_in_addr_range(tb->pc))) {
         qemu_log_lock();
         qemu_log("OP:\n");
-        tcg_dump_ops(s);
+        tcg_dump_ops(s, false);
         qemu_log("\n");
         qemu_log_unlock();
     }
@@ -3495,6 +3844,7 @@
     atomic_set(&prof->la_time, prof->la_time - profile_getclock());
 #endif
 
+    reachable_code_pass(s);
     liveness_pass_1(s);
 
     if (s->nb_indirects > 0) {
@@ -3503,7 +3853,7 @@
             && qemu_log_in_addr_range(tb->pc))) {
             qemu_log_lock();
             qemu_log("OP before indirect lowering:\n");
-            tcg_dump_ops(s);
+            tcg_dump_ops(s, false);
             qemu_log("\n");
             qemu_log_unlock();
         }
@@ -3524,7 +3874,7 @@
         && qemu_log_in_addr_range(tb->pc))) {
         qemu_log_lock();
         qemu_log("OP after optimization and liveness analysis:\n");
-        tcg_dump_ops(s);
+        tcg_dump_ops(s, true);
         qemu_log("\n");
         qemu_log_unlock();
     }
diff --git a/tcg/tcg.h b/tcg/tcg.h
index ade692fdf5..3a629991ca 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -244,7 +244,8 @@ typedef struct TCGRelocation {
 
 typedef struct TCGLabel {
     unsigned has_value : 1;
-    unsigned id : 31;
+    unsigned id : 15;
+    unsigned refs : 16;
     union {
         uintptr_t value;
         tcg_insn_unit *value_ptr;
@@ -462,11 +463,13 @@ typedef TCGv_ptr TCGv_env;
 /* call flags */
 /* Helper does not read globals (either directly or through an exception). It
    implies TCG_CALL_NO_WRITE_GLOBALS. */
-#define TCG_CALL_NO_READ_GLOBALS    0x0010
+#define TCG_CALL_NO_READ_GLOBALS    0x0001
 /* Helper does not write globals */
-#define TCG_CALL_NO_WRITE_GLOBALS   0x0020
+#define TCG_CALL_NO_WRITE_GLOBALS   0x0002
 /* Helper can be safely suppressed if the return value is not used. */
-#define TCG_CALL_NO_SIDE_EFFECTS    0x0040
+#define TCG_CALL_NO_SIDE_EFFECTS    0x0004
+/* Helper is QEMU_NORETURN.  */
+#define TCG_CALL_NO_RETURN          0x0008
 
 /* convenience version of most used call flags */
 #define TCG_CALL_NO_RWG         TCG_CALL_NO_READ_GLOBALS
@@ -616,6 +619,9 @@ typedef struct TCGOp {
 
     /* Arguments for the opcode.  */
     TCGArg args[MAX_OPC_PARAM];
+
+    /* Register preferences for the output(s).  */
+    TCGRegSet output_pref[2];
 } TCGOp;
 
 #define TCGOP_CALLI(X) (X)->param1
@@ -1024,20 +1030,22 @@ typedef struct TCGArgConstraint {
 
 /* Bits for TCGOpDef->flags, 8 bits available.  */
 enum {
+    /* Instruction exits the translation block.  */
+    TCG_OPF_BB_EXIT      = 0x01,
     /* Instruction defines the end of a basic block.  */
-    TCG_OPF_BB_END       = 0x01,
+    TCG_OPF_BB_END       = 0x02,
     /* Instruction clobbers call registers and potentially update globals.  */
-    TCG_OPF_CALL_CLOBBER = 0x02,
+    TCG_OPF_CALL_CLOBBER = 0x04,
     /* Instruction has side effects: it cannot be removed if its outputs
       are not used, and might trigger exceptions.  */
-    TCG_OPF_SIDE_EFFECTS = 0x04,
+    TCG_OPF_SIDE_EFFECTS = 0x08,
     /* Instruction operands are 64-bits (otherwise 32-bits).  */
-    TCG_OPF_64BIT        = 0x08,
+    TCG_OPF_64BIT        = 0x10,
    /* Instruction is optional and not implemented by the host, or insn
       is generic and should not be implemened by the host.  */
-    TCG_OPF_NOT_PRESENT  = 0x10,
+    TCG_OPF_NOT_PRESENT  = 0x20,
     /* Instruction operands are vectors.  */
-    TCG_OPF_VECTOR       = 0x20,
+    TCG_OPF_VECTOR       = 0x40,
 };
 
 typedef struct TCGOpDef {
@@ -1076,9 +1084,6 @@ TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc);
 
 void tcg_optimize(TCGContext *s);
 
-/* only used for debugging purposes */
-void tcg_dump_ops(TCGContext *s);
-
 TCGv_i32 tcg_const_i32(int32_t val);
 TCGv_i64 tcg_const_i64(int64_t val);
 TCGv_i32 tcg_const_local_i32(int32_t val);
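
Note on the tcg_reg_alloc() change above: the two-pass, preference-first search
is the core of the new register-preference machinery and can be hard to see
inside the diff. The following standalone sketch (not part of the patch; the
names pick_reg, NB_REGS and reg_free are illustrative, and a plain uint32_t
bitmask stands in for TCGRegSet) shows the same idea: scan the preferred subset
first, fall back to the full required set, and only then spill.

/* Sketch of the preference-first allocation order used by tcg_reg_alloc.
 * The real code additionally honours the target's allocation order, the
 * single-register fast path, and indirect register bases.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NB_REGS 8

static bool reg_free[NB_REGS] = { true, true, false, true, false, true, true, true };

static int pick_reg(uint32_t required, uint32_t allocated, uint32_t preferred)
{
    uint32_t reg_ct[2];
    int i, j, f;

    reg_ct[1] = required & ~allocated;      /* must allocate here */
    reg_ct[0] = reg_ct[1] & preferred;      /* would like to allocate here */

    /* Skip the preferred pass if it cannot be satisfied or changes nothing. */
    f = (reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]);

    /* First try free registers, preferences first. */
    for (j = f; j < 2; j++) {
        for (i = 0; i < NB_REGS; i++) {
            if ((reg_ct[j] & (1u << i)) && reg_free[i]) {
                return i;
            }
        }
    }

    /* We must spill something: again preferences first. */
    for (j = f; j < 2; j++) {
        for (i = 0; i < NB_REGS; i++) {
            if (reg_ct[j] & (1u << i)) {
                reg_free[i] = true;         /* stand-in for tcg_reg_free() */
                return i;
            }
        }
    }
    return -1;                              /* unreachable if required != 0 */
}

int main(void)
{
    /* Any of r0-r7 is acceptable; r2/r4 are busy; the caller prefers r5.
       Without the preference pass this would return r0; with it, r5,
       which is the move that liveness_pass_1's output_pref tries to save. */
    printf("picked r%d\n", pick_reg(0xff, 0, 1u << 5));
    return 0;
}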
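
Similarly, reachable_code_pass() relies only on a forward "dead until the next
label" scan made possible by the new TCGLabel refs counter. A reduced sketch
under the same assumptions, with a toy op enum in place of the TCGOp queue
(again illustrative, not from the patch):

/* Sketch of the forward dead-code scan in reachable_code_pass.  The real
 * pass also drops labels whose refs count fell to zero, folds a br into a
 * singly-referenced label that immediately follows it, recognises noreturn
 * helper calls via TCG_CALL_NO_RETURN, and keeps insn_start ops for unwind.
 */
#include <stdbool.h>
#include <stdio.h>

enum op { OP_NOP, OP_BR, OP_EXIT_TB, OP_SET_LABEL };

int main(void)
{
    enum op ops[] = { OP_NOP, OP_BR, OP_NOP, OP_NOP, OP_SET_LABEL, OP_NOP };
    bool dead = false;

    for (unsigned i = 0; i < sizeof(ops) / sizeof(ops[0]); i++) {
        bool remove = dead;             /* computed before the op is seen,
                                           so the branch itself survives */

        switch (ops[i]) {
        case OP_SET_LABEL:
            dead = false;               /* insns become live again at a label */
            remove = false;
            break;
        case OP_BR:
        case OP_EXIT_TB:
            dead = true;                /* everything following is unreachable */
            break;
        default:
            break;
        }
        printf("op %u: %s\n", i, remove ? "removed" : "kept");
    }
    return 0;
}

Run as-is this keeps ops 0-1 and 4-5 and removes the two ops between the br
and the label, which is exactly the window the pass eliminates.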