From e3608d66cea318698a2c4361d4e11a0e224c36db Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 28 Aug 2013 15:48:21 -0700 Subject: [PATCH 01/21] configure: Allow command-line configure for ppc32 Similar to manually selecting i386 for an x86_64 host. Signed-off-by: Richard Henderson --- configure | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/configure b/configure index 05e16da63c..ef4d9bf552 100755 --- a/configure +++ b/configure @@ -981,6 +981,14 @@ for opt do done case "$cpu" in + ppc) + CPU_CFLAGS="-m32" + LDFLAGS="-m32 $LDFLAGS" + ;; + ppc64) + CPU_CFLAGS="-m64" + LDFLAGS="-m64 $LDFLAGS" + ;; sparc) LDFLAGS="-m32 $LDFLAGS" CPU_CFLAGS="-m32 -mcpu=ultrasparc" From 6a115579883e6c0e56394bf7aaabd04260e11233 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Sep 2013 10:22:08 +0200 Subject: [PATCH 02/21] tcg-ppc: fix qemu_ld/qemu_st for AIX ABI For the AIX ABI, the function pointer and small area pointer need to be loaded in the trampoline. The trampoline instead is called with a normal BL instruction. Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 25955563b8..204ffbe5fb 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -490,7 +490,8 @@ static void tcg_out_b (TCGContext *s, int mask, tcg_target_long target) } } -static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg) +static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg, + int lk) { #ifdef _CALL_AIX int reg; @@ -504,14 +505,14 @@ static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg) tcg_out32 (s, LWZ | RT (0) | RA (reg)); tcg_out32 (s, MTSPR | RA (0) | CTR); tcg_out32 (s, LWZ | RT (2) | RA (reg) | 4); - tcg_out32 (s, BCCTR | BO_ALWAYS | LK); + tcg_out32 (s, BCCTR | BO_ALWAYS | lk); #else if (const_arg) { - tcg_out_b (s, LK, arg); + tcg_out_b (s, lk, arg); } else { tcg_out32 (s, MTSPR | RS (arg) | LR); - tcg_out32 (s, BCLR | BO_ALWAYS | LK); + tcg_out32 (s, BCLR | BO_ALWAYS | lk); } #endif } @@ -860,7 +861,7 @@ static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label) tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); #endif tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index); - tcg_out_call (s, (tcg_target_long) ld_trampolines[s_bits], 1); + tcg_out_b (s, LK, (tcg_target_long) ld_trampolines[s_bits]); tcg_out32 (s, (tcg_target_long) raddr); switch (opc) { case 0|4: @@ -954,7 +955,7 @@ static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label) ir++; tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index); - tcg_out_call (s, (tcg_target_long) st_trampolines[opc], 1); + tcg_out_b (s, LK, (tcg_target_long) st_trampolines[opc]); tcg_out32 (s, (tcg_target_long) raddr); tcg_out_b (s, 0, (tcg_target_long) raddr); } @@ -984,7 +985,7 @@ static void emit_ldst_trampoline (TCGContext *s, const void *ptr) tcg_out32 (s, ADDI | RT (3) | RA (3) | 4); tcg_out32 (s, MTSPR | RS (3) | LR); tcg_out_mov (s, TCG_TYPE_I32, 3, TCG_AREG0); - tcg_out_b (s, 0, (tcg_target_long) ptr); + tcg_out_call (s, (tcg_target_long) ptr, 1, 0); } #endif @@ -1493,7 +1494,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, } break; case INDEX_op_call: - tcg_out_call (s, args[0], const_args[0]); + tcg_out_call (s, args[0], const_args[0], LK); break; case INDEX_op_movi_i32: tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]); From 619f90ba62e27c674b1a9af8c0ae68eef8d64a92 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Sep 2013 10:22:09 +0200 Subject: [PATCH 03/21] tcg-ppc: use new return-argument ld/st helpers These use a 32-bit load-of-immediate to save a mflr+addi+mtlr sequence. Tested with a Windows 98 guest (pretty much the most recent thing I could run on my PPC machine) and kvm-unit-tests's sieve.flat. The speed up for sieve.flat is as high as 10% for qemu-system-i386, 25% (no kidding) for qemu-system-x86_64 on my PowerBook G4. Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- include/exec/exec-all.h | 4 +--- tcg/ppc/tcg-target.c | 41 ++++++++++++++++++++--------------------- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 77242e2d81..dc27f33152 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -324,9 +324,7 @@ extern uintptr_t tci_tb_ptr; In some implementations, we pass the "logical" return address manually; in others, we must infer the logical return from the true return. */ #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) -# if defined (_ARCH_PPC) && !defined (_ARCH_PPC64) -# define GETRA_LDST(RA) (*(int32_t *)((RA) - 4)) -# elif defined(__arm__) +# if defined(__arm__) /* We define two insns between the return address and the branch back to straight-line. Find and decode that branch insn. */ # define GETRA_LDST(RA) tcg_getra_ldst(RA) diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 204ffbe5fb..24a8621fe1 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -550,22 +550,24 @@ static void add_qemu_ldst_label (TCGContext *s, label->label_ptr[0] = label_ptr; } -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, + * int mmu_idx, uintptr_t ra) + */ static const void * const qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, + helper_ret_ldub_mmu, + helper_ret_lduw_mmu, + helper_ret_ldul_mmu, + helper_ret_ldq_mmu, }; -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, + * uintxx_t val, int mmu_idx, uintptr_t ra) + */ static const void * const qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, + helper_ret_stb_mmu, + helper_ret_stw_mmu, + helper_ret_stl_mmu, + helper_ret_stq_mmu, }; static void *ld_trampolines[4]; @@ -860,9 +862,9 @@ static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label) tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg); tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); #endif - tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index); + tcg_out_movi (s, TCG_TYPE_I32, ir++, mem_index); + tcg_out_movi (s, TCG_TYPE_I32, ir, (tcg_target_long) raddr); tcg_out_b (s, LK, (tcg_target_long) ld_trampolines[s_bits]); - tcg_out32 (s, (tcg_target_long) raddr); switch (opc) { case 0|4: tcg_out32 (s, EXTSB | RA (data_reg) | RS (3)); @@ -954,10 +956,10 @@ static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label) } ir++; - tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index); - tcg_out_b (s, LK, (tcg_target_long) st_trampolines[opc]); - tcg_out32 (s, (tcg_target_long) raddr); - tcg_out_b (s, 0, (tcg_target_long) raddr); + tcg_out_movi (s, TCG_TYPE_I32, ir++, mem_index); + tcg_out_movi (s, TCG_TYPE_I32, ir, (tcg_target_long) raddr); + tcg_out32 (s, MTSPR | RS (ir) | LR); + tcg_out_b (s, 0, (tcg_target_long) st_trampolines[opc]); } void tcg_out_tb_finalize(TCGContext *s) @@ -981,9 +983,6 @@ void tcg_out_tb_finalize(TCGContext *s) #ifdef CONFIG_SOFTMMU static void emit_ldst_trampoline (TCGContext *s, const void *ptr) { - tcg_out32 (s, MFSPR | RT (3) | LR); - tcg_out32 (s, ADDI | RT (3) | RA (3) | 4); - tcg_out32 (s, MTSPR | RS (3) | LR); tcg_out_mov (s, TCG_TYPE_I32, 3, TCG_AREG0); tcg_out_call (s, (tcg_target_long) ptr, 1, 0); } From 4b2b114d8cc0f0f59bc20855bf287fb3df55b553 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 28 Aug 2013 15:51:08 -0700 Subject: [PATCH 04/21] tcg-ppc: Avoid code for nop move While these are rare from code that's been through the optimizer, it's not uncommon within the tcg backend. Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 24a8621fe1..965108b8bd 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -450,7 +450,9 @@ static const uint32_t tcg_to_bc[] = { static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { - tcg_out32 (s, OR | SAB (arg, ret, arg)); + if (ret != arg) { + tcg_out32(s, OR | SAB(arg, ret, arg)); + } } static void tcg_out_movi(TCGContext *s, TCGType type, From 1d10cf9886429d17d22e233081697ef27465dca3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 29 Aug 2013 10:07:24 -0700 Subject: [PATCH 05/21] tcg-ppc: Cleanup tcg_out_qemu_ld/st_slow_path Coding style fixes. Use TCGReg enumeration values instead of raw numbers. Don't needlessly pull the whole TCGLabelQemuLdst struct into local variables. Less conditional compilation. No functional changes. Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c | 143 +++++++++++++++++-------------------------- 1 file changed, 56 insertions(+), 87 deletions(-) diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 965108b8bd..a5f1f99592 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -836,132 +836,101 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) } #if defined(CONFIG_SOFTMMU) -static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label) +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { - int s_bits; - int ir; - int opc = label->opc; - int mem_index = label->mem_index; - int data_reg = label->datalo_reg; - int data_reg2 = label->datahi_reg; - int addr_reg = label->addrlo_reg; - uint8_t *raddr = label->raddr; - uint8_t **label_ptr = &label->label_ptr[0]; + TCGReg ir, datalo, datahi; - s_bits = opc & 3; + reloc_pc14 (l->label_ptr[0], (uintptr_t)s->code_ptr); - /* resolve label address */ - reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr); - - /* slow path */ - ir = 4; -#if TARGET_LONG_BITS == 32 - tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); -#else + ir = TCG_REG_R4; + if (TARGET_LONG_BITS == 32) { + tcg_out_mov(s, TCG_TYPE_I32, ir++, l->addrlo_reg); + } else { #ifdef TCG_TARGET_CALL_ALIGN_ARGS - ir |= 1; + ir |= 1; #endif - tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg); - tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); -#endif - tcg_out_movi (s, TCG_TYPE_I32, ir++, mem_index); - tcg_out_movi (s, TCG_TYPE_I32, ir, (tcg_target_long) raddr); - tcg_out_b (s, LK, (tcg_target_long) ld_trampolines[s_bits]); - switch (opc) { + tcg_out_mov(s, TCG_TYPE_I32, ir++, l->addrhi_reg); + tcg_out_mov(s, TCG_TYPE_I32, ir++, l->addrlo_reg); + } + tcg_out_movi(s, TCG_TYPE_I32, ir++, l->mem_index); + tcg_out_movi(s, TCG_TYPE_PTR, ir, (uintptr_t)l->raddr); + tcg_out_b(s, LK, (uintptr_t)ld_trampolines[l->opc & 3]); + + datalo = l->datalo_reg; + switch (l->opc) { case 0|4: - tcg_out32 (s, EXTSB | RA (data_reg) | RS (3)); + tcg_out32(s, EXTSB | RA(datalo) | RS(TCG_REG_R3)); break; case 1|4: - tcg_out32 (s, EXTSH | RA (data_reg) | RS (3)); + tcg_out32(s, EXTSH | RA(datalo) | RS(TCG_REG_R3)); break; case 0: case 1: case 2: - if (data_reg != 3) - tcg_out_mov (s, TCG_TYPE_I32, data_reg, 3); + tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R3); break; case 3: - if (data_reg == 3) { - if (data_reg2 == 4) { - tcg_out_mov (s, TCG_TYPE_I32, 0, 4); - tcg_out_mov (s, TCG_TYPE_I32, 4, 3); - tcg_out_mov (s, TCG_TYPE_I32, 3, 0); - } - else { - tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3); - tcg_out_mov (s, TCG_TYPE_I32, 3, 4); - } - } - else { - if (data_reg != 4) tcg_out_mov (s, TCG_TYPE_I32, data_reg, 4); - if (data_reg2 != 3) tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3); + datahi = l->datahi_reg; + if (datalo != TCG_REG_R3) { + tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R4); + tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3); + } else if (datahi != TCG_REG_R4) { + tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3); + tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R4); + } else { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, TCG_REG_R4); + tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3); + tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R0); } break; } - /* Jump to the code corresponding to next IR of qemu_st */ - tcg_out_b (s, 0, (tcg_target_long) raddr); + tcg_out_b (s, 0, (uintptr_t)l->raddr); } -static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label) +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { - int ir; - int opc = label->opc; - int mem_index = label->mem_index; - int data_reg = label->datalo_reg; - int data_reg2 = label->datahi_reg; - int addr_reg = label->addrlo_reg; - uint8_t *raddr = label->raddr; - uint8_t **label_ptr = &label->label_ptr[0]; + TCGReg ir, datalo; - /* resolve label address */ - reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr); + reloc_pc14 (l->label_ptr[0], (tcg_target_long) s->code_ptr); - /* slow path */ - ir = 4; -#if TARGET_LONG_BITS == 32 - tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); -#else + ir = TCG_REG_R4; + if (TARGET_LONG_BITS == 32) { + tcg_out_mov (s, TCG_TYPE_I32, ir++, l->addrlo_reg); + } else { #ifdef TCG_TARGET_CALL_ALIGN_ARGS - ir |= 1; -#endif - tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg); - tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg); + ir |= 1; #endif + tcg_out_mov (s, TCG_TYPE_I32, ir++, l->addrhi_reg); + tcg_out_mov (s, TCG_TYPE_I32, ir++, l->addrlo_reg); + } - switch (opc) { + datalo = l->datalo_reg; + switch (l->opc) { case 0: - tcg_out32 (s, (RLWINM - | RA (ir) - | RS (data_reg) - | SH (0) - | MB (24) - | ME (31))); + tcg_out32(s, (RLWINM | RA (ir) | RS (datalo) + | SH (0) | MB (24) | ME (31))); break; case 1: - tcg_out32 (s, (RLWINM - | RA (ir) - | RS (data_reg) - | SH (0) - | MB (16) - | ME (31))); + tcg_out32(s, (RLWINM | RA (ir) | RS (datalo) + | SH (0) | MB (16) | ME (31))); break; case 2: - tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg); + tcg_out_mov(s, TCG_TYPE_I32, ir, datalo); break; case 3: #ifdef TCG_TARGET_CALL_ALIGN_ARGS ir |= 1; #endif - tcg_out_mov (s, TCG_TYPE_I32, ir++, data_reg2); - tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg); + tcg_out_mov(s, TCG_TYPE_I32, ir++, l->datahi_reg); + tcg_out_mov(s, TCG_TYPE_I32, ir, datalo); break; } ir++; - tcg_out_movi (s, TCG_TYPE_I32, ir++, mem_index); - tcg_out_movi (s, TCG_TYPE_I32, ir, (tcg_target_long) raddr); - tcg_out32 (s, MTSPR | RS (ir) | LR); - tcg_out_b (s, 0, (tcg_target_long) st_trampolines[opc]); + tcg_out_movi(s, TCG_TYPE_I32, ir++, l->mem_index); + tcg_out_movi(s, TCG_TYPE_PTR, ir, (uintptr_t)l->raddr); + tcg_out32(s, MTSPR | RS(ir) | LR); + tcg_out_b(s, 0, (uintptr_t)st_trampolines[l->opc]); } void tcg_out_tb_finalize(TCGContext *s) From 5b1c985b7e4d3f430769925c1775c9e8836272df Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 9 Sep 2013 16:49:36 -0700 Subject: [PATCH 06/21] tcg-ppc: Use conditional branch and link to slow path Saves one insn per slow path. Note that we can no longer use a tail call into the store helper. Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index a5f1f99592..516d28f879 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -611,9 +611,14 @@ static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2, tcg_out32 (s, CMP | BF (6) | RA (addr_reg2) | RB (r1)); tcg_out32 (s, CRAND | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ)); #endif + + /* Use a conditional branch-and-link so that we load a pointer to + somewhere within the current opcode, for passing on to the helper. + This address cannot be used for a tail call, but it's shorter + than forming an address from scratch. */ *label_ptr = s->code_ptr; retranst = ((uint16_t *) s->code_ptr)[1] & ~3; - tcg_out32 (s, BC | BI (7, CR_EQ) | retranst | BO_COND_FALSE); + tcg_out32(s, BC | BI(7, CR_EQ) | retranst | BO_COND_FALSE | LK); /* r0 now contains &env->tlb_table[mem_index][index].addr_x */ tcg_out32 (s, (LWZ @@ -853,7 +858,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_mov(s, TCG_TYPE_I32, ir++, l->addrlo_reg); } tcg_out_movi(s, TCG_TYPE_I32, ir++, l->mem_index); - tcg_out_movi(s, TCG_TYPE_PTR, ir, (uintptr_t)l->raddr); + tcg_out32(s, MFSPR | RT(ir++) | LR); tcg_out_b(s, LK, (uintptr_t)ld_trampolines[l->opc & 3]); datalo = l->datalo_reg; @@ -928,9 +933,9 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) ir++; tcg_out_movi(s, TCG_TYPE_I32, ir++, l->mem_index); - tcg_out_movi(s, TCG_TYPE_PTR, ir, (uintptr_t)l->raddr); - tcg_out32(s, MTSPR | RS(ir) | LR); - tcg_out_b(s, 0, (uintptr_t)st_trampolines[l->opc]); + tcg_out32(s, MFSPR | RT(ir++) | LR); + tcg_out_b(s, LK, (uintptr_t)st_trampolines[l->opc]); + tcg_out_b(s, 0, (uintptr_t)l->raddr); } void tcg_out_tb_finalize(TCGContext *s) From 8f50c841b374dc90ea604888ca92c37f469c428a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 29 Aug 2013 09:32:20 -0700 Subject: [PATCH 07/21] tcg-ppc: Fix and cleanup tcg_out_tlb_check The fix is that sparc has so many mmu modes that the last one overflowed the 16-bit signed offset we assumed would fit. Handle this, and check the new assumption at compile time. Load the tlb addend earlier for the fast path. Remove the explicit address + addend and make use of index addressing. Adjust constraints for qemu_ld64 such that we don't clobber the address register or tlb addend before loading both values. Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c | 298 ++++++++++++++++++------------------------- 1 file changed, 125 insertions(+), 173 deletions(-) diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 516d28f879..97e33edcfd 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -575,42 +575,72 @@ static const void * const qemu_st_helpers[4] = { static void *ld_trampolines[4]; static void *st_trampolines[4]; -static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2, - int addr_reg, int addr_reg2, int s_bits, - int offset1, int offset2, uint8_t **label_ptr) +/* Perform the TLB load and compare. Branches to the slow path, placing the + address of the branch in *LABEL_PTR. Loads the addend of the TLB into R0. + Clobbers R1 and R2. */ + +static void tcg_out_tlb_check(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2, + TCGReg addrlo, TCGReg addrhi, int s_bits, + int mem_index, int is_load, uint8_t **label_ptr) { + int cmp_off = + (is_load + ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); uint16_t retranst; + TCGReg base = TCG_AREG0; - tcg_out32 (s, (RLWINM - | RA (r0) - | RS (addr_reg) - | SH (32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)) - | MB (32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS)) - | ME (31 - CPU_TLB_ENTRY_BITS) - ) - ); - tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (TCG_AREG0)); - tcg_out32 (s, (LWZU - | RT (r1) - | RA (r0) - | offset1 - ) - ); - tcg_out32 (s, (RLWINM - | RA (r2) - | RS (addr_reg) - | SH (0) - | MB ((32 - s_bits) & 31) - | ME (31 - TARGET_PAGE_BITS) - ) - ); + /* Extract the page index, shifted into place for tlb index. */ + tcg_out32(s, (RLWINM + | RA(r0) + | RS(addrlo) + | SH(32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)) + | MB(32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS)) + | ME(31 - CPU_TLB_ENTRY_BITS))); - tcg_out32 (s, CMP | BF (7) | RA (r2) | RB (r1)); -#if TARGET_LONG_BITS == 64 - tcg_out32 (s, LWZ | RT (r1) | RA (r0) | 4); - tcg_out32 (s, CMP | BF (6) | RA (addr_reg2) | RB (r1)); - tcg_out32 (s, CRAND | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ)); -#endif + /* Compensate for very large offsets. */ + if (add_off >= 0x8000) { + /* Most target env are smaller than 32k; none are larger than 64k. + Simplify the logic here merely to offset by 0x7ff0, giving us a + range just shy of 64k. Check this assumption. */ + QEMU_BUILD_BUG_ON(offsetof(CPUArchState, + tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ff0 + 0x7fff); + tcg_out32(s, ADDI | RT(r1) | RA(base) | 0x7ff0); + base = r1; + cmp_off -= 0x7ff0; + add_off -= 0x7ff0; + } + + /* Clear the non-page, non-alignment bits from the address. */ + tcg_out32(s, (RLWINM + | RA(r2) + | RS(addrlo) + | SH(0) + | MB((32 - s_bits) & 31) + | ME(31 - TARGET_PAGE_BITS))); + + tcg_out32(s, ADD | RT(r0) | RA(r0) | RB(base)); + base = r0; + + /* Load the tlb comparator. */ + tcg_out32(s, LWZ | RT(r1) | RA(base) | (cmp_off & 0xffff)); + + tcg_out32(s, CMP | BF(7) | RA(r2) | RB(r1)); + + if (TARGET_LONG_BITS == 64) { + tcg_out32(s, LWZ | RT(r1) | RA(base) | ((cmp_off + 4) & 0xffff)); + } + + /* Load the tlb addend for use on the fast path. + Do this asap to minimize load delay. */ + tcg_out32(s, LWZ | RT(r0) | RA(base) | (add_off & 0xffff)); + + if (TARGET_LONG_BITS == 64) { + tcg_out32(s, CMP | BF(6) | RA(addrhi) | RB(r1)); + tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); + } /* Use a conditional branch-and-link so that we load a pointer to somewhere within the current opcode, for passing on to the helper. @@ -619,58 +649,31 @@ static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2, *label_ptr = s->code_ptr; retranst = ((uint16_t *) s->code_ptr)[1] & ~3; tcg_out32(s, BC | BI(7, CR_EQ) | retranst | BO_COND_FALSE | LK); - - /* r0 now contains &env->tlb_table[mem_index][index].addr_x */ - tcg_out32 (s, (LWZ - | RT (r0) - | RA (r0) - | offset2 - ) - ); - /* r0 = env->tlb_table[mem_index][index].addend */ - tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (addr_reg)); - /* r0 = env->tlb_table[mem_index][index].addend + addr */ - } #endif static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) { - int addr_reg, data_reg, data_reg2, r0, r1, rbase, bswap; + TCGReg addrlo, datalo, datahi, rbase; + int bswap; #ifdef CONFIG_SOFTMMU - int mem_index, s_bits, r2, addr_reg2; + int mem_index; + TCGReg addrhi; uint8_t *label_ptr; #endif - data_reg = *args++; - if (opc == 3) - data_reg2 = *args++; - else - data_reg2 = 0; - addr_reg = *args++; + datalo = *args++; + datahi = (opc == 3 ? *args++ : 0); + addrlo = *args++; #ifdef CONFIG_SOFTMMU -#if TARGET_LONG_BITS == 64 - addr_reg2 = *args++; -#else - addr_reg2 = 0; -#endif + addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); mem_index = *args; - s_bits = opc & 3; - r0 = 3; - r1 = 4; - r2 = 0; - rbase = 0; - tcg_out_tlb_check ( - s, r0, r1, r2, addr_reg, addr_reg2, s_bits, - offsetof (CPUArchState, tlb_table[mem_index][0].addr_read), - offsetof (CPUTLBEntry, addend) - offsetof (CPUTLBEntry, addr_read), - &label_ptr - ); + tcg_out_tlb_check(s, TCG_REG_R3, TCG_REG_R4, TCG_REG_R0, addrlo, + addrhi, opc & 3, mem_index, 0, &label_ptr); + rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ - r0 = addr_reg; - r1 = 3; rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; #endif @@ -683,106 +686,72 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) switch (opc) { default: case 0: - tcg_out32 (s, LBZX | TAB (data_reg, rbase, r0)); + tcg_out32(s, LBZX | TAB(datalo, rbase, addrlo)); break; case 0|4: - tcg_out32 (s, LBZX | TAB (data_reg, rbase, r0)); - tcg_out32 (s, EXTSB | RA (data_reg) | RS (data_reg)); + tcg_out32(s, LBZX | TAB(datalo, rbase, addrlo)); + tcg_out32(s, EXTSB | RA(datalo) | RS(datalo)); break; case 1: - if (bswap) - tcg_out32 (s, LHBRX | TAB (data_reg, rbase, r0)); - else - tcg_out32 (s, LHZX | TAB (data_reg, rbase, r0)); + tcg_out32(s, (bswap ? LHBRX : LHZX) | TAB(datalo, rbase, addrlo)); break; case 1|4: if (bswap) { - tcg_out32 (s, LHBRX | TAB (data_reg, rbase, r0)); - tcg_out32 (s, EXTSH | RA (data_reg) | RS (data_reg)); + tcg_out32(s, LHBRX | TAB(datalo, rbase, addrlo)); + tcg_out32(s, EXTSH | RA(datalo) | RS(datalo)); + } else { + tcg_out32(s, LHAX | TAB(datalo, rbase, addrlo)); } - else tcg_out32 (s, LHAX | TAB (data_reg, rbase, r0)); break; case 2: - if (bswap) - tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0)); - else - tcg_out32 (s, LWZX | TAB (data_reg, rbase, r0)); + tcg_out32(s, (bswap ? LWBRX : LWZX) | TAB(datalo, rbase, addrlo)); break; case 3: if (bswap) { - tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4); - tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0)); - tcg_out32 (s, LWBRX | TAB (data_reg2, rbase, r1)); - } - else { -#ifdef CONFIG_USE_GUEST_BASE - tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4); - tcg_out32 (s, LWZX | TAB (data_reg2, rbase, r0)); - tcg_out32 (s, LWZX | TAB (data_reg, rbase, r1)); -#else - if (r0 == data_reg2) { - tcg_out32 (s, LWZ | RT (0) | RA (r0)); - tcg_out32 (s, LWZ | RT (data_reg) | RA (r0) | 4); - tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 0); - } - else { - tcg_out32 (s, LWZ | RT (data_reg2) | RA (r0)); - tcg_out32 (s, LWZ | RT (data_reg) | RA (r0) | 4); - } -#endif + tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4); + tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); + tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0)); + } else if (rbase != 0) { + tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4); + tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo)); + tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0)); + } else if (addrlo == datahi) { + tcg_out32(s, LWZ | RT(datalo) | RA(addrlo) | 4); + tcg_out32(s, LWZ | RT(datahi) | RA(addrlo)); + } else { + tcg_out32(s, LWZ | RT(datahi) | RA(addrlo)); + tcg_out32(s, LWZ | RT(datalo) | RA(addrlo) | 4); } break; } #ifdef CONFIG_SOFTMMU - add_qemu_ldst_label (s, - 1, - opc, - data_reg, - data_reg2, - addr_reg, - addr_reg2, - mem_index, - s->code_ptr, - label_ptr); + add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, + addrhi, mem_index, s->code_ptr, label_ptr); #endif } static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) { - int addr_reg, r0, r1, data_reg, data_reg2, bswap, rbase; + TCGReg addrlo, datalo, datahi, rbase; + int bswap; #ifdef CONFIG_SOFTMMU - int mem_index, r2, addr_reg2; + int mem_index; + TCGReg addrhi; uint8_t *label_ptr; #endif - data_reg = *args++; - if (opc == 3) - data_reg2 = *args++; - else - data_reg2 = 0; - addr_reg = *args++; + datalo = *args++; + datahi = (opc == 3 ? *args++ : 0); + addrlo = *args++; #ifdef CONFIG_SOFTMMU -#if TARGET_LONG_BITS == 64 - addr_reg2 = *args++; -#else - addr_reg2 = 0; -#endif + addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); mem_index = *args; - r0 = 3; - r1 = 4; - r2 = 0; - rbase = 0; - tcg_out_tlb_check ( - s, r0, r1, r2, addr_reg, addr_reg2, opc & 3, - offsetof (CPUArchState, tlb_table[mem_index][0].addr_write), - offsetof (CPUTLBEntry, addend) - offsetof (CPUTLBEntry, addr_write), - &label_ptr - ); + tcg_out_tlb_check(s, TCG_REG_R3, TCG_REG_R4, TCG_REG_R0, addrlo, + addrhi, opc & 3, mem_index, 0, &label_ptr); + rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ - r0 = addr_reg; - r1 = 3; rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; #endif @@ -793,50 +762,33 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) #endif switch (opc) { case 0: - tcg_out32 (s, STBX | SAB (data_reg, rbase, r0)); + tcg_out32(s, STBX | SAB(datalo, rbase, addrlo)); break; case 1: - if (bswap) - tcg_out32 (s, STHBRX | SAB (data_reg, rbase, r0)); - else - tcg_out32 (s, STHX | SAB (data_reg, rbase, r0)); + tcg_out32(s, (bswap ? STHBRX : STHX) | SAB(datalo, rbase, addrlo)); break; case 2: - if (bswap) - tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0)); - else - tcg_out32 (s, STWX | SAB (data_reg, rbase, r0)); + tcg_out32(s, (bswap ? STWBRX : STWX) | SAB(datalo, rbase, addrlo)); break; case 3: if (bswap) { - tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4); - tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0)); - tcg_out32 (s, STWBRX | SAB (data_reg2, rbase, r1)); - } - else { -#ifdef CONFIG_USE_GUEST_BASE - tcg_out32 (s, STWX | SAB (data_reg2, rbase, r0)); - tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4); - tcg_out32 (s, STWX | SAB (data_reg, rbase, r1)); -#else - tcg_out32 (s, STW | RS (data_reg2) | RA (r0)); - tcg_out32 (s, STW | RS (data_reg) | RA (r0) | 4); -#endif + tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4); + tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); + tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0)); + } else if (rbase != 0) { + tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4); + tcg_out32(s, STWX | SAB(datahi, rbase, addrlo)); + tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0)); + } else { + tcg_out32(s, STW | RS(datahi) | RA(addrlo)); + tcg_out32(s, STW | RS(datalo) | RA(addrlo) | 4); } break; } #ifdef CONFIG_SOFTMMU - add_qemu_ldst_label (s, - 0, - opc, - data_reg, - data_reg2, - addr_reg, - addr_reg2, - mem_index, - s->code_ptr, - label_ptr); + add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi, + mem_index, s->code_ptr, label_ptr); #endif } @@ -1994,7 +1946,7 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_qemu_ld16u, { "r", "L" } }, { INDEX_op_qemu_ld16s, { "r", "L" } }, { INDEX_op_qemu_ld32, { "r", "L" } }, - { INDEX_op_qemu_ld64, { "r", "r", "L" } }, + { INDEX_op_qemu_ld64, { "L", "L", "L" } }, { INDEX_op_qemu_st8, { "K", "K" } }, { INDEX_op_qemu_st16, { "K", "K" } }, @@ -2006,7 +1958,7 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_qemu_ld16u, { "r", "L", "L" } }, { INDEX_op_qemu_ld16s, { "r", "L", "L" } }, { INDEX_op_qemu_ld32, { "r", "L", "L" } }, - { INDEX_op_qemu_ld64, { "r", "L", "L", "L" } }, + { INDEX_op_qemu_ld64, { "L", "L", "L", "L" } }, { INDEX_op_qemu_st8, { "K", "K", "K" } }, { INDEX_op_qemu_st16, { "K", "K", "K" } }, From 541dd4ceaacb92d93ceae87d4d521ae8bd381559 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 31 Aug 2013 05:14:53 -0700 Subject: [PATCH 08/21] tcg-ppc64: Reformat tcg-target.c Whitespace and brace changes only. Signed-off-by: Richard Henderson --- tcg/ppc64/tcg-target.c | 478 ++++++++++++++++++++--------------------- 1 file changed, 239 insertions(+), 239 deletions(-) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 0bd1e0ce8c..b554b00e92 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -173,58 +173,59 @@ static const int tcg_target_callee_save_regs[] = { TCG_REG_R31 }; -static uint32_t reloc_pc24_val (void *pc, tcg_target_long target) +static uint32_t reloc_pc24_val(void *pc, tcg_target_long target) { tcg_target_long disp; - disp = target - (tcg_target_long) pc; - if ((disp << 38) >> 38 != disp) - tcg_abort (); + disp = target - (tcg_target_long)pc; + if ((disp << 38) >> 38 != disp) { + tcg_abort(); + } return disp & 0x3fffffc; } -static void reloc_pc24 (void *pc, tcg_target_long target) +static void reloc_pc24(void *pc, tcg_target_long target) { - *(uint32_t *) pc = (*(uint32_t *) pc & ~0x3fffffc) - | reloc_pc24_val (pc, target); + *(uint32_t *)pc = (*(uint32_t *)pc & ~0x3fffffc) + | reloc_pc24_val(pc, target); } -static uint16_t reloc_pc14_val (void *pc, tcg_target_long target) +static uint16_t reloc_pc14_val(void *pc, tcg_target_long target) { tcg_target_long disp; - disp = target - (tcg_target_long) pc; - if (disp != (int16_t) disp) - tcg_abort (); + disp = target - (tcg_target_long)pc; + if (disp != (int16_t) disp) { + tcg_abort(); + } return disp & 0xfffc; } -static void reloc_pc14 (void *pc, tcg_target_long target) +static void reloc_pc14(void *pc, tcg_target_long target) { - *(uint32_t *) pc = (*(uint32_t *) pc & ~0xfffc) - | reloc_pc14_val (pc, target); + *(uint32_t *)pc = (*(uint32_t *)pc & ~0xfffc) | reloc_pc14_val(pc, target); } -static void patch_reloc (uint8_t *code_ptr, int type, - intptr_t value, intptr_t addend) +static void patch_reloc(uint8_t *code_ptr, int type, + intptr_t value, intptr_t addend) { value += addend; switch (type) { case R_PPC_REL14: - reloc_pc14 (code_ptr, value); + reloc_pc14(code_ptr, value); break; case R_PPC_REL24: - reloc_pc24 (code_ptr, value); + reloc_pc24(code_ptr, value); break; default: - tcg_abort (); + tcg_abort(); } } /* parse target specific constraints */ -static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str) +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { const char *ct_str; @@ -232,29 +233,29 @@ static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str) switch (ct_str[0]) { case 'A': case 'B': case 'C': case 'D': ct->ct |= TCG_CT_REG; - tcg_regset_set_reg (ct->u.regs, 3 + ct_str[0] - 'A'); + tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A'); break; case 'r': ct->ct |= TCG_CT_REG; - tcg_regset_set32 (ct->u.regs, 0, 0xffffffff); + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); break; case 'L': /* qemu_ld constraint */ ct->ct |= TCG_CT_REG; - tcg_regset_set32 (ct->u.regs, 0, 0xffffffff); - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3); + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); #ifdef CONFIG_SOFTMMU - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4); - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R5); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); #endif break; case 'S': /* qemu_st constraint */ ct->ct |= TCG_CT_REG; - tcg_regset_set32 (ct->u.regs, 0, 0xffffffff); - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3); + tcg_regset_set32(ct->u.regs, 0, 0xffffffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); #ifdef CONFIG_SOFTMMU - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4); - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R5); - tcg_regset_reset_reg (ct->u.regs, TCG_REG_R6); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6); #endif break; case 'I': @@ -284,8 +285,8 @@ static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str) } /* test if a constant matches the constraint */ -static int tcg_target_const_match (tcg_target_long val, - const TCGArgConstraint *arg_ct) +static int tcg_target_const_match(tcg_target_long val, + const TCGArgConstraint *arg_ct) { int ct = arg_ct->ct; if (ct & TCG_CT_CONST) { @@ -425,7 +426,7 @@ static int tcg_target_const_match (tcg_target_long val, #define STHX XO31(407) #define STWX XO31(151) -#define SPR(a,b) ((((a)<<5)|(b))<<11) +#define SPR(a, b) ((((a)<<5)|(b))<<11) #define LR SPR(8, 0) #define CTR SPR(9, 0) @@ -439,7 +440,7 @@ static int tcg_target_const_match (tcg_target_long val, #define SRADI XO31(413<<1) #define TW XO31( 4) -#define TRAP (TW | TO (31)) +#define TRAP (TW | TO(31)) #define RT(r) ((r)<<21) #define RS(r) ((r)<<21) @@ -467,9 +468,9 @@ static int tcg_target_const_match (tcg_target_long val, #define BB(n, c) (((c)+((n)*4))<<11) #define BC_(n, c) (((c)+((n)*4))<<6) -#define BO_COND_TRUE BO (12) -#define BO_COND_FALSE BO ( 4) -#define BO_ALWAYS BO (20) +#define BO_COND_TRUE BO(12) +#define BO_COND_FALSE BO( 4) +#define BO_ALWAYS BO(20) enum { CR_LT, @@ -479,16 +480,16 @@ enum { }; static const uint32_t tcg_to_bc[] = { - [TCG_COND_EQ] = BC | BI (7, CR_EQ) | BO_COND_TRUE, - [TCG_COND_NE] = BC | BI (7, CR_EQ) | BO_COND_FALSE, - [TCG_COND_LT] = BC | BI (7, CR_LT) | BO_COND_TRUE, - [TCG_COND_GE] = BC | BI (7, CR_LT) | BO_COND_FALSE, - [TCG_COND_LE] = BC | BI (7, CR_GT) | BO_COND_FALSE, - [TCG_COND_GT] = BC | BI (7, CR_GT) | BO_COND_TRUE, - [TCG_COND_LTU] = BC | BI (7, CR_LT) | BO_COND_TRUE, - [TCG_COND_GEU] = BC | BI (7, CR_LT) | BO_COND_FALSE, - [TCG_COND_LEU] = BC | BI (7, CR_GT) | BO_COND_FALSE, - [TCG_COND_GTU] = BC | BI (7, CR_GT) | BO_COND_TRUE, + [TCG_COND_EQ] = BC | BI(7, CR_EQ) | BO_COND_TRUE, + [TCG_COND_NE] = BC | BI(7, CR_EQ) | BO_COND_FALSE, + [TCG_COND_LT] = BC | BI(7, CR_LT) | BO_COND_TRUE, + [TCG_COND_GE] = BC | BI(7, CR_LT) | BO_COND_FALSE, + [TCG_COND_LE] = BC | BI(7, CR_GT) | BO_COND_FALSE, + [TCG_COND_GT] = BC | BI(7, CR_GT) | BO_COND_TRUE, + [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE, + [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE, + [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE, + [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE, }; /* The low bit here is set if the RA and RB fields must be inverted. */ @@ -508,15 +509,15 @@ static const uint32_t tcg_to_isel[] = { static inline void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { - tcg_out32 (s, OR | SAB (arg, ret, arg)); + tcg_out32(s, OR | SAB(arg, ret, arg)); } static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, int sh, int mb) { - sh = SH (sh & 0x1f) | (((sh >> 5) & 1) << 1); - mb = MB64 ((mb >> 5) | ((mb << 1) & 0x3f)); - tcg_out32 (s, op | RA (ra) | RS (rs) | sh | mb); + sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1); + mb = MB64((mb >> 5) | ((mb << 1) & 0x3f)); + tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb); } static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs, @@ -686,44 +687,42 @@ static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) tcg_out_zori32(s, dst, src, c, XORI, XORIS); } -static void tcg_out_b (TCGContext *s, int mask, tcg_target_long target) +static void tcg_out_b(TCGContext *s, int mask, tcg_target_long target) { tcg_target_long disp; - disp = target - (tcg_target_long) s->code_ptr; - if ((disp << 38) >> 38 == disp) - tcg_out32 (s, B | (disp & 0x3fffffc) | mask); - else { - tcg_out_movi (s, TCG_TYPE_I64, 0, (tcg_target_long) target); - tcg_out32 (s, MTSPR | RS (0) | CTR); - tcg_out32 (s, BCCTR | BO_ALWAYS | mask); + disp = target - (tcg_target_long)s->code_ptr; + if ((disp << 38) >> 38 == disp) { + tcg_out32(s, B | (disp & 0x3fffffc) | mask); + } else { + tcg_out_movi(s, TCG_TYPE_I64, 0, (tcg_target_long)target); + tcg_out32(s, MTSPR | RS(0) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS | mask); } } -static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg) +static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg) { #ifdef __APPLE__ if (const_arg) { - tcg_out_b (s, LK, arg); - } - else { - tcg_out32 (s, MTSPR | RS (arg) | LR); - tcg_out32 (s, BCLR | BO_ALWAYS | LK); + tcg_out_b(s, LK, arg); + } else { + tcg_out32(s, MTSPR | RS(arg) | LR); + tcg_out32(s, BCLR | BO_ALWAYS | LK); } #else - int reg; + int reg = arg; if (const_arg) { reg = 2; - tcg_out_movi (s, TCG_TYPE_I64, reg, arg); + tcg_out_movi(s, TCG_TYPE_I64, reg, arg); } - else reg = arg; - tcg_out32 (s, LD | RT (0) | RA (reg)); - tcg_out32 (s, MTSPR | RA (0) | CTR); - tcg_out32 (s, LD | RT (11) | RA (reg) | 16); - tcg_out32 (s, LD | RT (2) | RA (reg) | 8); - tcg_out32 (s, BCCTR | BO_ALWAYS | LK); + tcg_out32(s, LD | RT(0) | RA(reg)); + tcg_out32(s, MTSPR | RA(0) | CTR); + tcg_out32(s, LD | RT(11) | RA(reg) | 16); + tcg_out32(s, LD | RT(2) | RA(reg) | 8); + tcg_out32(s, BCCTR | BO_ALWAYS | LK); #endif } @@ -741,7 +740,7 @@ static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr, static void tcg_out_ldsta(TCGContext *s, TCGReg ret, TCGReg addr, int offset, int op1, int op2) { - if (offset == (int16_t) (offset & ~3)) { + if (offset == (int16_t)(offset & ~3)) { tcg_out32(s, op1 | TAI(ret, addr, offset)); } else { tcg_out_movi(s, TCG_TYPE_I64, 0, offset); @@ -749,7 +748,7 @@ static void tcg_out_ldsta(TCGContext *s, TCGReg ret, TCGReg addr, } } -#if defined (CONFIG_SOFTMMU) +#if defined(CONFIG_SOFTMMU) /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, int mmu_idx) */ static const void * const qemu_ld_helpers[4] = { @@ -783,7 +782,7 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2, tcg_out_rlw(s, RLWINM, r2, addr_reg, 0, (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS); #else - tcg_out_rld (s, RLDICL, r0, addr_reg, + tcg_out_rld(s, RLDICL, r0, addr_reg, 64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS); tcg_out_shli64(s, r0, r0, CPU_TLB_ENTRY_BITS); @@ -792,13 +791,12 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2, tcg_out32(s, LD_ADDR | TAI(r1, r0, offset)); if (!s_bits) { - tcg_out_rld (s, RLDICR, r2, addr_reg, 0, 63 - TARGET_PAGE_BITS); - } - else { - tcg_out_rld (s, RLDICL, r2, addr_reg, - 64 - TARGET_PAGE_BITS, - TARGET_PAGE_BITS - s_bits); - tcg_out_rld (s, RLDICL, r2, r2, TARGET_PAGE_BITS, 0); + tcg_out_rld(s, RLDICR, r2, addr_reg, 0, 63 - TARGET_PAGE_BITS); + } else { + tcg_out_rld(s, RLDICL, r2, addr_reg, + 64 - TARGET_PAGE_BITS, + TARGET_PAGE_BITS - s_bits); + tcg_out_rld(s, RLDICL, r2, r2, TARGET_PAGE_BITS, 0); } #endif } @@ -826,7 +824,7 @@ static const uint32_t qemu_exts_opc[4] = { EXTSB, EXTSH, EXTSW, 0 }; -static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) { TCGReg addr_reg, data_reg, r0, r1, rbase; uint32_t insn, s_bits; @@ -848,23 +846,23 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) r2 = 0; rbase = 0; - tcg_out_tlb_read (s, r0, r1, r2, addr_reg, s_bits, - offsetof (CPUArchState, tlb_table[mem_index][0].addr_read)); + tcg_out_tlb_read(s, r0, r1, r2, addr_reg, s_bits, + offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)); - tcg_out32 (s, CMP | BF (7) | RA (r2) | RB (r1) | CMP_L); + tcg_out32(s, CMP | BF(7) | RA(r2) | RB(r1) | CMP_L); label1_ptr = s->code_ptr; #ifdef FAST_PATH - tcg_out32 (s, BC | BI (7, CR_EQ) | BO_COND_TRUE); + tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_TRUE); #endif /* slow path */ ir = 3; - tcg_out_mov (s, TCG_TYPE_I64, ir++, TCG_AREG0); - tcg_out_mov (s, TCG_TYPE_I64, ir++, addr_reg); - tcg_out_movi (s, TCG_TYPE_I64, ir++, mem_index); + tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0); + tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg); + tcg_out_movi(s, TCG_TYPE_I64, ir++, mem_index); - tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1); + tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits], 1); if (opc & 4) { insn = qemu_exts_opc[s_bits]; @@ -873,11 +871,11 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) tcg_out_mov(s, TCG_TYPE_I64, data_reg, 3); } label2_ptr = s->code_ptr; - tcg_out32 (s, B); + tcg_out32(s, B); /* label1: fast path */ #ifdef FAST_PATH - reloc_pc14 (label1_ptr, (tcg_target_long) s->code_ptr); + reloc_pc14(label1_ptr, (tcg_target_long)s->code_ptr); #endif /* r0 now contains &env->tlb_table[mem_index][index].addr_read */ @@ -909,15 +907,15 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) insn = qemu_ldx_opc[s_bits]; tcg_out32(s, insn | TAB(data_reg, rbase, r0)); insn = qemu_exts_opc[s_bits]; - tcg_out32 (s, insn | RA(data_reg) | RS(data_reg)); + tcg_out32(s, insn | RA(data_reg) | RS(data_reg)); } #ifdef CONFIG_SOFTMMU - reloc_pc24 (label2_ptr, (tcg_target_long) s->code_ptr); + reloc_pc24(label2_ptr, (tcg_target_long)s->code_ptr); #endif } -static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) { TCGReg addr_reg, r0, r1, rbase, data_reg; uint32_t insn; @@ -938,38 +936,38 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) r2 = 0; rbase = 0; - tcg_out_tlb_read (s, r0, r1, r2, addr_reg, opc, - offsetof (CPUArchState, tlb_table[mem_index][0].addr_write)); + tcg_out_tlb_read(s, r0, r1, r2, addr_reg, opc, + offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); - tcg_out32 (s, CMP | BF (7) | RA (r2) | RB (r1) | CMP_L); + tcg_out32(s, CMP | BF(7) | RA(r2) | RB(r1) | CMP_L); label1_ptr = s->code_ptr; #ifdef FAST_PATH - tcg_out32 (s, BC | BI (7, CR_EQ) | BO_COND_TRUE); + tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_TRUE); #endif /* slow path */ ir = 3; - tcg_out_mov (s, TCG_TYPE_I64, ir++, TCG_AREG0); - tcg_out_mov (s, TCG_TYPE_I64, ir++, addr_reg); - tcg_out_rld (s, RLDICL, ir++, data_reg, 0, 64 - (1 << (3 + opc))); - tcg_out_movi (s, TCG_TYPE_I64, ir++, mem_index); + tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0); + tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg); + tcg_out_rld(s, RLDICL, ir++, data_reg, 0, 64 - (1 << (3 + opc))); + tcg_out_movi(s, TCG_TYPE_I64, ir++, mem_index); - tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1); + tcg_out_call(s, (tcg_target_long)qemu_st_helpers[opc], 1); label2_ptr = s->code_ptr; - tcg_out32 (s, B); + tcg_out32(s, B); /* label1: fast path */ #ifdef FAST_PATH - reloc_pc14 (label1_ptr, (tcg_target_long) s->code_ptr); + reloc_pc14(label1_ptr, (tcg_target_long)s->code_ptr); #endif - tcg_out32 (s, (LD - | RT (r0) - | RA (r0) - | (offsetof (CPUTLBEntry, addend) - - offsetof (CPUTLBEntry, addr_write)) + tcg_out32(s, (LD + | RT(r0) + | RA(r0) + | (offsetof(CPUTLBEntry, addend) + - offsetof(CPUTLBEntry, addr_write)) )); /* r0 = env->tlb_table[mem_index][index].addend */ tcg_out32(s, ADD | TAB(r0, r0, addr_reg)); @@ -995,11 +993,11 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) } #ifdef CONFIG_SOFTMMU - reloc_pc24 (label2_ptr, (tcg_target_long) s->code_ptr); + reloc_pc24(label2_ptr, (tcg_target_long)s->code_ptr); #endif } -static void tcg_target_qemu_prologue (TCGContext *s) +static void tcg_target_qemu_prologue(TCGContext *s) { int i, frame_size; #ifndef __APPLE__ @@ -1014,52 +1012,52 @@ static void tcg_target_qemu_prologue (TCGContext *s) + 8 /* link editor doubleword */ + 8 /* TOC save area */ + TCG_STATIC_CALL_ARGS_SIZE - + ARRAY_SIZE (tcg_target_callee_save_regs) * 8 + + ARRAY_SIZE(tcg_target_callee_save_regs) * 8 + CPU_TEMP_BUF_NLONGS * sizeof(long) ; frame_size = (frame_size + 15) & ~15; - tcg_set_frame (s, TCG_REG_CALL_STACK, frame_size - - CPU_TEMP_BUF_NLONGS * sizeof (long), - CPU_TEMP_BUF_NLONGS * sizeof (long)); + tcg_set_frame(s, TCG_REG_CALL_STACK, frame_size + - CPU_TEMP_BUF_NLONGS * sizeof(long), + CPU_TEMP_BUF_NLONGS * sizeof(long)); #ifndef __APPLE__ /* First emit adhoc function descriptor */ addr = (uint64_t) s->code_ptr + 24; - tcg_out32 (s, addr >> 32); tcg_out32 (s, addr); /* entry point */ + tcg_out32(s, addr >> 32); tcg_out32(s, addr); /* entry point */ s->code_ptr += 16; /* skip TOC and environment pointer */ #endif /* Prologue */ - tcg_out32 (s, MFSPR | RT (0) | LR); - tcg_out32 (s, STDU | RS (1) | RA (1) | (-frame_size & 0xffff)); - for (i = 0; i < ARRAY_SIZE (tcg_target_callee_save_regs); ++i) - tcg_out32 (s, (STD - | RS (tcg_target_callee_save_regs[i]) - | RA (1) + tcg_out32(s, MFSPR | RT(0) | LR); + tcg_out32(s, STDU | RS(1) | RA(1) | (-frame_size & 0xffff)); + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) + tcg_out32(s, (STD + | RS(tcg_target_callee_save_regs[i]) + | RA(1) | (i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE) ) ); - tcg_out32 (s, STD | RS (0) | RA (1) | (frame_size + 16)); + tcg_out32(s, STD | RS(0) | RA(1) | (frame_size + 16)); #ifdef CONFIG_USE_GUEST_BASE if (GUEST_BASE) { - tcg_out_movi (s, TCG_TYPE_I64, TCG_GUEST_BASE_REG, GUEST_BASE); - tcg_regset_set_reg (s->reserved_regs, TCG_GUEST_BASE_REG); + tcg_out_movi(s, TCG_TYPE_I64, TCG_GUEST_BASE_REG, GUEST_BASE); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } #endif - tcg_out_mov (s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - tcg_out32 (s, MTSPR | RS (tcg_target_call_iarg_regs[1]) | CTR); - tcg_out32 (s, BCCTR | BO_ALWAYS); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); /* Epilogue */ tb_ret_addr = s->code_ptr; - for (i = 0; i < ARRAY_SIZE (tcg_target_callee_save_regs); ++i) - tcg_out32 (s, (LD - | RT (tcg_target_callee_save_regs[i]) - | RA (1) + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) + tcg_out32(s, (LD + | RT(tcg_target_callee_save_regs[i]) + | RA(1) | (i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE) ) ); @@ -1072,19 +1070,21 @@ static void tcg_target_qemu_prologue (TCGContext *s) static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, intptr_t arg2) { - if (type == TCG_TYPE_I32) - tcg_out_ldst (s, ret, arg1, arg2, LWZ, LWZX); - else - tcg_out_ldsta (s, ret, arg1, arg2, LD, LDX); + if (type == TCG_TYPE_I32) { + tcg_out_ldst(s, ret, arg1, arg2, LWZ, LWZX); + } else { + tcg_out_ldsta(s, ret, arg1, arg2, LD, LDX); + } } static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, intptr_t arg2) { - if (type == TCG_TYPE_I32) - tcg_out_ldst (s, arg, arg1, arg2, STW, STWX); - else - tcg_out_ldsta (s, arg, arg1, arg2, STD, STDX); + if (type == TCG_TYPE_I32) { + tcg_out_ldst(s, arg, arg1, arg2, STW, STWX); + } else { + tcg_out_ldsta(s, arg, arg1, arg2, STD, STDX); + } } static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, @@ -1106,8 +1106,7 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, op = CMPI; imm = 1; break; - } - else if ((uint16_t) arg2 == arg2) { + } else if ((uint16_t) arg2 == arg2) { op = CMPLI; imm = 1; break; @@ -1148,7 +1147,7 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, break; default: - tcg_abort (); + tcg_abort(); } op |= BF(cr) | ((type == TCG_TYPE_I64) << 21); @@ -1292,13 +1291,13 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, case TCG_COND_GE: case TCG_COND_GEU: sh = 31; - crop = CRNOR | BT (7, CR_EQ) | BA (7, CR_LT) | BB (7, CR_LT); + crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT); goto crtest; case TCG_COND_LE: case TCG_COND_LEU: sh = 31; - crop = CRNOR | BT (7, CR_EQ) | BA (7, CR_GT) | BB (7, CR_GT); + crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT); crtest: tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); if (crop) { @@ -1309,22 +1308,22 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, break; default: - tcg_abort (); + tcg_abort(); } } -static void tcg_out_bc (TCGContext *s, int bc, int label_index) +static void tcg_out_bc(TCGContext *s, int bc, int label_index) { TCGLabel *l = &s->labels[label_index]; - if (l->has_value) - tcg_out32 (s, bc | reloc_pc14_val (s->code_ptr, l->u.value)); - else { + if (l->has_value) { + tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value)); + } else { uint16_t val = *(uint16_t *) &s->code_ptr[2]; /* Thanks to Andrzej Zaborowski */ - tcg_out32 (s, bc | (val & 0xfffc)); - tcg_out_reloc (s, s->code_ptr - 4, R_PPC_REL14, label_index, 0); + tcg_out32(s, bc | (val & 0xfffc)); + tcg_out_reloc(s, s->code_ptr - 4, R_PPC_REL14, label_index, 0); } } @@ -1384,37 +1383,36 @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, } } -void ppc_tb_set_jmp_target (unsigned long jmp_addr, unsigned long addr) +void ppc_tb_set_jmp_target(unsigned long jmp_addr, unsigned long addr) { TCGContext s; unsigned long patch_size; s.code_ptr = (uint8_t *) jmp_addr; - tcg_out_b (&s, 0, addr); + tcg_out_b(&s, 0, addr); patch_size = s.code_ptr - (uint8_t *) jmp_addr; - flush_icache_range (jmp_addr, jmp_addr + patch_size); + flush_icache_range(jmp_addr, jmp_addr + patch_size); } -static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, - const int *const_args) +static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, + const int *const_args) { TCGArg a0, a1, a2; int c; switch (opc) { case INDEX_op_exit_tb: - tcg_out_movi (s, TCG_TYPE_I64, TCG_REG_R3, args[0]); - tcg_out_b (s, 0, (tcg_target_long) tb_ret_addr); + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R3, args[0]); + tcg_out_b(s, 0, (tcg_target_long)tb_ret_addr); break; case INDEX_op_goto_tb: if (s->tb_jmp_offset) { - /* direct jump method */ - + /* Direct jump method. */ s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; s->code_ptr += 28; - } - else { - tcg_abort (); + } else { + /* Indirect jump method. */ + tcg_abort(); } s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; break; @@ -1423,67 +1421,66 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, TCGLabel *l = &s->labels[args[0]]; if (l->has_value) { - tcg_out_b (s, 0, l->u.value); - } - else { + tcg_out_b(s, 0, l->u.value); + } else { uint32_t val = *(uint32_t *) s->code_ptr; /* Thanks to Andrzej Zaborowski */ - tcg_out32 (s, B | (val & 0x3fffffc)); - tcg_out_reloc (s, s->code_ptr - 4, R_PPC_REL24, args[0], 0); + tcg_out32(s, B | (val & 0x3fffffc)); + tcg_out_reloc(s, s->code_ptr - 4, R_PPC_REL24, args[0], 0); } } break; case INDEX_op_call: - tcg_out_call (s, args[0], const_args[0]); + tcg_out_call(s, args[0], const_args[0]); break; case INDEX_op_movi_i32: - tcg_out_movi (s, TCG_TYPE_I32, args[0], args[1]); + tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]); break; case INDEX_op_movi_i64: - tcg_out_movi (s, TCG_TYPE_I64, args[0], args[1]); + tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]); break; case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: - tcg_out_ldst (s, args[0], args[1], args[2], LBZ, LBZX); + tcg_out_ldst(s, args[0], args[1], args[2], LBZ, LBZX); break; case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: - tcg_out_ldst (s, args[0], args[1], args[2], LBZ, LBZX); - tcg_out32 (s, EXTSB | RS (args[0]) | RA (args[0])); + tcg_out_ldst(s, args[0], args[1], args[2], LBZ, LBZX); + tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0])); break; case INDEX_op_ld16u_i32: case INDEX_op_ld16u_i64: - tcg_out_ldst (s, args[0], args[1], args[2], LHZ, LHZX); + tcg_out_ldst(s, args[0], args[1], args[2], LHZ, LHZX); break; case INDEX_op_ld16s_i32: case INDEX_op_ld16s_i64: - tcg_out_ldst (s, args[0], args[1], args[2], LHA, LHAX); + tcg_out_ldst(s, args[0], args[1], args[2], LHA, LHAX); break; case INDEX_op_ld_i32: case INDEX_op_ld32u_i64: - tcg_out_ldst (s, args[0], args[1], args[2], LWZ, LWZX); + tcg_out_ldst(s, args[0], args[1], args[2], LWZ, LWZX); break; case INDEX_op_ld32s_i64: - tcg_out_ldsta (s, args[0], args[1], args[2], LWA, LWAX); + tcg_out_ldsta(s, args[0], args[1], args[2], LWA, LWAX); break; case INDEX_op_ld_i64: - tcg_out_ldsta (s, args[0], args[1], args[2], LD, LDX); + tcg_out_ldsta(s, args[0], args[1], args[2], LD, LDX); break; case INDEX_op_st8_i32: case INDEX_op_st8_i64: - tcg_out_ldst (s, args[0], args[1], args[2], STB, STBX); + tcg_out_ldst(s, args[0], args[1], args[2], STB, STBX); break; case INDEX_op_st16_i32: case INDEX_op_st16_i64: - tcg_out_ldst (s, args[0], args[1], args[2], STH, STHX); + tcg_out_ldst(s, args[0], args[1], args[2], STH, STHX); break; case INDEX_op_st_i32: case INDEX_op_st32_i64: - tcg_out_ldst (s, args[0], args[1], args[2], STW, STWX); + tcg_out_ldst(s, args[0], args[1], args[2], STW, STWX); break; case INDEX_op_st_i64: - tcg_out_ldsta (s, args[0], args[1], args[2], STD, STDX); + tcg_out_ldsta(s, args[0], args[1], args[2], STD, STDX); break; case INDEX_op_add_i32: @@ -1607,32 +1604,33 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, break; case INDEX_op_div_i32: - tcg_out32 (s, DIVW | TAB (args[0], args[1], args[2])); + tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); break; case INDEX_op_divu_i32: - tcg_out32 (s, DIVWU | TAB (args[0], args[1], args[2])); + tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); break; case INDEX_op_shl_i32: if (const_args[2]) { tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31 - args[2]); } else { - tcg_out32 (s, SLW | SAB (args[1], args[0], args[2])); + tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); } break; case INDEX_op_shr_i32: if (const_args[2]) { tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], args[2], 31); } else { - tcg_out32 (s, SRW | SAB (args[1], args[0], args[2])); + tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); } break; case INDEX_op_sar_i32: - if (const_args[2]) - tcg_out32 (s, SRAWI | RS (args[1]) | RA (args[0]) | SH (args[2])); - else - tcg_out32 (s, SRAW | SAB (args[1], args[0], args[2])); + if (const_args[2]) { + tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2])); + } else { + tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); + } break; case INDEX_op_rotl_i32: if (const_args[2]) { @@ -1664,12 +1662,12 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_neg_i32: case INDEX_op_neg_i64: - tcg_out32 (s, NEG | RT (args[0]) | RA (args[1])); + tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); break; case INDEX_op_not_i32: case INDEX_op_not_i64: - tcg_out32 (s, NOR | SAB (args[1], args[0], args[1])); + tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); break; case INDEX_op_add_i64: @@ -1722,24 +1720,26 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, break; case INDEX_op_shl_i64: - if (const_args[2]) + if (const_args[2]) { tcg_out_shli64(s, args[0], args[1], args[2]); - else - tcg_out32 (s, SLD | SAB (args[1], args[0], args[2])); + } else { + tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); + } break; case INDEX_op_shr_i64: - if (const_args[2]) + if (const_args[2]) { tcg_out_shri64(s, args[0], args[1], args[2]); - else - tcg_out32 (s, SRD | SAB (args[1], args[0], args[2])); + } else { + tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); + } break; case INDEX_op_sar_i64: if (const_args[2]) { - int sh = SH (args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1); - tcg_out32 (s, SRADI | RA (args[0]) | RS (args[1]) | sh); + int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1); + tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh); + } else { + tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); } - else - tcg_out32 (s, SRAD | SAB (args[1], args[0], args[2])); break; case INDEX_op_rotl_i64: if (const_args[2]) { @@ -1766,45 +1766,45 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, } break; case INDEX_op_div_i64: - tcg_out32 (s, DIVD | TAB (args[0], args[1], args[2])); + tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); break; case INDEX_op_divu_i64: - tcg_out32 (s, DIVDU | TAB (args[0], args[1], args[2])); + tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); break; case INDEX_op_qemu_ld8u: - tcg_out_qemu_ld (s, args, 0); + tcg_out_qemu_ld(s, args, 0); break; case INDEX_op_qemu_ld8s: - tcg_out_qemu_ld (s, args, 0 | 4); + tcg_out_qemu_ld(s, args, 0 | 4); break; case INDEX_op_qemu_ld16u: - tcg_out_qemu_ld (s, args, 1); + tcg_out_qemu_ld(s, args, 1); break; case INDEX_op_qemu_ld16s: - tcg_out_qemu_ld (s, args, 1 | 4); + tcg_out_qemu_ld(s, args, 1 | 4); break; case INDEX_op_qemu_ld32: case INDEX_op_qemu_ld32u: - tcg_out_qemu_ld (s, args, 2); + tcg_out_qemu_ld(s, args, 2); break; case INDEX_op_qemu_ld32s: - tcg_out_qemu_ld (s, args, 2 | 4); + tcg_out_qemu_ld(s, args, 2 | 4); break; case INDEX_op_qemu_ld64: - tcg_out_qemu_ld (s, args, 3); + tcg_out_qemu_ld(s, args, 3); break; case INDEX_op_qemu_st8: - tcg_out_qemu_st (s, args, 0); + tcg_out_qemu_st(s, args, 0); break; case INDEX_op_qemu_st16: - tcg_out_qemu_st (s, args, 1); + tcg_out_qemu_st(s, args, 1); break; case INDEX_op_qemu_st32: - tcg_out_qemu_st (s, args, 2); + tcg_out_qemu_st(s, args, 2); break; case INDEX_op_qemu_st64: - tcg_out_qemu_st (s, args, 3); + tcg_out_qemu_st(s, args, 3); break; case INDEX_op_ext8s_i32: @@ -1819,16 +1819,16 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, c = EXTSW; goto gen_ext; gen_ext: - tcg_out32 (s, c | RS (args[1]) | RA (args[0])); + tcg_out32(s, c | RS(args[1]) | RA(args[0])); break; case INDEX_op_setcond_i32: - tcg_out_setcond (s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], - const_args[2]); + tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], + const_args[2]); break; case INDEX_op_setcond_i64: - tcg_out_setcond (s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], - const_args[2]); + tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], + const_args[2]); break; case INDEX_op_bswap16_i32: @@ -1980,8 +1980,8 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, break; default: - tcg_dump_ops (s); - tcg_abort (); + tcg_dump_ops(s); + tcg_abort(); } } @@ -2109,7 +2109,7 @@ static const TCGTargetOpDef ppc_op_defs[] = { { -1 }, }; -static void tcg_target_init (TCGContext *s) +static void tcg_target_init(TCGContext *s) { #ifdef CONFIG_GETAUXVAL unsigned long hwcap = getauxval(AT_HWCAP); @@ -2118,9 +2118,9 @@ static void tcg_target_init (TCGContext *s) } #endif - tcg_regset_set32 (tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); - tcg_regset_set32 (tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); - tcg_regset_set32 (tcg_target_call_clobber_regs, 0, + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); + tcg_regset_set32(tcg_target_call_clobber_regs, 0, (1 << TCG_REG_R0) | #ifdef __APPLE__ (1 << TCG_REG_R2) | @@ -2137,13 +2137,13 @@ static void tcg_target_init (TCGContext *s) (1 << TCG_REG_R12) ); - tcg_regset_clear (s->reserved_regs); - tcg_regset_set_reg (s->reserved_regs, TCG_REG_R0); - tcg_regset_set_reg (s->reserved_regs, TCG_REG_R1); + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); #ifndef __APPLE__ - tcg_regset_set_reg (s->reserved_regs, TCG_REG_R2); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); #endif - tcg_regset_set_reg (s->reserved_regs, TCG_REG_R13); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); - tcg_add_target_add_op_defs (ppc_op_defs); + tcg_add_target_add_op_defs(ppc_op_defs); } From 29b69198690f4b2754338c7c01f8ebe004b2efac Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 31 Aug 2013 05:23:23 -0700 Subject: [PATCH 09/21] tcg-ppc64: More use of TAI and SAI helper macros Finish conversion of all memory operations. Signed-off-by: Richard Henderson --- tcg/ppc64/tcg-target.c | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index b554b00e92..114e23db28 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -718,10 +718,10 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg) tcg_out_movi(s, TCG_TYPE_I64, reg, arg); } - tcg_out32(s, LD | RT(0) | RA(reg)); + tcg_out32(s, LD | TAI(0, reg, 0)); tcg_out32(s, MTSPR | RA(0) | CTR); - tcg_out32(s, LD | RT(11) | RA(reg) | 16); - tcg_out32(s, LD | RT(2) | RA(reg) | 8); + tcg_out32(s, LD | TAI(11, reg, 16)); + tcg_out32(s, LD | TAI(2, reg, 8)); tcg_out32(s, BCCTR | BO_ALWAYS | LK); #endif } @@ -963,12 +963,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) reloc_pc14(label1_ptr, (tcg_target_long)s->code_ptr); #endif - tcg_out32(s, (LD - | RT(r0) - | RA(r0) - | (offsetof(CPUTLBEntry, addend) - - offsetof(CPUTLBEntry, addr_write)) - )); + tcg_out32(s, LD | TAI(r0, r0, + offsetof(CPUTLBEntry, addend) + - offsetof(CPUTLBEntry, addr_write))); /* r0 = env->tlb_table[mem_index][index].addend */ tcg_out32(s, ADD | TAB(r0, r0, addr_reg)); /* r0 = env->tlb_table[mem_index][index].addend + addr */ @@ -1030,15 +1027,12 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* Prologue */ tcg_out32(s, MFSPR | RT(0) | LR); - tcg_out32(s, STDU | RS(1) | RA(1) | (-frame_size & 0xffff)); - for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) - tcg_out32(s, (STD - | RS(tcg_target_callee_save_regs[i]) - | RA(1) - | (i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE) - ) - ); - tcg_out32(s, STD | RS(0) | RA(1) | (frame_size + 16)); + tcg_out32(s, STDU | SAI(1, 1, -frame_size)); + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { + tcg_out32(s, STD | SAI(tcg_target_callee_save_regs[i], 1, + i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE)); + } + tcg_out32(s, STD | SAI(0, 1, frame_size + 16)); #ifdef CONFIG_USE_GUEST_BASE if (GUEST_BASE) { @@ -1054,13 +1048,10 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* Epilogue */ tb_ret_addr = s->code_ptr; - for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) - tcg_out32(s, (LD - | RT(tcg_target_callee_save_regs[i]) - | RA(1) - | (i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE) - ) - ); + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { + tcg_out32(s, LD | TAI(tcg_target_callee_save_regs[i], 1, + i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE)); + } tcg_out32(s, LD | TAI(0, 1, frame_size + 16)); tcg_out32(s, MTSPR | RS(0) | LR); tcg_out32(s, ADDI | TAI(1, 1, frame_size)); From 8327a470df78cb41de95f6be0133a59e0a721e2c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 31 Aug 2013 05:41:45 -0700 Subject: [PATCH 10/21] tcg-ppc64: Use TCG_REG_Rn constants Instead of bare N, for clarity. The only (intentional) exception made is for insns that encode R|0, i.e. when R0 encoded into the insn is interpreted as zero not the contents of the register. Signed-off-by: Richard Henderson --- tcg/ppc64/tcg-target.c | 96 +++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 114e23db28..848029fbe0 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -637,8 +637,8 @@ static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) } else if (mask_operand(c, &mb, &me)) { tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); } else { - tcg_out_movi(s, TCG_TYPE_I32, 0, c); - tcg_out32(s, AND | SAB(src, dst, 0)); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c); + tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); } } @@ -659,8 +659,8 @@ static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c) tcg_out_rld(s, RLDICL, dst, src, 0, mb); } } else { - tcg_out_movi(s, TCG_TYPE_I64, 0, c); - tcg_out32(s, AND | SAB(src, dst, 0)); + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c); + tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); } } @@ -695,8 +695,8 @@ static void tcg_out_b(TCGContext *s, int mask, tcg_target_long target) if ((disp << 38) >> 38 == disp) { tcg_out32(s, B | (disp & 0x3fffffc) | mask); } else { - tcg_out_movi(s, TCG_TYPE_I64, 0, (tcg_target_long)target); - tcg_out32(s, MTSPR | RS(0) | CTR); + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, (tcg_target_long)target); + tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR); tcg_out32(s, BCCTR | BO_ALWAYS | mask); } } @@ -714,14 +714,14 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg) int reg = arg; if (const_arg) { - reg = 2; + reg = TCG_REG_R2; tcg_out_movi(s, TCG_TYPE_I64, reg, arg); } - tcg_out32(s, LD | TAI(0, reg, 0)); - tcg_out32(s, MTSPR | RA(0) | CTR); - tcg_out32(s, LD | TAI(11, reg, 16)); - tcg_out32(s, LD | TAI(2, reg, 8)); + tcg_out32(s, LD | TAI(TCG_REG_R0, reg, 0)); + tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); + tcg_out32(s, LD | TAI(TCG_REG_R11, reg, 16)); + tcg_out32(s, LD | TAI(TCG_REG_R2, reg, 8)); tcg_out32(s, BCCTR | BO_ALWAYS | LK); #endif } @@ -732,8 +732,8 @@ static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr, if (offset == (int16_t) offset) { tcg_out32(s, op1 | TAI(ret, addr, offset)); } else { - tcg_out_movi(s, TCG_TYPE_I64, 0, offset); - tcg_out32(s, op2 | TAB(ret, addr, 0)); + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, offset); + tcg_out32(s, op2 | TAB(ret, addr, TCG_REG_R0)); } } @@ -743,8 +743,8 @@ static void tcg_out_ldsta(TCGContext *s, TCGReg ret, TCGReg addr, if (offset == (int16_t)(offset & ~3)) { tcg_out32(s, op1 | TAI(ret, addr, offset)); } else { - tcg_out_movi(s, TCG_TYPE_I64, 0, offset); - tcg_out32(s, op2 | TAB(ret, addr, 0)); + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, offset); + tcg_out32(s, op2 | TAB(ret, addr, TCG_REG_R0)); } } @@ -841,9 +841,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) #ifdef CONFIG_SOFTMMU mem_index = *args; - r0 = 3; - r1 = 4; - r2 = 0; + r0 = TCG_REG_R3; + r1 = TCG_REG_R4; + r2 = TCG_REG_R0; rbase = 0; tcg_out_tlb_read(s, r0, r1, r2, addr_reg, s_bits, @@ -857,7 +857,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) #endif /* slow path */ - ir = 3; + ir = TCG_REG_R3; tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0); tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg); tcg_out_movi(s, TCG_TYPE_I64, ir++, mem_index); @@ -866,9 +866,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) if (opc & 4) { insn = qemu_exts_opc[s_bits]; - tcg_out32(s, insn | RA(data_reg) | RS(3)); - } else if (data_reg != 3) { - tcg_out_mov(s, TCG_TYPE_I64, data_reg, 3); + tcg_out32(s, insn | RA(data_reg) | RS(TCG_REG_R3)); + } else if (data_reg != TCG_REG_R3) { + tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R3); } label2_ptr = s->code_ptr; tcg_out32(s, B); @@ -891,7 +891,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) tcg_out_ext32u(s, addr_reg, addr_reg); #endif r0 = addr_reg; - r1 = 3; + r1 = TCG_REG_R3; rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; #endif @@ -931,9 +931,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) #ifdef CONFIG_SOFTMMU mem_index = *args; - r0 = 3; - r1 = 4; - r2 = 0; + r0 = TCG_REG_R3; + r1 = TCG_REG_R4; + r2 = TCG_REG_R0; rbase = 0; tcg_out_tlb_read(s, r0, r1, r2, addr_reg, opc, @@ -947,7 +947,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) #endif /* slow path */ - ir = 3; + ir = TCG_REG_R3; tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0); tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg); tcg_out_rld(s, RLDICL, ir++, data_reg, 0, 64 - (1 << (3 + opc))); @@ -974,7 +974,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) #if TARGET_LONG_BITS == 32 tcg_out_ext32u(s, addr_reg, addr_reg); #endif - r1 = 3; + r1 = TCG_REG_R3; r0 = addr_reg; rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; #endif @@ -983,8 +983,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) if (!HAVE_ISA_2_06 && insn == STDBRX) { tcg_out32(s, STWBRX | SAB(data_reg, rbase, r0)); tcg_out32(s, ADDI | TAI(r1, r0, 4)); - tcg_out_shri64(s, 0, data_reg, 32); - tcg_out32(s, STWBRX | SAB(0, rbase, r1)); + tcg_out_shri64(s, TCG_REG_R0, data_reg, 32); + tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, r1)); } else { tcg_out32(s, insn | SAB(data_reg, rbase, r0)); } @@ -1026,13 +1026,13 @@ static void tcg_target_qemu_prologue(TCGContext *s) #endif /* Prologue */ - tcg_out32(s, MFSPR | RT(0) | LR); - tcg_out32(s, STDU | SAI(1, 1, -frame_size)); + tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); + tcg_out32(s, STDU | SAI(TCG_REG_R1, TCG_REG_R1, -frame_size)); for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { tcg_out32(s, STD | SAI(tcg_target_callee_save_regs[i], 1, i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE)); } - tcg_out32(s, STD | SAI(0, 1, frame_size + 16)); + tcg_out32(s, STD | SAI(TCG_REG_R0, TCG_REG_R1, frame_size + 16)); #ifdef CONFIG_USE_GUEST_BASE if (GUEST_BASE) { @@ -1049,12 +1049,12 @@ static void tcg_target_qemu_prologue(TCGContext *s) tb_ret_addr = s->code_ptr; for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { - tcg_out32(s, LD | TAI(tcg_target_callee_save_regs[i], 1, + tcg_out32(s, LD | TAI(tcg_target_callee_save_regs[i], TCG_REG_R1, i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE)); } - tcg_out32(s, LD | TAI(0, 1, frame_size + 16)); - tcg_out32(s, MTSPR | RS(0) | LR); - tcg_out32(s, ADDI | TAI(1, 1, frame_size)); + tcg_out32(s, LD | TAI(TCG_REG_R0, TCG_REG_R1, frame_size + 16)); + tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); + tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, frame_size)); tcg_out32(s, BCLR | BO_ALWAYS); } @@ -1146,8 +1146,8 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff)); } else { if (const_arg2) { - tcg_out_movi(s, type, 0, arg2); - arg2 = 0; + tcg_out_movi(s, type, TCG_REG_R0, arg2); + arg2 = TCG_REG_R0; } tcg_out32(s, op | RA(arg1) | RB(arg2)); } @@ -1168,8 +1168,8 @@ static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src) tcg_out32(s, ADDIC | TAI(dst, src, -1)); tcg_out32(s, SUBFE | TAB(dst, dst, src)); } else { - tcg_out32(s, ADDIC | TAI(0, src, -1)); - tcg_out32(s, SUBFE | TAB(dst, 0, src)); + tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); + tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src)); } } @@ -1350,7 +1350,7 @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, } /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. */ if (v2 == 0) { - tcg_out_movi(s, type, 0, 0); + tcg_out_movi(s, type, TCG_REG_R0, 0); } tcg_out32(s, isel | TAB(dest, v1, v2)); } else { @@ -1635,8 +1635,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, if (const_args[2]) { tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); } else { - tcg_out32(s, SUBFIC | TAI(0, args[2], 32)); - tcg_out32(s, RLWNM | SAB(args[1], args[0], 0) + tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); + tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) | MB(0) | ME(31)); } break; @@ -1743,8 +1743,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, if (const_args[2]) { tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); } else { - tcg_out32(s, SUBFIC | TAI(0, args[2], 64)); - tcg_out32(s, RLDCL | SAB(args[1], args[0], 0) | MB64(0)); + tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); + tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); } break; @@ -1861,9 +1861,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, break; case INDEX_op_bswap64_i64: - a0 = args[0], a1 = args[1], a2 = 0; + a0 = args[0], a1 = args[1], a2 = TCG_REG_R0; if (a0 == a1) { - a0 = 0; + a0 = TCG_REG_R0; a2 = a1; } From 5e0f40cfedecfbc2a0608d75e8a8d22173f23431 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 31 Aug 2013 05:46:25 -0700 Subject: [PATCH 11/21] tcg-ppc64: Use tcg_out64 Signed-off-by: Richard Henderson --- tcg/ppc64/tcg-target.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 848029fbe0..27a955ba2a 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -997,9 +997,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) static void tcg_target_qemu_prologue(TCGContext *s) { int i, frame_size; -#ifndef __APPLE__ - uint64_t addr; -#endif frame_size = 0 + 8 /* back chain */ @@ -1020,8 +1017,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) #ifndef __APPLE__ /* First emit adhoc function descriptor */ - addr = (uint64_t) s->code_ptr + 24; - tcg_out32(s, addr >> 32); tcg_out32(s, addr); /* entry point */ + tcg_out64(s, (uint64_t)s->code_ptr + 24); /* entry point */ s->code_ptr += 16; /* skip TOC and environment pointer */ #endif From f8b84129073d600cef20d526814b9bdd15c2e1ba Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 30 Jul 2013 18:26:04 -1000 Subject: [PATCH 12/21] tcg-ppc64: Avoid code for nop move While these are rare from code that's been through the optimizer, it's not uncommon within the tcg backend. Signed-off-by: Richard Henderson --- tcg/ppc64/tcg-target.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 27a955ba2a..357f8c11de 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -509,7 +509,9 @@ static const uint32_t tcg_to_isel[] = { static inline void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { - tcg_out32(s, OR | SAB(arg, ret, arg)); + if (ret != arg) { + tcg_out32(s, OR | SAB(arg, ret, arg)); + } } static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, From ad94e1a9db52de4ddfd9940324249518e0265902 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 30 Jul 2013 23:14:19 -0700 Subject: [PATCH 13/21] tcg-ppc64: Don't load the static chain from TCG There are no helpers that require the static chain. Signed-off-by: Richard Henderson --- tcg/ppc64/tcg-target.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 357f8c11de..5ac62bf40d 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -722,7 +722,6 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg) tcg_out32(s, LD | TAI(TCG_REG_R0, reg, 0)); tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); - tcg_out32(s, LD | TAI(TCG_REG_R11, reg, 16)); tcg_out32(s, LD | TAI(TCG_REG_R2, reg, 8)); tcg_out32(s, BCCTR | BO_ALWAYS | LK); #endif From d40f3cb1128208d901b6224b52ff36ff05680d28 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 31 Aug 2013 06:13:49 -0700 Subject: [PATCH 14/21] tcg-ppc64: Fold constant call address into descriptor load Eliminates one insn per call: : lis r2,4165 -: ori r2,r2,59616 -: ld r0,0(r2) +: ld r0,-5920(r2) : mtctr r0 -: ld r2,8(r2) +: ld r2,-5912(r2) : bctrl Signed-off-by: Richard Henderson --- tcg/ppc64/tcg-target.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 5ac62bf40d..8eb04060e6 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -713,16 +713,24 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg) tcg_out32(s, BCLR | BO_ALWAYS | LK); } #else - int reg = arg; + TCGReg reg = arg; + int ofs = 0; if (const_arg) { + /* Fold the low bits of the constant into the addresses below. */ + ofs = (int16_t)arg; + if (ofs + 8 < 0x8000) { + arg -= ofs; + } else { + ofs = 0; + } reg = TCG_REG_R2; tcg_out_movi(s, TCG_TYPE_I64, reg, arg); } - tcg_out32(s, LD | TAI(TCG_REG_R0, reg, 0)); + tcg_out32(s, LD | TAI(TCG_REG_R0, reg, ofs)); tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); - tcg_out32(s, LD | TAI(TCG_REG_R2, reg, 8)); + tcg_out32(s, LD | TAI(TCG_REG_R2, reg, ofs + 8)); tcg_out32(s, BCCTR | BO_ALWAYS | LK); #endif } From b0940da012c4c80145fdcf1730620f28ce80c2d8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 31 Aug 2013 06:30:45 -0700 Subject: [PATCH 15/21] tcg-ppc64: Look through a constant function descriptor Especially in the user-only configurations, a direct branch into the executable may be in range. Signed-off-by: Richard Henderson --- tcg/ppc64/tcg-target.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 8eb04060e6..0659dd6e66 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -173,14 +173,17 @@ static const int tcg_target_callee_save_regs[] = { TCG_REG_R31 }; +static inline bool in_range_b(tcg_target_long target) +{ + return target == sextract64(target, 0, 26); +} + static uint32_t reloc_pc24_val(void *pc, tcg_target_long target) { tcg_target_long disp; disp = target - (tcg_target_long)pc; - if ((disp << 38) >> 38 != disp) { - tcg_abort(); - } + assert(in_range_b(disp)); return disp & 0x3fffffc; } @@ -694,7 +697,7 @@ static void tcg_out_b(TCGContext *s, int mask, tcg_target_long target) tcg_target_long disp; disp = target - (tcg_target_long)s->code_ptr; - if ((disp << 38) >> 38 == disp) { + if (in_range_b(disp)) { tcg_out32(s, B | (disp & 0x3fffffc) | mask); } else { tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, (tcg_target_long)target); @@ -717,6 +720,18 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg) int ofs = 0; if (const_arg) { + /* Look through the descriptor. If the branch is in range, and we + don't have to spend too much effort on building the toc. */ + intptr_t tgt = ((intptr_t *)arg)[0]; + intptr_t toc = ((intptr_t *)arg)[1]; + intptr_t diff = tgt - (intptr_t)s->code_ptr; + + if (in_range_b(diff) && toc == (uint32_t)toc) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, toc); + tcg_out_b(s, LK, tgt); + return; + } + /* Fold the low bits of the constant into the addresses below. */ ofs = (int16_t)arg; if (ofs + 8 < 0x8000) { From 5e1702b0742b7cc88e85dfe76c3ba5d1432312aa Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 31 Jul 2013 10:18:49 -0700 Subject: [PATCH 16/21] tcg-ppc64: Tidy register allocation order Remove conditionalization from tcg_target_reg_alloc_order, relying on reserved_regs to prevent register allocation that shouldn't happen. So R11 is now present in reg_alloc_order for __APPLE__, but also now reserved. Sort reg_alloc_order into call-saved, call-clobbered, and parameters. This reduces the effect of values getting spilled and reloaded before function calls. Whether or not it is reserved, R2 (TOC) is always call-clobbered. Signed-off-by: Richard Henderson --- tcg/ppc64/tcg-target.c | 51 +++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 0659dd6e66..c01a8bb546 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -99,7 +99,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #endif static const int tcg_target_reg_alloc_order[] = { - TCG_REG_R14, + TCG_REG_R14, /* call saved registers */ TCG_REG_R15, TCG_REG_R16, TCG_REG_R17, @@ -109,29 +109,25 @@ static const int tcg_target_reg_alloc_order[] = { TCG_REG_R21, TCG_REG_R22, TCG_REG_R23, + TCG_REG_R24, + TCG_REG_R25, + TCG_REG_R26, + TCG_REG_R27, TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31, -#ifdef __APPLE__ - TCG_REG_R2, -#endif - TCG_REG_R3, - TCG_REG_R4, - TCG_REG_R5, - TCG_REG_R6, - TCG_REG_R7, - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10, -#ifndef __APPLE__ + TCG_REG_R12, /* call clobbered, non-arguments */ TCG_REG_R11, -#endif - TCG_REG_R12, - TCG_REG_R24, - TCG_REG_R25, - TCG_REG_R26, - TCG_REG_R27 + TCG_REG_R2, + TCG_REG_R10, /* call clobbered, arguments */ + TCG_REG_R9, + TCG_REG_R8, + TCG_REG_R7, + TCG_REG_R6, + TCG_REG_R5, + TCG_REG_R4, + TCG_REG_R3, }; static const int tcg_target_call_iarg_regs[] = { @@ -2133,9 +2129,7 @@ static void tcg_target_init(TCGContext *s) tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); tcg_regset_set32(tcg_target_call_clobber_regs, 0, (1 << TCG_REG_R0) | -#ifdef __APPLE__ (1 << TCG_REG_R2) | -#endif (1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5) | @@ -2145,16 +2139,17 @@ static void tcg_target_init(TCGContext *s) (1 << TCG_REG_R9) | (1 << TCG_REG_R10) | (1 << TCG_REG_R11) | - (1 << TCG_REG_R12) - ); + (1 << TCG_REG_R12)); tcg_regset_clear(s->reserved_regs); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); -#ifndef __APPLE__ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ +#ifdef __APPLE__ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R11); /* ??? */ +#else + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc */ #endif - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ tcg_add_target_add_op_defs(ppc_op_defs); } From b18d5d2b80ba0fd33edabae72fd7e7ad6f20316a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 31 Jul 2013 11:36:42 -0700 Subject: [PATCH 17/21] tcg-ppc64: Handle long offsets better Previously we'd only handle 16-bit offsets from memory operand without falling back to indexed, but it's easy to use ADDIS to handle full 32-bit offsets. This also lets us unify code that existed inline in tcg_out_op for handling addition of large constants. The new R2 temporary was marked reserved for the AIX calling convention, but the register really is call-clobbered and since tcg generated code has no use for a TOC, it's available for use. Signed-off-by: Richard Henderson --- tcg/ppc64/tcg-target.c | 149 +++++++++++++++++++++-------------------- 1 file changed, 75 insertions(+), 74 deletions(-) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index c01a8bb546..51d2b06142 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -119,7 +119,6 @@ static const int tcg_target_reg_alloc_order[] = { TCG_REG_R31, TCG_REG_R12, /* call clobbered, non-arguments */ TCG_REG_R11, - TCG_REG_R2, TCG_REG_R10, /* call clobbered, arguments */ TCG_REG_R9, TCG_REG_R8, @@ -746,25 +745,55 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg) #endif } -static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr, - int offset, int op1, int op2) +static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, + TCGReg base, tcg_target_long offset) { - if (offset == (int16_t) offset) { - tcg_out32(s, op1 | TAI(ret, addr, offset)); - } else { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, offset); - tcg_out32(s, op2 | TAB(ret, addr, TCG_REG_R0)); - } -} + tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; + TCGReg rs = TCG_REG_R2; -static void tcg_out_ldsta(TCGContext *s, TCGReg ret, TCGReg addr, - int offset, int op1, int op2) -{ - if (offset == (int16_t)(offset & ~3)) { - tcg_out32(s, op1 | TAI(ret, addr, offset)); - } else { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, offset); - tcg_out32(s, op2 | TAB(ret, addr, TCG_REG_R0)); + assert(rt != TCG_REG_R2 && base != TCG_REG_R2); + + switch (opi) { + case LD: case LWA: + align = 3; + /* FALLTHRU */ + default: + if (rt != TCG_REG_R0) { + rs = rt; + } + break; + case STD: + align = 3; + break; + case STB: case STH: case STW: + break; + } + + /* For unaligned, or very large offsets, use the indexed form. */ + if (offset & align || offset != (int32_t)offset) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, orig); + tcg_out32(s, opx | TAB(rt, base, TCG_REG_R2)); + return; + } + + l0 = (int16_t)offset; + offset = (offset - l0) >> 16; + l1 = (int16_t)offset; + + if (l1 < 0 && orig >= 0) { + extra = 0x4000; + l1 = (int16_t)(offset - 0x4000); + } + if (l1) { + tcg_out32(s, ADDIS | TAI(rs, base, l1)); + base = rs; + } + if (extra) { + tcg_out32(s, ADDIS | TAI(rs, base, extra)); + base = rs; + } + if (opi != ADDI || base != rt || l0 != 0) { + tcg_out32(s, opi | TAI(rt, base, l0)); } } @@ -1074,24 +1103,30 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out32(s, BCLR | BO_ALWAYS); } -static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, - intptr_t arg2) +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, + TCGReg arg1, intptr_t arg2) { + int opi, opx; + if (type == TCG_TYPE_I32) { - tcg_out_ldst(s, ret, arg1, arg2, LWZ, LWZX); + opi = LWZ, opx = LWZX; } else { - tcg_out_ldsta(s, ret, arg1, arg2, LD, LDX); + opi = LD, opx = LDX; } + tcg_out_mem_long(s, opi, opx, ret, arg1, arg2); } -static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, - intptr_t arg2) +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) { + int opi, opx; + if (type == TCG_TYPE_I32) { - tcg_out_ldst(s, arg, arg1, arg2, STW, STWX); + opi = STW, opx = STWX; } else { - tcg_out_ldsta(s, arg, arg1, arg2, STD, STDX); + opi = STD, opx = STDX; } + tcg_out_mem_long(s, opi, opx, arg, arg1, arg2); } static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, @@ -1449,61 +1484,52 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, break; case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: - tcg_out_ldst(s, args[0], args[1], args[2], LBZ, LBZX); + tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); break; case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: - tcg_out_ldst(s, args[0], args[1], args[2], LBZ, LBZX); + tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0])); break; case INDEX_op_ld16u_i32: case INDEX_op_ld16u_i64: - tcg_out_ldst(s, args[0], args[1], args[2], LHZ, LHZX); + tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); break; case INDEX_op_ld16s_i32: case INDEX_op_ld16s_i64: - tcg_out_ldst(s, args[0], args[1], args[2], LHA, LHAX); + tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); break; case INDEX_op_ld_i32: case INDEX_op_ld32u_i64: - tcg_out_ldst(s, args[0], args[1], args[2], LWZ, LWZX); + tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); break; case INDEX_op_ld32s_i64: - tcg_out_ldsta(s, args[0], args[1], args[2], LWA, LWAX); + tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); break; case INDEX_op_ld_i64: - tcg_out_ldsta(s, args[0], args[1], args[2], LD, LDX); + tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); break; case INDEX_op_st8_i32: case INDEX_op_st8_i64: - tcg_out_ldst(s, args[0], args[1], args[2], STB, STBX); + tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); break; case INDEX_op_st16_i32: case INDEX_op_st16_i64: - tcg_out_ldst(s, args[0], args[1], args[2], STH, STHX); + tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); break; case INDEX_op_st_i32: case INDEX_op_st32_i64: - tcg_out_ldst(s, args[0], args[1], args[2], STW, STWX); + tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); break; case INDEX_op_st_i64: - tcg_out_ldsta(s, args[0], args[1], args[2], STD, STDX); + tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); break; case INDEX_op_add_i32: a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - int32_t l, h; do_addi_32: - l = (int16_t)a2; - h = a2 - l; - if (h) { - tcg_out32(s, ADDIS | TAI(a0, a1, h >> 16)); - a1 = a0; - } - if (l || a0 != a1) { - tcg_out32(s, ADDI | TAI(a0, a1, l)); - } + tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); } else { tcg_out32(s, ADD | TAB(a0, a1, a2)); } @@ -1680,32 +1706,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_add_i64: a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - int32_t l0, h1, h2; do_addi_64: - /* We can always split any 32-bit signed constant into 3 pieces. - Note the positive 0x80000000 coming from the sub_i64 path, - handled with the same code we need for eg 0x7fff8000. */ - assert(a2 == (int32_t)a2 || a2 == 0x80000000); - l0 = (int16_t)a2; - h1 = a2 - l0; - h2 = 0; - if (h1 < 0 && (int64_t)a2 > 0) { - h2 = 0x40000000; - h1 = a2 - h2 - l0; - } - assert((TCGArg)h2 + h1 + l0 == a2); - - if (h2) { - tcg_out32(s, ADDIS | TAI(a0, a1, h2 >> 16)); - a1 = a0; - } - if (h1) { - tcg_out32(s, ADDIS | TAI(a0, a1, h1 >> 16)); - a1 = a0; - } - if (l0 || a0 != a1) { - tcg_out32(s, ADDI | TAI(a0, a1, l0)); - } + tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); } else { tcg_out32(s, ADD | TAB(a0, a1, a2)); } @@ -2144,10 +2146,9 @@ static void tcg_target_init(TCGContext *s) tcg_regset_clear(s->reserved_regs); tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* mem temp */ #ifdef __APPLE__ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R11); /* ??? */ -#else - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc */ #endif tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ From fa94c3be7a3fc7f1beaa3b031da7199ae3c5ddc8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 31 Aug 2013 04:44:21 -0700 Subject: [PATCH 18/21] tcg-ppc64: Implement tcg_register_jit Signed-off-by: Richard Henderson --- tcg/ppc64/tcg-target.c | 94 ++++++++++++++++++++++++++++++++---------- 1 file changed, 72 insertions(+), 22 deletions(-) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 51d2b06142..8f58831c43 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -1043,25 +1043,26 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) #endif } +#define FRAME_SIZE ((int) \ + ((8 /* back chain */ \ + + 8 /* CR */ \ + + 8 /* LR */ \ + + 8 /* compiler doubleword */ \ + + 8 /* link editor doubleword */ \ + + 8 /* TOC save area */ \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_NLONGS * sizeof(long) \ + + ARRAY_SIZE(tcg_target_callee_save_regs) * 8 \ + + 15) & ~15)) + +#define REG_SAVE_BOT (FRAME_SIZE - ARRAY_SIZE(tcg_target_callee_save_regs) * 8) + static void tcg_target_qemu_prologue(TCGContext *s) { - int i, frame_size; + int i; - frame_size = 0 - + 8 /* back chain */ - + 8 /* CR */ - + 8 /* LR */ - + 8 /* compiler doubleword */ - + 8 /* link editor doubleword */ - + 8 /* TOC save area */ - + TCG_STATIC_CALL_ARGS_SIZE - + ARRAY_SIZE(tcg_target_callee_save_regs) * 8 - + CPU_TEMP_BUF_NLONGS * sizeof(long) - ; - frame_size = (frame_size + 15) & ~15; - - tcg_set_frame(s, TCG_REG_CALL_STACK, frame_size - - CPU_TEMP_BUF_NLONGS * sizeof(long), + tcg_set_frame(s, TCG_REG_CALL_STACK, + REG_SAVE_BOT - CPU_TEMP_BUF_NLONGS * sizeof(long), CPU_TEMP_BUF_NLONGS * sizeof(long)); #ifndef __APPLE__ @@ -1072,12 +1073,12 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* Prologue */ tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); - tcg_out32(s, STDU | SAI(TCG_REG_R1, TCG_REG_R1, -frame_size)); + tcg_out32(s, STDU | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE)); for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { tcg_out32(s, STD | SAI(tcg_target_callee_save_regs[i], 1, - i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE)); + REG_SAVE_BOT + i * 8)); } - tcg_out32(s, STD | SAI(TCG_REG_R0, TCG_REG_R1, frame_size + 16)); + tcg_out32(s, STD | SAI(TCG_REG_R0, TCG_REG_R1, FRAME_SIZE + 16)); #ifdef CONFIG_USE_GUEST_BASE if (GUEST_BASE) { @@ -1095,11 +1096,11 @@ static void tcg_target_qemu_prologue(TCGContext *s) for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { tcg_out32(s, LD | TAI(tcg_target_callee_save_regs[i], TCG_REG_R1, - i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE)); + REG_SAVE_BOT + i * 8)); } - tcg_out32(s, LD | TAI(TCG_REG_R0, TCG_REG_R1, frame_size + 16)); + tcg_out32(s, LD | TAI(TCG_REG_R0, TCG_REG_R1, FRAME_SIZE + 16)); tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); - tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, frame_size)); + tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE)); tcg_out32(s, BCLR | BO_ALWAYS); } @@ -2154,3 +2155,52 @@ static void tcg_target_init(TCGContext *s) tcg_add_target_add_op_defs(ppc_op_defs); } + +typedef struct { + DebugFrameCIE cie; + DebugFrameFDEHeader fde; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3]; +} DebugFrame; + +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +#define ELF_HOST_MACHINE EM_PPC64 + +static DebugFrame debug_frame = { + .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .cie.id = -1, + .cie.version = 1, + .cie.code_align = 1, + .cie.data_align = 0x78, /* sleb128 -8 */ + .cie.return_column = 65, + + /* Total FDE size does not include the "len" member. */ + .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), + + .fde_def_cfa = { + 12, 1, /* DW_CFA_def_cfa r1, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + 0x11, 65, 0x7e, /* DW_CFA_offset_extended_sf, lr, 16 */ + } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + uint8_t *p = &debug_frame.fde_reg_ofs[3]; + int i; + + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) { + p[0] = 0x80 + tcg_target_callee_save_regs[i]; + p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * 8)) / 8; + } + + debug_frame.fde.func_start = (tcg_target_long) buf; + debug_frame.fde.func_len = buf_size; + + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} From fedee3e7fda16e2ca438d2de6e76e4d434bcd3bb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 31 Jul 2013 15:11:44 -0700 Subject: [PATCH 19/21] tcg-ppc64: Streamline tcg_out_tlb_read Less conditional compilation. Merge an add insn with the indexed memory load insn. Load the tlb addend earlier. Avoid the address update memory form. Fix a bug in not allowing large enough tlb offsets for some guests. Signed-off-by: Richard Henderson --- tcg/ppc64/tcg-target.c | 196 ++++++++++++++++++++--------------------- 1 file changed, 98 insertions(+), 98 deletions(-) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 8f58831c43..2076299bd0 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -31,13 +31,11 @@ static uint8_t *tb_ret_addr; -#define FAST_PATH - #if TARGET_LONG_BITS == 32 -#define LD_ADDR LWZU +#define LD_ADDR LWZ #define CMP_L 0 #else -#define LD_ADDR LDU +#define LD_ADDR LD #define CMP_L (1<<21) #endif @@ -816,38 +814,78 @@ static const void * const qemu_st_helpers[4] = { helper_stq_mmu, }; -static void tcg_out_tlb_read(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2, - TCGReg addr_reg, int s_bits, int offset) +/* Perform the TLB load and compare. Places the result of the comparison + in CR7, loads the addend of the TLB into R3, and returns the register + containing the guest address (zero-extended into R4). Clobbers R0 and R2. */ + +static TCGReg tcg_out_tlb_read(TCGContext *s, int s_bits, TCGReg addr_reg, + int mem_index, bool is_read) { -#if TARGET_LONG_BITS == 32 - tcg_out_ext32u(s, addr_reg, addr_reg); + int cmp_off + = (is_read + ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + TCGReg base = TCG_AREG0; - tcg_out_rlw(s, RLWINM, r0, addr_reg, - 32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), - 32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS), - 31 - CPU_TLB_ENTRY_BITS); - tcg_out32(s, ADD | TAB(r0, r0, TCG_AREG0)); - tcg_out32(s, LWZU | TAI(r1, r0, offset)); - tcg_out_rlw(s, RLWINM, r2, addr_reg, 0, - (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS); -#else - tcg_out_rld(s, RLDICL, r0, addr_reg, - 64 - TARGET_PAGE_BITS, - 64 - CPU_TLB_BITS); - tcg_out_shli64(s, r0, r0, CPU_TLB_ENTRY_BITS); - - tcg_out32(s, ADD | TAB(r0, r0, TCG_AREG0)); - tcg_out32(s, LD_ADDR | TAI(r1, r0, offset)); - - if (!s_bits) { - tcg_out_rld(s, RLDICR, r2, addr_reg, 0, 63 - TARGET_PAGE_BITS); + /* Extract the page index, shifted into place for tlb index. */ + if (TARGET_LONG_BITS == 32) { + /* Zero-extend the address into a place helpful for further use. */ + tcg_out_ext32u(s, TCG_REG_R4, addr_reg); + addr_reg = TCG_REG_R4; } else { - tcg_out_rld(s, RLDICL, r2, addr_reg, - 64 - TARGET_PAGE_BITS, - TARGET_PAGE_BITS - s_bits); - tcg_out_rld(s, RLDICL, r2, r2, TARGET_PAGE_BITS, 0); + tcg_out_rld(s, RLDICL, TCG_REG_R3, addr_reg, + 64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS); } -#endif + + /* Compensate for very large offsets. */ + if (add_off >= 0x8000) { + /* Most target env are smaller than 32k; none are larger than 64k. + Simplify the logic here merely to offset by 0x7ff0, giving us a + range just shy of 64k. Check this assumption. */ + QEMU_BUILD_BUG_ON(offsetof(CPUArchState, + tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ff0 + 0x7fff); + tcg_out32(s, ADDI | TAI(TCG_REG_R2, base, 0x7ff0)); + base = TCG_REG_R2; + cmp_off -= 0x7ff0; + add_off -= 0x7ff0; + } + + /* Extraction and shifting, part 2. */ + if (TARGET_LONG_BITS == 32) { + tcg_out_rlw(s, RLWINM, TCG_REG_R3, addr_reg, + 32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), + 32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS), + 31 - CPU_TLB_ENTRY_BITS); + } else { + tcg_out_shli64(s, TCG_REG_R3, TCG_REG_R3, CPU_TLB_ENTRY_BITS); + } + + tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, base)); + + /* Load the tlb comparator. */ + tcg_out32(s, LD_ADDR | TAI(TCG_REG_R2, TCG_REG_R3, cmp_off)); + + /* Load the TLB addend for use on the fast path. Do this asap + to minimize any load use delay. */ + tcg_out32(s, LD | TAI(TCG_REG_R3, TCG_REG_R3, add_off)); + + /* Clear the non-page, non-alignment bits from the address. */ + if (TARGET_LONG_BITS == 32) { + tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr_reg, 0, + (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS); + } else if (!s_bits) { + tcg_out_rld(s, RLDICR, TCG_REG_R0, addr_reg, 0, 63 - TARGET_PAGE_BITS); + } else { + tcg_out_rld(s, RLDICL, TCG_REG_R0, addr_reg, + 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits); + tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); + } + + tcg_out32(s, CMP | BF(7) | RA(TCG_REG_R0) | RB(TCG_REG_R2) | CMP_L); + + return addr_reg; } #endif @@ -875,10 +913,10 @@ static const uint32_t qemu_exts_opc[4] = { static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) { - TCGReg addr_reg, data_reg, r0, r1, rbase; + TCGReg addr_reg, data_reg, rbase; uint32_t insn, s_bits; #ifdef CONFIG_SOFTMMU - TCGReg r2, ir; + TCGReg ir; int mem_index; void *label1_ptr, *label2_ptr; #endif @@ -890,20 +928,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) #ifdef CONFIG_SOFTMMU mem_index = *args; - r0 = TCG_REG_R3; - r1 = TCG_REG_R4; - r2 = TCG_REG_R0; - rbase = 0; - - tcg_out_tlb_read(s, r0, r1, r2, addr_reg, s_bits, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)); - - tcg_out32(s, CMP | BF(7) | RA(r2) | RB(r1) | CMP_L); + addr_reg = tcg_out_tlb_read(s, s_bits, addr_reg, mem_index, true); label1_ptr = s->code_ptr; -#ifdef FAST_PATH tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_TRUE); -#endif /* slow path */ ir = TCG_REG_R3; @@ -919,42 +947,33 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) } else if (data_reg != TCG_REG_R3) { tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R3); } + label2_ptr = s->code_ptr; tcg_out32(s, B); /* label1: fast path */ -#ifdef FAST_PATH reloc_pc14(label1_ptr, (tcg_target_long)s->code_ptr); -#endif - - /* r0 now contains &env->tlb_table[mem_index][index].addr_read */ - tcg_out32(s, LD | TAI(r0, r0, - offsetof(CPUTLBEntry, addend) - - offsetof(CPUTLBEntry, addr_read))); - /* r0 = env->tlb_table[mem_index][index].addend */ - tcg_out32(s, ADD | TAB(r0, r0, addr_reg)); - /* r0 = env->tlb_table[mem_index][index].addend + addr */ + rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ -#if TARGET_LONG_BITS == 32 - tcg_out_ext32u(s, addr_reg, addr_reg); -#endif - r0 = addr_reg; - r1 = TCG_REG_R3; rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; + if (TARGET_LONG_BITS == 32) { + tcg_out_ext32u(s, TCG_REG_R2, addr_reg); + addr_reg = TCG_REG_R2; + } #endif insn = qemu_ldx_opc[opc]; if (!HAVE_ISA_2_06 && insn == LDBRX) { - tcg_out32(s, ADDI | TAI(r1, r0, 4)); - tcg_out32(s, LWBRX | TAB(data_reg, rbase, r0)); - tcg_out32(s, LWBRX | TAB( r1, rbase, r1)); - tcg_out_rld(s, RLDIMI, data_reg, r1, 32, 0); + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addr_reg, 4)); + tcg_out32(s, LWBRX | TAB(data_reg, rbase, addr_reg)); + tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0)); + tcg_out_rld(s, RLDIMI, data_reg, TCG_REG_R0, 32, 0); } else if (insn) { - tcg_out32(s, insn | TAB(data_reg, rbase, r0)); + tcg_out32(s, insn | TAB(data_reg, rbase, addr_reg)); } else { insn = qemu_ldx_opc[s_bits]; - tcg_out32(s, insn | TAB(data_reg, rbase, r0)); + tcg_out32(s, insn | TAB(data_reg, rbase, addr_reg)); insn = qemu_exts_opc[s_bits]; tcg_out32(s, insn | RA(data_reg) | RS(data_reg)); } @@ -966,10 +985,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) { - TCGReg addr_reg, r0, r1, rbase, data_reg; + TCGReg addr_reg, rbase, data_reg; uint32_t insn; #ifdef CONFIG_SOFTMMU - TCGReg r2, ir; + TCGReg ir; int mem_index; void *label1_ptr, *label2_ptr; #endif @@ -980,20 +999,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) #ifdef CONFIG_SOFTMMU mem_index = *args; - r0 = TCG_REG_R3; - r1 = TCG_REG_R4; - r2 = TCG_REG_R0; - rbase = 0; - - tcg_out_tlb_read(s, r0, r1, r2, addr_reg, opc, - offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); - - tcg_out32(s, CMP | BF(7) | RA(r2) | RB(r1) | CMP_L); + addr_reg = tcg_out_tlb_read(s, opc, addr_reg, mem_index, false); label1_ptr = s->code_ptr; -#ifdef FAST_PATH tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_TRUE); -#endif /* slow path */ ir = TCG_REG_R3; @@ -1008,34 +1017,25 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) tcg_out32(s, B); /* label1: fast path */ -#ifdef FAST_PATH - reloc_pc14(label1_ptr, (tcg_target_long)s->code_ptr); -#endif - - tcg_out32(s, LD | TAI(r0, r0, - offsetof(CPUTLBEntry, addend) - - offsetof(CPUTLBEntry, addr_write))); - /* r0 = env->tlb_table[mem_index][index].addend */ - tcg_out32(s, ADD | TAB(r0, r0, addr_reg)); - /* r0 = env->tlb_table[mem_index][index].addend + addr */ + reloc_pc14(label1_ptr, (tcg_target_long) s->code_ptr); + rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ -#if TARGET_LONG_BITS == 32 - tcg_out_ext32u(s, addr_reg, addr_reg); -#endif - r1 = TCG_REG_R3; - r0 = addr_reg; rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; + if (TARGET_LONG_BITS == 32) { + tcg_out_ext32u(s, TCG_REG_R2, addr_reg); + addr_reg = TCG_REG_R2; + } #endif insn = qemu_stx_opc[opc]; if (!HAVE_ISA_2_06 && insn == STDBRX) { - tcg_out32(s, STWBRX | SAB(data_reg, rbase, r0)); - tcg_out32(s, ADDI | TAI(r1, r0, 4)); + tcg_out32(s, STWBRX | SAB(data_reg, rbase, addr_reg)); + tcg_out32(s, ADDI | TAI(TCG_REG_R2, addr_reg, 4)); tcg_out_shri64(s, TCG_REG_R0, data_reg, 32); - tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, r1)); + tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_R2)); } else { - tcg_out32(s, insn | SAB(data_reg, rbase, r0)); + tcg_out32(s, insn | SAB(data_reg, rbase, addr_reg)); } #ifdef CONFIG_SOFTMMU From c7ca6a2b75f3867dd723c214fac08aa6cbf8cf94 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 30 Aug 2013 17:58:10 -0700 Subject: [PATCH 20/21] tcg-ppc64: Add _noaddr functions for emitting forward branches ... rather than open-coding this stuff through the file. Signed-off-by: Richard Henderson --- tcg/ppc64/tcg-target.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 2076299bd0..c225c8e879 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -204,6 +204,18 @@ static void reloc_pc14(void *pc, tcg_target_long target) *(uint32_t *)pc = (*(uint32_t *)pc & ~0xfffc) | reloc_pc14_val(pc, target); } +static inline void tcg_out_b_noaddr(TCGContext *s, int insn) +{ + unsigned retrans = *(uint32_t *)s->code_ptr & 0x3fffffc; + tcg_out32(s, insn | retrans); +} + +static inline void tcg_out_bc_noaddr(TCGContext *s, int insn) +{ + unsigned retrans = *(uint32_t *)s->code_ptr & 0xfffc; + tcg_out32(s, insn | retrans); +} + static void patch_reloc(uint8_t *code_ptr, int type, intptr_t value, intptr_t addend) { @@ -1362,11 +1374,8 @@ static void tcg_out_bc(TCGContext *s, int bc, int label_index) if (l->has_value) { tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value)); } else { - uint16_t val = *(uint16_t *) &s->code_ptr[2]; - - /* Thanks to Andrzej Zaborowski */ - tcg_out32(s, bc | (val & 0xfffc)); - tcg_out_reloc(s, s->code_ptr - 4, R_PPC_REL14, label_index, 0); + tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, label_index, 0); + tcg_out_bc_noaddr(s, bc); } } @@ -1466,11 +1475,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, if (l->has_value) { tcg_out_b(s, 0, l->u.value); } else { - uint32_t val = *(uint32_t *) s->code_ptr; - - /* Thanks to Andrzej Zaborowski */ - tcg_out32(s, B | (val & 0x3fffffc)); - tcg_out_reloc(s, s->code_ptr - 4, R_PPC_REL24, args[0], 0); + tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, args[0], 0); + tcg_out_b_noaddr(s, B); } } break; From 7f12d6497f9c4907c1ce4ef296392aef305ed587 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 31 Jul 2013 16:15:18 -0700 Subject: [PATCH 21/21] tcg-ppc64: Implement CONFIG_QEMU_LDST_OPTIMIZATION Signed-off-by: Richard Henderson --- configure | 2 +- tcg/ppc64/tcg-target.c | 212 ++++++++++++++++++++++++++--------------- 2 files changed, 136 insertions(+), 78 deletions(-) diff --git a/configure b/configure index ef4d9bf552..ba2d2b0ed6 100755 --- a/configure +++ b/configure @@ -3800,7 +3800,7 @@ echo "libs_softmmu=$libs_softmmu" >> $config_host_mak echo "ARCH=$ARCH" >> $config_host_mak case "$cpu" in - arm|i386|x86_64|x32|ppc|aarch64) + aarch64 | arm | i386 | x86_64 | x32 | ppc*) # The TCG interpreter currently does not support ld/st optimization. if test "$tcg_interpreter" = "no" ; then echo "CONFIG_QEMU_LDST_OPTIMIZATION=y" >> $config_host_mak diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index c225c8e879..332f4d8df1 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -807,23 +807,47 @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, } } -#if defined(CONFIG_SOFTMMU) +static const uint32_t qemu_ldx_opc[8] = { +#ifdef TARGET_WORDS_BIGENDIAN + LBZX, LHZX, LWZX, LDX, + 0, LHAX, LWAX, LDX +#else + LBZX, LHBRX, LWBRX, LDBRX, + 0, 0, 0, LDBRX, +#endif +}; + +static const uint32_t qemu_stx_opc[4] = { +#ifdef TARGET_WORDS_BIGENDIAN + STBX, STHX, STWX, STDX +#else + STBX, STHBRX, STWBRX, STDBRX, +#endif +}; + +static const uint32_t qemu_exts_opc[4] = { + EXTSB, EXTSH, EXTSW, 0 +}; + +#if defined (CONFIG_SOFTMMU) /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ + * int mmu_idx, uintptr_t ra) + */ static const void * const qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, + helper_ret_ldub_mmu, + helper_ret_lduw_mmu, + helper_ret_ldul_mmu, + helper_ret_ldq_mmu, }; /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ + * uintxx_t val, int mmu_idx, uintptr_t ra) + */ static const void * const qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, + helper_ret_stb_mmu, + helper_ret_stw_mmu, + helper_ret_stl_mmu, + helper_ret_stq_mmu, }; /* Perform the TLB load and compare. Places the result of the comparison @@ -899,38 +923,105 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, int s_bits, TCGReg addr_reg, return addr_reg; } -#endif -static const uint32_t qemu_ldx_opc[8] = { -#ifdef TARGET_WORDS_BIGENDIAN - LBZX, LHZX, LWZX, LDX, - 0, LHAX, LWAX, LDX -#else - LBZX, LHBRX, LWBRX, LDBRX, - 0, 0, 0, LDBRX, -#endif -}; +/* Record the context of a call to the out of line helper code for the slow + path for a load or store, so that we can later generate the correct + helper code. */ +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, int opc, + int data_reg, int addr_reg, int mem_index, + uint8_t *raddr, uint8_t *label_ptr) +{ + int idx; + TCGLabelQemuLdst *label; -static const uint32_t qemu_stx_opc[4] = { -#ifdef TARGET_WORDS_BIGENDIAN - STBX, STHX, STWX, STDX -#else - STBX, STHBRX, STWBRX, STDBRX, -#endif -}; + if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) { + tcg_abort(); + } -static const uint32_t qemu_exts_opc[4] = { - EXTSB, EXTSH, EXTSW, 0 -}; + idx = s->nb_qemu_ldst_labels++; + label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[idx]; + label->is_ld = is_ld; + label->opc = opc; + label->datalo_reg = data_reg; + label->addrlo_reg = addr_reg; + label->mem_index = mem_index; + label->raddr = raddr; + label->label_ptr[0] = label_ptr; +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + int opc = lb->opc; + int s_bits = opc & 3; + + reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr); + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0); + + /* If the address needed to be zero-extended, we'll have already + placed it in R4. The only remaining case is 64-bit guest. */ + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, lb->addrlo_reg); + + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, lb->mem_index); + tcg_out32(s, MFSPR | RT(TCG_REG_R6) | LR); + + tcg_out_call(s, (tcg_target_long)qemu_ld_helpers[s_bits], 1); + + if (opc & 4) { + uint32_t insn = qemu_exts_opc[s_bits]; + tcg_out32(s, insn | RA(lb->datalo_reg) | RS(TCG_REG_R3)); + } else { + tcg_out_mov(s, TCG_TYPE_I64, lb->datalo_reg, TCG_REG_R3); + } + + tcg_out_b(s, 0, (uintptr_t)lb->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + int opc = lb->opc; + + reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr); + + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, TCG_AREG0); + + /* If the address needed to be zero-extended, we'll have already + placed it in R4. The only remaining case is 64-bit guest. */ + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, lb->addrlo_reg); + + tcg_out_rld(s, RLDICL, TCG_REG_R5, lb->datalo_reg, + 0, 64 - (1 << (3 + opc))); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R6, lb->mem_index); + tcg_out32(s, MFSPR | RT(TCG_REG_R7) | LR); + + tcg_out_call(s, (tcg_target_long)qemu_st_helpers[opc], 1); + + tcg_out_b(s, 0, (uintptr_t)lb->raddr); +} + +void tcg_out_tb_finalize(TCGContext *s) +{ + int i, n = s->nb_qemu_ldst_labels; + + /* qemu_ld/st slow paths */ + for (i = 0; i < n; i++) { + TCGLabelQemuLdst *label = &s->qemu_ldst_labels[i]; + if (label->is_ld) { + tcg_out_qemu_ld_slow_path(s, label); + } else { + tcg_out_qemu_st_slow_path(s, label); + } + } +} +#endif /* SOFTMMU */ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) { TCGReg addr_reg, data_reg, rbase; uint32_t insn, s_bits; #ifdef CONFIG_SOFTMMU - TCGReg ir; int mem_index; - void *label1_ptr, *label2_ptr; + void *label_ptr; #endif data_reg = *args++; @@ -942,29 +1033,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) addr_reg = tcg_out_tlb_read(s, s_bits, addr_reg, mem_index, true); - label1_ptr = s->code_ptr; - tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_TRUE); - - /* slow path */ - ir = TCG_REG_R3; - tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0); - tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg); - tcg_out_movi(s, TCG_TYPE_I64, ir++, mem_index); - - tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits], 1); - - if (opc & 4) { - insn = qemu_exts_opc[s_bits]; - tcg_out32(s, insn | RA(data_reg) | RS(TCG_REG_R3)); - } else if (data_reg != TCG_REG_R3) { - tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R3); - } - - label2_ptr = s->code_ptr; - tcg_out32(s, B); - - /* label1: fast path */ - reloc_pc14(label1_ptr, (tcg_target_long)s->code_ptr); + /* Load a pointer into the current opcode w/conditional branch-link. */ + label_ptr = s->code_ptr; + tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ @@ -991,7 +1062,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) } #ifdef CONFIG_SOFTMMU - reloc_pc24(label2_ptr, (tcg_target_long)s->code_ptr); + add_qemu_ldst_label(s, true, opc, data_reg, addr_reg, mem_index, + s->code_ptr, label_ptr); #endif } @@ -1000,9 +1072,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) TCGReg addr_reg, rbase, data_reg; uint32_t insn; #ifdef CONFIG_SOFTMMU - TCGReg ir; int mem_index; - void *label1_ptr, *label2_ptr; + void *label_ptr; #endif data_reg = *args++; @@ -1013,23 +1084,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) addr_reg = tcg_out_tlb_read(s, opc, addr_reg, mem_index, false); - label1_ptr = s->code_ptr; - tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_TRUE); - - /* slow path */ - ir = TCG_REG_R3; - tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0); - tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg); - tcg_out_rld(s, RLDICL, ir++, data_reg, 0, 64 - (1 << (3 + opc))); - tcg_out_movi(s, TCG_TYPE_I64, ir++, mem_index); - - tcg_out_call(s, (tcg_target_long)qemu_st_helpers[opc], 1); - - label2_ptr = s->code_ptr; - tcg_out32(s, B); - - /* label1: fast path */ - reloc_pc14(label1_ptr, (tcg_target_long) s->code_ptr); + /* Load a pointer into the current opcode w/conditional branch-link. */ + label_ptr = s->code_ptr; + tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ @@ -1051,7 +1108,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) } #ifdef CONFIG_SOFTMMU - reloc_pc24(label2_ptr, (tcg_target_long)s->code_ptr); + add_qemu_ldst_label(s, false, opc, data_reg, addr_reg, mem_index, + s->code_ptr, label_ptr); #endif }