tcg/arm: Use LDRD to load tlb mask+table
This changes the code generation for the tlb from e.g.

    ldr    ip, [r6, #-0x10]
    ldr    r2, [r6, #-0xc]
    and    ip, ip, r4, lsr #8
    ldrd   r0, r1, [r2, ip]!
    ldr    r2, [r2, #0x18]

to

    ldrd   r0, r1, [r6, #-0x10]
    and    r0, r0, r4, lsr #8
    ldrd   r2, r3, [r1, r0]!
    ldr    r1, [r1, #0x18]

for armv7 hosts.  Rearranging the register allocation in order to
avoid overlap between the two ldrd pairs causes the patch to be
larger than it ordinarily would be.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
parent 65b23204d6
commit 057b6e370b
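What the new sequence computes is the cpu TLB fast-path lookup: one ldrd fetches mask and table together, the and derives the entry offset, and the second ldrd fetches the 64-bit comparator. As a rough C model of the index step (a sketch with assumed constants and a hypothetical helper name, not the QEMU implementation):

    #include <stdint.h>

    /* Assumed values for illustration; QEMU derives these per target. */
    enum { TARGET_PAGE_BITS = 12, CPU_TLB_ENTRY_BITS = 4 };

    /* The single AND above ("and r0, r0, r4, lsr #8") computes this:
       the mask already excludes the low entry bits, so the result is a
       byte offset straight into the table of CPUTLBEntry structs. */
    static uint8_t *tlb_entry_for(uint32_t mask, uint8_t *table, uint32_t addr)
    {
        uint32_t offset = mask & (addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
        return table + offset;
    }

With the assumed TARGET_PAGE_BITS = 12 and CPU_TLB_ENTRY_BITS = 4, the shift amount is 8, matching the lsr #8 in the sequences above.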
@@ -267,6 +267,7 @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
 #endif
         break;
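R3 is newly removed from the allocatable set because the rewritten fast path below loads the comparator high word into it. A minimal model of the effect (simplified stand-in types and names, not QEMU's actual definitions):

    #include <stdint.h>

    typedef uint32_t TCGRegSet;              /* one bit per host register */

    /* Simplified stand-in for QEMU's macro: drop one register from a set. */
    #define tcg_regset_reset_reg(set, reg)   ((set) &= ~(1u << (reg)))

    enum { REG_R0, REG_R1, REG_R2, REG_R3, REG_R14 = 14 };

    static TCGRegSet exclude_tlb_clobbers(TCGRegSet regs)
    {
        /* The qemu_ld/st address operand may not live in any register the
           tlb fast path writes: previously R0-R2 and R14, now also R3. */
        tcg_regset_reset_reg(regs, REG_R0);
        tcg_regset_reset_reg(regs, REG_R1);
        tcg_regset_reset_reg(regs, REG_R2);
        tcg_regset_reset_reg(regs, REG_R3);
        tcg_regset_reset_reg(regs, REG_R14);
        return regs;
    }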
@@ -1224,6 +1225,10 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
 
+/* These offsets are built into the LDRD below.  */
+QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
+QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
+
 /* Load and compare a TLB entry, leaving the flags set.  Returns the register
    containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */
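These assertions pin down the layout that makes the single LDRD legal: the two words the fast path needs must be adjacent, mask first. On a 32-bit host the structure is effectively the following (a sketch; the real definition lives in QEMU's headers):

    #include <stdint.h>

    typedef struct CPUTLBDescFast {
        uint32_t mask;     /* offset 0: loaded into the even register (r0) */
        void    *table;    /* offset 4: loaded into the odd register (r1)  */
    } CPUTLBDescFast;

If either field ever moved, the LDRD would silently fetch the wrong word, so the build breaks instead.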
@@ -1238,47 +1243,54 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
     unsigned s_bits = opc & MO_SIZE;
     unsigned a_bits = get_alignment_bits(opc);
 
-    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
-    tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP, TCG_AREG0, mask_off);
-    tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R2, TCG_AREG0, table_off);
-
-    /* Extract the tlb index from the address into TMP.  */
-    tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, addrlo,
-                    SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
-
     /*
-     * Add the tlb_table pointer, creating the CPUTLBEntry address in R2.
-     * Load the tlb comparator into R0/R1 and the fast path addend into R2.
+     * We don't support inline unaligned acceses, but we can easily
+     * support overalignment checks.
      */
-    if (cmp_off == 0) {
-        if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
-            tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R2, TCG_REG_TMP);
-        } else {
-            tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R2, TCG_REG_TMP);
-        }
-    } else {
-        tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
-                        TCG_REG_R2, TCG_REG_R2, TCG_REG_TMP, 0);
-        if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
-            tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
-        } else {
-            tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
-        }
-    }
-    if (!use_armv6_instructions && TARGET_LONG_BITS == 64) {
-        tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
-    }
-
-    /* Load the tlb addend.  */
-    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2,
-                    offsetof(CPUTLBEntry, addend));
-
-    /* Check alignment.  We don't support inline unaligned acceses,
-       but we can easily support overalignment checks.  */
     if (a_bits < s_bits) {
         a_bits = s_bits;
     }
 
+    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}.  */
+    if (use_armv6_instructions) {
+        tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
+    } else {
+        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R0, TCG_AREG0, mask_off);
+        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R1, TCG_AREG0, table_off);
+    }
+
+    /* Extract the tlb index from the address into R0.  */
+    tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
+                    SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
+
+    /*
+     * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
+     * Load the tlb comparator into R2/R3 and the fast path addend into R1.
+     */
+    if (cmp_off == 0) {
+        if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+            tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
+        } else {
+            tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
+        }
+    } else {
+        tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
+                        TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
+        if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+            tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
+        } else {
+            tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
+        }
+    }
+    if (!use_armv6_instructions && TARGET_LONG_BITS == 64) {
+        tcg_out_ld32_12(s, COND_AL, TCG_REG_R3, TCG_REG_R1, cmp_off + 4);
+    }
+
+    /* Load the tlb addend.  */
+    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
+                    offsetof(CPUTLBEntry, addend));
+
+    /* Check alignment, check comparators.  */
     if (use_armv7_instructions) {
         tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1));
         int rot = encode_imm(mask);
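The armv7 branch that follows folds the alignment check into the page compare: BIC clears only the intra-page bits above the alignment bits, so a single CMP fails on either a page mismatch or a misaligned address. In C terms (illustrative only, with an assumed 4 KiB page and hypothetical function name):

    #include <stdint.h>

    enum { TARGET_PAGE_BITS = 12 };
    #define TARGET_PAGE_MASK (~((1u << TARGET_PAGE_BITS) - 1))

    static int tlb_hit_v7(uint32_t comparator, uint32_t addr, unsigned a_bits)
    {
        /* mask covers bits [TARGET_PAGE_BITS-1 : a_bits]; BIC clears them,
           leaving the page number plus any misaligned low bits.  The stored
           comparator has all sub-page bits zero, so equality requires both
           a page match and an adequately aligned address. */
        uint32_t mask = ~(TARGET_PAGE_MASK | ((1u << a_bits) - 1));
        return comparator == (addr & ~mask);
    }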
@@ -1291,22 +1303,24 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
             tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
                             addrlo, TCG_REG_TMP, 0);
         }
-        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R0, TCG_REG_TMP, 0);
+        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
     } else {
         if (a_bits) {
             tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
                             (1 << a_bits) - 1);
         }
+        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, addrlo,
+                        SHIFT_IMM_LSR(TARGET_PAGE_BITS));
         tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP,
-                        0, TCG_REG_R0, TCG_REG_TMP,
+                        0, TCG_REG_R2, TCG_REG_TMP,
                         SHIFT_IMM_LSL(TARGET_PAGE_BITS));
     }
 
     if (TARGET_LONG_BITS == 64) {
-        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R1, addrhi, 0);
+        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
     }
 
-    return TCG_REG_R2;
+    return TCG_REG_R1;
 }
 
 /* Record the context of a call to the out of line helper code for the slow
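On pre-armv7 hosts the same test is split: a TST checks alignment and conditions the page compare, while the MOV/CMP pair compares page numbers by shifting the offset bits out and back. Roughly, in C (illustrative only, with an assumed page size and hypothetical function name):

    #include <stdint.h>

    enum { TARGET_PAGE_BITS = 12 };

    static int tlb_hit_v6(uint32_t comparator, uint32_t addr, unsigned a_bits)
    {
        if (a_bits && (addr & ((1u << a_bits) - 1))) {
            return 0;        /* TST: misaligned, fall through to slow path */
        }
        /* mov tmp, addr, lsr #12 ; cmp comparator, tmp, lsl #12 */
        return comparator == ((addr >> TARGET_PAGE_BITS) << TARGET_PAGE_BITS);
    }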