tcg/mips: Introduce prepare_host_addr

Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment,
and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st
into one function that returns HostAddress and TCGLabelQemuLdst structures.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
commit 5b7208daa0
parent e63eed328f
Author: Richard Henderson <richard.henderson@linaro.org>
Date:   2023-04-23 18:34:35 +01:00

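Editor's note: for orientation before the diff, here is a condensed sketch of the caller pattern this patch establishes. It is not part of the patch; HostAddress, prepare_host_addr, and the TCGLabelQemuLdst fields are taken from the diff below, while emit_fast_path() is a hypothetical stand-in for the tcg_out_qemu_*_direct/_unalign dispatch.

/*
 * Sketch only: the shape that tcg_out_qemu_ld and tcg_out_qemu_st
 * share after this patch.
 */
static void example_caller(TCGContext *s, TCGReg datalo, TCGReg datahi,
                           TCGReg addrlo, TCGReg addrhi,
                           MemOpIdx oi, TCGType data_type, bool is_ld)
{
    HostAddress h;
    TCGLabelQemuLdst *ldst;

    /*
     * One call replaces tcg_out_tlb_load (softmmu) or
     * tcg_out_test_alignment (useronly); it returns NULL when
     * no slow path is required.
     */
    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, is_ld);

    /* The fast path uses the host base register computed above. */
    emit_fast_path(s, datalo, datahi, h.base, get_memop(oi));

    /* Complete the slow-path descriptor that prepare_host_addr began. */
    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}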

@@ -1181,120 +1181,6 @@ static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
     return i;
 }
 
-/* We expect to use a 16-bit negative offset from ENV. */
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
-
-/*
- * Perform the tlb comparison operation.
- * The complete host address is placed in BASE.
- * Clobbers TMP0, TMP1, TMP2, TMP3.
- */
-static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
-                             TCGReg addrh, MemOpIdx oi,
-                             tcg_insn_unit *label_ptr[2], bool is_load)
-{
-    MemOp opc = get_memop(oi);
-    unsigned a_bits = get_alignment_bits(opc);
-    unsigned s_bits = opc & MO_SIZE;
-    unsigned a_mask = (1 << a_bits) - 1;
-    unsigned s_mask = (1 << s_bits) - 1;
-    int mem_index = get_mmuidx(oi);
-    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
-    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
-    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
-    int add_off = offsetof(CPUTLBEntry, addend);
-    int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
-                   : offsetof(CPUTLBEntry, addr_write));
-    target_ulong tlb_mask;
-
-    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
-
-    /* Extract the TLB index from the address into TMP3. */
-    tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrl,
-                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
-    tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
-
-    /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
-    tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
-
-    /* Load the (low-half) tlb comparator. */
-    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
-        tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
-    } else {
-        tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD
-                         : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
-                     TCG_TMP0, TCG_TMP3, cmp_off);
-    }
-
-    /* Zero extend a 32-bit guest address for a 64-bit host. */
-    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
-        tcg_out_ext32u(s, base, addrl);
-        addrl = base;
-    }
-
-    /*
-     * Mask the page bits, keeping the alignment bits to compare against.
-     * For unaligned accesses, compare against the end of the access to
-     * verify that it does not cross a page boundary.
-     */
-    tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
-    tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask);
-    if (a_mask >= s_mask) {
-        tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
-    } else {
-        tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrl, s_mask - a_mask);
-        tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
-    }
-
-    if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
-        /* Load the tlb addend for the fast path. */
-        tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
-    }
-
-    label_ptr[0] = s->code_ptr;
-    tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
-
-    /* Load and test the high half tlb comparator. */
-    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
-        /* delay slot */
-        tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
-
-        /* Load the tlb addend for the fast path. */
-        tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
-
-        label_ptr[1] = s->code_ptr;
-        tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0);
-    }
-
-    /* delay slot */
-    tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrl);
-}
-
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
-                                TCGType ext,
-                                TCGReg datalo, TCGReg datahi,
-                                TCGReg addrlo, TCGReg addrhi,
-                                void *raddr, tcg_insn_unit *label_ptr[2])
-{
-    TCGLabelQemuLdst *label = new_ldst_label(s);
-
-    label->is_ld = is_ld;
-    label->oi = oi;
-    label->type = ext;
-    label->datalo_reg = datalo;
-    label->datahi_reg = datahi;
-    label->addrlo_reg = addrlo;
-    label->addrhi_reg = addrhi;
-    label->raddr = tcg_splitwx_to_rx(raddr);
-    label->label_ptr[0] = label_ptr[0];
-    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
-        label->label_ptr[1] = label_ptr[1];
-    }
-}
-
 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 {
     const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
@@ -1403,32 +1289,6 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 }
 
 #else
-
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
-                                   TCGReg addrhi, unsigned a_bits)
-{
-    unsigned a_mask = (1 << a_bits) - 1;
-    TCGLabelQemuLdst *l = new_ldst_label(s);
-
-    l->is_ld = is_ld;
-    l->addrlo_reg = addrlo;
-    l->addrhi_reg = addrhi;
-
-    /* We are expecting a_bits to max out at 7, much lower than ANDI. */
-    tcg_debug_assert(a_bits < 16);
-    tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask);
-
-    l->label_ptr[0] = s->code_ptr;
-    if (use_mips32r6_instructions) {
-        tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0);
-    } else {
-        tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO);
-        tcg_out_nop(s);
-    }
-    l->raddr = tcg_splitwx_to_rx(s->code_ptr);
-}
-
 static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
 {
     void *target;
@@ -1478,6 +1338,154 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 }
 #endif /* SOFTMMU */
 
+typedef struct {
+    TCGReg base;
+    MemOp align;
+} HostAddress;
+
+/*
+ * For softmmu, perform the TLB load and compare.
+ * For useronly, perform any required alignment tests.
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
+ * is required and fill in @h with the host address for the fast path.
+ */
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
+                                           TCGReg addrlo, TCGReg addrhi,
+                                           MemOpIdx oi, bool is_ld)
+{
+    TCGLabelQemuLdst *ldst = NULL;
+    MemOp opc = get_memop(oi);
+    unsigned a_bits = get_alignment_bits(opc);
+    unsigned s_bits = opc & MO_SIZE;
+    unsigned a_mask = (1 << a_bits) - 1;
+    TCGReg base;
+
+#ifdef CONFIG_SOFTMMU
+    unsigned s_mask = (1 << s_bits) - 1;
+    int mem_index = get_mmuidx(oi);
+    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
+    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
+    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
+    int add_off = offsetof(CPUTLBEntry, addend);
+    int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
+                        : offsetof(CPUTLBEntry, addr_write);
+    target_ulong tlb_mask;
+
+    ldst = new_ldst_label(s);
+    ldst->is_ld = is_ld;
+    ldst->oi = oi;
+    ldst->addrlo_reg = addrlo;
+    ldst->addrhi_reg = addrhi;
+    base = TCG_REG_A0;
+
+    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
+    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
+
+    /* Extract the TLB index from the address into TMP3. */
+    tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrlo,
+                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+    tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
+
+    /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
+    tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+
+    /* Load the (low-half) tlb comparator. */
+    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+        tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
+    } else {
+        tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD
+                         : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
+                     TCG_TMP0, TCG_TMP3, cmp_off);
+    }
+
+    /* Zero extend a 32-bit guest address for a 64-bit host. */
+    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+        tcg_out_ext32u(s, base, addrlo);
+        addrlo = base;
+    }
+
+    /*
+     * Mask the page bits, keeping the alignment bits to compare against.
+     * For unaligned accesses, compare against the end of the access to
+     * verify that it does not cross a page boundary.
+     */
+    tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask);
+    if (a_mask >= s_mask) {
+        tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
+    } else {
+        tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrlo, s_mask - a_mask);
+        tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
+    }
+
+    if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
+        /* Load the tlb addend for the fast path. */
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
+    }
+
+    ldst->label_ptr[0] = s->code_ptr;
+    tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
+
+    /* Load and test the high half tlb comparator. */
+    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+        /* delay slot */
+        tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
+
+        /* Load the tlb addend for the fast path. */
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
+
+        ldst->label_ptr[1] = s->code_ptr;
+        tcg_out_opc_br(s, OPC_BNE, addrhi, TCG_TMP0);
+    }
+
+    /* delay slot */
+    tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrlo);
+#else
+    if (a_mask && (use_mips32r6_instructions || a_bits != s_bits)) {
+        ldst = new_ldst_label(s);
+
+        ldst->is_ld = is_ld;
+        ldst->oi = oi;
+        ldst->addrlo_reg = addrlo;
+        ldst->addrhi_reg = addrhi;
+
+        /* We are expecting a_bits to max out at 7, much lower than ANDI. */
+        tcg_debug_assert(a_bits < 16);
+        tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask);
+
+        ldst->label_ptr[0] = s->code_ptr;
+        if (use_mips32r6_instructions) {
+            tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0);
+        } else {
+            tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO);
+            tcg_out_nop(s);
+        }
+    }
+
+    base = addrlo;
+    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+        tcg_out_ext32u(s, TCG_REG_A0, base);
+        base = TCG_REG_A0;
+    }
+    if (guest_base) {
+        if (guest_base == (int16_t)guest_base) {
+            tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
+        } else {
+            tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
+                            TCG_GUEST_BASE_REG);
+        }
+        base = TCG_REG_A0;
+    }
+#endif
+
+    h->base = base;
+    h->align = a_bits;
+    return ldst;
+}
+
 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
                                    TCGReg base, MemOp opc, TCGType type)
 {
@@ -1707,57 +1715,23 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
                             MemOpIdx oi, TCGType data_type)
 {
     MemOp opc = get_memop(oi);
-    unsigned a_bits = get_alignment_bits(opc);
-    unsigned s_bits = opc & MO_SIZE;
-    TCGReg base;
+    TCGLabelQemuLdst *ldst;
+    HostAddress h;
 
-    /*
-     * R6 removes the left/right instructions but requires the
-     * system to support misaligned memory accesses.
-     */
-#if defined(CONFIG_SOFTMMU)
-    tcg_insn_unit *label_ptr[2];
+    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
 
-    base = TCG_REG_A0;
-    tcg_out_tlb_load(s, base, addrlo, addrhi, oi, label_ptr, 1);
-    if (use_mips32r6_instructions || a_bits >= s_bits) {
-        tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
+    if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
+        tcg_out_qemu_ld_direct(s, datalo, datahi, h.base, opc, data_type);
     } else {
-        tcg_out_qemu_ld_unalign(s, datalo, datahi, base, opc, data_type);
+        tcg_out_qemu_ld_unalign(s, datalo, datahi, h.base, opc, data_type);
     }
-    add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
-                        addrlo, addrhi, s->code_ptr, label_ptr);
-#else
-    base = addrlo;
-    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
-        tcg_out_ext32u(s, TCG_REG_A0, base);
-        base = TCG_REG_A0;
-    }
-    if (guest_base) {
-        if (guest_base == (int16_t)guest_base) {
-            tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
-        } else {
-            tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
-                            TCG_GUEST_BASE_REG);
-        }
-        base = TCG_REG_A0;
-    }
-    if (use_mips32r6_instructions) {
-        if (a_bits) {
-            tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
-        }
-        tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
-    } else {
-        if (a_bits && a_bits != s_bits) {
-            tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
-        }
-        if (a_bits >= s_bits) {
-            tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
-        } else {
-            tcg_out_qemu_ld_unalign(s, datalo, datahi, base, opc, data_type);
-        }
-    }
-#endif
+
+    if (ldst) {
+        ldst->type = data_type;
+        ldst->datalo_reg = datalo;
+        ldst->datahi_reg = datahi;
+        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+    }
 }
 
 static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
@@ -1899,57 +1873,23 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
                             MemOpIdx oi, TCGType data_type)
 {
     MemOp opc = get_memop(oi);
-    unsigned a_bits = get_alignment_bits(opc);
-    unsigned s_bits = opc & MO_SIZE;
-    TCGReg base;
+    TCGLabelQemuLdst *ldst;
+    HostAddress h;
 
-    /*
-     * R6 removes the left/right instructions but requires the
-     * system to support misaligned memory accesses.
-     */
-#if defined(CONFIG_SOFTMMU)
-    tcg_insn_unit *label_ptr[2];
+    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
 
-    base = TCG_REG_A0;
-    tcg_out_tlb_load(s, base, addrlo, addrhi, oi, label_ptr, 0);
-    if (use_mips32r6_instructions || a_bits >= s_bits) {
-        tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
+    if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
+        tcg_out_qemu_st_direct(s, datalo, datahi, h.base, opc);
     } else {
-        tcg_out_qemu_st_unalign(s, datalo, datahi, base, opc);
+        tcg_out_qemu_st_unalign(s, datalo, datahi, h.base, opc);
     }
-    add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi,
-                        addrlo, addrhi, s->code_ptr, label_ptr);
-#else
-    base = addrlo;
-    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
-        tcg_out_ext32u(s, TCG_REG_A0, base);
-        base = TCG_REG_A0;
-    }
-    if (guest_base) {
-        if (guest_base == (int16_t)guest_base) {
-            tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
-        } else {
-            tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
-                            TCG_GUEST_BASE_REG);
-        }
-        base = TCG_REG_A0;
-    }
-    if (use_mips32r6_instructions) {
-        if (a_bits) {
-            tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
-        }
-        tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
-    } else {
-        if (a_bits && a_bits != s_bits) {
-            tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
-        }
-        if (a_bits >= s_bits) {
-            tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
-        } else {
-            tcg_out_qemu_st_unalign(s, datalo, datahi, base, opc);
-        }
-    }
-#endif
+
+    if (ldst) {
+        ldst->type = data_type;
+        ldst->datalo_reg = datalo;
+        ldst->datahi_reg = datahi;
+        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+    }
 }
 
 static void tcg_out_mb(TCGContext *s, TCGArg a0)
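
Editor's note: the "compare against the end of the access" trick in prepare_host_addr's softmmu path is worth a worked example. The sketch below is not from the patch; it assumes 4 KiB pages (TARGET_PAGE_BITS == 12) and mirrors the tlb_mask arithmetic with host-side integers only.

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t page_mask = 0xfffff000u;   /* TARGET_PAGE_MASK for 4 KiB pages */
    unsigned a_mask = 0;                /* byte-aligned access: a_bits = 0 */
    unsigned s_mask = 3;                /* 4-byte access: s_bits = 2 */
    uint32_t addr = 0x00400ffeu;        /* starts 2 bytes before a page end */

    /* tlb_mask = TARGET_PAGE_MASK | a_mask; since a_mask < s_mask, the
       mask is applied to the last byte of the access, addr + 3. */
    uint32_t tlb_mask = page_mask | a_mask;
    uint32_t masked = (addr + (s_mask - a_mask)) & tlb_mask;

    /* The access spills onto the next page, so the masked value cannot
       match the TLB comparator for addr's page: the BNE falls through to
       the slow path, exactly as the comment in the patch describes. */
    assert(masked != (addr & page_mask));
    return 0;
}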