target/arm: Update contiguous first-fault and no-fault loads

With sve_cont_ldst_pages, the differences between first-fault and no-fault
are minimal, so unify the routines.  With cpu_watchpoint_address_matches,
we are able to make progress through pages with TLB_WATCHPOINT set when
the watchpoint does not actually fire.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200508154359.7494-15-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2020-05-08 08:43:54 -07:00 committed by Peter Maydell
parent 5c9b8458a0
commit c647673ce4

View File

@ -4101,18 +4101,6 @@ static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off,
return reg_off; return reg_off;
} }
/*
* Return the maximum offset <= @mem_max which is still within the page
* referenced by @base + @mem_off.
*/
static intptr_t max_for_page(target_ulong base, intptr_t mem_off,
intptr_t mem_max)
{
target_ulong addr = base + mem_off;
intptr_t split = -(intptr_t)(addr | TARGET_PAGE_MASK);
return MIN(split, mem_max - mem_off) + mem_off;
}
/* /*
* Resolve the guest virtual address to info->host and info->flags. * Resolve the guest virtual address to info->host and info->flags.
* If @nofault, return false if the page is invalid, otherwise * If @nofault, return false if the page is invalid, otherwise
@ -4435,19 +4423,6 @@ static void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env,
#endif #endif
} }
/*
* The result of tlb_vaddr_to_host for user-only is just g2h(x),
* which is always non-null. Elide the useless test.
*/
static inline bool test_host_page(void *host)
{
#ifdef CONFIG_USER_ONLY
return true;
#else
return likely(host != NULL);
#endif
}
/* /*
* Common helper for all contiguous 1,2,3,4-register predicated stores. * Common helper for all contiguous 1,2,3,4-register predicated stores.
*/ */
@ -4705,167 +4680,167 @@ static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz)
} }
/* /*
* Common helper for all contiguous first-fault loads. * Common helper for all contiguous no-fault and first-fault loads.
*/ */
static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr, static inline QEMU_ALWAYS_INLINE
uint32_t desc, const uintptr_t retaddr, void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
const int esz, const int msz, uint32_t desc, const uintptr_t retaddr,
sve_ldst1_host_fn *host_fn, const int esz, const int msz, const SVEContFault fault,
sve_ldst1_tlb_fn *tlb_fn) sve_ldst1_host_fn *host_fn,
sve_ldst1_tlb_fn *tlb_fn)
{ {
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
const int mmu_idx = get_mmuidx(oi);
const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
void *vd = &env->vfp.zregs[rd]; void *vd = &env->vfp.zregs[rd];
const int diffsz = esz - msz;
const intptr_t reg_max = simd_oprsz(desc); const intptr_t reg_max = simd_oprsz(desc);
const intptr_t mem_max = reg_max >> diffsz; intptr_t reg_off, mem_off, reg_last;
intptr_t split, reg_off, mem_off, i; SVEContLdSt info;
int flags;
void *host; void *host;
/* Skip to the first active element. */ /* Find the active elements. */
reg_off = find_next_active(vg, 0, reg_max, esz); if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, 1 << msz)) {
if (unlikely(reg_off == reg_max)) {
/* The entire predicate was false; no load occurs. */ /* The entire predicate was false; no load occurs. */
memset(vd, 0, reg_max); memset(vd, 0, reg_max);
return; return;
} }
mem_off = reg_off >> diffsz; reg_off = info.reg_off_first[0];
/* /* Probe the page(s). */
* If the (remaining) load is entirely within a single page, then: if (!sve_cont_ldst_pages(&info, fault, env, addr, MMU_DATA_LOAD, retaddr)) {
* For softmmu, and the tlb hits, then no faults will occur; /* Fault on first element. */
* For user-only, either the first load will fault or none will. tcg_debug_assert(fault == FAULT_NO);
* We can thus perform the load directly to the destination and memset(vd, 0, reg_max);
* Vd will be unmodified on any exception path. goto do_fault;
*/ }
split = max_for_page(addr, mem_off, mem_max);
if (likely(split == mem_max)) { mem_off = info.mem_off_first[0];
host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); flags = info.page[0].flags;
if (test_host_page(host)) {
i = reg_off; if (fault == FAULT_FIRST) {
host -= mem_off; /*
do { * Special handling of the first active element,
host_fn(vd, i, host + (i >> diffsz)); * if it crosses a page boundary or is MMIO.
i = find_next_active(vg, i + (1 << esz), reg_max, esz); */
} while (i < reg_max); bool is_split = mem_off == info.mem_off_split;
/* After any fault, zero any leading inactive elements. */ /* TODO: MTE check. */
if (unlikely(flags != 0) || unlikely(is_split)) {
/*
* Use the slow path for cross-page handling.
* Might trap for MMIO or watchpoints.
*/
tlb_fn(env, vd, reg_off, addr + mem_off, retaddr);
/* After any fault, zero the other elements. */
swap_memzero(vd, reg_off); swap_memzero(vd, reg_off);
return; reg_off += 1 << esz;
mem_off += 1 << msz;
swap_memzero(vd + reg_off, reg_max - reg_off);
if (is_split) {
goto second_page;
}
} else {
memset(vd, 0, reg_max);
}
} else {
memset(vd, 0, reg_max);
if (unlikely(mem_off == info.mem_off_split)) {
/* The first active element crosses a page boundary. */
flags |= info.page[1].flags;
if (unlikely(flags & TLB_MMIO)) {
/* Some page is MMIO, see below. */
goto do_fault;
}
if (unlikely(flags & TLB_WATCHPOINT) &&
(cpu_watchpoint_address_matches
(env_cpu(env), addr + mem_off, 1 << msz)
& BP_MEM_READ)) {
/* Watchpoint hit, see below. */
goto do_fault;
}
/* TODO: MTE check. */
/*
* Use the slow path for cross-page handling.
* This is RAM, without a watchpoint, and will not trap.
*/
tlb_fn(env, vd, reg_off, addr + mem_off, retaddr);
goto second_page;
} }
} }
/* /*
* Perform one normal read, which will fault or not. * From this point on, all memory operations are MemSingleNF.
* But it is likely to bring the page into the tlb. *
* Per the MemSingleNF pseudocode, a no-fault load from Device memory
* must not actually hit the bus -- it returns (UNKNOWN, FAULT) instead.
*
* Unfortunately we do not have access to the memory attributes from the
* PTE to tell Device memory from Normal memory. So we make a mostly
* correct check, and indicate (UNKNOWN, FAULT) for any MMIO.
* This gives the right answer for the common cases of "Normal memory,
* backed by host RAM" and "Device memory, backed by MMIO".
* The architecture allows us to suppress an NF load and return
* (UNKNOWN, FAULT) for any reason, so our behaviour for the corner
* case of "Normal memory, backed by MMIO" is permitted. The case we
* get wrong is "Device memory, backed by host RAM", for which we
* should return (UNKNOWN, FAULT) but do not.
*
* Similarly, CPU_BP breakpoints would raise exceptions, and so
* return (UNKNOWN, FAULT). For simplicity, we consider gdb and
* architectural breakpoints the same.
*/ */
tlb_fn(env, vd, reg_off, addr + mem_off, retaddr); if (unlikely(flags & TLB_MMIO)) {
goto do_fault;
}
/* After any fault, zero any leading predicated false elts. */ reg_last = info.reg_off_last[0];
swap_memzero(vd, reg_off); host = info.page[0].host;
mem_off += 1 << msz;
reg_off += 1 << esz;
/* Try again to read the balance of the page. */ do {
split = max_for_page(addr, mem_off - 1, mem_max); uint64_t pg = *(uint64_t *)(vg + (reg_off >> 3));
if (split >= (1 << msz)) { do {
host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); if ((pg >> (reg_off & 63)) & 1) {
if (host) { if (unlikely(flags & TLB_WATCHPOINT) &&
host -= mem_off; (cpu_watchpoint_address_matches
do { (env_cpu(env), addr + mem_off, 1 << msz)
& BP_MEM_READ)) {
goto do_fault;
}
/* TODO: MTE check. */
host_fn(vd, reg_off, host + mem_off); host_fn(vd, reg_off, host + mem_off);
reg_off += 1 << esz; }
reg_off = find_next_active(vg, reg_off, reg_max, esz);
mem_off = reg_off >> diffsz;
} while (split - mem_off >= (1 << msz));
}
}
record_fault(env, reg_off, reg_max);
}
/*
* Common helper for all contiguous no-fault loads.
*/
static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr,
uint32_t desc, const int esz, const int msz,
sve_ldst1_host_fn *host_fn)
{
const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
void *vd = &env->vfp.zregs[rd];
const int diffsz = esz - msz;
const intptr_t reg_max = simd_oprsz(desc);
const intptr_t mem_max = reg_max >> diffsz;
const int mmu_idx = cpu_mmu_index(env, false);
intptr_t split, reg_off, mem_off;
void *host;
#ifdef CONFIG_USER_ONLY
host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx);
if (likely(page_check_range(addr, mem_max, PAGE_READ) == 0)) {
/* The entire operation is valid and will not fault. */
reg_off = 0;
do {
mem_off = reg_off >> diffsz;
host_fn(vd, reg_off, host + mem_off);
reg_off += 1 << esz; reg_off += 1 << esz;
reg_off = find_next_active(vg, reg_off, reg_max, esz); mem_off += 1 << msz;
} while (reg_off < reg_max); } while (reg_off <= reg_last && (reg_off & 63));
return; } while (reg_off <= reg_last);
}
#endif
/* There will be no fault, so we may modify in advance. */
memset(vd, 0, reg_max);
/* Skip to the first active element. */
reg_off = find_next_active(vg, 0, reg_max, esz);
if (unlikely(reg_off == reg_max)) {
/* The entire predicate was false; no load occurs. */
return;
}
mem_off = reg_off >> diffsz;
#ifdef CONFIG_USER_ONLY
if (page_check_range(addr + mem_off, 1 << msz, PAGE_READ) == 0) {
/* At least one load is valid; take the rest of the page. */
split = max_for_page(addr, mem_off + (1 << msz) - 1, mem_max);
do {
host_fn(vd, reg_off, host + mem_off);
reg_off += 1 << esz;
reg_off = find_next_active(vg, reg_off, reg_max, esz);
mem_off = reg_off >> diffsz;
} while (split - mem_off >= (1 << msz));
}
#else
/* /*
* If the address is not in the TLB, we have no way to bring the * MemSingleNF is allowed to fail for any reason. We have special
* entry into the TLB without also risking a fault. Note that * code above to handle the first element crossing a page boundary.
* the corollary is that we never load from an address not in RAM. * As an implementation choice, decline to handle a cross-page element
* * in any other position.
* This last is out of spec, in a weird corner case.
* Per the MemNF/MemSingleNF pseudocode, a NF load from Device memory
* must not actually hit the bus -- it returns UNKNOWN data instead.
* But if you map non-RAM with Normal memory attributes and do a NF
* load then it should access the bus. (Nobody ought actually do this
* in the real world, obviously.)
*
* Then there are the annoying special cases with watchpoints...
* TODO: Add a form of non-faulting loads using cc->tlb_fill(probe=true).
*/ */
host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); reg_off = info.reg_off_split;
split = max_for_page(addr, mem_off, mem_max); if (reg_off >= 0) {
if (host && split >= (1 << msz)) { goto do_fault;
host -= mem_off;
do {
host_fn(vd, reg_off, host + mem_off);
reg_off += 1 << esz;
reg_off = find_next_active(vg, reg_off, reg_max, esz);
mem_off = reg_off >> diffsz;
} while (split - mem_off >= (1 << msz));
} }
#endif
second_page:
reg_off = info.reg_off_first[1];
if (likely(reg_off < 0)) {
/* No active elements on the second page. All done. */
return;
}
/*
* MemSingleNF is allowed to fail for any reason. As an implementation
* choice, decline to handle elements on the second page. This should
* be low frequency as the guest walks through memory -- the next
* iteration of the guest's loop should be aligned on the page boundary,
* and then all following iterations will stay aligned.
*/
do_fault:
record_fault(env, reg_off, reg_max); record_fault(env, reg_off, reg_max);
} }
@ -4873,58 +4848,61 @@ static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr,
void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \ void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \
target_ulong addr, uint32_t desc) \ target_ulong addr, uint32_t desc) \
{ \ { \
sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_FIRST, \
sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \
} \ } \
void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \ void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \
target_ulong addr, uint32_t desc) \ target_ulong addr, uint32_t desc) \
{ \ { \
sve_ldnf1_r(env, vg, addr, desc, ESZ, 0, sve_ld1##PART##_host); \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_NO, \
sve_ld1##PART##_host, sve_ld1##PART##_tlb); \
} }
#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ #define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \
void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \ void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \
target_ulong addr, uint32_t desc) \ target_ulong addr, uint32_t desc) \
{ \ { \
sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \
sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \
} \ } \
void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \ void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \
target_ulong addr, uint32_t desc) \ target_ulong addr, uint32_t desc) \
{ \ { \
sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_le_host); \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \
sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \
} \ } \
void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \ void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \
target_ulong addr, uint32_t desc) \ target_ulong addr, uint32_t desc) \
{ \ { \
sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \
sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \
} \ } \
void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \ void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \
target_ulong addr, uint32_t desc) \ target_ulong addr, uint32_t desc) \
{ \ { \
sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_be_host); \ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \
sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \
} }
DO_LDFF1_LDNF1_1(bb, 0) DO_LDFF1_LDNF1_1(bb, MO_8)
DO_LDFF1_LDNF1_1(bhu, 1) DO_LDFF1_LDNF1_1(bhu, MO_16)
DO_LDFF1_LDNF1_1(bhs, 1) DO_LDFF1_LDNF1_1(bhs, MO_16)
DO_LDFF1_LDNF1_1(bsu, 2) DO_LDFF1_LDNF1_1(bsu, MO_32)
DO_LDFF1_LDNF1_1(bss, 2) DO_LDFF1_LDNF1_1(bss, MO_32)
DO_LDFF1_LDNF1_1(bdu, 3) DO_LDFF1_LDNF1_1(bdu, MO_64)
DO_LDFF1_LDNF1_1(bds, 3) DO_LDFF1_LDNF1_1(bds, MO_64)
DO_LDFF1_LDNF1_2(hh, 1, 1) DO_LDFF1_LDNF1_2(hh, MO_16, MO_16)
DO_LDFF1_LDNF1_2(hsu, 2, 1) DO_LDFF1_LDNF1_2(hsu, MO_32, MO_16)
DO_LDFF1_LDNF1_2(hss, 2, 1) DO_LDFF1_LDNF1_2(hss, MO_32, MO_16)
DO_LDFF1_LDNF1_2(hdu, 3, 1) DO_LDFF1_LDNF1_2(hdu, MO_64, MO_16)
DO_LDFF1_LDNF1_2(hds, 3, 1) DO_LDFF1_LDNF1_2(hds, MO_64, MO_16)
DO_LDFF1_LDNF1_2(ss, 2, 2) DO_LDFF1_LDNF1_2(ss, MO_32, MO_32)
DO_LDFF1_LDNF1_2(sdu, 3, 2) DO_LDFF1_LDNF1_2(sdu, MO_64, MO_32)
DO_LDFF1_LDNF1_2(sds, 3, 2) DO_LDFF1_LDNF1_2(sds, MO_64, MO_32)
DO_LDFF1_LDNF1_2(dd, 3, 3) DO_LDFF1_LDNF1_2(dd, MO_64, MO_64)
#undef DO_LDFF1_LDNF1_1 #undef DO_LDFF1_LDNF1_1
#undef DO_LDFF1_LDNF1_2 #undef DO_LDFF1_LDNF1_2