cputlb: Merge and move memory_notdirty_write_{prepare,complete}
Since 9458a9a1df, all readers of the dirty bitmaps wait
for the rcu lock, which means that they wait until the end
of any executing TranslationBlock.
As a consequence, there is no need for the actual access
to happen in between the _prepare and _complete. Therefore,
we can improve things by merging the two functions into
notdirty_write and dropping the NotDirtyInfo structure.
In addition, the only users of notdirty_write are in cputlb.c,
so move the merged function there. Pass in the CPUIOTLBEntry
from which the ram_addr_t may be computed.
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
commit 707526ad86
parent 8f5db64153
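For orientation, here is an editor's condensed sketch of the store_helper() change below (not part of the commit itself): the merge turns the prepare/write/complete bracketing into a single bookkeeping call made before the access.

    /* Before: the dirty-page bookkeeping had to bracket the store. */
    NotDirtyInfo ndi;
    memory_notdirty_write_prepare(&ndi, env_cpu(env), addr,
                                  addr + iotlbentry->addr, size);
    store_memop(haddr, val, op);
    memory_notdirty_write_complete(&ndi);

    /* After: dirty-bitmap readers wait on the RCU lock, so all of the
     * bookkeeping can run up front in one call before the store. */
    notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
    store_memop(haddr, val, op);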
accel/tcg/cputlb.c

@@ -33,6 +33,7 @@
 #include "exec/helper-proto.h"
 #include "qemu/atomic.h"
 #include "qemu/atomic128.h"
+#include "translate-all.h"
 
 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
 /* #define DEBUG_TLB */
@@ -1085,6 +1086,37 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
     return qemu_ram_addr_from_host_nofail(p);
 }
 
+static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
+                           CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
+{
+    ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
+
+    trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
+
+    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
+        struct page_collection *pages
+            = page_collection_lock(ram_addr, ram_addr + size);
+
+        /* We require mem_io_pc in tb_invalidate_phys_page_range. */
+        cpu->mem_io_pc = retaddr;
+
+        tb_invalidate_phys_page_fast(pages, ram_addr, size);
+        page_collection_unlock(pages);
+    }
+
+    /*
+     * Set both VGA and migration bits for simplicity and to remove
+     * the notdirty callback faster.
+     */
+    cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
+
+    /* We remove the notdirty callback only if the code has been flushed. */
+    if (!cpu_physical_memory_is_clean(ram_addr)) {
+        trace_memory_notdirty_set_dirty(mem_vaddr);
+        tlb_set_dirty(cpu, mem_vaddr);
+    }
+}
+
 /*
  * Probe for whether the specified guest access is permitted. If it is not
  * permitted then an exception will be taken in the same way as if this
@@ -1204,8 +1236,7 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
 /* Probe for a read-modify-write atomic operation.  Do not allow unaligned
  * operations, or io operations to proceed.  Return the host address.  */
 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
-                               TCGMemOpIdx oi, uintptr_t retaddr,
-                               NotDirtyInfo *ndi)
+                               TCGMemOpIdx oi, uintptr_t retaddr)
 {
     size_t mmu_idx = get_mmuidx(oi);
     uintptr_t index = tlb_index(env, mmu_idx, addr);
@@ -1265,12 +1296,9 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
 
     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
 
-    ndi->active = false;
     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
-        ndi->active = true;
-        memory_notdirty_write_prepare(ndi, env_cpu(env), addr,
-                                      qemu_ram_addr_from_host_nofail(hostaddr),
-                                      1 << s_bits);
+        notdirty_write(env_cpu(env), addr, 1 << s_bits,
+                       &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
     }
 
     return hostaddr;
@@ -1641,28 +1669,13 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
         return;
     }
 
-    haddr = (void *)((uintptr_t)addr + entry->addend);
-
     /* Handle clean RAM pages.  */
     if (tlb_addr & TLB_NOTDIRTY) {
-        NotDirtyInfo ndi;
-
-        /* We require mem_io_pc in tb_invalidate_phys_page_range.  */
-        env_cpu(env)->mem_io_pc = retaddr;
-
-        memory_notdirty_write_prepare(&ndi, env_cpu(env), addr,
-                                      addr + iotlbentry->addr, size);
-
-        if (unlikely(need_swap)) {
-            store_memop(haddr, val, op ^ MO_BSWAP);
-        } else {
-            store_memop(haddr, val, op);
-        }
-
-        memory_notdirty_write_complete(&ndi);
-        return;
+        notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
     }
 
+    haddr = (void *)((uintptr_t)addr + entry->addend);
+
     /*
      * Keep these two store_memop separate to ensure that the compiler
      * is able to fold the entire function to a single instruction.
@@ -1793,14 +1806,9 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
 #define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
 #define ATOMIC_NAME(X) \
     HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
-#define ATOMIC_MMU_DECLS NotDirtyInfo ndi
-#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr, &ndi)
-#define ATOMIC_MMU_CLEANUP                              \
-    do {                                                \
-        if (unlikely(ndi.active)) {                     \
-            memory_notdirty_write_complete(&ndi);       \
-        }                                               \
-    } while (0)
+#define ATOMIC_MMU_DECLS
+#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
+#define ATOMIC_MMU_CLEANUP
 
 #define DATA_SIZE 1
 #include "atomic_template.h"
@@ -1828,7 +1836,7 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
 #undef ATOMIC_MMU_LOOKUP
 #define EXTRA_ARGS , TCGMemOpIdx oi
 #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
-#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC(), &ndi)
+#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())
 
 #define DATA_SIZE 1
 #include "atomic_template.h"
exec.c (44 lines)

@@ -2718,50 +2718,6 @@ ram_addr_t qemu_ram_addr_from_host(void *ptr)
     return block->offset + offset;
 }
 
-/* Called within RCU critical section.  */
-void memory_notdirty_write_prepare(NotDirtyInfo *ndi,
-                          CPUState *cpu,
-                          vaddr mem_vaddr,
-                          ram_addr_t ram_addr,
-                          unsigned size)
-{
-    ndi->cpu = cpu;
-    ndi->ram_addr = ram_addr;
-    ndi->mem_vaddr = mem_vaddr;
-    ndi->size = size;
-    ndi->pages = NULL;
-
-    trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
-
-    assert(tcg_enabled());
-    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
-        ndi->pages = page_collection_lock(ram_addr, ram_addr + size);
-        tb_invalidate_phys_page_fast(ndi->pages, ram_addr, size);
-    }
-}
-
-/* Called within RCU critical section. */
-void memory_notdirty_write_complete(NotDirtyInfo *ndi)
-{
-    if (ndi->pages) {
-        assert(tcg_enabled());
-        page_collection_unlock(ndi->pages);
-        ndi->pages = NULL;
-    }
-
-    /* Set both VGA and migration bits for simplicity and to remove
-     * the notdirty callback faster.
-     */
-    cpu_physical_memory_set_dirty_range(ndi->ram_addr, ndi->size,
-                                        DIRTY_CLIENTS_NOCODE);
-    /* we remove the notdirty callback only if the code has been
-       flushed */
-    if (!cpu_physical_memory_is_clean(ndi->ram_addr)) {
-        trace_memory_notdirty_set_dirty(ndi->mem_vaddr);
-        tlb_set_dirty(ndi->cpu, ndi->mem_vaddr);
-    }
-}
-
 /* Generate a debug exception if a watchpoint has been hit.  */
 void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
                           MemTxAttrs attrs, int flags, uintptr_t ra)
include/exec/memory-internal.h

@@ -49,70 +49,5 @@ void address_space_dispatch_free(AddressSpaceDispatch *d);
 
 void mtree_print_dispatch(struct AddressSpaceDispatch *d,
                           MemoryRegion *root);
 
-struct page_collection;
-
-/* Opaque struct for passing info from memory_notdirty_write_prepare()
- * to memory_notdirty_write_complete(). Callers should treat all fields
- * as private, with the exception of @active.
- *
- * @active is a field which is not touched by either the prepare or
- * complete functions, but which the caller can use if it wishes to
- * track whether it has called prepare for this struct and so needs
- * to later call the complete function.
- */
-typedef struct {
-    CPUState *cpu;
-    struct page_collection *pages;
-    ram_addr_t ram_addr;
-    vaddr mem_vaddr;
-    unsigned size;
-    bool active;
-} NotDirtyInfo;
-
-/**
- * memory_notdirty_write_prepare: call before writing to non-dirty memory
- * @ndi: pointer to opaque NotDirtyInfo struct
- * @cpu: CPU doing the write
- * @mem_vaddr: virtual address of write
- * @ram_addr: the ram address of the write
- * @size: size of write in bytes
- *
- * Any code which writes to the host memory corresponding to
- * guest RAM which has been marked as NOTDIRTY must wrap those
- * writes in calls to memory_notdirty_write_prepare() and
- * memory_notdirty_write_complete():
- *
- *  NotDirtyInfo ndi;
- *  memory_notdirty_write_prepare(&ndi, ....);
- *  ... perform write here ...
- *  memory_notdirty_write_complete(&ndi);
- *
- * These calls will ensure that we flush any TCG translated code for
- * the memory being written, update the dirty bits and (if possible)
- * remove the slowpath callback for writing to the memory.
- *
- * This must only be called if we are using TCG; it will assert otherwise.
- *
- * We may take locks in the prepare call, so callers must ensure that
- * they don't exit (via longjump or otherwise) without calling complete.
- *
- * This call must only be made inside an RCU critical section.
- * (Note that while we're executing a TCG TB we're always in an
- * RCU critical section, which is likely to be the case for callers
- * of these functions.)
- */
-void memory_notdirty_write_prepare(NotDirtyInfo *ndi,
-                                   CPUState *cpu,
-                                   vaddr mem_vaddr,
-                                   ram_addr_t ram_addr,
-                                   unsigned size);
-/**
- * memory_notdirty_write_complete: finish write to non-dirty memory
- * @ndi: pointer to the opaque NotDirtyInfo struct which was initialized
- * by memory_not_dirty_write_prepare().
- */
-void memory_notdirty_write_complete(NotDirtyInfo *ndi);
-
 #endif
 #endif