4a2e242bbb
With a vfio assigned device we lay down a base MemoryRegion registered as an IO region, giving us read & write accessors. If the region supports mmap, we lay down a higher priority sub-region MemoryRegion on top of the base layer initialized as a RAM device pointer to the mmap. Finally, if we have any quirks for the device (ie. address ranges that need additional virtualization support), we put another IO sub-region on top of the mmap MemoryRegion. When this is flattened, we now potentially have sub-page mmap MemoryRegions exposed which cannot be directly mapped through KVM. This is as expected, but a subtle detail of this is that we end up with two different access mechanisms through QEMU. If we disable the mmap MemoryRegion, we make use of the IO MemoryRegion and service accesses using pread and pwrite to the vfio device file descriptor. If the mmap MemoryRegion is enabled and results in one of these sub-page gaps, QEMU handles the access as RAM, using memcpy to the mmap. Using either pread/pwrite or the mmap directly should be correct, but using memcpy causes us problems. I expect that not only does memcpy not necessarily honor the original width and alignment in performing a copy, but it potentially also uses processor instructions not intended for MMIO spaces. It turns out that this has been a problem for Realtek NIC assignment, which has such a quirk that creates a sub-page mmap MemoryRegion access. To resolve this, we disable memory_access_is_direct() for ram_device regions since QEMU assumes that it can use memcpy for those regions. Instead we access through MemoryRegionOps, which replaces the memcpy with simple de-references of standard sizes to the host memory. With this patch we attempt to provide unrestricted access to the RAM device, allowing byte through qword access as well as unaligned access. The assumption here is that accesses initiated by the VM are driven by a device specific driver, which knows the device capabilities. 
If unaligned accesses are not supported by the device, we don't want them to work in a VM by performing multiple aligned accesses to compose the unaligned access. A down-side of this philosophy is that the xp command from the monitor attempts to use the largest available access width, unaware of the underlying device. Using memcpy had this same restriction, but at least now an operator can dump individual registers, even if blocks of device memory may result in access widths beyond the capabilities of a given device (RTL NICs only support up to dword). Reported-by: Thorsten Kohfeldt <thorsten.kohfeldt@gmx.de> Signed-off-by: Alex Williamson <alex.williamson@redhat.com> Acked-by: Paolo Bonzini <pbonzini@redhat.com>
181 lines
9.3 KiB
Plaintext
181 lines
9.3 KiB
Plaintext
# Trace events for debugging and performance instrumentation
|
|
#
|
|
# This file is processed by the tracetool script during the build.
|
|
#
|
|
# To add a new trace event:
|
|
#
|
|
# 1. Choose a name for the trace event. Declare its arguments and format
|
|
# string.
|
|
#
|
|
# 2. Call the trace event from code using trace_##name, e.g. multiwrite_cb() ->
|
|
# trace_multiwrite_cb(). The source file must #include "trace.h".
|
|
#
|
|
# Format of a trace event:
|
|
#
|
|
# [disable] <name>(<type1> <arg1>[, <type2> <arg2>] ...) "<format-string>"
|
|
#
|
|
# Example: g_malloc(size_t size) "size %zu"
|
|
#
|
|
# The "disable" keyword will build without the trace event.
|
|
#
|
|
# The <name> must be valid as a C function name.
|
|
#
|
|
# Types should be standard C types. Use void * for pointers because the trace
|
|
# system may not have the necessary headers included.
|
|
#
|
|
# The <format-string> should be a sprintf()-compatible format string.
|
|
|
|
# thread-pool.c
|
|
thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
|
|
thread_pool_complete(void *pool, void *req, void *opaque, int ret) "pool %p req %p opaque %p ret %d"
|
|
thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
|
|
|
|
# ioport.c
|
|
cpu_in(unsigned int addr, char size, unsigned int val) "addr %#x(%c) value %u"
|
|
cpu_out(unsigned int addr, char size, unsigned int val) "addr %#x(%c) value %u"
|
|
|
|
# balloon.c
|
|
# Since requests are raised via monitor, not many tracepoints are needed.
|
|
balloon_event(void *opaque, unsigned long addr) "opaque %p addr %lu"
|
|
|
|
# vl.c
|
|
vm_state_notify(int running, int reason) "running %d reason %d"
|
|
load_file(const char *name, const char *path) "name %s location %s"
|
|
runstate_set(int new_state) "new state %d"
|
|
system_wakeup_request(int reason) "reason=%d"
|
|
qemu_system_shutdown_request(void) ""
|
|
qemu_system_powerdown_request(void) ""
|
|
|
|
# spice-qemu-char.c
|
|
# out is the number of bytes reported written, len the number requested.
spice_vmc_write(ssize_t out, int len) "spice wrote %zd of requested %d"
|
|
spice_vmc_read(int bytes, int len) "spice read %d of requested %d"
|
|
spice_vmc_register_interface(void *scd) "spice vmc registered interface %p"
|
|
spice_vmc_unregister_interface(void *scd) "spice vmc unregistered interface %p"
|
|
spice_vmc_event(int event) "spice vmc event %d"
|
|
|
|
# xen-hvm.c
|
|
xen_ram_alloc(unsigned long ram_addr, unsigned long size) "requested: %#lx, size %#lx"
|
|
xen_client_set_memory(uint64_t start_addr, unsigned long size, bool log_dirty) "%#"PRIx64" size %#lx, log_dirty %i"
|
|
xen_default_ioreq_server(void) ""
|
|
xen_ioreq_server_create(uint32_t id) "id: %u"
|
|
xen_ioreq_server_destroy(uint32_t id) "id: %u"
|
|
xen_ioreq_server_state(uint32_t id, bool enable) "id: %u: enable: %i"
|
|
xen_map_mmio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64
|
|
xen_unmap_mmio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64
|
|
xen_map_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64
|
|
xen_unmap_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64
|
|
xen_map_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u bdf: %02x.%02x.%02x"
|
|
xen_unmap_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u bdf: %02x.%02x.%02x"
|
|
handle_ioreq(void *req, uint32_t type, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p type=%d dir=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d"
|
|
handle_ioreq_read(void *req, uint32_t type, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p read type=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d"
|
|
handle_ioreq_write(void *req, uint32_t type, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p write type=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d"
|
|
cpu_ioreq_pio(void *req, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p pio dir=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d"
|
|
cpu_ioreq_pio_read_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) "I/O=%p pio read reg data=%#"PRIx64" port=%#"PRIx64" size=%d"
|
|
cpu_ioreq_pio_write_reg(void *req, uint64_t data, uint64_t addr, uint32_t size) "I/O=%p pio write reg data=%#"PRIx64" port=%#"PRIx64" size=%d"
|
|
cpu_ioreq_move(void *req, uint32_t dir, uint32_t df, uint32_t data_is_ptr, uint64_t addr, uint64_t data, uint32_t count, uint32_t size) "I/O=%p copy dir=%d df=%d ptr=%d port=%#"PRIx64" data=%#"PRIx64" count=%d size=%d"
|
|
|
|
# xen-mapcache.c
|
|
xen_map_cache(uint64_t phys_addr) "want %#"PRIx64
|
|
xen_remap_bucket(uint64_t index) "index %#"PRIx64
|
|
xen_map_cache_return(void* ptr) "%p"
|
|
|
|
# monitor.c
|
|
handle_qmp_command(void *mon, const char *cmd_name) "mon %p cmd_name \"%s\""
|
|
monitor_protocol_event_handler(uint32_t event, void *qdict) "event=%d data=%p"
|
|
monitor_protocol_event_emit(uint32_t event, void *data) "event=%d data=%p"
|
|
# rate is declared uint64_t, so it is formatted with the unsigned PRIu64 macro.
monitor_protocol_event_queue(uint32_t event, void *qdict, uint64_t rate) "event=%d data=%p rate=%" PRIu64
|
|
# rate is declared uint64_t, so it is formatted with the unsigned PRIu64 macro.
monitor_protocol_event_throttle(uint32_t event, uint64_t rate) "event=%d rate=%" PRIu64
|
|
|
|
# dma-helpers.c
|
|
dma_blk_io(void *dbs, void *bs, int64_t offset, bool to_dev) "dbs=%p bs=%p offset=%" PRId64 " to_dev=%d"
|
|
dma_aio_cancel(void *dbs) "dbs=%p"
|
|
dma_complete(void *dbs, int ret, void *cb) "dbs=%p ret=%d cb=%p"
|
|
dma_blk_cb(void *dbs, int ret) "dbs=%p ret=%d"
|
|
dma_map_wait(void *dbs) "dbs=%p"
|
|
|
|
# kvm-all.c
|
|
kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
|
|
kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"
|
|
kvm_vcpu_ioctl(int cpu_index, int type, void *arg) "cpu_index %d, type 0x%x, arg %p"
|
|
kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d"
|
|
kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
|
|
kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
|
|
kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
|
|
kvm_irqchip_commit_routes(void) ""
|
|
kvm_irqchip_add_msi_route(int virq) "Adding MSI route virq=%d"
|
|
kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
|
|
|
|
# TCG related tracing (mostly disabled by default)
|
|
# cpu-exec.c
|
|
disable exec_tb(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
|
|
disable exec_tb_nocache(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR
|
|
disable exec_tb_exit(void *last_tb, unsigned int flags) "tb:%p flags=%x"
|
|
|
|
# translate-all.c
|
|
translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p"
|
|
|
|
# memory.c
|
|
memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
|
|
memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
|
|
memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset %#"PRIx64" value %#"PRIx64" size %u"
|
|
memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset %#"PRIx64" value %#"PRIx64" size %u"
|
|
memory_region_tb_read(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"
|
|
memory_region_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"
|
|
memory_region_ram_device_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
|
|
memory_region_ram_device_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
|
|
|
|
### Guest events, keep at bottom
|
|
|
|
|
|
## vCPU
|
|
|
|
# Hot-plug a new virtual (guest) CPU
|
|
#
|
|
# Mode: user, softmmu
|
|
# Targets: all
|
|
vcpu guest_cpu_enter(void)
|
|
|
|
# Reset the state of a virtual (guest) CPU
|
|
#
|
|
# Mode: user, softmmu
|
|
# Targets: all
|
|
vcpu guest_cpu_reset(void)
|
|
|
|
# @vaddr: Access' virtual address.
|
|
# @info : Access' information (see below).
|
|
#
|
|
# Start virtual memory access (before any potential access violation).
|
|
#
|
|
# Does not include memory accesses performed by devices.
|
|
#
|
|
# Access information can be parsed as:
|
|
#
|
|
# struct mem_info {
|
|
# uint8_t size_shift : 2; /* interpreted as "1 << size_shift" bytes */
|
|
# bool sign_extend: 1; /* sign-extended */
|
|
# uint8_t endianness : 1; /* 0: little, 1: big */
|
|
# bool store : 1; /* whether it's a store operation */
|
|
# };
|
|
#
|
|
# Mode: user, softmmu
|
|
# Targets: TCG(all)
|
|
disable vcpu tcg guest_mem_before(TCGv vaddr, uint8_t info) "info=%d", "vaddr=0x%016"PRIx64" info=%d"
|
|
|
|
# @num: System call number.
|
|
# @arg*: System call argument value.
|
|
#
|
|
# Start executing a guest system call in syscall emulation mode.
|
|
#
|
|
# Mode: user
|
|
# Targets: TCG(all)
|
|
disable vcpu guest_user_syscall(uint64_t num, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7, uint64_t arg8) "num=0x%016"PRIx64" arg1=0x%016"PRIx64" arg2=0x%016"PRIx64" arg3=0x%016"PRIx64" arg4=0x%016"PRIx64" arg5=0x%016"PRIx64" arg6=0x%016"PRIx64" arg7=0x%016"PRIx64" arg8=0x%016"PRIx64
|
|
|
|
# @num: System call number.
|
|
# @ret: System call result value.
|
|
#
|
|
# Finish executing a guest system call in syscall emulation mode.
|
|
#
|
|
# Mode: user
|
|
# Targets: TCG(all)
|
|
disable vcpu guest_user_syscall_ret(uint64_t num, uint64_t ret) "num=0x%016"PRIx64" ret=0x%016"PRIx64
|