From 707ff80021ccd7a68f4b3d2c44eebf87efbb41c4 Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Tue, 20 Jan 2015 11:05:07 +0000 Subject: [PATCH 1/4] Add device listener interface The Xen ioreq-server API, introduced in Xen 4.5, requires that PCI device models explicitly register with Xen for config space accesses. This patch adds a listener interface into qdev-core which can be used by the Xen interface code to monitor for arrival and departure of PCI devices. Signed-off-by: Paul Durrant Signed-off-by: Stefano Stabellini Reviewed-by: Paolo Bonzini --- hw/core/qdev.c | 53 +++++++++++++++++++++++++++++++++++++++++ include/hw/qdev-core.h | 10 ++++++++ include/qemu/typedefs.h | 1 + 3 files changed, 64 insertions(+) diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 901f289860..2eacac0787 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -189,6 +189,56 @@ int qdev_init(DeviceState *dev) return 0; } +static QTAILQ_HEAD(device_listeners, DeviceListener) device_listeners + = QTAILQ_HEAD_INITIALIZER(device_listeners); + +enum ListenerDirection { Forward, Reverse }; + +#define DEVICE_LISTENER_CALL(_callback, _direction, _args...) \ + do { \ + DeviceListener *_listener; \ + \ + switch (_direction) { \ + case Forward: \ + QTAILQ_FOREACH(_listener, &device_listeners, link) { \ + if (_listener->_callback) { \ + _listener->_callback(_listener, ##_args); \ + } \ + } \ + break; \ + case Reverse: \ + QTAILQ_FOREACH_REVERSE(_listener, &device_listeners, \ + device_listeners, link) { \ + if (_listener->_callback) { \ + _listener->_callback(_listener, ##_args); \ + } \ + } \ + break; \ + default: \ + abort(); \ + } \ + } while (0) + +static int device_listener_add(DeviceState *dev, void *opaque) +{ + DEVICE_LISTENER_CALL(realize, Forward, dev); + + return 0; +} + +void device_listener_register(DeviceListener *listener) +{ + QTAILQ_INSERT_TAIL(&device_listeners, listener, link); + + qbus_walk_children(sysbus_get_default(), NULL, NULL, device_listener_add, + NULL, NULL); +} + +void device_listener_unregister(DeviceListener *listener) +{ + QTAILQ_REMOVE(&device_listeners, listener, link); +} + static void device_realize(DeviceState *dev, Error **errp) { DeviceClass *dc = DEVICE_GET_CLASS(dev); @@ -994,6 +1044,8 @@ static void device_set_realized(Object *obj, bool value, Error **errp) goto fail; } + DEVICE_LISTENER_CALL(realize, Forward, dev); + hotplug_ctrl = qdev_get_hotplug_handler(dev); if (hotplug_ctrl) { hotplug_handler_plug(hotplug_ctrl, dev, &local_err); @@ -1035,6 +1087,7 @@ static void device_set_realized(Object *obj, bool value, Error **errp) dc->unrealize(dev, local_errp); } dev->pending_deleted_event = true; + DEVICE_LISTENER_CALL(unrealize, Reverse, dev); } if (local_err != NULL) { diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 589bbe7360..15a226f24a 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -165,6 +165,12 @@ struct DeviceState { int alias_required_for_version; }; +struct DeviceListener { + void (*realize)(DeviceListener *listener, DeviceState *dev); + void (*unrealize)(DeviceListener *listener, DeviceState *dev); + QTAILQ_ENTRY(DeviceListener) link; +}; + #define TYPE_BUS "bus" #define BUS(obj) OBJECT_CHECK(BusState, (obj), TYPE_BUS) #define BUS_CLASS(klass) OBJECT_CLASS_CHECK(BusClass, (klass), TYPE_BUS) @@ -376,4 +382,8 @@ static inline bool qbus_is_hotpluggable(BusState *bus) { return bus->hotplug_handler; } + +void device_listener_register(DeviceListener *listener); +void device_listener_unregister(DeviceListener *listener); + #endif diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index f2bbaaf86a..cde3314896 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -17,6 +17,7 @@ typedef struct BusState BusState; typedef struct CharDriverState CharDriverState; typedef struct CompatProperty CompatProperty; typedef struct DeviceState DeviceState; +typedef struct DeviceListener DeviceListener; typedef struct DisplayChangeListener DisplayChangeListener; typedef struct DisplayState DisplayState; typedef struct DisplaySurface DisplaySurface; From 3996e85c1822e05c50250f8d2d1e57b6bea1229d Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Tue, 20 Jan 2015 11:06:19 +0000 Subject: [PATCH 2/4] Xen: Use the ioreq-server API when available The ioreq-server API added to Xen 4.5 offers better security than the existing Xen/QEMU interface because the shared pages that are used to pass emulation request/results back and forth are removed from the guest's memory space before any requests are serviced. This prevents the guest from mapping these pages (they are in a well known location) and attempting to attack QEMU by synthesizing its own request structures. Hence, this patch modifies configure to detect whether the API is available, and adds the necessary code to use the API if it is. Signed-off-by: Paul Durrant Signed-off-by: Stefano Stabellini Acked-by: Stefano Stabellini --- configure | 29 +++++ include/hw/xen/xen_common.h | 223 ++++++++++++++++++++++++++++++++++++ trace-events | 9 ++ xen-hvm.c | 160 ++++++++++++++++++++++---- 4 files changed, 399 insertions(+), 22 deletions(-) diff --git a/configure b/configure index 7539645521..5ea1014925 100755 --- a/configure +++ b/configure @@ -1869,6 +1869,32 @@ EOF #if !defined(HVM_MAX_VCPUS) # error HVM_MAX_VCPUS not defined #endif +int main(void) { + xc_interface *xc; + xs_daemon_open(); + xc = xc_interface_open(0, 0, 0); + xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0); + xc_gnttab_open(NULL, 0); + xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0); + xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000); + xc_hvm_create_ioreq_server(xc, 0, 0, NULL); + return 0; +} +EOF + compile_prog "" "$xen_libs" + then + xen_ctrl_version=450 + xen=yes + + elif + cat > $TMPC < +#include +#include +#include +#if !defined(HVM_MAX_VCPUS) +# error HVM_MAX_VCPUS not defined +#endif int main(void) { xc_interface *xc; xs_daemon_open(); @@ -4283,6 +4309,9 @@ if test -n "$sparc_cpu"; then echo "Target Sparc Arch $sparc_cpu" fi echo "xen support $xen" +if test "$xen" = "yes" ; then + echo "xen ctrl version $xen_ctrl_version" +fi echo "brlapi support $brlapi" echo "bluez support $bluez" echo "Documentation $docs" diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h index 95612a40c1..519696f255 100644 --- a/include/hw/xen/xen_common.h +++ b/include/hw/xen/xen_common.h @@ -16,7 +16,9 @@ #include "hw/hw.h" #include "hw/xen/xen.h" +#include "hw/pci/pci.h" #include "qemu/queue.h" +#include "trace.h" /* * We don't support Xen prior to 3.3.0. @@ -179,4 +181,225 @@ static inline int xen_get_vmport_regs_pfn(XenXC xc, domid_t dom, } #endif +/* Xen before 4.5 */ +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 450 + +#ifndef HVM_PARAM_BUFIOREQ_EVTCHN +#define HVM_PARAM_BUFIOREQ_EVTCHN 26 +#endif + +#define IOREQ_TYPE_PCI_CONFIG 2 + +typedef uint32_t ioservid_t; + +static inline void xen_map_memory_section(XenXC xc, domid_t dom, + ioservid_t ioservid, + MemoryRegionSection *section) +{ +} + +static inline void xen_unmap_memory_section(XenXC xc, domid_t dom, + ioservid_t ioservid, + MemoryRegionSection *section) +{ +} + +static inline void xen_map_io_section(XenXC xc, domid_t dom, + ioservid_t ioservid, + MemoryRegionSection *section) +{ +} + +static inline void xen_unmap_io_section(XenXC xc, domid_t dom, + ioservid_t ioservid, + MemoryRegionSection *section) +{ +} + +static inline void xen_map_pcidev(XenXC xc, domid_t dom, + ioservid_t ioservid, + PCIDevice *pci_dev) +{ +} + +static inline void xen_unmap_pcidev(XenXC xc, domid_t dom, + ioservid_t ioservid, + PCIDevice *pci_dev) +{ +} + +static inline int xen_create_ioreq_server(XenXC xc, domid_t dom, + ioservid_t *ioservid) +{ + return 0; +} + +static inline void xen_destroy_ioreq_server(XenXC xc, domid_t dom, + ioservid_t ioservid) +{ +} + +static inline int xen_get_ioreq_server_info(XenXC xc, domid_t dom, + ioservid_t ioservid, + xen_pfn_t *ioreq_pfn, + xen_pfn_t *bufioreq_pfn, + evtchn_port_t *bufioreq_evtchn) +{ + unsigned long param; + int rc; + + rc = xc_get_hvm_param(xc, dom, HVM_PARAM_IOREQ_PFN, ¶m); + if (rc < 0) { + fprintf(stderr, "failed to get HVM_PARAM_IOREQ_PFN\n"); + return -1; + } + + *ioreq_pfn = param; + + rc = xc_get_hvm_param(xc, dom, HVM_PARAM_BUFIOREQ_PFN, ¶m); + if (rc < 0) { + fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_PFN\n"); + return -1; + } + + *bufioreq_pfn = param; + + rc = xc_get_hvm_param(xc, dom, HVM_PARAM_BUFIOREQ_EVTCHN, + ¶m); + if (rc < 0) { + fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_EVTCHN\n"); + return -1; + } + + *bufioreq_evtchn = param; + + return 0; +} + +static inline int xen_set_ioreq_server_state(XenXC xc, domid_t dom, + ioservid_t ioservid, + bool enable) +{ + return 0; +} + +/* Xen 4.5 */ +#else + +static inline void xen_map_memory_section(XenXC xc, domid_t dom, + ioservid_t ioservid, + MemoryRegionSection *section) +{ + hwaddr start_addr = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + hwaddr end_addr = start_addr + size - 1; + + trace_xen_map_mmio_range(ioservid, start_addr, end_addr); + xc_hvm_map_io_range_to_ioreq_server(xc, dom, ioservid, 1, + start_addr, end_addr); +} + +static inline void xen_unmap_memory_section(XenXC xc, domid_t dom, + ioservid_t ioservid, + MemoryRegionSection *section) +{ + hwaddr start_addr = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + hwaddr end_addr = start_addr + size - 1; + + trace_xen_unmap_mmio_range(ioservid, start_addr, end_addr); + xc_hvm_unmap_io_range_from_ioreq_server(xc, dom, ioservid, 1, + start_addr, end_addr); +} + +static inline void xen_map_io_section(XenXC xc, domid_t dom, + ioservid_t ioservid, + MemoryRegionSection *section) +{ + hwaddr start_addr = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + hwaddr end_addr = start_addr + size - 1; + + trace_xen_map_portio_range(ioservid, start_addr, end_addr); + xc_hvm_map_io_range_to_ioreq_server(xc, dom, ioservid, 0, + start_addr, end_addr); +} + +static inline void xen_unmap_io_section(XenXC xc, domid_t dom, + ioservid_t ioservid, + MemoryRegionSection *section) +{ + hwaddr start_addr = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + hwaddr end_addr = start_addr + size - 1; + + trace_xen_unmap_portio_range(ioservid, start_addr, end_addr); + xc_hvm_unmap_io_range_from_ioreq_server(xc, dom, ioservid, 0, + start_addr, end_addr); +} + +static inline void xen_map_pcidev(XenXC xc, domid_t dom, + ioservid_t ioservid, + PCIDevice *pci_dev) +{ + trace_xen_map_pcidev(ioservid, pci_bus_num(pci_dev->bus), + PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn)); + xc_hvm_map_pcidev_to_ioreq_server(xc, dom, ioservid, + 0, pci_bus_num(pci_dev->bus), + PCI_SLOT(pci_dev->devfn), + PCI_FUNC(pci_dev->devfn)); +} + +static inline void xen_unmap_pcidev(XenXC xc, domid_t dom, + ioservid_t ioservid, + PCIDevice *pci_dev) +{ + trace_xen_unmap_pcidev(ioservid, pci_bus_num(pci_dev->bus), + PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn)); + xc_hvm_unmap_pcidev_from_ioreq_server(xc, dom, ioservid, + 0, pci_bus_num(pci_dev->bus), + PCI_SLOT(pci_dev->devfn), + PCI_FUNC(pci_dev->devfn)); +} + +static inline int xen_create_ioreq_server(XenXC xc, domid_t dom, + ioservid_t *ioservid) +{ + int rc = xc_hvm_create_ioreq_server(xc, dom, 1, ioservid); + + if (rc == 0) { + trace_xen_ioreq_server_create(*ioservid); + } + + return rc; +} + +static inline void xen_destroy_ioreq_server(XenXC xc, domid_t dom, + ioservid_t ioservid) +{ + trace_xen_ioreq_server_destroy(ioservid); + xc_hvm_destroy_ioreq_server(xc, dom, ioservid); +} + +static inline int xen_get_ioreq_server_info(XenXC xc, domid_t dom, + ioservid_t ioservid, + xen_pfn_t *ioreq_pfn, + xen_pfn_t *bufioreq_pfn, + evtchn_port_t *bufioreq_evtchn) +{ + return xc_hvm_get_ioreq_server_info(xc, dom, ioservid, + ioreq_pfn, bufioreq_pfn, + bufioreq_evtchn); +} + +static inline int xen_set_ioreq_server_state(XenXC xc, domid_t dom, + ioservid_t ioservid, + bool enable) +{ + trace_xen_ioreq_server_state(ioservid, enable); + return xc_hvm_set_ioreq_server_state(xc, dom, ioservid, enable); +} + +#endif + #endif /* QEMU_HW_XEN_COMMON_H */ diff --git a/trace-events b/trace-events index 8acbcce0f0..4ec81eb534 100644 --- a/trace-events +++ b/trace-events @@ -897,6 +897,15 @@ pvscsi_tx_rings_num_pages(const char* label, uint32_t num) "Number of %s pages: # xen-hvm.c xen_ram_alloc(unsigned long ram_addr, unsigned long size) "requested: %#lx, size %#lx" xen_client_set_memory(uint64_t start_addr, unsigned long size, bool log_dirty) "%#"PRIx64" size %#lx, log_dirty %i" +xen_ioreq_server_create(uint32_t id) "id: %u" +xen_ioreq_server_destroy(uint32_t id) "id: %u" +xen_ioreq_server_state(uint32_t id, bool enable) "id: %u: enable: %i" +xen_map_mmio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64 +xen_unmap_mmio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64 +xen_map_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64 +xen_unmap_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64 +xen_map_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u bdf: %02x.%02x.%02x" +xen_unmap_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u bdf: %02x.%02x.%02x" # xen-mapcache.c xen_map_cache(uint64_t phys_addr) "want %#"PRIx64 diff --git a/xen-hvm.c b/xen-hvm.c index e2e575b099..315864ca70 100644 --- a/xen-hvm.c +++ b/xen-hvm.c @@ -85,9 +85,6 @@ static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu) } # define FMT_ioreq_size "u" #endif -#ifndef HVM_PARAM_BUFIOREQ_EVTCHN -#define HVM_PARAM_BUFIOREQ_EVTCHN 26 -#endif #define BUFFER_IO_MAX_DELAY 100 /* Leave some slack so that hvmloader does not complain about lack of @@ -107,6 +104,7 @@ typedef struct XenPhysmap { } XenPhysmap; typedef struct XenIOState { + ioservid_t ioservid; shared_iopage_t *shared_page; shared_vmport_iopage_t *shared_vmport_page; buffered_iopage_t *buffered_io_page; @@ -123,6 +121,8 @@ typedef struct XenIOState { struct xs_handle *xenstore; MemoryListener memory_listener; + MemoryListener io_listener; + DeviceListener device_listener; QLIST_HEAD(, XenPhysmap) physmap; hwaddr free_phys_offset; const XenPhysmap *log_for_dirtybit; @@ -491,12 +491,23 @@ static void xen_set_memory(struct MemoryListener *listener, bool log_dirty = memory_region_is_logging(section->mr); hvmmem_type_t mem_type; + if (section->mr == &ram_memory) { + return; + } else { + if (add) { + xen_map_memory_section(xen_xc, xen_domid, state->ioservid, + section); + } else { + xen_unmap_memory_section(xen_xc, xen_domid, state->ioservid, + section); + } + } + if (!memory_region_is_ram(section->mr)) { return; } - if (!(section->mr != &ram_memory - && ( (log_dirty && add) || (!log_dirty && !add)))) { + if (log_dirty != add) { return; } @@ -539,6 +550,50 @@ static void xen_region_del(MemoryListener *listener, memory_region_unref(section->mr); } +static void xen_io_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + XenIOState *state = container_of(listener, XenIOState, io_listener); + + memory_region_ref(section->mr); + + xen_map_io_section(xen_xc, xen_domid, state->ioservid, section); +} + +static void xen_io_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + XenIOState *state = container_of(listener, XenIOState, io_listener); + + xen_unmap_io_section(xen_xc, xen_domid, state->ioservid, section); + + memory_region_unref(section->mr); +} + +static void xen_device_realize(DeviceListener *listener, + DeviceState *dev) +{ + XenIOState *state = container_of(listener, XenIOState, device_listener); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + PCIDevice *pci_dev = PCI_DEVICE(dev); + + xen_map_pcidev(xen_xc, xen_domid, state->ioservid, pci_dev); + } +} + +static void xen_device_unrealize(DeviceListener *listener, + DeviceState *dev) +{ + XenIOState *state = container_of(listener, XenIOState, device_listener); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + PCIDevice *pci_dev = PCI_DEVICE(dev); + + xen_unmap_pcidev(xen_xc, xen_domid, state->ioservid, pci_dev); + } +} + static void xen_sync_dirty_bitmap(XenIOState *state, hwaddr start_addr, ram_addr_t size) @@ -639,6 +694,17 @@ static MemoryListener xen_memory_listener = { .priority = 10, }; +static MemoryListener xen_io_listener = { + .region_add = xen_io_add, + .region_del = xen_io_del, + .priority = 10, +}; + +static DeviceListener xen_device_listener = { + .realize = xen_device_realize, + .unrealize = xen_device_unrealize, +}; + /* get the ioreq packets from share mem */ static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu) { @@ -887,6 +953,27 @@ static void handle_ioreq(XenIOState *state, ioreq_t *req) case IOREQ_TYPE_INVALIDATE: xen_invalidate_map_cache(); break; + case IOREQ_TYPE_PCI_CONFIG: { + uint32_t sbdf = req->addr >> 32; + uint32_t val; + + /* Fake a write to port 0xCF8 so that + * the config space access will target the + * correct device model. + */ + val = (1u << 31) | + ((req->addr & 0x0f00) << 16) | + ((sbdf & 0xffff) << 8) | + (req->addr & 0xfc); + do_outp(0xcf8, 4, val); + + /* Now issue the config space access via + * port 0xCFC + */ + req->addr = 0xcfc | (req->addr & 0x03); + cpu_ioreq_pio(req); + break; + } default: hw_error("Invalid ioreq type 0x%x\n", req->type); } @@ -1017,9 +1104,15 @@ static void xen_main_loop_prepare(XenIOState *state) static void xen_hvm_change_state_handler(void *opaque, int running, RunState rstate) { + XenIOState *state = opaque; + if (running) { - xen_main_loop_prepare((XenIOState *)opaque); + xen_main_loop_prepare(state); } + + xen_set_ioreq_server_state(xen_xc, xen_domid, + state->ioservid, + (rstate == RUN_STATE_RUNNING)); } static void xen_exit_notifier(Notifier *n, void *data) @@ -1088,8 +1181,9 @@ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size, MemoryRegion **ram_memory) { int i, rc; - unsigned long ioreq_pfn; - unsigned long bufioreq_evtchn; + xen_pfn_t ioreq_pfn; + xen_pfn_t bufioreq_pfn; + evtchn_port_t bufioreq_evtchn; XenIOState *state; state = g_malloc0(sizeof (XenIOState)); @@ -1106,6 +1200,12 @@ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size, return -1; } + rc = xen_create_ioreq_server(xen_xc, xen_domid, &state->ioservid); + if (rc < 0) { + perror("xen: ioreq server create"); + return -1; + } + state->exit.notify = xen_exit_notifier; qemu_add_exit_notifier(&state->exit); @@ -1115,8 +1215,18 @@ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size, state->wakeup.notify = xen_wakeup_notifier; qemu_register_wakeup_notifier(&state->wakeup); - xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_IOREQ_PFN, &ioreq_pfn); + rc = xen_get_ioreq_server_info(xen_xc, xen_domid, state->ioservid, + &ioreq_pfn, &bufioreq_pfn, + &bufioreq_evtchn); + if (rc < 0) { + hw_error("failed to get ioreq server info: error %d handle=" XC_INTERFACE_FMT, + errno, xen_xc); + } + DPRINTF("shared page at pfn %lx\n", ioreq_pfn); + DPRINTF("buffered io page at pfn %lx\n", bufioreq_pfn); + DPRINTF("buffered io evtchn is %x\n", bufioreq_evtchn); + state->shared_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE, PROT_READ|PROT_WRITE, ioreq_pfn); if (state->shared_page == NULL) { @@ -1138,10 +1248,10 @@ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size, hw_error("get vmport regs pfn returned error %d, rc=%d", errno, rc); } - xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_PFN, &ioreq_pfn); - DPRINTF("buffered io page at pfn %lx\n", ioreq_pfn); - state->buffered_io_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE, - PROT_READ|PROT_WRITE, ioreq_pfn); + state->buffered_io_page = xc_map_foreign_range(xen_xc, xen_domid, + XC_PAGE_SIZE, + PROT_READ|PROT_WRITE, + bufioreq_pfn); if (state->buffered_io_page == NULL) { hw_error("map buffered IO page returned error %d", errno); } @@ -1149,6 +1259,12 @@ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size, /* Note: cpus is empty at this point in init */ state->cpu_by_vcpu_id = g_malloc0(max_cpus * sizeof(CPUState *)); + rc = xen_set_ioreq_server_state(xen_xc, xen_domid, state->ioservid, true); + if (rc < 0) { + hw_error("failed to enable ioreq server info: error %d handle=" XC_INTERFACE_FMT, + errno, xen_xc); + } + state->ioreq_local_port = g_malloc0(max_cpus * sizeof (evtchn_port_t)); /* FIXME: how about if we overflow the page here? */ @@ -1156,22 +1272,16 @@ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size, rc = xc_evtchn_bind_interdomain(state->xce_handle, xen_domid, xen_vcpu_eport(state->shared_page, i)); if (rc == -1) { - fprintf(stderr, "bind interdomain ioctl error %d\n", errno); + fprintf(stderr, "shared evtchn %d bind error %d\n", i, errno); return -1; } state->ioreq_local_port[i] = rc; } - rc = xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_EVTCHN, - &bufioreq_evtchn); - if (rc < 0) { - fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_EVTCHN\n"); - return -1; - } rc = xc_evtchn_bind_interdomain(state->xce_handle, xen_domid, - (uint32_t)bufioreq_evtchn); + bufioreq_evtchn); if (rc == -1) { - fprintf(stderr, "bind interdomain ioctl error %d\n", errno); + fprintf(stderr, "buffered evtchn bind error %d\n", errno); return -1; } state->bufioreq_local_port = rc; @@ -1187,6 +1297,12 @@ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size, memory_listener_register(&state->memory_listener, &address_space_memory); state->log_for_dirtybit = NULL; + state->io_listener = xen_io_listener; + memory_listener_register(&state->io_listener, &address_space_io); + + state->device_listener = xen_device_listener; + device_listener_register(&state->device_listener); + /* Initialize backend core & drivers */ if (xen_be_init() != 0) { fprintf(stderr, "%s: xen backend core setup failed\n", __FUNCTION__); From 9b6d7b365dc211bbeadb0cf2e7bf1a019001c7f8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 Jan 2015 11:20:55 +0100 Subject: [PATCH 3/4] xen: do not use __-named variables in mapcache Keep the namespace clean. Signed-off-by: Paolo Bonzini Signed-off-by: Stefano Stabellini --- xen-mapcache.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/xen-mapcache.c b/xen-mapcache.c index 66da1a6d47..458069b705 100644 --- a/xen-mapcache.c +++ b/xen-mapcache.c @@ -199,8 +199,8 @@ uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size, MapCacheEntry *entry, *pentry = NULL; hwaddr address_index; hwaddr address_offset; - hwaddr __size = size; - hwaddr __test_bit_size = size; + hwaddr cache_size = size; + hwaddr test_bit_size; bool translated = false; tryagain: @@ -209,22 +209,22 @@ tryagain: trace_xen_map_cache(phys_addr); - /* __test_bit_size is always a multiple of XC_PAGE_SIZE */ + /* test_bit_size is always a multiple of XC_PAGE_SIZE */ if (size) { - __test_bit_size = size + (phys_addr & (XC_PAGE_SIZE - 1)); + test_bit_size = size + (phys_addr & (XC_PAGE_SIZE - 1)); - if (__test_bit_size % XC_PAGE_SIZE) { - __test_bit_size += XC_PAGE_SIZE - (__test_bit_size % XC_PAGE_SIZE); + if (test_bit_size % XC_PAGE_SIZE) { + test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE); } } else { - __test_bit_size = XC_PAGE_SIZE; + test_bit_size = XC_PAGE_SIZE; } if (mapcache->last_entry != NULL && mapcache->last_entry->paddr_index == address_index && - !lock && !__size && + !lock && !size && test_bits(address_offset >> XC_PAGE_SHIFT, - __test_bit_size >> XC_PAGE_SHIFT, + test_bit_size >> XC_PAGE_SHIFT, mapcache->last_entry->valid_mapping)) { trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset); return mapcache->last_entry->vaddr_base + address_offset; @@ -232,20 +232,20 @@ tryagain: /* size is always a multiple of MCACHE_BUCKET_SIZE */ if (size) { - __size = size + address_offset; - if (__size % MCACHE_BUCKET_SIZE) { - __size += MCACHE_BUCKET_SIZE - (__size % MCACHE_BUCKET_SIZE); + cache_size = size + address_offset; + if (cache_size % MCACHE_BUCKET_SIZE) { + cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE); } } else { - __size = MCACHE_BUCKET_SIZE; + cache_size = MCACHE_BUCKET_SIZE; } entry = &mapcache->entry[address_index % mapcache->nr_buckets]; while (entry && entry->lock && entry->vaddr_base && - (entry->paddr_index != address_index || entry->size != __size || + (entry->paddr_index != address_index || entry->size != cache_size || !test_bits(address_offset >> XC_PAGE_SHIFT, - __test_bit_size >> XC_PAGE_SHIFT, + test_bit_size >> XC_PAGE_SHIFT, entry->valid_mapping))) { pentry = entry; entry = entry->next; @@ -253,19 +253,19 @@ tryagain: if (!entry) { entry = g_malloc0(sizeof (MapCacheEntry)); pentry->next = entry; - xen_remap_bucket(entry, __size, address_index); + xen_remap_bucket(entry, cache_size, address_index); } else if (!entry->lock) { if (!entry->vaddr_base || entry->paddr_index != address_index || - entry->size != __size || + entry->size != cache_size || !test_bits(address_offset >> XC_PAGE_SHIFT, - __test_bit_size >> XC_PAGE_SHIFT, + test_bit_size >> XC_PAGE_SHIFT, entry->valid_mapping)) { - xen_remap_bucket(entry, __size, address_index); + xen_remap_bucket(entry, cache_size, address_index); } } if(!test_bits(address_offset >> XC_PAGE_SHIFT, - __test_bit_size >> XC_PAGE_SHIFT, + test_bit_size >> XC_PAGE_SHIFT, entry->valid_mapping)) { mapcache->last_entry = NULL; if (!translated && mapcache->phys_offset_to_gaddr) { From 86a6a9bf551ffa183880480b37c5836d3916687a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 Jan 2015 11:20:56 +0100 Subject: [PATCH 4/4] xen: add a lock for the mapcache Extend the existing dummy mapcache_lock/unlock macros to cover all of xen-mapcache.c. This prepares for unlocked memory access, when parts of exec.c will not be protected by the BQL. Signed-off-by: Paolo Bonzini Signed-off-by: Stefano Stabellini --- xen-mapcache.c | 54 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/xen-mapcache.c b/xen-mapcache.c index 458069b705..8cefd0c62f 100644 --- a/xen-mapcache.c +++ b/xen-mapcache.c @@ -49,9 +49,6 @@ */ #define NON_MCACHE_MEMORY_SIZE (80 * 1024 * 1024) -#define mapcache_lock() ((void)0) -#define mapcache_unlock() ((void)0) - typedef struct MapCacheEntry { hwaddr paddr_index; uint8_t *vaddr_base; @@ -79,11 +76,22 @@ typedef struct MapCache { unsigned int mcache_bucket_shift; phys_offset_to_gaddr_t phys_offset_to_gaddr; + QemuMutex lock; void *opaque; } MapCache; static MapCache *mapcache; +static inline void mapcache_lock(void) +{ + qemu_mutex_lock(&mapcache->lock); +} + +static inline void mapcache_unlock(void) +{ + qemu_mutex_unlock(&mapcache->lock); +} + static inline int test_bits(int nr, int size, const unsigned long *addr) { unsigned long res = find_next_zero_bit(addr, size + nr, nr); @@ -102,6 +110,7 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque) mapcache->phys_offset_to_gaddr = f; mapcache->opaque = opaque; + qemu_mutex_init(&mapcache->lock); QTAILQ_INIT(&mapcache->locked_entries); @@ -193,8 +202,8 @@ static void xen_remap_bucket(MapCacheEntry *entry, g_free(err); } -uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size, - uint8_t lock) +static uint8_t *xen_map_cache_unlocked(hwaddr phys_addr, hwaddr size, + uint8_t lock) { MapCacheEntry *entry, *pentry = NULL; hwaddr address_index; @@ -291,14 +300,27 @@ tryagain: return mapcache->last_entry->vaddr_base + address_offset; } +uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size, + uint8_t lock) +{ + uint8_t *p; + + mapcache_lock(); + p = xen_map_cache_unlocked(phys_addr, size, lock); + mapcache_unlock(); + return p; +} + ram_addr_t xen_ram_addr_from_mapcache(void *ptr) { MapCacheEntry *entry = NULL; MapCacheRev *reventry; hwaddr paddr_index; hwaddr size; + ram_addr_t raddr; int found = 0; + mapcache_lock(); QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { if (reventry->vaddr_req == ptr) { paddr_index = reventry->paddr_index; @@ -323,13 +345,16 @@ ram_addr_t xen_ram_addr_from_mapcache(void *ptr) } if (!entry) { DPRINTF("Trying to find address %p that is not in the mapcache!\n", ptr); - return 0; + raddr = 0; + } else { + raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) + + ((unsigned long) ptr - (unsigned long) entry->vaddr_base); } - return (reventry->paddr_index << MCACHE_BUCKET_SHIFT) + - ((unsigned long) ptr - (unsigned long) entry->vaddr_base); + mapcache_unlock(); + return raddr; } -void xen_invalidate_map_cache_entry(uint8_t *buffer) +static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer) { MapCacheEntry *entry = NULL, *pentry = NULL; MapCacheRev *reventry; @@ -383,6 +408,13 @@ void xen_invalidate_map_cache_entry(uint8_t *buffer) g_free(entry); } +void xen_invalidate_map_cache_entry(uint8_t *buffer) +{ + mapcache_lock(); + xen_invalidate_map_cache_entry_unlocked(buffer); + mapcache_unlock(); +} + void xen_invalidate_map_cache(void) { unsigned long i; @@ -391,14 +423,14 @@ void xen_invalidate_map_cache(void) /* Flush pending AIO before destroying the mapcache */ bdrv_drain_all(); + mapcache_lock(); + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { DPRINTF("There should be no locked mappings at this time, " "but "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); } - mapcache_lock(); - for (i = 0; i < mapcache->nr_buckets; i++) { MapCacheEntry *entry = &mapcache->entry[i];