From fb4bb2b587549612a0da92de68fcc096ffd8a7d7 Mon Sep 17 00:00:00 2001 From: Anthony PERARD Date: Fri, 15 Jul 2011 00:33:42 +0000 Subject: [PATCH 1/9] xen: introduce xen_change_state_handler Remove the call to xenstore_record_dm_state from xen_main_loop_prepare that is HVM specific. Add a new vm_change_state_handler shared between xen_pv and xen_hvm machines to record the VM state to xenstore. Signed-off-by: Anthony PERARD Signed-off-by: Stefano Stabellini Signed-off-by: Alexander Graf --- xen-all.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/xen-all.c b/xen-all.c index 167bed6dbd..83c5476764 100644 --- a/xen-all.c +++ b/xen-all.c @@ -797,12 +797,17 @@ void xenstore_store_pv_console_info(int i, CharDriverState *chr) } } -static void xenstore_record_dm_state(XenIOState *s, const char *state) +static void xenstore_record_dm_state(struct xs_handle *xs, const char *state) { char path[50]; + if (xs == NULL) { + fprintf(stderr, "xenstore connection not initialized\n"); + exit(1); + } + snprintf(path, sizeof (path), "/local/domain/0/device-model/%u/state", xen_domid); - if (!xs_write(s->xenstore, XBT_NULL, path, state, strlen(state))) { + if (!xs_write(xs, XBT_NULL, path, state, strlen(state))) { fprintf(stderr, "error recording dm state\n"); exit(1); } @@ -823,15 +828,20 @@ static void xen_main_loop_prepare(XenIOState *state) if (evtchn_fd != -1) { qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state); } - - /* record state running */ - xenstore_record_dm_state(state, "running"); } /* Initialise Xen */ -static void xen_vm_change_state_handler(void *opaque, int running, int reason) +static void xen_change_state_handler(void *opaque, int running, int reason) +{ + if (running) { + /* record state running */ + xenstore_record_dm_state(xenstore, "running"); + } +} + +static void xen_hvm_change_state_handler(void *opaque, int running, int reason) { XenIOState *state = opaque; if (running) { @@ -854,6 +864,7 @@ int xen_init(void) xen_be_printf(NULL, 0, "can't open xen interface\n"); return -1; } + qemu_add_vm_change_state_handler(xen_change_state_handler, NULL); return 0; } @@ -915,7 +926,7 @@ int xen_hvm_init(void) xen_map_cache_init(); xen_ram_init(ram_size); - qemu_add_vm_change_state_handler(xen_vm_change_state_handler, state); + qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state); state->client = xen_cpu_phys_memory_client; QLIST_INIT(&state->physmap); From 30ab61252b71446977e298f146be124eb4a5b333 Mon Sep 17 00:00:00 2001 From: Anthony PERARD Date: Fri, 15 Jul 2011 04:32:52 +0000 Subject: [PATCH 2/9] xen: Fix xen_enabled(). Use the "host" CONFIG_ define instead of the "target" one. Signed-off-by: Anthony PERARD Acked-by: Paolo Bonzini Signed-off-by: Alexander Graf --- hw/xen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/xen.h b/hw/xen.h index e432705f45..43b95d6880 100644 --- a/hw/xen.h +++ b/hw/xen.h @@ -24,7 +24,7 @@ extern int xen_allowed; static inline int xen_enabled(void) { -#ifdef CONFIG_XEN +#ifdef CONFIG_XEN_BACKEND return xen_allowed; #else return 0; From 8ca5692df4a635b2c9211d6dc3dea15bd2b674c8 Mon Sep 17 00:00:00 2001 From: Anthony PERARD Date: Fri, 15 Jul 2011 04:32:53 +0000 Subject: [PATCH 3/9] exec.c: Use ram_addr_t in cpu_physical_memory_rw(...). As the variable pd and addr1 inside the function cpu_physical_memory_rw are mean to handle a RAM address, they should be of the ram_addr_t type instead of unsigned long. Signed-off-by: Anthony PERARD Acked-by: Paolo Bonzini Signed-off-by: Alexander Graf --- exec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exec.c b/exec.c index 2160ded401..0393d39998 100644 --- a/exec.c +++ b/exec.c @@ -3858,7 +3858,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, uint8_t *ptr; uint32_t val; target_phys_addr_t page; - unsigned long pd; + ram_addr_t pd; PhysPageDesc *p; while (len > 0) { @@ -3898,7 +3898,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, l = 1; } } else { - unsigned long addr1; + ram_addr_t addr1; addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); /* RAM case */ ptr = qemu_get_ram_ptr(addr1); From f03a4ac12222ce1fb7886dc6cb38a027b73ccff2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 17 Jul 2011 07:22:06 +0200 Subject: [PATCH 4/9] xen: remove CONFIG_XEN_MAPCACHE We were still exporting CONFIG_XEN_MAPCACHE, even though it's completely unused by now. Remove it. Signed-off-by: Alexander Graf --- configure | 3 --- 1 file changed, 3 deletions(-) diff --git a/configure b/configure index 38e3724f33..cd399dc4ce 100755 --- a/configure +++ b/configure @@ -3290,9 +3290,6 @@ case "$target_arch2" in if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then target_phys_bits=64 echo "CONFIG_XEN=y" >> $config_target_mak - if test "$cpu" = "i386" -o "$cpu" = "x86_64"; then - echo "CONFIG_XEN_MAPCACHE=y" >> $config_target_mak - fi fi esac case "$target_arch2" in From 59d21e537b6c3a964d4e346e9cf2a1e17267a27c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 17 Jul 2011 07:30:29 +0200 Subject: [PATCH 5/9] xen: make xen_enabled even more clever When using xen_enabled() we're currently only checking if xen is enabled at all during the build. But what if you want to build multiple targets out of which only one can potentially run xen code? That means that for generic code we'll still have to fall back to the variable and potentially slow the code down, but it's not as important as that is mostly xen device emulation which is not touched for non-xen targets. The target specific code however can with this patch see that it's unable to ever execute xen code. We can thus always return 0 on xen_enabled(), giving gcc enough hints to evict the mapcache code from the target memory management code. Signed-off-by: Alexander Graf Acked-by: Anthony PERARD --- configure | 5 +++++ hw/xen.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/configure b/configure index cd399dc4ce..bc3495c6fb 100755 --- a/configure +++ b/configure @@ -3290,7 +3290,12 @@ case "$target_arch2" in if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then target_phys_bits=64 echo "CONFIG_XEN=y" >> $config_target_mak + else + echo "CONFIG_NO_XEN=y" >> $config_target_mak fi + ;; + *) + echo "CONFIG_NO_XEN=y" >> $config_target_mak esac case "$target_arch2" in i386|x86_64|ppcemb|ppc|ppc64|s390x) diff --git a/hw/xen.h b/hw/xen.h index 43b95d6880..21621115e4 100644 --- a/hw/xen.h +++ b/hw/xen.h @@ -24,7 +24,7 @@ extern int xen_allowed; static inline int xen_enabled(void) { -#ifdef CONFIG_XEN_BACKEND +#if defined(CONFIG_XEN_BACKEND) && !defined(CONFIG_NO_XEN) return xen_allowed; #else return 0; From f15fbc4bd1a24bd1477a846e63e62c6d435912f8 Mon Sep 17 00:00:00 2001 From: Anthony PERARD Date: Wed, 20 Jul 2011 08:17:42 +0000 Subject: [PATCH 6/9] cpu-common: Have a ram_addr_t of uint64 with Xen. In Xen case, memory can be bigger than the host memory. that mean a 32bits host (and QEMU) should be able to handle a RAM address of 64bits. Signed-off-by: Anthony PERARD Signed-off-by: Alexander Graf --- cpu-common.h | 8 ++++++++ exec.c | 9 +++++---- xen-all.c | 2 +- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/cpu-common.h b/cpu-common.h index 44b04b3839..070010130c 100644 --- a/cpu-common.h +++ b/cpu-common.h @@ -27,7 +27,15 @@ enum device_endian { }; /* address in the RAM (different from a physical address) */ +#if defined(CONFIG_XEN_BACKEND) && TARGET_PHYS_ADDR_BITS == 64 +typedef uint64_t ram_addr_t; +# define RAM_ADDR_MAX UINT64_MAX +# define RAM_ADDR_FMT "%" PRIx64 +#else typedef unsigned long ram_addr_t; +# define RAM_ADDR_MAX ULONG_MAX +# define RAM_ADDR_FMT "%lx" +#endif /* memory API */ diff --git a/exec.c b/exec.c index 0393d39998..bfc9a43ce7 100644 --- a/exec.c +++ b/exec.c @@ -2863,13 +2863,13 @@ static void *file_ram_alloc(RAMBlock *block, static ram_addr_t find_ram_offset(ram_addr_t size) { RAMBlock *block, *next_block; - ram_addr_t offset = 0, mingap = ULONG_MAX; + ram_addr_t offset = 0, mingap = RAM_ADDR_MAX; if (QLIST_EMPTY(&ram_list.blocks)) return 0; QLIST_FOREACH(block, &ram_list.blocks, next) { - ram_addr_t end, next = ULONG_MAX; + ram_addr_t end, next = RAM_ADDR_MAX; end = block->offset + block->length; @@ -3081,7 +3081,8 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) #endif } if (area != vaddr) { - fprintf(stderr, "Could not remap addr: %lx@%lx\n", + fprintf(stderr, "Could not remap addr: " + RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n", length, addr); exit(1); } @@ -4052,7 +4053,7 @@ void *cpu_physical_memory_map(target_phys_addr_t addr, target_phys_addr_t page; unsigned long pd; PhysPageDesc *p; - ram_addr_t raddr = ULONG_MAX; + ram_addr_t raddr = RAM_ADDR_MAX; ram_addr_t rlen; void *ret; diff --git a/xen-all.c b/xen-all.c index 83c5476764..53296bf2ca 100644 --- a/xen-all.c +++ b/xen-all.c @@ -184,7 +184,7 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size) } if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) { - hw_error("xen: failed to populate ram at %lx", ram_addr); + hw_error("xen: failed to populate ram at " RAM_ADDR_FMT, ram_addr); } qemu_free(pfn_list); From 8a369e20e701c9d220834e0daa027e65acd35214 Mon Sep 17 00:00:00 2001 From: Anthony PERARD Date: Wed, 20 Jul 2011 08:17:43 +0000 Subject: [PATCH 7/9] xen: Fix the memory registration to reflect of what is done by Xen. A Xen guest memory is allocated by libxc. But this memory is not allocated continuously, instead, it leaves the VGA IO memory space not allocated, same for the MMIO space (at HVM_BELOW_4G_MMIO_START of size HVM_BELOW_4G_MMIO_LENGTH). So to reflect that, we do not register the physical memory for this two holes. But we still keep only one RAMBlock for the all RAM as it is more easier than have two separate blocks (1 above 4G). Also this prevent QEMU from use the MMIO space for a ROM. Signed-off-by: Anthony PERARD Signed-off-by: Alexander Graf --- xen-all.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/xen-all.c b/xen-all.c index 53296bf2ca..9eaeac1654 100644 --- a/xen-all.c +++ b/xen-all.c @@ -19,6 +19,7 @@ #include #include +#include //#define DEBUG_XEN @@ -144,6 +145,12 @@ static void xen_ram_init(ram_addr_t ram_size) new_block->host = NULL; new_block->offset = 0; new_block->length = ram_size; + if (ram_size >= HVM_BELOW_4G_RAM_END) { + /* Xen does not allocate the memory continuously, and keep a hole at + * HVM_BELOW_4G_MMIO_START of HVM_BELOW_4G_MMIO_LENGTH + */ + new_block->length += HVM_BELOW_4G_MMIO_LENGTH; + } QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); @@ -152,20 +159,26 @@ static void xen_ram_init(ram_addr_t ram_size) memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS), 0xff, new_block->length >> TARGET_PAGE_BITS); - if (ram_size >= 0xe0000000 ) { - above_4g_mem_size = ram_size - 0xe0000000; - below_4g_mem_size = 0xe0000000; + if (ram_size >= HVM_BELOW_4G_RAM_END) { + above_4g_mem_size = ram_size - HVM_BELOW_4G_RAM_END; + below_4g_mem_size = HVM_BELOW_4G_RAM_END; } else { below_4g_mem_size = ram_size; } - cpu_register_physical_memory(0, below_4g_mem_size, new_block->offset); -#if TARGET_PHYS_ADDR_BITS > 32 + cpu_register_physical_memory(0, 0xa0000, 0); + /* Skip of the VGA IO memory space, it will be registered later by the VGA + * emulated device. + * + * The area between 0xc0000 and 0x100000 will be used by SeaBIOS to load + * the Options ROM, so it is registered here as RAM. + */ + cpu_register_physical_memory(0xc0000, below_4g_mem_size - 0xc0000, + 0xc0000); if (above_4g_mem_size > 0) { cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size, - new_block->offset + below_4g_mem_size); + 0x100000000ULL); } -#endif } void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size) From 834e76ea1cc3f6fb261fe6a40f7571600bcb25b1 Mon Sep 17 00:00:00 2001 From: Anthony PERARD Date: Wed, 20 Jul 2011 08:17:44 +0000 Subject: [PATCH 8/9] vl.c: Check the asked ram_size later. As a Xen guest can have more than 2GB of RAM on a 32bit host, we move the conditions after than we now if we run one Xen or not. [agraf] separate xen branch from ram_size check Signed-off-by: Anthony PERARD Signed-off-by: Alexander Graf --- vl.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/vl.c b/vl.c index 4b6688b553..d8c7c01048 100644 --- a/vl.c +++ b/vl.c @@ -2440,11 +2440,6 @@ int main(int argc, char **argv, char **envp) exit(1); } - /* On 32-bit hosts, QEMU is limited by virtual address space */ - if (value > (2047 << 20) && HOST_LONG_BITS == 32) { - fprintf(stderr, "qemu: at most 2047 MB RAM can be simulated\n"); - exit(1); - } if (value != (uint64_t)(ram_addr_t)value) { fprintf(stderr, "qemu: ram size too large\n"); exit(1); @@ -3099,8 +3094,17 @@ int main(int argc, char **argv, char **envp) exit(1); /* init the memory */ - if (ram_size == 0) + if (ram_size == 0) { ram_size = DEFAULT_RAM_SIZE * 1024 * 1024; + } + + if (!xen_enabled()) { + /* On 32-bit hosts, QEMU is limited by virtual address space */ + if (ram_size > (2047 << 20) && HOST_LONG_BITS == 32) { + fprintf(stderr, "qemu: at most 2047 MB RAM can be simulated\n"); + exit(1); + } + } /* init the dynamic translator */ cpu_exec_init_all(tb_size * 1024 * 1024); From 679f4f8b178e7c66fbc2f39c905374ee8663d5d8 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Mon, 18 Jul 2011 06:07:02 +0000 Subject: [PATCH 9/9] xen: implement unplug protocol in xen_platform The unplug protocol is necessary to support PV drivers in the guest: the drivers expect to be able to "unplug" emulated disks and nics before initializing the Xen PV interfaces. It is responsibility of the guest to make sure that the unplug is done before the emulated devices or the PV interface start to be used. We use pci_for_each_device to walk the PCI bus, identify the devices and disks that we want to disable and dynamically unplug them. Changes in v2: - use PCI_CLASS constants; - replace pci_unplug_device with qdev_unplug; - do not import hw/ide/internal.h in xen_platform.c; Changes in v3: - introduce piix3-ide-xen, that support hot-unplug; - move the unplug code to hw/ide/piix.c; - just call qdev_unplug from xen_platform.c to unplug the IDE disks; Signed-off-by: Stefano Stabellini Acked-by: Kevin Wolf Signed-off-by: Alexander Graf --- hw/ide.h | 1 + hw/ide/piix.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ hw/pc_piix.c | 6 +++++- hw/xen_platform.c | 43 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 92 insertions(+), 2 deletions(-) diff --git a/hw/ide.h b/hw/ide.h index 34d9394bcc..a490cbb6c5 100644 --- a/hw/ide.h +++ b/hw/ide.h @@ -13,6 +13,7 @@ ISADevice *isa_ide_init(int iobase, int iobase2, int isairq, /* ide-pci.c */ void pci_cmd646_ide_init(PCIBus *bus, DriveInfo **hd_table, int secondary_ide_enabled); +PCIDevice *pci_piix3_xen_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn); PCIDevice *pci_piix3_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn); PCIDevice *pci_piix4_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn); void vt82c686b_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn); diff --git a/hw/ide/piix.c b/hw/ide/piix.c index 84f72b0a66..f527dbd57e 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c @@ -149,6 +149,42 @@ static int pci_piix_ide_initfn(PCIDevice *dev) return 0; } +static int pci_piix3_xen_ide_unplug(DeviceState *dev) +{ + PCIDevice *pci_dev; + PCIIDEState *pci_ide; + DriveInfo *di; + int i = 0; + + pci_dev = DO_UPCAST(PCIDevice, qdev, dev); + pci_ide = DO_UPCAST(PCIIDEState, dev, pci_dev); + + for (; i < 3; i++) { + di = drive_get_by_index(IF_IDE, i); + if (di != NULL && di->bdrv != NULL && !di->bdrv->removable) { + DeviceState *ds = bdrv_get_attached(di->bdrv); + if (ds) { + bdrv_detach(di->bdrv, ds); + } + bdrv_close(di->bdrv); + pci_ide->bus[di->bus].ifs[di->unit].bs = NULL; + drive_put_ref(di); + } + } + qdev_reset_all(&(pci_ide->dev.qdev)); + return 0; +} + +PCIDevice *pci_piix3_xen_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn) +{ + PCIDevice *dev; + + dev = pci_create_simple(bus, devfn, "piix3-ide-xen"); + dev->qdev.info->unplug = pci_piix3_xen_ide_unplug; + pci_ide_create_devs(dev, hd_table); + return dev; +} + /* hd_table must contain 4 block drivers */ /* NOTE: for the PIIX3, the IRQs and IOports are hardcoded */ PCIDevice *pci_piix3_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn) @@ -181,6 +217,14 @@ static PCIDeviceInfo piix_ide_info[] = { .vendor_id = PCI_VENDOR_ID_INTEL, .device_id = PCI_DEVICE_ID_INTEL_82371SB_1, .class_id = PCI_CLASS_STORAGE_IDE, + },{ + .qdev.name = "piix3-ide-xen", + .qdev.size = sizeof(PCIIDEState), + .qdev.no_user = 1, + .init = pci_piix_ide_initfn, + .vendor_id = PCI_VENDOR_ID_INTEL, + .device_id = PCI_DEVICE_ID_INTEL_82371SB_1, + .class_id = PCI_CLASS_STORAGE_IDE, },{ .qdev.name = "piix4-ide", .qdev.size = sizeof(PCIIDEState), diff --git a/hw/pc_piix.c b/hw/pc_piix.c index c5c16b4571..40b73ea25c 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -155,7 +155,11 @@ static void pc_init1(ram_addr_t ram_size, ide_drive_get(hd, MAX_IDE_BUS); if (pci_enabled) { PCIDevice *dev; - dev = pci_piix3_ide_init(pci_bus, hd, piix3_devfn + 1); + if (xen_enabled()) { + dev = pci_piix3_xen_ide_init(pci_bus, hd, piix3_devfn + 1); + } else { + dev = pci_piix3_ide_init(pci_bus, hd, piix3_devfn + 1); + } idebus[0] = qdev_get_child_bus(&dev->qdev, "ide.0"); idebus[1] = qdev_get_child_bus(&dev->qdev, "ide.1"); } else { diff --git a/hw/xen_platform.c b/hw/xen_platform.c index f43e175b4e..fb6be6a464 100644 --- a/hw/xen_platform.c +++ b/hw/xen_platform.c @@ -76,6 +76,35 @@ static void log_writeb(PCIXenPlatformState *s, char val) } /* Xen Platform, Fixed IOPort */ +#define UNPLUG_ALL_IDE_DISKS 1 +#define UNPLUG_ALL_NICS 2 +#define UNPLUG_AUX_IDE_DISKS 4 + +static void unplug_nic(PCIBus *b, PCIDevice *d) +{ + if (pci_get_word(d->config + PCI_CLASS_DEVICE) == + PCI_CLASS_NETWORK_ETHERNET) { + qdev_unplug(&(d->qdev)); + } +} + +static void pci_unplug_nics(PCIBus *bus) +{ + pci_for_each_device(bus, 0, unplug_nic); +} + +static void unplug_disks(PCIBus *b, PCIDevice *d) +{ + if (pci_get_word(d->config + PCI_CLASS_DEVICE) == + PCI_CLASS_STORAGE_IDE) { + qdev_unplug(&(d->qdev)); + } +} + +static void pci_unplug_disks(PCIBus *bus) +{ + pci_for_each_device(bus, 0, unplug_disks); +} static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t val) { @@ -83,10 +112,22 @@ static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t v switch (addr - XEN_PLATFORM_IOPORT) { case 0: - /* TODO: */ /* Unplug devices. Value is a bitmask of which devices to unplug, with bit 0 the IDE devices, bit 1 the network devices, and bit 2 the non-primary-master IDE devices. */ + if (val & UNPLUG_ALL_IDE_DISKS) { + DPRINTF("unplug disks\n"); + qemu_aio_flush(); + bdrv_flush_all(); + pci_unplug_disks(s->pci_dev.bus); + } + if (val & UNPLUG_ALL_NICS) { + DPRINTF("unplug nics\n"); + pci_unplug_nics(s->pci_dev.bus); + } + if (val & UNPLUG_AUX_IDE_DISKS) { + DPRINTF("unplug auxiliary disks not supported\n"); + } break; case 2: switch (val) {