qemu/hw/pci/pci_host.c
Hogan Wang 2ebc21216f hw/pci-host: save/restore pci host config register
The pci host config register is used to save PCI address for
read/write config data. If guest writes a value to config register,
and then QEMU pauses the vcpu to migrate, after the migration, the guest
will continue to write pci config data, and the write data will be ignored
because of new qemu process losing the config register state.

To trigger the bug:
1. guest is booting in seabios.
2. guest enables the SMRAM in seabios:piix4_apmc_smm_setup, and then
   expects to disable the SMRAM by pci_config_writeb.
3. after guest writes the pci host config register, QEMU pauses vcpu
   to finish migration.
4. guest write of config data(0x0A) fails to disable the SMRAM because
   the config register state is lost.
5. guest continues to boot and crashes in ipxe option ROM due to SMRAM
   in enabled state.

Example Reproducer:

step 1. Make modifications to seabios and qemu for increase reproduction
efficiency, write 0xf0 to 0x402 port notify qemu to stop vcpu after
0x0cf8 port wrote i440 configure register. qemu stop vcpu when catch
0x402 port wrote 0xf0.

seabios:/src/hw/pci.c
@@ -52,6 +52,11 @@ void pci_config_writeb(u16 bdf, u32 addr, u8 val)
         writeb(mmconfig_addr(bdf, addr), val);
     } else {
         outl(ioconfig_cmd(bdf, addr), PORT_PCI_CMD);
+       if (bdf == 0 && addr == 0x72 && val == 0xa) {
+            dprintf(1, "stop vcpu\n");
+            outb(0xf0, 0x402); // notify qemu to stop vcpu
+            dprintf(1, "resume vcpu\n");
+        }
         outb(val, PORT_PCI_DATA + (addr & 3));
     }
 }

qemu:hw/char/debugcon.c
@@ -60,6 +61,9 @@ static void debugcon_ioport_write(void *opaque, hwaddr addr, uint64_t val,
     printf(" [debugcon: write addr=0x%04" HWADDR_PRIx " val=0x%02" PRIx64 "]\n", addr, val);
 #endif

+    if (ch == 0xf0) {
+        vm_stop(RUN_STATE_PAUSED);
+    }
     /* XXX this blocks entire thread. Rewrite to use
      * qemu_chr_fe_write and background I/O callbacks */
     qemu_chr_fe_write_all(&s->chr, &ch, 1);

step 2. start vm1 by the following command line, and then vm stopped.
$ qemu-system-x86_64 -machine pc-i440fx-5.0,accel=kvm\
 -netdev tap,ifname=tap-test,id=hostnet0,vhost=on,downscript=no,script=no\
 -device virtio-net-pci,netdev=hostnet0,id=net0,bus=pci.0,addr=0x13,bootindex=3\
 -device cirrus-vga,id=video0,vgamem_mb=16,bus=pci.0,addr=0x2\
 -chardev file,id=seabios,path=/var/log/test.seabios,append=on\
 -device isa-debugcon,iobase=0x402,chardev=seabios\
 -monitor stdio

step 3. start vm2 to accept vm1 state.
$ qemu-system-x86_64 -machine pc-i440fx-5.0,accel=kvm\
 -netdev tap,ifname=tap-test1,id=hostnet0,vhost=on,downscript=no,script=no\
 -device virtio-net-pci,netdev=hostnet0,id=net0,bus=pci.0,addr=0x13,bootindex=3\
 -device cirrus-vga,id=video0,vgamem_mb=16,bus=pci.0,addr=0x2\
 -chardev file,id=seabios,path=/var/log/test.seabios,append=on\
 -device isa-debugcon,iobase=0x402,chardev=seabios\
 -monitor stdio \
 -incoming tcp:127.0.0.1:8000

step 4. execute the following qmp command in vm1 to migrate.
(qemu) migrate tcp:127.0.0.1:8000

step 5. execute the following qmp command in vm2 to resume vcpu.
(qemu) cont
Before this patch, we get KVM "emulation failure" error on vm2.
This patch fixes it.

Cc: qemu-stable@nongnu.org
Signed-off-by: Hogan Wang <hogan.wang@huawei.com>
Message-Id: <20200727084621.3279-1-hogan.wang@huawei.com>
Reported-by: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2020-07-27 10:24:39 -04:00

250 lines
6.9 KiB
C

/*
* pci_host.c
*
* Copyright (c) 2009 Isaku Yamahata <yamahata at valinux co jp>
* VA Linux Systems Japan K.K.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along
* with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bridge.h"
#include "hw/pci/pci_host.h"
#include "hw/qdev-properties.h"
#include "qemu/module.h"
#include "hw/pci/pci_bus.h"
#include "migration/vmstate.h"
#include "trace.h"
/* debug PCI */
//#define DEBUG_PCI
#ifdef DEBUG_PCI
#define PCI_DPRINTF(fmt, ...) \
do { printf("pci_host_data: " fmt , ## __VA_ARGS__); } while (0)
#else
#define PCI_DPRINTF(fmt, ...)
#endif
/*
* PCI address
* bit 16 - 24: bus number
* bit 8 - 15: devfun number
* bit 0 - 7: offset in configuration space of a given pci device
*/
/* the helper function to get a PCIDevice* for a given pci address */
static inline PCIDevice *pci_dev_find_by_addr(PCIBus *bus, uint32_t addr)
{
uint8_t bus_num = addr >> 16;
uint8_t devfn = addr >> 8;
return pci_find_device(bus, bus_num, devfn);
}
static void pci_adjust_config_limit(PCIBus *bus, uint32_t *limit)
{
if ((*limit > PCI_CONFIG_SPACE_SIZE) &&
!pci_bus_allows_extended_config_space(bus)) {
*limit = PCI_CONFIG_SPACE_SIZE;
}
}
void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
uint32_t limit, uint32_t val, uint32_t len)
{
pci_adjust_config_limit(pci_get_bus(pci_dev), &limit);
if (limit <= addr) {
return;
}
assert(len <= 4);
/* non-zero functions are only exposed when function 0 is present,
* allowing direct removal of unexposed functions.
*/
if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
return;
}
trace_pci_cfg_write(pci_dev->name, PCI_SLOT(pci_dev->devfn),
PCI_FUNC(pci_dev->devfn), addr, val);
pci_dev->config_write(pci_dev, addr, val, MIN(len, limit - addr));
}
uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
uint32_t limit, uint32_t len)
{
uint32_t ret;
pci_adjust_config_limit(pci_get_bus(pci_dev), &limit);
if (limit <= addr) {
return ~0x0;
}
assert(len <= 4);
/* non-zero functions are only exposed when function 0 is present,
* allowing direct removal of unexposed functions.
*/
if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
return ~0x0;
}
ret = pci_dev->config_read(pci_dev, addr, MIN(len, limit - addr));
trace_pci_cfg_read(pci_dev->name, PCI_SLOT(pci_dev->devfn),
PCI_FUNC(pci_dev->devfn), addr, ret);
return ret;
}
void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, unsigned len)
{
PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr);
uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1);
if (!pci_dev) {
return;
}
pci_host_config_write_common(pci_dev, config_addr, PCI_CONFIG_SPACE_SIZE,
val, len);
}
uint32_t pci_data_read(PCIBus *s, uint32_t addr, unsigned len)
{
PCIDevice *pci_dev = pci_dev_find_by_addr(s, addr);
uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1);
if (!pci_dev) {
return ~0x0;
}
return pci_host_config_read_common(pci_dev, config_addr,
PCI_CONFIG_SPACE_SIZE, len);
}
static void pci_host_config_write(void *opaque, hwaddr addr,
uint64_t val, unsigned len)
{
PCIHostState *s = opaque;
PCI_DPRINTF("%s addr " TARGET_FMT_plx " len %d val %"PRIx64"\n",
__func__, addr, len, val);
if (addr != 0 || len != 4) {
return;
}
s->config_reg = val;
}
static uint64_t pci_host_config_read(void *opaque, hwaddr addr,
unsigned len)
{
PCIHostState *s = opaque;
uint32_t val = s->config_reg;
PCI_DPRINTF("%s addr " TARGET_FMT_plx " len %d val %"PRIx32"\n",
__func__, addr, len, val);
return val;
}
static void pci_host_data_write(void *opaque, hwaddr addr,
uint64_t val, unsigned len)
{
PCIHostState *s = opaque;
if (s->config_reg & (1u << 31))
pci_data_write(s->bus, s->config_reg | (addr & 3), val, len);
}
static uint64_t pci_host_data_read(void *opaque,
hwaddr addr, unsigned len)
{
PCIHostState *s = opaque;
if (!(s->config_reg & (1U << 31))) {
return 0xffffffff;
}
return pci_data_read(s->bus, s->config_reg | (addr & 3), len);
}
const MemoryRegionOps pci_host_conf_le_ops = {
.read = pci_host_config_read,
.write = pci_host_config_write,
.endianness = DEVICE_LITTLE_ENDIAN,
};
const MemoryRegionOps pci_host_conf_be_ops = {
.read = pci_host_config_read,
.write = pci_host_config_write,
.endianness = DEVICE_BIG_ENDIAN,
};
const MemoryRegionOps pci_host_data_le_ops = {
.read = pci_host_data_read,
.write = pci_host_data_write,
.endianness = DEVICE_LITTLE_ENDIAN,
};
const MemoryRegionOps pci_host_data_be_ops = {
.read = pci_host_data_read,
.write = pci_host_data_write,
.endianness = DEVICE_BIG_ENDIAN,
};
static bool pci_host_needed(void *opaque)
{
PCIHostState *s = opaque;
return s->mig_enabled;
}
const VMStateDescription vmstate_pcihost = {
.name = "PCIHost",
.needed = pci_host_needed,
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT32(config_reg, PCIHostState),
VMSTATE_END_OF_LIST()
}
};
static Property pci_host_properties_common[] = {
DEFINE_PROP_BOOL("x-config-reg-migration-enabled", PCIHostState,
mig_enabled, true),
DEFINE_PROP_END_OF_LIST(),
};
static void pci_host_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
device_class_set_props(dc, pci_host_properties_common);
dc->vmsd = &vmstate_pcihost;
}
static const TypeInfo pci_host_type_info = {
.name = TYPE_PCI_HOST_BRIDGE,
.parent = TYPE_SYS_BUS_DEVICE,
.abstract = true,
.class_size = sizeof(PCIHostBridgeClass),
.instance_size = sizeof(PCIHostState),
.class_init = pci_host_class_init,
};
static void pci_host_register_types(void)
{
type_register_static(&pci_host_type_info);
}
type_init(pci_host_register_types)