2004-05-19 03:05:28 +04:00
|
|
|
/*
|
|
|
|
* QEMU PCI bus manager
|
|
|
|
*
|
|
|
|
* Copyright (c) 2004 Fabrice Bellard
|
2007-09-17 01:08:06 +04:00
|
|
|
*
|
2004-05-19 03:05:28 +04:00
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
|
|
* in the Software without restriction, including without limitation the rights
|
|
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
|
|
* furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
* all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
* THE SOFTWARE.
|
|
|
|
*/
|
2018-02-01 14:18:31 +03:00
|
|
|
|
2016-01-26 21:17:15 +03:00
|
|
|
#include "qemu/osdep.h"
|
2020-10-28 14:36:57 +03:00
|
|
|
#include "qemu/datadir.h"
|
2021-02-03 16:18:27 +03:00
|
|
|
#include "qemu/units.h"
|
2019-08-12 08:23:42 +03:00
|
|
|
#include "hw/irq.h"
|
2012-12-13 01:05:42 +04:00
|
|
|
#include "hw/pci/pci.h"
|
|
|
|
#include "hw/pci/pci_bridge.h"
|
2012-12-12 17:00:45 +04:00
|
|
|
#include "hw/pci/pci_bus.h"
|
2013-06-06 12:48:49 +04:00
|
|
|
#include "hw/pci/pci_host.h"
|
2019-08-12 08:23:51 +03:00
|
|
|
#include "hw/qdev-properties.h"
|
2020-12-12 01:05:12 +03:00
|
|
|
#include "hw/qdev-properties-system.h"
|
2019-08-12 08:23:39 +03:00
|
|
|
#include "migration/qemu-file-types.h"
|
2019-08-12 08:23:45 +03:00
|
|
|
#include "migration/vmstate.h"
|
2012-10-24 10:43:34 +04:00
|
|
|
#include "net/net.h"
|
2019-08-12 08:23:55 +03:00
|
|
|
#include "sysemu/numa.h"
|
pci: ROM preallocation for incoming migration
On incoming migration we have the following sequence to load option
ROM:
1. On device realize we do normal load ROM from the file
2. Then, on incoming migration we rewrite ROM from the incoming RAM
block. If sizes mismatch we fail, like this:
Size mismatch: 0000:00:03.0/virtio-net-pci.rom: 0x40000 != 0x80000: Invalid argument
This is not ideal when we migrate to updated distribution: we have to
keep old ROM files in new distribution and be careful around romfile
property to load correct ROM file. Which is loaded actually just to
allocate the ROM with correct length.
Note, that romsize property doesn't really help: if we try to specify
it when default romfile is larger, it fails with something like:
romfile "efi-virtio.rom" (160768 bytes) is too large for ROM size 65536
Let's just ignore ROM file when romsize is specified and we are in
incoming migration state. In other words, we need only to preallocate
ROM of specified size, local ROM file is unrelated.
This way:
If romsize was specified on source, we just use same commandline as on
source, and migration will work independently of local ROM files on
target.
If romsize was not specified on source (and we have mismatching local
ROM file on target host), we have to specify romsize on target to match
source romsize. romfile parameter may be kept same as on source or may
be dropped, the file is not loaded anyway.
As a bonus we avoid extra reading from ROM file on target.
Note: when we don't have romsize parameter on source command line and
need it for target, it may be calculated as aligned up to power of two
size of ROM file on source (if we know, which file is it) or,
alternatively it may be retrieved from source QEMU by QMP qom-get
command, like
{ "execute": "qom-get",
"arguments": {
"path": "/machine/peripheral/CARD_ID/virtio-net-pci.rom[0]",
"property": "size" } }
Note: we have extra initialization of size variable to zero in
pci_add_option_rom to avoid false-positive
"error: ‘size’ may be used uninitialized"
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20230522201740.88960-2-vsementsov@yandex-team.ru>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-05-22 23:17:40 +03:00
|
|
|
#include "sysemu/runstate.h"
|
2019-08-12 08:23:57 +03:00
|
|
|
#include "sysemu/sysemu.h"
|
2012-12-13 01:05:42 +04:00
|
|
|
#include "hw/loader.h"
|
2015-03-17 20:29:20 +03:00
|
|
|
#include "qemu/error-report.h"
|
2012-12-17 21:20:00 +04:00
|
|
|
#include "qemu/range.h"
|
2015-01-16 22:20:51 +03:00
|
|
|
#include "trace.h"
|
2012-12-13 01:05:42 +04:00
|
|
|
#include "hw/pci/msi.h"
|
|
|
|
#include "hw/pci/msix.h"
|
2014-02-05 19:36:52 +04:00
|
|
|
#include "hw/hotplug.h"
|
2015-07-24 11:35:13 +03:00
|
|
|
#include "hw/boards.h"
|
2018-02-01 14:18:31 +03:00
|
|
|
#include "qapi/error.h"
|
2016-03-20 20:16:19 +03:00
|
|
|
#include "qemu/cutils.h"
|
2022-12-01 15:11:22 +03:00
|
|
|
#include "pci-internal.h"
|
2004-05-19 03:05:28 +04:00
|
|
|
|
2023-01-14 02:35:46 +03:00
|
|
|
#include "hw/xen/xen.h"
|
|
|
|
#include "hw/i386/kvm/xen_evtchn.h"
|
|
|
|
|
2004-05-19 03:05:28 +04:00
|
|
|
/* Uncomment the define below to route PCI_DPRINTF() output to printf(). */
//#define DEBUG_PCI
#ifdef DEBUG_PCI
# define PCI_DPRINTF(format, ...)       printf(format, ## __VA_ARGS__)
#else
/* Debugging disabled: PCI_DPRINTF() expands to an empty statement. */
# define PCI_DPRINTF(format, ...)       do { } while (0)
#endif
|
2004-05-19 03:05:28 +04:00
|
|
|
|
2017-07-07 12:45:26 +03:00
|
|
|
/* Global flag, initialised to true. */
bool pci_available = true;

/* Forward declarations for helpers referenced before their definition. */
static char *pcibus_get_dev_path(DeviceState *dev);
static char *pcibus_get_fw_dev_path(DeviceState *dev);
static void pcibus_reset(BusState *qbus);
static bool pcie_has_upstream_port(PCIDevice *dev);
|
2009-06-30 16:12:08 +04:00
|
|
|
|
2012-03-28 20:01:36 +04:00
|
|
|
static Property pci_props[] = {
|
|
|
|
DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
|
|
|
|
DEFINE_PROP_STRING("romfile", PCIDevice, romfile),
|
2021-02-03 16:18:28 +03:00
|
|
|
DEFINE_PROP_UINT32("romsize", PCIDevice, romsize, -1),
|
2012-03-28 20:01:36 +04:00
|
|
|
DEFINE_PROP_UINT32("rombar", PCIDevice, rom_bar, 1),
|
|
|
|
DEFINE_PROP_BIT("multifunction", PCIDevice, cap_present,
|
|
|
|
QEMU_PCI_CAP_MULTIFUNCTION_BITNR, false),
|
2016-07-19 23:16:19 +03:00
|
|
|
DEFINE_PROP_BIT("x-pcie-lnksta-dllla", PCIDevice, cap_present,
|
|
|
|
QEMU_PCIE_LNKSTA_DLLLA_BITNR, true),
|
2017-02-20 23:43:10 +03:00
|
|
|
DEFINE_PROP_BIT("x-pcie-extcap-init", PCIDevice, cap_present,
|
|
|
|
QEMU_PCIE_EXTCAP_INIT_BITNR, true),
|
2019-10-29 14:48:56 +03:00
|
|
|
DEFINE_PROP_STRING("failover_pair_id", PCIDevice,
|
|
|
|
failover_pair_id),
|
pci: introduce acpi-index property for PCI device
In x86/ACPI world, linux distros are using predictable
network interface naming since systemd v197. Which on
QEMU based VMs results into path based naming scheme,
that names network interfaces based on PCI topology.
With it, one has to plug the NIC into exactly the same bus/slot
that was used when the disk image was first provisioned/configured,
or one risks losing the network configuration due to the NIC being
renamed to actually used topology.
That also restricts freedom to reshape PCI configuration of
VM without need to reconfigure used guest image.
systemd also offers "onboard" naming scheme which is
preferred over PCI slot/topology one, provided that
firmware implements:
"
PCI Firmware Specification 3.1
4.6.7. DSM for Naming a PCI or PCI Express Device Under
Operating Systems
"
that allows to assign user defined index to PCI device,
which systemd will use to name NIC. For example, using
-device e1000,acpi-index=100
guest will rename NIC to 'eno100', where 'eno' is default
prefix for "onboard" naming scheme. This doesn't require
any advance configuration on the guest side to come into effect,
as the 'onboard' scheme takes priority over path based naming.
Hope is that 'acpi-index' it will be easier to consume by
management layer, compared to forcing specific PCI topology
and/or having several disk image templates for different
topologies and will help to simplify process of spawning
VM from the same template without need to reconfigure
guest NIC.
This patch adds, 'acpi-index'* property and wires up
a 32bit register on top of pci hotplug register block
to pass index value to AML code at runtime.
Following patch will add corresponding _DSM code and
wire it up to PCI devices described in ACPI.
*) name comes from linux kernel terminology
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <20210315180102.3008391-3-imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2021-03-15 21:00:58 +03:00
|
|
|
DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0),
|
2023-05-03 03:27:02 +03:00
|
|
|
DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
|
|
|
|
QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
|
2023-07-10 18:38:36 +03:00
|
|
|
DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
|
|
|
|
QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
|
2012-03-28 20:01:36 +04:00
|
|
|
DEFINE_PROP_END_OF_LIST()
|
|
|
|
};
|
|
|
|
|
2013-11-26 02:48:42 +04:00
|
|
|
static const VMStateDescription vmstate_pcibus = {
|
|
|
|
.name = "PCIBUS",
|
|
|
|
.version_id = 1,
|
|
|
|
.minimum_version_id = 1,
|
2014-04-16 17:32:32 +04:00
|
|
|
.fields = (VMStateField[]) {
|
2017-06-23 17:48:23 +03:00
|
|
|
VMSTATE_INT32_EQUAL(nirq, PCIBus, NULL),
|
2013-11-26 02:48:42 +04:00
|
|
|
VMSTATE_VARRAY_INT32(irq_count, PCIBus,
|
|
|
|
nirq, 0, vmstate_info_int32,
|
|
|
|
int32_t),
|
|
|
|
VMSTATE_END_OF_LIST()
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2023-03-02 19:15:38 +03:00
|
|
|
/*
 * Three-way comparison callback with the GCompareDataFunc signature.
 *
 * The pointer arguments themselves are compared as integers; they are
 * never dereferenced (presumably the callers pack uint32 values into the
 * pointers -- confirm against the call sites).  @user_data is unused.
 *
 * Returns -1, 0 or 1 when @a is less than, equal to, or greater than @b.
 *
 * The comparison is spelled out instead of returning "a - b": that
 * difference is pointer-wide and gets truncated to gint, which yields the
 * wrong sign once the two values are 2^31 or more apart, and arithmetic
 * on void pointers is not standard C.
 */
static gint g_cmp_uint32(gconstpointer a, gconstpointer b, gpointer user_data)
{
    uintptr_t lhs = (uintptr_t)a;
    uintptr_t rhs = (uintptr_t)b;

    return (lhs > rhs) - (lhs < rhs);
}
|
|
|
|
|
|
|
|
/*
 * Return the function-local GSequence, creating it on the first call.
 * The same sequence is returned by every subsequent call.
 */
static GSequence *pci_acpi_index_list(void)
{
    static GSequence *used_acpi_index_list;

    if (used_acpi_index_list == NULL) {
        used_acpi_index_list = g_sequence_new(NULL);
    }

    return used_acpi_index_list;
}
|
|
|
|
|
2016-06-27 18:38:32 +03:00
|
|
|
/*
 * Create the device's "bus master" region as an alias spanning the whole
 * root region of the device's IOMMU address space, leave it disabled, and
 * map it at offset 0 of the device's bus-master container region.
 */
static void pci_init_bus_master(PCIDevice *pci_dev)
{
    AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
    MemoryRegion *enable_region = &pci_dev->bus_master_enable_region;

    memory_region_init_alias(enable_region, OBJECT(pci_dev), "bus master",
                             dma_as->root, 0,
                             memory_region_size(dma_as->root));
    /* The alias starts out disabled. */
    memory_region_set_enabled(enable_region, false);
    memory_region_add_subregion(&pci_dev->bus_master_container_region, 0,
                                enable_region);
}
|
|
|
|
|
|
|
|
/*
 * Notifier callback embedded in PCIBus as "machine_done": runs
 * pci_init_bus_master() on every populated slot of the bus.
 * @data is unused.
 */
static void pcibus_machine_done(Notifier *notifier, void *data)
{
    PCIBus *bus = container_of(notifier, PCIBus, machine_done);
    int devfn;

    for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); ++devfn) {
        PCIDevice *pci_dev = bus->devices[devfn];

        if (!pci_dev) {
            continue;
        }
        pci_init_bus_master(pci_dev);
    }
}
|
|
|
|
|
2013-11-26 02:48:42 +04:00
|
|
|
/*
 * BusClass::realize for PCI buses: hook the bus into the machine-init-done
 * notifier list and register its migration state.  @errp is never set.
 */
static void pci_bus_realize(BusState *qbus, Error **errp)
{
    PCIBus *pci_bus = PCI_BUS(qbus);

    pci_bus->machine_done.notify = pcibus_machine_done;
    qemu_add_machine_init_done_notifier(&pci_bus->machine_done);

    vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_pcibus,
                     pci_bus);
}
|
|
|
|
|
2019-05-13 09:19:37 +03:00
|
|
|
/*
 * BusClass::realize for PCIe buses: perform the generic PCI bus
 * realization, then decide whether the bus exposes extended config space.
 * Errors from the generic step are propagated through @errp.
 */
static void pcie_bus_realize(BusState *qbus, Error **errp)
{
    PCIBus *bus = PCI_BUS(qbus);
    Error *local_err = NULL;
    bool extended;

    pci_bus_realize(qbus, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    /*
     * Extended config space is available on the root bus, and on any
     * other bus whose parent bus/bridge provides it as well.  The parent
     * is only looked up when this is not the root bus.
     */
    extended = pci_bus_is_root(bus) ||
               pci_bus_allows_extended_config_space(
                   pci_get_bus(bus->parent_dev));
    if (extended) {
        bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
    }
}
|
|
|
|
|
qdev: Unrealize must not fail
Devices may have component devices and buses.
Device realization may fail. Realization is recursive: a device's
realize() method realizes its components, and device_set_realized()
realizes its buses (which should in turn realize the devices on that
bus, except bus_set_realized() doesn't implement that, yet).
When realization of a component or bus fails, we need to roll back:
unrealize everything we realized so far. If any of these unrealizes
failed, the device would be left in an inconsistent state. Must not
happen.
device_set_realized() lets it happen: it ignores errors in the roll
back code starting at label child_realize_fail.
Since realization is recursive, unrealization must be recursive, too.
But how could a partly failed unrealize be rolled back? We'd have to
re-realize, which can fail. This design is fundamentally broken.
device_set_realized() does not roll back at all. Instead, it keeps
unrealizing, ignoring further errors.
It can screw up even for a device with no buses: if the lone
dc->unrealize() fails, it still unregisters vmstate, and calls
listeners' unrealize() callback.
bus_set_realized() does not roll back either. Instead, it stops
unrealizing.
Fortunately, no unrealize method can fail, as we'll see below.
To fix the design error, drop parameter @errp from all the unrealize
methods.
Any unrealize method that uses @errp now needs an update. This leads
us to unrealize() methods that can fail. Merely passing it to another
unrealize method cannot cause failure, though. Here are the ones that
do other things with @errp:
* virtio_serial_device_unrealize()
Fails when qbus_set_hotplug_handler() fails, but still does all the
other work. On failure, the device would stay realized with its
resources completely gone. Oops. Can't happen, because
qbus_set_hotplug_handler() can't actually fail here. Pass
&error_abort to qbus_set_hotplug_handler() instead.
* hw/ppc/spapr_drc.c's unrealize()
Fails when object_property_del() fails, but all the other work is
already done. On failure, the device would stay realized with its
vmstate registration gone. Oops. Can't happen, because
object_property_del() can't actually fail here. Pass &error_abort
to object_property_del() instead.
* spapr_phb_unrealize()
Fails and bails out when remove_drcs() fails, but other work is
already done. On failure, the device would stay realized with some
of its resources gone. Oops. remove_drcs() fails only when
chassis_from_bus()'s object_property_get_uint() fails, and it can't
here. Pass &error_abort to remove_drcs() instead.
Therefore, no unrealize method can fail before this patch.
device_set_realized()'s recursive unrealization via bus uses
object_property_set_bool(). Can't drop @errp there, so pass
&error_abort.
We similarly unrealize with object_property_set_bool() elsewhere,
always ignoring errors. Pass &error_abort instead.
Several unrealize methods no longer handle errors from other unrealize
methods: virtio_9p_device_unrealize(),
virtio_input_device_unrealize(), scsi_qdev_unrealize(), ...
Much of the deleted error handling looks wrong anyway.
One unrealize method no longer ignores such errors:
usb_ehci_pci_exit().
Several realize methods no longer ignore errors when rolling back:
v9fs_device_realize_common(), pci_qdev_unrealize(),
spapr_phb_realize(), usb_qdev_realize(), vfio_ccw_realize(),
virtio_device_realize().
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200505152926.18877-17-armbru@redhat.com>
2020-05-05 18:29:24 +03:00
|
|
|
/*
 * BusClass::unrealize for PCI buses: remove the machine-init-done notifier
 * and unregister the bus's migration state.
 */
static void pci_bus_unrealize(BusState *qbus)
{
    PCIBus *pci_bus = PCI_BUS(qbus);

    qemu_remove_machine_init_done_notifier(&pci_bus->machine_done);
    vmstate_unregister(NULL, &vmstate_pcibus, pci_bus);
}
|
|
|
|
|
2015-06-02 14:22:58 +03:00
|
|
|
/*
 * Default PCIBusClass::bus_num: 0 for a root bus (pci host bridge),
 * otherwise the PCI_SECONDARY_BUS register of the parent device.
 */
static int pcibus_num(PCIBus *bus)
{
    return pci_bus_is_root(bus)
           ? 0 /* pci host bridge */
           : bus->parent_dev->config[PCI_SECONDARY_BUS];
}
|
|
|
|
|
2015-06-02 14:23:09 +03:00
|
|
|
/* Default PCIBusClass::numa_node: always reports NUMA_NODE_UNASSIGNED. */
static uint16_t pcibus_numa_node(PCIBus *bus)
{
    uint16_t node = NUMA_NODE_UNASSIGNED;

    return node;
}
|
|
|
|
|
2012-05-02 11:00:20 +04:00
|
|
|
/*
 * Class initializer for TYPE_PCI_BUS: installs the generic BusClass
 * callbacks and the default PCIBusClass bus-number / NUMA-node hooks.
 */
static void pci_bus_class_init(ObjectClass *klass, void *data)
{
    PCIBusClass *pbc = PCI_BUS_CLASS(klass);
    BusClass *bc = BUS_CLASS(klass);

    /* Generic bus callbacks */
    bc->realize = pci_bus_realize;
    bc->unrealize = pci_bus_unrealize;
    bc->reset = pcibus_reset;
    bc->print_dev = pcibus_dev_print;
    bc->get_dev_path = pcibus_get_dev_path;
    bc->get_fw_dev_path = pcibus_get_fw_dev_path;

    /* PCI specific callbacks */
    pbc->bus_num = pcibus_num;
    pbc->numa_node = pcibus_numa_node;
}
|
|
|
|
|
|
|
|
static const TypeInfo pci_bus_info = {
|
|
|
|
.name = TYPE_PCI_BUS,
|
|
|
|
.parent = TYPE_BUS,
|
|
|
|
.instance_size = sizeof(PCIBus),
|
2015-06-02 14:22:57 +03:00
|
|
|
.class_size = sizeof(PCIBusClass),
|
2012-05-02 11:00:20 +04:00
|
|
|
.class_init = pci_bus_class_init,
|
2004-06-21 23:45:35 +04:00
|
|
|
};
|
2004-05-19 03:05:28 +04:00
|
|
|
|
2022-04-29 17:40:26 +03:00
|
|
|
static const TypeInfo cxl_interface_info = {
|
|
|
|
.name = INTERFACE_CXL_DEVICE,
|
|
|
|
.parent = TYPE_INTERFACE,
|
|
|
|
};
|
|
|
|
|
2017-09-27 22:56:31 +03:00
|
|
|
static const TypeInfo pcie_interface_info = {
|
|
|
|
.name = INTERFACE_PCIE_DEVICE,
|
|
|
|
.parent = TYPE_INTERFACE,
|
|
|
|
};
|
|
|
|
|
|
|
|
static const TypeInfo conventional_pci_interface_info = {
|
|
|
|
.name = INTERFACE_CONVENTIONAL_PCI_DEVICE,
|
|
|
|
.parent = TYPE_INTERFACE,
|
|
|
|
};
|
|
|
|
|
2019-04-01 20:55:02 +03:00
|
|
|
/*
 * Class initializer shared by the PCIe-derived bus types: replaces the
 * BusClass realize hook with pcie_bus_realize().
 */
static void pcie_bus_class_init(ObjectClass *klass, void *data)
{
    BusClass *bc = BUS_CLASS(klass);

    bc->realize = pcie_bus_realize;
}
|
|
|
|
|
2013-03-15 02:00:59 +04:00
|
|
|
static const TypeInfo pcie_bus_info = {
|
|
|
|
.name = TYPE_PCIE_BUS,
|
|
|
|
.parent = TYPE_PCI_BUS,
|
2019-04-01 20:55:02 +03:00
|
|
|
.class_init = pcie_bus_class_init,
|
2013-03-15 02:00:59 +04:00
|
|
|
};
|
|
|
|
|
2022-04-29 17:40:39 +03:00
|
|
|
static const TypeInfo cxl_bus_info = {
|
|
|
|
.name = TYPE_CXL_BUS,
|
|
|
|
.parent = TYPE_PCIE_BUS,
|
|
|
|
.class_init = pcie_bus_class_init,
|
|
|
|
};
|
|
|
|
|
2006-08-17 14:46:34 +04:00
|
|
|
/* Forward declarations for helpers referenced before their definition. */
static void pci_update_mappings(PCIDevice *d);
static void pci_irq_handler(void *opaque, int irq_num, int level);
static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, Error **);
static void pci_del_option_rom(PCIDevice *pdev);

/* File-scope default subsystem vendor / device IDs. */
static uint16_t pci_default_sub_vendor_id = PCI_SUBVENDOR_ID_REDHAT_QUMRANET;
static uint16_t pci_default_sub_device_id = PCI_SUBDEVICE_ID_QEMU;

/* List head that pci_host_bus_register() inserts host bridges into. */
PCIHostStateList pci_host_bridges;
|
2004-06-21 23:45:35 +04:00
|
|
|
|
2015-05-07 08:33:54 +03:00
|
|
|
/*
 * Return the config-space offset of the register backing region @reg.
 *
 * Ordinary regions map to PCI_BASE_ADDRESS_0 + 4 * @reg.  The ROM slot
 * maps to PCI_ROM_ADDRESS1 when the header type (multi-function bit
 * masked off) is PCI_HEADER_TYPE_BRIDGE, and to PCI_ROM_ADDRESS otherwise.
 */
int pci_bar(PCIDevice *d, int reg)
{
    uint8_t header_type;

    /* PCIe virtual functions do not have their own BARs */
    assert(!pci_is_vf(d));

    if (reg == PCI_ROM_SLOT) {
        header_type = d->config[PCI_HEADER_TYPE] &
                      ~PCI_HEADER_TYPE_MULTI_FUNCTION;
        if (header_type == PCI_HEADER_TYPE_BRIDGE) {
            return PCI_ROM_ADDRESS1;
        }
        return PCI_ROM_ADDRESS;
    }

    return PCI_BASE_ADDRESS_0 + reg * 4;
}
|
|
|
|
|
2009-11-25 16:20:51 +03:00
|
|
|
/* Return bit @irq_num of the device's irq_state bitmap (0 or 1). */
static inline int pci_irq_state(PCIDevice *d, int irq_num)
{
    return 0x1 & (d->irq_state >> irq_num);
}
|
|
|
|
|
|
|
|
/*
 * Store @level into bit @irq_num of the device's irq_state bitmap:
 * the bit is cleared first, then @level shifted into place is OR-ed in.
 */
static inline void pci_set_irq_state(PCIDevice *d, int irq_num, int level)
{
    const int pin_bit = 0x1 << irq_num;

    d->irq_state &= ~pin_bit;
    d->irq_state |= level << irq_num;
}
|
|
|
|
|
2020-10-24 23:38:59 +03:00
|
|
|
/*
 * Add @change to the bus's counter for IRQ line @irq_num and forward the
 * resulting level (counter != 0) to the bus's set_irq callback.
 * @irq_num must lie within [0, bus->nirq).
 */
static void pci_bus_change_irq_level(PCIBus *bus, int irq_num, int change)
{
    int level;

    assert(irq_num >= 0);
    assert(irq_num < bus->nirq);

    bus->irq_count[irq_num] += change;
    level = bus->irq_count[irq_num] != 0;
    bus->set_irq(bus->irq_opaque, irq_num, level);
}
|
|
|
|
|
2009-11-25 16:20:51 +03:00
|
|
|
/*
 * Route an IRQ level change from @pci_dev upwards.
 *
 * At each bus the IRQ number is translated with that bus's map_irq
 * callback; the walk continues through parent_dev until a bus with a
 * set_irq callback is found, and @change is applied there.
 */
static void pci_change_irq_level(PCIDevice *pci_dev, int irq_num, int change)
{
    PCIBus *bus;
    bool routed;

    do {
        int dev_irq = irq_num;

        bus = pci_get_bus(pci_dev);
        assert(bus->map_irq);
        irq_num = bus->map_irq(pci_dev, irq_num);
        trace_pci_route_irq(dev_irq, DEVICE(pci_dev)->canonical_path, irq_num,
                            pci_bus_is_root(bus) ? "root-complex"
                                    : DEVICE(bus->parent_dev)->canonical_path);

        routed = bus->set_irq != NULL;
        if (!routed) {
            /* This bus cannot deliver the IRQ; move up to its parent. */
            pci_dev = bus->parent_dev;
        }
    } while (!routed);

    pci_bus_change_irq_level(bus, irq_num, change);
}
|
|
|
|
|
2011-04-01 15:43:21 +04:00
|
|
|
/*
 * Return 1 when the bus's counter for IRQ line @irq_num is non-zero and
 * 0 otherwise.  @irq_num must lie within [0, bus->nirq).
 */
int pci_bus_get_irq_level(PCIBus *bus, int irq_num)
{
    assert(irq_num >= 0);
    assert(irq_num < bus->nirq);

    return bus->irq_count[irq_num] != 0;
}
|
|
|
|
|
2009-11-25 16:44:40 +03:00
|
|
|
/* Update interrupt status bit in config space on interrupt
|
|
|
|
* state change. */
|
|
|
|
static void pci_update_irq_status(PCIDevice *dev)
|
|
|
|
{
|
|
|
|
if (dev->irq_state) {
|
|
|
|
dev->config[PCI_STATUS] |= PCI_STATUS_INTERRUPT;
|
|
|
|
} else {
|
|
|
|
dev->config[PCI_STATUS] &= ~PCI_STATUS_INTERRUPT;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-01-20 10:21:38 +03:00
|
|
|
/* Drive every interrupt pin of @dev (0 .. PCI_NUM_PINS - 1) to level 0. */
void pci_device_deassert_intx(PCIDevice *dev)
{
    int pin = 0;

    while (pin < PCI_NUM_PINS) {
        pci_irq_handler(dev, pin, 0);
        ++pin;
    }
}
|
|
|
|
|
2022-06-13 23:26:33 +03:00
|
|
|
/*
 * Deliver MSI message @msg on behalf of @dev.
 *
 * Under Xen emulation the message is first offered to the event-channel
 * code; if that code consumes it, nothing else happens.  Otherwise the
 * message data is written, little-endian, to the message address in the
 * device's bus-master address space, tagged with the device's requester ID.
 */
static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg)
{
    MemTxAttrs attrs = {};

    /*
     * Xen uses the high bits of the address to contain some of the bits
     * of the PIRQ#. Therefore we can't just send the write cycle and
     * trust that it's caught by the APIC at 0xfee00000 because the
     * target of the write might be e.g. 0x1000fee46000 for PIRQ#4166.
     * So we intercept the delivery here instead of in kvm_send_msi().
     */
    if (xen_mode == XEN_EMULATE) {
        if (xen_evtchn_deliver_pirq_msi(msg.address, msg.data)) {
            return;
        }
    }

    attrs.requester_id = pci_requester_id(dev);
    address_space_stl_le(&dev->bus_master_as, msg.address, msg.data,
                         attrs, NULL);
}
|
|
|
|
|
2022-02-17 20:44:50 +03:00
|
|
|
/*
 * Rewrite the BAR register of every region with a non-zero size so that
 * it holds just the region's type bits.  64-bit memory BARs are written
 * as a quad word, all others as a long word.  Nothing is done for
 * virtual functions.
 */
static void pci_reset_regions(PCIDevice *dev)
{
    int r;

    if (pci_is_vf(dev)) {
        return;
    }

    for (r = 0; r < PCI_NUM_REGIONS; ++r) {
        PCIIORegion *region = &dev->io_regions[r];
        bool is_mem64;

        if (region->size == 0) {
            continue;
        }

        is_mem64 = !(region->type & PCI_BASE_ADDRESS_SPACE_IO) &&
                   (region->type & PCI_BASE_ADDRESS_MEM_TYPE_64);
        if (is_mem64) {
            pci_set_quad(dev->config + pci_bar(dev, r), region->type);
        } else {
            pci_set_long(dev->config + pci_bar(dev, r), region->type);
        }
    }
}
|
|
|
|
|
|
|
|
/*
 * Bring the PCI-level state of @dev back to its reset values:
 * deassert all INTx pins, clear the writable and write-1-to-clear bits of
 * PCI_COMMAND, PCI_STATUS and PCI_INTERRUPT_LINE, zero the cache line
 * size, reset the BARs, refresh the mappings and reset MSI / MSI-X.
 */
static void pci_do_device_reset(PCIDevice *dev)
{
    pci_device_deassert_intx(dev);
    assert(dev->irq_state == 0);

    /* Clear all writable bits */
    pci_word_test_and_clear_mask(dev->config + PCI_COMMAND,
                                 pci_get_word(dev->wmask + PCI_COMMAND) |
                                 pci_get_word(dev->w1cmask + PCI_COMMAND));
    pci_word_test_and_clear_mask(dev->config + PCI_STATUS,
                                 pci_get_word(dev->wmask + PCI_STATUS) |
                                 pci_get_word(dev->w1cmask + PCI_STATUS));
    /*
     * Some devices make bits of PCI_INTERRUPT_LINE read only.
     * The register is one byte wide, so its masks are read as bytes too;
     * reading them as words would pull in the neighbouring register's
     * mask only to have it truncated away again.
     */
    pci_byte_test_and_clear_mask(dev->config + PCI_INTERRUPT_LINE,
                              pci_get_byte(dev->wmask + PCI_INTERRUPT_LINE) |
                              pci_get_byte(dev->w1cmask + PCI_INTERRUPT_LINE));
    dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
    pci_reset_regions(dev);
    pci_update_mappings(dev);

    msi_reset(dev);
    msix_reset(dev);
}
|
|
|
|
|
2013-12-06 20:54:27 +04:00
|
|
|
/*
|
|
|
|
* This function is called on #RST and FLR.
|
|
|
|
* FLR if PCI_EXP_DEVCTL_BCR_FLR is set
|
|
|
|
*/
|
|
|
|
void pci_device_reset(PCIDevice *dev)
|
|
|
|
{
|
2022-12-16 18:55:26 +03:00
|
|
|
device_cold_reset(&dev->qdev);
|
2013-12-06 20:54:27 +04:00
|
|
|
pci_do_device_reset(dev);
|
|
|
|
}
|
|
|
|
|
2010-11-19 12:56:02 +03:00
|
|
|
/*
|
|
|
|
* Trigger pci bus reset under a given bus.
|
2022-12-16 18:55:26 +03:00
|
|
|
* Called via bus_cold_reset on RST# assert, after the devices
|
|
|
|
* have been reset device_cold_reset-ed already.
|
2010-11-19 12:56:02 +03:00
|
|
|
*/
|
2013-12-06 20:54:27 +04:00
|
|
|
static void pcibus_reset(BusState *qbus)
|
2009-06-17 20:32:00 +04:00
|
|
|
{
|
2013-12-06 20:54:24 +04:00
|
|
|
PCIBus *bus = DO_UPCAST(PCIBus, qbus, qbus);
|
2009-06-17 20:32:00 +04:00
|
|
|
int i;
|
|
|
|
|
2009-09-16 14:40:57 +04:00
|
|
|
for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
|
|
|
|
if (bus->devices[i]) {
|
2013-12-06 20:54:27 +04:00
|
|
|
pci_do_device_reset(bus->devices[i]);
|
2009-09-16 14:40:57 +04:00
|
|
|
}
|
2009-06-17 20:32:00 +04:00
|
|
|
}
|
2010-11-19 12:56:02 +03:00
|
|
|
|
2013-12-06 20:54:25 +04:00
|
|
|
for (i = 0; i < bus->nirq; i++) {
|
|
|
|
assert(bus->irq_count[i] == 0);
|
|
|
|
}
|
2010-11-19 12:56:02 +03:00
|
|
|
}
|
|
|
|
|
2016-01-27 13:29:01 +03:00
|
|
|
/* Insert the host bridge @host at the head of the pci_host_bridges list. */
static void pci_host_bus_register(DeviceState *host)
{
    PCIHostState *bridge = PCI_HOST_BRIDGE(host);

    QLIST_INSERT_HEAD(&pci_host_bridges, bridge, next);
}
|
|
|
|
|
2018-12-21 03:35:30 +03:00
|
|
|
/* Unlink the host bridge @host from the list it was inserted into. */
static void pci_host_bus_unregister(DeviceState *host)
{
    PCIHostState *bridge = PCI_HOST_BRIDGE(host);

    QLIST_REMOVE(bridge, next);
}
|
|
|
|
|
2013-06-06 12:48:48 +04:00
|
|
|
PCIBus *pci_device_root_bus(const PCIDevice *d)
|
2010-05-28 13:30:46 +04:00
|
|
|
{
|
2017-11-29 11:46:27 +03:00
|
|
|
PCIBus *bus = pci_get_bus(d);
|
2010-05-28 13:30:46 +04:00
|
|
|
|
2015-06-02 14:22:57 +03:00
|
|
|
while (!pci_bus_is_root(bus)) {
|
|
|
|
d = bus->parent_dev;
|
|
|
|
assert(d != NULL);
|
|
|
|
|
2017-11-29 11:46:27 +03:00
|
|
|
bus = pci_get_bus(d);
|
2010-05-28 13:30:46 +04:00
|
|
|
}
|
|
|
|
|
2013-06-06 12:48:48 +04:00
|
|
|
return bus;
|
|
|
|
}
|
|
|
|
|
2013-06-06 12:48:49 +04:00
|
|
|
/*
 * Return the path of the root bus above @dev: the result of the host
 * bridge class's root_bus_path hook when one is set, the root bus's qbus
 * name otherwise.
 */
const char *pci_root_bus_path(PCIDevice *dev)
{
    PCIBus *rootbus = pci_device_root_bus(dev);
    PCIHostState *host_bridge = PCI_HOST_BRIDGE(rootbus->qbus.parent);
    PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_GET_CLASS(host_bridge);

    assert(host_bridge->bus == rootbus);

    if (!hc->root_bus_path) {
        return rootbus->qbus.name;
    }

    return (*hc->root_bus_path)(host_bridge, rootbus);
}
|
|
|
|
|
2021-07-08 15:55:11 +03:00
|
|
|
/*
 * Return the bypass_iommu setting of the host bridge that owns the root
 * bus above @bus (or @bus itself when it already is a root bus).
 */
bool pci_bus_bypass_iommu(PCIBus *bus)
{
    PCIBus *rootbus = pci_bus_is_root(bus)
                      ? bus
                      : pci_device_root_bus(bus->parent_dev);
    PCIHostState *host_bridge = PCI_HOST_BRIDGE(rootbus->qbus.parent);

    assert(host_bridge->bus == rootbus);

    return host_bridge->bypass_iommu;
}
|
|
|
|
|
2021-09-23 15:11:50 +03:00
|
|
|
static void pci_root_bus_internal_init(PCIBus *bus, DeviceState *parent,
|
|
|
|
MemoryRegion *address_space_mem,
|
|
|
|
MemoryRegion *address_space_io,
|
|
|
|
uint8_t devfn_min)
|
2004-06-21 23:45:35 +04:00
|
|
|
{
|
2010-06-23 11:15:26 +04:00
|
|
|
assert(PCI_FUNC(devfn_min) == 0);
|
2006-05-13 20:11:23 +04:00
|
|
|
bus->devfn_min = devfn_min;
|
2017-07-16 23:27:34 +03:00
|
|
|
bus->slot_reserved_mask = 0x0;
|
2011-08-08 17:09:05 +04:00
|
|
|
bus->address_space_mem = address_space_mem;
|
|
|
|
bus->address_space_io = address_space_io;
|
2019-04-24 07:19:58 +03:00
|
|
|
bus->flags |= PCI_BUS_IS_ROOT;
|
2009-10-30 15:21:13 +03:00
|
|
|
|
|
|
|
/* host bridge */
|
|
|
|
QLIST_INIT(&bus->child);
|
2013-06-06 12:48:53 +04:00
|
|
|
|
2016-01-27 13:29:01 +03:00
|
|
|
pci_host_bus_register(parent);
|
2009-09-17 00:25:31 +04:00
|
|
|
}
|
|
|
|
|
2018-12-21 03:35:30 +03:00
|
|
|
/* Unregister the parent of @bus from the host-bridge list. */
static void pci_bus_uninit(PCIBus *bus)
{
    BusState *qbus = BUS(bus);

    pci_host_bus_unregister(qbus->parent);
}
|
|
|
|
|
2023-01-12 17:03:02 +03:00
|
|
|
bool pci_bus_is_express(const PCIBus *bus)
|
2013-03-15 02:01:23 +04:00
|
|
|
{
|
|
|
|
return object_dynamic_cast(OBJECT(bus), TYPE_PCIE_BUS);
|
|
|
|
}
|
|
|
|
|
2021-09-23 15:11:50 +03:00
|
|
|
void pci_root_bus_init(PCIBus *bus, size_t bus_size, DeviceState *parent,
|
|
|
|
const char *name,
|
|
|
|
MemoryRegion *address_space_mem,
|
|
|
|
MemoryRegion *address_space_io,
|
|
|
|
uint8_t devfn_min, const char *typename)
|
2013-01-25 17:12:29 +04:00
|
|
|
{
|
2021-09-23 15:11:51 +03:00
|
|
|
qbus_init(bus, bus_size, typename, parent, name);
|
2021-09-23 15:11:50 +03:00
|
|
|
pci_root_bus_internal_init(bus, parent, address_space_mem,
|
|
|
|
address_space_io, devfn_min);
|
2013-01-25 17:12:29 +04:00
|
|
|
}
|
|
|
|
|
2017-11-29 11:46:22 +03:00
|
|
|
PCIBus *pci_root_bus_new(DeviceState *parent, const char *name,
|
|
|
|
MemoryRegion *address_space_mem,
|
|
|
|
MemoryRegion *address_space_io,
|
|
|
|
uint8_t devfn_min, const char *typename)
|
2009-09-17 00:25:31 +04:00
|
|
|
{
|
|
|
|
PCIBus *bus;
|
|
|
|
|
2021-09-23 15:11:52 +03:00
|
|
|
bus = PCI_BUS(qbus_new(typename, parent, name));
|
2021-09-23 15:11:50 +03:00
|
|
|
pci_root_bus_internal_init(bus, parent, address_space_mem,
|
|
|
|
address_space_io, devfn_min);
|
2009-09-17 00:25:31 +04:00
|
|
|
return bus;
|
|
|
|
}
|
|
|
|
|
2018-12-21 03:35:30 +03:00
|
|
|
/*
 * Undo pci_root_bus_init()/pci_root_bus_new(): drop the host bridge
 * registration and unrealize the bus.
 */
void pci_root_bus_cleanup(PCIBus *bus)
{
    pci_bus_uninit(bus);

    /*
     * Only unrealize here: the caller of the unplug hotplug handler will
     * delete this device.
     */
    qbus_unrealize(BUS(bus));
}
|
|
|
|
|
2023-01-09 20:23:17 +03:00
|
|
|
/*
 * Install the interrupt delivery callback for @bus and (re)allocate the
 * per-IRQ counter array.
 *
 * @set_irq:    callback stored in bus->set_irq
 * @irq_opaque: opaque pointer stored alongside it
 * @nirq:       number of entries in bus->irq_count
 *
 * Any previously allocated counter array is released first, so the
 * function may be called more than once on the same bus.
 */
void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq,
                  void *irq_opaque, int nirq)
{
    bus->set_irq = set_irq;
    bus->irq_opaque = irq_opaque;
    bus->nirq = nirq;

    g_free(bus->irq_count);
    /*
     * g_malloc0_n() checks the count * element-size multiplication for
     * overflow instead of silently allocating a wrapped-around size.
     */
    bus->irq_count = g_malloc0_n(nirq, sizeof(bus->irq_count[0]));
}
|
|
|
|
|
2023-01-09 20:23:17 +03:00
|
|
|
/* Install @map_irq as the IRQ mapping callback (bus->map_irq) of @bus. */
void pci_bus_map_irqs(PCIBus *bus, pci_map_irq_fn map_irq)
{
    bus->map_irq = map_irq;
}
|
|
|
|
|
2018-12-21 03:35:30 +03:00
|
|
|
/*
 * Reset everything pci_bus_irqs()/pci_bus_map_irqs() set up: free the
 * counter array and clear the callbacks, opaque pointer and IRQ count.
 */
void pci_bus_irqs_cleanup(PCIBus *bus)
{
    /* Release the counters and forget the stale pointer. */
    g_free(bus->irq_count);
    bus->irq_count = NULL;
    bus->nirq = 0;

    bus->irq_opaque = NULL;
    bus->map_irq = NULL;
    bus->set_irq = NULL;
}
|
|
|
|
|
2017-11-29 11:46:22 +03:00
|
|
|
PCIBus *pci_register_root_bus(DeviceState *parent, const char *name,
|
|
|
|
pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
|
|
|
|
void *irq_opaque,
|
|
|
|
MemoryRegion *address_space_mem,
|
|
|
|
MemoryRegion *address_space_io,
|
|
|
|
uint8_t devfn_min, int nirq,
|
|
|
|
const char *typename)
|
2009-09-17 00:25:31 +04:00
|
|
|
{
|
|
|
|
PCIBus *bus;
|
|
|
|
|
2017-11-29 11:46:22 +03:00
|
|
|
bus = pci_root_bus_new(parent, name, address_space_mem,
|
|
|
|
address_space_io, devfn_min, typename);
|
2023-01-09 20:23:17 +03:00
|
|
|
pci_bus_irqs(bus, set_irq, irq_opaque, nirq);
|
|
|
|
pci_bus_map_irqs(bus, map_irq);
|
2004-06-21 23:45:35 +04:00
|
|
|
return bus;
|
|
|
|
}
|
2004-05-19 03:05:28 +04:00
|
|
|
|
2018-12-21 03:35:30 +03:00
|
|
|
/*
 * Tear down a bus created by pci_register_root_bus(): first the IRQ
 * state, then the root bus itself.
 */
void pci_unregister_root_bus(PCIBus *bus)
{
    pci_bus_irqs_cleanup(bus);
    pci_root_bus_cleanup(bus);
}
|
|
|
|
|
2006-05-13 20:11:23 +04:00
|
|
|
/* Return the bus number of @s as reported by its class' bus_num hook. */
int pci_bus_num(PCIBus *s)
{
    PCIBusClass *pbc = PCI_BUS_GET_CLASS(s);

    return pbc->bus_num(s);
}
|
|
|
|
|
2021-07-08 15:55:15 +03:00
|
|
|
/* Returns the min and max bus numbers of a PCI bus hierarchy */
|
|
|
|
void pci_bus_range(PCIBus *bus, int *min_bus, int *max_bus)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
*min_bus = *max_bus = pci_bus_num(bus);
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
|
|
|
|
PCIDevice *dev = bus->devices[i];
|
|
|
|
|
2022-11-29 13:13:41 +03:00
|
|
|
if (dev && IS_PCI_BRIDGE(dev)) {
|
2021-07-08 15:55:15 +03:00
|
|
|
*min_bus = MIN(*min_bus, dev->config[PCI_SECONDARY_BUS]);
|
|
|
|
*max_bus = MAX(*max_bus, dev->config[PCI_SUBORDINATE_BUS]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-06-02 14:23:09 +03:00
|
|
|
/* Return the NUMA node of @bus as reported by its class' numa_node hook. */
int pci_bus_numa_node(PCIBus *bus)
{
    PCIBusClass *pbc = PCI_BUS_GET_CLASS(bus);

    return pbc->numa_node(bus);
}
|
|
|
|
|
2017-01-19 22:00:50 +03:00
|
|
|
static int get_pci_config_device(QEMUFile *f, void *pv, size_t size,
|
2018-11-14 16:29:30 +03:00
|
|
|
const VMStateField *field)
|
2004-10-03 17:56:00 +04:00
|
|
|
{
|
2009-08-20 21:42:39 +04:00
|
|
|
PCIDevice *s = container_of(pv, PCIDevice, config);
|
2009-10-30 15:21:18 +03:00
|
|
|
uint8_t *config;
|
2007-12-10 02:56:13 +03:00
|
|
|
int i;
|
|
|
|
|
2009-10-30 15:21:18 +03:00
|
|
|
assert(size == pci_config_size(s));
|
2011-08-21 07:09:37 +04:00
|
|
|
config = g_malloc(size);
|
2009-10-30 15:21:18 +03:00
|
|
|
|
|
|
|
qemu_get_buffer(f, config, size);
|
|
|
|
for (i = 0; i < size; ++i) {
|
2010-10-27 18:01:25 +04:00
|
|
|
if ((config[i] ^ s->config[i]) &
|
|
|
|
s->cmask[i] & ~s->wmask[i] & ~s->w1cmask[i]) {
|
2015-06-03 19:58:01 +03:00
|
|
|
error_report("%s: Bad config data: i=0x%x read: %x device: %x "
|
|
|
|
"cmask: %x wmask: %x w1cmask:%x", __func__,
|
|
|
|
i, config[i], s->config[i],
|
|
|
|
s->cmask[i], s->wmask[i], s->w1cmask[i]);
|
2011-08-21 07:09:37 +04:00
|
|
|
g_free(config);
|
2009-06-21 20:49:40 +04:00
|
|
|
return -EINVAL;
|
2009-10-30 15:21:18 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
memcpy(s->config, config, size);
|
2009-06-21 20:49:40 +04:00
|
|
|
|
2006-08-17 14:46:34 +04:00
|
|
|
pci_update_mappings(s);
|
2022-11-29 13:13:41 +03:00
|
|
|
if (IS_PCI_BRIDGE(s)) {
|
|
|
|
pci_bridge_update_mappings(PCI_BRIDGE(s));
|
2013-07-09 19:40:02 +04:00
|
|
|
}
|
2007-12-10 02:56:13 +03:00
|
|
|
|
2012-11-08 18:54:05 +04:00
|
|
|
memory_region_set_enabled(&s->bus_master_enable_region,
|
|
|
|
pci_get_word(s->config + PCI_COMMAND)
|
|
|
|
& PCI_COMMAND_MASTER);
|
|
|
|
|
2011-08-21 07:09:37 +04:00
|
|
|
g_free(config);
|
2004-10-03 17:56:00 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-08-20 21:42:39 +04:00
|
|
|
/* just put buffer */
|
2017-01-19 22:00:50 +03:00
|
|
|
static int put_pci_config_device(QEMUFile *f, void *pv, size_t size,
|
2020-12-11 20:11:48 +03:00
|
|
|
const VMStateField *field, JSONWriter *vmdesc)
|
2009-08-20 21:42:39 +04:00
|
|
|
{
|
2009-11-12 02:39:14 +03:00
|
|
|
const uint8_t **v = pv;
|
2009-10-30 15:21:18 +03:00
|
|
|
assert(size == pci_config_size(container_of(pv, PCIDevice, config)));
|
2009-11-12 02:39:14 +03:00
|
|
|
qemu_put_buffer(f, *v, size);
|
2017-01-19 22:00:50 +03:00
|
|
|
|
|
|
|
return 0;
|
2009-08-20 21:42:39 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static VMStateInfo vmstate_info_pci_config = {
|
|
|
|
.name = "pci config",
|
|
|
|
.get = get_pci_config_device,
|
|
|
|
.put = put_pci_config_device,
|
|
|
|
};
|
|
|
|
|
2017-01-19 22:00:50 +03:00
|
|
|
static int get_pci_irq_state(QEMUFile *f, void *pv, size_t size,
|
2018-11-14 16:29:30 +03:00
|
|
|
const VMStateField *field)
|
2009-11-25 16:20:51 +03:00
|
|
|
{
|
2010-05-09 20:15:16 +04:00
|
|
|
PCIDevice *s = container_of(pv, PCIDevice, irq_state);
|
2009-11-25 16:20:51 +03:00
|
|
|
uint32_t irq_state[PCI_NUM_PINS];
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < PCI_NUM_PINS; ++i) {
|
|
|
|
irq_state[i] = qemu_get_be32(f);
|
|
|
|
if (irq_state[i] != 0x1 && irq_state[i] != 0) {
|
|
|
|
fprintf(stderr, "irq state %d: must be 0 or 1.\n",
|
|
|
|
irq_state[i]);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < PCI_NUM_PINS; ++i) {
|
|
|
|
pci_set_irq_state(s, i, irq_state[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-01-19 22:00:50 +03:00
|
|
|
static int put_pci_irq_state(QEMUFile *f, void *pv, size_t size,
|
2020-12-11 20:11:48 +03:00
|
|
|
const VMStateField *field, JSONWriter *vmdesc)
|
2009-11-25 16:20:51 +03:00
|
|
|
{
|
|
|
|
int i;
|
2010-05-09 20:15:16 +04:00
|
|
|
PCIDevice *s = container_of(pv, PCIDevice, irq_state);
|
2009-11-25 16:20:51 +03:00
|
|
|
|
|
|
|
for (i = 0; i < PCI_NUM_PINS; ++i) {
|
|
|
|
qemu_put_be32(f, pci_irq_state(s, i));
|
|
|
|
}
|
2017-01-19 22:00:50 +03:00
|
|
|
|
|
|
|
return 0;
|
2009-11-25 16:20:51 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VMStateInfo vmstate_info_pci_irq_state = {
|
|
|
|
.name = "pci irq state",
|
|
|
|
.get = get_pci_irq_state,
|
|
|
|
.put = put_pci_irq_state,
|
|
|
|
};
|
|
|
|
|
2016-12-14 22:58:29 +03:00
|
|
|
static bool migrate_is_pcie(void *opaque, int version_id)
|
|
|
|
{
|
|
|
|
return pci_is_express((PCIDevice *)opaque);
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool migrate_is_not_pcie(void *opaque, int version_id)
|
|
|
|
{
|
|
|
|
return !pci_is_express((PCIDevice *)opaque);
|
|
|
|
}
|
|
|
|
|
2009-08-20 21:42:39 +04:00
|
|
|
const VMStateDescription vmstate_pci_device = {
|
|
|
|
.name = "PCIDevice",
|
|
|
|
.version_id = 2,
|
|
|
|
.minimum_version_id = 1,
|
2014-04-16 17:32:32 +04:00
|
|
|
.fields = (VMStateField[]) {
|
2014-04-03 20:52:21 +04:00
|
|
|
VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice),
|
2016-12-14 22:58:29 +03:00
|
|
|
VMSTATE_BUFFER_UNSAFE_INFO_TEST(config, PCIDevice,
|
|
|
|
migrate_is_not_pcie,
|
|
|
|
0, vmstate_info_pci_config,
|
2009-10-30 15:21:18 +03:00
|
|
|
PCI_CONFIG_SPACE_SIZE),
|
2016-12-14 22:58:29 +03:00
|
|
|
VMSTATE_BUFFER_UNSAFE_INFO_TEST(config, PCIDevice,
|
|
|
|
migrate_is_pcie,
|
|
|
|
0, vmstate_info_pci_config,
|
2009-10-30 15:21:18 +03:00
|
|
|
PCIE_CONFIG_SPACE_SIZE),
|
2009-11-25 16:20:51 +03:00
|
|
|
VMSTATE_BUFFER_UNSAFE_INFO(irq_state, PCIDevice, 2,
|
2018-12-14 01:37:37 +03:00
|
|
|
vmstate_info_pci_irq_state,
|
|
|
|
PCI_NUM_PINS * sizeof(int32_t)),
|
2009-08-20 21:42:39 +04:00
|
|
|
VMSTATE_END_OF_LIST()
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2009-10-30 15:21:18 +03:00
|
|
|
|
2009-08-20 21:42:39 +04:00
|
|
|
/* Write the generic PCI state of @s to @f using vmstate_pci_device. */
void pci_device_save(PCIDevice *s, QEMUFile *f)
{
    uint8_t *status = &s->config[PCI_STATUS];

    /*
     * The interrupt status bit is implied by irq_state, which is saved
     * too, so it is cleared for the duration of the save.  This keeps the
     * stream compatible with old devices which never set or clear the bit.
     */
    *status &= ~PCI_STATUS_INTERRUPT;

    vmstate_save_state(f, &vmstate_pci_device, s, NULL);

    /* Put the interrupt status bit back. */
    pci_update_irq_status(s);
}
|
|
|
|
|
|
|
|
/*
 * Read the generic PCI state of @s from @f using vmstate_pci_device.
 *
 * Returns whatever vmstate_load_state() returned.  The interrupt status
 * bit is recomputed in either case.
 */
int pci_device_load(PCIDevice *s, QEMUFile *f)
{
    int ret = vmstate_load_state(f, &vmstate_pci_device, s, s->version_id);

    /* Restore the interrupt status bit. */
    pci_update_irq_status(s);

    return ret;
}
|
|
|
|
|
2010-05-27 09:42:06 +04:00
|
|
|
/*
 * Store the file-wide default subsystem vendor and device IDs in the
 * config space of @pci_dev.
 */
static void pci_set_default_subsystem_id(PCIDevice *pci_dev)
{
    uint8_t *cfg = pci_dev->config;

    pci_set_word(cfg + PCI_SUBSYSTEM_VENDOR_ID, pci_default_sub_vendor_id);
    pci_set_word(cfg + PCI_SUBSYSTEM_ID, pci_default_sub_device_id);
}
|
|
|
|
|
2009-02-11 18:21:48 +03:00
|
|
|
/*
 * Parse a PCI device address written in hexadecimal.
 *
 * Accepted forms:
 *   funcp == NULL:  [[<domain>:]<bus>:]<slot>
 *   funcp != NULL:  [[<domain>:]<bus>:]<slot>.<func>
 *
 * On success, stores the components through @domp, @busp, @slotp and
 * (when non-NULL) @funcp and returns 0.  An omitted domain or bus is
 * reported as 0.
 *
 * Returns -1 without touching the outputs when a number is missing, when
 * ".<func>" is required but absent, when characters follow the address,
 * or when a component is out of range (domain > 0xffff, bus > 0xff,
 * slot > 0x1f, func > 7).
 */
static int pci_parse_devaddr(const char *addr, int *domp, int *busp,
                             unsigned int *slotp, unsigned int *funcp)
{
    const char *p;
    char *e;
    unsigned long val;
    unsigned long dom = 0, bus = 0;
    /*
     * Same width as strtoul()'s result: a narrower type would truncate an
     * oversized number before the range check and let it pass.
     */
    unsigned long slot = 0;
    unsigned long func = 0;

    p = addr;
    val = strtoul(p, &e, 16);
    if (e == p) {
        return -1;
    }
    if (*e == ':') {
        /* "<bus>:<slot>" so far; may still turn out to be "<dom>:<bus>:" */
        bus = val;
        p = e + 1;
        val = strtoul(p, &e, 16);
        if (e == p) {
            return -1;
        }
        if (*e == ':') {
            dom = bus;
            bus = val;
            p = e + 1;
            val = strtoul(p, &e, 16);
            if (e == p) {
                return -1;
            }
        }
    }

    slot = val;

    if (funcp != NULL) {
        if (*e != '.') {
            return -1;
        }

        p = e + 1;
        val = strtoul(p, &e, 16);
        if (e == p) {
            return -1;
        }

        func = val;
    }

    /* if funcp == NULL func is 0 */
    if (dom > 0xffff || bus > 0xff || slot > 0x1f || func > 7) {
        return -1;
    }

    /* Reject trailing garbage. */
    if (*e) {
        return -1;
    }

    *domp = dom;
    *busp = bus;
    *slotp = slot;
    if (funcp != NULL) {
        *funcp = func;
    }
    return 0;
}
|
|
|
|
|
2009-06-21 20:49:40 +04:00
|
|
|
/*
 * Set up dev->cmask: mark the config registers (vendor/device ID, the
 * capability-list status bit, revision, class, header type, capability
 * pointer) that are compared against incoming migration data.
 */
static void pci_init_cmask(PCIDevice *dev)
{
    uint8_t *cmask = dev->cmask;

    pci_set_word(cmask + PCI_VENDOR_ID, 0xffff);
    pci_set_word(cmask + PCI_DEVICE_ID, 0xffff);
    cmask[PCI_STATUS] = PCI_STATUS_CAP_LIST;
    cmask[PCI_REVISION_ID] = 0xff;
    cmask[PCI_CLASS_PROG] = 0xff;
    pci_set_word(cmask + PCI_CLASS_DEVICE, 0xffff);
    cmask[PCI_HEADER_TYPE] = 0xff;
    cmask[PCI_CAPABILITY_LIST] = 0xff;
}
|
|
|
|
|
2009-06-21 20:45:18 +04:00
|
|
|
/*
 * Set up dev->wmask, the mask of writable config bits: cache line size,
 * interrupt line, a set of command register bits, and every byte beyond
 * the standard config header.
 */
static void pci_init_wmask(PCIDevice *dev)
{
    uint8_t *wmask = dev->wmask;
    int config_size = pci_config_size(dev);

    wmask[PCI_CACHE_LINE_SIZE] = 0xff;
    wmask[PCI_INTERRUPT_LINE] = 0xff;

    pci_set_word(wmask + PCI_COMMAND,
                 PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
                 PCI_COMMAND_INTX_DISABLE);
    pci_word_test_and_set_mask(wmask + PCI_COMMAND, PCI_COMMAND_SERR);

    /* Everything after the header is writable by default. */
    memset(wmask + PCI_CONFIG_HEADER_SIZE, 0xff,
           config_size - PCI_CONFIG_HEADER_SIZE);
}
|
|
|
|
|
2010-11-16 11:26:07 +03:00
|
|
|
/* Set up dev->w1cmask with the error bits of the status register. */
static void pci_init_w1cmask(PCIDevice *dev)
{
    /*
     * Note: It's okay to set w1cmask even for readonly bits as
     * long as their value is hardwired to 0.
     */
    const uint16_t status_w1c =
        PCI_STATUS_PARITY | PCI_STATUS_SIG_TARGET_ABORT |
        PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_REC_MASTER_ABORT |
        PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_DETECTED_PARITY;

    pci_set_word(dev->w1cmask + PCI_STATUS, status_w1c);
}
|
|
|
|
|
2012-02-21 17:57:58 +04:00
|
|
|
/*
 * Bridge-specific additions to the config masks of @d: make the bus
 * number, base/limit and bridge control registers writable, advertise the
 * supported I/O and prefetchable memory range types in the config space,
 * and add those type bits to cmask.
 */
static void pci_init_mask_bridge(PCIDevice *d)
{
    uint8_t *config = d->config;
    uint8_t *wmask = d->wmask;
    uint8_t *cmask = d->cmask;

    /*
     * PCI_PRIMARY_BUS, PCI_SECONDARY_BUS, PCI_SUBORDINATE_BUS and
     * PCI_SEC_LATENCY_TIMER
     */
    memset(wmask + PCI_PRIMARY_BUS, 0xff, 4);

    /* base and limit */
    wmask[PCI_IO_BASE] = PCI_IO_RANGE_MASK & 0xff;
    wmask[PCI_IO_LIMIT] = PCI_IO_RANGE_MASK & 0xff;
    pci_set_word(wmask + PCI_MEMORY_BASE, PCI_MEMORY_RANGE_MASK & 0xffff);
    pci_set_word(wmask + PCI_MEMORY_LIMIT, PCI_MEMORY_RANGE_MASK & 0xffff);
    pci_set_word(wmask + PCI_PREF_MEMORY_BASE, PCI_PREF_RANGE_MASK & 0xffff);
    pci_set_word(wmask + PCI_PREF_MEMORY_LIMIT, PCI_PREF_RANGE_MASK & 0xffff);

    /* PCI_PREF_BASE_UPPER32 and PCI_PREF_LIMIT_UPPER32 */
    memset(wmask + PCI_PREF_BASE_UPPER32, 0xff, 8);

    /* Supported memory and i/o types */
    config[PCI_IO_BASE] |= PCI_IO_RANGE_TYPE_16;
    config[PCI_IO_LIMIT] |= PCI_IO_RANGE_TYPE_16;
    pci_word_test_and_set_mask(config + PCI_PREF_MEMORY_BASE,
                               PCI_PREF_RANGE_TYPE_64);
    pci_word_test_and_set_mask(config + PCI_PREF_MEMORY_LIMIT,
                               PCI_PREF_RANGE_TYPE_64);

    /*
     * TODO: Bridges default to 10-bit VGA decoding but we currently only
     * implement 16-bit decoding (no alias support).
     */
    pci_set_word(wmask + PCI_BRIDGE_CONTROL,
                 PCI_BRIDGE_CTL_PARITY |
                 PCI_BRIDGE_CTL_SERR |
                 PCI_BRIDGE_CTL_ISA |
                 PCI_BRIDGE_CTL_VGA |
                 PCI_BRIDGE_CTL_VGA_16BIT |
                 PCI_BRIDGE_CTL_MASTER_ABORT |
                 PCI_BRIDGE_CTL_BUS_RESET |
                 PCI_BRIDGE_CTL_FAST_BACK |
                 PCI_BRIDGE_CTL_DISCARD |
                 PCI_BRIDGE_CTL_SEC_DISCARD |
                 PCI_BRIDGE_CTL_DISCARD_SERR);

    /*
     * Below does not do anything as we never set this bit, put here for
     * completeness.
     */
    pci_set_word(d->w1cmask + PCI_BRIDGE_CONTROL,
                 PCI_BRIDGE_CTL_DISCARD_STATUS);

    /* The range type bits are compared on migration. */
    cmask[PCI_IO_BASE] |= PCI_IO_RANGE_TYPE_MASK;
    cmask[PCI_IO_LIMIT] |= PCI_IO_RANGE_TYPE_MASK;
    pci_word_test_and_set_mask(cmask + PCI_PREF_MEMORY_BASE,
                               PCI_PREF_RANGE_TYPE_MASK);
    pci_word_test_and_set_mask(cmask + PCI_PREF_MEMORY_LIMIT,
                               PCI_PREF_RANGE_TYPE_MASK);
}
|
|
|
|
|
2015-01-19 17:52:28 +03:00
|
|
|
/*
 * Set the multifunction bit of @dev's header type when the device asks
 * for it, and check that @dev is consistent with the other functions
 * already present in the same slot of @bus.
 *
 * On an inconsistency an error is stored in @errp; otherwise @errp is
 * left untouched.
 */
static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
{
    const uint8_t slot = PCI_SLOT(dev->devfn);
    const uint8_t fn = PCI_FUNC(dev->devfn);
    const bool multifunction = dev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION;
    uint8_t other;

    if (multifunction) {
        dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
    }

    /*
     * With SR/IOV and ARI, a device at function 0 need not be a multifunction
     * device, as it may just be a VF that ended up with function 0 in
     * the legacy PCI interpretation. Avoid failing in such cases:
     */
    if (pci_is_vf(dev) &&
        (dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION)) {
        return;
    }

    /*
     * multifunction bit is interpreted in two ways as follows.
     *   - all functions must set the bit to 1.
     *     Example: Intel X53
     *   - function 0 must set the bit, but the rest function (> 0)
     *     is allowed to leave the bit to 0.
     *     Example: PIIX3(also in qemu), PIIX4(also in qemu), ICH10,
     *
     * So OS (at least Linux) checks the bit of only function 0,
     * and doesn't see the bit of function > 0.
     *
     * The below check allows both interpretation.
     */
    if (fn != 0) {
        PCIDevice *f0 = bus->devices[PCI_DEVFN(slot, 0)];

        /* function 0 should set multifunction bit */
        if (f0 && !(f0->cap_present & QEMU_PCI_CAP_MULTIFUNCTION)) {
            error_setg(errp, "PCI: single function device can't be populated "
                       "in function %x.%x", slot, fn);
        }
        return;
    }

    /* From here on @dev is function 0. */
    if (multifunction) {
        return;
    }

    /* function 0 indicates single function, so function > 0 must be NULL */
    for (other = 1; other < PCI_FUNC_MAX; ++other) {
        if (bus->devices[PCI_DEVFN(slot, other)]) {
            error_setg(errp, "PCI: %x.0 indicates single function, "
                       "but %x.%x is already populated.",
                       slot, slot, other);
            return;
        }
    }
}
|
|
|
|
|
2009-10-30 15:21:18 +03:00
|
|
|
/*
 * Allocate the zero-filled per-device config space arrays (config, cmask,
 * wmask, w1cmask, used), each pci_config_size() bytes long.
 */
static void pci_config_alloc(PCIDevice *pci_dev)
{
    const int len = pci_config_size(pci_dev);

    pci_dev->config  = g_malloc0(len);
    pci_dev->cmask   = g_malloc0(len);
    pci_dev->wmask   = g_malloc0(len);
    pci_dev->w1cmask = g_malloc0(len);
    pci_dev->used    = g_malloc0(len);
}
|
|
|
|
|
|
|
|
/* Release the arrays allocated by pci_config_alloc(). */
static void pci_config_free(PCIDevice *pci_dev)
{
    g_free(pci_dev->used);
    g_free(pci_dev->w1cmask);
    g_free(pci_dev->wmask);
    g_free(pci_dev->cmask);
    g_free(pci_dev->config);
}
|
|
|
|
|
2014-01-21 20:37:51 +04:00
|
|
|
/*
 * Undo do_pci_register_device(): clear the device's slot on its bus, free
 * its config space arrays and tear down its bus-master address space.
 */
static void do_pci_unregister_device(PCIDevice *pci_dev)
{
    PCIBus *bus = pci_get_bus(pci_dev);
    MemoryRegion *bm_enable = &pci_dev->bus_master_enable_region;

    bus->devices[pci_dev->devfn] = NULL;
    pci_config_free(pci_dev);

    if (xen_mode == XEN_EMULATE) {
        xen_evtchn_remove_pci_device(pci_dev);
    }

    /* The enable region is only a subregion once it has been mapped. */
    if (memory_region_is_mapped(bm_enable)) {
        memory_region_del_subregion(&pci_dev->bus_master_container_region,
                                    bm_enable);
    }
    address_space_destroy(&pci_dev->bus_master_as);
}
|
|
|
|
|
2016-05-17 14:26:10 +03:00
|
|
|
/* Extract PCIReqIDCache into BDF format */
|
|
|
|
static uint16_t pci_req_id_cache_extract(PCIReqIDCache *cache)
|
|
|
|
{
|
|
|
|
uint8_t bus_n;
|
|
|
|
uint16_t result;
|
|
|
|
|
|
|
|
switch (cache->type) {
|
|
|
|
case PCI_REQ_ID_BDF:
|
|
|
|
result = pci_get_bdf(cache->dev);
|
|
|
|
break;
|
|
|
|
case PCI_REQ_ID_SECONDARY_BUS:
|
2017-11-29 11:46:27 +03:00
|
|
|
bus_n = pci_dev_bus_num(cache->dev);
|
2016-05-17 14:26:10 +03:00
|
|
|
result = PCI_BUILD_BDF(bus_n, 0);
|
|
|
|
break;
|
|
|
|
default:
|
2019-04-17 22:06:31 +03:00
|
|
|
error_report("Invalid PCI requester ID cache type: %d",
|
2016-05-17 14:26:10 +03:00
|
|
|
cache->type);
|
|
|
|
exit(1);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Parse bridges up to the root complex and return requester ID
|
|
|
|
* cache for specific device. For full PCIe topology, the cache
|
|
|
|
* result would be exactly the same as getting BDF of the device.
|
|
|
|
* However, several tricks are required when system mixed up with
|
|
|
|
* legacy PCI devices and PCIe-to-PCI bridges.
|
|
|
|
*
|
|
|
|
* Here we cache the proxy device (and type) not requester ID since
|
|
|
|
* bus number might change from time to time.
|
|
|
|
*/
|
|
|
|
static PCIReqIDCache pci_req_id_cache_get(PCIDevice *dev)
|
|
|
|
{
|
|
|
|
PCIDevice *parent;
|
|
|
|
PCIReqIDCache cache = {
|
|
|
|
.dev = dev,
|
|
|
|
.type = PCI_REQ_ID_BDF,
|
|
|
|
};
|
|
|
|
|
2017-11-29 11:46:27 +03:00
|
|
|
while (!pci_bus_is_root(pci_get_bus(dev))) {
|
2016-05-17 14:26:10 +03:00
|
|
|
/* We are under PCI/PCIe bridges */
|
2017-11-29 11:46:27 +03:00
|
|
|
parent = pci_get_bus(dev)->parent_dev;
|
2016-05-17 14:26:10 +03:00
|
|
|
if (pci_is_express(parent)) {
|
|
|
|
if (pcie_cap_get_type(parent) == PCI_EXP_TYPE_PCI_BRIDGE) {
|
|
|
|
/* When we pass through PCIe-to-PCI/PCIX bridges, we
|
|
|
|
* override the requester ID using secondary bus
|
|
|
|
* number of parent bridge with zeroed devfn
|
|
|
|
* (pcie-to-pci bridge spec chap 2.3). */
|
|
|
|
cache.type = PCI_REQ_ID_SECONDARY_BUS;
|
|
|
|
cache.dev = dev;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* Legacy PCI, override requester ID with the bridge's
|
|
|
|
* BDF upstream. When the root complex connects to
|
|
|
|
* legacy PCI devices (including buses), it can only
|
|
|
|
* obtain requester ID info from directly attached
|
|
|
|
* devices. If devices are attached under bridges, only
|
|
|
|
* the requester ID of the bridge that is directly
|
|
|
|
* attached to the root complex can be recognized. */
|
|
|
|
cache.type = PCI_REQ_ID_BDF;
|
|
|
|
cache.dev = parent;
|
|
|
|
}
|
|
|
|
dev = parent;
|
|
|
|
}
|
|
|
|
|
|
|
|
return cache;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Return the requester ID of @dev, derived from its requester ID cache. */
uint16_t pci_requester_id(PCIDevice *dev)
{
    PCIReqIDCache *cache = &dev->requester_id_cache;

    return pci_req_id_cache_extract(cache);
}
|
|
|
|
|
2017-07-16 23:27:33 +03:00
|
|
|
/* True when no device is registered at @devfn on @bus. */
static bool pci_bus_devfn_available(PCIBus *bus, int devfn)
{
    return bus->devices[devfn] == NULL;
}
|
|
|
|
|
2017-07-16 23:27:34 +03:00
|
|
|
/* True when the slot of @devfn is marked in @bus' slot_reserved_mask. */
static bool pci_bus_devfn_reserved(PCIBus *bus, int devfn)
{
    unsigned long slot_bit = 1UL << PCI_SLOT(devfn);

    return (bus->slot_reserved_mask & slot_bit) != 0;
}
|
|
|
|
|
2023-03-15 17:26:19 +03:00
|
|
|
/* Return the reserved-slot mask of @bus (bit index = slot number). */
uint32_t pci_bus_get_slot_reserved_mask(PCIBus *bus)
{
    uint32_t reserved = bus->slot_reserved_mask;

    return reserved;
}
|
|
|
|
|
|
|
|
/*
 * Mark the slots in @mask as reserved on @bus.  Bits are added to the
 * existing mask; slots that were already reserved stay reserved.
 */
void pci_bus_set_slot_reserved_mask(PCIBus *bus, uint32_t mask)
{
    bus->slot_reserved_mask = bus->slot_reserved_mask | mask;
}
|
|
|
|
|
|
|
|
/* Remove the reservation of the slots in @mask on @bus. */
void pci_bus_clear_slot_reserved_mask(PCIBus *bus, uint32_t mask)
{
    bus->slot_reserved_mask = bus->slot_reserved_mask & ~mask;
}
|
|
|
|
|
2004-05-19 03:05:28 +04:00
|
|
|
/* -1 for devfn means auto assign */
|
2017-11-29 11:46:27 +03:00
|
|
|
static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
|
2015-01-19 17:52:28 +03:00
|
|
|
const char *name, int devfn,
|
|
|
|
Error **errp)
|
2004-05-19 03:05:28 +04:00
|
|
|
{
|
2011-12-04 22:22:06 +04:00
|
|
|
PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
|
|
|
|
PCIConfigReadFunc *config_read = pc->config_read;
|
|
|
|
PCIConfigWriteFunc *config_write = pc->config_write;
|
2015-01-19 17:52:28 +03:00
|
|
|
Error *local_err = NULL;
|
2015-10-28 09:20:31 +03:00
|
|
|
DeviceState *dev = DEVICE(pci_dev);
|
2017-11-29 11:46:27 +03:00
|
|
|
PCIBus *bus = pci_get_bus(pci_dev);
|
2022-11-29 13:13:41 +03:00
|
|
|
bool is_bridge = IS_PCI_BRIDGE(pci_dev);
|
2015-10-28 09:20:31 +03:00
|
|
|
|
2016-01-18 18:27:26 +03:00
|
|
|
/* Only pci bridges can be attached to extra PCI root buses */
|
2022-11-29 13:13:41 +03:00
|
|
|
if (pci_bus_is_root(bus) && bus->parent_dev && !is_bridge) {
|
2016-01-18 18:27:26 +03:00
|
|
|
error_setg(errp,
|
|
|
|
"PCI: Only PCI/PCIe bridges can be plugged into %s",
|
|
|
|
bus->parent_dev->name);
|
|
|
|
return NULL;
|
|
|
|
}
|
2011-05-25 05:57:58 +04:00
|
|
|
|
2004-05-19 03:05:28 +04:00
|
|
|
if (devfn < 0) {
|
2009-11-12 08:58:45 +03:00
|
|
|
for(devfn = bus->devfn_min ; devfn < ARRAY_SIZE(bus->devices);
|
2010-06-23 11:15:26 +04:00
|
|
|
devfn += PCI_FUNC_MAX) {
|
2017-07-16 23:27:34 +03:00
|
|
|
if (pci_bus_devfn_available(bus, devfn) &&
|
|
|
|
!pci_bus_devfn_reserved(bus, devfn)) {
|
2004-05-19 03:05:28 +04:00
|
|
|
goto found;
|
2017-07-16 23:27:33 +03:00
|
|
|
}
|
2004-05-19 03:05:28 +04:00
|
|
|
}
|
2017-07-16 23:27:34 +03:00
|
|
|
error_setg(errp, "PCI: no slot/function available for %s, all in use "
|
|
|
|
"or reserved", name);
|
2009-12-10 13:11:06 +03:00
|
|
|
return NULL;
|
2004-05-19 03:05:28 +04:00
|
|
|
found: ;
|
2017-07-16 23:27:34 +03:00
|
|
|
} else if (pci_bus_devfn_reserved(bus, devfn)) {
|
|
|
|
error_setg(errp, "PCI: slot %d function %d not available for %s,"
|
|
|
|
" reserved",
|
|
|
|
PCI_SLOT(devfn), PCI_FUNC(devfn), name);
|
|
|
|
return NULL;
|
2017-07-16 23:27:33 +03:00
|
|
|
} else if (!pci_bus_devfn_available(bus, devfn)) {
|
2015-01-19 17:52:28 +03:00
|
|
|
error_setg(errp, "PCI: slot %d function %d not available for %s,"
|
2022-02-23 12:44:35 +03:00
|
|
|
" in use by %s,id=%s",
|
2015-01-19 17:52:28 +03:00
|
|
|
PCI_SLOT(devfn), PCI_FUNC(devfn), name,
|
2022-02-23 12:44:35 +03:00
|
|
|
bus->devices[devfn]->name, bus->devices[devfn]->qdev.id);
|
2009-12-10 13:11:06 +03:00
|
|
|
return NULL;
|
2023-07-11 10:03:01 +03:00
|
|
|
} /*
|
|
|
|
* Populating function 0 triggers a scan from the guest that
|
|
|
|
* exposes other non-zero functions. Hence we need to ensure that
|
|
|
|
* function 0 wasn't added yet.
|
|
|
|
*/
|
|
|
|
else if (dev->hotplugged &&
|
|
|
|
!pci_is_vf(pci_dev) &&
|
|
|
|
pci_get_function_0(pci_dev)) {
|
2020-10-06 16:39:58 +03:00
|
|
|
error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
|
2015-10-28 09:20:31 +03:00
|
|
|
" new func %s cannot be exposed to guest.",
|
2016-12-12 21:42:03 +03:00
|
|
|
PCI_SLOT(pci_get_function_0(pci_dev)->devfn),
|
|
|
|
pci_get_function_0(pci_dev)->name,
|
2015-10-28 09:20:31 +03:00
|
|
|
name);
|
|
|
|
|
|
|
|
return NULL;
|
2004-05-19 03:05:28 +04:00
|
|
|
}
|
2012-10-30 15:47:48 +04:00
|
|
|
|
2014-07-02 04:06:35 +04:00
|
|
|
pci_dev->devfn = devfn;
|
2016-05-17 14:26:10 +03:00
|
|
|
pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
|
2017-10-19 05:15:05 +03:00
|
|
|
pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
|
2012-10-30 15:47:48 +04:00
|
|
|
|
2017-03-13 06:29:58 +03:00
|
|
|
memory_region_init(&pci_dev->bus_master_container_region, OBJECT(pci_dev),
|
|
|
|
"bus master container", UINT64_MAX);
|
|
|
|
address_space_init(&pci_dev->bus_master_as,
|
|
|
|
&pci_dev->bus_master_container_region, pci_dev->name);
|
|
|
|
|
2020-11-12 17:38:36 +03:00
|
|
|
if (phase_check(PHASE_MACHINE_READY)) {
|
2016-06-27 18:38:32 +03:00
|
|
|
pci_init_bus_master(pci_dev);
|
|
|
|
}
|
2009-11-25 16:20:51 +03:00
|
|
|
pci_dev->irq_state = 0;
|
2009-10-30 15:21:18 +03:00
|
|
|
pci_config_alloc(pci_dev);
|
2009-10-30 15:21:22 +03:00
|
|
|
|
2011-12-04 22:22:06 +04:00
|
|
|
pci_config_set_vendor_id(pci_dev->config, pc->vendor_id);
|
|
|
|
pci_config_set_device_id(pci_dev->config, pc->device_id);
|
|
|
|
pci_config_set_revision(pci_dev->config, pc->revision);
|
|
|
|
pci_config_set_class(pci_dev->config, pc->class_id);
|
2011-05-25 05:57:58 +04:00
|
|
|
|
2022-11-29 13:13:41 +03:00
|
|
|
if (!is_bridge) {
|
2011-12-04 22:22:06 +04:00
|
|
|
if (pc->subsystem_vendor_id || pc->subsystem_id) {
|
2011-05-25 05:57:58 +04:00
|
|
|
pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID,
|
2011-12-04 22:22:06 +04:00
|
|
|
pc->subsystem_vendor_id);
|
2011-05-25 05:57:58 +04:00
|
|
|
pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID,
|
2011-12-04 22:22:06 +04:00
|
|
|
pc->subsystem_id);
|
2011-05-25 05:57:58 +04:00
|
|
|
} else {
|
|
|
|
pci_set_default_subsystem_id(pci_dev);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* subsystem_vendor_id/subsystem_id are only for header type 0 */
|
2011-12-04 22:22:06 +04:00
|
|
|
assert(!pc->subsystem_vendor_id);
|
|
|
|
assert(!pc->subsystem_id);
|
2009-10-30 15:21:22 +03:00
|
|
|
}
|
2009-06-21 20:49:40 +04:00
|
|
|
pci_init_cmask(pci_dev);
|
2009-06-21 20:45:18 +04:00
|
|
|
pci_init_wmask(pci_dev);
|
2010-11-16 11:26:07 +03:00
|
|
|
pci_init_w1cmask(pci_dev);
|
2022-11-29 13:13:41 +03:00
|
|
|
if (is_bridge) {
|
2012-02-21 17:57:58 +04:00
|
|
|
pci_init_mask_bridge(pci_dev);
|
2009-10-30 15:21:22 +03:00
|
|
|
}
|
2015-01-19 17:52:28 +03:00
|
|
|
pci_init_multifunction(bus, pci_dev, &local_err);
|
|
|
|
if (local_err) {
|
|
|
|
error_propagate(errp, local_err);
|
2014-01-21 20:37:51 +04:00
|
|
|
do_pci_unregister_device(pci_dev);
|
2010-06-23 11:15:33 +04:00
|
|
|
return NULL;
|
|
|
|
}
|
2004-05-20 16:45:00 +04:00
|
|
|
|
|
|
|
if (!config_read)
|
|
|
|
config_read = pci_default_read_config;
|
|
|
|
if (!config_write)
|
|
|
|
config_write = pci_default_write_config;
|
2004-05-19 03:05:28 +04:00
|
|
|
pci_dev->config_read = config_read;
|
|
|
|
pci_dev->config_write = config_write;
|
2004-06-21 23:45:35 +04:00
|
|
|
bus->devices[devfn] = pci_dev;
|
2009-08-20 21:42:38 +04:00
|
|
|
pci_dev->version_id = 2; /* Current pci device vmstate version */
|
2004-05-19 03:05:28 +04:00
|
|
|
return pci_dev;
|
|
|
|
}
|
|
|
|
|
2009-02-11 18:21:10 +03:00
|
|
|
/*
 * Remove every currently mapped BAR of @pci_dev from the address space it
 * was added to, then unregister the device's VGA regions.
 */
static void pci_unregister_io_regions(PCIDevice *pci_dev)
{
    int region;

    for (region = 0; region < PCI_NUM_REGIONS; region++) {
        PCIIORegion *bar = &pci_dev->io_regions[region];

        /* Only registered (non-zero size) and mapped BARs have a subregion. */
        if (bar->size && bar->addr != PCI_BAR_UNMAPPED) {
            memory_region_del_subregion(bar->address_space, bar->memory);
        }
    }

    pci_unregister_vga(pci_dev);
}
|
|
|
|
|
qdev: Unrealize must not fail
Devices may have component devices and buses.
Device realization may fail. Realization is recursive: a device's
realize() method realizes its components, and device_set_realized()
realizes its buses (which should in turn realize the devices on that
bus, except bus_set_realized() doesn't implement that, yet).
When realization of a component or bus fails, we need to roll back:
unrealize everything we realized so far. If any of these unrealizes
failed, the device would be left in an inconsistent state. Must not
happen.
device_set_realized() lets it happen: it ignores errors in the roll
back code starting at label child_realize_fail.
Since realization is recursive, unrealization must be recursive, too.
But how could a partly failed unrealize be rolled back? We'd have to
re-realize, which can fail. This design is fundamentally broken.
device_set_realized() does not roll back at all. Instead, it keeps
unrealizing, ignoring further errors.
It can screw up even for a device with no buses: if the lone
dc->unrealize() fails, it still unregisters vmstate, and calls
listeners' unrealize() callback.
bus_set_realized() does not roll back either. Instead, it stops
unrealizing.
Fortunately, no unrealize method can fail, as we'll see below.
To fix the design error, drop parameter @errp from all the unrealize
methods.
Any unrealize method that uses @errp now needs an update. This leads
us to unrealize() methods that can fail. Merely passing it to another
unrealize method cannot cause failure, though. Here are the ones that
do other things with @errp:
* virtio_serial_device_unrealize()
Fails when qbus_set_hotplug_handler() fails, but still does all the
other work. On failure, the device would stay realized with its
resources completely gone. Oops. Can't happen, because
qbus_set_hotplug_handler() can't actually fail here. Pass
&error_abort to qbus_set_hotplug_handler() instead.
* hw/ppc/spapr_drc.c's unrealize()
Fails when object_property_del() fails, but all the other work is
already done. On failure, the device would stay realized with its
vmstate registration gone. Oops. Can't happen, because
object_property_del() can't actually fail here. Pass &error_abort
to object_property_del() instead.
* spapr_phb_unrealize()
Fails and bails out when remove_drcs() fails, but other work is
already done. On failure, the device would stay realized with some
of its resources gone. Oops. remove_drcs() fails only when
chassis_from_bus()'s object_property_get_uint() fails, and it can't
here. Pass &error_abort to remove_drcs() instead.
Therefore, no unrealize method can fail before this patch.
device_set_realized()'s recursive unrealization via bus uses
object_property_set_bool(). Can't drop @errp there, so pass
&error_abort.
We similarly unrealize with object_property_set_bool() elsewhere,
always ignoring errors. Pass &error_abort instead.
Several unrealize methods no longer handle errors from other unrealize
methods: virtio_9p_device_unrealize(),
virtio_input_device_unrealize(), scsi_qdev_unrealize(), ...
Much of the deleted error handling looks wrong anyway.
One unrealize method no longer ignores such errors:
usb_ehci_pci_exit().
Several realize methods no longer ignore errors when rolling back:
v9fs_device_realize_common(), pci_qdev_unrealize(),
spapr_phb_realize(), usb_qdev_realize(), vfio_ccw_realize(),
virtio_device_realize().
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200505152926.18877-17-armbru@redhat.com>
2020-05-05 18:29:24 +03:00
|
|
|
/*
 * Unrealize hook for PCI devices.
 *
 * In order: unregisters the BAR/VGA mappings, deletes the option ROM, runs
 * the class exit callback when one is set, calls
 * pci_device_deassert_intx() and do_pci_unregister_device(), clears the MSI
 * trigger and finally releases the device's acpi-index.
 */
static void pci_qdev_unrealize(DeviceState *dev)
{
    PCIDevice *pdev = PCI_DEVICE(dev);
    PCIDeviceClass *klass = PCI_DEVICE_GET_CLASS(pdev);

    pci_unregister_io_regions(pdev);
    pci_del_option_rom(pdev);

    if (klass->exit != NULL) {
        klass->exit(pdev);
    }

    pci_device_deassert_intx(pdev);
    do_pci_unregister_device(pdev);

    pdev->msi_trigger = NULL;

    /* Release the acpi-index so that it can be reused by another device. */
    if (pdev->acpi_index) {
        GSequence *in_use = pci_acpi_index_list();
        GSequenceIter *entry;

        entry = g_sequence_lookup(in_use,
                                  GINT_TO_POINTER(pdev->acpi_index),
                                  g_cmp_uint32, NULL);
        g_sequence_remove(entry);
    }
}
|
|
|
|
|
2011-08-08 17:09:31 +04:00
|
|
|
void pci_register_bar(PCIDevice *pci_dev, int region_num,
|
|
|
|
uint8_t type, MemoryRegion *memory)
|
2004-05-19 03:05:28 +04:00
|
|
|
{
|
|
|
|
PCIIORegion *r;
|
2016-03-25 09:49:37 +03:00
|
|
|
uint32_t addr; /* offset in pci config space */
|
2010-09-09 06:48:55 +04:00
|
|
|
uint64_t wmask;
|
2011-08-08 17:09:29 +04:00
|
|
|
pcibus_t size = memory_region_size(memory);
|
2020-10-15 21:14:11 +03:00
|
|
|
uint8_t hdr_type;
|
2009-02-11 18:21:16 +03:00
|
|
|
|
2022-02-17 20:44:50 +03:00
|
|
|
assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */
|
2010-09-09 06:48:56 +04:00
|
|
|
assert(region_num >= 0);
|
|
|
|
assert(region_num < PCI_NUM_REGIONS);
|
2020-10-15 21:14:10 +03:00
|
|
|
assert(is_power_of_2(size));
|
2009-02-11 18:21:16 +03:00
|
|
|
|
2020-10-15 21:14:11 +03:00
|
|
|
/* A PCI bridge device (with Type 1 header) may only have at most 2 BARs */
|
|
|
|
hdr_type =
|
|
|
|
pci_dev->config[PCI_HEADER_TYPE] & ~PCI_HEADER_TYPE_MULTI_FUNCTION;
|
|
|
|
assert(hdr_type != PCI_HEADER_TYPE_BRIDGE || region_num < 2);
|
|
|
|
|
2004-05-19 03:05:28 +04:00
|
|
|
r = &pci_dev->io_regions[region_num];
|
2009-10-30 15:20:58 +03:00
|
|
|
r->addr = PCI_BAR_UNMAPPED;
|
2004-05-19 03:05:28 +04:00
|
|
|
r->size = size;
|
|
|
|
r->type = type;
|
2016-03-25 09:49:37 +03:00
|
|
|
r->memory = memory;
|
|
|
|
r->address_space = type & PCI_BASE_ADDRESS_SPACE_IO
|
2017-11-29 11:46:27 +03:00
|
|
|
? pci_get_bus(pci_dev)->address_space_io
|
|
|
|
: pci_get_bus(pci_dev)->address_space_mem;
|
2009-06-21 20:45:18 +04:00
|
|
|
|
|
|
|
wmask = ~(size - 1);
|
2006-04-18 20:55:22 +04:00
|
|
|
if (region_num == PCI_ROM_SLOT) {
|
2011-04-26 12:29:36 +04:00
|
|
|
/* ROM enable bit is writable */
|
2009-09-16 14:40:57 +04:00
|
|
|
wmask |= PCI_ROM_ADDRESS_ENABLE;
|
2006-04-18 20:55:22 +04:00
|
|
|
}
|
2016-03-25 09:49:37 +03:00
|
|
|
|
|
|
|
addr = pci_bar(pci_dev, region_num);
|
2009-10-30 15:21:00 +03:00
|
|
|
pci_set_long(pci_dev->config + addr, type);
|
2016-03-25 09:49:37 +03:00
|
|
|
|
2009-10-30 15:21:11 +03:00
|
|
|
if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
|
|
|
|
r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
|
|
|
|
pci_set_quad(pci_dev->wmask + addr, wmask);
|
|
|
|
pci_set_quad(pci_dev->cmask + addr, ~0ULL);
|
|
|
|
} else {
|
|
|
|
pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
|
|
|
|
pci_set_long(pci_dev->cmask + addr, 0xffffffff);
|
|
|
|
}
|
2011-07-26 15:26:20 +04:00
|
|
|
}
|
|
|
|
|
2013-03-03 21:21:26 +04:00
|
|
|
/*
 * Enable or disable the VGA regions of @pci_dev according to the memory
 * and I/O enable bits of its command register.  Does nothing when the
 * device has no VGA regions registered.
 */
static void pci_update_vga(PCIDevice *pci_dev)
{
    uint16_t command;
    bool mem_on;
    bool io_on;

    if (!pci_dev->has_vga) {
        return;
    }

    command = pci_get_word(pci_dev->config + PCI_COMMAND);
    mem_on = command & PCI_COMMAND_MEMORY;
    io_on = command & PCI_COMMAND_IO;

    memory_region_set_enabled(pci_dev->vga_regions[QEMU_PCI_VGA_MEM], mem_on);
    /* Both I/O windows follow the same I/O enable bit. */
    memory_region_set_enabled(pci_dev->vga_regions[QEMU_PCI_VGA_IO_LO], io_on);
    memory_region_set_enabled(pci_dev->vga_regions[QEMU_PCI_VGA_IO_HI], io_on);
}
|
|
|
|
|
|
|
|
/*
 * Register the three VGA regions of @pci_dev: @mem is added to the bus
 * memory address space, @io_lo and @io_hi to the bus I/O address space,
 * each at its fixed QEMU_PCI_VGA_*_BASE offset with priority 1.
 *
 * Asserts that no VGA regions are registered yet and that each region has
 * the expected size.  Afterwards the regions are enabled or disabled to
 * match the command register.
 */
void pci_register_vga(PCIDevice *pci_dev, MemoryRegion *mem,
                      MemoryRegion *io_lo, MemoryRegion *io_hi)
{
    PCIBus *parent = pci_get_bus(pci_dev);
    MemoryRegion **slots = pci_dev->vga_regions;

    assert(!pci_dev->has_vga);

    /* VGA memory window */
    assert(memory_region_size(mem) == QEMU_PCI_VGA_MEM_SIZE);
    slots[QEMU_PCI_VGA_MEM] = mem;
    memory_region_add_subregion_overlap(parent->address_space_mem,
                                        QEMU_PCI_VGA_MEM_BASE, mem, 1);

    /* Low VGA I/O window */
    assert(memory_region_size(io_lo) == QEMU_PCI_VGA_IO_LO_SIZE);
    slots[QEMU_PCI_VGA_IO_LO] = io_lo;
    memory_region_add_subregion_overlap(parent->address_space_io,
                                        QEMU_PCI_VGA_IO_LO_BASE, io_lo, 1);

    /* High VGA I/O window */
    assert(memory_region_size(io_hi) == QEMU_PCI_VGA_IO_HI_SIZE);
    slots[QEMU_PCI_VGA_IO_HI] = io_hi;
    memory_region_add_subregion_overlap(parent->address_space_io,
                                        QEMU_PCI_VGA_IO_HI_BASE, io_hi, 1);

    pci_dev->has_vga = true;

    pci_update_vga(pci_dev);
}
|
|
|
|
|
|
|
|
/*
 * Remove the VGA regions of @pci_dev from the bus memory and I/O address
 * spaces and mark the device as having no VGA regions.  A device without
 * registered VGA regions is left untouched.
 */
void pci_unregister_vga(PCIDevice *pci_dev)
{
    PCIBus *parent = pci_get_bus(pci_dev);
    MemoryRegion **slots = pci_dev->vga_regions;

    if (pci_dev->has_vga) {
        memory_region_del_subregion(parent->address_space_mem,
                                    slots[QEMU_PCI_VGA_MEM]);
        memory_region_del_subregion(parent->address_space_io,
                                    slots[QEMU_PCI_VGA_IO_LO]);
        memory_region_del_subregion(parent->address_space_io,
                                    slots[QEMU_PCI_VGA_IO_HI]);
        pci_dev->has_vga = false;
    }
}
|
|
|
|
|
2011-08-08 17:08:55 +04:00
|
|
|
/*
 * Return the address recorded for BAR @region_num of @pci_dev.
 * @region_num is used as an index without range checking.
 */
pcibus_t pci_get_bar_addr(PCIDevice *pci_dev, int region_num)
{
    const PCIIORegion *bar = &pci_dev->io_regions[region_num];

    return bar->addr;
}
|
|
|
|
|
2022-02-17 20:44:50 +03:00
|
|
|
static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg,
|
|
|
|
uint8_t type, pcibus_t size)
|
|
|
|
{
|
|
|
|
pcibus_t new_addr;
|
|
|
|
if (!pci_is_vf(d)) {
|
|
|
|
int bar = pci_bar(d, reg);
|
|
|
|
if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
|
|
|
|
new_addr = pci_get_quad(d->config + bar);
|
|
|
|
} else {
|
|
|
|
new_addr = pci_get_long(d->config + bar);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
PCIDevice *pf = d->exp.sriov_vf.pf;
|
|
|
|
uint16_t sriov_cap = pf->exp.sriov_cap;
|
|
|
|
int bar = sriov_cap + PCI_SRIOV_BAR + reg * 4;
|
|
|
|
uint16_t vf_offset =
|
|
|
|
pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
|
|
|
|
uint16_t vf_stride =
|
|
|
|
pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
|
|
|
|
uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / vf_stride;
|
|
|
|
|
|
|
|
if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
|
|
|
|
new_addr = pci_get_quad(pf->config + bar);
|
|
|
|
} else {
|
|
|
|
new_addr = pci_get_long(pf->config + bar);
|
|
|
|
}
|
|
|
|
new_addr += vf_num * size;
|
|
|
|
}
|
|
|
|
/* The ROM slot has a specific enable bit, keep it intact */
|
|
|
|
if (reg != PCI_ROM_SLOT) {
|
|
|
|
new_addr &= ~(size - 1);
|
|
|
|
}
|
|
|
|
return new_addr;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Compute the address at which BAR @reg of @d should currently be mapped.
 *
 * @type and @size are the BAR's type bits and size.  The result is
 * PCI_BAR_UNMAPPED when decoding of the BAR's space is disabled in the
 * command register, when the ROM slot's enable bit is clear, or when the
 * programmed range fails one of the sanity checks below.  Address 0 is
 * only accepted when the machine class sets pci_allow_0_address.
 */
pcibus_t pci_bar_address(PCIDevice *d,
                         int reg, uint8_t type, pcibus_t size)
{
    pcibus_t base, end;
    uint16_t command = pci_get_word(d->config + PCI_COMMAND);
    MachineClass *machine_class = MACHINE_GET_CLASS(qdev_get_machine());
    bool zero_ok = machine_class->pci_allow_0_address;

    if (type & PCI_BASE_ADDRESS_SPACE_IO) {
        if (!(command & PCI_COMMAND_IO)) {
            return PCI_BAR_UNMAPPED;
        }

        base = pci_config_get_bar_addr(d, reg, type, size);
        end = base + size - 1;

        /*
         * Check if 32 bit BAR wraps around explicitly.
         * TODO: make priorities correct and remove this work around.
         */
        if (end <= base || end >= UINT32_MAX) {
            return PCI_BAR_UNMAPPED;
        }
        if (base == 0 && !zero_ok) {
            return PCI_BAR_UNMAPPED;
        }
        return base;
    }

    if (!(command & PCI_COMMAND_MEMORY)) {
        return PCI_BAR_UNMAPPED;
    }

    base = pci_config_get_bar_addr(d, reg, type, size);

    /* the ROM slot has a specific enable bit */
    if (reg == PCI_ROM_SLOT && !(base & PCI_ROM_ADDRESS_ENABLE)) {
        return PCI_BAR_UNMAPPED;
    }

    base &= ~(size - 1);
    end = base + size - 1;

    /*
     * NOTE: we do not support wrapping.
     * XXX: as we cannot support really dynamic mappings, we handle
     * specific values as invalid mappings.
     */
    if (end <= base || end == PCI_BAR_UNMAPPED) {
        return PCI_BAR_UNMAPPED;
    }
    if (base == 0 && !zero_ok) {
        return PCI_BAR_UNMAPPED;
    }

    /*
     * Now pcibus_t is 64bit.
     * Check if 32 bit BAR wraps around explicitly.
     * Without this, PC ide doesn't work well.
     * TODO: remove this work around.
     */
    if (end >= UINT32_MAX && !(type & PCI_BASE_ADDRESS_MEM_TYPE_64)) {
        return PCI_BAR_UNMAPPED;
    }

    /*
     * OS is allowed to set BAR beyond its addressable
     * bits. For example, 32 bit OS can set 64bit bar
     * to >4G. Check it. TODO: we might need to support
     * it in the future for e.g. PAE.
     */
    if (end >= HWADDR_MAX) {
        return PCI_BAR_UNMAPPED;
    }

    return base;
}
|
|
|
|
|
2004-05-20 16:45:00 +04:00
|
|
|
/*
 * Bring the BAR mappings of @d in line with its config space.
 *
 * For every registered BAR the wanted address is recomputed; a device
 * without power has all BARs treated as unmapped.  A BAR whose address
 * changed is removed from its old location (if mapped) and added at the
 * new one (if any).  The VGA regions are updated at the end.
 */
static void pci_update_mappings(PCIDevice *d)
{
    int idx;

    for (idx = 0; idx < PCI_NUM_REGIONS; idx++) {
        PCIIORegion *bar = &d->io_regions[idx];
        pcibus_t target;

        /* this region isn't registered */
        if (!bar->size) {
            continue;
        }

        target = pci_bar_address(d, idx, bar->type, bar->size);
        if (!d->has_power) {
            target = PCI_BAR_UNMAPPED;
        }

        /* This bar isn't changed */
        if (target == bar->addr) {
            continue;
        }

        /* Drop the stale mapping before recording the new address. */
        if (bar->addr != PCI_BAR_UNMAPPED) {
            trace_pci_update_mappings_del(d->name, pci_dev_bus_num(d),
                                          PCI_SLOT(d->devfn),
                                          PCI_FUNC(d->devfn),
                                          idx, bar->addr, bar->size);
            memory_region_del_subregion(bar->address_space, bar->memory);
        }

        bar->addr = target;

        if (bar->addr != PCI_BAR_UNMAPPED) {
            trace_pci_update_mappings_add(d->name, pci_dev_bus_num(d),
                                          PCI_SLOT(d->devfn),
                                          PCI_FUNC(d->devfn),
                                          idx, bar->addr, bar->size);
            memory_region_add_subregion_overlap(bar->address_space,
                                                bar->addr, bar->memory, 1);
        }
    }

    pci_update_vga(d);
}
|
|
|
|
|
2009-12-23 17:33:56 +03:00
|
|
|
/*
 * Return the INTx disable bit of @d's command register: non-zero (the raw
 * masked bit, not normalised to 1) when set, 0 otherwise.
 */
static inline int pci_irq_disabled(PCIDevice *d)
{
    uint16_t command = pci_get_word(d->config + PCI_COMMAND);

    return command & PCI_COMMAND_INTX_DISABLE;
}
|
|
|
|
|
|
|
|
/* Called after interrupt disabled field update in config space,
|
|
|
|
* assert/deassert interrupts if necessary.
|
|
|
|
* Gets original interrupt disable bit value (before update). */
|
|
|
|
static void pci_update_irq_disabled(PCIDevice *d, int was_irq_disabled)
|
|
|
|
{
|
|
|
|
int i, disabled = pci_irq_disabled(d);
|
|
|
|
if (disabled == was_irq_disabled)
|
|
|
|
return;
|
|
|
|
for (i = 0; i < PCI_NUM_PINS; ++i) {
|
|
|
|
int state = pci_irq_state(d, i);
|
|
|
|
pci_change_irq_level(d, i, disabled ? -state : state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-09-17 01:08:06 +04:00
|
|
|
/*
 * Default config space read handler.
 *
 * @d:       device whose config space is read
 * @address: byte offset into config space
 * @len:     number of bytes to read; must be within 0..4 because the
 *           result is assembled in (and returned as) a uint32_t
 *
 * The access must lie entirely inside the device's config space.  If @d is
 * a PCIe downstream port and the access overlaps the Link Status register,
 * pcie_sync_bridge_lnk() is called before the value is read.
 *
 * Returns the bytes read, converted with le32_to_cpu(); bytes beyond @len
 * are zero.
 */
uint32_t pci_default_read_config(PCIDevice *d,
                                 uint32_t address, int len)
{
    uint32_t val = 0;

    /*
     * The memcpy() below targets a 4-byte local: reject lengths that would
     * overflow it (or wrap when converted to size_t) instead of corrupting
     * the stack.
     */
    assert(len >= 0 && (size_t)len <= sizeof(val));
    assert(address + len <= pci_config_size(d));

    if (pci_is_express_downstream_port(d) &&
        ranges_overlap(address, len, d->exp.exp_cap + PCI_EXP_LNKSTA, 2)) {
        pcie_sync_bridge_lnk(d);
    }
    memcpy(&val, d->config + address, len);
    return le32_to_cpu(val);
}
|
|
|
|
|
2014-09-02 15:00:04 +04:00
|
|
|
/*
 * Default config space write handler.
 *
 * Writes the low @l bytes of @val_in to config space at @addr, byte by
 * byte, honouring the per-byte writable mask and write-1-to-clear mask.
 * If the write touched a BAR, a ROM address register or the command
 * register, the BAR mappings are refreshed; a command register write also
 * updates the INTx state and the bus master enable region.  Finally the
 * original value is passed on to the MSI, MSI-X and SR-IOV write hooks.
 */
void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int l)
{
    int was_irq_disabled = pci_irq_disabled(d);
    uint32_t remaining = val_in;
    int i;

    assert(addr + l <= pci_config_size(d));

    for (i = 0; i < l; i++) {
        uint8_t wmask = d->wmask[addr + i];
        uint8_t w1cmask = d->w1cmask[addr + i];
        uint8_t *cfg_byte = &d->config[addr + i];
        uint8_t data = remaining;

        assert(!(wmask & w1cmask));
        *cfg_byte = (*cfg_byte & ~wmask) | (data & wmask);
        *cfg_byte &= ~(data & w1cmask); /* W1C: Write 1 to Clear */

        remaining >>= 8;
    }

    if (ranges_overlap(addr, l, PCI_BASE_ADDRESS_0, 24) ||
        ranges_overlap(addr, l, PCI_ROM_ADDRESS, 4) ||
        ranges_overlap(addr, l, PCI_ROM_ADDRESS1, 4) ||
        range_covers_byte(addr, l, PCI_COMMAND)) {
        pci_update_mappings(d);
    }

    if (range_covers_byte(addr, l, PCI_COMMAND)) {
        bool master_on;

        pci_update_irq_disabled(d, was_irq_disabled);

        /* Bus mastering needs both the command bit and a powered device. */
        master_on = (pci_get_word(d->config + PCI_COMMAND)
                     & PCI_COMMAND_MASTER) && d->has_power;
        memory_region_set_enabled(&d->bus_master_enable_region, master_on);
    }

    msi_write_config(d, addr, val_in, l);
    msix_write_config(d, addr, val_in, l);
    pcie_sriov_config_write(d, addr, val_in, l);
}
|
|
|
|
|
2006-05-13 20:11:23 +04:00
|
|
|
/***********************************************************/
|
|
|
|
/* generic PCI irq support */
|
2004-06-21 23:45:35 +04:00
|
|
|
|
2006-05-13 20:11:23 +04:00
|
|
|
/* 0 <= irq_num <= 3. level must be 0 or 1 */
|
2013-10-07 11:36:35 +04:00
|
|
|
static void pci_irq_handler(void *opaque, int irq_num, int level)
|
2004-05-19 03:05:28 +04:00
|
|
|
{
|
2009-08-24 20:42:53 +04:00
|
|
|
PCIDevice *pci_dev = opaque;
|
2006-09-24 21:01:44 +04:00
|
|
|
int change;
|
2007-09-17 12:09:54 +04:00
|
|
|
|
2021-03-23 23:52:27 +03:00
|
|
|
assert(0 <= irq_num && irq_num < PCI_NUM_PINS);
|
|
|
|
assert(level == 0 || level == 1);
|
2009-11-25 16:20:51 +03:00
|
|
|
change = level - pci_irq_state(pci_dev, irq_num);
|
2006-09-24 21:01:44 +04:00
|
|
|
if (!change)
|
|
|
|
return;
|
2006-09-24 04:16:34 +04:00
|
|
|
|
2009-11-25 16:20:51 +03:00
|
|
|
pci_set_irq_state(pci_dev, irq_num, level);
|
2009-11-25 16:44:40 +03:00
|
|
|
pci_update_irq_status(pci_dev);
|
2009-12-23 17:33:56 +03:00
|
|
|
if (pci_irq_disabled(pci_dev))
|
|
|
|
return;
|
2009-11-25 16:20:51 +03:00
|
|
|
pci_change_irq_level(pci_dev, irq_num, change);
|
2004-05-19 03:05:28 +04:00
|
|
|
}
|
|
|
|
|
2013-10-07 11:36:35 +04:00
|
|
|
/*
 * Allocate a qemu_irq that drives the INTx pin of @pci_dev through
 * pci_irq_handler().  The pin is taken from pci_intx() and asserted to be
 * a valid pin number.
 */
qemu_irq pci_allocate_irq(PCIDevice *pci_dev)
{
    qemu_irq line;
    int intx;

    intx = pci_intx(pci_dev);
    assert(0 <= intx && intx < PCI_NUM_PINS);

    line = qemu_allocate_irq(pci_irq_handler, pci_dev, intx);
    return line;
}
|
|
|
|
|
|
|
|
/*
 * Set the INTx pin of @pci_dev to @level by forwarding to
 * pci_irq_handler() with the pin number returned by pci_intx().
 */
void pci_set_irq(PCIDevice *pci_dev, int level)
{
    pci_irq_handler(pci_dev, pci_intx(pci_dev), level);
}
|
|
|
|
|
2012-07-19 18:11:47 +04:00
|
|
|
/* Special hooks used by device assignment */
|
|
|
|
/*
 * Install @route_intx_to_irq as the INTx-to-IRQ routing callback of @bus.
 * Only root buses may carry such a callback; this is asserted.
 */
void pci_bus_set_route_irq_fn(PCIBus *bus, pci_route_irq_fn route_intx_to_irq)
{
    assert(pci_bus_is_root(bus));

    bus->route_intx_to_irq = route_intx_to_irq;
}
|
|
|
|
|
|
|
|
/*
 * Resolve the route of INTx @pin of @dev.
 *
 * The pin is mapped through map_irq of each bus on the way up, following
 * parent_dev until a bus without a parent device is reached.  That bus's
 * route_intx_to_irq callback then yields the result.  If the callback is
 * missing, an error is reported and a { PCI_INTX_DISABLED, -1 } route is
 * returned.
 */
PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
{
    PCIBus *bus;

    for (;;) {
        int in_pin = pin;
        const char *upstream;

        bus = pci_get_bus(dev);
        pin = bus->map_irq(dev, in_pin);

        if (pci_bus_is_root(bus)) {
            upstream = "root-complex";
        } else {
            upstream = DEVICE(bus->parent_dev)->canonical_path;
        }
        trace_pci_route_irq(in_pin, DEVICE(dev)->canonical_path, pin,
                            upstream);

        dev = bus->parent_dev;
        if (!dev) {
            break;
        }
    }

    if (bus->route_intx_to_irq) {
        return bus->route_intx_to_irq(bus->irq_opaque, pin);
    }

    error_report("PCI: Bug - unimplemented PCI INTx routing (%s)",
                 object_get_typename(OBJECT(bus->qbus.parent)));
    return (PCIINTxRoute) { PCI_INTX_DISABLED, -1 };
}
|
|
|
|
|
2012-10-02 23:21:54 +04:00
|
|
|
/*
 * Return true when routes @old and @new differ in mode or in irq.
 */
bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new)
{
    bool same_mode = old->mode == new->mode;
    bool same_irq = old->irq == new->irq;

    return !(same_mode && same_irq);
}
|
|
|
|
|
2012-07-02 16:38:47 +04:00
|
|
|
/*
 * Call the INTx routing notifier of every device on @bus that has one,
 * then recurse into each child bus of @bus.
 */
void pci_bus_fire_intx_routing_notifier(PCIBus *bus)
{
    PCIBus *child;
    int slot;

    for (slot = 0; slot < ARRAY_SIZE(bus->devices); slot++) {
        PCIDevice *pdev = bus->devices[slot];

        if (!pdev || !pdev->intx_routing_notifier) {
            continue;
        }
        pdev->intx_routing_notifier(pdev);
    }

    QLIST_FOREACH(child, &bus->child, sibling) {
        pci_bus_fire_intx_routing_notifier(child);
    }
}
|
|
|
|
|
|
|
|
void pci_device_set_intx_routing_notifier(PCIDevice *dev,
|
|
|
|
PCIINTxRoutingNotifier notifier)
|
|
|
|
{
|
|
|
|
dev->intx_routing_notifier = notifier;
|
2004-05-19 03:05:28 +04:00
|
|
|
}
|
|
|
|
|
2012-10-20 00:43:28 +04:00
|
|
|
/*
|
|
|
|
* PCI-to-PCI bridge specification
|
|
|
|
* 9.1: Interrupt routing. Table 9-1
|
|
|
|
*
|
|
|
|
* the PCI Express Base Specification, Revision 2.1
|
2023-02-08 20:37:52 +03:00
|
|
|
* 2.2.8.1: INTx interrupt signaling - Rules
|
2012-10-20 00:43:28 +04:00
|
|
|
* the Implementation Note
|
|
|
|
* Table 2-20
|
|
|
|
*/
|
|
|
|
/*
|
|
|
|
* 0 <= pin <= 3 0 = INTA, 1 = INTB, 2 = INTC, 3 = INTD
|
|
|
|
* 0-origin unlike PCI interrupt pin register.
|
|
|
|
*/
|
|
|
|
int pci_swizzle_map_irq_fn(PCIDevice *pci_dev, int pin)
|
|
|
|
{
|
2019-04-05 19:30:48 +03:00
|
|
|
return pci_swizzle(PCI_SLOT(pci_dev->devfn), pin);
|
2012-10-20 00:43:28 +04:00
|
|
|
}
|
|
|
|
|
2006-05-13 20:11:23 +04:00
|
|
|
/***********************************************************/
|
|
|
|
/* monitor info on PCI */
|
2004-05-20 16:45:00 +04:00
|
|
|
|
2008-10-02 22:33:50 +04:00
|
|
|
/*
 * Table of known PCI class codes, searched linearly by get_class_desc().
 *
 * Each entry gives the class code and a description string, optionally
 * followed by a short name string and a mask value.
 * NOTE(review): the meaning of the two optional fields is defined by
 * pci_class_desc, which is not visible here; confirm against its
 * declaration.
 *
 * The entry with a NULL description terminates the table and must stay
 * last.
 */
static const pci_class_desc pci_class_descriptions[] = {
    /* 0x00xx */
    { 0x0001, "VGA controller", "display"},

    /* 0x01xx */
    { 0x0100, "SCSI controller", "scsi"},
    { 0x0101, "IDE controller", "ide"},
    { 0x0102, "Floppy controller", "fdc"},
    { 0x0103, "IPI controller", "ipi"},
    { 0x0104, "RAID controller", "raid"},
    { 0x0106, "SATA controller"},
    { 0x0107, "SAS controller"},
    { 0x0180, "Storage controller"},

    /* 0x02xx */
    { 0x0200, "Ethernet controller", "ethernet"},
    { 0x0201, "Token Ring controller", "token-ring"},
    { 0x0202, "FDDI controller", "fddi"},
    { 0x0203, "ATM controller", "atm"},
    { 0x0280, "Network controller"},

    /* 0x03xx */
    { 0x0300, "VGA controller", "display", 0x00ff},
    { 0x0301, "XGA controller"},
    { 0x0302, "3D controller"},
    { 0x0380, "Display controller"},

    /* 0x04xx */
    { 0x0400, "Video controller", "video"},
    { 0x0401, "Audio controller", "sound"},
    { 0x0402, "Phone"},
    { 0x0403, "Audio controller", "sound"},
    { 0x0480, "Multimedia controller"},

    /* 0x05xx */
    { 0x0500, "RAM controller", "memory"},
    { 0x0501, "Flash controller", "flash"},
    { 0x0580, "Memory controller"},

    /* 0x06xx */
    { 0x0600, "Host bridge", "host"},
    { 0x0601, "ISA bridge", "isa"},
    { 0x0602, "EISA bridge", "eisa"},
    { 0x0603, "MC bridge", "mca"},
    { 0x0604, "PCI bridge", "pci-bridge"},
    { 0x0605, "PCMCIA bridge", "pcmcia"},
    { 0x0606, "NUBUS bridge", "nubus"},
    { 0x0607, "CARDBUS bridge", "cardbus"},
    { 0x0608, "RACEWAY bridge"},
    { 0x0680, "Bridge"},

    /* 0x07xx */
    { 0x0700, "Serial port", "serial"},
    { 0x0701, "Parallel port", "parallel"},

    /* 0x08xx */
    { 0x0800, "Interrupt controller", "interrupt-controller"},
    { 0x0801, "DMA controller", "dma-controller"},
    { 0x0802, "Timer", "timer"},
    { 0x0803, "RTC", "rtc"},

    /* 0x09xx */
    { 0x0900, "Keyboard", "keyboard"},
    { 0x0901, "Pen", "pen"},
    { 0x0902, "Mouse", "mouse"},

    /* 0x0axx, 0x0bxx */
    { 0x0A00, "Dock station", "dock", 0x00ff},
    { 0x0B00, "i386 cpu", "cpu", 0x00ff},

    /* 0x0cxx */
    { 0x0c00, "Firewire controller", "firewire"},
    { 0x0c01, "Access bus controller", "access-bus"},
    { 0x0c02, "SSA controller", "ssa"},
    { 0x0c03, "USB controller", "usb"},
    { 0x0c04, "Fibre channel controller", "fibre-channel"},
    { 0x0c05, "SMBus"},

    /* terminator */
    { 0, NULL}
};
|
|
|
|
|
2021-10-28 07:31:26 +03:00
|
|
|
void pci_for_each_device_under_bus_reverse(PCIBus *bus,
|
|
|
|
pci_bus_dev_fn fn,
|
|
|
|
void *opaque)
|
2017-02-22 13:56:53 +03:00
|
|
|
{
|
|
|
|
PCIDevice *d;
|
|
|
|
int devfn;
|
|
|
|
|
|
|
|
for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
|
|
|
|
d = bus->devices[ARRAY_SIZE(bus->devices) - 1 - devfn];
|
|
|
|
if (d) {
|
|
|
|
fn(bus, d, opaque);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void pci_for_each_device_reverse(PCIBus *bus, int bus_num,
|
2021-10-28 07:31:25 +03:00
|
|
|
pci_bus_dev_fn fn, void *opaque)
|
2017-02-22 13:56:53 +03:00
|
|
|
{
|
|
|
|
bus = pci_find_bus_nr(bus, bus_num);
|
|
|
|
|
|
|
|
if (bus) {
|
|
|
|
pci_for_each_device_under_bus_reverse(bus, fn, opaque);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-28 07:31:26 +03:00
|
|
|
void pci_for_each_device_under_bus(PCIBus *bus,
|
|
|
|
pci_bus_dev_fn fn, void *opaque)
|
2004-06-21 23:45:35 +04:00
|
|
|
{
|
2010-01-22 00:15:40 +03:00
|
|
|
PCIDevice *d;
|
|
|
|
int devfn;
|
2004-06-21 23:45:35 +04:00
|
|
|
|
2010-01-22 00:15:40 +03:00
|
|
|
for(devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
|
|
|
|
d = bus->devices[devfn];
|
|
|
|
if (d) {
|
2012-06-21 19:35:28 +04:00
|
|
|
fn(bus, d, opaque);
|
2010-01-22 00:15:40 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void pci_for_each_device(PCIBus *bus, int bus_num,
|
2021-10-28 07:31:25 +03:00
|
|
|
pci_bus_dev_fn fn, void *opaque)
|
2010-01-22 00:15:40 +03:00
|
|
|
{
|
2012-02-19 20:16:02 +04:00
|
|
|
bus = pci_find_bus_nr(bus, bus_num);
|
2010-01-22 00:15:40 +03:00
|
|
|
|
|
|
|
if (bus) {
|
2012-06-21 19:35:28 +04:00
|
|
|
pci_for_each_device_under_bus(bus, fn, opaque);
|
2010-01-22 00:15:40 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-01 15:11:22 +03:00
|
|
|
const pci_class_desc *get_class_desc(int class)
|
2010-01-22 00:15:40 +03:00
|
|
|
{
|
2011-10-21 20:15:33 +04:00
|
|
|
const pci_class_desc *desc;
|
2010-01-22 00:15:40 +03:00
|
|
|
|
2011-10-21 20:15:33 +04:00
|
|
|
desc = pci_class_descriptions;
|
|
|
|
while (desc->desc && class != desc->class) {
|
|
|
|
desc++;
|
2004-06-21 23:45:35 +04:00
|
|
|
}
|
2009-10-30 15:21:26 +03:00
|
|
|
|
2011-10-21 20:15:33 +04:00
|
|
|
return desc;
|
|
|
|
}
|
2009-10-30 15:21:11 +03:00
|
|
|
|
2006-02-05 07:14:41 +03:00
|
|
|
/* Initialize a PCI NIC. */
|
2015-04-28 13:50:07 +03:00
|
|
|
PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
|
2015-02-26 19:21:14 +03:00
|
|
|
const char *default_model,
|
2015-04-28 13:50:07 +03:00
|
|
|
const char *default_devaddr)
|
2006-02-05 07:14:41 +03:00
|
|
|
{
|
2009-06-18 17:14:08 +04:00
|
|
|
const char *devaddr = nd->devaddr ? nd->devaddr : default_devaddr;
|
2018-03-02 12:30:50 +03:00
|
|
|
GPtrArray *pci_nic_models;
|
2009-09-25 05:53:51 +04:00
|
|
|
PCIBus *bus;
|
2009-06-18 17:14:08 +04:00
|
|
|
PCIDevice *pci_dev;
|
2009-05-15 01:35:07 +04:00
|
|
|
DeviceState *dev;
|
2015-04-28 13:50:07 +03:00
|
|
|
int devfn;
|
2009-01-13 22:47:10 +03:00
|
|
|
int i;
|
2019-05-13 09:19:39 +03:00
|
|
|
int dom, busnr;
|
|
|
|
unsigned slot;
|
2009-01-13 22:47:10 +03:00
|
|
|
|
2018-03-02 12:30:50 +03:00
|
|
|
if (nd->model && !strcmp(nd->model, "virtio")) {
|
|
|
|
g_free(nd->model);
|
|
|
|
nd->model = g_strdup("virtio-net-pci");
|
|
|
|
}
|
|
|
|
|
2022-11-10 15:52:22 +03:00
|
|
|
pci_nic_models = qemu_get_nic_models(TYPE_PCI_DEVICE);
|
2018-03-02 12:30:50 +03:00
|
|
|
|
|
|
|
if (qemu_show_nic_models(nd->model, (const char **)pci_nic_models->pdata)) {
|
2015-04-28 13:50:07 +03:00
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
|
2018-03-02 12:30:50 +03:00
|
|
|
i = qemu_find_nic_model(nd, (const char **)pci_nic_models->pdata,
|
|
|
|
default_model);
|
2015-04-28 13:50:07 +03:00
|
|
|
if (i < 0) {
|
|
|
|
exit(1);
|
|
|
|
}
|
2009-09-25 05:53:51 +04:00
|
|
|
|
2019-05-13 09:19:39 +03:00
|
|
|
if (!rootbus) {
|
|
|
|
error_report("No primary PCI bus");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(!rootbus->parent_dev);
|
|
|
|
|
|
|
|
if (!devaddr) {
|
|
|
|
devfn = -1;
|
|
|
|
busnr = 0;
|
|
|
|
} else {
|
|
|
|
if (pci_parse_devaddr(devaddr, &dom, &busnr, &slot, NULL) < 0) {
|
|
|
|
error_report("Invalid PCI device address %s for device %s",
|
|
|
|
devaddr, nd->model);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dom != 0) {
|
|
|
|
error_report("No support for non-zero PCI domains");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
devfn = PCI_DEVFN(slot, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
bus = pci_find_bus_nr(rootbus, busnr);
|
2009-09-25 05:53:51 +04:00
|
|
|
if (!bus) {
|
2010-02-18 19:25:24 +03:00
|
|
|
error_report("Invalid PCI device address %s for device %s",
|
2018-03-02 12:30:50 +03:00
|
|
|
devaddr, nd->model);
|
2015-04-28 13:50:07 +03:00
|
|
|
exit(1);
|
2009-09-25 05:53:51 +04:00
|
|
|
}
|
|
|
|
|
pci: Convert uses of pci_create() etc. with Coccinelle
Replace
dev = pci_create(bus, type_name);
...
qdev_init_nofail(dev);
by
dev = pci_new(type_name);
...
pci_realize_and_unref(dev, bus, &error_fatal);
and similarly for pci_create_multifunction().
Recent commit "qdev: New qdev_new(), qdev_realize(), etc." explains
why.
Coccinelle script:
@@
expression dev, bus, expr;
expression list args;
@@
- dev = pci_create(bus, args);
+ dev = pci_new(args);
... when != dev = expr
- qdev_init_nofail(&dev->qdev);
+ pci_realize_and_unref(dev, bus, &error_fatal);
@@
expression dev, bus, expr;
expression list args;
expression d;
@@
- dev = pci_create(bus, args);
+ dev = pci_new(args);
(
d = &dev->qdev;
|
d = DEVICE(dev);
)
... when != dev = expr
- qdev_init_nofail(d);
+ pci_realize_and_unref(dev, bus, &error_fatal);
@@
expression dev, bus, expr;
expression list args;
@@
- dev = pci_create(bus, args);
+ dev = pci_new(args);
... when != dev = expr
- qdev_init_nofail(DEVICE(dev));
+ pci_realize_and_unref(dev, bus, &error_fatal);
@@
expression dev, bus, expr;
expression list args;
@@
- dev = DEVICE(pci_create(bus, args));
+ PCIDevice *pci_dev; // TODO move
+ pci_dev = pci_new(args);
+ dev = DEVICE(pci_dev);
... when != dev = expr
- qdev_init_nofail(dev);
+ pci_realize_and_unref(pci_dev, bus, &error_fatal);
@@
expression dev, bus, expr;
expression list args;
@@
- dev = pci_create_multifunction(bus, args);
+ dev = pci_new_multifunction(args);
... when != dev = expr
- qdev_init_nofail(&dev->qdev);
+ pci_realize_and_unref(dev, bus, &error_fatal);
@@
expression bus, expr;
expression list args;
identifier dev;
@@
- PCIDevice *dev = pci_create_multifunction(bus, args);
+ PCIDevice *dev = pci_new_multifunction(args);
... when != dev = expr
- qdev_init_nofail(&dev->qdev);
+ pci_realize_and_unref(dev, bus, &error_fatal);
@@
expression dev, bus, expr;
expression list args;
@@
- dev = pci_create_multifunction(bus, args);
+ dev = pci_new_multifunction(args);
... when != dev = expr
- qdev_init_nofail(DEVICE(dev));
+ pci_realize_and_unref(dev, bus, &error_fatal);
Missing #include "qapi/error.h" added manually, whitespace changes
minimized manually, @pci_dev declarations moved manually.
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200610053247.1583243-16-armbru@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
2020-06-10 08:32:04 +03:00
|
|
|
pci_dev = pci_new(devfn, nd->model);
|
2009-09-25 05:53:50 +04:00
|
|
|
dev = &pci_dev->qdev;
|
2009-10-21 17:25:41 +04:00
|
|
|
qdev_set_nic_properties(dev, nd);
|
pci: Convert uses of pci_create() etc. with Coccinelle
Replace
dev = pci_create(bus, type_name);
...
qdev_init_nofail(dev);
by
dev = pci_new(type_name);
...
pci_realize_and_unref(dev, bus, &error_fatal);
and similarly for pci_create_multifunction().
Recent commit "qdev: New qdev_new(), qdev_realize(), etc." explains
why.
Coccinelle script:
@@
expression dev, bus, expr;
expression list args;
@@
- dev = pci_create(bus, args);
+ dev = pci_new(args);
... when != dev = expr
- qdev_init_nofail(&dev->qdev);
+ pci_realize_and_unref(dev, bus, &error_fatal);
@@
expression dev, bus, expr;
expression list args;
expression d;
@@
- dev = pci_create(bus, args);
+ dev = pci_new(args);
(
d = &dev->qdev;
|
d = DEVICE(dev);
)
... when != dev = expr
- qdev_init_nofail(d);
+ pci_realize_and_unref(dev, bus, &error_fatal);
@@
expression dev, bus, expr;
expression list args;
@@
- dev = pci_create(bus, args);
+ dev = pci_new(args);
... when != dev = expr
- qdev_init_nofail(DEVICE(dev));
+ pci_realize_and_unref(dev, bus, &error_fatal);
@@
expression dev, bus, expr;
expression list args;
@@
- dev = DEVICE(pci_create(bus, args));
+ PCIDevice *pci_dev; // TODO move
+ pci_dev = pci_new(args);
+ dev = DEVICE(pci_dev);
... when != dev = expr
- qdev_init_nofail(dev);
+ pci_realize_and_unref(pci_dev, bus, &error_fatal);
@@
expression dev, bus, expr;
expression list args;
@@
- dev = pci_create_multifunction(bus, args);
+ dev = pci_new_multifunction(args);
... when != dev = expr
- qdev_init_nofail(&dev->qdev);
+ pci_realize_and_unref(dev, bus, &error_fatal);
@@
expression bus, expr;
expression list args;
identifier dev;
@@
- PCIDevice *dev = pci_create_multifunction(bus, args);
+ PCIDevice *dev = pci_new_multifunction(args);
... when != dev = expr
- qdev_init_nofail(&dev->qdev);
+ pci_realize_and_unref(dev, bus, &error_fatal);
@@
expression dev, bus, expr;
expression list args;
@@
- dev = pci_create_multifunction(bus, args);
+ dev = pci_new_multifunction(args);
... when != dev = expr
- qdev_init_nofail(DEVICE(dev));
+ pci_realize_and_unref(dev, bus, &error_fatal);
Missing #include "qapi/error.h" added manually, whitespace changes
minimized manually, @pci_dev declarations moved manually.
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200610053247.1583243-16-armbru@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
2020-06-10 08:32:04 +03:00
|
|
|
pci_realize_and_unref(pci_dev, bus, &error_fatal);
|
2018-03-02 12:30:50 +03:00
|
|
|
g_ptr_array_free(pci_nic_models, true);
|
2015-04-28 13:50:07 +03:00
|
|
|
return pci_dev;
|
2009-09-25 05:53:51 +04:00
|
|
|
}
|
|
|
|
|
2012-09-08 13:49:24 +04:00
|
|
|
/*
 * Create the PCI display device matching the global vga_interface_type
 * on @bus.  vga_interface_created is set to true unconditionally.
 *
 * Returns the device made by pci_create_simple(), or NULL for VGA_NONE
 * and for every type that has no PCI model listed here.
 */
PCIDevice *pci_vga_init(PCIBus *bus)
{
    const char *model;

    vga_interface_created = true;

    switch (vga_interface_type) {
    case VGA_CIRRUS:
        model = "cirrus-vga";
        break;
    case VGA_QXL:
        model = "qxl-vga";
        break;
    case VGA_STD:
        model = "VGA";
        break;
    case VGA_VMWARE:
        model = "vmware-svga";
        break;
    case VGA_VIRTIO:
        model = "virtio-vga";
        break;
    case VGA_NONE:
    default:
        /*
         * Other non-PCI types. Checking for unsupported types is already
         * done in vl.c.
         */
        return NULL;
    }

    return pci_create_simple(bus, -1, model);
}
|
|
|
|
|
2010-11-24 08:23:25 +03:00
|
|
|
/* Whether a given bus number is in range of the secondary
|
|
|
|
* bus of the given bridge device. */
|
|
|
|
static bool pci_secondary_bus_in_range(PCIDevice *dev, int bus_num)
|
|
|
|
{
|
|
|
|
return !(pci_get_word(dev->config + PCI_BRIDGE_CONTROL) &
|
|
|
|
PCI_BRIDGE_CTL_BUS_RESET) /* Don't walk the bus if it's reset. */ &&
|
2015-06-02 14:23:00 +03:00
|
|
|
dev->config[PCI_SECONDARY_BUS] <= bus_num &&
|
2010-11-24 08:23:25 +03:00
|
|
|
bus_num <= dev->config[PCI_SUBORDINATE_BUS];
|
|
|
|
}
|
|
|
|
|
2015-06-02 14:23:00 +03:00
|
|
|
/* Whether a given bus number is in a range of a root bus */
|
|
|
|
static bool pci_root_bus_in_range(PCIBus *bus, int bus_num)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
|
|
|
|
PCIDevice *dev = bus->devices[i];
|
|
|
|
|
2022-11-29 13:13:41 +03:00
|
|
|
if (dev && IS_PCI_BRIDGE(dev)) {
|
2015-06-02 14:23:00 +03:00
|
|
|
if (pci_secondary_bus_in_range(dev, bus_num)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2022-12-01 15:11:22 +03:00
|
|
|
/*
 * Find the bus numbered @bus_num in the hierarchy rooted at @bus.
 *
 * Returns @bus itself when its number matches, otherwise descends through
 * child buses, at each level following the first child whose range covers
 * @bus_num.  Returns NULL when @bus is NULL, when @bus is a non-root bus
 * whose parent bridge does not cover @bus_num, or when no bus matches.
 */
PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num)
{
    PCIBus *child;

    if (!bus) {
        return NULL;
    }

    if (pci_bus_num(bus) == bus_num) {
        return bus;
    }

    /* Consider all bus numbers in range for the host pci bridge. */
    if (!pci_bus_is_root(bus) &&
        !pci_secondary_bus_in_range(bus->parent_dev, bus_num)) {
        return NULL;
    }

    /* try child bus */
    while (bus) {
        QLIST_FOREACH(child, &bus->child, sibling) {
            bool covers;

            if (pci_bus_num(child) == bus_num) {
                return child;
            }

            /* PXB buses assumed to be children of bus 0 */
            covers = pci_bus_is_root(child)
                ? pci_root_bus_in_range(child, bus_num)
                : pci_secondary_bus_in_range(child->parent_dev, bus_num);
            if (covers) {
                break;
            }
        }

        /*
         * child is the bus to descend into, or NULL when the whole child
         * list was walked without finding a covering range.
         */
        bus = child;
    }

    return NULL;
}
|
|
|
|
|
2021-10-28 07:31:25 +03:00
|
|
|
void pci_for_each_bus_depth_first(PCIBus *bus, pci_bus_ret_fn begin,
|
|
|
|
pci_bus_fn end, void *parent_state)
|
2013-10-14 19:01:07 +04:00
|
|
|
{
|
|
|
|
PCIBus *sec;
|
|
|
|
void *state;
|
|
|
|
|
|
|
|
if (!bus) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (begin) {
|
|
|
|
state = begin(bus, parent_state);
|
|
|
|
} else {
|
|
|
|
state = parent_state;
|
|
|
|
}
|
|
|
|
|
|
|
|
QLIST_FOREACH(sec, &bus->child, sibling) {
|
|
|
|
pci_for_each_bus_depth_first(sec, begin, end, state);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (end) {
|
|
|
|
end(bus, state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-01-27 09:56:36 +03:00
|
|
|
/*
 * Return the device stored at index @devfn of the bus numbered @bus_num,
 * looked up starting from @bus.  Returns NULL when the bus cannot be
 * found; the slot entry itself is returned as stored.
 */
PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn)
{
    PCIBus *target = pci_find_bus_nr(bus, bus_num);

    return target ? target->devices[devfn] : NULL;
}
|
|
|
|
|
2023-03-02 19:15:38 +03:00
|
|
|
#define ONBOARD_INDEX_MAX (16 * 1024 - 1)
|
|
|
|
|
2015-01-19 17:52:28 +03:00
|
|
|
static void pci_qdev_realize(DeviceState *qdev, Error **errp)
|
2009-05-15 01:35:07 +04:00
|
|
|
{
|
|
|
|
PCIDevice *pci_dev = (PCIDevice *)qdev;
|
2011-12-04 22:22:06 +04:00
|
|
|
PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
|
2018-01-16 15:34:56 +03:00
|
|
|
ObjectClass *klass = OBJECT_CLASS(pc);
|
2015-01-19 17:52:28 +03:00
|
|
|
Error *local_err = NULL;
|
2010-10-20 01:08:21 +04:00
|
|
|
bool is_default_rom;
|
2019-10-29 14:48:56 +03:00
|
|
|
uint16_t class_id;
|
2009-05-15 01:35:07 +04:00
|
|
|
|
2023-03-02 19:15:38 +03:00
|
|
|
/*
|
|
|
|
* capped by systemd (see: udev-builtin-net_id.c)
|
|
|
|
* as it's the only known user honor it to avoid users
|
|
|
|
* misconfigure QEMU and then wonder why acpi-index doesn't work
|
|
|
|
*/
|
|
|
|
if (pci_dev->acpi_index > ONBOARD_INDEX_MAX) {
|
|
|
|
error_setg(errp, "acpi-index should be less or equal to %u",
|
|
|
|
ONBOARD_INDEX_MAX);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* make sure that acpi-index is unique across all present PCI devices
|
|
|
|
*/
|
|
|
|
if (pci_dev->acpi_index) {
|
|
|
|
GSequence *used_indexes = pci_acpi_index_list();
|
|
|
|
|
|
|
|
if (g_sequence_lookup(used_indexes,
|
|
|
|
GINT_TO_POINTER(pci_dev->acpi_index),
|
|
|
|
g_cmp_uint32, NULL)) {
|
|
|
|
error_setg(errp, "a PCI device with acpi-index = %" PRIu32
|
|
|
|
" already exist", pci_dev->acpi_index);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
g_sequence_insert_sorted(used_indexes,
|
|
|
|
GINT_TO_POINTER(pci_dev->acpi_index),
|
|
|
|
g_cmp_uint32, NULL);
|
|
|
|
}
|
|
|
|
|
2021-02-03 16:18:28 +03:00
|
|
|
if (pci_dev->romsize != -1 && !is_power_of_2(pci_dev->romsize)) {
|
|
|
|
error_setg(errp, "ROM size %u is not a power of two", pci_dev->romsize);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-01-16 15:34:56 +03:00
|
|
|
/* initialize cap_present for pci_is_express() and pci_config_size(),
|
|
|
|
* Note that hybrid PCIs are not set automatically and need to manage
|
|
|
|
* QEMU_PCI_CAP_EXPRESS manually */
|
|
|
|
if (object_class_dynamic_cast(klass, INTERFACE_PCIE_DEVICE) &&
|
|
|
|
!object_class_dynamic_cast(klass, INTERFACE_CONVENTIONAL_PCI_DEVICE)) {
|
2009-10-30 15:21:18 +03:00
|
|
|
pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
|
|
|
|
}
|
|
|
|
|
2022-04-29 17:40:26 +03:00
|
|
|
if (object_class_dynamic_cast(klass, INTERFACE_CXL_DEVICE)) {
|
|
|
|
pci_dev->cap_present |= QEMU_PCIE_CAP_CXL;
|
|
|
|
}
|
|
|
|
|
2017-11-29 11:46:27 +03:00
|
|
|
pci_dev = do_pci_register_device(pci_dev,
|
2011-12-09 21:06:57 +04:00
|
|
|
object_get_typename(OBJECT(qdev)),
|
2015-01-19 17:52:28 +03:00
|
|
|
pci_dev->devfn, errp);
|
2009-12-10 13:11:06 +03:00
|
|
|
if (pci_dev == NULL)
|
2015-01-19 17:52:28 +03:00
|
|
|
return;
|
2014-02-05 19:36:48 +04:00
|
|
|
|
2015-01-19 17:52:29 +03:00
|
|
|
if (pc->realize) {
|
|
|
|
pc->realize(pci_dev, &local_err);
|
|
|
|
if (local_err) {
|
|
|
|
error_propagate(errp, local_err);
|
2011-06-15 02:37:47 +04:00
|
|
|
do_pci_unregister_device(pci_dev);
|
2015-01-19 17:52:28 +03:00
|
|
|
return;
|
2011-06-15 02:37:47 +04:00
|
|
|
}
|
2010-05-11 14:44:21 +04:00
|
|
|
}
|
2009-12-18 14:01:08 +03:00
|
|
|
|
2023-07-05 14:59:23 +03:00
|
|
|
/*
|
|
|
|
* A PCIe Downstream Port that do not have ARI Forwarding enabled must
|
|
|
|
* associate only Device 0 with the device attached to the bus
|
|
|
|
* representing the Link from the Port (PCIe base spec rev 4.0 ver 0.3,
|
|
|
|
* sec 7.3.1).
|
|
|
|
* With ARI, PCI_SLOT() can return non-zero value as the traditional
|
|
|
|
* 5-bit Device Number and 3-bit Function Number fields in its associated
|
|
|
|
* Routing IDs, Requester IDs and Completer IDs are interpreted as a
|
|
|
|
* single 8-bit Function Number. Hence, ignore ARI capable devices.
|
|
|
|
*/
|
|
|
|
if (pci_is_express(pci_dev) &&
|
|
|
|
!pcie_find_capability(pci_dev, PCI_EXT_CAP_ID_ARI) &&
|
|
|
|
pcie_has_upstream_port(pci_dev) &&
|
|
|
|
PCI_SLOT(pci_dev->devfn)) {
|
|
|
|
warn_report("PCI: slot %d is not valid for %s,"
|
|
|
|
" parent device only allows plugging into slot 0.",
|
|
|
|
PCI_SLOT(pci_dev->devfn), pci_dev->name);
|
|
|
|
}
|
|
|
|
|
2019-10-29 14:48:56 +03:00
|
|
|
if (pci_dev->failover_pair_id) {
|
|
|
|
if (!pci_bus_is_express(pci_get_bus(pci_dev))) {
|
|
|
|
error_setg(errp, "failover primary device must be on "
|
|
|
|
"PCIExpress bus");
|
qdev: Unrealize must not fail
Devices may have component devices and buses.
Device realization may fail. Realization is recursive: a device's
realize() method realizes its components, and device_set_realized()
realizes its buses (which should in turn realize the devices on that
bus, except bus_set_realized() doesn't implement that, yet).
When realization of a component or bus fails, we need to roll back:
unrealize everything we realized so far. If any of these unrealizes
failed, the device would be left in an inconsistent state. Must not
happen.
device_set_realized() lets it happen: it ignores errors in the roll
back code starting at label child_realize_fail.
Since realization is recursive, unrealization must be recursive, too.
But how could a partly failed unrealize be rolled back? We'd have to
re-realize, which can fail. This design is fundamentally broken.
device_set_realized() does not roll back at all. Instead, it keeps
unrealizing, ignoring further errors.
It can screw up even for a device with no buses: if the lone
dc->unrealize() fails, it still unregisters vmstate, and calls
listeners' unrealize() callback.
bus_set_realized() does not roll back either. Instead, it stops
unrealizing.
Fortunately, no unrealize method can fail, as we'll see below.
To fix the design error, drop parameter @errp from all the unrealize
methods.
Any unrealize method that uses @errp now needs an update. This leads
us to unrealize() methods that can fail. Merely passing it to another
unrealize method cannot cause failure, though. Here are the ones that
do other things with @errp:
* virtio_serial_device_unrealize()
Fails when qbus_set_hotplug_handler() fails, but still does all the
other work. On failure, the device would stay realized with its
resources completely gone. Oops. Can't happen, because
qbus_set_hotplug_handler() can't actually fail here. Pass
&error_abort to qbus_set_hotplug_handler() instead.
* hw/ppc/spapr_drc.c's unrealize()
Fails when object_property_del() fails, but all the other work is
already done. On failure, the device would stay realized with its
vmstate registration gone. Oops. Can't happen, because
object_property_del() can't actually fail here. Pass &error_abort
to object_property_del() instead.
* spapr_phb_unrealize()
Fails and bails out when remove_drcs() fails, but other work is
already done. On failure, the device would stay realized with some
of its resources gone. Oops. remove_drcs() fails only when
chassis_from_bus()'s object_property_get_uint() fails, and it can't
here. Pass &error_abort to remove_drcs() instead.
Therefore, no unrealize method can fail before this patch.
device_set_realized()'s recursive unrealization via bus uses
object_property_set_bool(). Can't drop @errp there, so pass
&error_abort.
We similarly unrealize with object_property_set_bool() elsewhere,
always ignoring errors. Pass &error_abort instead.
Several unrealize methods no longer handle errors from other unrealize
methods: virtio_9p_device_unrealize(),
virtio_input_device_unrealize(), scsi_qdev_unrealize(), ...
Much of the deleted error handling looks wrong anyway.
One unrealize methods no longer ignore such errors:
usb_ehci_pci_exit().
Several realize methods no longer ignore errors when rolling back:
v9fs_device_realize_common(), pci_qdev_unrealize(),
spapr_phb_realize(), usb_qdev_realize(), vfio_ccw_realize(),
virtio_device_realize().
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200505152926.18877-17-armbru@redhat.com>
2020-05-05 18:29:24 +03:00
|
|
|
pci_qdev_unrealize(DEVICE(pci_dev));
|
2019-10-29 14:48:56 +03:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
class_id = pci_get_word(pci_dev->config + PCI_CLASS_DEVICE);
|
|
|
|
if (class_id != PCI_CLASS_NETWORK_ETHERNET) {
|
|
|
|
error_setg(errp, "failover primary device is not an "
|
|
|
|
"Ethernet device");
|
qdev: Unrealize must not fail
Devices may have component devices and buses.
Device realization may fail. Realization is recursive: a device's
realize() method realizes its components, and device_set_realized()
realizes its buses (which should in turn realize the devices on that
bus, except bus_set_realized() doesn't implement that, yet).
When realization of a component or bus fails, we need to roll back:
unrealize everything we realized so far. If any of these unrealizes
failed, the device would be left in an inconsistent state. Must not
happen.
device_set_realized() lets it happen: it ignores errors in the roll
back code starting at label child_realize_fail.
Since realization is recursive, unrealization must be recursive, too.
But how could a partly failed unrealize be rolled back? We'd have to
re-realize, which can fail. This design is fundamentally broken.
device_set_realized() does not roll back at all. Instead, it keeps
unrealizing, ignoring further errors.
It can screw up even for a device with no buses: if the lone
dc->unrealize() fails, it still unregisters vmstate, and calls
listeners' unrealize() callback.
bus_set_realized() does not roll back either. Instead, it stops
unrealizing.
Fortunately, no unrealize method can fail, as we'll see below.
To fix the design error, drop parameter @errp from all the unrealize
methods.
Any unrealize method that uses @errp now needs an update. This leads
us to unrealize() methods that can fail. Merely passing it to another
unrealize method cannot cause failure, though. Here are the ones that
do other things with @errp:
* virtio_serial_device_unrealize()
Fails when qbus_set_hotplug_handler() fails, but still does all the
other work. On failure, the device would stay realized with its
resources completely gone. Oops. Can't happen, because
qbus_set_hotplug_handler() can't actually fail here. Pass
&error_abort to qbus_set_hotplug_handler() instead.
* hw/ppc/spapr_drc.c's unrealize()
Fails when object_property_del() fails, but all the other work is
already done. On failure, the device would stay realized with its
vmstate registration gone. Oops. Can't happen, because
object_property_del() can't actually fail here. Pass &error_abort
to object_property_del() instead.
* spapr_phb_unrealize()
Fails and bails out when remove_drcs() fails, but other work is
already done. On failure, the device would stay realized with some
of its resources gone. Oops. remove_drcs() fails only when
chassis_from_bus()'s object_property_get_uint() fails, and it can't
here. Pass &error_abort to remove_drcs() instead.
Therefore, no unrealize method can fail before this patch.
device_set_realized()'s recursive unrealization via bus uses
object_property_set_bool(). Can't drop @errp there, so pass
&error_abort.
We similarly unrealize with object_property_set_bool() elsewhere,
always ignoring errors. Pass &error_abort instead.
Several unrealize methods no longer handle errors from other unrealize
methods: virtio_9p_device_unrealize(),
virtio_input_device_unrealize(), scsi_qdev_unrealize(), ...
Much of the deleted error handling looks wrong anyway.
One unrealize methods no longer ignore such errors:
usb_ehci_pci_exit().
Several realize methods no longer ignore errors when rolling back:
v9fs_device_realize_common(), pci_qdev_unrealize(),
spapr_phb_realize(), usb_qdev_realize(), vfio_ccw_realize(),
virtio_device_realize().
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200505152926.18877-17-armbru@redhat.com>
2020-05-05 18:29:24 +03:00
|
|
|
pci_qdev_unrealize(DEVICE(pci_dev));
|
2019-10-29 14:48:56 +03:00
|
|
|
return;
|
|
|
|
}
|
2021-02-12 16:52:47 +03:00
|
|
|
if ((pci_dev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION)
|
|
|
|
|| (PCI_FUNC(pci_dev->devfn) != 0)) {
|
2019-10-29 14:48:56 +03:00
|
|
|
error_setg(errp, "failover: primary device must be in its own "
|
|
|
|
"PCI slot");
|
qdev: Unrealize must not fail
Devices may have component devices and buses.
Device realization may fail. Realization is recursive: a device's
realize() method realizes its components, and device_set_realized()
realizes its buses (which should in turn realize the devices on that
bus, except bus_set_realized() doesn't implement that, yet).
When realization of a component or bus fails, we need to roll back:
unrealize everything we realized so far. If any of these unrealizes
failed, the device would be left in an inconsistent state. Must not
happen.
device_set_realized() lets it happen: it ignores errors in the roll
back code starting at label child_realize_fail.
Since realization is recursive, unrealization must be recursive, too.
But how could a partly failed unrealize be rolled back? We'd have to
re-realize, which can fail. This design is fundamentally broken.
device_set_realized() does not roll back at all. Instead, it keeps
unrealizing, ignoring further errors.
It can screw up even for a device with no buses: if the lone
dc->unrealize() fails, it still unregisters vmstate, and calls
listeners' unrealize() callback.
bus_set_realized() does not roll back either. Instead, it stops
unrealizing.
Fortunately, no unrealize method can fail, as we'll see below.
To fix the design error, drop parameter @errp from all the unrealize
methods.
Any unrealize method that uses @errp now needs an update. This leads
us to unrealize() methods that can fail. Merely passing it to another
unrealize method cannot cause failure, though. Here are the ones that
do other things with @errp:
* virtio_serial_device_unrealize()
Fails when qbus_set_hotplug_handler() fails, but still does all the
other work. On failure, the device would stay realized with its
resources completely gone. Oops. Can't happen, because
qbus_set_hotplug_handler() can't actually fail here. Pass
&error_abort to qbus_set_hotplug_handler() instead.
* hw/ppc/spapr_drc.c's unrealize()
Fails when object_property_del() fails, but all the other work is
already done. On failure, the device would stay realized with its
vmstate registration gone. Oops. Can't happen, because
object_property_del() can't actually fail here. Pass &error_abort
to object_property_del() instead.
* spapr_phb_unrealize()
Fails and bails out when remove_drcs() fails, but other work is
already done. On failure, the device would stay realized with some
of its resources gone. Oops. remove_drcs() fails only when
chassis_from_bus()'s object_property_get_uint() fails, and it can't
here. Pass &error_abort to remove_drcs() instead.
Therefore, no unrealize method can fail before this patch.
device_set_realized()'s recursive unrealization via bus uses
object_property_set_bool(). Can't drop @errp there, so pass
&error_abort.
We similarly unrealize with object_property_set_bool() elsewhere,
always ignoring errors. Pass &error_abort instead.
Several unrealize methods no longer handle errors from other unrealize
methods: virtio_9p_device_unrealize(),
virtio_input_device_unrealize(), scsi_qdev_unrealize(), ...
Much of the deleted error handling looks wrong anyway.
One unrealize methods no longer ignore such errors:
usb_ehci_pci_exit().
Several realize methods no longer ignore errors when rolling back:
v9fs_device_realize_common(), pci_qdev_unrealize(),
spapr_phb_realize(), usb_qdev_realize(), vfio_ccw_realize(),
virtio_device_realize().
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200505152926.18877-17-armbru@redhat.com>
2020-05-05 18:29:24 +03:00
|
|
|
pci_qdev_unrealize(DEVICE(pci_dev));
|
2019-10-29 14:48:56 +03:00
|
|
|
return;
|
|
|
|
}
|
2019-10-29 14:49:01 +03:00
|
|
|
qdev->allow_unplug_during_migration = true;
|
2019-10-29 14:48:56 +03:00
|
|
|
}
|
|
|
|
|
2009-12-18 14:01:08 +03:00
|
|
|
/* rom loading */
|
2010-10-20 01:08:21 +04:00
|
|
|
is_default_rom = false;
|
2011-12-04 22:22:06 +04:00
|
|
|
if (pci_dev->romfile == NULL && pc->romfile != NULL) {
|
|
|
|
pci_dev->romfile = g_strdup(pc->romfile);
|
2010-10-20 01:08:21 +04:00
|
|
|
is_default_rom = true;
|
|
|
|
}
|
2014-10-27 20:34:41 +03:00
|
|
|
|
2015-01-19 17:52:28 +03:00
|
|
|
pci_add_option_rom(pci_dev, is_default_rom, &local_err);
|
|
|
|
if (local_err) {
|
|
|
|
error_propagate(errp, local_err);
|
qdev: Unrealize must not fail
Devices may have component devices and buses.
Device realization may fail. Realization is recursive: a device's
realize() method realizes its components, and device_set_realized()
realizes its buses (which should in turn realize the devices on that
bus, except bus_set_realized() doesn't implement that, yet).
When realization of a component or bus fails, we need to roll back:
unrealize everything we realized so far. If any of these unrealizes
failed, the device would be left in an inconsistent state. Must not
happen.
device_set_realized() lets it happen: it ignores errors in the roll
back code starting at label child_realize_fail.
Since realization is recursive, unrealization must be recursive, too.
But how could a partly failed unrealize be rolled back? We'd have to
re-realize, which can fail. This design is fundamentally broken.
device_set_realized() does not roll back at all. Instead, it keeps
unrealizing, ignoring further errors.
It can screw up even for a device with no buses: if the lone
dc->unrealize() fails, it still unregisters vmstate, and calls
listeners' unrealize() callback.
bus_set_realized() does not roll back either. Instead, it stops
unrealizing.
Fortunately, no unrealize method can fail, as we'll see below.
To fix the design error, drop parameter @errp from all the unrealize
methods.
Any unrealize method that uses @errp now needs an update. This leads
us to unrealize() methods that can fail. Merely passing it to another
unrealize method cannot cause failure, though. Here are the ones that
do other things with @errp:
* virtio_serial_device_unrealize()
Fails when qbus_set_hotplug_handler() fails, but still does all the
other work. On failure, the device would stay realized with its
resources completely gone. Oops. Can't happen, because
qbus_set_hotplug_handler() can't actually fail here. Pass
&error_abort to qbus_set_hotplug_handler() instead.
* hw/ppc/spapr_drc.c's unrealize()
Fails when object_property_del() fails, but all the other work is
already done. On failure, the device would stay realized with its
vmstate registration gone. Oops. Can't happen, because
object_property_del() can't actually fail here. Pass &error_abort
to object_property_del() instead.
* spapr_phb_unrealize()
Fails and bails out when remove_drcs() fails, but other work is
already done. On failure, the device would stay realized with some
of its resources gone. Oops. remove_drcs() fails only when
chassis_from_bus()'s object_property_get_uint() fails, and it can't
here. Pass &error_abort to remove_drcs() instead.
Therefore, no unrealize method can fail before this patch.
device_set_realized()'s recursive unrealization via bus uses
object_property_set_bool(). Can't drop @errp there, so pass
&error_abort.
We similarly unrealize with object_property_set_bool() elsewhere,
always ignoring errors. Pass &error_abort instead.
Several unrealize methods no longer handle errors from other unrealize
methods: virtio_9p_device_unrealize(),
virtio_input_device_unrealize(), scsi_qdev_unrealize(), ...
Much of the deleted error handling looks wrong anyway.
One unrealize methods no longer ignore such errors:
usb_ehci_pci_exit().
Several realize methods no longer ignore errors when rolling back:
v9fs_device_realize_common(), pci_qdev_unrealize(),
spapr_phb_realize(), usb_qdev_realize(), vfio_ccw_realize(),
virtio_device_realize().
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200505152926.18877-17-armbru@redhat.com>
2020-05-05 18:29:24 +03:00
|
|
|
pci_qdev_unrealize(DEVICE(pci_dev));
|
2015-01-19 17:52:28 +03:00
|
|
|
return;
|
2014-10-27 20:34:41 +03:00
|
|
|
}
|
2021-11-11 16:08:54 +03:00
|
|
|
|
|
|
|
pci_set_power(pci_dev, true);
|
2022-06-13 23:26:33 +03:00
|
|
|
|
|
|
|
pci_dev->msi_trigger = pci_msi_trigger;
|
2009-09-25 23:42:44 +04:00
|
|
|
}
|
|
|
|
|
2023-03-04 14:40:43 +03:00
|
|
|
static PCIDevice *pci_new_internal(int devfn, bool multifunction,
|
|
|
|
const char *name)
|
2020-06-10 08:32:02 +03:00
|
|
|
{
|
|
|
|
DeviceState *dev;
|
|
|
|
|
|
|
|
dev = qdev_new(name);
|
|
|
|
qdev_prop_set_int32(dev, "addr", devfn);
|
|
|
|
qdev_prop_set_bit(dev, "multifunction", multifunction);
|
|
|
|
return PCI_DEVICE(dev);
|
|
|
|
}
|
|
|
|
|
2023-03-04 14:40:43 +03:00
|
|
|
/*
 * Create a PCI device of type @name at @devfn with its "multifunction"
 * property set to true.
 */
PCIDevice *pci_new_multifunction(int devfn, const char *name)
{
    const bool multifunction = true;

    return pci_new_internal(devfn, multifunction, name);
}
|
|
|
|
|
2020-06-10 08:32:02 +03:00
|
|
|
/*
 * Create a PCI device of type @name at @devfn with its "multifunction"
 * property set to false.
 */
PCIDevice *pci_new(int devfn, const char *name)
{
    const bool multifunction = false;

    return pci_new_internal(devfn, multifunction, name);
}
|
|
|
|
|
|
|
|
/*
 * PCI-typed wrapper around qdev_realize_and_unref(): forwards the qdev
 * embedded in @dev and the qbus embedded in @bus, and returns that
 * function's result unchanged.
 */
bool pci_realize_and_unref(PCIDevice *dev, PCIBus *bus, Error **errp)
{
    DeviceState *qdev = &dev->qdev;

    return qdev_realize_and_unref(qdev, &bus->qbus, errp);
}
|
|
|
|
|
2010-06-23 11:15:30 +04:00
|
|
|
/*
 * Create a multifunction PCI device of type @name at @devfn and realize
 * it on @bus.  Realization errors are routed to &error_fatal.
 */
PCIDevice *pci_create_simple_multifunction(PCIBus *bus, int devfn,
                                           const char *name)
{
    PCIDevice *pci_dev;

    pci_dev = pci_new_multifunction(devfn, name);
    pci_realize_and_unref(pci_dev, bus, &error_fatal);

    return pci_dev;
}
|
2009-06-21 20:45:40 +04:00
|
|
|
|
2010-06-23 11:15:30 +04:00
|
|
|
/*
 * Create a PCI device of type @name at @devfn and realize it on @bus.
 * Realization errors are routed to &error_fatal.
 */
PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
{
    PCIDevice *pci_dev;

    pci_dev = pci_new(devfn, name);
    pci_realize_and_unref(pci_dev, bus, &error_fatal);

    return pci_dev;
}
|
|
|
|
|
2012-10-20 00:43:28 +04:00
|
|
|
/*
 * Find the first run of @size consecutive bytes, located after the
 * standard header and below PCI_CONFIG_SPACE_SIZE, that are not marked
 * in pdev->used[].
 *
 * Returns the offset of the run, or 0 if there is none.
 */
static uint8_t pci_find_space(PCIDevice *pdev, uint8_t size)
{
    int run_start = PCI_CONFIG_HEADER_SIZE;
    int pos;

    for (pos = PCI_CONFIG_HEADER_SIZE; pos < PCI_CONFIG_SPACE_SIZE; ++pos) {
        if (pdev->used[pos]) {
            /* Byte is taken: a free run can only begin after it. */
            run_start = pos + 1;
            continue;
        }
        if (pos - run_start + 1 == size) {
            return run_start;
        }
    }

    return 0;
}
|
|
|
|
|
|
|
|
/*
 * Walk the capability list of @pdev looking for capability @cap_id.
 *
 * Returns the config space offset of the capability, or 0 if it is not
 * present (or the device has no capability list at all).
 *
 * If @prev_p is non-NULL and the device has a capability list, *@prev_p
 * receives the offset of the "next" pointer byte that was read last:
 * the one linking to the capability found, or the terminating one when
 * nothing was found.
 */
static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
                                        uint8_t *prev_p)
{
    uint8_t link = PCI_CAPABILITY_LIST;
    uint8_t cap;

    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST)) {
        return 0;
    }

    while ((cap = pdev->config[link]) != 0) {
        if (pdev->config[cap + PCI_CAP_LIST_ID] == cap_id) {
            break;
        }
        link = cap + PCI_CAP_LIST_NEXT;
    }

    if (prev_p) {
        *prev_p = link;
    }
    return cap;
}
|
|
|
|
|
2011-08-24 16:29:30 +04:00
|
|
|
/*
 * Find the capability that starts at or below config space byte @offset.
 *
 * Returns 0 if the byte is not marked in pdev->used[]; otherwise returns
 * the highest capability offset in the list that is <= @offset (0 when
 * there is no such capability).
 */
static uint8_t pci_find_capability_at_offset(PCIDevice *pdev, uint8_t offset)
{
    uint8_t link = PCI_CAPABILITY_LIST;
    uint8_t best = 0;
    uint8_t cap;

    if (!pdev->used[offset]) {
        return 0;
    }

    assert(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST);

    while ((cap = pdev->config[link]) != 0) {
        if (cap <= offset && cap > best) {
            best = cap;
        }
        link = cap + PCI_CAP_LIST_NEXT;
    }

    return best;
}
|
|
|
|
|
2010-10-20 01:08:21 +04:00
|
|
|
/* Patch the PCI vendor and device ids in a PCI rom image if necessary.
|
|
|
|
This is needed for an option rom which is used for more than one device. */
|
2021-02-03 16:18:27 +03:00
|
|
|
static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size)
|
2010-10-20 01:08:21 +04:00
|
|
|
{
|
|
|
|
uint16_t vendor_id;
|
|
|
|
uint16_t device_id;
|
|
|
|
uint16_t rom_vendor_id;
|
|
|
|
uint16_t rom_device_id;
|
|
|
|
uint16_t rom_magic;
|
|
|
|
uint16_t pcir_offset;
|
|
|
|
uint8_t checksum;
|
|
|
|
|
|
|
|
/* Words in rom data are little endian (like in PCI configuration),
|
|
|
|
so they can be read / written with pci_get_word / pci_set_word. */
|
|
|
|
|
|
|
|
/* Only a valid rom will be patched. */
|
|
|
|
rom_magic = pci_get_word(ptr);
|
|
|
|
if (rom_magic != 0xaa55) {
|
|
|
|
PCI_DPRINTF("Bad ROM magic %04x\n", rom_magic);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
pcir_offset = pci_get_word(ptr + 0x18);
|
|
|
|
if (pcir_offset + 8 >= size || memcmp(ptr + pcir_offset, "PCIR", 4)) {
|
|
|
|
PCI_DPRINTF("Bad PCIR offset 0x%x or signature\n", pcir_offset);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
|
|
|
|
device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
|
|
|
|
rom_vendor_id = pci_get_word(ptr + pcir_offset + 4);
|
|
|
|
rom_device_id = pci_get_word(ptr + pcir_offset + 6);
|
|
|
|
|
|
|
|
PCI_DPRINTF("%s: ROM id %04x%04x / PCI id %04x%04x\n", pdev->romfile,
|
|
|
|
vendor_id, device_id, rom_vendor_id, rom_device_id);
|
|
|
|
|
|
|
|
checksum = ptr[6];
|
|
|
|
|
|
|
|
if (vendor_id != rom_vendor_id) {
|
|
|
|
/* Patch vendor id and checksum (at offset 6 for etherboot roms). */
|
|
|
|
checksum += (uint8_t)rom_vendor_id + (uint8_t)(rom_vendor_id >> 8);
|
|
|
|
checksum -= (uint8_t)vendor_id + (uint8_t)(vendor_id >> 8);
|
|
|
|
PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum);
|
|
|
|
ptr[6] = checksum;
|
|
|
|
pci_set_word(ptr + pcir_offset + 4, vendor_id);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (device_id != rom_device_id) {
|
|
|
|
/* Patch device id and checksum (at offset 6 for etherboot roms). */
|
|
|
|
checksum += (uint8_t)rom_device_id + (uint8_t)(rom_device_id >> 8);
|
|
|
|
checksum -= (uint8_t)device_id + (uint8_t)(device_id >> 8);
|
|
|
|
PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum);
|
|
|
|
ptr[6] = checksum;
|
|
|
|
pci_set_word(ptr + pcir_offset + 6, device_id);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-12-18 14:01:07 +03:00
|
|
|
/* Add an option rom for the device */
|
2015-01-19 17:52:28 +03:00
|
|
|
static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom,
|
|
|
|
Error **errp)
|
2009-12-18 14:01:07 +03:00
|
|
|
{
|
pci: ROM preallocation for incoming migration
On incoming migration we have the following sequence to load option
ROM:
1. On device realize we do normal load ROM from the file
2. Than, on incoming migration we rewrite ROM from the incoming RAM
block. If sizes mismatch we fail, like this:
Size mismatch: 0000:00:03.0/virtio-net-pci.rom: 0x40000 != 0x80000: Invalid argument
This is not ideal when we migrate to updated distribution: we have to
keep old ROM files in new distribution and be careful around romfile
property to load correct ROM file. Which is loaded actually just to
allocate the ROM with correct length.
Note, that romsize property doesn't really help: if we try to specify
it when default romfile is larger, it fails with something like:
romfile "efi-virtio.rom" (160768 bytes) is too large for ROM size 65536
Let's just ignore ROM file when romsize is specified and we are in
incoming migration state. In other words, we need only to preallocate
ROM of specified size, local ROM file is unrelated.
This way:
If romsize was specified on source, we just use same commandline as on
source, and migration will work independently of local ROM files on
target.
If romsize was not specified on source (and we have mismatching local
ROM file on target host), we have to specify romsize on target to match
source romsize. romfile parameter may be kept same as on source or may
be dropped, the file is not loaded anyway.
As a bonus we avoid extra reading from ROM file on target.
Note: when we don't have romsize parameter on source command line and
need it for target, it may be calculated as aligned up to power of two
size of ROM file on source (if we know, which file is it) or,
alternatively it may be retrieved from source QEMU by QMP qom-get
command, like
{ "execute": "qom-get",
"arguments": {
"path": "/machine/peripheral/CARD_ID/virtio-net-pci.rom[0]",
"property": "size" } }
Note: we have extra initialization of size variable to zero in
pci_add_option_rom to avoid false-positive
"error: ‘size’ may be used uninitialized"
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20230522201740.88960-2-vsementsov@yandex-team.ru>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-05-22 23:17:40 +03:00
|
|
|
int64_t size = 0;
|
2023-05-15 15:52:28 +03:00
|
|
|
g_autofree char *path = NULL;
|
2010-06-25 21:09:35 +04:00
|
|
|
char name[32];
|
2011-12-09 20:51:49 +04:00
|
|
|
const VMStateDescription *vmsd;
|
2009-12-18 14:01:07 +03:00
|
|
|
|
pci: ROM preallocation for incoming migration
On incoming migration we have the following sequence to load option
ROM:
1. On device realize we do normal load ROM from the file
2. Than, on incoming migration we rewrite ROM from the incoming RAM
block. If sizes mismatch we fail, like this:
Size mismatch: 0000:00:03.0/virtio-net-pci.rom: 0x40000 != 0x80000: Invalid argument
This is not ideal when we migrate to updated distribution: we have to
keep old ROM files in new distribution and be careful around romfile
property to load correct ROM file. Which is loaded actually just to
allocate the ROM with correct length.
Note, that romsize property doesn't really help: if we try to specify
it when default romfile is larger, it fails with something like:
romfile "efi-virtio.rom" (160768 bytes) is too large for ROM size 65536
Let's just ignore ROM file when romsize is specified and we are in
incoming migration state. In other words, we need only to preallocate
ROM of specified size, local ROM file is unrelated.
This way:
If romsize was specified on source, we just use same commandline as on
source, and migration will work independently of local ROM files on
target.
If romsize was not specified on source (and we have mismatching local
ROM file on target host), we have to specify romsize on target to match
source romsize. romfile parameter may be kept same as on source or may
be dropped, the file is not loaded anyway.
As a bonus we avoid extra reading from ROM file on target.
Note: when we don't have romsize parameter on source command line and
need it for target, it may be calculated as aligned up to power of two
size of ROM file on source (if we know, which file is it) or,
alternatively it may be retrieved from source QEMU by QMP qom-get
command, like
{ "execute": "qom-get",
"arguments": {
"path": "/machine/peripheral/CARD_ID/virtio-net-pci.rom[0]",
"property": "size" } }
Note: we have extra initialization of size variable to zero in
pci_add_option_rom to avoid false-positive
"error: ‘size’ may be used uninitialized"
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20230522201740.88960-2-vsementsov@yandex-team.ru>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-05-22 23:17:40 +03:00
|
|
|
/*
|
|
|
|
* In case of incoming migration ROM will come with migration stream, no
|
|
|
|
* reason to load the file. Neither we want to fail if local ROM file
|
|
|
|
* mismatches with specified romsize.
|
|
|
|
*/
|
|
|
|
bool load_file = !runstate_check(RUN_STATE_INMIGRATE);
|
|
|
|
|
2023-05-15 15:52:27 +03:00
|
|
|
if (!pdev->romfile || !strlen(pdev->romfile)) {
|
2015-01-19 17:52:28 +03:00
|
|
|
return;
|
2023-05-15 15:52:27 +03:00
|
|
|
}
|
2009-12-18 14:01:08 +03:00
|
|
|
|
2010-01-08 17:25:41 +03:00
|
|
|
if (!pdev->rom_bar) {
|
|
|
|
/*
|
|
|
|
* Load rom via fw_cfg instead of creating a rom bar,
|
|
|
|
* for 0.11 compatibility.
|
|
|
|
*/
|
|
|
|
int class = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
|
2014-10-27 20:34:42 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Hot-plugged devices can't use the option ROM
|
|
|
|
* if the rom bar is disabled.
|
|
|
|
*/
|
|
|
|
if (DEVICE(pdev)->hotplugged) {
|
2015-01-19 17:52:28 +03:00
|
|
|
error_setg(errp, "Hot-plugged device without ROM bar"
|
|
|
|
" can't have an option ROM");
|
|
|
|
return;
|
2014-10-27 20:34:42 +03:00
|
|
|
}
|
|
|
|
|
2010-01-08 17:25:41 +03:00
|
|
|
if (class == 0x0300) {
|
|
|
|
rom_add_vga(pdev->romfile);
|
|
|
|
} else {
|
2010-12-08 14:35:07 +03:00
|
|
|
rom_add_option(pdev->romfile, -1);
|
2010-01-08 17:25:41 +03:00
|
|
|
}
|
2015-01-19 17:52:28 +03:00
|
|
|
return;
|
2010-01-08 17:25:41 +03:00
|
|
|
}
|
|
|
|
|
pci: ROM preallocation for incoming migration
On incoming migration we have the following sequence to load option
ROM:
1. On device realize we do normal load ROM from the file
2. Than, on incoming migration we rewrite ROM from the incoming RAM
block. If sizes mismatch we fail, like this:
Size mismatch: 0000:00:03.0/virtio-net-pci.rom: 0x40000 != 0x80000: Invalid argument
This is not ideal when we migrate to updated distribution: we have to
keep old ROM files in new distribution and be careful around romfile
property to load correct ROM file. Which is loaded actually just to
allocate the ROM with correct length.
Note, that romsize property doesn't really help: if we try to specify
it when default romfile is larger, it fails with something like:
romfile "efi-virtio.rom" (160768 bytes) is too large for ROM size 65536
Let's just ignore ROM file when romsize is specified and we are in
incoming migration state. In other words, we need only to preallocate
ROM of specified size, local ROM file is unrelated.
This way:
If romsize was specified on source, we just use same commandline as on
source, and migration will work independently of local ROM files on
target.
If romsize was not specified on source (and we have mismatching local
ROM file on target host), we have to specify romsize on target to match
source romsize. romfile parameter may be kept same as on source or may
be dropped, the file is not loaded anyway.
As a bonus we avoid extra reading from ROM file on target.
Note: when we don't have romsize parameter on source command line and
need it for target, it may be calculated as aligned up to power of two
size of ROM file on source (if we know, which file is it) or,
alternatively it may be retrieved from source QEMU by QMP qom-get
command, like
{ "execute": "qom-get",
"arguments": {
"path": "/machine/peripheral/CARD_ID/virtio-net-pci.rom[0]",
"property": "size" } }
Note: we have extra initialization of size variable to zero in
pci_add_option_rom to avoid false-positive
"error: ‘size’ may be used uninitialized"
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20230522201740.88960-2-vsementsov@yandex-team.ru>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-05-22 23:17:40 +03:00
|
|
|
if (load_file || pdev->romsize == -1) {
|
|
|
|
path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile);
|
|
|
|
if (path == NULL) {
|
|
|
|
path = g_strdup(pdev->romfile);
|
|
|
|
}
|
2009-12-18 14:01:07 +03:00
|
|
|
|
pci: ROM preallocation for incoming migration
On incoming migration we have the following sequence to load option
ROM:
1. On device realize we do normal load ROM from the file
2. Than, on incoming migration we rewrite ROM from the incoming RAM
block. If sizes mismatch we fail, like this:
Size mismatch: 0000:00:03.0/virtio-net-pci.rom: 0x40000 != 0x80000: Invalid argument
This is not ideal when we migrate to updated distribution: we have to
keep old ROM files in new distribution and be careful around romfile
property to load correct ROM file. Which is loaded actually just to
allocate the ROM with correct length.
Note, that romsize property doesn't really help: if we try to specify
it when default romfile is larger, it fails with something like:
romfile "efi-virtio.rom" (160768 bytes) is too large for ROM size 65536
Let's just ignore ROM file when romsize is specified and we are in
incoming migration state. In other words, we need only to preallocate
ROM of specified size, local ROM file is unrelated.
This way:
If romsize was specified on source, we just use same commandline as on
source, and migration will work independently of local ROM files on
target.
If romsize was not specified on source (and we have mismatching local
ROM file on target host), we have to specify romsize on target to match
source romsize. romfile parameter may be kept same as on source or may
be dropped, the file is not loaded anyway.
As a bonus we avoid extra reading from ROM file on target.
Note: when we don't have romsize parameter on source command line and
need it for target, it may be calculated as aligned up to power of two
size of ROM file on source (if we know, which file is it) or,
alternatively it may be retrieved from source QEMU by QMP qom-get
command, like
{ "execute": "qom-get",
"arguments": {
"path": "/machine/peripheral/CARD_ID/virtio-net-pci.rom[0]",
"property": "size" } }
Note: we have extra initialization of size variable to zero in
pci_add_option_rom to avoid false-positive
"error: ‘size’ may be used uninitialized"
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20230522201740.88960-2-vsementsov@yandex-team.ru>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-05-22 23:17:40 +03:00
|
|
|
size = get_image_size(path);
|
|
|
|
if (size < 0) {
|
|
|
|
error_setg(errp, "failed to find romfile \"%s\"", pdev->romfile);
|
|
|
|
return;
|
|
|
|
} else if (size == 0) {
|
|
|
|
error_setg(errp, "romfile \"%s\" is empty", pdev->romfile);
|
|
|
|
return;
|
|
|
|
} else if (size > 2 * GiB) {
|
|
|
|
error_setg(errp,
|
|
|
|
"romfile \"%s\" too large (size cannot exceed 2 GiB)",
|
|
|
|
pdev->romfile);
|
2021-02-03 16:18:28 +03:00
|
|
|
return;
|
|
|
|
}
|
pci: ROM preallocation for incoming migration
On incoming migration we have the following sequence to load option
ROM:
1. On device realize we do normal load ROM from the file
2. Than, on incoming migration we rewrite ROM from the incoming RAM
block. If sizes mismatch we fail, like this:
Size mismatch: 0000:00:03.0/virtio-net-pci.rom: 0x40000 != 0x80000: Invalid argument
This is not ideal when we migrate to updated distribution: we have to
keep old ROM files in new distribution and be careful around romfile
property to load correct ROM file. Which is loaded actually just to
allocate the ROM with correct length.
Note, that romsize property doesn't really help: if we try to specify
it when default romfile is larger, it fails with something like:
romfile "efi-virtio.rom" (160768 bytes) is too large for ROM size 65536
Let's just ignore ROM file when romsize is specified and we are in
incoming migration state. In other words, we need only to preallocate
ROM of specified size, local ROM file is unrelated.
This way:
If romsize was specified on source, we just use same commandline as on
source, and migration will work independently of local ROM files on
target.
If romsize was not specified on source (and we have mismatching local
ROM file on target host), we have to specify romsize on target to match
source romsize. romfile parameter may be kept same as on source or may
be dropped, the file is not loaded anyway.
As a bonus we avoid extra reading from ROM file on target.
Note: when we don't have romsize parameter on source command line and
need it for target, it may be calculated as aligned up to power of two
size of ROM file on source (if we know, which file is it) or,
alternatively it may be retrieved from source QEMU by QMP qom-get
command, like
{ "execute": "qom-get",
"arguments": {
"path": "/machine/peripheral/CARD_ID/virtio-net-pci.rom[0]",
"property": "size" } }
Note: we have extra initialization of size variable to zero in
pci_add_option_rom to avoid false-positive
"error: ‘size’ may be used uninitialized"
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20230522201740.88960-2-vsementsov@yandex-team.ru>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-05-22 23:17:40 +03:00
|
|
|
if (pdev->romsize != -1) {
|
|
|
|
if (size > pdev->romsize) {
|
|
|
|
error_setg(errp, "romfile \"%s\" (%u bytes) "
|
|
|
|
"is too large for ROM size %u",
|
|
|
|
pdev->romfile, (uint32_t)size, pdev->romsize);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
pdev->romsize = pow2ceil(size);
|
|
|
|
}
|
2021-02-03 16:18:28 +03:00
|
|
|
}
|
2009-12-18 14:01:07 +03:00
|
|
|
|
2011-12-09 20:51:49 +04:00
|
|
|
vmsd = qdev_get_vmsd(DEVICE(pdev));
|
2023-05-15 15:52:27 +03:00
|
|
|
snprintf(name, sizeof(name), "%s.rom",
|
|
|
|
vmsd ? vmsd->name : object_get_typename(OBJECT(pdev)));
|
2011-12-09 20:51:49 +04:00
|
|
|
|
2011-08-08 17:09:28 +04:00
|
|
|
pdev->has_rom = true;
|
2023-05-15 15:52:27 +03:00
|
|
|
memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize,
|
|
|
|
&error_fatal);
|
|
|
|
|
pci: ROM preallocation for incoming migration
On incoming migration we have the following sequence to load option
ROM:
1. On device realize we do normal load ROM from the file
2. Than, on incoming migration we rewrite ROM from the incoming RAM
block. If sizes mismatch we fail, like this:
Size mismatch: 0000:00:03.0/virtio-net-pci.rom: 0x40000 != 0x80000: Invalid argument
This is not ideal when we migrate to updated distribution: we have to
keep old ROM files in new distribution and be careful around romfile
property to load correct ROM file. Which is loaded actually just to
allocate the ROM with correct length.
Note, that romsize property doesn't really help: if we try to specify
it when default romfile is larger, it fails with something like:
romfile "efi-virtio.rom" (160768 bytes) is too large for ROM size 65536
Let's just ignore ROM file when romsize is specified and we are in
incoming migration state. In other words, we need only to preallocate
ROM of specified size, local ROM file is unrelated.
This way:
If romsize was specified on source, we just use same commandline as on
source, and migration will work independently of local ROM files on
target.
If romsize was not specified on source (and we have mismatching local
ROM file on target host), we have to specify romsize on target to match
source romsize. romfile parameter may be kept same as on source or may
be dropped, the file is not loaded anyway.
As a bonus we avoid extra reading from ROM file on target.
Note: when we don't have romsize parameter on source command line and
need it for target, it may be calculated as aligned up to power of two
size of ROM file on source (if we know, which file is it) or,
alternatively it may be retrieved from source QEMU by QMP qom-get
command, like
{ "execute": "qom-get",
"arguments": {
"path": "/machine/peripheral/CARD_ID/virtio-net-pci.rom[0]",
"property": "size" } }
Note: we have extra initialization of size variable to zero in
pci_add_option_rom to avoid false-positive
"error: ‘size’ may be used uninitialized"
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20230522201740.88960-2-vsementsov@yandex-team.ru>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-05-22 23:17:40 +03:00
|
|
|
if (load_file) {
|
|
|
|
void *ptr = memory_region_get_ram_ptr(&pdev->rom);
|
2009-12-18 14:01:07 +03:00
|
|
|
|
pci: ROM preallocation for incoming migration
On incoming migration we have the following sequence to load option
ROM:
1. On device realize we do normal load ROM from the file
2. Than, on incoming migration we rewrite ROM from the incoming RAM
block. If sizes mismatch we fail, like this:
Size mismatch: 0000:00:03.0/virtio-net-pci.rom: 0x40000 != 0x80000: Invalid argument
This is not ideal when we migrate to updated distribution: we have to
keep old ROM files in new distribution and be careful around romfile
property to load correct ROM file. Which is loaded actually just to
allocate the ROM with correct length.
Note, that romsize property doesn't really help: if we try to specify
it when default romfile is larger, it fails with something like:
romfile "efi-virtio.rom" (160768 bytes) is too large for ROM size 65536
Let's just ignore ROM file when romsize is specified and we are in
incoming migration state. In other words, we need only to preallocate
ROM of specified size, local ROM file is unrelated.
This way:
If romsize was specified on source, we just use same commandline as on
source, and migration will work independently of local ROM files on
target.
If romsize was not specified on source (and we have mismatching local
ROM file on target host), we have to specify romsize on target to match
source romsize. romfile parameter may be kept same as on source or may
be dropped, the file is not loaded anyway.
As a bonus we avoid extra reading from ROM file on target.
Note: when we don't have romsize parameter on source command line and
need it for target, it may be calculated as aligned up to power of two
size of ROM file on source (if we know, which file is it) or,
alternatively it may be retrieved from source QEMU by QMP qom-get
command, like
{ "execute": "qom-get",
"arguments": {
"path": "/machine/peripheral/CARD_ID/virtio-net-pci.rom[0]",
"property": "size" } }
Note: we have extra initialization of size variable to zero in
pci_add_option_rom to avoid false-positive
"error: ‘size’ may be used uninitialized"
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20230522201740.88960-2-vsementsov@yandex-team.ru>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2023-05-22 23:17:40 +03:00
|
|
|
if (load_image_size(path, ptr, size) < 0) {
|
|
|
|
error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is_default_rom) {
|
|
|
|
/* Only the default rom images will be patched (if needed). */
|
|
|
|
pci_patch_ids(pdev, ptr, size);
|
|
|
|
}
|
2010-10-20 01:08:21 +04:00
|
|
|
}
|
|
|
|
|
2011-08-08 17:09:31 +04:00
|
|
|
pci_register_bar(pdev, PCI_ROM_SLOT, 0, &pdev->rom);
|
2009-12-18 14:01:07 +03:00
|
|
|
}
|
|
|
|
|
2010-06-25 21:10:19 +04:00
|
|
|
/*
 * Undo the ROM bookkeeping of a device that has one: unregister the ROM
 * region from vmstate and clear pdev->has_rom.  No-op without a ROM.
 */
static void pci_del_option_rom(PCIDevice *pdev)
{
    if (pdev->has_rom) {
        vmstate_unregister_ram(&pdev->rom, &pdev->qdev);
        pdev->has_rom = false;
    }
}
|
|
|
|
|
2010-09-06 11:46:16 +04:00
|
|
|
/*
|
2017-06-27 09:16:51 +03:00
|
|
|
* On success, pci_add_capability() returns a positive value
|
2017-06-27 09:16:48 +03:00
|
|
|
* that the offset of the pci capability.
|
|
|
|
* On failure, it sets an error and returns a negative error
|
|
|
|
* code.
|
|
|
|
*/
|
2017-06-27 09:16:51 +03:00
|
|
|
int pci_add_capability(PCIDevice *pdev, uint8_t cap_id,
|
2014-04-10 12:24:36 +04:00
|
|
|
uint8_t offset, uint8_t size,
|
|
|
|
Error **errp)
|
2009-06-21 20:45:40 +04:00
|
|
|
{
|
2010-09-06 11:46:16 +04:00
|
|
|
uint8_t *config;
|
2011-08-24 16:29:30 +04:00
|
|
|
int i, overlapping_cap;
|
|
|
|
|
2010-09-06 11:46:16 +04:00
|
|
|
if (!offset) {
|
|
|
|
offset = pci_find_space(pdev, size);
|
2016-06-10 12:54:22 +03:00
|
|
|
/* out of PCI config space is programming error */
|
|
|
|
assert(offset);
|
2011-08-24 16:29:30 +04:00
|
|
|
} else {
|
|
|
|
/* Verify that capabilities don't overlap. Note: device assignment
|
|
|
|
* depends on this check to verify that the device is not broken.
|
|
|
|
* Should never trigger for emulated devices, but it's helpful
|
|
|
|
* for debugging these. */
|
|
|
|
for (i = offset; i < offset + size; i++) {
|
|
|
|
overlapping_cap = pci_find_capability_at_offset(pdev, i);
|
|
|
|
if (overlapping_cap) {
|
2014-04-10 12:24:36 +04:00
|
|
|
error_setg(errp, "%s:%02x:%02x.%x "
|
|
|
|
"Attempt to add PCI capability %x at offset "
|
|
|
|
"%x overlaps existing capability %x at offset %x",
|
2017-11-29 11:46:27 +03:00
|
|
|
pci_root_bus_path(pdev), pci_dev_bus_num(pdev),
|
2014-04-10 12:24:36 +04:00
|
|
|
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
|
|
|
|
cap_id, offset, overlapping_cap, i);
|
2011-08-24 16:29:30 +04:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
2010-09-06 11:46:16 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
config = pdev->config + offset;
|
2009-06-21 20:45:40 +04:00
|
|
|
config[PCI_CAP_LIST_ID] = cap_id;
|
|
|
|
config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
|
|
|
|
pdev->config[PCI_CAPABILITY_LIST] = offset;
|
|
|
|
pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
|
2012-10-22 14:35:00 +04:00
|
|
|
memset(pdev->used + offset, 0xFF, QEMU_ALIGN_UP(size, 4));
|
2009-06-21 20:45:40 +04:00
|
|
|
/* Make capability read-only by default */
|
|
|
|
memset(pdev->wmask + offset, 0, size);
|
2009-06-21 20:49:40 +04:00
|
|
|
/* Check capability by default */
|
|
|
|
memset(pdev->cmask + offset, 0xFF, size);
|
2009-06-21 20:45:40 +04:00
|
|
|
return offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Unlink capability from the pci config space. */
|
|
|
|
void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
|
|
|
|
{
|
|
|
|
uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
|
|
|
|
if (!offset)
|
|
|
|
return;
|
|
|
|
pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
|
2011-04-26 12:29:36 +04:00
|
|
|
/* Make capability writable again */
|
2009-06-21 20:45:40 +04:00
|
|
|
memset(pdev->wmask + offset, 0xff, size);
|
2010-10-18 07:17:42 +04:00
|
|
|
memset(pdev->w1cmask + offset, 0, size);
|
2009-06-21 20:49:40 +04:00
|
|
|
/* Clear cmask as device-specific registers can't be checked */
|
|
|
|
memset(pdev->cmask + offset, 0, size);
|
2012-10-22 14:35:00 +04:00
|
|
|
memset(pdev->used + offset, 0, QEMU_ALIGN_UP(size, 4));
|
2009-06-21 20:45:40 +04:00
|
|
|
|
|
|
|
if (!pdev->config[PCI_CAPABILITY_LIST])
|
|
|
|
pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Look up capability @cap_id on @pdev.
 *
 * Thin wrapper around pci_find_capability_list() for callers that do not
 * need the index of the preceding list entry; the helper's result is
 * returned unchanged.
 */
uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
{
    uint8_t offset = pci_find_capability_list(pdev, cap_id, NULL);

    return offset;
}
|
2009-06-30 16:12:08 +04:00
|
|
|
|
2010-12-08 14:35:01 +03:00
|
|
|
/*
 * Write a firmware-style name for @dev into @buf (capacity @len) and
 * return @buf.
 *
 * The device class word from config space is compared against each entry of
 * pci_class_descriptions, ignoring that entry's fw_ign_bits.  When a matching
 * entry has an fw_name it is copied into @buf; otherwise the name is
 * "pci<vendor>,<device>" formatted from the vendor and device ID words.
 */
static char *pci_dev_fw_name(DeviceState *dev, char *buf, int len)
{
    PCIDevice *d = (PCIDevice *)dev;
    int class = pci_get_word(d->config + PCI_CLASS_DEVICE);
    const pci_class_desc *desc;

    /* The table is terminated by an entry whose desc pointer is NULL. */
    for (desc = pci_class_descriptions; desc->desc; desc++) {
        if ((class & ~desc->fw_ign_bits) ==
            (desc->class & ~desc->fw_ign_bits)) {
            break;
        }
    }

    if (desc->desc && desc->fw_name) {
        pstrcpy(buf, len, desc->fw_name);
    } else {
        /* No table match, or the match carries no firmware name. */
        snprintf(buf, len, "pci%04x,%04x",
                 pci_get_word(d->config + PCI_VENDOR_ID),
                 pci_get_word(d->config + PCI_DEVICE_ID));
    }

    return buf;
}
|
|
|
|
|
|
|
|
/*
 * Build the firmware device path component for @dev:
 * "<fw name>@<slot>" for function 0, "<fw name>@<slot>,<func>" otherwise
 * (slot and function in lower-case hex without padding).
 *
 * The string is produced by a single g_strdup_printf() call per case, so no
 * manual buffer-size arithmetic is involved; the result is a newly allocated
 * string.
 */
static char *pcibus_get_fw_dev_path(DeviceState *dev)
{
    PCIDevice *d = (PCIDevice *)dev;
    char name[33];
    const char *fw_name = pci_dev_fw_name(dev, name, sizeof(name));

    if (PCI_FUNC(d->devfn)) {
        return g_strdup_printf("%s@%x,%x", fw_name,
                               PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
    }

    /* Function 0 is implied and therefore left out of the path. */
    return g_strdup_printf("%s@%x", fw_name, PCI_SLOT(d->devfn));
}
|
|
|
|
|
2010-06-25 21:08:59 +04:00
|
|
|
/*
 * Build the canonical device path for @dev as a g_malloc()ed string.
 *
 * Path format: Domain:00:Slot.Function:Slot.Function....:Slot.Function.
 * 00 is added here to make this format compatible with
 * domain:Bus:Slot.Func for systems without nested PCI bridges.
 * Slot.Function list specifies the slot and function numbers for all
 * devices on the path from root to the specific device.
 */
static char *pcibus_get_dev_path(DeviceState *dev)
{
    PCIDevice *d = container_of(dev, PCIDevice, qdev);
    const char *root_bus_path = pci_root_bus_path(d);
    int root_bus_len = strlen(root_bus_path);
    char slot[] = ":SS.F";
    int slot_len = sizeof slot - 1 /* For '\0' */;
    int slot_depth = 0;
    int path_len;
    char *path, *p;
    PCIDevice *t;

    /* Calculate # of slots on path between device and root. */
    t = d;
    while (t) {
        slot_depth++;
        t = pci_get_bus(t)->parent_dev;
    }

    path_len = root_bus_len + slot_len * slot_depth;

    /* Allocate memory, fill in the terminating null byte. */
    path = g_malloc(path_len + 1 /* For '\0' */);
    path[path_len] = '\0';

    /* The root bus path forms the prefix. */
    memcpy(path, root_bus_path, root_bus_len);

    /*
     * Fill in slot numbers.  We walk up from device to root, so need to
     * print them in the reverse order, last to first.
     */
    p = path + path_len;
    for (t = d; t; t = pci_get_bus(t)->parent_dev) {
        int s = snprintf(slot, sizeof slot, ":%02x.%x",
                         PCI_SLOT(t->devfn), PCI_FUNC(t->devfn));

        /* Every component must be exactly as wide as the template. */
        assert(s == slot_len);
        p -= slot_len;
        memcpy(p, slot, slot_len);
    }

    return path;
}
|
|
|
|
|
2010-12-24 06:14:13 +03:00
|
|
|
/*
 * Search the qdev tree below @bus for a device with the given @id.
 *
 * Returns 0 and stores the device in *@pdev when it is a PCI device,
 * -ENODEV when no device with that id exists under @bus, and -EINVAL when
 * a device was found but is not a TYPE_PCI_DEVICE.
 */
static int pci_qdev_find_recursive(PCIBus *bus,
                                   const char *id, PCIDevice **pdev)
{
    DeviceState *qdev = qdev_find_recursive(&bus->qbus, id);

    if (!qdev) {
        return -ENODEV;
    }

    /* roughly check if given qdev is pci device */
    if (!object_dynamic_cast(OBJECT(qdev), TYPE_PCI_DEVICE)) {
        return -EINVAL;
    }

    *pdev = PCI_DEVICE(qdev);
    return 0;
}
|
|
|
|
|
|
|
|
/*
 * Find the PCI device with qdev id @id under any registered host bridge.
 *
 * Returns 0 with *@pdev set on the first match.  Otherwise returns the last
 * error other than -ENODEV reported for a host bridge, or -ENODEV when no
 * bridge reported anything else.
 */
int pci_qdev_find_device(const char *id, PCIDevice **pdev)
{
    PCIHostState *host_bridge;
    int result = -ENODEV;

    QLIST_FOREACH(host_bridge, &pci_host_bridges, next) {
        int err = pci_qdev_find_recursive(host_bridge->bus, id, pdev);

        if (err == 0) {
            return 0;
        }
        /* Keep a more specific error, but continue with the next bridge. */
        if (err != -ENODEV) {
            result = err;
        }
    }

    return result;
}
|
2011-08-15 18:17:36 +04:00
|
|
|
|
|
|
|
/* Return the memory address space region of the bus @dev sits on. */
MemoryRegion *pci_address_space(PCIDevice *dev)
{
    PCIBus *bus = pci_get_bus(dev);

    return bus->address_space_mem;
}
|
2011-08-11 02:28:10 +04:00
|
|
|
|
|
|
|
/* Return the I/O address space region of the bus @dev sits on. */
MemoryRegion *pci_address_space_io(PCIDevice *dev)
{
    PCIBus *bus = pci_get_bus(dev);

    return bus->address_space_io;
}
|
2011-12-04 22:22:06 +04:00
|
|
|
|
2011-12-08 07:34:16 +04:00
|
|
|
/*
 * Class initializer for TYPE_PCI_DEVICE: installs the realize/unrealize
 * hooks, binds the class to TYPE_PCI_BUS and registers pci_props.
 */
static void pci_device_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->bus_type = TYPE_PCI_BUS;
    dc->realize = pci_qdev_realize;
    dc->unrealize = pci_qdev_unrealize;
    device_class_set_props(dc, pci_props);
}
|
|
|
|
|
2017-09-27 22:56:35 +03:00
|
|
|
/*
 * Base-class initializer for TYPE_PCI_DEVICE.
 *
 * Asserts that every non-abstract class implements at least one of the
 * conventional PCI, PCIe or CXL device interfaces.  Abstract classes are
 * not checked.
 */
static void pci_device_class_base_init(ObjectClass *klass, void *data)
{
    ObjectClass *conventional;
    ObjectClass *pcie;
    ObjectClass *cxl;

    if (object_class_is_abstract(klass)) {
        return;
    }

    conventional = object_class_dynamic_cast(klass,
                                             INTERFACE_CONVENTIONAL_PCI_DEVICE);
    pcie = object_class_dynamic_cast(klass, INTERFACE_PCIE_DEVICE);
    cxl = object_class_dynamic_cast(klass, INTERFACE_CXL_DEVICE);

    assert(conventional || pcie || cxl);
}
|
|
|
|
|
2013-08-09 19:09:08 +04:00
|
|
|
/*
 * Return the AddressSpace that DMA from @dev goes through.
 *
 * Starting at the device's own bus, the bus hierarchy is walked upwards
 * until a bus with an iommu_fn is found or there is no further parent
 * device.  While walking, the (bus, devfn) pair handed to the IOMMU hook is
 * rewritten whenever a conventional (non-express) bus is crossed, as
 * described in the comment inside the loop.  If no IOMMU hook is found, or
 * the resulting bus bypasses the IOMMU, &address_space_memory is returned.
 */
AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
{
    PCIBus *bus = pci_get_bus(dev);
    PCIBus *iommu_bus = bus;
    uint8_t devfn = dev->devfn;

    while (iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) {
        PCIDevice *bridge = iommu_bus->parent_dev;
        PCIBus *parent_bus = pci_get_bus(bridge);

        /*
         * The requester ID of the provided device may be aliased, as seen from
         * the IOMMU, due to topology limitations.  The IOMMU relies on a
         * requester ID to provide a unique AddressSpace for devices, but
         * conventional PCI buses pre-date such concepts.  Instead, the PCIe-
         * to-PCI bridge creates and accepts transactions on behalf of down-
         * stream devices.  When doing so, all downstream devices are masked
         * (aliased) behind a single requester ID.  The requester ID used
         * depends on the format of the bridge devices.  Proper PCIe-to-PCI
         * bridges, with a PCIe capability indicating such, follow the
         * guidelines of chapter 2.3 of the PCIe-to-PCI/X bridge specification,
         * where the bridge uses the secondary bus as the bridge portion of the
         * requester ID and devfn of 00.0.  For other bridges, typically those
         * found on the root complex such as the dmi-to-pci-bridge, we follow
         * the convention of typical bare-metal hardware, which uses the
         * requester ID of the bridge itself.  There are device specific
         * exceptions to these rules, but these are the defaults that the
         * Linux kernel uses when determining DMA aliases itself and believed
         * to be true for the bare metal equivalents of the devices emulated
         * in QEMU.
         */
        if (!pci_bus_is_express(iommu_bus)) {
            if (pci_is_express(bridge) &&
                pcie_cap_get_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
                /* PCIe-to-PCI bridge: alias to devfn 00.0 on this bus. */
                devfn = PCI_DEVFN(0, 0);
                bus = iommu_bus;
            } else {
                /* Any other bridge: alias to the bridge device itself. */
                devfn = bridge->devfn;
                bus = parent_bus;
            }
        }

        iommu_bus = parent_bus;
    }

    if (pci_bus_bypass_iommu(bus) || !iommu_bus || !iommu_bus->iommu_fn) {
        return &address_space_memory;
    }

    return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn);
}
|
|
|
|
|
2012-10-30 15:47:48 +04:00
|
|
|
/*
 * Install the IOMMU hook for @bus: @fn is stored as the bus's iommu_fn and
 * @opaque as its iommu_opaque (the value pci_device_iommu_address_space()
 * passes back to the hook).
 */
void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque)
{
    bus->iommu_opaque = opaque;
    bus->iommu_fn = fn;
}
|
|
|
|
|
2013-09-02 12:37:02 +04:00
|
|
|
/*
 * Per-device callback used by pci_bus_get_w64_range().
 *
 * Extends the Range passed in @opaque by the parts of @dev's 64-bit memory
 * resources that lie at or above 4 GiB:
 *  - for bridges, the prefetchable memory window;
 *  - for every device, each mapped 64-bit memory BAR.
 * Devices with memory decoding disabled in PCI_COMMAND contribute nothing.
 * @b is unused.
 */
static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
{
    Range *range = opaque;
    uint16_t cmd = pci_get_word(dev->config + PCI_COMMAND);
    int i;

    if (!(cmd & PCI_COMMAND_MEMORY)) {
        return;
    }

    if (IS_PCI_BRIDGE(dev)) {
        pcibus_t base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_MEM_PREFETCH);
        pcibus_t limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_MEM_PREFETCH);

        /* Only the portion of the window above 4 GiB is of interest. */
        base = MAX(base, 0x1ULL << 32);

        if (limit >= base) {
            Range pref_range;
            range_set_bounds(&pref_range, base, limit);
            range_extend(range, &pref_range);
        }
    }
    for (i = 0; i < PCI_NUM_REGIONS; ++i) {
        PCIIORegion *r = &dev->io_regions[i];
        pcibus_t lob, upb;
        Range region_range;

        /* Skip unused regions, I/O BARs and 32-bit memory BARs. */
        if (!r->size ||
            (r->type & PCI_BASE_ADDRESS_SPACE_IO) ||
            !(r->type & PCI_BASE_ADDRESS_MEM_TYPE_64)) {
            continue;
        }

        lob = pci_bar_address(dev, i, r->type, r->size);
        upb = lob + r->size - 1;
        if (lob == PCI_BAR_UNMAPPED) {
            continue;
        }

        /* Clamp the lower bound to 4 GiB, as for the bridge window. */
        lob = MAX(lob, 0x1ULL << 32);

        if (upb >= lob) {
            range_set_bounds(&region_range, lob, upb);
            range_extend(range, &region_range);
        }
    }
}
|
|
|
|
|
|
|
|
/*
 * Compute into @range the span accumulated by pci_dev_get_w64() over the
 * devices under @bus.  @range is emptied first, so it stays empty when no
 * device contributes.
 */
void pci_bus_get_w64_range(PCIBus *bus, Range *range)
{
    /* Start from an empty range; the callback only ever extends it. */
    range_make_empty(range);

    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
}
|
|
|
|
|
2015-10-28 09:20:31 +03:00
|
|
|
/*
 * Tell whether @dev is associated with an upstream port.
 *
 * As there are several types of these, it's easier to check the
 * parent device: upstream ports are always connected to
 * root or downstream ports.
 */
static bool pcie_has_upstream_port(PCIDevice *dev)
{
    PCIDevice *parent_dev = pci_bridge_get_device(pci_get_bus(dev));

    /* No bridge above us, or one without a PCIe capability: not a match. */
    if (!parent_dev || !pci_is_express(parent_dev) ||
        !parent_dev->exp.exp_cap) {
        return false;
    }

    return pcie_cap_get_type(parent_dev) == PCI_EXP_TYPE_ROOT_PORT ||
           pcie_cap_get_type(parent_dev) == PCI_EXP_TYPE_DOWNSTREAM;
}
|
|
|
|
|
|
|
|
/*
 * Return the device registered as function 0 for @pci_dev's slot on its
 * bus (the bus's devices[] entry, which may be NULL if nothing is there).
 */
PCIDevice *pci_get_function_0(PCIDevice *pci_dev)
{
    PCIBus *bus = pci_get_bus(pci_dev);
    int devfn;

    if (pcie_has_upstream_port(pci_dev)) {
        /* With an upstream PCIe port, we only support 1 device at slot 0 */
        devfn = 0;
    } else {
        /* Other bus types might support multiple devices at slots 0-31 */
        devfn = PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 0);
    }

    return bus->devices[devfn];
}
|
|
|
|
|
2016-07-14 08:56:32 +03:00
|
|
|
/*
 * Return the message for interrupt @vector of @dev, taken from MSI-X when
 * that is enabled and from MSI otherwise.  Calling this with neither
 * enabled is reported as an error and aborts.
 */
MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
{
    if (msix_enabled(dev)) {
        return msix_get_message(dev, vector);
    }

    if (msi_enabled(dev)) {
        return msi_get_message(dev, vector);
    }

    /* Should never happen */
    error_report("%s: unknown interrupt type", __func__);
    abort();
}
|
|
|
|
|
2021-11-11 16:08:54 +03:00
|
|
|
/*
 * Switch the power state of @d to @state.
 *
 * Nothing happens if the device is already in that state.  Otherwise the
 * new state is recorded, the mappings are refreshed, and the bus-master
 * region is enabled only when both PCI_COMMAND_MASTER is set and the device
 * has power.  Powering off additionally resets the device.
 */
void pci_set_power(PCIDevice *d, bool state)
{
    bool bus_master_on;

    if (d->has_power == state) {
        return;
    }

    d->has_power = state;
    pci_update_mappings(d);

    bus_master_on = (pci_get_word(d->config + PCI_COMMAND)
                     & PCI_COMMAND_MASTER) && d->has_power;
    memory_region_set_enabled(&d->bus_master_enable_region, bus_master_on);

    if (!d->has_power) {
        pci_device_reset(d);
    }
}
|
|
|
|
|
2013-01-10 19:19:07 +04:00
|
|
|
static const TypeInfo pci_device_type_info = {
|
2011-12-04 22:22:06 +04:00
|
|
|
.name = TYPE_PCI_DEVICE,
|
|
|
|
.parent = TYPE_DEVICE,
|
|
|
|
.instance_size = sizeof(PCIDevice),
|
|
|
|
.abstract = true,
|
|
|
|
.class_size = sizeof(PCIDeviceClass),
|
2011-12-08 07:34:16 +04:00
|
|
|
.class_init = pci_device_class_init,
|
2017-09-27 22:56:35 +03:00
|
|
|
.class_base_init = pci_device_class_base_init,
|
2011-12-04 22:22:06 +04:00
|
|
|
};
|
|
|
|
|
2012-02-09 18:20:55 +04:00
|
|
|
static void pci_register_types(void)
|
2011-12-04 22:22:06 +04:00
|
|
|
{
|
2012-05-02 11:00:20 +04:00
|
|
|
type_register_static(&pci_bus_info);
|
2013-03-15 02:00:59 +04:00
|
|
|
type_register_static(&pcie_bus_info);
|
2022-04-29 17:40:39 +03:00
|
|
|
type_register_static(&cxl_bus_info);
|
2017-09-27 22:56:31 +03:00
|
|
|
type_register_static(&conventional_pci_interface_info);
|
2022-04-29 17:40:26 +03:00
|
|
|
type_register_static(&cxl_interface_info);
|
2017-09-27 22:56:31 +03:00
|
|
|
type_register_static(&pcie_interface_info);
|
2011-12-04 22:22:06 +04:00
|
|
|
type_register_static(&pci_device_type_info);
|
|
|
|
}
|
|
|
|
|
2012-02-09 18:20:55 +04:00
|
|
|
type_init(pci_register_types)
|