2018-10-10 20:03:07 +03:00
|
|
|
/*
|
|
|
|
* VFIO based AP matrix device assignment
|
|
|
|
*
|
|
|
|
* Copyright 2018 IBM Corp.
|
|
|
|
* Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
|
|
|
|
* Halil Pasic <pasic@linux.ibm.com>
|
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or (at
|
|
|
|
* your option) any later version. See the COPYING file in the top-level
|
|
|
|
* directory.
|
|
|
|
*/
|
|
|
|
|
2018-12-04 20:25:35 +03:00
|
|
|
#include "qemu/osdep.h"
|
2018-10-10 20:03:07 +03:00
|
|
|
#include <linux/vfio.h>
|
|
|
|
#include <sys/ioctl.h>
|
|
|
|
#include "qapi/error.h"
|
|
|
|
#include "hw/vfio/vfio.h"
|
|
|
|
#include "hw/vfio/vfio-common.h"
|
|
|
|
#include "hw/s390x/ap-device.h"
|
|
|
|
#include "qemu/error-report.h"
|
2019-05-23 17:35:07 +03:00
|
|
|
#include "qemu/module.h"
|
2018-10-10 20:03:07 +03:00
|
|
|
#include "qemu/option.h"
|
|
|
|
#include "qemu/config-file.h"
|
|
|
|
#include "kvm_s390x.h"
|
2019-08-12 08:23:45 +03:00
|
|
|
#include "migration/vmstate.h"
|
2019-08-12 08:23:51 +03:00
|
|
|
#include "hw/qdev-properties.h"
|
2018-10-10 20:03:07 +03:00
|
|
|
#include "hw/s390x/ap-bridge.h"
|
|
|
|
#include "exec/address-spaces.h"
|
2020-09-03 23:43:22 +03:00
|
|
|
#include "qom/object.h"
|
2018-10-10 20:03:07 +03:00
|
|
|
|
2020-09-03 01:42:16 +03:00
|
|
|
#define TYPE_VFIO_AP_DEVICE "vfio-ap"
|
2018-10-10 20:03:07 +03:00
|
|
|
|
2020-09-03 23:43:22 +03:00
|
|
|
struct VFIOAPDevice {
|
2018-10-10 20:03:07 +03:00
|
|
|
APDevice apdev;
|
|
|
|
VFIODevice vdev;
|
2020-09-03 23:43:22 +03:00
|
|
|
};
|
2018-10-10 20:03:07 +03:00
|
|
|
|
2020-09-16 21:25:19 +03:00
|
|
|
OBJECT_DECLARE_SIMPLE_TYPE(VFIOAPDevice, VFIO_AP_DEVICE)
|
2018-10-10 20:03:07 +03:00
|
|
|
|
|
|
|
/*
 * VFIODeviceOps.vfio_compute_needs_reset callback for vfio-ap.
 *
 * Unconditionally records that @vdev does not need a reset.
 */
static void vfio_ap_compute_needs_reset(VFIODevice *vdev)
{
    vdev->needs_reset = false;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We don't need vfio_hot_reset_multi and vfio_eoi operations for
|
|
|
|
* vfio-ap device now.
|
|
|
|
*/
|
|
|
|
struct VFIODeviceOps vfio_ap_ops = {
|
|
|
|
.vfio_compute_needs_reset = vfio_ap_compute_needs_reset,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Release what vfio_ap_realize() set up on @vapdev's VFIO base device:
 * free the device name and hand the base device to vfio_put_base_device().
 *
 * The name pointer is cleared after being freed so that the device state
 * never holds a dangling pointer and a repeated call cannot double-free it.
 */
static void vfio_ap_put_device(VFIOAPDevice *vapdev)
{
    g_free(vapdev->vdev.name);
    vapdev->vdev.name = NULL;
    vfio_put_base_device(&vapdev->vdev);
}
|
|
|
|
|
|
|
|
/*
 * Look up the VFIO group of the mediated device configured for @vapdev.
 *
 * The group id is parsed as a decimal integer from the last path component
 * of the target of the "<sysfsdev>/iommu_group" symbolic link and is then
 * passed, together with address_space_memory, to vfio_get_group().
 *
 * @vapdev: the vfio-ap device; its vdev.sysfsdev names the mdev sysfs path
 * @errp:   error object, set on failure
 *
 * Returns: the result of vfio_get_group(), or NULL with @errp set when no
 *          sysfsdev is configured, the link cannot be read, or its target
 *          does not end in a number.
 */
static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp)
{
    GError *gerror = NULL;
    char *symlink, *group_path;
    int groupid;

    /*
     * Without this guard an unset sysfsdev would be passed as NULL to the
     * "%s" conversions below, which is undefined behaviour.
     */
    if (!vapdev->vdev.sysfsdev) {
        error_setg(errp, "%s: no sysfsdev specified", TYPE_VFIO_AP_DEVICE);
        return NULL;
    }

    symlink = g_strdup_printf("%s/iommu_group", vapdev->vdev.sysfsdev);
    group_path = g_file_read_link(symlink, &gerror);
    g_free(symlink);

    if (!group_path) {
        error_setg(errp, "%s: no iommu_group found for %s: %s",
                   TYPE_VFIO_AP_DEVICE, vapdev->vdev.sysfsdev, gerror->message);
        g_error_free(gerror);
        return NULL;
    }

    if (sscanf(basename(group_path), "%d", &groupid) != 1) {
        error_setg(errp, "vfio: failed to read %s", group_path);
        g_free(group_path);
        return NULL;
    }

    g_free(group_path);

    return vfio_get_group(groupid, &address_space_memory, errp);
}
|
|
|
|
|
|
|
|
/*
 * DeviceClass.realize handler for vfio-ap.
 *
 * Obtains the VFIO group for the configured mdev, fills in the VFIO base
 * device (ops, type, name, owning DeviceState) and calls vfio_get_device().
 * If vfio_get_device() fails, the base device and the group are put again;
 * @errp is filled in by the helpers that failed.
 */
static void vfio_ap_realize(DeviceState *dev, Error **errp)
{
    VFIOAPDevice *vapdev = VFIO_AP_DEVICE(AP_DEVICE(dev));
    VFIODevice *vbasedev = &vapdev->vdev;
    VFIOGroup *group;
    char *mdevid;

    group = vfio_ap_get_group(vapdev, errp);
    if (!group) {
        return;
    }

    /* The device is named after the last component of its sysfs path. */
    mdevid = basename(vbasedev->sysfsdev);

    vbasedev->ops = &vfio_ap_ops;
    vbasedev->type = VFIO_DEVICE_TYPE_AP;
    vbasedev->name = g_strdup_printf("%s", mdevid);
    vbasedev->dev = dev;

    /*
     * vfio-ap devices operate in a way compatible with discarding of
     * memory in RAM blocks, as no pages are pinned in the host.
     * This needs to be set before vfio_get_device() for vfio common to
     * handle ram_block_discard_disable().
     */
    vbasedev->ram_block_discard_allowed = true;

    if (vfio_get_device(group, mdevid, vbasedev, errp) == 0) {
        return;
    }

    /* vfio_get_device() failed: undo the setup done above. */
    vfio_ap_put_device(vapdev);
    vfio_put_group(group);
}
|
|
|
|
|
qdev: Unrealize must not fail
Devices may have component devices and buses.
Device realization may fail. Realization is recursive: a device's
realize() method realizes its components, and device_set_realized()
realizes its buses (which should in turn realize the devices on that
bus, except bus_set_realized() doesn't implement that, yet).
When realization of a component or bus fails, we need to roll back:
unrealize everything we realized so far. If any of these unrealizes
failed, the device would be left in an inconsistent state. Must not
happen.
device_set_realized() lets it happen: it ignores errors in the roll
back code starting at label child_realize_fail.
Since realization is recursive, unrealization must be recursive, too.
But how could a partly failed unrealize be rolled back? We'd have to
re-realize, which can fail. This design is fundamentally broken.
device_set_realized() does not roll back at all. Instead, it keeps
unrealizing, ignoring further errors.
It can screw up even for a device with no buses: if the lone
dc->unrealize() fails, it still unregisters vmstate, and calls
listeners' unrealize() callback.
bus_set_realized() does not roll back either. Instead, it stops
unrealizing.
Fortunately, no unrealize method can fail, as we'll see below.
To fix the design error, drop parameter @errp from all the unrealize
methods.
Any unrealize method that uses @errp now needs an update. This leads
us to unrealize() methods that can fail. Merely passing it to another
unrealize method cannot cause failure, though. Here are the ones that
do other things with @errp:
* virtio_serial_device_unrealize()
Fails when qbus_set_hotplug_handler() fails, but still does all the
other work. On failure, the device would stay realized with its
resources completely gone. Oops. Can't happen, because
qbus_set_hotplug_handler() can't actually fail here. Pass
&error_abort to qbus_set_hotplug_handler() instead.
* hw/ppc/spapr_drc.c's unrealize()
Fails when object_property_del() fails, but all the other work is
already done. On failure, the device would stay realized with its
vmstate registration gone. Oops. Can't happen, because
object_property_del() can't actually fail here. Pass &error_abort
to object_property_del() instead.
* spapr_phb_unrealize()
Fails and bails out when remove_drcs() fails, but other work is
already done. On failure, the device would stay realized with some
of its resources gone. Oops. remove_drcs() fails only when
chassis_from_bus()'s object_property_get_uint() fails, and it can't
here. Pass &error_abort to remove_drcs() instead.
Therefore, no unrealize method can fail before this patch.
device_set_realized()'s recursive unrealization via bus uses
object_property_set_bool(). Can't drop @errp there, so pass
&error_abort.
We similarly unrealize with object_property_set_bool() elsewhere,
always ignoring errors. Pass &error_abort instead.
Several unrealize methods no longer handle errors from other unrealize
methods: virtio_9p_device_unrealize(),
virtio_input_device_unrealize(), scsi_qdev_unrealize(), ...
Much of the deleted error handling looks wrong anyway.
One unrealize method no longer ignores such errors:
usb_ehci_pci_exit().
Several realize methods no longer ignore errors when rolling back:
v9fs_device_realize_common(), pci_qdev_unrealize(),
spapr_phb_realize(), usb_qdev_realize(), vfio_ccw_realize(),
virtio_device_realize().
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200505152926.18877-17-armbru@redhat.com>
2020-05-05 18:29:24 +03:00
|
|
|
/*
 * DeviceClass.unrealize handler for vfio-ap: puts the VFIO base device and
 * then the VFIO group it belonged to.
 */
static void vfio_ap_unrealize(DeviceState *dev)
{
    VFIOAPDevice *vapdev = VFIO_AP_DEVICE(AP_DEVICE(dev));
    /* Read the group pointer before the base device is released. */
    VFIOGroup *vfio_group = vapdev->vdev.group;

    vfio_ap_put_device(vapdev);
    vfio_put_group(vfio_group);
}
|
|
|
|
|
|
|
|
static Property vfio_ap_properties[] = {
|
|
|
|
DEFINE_PROP_STRING("sysfsdev", VFIOAPDevice, vdev.sysfsdev),
|
|
|
|
DEFINE_PROP_END_OF_LIST(),
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * DeviceClass.reset handler for vfio-ap.
 *
 * Issues VFIO_DEVICE_RESET on the device's file descriptor. A failing ioctl
 * is only reported via error_report(); nothing is returned to the caller.
 */
static void vfio_ap_reset(DeviceState *dev)
{
    VFIOAPDevice *vapdev = VFIO_AP_DEVICE(AP_DEVICE(dev));

    if (ioctl(vapdev->vdev.fd, VFIO_DEVICE_RESET) == 0) {
        return;
    }

    error_report("%s: failed to reset %s device: %s", __func__,
                 vapdev->vdev.name, strerror(errno));
}
|
|
|
|
|
|
|
|
static const VMStateDescription vfio_ap_vmstate = {
|
2019-05-21 18:15:41 +03:00
|
|
|
.name = "vfio-ap",
|
2018-10-10 20:03:07 +03:00
|
|
|
.unmigratable = 1,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Class initializer for the vfio-ap type: installs the properties, the
 * vmstate description and the realize/unrealize/reset handlers, and places
 * the device on the AP bus in the "misc" device category.
 */
static void vfio_ap_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    device_class_set_props(dc, vfio_ap_properties);

    /* Identification and classification. */
    dc->desc = "VFIO-based AP device assignment";
    dc->bus_type = TYPE_AP_BUS;
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);

    /* Life-cycle callbacks. */
    dc->realize = vfio_ap_realize;
    dc->unrealize = vfio_ap_unrealize;
    dc->reset = vfio_ap_reset;
    dc->vmsd = &vfio_ap_vmstate;

    /*
     * The device may be hot plugged and unplugged (device_add/device_del).
     * No dedicated hotplug handler is required: the vfio_ap kernel driver
     * configures the guest's AP matrix when the mdev file descriptor is
     * opened at realize time, and clears it and resets the affected queues
     * when the descriptor is closed at unrealize time.
     */
    dc->hotpluggable = true;
}
|
|
|
|
|
|
|
|
static const TypeInfo vfio_ap_info = {
|
2020-09-03 01:42:16 +03:00
|
|
|
.name = TYPE_VFIO_AP_DEVICE,
|
2020-09-03 01:42:13 +03:00
|
|
|
.parent = TYPE_AP_DEVICE,
|
2018-10-10 20:03:07 +03:00
|
|
|
.instance_size = sizeof(VFIOAPDevice),
|
|
|
|
.class_init = vfio_ap_class_init,
|
|
|
|
};
|
|
|
|
|
|
|
|
static void vfio_ap_type_init(void)
|
|
|
|
{
|
|
|
|
type_register_static(&vfio_ap_info);
|
|
|
|
}
|
|
|
|
|
|
|
|
type_init(vfio_ap_type_init)
|