VFIO update 2020-11-01
* Migration support (Kirti Wankhede) * s390 DMA limiting (Matthew Rosato) * zPCI hardware info (Matthew Rosato) * Lock guard (Amey Narkhede) * Print fixes (Zhengui li) * Warning/build fixes -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.14 (GNU/Linux) iQIcBAABAgAGBQJfnxyyAAoJECObm247sIsi5lUP/AvsTnmg56sT0vPjFNGpsdpo K6VkClOpT92WEZmY4EjoA2yCJYQXwuokr2m2ZnbwAMq40Z4wfitjJYe8X9VsUJ4p l5fsLtxAbTA3rZH505HhTLHnexy8LloG8Nx101NPCges+oRZP5Wx95GfcuwnB3b/ 68SU4L0e22n38vTvYwdIX7ACCp2akkSfDp22v++V5XrnRdZi8NOySmRYdxjYE2SX V8UpqIi/qJzdVrP6DD8MJXWegJzL6EWOjw1yh3ntbBj9TYkiZlK4AjiWT95ccE8u xUbXFpyTTQUAY/wKnAl7K5KCi7dwyUDv3ZvKTE0ognI/IkYAIuwEOkUwqW8Ypt9o kJV8EQkLNTl63MKvHqtC/CN2Ru2VAacgs3h9b8EezJhuB29f5RMy54KzVXTL/nU/ NOMxG9hvDisUdkMN/b8nVv0T6mSYPoZnILPoMp7HiPW28kG637DUV7PlN4NoM1zi yT8YAFOoUyLoZc4vE67zvTLcVlX6iVgnhRqvWkH+fzoxdaCaQRrrrioolWq0AvAW Lsb8JJSSEbVXQiafuraDgXxyeR/OZ58o8DPq2+eOeNnBCviAcbF40wDkEYGPpJma bXNqPPx5L/hrnH0e80jAxhoBnF3VRkDEyvAYKo9IGEkZLz2baUG0hov6F+Sw47oN TuIsstsLwbRuKlRmznSF =5Xrr -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20201101.0' into staging VFIO update 2020-11-01 * Migration support (Kirti Wankhede) * s390 DMA limiting (Matthew Rosato) * zPCI hardware info (Matthew Rosato) * Lock guard (Amey Narkhede) * Print fixes (Zhengui li) * Warning/build fixes # gpg: Signature made Sun 01 Nov 2020 20:38:10 GMT # gpg: using RSA key 239B9B6E3BB08B22 # gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>" [full] # gpg: aka "Alex Williamson <alex@shazbot.org>" [full] # gpg: aka "Alex Williamson <alwillia@redhat.com>" [full] # gpg: aka "Alex Williamson <alex.l.williamson@gmail.com>" [full] # Primary key fingerprint: 42F6 C04E 540B D1A9 9E7B 8A90 239B 9B6E 3BB0 8B22 * remotes/awilliam/tags/vfio-update-20201101.0: (32 commits) vfio: fix incorrect print type hw/vfio: Use lock guard macros s390x/pci: get zPCI function info from host vfio: Add routine for finding VFIO_DEVICE_GET_INFO capabilities s390x/pci: use 
a PCI Function structure s390x/pci: clean up s390 PCI groups s390x/pci: use a PCI Group structure s390x/pci: create a header dedicated to PCI CLP s390x/pci: Honor DMA limits set by vfio s390x/pci: Add routine to get the vfio dma available count vfio: Find DMA available capability vfio: Create shared routine for scanning info capabilities s390x/pci: Move header files to include/hw/s390x linux-headers: update against 5.10-rc1 update-linux-headers: Add vfio_zdev.h qapi: Add VFIO devices migration stats in Migration stats vfio: Make vfio-pci device migration capable vfio: Add ioctl to get dirty pages bitmap during dma unmap vfio: Dirty page tracking when vIOMMU is enabled vfio: Add vfio_listener_log_sync to mark dirty pages ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
2c6605389c
@ -1436,6 +1436,7 @@ S390 PCI
|
||||
M: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
S: Supported
|
||||
F: hw/s390x/s390-pci*
|
||||
F: include/hw/s390x/s390-pci*
|
||||
L: qemu-s390x@nongnu.org
|
||||
|
||||
UniCore32 Machines
|
||||
|
@ -27,6 +27,7 @@ s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
|
||||
))
|
||||
s390x_ss.add(when: 'CONFIG_S390_CCW_VIRTIO', if_true: files('s390-virtio-ccw.c'))
|
||||
s390x_ss.add(when: 'CONFIG_TERMINAL3270', if_true: files('3270-ccw.c'))
|
||||
s390x_ss.add(when: 'CONFIG_LINUX', if_true: files('s390-pci-vfio.c'))
|
||||
|
||||
virtio_ss = ss.source_set()
|
||||
virtio_ss.add(files('virtio-ccw.c'))
|
||||
|
@ -15,8 +15,9 @@
|
||||
#include "qapi/error.h"
|
||||
#include "qapi/visitor.h"
|
||||
#include "cpu.h"
|
||||
#include "s390-pci-bus.h"
|
||||
#include "s390-pci-inst.h"
|
||||
#include "hw/s390x/s390-pci-bus.h"
|
||||
#include "hw/s390x/s390-pci-inst.h"
|
||||
#include "hw/s390x/s390-pci-vfio.h"
|
||||
#include "hw/pci/pci_bus.h"
|
||||
#include "hw/qdev-properties.h"
|
||||
#include "hw/pci/pci_bridge.h"
|
||||
@ -737,6 +738,57 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn)
|
||||
object_unref(OBJECT(iommu));
|
||||
}
|
||||
|
||||
S390PCIGroup *s390_group_create(int id)
|
||||
{
|
||||
S390PCIGroup *group;
|
||||
S390pciState *s = s390_get_phb();
|
||||
|
||||
group = g_new0(S390PCIGroup, 1);
|
||||
group->id = id;
|
||||
QTAILQ_INSERT_TAIL(&s->zpci_groups, group, link);
|
||||
return group;
|
||||
}
|
||||
|
||||
S390PCIGroup *s390_group_find(int id)
|
||||
{
|
||||
S390PCIGroup *group;
|
||||
S390pciState *s = s390_get_phb();
|
||||
|
||||
QTAILQ_FOREACH(group, &s->zpci_groups, link) {
|
||||
if (group->id == id) {
|
||||
return group;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void s390_pci_init_default_group(void)
|
||||
{
|
||||
S390PCIGroup *group;
|
||||
ClpRspQueryPciGrp *resgrp;
|
||||
|
||||
group = s390_group_create(ZPCI_DEFAULT_FN_GRP);
|
||||
resgrp = &group->zpci_group;
|
||||
resgrp->fr = 1;
|
||||
stq_p(&resgrp->dasm, 0);
|
||||
stq_p(&resgrp->msia, ZPCI_MSI_ADDR);
|
||||
stw_p(&resgrp->mui, DEFAULT_MUI);
|
||||
stw_p(&resgrp->i, 128);
|
||||
stw_p(&resgrp->maxstbl, 128);
|
||||
resgrp->version = 0;
|
||||
}
|
||||
|
||||
/*
 * Seed the per-function CLP data of a zPCI device with default values and
 * attach the device to the default function group.
 */
static void set_pbdev_info(S390PCIBusDevice *pbdev)
{
    /* Identifiers are taken from the device itself */
    pbdev->zpci_fn.uid = pbdev->uid;
    pbdev->zpci_fn.fid = pbdev->fid;

    /* Default DMA window and channel id */
    pbdev->zpci_fn.pchid = 0;
    pbdev->zpci_fn.sdma = ZPCI_SDMA_ADDR;
    pbdev->zpci_fn.edma = ZPCI_EDMA_ADDR;

    /* Default function group, both as an id and as a resolved pointer */
    pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP;
    pbdev->pci_group = s390_group_find(ZPCI_DEFAULT_FN_GRP);
}
|
||||
|
||||
static void s390_pcihost_realize(DeviceState *dev, Error **errp)
|
||||
{
|
||||
PCIBus *b;
|
||||
@ -764,11 +816,25 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp)
|
||||
s->bus_no = 0;
|
||||
QTAILQ_INIT(&s->pending_sei);
|
||||
QTAILQ_INIT(&s->zpci_devs);
|
||||
QTAILQ_INIT(&s->zpci_dma_limit);
|
||||
QTAILQ_INIT(&s->zpci_groups);
|
||||
|
||||
s390_pci_init_default_group();
|
||||
css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false,
|
||||
S390_ADAPTER_SUPPRESSIBLE, errp);
|
||||
}
|
||||
|
||||
/*
 * Tear down the host bridge's list of PCI function groups.
 *
 * Groups are allocated with g_new0() in s390_group_create() and are only
 * reachable through s->zpci_groups, so each one is released after it has
 * been unlinked; otherwise the memory would be leaked.
 */
static void s390_pcihost_unrealize(DeviceState *dev)
{
    S390pciState *s = S390_PCI_HOST_BRIDGE(dev);
    S390PCIGroup *group;

    while (!QTAILQ_EMPTY(&s->zpci_groups)) {
        group = QTAILQ_FIRST(&s->zpci_groups);
        QTAILQ_REMOVE(&s->zpci_groups, group, link);
        g_free(group);
    }
}
|
||||
|
||||
static int s390_pci_msix_init(S390PCIBusDevice *pbdev)
|
||||
{
|
||||
char *name;
|
||||
@ -797,7 +863,8 @@ static int s390_pci_msix_init(S390PCIBusDevice *pbdev)
|
||||
name = g_strdup_printf("msix-s390-%04x", pbdev->uid);
|
||||
memory_region_init_io(&pbdev->msix_notify_mr, OBJECT(pbdev),
|
||||
&s390_msi_ctrl_ops, pbdev, name, PAGE_SIZE);
|
||||
memory_region_add_subregion(&pbdev->iommu->mr, ZPCI_MSI_ADDR,
|
||||
memory_region_add_subregion(&pbdev->iommu->mr,
|
||||
pbdev->pci_group->zpci_group.msia,
|
||||
&pbdev->msix_notify_mr);
|
||||
g_free(name);
|
||||
|
||||
@ -941,16 +1008,20 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
||||
}
|
||||
}
|
||||
|
||||
if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
|
||||
pbdev->fh |= FH_SHM_VFIO;
|
||||
} else {
|
||||
pbdev->fh |= FH_SHM_EMUL;
|
||||
}
|
||||
|
||||
pbdev->pdev = pdev;
|
||||
pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn);
|
||||
pbdev->iommu->pbdev = pbdev;
|
||||
pbdev->state = ZPCI_FS_DISABLED;
|
||||
set_pbdev_info(pbdev);
|
||||
|
||||
if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
|
||||
pbdev->fh |= FH_SHM_VFIO;
|
||||
pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev);
|
||||
/* Fill in CLP information passed via the vfio region */
|
||||
s390_pci_get_clp_info(pbdev);
|
||||
} else {
|
||||
pbdev->fh |= FH_SHM_EMUL;
|
||||
}
|
||||
|
||||
if (s390_pci_msix_init(pbdev)) {
|
||||
error_setg(errp, "MSI-X support is mandatory "
|
||||
@ -1004,6 +1075,9 @@ static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
||||
pbdev->fid = 0;
|
||||
QTAILQ_REMOVE(&s->zpci_devs, pbdev, link);
|
||||
g_hash_table_remove(s->zpci_table, &pbdev->idx);
|
||||
if (pbdev->iommu->dma_limit) {
|
||||
s390_pci_end_dma_count(s, pbdev->iommu->dma_limit);
|
||||
}
|
||||
qdev_unrealize(dev);
|
||||
}
|
||||
}
|
||||
@ -1123,6 +1197,7 @@ static void s390_pcihost_class_init(ObjectClass *klass, void *data)
|
||||
|
||||
dc->reset = s390_pcihost_reset;
|
||||
dc->realize = s390_pcihost_realize;
|
||||
dc->unrealize = s390_pcihost_unrealize;
|
||||
hc->pre_plug = s390_pcihost_pre_plug;
|
||||
hc->plug = s390_pcihost_plug;
|
||||
hc->unplug_request = s390_pcihost_unplug_request;
|
||||
|
@ -13,12 +13,12 @@
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "cpu.h"
|
||||
#include "s390-pci-inst.h"
|
||||
#include "s390-pci-bus.h"
|
||||
#include "exec/memop.h"
|
||||
#include "exec/memory-internal.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "sysemu/hw_accel.h"
|
||||
#include "hw/s390x/s390-pci-inst.h"
|
||||
#include "hw/s390x/s390-pci-bus.h"
|
||||
#include "hw/s390x/tod.h"
|
||||
|
||||
#ifndef DEBUG_S390PCI_INST
|
||||
@ -32,6 +32,20 @@
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/*
 * Give one DMA mapping back to the shared counter, if this IOMMU has a
 * dma_limit attached. Does nothing otherwise.
 */
static inline void inc_dma_avail(S390PCIIOMMU *iommu)
{
    if (!iommu->dma_limit) {
        return;
    }

    iommu->dma_limit->avail++;
}
|
||||
|
||||
/*
 * Take one DMA mapping from the shared counter, if this IOMMU has a
 * dma_limit attached. Does nothing otherwise.
 */
static inline void dec_dma_avail(S390PCIIOMMU *iommu)
{
    if (!iommu->dma_limit) {
        return;
    }

    iommu->dma_limit->avail--;
}
|
||||
|
||||
static void s390_set_status_code(CPUS390XState *env,
|
||||
uint8_t r, uint64_t status_code)
|
||||
{
|
||||
@ -267,6 +281,8 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
|
||||
goto out;
|
||||
}
|
||||
|
||||
memcpy(resquery, &pbdev->zpci_fn, sizeof(*resquery));
|
||||
|
||||
for (i = 0; i < PCI_BAR_COUNT; i++) {
|
||||
uint32_t data = pci_get_long(pbdev->pdev->config +
|
||||
PCI_BASE_ADDRESS_0 + (i * 4));
|
||||
@ -280,25 +296,23 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
|
||||
resquery->bar_size[i]);
|
||||
}
|
||||
|
||||
stq_p(&resquery->sdma, ZPCI_SDMA_ADDR);
|
||||
stq_p(&resquery->edma, ZPCI_EDMA_ADDR);
|
||||
stl_p(&resquery->fid, pbdev->fid);
|
||||
stw_p(&resquery->pchid, 0);
|
||||
stw_p(&resquery->ug, 1);
|
||||
stl_p(&resquery->uid, pbdev->uid);
|
||||
stw_p(&resquery->hdr.rsp, CLP_RC_OK);
|
||||
break;
|
||||
}
|
||||
case CLP_QUERY_PCI_FNGRP: {
|
||||
ClpRspQueryPciGrp *resgrp = (ClpRspQueryPciGrp *)resh;
|
||||
resgrp->fr = 1;
|
||||
stq_p(&resgrp->dasm, 0);
|
||||
stq_p(&resgrp->msia, ZPCI_MSI_ADDR);
|
||||
stw_p(&resgrp->mui, DEFAULT_MUI);
|
||||
stw_p(&resgrp->i, 128);
|
||||
stw_p(&resgrp->maxstbl, 128);
|
||||
resgrp->version = 0;
|
||||
|
||||
ClpReqQueryPciGrp *reqgrp = (ClpReqQueryPciGrp *)reqh;
|
||||
S390PCIGroup *group;
|
||||
|
||||
group = s390_group_find(reqgrp->g);
|
||||
if (!group) {
|
||||
/* We do not allow access to unknown groups */
|
||||
/* The group must have been obtained with a vfio device */
|
||||
stw_p(&resgrp->hdr.rsp, CLP_RC_QUERYPCIFG_PFGID);
|
||||
goto out;
|
||||
}
|
||||
memcpy(resgrp, &group->zpci_group, sizeof(ClpRspQueryPciGrp));
|
||||
stw_p(&resgrp->hdr.rsp, CLP_RC_OK);
|
||||
break;
|
||||
}
|
||||
@ -572,7 +586,8 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
|
||||
static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu,
|
||||
S390IOTLBEntry *entry)
|
||||
{
|
||||
S390IOTLBEntry *cache = g_hash_table_lookup(iommu->iotlb, &entry->iova);
|
||||
IOMMUTLBEntry notify = {
|
||||
@ -585,14 +600,15 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
|
||||
|
||||
if (entry->perm == IOMMU_NONE) {
|
||||
if (!cache) {
|
||||
return;
|
||||
goto out;
|
||||
}
|
||||
g_hash_table_remove(iommu->iotlb, &entry->iova);
|
||||
inc_dma_avail(iommu);
|
||||
} else {
|
||||
if (cache) {
|
||||
if (cache->perm == entry->perm &&
|
||||
cache->translated_addr == entry->translated_addr) {
|
||||
return;
|
||||
goto out;
|
||||
}
|
||||
|
||||
notify.perm = IOMMU_NONE;
|
||||
@ -606,9 +622,13 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry)
|
||||
cache->len = PAGE_SIZE;
|
||||
cache->perm = entry->perm;
|
||||
g_hash_table_replace(iommu->iotlb, &cache->iova, cache);
|
||||
dec_dma_avail(iommu);
|
||||
}
|
||||
|
||||
memory_region_notify_iommu(&iommu->iommu_mr, 0, notify);
|
||||
|
||||
out:
|
||||
return iommu->dma_limit ? iommu->dma_limit->avail : 1;
|
||||
}
|
||||
|
||||
int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
|
||||
@ -620,6 +640,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
|
||||
S390PCIIOMMU *iommu;
|
||||
S390IOTLBEntry entry;
|
||||
hwaddr start, end;
|
||||
uint32_t dma_avail;
|
||||
|
||||
if (env->psw.mask & PSW_MASK_PSTATE) {
|
||||
s390_program_interrupt(env, PGM_PRIVILEGED, ra);
|
||||
@ -658,6 +679,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
|
||||
}
|
||||
|
||||
iommu = pbdev->iommu;
|
||||
if (iommu->dma_limit) {
|
||||
dma_avail = iommu->dma_limit->avail;
|
||||
} else {
|
||||
dma_avail = 1;
|
||||
}
|
||||
if (!iommu->g_iota) {
|
||||
error = ERR_EVENT_INVALAS;
|
||||
goto err;
|
||||
@ -675,8 +701,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
|
||||
}
|
||||
|
||||
start += entry.len;
|
||||
while (entry.iova < start && entry.iova < end) {
|
||||
s390_pci_update_iotlb(iommu, &entry);
|
||||
while (entry.iova < start && entry.iova < end &&
|
||||
(dma_avail > 0 || entry.perm == IOMMU_NONE)) {
|
||||
dma_avail = s390_pci_update_iotlb(iommu, &entry);
|
||||
entry.iova += PAGE_SIZE;
|
||||
entry.translated_addr += PAGE_SIZE;
|
||||
}
|
||||
@ -689,7 +716,13 @@ err:
|
||||
s390_pci_generate_error_event(error, pbdev->fh, pbdev->fid, start, 0);
|
||||
} else {
|
||||
pbdev->fmb.counter[ZPCI_FMB_CNT_RPCIT]++;
|
||||
setcc(cpu, ZPCI_PCI_LS_OK);
|
||||
if (dma_avail > 0) {
|
||||
setcc(cpu, ZPCI_PCI_LS_OK);
|
||||
} else {
|
||||
/* vfio DMA mappings are exhausted, trigger a RPCIT */
|
||||
setcc(cpu, ZPCI_PCI_LS_ERR);
|
||||
s390_set_status_code(env, r1, ZPCI_RPCIT_ST_INSUFF_RES);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -754,7 +787,8 @@ int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr,
|
||||
}
|
||||
/* Length must be greater than 8, a multiple of 8 */
|
||||
/* and not greater than maxstbl */
|
||||
if ((len <= 8) || (len % 8) || (len > pbdev->maxstbl)) {
|
||||
if ((len <= 8) || (len % 8) ||
|
||||
(len > pbdev->pci_group->zpci_group.maxstbl)) {
|
||||
goto specification_error;
|
||||
}
|
||||
/* Do not cross a 4K-byte boundary */
|
||||
|
276
hw/s390x/s390-pci-vfio.c
Normal file
276
hw/s390x/s390-pci-vfio.c
Normal file
@ -0,0 +1,276 @@
|
||||
/*
|
||||
* s390 vfio-pci interfaces
|
||||
*
|
||||
* Copyright 2020 IBM Corp.
|
||||
* Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or (at
|
||||
* your option) any later version. See the COPYING file in the top-level
|
||||
* directory.
|
||||
*/
|
||||
|
||||
#include <sys/ioctl.h>
|
||||
#include <linux/vfio.h>
|
||||
#include <linux/vfio_zdev.h>
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "trace.h"
|
||||
#include "hw/s390x/s390-pci-bus.h"
|
||||
#include "hw/s390x/s390-pci-clp.h"
|
||||
#include "hw/s390x/s390-pci-vfio.h"
|
||||
#include "hw/vfio/pci.h"
|
||||
#include "hw/vfio/vfio-common.h"
|
||||
|
||||
/*
|
||||
* Get the current DMA available count from vfio. Returns true if vfio is
|
||||
* limiting DMA requests, false otherwise. The current available count read
|
||||
* from vfio is returned in avail.
|
||||
*/
|
||||
bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
|
||||
{
|
||||
g_autofree struct vfio_iommu_type1_info *info;
|
||||
uint32_t argsz;
|
||||
|
||||
assert(avail);
|
||||
|
||||
argsz = sizeof(struct vfio_iommu_type1_info);
|
||||
info = g_malloc0(argsz);
|
||||
|
||||
/*
|
||||
* If the specified argsz is not large enough to contain all capabilities
|
||||
* it will be updated upon return from the ioctl. Retry until we have
|
||||
* a big enough buffer to hold the entire capability chain.
|
||||
*/
|
||||
retry:
|
||||
info->argsz = argsz;
|
||||
|
||||
if (ioctl(fd, VFIO_IOMMU_GET_INFO, info)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (info->argsz > argsz) {
|
||||
argsz = info->argsz;
|
||||
info = g_realloc(info, argsz);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/* If the capability exists, update with the current value */
|
||||
return vfio_get_info_dma_avail(info, avail);
|
||||
}
|
||||
|
||||
S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
|
||||
S390PCIBusDevice *pbdev)
|
||||
{
|
||||
S390PCIDMACount *cnt;
|
||||
uint32_t avail;
|
||||
VFIOPCIDevice *vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
|
||||
int id;
|
||||
|
||||
assert(vpdev);
|
||||
|
||||
id = vpdev->vbasedev.group->container->fd;
|
||||
|
||||
if (!s390_pci_update_dma_avail(id, &avail)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) {
|
||||
if (cnt->id == id) {
|
||||
cnt->users++;
|
||||
return cnt;
|
||||
}
|
||||
}
|
||||
|
||||
cnt = g_new0(S390PCIDMACount, 1);
|
||||
cnt->id = id;
|
||||
cnt->users = 1;
|
||||
cnt->avail = avail;
|
||||
QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link);
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/*
 * Drop one user from a DMA counter and unlink the counter from
 * s->zpci_dma_limit once no users remain.
 */
void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt)
{
    assert(cnt);

    if (--cnt->users != 0) {
        return;
    }

    /*
     * NOTE(review): the entry is only unlinked here, never freed, so it
     * looks like it is leaked. Before adding a g_free(), confirm that
     * callers clear their own pointer to it (e.g. iommu->dma_limit).
     */
    QTAILQ_REMOVE(&s->zpci_dma_limit, cnt, link);
}
|
||||
|
||||
/*
 * Copy the zPCI "base" capability from the vfio device info into the
 * function's CLP data. If the capability is absent, a trace event is
 * emitted and the existing values are left untouched.
 */
static void s390_pci_read_base(S390PCIBusDevice *pbdev,
                               struct vfio_device_info *info)
{
    VFIOPCIDevice *vfio_dev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
    struct vfio_info_cap_header *cap_hdr;
    struct vfio_device_info_cap_zpci_base *base;

    cap_hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
    if (!cap_hdr) {
        /* Nothing provided by the host: keep the defaults */
        trace_s390_pci_clp_cap(vfio_dev->vbasedev.name,
                               VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
        return;
    }

    base = (void *) cap_hdr;

    pbdev->zpci_fn.sdma = base->start_dma;
    pbdev->zpci_fn.edma = base->end_dma;
    pbdev->zpci_fn.pchid = base->pchid;
    pbdev->zpci_fn.vfn = base->vfn;
    pbdev->zpci_fn.pfgid = base->gid;

    /* These stay 0 until other FMB formats are supported */
    pbdev->zpci_fn.fmbl = 0;
    pbdev->zpci_fn.pft = 0;
}
|
||||
|
||||
/*
 * Resolve the function group of a vfio-backed zPCI device.
 *
 * Without the zPCI "group" capability the device falls back to the default
 * group. With it, the group named by zpci_fn.pfgid is looked up; if it does
 * not exist yet it is created and its CLP response is populated from the
 * capability. An already existing group is used as is.
 */
static void s390_pci_read_group(S390PCIBusDevice *pbdev,
                                struct vfio_device_info *info)
{
    VFIOPCIDevice *vfio_dev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
    struct vfio_info_cap_header *cap_hdr;
    struct vfio_device_info_cap_zpci_group *grp_cap;
    ClpRspQueryPciGrp *rsp;

    cap_hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_GROUP);
    if (!cap_hdr) {
        /* No group information from the host: use the default group */
        trace_s390_pci_clp_cap(vfio_dev->vbasedev.name,
                               VFIO_DEVICE_INFO_CAP_ZPCI_GROUP);
        pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP;
        pbdev->pci_group = s390_group_find(ZPCI_DEFAULT_FN_GRP);
        return;
    }

    grp_cap = (void *) cap_hdr;

    /* A group that is already defined needs no further setup */
    pbdev->pci_group = s390_group_find(pbdev->zpci_fn.pfgid);
    if (pbdev->pci_group) {
        return;
    }

    pbdev->pci_group = s390_group_create(pbdev->zpci_fn.pfgid);
    rsp = &pbdev->pci_group->zpci_group;

    if (grp_cap->flags & VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH) {
        rsp->fr = 1;
    }
    stq_p(&rsp->dasm, grp_cap->dasm);
    stq_p(&rsp->msia, grp_cap->msi_addr);
    stw_p(&rsp->mui, grp_cap->mui);
    stw_p(&rsp->i, grp_cap->noi);
    stw_p(&rsp->maxstbl, grp_cap->maxstbl);
    stb_p(&rsp->version, grp_cap->version);
}
|
||||
|
||||
/*
 * Copy the zPCI "util" capability (utility string) into the function's CLP
 * data and flag it as present. A missing capability, or one whose size
 * exceeds CLP_UTIL_STR_LEN, is traced and otherwise ignored.
 */
static void s390_pci_read_util(S390PCIBusDevice *pbdev,
                               struct vfio_device_info *info)
{
    VFIOPCIDevice *vfio_dev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
    struct vfio_info_cap_header *cap_hdr;
    struct vfio_device_info_cap_zpci_util *util;

    cap_hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_UTIL);
    if (!cap_hdr) {
        /* Nothing provided by the host: keep the defaults */
        trace_s390_pci_clp_cap(vfio_dev->vbasedev.name,
                               VFIO_DEVICE_INFO_CAP_ZPCI_UTIL);
        return;
    }

    util = (void *) cap_hdr;
    if (util->size > CLP_UTIL_STR_LEN) {
        /* Reported string would not fit the CLP field */
        trace_s390_pci_clp_cap_size(vfio_dev->vbasedev.name, util->size,
                                    VFIO_DEVICE_INFO_CAP_ZPCI_UTIL);
        return;
    }

    pbdev->zpci_fn.flags |= CLP_RSP_QPCI_MASK_UTIL;
    memcpy(pbdev->zpci_fn.util_str, util->util_str, CLP_UTIL_STR_LEN);
}
|
||||
|
||||
/*
 * Copy the zPCI "pfip" capability into the function's CLP data. A missing
 * capability, or one whose size exceeds CLP_PFIP_NR_SEGMENTS, is traced and
 * otherwise ignored.
 */
static void s390_pci_read_pfip(S390PCIBusDevice *pbdev,
                               struct vfio_device_info *info)
{
    VFIOPCIDevice *vfio_dev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
    struct vfio_info_cap_header *cap_hdr;
    struct vfio_device_info_cap_zpci_pfip *pfip_cap;

    cap_hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_PFIP);
    if (!cap_hdr) {
        /* Nothing provided by the host: keep the defaults */
        trace_s390_pci_clp_cap(vfio_dev->vbasedev.name,
                               VFIO_DEVICE_INFO_CAP_ZPCI_PFIP);
        return;
    }

    pfip_cap = (void *) cap_hdr;
    if (pfip_cap->size > CLP_PFIP_NR_SEGMENTS) {
        /* Reported data would not fit the CLP field */
        trace_s390_pci_clp_cap_size(vfio_dev->vbasedev.name, pfip_cap->size,
                                    VFIO_DEVICE_INFO_CAP_ZPCI_PFIP);
        return;
    }

    memcpy(pbdev->zpci_fn.pfip, pfip_cap->pfip, CLP_PFIP_NR_SEGMENTS);
}
|
||||
|
||||
/*
|
||||
* This function will issue the VFIO_DEVICE_GET_INFO ioctl and look for
|
||||
* capabilities that contain information about CLP features provided by the
|
||||
* underlying host.
|
||||
* On entry, defaults have already been placed into the guest CLP response
|
||||
* buffers. On exit, defaults will have been overwritten for any CLP features
|
||||
* found in the capability chain; defaults will remain for any CLP features not
|
||||
* found in the chain.
|
||||
*/
|
||||
void s390_pci_get_clp_info(S390PCIBusDevice *pbdev)
{
    VFIOPCIDevice *vfio_dev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
    g_autofree struct vfio_device_info *info = NULL;
    uint32_t bufsz = sizeof(*info);
    int dev_fd;

    info = g_malloc0(bufsz);
    dev_fd = vfio_dev->vbasedev.fd;

    /*
     * The ioctl writes the size it needs back into argsz when the buffer
     * passed in is too small for the whole capability chain. Grow the
     * buffer and ask again until it fits. If the ioctl itself fails, give
     * up and leave the CLP defaults in place.
     */
    for (;;) {
        info->argsz = bufsz;

        if (ioctl(dev_fd, VFIO_DEVICE_GET_INFO, info)) {
            trace_s390_pci_clp_dev_info(vfio_dev->vbasedev.name);
            return;
        }

        if (info->argsz <= bufsz) {
            break;
        }

        bufsz = info->argsz;
        info = g_realloc(info, bufsz);
    }

    /*
     * Fill the guest CLP responses from whatever capabilities were found.
     * The base capability is read first because it can set the function
     * group id that s390_pci_read_group() then looks up. Features that are
     * not reported keep their default values.
     */
    s390_pci_read_base(pbdev, info);
    s390_pci_read_group(pbdev, info);
    s390_pci_read_util(pbdev, info);
    s390_pci_read_pfip(pbdev, info);
}
|
@ -28,7 +28,7 @@
|
||||
#include "qemu/error-report.h"
|
||||
#include "qemu/option.h"
|
||||
#include "qemu/qemu-print.h"
|
||||
#include "s390-pci-bus.h"
|
||||
#include "hw/s390x/s390-pci-bus.h"
|
||||
#include "sysemu/reset.h"
|
||||
#include "hw/s390x/storage-keys.h"
|
||||
#include "hw/s390x/storage-attributes.h"
|
||||
|
@ -14,3 +14,8 @@ css_do_sic(uint16_t mode, uint8_t isc) "CSS: set interruption mode 0x%x on isc 0
|
||||
virtio_ccw_interpret_ccw(int cssid, int ssid, int schid, int cmd_code) "VIRTIO-CCW: %x.%x.%04x: interpret command 0x%x"
|
||||
virtio_ccw_new_device(int cssid, int ssid, int schid, int devno, const char *devno_mode) "VIRTIO-CCW: add subchannel %x.%x.%04x, devno 0x%04x (%s)"
|
||||
virtio_ccw_set_ind(uint64_t ind_loc, uint8_t ind_old, uint8_t ind_new) "VIRTIO-CCW: indicator at %" PRIu64 ": 0x%x->0x%x"
|
||||
|
||||
# s390-pci-vfio.c
|
||||
s390_pci_clp_cap(const char *id, uint32_t cap) "PCI: %s: missing expected CLP capability %u"
|
||||
s390_pci_clp_cap_size(const char *id, uint32_t size, uint32_t cap) "PCI: %s: bad size (%u) for CLP capability %u"
|
||||
s390_pci_clp_dev_info(const char *id) "PCI: %s: cannot read vfio device info"
|
||||
|
508
hw/vfio/common.c
508
hw/vfio/common.c
@ -29,6 +29,7 @@
|
||||
#include "hw/vfio/vfio.h"
|
||||
#include "exec/address-spaces.h"
|
||||
#include "exec/memory.h"
|
||||
#include "exec/ram_addr.h"
|
||||
#include "hw/hw.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "qemu/main-loop.h"
|
||||
@ -37,6 +38,7 @@
|
||||
#include "sysemu/reset.h"
|
||||
#include "trace.h"
|
||||
#include "qapi/error.h"
|
||||
#include "migration/migration.h"
|
||||
|
||||
VFIOGroupList vfio_group_list =
|
||||
QLIST_HEAD_INITIALIZER(vfio_group_list);
|
||||
@ -203,7 +205,7 @@ void vfio_region_write(void *opaque, hwaddr addr,
|
||||
buf.qword = cpu_to_le64(data);
|
||||
break;
|
||||
default:
|
||||
hw_error("vfio: unsupported write size, %d bytes", size);
|
||||
hw_error("vfio: unsupported write size, %u bytes", size);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -260,7 +262,7 @@ uint64_t vfio_region_read(void *opaque,
|
||||
data = le64_to_cpu(buf.qword);
|
||||
break;
|
||||
default:
|
||||
hw_error("vfio: unsupported read size, %d bytes", size);
|
||||
hw_error("vfio: unsupported read size, %u bytes", size);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -286,11 +288,147 @@ const MemoryRegionOps vfio_region_ops = {
|
||||
},
|
||||
};
|
||||
|
||||
/*
|
||||
* Device state interfaces
|
||||
*/
|
||||
|
||||
bool vfio_mig_active(void)
|
||||
{
|
||||
VFIOGroup *group;
|
||||
VFIODevice *vbasedev;
|
||||
|
||||
if (QLIST_EMPTY(&vfio_group_list)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||
QLIST_FOREACH(vbasedev, &group->device_list, next) {
|
||||
if (vbasedev->migration_blocker) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Return true only when migration is set up or active and every device in
 * every group of the container has migration state whose device_state has
 * VFIO_DEVICE_STATE_SAVING set and VFIO_DEVICE_STATE_RUNNING clear.
 */
static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container)
{
    MigrationState *mig_state = migrate_get_current();
    VFIODevice *dev;
    VFIOGroup *grp;

    if (!migration_is_setup_or_active(mig_state->state)) {
        return false;
    }

    QLIST_FOREACH(grp, &container->group_list, container_next) {
        QLIST_FOREACH(dev, &grp->device_list, next) {
            VFIOMigration *dev_mig = dev->migration;

            /* A device without migration support fails the check */
            if (!dev_mig) {
                return false;
            }

            if (!(dev_mig->device_state & VFIO_DEVICE_STATE_SAVING) ||
                (dev_mig->device_state & VFIO_DEVICE_STATE_RUNNING)) {
                return false;
            }
        }
    }

    return true;
}
|
||||
|
||||
/*
 * Return true only when migration is set up or active and every device in
 * every group of the container has migration state whose device_state has
 * both VFIO_DEVICE_STATE_SAVING and VFIO_DEVICE_STATE_RUNNING set.
 */
static bool vfio_devices_all_running_and_saving(VFIOContainer *container)
{
    MigrationState *mig_state = migrate_get_current();
    VFIODevice *dev;
    VFIOGroup *grp;

    if (!migration_is_setup_or_active(mig_state->state)) {
        return false;
    }

    QLIST_FOREACH(grp, &container->group_list, container_next) {
        QLIST_FOREACH(dev, &grp->device_list, next) {
            VFIOMigration *dev_mig = dev->migration;

            /* A device without migration support fails the check */
            if (!dev_mig) {
                return false;
            }

            if (!(dev_mig->device_state & VFIO_DEVICE_STATE_SAVING) ||
                !(dev_mig->device_state & VFIO_DEVICE_STATE_RUNNING)) {
                return false;
            }
        }
    }

    return true;
}
|
||||
|
||||
static int vfio_dma_unmap_bitmap(VFIOContainer *container,
|
||||
hwaddr iova, ram_addr_t size,
|
||||
IOMMUTLBEntry *iotlb)
|
||||
{
|
||||
struct vfio_iommu_type1_dma_unmap *unmap;
|
||||
struct vfio_bitmap *bitmap;
|
||||
uint64_t pages = TARGET_PAGE_ALIGN(size) >> TARGET_PAGE_BITS;
|
||||
int ret;
|
||||
|
||||
unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap));
|
||||
|
||||
unmap->argsz = sizeof(*unmap) + sizeof(*bitmap);
|
||||
unmap->iova = iova;
|
||||
unmap->size = size;
|
||||
unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
|
||||
bitmap = (struct vfio_bitmap *)&unmap->data;
|
||||
|
||||
/*
|
||||
* cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of
|
||||
* TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap_pgsize to
|
||||
* TARGET_PAGE_SIZE.
|
||||
*/
|
||||
|
||||
bitmap->pgsize = TARGET_PAGE_SIZE;
|
||||
bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
|
||||
BITS_PER_BYTE;
|
||||
|
||||
if (bitmap->size > container->max_dirty_bitmap_size) {
|
||||
error_report("UNMAP: Size of bitmap too big 0x%"PRIx64,
|
||||
(uint64_t)bitmap->size);
|
||||
ret = -E2BIG;
|
||||
goto unmap_exit;
|
||||
}
|
||||
|
||||
bitmap->data = g_try_malloc0(bitmap->size);
|
||||
if (!bitmap->data) {
|
||||
ret = -ENOMEM;
|
||||
goto unmap_exit;
|
||||
}
|
||||
|
||||
ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
|
||||
if (!ret) {
|
||||
cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data,
|
||||
iotlb->translated_addr, pages);
|
||||
} else {
|
||||
error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
|
||||
}
|
||||
|
||||
g_free(bitmap->data);
|
||||
unmap_exit:
|
||||
g_free(unmap);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
|
||||
*/
|
||||
static int vfio_dma_unmap(VFIOContainer *container,
|
||||
hwaddr iova, ram_addr_t size)
|
||||
hwaddr iova, ram_addr_t size,
|
||||
IOMMUTLBEntry *iotlb)
|
||||
{
|
||||
struct vfio_iommu_type1_dma_unmap unmap = {
|
||||
.argsz = sizeof(unmap),
|
||||
@ -299,6 +437,11 @@ static int vfio_dma_unmap(VFIOContainer *container,
|
||||
.size = size,
|
||||
};
|
||||
|
||||
if (iotlb && container->dirty_pages_supported &&
|
||||
vfio_devices_all_running_and_saving(container)) {
|
||||
return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
|
||||
}
|
||||
|
||||
while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
|
||||
/*
|
||||
* The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
|
||||
@ -346,7 +489,7 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
|
||||
* the VGA ROM space.
|
||||
*/
|
||||
if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
|
||||
(errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 &&
|
||||
(errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
|
||||
ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
|
||||
return 0;
|
||||
}
|
||||
@ -407,8 +550,8 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
|
||||
}
|
||||
|
||||
/* Called with rcu_read_lock held. */
|
||||
static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
|
||||
bool *read_only)
|
||||
static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
|
||||
ram_addr_t *ram_addr, bool *read_only)
|
||||
{
|
||||
MemoryRegion *mr;
|
||||
hwaddr xlat;
|
||||
@ -439,8 +582,17 @@ static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
|
||||
return false;
|
||||
}
|
||||
|
||||
*vaddr = memory_region_get_ram_ptr(mr) + xlat;
|
||||
*read_only = !writable || mr->readonly;
|
||||
if (vaddr) {
|
||||
*vaddr = memory_region_get_ram_ptr(mr) + xlat;
|
||||
}
|
||||
|
||||
if (ram_addr) {
|
||||
*ram_addr = memory_region_get_ram_addr(mr) + xlat;
|
||||
}
|
||||
|
||||
if (read_only) {
|
||||
*read_only = !writable || mr->readonly;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -450,7 +602,6 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
|
||||
VFIOContainer *container = giommu->container;
|
||||
hwaddr iova = iotlb->iova + giommu->iommu_offset;
|
||||
bool read_only;
|
||||
void *vaddr;
|
||||
int ret;
|
||||
|
||||
@ -466,7 +617,9 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||
rcu_read_lock();
|
||||
|
||||
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
|
||||
if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
|
||||
bool read_only;
|
||||
|
||||
if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only)) {
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
@ -486,7 +639,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||
iotlb->addr_mask + 1, vaddr, ret);
|
||||
}
|
||||
} else {
|
||||
ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
|
||||
ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb);
|
||||
if (ret) {
|
||||
error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||
"0x%"HWADDR_PRIx") = %d (%m)",
|
||||
@ -789,7 +942,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||
}
|
||||
|
||||
if (try_unmap) {
|
||||
ret = vfio_dma_unmap(container, iova, int128_get64(llsize));
|
||||
ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
|
||||
if (ret) {
|
||||
error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||
"0x%"HWADDR_PRIx") = %d (%m)",
|
||||
@ -812,9 +965,156 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Query the container for the dirty-page bitmap covering the IOVA range
 * [@iova, @iova + @size) and mark the corresponding pages, starting at
 * @ram_addr, dirty via cpu_physical_memory_set_dirty_lebitmap().
 *
 * Returns 0 on success, -ENOMEM if the bitmap buffer cannot be allocated,
 * or -errno if the VFIO_IOMMU_DIRTY_PAGES ioctl fails.
 */
static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
                                 uint64_t size, ram_addr_t ram_addr)
{
    struct vfio_iommu_type1_dirty_bitmap *dbitmap;
    struct vfio_iommu_type1_dirty_bitmap_get *range;
    uint64_t pages;
    int ret;

    /* The "get" descriptor lives in the trailing data[] of the header. */
    dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));

    dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
    dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;
    range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data;
    range->iova = iova;
    range->size = size;

    /*
     * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of
     * TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap's pgsize to
     * TARGET_PAGE_SIZE.
     */
    range->bitmap.pgsize = TARGET_PAGE_SIZE;

    /* One bit per target page, rounded up to a whole number of __u64s. */
    pages = TARGET_PAGE_ALIGN(range->size) >> TARGET_PAGE_BITS;
    range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
                                         BITS_PER_BYTE;
    range->bitmap.data = g_try_malloc0(range->bitmap.size);
    if (!range->bitmap.data) {
        ret = -ENOMEM;
        goto err_out;
    }

    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
    if (ret) {
        /*
         * Hand back the real error code rather than the bare -1 from
         * ioctl(), so both failure paths of this function return -errno
         * style values.
         */
        ret = -errno;
        error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64
                " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova,
                (uint64_t)range->size, -ret);
        goto err_out;
    }

    cpu_physical_memory_set_dirty_lebitmap((unsigned long *)range->bitmap.data,
                                            ram_addr, pages);

    trace_vfio_get_dirty_bitmap(container->fd, range->iova, range->size,
                                range->bitmap.size, ram_addr);
err_out:
    g_free(range->bitmap.data);
    g_free(dbitmap);

    return ret;
}
|
||||
|
||||
typedef struct {
|
||||
IOMMUNotifier n;
|
||||
VFIOGuestIOMMU *giommu;
|
||||
} vfio_giommu_dirty_notifier;
|
||||
|
||||
/*
 * IOMMU notifier callback: translate @iotlb to a RAM address and pull the
 * dirty bitmap for the mapped range from the container. Entries whose target
 * address space is not system memory are rejected with an error message.
 */
static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
    vfio_giommu_dirty_notifier *gdn;
    VFIOGuestIOMMU *giommu;
    VFIOContainer *container;
    ram_addr_t translated_addr;
    hwaddr iova;
    int ret;

    gdn = container_of(n, vfio_giommu_dirty_notifier, n);
    giommu = gdn->giommu;
    container = giommu->container;
    iova = iotlb->iova + giommu->iommu_offset;

    trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);

    if (iotlb->target_as != &address_space_memory) {
        const char *as_name = iotlb->target_as->name;

        error_report("Wrong target AS \"%s\", only system memory is allowed",
                     as_name ? as_name : "none");
        return;
    }

    rcu_read_lock();

    /* Only the RAM address is needed here; vaddr and read_only are unused. */
    if (!vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) {
        goto unlock;
    }

    ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1,
                                translated_addr);
    if (ret) {
        error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
                     "0x%"HWADDR_PRIx") = %d (%m)",
                     container, iova,
                     iotlb->addr_mask + 1, ret);
    }

unlock:
    rcu_read_unlock();
}
|
||||
|
||||
static int vfio_sync_dirty_bitmap(VFIOContainer *container,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
ram_addr_t ram_addr;
|
||||
|
||||
if (memory_region_is_iommu(section->mr)) {
|
||||
VFIOGuestIOMMU *giommu;
|
||||
|
||||
QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
|
||||
if (MEMORY_REGION(giommu->iommu) == section->mr &&
|
||||
giommu->n.start == section->offset_within_region) {
|
||||
Int128 llend;
|
||||
vfio_giommu_dirty_notifier gdn = { .giommu = giommu };
|
||||
int idx = memory_region_iommu_attrs_to_index(giommu->iommu,
|
||||
MEMTXATTRS_UNSPECIFIED);
|
||||
|
||||
llend = int128_add(int128_make64(section->offset_within_region),
|
||||
section->size);
|
||||
llend = int128_sub(llend, int128_one());
|
||||
|
||||
iommu_notifier_init(&gdn.n,
|
||||
vfio_iommu_map_dirty_notify,
|
||||
IOMMU_NOTIFIER_MAP,
|
||||
section->offset_within_region,
|
||||
int128_get64(llend),
|
||||
idx);
|
||||
memory_region_iommu_replay(giommu->iommu, &gdn.n);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ram_addr = memory_region_get_ram_addr(section->mr) +
|
||||
section->offset_within_region;
|
||||
|
||||
return vfio_get_dirty_bitmap(container,
|
||||
TARGET_PAGE_ALIGN(section->offset_within_address_space),
|
||||
int128_get64(section->size), ram_addr);
|
||||
}
|
||||
|
||||
static void vfio_listerner_log_sync(MemoryListener *listener,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
|
||||
|
||||
if (vfio_listener_skipped_section(section) ||
|
||||
!container->dirty_pages_supported) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (vfio_devices_all_stopped_and_saving(container)) {
|
||||
vfio_sync_dirty_bitmap(container, section);
|
||||
}
|
||||
}
|
||||
|
||||
static const MemoryListener vfio_memory_listener = {
|
||||
.region_add = vfio_listener_region_add,
|
||||
.region_del = vfio_listener_region_del,
|
||||
.log_sync = vfio_listerner_log_sync,
|
||||
};
|
||||
|
||||
static void vfio_listener_release(VFIOContainer *container)
|
||||
@ -825,17 +1125,12 @@ static void vfio_listener_release(VFIOContainer *container)
|
||||
}
|
||||
}
|
||||
|
||||
struct vfio_info_cap_header *
|
||||
vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
|
||||
static struct vfio_info_cap_header *
|
||||
vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id)
|
||||
{
|
||||
struct vfio_info_cap_header *hdr;
|
||||
void *ptr = info;
|
||||
|
||||
if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
|
||||
for (hdr = ptr + cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
|
||||
if (hdr->id == id) {
|
||||
return hdr;
|
||||
}
|
||||
@ -844,6 +1139,57 @@ vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct vfio_info_cap_header *
|
||||
vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
|
||||
{
|
||||
if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return vfio_get_cap((void *)info, info->cap_offset, id);
|
||||
}
|
||||
|
||||
static struct vfio_info_cap_header *
|
||||
vfio_get_iommu_type1_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
|
||||
{
|
||||
if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return vfio_get_cap((void *)info, info->cap_offset, id);
|
||||
}
|
||||
|
||||
struct vfio_info_cap_header *
|
||||
vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id)
|
||||
{
|
||||
if (!(info->flags & VFIO_DEVICE_FLAGS_CAPS)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return vfio_get_cap((void *)info, info->cap_offset, id);
|
||||
}
|
||||
|
||||
bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
|
||||
unsigned int *avail)
|
||||
{
|
||||
struct vfio_info_cap_header *hdr;
|
||||
struct vfio_iommu_type1_info_dma_avail *cap;
|
||||
|
||||
/* If the capability cannot be found, assume no DMA limiting */
|
||||
hdr = vfio_get_iommu_type1_info_cap(info,
|
||||
VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL);
|
||||
if (hdr == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (avail != NULL) {
|
||||
cap = (void *) hdr;
|
||||
*avail = cap->avail;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
|
||||
struct vfio_region_info *info)
|
||||
{
|
||||
@ -924,6 +1270,18 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Tear down the mmap at position @index of @region: remove its MemoryRegion
 * from the parent, munmap() the host mapping, unparent the object and clear
 * the stored mapping pointer so the slot reads as "not mapped".
 */
static void vfio_subregion_unmap(VFIORegion *region, int index)
{
    trace_vfio_region_unmap(memory_region_name(&region->mmaps[index].mem),
                            region->mmaps[index].offset,
                            region->mmaps[index].offset +
                            region->mmaps[index].size - 1);
    memory_region_del_subregion(region->mem, &region->mmaps[index].mem);
    munmap(region->mmaps[index].mmap, region->mmaps[index].size);
    object_unparent(OBJECT(&region->mmaps[index].mem));
    region->mmaps[index].mmap = NULL;
}
|
||||
|
||||
int vfio_region_mmap(VFIORegion *region)
|
||||
{
|
||||
int i, prot = 0;
|
||||
@ -954,10 +1312,7 @@ int vfio_region_mmap(VFIORegion *region)
|
||||
region->mmaps[i].mmap = NULL;
|
||||
|
||||
for (i--; i >= 0; i--) {
|
||||
memory_region_del_subregion(region->mem, ®ion->mmaps[i].mem);
|
||||
munmap(region->mmaps[i].mmap, region->mmaps[i].size);
|
||||
object_unparent(OBJECT(®ion->mmaps[i].mem));
|
||||
region->mmaps[i].mmap = NULL;
|
||||
vfio_subregion_unmap(region, i);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@ -982,6 +1337,21 @@ int vfio_region_mmap(VFIORegion *region)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Unmap every currently mapped sub-region of @region. Does nothing when the
 * region has no MemoryRegion; slots whose mmap pointer is NULL are skipped.
 */
void vfio_region_unmap(VFIORegion *region)
{
    int idx;

    if (region->mem == NULL) {
        return;
    }

    for (idx = 0; idx < region->nr_mmaps; idx++) {
        if (!region->mmaps[idx].mmap) {
            continue;
        }
        vfio_subregion_unmap(region, idx);
    }
}
|
||||
|
||||
void vfio_region_exit(VFIORegion *region)
|
||||
{
|
||||
int i;
|
||||
@ -1204,6 +1574,75 @@ static int vfio_init_container(VFIOContainer *container, int group_fd,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Fetch the type1 IOMMU info for @container into a freshly allocated buffer.
 *
 * The ioctl is first issued with the base structure size. If the kernel
 * reports a larger argsz, the buffer is grown to that size and the ioctl is
 * repeated.
 *
 * On success returns 0 and stores the buffer in *@info; the caller releases
 * it with g_free(). On failure returns -errno and sets *@info to NULL.
 */
static int vfio_get_iommu_info(VFIOContainer *container,
                               struct vfio_iommu_type1_info **info)
{
    size_t argsz = sizeof(struct vfio_iommu_type1_info);

    *info = g_new0(struct vfio_iommu_type1_info, 1);
again:
    (*info)->argsz = argsz;

    if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) {
        /* Save errno first: releasing the buffer may overwrite it. */
        int err = errno;

        g_free(*info);
        *info = NULL;
        return -err;
    }

    if ((*info)->argsz > argsz) {
        argsz = (*info)->argsz;
        *info = g_realloc(*info, argsz);
        goto again;
    }

    return 0;
}
|
||||
|
||||
static struct vfio_info_cap_header *
|
||||
vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
|
||||
{
|
||||
struct vfio_info_cap_header *hdr;
|
||||
void *ptr = info;
|
||||
|
||||
if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
|
||||
if (hdr->id == id) {
|
||||
return hdr;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Read the migration capability from @info, if present, and record the
 * container's dirty-page tracking parameters. Tracking is enabled only when
 * the capability's pgsize_bitmap includes TARGET_PAGE_SIZE.
 */
static void vfio_get_iommu_info_migration(VFIOContainer *container,
                                         struct vfio_iommu_type1_info *info)
{
    struct vfio_iommu_type1_info_cap_migration *cap_mig;
    struct vfio_info_cap_header *hdr =
        vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION);

    if (hdr == NULL) {
        return;
    }

    cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration,
                           header);

    /*
     * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of
     * TARGET_PAGE_SIZE to mark those dirty.
     */
    if (!(cap_mig->pgsize_bitmap & TARGET_PAGE_SIZE)) {
        return;
    }

    container->dirty_pages_supported = true;
    container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
    container->dirty_pgsizes = cap_mig->pgsize_bitmap;
}
|
||||
|
||||
static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
Error **errp)
|
||||
{
|
||||
@ -1273,6 +1712,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
container->space = space;
|
||||
container->fd = fd;
|
||||
container->error = NULL;
|
||||
container->dirty_pages_supported = false;
|
||||
QLIST_INIT(&container->giommu_list);
|
||||
QLIST_INIT(&container->hostwin_list);
|
||||
|
||||
@ -1285,7 +1725,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
case VFIO_TYPE1v2_IOMMU:
|
||||
case VFIO_TYPE1_IOMMU:
|
||||
{
|
||||
struct vfio_iommu_type1_info info;
|
||||
struct vfio_iommu_type1_info *info;
|
||||
|
||||
/*
|
||||
* FIXME: This assumes that a Type1 IOMMU can map any 64-bit
|
||||
@ -1294,15 +1734,19 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
* existing Type1 IOMMUs generally support any IOVA we're
|
||||
* going to actually try in practice.
|
||||
*/
|
||||
info.argsz = sizeof(info);
|
||||
ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info);
|
||||
/* Ignore errors */
|
||||
if (ret || !(info.flags & VFIO_IOMMU_INFO_PGSIZES)) {
|
||||
ret = vfio_get_iommu_info(container, &info);
|
||||
|
||||
if (ret || !(info->flags & VFIO_IOMMU_INFO_PGSIZES)) {
|
||||
/* Assume 4k IOVA page size */
|
||||
info.iova_pgsizes = 4096;
|
||||
info->iova_pgsizes = 4096;
|
||||
}
|
||||
vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes);
|
||||
container->pgsizes = info.iova_pgsizes;
|
||||
vfio_host_win_add(container, 0, (hwaddr)-1, info->iova_pgsizes);
|
||||
container->pgsizes = info->iova_pgsizes;
|
||||
|
||||
if (!ret) {
|
||||
vfio_get_iommu_info_migration(container, info);
|
||||
}
|
||||
g_free(info);
|
||||
break;
|
||||
}
|
||||
case VFIO_SPAPR_TCE_v2_IOMMU:
|
||||
|
@ -2,6 +2,7 @@ vfio_ss = ss.source_set()
|
||||
vfio_ss.add(files(
|
||||
'common.c',
|
||||
'spapr.c',
|
||||
'migration.c',
|
||||
))
|
||||
vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
|
||||
'display.c',
|
||||
|
933
hw/vfio/migration.c
Normal file
933
hw/vfio/migration.c
Normal file
@ -0,0 +1,933 @@
|
||||
/*
|
||||
* Migration support for VFIO devices
|
||||
*
|
||||
* Copyright NVIDIA, Inc. 2020
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2. See
|
||||
* the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/main-loop.h"
|
||||
#include "qemu/cutils.h"
|
||||
#include <linux/vfio.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include "sysemu/runstate.h"
|
||||
#include "hw/vfio/vfio-common.h"
|
||||
#include "cpu.h"
|
||||
#include "migration/migration.h"
|
||||
#include "migration/vmstate.h"
|
||||
#include "migration/qemu-file.h"
|
||||
#include "migration/register.h"
|
||||
#include "migration/blocker.h"
|
||||
#include "migration/misc.h"
|
||||
#include "qapi/error.h"
|
||||
#include "exec/ramlist.h"
|
||||
#include "exec/ram_addr.h"
|
||||
#include "pci.h"
|
||||
#include "trace.h"
|
||||
#include "hw/hw.h"
|
||||
|
||||
/*
|
||||
* Flags to be used as unique delimiters for VFIO devices in the migration
|
||||
* stream. These flags are composed as:
|
||||
* 0xffffffff => MSB 32-bit all 1s
|
||||
* 0xef10 => Magic ID, represents emulated (virtual) function IO
|
||||
* 0x0000 => 16-bits reserved for flags
|
||||
*
|
||||
* The beginning of state information is marked by _DEV_CONFIG_STATE,
|
||||
* _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
|
||||
* certain state information is marked by _END_OF_STATE.
|
||||
*/
|
||||
#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL)
|
||||
#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL)
|
||||
#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
|
||||
#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
|
||||
|
||||
static int64_t bytes_transferred;
|
||||
|
||||
/*
 * Perform one pread()/pwrite() of @count bytes at @off on the device fd.
 *
 * Returns 0 when exactly @count bytes were transferred, -errno when the
 * system call failed, and -EINVAL on a short transfer.
 */
static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
                                  off_t off, bool iswrite)
{
    int ret;

    ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
                    pread(vbasedev->fd, val, count, off);
    if (ret < count) {
        /* Latch errno before error_report() has a chance to change it. */
        int err = errno;

        /*
         * errno is only meaningful when the call itself failed; a short
         * transfer leaves whatever value an earlier call stored there.
         */
        error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
                     HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count,
                     vbasedev->name, (hwaddr)off,
                     (ret < 0) ? strerror(err) : "short transfer");
        return (ret < 0) ? -err : -EINVAL;
    }
    return 0;
}
|
||||
|
||||
/*
 * Transfer @count bytes between @buf and the device fd starting at @off,
 * split into accesses of 8, 4, 2 or 1 bytes. Each access uses the largest
 * width that both fits in the remaining count and divides the current
 * offset.
 *
 * Returns the number of bytes transferred, or the error returned by
 * vfio_mig_access() for the first access that fails.
 */
static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count,
                       off_t off, bool iswrite)
{
    __u8 *cursor = buf;
    int total = 0;

    while (count) {
        int chunk;
        int ret;

        if (count >= 8 && !(off % 8)) {
            chunk = 8;
        } else if (count >= 4 && !(off % 4)) {
            chunk = 4;
        } else if (count >= 2 && !(off % 2)) {
            chunk = 2;
        } else {
            chunk = 1;
        }

        ret = vfio_mig_access(vbasedev, cursor, chunk, off, iswrite);
        if (ret) {
            return ret;
        }

        cursor += chunk;
        off += chunk;
        total += chunk;
        count -= chunk;
    }

    return total;
}
|
||||
|
||||
#define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, false)
|
||||
#define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, true)
|
||||
|
||||
#define VFIO_MIG_STRUCT_OFFSET(f) \
|
||||
offsetof(struct vfio_device_migration_info, f)
|
||||
/*
|
||||
* Change the device_state register for device @vbasedev. Bits set in @mask
|
||||
* are preserved, bits set in @value are set, and bits not set in either @mask
|
||||
* or @value are cleared in device_state. If the register cannot be accessed,
|
||||
* the resulting state would be invalid, or the device enters an error state,
|
||||
* an error is returned.
|
||||
*/
|
||||
|
||||
static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
|
||||
uint32_t value)
|
||||
{
|
||||
VFIOMigration *migration = vbasedev->migration;
|
||||
VFIORegion *region = &migration->region;
|
||||
off_t dev_state_off = region->fd_offset +
|
||||
VFIO_MIG_STRUCT_OFFSET(device_state);
|
||||
uint32_t device_state;
|
||||
int ret;
|
||||
|
||||
ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
|
||||
dev_state_off);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
device_state = (device_state & mask) | value;
|
||||
|
||||
if (!VFIO_DEVICE_STATE_VALID(device_state)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state),
|
||||
dev_state_off);
|
||||
if (ret < 0) {
|
||||
int rret;
|
||||
|
||||
rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
|
||||
dev_state_off);
|
||||
|
||||
if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) {
|
||||
hw_error("%s: Device in error state 0x%x", vbasedev->name,
|
||||
device_state);
|
||||
return rret ? rret : -EIO;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
migration->device_state = device_state;
|
||||
trace_vfio_migration_set_state(vbasedev->name, device_state);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Locate @data_offset within @region's sparse mmap areas.
 *
 * Returns a pointer into the mapping that contains @data_offset, or NULL if
 * the offset is not covered by a mapping (or the region has no mmaps array).
 * When @size is non-NULL it receives how many of the requested @data_size
 * bytes can be handled in one go: up to the end of the containing mapping,
 * up to the start of the nearest following mapping, or, with no mmaps array,
 * up to the end of the region.
 */
static void *get_data_section_size(VFIORegion *region, uint64_t data_offset,
                                   uint64_t data_size, uint64_t *size)
{
    uint64_t next_map_start = 0;
    void *mapped = NULL;
    int idx;

    if (!region->mmaps) {
        if (size) {
            *size = MIN(data_size, region->size - data_offset);
        }
        return mapped;
    }

    for (idx = 0; idx < region->nr_mmaps; idx++) {
        VFIOMmap *map = &region->mmaps[idx];
        bool inside = (data_offset >= map->offset) &&
                      (data_offset < map->offset + map->size);

        if (inside) {
            /* check if data_offset is within sparse mmap areas */
            mapped = map->mmap + data_offset - map->offset;
            if (size) {
                *size = MIN(data_size, map->offset + map->size - data_offset);
            }
            break;
        }

        /*
         * data_offset is not within sparse mmap areas, find size of
         * non-mapped area. Check through all list since region->mmaps list
         * is not sorted.
         */
        if ((data_offset < map->offset) &&
            (!next_map_start || next_map_start > map->offset)) {
            next_map_start = map->offset;
        }
    }

    /* Also reached when the containing slot yielded a NULL pointer. */
    if (!mapped && size) {
        if (next_map_start) {
            *size = MIN(data_size, next_map_start - data_offset);
        } else {
            *size = data_size;
        }
    }

    return mapped;
}
|
||||
|
||||
/*
 * Copy the device's current data section into the migration stream @f.
 *
 * data_offset and data_size are read from the migration region header,
 * data_size is written to the stream, then the payload follows in sections:
 * straight from the mmap where one covers the offset, otherwise through a
 * temporary buffer filled with vfio_mig_read().
 *
 * Returns a negative value if a register read or allocation fails, otherwise
 * the stream error status from qemu_file_get_error(). When that status is 0
 * and @size is non-NULL, *@size receives data_size.
 */
static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
{
    VFIOMigration *migration = vbasedev->migration;
    VFIORegion *region = &migration->region;
    uint64_t data_offset = 0;
    uint64_t data_size = 0;
    uint64_t remaining;
    int ret;

    ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
                      region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
    if (ret < 0) {
        return ret;
    }

    ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size),
                        region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
    if (ret < 0) {
        return ret;
    }

    trace_vfio_save_buffer(vbasedev->name, data_offset, data_size,
                           migration->pending_bytes);

    qemu_put_be64(f, data_size);

    for (remaining = data_size; remaining; ) {
        uint64_t sec_size;
        void *mapped = get_data_section_size(region, data_offset, remaining,
                                             &sec_size);
        void *buf = mapped;

        if (!mapped) {
            /* Not mmapped: bounce the section through a heap buffer. */
            buf = g_try_malloc(sec_size);
            if (!buf) {
                error_report("%s: Error allocating buffer ", __func__);
                return -ENOMEM;
            }

            ret = vfio_mig_read(vbasedev, buf, sec_size,
                                region->fd_offset + data_offset);
            if (ret < 0) {
                g_free(buf);
                return ret;
            }
        }

        qemu_put_buffer(f, buf, sec_size);

        if (!mapped) {
            g_free(buf);
        }

        remaining -= sec_size;
        data_offset += sec_size;
    }

    ret = qemu_file_get_error(f);
    if (ret == 0 && size != NULL) {
        *size = data_size;
    }

    bytes_transferred += data_size;
    return ret;
}
|
||||
|
||||
/*
 * Feed @data_size bytes from the migration stream @f into the device's
 * migration region.
 *
 * Each pass reads data_offset from the region header, copies as much as fits
 * between that offset and the end of the region (directly into the mmap
 * where available, otherwise through a temporary buffer written back with
 * vfio_mig_write()), then writes the amount copied to the data_size
 * register. Passes repeat until all of @data_size has been consumed.
 *
 * Returns 0 on success or a negative value on failure.
 */
static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
                            uint64_t data_size)
{
    VFIORegion *region = &vbasedev->migration->region;
    uint64_t data_offset = 0;
    int ret;

    do {
        uint64_t report_size;
        uint64_t remaining;

        ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
                      region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
        if (ret < 0) {
            return ret;
        }

        if (data_offset + data_size > region->size) {
            /*
             * If data_size is greater than the data section of migration region
             * then iterate the write buffer operation. This case can occur if
             * size of migration region at destination is smaller than size of
             * migration region at source.
             */
            report_size = region->size - data_offset;
            data_size -= report_size;
        } else {
            report_size = data_size;
            data_size = 0;
        }
        remaining = report_size;

        trace_vfio_load_state_device_data(vbasedev->name, data_offset,
                                          remaining);

        while (remaining) {
            uint64_t sec_size;
            void *mapped = get_data_section_size(region, data_offset,
                                                 remaining, &sec_size);
            void *buf = mapped;

            if (!mapped) {
                buf = g_try_malloc(sec_size);
                if (!buf) {
                    error_report("%s: Error allocating buffer ", __func__);
                    return -ENOMEM;
                }
            }

            qemu_get_buffer(f, buf, sec_size);

            if (!mapped) {
                /* Bounce buffer: push its contents to the device. */
                ret = vfio_mig_write(vbasedev, buf, sec_size,
                                     region->fd_offset + data_offset);
                g_free(buf);

                if (ret < 0) {
                    return ret;
                }
            }

            remaining -= sec_size;
            data_offset += sec_size;
        }

        ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size),
                        region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
        if (ret < 0) {
            return ret;
        }
    } while (data_size);

    return 0;
}
|
||||
|
||||
/*
 * Refresh migration->pending_bytes from the device's pending_bytes register.
 * Returns 0 on success; on a read failure the cached value is reset to 0 and
 * the negative error is returned.
 */
static int vfio_update_pending(VFIODevice *vbasedev)
{
    VFIOMigration *migration = vbasedev->migration;
    off_t reg_off = migration->region.fd_offset +
                    VFIO_MIG_STRUCT_OFFSET(pending_bytes);
    uint64_t pending_bytes = 0;
    int ret;

    ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes),
                        reg_off);
    if (ret < 0) {
        migration->pending_bytes = 0;
        return ret;
    }

    migration->pending_bytes = pending_bytes;
    trace_vfio_update_pending(vbasedev->name, pending_bytes);
    return 0;
}
|
||||
|
||||
/*
 * Write the device config section to @f: the _DEV_CONFIG_STATE marker, the
 * output of the device's vfio_save_config hook (when one is set), and the
 * _END_OF_STATE marker. Returns the stream's error status.
 */
static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
{
    VFIODevice *vbasedev = opaque;
    bool has_hook = vbasedev->ops && vbasedev->ops->vfio_save_config;

    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE);

    if (has_hook) {
        vbasedev->ops->vfio_save_config(vbasedev, f);
    }

    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);

    trace_vfio_save_device_config_state(vbasedev->name);

    return qemu_file_get_error(f);
}
|
||||
|
||||
/*
 * Read the device config section from @f: run the device's vfio_load_config
 * hook (when one is set), then require the _END_OF_STATE marker.
 *
 * Returns the hook's error if it fails, -EINVAL if the end marker is wrong,
 * otherwise the stream's error status.
 */
static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
{
    VFIODevice *vbasedev = opaque;
    uint64_t marker;

    if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
        int ret = vbasedev->ops->vfio_load_config(vbasedev, f);

        if (ret) {
            error_report("%s: Failed to load device config space",
                         vbasedev->name);
            return ret;
        }
    }

    marker = qemu_get_be64(f);
    if (marker != VFIO_MIG_FLAG_END_OF_STATE) {
        error_report("%s: Failed loading device config space, "
                     "end flag incorrect 0x%"PRIx64, vbasedev->name, marker);
        return -EINVAL;
    }

    trace_vfio_load_device_config_state(vbasedev->name);
    return qemu_file_get_error(f);
}
|
||||
|
||||
/*
 * Start (@start true) or stop (@start false) dirty page tracking on the
 * container that @vbasedev's group belongs to.
 *
 * Starting is refused with -EINVAL unless the cached device state has
 * VFIO_DEVICE_STATE_SAVING set. Returns 0 on success or -errno if the
 * VFIO_IOMMU_DIRTY_PAGES ioctl fails.
 */
static int vfio_set_dirty_page_tracking(VFIODevice *vbasedev, bool start)
{
    int ret;
    VFIOMigration *migration = vbasedev->migration;
    VFIOContainer *container = vbasedev->group->container;
    struct vfio_iommu_type1_dirty_bitmap dirty = {
        .argsz = sizeof(dirty),
    };

    if (start) {
        if (migration->device_state & VFIO_DEVICE_STATE_SAVING) {
            dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
        } else {
            return -EINVAL;
        }
    } else {
        dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
    }

    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
    if (ret) {
        /*
         * Latch errno before reporting: error_report() may change it, and
         * the value returned must be the one the ioctl produced.
         */
        int err = errno;

        error_report("Failed to set dirty tracking flag 0x%x errno: %d",
                     dirty.flags, err);
        return -err;
    }
    return ret;
}
|
||||
|
||||
/*
 * Undo migration-time setup for @vbasedev: request that dirty page tracking
 * stop (the result is ignored) and unmap the migration region if it has an
 * mmaps array.
 */
static void vfio_migration_cleanup(VFIODevice *vbasedev)
{
    VFIORegion *region = &vbasedev->migration->region;

    vfio_set_dirty_page_tracking(vbasedev, false);

    if (!region->mmaps) {
        return;
    }
    vfio_region_unmap(region);
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/*
 * Save-side setup: emit the _DEV_SETUP_STATE marker, try to mmap the
 * migration region (a failure is only reported; the function carries on),
 * put the device into the SAVING state, start dirty page tracking and emit
 * the _END_OF_STATE marker.
 *
 * Returns 0 on success, or the first error from the state change, the dirty
 * tracking request or the stream.
 */
static int vfio_save_setup(QEMUFile *f, void *opaque)
{
    VFIODevice *vbasedev = opaque;
    VFIORegion *region = &vbasedev->migration->region;
    int ret;

    trace_vfio_save_setup(vbasedev->name);

    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);

    if (region->mmaps) {
        /*
         * Calling vfio_region_mmap() from migration thread. Memory API called
         * from this function require locking the iothread when called from
         * outside the main loop thread.
         */
        qemu_mutex_lock_iothread();
        ret = vfio_region_mmap(region);
        qemu_mutex_unlock_iothread();

        if (ret) {
            error_report("%s: Failed to mmap VFIO migration region: %s",
                         vbasedev->name, strerror(-ret));
            error_report("%s: Falling back to slow path", vbasedev->name);
        }
    }

    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
                                   VFIO_DEVICE_STATE_SAVING);
    if (ret) {
        error_report("%s: Failed to set state SAVING", vbasedev->name);
        return ret;
    }

    ret = vfio_set_dirty_page_tracking(vbasedev, true);
    if (ret) {
        return ret;
    }

    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);

    /* The stream status is the final result: 0 when nothing went wrong. */
    return qemu_file_get_error(f);
}
|
||||
|
||||
static void vfio_save_cleanup(void *opaque)
|
||||
{
|
||||
VFIODevice *vbasedev = opaque;
|
||||
|
||||
vfio_migration_cleanup(vbasedev);
|
||||
trace_vfio_save_cleanup(vbasedev->name);
|
||||
}
|
||||
|
||||
/*
 * Pending-size hook: refresh the device's pending byte count and add it to
 * *@res_precopy_only. If the refresh fails, the counters are left untouched.
 */
static void vfio_save_pending(QEMUFile *f, void *opaque,
                              uint64_t threshold_size,
                              uint64_t *res_precopy_only,
                              uint64_t *res_compatible,
                              uint64_t *res_postcopy_only)
{
    VFIODevice *vbasedev = opaque;

    if (vfio_update_pending(vbasedev)) {
        return;
    }

    *res_precopy_only += vbasedev->migration->pending_bytes;

    trace_vfio_save_pending(vbasedev->name, *res_precopy_only,
                            *res_postcopy_only, *res_compatible);
}
|
||||
|
||||
/*
 * Iteration hook: emit one _DEV_DATA_STATE section.
 *
 * If no bytes are cached as pending, the count is refreshed; when it is
 * still zero an empty section is written and 1 is returned to signal that
 * the data is finished. Otherwise one buffer is saved and 0 is returned.
 * Errors are returned as reported by the failing step.
 */
static int vfio_save_iterate(QEMUFile *f, void *opaque)
{
    VFIODevice *vbasedev = opaque;
    VFIOMigration *migration = vbasedev->migration;
    uint64_t data_size;
    int ret;

    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);

    if (!migration->pending_bytes) {
        ret = vfio_update_pending(vbasedev);
        if (ret) {
            return ret;
        }

        if (!migration->pending_bytes) {
            qemu_put_be64(f, 0);
            qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
            /* indicates data finished, goto complete phase */
            return 1;
        }
    }

    ret = vfio_save_buffer(f, vbasedev, &data_size);
    if (ret) {
        error_report("%s: vfio_save_buffer failed %s", vbasedev->name,
                     strerror(errno));
        return ret;
    }

    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);

    ret = qemu_file_get_error(f);
    if (ret) {
        return ret;
    }

    /*
     * Reset pending_bytes as .save_live_pending is not called during savevm or
     * snapshot case, in such case vfio_update_pending() at the start of this
     * function updates pending_bytes.
     */
    migration->pending_bytes = 0;
    trace_vfio_save_iterate(vbasedev->name, data_size);
    return 0;
}
|
||||
|
||||
/*
 * .save_live_complete_precopy handler.
 *
 * Sequence:
 *   1. request the "STOP and SAVING" device state (mask clears _RUNNING,
 *      value sets _SAVING);
 *   2. save the device config state;
 *   3. drain device data: while pending_bytes is non-zero, write a
 *      VFIO_MIG_FLAG_DEV_DATA_STATE tag followed by one buffer, stopping
 *      early if a buffer comes back empty;
 *   4. write VFIO_MIG_FLAG_END_OF_STATE and check the stream for errors;
 *   5. clear _SAVING to reach the "STOPPED" state.
 *
 * Returns 0 on success or the error of the first failing step.
 */
static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
{
    VFIODevice *vbasedev = opaque;
    VFIOMigration *mig = vbasedev->migration;
    uint64_t chunk_size;
    int rc;

    rc = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_RUNNING,
                                  VFIO_DEVICE_STATE_SAVING);
    if (rc != 0) {
        error_report("%s: Failed to set state STOP and SAVING",
                     vbasedev->name);
        return rc;
    }

    rc = vfio_save_device_config_state(f, opaque);
    if (rc != 0) {
        return rc;
    }

    rc = vfio_update_pending(vbasedev);
    if (rc != 0) {
        return rc;
    }

    while (mig->pending_bytes > 0) {
        qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);

        rc = vfio_save_buffer(f, vbasedev, &chunk_size);
        if (rc < 0) {
            error_report("%s: Failed to save buffer", vbasedev->name);
            return rc;
        }

        /* An empty buffer ends the drain even if pending_bytes is non-zero. */
        if (chunk_size == 0) {
            break;
        }

        rc = vfio_update_pending(vbasedev);
        if (rc != 0) {
            return rc;
        }
    }

    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);

    rc = qemu_file_get_error(f);
    if (rc != 0) {
        return rc;
    }

    rc = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_SAVING, 0);
    if (rc != 0) {
        error_report("%s: Failed to set state STOPPED", vbasedev->name);
        return rc;
    }

    trace_vfio_save_complete_precopy(vbasedev->name);
    return 0;
}
|
||||
|
||||
/*
 * .load_setup handler.
 *
 * Tries to mmap the migration region when it advertises mmaps; a failure
 * there is only reported (the code falls back to the slow path).  Then the
 * device is switched to VFIO_DEVICE_STATE_RESUMING.  If that state change
 * fails, the region is unmapped again (when it has mmaps) and the error is
 * returned.
 *
 * Returns 0 on success, otherwise the vfio_migration_set_state() error.
 */
static int vfio_load_setup(QEMUFile *f, void *opaque)
{
    VFIODevice *vbasedev = opaque;
    VFIOMigration *mig = vbasedev->migration;
    int rc;

    if (mig->region.mmaps) {
        rc = vfio_region_mmap(&mig->region);
        if (rc != 0) {
            /* Not fatal: report and continue without the mapping. */
            error_report("%s: Failed to mmap VFIO migration region %d: %s",
                         vbasedev->name, mig->region.nr,
                         strerror(-rc));
            error_report("%s: Falling back to slow path", vbasedev->name);
        }
    }

    rc = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK,
                                  VFIO_DEVICE_STATE_RESUMING);
    if (rc == 0) {
        return 0;
    }

    error_report("%s: Failed to set state RESUMING", vbasedev->name);
    if (mig->region.mmaps) {
        vfio_region_unmap(&mig->region);
    }
    return rc;
}
|
||||
|
||||
static int vfio_load_cleanup(void *opaque)
|
||||
{
|
||||
VFIODevice *vbasedev = opaque;
|
||||
|
||||
vfio_migration_cleanup(vbasedev);
|
||||
trace_vfio_load_cleanup(vbasedev->name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * .load_state handler.
 *
 * Reads 64-bit big-endian tags from @f until VFIO_MIG_FLAG_END_OF_STATE:
 *   - VFIO_MIG_FLAG_DEV_CONFIG_STATE: load the device config state;
 *   - VFIO_MIG_FLAG_DEV_SETUP_STATE:  must be followed directly by the
 *     end-of-state marker, which terminates the call;
 *   - VFIO_MIG_FLAG_DEV_DATA_STATE:   a 64-bit size follows; a non-zero
 *     size is loaded through vfio_load_buffer().
 * Any other tag is rejected.
 *
 * Returns 0 on success, -EINVAL for an unknown tag or a malformed setup
 * section, or the error reported by a helper / the QEMUFile.
 */
static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
{
    VFIODevice *vbasedev = opaque;
    uint64_t tag;
    int ret = 0;

    for (tag = qemu_get_be64(f); tag != VFIO_MIG_FLAG_END_OF_STATE; ) {

        trace_vfio_load_state(vbasedev->name, tag);

        if (tag == VFIO_MIG_FLAG_DEV_CONFIG_STATE) {
            ret = vfio_load_device_config_state(f, opaque);
            if (ret) {
                return ret;
            }
        } else if (tag == VFIO_MIG_FLAG_DEV_SETUP_STATE) {
            tag = qemu_get_be64(f);
            if (tag != VFIO_MIG_FLAG_END_OF_STATE) {
                error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64,
                             vbasedev->name, tag);
                return -EINVAL;
            }
            return ret;
        } else if (tag == VFIO_MIG_FLAG_DEV_DATA_STATE) {
            uint64_t data_size = qemu_get_be64(f);

            /* A zero-sized data section carries no payload to load. */
            if (data_size) {
                ret = vfio_load_buffer(f, vbasedev, data_size);
                if (ret < 0) {
                    return ret;
                }
            }
        } else {
            error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, tag);
            return -EINVAL;
        }

        /* Fetch the next tag and make sure the stream is still healthy. */
        tag = qemu_get_be64(f);
        ret = qemu_file_get_error(f);
        if (ret) {
            return ret;
        }
    }

    return ret;
}
|
||||
|
||||
static SaveVMHandlers savevm_vfio_handlers = {
|
||||
.save_setup = vfio_save_setup,
|
||||
.save_cleanup = vfio_save_cleanup,
|
||||
.save_live_pending = vfio_save_pending,
|
||||
.save_live_iterate = vfio_save_iterate,
|
||||
.save_live_complete_precopy = vfio_save_complete_precopy,
|
||||
.load_setup = vfio_load_setup,
|
||||
.load_cleanup = vfio_load_cleanup,
|
||||
.load_state = vfio_load_state,
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/*
 * VM run-state change handler (registered with
 * qemu_add_vm_change_state_handler() in vfio_migration_init()).
 *
 * Mirrors the VM's running/stopped state into the VFIO device state.
 * Nothing is done when @running matches the cached vm_running value.
 *
 * @opaque:  the VFIODevice.
 * @running: non-zero when the VM is (re)starting, zero when it stops.
 * @state:   the new RunState, used for tracing only.
 *
 * On failure to change the device state the error is reported and, when an
 * outgoing migration stream exists, recorded on it so that the migration
 * fails.  The handler cannot return an error itself.
 */
static void vfio_vmstate_change(void *opaque, int running, RunState state)
{
    VFIODevice *vbasedev = opaque;
    VFIOMigration *migration = vbasedev->migration;
    uint32_t value, mask;
    int ret;

    if (migration->vm_running == running) {
        return;
    }

    if (running) {
        /*
         * Here device state can have one of _SAVING, _RESUMING or _STOP bit.
         * Transition from _SAVING to _RUNNING can happen if there is migration
         * failure, in that case clear _SAVING bit.
         * Transition from _RESUMING to _RUNNING occurs during resuming
         * phase, in that case clear _RESUMING bit.
         * In both the above cases, set _RUNNING bit.
         */
        mask = ~VFIO_DEVICE_STATE_MASK;
        value = VFIO_DEVICE_STATE_RUNNING;
    } else {
        /*
         * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset
         * _RUNNING bit
         */
        mask = ~VFIO_DEVICE_STATE_RUNNING;
        value = 0;
    }

    ret = vfio_migration_set_state(vbasedev, mask, value);
    if (ret) {
        QEMUFile *to_dst;

        /*
         * Migration should be aborted in this case, but vm_state_notify()
         * currently does not support reporting failures.
         */
        error_report("%s: Failed to set device state 0x%x", vbasedev->name,
                     (migration->device_state & mask) | value);

        /*
         * This handler also runs for plain stop/cont and on the incoming
         * side, where there is no outgoing stream; only flag the error on
         * the stream when one actually exists.
         */
        to_dst = migrate_get_current()->to_dst_file;
        if (to_dst) {
            qemu_file_set_error(to_dst, ret);
        }
    }
    migration->vm_running = running;
    trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
            (migration->device_state & mask) | value);
}
|
||||
|
||||
/*
 * Migration state-change notifier.
 *
 * @notifier is the migration_state member embedded in a VFIOMigration;
 * @data is the MigrationState.  When the migration is being cancelled, has
 * been cancelled or has failed, the transferred-bytes counter is reset and
 * the device is asked to clear _SAVING/_RESUMING and set _RUNNING.  All
 * other states are only traced.
 */
static void vfio_migration_state_notifier(Notifier *notifier, void *data)
{
    MigrationState *s = data;
    VFIOMigration *mig = container_of(notifier, VFIOMigration,
                                      migration_state);
    VFIODevice *vbasedev = mig->vbasedev;

    trace_vfio_migration_state_notifier(vbasedev->name,
                                        MigrationStatus_str(s->state));

    switch (s->state) {
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_FAILED:
        break;
    default:
        /* Nothing to undo for any other migration status. */
        return;
    }

    bytes_transferred = 0;
    if (vfio_migration_set_state(vbasedev,
                  ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING),
                  VFIO_DEVICE_STATE_RUNNING)) {
        error_report("%s: Failed to set state RUNNING", vbasedev->name);
    }
}
|
||||
|
||||
/*
 * Tears down the per-device migration state: exits and finalizes the
 * migration region, frees the VFIOMigration and clears the device's
 * pointer to it.  The caller must ensure vbasedev->migration is non-NULL.
 */
static void vfio_migration_exit(VFIODevice *vbasedev)
{
    VFIOMigration *mig = vbasedev->migration;

    vfio_region_exit(&mig->region);
    vfio_region_finalize(&mig->region);

    g_free(mig);
    vbasedev->migration = NULL;
}
|
||||
|
||||
/*
 * Allocates and wires up the migration state for @vbasedev.
 *
 * Sets up the migration region described by @info, registers the live
 * savevm handlers under "<vmstate id>/vfio" (or plain "vfio" when the
 * device has no vmstate id), and installs the VM run-state handler and the
 * migration state notifier.
 *
 * Returns 0 on success; -EINVAL when the device ops provide no
 * vfio_get_object hook, the hook returns NULL, or the region is zero-sized;
 * otherwise the vfio_region_setup() error.  On the region failures the
 * partially built state is released with vfio_migration_exit().
 */
static int vfio_migration_init(VFIODevice *vbasedev,
                               struct vfio_region_info *info)
{
    g_autofree char *path = NULL, *oid = NULL;
    char id[256] = "";
    VFIOMigration *mig;
    Object *obj;
    int ret;

    if (!vbasedev->ops->vfio_get_object) {
        return -EINVAL;
    }

    obj = vbasedev->ops->vfio_get_object(vbasedev);
    if (!obj) {
        return -EINVAL;
    }

    mig = g_new0(VFIOMigration, 1);
    vbasedev->migration = mig;

    ret = vfio_region_setup(obj, vbasedev, &mig->region,
                            info->index, "migration");
    if (ret) {
        error_report("%s: Failed to setup VFIO migration region %d: %s",
                     vbasedev->name, info->index, strerror(-ret));
        goto err;
    }

    if (!mig->region.size) {
        error_report("%s: Invalid zero-sized VFIO migration region %d",
                     vbasedev->name, info->index);
        ret = -EINVAL;
        goto err;
    }

    mig->vbasedev = vbasedev;

    /* Build the savevm section name, then pad it into the fixed-size id. */
    oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
    path = oid ? g_strdup_printf("%s/vfio", oid) : g_strdup("vfio");
    strpadcpy(id, sizeof(id), path, '\0');

    register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
                         vbasedev);

    mig->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
                                                     vbasedev);
    mig->migration_state.notify = vfio_migration_state_notifier;
    add_migration_state_change_notifier(&mig->migration_state);
    return 0;

err:
    vfio_migration_exit(vbasedev);
    return ret;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int64_t vfio_mig_bytes_transferred(void)
|
||||
{
|
||||
return bytes_transferred;
|
||||
}
|
||||
|
||||
/*
 * Probes @vbasedev for migration support and enables it when possible.
 *
 * Migration is set up only if the container supports dirty page tracking,
 * the device exposes a VFIO_REGION_TYPE_MIGRATION /
 * VFIO_REGION_SUBTYPE_MIGRATION region, and vfio_migration_init() succeeds.
 * In every other case a migration blocker is installed for the device.
 *
 * @errp: receives the error if the blocker itself cannot be added.
 *
 * Returns 0 when migration support was initialized; otherwise the result of
 * migrate_add_blocker() (0 if the blocker was added successfully).
 */
int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
{
    VFIOContainer *container = vbasedev->group->container;
    struct vfio_region_info *info = NULL;
    Error *local_err = NULL;
    int ret = -ENOTSUP;

    if (!container->dirty_pages_supported) {
        goto add_blocker;
    }

    ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION,
                                   VFIO_REGION_SUBTYPE_MIGRATION, &info);
    if (ret) {
        goto add_blocker;
    }

    ret = vfio_migration_init(vbasedev, info);
    if (ret) {
        goto add_blocker;
    }

    /* Trace before freeing: the trace point reads info->index. */
    trace_vfio_migration_probe(vbasedev->name, info->index);
    g_free(info);
    return 0;

add_blocker:
    error_setg(&vbasedev->migration_blocker,
               "VFIO device doesn't support migration");
    g_free(info);

    ret = migrate_add_blocker(vbasedev->migration_blocker, &local_err);
    if (local_err) {
        /* The blocker was not registered; hand the error up and drop it. */
        error_propagate(errp, local_err);
        error_free(vbasedev->migration_blocker);
        vbasedev->migration_blocker = NULL;
    }
    return ret;
}
|
||||
|
||||
/*
 * Undoes vfio_migration_probe(): if migration state exists, unregisters the
 * migration notifier and the VM run-state handler and frees the state; if a
 * migration blocker was installed, removes and frees it.  Safe to call when
 * neither is present.
 */
void vfio_migration_finalize(VFIODevice *vbasedev)
{
    VFIOMigration *mig = vbasedev->migration;
    Error *blocker = vbasedev->migration_blocker;

    if (mig) {
        remove_migration_state_change_notifier(&mig->migration_state);
        qemu_del_vm_change_state_handler(mig->vm_state);
        vfio_migration_exit(vbasedev);
    }

    if (blocker) {
        migrate_del_blocker(blocker);
        error_free(blocker);
        vbasedev->migration_blocker = NULL;
    }
}
|
@ -41,6 +41,7 @@
|
||||
#include "trace.h"
|
||||
#include "qapi/error.h"
|
||||
#include "migration/blocker.h"
|
||||
#include "migration/qemu-file.h"
|
||||
|
||||
#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
|
||||
|
||||
@ -2394,10 +2395,68 @@ static void vfio_pci_compute_needs_reset(VFIODevice *vbasedev)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * VFIODeviceOps.vfio_get_object hook: returns the QOM Object of the
 * VFIOPCIDevice that embeds @vbasedev.
 */
static Object *vfio_pci_get_object(VFIODevice *vbasedev)
{
    return OBJECT(container_of(vbasedev, VFIOPCIDevice, vbasedev));
}
|
||||
|
||||
static bool vfio_msix_present(void *opaque, int version_id)
|
||||
{
|
||||
PCIDevice *pdev = opaque;
|
||||
|
||||
return msix_present(pdev);
|
||||
}
|
||||
|
||||
const VMStateDescription vmstate_vfio_pci_config = {
|
||||
.name = "VFIOPCIDevice",
|
||||
.version_id = 1,
|
||||
.minimum_version_id = 1,
|
||||
.fields = (VMStateField[]) {
|
||||
VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice),
|
||||
VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, vfio_msix_present),
|
||||
VMSTATE_END_OF_LIST()
|
||||
}
|
||||
};
|
||||
|
||||
/*
 * VFIODeviceOps.vfio_save_config hook: writes the VFIOPCIDevice that embeds
 * @vbasedev to @f using vmstate_vfio_pci_config.  The result of
 * vmstate_save_state() is not checked.
 */
static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f)
{
    VFIOPCIDevice *pci_dev = container_of(vbasedev, VFIOPCIDevice, vbasedev);

    vmstate_save_state(f, &vmstate_vfio_pci_config, pci_dev, NULL);
}
|
||||
|
||||
/*
 * VFIODeviceOps.vfio_load_config hook.
 *
 * Loads the PCI (and optional MSI-X) state from @f, then replays the loaded
 * PCI_COMMAND word through vfio_pci_write_config() and re-enables MSI or
 * MSI-X according to the restored config space (MSI is checked first).
 *
 * Returns 0 on success or the vmstate_load_state() error.
 */
static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
{
    VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
    PCIDevice *pdev = &vdev->pdev;
    uint32_t command;
    int ret;

    ret = vmstate_load_state(f, &vmstate_vfio_pci_config, vdev, 1);
    if (ret != 0) {
        return ret;
    }

    command = pci_get_word(pdev->config + PCI_COMMAND);
    vfio_pci_write_config(pdev, PCI_COMMAND, command, 2);

    if (msi_enabled(pdev)) {
        vfio_msi_enable(vdev);
    } else if (msix_enabled(pdev)) {
        vfio_msix_enable(vdev);
    }

    return 0;
}
|
||||
|
||||
static VFIODeviceOps vfio_pci_ops = {
|
||||
.vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
|
||||
.vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
|
||||
.vfio_eoi = vfio_intx_eoi,
|
||||
.vfio_get_object = vfio_pci_get_object,
|
||||
.vfio_save_config = vfio_pci_save_config,
|
||||
.vfio_load_config = vfio_pci_load_config,
|
||||
};
|
||||
|
||||
int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
|
||||
@ -2732,17 +2791,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
||||
return;
|
||||
}
|
||||
|
||||
if (!pdev->failover_pair_id) {
|
||||
error_setg(&vdev->migration_blocker,
|
||||
"VFIO device doesn't support migration");
|
||||
ret = migrate_add_blocker(vdev->migration_blocker, errp);
|
||||
if (ret) {
|
||||
error_free(vdev->migration_blocker);
|
||||
vdev->migration_blocker = NULL;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
vdev->vbasedev.name = g_path_get_basename(vdev->vbasedev.sysfsdev);
|
||||
vdev->vbasedev.ops = &vfio_pci_ops;
|
||||
vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
|
||||
@ -3010,6 +3058,13 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
||||
}
|
||||
}
|
||||
|
||||
if (!pdev->failover_pair_id) {
|
||||
ret = vfio_migration_probe(&vdev->vbasedev, errp);
|
||||
if (ret) {
|
||||
error_report("%s: Migration disabled", vdev->vbasedev.name);
|
||||
}
|
||||
}
|
||||
|
||||
vfio_register_err_notifier(vdev);
|
||||
vfio_register_req_notifier(vdev);
|
||||
vfio_setup_resetfn_quirk(vdev);
|
||||
@ -3024,11 +3079,6 @@ out_teardown:
|
||||
vfio_bars_exit(vdev);
|
||||
error:
|
||||
error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
|
||||
if (vdev->migration_blocker) {
|
||||
migrate_del_blocker(vdev->migration_blocker);
|
||||
error_free(vdev->migration_blocker);
|
||||
vdev->migration_blocker = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void vfio_instance_finalize(Object *obj)
|
||||
@ -3040,10 +3090,6 @@ static void vfio_instance_finalize(Object *obj)
|
||||
vfio_bars_finalize(vdev);
|
||||
g_free(vdev->emulated_config_bits);
|
||||
g_free(vdev->rom);
|
||||
if (vdev->migration_blocker) {
|
||||
migrate_del_blocker(vdev->migration_blocker);
|
||||
error_free(vdev->migration_blocker);
|
||||
}
|
||||
/*
|
||||
* XXX Leaking igd_opregion is not an oversight, we can't remove the
|
||||
* fw_cfg entry therefore leaking this allocation seems like the safest
|
||||
@ -3071,6 +3117,7 @@ static void vfio_exitfn(PCIDevice *pdev)
|
||||
}
|
||||
vfio_teardown_msi(vdev);
|
||||
vfio_bars_exit(vdev);
|
||||
vfio_migration_finalize(&vdev->vbasedev);
|
||||
}
|
||||
|
||||
static void vfio_pci_reset(DeviceState *dev)
|
||||
|
@ -172,7 +172,6 @@ struct VFIOPCIDevice {
|
||||
bool no_vfio_ioeventfd;
|
||||
bool enable_ramfb;
|
||||
VFIODisplay *dpy;
|
||||
Error *migration_blocker;
|
||||
Notifier irqchip_change_notifier;
|
||||
};
|
||||
|
||||
|
@ -166,7 +166,7 @@ static void vfio_intp_mmap_enable(void *opaque)
|
||||
VFIOINTp *tmp;
|
||||
VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)opaque;
|
||||
|
||||
qemu_mutex_lock(&vdev->intp_mutex);
|
||||
QEMU_LOCK_GUARD(&vdev->intp_mutex);
|
||||
QLIST_FOREACH(tmp, &vdev->intp_list, next) {
|
||||
if (tmp->state == VFIO_IRQ_ACTIVE) {
|
||||
trace_vfio_platform_intp_mmap_enable(tmp->pin);
|
||||
@ -174,12 +174,10 @@ static void vfio_intp_mmap_enable(void *opaque)
|
||||
timer_mod(vdev->mmap_timer,
|
||||
qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
|
||||
vdev->mmap_timeout);
|
||||
qemu_mutex_unlock(&vdev->intp_mutex);
|
||||
return;
|
||||
}
|
||||
}
|
||||
vfio_mmap_set_enabled(vdev, true);
|
||||
qemu_mutex_unlock(&vdev->intp_mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -289,7 +287,7 @@ static void vfio_platform_eoi(VFIODevice *vbasedev)
|
||||
VFIOPlatformDevice *vdev =
|
||||
container_of(vbasedev, VFIOPlatformDevice, vbasedev);
|
||||
|
||||
qemu_mutex_lock(&vdev->intp_mutex);
|
||||
QEMU_LOCK_GUARD(&vdev->intp_mutex);
|
||||
QLIST_FOREACH(intp, &vdev->intp_list, next) {
|
||||
if (intp->state == VFIO_IRQ_ACTIVE) {
|
||||
trace_vfio_platform_eoi(intp->pin,
|
||||
@ -314,7 +312,6 @@ static void vfio_platform_eoi(VFIODevice *vbasedev)
|
||||
vfio_intp_inject_pending_lockheld(intp);
|
||||
QSIMPLEQ_REMOVE_HEAD(&vdev->pending_intp_queue, pqnext);
|
||||
}
|
||||
qemu_mutex_unlock(&vdev->intp_mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -113,6 +113,7 @@ vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) "Reg
|
||||
vfio_region_exit(const char *name, int index) "Device %s, region %d"
|
||||
vfio_region_finalize(const char *name, int index) "Device %s, region %d"
|
||||
vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s mmaps enabled: %d"
|
||||
vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Region %s unmap [0x%lx - 0x%lx]"
|
||||
vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries"
|
||||
vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]"
|
||||
vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x"
|
||||
@ -144,3 +145,23 @@ vfio_display_edid_link_up(void) ""
|
||||
vfio_display_edid_link_down(void) ""
|
||||
vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u"
|
||||
vfio_display_edid_write_error(void) ""
|
||||
|
||||
# migration.c
|
||||
vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
|
||||
vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
|
||||
vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
|
||||
vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
|
||||
vfio_save_setup(const char *name) " (%s)"
|
||||
vfio_save_cleanup(const char *name) " (%s)"
|
||||
vfio_save_buffer(const char *name, uint64_t data_offset, uint64_t data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 0x%"PRIx64
|
||||
vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 0x%"PRIx64
|
||||
vfio_save_device_config_state(const char *name) " (%s)"
|
||||
vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64
|
||||
vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d"
|
||||
vfio_save_complete_precopy(const char *name) " (%s)"
|
||||
vfio_load_device_config_state(const char *name) " (%s)"
|
||||
vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
|
||||
vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64
|
||||
vfio_load_cleanup(const char *name) " (%s)"
|
||||
vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64
|
||||
vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "hw/s390x/sclp.h"
|
||||
#include "hw/s390x/s390_flic.h"
|
||||
#include "hw/s390x/css.h"
|
||||
#include "hw/s390x/s390-pci-clp.h"
|
||||
#include "qom/object.h"
|
||||
|
||||
#define TYPE_S390_PCI_HOST_BRIDGE "s390-pcihost"
|
||||
@ -262,6 +263,13 @@ typedef struct S390IOTLBEntry {
|
||||
uint64_t perm;
|
||||
} S390IOTLBEntry;
|
||||
|
||||
typedef struct S390PCIDMACount {
|
||||
int id;
|
||||
int users;
|
||||
uint32_t avail;
|
||||
QTAILQ_ENTRY(S390PCIDMACount) link;
|
||||
} S390PCIDMACount;
|
||||
|
||||
struct S390PCIIOMMU {
|
||||
Object parent_obj;
|
||||
S390PCIBusDevice *pbdev;
|
||||
@ -273,6 +281,7 @@ struct S390PCIIOMMU {
|
||||
uint64_t pba;
|
||||
uint64_t pal;
|
||||
GHashTable *iotlb;
|
||||
S390PCIDMACount *dma_limit;
|
||||
};
|
||||
|
||||
typedef struct S390PCIIOMMUTable {
|
||||
@ -307,6 +316,15 @@ typedef struct ZpciFmb {
|
||||
} ZpciFmb;
|
||||
QEMU_BUILD_BUG_MSG(offsetof(ZpciFmb, fmt0) != 48, "padding in ZpciFmb");
|
||||
|
||||
#define ZPCI_DEFAULT_FN_GRP 0x20
|
||||
typedef struct S390PCIGroup {
|
||||
ClpRspQueryPciGrp zpci_group;
|
||||
int id;
|
||||
QTAILQ_ENTRY(S390PCIGroup) link;
|
||||
} S390PCIGroup;
|
||||
S390PCIGroup *s390_group_create(int id);
|
||||
S390PCIGroup *s390_group_find(int id);
|
||||
|
||||
struct S390PCIBusDevice {
|
||||
DeviceState qdev;
|
||||
PCIDevice *pdev;
|
||||
@ -324,6 +342,8 @@ struct S390PCIBusDevice {
|
||||
uint16_t noi;
|
||||
uint16_t maxstbl;
|
||||
uint8_t sum;
|
||||
S390PCIGroup *pci_group;
|
||||
ClpRspQueryPci zpci_fn;
|
||||
S390MsixInfo msix;
|
||||
AdapterRoutes routes;
|
||||
S390PCIIOMMU *iommu;
|
||||
@ -348,6 +368,8 @@ struct S390pciState {
|
||||
GHashTable *zpci_table;
|
||||
QTAILQ_HEAD(, SeiContainer) pending_sei;
|
||||
QTAILQ_HEAD(, S390PCIBusDevice) zpci_devs;
|
||||
QTAILQ_HEAD(, S390PCIDMACount) zpci_dma_limit;
|
||||
QTAILQ_HEAD(, S390PCIGroup) zpci_groups;
|
||||
};
|
||||
|
||||
S390pciState *s390_get_phb(void);
|
@ -1,21 +1,16 @@
|
||||
/*
|
||||
* s390 PCI instruction definitions
|
||||
* s390 CLP instruction definitions
|
||||
*
|
||||
* Copyright 2014 IBM Corp.
|
||||
* Author(s): Frank Blaschka <frank.blaschka@de.ibm.com>
|
||||
* Hong Bo Li <lihbbj@cn.ibm.com>
|
||||
* Yi Min Zhao <zyimin@cn.ibm.com>
|
||||
* Copyright 2019 IBM Corp.
|
||||
* Author(s): Pierre Morel <pmorel@de.ibm.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or (at
|
||||
* your option) any later version. See the COPYING file in the top-level
|
||||
* directory.
|
||||
*/
|
||||
|
||||
#ifndef HW_S390_PCI_INST_H
|
||||
#define HW_S390_PCI_INST_H
|
||||
|
||||
#include "s390-pci-bus.h"
|
||||
#include "sysemu/dma.h"
|
||||
#ifndef HW_S390_PCI_CLP
|
||||
#define HW_S390_PCI_CLP
|
||||
|
||||
/* CLP common request & response block size */
|
||||
#define CLP_BLK_SIZE 4096
|
||||
@ -84,6 +79,7 @@ typedef struct ClpFhListEntry {
|
||||
#define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */
|
||||
|
||||
#define CLP_UTIL_STR_LEN 64
|
||||
#define CLP_PFIP_NR_SEGMENTS 4
|
||||
|
||||
#define CLP_MASK_FMT 0xf0000000
|
||||
|
||||
@ -125,14 +121,17 @@ typedef struct ClpRspQueryPci {
|
||||
uint32_t fmt;
|
||||
uint64_t reserved1;
|
||||
uint16_t vfn; /* virtual fn number */
|
||||
#define CLP_RSP_QPCI_MASK_UTIL 0x100
|
||||
#define CLP_RSP_QPCI_MASK_PFGID 0xff
|
||||
uint16_t ug;
|
||||
#define CLP_RSP_QPCI_MASK_UTIL 0x01
|
||||
uint8_t flags;
|
||||
uint8_t pfgid;
|
||||
uint32_t fid; /* pci function id */
|
||||
uint8_t bar_size[PCI_BAR_COUNT];
|
||||
uint16_t pchid;
|
||||
uint32_t bar[PCI_BAR_COUNT];
|
||||
uint64_t reserved2;
|
||||
uint8_t pfip[CLP_PFIP_NR_SEGMENTS];
|
||||
uint16_t reserved2;
|
||||
uint8_t fmbl;
|
||||
uint8_t pft;
|
||||
uint64_t sdma; /* start dma as */
|
||||
uint64_t edma; /* end dma as */
|
||||
uint32_t reserved3[11];
|
||||
@ -213,100 +212,4 @@ typedef struct ClpReqRspQueryPciGrp {
|
||||
ClpRspQueryPciGrp response;
|
||||
} QEMU_PACKED ClpReqRspQueryPciGrp;
|
||||
|
||||
/* Load/Store status codes */
|
||||
#define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4
|
||||
#define ZPCI_PCI_ST_FUNC_IN_ERR 8
|
||||
#define ZPCI_PCI_ST_BLOCKED 12
|
||||
#define ZPCI_PCI_ST_INSUF_RES 16
|
||||
#define ZPCI_PCI_ST_INVAL_AS 20
|
||||
#define ZPCI_PCI_ST_FUNC_ALREADY_ENABLED 24
|
||||
#define ZPCI_PCI_ST_DMA_AS_NOT_ENABLED 28
|
||||
#define ZPCI_PCI_ST_2ND_OP_IN_INV_AS 36
|
||||
#define ZPCI_PCI_ST_FUNC_NOT_AVAIL 40
|
||||
#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE 44
|
||||
|
||||
/* Load/Store return codes */
|
||||
#define ZPCI_PCI_LS_OK 0
|
||||
#define ZPCI_PCI_LS_ERR 1
|
||||
#define ZPCI_PCI_LS_BUSY 2
|
||||
#define ZPCI_PCI_LS_INVAL_HANDLE 3
|
||||
|
||||
/* Modify PCI status codes */
|
||||
#define ZPCI_MOD_ST_RES_NOT_AVAIL 4
|
||||
#define ZPCI_MOD_ST_INSUF_RES 16
|
||||
#define ZPCI_MOD_ST_SEQUENCE 24
|
||||
#define ZPCI_MOD_ST_DMAAS_INVAL 28
|
||||
#define ZPCI_MOD_ST_FRAME_INVAL 32
|
||||
#define ZPCI_MOD_ST_ERROR_RECOVER 40
|
||||
|
||||
/* Modify PCI Function Controls */
|
||||
#define ZPCI_MOD_FC_REG_INT 2
|
||||
#define ZPCI_MOD_FC_DEREG_INT 3
|
||||
#define ZPCI_MOD_FC_REG_IOAT 4
|
||||
#define ZPCI_MOD_FC_DEREG_IOAT 5
|
||||
#define ZPCI_MOD_FC_REREG_IOAT 6
|
||||
#define ZPCI_MOD_FC_RESET_ERROR 7
|
||||
#define ZPCI_MOD_FC_RESET_BLOCK 9
|
||||
#define ZPCI_MOD_FC_SET_MEASURE 10
|
||||
|
||||
/* Store PCI Function Controls status codes */
|
||||
#define ZPCI_STPCIFC_ST_PERM_ERROR 8
|
||||
#define ZPCI_STPCIFC_ST_INVAL_DMAAS 28
|
||||
#define ZPCI_STPCIFC_ST_ERROR_RECOVER 40
|
||||
|
||||
/* FIB function controls */
|
||||
#define ZPCI_FIB_FC_ENABLED 0x80
|
||||
#define ZPCI_FIB_FC_ERROR 0x40
|
||||
#define ZPCI_FIB_FC_LS_BLOCKED 0x20
|
||||
#define ZPCI_FIB_FC_DMAAS_REG 0x10
|
||||
|
||||
/* FIB function controls */
|
||||
#define ZPCI_FIB_FC_ENABLED 0x80
|
||||
#define ZPCI_FIB_FC_ERROR 0x40
|
||||
#define ZPCI_FIB_FC_LS_BLOCKED 0x20
|
||||
#define ZPCI_FIB_FC_DMAAS_REG 0x10
|
||||
|
||||
/* Function Information Block */
|
||||
typedef struct ZpciFib {
|
||||
uint8_t fmt; /* format */
|
||||
uint8_t reserved1[7];
|
||||
uint8_t fc; /* function controls */
|
||||
uint8_t reserved2;
|
||||
uint16_t reserved3;
|
||||
uint32_t reserved4;
|
||||
uint64_t pba; /* PCI base address */
|
||||
uint64_t pal; /* PCI address limit */
|
||||
uint64_t iota; /* I/O Translation Anchor */
|
||||
#define FIB_DATA_ISC(x) (((x) >> 28) & 0x7)
|
||||
#define FIB_DATA_NOI(x) (((x) >> 16) & 0xfff)
|
||||
#define FIB_DATA_AIBVO(x) (((x) >> 8) & 0x3f)
|
||||
#define FIB_DATA_SUM(x) (((x) >> 7) & 0x1)
|
||||
#define FIB_DATA_AISBO(x) ((x) & 0x3f)
|
||||
uint32_t data;
|
||||
uint32_t reserved5;
|
||||
uint64_t aibv; /* Adapter int bit vector address */
|
||||
uint64_t aisb; /* Adapter int summary bit address */
|
||||
uint64_t fmb_addr; /* Function measurement address and key */
|
||||
uint32_t reserved6;
|
||||
uint32_t gd;
|
||||
} QEMU_PACKED ZpciFib;
|
||||
|
||||
int pci_dereg_irqs(S390PCIBusDevice *pbdev);
|
||||
void pci_dereg_ioat(S390PCIIOMMU *iommu);
|
||||
int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra);
|
||||
int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra);
|
||||
int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra);
|
||||
int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra);
|
||||
int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr,
|
||||
uint8_t ar, uintptr_t ra);
|
||||
int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar,
|
||||
uintptr_t ra);
|
||||
int stpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar,
|
||||
uintptr_t ra);
|
||||
void fmb_timer_free(S390PCIBusDevice *pbdev);
|
||||
|
||||
#define ZPCI_IO_BAR_MIN 0
|
||||
#define ZPCI_IO_BAR_MAX 5
|
||||
#define ZPCI_CONFIG_BAR 15
|
||||
|
||||
#endif
|
119
include/hw/s390x/s390-pci-inst.h
Normal file
119
include/hw/s390x/s390-pci-inst.h
Normal file
@ -0,0 +1,119 @@
|
||||
/*
|
||||
* s390 PCI instruction definitions
|
||||
*
|
||||
* Copyright 2014 IBM Corp.
|
||||
* Author(s): Frank Blaschka <frank.blaschka@de.ibm.com>
|
||||
* Hong Bo Li <lihbbj@cn.ibm.com>
|
||||
* Yi Min Zhao <zyimin@cn.ibm.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or (at
|
||||
* your option) any later version. See the COPYING file in the top-level
|
||||
* directory.
|
||||
*/
|
||||
|
||||
#ifndef HW_S390_PCI_INST_H
|
||||
#define HW_S390_PCI_INST_H
|
||||
|
||||
#include "s390-pci-bus.h"
|
||||
#include "sysemu/dma.h"
|
||||
|
||||
/* Load/Store status codes */
|
||||
#define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4
|
||||
#define ZPCI_PCI_ST_FUNC_IN_ERR 8
|
||||
#define ZPCI_PCI_ST_BLOCKED 12
|
||||
#define ZPCI_PCI_ST_INSUF_RES 16
|
||||
#define ZPCI_PCI_ST_INVAL_AS 20
|
||||
#define ZPCI_PCI_ST_FUNC_ALREADY_ENABLED 24
|
||||
#define ZPCI_PCI_ST_DMA_AS_NOT_ENABLED 28
|
||||
#define ZPCI_PCI_ST_2ND_OP_IN_INV_AS 36
|
||||
#define ZPCI_PCI_ST_FUNC_NOT_AVAIL 40
|
||||
#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE 44
|
||||
|
||||
/* Load/Store return codes */
|
||||
#define ZPCI_PCI_LS_OK 0
|
||||
#define ZPCI_PCI_LS_ERR 1
|
||||
#define ZPCI_PCI_LS_BUSY 2
|
||||
#define ZPCI_PCI_LS_INVAL_HANDLE 3
|
||||
|
||||
/* Modify PCI status codes */
|
||||
#define ZPCI_MOD_ST_RES_NOT_AVAIL 4
|
||||
#define ZPCI_MOD_ST_INSUF_RES 16
|
||||
#define ZPCI_MOD_ST_SEQUENCE 24
|
||||
#define ZPCI_MOD_ST_DMAAS_INVAL 28
|
||||
#define ZPCI_MOD_ST_FRAME_INVAL 32
|
||||
#define ZPCI_MOD_ST_ERROR_RECOVER 40
|
||||
|
||||
/* Modify PCI Function Controls */
|
||||
#define ZPCI_MOD_FC_REG_INT 2
|
||||
#define ZPCI_MOD_FC_DEREG_INT 3
|
||||
#define ZPCI_MOD_FC_REG_IOAT 4
|
||||
#define ZPCI_MOD_FC_DEREG_IOAT 5
|
||||
#define ZPCI_MOD_FC_REREG_IOAT 6
|
||||
#define ZPCI_MOD_FC_RESET_ERROR 7
|
||||
#define ZPCI_MOD_FC_RESET_BLOCK 9
|
||||
#define ZPCI_MOD_FC_SET_MEASURE 10
|
||||
|
||||
/* Store PCI Function Controls status codes */
|
||||
#define ZPCI_STPCIFC_ST_PERM_ERROR 8
|
||||
#define ZPCI_STPCIFC_ST_INVAL_DMAAS 28
|
||||
#define ZPCI_STPCIFC_ST_ERROR_RECOVER 40
|
||||
|
||||
/* Refresh PCI Translations status codes */
|
||||
#define ZPCI_RPCIT_ST_INSUFF_RES 16
|
||||
|
||||
/* FIB function controls */
|
||||
#define ZPCI_FIB_FC_ENABLED 0x80
|
||||
#define ZPCI_FIB_FC_ERROR 0x40
|
||||
#define ZPCI_FIB_FC_LS_BLOCKED 0x20
|
||||
#define ZPCI_FIB_FC_DMAAS_REG 0x10
|
||||
|
||||
/* FIB function controls */
|
||||
#define ZPCI_FIB_FC_ENABLED 0x80
|
||||
#define ZPCI_FIB_FC_ERROR 0x40
|
||||
#define ZPCI_FIB_FC_LS_BLOCKED 0x20
|
||||
#define ZPCI_FIB_FC_DMAAS_REG 0x10
|
||||
|
||||
/* Function Information Block */
|
||||
/*
 * Function Information Block, the packed structure addressed by the
 * @fiba argument of mpcifc_service_call() / stpcifc_service_call().
 */
typedef struct ZpciFib {
    uint8_t  fmt;           /* format */
    uint8_t  reserved1[7];
    uint8_t  fc;            /* function controls */
    uint8_t  reserved2;
    uint16_t reserved3;
    uint32_t reserved4;
    uint64_t pba;           /* PCI base address */
    uint64_t pal;           /* PCI address limit */
    uint64_t iota;          /* I/O Translation Anchor */
/* Bit-field extractors; presumably applied to the 'data' word below. */
#define FIB_DATA_ISC(x)    (((x) >> 28) & 0x7)
#define FIB_DATA_NOI(x)    (((x) >> 16) & 0xfff)
#define FIB_DATA_AIBVO(x)  (((x) >> 8) & 0x3f)
#define FIB_DATA_SUM(x)    (((x) >> 7) & 0x1)
#define FIB_DATA_AISBO(x)  ((x) & 0x3f)
    uint32_t data;
    uint32_t reserved5;
    uint64_t aibv;          /* Adapter int bit vector address */
    uint64_t aisb;          /* Adapter int summary bit address */
    uint64_t fmb_addr;      /* Function measurement address and key */
    uint32_t reserved6;
    uint32_t gd;
} QEMU_PACKED ZpciFib;
|
||||
|
||||
int pci_dereg_irqs(S390PCIBusDevice *pbdev);
|
||||
void pci_dereg_ioat(S390PCIIOMMU *iommu);
|
||||
int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra);
|
||||
int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra);
|
||||
int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra);
|
||||
int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra);
|
||||
int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr,
|
||||
uint8_t ar, uintptr_t ra);
|
||||
int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar,
|
||||
uintptr_t ra);
|
||||
int stpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar,
|
||||
uintptr_t ra);
|
||||
void fmb_timer_free(S390PCIBusDevice *pbdev);
|
||||
|
||||
#define ZPCI_IO_BAR_MIN 0
|
||||
#define ZPCI_IO_BAR_MAX 5
|
||||
#define ZPCI_CONFIG_BAR 15
|
||||
|
||||
#endif
|
38
include/hw/s390x/s390-pci-vfio.h
Normal file
38
include/hw/s390x/s390-pci-vfio.h
Normal file
@ -0,0 +1,38 @@
|
||||
/*
|
||||
* s390 vfio-pci interfaces
|
||||
*
|
||||
* Copyright 2020 IBM Corp.
|
||||
* Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or (at
|
||||
* your option) any later version. See the COPYING file in the top-level
|
||||
* directory.
|
||||
*/
|
||||
|
||||
#ifndef HW_S390_PCI_VFIO_H
|
||||
#define HW_S390_PCI_VFIO_H
|
||||
|
||||
#include "hw/s390x/s390-pci-bus.h"
|
||||
|
||||
#ifdef CONFIG_LINUX
|
||||
bool s390_pci_update_dma_avail(int fd, unsigned int *avail);
|
||||
S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
|
||||
S390PCIBusDevice *pbdev);
|
||||
void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt);
|
||||
void s390_pci_get_clp_info(S390PCIBusDevice *pbdev);
|
||||
#else
|
||||
/*
 * Fallback used when CONFIG_LINUX is not defined: ignores both arguments,
 * leaves *avail untouched and always returns false.
 */
static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
/*
 * Fallback used when CONFIG_LINUX is not defined: ignores both arguments
 * and always returns NULL, so callers never receive a DMA count object.
 */
static inline S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
|
||||
S390PCIBusDevice *pbdev)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
/* Fallback used when CONFIG_LINUX is not defined: empty body, does nothing. */
static inline void s390_pci_end_dma_count(S390pciState *s,
|
||||
S390PCIDMACount *cnt) { }
|
||||
/* Fallback used when CONFIG_LINUX is not defined: empty body, pbdev is left unchanged. */
static inline void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) { }
|
||||
#endif
|
||||
|
||||
#endif
|
@ -29,6 +29,7 @@
|
||||
#ifdef CONFIG_LINUX
|
||||
#include <linux/vfio.h>
|
||||
#endif
|
||||
#include "sysemu/sysemu.h"
|
||||
|
||||
#define VFIO_MSG_PREFIX "vfio %s: "
|
||||
|
||||
@ -57,6 +58,16 @@ typedef struct VFIORegion {
|
||||
uint8_t nr; /* cache the region number for debug */
|
||||
} VFIORegion;
|
||||
|
||||
/*
 * Migration bookkeeping for a VFIO device. VFIODevice refers to one of
 * these through its 'migration' pointer.
 * NOTE(review): the per-field notes below are inferred from names and types
 * only -- confirm against the implementation in hw/vfio.
 */
typedef struct VFIOMigration {
|
||||
struct VFIODevice *vbasedev; /* presumably the owning device -- TODO confirm */
|
||||
VMChangeStateEntry *vm_state; /* presumably the VM run-state change handler handle */
|
||||
VFIORegion region; /* embedded by value, not a pointer */
|
||||
uint32_t device_state;
|
||||
int vm_running;
|
||||
Notifier migration_state; /* embedded notifier object */
|
||||
uint64_t pending_bytes;
|
||||
} VFIOMigration;
|
||||
|
||||
typedef struct VFIOAddressSpace {
|
||||
AddressSpace *as;
|
||||
QLIST_HEAD(, VFIOContainer) containers;
|
||||
@ -73,6 +84,9 @@ typedef struct VFIOContainer {
|
||||
unsigned iommu_type;
|
||||
Error *error;
|
||||
bool initialized;
|
||||
bool dirty_pages_supported;
|
||||
uint64_t dirty_pgsizes;
|
||||
uint64_t max_dirty_bitmap_size;
|
||||
unsigned long pgsizes;
|
||||
QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
|
||||
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
|
||||
@ -113,12 +127,17 @@ typedef struct VFIODevice {
|
||||
unsigned int num_irqs;
|
||||
unsigned int num_regions;
|
||||
unsigned int flags;
|
||||
VFIOMigration *migration;
|
||||
Error *migration_blocker;
|
||||
} VFIODevice;
|
||||
|
||||
/*
 * Table of per-device callbacks. Every hook receives the VFIODevice it
 * operates on as its first argument.
 */
struct VFIODeviceOps {
|
||||
void (*vfio_compute_needs_reset)(VFIODevice *vdev);
|
||||
int (*vfio_hot_reset_multi)(VFIODevice *vdev);
|
||||
void (*vfio_eoi)(VFIODevice *vdev);
|
||||
Object *(*vfio_get_object)(VFIODevice *vdev);
|
||||
void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f); /* no status returned; presumably writes device config to f */
|
||||
int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f); /* returns int; presumably 0 on success -- TODO confirm */
|
||||
};
|
||||
|
||||
typedef struct VFIOGroup {
|
||||
@ -171,6 +190,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
|
||||
int index, const char *name);
|
||||
int vfio_region_mmap(VFIORegion *region);
|
||||
void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled);
|
||||
void vfio_region_unmap(VFIORegion *region);
|
||||
void vfio_region_exit(VFIORegion *region);
|
||||
void vfio_region_finalize(VFIORegion *region);
|
||||
void vfio_reset_handler(void *opaque);
|
||||
@ -183,6 +203,9 @@ extern const MemoryRegionOps vfio_region_ops;
|
||||
typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
|
||||
extern VFIOGroupList vfio_group_list;
|
||||
|
||||
bool vfio_mig_active(void);
|
||||
int64_t vfio_mig_bytes_transferred(void);
|
||||
|
||||
#ifdef CONFIG_LINUX
|
||||
int vfio_get_region_info(VFIODevice *vbasedev, int index,
|
||||
struct vfio_region_info **info);
|
||||
@ -191,6 +214,10 @@ int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
|
||||
bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type);
|
||||
struct vfio_info_cap_header *
|
||||
vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id);
|
||||
bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
|
||||
unsigned int *avail);
|
||||
struct vfio_info_cap_header *
|
||||
vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
|
||||
#endif
|
||||
extern const MemoryListener vfio_prereg_listener;
|
||||
|
||||
@ -200,4 +227,7 @@ int vfio_spapr_create_window(VFIOContainer *container,
|
||||
int vfio_spapr_remove_window(VFIOContainer *container,
|
||||
hwaddr offset_within_address_space);
|
||||
|
||||
int vfio_migration_probe(VFIODevice *vbasedev, Error **errp);
|
||||
void vfio_migration_finalize(VFIODevice *vbasedev);
|
||||
|
||||
#endif /* HW_VFIO_VFIO_COMMON_H */
|
||||
|
@ -176,7 +176,7 @@ struct pvrdma_port_attr {
|
||||
uint8_t subnet_timeout;
|
||||
uint8_t init_type_reply;
|
||||
uint8_t active_width;
|
||||
uint8_t active_speed;
|
||||
uint16_t active_speed;
|
||||
uint8_t phys_state;
|
||||
uint8_t reserved[2];
|
||||
};
|
||||
|
@ -1617,6 +1617,8 @@ enum ethtool_link_mode_bit_indices {
|
||||
ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT = 87,
|
||||
ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT = 88,
|
||||
ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT = 89,
|
||||
ETHTOOL_LINK_MODE_100baseFX_Half_BIT = 90,
|
||||
ETHTOOL_LINK_MODE_100baseFX_Full_BIT = 91,
|
||||
/* must be last entry */
|
||||
__ETHTOOL_LINK_MODE_MASK_NBITS
|
||||
};
|
||||
|
@ -172,6 +172,9 @@
|
||||
* - add FUSE_WRITE_KILL_PRIV flag
|
||||
* - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING
|
||||
* - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag
|
||||
*
|
||||
* 7.32
|
||||
* - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_FUSE_H
|
||||
@ -203,7 +206,7 @@
|
||||
#define FUSE_KERNEL_VERSION 7
|
||||
|
||||
/** Minor version number of this interface */
|
||||
#define FUSE_KERNEL_MINOR_VERSION 31
|
||||
#define FUSE_KERNEL_MINOR_VERSION 32
|
||||
|
||||
/** The node ID of the root inode */
|
||||
#define FUSE_ROOT_ID 1
|
||||
@ -227,7 +230,7 @@ struct fuse_attr {
|
||||
uint32_t gid;
|
||||
uint32_t rdev;
|
||||
uint32_t blksize;
|
||||
uint32_t padding;
|
||||
uint32_t flags;
|
||||
};
|
||||
|
||||
struct fuse_kstatfs {
|
||||
@ -309,7 +312,10 @@ struct fuse_file_lock {
|
||||
* FUSE_CACHE_SYMLINKS: cache READLINK responses
|
||||
* FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir
|
||||
* FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request
|
||||
* FUSE_MAP_ALIGNMENT: map_alignment field is valid
|
||||
* FUSE_MAP_ALIGNMENT: init_out.map_alignment contains log2(byte alignment) for
|
||||
* foffset and moffset fields in struct
|
||||
* fuse_setupmapping_out and fuse_removemapping_one.
|
||||
* FUSE_SUBMOUNTS: kernel supports auto-mounting directory submounts
|
||||
*/
|
||||
#define FUSE_ASYNC_READ (1 << 0)
|
||||
#define FUSE_POSIX_LOCKS (1 << 1)
|
||||
@ -338,6 +344,7 @@ struct fuse_file_lock {
|
||||
#define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
|
||||
#define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
|
||||
#define FUSE_MAP_ALIGNMENT (1 << 26)
|
||||
#define FUSE_SUBMOUNTS (1 << 27)
|
||||
|
||||
/**
|
||||
* CUSE INIT request/reply flags
|
||||
@ -413,6 +420,13 @@ struct fuse_file_lock {
|
||||
*/
|
||||
#define FUSE_FSYNC_FDATASYNC (1 << 0)
|
||||
|
||||
/**
|
||||
* fuse_attr flags
|
||||
*
|
||||
* FUSE_ATTR_SUBMOUNT: Object is a submount root
|
||||
*/
|
||||
#define FUSE_ATTR_SUBMOUNT (1 << 0)
|
||||
|
||||
enum fuse_opcode {
|
||||
FUSE_LOOKUP = 1,
|
||||
FUSE_FORGET = 2, /* no reply */
|
||||
@ -888,4 +902,34 @@ struct fuse_copy_file_range_in {
|
||||
uint64_t flags;
|
||||
};
|
||||
|
||||
#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
|
||||
#define FUSE_SETUPMAPPING_FLAG_READ (1ull << 1)
|
||||
/*
 * Five 64-bit fields describing a file range and where to map it.
 * Presumably the request payload of FUSE_SETUPMAPPING -- TODO confirm.
 */
struct fuse_setupmapping_in {
|
||||
/* An already open handle */
|
||||
uint64_t fh;
|
||||
/* Offset into the file to start the mapping */
|
||||
uint64_t foffset;
|
||||
/* Length of mapping required */
|
||||
uint64_t len;
|
||||
/* Flags, FUSE_SETUPMAPPING_FLAG_* */
|
||||
uint64_t flags;
|
||||
/* Offset in Memory Window */
|
||||
uint64_t moffset;
|
||||
};
|
||||
|
||||
/* Header announcing how many struct fuse_removemapping_one entries follow it. */
struct fuse_removemapping_in {
|
||||
/* number of fuse_removemapping_one entries that follow */
|
||||
uint32_t count;
|
||||
};
|
||||
|
||||
/* One (offset, length) range; struct fuse_removemapping_in carries the count of these. */
struct fuse_removemapping_one {
|
||||
/* Offset into the dax window at which to start the unmapping */
|
||||
uint64_t moffset;
|
||||
/* Length of mapping required */
|
||||
uint64_t len;
|
||||
};
|
||||
|
||||
#define FUSE_REMOVEMAPPING_MAX_ENTRY \
|
||||
(PAGE_SIZE / sizeof(struct fuse_removemapping_one))
|
||||
|
||||
#endif /* _LINUX_FUSE_H */
|
||||
|
@ -515,6 +515,9 @@
|
||||
#define KEY_10CHANNELSUP 0x1b8 /* 10 channels up (10+) */
|
||||
#define KEY_10CHANNELSDOWN 0x1b9 /* 10 channels down (10-) */
|
||||
#define KEY_IMAGES 0x1ba /* AL Image Browser */
|
||||
#define KEY_NOTIFICATION_CENTER 0x1bc /* Show/hide the notification center */
|
||||
#define KEY_PICKUP_PHONE 0x1bd /* Answer incoming call */
|
||||
#define KEY_HANGUP_PHONE 0x1be /* Decline incoming call */
|
||||
|
||||
#define KEY_DEL_EOL 0x1c0
|
||||
#define KEY_DEL_EOS 0x1c1
|
||||
@ -542,6 +545,7 @@
|
||||
#define KEY_FN_F 0x1e2
|
||||
#define KEY_FN_S 0x1e3
|
||||
#define KEY_FN_B 0x1e4
|
||||
#define KEY_FN_RIGHT_SHIFT 0x1e5
|
||||
|
||||
#define KEY_BRL_DOT1 0x1f1
|
||||
#define KEY_BRL_DOT2 0x1f2
|
||||
|
@ -76,6 +76,7 @@
|
||||
#define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */
|
||||
#define PCI_LATENCY_TIMER 0x0d /* 8 bits */
|
||||
#define PCI_HEADER_TYPE 0x0e /* 8 bits */
|
||||
#define PCI_HEADER_TYPE_MASK 0x7f
|
||||
#define PCI_HEADER_TYPE_NORMAL 0
|
||||
#define PCI_HEADER_TYPE_BRIDGE 1
|
||||
#define PCI_HEADER_TYPE_CARDBUS 2
|
||||
@ -246,7 +247,7 @@
|
||||
#define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */
|
||||
#define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */
|
||||
#define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */
|
||||
#define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */
|
||||
#define PCI_PM_CAP_PME_D3hot 0x4000 /* PME# from D3 (hot) */
|
||||
#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */
|
||||
#define PCI_PM_CAP_PME_SHIFT 11 /* Start of the PME Mask in PMC */
|
||||
#define PCI_PM_CTRL 4 /* PM control and status register */
|
||||
@ -532,6 +533,8 @@
|
||||
#define PCI_EXP_LNKCAP_SLS_32_0GB 0x00000005 /* LNKCAP2 SLS Vector bit 4 */
|
||||
#define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */
|
||||
#define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */
|
||||
#define PCI_EXP_LNKCAP_ASPM_L0S 0x00000400 /* ASPM L0s Support */
|
||||
#define PCI_EXP_LNKCAP_ASPM_L1 0x00000800 /* ASPM L1 Support */
|
||||
#define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */
|
||||
#define PCI_EXP_LNKCAP_L1EL 0x00038000 /* L1 Exit Latency */
|
||||
#define PCI_EXP_LNKCAP_CLKPM 0x00040000 /* Clock Power Management */
|
||||
@ -1056,6 +1059,7 @@
|
||||
#define PCI_L1SS_CTL1_PCIPM_L1_1 0x00000002 /* PCI-PM L1.1 Enable */
|
||||
#define PCI_L1SS_CTL1_ASPM_L1_2 0x00000004 /* ASPM L1.2 Enable */
|
||||
#define PCI_L1SS_CTL1_ASPM_L1_1 0x00000008 /* ASPM L1.1 Enable */
|
||||
#define PCI_L1SS_CTL1_L1_2_MASK 0x00000005
|
||||
#define PCI_L1SS_CTL1_L1SS_MASK 0x0000000f
|
||||
#define PCI_L1SS_CTL1_CM_RESTORE_TIME 0x0000ff00 /* Common_Mode_Restore_Time */
|
||||
#define PCI_L1SS_CTL1_LTR_L12_TH_VALUE 0x03ff0000 /* LTR_L1.2_THRESHOLD_Value */
|
||||
|
@ -16,4 +16,7 @@ struct virtio_fs_config {
|
||||
uint32_t num_request_queues;
|
||||
} QEMU_PACKED;
|
||||
|
||||
/* For the id field in virtio_pci_shm_cap */
|
||||
#define VIRTIO_FS_SHMCAP_ID_CACHE 0
|
||||
|
||||
#endif /* _LINUX_VIRTIO_FS_H */
|
||||
|
@ -50,6 +50,10 @@
|
||||
* VIRTIO_GPU_CMD_GET_EDID
|
||||
*/
|
||||
#define VIRTIO_GPU_F_EDID 1
|
||||
/*
|
||||
* VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID
|
||||
*/
|
||||
#define VIRTIO_GPU_F_RESOURCE_UUID 2
|
||||
|
||||
enum virtio_gpu_ctrl_type {
|
||||
VIRTIO_GPU_UNDEFINED = 0,
|
||||
@ -66,6 +70,7 @@ enum virtio_gpu_ctrl_type {
|
||||
VIRTIO_GPU_CMD_GET_CAPSET_INFO,
|
||||
VIRTIO_GPU_CMD_GET_CAPSET,
|
||||
VIRTIO_GPU_CMD_GET_EDID,
|
||||
VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID,
|
||||
|
||||
/* 3d commands */
|
||||
VIRTIO_GPU_CMD_CTX_CREATE = 0x0200,
|
||||
@ -87,6 +92,7 @@ enum virtio_gpu_ctrl_type {
|
||||
VIRTIO_GPU_RESP_OK_CAPSET_INFO,
|
||||
VIRTIO_GPU_RESP_OK_CAPSET,
|
||||
VIRTIO_GPU_RESP_OK_EDID,
|
||||
VIRTIO_GPU_RESP_OK_RESOURCE_UUID,
|
||||
|
||||
/* error responses */
|
||||
VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200,
|
||||
@ -340,4 +346,17 @@ enum virtio_gpu_formats {
|
||||
VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM = 134,
|
||||
};
|
||||
|
||||
/* VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID */
|
||||
/* Command layout: the common control header followed by a resource id. */
struct virtio_gpu_resource_assign_uuid {
|
||||
struct virtio_gpu_ctrl_hdr hdr;
|
||||
uint32_t resource_id;
|
||||
uint32_t padding; /* second 32-bit word after resource_id; no meaning is given here */
|
||||
};
|
||||
|
||||
/* VIRTIO_GPU_RESP_OK_RESOURCE_UUID */
|
||||
/* Response layout: the common control header followed by a 16-byte UUID. */
struct virtio_gpu_resp_resource_uuid {
|
||||
struct virtio_gpu_ctrl_hdr hdr;
|
||||
uint8_t uuid[16];
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -122,6 +122,17 @@
|
||||
#define VIRTIO_MMIO_QUEUE_USED_LOW 0x0a0
|
||||
#define VIRTIO_MMIO_QUEUE_USED_HIGH 0x0a4
|
||||
|
||||
/* Shared memory region id */
|
||||
#define VIRTIO_MMIO_SHM_SEL 0x0ac
|
||||
|
||||
/* Shared memory region length, 64 bits in two halves */
|
||||
#define VIRTIO_MMIO_SHM_LEN_LOW 0x0b0
|
||||
#define VIRTIO_MMIO_SHM_LEN_HIGH 0x0b4
|
||||
|
||||
/* Shared memory region base address, 64 bits in two halves */
|
||||
#define VIRTIO_MMIO_SHM_BASE_LOW 0x0b8
|
||||
#define VIRTIO_MMIO_SHM_BASE_HIGH 0x0bc
|
||||
|
||||
/* Configuration atomicity value */
|
||||
#define VIRTIO_MMIO_CONFIG_GENERATION 0x0fc
|
||||
|
||||
|
@ -113,6 +113,8 @@
|
||||
#define VIRTIO_PCI_CAP_DEVICE_CFG 4
|
||||
/* PCI configuration access */
|
||||
#define VIRTIO_PCI_CAP_PCI_CFG 5
|
||||
/* Additional shared memory capability */
|
||||
#define VIRTIO_PCI_CAP_SHARED_MEMORY_CFG 8
|
||||
|
||||
/* This is the PCI capability header: */
|
||||
struct virtio_pci_cap {
|
||||
@ -121,11 +123,18 @@ struct virtio_pci_cap {
|
||||
uint8_t cap_len; /* Generic PCI field: capability length */
|
||||
uint8_t cfg_type; /* Identifies the structure. */
|
||||
uint8_t bar; /* Where to find it. */
|
||||
uint8_t padding[3]; /* Pad to full dword. */
|
||||
uint8_t id; /* Multiple capabilities of the same type */
|
||||
uint8_t padding[2]; /* Pad to full dword. */
|
||||
uint32_t offset; /* Offset within bar. */
|
||||
uint32_t length; /* Length of the structure, in bytes. */
|
||||
};
|
||||
|
||||
/*
 * A struct virtio_pci_cap followed by the upper 32 bits of its offset and
 * length, giving both values a 64-bit range.
 */
struct virtio_pci_cap64 {
|
||||
struct virtio_pci_cap cap;
|
||||
uint32_t offset_hi; /* Most sig 32 bits of offset */
|
||||
uint32_t length_hi; /* Most sig 32 bits of length */
|
||||
};
|
||||
|
||||
struct virtio_pci_notify_cap {
|
||||
struct virtio_pci_cap cap;
|
||||
uint32_t notify_off_multiplier; /* Multiplier for queue_notify_off. */
|
||||
|
@ -159,6 +159,21 @@ struct kvm_sync_regs {
|
||||
struct kvm_arch_memory_slot {
|
||||
};
|
||||
|
||||
/*
|
||||
* PMU filter structure. Describe a range of events with a particular
|
||||
* action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
|
||||
*/
|
||||
/*
 * Fields add up to 8 bytes (2 + 2 + 1 + 3).
 * NOTE(review): per-field meanings below are inferred from names -- confirm
 * against the KVM API documentation.
 */
struct kvm_pmu_event_filter {
|
||||
__u16 base_event; /* presumably the first event of the range */
|
||||
__u16 nevents; /* presumably the number of events in the range */
|
||||
|
||||
#define KVM_PMU_EVENT_ALLOW 0
|
||||
#define KVM_PMU_EVENT_DENY 1
|
||||
|
||||
__u8 action; /* presumably one of the KVM_PMU_EVENT_* values above */
|
||||
__u8 pad[3];
|
||||
};
|
||||
|
||||
/* for KVM_GET/SET_VCPU_EVENTS */
|
||||
struct kvm_vcpu_events {
|
||||
struct {
|
||||
@ -242,6 +257,15 @@ struct kvm_vcpu_events {
|
||||
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0
|
||||
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1
|
||||
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2
|
||||
|
||||
/*
|
||||
* Only two states can be presented by the host kernel:
|
||||
* - NOT_REQUIRED: the guest doesn't need to do anything
|
||||
* - NOT_AVAIL: the guest isn't mitigated (it can still use SSBS if available)
|
||||
*
|
||||
* All the other values are deprecated. The host still accepts all
|
||||
* values (they are ABI), but will narrow them to the above two.
|
||||
*/
|
||||
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2)
|
||||
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0
|
||||
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1
|
||||
@ -329,6 +353,7 @@ struct kvm_vcpu_events {
|
||||
#define KVM_ARM_VCPU_PMU_V3_CTRL 0
|
||||
#define KVM_ARM_VCPU_PMU_V3_IRQ 0
|
||||
#define KVM_ARM_VCPU_PMU_V3_INIT 1
|
||||
#define KVM_ARM_VCPU_PMU_V3_FILTER 2
|
||||
#define KVM_ARM_VCPU_TIMER_CTRL 1
|
||||
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
|
||||
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
|
||||
|
@ -5,5 +5,6 @@
|
||||
#include <asm-generic/mman.h>
|
||||
|
||||
#define PROT_BTI 0x10 /* BTI guarded page */
|
||||
#define PROT_MTE 0x20 /* Normal Tagged mapping */
|
||||
|
||||
#endif /* ! _UAPI__ASM_MMAN_H */
|
||||
|
@ -20,6 +20,7 @@
|
||||
#define HUGETLB_FLAG_ENCODE_SHIFT 26
|
||||
#define HUGETLB_FLAG_ENCODE_MASK 0x3f
|
||||
|
||||
#define HUGETLB_FLAG_ENCODE_16KB (14 << HUGETLB_FLAG_ENCODE_SHIFT)
|
||||
#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT)
|
||||
#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT)
|
||||
#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT)
|
||||
|
@ -140,7 +140,7 @@ __SYSCALL(__NR_renameat, sys_renameat)
|
||||
#define __NR_umount2 39
|
||||
__SYSCALL(__NR_umount2, sys_umount)
|
||||
#define __NR_mount 40
|
||||
__SC_COMP(__NR_mount, sys_mount, compat_sys_mount)
|
||||
__SYSCALL(__NR_mount, sys_mount)
|
||||
#define __NR_pivot_root 41
|
||||
__SYSCALL(__NR_pivot_root, sys_pivot_root)
|
||||
|
||||
@ -207,9 +207,9 @@ __SYSCALL(__NR_read, sys_read)
|
||||
#define __NR_write 64
|
||||
__SYSCALL(__NR_write, sys_write)
|
||||
#define __NR_readv 65
|
||||
__SC_COMP(__NR_readv, sys_readv, compat_sys_readv)
|
||||
__SC_COMP(__NR_readv, sys_readv, sys_readv)
|
||||
#define __NR_writev 66
|
||||
__SC_COMP(__NR_writev, sys_writev, compat_sys_writev)
|
||||
__SC_COMP(__NR_writev, sys_writev, sys_writev)
|
||||
#define __NR_pread64 67
|
||||
__SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64)
|
||||
#define __NR_pwrite64 68
|
||||
@ -237,7 +237,7 @@ __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4)
|
||||
|
||||
/* fs/splice.c */
|
||||
#define __NR_vmsplice 75
|
||||
__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice)
|
||||
__SYSCALL(__NR_vmsplice, sys_vmsplice)
|
||||
#define __NR_splice 76
|
||||
__SYSCALL(__NR_splice, sys_splice)
|
||||
#define __NR_tee 77
|
||||
@ -727,11 +727,9 @@ __SYSCALL(__NR_setns, sys_setns)
|
||||
#define __NR_sendmmsg 269
|
||||
__SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg)
|
||||
#define __NR_process_vm_readv 270
|
||||
__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \
|
||||
compat_sys_process_vm_readv)
|
||||
__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
|
||||
#define __NR_process_vm_writev 271
|
||||
__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
|
||||
compat_sys_process_vm_writev)
|
||||
__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
|
||||
#define __NR_kcmp 272
|
||||
__SYSCALL(__NR_kcmp, sys_kcmp)
|
||||
#define __NR_finit_module 273
|
||||
@ -859,9 +857,11 @@ __SYSCALL(__NR_openat2, sys_openat2)
|
||||
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
|
||||
#define __NR_faccessat2 439
|
||||
__SYSCALL(__NR_faccessat2, sys_faccessat2)
|
||||
#define __NR_process_madvise 440
|
||||
__SYSCALL(__NR_process_madvise, sys_process_madvise)
|
||||
|
||||
#undef __NR_syscalls
|
||||
#define __NR_syscalls 440
|
||||
#define __NR_syscalls 441
|
||||
|
||||
/*
|
||||
* 32 bit systems traditionally used different
|
||||
|
@ -369,6 +369,7 @@
|
||||
#define __NR_openat2 (__NR_Linux + 437)
|
||||
#define __NR_pidfd_getfd (__NR_Linux + 438)
|
||||
#define __NR_faccessat2 (__NR_Linux + 439)
|
||||
#define __NR_process_madvise (__NR_Linux + 440)
|
||||
|
||||
|
||||
#endif /* _ASM_MIPS_UNISTD_N32_H */
|
||||
|
@ -345,6 +345,7 @@
|
||||
#define __NR_openat2 (__NR_Linux + 437)
|
||||
#define __NR_pidfd_getfd (__NR_Linux + 438)
|
||||
#define __NR_faccessat2 (__NR_Linux + 439)
|
||||
#define __NR_process_madvise (__NR_Linux + 440)
|
||||
|
||||
|
||||
#endif /* _ASM_MIPS_UNISTD_N64_H */
|
||||
|
@ -415,6 +415,7 @@
|
||||
#define __NR_openat2 (__NR_Linux + 437)
|
||||
#define __NR_pidfd_getfd (__NR_Linux + 438)
|
||||
#define __NR_faccessat2 (__NR_Linux + 439)
|
||||
#define __NR_process_madvise (__NR_Linux + 440)
|
||||
|
||||
|
||||
#endif /* _ASM_MIPS_UNISTD_O32_H */
|
||||
|
@ -422,6 +422,7 @@
|
||||
#define __NR_openat2 437
|
||||
#define __NR_pidfd_getfd 438
|
||||
#define __NR_faccessat2 439
|
||||
#define __NR_process_madvise 440
|
||||
|
||||
|
||||
#endif /* _ASM_POWERPC_UNISTD_32_H */
|
||||
|
@ -394,6 +394,7 @@
|
||||
#define __NR_openat2 437
|
||||
#define __NR_pidfd_getfd 438
|
||||
#define __NR_faccessat2 439
|
||||
#define __NR_process_madvise 440
|
||||
|
||||
|
||||
#endif /* _ASM_POWERPC_UNISTD_64_H */
|
||||
|
@ -412,5 +412,6 @@
|
||||
#define __NR_openat2 437
|
||||
#define __NR_pidfd_getfd 438
|
||||
#define __NR_faccessat2 439
|
||||
#define __NR_process_madvise 440
|
||||
|
||||
#endif /* _ASM_S390_UNISTD_32_H */
|
||||
|
@ -360,5 +360,6 @@
|
||||
#define __NR_openat2 437
|
||||
#define __NR_pidfd_getfd 438
|
||||
#define __NR_faccessat2 439
|
||||
#define __NR_process_madvise 440
|
||||
|
||||
#endif /* _ASM_S390_UNISTD_64_H */
|
||||
|
@ -192,6 +192,26 @@ struct kvm_msr_list {
|
||||
__u32 indices[0];
|
||||
};
|
||||
|
||||
/* Maximum size of any access bitmap in bytes */
|
||||
#define KVM_MSR_FILTER_MAX_BITMAP_SIZE 0x600
|
||||
|
||||
/* for KVM_X86_SET_MSR_FILTER */
|
||||
/*
 * One MSR range: the bitmap holds one bit per MSR, nmsrs of them, starting
 * at MSR index 'base'. Note that 'bitmap' is a pointer member, so its size
 * depends on the pointer width of the build.
 */
struct kvm_msr_filter_range {
|
||||
#define KVM_MSR_FILTER_READ (1 << 0)
|
||||
#define KVM_MSR_FILTER_WRITE (1 << 1)
|
||||
__u32 flags; /* presumably a mask of the KVM_MSR_FILTER_READ/WRITE bits above */
|
||||
__u32 nmsrs; /* number of msrs in bitmap */
|
||||
__u32 base; /* MSR index the bitmap starts at */
|
||||
__u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */
|
||||
};
|
||||
|
||||
#define KVM_MSR_FILTER_MAX_RANGES 16
|
||||
/*
 * Argument type named in the KVM_X86_SET_MSR_FILTER ioctl definition: a
 * flags word plus a fixed array of KVM_MSR_FILTER_MAX_RANGES range entries.
 */
struct kvm_msr_filter {
|
||||
#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
|
||||
#define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0)
|
||||
__u32 flags; /* presumably KVM_MSR_FILTER_DEFAULT_ALLOW or _DENY -- TODO confirm */
|
||||
struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
|
||||
};
|
||||
|
||||
struct kvm_cpuid_entry {
|
||||
__u32 function;
|
||||
|
@ -430,6 +430,7 @@
|
||||
#define __NR_openat2 437
|
||||
#define __NR_pidfd_getfd 438
|
||||
#define __NR_faccessat2 439
|
||||
#define __NR_process_madvise 440
|
||||
|
||||
|
||||
#endif /* _ASM_X86_UNISTD_32_H */
|
||||
|
@ -352,6 +352,7 @@
|
||||
#define __NR_openat2 437
|
||||
#define __NR_pidfd_getfd 438
|
||||
#define __NR_faccessat2 439
|
||||
#define __NR_process_madvise 440
|
||||
|
||||
|
||||
#endif /* _ASM_X86_UNISTD_64_H */
|
||||
|
@ -305,6 +305,7 @@
|
||||
#define __NR_openat2 (__X32_SYSCALL_BIT + 437)
|
||||
#define __NR_pidfd_getfd (__X32_SYSCALL_BIT + 438)
|
||||
#define __NR_faccessat2 (__X32_SYSCALL_BIT + 439)
|
||||
#define __NR_process_madvise (__X32_SYSCALL_BIT + 440)
|
||||
#define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512)
|
||||
#define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513)
|
||||
#define __NR_ioctl (__X32_SYSCALL_BIT + 514)
|
||||
|
@ -248,6 +248,8 @@ struct kvm_hyperv_exit {
|
||||
#define KVM_EXIT_IOAPIC_EOI 26
|
||||
#define KVM_EXIT_HYPERV 27
|
||||
#define KVM_EXIT_ARM_NISV 28
|
||||
#define KVM_EXIT_X86_RDMSR 29
|
||||
#define KVM_EXIT_X86_WRMSR 30
|
||||
|
||||
/* For KVM_EXIT_INTERNAL_ERROR */
|
||||
/* Emulate instruction failed. */
|
||||
@ -413,6 +415,17 @@ struct kvm_run {
|
||||
__u64 esr_iss;
|
||||
__u64 fault_ipa;
|
||||
} arm_nisv;
|
||||
/* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */
|
||||
struct {
|
||||
__u8 error; /* user -> kernel */
|
||||
__u8 pad[7];
|
||||
#define KVM_MSR_EXIT_REASON_INVAL (1 << 0)
|
||||
#define KVM_MSR_EXIT_REASON_UNKNOWN (1 << 1)
|
||||
#define KVM_MSR_EXIT_REASON_FILTER (1 << 2)
|
||||
__u32 reason; /* kernel -> user */
|
||||
__u32 index; /* kernel -> user */
|
||||
__u64 data; /* kernel <-> user */
|
||||
} msr;
|
||||
/* Fix the size of the union. */
|
||||
char padding[256];
|
||||
};
|
||||
@ -1037,6 +1050,9 @@ struct kvm_ppc_resize_hpt {
|
||||
#define KVM_CAP_SMALLER_MAXPHYADDR 185
|
||||
#define KVM_CAP_S390_DIAG318 186
|
||||
#define KVM_CAP_STEAL_TIME 187
|
||||
#define KVM_CAP_X86_USER_SPACE_MSR 188
|
||||
#define KVM_CAP_X86_MSR_FILTER 189
|
||||
#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
@ -1538,6 +1554,9 @@ struct kvm_pv_cmd {
|
||||
/* Available with KVM_CAP_S390_PROTECTED */
|
||||
#define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd)
|
||||
|
||||
/* Available with KVM_CAP_X86_MSR_FILTER */
|
||||
#define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter)
|
||||
|
||||
/* Secure Encrypted Virtualization command */
|
||||
enum sev_cmd_id {
|
||||
/* Guest initialization commands */
|
||||
|
@ -27,6 +27,7 @@
|
||||
#define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
|
||||
#define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK
|
||||
|
||||
#define MAP_HUGE_16KB HUGETLB_FLAG_ENCODE_16KB
|
||||
#define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB
|
||||
#define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB
|
||||
#define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB
|
||||
|
@ -201,8 +201,11 @@ struct vfio_device_info {
|
||||
#define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */
|
||||
#define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */
|
||||
#define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */
|
||||
#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */
|
||||
#define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */
|
||||
__u32 num_regions; /* Max region index + 1 */
|
||||
__u32 num_irqs; /* Max IRQ index + 1 */
|
||||
__u32 cap_offset; /* Offset within info struct of first cap */
|
||||
};
|
||||
#define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7)
|
||||
|
||||
@ -218,6 +221,15 @@ struct vfio_device_info {
|
||||
#define VFIO_DEVICE_API_CCW_STRING "vfio-ccw"
|
||||
#define VFIO_DEVICE_API_AP_STRING "vfio-ap"
|
||||
|
||||
/*
|
||||
* The following capabilities are unique to s390 zPCI devices. Their contents
|
||||
* are further-defined in vfio_zdev.h
|
||||
*/
|
||||
#define VFIO_DEVICE_INFO_CAP_ZPCI_BASE 1
|
||||
#define VFIO_DEVICE_INFO_CAP_ZPCI_GROUP 2
|
||||
#define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL 3
|
||||
#define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP 4
|
||||
|
||||
/**
|
||||
* VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
|
||||
* struct vfio_region_info)
|
||||
@ -462,7 +474,7 @@ struct vfio_region_gfx_edid {
|
||||
* 5. Resumed
|
||||
* |--------->|
|
||||
*
|
||||
* 0. Default state of VFIO device is _RUNNNG when the user application starts.
|
||||
* 0. Default state of VFIO device is _RUNNING when the user application starts.
|
||||
* 1. During normal shutdown of the user application, the user application may
|
||||
* optionally change the VFIO device state from _RUNNING to _STOP. This
|
||||
* transition is optional. The vendor driver must support this transition but
|
||||
@ -1039,6 +1051,21 @@ struct vfio_iommu_type1_info_cap_migration {
|
||||
__u64 max_dirty_bitmap_size; /* in bytes */
|
||||
};
|
||||
|
||||
/*
|
||||
* The DMA available capability reports the current number of
|
||||
* simultaneously outstanding DMA mappings that are allowed.
|
||||
*
|
||||
* The structure below defines version 1 of this capability.
|
||||
*
|
||||
* avail: specifies the current number of outstanding DMA mappings allowed.
|
||||
*/
|
||||
#define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL 3
|
||||
|
||||
/* Capability payload: the generic capability header followed by a 32-bit count. */
struct vfio_iommu_type1_info_dma_avail {
|
||||
struct vfio_info_cap_header header;
|
||||
__u32 avail; /* current number of outstanding DMA mappings allowed */
|
||||
};
|
||||
|
||||
#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
|
||||
|
||||
/**
|
||||
|
78
linux-headers/linux/vfio_zdev.h
Normal file
78
linux-headers/linux/vfio_zdev.h
Normal file
@ -0,0 +1,78 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/*
|
||||
* VFIO Region definitions for ZPCI devices
|
||||
*
|
||||
* Copyright IBM Corp. 2020
|
||||
*
|
||||
* Author(s): Pierre Morel <pmorel@linux.ibm.com>
|
||||
* Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
*/
|
||||
|
||||
#ifndef _VFIO_ZDEV_H_
|
||||
#define _VFIO_ZDEV_H_
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/vfio.h>
|
||||
|
||||
/**
|
||||
* VFIO_DEVICE_INFO_CAP_ZPCI_BASE - Base PCI Function information
|
||||
*
|
||||
* This capability provides a set of descriptive information about the
|
||||
* associated PCI function.
|
||||
*/
|
||||
/* Payload layout: the generic capability header followed by fixed-size descriptive fields. */
struct vfio_device_info_cap_zpci_base {
|
||||
struct vfio_info_cap_header header;
|
||||
__u64 start_dma; /* Start of available DMA addresses */
|
||||
__u64 end_dma; /* End of available DMA addresses */
|
||||
__u16 pchid; /* Physical Channel ID */
|
||||
__u16 vfn; /* Virtual function number */
|
||||
__u16 fmb_length; /* Measurement Block Length (in bytes) */
|
||||
__u8 pft; /* PCI Function Type */
|
||||
__u8 gid; /* PCI function group ID */
|
||||
};
|
||||
|
||||
/**
|
||||
* VFIO_DEVICE_INFO_CAP_ZPCI_GROUP - Base PCI Function Group information
|
||||
*
|
||||
* This capability provides a set of descriptive information about the group of
|
||||
* PCI functions that the associated device belongs to.
|
||||
*/
|
||||
/* Payload layout: the generic capability header followed by group-wide fields. */
struct vfio_device_info_cap_zpci_group {
|
||||
struct vfio_info_cap_header header;
|
||||
__u64 dasm; /* DMA Address space mask */
|
||||
__u64 msi_addr; /* MSI address */
|
||||
__u64 flags; /* presumably a mask of VFIO_DEVICE_INFO_ZPCI_FLAG_* bits -- TODO confirm */
|
||||
#define VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH 1 /* Program-specified TLB refresh */
|
||||
__u16 mui; /* Measurement Block Update Interval */
|
||||
__u16 noi; /* Maximum number of MSIs */
|
||||
__u16 maxstbl; /* Maximum Store Block Length */
|
||||
__u8 version; /* Supported PCI Version */
|
||||
};
|
||||
|
||||
/**
|
||||
* VFIO_DEVICE_INFO_CAP_ZPCI_UTIL - Utility String
|
||||
*
|
||||
* This capability provides the utility string for the associated device, which
|
||||
* is a device identifier string made up of EBCDIC characters. 'size' specifies
|
||||
* the length of 'util_str'.
|
||||
*/
|
||||
/* Variable-length payload: header, a length word, then a flexible array member. */
struct vfio_device_info_cap_zpci_util {
|
||||
struct vfio_info_cap_header header;
|
||||
__u32 size; /* length of util_str */
|
||||
__u8 util_str[]; /* flexible array member; adds nothing to sizeof the struct */
|
||||
};
|
||||
|
||||
/**
|
||||
* VFIO_DEVICE_INFO_CAP_ZPCI_PFIP - PCI Function Path
|
||||
*
|
||||
* This capability provides the PCI function path string, which is an identifier
|
||||
* that describes the internal hardware path of the device. 'size' specifies
|
||||
* the length of 'pfip'.
|
||||
*/
|
||||
/* Variable-length payload: header, a length word, then a flexible array member. */
struct vfio_device_info_cap_zpci_pfip {
|
||||
struct vfio_info_cap_header header;
|
||||
__u32 size; /* length of pfip */
|
||||
__u8 pfip[]; /* flexible array member; adds nothing to sizeof the struct */
|
||||
};
|
||||
|
||||
#endif
|
@ -57,6 +57,10 @@
|
||||
#include "qemu/queue.h"
|
||||
#include "multifd.h"
|
||||
|
||||
#ifdef CONFIG_VFIO
|
||||
#include "hw/vfio/vfio-common.h"
|
||||
#endif
|
||||
|
||||
#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */
|
||||
|
||||
/* Amount of time to allocate to each "chunk" of bandwidth-throttled
|
||||
@ -1037,6 +1041,17 @@ static void populate_disk_info(MigrationInfo *info)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Fill in the optional VFIO section of @info.
 *
 * Only has an effect when built with CONFIG_VFIO and vfio_mig_active()
 * returns true: has_vfio is set, info->vfio is allocated with g_malloc0()
 * and its 'transferred' field is set from vfio_mig_bytes_transferred().
 * In every other case @info is left untouched.
 */
static void populate_vfio_info(MigrationInfo *info)
|
||||
{
|
||||
#ifdef CONFIG_VFIO
|
||||
if (vfio_mig_active()) {
|
||||
info->has_vfio = true;
|
||||
info->vfio = g_malloc0(sizeof(*info->vfio));
|
||||
info->vfio->transferred = vfio_mig_bytes_transferred();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void fill_source_migration_info(MigrationInfo *info)
|
||||
{
|
||||
MigrationState *s = migrate_get_current();
|
||||
@ -1061,6 +1076,7 @@ static void fill_source_migration_info(MigrationInfo *info)
|
||||
populate_time_info(info, s);
|
||||
populate_ram_info(info, s);
|
||||
populate_disk_info(info);
|
||||
populate_vfio_info(info);
|
||||
break;
|
||||
case MIGRATION_STATUS_COLO:
|
||||
info->has_status = true;
|
||||
@ -1069,6 +1085,7 @@ static void fill_source_migration_info(MigrationInfo *info)
|
||||
case MIGRATION_STATUS_COMPLETED:
|
||||
populate_time_info(info, s);
|
||||
populate_ram_info(info, s);
|
||||
populate_vfio_info(info);
|
||||
break;
|
||||
case MIGRATION_STATUS_FAILED:
|
||||
info->has_status = true;
|
||||
|
@ -357,6 +357,12 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
|
||||
}
|
||||
monitor_printf(mon, "]\n");
|
||||
}
|
||||
|
||||
if (info->has_vfio) {
|
||||
monitor_printf(mon, "vfio device transferred: %" PRIu64 " kbytes\n",
|
||||
info->vfio->transferred >> 10);
|
||||
}
|
||||
|
||||
qapi_free_MigrationInfo(info);
|
||||
}
|
||||
|
||||
|
@ -147,6 +147,18 @@
|
||||
'active', 'postcopy-active', 'postcopy-paused',
|
||||
'postcopy-recover', 'completed', 'failed', 'colo',
|
||||
'pre-switchover', 'device', 'wait-unplug' ] }
|
||||
##
|
||||
# @VfioStats:
|
||||
#
|
||||
# Detailed VFIO devices migration statistics
|
||||
#
|
||||
# @transferred: amount of bytes transferred to the target VM by VFIO devices
|
||||
#
|
||||
# Since: 5.2
|
||||
#
|
||||
##
|
||||
{ 'struct': 'VfioStats',
|
||||
'data': {'transferred': 'int' } }
|
||||
|
||||
##
|
||||
# @MigrationInfo:
|
||||
@ -208,11 +220,16 @@
|
||||
#
|
||||
# @socket-address: Only used for tcp, to know what the real port is (Since 4.0)
|
||||
#
|
||||
# @vfio: @VfioStats containing detailed VFIO devices migration statistics,
|
||||
# only returned if VFIO device is present, migration is supported by all
|
||||
# VFIO devices and status is 'active' or 'completed' (since 5.2)
|
||||
#
|
||||
# Since: 0.14.0
|
||||
##
|
||||
{ 'struct': 'MigrationInfo',
|
||||
'data': {'*status': 'MigrationStatus', '*ram': 'MigrationStats',
|
||||
'*disk': 'MigrationStats',
|
||||
'*vfio': 'VfioStats',
|
||||
'*xbzrle-cache': 'XBZRLECacheStats',
|
||||
'*total-time': 'int',
|
||||
'*expected-downtime': 'int',
|
||||
|
@ -141,7 +141,7 @@ done
|
||||
|
||||
rm -rf "$output/linux-headers/linux"
|
||||
mkdir -p "$output/linux-headers/linux"
|
||||
for header in kvm.h vfio.h vfio_ccw.h vhost.h \
|
||||
for header in kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \
|
||||
psci.h psp-sev.h userfaultfd.h mman.h; do
|
||||
cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux"
|
||||
done
|
||||
|
@ -1806,7 +1806,7 @@ bool memory_region_is_ram_device(MemoryRegion *mr)
|
||||
uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr)
|
||||
{
|
||||
uint8_t mask = mr->dirty_log_mask;
|
||||
if (global_dirty_log && mr->ram_block) {
|
||||
if (global_dirty_log && (mr->ram_block || memory_region_is_iommu(mr))) {
|
||||
mask |= (1 << DIRTY_MEMORY_MIGRATION);
|
||||
}
|
||||
return mask;
|
||||
|
Loading…
Reference in New Issue
Block a user