pc,pci,vhost,virtio: fixes
Lots of fixes all over the place. virtio-mem and virtio-iommu patches are
kind of fixes but it seems better to just make them behave sanely than try
to educate users about the limitations ...

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

-----BEGIN PGP SIGNATURE-----

iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAl+i9YMPHG1zdEByZWRo
YXQuY29tAAoJECgfDbjSjVRpySQH/Ru/sxB9PncR1HsqSf0HC0tt/EMKgyZTXEwQ
FITcjkCvBDS98a1VUvvZbjzTEDEZNnoUv94MjdLeBoptJ7GtK6nPoI6Ke0p1Zqbe
mlY2BCb0FpN8FE+mthjAI03mhw6o8Qo/OPtyISQzUxCVVqUHL5TRAVAQdeidoK8n
RBQ4WogwM/h7wI0d9GGgSxAON8IRQnBYImtzJieBb6zeScwKVFTWI1tqBdOyFN0/
AhzQiNZuhZ7a1XGJIsxmWB1NK2kcXNJuOF0ANh4coIHR0JzmH3xRy+Jnf5e3dYsw
LI23DUZPSTJJXAwKPucyTG7RTX8F55N9DVHC9KDRD6Ntq1oreJ4=
=pcbN
-----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

pc,pci,vhost,virtio: fixes

Lots of fixes all over the place. virtio-mem and virtio-iommu patches are
kind of fixes but it seems better to just make them behave sanely than try
to educate users about the limitations ...

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Wed 04 Nov 2020 18:40:03 GMT
# gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg: issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full]
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [full]
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
# Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (31 commits)
  contrib/vhost-user-blk: fix get_config() information leak
  block/export: fix vhost-user-blk get_config() information leak
  block/export: make vhost-user-blk config space little-endian
  configure: introduce --enable-vhost-user-blk-server
  libvhost-user: follow QEMU comment style
  vhost-blk: set features before setting inflight feature
  Revert "vhost-blk: set features before setting inflight feature"
  net: Add vhost-vdpa in show_netdevs()
  vhost-vdpa: Add qemu_close in vhost_vdpa_cleanup
  vfio: Don't issue full 2^64 unmap
  virtio-iommu: Set supported page size mask
  vfio: Set IOMMU page size as per host supported page size
  memory: Add interface to set iommu page size mask
  virtio-iommu: Add notify_flag_changed() memory region callback
  virtio-iommu: Add replay() memory region callback
  virtio-iommu: Call memory notifiers in attach/detach
  virtio-iommu: Add memory notifiers for map/unmap
  virtio-iommu: Store memory region in endpoint struct
  virtio-iommu: Fix virtio_iommu_mr()
  hw/smbios: Fix leaked fd in save_opt_one() error path
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit 85c3ed4417
@@ -22,13 +22,13 @@
#include "qapi/qapi-commands-block-export.h"
#include "qapi/qapi-events-block-export.h"
#include "qemu/id.h"
#if defined(CONFIG_LINUX) && defined(CONFIG_VHOST_USER)
#ifdef CONFIG_VHOST_USER_BLK_SERVER
#include "vhost-user-blk-server.h"
#endif

static const BlockExportDriver *blk_exp_drivers[] = {
    &blk_exp_nbd,
#if defined(CONFIG_LINUX) && defined(CONFIG_VHOST_USER)
#ifdef CONFIG_VHOST_USER_BLK_SERVER
    &blk_exp_vhost_user_blk,
#endif
};
@@ -1,2 +1,2 @@
blockdev_ss.add(files('export.c'))
blockdev_ss.add(when: ['CONFIG_LINUX', 'CONFIG_VHOST_USER'], if_true: files('vhost-user-blk-server.c'))
blockdev_ss.add(when: 'CONFIG_VHOST_USER_BLK_SERVER', if_true: files('vhost-user-blk-server.c'))
@@ -264,9 +264,11 @@ static uint64_t vu_blk_get_protocol_features(VuDev *dev)
static int
vu_blk_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
{
    /* TODO blkcfg must be little-endian for VIRTIO 1.0 */
    VuServer *server = container_of(vu_dev, VuServer, vu_dev);
    VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);

    g_return_val_if_fail(len <= sizeof(struct virtio_blk_config), -1);

    memcpy(config, &vexp->blkcfg, len);
    return 0;
}
@@ -343,18 +345,18 @@ vu_blk_initialize_config(BlockDriverState *bs,
                         uint32_t blk_size,
                         uint16_t num_queues)
{
    config->capacity = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    config->blk_size = blk_size;
    config->size_max = 0;
    config->seg_max = 128 - 2;
    config->min_io_size = 1;
    config->opt_io_size = 1;
    config->num_queues = num_queues;
    config->max_discard_sectors = 32768;
    config->max_discard_seg = 1;
    config->discard_sector_alignment = config->blk_size >> 9;
    config->max_write_zeroes_sectors = 32768;
    config->max_write_zeroes_seg = 1;
    config->capacity = cpu_to_le64(bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
    config->blk_size = cpu_to_le32(blk_size);
    config->size_max = cpu_to_le32(0);
    config->seg_max = cpu_to_le32(128 - 2);
    config->min_io_size = cpu_to_le16(1);
    config->opt_io_size = cpu_to_le32(1);
    config->num_queues = cpu_to_le16(num_queues);
    config->max_discard_sectors = cpu_to_le32(32768);
    config->max_discard_seg = cpu_to_le32(1);
    config->discard_sector_alignment = cpu_to_le32(config->blk_size >> 9);
    config->max_write_zeroes_sectors = cpu_to_le32(32768);
    config->max_write_zeroes_seg = cpu_to_le32(1);
}

static void vu_blk_exp_request_shutdown(BlockExport *exp)
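The vu_blk_initialize_config() change above stores every multi-byte field of the virtio-blk config space in little-endian, which is what a VIRTIO 1.0 get_config() reply must carry regardless of host byte order. A minimal standalone sketch (plain C, not QEMU code; to_le32() here stands in for QEMU's cpu_to_le32()) of why the conversion matters on big-endian hosts:

/* Sketch: store the value little-endian once, then raw memcpy() of the
 * config struct presents the same byte order to the guest on any host. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t to_le32(uint32_t v)
{
    /* byte-swap on big-endian hosts, no-op on little-endian ones */
    const union { uint16_t u16; uint8_t u8[2]; } probe = { .u16 = 1 };
    if (probe.u8[0] == 1) {
        return v;                       /* little-endian host */
    }
    return ((v & 0x000000ffu) << 24) | ((v & 0x0000ff00u) << 8) |
           ((v & 0x00ff0000u) >> 8)  | ((v & 0xff000000u) >> 24);
}

int main(void)
{
    uint32_t blk_size = to_le32(512);   /* field as VIRTIO 1.0 expects it */
    uint8_t wire[4];

    memcpy(wire, &blk_size, sizeof(wire));
    printf("bytes on the wire: %02x %02x %02x %02x\n",
           wire[0], wire[1], wire[2], wire[3]);   /* 00 02 00 00 on any host */
    return 0;
}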
configure
@@ -329,6 +329,7 @@ vhost_crypto=""
vhost_scsi=""
vhost_vsock=""
vhost_user=""
vhost_user_blk_server=""
vhost_user_fs=""
kvm="auto"
hax="auto"
@@ -1246,6 +1247,10 @@ for opt do
  ;;
  --enable-vhost-vsock) vhost_vsock="yes"
  ;;
  --disable-vhost-user-blk-server) vhost_user_blk_server="no"
  ;;
  --enable-vhost-user-blk-server) vhost_user_blk_server="yes"
  ;;
  --disable-vhost-user-fs) vhost_user_fs="no"
  ;;
  --enable-vhost-user-fs) vhost_user_fs="yes"
@@ -1791,6 +1796,7 @@ disabled with --disable-FEATURE, default is enabled if available:
  vhost-crypto vhost-user-crypto backend support
  vhost-kernel vhost kernel backend support
  vhost-user vhost-user backend support
  vhost-user-blk-server vhost-user-blk server support
  vhost-vdpa vhost-vdpa kernel backend support
  spice spice
  rbd rados block device (rbd)
@@ -2382,6 +2388,12 @@ if test "$vhost_net" = ""; then
  test "$vhost_kernel" = "yes" && vhost_net=yes
fi

# libvhost-user is Linux-only
test "$vhost_user_blk_server" = "" && vhost_user_blk_server=$linux
if test "$vhost_user_blk_server" = "yes" && test "$linux" = "no"; then
  error_exit "--enable-vhost-user-blk-server is only available on Linux"
fi

##########################################
# pkg-config probe
@@ -6275,6 +6287,9 @@ fi
if test "$vhost_vdpa" = "yes" ; then
  echo "CONFIG_VHOST_VDPA=y" >> $config_host_mak
fi
if test "$vhost_user_blk_server" = "yes" ; then
  echo "CONFIG_VHOST_USER_BLK_SERVER=y" >> $config_host_mak
fi
if test "$vhost_user_fs" = "yes" ; then
  echo "CONFIG_VHOST_USER_FS=y" >> $config_host_mak
fi
@@ -392,7 +392,8 @@ struct VuDev {
    bool broken;
    uint16_t max_queues;

    /* @read_msg: custom method to read vhost-user message
    /*
     * @read_msg: custom method to read vhost-user message
     *
     * Read data from vhost_user socket fd and fill up
     * the passed VhostUserMsg *vmsg struct.
@@ -409,15 +410,19 @@ struct VuDev {
     *
     */
    vu_read_msg_cb read_msg;
    /* @set_watch: add or update the given fd to the watch set,
     * call cb when condition is met */

    /*
     * @set_watch: add or update the given fd to the watch set,
     * call cb when condition is met.
     */
    vu_set_watch_cb set_watch;

    /* @remove_watch: remove the given fd from the watch set */
    vu_remove_watch_cb remove_watch;

    /* @panic: encountered an unrecoverable error, you may try to
     * re-initialize */
    /*
     * @panic: encountered an unrecoverable error, you may try to re-initialize
     */
    vu_panic_cb panic;
    const VuDevIface *iface;
@@ -404,6 +404,8 @@ vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
    VugDev *gdev;
    VubDev *vdev_blk;

    g_return_val_if_fail(len <= sizeof(struct virtio_blk_config), -1);

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);
    memcpy(config, &vdev_blk->blkcfg, len);
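Both get_config() information-leak fixes in this series follow the same pattern: clamp the requested length before the memcpy() so a front-end asking for more than sizeof(struct virtio_blk_config) cannot read adjacent server memory. A self-contained sketch of that pattern (hypothetical blk_config struct; GLib's g_return_val_if_fail() as used in the patches):

/* Generic sketch of the get_config() hardening: never copy more than the
 * backing config structure, or the memcpy() would read past it and leak
 * whatever follows to the vhost-user front-end. */
#include <glib.h>
#include <stdint.h>
#include <string.h>

struct blk_config { uint64_t capacity; uint32_t blk_size; };

static int get_config(const struct blk_config *cfg, uint8_t *out, uint32_t len)
{
    /* reject oversized requests instead of copying past 'cfg' */
    g_return_val_if_fail(len <= sizeof(*cfg), -1);
    memcpy(out, cfg, len);
    return 0;
}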
@@ -558,7 +558,7 @@ static void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val)
    if (val & ACPI_BITMASK_SLEEP_ENABLE) {
        /* change suspend type */
        uint16_t sus_typ = (val >> 10) & 7;
        switch(sus_typ) {
        switch (sus_typ) {
        case 0: /* soft power off */
            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
            break;
@@ -556,7 +556,7 @@ static void nvdimm_dsm_func_read_fit(NVDIMMState *state, NvdimmDsmIn *in,

    fit = fit_buf->fit;

    nvdimm_debug("Read FIT: offset %#x FIT size %#x Dirty %s.\n",
    nvdimm_debug("Read FIT: offset 0x%x FIT size 0x%x Dirty %s.\n",
                 read_fit->offset, fit->len, fit_buf->dirty ? "Yes" : "No");

    if (read_fit->offset > fit->len) {
@@ -664,7 +664,7 @@ static void nvdimm_dsm_label_size(NVDIMMDevice *nvdimm, hwaddr dsm_mem_addr)
    label_size = nvdimm->label_size;
    mxfer = nvdimm_get_max_xfer_label_size();

    nvdimm_debug("label_size %#x, max_xfer %#x.\n", label_size, mxfer);
    nvdimm_debug("label_size 0x%x, max_xfer 0x%x.\n", label_size, mxfer);

    label_size_out.func_ret_status = cpu_to_le32(NVDIMM_DSM_RET_STATUS_SUCCESS);
    label_size_out.label_size = cpu_to_le32(label_size);
@@ -680,19 +680,19 @@ static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm,
    uint32_t ret = NVDIMM_DSM_RET_STATUS_INVALID;

    if (offset + length < offset) {
        nvdimm_debug("offset %#x + length %#x is overflow.\n", offset,
        nvdimm_debug("offset 0x%x + length 0x%x is overflow.\n", offset,
                     length);
        return ret;
    }

    if (nvdimm->label_size < offset + length) {
        nvdimm_debug("position %#x is beyond label data (len = %" PRIx64 ").\n",
        nvdimm_debug("position 0x%x is beyond label data (len = %" PRIx64 ").\n",
                     offset + length, nvdimm->label_size);
        return ret;
    }

    if (length > nvdimm_get_max_xfer_label_size()) {
        nvdimm_debug("length (%#x) is larger than max_xfer (%#x).\n",
        nvdimm_debug("length (0x%x) is larger than max_xfer (0x%x).\n",
                     length, nvdimm_get_max_xfer_label_size());
        return ret;
    }
@@ -716,7 +716,7 @@ static void nvdimm_dsm_get_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
    get_label_data->offset = le32_to_cpu(get_label_data->offset);
    get_label_data->length = le32_to_cpu(get_label_data->length);

    nvdimm_debug("Read Label Data: offset %#x length %#x.\n",
    nvdimm_debug("Read Label Data: offset 0x%x length 0x%x.\n",
                 get_label_data->offset, get_label_data->length);

    status = nvdimm_rw_label_data_check(nvdimm, get_label_data->offset,
@@ -755,7 +755,7 @@ static void nvdimm_dsm_set_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
    set_label_data->offset = le32_to_cpu(set_label_data->offset);
    set_label_data->length = le32_to_cpu(set_label_data->length);

    nvdimm_debug("Write Label Data: offset %#x length %#x.\n",
    nvdimm_debug("Write Label Data: offset 0x%x length 0x%x.\n",
                 set_label_data->offset, set_label_data->length);

    status = nvdimm_rw_label_data_check(nvdimm, set_label_data->offset,
@@ -838,7 +838,7 @@ nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
    NvdimmDsmIn *in;
    hwaddr dsm_mem_addr = val;

    nvdimm_debug("dsm memory address %#" HWADDR_PRIx ".\n", dsm_mem_addr);
    nvdimm_debug("dsm memory address 0x%" HWADDR_PRIx ".\n", dsm_mem_addr);

    /*
     * The DSM memory is mapped to guest address space so an evil guest
@@ -852,11 +852,11 @@ nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
    in->function = le32_to_cpu(in->function);
    in->handle = le32_to_cpu(in->handle);

    nvdimm_debug("Revision %#x Handler %#x Function %#x.\n", in->revision,
    nvdimm_debug("Revision 0x%x Handler 0x%x Function 0x%x.\n", in->revision,
                 in->handle, in->function);

    if (in->revision != 0x1 /* Currently we only support DSM Spec Rev1. */) {
        nvdimm_debug("Revision %#x is not supported, expect %#x.\n",
        nvdimm_debug("Revision 0x%x is not supported, expect 0x%x.\n",
                     in->revision, 0x1);
        nvdimm_dsm_no_payload(NVDIMM_DSM_RET_STATUS_UNSUPPORT, dsm_mem_addr);
        goto exit;
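The nvdimm_debug() format-string changes above swap "%#x" for an explicit "0x%x": with the '#' flag, C's printf() omits the 0x prefix entirely for a zero value, so zero offsets and lengths printed without the prefix while non-zero ones carried it. A two-line demonstration:

/* Why "0x%x" gives uniform output where "%#x" does not. */
#include <stdio.h>

int main(void)
{
    printf("%#x vs 0x%x\n", 0, 0);           /* prints: 0 vs 0x0 */
    printf("%#x vs 0x%x\n", 0x1000, 0x1000); /* prints: 0x1000 vs 0x1000 */
    return 0;
}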
@@ -400,7 +400,7 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
    s->io_len = ACPI_PCIHP_SIZE;
    s->io_base = ACPI_PCIHP_ADDR;

    s->root= root_bus;
    s->root = root_bus;
    s->legacy_piix = !bridges_enabled;

    memory_region_init_io(&s->io, owner, &acpi_pcihp_io_ops, s,
@@ -131,7 +131,7 @@ static int vhost_user_blk_start(VirtIODevice *vdev)

    s->dev.acked_features = vdev->guest_features;

    ret = vhost_dev_prepare_inflight(&s->dev);
    ret = vhost_dev_prepare_inflight(&s->dev, vdev);
    if (ret < 0) {
        error_report("Error set inflight format: %d", -ret);
        goto err_guest_notifiers;
@@ -1149,10 +1149,11 @@ void pc_basic_device_init(struct PCMachineState *pcms,
            error_report("couldn't create HPET device");
            exit(1);
        }
        /* For pc-piix-*, hpet's intcap is always IRQ2. For pc-q35-1.7
         * and earlier, use IRQ2 for compat. Otherwise, use IRQ16~23,
         * IRQ8 and IRQ2.
         */
        /*
         * For pc-piix-*, hpet's intcap is always IRQ2. For pc-q35-1.7 and
         * earlier, use IRQ2 for compat. Otherwise, use IRQ16~23, IRQ8 and
         * IRQ2.
         */
        uint8_t compat = object_property_get_uint(OBJECT(hpet),
                                                  HPET_INTCAP, NULL);
        if (!compat) {
@@ -119,9 +119,10 @@ static uint64_t memory_device_get_free_addr(MachineState *ms,

    /* start of address space indicates the maximum alignment we expect */
    if (!QEMU_IS_ALIGNED(range_lob(&as), align)) {
        error_setg(errp, "the alignment (0x%" PRIx64 ") is not supported",
                   align);
        return 0;
        warn_report("the alignment (0x%" PRIx64 ") exceeds the expected"
                    " maximum alignment, memory will get fragmented and not"
                    " all 'maxmem' might be usable for memory devices.",
                    align);
    }

    memory_device_check_addable(ms, size, &err);
@@ -151,7 +152,7 @@ static uint64_t memory_device_get_free_addr(MachineState *ms,
            return 0;
        }
    } else {
        if (range_init(&new, range_lob(&as), size)) {
        if (range_init(&new, QEMU_ALIGN_UP(range_lob(&as), align), size)) {
            error_setg(errp, "can't add memory device, device too big");
            return 0;
        }
@@ -258,7 +259,7 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
{
    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
    Error *local_err = NULL;
    uint64_t addr, align;
    uint64_t addr, align = 0;
    MemoryRegion *mr;

    mr = mdc->get_memory_region(md, &local_err);
@@ -266,7 +267,14 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
        goto out;
    }

    align = legacy_align ? *legacy_align : memory_region_get_alignment(mr);
    if (legacy_align) {
        align = *legacy_align;
    } else {
        if (mdc->get_min_alignment) {
            align = mdc->get_min_alignment(md);
        }
        align = MAX(align, memory_region_get_alignment(mr));
    }
    addr = mdc->get_addr(md);
    addr = memory_device_get_free_addr(ms, !addr ? NULL : &addr, align,
                                       memory_region_size(mr), &local_err);
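With the new get_min_alignment() hook, the address auto-assignment combines the device's own minimum alignment (for virtio-mem, its block size) with the memory region's alignment and rounds the candidate address up to the result. A small worked example with assumed sizes (not QEMU code):

/* Sketch of the alignment arithmetic the pre-plug path now performs. */
#include <stdint.h>
#include <stdio.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
    uint64_t region_align = 2 * 1024 * 1024;   /* 2 MiB backend alignment */
    uint64_t device_min_align = 1ULL << 30;    /* 1 GiB virtio-mem block size */
    uint64_t align = MAX(device_min_align, region_align);
    uint64_t start = 0x140200000ULL;           /* hypothetical hotplug base */

    printf("assigned address: 0x%llx\n",
           (unsigned long long)ALIGN_UP(start, align));  /* 0x180000000 */
    return 0;
}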
@@ -988,16 +988,18 @@ static int save_opt_one(void *opaque,
        if (ret < 0) {
            error_setg(errp, "Unable to read from %s: %s",
                       value, strerror(errno));
            qemu_close(fd);
            return -1;
        }
        if (memchr(buf, '\0', ret)) {
            error_setg(errp, "NUL in OEM strings value in %s", value);
            qemu_close(fd);
            return -1;
        }
        g_byte_array_append(data, (guint8 *)buf, ret);
    }

    close(fd);
    qemu_close(fd);

    *opt->dest = g_renew(char *, *opt->dest, (*opt->ndest) + 1);
    (*opt->dest)[*opt->ndest] = (char *)g_byte_array_free(data, FALSE);
@@ -789,6 +789,14 @@ static void vfio_listener_region_add(MemoryListener *listener,
                                   int128_get64(llend),
                                   iommu_idx);

        ret = memory_region_iommu_set_page_size_mask(giommu->iommu,
                                                     container->pgsizes,
                                                     &err);
        if (ret) {
            g_free(giommu);
            goto fail;
        }

        ret = memory_region_register_iommu_notifier(section->mr, &giommu->n,
                                                    &err);
        if (ret) {
@@ -942,6 +950,17 @@ static void vfio_listener_region_del(MemoryListener *listener,
    }

    if (try_unmap) {
        if (int128_eq(llsize, int128_2_64())) {
            /* The unmap ioctl doesn't accept a full 64-bit span. */
            llsize = int128_rshift(llsize, 1);
            ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
            if (ret) {
                error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
                             "0x%"HWADDR_PRIx") = %d (%m)",
                             container, iova, int128_get64(llsize), ret);
            }
            iova += int128_get64(llsize);
        }
        ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
        if (ret) {
            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
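The region_del change above works around the fact that the VFIO DMA unmap ioctl takes a 64-bit size and therefore cannot express a full 2^64-byte span; such a span is unmapped as two halves instead. A plain-C sketch of the same arithmetic (the real code uses QEMU's Int128 helpers):

/* Sketch: a 2^64-byte range cannot be described by a uint64_t size, so it
 * is split into two 2^63-byte unmaps with the second starting at 2^63. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t iova = 0;              /* pretend the whole IOVA space is mapped */
    uint64_t half = 1ULL << 63;     /* 2^64 / 2, still representable */

    printf("unmap #1: iova=0x%llx size=0x%llx\n",
           (unsigned long long)iova, (unsigned long long)half);
    iova += half;
    printf("unmap #2: iova=0x%llx size=0x%llx\n",
           (unsigned long long)iova, (unsigned long long)half);
    return 0;
}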
@@ -106,6 +106,12 @@ virtio_iommu_put_domain(uint32_t domain_id) "Free domain=%d"
virtio_iommu_translate_out(uint64_t virt_addr, uint64_t phys_addr, uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
virtio_iommu_report_fault(uint8_t reason, uint32_t flags, uint32_t endpoint, uint64_t addr) "FAULT reason=%d flags=%d endpoint=%d address =0x%"PRIx64
virtio_iommu_fill_resv_property(uint32_t devid, uint8_t subtype, uint64_t start, uint64_t end) "dev= %d, type=%d start=0x%"PRIx64" end=0x%"PRIx64
virtio_iommu_notify_map(const char *name, uint64_t virt_start, uint64_t virt_end, uint64_t phys_start, uint32_t flags) "mr=%s virt_start=0x%"PRIx64" virt_end=0x%"PRIx64" phys_start=0x%"PRIx64" flags=%d"
virtio_iommu_notify_unmap(const char *name, uint64_t virt_start, uint64_t virt_end) "mr=%s virt_start=0x%"PRIx64" virt_end=0x%"PRIx64
virtio_iommu_remap(const char *name, uint64_t virt_start, uint64_t virt_end, uint64_t phys_start) "mr=%s virt_start=0x%"PRIx64" virt_end=0x%"PRIx64" phys_start=0x%"PRIx64
virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "mr=%s old_mask=0x%"PRIx64" new_mask=0x%"PRIx64
virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s"
virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s"

# virtio-mem.c
virtio_mem_send_response(uint16_t type) "type=%" PRIu16
@@ -257,7 +257,7 @@ static int vhost_kernel_send_device_iotlb_msg(struct vhost_dev *dev,
                                              struct vhost_iotlb_msg *imsg)
{
    if (dev->backend_cap & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)) {
        struct vhost_msg_v2 msg;
        struct vhost_msg_v2 msg = {};

        msg.type = VHOST_IOTLB_MSG_V2;
        msg.iotlb = *imsg;
@@ -267,7 +267,7 @@ static int vhost_kernel_send_device_iotlb_msg(struct vhost_dev *dev,
            return -EFAULT;
        }
    } else {
        struct vhost_msg msg;
        struct vhost_msg msg = {};

        msg.type = VHOST_IOTLB_MSG;
        msg.iotlb = *imsg;
@@ -1645,15 +1645,17 @@ int vhost_dev_load_inflight(struct vhost_inflight *inflight, QEMUFile *f)
    return 0;
}

int vhost_dev_prepare_inflight(struct vhost_dev *hdev)
int vhost_dev_prepare_inflight(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    int r;

    if (hdev->vhost_ops->vhost_get_inflight_fd == NULL ||
        hdev->vhost_ops->vhost_set_inflight_fd == NULL) {
        return 0;
    }

    hdev->vdev = vdev;

    r = vhost_dev_set_features(hdev, hdev->log_enabled);
    if (r < 0) {
        VHOST_OPS_DEBUG("vhost_dev_prepare_inflight failed");
@@ -49,6 +49,7 @@ typedef struct VirtIOIOMMUDomain {
typedef struct VirtIOIOMMUEndpoint {
    uint32_t id;
    VirtIOIOMMUDomain *domain;
    IOMMUMemoryRegion *iommu_mr;
    QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
} VirtIOIOMMUEndpoint;

@@ -101,7 +102,7 @@ static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid)
    bus_n = PCI_BUS_NUM(sid);
    iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n);
    if (iommu_pci_bus) {
        devfn = sid & PCI_DEVFN_MAX;
        devfn = sid & (PCI_DEVFN_MAX - 1);
        dev = iommu_pci_bus->pbdev[devfn];
        if (dev) {
            return &dev->iommu_mr;
@@ -124,11 +125,84 @@ static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
    }
}

static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                    hwaddr virt_end, hwaddr paddr,
                                    uint32_t flags)
{
    IOMMUTLBEntry entry;
    IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ,
                                              flags & VIRTIO_IOMMU_MAP_F_WRITE);

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_MAP) ||
        (flags & VIRTIO_IOMMU_MAP_F_MMIO) || !perm) {
        return;
    }

    trace_virtio_iommu_notify_map(mr->parent_obj.name, virt_start, virt_end,
                                  paddr, perm);

    entry.target_as = &address_space_memory;
    entry.addr_mask = virt_end - virt_start;
    entry.iova = virt_start;
    entry.perm = perm;
    entry.translated_addr = paddr;

    memory_region_notify_iommu(mr, 0, entry);
}

static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                      hwaddr virt_end)
{
    IOMMUTLBEntry entry;

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) {
        return;
    }

    trace_virtio_iommu_notify_unmap(mr->parent_obj.name, virt_start, virt_end);

    entry.target_as = &address_space_memory;
    entry.addr_mask = virt_end - virt_start;
    entry.iova = virt_start;
    entry.perm = IOMMU_NONE;
    entry.translated_addr = 0;

    memory_region_notify_iommu(mr, 0, entry);
}

static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value,
                                             gpointer data)
{
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_unmap(mr, interval->low, interval->high);

    return false;
}

static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value,
                                           gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);

    return false;
}

static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
{
    VirtIOIOMMUDomain *domain = ep->domain;

    if (!ep->domain) {
        return;
    }
    g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb,
                   ep->iommu_mr);
    QLIST_REMOVE(ep, next);
    ep->domain = NULL;
}
@@ -137,16 +211,19 @@ static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
                                                      uint32_t ep_id)
{
    VirtIOIOMMUEndpoint *ep;
    IOMMUMemoryRegion *mr;

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (ep) {
        return ep;
    }
    if (!virtio_iommu_mr(s, ep_id)) {
    mr = virtio_iommu_mr(s, ep_id);
    if (!mr) {
        return NULL;
    }
    ep = g_malloc0(sizeof(*ep));
    ep->id = ep_id;
    ep->iommu_mr = mr;
    trace_virtio_iommu_get_endpoint(ep_id);
    g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
    return ep;
@@ -268,6 +345,10 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,

    ep->domain = domain;

    /* Replay domain mappings on the associated memory region */
    g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb,
                   ep->iommu_mr);

    return VIRTIO_IOMMU_S_OK;
}

@@ -311,6 +392,7 @@ static int virtio_iommu_map(VirtIOIOMMU *s,
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUInterval *interval;
    VirtIOIOMMUMapping *mapping;
    VirtIOIOMMUEndpoint *ep;

    if (flags & ~VIRTIO_IOMMU_MAP_F_MASK) {
        return VIRTIO_IOMMU_S_INVAL;
@@ -340,6 +422,11 @@ static int virtio_iommu_map(VirtIOIOMMU *s,

    g_tree_insert(domain->mappings, interval, mapping);

    QLIST_FOREACH(ep, &domain->endpoint_list, next) {
        virtio_iommu_notify_map(ep->iommu_mr, virt_start, virt_end, phys_start,
                                flags);
    }

    return VIRTIO_IOMMU_S_OK;
}
@@ -352,6 +439,7 @@ static int virtio_iommu_unmap(VirtIOIOMMU *s,
    VirtIOIOMMUMapping *iter_val;
    VirtIOIOMMUInterval interval, *iter_key;
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;
    int ret = VIRTIO_IOMMU_S_OK;

    trace_virtio_iommu_unmap(domain_id, virt_start, virt_end);
@@ -369,6 +457,10 @@ static int virtio_iommu_unmap(VirtIOIOMMU *s,
        uint64_t current_high = iter_key->high;

        if (interval.low <= current_low && interval.high >= current_high) {
            QLIST_FOREACH(ep, &domain->endpoint_list, next) {
                virtio_iommu_notify_unmap(ep->iommu_mr, current_low,
                                          current_high);
            }
            g_tree_remove(domain->mappings, iter_key);
            trace_virtio_iommu_unmap_done(domain_id, current_low, current_high);
        } else {
@@ -755,6 +847,107 @@ static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
    return (ua > ub) - (ua < ub);
}

static gboolean virtio_iommu_remap(gpointer key, gpointer value, gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    trace_virtio_iommu_remap(mr->parent_obj.name, interval->low, interval->high,
                             mapping->phys_addr);
    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);
    return false;
}

static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMU *s = sdev->viommu;
    uint32_t sid;
    VirtIOIOMMUEndpoint *ep;

    sid = virtio_iommu_get_bdf(sdev);

    qemu_mutex_lock(&s->mutex);

    if (!s->endpoints) {
        goto unlock;
    }

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep || !ep->domain) {
        goto unlock;
    }

    g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr);

unlock:
    qemu_mutex_unlock(&s->mutex);
}

static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr,
                                            IOMMUNotifierFlag old,
                                            IOMMUNotifierFlag new,
                                            Error **errp)
{
    if (old == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_add(iommu_mr->parent_obj.name);
    } else if (new == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_del(iommu_mr->parent_obj.name);
    }
    return 0;
}

/*
 * The default mask (TARGET_PAGE_MASK) is the smallest supported guest granule,
 * for example 0xfffffffffffff000. When an assigned device has page size
 * restrictions due to the hardware IOMMU configuration, apply this restriction
 * to the mask.
 */
static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
                                           uint64_t new_mask,
                                           Error **errp)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMU *s = sdev->viommu;
    uint64_t cur_mask = s->config.page_size_mask;

    trace_virtio_iommu_set_page_size_mask(mr->parent_obj.name, cur_mask,
                                          new_mask);

    if ((cur_mask & new_mask) == 0) {
        error_setg(errp, "virtio-iommu page mask 0x%"PRIx64
                   " is incompatible with mask 0x%"PRIx64, cur_mask, new_mask);
        return -1;
    }

    /*
     * After the machine is finalized, we can't change the mask anymore. If by
     * chance the hotplugged device supports the same granule, we can still
     * accept it. Having a different masks is possible but the guest will use
     * sub-optimal block sizes, so warn about it.
     */
    if (qdev_hotplug) {
        int new_granule = ctz64(new_mask);
        int cur_granule = ctz64(cur_mask);

        if (new_granule != cur_granule) {
            error_setg(errp, "virtio-iommu page mask 0x%"PRIx64
                       " is incompatible with mask 0x%"PRIx64, cur_mask,
                       new_mask);
            return -1;
        } else if (new_mask != cur_mask) {
            warn_report("virtio-iommu page mask 0x%"PRIx64
                        " does not match 0x%"PRIx64, cur_mask, new_mask);
        }
        return 0;
    }

    s->config.page_size_mask &= new_mask;
    return 0;
}

static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
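A worked example, with assumed masks, of the hotplug check above: the count of trailing zero bits in a page size mask gives the granule (the smallest supported page size), and after machine creation only that granule has to match, even if the rest of the masks differ:

/* Granule comparison sketch; __builtin_ctzll() stands in for QEMU's ctz64(). */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t cur_mask = 0xfffffffffffff000ULL; /* 4 KiB granule, any block size */
    uint64_t new_mask = 0x40201000ULL;         /* 4 KiB granule plus a few blocks */

    printf("cur granule: %d bits\n", __builtin_ctzll(cur_mask)); /* 12 -> 4 KiB */
    printf("new granule: %d bits\n", __builtin_ctzll(new_mask)); /* 12 -> 4 KiB */
    printf("intersection: 0x%llx\n",
           (unsigned long long)(cur_mask & new_mask));           /* 0x40201000 */
    return 0;
}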
@@ -910,9 +1103,14 @@ static gboolean reconstruct_endpoints(gpointer key, gpointer value,
    VirtIOIOMMU *s = (VirtIOIOMMU *)data;
    VirtIOIOMMUDomain *d = (VirtIOIOMMUDomain *)value;
    VirtIOIOMMUEndpoint *iter;
    IOMMUMemoryRegion *mr;

    QLIST_FOREACH(iter, &d->endpoint_list, next) {
        mr = virtio_iommu_mr(s, iter->id);
        assert(mr);

        iter->domain = d;
        iter->iommu_mr = mr;
        g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
    }
    return false; /* continue the domain traversal */
@@ -979,6 +1177,9 @@ static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = virtio_iommu_translate;
    imrc->replay = virtio_iommu_replay;
    imrc->notify_flag_changed = virtio_iommu_notify_flag_changed;
    imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask;
}

static const TypeInfo virtio_iommu_info = {
@@ -76,6 +76,12 @@ static void virtio_mem_pci_fill_device_info(const MemoryDeviceState *md,
    info->type = MEMORY_DEVICE_INFO_KIND_VIRTIO_MEM;
}

static uint64_t virtio_mem_pci_get_min_alignment(const MemoryDeviceState *md)
{
    return object_property_get_uint(OBJECT(md), VIRTIO_MEM_BLOCK_SIZE_PROP,
                                    &error_abort);
}

static void virtio_mem_pci_size_change_notify(Notifier *notifier, void *data)
{
    VirtIOMEMPCI *pci_mem = container_of(notifier, VirtIOMEMPCI,
@@ -110,6 +116,7 @@ static void virtio_mem_pci_class_init(ObjectClass *klass, void *data)
    mdc->get_plugged_size = virtio_mem_pci_get_plugged_size;
    mdc->get_memory_region = virtio_mem_pci_get_memory_region;
    mdc->fill_device_info = virtio_mem_pci_fill_device_info;
    mdc->get_min_alignment = virtio_mem_pci_get_min_alignment;
}

static void virtio_mem_pci_instance_init(Object *obj)
@@ -33,10 +33,83 @@
#include "trace.h"

/*
 * Use QEMU_VMALLOC_ALIGN, so no THP will have to be split when unplugging
 * memory (e.g., 2MB on x86_64).
 * Let's not allow blocks smaller than 1 MiB, for example, to keep the tracking
 * bitmap small.
 */
#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)QEMU_VMALLOC_ALIGN)
#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))

#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \
    defined(__powerpc64__)
#define VIRTIO_MEM_DEFAULT_THP_SIZE ((uint32_t)(2 * MiB))
#else
/* fallback to 1 MiB (e.g., the THP size on s390x) */
#define VIRTIO_MEM_DEFAULT_THP_SIZE VIRTIO_MEM_MIN_BLOCK_SIZE
#endif

/*
 * We want to have a reasonable default block size such that
 * 1. We avoid splitting THPs when unplugging memory, which degrades
 *    performance.
 * 2. We avoid placing THPs for plugged blocks that also cover unplugged
 *    blocks.
 *
 * The actual THP size might differ between Linux kernels, so we try to probe
 * it. In the future (if we ever run into issues regarding 2.), we might want
 * to disable THP in case we fail to properly probe the THP size, or if the
 * block size is configured smaller than the THP size.
 */
static uint32_t thp_size;

#define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
static uint32_t virtio_mem_thp_size(void)
{
    gchar *content = NULL;
    const char *endptr;
    uint64_t tmp;

    if (thp_size) {
        return thp_size;
    }

    /*
     * Try to probe the actual THP size, fallback to (sane but eventually
     * incorrect) default sizes.
     */
    if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) &&
        !qemu_strtou64(content, &endptr, 0, &tmp) &&
        (!endptr || *endptr == '\n')) {
        /*
         * Sanity-check the value, if it's too big (e.g., aarch64 with 64k base
         * pages) or weird, fallback to something smaller.
         */
        if (!tmp || !is_power_of_2(tmp) || tmp > 16 * MiB) {
            warn_report("Read unsupported THP size: %" PRIx64, tmp);
        } else {
            thp_size = tmp;
        }
    }

    if (!thp_size) {
        thp_size = VIRTIO_MEM_DEFAULT_THP_SIZE;
        warn_report("Could not detect THP size, falling back to %" PRIx64
                    " MiB.", thp_size / MiB);
    }

    g_free(content);
    return thp_size;
}

static uint64_t virtio_mem_default_block_size(RAMBlock *rb)
{
    const uint64_t page_size = qemu_ram_pagesize(rb);

    /* We can have hugetlbfs with a page size smaller than the THP size. */
    if (page_size == qemu_real_host_page_size) {
        return MAX(page_size, virtio_mem_thp_size());
    }
    return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE);
}

/*
 * Size the usable region bigger than the requested size if possible. Esp.
 * Linux guests will only add (aligned) memory blocks in case they fully
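A worked example, with assumed sizes, of what virtio_mem_default_block_size() above ends up picking: the probed THP size for ordinary RAM backends, and the (possibly much larger) backend page size for hugetlbfs:

/* Default-block-size arithmetic with assumed values (not QEMU code). */
#include <stdint.h>
#include <stdio.h>

#define MiB (1024 * 1024ULL)
#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
    uint64_t thp_size = 2 * MiB;    /* assumed probe result on x86_64 */

    /* anonymous RAM backend: backend page size equals the 4 KiB host page */
    printf("anon RAM:  %llu MiB\n",
           (unsigned long long)(MAX(4096, thp_size) / MiB));         /* 2 */

    /* hugetlbfs backend with 1 GiB pages: block size follows the page size */
    printf("hugetlbfs: %llu MiB\n",
           (unsigned long long)(MAX(1024 * MiB, 1 * MiB) / MiB));    /* 1024 */
    return 0;
}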
@@ -227,6 +300,9 @@ static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
    uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr),
                           requested_size + VIRTIO_MEM_USABLE_EXTENT);

    /* The usable region size always has to be multiples of the block size. */
    newsize = QEMU_ALIGN_UP(newsize, vmem->block_size);

    if (!requested_size) {
        newsize = 0;
    }
@@ -440,15 +516,33 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
    rb = vmem->memdev->mr.ram_block;
    page_size = qemu_ram_pagesize(rb);

    /*
     * If the block size wasn't configured by the user, use a sane default. This
     * allows using hugetlbfs backends of any page size without manual
     * intervention.
     */
    if (!vmem->block_size) {
        vmem->block_size = virtio_mem_default_block_size(rb);
    }

    if (vmem->block_size < page_size) {
        error_setg(errp, "'%s' property has to be at least the page size (0x%"
                   PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size);
        return;
    } else if (vmem->block_size < virtio_mem_default_block_size(rb)) {
        warn_report("'%s' property is smaller than the default block size (%"
                    PRIx64 " MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP,
                    virtio_mem_default_block_size(rb) / MiB);
    } else if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) {
        error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
                   ")", VIRTIO_MEM_REQUESTED_SIZE_PROP,
                   VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
        return;
    } else if (!QEMU_IS_ALIGNED(vmem->addr, vmem->block_size)) {
        error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
                   ")", VIRTIO_MEM_ADDR_PROP, VIRTIO_MEM_BLOCK_SIZE_PROP,
                   vmem->block_size);
        return;
    } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr),
                                vmem->block_size)) {
        error_setg(errp, "'%s' property memdev size has to be multiples of"
@@ -734,6 +828,18 @@ static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name,
    const VirtIOMEM *vmem = VIRTIO_MEM(obj);
    uint64_t value = vmem->block_size;

    /*
     * If not configured by the user (and we're not realized yet), use the
     * default block size we would use with the current memory backend.
     */
    if (!value) {
        if (vmem->memdev && memory_region_is_ram(&vmem->memdev->mr)) {
            value = virtio_mem_default_block_size(vmem->memdev->mr.ram_block);
        } else {
            value = virtio_mem_thp_size();
        }
    }

    visit_type_size(v, name, &value, errp);
}
@@ -813,7 +919,6 @@ static void virtio_mem_instance_init(Object *obj)
{
    VirtIOMEM *vmem = VIRTIO_MEM(obj);

    vmem->block_size = VIRTIO_MEM_MIN_BLOCK_SIZE;
    notifier_list_init(&vmem->size_change_notifiers);
    vmem->precopy_notifier.notify = virtio_mem_precopy_notify;
@@ -397,6 +397,32 @@ struct IOMMUMemoryRegionClass {
     * @iommu: the IOMMUMemoryRegion
     */
    int (*num_indexes)(IOMMUMemoryRegion *iommu);

    /**
     * @iommu_set_page_size_mask:
     *
     * Restrict the page size mask that can be supported with a given IOMMU
     * memory region. Used for example to propagate host physical IOMMU page
     * size mask limitations to the virtual IOMMU.
     *
     * Optional method: if this method is not provided, then the default global
     * page mask is used.
     *
     * @iommu: the IOMMUMemoryRegion
     *
     * @page_size_mask: a bitmask of supported page sizes. At least one bit,
     * representing the smallest page size, must be set. Additional set bits
     * represent supported block sizes. For example a host physical IOMMU that
     * uses page tables with a page size of 4kB, and supports 2MB and 4GB
     * blocks, will set mask 0x40201000. A granule of 4kB with indiscriminate
     * block sizes is specified with mask 0xfffffffffffff000.
     *
     * Returns 0 on success, or a negative error. In case of failure, the error
     * object must be created.
     */
    int (*iommu_set_page_size_mask)(IOMMUMemoryRegion *iommu,
                                    uint64_t page_size_mask,
                                    Error **errp);
};

typedef struct CoalescedMemoryRange CoalescedMemoryRange;

@@ -1409,6 +1435,18 @@ int memory_region_iommu_attrs_to_index(IOMMUMemoryRegion *iommu_mr,
 */
int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr);

/**
 * memory_region_iommu_set_page_size_mask: set the supported page
 * sizes for a given IOMMU memory region
 *
 * @iommu_mr: IOMMU memory region
 * @page_size_mask: supported page size mask
 * @errp: pointer to Error*, to store an error if it happens.
 */
int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr,
                                           uint64_t page_size_mask,
                                           Error **errp);

/**
 * memory_region_name: get a memory region's name
 *
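A small sketch showing how a page_size_mask like the 0x40201000 example in the comment above can be decoded: every set bit is a supported page or block size, and the lowest set bit is the granule:

/* Iterate the set bits of a page size mask, lowest (the granule) first. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t mask = 0x40201000ULL;  /* example mask from the comment above */

    for (uint64_t m = mask; m; m &= m - 1) {
        uint64_t size = m & -m;     /* isolate the lowest set bit */
        printf("supported size: 0x%" PRIx64 "\n", size);
    }
    return 0;
}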
@@ -88,6 +88,16 @@ struct MemoryDeviceClass {
     */
    MemoryRegion *(*get_memory_region)(MemoryDeviceState *md, Error **errp);

    /*
     * Optional: Return the desired minimum alignment of the device in guest
     * physical address space. The final alignment is computed based on this
     * alignment and the alignment requirements of the memory region.
     *
     * Called when plugging the memory device to detect the required alignment
     * during address assignment.
     */
    uint64_t (*get_min_alignment)(const MemoryDeviceState *md);

    /*
     * Translate the memory device into #MemoryDeviceInfo.
     */
@@ -141,7 +141,7 @@ void vhost_dev_reset_inflight(struct vhost_inflight *inflight);
void vhost_dev_free_inflight(struct vhost_inflight *inflight);
void vhost_dev_save_inflight(struct vhost_inflight *inflight, QEMUFile *f);
int vhost_dev_load_inflight(struct vhost_inflight *inflight, QEMUFile *f);
int vhost_dev_prepare_inflight(struct vhost_dev *hdev);
int vhost_dev_prepare_inflight(struct vhost_dev *hdev, VirtIODevice *vdev);
int vhost_dev_set_inflight(struct vhost_dev *dev,
                           struct vhost_inflight *inflight);
int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size,
@@ -1049,6 +1049,9 @@ static void show_netdevs(void)
#endif
#ifdef CONFIG_POSIX
        "vhost-user",
#endif
#ifdef CONFIG_VHOST_VDPA
        "vhost-vdpa",
#endif
    };
@@ -145,6 +145,10 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
        g_free(s->vhost_net);
        s->vhost_net = NULL;
    }
    if (s->vhost_vdpa.device_fd >= 0) {
        qemu_close(s->vhost_vdpa.device_fd);
        s->vhost_vdpa.device_fd = -1;
    }
}

static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
@@ -1841,6 +1841,19 @@ static int memory_region_update_iommu_notify_flags(IOMMUMemoryRegion *iommu_mr,
    return ret;
}

int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr,
                                           uint64_t page_size_mask,
                                           Error **errp)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
    int ret = 0;

    if (imrc->iommu_set_page_size_mask) {
        ret = imrc->iommu_set_page_size_mask(iommu_mr, page_size_mask, errp);
    }
    return ret;
}

int memory_region_register_iommu_notifier(MemoryRegion *mr,
                                          IOMMUNotifier *n, Error **errp)
{
@@ -66,7 +66,7 @@ if have_block
  util_ss.add(files('main-loop.c'))
  util_ss.add(files('nvdimm-utils.c'))
  util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
  util_ss.add(when: ['CONFIG_LINUX', 'CONFIG_VHOST_USER'], if_true: [
  util_ss.add(when: 'CONFIG_LINUX', if_true: [
    files('vhost-user-server.c'), vhost_user
  ])
  util_ss.add(files('block-helpers.c'))