2010-03-17 14:08:17 +03:00
|
|
|
/*
|
|
|
|
* vhost support
|
|
|
|
*
|
|
|
|
* Copyright Red Hat, Inc. 2010
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Michael S. Tsirkin <mst@redhat.com>
|
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2. See
|
|
|
|
* the COPYING file in the top-level directory.
|
2012-01-13 20:44:23 +04:00
|
|
|
*
|
|
|
|
* Contributions after 2012-01-13 are licensed under the terms of the
|
|
|
|
* GNU GPL, version 2 or (at your option) any later version.
|
2010-03-17 14:08:17 +03:00
|
|
|
*/
|
|
|
|
|
2016-01-26 21:17:07 +03:00
|
|
|
#include "qemu/osdep.h"
|
include/qemu/osdep.h: Don't include qapi/error.h
Commit 57cb38b included qapi/error.h into qemu/osdep.h to get the
Error typedef. Since then, we've moved to include qemu/osdep.h
everywhere. Its file comment explains: "To avoid getting into
possible circular include dependencies, this file should not include
any other QEMU headers, with the exceptions of config-host.h,
compiler.h, os-posix.h and os-win32.h, all of which are doing a
similar job to this file and are under similar constraints."
qapi/error.h doesn't do a similar job, and it doesn't adhere to
similar constraints: it includes qapi-types.h. That's in excess of
100KiB of crap most .c files don't actually need.
Add the typedef to qemu/typedefs.h, and include that instead of
qapi/error.h. Include qapi/error.h in .c files that need it and don't
get it now. Include qapi-types.h in qom/object.h for uint16List.
Update scripts/clean-includes accordingly. Update it further to match
reality: replace config.h by config-target.h, add sysemu/os-posix.h,
sysemu/os-win32.h. Update the list of includes in the qemu/osdep.h
comment quoted above similarly.
This reduces the number of objects depending on qapi/error.h from "all
of them" to less than a third. Unfortunately, the number depending on
qapi-types.h shrinks only a little. More work is needed for that one.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
[Fix compilation without the spice devel packages. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-14 11:01:28 +03:00
|
|
|
#include "qapi/error.h"
|
2013-02-05 20:06:20 +04:00
|
|
|
#include "hw/virtio/vhost.h"
|
2010-03-17 14:08:17 +03:00
|
|
|
#include "hw/hw.h"
|
2013-05-13 15:29:47 +04:00
|
|
|
#include "qemu/atomic.h"
|
2012-12-17 21:20:00 +04:00
|
|
|
#include "qemu/range.h"
|
2015-06-17 16:23:39 +03:00
|
|
|
#include "qemu/error-report.h"
|
2015-10-09 18:17:25 +03:00
|
|
|
#include "qemu/memfd.h"
|
2010-04-09 00:49:50 +04:00
|
|
|
#include <linux/vhost.h>
|
2012-12-17 21:19:49 +04:00
|
|
|
#include "exec/address-spaces.h"
|
2013-04-24 12:21:21 +04:00
|
|
|
#include "hw/virtio/virtio-bus.h"
|
2015-06-17 16:23:39 +03:00
|
|
|
#include "hw/virtio/virtio-access.h"
|
2017-04-06 13:00:28 +03:00
|
|
|
#include "migration/blocker.h"
|
2017-01-11 07:32:12 +03:00
|
|
|
#include "sysemu/dma.h"
|
2010-03-17 14:08:17 +03:00
|
|
|
|
2016-07-27 00:15:05 +03:00
|
|
|
/* enabled until disconnected backend stabilizes */
#define _VHOST_DEBUG 1

/*
 * VHOST_OPS_DEBUG(fmt, ...): with _VHOST_DEBUG defined, forward the message
 * to error_report() with strerror(errno) and the numeric errno appended;
 * otherwise expand to an empty statement.
 */
#ifndef _VHOST_DEBUG
#define VHOST_OPS_DEBUG(fmt, ...) \
    do { } while (0)
#else
#define VHOST_OPS_DEBUG(fmt, ...) \
    do { \
        error_report(fmt ": %s (%d)", ## __VA_ARGS__, \
                     strerror(errno), errno); \
    } while (0)
#endif
|
|
|
|
|
2015-06-04 12:28:46 +03:00
|
|
|
static struct vhost_log *vhost_log;
|
2015-10-09 18:17:25 +03:00
|
|
|
static struct vhost_log *vhost_log_shm;
|
2015-06-04 12:28:46 +03:00
|
|
|
|
2015-10-06 11:37:27 +03:00
|
|
|
static unsigned int used_memslots;
|
|
|
|
static QLIST_HEAD(, vhost_dev) vhost_devices =
|
|
|
|
QLIST_HEAD_INITIALIZER(vhost_devices);
|
|
|
|
|
|
|
|
bool vhost_has_free_slot(void)
|
|
|
|
{
|
|
|
|
unsigned int slots_limit = ~0U;
|
|
|
|
struct vhost_dev *hdev;
|
|
|
|
|
|
|
|
QLIST_FOREACH(hdev, &vhost_devices, entry) {
|
|
|
|
unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
|
|
|
|
slots_limit = MIN(slots_limit, r);
|
|
|
|
}
|
|
|
|
return slots_limit > used_memslots;
|
|
|
|
}
|
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
static void vhost_dev_sync_region(struct vhost_dev *dev,
|
2011-12-19 15:18:13 +04:00
|
|
|
MemoryRegionSection *section,
|
2010-03-17 14:08:17 +03:00
|
|
|
uint64_t mfirst, uint64_t mlast,
|
|
|
|
uint64_t rfirst, uint64_t rlast)
|
|
|
|
{
|
2015-06-04 12:28:46 +03:00
|
|
|
vhost_log_chunk_t *log = dev->log->log;
|
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
uint64_t start = MAX(mfirst, rfirst);
|
|
|
|
uint64_t end = MIN(mlast, rlast);
|
2015-06-04 12:28:46 +03:00
|
|
|
vhost_log_chunk_t *from = log + start / VHOST_LOG_CHUNK;
|
|
|
|
vhost_log_chunk_t *to = log + end / VHOST_LOG_CHUNK + 1;
|
2017-06-22 14:04:16 +03:00
|
|
|
uint64_t addr = QEMU_ALIGN_DOWN(start, VHOST_LOG_CHUNK);
|
2010-03-17 14:08:17 +03:00
|
|
|
|
|
|
|
if (end < start) {
|
|
|
|
return;
|
|
|
|
}
|
2010-08-13 17:54:52 +04:00
|
|
|
assert(end / VHOST_LOG_CHUNK < dev->log_size);
|
2012-04-01 12:39:43 +04:00
|
|
|
assert(start / VHOST_LOG_CHUNK < dev->log_size);
|
2010-08-13 17:54:52 +04:00
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
for (;from < to; ++from) {
|
|
|
|
vhost_log_chunk_t log;
|
|
|
|
/* We first check with non-atomic: much cheaper,
|
|
|
|
* and we expect non-dirty to be the common case. */
|
|
|
|
if (!*from) {
|
2010-11-27 17:05:07 +03:00
|
|
|
addr += VHOST_LOG_CHUNK;
|
2010-03-17 14:08:17 +03:00
|
|
|
continue;
|
|
|
|
}
|
2013-05-13 15:29:47 +04:00
|
|
|
/* Data must be read atomically. We don't really need barrier semantics
|
|
|
|
* but it's easier to use atomic_* than roll our own. */
|
|
|
|
log = atomic_xchg(from, 0);
|
2014-04-29 18:17:29 +04:00
|
|
|
while (log) {
|
|
|
|
int bit = ctzl(log);
|
2013-02-21 15:16:06 +04:00
|
|
|
hwaddr page_addr;
|
|
|
|
hwaddr section_offset;
|
|
|
|
hwaddr mr_offset;
|
|
|
|
page_addr = addr + bit * VHOST_LOG_PAGE;
|
|
|
|
section_offset = page_addr - section->offset_within_address_space;
|
|
|
|
mr_offset = section_offset + section->offset_within_region;
|
|
|
|
memory_region_set_dirty(section->mr, mr_offset, VHOST_LOG_PAGE);
|
2010-03-17 14:08:17 +03:00
|
|
|
log &= ~(0x1ull << bit);
|
|
|
|
}
|
|
|
|
addr += VHOST_LOG_CHUNK;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-12-18 16:06:05 +04:00
|
|
|
static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
|
2011-12-19 15:18:13 +04:00
|
|
|
MemoryRegionSection *section,
|
2013-02-21 15:16:06 +04:00
|
|
|
hwaddr first,
|
|
|
|
hwaddr last)
|
2010-03-17 14:08:17 +03:00
|
|
|
{
|
|
|
|
int i;
|
2013-02-21 15:16:06 +04:00
|
|
|
hwaddr start_addr;
|
|
|
|
hwaddr end_addr;
|
2011-12-18 16:06:05 +04:00
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
if (!dev->log_enabled || !dev->started) {
|
|
|
|
return 0;
|
|
|
|
}
|
2013-02-21 15:16:06 +04:00
|
|
|
start_addr = section->offset_within_address_space;
|
2013-05-27 12:08:27 +04:00
|
|
|
end_addr = range_get_last(start_addr, int128_get64(section->size));
|
2013-02-21 15:16:06 +04:00
|
|
|
start_addr = MAX(first, start_addr);
|
|
|
|
end_addr = MIN(last, end_addr);
|
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
for (i = 0; i < dev->mem->nregions; ++i) {
|
|
|
|
struct vhost_memory_region *reg = dev->mem->regions + i;
|
2011-12-19 15:18:13 +04:00
|
|
|
vhost_dev_sync_region(dev, section, start_addr, end_addr,
|
2010-03-17 14:08:17 +03:00
|
|
|
reg->guest_phys_addr,
|
|
|
|
range_get_last(reg->guest_phys_addr,
|
|
|
|
reg->memory_size));
|
|
|
|
}
|
|
|
|
for (i = 0; i < dev->nvqs; ++i) {
|
|
|
|
struct vhost_virtqueue *vq = dev->vqs + i;
|
2011-12-19 15:18:13 +04:00
|
|
|
vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
|
2010-03-17 14:08:17 +03:00
|
|
|
range_get_last(vq->used_phys, vq->used_size));
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-12-18 16:06:05 +04:00
|
|
|
static void vhost_log_sync(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section)
|
|
|
|
{
|
|
|
|
struct vhost_dev *dev = container_of(listener, struct vhost_dev,
|
|
|
|
memory_listener);
|
2013-02-21 15:16:06 +04:00
|
|
|
vhost_sync_dirty_bitmap(dev, section, 0x0, ~0x0ULL);
|
|
|
|
}
|
2011-12-18 16:06:05 +04:00
|
|
|
|
2013-02-21 15:16:06 +04:00
|
|
|
/*
 * Sync the dirty log for the address range [first, last] across every
 * memory section recorded in dev->mem_sections.
 */
static void vhost_log_sync_range(struct vhost_dev *dev,
                                 hwaddr first, hwaddr last)
{
    int idx = 0;

    /* FIXME: this is N^2 in number of sections */
    while (idx < dev->n_mem_sections) {
        vhost_sync_dirty_bitmap(dev, &dev->mem_sections[idx], first, last);
        ++idx;
    }
}
|
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
/* Assign/unassign. Keep an unsorted array of non-overlapping
|
|
|
|
* memory regions in dev->mem. */
|
|
|
|
static void vhost_dev_unassign_memory(struct vhost_dev *dev,
|
|
|
|
uint64_t start_addr,
|
|
|
|
uint64_t size)
|
|
|
|
{
|
|
|
|
int from, to, n = dev->mem->nregions;
|
|
|
|
/* Track overlapping/split regions for sanity checking. */
|
|
|
|
int overlap_start = 0, overlap_end = 0, overlap_middle = 0, split = 0;
|
|
|
|
|
|
|
|
for (from = 0, to = 0; from < n; ++from, ++to) {
|
|
|
|
struct vhost_memory_region *reg = dev->mem->regions + to;
|
|
|
|
uint64_t reglast;
|
|
|
|
uint64_t memlast;
|
|
|
|
uint64_t change;
|
|
|
|
|
|
|
|
/* clone old region */
|
|
|
|
if (to != from) {
|
|
|
|
memcpy(reg, dev->mem->regions + from, sizeof *reg);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* No overlap is simple */
|
|
|
|
if (!ranges_overlap(reg->guest_phys_addr, reg->memory_size,
|
|
|
|
start_addr, size)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Split only happens if supplied region
|
|
|
|
* is in the middle of an existing one. Thus it can not
|
|
|
|
* overlap with any other existing region. */
|
|
|
|
assert(!split);
|
|
|
|
|
|
|
|
reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
|
|
|
|
memlast = range_get_last(start_addr, size);
|
|
|
|
|
|
|
|
/* Remove whole region */
|
|
|
|
if (start_addr <= reg->guest_phys_addr && memlast >= reglast) {
|
|
|
|
--dev->mem->nregions;
|
|
|
|
--to;
|
|
|
|
++overlap_middle;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Shrink region */
|
|
|
|
if (memlast >= reglast) {
|
|
|
|
reg->memory_size = start_addr - reg->guest_phys_addr;
|
|
|
|
assert(reg->memory_size);
|
|
|
|
assert(!overlap_end);
|
|
|
|
++overlap_end;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Shift region */
|
|
|
|
if (start_addr <= reg->guest_phys_addr) {
|
|
|
|
change = memlast + 1 - reg->guest_phys_addr;
|
|
|
|
reg->memory_size -= change;
|
|
|
|
reg->guest_phys_addr += change;
|
|
|
|
reg->userspace_addr += change;
|
|
|
|
assert(reg->memory_size);
|
|
|
|
assert(!overlap_start);
|
|
|
|
++overlap_start;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* This only happens if supplied region
|
|
|
|
* is in the middle of an existing one. Thus it can not
|
|
|
|
* overlap with any other existing region. */
|
|
|
|
assert(!overlap_start);
|
|
|
|
assert(!overlap_end);
|
|
|
|
assert(!overlap_middle);
|
|
|
|
/* Split region: shrink first part, shift second part. */
|
|
|
|
memcpy(dev->mem->regions + n, reg, sizeof *reg);
|
|
|
|
reg->memory_size = start_addr - reg->guest_phys_addr;
|
|
|
|
assert(reg->memory_size);
|
|
|
|
change = memlast + 1 - reg->guest_phys_addr;
|
|
|
|
reg = dev->mem->regions + n;
|
|
|
|
reg->memory_size -= change;
|
|
|
|
assert(reg->memory_size);
|
|
|
|
reg->guest_phys_addr += change;
|
|
|
|
reg->userspace_addr += change;
|
|
|
|
/* Never add more than 1 region */
|
|
|
|
assert(dev->mem->nregions == n);
|
|
|
|
++dev->mem->nregions;
|
|
|
|
++split;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Called after unassign, so no regions overlap the given range. */
|
|
|
|
static void vhost_dev_assign_memory(struct vhost_dev *dev,
|
|
|
|
uint64_t start_addr,
|
|
|
|
uint64_t size,
|
|
|
|
uint64_t uaddr)
|
|
|
|
{
|
|
|
|
int from, to;
|
|
|
|
struct vhost_memory_region *merged = NULL;
|
|
|
|
for (from = 0, to = 0; from < dev->mem->nregions; ++from, ++to) {
|
|
|
|
struct vhost_memory_region *reg = dev->mem->regions + to;
|
|
|
|
uint64_t prlast, urlast;
|
|
|
|
uint64_t pmlast, umlast;
|
|
|
|
uint64_t s, e, u;
|
|
|
|
|
|
|
|
/* clone old region */
|
|
|
|
if (to != from) {
|
|
|
|
memcpy(reg, dev->mem->regions + from, sizeof *reg);
|
|
|
|
}
|
|
|
|
prlast = range_get_last(reg->guest_phys_addr, reg->memory_size);
|
|
|
|
pmlast = range_get_last(start_addr, size);
|
|
|
|
urlast = range_get_last(reg->userspace_addr, reg->memory_size);
|
|
|
|
umlast = range_get_last(uaddr, size);
|
|
|
|
|
|
|
|
/* check for overlapping regions: should never happen. */
|
|
|
|
assert(prlast < start_addr || pmlast < reg->guest_phys_addr);
|
|
|
|
/* Not an adjacent or overlapping region - do not merge. */
|
|
|
|
if ((prlast + 1 != start_addr || urlast + 1 != uaddr) &&
|
|
|
|
(pmlast + 1 != reg->guest_phys_addr ||
|
|
|
|
umlast + 1 != reg->userspace_addr)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2016-02-21 18:01:47 +03:00
|
|
|
if (dev->vhost_ops->vhost_backend_can_merge &&
|
|
|
|
!dev->vhost_ops->vhost_backend_can_merge(dev, uaddr, size,
|
|
|
|
reg->userspace_addr,
|
|
|
|
reg->memory_size)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
if (merged) {
|
|
|
|
--to;
|
|
|
|
assert(to >= 0);
|
|
|
|
} else {
|
|
|
|
merged = reg;
|
|
|
|
}
|
|
|
|
u = MIN(uaddr, reg->userspace_addr);
|
|
|
|
s = MIN(start_addr, reg->guest_phys_addr);
|
|
|
|
e = MAX(pmlast, prlast);
|
|
|
|
uaddr = merged->userspace_addr = u;
|
|
|
|
start_addr = merged->guest_phys_addr = s;
|
|
|
|
size = merged->memory_size = e - s + 1;
|
|
|
|
assert(merged->memory_size);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!merged) {
|
|
|
|
struct vhost_memory_region *reg = dev->mem->regions + to;
|
|
|
|
memset(reg, 0, sizeof *reg);
|
|
|
|
reg->memory_size = size;
|
|
|
|
assert(reg->memory_size);
|
|
|
|
reg->guest_phys_addr = start_addr;
|
|
|
|
reg->userspace_addr = uaddr;
|
|
|
|
++to;
|
|
|
|
}
|
|
|
|
assert(to <= dev->mem->nregions + 1);
|
|
|
|
dev->mem->nregions = to;
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint64_t vhost_get_log_size(struct vhost_dev *dev)
|
|
|
|
{
|
|
|
|
uint64_t log_size = 0;
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < dev->mem->nregions; ++i) {
|
|
|
|
struct vhost_memory_region *reg = dev->mem->regions + i;
|
|
|
|
uint64_t last = range_get_last(reg->guest_phys_addr,
|
|
|
|
reg->memory_size);
|
|
|
|
log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
|
|
|
|
}
|
|
|
|
for (i = 0; i < dev->nvqs; ++i) {
|
|
|
|
struct vhost_virtqueue *vq = dev->vqs + i;
|
|
|
|
uint64_t last = vq->used_phys + vq->used_size - 1;
|
|
|
|
log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
|
|
|
|
}
|
|
|
|
return log_size;
|
|
|
|
}
|
2015-10-09 18:17:25 +03:00
|
|
|
|
|
|
|
static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
|
2015-06-04 12:28:46 +03:00
|
|
|
{
|
2015-10-09 18:17:25 +03:00
|
|
|
struct vhost_log *log;
|
|
|
|
uint64_t logsize = size * sizeof(*(log->log));
|
|
|
|
int fd = -1;
|
|
|
|
|
|
|
|
log = g_new0(struct vhost_log, 1);
|
|
|
|
if (share) {
|
|
|
|
log->log = qemu_memfd_alloc("vhost-log", logsize,
|
|
|
|
F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
|
|
|
|
&fd);
|
|
|
|
memset(log->log, 0, logsize);
|
|
|
|
} else {
|
|
|
|
log->log = g_malloc0(logsize);
|
|
|
|
}
|
2015-06-04 12:28:46 +03:00
|
|
|
|
|
|
|
log->size = size;
|
|
|
|
log->refcnt = 1;
|
2015-10-09 18:17:25 +03:00
|
|
|
log->fd = fd;
|
2015-06-04 12:28:46 +03:00
|
|
|
|
|
|
|
return log;
|
|
|
|
}
|
|
|
|
|
2015-10-09 18:17:25 +03:00
|
|
|
static struct vhost_log *vhost_log_get(uint64_t size, bool share)
|
2015-06-04 12:28:46 +03:00
|
|
|
{
|
2015-10-09 18:17:25 +03:00
|
|
|
struct vhost_log *log = share ? vhost_log_shm : vhost_log;
|
|
|
|
|
|
|
|
if (!log || log->size != size) {
|
|
|
|
log = vhost_log_alloc(size, share);
|
|
|
|
if (share) {
|
|
|
|
vhost_log_shm = log;
|
|
|
|
} else {
|
|
|
|
vhost_log = log;
|
|
|
|
}
|
2015-06-04 12:28:46 +03:00
|
|
|
} else {
|
2015-10-09 18:17:25 +03:00
|
|
|
++log->refcnt;
|
2015-06-04 12:28:46 +03:00
|
|
|
}
|
|
|
|
|
2015-10-09 18:17:25 +03:00
|
|
|
return log;
|
2015-06-04 12:28:46 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static void vhost_log_put(struct vhost_dev *dev, bool sync)
|
|
|
|
{
|
|
|
|
struct vhost_log *log = dev->log;
|
|
|
|
|
|
|
|
if (!log) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
--log->refcnt;
|
|
|
|
if (log->refcnt == 0) {
|
|
|
|
/* Sync only the range covered by the old log */
|
|
|
|
if (dev->log_size && sync) {
|
|
|
|
vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
|
|
|
|
}
|
2015-10-09 18:17:25 +03:00
|
|
|
|
2015-06-04 12:28:46 +03:00
|
|
|
if (vhost_log == log) {
|
2015-10-09 18:17:25 +03:00
|
|
|
g_free(log->log);
|
2015-06-04 12:28:46 +03:00
|
|
|
vhost_log = NULL;
|
2015-10-09 18:17:25 +03:00
|
|
|
} else if (vhost_log_shm == log) {
|
|
|
|
qemu_memfd_free(log->log, log->size * sizeof(*(log->log)),
|
|
|
|
log->fd);
|
|
|
|
vhost_log_shm = NULL;
|
2015-06-04 12:28:46 +03:00
|
|
|
}
|
2015-10-09 18:17:25 +03:00
|
|
|
|
2015-06-04 12:28:46 +03:00
|
|
|
g_free(log);
|
|
|
|
}
|
2017-09-20 21:53:06 +03:00
|
|
|
|
|
|
|
dev->log = NULL;
|
|
|
|
dev->log_size = 0;
|
2015-06-04 12:28:46 +03:00
|
|
|
}
|
2010-03-17 14:08:17 +03:00
|
|
|
|
2015-10-09 18:17:25 +03:00
|
|
|
static bool vhost_dev_log_is_shared(struct vhost_dev *dev)
|
|
|
|
{
|
|
|
|
return dev->vhost_ops->vhost_requires_shm_log &&
|
|
|
|
dev->vhost_ops->vhost_requires_shm_log(dev);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
|
2010-03-17 14:08:17 +03:00
|
|
|
{
|
2015-10-09 18:17:25 +03:00
|
|
|
struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev));
|
2015-06-04 12:28:46 +03:00
|
|
|
uint64_t log_base = (uintptr_t)log->log;
|
2013-02-21 15:16:06 +04:00
|
|
|
int r;
|
2013-01-22 14:07:56 +04:00
|
|
|
|
2015-10-09 18:17:22 +03:00
|
|
|
/* inform backend of log switching, this must be done before
|
|
|
|
releasing the current log, to ensure no logging is lost */
|
2015-10-09 18:17:26 +03:00
|
|
|
r = dev->vhost_ops->vhost_set_log_base(dev, log_base, log);
|
2016-07-27 00:15:05 +03:00
|
|
|
if (r < 0) {
|
|
|
|
VHOST_OPS_DEBUG("vhost_set_log_base failed");
|
|
|
|
}
|
|
|
|
|
2015-06-04 12:28:46 +03:00
|
|
|
vhost_log_put(dev, true);
|
2010-03-17 14:08:17 +03:00
|
|
|
dev->log = log;
|
|
|
|
dev->log_size = size;
|
|
|
|
}
|
|
|
|
|
2017-01-11 07:32:12 +03:00
|
|
|
static int vhost_dev_has_iommu(struct vhost_dev *dev)
|
|
|
|
{
|
|
|
|
VirtIODevice *vdev = dev->vdev;
|
|
|
|
|
2017-03-29 07:10:04 +03:00
|
|
|
return virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
|
2017-01-11 07:32:12 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Obtain a host pointer for @addr.  With an IOMMU in use the address is
 * returned unchanged, cast to a pointer, and *@plen is left alone; otherwise
 * the range is mapped with cpu_physical_memory_map().
 */
static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
                              hwaddr *plen, int is_write)
{
    if (vhost_dev_has_iommu(dev)) {
        return (void *)(uintptr_t)addr;
    }

    return cpu_physical_memory_map(addr, plen, is_write);
}
|
|
|
|
|
|
|
|
static void vhost_memory_unmap(struct vhost_dev *dev, void *buffer,
|
|
|
|
hwaddr len, int is_write,
|
|
|
|
hwaddr access_len)
|
|
|
|
{
|
|
|
|
if (!vhost_dev_has_iommu(dev)) {
|
|
|
|
cpu_physical_memory_unmap(buffer, len, is_write, access_len);
|
|
|
|
}
|
|
|
|
}
|
2016-11-04 11:39:15 +03:00
|
|
|
|
2017-01-11 07:32:12 +03:00
|
|
|
static int vhost_verify_ring_part_mapping(struct vhost_dev *dev,
|
|
|
|
void *part,
|
2016-11-04 11:39:15 +03:00
|
|
|
uint64_t part_addr,
|
|
|
|
uint64_t part_size,
|
|
|
|
uint64_t start_addr,
|
|
|
|
uint64_t size)
|
|
|
|
{
|
|
|
|
hwaddr l;
|
|
|
|
void *p;
|
|
|
|
int r = 0;
|
|
|
|
|
|
|
|
if (!ranges_overlap(start_addr, size, part_addr, part_size)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
l = part_size;
|
2017-01-11 07:32:12 +03:00
|
|
|
p = vhost_memory_map(dev, part_addr, &l, 1);
|
2016-11-04 11:39:15 +03:00
|
|
|
if (!p || l != part_size) {
|
|
|
|
r = -ENOMEM;
|
|
|
|
}
|
|
|
|
if (p != part) {
|
|
|
|
r = -EBUSY;
|
|
|
|
}
|
2017-01-11 07:32:12 +03:00
|
|
|
vhost_memory_unmap(dev, p, l, 0, 0);
|
2016-11-04 11:39:15 +03:00
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
static int vhost_verify_ring_mappings(struct vhost_dev *dev,
|
|
|
|
uint64_t start_addr,
|
|
|
|
uint64_t size)
|
|
|
|
{
|
2016-11-04 11:39:15 +03:00
|
|
|
int i, j;
|
2014-06-18 19:55:22 +04:00
|
|
|
int r = 0;
|
2016-11-04 11:39:15 +03:00
|
|
|
const char *part_name[] = {
|
|
|
|
"descriptor table",
|
|
|
|
"available ring",
|
|
|
|
"used ring"
|
|
|
|
};
|
2014-06-18 19:55:22 +04:00
|
|
|
|
2016-11-04 11:39:15 +03:00
|
|
|
for (i = 0; i < dev->nvqs; ++i) {
|
2010-03-17 14:08:17 +03:00
|
|
|
struct vhost_virtqueue *vq = dev->vqs + i;
|
|
|
|
|
2016-11-04 11:39:15 +03:00
|
|
|
j = 0;
|
2017-01-11 07:32:12 +03:00
|
|
|
r = vhost_verify_ring_part_mapping(dev, vq->desc, vq->desc_phys,
|
2016-11-04 11:39:15 +03:00
|
|
|
vq->desc_size, start_addr, size);
|
2017-12-01 00:39:59 +03:00
|
|
|
if (r) {
|
2016-11-04 11:39:15 +03:00
|
|
|
break;
|
2010-03-17 14:08:17 +03:00
|
|
|
}
|
2016-11-04 11:39:15 +03:00
|
|
|
|
|
|
|
j++;
|
2017-01-11 07:32:12 +03:00
|
|
|
r = vhost_verify_ring_part_mapping(dev, vq->avail, vq->avail_phys,
|
2016-11-04 11:39:15 +03:00
|
|
|
vq->avail_size, start_addr, size);
|
2017-12-01 00:39:59 +03:00
|
|
|
if (r) {
|
2016-11-04 11:39:15 +03:00
|
|
|
break;
|
2010-03-17 14:08:17 +03:00
|
|
|
}
|
2016-11-04 11:39:15 +03:00
|
|
|
|
|
|
|
j++;
|
2017-01-11 07:32:12 +03:00
|
|
|
r = vhost_verify_ring_part_mapping(dev, vq->used, vq->used_phys,
|
2016-11-04 11:39:15 +03:00
|
|
|
vq->used_size, start_addr, size);
|
2017-12-01 00:39:59 +03:00
|
|
|
if (r) {
|
2016-11-04 11:39:15 +03:00
|
|
|
break;
|
2010-03-17 14:08:17 +03:00
|
|
|
}
|
2016-11-04 11:39:15 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (r == -ENOMEM) {
|
|
|
|
error_report("Unable to map %s for ring %d", part_name[j], i);
|
|
|
|
} else if (r == -EBUSY) {
|
|
|
|
error_report("%s relocated for ring %d", part_name[j], i);
|
2010-03-17 14:08:17 +03:00
|
|
|
}
|
2014-06-18 19:55:22 +04:00
|
|
|
return r;
|
2010-03-17 14:08:17 +03:00
|
|
|
}
|
|
|
|
|
2011-03-31 17:45:51 +04:00
|
|
|
static struct vhost_memory_region *vhost_dev_find_reg(struct vhost_dev *dev,
|
|
|
|
uint64_t start_addr,
|
|
|
|
uint64_t size)
|
|
|
|
{
|
|
|
|
int i, n = dev->mem->nregions;
|
|
|
|
for (i = 0; i < n; ++i) {
|
|
|
|
struct vhost_memory_region *reg = dev->mem->regions + i;
|
|
|
|
if (ranges_overlap(reg->guest_phys_addr, reg->memory_size,
|
|
|
|
start_addr, size)) {
|
|
|
|
return reg;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool vhost_dev_cmp_memory(struct vhost_dev *dev,
|
|
|
|
uint64_t start_addr,
|
|
|
|
uint64_t size,
|
|
|
|
uint64_t uaddr)
|
|
|
|
{
|
|
|
|
struct vhost_memory_region *reg = vhost_dev_find_reg(dev, start_addr, size);
|
|
|
|
uint64_t reglast;
|
|
|
|
uint64_t memlast;
|
|
|
|
|
|
|
|
if (!reg) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
|
|
|
|
memlast = range_get_last(start_addr, size);
|
|
|
|
|
|
|
|
/* Need to extend region? */
|
|
|
|
if (start_addr < reg->guest_phys_addr || memlast > reglast) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
/* userspace_addr changed? */
|
|
|
|
return uaddr != reg->userspace_addr + start_addr - reg->guest_phys_addr;
|
|
|
|
}
|
|
|
|
|
2011-12-18 16:06:05 +04:00
|
|
|
static void vhost_set_memory(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section,
|
|
|
|
bool add)
|
2010-03-17 14:08:17 +03:00
|
|
|
{
|
2011-12-18 16:06:05 +04:00
|
|
|
struct vhost_dev *dev = container_of(listener, struct vhost_dev,
|
|
|
|
memory_listener);
|
2012-10-23 14:30:10 +04:00
|
|
|
hwaddr start_addr = section->offset_within_address_space;
|
2013-05-27 12:08:27 +04:00
|
|
|
ram_addr_t size = int128_get64(section->size);
|
2015-03-23 12:50:57 +03:00
|
|
|
bool log_dirty =
|
|
|
|
memory_region_get_dirty_log_mask(section->mr) & ~(1 << DIRTY_MEMORY_MIGRATION);
|
2010-03-17 14:08:17 +03:00
|
|
|
int s = offsetof(struct vhost_memory, regions) +
|
|
|
|
(dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
|
2011-12-18 16:06:05 +04:00
|
|
|
void *ram;
|
|
|
|
|
2011-08-21 07:09:37 +04:00
|
|
|
dev->mem = g_realloc(dev->mem, s);
|
2010-03-17 14:08:17 +03:00
|
|
|
|
2011-04-06 23:30:24 +04:00
|
|
|
if (log_dirty) {
|
2011-12-18 16:06:05 +04:00
|
|
|
add = false;
|
2011-04-06 23:30:24 +04:00
|
|
|
}
|
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
assert(size);
|
|
|
|
|
2011-03-31 17:45:51 +04:00
|
|
|
/* Optimize no-change case. At least cirrus_vga does this a lot at this time. */
|
2012-01-09 15:09:40 +04:00
|
|
|
ram = memory_region_get_ram_ptr(section->mr) + section->offset_within_region;
|
2011-12-18 16:06:05 +04:00
|
|
|
if (add) {
|
|
|
|
if (!vhost_dev_cmp_memory(dev, start_addr, size, (uintptr_t)ram)) {
|
2011-03-31 17:45:51 +04:00
|
|
|
/* Region exists with same address. Nothing to do. */
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (!vhost_dev_find_reg(dev, start_addr, size)) {
|
|
|
|
/* Removing region that we don't access. Nothing to do. */
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
vhost_dev_unassign_memory(dev, start_addr, size);
|
2011-12-18 16:06:05 +04:00
|
|
|
if (add) {
|
2010-03-17 14:08:17 +03:00
|
|
|
/* Add given mapping, merging adjacent regions if any */
|
2011-12-18 16:06:05 +04:00
|
|
|
vhost_dev_assign_memory(dev, start_addr, size, (uintptr_t)ram);
|
2010-03-17 14:08:17 +03:00
|
|
|
} else {
|
|
|
|
/* Remove old mapping for this memory, if any. */
|
|
|
|
vhost_dev_unassign_memory(dev, start_addr, size);
|
|
|
|
}
|
2013-04-03 13:15:11 +04:00
|
|
|
dev->mem_changed_start_addr = MIN(dev->mem_changed_start_addr, start_addr);
|
|
|
|
dev->mem_changed_end_addr = MAX(dev->mem_changed_end_addr, start_addr + size - 1);
|
|
|
|
dev->memory_changed = true;
|
2015-10-06 11:37:27 +03:00
|
|
|
used_memslots = dev->mem->nregions;
|
2013-04-03 13:15:11 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* A section is handled only if its memory region is RAM and not ROM. */
static bool vhost_section(MemoryRegionSection *section)
{
    if (!memory_region_is_ram(section->mr)) {
        return false;
    }
    return !memory_region_is_rom(section->mr);
}
|
|
|
|
|
|
|
|
/*
 * Memory listener hook: reset the changed-address window to "empty"
 * (start at the maximum value, end at 0) so that vhost_set_memory() can
 * widen it with MIN/MAX.
 */
static void vhost_begin(MemoryListener *listener)
{
    struct vhost_dev *dev;

    dev = container_of(listener, struct vhost_dev, memory_listener);
    dev->mem_changed_start_addr = -1;
    dev->mem_changed_end_addr = 0;
}
|
2010-03-17 14:08:17 +03:00
|
|
|
|
2013-04-03 13:15:11 +04:00
|
|
|
/*
 * Memory listener hook: push the accumulated memory table changes to the
 * backend.
 *
 * Nothing is done unless the table changed, the device is started and the
 * changed window is non-empty.  The ring mappings are verified against the
 * changed window first.  With logging enabled the dirty log is grown before
 * the table update and shrunk after it, so the log always covers the table
 * the backend is using.
 */
static void vhost_commit(MemoryListener *listener)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    hwaddr start_addr;
    ram_addr_t size;
    uint64_t log_size;
    int r;

    if (!dev->memory_changed) {
        return;
    }
    if (!dev->started) {
        return;
    }
    if (dev->mem_changed_start_addr > dev->mem_changed_end_addr) {
        return;
    }

    /* The device is known to be started here (checked above). */
    start_addr = dev->mem_changed_start_addr;
    size = dev->mem_changed_end_addr - dev->mem_changed_start_addr + 1;

    r = vhost_verify_ring_mappings(dev, start_addr, size);
    assert(r >= 0);

    if (!dev->log_enabled) {
        r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
        if (r < 0) {
            VHOST_OPS_DEBUG("vhost_set_mem_table failed");
        }
        dev->memory_changed = false;
        return;
    }
    log_size = vhost_get_log_size(dev);
    /* We allocate an extra 4K bytes to log,
     * to reduce the number of reallocations.
     * The slack is counted in log chunks, so divide by the chunk size
     * (dev->log itself is a struct vhost_log, not a chunk array). */
#define VHOST_LOG_BUFFER (0x1000 / sizeof(vhost_log_chunk_t))
    /* To log more, must increase log size before table update. */
    if (dev->log_size < log_size) {
        vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
    }
    r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
    if (r < 0) {
        VHOST_OPS_DEBUG("vhost_set_mem_table failed");
    }
    /* To log less, can only decrease log size after table update. */
    if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
        vhost_dev_log_resize(dev, log_size);
    }
    dev->memory_changed = false;
}
|
|
|
|
|
2011-12-18 16:06:05 +04:00
|
|
|
static void vhost_region_add(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section)
|
|
|
|
{
|
2011-12-19 15:18:13 +04:00
|
|
|
struct vhost_dev *dev = container_of(listener, struct vhost_dev,
|
|
|
|
memory_listener);
|
|
|
|
|
2012-01-09 16:01:39 +04:00
|
|
|
if (!vhost_section(section)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2011-12-19 15:18:13 +04:00
|
|
|
++dev->n_mem_sections;
|
|
|
|
dev->mem_sections = g_renew(MemoryRegionSection, dev->mem_sections,
|
|
|
|
dev->n_mem_sections);
|
|
|
|
dev->mem_sections[dev->n_mem_sections - 1] = *section;
|
2013-05-06 12:46:11 +04:00
|
|
|
memory_region_ref(section->mr);
|
2011-12-18 16:06:05 +04:00
|
|
|
vhost_set_memory(listener, section, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void vhost_region_del(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section)
|
|
|
|
{
|
2011-12-19 15:18:13 +04:00
|
|
|
struct vhost_dev *dev = container_of(listener, struct vhost_dev,
|
|
|
|
memory_listener);
|
|
|
|
int i;
|
|
|
|
|
2012-01-09 16:01:39 +04:00
|
|
|
if (!vhost_section(section)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2011-12-18 16:06:05 +04:00
|
|
|
vhost_set_memory(listener, section, false);
|
2013-05-06 12:46:11 +04:00
|
|
|
memory_region_unref(section->mr);
|
2011-12-19 15:18:13 +04:00
|
|
|
for (i = 0; i < dev->n_mem_sections; ++i) {
|
|
|
|
if (dev->mem_sections[i].offset_within_address_space
|
|
|
|
== section->offset_within_address_space) {
|
|
|
|
--dev->n_mem_sections;
|
|
|
|
memmove(&dev->mem_sections[i], &dev->mem_sections[i+1],
|
2012-01-09 15:59:50 +04:00
|
|
|
(dev->n_mem_sections - i) * sizeof(*dev->mem_sections));
|
2011-12-19 15:18:13 +04:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2011-12-18 16:06:05 +04:00
|
|
|
}
|
|
|
|
|
2017-03-29 07:10:04 +03:00
|
|
|
/*
 * IOMMU notifier callback: ask the backend to invalidate the device IOTLB
 * for the notified entry, with the iova shifted by the notifier's
 * iommu_offset and a length of addr_mask + 1.  A failure is only reported.
 */
static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
    struct vhost_iommu *iommu = container_of(n, struct vhost_iommu, n);
    hwaddr iova = iotlb->iova + iommu->iommu_offset;

    if (!vhost_backend_invalidate_device_iotlb(iommu->hdev, iova,
                                               iotlb->addr_mask + 1)) {
        return;
    }

    error_report("Fail to invalidate device iotlb");
}
|
|
|
|
|
|
|
|
static void vhost_iommu_region_add(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section)
|
|
|
|
{
|
|
|
|
struct vhost_dev *dev = container_of(listener, struct vhost_dev,
|
|
|
|
iommu_listener);
|
|
|
|
struct vhost_iommu *iommu;
|
memory: add section range info for IOMMU notifier
In this patch, IOMMUNotifier.{start|end} are introduced to store section
information for a specific notifier. When notification occurs, we not
only check the notification type (MAP|UNMAP), but also check whether the
notified iova range overlaps with the range of specific IOMMU notifier,
and skip those notifiers if not in the listened range.
When removing an region, we need to make sure we removed the correct
VFIOGuestIOMMU by checking the IOMMUNotifier.start address as well.
This patch is solving the problem that vfio-pci devices receive
duplicated UNMAP notification on x86 platform when vIOMMU is there. The
issue is that x86 IOMMU has a (0, 2^64-1) IOMMU region, which is
splitted by the (0xfee00000, 0xfeefffff) IRQ region. AFAIK
this (splitted IOMMU region) is only happening on x86.
This patch also helps vhost to leverage the new interface as well, so
that vhost won't get duplicated cache flushes. In that sense, it's an
slight performance improvement.
Suggested-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1491562755-23867-2-git-send-email-peterx@redhat.com>
[ehabkost: included extra vhost_iommu_region_del() change from Peter Xu]
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-04-07 13:59:07 +03:00
|
|
|
Int128 end;
|
2017-03-29 07:10:04 +03:00
|
|
|
|
|
|
|
if (!memory_region_is_iommu(section->mr)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
iommu = g_malloc0(sizeof(*iommu));
|
memory: add section range info for IOMMU notifier
In this patch, IOMMUNotifier.{start|end} are introduced to store section
information for a specific notifier. When notification occurs, we not
only check the notification type (MAP|UNMAP), but also check whether the
notified iova range overlaps with the range of specific IOMMU notifier,
and skip those notifiers if not in the listened range.
When removing an region, we need to make sure we removed the correct
VFIOGuestIOMMU by checking the IOMMUNotifier.start address as well.
This patch is solving the problem that vfio-pci devices receive
duplicated UNMAP notification on x86 platform when vIOMMU is there. The
issue is that x86 IOMMU has a (0, 2^64-1) IOMMU region, which is
splitted by the (0xfee00000, 0xfeefffff) IRQ region. AFAIK
this (splitted IOMMU region) is only happening on x86.
This patch also helps vhost to leverage the new interface as well, so
that vhost won't get duplicated cache flushes. In that sense, it's an
slight performance improvement.
Suggested-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1491562755-23867-2-git-send-email-peterx@redhat.com>
[ehabkost: included extra vhost_iommu_region_del() change from Peter Xu]
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-04-07 13:59:07 +03:00
|
|
|
end = int128_add(int128_make64(section->offset_within_region),
|
|
|
|
section->size);
|
|
|
|
end = int128_sub(end, int128_one());
|
|
|
|
iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify,
|
|
|
|
IOMMU_NOTIFIER_UNMAP,
|
|
|
|
section->offset_within_region,
|
|
|
|
int128_get64(end));
|
2017-03-29 07:10:04 +03:00
|
|
|
iommu->mr = section->mr;
|
|
|
|
iommu->iommu_offset = section->offset_within_address_space -
|
|
|
|
section->offset_within_region;
|
|
|
|
iommu->hdev = dev;
|
|
|
|
memory_region_register_iommu_notifier(section->mr, &iommu->n);
|
|
|
|
QLIST_INSERT_HEAD(&dev->iommu_list, iommu, iommu_next);
|
|
|
|
/* TODO: can replay help performance here? */
|
|
|
|
}
|
|
|
|
|
|
|
|
static void vhost_iommu_region_del(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section)
|
|
|
|
{
|
|
|
|
struct vhost_dev *dev = container_of(listener, struct vhost_dev,
|
|
|
|
iommu_listener);
|
|
|
|
struct vhost_iommu *iommu;
|
|
|
|
|
|
|
|
if (!memory_region_is_iommu(section->mr)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
QLIST_FOREACH(iommu, &dev->iommu_list, iommu_next) {
|
memory: add section range info for IOMMU notifier
In this patch, IOMMUNotifier.{start|end} are introduced to store section
information for a specific notifier. When notification occurs, we not
only check the notification type (MAP|UNMAP), but also check whether the
notified iova range overlaps with the range of specific IOMMU notifier,
and skip those notifiers if not in the listened range.
When removing an region, we need to make sure we removed the correct
VFIOGuestIOMMU by checking the IOMMUNotifier.start address as well.
This patch is solving the problem that vfio-pci devices receive
duplicated UNMAP notification on x86 platform when vIOMMU is there. The
issue is that x86 IOMMU has a (0, 2^64-1) IOMMU region, which is
splitted by the (0xfee00000, 0xfeefffff) IRQ region. AFAIK
this (splitted IOMMU region) is only happening on x86.
This patch also helps vhost to leverage the new interface as well, so
that vhost won't get duplicated cache flushes. In that sense, it's an
slight performance improvement.
Suggested-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <1491562755-23867-2-git-send-email-peterx@redhat.com>
[ehabkost: included extra vhost_iommu_region_del() change from Peter Xu]
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-04-07 13:59:07 +03:00
|
|
|
if (iommu->mr == section->mr &&
|
|
|
|
iommu->n.start == section->offset_within_region) {
|
2017-03-29 07:10:04 +03:00
|
|
|
memory_region_unregister_iommu_notifier(iommu->mr,
|
|
|
|
&iommu->n);
|
|
|
|
QLIST_REMOVE(iommu, iommu_next);
|
|
|
|
g_free(iommu);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-02-08 23:36:02 +04:00
|
|
|
static void vhost_region_nop(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
|
|
|
|
struct vhost_virtqueue *vq,
|
|
|
|
unsigned idx, bool enable_log)
|
|
|
|
{
|
|
|
|
struct vhost_vring_addr addr = {
|
|
|
|
.index = idx,
|
2010-04-02 01:59:51 +04:00
|
|
|
.desc_user_addr = (uint64_t)(unsigned long)vq->desc,
|
|
|
|
.avail_user_addr = (uint64_t)(unsigned long)vq->avail,
|
|
|
|
.used_user_addr = (uint64_t)(unsigned long)vq->used,
|
2010-03-17 14:08:17 +03:00
|
|
|
.log_guest_addr = vq->used_phys,
|
|
|
|
.flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
|
|
|
|
};
|
2015-10-09 18:17:28 +03:00
|
|
|
int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
|
2010-03-17 14:08:17 +03:00
|
|
|
if (r < 0) {
|
2016-07-27 00:15:06 +03:00
|
|
|
VHOST_OPS_DEBUG("vhost_set_vring_addr failed");
|
2010-03-17 14:08:17 +03:00
|
|
|
return -errno;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-01-11 07:32:12 +03:00
|
|
|
static int vhost_dev_set_features(struct vhost_dev *dev,
|
|
|
|
bool enable_log)
|
2010-03-17 14:08:17 +03:00
|
|
|
{
|
|
|
|
uint64_t features = dev->acked_features;
|
|
|
|
int r;
|
|
|
|
if (enable_log) {
|
2015-06-04 13:34:20 +03:00
|
|
|
features |= 0x1ULL << VHOST_F_LOG_ALL;
|
2010-03-17 14:08:17 +03:00
|
|
|
}
|
2015-10-09 18:17:28 +03:00
|
|
|
r = dev->vhost_ops->vhost_set_features(dev, features);
|
2016-07-27 00:15:06 +03:00
|
|
|
if (r < 0) {
|
|
|
|
VHOST_OPS_DEBUG("vhost_set_features failed");
|
|
|
|
}
|
2010-03-17 14:08:17 +03:00
|
|
|
return r < 0 ? -errno : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
|
|
|
|
{
|
2016-07-27 00:15:05 +03:00
|
|
|
int r, i, idx;
|
2010-03-17 14:08:17 +03:00
|
|
|
r = vhost_dev_set_features(dev, enable_log);
|
|
|
|
if (r < 0) {
|
|
|
|
goto err_features;
|
|
|
|
}
|
|
|
|
for (i = 0; i < dev->nvqs; ++i) {
|
2015-10-19 15:59:27 +03:00
|
|
|
idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
|
|
|
|
r = vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
|
2010-03-17 14:08:17 +03:00
|
|
|
enable_log);
|
|
|
|
if (r < 0) {
|
|
|
|
goto err_vq;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
err_vq:
|
|
|
|
for (; i >= 0; --i) {
|
2015-10-19 15:59:27 +03:00
|
|
|
idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
|
2016-07-27 00:15:05 +03:00
|
|
|
vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
|
|
|
|
dev->log_enabled);
|
2010-03-17 14:08:17 +03:00
|
|
|
}
|
2016-07-27 00:15:05 +03:00
|
|
|
vhost_dev_set_features(dev, dev->log_enabled);
|
2010-03-17 14:08:17 +03:00
|
|
|
err_features:
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2011-12-18 16:06:05 +04:00
|
|
|
/*
 * Turn migration dirty logging on (@enable non-zero) or off for the device
 * that owns @listener.
 *
 * Nothing happens if the requested state is already in effect.  For a
 * device that is not started only dev->log_enabled is recorded.  For a
 * started device the log is resized before logging is enabled, and released
 * with vhost_log_put() after logging has been disabled.
 *
 * Returns 0 on success or the negative value from vhost_dev_set_log().
 */
static int vhost_migration_log(MemoryListener *listener, int enable)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    bool want = !!enable;
    int ret;

    if (want == dev->log_enabled) {
        return 0;
    }

    if (dev->started) {
        if (want) {
            vhost_dev_log_resize(dev, vhost_get_log_size(dev));
        }

        ret = vhost_dev_set_log(dev, want);
        if (ret < 0) {
            return ret;
        }

        if (!want) {
            vhost_log_put(dev, false);
        }
    }

    dev->log_enabled = want;
    return 0;
}
|
|
|
|
|
2011-12-18 16:06:05 +04:00
|
|
|
/*
 * MemoryListener log_global_start hook: enable migration logging and
 * abort() if that fails.
 */
static void vhost_log_global_start(MemoryListener *listener)
{
    if (vhost_migration_log(listener, true) < 0) {
        abort();
    }
}
|
|
|
|
|
|
|
|
/*
 * MemoryListener log_global_stop hook: disable migration logging and
 * abort() if that fails.
 */
static void vhost_log_global_stop(MemoryListener *listener)
{
    if (vhost_migration_log(listener, false) < 0) {
        abort();
    }
}
|
|
|
|
|
|
|
|
static void vhost_log_start(MemoryListener *listener,
|
2015-04-25 15:38:30 +03:00
|
|
|
MemoryRegionSection *section,
|
|
|
|
int old, int new)
|
2011-12-18 16:06:05 +04:00
|
|
|
{
|
|
|
|
/* FIXME: implement */
|
|
|
|
}
|
|
|
|
|
|
|
|
static void vhost_log_stop(MemoryListener *listener,
|
2015-04-25 15:38:30 +03:00
|
|
|
MemoryRegionSection *section,
|
|
|
|
int old, int new)
|
2011-12-18 16:06:05 +04:00
|
|
|
{
|
|
|
|
/* FIXME: implement */
|
|
|
|
}
|
|
|
|
|
2016-02-05 13:46:04 +03:00
|
|
|
/* The vhost driver natively knows how to handle the vrings of non
|
|
|
|
* cross-endian legacy devices and modern devices. Only legacy devices
|
|
|
|
* exposed to a bi-endian guest may require the vhost driver to use a
|
|
|
|
* specific endianness.
|
|
|
|
*/
|
2016-02-05 13:45:40 +03:00
|
|
|
static inline bool vhost_needs_vring_endian(VirtIODevice *vdev)
|
|
|
|
{
|
2016-02-05 13:45:49 +03:00
|
|
|
if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
|
|
|
|
return false;
|
|
|
|
}
|
2016-02-05 13:45:40 +03:00
|
|
|
#ifdef HOST_WORDS_BIGENDIAN
|
2016-02-05 13:46:04 +03:00
|
|
|
return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_LITTLE;
|
2016-02-05 13:45:40 +03:00
|
|
|
#else
|
2016-02-05 13:46:04 +03:00
|
|
|
return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_BIG;
|
2016-02-05 13:45:40 +03:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2015-06-17 16:23:39 +03:00
|
|
|
static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev,
|
|
|
|
bool is_big_endian,
|
|
|
|
int vhost_vq_index)
|
|
|
|
{
|
|
|
|
struct vhost_vring_state s = {
|
|
|
|
.index = vhost_vq_index,
|
|
|
|
.num = is_big_endian
|
|
|
|
};
|
|
|
|
|
2015-10-09 18:17:28 +03:00
|
|
|
if (!dev->vhost_ops->vhost_set_vring_endian(dev, &s)) {
|
2015-06-17 16:23:39 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-07-27 00:15:06 +03:00
|
|
|
VHOST_OPS_DEBUG("vhost_set_vring_endian failed");
|
2015-06-17 16:23:39 +03:00
|
|
|
if (errno == ENOTTY) {
|
|
|
|
error_report("vhost does not support cross-endian");
|
|
|
|
return -ENOSYS;
|
|
|
|
}
|
|
|
|
|
|
|
|
return -errno;
|
|
|
|
}
|
|
|
|
|
2017-01-11 07:32:12 +03:00
|
|
|
static int vhost_memory_region_lookup(struct vhost_dev *hdev,
|
|
|
|
uint64_t gpa, uint64_t *uaddr,
|
|
|
|
uint64_t *len)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < hdev->mem->nregions; i++) {
|
|
|
|
struct vhost_memory_region *reg = hdev->mem->regions + i;
|
|
|
|
|
|
|
|
if (gpa >= reg->guest_phys_addr &&
|
|
|
|
reg->guest_phys_addr + reg->memory_size > gpa) {
|
|
|
|
*uaddr = reg->userspace_addr + gpa - reg->guest_phys_addr;
|
|
|
|
*len = reg->guest_phys_addr + reg->memory_size - gpa;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
|
2017-06-02 13:18:27 +03:00
|
|
|
int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
|
2017-01-11 07:32:12 +03:00
|
|
|
{
|
|
|
|
IOMMUTLBEntry iotlb;
|
|
|
|
uint64_t uaddr, len;
|
2017-06-02 13:18:27 +03:00
|
|
|
int ret = -EFAULT;
|
2017-01-11 07:32:12 +03:00
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
|
|
|
|
iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
|
|
|
|
iova, write);
|
|
|
|
if (iotlb.target_as != NULL) {
|
2017-06-02 13:18:27 +03:00
|
|
|
ret = vhost_memory_region_lookup(dev, iotlb.translated_addr,
|
|
|
|
&uaddr, &len);
|
|
|
|
if (ret) {
|
2017-01-11 07:32:12 +03:00
|
|
|
error_report("Fail to lookup the translated address "
|
|
|
|
"%"PRIx64, iotlb.translated_addr);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
len = MIN(iotlb.addr_mask + 1, len);
|
|
|
|
iova = iova & ~iotlb.addr_mask;
|
|
|
|
|
2017-06-02 13:18:28 +03:00
|
|
|
ret = vhost_backend_update_device_iotlb(dev, iova, uaddr,
|
|
|
|
len, iotlb.perm);
|
2017-06-02 13:18:27 +03:00
|
|
|
if (ret) {
|
2017-01-11 07:32:12 +03:00
|
|
|
error_report("Fail to update device iotlb");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
rcu_read_unlock();
|
2017-06-02 13:18:27 +03:00
|
|
|
|
|
|
|
return ret;
|
2017-01-11 07:32:12 +03:00
|
|
|
}
|
|
|
|
|
2012-12-24 19:37:01 +04:00
|
|
|
static int vhost_virtqueue_start(struct vhost_dev *dev,
|
2010-03-17 14:08:17 +03:00
|
|
|
struct VirtIODevice *vdev,
|
|
|
|
struct vhost_virtqueue *vq,
|
|
|
|
unsigned idx)
|
|
|
|
{
|
2016-08-01 11:07:58 +03:00
|
|
|
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
|
|
|
|
VirtioBusState *vbus = VIRTIO_BUS(qbus);
|
|
|
|
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
|
2012-10-23 14:30:10 +04:00
|
|
|
hwaddr s, l, a;
|
2010-03-17 14:08:17 +03:00
|
|
|
int r;
|
2015-10-09 18:17:28 +03:00
|
|
|
int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
|
2010-03-17 14:08:17 +03:00
|
|
|
struct vhost_vring_file file = {
|
2013-01-30 15:12:35 +04:00
|
|
|
.index = vhost_vq_index
|
2010-03-17 14:08:17 +03:00
|
|
|
};
|
|
|
|
struct vhost_vring_state state = {
|
2013-01-30 15:12:35 +04:00
|
|
|
.index = vhost_vq_index
|
2010-03-17 14:08:17 +03:00
|
|
|
};
|
|
|
|
struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
|
|
|
|
|
2013-01-30 15:12:35 +04:00
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
vq->num = state.num = virtio_queue_get_num(vdev, idx);
|
2015-10-09 18:17:28 +03:00
|
|
|
r = dev->vhost_ops->vhost_set_vring_num(dev, &state);
|
2010-03-17 14:08:17 +03:00
|
|
|
if (r) {
|
2016-07-27 00:15:06 +03:00
|
|
|
VHOST_OPS_DEBUG("vhost_set_vring_num failed");
|
2010-03-17 14:08:17 +03:00
|
|
|
return -errno;
|
|
|
|
}
|
|
|
|
|
|
|
|
state.num = virtio_queue_get_last_avail_idx(vdev, idx);
|
2015-10-09 18:17:28 +03:00
|
|
|
r = dev->vhost_ops->vhost_set_vring_base(dev, &state);
|
2010-03-17 14:08:17 +03:00
|
|
|
if (r) {
|
2016-07-27 00:15:06 +03:00
|
|
|
VHOST_OPS_DEBUG("vhost_set_vring_base failed");
|
2010-03-17 14:08:17 +03:00
|
|
|
return -errno;
|
|
|
|
}
|
|
|
|
|
2016-02-05 13:45:49 +03:00
|
|
|
if (vhost_needs_vring_endian(vdev)) {
|
2015-06-17 16:23:39 +03:00
|
|
|
r = vhost_virtqueue_set_vring_endian_legacy(dev,
|
|
|
|
virtio_is_big_endian(vdev),
|
|
|
|
vhost_vq_index);
|
|
|
|
if (r) {
|
|
|
|
return -errno;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-11-04 11:39:15 +03:00
|
|
|
vq->desc_size = s = l = virtio_queue_get_desc_size(vdev, idx);
|
|
|
|
vq->desc_phys = a = virtio_queue_get_desc_addr(vdev, idx);
|
2017-01-11 07:32:12 +03:00
|
|
|
vq->desc = vhost_memory_map(dev, a, &l, 0);
|
2010-03-17 14:08:17 +03:00
|
|
|
if (!vq->desc || l != s) {
|
|
|
|
r = -ENOMEM;
|
|
|
|
goto fail_alloc_desc;
|
|
|
|
}
|
2016-11-04 11:39:15 +03:00
|
|
|
vq->avail_size = s = l = virtio_queue_get_avail_size(vdev, idx);
|
|
|
|
vq->avail_phys = a = virtio_queue_get_avail_addr(vdev, idx);
|
2017-01-11 07:32:12 +03:00
|
|
|
vq->avail = vhost_memory_map(dev, a, &l, 0);
|
2010-03-17 14:08:17 +03:00
|
|
|
if (!vq->avail || l != s) {
|
|
|
|
r = -ENOMEM;
|
|
|
|
goto fail_alloc_avail;
|
|
|
|
}
|
|
|
|
vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
|
|
|
|
vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
|
2017-01-11 07:32:12 +03:00
|
|
|
vq->used = vhost_memory_map(dev, a, &l, 1);
|
2010-03-17 14:08:17 +03:00
|
|
|
if (!vq->used || l != s) {
|
|
|
|
r = -ENOMEM;
|
|
|
|
goto fail_alloc_used;
|
|
|
|
}
|
|
|
|
|
2013-01-30 15:12:35 +04:00
|
|
|
r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled);
|
2010-03-17 14:08:17 +03:00
|
|
|
if (r < 0) {
|
|
|
|
r = -errno;
|
|
|
|
goto fail_alloc;
|
|
|
|
}
|
2013-01-30 15:12:35 +04:00
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
|
2015-10-09 18:17:28 +03:00
|
|
|
r = dev->vhost_ops->vhost_set_vring_kick(dev, &file);
|
2010-03-17 14:08:17 +03:00
|
|
|
if (r) {
|
2016-07-27 00:15:06 +03:00
|
|
|
VHOST_OPS_DEBUG("vhost_set_vring_kick failed");
|
2010-10-06 17:20:28 +04:00
|
|
|
r = -errno;
|
2010-03-17 14:08:17 +03:00
|
|
|
goto fail_kick;
|
|
|
|
}
|
|
|
|
|
2012-12-24 19:37:01 +04:00
|
|
|
/* Clear and discard previous events if any. */
|
|
|
|
event_notifier_test_and_clear(&vq->masked_notifier);
|
2010-03-17 14:08:17 +03:00
|
|
|
|
2016-02-18 17:12:23 +03:00
|
|
|
/* Init vring in unmasked state, unless guest_notifier_mask
|
|
|
|
* will do it later.
|
|
|
|
*/
|
|
|
|
if (!vdev->use_guest_notifier_mask) {
|
|
|
|
/* TODO: check and handle errors. */
|
|
|
|
vhost_virtqueue_mask(dev, vdev, idx, false);
|
|
|
|
}
|
|
|
|
|
2016-08-01 11:07:58 +03:00
|
|
|
if (k->query_guest_notifiers &&
|
|
|
|
k->query_guest_notifiers(qbus->parent) &&
|
|
|
|
virtio_queue_vector(vdev, idx) == VIRTIO_NO_VECTOR) {
|
|
|
|
file.fd = -1;
|
|
|
|
r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
|
|
|
|
if (r) {
|
|
|
|
goto fail_vector;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
return 0;
|
|
|
|
|
2016-08-01 11:07:58 +03:00
|
|
|
fail_vector:
|
2010-03-17 14:08:17 +03:00
|
|
|
fail_kick:
|
|
|
|
fail_alloc:
|
2017-01-11 07:32:12 +03:00
|
|
|
vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
|
|
|
|
0, 0);
|
2010-03-17 14:08:17 +03:00
|
|
|
fail_alloc_used:
|
2017-01-11 07:32:12 +03:00
|
|
|
vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
|
|
|
|
0, 0);
|
2010-03-17 14:08:17 +03:00
|
|
|
fail_alloc_avail:
|
2017-01-11 07:32:12 +03:00
|
|
|
vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
|
|
|
|
0, 0);
|
2010-03-17 14:08:17 +03:00
|
|
|
fail_alloc_desc:
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2012-12-24 19:37:01 +04:00
|
|
|
static void vhost_virtqueue_stop(struct vhost_dev *dev,
|
2010-03-17 14:08:17 +03:00
|
|
|
struct VirtIODevice *vdev,
|
|
|
|
struct vhost_virtqueue *vq,
|
|
|
|
unsigned idx)
|
|
|
|
{
|
2015-10-09 18:17:28 +03:00
|
|
|
int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
|
2010-03-17 14:08:17 +03:00
|
|
|
struct vhost_vring_state state = {
|
2015-06-17 16:23:39 +03:00
|
|
|
.index = vhost_vq_index,
|
2010-03-17 14:08:17 +03:00
|
|
|
};
|
|
|
|
int r;
|
2015-09-23 07:19:59 +03:00
|
|
|
|
2015-10-09 18:17:28 +03:00
|
|
|
r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
|
2010-03-17 14:08:17 +03:00
|
|
|
if (r < 0) {
|
2016-07-27 00:15:06 +03:00
|
|
|
VHOST_OPS_DEBUG("vhost VQ %d ring restore failed: %d", idx, r);
|
2017-11-16 21:48:35 +03:00
|
|
|
/* Connection to the backend is broken, so let's sync internal
|
|
|
|
* last avail idx to the device used idx.
|
|
|
|
*/
|
|
|
|
virtio_queue_restore_last_avail_idx(vdev, idx);
|
2016-07-27 00:15:27 +03:00
|
|
|
} else {
|
|
|
|
virtio_queue_set_last_avail_idx(vdev, idx, state.num);
|
2010-03-17 14:08:17 +03:00
|
|
|
}
|
2013-08-12 13:21:36 +04:00
|
|
|
virtio_queue_invalidate_signalled_used(vdev, idx);
|
2016-12-13 11:12:06 +03:00
|
|
|
virtio_queue_update_used_idx(vdev, idx);
|
2015-06-17 16:23:39 +03:00
|
|
|
|
|
|
|
/* In the cross-endian case, we need to reset the vring endianness to
|
|
|
|
* native as legacy devices expect so by default.
|
|
|
|
*/
|
2016-02-05 13:45:49 +03:00
|
|
|
if (vhost_needs_vring_endian(vdev)) {
|
2016-07-27 00:15:05 +03:00
|
|
|
vhost_virtqueue_set_vring_endian_legacy(dev,
|
|
|
|
!virtio_is_big_endian(vdev),
|
|
|
|
vhost_vq_index);
|
2015-06-17 16:23:39 +03:00
|
|
|
}
|
|
|
|
|
2017-01-11 07:32:12 +03:00
|
|
|
vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
|
|
|
|
1, virtio_queue_get_used_size(vdev, idx));
|
|
|
|
vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
|
|
|
|
0, virtio_queue_get_avail_size(vdev, idx));
|
|
|
|
vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
|
|
|
|
0, virtio_queue_get_desc_size(vdev, idx));
|
2010-03-17 14:08:17 +03:00
|
|
|
}
|
|
|
|
|
2012-02-08 18:39:06 +04:00
|
|
|
static void vhost_eventfd_add(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section,
|
2012-07-05 19:16:27 +04:00
|
|
|
bool match_data, uint64_t data, EventNotifier *e)
|
2012-02-08 18:39:06 +04:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
static void vhost_eventfd_del(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section,
|
2012-07-05 19:16:27 +04:00
|
|
|
bool match_data, uint64_t data, EventNotifier *e)
|
2012-02-08 18:39:06 +04:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2016-07-06 04:57:55 +03:00
|
|
|
static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev,
|
|
|
|
int n, uint32_t timeout)
|
|
|
|
{
|
|
|
|
int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n);
|
|
|
|
struct vhost_vring_state state = {
|
|
|
|
.index = vhost_vq_index,
|
|
|
|
.num = timeout,
|
|
|
|
};
|
|
|
|
int r;
|
|
|
|
|
|
|
|
if (!dev->vhost_ops->vhost_set_vring_busyloop_timeout) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
r = dev->vhost_ops->vhost_set_vring_busyloop_timeout(dev, &state);
|
|
|
|
if (r) {
|
2016-07-27 00:15:06 +03:00
|
|
|
VHOST_OPS_DEBUG("vhost_set_vring_busyloop_timeout failed");
|
2016-07-06 04:57:55 +03:00
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-12-24 19:37:01 +04:00
|
|
|
static int vhost_virtqueue_init(struct vhost_dev *dev,
|
|
|
|
struct vhost_virtqueue *vq, int n)
|
|
|
|
{
|
2015-10-09 18:17:28 +03:00
|
|
|
int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n);
|
2012-12-24 19:37:01 +04:00
|
|
|
struct vhost_vring_file file = {
|
vhost-user: add multiple queue support
This patch is initially based a patch from Nikolay Nikolaev.
This patch adds vhost-user multiple queue support, by creating a nc
and vhost_net pair for each queue.
Qemu exits if find that the backend can't support the number of requested
queues (by providing queues=# option). The max number is queried by a
new message, VHOST_USER_GET_QUEUE_NUM, and is sent only when protocol
feature VHOST_USER_PROTOCOL_F_MQ is present first.
The max queue check is done at vhost-user initiation stage. We initiate
one queue first, which, in the meantime, also gets the max_queues the
backend supports.
In older version, it was reported that some messages are sent more times
than necessary. Here we came an agreement with Michael that we could
categorize vhost user messages to 2 types: non-vring specific messages,
which should be sent only once, and vring specific messages, which should
be sent per queue.
Here I introduced a helper function vhost_user_one_time_request(), which
lists following messages as non-vring specific messages:
VHOST_USER_SET_OWNER
VHOST_USER_RESET_DEVICE
VHOST_USER_SET_MEM_TABLE
VHOST_USER_GET_QUEUE_NUM
For above messages, we simply ignore them when they are not sent the first
time.
Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Tested-by: Marcel Apfelbaum <marcel@redhat.com>
2015-09-23 07:20:00 +03:00
|
|
|
.index = vhost_vq_index,
|
2012-12-24 19:37:01 +04:00
|
|
|
};
|
|
|
|
int r = event_notifier_init(&vq->masked_notifier, 0);
|
|
|
|
if (r < 0) {
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
file.fd = event_notifier_get_fd(&vq->masked_notifier);
|
2015-10-09 18:17:28 +03:00
|
|
|
r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
|
2012-12-24 19:37:01 +04:00
|
|
|
if (r) {
|
2016-07-27 00:15:06 +03:00
|
|
|
VHOST_OPS_DEBUG("vhost_set_vring_call failed");
|
2012-12-24 19:37:01 +04:00
|
|
|
r = -errno;
|
|
|
|
goto fail_call;
|
|
|
|
}
|
2017-01-11 07:32:12 +03:00
|
|
|
|
|
|
|
vq->dev = dev;
|
|
|
|
|
2012-12-24 19:37:01 +04:00
|
|
|
return 0;
|
|
|
|
fail_call:
|
|
|
|
event_notifier_cleanup(&vq->masked_notifier);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
|
|
|
|
{
|
|
|
|
event_notifier_cleanup(&vq->masked_notifier);
|
|
|
|
}
|
|
|
|
|
2014-05-27 16:05:22 +04:00
|
|
|
int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
|
2016-07-06 04:57:55 +03:00
|
|
|
VhostBackendType backend_type, uint32_t busyloop_timeout)
|
2010-03-17 14:08:17 +03:00
|
|
|
{
|
|
|
|
uint64_t features;
|
2016-07-27 00:15:04 +03:00
|
|
|
int i, r, n_initialized_vqs = 0;
|
2017-01-16 14:31:53 +03:00
|
|
|
Error *local_err = NULL;
|
2014-05-27 16:05:22 +04:00
|
|
|
|
2017-01-11 07:32:12 +03:00
|
|
|
hdev->vdev = NULL;
|
2015-10-09 18:17:27 +03:00
|
|
|
hdev->migration_blocker = NULL;
|
|
|
|
|
2016-07-27 00:14:58 +03:00
|
|
|
r = vhost_set_backend_type(hdev, backend_type);
|
|
|
|
assert(r >= 0);
|
2014-05-27 16:05:49 +04:00
|
|
|
|
2016-07-27 00:14:58 +03:00
|
|
|
r = hdev->vhost_ops->vhost_backend_init(hdev, opaque);
|
|
|
|
if (r < 0) {
|
|
|
|
goto fail;
|
2014-05-27 16:05:35 +04:00
|
|
|
}
|
|
|
|
|
2015-10-06 11:37:29 +03:00
|
|
|
if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
|
2016-07-27 00:15:07 +03:00
|
|
|
error_report("vhost backend memory slots limit is less"
|
|
|
|
" than current number of present memory slots");
|
2016-07-27 00:14:58 +03:00
|
|
|
r = -1;
|
|
|
|
goto fail;
|
2015-10-06 11:37:29 +03:00
|
|
|
}
|
2015-10-06 11:37:27 +03:00
|
|
|
|
2015-10-09 18:17:28 +03:00
|
|
|
r = hdev->vhost_ops->vhost_set_owner(hdev);
|
2010-03-17 14:08:17 +03:00
|
|
|
if (r < 0) {
|
2016-07-27 00:15:06 +03:00
|
|
|
VHOST_OPS_DEBUG("vhost_set_owner failed");
|
2010-03-17 14:08:17 +03:00
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2015-10-09 18:17:28 +03:00
|
|
|
r = hdev->vhost_ops->vhost_get_features(hdev, &features);
|
2010-03-17 14:08:17 +03:00
|
|
|
if (r < 0) {
|
2016-07-27 00:15:06 +03:00
|
|
|
VHOST_OPS_DEBUG("vhost_get_features failed");
|
2010-03-17 14:08:17 +03:00
|
|
|
goto fail;
|
|
|
|
}
|
2012-12-24 19:37:01 +04:00
|
|
|
|
2016-07-27 00:15:04 +03:00
|
|
|
for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) {
|
vhost-user: add multiple queue support
This patch is initially based a patch from Nikolay Nikolaev.
This patch adds vhost-user multiple queue support, by creating a nc
and vhost_net pair for each queue.
Qemu exits if find that the backend can't support the number of requested
queues (by providing queues=# option). The max number is queried by a
new message, VHOST_USER_GET_QUEUE_NUM, and is sent only when protocol
feature VHOST_USER_PROTOCOL_F_MQ is present first.
The max queue check is done at vhost-user initiation stage. We initiate
one queue first, which, in the meantime, also gets the max_queues the
backend supports.
In older version, it was reported that some messages are sent more times
than necessary. Here we came an agreement with Michael that we could
categorize vhost user messages to 2 types: non-vring specific messages,
which should be sent only once, and vring specific messages, which should
be sent per queue.
Here I introduced a helper function vhost_user_one_time_request(), which
lists following messages as non-vring specific messages:
VHOST_USER_SET_OWNER
VHOST_USER_RESET_DEVICE
VHOST_USER_SET_MEM_TABLE
VHOST_USER_GET_QUEUE_NUM
For above messages, we simply ignore them when they are not sent the first
time.
Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
Tested-by: Marcel Apfelbaum <marcel@redhat.com>
2015-09-23 07:20:00 +03:00
|
|
|
r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i);
|
2012-12-24 19:37:01 +04:00
|
|
|
if (r < 0) {
|
2016-07-27 00:15:04 +03:00
|
|
|
goto fail;
|
2012-12-24 19:37:01 +04:00
|
|
|
}
|
|
|
|
}
|
2016-07-06 04:57:55 +03:00
|
|
|
|
|
|
|
if (busyloop_timeout) {
|
|
|
|
for (i = 0; i < hdev->nvqs; ++i) {
|
|
|
|
r = vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i,
|
|
|
|
busyloop_timeout);
|
|
|
|
if (r < 0) {
|
|
|
|
goto fail_busyloop;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
hdev->features = features;
|
|
|
|
|
2011-12-18 16:06:05 +04:00
|
|
|
hdev->memory_listener = (MemoryListener) {
|
2012-02-08 23:36:02 +04:00
|
|
|
.begin = vhost_begin,
|
|
|
|
.commit = vhost_commit,
|
2011-12-18 16:06:05 +04:00
|
|
|
.region_add = vhost_region_add,
|
|
|
|
.region_del = vhost_region_del,
|
2012-02-08 23:36:02 +04:00
|
|
|
.region_nop = vhost_region_nop,
|
2011-12-18 16:06:05 +04:00
|
|
|
.log_start = vhost_log_start,
|
|
|
|
.log_stop = vhost_log_stop,
|
|
|
|
.log_sync = vhost_log_sync,
|
|
|
|
.log_global_start = vhost_log_global_start,
|
|
|
|
.log_global_stop = vhost_log_global_stop,
|
2012-02-08 18:39:06 +04:00
|
|
|
.eventfd_add = vhost_eventfd_add,
|
|
|
|
.eventfd_del = vhost_eventfd_del,
|
2012-02-08 17:05:50 +04:00
|
|
|
.priority = 10
|
2011-12-18 16:06:05 +04:00
|
|
|
};
|
2015-10-09 18:17:27 +03:00
|
|
|
|
2017-03-29 07:10:04 +03:00
|
|
|
hdev->iommu_listener = (MemoryListener) {
|
|
|
|
.region_add = vhost_iommu_region_add,
|
|
|
|
.region_del = vhost_iommu_region_del,
|
|
|
|
};
|
2017-01-11 07:32:12 +03:00
|
|
|
|
2015-10-09 18:17:27 +03:00
|
|
|
if (hdev->migration_blocker == NULL) {
|
|
|
|
if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
|
|
|
|
error_setg(&hdev->migration_blocker,
|
|
|
|
"Migration disabled: vhost lacks VHOST_F_LOG_ALL feature.");
|
2016-10-24 18:35:03 +03:00
|
|
|
} else if (vhost_dev_log_is_shared(hdev) && !qemu_memfd_check()) {
|
2015-10-09 18:17:34 +03:00
|
|
|
error_setg(&hdev->migration_blocker,
|
|
|
|
"Migration disabled: failed to allocate shared memory");
|
2015-10-09 18:17:27 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (hdev->migration_blocker != NULL) {
|
2017-01-16 14:31:53 +03:00
|
|
|
r = migrate_add_blocker(hdev->migration_blocker, &local_err);
|
|
|
|
if (local_err) {
|
|
|
|
error_report_err(local_err);
|
|
|
|
error_free(hdev->migration_blocker);
|
|
|
|
goto fail_busyloop;
|
|
|
|
}
|
2014-06-18 18:20:42 +04:00
|
|
|
}
|
2015-10-09 18:17:27 +03:00
|
|
|
|
2011-08-21 07:09:37 +04:00
|
|
|
hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
|
2011-12-19 15:18:13 +04:00
|
|
|
hdev->n_mem_sections = 0;
|
|
|
|
hdev->mem_sections = NULL;
|
2010-03-17 14:08:17 +03:00
|
|
|
hdev->log = NULL;
|
|
|
|
hdev->log_size = 0;
|
|
|
|
hdev->log_enabled = false;
|
|
|
|
hdev->started = false;
|
2013-04-03 13:15:11 +04:00
|
|
|
hdev->memory_changed = false;
|
2012-10-02 22:13:51 +04:00
|
|
|
memory_listener_register(&hdev->memory_listener, &address_space_memory);
|
2016-07-27 00:15:01 +03:00
|
|
|
QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
|
2010-03-17 14:08:17 +03:00
|
|
|
return 0;
|
2016-07-27 00:15:04 +03:00
|
|
|
|
2016-07-06 04:57:55 +03:00
|
|
|
fail_busyloop:
|
|
|
|
while (--i >= 0) {
|
|
|
|
vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i, 0);
|
|
|
|
}
|
2010-03-17 14:08:17 +03:00
|
|
|
fail:
|
2016-07-27 00:15:04 +03:00
|
|
|
hdev->nvqs = n_initialized_vqs;
|
|
|
|
vhost_dev_cleanup(hdev);
|
2010-03-17 14:08:17 +03:00
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
void vhost_dev_cleanup(struct vhost_dev *hdev)
|
|
|
|
{
|
2012-12-24 19:37:01 +04:00
|
|
|
int i;
|
2016-07-27 00:15:02 +03:00
|
|
|
|
2012-12-24 19:37:01 +04:00
|
|
|
for (i = 0; i < hdev->nvqs; ++i) {
|
|
|
|
vhost_virtqueue_cleanup(hdev->vqs + i);
|
|
|
|
}
|
2016-07-27 00:15:01 +03:00
|
|
|
if (hdev->mem) {
|
|
|
|
/* those are only safe after successful init */
|
|
|
|
memory_listener_unregister(&hdev->memory_listener);
|
2017-09-07 23:27:09 +03:00
|
|
|
for (i = 0; i < hdev->n_mem_sections; ++i) {
|
|
|
|
MemoryRegionSection *section = &hdev->mem_sections[i];
|
|
|
|
memory_region_unref(section->mr);
|
|
|
|
}
|
2016-07-27 00:15:01 +03:00
|
|
|
QLIST_REMOVE(hdev, entry);
|
|
|
|
}
|
2014-06-18 18:20:42 +04:00
|
|
|
if (hdev->migration_blocker) {
|
|
|
|
migrate_del_blocker(hdev->migration_blocker);
|
|
|
|
error_free(hdev->migration_blocker);
|
|
|
|
}
|
2011-08-21 07:09:37 +04:00
|
|
|
g_free(hdev->mem);
|
2011-12-19 15:18:13 +04:00
|
|
|
g_free(hdev->mem_sections);
|
2016-07-27 00:15:02 +03:00
|
|
|
if (hdev->vhost_ops) {
|
|
|
|
hdev->vhost_ops->vhost_backend_cleanup(hdev);
|
|
|
|
}
|
2016-07-27 00:15:00 +03:00
|
|
|
assert(!hdev->log);
|
2016-07-27 00:15:02 +03:00
|
|
|
|
|
|
|
memset(hdev, 0, sizeof(struct vhost_dev));
|
2010-03-17 14:08:17 +03:00
|
|
|
}
|
|
|
|
|
2011-08-11 11:21:18 +04:00
|
|
|
/* Stop processing guest IO notifications in qemu.
|
|
|
|
* Start processing them in vhost in kernel.
|
|
|
|
*/
|
|
|
|
int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
|
|
|
|
{
|
2013-04-24 12:21:21 +04:00
|
|
|
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
|
2015-05-29 09:13:14 +03:00
|
|
|
int i, r, e;
|
2016-07-27 00:15:07 +03:00
|
|
|
|
2016-11-18 18:07:00 +03:00
|
|
|
/* We will pass the notifiers to the kernel, make sure that QEMU
|
|
|
|
* doesn't interfere.
|
|
|
|
*/
|
|
|
|
r = virtio_device_grab_ioeventfd(vdev);
|
|
|
|
if (r < 0) {
|
2016-07-27 00:15:07 +03:00
|
|
|
error_report("binding does not support host notifiers");
|
2011-08-11 11:21:18 +04:00
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < hdev->nvqs; ++i) {
|
2016-06-10 12:04:10 +03:00
|
|
|
r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
|
|
|
|
true);
|
2011-08-11 11:21:18 +04:00
|
|
|
if (r < 0) {
|
2016-07-27 00:15:07 +03:00
|
|
|
error_report("vhost VQ %d notifier binding failed: %d", i, -r);
|
2011-08-11 11:21:18 +04:00
|
|
|
goto fail_vq;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
fail_vq:
|
|
|
|
while (--i >= 0) {
|
2016-06-10 12:04:10 +03:00
|
|
|
e = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
|
|
|
|
false);
|
2015-05-29 09:13:14 +03:00
|
|
|
if (e < 0) {
|
2016-07-27 00:15:07 +03:00
|
|
|
error_report("vhost VQ %d notifier cleanup error: %d", i, -r);
|
2011-08-11 11:21:18 +04:00
|
|
|
}
|
2015-05-29 09:13:14 +03:00
|
|
|
assert (e >= 0);
|
2011-08-11 11:21:18 +04:00
|
|
|
}
|
2016-11-18 18:07:00 +03:00
|
|
|
virtio_device_release_ioeventfd(vdev);
|
2011-08-11 11:21:18 +04:00
|
|
|
fail:
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Stop processing guest IO notifications in vhost.
|
|
|
|
* Start processing them in qemu.
|
|
|
|
* This might actually run the qemu handlers right away,
|
|
|
|
* so virtio in qemu must be completely setup when this is called.
|
|
|
|
*/
|
|
|
|
void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
|
|
|
|
{
|
2013-04-24 12:21:21 +04:00
|
|
|
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
|
2011-08-11 11:21:18 +04:00
|
|
|
int i, r;
|
|
|
|
|
|
|
|
for (i = 0; i < hdev->nvqs; ++i) {
|
2016-06-10 12:04:10 +03:00
|
|
|
r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
|
|
|
|
false);
|
2011-08-11 11:21:18 +04:00
|
|
|
if (r < 0) {
|
2016-07-27 00:15:07 +03:00
|
|
|
error_report("vhost VQ %d notifier cleanup failed: %d", i, -r);
|
2011-08-11 11:21:18 +04:00
|
|
|
}
|
|
|
|
assert (r >= 0);
|
|
|
|
}
|
2016-11-18 18:07:00 +03:00
|
|
|
virtio_device_release_ioeventfd(vdev);
|
2011-08-11 11:21:18 +04:00
|
|
|
}
|
|
|
|
|
2012-12-24 19:37:01 +04:00
|
|
|
/* Test and clear event pending status.
|
|
|
|
* Should be called after unmask to avoid losing events.
|
|
|
|
*/
|
|
|
|
bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
|
|
|
|
{
|
2013-01-30 15:12:35 +04:00
|
|
|
struct vhost_virtqueue *vq = hdev->vqs + n - hdev->vq_index;
|
|
|
|
assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
|
2012-12-24 19:37:01 +04:00
|
|
|
return event_notifier_test_and_clear(&vq->masked_notifier);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Mask/unmask events from this vq. */
|
|
|
|
void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
|
|
|
|
bool mask)
|
|
|
|
{
|
|
|
|
struct VirtQueue *vvq = virtio_get_queue(vdev, n);
|
2013-01-30 15:12:35 +04:00
|
|
|
int r, index = n - hdev->vq_index;
|
2015-09-23 07:19:59 +03:00
|
|
|
struct vhost_vring_file file;
|
2012-12-24 19:37:01 +04:00
|
|
|
|
2016-07-27 00:15:16 +03:00
|
|
|
/* should only be called after backend is connected */
|
|
|
|
assert(hdev->vhost_ops);
|
|
|
|
|
2012-12-24 19:37:01 +04:00
|
|
|
if (mask) {
|
2016-02-18 17:12:23 +03:00
|
|
|
assert(vdev->use_guest_notifier_mask);
|
2013-01-30 15:12:35 +04:00
|
|
|
file.fd = event_notifier_get_fd(&hdev->vqs[index].masked_notifier);
|
2012-12-24 19:37:01 +04:00
|
|
|
} else {
|
|
|
|
file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
|
|
|
|
}
|
2015-09-23 07:19:59 +03:00
|
|
|
|
2015-10-09 18:17:28 +03:00
|
|
|
file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n);
|
|
|
|
r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file);
|
2016-07-27 00:15:05 +03:00
|
|
|
if (r < 0) {
|
|
|
|
VHOST_OPS_DEBUG("vhost_set_vring_call failed");
|
|
|
|
}
|
2012-12-24 19:37:01 +04:00
|
|
|
}
|
|
|
|
|
2015-06-04 13:34:20 +03:00
|
|
|
uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
|
|
|
|
uint64_t features)
|
2014-05-27 16:04:42 +04:00
|
|
|
{
|
|
|
|
const int *bit = feature_bits;
|
|
|
|
while (*bit != VHOST_INVALID_FEATURE_BIT) {
|
2015-06-04 13:34:20 +03:00
|
|
|
uint64_t bit_mask = (1ULL << *bit);
|
2014-05-27 16:04:42 +04:00
|
|
|
if (!(hdev->features & bit_mask)) {
|
|
|
|
features &= ~bit_mask;
|
|
|
|
}
|
|
|
|
bit++;
|
|
|
|
}
|
|
|
|
return features;
|
|
|
|
}
|
|
|
|
|
|
|
|
void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
|
2015-06-04 13:34:20 +03:00
|
|
|
uint64_t features)
|
2014-05-27 16:04:42 +04:00
|
|
|
{
|
|
|
|
const int *bit = feature_bits;
|
|
|
|
while (*bit != VHOST_INVALID_FEATURE_BIT) {
|
2015-06-04 13:34:20 +03:00
|
|
|
uint64_t bit_mask = (1ULL << *bit);
|
2014-05-27 16:04:42 +04:00
|
|
|
if (features & bit_mask) {
|
|
|
|
hdev->acked_features |= bit_mask;
|
|
|
|
}
|
|
|
|
bit++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-08-11 11:21:18 +04:00
|
|
|
/* Host notifiers must be enabled at this point. */
|
2010-03-17 14:08:17 +03:00
|
|
|
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
|
|
|
|
{
|
|
|
|
int i, r;
|
2012-12-25 19:41:07 +04:00
|
|
|
|
2016-07-27 00:15:16 +03:00
|
|
|
/* should only be called after backend is connected */
|
|
|
|
assert(hdev->vhost_ops);
|
|
|
|
|
2012-12-25 19:41:07 +04:00
|
|
|
hdev->started = true;
|
2017-01-11 07:32:12 +03:00
|
|
|
hdev->vdev = vdev;
|
2012-12-25 19:41:07 +04:00
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
r = vhost_dev_set_features(hdev, hdev->log_enabled);
|
|
|
|
if (r < 0) {
|
2010-10-06 17:20:17 +04:00
|
|
|
goto fail_features;
|
2010-03-17 14:08:17 +03:00
|
|
|
}
|
2017-01-11 07:32:12 +03:00
|
|
|
|
|
|
|
if (vhost_dev_has_iommu(hdev)) {
|
2017-03-29 07:10:04 +03:00
|
|
|
memory_listener_register(&hdev->iommu_listener, vdev->dma_as);
|
2017-01-11 07:32:12 +03:00
|
|
|
}
|
|
|
|
|
2015-10-09 18:17:28 +03:00
|
|
|
r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem);
|
2010-03-17 14:08:17 +03:00
|
|
|
if (r < 0) {
|
2016-07-27 00:15:06 +03:00
|
|
|
VHOST_OPS_DEBUG("vhost_set_mem_table failed");
|
2010-03-17 14:08:17 +03:00
|
|
|
r = -errno;
|
2010-10-06 17:20:17 +04:00
|
|
|
goto fail_mem;
|
2010-03-17 14:08:17 +03:00
|
|
|
}
|
2010-07-16 18:11:46 +04:00
|
|
|
for (i = 0; i < hdev->nvqs; ++i) {
|
2012-12-24 19:37:01 +04:00
|
|
|
r = vhost_virtqueue_start(hdev,
|
2013-01-30 15:12:35 +04:00
|
|
|
vdev,
|
|
|
|
hdev->vqs + i,
|
|
|
|
hdev->vq_index + i);
|
2010-07-16 18:11:46 +04:00
|
|
|
if (r < 0) {
|
|
|
|
goto fail_vq;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
if (hdev->log_enabled) {
|
2015-04-17 18:13:24 +03:00
|
|
|
uint64_t log_base;
|
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
hdev->log_size = vhost_get_log_size(hdev);
|
2015-10-09 18:17:25 +03:00
|
|
|
hdev->log = vhost_log_get(hdev->log_size,
|
|
|
|
vhost_dev_log_is_shared(hdev));
|
2015-06-04 12:28:46 +03:00
|
|
|
log_base = (uintptr_t)hdev->log->log;
|
2015-10-09 18:17:23 +03:00
|
|
|
r = hdev->vhost_ops->vhost_set_log_base(hdev,
|
2015-10-09 18:17:26 +03:00
|
|
|
hdev->log_size ? log_base : 0,
|
|
|
|
hdev->log);
|
2010-03-17 14:08:17 +03:00
|
|
|
if (r < 0) {
|
2016-07-27 00:15:06 +03:00
|
|
|
VHOST_OPS_DEBUG("vhost_set_log_base failed");
|
2010-03-17 14:08:17 +03:00
|
|
|
r = -errno;
|
2010-10-06 17:20:17 +04:00
|
|
|
goto fail_log;
|
2010-03-17 14:08:17 +03:00
|
|
|
}
|
|
|
|
}
|
2010-07-16 18:11:46 +04:00
|
|
|
|
2017-01-11 07:32:12 +03:00
|
|
|
if (vhost_dev_has_iommu(hdev)) {
|
|
|
|
hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true);
|
|
|
|
|
|
|
|
/* Update used ring information for IOTLB to work correctly,
|
|
|
|
* vhost-kernel code requires for this.*/
|
|
|
|
for (i = 0; i < hdev->nvqs; ++i) {
|
|
|
|
struct vhost_virtqueue *vq = hdev->vqs + i;
|
|
|
|
vhost_device_iotlb_miss(hdev, vq->used_phys, true);
|
|
|
|
}
|
|
|
|
}
|
2010-03-17 14:08:17 +03:00
|
|
|
return 0;
|
2010-10-06 17:20:17 +04:00
|
|
|
fail_log:
|
2015-06-05 06:05:58 +03:00
|
|
|
vhost_log_put(hdev, false);
|
2010-03-17 14:08:17 +03:00
|
|
|
fail_vq:
|
|
|
|
while (--i >= 0) {
|
2012-12-24 19:37:01 +04:00
|
|
|
vhost_virtqueue_stop(hdev,
|
2013-01-30 15:12:35 +04:00
|
|
|
vdev,
|
|
|
|
hdev->vqs + i,
|
|
|
|
hdev->vq_index + i);
|
2010-03-17 14:08:17 +03:00
|
|
|
}
|
2013-01-30 15:12:35 +04:00
|
|
|
i = hdev->nvqs;
|
2017-01-11 07:32:12 +03:00
|
|
|
|
2010-10-06 17:20:17 +04:00
|
|
|
fail_mem:
|
|
|
|
fail_features:
|
2012-12-25 19:41:07 +04:00
|
|
|
|
|
|
|
hdev->started = false;
|
2010-03-17 14:08:17 +03:00
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2011-08-11 11:21:18 +04:00
|
|
|
/* Host notifiers must be enabled at this point. */
|
2010-03-17 14:08:17 +03:00
|
|
|
void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
|
|
|
|
{
|
2013-01-30 15:12:35 +04:00
|
|
|
int i;
|
2010-10-06 17:20:17 +04:00
|
|
|
|
2016-07-27 00:15:16 +03:00
|
|
|
/* should only be called after backend is connected */
|
|
|
|
assert(hdev->vhost_ops);
|
|
|
|
|
2010-03-17 14:08:17 +03:00
|
|
|
for (i = 0; i < hdev->nvqs; ++i) {
|
2012-12-24 19:37:01 +04:00
|
|
|
vhost_virtqueue_stop(hdev,
|
2013-01-30 15:12:35 +04:00
|
|
|
vdev,
|
|
|
|
hdev->vqs + i,
|
|
|
|
hdev->vq_index + i);
|
2010-03-17 14:08:17 +03:00
|
|
|
}
|
2010-10-06 17:20:17 +04:00
|
|
|
|
2017-01-11 07:32:12 +03:00
|
|
|
if (vhost_dev_has_iommu(hdev)) {
|
|
|
|
hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false);
|
2017-03-29 07:10:04 +03:00
|
|
|
memory_listener_unregister(&hdev->iommu_listener);
|
2017-01-11 07:32:12 +03:00
|
|
|
}
|
2015-06-04 12:28:46 +03:00
|
|
|
vhost_log_put(hdev, true);
|
2010-03-17 14:08:17 +03:00
|
|
|
hdev->started = false;
|
2017-01-11 07:32:12 +03:00
|
|
|
hdev->vdev = NULL;
|
2010-03-17 14:08:17 +03:00
|
|
|
}
|
2016-07-27 00:15:25 +03:00
|
|
|
|
|
|
|
int vhost_net_set_backend(struct vhost_dev *hdev,
|
|
|
|
struct vhost_vring_file *file)
|
|
|
|
{
|
|
|
|
if (hdev->vhost_ops->vhost_net_set_backend) {
|
|
|
|
return hdev->vhost_ops->vhost_net_set_backend(hdev, file);
|
|
|
|
}
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
|