diff --git a/MAINTAINERS b/MAINTAINERS index 36d94c17a6..df1786db32 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1639,6 +1639,12 @@ F: hw/input/virtio-input*.c F: include/hw/virtio/virtio-input.h F: contrib/vhost-user-input/* +virtio-iommu +M: Eric Auger +S: Maintained +F: hw/virtio/virtio-iommu*.c +F: include/hw/virtio/virtio-iommu.h + virtio-serial M: Laurent Vivier R: Amit Shah diff --git a/contrib/libvhost-user/libvhost-user-glib.c b/contrib/libvhost-user/libvhost-user-glib.c index 99edd2f3de..53f1ca4cdd 100644 --- a/contrib/libvhost-user/libvhost-user-glib.c +++ b/contrib/libvhost-user/libvhost-user-glib.c @@ -89,9 +89,8 @@ vug_source_new(VugDev *gdev, int fd, GIOCondition cond, src->gfd.events = cond; g_source_add_poll(gsrc, &src->gfd); - id = g_source_attach(gsrc, NULL); + id = g_source_attach(gsrc, g_main_context_get_thread_default()); g_assert(id); - g_source_unref(gsrc); return gsrc; } @@ -131,6 +130,16 @@ static void vug_watch(VuDev *dev, int condition, void *data) } } +void vug_source_destroy(GSource *src) +{ + if (!src) { + return; + } + + g_source_destroy(src); + g_source_unref(src); +} + bool vug_init(VugDev *dev, uint16_t max_queues, int socket, vu_panic_cb panic, const VuDevIface *iface) @@ -144,7 +153,7 @@ vug_init(VugDev *dev, uint16_t max_queues, int socket, } dev->fdmap = g_hash_table_new_full(NULL, NULL, NULL, - (GDestroyNotify) g_source_destroy); + (GDestroyNotify) vug_source_destroy); dev->src = vug_source_new(dev, socket, G_IO_IN, vug_watch, NULL); @@ -157,5 +166,5 @@ vug_deinit(VugDev *dev) g_assert(dev); g_hash_table_unref(dev->fdmap); - g_source_unref(dev->src); + vug_source_destroy(dev->src); } diff --git a/contrib/libvhost-user/libvhost-user-glib.h b/contrib/libvhost-user/libvhost-user-glib.h index 64d539d93a..1a79a4916e 100644 --- a/contrib/libvhost-user/libvhost-user-glib.h +++ b/contrib/libvhost-user/libvhost-user-glib.h @@ -31,5 +31,6 @@ void vug_deinit(VugDev *dev); GSource *vug_source_new(VugDev *dev, int fd, GIOCondition cond, 
vu_watch_cb vu_cb, gpointer data); +void vug_source_destroy(GSource *src); #endif /* LIBVHOST_USER_GLIB_H */ diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index b89bf18501..3bca996c62 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -136,6 +136,7 @@ vu_request_to_string(unsigned int req) REQ(VHOST_USER_GET_INFLIGHT_FD), REQ(VHOST_USER_SET_INFLIGHT_FD), REQ(VHOST_USER_GPU_SET_SOCKET), + REQ(VHOST_USER_VRING_KICK), REQ(VHOST_USER_MAX), }; #undef REQ @@ -163,7 +164,10 @@ vu_panic(VuDev *dev, const char *msg, ...) dev->panic(dev, buf); free(buf); - /* FIXME: find a way to call virtio_error? */ + /* + * FIXME: + * find a way to call virtio_error, or perhaps close the connection? + */ } /* Translate guest physical address to our virtual address. */ @@ -948,6 +952,7 @@ static bool vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg) { int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK; if (index >= dev->max_queues) { vmsg_close_fds(vmsg); @@ -955,8 +960,12 @@ vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg) return false; } - if (vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK || - vmsg->fd_num != 1) { + if (nofd) { + vmsg_close_fds(vmsg); + return true; + } + + if (vmsg->fd_num != 1) { vmsg_close_fds(vmsg); vu_panic(dev, "Invalid fds in request: %d", vmsg->request); return false; @@ -1053,6 +1062,7 @@ static bool vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg) { int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK; DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); @@ -1066,8 +1076,8 @@ vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg) dev->vq[index].kick_fd = -1; } - dev->vq[index].kick_fd = vmsg->fds[0]; - DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index); + dev->vq[index].kick_fd = nofd ? 
-1 : vmsg->fds[0]; + DPRINT("Got kick_fd: %d for vq: %d\n", dev->vq[index].kick_fd, index); dev->vq[index].started = true; if (dev->iface->queue_set_started) { @@ -1147,6 +1157,7 @@ static bool vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg) { int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK; DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); @@ -1159,14 +1170,14 @@ vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg) dev->vq[index].call_fd = -1; } - dev->vq[index].call_fd = vmsg->fds[0]; + dev->vq[index].call_fd = nofd ? -1 : vmsg->fds[0]; /* in case of I/O hang after reconnecting */ - if (eventfd_write(vmsg->fds[0], 1)) { + if (dev->vq[index].call_fd != -1 && eventfd_write(vmsg->fds[0], 1)) { return -1; } - DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index); + DPRINT("Got call_fd: %d for vq: %d\n", dev->vq[index].call_fd, index); return false; } @@ -1175,6 +1186,7 @@ static bool vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg) { int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK; DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); @@ -1187,7 +1199,7 @@ vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg) dev->vq[index].err_fd = -1; } - dev->vq[index].err_fd = vmsg->fds[0]; + dev->vq[index].err_fd = nofd ? -1 : vmsg->fds[0]; return false; } @@ -1195,11 +1207,20 @@ vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg) static bool vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg) { + /* + * Note that we support, but intentionally do not set, + * VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. This means that + * a device implementation can return it in its callback + * (get_protocol_features) if it wants to use this for + * simulation, but it is otherwise not desirable (if even + * implemented by the master.) 
+ */ uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_MQ | 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ | 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | - 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD; + 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD | + 1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK; if (have_userfault()) { features |= 1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT; @@ -1226,6 +1247,25 @@ vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg) dev->protocol_features = vmsg->payload.u64; + if (vu_has_protocol_feature(dev, + VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) && + (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_REQ) || + !vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_REPLY_ACK))) { + /* + * The use case for using messages for kick/call is simulation, to make + * the kick and call synchronous. To actually get that behaviour, both + * of the other features are required. + * Theoretically, one could use only kick messages, or do them without + * having F_REPLY_ACK, but too many (possibly pending) messages on the + * socket will eventually cause the master to hang. To avoid this in + * scenarios where it is not desired, enforce settings that actually + * enable the simulation case. 
+ */ + vu_panic(dev, + "F_IN_BAND_NOTIFICATIONS requires F_SLAVE_REQ && F_REPLY_ACK"); + return false; + } + if (dev->iface->set_protocol_features) { dev->iface->set_protocol_features(dev, features); } @@ -1486,6 +1526,34 @@ vu_set_inflight_fd(VuDev *dev, VhostUserMsg *vmsg) return false; } +static bool +vu_handle_vring_kick(VuDev *dev, VhostUserMsg *vmsg) +{ + unsigned int index = vmsg->payload.state.index; + + if (index >= dev->max_queues) { + vu_panic(dev, "Invalid queue index: %u", index); + return false; + } + + DPRINT("Got kick message: handler:%p idx:%d\n", + dev->vq[index].handler, index); + + if (!dev->vq[index].started) { + dev->vq[index].started = true; + + if (dev->iface->queue_set_started) { + dev->iface->queue_set_started(dev, index, true); + } + } + + if (dev->vq[index].handler) { + dev->vq[index].handler(dev, index); + } + + return false; +} + static bool vu_process_message(VuDev *dev, VhostUserMsg *vmsg) { @@ -1568,6 +1636,8 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg) return vu_get_inflight_fd(dev, vmsg); case VHOST_USER_SET_INFLIGHT_FD: return vu_set_inflight_fd(dev, vmsg); + case VHOST_USER_VRING_KICK: + return vu_handle_vring_kick(dev, vmsg); default: vmsg_close_fds(vmsg); vu_panic(dev, "Unhandled request: %d", vmsg->request); @@ -1581,13 +1651,20 @@ vu_dispatch(VuDev *dev) { VhostUserMsg vmsg = { 0, }; int reply_requested; - bool success = false; + bool need_reply, success = false; if (!vu_message_read(dev, dev->sock, &vmsg)) { goto end; } + need_reply = vmsg.flags & VHOST_USER_NEED_REPLY_MASK; + reply_requested = vu_process_message(dev, &vmsg); + if (!reply_requested && need_reply) { + vmsg_set_reply_u64(&vmsg, 0); + reply_requested = 1; + } + if (!reply_requested) { success = true; goto end; @@ -2022,8 +2099,7 @@ vring_notify(VuDev *dev, VuVirtq *vq) return !v || vring_need_event(vring_get_used_event(vq), new, old); } -void -vu_queue_notify(VuDev *dev, VuVirtq *vq) +static void _vu_queue_notify(VuDev *dev, VuVirtq *vq, bool sync) { 
if (unlikely(dev->broken) || unlikely(!vq->vring.avail)) { @@ -2035,11 +2111,48 @@ vu_queue_notify(VuDev *dev, VuVirtq *vq) return; } + if (vq->call_fd < 0 && + vu_has_protocol_feature(dev, + VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) && + vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { + VhostUserMsg vmsg = { + .request = VHOST_USER_SLAVE_VRING_CALL, + .flags = VHOST_USER_VERSION, + .size = sizeof(vmsg.payload.state), + .payload.state = { + .index = vq - dev->vq, + }, + }; + bool ack = sync && + vu_has_protocol_feature(dev, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + + if (ack) { + vmsg.flags |= VHOST_USER_NEED_REPLY_MASK; + } + + vu_message_write(dev, dev->slave_fd, &vmsg); + if (ack) { + vu_message_read(dev, dev->slave_fd, &vmsg); + } + return; + } + if (eventfd_write(vq->call_fd, 1) < 0) { vu_panic(dev, "Error writing eventfd: %s", strerror(errno)); } } +void vu_queue_notify(VuDev *dev, VuVirtq *vq) +{ + _vu_queue_notify(dev, vq, false); +} + +void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq) +{ + _vu_queue_notify(dev, vq, true); +} + static inline void vring_used_flags_set_bit(VuVirtq *vq, int mask) { diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h index 5cb7708559..6fc8000e99 100644 --- a/contrib/libvhost-user/libvhost-user.h +++ b/contrib/libvhost-user/libvhost-user.h @@ -54,6 +54,7 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, + VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, VHOST_USER_PROTOCOL_F_MAX }; @@ -95,6 +96,7 @@ typedef enum VhostUserRequest { VHOST_USER_GET_INFLIGHT_FD = 31, VHOST_USER_SET_INFLIGHT_FD = 32, VHOST_USER_GPU_SET_SOCKET = 33, + VHOST_USER_VRING_KICK = 35, VHOST_USER_MAX } VhostUserRequest; @@ -103,6 +105,8 @@ typedef enum VhostUserSlaveRequest { VHOST_USER_SLAVE_IOTLB_MSG = 1, VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, + 
VHOST_USER_SLAVE_VRING_CALL = 4, + VHOST_USER_SLAVE_VRING_ERR = 5, VHOST_USER_SLAVE_MAX } VhostUserSlaveRequest; @@ -528,6 +532,16 @@ bool vu_queue_empty(VuDev *dev, VuVirtq *vq); */ void vu_queue_notify(VuDev *dev, VuVirtq *vq); +/** + * vu_queue_notify_sync: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * + * Request to notify the queue via callfd (skipped if unnecessary) + * or sync message if possible. + */ +void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq); + /** * vu_queue_pop: * @dev: a VuDev context diff --git a/contrib/vhost-user-input/main.c b/contrib/vhost-user-input/main.c index ef4b7769f2..6020c6f33a 100644 --- a/contrib/vhost-user-input/main.c +++ b/contrib/vhost-user-input/main.c @@ -187,7 +187,7 @@ vi_queue_set_started(VuDev *dev, int qidx, bool started) } if (!started && vi->evsrc) { - g_source_destroy(vi->evsrc); + vug_source_destroy(vi->evsrc); vi->evsrc = NULL; } } @@ -401,9 +401,7 @@ main(int argc, char *argv[]) vug_deinit(&vi.dev); - if (vi.evsrc) { - g_source_unref(vi.evsrc); - } + vug_source_destroy(vi.evsrc); g_array_free(vi.config, TRUE); g_free(vi.queue); return 0; diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst index 5f8b3a456b..401652397c 100644 --- a/docs/interop/vhost-user.rst +++ b/docs/interop/vhost-user.rst @@ -2,6 +2,7 @@ Vhost-user Protocol =================== :Copyright: 2014 Virtual Open Systems Sarl. +:Copyright: 2019 Intel Corporation :Licence: This work is licensed under the terms of the GNU GPL, version 2 or later. See the COPYING file in the top-level directory. @@ -279,6 +280,9 @@ If *master* is unable to send the full message or receives a wrong reply it will close the connection. An optional reconnection mechanism can be implemented. +If *slave* detects some error such as incompatible features, it may also +close the connection. This should only happen in exceptional circumstances. 
+ Any protocol extensions are gated by protocol feature bits, which allows full backwards compatibility on both master and slave. As older slaves don't support negotiating protocol features, a feature @@ -315,7 +319,8 @@ it until ring is started, or after it has been stopped. Client must start ring upon receiving a kick (that is, detecting that file descriptor is readable) on the descriptor specified by -``VHOST_USER_SET_VRING_KICK``, and stop ring upon receiving +``VHOST_USER_SET_VRING_KICK`` or receiving the in-band message +``VHOST_USER_VRING_KICK`` if negotiated, and stop ring upon receiving ``VHOST_USER_GET_VRING_BASE``. While processing the rings (whether they are enabled or not), client @@ -767,25 +772,49 @@ When reconnecting: #. Resubmit inflight ``DescStatePacked`` entries in order of their counter value +In-band notifications +--------------------- + +In some limited situations (e.g. for simulation) it is desirable to +have the kick, call and error (if used) signals done via in-band +messages instead of asynchronous eventfd notifications. This can be +done by negotiating the ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` +protocol feature. + +Note that because too many messages on the sockets can +cause the sending application(s) to block, it is not advised to use +this feature unless absolutely necessary. It is also considered an +error to negotiate this feature without also negotiating +``VHOST_USER_PROTOCOL_F_SLAVE_REQ`` and ``VHOST_USER_PROTOCOL_F_REPLY_ACK``: +the former is necessary for getting a message channel from the slave +to the master, while the latter needs to be used with the in-band +notification messages to block until they are processed, both to avoid +blocking later and for proper processing (at least in the simulation +use case). 
As it has no other way of signalling this error, the slave +should close the connection as a response to a +``VHOST_USER_SET_PROTOCOL_FEATURES`` message that sets the in-band +notifications feature flag without the other two. + Protocol features ----------------- .. code:: c - #define VHOST_USER_PROTOCOL_F_MQ 0 - #define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 - #define VHOST_USER_PROTOCOL_F_RARP 2 - #define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 - #define VHOST_USER_PROTOCOL_F_MTU 4 - #define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 - #define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN 6 - #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7 - #define VHOST_USER_PROTOCOL_F_PAGEFAULT 8 - #define VHOST_USER_PROTOCOL_F_CONFIG 9 - #define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10 - #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11 - #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12 - #define VHOST_USER_PROTOCOL_F_RESET_DEVICE 13 + #define VHOST_USER_PROTOCOL_F_MQ 0 + #define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 + #define VHOST_USER_PROTOCOL_F_RARP 2 + #define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 + #define VHOST_USER_PROTOCOL_F_MTU 4 + #define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 + #define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN 6 + #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7 + #define VHOST_USER_PROTOCOL_F_PAGEFAULT 8 + #define VHOST_USER_PROTOCOL_F_CONFIG 9 + #define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10 + #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11 + #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12 + #define VHOST_USER_PROTOCOL_F_RESET_DEVICE 13 + #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14 Master message types -------------------- @@ -947,7 +976,12 @@ Master message types Bits (0-7) of the payload contain the vring index. Bit 8 is the invalid FD flag. This flag is set when there is no file descriptor in the ancillary data. This signals that polling should be used - instead of waiting for a kick. + instead of waiting for the kick. 
Note that if the protocol feature + ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` has been negotiated + this message isn't necessary as the ring is also started on the + ``VHOST_USER_VRING_KICK`` message; it may, however, still be used to + set an event file descriptor (which will be preferred over the + message) or to enable polling. ``VHOST_USER_SET_VRING_CALL`` :id: 13 @@ -960,7 +994,12 @@ Master message types Bits (0-7) of the payload contain the vring index. Bit 8 is the invalid FD flag. This flag is set when there is no file descriptor in the ancillary data. This signals that polling will be used - instead of waiting for the call. + instead of waiting for the call. Note that if the protocol features + ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` and + ``VHOST_USER_PROTOCOL_F_SLAVE_REQ`` have been negotiated this message + isn't necessary as the ``VHOST_USER_SLAVE_VRING_CALL`` message can be + used; it may, however, still be used to set an event file descriptor + or to enable polling. ``VHOST_USER_SET_VRING_ERR`` :id: 14 @@ -972,7 +1011,12 @@ Master message types Bits (0-7) of the payload contain the vring index. Bit 8 is the invalid FD flag. This flag is set when there is no file descriptor - in the ancillary data. + in the ancillary data. Note that if the protocol features + ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` and + ``VHOST_USER_PROTOCOL_F_SLAVE_REQ`` have been negotiated this message + isn't necessary as the ``VHOST_USER_SLAVE_VRING_ERR`` message can be + used; it may, however, still be used to set an event file descriptor + (which will be preferred over the message). ``VHOST_USER_GET_QUEUE_NUM`` :id: 17 @@ -1205,6 +1249,20 @@ Master message types Only valid if the ``VHOST_USER_PROTOCOL_F_RESET_DEVICE`` protocol feature is set by the backend. 
+``VHOST_USER_VRING_KICK`` + :id: 35 + :equivalent ioctl: N/A + :slave payload: vring state description + :master payload: N/A + + When the ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` protocol + feature has been successfully negotiated, this message may be + submitted by the master to indicate that a buffer was added to + the vring instead of signalling it using the vring's kick file + descriptor or having the slave rely on polling. + + The state.num field is currently reserved and must be set to 0. + Slave message types ------------------- @@ -1261,6 +1319,34 @@ Slave message types ``VHOST_USER_PROTOCOL_F_HOST_NOTIFIER`` protocol feature has been successfully negotiated. +``VHOST_USER_SLAVE_VRING_CALL`` + :id: 4 + :equivalent ioctl: N/A + :slave payload: vring state description + :master payload: N/A + + When the ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` protocol + feature has been successfully negotiated, this message may be + submitted by the slave to indicate that a buffer was used from + the vring instead of signalling this using the vring's call file + descriptor or having the master rely on polling. + + The state.num field is currently reserved and must be set to 0. + +``VHOST_USER_SLAVE_VRING_ERR`` + :id: 5 + :equivalent ioctl: N/A + :slave payload: vring state description + :master payload: N/A + + When the ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` protocol + feature has been successfully negotiated, this message may be + submitted by the slave to indicate that an error occurred on the + specific vring, instead of signalling the error file descriptor + set by the master via ``VHOST_USER_SET_VRING_ERR``. + + The state.num field is currently reserved and must be set to 0. + .. 
_reply_ack: VHOST_USER_PROTOCOL_F_REPLY_ACK diff --git a/docs/specs/acpi_cpu_hotplug.txt b/docs/specs/acpi_cpu_hotplug.txt index a8ce5e7402..9bb22d1270 100644 --- a/docs/specs/acpi_cpu_hotplug.txt +++ b/docs/specs/acpi_cpu_hotplug.txt @@ -94,6 +94,8 @@ write access: register in QEMU 2: following writes to 'Command data' register set OST status register in QEMU + 3: following reads from 'Command data' and 'Command data 2' return + architecture specific CPU ID value for currently selected CPU. other values: reserved [0x6-0x7] reserved [0x8] Command data: (DWORD access) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index e591a126e7..a8191a3e75 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -32,6 +32,7 @@ #include "qemu-common.h" #include "qemu/units.h" #include "qemu/option.h" +#include "monitor/qdev.h" #include "qapi/error.h" #include "hw/sysbus.h" #include "hw/boards.h" @@ -54,6 +55,7 @@ #include "qemu/error-report.h" #include "qemu/module.h" #include "hw/pci-host/gpex.h" +#include "hw/virtio/virtio-pci.h" #include "hw/arm/sysbus-fdt.h" #include "hw/platform-bus.h" #include "hw/qdev-properties.h" @@ -71,6 +73,7 @@ #include "hw/mem/pc-dimm.h" #include "hw/mem/nvdimm.h" #include "hw/acpi/generic_event_device.h" +#include "hw/virtio/virtio-iommu.h" #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ @@ -1180,6 +1183,30 @@ static void create_smmu(const VirtMachineState *vms, g_free(node); } +static void create_virtio_iommu_dt_bindings(VirtMachineState *vms, Error **errp) +{ + const char compat[] = "virtio,pci-iommu"; + uint16_t bdf = vms->virtio_iommu_bdf; + char *node; + + vms->iommu_phandle = qemu_fdt_alloc_phandle(vms->fdt); + + node = g_strdup_printf("%s/virtio_iommu@%d", vms->pciehb_nodename, bdf); + qemu_fdt_add_subnode(vms->fdt, node); + qemu_fdt_setprop(vms->fdt, node, "compatible", compat, sizeof(compat)); + qemu_fdt_setprop_sized_cells(vms->fdt, node, "reg", + 1, bdf << 8, 1, 0, 1, 0, + 
1, 0, 1, 0); + + qemu_fdt_setprop_cell(vms->fdt, node, "#iommu-cells", 1); + qemu_fdt_setprop_cell(vms->fdt, node, "phandle", vms->iommu_phandle); + g_free(node); + + qemu_fdt_setprop_cells(vms->fdt, vms->pciehb_nodename, "iommu-map", + 0x0, vms->iommu_phandle, 0x0, bdf, + bdf + 1, vms->iommu_phandle, bdf + 1, 0xffff - bdf); +} + static void create_pcie(VirtMachineState *vms) { hwaddr base_mmio = vms->memmap[VIRT_PCIE_MMIO].base; @@ -1258,7 +1285,7 @@ static void create_pcie(VirtMachineState *vms) } } - nodename = g_strdup_printf("/pcie@%" PRIx64, base); + nodename = vms->pciehb_nodename = g_strdup_printf("/pcie@%" PRIx64, base); qemu_fdt_add_subnode(vms->fdt, nodename); qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "pci-host-ecam-generic"); @@ -1301,13 +1328,16 @@ static void create_pcie(VirtMachineState *vms) if (vms->iommu) { vms->iommu_phandle = qemu_fdt_alloc_phandle(vms->fdt); - create_smmu(vms, pci->bus); - - qemu_fdt_setprop_cells(vms->fdt, nodename, "iommu-map", - 0x0, vms->iommu_phandle, 0x0, 0x10000); + switch (vms->iommu) { + case VIRT_IOMMU_SMMUV3: + create_smmu(vms, pci->bus); + qemu_fdt_setprop_cells(vms->fdt, nodename, "iommu-map", + 0x0, vms->iommu_phandle, 0x0, 0x10000); + break; + default: + g_assert_not_reached(); + } } - - g_free(nodename); } static void create_platform_bus(VirtMachineState *vms) @@ -1974,6 +2004,13 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { virt_memory_plug(hotplug_dev, dev, errp); } + if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { + PCIDevice *pdev = PCI_DEVICE(dev); + + vms->iommu = VIRT_IOMMU_VIRTIO; + vms->virtio_iommu_bdf = pci_get_bdf(pdev); + create_virtio_iommu_dt_bindings(vms, errp); + } } static void virt_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, @@ -1990,7 +2027,13 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, (object_dynamic_cast(OBJECT(dev), 
TYPE_PC_DIMM))) { return HOTPLUG_HANDLER(machine); } + if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { + VirtMachineState *vms = VIRT_MACHINE(machine); + if (!vms->bootinfo.firmware_loaded || !acpi_enabled) { + return HOTPLUG_HANDLER(machine); + } + } return NULL; } diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index d8c459c575..12925a47ec 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -306,7 +306,7 @@ static int vhost_user_blk_connect(DeviceState *dev) s->connected = true; s->dev.nvqs = s->num_queues; - s->dev.vqs = s->vqs; + s->dev.vqs = s->vhost_vqs; s->dev.vq_index = 0; s->dev.backend_features = 0; @@ -420,13 +420,14 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, sizeof(struct virtio_blk_config)); + s->virtqs = g_new(VirtQueue *, s->num_queues); for (i = 0; i < s->num_queues; i++) { - virtio_add_queue(vdev, s->queue_size, - vhost_user_blk_handle_output); + s->virtqs[i] = virtio_add_queue(vdev, s->queue_size, + vhost_user_blk_handle_output); } s->inflight = g_new0(struct vhost_inflight, 1); - s->vqs = g_new0(struct vhost_virtqueue, s->num_queues); + s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues); s->watch = 0; s->connected = false; @@ -458,8 +459,12 @@ reconnect: return; virtio_err: - g_free(s->vqs); + g_free(s->vhost_vqs); g_free(s->inflight); + for (i = 0; i < s->num_queues; i++) { + virtio_delete_queue(s->virtqs[i]); + } + g_free(s->virtqs); virtio_cleanup(vdev); vhost_user_cleanup(&s->vhost_user); } @@ -468,14 +473,20 @@ static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserBlk *s = VHOST_USER_BLK(dev); + int i; virtio_set_status(vdev, 0); qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, NULL, NULL, NULL, false); vhost_dev_cleanup(&s->dev); vhost_dev_free_inflight(s->inflight); - g_free(s->vqs); + g_free(s->vhost_vqs); 
g_free(s->inflight); + + for (i = 0; i < s->num_queues; i++) { + virtio_delete_queue(s->virtqs[i]); + } + g_free(s->virtqs); virtio_cleanup(vdev); vhost_user_cleanup(&s->vhost_user); } diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index f87def27a6..d29525b36f 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -9,6 +9,11 @@ config VIRTIO_RNG default y depends on VIRTIO +config VIRTIO_IOMMU + bool + default y + depends on VIRTIO + config VIRTIO_PCI bool default y if PCI_DEVICES diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs index de0f5fc39b..4e4d39a0a4 100644 --- a/hw/virtio/Makefile.objs +++ b/hw/virtio/Makefile.objs @@ -16,6 +16,7 @@ obj-$(call land,$(CONFIG_VIRTIO_CRYPTO),$(CONFIG_VIRTIO_PCI)) += virtio-crypto-p obj-$(CONFIG_VIRTIO_PMEM) += virtio-pmem.o common-obj-$(call land,$(CONFIG_VIRTIO_PMEM),$(CONFIG_VIRTIO_PCI)) += virtio-pmem-pci.o obj-$(call land,$(CONFIG_VHOST_USER_FS),$(CONFIG_VIRTIO_PCI)) += vhost-user-fs-pci.o +obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock.o ifeq ($(CONFIG_VIRTIO_PCI),y) @@ -28,6 +29,7 @@ obj-$(CONFIG_VIRTIO_INPUT_HOST) += virtio-input-host-pci.o obj-$(CONFIG_VIRTIO_INPUT) += virtio-input-pci.o obj-$(CONFIG_VIRTIO_RNG) += virtio-rng-pci.o obj-$(CONFIG_VIRTIO_BALLOON) += virtio-balloon-pci.o +obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu-pci.o obj-$(CONFIG_VIRTIO_9P) += virtio-9p-pci.o obj-$(CONFIG_VIRTIO_SCSI) += virtio-scsi-pci.o obj-$(CONFIG_VIRTIO_BLK) += virtio-blk-pci.o diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index e28ba48da6..e83500bee9 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -53,3 +53,23 @@ virtio_mmio_write_offset(uint64_t offset, uint64_t value) "virtio_mmio_write off virtio_mmio_guest_page(uint64_t size, int shift) "guest page size 0x%" PRIx64 " shift %d" virtio_mmio_queue_write(uint64_t value, int max_size) "mmio_queue write 0x%" PRIx64 " max %d" virtio_mmio_setting_irq(int level) "virtio_mmio setting IRQ %d" + 
+# hw/virtio/virtio-iommu.c +virtio_iommu_device_reset(void) "reset!" +virtio_iommu_get_features(uint64_t features) "device supports features=0x%"PRIx64 +virtio_iommu_device_status(uint8_t status) "driver status = %d" +virtio_iommu_get_config(uint64_t page_size_mask, uint64_t start, uint64_t end, uint32_t domain_range, uint32_t probe_size) "page_size_mask=0x%"PRIx64" start=0x%"PRIx64" end=0x%"PRIx64" domain_range=%d probe_size=0x%x" +virtio_iommu_set_config(uint64_t page_size_mask, uint64_t start, uint64_t end, uint32_t domain_range, uint32_t probe_size) "page_size_mask=0x%"PRIx64" start=0x%"PRIx64" end=0x%"PRIx64" domain_bits=%d probe_size=0x%x" +virtio_iommu_attach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d" +virtio_iommu_detach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d" +virtio_iommu_map(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end, uint64_t phys_start, uint32_t flags) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 " phys_start=0x%"PRIx64" flags=%d" +virtio_iommu_unmap(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 +virtio_iommu_unmap_done(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 +virtio_iommu_translate(const char *name, uint32_t rid, uint64_t iova, int flag) "mr=%s rid=%d addr=0x%"PRIx64" flag=%d" +virtio_iommu_init_iommu_mr(char *iommu_mr) "init %s" +virtio_iommu_get_endpoint(uint32_t ep_id) "Alloc endpoint=%d" +virtio_iommu_put_endpoint(uint32_t ep_id) "Free endpoint=%d" +virtio_iommu_get_domain(uint32_t domain_id) "Alloc domain=%d" +virtio_iommu_put_domain(uint32_t domain_id) "Free domain=%d" +virtio_iommu_translate_out(uint64_t virt_addr, uint64_t phys_addr, uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d" +virtio_iommu_report_fault(uint8_t reason, uint32_t flags, uint32_t endpoint, uint64_t addr) "FAULT reason=%d flags=%d endpoint=%d address =0x%"PRIx64 diff 
--git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c index 33b17848c2..6136768875 100644 --- a/hw/virtio/vhost-user-fs.c +++ b/hw/virtio/vhost-user-fs.c @@ -209,11 +209,12 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) sizeof(struct virtio_fs_config)); /* Hiprio queue */ - virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output); + fs->hiprio_vq = virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output); /* Request queues */ + fs->req_vqs = g_new(VirtQueue *, fs->conf.num_request_queues); for (i = 0; i < fs->conf.num_request_queues; i++) { - virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output); + fs->req_vqs[i] = virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output); } /* 1 high prio queue, plus the number configured */ @@ -230,6 +231,11 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) err_virtio: vhost_user_cleanup(&fs->vhost_user); + virtio_delete_queue(fs->hiprio_vq); + for (i = 0; i < fs->conf.num_request_queues; i++) { + virtio_delete_queue(fs->req_vqs[i]); + } + g_free(fs->req_vqs); virtio_cleanup(vdev); g_free(fs->vhost_dev.vqs); return; @@ -239,6 +245,7 @@ static void vuf_device_unrealize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserFS *fs = VHOST_USER_FS(dev); + int i; /* This will stop vhost backend if appropriate. 
*/ vuf_set_status(vdev, 0); @@ -247,6 +254,11 @@ static void vuf_device_unrealize(DeviceState *dev, Error **errp) vhost_user_cleanup(&fs->vhost_user); + virtio_delete_queue(fs->hiprio_vq); + for (i = 0; i < fs->conf.num_request_queues; i++) { + virtio_delete_queue(fs->req_vqs[i]); + } + g_free(fs->req_vqs); virtio_cleanup(vdev); g_free(fs->vhost_dev.vqs); fs->vhost_dev.vqs = NULL; diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 2e81f5514f..08e7e63790 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -443,6 +443,7 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, &offset); fd = memory_region_get_fd(mr); if (fd > 0) { + assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); trace_vhost_user_set_mem_table_withfd(fd_num, mr->name, reg->memory_size, reg->guest_phys_addr, @@ -455,7 +456,6 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; msg.payload.memory.regions[fd_num].mmap_offset = offset; - assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); fds[fd_num++] = fd; } else { u->region_rb_offset[i] = 0; @@ -1458,9 +1458,11 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); } - err = vhost_setup_slave_channel(dev); - if (err < 0) { - return err; + if (dev->vq_index == 0) { + err = vhost_setup_slave_channel(dev); + if (err < 0) { + return err; + } } u->postcopy_notifier.notify = vhost_user_postcopy_notifier; diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index 7351ab0a19..4c65114de5 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -831,12 +831,13 @@ static void virtio_crypto_device_unrealize(DeviceState *dev, Error **errp) max_queues = vcrypto->multiqueue ? 
vcrypto->max_queues : 1; for (i = 0; i < max_queues; i++) { - virtio_del_queue(vdev, i); + virtio_delete_queue(vcrypto->vqs[i].dataq); q = &vcrypto->vqs[i]; qemu_bh_delete(q->dataq_bh); } g_free(vcrypto->vqs); + virtio_delete_queue(vcrypto->ctrl_vq); virtio_cleanup(vdev); cryptodev_backend_set_used(vcrypto->cryptodev, false); diff --git a/hw/virtio/virtio-iommu-pci.c b/hw/virtio/virtio-iommu-pci.c new file mode 100644 index 0000000000..3dfbf55b47 --- /dev/null +++ b/hw/virtio/virtio-iommu-pci.c @@ -0,0 +1,104 @@ +/* + * Virtio IOMMU PCI Bindings + * + * Copyright (c) 2019 Red Hat, Inc. + * Written by Eric Auger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +#include "qemu/osdep.h" + +#include "virtio-pci.h" +#include "hw/virtio/virtio-iommu.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "hw/boards.h" + +typedef struct VirtIOIOMMUPCI VirtIOIOMMUPCI; + +/* + * virtio-iommu-pci: This extends VirtioPCIProxy. 
+ * + */ +#define VIRTIO_IOMMU_PCI(obj) \ + OBJECT_CHECK(VirtIOIOMMUPCI, (obj), TYPE_VIRTIO_IOMMU_PCI) + +struct VirtIOIOMMUPCI { + VirtIOPCIProxy parent_obj; + VirtIOIOMMU vdev; +}; + +static Property virtio_iommu_pci_properties[] = { + DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_iommu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VirtIOIOMMUPCI *dev = VIRTIO_IOMMU_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + if (!qdev_get_machine_hotplug_handler(DEVICE(vpci_dev))) { + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + + error_setg(errp, + "%s machine fails to create iommu-map device tree bindings", + mc->name); + error_append_hint(errp, + "Check you machine implements a hotplug handler " + "for the virtio-iommu-pci device\n"); + error_append_hint(errp, "Check the guest is booted without FW or with " + "-no-acpi\n"); + return; + } + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); + object_property_set_link(OBJECT(dev), + OBJECT(pci_get_bus(&vpci_dev->pci_dev)), + "primary-bus", errp); + object_property_set_bool(OBJECT(vdev), true, "realized", errp); +} + +static void virtio_iommu_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = virtio_iommu_pci_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + device_class_set_props(dc, virtio_iommu_pci_properties); + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_IOMMU; + pcidev_k->revision = VIRTIO_PCI_ABI_VERSION; + pcidev_k->class_id = PCI_CLASS_OTHERS; + dc->hotpluggable = false; +} + +static void virtio_iommu_pci_instance_init(Object *obj) +{ + VirtIOIOMMUPCI *dev = VIRTIO_IOMMU_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_IOMMU); +} + +static const 
VirtioPCIDeviceTypeInfo virtio_iommu_pci_info = { + .base_name = TYPE_VIRTIO_IOMMU_PCI, + .generic_name = "virtio-iommu-pci", + .transitional_name = "virtio-iommu-pci-transitional", + .non_transitional_name = "virtio-iommu-pci-non-transitional", + .instance_size = sizeof(VirtIOIOMMUPCI), + .instance_init = virtio_iommu_pci_instance_init, + .class_init = virtio_iommu_pci_class_init, +}; + +static void virtio_iommu_pci_register(void) +{ + virtio_pci_types_register(&virtio_iommu_pci_info); +} + +type_init(virtio_iommu_pci_register) + + diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c new file mode 100644 index 0000000000..4cee8083bc --- /dev/null +++ b/hw/virtio/virtio-iommu.c @@ -0,0 +1,890 @@ +/* + * virtio-iommu device + * + * Copyright (c) 2020 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>.
+ * + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/iov.h" +#include "qemu-common.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/virtio.h" +#include "sysemu/kvm.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "trace.h" + +#include "standard-headers/linux/virtio_ids.h" + +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" +#include "hw/virtio/virtio-iommu.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pci.h" + +/* Max size */ +#define VIOMMU_DEFAULT_QUEUE_SIZE 256 + +typedef struct VirtIOIOMMUDomain { + uint32_t id; + GTree *mappings; + QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list; +} VirtIOIOMMUDomain; + +typedef struct VirtIOIOMMUEndpoint { + uint32_t id; + VirtIOIOMMUDomain *domain; + QLIST_ENTRY(VirtIOIOMMUEndpoint) next; +} VirtIOIOMMUEndpoint; + +typedef struct VirtIOIOMMUInterval { + uint64_t low; + uint64_t high; +} VirtIOIOMMUInterval; + +typedef struct VirtIOIOMMUMapping { + uint64_t phys_addr; + uint32_t flags; +} VirtIOIOMMUMapping; + +static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev) +{ + return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); +} + +/** + * The bus number is used for lookup when SID based operations occur. + * In that case we lazily populate the IOMMUPciBus array from the bus hash + * table. At the time the IOMMUPciBus is created (iommu_find_add_as), the bus + * numbers may not be always initialized yet. 
+ */ +static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num) +{ + IOMMUPciBus *iommu_pci_bus = s->iommu_pcibus_by_bus_num[bus_num]; + + if (!iommu_pci_bus) { + GHashTableIter iter; + + g_hash_table_iter_init(&iter, s->as_by_busptr); + while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) { + if (pci_bus_num(iommu_pci_bus->bus) == bus_num) { + s->iommu_pcibus_by_bus_num[bus_num] = iommu_pci_bus; + return iommu_pci_bus; + } + } + return NULL; + } + return iommu_pci_bus; +} + +static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid) +{ + uint8_t bus_n, devfn; + IOMMUPciBus *iommu_pci_bus; + IOMMUDevice *dev; + + bus_n = PCI_BUS_NUM(sid); + iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n); + if (iommu_pci_bus) { + devfn = sid & PCI_DEVFN_MAX; + dev = iommu_pci_bus->pbdev[devfn]; + if (dev) { + return &dev->iommu_mr; + } + } + return NULL; +} + +static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data) +{ + VirtIOIOMMUInterval *inta = (VirtIOIOMMUInterval *)a; + VirtIOIOMMUInterval *intb = (VirtIOIOMMUInterval *)b; + + if (inta->high < intb->low) { + return -1; + } else if (intb->high < inta->low) { + return 1; + } else { + return 0; + } +} + +static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) +{ + if (!ep->domain) { + return; + } + QLIST_REMOVE(ep, next); + ep->domain = NULL; +} + +static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s, + uint32_t ep_id) +{ + VirtIOIOMMUEndpoint *ep; + + ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id)); + if (ep) { + return ep; + } + if (!virtio_iommu_mr(s, ep_id)) { + return NULL; + } + ep = g_malloc0(sizeof(*ep)); + ep->id = ep_id; + trace_virtio_iommu_get_endpoint(ep_id); + g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep); + return ep; +} + +static void virtio_iommu_put_endpoint(gpointer data) +{ + VirtIOIOMMUEndpoint *ep = (VirtIOIOMMUEndpoint *)data; + + if (ep->domain) { + 
virtio_iommu_detach_endpoint_from_domain(ep); + } + + trace_virtio_iommu_put_endpoint(ep->id); + g_free(ep); +} + +static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s, + uint32_t domain_id) +{ + VirtIOIOMMUDomain *domain; + + domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id)); + if (domain) { + return domain; + } + domain = g_malloc0(sizeof(*domain)); + domain->id = domain_id; + domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp, + NULL, (GDestroyNotify)g_free, + (GDestroyNotify)g_free); + g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain); + QLIST_INIT(&domain->endpoint_list); + trace_virtio_iommu_get_domain(domain_id); + return domain; +} + +static void virtio_iommu_put_domain(gpointer data) +{ + VirtIOIOMMUDomain *domain = (VirtIOIOMMUDomain *)data; + VirtIOIOMMUEndpoint *iter, *tmp; + + QLIST_FOREACH_SAFE(iter, &domain->endpoint_list, next, tmp) { + virtio_iommu_detach_endpoint_from_domain(iter); + } + g_tree_destroy(domain->mappings); + trace_virtio_iommu_put_domain(domain->id); + g_free(domain); +} + +static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, + int devfn) +{ + VirtIOIOMMU *s = opaque; + IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); + static uint32_t mr_index; + IOMMUDevice *sdev; + + if (!sbus) { + sbus = g_malloc0(sizeof(IOMMUPciBus) + + sizeof(IOMMUDevice *) * PCI_DEVFN_MAX); + sbus->bus = bus; + g_hash_table_insert(s->as_by_busptr, bus, sbus); + } + + sdev = sbus->pbdev[devfn]; + if (!sdev) { + char *name = g_strdup_printf("%s-%d-%d", + TYPE_VIRTIO_IOMMU_MEMORY_REGION, + mr_index++, devfn); + sdev = sbus->pbdev[devfn] = g_malloc0(sizeof(IOMMUDevice)); + + sdev->viommu = s; + sdev->bus = bus; + sdev->devfn = devfn; + + trace_virtio_iommu_init_iommu_mr(name); + + memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr), + TYPE_VIRTIO_IOMMU_MEMORY_REGION, + OBJECT(s), name, + UINT64_MAX); + address_space_init(&sdev->as, + 
MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU); + g_free(name); + } + return &sdev->as; +} + +static int virtio_iommu_attach(VirtIOIOMMU *s, + struct virtio_iommu_req_attach *req) +{ + uint32_t domain_id = le32_to_cpu(req->domain); + uint32_t ep_id = le32_to_cpu(req->endpoint); + VirtIOIOMMUDomain *domain; + VirtIOIOMMUEndpoint *ep; + + trace_virtio_iommu_attach(domain_id, ep_id); + + ep = virtio_iommu_get_endpoint(s, ep_id); + if (!ep) { + return VIRTIO_IOMMU_S_NOENT; + } + + if (ep->domain) { + VirtIOIOMMUDomain *previous_domain = ep->domain; + /* + * the device is already attached to a domain, + * detach it first + */ + virtio_iommu_detach_endpoint_from_domain(ep); + if (QLIST_EMPTY(&previous_domain->endpoint_list)) { + g_tree_remove(s->domains, GUINT_TO_POINTER(previous_domain->id)); + } + } + + domain = virtio_iommu_get_domain(s, domain_id); + QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next); + + ep->domain = domain; + + return VIRTIO_IOMMU_S_OK; +} + +static int virtio_iommu_detach(VirtIOIOMMU *s, + struct virtio_iommu_req_detach *req) +{ + uint32_t domain_id = le32_to_cpu(req->domain); + uint32_t ep_id = le32_to_cpu(req->endpoint); + VirtIOIOMMUDomain *domain; + VirtIOIOMMUEndpoint *ep; + + trace_virtio_iommu_detach(domain_id, ep_id); + + ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id)); + if (!ep) { + return VIRTIO_IOMMU_S_NOENT; + } + + domain = ep->domain; + + if (!domain || domain->id != domain_id) { + return VIRTIO_IOMMU_S_INVAL; + } + + virtio_iommu_detach_endpoint_from_domain(ep); + + if (QLIST_EMPTY(&domain->endpoint_list)) { + g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id)); + } + return VIRTIO_IOMMU_S_OK; +} + +static int virtio_iommu_map(VirtIOIOMMU *s, + struct virtio_iommu_req_map *req) +{ + uint32_t domain_id = le32_to_cpu(req->domain); + uint64_t phys_start = le64_to_cpu(req->phys_start); + uint64_t virt_start = le64_to_cpu(req->virt_start); + uint64_t virt_end = le64_to_cpu(req->virt_end); + uint32_t flags = 
le32_to_cpu(req->flags); + VirtIOIOMMUDomain *domain; + VirtIOIOMMUInterval *interval; + VirtIOIOMMUMapping *mapping; + + if (flags & ~VIRTIO_IOMMU_MAP_F_MASK) { + return VIRTIO_IOMMU_S_INVAL; + } + + domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id)); + if (!domain) { + return VIRTIO_IOMMU_S_NOENT; + } + + interval = g_malloc0(sizeof(*interval)); + + interval->low = virt_start; + interval->high = virt_end; + + mapping = g_tree_lookup(domain->mappings, (gpointer)interval); + if (mapping) { + g_free(interval); + return VIRTIO_IOMMU_S_INVAL; + } + + trace_virtio_iommu_map(domain_id, virt_start, virt_end, phys_start, flags); + + mapping = g_malloc0(sizeof(*mapping)); + mapping->phys_addr = phys_start; + mapping->flags = flags; + + g_tree_insert(domain->mappings, interval, mapping); + + return VIRTIO_IOMMU_S_OK; +} + +static int virtio_iommu_unmap(VirtIOIOMMU *s, + struct virtio_iommu_req_unmap *req) +{ + uint32_t domain_id = le32_to_cpu(req->domain); + uint64_t virt_start = le64_to_cpu(req->virt_start); + uint64_t virt_end = le64_to_cpu(req->virt_end); + VirtIOIOMMUMapping *iter_val; + VirtIOIOMMUInterval interval, *iter_key; + VirtIOIOMMUDomain *domain; + int ret = VIRTIO_IOMMU_S_OK; + + trace_virtio_iommu_unmap(domain_id, virt_start, virt_end); + + domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id)); + if (!domain) { + return VIRTIO_IOMMU_S_NOENT; + } + interval.low = virt_start; + interval.high = virt_end; + + while (g_tree_lookup_extended(domain->mappings, &interval, + (void **)&iter_key, (void**)&iter_val)) { + uint64_t current_low = iter_key->low; + uint64_t current_high = iter_key->high; + + if (interval.low <= current_low && interval.high >= current_high) { + g_tree_remove(domain->mappings, iter_key); + trace_virtio_iommu_unmap_done(domain_id, current_low, current_high); + } else { + ret = VIRTIO_IOMMU_S_RANGE; + break; + } + } + return ret; +} + +static int virtio_iommu_iov_to_req(struct iovec *iov, + unsigned int iov_cnt, + void 
*req, size_t req_sz) +{ + size_t sz, payload_sz = req_sz - sizeof(struct virtio_iommu_req_tail); + + sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz); + if (unlikely(sz != payload_sz)) { + return VIRTIO_IOMMU_S_INVAL; + } + return 0; +} + +#define virtio_iommu_handle_req(__req) \ +static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s, \ + struct iovec *iov, \ + unsigned int iov_cnt) \ +{ \ + struct virtio_iommu_req_ ## __req req; \ + int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req)); \ + \ + return ret ? ret : virtio_iommu_ ## __req(s, &req); \ +} + +virtio_iommu_handle_req(attach) +virtio_iommu_handle_req(detach) +virtio_iommu_handle_req(map) +virtio_iommu_handle_req(unmap) + +static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOIOMMU *s = VIRTIO_IOMMU(vdev); + struct virtio_iommu_req_head head; + struct virtio_iommu_req_tail tail = {}; + VirtQueueElement *elem; + unsigned int iov_cnt; + struct iovec *iov; + size_t sz; + + for (;;) { + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!elem) { + return; + } + + if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) || + iov_size(elem->out_sg, elem->out_num) < sizeof(head)) { + virtio_error(vdev, "virtio-iommu bad head/tail size"); + virtqueue_detach_element(vq, elem, 0); + g_free(elem); + break; + } + + iov_cnt = elem->out_num; + iov = elem->out_sg; + sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head)); + if (unlikely(sz != sizeof(head))) { + tail.status = VIRTIO_IOMMU_S_DEVERR; + goto out; + } + qemu_mutex_lock(&s->mutex); + switch (head.type) { + case VIRTIO_IOMMU_T_ATTACH: + tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt); + break; + case VIRTIO_IOMMU_T_DETACH: + tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt); + break; + case VIRTIO_IOMMU_T_MAP: + tail.status = virtio_iommu_handle_map(s, iov, iov_cnt); + break; + case VIRTIO_IOMMU_T_UNMAP: + tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt); + break; + default: + 
tail.status = VIRTIO_IOMMU_S_UNSUPP; + } + qemu_mutex_unlock(&s->mutex); + +out: + sz = iov_from_buf(elem->in_sg, elem->in_num, 0, + &tail, sizeof(tail)); + assert(sz == sizeof(tail)); + + virtqueue_push(vq, elem, sizeof(tail)); + virtio_notify(vdev, vq); + g_free(elem); + } +} + +static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason, + int flags, uint32_t endpoint, + uint64_t address) +{ + VirtIODevice *vdev = &viommu->parent_obj; + VirtQueue *vq = viommu->event_vq; + struct virtio_iommu_fault fault; + VirtQueueElement *elem; + size_t sz; + + memset(&fault, 0, sizeof(fault)); + fault.reason = reason; + fault.flags = cpu_to_le32(flags); + fault.endpoint = cpu_to_le32(endpoint); + fault.address = cpu_to_le64(address); + + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + + if (!elem) { + error_report_once( + "no buffer available in event queue to report event"); + return; + } + + if (iov_size(elem->in_sg, elem->in_num) < sizeof(fault)) { + virtio_error(vdev, "error buffer of wrong size"); + virtqueue_detach_element(vq, elem, 0); + g_free(elem); + return; + } + + sz = iov_from_buf(elem->in_sg, elem->in_num, 0, + &fault, sizeof(fault)); + assert(sz == sizeof(fault)); + + trace_virtio_iommu_report_fault(reason, flags, endpoint, address); + virtqueue_push(vq, elem, sz); + virtio_notify(vdev, vq); + g_free(elem); + +} + +static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, + IOMMUAccessFlags flag, + int iommu_idx) +{ + IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); + VirtIOIOMMUInterval interval, *mapping_key; + VirtIOIOMMUMapping *mapping_value; + VirtIOIOMMU *s = sdev->viommu; + bool read_fault, write_fault; + VirtIOIOMMUEndpoint *ep; + uint32_t sid, flags; + bool bypass_allowed; + bool found; + + interval.low = addr; + interval.high = addr + 1; + + IOMMUTLBEntry entry = { + .target_as = &address_space_memory, + .iova = addr, + .translated_addr = addr, + .addr_mask = (1 << ctz32(s->config.page_size_mask)) 
- 1, + .perm = IOMMU_NONE, + }; + + bypass_allowed = virtio_vdev_has_feature(&s->parent_obj, + VIRTIO_IOMMU_F_BYPASS); + + sid = virtio_iommu_get_bdf(sdev); + + trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag); + qemu_mutex_lock(&s->mutex); + + ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); + if (!ep) { + if (!bypass_allowed) { + error_report_once("%s sid=%d is not known!!", __func__, sid); + virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_UNKNOWN, + VIRTIO_IOMMU_FAULT_F_ADDRESS, + sid, addr); + } else { + entry.perm = flag; + } + goto unlock; + } + + if (!ep->domain) { + if (!bypass_allowed) { + error_report_once("%s %02x:%02x.%01x not attached to any domain", + __func__, PCI_BUS_NUM(sid), + PCI_SLOT(sid), PCI_FUNC(sid)); + virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_DOMAIN, + VIRTIO_IOMMU_FAULT_F_ADDRESS, + sid, addr); + } else { + entry.perm = flag; + } + goto unlock; + } + + found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval), + (void **)&mapping_key, + (void **)&mapping_value); + if (!found) { + error_report_once("%s no mapping for 0x%"PRIx64" for sid=%d", + __func__, addr, sid); + virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING, + VIRTIO_IOMMU_FAULT_F_ADDRESS, + sid, addr); + goto unlock; + } + + read_fault = (flag & IOMMU_RO) && + !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ); + write_fault = (flag & IOMMU_WO) && + !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE); + + flags = read_fault ? VIRTIO_IOMMU_FAULT_F_READ : 0; + flags |= write_fault ? 
VIRTIO_IOMMU_FAULT_F_WRITE : 0; + if (flags) { + error_report_once("%s permission error on 0x%"PRIx64"(%d): allowed=%d", + __func__, addr, flag, mapping_value->flags); + flags |= VIRTIO_IOMMU_FAULT_F_ADDRESS; + virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING, + flags | VIRTIO_IOMMU_FAULT_F_ADDRESS, + sid, addr); + goto unlock; + } + entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr; + entry.perm = flag; + trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid); + +unlock: + qemu_mutex_unlock(&s->mutex); + return entry; +} + +static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data) +{ + VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev); + struct virtio_iommu_config *config = &dev->config; + + trace_virtio_iommu_get_config(config->page_size_mask, + config->input_range.start, + config->input_range.end, + config->domain_range.end, + config->probe_size); + memcpy(config_data, &dev->config, sizeof(struct virtio_iommu_config)); +} + +static void virtio_iommu_set_config(VirtIODevice *vdev, + const uint8_t *config_data) +{ + struct virtio_iommu_config config; + + memcpy(&config, config_data, sizeof(struct virtio_iommu_config)); + trace_virtio_iommu_set_config(config.page_size_mask, + config.input_range.start, + config.input_range.end, + config.domain_range.end, + config.probe_size); +} + +static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f, + Error **errp) +{ + VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev); + + f |= dev->features; + trace_virtio_iommu_get_features(f); + return f; +} + +static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data) +{ + guint ua = GPOINTER_TO_UINT(a); + guint ub = GPOINTER_TO_UINT(b); + return (ua > ub) - (ua < ub); +} + +static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOIOMMU *s = VIRTIO_IOMMU(dev); + + virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU, + sizeof(struct 
virtio_iommu_config)); + + memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num)); + + s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, + virtio_iommu_handle_command); + s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL); + + s->config.page_size_mask = TARGET_PAGE_MASK; + s->config.input_range.end = -1UL; + s->config.domain_range.end = 32; + + virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX); + virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC); + virtio_add_feature(&s->features, VIRTIO_F_VERSION_1); + virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE); + virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE); + virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP); + virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS); + virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO); + + qemu_mutex_init(&s->mutex); + + s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free); + + if (s->primary_bus) { + pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s); + } else { + error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!"); + } +} + +static void virtio_iommu_device_unrealize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOIOMMU *s = VIRTIO_IOMMU(dev); + + g_tree_destroy(s->domains); + g_tree_destroy(s->endpoints); + + virtio_cleanup(vdev); +} + +static void virtio_iommu_device_reset(VirtIODevice *vdev) +{ + VirtIOIOMMU *s = VIRTIO_IOMMU(vdev); + + trace_virtio_iommu_device_reset(); + + if (s->domains) { + g_tree_destroy(s->domains); + } + if (s->endpoints) { + g_tree_destroy(s->endpoints); + } + s->domains = g_tree_new_full((GCompareDataFunc)int_cmp, + NULL, NULL, virtio_iommu_put_domain); + s->endpoints = g_tree_new_full((GCompareDataFunc)int_cmp, + NULL, NULL, virtio_iommu_put_endpoint); +} + +static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status) +{ + trace_virtio_iommu_device_status(status); 
+} + +static void virtio_iommu_instance_init(Object *obj) +{ +} + +#define VMSTATE_INTERVAL \ +{ \ + .name = "interval", \ + .version_id = 1, \ + .minimum_version_id = 1, \ + .fields = (VMStateField[]) { \ + VMSTATE_UINT64(low, VirtIOIOMMUInterval), \ + VMSTATE_UINT64(high, VirtIOIOMMUInterval), \ + VMSTATE_END_OF_LIST() \ + } \ +} + +#define VMSTATE_MAPPING \ +{ \ + .name = "mapping", \ + .version_id = 1, \ + .minimum_version_id = 1, \ + .fields = (VMStateField[]) { \ + VMSTATE_UINT64(phys_addr, VirtIOIOMMUMapping),\ + VMSTATE_UINT32(flags, VirtIOIOMMUMapping), \ + VMSTATE_END_OF_LIST() \ + }, \ +} + +static const VMStateDescription vmstate_interval_mapping[2] = { + VMSTATE_MAPPING, /* value */ + VMSTATE_INTERVAL /* key */ +}; + +static int domain_preload(void *opaque) +{ + VirtIOIOMMUDomain *domain = opaque; + + domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp, + NULL, g_free, g_free); + return 0; +} + +static const VMStateDescription vmstate_endpoint = { + .name = "endpoint", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(id, VirtIOIOMMUEndpoint), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_domain = { + .name = "domain", + .version_id = 1, + .minimum_version_id = 1, + .pre_load = domain_preload, + .fields = (VMStateField[]) { + VMSTATE_UINT32(id, VirtIOIOMMUDomain), + VMSTATE_GTREE_V(mappings, VirtIOIOMMUDomain, 1, + vmstate_interval_mapping, + VirtIOIOMMUInterval, VirtIOIOMMUMapping), + VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1, + vmstate_endpoint, VirtIOIOMMUEndpoint, next), + VMSTATE_END_OF_LIST() + } +}; + +static gboolean reconstruct_endpoints(gpointer key, gpointer value, + gpointer data) +{ + VirtIOIOMMU *s = (VirtIOIOMMU *)data; + VirtIOIOMMUDomain *d = (VirtIOIOMMUDomain *)value; + VirtIOIOMMUEndpoint *iter; + + QLIST_FOREACH(iter, &d->endpoint_list, next) { + iter->domain = d; + g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter); + } + 
return false; /* continue the domain traversal */ +} + +static int iommu_post_load(void *opaque, int version_id) +{ + VirtIOIOMMU *s = opaque; + + g_tree_foreach(s->domains, reconstruct_endpoints, s); + return 0; +} + +static const VMStateDescription vmstate_virtio_iommu_device = { + .name = "virtio-iommu-device", + .minimum_version_id = 1, + .version_id = 1, + .post_load = iommu_post_load, + .fields = (VMStateField[]) { + VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 1, + &vmstate_domain, VirtIOIOMMUDomain), + VMSTATE_END_OF_LIST() + }, +}; + +static const VMStateDescription vmstate_virtio_iommu = { + .name = "virtio-iommu", + .minimum_version_id = 1, + .priority = MIG_PRI_IOMMU, + .version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, +}; + +static Property virtio_iommu_properties[] = { + DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus, "PCI", PCIBus *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_iommu_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + device_class_set_props(dc, virtio_iommu_properties); + dc->vmsd = &vmstate_virtio_iommu; + + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + vdc->realize = virtio_iommu_device_realize; + vdc->unrealize = virtio_iommu_device_unrealize; + vdc->reset = virtio_iommu_device_reset; + vdc->get_config = virtio_iommu_get_config; + vdc->set_config = virtio_iommu_set_config; + vdc->get_features = virtio_iommu_get_features; + vdc->set_status = virtio_iommu_set_status; + vdc->vmsd = &vmstate_virtio_iommu_device; +} + +static void virtio_iommu_memory_region_class_init(ObjectClass *klass, + void *data) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); + + imrc->translate = virtio_iommu_translate; +} + +static const TypeInfo virtio_iommu_info = { + .name = TYPE_VIRTIO_IOMMU, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VirtIOIOMMU), + 
.instance_init = virtio_iommu_instance_init, + .class_init = virtio_iommu_class_init, +}; + +static const TypeInfo virtio_iommu_memory_region_info = { + .parent = TYPE_IOMMU_MEMORY_REGION, + .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION, + .class_init = virtio_iommu_memory_region_class_init, +}; + +static void virtio_register_types(void) +{ + type_register_static(&virtio_iommu_info); + type_register_static(&virtio_iommu_memory_region_info); +} + +type_init(virtio_register_types) diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c index 97287e923b..43399522f5 100644 --- a/hw/virtio/virtio-pmem.c +++ b/hw/virtio/virtio-pmem.c @@ -130,6 +130,7 @@ static void virtio_pmem_unrealize(DeviceState *dev, Error **errp) VirtIOPMEM *pmem = VIRTIO_PMEM(dev); host_memory_backend_set_mapped(pmem->memdev, false); + virtio_delete_queue(pmem->rq_vq); virtio_cleanup(vdev); } diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 9d06dbe3ef..b2d415e5dd 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -282,15 +282,19 @@ static void vring_packed_flags_write(VirtIODevice *vdev, /* Called within rcu_read_lock(). */ static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq) { - VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches); - assert(caches != NULL); - return caches; + return atomic_rcu_read(&vq->vring.caches); } + /* Called within rcu_read_lock(). 
*/ static inline uint16_t vring_avail_flags(VirtQueue *vq) { VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); hwaddr pa = offsetof(VRingAvail, flags); + + if (!caches) { + return 0; + } + return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa); } @@ -299,6 +303,11 @@ static inline uint16_t vring_avail_idx(VirtQueue *vq) { VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); hwaddr pa = offsetof(VRingAvail, idx); + + if (!caches) { + return 0; + } + vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa); return vq->shadow_avail_idx; } @@ -308,6 +317,11 @@ static inline uint16_t vring_avail_ring(VirtQueue *vq, int i) { VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); hwaddr pa = offsetof(VRingAvail, ring[i]); + + if (!caches) { + return 0; + } + return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa); } @@ -323,6 +337,11 @@ static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem, { VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); hwaddr pa = offsetof(VRingUsed, ring[i]); + + if (!caches) { + return; + } + virtio_tswap32s(vq->vdev, &uelem->id); virtio_tswap32s(vq->vdev, &uelem->len); address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem)); @@ -334,6 +353,11 @@ static uint16_t vring_used_idx(VirtQueue *vq) { VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); hwaddr pa = offsetof(VRingUsed, idx); + + if (!caches) { + return 0; + } + return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); } @@ -342,8 +366,12 @@ static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val) { VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); hwaddr pa = offsetof(VRingUsed, idx); - virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val); - address_space_cache_invalidate(&caches->used, pa, sizeof(val)); + + if (caches) { + virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val); + address_space_cache_invalidate(&caches->used, pa, 
sizeof(val)); + } + vq->used_idx = val; } @@ -353,8 +381,13 @@ static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask) VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); VirtIODevice *vdev = vq->vdev; hwaddr pa = offsetof(VRingUsed, flags); - uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); + uint16_t flags; + if (!caches) { + return; + } + + flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask); address_space_cache_invalidate(&caches->used, pa, sizeof(flags)); } @@ -365,8 +398,13 @@ static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask) VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); VirtIODevice *vdev = vq->vdev; hwaddr pa = offsetof(VRingUsed, flags); - uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); + uint16_t flags; + if (!caches) { + return; + } + + flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask); address_space_cache_invalidate(&caches->used, pa, sizeof(flags)); } @@ -381,6 +419,10 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) } caches = vring_get_region_caches(vq); + if (!caches) { + return; + } + pa = offsetof(VRingUsed, ring[vq->vring.num]); virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val); address_space_cache_invalidate(&caches->used, pa, sizeof(val)); @@ -410,7 +452,11 @@ static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable) VRingMemoryRegionCaches *caches; RCU_READ_LOCK_GUARD(); - caches = vring_get_region_caches(vq); + caches = vring_get_region_caches(vq); + if (!caches) { + return; + } + vring_packed_event_read(vq->vdev, &caches->used, &e); if (!enable) { @@ -597,6 +643,10 @@ static int virtio_queue_packed_empty_rcu(VirtQueue *vq) } cache = vring_get_region_caches(vq); + if (!cache) { + return 1; + } + vring_packed_desc_read_flags(vq->vdev, 
&desc.flags, &cache->desc, vq->last_avail_idx); @@ -777,6 +827,10 @@ static void virtqueue_packed_fill_desc(VirtQueue *vq, } caches = vring_get_region_caches(vq); + if (!caches) { + return; + } + vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order); } @@ -949,6 +1003,10 @@ static void virtqueue_split_get_avail_bytes(VirtQueue *vq, max = vq->vring.num; caches = vring_get_region_caches(vq); + if (!caches) { + goto err; + } + while ((rc = virtqueue_num_heads(vq, idx)) > 0) { MemoryRegionCache *desc_cache = &caches->desc; unsigned int num_bufs; @@ -1089,6 +1147,9 @@ static void virtqueue_packed_get_avail_bytes(VirtQueue *vq, max = vq->vring.num; caches = vring_get_region_caches(vq); + if (!caches) { + goto err; + } for (;;) { unsigned int num_bufs = total_bufs; @@ -1194,6 +1255,10 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, } caches = vring_get_region_caches(vq); + if (!caches) { + goto err; + } + desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ? 
sizeof(VRingPackedDesc) : sizeof(VRingDesc); if (caches->desc.len < vq->vring.num * desc_size) { @@ -1388,6 +1453,11 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) i = head; caches = vring_get_region_caches(vq); + if (!caches) { + virtio_error(vdev, "Region caches not initialized"); + goto done; + } + if (caches->desc.len < max * sizeof(VRingDesc)) { virtio_error(vdev, "Cannot map descriptor ring"); goto done; @@ -1510,6 +1580,11 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) i = vq->last_avail_idx; caches = vring_get_region_caches(vq); + if (!caches) { + virtio_error(vdev, "Region caches not initialized"); + goto done; + } + if (caches->desc.len < max * sizeof(VRingDesc)) { virtio_error(vdev, "Cannot map descriptor ring"); goto done; @@ -1629,6 +1704,10 @@ static unsigned int virtqueue_packed_drop_all(VirtQueue *vq) VRingPackedDesc desc; caches = vring_get_region_caches(vq); + if (!caches) { + return 0; + } + desc_cache = &caches->desc; virtio_queue_set_notification(vq, 0); @@ -2413,6 +2492,10 @@ static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq) VRingMemoryRegionCaches *caches; caches = vring_get_region_caches(vq); + if (!caches) { + return false; + } + vring_packed_event_read(vdev, &caches->avail, &e); old = vq->signalled_used; diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index 71508bf40c..02f500cb8e 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -125,8 +125,10 @@ typedef struct { bool virt; int32_t gic_version; VirtIOMMUType iommu; + uint16_t virtio_iommu_bdf; struct arm_boot_info bootinfo; MemMapEntry *memmap; + char *pciehb_nodename; const int *irqmap; int smp_cpus; void *fdt; diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 2acd8321af..cfedf5a995 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -86,6 +86,7 @@ extern bool pci_available; #define PCI_DEVICE_ID_VIRTIO_9P 0x1009 #define PCI_DEVICE_ID_VIRTIO_VSOCK 0x1012 #define 
PCI_DEVICE_ID_VIRTIO_PMEM 0x1013 +#define PCI_DEVICE_ID_VIRTIO_IOMMU 0x1014 #define PCI_VENDOR_ID_REDHAT 0x1b36 #define PCI_DEVICE_ID_REDHAT_BRIDGE 0x0001 diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h index 108bfadeeb..05ea0ad183 100644 --- a/include/hw/virtio/vhost-user-blk.h +++ b/include/hw/virtio/vhost-user-blk.h @@ -36,7 +36,8 @@ typedef struct VHostUserBlk { struct vhost_dev dev; struct vhost_inflight *inflight; VhostUserState vhost_user; - struct vhost_virtqueue *vqs; + struct vhost_virtqueue *vhost_vqs; + VirtQueue **virtqs; guint watch; bool connected; } VHostUserBlk; diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h index 9ff1bdb7cf..6f3030d288 100644 --- a/include/hw/virtio/vhost-user-fs.h +++ b/include/hw/virtio/vhost-user-fs.h @@ -37,6 +37,8 @@ typedef struct { struct vhost_virtqueue *vhost_vqs; struct vhost_dev vhost_dev; VhostUserState vhost_user; + VirtQueue **req_vqs; + VirtQueue *hiprio_vq; /*< public >*/ } VHostUserFS; diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h new file mode 100644 index 0000000000..6f67f1020a --- /dev/null +++ b/include/hw/virtio/virtio-iommu.h @@ -0,0 +1,61 @@ +/* + * virtio-iommu device + * + * Copyright (c) 2020 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>.
+ * + */ + +#ifndef QEMU_VIRTIO_IOMMU_H +#define QEMU_VIRTIO_IOMMU_H + +#include "standard-headers/linux/virtio_iommu.h" +#include "hw/virtio/virtio.h" +#include "hw/pci/pci.h" + +#define TYPE_VIRTIO_IOMMU "virtio-iommu-device" +#define TYPE_VIRTIO_IOMMU_PCI "virtio-iommu-device-base" +#define VIRTIO_IOMMU(obj) \ + OBJECT_CHECK(VirtIOIOMMU, (obj), TYPE_VIRTIO_IOMMU) + +#define TYPE_VIRTIO_IOMMU_MEMORY_REGION "virtio-iommu-memory-region" + +typedef struct IOMMUDevice { + void *viommu; + PCIBus *bus; + int devfn; + IOMMUMemoryRegion iommu_mr; + AddressSpace as; +} IOMMUDevice; + +typedef struct IOMMUPciBus { + PCIBus *bus; + IOMMUDevice *pbdev[0]; /* Parent array is sparse, so dynamically alloc */ +} IOMMUPciBus; + +typedef struct VirtIOIOMMU { + VirtIODevice parent_obj; + VirtQueue *req_vq; + VirtQueue *event_vq; + struct virtio_iommu_config config; + uint64_t features; + GHashTable *as_by_busptr; + IOMMUPciBus *iommu_pcibus_by_bus_num[PCI_BUS_MAX]; + PCIBus *primary_bus; + GTree *domains; + QemuMutex mutex; + GTree *endpoints; +} VirtIOIOMMU; + +#endif diff --git a/qdev-monitor.c b/qdev-monitor.c index 8a2a9538cd..9833b33549 100644 --- a/qdev-monitor.c +++ b/qdev-monitor.c @@ -67,6 +67,7 @@ static const QDevAlias qdev_alias_table[] = { { "virtio-input-host-ccw", "virtio-input-host", QEMU_ARCH_S390X }, { "virtio-input-host-pci", "virtio-input-host", QEMU_ARCH_ALL & ~QEMU_ARCH_S390X }, + { "virtio-iommu-pci", "virtio-iommu", QEMU_ARCH_ALL & ~QEMU_ARCH_S390X }, { "virtio-keyboard-ccw", "virtio-keyboard", QEMU_ARCH_S390X }, { "virtio-keyboard-pci", "virtio-keyboard", QEMU_ARCH_ALL & ~QEMU_ARCH_S390X }, diff --git a/tests/data/acpi/rebuild-expected-aml.sh b/tests/data/acpi/rebuild-expected-aml.sh index d44e511533..9cbaab1a4d 100755 --- a/tests/data/acpi/rebuild-expected-aml.sh +++ b/tests/data/acpi/rebuild-expected-aml.sh @@ -31,6 +31,13 @@ done eval `grep SRC_PATH= config-host.mak` +old_allowed_dif=`grep -v -e 'List of comma-separated changed AML files to ignore' 
${SRC_PATH}/tests/qtest/bios-tables-test-allowed-diff.h` + echo '/* List of comma-separated changed AML files to ignore */' > ${SRC_PATH}/tests/qtest/bios-tables-test-allowed-diff.h echo "The files were rebuilt and can be added to git." + +if [ -z "$old_allowed_dif" ]; then + echo "Note! Please do not commit expected files with source changes" + echo "Note! Please follow the process documented in ${SRC_PATH}/tests/qtest/bios-tables-test.c" +fi diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index b4752c644c..0a597bbacf 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -426,7 +426,9 @@ static void test_acpi_asl(test_data *data) fprintf(stderr, "acpi-test: Warning! %.4s binary file mismatch. " - "Actual [aml:%s], Expected [aml:%s].\n", + "Actual [aml:%s], Expected [aml:%s].\n" + "See source file tests/qtest/bios-tables-test.c " + "for instructions on how to update expected files.\n", exp_sdt->aml, sdt->aml_file, exp_sdt->aml_file); all_tables_match = all_tables_match && @@ -461,21 +463,20 @@ static void test_acpi_asl(test_data *data) "Actual [asl:%s, aml:%s], Expected [asl:%s, aml:%s].\n", exp_sdt->aml, sdt->asl_file, sdt->aml_file, exp_sdt->asl_file, exp_sdt->aml_file); + fflush(stderr); if (getenv("V")) { - const char *diff_cmd = getenv("DIFF"); - if (diff_cmd) { - int ret G_GNUC_UNUSED; - char *diff = g_strdup_printf("%s %s %s", diff_cmd, - exp_sdt->asl_file, sdt->asl_file); - ret = system(diff) ; - g_free(diff); - } else { - fprintf(stderr, "acpi-test: Warning. not showing " - "difference since no diff utility is specified. " - "Set 'DIFF' environment variable to a preferred " - "diff utility and run 'make V=1 check' again to " - "see ASL difference."); - } + const char *diff_env = getenv("DIFF"); + const char *diff_cmd = diff_env ? 
diff_env : "diff -u"; + char *diff = g_strdup_printf("%s %s %s", diff_cmd, + exp_sdt->asl_file, sdt->asl_file); + int out = dup(STDOUT_FILENO); + int ret G_GNUC_UNUSED; + + dup2(STDERR_FILENO, STDOUT_FILENO); + ret = system(diff) ; + dup2(out, STDOUT_FILENO); + close(out); + g_free(diff); } } }