pci,virtio

This optimizes MSIX handling in virtio-pci.
 Also included is a PCI Express capability bugfix.
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.12 (GNU/Linux)
 
 iQEcBAABAgAGBQJQ2tD2AAoJECgfDbjSjVRpUNcIAKN2c+3iiutUWFBBII2TWppc
 QAQ4Q5HK7gCtAnwNrlQMAIXcUzHBd5s6BW74BaFBZYymf/tqe4CsvmIH15qQyvm0
 McdJAba3FLk0+TELG/Fmf4+faM/kr3gl5Cve3YJC69NHpcq3gi8V4696sP8cGfUt
 atA+NR8AITBJDmQlcq6Vwfp+t+B1MY9D9SROT/BmfO+/kY3krkhlPL2pdcoinBa2
 zKJLz+jE0tjz7kZ99bmbb2uzKImvtFwxCVZjhD0UINjDOWd9k6ao2pWQIEftv56z
 zwz/L8TKCFdM2350XXPg99f4WbrvBqmg3Slb4vrsIYEuAWvArI8sUSYG3rC4fS4=
 =8Jun
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'mst/tags/for_anthony' into staging

pci,virtio

This optimizes MSIX handling in virtio-pci.
Also included is a PCI Express capability bugfix.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>

* mst/tags/for_anthony:
  virtio-pci: don't poll masked vectors
  msix: expose access to masked/pending state
  msi: add API to get notified about pending bit poll
  pcie: Fix bug in pcie_ext_cap_set_next
  virtio: make bindings typesafe
This commit is contained in:
Anthony Liguori 2013-01-02 08:01:36 -06:00
commit 079944e695
9 changed files with 137 additions and 62 deletions

View File

@ -65,7 +65,7 @@ static int msix_is_pending(PCIDevice *dev, int vector)
return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
}
static void msix_set_pending(PCIDevice *dev, int vector)
void msix_set_pending(PCIDevice *dev, unsigned int vector)
{
*msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
}
@ -75,13 +75,13 @@ static void msix_clr_pending(PCIDevice *dev, int vector)
*msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
}
static bool msix_vector_masked(PCIDevice *dev, int vector, bool fmask)
static bool msix_vector_masked(PCIDevice *dev, unsigned int vector, bool fmask)
{
unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
return fmask || dev->msix_table[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT;
}
static bool msix_is_masked(PCIDevice *dev, int vector)
bool msix_is_masked(PCIDevice *dev, unsigned int vector)
{
return msix_vector_masked(dev, vector, dev->msix_function_masked);
}
@ -191,6 +191,11 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
unsigned size)
{
    /*
     * MMIO read handler for the MSI-X pending bit array (PBA).
     *
     * opaque: the PCIDevice that owns the PBA.
     * addr:   byte offset of the access within the PBA.
     * size:   width of the access in bytes.
     *
     * Returns the 32-bit value stored at dev->msix_pba + addr.
     */
    PCIDevice *dev = opaque;

    if (dev->msix_vector_poll_notifier) {
        /*
         * Each PBA byte holds the pending bits of 8 vectors, so the bytes
         * [addr, addr + size) cover vectors [addr * 8, (addr + size) * 8).
         * The sum must be parenthesised: "addr + size * 8" only yields the
         * right end for addr == 0 and gives a short or empty range
         * otherwise. The end is clamped to the number of MSI-X entries.
         */
        unsigned vector_start = addr * 8;
        unsigned vector_end = MIN((addr + size) * 8, dev->msix_entries_nr);

        /* Let the device refresh pending state before it is read back. */
        dev->msix_vector_poll_notifier(dev, vector_start, vector_end);
    }

    return pci_get_long(dev->msix_pba + addr);
}
@ -513,7 +518,8 @@ static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector)
int msix_set_vector_notifiers(PCIDevice *dev,
MSIVectorUseNotifier use_notifier,
MSIVectorReleaseNotifier release_notifier)
MSIVectorReleaseNotifier release_notifier,
MSIVectorPollNotifier poll_notifier)
{
int vector, ret;
@ -521,6 +527,7 @@ int msix_set_vector_notifiers(PCIDevice *dev,
dev->msix_vector_use_notifier = use_notifier;
dev->msix_vector_release_notifier = release_notifier;
dev->msix_vector_poll_notifier = poll_notifier;
if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
(MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
@ -531,6 +538,9 @@ int msix_set_vector_notifiers(PCIDevice *dev,
}
}
}
if (dev->msix_vector_poll_notifier) {
dev->msix_vector_poll_notifier(dev, 0, dev->msix_entries_nr);
}
return 0;
undo:
@ -557,4 +567,5 @@ void msix_unset_vector_notifiers(PCIDevice *dev)
}
dev->msix_vector_use_notifier = NULL;
dev->msix_vector_release_notifier = NULL;
dev->msix_vector_poll_notifier = NULL;
}

View File

@ -26,6 +26,9 @@ void msix_load(PCIDevice *dev, QEMUFile *f);
int msix_enabled(PCIDevice *dev);
int msix_present(PCIDevice *dev);
bool msix_is_masked(PCIDevice *dev, unsigned vector);
void msix_set_pending(PCIDevice *dev, unsigned vector);
int msix_vector_use(PCIDevice *dev, unsigned vector);
void msix_vector_unuse(PCIDevice *dev, unsigned vector);
void msix_unuse_all_vectors(PCIDevice *dev);
@ -36,6 +39,7 @@ void msix_reset(PCIDevice *dev);
int msix_set_vector_notifiers(PCIDevice *dev,
MSIVectorUseNotifier use_notifier,
MSIVectorReleaseNotifier release_notifier);
MSIVectorReleaseNotifier release_notifier,
MSIVectorPollNotifier poll_notifier);
void msix_unset_vector_notifiers(PCIDevice *dev);
#endif

View File

@ -187,6 +187,9 @@ typedef void (*PCIINTxRoutingNotifier)(PCIDevice *dev);
typedef int (*MSIVectorUseNotifier)(PCIDevice *dev, unsigned int vector,
MSIMessage msg);
typedef void (*MSIVectorReleaseNotifier)(PCIDevice *dev, unsigned int vector);
/*
 * Callback type used to poll a range of MSI-X vectors on a device.
 * dev is the device being polled; vector_start is the first vector of the
 * range. Callers visible alongside this typedef pass 0 and the device's
 * MSI-X entry count for a full poll, so vector_end looks like an exclusive
 * upper bound -- NOTE(review): confirm against every implementation.
 */
typedef void (*MSIVectorPollNotifier)(PCIDevice *dev,
unsigned int vector_start,
unsigned int vector_end);
struct PCIDevice {
DeviceState qdev;
@ -271,6 +274,7 @@ struct PCIDevice {
/* MSI-X notifiers */
MSIVectorUseNotifier msix_vector_use_notifier;
MSIVectorReleaseNotifier msix_vector_release_notifier;
MSIVectorPollNotifier msix_vector_poll_notifier;
};
void pci_register_bar(PCIDevice *pci_dev, int region_num,

View File

@ -494,7 +494,7 @@ uint16_t pcie_find_capability(PCIDevice *dev, uint16_t cap_id)
static void pcie_ext_cap_set_next(PCIDevice *dev, uint16_t pos, uint16_t next)
{
uint16_t header = pci_get_long(dev->config + pos);
uint32_t header = pci_get_long(dev->config + pos);
assert(!(next & (PCI_EXT_CAP_ALIGN - 1)));
header = (header & ~PCI_EXT_CAP_NEXT_MASK) |
((next << PCI_EXT_CAP_NEXT_SHIFT) & PCI_EXT_CAP_NEXT_MASK);

View File

@ -138,7 +138,7 @@ static int s390_virtio_device_init(VirtIOS390Device *dev, VirtIODevice *vdev)
bus->dev_offs += dev_len;
virtio_bind_device(vdev, &virtio_s390_bindings, dev);
virtio_bind_device(vdev, &virtio_s390_bindings, DEVICE(dev));
dev->host_features = vdev->get_features(vdev, dev->host_features);
s390_virtio_device_sync(dev);
s390_virtio_reset_idx(dev);
@ -364,18 +364,32 @@ VirtIOS390Device *s390_virtio_bus_find_mem(VirtIOS390Bus *bus, ram_addr_t mem)
return NULL;
}
static void virtio_s390_notify(void *opaque, uint16_t vector)
/* DeviceState to VirtIOS390Device. Note: used on datapath,
* be careful and test performance if you change this.
*/
static inline VirtIOS390Device *to_virtio_s390_device_fast(DeviceState *d)
{
VirtIOS390Device *dev = (VirtIOS390Device*)opaque;
return container_of(d, VirtIOS390Device, qdev);
}
/*
 * Map a DeviceState pointer back to the VirtIOS390Device that embeds it as
 * its qdev member. TODO: use QOM.
 */
static inline VirtIOS390Device *to_virtio_s390_device(DeviceState *d)
{
    VirtIOS390Device *s390_dev = container_of(d, VirtIOS390Device, qdev);

    return s390_dev;
}
/*
 * Notify callback for the s390 virtio transport (signature matches the
 * notify member of VirtIOBindings).
 *
 * d:      DeviceState embedded in the VirtIOS390Device.
 * vector: value handed to s390_virtio_device_vq_token() to obtain the token.
 *
 * The token is passed to s390_virtio_irq() on the CPU returned by
 * s390_cpu_addr2state(0).
 */
static void virtio_s390_notify(DeviceState *d, uint16_t vector)
{
    VirtIOS390Device *s390_dev = to_virtio_s390_device_fast(d);
    uint64_t vq_token;
    S390CPU *target_cpu;

    vq_token = s390_virtio_device_vq_token(s390_dev, vector);
    target_cpu = s390_cpu_addr2state(0);

    s390_virtio_irq(target_cpu, 0, vq_token);
}
static unsigned virtio_s390_get_features(void *opaque)
static unsigned virtio_s390_get_features(DeviceState *d)
{
VirtIOS390Device *dev = (VirtIOS390Device*)opaque;
VirtIOS390Device *dev = to_virtio_s390_device(d);
return dev->host_features;
}

View File

@ -698,7 +698,7 @@ static void vfio_enable_msix(VFIODevice *vdev)
vdev->interrupt = VFIO_INT_MSIX;
if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
vfio_msix_vector_release)) {
vfio_msix_vector_release, NULL)) {
error_report("vfio: msix_set_vector_notifiers failed\n");
}

View File

@ -96,35 +96,48 @@
bool virtio_is_big_endian(void);
/* virtio device */
static void virtio_pci_notify(void *opaque, uint16_t vector)
/* DeviceState to VirtIOPCIProxy. For use off data-path. TODO: use QOM. */
static inline VirtIOPCIProxy *to_virtio_pci_proxy(DeviceState *d)
{
VirtIOPCIProxy *proxy = opaque;
return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
}
/*
 * Map a DeviceState pointer back to the VirtIOPCIProxy that embeds it at
 * pci_dev.qdev. Note: this runs on the datapath, so be careful and test
 * performance if you change it.
 */
static inline VirtIOPCIProxy *to_virtio_pci_proxy_fast(DeviceState *d)
{
    VirtIOPCIProxy *proxy = container_of(d, VirtIOPCIProxy, pci_dev.qdev);

    return proxy;
}
/*
 * Notify callback for the virtio PCI transport.
 *
 * d:      DeviceState embedded in the VirtIOPCIProxy.
 * vector: MSI-X vector to signal; only used when MSI-X is enabled.
 *
 * With MSI-X enabled the vector is delivered through msix_notify();
 * otherwise irq[0] of the PCI device is set to bit 0 of the virtio ISR.
 */
static void virtio_pci_notify(DeviceState *d, uint16_t vector)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d);
    PCIDevice *pci_dev = &proxy->pci_dev;

    if (msix_enabled(pci_dev)) {
        msix_notify(pci_dev, vector);
        return;
    }

    qemu_set_irq(pci_dev->irq[0], proxy->vdev->isr & 1);
}
static void virtio_pci_save_config(void * opaque, QEMUFile *f)
static void virtio_pci_save_config(DeviceState *d, QEMUFile *f)
{
VirtIOPCIProxy *proxy = opaque;
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
pci_device_save(&proxy->pci_dev, f);
msix_save(&proxy->pci_dev, f);
if (msix_present(&proxy->pci_dev))
qemu_put_be16(f, proxy->vdev->config_vector);
}
static void virtio_pci_save_queue(void * opaque, int n, QEMUFile *f)
static void virtio_pci_save_queue(DeviceState *d, int n, QEMUFile *f)
{
VirtIOPCIProxy *proxy = opaque;
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
if (msix_present(&proxy->pci_dev))
qemu_put_be16(f, virtio_queue_vector(proxy->vdev, n));
}
static int virtio_pci_load_config(void * opaque, QEMUFile *f)
static int virtio_pci_load_config(DeviceState *d, QEMUFile *f)
{
VirtIOPCIProxy *proxy = opaque;
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
int ret;
ret = pci_device_load(&proxy->pci_dev, f);
if (ret) {
@ -143,9 +156,9 @@ static int virtio_pci_load_config(void * opaque, QEMUFile *f)
return 0;
}
static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f)
{
VirtIOPCIProxy *proxy = opaque;
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
uint16_t vector;
if (msix_present(&proxy->pci_dev)) {
qemu_get_be16s(f, &vector);
@ -243,7 +256,7 @@ static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy)
void virtio_pci_reset(DeviceState *d)
{
VirtIOPCIProxy *proxy = container_of(d, VirtIOPCIProxy, pci_dev.qdev);
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
virtio_pci_stop_ioeventfd(proxy);
virtio_reset(proxy->vdev);
msix_unuse_all_vectors(&proxy->pci_dev);
@ -463,9 +476,9 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
}
}
static unsigned virtio_pci_get_features(void *opaque)
static unsigned virtio_pci_get_features(DeviceState *d)
{
VirtIOPCIProxy *proxy = opaque;
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
return proxy->host_features;
}
@ -495,8 +508,6 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
}
return ret;
}
virtio_queue_set_guest_notifier_fd_handler(vq, true, true);
return 0;
}
@ -515,8 +526,6 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
if (--irqfd->users == 0) {
kvm_irqchip_release_virq(kvm_state, irqfd->virq);
}
virtio_queue_set_guest_notifier_fd_handler(vq, true, false);
}
static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector,
@ -567,9 +576,38 @@ static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector)
}
}
static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
static void kvm_virtio_pci_vector_poll(PCIDevice *dev,
unsigned int vector_start,
unsigned int vector_end)
{
VirtIOPCIProxy *proxy = opaque;
VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
VirtIODevice *vdev = proxy->vdev;
int queue_no;
unsigned int vector;
EventNotifier *notifier;
VirtQueue *vq;
for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
}
vector = virtio_queue_vector(vdev, queue_no);
if (vector < vector_start || vector >= vector_end ||
!msix_is_masked(dev, vector)) {
continue;
}
vq = virtio_get_queue(vdev, queue_no);
notifier = virtio_queue_get_guest_notifier(vq);
if (event_notifier_test_and_clear(notifier)) {
msix_set_pending(dev, vector);
}
}
}
static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
bool with_irqfd)
{
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
@ -578,29 +616,31 @@ static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
if (r < 0) {
return r;
}
virtio_queue_set_guest_notifier_fd_handler(vq, true, false);
virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd);
} else {
virtio_queue_set_guest_notifier_fd_handler(vq, false, false);
virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd);
event_notifier_cleanup(notifier);
}
return 0;
}
static bool virtio_pci_query_guest_notifiers(void *opaque)
static bool virtio_pci_query_guest_notifiers(DeviceState *d)
{
VirtIOPCIProxy *proxy = opaque;
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
return msix_enabled(&proxy->pci_dev);
}
static int virtio_pci_set_guest_notifiers(void *opaque, bool assign)
static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
{
VirtIOPCIProxy *proxy = opaque;
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
VirtIODevice *vdev = proxy->vdev;
int r, n;
bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
kvm_msi_via_irqfd_enabled();
/* Must unset vector notifier while guest notifier is still assigned */
if (kvm_msi_via_irqfd_enabled() && !assign) {
if (proxy->vector_irqfd && !assign) {
msix_unset_vector_notifiers(&proxy->pci_dev);
g_free(proxy->vector_irqfd);
proxy->vector_irqfd = NULL;
@ -611,20 +651,22 @@ static int virtio_pci_set_guest_notifiers(void *opaque, bool assign)
break;
}
r = virtio_pci_set_guest_notifier(opaque, n, assign);
r = virtio_pci_set_guest_notifier(d, n, assign,
kvm_msi_via_irqfd_enabled());
if (r < 0) {
goto assign_error;
}
}
/* Must set vector notifier after guest notifier has been assigned */
if (kvm_msi_via_irqfd_enabled() && assign) {
if (with_irqfd && assign) {
proxy->vector_irqfd =
g_malloc0(sizeof(*proxy->vector_irqfd) *
msix_nr_vectors_allocated(&proxy->pci_dev));
r = msix_set_vector_notifiers(&proxy->pci_dev,
kvm_virtio_pci_vector_use,
kvm_virtio_pci_vector_release);
kvm_virtio_pci_vector_release,
kvm_virtio_pci_vector_poll);
if (r < 0) {
goto assign_error;
}
@ -636,14 +678,14 @@ assign_error:
/* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
assert(assign);
while (--n >= 0) {
virtio_pci_set_guest_notifier(opaque, n, !assign);
virtio_pci_set_guest_notifier(d, n, !assign, with_irqfd);
}
return r;
}
static int virtio_pci_set_host_notifier(void *opaque, int n, bool assign)
static int virtio_pci_set_host_notifier(DeviceState *d, int n, bool assign)
{
VirtIOPCIProxy *proxy = opaque;
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
/* Stop using ioeventfd for virtqueue kick if the device starts using host
* notifiers. This makes it easy to avoid stepping on each others' toes.
@ -659,9 +701,9 @@ static int virtio_pci_set_host_notifier(void *opaque, int n, bool assign)
return virtio_pci_set_host_notifier_internal(proxy, n, assign, false);
}
static void virtio_pci_vmstate_change(void *opaque, bool running)
static void virtio_pci_vmstate_change(DeviceState *d, bool running)
{
VirtIOPCIProxy *proxy = opaque;
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
if (running) {
/* Try to find out if the guest has bus master disabled, but is
@ -726,7 +768,7 @@ void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev)
proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
}
virtio_bind_device(vdev, &virtio_pci_bindings, proxy);
virtio_bind_device(vdev, &virtio_pci_bindings, DEVICE(proxy));
proxy->host_features |= 0x1 << VIRTIO_F_NOTIFY_ON_EMPTY;
proxy->host_features |= 0x1 << VIRTIO_F_BAD_FEATURE;
proxy->host_features = vdev->get_features(vdev, proxy->host_features);

View File

@ -935,7 +935,7 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
}
void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
void *opaque)
DeviceState *opaque)
{
vdev->binding = binding;
vdev->binding_opaque = opaque;

View File

@ -91,17 +91,17 @@ typedef struct VirtQueueElement
} VirtQueueElement;
typedef struct {
void (*notify)(void * opaque, uint16_t vector);
void (*save_config)(void * opaque, QEMUFile *f);
void (*save_queue)(void * opaque, int n, QEMUFile *f);
int (*load_config)(void * opaque, QEMUFile *f);
int (*load_queue)(void * opaque, int n, QEMUFile *f);
int (*load_done)(void * opaque, QEMUFile *f);
unsigned (*get_features)(void * opaque);
bool (*query_guest_notifiers)(void * opaque);
int (*set_guest_notifiers)(void * opaque, bool assigned);
int (*set_host_notifier)(void * opaque, int n, bool assigned);
void (*vmstate_change)(void * opaque, bool running);
void (*notify)(DeviceState *d, uint16_t vector);
void (*save_config)(DeviceState *d, QEMUFile *f);
void (*save_queue)(DeviceState *d, int n, QEMUFile *f);
int (*load_config)(DeviceState *d, QEMUFile *f);
int (*load_queue)(DeviceState *d, int n, QEMUFile *f);
int (*load_done)(DeviceState *d, QEMUFile *f);
unsigned (*get_features)(DeviceState *d);
bool (*query_guest_notifiers)(DeviceState *d);
int (*set_guest_notifiers)(DeviceState *d, bool assigned);
int (*set_host_notifier)(DeviceState *d, int n, bool assigned);
void (*vmstate_change)(DeviceState *d, bool running);
} VirtIOBindings;
#define VIRTIO_PCI_QUEUE_MAX 64
@ -128,7 +128,7 @@ struct VirtIODevice
void (*set_status)(VirtIODevice *vdev, uint8_t val);
VirtQueue *vq;
const VirtIOBindings *binding;
void *binding_opaque;
DeviceState *binding_opaque;
uint16_t device_id;
bool vm_running;
VMChangeStateEntry *vmstate;
@ -191,7 +191,7 @@ void virtio_update_irq(VirtIODevice *vdev);
int virtio_set_features(VirtIODevice *vdev, uint32_t val);
void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
void *opaque);
DeviceState *opaque);
/* Base devices. */
typedef struct VirtIOBlkConf VirtIOBlkConf;