qemu/include/hw/vfio/vfio-common.h
Alexey Kardashevskiy c26bc185b7 vfio/spapr: Allow backing bigger guest IOMMU pages with smaller physical pages
At the moment the PPC64/pseries guest only supports 4K/64K/16M IOMMU
pages and POWER8 CPU supports the exact same set of page size so
so far things worked fine.

However POWER9 supports different set of sizes - 4K/64K/2M/1G and
the last two - 2M and 1G - are not even allowed in the paravirt interface
(RTAS DDW) so we always end up using 64K IOMMU pages, although we could
back guest's 16MB IOMMU pages with 2MB pages on the host.

This stores the supported host IOMMU page sizes in VFIOContainer and uses
this later when creating a new DMA window. This uses the system page size
(64k normally, 2M/16M/1G if hugepages used) as the upper limit of
the IOMMU pagesize.

This changes the type of @pagesize to uint64_t as this is what
memory_region_iommu_get_min_page_size() returns and clz64() takes.

There should be no behavioral changes on platforms other than pseries.
The guest will keep using the IOMMU page size selected by the PHB pagesize
property as this only changes the underlying hardware TCE table
granularity.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2018-08-21 14:28:45 +10:00

200 lines
5.9 KiB
C

/*
* common header for vfio based device assignment support
*
* Copyright Red Hat, Inc. 2012
*
* Authors:
* Alex Williamson <alex.williamson@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
* Based on qemu-kvm device-assignment:
* Adapted for KVM by Qumranet.
* Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
* Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
* Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
* Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
* Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
*/
#ifndef HW_VFIO_VFIO_COMMON_H
#define HW_VFIO_VFIO_COMMON_H
#include "qemu-common.h"
#include "exec/memory.h"
#include "qemu/queue.h"
#include "qemu/notify.h"
#include "ui/console.h"
#ifdef CONFIG_LINUX
#include <linux/vfio.h>
#endif
#define ERR_PREFIX "vfio error: %s: "
#define WARN_PREFIX "vfio warning: %s: "
enum {
VFIO_DEVICE_TYPE_PCI = 0,
VFIO_DEVICE_TYPE_PLATFORM = 1,
VFIO_DEVICE_TYPE_CCW = 2,
};
typedef struct VFIOMmap {
MemoryRegion mem;
void *mmap;
off_t offset;
size_t size;
} VFIOMmap;
typedef struct VFIORegion {
struct VFIODevice *vbasedev;
off_t fd_offset; /* offset of region within device fd */
MemoryRegion *mem; /* slow, read/write access */
size_t size;
uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
uint32_t nr_mmaps;
VFIOMmap *mmaps;
uint8_t nr; /* cache the region number for debug */
} VFIORegion;
typedef struct VFIOAddressSpace {
AddressSpace *as;
QLIST_HEAD(, VFIOContainer) containers;
QLIST_ENTRY(VFIOAddressSpace) list;
} VFIOAddressSpace;
struct VFIOGroup;
typedef struct VFIOContainer {
VFIOAddressSpace *space;
int fd; /* /dev/vfio/vfio, empowered by the attached groups */
MemoryListener listener;
MemoryListener prereg_listener;
unsigned iommu_type;
int error;
bool initialized;
unsigned long pgsizes;
/*
* This assumes the host IOMMU can support only a single
* contiguous IOVA window. We may need to generalize that in
* future
*/
QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
QLIST_HEAD(, VFIOGroup) group_list;
QLIST_ENTRY(VFIOContainer) next;
} VFIOContainer;
typedef struct VFIOGuestIOMMU {
VFIOContainer *container;
IOMMUMemoryRegion *iommu;
hwaddr iommu_offset;
IOMMUNotifier n;
QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
} VFIOGuestIOMMU;
typedef struct VFIOHostDMAWindow {
hwaddr min_iova;
hwaddr max_iova;
uint64_t iova_pgsizes;
QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next;
} VFIOHostDMAWindow;
typedef struct VFIODeviceOps VFIODeviceOps;
typedef struct VFIODevice {
QLIST_ENTRY(VFIODevice) next;
struct VFIOGroup *group;
char *sysfsdev;
char *name;
DeviceState *dev;
int fd;
int type;
bool reset_works;
bool needs_reset;
bool no_mmap;
bool balloon_allowed;
VFIODeviceOps *ops;
unsigned int num_irqs;
unsigned int num_regions;
unsigned int flags;
} VFIODevice;
struct VFIODeviceOps {
void (*vfio_compute_needs_reset)(VFIODevice *vdev);
int (*vfio_hot_reset_multi)(VFIODevice *vdev);
void (*vfio_eoi)(VFIODevice *vdev);
};
typedef struct VFIOGroup {
int fd;
int groupid;
VFIOContainer *container;
QLIST_HEAD(, VFIODevice) device_list;
QLIST_ENTRY(VFIOGroup) next;
QLIST_ENTRY(VFIOGroup) container_next;
bool balloon_allowed;
} VFIOGroup;
typedef struct VFIODMABuf {
QemuDmaBuf buf;
uint32_t pos_x, pos_y, pos_updates;
uint32_t hot_x, hot_y, hot_updates;
int dmabuf_id;
QTAILQ_ENTRY(VFIODMABuf) next;
} VFIODMABuf;
typedef struct VFIODisplay {
QemuConsole *con;
struct {
VFIORegion buffer;
DisplaySurface *surface;
} region;
struct {
QTAILQ_HEAD(, VFIODMABuf) bufs;
VFIODMABuf *primary;
VFIODMABuf *cursor;
} dmabuf;
} VFIODisplay;
void vfio_put_base_device(VFIODevice *vbasedev);
void vfio_disable_irqindex(VFIODevice *vbasedev, int index);
void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index);
void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index);
void vfio_region_write(void *opaque, hwaddr addr,
uint64_t data, unsigned size);
uint64_t vfio_region_read(void *opaque,
hwaddr addr, unsigned size);
int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
int index, const char *name);
int vfio_region_mmap(VFIORegion *region);
void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled);
void vfio_region_exit(VFIORegion *region);
void vfio_region_finalize(VFIORegion *region);
void vfio_reset_handler(void *opaque);
VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp);
void vfio_put_group(VFIOGroup *group);
int vfio_get_device(VFIOGroup *group, const char *name,
VFIODevice *vbasedev, Error **errp);
extern const MemoryRegionOps vfio_region_ops;
extern QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list;
extern QLIST_HEAD(vfio_as_head, VFIOAddressSpace) vfio_address_spaces;
#ifdef CONFIG_LINUX
int vfio_get_region_info(VFIODevice *vbasedev, int index,
struct vfio_region_info **info);
int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
uint32_t subtype, struct vfio_region_info **info);
bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type);
#endif
extern const MemoryListener vfio_prereg_listener;
int vfio_spapr_create_window(VFIOContainer *container,
MemoryRegionSection *section,
hwaddr *pgsize);
int vfio_spapr_remove_window(VFIOContainer *container,
hwaddr offset_within_address_space);
#endif /* HW_VFIO_VFIO_COMMON_H */