ivshmem: Fixes, cleanups, device model split
-----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABAgAGBQJW8FqyAAoJEDhwtADrkYZTjYcP/R1m2LcFnLTxzDjSK38nxWcw 5t/Do7nBNgXL2ZdRHfJsy7bx/9RR55k16rvzkFgW8LpUa5Ro64onRh2PfMz2p0e8 QvZRBhXTh5/y4TD61y5Y8d9xawA6Hr1oEUtwsfovI9EiXzVaLl3sLI/nleed68Rk eAD2h8+ZcBeJ+lRK3UHEzAvqh0u+IScRMJifCxHyJuoZiylHIHVVq7x40ywg0Ejq 8wHEj/nDJZHUxbuH4sm215Lv4dK6CmIP8UzuhfY6MxAS6Jo7Zdk1zv2SjJO2DzwT rWU4hD0+khwTz3hBR341oWxb84C5MujPwkeP7mibR46HLHCn5imQMz0W+6tj7umb dxnwPpXzON00+56B7e4i21aXTO0IaY3AcL9QuETSAaoy3SD5BdDkt3R9XWM+jqqZ armE5nNAv8WEN8qUYL/YpBxFDYSZ3CFgNv1enoP2pSp4DqeF/H3aP4RWu+dYqLDm MyVhcXUkjHfTCY6NVPPBkNwSvz2vq4ft/b6t7tLN+0ZmIRsEegKxxRrI2vB6O8Ga Gh2iKcJfMp90jwwvywfGO+DNQ8npHvhxMkioyzMHflo0QyS2ZDhlf4ubp7cXlYZ6 tj7iGXJKJQpQyJWA58k8EXR9wc2W+fgRYD/H61QTTyTUgxEo6w10KjBDTsbFwvIY R0poHCfRR0DQ7y3GerZO =XEMm -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/armbru/tags/pull-ivshmem-2016-03-18' into staging ivshmem: Fixes, cleanups, device model split # gpg: Signature made Mon 21 Mar 2016 20:33:54 GMT using RSA key ID EB918653 # gpg: Good signature from "Markus Armbruster <armbru@redhat.com>" # gpg: aka "Markus Armbruster <armbru@pond.sub.org>" * remotes/armbru/tags/pull-ivshmem-2016-03-18: (40 commits) contrib/ivshmem-server: Print "not for production" warning ivshmem: Require master to have ID zero ivshmem: Drop ivshmem property x-memdev ivshmem: Clean up after the previous commit ivshmem: Split ivshmem-plain, ivshmem-doorbell off ivshmem ivshmem: Replace int role_val by OnOffAuto master qdev: New DEFINE_PROP_ON_OFF_AUTO ivshmem: Inline check_shm_size() into its only caller ivshmem: Simplify memory regions for BAR 2 (shared memory) ivshmem: Implement shm=... 
with a memory backend ivshmem: Tighten check of property "size" ivshmem: Simplify how we cope with short reads from server ivshmem: Drop the hackish test for UNIX domain chardev ivshmem: Rely on server sending the ID right after the version ivshmem: Propagate errors through ivshmem_recv_setup() ivshmem: Receive shared memory synchronously in realize() ivshmem: Plug leaks on unplug, fix peer disconnect ivshmem: Disentangle ivshmem_read() ivshmem: Simplify rejection of invalid peer ID from server ivshmem: Assert interrupts are set up once ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
2538039f2c
@ -12,9 +12,6 @@
|
|||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
#include <sys/un.h>
|
#include <sys/un.h>
|
||||||
#ifdef CONFIG_LINUX
|
|
||||||
#include <sys/vfs.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "ivshmem-server.h"
|
#include "ivshmem-server.h"
|
||||||
|
|
||||||
@ -257,7 +254,8 @@ ivshmem_server_ftruncate(int fd, unsigned shmsize)
|
|||||||
/* Init a new ivshmem server */
|
/* Init a new ivshmem server */
|
||||||
int
|
int
|
||||||
ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
|
ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
|
||||||
const char *shm_path, size_t shm_size, unsigned n_vectors,
|
const char *shm_path, bool use_shm_open,
|
||||||
|
size_t shm_size, unsigned n_vectors,
|
||||||
bool verbose)
|
bool verbose)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
@ -278,6 +276,7 @@ ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
server->use_shm_open = use_shm_open;
|
||||||
server->shm_size = shm_size;
|
server->shm_size = shm_size;
|
||||||
server->n_vectors = n_vectors;
|
server->n_vectors = n_vectors;
|
||||||
|
|
||||||
@ -286,31 +285,6 @@ ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_LINUX
|
|
||||||
|
|
||||||
#define HUGETLBFS_MAGIC 0x958458f6
|
|
||||||
|
|
||||||
static long gethugepagesize(const char *path)
|
|
||||||
{
|
|
||||||
struct statfs fs;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
do {
|
|
||||||
ret = statfs(path, &fs);
|
|
||||||
} while (ret != 0 && errno == EINTR);
|
|
||||||
|
|
||||||
if (ret != 0) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fs.f_type != HUGETLBFS_MAGIC) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return fs.f_bsize;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* open shm, create and bind to the unix socket */
|
/* open shm, create and bind to the unix socket */
|
||||||
int
|
int
|
||||||
ivshmem_server_start(IvshmemServer *server)
|
ivshmem_server_start(IvshmemServer *server)
|
||||||
@ -319,27 +293,17 @@ ivshmem_server_start(IvshmemServer *server)
|
|||||||
int shm_fd, sock_fd, ret;
|
int shm_fd, sock_fd, ret;
|
||||||
|
|
||||||
/* open shm file */
|
/* open shm file */
|
||||||
#ifdef CONFIG_LINUX
|
if (server->use_shm_open) {
|
||||||
long hpagesize;
|
IVSHMEM_SERVER_DEBUG(server, "Using POSIX shared memory: %s\n",
|
||||||
|
server->shm_path);
|
||||||
hpagesize = gethugepagesize(server->shm_path);
|
shm_fd = shm_open(server->shm_path, O_CREAT | O_RDWR, S_IRWXU);
|
||||||
if (hpagesize < 0 && errno != ENOENT) {
|
} else {
|
||||||
IVSHMEM_SERVER_DEBUG(server, "cannot stat shm file %s: %s\n",
|
|
||||||
server->shm_path, strerror(errno));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (hpagesize > 0) {
|
|
||||||
gchar *filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path);
|
gchar *filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path);
|
||||||
IVSHMEM_SERVER_DEBUG(server, "Using hugepages: %s\n", server->shm_path);
|
IVSHMEM_SERVER_DEBUG(server, "Using file-backed shared memory: %s\n",
|
||||||
|
server->shm_path);
|
||||||
shm_fd = mkstemp(filename);
|
shm_fd = mkstemp(filename);
|
||||||
unlink(filename);
|
unlink(filename);
|
||||||
g_free(filename);
|
g_free(filename);
|
||||||
} else
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
IVSHMEM_SERVER_DEBUG(server, "Using POSIX shared memory: %s\n",
|
|
||||||
server->shm_path);
|
|
||||||
shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (shm_fd < 0) {
|
if (shm_fd < 0) {
|
||||||
|
@ -66,6 +66,7 @@ typedef struct IvshmemServer {
|
|||||||
char unix_sock_path[PATH_MAX]; /**< path to unix socket */
|
char unix_sock_path[PATH_MAX]; /**< path to unix socket */
|
||||||
int sock_fd; /**< unix sock file descriptor */
|
int sock_fd; /**< unix sock file descriptor */
|
||||||
char shm_path[PATH_MAX]; /**< path to shm */
|
char shm_path[PATH_MAX]; /**< path to shm */
|
||||||
|
bool use_shm_open;
|
||||||
size_t shm_size; /**< size of shm */
|
size_t shm_size; /**< size of shm */
|
||||||
int shm_fd; /**< shm file descriptor */
|
int shm_fd; /**< shm file descriptor */
|
||||||
unsigned n_vectors; /**< number of vectors */
|
unsigned n_vectors; /**< number of vectors */
|
||||||
@ -89,7 +90,8 @@ typedef struct IvshmemServer {
|
|||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
|
ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
|
||||||
const char *shm_path, size_t shm_size, unsigned n_vectors,
|
const char *shm_path, bool use_shm_open,
|
||||||
|
size_t shm_size, unsigned n_vectors,
|
||||||
bool verbose);
|
bool verbose);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -29,35 +29,38 @@ typedef struct IvshmemServerArgs {
|
|||||||
const char *pid_file;
|
const char *pid_file;
|
||||||
const char *unix_socket_path;
|
const char *unix_socket_path;
|
||||||
const char *shm_path;
|
const char *shm_path;
|
||||||
|
bool use_shm_open;
|
||||||
uint64_t shm_size;
|
uint64_t shm_size;
|
||||||
unsigned n_vectors;
|
unsigned n_vectors;
|
||||||
} IvshmemServerArgs;
|
} IvshmemServerArgs;
|
||||||
|
|
||||||
/* show ivshmem_server_usage and exit with given error code */
|
|
||||||
static void
|
static void
|
||||||
ivshmem_server_usage(const char *name, int code)
|
ivshmem_server_usage(const char *progname)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "%s [opts]\n", name);
|
printf("Usage: %s [OPTION]...\n"
|
||||||
fprintf(stderr, " -h: show this help\n");
|
" -h: show this help\n"
|
||||||
fprintf(stderr, " -v: verbose mode\n");
|
" -v: verbose mode\n"
|
||||||
fprintf(stderr, " -F: foreground mode (default is to daemonize)\n");
|
" -F: foreground mode (default is to daemonize)\n"
|
||||||
fprintf(stderr, " -p <pid_file>: path to the PID file (used in daemon\n"
|
" -p <pid-file>: path to the PID file (used in daemon mode only)\n"
|
||||||
" mode only).\n"
|
" default " IVSHMEM_SERVER_DEFAULT_PID_FILE "\n"
|
||||||
" Default=%s\n", IVSHMEM_SERVER_DEFAULT_SHM_PATH);
|
" -S <unix-socket-path>: path to the unix socket to listen to\n"
|
||||||
fprintf(stderr, " -S <unix_socket_path>: path to the unix socket\n"
|
" default " IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH "\n"
|
||||||
" to listen to.\n"
|
" -M <shm-name>: POSIX shared memory object to use\n"
|
||||||
" Default=%s\n", IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH);
|
" default " IVSHMEM_SERVER_DEFAULT_SHM_PATH "\n"
|
||||||
fprintf(stderr, " -m <shm_path>: path to the shared memory.\n"
|
" -m <dir-name>: where to create shared memory\n"
|
||||||
" The path corresponds to a POSIX shm name or a\n"
|
" -l <size>: size of shared memory in bytes\n"
|
||||||
" hugetlbfs mount point.\n"
|
" suffixes K, M and G can be used, e.g. 1K means 1024\n"
|
||||||
" default=%s\n", IVSHMEM_SERVER_DEFAULT_SHM_PATH);
|
" default %u\n"
|
||||||
fprintf(stderr, " -l <size>: size of shared memory in bytes. The suffix\n"
|
" -n <nvectors>: number of vectors\n"
|
||||||
" K, M and G can be used (ex: 1K means 1024).\n"
|
" default %u\n",
|
||||||
" default=%u\n", IVSHMEM_SERVER_DEFAULT_SHM_SIZE);
|
progname, IVSHMEM_SERVER_DEFAULT_SHM_SIZE,
|
||||||
fprintf(stderr, " -n <n_vects>: number of vectors.\n"
|
IVSHMEM_SERVER_DEFAULT_N_VECTORS);
|
||||||
" default=%u\n", IVSHMEM_SERVER_DEFAULT_N_VECTORS);
|
}
|
||||||
|
|
||||||
exit(code);
|
static void
|
||||||
|
ivshmem_server_help(const char *progname)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Try '%s -h' for more information.\n", progname);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* parse the program arguments, exit on error */
|
/* parse the program arguments, exit on error */
|
||||||
@ -68,20 +71,12 @@ ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[])
|
|||||||
unsigned long long v;
|
unsigned long long v;
|
||||||
Error *err = NULL;
|
Error *err = NULL;
|
||||||
|
|
||||||
while ((c = getopt(argc, argv,
|
while ((c = getopt(argc, argv, "hvFp:S:m:M:l:n:")) != -1) {
|
||||||
"h" /* help */
|
|
||||||
"v" /* verbose */
|
|
||||||
"F" /* foreground */
|
|
||||||
"p:" /* pid_file */
|
|
||||||
"S:" /* unix_socket_path */
|
|
||||||
"m:" /* shm_path */
|
|
||||||
"l:" /* shm_size */
|
|
||||||
"n:" /* n_vectors */
|
|
||||||
)) != -1) {
|
|
||||||
|
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'h': /* help */
|
case 'h': /* help */
|
||||||
ivshmem_server_usage(argv[0], 0);
|
ivshmem_server_usage(argv[0]);
|
||||||
|
exit(0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'v': /* verbose */
|
case 'v': /* verbose */
|
||||||
@ -92,36 +87,41 @@ ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[])
|
|||||||
args->foreground = 1;
|
args->foreground = 1;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'p': /* pid_file */
|
case 'p': /* pid file */
|
||||||
args->pid_file = optarg;
|
args->pid_file = optarg;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'S': /* unix_socket_path */
|
case 'S': /* unix socket path */
|
||||||
args->unix_socket_path = optarg;
|
args->unix_socket_path = optarg;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'm': /* shm_path */
|
case 'M': /* shm name */
|
||||||
|
case 'm': /* dir name */
|
||||||
args->shm_path = optarg;
|
args->shm_path = optarg;
|
||||||
|
args->use_shm_open = c == 'M';
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'l': /* shm_size */
|
case 'l': /* shm size */
|
||||||
parse_option_size("shm_size", optarg, &args->shm_size, &err);
|
parse_option_size("shm_size", optarg, &args->shm_size, &err);
|
||||||
if (err) {
|
if (err) {
|
||||||
error_report_err(err);
|
error_report_err(err);
|
||||||
ivshmem_server_usage(argv[0], 1);
|
ivshmem_server_help(argv[0]);
|
||||||
|
exit(1);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'n': /* n_vectors */
|
case 'n': /* number of vectors */
|
||||||
if (parse_uint_full(optarg, &v, 0) < 0) {
|
if (parse_uint_full(optarg, &v, 0) < 0) {
|
||||||
fprintf(stderr, "cannot parse n_vectors\n");
|
fprintf(stderr, "cannot parse n_vectors\n");
|
||||||
ivshmem_server_usage(argv[0], 1);
|
ivshmem_server_help(argv[0]);
|
||||||
|
exit(1);
|
||||||
}
|
}
|
||||||
args->n_vectors = v;
|
args->n_vectors = v;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
ivshmem_server_usage(argv[0], 1);
|
ivshmem_server_usage(argv[0]);
|
||||||
|
exit(1);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -129,12 +129,14 @@ ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[])
|
|||||||
if (args->n_vectors > IVSHMEM_SERVER_MAX_VECTORS) {
|
if (args->n_vectors > IVSHMEM_SERVER_MAX_VECTORS) {
|
||||||
fprintf(stderr, "too many requested vectors (max is %d)\n",
|
fprintf(stderr, "too many requested vectors (max is %d)\n",
|
||||||
IVSHMEM_SERVER_MAX_VECTORS);
|
IVSHMEM_SERVER_MAX_VECTORS);
|
||||||
ivshmem_server_usage(argv[0], 1);
|
ivshmem_server_help(argv[0]);
|
||||||
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (args->verbose == 1 && args->foreground == 0) {
|
if (args->verbose == 1 && args->foreground == 0) {
|
||||||
fprintf(stderr, "cannot use verbose in daemon mode\n");
|
fprintf(stderr, "cannot use verbose in daemon mode\n");
|
||||||
ivshmem_server_usage(argv[0], 1);
|
ivshmem_server_help(argv[0]);
|
||||||
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -192,11 +194,18 @@ main(int argc, char *argv[])
|
|||||||
.pid_file = IVSHMEM_SERVER_DEFAULT_PID_FILE,
|
.pid_file = IVSHMEM_SERVER_DEFAULT_PID_FILE,
|
||||||
.unix_socket_path = IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH,
|
.unix_socket_path = IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH,
|
||||||
.shm_path = IVSHMEM_SERVER_DEFAULT_SHM_PATH,
|
.shm_path = IVSHMEM_SERVER_DEFAULT_SHM_PATH,
|
||||||
|
.use_shm_open = true,
|
||||||
.shm_size = IVSHMEM_SERVER_DEFAULT_SHM_SIZE,
|
.shm_size = IVSHMEM_SERVER_DEFAULT_SHM_SIZE,
|
||||||
.n_vectors = IVSHMEM_SERVER_DEFAULT_N_VECTORS,
|
.n_vectors = IVSHMEM_SERVER_DEFAULT_N_VECTORS,
|
||||||
};
|
};
|
||||||
int ret = 1;
|
int ret = 1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Do not remove this notice without adding proper error handling!
|
||||||
|
* Start with handling ivshmem_server_send_one_msg() failure.
|
||||||
|
*/
|
||||||
|
printf("*** Example code, do not use in production ***\n");
|
||||||
|
|
||||||
/* parse arguments, will exit on error */
|
/* parse arguments, will exit on error */
|
||||||
ivshmem_server_parse_args(&args, argc, argv);
|
ivshmem_server_parse_args(&args, argc, argv);
|
||||||
|
|
||||||
@ -219,7 +228,8 @@ main(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* init the ivshms structure */
|
/* init the ivshms structure */
|
||||||
if (ivshmem_server_init(&server, args.unix_socket_path, args.shm_path,
|
if (ivshmem_server_init(&server, args.unix_socket_path,
|
||||||
|
args.shm_path, args.use_shm_open,
|
||||||
args.shm_size, args.n_vectors, args.verbose) < 0) {
|
args.shm_size, args.n_vectors, args.verbose) < 0) {
|
||||||
fprintf(stderr, "cannot init server\n");
|
fprintf(stderr, "cannot init server\n");
|
||||||
goto err;
|
goto err;
|
||||||
|
@ -36,5 +36,5 @@ CONFIG_SDHCI=y
|
|||||||
CONFIG_EDU=y
|
CONFIG_EDU=y
|
||||||
CONFIG_VGA=y
|
CONFIG_VGA=y
|
||||||
CONFIG_VGA_PCI=y
|
CONFIG_VGA_PCI=y
|
||||||
CONFIG_IVSHMEM=$(CONFIG_POSIX)
|
CONFIG_IVSHMEM=$(CONFIG_EVENTFD)
|
||||||
CONFIG_ROCKER=y
|
CONFIG_ROCKER=y
|
||||||
|
254
docs/specs/ivshmem-spec.txt
Normal file
254
docs/specs/ivshmem-spec.txt
Normal file
@ -0,0 +1,254 @@
|
|||||||
|
= Device Specification for Inter-VM shared memory device =
|
||||||
|
|
||||||
|
The Inter-VM shared memory device (ivshmem) is designed to share a
|
||||||
|
memory region between multiple QEMU processes running different guests
|
||||||
|
and the host. In order for all guests to be able to pick up the
|
||||||
|
shared memory area, it is modeled by QEMU as a PCI device exposing
|
||||||
|
said memory to the guest as a PCI BAR.
|
||||||
|
|
||||||
|
The device can use a shared memory object on the host directly, or it
|
||||||
|
can obtain one from an ivshmem server.
|
||||||
|
|
||||||
|
In the latter case, the device can additionally interrupt its peers, and
|
||||||
|
get interrupted by its peers.
|
||||||
|
|
||||||
|
|
||||||
|
== Configuring the ivshmem PCI device ==
|
||||||
|
|
||||||
|
There are two basic configurations:
|
||||||
|
|
||||||
|
- Just shared memory: -device ivshmem-plain,memdev=HMB,...
|
||||||
|
|
||||||
|
This uses host memory backend HMB. It should have option "share"
|
||||||
|
set.
|
||||||
|
|
||||||
|
- Shared memory plus interrupts: -device ivshmem-doorbell,chardev=CHR,vectors=N,...
|
||||||
|
|
||||||
|
An ivshmem server must already be running on the host. The device
|
||||||
|
connects to the server's UNIX domain socket via character device
|
||||||
|
CHR.
|
||||||
|
|
||||||
|
Each peer gets assigned a unique ID by the server. IDs must be
|
||||||
|
between 0 and 65535.
|
||||||
|
|
||||||
|
Interrupts are message-signaled (MSI-X). vectors=N configures the
|
||||||
|
number of vectors to use.
|
||||||
|
|
||||||
|
For more details on ivshmem device properties, see The QEMU Emulator
|
||||||
|
User Documentation (qemu-doc.*).
|
||||||
|
|
||||||
|
|
||||||
|
== The ivshmem PCI device's guest interface ==
|
||||||
|
|
||||||
|
The device has vendor ID 1af4, device ID 1110, revision 1. Before
|
||||||
|
QEMU 2.6.0, it had revision 0.
|
||||||
|
|
||||||
|
=== PCI BARs ===
|
||||||
|
|
||||||
|
The ivshmem PCI device has two or three BARs:
|
||||||
|
|
||||||
|
- BAR0 holds device registers (256 Byte MMIO)
|
||||||
|
- BAR1 holds MSI-X table and PBA (only ivshmem-doorbell)
|
||||||
|
- BAR2 maps the shared memory object
|
||||||
|
|
||||||
|
There are two ways to use this device:
|
||||||
|
|
||||||
|
- If you only need the shared memory part, BAR2 suffices. This way,
|
||||||
|
you have access to the shared memory in the guest and can use it as
|
||||||
|
you see fit. Memnic, for example, uses ivshmem this way from guest
|
||||||
|
user space (see http://dpdk.org/browse/memnic).
|
||||||
|
|
||||||
|
- If you additionally need the capability for peers to interrupt each
|
||||||
|
other, you need BAR0 and BAR1. You will most likely want to write a
|
||||||
|
kernel driver to handle interrupts. Requires the device to be
|
||||||
|
configured for interrupts, obviously.
|
||||||
|
|
||||||
|
Before QEMU 2.6.0, BAR2 can initially be invalid if the device is
|
||||||
|
configured for interrupts. It becomes safely accessible only after
|
||||||
|
the ivshmem server provided the shared memory. These devices have PCI
|
||||||
|
revision 0 rather than 1. Guest software should wait for the
|
||||||
|
IVPosition register (described below) to become non-negative before
|
||||||
|
accessing BAR2.
|
||||||
|
|
||||||
|
Revision 0 of the device cannot tell guest software whether
|
||||||
|
it is configured for interrupts.
|
||||||
|
|
||||||
|
=== PCI device registers ===
|
||||||
|
|
||||||
|
BAR 0 contains the following registers:
|
||||||
|
|
||||||
|
Offset Size Access On reset Function
|
||||||
|
0 4 read/write 0 Interrupt Mask
|
||||||
|
bit 0: peer interrupt (rev 0)
|
||||||
|
reserved (rev 1)
|
||||||
|
bit 1..31: reserved
|
||||||
|
4 4 read/write 0 Interrupt Status
|
||||||
|
bit 0: peer interrupt (rev 0)
|
||||||
|
reserved (rev 1)
|
||||||
|
bit 1..31: reserved
|
||||||
|
8 4 read-only 0 or ID IVPosition
|
||||||
|
12 4 write-only N/A Doorbell
|
||||||
|
bit 0..15: vector
|
||||||
|
bit 16..31: peer ID
|
||||||
|
16 240 none N/A reserved
|
||||||
|
|
||||||
|
Software should only access the registers as specified in column
|
||||||
|
"Access". Reserved bits should be ignored on read, and preserved on
|
||||||
|
write.
|
||||||
|
|
||||||
|
In revision 0 of the device, Interrupt Status and Mask Register
|
||||||
|
together control the legacy INTx interrupt when the device has no
|
||||||
|
MSI-X capability: INTx is asserted when the bit-wise AND of Status and
|
||||||
|
Mask is non-zero and the device has no MSI-X capability. Interrupt
|
||||||
|
Status Register bit 0 becomes 1 when an interrupt request from a peer
|
||||||
|
is received. Reading the register clears it.
|
||||||
|
|
||||||
|
IVPosition Register: if the device is not configured for interrupts,
|
||||||
|
this is zero. Else, it is the device's ID (between 0 and 65535).
|
||||||
|
|
||||||
|
Before QEMU 2.6.0, the register may read -1 for a short while after
|
||||||
|
reset. These devices have PCI revision 0 rather than 1.
|
||||||
|
|
||||||
|
There is no good way for software to find out whether the device is
|
||||||
|
configured for interrupts. A positive IVPosition means interrupts,
|
||||||
|
but zero could be either.
|
||||||
|
|
||||||
|
Doorbell Register: writing this register requests to interrupt a peer.
|
||||||
|
The written value's high 16 bits are the ID of the peer to interrupt,
|
||||||
|
and its low 16 bits select an interrupt vector.
|
||||||
|
|
||||||
|
If the device is not configured for interrupts, the write is ignored.
|
||||||
|
|
||||||
|
If the interrupt hasn't completed setup, the write is ignored. The
|
||||||
|
device cannot tell guest software whether setup is
|
||||||
|
complete. Interrupts can regress to this state on migration.
|
||||||
|
|
||||||
|
If the peer with the requested ID isn't connected, or it has fewer
|
||||||
|
interrupt vectors connected, the write is ignored. The device is not
|
||||||
|
capable of telling guest software which peers are connected, or how many
|
||||||
|
interrupt vectors are connected.
|
||||||
|
|
||||||
|
The peer's interrupt for this vector then becomes pending. There is
|
||||||
|
no way for software to clear the pending bit, and a polling mode of
|
||||||
|
operation is therefore impossible.
|
||||||
|
|
||||||
|
If the peer is a revision 0 device without MSI-X capability, its
|
||||||
|
Interrupt Status register is set to 1. This asserts INTx unless
|
||||||
|
masked by the Interrupt Mask register. The device cannot
|
||||||
|
communicate the interrupt vector to guest software then.
|
||||||
|
|
||||||
|
With multiple MSI-X vectors, different vectors can be used to indicate
|
||||||
|
different events have occurred. The semantics of interrupt vectors
|
||||||
|
are left to the application.
|
||||||
|
|
||||||
|
|
||||||
|
== Interrupt infrastructure ==
|
||||||
|
|
||||||
|
When configured for interrupts, the peers share eventfd objects in
|
||||||
|
addition to shared memory. The shared resources are managed by an
|
||||||
|
ivshmem server.
|
||||||
|
|
||||||
|
=== The ivshmem server ===
|
||||||
|
|
||||||
|
The server listens on a UNIX domain socket.
|
||||||
|
|
||||||
|
For each new client that connects to the server, the server
|
||||||
|
- picks an ID,
|
||||||
|
- creates eventfd file descriptors for the interrupt vectors,
|
||||||
|
- sends the ID and the file descriptor for the shared memory to the
|
||||||
|
new client,
|
||||||
|
- sends connect notifications for the new client to the other clients
|
||||||
|
(these contain file descriptors for sending interrupts),
|
||||||
|
- sends connect notifications for the other clients to the new client,
|
||||||
|
and
|
||||||
|
- sends interrupt setup messages to the new client (these contain file
|
||||||
|
descriptors for receiving interrupts).
|
||||||
|
|
||||||
|
The first client to connect to the server receives ID zero.
|
||||||
|
|
||||||
|
When a client disconnects from the server, the server sends disconnect
|
||||||
|
notifications to the other clients.
|
||||||
|
|
||||||
|
The next section describes the protocol in detail.
|
||||||
|
|
||||||
|
If the server terminates without sending disconnect notifications for
|
||||||
|
its connected clients, the clients can elect to continue. They can
|
||||||
|
communicate with each other normally, but won't receive disconnect
|
||||||
|
notification on disconnect, and no new clients can connect. There is
|
||||||
|
no way for the clients to connect to a restarted server. The device
|
||||||
|
is unable to tell guest software whether the server is still up.
|
||||||
|
|
||||||
|
Example server code is in contrib/ivshmem-server/. Not to be used in
|
||||||
|
production. It assumes all clients use the same number of interrupt
|
||||||
|
vectors.
|
||||||
|
|
||||||
|
A standalone client is in contrib/ivshmem-client/. It can be useful
|
||||||
|
for debugging.
|
||||||
|
|
||||||
|
=== The ivshmem Client-Server Protocol ===
|
||||||
|
|
||||||
|
An ivshmem device configured for interrupts connects to an ivshmem
|
||||||
|
server. This section details the protocol between the two.
|
||||||
|
|
||||||
|
The connection is one-way: the server sends messages to the client.
|
||||||
|
Each message consists of a single 8 byte little-endian signed number,
|
||||||
|
and may be accompanied by a file descriptor via SCM_RIGHTS. Both
|
||||||
|
client and server close the connection on error.
|
||||||
|
|
||||||
|
Note: QEMU currently doesn't close the connection immediately on error, but
|
||||||
|
only when the character device is destroyed.
|
||||||
|
|
||||||
|
On connect, the server sends the following messages in order:
|
||||||
|
|
||||||
|
1. The protocol version number, currently zero. The client should
|
||||||
|
close the connection on receipt of versions it can't handle.
|
||||||
|
|
||||||
|
2. The client's ID. This is unique among all clients of this server.
|
||||||
|
IDs must be between 0 and 65535, because the Doorbell register
|
||||||
|
provides only 16 bits for them.
|
||||||
|
|
||||||
|
3. The number -1, accompanied by the file descriptor for the shared
|
||||||
|
memory.
|
||||||
|
|
||||||
|
4. Connect notifications for existing other clients, if any. This is
|
||||||
|
a peer ID (number between 0 and 65535 other than the client's ID),
|
||||||
|
repeated N times. Each repetition is accompanied by one file
|
||||||
|
descriptor. These are for interrupting the peer with that ID using
|
||||||
|
vector 0,..,N-1, in order. If the client is configured for fewer
|
||||||
|
vectors, it closes the extra file descriptors. If it is configured
|
||||||
|
for more, the extra vectors remain unconnected.
|
||||||
|
|
||||||
|
5. Interrupt setup. This is the client's own ID, repeated N times.
|
||||||
|
Each repetition is accompanied by one file descriptor. These are
|
||||||
|
for receiving interrupts from peers using vector 0,..,N-1, in
|
||||||
|
order. If the client is configured for fewer vectors, it closes
|
||||||
|
the extra file descriptors. If it is configured for more, the
|
||||||
|
extra vectors remain unconnected.
|
||||||
|
|
||||||
|
From then on, the server sends these kinds of messages:
|
||||||
|
|
||||||
|
6. Connection / disconnection notification. This is a peer ID.
|
||||||
|
|
||||||
|
- If the number comes with a file descriptor, it's a connection
|
||||||
|
notification, exactly like in step 4.
|
||||||
|
|
||||||
|
- Else, it's a disconnection notification for the peer with that ID.
|
||||||
|
|
||||||
|
Known bugs:
|
||||||
|
|
||||||
|
* The protocol changed incompatibly in QEMU 2.5. Before, messages
|
||||||
|
were native endian long, and there was no version number.
|
||||||
|
|
||||||
|
* The protocol is poorly designed.
|
||||||
|
|
||||||
|
=== The ivshmem Client-Client Protocol ===
|
||||||
|
|
||||||
|
An ivshmem device configured for interrupts receives eventfd file
|
||||||
|
descriptors for interrupting peers and getting interrupted by peers
|
||||||
|
from the server, as explained in the previous section.
|
||||||
|
|
||||||
|
To interrupt a peer, the device writes the 8-byte integer 1 in native
|
||||||
|
byte order to the respective file descriptor.
|
||||||
|
|
||||||
|
To receive an interrupt, the device reads and discards as many 8-byte
|
||||||
|
integers as it can.
|
@ -1,161 +0,0 @@
|
|||||||
|
|
||||||
Device Specification for Inter-VM shared memory device
|
|
||||||
------------------------------------------------------
|
|
||||||
|
|
||||||
The Inter-VM shared memory device is designed to share a memory region (created
|
|
||||||
on the host via the POSIX shared memory API) between multiple QEMU processes
|
|
||||||
running different guests. In order for all guests to be able to pick up the
|
|
||||||
shared memory area, it is modeled by QEMU as a PCI device exposing said memory
|
|
||||||
to the guest as a PCI BAR.
|
|
||||||
The memory region does not belong to any guest, but is a POSIX memory object on
|
|
||||||
the host. The host can access this shared memory if needed.
|
|
||||||
|
|
||||||
The device also provides an optional communication mechanism between guests
|
|
||||||
sharing the same memory object. More details about that in the section 'Guest to
|
|
||||||
guest communication' section.
|
|
||||||
|
|
||||||
|
|
||||||
The Inter-VM PCI device
|
|
||||||
-----------------------
|
|
||||||
|
|
||||||
From the VM point of view, the ivshmem PCI device supports three BARs.
|
|
||||||
|
|
||||||
- BAR0 is a 1 Kbyte MMIO region to support registers and interrupts when MSI is
|
|
||||||
not used.
|
|
||||||
- BAR1 is used for MSI-X when it is enabled in the device.
|
|
||||||
- BAR2 is used to access the shared memory object.
|
|
||||||
|
|
||||||
It is your choice how to use the device but you must choose between two
|
|
||||||
behaviors :
|
|
||||||
|
|
||||||
- basically, if you only need the shared memory part, you will map BAR2.
|
|
||||||
This way, you have access to the shared memory in guest and can use it as you
|
|
||||||
see fit (memnic, for example, uses it in userland
|
|
||||||
http://dpdk.org/browse/memnic).
|
|
||||||
|
|
||||||
- BAR0 and BAR1 are used to implement an optional communication mechanism
|
|
||||||
through interrupts in the guests. If you need an event mechanism between the
|
|
||||||
guests accessing the shared memory, you will most likely want to write a
|
|
||||||
kernel driver that will handle interrupts. See details in the section 'Guest
|
|
||||||
to guest communication' section.
|
|
||||||
|
|
||||||
The behavior is chosen when starting your QEMU processes:
|
|
||||||
- no communication mechanism needed, the first QEMU to start creates the shared
|
|
||||||
memory on the host, subsequent QEMU processes will use it.
|
|
||||||
|
|
||||||
- communication mechanism needed, an ivshmem server must be started before any
|
|
||||||
QEMU processes, then each QEMU process connects to the server unix socket.
|
|
||||||
|
|
||||||
For more details on the QEMU ivshmem parameters, see qemu-doc documentation.
|
|
||||||
|
|
||||||
|
|
||||||
Guest to guest communication
|
|
||||||
----------------------------
|
|
||||||
|
|
||||||
This section details the communication mechanism between the guests accessing
|
|
||||||
the ivhsmem shared memory.
|
|
||||||
|
|
||||||
*ivshmem server*
|
|
||||||
|
|
||||||
This server code is available in qemu.git/contrib/ivshmem-server.
|
|
||||||
|
|
||||||
The server must be started on the host before any guest.
|
|
||||||
It creates a shared memory object then waits for clients to connect on a unix
|
|
||||||
socket. All the messages are little-endian int64_t integer.
|
|
||||||
|
|
||||||
For each client (QEMU process) that connects to the server:
|
|
||||||
- the server sends a protocol version, if client does not support it, the client
|
|
||||||
closes the communication,
|
|
||||||
- the server assigns an ID for this client and sends this ID to him as the first
|
|
||||||
message,
|
|
||||||
- the server sends a fd to the shared memory object to this client,
|
|
||||||
- the server creates a new set of host eventfds associated to the new client and
|
|
||||||
sends this set to all already connected clients,
|
|
||||||
- finally, the server sends all the eventfds sets for all clients to the new
|
|
||||||
client.
|
|
||||||
|
|
||||||
The server signals all clients when one of them disconnects.
|
|
||||||
|
|
||||||
The client IDs are limited to 16 bits because of the current implementation (see
|
|
||||||
Doorbell register in 'PCI device registers' subsection). Hence only 65536
|
|
||||||
clients are supported.
|
|
||||||
|
|
||||||
All the file descriptors (fd to the shared memory, eventfds for each client)
|
|
||||||
are passed to clients using SCM_RIGHTS over the server unix socket.
|
|
||||||
|
|
||||||
Apart from the current ivshmem implementation in QEMU, an ivshmem client has
|
|
||||||
been provided in qemu.git/contrib/ivshmem-client for debug.
|
|
||||||
|
|
||||||
*QEMU as an ivshmem client*
|
|
||||||
|
|
||||||
At initialisation, when creating the ivshmem device, QEMU first receives a
|
|
||||||
protocol version and closes communication with server if it does not match.
|
|
||||||
Then, QEMU gets its ID from the server then makes it available through BAR0
|
|
||||||
IVPosition register for the VM to use (see 'PCI device registers' subsection).
|
|
||||||
QEMU then uses the fd to the shared memory to map it to BAR2.
|
|
||||||
eventfds for all other clients received from the server are stored to implement
|
|
||||||
BAR0 Doorbell register (see 'PCI device registers' subsection).
|
|
||||||
Finally, eventfds assigned to this QEMU process are used to send interrupts in
|
|
||||||
this VM.
|
|
||||||
|
|
||||||
*PCI device registers*
|
|
||||||
|
|
||||||
From the VM point of view, the ivshmem PCI device supports 4 registers of
|
|
||||||
32-bits each.
|
|
||||||
|
|
||||||
enum ivshmem_registers {
|
|
||||||
IntrMask = 0,
|
|
||||||
IntrStatus = 4,
|
|
||||||
IVPosition = 8,
|
|
||||||
Doorbell = 12
|
|
||||||
};
|
|
||||||
|
|
||||||
The first two registers are the interrupt mask and status registers. Mask and
|
|
||||||
status are only used with pin-based interrupts. They are unused with MSI
|
|
||||||
interrupts.
|
|
||||||
|
|
||||||
Status Register: The status register is set to 1 when an interrupt occurs.
|
|
||||||
|
|
||||||
Mask Register: The mask register is bitwise ANDed with the interrupt status
|
|
||||||
and the result will raise an interrupt if it is non-zero. However, since 1 is
|
|
||||||
the only value the status will be set to, it is only the first bit of the mask
|
|
||||||
that has any effect. Therefore interrupts can be masked by setting the first
|
|
||||||
bit to 0 and unmasked by setting the first bit to 1.
|
|
||||||
|
|
||||||
IVPosition Register: The IVPosition register is read-only and reports the
|
|
||||||
guest's ID number. The guest IDs are non-negative integers. When using the
|
|
||||||
server, since the server is a separate process, the VM ID will only be set when
|
|
||||||
the device is ready (shared memory is received from the server and accessible
|
|
||||||
via the device). If the device is not ready, the IVPosition will return -1.
|
|
||||||
Applications should ensure that they have a valid VM ID before accessing the
|
|
||||||
shared memory.
|
|
||||||
|
|
||||||
Doorbell Register: To interrupt another guest, a guest must write to the
|
|
||||||
Doorbell register. The doorbell register is 32 bits wide, logically divided into
|
|
||||||
two 16-bit fields. The high 16-bits are the guest ID to interrupt and the low
|
|
||||||
16-bits are the interrupt vector to trigger. The semantics of the value
|
|
||||||
written to the doorbell depend on whether the device is using MSI or a regular
|
|
||||||
pin-based interrupt. In short, MSI uses vectors while regular interrupts set
|
|
||||||
the status register.
|
|
||||||
|
|
||||||
Regular Interrupts
|
|
||||||
|
|
||||||
If regular interrupts are used (due to either a guest not supporting MSI or the
|
|
||||||
user specifying not to use them on startup) then the value written to the lower
|
|
||||||
16 bits of the Doorbell register is arbitrary and will trigger an
|
|
||||||
interrupt in the destination guest.
|
|
||||||
|
|
||||||
Message Signalled Interrupts
|
|
||||||
|
|
||||||
An ivshmem device may support multiple MSI vectors. If so, the lower 16-bits
|
|
||||||
written to the Doorbell register must be between 0 and the maximum number of
|
|
||||||
vectors the guest supports. The lower 16 bits written to the doorbell are the
|
|
||||||
MSI vector that will be raised in the destination guest. The number of MSI
|
|
||||||
vectors is configurable but it is set when the VM is started.
|
|
||||||
|
|
||||||
The important thing to remember with MSI is that it is only a signal, no status
|
|
||||||
is set (since MSI interrupts are not shared). All information other than the
|
|
||||||
interrupt itself should be communicated via the shared memory region. Devices
|
|
||||||
supporting multiple MSI vectors can use different vectors to indicate different
|
|
||||||
events have occurred. The semantics of interrupt vectors are left to the
|
|
||||||
user's discretion.
|
|
@ -516,6 +516,16 @@ PropertyInfo qdev_prop_macaddr = {
|
|||||||
.set = set_mac,
|
.set = set_mac,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* --- on/off/auto --- */
|
||||||
|
|
||||||
|
PropertyInfo qdev_prop_on_off_auto = {
|
||||||
|
.name = "OnOffAuto",
|
||||||
|
.description = "on/off/auto",
|
||||||
|
.enum_table = OnOffAuto_lookup,
|
||||||
|
.get = get_enum,
|
||||||
|
.set = set_enum,
|
||||||
|
};
|
||||||
|
|
||||||
/* --- lost tick policy --- */
|
/* --- lost tick policy --- */
|
||||||
|
|
||||||
QEMU_BUILD_BUG_ON(sizeof(LostTickPolicy) != sizeof(int));
|
QEMU_BUILD_BUG_ON(sizeof(LostTickPolicy) != sizeof(int));
|
||||||
|
1152
hw/misc/ivshmem.c
1152
hw/misc/ivshmem.c
File diff suppressed because it is too large
Load Diff
@ -18,6 +18,7 @@ extern PropertyInfo qdev_prop_string;
|
|||||||
extern PropertyInfo qdev_prop_chr;
|
extern PropertyInfo qdev_prop_chr;
|
||||||
extern PropertyInfo qdev_prop_ptr;
|
extern PropertyInfo qdev_prop_ptr;
|
||||||
extern PropertyInfo qdev_prop_macaddr;
|
extern PropertyInfo qdev_prop_macaddr;
|
||||||
|
extern PropertyInfo qdev_prop_on_off_auto;
|
||||||
extern PropertyInfo qdev_prop_losttickpolicy;
|
extern PropertyInfo qdev_prop_losttickpolicy;
|
||||||
extern PropertyInfo qdev_prop_bios_chs_trans;
|
extern PropertyInfo qdev_prop_bios_chs_trans;
|
||||||
extern PropertyInfo qdev_prop_fdc_drive_type;
|
extern PropertyInfo qdev_prop_fdc_drive_type;
|
||||||
@ -155,6 +156,8 @@ extern PropertyInfo qdev_prop_arraylen;
|
|||||||
DEFINE_PROP(_n, _s, _f, qdev_prop_drive, BlockBackend *)
|
DEFINE_PROP(_n, _s, _f, qdev_prop_drive, BlockBackend *)
|
||||||
#define DEFINE_PROP_MACADDR(_n, _s, _f) \
|
#define DEFINE_PROP_MACADDR(_n, _s, _f) \
|
||||||
DEFINE_PROP(_n, _s, _f, qdev_prop_macaddr, MACAddr)
|
DEFINE_PROP(_n, _s, _f, qdev_prop_macaddr, MACAddr)
|
||||||
|
#define DEFINE_PROP_ON_OFF_AUTO(_n, _s, _f, _d) \
|
||||||
|
DEFINE_PROP_DEFAULT(_n, _s, _f, _d, qdev_prop_on_off_auto, OnOffAuto)
|
||||||
#define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
|
#define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
|
||||||
DEFINE_PROP_DEFAULT(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
|
DEFINE_PROP_DEFAULT(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
|
||||||
LostTickPolicy)
|
LostTickPolicy)
|
||||||
|
@ -1262,13 +1262,18 @@ basic example.
|
|||||||
|
|
||||||
@subsection Inter-VM Shared Memory device
|
@subsection Inter-VM Shared Memory device
|
||||||
|
|
||||||
With KVM enabled on a Linux host, a shared memory device is available. Guests
|
On Linux hosts, a shared memory device is available. The basic syntax
|
||||||
map a POSIX shared memory region into the guest as a PCI device that enables
|
is:
|
||||||
zero-copy communication to the application level of the guests. The basic
|
|
||||||
syntax is:
|
|
||||||
|
|
||||||
@example
|
@example
|
||||||
qemu-system-i386 -device ivshmem,size=@var{size},shm=@var{shm-name}
|
qemu-system-x86_64 -device ivshmem-plain,memdev=@var{hostmem}
|
||||||
|
@end example
|
||||||
|
|
||||||
|
where @var{hostmem} names a host memory backend. For a POSIX shared
|
||||||
|
memory backend, use something like
|
||||||
|
|
||||||
|
@example
|
||||||
|
-object memory-backend-file,size=1M,share,mem-path=/dev/shm/ivshmem,id=@var{hostmem}
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
If desired, interrupts can be sent between guest VMs accessing the same shared
|
If desired, interrupts can be sent between guest VMs accessing the same shared
|
||||||
@ -1282,28 +1287,24 @@ memory server is:
|
|||||||
ivshmem-server -p @var{pidfile} -S @var{path} -m @var{shm-name} -l @var{shm-size} -n @var{vectors}
|
ivshmem-server -p @var{pidfile} -S @var{path} -m @var{shm-name} -l @var{shm-size} -n @var{vectors}
|
||||||
|
|
||||||
# Then start your qemu instances with matching arguments
|
# Then start your qemu instances with matching arguments
|
||||||
qemu-system-i386 -device ivshmem,size=@var{shm-size},vectors=@var{vectors},chardev=@var{id}
|
qemu-system-x86_64 -device ivshmem-doorbell,vectors=@var{vectors},chardev=@var{id}
|
||||||
[,msi=on][,ioeventfd=on][,role=peer|master]
|
|
||||||
-chardev socket,path=@var{path},id=@var{id}
|
-chardev socket,path=@var{path},id=@var{id}
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
When using the server, the guest will be assigned a VM ID (>=0) that allows guests
|
When using the server, the guest will be assigned a VM ID (>=0) that allows guests
|
||||||
using the same server to communicate via interrupts. Guests can read their
|
using the same server to communicate via interrupts. Guests can read their
|
||||||
VM ID from a device register (see example code). Since receiving the shared
|
VM ID from a device register (see ivshmem-spec.txt).
|
||||||
memory region from the server is asynchronous, there is a (small) chance the
|
|
||||||
guest may boot before the shared memory is attached. To allow an application
|
|
||||||
to ensure shared memory is attached, the VM ID register will return -1 (an
|
|
||||||
invalid VM ID) until the memory is attached. Once the shared memory is
|
|
||||||
attached, the VM ID will return the guest's valid VM ID. With these semantics,
|
|
||||||
the guest application can check to ensure the shared memory is attached to the
|
|
||||||
guest before proceeding.
|
|
||||||
|
|
||||||
The @option{role} argument can be set to either master or peer and will affect
|
@subsubsection Migration with ivshmem
|
||||||
how the shared memory is migrated. With @option{role=master}, the guest will
|
|
||||||
copy the shared memory on migration to the destination host. With
|
With device property @option{master=on}, the guest will copy the shared
|
||||||
@option{role=peer}, the guest will not be able to migrate with the device attached.
|
memory on migration to the destination host. With @option{master=off},
|
||||||
With the @option{peer} case, the device should be detached and then reattached
|
the guest will not be able to migrate with the device attached. In the
|
||||||
after migration using the PCI hotplug support.
|
latter case, the device should be detached and then reattached after
|
||||||
|
migration using the PCI hotplug support.
|
||||||
|
|
||||||
|
At most one of the devices sharing the same memory can be master. The
|
||||||
|
master must complete migration before you plug back the other devices.
|
||||||
|
|
||||||
@subsubsection ivshmem and hugepages
|
@subsubsection ivshmem and hugepages
|
||||||
|
|
||||||
@ -1311,8 +1312,8 @@ Instead of specifying the <shm size> using POSIX shm, you may specify
|
|||||||
a memory backend that has hugepage support:
|
a memory backend that has hugepage support:
|
||||||
|
|
||||||
@example
|
@example
|
||||||
qemu-system-i386 -object memory-backend-file,size=1G,mem-path=/mnt/hugepages/my-shmem-file,id=mb1
|
qemu-system-x86_64 -object memory-backend-file,size=1G,mem-path=/dev/hugepages/my-shmem-file,share,id=mb1
|
||||||
-device ivshmem,x-memdev=mb1
|
-device ivshmem-plain,memdev=mb1
|
||||||
@end example
|
@end example
|
||||||
|
|
||||||
ivshmem-server also supports hugepages mount points with the
|
ivshmem-server also supports hugepages mount points with the
|
||||||
|
@ -333,6 +333,12 @@ static long gethugepagesize(const char *mem_path)
|
|||||||
return fs.f_bsize;
|
return fs.f_bsize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FIXME TOCTTOU: this iterates over memory backends' mem-path, which
|
||||||
|
* may or may not name the same files / on the same filesystem now as
|
||||||
|
* when we actually open and map them. Iterate over the file
|
||||||
|
* descriptors instead, and use qemu_fd_getpagesize().
|
||||||
|
*/
|
||||||
static int find_max_supported_pagesize(Object *obj, void *opaque)
|
static int find_max_supported_pagesize(Object *obj, void *opaque)
|
||||||
{
|
{
|
||||||
char *mem_path;
|
char *mem_path;
|
||||||
|
@ -166,7 +166,7 @@ gcov-files-pci-y += hw/display/virtio-gpu-pci.c
|
|||||||
gcov-files-pci-$(CONFIG_VIRTIO_VGA) += hw/display/virtio-vga.c
|
gcov-files-pci-$(CONFIG_VIRTIO_VGA) += hw/display/virtio-vga.c
|
||||||
check-qtest-pci-y += tests/intel-hda-test$(EXESUF)
|
check-qtest-pci-y += tests/intel-hda-test$(EXESUF)
|
||||||
gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c
|
gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c
|
||||||
check-qtest-pci-$(CONFIG_POSIX) += tests/ivshmem-test$(EXESUF)
|
check-qtest-pci-$(CONFIG_EVENTFD) += tests/ivshmem-test$(EXESUF)
|
||||||
gcov-files-pci-y += hw/misc/ivshmem.c
|
gcov-files-pci-y += hw/misc/ivshmem.c
|
||||||
|
|
||||||
check-qtest-i386-y = tests/endianness-test$(EXESUF)
|
check-qtest-i386-y = tests/endianness-test$(EXESUF)
|
||||||
|
@ -110,25 +110,26 @@ static void setup_vm_cmd(IVState *s, const char *cmd, bool msix)
|
|||||||
s->pcibus = qpci_init_pc();
|
s->pcibus = qpci_init_pc();
|
||||||
s->dev = get_device(s->pcibus);
|
s->dev = get_device(s->pcibus);
|
||||||
|
|
||||||
/* FIXME: other bar order fails, mappings changes */
|
s->reg_base = qpci_iomap(s->dev, 0, &barsize);
|
||||||
s->mem_base = qpci_iomap(s->dev, 2, &barsize);
|
g_assert_nonnull(s->reg_base);
|
||||||
g_assert_nonnull(s->mem_base);
|
g_assert_cmpuint(barsize, ==, 256);
|
||||||
g_assert_cmpuint(barsize, ==, TMPSHMSIZE);
|
|
||||||
|
|
||||||
if (msix) {
|
if (msix) {
|
||||||
qpci_msix_enable(s->dev);
|
qpci_msix_enable(s->dev);
|
||||||
}
|
}
|
||||||
|
|
||||||
s->reg_base = qpci_iomap(s->dev, 0, &barsize);
|
s->mem_base = qpci_iomap(s->dev, 2, &barsize);
|
||||||
g_assert_nonnull(s->reg_base);
|
g_assert_nonnull(s->mem_base);
|
||||||
g_assert_cmpuint(barsize, ==, 256);
|
g_assert_cmpuint(barsize, ==, TMPSHMSIZE);
|
||||||
|
|
||||||
qpci_device_enable(s->dev);
|
qpci_device_enable(s->dev);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void setup_vm(IVState *s)
|
static void setup_vm(IVState *s)
|
||||||
{
|
{
|
||||||
char *cmd = g_strdup_printf("-device ivshmem,shm=%s,size=1M", tmpshm);
|
char *cmd = g_strdup_printf("-object memory-backend-file"
|
||||||
|
",id=mb1,size=1M,share,mem-path=/dev/shm%s"
|
||||||
|
" -device ivshmem-plain,memdev=mb1", tmpshm);
|
||||||
|
|
||||||
setup_vm_cmd(s, cmd, false);
|
setup_vm_cmd(s, cmd, false);
|
||||||
|
|
||||||
@ -144,32 +145,41 @@ static void test_ivshmem_single(void)
|
|||||||
setup_vm(&state);
|
setup_vm(&state);
|
||||||
s = &state;
|
s = &state;
|
||||||
|
|
||||||
/* valid io */
|
/* initial state of readable registers */
|
||||||
out_reg(s, INTRMASK, 0);
|
g_assert_cmpuint(in_reg(s, INTRMASK), ==, 0);
|
||||||
in_reg(s, INTRSTATUS);
|
g_assert_cmpuint(in_reg(s, INTRSTATUS), ==, 0);
|
||||||
in_reg(s, IVPOSITION);
|
g_assert_cmpuint(in_reg(s, IVPOSITION), ==, 0);
|
||||||
|
|
||||||
|
/* trigger interrupt via registers */
|
||||||
out_reg(s, INTRMASK, 0xffffffff);
|
out_reg(s, INTRMASK, 0xffffffff);
|
||||||
g_assert_cmpuint(in_reg(s, INTRMASK), ==, 0xffffffff);
|
g_assert_cmpuint(in_reg(s, INTRMASK), ==, 0xffffffff);
|
||||||
out_reg(s, INTRSTATUS, 1);
|
out_reg(s, INTRSTATUS, 1);
|
||||||
/* XXX: intercept IRQ, not seen in resp */
|
/* check interrupt status */
|
||||||
g_assert_cmpuint(in_reg(s, INTRSTATUS), ==, 1);
|
g_assert_cmpuint(in_reg(s, INTRSTATUS), ==, 1);
|
||||||
|
/* reading clears */
|
||||||
|
g_assert_cmpuint(in_reg(s, INTRSTATUS), ==, 0);
|
||||||
|
/* TODO intercept actual interrupt (needs qtest work) */
|
||||||
|
|
||||||
/* invalid io */
|
/* invalid register access */
|
||||||
out_reg(s, IVPOSITION, 1);
|
out_reg(s, IVPOSITION, 1);
|
||||||
|
in_reg(s, DOORBELL);
|
||||||
|
|
||||||
|
/* ring the (non-functional) doorbell */
|
||||||
out_reg(s, DOORBELL, 8 << 16);
|
out_reg(s, DOORBELL, 8 << 16);
|
||||||
|
|
||||||
|
/* write shared memory */
|
||||||
for (i = 0; i < G_N_ELEMENTS(data); i++) {
|
for (i = 0; i < G_N_ELEMENTS(data); i++) {
|
||||||
data[i] = i;
|
data[i] = i;
|
||||||
}
|
}
|
||||||
qtest_memwrite(s->qtest, (uintptr_t)s->mem_base, data, sizeof(data));
|
qtest_memwrite(s->qtest, (uintptr_t)s->mem_base, data, sizeof(data));
|
||||||
|
|
||||||
|
/* verify write */
|
||||||
for (i = 0; i < G_N_ELEMENTS(data); i++) {
|
for (i = 0; i < G_N_ELEMENTS(data); i++) {
|
||||||
g_assert_cmpuint(((uint32_t *)tmpshmem)[i], ==, i);
|
g_assert_cmpuint(((uint32_t *)tmpshmem)[i], ==, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* read it back and verify read */
|
||||||
memset(data, 0, sizeof(data));
|
memset(data, 0, sizeof(data));
|
||||||
|
|
||||||
qtest_memread(s->qtest, (uintptr_t)s->mem_base, data, sizeof(data));
|
qtest_memread(s->qtest, (uintptr_t)s->mem_base, data, sizeof(data));
|
||||||
for (i = 0; i < G_N_ELEMENTS(data); i++) {
|
for (i = 0; i < G_N_ELEMENTS(data); i++) {
|
||||||
g_assert_cmpuint(data[i], ==, i);
|
g_assert_cmpuint(data[i], ==, i);
|
||||||
@ -276,8 +286,10 @@ static void *server_thread(void *data)
|
|||||||
static void setup_vm_with_server(IVState *s, int nvectors, bool msi)
|
static void setup_vm_with_server(IVState *s, int nvectors, bool msi)
|
||||||
{
|
{
|
||||||
char *cmd = g_strdup_printf("-chardev socket,id=chr0,path=%s,nowait "
|
char *cmd = g_strdup_printf("-chardev socket,id=chr0,path=%s,nowait "
|
||||||
"-device ivshmem,size=1M,chardev=chr0,vectors=%d,msi=%s",
|
"-device ivshmem%s,chardev=chr0,vectors=%d",
|
||||||
tmpserver, nvectors, msi ? "true" : "false");
|
tmpserver,
|
||||||
|
msi ? "-doorbell" : ",size=1M,msi=off",
|
||||||
|
nvectors);
|
||||||
|
|
||||||
setup_vm_cmd(s, cmd, msi);
|
setup_vm_cmd(s, cmd, msi);
|
||||||
|
|
||||||
@ -293,8 +305,7 @@ static void test_ivshmem_server(bool msi)
|
|||||||
int nvectors = 2;
|
int nvectors = 2;
|
||||||
guint64 end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND;
|
guint64 end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND;
|
||||||
|
|
||||||
memset(tmpshmem, 0x42, TMPSHMSIZE);
|
ret = ivshmem_server_init(&server, tmpserver, tmpshm, true,
|
||||||
ret = ivshmem_server_init(&server, tmpserver, tmpshm,
|
|
||||||
TMPSHMSIZE, nvectors,
|
TMPSHMSIZE, nvectors,
|
||||||
g_test_verbose());
|
g_test_verbose());
|
||||||
g_assert_cmpint(ret, ==, 0);
|
g_assert_cmpint(ret, ==, 0);
|
||||||
@ -302,49 +313,39 @@ static void test_ivshmem_server(bool msi)
|
|||||||
ret = ivshmem_server_start(&server);
|
ret = ivshmem_server_start(&server);
|
||||||
g_assert_cmpint(ret, ==, 0);
|
g_assert_cmpint(ret, ==, 0);
|
||||||
|
|
||||||
setup_vm_with_server(&state1, nvectors, msi);
|
|
||||||
s1 = &state1;
|
|
||||||
setup_vm_with_server(&state2, nvectors, msi);
|
|
||||||
s2 = &state2;
|
|
||||||
|
|
||||||
g_assert_cmpuint(in_reg(s1, IVPOSITION), ==, 0xffffffff);
|
|
||||||
g_assert_cmpuint(in_reg(s2, IVPOSITION), ==, 0xffffffff);
|
|
||||||
|
|
||||||
g_assert_cmpuint(qtest_readb(s1->qtest, (uintptr_t)s1->mem_base), ==, 0x00);
|
|
||||||
|
|
||||||
thread.server = &server;
|
thread.server = &server;
|
||||||
ret = pipe(thread.pipe);
|
ret = pipe(thread.pipe);
|
||||||
g_assert_cmpint(ret, ==, 0);
|
g_assert_cmpint(ret, ==, 0);
|
||||||
thread.thread = g_thread_new("ivshmem-server", server_thread, &thread);
|
thread.thread = g_thread_new("ivshmem-server", server_thread, &thread);
|
||||||
g_assert(thread.thread != NULL);
|
g_assert(thread.thread != NULL);
|
||||||
|
|
||||||
/* waiting until mapping is done */
|
setup_vm_with_server(&state1, nvectors, msi);
|
||||||
while (g_get_monotonic_time() < end_time) {
|
s1 = &state1;
|
||||||
g_usleep(1000);
|
setup_vm_with_server(&state2, nvectors, msi);
|
||||||
|
s2 = &state2;
|
||||||
if (qtest_readb(s1->qtest, (uintptr_t)s1->mem_base) == 0x42 &&
|
|
||||||
qtest_readb(s2->qtest, (uintptr_t)s2->mem_base) == 0x42) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* check got different VM ids */
|
/* check got different VM ids */
|
||||||
vm1 = in_reg(s1, IVPOSITION);
|
vm1 = in_reg(s1, IVPOSITION);
|
||||||
vm2 = in_reg(s2, IVPOSITION);
|
vm2 = in_reg(s2, IVPOSITION);
|
||||||
g_assert_cmpuint(vm1, !=, vm2);
|
g_assert_cmpint(vm1, >=, 0);
|
||||||
|
g_assert_cmpint(vm2, >=, 0);
|
||||||
|
g_assert_cmpint(vm1, !=, vm2);
|
||||||
|
|
||||||
|
/* check number of MSI-X vectors */
|
||||||
global_qtest = s1->qtest;
|
global_qtest = s1->qtest;
|
||||||
if (msi) {
|
if (msi) {
|
||||||
ret = qpci_msix_table_size(s1->dev);
|
ret = qpci_msix_table_size(s1->dev);
|
||||||
g_assert_cmpuint(ret, ==, nvectors);
|
g_assert_cmpuint(ret, ==, nvectors);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ping vm2 -> vm1 */
|
/* TODO test behavior before MSI-X is enabled */
|
||||||
|
|
||||||
|
/* ping vm2 -> vm1 on vector 0 */
|
||||||
if (msi) {
|
if (msi) {
|
||||||
ret = qpci_msix_pending(s1->dev, 0);
|
ret = qpci_msix_pending(s1->dev, 0);
|
||||||
g_assert_cmpuint(ret, ==, 0);
|
g_assert_cmpuint(ret, ==, 0);
|
||||||
} else {
|
} else {
|
||||||
out_reg(s1, INTRSTATUS, 0);
|
g_assert_cmpuint(in_reg(s1, INTRSTATUS), ==, 0);
|
||||||
}
|
}
|
||||||
out_reg(s2, DOORBELL, vm1 << 16);
|
out_reg(s2, DOORBELL, vm1 << 16);
|
||||||
do {
|
do {
|
||||||
@ -353,18 +354,18 @@ static void test_ivshmem_server(bool msi)
|
|||||||
} while (ret == 0 && g_get_monotonic_time() < end_time);
|
} while (ret == 0 && g_get_monotonic_time() < end_time);
|
||||||
g_assert_cmpuint(ret, !=, 0);
|
g_assert_cmpuint(ret, !=, 0);
|
||||||
|
|
||||||
/* ping vm1 -> vm2 */
|
/* ping vm1 -> vm2 on vector 1 */
|
||||||
global_qtest = s2->qtest;
|
global_qtest = s2->qtest;
|
||||||
if (msi) {
|
if (msi) {
|
||||||
ret = qpci_msix_pending(s2->dev, 0);
|
ret = qpci_msix_pending(s2->dev, 1);
|
||||||
g_assert_cmpuint(ret, ==, 0);
|
g_assert_cmpuint(ret, ==, 0);
|
||||||
} else {
|
} else {
|
||||||
out_reg(s2, INTRSTATUS, 0);
|
g_assert_cmpuint(in_reg(s2, INTRSTATUS), ==, 0);
|
||||||
}
|
}
|
||||||
out_reg(s1, DOORBELL, vm2 << 16);
|
out_reg(s1, DOORBELL, vm2 << 16 | 1);
|
||||||
do {
|
do {
|
||||||
g_usleep(10000);
|
g_usleep(10000);
|
||||||
ret = msi ? qpci_msix_pending(s2->dev, 0) : in_reg(s2, INTRSTATUS);
|
ret = msi ? qpci_msix_pending(s2->dev, 1) : in_reg(s2, INTRSTATUS);
|
||||||
} while (ret == 0 && g_get_monotonic_time() < end_time);
|
} while (ret == 0 && g_get_monotonic_time() < end_time);
|
||||||
g_assert_cmpuint(ret, !=, 0);
|
g_assert_cmpuint(ret, !=, 0);
|
||||||
|
|
||||||
@ -415,7 +416,7 @@ static void test_ivshmem_memdev(void)
|
|||||||
|
|
||||||
/* just for the sake of checking memory-backend property */
|
/* just for the sake of checking memory-backend property */
|
||||||
setup_vm_cmd(&state, "-object memory-backend-ram,size=1M,id=mb1"
|
setup_vm_cmd(&state, "-object memory-backend-ram,size=1M,id=mb1"
|
||||||
" -device ivshmem,x-memdev=mb1", false);
|
" -device ivshmem-plain,memdev=mb1", false);
|
||||||
|
|
||||||
cleanup_vm(&state);
|
cleanup_vm(&state);
|
||||||
}
|
}
|
||||||
|
@ -184,7 +184,9 @@ static void *qpci_pc_iomap(QPCIBus *bus, QPCIDevice *dev, int barno, uint64_t *s
|
|||||||
if (io_type == PCI_BASE_ADDRESS_SPACE_IO) {
|
if (io_type == PCI_BASE_ADDRESS_SPACE_IO) {
|
||||||
uint16_t loc;
|
uint16_t loc;
|
||||||
|
|
||||||
g_assert((s->pci_iohole_alloc + size) <= s->pci_iohole_size);
|
g_assert(QEMU_ALIGN_UP(s->pci_iohole_alloc, size) + size
|
||||||
|
<= s->pci_iohole_size);
|
||||||
|
s->pci_iohole_alloc = QEMU_ALIGN_UP(s->pci_iohole_alloc, size);
|
||||||
loc = s->pci_iohole_start + s->pci_iohole_alloc;
|
loc = s->pci_iohole_start + s->pci_iohole_alloc;
|
||||||
s->pci_iohole_alloc += size;
|
s->pci_iohole_alloc += size;
|
||||||
|
|
||||||
@ -194,7 +196,9 @@ static void *qpci_pc_iomap(QPCIBus *bus, QPCIDevice *dev, int barno, uint64_t *s
|
|||||||
} else {
|
} else {
|
||||||
uint64_t loc;
|
uint64_t loc;
|
||||||
|
|
||||||
g_assert((s->pci_hole_alloc + size) <= s->pci_hole_size);
|
g_assert(QEMU_ALIGN_UP(s->pci_hole_alloc, size) + size
|
||||||
|
<= s->pci_hole_size);
|
||||||
|
s->pci_hole_alloc = QEMU_ALIGN_UP(s->pci_hole_alloc, size);
|
||||||
loc = s->pci_hole_start + s->pci_hole_alloc;
|
loc = s->pci_hole_start + s->pci_hole_alloc;
|
||||||
s->pci_hole_alloc += size;
|
s->pci_hole_alloc += size;
|
||||||
|
|
||||||
|
@ -20,11 +20,17 @@
|
|||||||
#include <sys/eventfd.h>
|
#include <sys/eventfd.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_EVENTFD
|
||||||
|
/*
|
||||||
|
* Initialize @e with existing file descriptor @fd.
|
||||||
|
* @fd must be a genuine eventfd object, emulation with pipe won't do.
|
||||||
|
*/
|
||||||
void event_notifier_init_fd(EventNotifier *e, int fd)
|
void event_notifier_init_fd(EventNotifier *e, int fd)
|
||||||
{
|
{
|
||||||
e->rfd = fd;
|
e->rfd = fd;
|
||||||
e->wfd = fd;
|
e->wfd = fd;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
int event_notifier_init(EventNotifier *e, int active)
|
int event_notifier_init(EventNotifier *e, int active)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user