Add libnvmm, NetBSD's new virtualization API. It provides a way for VMM

software to effortlessly create and manage virtual machines via NVMM.

It is mostly complete, only nvmm_assist_mem needs to be filled -- I have
a draft for that, but it needs some more care. This Mem Assist should
not be needed when emulating a system in x2apic mode, so theoretically
the current form of libnvmm is sufficient to emulate a whole class of
systems.

Generally speaking, there are so many modes in x86 that it is difficult
to handle each corner case without introducing a ton of checks that just
slow down the common-case execution. Currently we check a limited number
of things; we may add more checks in the future if they turn out to be
needed, but that's rather low priority.

Libnvmm is compiled and installed only on amd64. A man page (reviewed by
wiz@) is provided.
This commit is contained in:
maxv 2018-11-10 09:28:56 +00:00
parent e7b9134fea
commit 2760ca24b5
8 changed files with 1650 additions and 2 deletions

View File

@ -1,4 +1,4 @@
# $NetBSD: md.amd64,v 1.260 2018/11/07 07:43:07 maxv Exp $
# $NetBSD: md.amd64,v 1.261 2018/11/10 09:28:56 maxv Exp $
./usr/include/amd64 comp-c-include
./usr/include/amd64/ansi.h comp-c-include
@ -682,6 +682,7 @@
./usr/include/dev/nvmm/nvmm_ioctl.h comp-c-include
./usr/include/dev/nvmm/x86 comp-c-include
./usr/include/dev/nvmm/x86/nvmm_x86.h comp-c-include
./usr/include/nvmm.h comp-c-include
./usr/include/pmmintrin.h comp-obsolete obsolete
./usr/include/x64_64 comp-obsolete obsolete
./usr/include/x64_64/ansi.h comp-obsolete obsolete
@ -783,6 +784,12 @@
./usr/lib/i386/libi386.so comp-sys-shlib compat,pic
./usr/lib/i386/libi386_p.a comp-c-proflib compat,profile
./usr/lib/i386/libi386_pic.a comp-c-piclib compat,pic,picinstall
./usr/lib/libnvmm.a comp-c-lib compatfile
./usr/lib/libnvmm.so comp-sys-shlib compat,pic
./usr/lib/libnvmm.so.0 comp-sys-shlib compat,pic
./usr/lib/libnvmm.so.0.1 comp-sys-shlib compat,pic
./usr/lib/libnvmm_p.a comp-c-proflib compatfile,profile
./usr/lib/libnvmm_pic.a comp-c-piclib compat,pic,picinstall
./usr/lib/libx86_64.a comp-c-lib
./usr/lib/libx86_64_p.a comp-c-proflib profile
./usr/lib/libx86_64_pic.a comp-c-piclib pic,picinstall
@ -888,3 +895,6 @@
./usr/share/ldscripts/i386nbsd.xn comp-obsolete obsolete
./usr/share/ldscripts/i386nbsd.xr comp-obsolete obsolete
./usr/share/ldscripts/i386nbsd.xu comp-obsolete obsolete
./usr/share/man/cat3/libnvmm.0 comp-c-catman .cat
./usr/share/man/html3/libnvmm.html comp-c-htmlman html
./usr/share/man/man3/libnvmm.3 comp-c-man .man

View File

@ -1,4 +1,4 @@
# $NetBSD: Makefile,v 1.261 2018/09/08 14:11:41 christos Exp $
# $NetBSD: Makefile,v 1.262 2018/11/10 09:28:56 maxv Exp $
# from: @(#)Makefile 5.25.1.1 (Berkeley) 5/7/91
.include <bsd.own.mk>
@ -50,6 +50,10 @@ SUBDIR+= librumpclient
SUBDIR+= libskey
.endif
.if ${MACHINE_ARCH} == "x86_64"
SUBDIR+= libnvmm
.endif
.if (${MKMDNS} != "no")
SUBDIR+= ../external/apache2/mDNSResponder/lib
.endif

17
lib/libnvmm/Makefile Normal file
View File

@ -0,0 +1,17 @@
# $NetBSD: Makefile,v 1.1 2018/11/10 09:28:56 maxv Exp $
USE_SHLIBDIR= yes
.include <bsd.own.mk>
LIB= nvmm
MAN= libnvmm.3
SRCS= libnvmm.c libnvmm_x86.c
INCS= nvmm.h
INCSDIR= /usr/include
WARNS= 5
.include <bsd.lib.mk>

484
lib/libnvmm/libnvmm.3 Normal file
View File

@ -0,0 +1,484 @@
.Dd September 12, 2018
.Dt LIBNVMM 3
.Os
.Sh NAME
.Nm libnvmm
.Nd NetBSD Virtualization API
.Sh LIBRARY
.Lb libnvmm
.Sh SYNOPSIS
.In nvmm.h
.Ft int
.Fn nvmm_capability "struct nvmm_capability *cap"
.Ft int
.Fn nvmm_machine_create "struct nvmm_machine *mach"
.Ft int
.Fn nvmm_machine_destroy "struct nvmm_machine *mach"
.Ft int
.Fn nvmm_machine_configure "struct nvmm_machine *mach" "uint64_t op" \
"void *conf"
.Ft int
.Fn nvmm_vcpu_create "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid"
.Ft int
.Fn nvmm_vcpu_destroy "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid"
.Ft int
.Fn nvmm_vcpu_getstate "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \
"void *state" "uint64_t flags"
.Ft int
.Fn nvmm_vcpu_setstate "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \
"void *state" "uint64_t flags"
.Ft int
.Fn nvmm_vcpu_inject "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \
"struct nvmm_event *event"
.Ft int
.Fn nvmm_vcpu_run "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \
"struct nvmm_exit *exit"
.Ft int
.Fn nvmm_gpa_map "struct nvmm_machine *mach" "uintptr_t hva" "gpaddr_t gpa" \
"size_t size" "int flags"
.Ft int
.Fn nvmm_gpa_unmap "struct nvmm_machine *mach" "uintptr_t hva" "gpaddr_t gpa" \
"size_t size"
.Ft int
.Fn nvmm_gva_to_gpa "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \
"gvaddr_t gva" "gpaddr_t *gpa" "nvmm_prot_t *prot"
.Ft int
.Fn nvmm_gpa_to_hva "struct nvmm_machine *mach" "gpaddr_t gpa" \
"uintptr_t *hva"
.Ft int
.Fn nvmm_assist_io "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \
"struct nvmm_exit *exit" "void (*cb)(struct nvmm_io *)"
.Ft int
.Fn nvmm_assist_mem "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \
"struct nvmm_exit *exit" "void (*cb)(struct nvmm_mem *)"
.Sh DESCRIPTION
.Nm
provides a library for VMM software to handle hardware-accelerated virtual
machines in
.Nx .
A virtual machine is described by an opaque structure,
.Cd nvmm_machine .
VMM software should not attempt to modify this structure directly, and should
use the API provided by
.Nm
to handle virtual machines.
.Pp
.Fn nvmm_capability
gets the capabilities of NVMM.
.Pp
.Fn nvmm_machine_create
creates a virtual machine in the kernel.
The
.Fa mach
structure is initialized, and describes the machine.
.Pp
.Fn nvmm_machine_destroy
destroys the virtual machine described in
.Fa mach .
.Pp
.Fn nvmm_machine_configure
configures, on the machine
.Fa mach ,
the parameter indicated in
.Fa op .
.Fa conf
describes the value of the parameter.
.Pp
.Fn nvmm_vcpu_create
creates a virtual CPU in the machine
.Fa mach ,
giving it the CPU id
.Fa cpuid .
.Pp
.Fn nvmm_vcpu_destroy
destroys the virtual CPU identified by
.Fa cpuid
in the machine
.Fa mach .
.Pp
.Fn nvmm_vcpu_getstate
gets the state of the virtual CPU identified by
.Fa cpuid
in the machine
.Fa mach .
The
.Fa state
argument is the address of a state area, and
.Fa flags
is the bitmap of the components that are to be retrieved.
See
.Sx VCPU State Area
below for details.
.Pp
.Fn nvmm_vcpu_setstate
sets the state of the virtual CPU identified by
.Fa cpuid
in the machine
.Fa mach .
The
.Fa state
argument is the address of a state area, and
.Fa flags
is the bitmap of the components that are to be set.
See
.Sx VCPU State Area
below for details.
.Pp
.Fn nvmm_vcpu_run
runs the CPU identified by
.Fa cpuid
in the machine
.Fa mach ,
until a VM exit is triggered.
The
.Fa exit
structure is filled to indicate the exit reason, and the associated parameters
if any.
.Pp
.Fn nvmm_gpa_map
makes the guest physical memory area beginning on address
.Fa gpa
and of size
.Fa size
available in the machine
.Fa mach .
The area is mapped in the calling process' virtual address space, at address
.Fa hva .
.Pp
.Fn nvmm_gpa_unmap
removes the guest physical memory area beginning on address
.Fa gpa
and of size
.Fa size
from the machine
.Fa mach .
It also unmaps the area beginning on
.Fa hva
from the calling process' virtual address space.
.Pp
.Fn nvmm_gva_to_gpa
translates, on the CPU
.Fa cpuid
from the machine
.Fa mach ,
the guest virtual address given in
.Fa gva
into a guest physical address returned in
.Fa gpa .
The associated page permissions are returned in
.Fa prot .
.Fa gva
must be page-aligned.
.Pp
.Fn nvmm_gpa_to_hva
translates, on the machine
.Fa mach ,
the guest physical address indicated in
.Fa gpa
into a host virtual address returned in
.Fa hva .
.Fa gpa
must be page-aligned.
.Pp
.Fn nvmm_assist_io
emulates the I/O operation described in
.Fa exit
on CPU
.Fa cpuid
from machine
.Fa mach .
.Fa cb
will be called to handle the transaction.
See
.Sx I/O Assist
below for details.
.Pp
.Fn nvmm_assist_mem
emulates the Mem operation described in
.Fa exit
on CPU
.Fa cpuid
from machine
.Fa mach .
.Fa cb
will be called to handle the transaction.
See
.Sx Mem Assist
below for details.
.Ss NVMM Capability
The
.Cd nvmm_capability
structure helps VMM software identify the capabilities offered by NVMM on the
host:
.Bd -literal
struct nvmm_capability {
uint64_t version;
uint64_t state_size;
uint64_t max_machines;
uint64_t max_vcpus;
uint64_t max_ram;
union {
struct {
...
} x86;
uint64_t rsvd[8];
} u;
};
.Ed
.Pp
For example, the
.Cd max_machines
field indicates the maximum number of virtual machines supported, while
.Cd max_vcpus
indicates the maximum number of VCPUs supported per virtual machine.
.Ss VCPU State Area
A VCPU state area is a structure that entirely defines the content of the
registers of a VCPU.
Only one such structure exists, for x86:
.Bd -literal
struct nvmm_x64_state {
...
};
.Ed
.Pp
Refer to functional examples to see precisely how to use this structure.
.Ss Exit Reasons
The
.Cd nvmm_exit
structure is used to handle VM exits:
.Bd -literal
enum nvmm_exit_reason {
NVMM_EXIT_NONE = 0x0000000000000000,
/* General. */
NVMM_EXIT_MEMORY = 0x0000000000000001,
NVMM_EXIT_IO = 0x0000000000000002,
NVMM_EXIT_MSR = 0x0000000000000003,
NVMM_EXIT_INT_READY = 0x0000000000000004,
NVMM_EXIT_NMI_READY = 0x0000000000000005,
NVMM_EXIT_SHUTDOWN = 0x0000000000000006,
/* Instructions (x86). */
...
NVMM_EXIT_INVALID = 0xFFFFFFFFFFFFFFFF
};
struct nvmm_exit {
enum nvmm_exit_reason reason;
union {
...
} u;
uint64_t exitstate[8];
};
.Ed
.Pp
The
.Va reason
field indicates the reason of the VM exit.
Additional parameters describing the exit can be present in
.Va u .
.Va exitstate
contains a partial, implementation-specific VCPU state, usable as a fast-path
to retrieve certain state values.
.Pp
It is possible that a VM exit was caused by a reason internal to the host
kernel, and that VMM software should not be concerned with.
In this case, the exit reason is set to
.Cd NVMM_EXIT_NONE .
This gives a chance for VMM software to halt the VM in its tracks.
.Pp
Refer to functional examples to see precisely how to handle VM exits.
.Ss Event Injection
It is possible to inject an event into a VCPU.
An event can be a hardware interrupt, a software interrupt, or a software
exception, defined by:
.Bd -literal
enum nvmm_event_type {
NVMM_EVENT_INTERRUPT_HW,
NVMM_EVENT_INTERRUPT_SW,
NVMM_EVENT_EXCEPTION
};
struct nvmm_event {
enum nvmm_event_type type;
uint64_t vector;
union {
uint64_t error;
uint64_t prio;
} u;
};
.Ed
.Pp
This describes an event of type
.Va type ,
to be sent to vector number
.Va vector ,
with a possible additional
.Va error
or
.Va prio
code that is implementation-specific.
.Pp
It is possible that the VCPU is in a state where it cannot receive this
event, if:
.Pp
.Bl -bullet -offset indent -compact
.It
the event is a hardware interrupt, and the VCPU runs with interrupts disabled,
or
.It
the event is a non-maskable interrupt (NMI), and the VCPU is already in an
in-NMI context.
.El
.Pp
In this case,
.Fn nvmm_vcpu_inject
will return
.Er EAGAIN ,
and NVMM will cause a VM exit with reason
.Cd NVMM_EXIT_INT_READY
or
.Cd NVMM_EXIT_NMI_READY
to indicate that VMM software can now reinject the desired event.
.Ss I/O Assist
When a VM exit occurs with reason
.Cd NVMM_EXIT_IO ,
it is necessary for VMM software to emulate the associated I/O operation.
.Nm
provides an easy way for VMM software to perform that.
.Pp
.Fn nvmm_assist_io
will call the
.Fa cb
callback function and give it a
.Cd nvmm_io
structure as argument.
This structure describes an I/O transaction:
.Bd -literal
struct nvmm_io {
uint64_t port;
bool in;
size_t size;
uint8_t data[8];
};
.Ed
.Pp
The callback can emulate the operation using this descriptor, following two
unique cases:
.Pp
.Bl -bullet -offset indent -compact
.It
The operation is an input.
In this case, the callback should fill
.Va data
with the desired value.
.It
The operation is an output.
In this case, the callback should read
.Va data
to retrieve the desired value.
.El
.Pp
In either case,
.Va port
will indicate the I/O port,
.Va in
will indicate if the operation is an input, and
.Va size
will indicate the size of the access.
.Ss Mem Assist
When a VM exit occurs with reason
.Cd NVMM_EXIT_MEMORY ,
it is necessary for VMM software to emulate the associated memory operation.
.Nm
provides an easy way for VMM software to perform that, similar to the I/O
Assist.
.Pp
.Fn nvmm_assist_mem
will call the
.Fa cb
callback function and give it a
.Cd nvmm_mem
structure as argument.
This structure describes a Mem transaction:
.Bd -literal
struct nvmm_mem {
gvaddr_t gva;
gpaddr_t gpa;
bool write;
size_t size;
uint8_t data[8];
};
.Ed
.Pp
The callback can emulate the operation using this descriptor, following two
unique cases:
.Pp
.Bl -bullet -offset indent -compact
.It
The operation is a read.
In this case, the callback should fill
.Va data
with the desired value.
.It
The operation is a write.
In this case, the callback should read
.Va data
to retrieve the desired value.
.El
.Pp
In either case,
.Va gva
will indicate the guest virtual address,
.Va gpa
will indicate the guest physical address,
.Va write
will indicate if the access is a write, and
.Va size
will indicate the size of the access.
.Sh RETURN VALUES
Upon successful completion, each of these functions returns zero.
Otherwise, a value of \-1 is returned and the global
variable
.Va errno
is set to indicate the error.
.Sh FILES
Functional examples:
.Pp
.Bl -tag -width XXXX -compact
.It Pa src/share/examples/nvmm/toyvirt/
Example of virtualizer.
Launches the binary given as argument in a virtual machine.
.It Pa src/share/examples/nvmm/smallkern/
Example of a kernel that can be executed by toyvirt.
.El
.Sh ERRORS
These functions will fail if:
.Bl -tag -width [ENOBUFS]
.It Bq Er EEXIST
An attempt was made to create a machine or a VCPU that already exists.
.It Bq Er EFAULT
An attempt was made to emulate a memory-based operation in a guest, and the
guest page tables did not have the permissions necessary for the operation
to complete successfully.
.It Bq Er EINVAL
An inappropriate parameter was used.
.It Bq Er ENOBUFS
The maximum number of machines or VCPUs was reached.
.It Bq Er ENOENT
A query was made on a machine or a VCPU that does not exist.
.It Bq Er EPERM
An attempt was made to access a machine that does not belong to the process.
.El
.Pp
In addition,
.Fn nvmm_vcpu_inject
uses the following error codes:
.Bl -tag -width [ENOBUFS]
.It Bq Er EAGAIN
The VCPU cannot receive the event immediately.
.El
.Sh AUTHORS
NVMM was designed and implemented by
.An Maxime Villard .

433
lib/libnvmm/libnvmm.c Normal file
View File

@ -0,0 +1,433 @@
/* $NetBSD: libnvmm.c,v 1.1 2018/11/10 09:28:56 maxv Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Maxime Villard.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include "nvmm.h"
static int nvmm_fd = -1;
static size_t nvmm_page_size = 0;
/* -------------------------------------------------------------------------- */
/*
 * Register a (gpa, hva, size) area in the machine's local bookkeeping.
 * Fails with EEXIST if the new GPA range overlaps an existing area, or
 * with realloc's errno if growing the array fails.
 * Returns 0 on success, -1 on error with errno set.
 */
static int
_nvmm_area_add(struct nvmm_machine *mach, gpaddr_t gpa, uintptr_t hva,
    size_t size)
{
	struct nvmm_area *area;
	void *ptr;
	size_t i;

	/*
	 * Areas are half-open intervals [gpa, gpa+size). Two intervals
	 * overlap iff each starts before the other ends; adjacent areas
	 * are allowed.
	 */
	for (i = 0; i < mach->nareas; i++) {
		if (gpa < mach->areas[i].gpa + mach->areas[i].size &&
		    mach->areas[i].gpa < gpa + size) {
			goto error;
		}
	}

	/*
	 * Grow the array first; only bump nareas once realloc succeeded,
	 * so a failure leaves the structure consistent.
	 */
	ptr = realloc(mach->areas,
	    (mach->nareas + 1) * sizeof(struct nvmm_area));
	if (ptr == NULL)
		return -1;
	mach->areas = ptr;
	mach->nareas++;

	area = &mach->areas[mach->nareas-1];
	area->gpa = gpa;
	area->hva = hva;
	area->size = size;

	return 0;

error:
	errno = EEXIST;
	return -1;
}
/*
 * Remove the area exactly matching (gpa, hva, size) from the machine's
 * local bookkeeping. Fails with ENOENT if no such area is registered.
 * Returns 0 on success, -1 on error with errno set.
 */
static int
_nvmm_area_delete(struct nvmm_machine *mach, gpaddr_t gpa, uintptr_t hva,
    size_t size)
{
	size_t i;

	/* The triple must match a previous _nvmm_area_add() exactly. */
	for (i = 0; i < mach->nareas; i++) {
		if (gpa == mach->areas[i].gpa &&
		    hva == mach->areas[i].hva &&
		    size == mach->areas[i].size) {
			break;
		}
	}
	if (i == mach->nareas) {
		errno = ENOENT;
		return -1;
	}

	/*
	 * Close the hole. Source and destination overlap, so this must be
	 * memmove(), not memcpy() (undefined behavior on overlap).
	 */
	memmove(&mach->areas[i], &mach->areas[i+1],
	    (mach->nareas - i - 1) * sizeof(struct nvmm_area));
	mach->nareas--;

	return 0;
}
/* -------------------------------------------------------------------------- */
/*
 * Lazily open /dev/nvmm and cache the host page size. Idempotent: a
 * second call is a no-op once nvmm_fd is valid.
 * Returns 0 on success, -1 on error with errno set by open()/sysconf().
 */
static int
nvmm_init(void)
{
	long pagesz;

	if (nvmm_fd != -1)
		return 0;
	nvmm_fd = open("/dev/nvmm", O_RDWR);
	if (nvmm_fd == -1)
		return -1;

	/*
	 * sysconf() can fail and return -1; don't let that become a huge
	 * bogus size_t page size.
	 */
	pagesz = sysconf(_SC_PAGESIZE);
	if (pagesz == -1) {
		close(nvmm_fd);
		nvmm_fd = -1;
		return -1;
	}
	nvmm_page_size = (size_t)pagesz;

	return 0;
}
/*
 * Query the kernel for the NVMM capabilities and copy them out into the
 * caller-provided structure. Returns 0 on success, -1 on error with
 * errno set.
 */
int
nvmm_capability(struct nvmm_capability *cap)
{
	struct nvmm_ioc_capability args;

	if (nvmm_init() == -1)
		return -1;

	if (ioctl(nvmm_fd, NVMM_IOC_CAPABILITY, &args) == -1)
		return -1;

	memcpy(cap, &args.cap, sizeof(args.cap));
	return 0;
}
/*
 * Create a new virtual machine in the kernel, and initialize 'mach' to
 * describe it. Returns 0 on success, -1 on error with errno set.
 */
int
nvmm_machine_create(struct nvmm_machine *mach)
{
	struct nvmm_ioc_machine_create args;

	if (nvmm_init() == -1)
		return -1;

	if (ioctl(nvmm_fd, NVMM_IOC_MACHINE_CREATE, &args) == -1)
		return -1;

	/* Start from a clean descriptor; keep only the kernel-chosen id. */
	memset(mach, 0, sizeof(*mach));
	mach->machid = args.machid;

	return 0;
}
/*
 * Destroy the virtual machine described by 'mach' in the kernel, and
 * release the local area bookkeeping.
 * Returns 0 on success, -1 on error with errno set.
 */
int
nvmm_machine_destroy(struct nvmm_machine *mach)
{
	struct nvmm_ioc_machine_destroy args;
	int ret;

	if (nvmm_init() == -1) {
		return -1;
	}

	args.machid = mach->machid;

	ret = ioctl(nvmm_fd, NVMM_IOC_MACHINE_DESTROY, &args);
	if (ret == -1)
		return -1;

	/*
	 * Clear the pointer and count after freeing, so a stale 'mach'
	 * cannot lead to a double free or a use-after-free later.
	 */
	free(mach->areas);
	mach->areas = NULL;
	mach->nareas = 0;

	return 0;
}
/*
 * Set the machine parameter selected by 'op' to the value described by
 * 'conf'. The interpretation of 'conf' is up to the kernel, per 'op'.
 * Returns 0 on success, -1 on error with errno set.
 */
int
nvmm_machine_configure(struct nvmm_machine *mach, uint64_t op, void *conf)
{
	struct nvmm_ioc_machine_configure args;

	if (nvmm_init() == -1)
		return -1;

	args.machid = mach->machid;
	args.op = op;
	args.conf = conf;

	if (ioctl(nvmm_fd, NVMM_IOC_MACHINE_CONFIGURE, &args) == -1)
		return -1;

	return 0;
}
/*
 * Create the VCPU 'cpuid' in the machine 'mach'.
 * Returns 0 on success, -1 on error with errno set.
 */
int
nvmm_vcpu_create(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
{
	struct nvmm_ioc_vcpu_create args;

	if (nvmm_init() == -1)
		return -1;

	args.machid = mach->machid;
	args.cpuid = cpuid;

	if (ioctl(nvmm_fd, NVMM_IOC_VCPU_CREATE, &args) == -1)
		return -1;

	return 0;
}
/*
 * Destroy the VCPU 'cpuid' in the machine 'mach'.
 * Returns 0 on success, -1 on error with errno set.
 */
int
nvmm_vcpu_destroy(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
{
	struct nvmm_ioc_vcpu_destroy args;

	if (nvmm_init() == -1)
		return -1;

	args.machid = mach->machid;
	args.cpuid = cpuid;

	if (ioctl(nvmm_fd, NVMM_IOC_VCPU_DESTROY, &args) == -1)
		return -1;

	return 0;
}
/*
 * Push the state components selected by the 'flags' bitmap from the
 * caller's state area into VCPU 'cpuid' of machine 'mach'.
 * Returns 0 on success, -1 on error with errno set.
 */
int
nvmm_vcpu_setstate(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    void *state, uint64_t flags)
{
	struct nvmm_ioc_vcpu_setstate args;

	if (nvmm_init() == -1)
		return -1;

	args.machid = mach->machid;
	args.cpuid = cpuid;
	args.state = state;
	args.flags = flags;

	if (ioctl(nvmm_fd, NVMM_IOC_VCPU_SETSTATE, &args) == -1)
		return -1;

	return 0;
}
/*
 * Fetch the state components selected by the 'flags' bitmap from VCPU
 * 'cpuid' of machine 'mach' into the caller's state area.
 * Returns 0 on success, -1 on error with errno set.
 */
int
nvmm_vcpu_getstate(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    void *state, uint64_t flags)
{
	struct nvmm_ioc_vcpu_getstate args;

	if (nvmm_init() == -1)
		return -1;

	args.machid = mach->machid;
	args.cpuid = cpuid;
	args.state = state;
	args.flags = flags;

	if (ioctl(nvmm_fd, NVMM_IOC_VCPU_GETSTATE, &args) == -1)
		return -1;

	return 0;
}
/*
 * Inject the given event (interrupt or exception) into VCPU 'cpuid' of
 * machine 'mach'. The event is copied, so the caller's buffer may be
 * reused after return.
 * Returns 0 on success, -1 on error with errno set (EAGAIN if the VCPU
 * cannot receive the event right now, per the ioctl).
 */
int
nvmm_vcpu_inject(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_event *event)
{
	struct nvmm_ioc_vcpu_inject args;

	if (nvmm_init() == -1)
		return -1;

	args.machid = mach->machid;
	args.cpuid = cpuid;
	memcpy(&args.event, event, sizeof(args.event));

	if (ioctl(nvmm_fd, NVMM_IOC_VCPU_INJECT, &args) == -1)
		return -1;

	return 0;
}
/*
 * Run VCPU 'cpuid' of machine 'mach' until the next VM exit, then copy
 * the exit description out into 'exit'.
 * Returns 0 on success, -1 on error with errno set.
 */
int
nvmm_vcpu_run(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_exit *exit)
{
	struct nvmm_ioc_vcpu_run args;

	if (nvmm_init() == -1)
		return -1;

	args.machid = mach->machid;
	args.cpuid = cpuid;
	/* Zero the exit slot so no stack garbage ever reaches the caller. */
	memset(&args.exit, 0, sizeof(args.exit));

	if (ioctl(nvmm_fd, NVMM_IOC_VCPU_RUN, &args) == -1)
		return -1;

	memcpy(exit, &args.exit, sizeof(args.exit));
	return 0;
}
/*
 * Map the host virtual range [hva, hva+size) at guest physical address
 * 'gpa' in machine 'mach', and register the association locally so that
 * nvmm_gpa_to_hva() can translate into it.
 * Returns 0 on success, -1 on error with errno set.
 */
int
nvmm_gpa_map(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa,
    size_t size, int flags)
{
	struct nvmm_ioc_gpa_map args;
	int ret;

	if (nvmm_init() == -1) {
		return -1;
	}

	args.machid = mach->machid;
	args.hva = hva;
	args.gpa = gpa;
	args.size = size;
	args.flags = flags;

	ret = ioctl(nvmm_fd, NVMM_IOC_GPA_MAP, &args);
	if (ret == -1)
		return -1;

	/*
	 * Record the area locally. If that fails (overlap or OOM), roll
	 * back the kernel-side mapping so both views stay consistent.
	 */
	ret = _nvmm_area_add(mach, gpa, hva, size);
	if (ret == -1) {
		nvmm_gpa_unmap(mach, hva, gpa, size);
		return -1;
	}

	return 0;
}
/*
 * Remove the GPA<->HVA association for [gpa, gpa+size) from machine
 * 'mach', both in the local bookkeeping and in the kernel, then unmap
 * the host virtual range from the calling process.
 * Returns 0 on success, -1 on error with errno set.
 */
int
nvmm_gpa_unmap(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa,
    size_t size)
{
	struct nvmm_ioc_gpa_unmap args;
	int ret;

	if (nvmm_init() == -1) {
		return -1;
	}

	/* The (gpa, hva, size) triple must exactly match a previous map. */
	ret = _nvmm_area_delete(mach, gpa, hva, size);
	if (ret == -1)
		return -1;

	args.machid = mach->machid;
	args.gpa = gpa;
	args.size = size;

	/*
	 * NOTE(review): if this ioctl fails, the local area list has
	 * already dropped the entry while the kernel still holds the
	 * mapping -- confirm this divergence is acceptable.
	 */
	ret = ioctl(nvmm_fd, NVMM_IOC_GPA_UNMAP, &args);
	if (ret == -1)
		return -1;

	/* Drop the range from our own address space; propagate the result. */
	ret = munmap((void *)hva, size);

	return ret;
}
/*
* nvmm_gva_to_gpa(): architecture-specific.
*/
/*
 * Translate the page-aligned guest physical address 'gpa' into a host
 * virtual address, using the areas registered by nvmm_gpa_map().
 * Returns 0 on success; -1 with EINVAL if 'gpa' is not page-aligned, or
 * ENOENT if no area covers it.
 */
int
nvmm_gpa_to_hva(struct nvmm_machine *mach, gpaddr_t gpa, uintptr_t *hva)
{
	size_t i;

	/*
	 * Make sure nvmm_page_size is initialized: if this is the first
	 * libnvmm call ever made, it is still 0 and the modulo below
	 * would divide by zero.
	 */
	if (nvmm_init() == -1) {
		return -1;
	}

	if (gpa % nvmm_page_size != 0) {
		errno = EINVAL;
		return -1;
	}

	for (i = 0; i < mach->nareas; i++) {
		if (gpa < mach->areas[i].gpa) {
			continue;
		}
		if (gpa >= mach->areas[i].gpa + mach->areas[i].size) {
			continue;
		}

		*hva = mach->areas[i].hva + (gpa - mach->areas[i].gpa);
		return 0;
	}

	errno = ENOENT;
	return -1;
}
/*
* nvmm_assist_io(): architecture-specific.
*/

592
lib/libnvmm/libnvmm_x86.c Normal file
View File

@ -0,0 +1,592 @@
/* $NetBSD: libnvmm_x86.c,v 1.1 2018/11/10 09:28:56 maxv Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Maxime Villard.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <machine/vmparam.h>
#include <machine/pte.h>
#include <machine/psl.h>
#include "nvmm.h"
#include <x86/specialreg.h>
/* -------------------------------------------------------------------------- */
#define PTE32_L1_SHIFT 12
#define PTE32_L2_SHIFT 22
#define PTE32_L2_MASK 0xffc00000
#define PTE32_L1_MASK 0x003ff000
#define PTE32_L2_FRAME (PTE32_L2_MASK)
#define PTE32_L1_FRAME (PTE32_L2_FRAME|PTE32_L1_MASK)
#define pte32_l1idx(va) (((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
#define pte32_l2idx(va) (((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)
typedef uint32_t pte_32bit_t;
/*
 * Walk the 2-level 32-bit (non-PAE) page-table tree rooted at 'cr3' to
 * translate the guest virtual address 'gva' into a guest physical
 * address, accumulating the page protections into '*prot'.
 * Returns 0 on success, -1 on a translation failure (invalid entry,
 * unreachable table page, or an illegal superpage). errno is set by the
 * caller, not here.
 */
static int
x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
    gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
{
	gpaddr_t L2gpa, L1gpa;
	uintptr_t L2hva, L1hva;
	pte_32bit_t *pdir, pte;

	/* We begin with an RWXU access. */
	*prot = NVMM_PROT_ALL;

	/* Parse L2. */
	L2gpa = (cr3 & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
		return -1;
	pdir = (pte_32bit_t *)L2hva;
	pte = pdir[pte32_l2idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	/* Each non-user or read-only level restricts the final access. */
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	/* A superpage here is only legal when CR4.PSE is enabled. */
	if ((pte & PG_PS) && !has_pse)
		return -1;
	if (pte & PG_PS) {
		/*
		 * 4MB superpage: the walk ends at L2.
		 * NOTE(review): the low gva bits are not folded into *gpa,
		 * so this returns the superpage base rather than the gpa
		 * of the 4KB page containing gva -- confirm callers only
		 * need the base of the mapping.
		 */
		*gpa = (pte & PTE32_L2_FRAME);
		return 0;
	}

	/* Parse L1. */
	L1gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
		return -1;
	pdir = (pte_32bit_t *)L1hva;
	pte = pdir[pte32_l1idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	/* PS is meaningless at L1; treat it as a malformed entry. */
	if (pte & PG_PS)
		return -1;

	*gpa = (pte & PG_FRAME);
	return 0;
}
/* -------------------------------------------------------------------------- */
#define PTE32_PAE_L1_SHIFT 12
#define PTE32_PAE_L2_SHIFT 21
#define PTE32_PAE_L3_SHIFT 30
#define PTE32_PAE_L3_MASK 0xc0000000
#define PTE32_PAE_L2_MASK 0x3fe00000
#define PTE32_PAE_L1_MASK 0x001ff000
#define PTE32_PAE_L3_FRAME (PTE32_PAE_L3_MASK)
#define PTE32_PAE_L2_FRAME (PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
#define PTE32_PAE_L1_FRAME (PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)
#define pte32_pae_l1idx(va) (((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
#define pte32_pae_l2idx(va) (((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
#define pte32_pae_l3idx(va) (((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)
typedef uint64_t pte_32bit_pae_t;
/*
 * Walk the 3-level 32-bit PAE page-table tree rooted at 'cr3' to
 * translate the guest virtual address 'gva' into a guest physical
 * address, accumulating the page protections into '*prot'.
 * Returns 0 on success, -1 on a translation failure. errno is set by
 * the caller, not here.
 */
static int
x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
    gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
{
	gpaddr_t L3gpa, L2gpa, L1gpa;
	uintptr_t L3hva, L2hva, L1hva;
	pte_32bit_pae_t *pdir, pte;

	/* We begin with an RWXU access. */
	*prot = NVMM_PROT_ALL;

	/*
	 * Parse L3 (the PDPT). Only NX is checked here -- this level has
	 * no user/write bits in this walk.
	 */
	L3gpa = (cr3 & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
		return -1;
	pdir = (pte_32bit_pae_t *)L3hva;
	pte = pdir[pte32_pae_l3idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	/* No superpages at L3 in PAE mode. */
	if (pte & PG_PS)
		return -1;

	/* Parse L2. */
	L2gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
		return -1;
	pdir = (pte_32bit_pae_t *)L2hva;
	pte = pdir[pte32_pae_l2idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	/* Each restrictive level narrows the final protections. */
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if ((pte & PG_PS) && !has_pse)
		return -1;
	if (pte & PG_PS) {
		/*
		 * 2MB superpage: the walk ends at L2.
		 * NOTE(review): as in the non-PAE walk, the low gva bits
		 * are not folded into *gpa -- confirm the superpage base
		 * is what callers expect.
		 */
		*gpa = (pte & PTE32_PAE_L2_FRAME);
		return 0;
	}

	/* Parse L1. */
	L1gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
		return -1;
	pdir = (pte_32bit_pae_t *)L1hva;
	pte = pdir[pte32_pae_l1idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	/* PS is meaningless at L1; treat it as a malformed entry. */
	if (pte & PG_PS)
		return -1;

	*gpa = (pte & PG_FRAME);
	return 0;
}
/* -------------------------------------------------------------------------- */
#define PTE64_L1_SHIFT 12
#define PTE64_L2_SHIFT 21
#define PTE64_L3_SHIFT 30
#define PTE64_L4_SHIFT 39
#define PTE64_L4_MASK 0x0000ff8000000000
#define PTE64_L3_MASK 0x0000007fc0000000
#define PTE64_L2_MASK 0x000000003fe00000
#define PTE64_L1_MASK 0x00000000001ff000
#define PTE64_L4_FRAME PTE64_L4_MASK
#define PTE64_L3_FRAME (PTE64_L4_FRAME|PTE64_L3_MASK)
#define PTE64_L2_FRAME (PTE64_L3_FRAME|PTE64_L2_MASK)
#define PTE64_L1_FRAME (PTE64_L2_FRAME|PTE64_L1_MASK)
#define pte64_l1idx(va) (((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
#define pte64_l2idx(va) (((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
#define pte64_l3idx(va) (((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
#define pte64_l4idx(va) (((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)
typedef uint64_t pte_64bit_t;
/*
 * A 64-bit virtual address is canonical when bits 63:47 all carry the
 * same value (all zeroes or all ones).
 */
static inline bool
x86_gva_64bit_canonical(gvaddr_t gva)
{
	/* Bits 63:47 must have the same value. */
#define SIGN_EXTEND	0xffff800000000000ULL
	uint64_t top = gva & SIGN_EXTEND;

	return top == 0 || top == SIGN_EXTEND;
}
/*
 * Walk the 4-level 64-bit (long mode) page-table tree rooted at 'cr3'
 * to translate the guest virtual address 'gva' into a guest physical
 * address, accumulating the page protections into '*prot'.
 * Non-canonical addresses fail immediately. Returns 0 on success, -1 on
 * a translation failure. errno is set by the caller, not here.
 */
static int
x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
    gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
{
	gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
	uintptr_t L4hva, L3hva, L2hva, L1hva;
	pte_64bit_t *pdir, pte;

	/* We begin with an RWXU access. */
	*prot = NVMM_PROT_ALL;

	if (!x86_gva_64bit_canonical(gva))
		return -1;

	/* Parse L4 (PML4). */
	L4gpa = (cr3 & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva) == -1)
		return -1;
	pdir = (pte_64bit_t *)L4hva;
	pte = pdir[pte64_l4idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	/* Each restrictive level narrows the final protections. */
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	/* No superpages at L4. */
	if (pte & PG_PS)
		return -1;

	/* Parse L3 (PDPT). */
	L3gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
		return -1;
	pdir = (pte_64bit_t *)L3hva;
	pte = pdir[pte64_l3idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	/*
	 * NOTE(review): the has_pse gate is applied to 1GB pages here;
	 * confirm this is the intended condition in long mode.
	 */
	if ((pte & PG_PS) && !has_pse)
		return -1;
	if (pte & PG_PS) {
		/*
		 * 1GB superpage: the walk ends at L3.
		 * NOTE(review): the low gva bits are not folded into *gpa
		 * -- this returns the superpage base.
		 */
		*gpa = (pte & PTE64_L3_FRAME);
		return 0;
	}

	/* Parse L2 (PD). */
	L2gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
		return -1;
	pdir = (pte_64bit_t *)L2hva;
	pte = pdir[pte64_l2idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if ((pte & PG_PS) && !has_pse)
		return -1;
	if (pte & PG_PS) {
		/* 2MB superpage: the walk ends at L2 (same caveat as L3). */
		*gpa = (pte & PTE64_L2_FRAME);
		return 0;
	}

	/* Parse L1 (PT). */
	L1gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
		return -1;
	pdir = (pte_64bit_t *)L1hva;
	pte = pdir[pte64_l1idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	/* PS is meaningless at L1; treat it as a malformed entry. */
	if (pte & PG_PS)
		return -1;

	*gpa = (pte & PG_FRAME);
	return 0;
}
/*
 * Dispatch a guest-virtual to guest-physical translation to the walker
 * matching the VCPU's current paging mode (none / 32-bit / 32-bit PAE /
 * 64-bit). On failure, errno is set to EFAULT.
 */
static inline int
x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
    gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
	bool is_pae, is_lng, has_pse;
	uint64_t cr3;
	int ret;

	/* Paging disabled: virtual == physical. */
	if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
		*gpa = gva;
		return 0;
	}

	is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
	is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LME) != 0;
	has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
	cr3 = state->crs[NVMM_X64_CR_CR3];

	if (is_lng) {
		if (is_pae) {
			/* 64bit (long mode). */
			ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa,
			    has_pse, prot);
		} else {
			/* Long mode without PAE: invalid combination. */
			ret = -1;
		}
	} else if (is_pae) {
		/* 32bit PAE */
		ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa,
		    has_pse, prot);
	} else {
		/* 32bit */
		ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse,
		    prot);
	}

	if (ret == -1) {
		errno = EFAULT;
	}

	return ret;
}
/*
 * Translate a page-aligned guest virtual address on VCPU 'cpuid' of
 * machine 'mach' into a guest physical address, returning the page
 * protections in '*prot'. Returns 0 on success, -1 with errno set
 * (EINVAL on misalignment, EFAULT on translation failure).
 */
int
nvmm_gva_to_gpa(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
	struct nvmm_x64_state state;

	/* Only page-aligned addresses are translatable. */
	if ((gva & PAGE_MASK) != 0) {
		errno = EINVAL;
		return -1;
	}

	/* The walk needs the control registers and EFER. */
	if (nvmm_vcpu_getstate(mach, cpuid, &state,
	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS) == -1)
		return -1;

	return x86_gva_to_gpa(mach, &state, gva, gpa, prot);
}
/* -------------------------------------------------------------------------- */
/*
 * True if the vCPU has long mode enabled.
 * NOTE(review): this tests EFER.LME, not EFER.LMA — assumes paging and
 * PAE are on whenever LME is set, so the two coincide; TODO confirm.
 */
static inline bool
is_long_mode(struct nvmm_x64_state *state)
{
	uint64_t efer = state->msrs[NVMM_X64_MSR_EFER];

	return (efer & EFER_LME) != 0;
}
/*
 * An IN stores into guest memory, so the target page must be
 * writable; an OUT only reads, which is always permitted here.
 */
static inline bool
is_illegal(struct nvmm_io *io, nvmm_prot_t prot)
{
	if (!io->in)
		return false;
	return (prot & NVMM_PROT_WRITE) == 0;
}
/*
 * Apply segmentation to '*gva': check the segment is present and that
 * the access fits within the segment limit, then add the segment base.
 * Returns 0 on success, -1 with errno set to EFAULT on failure.
 */
static int
segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva, size_t size)
{
	uint64_t limit;

	/*
	 * This is incomplete. We should check topdown, etc, really that's
	 * tiring.
	 */
	if (__predict_false(!seg->attrib.p)) {
		goto error;
	}

	limit = (seg->limit + 1);
	if (__predict_true(seg->attrib.gran)) {
		limit *= PAGE_SIZE;
	}

	/*
	 * The segment limit bounds the *offset* (effective address), not
	 * the linear address: the check must be done before adding the
	 * base. The previous form included seg->base in the comparison,
	 * falsely rejecting valid accesses through segments with a
	 * non-zero base.
	 */
	if (__predict_false(*gva + size > limit)) {
		goto error;
	}

	*gva += seg->base;
	return 0;

error:
	errno = EFAULT;
	return -1;
}
/*
 * Emulate an I/O port access on behalf of the guest, invoking 'cb' to
 * perform the actual port read/write. Handles the register form
 * (IN/OUT, data in RAX) and the string form (INS/OUTS, data in guest
 * memory), including buffers that straddle a page boundary.
 * Returns 0 on success, -1 with errno set on failure.
 */
int
nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_exit *exit, void (*cb)(struct nvmm_io *))
{
	struct nvmm_x64_state state;
	struct nvmm_io io;
	nvmm_prot_t prot;
	size_t remain = 0, done = 0;
	uintptr_t hva;
	gvaddr_t gva, off;
	gpaddr_t gpa;
	uint64_t val;
	uint8_t tmp[8];
	uint8_t *ptr = NULL, *ptr2 = NULL;
	bool cross;
	int reg, ret;

	if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
		errno = EINVAL;
		return -1;
	}

	io.port = exit->u.io.port;
	io.in = (exit->u.io.type == NVMM_EXIT_IO_IN);
	io.size = exit->u.io.operand_size;

	ret = nvmm_vcpu_getstate(mach, cpuid, &state,
	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
	if (ret == -1)
		return -1;

	cross = false;

	/*
	 * The string forms use RDI as destination (INS) and RSI as
	 * source (OUTS). The previous code used RSI for both, which
	 * broke INS emulation.
	 */
	reg = io.in ? NVMM_X64_GPR_RDI : NVMM_X64_GPR_RSI;

	if (!exit->u.io.str) {
		/* Register form: the data lives in RAX. */
		ptr = (uint8_t *)&state.gprs[NVMM_X64_GPR_RAX];
	} else {
		val = state.gprs[reg];
		/* Truncate the index register to the address size. */
		switch (exit->u.io.address_size) {
		case 8:
			gva = val;
			break;
		case 4:
			gva = (val & 0x00000000FFFFFFFF);
			break;
		case 2:
		default: /* impossible */
			gva = (val & 0x000000000000FFFF);
			break;
		}

		if (!is_long_mode(&state)) {
			/*
			 * NOTE(review): for INS the segment is
			 * architecturally always ES; assumes the kernel
			 * fills exit->u.io.seg accordingly -- TODO confirm.
			 */
			ret = segment_apply(&state.segs[exit->u.io.seg], &gva,
			    io.size);
			if (ret == -1)
				return -1;
		}

		off = (gva & PAGE_MASK);
		gva &= ~PAGE_MASK;

		ret = x86_gva_to_gpa(mach, &state, gva, &gpa, &prot);
		if (ret == -1)
			return -1;
		if (__predict_false(is_illegal(&io, prot))) {
			errno = EFAULT;
			return -1;
		}
		ret = nvmm_gpa_to_hva(mach, gpa, &hva);
		if (ret == -1)
			return -1;
		ptr = (uint8_t *)hva + off;

		/*
		 * Special case. If the buffer is in between two pages, we
		 * need to retrieve data from the next page, since the two
		 * guest pages are not necessarily contiguous in host
		 * memory.
		 */
		if (__predict_false(off + io.size > PAGE_SIZE)) {
			cross = true;
			remain = off + io.size - PAGE_SIZE;
			done = PAGE_SIZE - off;

			memcpy(tmp, ptr, done);
			ret = x86_gva_to_gpa(mach, &state, gva + PAGE_SIZE,
			    &gpa, &prot);
			if (ret == -1)
				return -1;
			if (__predict_false(is_illegal(&io, prot))) {
				errno = EFAULT;
				return -1;
			}
			ret = nvmm_gpa_to_hva(mach, gpa, &hva);
			if (ret == -1)
				return -1;
			memcpy(&tmp[done], (uint8_t *)hva, remain);

			/*
			 * 'ptr2' must point at the second page in host
			 * memory, so that IN stores reach the guest. It
			 * previously pointed into 'tmp', whose contents
			 * were simply discarded.
			 */
			ptr2 = (uint8_t *)hva;
		}
	}

	if (!io.in) {
		/*
		 * OUT/OUTS: hand the guest data to the callback. For a
		 * cross-page buffer, read the assembled copy in 'tmp'
		 * rather than running past the first page's mapping.
		 */
		memcpy(io.data, cross ? tmp : ptr, io.size);
	}

	(*cb)(&io);

	if (io.in) {
		if (!exit->u.io.str) {
			/*
			 * A 32bit IN zero-extends into RAX; 8bit and
			 * 16bit operands preserve the upper bits. The
			 * previous code cleared RAX unconditionally.
			 */
			if (io.size == 4)
				state.gprs[NVMM_X64_GPR_RAX] = 0;
		}
		if (__predict_false(cross)) {
			memcpy(ptr, io.data, done);
			memcpy(ptr2, &io.data[done], remain);
		} else {
			memcpy(ptr, io.data, io.size);
		}
	}

	if (exit->u.io.str) {
		/*
		 * String I/O always advances (or rewinds, per RFLAGS.DF)
		 * the index register, whether or not a REP prefix is
		 * present. Previously this was done only for REP.
		 */
		if (state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
			state.gprs[reg] -= io.size;
		} else {
			state.gprs[reg] += io.size;
		}
	}

	if (exit->u.io.rep) {
		/*
		 * NOTE(review): RCX should arguably be masked/truncated
		 * to the address size -- TODO confirm.
		 */
		state.gprs[NVMM_X64_GPR_RCX] -= 1;
		if (state.gprs[NVMM_X64_GPR_RCX] == 0) {
			state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
		}
	} else {
		state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
	}

	ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
	if (ret == -1)
		return -1;

	return 0;
}
/* -------------------------------------------------------------------------- */

/*
 * Emulate a guest memory (MMIO) access on behalf of the guest.
 * Not implemented yet: after validating the exit reason this always
 * fails with ENOSYS. The 'cb' callback is never invoked.
 */
int
nvmm_assist_mem(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_exit *exit, void (*cb)(struct nvmm_mem *))
{
	if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) {
		errno = EINVAL;
		return -1;
	}

	// TODO: decode and emulate the faulting instruction (Mem Assist).
	errno = ENOSYS;
	return -1;
}

103
lib/libnvmm/nvmm.h Normal file
View File

@ -0,0 +1,103 @@
/* $NetBSD: nvmm.h,v 1.1 2018/11/10 09:28:56 maxv Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Maxime Villard.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _LIBNVMM_H_
#define _LIBNVMM_H_
#include <stdint.h>
#include <stdbool.h>
#include <dev/nvmm/nvmm.h>
#include <dev/nvmm/nvmm_ioctl.h>
#ifdef __x86_64__
#include <dev/nvmm/x86/nvmm_x86.h>
#endif
/*
 * A guest-physical <-> host-virtual mapping, as tracked by the library
 * for address translation (see nvmm_gpa_to_hva()).
 */
struct nvmm_area {
	gpaddr_t gpa;		/* guest-physical base of the mapping */
	uintptr_t hva;		/* host-virtual base of the backing memory */
	size_t size;		/* length of the mapping, in bytes */
};

/*
 * A virtual machine handle: the kernel machine id plus the list of
 * memory areas currently mapped into the guest.
 */
struct nvmm_machine {
	nvmm_machid_t machid;	/* kernel-assigned machine identifier */
	struct nvmm_area *areas;	/* array of mapped areas */
	size_t nareas;		/* number of entries in 'areas' */
};

/*
 * Argument to the nvmm_assist_io() callback: one I/O port access.
 * For an IN the callback fills 'data'; for an OUT libnvmm fills it
 * before invoking the callback.
 */
struct nvmm_io {
	uint64_t port;		/* I/O port number */
	bool in;		/* true for IN, false for OUT */
	size_t size;		/* operand size in bytes, at most 8 */
	uint8_t data[8];	/* the data being transferred */
};

/*
 * Argument to the nvmm_assist_mem() callback: one guest memory (MMIO)
 * access. Currently unused, nvmm_assist_mem() being a stub.
 */
struct nvmm_mem {
	gvaddr_t gva;		/* guest-virtual address of the access */
	gpaddr_t gpa;		/* guest-physical address of the access */
	bool write;		/* true for a write access */
	size_t size;		/* access size in bytes, at most 8 */
	uint8_t data[8];	/* the data being transferred */
};

/* Page protection bits, as reported by nvmm_gva_to_gpa(). */
#define NVMM_PROT_READ		0x01
#define NVMM_PROT_WRITE		0x02
#define NVMM_PROT_EXEC		0x04
#define NVMM_PROT_USER		0x08
#define NVMM_PROT_ALL		0x0F
typedef uint64_t nvmm_prot_t;
/* Global queries. */
int nvmm_capability(struct nvmm_capability *);

/* Machine lifecycle. */
int nvmm_machine_create(struct nvmm_machine *);
int nvmm_machine_destroy(struct nvmm_machine *);
int nvmm_machine_configure(struct nvmm_machine *, uint64_t, void *);

/* vCPU lifecycle and execution. */
int nvmm_vcpu_create(struct nvmm_machine *, nvmm_cpuid_t);
int nvmm_vcpu_destroy(struct nvmm_machine *, nvmm_cpuid_t);
int nvmm_vcpu_setstate(struct nvmm_machine *, nvmm_cpuid_t, void *, uint64_t);
int nvmm_vcpu_getstate(struct nvmm_machine *, nvmm_cpuid_t, void *, uint64_t);
int nvmm_vcpu_inject(struct nvmm_machine *, nvmm_cpuid_t, struct nvmm_event *);
int nvmm_vcpu_run(struct nvmm_machine *, nvmm_cpuid_t, struct nvmm_exit *);

/* Guest memory mapping and address translation. */
int nvmm_gpa_map(struct nvmm_machine *, uintptr_t, gpaddr_t, size_t, int);
int nvmm_gpa_unmap(struct nvmm_machine *, uintptr_t, gpaddr_t, size_t);
int nvmm_gva_to_gpa(struct nvmm_machine *, nvmm_cpuid_t, gvaddr_t, gpaddr_t *,
    nvmm_prot_t *);
int nvmm_gpa_to_hva(struct nvmm_machine *, gpaddr_t, uintptr_t *);

/* Exit assists: emulate I/O port and memory accesses for the guest. */
int nvmm_assist_io(struct nvmm_machine *, nvmm_cpuid_t, struct nvmm_exit *,
    void (*)(struct nvmm_io *));
int nvmm_assist_mem(struct nvmm_machine *, nvmm_cpuid_t, struct nvmm_exit *,
    void (*)(struct nvmm_mem *));
#endif /* _LIBNVMM_H_ */

View File

@ -0,0 +1,5 @@
# $NetBSD: shlib_version,v 1.1 2018/11/10 09:28:56 maxv Exp $
# Remember to update distrib/sets/lists/base/shl.* when changing
#
major=0
minor=1