2019-04-07 17:13:03 +03:00
|
|
|
.\" $NetBSD: libnvmm.3,v 1.14 2019/04/07 14:13:03 maxv Exp $
|
2018-11-10 13:57:06 +03:00
|
|
|
.\"
|
2019-02-05 16:56:32 +03:00
|
|
|
.\" Copyright (c) 2018, 2019 The NetBSD Foundation, Inc.
|
2018-11-10 13:57:06 +03:00
|
|
|
.\" All rights reserved.
|
|
|
|
.\"
|
|
|
|
.\" This code is derived from software contributed to The NetBSD Foundation
|
|
|
|
.\" by Maxime Villard.
|
|
|
|
.\"
|
|
|
|
.\" Redistribution and use in source and binary forms, with or without
|
|
|
|
.\" modification, are permitted provided that the following conditions
|
|
|
|
.\" are met:
|
|
|
|
.\" 1. Redistributions of source code must retain the above copyright
|
|
|
|
.\" notice, this list of conditions and the following disclaimer.
|
|
|
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
.\" notice, this list of conditions and the following disclaimer in the
|
|
|
|
.\" documentation and/or other materials provided with the distribution.
|
|
|
|
.\"
|
|
|
|
.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
|
|
|
.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
|
|
.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
|
|
.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
|
|
|
.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
.\" POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
.\"
|
2019-04-07 17:13:03 +03:00
|
|
|
.Dd April 7, 2019
|
2018-11-10 12:28:56 +03:00
|
|
|
.Dt LIBNVMM 3
|
|
|
|
.Os
|
|
|
|
.Sh NAME
|
|
|
|
.Nm libnvmm
|
|
|
|
.Nd NetBSD Virtualization API
|
|
|
|
.Sh LIBRARY
|
|
|
|
.Lb libnvmm
|
|
|
|
.Sh SYNOPSIS
|
|
|
|
.In nvmm.h
|
|
|
|
.Ft int
|
|
|
|
.Fn nvmm_capability "struct nvmm_capability *cap"
|
|
|
|
.Ft int
|
|
|
|
.Fn nvmm_machine_create "struct nvmm_machine *mach"
|
|
|
|
.Ft int
|
|
|
|
.Fn nvmm_machine_destroy "struct nvmm_machine *mach"
|
|
|
|
.Ft int
|
|
|
|
.Fn nvmm_machine_configure "struct nvmm_machine *mach" "uint64_t op" \
|
|
|
|
"void *conf"
|
|
|
|
.Ft int
|
|
|
|
.Fn nvmm_vcpu_create "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid"
|
|
|
|
.Ft int
|
|
|
|
.Fn nvmm_vcpu_destroy "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid"
|
|
|
|
.Ft int
|
|
|
|
.Fn nvmm_vcpu_getstate "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \
|
|
|
|
"void *state" "uint64_t flags"
|
|
|
|
.Ft int
|
|
|
|
.Fn nvmm_vcpu_setstate "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \
|
|
|
|
"void *state" "uint64_t flags"
|
|
|
|
.Ft int
|
|
|
|
.Fn nvmm_vcpu_inject "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \
|
|
|
|
"struct nvmm_event *event"
|
|
|
|
.Ft int
|
|
|
|
.Fn nvmm_vcpu_run "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \
|
|
|
|
"struct nvmm_exit *exit"
|
|
|
|
.Ft int
|
2018-12-15 16:39:43 +03:00
|
|
|
.Fn nvmm_hva_map "struct nvmm_machine *mach" "uintptr_t hva" "size_t size"
|
|
|
|
.Ft int
|
|
|
|
.Fn nvmm_hva_unmap "struct nvmm_machine *mach" "uintptr_t hva" "size_t size"
|
|
|
|
.Ft int
|
2018-11-10 12:28:56 +03:00
|
|
|
.Fn nvmm_gpa_map "struct nvmm_machine *mach" "uintptr_t hva" "gpaddr_t gpa" \
|
2019-03-21 23:21:40 +03:00
|
|
|
"size_t size" "int prot"
|
2018-11-10 12:28:56 +03:00
|
|
|
.Ft int
|
|
|
|
.Fn nvmm_gpa_unmap "struct nvmm_machine *mach" "uintptr_t hva" "gpaddr_t gpa" \
|
|
|
|
"size_t size"
|
|
|
|
.Ft int
|
|
|
|
.Fn nvmm_gva_to_gpa "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \
|
|
|
|
"gvaddr_t gva" "gpaddr_t *gpa" "nvmm_prot_t *prot"
|
|
|
|
.Ft int
|
|
|
|
.Fn nvmm_gpa_to_hva "struct nvmm_machine *mach" "gpaddr_t gpa" \
|
2019-04-04 20:33:47 +03:00
|
|
|
"uintptr_t *hva" "nvmm_prot_t *prot"
|
2018-12-27 10:22:31 +03:00
|
|
|
.Ft void
|
|
|
|
.Fn nvmm_callbacks_register "const struct nvmm_callbacks *cbs"
|
2018-11-10 12:28:56 +03:00
|
|
|
.Ft int
|
|
|
|
.Fn nvmm_assist_io "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \
|
2018-12-27 10:22:31 +03:00
|
|
|
"struct nvmm_exit *exit"
|
2018-11-10 12:28:56 +03:00
|
|
|
.Ft int
|
|
|
|
.Fn nvmm_assist_mem "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \
|
2018-12-27 10:22:31 +03:00
|
|
|
"struct nvmm_exit *exit"
|
2018-11-10 12:28:56 +03:00
|
|
|
.Sh DESCRIPTION
|
|
|
|
.Nm
|
|
|
|
provides a library for VMM software to handle hardware-accelerated virtual
|
|
|
|
machines in
|
|
|
|
.Nx .
|
|
|
|
A virtual machine is described by an opaque structure,
|
|
|
|
.Cd nvmm_machine .
|
|
|
|
VMM software should not attempt to modify this structure directly, and should
|
|
|
|
use the API provided by
|
|
|
|
.Nm
|
2019-02-05 16:56:32 +03:00
|
|
|
to manage virtual machines.
|
2018-11-10 12:28:56 +03:00
|
|
|
.Pp
|
|
|
|
.Fn nvmm_capability
|
|
|
|
gets the capabilities of NVMM.
|
2019-02-05 16:56:32 +03:00
|
|
|
See
|
|
|
|
.Sx NVMM Capability
|
|
|
|
below for details.
|
2018-11-10 12:28:56 +03:00
|
|
|
.Pp
|
|
|
|
.Fn nvmm_machine_create
|
|
|
|
creates a virtual machine in the kernel.
|
|
|
|
The
|
|
|
|
.Fa mach
|
|
|
|
structure is initialized, and describes the machine.
|
|
|
|
.Pp
|
|
|
|
.Fn nvmm_machine_destroy
|
|
|
|
destroys the virtual machine described in
|
|
|
|
.Fa mach .
|
|
|
|
.Pp
|
|
|
|
.Fn nvmm_machine_configure
|
|
|
|
configures, on the machine
|
|
|
|
.Fa mach ,
|
|
|
|
the parameter indicated in
|
|
|
|
.Fa op .
|
|
|
|
.Fa conf
|
|
|
|
describes the value of the parameter.
|
|
|
|
.Pp
|
|
|
|
.Fn nvmm_vcpu_create
|
|
|
|
creates a virtual CPU in the machine
|
|
|
|
.Fa mach ,
|
|
|
|
giving it the CPU id
|
|
|
|
.Fa cpuid .
|
|
|
|
.Pp
|
|
|
|
.Fn nvmm_vcpu_destroy
|
|
|
|
destroys the virtual CPU identified by
|
|
|
|
.Fa cpuid
|
|
|
|
in the machine
|
|
|
|
.Fa mach .
|
|
|
|
.Pp
|
|
|
|
.Fn nvmm_vcpu_getstate
|
|
|
|
gets the state of the virtual CPU identified by
|
|
|
|
.Fa cpuid
|
|
|
|
in the machine
|
|
|
|
.Fa mach .
|
|
|
|
The
|
|
|
|
.Fa state
|
|
|
|
argument is the address of a state area, and
|
|
|
|
.Fa flags
|
|
|
|
is the bitmap of the components that are to be retrieved.
|
|
|
|
See
|
|
|
|
.Sx VCPU State Area
|
|
|
|
below for details.
|
|
|
|
.Pp
|
|
|
|
.Fn nvmm_vcpu_setstate
|
|
|
|
sets the state of the virtual CPU identified by
|
|
|
|
.Fa cpuid
|
|
|
|
in the machine
|
|
|
|
.Fa mach .
|
|
|
|
The
|
|
|
|
.Fa state
|
|
|
|
argument is the address of a state area, and
|
|
|
|
.Fa flags
|
|
|
|
is the bitmap of the components that are to be set.
|
|
|
|
See
|
|
|
|
.Sx VCPU State Area
|
|
|
|
below for details.
|
|
|
|
.Pp
|
2019-02-05 16:56:32 +03:00
|
|
|
.Fn nvmm_vcpu_inject
|
|
|
|
injects into the CPU identified by
|
|
|
|
.Fa cpuid
|
|
|
|
of the machine
|
|
|
|
.Fa mach
|
|
|
|
an event described by
|
|
|
|
.Fa event .
|
|
|
|
See
|
|
|
|
.Sx Event Injection
|
|
|
|
below for details.
|
|
|
|
.Pp
|
2018-11-10 12:28:56 +03:00
|
|
|
.Fn nvmm_vcpu_run
|
|
|
|
runs the CPU identified by
|
|
|
|
.Fa cpuid
|
|
|
|
in the machine
|
|
|
|
.Fa mach ,
|
|
|
|
until a VM exit is triggered.
|
|
|
|
The
|
|
|
|
.Fa exit
|
|
|
|
structure is filled to indicate the exit reason, and the associated parameters
|
|
|
|
if any.
|
|
|
|
.Pp
|
2018-12-15 16:39:43 +03:00
|
|
|
.Fn nvmm_hva_map
|
|
|
|
maps at address
|
|
|
|
.Fa hva
|
|
|
|
a buffer of size
|
|
|
|
.Fa size
|
|
|
|
in the calling process' virtual address space.
|
|
|
|
This buffer is allowed to be subsequently mapped in a virtual machine.
|
|
|
|
.Pp
|
|
|
|
.Fn nvmm_hva_unmap
|
|
|
|
unmaps the buffer of size
|
|
|
|
.Fa size
|
|
|
|
at address
|
|
|
|
.Fa hva
|
|
|
|
from the calling process' virtual address space.
|
|
|
|
.Pp
|
2018-11-10 12:28:56 +03:00
|
|
|
.Fn nvmm_gpa_map
|
2018-12-15 16:39:43 +03:00
|
|
|
maps into the guest physical memory beginning on address
|
2018-11-10 12:28:56 +03:00
|
|
|
.Fa gpa
|
2018-12-15 16:39:43 +03:00
|
|
|
the buffer of size
|
2018-11-10 12:28:56 +03:00
|
|
|
.Fa size
|
2018-12-15 16:39:43 +03:00
|
|
|
located at address
|
|
|
|
.Fa hva
|
|
|
|
of the calling process' virtual address space.
|
|
|
|
The
|
|
|
|
.Fa hva
|
|
|
|
parameter must point to a buffer that was previously mapped with
|
|
|
|
.Fn nvmm_hva_map .
|
2018-11-10 12:28:56 +03:00
|
|
|
.Pp
|
|
|
|
.Fn nvmm_gpa_unmap
|
|
|
|
removes the guest physical memory area beginning on address
|
|
|
|
.Fa gpa
|
|
|
|
and of size
|
|
|
|
.Fa size
|
|
|
|
from the machine
|
|
|
|
.Fa mach .
|
|
|
|
.Pp
|
|
|
|
.Fn nvmm_gva_to_gpa
|
|
|
|
translates, on the CPU
|
|
|
|
.Fa cpuid
|
|
|
|
from the machine
|
|
|
|
.Fa mach ,
|
|
|
|
the guest virtual address given in
|
|
|
|
.Fa gva
|
|
|
|
into a guest physical address returned in
|
|
|
|
.Fa gpa .
|
|
|
|
The associated page permissions are returned in
|
|
|
|
.Fa prot .
|
|
|
|
.Fa gva
|
|
|
|
must be page-aligned.
|
|
|
|
.Pp
|
|
|
|
.Fn nvmm_gpa_to_hva
|
|
|
|
translates, on the machine
|
|
|
|
.Fa mach ,
|
|
|
|
the guest physical address indicated in
|
|
|
|
.Fa gpa
|
|
|
|
into a host virtual address returned in
|
|
|
|
.Fa hva .
|
2019-04-04 20:33:47 +03:00
|
|
|
The associated page permissions are returned in
|
|
|
|
.Fa prot .
|
2018-11-10 12:28:56 +03:00
|
|
|
.Fa gpa
|
|
|
|
must be page-aligned.
|
|
|
|
.Pp
|
2018-12-27 10:22:31 +03:00
|
|
|
.Fn nvmm_callbacks_register
|
|
|
|
registers in
|
|
|
|
.Nm
|
|
|
|
the callbacks descriptor passed as argument.
|
|
|
|
.Pp
|
2018-11-10 12:28:56 +03:00
|
|
|
.Fn nvmm_assist_io
|
|
|
|
emulates the I/O operation described in
|
|
|
|
.Fa exit
|
|
|
|
on CPU
|
|
|
|
.Fa cpuid
|
|
|
|
from machine
|
|
|
|
.Fa mach .
|
|
|
|
See
|
|
|
|
.Sx I/O Assist
|
|
|
|
below for details.
|
|
|
|
.Pp
|
|
|
|
.Fn nvmm_assist_mem
|
|
|
|
emulates the Mem operation described in
|
|
|
|
.Fa exit
|
|
|
|
on CPU
|
|
|
|
.Fa cpuid
|
|
|
|
from machine
|
|
|
|
.Fa mach .
|
|
|
|
See
|
|
|
|
.Sx Mem Assist
|
|
|
|
below for details.
|
|
|
|
.Ss NVMM Capability
|
|
|
|
The
|
|
|
|
.Cd nvmm_capability
|
|
|
|
structure helps VMM software identify the capabilities offered by NVMM on the
|
|
|
|
host:
|
|
|
|
.Bd -literal
|
|
|
|
struct nvmm_capability {
|
|
|
|
uint64_t version;
|
|
|
|
uint64_t state_size;
|
|
|
|
uint64_t max_machines;
|
|
|
|
uint64_t max_vcpus;
|
|
|
|
uint64_t max_ram;
|
|
|
|
union {
|
|
|
|
struct {
|
|
|
|
...
|
|
|
|
} x86;
|
|
|
|
uint64_t rsvd[8];
|
|
|
|
} u;
|
|
|
|
};
|
|
|
|
.Ed
|
|
|
|
.Pp
|
|
|
|
For example, the
|
|
|
|
.Cd max_machines
|
|
|
|
field indicates the maximum number of virtual machines supported, while
|
|
|
|
.Cd max_vcpus
|
|
|
|
indicates the maximum number of VCPUs supported per virtual machine.
|
2019-02-05 16:56:32 +03:00
|
|
|
.Ss Guest-Host Mappings
|
|
|
|
Each virtual machine has an associated guest physical memory.
|
|
|
|
VMM software is allowed to modify this guest physical memory by mapping
|
|
|
|
it into some parts of its virtual address space.
|
|
|
|
.Pp
|
|
|
|
VMM software should perform the following steps to achieve that:
|
|
|
|
.Pp
|
|
|
|
.Bl -bullet -offset indent -compact
|
|
|
|
.It
|
|
|
|
Call
|
|
|
|
.Fn nvmm_hva_map
|
|
|
|
to create in the host's virtual address space an area of memory that can
|
|
|
|
be shared with a guest.
|
|
|
|
Typically, the
|
|
|
|
.Fa hva
|
|
|
|
parameter will be a pointer to an area that was previously mapped via
|
|
|
|
.Fn mmap .
|
|
|
|
.Fn nvmm_hva_map
|
|
|
|
will replace the content of the area, and will make it read-write (but not
|
|
|
|
executable).
|
|
|
|
.It
|
|
|
|
Make available in the guest an area of guest physical memory, by calling
|
|
|
|
.Fn nvmm_gpa_map
|
|
|
|
and passing in the
|
|
|
|
.Fa hva
|
|
|
|
parameter the value that was previously given to
|
|
|
|
.Fn nvmm_hva_map .
|
|
|
|
.Fn nvmm_gpa_map
|
|
|
|
does not replace the content of any memory; it only creates a direct link
|
|
|
|
from
|
|
|
|
.Fa gpa
|
|
|
|
into
|
|
|
|
.Fa hva .
|
|
|
|
.Fn nvmm_gpa_unmap
|
|
|
|
removes this link without modifying
|
|
|
|
.Fa hva .
|
|
|
|
.El
|
|
|
|
.Pp
|
|
|
|
The guest will then be able to use the guest physical address passed in the
|
|
|
|
.Fa gpa
|
|
|
|
parameter of
|
|
|
|
.Fn nvmm_gpa_map .
|
|
|
|
Each change the guest makes in
|
|
|
|
.Fa gpa
|
|
|
|
will be reflected in the host's
|
|
|
|
.Fa hva ,
|
|
|
|
and vice versa.
|
|
|
|
.Pp
|
|
|
|
It is illegal for VMM software to use
|
|
|
|
.Fn munmap
|
|
|
|
on an area that was mapped via
|
|
|
|
.Fn nvmm_hva_map .
|
2018-11-10 12:28:56 +03:00
|
|
|
.Ss VCPU State Area
|
|
|
|
A VCPU state area is a structure that entirely defines the content of the
|
|
|
|
registers of a VCPU.
|
|
|
|
Only one such structure exists, for x86:
|
|
|
|
.Bd -literal
|
|
|
|
struct nvmm_x64_state {
|
2019-02-05 16:56:32 +03:00
|
|
|
struct nvmm_x64_state_seg segs[NVMM_X64_NSEG];
|
|
|
|
uint64_t gprs[NVMM_X64_NGPR];
|
|
|
|
uint64_t crs[NVMM_X64_NCR];
|
|
|
|
uint64_t drs[NVMM_X64_NDR];
|
|
|
|
uint64_t msrs[NVMM_X64_NMSR];
|
2019-04-07 17:13:03 +03:00
|
|
|
struct nvmm_x64_state_intr intr;
|
2019-02-05 16:56:32 +03:00
|
|
|
struct fxsave fpu;
|
2018-11-10 12:28:56 +03:00
|
|
|
};
|
|
|
|
.Ed
|
|
|
|
.Pp
|
|
|
|
Refer to functional examples to see precisely how to use this structure.
|
2019-02-05 16:56:32 +03:00
|
|
|
.Pp
|
|
|
|
A VCPU state area is divided into sub-states.
|
|
|
|
A
|
|
|
|
.Fa flags
|
|
|
|
parameter is used to set and get the VCPU state; it acts as a bitmap which
|
|
|
|
indicates which sub-states to set or get.
|
|
|
|
.Pp
|
|
|
|
During VM exits, a partial VCPU state area is provided in
|
|
|
|
.Va exitstate ,
|
|
|
|
see
|
|
|
|
.Sx Exit Reasons
|
|
|
|
below for details.
|
2018-11-10 12:28:56 +03:00
|
|
|
.Ss Exit Reasons
|
|
|
|
The
|
|
|
|
.Cd nvmm_exit
|
|
|
|
structure is used to handle VM exits:
|
|
|
|
.Bd -literal
|
|
|
|
enum nvmm_exit_reason {
|
|
|
|
NVMM_EXIT_NONE = 0x0000000000000000,
|
|
|
|
|
|
|
|
/* General. */
|
|
|
|
NVMM_EXIT_MEMORY = 0x0000000000000001,
|
|
|
|
NVMM_EXIT_IO = 0x0000000000000002,
|
|
|
|
NVMM_EXIT_MSR = 0x0000000000000003,
|
|
|
|
NVMM_EXIT_INT_READY = 0x0000000000000004,
|
|
|
|
NVMM_EXIT_NMI_READY = 0x0000000000000005,
|
2019-02-05 16:56:32 +03:00
|
|
|
NVMM_EXIT_HALTED = 0x0000000000000006,
|
|
|
|
NVMM_EXIT_SHUTDOWN = 0x0000000000000007,
|
2018-11-10 12:28:56 +03:00
|
|
|
|
|
|
|
/* Instructions (x86). */
|
|
|
|
...
|
|
|
|
|
|
|
|
NVMM_EXIT_INVALID = 0xFFFFFFFFFFFFFFFF
|
|
|
|
};
|
|
|
|
|
|
|
|
struct nvmm_exit {
|
|
|
|
enum nvmm_exit_reason reason;
|
|
|
|
union {
|
|
|
|
...
|
|
|
|
} u;
|
|
|
|
uint64_t exitstate[8];
|
|
|
|
};
|
|
|
|
.Ed
|
|
|
|
.Pp
|
|
|
|
The
|
|
|
|
.Va reason
|
|
|
|
field indicates the reason for the VM exit.
|
|
|
|
Additional parameters describing the exit can be present in
|
|
|
|
.Va u .
|
|
|
|
.Va exitstate
|
|
|
|
contains a partial, implementation-specific VCPU state, usable as a fast-path
|
|
|
|
to retrieve certain state values.
|
|
|
|
.Pp
|
|
|
|
It is possible that a VM exit was caused by a reason internal to the host
|
|
|
|
kernel, and that VMM software should not be concerned with.
|
|
|
|
In this case, the exit reason is set to
|
|
|
|
.Cd NVMM_EXIT_NONE .
|
|
|
|
This gives a chance for VMM software to halt the VM in its tracks.
|
|
|
|
.Pp
|
|
|
|
Refer to functional examples to see precisely how to handle VM exits.
|
|
|
|
.Ss Event Injection
|
|
|
|
It is possible to inject an event into a VCPU.
|
|
|
|
An event can be a hardware interrupt, a software interrupt, or a software
|
|
|
|
exception, defined by:
|
|
|
|
.Bd -literal
|
|
|
|
enum nvmm_event_type {
|
|
|
|
NVMM_EVENT_INTERRUPT_HW,
|
|
|
|
NVMM_EVENT_INTERRUPT_SW,
|
|
|
|
NVMM_EVENT_EXCEPTION
|
|
|
|
};
|
|
|
|
|
|
|
|
struct nvmm_event {
|
|
|
|
enum nvmm_event_type type;
|
|
|
|
uint64_t vector;
|
|
|
|
union {
|
|
|
|
uint64_t error;
|
|
|
|
uint64_t prio;
|
|
|
|
} u;
|
|
|
|
};
|
|
|
|
.Ed
|
|
|
|
.Pp
|
|
|
|
This describes an event of type
|
|
|
|
.Va type ,
|
|
|
|
to be sent to vector number
|
|
|
|
.Va vector ,
|
|
|
|
with a possible additional
|
|
|
|
.Va error
|
|
|
|
or
|
|
|
|
.Va prio
|
|
|
|
code that is implementation-specific.
|
|
|
|
.Pp
|
|
|
|
It is possible that the VCPU is in a state where it cannot receive this
|
|
|
|
event, if:
|
|
|
|
.Pp
|
|
|
|
.Bl -bullet -offset indent -compact
|
|
|
|
.It
|
|
|
|
the event is a hardware interrupt, and the VCPU runs with interrupts disabled,
|
|
|
|
or
|
|
|
|
.It
|
2019-04-07 17:13:03 +03:00
|
|
|
the event is a non-maskable interrupt (NMI), and the VCPU is already in an
|
2018-11-10 12:28:56 +03:00
|
|
|
in-NMI context.
|
|
|
|
.El
|
|
|
|
.Pp
|
|
|
|
In this case,
|
|
|
|
.Fn nvmm_vcpu_inject
|
|
|
|
will return
|
|
|
|
.Er EAGAIN ,
|
|
|
|
and NVMM will cause a VM exit with reason
|
|
|
|
.Cd NVMM_EXIT_INT_READY
|
|
|
|
or
|
|
|
|
.Cd NVMM_EXIT_NMI_READY
|
|
|
|
to indicate that VMM software can now reinject the desired event.
|
2019-02-05 16:56:32 +03:00
|
|
|
.Ss Assist Callbacks
|
|
|
|
In order to assist emulation of certain operations,
|
|
|
|
.Nm
|
|
|
|
requires VMM software to register, via
|
|
|
|
.Fn nvmm_callbacks_register ,
|
|
|
|
a set of callbacks described in the following structure:
|
|
|
|
.Bd -literal
|
|
|
|
struct nvmm_callbacks {
|
|
|
|
void (*io)(struct nvmm_io *);
|
|
|
|
void (*mem)(struct nvmm_mem *);
|
|
|
|
};
|
|
|
|
.Ed
|
|
|
|
.Pp
|
|
|
|
These callbacks are used by
|
|
|
|
.Nm
|
|
|
|
each time
|
|
|
|
.Fn nvmm_assist_io
|
|
|
|
or
|
|
|
|
.Fn nvmm_assist_mem
|
|
|
|
is invoked.
|
|
|
|
VMM software that does not intend to use either of these assists can put
|
2019-02-05 18:03:35 +03:00
|
|
|
.Dv NULL
|
|
|
|
in the callbacks.
|
2018-11-10 12:28:56 +03:00
|
|
|
.Ss I/O Assist
|
|
|
|
When a VM exit occurs with reason
|
|
|
|
.Cd NVMM_EXIT_IO ,
|
|
|
|
it is necessary for VMM software to emulate the associated I/O operation.
|
|
|
|
.Nm
|
|
|
|
provides an easy way for VMM software to perform that.
|
|
|
|
.Pp
|
|
|
|
.Fn nvmm_assist_io
|
2018-12-27 10:22:31 +03:00
|
|
|
will call the registered
|
|
|
|
.Fa io
|
2018-11-10 12:28:56 +03:00
|
|
|
callback function and give it a
|
|
|
|
.Cd nvmm_io
|
|
|
|
structure as argument.
|
|
|
|
This structure describes an I/O transaction:
|
|
|
|
.Bd -literal
|
|
|
|
struct nvmm_io {
|
|
|
|
uint64_t port;
|
|
|
|
bool in;
|
|
|
|
size_t size;
|
Improvements and fixes in NVMM.
Kernel driver:
* Don't take an extra (unneeded) reference to the UAO.
* Provide npc for HLT. I'm not really happy with it right now, will
likely be revisited.
* Add the INT_SHADOW, INT_WINDOW_EXIT and NMI_WINDOW_EXIT states. Provide
them in the exitstate too.
* Don't take the TPR into account when processing INTs. The virtualizer
can do that itself (Qemu already does).
* Provide a hypervisor signature in CPUID, and hide SVM.
* Ignore certain MSRs. One special case is MSR_NB_CFG in which we set
NB_CFG_INITAPICCPUIDLO. Allow reads of MSR_TSC.
* If the LWP has pending signals or softints, leave, rather than waiting
for a rescheduling to happen later. This reduces interrupt processing
time in the guest (Qemu sends a signal to the thread, and now we leave
right away). This could be improved even more by sending an actual IPI
to the CPU, but I'll see later.
Libnvmm:
* Fix the MMU translation of large pages, we need to add the lower bits
too.
* Change the IO and Mem structures to take a pointer rather than a
static array. This provides more flexibility.
* Batch together the str+rep IO transactions. We do one big memory
read/write, and then send the IO commands to the hypervisor all at
once. This considerably increases performance.
* Decode MOVZX.
With these changes in place, Qemu+NVMM works. I can install NetBSD 8.0
in a VM with multiple VCPUs, connect to the network, etc.
2019-01-06 19:10:51 +03:00
|
|
|
uint8_t *data;
|
2018-11-10 12:28:56 +03:00
|
|
|
};
|
|
|
|
.Ed
|
|
|
|
.Pp
|
|
|
|
The callback can emulate the operation using this descriptor, following two
|
|
|
|
unique cases:
|
|
|
|
.Pp
|
|
|
|
.Bl -bullet -offset indent -compact
|
|
|
|
.It
|
|
|
|
The operation is an input.
|
|
|
|
In this case, the callback should fill
|
|
|
|
.Va data
|
|
|
|
with the desired value.
|
|
|
|
.It
|
|
|
|
The operation is an output.
|
|
|
|
In this case, the callback should read
|
|
|
|
.Va data
|
|
|
|
to retrieve the desired value.
|
|
|
|
.El
|
|
|
|
.Pp
|
|
|
|
In either case,
|
|
|
|
.Va port
|
|
|
|
will indicate the I/O port,
|
|
|
|
.Va in
|
|
|
|
will indicate if the operation is an input, and
|
|
|
|
.Va size
|
|
|
|
will indicate the size of the access.
|
|
|
|
.Ss Mem Assist
|
|
|
|
When a VM exit occurs with reason
|
|
|
|
.Cd NVMM_EXIT_MEMORY ,
|
|
|
|
it is necessary for VMM software to emulate the associated memory operation.
|
|
|
|
.Nm
|
|
|
|
provides an easy way for VMM software to perform that, similar to the I/O
|
|
|
|
Assist.
|
|
|
|
.Pp
|
|
|
|
.Fn nvmm_assist_mem
|
2018-12-27 10:22:31 +03:00
|
|
|
will call the registered
|
|
|
|
.Fa mem
|
2018-11-10 12:28:56 +03:00
|
|
|
callback function and give it a
|
|
|
|
.Cd nvmm_mem
|
|
|
|
structure as argument.
|
|
|
|
This structure describes a Mem transaction:
|
|
|
|
.Bd -literal
|
|
|
|
struct nvmm_mem {
|
|
|
|
gpaddr_t gpa;
|
|
|
|
bool write;
|
|
|
|
size_t size;
|
Improvements and fixes in NVMM.
Kernel driver:
* Don't take an extra (unneeded) reference to the UAO.
* Provide npc for HLT. I'm not really happy with it right now, will
likely be revisited.
* Add the INT_SHADOW, INT_WINDOW_EXIT and NMI_WINDOW_EXIT states. Provide
them in the exitstate too.
* Don't take the TPR into account when processing INTs. The virtualizer
can do that itself (Qemu already does).
* Provide a hypervisor signature in CPUID, and hide SVM.
* Ignore certain MSRs. One special case is MSR_NB_CFG in which we set
NB_CFG_INITAPICCPUIDLO. Allow reads of MSR_TSC.
* If the LWP has pending signals or softints, leave, rather than waiting
for a rescheduling to happen later. This reduces interrupt processing
time in the guest (Qemu sends a signal to the thread, and now we leave
right away). This could be improved even more by sending an actual IPI
to the CPU, but I'll see later.
Libnvmm:
* Fix the MMU translation of large pages, we need to add the lower bits
too.
* Change the IO and Mem structures to take a pointer rather than a
static array. This provides more flexibility.
* Batch together the str+rep IO transactions. We do one big memory
read/write, and then send the IO commands to the hypervisor all at
once. This considerably increases performance.
* Decode MOVZX.
With these changes in place, Qemu+NVMM works. I can install NetBSD 8.0
in a VM with multiple VCPUs, connect to the network, etc.
2019-01-06 19:10:51 +03:00
|
|
|
uint8_t *data;
|
2018-11-10 12:28:56 +03:00
|
|
|
};
|
|
|
|
.Ed
|
|
|
|
.Pp
|
|
|
|
The callback can emulate the operation using this descriptor, following two
|
|
|
|
unique cases:
|
|
|
|
.Pp
|
|
|
|
.Bl -bullet -offset indent -compact
|
|
|
|
.It
|
|
|
|
The operation is a read.
|
|
|
|
In this case, the callback should fill
|
|
|
|
.Va data
|
|
|
|
with the desired value.
|
|
|
|
.It
|
|
|
|
The operation is a write.
|
|
|
|
In this case, the callback should read
|
|
|
|
.Va data
|
|
|
|
to retrieve the desired value.
|
|
|
|
.El
|
|
|
|
.Pp
|
|
|
|
In either case,
|
|
|
|
.Va gpa
|
|
|
|
will indicate the guest physical address,
|
|
|
|
.Va write
|
|
|
|
will indicate if the access is a write, and
|
|
|
|
.Va size
|
|
|
|
will indicate the size of the access.
|
|
|
|
.Sh RETURN VALUES
|
|
|
|
Upon successful completion, each of these functions returns zero.
|
|
|
|
Otherwise, a value of \-1 is returned and the global
|
|
|
|
variable
|
|
|
|
.Va errno
|
|
|
|
is set to indicate the error.
|
|
|
|
.Sh FILES
|
|
|
|
.Bl -tag -width XXXX -compact
|
2018-12-12 12:09:08 +03:00
|
|
|
.It Lk https://www.netbsd.org/~maxv/nvmm/nvmm-demo.zip
|
|
|
|
Functional example (demonstrator).
|
|
|
|
Contains a virtualizer that uses the
|
|
|
|
.Nm
|
|
|
|
API, and a small kernel that exercises this virtualizer.
|
|
|
|
.It Pa src/sys/dev/nvmm/
|
|
|
|
Source code of the kernel NVMM driver.
|
|
|
|
.It Pa src/lib/libnvmm/
|
|
|
|
Source code of the
|
|
|
|
.Nm
|
|
|
|
library.
|
2018-11-10 12:28:56 +03:00
|
|
|
.El
|
|
|
|
.Sh ERRORS
|
|
|
|
These functions will fail if:
|
|
|
|
.Bl -tag -width [ENOBUFS]
|
|
|
|
.It Bq Er EEXIST
|
|
|
|
An attempt was made to create a machine or a VCPU that already exists.
|
|
|
|
.It Bq Er EFAULT
|
|
|
|
An attempt was made to emulate a memory-based operation in a guest, and the
|
|
|
|
guest page tables did not have the permissions necessary for the operation
|
|
|
|
to complete successfully.
|
|
|
|
.It Bq Er EINVAL
|
|
|
|
An inappropriate parameter was used.
|
|
|
|
.It Bq Er ENOBUFS
|
|
|
|
The maximum number of machines or VCPUs was reached.
|
|
|
|
.It Bq Er ENOENT
|
|
|
|
A query was made on a machine or a VCPU that does not exist.
|
|
|
|
.It Bq Er EPERM
|
|
|
|
An attempt was made to access a machine that does not belong to the process.
|
|
|
|
.El
|
|
|
|
.Pp
|
|
|
|
In addition,
|
|
|
|
.Fn nvmm_vcpu_inject
|
|
|
|
uses the following error codes:
|
|
|
|
.Bl -tag -width [ENOBUFS]
|
|
|
|
.It Bq Er EAGAIN
|
|
|
|
The VCPU cannot receive the event immediately.
|
|
|
|
.El
|
2018-12-12 12:09:08 +03:00
|
|
|
.Sh SEE ALSO
|
2018-12-12 14:40:08 +03:00
|
|
|
.Xr nvmm 4
|
2018-11-10 12:28:56 +03:00
|
|
|
.Sh AUTHORS
|
|
|
|
NVMM was designed and implemented by
|
|
|
|
.An Maxime Villard .
|