195801d700
The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Paul Durrant <paul@xen.org> Acked-by: Fabiano Rosas <farosas@suse.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Reviewed-by: Cédric Le Goater <clg@kaod.org> Acked-by: Peter Xu <peterx@redhat.com> Acked-by: Eric Farman <farman@linux.ibm.com> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com> Acked-by: Hyman Huang <yong.huang@smartx.com> Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com> Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
1225 lines
34 KiB
C
1225 lines
34 KiB
C
/*
|
|
* Copyright (c) 2018-2019 Maxime Villard, All rights reserved.
|
|
*
|
|
* NetBSD Virtual Machine Monitor (NVMM) accelerator for QEMU.
|
|
*
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
|
* See the COPYING file in the top-level directory.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "cpu.h"
|
|
#include "exec/address-spaces.h"
|
|
#include "exec/ioport.h"
|
|
#include "qemu/accel.h"
|
|
#include "sysemu/nvmm.h"
|
|
#include "sysemu/cpus.h"
|
|
#include "sysemu/runstate.h"
|
|
#include "qemu/main-loop.h"
|
|
#include "qemu/error-report.h"
|
|
#include "qapi/error.h"
|
|
#include "qemu/queue.h"
|
|
#include "migration/blocker.h"
|
|
#include "strings.h"
|
|
|
|
#include "nvmm-accel-ops.h"
|
|
|
|
#include <nvmm.h>
|
|
|
|
struct AccelCPUState {
|
|
struct nvmm_vcpu vcpu;
|
|
uint8_t tpr;
|
|
bool stop;
|
|
|
|
/* Window-exiting for INTs/NMIs. */
|
|
bool int_window_exit;
|
|
bool nmi_window_exit;
|
|
|
|
/* The guest is in an interrupt shadow (POP SS, etc). */
|
|
bool int_shadow;
|
|
};
|
|
|
|
struct qemu_machine {
|
|
struct nvmm_capability cap;
|
|
struct nvmm_machine mach;
|
|
};
|
|
|
|
/* -------------------------------------------------------------------------- */
|
|
|
|
/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * records whether the NVMM accelerator is enabled -- confirm against users.
 */
static bool nvmm_allowed;
|
|
/* The single machine instance; get_nvmm_mach() returns its 'mach' member. */
static struct qemu_machine qemu_mach;
|
|
|
|
static struct nvmm_machine *
|
|
get_nvmm_mach(void)
|
|
{
|
|
return &qemu_mach.mach;
|
|
}
|
|
|
|
/* -------------------------------------------------------------------------- */
|
|
|
|
static void
|
|
nvmm_set_segment(struct nvmm_x64_state_seg *nseg, const SegmentCache *qseg)
|
|
{
|
|
uint32_t attrib = qseg->flags;
|
|
|
|
nseg->selector = qseg->selector;
|
|
nseg->limit = qseg->limit;
|
|
nseg->base = qseg->base;
|
|
nseg->attrib.type = __SHIFTOUT(attrib, DESC_TYPE_MASK);
|
|
nseg->attrib.s = __SHIFTOUT(attrib, DESC_S_MASK);
|
|
nseg->attrib.dpl = __SHIFTOUT(attrib, DESC_DPL_MASK);
|
|
nseg->attrib.p = __SHIFTOUT(attrib, DESC_P_MASK);
|
|
nseg->attrib.avl = __SHIFTOUT(attrib, DESC_AVL_MASK);
|
|
nseg->attrib.l = __SHIFTOUT(attrib, DESC_L_MASK);
|
|
nseg->attrib.def = __SHIFTOUT(attrib, DESC_B_MASK);
|
|
nseg->attrib.g = __SHIFTOUT(attrib, DESC_G_MASK);
|
|
}
|
|
|
|
static void
|
|
nvmm_set_registers(CPUState *cpu)
|
|
{
|
|
CPUX86State *env = cpu_env(cpu);
|
|
struct nvmm_machine *mach = get_nvmm_mach();
|
|
AccelCPUState *qcpu = cpu->accel;
|
|
struct nvmm_vcpu *vcpu = &qcpu->vcpu;
|
|
struct nvmm_x64_state *state = vcpu->state;
|
|
uint64_t bitmap;
|
|
size_t i;
|
|
int ret;
|
|
|
|
assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
|
|
|
|
/* GPRs. */
|
|
state->gprs[NVMM_X64_GPR_RAX] = env->regs[R_EAX];
|
|
state->gprs[NVMM_X64_GPR_RCX] = env->regs[R_ECX];
|
|
state->gprs[NVMM_X64_GPR_RDX] = env->regs[R_EDX];
|
|
state->gprs[NVMM_X64_GPR_RBX] = env->regs[R_EBX];
|
|
state->gprs[NVMM_X64_GPR_RSP] = env->regs[R_ESP];
|
|
state->gprs[NVMM_X64_GPR_RBP] = env->regs[R_EBP];
|
|
state->gprs[NVMM_X64_GPR_RSI] = env->regs[R_ESI];
|
|
state->gprs[NVMM_X64_GPR_RDI] = env->regs[R_EDI];
|
|
#ifdef TARGET_X86_64
|
|
state->gprs[NVMM_X64_GPR_R8] = env->regs[R_R8];
|
|
state->gprs[NVMM_X64_GPR_R9] = env->regs[R_R9];
|
|
state->gprs[NVMM_X64_GPR_R10] = env->regs[R_R10];
|
|
state->gprs[NVMM_X64_GPR_R11] = env->regs[R_R11];
|
|
state->gprs[NVMM_X64_GPR_R12] = env->regs[R_R12];
|
|
state->gprs[NVMM_X64_GPR_R13] = env->regs[R_R13];
|
|
state->gprs[NVMM_X64_GPR_R14] = env->regs[R_R14];
|
|
state->gprs[NVMM_X64_GPR_R15] = env->regs[R_R15];
|
|
#endif
|
|
|
|
/* RIP and RFLAGS. */
|
|
state->gprs[NVMM_X64_GPR_RIP] = env->eip;
|
|
state->gprs[NVMM_X64_GPR_RFLAGS] = env->eflags;
|
|
|
|
/* Segments. */
|
|
nvmm_set_segment(&state->segs[NVMM_X64_SEG_CS], &env->segs[R_CS]);
|
|
nvmm_set_segment(&state->segs[NVMM_X64_SEG_DS], &env->segs[R_DS]);
|
|
nvmm_set_segment(&state->segs[NVMM_X64_SEG_ES], &env->segs[R_ES]);
|
|
nvmm_set_segment(&state->segs[NVMM_X64_SEG_FS], &env->segs[R_FS]);
|
|
nvmm_set_segment(&state->segs[NVMM_X64_SEG_GS], &env->segs[R_GS]);
|
|
nvmm_set_segment(&state->segs[NVMM_X64_SEG_SS], &env->segs[R_SS]);
|
|
|
|
/* Special segments. */
|
|
nvmm_set_segment(&state->segs[NVMM_X64_SEG_GDT], &env->gdt);
|
|
nvmm_set_segment(&state->segs[NVMM_X64_SEG_LDT], &env->ldt);
|
|
nvmm_set_segment(&state->segs[NVMM_X64_SEG_TR], &env->tr);
|
|
nvmm_set_segment(&state->segs[NVMM_X64_SEG_IDT], &env->idt);
|
|
|
|
/* Control registers. */
|
|
state->crs[NVMM_X64_CR_CR0] = env->cr[0];
|
|
state->crs[NVMM_X64_CR_CR2] = env->cr[2];
|
|
state->crs[NVMM_X64_CR_CR3] = env->cr[3];
|
|
state->crs[NVMM_X64_CR_CR4] = env->cr[4];
|
|
state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
|
|
state->crs[NVMM_X64_CR_XCR0] = env->xcr0;
|
|
|
|
/* Debug registers. */
|
|
state->drs[NVMM_X64_DR_DR0] = env->dr[0];
|
|
state->drs[NVMM_X64_DR_DR1] = env->dr[1];
|
|
state->drs[NVMM_X64_DR_DR2] = env->dr[2];
|
|
state->drs[NVMM_X64_DR_DR3] = env->dr[3];
|
|
state->drs[NVMM_X64_DR_DR6] = env->dr[6];
|
|
state->drs[NVMM_X64_DR_DR7] = env->dr[7];
|
|
|
|
/* FPU. */
|
|
state->fpu.fx_cw = env->fpuc;
|
|
state->fpu.fx_sw = (env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11);
|
|
state->fpu.fx_tw = 0;
|
|
for (i = 0; i < 8; i++) {
|
|
state->fpu.fx_tw |= (!env->fptags[i]) << i;
|
|
}
|
|
state->fpu.fx_opcode = env->fpop;
|
|
state->fpu.fx_ip.fa_64 = env->fpip;
|
|
state->fpu.fx_dp.fa_64 = env->fpdp;
|
|
state->fpu.fx_mxcsr = env->mxcsr;
|
|
state->fpu.fx_mxcsr_mask = 0x0000FFFF;
|
|
assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
|
|
memcpy(state->fpu.fx_87_ac, env->fpregs, sizeof(env->fpregs));
|
|
for (i = 0; i < CPU_NB_REGS; i++) {
|
|
memcpy(&state->fpu.fx_xmm[i].xmm_bytes[0],
|
|
&env->xmm_regs[i].ZMM_Q(0), 8);
|
|
memcpy(&state->fpu.fx_xmm[i].xmm_bytes[8],
|
|
&env->xmm_regs[i].ZMM_Q(1), 8);
|
|
}
|
|
|
|
/* MSRs. */
|
|
state->msrs[NVMM_X64_MSR_EFER] = env->efer;
|
|
state->msrs[NVMM_X64_MSR_STAR] = env->star;
|
|
#ifdef TARGET_X86_64
|
|
state->msrs[NVMM_X64_MSR_LSTAR] = env->lstar;
|
|
state->msrs[NVMM_X64_MSR_CSTAR] = env->cstar;
|
|
state->msrs[NVMM_X64_MSR_SFMASK] = env->fmask;
|
|
state->msrs[NVMM_X64_MSR_KERNELGSBASE] = env->kernelgsbase;
|
|
#endif
|
|
state->msrs[NVMM_X64_MSR_SYSENTER_CS] = env->sysenter_cs;
|
|
state->msrs[NVMM_X64_MSR_SYSENTER_ESP] = env->sysenter_esp;
|
|
state->msrs[NVMM_X64_MSR_SYSENTER_EIP] = env->sysenter_eip;
|
|
state->msrs[NVMM_X64_MSR_PAT] = env->pat;
|
|
state->msrs[NVMM_X64_MSR_TSC] = env->tsc;
|
|
|
|
bitmap =
|
|
NVMM_X64_STATE_SEGS |
|
|
NVMM_X64_STATE_GPRS |
|
|
NVMM_X64_STATE_CRS |
|
|
NVMM_X64_STATE_DRS |
|
|
NVMM_X64_STATE_MSRS |
|
|
NVMM_X64_STATE_FPU;
|
|
|
|
ret = nvmm_vcpu_setstate(mach, vcpu, bitmap);
|
|
if (ret == -1) {
|
|
error_report("NVMM: Failed to set virtual processor context,"
|
|
" error=%d", errno);
|
|
}
|
|
}
|
|
|
|
static void
|
|
nvmm_get_segment(SegmentCache *qseg, const struct nvmm_x64_state_seg *nseg)
|
|
{
|
|
qseg->selector = nseg->selector;
|
|
qseg->limit = nseg->limit;
|
|
qseg->base = nseg->base;
|
|
|
|
qseg->flags =
|
|
__SHIFTIN((uint32_t)nseg->attrib.type, DESC_TYPE_MASK) |
|
|
__SHIFTIN((uint32_t)nseg->attrib.s, DESC_S_MASK) |
|
|
__SHIFTIN((uint32_t)nseg->attrib.dpl, DESC_DPL_MASK) |
|
|
__SHIFTIN((uint32_t)nseg->attrib.p, DESC_P_MASK) |
|
|
__SHIFTIN((uint32_t)nseg->attrib.avl, DESC_AVL_MASK) |
|
|
__SHIFTIN((uint32_t)nseg->attrib.l, DESC_L_MASK) |
|
|
__SHIFTIN((uint32_t)nseg->attrib.def, DESC_B_MASK) |
|
|
__SHIFTIN((uint32_t)nseg->attrib.g, DESC_G_MASK);
|
|
}
|
|
|
|
static void
|
|
nvmm_get_registers(CPUState *cpu)
|
|
{
|
|
CPUX86State *env = cpu_env(cpu);
|
|
struct nvmm_machine *mach = get_nvmm_mach();
|
|
AccelCPUState *qcpu = cpu->accel;
|
|
struct nvmm_vcpu *vcpu = &qcpu->vcpu;
|
|
X86CPU *x86_cpu = X86_CPU(cpu);
|
|
struct nvmm_x64_state *state = vcpu->state;
|
|
uint64_t bitmap, tpr;
|
|
size_t i;
|
|
int ret;
|
|
|
|
assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
|
|
|
|
bitmap =
|
|
NVMM_X64_STATE_SEGS |
|
|
NVMM_X64_STATE_GPRS |
|
|
NVMM_X64_STATE_CRS |
|
|
NVMM_X64_STATE_DRS |
|
|
NVMM_X64_STATE_MSRS |
|
|
NVMM_X64_STATE_FPU;
|
|
|
|
ret = nvmm_vcpu_getstate(mach, vcpu, bitmap);
|
|
if (ret == -1) {
|
|
error_report("NVMM: Failed to get virtual processor context,"
|
|
" error=%d", errno);
|
|
}
|
|
|
|
/* GPRs. */
|
|
env->regs[R_EAX] = state->gprs[NVMM_X64_GPR_RAX];
|
|
env->regs[R_ECX] = state->gprs[NVMM_X64_GPR_RCX];
|
|
env->regs[R_EDX] = state->gprs[NVMM_X64_GPR_RDX];
|
|
env->regs[R_EBX] = state->gprs[NVMM_X64_GPR_RBX];
|
|
env->regs[R_ESP] = state->gprs[NVMM_X64_GPR_RSP];
|
|
env->regs[R_EBP] = state->gprs[NVMM_X64_GPR_RBP];
|
|
env->regs[R_ESI] = state->gprs[NVMM_X64_GPR_RSI];
|
|
env->regs[R_EDI] = state->gprs[NVMM_X64_GPR_RDI];
|
|
#ifdef TARGET_X86_64
|
|
env->regs[R_R8] = state->gprs[NVMM_X64_GPR_R8];
|
|
env->regs[R_R9] = state->gprs[NVMM_X64_GPR_R9];
|
|
env->regs[R_R10] = state->gprs[NVMM_X64_GPR_R10];
|
|
env->regs[R_R11] = state->gprs[NVMM_X64_GPR_R11];
|
|
env->regs[R_R12] = state->gprs[NVMM_X64_GPR_R12];
|
|
env->regs[R_R13] = state->gprs[NVMM_X64_GPR_R13];
|
|
env->regs[R_R14] = state->gprs[NVMM_X64_GPR_R14];
|
|
env->regs[R_R15] = state->gprs[NVMM_X64_GPR_R15];
|
|
#endif
|
|
|
|
/* RIP and RFLAGS. */
|
|
env->eip = state->gprs[NVMM_X64_GPR_RIP];
|
|
env->eflags = state->gprs[NVMM_X64_GPR_RFLAGS];
|
|
|
|
/* Segments. */
|
|
nvmm_get_segment(&env->segs[R_ES], &state->segs[NVMM_X64_SEG_ES]);
|
|
nvmm_get_segment(&env->segs[R_CS], &state->segs[NVMM_X64_SEG_CS]);
|
|
nvmm_get_segment(&env->segs[R_SS], &state->segs[NVMM_X64_SEG_SS]);
|
|
nvmm_get_segment(&env->segs[R_DS], &state->segs[NVMM_X64_SEG_DS]);
|
|
nvmm_get_segment(&env->segs[R_FS], &state->segs[NVMM_X64_SEG_FS]);
|
|
nvmm_get_segment(&env->segs[R_GS], &state->segs[NVMM_X64_SEG_GS]);
|
|
|
|
/* Special segments. */
|
|
nvmm_get_segment(&env->gdt, &state->segs[NVMM_X64_SEG_GDT]);
|
|
nvmm_get_segment(&env->ldt, &state->segs[NVMM_X64_SEG_LDT]);
|
|
nvmm_get_segment(&env->tr, &state->segs[NVMM_X64_SEG_TR]);
|
|
nvmm_get_segment(&env->idt, &state->segs[NVMM_X64_SEG_IDT]);
|
|
|
|
/* Control registers. */
|
|
env->cr[0] = state->crs[NVMM_X64_CR_CR0];
|
|
env->cr[2] = state->crs[NVMM_X64_CR_CR2];
|
|
env->cr[3] = state->crs[NVMM_X64_CR_CR3];
|
|
env->cr[4] = state->crs[NVMM_X64_CR_CR4];
|
|
tpr = state->crs[NVMM_X64_CR_CR8];
|
|
if (tpr != qcpu->tpr) {
|
|
qcpu->tpr = tpr;
|
|
cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
|
|
}
|
|
env->xcr0 = state->crs[NVMM_X64_CR_XCR0];
|
|
|
|
/* Debug registers. */
|
|
env->dr[0] = state->drs[NVMM_X64_DR_DR0];
|
|
env->dr[1] = state->drs[NVMM_X64_DR_DR1];
|
|
env->dr[2] = state->drs[NVMM_X64_DR_DR2];
|
|
env->dr[3] = state->drs[NVMM_X64_DR_DR3];
|
|
env->dr[6] = state->drs[NVMM_X64_DR_DR6];
|
|
env->dr[7] = state->drs[NVMM_X64_DR_DR7];
|
|
|
|
/* FPU. */
|
|
env->fpuc = state->fpu.fx_cw;
|
|
env->fpstt = (state->fpu.fx_sw >> 11) & 0x7;
|
|
env->fpus = state->fpu.fx_sw & ~0x3800;
|
|
for (i = 0; i < 8; i++) {
|
|
env->fptags[i] = !((state->fpu.fx_tw >> i) & 1);
|
|
}
|
|
env->fpop = state->fpu.fx_opcode;
|
|
env->fpip = state->fpu.fx_ip.fa_64;
|
|
env->fpdp = state->fpu.fx_dp.fa_64;
|
|
env->mxcsr = state->fpu.fx_mxcsr;
|
|
assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
|
|
memcpy(env->fpregs, state->fpu.fx_87_ac, sizeof(env->fpregs));
|
|
for (i = 0; i < CPU_NB_REGS; i++) {
|
|
memcpy(&env->xmm_regs[i].ZMM_Q(0),
|
|
&state->fpu.fx_xmm[i].xmm_bytes[0], 8);
|
|
memcpy(&env->xmm_regs[i].ZMM_Q(1),
|
|
&state->fpu.fx_xmm[i].xmm_bytes[8], 8);
|
|
}
|
|
|
|
/* MSRs. */
|
|
env->efer = state->msrs[NVMM_X64_MSR_EFER];
|
|
env->star = state->msrs[NVMM_X64_MSR_STAR];
|
|
#ifdef TARGET_X86_64
|
|
env->lstar = state->msrs[NVMM_X64_MSR_LSTAR];
|
|
env->cstar = state->msrs[NVMM_X64_MSR_CSTAR];
|
|
env->fmask = state->msrs[NVMM_X64_MSR_SFMASK];
|
|
env->kernelgsbase = state->msrs[NVMM_X64_MSR_KERNELGSBASE];
|
|
#endif
|
|
env->sysenter_cs = state->msrs[NVMM_X64_MSR_SYSENTER_CS];
|
|
env->sysenter_esp = state->msrs[NVMM_X64_MSR_SYSENTER_ESP];
|
|
env->sysenter_eip = state->msrs[NVMM_X64_MSR_SYSENTER_EIP];
|
|
env->pat = state->msrs[NVMM_X64_MSR_PAT];
|
|
env->tsc = state->msrs[NVMM_X64_MSR_TSC];
|
|
|
|
x86_update_hflags(env);
|
|
}
|
|
|
|
static bool
|
|
nvmm_can_take_int(CPUState *cpu)
|
|
{
|
|
CPUX86State *env = cpu_env(cpu);
|
|
AccelCPUState *qcpu = cpu->accel;
|
|
struct nvmm_vcpu *vcpu = &qcpu->vcpu;
|
|
struct nvmm_machine *mach = get_nvmm_mach();
|
|
|
|
if (qcpu->int_window_exit) {
|
|
return false;
|
|
}
|
|
|
|
if (qcpu->int_shadow || !(env->eflags & IF_MASK)) {
|
|
struct nvmm_x64_state *state = vcpu->state;
|
|
|
|
/* Exit on interrupt window. */
|
|
nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_INTR);
|
|
state->intr.int_window_exiting = 1;
|
|
nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_INTR);
|
|
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool
|
|
nvmm_can_take_nmi(CPUState *cpu)
|
|
{
|
|
AccelCPUState *qcpu = cpu->accel;
|
|
|
|
/*
|
|
* Contrary to INTs, NMIs always schedule an exit when they are
|
|
* completed. Therefore, if window-exiting is enabled, it means
|
|
* NMIs are blocked.
|
|
*/
|
|
if (qcpu->nmi_window_exit) {
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Called before the VCPU is run. We inject events generated by the I/O
|
|
* thread, and synchronize the guest TPR.
|
|
*/
|
|
static void
|
|
nvmm_vcpu_pre_run(CPUState *cpu)
|
|
{
|
|
CPUX86State *env = cpu_env(cpu);
|
|
struct nvmm_machine *mach = get_nvmm_mach();
|
|
AccelCPUState *qcpu = cpu->accel;
|
|
struct nvmm_vcpu *vcpu = &qcpu->vcpu;
|
|
X86CPU *x86_cpu = X86_CPU(cpu);
|
|
struct nvmm_x64_state *state = vcpu->state;
|
|
struct nvmm_vcpu_event *event = vcpu->event;
|
|
bool has_event = false;
|
|
bool sync_tpr = false;
|
|
uint8_t tpr;
|
|
int ret;
|
|
|
|
bql_lock();
|
|
|
|
tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
|
|
if (tpr != qcpu->tpr) {
|
|
qcpu->tpr = tpr;
|
|
sync_tpr = true;
|
|
}
|
|
|
|
/*
|
|
* Force the VCPU out of its inner loop to process any INIT requests
|
|
* or commit pending TPR access.
|
|
*/
|
|
if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
|
|
cpu->exit_request = 1;
|
|
}
|
|
|
|
if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
|
|
if (nvmm_can_take_nmi(cpu)) {
|
|
cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
|
|
event->type = NVMM_VCPU_EVENT_INTR;
|
|
event->vector = 2;
|
|
has_event = true;
|
|
}
|
|
}
|
|
|
|
if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
|
|
if (nvmm_can_take_int(cpu)) {
|
|
cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
|
|
event->type = NVMM_VCPU_EVENT_INTR;
|
|
event->vector = cpu_get_pic_interrupt(env);
|
|
has_event = true;
|
|
}
|
|
}
|
|
|
|
/* Don't want SMIs. */
|
|
if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
|
|
cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
|
|
}
|
|
|
|
if (sync_tpr) {
|
|
ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_CRS);
|
|
if (ret == -1) {
|
|
error_report("NVMM: Failed to get CPU state,"
|
|
" error=%d", errno);
|
|
}
|
|
|
|
state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
|
|
|
|
ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_CRS);
|
|
if (ret == -1) {
|
|
error_report("NVMM: Failed to set CPU state,"
|
|
" error=%d", errno);
|
|
}
|
|
}
|
|
|
|
if (has_event) {
|
|
ret = nvmm_vcpu_inject(mach, vcpu);
|
|
if (ret == -1) {
|
|
error_report("NVMM: Failed to inject event,"
|
|
" error=%d", errno);
|
|
}
|
|
}
|
|
|
|
bql_unlock();
|
|
}
|
|
|
|
/*
|
|
* Called after the VCPU ran. We synchronize the host view of the TPR and
|
|
* RFLAGS.
|
|
*/
|
|
static void
|
|
nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit *exit)
|
|
{
|
|
AccelCPUState *qcpu = cpu->accel;
|
|
X86CPU *x86_cpu = X86_CPU(cpu);
|
|
CPUX86State *env = &x86_cpu->env;
|
|
uint64_t tpr;
|
|
|
|
env->eflags = exit->exitstate.rflags;
|
|
qcpu->int_shadow = exit->exitstate.int_shadow;
|
|
qcpu->int_window_exit = exit->exitstate.int_window_exiting;
|
|
qcpu->nmi_window_exit = exit->exitstate.nmi_window_exiting;
|
|
|
|
tpr = exit->exitstate.cr8;
|
|
if (qcpu->tpr != tpr) {
|
|
qcpu->tpr = tpr;
|
|
bql_lock();
|
|
cpu_set_apic_tpr(x86_cpu->apic_state, qcpu->tpr);
|
|
bql_unlock();
|
|
}
|
|
}
|
|
|
|
/* -------------------------------------------------------------------------- */
|
|
|
|
static void
|
|
nvmm_io_callback(struct nvmm_io *io)
|
|
{
|
|
MemTxAttrs attrs = { 0 };
|
|
int ret;
|
|
|
|
ret = address_space_rw(&address_space_io, io->port, attrs, io->data,
|
|
io->size, !io->in);
|
|
if (ret != MEMTX_OK) {
|
|
error_report("NVMM: I/O Transaction Failed "
|
|
"[%s, port=%u, size=%zu]", (io->in ? "in" : "out"),
|
|
io->port, io->size);
|
|
}
|
|
|
|
/* Needed, otherwise infinite loop. */
|
|
current_cpu->vcpu_dirty = false;
|
|
}
|
|
|
|
static void
|
|
nvmm_mem_callback(struct nvmm_mem *mem)
|
|
{
|
|
cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write);
|
|
|
|
/* Needed, otherwise infinite loop. */
|
|
current_cpu->vcpu_dirty = false;
|
|
}
|
|
|
|
static struct nvmm_assist_callbacks nvmm_callbacks = {
|
|
.io = nvmm_io_callback,
|
|
.mem = nvmm_mem_callback
|
|
};
|
|
|
|
/* -------------------------------------------------------------------------- */
|
|
|
|
static int
|
|
nvmm_handle_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
|
|
{
|
|
int ret;
|
|
|
|
ret = nvmm_assist_mem(mach, vcpu);
|
|
if (ret == -1) {
|
|
error_report("NVMM: Mem Assist Failed [gpa=%p]",
|
|
(void *)vcpu->exit->u.mem.gpa);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int
|
|
nvmm_handle_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
|
|
{
|
|
int ret;
|
|
|
|
ret = nvmm_assist_io(mach, vcpu);
|
|
if (ret == -1) {
|
|
error_report("NVMM: I/O Assist Failed [port=%d]",
|
|
(int)vcpu->exit->u.io.port);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int
|
|
nvmm_handle_rdmsr(struct nvmm_machine *mach, CPUState *cpu,
|
|
struct nvmm_vcpu_exit *exit)
|
|
{
|
|
AccelCPUState *qcpu = cpu->accel;
|
|
struct nvmm_vcpu *vcpu = &qcpu->vcpu;
|
|
X86CPU *x86_cpu = X86_CPU(cpu);
|
|
struct nvmm_x64_state *state = vcpu->state;
|
|
uint64_t val;
|
|
int ret;
|
|
|
|
switch (exit->u.rdmsr.msr) {
|
|
case MSR_IA32_APICBASE:
|
|
val = cpu_get_apic_base(x86_cpu->apic_state);
|
|
break;
|
|
case MSR_MTRRcap:
|
|
case MSR_MTRRdefType:
|
|
case MSR_MCG_CAP:
|
|
case MSR_MCG_STATUS:
|
|
val = 0;
|
|
break;
|
|
default: /* More MSRs to add? */
|
|
val = 0;
|
|
error_report("NVMM: Unexpected RDMSR 0x%x, ignored",
|
|
exit->u.rdmsr.msr);
|
|
break;
|
|
}
|
|
|
|
ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
|
|
if (ret == -1) {
|
|
return -1;
|
|
}
|
|
|
|
state->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF);
|
|
state->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
|
|
state->gprs[NVMM_X64_GPR_RIP] = exit->u.rdmsr.npc;
|
|
|
|
ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
|
|
if (ret == -1) {
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
nvmm_handle_wrmsr(struct nvmm_machine *mach, CPUState *cpu,
|
|
struct nvmm_vcpu_exit *exit)
|
|
{
|
|
AccelCPUState *qcpu = cpu->accel;
|
|
struct nvmm_vcpu *vcpu = &qcpu->vcpu;
|
|
X86CPU *x86_cpu = X86_CPU(cpu);
|
|
struct nvmm_x64_state *state = vcpu->state;
|
|
uint64_t val;
|
|
int ret;
|
|
|
|
val = exit->u.wrmsr.val;
|
|
|
|
switch (exit->u.wrmsr.msr) {
|
|
case MSR_IA32_APICBASE:
|
|
cpu_set_apic_base(x86_cpu->apic_state, val);
|
|
break;
|
|
case MSR_MTRRdefType:
|
|
case MSR_MCG_STATUS:
|
|
break;
|
|
default: /* More MSRs to add? */
|
|
error_report("NVMM: Unexpected WRMSR 0x%x [val=0x%lx], ignored",
|
|
exit->u.wrmsr.msr, val);
|
|
break;
|
|
}
|
|
|
|
ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
|
|
if (ret == -1) {
|
|
return -1;
|
|
}
|
|
|
|
state->gprs[NVMM_X64_GPR_RIP] = exit->u.wrmsr.npc;
|
|
|
|
ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
|
|
if (ret == -1) {
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu,
|
|
struct nvmm_vcpu_exit *exit)
|
|
{
|
|
CPUX86State *env = cpu_env(cpu);
|
|
int ret = 0;
|
|
|
|
bql_lock();
|
|
|
|
if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
|
|
(env->eflags & IF_MASK)) &&
|
|
!(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
|
|
cpu->exception_index = EXCP_HLT;
|
|
cpu->halted = true;
|
|
ret = 1;
|
|
}
|
|
|
|
bql_unlock();
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int
|
|
nvmm_inject_ud(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
|
|
{
|
|
struct nvmm_vcpu_event *event = vcpu->event;
|
|
|
|
event->type = NVMM_VCPU_EVENT_EXCP;
|
|
event->vector = 6;
|
|
event->u.excp.error = 0;
|
|
|
|
return nvmm_vcpu_inject(mach, vcpu);
|
|
}
|
|
|
|
static int
|
|
nvmm_vcpu_loop(CPUState *cpu)
|
|
{
|
|
struct nvmm_machine *mach = get_nvmm_mach();
|
|
AccelCPUState *qcpu = cpu->accel;
|
|
struct nvmm_vcpu *vcpu = &qcpu->vcpu;
|
|
X86CPU *x86_cpu = X86_CPU(cpu);
|
|
CPUX86State *env = &x86_cpu->env;
|
|
struct nvmm_vcpu_exit *exit = vcpu->exit;
|
|
int ret;
|
|
|
|
/*
|
|
* Some asynchronous events must be handled outside of the inner
|
|
* VCPU loop. They are handled here.
|
|
*/
|
|
if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
|
|
nvmm_cpu_synchronize_state(cpu);
|
|
do_cpu_init(x86_cpu);
|
|
/* set int/nmi windows back to the reset state */
|
|
}
|
|
if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
|
|
cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
|
|
apic_poll_irq(x86_cpu->apic_state);
|
|
}
|
|
if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
|
|
(env->eflags & IF_MASK)) ||
|
|
(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
|
|
cpu->halted = false;
|
|
}
|
|
if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
|
|
nvmm_cpu_synchronize_state(cpu);
|
|
do_cpu_sipi(x86_cpu);
|
|
}
|
|
if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
|
|
cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
|
|
nvmm_cpu_synchronize_state(cpu);
|
|
apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
|
|
env->tpr_access_type);
|
|
}
|
|
|
|
if (cpu->halted) {
|
|
cpu->exception_index = EXCP_HLT;
|
|
qatomic_set(&cpu->exit_request, false);
|
|
return 0;
|
|
}
|
|
|
|
bql_unlock();
|
|
cpu_exec_start(cpu);
|
|
|
|
/*
|
|
* Inner VCPU loop.
|
|
*/
|
|
do {
|
|
if (cpu->vcpu_dirty) {
|
|
nvmm_set_registers(cpu);
|
|
cpu->vcpu_dirty = false;
|
|
}
|
|
|
|
if (qcpu->stop) {
|
|
cpu->exception_index = EXCP_INTERRUPT;
|
|
qcpu->stop = false;
|
|
ret = 1;
|
|
break;
|
|
}
|
|
|
|
nvmm_vcpu_pre_run(cpu);
|
|
|
|
if (qatomic_read(&cpu->exit_request)) {
|
|
#if NVMM_USER_VERSION >= 2
|
|
nvmm_vcpu_stop(vcpu);
|
|
#else
|
|
qemu_cpu_kick_self();
|
|
#endif
|
|
}
|
|
|
|
/* Read exit_request before the kernel reads the immediate exit flag */
|
|
smp_rmb();
|
|
ret = nvmm_vcpu_run(mach, vcpu);
|
|
if (ret == -1) {
|
|
error_report("NVMM: Failed to exec a virtual processor,"
|
|
" error=%d", errno);
|
|
break;
|
|
}
|
|
|
|
nvmm_vcpu_post_run(cpu, exit);
|
|
|
|
switch (exit->reason) {
|
|
case NVMM_VCPU_EXIT_NONE:
|
|
break;
|
|
#if NVMM_USER_VERSION >= 2
|
|
case NVMM_VCPU_EXIT_STOPPED:
|
|
/*
|
|
* The kernel cleared the immediate exit flag; cpu->exit_request
|
|
* must be cleared after
|
|
*/
|
|
smp_wmb();
|
|
qcpu->stop = true;
|
|
break;
|
|
#endif
|
|
case NVMM_VCPU_EXIT_MEMORY:
|
|
ret = nvmm_handle_mem(mach, vcpu);
|
|
break;
|
|
case NVMM_VCPU_EXIT_IO:
|
|
ret = nvmm_handle_io(mach, vcpu);
|
|
break;
|
|
case NVMM_VCPU_EXIT_INT_READY:
|
|
case NVMM_VCPU_EXIT_NMI_READY:
|
|
case NVMM_VCPU_EXIT_TPR_CHANGED:
|
|
break;
|
|
case NVMM_VCPU_EXIT_HALTED:
|
|
ret = nvmm_handle_halted(mach, cpu, exit);
|
|
break;
|
|
case NVMM_VCPU_EXIT_SHUTDOWN:
|
|
qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
|
|
cpu->exception_index = EXCP_INTERRUPT;
|
|
ret = 1;
|
|
break;
|
|
case NVMM_VCPU_EXIT_RDMSR:
|
|
ret = nvmm_handle_rdmsr(mach, cpu, exit);
|
|
break;
|
|
case NVMM_VCPU_EXIT_WRMSR:
|
|
ret = nvmm_handle_wrmsr(mach, cpu, exit);
|
|
break;
|
|
case NVMM_VCPU_EXIT_MONITOR:
|
|
case NVMM_VCPU_EXIT_MWAIT:
|
|
ret = nvmm_inject_ud(mach, vcpu);
|
|
break;
|
|
default:
|
|
error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]",
|
|
exit->reason, exit->u.inv.hwcode);
|
|
nvmm_get_registers(cpu);
|
|
bql_lock();
|
|
qemu_system_guest_panicked(cpu_get_crash_info(cpu));
|
|
bql_unlock();
|
|
ret = -1;
|
|
break;
|
|
}
|
|
} while (ret == 0);
|
|
|
|
cpu_exec_end(cpu);
|
|
bql_lock();
|
|
|
|
qatomic_set(&cpu->exit_request, false);
|
|
|
|
return ret < 0;
|
|
}
|
|
|
|
/* -------------------------------------------------------------------------- */
|
|
|
|
static void
|
|
do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
|
|
{
|
|
nvmm_get_registers(cpu);
|
|
cpu->vcpu_dirty = true;
|
|
}
|
|
|
|
static void
|
|
do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
|
|
{
|
|
nvmm_set_registers(cpu);
|
|
cpu->vcpu_dirty = false;
|
|
}
|
|
|
|
static void
|
|
do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
|
|
{
|
|
nvmm_set_registers(cpu);
|
|
cpu->vcpu_dirty = false;
|
|
}
|
|
|
|
static void
|
|
do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
|
|
{
|
|
cpu->vcpu_dirty = true;
|
|
}
|
|
|
|
/*
 * Make env reflect the current vCPU state.  Does nothing when the
 * QEMU-side copy is already marked dirty.
 */
void nvmm_cpu_synchronize_state(CPUState *cpu)
{
    if (cpu->vcpu_dirty) {
        return;
    }

    run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL);
}
|
|
|
|
/* Push env into the vCPU after a reset, on the vCPU's own thread. */
void nvmm_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_reset,
               RUN_ON_CPU_NULL);
}
|
|
|
|
/* Push env into the vCPU after init, on the vCPU's own thread. */
void nvmm_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_init,
               RUN_ON_CPU_NULL);
}
|
|
|
|
/* Mark the QEMU-side state dirty before loadvm, on the vCPU's own thread. */
void nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_nvmm_cpu_synchronize_pre_loadvm,
               RUN_ON_CPU_NULL);
}
|
|
|
|
/* -------------------------------------------------------------------------- */
|
|
|
|
/* Migration blocker; created by nvmm_init_vcpu() the first time it is NULL. */
static Error *nvmm_migration_blocker;
|
|
|
|
/*
|
|
* The nvmm_vcpu_stop() mechanism breaks races between entering the VMM
|
|
* and another thread signaling the vCPU thread to exit.
|
|
*/
|
|
|
|
static void
|
|
nvmm_ipi_signal(int sigcpu)
|
|
{
|
|
if (current_cpu) {
|
|
AccelCPUState *qcpu = current_cpu->accel;
|
|
#if NVMM_USER_VERSION >= 2
|
|
struct nvmm_vcpu *vcpu = &qcpu->vcpu;
|
|
nvmm_vcpu_stop(vcpu);
|
|
#else
|
|
qcpu->stop = true;
|
|
#endif
|
|
}
|
|
}
|
|
|
|
static void
|
|
nvmm_init_cpu_signals(void)
|
|
{
|
|
struct sigaction sigact;
|
|
sigset_t set;
|
|
|
|
/* Install the IPI handler. */
|
|
memset(&sigact, 0, sizeof(sigact));
|
|
sigact.sa_handler = nvmm_ipi_signal;
|
|
sigaction(SIG_IPI, &sigact, NULL);
|
|
|
|
/* Allow IPIs on the current thread. */
|
|
sigprocmask(SIG_BLOCK, NULL, &set);
|
|
sigdelset(&set, SIG_IPI);
|
|
pthread_sigmask(SIG_SETMASK, &set, NULL);
|
|
}
|
|
|
|
int
|
|
nvmm_init_vcpu(CPUState *cpu)
|
|
{
|
|
struct nvmm_machine *mach = get_nvmm_mach();
|
|
struct nvmm_vcpu_conf_cpuid cpuid;
|
|
struct nvmm_vcpu_conf_tpr tpr;
|
|
Error *local_error = NULL;
|
|
AccelCPUState *qcpu;
|
|
int ret, err;
|
|
|
|
nvmm_init_cpu_signals();
|
|
|
|
if (nvmm_migration_blocker == NULL) {
|
|
error_setg(&nvmm_migration_blocker,
|
|
"NVMM: Migration not supported");
|
|
|
|
if (migrate_add_blocker(&nvmm_migration_blocker, &local_error) < 0) {
|
|
error_report_err(local_error);
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
qcpu = g_new0(AccelCPUState, 1);
|
|
|
|
ret = nvmm_vcpu_create(mach, cpu->cpu_index, &qcpu->vcpu);
|
|
if (ret == -1) {
|
|
err = errno;
|
|
error_report("NVMM: Failed to create a virtual processor,"
|
|
" error=%d", err);
|
|
g_free(qcpu);
|
|
return -err;
|
|
}
|
|
|
|
memset(&cpuid, 0, sizeof(cpuid));
|
|
cpuid.mask = 1;
|
|
cpuid.leaf = 0x00000001;
|
|
cpuid.u.mask.set.edx = CPUID_MCE | CPUID_MCA | CPUID_MTRR;
|
|
ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CPUID,
|
|
&cpuid);
|
|
if (ret == -1) {
|
|
err = errno;
|
|
error_report("NVMM: Failed to configure a virtual processor,"
|
|
" error=%d", err);
|
|
g_free(qcpu);
|
|
return -err;
|
|
}
|
|
|
|
ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CALLBACKS,
|
|
&nvmm_callbacks);
|
|
if (ret == -1) {
|
|
err = errno;
|
|
error_report("NVMM: Failed to configure a virtual processor,"
|
|
" error=%d", err);
|
|
g_free(qcpu);
|
|
return -err;
|
|
}
|
|
|
|
if (qemu_mach.cap.arch.vcpu_conf_support & NVMM_CAP_ARCH_VCPU_CONF_TPR) {
|
|
memset(&tpr, 0, sizeof(tpr));
|
|
tpr.exit_changed = 1;
|
|
ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_TPR, &tpr);
|
|
if (ret == -1) {
|
|
err = errno;
|
|
error_report("NVMM: Failed to configure a virtual processor,"
|
|
" error=%d", err);
|
|
g_free(qcpu);
|
|
return -err;
|
|
}
|
|
}
|
|
|
|
cpu->vcpu_dirty = true;
|
|
cpu->accel = qcpu;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
nvmm_vcpu_exec(CPUState *cpu)
|
|
{
|
|
int ret, fatal;
|
|
|
|
while (1) {
|
|
if (cpu->exception_index >= EXCP_INTERRUPT) {
|
|
ret = cpu->exception_index;
|
|
cpu->exception_index = -1;
|
|
break;
|
|
}
|
|
|
|
fatal = nvmm_vcpu_loop(cpu);
|
|
|
|
if (fatal) {
|
|
error_report("NVMM: Failed to execute a VCPU.");
|
|
abort();
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
void
|
|
nvmm_destroy_vcpu(CPUState *cpu)
|
|
{
|
|
struct nvmm_machine *mach = get_nvmm_mach();
|
|
AccelCPUState *qcpu = cpu->accel;
|
|
|
|
nvmm_vcpu_destroy(mach, &qcpu->vcpu);
|
|
g_free(cpu->accel);
|
|
}
|
|
|
|
/* -------------------------------------------------------------------------- */
|
|
|
|
static void
|
|
nvmm_update_mapping(hwaddr start_pa, ram_addr_t size, uintptr_t hva,
|
|
bool add, bool rom, const char *name)
|
|
{
|
|
struct nvmm_machine *mach = get_nvmm_mach();
|
|
int ret, prot;
|
|
|
|
if (add) {
|
|
prot = PROT_READ | PROT_EXEC;
|
|
if (!rom) {
|
|
prot |= PROT_WRITE;
|
|
}
|
|
ret = nvmm_gpa_map(mach, hva, start_pa, size, prot);
|
|
} else {
|
|
ret = nvmm_gpa_unmap(mach, hva, start_pa, size);
|
|
}
|
|
|
|
if (ret == -1) {
|
|
error_report("NVMM: Failed to %s GPA range '%s' PA:%p, "
|
|
"Size:%p bytes, HostVA:%p, error=%d",
|
|
(add ? "map" : "unmap"), name, (void *)(uintptr_t)start_pa,
|
|
(void *)size, (void *)hva, errno);
|
|
}
|
|
}
|
|
|
|
static void
|
|
nvmm_process_section(MemoryRegionSection *section, int add)
|
|
{
|
|
MemoryRegion *mr = section->mr;
|
|
hwaddr start_pa = section->offset_within_address_space;
|
|
ram_addr_t size = int128_get64(section->size);
|
|
unsigned int delta;
|
|
uintptr_t hva;
|
|
|
|
if (!memory_region_is_ram(mr)) {
|
|
return;
|
|
}
|
|
|
|
/* Adjust start_pa and size so that they are page-aligned. */
|
|
delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask());
|
|
delta &= ~qemu_real_host_page_mask();
|
|
if (delta > size) {
|
|
return;
|
|
}
|
|
start_pa += delta;
|
|
size -= delta;
|
|
size &= qemu_real_host_page_mask();
|
|
if (!size || (start_pa & ~qemu_real_host_page_mask())) {
|
|
return;
|
|
}
|
|
|
|
hva = (uintptr_t)memory_region_get_ram_ptr(mr) +
|
|
section->offset_within_region + delta;
|
|
|
|
nvmm_update_mapping(start_pa, size, hva, add,
|
|
memory_region_is_rom(mr), mr->name);
|
|
}
|
|
|
|
static void
|
|
nvmm_region_add(MemoryListener *listener, MemoryRegionSection *section)
|
|
{
|
|
memory_region_ref(section->mr);
|
|
nvmm_process_section(section, 1);
|
|
}
|
|
|
|
static void
|
|
nvmm_region_del(MemoryListener *listener, MemoryRegionSection *section)
|
|
{
|
|
nvmm_process_section(section, 0);
|
|
memory_region_unref(section->mr);
|
|
}
|
|
|
|
static void
|
|
nvmm_transaction_begin(MemoryListener *listener)
|
|
{
|
|
/* nothing */
|
|
}
|
|
|
|
static void
|
|
nvmm_transaction_commit(MemoryListener *listener)
|
|
{
|
|
/* nothing */
|
|
}
|
|
|
|
static void
|
|
nvmm_log_sync(MemoryListener *listener, MemoryRegionSection *section)
|
|
{
|
|
MemoryRegion *mr = section->mr;
|
|
|
|
if (!memory_region_is_ram(mr)) {
|
|
return;
|
|
}
|
|
|
|
memory_region_set_dirty(mr, 0, int128_get64(section->size));
|
|
}
|
|
|
|
static MemoryListener nvmm_memory_listener = {
|
|
.name = "nvmm",
|
|
.begin = nvmm_transaction_begin,
|
|
.commit = nvmm_transaction_commit,
|
|
.region_add = nvmm_region_add,
|
|
.region_del = nvmm_region_del,
|
|
.log_sync = nvmm_log_sync,
|
|
.priority = MEMORY_LISTENER_PRIORITY_ACCEL,
|
|
};
|
|
|
|
static void
|
|
nvmm_ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
|
|
size_t max_size)
|
|
{
|
|
struct nvmm_machine *mach = get_nvmm_mach();
|
|
uintptr_t hva = (uintptr_t)host;
|
|
int ret;
|
|
|
|
ret = nvmm_hva_map(mach, hva, max_size);
|
|
|
|
if (ret == -1) {
|
|
error_report("NVMM: Failed to map HVA, HostVA:%p "
|
|
"Size:%p bytes, error=%d",
|
|
(void *)hva, (void *)size, errno);
|
|
}
|
|
}
|
|
|
|
static struct RAMBlockNotifier nvmm_ram_notifier = {
|
|
.ram_block_added = nvmm_ram_block_added
|
|
};
|
|
|
|
/* -------------------------------------------------------------------------- */
|
|
|
|
static int
|
|
nvmm_accel_init(MachineState *ms)
|
|
{
|
|
int ret, err;
|
|
|
|
ret = nvmm_init();
|
|
if (ret == -1) {
|
|
err = errno;
|
|
error_report("NVMM: Initialization failed, error=%d", errno);
|
|
return -err;
|
|
}
|
|
|
|
ret = nvmm_capability(&qemu_mach.cap);
|
|
if (ret == -1) {
|
|
err = errno;
|
|
error_report("NVMM: Unable to fetch capability, error=%d", errno);
|
|
return -err;
|
|
}
|
|
if (qemu_mach.cap.version < NVMM_KERN_VERSION) {
|
|
error_report("NVMM: Unsupported version %u", qemu_mach.cap.version);
|
|
return -EPROGMISMATCH;
|
|
}
|
|
if (qemu_mach.cap.state_size != sizeof(struct nvmm_x64_state)) {
|
|
error_report("NVMM: Wrong state size %u", qemu_mach.cap.state_size);
|
|
return -EPROGMISMATCH;
|
|
}
|
|
|
|
ret = nvmm_machine_create(&qemu_mach.mach);
|
|
if (ret == -1) {
|
|
err = errno;
|
|
error_report("NVMM: Machine creation failed, error=%d", errno);
|
|
return -err;
|
|
}
|
|
|
|
memory_listener_register(&nvmm_memory_listener, &address_space_memory);
|
|
ram_block_notifier_add(&nvmm_ram_notifier);
|
|
|
|
printf("NetBSD Virtual Machine Monitor accelerator is operational\n");
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
nvmm_enabled(void)
|
|
{
|
|
return nvmm_allowed;
|
|
}
|
|
|
|
static void
|
|
nvmm_accel_class_init(ObjectClass *oc, void *data)
|
|
{
|
|
AccelClass *ac = ACCEL_CLASS(oc);
|
|
ac->name = "NVMM";
|
|
ac->init_machine = nvmm_accel_init;
|
|
ac->allowed = &nvmm_allowed;
|
|
}
|
|
|
|
static const TypeInfo nvmm_accel_type = {
|
|
.name = ACCEL_CLASS_NAME("nvmm"),
|
|
.parent = TYPE_ACCEL,
|
|
.class_init = nvmm_accel_class_init,
|
|
};
|
|
|
|
static void
|
|
nvmm_type_init(void)
|
|
{
|
|
type_register_static(&nvmm_accel_type);
|
|
}
|
|
|
|
type_init(nvmm_type_init);
|