NetBSD/lib/libnvmm/libnvmm_x86.c
reinoud e6b71c1487 Implement support for trapping REP CMPS instructions in NVMM.
Qemu would abort hard when NVMM would get a memory trap on the instruction
since it didn't know it.
2020-12-27 20:56:14 +00:00

3496 lines
77 KiB
C

/* $NetBSD: libnvmm_x86.c,v 1.43 2020/12/27 20:56:14 reinoud Exp $ */
/*
* Copyright (c) 2018-2020 Maxime Villard, m00nbsd.net
* All rights reserved.
*
* This code is part of the NVMM hypervisor.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <machine/vmparam.h>
#include <machine/pte.h>
#include <machine/psl.h>
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
#define __cacheline_aligned __attribute__((__aligned__(64)))
#include <x86/specialreg.h>
/* -------------------------------------------------------------------------- */
/*
* Undocumented debugging function. Helpful.
*/
int
nvmm_vcpu_dump(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
struct nvmm_x64_state *state = vcpu->state;
uint16_t *attr;
size_t i;
int ret;
const char *segnames[] = {
"ES", "CS", "SS", "DS", "FS", "GS", "GDT", "IDT", "LDT", "TR"
};
ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_ALL);
if (ret == -1)
return -1;
printf("+ VCPU id=%d\n", (int)vcpu->cpuid);
printf("| -> RAX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RAX]);
printf("| -> RCX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RCX]);
printf("| -> RDX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RDX]);
printf("| -> RBX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RBX]);
printf("| -> RSP=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RSP]);
printf("| -> RBP=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RBP]);
printf("| -> RSI=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RSI]);
printf("| -> RDI=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RDI]);
printf("| -> RIP=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RIP]);
printf("| -> RFLAGS=%p\n", (void *)state->gprs[NVMM_X64_GPR_RFLAGS]);
for (i = 0; i < NVMM_X64_NSEG; i++) {
attr = (uint16_t *)&state->segs[i].attrib;
printf("| -> %s: sel=0x%x base=%"PRIx64", limit=%x, "
"attrib=%x [type=%d,l=%d,def=%d]\n",
segnames[i],
state->segs[i].selector,
state->segs[i].base,
state->segs[i].limit,
*attr,
state->segs[i].attrib.type,
state->segs[i].attrib.l,
state->segs[i].attrib.def);
}
printf("| -> MSR_EFER=%"PRIx64"\n", state->msrs[NVMM_X64_MSR_EFER]);
printf("| -> CR0=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR0]);
printf("| -> CR3=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR3]);
printf("| -> CR4=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR4]);
printf("| -> CR8=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR8]);
return 0;
}
/* -------------------------------------------------------------------------- */
#define PTE32_L1_SHIFT 12
#define PTE32_L2_SHIFT 22
#define PTE32_L2_MASK 0xffc00000
#define PTE32_L1_MASK 0x003ff000
#define PTE32_L2_FRAME (PTE32_L2_MASK)
#define PTE32_L1_FRAME (PTE32_L2_FRAME|PTE32_L1_MASK)
#define pte32_l1idx(va) (((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
#define pte32_l2idx(va) (((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)
#define CR3_FRAME_32BIT __BITS(31, 12)
typedef uint32_t pte_32bit_t;
static int
x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
{
gpaddr_t L2gpa, L1gpa;
uintptr_t L2hva, L1hva;
pte_32bit_t *pdir, pte;
nvmm_prot_t pageprot;
/* We begin with an RWXU access. */
*prot = NVMM_PROT_ALL;
/* Parse L2. */
L2gpa = (cr3 & CR3_FRAME_32BIT);
if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
return -1;
pdir = (pte_32bit_t *)L2hva;
pte = pdir[pte32_l2idx(gva)];
if ((pte & PTE_P) == 0)
return -1;
if ((pte & PTE_U) == 0)
*prot &= ~NVMM_PROT_USER;
if ((pte & PTE_W) == 0)
*prot &= ~NVMM_PROT_WRITE;
if ((pte & PTE_PS) && !has_pse)
return -1;
if (pte & PTE_PS) {
*gpa = (pte & PTE32_L2_FRAME);
*gpa = *gpa + (gva & PTE32_L1_MASK);
return 0;
}
/* Parse L1. */
L1gpa = (pte & PTE_FRAME);
if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
return -1;
pdir = (pte_32bit_t *)L1hva;
pte = pdir[pte32_l1idx(gva)];
if ((pte & PTE_P) == 0)
return -1;
if ((pte & PTE_U) == 0)
*prot &= ~NVMM_PROT_USER;
if ((pte & PTE_W) == 0)
*prot &= ~NVMM_PROT_WRITE;
if (pte & PTE_PS)
return -1;
*gpa = (pte & PTE_FRAME);
return 0;
}
/* -------------------------------------------------------------------------- */
#define PTE32_PAE_L1_SHIFT 12
#define PTE32_PAE_L2_SHIFT 21
#define PTE32_PAE_L3_SHIFT 30
#define PTE32_PAE_L3_MASK 0xc0000000
#define PTE32_PAE_L2_MASK 0x3fe00000
#define PTE32_PAE_L1_MASK 0x001ff000
#define PTE32_PAE_L3_FRAME (PTE32_PAE_L3_MASK)
#define PTE32_PAE_L2_FRAME (PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
#define PTE32_PAE_L1_FRAME (PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)
#define pte32_pae_l1idx(va) (((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
#define pte32_pae_l2idx(va) (((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
#define pte32_pae_l3idx(va) (((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)
#define CR3_FRAME_32BIT_PAE __BITS(31, 5)
typedef uint64_t pte_32bit_pae_t;
static int
x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
gpaddr_t L3gpa, L2gpa, L1gpa;
uintptr_t L3hva, L2hva, L1hva;
pte_32bit_pae_t *pdir, pte;
nvmm_prot_t pageprot;
/* We begin with an RWXU access. */
*prot = NVMM_PROT_ALL;
/* Parse L3. */
L3gpa = (cr3 & CR3_FRAME_32BIT_PAE);
if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
return -1;
pdir = (pte_32bit_pae_t *)L3hva;
pte = pdir[pte32_pae_l3idx(gva)];
if ((pte & PTE_P) == 0)
return -1;
if (pte & PTE_NX)
*prot &= ~NVMM_PROT_EXEC;
if (pte & PTE_PS)
return -1;
/* Parse L2. */
L2gpa = (pte & PTE_FRAME);
if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
return -1;
pdir = (pte_32bit_pae_t *)L2hva;
pte = pdir[pte32_pae_l2idx(gva)];
if ((pte & PTE_P) == 0)
return -1;
if ((pte & PTE_U) == 0)
*prot &= ~NVMM_PROT_USER;
if ((pte & PTE_W) == 0)
*prot &= ~NVMM_PROT_WRITE;
if (pte & PTE_NX)
*prot &= ~NVMM_PROT_EXEC;
if (pte & PTE_PS) {
*gpa = (pte & PTE32_PAE_L2_FRAME);
*gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
return 0;
}
/* Parse L1. */
L1gpa = (pte & PTE_FRAME);
if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
return -1;
pdir = (pte_32bit_pae_t *)L1hva;
pte = pdir[pte32_pae_l1idx(gva)];
if ((pte & PTE_P) == 0)
return -1;
if ((pte & PTE_U) == 0)
*prot &= ~NVMM_PROT_USER;
if ((pte & PTE_W) == 0)
*prot &= ~NVMM_PROT_WRITE;
if (pte & PTE_NX)
*prot &= ~NVMM_PROT_EXEC;
if (pte & PTE_PS)
return -1;
*gpa = (pte & PTE_FRAME);
return 0;
}
/* -------------------------------------------------------------------------- */
#define PTE64_L1_SHIFT 12
#define PTE64_L2_SHIFT 21
#define PTE64_L3_SHIFT 30
#define PTE64_L4_SHIFT 39
#define PTE64_L4_MASK 0x0000ff8000000000
#define PTE64_L3_MASK 0x0000007fc0000000
#define PTE64_L2_MASK 0x000000003fe00000
#define PTE64_L1_MASK 0x00000000001ff000
#define PTE64_L4_FRAME PTE64_L4_MASK
#define PTE64_L3_FRAME (PTE64_L4_FRAME|PTE64_L3_MASK)
#define PTE64_L2_FRAME (PTE64_L3_FRAME|PTE64_L2_MASK)
#define PTE64_L1_FRAME (PTE64_L2_FRAME|PTE64_L1_MASK)
#define pte64_l1idx(va) (((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
#define pte64_l2idx(va) (((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
#define pte64_l3idx(va) (((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
#define pte64_l4idx(va) (((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)
#define CR3_FRAME_64BIT __BITS(51, 12)
typedef uint64_t pte_64bit_t;
static inline bool
x86_gva_64bit_canonical(gvaddr_t gva)
{
/* Bits 63:47 must have the same value. */
#define SIGN_EXTEND 0xffff800000000000ULL
return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
}
static int
x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
uintptr_t L4hva, L3hva, L2hva, L1hva;
pte_64bit_t *pdir, pte;
nvmm_prot_t pageprot;
/* We begin with an RWXU access. */
*prot = NVMM_PROT_ALL;
if (!x86_gva_64bit_canonical(gva))
return -1;
/* Parse L4. */
L4gpa = (cr3 & CR3_FRAME_64BIT);
if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva, &pageprot) == -1)
return -1;
pdir = (pte_64bit_t *)L4hva;
pte = pdir[pte64_l4idx(gva)];
if ((pte & PTE_P) == 0)
return -1;
if ((pte & PTE_U) == 0)
*prot &= ~NVMM_PROT_USER;
if ((pte & PTE_W) == 0)
*prot &= ~NVMM_PROT_WRITE;
if (pte & PTE_NX)
*prot &= ~NVMM_PROT_EXEC;
if (pte & PTE_PS)
return -1;
/* Parse L3. */
L3gpa = (pte & PTE_FRAME);
if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
return -1;
pdir = (pte_64bit_t *)L3hva;
pte = pdir[pte64_l3idx(gva)];
if ((pte & PTE_P) == 0)
return -1;
if ((pte & PTE_U) == 0)
*prot &= ~NVMM_PROT_USER;
if ((pte & PTE_W) == 0)
*prot &= ~NVMM_PROT_WRITE;
if (pte & PTE_NX)
*prot &= ~NVMM_PROT_EXEC;
if (pte & PTE_PS) {
*gpa = (pte & PTE64_L3_FRAME);
*gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
return 0;
}
/* Parse L2. */
L2gpa = (pte & PTE_FRAME);
if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
return -1;
pdir = (pte_64bit_t *)L2hva;
pte = pdir[pte64_l2idx(gva)];
if ((pte & PTE_P) == 0)
return -1;
if ((pte & PTE_U) == 0)
*prot &= ~NVMM_PROT_USER;
if ((pte & PTE_W) == 0)
*prot &= ~NVMM_PROT_WRITE;
if (pte & PTE_NX)
*prot &= ~NVMM_PROT_EXEC;
if (pte & PTE_PS) {
*gpa = (pte & PTE64_L2_FRAME);
*gpa = *gpa + (gva & PTE64_L1_MASK);
return 0;
}
/* Parse L1. */
L1gpa = (pte & PTE_FRAME);
if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
return -1;
pdir = (pte_64bit_t *)L1hva;
pte = pdir[pte64_l1idx(gva)];
if ((pte & PTE_P) == 0)
return -1;
if ((pte & PTE_U) == 0)
*prot &= ~NVMM_PROT_USER;
if ((pte & PTE_W) == 0)
*prot &= ~NVMM_PROT_WRITE;
if (pte & PTE_NX)
*prot &= ~NVMM_PROT_EXEC;
if (pte & PTE_PS)
return -1;
*gpa = (pte & PTE_FRAME);
return 0;
}
static inline int
x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
bool is_pae, is_lng, has_pse;
uint64_t cr3;
size_t off;
int ret;
if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
/* No paging. */
*prot = NVMM_PROT_ALL;
*gpa = gva;
return 0;
}
off = (gva & PAGE_MASK);
gva &= ~PAGE_MASK;
is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
cr3 = state->crs[NVMM_X64_CR_CR3];
if (is_pae && is_lng) {
/* 64bit */
ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot);
} else if (is_pae && !is_lng) {
/* 32bit PAE */
ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, prot);
} else if (!is_pae && !is_lng) {
/* 32bit */
ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
} else {
ret = -1;
}
if (ret == -1) {
errno = EFAULT;
}
*gpa = *gpa + off;
return ret;
}
int
nvmm_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
struct nvmm_x64_state *state = vcpu->state;
int ret;
ret = nvmm_vcpu_getstate(mach, vcpu,
NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
if (ret == -1)
return -1;
return x86_gva_to_gpa(mach, state, gva, gpa, prot);
}
/* -------------------------------------------------------------------------- */
#define DISASSEMBLER_BUG() \
do { \
errno = EINVAL; \
return -1; \
} while (0);
static inline bool
is_long_mode(struct nvmm_x64_state *state)
{
return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
}
static inline bool
is_64bit(struct nvmm_x64_state *state)
{
return (state->segs[NVMM_X64_SEG_CS].attrib.l != 0);
}
static inline bool
is_32bit(struct nvmm_x64_state *state)
{
return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
(state->segs[NVMM_X64_SEG_CS].attrib.def == 1);
}
static inline bool
is_16bit(struct nvmm_x64_state *state)
{
return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
(state->segs[NVMM_X64_SEG_CS].attrib.def == 0);
}
static int
segment_check(struct nvmm_x64_state_seg *seg, gvaddr_t gva, size_t size)
{
uint64_t limit;
/*
* This is incomplete. We should check topdown, etc, really that's
* tiring.
*/
if (__predict_false(!seg->attrib.p)) {
goto error;
}
limit = (uint64_t)seg->limit + 1;
if (__predict_true(seg->attrib.g)) {
limit *= PAGE_SIZE;
}
if (__predict_false(gva + size > limit)) {
goto error;
}
return 0;
error:
errno = EFAULT;
return -1;
}
static inline void
segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva)
{
*gva += seg->base;
}
static inline uint64_t
size_to_mask(size_t size)
{
switch (size) {
case 1:
return 0x00000000000000FF;
case 2:
return 0x000000000000FFFF;
case 4:
return 0x00000000FFFFFFFF;
case 8:
default:
return 0xFFFFFFFFFFFFFFFF;
}
}
static uint64_t
rep_get_cnt(struct nvmm_x64_state *state, size_t adsize)
{
uint64_t mask, cnt;
mask = size_to_mask(adsize);
cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
return cnt;
}
static void
rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt)
{
uint64_t mask;
/* XXX: should we zero-extend? */
mask = size_to_mask(adsize);
state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
state->gprs[NVMM_X64_GPR_RCX] |= cnt;
}
static int
read_guest_memory(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
gvaddr_t gva, uint8_t *data, size_t size)
{
struct nvmm_x64_state *state = vcpu->state;
struct nvmm_mem mem;
nvmm_prot_t prot;
gpaddr_t gpa;
uintptr_t hva;
bool is_mmio;
int ret, remain;
ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
if (__predict_false(ret == -1)) {
return -1;
}
if (__predict_false(!(prot & NVMM_PROT_READ))) {
errno = EFAULT;
return -1;
}
if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
} else {
remain = 0;
}
size -= remain;
ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot);
is_mmio = (ret == -1);
if (is_mmio) {
mem.mach = mach;
mem.vcpu = vcpu;
mem.data = data;
mem.gpa = gpa;
mem.write = false;
mem.size = size;
(*vcpu->cbs.mem)(&mem);
} else {
if (__predict_false(!(prot & NVMM_PROT_READ))) {
errno = EFAULT;
return -1;
}
memcpy(data, (uint8_t *)hva, size);
}
if (remain > 0) {
ret = read_guest_memory(mach, vcpu, gva + size,
data + size, remain);
} else {
ret = 0;
}
return ret;
}
static int
write_guest_memory(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
gvaddr_t gva, uint8_t *data, size_t size)
{
struct nvmm_x64_state *state = vcpu->state;
struct nvmm_mem mem;
nvmm_prot_t prot;
gpaddr_t gpa;
uintptr_t hva;
bool is_mmio;
int ret, remain;
ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
if (__predict_false(ret == -1)) {
return -1;
}
if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
errno = EFAULT;
return -1;
}
if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
} else {
remain = 0;
}
size -= remain;
ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot);
is_mmio = (ret == -1);
if (is_mmio) {
mem.mach = mach;
mem.vcpu = vcpu;
mem.data = data;
mem.gpa = gpa;
mem.write = true;
mem.size = size;
(*vcpu->cbs.mem)(&mem);
} else {
if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
errno = EFAULT;
return -1;
}
memcpy((uint8_t *)hva, data, size);
}
if (remain > 0) {
ret = write_guest_memory(mach, vcpu, gva + size,
data + size, remain);
} else {
ret = 0;
}
return ret;
}
/* -------------------------------------------------------------------------- */
static int fetch_segment(struct nvmm_machine *, struct nvmm_vcpu *);
#define NVMM_IO_BATCH_SIZE 32
static int
assist_io_batch(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
{
uint8_t iobuf[NVMM_IO_BATCH_SIZE];
size_t i, iosize, iocnt;
int ret;
cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
iocnt = iosize / io->size;
io->data = iobuf;
if (!io->in) {
ret = read_guest_memory(mach, vcpu, gva, iobuf, iosize);
if (ret == -1)
return -1;
}
for (i = 0; i < iocnt; i++) {
(*vcpu->cbs.io)(io);
io->data += io->size;
}
if (io->in) {
ret = write_guest_memory(mach, vcpu, gva, iobuf, iosize);
if (ret == -1)
return -1;
}
return iocnt;
}
int
nvmm_assist_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
struct nvmm_x64_state *state = vcpu->state;
struct nvmm_vcpu_exit *exit = vcpu->exit;
struct nvmm_io io;
uint64_t cnt = 0; /* GCC */
uint8_t iobuf[8];
int iocnt = 1;
gvaddr_t gva = 0; /* GCC */
int reg = 0; /* GCC */
int ret, seg;
bool psld = false;
if (__predict_false(exit->reason != NVMM_VCPU_EXIT_IO)) {
errno = EINVAL;
return -1;
}
io.mach = mach;
io.vcpu = vcpu;
io.port = exit->u.io.port;
io.in = exit->u.io.in;
io.size = exit->u.io.operand_size;
io.data = iobuf;
ret = nvmm_vcpu_getstate(mach, vcpu,
NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
if (ret == -1)
return -1;
if (exit->u.io.rep) {
cnt = rep_get_cnt(state, exit->u.io.address_size);
if (__predict_false(cnt == 0)) {
state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
goto out;
}
}
if (__predict_false(state->gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
psld = true;
}
/*
* Determine GVA.
*/
if (exit->u.io.str) {
if (io.in) {
reg = NVMM_X64_GPR_RDI;
} else {
reg = NVMM_X64_GPR_RSI;
}
gva = state->gprs[reg];
gva &= size_to_mask(exit->u.io.address_size);
if (exit->u.io.seg != -1) {
seg = exit->u.io.seg;
} else {
if (io.in) {
seg = NVMM_X64_SEG_ES;
} else {
seg = fetch_segment(mach, vcpu);
if (seg == -1)
return -1;
}
}
if (__predict_true(is_long_mode(state))) {
if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
segment_apply(&state->segs[seg], &gva);
}
} else {
ret = segment_check(&state->segs[seg], gva, io.size);
if (ret == -1)
return -1;
segment_apply(&state->segs[seg], &gva);
}
if (exit->u.io.rep && !psld) {
iocnt = assist_io_batch(mach, vcpu, &io, gva, cnt);
if (iocnt == -1)
return -1;
goto done;
}
}
if (!io.in) {
if (!exit->u.io.str) {
memcpy(io.data, &state->gprs[NVMM_X64_GPR_RAX], io.size);
} else {
ret = read_guest_memory(mach, vcpu, gva, io.data,
io.size);
if (ret == -1)
return -1;
}
}
(*vcpu->cbs.io)(&io);
if (io.in) {
if (!exit->u.io.str) {
memcpy(&state->gprs[NVMM_X64_GPR_RAX], io.data, io.size);
if (io.size == 4) {
/* Zero-extend to 64 bits. */
state->gprs[NVMM_X64_GPR_RAX] &= size_to_mask(4);
}
} else {
ret = write_guest_memory(mach, vcpu, gva, io.data,
io.size);
if (ret == -1)
return -1;
}
}
done:
if (exit->u.io.str) {
if (__predict_false(psld)) {
state->gprs[reg] -= iocnt * io.size;
} else {
state->gprs[reg] += iocnt * io.size;
}
}
if (exit->u.io.rep) {
cnt -= iocnt;
rep_set_cnt(state, exit->u.io.address_size, cnt);
if (cnt == 0) {
state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
}
} else {
state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
}
out:
ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
if (ret == -1)
return -1;
return 0;
}
/* -------------------------------------------------------------------------- */
struct x86_emul {
bool readreg;
bool backprop;
bool notouch;
void (*func)(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
};
static void x86_func_or(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_and(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_xchg(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_sub(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_xor(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_cmp(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_test(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_mov(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_stos(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static void x86_func_lods(struct nvmm_vcpu *, struct nvmm_mem *, uint64_t *);
static const struct x86_emul x86_emul_or = {
.readreg = true,
.func = x86_func_or
};
static const struct x86_emul x86_emul_and = {
.readreg = true,
.func = x86_func_and
};
static const struct x86_emul x86_emul_xchg = {
.readreg = true,
.backprop = true,
.func = x86_func_xchg
};
static const struct x86_emul x86_emul_sub = {
.readreg = true,
.func = x86_func_sub
};
static const struct x86_emul x86_emul_xor = {
.readreg = true,
.func = x86_func_xor
};
static const struct x86_emul x86_emul_cmp = {
.notouch = true,
.func = x86_func_cmp
};
static const struct x86_emul x86_emul_test = {
.notouch = true,
.func = x86_func_test
};
static const struct x86_emul x86_emul_mov = {
.func = x86_func_mov
};
static const struct x86_emul x86_emul_stos = {
.func = x86_func_stos
};
static const struct x86_emul x86_emul_lods = {
.func = x86_func_lods
};
/* Legacy prefixes. */
#define LEG_LOCK 0xF0
#define LEG_REPN 0xF2
#define LEG_REP 0xF3
#define LEG_OVR_CS 0x2E
#define LEG_OVR_SS 0x36
#define LEG_OVR_DS 0x3E
#define LEG_OVR_ES 0x26
#define LEG_OVR_FS 0x64
#define LEG_OVR_GS 0x65
#define LEG_OPR_OVR 0x66
#define LEG_ADR_OVR 0x67
struct x86_legpref {
bool opr_ovr:1;
bool adr_ovr:1;
bool rep:1;
bool repn:1;
bool repe:1;
int8_t seg;
};
struct x86_rexpref {
bool b:1;
bool x:1;
bool r:1;
bool w:1;
bool present:1;
};
struct x86_reg {
int num; /* NVMM GPR state index */
uint64_t mask;
};
struct x86_dualreg {
int reg1;
int reg2;
};
enum x86_disp_type {
DISP_NONE,
DISP_0,
DISP_1,
DISP_2,
DISP_4
};
struct x86_disp {
enum x86_disp_type type;
uint64_t data; /* 4 bytes, but can be sign-extended */
};
struct x86_regmodrm {
uint8_t mod:2;
uint8_t reg:3;
uint8_t rm:3;
};
struct x86_immediate {
uint64_t data;
};
struct x86_sib {
uint8_t scale;
const struct x86_reg *idx;
const struct x86_reg *bas;
};
enum x86_store_type {
STORE_NONE,
STORE_REG,
STORE_DUALREG,
STORE_IMM,
STORE_SIB,
STORE_DMO
};
struct x86_store {
enum x86_store_type type;
union {
const struct x86_reg *reg;
struct x86_dualreg dualreg;
struct x86_immediate imm;
struct x86_sib sib;
uint64_t dmo;
} u;
struct x86_disp disp;
int hardseg;
};
struct x86_instr {
uint8_t len;
struct x86_legpref legpref;
struct x86_rexpref rexpref;
struct x86_regmodrm regmodrm;
uint8_t operand_size;
uint8_t address_size;
uint64_t zeroextend_mask;
const struct x86_opcode *opcode;
const struct x86_emul *emul;
struct x86_store src;
struct x86_store dst;
struct x86_store *strm;
};
struct x86_decode_fsm {
/* vcpu */
bool is64bit;
bool is32bit;
bool is16bit;
/* fsm */
int (*fn)(struct x86_decode_fsm *, struct x86_instr *);
uint8_t *buf;
uint8_t *end;
};
struct x86_opcode {
bool valid:1;
bool regmodrm:1;
bool regtorm:1;
bool dmo:1;
bool todmo:1;
bool movs:1;
bool cmps:1;
bool stos:1;
bool lods:1;
bool szoverride:1;
bool group1:1;
bool group3:1;
bool group11:1;
bool immediate:1;
uint8_t defsize;
uint8_t flags;
const struct x86_emul *emul;
};
struct x86_group_entry {
const struct x86_emul *emul;
};
#define OPSIZE_BYTE 0x01
#define OPSIZE_WORD 0x02 /* 2 bytes */
#define OPSIZE_DOUB 0x04 /* 4 bytes */
#define OPSIZE_QUAD 0x08 /* 8 bytes */
#define FLAG_imm8 0x01
#define FLAG_immz 0x02
#define FLAG_ze 0x04
static const struct x86_group_entry group1[8] __cacheline_aligned = {
[1] = { .emul = &x86_emul_or },
[4] = { .emul = &x86_emul_and },
[6] = { .emul = &x86_emul_xor },
[7] = { .emul = &x86_emul_cmp }
};
static const struct x86_group_entry group3[8] __cacheline_aligned = {
[0] = { .emul = &x86_emul_test },
[1] = { .emul = &x86_emul_test }
};
static const struct x86_group_entry group11[8] __cacheline_aligned = {
[0] = { .emul = &x86_emul_mov }
};
static const struct x86_opcode primary_opcode_table[256] __cacheline_aligned = {
/*
* Group1
*/
[0x80] = {
/* Eb, Ib */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.group1 = true,
.immediate = true,
.emul = NULL /* group1 */
},
[0x81] = {
/* Ev, Iz */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
.group1 = true,
.immediate = true,
.flags = FLAG_immz,
.emul = NULL /* group1 */
},
[0x83] = {
/* Ev, Ib */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
.group1 = true,
.immediate = true,
.flags = FLAG_imm8,
.emul = NULL /* group1 */
},
/*
* Group3
*/
[0xF6] = {
/* Eb, Ib */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.group3 = true,
.immediate = true,
.emul = NULL /* group3 */
},
[0xF7] = {
/* Ev, Iz */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
.group3 = true,
.immediate = true,
.flags = FLAG_immz,
.emul = NULL /* group3 */
},
/*
* Group11
*/
[0xC6] = {
/* Eb, Ib */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.group11 = true,
.immediate = true,
.emul = NULL /* group11 */
},
[0xC7] = {
/* Ev, Iz */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
.group11 = true,
.immediate = true,
.flags = FLAG_immz,
.emul = NULL /* group11 */
},
/*
* OR
*/
[0x08] = {
/* Eb, Gb */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.emul = &x86_emul_or
},
[0x09] = {
/* Ev, Gv */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
.emul = &x86_emul_or
},
[0x0A] = {
/* Gb, Eb */
.valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.emul = &x86_emul_or
},
[0x0B] = {
/* Gv, Ev */
.valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = true,
.defsize = -1,
.emul = &x86_emul_or
},
/*
* AND
*/
[0x20] = {
/* Eb, Gb */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.emul = &x86_emul_and
},
[0x21] = {
/* Ev, Gv */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
.emul = &x86_emul_and
},
[0x22] = {
/* Gb, Eb */
.valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.emul = &x86_emul_and
},
[0x23] = {
/* Gv, Ev */
.valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = true,
.defsize = -1,
.emul = &x86_emul_and
},
/*
* SUB
*/
[0x28] = {
/* Eb, Gb */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.emul = &x86_emul_sub
},
[0x29] = {
/* Ev, Gv */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
.emul = &x86_emul_sub
},
[0x2A] = {
/* Gb, Eb */
.valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.emul = &x86_emul_sub
},
[0x2B] = {
/* Gv, Ev */
.valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = true,
.defsize = -1,
.emul = &x86_emul_sub
},
/*
* XOR
*/
[0x30] = {
/* Eb, Gb */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.emul = &x86_emul_xor
},
[0x31] = {
/* Ev, Gv */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
.emul = &x86_emul_xor
},
[0x32] = {
/* Gb, Eb */
.valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.emul = &x86_emul_xor
},
[0x33] = {
/* Gv, Ev */
.valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = true,
.defsize = -1,
.emul = &x86_emul_xor
},
/*
* XCHG
*/
[0x86] = {
/* Eb, Gb */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.emul = &x86_emul_xchg
},
[0x87] = {
/* Ev, Gv */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
.emul = &x86_emul_xchg
},
/*
* MOV
*/
[0x88] = {
/* Eb, Gb */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.emul = &x86_emul_mov
},
[0x89] = {
/* Ev, Gv */
.valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
.emul = &x86_emul_mov
},
[0x8A] = {
/* Gb, Eb */
.valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.emul = &x86_emul_mov
},
[0x8B] = {
/* Gv, Ev */
.valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = true,
.defsize = -1,
.emul = &x86_emul_mov
},
[0xA0] = {
/* AL, Ob */
.valid = true,
.dmo = true,
.todmo = false,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.emul = &x86_emul_mov
},
[0xA1] = {
/* rAX, Ov */
.valid = true,
.dmo = true,
.todmo = false,
.szoverride = true,
.defsize = -1,
.emul = &x86_emul_mov
},
[0xA2] = {
/* Ob, AL */
.valid = true,
.dmo = true,
.todmo = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.emul = &x86_emul_mov
},
[0xA3] = {
/* Ov, rAX */
.valid = true,
.dmo = true,
.todmo = true,
.szoverride = true,
.defsize = -1,
.emul = &x86_emul_mov
},
/*
* MOVS
*/
[0xA4] = {
/* Yb, Xb */
.valid = true,
.movs = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.emul = NULL
},
[0xA5] = {
/* Yv, Xv */
.valid = true,
.movs = true,
.szoverride = true,
.defsize = -1,
.emul = NULL
},
/*
* CMPS
*/
[0xA6] = {
/* Yb, Xb */
.valid = true,
.cmps = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.emul = NULL
},
[0xA7] = {
/* Yv, Xv */
.valid = true,
.cmps = true,
.szoverride = true,
.defsize = -1,
.emul = NULL
},
/*
* STOS
*/
[0xAA] = {
/* Yb, AL */
.valid = true,
.stos = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.emul = &x86_emul_stos
},
[0xAB] = {
/* Yv, rAX */
.valid = true,
.stos = true,
.szoverride = true,
.defsize = -1,
.emul = &x86_emul_stos
},
/*
* LODS
*/
[0xAC] = {
/* AL, Xb */
.valid = true,
.lods = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.emul = &x86_emul_lods
},
[0xAD] = {
/* rAX, Xv */
.valid = true,
.lods = true,
.szoverride = true,
.defsize = -1,
.emul = &x86_emul_lods
},
};
static const struct x86_opcode secondary_opcode_table[256] __cacheline_aligned = {
/*
* MOVZX
*/
[0xB6] = {
/* Gv, Eb */
.valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = true,
.defsize = OPSIZE_BYTE,
.flags = FLAG_ze,
.emul = &x86_emul_mov
},
[0xB7] = {
/* Gv, Ew */
.valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = true,
.defsize = OPSIZE_WORD,
.flags = FLAG_ze,
.emul = &x86_emul_mov
},
};
static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };
/* [REX-present][enc][opsize] */
static const struct x86_reg gpr_map__special[2][4][8] __cacheline_aligned = {
[false] = {
/* No REX prefix. */
[0b00] = {
[0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */
[1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { -1, 0 },
},
[0b01] = {
[0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */
[1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { -1, 0 },
},
[0b10] = {
[0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */
[1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { -1, 0 },
},
[0b11] = {
[0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */
[1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { -1, 0 },
}
},
[true] = {
/* Has REX prefix. */
[0b00] = {
[0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */
[1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */
},
[0b01] = {
[0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */
[1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */
},
[0b10] = {
[0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */
[1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */
},
[0b11] = {
[0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */
[1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */
}
}
};
/* [depends][enc][size] */
static const struct x86_reg gpr_map[2][8][8] __cacheline_aligned = {
[false] = {
/* Not extended. */
[0b000] = {
[0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */
[1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF }, /* RAX */
},
[0b001] = {
[0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */
[1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF }, /* RCX */
},
[0b010] = {
[0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */
[1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF }, /* RDX */
},
[0b011] = {
[0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */
[1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF }, /* RBX */
},
[0b100] = {
[0] = { -1, 0 }, /* SPECIAL */
[1] = { -1, 0 }, /* SPECIAL */
[2] = { -1, 0 },
[3] = { -1, 0 }, /* SPECIAL */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { -1, 0 }, /* SPECIAL */
},
[0b101] = {
[0] = { -1, 0 }, /* SPECIAL */
[1] = { -1, 0 }, /* SPECIAL */
[2] = { -1, 0 },
[3] = { -1, 0 }, /* SPECIAL */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { -1, 0 }, /* SPECIAL */
},
[0b110] = {
[0] = { -1, 0 }, /* SPECIAL */
[1] = { -1, 0 }, /* SPECIAL */
[2] = { -1, 0 },
[3] = { -1, 0 }, /* SPECIAL */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { -1, 0 }, /* SPECIAL */
},
[0b111] = {
[0] = { -1, 0 }, /* SPECIAL */
[1] = { -1, 0 }, /* SPECIAL */
[2] = { -1, 0 },
[3] = { -1, 0 }, /* SPECIAL */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { -1, 0 }, /* SPECIAL */
},
},
[true] = {
/* Extended. */
[0b000] = {
[0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */
[1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF }, /* R8 */
},
[0b001] = {
[0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */
[1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF }, /* R9 */
},
[0b010] = {
[0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */
[1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF }, /* R10 */
},
[0b011] = {
[0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */
[1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF }, /* R11 */
},
[0b100] = {
[0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */
[1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF }, /* R12 */
},
[0b101] = {
[0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */
[1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF }, /* R13 */
},
[0b110] = {
[0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */
[1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF }, /* R14 */
},
[0b111] = {
[0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */
[1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */
[2] = { -1, 0 },
[3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */
[4] = { -1, 0 },
[5] = { -1, 0 },
[6] = { -1, 0 },
[7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF }, /* R15 */
},
}
};
/* [enc] */
static const int gpr_dual_reg1_rm[8] __cacheline_aligned = {
[0b000] = NVMM_X64_GPR_RBX, /* BX (+SI) */
[0b001] = NVMM_X64_GPR_RBX, /* BX (+DI) */
[0b010] = NVMM_X64_GPR_RBP, /* BP (+SI) */
[0b011] = NVMM_X64_GPR_RBP, /* BP (+DI) */
[0b100] = NVMM_X64_GPR_RSI, /* SI */
[0b101] = NVMM_X64_GPR_RDI, /* DI */
[0b110] = NVMM_X64_GPR_RBP, /* BP */
[0b111] = NVMM_X64_GPR_RBX, /* BX */
};
static int
node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
fsm->fn = NULL;
return -1;
}
static int
fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n)
{
if (fsm->buf + n > fsm->end) {
return -1;
}
memcpy(bytes, fsm->buf, n);
return 0;
}
static inline void
fsm_advance(struct x86_decode_fsm *fsm, size_t n,
int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
{
fsm->buf += n;
if (fsm->buf > fsm->end) {
fsm->fn = node_overflow;
} else {
fsm->fn = fn;
}
}
static const struct x86_reg *
resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
{
enc &= 0b11;
if (regsize == 8) {
/* May be 64bit without REX */
return &gpr_map__special[1][enc][regsize-1];
}
return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
}
/*
* Special node, for MOVS. Fake two displacements of zero on the source and
* destination registers.
*/
static int
node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
size_t adrsize;
adrsize = instr->address_size;
/* DS:RSI */
instr->src.type = STORE_REG;
instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
instr->src.disp.type = DISP_0;
/* ES:RDI, force ES */
instr->dst.type = STORE_REG;
instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
instr->dst.disp.type = DISP_0;
instr->dst.hardseg = NVMM_X64_SEG_ES;
fsm_advance(fsm, 0, NULL);
return 0;
}
/*
* Special node, for CMPS. Fake two displacements of zero on the source and
* destination registers.
* XXX coded as clone of movs as its similar in register usage
* XXX might be merged with node_movs()
*/
static int
node_cmps(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
size_t adrsize;
adrsize = instr->address_size;
/* DS:RSI */
instr->src.type = STORE_REG;
instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
instr->src.disp.type = DISP_0;
/* ES:RDI, force ES */
instr->dst.type = STORE_REG;
instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
instr->dst.disp.type = DISP_0;
instr->dst.hardseg = NVMM_X64_SEG_ES;
fsm_advance(fsm, 0, NULL);
return 0;
}
/*
* Special node, for STOS and LODS. Fake a displacement of zero on the
* destination register.
*/
static int
node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
const struct x86_opcode *opcode = instr->opcode;
struct x86_store *stlo, *streg;
size_t adrsize, regsize;
adrsize = instr->address_size;
regsize = instr->operand_size;
if (opcode->stos) {
streg = &instr->src;
stlo = &instr->dst;
} else {
streg = &instr->dst;
stlo = &instr->src;
}
streg->type = STORE_REG;
streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
stlo->type = STORE_REG;
if (opcode->stos) {
/* ES:RDI, force ES */
stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
stlo->hardseg = NVMM_X64_SEG_ES;
} else {
/* DS:RSI */
stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
}
stlo->disp.type = DISP_0;
fsm_advance(fsm, 0, NULL);
return 0;
}
static int
node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
const struct x86_opcode *opcode = instr->opcode;
struct x86_store *stdmo, *streg;
size_t adrsize, regsize;
adrsize = instr->address_size;
regsize = instr->operand_size;
if (opcode->todmo) {
streg = &instr->src;
stdmo = &instr->dst;
} else {
streg = &instr->dst;
stdmo = &instr->src;
}
streg->type = STORE_REG;
streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
stdmo->type = STORE_DMO;
if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) {
return -1;
}
fsm_advance(fsm, adrsize, NULL);
return 0;
}
static inline uint64_t
sign_extend(uint64_t val, int size)
{
if (size == 1) {
if (val & __BIT(7))
val |= 0xFFFFFFFFFFFFFF00;
} else if (size == 2) {
if (val & __BIT(15))
val |= 0xFFFFFFFFFFFF0000;
} else if (size == 4) {
if (val & __BIT(31))
val |= 0xFFFFFFFF00000000;
}
return val;
}
static int
node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
const struct x86_opcode *opcode = instr->opcode;
struct x86_store *store;
uint8_t immsize;
size_t sesize = 0;
/* The immediate is the source */
store = &instr->src;
immsize = instr->operand_size;
if (opcode->flags & FLAG_imm8) {
sesize = immsize;
immsize = 1;
} else if ((opcode->flags & FLAG_immz) && (immsize == 8)) {
sesize = immsize;
immsize = 4;
}
store->type = STORE_IMM;
if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) {
return -1;
}
fsm_advance(fsm, immsize, NULL);
if (sesize != 0) {
store->u.imm.data = sign_extend(store->u.imm.data, sesize);
}
return 0;
}
static int
node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
const struct x86_opcode *opcode = instr->opcode;
uint64_t data = 0;
size_t n;
if (instr->strm->disp.type == DISP_1) {
n = 1;
} else if (instr->strm->disp.type == DISP_2) {
n = 2;
} else if (instr->strm->disp.type == DISP_4) {
n = 4;
} else {
DISASSEMBLER_BUG();
}
if (fsm_read(fsm, (uint8_t *)&data, n) == -1) {
return -1;
}
if (__predict_true(fsm->is64bit)) {
data = sign_extend(data, n);
}
instr->strm->disp.data = data;
if (opcode->immediate) {
fsm_advance(fsm, n, node_immediate);
} else {
fsm_advance(fsm, n, NULL);
}
return 0;
}
/*
* Special node to handle 16bit addressing encoding, which can reference two
* registers at once.
*/
static int
node_dual(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
int reg1, reg2;
reg1 = gpr_dual_reg1_rm[instr->regmodrm.rm];
if (instr->regmodrm.rm == 0b000 ||
instr->regmodrm.rm == 0b010) {
reg2 = NVMM_X64_GPR_RSI;
} else if (instr->regmodrm.rm == 0b001 ||
instr->regmodrm.rm == 0b011) {
reg2 = NVMM_X64_GPR_RDI;
} else {
DISASSEMBLER_BUG();
}
instr->strm->type = STORE_DUALREG;
instr->strm->u.dualreg.reg1 = reg1;
instr->strm->u.dualreg.reg2 = reg2;
if (instr->strm->disp.type == DISP_NONE) {
DISASSEMBLER_BUG();
} else if (instr->strm->disp.type == DISP_0) {
/* Indirect register addressing mode */
if (instr->opcode->immediate) {
fsm_advance(fsm, 1, node_immediate);
} else {
fsm_advance(fsm, 1, NULL);
}
} else {
fsm_advance(fsm, 1, node_disp);
}
return 0;
}
static const struct x86_reg *
get_register_idx(struct x86_instr *instr, uint8_t index)
{
uint8_t enc = index;
const struct x86_reg *reg;
size_t regsize;
regsize = instr->address_size;
reg = &gpr_map[instr->rexpref.x][enc][regsize-1];
if (reg->num == -1) {
reg = resolve_special_register(instr, enc, regsize);
}
return reg;
}
static const struct x86_reg *
get_register_bas(struct x86_instr *instr, uint8_t base)
{
uint8_t enc = base;
const struct x86_reg *reg;
size_t regsize;
regsize = instr->address_size;
reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
if (reg->num == -1) {
reg = resolve_special_register(instr, enc, regsize);
}
return reg;
}
static int
node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
const struct x86_opcode *opcode;
uint8_t scale, index, base;
bool noindex, nobase;
uint8_t byte;
if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
return -1;
}
scale = ((byte & 0b11000000) >> 6);
index = ((byte & 0b00111000) >> 3);
base = ((byte & 0b00000111) >> 0);
opcode = instr->opcode;
noindex = false;
nobase = false;
if (index == 0b100 && !instr->rexpref.x) {
/* Special case: the index is null */
noindex = true;
}
if (instr->regmodrm.mod == 0b00 && base == 0b101) {
/* Special case: the base is null + disp32 */
instr->strm->disp.type = DISP_4;
nobase = true;
}
instr->strm->type = STORE_SIB;
instr->strm->u.sib.scale = (1 << scale);
if (!noindex)
instr->strm->u.sib.idx = get_register_idx(instr, index);
if (!nobase)
instr->strm->u.sib.bas = get_register_bas(instr, base);
/* May have a displacement, or an immediate */
if (instr->strm->disp.type == DISP_1 ||
instr->strm->disp.type == DISP_2 ||
instr->strm->disp.type == DISP_4) {
fsm_advance(fsm, 1, node_disp);
} else if (opcode->immediate) {
fsm_advance(fsm, 1, node_immediate);
} else {
fsm_advance(fsm, 1, NULL);
}
return 0;
}
static const struct x86_reg *
get_register_reg(struct x86_instr *instr, const struct x86_opcode *opcode)
{
uint8_t enc = instr->regmodrm.reg;
const struct x86_reg *reg;
size_t regsize;
regsize = instr->operand_size;
reg = &gpr_map[instr->rexpref.r][enc][regsize-1];
if (reg->num == -1) {
reg = resolve_special_register(instr, enc, regsize);
}
return reg;
}
static const struct x86_reg *
get_register_rm(struct x86_instr *instr, const struct x86_opcode *opcode)
{
uint8_t enc = instr->regmodrm.rm;
const struct x86_reg *reg;
size_t regsize;
if (instr->strm->disp.type == DISP_NONE) {
regsize = instr->operand_size;
} else {
/* Indirect access, the size is that of the address. */
regsize = instr->address_size;
}
reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
if (reg->num == -1) {
reg = resolve_special_register(instr, enc, regsize);
}
return reg;
}
static inline bool
has_sib(struct x86_instr *instr)
{
return (instr->address_size != 2 && /* no SIB in 16bit addressing */
instr->regmodrm.mod != 0b11 &&
instr->regmodrm.rm == 0b100);
}
static inline bool
is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
return (fsm->is64bit && /* RIP-relative only in 64bit mode */
instr->regmodrm.mod == 0b00 &&
instr->regmodrm.rm == 0b101);
}
static inline bool
is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
return (!fsm->is64bit && /* no disp32-only in 64bit mode */
instr->address_size != 2 && /* no disp32-only in 16bit addressing */
instr->regmodrm.mod == 0b00 &&
instr->regmodrm.rm == 0b101);
}
static inline bool
is_disp16_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
return (instr->address_size == 2 && /* disp16-only only in 16bit addr */
instr->regmodrm.mod == 0b00 &&
instr->regmodrm.rm == 0b110);
}
static inline bool
is_dual(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
return (instr->address_size == 2 &&
instr->regmodrm.mod != 0b11 &&
instr->regmodrm.rm <= 0b011);
}
static enum x86_disp_type
get_disp_type(struct x86_instr *instr)
{
switch (instr->regmodrm.mod) {
case 0b00: /* indirect */
return DISP_0;
case 0b01: /* indirect+1 */
return DISP_1;
case 0b10: /* indirect+{2,4} */
if (__predict_false(instr->address_size == 2)) {
return DISP_2;
}
return DISP_4;
case 0b11: /* direct */
default: /* llvm */
return DISP_NONE;
}
__unreachable();
}
static int
node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
struct x86_store *strg, *strm;
const struct x86_opcode *opcode;
const struct x86_reg *reg;
uint8_t byte;
if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
return -1;
}
opcode = instr->opcode;
instr->regmodrm.rm = ((byte & 0b00000111) >> 0);
instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
if (opcode->regtorm) {
strg = &instr->src;
strm = &instr->dst;
} else { /* RM to REG */
strm = &instr->src;
strg = &instr->dst;
}
/* Save for later use. */
instr->strm = strm;
/*
* Special cases: Groups. The REG field of REGMODRM is the index in
* the group. op1 gets overwritten in the Immediate node, if any.
*/
if (opcode->group1) {
if (group1[instr->regmodrm.reg].emul == NULL) {
return -1;
}
instr->emul = group1[instr->regmodrm.reg].emul;
} else if (opcode->group3) {
if (group3[instr->regmodrm.reg].emul == NULL) {
return -1;
}
instr->emul = group3[instr->regmodrm.reg].emul;
} else if (opcode->group11) {
if (group11[instr->regmodrm.reg].emul == NULL) {
return -1;
}
instr->emul = group11[instr->regmodrm.reg].emul;
}
if (!opcode->immediate) {
reg = get_register_reg(instr, opcode);
if (reg == NULL) {
return -1;
}
strg->type = STORE_REG;
strg->u.reg = reg;
}
/* The displacement applies to RM. */
strm->disp.type = get_disp_type(instr);
if (has_sib(instr)) {
/* Overwrites RM */
fsm_advance(fsm, 1, node_sib);
return 0;
}
if (is_rip_relative(fsm, instr)) {
/* Overwrites RM */
strm->type = STORE_REG;
strm->u.reg = &gpr_map__rip;
strm->disp.type = DISP_4;
fsm_advance(fsm, 1, node_disp);
return 0;
}
if (is_disp32_only(fsm, instr)) {
/* Overwrites RM */
strm->type = STORE_REG;
strm->u.reg = NULL;
strm->disp.type = DISP_4;
fsm_advance(fsm, 1, node_disp);
return 0;
}
if (__predict_false(is_disp16_only(fsm, instr))) {
/* Overwrites RM */
strm->type = STORE_REG;
strm->u.reg = NULL;
strm->disp.type = DISP_2;
fsm_advance(fsm, 1, node_disp);
return 0;
}
if (__predict_false(is_dual(fsm, instr))) {
/* Overwrites RM */
fsm_advance(fsm, 0, node_dual);
return 0;
}
reg = get_register_rm(instr, opcode);
if (reg == NULL) {
return -1;
}
strm->type = STORE_REG;
strm->u.reg = reg;
if (strm->disp.type == DISP_NONE) {
/* Direct register addressing mode */
if (opcode->immediate) {
fsm_advance(fsm, 1, node_immediate);
} else {
fsm_advance(fsm, 1, NULL);
}
} else if (strm->disp.type == DISP_0) {
/* Indirect register addressing mode */
if (opcode->immediate) {
fsm_advance(fsm, 1, node_immediate);
} else {
fsm_advance(fsm, 1, NULL);
}
} else {
fsm_advance(fsm, 1, node_disp);
}
return 0;
}
static size_t
get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
const struct x86_opcode *opcode = instr->opcode;
int opsize;
/* Get the opsize */
if (!opcode->szoverride) {
opsize = opcode->defsize;
} else if (instr->rexpref.present && instr->rexpref.w) {
opsize = 8;
} else {
if (!fsm->is16bit) {
if (instr->legpref.opr_ovr) {
opsize = 2;
} else {
opsize = 4;
}
} else { /* 16bit */
if (instr->legpref.opr_ovr) {
opsize = 4;
} else {
opsize = 2;
}
}
}
return opsize;
}
static size_t
get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
if (fsm->is64bit) {
if (__predict_false(instr->legpref.adr_ovr)) {
return 4;
}
return 8;
}
if (fsm->is32bit) {
if (__predict_false(instr->legpref.adr_ovr)) {
return 2;
}
return 4;
}
/* 16bit. */
if (__predict_false(instr->legpref.adr_ovr)) {
return 4;
}
return 2;
}
static int
node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
const struct x86_opcode *opcode;
uint8_t byte;
if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
return -1;
}
opcode = &primary_opcode_table[byte];
if (__predict_false(!opcode->valid)) {
return -1;
}
instr->opcode = opcode;
instr->emul = opcode->emul;
instr->operand_size = get_operand_size(fsm, instr);
instr->address_size = get_address_size(fsm, instr);
if (fsm->is64bit && (instr->operand_size == 4)) {
/* Zero-extend to 64 bits. */
instr->zeroextend_mask = ~size_to_mask(4);
}
if (opcode->regmodrm) {
fsm_advance(fsm, 1, node_regmodrm);
} else if (opcode->dmo) {
/* Direct-Memory Offsets */
fsm_advance(fsm, 1, node_dmo);
} else if (opcode->stos || opcode->lods) {
fsm_advance(fsm, 1, node_stlo);
} else if (opcode->movs) {
fsm_advance(fsm, 1, node_movs);
} else if (opcode->cmps) {
fsm_advance(fsm, 1, node_cmps);
} else {
return -1;
}
return 0;
}
static int
node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
const struct x86_opcode *opcode;
uint8_t byte;
if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
return -1;
}
opcode = &secondary_opcode_table[byte];
if (__predict_false(!opcode->valid)) {
return -1;
}
instr->opcode = opcode;
instr->emul = opcode->emul;
instr->operand_size = get_operand_size(fsm, instr);
instr->address_size = get_address_size(fsm, instr);
if (fsm->is64bit && (instr->operand_size == 4)) {
/* Zero-extend to 64 bits. */
instr->zeroextend_mask = ~size_to_mask(4);
}
if (opcode->flags & FLAG_ze) {
/*
* Compute the mask for zero-extend. Update the operand size,
* we move fewer bytes.
*/
instr->zeroextend_mask |= size_to_mask(instr->operand_size);
instr->zeroextend_mask &= ~size_to_mask(opcode->defsize);
instr->operand_size = opcode->defsize;
}
if (opcode->regmodrm) {
fsm_advance(fsm, 1, node_regmodrm);
} else {
return -1;
}
return 0;
}
static int
node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
uint8_t byte;
#define ESCAPE 0x0F
#define VEX_1 0xC5
#define VEX_2 0xC4
#define XOP 0x8F
if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
return -1;
}
/*
* We don't take XOP. It is AMD-specific, and it was removed shortly
* after being introduced.
*/
if (byte == ESCAPE) {
fsm_advance(fsm, 1, node_secondary_opcode);
} else if (!instr->rexpref.present) {
if (byte == VEX_1) {
return -1;
} else if (byte == VEX_2) {
return -1;
} else {
fsm->fn = node_primary_opcode;
}
} else {
fsm->fn = node_primary_opcode;
}
return 0;
}
static int
node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
struct x86_rexpref *rexpref = &instr->rexpref;
uint8_t byte;
size_t n = 0;
if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
return -1;
}
if (byte >= 0x40 && byte <= 0x4F) {
if (__predict_false(!fsm->is64bit)) {
return -1;
}
rexpref->b = ((byte & 0x1) != 0);
rexpref->x = ((byte & 0x2) != 0);
rexpref->r = ((byte & 0x4) != 0);
rexpref->w = ((byte & 0x8) != 0);
rexpref->present = true;
n = 1;
}
fsm_advance(fsm, n, node_main);
return 0;
}
static int
node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
uint8_t byte;
if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
return -1;
}
if (byte == LEG_OPR_OVR) {
instr->legpref.opr_ovr = 1;
} else if (byte == LEG_OVR_DS) {
instr->legpref.seg = NVMM_X64_SEG_DS;
} else if (byte == LEG_OVR_ES) {
instr->legpref.seg = NVMM_X64_SEG_ES;
} else if (byte == LEG_REP) {
instr->legpref.rep = 1;
} else if (byte == LEG_OVR_GS) {
instr->legpref.seg = NVMM_X64_SEG_GS;
} else if (byte == LEG_OVR_FS) {
instr->legpref.seg = NVMM_X64_SEG_FS;
} else if (byte == LEG_ADR_OVR) {
instr->legpref.adr_ovr = 1;
} else if (byte == LEG_OVR_CS) {
instr->legpref.seg = NVMM_X64_SEG_CS;
} else if (byte == LEG_OVR_SS) {
instr->legpref.seg = NVMM_X64_SEG_SS;
} else if (byte == LEG_REPN) {
instr->legpref.repn = 1;
} else if (byte == LEG_LOCK) {
/* ignore */
} else {
/* not a legacy prefix */
fsm_advance(fsm, 0, node_rex_prefix);
return 0;
}
fsm_advance(fsm, 1, node_legacy_prefix);
return 0;
}
static int
x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr,
struct nvmm_x64_state *state)
{
struct x86_decode_fsm fsm;
int ret;
memset(instr, 0, sizeof(*instr));
instr->legpref.seg = -1;
instr->src.hardseg = -1;
instr->dst.hardseg = -1;
fsm.is64bit = is_64bit(state);
fsm.is32bit = is_32bit(state);
fsm.is16bit = is_16bit(state);
fsm.fn = node_legacy_prefix;
fsm.buf = inst_bytes;
fsm.end = inst_bytes + inst_len;
while (fsm.fn != NULL) {
ret = (*fsm.fn)(&fsm, instr);
if (ret == -1) {
#ifdef NVMM_DEBUG
printf("\n%s debug: unrecognized instruction found " \
"with max length %ld : [ ", __func__, inst_len);
for (uint i = 0; i < inst_len; i++)
printf("%02x ", inst_bytes[i]);
printf("]\n");
fflush(stdout);
#endif
return -1;
}
}
instr->len = fsm.buf - inst_bytes;
return 0;
}
/* -------------------------------------------------------------------------- */
#define EXEC_INSTR(sz, instr) \
static uint##sz##_t \
exec_##instr##sz(uint##sz##_t op1, uint##sz##_t op2, uint64_t *rflags) \
{ \
uint##sz##_t res; \
__asm __volatile ( \
#instr" %2, %3;" \
"mov %3, %1;" \
"pushfq;" \
"popq %0" \
: "=r" (*rflags), "=r" (res) \
: "r" (op1), "r" (op2)); \
return res; \
}
#define EXEC_DISPATCHER(instr) \
static uint64_t \
exec_##instr(uint64_t op1, uint64_t op2, uint64_t *rflags, size_t opsize) \
{ \
switch (opsize) { \
case 1: \
return exec_##instr##8(op1, op2, rflags); \
case 2: \
return exec_##instr##16(op1, op2, rflags); \
case 4: \
return exec_##instr##32(op1, op2, rflags); \
default: \
return exec_##instr##64(op1, op2, rflags); \
} \
}
/* SUB: ret = op1 - op2 */
#define PSL_SUB_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF|PSL_AF)
EXEC_INSTR(8, sub)
EXEC_INSTR(16, sub)
EXEC_INSTR(32, sub)
EXEC_INSTR(64, sub)
EXEC_DISPATCHER(sub)
/* OR: ret = op1 | op2 */
#define PSL_OR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
EXEC_INSTR(8, or)
EXEC_INSTR(16, or)
EXEC_INSTR(32, or)
EXEC_INSTR(64, or)
EXEC_DISPATCHER(or)
/* AND: ret = op1 & op2 */
#define PSL_AND_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
EXEC_INSTR(8, and)
EXEC_INSTR(16, and)
EXEC_INSTR(32, and)
EXEC_INSTR(64, and)
EXEC_DISPATCHER(and)
/* XOR: ret = op1 ^ op2 */
#define PSL_XOR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
EXEC_INSTR(8, xor)
EXEC_INSTR(16, xor)
EXEC_INSTR(32, xor)
EXEC_INSTR(64, xor)
EXEC_DISPATCHER(xor)
/* -------------------------------------------------------------------------- */
/*
* Emulation functions. We don't care about the order of the operands, except
* for SUB, CMP and TEST. For these ones we look at mem->write to determine who
* is op1 and who is op2.
*/
static void
x86_func_or(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
{
uint64_t *retval = (uint64_t *)mem->data;
const bool write = mem->write;
uint64_t *op1, op2, fl, ret;
op1 = (uint64_t *)mem->data;
op2 = 0;
/* Fetch the value to be OR'ed (op2). */
mem->data = (uint8_t *)&op2;
mem->write = false;
(*vcpu->cbs.mem)(mem);
/* Perform the OR. */
ret = exec_or(*op1, op2, &fl, mem->size);
if (write) {
/* Write back the result. */
mem->data = (uint8_t *)&ret;
mem->write = true;
(*vcpu->cbs.mem)(mem);
} else {
/* Return data to the caller. */
*retval = ret;
}
gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_OR_MASK;
gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_OR_MASK);
}
static void
x86_func_and(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
{
uint64_t *retval = (uint64_t *)mem->data;
const bool write = mem->write;
uint64_t *op1, op2, fl, ret;
op1 = (uint64_t *)mem->data;
op2 = 0;
/* Fetch the value to be AND'ed (op2). */
mem->data = (uint8_t *)&op2;
mem->write = false;
(*vcpu->cbs.mem)(mem);
/* Perform the AND. */
ret = exec_and(*op1, op2, &fl, mem->size);
if (write) {
/* Write back the result. */
mem->data = (uint8_t *)&ret;
mem->write = true;
(*vcpu->cbs.mem)(mem);
} else {
/* Return data to the caller. */
*retval = ret;
}
gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
}
static void
x86_func_xchg(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
{
uint64_t *op1, op2;
op1 = (uint64_t *)mem->data;
op2 = 0;
/* Fetch op2. */
mem->data = (uint8_t *)&op2;
mem->write = false;
(*vcpu->cbs.mem)(mem);
/* Write op1 in op2. */
mem->data = (uint8_t *)op1;
mem->write = true;
(*vcpu->cbs.mem)(mem);
/* Write op2 in op1. */
*op1 = op2;
}
static void
x86_func_sub(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
{
uint64_t *retval = (uint64_t *)mem->data;
const bool write = mem->write;
uint64_t *op1, *op2, fl, ret;
uint64_t tmp;
bool memop1;
memop1 = !mem->write;
op1 = memop1 ? &tmp : (uint64_t *)mem->data;
op2 = memop1 ? (uint64_t *)mem->data : &tmp;
/* Fetch the value to be SUB'ed (op1 or op2). */
mem->data = (uint8_t *)&tmp;
mem->write = false;
(*vcpu->cbs.mem)(mem);
/* Perform the SUB. */
ret = exec_sub(*op1, *op2, &fl, mem->size);
if (write) {
/* Write back the result. */
mem->data = (uint8_t *)&ret;
mem->write = true;
(*vcpu->cbs.mem)(mem);
} else {
/* Return data to the caller. */
*retval = ret;
}
gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
}
static void
x86_func_xor(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
{
uint64_t *retval = (uint64_t *)mem->data;
const bool write = mem->write;
uint64_t *op1, op2, fl, ret;
op1 = (uint64_t *)mem->data;
op2 = 0;
/* Fetch the value to be XOR'ed (op2). */
mem->data = (uint8_t *)&op2;
mem->write = false;
(*vcpu->cbs.mem)(mem);
/* Perform the XOR. */
ret = exec_xor(*op1, op2, &fl, mem->size);
if (write) {
/* Write back the result. */
mem->data = (uint8_t *)&ret;
mem->write = true;
(*vcpu->cbs.mem)(mem);
} else {
/* Return data to the caller. */
*retval = ret;
}
gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_XOR_MASK;
gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_XOR_MASK);
}
static void
x86_func_cmp(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
{
uint64_t *op1, *op2, fl;
uint64_t tmp;
bool memop1;
memop1 = !mem->write;
op1 = memop1 ? &tmp : (uint64_t *)mem->data;
op2 = memop1 ? (uint64_t *)mem->data : &tmp;
/* Fetch the value to be CMP'ed (op1 or op2). */
mem->data = (uint8_t *)&tmp;
mem->write = false;
(*vcpu->cbs.mem)(mem);
/* Perform the CMP. */
exec_sub(*op1, *op2, &fl, mem->size);
gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
}
static void
x86_func_test(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
{
uint64_t *op1, *op2, fl;
uint64_t tmp;
bool memop1;
memop1 = !mem->write;
op1 = memop1 ? &tmp : (uint64_t *)mem->data;
op2 = memop1 ? (uint64_t *)mem->data : &tmp;
/* Fetch the value to be TEST'ed (op1 or op2). */
mem->data = (uint8_t *)&tmp;
mem->write = false;
(*vcpu->cbs.mem)(mem);
/* Perform the TEST. */
exec_and(*op1, *op2, &fl, mem->size);
gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
}
static void
x86_func_mov(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
{
/*
* Nothing special, just move without emulation.
*/
(*vcpu->cbs.mem)(mem);
}
static void
x86_func_stos(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
{
/*
* Just move, and update RDI.
*/
(*vcpu->cbs.mem)(mem);
if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
gprs[NVMM_X64_GPR_RDI] -= mem->size;
} else {
gprs[NVMM_X64_GPR_RDI] += mem->size;
}
}
static void
x86_func_lods(struct nvmm_vcpu *vcpu, struct nvmm_mem *mem, uint64_t *gprs)
{
/*
* Just move, and update RSI.
*/
(*vcpu->cbs.mem)(mem);
if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
gprs[NVMM_X64_GPR_RSI] -= mem->size;
} else {
gprs[NVMM_X64_GPR_RSI] += mem->size;
}
}
/* -------------------------------------------------------------------------- */
static inline uint64_t
gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
{
uint64_t val;
val = state->gprs[gpr];
val &= size_to_mask(instr->address_size);
return val;
}
static int
store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
struct x86_store *store, gvaddr_t *gvap, size_t size)
{
struct x86_sib *sib;
gvaddr_t gva = 0;
uint64_t reg;
int ret, seg;
if (store->type == STORE_SIB) {
sib = &store->u.sib;
if (sib->bas != NULL)
gva += gpr_read_address(instr, state, sib->bas->num);
if (sib->idx != NULL) {
reg = gpr_read_address(instr, state, sib->idx->num);
gva += sib->scale * reg;
}
} else if (store->type == STORE_REG) {
if (store->u.reg == NULL) {
/* The base is null. Happens with disp32-only and
* disp16-only. */
} else {
gva = gpr_read_address(instr, state, store->u.reg->num);
}
} else if (store->type == STORE_DUALREG) {
gva = gpr_read_address(instr, state, store->u.dualreg.reg1) +
gpr_read_address(instr, state, store->u.dualreg.reg2);
} else {
gva = store->u.dmo;
}
if (store->disp.type != DISP_NONE) {
gva += store->disp.data;
}
if (store->hardseg != -1) {
seg = store->hardseg;
} else {
if (__predict_false(instr->legpref.seg != -1)) {
seg = instr->legpref.seg;
} else {
seg = NVMM_X64_SEG_DS;
}
}
if (__predict_true(is_long_mode(state))) {
if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
segment_apply(&state->segs[seg], &gva);
}
} else {
ret = segment_check(&state->segs[seg], gva, size);
if (ret == -1)
return -1;
segment_apply(&state->segs[seg], &gva);
}
*gvap = gva;
return 0;
}
static int
fetch_segment(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
struct nvmm_x64_state *state = vcpu->state;
uint8_t inst_bytes[5], byte;
size_t i, fetchsize;
gvaddr_t gva;
int ret, seg;
fetchsize = sizeof(inst_bytes);
gva = state->gprs[NVMM_X64_GPR_RIP];
if (__predict_false(!is_long_mode(state))) {
ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
fetchsize);
if (ret == -1)
return -1;
segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
}
ret = read_guest_memory(mach, vcpu, gva, inst_bytes, fetchsize);
if (ret == -1)
return -1;
seg = NVMM_X64_SEG_DS;
for (i = 0; i < fetchsize; i++) {
byte = inst_bytes[i];
if (byte == LEG_OVR_DS) {
seg = NVMM_X64_SEG_DS;
} else if (byte == LEG_OVR_ES) {
seg = NVMM_X64_SEG_ES;
} else if (byte == LEG_OVR_GS) {
seg = NVMM_X64_SEG_GS;
} else if (byte == LEG_OVR_FS) {
seg = NVMM_X64_SEG_FS;
} else if (byte == LEG_OVR_CS) {
seg = NVMM_X64_SEG_CS;
} else if (byte == LEG_OVR_SS) {
seg = NVMM_X64_SEG_SS;
} else if (byte == LEG_OPR_OVR) {
/* nothing */
} else if (byte == LEG_ADR_OVR) {
/* nothing */
} else if (byte == LEG_REP) {
/* nothing */
} else if (byte == LEG_REPN) {
/* nothing */
} else if (byte == LEG_LOCK) {
/* nothing */
} else {
return seg;
}
}
return seg;
}
static int
fetch_instruction(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
struct nvmm_vcpu_exit *exit)
{
struct nvmm_x64_state *state = vcpu->state;
size_t fetchsize;
gvaddr_t gva;
int ret;
fetchsize = sizeof(exit->u.mem.inst_bytes);
gva = state->gprs[NVMM_X64_GPR_RIP];
if (__predict_false(!is_long_mode(state))) {
ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
fetchsize);
if (ret == -1)
return -1;
segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
}
ret = read_guest_memory(mach, vcpu, gva, exit->u.mem.inst_bytes,
fetchsize);
if (ret == -1)
return -1;
exit->u.mem.inst_len = fetchsize;
return 0;
}
static int
assist_mem_movs(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
struct x86_instr *instr)
{
struct nvmm_x64_state *state = vcpu->state;
uint64_t *gprs;
uint8_t data[8];
gvaddr_t gva;
size_t size;
int ret;
size = instr->operand_size;
gprs = state->gprs;
/* Source. */
ret = store_to_gva(state, instr, &instr->src, &gva, size);
if (ret == -1)
return -1;
ret = read_guest_memory(mach, vcpu, gva, data, size);
if (ret == -1)
return -1;
/* Destination. */
ret = store_to_gva(state, instr, &instr->dst, &gva, size);
if (ret == -1)
return -1;
ret = write_guest_memory(mach, vcpu, gva, data, size);
if (ret == -1)
return -1;
/*
* Inlined x86_func_movs() call
* (*instr->emul->func)(vcpu, &mem, state->gprs);
*/
if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
gprs[NVMM_X64_GPR_RSI] -= size;
gprs[NVMM_X64_GPR_RDI] -= size;
} else {
gprs[NVMM_X64_GPR_RSI] += size;
gprs[NVMM_X64_GPR_RDI] += size;
}
return 0;
}
static int
assist_mem_cmps(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
struct x86_instr *instr)
{
struct nvmm_x64_state *state = vcpu->state;
uint64_t *gprs, op1, op2, fl;
uint8_t data1[8], data2[8];
gvaddr_t gva;
size_t size;
int ret;
size = instr->operand_size;
gprs = state->gprs;
/* Source 1. */
ret = store_to_gva(state, instr, &instr->src, &gva, size);
if (ret == -1)
return -1;
ret = read_guest_memory(mach, vcpu, gva, data1, size);
if (ret == -1)
return -1;
/* Source 2. */
ret = store_to_gva(state, instr, &instr->dst, &gva, size);
if (ret == -1)
return -1;
ret = read_guest_memory(mach, vcpu, gva, data2, size);
if (ret == -1)
return -1;
/*
* Inlined x86_func_cmps() call
* (*instr->emul->func)(vcpu, &mem, state->gprs);
*/
/* Perform the CMP. */
op1 = *((uint64_t *) data1);
op2 = *((uint64_t *) data2);
exec_sub(op1, op2, &fl, size);
gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
gprs[NVMM_X64_GPR_RSI] -= size;
gprs[NVMM_X64_GPR_RDI] -= size;
} else {
gprs[NVMM_X64_GPR_RSI] += size;
gprs[NVMM_X64_GPR_RDI] += size;
}
return 0;
}
static int
assist_mem_single(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
struct x86_instr *instr)
{
struct nvmm_x64_state *state = vcpu->state;
struct nvmm_vcpu_exit *exit = vcpu->exit;
struct nvmm_mem mem;
uint8_t membuf[8];
uint64_t val;
memset(membuf, 0, sizeof(membuf));
mem.mach = mach;
mem.vcpu = vcpu;
mem.gpa = exit->u.mem.gpa;
mem.size = instr->operand_size;
mem.data = membuf;
/* Determine the direction. */
switch (instr->src.type) {
case STORE_REG:
if (instr->src.disp.type != DISP_NONE) {
/* Indirect access. */
mem.write = false;
} else {
/* Direct access. */
mem.write = true;
}
break;
case STORE_DUALREG:
if (instr->src.disp.type == DISP_NONE) {
DISASSEMBLER_BUG();
}
mem.write = false;
break;
case STORE_IMM:
mem.write = true;
break;
case STORE_SIB:
mem.write = false;
break;
case STORE_DMO:
mem.write = false;
break;
default:
DISASSEMBLER_BUG();
}
if (mem.write) {
switch (instr->src.type) {
case STORE_REG:
/* The instruction was "reg -> mem". Fetch the register
* in membuf. */
if (__predict_false(instr->src.disp.type != DISP_NONE)) {
DISASSEMBLER_BUG();
}
val = state->gprs[instr->src.u.reg->num];
val = __SHIFTOUT(val, instr->src.u.reg->mask);
memcpy(mem.data, &val, mem.size);
break;
case STORE_IMM:
/* The instruction was "imm -> mem". Fetch the immediate
* in membuf. */
memcpy(mem.data, &instr->src.u.imm.data, mem.size);
break;
default:
DISASSEMBLER_BUG();
}
} else if (instr->emul->readreg) {
/* The instruction was "mem -> reg", but the value of the
* register matters for the emul func. Fetch it in membuf. */
if (__predict_false(instr->dst.type != STORE_REG)) {
DISASSEMBLER_BUG();
}
if (__predict_false(instr->dst.disp.type != DISP_NONE)) {
DISASSEMBLER_BUG();
}
val = state->gprs[instr->dst.u.reg->num];
val = __SHIFTOUT(val, instr->dst.u.reg->mask);
memcpy(mem.data, &val, mem.size);
}
(*instr->emul->func)(vcpu, &mem, state->gprs);
if (instr->emul->notouch) {
/* We're done. */
return 0;
}
if (!mem.write) {
/* The instruction was "mem -> reg". The emul func has filled
* membuf with the memory content. Install membuf in the
* register. */
if (__predict_false(instr->dst.type != STORE_REG)) {
DISASSEMBLER_BUG();
}
if (__predict_false(instr->dst.disp.type != DISP_NONE)) {
DISASSEMBLER_BUG();
}
memcpy(&val, membuf, sizeof(uint64_t));
val = __SHIFTIN(val, instr->dst.u.reg->mask);
state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
state->gprs[instr->dst.u.reg->num] |= val;
state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask;
} else if (instr->emul->backprop) {
/* The instruction was "reg -> mem", but the memory must be
* back-propagated to the register. Install membuf in the
* register. */
if (__predict_false(instr->src.type != STORE_REG)) {
DISASSEMBLER_BUG();
}
if (__predict_false(instr->src.disp.type != DISP_NONE)) {
DISASSEMBLER_BUG();
}
memcpy(&val, membuf, sizeof(uint64_t));
val = __SHIFTIN(val, instr->src.u.reg->mask);
state->gprs[instr->src.u.reg->num] &= ~instr->src.u.reg->mask;
state->gprs[instr->src.u.reg->num] |= val;
state->gprs[instr->src.u.reg->num] &= ~instr->zeroextend_mask;
}
return 0;
}
int
nvmm_assist_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
{
struct nvmm_x64_state *state = vcpu->state;
struct nvmm_vcpu_exit *exit = vcpu->exit;
struct x86_instr instr;
uint64_t cnt = 0; /* GCC */
int ret;
if (__predict_false(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
errno = EINVAL;
return -1;
}
ret = nvmm_vcpu_getstate(mach, vcpu,
NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
if (ret == -1)
return -1;
if (exit->u.mem.inst_len == 0) {
/*
* The instruction was not fetched from the kernel. Fetch
* it ourselves.
*/
ret = fetch_instruction(mach, vcpu, exit);
if (ret == -1)
return -1;
}
ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len,
&instr, state);
if (ret == -1) {
errno = ENODEV;
return -1;
}
if (instr.legpref.rep || instr.legpref.repn) {
cnt = rep_get_cnt(state, instr.address_size);
if (__predict_false(cnt == 0)) {
state->gprs[NVMM_X64_GPR_RIP] += instr.len;
goto out;
}
}
if (instr.opcode->movs) {
ret = assist_mem_movs(mach, vcpu, &instr);
} else if (instr.opcode->cmps) {
instr.legpref.repe = !instr.legpref.repn;
ret = assist_mem_cmps(mach, vcpu, &instr);
} else {
ret = assist_mem_single(mach, vcpu, &instr);
}
if (ret == -1) {
errno = ENODEV;
return -1;
}
if (instr.legpref.rep || instr.legpref.repn) {
cnt -= 1;
rep_set_cnt(state, instr.address_size, cnt);
if (cnt == 0) {
state->gprs[NVMM_X64_GPR_RIP] += instr.len;
} else if (__predict_false(instr.legpref.repn)) {
/* repn */
if (state->gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) {
state->gprs[NVMM_X64_GPR_RIP] += instr.len;
}
} else if (__predict_false(instr.legpref.repe)) {
/* repe */
if ((state->gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) == 0) {
state->gprs[NVMM_X64_GPR_RIP] += instr.len;
}
}
} else {
state->gprs[NVMM_X64_GPR_RIP] += instr.len;
}
out:
ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
if (ret == -1)
return -1;
return 0;
}