Several improvements and fixes:

 * Change the Assist API. Rather than being passed in each call, the
   callbacks are now registered beforehand. Then change the I/O Assist to
   fetch MMIO data via the Mem callback. This allows a guest to perform
   an I/O string operation on memory that is itself MMIO. (A sketch of
   the new calling convention follows this list.)

 * Introduce two new functions internal to libnvmm, read_guest_memory and
   write_guest_memory. They can handle mapped memory, MMIO memory and
   cross-page transactions (see the second sketch below).

 * Allow nvmm_gva_to_gpa and nvmm_gpa_to_hva to take non-page-aligned
   addresses. This simplifies a lot of things.

 * Support the MOVS instruction, and add a test for it. This instruction
   is special, in that it takes two implicit memory operands. In
   particular, both buffers can be in MMIO memory, and we handle this
   case (see the third sketch below).

 * Fix a gross copy-pasto in nvmm_hva_unmap, which issued the
   NVMM_IOC_HVA_MAP ioctl instead of NVMM_IOC_HVA_UNMAP. Also fix a few
   things here and there.
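
As an illustration, here is a minimal sketch of the new calling
convention, matching the declarations in the nvmm.h hunk below. The
names my_io, my_mem, vm_setup and handle_exit are hypothetical, and the
header is assumed to be reachable as <nvmm.h>:

	#include <err.h>
	#include <errno.h>
	#include <nvmm.h>

	/* Hypothetical VMM-side emulation routines. */
	static void my_io(struct nvmm_io *io) { /* emulate the port access */ }
	static void my_mem(struct nvmm_mem *mem) { /* emulate the MMIO access */ }

	static const struct nvmm_callbacks callbacks = {
		.io = my_io,
		.mem = my_mem
	};

	static void
	vm_setup(void)
	{
		/* Register the callbacks once, beforehand. */
		nvmm_callbacks_register(&callbacks);
	}

	static void
	handle_exit(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
	    struct nvmm_exit *exit)
	{
		/* The Assists no longer take a callback argument. */
		switch (exit->reason) {
		case NVMM_EXIT_IO:
			if (nvmm_assist_io(mach, cpuid, exit) == -1)
				err(errno, "nvmm_assist_io");
			break;
		case NVMM_EXIT_MEMORY:
			if (nvmm_assist_mem(mach, cpuid, exit) == -1)
				err(errno, "nvmm_assist_mem");
			break;
		default:
			break;
		}
	}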
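
Second, a condensed sketch of the page-split logic in the new
read_guest_memory() (full version in the libnvmm_x86.c hunks below);
write_guest_memory() is symmetric:

	/* Number of bytes spilling into the next page, if any. */
	if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
		remain = (gva & PAGE_MASK) + size - PAGE_SIZE;
	} else {
		remain = 0;
	}
	size -= remain;

	/*
	 * First chunk: plain memcpy() if the GPA has an HVA mapping,
	 * otherwise bounce the data through the registered Mem callback.
	 */

	/* Remainder: recurse into the next page. */
	if (remain > 0) {
		ret = read_guest_memory(mach, state, gva + size,
		    data + size, remain);
	}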
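
Third, a sketch of the MOVS path (assist_mem_double() below): both
implicit operands are guest memory and either may be MMIO, so the data
is bounced through a local buffer rather than through a single Mem
transaction (error checks omitted here):

	uint8_t data[8];

	/* Source operand, DS:RSI. */
	store_to_gva(state, instr, &instr->src, &gva, size);
	read_guest_memory(mach, state, gva, data, size);

	/* Destination operand, ES:RDI (hardcoded segment). */
	store_to_gva(state, instr, &instr->dst, &gva, size);
	write_guest_memory(mach, state, gva, data, size);

	/*
	 * x86_emul_movs() then only advances RSI/RDI by the operand
	 * size, according to RFLAGS.DF; it does not call the callback.
	 */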
maxv 2018-12-27 07:22:31 +00:00
parent 450723dc75
commit 38b2a665bf
6 changed files with 552 additions and 304 deletions


@@ -1,4 +1,4 @@
.\" $NetBSD: libnvmm.3,v 1.5 2018/12/15 13:39:43 maxv Exp $
.\" $NetBSD: libnvmm.3,v 1.6 2018/12/27 07:22:31 maxv Exp $
.\"
.\" Copyright (c) 2018 The NetBSD Foundation, Inc.
.\" All rights reserved.
@@ -27,7 +27,7 @@
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
.Dd December 14, 2018
.Dd December 26, 2018
.Dt LIBNVMM 3
.Os
.Sh NAME
@@ -78,12 +78,14 @@
.Ft int
.Fn nvmm_gpa_to_hva "struct nvmm_machine *mach" "gpaddr_t gpa" \
"uintptr_t *hva"
.Ft void
.Fn nvmm_callbacks_register "const struct nvmm_callbacks *cbs"
.Ft int
.Fn nvmm_assist_io "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \
"struct nvmm_exit *exit" "void (*cb)(struct nvmm_io *)"
"struct nvmm_exit *exit"
.Ft int
.Fn nvmm_assist_mem "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \
"struct nvmm_exit *exit" "void (*cb)(struct nvmm_mem *)"
"struct nvmm_exit *exit"
.Sh DESCRIPTION
.Nm
provides a library for VMM software to handle hardware-accelerated virtual
@@ -228,6 +230,11 @@ into a host virtual address returned in
.Fa gpa
must be page-aligned.
.Pp
.Fn nvmm_callbacks_register
registers in
.Nm
the callbacks descriptor passed as argument.
.Pp
.Fn nvmm_assist_io
emulates the I/O operation described in
.Fa exit
@@ -397,8 +404,8 @@ it is necessary for VMM software to emulate the associated I/O operation.
provides an easy way for VMM software to perform that.
.Pp
.Fn nvmm_assist_io
will call the
.Fa cb
will call the registered
.Fa io
callback function and give it a
.Cd nvmm_io
structure as argument.
@@ -444,8 +451,8 @@ provides an easy way for VMM software to perform that, similar to the I/O
Assist.
.Pp
.Fn nvmm_assist_mem
will call the
.Fa cb
will call the registered
.Fa mem
callback function and give it a
.Cd nvmm_mem
structure as argument.


@@ -1,4 +1,4 @@
/* $NetBSD: libnvmm.c,v 1.5 2018/12/15 13:39:43 maxv Exp $ */
/* $NetBSD: libnvmm.c,v 1.6 2018/12/27 07:22:31 maxv Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -43,6 +43,8 @@
#include "nvmm.h"
struct nvmm_callbacks __callbacks;
typedef struct __area {
LIST_ENTRY(__area) list;
gpaddr_t gpa;
@@ -53,7 +55,6 @@ typedef struct __area {
typedef LIST_HEAD(, __area) area_list_t;
static int nvmm_fd = -1;
static size_t nvmm_page_size = 0;
/* -------------------------------------------------------------------------- */
@@ -146,7 +147,6 @@ nvmm_init(void)
nvmm_fd = open("/dev/nvmm", O_RDWR);
if (nvmm_fd == -1)
return -1;
nvmm_page_size = sysconf(_SC_PAGESIZE);
return 0;
}
@@ -454,7 +454,7 @@ nvmm_hva_map(struct nvmm_machine *mach, uintptr_t hva, size_t size)
int
nvmm_hva_unmap(struct nvmm_machine *mach, uintptr_t hva, size_t size)
{
struct nvmm_ioc_hva_map args;
struct nvmm_ioc_hva_unmap args;
int ret;
if (nvmm_init() == -1) {
@@ -465,7 +465,7 @@ nvmm_hva_unmap(struct nvmm_machine *mach, uintptr_t hva, size_t size)
args.hva = hva;
args.size = size;
ret = ioctl(nvmm_fd, NVMM_IOC_HVA_MAP, &args);
ret = ioctl(nvmm_fd, NVMM_IOC_HVA_UNMAP, &args);
if (ret == -1)
return -1;
@@ -482,11 +482,6 @@ nvmm_gpa_to_hva(struct nvmm_machine *mach, gpaddr_t gpa, uintptr_t *hva)
area_list_t *areas = mach->areas;
area_t *ent;
if (gpa % nvmm_page_size != 0) {
errno = EINVAL;
return -1;
}
LIST_FOREACH(ent, areas, list) {
if (gpa >= ent->gpa && gpa < ent->gpa + ent->size) {
*hva = ent->hva + (gpa - ent->gpa);
@@ -501,3 +496,13 @@ nvmm_gpa_to_hva(struct nvmm_machine *mach, gpaddr_t gpa, uintptr_t *hva)
/*
* nvmm_assist_io(): architecture-specific.
*/
/*
* nvmm_assist_mem(): architecture-specific.
*/
void
nvmm_callbacks_register(const struct nvmm_callbacks *cbs)
{
memcpy(&__callbacks, cbs, sizeof(__callbacks));
}


@@ -1,4 +1,4 @@
/* $NetBSD: libnvmm_x86.c,v 1.5 2018/12/15 13:09:02 maxv Exp $ */
/* $NetBSD: libnvmm_x86.c,v 1.6 2018/12/27 07:22:31 maxv Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -47,6 +47,46 @@
#include <x86/specialreg.h>
extern struct nvmm_callbacks __callbacks;
/* -------------------------------------------------------------------------- */
/*
* Undocumented debugging function. Helpful.
*/
int
nvmm_vcpu_dump(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
{
struct nvmm_x64_state state;
size_t i;
int ret;
const char *segnames[] = {
"CS", "DS", "ES", "FS", "GS", "SS", "GDT", "IDT", "LDT", "TR"
};
ret = nvmm_vcpu_getstate(mach, cpuid, &state, NVMM_X64_STATE_ALL);
if (ret == -1)
return -1;
printf("+ VCPU id=%d\n", (int)cpuid);
printf("| -> RIP=%p\n", (void *)state.gprs[NVMM_X64_GPR_RIP]);
printf("| -> RSP=%p\n", (void *)state.gprs[NVMM_X64_GPR_RSP]);
printf("| -> RAX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RAX]);
printf("| -> RBX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RBX]);
printf("| -> RCX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RCX]);
for (i = 0; i < NVMM_X64_NSEG; i++) {
printf("| -> %s: sel=0x%lx base=%p, limit=%p, P=%d\n",
segnames[i],
state.segs[i].selector,
(void *)state.segs[i].base,
(void *)state.segs[i].limit,
state.segs[i].attrib.p);
}
return 0;
}
/* -------------------------------------------------------------------------- */
#define PTE32_L1_SHIFT 12
@@ -330,6 +370,7 @@ x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
{
bool is_pae, is_lng, has_pse;
uint64_t cr3;
size_t off;
int ret;
if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
@@ -339,6 +380,9 @@ x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
return 0;
}
off = (gva & PAGE_MASK);
gva &= ~PAGE_MASK;
is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LME) != 0;
has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
@@ -362,6 +406,8 @@ x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
errno = EFAULT;
}
*gpa = *gpa + off;
return ret;
}
@@ -372,11 +418,6 @@ nvmm_gva_to_gpa(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
struct nvmm_x64_state state;
int ret;
if (gva & PAGE_MASK) {
errno = EINVAL;
return -1;
}
ret = nvmm_vcpu_getstate(mach, cpuid, &state,
NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
if (ret == -1)
@@ -413,12 +454,6 @@ is_long_mode(struct nvmm_x64_state *state)
return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LME) != 0;
}
static inline bool
is_illegal(struct nvmm_io *io, nvmm_prot_t prot)
{
return (io->in && !(prot & NVMM_PROT_WRITE));
}
static int
segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva, size_t size)
{
@@ -449,21 +484,150 @@ error:
return -1;
}
static uint64_t
mask_from_adsize(size_t adsize)
{
switch (adsize) {
case 8:
return 0xFFFFFFFFFFFFFFFF;
case 4:
return 0x00000000FFFFFFFF;
case 2:
default: /* impossible */
return 0x000000000000FFFF;
}
}
static uint64_t
rep_dec_apply(struct nvmm_x64_state *state, size_t adsize)
{
uint64_t mask, cnt;
mask = mask_from_adsize(adsize);
cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
cnt -= 1;
cnt &= mask;
state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
state->gprs[NVMM_X64_GPR_RCX] |= cnt;
return cnt;
}
static int
read_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
gvaddr_t gva, uint8_t *data, size_t size)
{
struct nvmm_mem mem;
nvmm_prot_t prot;
gpaddr_t gpa;
uintptr_t hva;
bool is_mmio;
int ret, remain;
ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
if (__predict_false(ret == -1)) {
return -1;
}
if (__predict_false(!(prot & NVMM_PROT_READ))) {
errno = EFAULT;
return -1;
}
if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
} else {
remain = 0;
}
size -= remain;
ret = nvmm_gpa_to_hva(mach, gpa, &hva);
is_mmio = (ret == -1);
if (is_mmio) {
mem.gva = gva;
mem.gpa = gpa;
mem.write = false;
mem.size = size;
(*__callbacks.mem)(&mem);
memcpy(data, mem.data, size);
} else {
memcpy(data, (uint8_t *)hva, size);
}
if (remain > 0) {
ret = read_guest_memory(mach, state, gva + size,
data + size, remain);
} else {
ret = 0;
}
return ret;
}
static int
write_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
gvaddr_t gva, uint8_t *data, size_t size)
{
struct nvmm_mem mem;
nvmm_prot_t prot;
gpaddr_t gpa;
uintptr_t hva;
bool is_mmio;
int ret, remain;
ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
if (__predict_false(ret == -1)) {
return -1;
}
if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
errno = EFAULT;
return -1;
}
if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
} else {
remain = 0;
}
size -= remain;
ret = nvmm_gpa_to_hva(mach, gpa, &hva);
is_mmio = (ret == -1);
if (is_mmio) {
mem.gva = gva;
mem.gpa = gpa;
mem.write = true;
memcpy(mem.data, data, size);
mem.size = size;
(*__callbacks.mem)(&mem);
} else {
memcpy((uint8_t *)hva, data, size);
}
if (remain > 0) {
ret = write_guest_memory(mach, state, gva + size,
data + size, remain);
} else {
ret = 0;
}
return ret;
}
/* -------------------------------------------------------------------------- */
int
nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
struct nvmm_exit *exit, void (*cb)(struct nvmm_io *))
struct nvmm_exit *exit)
{
struct nvmm_x64_state state;
struct nvmm_io io;
nvmm_prot_t prot;
size_t remain, done;
uintptr_t hva;
gvaddr_t gva, off;
gpaddr_t gpa;
uint8_t tmp[8];
uint8_t *ptr, *ptr2;
uint64_t cnt;
gvaddr_t gva;
int reg = 0; /* GCC */
bool cross;
int ret;
if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
@@ -481,29 +645,18 @@ nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
if (ret == -1)
return -1;
cross = false;
if (!exit->u.io.str) {
ptr = (uint8_t *)&state.gprs[NVMM_X64_GPR_RAX];
} else {
/*
* Determine GVA.
*/
if (exit->u.io.str) {
if (io.in) {
reg = NVMM_X64_GPR_RDI;
} else {
reg = NVMM_X64_GPR_RSI;
}
switch (exit->u.io.address_size) {
case 8:
gva = state.gprs[reg];
break;
case 4:
gva = (state.gprs[reg] & 0x00000000FFFFFFFF);
break;
case 2:
default: /* impossible */
gva = (state.gprs[reg] & 0x000000000000FFFF);
break;
}
gva = state.gprs[reg];
gva &= mask_from_adsize(exit->u.io.address_size);
if (!is_long_mode(&state)) {
ret = segment_apply(&state.segs[exit->u.io.seg], &gva,
@@ -511,70 +664,30 @@ nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
if (ret == -1)
return -1;
}
off = (gva & PAGE_MASK);
gva &= ~PAGE_MASK;
ret = x86_gva_to_gpa(mach, &state, gva, &gpa, &prot);
if (ret == -1)
return -1;
if (__predict_false(is_illegal(&io, prot))) {
errno = EFAULT;
return -1;
}
ret = nvmm_gpa_to_hva(mach, gpa, &hva);
if (ret == -1)
return -1;
ptr = (uint8_t *)hva + off;
/*
* Special case. If the buffer is in between two pages, we
* need to retrieve data from the next page.
*/
if (__predict_false(off + io.size > PAGE_SIZE)) {
cross = true;
remain = off + io.size - PAGE_SIZE;
done = PAGE_SIZE - off;
memcpy(tmp, ptr, done);
ret = x86_gva_to_gpa(mach, &state, gva + PAGE_SIZE,
&gpa, &prot);
if (ret == -1)
return -1;
if (__predict_false(is_illegal(&io, prot))) {
errno = EFAULT;
return -1;
}
ret = nvmm_gpa_to_hva(mach, gpa, &hva);
if (ret == -1)
return -1;
memcpy(&tmp[done], (uint8_t *)hva, remain);
ptr2 = &tmp[done];
}
}
if (io.in) {
/* nothing to do */
} else {
memcpy(io.data, ptr, io.size);
}
(*cb)(&io);
if (io.in) {
if (!exit->u.io.str)
state.gprs[NVMM_X64_GPR_RAX] = 0;
if (__predict_false(cross)) {
memcpy(ptr, io.data, done);
memcpy(ptr2, &io.data[done], remain);
if (!io.in) {
if (!exit->u.io.str) {
memcpy(io.data, &state.gprs[NVMM_X64_GPR_RAX], io.size);
} else {
memcpy(ptr, io.data, io.size);
ret = read_guest_memory(mach, &state, gva, io.data,
io.size);
if (ret == -1)
return -1;
}
}
(*__callbacks.io)(&io);
if (io.in) {
if (!exit->u.io.str) {
memcpy(&state.gprs[NVMM_X64_GPR_RAX], io.data, io.size);
} else {
ret = write_guest_memory(mach, &state, gva, io.data,
io.size);
if (ret == -1)
return -1;
}
} else {
/* nothing to do */
}
if (exit->u.io.str) {
@@ -586,8 +699,8 @@ nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
}
if (exit->u.io.rep) {
state.gprs[NVMM_X64_GPR_RCX] -= 1;
if (state.gprs[NVMM_X64_GPR_RCX] == 0) {
cnt = rep_dec_apply(&state, exit->u.io.address_size);
if (cnt == 0) {
state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
}
} else {
@@ -609,6 +722,7 @@ static void x86_emul_xor(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_
static void x86_emul_mov(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
static void x86_emul_stos(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
static void x86_emul_lods(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
static void x86_emul_movs(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
enum x86_legpref {
/* Group 1 */
@@ -721,6 +835,7 @@ struct x86_store {
uint64_t dmo;
} u;
struct x86_disp disp;
int hardseg;
};
struct x86_instr {
@@ -760,6 +875,7 @@ struct x86_opcode {
bool regtorm;
bool dmo;
bool todmo;
bool movs;
bool stos;
bool lods;
bool szoverride;
@@ -1033,6 +1149,28 @@ static const struct x86_opcode primary_opcode_table[] = {
.emul = x86_emul_mov
},
/*
* MOVS
*/
{
/* Yb, Xb */
.byte = 0xA4,
.movs = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
.allsize = -1,
.emul = x86_emul_movs
},
{
/* Yv, Xv */
.byte = 0xA5,
.movs = true,
.szoverride = true,
.defsize = -1,
.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.emul = x86_emul_movs
},
/*
* STOS
*/
@@ -1380,6 +1518,33 @@ resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
}
/*
* Special node, for MOVS. Fake two displacements of zero on the source and
* destination registers.
*/
static int
node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
size_t adrsize;
adrsize = instr->address_size;
/* DS:RSI */
instr->src.type = STORE_REG;
instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
instr->src.disp.type = DISP_0;
/* ES:RDI, force ES */
instr->dst.type = STORE_REG;
instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
instr->dst.disp.type = DISP_0;
instr->dst.hardseg = NVMM_X64_SEG_ES;
fsm_advance(fsm, 0, NULL);
return 0;
}
/*
* Special node, for STOS and LODS. Fake a displacement of zero on the
* destination register.
@@ -1409,7 +1574,7 @@ node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
if (opcode->stos) {
/* ES:RDI, force ES */
stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
instr->legpref[LEG_OVR_ES] = true;
stlo->hardseg = NVMM_X64_SEG_ES;
} else {
/* DS:RSI */
stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
@@ -1855,6 +2020,8 @@ node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
fsm_advance(fsm, 1, node_dmo);
} else if (opcode->stos || opcode->lods) {
fsm_advance(fsm, 1, node_stlo);
} else if (opcode->movs) {
fsm_advance(fsm, 1, node_movs);
} else {
return -1;
}
@@ -2167,6 +2334,24 @@ x86_emul_lods(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
}
}
static void
x86_emul_movs(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
uint64_t *gprs)
{
/*
* Special instruction: double memory operand. Don't call the cb,
* because the storage has already been performed earlier.
*/
if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
gprs[NVMM_X64_GPR_RSI] -= mem->size;
gprs[NVMM_X64_GPR_RDI] -= mem->size;
} else {
gprs[NVMM_X64_GPR_RSI] += mem->size;
gprs[NVMM_X64_GPR_RDI] += mem->size;
}
}
/* -------------------------------------------------------------------------- */
static inline uint64_t
@@ -2185,18 +2370,15 @@ gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
}
static int
store_to_mem(struct nvmm_machine *mach, struct nvmm_x64_state *state,
struct x86_instr *instr, struct x86_store *store, struct nvmm_mem *mem)
store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
struct x86_store *store, gvaddr_t *gvap, size_t size)
{
struct x86_sib *sib;
nvmm_prot_t prot;
gvaddr_t gva, off;
gvaddr_t gva = 0;
uint64_t reg;
int ret, seg;
uint32_t *p;
gva = 0;
if (store->type == STORE_SIB) {
sib = &store->u.sib;
if (sib->bas != NULL)
@@ -2216,44 +2398,55 @@ store_to_mem(struct nvmm_machine *mach, struct nvmm_x64_state *state,
gva += *p;
}
mem->gva = gva;
if (!is_long_mode(state)) {
if (instr->legpref[LEG_OVR_CS]) {
seg = NVMM_X64_SEG_CS;
} else if (instr->legpref[LEG_OVR_SS]) {
seg = NVMM_X64_SEG_SS;
} else if (instr->legpref[LEG_OVR_ES]) {
seg = NVMM_X64_SEG_ES;
} else if (instr->legpref[LEG_OVR_FS]) {
seg = NVMM_X64_SEG_FS;
} else if (instr->legpref[LEG_OVR_GS]) {
seg = NVMM_X64_SEG_GS;
if (store->hardseg != 0) {
seg = store->hardseg;
} else {
seg = NVMM_X64_SEG_DS;
if (instr->legpref[LEG_OVR_CS]) {
seg = NVMM_X64_SEG_CS;
} else if (instr->legpref[LEG_OVR_SS]) {
seg = NVMM_X64_SEG_SS;
} else if (instr->legpref[LEG_OVR_ES]) {
seg = NVMM_X64_SEG_ES;
} else if (instr->legpref[LEG_OVR_FS]) {
seg = NVMM_X64_SEG_FS;
} else if (instr->legpref[LEG_OVR_GS]) {
seg = NVMM_X64_SEG_GS;
} else {
seg = NVMM_X64_SEG_DS;
}
}
ret = segment_apply(&state->segs[seg], &mem->gva, mem->size);
ret = segment_apply(&state->segs[seg], &gva, size);
if (ret == -1)
return -1;
}
*gvap = gva;
return 0;
}
static int
store_to_mem(struct nvmm_machine *mach, struct nvmm_x64_state *state,
struct x86_instr *instr, struct x86_store *store, struct nvmm_mem *mem)
{
nvmm_prot_t prot;
int ret;
ret = store_to_gva(state, instr, store, &mem->gva, mem->size);
if (ret == -1)
return -1;
if ((mem->gva & PAGE_MASK) + mem->size > PAGE_SIZE) {
/* Don't allow a cross-page MMIO. */
errno = EINVAL;
return -1;
}
off = (mem->gva & PAGE_MASK);
mem->gva &= ~PAGE_MASK;
ret = x86_gva_to_gpa(mach, state, mem->gva, &mem->gpa, &prot);
if (ret == -1)
return -1;
mem->gva += off;
mem->gpa += off;
return 0;
}
@@ -2261,12 +2454,8 @@ static int
fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state,
struct nvmm_exit *exit)
{
size_t fetchsize, remain, done;
gvaddr_t gva, off;
nvmm_prot_t prot;
gpaddr_t gpa;
uintptr_t hva;
uint8_t *ptr;
size_t fetchsize;
gvaddr_t gva;
int ret;
fetchsize = sizeof(exit->u.mem.inst_bytes);
@@ -2279,50 +2468,46 @@ fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state,
return -1;
}
off = (gva & PAGE_MASK);
gva &= ~PAGE_MASK;
ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
if (ret == -1)
return -1;
if (__predict_false((prot & NVMM_PROT_EXEC) == 0)) {
errno = EFAULT;
return -1;
}
ret = nvmm_gpa_to_hva(mach, gpa, &hva);
ret = read_guest_memory(mach, state, gva, exit->u.mem.inst_bytes,
fetchsize);
if (ret == -1)
return -1;
ptr = (uint8_t *)hva + off;
exit->u.mem.inst_len = fetchsize;
/*
* Special case. If the buffer is in between two pages, we
* need to retrieve data from the next page.
*/
if (__predict_false(off + fetchsize > PAGE_SIZE)) {
remain = off + fetchsize - PAGE_SIZE;
done = PAGE_SIZE - off;
return 0;
}
memcpy(exit->u.mem.inst_bytes, ptr, done);
static int
assist_mem_double(struct nvmm_machine *mach, struct nvmm_x64_state *state,
struct x86_instr *instr)
{
struct nvmm_mem mem;
uint8_t data[8];
gvaddr_t gva;
size_t size;
int ret;
ret = x86_gva_to_gpa(mach, state, gva + PAGE_SIZE,
&gpa, &prot);
if (ret == -1)
return -1;
if (__predict_false((prot & NVMM_PROT_EXEC) == 0)) {
errno = EFAULT;
return -1;
}
ret = nvmm_gpa_to_hva(mach, gpa, &hva);
if (ret == -1)
return -1;
size = instr->operand_size;
memcpy(&exit->u.mem.inst_bytes[done], (uint8_t *)hva, remain);
} else {
memcpy(exit->u.mem.inst_bytes, ptr, fetchsize);
exit->u.mem.inst_len = fetchsize;
}
/* Source. */
ret = store_to_gva(state, instr, &instr->src, &gva, size);
if (ret == -1)
return -1;
ret = read_guest_memory(mach, state, gva, data, size);
if (ret == -1)
return -1;
/* Destination. */
ret = store_to_gva(state, instr, &instr->dst, &gva, size);
if (ret == -1)
return -1;
ret = write_guest_memory(mach, state, gva, data, size);
if (ret == -1)
return -1;
mem.size = size;
(*instr->emul)(&mem, NULL, state->gprs);
return 0;
}
@@ -2333,14 +2518,126 @@ fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state,
return -1; \
} while (0);
static int
assist_mem_single(struct nvmm_machine *mach, struct nvmm_x64_state *state,
struct x86_instr *instr)
{
struct nvmm_mem mem;
uint64_t val;
int ret;
memset(&mem, 0, sizeof(mem));
switch (instr->src.type) {
case STORE_REG:
if (instr->src.disp.type != DISP_NONE) {
/* Indirect access. */
mem.write = false;
mem.size = instr->operand_size;
ret = store_to_mem(mach, state, instr, &instr->src,
&mem);
if (ret == -1)
return -1;
} else {
/* Direct access. */
mem.write = true;
mem.size = instr->operand_size;
val = state->gprs[instr->src.u.reg->num];
val = __SHIFTOUT(val, instr->src.u.reg->mask);
memcpy(mem.data, &val, mem.size);
}
break;
case STORE_IMM:
mem.write = true;
mem.size = instr->src.u.imm.size;
memcpy(mem.data, instr->src.u.imm.data, mem.size);
break;
case STORE_SIB:
mem.write = false;
mem.size = instr->operand_size;
ret = store_to_mem(mach, state, instr, &instr->src, &mem);
if (ret == -1)
return -1;
break;
case STORE_DMO:
mem.write = false;
mem.size = instr->operand_size;
ret = store_to_mem(mach, state, instr, &instr->src, &mem);
if (ret == -1)
return -1;
break;
default:
return -1;
}
switch (instr->dst.type) {
case STORE_REG:
if (instr->dst.disp.type != DISP_NONE) {
if (__predict_false(!mem.write)) {
DISASSEMBLER_BUG();
}
mem.size = instr->operand_size;
ret = store_to_mem(mach, state, instr, &instr->dst,
&mem);
if (ret == -1)
return -1;
} else {
/* nothing */
}
break;
case STORE_IMM:
/* The dst can't be an immediate. */
DISASSEMBLER_BUG();
case STORE_SIB:
if (__predict_false(!mem.write)) {
DISASSEMBLER_BUG();
}
mem.size = instr->operand_size;
ret = store_to_mem(mach, state, instr, &instr->dst, &mem);
if (ret == -1)
return -1;
break;
case STORE_DMO:
if (__predict_false(!mem.write)) {
DISASSEMBLER_BUG();
}
mem.size = instr->operand_size;
ret = store_to_mem(mach, state, instr, &instr->dst, &mem);
if (ret == -1)
return -1;
break;
default:
return -1;
}
(*instr->emul)(&mem, __callbacks.mem, state->gprs);
if (!mem.write) {
/* instr->dst.type == STORE_REG */
memcpy(&val, mem.data, sizeof(uint64_t));
val = __SHIFTIN(val, instr->dst.u.reg->mask);
state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
state->gprs[instr->dst.u.reg->num] |= val;
}
return 0;
}
int
nvmm_assist_mem(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
struct nvmm_exit *exit, void (*cb)(struct nvmm_mem *))
struct nvmm_exit *exit)
{
struct nvmm_x64_state state;
struct x86_instr instr;
struct nvmm_mem mem;
uint64_t val;
uint64_t cnt;
int ret;
if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) {
@@ -2371,120 +2668,24 @@ nvmm_assist_mem(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
return -1;
}
if (instr.legpref[LEG_REPN]) {
if (__predict_false(instr.legpref[LEG_REPN])) {
errno = ENODEV;
return -1;
}
memset(&mem, 0, sizeof(mem));
switch (instr.src.type) {
case STORE_REG:
if (instr.src.disp.type != DISP_NONE) {
/* Indirect access. */
mem.write = false;
mem.size = instr.operand_size;
ret = store_to_mem(mach, &state, &instr, &instr.src,
&mem);
if (ret == -1)
return -1;
} else {
/* Direct access. */
mem.write = true;
mem.size = instr.operand_size;
val = state.gprs[instr.src.u.reg->num];
val = __SHIFTOUT(val, instr.src.u.reg->mask);
memcpy(mem.data, &val, mem.size);
}
break;
case STORE_IMM:
mem.write = true;
mem.size = instr.src.u.imm.size;
memcpy(mem.data, instr.src.u.imm.data, mem.size);
break;
case STORE_SIB:
mem.write = false;
mem.size = instr.operand_size;
ret = store_to_mem(mach, &state, &instr, &instr.src,
&mem);
if (ret == -1)
return -1;
break;
case STORE_DMO:
mem.write = false;
mem.size = instr.operand_size;
ret = store_to_mem(mach, &state, &instr, &instr.src,
&mem);
if (ret == -1)
return -1;
break;
default:
return -1;
if (instr.opcode->movs) {
ret = assist_mem_double(mach, &state, &instr);
} else {
ret = assist_mem_single(mach, &state, &instr);
}
switch (instr.dst.type) {
case STORE_REG:
if (instr.dst.disp.type != DISP_NONE) {
if (__predict_false(!mem.write)) {
DISASSEMBLER_BUG();
}
mem.size = instr.operand_size;
ret = store_to_mem(mach, &state, &instr, &instr.dst,
&mem);
if (ret == -1)
return -1;
} else {
/* nothing */
}
break;
case STORE_IMM:
/* The dst can't be an immediate. */
DISASSEMBLER_BUG();
case STORE_SIB:
if (__predict_false(!mem.write)) {
DISASSEMBLER_BUG();
}
mem.size = instr.operand_size;
ret = store_to_mem(mach, &state, &instr, &instr.dst,
&mem);
if (ret == -1)
return -1;
break;
case STORE_DMO:
if (__predict_false(!mem.write)) {
DISASSEMBLER_BUG();
}
mem.size = instr.operand_size;
ret = store_to_mem(mach, &state, &instr, &instr.dst,
&mem);
if (ret == -1)
return -1;
break;
default:
if (ret == -1) {
errno = ENODEV;
return -1;
}
(*instr.emul)(&mem, cb, state.gprs);
if (!mem.write) {
/* instr.dst.type == STORE_REG */
memcpy(&val, mem.data, sizeof(uint64_t));
val = __SHIFTIN(val, instr.dst.u.reg->mask);
state.gprs[instr.dst.u.reg->num] &= ~instr.dst.u.reg->mask;
state.gprs[instr.dst.u.reg->num] |= val;
}
if (instr.legpref[LEG_REP]) {
state.gprs[NVMM_X64_GPR_RCX] -= 1;
if (state.gprs[NVMM_X64_GPR_RCX] == 0) {
cnt = rep_dec_apply(&state, instr.address_size);
if (cnt == 0) {
state.gprs[NVMM_X64_GPR_RIP] += instr.len;
}
} else {


@@ -1,4 +1,4 @@
/* $NetBSD: nvmm.h,v 1.3 2018/12/15 13:39:43 maxv Exp $ */
/* $NetBSD: nvmm.h,v 1.4 2018/12/27 07:22:31 maxv Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -61,6 +61,11 @@ struct nvmm_mem {
uint8_t data[8];
};
struct nvmm_callbacks {
void (*io)(struct nvmm_io *);
void (*mem)(struct nvmm_mem *);
};
#define NVMM_PROT_READ 0x01
#define NVMM_PROT_WRITE 0x02
#define NVMM_PROT_EXEC 0x04
@@ -90,9 +95,10 @@ int nvmm_gva_to_gpa(struct nvmm_machine *, nvmm_cpuid_t, gvaddr_t, gpaddr_t *,
nvmm_prot_t *);
int nvmm_gpa_to_hva(struct nvmm_machine *, gpaddr_t, uintptr_t *);
int nvmm_assist_io(struct nvmm_machine *, nvmm_cpuid_t, struct nvmm_exit *,
void (*)(struct nvmm_io *));
int nvmm_assist_mem(struct nvmm_machine *, nvmm_cpuid_t, struct nvmm_exit *,
void (*)(struct nvmm_mem *));
int nvmm_assist_io(struct nvmm_machine *, nvmm_cpuid_t, struct nvmm_exit *);
int nvmm_assist_mem(struct nvmm_machine *, nvmm_cpuid_t, struct nvmm_exit *);
void nvmm_callbacks_register(const struct nvmm_callbacks *);
int nvmm_vcpu_dump(struct nvmm_machine *, nvmm_cpuid_t);
#endif /* _LIBNVMM_H_ */


@@ -200,12 +200,13 @@ mem_callback(struct nvmm_mem *mem)
memcpy(mem->data, mmiobuf + off, mem->size);
}
}
static int
handle_memory(struct nvmm_machine *mach, struct nvmm_exit *exit)
{
int ret;
ret = nvmm_assist_mem(mach, 0, exit, mem_callback);
ret = nvmm_assist_mem(mach, 0, exit);
if (ret == -1) {
err(errno, "nvmm_assist_mem");
}
@@ -255,7 +256,7 @@ struct test {
};
static void
run_test(struct nvmm_machine *mach, struct test *test)
run_test(struct nvmm_machine *mach, const struct test *test)
{
uint64_t *res;
size_t size;
@@ -288,8 +289,9 @@ extern uint8_t test5_begin, test5_end;
extern uint8_t test6_begin, test6_end;
extern uint8_t test7_begin, test7_end;
extern uint8_t test8_begin, test8_end;
extern uint8_t test9_begin, test9_end;
struct test tests[] = {
static const struct test tests[] = {
{ "test1 - MOV", &test1_begin, &test1_end, 0x3004 },
{ "test2 - OR", &test2_begin, &test2_end, 0x14FF },
{ "test3 - AND", &test3_begin, &test3_end, 0x1FC0 },
@@ -298,9 +300,15 @@ struct test tests[] = {
{ "test6 - DMO", &test6_begin, &test6_end, 0xFFAB },
{ "test7 - STOS", &test7_begin, &test7_end, 0x00123456 },
{ "test8 - LODS", &test8_begin, &test8_end, 0x12345678 },
{ "test9 - MOVS", &test9_begin, &test9_end, 0x12345678 },
{ NULL, NULL, NULL, -1 }
};
static const struct nvmm_callbacks callbacks = {
.io = NULL,
.mem = mem_callback
};
/*
* 0x1000: MMIO address, unmapped
* 0x2000: Instructions, mapped
@@ -318,6 +326,7 @@ int main(int argc, char *argv[])
err(errno, "nvmm_machine_create");
if (nvmm_vcpu_create(&mach, 0) == -1)
err(errno, "nvmm_vcpu_create");
nvmm_callbacks_register(&callbacks);
map_pages(&mach);
for (i = 0; tests[i].name != NULL; i++) {


@@ -35,6 +35,7 @@
.globl test6_begin, test6_end
.globl test7_begin, test7_end
.globl test8_begin, test8_end
.globl test9_begin, test9_end
.text
.code64
@@ -157,3 +158,22 @@ test8_begin:
TEST_END
test8_end:
.align 64
test9_begin:
movq $0x1000,%rax
movq $0x12345678,8(%rax)
movq $0x1008,%rsi
movq $0x1000,%rdi
movq $4,%rcx
rep movsb
movq $2,%rcx
rep movsw
TEST_END
test9_end: