87d648682e
into the host-specific files and wrapped access to them with atomic operations since that's a structure global to all the VMs. I think all the other globals are SMP clean since they are only written once during module init time, and read thereafter by all VMs. Renamed all host OS specific functions to hostOS*(). All host independent functions to host*(). I'd like to rename all monitor space functions to mon*() next.
/*
 *  plex86: run multiple x86 operating systems concurrently
 *  Copyright (C) 1999-2003 Kevin P. Lawton
 *
 *  monitor-host.c: This file contains the top-level monitor code,
 *  accessible from the host space. (kernel independent code)
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */


#include "plex86.h"
#define IN_HOST_SPACE
#include "monitor.h"


/* =====================================================================
 * Plex86 module global variables.  This should be the _only_ place
 * where globals are declared.  Since plex86 supports multiple VMs, almost
 * all data is stored per-VM.  For the few variables which are global
 * to all VMs, we have to be careful to access them in SMP friendly ways.
 * The ones which are written upon kernel module initialization are fine,
 * since they are only written once.
 * =====================================================================
 */

/* Info regarding the physical pages that comprise the kernel module,
 * including physical page information.  This is written (once) at
 * kernel module initialization time.  Thus there are no SMP access issues.
 */
kernelModulePages_t kernelModulePages;

/* Information on the host processor as returned by the CPUID
 * instruction.  This is written (once) at kernel module initialization
 * time.  Thus there are no SMP access issues.
 */
cpuid_info_t hostCpuIDInfo;


/* Some constants used by the VM logic.  Since they're "const", there are
 * no problems with SMP access.
 */
static const selector_t   nullSelector = { raw: 0 };
static const descriptor_t nullDescriptor = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };


static int  hostInitIDTSlot(vm_t *vm, unsigned vec, int type);
static void hostMapMonPages(vm_t *vm, Bit32u *, unsigned, Bit32u *, page_t *,
                            unsigned user, unsigned writable, char *name);
#if ANAL_CHECKS
static void hostMapBlankPage(vm_t *vm, Bit32u *laddr_p, page_t *pageTable);
#endif

#define RW0 0
#define RW1 1
#define US0 0
#define US1 1

#define IDT_INTERRUPT          0
#define IDT_EXCEPTION_ERROR    1
#define IDT_EXCEPTION_NOERROR  2
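
/* The commit notes above mention wrapping the one truly cross-VM structure
 * in atomic operations.  As an illustration only (the names below are
 * hypothetical, not part of plex86), a write-shared global would need
 * something like the following, whereas the write-once globals above can
 * be read locklessly after module init:
 */
#if 0
static volatile Bit32u nVMsActive = 0;  /* Hypothetical cross-VM counter. */

static void exampleVmOpened(void)
{
  /* x86 locked increment; a full-barrier read-modify-write, safe on SMP. */
  asm volatile ("lock; incl %0" : "+m" (nVMsActive));
}
#endif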
unsigned
hostModuleInit(void)
{
  /* Kernel independent stuff to do at kernel module load time. */

  if (!hostGetCpuCapabilities()) {
    hostOSPrint("getCpuCapabilities returned error\n");
    return(0); /* Fail. */
    }
  else {
#if 0
    hostOSPrint("ptype:%u, family:%u, model:%u stepping:%u\n",
                hostCpuIDInfo.procSignature.fields.procType,
                hostCpuIDInfo.procSignature.fields.family,
                hostCpuIDInfo.procSignature.fields.model,
                hostCpuIDInfo.procSignature.fields.stepping);
#endif
    }

  /* xxx Should check that host CS.base is page aligned here. */

#if 1
  {
  Bit32u cr0;

  asm volatile ( "movl %%cr0, %0" : "=r" (cr0) );
  hostOSPrint("host CR0=0x%x\n", cr0);
  }
#endif

  return(1); /* Pass. */
}

void
hostDeviceOpen(vm_t *vm)
{
  /* Kernel independent stuff to do at device open time. */

  /* Zero out entire VM structure. */
  mon_memzero( vm, sizeof(vm_t) );

  vm->vmState = VMStateFDOpened;
}

int
hostInitMonitor(vm_t *vm)
{
  unsigned pdi, pti;
  unsigned int i;
  Bit32u nexus_size;
  page_t *pageTable;
  Bit32u laddr, base;
  int r;

  vm->kernel_offset = hostOSKernelOffset();

  vm->system.a20Enable    = 1;          /* Start with A20 line enabled. */
  vm->system.a20AddrMask  = 0xffffffff; /* All address lines contribute. */
  vm->system.a20IndexMask = 0x000fffff; /* All address lines contribute. */

  /* Initialize nexus */
  mon_memzero(vm->host.addr.nexus, 4096);

  /* Copy transition code (nexus) into code page allocated for this VM. */
  nexus_size = ((Bit32u) &__nexus_end) - ((Bit32u) &__nexus_start);
  if (nexus_size > 4096)
    goto error;
  mon_memcpy(vm->host.addr.nexus, &__nexus_start, nexus_size);


  /* Init the convenience pointers. */

  /* Pointer to host2mon routine inside nexus page */
  vm->host.__host2mon = (void (*)(void)) HOST_NEXUS_OFFSET(vm, __host2mon);

  /* Pointer to guest context on monitor stack */
  vm->host.addr.guest_context = (guest_context_t *)
      ( (Bit32u)vm->host.addr.nexus + PAGESIZE - sizeof(guest_context_t) );

  /* Zero out various monitor data structures */
  mon_memzero(vm->host.addr.log_buffer, 4096*LOG_BUFF_PAGES);
  mon_memzero(&vm->log_buffer_info, sizeof(vm->log_buffer_info));
  mon_memzero(vm->host.addr.page_dir, 4096);
  mon_memzero(vm->host.addr.guest_cpu, 4096);
  mon_memzero(vm->host.addr.idt, MON_IDT_PAGES*4096);
  mon_memzero(vm->host.addr.gdt, MON_GDT_PAGES*4096);
  mon_memzero(vm->host.addr.ldt, MON_LDT_PAGES*4096);
  mon_memzero(vm->host.addr.tss, MON_TSS_PAGES*4096);
  mon_memzero(vm->host.addr.idt_stubs, MON_IDT_STUBS_PAGES*4096);

  vm->guestPhyPagePinQueue.nEntries = 0;
  vm->guestPhyPagePinQueue.tail = 0;

  /*
   *  ================
   *  Nexus Page Table
   *  ================
   *
   *  All structures needed by the monitor inside the guest environment
   *  (code to perform the transition between host<-->guest, fault handler
   *  code, various processor data structures like page directory, GDT,
   *  IDT, TSS etc.) are mapped into a single Page Table.
   *
   *  This allows us to migrate the complete nexus to anywhere in the
   *  guest address space by just updating a single (unused) page directory
   *  entry in the monitor/guest page directory to point to this nexus
   *  page table.
   *
   *  To simplify nexus migration, we try to avoid storing guest linear
   *  addresses to nexus structures as far as possible.  Instead, we use
   *  offsets relative to the monitor code/data segments.  As we update
   *  the base of these segments whenever the monitor migrates, the net
   *  effect is that those *offsets* remain valid across nexus migration.
   */
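
  /* A minimal sketch (not part of the build) of what nexus migration
   * amounts to, given the layout described above: repointing one PDE
   * moves every monitor structure at once.  The target address below is
   * an illustrative assumption only.
   */
#if 0
  /* Migrating the nexus to hypothetical linear address 0x40000000: */
  vm->host.addr.page_dir[vm->mon_pdi].raw = 0;           /* Vacate old slot. */
  vm->host.addr.page_dir[0x40000000 >> 22] = vm->host.nexus_pde;
  /* The monitor segment bases must then be rebased so that the stored
   * offsets keep working (see hostMapMonitor below). */
#endif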
  /* Fill in the PDE flags.  The US bit is set to 1 (user access).
   * All of the US bits in the monitor PTEs are set to 0 (system access).
   */
  vm->host.nexus_pde.fields.base = vm->pages.nexus_page_tbl;
  vm->host.nexus_pde.fields.avail = 0;
  vm->host.nexus_pde.fields.G = 0;   /* not global */
  vm->host.nexus_pde.fields.PS = 0;  /* 4K pages */
  vm->host.nexus_pde.fields.D = 0;   /* (unused in pde) */
  vm->host.nexus_pde.fields.A = 0;   /* not accessed */
  vm->host.nexus_pde.fields.PCD = 0; /* normal caching */
  vm->host.nexus_pde.fields.PWT = 0; /* normal write-back */
  vm->host.nexus_pde.fields.US = 1;  /* user access (see above) */
  vm->host.nexus_pde.fields.RW = 1;  /* read or write */
  vm->host.nexus_pde.fields.P = 1;   /* present in memory */

  /* Clear Page Table. */
  pageTable = vm->host.addr.nexus_page_tbl;
  mon_memzero(pageTable, 4096);

  /* Map the monitor structures sequentially into the nexus page table,
   * starting at monitor linear address 0.  Guest-visible pointers are
   * stored as offsets from the monitor segment base.
   */
  laddr = 0;
  base = MON_BASE_FROM_LADDR(laddr);

  hostMapMonPages(vm, kernelModulePages.ppi, kernelModulePages.nPages, &laddr,
                  pageTable, US0, RW1, "Monitor code/data pages");

#if ANAL_CHECKS
  hostMapBlankPage(vm, &laddr, pageTable);
#endif

  vm->guest.addr.nexus = (nexus_t *) (laddr - base);
  hostMapMonPages(vm, &vm->pages.nexus, 1, &laddr, pageTable, US0, RW1, "Nexus");
  vm->guest.addr.guest_context = (guest_context_t *)
      ( (Bit32u)vm->guest.addr.nexus + PAGESIZE - sizeof(guest_context_t) );

#if ANAL_CHECKS
  hostMapBlankPage(vm, &laddr, pageTable);
#endif
  vm->host.addr.nexus->vm = (void *) (laddr - base);
  hostMapMonPages(vm, vm->pages.vm, BytesToPages(sizeof(*vm)),
                  &laddr, pageTable, US0, RW1, "VM structure");

#if ANAL_CHECKS
  hostMapBlankPage(vm, &laddr, pageTable);
#endif
  vm->guest.addr.idt = (gate_t *) (laddr - base);
  hostMapMonPages(vm, vm->pages.idt, MON_IDT_PAGES, &laddr, pageTable, US0, RW1,
                  "IDT");

#if ANAL_CHECKS
  hostMapBlankPage(vm, &laddr, pageTable);
#endif
  vm->guest.addr.gdt = (descriptor_t *) (laddr - base);
  hostMapMonPages(vm, vm->pages.gdt, MON_GDT_PAGES, &laddr, pageTable, US0, RW1,
                  "GDT");

#if ANAL_CHECKS
  hostMapBlankPage(vm, &laddr, pageTable);
#endif
  vm->guest.addr.ldt = (descriptor_t *) (laddr - base);
  hostMapMonPages(vm, vm->pages.ldt, MON_LDT_PAGES, &laddr, pageTable, US0, RW1,
                  "LDT");

#if ANAL_CHECKS
  hostMapBlankPage(vm, &laddr, pageTable);
#endif
  vm->guest.addr.tss = (tss_t *) (laddr - base);
  hostMapMonPages(vm, vm->pages.tss, MON_TSS_PAGES, &laddr, pageTable, US0, RW1,
                  "TSS");

#if ANAL_CHECKS
  hostMapBlankPage(vm, &laddr, pageTable);
#endif
  vm->guest.addr.idt_stubs = (idt_stub_t *) (laddr - base);
  hostMapMonPages(vm, vm->pages.idt_stubs, MON_IDT_STUBS_PAGES, &laddr,
                  pageTable, US0, RW1, "IDT stubs");

#if ANAL_CHECKS
  hostMapBlankPage(vm, &laddr, pageTable);
#endif
  /* Monitor Page Directory */
  vm->guest.addr.page_dir = (pageEntry_t *) (laddr - base);
  hostMapMonPages(vm, &vm->pages.page_dir, 1, &laddr, pageTable, US0, RW1,
                  "Monitor Page Directory");

#if ANAL_CHECKS
  hostMapBlankPage(vm, &laddr, pageTable);
#endif
  /* Nexus Page Table */
  vm->guest.addr.nexus_page_tbl = (page_t *) (laddr - base);
  hostMapMonPages(vm, &vm->pages.nexus_page_tbl, 1, &laddr, pageTable, US0, RW1,
                  "Nexus Page Table");

#if ANAL_CHECKS
  hostMapBlankPage(vm, &laddr, pageTable);
#endif
  /* Map virtualized guest page tables into monitor. */
  vm->guest.addr.page_tbl = (page_t *) (laddr - base);
  hostMapMonPages(vm, vm->pages.page_tbl, MON_PAGE_TABLES,
                  &laddr, pageTable, US0, RW1, "Guest Page Tables");

#if ANAL_CHECKS
  hostMapBlankPage(vm, &laddr, pageTable);
#endif
  /* Map of linear addresses of page tables mapped into monitor */
  vm->guest.addr.page_tbl_laddr_map = (unsigned *) (laddr - base);
  hostMapMonPages(vm, &vm->pages.page_tbl_laddr_map, 1, &laddr, pageTable,
                  US0, RW1, "Page Table Laddr Map");

#if ANAL_CHECKS
  hostMapBlankPage(vm, &laddr, pageTable);
#endif
  /* Guest CPU state (mapped RW into user space also). */
  vm->guest.addr.guest_cpu = (guest_cpu_t *) (laddr - base);
  hostMapMonPages(vm, &vm->pages.guest_cpu, 1, &laddr,
                  pageTable, US0, RW1, "Guest CPU State");

#if ANAL_CHECKS
  hostMapBlankPage(vm, &laddr, pageTable);
#endif
  /*
   * We need a buffer to implement a debug print facility which
   * can work in either host or monitor space.  Map the buffer
   * into monitor/guest space.
   */
  vm->guest.addr.log_buffer = (unsigned char *) (laddr - base);
  hostMapMonPages(vm, vm->pages.log_buffer, LOG_BUFF_PAGES, &laddr,
                  pageTable, US0, RW1, "Log Buffer");

  {
  /* The physical addresses of the following pages are not
   * yet established.  Pass dummy info until they are mapped.
   */
  Bit32u tmp[1];
  tmp[0] = 0;

#if ANAL_CHECKS
  hostMapBlankPage(vm, &laddr, pageTable);
#endif
  /* Window into the guest's current physical code page */
  vm->guest.addr.code_phy_page = (unsigned char *) (laddr - base);
  hostMapMonPages(vm, tmp, 1, &laddr, pageTable, US0, RW1, "Code Phy Page");

#if ANAL_CHECKS
  hostMapBlankPage(vm, &laddr, pageTable);
#endif
  /* Temporary windows into a guest physical page, for accessing
   * guest GDT, IDT, etc info.
   */
  vm->guest.addr.tmp_phy_page0 = (unsigned char *) (laddr - base);
  hostMapMonPages(vm, tmp, 1, &laddr, pageTable, US0, RW1, "Tmp Phy Page0");

  vm->guest.addr.tmp_phy_page1 = (unsigned char *) (laddr - base);
  hostMapMonPages(vm, tmp, 1, &laddr, pageTable, US0, RW1, "Tmp Phy Page1");
  }

#if ANAL_CHECKS
  hostMapBlankPage(vm, &laddr, pageTable);
#endif

  hostOSPrint("Using %u/1024 PTE slots in 4Meg monitor range.\n",
              (laddr >> 12) & 0x3ff);

  /* Pointer to mon2host routine inside nexus page */
  vm->guest.__mon2host = (void (*)(void)) MON_NEXUS_OFFSET(vm, __mon2host);


  /*
   *  =====================
   *  Transition Page Table
   *  =====================
   *
   *  To aid in the transition between host<-->monitor/guest spaces,
   *  we need to have an address identity map situation for at least
   *  one page; the page containing the transition code.  As we do
   *  not know in advance whether this linear address range is in use
   *  by the guest as well, we set aside a complete additional Page
   *  Table, which contains only a single PTE pointing to the nexus page.
   *
   *  To create the identity map, we simply change the corresponding
   *  monitor page directory entry to point to this transition Page Table.
   *  This happens transparently inside the host<-->guest transition code;
   *  both the guest/monitor code and the host side code never see this
   *  transition page table entered into the page directory!
   *
   *  NOTE: We need to ensure that the nexus page table never spans the
   *        same 4Meg linear address space region as this page table!
   *        As we are free to choose the nexus linear address, this is
   *        not a problem.
   */
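
  /* Worked example of the 2-level address split used below: a 32-bit
   * linear address breaks into PDI (bits 31..22), PTI (bits 21..12) and
   * page offset (bits 11..0).  E.g. for laddr == 0x70123456:
   *
   *   pdi = 0x70123456 >> 22           = 0x1c0  (4Meg slot)
   *   pti = (0x70123456 >> 12) & 0x3ff = 0x123  (4K page within the slot)
   *   off = 0x70123456 & 0xfff         = 0x456
   */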
  /* Get full linear address of nexus code page, as seen in host space. */
  laddr = (Bit32u)vm->host.addr.nexus + vm->kernel_offset;
  pdi = laddr >> 22;
  pti = (laddr >> 12) & 0x3ff;

  /*
   * We need to be able to access the PDE in the monitor page directory
   * that corresponds to this linear address from both host and monitor
   * address spaces.
   */
  vm->host.addr.nexus->transition_pde_p_host = vm->host.addr.page_dir + pdi;
  vm->host.addr.nexus->transition_pde_p_mon = (pageEntry_t *)
      (((Bit32u)vm->guest.addr.page_dir) + (pdi << 2));
  vm->host.addr.nexus->transition_laddr = laddr;

  /* Fill in the PDE flags */
  vm->host.addr.nexus->transition_pde.fields.base = vm->pages.transition_PT;
  vm->host.addr.nexus->transition_pde.fields.avail = 0;
  vm->host.addr.nexus->transition_pde.fields.G = 0;   /* not global */
  vm->host.addr.nexus->transition_pde.fields.PS = 0;  /* 4K pages */
  vm->host.addr.nexus->transition_pde.fields.D = 0;   /* (unused in pde) */
  vm->host.addr.nexus->transition_pde.fields.A = 0;   /* not accessed */
  vm->host.addr.nexus->transition_pde.fields.PCD = 0; /* normal caching */
  vm->host.addr.nexus->transition_pde.fields.PWT = 0; /* normal write-back */
  vm->host.addr.nexus->transition_pde.fields.US = 0;  /* no user access */
  vm->host.addr.nexus->transition_pde.fields.RW = 1;  /* read or write */
  vm->host.addr.nexus->transition_pde.fields.P = 1;   /* present in memory */

  /* Clear Page Table; only one PTE is used. */
  pageTable = vm->host.addr.transition_PT;
  mon_memzero(pageTable, 4096);

  /* Fill in the PTE for identity mapping the code page */
  pageTable->pte[pti].fields.base = vm->pages.nexus;
  pageTable->pte[pti].fields.avail = 0;
  pageTable->pte[pti].fields.G = 0;   /* not global */
  pageTable->pte[pti].fields.PS = 0;  /* (unused in pte) */
  pageTable->pte[pti].fields.D = 0;   /* clean */
  pageTable->pte[pti].fields.A = 0;   /* not accessed */
  pageTable->pte[pti].fields.PCD = 0; /* normal caching */
  pageTable->pte[pti].fields.PWT = 0; /* normal write-back */
  pageTable->pte[pti].fields.US = 0;  /* user can not access */
  pageTable->pte[pti].fields.RW = 1;  /* read or write */
  pageTable->pte[pti].fields.P = 1;   /* present in memory */


  /*
   * Setup the TSS for the monitor/guest environment.
   *
   * We don't need to set the pagedir in the TSS, because we don't
   * actually jump to it anyway.  The TSS is just used to set the kernel
   * stack and, in a later stage, perhaps the I/O permission bitmap.
   */

  /* No task chain. */
  vm->host.addr.tss->back = 0;

  /* No debugging or I/O, for now. */
  vm->host.addr.tss->trap = 0;
  vm->host.addr.tss->io = sizeof(tss_t);

  /* Monitor stack offset. */
  vm->host.addr.tss->esp0 = ((Bit32u)vm->guest.addr.nexus) + PAGESIZE;


  /*
   * Set up initial monitor code and stack offset.
   */

  vm->host.addr.nexus->mon_jmp_info.offset = MON_NEXUS_OFFSET(vm, __mon_cs);
  vm->host.addr.nexus->mon_stack_info.offset =
      vm->host.addr.tss->esp0 - (sizeof(guest_context_t) + 48);
  /* xxx 48 above should be calculated from code below which winds
   * xxx up monitor stack.
   */


  /*
   * Setup the IDT for the monitor/guest environment
   */

  r = 0;
  r |= hostInitIDTSlot(vm,  0, IDT_EXCEPTION_NOERROR); /* Divide error */
  r |= hostInitIDTSlot(vm,  1, IDT_EXCEPTION_NOERROR); /* Debug exceptions */
  r |= hostInitIDTSlot(vm,  2, IDT_INTERRUPT);         /* NMI */
  r |= hostInitIDTSlot(vm,  3, IDT_EXCEPTION_NOERROR); /* Breakpoint */
  r |= hostInitIDTSlot(vm,  4, IDT_EXCEPTION_NOERROR); /* Overflow */
  r |= hostInitIDTSlot(vm,  5, IDT_EXCEPTION_NOERROR); /* Bounds check */
  r |= hostInitIDTSlot(vm,  6, IDT_EXCEPTION_NOERROR); /* Invalid opcode */
  r |= hostInitIDTSlot(vm,  7, IDT_EXCEPTION_NOERROR); /* FPU not available */
  r |= hostInitIDTSlot(vm,  8, IDT_EXCEPTION_ERROR);   /* Double fault */
  r |= hostInitIDTSlot(vm,  9, IDT_EXCEPTION_NOERROR); /* FPU segment overrun */
  r |= hostInitIDTSlot(vm, 10, IDT_EXCEPTION_ERROR);   /* Invalid TSS */
  r |= hostInitIDTSlot(vm, 11, IDT_EXCEPTION_ERROR);   /* Segment not present */
  r |= hostInitIDTSlot(vm, 12, IDT_EXCEPTION_ERROR);   /* Stack exception */
  r |= hostInitIDTSlot(vm, 13, IDT_EXCEPTION_ERROR);   /* GP fault */
  r |= hostInitIDTSlot(vm, 14, IDT_EXCEPTION_ERROR);   /* Page fault */
  r |= hostInitIDTSlot(vm, 15, IDT_EXCEPTION_NOERROR); /* reserved */
  r |= hostInitIDTSlot(vm, 16, IDT_EXCEPTION_NOERROR); /* Coprocessor error */
  r |= hostInitIDTSlot(vm, 17, IDT_EXCEPTION_ERROR);   /* Alignment check */
  r |= hostInitIDTSlot(vm, 18, IDT_EXCEPTION_NOERROR); /* Machine check */

  /* Reserved exceptions */
  for (i = 19; i < 32; i++)
    r |= hostInitIDTSlot(vm, i, IDT_EXCEPTION_NOERROR);

  /* Hardware interrupts */
  for (i = 32; i < 256; i++)
    r |= hostInitIDTSlot(vm, i, IDT_INTERRUPT);
  if (r != 0)
    goto error;


  /*
   * Setup the initial guest context
   */

  mon_memzero(vm->host.addr.guest_context, sizeof(guest_context_t));

  /* Wind up the monitor stack for the initial transition via
   * __host2mon.  At the tail end, monitor state is popped from the
   * stack and a RET is executed.
   */
  {
  Bit32u *ptr;

  ptr = (Bit32u *) (((unsigned char *) vm->host.addr.guest_context) - 4);
  *ptr-- = (Bit32u) &__ret_to_guest;
  *ptr-- = 0x02; /* eflags: only reserved bit on */
  *ptr-- = 0;    /* eax */
  *ptr-- = 0;    /* ecx */
  *ptr-- = 0;    /* edx */
  *ptr-- = 0;    /* ebx */
  *ptr-- = 0;    /* esp dummy */
  *ptr-- = 0;    /* ebp */
  *ptr-- = 0;    /* esi */
  *ptr-- = 0;    /* edi */
  *ptr-- = 0;    /* FS; start with null value. */
  *ptr-- = 0;    /* GS; start with null value. */
  }

  vm->vmState |= VMStateInitMonitor;
  vm->mon_request = MonReqNone;

  return(1); /* all OK */

error:
  return(0); /* error */
}

unsigned
hostInitGuestPhyMem(vm_t *vm)
{
  unsigned i;

  mon_memzero(vm->pageInfo, sizeof(vm->pageInfo));
  for (i=0; i<vm->pages.guest_n_pages; i++) {
    /* For now, we start out by preallocating physical pages
     * for the guest, though not necessarily mapped into linear
     * space.
     */
    vm->pageInfo[i].attr.raw = 0;
    vm->pageInfo[i].tsc = 0;
    vm->pageInfo[i].attr.fields.allocated = 1;
    }

  {
  Bit32u rom_page;
  unsigned npages;

  /* Mark BIOS ROM area as ReadOnly */
  rom_page = 0xf0000 >> 12;
  npages = (1 + 0xfffff - 0xf0000) / 4096;
  for (i=0; i<npages; i++)
    vm->pageInfo[rom_page + i].attr.fields.RO = 1;

  /* Mark VGA BIOS ROM area as ReadOnly */
  rom_page = 0xc0000 >> 12;
  npages = (1 + 0xc7fff - 0xc0000) / 4096;
  for (i=0; i<npages; i++)
    vm->pageInfo[rom_page + i].attr.fields.RO = 1;
  }

#if 1
  /* Mark VGA framebuffer area as Memory Mapped IO */
  {
  Bit32u vga_page;
  unsigned npages;

  vga_page = 0xa0000 >> 12;
  npages = (1 + 0xbffff - 0xa0000) / 4096;
  for (i=0; i<npages; i++)
    vm->pageInfo[vga_page + i].attr.fields.memMapIO = 1;
  }
#endif

  return(0);
}

int
hostInitIDTSlot(vm_t *vm, unsigned vec, int type)
/*
 * hostInitIDTSlot(): Initialize a monitor IDT slot.
 */
{
  /* IDT slot stubs */

  idt_stub_t *stub = &vm->host.addr.idt_stubs[vec];
  Bit32u stub_mon = ((Bit32u) vm->guest.addr.idt_stubs) +
                    vec*sizeof(idt_stub_t);

  if (sizeof(idt_stub_t) != IDT_STUB_SIZE)
    return( -1 );

  switch (type) {
    case IDT_INTERRUPT:
      stub->m2.pushla = 0x68;
      stub->m2.dummy  = 0;
      stub->m2.pushlb = 0x68;
      stub->m2.vector = vec;
      stub->m2.jmp    = 0xe9;
      stub->m2.reloc  = ((Bit32u) &__handle_int) -
                        (stub_mon + sizeof(idt_method2_t));
      break;

    case IDT_EXCEPTION_ERROR:
      stub->m1.pushl  = 0x68;
      stub->m1.vector = vec;
      stub->m1.jmp    = 0xe9;
      stub->m1.reloc  = ((Bit32u) &__handle_fault) -
                        (stub_mon + sizeof(idt_method1_t));
      break;

    case IDT_EXCEPTION_NOERROR:
      stub->m2.pushla = 0x68;
      stub->m2.dummy  = 0;
      stub->m2.pushlb = 0x68;
      stub->m2.vector = vec;
      stub->m2.jmp    = 0xe9;
      stub->m2.reloc  = ((Bit32u) &__handle_fault) -
                        (stub_mon + sizeof(idt_method2_t));
      break;

    default:
      return -1;
    }

  /* Set the interrupt gate */
  SET_INT_GATE(vm->host.addr.idt[vec],
               nullSelector, stub_mon, D_PRESENT, D_DPL0, D_D32);
  return 0;
}
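
/* For reference, the two stub encodings built above are plain x86
 * instruction sequences (0x68 is PUSHL imm32, 0xe9 is JMP rel32, with the
 * rel32 measured from the end of the jump instruction; hence the
 * "stub_mon + sizeof(...)" term in the reloc calculation):
 *
 *   method1 (CPU already pushed an error code):
 *     pushl $vec
 *     jmp   __handle_fault
 *
 *   method2 (no CPU error code; a dummy one is pushed for a uniform frame):
 *     pushl $0
 *     pushl $vec
 *     jmp   __handle_int or __handle_fault
 */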

/*
 * Map pages allocated by host, into the linear address space of
 * the monitor/guest, given the Page Table supplied.
 */

void
hostMapMonPages(vm_t *vm, Bit32u *pages, unsigned n, Bit32u *laddr_p,
                page_t *pageTable, unsigned user, unsigned writable, char *name)
{
  unsigned i, pti;

#if 0
  hostOSPrint("hostMapMonPages: '%s' mapped at 0x%x .. 0x%x.\n",
              name,
              (*laddr_p) - MON_BASE_FROM_LADDR(0),
              ((*laddr_p) + (n*4096)) - MON_BASE_FROM_LADDR(0) );
#endif

  pti = (*laddr_p >> 12) & 0x3ff;
  for (i = 0; i < n; i++, pti++) {
    if (pti >= 1024)
      break; /* Past the last PTE slot; this should not happen! */

    /* Fill in the PTE flags */
    pageTable->pte[pti].fields.base = pages[i];
    pageTable->pte[pti].fields.avail = 0;
    pageTable->pte[pti].fields.G = 0;        /* not global */
    pageTable->pte[pti].fields.PS = 0;       /* (unused in pte) */
    pageTable->pte[pti].fields.D = 0;        /* clean */
    pageTable->pte[pti].fields.A = 0;        /* not accessed */
    pageTable->pte[pti].fields.PCD = 0;      /* normal caching */
    pageTable->pte[pti].fields.PWT = 0;      /* normal write-back */
    pageTable->pte[pti].fields.US = user;    /* 0=system, 1=user */
    pageTable->pte[pti].fields.RW = writable; /* 0=RO, 1=RW */
    pageTable->pte[pti].fields.P = 1;        /* present in memory */
    }

  /*
   * Advance linear address pointer, for the next set of pages
   * to be mapped.
   */
  *laddr_p += 4096 * n;
}

#if ANAL_CHECKS
void
hostMapBlankPage(vm_t *vm, Bit32u *laddr_p, page_t *pageTable)
{
  unsigned pti;

  pti = (*laddr_p >> 12) & 0x3ff;
  if (pti >= 1024)
    return; /* Past the last PTE slot; this should not happen! */

  /* Fill in the PTE flags */
  pageTable->pte[pti].fields.base = 0;
  pageTable->pte[pti].fields.avail = 0;
  pageTable->pte[pti].fields.G = 0;   /* not global */
  pageTable->pte[pti].fields.PS = 0;  /* (unused in pte) */
  pageTable->pte[pti].fields.D = 0;   /* clean */
  pageTable->pte[pti].fields.A = 0;   /* not accessed */
  pageTable->pte[pti].fields.PCD = 0; /* normal caching */
  pageTable->pte[pti].fields.PWT = 0; /* normal write-back */
  pageTable->pte[pti].fields.US = 0;
  pageTable->pte[pti].fields.RW = 0;
  pageTable->pte[pti].fields.P = 0;

  /*
   * Advance linear address pointer past the blank page, for the
   * next set of pages to be mapped.
   */
  *laddr_p += 4096;
}
#endif

int
hostIoctlGeneric(vm_t *vm, void *inode, void *filp,
                 unsigned int cmd, unsigned long arg)
{
  switch (cmd) {

    /*
     * Set the guest CPUID info.
     */
    case PLEX86_CPUID:
      {
      if ( vm->vmState & VMStateGuestCPUID ) {
        /* Can't change guest CPUID. */
        return -Plex86ErrnoEINVAL;
        }
      if ( hostOSCopyFromUser(&vm->guestCPUIDInfo, (void *)arg,
                              sizeof(vm->guestCPUIDInfo)) )
        return -Plex86ErrnoEFAULT;
      /* xxx Value checks here. */
      vm->vmState |= VMStateGuestCPUID;
      return 0;
      }

    case PLEX86_REGISTER_MEMORY:
      {
      plex86IoctlRegisterMem_t registerMemMsg;
      if ( hostOSCopyFromUser(&registerMemMsg, (void *)arg,
                              sizeof(registerMemMsg)) )
        return -Plex86ErrnoEFAULT;
      return( hostIoctlRegisterMem(vm, &registerMemMsg) );
      }

    /*
     * Tear down the VM environment.
     */
    case PLEX86_TEARDOWN:
      if ( vm->vmState & VMStateRegisteredAll ) {
        hostOSPrint("plex86: guest memory is still registered!\n");
        /* Could effect the unpinning here and then do:
         *   vm->vmState &= ~VMStateRegisteredAll;
         */
        return -Plex86ErrnoEBUSY;
        }

      hostUnallocVmPages(vm);
      /* Fixme: deal with state better here. */

      /* Reset state to only FD opened. */
      vm->vmState = VMStateFDOpened;
      return 0;


    /*
     * Execute the guest in the VM for a while.  The guest CPU state
     * is specified in a memory window mmap()'d to user space.
     */
    case PLEX86_EXECUTE:
      {
      plex86IoctlExecute_t executeMsg;
      int ret;

      if ( hostOSCopyFromUser(&executeMsg, (void *)arg, sizeof(executeMsg)) )
        return -Plex86ErrnoEFAULT;
      ret = hostIoctlExecute(vm, &executeMsg);
      if ( hostOSCopyToUser((void *)arg, &executeMsg, sizeof(executeMsg)) )
        return -Plex86ErrnoEFAULT;
      return ret;
      }

#warning "PLEX86_RESET should only be conditionally compiled for debugging."
    /*
     * For debugging, when the module gets hosed, this is a way
     * to reset the in-use count, so we can rmmod it.
     */
    case PLEX86_RESET:
      hostOSModuleCountReset(vm, inode, filp);
      return 0;

    default:
      hostOSPrint("plex86: unknown ioctl(%d) called\n", cmd);
      return -Plex86ErrnoEINVAL;
    }
}
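
/* A sketch of the user-space side of this ioctl interface (not part of
 * this module; the device path and error handling are assumptions, while
 * the command codes and message types above are the real contract):
 */
#if 0
  int fd = open("/dev/plex86", O_RDWR);   /* Assumed device node name. */
  plex86IoctlRegisterMem_t memMsg;
  plex86IoctlExecute_t     execMsg;

  /* ... fill in memMsg.nMegs, memMsg.guestPhyMemVector, the window
   * addresses, etc., then hand the regions to the kernel module: */
  if (ioctl(fd, PLEX86_REGISTER_MEMORY, &memMsg) < 0)
    /* handle error */;

  /* Run the guest; state comes back through the copyout of execMsg. */
  execMsg.executeMethod = Plex86ExecuteMethodNative;
  if (ioctl(fd, PLEX86_EXECUTE, &execMsg) < 0)
    /* handle error */;
#endif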

int
hostIoctlExecute(vm_t *vm, plex86IoctlExecute_t *executeMsg)
{
  guest_cpu_t     *guest_cpu;
  guest_context_t *guest_stack_context;
  nexus_t         *nexus;
  unsigned s;
  int retval;

  if ( (vm->vmState != VMStateReady) ||
       (vm->mon_request != MonReqNone) ) {
    retval = Plex86NoExecute_VMState; /* Fail. */
    goto handlePanic;
    }

  /* Only (virtualized) native execution is supported currently.
   * Later, it will be interesting to breakpoint one instruction
   * at-a-time using Plex86ExecuteMethodBreakpoint, for
   * cosimulation.
   */
  if (executeMsg->executeMethod != Plex86ExecuteMethodNative) {
    retval = Plex86NoExecute_Method; /* Fail. */
    goto handleFail;
    }

  /* A pointer to the guest CPU state as passed from host-user space.
   * This structure is memory mapped between user and kernel/monitor space.
   */
  guest_cpu = vm->host.addr.guest_cpu;

  /* A pointer to the guest CPU state saved on the monitor stack. */
  guest_stack_context = vm->host.addr.guest_context;

  /* =================================================================
   * Before executing the guest in the VM, we must check that
   * the guest conditions meet the requirements of the user-level-only
   * VM.
   * =================================================================
   */

  /* CR0:
   *   PG(31)==1
   *   CD(30)==? (look into this later)
   *   NW(29)==? (look into this later)
   *   AM(18)==pass-thru from guest
   *   WP(16)==Don't care.  Monitor always sets this to 1.
   *   NE( 5)==? (look into this later)
   *   ET( 4)==? (look into this later)
   *   TS( 3)==? (look into this later)
   *   EM( 2)==? (look into this later)
   *   MP( 1)==? (look into this later)
   *   PE( 0)==1
   */
  /* 0x8005003b */
  if ( (guest_cpu->cr0.raw & 0xe0000037) != 0x80000033 ) {
    hostOSPrint("plex86: guest CR0=0x%x\n", guest_cpu->cr0.raw);
    retval = Plex86NoExecute_CR0; /* Fail. */
    goto handleFail;
    }
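
  /* Decoding the check above: mask 0xe0000037 selects PG(31), CD(30),
   * NW(29), NE(5), ET(4), EM(2), MP(1) and PE(0); the required value
   * 0x80000033 therefore demands PG=1, CD=0, NW=0, NE=1, ET=1, EM=0,
   * MP=1, PE=1.  I.e. the guest must run with paging and protection
   * enabled, normal caching, and native-FPU style CR0 bits.
   */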
  /* CR4:
   *   OSXMMEXCPT(10)==? (look into this later)
   *   OSFXSR(9)==? (look into this later)
   *   PCE(8)==? (look into this later)
   *   PGE(7)==? (look into this later)
   *   MCE(6)==? (look into this later)
   *   PAE(5)==? (look into this later)
   *   PSE(4)==? (look into this later)
   *   DE(3)==? (look into this later)
   *   TSD(2)==? (look into this later)
   *   PVI(1)==? (look into this later)
   *   VME(0)==? (look into this later)
   */
  if ( (guest_cpu->cr4.raw & 0x000007ff) != 0x00000000 ) {
    hostOSPrint("plex86: guest CR4=0x%x\n", guest_cpu->cr4.raw);
    retval = Plex86NoExecute_CR4; /* Fail. */
    goto handleFail;
    }

  /* Guest CPL must be 3 (user-level).
   * CS selector must not be NULL.
   */
  if ( (guest_cpu->sreg[SRegCS].sel.fields.rpl != 3) ||
       (guest_cpu->sreg[SRegCS].sel.fields.index == 0) ||
       (guest_cpu->sreg[SRegCS].des.dpl != 3) ) {
    retval = Plex86NoExecute_CS; /* Fail. */
    goto handleFail;
    }

  /* A20 line must be enabled. */
  if ( guest_cpu->a20Enable != 1 ) {
    retval = Plex86NoExecute_A20; /* Fail. */
    goto handleFail;
    }

  /* Some code not really used now, since we only support A20 being enabled. */
  {
  unsigned newA20Enable;

  newA20Enable = guest_cpu->a20Enable > 0; /* Make 0 or 1. */
  if ( newA20Enable != vm->system.a20Enable ) {
    if ( (!newA20Enable) && guest_cpu->cr0.fields.pg ) {
      /* A20 disabled while paging is on is not supported.  Well, really
       * I have to see if it matters.  This check was in old plex86 code.
       */
      retval = Plex86NoExecute_A20; /* Fail. */
      goto handleFail;
      }
    vm->system.a20Enable    = newA20Enable;
    vm->system.a20AddrMask  = 0xffefffff | (newA20Enable << 20);
    vm->system.a20IndexMask = 0x000ffeff | (newA20Enable << 8);
    }
  }

  /* LDT not supported.
   * Monitor uses GDT slots 1,2,3, so guest segments can not use them.
   * Segment descriptor cache DPL should equal 3.
   */
  for (s=0; s<6; s++) {
    unsigned selector = guest_cpu->sreg[s].sel.raw;
    unsigned index;

    /* Only care if selector is not NULL. */
    if ( selector & 0xfffc ) {
      if ( (selector & 0x0007) != 3 ) {
        /* Either TI=1 (LDT usage) or RPL!=3. */
        retval = Plex86NoExecute_Selector; /* Fail. */
        goto handleFail;
        }
      index = selector >> 3;
      if ( index <= 3 ) {
        /* Selector index field uses one of the monitor GDT slots. */
        retval = Plex86NoExecute_Selector; /* Fail. */
        goto handleFail;
        }
      if ( index >= (MON_GDT_SIZE/8) ) {
        /* Selector index field uses a slot beyond the monitor GDT size. */
        retval = Plex86NoExecute_Selector; /* Fail. */
        goto handleFail;
        }
      if ( guest_cpu->sreg[s].des.dpl != 3 ) {
        retval = Plex86NoExecute_DPL; /* Fail. */
        goto handleFail;
        }
      }
    }

  /* EFlags constraints:
   *   VIP/VIF==0
   *   VM==0
   *   RF==0
   *   NT==0
   *   IOPL==0 (We may be able to allow this to be 0..2)
   *   IF==1
   *   TF==0
   *   bit1==1
   */
  if ( (guest_cpu->eflags & 0x001b7302) != 0x00000202 ) {
    retval = Plex86NoExecute_EFlags; /* Fail. */
    goto handleFail;
    }
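
  /* Decoding the EFLAGS check: mask 0x001b7302 covers VIP(20), VIF(19),
   * VM(17), RF(16), NT(14), IOPL(13:12), IF(9), TF(8) and bit 1; the
   * required value 0x00000202 means all of those must be clear except IF
   * and the always-one bit 1.  So the guest runs with interrupts enabled,
   * no V86 mode, no nested task, no single-stepping, and IOPL 0.
   */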
  /* Notes on other stuff:
   *  - CPUID emulation vs virtualization match.
   */

  /* NOTE: We should commit to executing the guest at this point.
   * We must not leave stray entries in the GDT.
   */

  /* Install virtualized guest descriptors in GDT.
   * Either use descriptor caches from guest space, or we have
   * to chase down the GDT entries using the guest's paging
   * system.  Might be a cheaper/safe bet to just use the
   * descriptor caches.  If the guest reloads a descriptor,
   * just let the user space deal with it.
   */
  for (s=0; s<6; s++) {
    if ( (guest_cpu->sreg[s].sel.raw & 0xfffc) != 0) {
      vm->host.addr.gdt[ guest_cpu->sreg[s].sel.fields.index ] =
          guest_cpu->sreg[s].des;
      }
    }
#warning "Have to clear out GDT"

  guest_stack_context->gs = guest_cpu->sreg[SRegGS].sel.raw;
  guest_stack_context->fs = guest_cpu->sreg[SRegFS].sel.raw;
  guest_stack_context->ds = guest_cpu->sreg[SRegDS].sel.raw;
  guest_stack_context->es = guest_cpu->sreg[SRegES].sel.raw;

  /* Could use memcpy(); both are in order.  Pack both structs. */
  guest_stack_context->edi = guest_cpu->edi;
  guest_stack_context->esi = guest_cpu->esi;
  guest_stack_context->ebp = guest_cpu->ebp;
  guest_stack_context->dummy_esp = 0; /* Not needed. */
  guest_stack_context->ebx = guest_cpu->ebx;
  guest_stack_context->edx = guest_cpu->edx;
  guest_stack_context->ecx = guest_cpu->ecx;
  guest_stack_context->eax = guest_cpu->eax;

  /* Fields vector/error are ignored for return to guest. */

  /* CS:EIP */
  guest_stack_context->eip = guest_cpu->eip;
  guest_stack_context->cs  = guest_cpu->sreg[SRegCS].sel.raw;

  guest_stack_context->eflags.raw = guest_cpu->eflags;
  vm->veflags.raw = 0; /* Virtualized EFLAGS - implement later. */

  guest_stack_context->esp = guest_cpu->esp;
  guest_stack_context->ss  = guest_cpu->sreg[SRegSS].sel.raw;

  /* Pointer to the fields in the nexus.S assembly code. */
  nexus = vm->host.addr.nexus;

#warning "Monitor CRx hacks"
  nexus->mon_cr0 = 0x8001003b | /* PG/WP/NE/ET/TS/MP/PE */
      (guest_cpu->cr0.raw & 0x00040000); /* Pass-thru AM from guest. */
  /* Could move mon_cr3 load to mapMonitor. */
  nexus->mon_cr3 = vm->pages.page_dir << 12;
  nexus->mon_cr4 = 0x00000004; /* TSD=1 */

  /* vm->guest_cpu.cr0.raw = guest_cpu->cr0 | 0x32; */ /* +++ hack for now */

  // Notes:
  //  - Implement some of monPagingRemap from old code, since that
  //    was intended to be run/triggered by an initial mode change.
  //  - After execution of 1st timeslice, need to copy dynamic state
  //    from VM to guest_cpu area.
  //  - Deal with cycle counts etc.

  hostInitShadowPaging(vm);

  for (;;) {
    unsigned long eflags;

#if 0
    /* If print buffer has contents, return to user space to print. */
    if (vm->log_buffer_info.offset) {
      vm->mon_msgs.header.msg_type = VMMessagePrintBuf;
      vm->mon_msgs.header.msg_len  = 0;
      vm->mon_request = MonReqNone; /* Request satisfied */
      resetPrintBuf(vm); /* xxx Fix print mess */
      retval = 100;
      goto handleFail;
      }
#endif

    vm_save_flags(eflags);
    vm_restore_flags(eflags & ~0x00004300); /* clear NT/IF/TF */
#if ANAL_CHECKS
    if (!(eflags & 0x200)) {
      vm_restore_flags(eflags);
      hostOSPrint("ioctlExecute: EFLAGS.IF==0\n");
      retval = 101; /* Fail. */
      goto handlePanic;
      }
#endif

    /* Call assembly routine to effect transition. */
    vm->host.__host2mon();

    /* First check for an asynchronous event (interrupt redirection) */
    if ( vm->mon_request == MonReqRedirect ) {
      vm_restore_flags(eflags & ~0x00000200); /* restore all but IF */
      soft_int(vm->redirect_vector); /* sets IF to 1 */
      hostOSInstrumentIntRedirCount(vm->redirect_vector);
      vm->mon_request = MonReqNone; /* Request satisfied */
      }

    /* Event was synchronous; monitor requested a switch back to host. */
    else {
      vm_restore_flags(eflags);

      /* Perform action requested by monitor. */
      switch ( vm->mon_request ) {
        case MonReqRemapMonitor:
#if 0
          if ( mapMonitor(vm) ) {
            vm->mon_request = MonReqNone; /* Request satisfied */
            break;
            }
          else {
            hostOSPrint("mapMonitor failed.\n");
            hostOSPrint("Panic w/ abort_code=%u\n", vm->abort_code);
            retval = 102;
            goto handlePanic;
            }
#endif
          hostOSPrint("ioctlExecute: case MonReqRemapMonitor.\n");
          retval = 103;
          goto handlePanic;

        case MonReqFlushPrintBuf:
          hostOSPrint("ioctlExecute: case MonReqFlushPrintBuf.\n");
          retval = 104;
          goto handlePanic;

        case MonReqGuestFault:
          /* Encountered a guest fault. */
          hostCopyGuestStateToUserSpace(vm);
          executeMsg->cyclesExecuted = 0;       /* Handle later. */
          executeMsg->instructionsExecuted = 0; /* Handle later. */
          executeMsg->monitorState.state = vm->vmState;
          executeMsg->monitorState.request = vm->mon_request;
          executeMsg->monitorState.guestFaultNo = vm->guestFaultNo;
          vm->mon_request = MonReqNone;
          return 0;

        case MonReqPanic:
          if (vm->abort_code)
            hostOSPrint("Panic w/ abort_code=%u\n", vm->abort_code);
          hostOSPrint("ioctlExecute: case MonReqPanic.\n");
          retval = 106;
          goto handlePanic;

        case MonReqPinUserPage:
          if ( !hostHandlePagePinRequest(vm, vm->pinReqPPI) ) {
            retval = 108;
            goto handlePanic;
            }
          continue; /* Back to VM monitor. */

        default:
          hostOSPrint("ioctlExecute: default case (%u).\n", vm->mon_request);
          retval = 107;
          goto handlePanic;
        }
      }

    /* Let host decide whether we are allowed another timeslice */
    if ( !hostOSIdle() ) {
      /* We are returning only because the host wants to
       * schedule other work.
       */
      executeMsg->monitorState.state = vm->vmState;
      executeMsg->monitorState.request = MonReqNone;
      return 0;
      }
    }

  /* Should not get here. */
  retval = 109;
  goto handlePanic;

handleFail:
  /* Handle inability to execute the guest due to certain state. */
  executeMsg->monitorState.state = vm->vmState;
  executeMsg->monitorState.request = vm->mon_request;
  return(retval);

handlePanic:
  vm->vmState |= VMStatePanic;
  vm->mon_request = MonReqPanic;
  executeMsg->monitorState.state = vm->vmState;
  executeMsg->monitorState.request = vm->mon_request;
  return(retval);
}

void
hostCopyGuestStateToUserSpace(vm_t *vm)
{
  guest_cpu_t     *guest_cpu;
  guest_context_t *guest_stack_context;

  /* A pointer to the guest CPU state as passed from host-user space.
   * This structure is memory mapped between user and kernel/monitor space.
   */
  guest_cpu = vm->host.addr.guest_cpu;

  /* A pointer to the guest CPU state saved on the monitor stack. */
  guest_stack_context = vm->host.addr.guest_context;

  guest_cpu->sreg[SRegES].sel.raw = guest_stack_context->es;
  if ( (guest_stack_context->es & 0xfffc) == 0 ) {
    guest_cpu->sreg[SRegES].des = nullDescriptor;
    guest_cpu->sreg[SRegES].valid = 0;
    }
  else {
    guest_cpu->sreg[SRegES].des =
        vm->host.addr.gdt[ guest_cpu->sreg[SRegES].sel.fields.index ];
    guest_cpu->sreg[SRegES].valid = 1;
    }

  guest_cpu->sreg[SRegCS].sel.raw = guest_stack_context->cs;
  if ( (guest_stack_context->cs & 0xfffc) == 0 ) {
    guest_cpu->sreg[SRegCS].des = nullDescriptor;
    guest_cpu->sreg[SRegCS].valid = 0;
    }
  else {
    guest_cpu->sreg[SRegCS].des =
        vm->host.addr.gdt[ guest_cpu->sreg[SRegCS].sel.fields.index ];
    guest_cpu->sreg[SRegCS].valid = 1;
    }

  guest_cpu->sreg[SRegSS].sel.raw = guest_stack_context->ss;
  if ( (guest_stack_context->ss & 0xfffc) == 0 ) {
    guest_cpu->sreg[SRegSS].des = nullDescriptor;
    guest_cpu->sreg[SRegSS].valid = 0;
    }
  else {
    guest_cpu->sreg[SRegSS].des =
        vm->host.addr.gdt[ guest_cpu->sreg[SRegSS].sel.fields.index ];
    guest_cpu->sreg[SRegSS].valid = 1;
    }

  guest_cpu->sreg[SRegDS].sel.raw = guest_stack_context->ds;
  if ( (guest_stack_context->ds & 0xfffc) == 0 ) {
    guest_cpu->sreg[SRegDS].des = nullDescriptor;
    guest_cpu->sreg[SRegDS].valid = 0;
    }
  else {
    guest_cpu->sreg[SRegDS].des =
        vm->host.addr.gdt[ guest_cpu->sreg[SRegDS].sel.fields.index ];
    guest_cpu->sreg[SRegDS].valid = 1;
    }

  guest_cpu->sreg[SRegFS].sel.raw = guest_stack_context->fs;
  if ( (guest_stack_context->fs & 0xfffc) == 0 ) {
    guest_cpu->sreg[SRegFS].des = nullDescriptor;
    guest_cpu->sreg[SRegFS].valid = 0;
    }
  else {
    guest_cpu->sreg[SRegFS].des =
        vm->host.addr.gdt[ guest_cpu->sreg[SRegFS].sel.fields.index ];
    guest_cpu->sreg[SRegFS].valid = 1;
    }

  guest_cpu->sreg[SRegGS].sel.raw = guest_stack_context->gs;
  if ( (guest_stack_context->gs & 0xfffc) == 0 ) {
    guest_cpu->sreg[SRegGS].des = nullDescriptor;
    guest_cpu->sreg[SRegGS].valid = 0;
    }
  else {
    guest_cpu->sreg[SRegGS].des =
        vm->host.addr.gdt[ guest_cpu->sreg[SRegGS].sel.fields.index ];
    guest_cpu->sreg[SRegGS].valid = 1;
    }

  /* Could use memcpy(); both are in order.  Pack both structs. */
  guest_cpu->edi = guest_stack_context->edi;
  guest_cpu->esi = guest_stack_context->esi;
  guest_cpu->ebp = guest_stack_context->ebp;
  guest_cpu->esp = guest_stack_context->esp;
  guest_cpu->ebx = guest_stack_context->ebx;
  guest_cpu->edx = guest_stack_context->edx;
  guest_cpu->ecx = guest_stack_context->ecx;
  guest_cpu->eax = guest_stack_context->eax;

  /* CS:EIP */
  guest_cpu->eip = guest_stack_context->eip;

  guest_cpu->eflags = guest_stack_context->eflags.raw;
  /* vm->veflags.raw = 0; */ /* Virtualized EFLAGS - implement later. */
}

int
hostIoctlRegisterMem(vm_t *vm, plex86IoctlRegisterMem_t *registerMemMsg)
{
  unsigned error;

  /* Do not allow duplicate allocation.  The file descriptor must be
   * opened.  The guest CPUID info can be filled in later.
   */
  if ( (vm->vmState & ~VMStateGuestCPUID) != VMStateFDOpened )
    return -Plex86ErrnoEBUSY;

  if (vm->pages.guest_n_megs != 0)
    return -Plex86ErrnoEBUSY;

  /* Check that the amount of memory is reasonable. */
  if ( (registerMemMsg->nMegs > PLEX86_MAX_PHY_MEGS) ||
       (registerMemMsg->nMegs < 4) ||
       (registerMemMsg->nMegs & 0x3) )
    return -Plex86ErrnoEINVAL;

  /* Check that the guest memory vector is page aligned. */
  if ( registerMemMsg->guestPhyMemVector & 0xfff )
    return -Plex86ErrnoEINVAL;

  /* Check that the log buffer area is page aligned. */
  if ( registerMemMsg->logBufferWindow & 0xfff )
    return -Plex86ErrnoEINVAL;

  /* Check that the guest CPU area is page aligned. */
  if ( registerMemMsg->guestCPUWindow & 0xfff )
    return -Plex86ErrnoEINVAL;

  /* Check that none of the user areas overlap.  In case we have a
   * number of regions, use some generic code to handle N regions.
   */
  {
#define NumUserRegions 3
  struct {
    Bit32u min, max;
    } userRegion[NumUserRegions];
  unsigned i,j;

  userRegion[0].min = registerMemMsg->guestPhyMemVector;
  userRegion[0].max = userRegion[0].min + (registerMemMsg->nMegs<<20) - 1;
  userRegion[1].min = registerMemMsg->logBufferWindow;
  userRegion[1].max = userRegion[1].min + LOG_BUFF_SIZE - 1;
  userRegion[2].min = registerMemMsg->guestCPUWindow;
  userRegion[2].max = userRegion[2].min + (4096) - 1;

  for (i=0; i<NumUserRegions; i++) {
    for (j=0; j<NumUserRegions; j++) {
      if (j == i)
        continue; /* Don't compare a region with itself. */
      /* Check for min(j) contained in region(i). */
      if ( (userRegion[j].min >= userRegion[i].min) &&
           (userRegion[j].min <= userRegion[i].max) )
        return -Plex86ErrnoEINVAL;
      /* Check for max(j) contained in region(i). */
      if ( (userRegion[j].max >= userRegion[i].min) &&
           (userRegion[j].max <= userRegion[i].max) )
        return -Plex86ErrnoEINVAL;
      }
    }
  }


  /* Allocate memory */
  if ( (error = hostAllocVmPages(vm, registerMemMsg)) != 0 ) {
    hostOSPrint("plex86: allocVmPages failed at %u\n", error);
    return -Plex86ErrnoENOMEM;
    }

  /* Initialize the guest's physical memory. */
  if ( hostInitGuestPhyMem(vm) ) {
    hostUnallocVmPages(vm);
    return -Plex86ErrnoEFAULT;
    }

  /* Initialize the monitor. */
  if ( !hostInitMonitor(vm) || !hostMapMonitor(vm) ) {
    hostUnallocVmPages(vm);
    return -Plex86ErrnoEFAULT;
    }
  return 0;
}
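
/* The pairwise containment test above is equivalent to the usual interval
 * overlap predicate: closed regions [aMin,aMax] and [bMin,bMax] intersect
 * iff max(aMin,bMin) <= min(aMax,bMax).  A self-contained sketch (the
 * helper name is illustrative only):
 */
#if 0
static int exampleRegionsOverlap(Bit32u aMin, Bit32u aMax,
                                 Bit32u bMin, Bit32u bMax)
{
  Bit32u lo = (aMin > bMin) ? aMin : bMin;  /* max of the two minima */
  Bit32u hi = (aMax < bMax) ? aMax : bMax;  /* min of the two maxima */
  return lo <= hi;  /* Nonzero iff the two intervals intersect. */
}
#endif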

/*
 * Allocate various pages/memory needed by monitor.
 */

int
hostAllocVmPages(vm_t *vm, plex86IoctlRegisterMem_t *registerMemMsg)
{
  vm_pages_t *pg = &vm->pages;
  vm_addr_t  *ad = &vm->host.addr;
#warning "Fix these shortcuts"
  unsigned where = 1;

  /* clear out allocated pages lists */
  mon_memzero(pg, sizeof(*pg));
  mon_memzero(ad, sizeof(*ad));

  /* Guest physical memory pages */
  pg->guest_n_megs  = registerMemMsg->nMegs;
  pg->guest_n_pages = registerMemMsg->nMegs * 256;
  pg->guest_n_bytes = registerMemMsg->nMegs * 1024 * 1024;
  if ( pg->guest_n_pages > MAX_MON_GUEST_PAGES) {
    /* The size of the user-space allocated guest physical memory must
     * fit within the maximum number of guest pages that the VM monitor
     * supports.
     */
    goto error;
    }
  where++;

  vm->guestPhyMemAddr = registerMemMsg->guestPhyMemVector;
  vm->vmState |= VMStateRegisteredPhyMem; /* Bogus for now. */
  where++;

  {
  Bit32u hostPPI, kernelAddr;

  /* Guest CPU state (malloc()'d in user space). */
  if ( !hostOSGetAndPinUserPage(vm, registerMemMsg->guestCPUWindow,
           &pg->guest_cpu_hostOSPtr, &hostPPI, &kernelAddr) ) {
    goto error;
    }
  ad->guest_cpu = (guest_cpu_t *) kernelAddr;
  pg->guest_cpu = hostPPI;
  vm->vmState |= VMStateRegisteredGuestCPU; /* For now. */
  where++;

  /* Log buffer area (malloc()'d in user space). */
  /* LOG_BUFF_PAGES */
  if ( !hostOSGetAndPinUserPage(vm, registerMemMsg->logBufferWindow,
           &pg->log_buffer_hostOSPtr[0], &hostPPI, &kernelAddr) ) {
    goto error;
    }
  ad->log_buffer = (Bit8u *) kernelAddr;
  pg->log_buffer[0] = hostPPI;
  where++;
  vm->vmState |= VMStateRegisteredPrintBuffer; /* For now. */
  }


  /* Monitor page directory */
  if ( !(ad->page_dir = (pageEntry_t *) hostOSAllocZeroedPage()) ) {
    goto error;
    }
  where++;
  if (!(pg->page_dir = hostOSGetAllocedPagePhyPage(ad->page_dir))) {
    goto error;
    }
  where++;

  /* Monitor page tables */
  if ( !(ad->page_tbl = hostOSAllocZeroedMem(4096 * MON_PAGE_TABLES)) ) {
    goto error;
    }
  where++;
  if (!hostOSGetAllocedMemPhyPages(pg->page_tbl, MON_PAGE_TABLES,
          ad->page_tbl, 4096 * MON_PAGE_TABLES)) {
    goto error;
    }
  where++;

  /* Map of the linear addresses of page tables currently
   * mapped into the monitor space.
   */
  if ( !(ad->page_tbl_laddr_map = (unsigned *) hostOSAllocZeroedPage()) ) {
    goto error;
    }
  where++;
  if ( !(pg->page_tbl_laddr_map =
             hostOSGetAllocedPagePhyPage(ad->page_tbl_laddr_map)) ) {
    goto error;
    }
  where++;

  /* Nexus page table */
  if ( !(ad->nexus_page_tbl = (page_t *) hostOSAllocZeroedPage()) ) {
    goto error;
    }
  where++;
  if ( !(pg->nexus_page_tbl = hostOSGetAllocedPagePhyPage(ad->nexus_page_tbl)) ) {
    goto error;
    }
  where++;

  /* Transition page table */
  if ( !(ad->transition_PT = (page_t *) hostOSAllocZeroedPage()) ) {
    goto error;
    }
  where++;
  if ( !(pg->transition_PT = hostOSGetAllocedPagePhyPage(ad->transition_PT)) ) {
    goto error;
    }
  where++;

  /* Nexus page */
  if ( !(ad->nexus = (nexus_t *) hostOSAllocZeroedPage()) ) {
    goto error;
    }
  where++;
  if ( !(pg->nexus = hostOSGetAllocedPagePhyPage(ad->nexus)) ) {
    goto error;
    }
  where++;

  /* Monitor IDT */
  if ( !(ad->idt = hostOSAllocZeroedMem(MON_IDT_PAGES*4096)) ) {
    goto error;
    }
  where++;
  if (!hostOSGetAllocedMemPhyPages(pg->idt, MON_IDT_PAGES, ad->idt, MON_IDT_SIZE)) {
    goto error;
    }
  where++;

  /* Monitor GDT */
  if ( !(ad->gdt = hostOSAllocZeroedMem(MON_GDT_PAGES*4096)) ) {
    goto error;
    }
  where++;
  if (!hostOSGetAllocedMemPhyPages(pg->gdt, MON_GDT_PAGES, ad->gdt, MON_GDT_SIZE)) {
    goto error;
    }
  where++;

  /* Monitor LDT */
  if ( !(ad->ldt = hostOSAllocZeroedMem(MON_LDT_PAGES*4096)) ) {
    goto error;
    }
  where++;
  if (!hostOSGetAllocedMemPhyPages(pg->ldt, MON_LDT_PAGES, ad->ldt, MON_LDT_SIZE)) {
    goto error;
    }
  where++;

  /* Monitor TSS */
  if ( !(ad->tss = hostOSAllocZeroedMem(MON_TSS_PAGES*4096)) ) {
    goto error;
    }
  where++;
  if (!hostOSGetAllocedMemPhyPages(pg->tss, MON_TSS_PAGES, ad->tss, MON_TSS_SIZE)) {
    goto error;
    }
  where++;

  /* Monitor IDT stubs */
  if ( !(ad->idt_stubs = hostOSAllocZeroedMem(MON_IDT_STUBS_PAGES*4096)) ) {
    goto error;
    }
  where++;
  if (!hostOSGetAllocedMemPhyPages(pg->idt_stubs, MON_IDT_STUBS_PAGES,
          ad->idt_stubs, MON_IDT_STUBS_SIZE)) {
    goto error;
    }
  where++;

  /* Get the physical pages associated with the vm_t structure. */
  if (!hostOSGetAllocedMemPhyPages(pg->vm, MAX_VM_STRUCT_PAGES, vm, sizeof(*vm))) {
    goto error;
    }
  where++;

  vm->vmState |= VMStateMemAllocated;
  return 0; /* OK. */

error:
  hostUnallocVmPages( vm );
  return( where );
}

/*
 * Unallocate pages/memory used by monitor.
 */

void
hostUnallocVmPages( vm_t *vm )
{
  vm_pages_t *pg = &vm->pages;
  vm_addr_t  *ad = &vm->host.addr;

  /* Guest physical memory pages */
  if (vm->guestPhyMemAddr) {
    hostReleasePinnedUserPages(vm);
    vm->guestPhyMemAddr = 0;
    }
  vm->vmState &= ~VMStateRegisteredPhyMem; /* Bogus for now. */

  /* Monitor page directory */
  if (ad->page_dir) hostOSFreePage(ad->page_dir);

  /* Monitor page tables */
  if (ad->page_tbl) hostOSFreeMem(ad->page_tbl);

  /* Map of linear addresses of page tables mapped into monitor. */
  if (ad->page_tbl_laddr_map) hostOSFreePage(ad->page_tbl_laddr_map);

  /* Nexus page table */
  if (ad->nexus_page_tbl) hostOSFreePage(ad->nexus_page_tbl);

  /* Guest CPU state. */
  if (ad->guest_cpu) hostOSFreePage(ad->guest_cpu);

  /* Transition page table */
  if (ad->transition_PT) hostOSFreePage(ad->transition_PT);

  if (ad->log_buffer) hostOSFreeMem(ad->log_buffer);

  /* Nexus page */
  if (ad->nexus) hostOSFreePage(ad->nexus);

  /* Monitor IDT */
  if (ad->idt) hostOSFreeMem(ad->idt);

  /* Monitor GDT */
  if (ad->gdt) hostOSFreeMem(ad->gdt);

  /* Monitor LDT */
  if (ad->ldt) hostOSFreeMem(ad->ldt);

  /* Monitor TSS */
  if (ad->tss) hostOSFreeMem(ad->tss);

  /* Monitor IDT stubs */
  if (ad->idt_stubs) hostOSFreeMem(ad->idt_stubs);


  /* clear out allocated pages lists */
  mon_memzero(pg, sizeof(*pg));
  mon_memzero(ad, sizeof(*ad));
}

unsigned
hostGetCpuCapabilities(void)
{
  Bit32u eax, ebx, ecx, edx;

  /* Get the highest allowed cpuid level */
  asm volatile (
      "xorl %%eax,%%eax\n\t"
      "cpuid"
      : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
      :
      : "cc"
      );
  if (eax < 1)
    return(0); /* not enough capabilities */

  /* Copy vendor string. */
  hostCpuIDInfo.vendorDWord0 = ebx;
  hostCpuIDInfo.vendorDWord1 = edx;
  hostCpuIDInfo.vendorDWord2 = ecx;

  /* CPUID w/ EAX==1: Processor Signature & Feature Flags */
  asm volatile (
      "movl $1,%%eax\n\t"
      "cpuid"
      : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
      :
      : "cc"
      );
  hostCpuIDInfo.procSignature.raw = eax;
  hostCpuIDInfo.featureFlags.raw  = edx;
  /* Plex86 needs TSC */
  if (hostCpuIDInfo.featureFlags.fields.tsc==0)
    return(0);

  return(1);
}
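
/* Worked example of the vendor DWORD ordering above: CPUID leaf 0 returns
 * the 12-byte vendor string in EBX:EDX:ECX order, so on a GenuineIntel
 * part ebx=="Genu", edx=="ineI", ecx=="ntel" (little-endian byte order
 * within each register).
 */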

/* Map the monitor and guest into the VM. */

unsigned
hostMapMonitor(vm_t *vm)
{
  selector_t monCsSel, monSsSel, monTssSel;
  Bit32u laddr, base;
  unsigned slot;
  guest_context_t *guestContext;
  nexus_t *nexus;
  descriptor_t *gdt;

  /* For convenience, some pointers. */
  guestContext = vm->host.addr.guest_context;
  nexus = vm->host.addr.nexus;
  gdt = vm->host.addr.gdt;

#warning "Is the GDT being cleared of old values?"
  /* +++ should zero out GDT, so prev entries do not remain */

  /* =========================
   * Map in Monitor structures
   * =========================
   */

  /* CS/SS/TSS selectors:
   * For now, hardcode in monitor descriptors at slots 1,2,3.  As we
   * are only running user code in the VM, these are likely safe slots,
   * as they are often used by guest OSes for kernel descriptors.
   */
  monCsSel.raw  = Selector(1, 0, RPL0);
  monSsSel.raw  = Selector(2, 0, RPL0);
  monTssSel.raw = Selector(3, 0, RPL0);

  /* Search for unused PDE for nexus PT (fixed for now) */
  laddr = 0x70000000;
  vm->mon_pde_mask = laddr & 0xffc00000;
  vm->mon_pdi = laddr >> 22;
  base = MON_BASE_FROM_LADDR(laddr);

  /* Map nexus into monitor/guest address space */
  vm->host.addr.page_dir[laddr >> 22] = vm->host.nexus_pde;

  /* CS/SS/TSS descriptors: Put at fixed GDT location for now. */
  SET_DESCRIPTOR(gdt[monCsSel.fields.index], base, 0xfffff,
                 D_PG, D_D32, D_AVL0, D_PRESENT, D_DPL0, D_CODE | D_READ)
  SET_DESCRIPTOR(gdt[monSsSel.fields.index], base, 0xfffff,
                 D_PG, D_D32, D_AVL0, D_PRESENT, D_DPL0, D_DATA | D_WRITE)
  SET_DESCRIPTOR(gdt[monTssSel.fields.index],
                 base + (Bit32u) vm->guest.addr.tss,
                 sizeof(tss_t)-1,
                 D_BG, 0, D_AVL0, D_PRESENT, D_DPL0, D_TSS)


  /* Fix up the selectors of all IDT entries. */
  for ( slot = 0; slot < 256; slot++ )
    vm->host.addr.idt[slot].selector = monCsSel;

  /* The monitor GDT/IDT loading info. */
  nexus->mon_gdt_info.base  = base + (Bit32u) vm->guest.addr.gdt;
  nexus->mon_gdt_info.limit = MON_GDT_SIZE;
  nexus->mon_idt_info.base  = base + (Bit32u) vm->guest.addr.idt;
  nexus->mon_idt_info.limit = MON_IDT_SIZE;

  /* We don't have a monitor LDT for now. */
  nexus->mon_ldt_sel = 0;

  /* The monitor TSS. */
  nexus->mon_tss_sel = monTssSel.raw;
  vm->host.addr.tss->esp0 = ((Bit32u)vm->guest.addr.nexus) + PAGESIZE;
  vm->host.addr.tss->ss0  = monSsSel.raw;

  /* Monitor code and stack segments. */
  nexus->mon_jmp_info.selector   = monCsSel.raw;
  nexus->mon_stack_info.selector = monSsSel.raw;

  /* Monitor code/data segment base. */
  nexus->mon_base = base;

  vm->vmState |= VMStateMapMonitor;
  return(1);
}
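
/* For reference, an x86 selector packs index(15..3), TI(2) and RPL(1..0),
 * so the values built above work out as follows (assuming the Selector()
 * macro packs exactly those fields, in (index, ti, rpl) order):
 *
 *   Selector(1, 0, RPL0) == (1<<3)|(0<<2)|0 == 0x08  (monitor CS)
 *   Selector(2, 0, RPL0) == (2<<3)|(0<<2)|0 == 0x10  (monitor SS)
 *   Selector(3, 0, RPL0) == (3<<3)|(0<<2)|0 == 0x18  (monitor TSS)
 */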

void
hostInitShadowPaging(vm_t *vm)
{
  pageEntry_t *monPDir;
  Bit32u pdi;
  /*Bit32u cr3_page_index;*/
  /*phy_page_usage_t *pusage;*/

#if 0
  cr3_page_index = A20Addr(vm, vm->guest_cpu.cr3) >> 12;
  if ( cr3_page_index >= vm->pages.guest_n_pages)
    xxxpanic(vm, "monPagingRemap: CR3 conflicts with monitor space\n");
#endif

  /* Reset page table heap */
  vm->ptbl_laddr_map_i = 0;

  /* Clear monitor PD except 4Meg range used by monitor */
  monPDir = vm->host.addr.page_dir;
  for (pdi=0; pdi<1024; pdi++) {
#if ANAL_CHECKS
    vm->host.addr.page_tbl_laddr_map[pdi] = -1; /* max unsigned */
#endif
    if (pdi != vm->mon_pdi)
      monPDir[pdi].raw = 0;
    }

  /* Update vpaging timestamp. */
  vm->vpaging_tsc = vm_rdtsc();

#if 0
  /* When we remap the monitor page tables, IF guest paging is
   * enabled, then mark the page containing the guest page directory
   * as such.  In non-paged mode, there is no page directory.
   */
  if (vm->guest_cpu.cr0.fields.pg) {
    pusage = &vm->pageInfo[cr3_page_index];
    pusage->tsc = vm->vpaging_tsc;
    pusage->attr.raw &= PageUsageSticky;
    pusage->attr.raw |= PageUsagePDir;
    pusage->attr.fields.access_perm = PagePermNA;
    if (pusage->attr.raw & PageBadUsage4PDir)
      xxxpanic(vm, "monPagingRemap: BadUsage4PDir\n");
    }
#endif
}

void
hostReleasePinnedUserPages(vm_t *vm)
{
  unsigned ppi;
  unsigned dirty;
  unsigned nPages;
  Bit32u kernelAddr;

  /* Unpin the pages associated with the guest physical memory. */
  nPages = vm->pages.guest_n_pages;
  for (ppi=0; ppi<nPages; ppi++) {
    if ( vm->pageInfo[ppi].attr.fields.pinned ) {
      void *osSpecificPtr;

      osSpecificPtr = (void *) vm->hostStructPagePtr[ppi];
#warning "Conditionalize page dirtying before page release."
      dirty = 1; /* FIXME: 1 for now. */
      hostOSUnpinUserPage(vm,
          vm->guestPhyMemAddr + (ppi<<12),
          osSpecificPtr,
          ppi,
          0 /* There was no host kernel addr mapped for this page. */,
          dirty);
      vm->pageInfo[ppi].attr.fields.pinned = 0;
      }
    }

  /* Unpin the pages associated with the guest_cpu area. */
  kernelAddr = (Bit32u) vm->host.addr.guest_cpu;
  hostOSUnpinUserPage(vm,
      0, /* User space address. */
      vm->pages.guest_cpu_hostOSPtr,
      vm->pages.guest_cpu,
      &kernelAddr,
      1 /* Dirty. */);

  /* Unpin the pages associated with the log buffer area. */
  kernelAddr = (Bit32u) vm->host.addr.log_buffer;
  hostOSUnpinUserPage(vm,
      0, /* User space address. */
      vm->pages.log_buffer_hostOSPtr[0],
      vm->pages.log_buffer[0],
      &kernelAddr,
      1 /* Dirty. */);
#warning "User space address is passed as 0 for now..."
}

unsigned
hostHandlePagePinRequest(vm_t *vm, Bit32u reqGuestPPI)
{
  Bit32u hostPPI;
  unsigned qIndex;

#warning "We must not unpin open pages (for page walking) here."
  if (vm->guestPhyPagePinQueue.nEntries < MaxPhyPagesPinned) {
    /* There is room in the Q for another entry - we have not reached
     * the upper limit of allowable number of pinned pages.
     */
    qIndex = vm->guestPhyPagePinQueue.nEntries;
    }
  else {
    unsigned dirty;
    Bit32u unpinGuestPPI;

    /* There is no room in the Q for another entry - we have reached
     * the upper limit of allowable number of pinned pages.  We must
     * first unpin a page to free up the limit, then we can pin the
     * requested page.  This keeps plex86 from pinning an unconstrained
     * number of pages at one time.
     */
    qIndex = vm->guestPhyPagePinQueue.tail;
    dirty = 1; /* FIXME: 1 for now. */
    unpinGuestPPI = vm->guestPhyPagePinQueue.ppi[qIndex];
    hostOSUnpinUserPage(vm,
        vm->guestPhyMemAddr + (unpinGuestPPI<<12),
        vm->hostStructPagePtr[unpinGuestPPI],
        unpinGuestPPI,
        0 /* There was no host kernel addr mapped for this page. */,
        dirty);
    vm->pageInfo[unpinGuestPPI].attr.fields.pinned = 0;
    }

  /* Pin the requested guest physical page in the host OS. */
  if ( !hostOSGetAndPinUserPage(vm,
            vm->guestPhyMemAddr + (reqGuestPPI<<12),
            &vm->hostStructPagePtr[reqGuestPPI],
            &hostPPI,
            0 /* Don't need a host kernel address. */
            ) ) {
    hostOSPrint("handlePagePinReq: request to pin failed.\n");
    return(0); /* Fail. */
    }

  /* Pinning activities have succeeded.  Mark this physical page as being
   * pinned, and store its physical address.
   */
  vm->pageInfo[reqGuestPPI].attr.fields.pinned = 1;
  vm->pageInfo[reqGuestPPI].hostPPI = hostPPI;

  /* Now add this entry to the Q. */
  vm->guestPhyPagePinQueue.ppi[qIndex] = reqGuestPPI;

  if (vm->guestPhyPagePinQueue.nEntries < MaxPhyPagesPinned) {
    vm->guestPhyPagePinQueue.nEntries++;
    vm->guestPhyPagePinQueue.tail =
        vm->guestPhyPagePinQueue.nEntries % MaxPhyPagesPinned;
    }
  else {
    /* Leave .nEntries at the maximum value - Q is full. */
    vm->guestPhyPagePinQueue.tail =
        (vm->guestPhyPagePinQueue.tail + 1) % MaxPhyPagesPinned;
    }

  return(1); /* OK. */
}
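
/* Worked example of the pin queue's FIFO behavior, assuming
 * MaxPhyPagesPinned == 4 for illustration:
 *
 *   pin A,B,C,D : nEntries grows 1..4; tail ends at 0 (4 % 4)
 *   pin E       : queue full; page A (slot 0, the oldest) is unpinned,
 *                 E takes slot 0, tail advances to 1
 *   pin F       : page B (slot 1) is unpinned, F takes slot 1, tail -> 2
 *
 * So once the limit is reached, each new pin evicts the least recently
 * pinned page, bounding the number of host pages plex86 holds pinned.
 */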