Got rid of use of mmap() altogether. User space (bochs) allocates
  all the memory it needs, and the plex86 kernel module uses
  get_user_pages() from the Linux kernel to reach those pages and
  pin the few that are needed statically (and, later, up to
  a watermark of pages that are needed dynamically).
Guest physical memory pages are now dynamically pinned/unpinned.
  For now, I use a hard limit of 4Megs of pinned pages and
  a really primitive algorithm to decide which page to unpin
  when the limit is reached and one needs to be bumped. Seems
  to work, though I haven't run into the limit yet since I'm
  using just a small test program.
Kevin Lawton 2003-01-09 04:02:31 +00:00
parent c518ebe945
commit f45a747e59
7 changed files with 348 additions and 317 deletions
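
For context, the core pattern the module now relies on is sketched below (a
minimal sketch against the 2.4-era Linux kernel API; pin_one_user_page() is an
illustrative name, and the flat mem_map plus rwsem-style mmap_sem locking are
assumptions of the sketch, not code from this commit):

#include <linux/mm.h>       /* get_user_pages(), mem_map, put_page() */
#include <linux/sched.h>    /* current */

/* Sketch: pin one user page and compute its physical page index. */
static int pin_one_user_page(unsigned long userAddr, struct page **pagePtr,
                             unsigned long *ppi)
{
  int ret;

  down_read(&current->mm->mmap_sem);  /* get_user_pages() wants mmap_sem held */
  ret = get_user_pages(current, current->mm,
                       userAddr & PAGE_MASK,
                       1,        /* one page */
                       1,        /* intent to write */
                       0,        /* no force */
                       pagePtr,  /* receives the struct page * */
                       NULL);    /* vm_area_structs not needed */
  up_read(&current->mm->mmap_sem);
  if (ret != 1)
    return -EFAULT;               /* could not pin */
  *ppi = *pagePtr - mem_map;      /* pointer arithmetic yields the page index */
  return 0;  /* the page stays pinned until put_page(*pagePtr) */
}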

View File

@ -1,3 +1,8 @@
Main monitor loop should compare cycles burned thus far vs
cycles requested, so it doesn't keep bopping back and forth
between host-kernel and monitor spaces without returning to
user space.
Fix monPrint and friends.
monpanic breaks up into 2 monPrints, which hit user space twice.

View File

@ -338,7 +338,6 @@ plex86_release(struct inode *inode, struct file *filp)
filp->private_data = NULL;
/* Free the virtual memory. */
unreserveGuestPhyPages(vm);
unallocVmPages( vm );
/* Free the VM structure. */
@ -384,94 +383,11 @@ plex86_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vm
#endif
{
vm_t *vm = (vm_t *)file->private_data;
int firstpage, pagesN;
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,0)
void *inode = NULL; /* Not used; for consistency of passing args. */
#endif
int ret;
/* Private mappings make no sense ... */
if ( !(vma->vm_flags & VM_SHARED) ) {
printk(KERN_WARNING "plex86: private mapping\n");
return -EINVAL;
}
#if LINUX_VERSION_CODE < VERSION_CODE(2,3,25)
/* To simplify things, allow only page-aligned offsets */
if ( vma->vm_offset & (PAGE_SIZE - 1) ) {
printk(KERN_WARNING "plex86: unaligned offset %08lx\n", vma->vm_offset);
return -EINVAL;
}
#endif
#if LINUX_VERSION_CODE >= VERSION_CODE(2,3,25)
firstpage = vma->vm_pgoff;
#else
firstpage = vma->vm_offset >> PAGE_SHIFT;
#endif
pagesN = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
ret = genericMMap(vm, inode, file, vma, firstpage, pagesN);
return( - hostConvertPlex86Errno(ret) );
UNUSED(vm);
return -EINVAL;
}
int
hostMMap(vm_t *vm, void *iV, void *fV, void *vmaV,
unsigned pagesN, Bit32u *pagesArray)
{
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,0)
void *inode = NULL;
#else
struct inode * inode = (struct inode *) iV;
#endif
struct file * file = (struct file *) fV;
struct vm_area_struct * vma = (struct vm_area_struct *) vmaV;
unsigned i;
UNUSED(file);
/* Note: this function returns Plex86Errno style errors, since
* it reports to the hostOS-independent logic.
*/
/* Sanity check. */
#if LINUX_VERSION_CODE >= VERSION_CODE(2,3,25)
if ( ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) > pagesN ) {
printk(KERN_WARNING "plex86: mmap sanity checks failed.\n");
return Plex86ErrnoEINVAL;
}
#else
if ( (vma->vm_end - vma->vm_start) > (pagesN << PAGE_SHIFT) ) {
printk(KERN_WARNING "plex86: mmap sanity checks failed.\n");
return Plex86ErrnoEINVAL;
}
#endif
for (i = 0; i < pagesN; i++) {
if ( remap_page_range(vma->vm_start + (i << PAGE_SHIFT),
pagesArray[i] << 12,
PAGE_SIZE,
vma->vm_page_prot) )
/* xxx What about fixing partial remaps? */
return Plex86ErrnoEAGAIN;
}
#if LINUX_VERSION_CODE < VERSION_CODE(2,1,0)
/* Enter our inode into the VMA; no need to change the default ops. */
vma->vm_inode = inode;
if (!inode->i_count)
inode->i_count++;
#else
UNUSED(inode);
#endif
return 0; /* OK. */
}
/************************************************************************/
/* Status reporting: /proc code */
/************************************************************************/
@ -653,42 +569,6 @@ retrievePhyPages(Bit32u *page, int max_pages, void *addr_v, unsigned size)
* these functions needs to be offered for each host-XYZ.c file.
************************************************************************/
void
hostReservePhyPages(vm_t *vm, Bit32u *hostPhyPages, unsigned nPages)
{
unsigned p;
/*
* As we want to map these pages to user space, we need to mark
* them as 'reserved' pages by setting the PG_reserved bit.
*
* This has the effect that:
* - remap_page_range accepts them as candidates for remapping
* - the swapper does *not* try to swap these pages out, even
* after they are mapped to user space
*/
for (p = 0; p < nPages; p++)
#if LINUX_VERSION_CODE >= VERSION_CODE(2,4,0)
set_bit(PG_reserved, &((mem_map + hostPhyPages[p])->flags));
#else
mem_map_reserve(hostPhyPages[p]);
#endif
}
void
hostUnreservePhyPages(vm_t *vm, Bit32u *hostPhyPages, unsigned nPages)
{
unsigned p;
/* Remove the PG_reserved flags before returning the pages. */
for (p = 0; p < nPages; p++)
#if LINUX_VERSION_CODE >= VERSION_CODE(2,4,0)
clear_bit(PG_reserved, &((mem_map + hostPhyPages[p])->flags));
#else
mem_map_unreserve(hostPhyPages[p]);
#endif
}
unsigned
hostIdle(void)
@ -838,43 +718,51 @@ hostCopyToUser(void *to, void *from, unsigned long len)
return( copy_to_user(to, from, len) );
}
unsigned
hostGetAndPinUserPages(vm_t *vm, Bit32u *pagePhyAddrList, void *userPtr,
unsigned nPages)
Bit32u
hostGetAndPinUserPage(vm_t *vm, Bit32u userAddr, void **osSpecificPtr,
Bit32u *ppi, Bit32u *kernelAddr)
{
int ret;
unsigned p;
struct page **linuxKernelPageList;
struct page **pagePtr;
struct page *page;
linuxKernelPageList = (struct page **) vm->pages.hostStructPagePtr;
pagePtr = (struct page **) osSpecificPtr;
ret = get_user_pages(current,
current->mm,
(unsigned long) userPtr,
nPages,
(unsigned long) userAddr,
1, /* 1 page. */
1, /* 'write': intent to write. */
0, /* 'force': ? */
linuxKernelPageList,
pagePtr,
NULL /* struct vm_area_struct *[] */
);
if (ret != nPages) {
if (ret != 1) {
printk(KERN_ERR "plex86: hostGetAndPinUserPage: failed.\n");
return(0); /* Error. */
}
page = *pagePtr; /* The returned "struct page *" value. */
/* Now that we have the "struct page *" for the user page, we can
* calculate its physical page index by simple pointer arithmetic
* based on "mem_map".
*/
for (p=0; p<nPages; p++) {
pagePhyAddrList[p] = linuxKernelPageList[p] - mem_map;
*ppi = page - mem_map;
if (kernelAddr) {
/* Caller wants a kernel address returned which maps to this physical
* address.
*/
*kernelAddr = (Bit32u) kmap( page );
#warning "FIXME: Check return value here."
#warning "Also, conditionally compile for version and high memory support."
}
return(1); /* OK. */
}
void
hostReleasePinnedUserPages(vm_t *vm, Bit32u *pageAddrList, unsigned nPages)
hostUnpinUserPage(vm_t *vm, Bit32u userAddr, void *osSpecificPtr,
Bit32u ppi, Bit32u *kernelAddr, unsigned dirty)
{
#if 0
/* Here is some sample code from Linux 2.4.18, mm/memory.c:__free_pte() */
@ -887,14 +775,23 @@ hostReleasePinnedUserPages(vm_t *vm, Bit32u *pageAddrList, unsigned nPages)
#endif
struct page *page;
unsigned p;
static unsigned iteration = 0;
printk(KERN_WARNING "plex86: Release called %u.\n", iteration++);
for (p=0; p<nPages; p++) {
page = (struct page *) vm->pages.hostStructPagePtr[p];
if (1) /* If dirty. */
set_page_dirty(page);
put_page(page);
}
page = (struct page *) osSpecificPtr;
/* If a kernel address is passed, that means that previously we created
* a mapping for this physical page in the kernel address space.
* We should unmap it. Only really useful for pages allocated from
* high memory.
*/
if (kernelAddr)
kunmap(page);
/* If the page was dirtied due to the guest running in the VM, we
* need to tell the kernel about that since it is not aware of
* the VM page tables.
*/
if (dirty)
set_page_dirty(page);
/* Release/unpin the page. */
put_page(page);
}
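
Taken together, hostGetAndPinUserPage() and hostUnpinUserPage() follow the
standard 2.4-era pin/map/unpin lifecycle. Condensed into one illustrative
function (the function name and the memset() access are assumptions of this
sketch, not part of the driver):

#include <linux/mm.h>        /* get_user_pages(), set_page_dirty(), put_page() */
#include <linux/highmem.h>   /* kmap(), kunmap() */
#include <linux/string.h>    /* memset() */

/* Sketch: complete lifecycle of one pinned user page (2.4-era API). */
static void touch_one_user_page(unsigned long userAddr)
{
  struct page *page;
  void *kva;

  down_read(&current->mm->mmap_sem);
  if (get_user_pages(current, current->mm, userAddr & PAGE_MASK,
                     1, 1, 0, &page, NULL) != 1) {
    up_read(&current->mm->mmap_sem);
    return;                    /* could not pin the page */
  }
  up_read(&current->mm->mmap_sem);

  kva = kmap(page);            /* kernel mapping; only essential for highmem */
  memset(kva, 0, PAGE_SIZE);   /* example access through the kernel mapping */
  kunmap(page);                /* undo the kmap() */

  set_page_dirty(page);        /* the kernel can't see the VM's page tables */
  put_page(page);              /* drop the pin */
}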

View File

@ -204,15 +204,6 @@ typedef struct {
/* pages comprising the vm_t struct itself. */
Bit32u vm[MAX_VM_STRUCT_PAGES];
/* for the guest OS/app code */
Bit32u guestPhyMem[MAX_MON_GUEST_PAGES];
/* This is a hack for now. I need to store the "struct page *"
* information returned by get_user_pages() in the Linux kernel.
* Should clean this up.
*/
void *hostStructPagePtr[MAX_MON_GUEST_PAGES];
/* for the monitor's page directory */
Bit32u page_dir;
@ -228,12 +219,14 @@ typedef struct {
/* For the CPU state passed between user and kernel/monitor space. */
Bit32u guest_cpu;
void *guest_cpu_hostOSPtr;
/* We need a Page Table for identity mapping the transition code */
/* between host and monitor spaces. */
Bit32u transition_PT;
Bit32u log_buffer[LOG_BUFF_PAGES];
void *log_buffer_hostOSPtr[LOG_BUFF_PAGES];
/* Physical addresses of host pages which comprise the actual */
/* monitor structures. These will be mapped into the current */
@ -284,12 +277,12 @@ typedef union {
Bit32u lmap_count:2; /* */
Bit32u ptbl:1; /* page table */
Bit32u pdir:1; /* page directory */
Bit32u vcode:1; /* vcode */
Bit32u spare0:1; /* (spare) */
Bit32u memMapIO:1; /* MemMapIO */
Bit32u RO:1; /* RO */
Bit32u allocated:1; /* Allocated */
Bit32u swappable:1; /* Swappable */
Bit32u spare:1; /* (spare) */
Bit32u pinned:1; /* Pinned by host OS. */
Bit32u spare1:1; /* (spare) */
Bit32u laddr_backlink:20; /* 1st unvirtualized laddr backlink */
} __attribute__ ((packed)) fields;
Bit32u raw;
@ -298,7 +291,9 @@ typedef union {
typedef struct {
phy_page_attr_t attr;
Bit64u tsc; /* for comparing to CR3 timestamp counter */
} __attribute__ ((packed)) phy_page_usage_t;
Bit32u hostPPI;
} __attribute__ ((packed)) phyPageInfo_t;
/* Possible values of the access_perm field above. */
#define PagePermRW 0
@ -375,7 +370,7 @@ typedef struct {
* Complete state of the VM (Virtual Machine).
*/
typedef struct {
Bit8u *guestPhyMemVector; /* Ptr to malloced memory from user space. */
Bit32u guestPhyMemAddr; /* User-space address of malloc()'d guest memory. */
/* Store eflags values of the guest which are virtualized to
* run in the monitor
@ -387,6 +382,7 @@ typedef struct {
unsigned mon_request;
unsigned guestFaultNo;
Bit32u pinReqPPI;
unsigned redirect_vector;
@ -430,7 +426,27 @@ typedef struct {
/* pages contains, and maintain some additional attributes. */
/* We determine which kinds of information reside in the page, */
/* dynamically. */
phy_page_usage_t page_usage[MAX_MON_GUEST_PAGES];
phyPageInfo_t pageInfo[MAX_MON_GUEST_PAGES];
/* This is a hack for now. I need to store the "struct page *"
* information returned by get_user_pages() in the Linux kernel.
* Should clean this up.
*/
void *hostStructPagePtr[MAX_MON_GUEST_PAGES];
/* A revolving queue, which stores information on guest physical memory
* pages which are currently pinned. Only a certain number of pages
* may be pinned at any one time. This is a really simplistic
* strategy - when the Q is full, the page which was pinned the
* longest time ago is unpinned to make room. It's a
* "least recently pinned" strategy.
*/
#define MaxPhyPagesPinned 1024 /* 4Megs of pinned pages max per VM. */
struct {
unsigned nEntries; /* Number of entries in table. */
unsigned tail;
Bit32u ppi[MaxPhyPagesPinned]; /* Physical Page Index of pinned guest page. */
} guestPhyPagePinQueue;
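/* Worked example of the queue arithmetic (an illustrative note, not part
* of the original header): while nEntries < MaxPhyPagesPinned, a new pin
* lands at index nEntries and tail becomes nEntries % MaxPhyPagesPinned,
* so tail wraps to 0 exactly when the Q fills. Once the Q is full,
* ppi[tail] names the oldest pin: that page is unpinned, the new ppi
* overwrites its slot, and tail advances to (tail+1) % MaxPhyPagesPinned.
*/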
struct {
volatile unsigned event; /* Any log event occurred. */
@ -601,10 +617,8 @@ int ioctlGeneric(vm_t *vm, void *inode, void *filp,
int ioctlExecute(vm_t *vm, plex86IoctlExecute_t *executeMsg);
int ioctlRegisterMem(vm_t *vm, plex86IoctlRegisterMem_t *registerMsg);
void copyGuestStateToUserSpace(vm_t *vm);
void unreserveGuestPhyPages(vm_t *vm);
void reserveGuestPhyPages(vm_t *vm);
int genericMMap(vm_t *vm, void *inode, void *file, void *vma,
unsigned firstPage, unsigned pagesN);
void releasePinnedUserPages(vm_t *vm);
unsigned handlePagePinRequest(vm_t *vm, Bit32u reqPPI);
/* These are the functions that the host-OS-specific file of the
* plex86 device driver must define.
@ -616,21 +630,20 @@ void *hostAllocZeroedPage(void);
void hostFreePage(void *ptr);
unsigned hostGetAllocedMemPhyPages(Bit32u *page, int max_pages, void *ptr,
unsigned size);
unsigned hostGetAndPinUserPages(vm_t *vm, Bit32u *pageList, void *userPtr,
unsigned sizeInPages);
void hostReleasePinnedUserPages(vm_t *vm, Bit32u *pageList, unsigned nPages);
Bit32u hostGetAndPinUserPage(vm_t *vm, Bit32u userAddr, void **osSpecificPtr,
Bit32u *ppi, Bit32u *kernelAddr);
void hostUnpinUserPage(vm_t *vm, Bit32u userAddr, void *osSpecificPtr,
Bit32u ppi, Bit32u *kernelAddr, unsigned dirty);
Bit32u hostGetAllocedPagePhyPage(void *ptr);
void hostPrint(char *fmt, ...);
Bit32u hostKernelOffset(void);
void hostReservePhyPages(vm_t *vm, Bit32u *hostPhyPages, unsigned nPages);
void hostUnreservePhyPages(vm_t *vm, Bit32u *hostPhyPages, unsigned nPages);
int hostConvertPlex86Errno(unsigned ret);
unsigned hostMMapCheck(void *i, void *f);
void hostModuleCountReset(vm_t *vm, void *inode, void *filp);
unsigned long hostCopyFromUser(void *to, void *from, unsigned long len);
unsigned long hostCopyToUser(void *to, void *from, unsigned long len);
int hostMMap(vm_t *vm, void *iV, void *fV, void *vmaV,
unsigned pagesN, Bit32u *pagesArray);
#endif /* HOST Space */
@ -658,6 +671,7 @@ void monpanic(vm_t *, char *fmt, ...) __attribute__ ((noreturn));
void monpanic_nomess(vm_t *);
void toHostGuestFault(vm_t *, unsigned fault);
void toHostPinUserPage(vm_t *, Bit32u ppi);
void guestPageFault(vm_t *, guest_context_t *context, Bit32u cr2);
void *open_guest_phy_page(vm_t *, Bit32u ppage_index, Bit8u *mon_offset);
@ -673,7 +687,7 @@ unsigned mapGuestLinAddr(vm_t *, Bit32u guest_laddr,
Bit32u *guest_ppage_index, unsigned us,
unsigned rw, Bit32u attr, Bit32u *error);
unsigned addPageAttributes(vm_t *, Bit32u ppi, Bit32u attr);
phy_page_usage_t *getPageUsage(vm_t *, Bit32u ppage_index);
phyPageInfo_t *getPageUsage(vm_t *, Bit32u ppage_index);
void virtualize_lconstruct(vm_t *, Bit32u l0, Bit32u l1, unsigned perm);
unsigned getMonPTi(vm_t *, unsigned pdi, unsigned source);

View File

@ -150,6 +150,8 @@ initMonitor(vm_t *vm)
mon_memzero(vm->host.addr.tss, MON_TSS_PAGES*4096);
mon_memzero(vm->host.addr.idt_stubs, MON_IDT_STUBS_PAGES*4096);
vm->guestPhyPagePinQueue.nEntries = 0;
vm->guestPhyPagePinQueue.tail = 0;
/*
* ================
@ -522,14 +524,14 @@ error:
initGuestPhyMem(vm_t *vm)
{
unsigned i;
mon_memzero(vm->page_usage, sizeof(vm->page_usage));
mon_memzero(vm->pageInfo, sizeof(vm->pageInfo));
for (i=0; i<vm->pages.guest_n_pages; i++) {
/* For now, we start out by preallocating physical pages */
/* for the guest, though not necessarily mapped into linear */
/* space. */
vm->page_usage[i].attr.raw = 0;
vm->page_usage[i].tsc = 0;
vm->page_usage[i].attr.fields.allocated = 1;
vm->pageInfo[i].attr.raw = 0;
vm->pageInfo[i].tsc = 0;
vm->pageInfo[i].attr.fields.allocated = 1;
}
{
@ -540,13 +542,13 @@ initGuestPhyMem(vm_t *vm)
rom_page = 0xf0000 >> 12;
npages = (1 + 0xfffff - 0xf0000) / 4096;
for (i=0; i<npages; i++)
vm->page_usage[rom_page + i].attr.fields.RO = 1;
vm->pageInfo[rom_page + i].attr.fields.RO = 1;
/* Mark VGA BIOS ROM area as ReadOnly */
rom_page = 0xc0000 >> 12;
npages = (1 + 0xc7fff - 0xc0000) / 4096;
for (i=0; i<npages; i++)
vm->page_usage[rom_page + i].attr.fields.RO = 1;
vm->pageInfo[rom_page + i].attr.fields.RO = 1;
}
#if 1
@ -558,7 +560,7 @@ initGuestPhyMem(vm_t *vm)
vga_page = 0xa0000 >> 12;
npages = (1 + 0xbffff - 0xa0000) / 4096;
for (i=0; i<npages; i++)
vm->page_usage[vga_page + i].attr.fields.memMapIO = 1;
vm->pageInfo[vga_page + i].attr.fields.memMapIO = 1;
}
#endif
@ -734,7 +736,7 @@ ioctlGeneric(vm_t *vm, void *inode, void *filp,
*/
case PLEX86_TEARDOWN:
/* We can't use the VMStateMMapAll bits, because we don't hook
* mmap().
* munmap().
*/
if ( hostMMapCheck(inode, filp) ) {
@ -742,13 +744,8 @@ ioctlGeneric(vm_t *vm, void *inode, void *filp,
hostPrint("plex86: guest memory is still mmap()'d!\n");
return -Plex86ErrnoEBUSY;
}
/* Remove mmap()'d flag bits from state. The user must have done
* all the appropriate mmap() calls.
*/
vm->vmState &= ~VMStateMMapAll;
vm->vmState &= ~VMStateMMapAll;
#warning "Add check before calling unreserveGuestPhyPages()"
unreserveGuestPhyPages(vm);
unallocVmPages(vm);
/* Reset state to only FD opened. */
@ -1104,6 +1101,13 @@ ioctlExecute(vm_t *vm, plex86IoctlExecute_t *executeMsg)
retval = 106;
goto handlePanic;
case MonReqPinUserPage:
if ( !handlePagePinRequest(vm, vm->pinReqPPI) ) {
retval = 108;
goto handlePanic;
}
continue; /* Back to VM monitor. */
default:
hostPrint("ioctlExecute: default case (%u).\n", vm->mon_request);
retval = 107;
@ -1123,7 +1127,7 @@ ioctlExecute(vm_t *vm, plex86IoctlExecute_t *executeMsg)
}
/* Should not get here. */
retval = 108;
retval = 109;
goto handlePanic;
handleFail:
@ -1259,9 +1263,52 @@ ioctlRegisterMem(vm_t *vm, plex86IoctlRegisterMem_t *registerMemMsg)
return -Plex86ErrnoEINVAL;
/* Check that the guest memory vector is page aligned. */
if ( ((unsigned)registerMemMsg->vector) & 0xfff )
if ( registerMemMsg->guestPhyMemVector & 0xfff )
return -Plex86ErrnoEINVAL;
/* Check that the log buffer area is page aligned. */
if ( registerMemMsg->logBufferWindow & 0xfff )
return -Plex86ErrnoEINVAL;
/* Check that the guest CPU area is page aligned. */
if ( registerMemMsg->guestCPUWindow & 0xfff )
return -Plex86ErrnoEINVAL;
/* Check that none of the user areas overlap. Since there are several
* regions, use generic code that handles any number of regions.
*/
{
#define NumUserRegions 3
struct {
Bit32u min, max;
} userRegion[NumUserRegions];
unsigned i,j;
userRegion[0].min = registerMemMsg->guestPhyMemVector;
userRegion[0].max = userRegion[0].min + (registerMemMsg->nMegs<<20) - 1;
userRegion[1].min = registerMemMsg->logBufferWindow;
userRegion[1].max = userRegion[1].min + LOG_BUFF_SIZE - 1;
userRegion[2].min = registerMemMsg->guestCPUWindow;
userRegion[2].max = userRegion[2].min + (4096) - 1;
for (i=0; i<NumUserRegions; i++) {
for (j=0; j<NumUserRegions; j++) {
if (j == i)
continue; /* Don't compare a region with itself. */
/* Check for min(j) contained in region(i). */
if ( (userRegion[j].min >= userRegion[i].min) &&
(userRegion[j].min <= userRegion[i].max) )
return -Plex86ErrnoEINVAL;
/* Check for max(j) contained in region(i). */
if ( (userRegion[j].max >= userRegion[i].min) &&
(userRegion[j].max <= userRegion[i].max) )
return -Plex86ErrnoEINVAL;
}
}
}
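/* Equivalent compact test (an illustrative note): regions i and j overlap
* exactly when (userRegion[i].min <= userRegion[j].max) &&
* (userRegion[j].min <= userRegion[i].max); the containment checks above
* enumerate the same cases pairwise.
*/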
/* Allocate memory */
if ( (error = allocVmPages(vm, registerMemMsg)) != 0 ) {
hostPrint("plex86: allocVmPages failed at %u\n",
@ -1269,12 +1316,8 @@ ioctlRegisterMem(vm_t *vm, plex86IoctlRegisterMem_t *registerMemMsg)
return -Plex86ErrnoENOMEM;
}
/* Mark guest pages as reserved (for mmap()). */
reserveGuestPhyPages(vm);
/* Initialize the guests physical memory. */
if ( initGuestPhyMem(vm) ) {
unreserveGuestPhyPages(vm);
unallocVmPages(vm);
return -Plex86ErrnoEFAULT;
}
@ -1282,7 +1325,6 @@ ioctlRegisterMem(vm_t *vm, plex86IoctlRegisterMem_t *registerMemMsg)
/* Initialize the monitor. */
if ( !initMonitor(vm) ||
!mapMonitor(vm) ) {
unreserveGuestPhyPages(vm);
unallocVmPages(vm);
return -Plex86ErrnoEFAULT;
}
@ -1319,14 +1361,38 @@ allocVmPages(vm_t *vm, plex86IoctlRegisterMem_t *registerMemMsg)
goto error;
}
where++;
if ( !hostGetAndPinUserPages(vm, pg->guestPhyMem, registerMemMsg->vector,
pg->guest_n_pages) ) {
goto error;
}
vm->guestPhyMemVector = registerMemMsg->vector;
vm->guestPhyMemAddr = registerMemMsg->guestPhyMemVector;
#warning "VMStateMMapPhyMem bogus"
vm->vmState |= VMStateMMapPhyMem; /* Bogus for now. */
where++;
{
Bit32u hostPPI, kernelAddr;
/* Guest CPU state (malloc()'d in user space). */
if ( !hostGetAndPinUserPage(vm, registerMemMsg->guestCPUWindow,
&pg->guest_cpu_hostOSPtr, &hostPPI, &kernelAddr) ) {
goto error;
}
ad->guest_cpu = (guest_cpu_t *) kernelAddr;
pg->guest_cpu = hostPPI;
vm->vmState |= VMStateMMapGuestCPU; /* For now. */
where++;
/* Log buffer area (malloc()'d in user space). */
/* LOG_BUFF_PAGES */
if ( !hostGetAndPinUserPage(vm, registerMemMsg->logBufferWindow,
&pg->log_buffer_hostOSPtr[0], &hostPPI, &kernelAddr) ) {
goto error;
}
ad->log_buffer = (Bit8u *) kernelAddr;
pg->log_buffer[0] = hostPPI;
where++;
vm->vmState |= VMStateMMapPrintBuffer; /* For now. */
}
/* Monitor page directory */
if ( !(ad->page_dir = (pageEntry_t *)hostAllocZeroedPage()) ) {
goto error;
@ -1370,16 +1436,6 @@ allocVmPages(vm_t *vm, plex86IoctlRegisterMem_t *registerMemMsg)
}
where++;
/* Guest CPU state (mapped into user space also). */
if ( !(ad->guest_cpu = (guest_cpu_t *)hostAllocZeroedPage()) ) {
goto error;
}
where++;
if ( !(pg->guest_cpu = hostGetAllocedPagePhyPage(ad->guest_cpu)) ) {
goto error;
}
where++;
/* Transition page table */
if ( !(ad->transition_PT = (page_t *)hostAllocZeroedPage()) ) {
goto error;
@ -1390,16 +1446,6 @@ allocVmPages(vm_t *vm, plex86IoctlRegisterMem_t *registerMemMsg)
}
where++;
if ( !(ad->log_buffer = hostAllocZeroedMem(4096 * LOG_BUFF_PAGES)) ) {
goto error;
}
where++;
if (!hostGetAllocedMemPhyPages(pg->log_buffer, LOG_BUFF_PAGES,
ad->log_buffer, 4096 * LOG_BUFF_PAGES)) {
goto error;
}
where++;
/* Nexus page */
if ( !(ad->nexus = (nexus_t *)hostAllocZeroedPage()) ) {
goto error;
@ -1487,9 +1533,9 @@ unallocVmPages( vm_t *vm )
vm_addr_t *ad = &vm->host.addr;
/* Guest physical memory pages */
if (vm->guestPhyMemVector) {
hostReleasePinnedUserPages(vm, pg->guestPhyMem, pg->guest_n_pages);
vm->guestPhyMemVector = 0;
if (vm->guestPhyMemAddr) {
releasePinnedUserPages(vm);
vm->guestPhyMemAddr = 0;
}
#warning "Fix bogus VMStateMMapPhyMem."
vm->vmState &= ~VMStateMMapPhyMem; /* Bogus for now. */
@ -1696,7 +1742,7 @@ initShadowPaging(vm_t *vm)
* as such. In non-paged mode, there is no page directory.
*/
if (vm->guest_cpu.cr0.fields.pg) {
pusage = &vm->page_usage[cr3_page_index];
pusage = &vm->pageInfo[cr3_page_index];
pusage->tsc = vm->vpaging_tsc;
pusage->attr.raw &= PageUsageSticky;
pusage->attr.raw |= PageUsagePDir;
@ -1707,85 +1753,118 @@ initShadowPaging(vm_t *vm)
#endif
}
void
reserveGuestPhyPages(vm_t *vm)
{
/* Mark guest pages as reserved (for mmap()). */
hostReservePhyPages(vm, vm->pages.log_buffer, LOG_BUFF_PAGES);
hostReservePhyPages(vm, &vm->pages.guest_cpu, 1);
}
void
unreserveGuestPhyPages(vm_t *vm)
releasePinnedUserPages(vm_t *vm)
{
hostUnreservePhyPages(vm, vm->pages.log_buffer, LOG_BUFF_PAGES);
hostUnreservePhyPages(vm, &vm->pages.guest_cpu, 1);
unsigned ppi;
unsigned dirty;
unsigned nPages;
Bit32u kernelAddr;
/* Unpin the pages associated with the guest physical memory. */
nPages = vm->pages.guest_n_pages;
for (ppi=0; ppi<nPages; ppi++) {
if ( vm->pageInfo[ppi].attr.fields.pinned ) {
void *osSpecificPtr;
osSpecificPtr = (void *) vm->hostStructPagePtr[ppi];
#warning "Conditionalize page dirtying before page release."
dirty = 1; /* FIXME: 1 for now. */
hostUnpinUserPage(vm,
vm->guestPhyMemAddr + (ppi<<12),
osSpecificPtr,
ppi,
0 /* There was no host kernel addr mapped for this page. */,
dirty);
vm->pageInfo[ppi].attr.fields.pinned = 0;
}
}
/* Unpin the pages associated with the guest_cpu area. */
kernelAddr = (Bit32u) vm->host.addr.guest_cpu;
hostUnpinUserPage(vm,
0, /* User space address. */
vm->pages.guest_cpu_hostOSPtr,
vm->pages.guest_cpu,
&kernelAddr,
1 /* Dirty. */);
/* Unpin the pages associated with the log buffer area. */
kernelAddr = (Bit32u) vm->host.addr.log_buffer;
hostUnpinUserPage(vm,
0, /* User space address. */
vm->pages.log_buffer_hostOSPtr[0],
vm->pages.log_buffer[0],
&kernelAddr,
1 /* Dirty. */);
#warning "User space address is passed as 0 for now..."
}
int
genericMMap(vm_t *vm, void *inode, void *file, void *vma, unsigned firstPage,
unsigned pagesN)
unsigned
handlePagePinRequest(vm_t *vm, Bit32u reqGuestPPI)
{
unsigned stateMask;
Bit32u *pagesArray;
int ret;
Bit32u hostPPI;
unsigned qIndex;
/* The memory map:
* guest physical memory (guest_n_pages)
* log_buffer (1)
* guest_cpu (1)
*/
/* Must have memory allocated. */
if (!vm->pages.guest_n_pages) {
hostPrint("plex86: genericMMap: device not initialized\n");
return Plex86ErrnoEACCES;
}
#if 0
if ( firstPage == 0 ) {
if (pagesN != vm->pages.guest_n_pages) {
hostPrint("plex86: mmap of guest phy mem, "
"pagesN of %u != guest_n_pages of %u\n",
pagesN, vm->pages.guest_n_pages);
return Plex86ErrnoEINVAL;
}
/* hostPrint("plex86: found mmap of guest phy memory.\n"); */
pagesArray = &vm->pages.guest[0];
stateMask = VMStateMMapPhyMem;
}
else
#endif
if ( firstPage == (vm->pages.guest_n_pages+0) ) {
if (pagesN != 1) {
hostPrint("plex86: mmap of log_buffer, pages>1.\n");
return Plex86ErrnoEINVAL;
}
/* hostPrint("plex86: found mmap of log_buffer.\n"); */
pagesArray = &vm->pages.log_buffer[0];
stateMask = VMStateMMapPrintBuffer;
}
else if ( firstPage == (vm->pages.guest_n_pages+1) ) {
if (pagesN != 1) {
hostPrint("plex86: mmap of guest_cpu, pages>1.\n");
return Plex86ErrnoEINVAL;
}
/* hostPrint("plex86: found mmap of guest_cpu.\n"); */
pagesArray = &vm->pages.guest_cpu;
stateMask = VMStateMMapGuestCPU;
#warning "We must not unpin open pages (for page walking) here."
if (vm->guestPhyPagePinQueue.nEntries < MaxPhyPagesPinned) {
/* There is room in the Q for another entry - we have not reached
* the upper limit of allowable number of pinned pages.
*/
qIndex = vm->guestPhyPagePinQueue.nEntries;
}
else {
hostPrint("plex86: mmap with firstPage of 0x%x.\n", firstPage);
return Plex86ErrnoEINVAL;
unsigned dirty;
Bit32u unpinGuestPPI;
/* There is no room in the Q for another entry - we have reached
* the upper limit of allowable number of pinned pages. We must
* first unpin a page to free up the limit, then we can pin the
* requested page. This keeps plex86 from pinning an unconstrained
* number of pages at one time.
*/
qIndex = vm->guestPhyPagePinQueue.tail;
dirty = 1; /* FIXME: 1 for now. */
unpinGuestPPI = vm->guestPhyPagePinQueue.ppi[qIndex];
hostUnpinUserPage(vm,
vm->guestPhyMemAddr + (unpinGuestPPI<<12),
vm->hostStructPagePtr[unpinGuestPPI],
unpinGuestPPI,
0 /* There was no host kernel addr mapped for this page. */,
dirty);
vm->pageInfo[unpinGuestPPI].attr.fields.pinned = 0;
}
/* Call the hostOS-specific mmap code. */
ret = hostMMap(vm, inode, file, vma, pagesN, pagesArray);
if (ret != 0) {
/* Host-specific mmap code returned an error. Return that. */
return( ret );
/* Pin the requested guest physical page in the host OS. */
if ( !hostGetAndPinUserPage(vm,
vm->guestPhyMemAddr + (reqGuestPPI<<12),
&vm->hostStructPagePtr[reqGuestPPI],
&hostPPI,
0 /* Don't need a host kernel address. */
) ) {
hostPrint("handlePagePinReq: request to pin failed.\n");
return(0); /* Fail. */
}
vm->vmState |= stateMask;
return 0; /* OK. */
/* Pinning activities have succeeded. Mark this physical page as being
* pinned, and store its host physical page index.
*/
vm->pageInfo[reqGuestPPI].attr.fields.pinned = 1;
vm->pageInfo[reqGuestPPI].hostPPI = hostPPI;
/* Now add this entry to the Q. */
vm->guestPhyPagePinQueue.ppi[qIndex] = reqGuestPPI;
if (vm->guestPhyPagePinQueue.nEntries < MaxPhyPagesPinned) {
vm->guestPhyPagePinQueue.nEntries++;
vm->guestPhyPagePinQueue.tail =
vm->guestPhyPagePinQueue.nEntries % MaxPhyPagesPinned;
}
else {
/* Leave .nEntries at the maximum value - Q is full. */
vm->guestPhyPagePinQueue.tail =
(vm->guestPhyPagePinQueue.tail + 1) % MaxPhyPagesPinned;
}
return(1); /* OK. */
}

View File

@ -27,27 +27,37 @@
void
sysFlushPrintBuf(vm_t *vm)
{
CLI();
vm->mon_request = MonReqFlushPrintBuf;
vm->guest.__mon2host();
STI();
CLI();
vm->mon_request = MonReqFlushPrintBuf;
vm->guest.__mon2host();
STI();
}
void
sysRemapMonitor(vm_t *vm)
{
CLI();
vm->mon_request = MonReqRemapMonitor;
vm->guest.__mon2host();
STI();
CLI();
vm->mon_request = MonReqRemapMonitor;
vm->guest.__mon2host();
STI();
}
void
toHostGuestFault(vm_t *vm, unsigned fault)
{
CLI();
vm->mon_request = MonReqGuestFault;
vm->guestFaultNo = fault;
vm->guest.__mon2host();
STI();
CLI();
vm->mon_request = MonReqGuestFault;
vm->guestFaultNo = fault;
vm->guest.__mon2host();
STI();
}
void
toHostPinUserPage(vm_t *vm, Bit32u ppi)
{
CLI();
vm->mon_request = MonReqPinUserPage;
vm->pinReqPPI = ppi;
vm->guest.__mon2host();
STI();
}
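/* Flow note (descriptive, not in the original source): __mon2host()
* world-switches from the monitor back to the host kernel, where
* ioctlExecute() sees MonReqPinUserPage, calls
* handlePagePinRequest(vm, vm->pinReqPPI), and then re-enters the monitor.
*/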

View File

@ -27,7 +27,7 @@
static unsigned allocatePT(vm_t *, unsigned pdi);
static unsigned strengthenPagePermissions(vm_t *, phy_page_usage_t *usage,
static unsigned strengthenPagePermissions(vm_t *, phyPageInfo_t *usage,
unsigned new_access_perm);
/*static void sanity_check_pdir(vm_t *vm, unsigned id, Bit32u guest_laddr); */
@ -69,8 +69,30 @@ page fault because of monP?E.RW==0, but guestP?E==1
/* +++ what about virtualized linear structs like GDT, IDT, ... */
#endif
#warning "Have to be careful unpinning a page which is open"
#warning " via open_guest_phy_page(). Multiple pages could be"
#warning " open in the page walk at one time until D/A bits are set."
static inline Bit32u
getHostOSPinnedPage(vm_t *vm, Bit32u ppi)
{
/* If physical page is already pinned by host OS, then we already
* know the physical address of the page.
*/
if (vm->pageInfo[ppi].attr.fields.pinned)
return( vm->pageInfo[ppi].hostPPI );
/* Page is not yet pinned by the host OS. We need to ask the host
* OS to pin this page, and then retrieve the resulting
* physical address.
*/
toHostPinUserPage(vm, ppi);
if ( !vm->pageInfo[ppi].attr.fields.pinned )
monpanic(vm, "getHostOSPinnedPage: page was not marked pinned.\n");
return( vm->pageInfo[ppi].hostPPI );
}
unsigned
allocatePT(vm_t *vm, unsigned pdi)
@ -121,7 +143,7 @@ getMonPTi(vm_t *vm, unsigned pdi, unsigned source)
* this first, before remapping with the new permissions.
*/
unsigned
strengthenPagePermissions(vm_t *vm, phy_page_usage_t *pusage,
strengthenPagePermissions(vm_t *vm, phyPageInfo_t *pusage,
unsigned new_access_perm)
{
pusage->attr.fields.access_perm = new_access_perm;
@ -184,12 +206,12 @@ monpanic(vm, "strengthenPP: multiple lin addr\n");
unsigned
addPageAttributes(vm_t *vm, Bit32u ppi, Bit32u req_attr)
{
phy_page_usage_t *pusage;
phyPageInfo_t *pusage;
unsigned new_access_perm;
VM_ASSERT(vm, ppi < vm->pages.guest_n_pages);
pusage = &vm->page_usage[ppi];
pusage = &vm->pageInfo[ppi];
if (pusage->tsc < vm->vpaging_tsc) {
/* The dynamic attributes for this page are not valid since
* the last remap. getPageUsage() has logic to build attributes.
@ -215,13 +237,13 @@ addPageAttributes(vm_t *vm, Bit32u ppi, Bit32u req_attr)
return 0;
}
phy_page_usage_t *
phyPageInfo_t *
getPageUsage(vm_t *vm, Bit32u ppi)
{
phy_page_usage_t *pusage;
phyPageInfo_t *pusage;
VM_ASSERT(vm, ppi < vm->pages.guest_n_pages);
pusage = &vm->page_usage[ppi];
pusage = &vm->pageInfo[ppi];
if (pusage->tsc < vm->vpaging_tsc) {
/* The dynamic attributes for this page are not valid since
@ -258,7 +280,7 @@ open_guest_phy_page(vm_t *vm, Bit32u ppi, Bit8u *mon_offset)
/* Remap the base field. All the rest of the fields are */
/* set previously, and can remain the same. */
pageTable->pte[pti].fields.base = vm->pages.guestPhyMem[ppi];
pageTable->pte[pti].fields.base = getHostOSPinnedPage(vm, ppi);
invlpg_mon_offset( (Bit32u) mon_offset );
return(mon_offset);
}
@ -341,7 +363,7 @@ mapGuestLinAddr(vm_t *vm, Bit32u guest_laddr, Bit32u *guest_ppi,
pageEntry_t *guestPDir, guestPDE, *guestPTbl, guestPTE;
Bit32u guest_pdir_page_index;
unsigned pt_index, us, rw;
phy_page_usage_t *pusage;
phyPageInfo_t *pusage;
unsigned wasRemap = 0;
guest_lpage_index = guest_laddr >> 12;
@ -471,7 +493,7 @@ mapIntoMonitor:
if (monPDE->fields.P == 0) {
/* OK, Lazy PT map/allocate */
if (vm->guest.addr.guest_cpu->cr0.fields.pg) {
phy_page_usage_t *pde_pusage;
phyPageInfo_t *pde_pusage;
pde_pusage =
getPageUsage(vm, A20PageIndex(vm, guestPDE.fields.base));
@ -620,7 +642,7 @@ return(MapLinEmulate);
/* Base/Avail=0/G=0/PS=0/D=d/A=a/PCD=0/PWT=0/US=1/RW=rw/P=1 */
monPTE->raw =
(vm->pages.guestPhyMem[*guest_ppi] << 12) | (guestPTE.raw & 0x60) |
(getHostOSPinnedPage(vm, *guest_ppi) << 12) | (guestPTE.raw & 0x60) |
0x5 | (rw<<1);
}
else { /* CR0.PG==0 */
@ -634,7 +656,7 @@ return(MapLinEmulate);
return(MapLinEmulate);
/* Base/Avail=0/G=0/PS=0/D=0/A=0/PCD=0/PWT=0/US=1/RW=rw/P=1 */
monPTE->raw =
(vm->pages.guestPhyMem[*guest_ppi] << 12) | 0x5 | (rw<<1);
(getHostOSPinnedPage(vm, *guest_ppi) << 12) | 0x5 | (rw<<1);
}
/* Mark physical page as having an unvirtualized linear address

View File

@ -1,5 +1,5 @@
/************************************************************************
* $Id: plex86.h,v 1.4 2003-01-08 17:19:57 kevinlawton Exp $
* $Id: plex86.h,v 1.5 2003-01-09 04:02:30 kevinlawton Exp $
************************************************************************
*
* plex86: run multiple x86 operating systems concurrently
@ -196,6 +196,7 @@ typedef struct {
#define MonReqRedirect 4 /* Only to host-kernel. */
#define MonReqRemapMonitor 5
#define MonReqGuestFault 6
#define MonReqPinUserPage 7
#define MonReqPanic 8
#define VMStateFDOpened 0x001
@ -237,7 +238,10 @@ typedef struct {
typedef struct {
unsigned nMegs;
Bit8u *vector;
Bit32u guestPhyMemVector;
Bit32u logBufferWindow;
Bit32u guestCPUWindow;
} plex86IoctlRegisterMem_t;
#endif /* #ifndef __PLEX86_H__ */