Bochs/bochs/plex86/kernel/host-linux.c
Kevin Lawton 87d648682e Code cleanup. Moved the interrupt redirection instrumentation
into the host-specific files and wrapped access to them
  with atomic operations since that's a structure global to
  all the VMs.  I think all the other globals are SMP clean
  since they are only written once during module init time,
  and read thereafter by all VMs.
Renamed all host OS specific functions to hostOS*().  All host
  independent functions to host*().
I'd like to rename all monitor space functions to mon*() next.
2003-01-10 04:27:51 +00:00

801 lines
21 KiB
C

/*
* plex86: run multiple x86 operating systems concurrently
* Copyright (C) 1999-2003 Kevin P. Lawton
*
* host-linux.c: Linux specific VM host driver functionality
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "plex86.h"
#define IN_HOST_SPACE
#include "monitor.h"
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/wrapper.h>
#include <linux/version.h>
#include <asm/irq.h>
#include <asm/atomic.h>
#ifndef VERSION_CODE
# define VERSION_CODE(vers,rel,seq) ( ((vers)<<16) | ((rel)<<8) | (seq) )
#endif
#if LINUX_VERSION_CODE < VERSION_CODE(2,4,20)
/* I use get_user_pages() to find and pin physical pages of memory
* underlying the guest physical memory malloc()'d from user space.
* This became an exported symbol available for kernel modules
* as of 2.4.20. You will have to recode some functions for
* lesser kernels.
*/
# error "Currently, you need Linux kernel 2.4.20 or above."
#endif
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,0)
# include <asm/uaccess.h>
#endif
#include <asm/io.h>
/************************************************************************/
/* Compatibility macros & convenience functions for older kernels */
/************************************************************************/
/* Kernels whose headers do not provide EXPORT_NO_SYMBOLS get a fallback
* that expands to register_symtab(NULL).
*/
#ifndef EXPORT_NO_SYMBOLS
# define EXPORT_NO_SYMBOLS register_symtab(NULL)
#endif
/* From 2.1.29 on, proc_register_dynamic is spelled proc_register. */
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,29)
# define proc_register_dynamic proc_register
#endif
/* NEED_RESCHED hides where the "reschedule requested" flag lives: a bare
* need_resched before 2.2.0, a field of the current task afterwards.
* It is tested in hostOSIdle().
*/
#if LINUX_VERSION_CODE < VERSION_CODE(2,2,0)
#define NEED_RESCHED need_resched
#else
#define NEED_RESCHED current->need_resched
#endif
/* Instrumentation of how many hardware interrupts were redirected
* to the host, while the VM monitor/guest was running. This can be
* written to by multiple contexts, so it needs SMP protection.
*
* One counter per interrupt vector (indices 0..255). Zeroed in
* init_module(), bumped by hostOSInstrumentIntRedirCount() and reported
* by plex86_read_procmem().
*/
static atomic_t interruptRedirCount[256];
#if LINUX_VERSION_CODE < VERSION_CODE(2,1,0)
/*
 * Compatibility shim for kernels older than 2.1.0, which do not provide
 * copy_from_user().  The user-space source range is validated with
 * verify_area() and then copied with memcpy_fromfs().
 *
 *   to    kernel-space destination buffer
 *   from  user-space source address
 *   n     number of bytes to copy
 *
 * Returns 0 when everything was copied.  When the source range fails
 * validation nothing is copied and n (the number of bytes left uncopied)
 * is returned, matching the contract of the native copy_from_user().
 * Previously the negative verify_area() error was returned through the
 * unsigned return type, which is still non-zero but meaningless as a
 * byte count.
 */
static inline unsigned long
copy_from_user(void *to, const void *from, unsigned long n)
{
  if ( verify_area(VERIFY_READ, from, n) != 0 )
    return n;   /* Nothing was copied. */

  memcpy_fromfs(to, from, n);
  return 0;
}
/*
 * Compatibility shim for kernels older than 2.1.0, which do not provide
 * copy_to_user().  The user-space destination range is validated with
 * verify_area() and then written with memcpy_tofs().
 *
 *   to    user-space destination address
 *   from  kernel-space source buffer
 *   n     number of bytes to copy
 *
 * Returns 0 when everything was copied.  When the destination range fails
 * validation nothing is copied and n (the number of bytes left uncopied)
 * is returned, matching the contract of the native copy_to_user().
 * Previously the negative verify_area() error was returned through the
 * unsigned return type, which is still non-zero but meaningless as a
 * byte count.
 */
static inline unsigned long
copy_to_user(void *to, const void *from, unsigned long n)
{
  if ( verify_area(VERIFY_WRITE, to, n) != 0 )
    return n;   /* Nothing was copied. */

  memcpy_tofs(to, from, n);
  return 0;
}
#endif
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,18) && !defined(THIS_MODULE)
/* Starting with version 2.1.18, the __this_module symbol is present,
* but the THIS_MODULE #define was introduced much later ...
*/
#define THIS_MODULE (&__this_module)
#endif
/************************************************************************/
/* Declarations */
/************************************************************************/
/* Use dynamic major number allocation. (Set non-zero for static allocation) */
#define PLEX86_MAJOR 0
/* Character-device major number. Starts as PLEX86_MAJOR; init_module()
* overwrites it with the number returned by register_chrdev() when it is
* zero (dynamic allocation), so the same value can be unregistered later.
*/
static int plex_major = PLEX86_MAJOR;
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,18)
/* Expose plex_major as an integer module load-time parameter. */
MODULE_PARM(plex_major, "i");
MODULE_PARM_DESC(plex_major, "major number (default " __MODULE_STRING(PLEX86_MAJOR) ")");
#endif
/* The kernel segment base. */
/* Added to addresses in retrievePhyPages() and reported to the
* host-independent code through hostOSKernelOffset().
*/
#if LINUX_VERSION_CODE < VERSION_CODE(2,1,0)
# define KERNEL_OFFSET 0xc0000000
#else
# define KERNEL_OFFSET 0x00000000
#endif
/* File operations. */
/* Prototypes for the handlers installed in plex86_fops. The release and
* mmap signatures changed across kernel versions, hence the conditionals.
*/
static int plex86_ioctl(struct inode *, struct file *, unsigned int,
unsigned long);
static int plex86_open(struct inode *, struct file *);
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,31)
static int plex86_release(struct inode *, struct file *);
#else
static void plex86_release(struct inode *, struct file *);
#endif
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,0)
static int plex86_mmap(struct file * file, struct vm_area_struct * vma);
#else
static int plex86_mmap(struct inode * inode, struct file * file,
struct vm_area_struct * vma);
#endif
/* NOTE(review): this test uses KERNEL_VERSION() while the rest of the
* file uses the local VERSION_CODE() macro; both take (major, minor, patch).
*/
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,9)
/* New License scheme. */
#ifdef MODULE_LICENSE
MODULE_LICENSE("GPL"); /* Close enough. Keeps kernel from complaining. */
#endif
#endif
/************************************************************************/
/* Structures / Variables */
/************************************************************************/
/* Helpers that locate the physical pages behind kernel virtual memory;
* both are defined further down in this file.
*/
static int retrieveKernelModulePages(void);
static unsigned retrievePhyPages(Bit32u *page, int max_pages, void *addr,
unsigned size);
/* File operations table handed to register_chrdev() (and to
* devfs_register() when devfs is configured) in init_module().
*/
static struct file_operations plex86_fops = {
#if LINUX_VERSION_CODE >= VERSION_CODE(2,4,0)
owner: THIS_MODULE,
#endif
mmap: plex86_mmap,
ioctl: plex86_ioctl,
open: plex86_open,
release: plex86_release,
};
#ifdef CONFIG_DEVFS_FS
#include <linux/devfs_fs_kernel.h>
/* Handle returned by devfs_register() in init_module(); passed to
* devfs_unregister() in cleanup_module().
*/
devfs_handle_t my_devfs_entry;
#endif
/* For the /proc/driver/plex86 entry. */
/* The read callback takes one extra (unused) int on kernels before 2.4.0. */
#if LINUX_VERSION_CODE >= VERSION_CODE(2,4,0) /* XXX - How far back? */
int plex86_read_procmem(char *, char **, off_t, int);
#else
int plex86_read_procmem(char *, char **, off_t, int, int);
#endif
#if LINUX_VERSION_CODE < VERSION_CODE(2,3,25)
/* Statically described /proc entry for kernels that lack
* create_proc_info_entry().
* NOTE(review): the name length is given as 6 but "driver/plex86" is 13
* characters long (6 is the length of "plex86") -- confirm which of the
* two was intended before relying on this legacy path.
*/
static struct proc_dir_entry plex86_proc_entry = {
0, /* dynamic inode */
6, "driver/plex86", /* len, name */
S_IFREG | S_IRUGO, /* mode */
1, 0, 0,
0,
NULL,
&plex86_read_procmem, /* read function */
};
#endif
/* Refuse to build against a PAE kernel: the page-table code in this file
* does not handle PAE.
*/
#if CONFIG_X86_PAE
# error "CONFIG_X86_PAE defined for this kernel, but unhandled in plex86"
#endif
/************************************************************************/
/* Main kernel module code */
/************************************************************************/
int
init_module(void)
{
int err;
/* Initialize structures which are not specific to each VM. These
* are things which are set only once upon kernel module initialization.
*/
memset(&kernelModulePages, 0, sizeof(kernelModulePages));
memset(&interruptRedirCount, 0, sizeof(interruptRedirCount));
/* Register the device with the kernel. */
err = register_chrdev(plex_major, "plex86", &plex86_fops);
if (err < 0) {
printk(KERN_WARNING "plex86: can't get major %d\n", plex_major);
return(err);
}
/* If this was a dynamic allocation, save the major for
* the release code
*/
if(!plex_major)
plex_major = err;
/* Register the /proc entry. */
#ifdef CONFIG_PROC_FS
#if LINUX_VERSION_CODE >= VERSION_CODE(2,3,25)
if (!create_proc_info_entry("driver/plex86", 0, NULL, plex86_read_procmem))
printk(KERN_ERR "plex86: registering /proc/driver/plex86 failed\n");
#else
proc_register_dynamic(&proc_root, &plex86_proc_entry);
#endif
#endif
/* Register /dev/misc/plex86 with devfs. */
#ifdef CONFIG_DEVFS_FS
my_devfs_entry = devfs_register(NULL, "misc/plex86",
DEVFS_FL_DEFAULT,
plex_major, 0 /* minor mode*/,
S_IFCHR | 0666, &plex86_fops,
NULL /* "info" */);
if (!my_devfs_entry)
printk(KERN_ERR "plex86: registering misc/plex86 devfs entry failed\n");
#endif
/* Retrieve the monitor physical pages. */
if ( !retrieveKernelModulePages() ) {
printk(KERN_ERR "plex86: retrieveKernelModulePages returned error\n");
err = -EINVAL;
goto fail_retrieve_pages;
}
/* Kernel independent code to be run when kernel module is loaded. */
if ( !hostModuleInit() ) {
printk(KERN_ERR "plex86: genericModuleInit returned error\n");
err = -EINVAL;
goto fail_cpu_capabilities;
}
/* Success. */
EXPORT_NO_SYMBOLS;
return(0);
fail_cpu_capabilities:
fail_retrieve_pages:
/* Unregister /proc entry. */
#ifdef CONFIG_PROC_FS
#if LINUX_VERSION_CODE >= VERSION_CODE(2,3,25)
remove_proc_entry("driver/plex86", NULL);
#else
proc_unregister(&proc_root, plex86_proc_entry.low_ino);
#endif
#endif
/* Unregister device. */
unregister_chrdev(plex_major, "plex86");
return err;
}
/*
 * Module unload entry point: removes, in this order, the character
 * device registration, the /proc entry (if /proc support is configured)
 * and the devfs entry (if devfs is configured).
 */
void
cleanup_module(void)
{
  /* Give the major number back. */
  unregister_chrdev(plex_major, "plex86");

  /* Drop the /proc entry using whichever interface this kernel has. */
#if defined(CONFIG_PROC_FS)
# if LINUX_VERSION_CODE < VERSION_CODE(2,3,25)
  proc_unregister(&proc_root, plex86_proc_entry.low_ino);
# else
  remove_proc_entry("driver/plex86", NULL);
# endif
#endif

  /* Drop the devfs node. */
#if defined(CONFIG_DEVFS_FS)
  devfs_unregister(my_devfs_entry);
#endif
}
/************************************************************************/
/* Open / Release a VM */
/************************************************************************/
/*
 * open() handler: creates a fresh VM for this file handle.
 *
 * A zeroed vm_t is allocated, stored in filp->private_data so the other
 * file operations can find it, and handed to the host-independent
 * hostDeviceOpen().
 *
 * Returns 0 on success or -ENOMEM if the VM structure cannot be
 * allocated.  On kernels before 2.4.0 the module use count is raised
 * for the lifetime of the open file; it is now dropped again when the
 * allocation fails, since no release() will follow a failed open().
 */
int
plex86_open(struct inode *inode, struct file *filp)
{
  vm_t *vm;

#if LINUX_VERSION_CODE < VERSION_CODE(2,4,0)
  MOD_INC_USE_COUNT;
#endif

  /* Allocate a VM structure. */
  vm = hostOSAllocZeroedMem(sizeof(vm_t));
  if (vm == NULL) {
#if LINUX_VERSION_CODE < VERSION_CODE(2,4,0)
    MOD_DEC_USE_COUNT;   /* Undo the increment above. */
#endif
    return -ENOMEM;
  }
  filp->private_data = vm;

  /* Kernel independent device open code. */
  hostDeviceOpen(vm);
  return(0);
}
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,31)
int
#else
void
#endif
plex86_release(struct inode *inode, struct file *filp)
{
vm_t *vm = (vm_t *)filp->private_data;
filp->private_data = NULL;
/* Free the virtual memory. */
hostUnallocVmPages( vm );
/* Free the VM structure. */
memset( vm, 0, sizeof(*vm) );
vfree( vm );
#if LINUX_VERSION_CODE < VERSION_CODE(2,4,0)
MOD_DEC_USE_COUNT;
#endif
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,31)
return(0);
#endif
}
/*
 * ioctl() handler.
 *
 * All the work is done by the host-independent hostIoctlGeneric(), which
 * calls back into this file only for host-specific services.  A
 * non-negative result is passed through unchanged; a negative result is
 * a negated plex86 errno, which is translated into the equivalent
 * negated Linux errno.
 */
int
plex86_ioctl(struct inode *inode, struct file *filp,
             unsigned int cmd, unsigned long arg)
{
  vm_t *vm = (vm_t *)filp->private_data;
  int status = hostIoctlGeneric(vm, inode, filp, cmd, arg);

  if ( status >= 0 )
    return( status );

  /* Error: map the plex86 code onto the host's errno space. */
  return( - hostOSConvertPlex86Errno(- status) );
}
/*
 * mmap() handler.  Mapping the device is not supported: the VM pointer
 * is fetched but unused and every request is rejected with -EINVAL.
 * (Kernels before 2.1.0 pass an additional inode argument.)
 */
int
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,0)
plex86_mmap(struct file * file, struct vm_area_struct * vma)
#else
plex86_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
#endif
{
  vm_t *vm;

  vm = (vm_t *)file->private_data;
  UNUSED(vm);

  return -EINVAL;
}
/************************************************************************/
/* Status reporting: /proc code */
/************************************************************************/
/* Size of the buffer the /proc read path hands to this callback: a single
 * page (see proc_file_read() in fs/proc/generic.c).  Fall back to the
 * 4096-byte x86 page size assumed elsewhere in this file if PAGE_SIZE is
 * not visible.
 */
#ifdef PAGE_SIZE
# define PLEX86_PROC_BUF_SIZE PAGE_SIZE
#else
# define PLEX86_PROC_BUF_SIZE 4096
#endif
/* Longest counter line that can be produced below:
 * " 0x" (3) + 2 hex digits + ":" (1) + 10-column count + "\n" (1).
 */
#define PLEX86_PROC_LINE_MAX 17

/*
 * /proc/driver/plex86 read callback.
 *
 * Writes a header line followed by one line for every interrupt vector
 * whose redirection counter is non-zero, and returns the number of bytes
 * written to buf.  The incoming len value is ignored (it is reset to 0
 * and reused as the running length); start and offset are not used.
 *
 * With every counter non-zero the full report (43 + 256 * 17 bytes) is
 * larger than the one-page buffer, so the output is now bounded: once
 * another line might not fit, a " (truncated)" marker is written instead
 * and the remaining vectors are skipped.
 */
int
plex86_read_procmem(char *buf, char **start, off_t offset,
#if LINUX_VERSION_CODE >= VERSION_CODE(2,4,0)
                    int len
#else
                    int len, int unused
#endif
                    )
{
  static const char truncatedMsg[] = " (truncated)\n";
  unsigned i;

  len = 0;
  len += sprintf(buf, "monitor-->host interrupt reflection counts\n");

  for (i=0; i<256; i++) {
    int count;

    count = atomic_read( &interruptRedirCount[i] );
    if (!count)
      continue;

    /* Always keep room for one full line plus the truncation marker and
     * its terminating NUL (sizeof includes the NUL).
     */
    if ( (unsigned long) len + PLEX86_PROC_LINE_MAX + sizeof(truncatedMsg)
         > PLEX86_PROC_BUF_SIZE ) {
      len += sprintf(buf+len, "%s", truncatedMsg);
      break;
    }

    len += sprintf(buf+len, " 0x%2x:%10u\n", i, (unsigned) count);
  }
  return(len);
}
int
retrieveKernelModulePages(void)
{
/*
* Retrieve start address and size of this module.
*
* Note that with old kernels, we cannot access the module info (size),
* hence we rely on the fact that Linux lets at least one page of
* virtual address space unused after the end of the module.
*/
#ifdef THIS_MODULE
Bit32u driverStartAddr = (Bit32u) THIS_MODULE;
unsigned size = THIS_MODULE->size;
#else
Bit32u driverStartAddr = (Bit32u) &mod_use_count_;
unsigned size = 0; /* Actual size determined below */
#endif
Bit32u driverStartAddrPageAligned = driverStartAddr & ~0xfff;
int nPages;
if (driverStartAddr != driverStartAddrPageAligned) {
/* Pretend this kernel module starts at the beginning of the page. */
/* If size is known, we have to add the extra offset from the beginning
* of the page.
*/
if (size)
size += (driverStartAddr & 0xfff);
}
nPages = retrievePhyPages(kernelModulePages.ppi, Plex86MaxKernelModulePages,
(void *) driverStartAddrPageAligned, size);
if (nPages == 0) {
printk(KERN_ERR "plex86: retrieveKernelModulePages: retrieve returned error.\n");
return( 0 ); /* Error. */
}
printk(KERN_WARNING "plex86: %u monitor pages located\n", nPages);
kernelModulePages.startOffset = driverStartAddr;
kernelModulePages.startOffsetPageAligned = driverStartAddrPageAligned;
kernelModulePages.nPages = nPages;
return( 1 ); /* OK. */
}
/*
 * Collect the physical page indices backing a page-aligned region of
 * kernel virtual memory by walking init_mm's page tables.
 *
 *   page       output array; page[i] receives the physical page index
 *              (physical address >> 12) of the i-th virtual page
 *   max_pages  capacity of page[]
 *   addr_v     start of the region; must be non-NULL and page aligned
 *   size       size of the region in bytes, or 0 if unknown.  With 0 the
 *              region is taken to end at the first page that is not
 *              present.
 *
 * Returns the number of pages recorded, or 0 on any error (bad address,
 * region larger than max_pages, or an unmapped page inside a region of
 * known size).
 *
 * Each level of the page tables is now checked for presence before the
 * next level is looked up, so a pointer derived from a non-present
 * entry is never dereferenced.  The former read of CR3 has been dropped:
 * its result was never used.
 */
unsigned
retrievePhyPages(Bit32u *page, int max_pages, void *addr_v, unsigned size)
{
  /*
   * Grrr.  There doesn't seem to be an exported mechanism to retrieve
   * the physical pages underlying a vmalloc()'ed area.  We do it the
   * hard way ...
   */
  Bit32u addr;
  unsigned n_pages;
  int i;

  addr = (Bit32u) addr_v;
  if ( addr & 0xfff ) {
    printk(KERN_ERR "plex86: retrievePhyPages: not page aligned!\n");
    return 0;
  }
  if (!addr) {
    printk(KERN_ERR "plex86: retrievePhyPages: addr NULL!\n");
    return 0;
  }

  if (size == 0) {
    /* Size unknown.  Determine by cycling through page tables until
     * we find one which is not present.  We will assume that means
     * the end of the data structure.  Set the number of pages to
     * cycle through, to one more than the maximum requested.  This
     * way we'll look through enough pages.
     */
    n_pages = max_pages + 1;
  }
  else {
    n_pages = BytesToPages(size);
    if ( n_pages > max_pages ) {
      printk(KERN_ERR "plex86: retrievePhyPages: n=%u > max=%u\n",
             n_pages, max_pages);
      return 0;
    }
  }

  for (i = 0; i < n_pages; i++) {
    Bit32u laddr;
    unsigned long lpage;
    pgd_t *pgdPtr; pmd_t *pmdPtr; pte_t *ptePtr;
    pgd_t pgdVal; pmd_t pmdVal; pte_t pteVal;
    unsigned present = 0;
    Bit32u ppi = 0;

    laddr = KERNEL_OFFSET + ((Bit32u) addr);
    lpage = VMALLOC_VMADDR(laddr);

    /* About to traverse the page tables.  We need to lock others
     * out of them briefly.  Newer Linux versions can do a fine-grained
     * lock on the page tables themselves.  Older ones have to do
     * a "big kernel lock".
     */
#if LINUX_VERSION_CODE >= VERSION_CODE(2,3,10)
    spin_lock(&init_mm.page_table_lock);
#else
    lock_kernel(); /* Big kernel lock. */
#endif
    /* Descend one level at a time; bit 0 of an entry is its Present
     * flag, and only a present entry may be followed.
     */
    pgdPtr = pgd_offset(&init_mm, lpage);
    pgdVal = *pgdPtr;
    if (pgdVal.pgd & 1) {
      pmdPtr = pmd_offset(pgdPtr, lpage);
      pmdVal = *pmdPtr;
      if (pmdVal.pmd & 1) {
        ptePtr = pte_offset(pmdPtr, lpage);
        pteVal = *ptePtr;
        if (pteVal.pte_low & 1) {
          present = 1;
          /* Physical page index for this virtual page. */
          ppi = pte_val(pteVal) >> 12;
        }
      }
    }
#if LINUX_VERSION_CODE >= VERSION_CODE(2,3,10)
    spin_unlock(&init_mm.page_table_lock);
#else
    unlock_kernel(); /* Big kernel unlock. */
#endif

    if (!present) {
      if (size == 0)
        return i; /* Report number of pages until area ended. */
      printk(KERN_ERR "plex86: retrievePhyPages: "
             "PDE.P==0: i=%u, n=%u laddr=0x%x\n", i, n_pages, laddr);
      return 0; /* Error, ran into unmapped page in memory range. */
    }

    /* Abort if our page list is too small.  This can only trigger in
     * the size-unknown case, where one page more than max_pages is
     * probed.
     */
    if (i >= max_pages) {
      printk(KERN_WARNING "plex86: page list is too small!\n");
      printk(KERN_WARNING "plex86: n_pages=%u, max_pages=%u\n",
             n_pages, max_pages);
      return 0;
    }

    page[i] = ppi;

    /* Increment to the next virtual page address. */
    addr += 4096;
  }
  return(n_pages);
}
/************************************************************************
* The requisite host-specific functions. An implementation of each of
* these functions needs to be offered for each host-XYZ.c file.
************************************************************************/
/*
 * Give the host scheduler a chance to run: if a reschedule has been
 * requested, call schedule().
 *
 * Returns 0 if a signal is pending for the current task afterwards,
 * 1 otherwise.
 */
unsigned
hostOSIdle(void)
{
  if (NEED_RESCHED) {
    schedule();
  }

  if ( signal_pending(current) )
    return( 0 );
  return( 1 );
}
void *
hostOSAllocZeroedMem(unsigned long size)
{
void *ptr;
ptr = vmalloc(size);
if ( ((Bit32u) ptr) & 0x00000fff )
return( 0 ); /* Error. */
/* Zero pages. This also demand maps the pages in, which we need
* since we'll cycle through all the pages to get the physical
* address mappings.
*/
mon_memzero(ptr, size);
return( ptr );
}
/*
 * Release a region with vfree(); the counterpart of
 * hostOSAllocZeroedMem(), which allocates with vmalloc().
 */
void
hostOSFreeMem(void *ptr)
{
  vfree( ptr );
}
/*
 * Allocate a single zero-filled page with get_zeroed_page(GFP_KERNEL)
 * and return its address as a pointer.  Release with hostOSFreePage().
 */
void *
hostOSAllocZeroedPage(void)
{
  unsigned long pageAddr = get_zeroed_page(GFP_KERNEL);

  return( (void *) pageAddr );
}
void
hostOSFreePage(void *ptr)
{
free_page( (Bit32u)ptr );
}
/*
 * Host-interface wrapper around retrievePhyPages(): fills page[] (at
 * most max_pages entries) with the physical page indices backing the
 * size-byte region at ptr.
 *
 * Returns whatever retrievePhyPages() returns: the number of pages
 * found, or 0 on error.
 */
unsigned
hostOSGetAllocedMemPhyPages(Bit32u *page, int max_pages, void *ptr, unsigned size)
{
  unsigned nFound;

  nFound = retrievePhyPages(page, max_pages, ptr, size);
  return( nFound );
}
/*
 * Translate the kernel address ptr into a physical page index
 * (__pa(ptr) >> PAGE_SHIFT).  A NULL pointer yields 0.
 */
Bit32u
hostOSGetAllocedPagePhyPage(void *ptr)
{
  /* return MAP_NR(ptr); */
  return( ptr ? (__pa(ptr) >> PAGE_SHIFT) : 0 );
}
/*
 * printf-style logging hook for the host-independent code.
 *
 * Currently a stub: the implementation below is compiled out with
 * "#if 0", so calls do nothing, and the #warning flags this at build
 * time.  The disabled draft formats the message into a 256-byte stack
 * buffer with mon_vsnprintf() and prints it via printk().
 * NOTE(review): the draft has no va_end() matching its va_start() --
 * add one when this is enabled.
 */
void
hostOSPrint(char *fmt, ...)
{
#warning "Fix hostPrint"
#if 0
va_list args;
int ret;
unsigned char buffer[256];
va_start(args, fmt);
ret = mon_vsnprintf(buffer, 256, fmt, args);
if (ret == -1) {
printk(KERN_ERR "plex86: hostPrint: vsnprintf returns error.\n");
}
else {
printk(KERN_WARNING "plex86: %s\n", buffer);
}
#endif
}
/*
 * Translate a (positive) plex86 error code into the corresponding
 * (positive) Linux errno value.
 *
 * 0 maps to 0.  A code with no known equivalent is logged and reported
 * as EINVAL.  The log line now names this function; it previously
 * referred to "ioctlAllocVPhys", which pointed at the wrong place when
 * debugging.
 */
int
hostOSConvertPlex86Errno(unsigned ret)
{
  switch (ret) {
    case 0:                 return(0);
    case Plex86ErrnoEBUSY:  return(EBUSY);
    case Plex86ErrnoENOMEM: return(ENOMEM);
    case Plex86ErrnoEFAULT: return(EFAULT);
    case Plex86ErrnoEINVAL: return(EINVAL);
    case Plex86ErrnoEACCES: return(EACCES);
    case Plex86ErrnoEAGAIN: return(EAGAIN);
    default:
      printk(KERN_ERR "plex86: hostOSConvertPlex86Errno: "
             "unknown plex86 errno %u\n", ret);
      return(EINVAL);
  }
}
/*
 * Report the kernel segment base (KERNEL_OFFSET) that this file was
 * built with.
 */
Bit32u
hostOSKernelOffset(void)
{
  Bit32u kernelBase = KERNEL_OFFSET;

  return( kernelBase );
}
/*
 * Force the module use count to exactly 1.
 *
 * Only has an effect on kernels before 2.4.0, where this file maintains
 * the count by hand: the count is decremented until the module is no
 * longer marked in use, then raised once so that the eventual release()
 * has something to decrement.  The arguments are not used.
 */
void
hostOSModuleCountReset(vm_t *vm, void *inode, void *filp)
{
#if LINUX_VERSION_CODE < VERSION_CODE(2,4,0)
  for (;;) {
    if (!MOD_IN_USE)
      break;
    MOD_DEC_USE_COUNT;
  }
  MOD_INC_USE_COUNT; /* bump back to 1 so release can decrement */
#endif
}
/*
 * Host-interface wrapper: copy len bytes from the user-space address
 * from into the kernel buffer to, using copy_from_user().
 *
 * Returns copy_from_user()'s result unchanged (0 on success).
 */
unsigned long
hostOSCopyFromUser(void *to, void *from, unsigned long len)
{
  unsigned long result;

  result = copy_from_user(to, from, len);
  return( result );
}
/*
 * Host-interface wrapper: copy len bytes from the kernel buffer from to
 * the user-space address to, using copy_to_user().
 *
 * Returns copy_to_user()'s result unchanged (0 on success).
 */
unsigned long
hostOSCopyToUser(void *to, void *from, unsigned long len)
{
  unsigned long result;

  result = copy_to_user(to, from, len);
  return( result );
}
/*
 * Pin the single user-space page containing userAddr (in the current
 * process) and report where it lives.
 *
 *   vm             not used here
 *   userAddr       user virtual address inside the page to pin
 *   osSpecificPtr  receives the "struct page *" for the pinned page; the
 *                  caller passes that value back to hostOSUnpinUserPage()
 *   ppi            receives the physical page index (page - mem_map)
 *   kernelAddr     if non-NULL, the page is also mapped with kmap() and
 *                  the resulting kernel address is stored here
 *
 * Returns 1 on success, 0 if the page could not be pinned.
 *
 * get_user_pages() walks the process's VMA list and must be called with
 * the mm's mmap_sem held; it is now taken for reading around the call.
 */
Bit32u
hostOSGetAndPinUserPage(vm_t *vm, Bit32u userAddr, void **osSpecificPtr,
                        Bit32u *ppi, Bit32u *kernelAddr)
{
  int ret;
  struct page **pagePtr;
  struct page *page;

  pagePtr = (struct page **) osSpecificPtr;

  down_read(&current->mm->mmap_sem);
  ret = get_user_pages(current,
                       current->mm,
                       (unsigned long) userAddr,
                       1, /* 1 page. */
                       1, /* 'write': intent to write. */
                       0, /* 'force': ? */
                       pagePtr,
                       NULL /* struct vm_area_struct *[] */
                       );
  up_read(&current->mm->mmap_sem);

  if (ret != 1) {
    printk(KERN_ERR "plex86: hostGetAndPinUserPages: failed.\n");
    return(0); /* Error. */
  }
  page = *pagePtr; /* The returned "struct page *" value. */

  /* Now that we have the "struct page *" for the physical page behind
   * the requested user address, we can calculate the physical page
   * index by simple pointer arithmetic based on "mem_map".
   */
  *ppi = page - mem_map;

  if (kernelAddr) {
    /* Caller wants a kernel address returned which maps to this physical
     * address.
     */
    *kernelAddr = (Bit32u) kmap( page );
#warning "FIXME: Check return value here."
#warning "Also, conditionally compile for version and high memory support."
  }
  return(1); /* OK. */
}
/*
 * Undo hostOSGetAndPinUserPage() for one page.
 *
 *   osSpecificPtr  the "struct page *" of the pinned page
 *   kernelAddr     non-NULL if a kernel mapping was created for the page
 *                  when it was pinned; that mapping is removed with
 *                  kunmap()
 *   dirty          non-zero if the page was modified while the guest
 *                  ran in the VM
 *
 * vm, userAddr and ppi are not used.
 *
 * (Modelled on __free_pte() in Linux 2.4.18's mm/memory.c, which marks a
 * dirty page with set_page_dirty() before releasing it.)
 */
void
hostOSUnpinUserPage(vm_t *vm, Bit32u userAddr, void *osSpecificPtr,
                    Bit32u ppi, Bit32u *kernelAddr, unsigned dirty)
{
  struct page *pinned = (struct page *) osSpecificPtr;

  /* Drop the kernel-space mapping, if one was made.  Only really useful
   * for pages allocated from high memory.
   */
  if (kernelAddr != NULL) {
    kunmap(pinned);
  }

  /* The kernel is not aware of the VM page tables, so it has to be told
   * explicitly when the guest dirtied the page.
   */
  if (dirty != 0) {
    set_page_dirty(pinned);
  }

  /* Release/unpin the page. */
  put_page(pinned);
}
/*
 * Count one hardware interrupt that was redirected to the host while
 * the monitor/guest was running.  The per-vector counter is incremented
 * atomically because the table is shared by all VMs.
 *
 * A vector outside the table is ignored rather than being allowed to
 * write past the end of the array.
 */
void
hostOSInstrumentIntRedirCount(unsigned interruptVector)
{
  if ( interruptVector >=
       sizeof(interruptRedirCount) / sizeof(interruptRedirCount[0]) )
    return;

  atomic_inc( &interruptRedirCount[interruptVector] );
}