NetBSD/lib/librumpuser/rumpuser.c

675 lines
13 KiB
C

/* $NetBSD: rumpuser.c,v 1.58 2014/03/16 10:23:59 njoly Exp $ */
/*
* Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "rumpuser_port.h"
#if !defined(lint)
__RCSID("$NetBSD: rumpuser.c,v 1.58 2014/03/16 10:23:59 njoly Exp $");
#endif /* !lint */
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <sys/stat.h>
#include <sys/time.h>
#ifdef __NetBSD__
#include <sys/disk.h>
#include <sys/disklabel.h>
#include <sys/dkio.h>
#endif
#if defined(__NetBSD__) || defined(__FreeBSD__) || \
defined(__DragonFly__) || defined(__APPLE__)
#define __BSD__
#endif
#if defined(__BSD__)
#include <sys/sysctl.h>
#endif
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <signal.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <rump/rumpuser.h>
#include "rumpuser_int.h"
struct rumpuser_hyperup rumpuser__hyp;
int
rumpuser_init(int version, const struct rumpuser_hyperup *hyp)
{
if (version != RUMPUSER_VERSION) {
fprintf(stderr, "rumpuser mismatch, kern: %d, hypervisor %d\n",
version, RUMPUSER_VERSION);
return 1;
}
#ifdef RUMPUSER_USE_DEVRANDOM
uint32_t rv;
int fd;
if ((fd = open("/dev/urandom", O_RDONLY)) == -1) {
srandom(time(NULL));
} else {
if (read(fd, &rv, sizeof(rv)) != sizeof(rv))
srandom(time(NULL));
else
srandom(rv);
close(fd);
}
#endif
rumpuser__thrinit();
rumpuser__hyp = *hyp;
return 0;
}
int
rumpuser_getfileinfo(const char *path, uint64_t *sizep, int *ftp)
{
struct stat sb;
uint64_t size = 0;
int needsdev = 0, rv = 0, ft = 0;
int fd = -1;
if (stat(path, &sb) == -1) {
rv = errno;
goto out;
}
switch (sb.st_mode & S_IFMT) {
case S_IFDIR:
ft = RUMPUSER_FT_DIR;
break;
case S_IFREG:
ft = RUMPUSER_FT_REG;
break;
case S_IFBLK:
ft = RUMPUSER_FT_BLK;
needsdev = 1;
break;
case S_IFCHR:
ft = RUMPUSER_FT_CHR;
needsdev = 1;
break;
default:
ft = RUMPUSER_FT_OTHER;
break;
}
if (!needsdev) {
size = sb.st_size;
} else if (sizep) {
/*
* Welcome to the jungle. Of course querying the kernel
* for a device partition size is supposed to be far from
* trivial. On NetBSD we use ioctl. On $other platform
* we have a problem. We try "the lseek trick" and just
* fail if that fails. Platform specific code can later
* be written here if appropriate.
*
* On NetBSD we hope and pray that for block devices nobody
* else is holding them open, because otherwise the kernel
* will not permit us to open it. Thankfully, this is
* usually called only in bootstrap and then we can
* forget about it.
*/
#ifndef __NetBSD__
off_t off;
fd = open(path, O_RDONLY);
if (fd == -1) {
rv = errno;
goto out;
}
off = lseek(fd, 0, SEEK_END);
if (off != 0) {
size = off;
goto out;
}
fprintf(stderr, "error: device size query not implemented on "
"this platform\n");
rv = EOPNOTSUPP;
goto out;
#else
struct disklabel lab;
struct partition *parta;
struct dkwedge_info dkw;
fd = open(path, O_RDONLY);
if (fd == -1) {
rv = errno;
goto out;
}
if (ioctl(fd, DIOCGDINFO, &lab) == 0) {
parta = &lab.d_partitions[DISKPART(sb.st_rdev)];
size = (uint64_t)lab.d_secsize * parta->p_size;
goto out;
}
if (ioctl(fd, DIOCGWEDGEINFO, &dkw) == 0) {
/*
* XXX: should use DIOCGDISKINFO to query
* sector size, but that requires proplib,
* so just don't bother for now. it's nice
* that something as difficult as figuring out
* a partition's size has been made so easy.
*/
size = dkw.dkw_size << DEV_BSHIFT;
goto out;
}
rv = errno;
#endif /* __NetBSD__ */
}
out:
if (rv == 0 && sizep)
*sizep = size;
if (rv == 0 && ftp)
*ftp = ft;
if (fd != -1)
close(fd);
ET(rv);
}
int
rumpuser_malloc(size_t howmuch, int alignment, void **memp)
{
void *mem = NULL;
int rv;
if (alignment == 0)
alignment = sizeof(void *);
rv = posix_memalign(&mem, (size_t)alignment, howmuch);
if (__predict_false(rv != 0)) {
if (rv == EINVAL) {
printf("rumpuser_malloc: invalid alignment %d\n",
alignment);
abort();
}
}
*memp = mem;
ET(rv);
}
/*ARGSUSED1*/
void
rumpuser_free(void *ptr, size_t size)
{
free(ptr);
}
int
rumpuser_anonmmap(void *prefaddr, size_t size, int alignbit,
int exec, void **memp)
{
void *mem;
int prot, rv;
#ifndef MAP_ALIGNED
#define MAP_ALIGNED(a) 0
if (alignbit)
fprintf(stderr, "rumpuser_anonmmap: warning, requested "
"alignment not supported by hypervisor\n");
#endif
prot = PROT_READ|PROT_WRITE;
if (exec)
prot |= PROT_EXEC;
mem = mmap(prefaddr, size, prot,
MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(alignbit), -1, 0);
if (mem == MAP_FAILED) {
rv = errno;
} else {
*memp = mem;
rv = 0;
}
ET(rv);
}
void
rumpuser_unmap(void *addr, size_t len)
{
munmap(addr, len);
}
int
rumpuser_open(const char *path, int ruflags, int *fdp)
{
int fd, flags, rv;
switch (ruflags & RUMPUSER_OPEN_ACCMODE) {
case RUMPUSER_OPEN_RDONLY:
flags = O_RDONLY;
break;
case RUMPUSER_OPEN_WRONLY:
flags = O_WRONLY;
break;
case RUMPUSER_OPEN_RDWR:
flags = O_RDWR;
break;
default:
rv = EINVAL;
goto out;
}
#define TESTSET(_ru_, _h_) if (ruflags & _ru_) flags |= _h_;
TESTSET(RUMPUSER_OPEN_CREATE, O_CREAT);
TESTSET(RUMPUSER_OPEN_EXCL, O_EXCL);
#undef TESTSET
KLOCK_WRAP(fd = open(path, flags, 0644));
if (fd == -1) {
rv = errno;
} else {
*fdp = fd;
rv = 0;
}
out:
ET(rv);
}
int
rumpuser_close(int fd)
{
int nlocks;
rumpkern_unsched(&nlocks, NULL);
fsync(fd);
close(fd);
rumpkern_sched(nlocks, NULL);
ET(0);
}
/*
* Assume "struct rumpuser_iovec" and "struct iovec" are the same.
* If you encounter POSIX platforms where they aren't, add some
* translation for iovlen > 1.
*/
int
rumpuser_iovread(int fd, struct rumpuser_iovec *ruiov, size_t iovlen,
int64_t roff, size_t *retp)
{
struct iovec *iov = (struct iovec *)ruiov;
off_t off = (off_t)roff;
ssize_t nn;
int rv;
if (off == RUMPUSER_IOV_NOSEEK) {
KLOCK_WRAP(nn = readv(fd, iov, iovlen));
} else {
int nlocks;
rumpkern_unsched(&nlocks, NULL);
if (lseek(fd, off, SEEK_SET) == off) {
nn = readv(fd, iov, iovlen);
} else {
nn = -1;
}
rumpkern_sched(nlocks, NULL);
}
if (nn == -1) {
rv = errno;
} else {
*retp = (size_t)nn;
rv = 0;
}
ET(rv);
}
int
rumpuser_iovwrite(int fd, const struct rumpuser_iovec *ruiov, size_t iovlen,
int64_t roff, size_t *retp)
{
const struct iovec *iov = (const struct iovec *)ruiov;
off_t off = (off_t)roff;
ssize_t nn;
int rv;
if (off == RUMPUSER_IOV_NOSEEK) {
KLOCK_WRAP(nn = writev(fd, iov, iovlen));
} else {
int nlocks;
rumpkern_unsched(&nlocks, NULL);
if (lseek(fd, off, SEEK_SET) == off) {
nn = writev(fd, iov, iovlen);
} else {
nn = -1;
}
rumpkern_sched(nlocks, NULL);
}
if (nn == -1) {
rv = errno;
} else {
*retp = (size_t)nn;
rv = 0;
}
ET(rv);
}
int
rumpuser_syncfd(int fd, int flags, uint64_t start, uint64_t len)
{
int rv = 0;
/*
* For now, assume fd is regular file and does not care
* about read syncing
*/
if ((flags & RUMPUSER_SYNCFD_BOTH) == 0) {
rv = EINVAL;
goto out;
}
if ((flags & RUMPUSER_SYNCFD_WRITE) == 0) {
rv = 0;
goto out;
}
#ifdef __NetBSD__
{
int fsflags = FDATASYNC;
if (fsflags & RUMPUSER_SYNCFD_SYNC)
fsflags |= FDISKSYNC;
if (fsync_range(fd, fsflags, start, len) == -1)
rv = errno;
}
#else
/* el-simplo */
if (fsync(fd) == -1)
rv = errno;
#endif
out:
ET(rv);
}
int
rumpuser_clock_gettime(int enum_rumpclock, int64_t *sec, long *nsec)
{
enum rumpclock rclk = enum_rumpclock;
struct timespec ts;
clockid_t clk;
int rv;
switch (rclk) {
case RUMPUSER_CLOCK_RELWALL:
clk = CLOCK_REALTIME;
break;
case RUMPUSER_CLOCK_ABSMONO:
#ifdef HAVE_CLOCK_NANOSLEEP
clk = CLOCK_MONOTONIC;
#else
clk = CLOCK_REALTIME;
#endif
break;
default:
abort();
}
if (clock_gettime(clk, &ts) == -1) {
rv = errno;
} else {
*sec = ts.tv_sec;
*nsec = ts.tv_nsec;
rv = 0;
}
ET(rv);
}
int
rumpuser_clock_sleep(int enum_rumpclock, int64_t sec, long nsec)
{
enum rumpclock rclk = enum_rumpclock;
struct timespec rqt, rmt;
int nlocks;
int rv;
rumpkern_unsched(&nlocks, NULL);
/*LINTED*/
rqt.tv_sec = sec;
/*LINTED*/
rqt.tv_nsec = nsec;
switch (rclk) {
case RUMPUSER_CLOCK_RELWALL:
do {
rv = nanosleep(&rqt, &rmt);
rqt = rmt;
} while (rv == -1 && errno == EINTR);
if (rv == -1) {
rv = errno;
}
break;
case RUMPUSER_CLOCK_ABSMONO:
do {
#ifdef HAVE_CLOCK_NANOSLEEP
rv = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME,
&rqt, NULL);
#else
/* le/la/der/die/das sigh. timevalspec tailspin */
struct timespec ts, tsr;
clock_gettime(CLOCK_REALTIME, &ts);
if (ts.tv_sec == rqt.tv_sec ?
ts.tv_nsec > rqt.tv_nsec : ts.tv_sec > rqt.tv_sec) {
rv = 0;
} else {
tsr.tv_sec = rqt.tv_sec - ts.tv_sec;
tsr.tv_nsec = rqt.tv_nsec - ts.tv_nsec;
if (tsr.tv_nsec < 0) {
tsr.tv_sec--;
tsr.tv_nsec += 1000*1000*1000;
}
rv = nanosleep(&tsr, NULL);
}
#endif
} while (rv == -1 && errno == EINTR);
if (rv == -1) {
rv = errno;
}
break;
default:
abort();
}
rumpkern_sched(nlocks, NULL);
ET(rv);
}
static int
gethostncpu(void)
{
int ncpu = 1;
#if defined(__BSD__)
size_t sz = sizeof(ncpu);
sysctlbyname("hw.ncpu", &ncpu, &sz, NULL, 0);
#elif defined(__linux__) || defined(__CYGWIN__)
FILE *fp;
char *line = NULL;
size_t n = 0;
/* If anyone knows a better way, I'm all ears */
if ((fp = fopen("/proc/cpuinfo", "r")) != NULL) {
ncpu = 0;
while (getline(&line, &n, fp) != -1) {
if (strncmp(line,
"processor", sizeof("processor")-1) == 0)
ncpu++;
}
if (ncpu == 0)
ncpu = 1;
free(line);
fclose(fp);
}
#elif __sun__
/* XXX: this is just a rough estimate ... */
ncpu = sysconf(_SC_NPROCESSORS_ONLN);
#endif
return ncpu;
}
int
rumpuser_getparam(const char *name, void *buf, size_t blen)
{
int rv;
if (strcmp(name, RUMPUSER_PARAM_NCPU) == 0) {
int ncpu;
if (getenv_r("RUMP_NCPU", buf, blen) == -1) {
sprintf(buf, "2"); /* default */
} else if (strcmp(buf, "host") == 0) {
ncpu = gethostncpu();
snprintf(buf, blen, "%d", ncpu);
}
rv = 0;
} else if (strcmp(name, RUMPUSER_PARAM_HOSTNAME) == 0) {
char tmp[MAXHOSTNAMELEN];
if (gethostname(tmp, sizeof(tmp)) == -1) {
snprintf(buf, blen, "rump-%05d", (int)getpid());
} else {
snprintf(buf, blen, "rump-%05d.%s",
(int)getpid(), tmp);
}
rv = 0;
} else if (*name == '_') {
rv = EINVAL;
} else {
if (getenv_r(name, buf, blen) == -1)
rv = errno;
else
rv = 0;
}
ET(rv);
}
void
rumpuser_putchar(int c)
{
putchar(c);
}
__dead void
rumpuser_exit(int rv)
{
if (rv == RUMPUSER_PANIC)
abort();
else
exit(rv);
}
void
rumpuser_seterrno(int error)
{
errno = error;
}
/*
* This is meant for safe debugging prints from the kernel.
*/
void
rumpuser_dprintf(const char *format, ...)
{
va_list ap;
va_start(ap, format);
vfprintf(stderr, format, ap);
va_end(ap);
}
int
rumpuser_kill(int64_t pid, int rumpsig)
{
int sig;
sig = rumpuser__sig_rump2host(rumpsig);
if (sig > 0)
raise(sig);
return 0;
}
int
rumpuser_getrandom(void *buf, size_t buflen, int flags, size_t *retp)
{
size_t origlen = buflen;
uint32_t *p = buf;
uint32_t tmp;
int chunk;
do {
chunk = buflen < 4 ? buflen : 4; /* portable MIN ... */
tmp = RUMPUSER_RANDOM();
memcpy(p, &tmp, chunk);
p++;
buflen -= chunk;
} while (chunk);
*retp = origlen;
ET(0);
}