NetBSD/lib/librumpuser/rumpuser_sp.c

717 lines
14 KiB
C
Raw Normal View History

/* $NetBSD: rumpuser_sp.c,v 1.13 2010/11/24 17:00:10 pooka Exp $ */
/*
* Copyright (c) 2010 Antti Kantee. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Sysproxy routines. This provides system RPC support over host sockets.
* The most notable limitation is that the client and server must share
* the same ABI. This does not mean that they have to be the same
* machine or that they need to run the same version of the host OS,
* just that they must agree on the data structures. This even *might*
* work correctly from one hardware architecture to another.
*
* Not finished yet, i.e. don't use in production. Lacks locking plus
* handling of multiple clients and unexpected connection closes.
*/
#include <sys/cdefs.h>
__RCSID("$NetBSD: rumpuser_sp.c,v 1.13 2010/11/24 17:00:10 pooka Exp $");
#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <rump/rumpuser.h>
#include "rumpuser_int.h"
#include "sp_common.c"
/*
 * Number of slots in pfdlist[] and spclist[].  spserver() places the
 * listening socket in slot 0 of pfdlist[].
 */
#define MAXCLI 4
/* poll(2) descriptors; indexed in parallel with spclist[] */
static struct pollfd pfdlist[MAXCLI];
/* per-slot client state; indexed in parallel with pfdlist[] */
static struct spclient spclist[MAXCLI];
/*
 * Incremented by spcrelease() once a client has been torn down;
 * swapped back to 0 by spserver() when it processes the disconnects.
 */
static unsigned int disco;
/* local copy of the ops structure passed to rumpuser_sp_init() */
static struct rumpuser_sp_ops spops;
/*
* Manual wrappers, since librump does not have access to the
* user namespace wrapped interfaces.
*/
/*
 * Invoke spop_lwproc_switch(l), bracketed by the
 * spop_schedule()/spop_unschedule() pair from spops.
 */
static void
lwproc_switch(struct lwp *l)
{

	spops.spop_schedule();
	spops.spop_lwproc_switch(l);
	spops.spop_unschedule();
}
/*
 * Invoke spop_lwproc_release(), bracketed by the
 * spop_schedule()/spop_unschedule() pair from spops.
 */
static void
lwproc_release(void)
{

	spops.spop_schedule();
	spops.spop_lwproc_release();
	spops.spop_unschedule();
}
/*
 * Invoke spop_lwproc_newproc(spc) between spop_schedule() and
 * spop_unschedule() and hand its return value back to the caller.
 * Callers in this file treat a nonzero return as failure.
 */
static int
lwproc_newproc(struct spclient *spc)
{
	int error;

	spops.spop_schedule();
	error = spops.spop_lwproc_newproc(spc);
	spops.spop_unschedule();

	return error;
}
/*
 * Invoke spop_lwproc_newlwp(pid) between spop_schedule() and
 * spop_unschedule() and hand its return value back to the caller.
 */
static int
lwproc_newlwp(pid_t pid)
{
	int error;

	spops.spop_schedule();
	error = spops.spop_lwproc_newlwp(pid);
	spops.spop_unschedule();

	return error;
}
/*
 * Return whatever spop_lwproc_curlwp() reports, calling it between
 * spop_schedule() and spop_unschedule().
 */
static struct lwp *
lwproc_curlwp(void)
{
	struct lwp *cur;

	spops.spop_schedule();
	cur = spops.spop_lwproc_curlwp();
	spops.spop_unschedule();

	return cur;
}
/*
 * Return whatever spop_getpid() reports, calling it between
 * spop_schedule() and spop_unschedule().
 */
static pid_t
lwproc_getpid(void)
{
	pid_t pid;

	spops.spop_schedule();
	pid = spops.spop_getpid();
	spops.spop_unschedule();

	return pid;
}
/*
 * Execute syscall number sysnum via spop_syscall() with the argument
 * block in data; results are stored through retval.  The call is
 * bracketed by spop_schedule()/spop_unschedule().  Returns the value
 * returned by spop_syscall().
 */
static int
rumpsyscall(int sysnum, void *data, register_t *retval)
{
	int error;

	spops.spop_schedule();
	error = spops.spop_syscall(sysnum, data, retval);
	spops.spop_unschedule();

	return error;
}
/*
 * Allocate the next request number for this client.  The counter
 * spc_nextreq is read and post-incremented under spc_mtx.
 */
static uint64_t
nextreq(struct spclient *spc)
{
	uint64_t reqno;

	pthread_mutex_lock(&spc->spc_mtx);
	reqno = spc->spc_nextreq;
	spc->spc_nextreq++;
	pthread_mutex_unlock(&spc->spc_mtx);

	return reqno;
}
/*
 * Send the response for syscall request reqno to the client: a header
 * followed by the error value and the return value registers.
 *
 * Both pieces are sent under the client's send lock.  The payload is
 * only sent if the header went out successfully, so a header failure
 * is no longer masked by the result of the second send.
 *
 * Returns 0 on success, otherwise the nonzero value from dosend().
 */
static int
send_syscall_resp(struct spclient *spc, uint64_t reqno, int error,
	register_t *retval)
{
	struct rsp_hdr rhdr;
	struct rsp_sysresp sysresp;
	int rv;

	rhdr.rsp_len = sizeof(rhdr) + sizeof(sysresp);
	rhdr.rsp_reqno = reqno;
	rhdr.rsp_class = RUMPSP_RESP;
	rhdr.rsp_type = RUMPSP_SYSCALL;
	rhdr.rsp_sysnum = 0;

	sysresp.rsys_error = error;
	memcpy(sysresp.rsys_retval, retval, sizeof(sysresp.rsys_retval));

	sendlock(spc);
	rv = dosend(spc, &rhdr, sizeof(rhdr));
	if (rv == 0)
		rv = dosend(spc, &sysresp, sizeof(sysresp));
	sendunlock(spc);

	return rv;
}
/*
 * Ask the client to send dlen bytes starting at its address remaddr
 * and wait for the reply.
 *
 * The request is registered with putwait() before it is sent; if
 * sending fails the registration is undone with unputwait().  The
 * request body is only sent if the header went out successfully, so a
 * header failure is no longer masked by the second send.
 *
 * On return *resp is set to the response buffer recorded by
 * waitresp(); rumpuser_sp_copyin() frees it after use.
 *
 * Returns 0 on success, otherwise the nonzero value from dosend() or
 * waitresp().
 */
static int
copyin_req(struct spclient *spc, const void *remaddr, size_t dlen, void **resp)
{
	struct rsp_hdr rhdr;
	struct rsp_copydata copydata;
	struct respwait rw;
	int rv;

	DPRINTF(("copyin_req: %zu bytes from %p\n", dlen, remaddr));

	rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata);
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_COPYIN;
	rhdr.rsp_sysnum = 0;

	copydata.rcp_addr = __UNCONST(remaddr);
	copydata.rcp_len = dlen;

	/* NOTE(review): rsp_reqno is presumably filled in by putwait() */
	putwait(spc, &rw, &rhdr);
	rv = dosend(spc, &rhdr, sizeof(rhdr));
	if (rv == 0)
		rv = dosend(spc, &copydata, sizeof(copydata));
	if (rv) {
		unputwait(spc, &rw);
		return rv;
	}

	rv = waitresp(spc, &rw);

	DPRINTF(("copyin: response %d\n", rv));

	*resp = rw.rw_data;
	return rv;
}
/*
 * Send dlen bytes from data to the client, to be stored at its address
 * remaddr.  No response is waited for.
 *
 * Header, copy descriptor and payload are sent under the client's send
 * lock.  Sending stops at the first failure, so an early failure is no
 * longer masked by the result of a later send.
 *
 * Returns 0 on success, otherwise the nonzero value from dosend().
 */
static int
send_copyout_req(struct spclient *spc, const void *remaddr,
	const void *data, size_t dlen)
{
	struct rsp_hdr rhdr;
	struct rsp_copydata copydata;
	int rv;

	DPRINTF(("copyout_req (async): %zu bytes to %p\n", dlen, remaddr));

	rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata) + dlen;
	rhdr.rsp_reqno = nextreq(spc);
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_COPYOUT;
	rhdr.rsp_sysnum = 0;

	copydata.rcp_addr = __UNCONST(remaddr);
	copydata.rcp_len = dlen;

	sendlock(spc);
	rv = dosend(spc, &rhdr, sizeof(rhdr));
	if (rv == 0)
		rv = dosend(spc, &copydata, sizeof(copydata));
	if (rv == 0)
		rv = dosend(spc, data, dlen);
	sendunlock(spc);

	return rv;
}
/*
 * Ask the client to create an anonymous mapping of howmuch bytes and
 * wait for the reply.
 *
 * The request is registered with putwait() before it is sent; if
 * sending fails the registration is undone with unputwait().  The
 * size is only sent if the header went out successfully.
 *
 * On return *resp is set to the response buffer recorded by
 * waitresp(); rumpuser_sp_anonmmap() reads the mapped address from it
 * and frees it.
 *
 * Returns 0 on success, otherwise the nonzero value from dosend() or
 * waitresp().
 */
static int
anonmmap_req(struct spclient *spc, size_t howmuch, void **resp)
{
	struct rsp_hdr rhdr;
	struct respwait rw;
	int rv;

	DPRINTF(("anonmmap_req: %zu bytes\n", howmuch));

	rhdr.rsp_len = sizeof(rhdr) + sizeof(howmuch);
	rhdr.rsp_class = RUMPSP_REQ;
	rhdr.rsp_type = RUMPSP_ANONMMAP;
	rhdr.rsp_sysnum = 0;

	/* NOTE(review): rsp_reqno is presumably filled in by putwait() */
	putwait(spc, &rw, &rhdr);
	rv = dosend(spc, &rhdr, sizeof(rhdr));
	if (rv == 0)
		rv = dosend(spc, &howmuch, sizeof(howmuch));
	if (rv) {
		unputwait(spc, &rw);
		return rv;
	}

	rv = waitresp(spc, &rw);
	*resp = rw.rw_data;

	/* only peek into the response buffer when there actually is one */
	if (rv == 0 && *resp != NULL) {
		DPRINTF(("anonmmap: mapped at %p\n", **(void ***)resp));
	}

	return rv;
}
/*
 * Take one additional reference on the client.  The reference count
 * is modified under spc_mtx; spcrelease() drops it again.
 */
static void
spcref(struct spclient *spc)
{

	pthread_mutex_lock(&spc->spc_mtx);
	spc->spc_refcnt += 1;
	pthread_mutex_unlock(&spc->spc_mtx);
}
/*
 * Drop one reference on the client.  While references remain this is
 * all that happens.  When the last one goes away the client is torn
 * down: its main lwp is switched to and released, the socket is
 * closed, the dying flag is cleared and the global disconnect counter
 * is bumped so that spserver() notices.
 */
static void
spcrelease(struct spclient *spc)
{
	int ref;

	pthread_mutex_lock(&spc->spc_mtx);
	ref = --spc->spc_refcnt;
	pthread_mutex_unlock(&spc->spc_mtx);

	if (ref > 0)
		return;

	DPRINTF(("spcrelease: spc %p fd %d\n", spc, spc->spc_fd));

	/* nobody may be waiting and no partial frame may be pending */
	_DIAGASSERT(TAILQ_EMPTY(&spc->spc_respwait));
	_DIAGASSERT(spc->spc_buf == NULL);

	lwproc_switch(spc->spc_mainlwp);
	lwproc_release();
	spc->spc_mainlwp = NULL;

	close(spc->spc_fd);
	spc->spc_fd = -1;
	/* serv_handleconn() only hands out slots with spc_dying == 0 */
	spc->spc_dying = 0;

	atomic_inc_uint(&disco);
}
/*
 * Handle disconnection of the client in slot idx: stop polling its
 * descriptor, mark the client as dying, kick everything waiting on it
 * with kickall(), and drop one reference with spcrelease().
 */
static void
serv_handledisco(unsigned int idx)
{
	struct spclient *spc = &spclist[idx];

	DPRINTF(("rump_sp: disconnecting [%u]\n", idx));

	/* a negative fd is ignored by poll(2); also clear stale events */
	pfdlist[idx].fd = -1;
	pfdlist[idx].revents = 0;

	pthread_mutex_lock(&spc->spc_mtx);
	spc->spc_dying = 1;
	kickall(spc);
	pthread_mutex_unlock(&spc->spc_mtx);

	spcrelease(spc);
}
/*
 * Accept a pending connection on the listening socket fd and set it up
 * as a new client.
 *
 * fd       listening socket
 * connhook per-connection hook, called with the accepted descriptor;
 *          a nonzero return rejects the connection
 * busy     if nonzero, the connection is accepted and closed right away
 *
 * Returns the slot index assigned to the new client, or 0 if no client
 * was added (accept failure, busy, setup failure or no free slot).
 *
 * The slot search result is checked before the slot is touched;
 * previously spclist[MAXCLI] could be passed to lwproc_newproc() before
 * the bounds assertion ran.  A failing fcntl(F_GETFL) is now treated
 * as an error instead of having its -1 result OR'ed into the new flags.
 */
static unsigned
serv_handleconn(int fd, connecthook_fn connhook, int busy)
{
	struct sockaddr_storage ss;
	socklen_t sl = sizeof(ss);
	int newfd, flags;
	unsigned i;

	/*LINTED: cast ok */
	newfd = accept(fd, (struct sockaddr *)&ss, &sl);
	if (newfd == -1)
		return 0;

	if (busy) {
		close(newfd); /* EBUSY */
		return 0;
	}

	/* XXX: should do some sort of handshake too */

	flags = fcntl(newfd, F_GETFL, 0);
	if (flags == -1 || fcntl(newfd, F_SETFL, flags | O_NONBLOCK) == -1) {
		close(newfd);
		return 0;
	}

	if (connhook(newfd) != 0) {
		close(newfd);
		return 0;
	}

	/* find empty slot the simple way */
	for (i = 0; i < MAXCLI; i++) {
		if (pfdlist[i].fd == -1 && spclist[i].spc_dying == 0)
			break;
	}
	if (i == MAXCLI) {
		/* every slot is in use or still being torn down */
		close(newfd);
		return 0;
	}

	if (lwproc_newproc(&spclist[i]) != 0) {
		close(newfd);
		return 0;
	}

	pfdlist[i].fd = newfd;
	spclist[i].spc_fd = newfd;
	spclist[i].spc_mainlwp = lwproc_curlwp();
	spclist[i].spc_istatus = SPCSTATUS_BUSY; /* dedicated receiver */
	spclist[i].spc_pid = lwproc_getpid();
	spclist[i].spc_refcnt = 1;

	TAILQ_INIT(&spclist[i].spc_respwait);

	DPRINTF(("rump_sp: added new connection fd %d at idx %u, pid %d\n",
	    newfd, i, lwproc_getpid()));

	lwproc_switch(NULL);

	return i;
}
/*
 * Execute one syscall request on behalf of a client and send back the
 * result.
 *
 * spc   client that issued the request
 * rhdr  request header; supplies the syscall number and request number
 * data  syscall argument buffer; freed here after the syscall has run
 *
 * A new lwp is created for the client's pid for the duration of the
 * call, after which lwproc_switch(NULL) is called.
 */
static void
serv_handlesyscall(struct spclient *spc, struct rsp_hdr *rhdr, uint8_t *data)
{
	register_t retval[2] = {0, 0};
	int rv, sysnum;

	sysnum = (int)rhdr->rsp_sysnum;
	DPRINTF(("rump_sp: handling syscall %d from client %d\n",
	    sysnum, 0));

	/* NOTE(review): the return value of lwproc_newlwp() is not checked */
	lwproc_newlwp(spc->spc_pid);
	rv = rumpsyscall(sysnum, data, retval);
	lwproc_switch(NULL);

	free(data);

	/* register_t need not be int-sized, so cast explicitly for printing */
	DPRINTF(("rump_sp: got return value %d & %ld/%ld\n",
	    rv, (long)retval[0], (long)retval[1]));

	send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval);
}
/*
 * Argument bundle handed from handlereq() to the per-request thread
 * serv_syscallbouncer(), which frees it when done.
 */
struct sysbouncearg {
/* client that sent the request; handlereq() takes a reference for the thread */
struct spclient *sba_spc;
/* copy of the request header (taken from spc_hdr) */
struct rsp_hdr sba_hdr;
/* request payload taken over from spc_buf; freed by serv_handlesyscall() */
uint8_t *sba_data;
};
/*
 * Thread entry point for servicing a single syscall request.
 *
 * arg is a struct sysbouncearg allocated by handlereq().  After the
 * request has been handled, the client reference taken by handlereq()
 * is dropped and the argument structure is freed.  Always returns NULL.
 */
static void *
serv_syscallbouncer(void *arg)
{
	struct sysbouncearg *barg = arg;

	serv_handlesyscall(barg->sba_spc, &barg->sba_hdr, barg->sba_data);
	spcrelease(barg->sba_spc);
	free(arg);

	return NULL;
}
/*
 * Copy len bytes from address uaddr in the client to kaddr locally.
 *
 * arg is the struct spclient of the client.  rumpuser__kunlock() is
 * called before the request and rumpuser__klock() after it, around
 * the (blocking) round trip to the client.
 *
 * Returns 0 on success and EFAULT if the request failed.
 */
int
rumpuser_sp_copyin(void *arg, const void *uaddr, void *kaddr, size_t len)
{
	struct spclient *spc = arg;
	void *rdata = NULL; /* XXXuninit */
	int rv, nlocks;

	rumpuser__kunlock(0, &nlocks, NULL);

	rv = copyin_req(spc, uaddr, len, &rdata);
	if (rv)
		goto out;

	/* the response buffer is ours to free once the data is copied */
	memcpy(kaddr, rdata, len);
	free(rdata);

 out:
	rumpuser__klock(nlocks, NULL);
	if (rv)
		return EFAULT;
	return 0;
}
/*
 * Copy dlen bytes from the local address kaddr to address uaddr in the
 * client.
 *
 * arg is the struct spclient of the client.  rumpuser__kunlock() is
 * called before the request is sent and rumpuser__klock() afterwards.
 *
 * Returns 0 on success and EFAULT if sending the request failed.
 */
int
rumpuser_sp_copyout(void *arg, const void *kaddr, void *uaddr, size_t dlen)
{
	struct spclient *spc = arg;
	int nlocks, error;

	rumpuser__kunlock(0, &nlocks, NULL);
	error = send_copyout_req(spc, uaddr, kaddr, dlen);
	rumpuser__klock(nlocks, NULL);

	return error ? EFAULT : 0;
}
/*
 * Ask the client to create an anonymous mapping of howmuch bytes.
 *
 * arg is the struct spclient of the client.  rumpuser__kunlock() is
 * called before the request and rumpuser__klock() after it.
 *
 * Returns EFAULT if the request itself failed (*addr is left alone).
 * Otherwise *addr receives the address reported by the client, and the
 * return value is 0, or ENOMEM if that address is NULL.
 */
int
rumpuser_sp_anonmmap(void *arg, size_t howmuch, void **addr)
{
	struct spclient *spc = arg;
	void *mapped, *reply;
	int nlocks, error;

	rumpuser__kunlock(0, &nlocks, NULL);

	if (anonmmap_req(spc, howmuch, &reply) != 0) {
		error = EFAULT;
	} else {
		/* the response buffer holds a single pointer value */
		mapped = *(void **)reply;
		free(reply);

		error = (mapped == NULL) ? ENOMEM : 0;
		*addr = mapped;
	}

	rumpuser__klock(nlocks, NULL);

	return error;
}
/*
*
* Startup routines and mainloop for server.
*
*/
/*
 * Startup argument for the spserver() thread; allocated and filled in
 * by rumpuser_sp_init().
 */
struct spservarg {
/* listening socket; becomes pfdlist[0].fd in spserver() */
int sps_sock;
/* hook from parsetab[], passed to serv_handleconn() for each new connection */
connecthook_fn sps_connhook;
};
/*
 * Dispatch the request frame just received from spc to a newly created
 * detached thread running serv_syscallbouncer().
 *
 * Ownership of the received payload (spc_buf) moves to the thread and
 * the client's receive state is reset for the next frame.  A client
 * reference is taken on behalf of the thread; the thread drops it when
 * done.  Allocation or thread creation failure aborts the process.
 */
static void
handlereq(struct spclient *spc)
{
	struct sysbouncearg *sba;
	pthread_attr_t pattr;
	pthread_t pt;
	int rv;

	/* XXX: check that it's a syscall */

	sba = malloc(sizeof(*sba));
	if (sba == NULL) {
		/* panic */
		abort();
	}

	sba->sba_spc = spc;
	sba->sba_hdr = spc->spc_hdr;
	sba->sba_data = spc->spc_buf;

	spc->spc_buf = NULL;
	spc->spc_off = 0;

	pthread_attr_init(&pattr);
	pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_DETACHED);

	spcref(spc);
	rv = pthread_create(&pt, &pattr, serv_syscallbouncer, sba);
	/* the attribute object is not needed once the thread exists */
	pthread_attr_destroy(&pattr);
	if (rv != 0) {
		/* panic */
		abort();
	}
}
static void *
spserver(void *arg)
{
struct spservarg *sarg = arg;
struct spclient *spc;
unsigned idx;
int seen;
int rv;
unsigned int nfds, maxidx;
for (idx = 0; idx < MAXCLI; idx++) {
pfdlist[idx].fd = -1;
pfdlist[idx].events = POLLIN;
spc = &spclist[idx];
pthread_mutex_init(&spc->spc_mtx, NULL);
pthread_cond_init(&spc->spc_cv, NULL);
}
pfdlist[0].fd = sarg->sps_sock;
pfdlist[0].events = POLLIN;
nfds = 1;
maxidx = 0;
DPRINTF(("rump_sp: server mainloop\n"));
for (;;) {
/* g/c hangarounds (eventually) */
if (disco) {
int discoed;
discoed = atomic_swap_uint(&disco, 0);
while (discoed--) {
nfds--;
idx = maxidx;
2010-11-24 18:17:46 +03:00
while (idx) {
if (pfdlist[idx].fd != -1) {
maxidx = idx;
break;
}
2010-11-24 18:17:46 +03:00
idx--;
}
DPRINTF(("rump_sp: set maxidx to [%u]\n",
maxidx));
2010-11-24 18:17:46 +03:00
assert(maxidx+1 >= nfds);
}
}
DPRINTF(("rump_sp: loop nfd %d\n", maxidx+1));
seen = 0;
rv = poll(pfdlist, maxidx+1, INFTIM);
assert(maxidx+1 <= MAXCLI);
assert(rv != 0);
if (rv == -1) {
if (errno == EINTR)
continue;
fprintf(stderr, "rump_spserver: poll returned %d\n",
errno);
break;
}
2010-11-24 18:17:46 +03:00
for (idx = 0; seen < rv && idx < MAXCLI; idx++) {
if ((pfdlist[idx].revents & POLLIN) == 0)
continue;
seen++;
DPRINTF(("rump_sp: activity at [%u] %d/%d\n",
idx, seen, rv));
if (idx > 0) {
spc = &spclist[idx];
DPRINTF(("rump_sp: mainloop read [%u]\n", idx));
switch (readframe(spc)) {
case 0:
break;
case -1:
serv_handledisco(idx);
break;
default:
switch (spc->spc_hdr.rsp_class) {
case RUMPSP_RESP:
kickwaiter(spc);
break;
case RUMPSP_REQ:
handlereq(spc);
break;
default:
printf("PANIC\n");
abort();
break;
}
break;
}
} else {
DPRINTF(("rump_sp: mainloop new connection\n"));
idx = serv_handleconn(pfdlist[0].fd,
sarg->sps_connhook, nfds == MAXCLI);
if (idx)
nfds++;
if (idx > maxidx)
maxidx = idx;
2010-11-24 18:17:46 +03:00
DPRINTF(("rump_sp: maxid now %d\n", maxidx));
}
}
}
return NULL;
}
/*
 * Start the sysproxy server.
 *
 * spopsp  operations used by the server; copied into the file-scope
 *         spops
 * url     address to listen on, parsed with parseurl()
 *
 * Creates, binds and listens on a stream socket for the parsed address
 * and starts a detached thread running spserver().
 *
 * Returns 0 on success or an errno value on failure.  On failure after
 * socket creation, the socket is closed and the thread argument is
 * freed.  A thread creation failure reports the error number returned
 * by pthread_create() (which does not set errno), and errno from
 * bind()/listen() is saved before stdio gets a chance to clobber it.
 */
int
rumpuser_sp_init(const struct rumpuser_sp_ops *spopsp, const char *url)
{
	pthread_t pt;
	struct spservarg *sarg;
	struct sockaddr *sap;
	char *p;
	unsigned idx;
	int error, s;

	/* parseurl() gets a private, writable copy of the url */
	p = strdup(url);
	if (p == NULL)
		return ENOMEM;
	error = parseurl(p, &sap, &idx, 1);
	free(p);
	if (error)
		return error;

	s = socket(parsetab[idx].domain, SOCK_STREAM, 0);
	if (s == -1)
		return errno;

	spops = *spopsp;
	sarg = malloc(sizeof(*sarg));
	if (sarg == NULL) {
		close(s);
		return ENOMEM;
	}

	sarg->sps_sock = s;
	sarg->sps_connhook = parsetab[idx].connhook;

	/* NOTE(review): ownership of sap is not visible here; verify */
	/*LINTED*/
	if (bind(s, sap, sap->sa_len) == -1) {
		error = errno;
		fprintf(stderr, "rump_sp: server bind failed\n");
		goto fail;
	}
	if (listen(s, 20) == -1) {
		error = errno;
		fprintf(stderr, "rump_sp: server listen failed\n");
		goto fail;
	}

	if ((error = pthread_create(&pt, NULL, spserver, sarg)) != 0) {
		fprintf(stderr, "rump_sp: cannot create wrkr thread\n");
		goto fail;
	}
	pthread_detach(pt);

	return 0;

 fail:
	free(sarg);
	close(s);
	return error;
}