/* $NetBSD: rumpuser_sp.c,v 1.12 2010/11/24 15:17:46 pooka Exp $ */ /* * Copyright (c) 2010 Antti Kantee. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Sysproxy routines. This provides system RPC support over host sockets. * The most notable limitation is that the client and server must share * the same ABI. This does not mean that they have to be the same * machine or that they need to run the same version of the host OS, * just that they must agree on the data structures. This even *might* * work correctly from one hardware architecture to another. * * Not finished yet, i.e. don't use in production. Lacks locking plus * handling of multiple clients and unexpected connection closes. */ #include __RCSID("$NetBSD: rumpuser_sp.c,v 1.12 2010/11/24 15:17:46 pooka Exp $"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "sp_common.c" #define MAXCLI 4 static struct pollfd pfdlist[MAXCLI]; static struct spclient spclist[MAXCLI]; static unsigned int disco; static struct rumpuser_sp_ops spops; /* * Manual wrappers, since librump does not have access to the * user namespace wrapped interfaces. */ static void lwproc_switch(struct lwp *l) { spops.spop_schedule(); spops.spop_lwproc_switch(l); spops.spop_unschedule(); } static void lwproc_release(void) { spops.spop_schedule(); spops.spop_lwproc_release(); spops.spop_unschedule(); } static int lwproc_newproc(struct spclient *spc) { int rv; spops.spop_schedule(); rv = spops.spop_lwproc_newproc(spc); spops.spop_unschedule(); return rv; } static int lwproc_newlwp(pid_t pid) { int rv; spops.spop_schedule(); rv = spops.spop_lwproc_newlwp(pid); spops.spop_unschedule(); return rv; } static struct lwp * lwproc_curlwp(void) { struct lwp *l; spops.spop_schedule(); l = spops.spop_lwproc_curlwp(); spops.spop_unschedule(); return l; } static pid_t lwproc_getpid(void) { pid_t p; spops.spop_schedule(); p = spops.spop_getpid(); spops.spop_unschedule(); return p; } static int rumpsyscall(int sysnum, void *data, register_t *retval) { int rv; spops.spop_schedule(); rv = spops.spop_syscall(sysnum, data, retval); spops.spop_unschedule(); return rv; } static uint64_t nextreq(struct spclient *spc) { uint64_t nw; pthread_mutex_lock(&spc->spc_mtx); nw = spc->spc_nextreq++; pthread_mutex_unlock(&spc->spc_mtx); return nw; } static int send_syscall_resp(struct spclient *spc, uint64_t reqno, int error, register_t *retval) { struct rsp_hdr rhdr; struct rsp_sysresp sysresp; int rv; rhdr.rsp_len = sizeof(rhdr) + sizeof(sysresp); rhdr.rsp_reqno = reqno; rhdr.rsp_class = RUMPSP_RESP; rhdr.rsp_type = RUMPSP_SYSCALL; rhdr.rsp_sysnum = 0; sysresp.rsys_error = error; memcpy(sysresp.rsys_retval, retval, sizeof(sysresp.rsys_retval)); sendlock(spc); rv = dosend(spc, &rhdr, sizeof(rhdr)); rv = dosend(spc, &sysresp, sizeof(sysresp)); sendunlock(spc); return rv; } static int copyin_req(struct spclient *spc, const void *remaddr, size_t dlen, void **resp) { struct rsp_hdr rhdr; struct rsp_copydata copydata; struct respwait rw; int rv; DPRINTF(("copyin_req: %zu bytes from %p\n", dlen, remaddr)); rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata); rhdr.rsp_class = RUMPSP_REQ; rhdr.rsp_type = RUMPSP_COPYIN; rhdr.rsp_sysnum = 0; copydata.rcp_addr = __UNCONST(remaddr); copydata.rcp_len = dlen; putwait(spc, &rw, &rhdr); sendlock(spc); rv = dosend(spc, &rhdr, sizeof(rhdr)); rv = dosend(spc, ©data, sizeof(copydata)); sendunlock(spc); if (rv) return rv; /* XXX: unputwait */ rv = waitresp(spc, &rw); DPRINTF(("copyin: response %d\n", rv)); *resp = rw.rw_data; return rv; } static int send_copyout_req(struct spclient *spc, const void *remaddr, const void *data, size_t dlen) { struct rsp_hdr rhdr; struct rsp_copydata copydata; int rv; DPRINTF(("copyout_req (async): %zu bytes to %p\n", dlen, remaddr)); rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata) + dlen; rhdr.rsp_reqno = nextreq(spc); rhdr.rsp_class = RUMPSP_REQ; rhdr.rsp_type = RUMPSP_COPYOUT; rhdr.rsp_sysnum = 0; copydata.rcp_addr = __UNCONST(remaddr); copydata.rcp_len = dlen; sendlock(spc); rv = dosend(spc, &rhdr, sizeof(rhdr)); rv = dosend(spc, ©data, sizeof(copydata)); rv = dosend(spc, data, dlen); sendunlock(spc); return rv; } static int anonmmap_req(struct spclient *spc, size_t howmuch, void **resp) { struct rsp_hdr rhdr; struct respwait rw; int rv; DPRINTF(("anonmmap_req: %zu bytes\n", howmuch)); rhdr.rsp_len = sizeof(rhdr) + sizeof(howmuch); rhdr.rsp_class = RUMPSP_REQ; rhdr.rsp_type = RUMPSP_ANONMMAP; rhdr.rsp_sysnum = 0; putwait(spc, &rw, &rhdr); sendlock(spc); rv = dosend(spc, &rhdr, sizeof(rhdr)); rv = dosend(spc, &howmuch, sizeof(howmuch)); sendunlock(spc); if (rv) return rv; /* XXX: unputwait */ rv = waitresp(spc, &rw); *resp = rw.rw_data; DPRINTF(("anonmmap: mapped at %p\n", **(void ***)resp)); return rv; } static void spcref(struct spclient *spc) { pthread_mutex_lock(&spc->spc_mtx); spc->spc_refcnt++; pthread_mutex_unlock(&spc->spc_mtx); } static void spcrelease(struct spclient *spc) { int ref; pthread_mutex_lock(&spc->spc_mtx); ref = --spc->spc_refcnt; pthread_mutex_unlock(&spc->spc_mtx); if (ref > 0) return; DPRINTF(("spcrelease: spc %p fd %d\n", spc, spc->spc_fd)); _DIAGASSERT(TAILQ_EMPTY(spc->spc_respwait)); _DIAGASSERT(spc->spc_buf == NULL); lwproc_switch(spc->spc_mainlwp); lwproc_release(); spc->spc_mainlwp = NULL; close(spc->spc_fd); spc->spc_fd = -1; spc->spc_dying = 0; atomic_inc_uint(&disco); } static void serv_handledisco(unsigned int idx) { struct spclient *spc = &spclist[idx]; DPRINTF(("rump_sp: disconnecting [%u]\n", idx)); pfdlist[idx].fd = -1; pfdlist[idx].revents = 0; pthread_mutex_lock(&spc->spc_mtx); spc->spc_dying = 1; kickall(spc); pthread_mutex_unlock(&spc->spc_mtx); spcrelease(spc); } static unsigned serv_handleconn(int fd, connecthook_fn connhook, int busy) { struct sockaddr_storage ss; socklen_t sl = sizeof(ss); int newfd, flags; unsigned i; /*LINTED: cast ok */ newfd = accept(fd, (struct sockaddr *)&ss, &sl); if (newfd == -1) return 0; if (busy) { close(newfd); /* EBUSY */ return 0; } /* XXX: should do some sort of handshake too */ flags = fcntl(newfd, F_GETFL, 0); if (fcntl(newfd, F_SETFL, flags | O_NONBLOCK) == -1) { close(newfd); return 0; } if (connhook(newfd) != 0) { close(newfd); return 0; } /* find empty slot the simple way */ for (i = 0; i < MAXCLI; i++) { if (pfdlist[i].fd == -1 && spclist[i].spc_dying == 0) break; } if (lwproc_newproc(&spclist[i]) != 0) { close(newfd); return 0; } assert(i < MAXCLI); pfdlist[i].fd = newfd; spclist[i].spc_fd = newfd; spclist[i].spc_mainlwp = lwproc_curlwp(); spclist[i].spc_istatus = SPCSTATUS_BUSY; /* dedicated receiver */ spclist[i].spc_pid = lwproc_getpid(); spclist[i].spc_refcnt = 1; TAILQ_INIT(&spclist[i].spc_respwait); DPRINTF(("rump_sp: added new connection at idx %u, pid %d\n", i, lwproc_getpid())); lwproc_switch(NULL); return i; } static void serv_handlesyscall(struct spclient *spc, struct rsp_hdr *rhdr, uint8_t *data) { register_t retval[2] = {0, 0}; int rv, sysnum; sysnum = (int)rhdr->rsp_sysnum; DPRINTF(("rump_sp: handling syscall %d from client %d\n", sysnum, 0)); lwproc_newlwp(spc->spc_pid); rv = rumpsyscall(sysnum, data, retval); lwproc_switch(NULL); free(data); DPRINTF(("rump_sp: got return value %d & %d/%d\n", rv, retval[0], retval[1])); send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval); } struct sysbouncearg { struct spclient *sba_spc; struct rsp_hdr sba_hdr; uint8_t *sba_data; }; static void * serv_syscallbouncer(void *arg) { struct sysbouncearg *barg = arg; serv_handlesyscall(barg->sba_spc, &barg->sba_hdr, barg->sba_data); spcrelease(barg->sba_spc); free(arg); return NULL; } int rumpuser_sp_copyin(void *arg, const void *uaddr, void *kaddr, size_t len) { struct spclient *spc = arg; void *rdata = NULL; /* XXXuninit */ int rv; rv = copyin_req(spc, uaddr, len, &rdata); if (rv) return EFAULT; memcpy(kaddr, rdata, len); free(rdata); return 0; } int rumpuser_sp_copyout(void *arg, const void *kaddr, void *uaddr, size_t dlen) { struct spclient *spc = arg; if (send_copyout_req(spc, uaddr, kaddr, dlen) != 0) return EFAULT; return 0; } int rumpuser_sp_anonmmap(void *arg, size_t howmuch, void **addr) { struct spclient *spc = arg; void *resp, *rdata; int rv; rv = anonmmap_req(spc, howmuch, &rdata); if (rv) return rv; resp = *(void **)rdata; free(rdata); if (resp == NULL) { return ENOMEM; } *addr = resp; return 0; } /* * * Startup routines and mainloop for server. * */ struct spservarg { int sps_sock; connecthook_fn sps_connhook; }; static void handlereq(struct spclient *spc) { struct sysbouncearg *sba; pthread_attr_t pattr; pthread_t pt; int rv; /* XXX: check that it's a syscall */ sba = malloc(sizeof(*sba)); if (sba == NULL) { /* panic */ abort(); } sba->sba_spc = spc; sba->sba_hdr = spc->spc_hdr; sba->sba_data = spc->spc_buf; spc->spc_buf = NULL; spc->spc_off = 0; pthread_attr_init(&pattr); pthread_attr_setdetachstate(&pattr, 1); spcref(spc); if ((rv = pthread_create(&pt, &pattr, serv_syscallbouncer, sba)) != 0) { /* panic */ abort(); } } static void * spserver(void *arg) { struct spservarg *sarg = arg; struct spclient *spc; unsigned idx; int seen; int rv; unsigned int nfds, maxidx; for (idx = 0; idx < MAXCLI; idx++) { pfdlist[idx].fd = -1; pfdlist[idx].events = POLLIN; spc = &spclist[idx]; pthread_mutex_init(&spc->spc_mtx, NULL); pthread_cond_init(&spc->spc_cv, NULL); } pfdlist[0].fd = sarg->sps_sock; pfdlist[0].events = POLLIN; nfds = 1; maxidx = 0; DPRINTF(("rump_sp: server mainloop\n")); for (;;) { /* g/c hangarounds (eventually) */ if (disco) { int discoed; discoed = atomic_swap_uint(&disco, 0); while (discoed--) { nfds--; idx = maxidx; while (idx) { if (pfdlist[idx].fd != -1) { maxidx = idx; break; } idx--; } DPRINTF(("rump_sp: set maxidx to [%u]\n", maxidx)); assert(maxidx+1 >= nfds); } } DPRINTF(("rump_sp: loop nfd %d\n", maxidx+1)); seen = 0; rv = poll(pfdlist, maxidx+1, INFTIM); assert(maxidx+1 <= MAXCLI); assert(rv != 0); if (rv == -1) { if (errno == EINTR) continue; fprintf(stderr, "rump_spserver: poll returned %d\n", errno); break; } for (idx = 0; seen < rv && idx < MAXCLI; idx++) { if ((pfdlist[idx].revents & POLLIN) == 0) continue; seen++; DPRINTF(("rump_sp: activity at [%u] %d/%d\n", idx, seen, rv)); if (idx > 0) { spc = &spclist[idx]; DPRINTF(("rump_sp: mainloop read [%u]\n", idx)); switch (readframe(spc)) { case 0: break; case -1: serv_handledisco(idx); break; default: switch (spc->spc_hdr.rsp_class) { case RUMPSP_RESP: kickwaiter(spc); break; case RUMPSP_REQ: handlereq(spc); break; default: printf("PANIC\n"); abort(); break; } break; } } else { DPRINTF(("rump_sp: mainloop new connection\n")); idx = serv_handleconn(pfdlist[0].fd, sarg->sps_connhook, nfds == MAXCLI); if (idx) nfds++; if (idx > maxidx) maxidx = idx; DPRINTF(("rump_sp: maxid now %d\n", maxidx)); } } } return NULL; } int rumpuser_sp_init(const struct rumpuser_sp_ops *spopsp, const char *url) { pthread_t pt; struct spservarg *sarg; struct sockaddr *sap; char *p; unsigned idx; int error, s; p = strdup(url); if (p == NULL) return ENOMEM; error = parseurl(p, &sap, &idx, 1); free(p); if (error) return error; s = socket(parsetab[idx].domain, SOCK_STREAM, 0); if (s == -1) return errno; spops = *spopsp; sarg = malloc(sizeof(*sarg)); if (sarg == NULL) { close(s); return ENOMEM; } sarg->sps_sock = s; sarg->sps_connhook = parsetab[idx].connhook; /* sloppy error recovery */ /*LINTED*/ if (bind(s, sap, sap->sa_len) == -1) { fprintf(stderr, "rump_sp: server bind failed\n"); return errno; } if (listen(s, 20) == -1) { fprintf(stderr, "rump_sp: server listen failed\n"); return errno; } if ((error = pthread_create(&pt, NULL, spserver, sarg)) != 0) { fprintf(stderr, "rump_sp: cannot create wrkr thread\n"); return errno; } pthread_detach(pt); return 0; }