Pull in the changes from the dpdk, netmap and snabb switch repos.

There are two major changes:

1) All thread context policy is pushed down to the hypercalls.
   This lets performance-oriented implementations control packet
   scheduling better (e.g. pin packet reception to specific
   physical cores).
2) Generalize linkstr, meaning that the interface can now take an
   arbitrary string which is passed to the create() hypercall.  This can
   be used to map backend device characteristics to the rump kernel
   interface instance.
This commit is contained in:
pooka 2014-03-03 13:56:40 +00:00
parent 2b310a0c9d
commit c40bbed43a
4 changed files with 352 additions and 237 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: if_virt.c,v 1.38 2014/02/21 08:33:51 skrll Exp $ */
/* $NetBSD: if_virt.c,v 1.39 2014/03/03 13:56:40 pooka Exp $ */
/*
* Copyright (c) 2008, 2013 Antti Kantee. All Rights Reserved.
@ -26,24 +26,17 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_virt.c,v 1.38 2014/02/21 08:33:51 skrll Exp $");
__KERNEL_RCSID(0, "$NetBSD: if_virt.c,v 1.39 2014/03/03 13:56:40 pooka Exp $");
#include <sys/param.h>
#include <sys/condvar.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/kthread.h>
#include <sys/mutex.h>
#include <sys/poll.h>
#include <sys/sockio.h>
#include <sys/socketvar.h>
#include <sys/cprng.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_ether.h>
#include <net/if_tap.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
@ -70,75 +63,86 @@ static void virtif_stop(struct ifnet *, int);
/*
 * Per-interface soft state for one virtif instance.
 *
 * NOTE(review): this listing appears to carry fields from both sides of
 * the change (thread/lock state next to sc_num/sc_linkstr) -- confirm
 * which of them exist in the revision being built.
 */
struct virtif_sc {
/* Ethernet common state; the struct ifnet used throughout is sc_ec.ec_if */
struct ethercom sc_ec;
/* hypercall-side handle, filled in by VIFHYPER_CREATE() */
struct virtif_user *sc_viu;
/* set once by virtif_unclone(); polled by the receiver/sender threads */
bool sc_dying;
/* kthread handles stored by kthread_create() in virtif_clone() */
struct lwp *sc_l_snd, *sc_l_rcv;
/* sc_mtx/sc_cv pair: used to wake the threads on start/stop/dying */
kmutex_t sc_mtx;
kcondvar_t sc_cv;
/* unit number given to virtif_clone(); low byte ends up in the MAC address */
int sc_num;
/* string handed to VIFHYPER_CREATE(); NULL until it has been set */
char *sc_linkstr;
};
static void virtif_receiver(void *);
static void virtif_sender(void *);
static int virtif_clone(struct if_clone *, int);
static int virtif_unclone(struct ifnet *);
struct if_clone VIF_CLONER =
IF_CLONE_INITIALIZER(VIF_NAME, virtif_clone, virtif_unclone);
static int
virtif_create(struct ifnet *ifp)
{
uint8_t enaddr[ETHER_ADDR_LEN] = { 0xb2, 0x0a, 0x00, 0x0b, 0x0e, 0x01 };
char enaddrstr[3*ETHER_ADDR_LEN];
struct virtif_sc *sc = ifp->if_softc;
int error;
if (sc->sc_viu)
panic("%s: already created", ifp->if_xname);
enaddr[2] = cprng_fast32() & 0xff;
enaddr[5] = sc->sc_num & 0xff;
if ((error = VIFHYPER_CREATE(sc->sc_linkstr,
sc, enaddr, &sc->sc_viu)) != 0) {
printf("VIFHYPER_CREATE failed: %d\n", error);
return error;
}
ether_ifattach(ifp, enaddr);
ether_snprintf(enaddrstr, sizeof(enaddrstr), enaddr);
aprint_normal_ifnet(ifp, "Ethernet address %s\n", enaddrstr);
IFQ_SET_READY(&ifp->if_snd);
return 0;
}
/*
 * Cloner create callback: allocate the softc for unit "num", name the
 * interface VIF_NAME<num>, install the ifnet callbacks and attach it.
 *
 * NOTE(review): this listing appears to interleave both sides of the
 * change -- e.g. the two-argument VIFHYPER_CREATE() call and the kthread
 * setup sit next to the deferred virtif_create() path.  Confirm which
 * statements belong to the revision being built before relying on the
 * control flow as shown.
 */
static int
virtif_clone(struct if_clone *ifc, int num)
{
struct virtif_sc *sc;
struct virtif_user *viu;
struct ifnet *ifp;
uint8_t enaddr[ETHER_ADDR_LEN] = { 0xb2, 0x0a, 0x00, 0x0b, 0x0e, 0x01 };
int error = 0;
/* the unit number is stored in a single MAC address octet below */
if (num >= 0x100)
return E2BIG;
if ((error = VIFHYPER_CREATE(num, &viu)) != 0)
return error;
enaddr[2] = cprng_fast32() & 0xff;
enaddr[5] = num;
sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
sc->sc_dying = false;
sc->sc_viu = viu;
mutex_init(&sc->sc_mtx, MUTEX_DEFAULT, IPL_NONE);
cv_init(&sc->sc_cv, VIF_NAME "snd");
sc->sc_num = num;
/* the ifnet lives inside the softc allocation */
ifp = &sc->sc_ec.ec_if;
sprintf(ifp->if_xname, "%s%d", VIF_NAME, num);
ifp->if_softc = sc;
if (rump_threads) {
if ((error = kthread_create(PRI_NONE, KTHREAD_MUSTJOIN, NULL,
virtif_receiver, ifp, &sc->sc_l_rcv, VIF_NAME "ifr")) != 0)
goto out;
if ((error = kthread_create(PRI_NONE,
KTHREAD_MUSTJOIN | KTHREAD_MPSAFE, NULL,
virtif_sender, ifp, &sc->sc_l_snd, VIF_NAME "ifs")) != 0)
goto out;
} else {
printf("WARNING: threads not enabled, receive NOT working\n");
}
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_init = virtif_init;
ifp->if_ioctl = virtif_ioctl;
ifp->if_start = virtif_start;
ifp->if_stop = virtif_stop;
IFQ_SET_READY(&ifp->if_snd);
ifp->if_mtu = ETHERMTU;
ifp->if_dlt = DLT_EN10MB;
if_attach(ifp);
ether_ifattach(ifp, enaddr);
out:
#ifndef RUMP_VIF_LINKSTR
/*
* if the underlying interface does not expect linkstr, we can
* create everything now. Otherwise, we need to wait for
* SIOCSLINKSTR.
*/
#define LINKSTRNUMLEN 16
/* without linkstr support the "link string" is just the unit number */
sc->sc_linkstr = kmem_alloc(LINKSTRNUMLEN, KM_SLEEP);
snprintf(sc->sc_linkstr, LINKSTRNUMLEN, "%d", sc->sc_num);
#undef LINKSTRNUMLEN
error = virtif_create(ifp);
if (error) {
/*
 * NOTE(review): the sc_linkstr buffer allocated above is not freed
 * on this path.  Also, ifp points into sc (ifp = &sc->sc_ec.ec_if),
 * so the if_softc store below writes to memory that has just been
 * handed to kmem_free().  If the virtif_unclone() call is live in
 * this revision it frees sc as well -- confirm.
 */
virtif_unclone(ifp);
if_detach(ifp);
kmem_free(sc, sizeof(*sc));
ifp->if_softc = NULL;
}
#endif /* !RUMP_VIF_LINKSTR */
return error;
}
@ -148,33 +152,16 @@ virtif_unclone(struct ifnet *ifp)
{
struct virtif_sc *sc = ifp->if_softc;
mutex_enter(&sc->sc_mtx);
if (sc->sc_dying) {
mutex_exit(&sc->sc_mtx);
return EINPROGRESS;
}
sc->sc_dying = true;
cv_broadcast(&sc->sc_cv);
mutex_exit(&sc->sc_mtx);
if (ifp->if_flags & IFF_UP)
return EBUSY;
VIFHYPER_DYING(sc->sc_viu);
virtif_stop(ifp, 1);
if_down(ifp);
if (sc->sc_l_snd) {
kthread_join(sc->sc_l_snd);
sc->sc_l_snd = NULL;
}
if (sc->sc_l_rcv) {
kthread_join(sc->sc_l_rcv);
sc->sc_l_rcv = NULL;
}
VIFHYPER_DESTROY(sc->sc_viu);
mutex_destroy(&sc->sc_mtx);
cv_destroy(&sc->sc_cv);
kmem_free(sc, sizeof(*sc));
ether_ifdetach(ifp);
@ -188,127 +175,125 @@ virtif_init(struct ifnet *ifp)
{
struct virtif_sc *sc = ifp->if_softc;
ifp->if_flags |= IFF_RUNNING;
if (sc->sc_viu == NULL)
return ENXIO;
mutex_enter(&sc->sc_mtx);
cv_broadcast(&sc->sc_cv);
mutex_exit(&sc->sc_mtx);
ifp->if_flags |= IFF_RUNNING;
return 0;
}
/*
 * Interface ioctl handler.
 *
 * With RUMP_VIF_LINKSTR defined, SIOCGLINKSTR/SIOCSLINKSTR read and set
 * the link string; setting it triggers virtif_create().  Every other
 * command is passed to ether_ioctl(), but only once a link string
 * exists (ENXIO otherwise).  ENETRESET from ether_ioctl() is mapped to 0.
 *
 * NOTE(review): this listing appears to interleave both sides of the
 * change (two declarations of rv, and an unconditional ether_ioctl()
 * under splnet() ahead of the switch) -- confirm which statements are
 * present in the revision being built.
 */
static int
virtif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
int s, rv;
struct virtif_sc *sc = ifp->if_softc;
int rv;
s = splnet();
rv = ether_ioctl(ifp, cmd, data);
if (rv == ENETRESET)
rv = 0;
splx(s);
switch (cmd) {
#ifdef RUMP_VIF_LINKSTR
struct ifdrv *ifd;
size_t linkstrlen;
#ifndef RUMP_VIF_LINKSTRMAX
#define RUMP_VIF_LINKSTRMAX 4096
#endif
case SIOCGLINKSTR:
ifd = data;
if (!sc->sc_linkstr) {
rv = ENOENT;
break;
}
/* length reported to the caller includes the terminating NUL */
linkstrlen = strlen(sc->sc_linkstr)+1;
if (ifd->ifd_cmd == IFLINKSTR_QUERYLEN) {
ifd->ifd_len = linkstrlen;
rv = 0;
break;
}
if (ifd->ifd_cmd != 0) {
rv = ENOTTY;
break;
}
rv = copyoutstr(sc->sc_linkstr,
ifd->ifd_data, MIN(ifd->ifd_len,linkstrlen), NULL);
break;
case SIOCSLINKSTR:
/* the link string may only be set while the interface is down */
if (ifp->if_flags & IFF_UP) {
rv = EBUSY;
break;
}
ifd = data;
if (ifd->ifd_cmd == IFLINKSTR_UNSET) {
panic("unset linkstr not implemented");
} else if (ifd->ifd_cmd != 0) {
rv = ENOTTY;
break;
} else if (sc->sc_linkstr) {
/* a link string can be set only once */
rv = EBUSY;
break;
}
if (ifd->ifd_len > RUMP_VIF_LINKSTRMAX) {
rv = E2BIG;
break;
} else if (ifd->ifd_len < 1) {
rv = EINVAL;
break;
}
sc->sc_linkstr = kmem_alloc(ifd->ifd_len, KM_SLEEP);
rv = copyinstr(ifd->ifd_data, sc->sc_linkstr,
ifd->ifd_len, NULL);
/*
 * NOTE(review): on both failure paths below the buffer is freed but
 * sc->sc_linkstr keeps pointing at it.  A later SIOCSLINKSTR then
 * returns EBUSY and SIOCGLINKSTR/default would use the stale pointer.
 * Resetting sc_linkstr to NULL after kmem_free() looks necessary.
 */
if (rv) {
kmem_free(sc->sc_linkstr, ifd->ifd_len);
break;
}
rv = virtif_create(ifp);
if (rv) {
kmem_free(sc->sc_linkstr, ifd->ifd_len);
}
break;
#endif /* RUMP_VIF_LINKSTR */
default:
/* no link string means virtif_create() has not run for this interface */
if (!sc->sc_linkstr)
rv = ENXIO;
else
rv = ether_ioctl(ifp, cmd, data);
if (rv == ENETRESET)
rv = 0;
break;
}
return rv;
}
/*
* Output packets in-context until outgoing queue is empty.
* Leave responsibility of choosing whether or not to drop the
* kernel lock to VIFHYPER_SEND().
*/
#define LB_SH 32
/*
 * if_start callback.  As shown here it only sets IFF_OACTIVE and
 * broadcasts on sc_cv while holding sc_mtx.
 *
 * NOTE(review): the comment preceding this function describes sending
 * packets in-context; this body only signals the condition variable,
 * so it is presumably the pre-change body of the diff -- confirm.
 */
static void
virtif_start(struct ifnet *ifp)
{
struct virtif_sc *sc = ifp->if_softc;
mutex_enter(&sc->sc_mtx);
ifp->if_flags |= IFF_OACTIVE;
cv_broadcast(&sc->sc_cv);
mutex_exit(&sc->sc_mtx);
}
/*
 * if_stop callback (condvar variant): clear IFF_RUNNING, then broadcast
 * on sc_cv under sc_mtx so that threads waiting on it re-check the
 * interface flags.  The disable argument is not used.
 */
static void
virtif_stop(struct ifnet *ifp, int disable)
{
struct virtif_sc *sc = ifp->if_softc;
ifp->if_flags &= ~IFF_RUNNING;
mutex_enter(&sc->sc_mtx);
cv_broadcast(&sc->sc_cv);
mutex_exit(&sc->sc_mtx);
}
#define POLLTIMO_MS 1
/*
 * Receiver kernel thread body; arg is the struct ifnet.
 *
 * Loops until sc_dying is set: allocate an mbuf with external storage,
 * fill it via VIFHYPER_RECV() and pass it to bpf and if_input.  The
 * "again" label retries the receive into the same mbuf whenever nothing
 * was delivered (error, zero-length read, or interface not running).
 */
static void
virtif_receiver(void *arg)
{
struct ifnet *ifp = arg;
struct virtif_sc *sc = ifp->if_softc;
struct mbuf *m;
/* receive buffer size requested for every packet */
size_t plen = ETHER_MAX_LEN_JUMBO+1;
size_t n;
int error;
for (;;) {
m = m_gethdr(M_WAIT, MT_DATA);
MEXTMALLOC(m, plen, M_WAIT);
again:
/* the unused mbuf must be released before leaving the loop */
if (sc->sc_dying) {
m_freem(m);
break;
}
error = VIFHYPER_RECV(sc->sc_viu,
mtod(m, void *), plen, &n);
if (error) {
printf("%s: read hypercall failed %d. host if down?\n",
ifp->if_xname, error);
/* back off for up to hz ticks, or until sc_cv is signalled */
mutex_enter(&sc->sc_mtx);
/* could check if need go, done soon anyway */
cv_timedwait(&sc->sc_cv, &sc->sc_mtx, hz);
mutex_exit(&sc->sc_mtx);
goto again;
}
/* tap sometimes returns EOF. don't sweat it and plow on */
if (__predict_false(n == 0))
goto again;
/* discard if we're not up */
if ((ifp->if_flags & IFF_RUNNING) == 0)
goto again;
m->m_len = m->m_pkthdr.len = n;
m->m_pkthdr.rcvif = ifp;
bpf_mtap(ifp, m);
/* m is handed to if_input here; the next iteration allocates a new one */
(*ifp->if_input)(ifp, m);
}
kthread_exit(0);
}
/* lazy bum stetson-harrison magic value */
#define LB_SH 32
static void
virtif_sender(void *arg)
{
struct ifnet *ifp = arg;
struct virtif_sc *sc = ifp->if_softc;
struct mbuf *m, *m0;
struct iovec io[LB_SH];
int i;
mutex_enter(&sc->sc_mtx);
KERNEL_LOCK(1, NULL);
while (!sc->sc_dying) {
if (!(ifp->if_flags & IFF_RUNNING)) {
cv_wait(&sc->sc_cv, &sc->sc_mtx);
continue;
}
ifp->if_flags |= IFF_OACTIVE;
for (;;) {
IF_DEQUEUE(&ifp->if_snd, m0);
if (!m0) {
ifp->if_flags &= ~IFF_OACTIVE;
cv_wait(&sc->sc_cv, &sc->sc_mtx);
continue;
break;
}
mutex_exit(&sc->sc_mtx);
m = m0;
for (i = 0; i < LB_SH && m; i++) {
@ -323,11 +308,73 @@ virtif_sender(void *arg)
VIFHYPER_SEND(sc->sc_viu, io, i);
m_freem(m0);
mutex_enter(&sc->sc_mtx);
}
KERNEL_UNLOCK_LAST(curlwp);
mutex_exit(&sc->sc_mtx);
kthread_exit(0);
ifp->if_flags &= ~IFF_OACTIVE;
}
/*
 * if_stop callback: mark the interface as not running.
 * The disable argument is not used.
 */
static void
virtif_stop(struct ifnet *ifp, int disable)
{

	/* XXX: VIFHYPER_STOP() */
	ifp->if_flags = ifp->if_flags & ~IFF_RUNNING;
}
/*
 * Deliver one received frame to the network stack.
 *
 * sc      softc of the receiving interface
 * iov     array of buffers which together make up one Ethernet frame
 * iovlen  number of entries in iov
 *
 * The frame is copied into a newly allocated mbuf chain, starting at an
 * offset of "align" bytes so that the data following the Ethernet
 * header is ALIGN()ed.  The frame is passed to bpf and if_input if it
 * is addressed to this interface, is multicast, or the interface is in
 * promiscuous mode (in which case it is flagged M_PROMISC); otherwise
 * it is freed.
 *
 * The frame is silently dropped if the interface is not running, if no
 * mbuf is available, or if it is shorter than an Ethernet header.
 */
void
VIF_DELIVERPKT(struct virtif_sc *sc, struct iovec *iov, size_t iovlen)
{
	struct ifnet *ifp = &sc->sc_ec.ec_if;
	struct ether_header *eth;
	struct mbuf *m;
	size_t i;
	int off;
	bool passup;
	const int align
	    = ALIGN(sizeof(struct ether_header)) - sizeof(struct ether_header);

	if ((ifp->if_flags & IFF_RUNNING) == 0)
		return;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL)
		return; /* drop packet */
	m->m_len = m->m_pkthdr.len = 0;

	for (i = 0, off = align; i < iovlen; i++) {
		m_copyback(m, off, iov[i].iov_len, iov[i].iov_base);
		off += iov[i].iov_len;

		/*
		 * m_copyback() signals failure only through the resulting
		 * chain length: on success the packet now ends exactly at
		 * the running offset.  (Comparing against the previous
		 * length plus the offset is only right for the first
		 * buffer and rejects every multi-buffer frame.)
		 */
		if (m->m_pkthdr.len != off) {
			aprint_verbose_ifnet(ifp, "m_copyback failed\n");
			m_freem(m);
			return;
		}
	}

	/* no buffers, or not even a complete Ethernet header: drop */
	if (m->m_pkthdr.len < align + (int)sizeof(struct ether_header)) {
		m_freem(m);
		return;
	}

	/*
	 * Skip the alignment pad.  The lengths must shrink along with the
	 * data pointer, otherwise the frame appears "align" bytes longer
	 * than what was received.
	 */
	m->m_data += align;
	m->m_pkthdr.len -= align;
	m->m_len -= align;

	eth = mtod(m, struct ether_header *);
	if (memcmp(eth->ether_dhost, CLLADDR(ifp->if_sadl),
	    ETHER_ADDR_LEN) == 0) {
		passup = true;
	} else if (ETHER_IS_MULTICAST(eth->ether_dhost)) {
		passup = true;
	} else if (ifp->if_flags & IFF_PROMISC) {
		m->m_flags |= M_PROMISC;
		passup = true;
	} else {
		passup = false;
	}

	if (passup) {
		m->m_pkthdr.rcvif = ifp;
		KERNEL_LOCK(1, NULL);
		bpf_mtap(ifp, m);
		ifp->if_input(ifp, m);
		KERNEL_UNLOCK_LAST(NULL);
	} else {
		m_freem(m);
	}
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: if_virt.h,v 1.2 2013/07/04 11:58:11 pooka Exp $ */
/* $NetBSD: if_virt.h,v 1.3 2014/03/03 13:56:40 pooka Exp $ */
/*
* NOTE! This file is supposed to work on !NetBSD platforms.
@ -22,4 +22,9 @@
#define VIFHYPER_DYING VIF_BASENAME3(rumpcomp_,VIRTIF_BASE,_dying)
#define VIFHYPER_DESTROY VIF_BASENAME3(rumpcomp_,VIRTIF_BASE,_destroy)
#define VIFHYPER_SEND VIF_BASENAME3(rumpcomp_,VIRTIF_BASE,_send)
#define VIFHYPER_RECV VIF_BASENAME3(rumpcomp_,VIRTIF_BASE,_recv)
#define VIFHYPER_FLAGS VIF_BASENAME3(rumpcomp_,VIRTIF_BASE,_flags)
#define VIF_DELIVERPKT VIF_BASENAME3(rump_virtif_,VIRTIF_BASE,_deliverpkt)
struct virtif_sc;

View File

@ -1,4 +1,4 @@
/* $NetBSD: rumpcomp_user.c,v 1.11 2013/10/27 16:03:19 pooka Exp $ */
/* $NetBSD: rumpcomp_user.c,v 1.12 2014/03/03 13:56:40 pooka Exp $ */
/*
* Copyright (c) 2013 Antti Kantee. All Rights Reserved.
@ -30,9 +30,12 @@
#include <sys/ioctl.h>
#include <sys/uio.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <poll.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -48,13 +51,21 @@
#include "if_virt.h"
#include "rumpcomp_user.h"
#if VIFHYPER_REVISION != 20130704
#if VIFHYPER_REVISION != 20140302
#error VIFHYPER_REVISION mismatch
#endif
/*
 * Host-side (hypercall) state for one virtif instance.  Allocated by
 * VIFHYPER_CREATE() and released by VIFHYPER_DESTROY().
 */
struct virtif_user {
/* kernel-side softc; passed back to VIF_DELIVERPKT() for each frame */
struct virtif_sc *viu_virtifsc;
/* device number parsed from the create string; used in diagnostics */
int viu_devnum;
/* descriptor returned by opentapdev() */
int viu_fd;
/* wakeup pipe: rcvthread() polls [0], VIFHYPER_DYING() writes to [1] */
int viu_pipe[2];
/* receive thread started by VIFHYPER_CREATE() */
pthread_t viu_rcvthr;
/* set to 1 by VIFHYPER_DYING(); loop condition of rcvthread() */
int viu_dying;
char viu_rcvbuf[9018]; /* jumbo frame max len */
};
static int
@ -100,34 +111,121 @@ opentapdev(int devnum)
return fd;
}
/* Close the backend descriptor stored in viu->viu_fd. */
static void
closetapdev(struct virtif_user *viu)
{
	int fd = viu->viu_fd;

	close(fd);
}
/*
 * Receive thread, one per interface; the argument is the interface's
 * struct virtif_user.
 *
 * Waits for either the backend descriptor or the wakeup pipe to become
 * readable.  Each frame read from the backend is handed to
 * VIF_DELIVERPKT() while scheduled in the rump kernel.  The loop ends
 * once viu_dying is observed set.
 */
static void *
rcvthread(void *aaargh)
{
	struct virtif_user *viu = aaargh;
	struct pollfd fds[2];
	struct iovec pkt;
	ssize_t nread = 0;
	int nready;

	rumpuser_component_kthread();

	/* [0]: packet source, [1]: wakeup pipe written by VIFHYPER_DYING() */
	fds[0].fd = viu->viu_fd;
	fds[0].events = POLLIN;
	fds[1].fd = viu->viu_pipe[0];
	fds[1].events = POLLIN;

	for (;;) {
		if (viu->viu_dying)
			break;

		nready = poll(fds, 2, -1);
		if (nready == -1) {
			/* XXX */
			fprintf(stderr, "virt%d: poll error: %d\n",
			    viu->viu_devnum, errno);
			sleep(1);
			continue;
		}

		/* nothing ready, or woken via the pipe: re-check viu_dying */
		if (nready == 0 || (fds[1].revents & POLLIN) != 0)
			continue;

		nread = read(viu->viu_fd,
		    viu->viu_rcvbuf, sizeof(viu->viu_rcvbuf));
		if (nread < 1) {
			if (nread == -1 && errno == EAGAIN)
				continue;

			/* XXX */
			fprintf(stderr, "virt%d: receive failed\n",
			    viu->viu_devnum);
			sleep(1);
			continue;
		}

		pkt.iov_base = viu->viu_rcvbuf;
		pkt.iov_len = nread;

		rumpuser_component_schedule(NULL);
		VIF_DELIVERPKT(viu->viu_virtifsc, &pkt, 1);
		rumpuser_component_unschedule();
	}

	assert(viu->viu_dying);

	rumpuser_component_kthread_release();
	return NULL;
}
/*
 * Create hypercall: allocate the host-side state, open the backend
 * device, create the wakeup pipe and start the receive thread.
 *
 * On success the new handle is stored in *viup and 0 is returned.  On
 * failure the oerrN labels release, in reverse order, whatever had been
 * set up, and the host error is returned through
 * rumpuser_component_errtrans().
 *
 * NOTE(review): this listing appears to interleave both sides of the
 * change: two signatures, malloc() followed by calloc(), and "goto out"
 * / free() statements next to the oerrN gotos.  Confirm which
 * statements are present in the revision being built.  The enaddr
 * parameter of the four-argument form is not referenced in the body
 * shown.
 */
int
VIFHYPER_CREATE(int devnum, struct virtif_user **viup)
VIFHYPER_CREATE(const char *devstr, struct virtif_sc *vif_sc, uint8_t *enaddr,
struct virtif_user **viup)
{
struct virtif_user *viu = NULL;
void *cookie;
int devnum;
int rv;
/* leave the rump kernel CPU context while making host calls */
cookie = rumpuser_component_unschedule();
viu = malloc(sizeof(*viu));
/*
* Since this interface doesn't do LINKSTR, we know devstr to be
* well-formatted.
*/
devnum = atoi(devstr);
viu = calloc(1, sizeof(*viu));
if (viu == NULL) {
rv = errno;
goto out;
goto oerr1;
}
viu->viu_virtifsc = vif_sc;
viu->viu_fd = opentapdev(devnum);
if (viu->viu_fd == -1) {
rv = errno;
free(viu);
goto out;
goto oerr2;
}
viu->viu_dying = 0;
rv = 0;
viu->viu_devnum = devnum;
/* pipe used by VIFHYPER_DYING() to wake rcvthread() out of poll() */
if (pipe(viu->viu_pipe) == -1) {
rv = errno;
goto oerr3;
}
/* pthread_create() returns the error number directly */
if ((rv = pthread_create(&viu->viu_rcvthr, NULL, rcvthread, viu)) != 0)
goto oerr4;
out:
rumpuser_component_schedule(cookie);
*viup = viu;
return 0;
oerr4:
close(viu->viu_pipe[0]);
close(viu->viu_pipe[1]);
oerr3:
closetapdev(viu);
oerr2:
free(viu);
oerr1:
rumpuser_component_schedule(cookie);
return rumpuser_component_errtrans(rv);
}
@ -153,59 +251,19 @@ VIFHYPER_SEND(struct virtif_user *viu,
rumpuser_component_schedule(cookie);
}
/* how often to check for interface going south */
#define POLLTIMO_MS 10
/*
 * Receive hypercall: read one packet from viu_fd into data (at most
 * dlen bytes) and store the number of bytes read in *rcv.
 *
 * The descriptor is polled with a POLLTIMO_MS timeout so that
 * viu_dying is re-checked regularly; once it is set the call returns 0
 * with *rcv set to 0.  EAGAIN from read() restarts the wait.  Other
 * poll()/read() errors are returned through
 * rumpuser_component_errtrans(); *rcv is not written in that case.
 */
int
VIFHYPER_RECV(struct virtif_user *viu,
void *data, size_t dlen, size_t *rcv)
{
/* run outside the rump kernel CPU context while blocking on the host */
void *cookie = rumpuser_component_unschedule();
struct pollfd pfd;
ssize_t nn = 0;
int rv, prv;
pfd.fd = viu->viu_fd;
pfd.events = POLLIN;
for (;;) {
if (viu->viu_dying) {
rv = 0;
*rcv = 0;
break;
}
prv = poll(&pfd, 1, POLLTIMO_MS);
/* timeout: go around to re-check viu_dying */
if (prv == 0)
continue;
if (prv == -1) {
rv = errno;
break;
}
nn = read(viu->viu_fd, data, dlen);
if (nn == -1) {
if (errno == EAGAIN)
continue;
rv = errno;
} else {
*rcv = (size_t)nn;
rv = 0;
}
break;
}
rumpuser_component_schedule(cookie);
return rumpuser_component_errtrans(rv);
}
#undef POLLTIMO_MS
/*
 * Tell the host side that the interface is going away: set viu_dying
 * and write to the wakeup pipe so that rcvthread(), which polls the
 * read end, notices.  A failed write is only reported on stderr.
 */
void
VIFHYPER_DYING(struct virtif_user *viu)
{
	void *cookie;
	ssize_t nw;

	cookie = rumpuser_component_unschedule();

	/* no locking necessary. it'll be seen eventually */
	viu->viu_dying = 1;

	nw = write(viu->viu_pipe[1], &viu->viu_dying, sizeof(viu->viu_dying));
	if (nw == -1) {
		fprintf(stderr, "%s: failed to signal thread\n",
		    VIF_STRING(VIFHYPER_DYING));
	}

	rumpuser_component_schedule(cookie);
}
void
@ -213,7 +271,10 @@ VIFHYPER_DESTROY(struct virtif_user *viu)
{
void *cookie = rumpuser_component_unschedule();
close(viu->viu_fd);
pthread_join(viu->viu_rcvthr, NULL);
closetapdev(viu);
close(viu->viu_pipe[0]);
close(viu->viu_pipe[1]);
free(viu);
rumpuser_component_schedule(cookie);

View File

@ -1,4 +1,4 @@
/* $NetBSD: rumpcomp_user.h,v 1.5 2013/10/27 16:03:19 pooka Exp $ */
/* $NetBSD: rumpcomp_user.h,v 1.6 2014/03/03 13:56:40 pooka Exp $ */
/*
* Copyright (c) 2013 Antti Kantee. All Rights Reserved.
@ -27,11 +27,13 @@
struct virtif_user;
#define VIFHYPER_REVISION 20130704
#define VIFHYPER_REVISION 20140302
int VIFHYPER_CREATE(int, struct virtif_user **);
int VIFHYPER_CREATE(const char *, struct virtif_sc *, uint8_t *,
struct virtif_user **);
void VIFHYPER_DYING(struct virtif_user *);
void VIFHYPER_DESTROY(struct virtif_user *);
void VIFHYPER_SEND(struct virtif_user *, struct iovec *, size_t);
int VIFHYPER_RECV(struct virtif_user *, void *, size_t, size_t *);
void VIF_DELIVERPKT(struct virtif_sc *, struct iovec *, size_t);