/* $NetBSD: rumpuser_pth.c,v 1.25 2013/05/02 21:35:19 pooka Exp $ */
/*
* Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "rumpuser_port.h"
#if !defined(lint)
__RCSID("$NetBSD: rumpuser_pth.c,v 1.25 2013/05/02 21:35:19 pooka Exp $");
#endif /* !lint */
#include <sys/queue.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <time.h>
#include <unistd.h>
#include <rump/rumpuser.h>
#include "rumpuser_int.h"
static pthread_key_t curlwpkey;
struct rumpuser_mtx {
pthread_mutex_t pthmtx;
struct lwp *owner;
int flags;
};
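
/*
 * Reader/writer bookkeeping for rumpuser_rw_held(): pthread rwlocks
 * do not export who holds them, so we track it ourselves.  The
 * convention is: readers > 0 means read-locked, readers == -1 means
 * write-locked by "writer", and readers == 0 means unlocked.  The
 * spin lock in struct rumpuser_rw protects the reader count.
 */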
#define RURW_AMWRITER(rw) ((rw)->writer == rumpuser_curlwp()		\
				&& (rw)->readers == -1)
#define RURW_HASREAD(rw)  ((rw)->readers > 0)
#define RURW_SETWRITE(rw)						\
do {									\
	assert((rw)->readers == 0);					\
	(rw)->writer = rumpuser_curlwp();				\
	(rw)->readers = -1;						\
} while (/*CONSTCOND*/0)
#define RURW_CLRWRITE(rw)						\
do {									\
	assert(RURW_AMWRITER(rw));					\
	(rw)->readers = 0;						\
	(rw)->writer = NULL;						\
} while (/*CONSTCOND*/0)
#define RURW_INCREAD(rw)						\
do {									\
	pthread_spin_lock(&(rw)->spin);					\
	assert((rw)->readers >= 0);					\
	++(rw)->readers;						\
	pthread_spin_unlock(&(rw)->spin);				\
} while (/*CONSTCOND*/0)
#define RURW_DECREAD(rw)						\
do {									\
	pthread_spin_lock(&(rw)->spin);					\
	assert((rw)->readers > 0);					\
	--(rw)->readers;						\
	pthread_spin_unlock(&(rw)->spin);				\
} while (/*CONSTCOND*/0)
struct rumpuser_rw {
pthread_rwlock_t pthrw;
pthread_spinlock_t spin;
int readers;
struct lwp *writer;
};
struct rumpuser_cv {
pthread_cond_t pthcv;
int nwaiters;
};
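
/*
 * Called when the hypercall layer is initialized; creates the
 * thread-specific data key used for curlwp tracking.
 */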
void
rumpuser__thrinit(void)
{
pthread_key_create(&curlwpkey, NULL);
}
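
/*
 * Create a host thread.  For joinable threads the pthread_t handle is
 * heap-allocated and returned via *ptcookie; it must later be passed
 * to rumpuser_thread_join(), which also frees it.  The priority and
 * cpuidx hints are ignored by this pthread-based implementation.
 *
 * Usage sketch (mythread and myarg are hypothetical):
 *
 *	void *cookie;
 *	if (rumpuser_thread_create(mythread, myarg, "mythr",
 *	    1, 0, -1, &cookie) == 0)
 *		rumpuser_thread_join(cookie);
 */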
int
rumpuser_thread_create(void *(*f)(void *), void *arg, const char *thrname,
int joinable, int priority, int cpuidx, void **ptcookie)
{
pthread_t ptid;
pthread_t *ptidp;
pthread_attr_t pattr;
int rv;
if ((rv = pthread_attr_init(&pattr)) != 0)
return rv;
if (joinable) {
NOFAIL(ptidp = malloc(sizeof(*ptidp)));
pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_JOINABLE);
} else {
ptidp = &ptid;
pthread_attr_setdetachstate(&pattr, PTHREAD_CREATE_DETACHED);
}
rv = pthread_create(ptidp, &pattr, f, arg);
#if defined(__NetBSD__)
if (rv == 0 && thrname)
pthread_setname_np(ptid, thrname, NULL);
#elif defined(__linux__)
/*
* The pthread_setname_np() call varies from one Linux distro to
* another. Comment out the call pending autoconf support.
*/
#if 0
if (rv == 0 && thrname)
pthread_setname_np(ptid, thrname);
#endif
#endif
if (joinable) {
assert(ptcookie);
*ptcookie = ptidp;
}
pthread_attr_destroy(&pattr);
ET(rv);
}
__dead void
rumpuser_thread_exit(void)
{
pthread_exit(NULL);
}
int
rumpuser_thread_join(void *ptcookie)
{
pthread_t *pt = ptcookie;
int rv;
KLOCK_WRAP((rv = pthread_join(*pt, NULL)));
if (rv == 0)
free(pt);
ET(rv);
}
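
/*
 * Mutexes are created in PTHREAD_MUTEX_ERRORCHECK mode so that
 * mismatched lock/unlock pairs are reported by NOFAIL_ERRNO()
 * instead of causing silent undefined behaviour.
 */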
void
rumpuser_mutex_init(struct rumpuser_mtx **mtx, int flags)
{
pthread_mutexattr_t att;
NOFAIL(*mtx = malloc(sizeof(struct rumpuser_mtx)));
pthread_mutexattr_init(&att);
pthread_mutexattr_settype(&att, PTHREAD_MUTEX_ERRORCHECK);
NOFAIL_ERRNO(pthread_mutex_init(&((*mtx)->pthmtx), &att));
pthread_mutexattr_destroy(&att);
(*mtx)->owner = NULL;
assert(flags != 0);
(*mtx)->flags = flags;
}
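
/*
 * Owner tracking is done only for kernel mutexes (RUMPUSER_MTX_KMUTEX),
 * where rumpuser_mutex_owner() must be able to answer.
 */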
static void
mtxenter(struct rumpuser_mtx *mtx)
{
if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
return;
assert(mtx->owner == NULL);
mtx->owner = rumpuser_curlwp();
}
static void
mtxexit(struct rumpuser_mtx *mtx)
{
if (!(mtx->flags & RUMPUSER_MTX_KMUTEX))
return;
assert(mtx->owner != NULL);
mtx->owner = NULL;
}
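
/*
 * Lock acquisition tries the cheap non-blocking path first; only if
 * the mutex is contended do we pay for KLOCK_WRAP(), which releases
 * the rump kernel CPU context around the potentially blocking call.
 */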
void
rumpuser_mutex_enter(struct rumpuser_mtx *mtx)
{
if (mtx->flags & RUMPUSER_MTX_SPIN) {
rumpuser_mutex_enter_nowrap(mtx);
return;
}
assert(mtx->flags & RUMPUSER_MTX_KMUTEX);
if (pthread_mutex_trylock(&mtx->pthmtx) != 0)
KLOCK_WRAP(NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx)));
mtxenter(mtx);
}
void
rumpuser_mutex_enter_nowrap(struct rumpuser_mtx *mtx)
{
assert(mtx->flags & RUMPUSER_MTX_SPIN);
NOFAIL_ERRNO(pthread_mutex_lock(&mtx->pthmtx));
mtxenter(mtx);
}
int
rumpuser_mutex_tryenter(struct rumpuser_mtx *mtx)
{
int rv;
rv = pthread_mutex_trylock(&mtx->pthmtx);
if (rv == 0) {
mtxenter(mtx);
}
ET(rv);
}
void
rumpuser_mutex_exit(struct rumpuser_mtx *mtx)
{
mtxexit(mtx);
NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
}
void
rumpuser_mutex_destroy(struct rumpuser_mtx *mtx)
{
NOFAIL_ERRNO(pthread_mutex_destroy(&mtx->pthmtx));
free(mtx);
}
void
rumpuser_mutex_owner(struct rumpuser_mtx *mtx, struct lwp **lp)
{
if (__predict_false(!(mtx->flags & RUMPUSER_MTX_KMUTEX))) {
printf("panic: rumpuser_mutex_held unsupported on non-kmtx\n");
abort();
}
*lp = mtx->owner;
}
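
/*
 * rwlocks.  The pthread rwlock is paired with a spin lock protecting
 * the reader count used by the RURW_* bookkeeping macros above.
 */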
void
rumpuser_rw_init(struct rumpuser_rw **rw)
{
NOFAIL(*rw = malloc(sizeof(struct rumpuser_rw)));
NOFAIL_ERRNO(pthread_rwlock_init(&((*rw)->pthrw), NULL));
	NOFAIL_ERRNO(pthread_spin_init(&((*rw)->spin),
	    PTHREAD_PROCESS_PRIVATE));
(*rw)->readers = 0;
(*rw)->writer = NULL;
}
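
/*
 * As with mutexes, try the non-blocking acquisition first and drop
 * the rump kernel CPU via KLOCK_WRAP() only when we might sleep.
 */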
void
rumpuser_rw_enter(struct rumpuser_rw *rw, const enum rumprwlock lk)
{
switch (lk) {
case RUMPUSER_RW_WRITER:
if (pthread_rwlock_trywrlock(&rw->pthrw) != 0)
KLOCK_WRAP(NOFAIL_ERRNO(
pthread_rwlock_wrlock(&rw->pthrw)));
RURW_SETWRITE(rw);
break;
case RUMPUSER_RW_READER:
if (pthread_rwlock_tryrdlock(&rw->pthrw) != 0)
KLOCK_WRAP(NOFAIL_ERRNO(
pthread_rwlock_rdlock(&rw->pthrw)));
RURW_INCREAD(rw);
break;
}
}
int
rumpuser_rw_tryenter(struct rumpuser_rw *rw, const enum rumprwlock lk)
{
int rv;
switch (lk) {
case RUMPUSER_RW_WRITER:
rv = pthread_rwlock_trywrlock(&rw->pthrw);
if (rv == 0)
RURW_SETWRITE(rw);
break;
case RUMPUSER_RW_READER:
rv = pthread_rwlock_tryrdlock(&rw->pthrw);
if (rv == 0)
RURW_INCREAD(rw);
break;
default:
rv = EINVAL;
break;
}
ET(rv);
}
int
rumpuser_rw_tryupgrade(struct rumpuser_rw *rw)
{
	/* upgrading is not supported by pthread rwlocks; always fail */
ET(EBUSY);
}
void
rumpuser_rw_downgrade(struct rumpuser_rw *rw)
{
	/*
	 * This is not atomic: between dropping the write lock and
	 * reacquiring a read lock, another writer may slip in.  The
	 * alternative would be a complete rwlock implementation here,
	 * or at least wrapping acquisition in: 1) lock 2) check if
	 * someone is downgrading; if not, we're done 3) unlock
	 * 4) yield 5) goto 1.
	 */
rumpuser_rw_exit(rw);
rumpuser_rw_enter(rw, RUMPUSER_RW_READER);
}
void
rumpuser_rw_exit(struct rumpuser_rw *rw)
{
if (RURW_HASREAD(rw))
RURW_DECREAD(rw);
else
RURW_CLRWRITE(rw);
NOFAIL_ERRNO(pthread_rwlock_unlock(&rw->pthrw));
}
void
rumpuser_rw_destroy(struct rumpuser_rw *rw)
{
NOFAIL_ERRNO(pthread_rwlock_destroy(&rw->pthrw));
NOFAIL_ERRNO(pthread_spin_destroy(&rw->spin));
free(rw);
}
void
rumpuser_rw_held(struct rumpuser_rw *rw, const enum rumprwlock lk, int *rv)
{
switch (lk) {
case RUMPUSER_RW_WRITER:
*rv = RURW_AMWRITER(rw);
break;
case RUMPUSER_RW_READER:
*rv = RURW_HASREAD(rw);
break;
}
}
void
rumpuser_cv_init(struct rumpuser_cv **cv)
{
NOFAIL(*cv = malloc(sizeof(struct rumpuser_cv)));
NOFAIL_ERRNO(pthread_cond_init(&((*cv)->pthcv), NULL));
(*cv)->nwaiters = 0;
}
void
rumpuser_cv_destroy(struct rumpuser_cv *cv)
{
NOFAIL_ERRNO(pthread_cond_destroy(&cv->pthcv));
free(cv);
}
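
/*
 * Helpers for pairing a condition variable wait with releasing and
 * reacquiring the rump kernel CPU context around the sleep.
 */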
static void
cv_unschedule(struct rumpuser_mtx *mtx, int *nlocks)
{
rumpkern_unsched(nlocks, mtx);
mtxexit(mtx);
}
static void
cv_reschedule(struct rumpuser_mtx *mtx, int nlocks)
{
/*
* If the cv interlock is a spin mutex, we must first release
* the mutex that was reacquired by pthread_cond_wait(),
* acquire the CPU context and only then relock the mutex.
* This is to preserve resource allocation order so that
* we don't deadlock. Non-spinning mutexes don't have this
* problem since they don't use a hold-and-wait approach
* to acquiring the mutex wrt the rump kernel CPU context.
*
	 * A better solution would be to rework rumpkern_sched() so
	 * that it is possible to tell the scheduler "if you need to
	 * block, drop this lock first", but I'm not going poking
	 * there without some numbers on how often this path is taken
	 * for spin mutexes.
	 */
if ((mtx->flags & (RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) ==
(RUMPUSER_MTX_SPIN | RUMPUSER_MTX_KMUTEX)) {
NOFAIL_ERRNO(pthread_mutex_unlock(&mtx->pthmtx));
rumpkern_sched(nlocks, mtx);
rumpuser_mutex_enter_nowrap(mtx);
} else {
mtxenter(mtx);
rumpkern_sched(nlocks, mtx);
}
}
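
/*
 * Wait on cv with mtx as the interlock, giving up the rump kernel
 * CPU context for the duration of the sleep so that other virtual
 * CPUs can run.
 */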
void
rumpuser_cv_wait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{
int nlocks;
cv->nwaiters++;
cv_unschedule(mtx, &nlocks);
NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
cv_reschedule(mtx, nlocks);
cv->nwaiters--;
}
void
rumpuser_cv_wait_nowrap(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx)
{
cv->nwaiters++;
mtxexit(mtx);
NOFAIL_ERRNO(pthread_cond_wait(&cv->pthcv, &mtx->pthmtx));
mtxenter(mtx);
cv->nwaiters--;
}
int
rumpuser_cv_timedwait(struct rumpuser_cv *cv, struct rumpuser_mtx *mtx,
int64_t sec, int64_t nsec)
{
struct timespec ts;
int rv, nlocks;
	/*
	 * Sample the clock already here, in case we are put to sleep
	 * after releasing the kernel context.
	 *
	 * The condition variables should use CLOCK_MONOTONIC, but
	 * since that is not available everywhere, leave it for
	 * another day.
	 */
clock_gettime(CLOCK_REALTIME, &ts);
cv->nwaiters++;
cv_unschedule(mtx, &nlocks);
ts.tv_sec += sec;
ts.tv_nsec += nsec;
if (ts.tv_nsec >= 1000*1000*1000) {
ts.tv_sec++;
ts.tv_nsec -= 1000*1000*1000;
}
rv = pthread_cond_timedwait(&cv->pthcv, &mtx->pthmtx, &ts);
cv_reschedule(mtx, nlocks);
cv->nwaiters--;
ET(rv);
}
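
/* Wake one (signal) or all (broadcast) waiters. */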
void
rumpuser_cv_signal(struct rumpuser_cv *cv)
{
NOFAIL_ERRNO(pthread_cond_signal(&cv->pthcv));
}
void
rumpuser_cv_broadcast(struct rumpuser_cv *cv)
{
NOFAIL_ERRNO(pthread_cond_broadcast(&cv->pthcv));
}
void
rumpuser_cv_has_waiters(struct rumpuser_cv *cv, int *nwaiters)
{
*nwaiters = cv->nwaiters;
}
/*
* curlwp
*/
/*
 * The #if 0'd curlwp implementation below is not used by this
 * hypervisor, but serves as test code to check that the intended
 * usage works.
 */
#if 0
struct rumpuser_lwp {
struct lwp *l;
LIST_ENTRY(rumpuser_lwp) l_entries;
};
static LIST_HEAD(, rumpuser_lwp) lwps = LIST_HEAD_INITIALIZER(lwps);
static pthread_mutex_t lwplock = PTHREAD_MUTEX_INITIALIZER;
void
rumpuser_curlwpop(enum rumplwpop op, struct lwp *l)
{
struct rumpuser_lwp *rl, *rliter;
switch (op) {
case RUMPUSER_LWP_CREATE:
rl = malloc(sizeof(*rl));
rl->l = l;
pthread_mutex_lock(&lwplock);
LIST_FOREACH(rliter, &lwps, l_entries) {
if (rliter->l == l) {
fprintf(stderr, "LWP_CREATE: %p exists\n", l);
abort();
}
}
LIST_INSERT_HEAD(&lwps, rl, l_entries);
pthread_mutex_unlock(&lwplock);
break;
case RUMPUSER_LWP_DESTROY:
pthread_mutex_lock(&lwplock);
LIST_FOREACH(rl, &lwps, l_entries) {
if (rl->l == l)
break;
}
if (!rl) {
fprintf(stderr, "LWP_DESTROY: %p does not exist\n", l);
abort();
}
LIST_REMOVE(rl, l_entries);
pthread_mutex_unlock(&lwplock);
free(rl);
break;
case RUMPUSER_LWP_SET:
assert(pthread_getspecific(curlwpkey) == NULL || l == NULL);
if (l) {
pthread_mutex_lock(&lwplock);
LIST_FOREACH(rl, &lwps, l_entries) {
if (rl->l == l)
break;
}
if (!rl) {
fprintf(stderr,
"LWP_SET: %p does not exist\n", l);
abort();
}
pthread_mutex_unlock(&lwplock);
} else {
rl = NULL;
}
pthread_setspecific(curlwpkey, rl);
break;
}
}
struct lwp *
rumpuser_curlwp(void)
{
struct rumpuser_lwp *rl;
rl = pthread_getspecific(curlwpkey);
return rl ? rl->l : NULL;
}
#else
void
rumpuser_curlwpop(enum rumplwpop op, struct lwp *l)
{
switch (op) {
case RUMPUSER_LWP_CREATE:
break;
case RUMPUSER_LWP_DESTROY:
break;
case RUMPUSER_LWP_SET:
assert(pthread_getspecific(curlwpkey) == NULL || l == NULL);
pthread_setspecific(curlwpkey, l);
break;
}
}
struct lwp *
rumpuser_curlwp(void)
{
return pthread_getspecific(curlwpkey);
}
#endif