/* $NetBSD: kern_rwlock.c,v 1.13 2007/12/04 09:13:59 ad Exp $ */ /*- * Copyright (c) 2002, 2006, 2007 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe and Andrew Doran. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Kernel reader/writer lock implementation, modeled after those * found in Solaris, a description of which can be found in: * * Solaris Internals: Core Kernel Architecture, Jim Mauro and * Richard McDougall. */ #include __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.13 2007/12/04 09:13:59 ad Exp $"); #include "opt_multiprocessor.h" #define __RWLOCK_PRIVATE #include #include #include #include #include #include #include #include #include /* * LOCKDEBUG */ #if defined(LOCKDEBUG) #define RW_WANTLOCK(rw, op) \ LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \ (uintptr_t)__builtin_return_address(0), op == RW_READER); #define RW_LOCKED(rw, op) \ LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), \ (uintptr_t)__builtin_return_address(0), op == RW_READER); #define RW_UNLOCKED(rw, op) \ LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \ (uintptr_t)__builtin_return_address(0), op == RW_READER); #define RW_DASSERT(rw, cond) \ do { \ if (!(cond)) \ rw_abort(rw, __func__, "assertion failed: " #cond); \ } while (/* CONSTCOND */ 0); #else /* LOCKDEBUG */ #define RW_WANTLOCK(rw, op) /* nothing */ #define RW_LOCKED(rw, op) /* nothing */ #define RW_UNLOCKED(rw, op) /* nothing */ #define RW_DASSERT(rw, cond) /* nothing */ #endif /* LOCKDEBUG */ /* * DIAGNOSTIC */ #if defined(DIAGNOSTIC) #define RW_ASSERT(rw, cond) \ do { \ if (!(cond)) \ rw_abort(rw, __func__, "assertion failed: " #cond); \ } while (/* CONSTCOND */ 0) #else #define RW_ASSERT(rw, cond) /* nothing */ #endif /* DIAGNOSTIC */ /* * For platforms that use 'simple' RW locks. */ #ifdef __HAVE_SIMPLE_RW_LOCKS #define RW_ACQUIRE(rw, old, new) RW_CAS1(&(rw)->rw_owner, old, new) #define RW_RELEASE(rw, old, new) RW_CAS1(&(rw)->rw_owner, old, new) #define RW_SETDEBUG(rw, on) ((rw)->rw_owner |= (on) ? RW_DEBUG : 0) #define RW_DEBUG_P(rw) (((rw)->rw_owner & RW_DEBUG) != 0) #if defined(LOCKDEBUG) #define RW_INHERITDEBUG(new, old) (new) |= (old) & RW_DEBUG #else /* defined(LOCKDEBUG) */ #define RW_INHERITDEBUG(new, old) /* nothing */ #endif /* defined(LOCKDEBUG) */ static inline int RW_CAS1(volatile uintptr_t *ptr, uintptr_t old, uintptr_t new) { RW_INHERITDEBUG(new, old); return RW_CAS(ptr, old, new); } static inline int RW_SET_WAITERS(krwlock_t *rw, uintptr_t need, uintptr_t set) { uintptr_t old; if (((old = rw->rw_owner) & need) == 0) return 0; return RW_CAS(&rw->rw_owner, old, old | set); } #endif /* __HAVE_SIMPLE_RW_LOCKS */ /* * For platforms that do not provide stubs, or for the LOCKDEBUG case. */ #ifdef LOCKDEBUG #undef __HAVE_RW_STUBS #endif #ifndef __HAVE_RW_STUBS __strong_alias(rw_enter,rw_vector_enter); __strong_alias(rw_exit,rw_vector_exit); #endif static void rw_dump(volatile void *); static lwp_t *rw_owner(wchan_t); lockops_t rwlock_lockops = { "Reader / writer lock", 1, rw_dump }; syncobj_t rw_syncobj = { SOBJ_SLEEPQ_SORTED, turnstile_unsleep, turnstile_changepri, sleepq_lendpri, rw_owner, }; /* * rw_dump: * * Dump the contents of a rwlock structure. */ static void rw_dump(volatile void *cookie) { volatile krwlock_t *rw = cookie; printf_nolog("owner/count : %#018lx flags : %#018x\n", (long)RW_OWNER(rw), (int)RW_FLAGS(rw)); } /* * rw_abort: * * Dump information about an error and panic the system. This * generates a lot of machine code in the DIAGNOSTIC case, so * we ask the compiler to not inline it. */ #if __GNUC_PREREQ__(3, 0) __attribute ((noinline)) #endif static void rw_abort(krwlock_t *rw, const char *func, const char *msg) { if (panicstr != NULL) return; LOCKDEBUG_ABORT(rw, &rwlock_lockops, func, msg); } /* * rw_init: * * Initialize a rwlock for use. */ void rw_init(krwlock_t *rw) { bool dodebug; memset(rw, 0, sizeof(*rw)); dodebug = LOCKDEBUG_ALLOC(rw, &rwlock_lockops, (uintptr_t)__builtin_return_address(0)); RW_SETDEBUG(rw, dodebug); } /* * rw_destroy: * * Tear down a rwlock. */ void rw_destroy(krwlock_t *rw) { RW_ASSERT(rw, (rw->rw_owner & ~RW_DEBUG) == 0); LOCKDEBUG_FREE(RW_DEBUG_P(rw), rw); } /* * rw_vector_enter: * * Acquire a rwlock. */ void rw_vector_enter(krwlock_t *rw, const krw_t op) { uintptr_t owner, incr, need_wait, set_wait, curthread; turnstile_t *ts; int queue; lwp_t *l; LOCKSTAT_TIMER(slptime); LOCKSTAT_FLAG(lsflag); l = curlwp; curthread = (uintptr_t)l; RW_ASSERT(rw, !cpu_intr_p()); RW_ASSERT(rw, curthread != 0); RW_WANTLOCK(rw, op); if (panicstr == NULL) { LOCKDEBUG_BARRIER(&kernel_lock, 1); } /* * We play a slight trick here. If we're a reader, we want * increment the read count. If we're a writer, we want to * set the owner field and whe WRITE_LOCKED bit. * * In the latter case, we expect those bits to be zero, * therefore we can use an add operation to set them, which * means an add operation for both cases. */ if (__predict_true(op == RW_READER)) { incr = RW_READ_INCR; set_wait = RW_HAS_WAITERS; need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED; queue = TS_READER_Q; } else { RW_DASSERT(rw, op == RW_WRITER); incr = curthread | RW_WRITE_LOCKED; set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED; need_wait = RW_WRITE_LOCKED | RW_THREAD; queue = TS_WRITER_Q; } LOCKSTAT_ENTER(lsflag); for (;;) { /* * Read the lock owner field. If the need-to-wait * indicator is clear, then try to acquire the lock. */ owner = rw->rw_owner; if ((owner & need_wait) == 0) { if (RW_ACQUIRE(rw, owner, owner + incr)) { /* Got it! */ break; } /* * Didn't get it -- spin around again (we'll * probably sleep on the next iteration). */ continue; } if (panicstr != NULL) return; if (RW_OWNER(rw) == curthread) rw_abort(rw, __func__, "locking against myself"); /* * Grab the turnstile chain lock. Once we have that, we * can adjust the waiter bits and sleep queue. */ ts = turnstile_lookup(rw); /* * Mark the rwlock as having waiters. If the set fails, * then we may not need to sleep and should spin again. */ if (!RW_SET_WAITERS(rw, need_wait, set_wait)) { turnstile_exit(rw); continue; } LOCKSTAT_START_TIMER(lsflag, slptime); turnstile_block(ts, queue, rw, &rw_syncobj); /* If we wake up and arrive here, we've been handed the lock. */ RW_RECEIVE(rw); LOCKSTAT_STOP_TIMER(lsflag, slptime); LOCKSTAT_EVENT(lsflag, rw, LB_RWLOCK | (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), 1, slptime); break; } LOCKSTAT_EXIT(lsflag); RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) || (op == RW_READER && RW_COUNT(rw) != 0)); RW_LOCKED(rw, op); } /* * rw_vector_exit: * * Release a rwlock. */ void rw_vector_exit(krwlock_t *rw) { uintptr_t curthread, owner, decr, new; turnstile_t *ts; int rcnt, wcnt; lwp_t *l; curthread = (uintptr_t)curlwp; RW_ASSERT(rw, curthread != 0); if (panicstr != NULL) return; /* * Again, we use a trick. Since we used an add operation to * set the required lock bits, we can use a subtract to clear * them, which makes the read-release and write-release path * the same. */ owner = rw->rw_owner; if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) { RW_UNLOCKED(rw, RW_WRITER); RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0); RW_ASSERT(rw, RW_OWNER(rw) == curthread); decr = curthread | RW_WRITE_LOCKED; } else { RW_UNLOCKED(rw, RW_READER); RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0); RW_ASSERT(rw, RW_COUNT(rw) != 0); decr = RW_READ_INCR; } /* * Compute what we expect the new value of the lock to be. Only * proceed to do direct handoff if there are waiters, and if the * lock would become unowned. */ for (;; owner = rw->rw_owner) { new = (owner - decr); if ((new & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS) break; if (RW_RELEASE(rw, owner, new)) return; } for (;;) { /* * Grab the turnstile chain lock. This gets the interlock * on the sleep queue. Once we have that, we can adjust the * waiter bits. */ ts = turnstile_lookup(rw); RW_DASSERT(rw, ts != NULL); RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0); owner = rw->rw_owner; wcnt = TS_WAITERS(ts, TS_WRITER_Q); rcnt = TS_WAITERS(ts, TS_READER_Q); /* * Give the lock away. * * If we are releasing a write lock, then wake all * outstanding readers. If we are releasing a read * lock, then wake one writer. */ if (rcnt == 0 || (decr == RW_READ_INCR && wcnt != 0)) { RW_DASSERT(rw, wcnt != 0); RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0); /* * Give the lock to the longest waiting * writer. */ l = TS_FIRST(ts, TS_WRITER_Q); new = (uintptr_t)l | RW_WRITE_LOCKED; if (wcnt > 1) new |= RW_HAS_WAITERS | RW_WRITE_WANTED; else if (rcnt != 0) new |= RW_HAS_WAITERS; RW_GIVE(rw); if (!RW_RELEASE(rw, owner, new)) { /* Oops, try again. */ turnstile_exit(rw); continue; } /* Wake the writer. */ turnstile_wakeup(ts, TS_WRITER_Q, 1, l); } else { RW_DASSERT(rw, rcnt != 0); /* * Give the lock to all blocked readers. If there * is a writer waiting, new readers that arrive * after the release will be blocked out. */ new = rcnt << RW_READ_COUNT_SHIFT; if (wcnt != 0) new |= RW_HAS_WAITERS | RW_WRITE_WANTED; RW_GIVE(rw); if (!RW_RELEASE(rw, owner, new)) { /* Oops, try again. */ turnstile_exit(rw); continue; } /* Wake up all sleeping readers. */ turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL); } break; } } /* * rw_tryenter: * * Try to acquire a rwlock. */ int rw_tryenter(krwlock_t *rw, const krw_t op) { uintptr_t curthread, owner, incr, need_wait; curthread = (uintptr_t)curlwp; RW_ASSERT(rw, curthread != 0); RW_WANTLOCK(rw, op); if (op == RW_READER) { incr = RW_READ_INCR; need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED; } else { RW_DASSERT(rw, op == RW_WRITER); incr = curthread | RW_WRITE_LOCKED; need_wait = RW_WRITE_LOCKED | RW_THREAD; } for (;;) { owner = rw->rw_owner; if ((owner & need_wait) == 0) { if (RW_ACQUIRE(rw, owner, owner + incr)) { /* Got it! */ break; } continue; } return 0; } RW_LOCKED(rw, op); RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) || (op == RW_READER && RW_COUNT(rw) != 0)); return 1; } /* * rw_downgrade: * * Downgrade a write lock to a read lock. */ void rw_downgrade(krwlock_t *rw) { uintptr_t owner, curthread, new; turnstile_t *ts; int rcnt, wcnt; curthread = (uintptr_t)curlwp; RW_ASSERT(rw, curthread != 0); RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0); RW_ASSERT(rw, RW_OWNER(rw) == curthread); RW_UNLOCKED(rw, RW_WRITER); owner = rw->rw_owner; if ((owner & RW_HAS_WAITERS) == 0) { /* * There are no waiters, so we can do this the easy way. * Try swapping us down to one read hold. If it fails, the * lock condition has changed and we most likely now have * waiters. */ if (RW_RELEASE(rw, owner, RW_READ_INCR)) { RW_LOCKED(rw, RW_READER); RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0); RW_DASSERT(rw, RW_COUNT(rw) != 0); return; } } /* * Grab the turnstile chain lock. This gets the interlock * on the sleep queue. Once we have that, we can adjust the * waiter bits. */ for (;;) { ts = turnstile_lookup(rw); RW_DASSERT(rw, ts != NULL); owner = rw->rw_owner; rcnt = TS_WAITERS(ts, TS_READER_Q); wcnt = TS_WAITERS(ts, TS_WRITER_Q); /* * If there are no readers, just preserve the waiters * bits, swap us down to one read hold and return. */ if (rcnt == 0) { RW_DASSERT(rw, wcnt != 0); RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0); RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0); new = RW_READ_INCR | RW_HAS_WAITERS | RW_WRITE_WANTED; if (!RW_RELEASE(rw, owner, new)) { /* Oops, try again. */ turnstile_exit(ts); continue; } break; } /* * Give the lock to all blocked readers. We may * retain one read hold if downgrading. If there * is a writer waiting, new readers will be blocked * out. */ new = (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR; if (wcnt != 0) new |= RW_HAS_WAITERS | RW_WRITE_WANTED; RW_GIVE(rw); if (!RW_RELEASE(rw, owner, new)) { /* Oops, try again. */ turnstile_exit(rw); continue; } /* Wake up all sleeping readers. */ turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL); break; } RW_LOCKED(rw, RW_READER); RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0); RW_DASSERT(rw, RW_COUNT(rw) != 0); } /* * rw_tryupgrade: * * Try to upgrade a read lock to a write lock. We must be the * only reader. */ int rw_tryupgrade(krwlock_t *rw) { uintptr_t owner, curthread, new; curthread = (uintptr_t)curlwp; RW_ASSERT(rw, curthread != 0); RW_WANTLOCK(rw, RW_WRITER); for (;;) { owner = rw->rw_owner; RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0); if ((owner & RW_THREAD) != RW_READ_INCR) { RW_ASSERT(rw, (owner & RW_THREAD) != 0); return 0; } new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD); if (RW_ACQUIRE(rw, owner, new)) break; } RW_UNLOCKED(rw, RW_READER); RW_LOCKED(rw, RW_WRITER); RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED); RW_DASSERT(rw, RW_OWNER(rw) == curthread); return 1; } /* * rw_read_held: * * Returns true if the rwlock is held for reading. Must only be * used for diagnostic assertions, and never be used to make * decisions about how to use a rwlock. */ int rw_read_held(krwlock_t *rw) { uintptr_t owner; if (panicstr != NULL) return 1; owner = rw->rw_owner; return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0; } /* * rw_write_held: * * Returns true if the rwlock is held for writing. Must only be * used for diagnostic assertions, and never be used to make * decisions about how to use a rwlock. */ int rw_write_held(krwlock_t *rw) { if (panicstr != NULL) return 1; return (rw->rw_owner & RW_WRITE_LOCKED) != 0; } /* * rw_lock_held: * * Returns true if the rwlock is held for reading or writing. Must * only be used for diagnostic assertions, and never be used to make * decisions about how to use a rwlock. */ int rw_lock_held(krwlock_t *rw) { if (panicstr != NULL) return 1; return (rw->rw_owner & RW_THREAD) != 0; } /* * rw_owner: * * Return the current owner of an RW lock, but only if it is write * held. Used for priority inheritance. */ static lwp_t * rw_owner(wchan_t obj) { krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */ uintptr_t owner = rw->rw_owner; if ((owner & RW_WRITE_LOCKED) == 0) return NULL; return (void *)(owner & RW_THREAD); }