diff --git a/sys/arch/acorn32/mainbus/fd.c b/sys/arch/acorn32/mainbus/fd.c index d34f23df5a5f..42379dad0f08 100644 --- a/sys/arch/acorn32/mainbus/fd.c +++ b/sys/arch/acorn32/mainbus/fd.c @@ -1,4 +1,4 @@ -/* $NetBSD: fd.c,v 1.34 2007/10/25 12:48:11 yamt Exp $ */ +/* $NetBSD: fd.c,v 1.35 2008/01/02 11:48:20 ad Exp $ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. @@ -89,7 +89,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.34 2007/10/25 12:48:11 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.35 2008/01/02 11:48:20 ad Exp $"); #include "opt_ddb.h" @@ -1513,11 +1513,11 @@ fdformat(dev, finfo, l) struct buf *bp; /* set up a buffer header for fdstrategy() */ - bp = (struct buf *)malloc(sizeof(struct buf), M_TEMP, M_NOWAIT); + bp = getiobuf(NULL, false); if(bp == 0) return ENOBUFS; - memset((void *)bp, 0, sizeof(struct buf)); - bp->b_flags = B_BUSY | B_PHYS | B_FORMAT; + bp->b_flags = B_PHYS | B_FORMAT; + bp->b_cflags |= BC_BUSY; bp->b_proc = l->l_proc; bp->b_dev = dev; @@ -1540,21 +1540,22 @@ fdformat(dev, finfo, l) fdstrategy(bp); /* ...and wait for it to complete */ - s = splbio(); - while(!(bp->b_flags & B_DONE)) { - rv = tsleep((void *)bp, PRIBIO, "fdform", 20 * hz); + /* XXX very dodgy */ + mutex_enter(bp->b_objlock); + while (!(bp->b_oflags & BO_DONE)) { + rv = cv_timedwait(&bp->b_done, bp->b_objlock, 20 * hz); if (rv == EWOULDBLOCK) break; } - splx(s); - + mutex_exit(bp->b_objlock); + if (rv == EWOULDBLOCK) { /* timed out */ rv = EIO; biodone(bp); } else if (bp->b_error != 0) rv = bp->b_error; - free(bp, M_TEMP); + putiobuf(bp); return rv; } diff --git a/sys/arch/algor/algor/disksubr.c b/sys/arch/algor/algor/disksubr.c index d9effccf1115..198f56c9305b 100644 --- a/sys/arch/algor/algor/disksubr.c +++ b/sys/arch/algor/algor/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.16 2007/10/17 19:52:54 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.17 2008/01/02 11:48:20 ad Exp $ */ /* * Copyright (c) 1994, 1995, 1996 Carnegie-Mellon University. @@ -29,7 +29,7 @@ #include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.16 2007/10/17 19:52:54 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.17 2008/01/02 11:48:20 ad Exp $"); #include #include @@ -105,7 +105,7 @@ readdisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -219,7 +219,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, dlp = (struct disklabel *)((char*)bp->b_data + LABELOFFSET); *dlp = *lp; /* struct assignment */ - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/alpha/alpha/disksubr.c b/sys/arch/alpha/alpha/disksubr.c index 597bf76363d1..7d5cf1633f33 100644 --- a/sys/arch/alpha/alpha/disksubr.c +++ b/sys/arch/alpha/alpha/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.35 2007/10/17 19:52:55 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.36 2008/01/02 11:48:21 ad Exp $ */ /* * Copyright (c) 1994, 1995, 1996 Carnegie-Mellon University.
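The fdformat() hunks above swap a hand-rolled struct buf for the iobuf pool and wait for completion on the buffer's condition variable under b_objlock, instead of tsleep()ing on B_DONE at splbio(). A minimal sketch of the new idiom, assuming thread context; the strategy routine is passed in and the request setup is elided, so this illustrates the pattern rather than reproducing the driver's code:

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/mutex.h>
#include <sys/condvar.h>

/* Sketch: issue one request on a private iobuf and wait for BO_DONE. */
static int
issue_and_timedwait(dev_t dev, void (*strategy)(struct buf *))
{
	struct buf *bp;
	int rv = 0;

	bp = getiobuf(NULL, false);	/* no-wait allocation, may fail */
	if (bp == NULL)
		return ENOBUFS;
	bp->b_flags = B_PHYS;		/* I/O description lives in b_flags */
	bp->b_cflags |= BC_BUSY;	/* ownership lives in b_cflags */
	bp->b_dev = dev;
	/* ... block number, byte count and data pointer elided ... */

	(*strategy)(bp);

	/* biodone() sets BO_DONE under b_objlock and signals b_done. */
	mutex_enter(bp->b_objlock);
	while ((bp->b_oflags & BO_DONE) == 0) {
		rv = cv_timedwait(&bp->b_done, bp->b_objlock, 20 * hz);
		if (rv == EWOULDBLOCK)	/* timed out */
			break;
	}
	mutex_exit(bp->b_objlock);

	if (rv == EWOULDBLOCK)
		rv = EIO;	/* simplified: a real driver must reap the in-flight request */
	else if (bp->b_error != 0)
		rv = bp->b_error;
	putiobuf(bp);
	return rv;
}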
@@ -29,7 +29,7 @@ #include /* RCS ID & Copyright macro defns */ -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.35 2007/10/17 19:52:55 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.36 2008/01/02 11:48:21 ad Exp $"); #include #include @@ -110,7 +110,7 @@ readdisklabel(dev, strat, lp, clp) i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~BO_DONE; bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -244,8 +244,9 @@ writedisklabel(dev, strat, lp, clp) dp[63] = sum; } - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_flags &= ~B_READ; bp->b_flags |= B_WRITE; + bp->b_oflags &= ~BO_DONE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/alpha/alpha/pmap.c b/sys/arch/alpha/alpha/pmap.c index 7b17e921c021..04079b405538 100644 --- a/sys/arch/alpha/alpha/pmap.c +++ b/sys/arch/alpha/alpha/pmap.c @@ -1,7 +1,7 @@ -/* $NetBSD: pmap.c,v 1.228 2007/11/07 00:23:14 ad Exp $ */ +/* $NetBSD: pmap.c,v 1.229 2008/01/02 11:48:21 ad Exp $ */ /*- - * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc. + * Copyright (c) 1998, 1999, 2000, 2001, 2007 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -145,7 +145,7 @@ #include /* RCS ID & Copyright macro defns */ -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.228 2007/11/07 00:23:14 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.229 2008/01/02 11:48:21 ad Exp $"); #include #include @@ -154,14 +154,12 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.228 2007/11/07 00:23:14 ad Exp $"); #include #include #include -#ifdef SYSVSHM #include -#endif +#include +#include #include -#include -#include #if defined(_PMAP_MAY_USE_PROM_CONSOLE) || defined(MULTIPROCESSOR) #include #endif @@ -326,11 +324,8 @@ static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS]; /* * Locking: * - * This pmap module uses two types of locks: `normal' (sleep) - * locks and `simple' (spin) locks. They are used as follows: - * - * READ/WRITE SPIN LOCKS - * --------------------- + * READ/WRITE LOCKS + * ---------------- * * * pmap_main_lock - This lock is used to prevent deadlock and/or * provide mutex access to the pmap module. Most operations lock @@ -343,10 +338,10 @@ static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS]; * the PV->pmap direction. Since only one thread can hold a write * lock at a time, this provides the mutex. * - * SIMPLE LOCKS - * ------------ + * MUTEXES + * ------- * - * * pm_slock (per-pmap) - This lock protects all of the members + * * pm_lock (per-pmap) - This lock protects all of the members * of the pmap structure itself. This lock will be asserted * in pmap_activate() and pmap_deactivate() from a critical * section of mi_switch(), and must never sleep. Note that @@ -354,27 +349,27 @@ static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS]; * memory allocation *must* be blocked while this lock is * asserted. * - * * pvh_slock (per-vm_page) - This lock protects the PV list + * * pvh_lock (per-vm_page) - This lock protects the PV list * for a specified managed page. * - * * pmap_all_pmaps_slock - This lock protects the global list of - * all pmaps. Note that a pm_slock must never be held while this + * * pmap_all_pmaps_lock - This lock protects the global list of + * all pmaps. Note that a pm_lock must never be held while this * lock is held. 
* - * * pmap_growkernel_slock - This lock protects pmap_growkernel() + * * pmap_growkernel_lock - This lock protects pmap_growkernel() * and the virtual_end variable. * - * There is a lock ordering constraint for pmap_growkernel_slock. + * There is a lock ordering constraint for pmap_growkernel_lock. * pmap_growkernel() acquires the locks in the following order: * - * pmap_growkernel_slock -> pmap_all_pmaps_slock -> - * pmap->pm_slock + * pmap_growkernel_lock -> pmap_all_pmaps_lock -> + * pmap->pm_lock * - * But pmap_lev1map_create() is called with pmap->pm_slock held, - * and also needs to acquire the pmap_growkernel_slock. So, + * But pmap_lev1map_create() is called with pmap->pm_lock held, + * and also needs to acquire the pmap_growkernel_lock. So, * we require that the caller of pmap_lev1map_create() (currently, - * the only caller is pmap_enter()) acquire pmap_growkernel_slock - * before acquring pmap->pm_slock. + * the only caller is pmap_enter()) acquire pmap_growkernel_lock + * before acquring pmap->pm_lock. * * Address space number management (global ASN counters and per-pmap * ASN state) are not locked; they use arrays of values indexed @@ -384,14 +379,14 @@ static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS]; * with the pmap already locked by the caller (which will be * an interface function). */ -/* static struct lock pmap_main_lock; */ -static struct simplelock pmap_all_pmaps_slock; -static struct simplelock pmap_growkernel_slock; +static krwlock_t pmap_main_lock; +static kmutex_t pmap_all_pmaps_lock; +static kmutex_t pmap_growkernel_lock; -#define PMAP_MAP_TO_HEAD_LOCK() /* nothing */ -#define PMAP_MAP_TO_HEAD_UNLOCK() /* nothing */ -#define PMAP_HEAD_TO_MAP_LOCK() /* nothing */ -#define PMAP_HEAD_TO_MAP_UNLOCK() /* nothing */ +#define PMAP_MAP_TO_HEAD_LOCK() rw_enter(&pmap_main_lock, RW_READER) +#define PMAP_MAP_TO_HEAD_UNLOCK() rw_exit(&pmap_main_lock) +#define PMAP_HEAD_TO_MAP_LOCK() rw_enter(&pmap_main_lock, RW_WRITER) +#define PMAP_HEAD_TO_MAP_UNLOCK() rw_exit(&pmap_main_lock) #if defined(MULTIPROCESSOR) /* @@ -421,21 +416,9 @@ static struct pmap_tlb_shootdown_q { int pq_pte; /* aggregate PTE bits */ int pq_count; /* number of pending requests */ int pq_tbia; /* pending global flush */ - struct simplelock pq_slock; /* spin lock on queue */ + kmutex_t pq_lock; /* spin lock on queue */ } pmap_tlb_shootdown_q[ALPHA_MAXPROCS]; -#define PSJQ_LOCK(pq, s) \ -do { \ - s = splvm(); \ - simple_lock(&(pq)->pq_slock); \ -} while (/*CONSTCOND*/0) - -#define PSJQ_UNLOCK(pq, s) \ -do { \ - simple_unlock(&(pq)->pq_slock); \ - splx(s); \ -} while (/*CONSTCOND*/0) - /* If we have more pending jobs than this, we just nail the whole TLB. */ #define PMAP_TLB_SHOOTDOWN_MAXJOBS 6 @@ -511,16 +494,6 @@ static void pmap_physpage_free(paddr_t); static int pmap_physpage_addref(void *); static int pmap_physpage_delref(void *); -/* - * Define PMAP_NO_LAZY_LEV1MAP in order to have a lev1map allocated - * in pmap_create(), rather than when the first mapping is entered. - * This causes pmaps to use an extra page of memory if no mappings - * are entered in them, but in practice this is probably not going - * to be a problem, and it allows us to avoid locking pmaps in - * pmap_activate(). - */ -#define PMAP_NO_LAZY_LEV1MAP - /* * PMAP_ISACTIVE{,_TEST}: * @@ -919,8 +892,8 @@ pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) (i*PAGE_SIZE*NPTEPG))] = pte; } - /* Initialize the pmap_growkernel_slock. */ - simple_lock_init(&pmap_growkernel_slock); + /* Initialize the pmap_growkernel_lock. 
*/ + mutex_init(&pmap_growkernel_lock, MUTEX_DEFAULT, IPL_NONE); /* * Set up level three page table (lev3map) @@ -952,8 +925,8 @@ pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) /* * Initialize the locks. */ - /* spinlockinit(&pmap_main_lock, "pmaplk", 0); */ - simple_lock_init(&pmap_all_pmaps_slock); + rw_init(&pmap_main_lock); + mutex_init(&pmap_all_pmaps_lock, MUTEX_DEFAULT, IPL_NONE); /* * Initialize kernel pmap. Note that all kernel mappings @@ -970,7 +943,7 @@ pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) pmap_kernel()->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen; } - simple_lock_init(&pmap_kernel()->pm_slock); + mutex_init(&pmap_kernel()->pm_lock, MUTEX_DEFAULT, IPL_NONE); TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list); #if defined(MULTIPROCESSOR) @@ -982,7 +955,8 @@ pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) IPL_VM); for (i = 0; i < ALPHA_MAXPROCS; i++) { TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head); - simple_lock_init(&pmap_tlb_shootdown_q[i].pq_slock); + mutex_init(&pmap_tlb_shootdown_q[i].pq_lock, MUTEX_DEFAULT, + IPL_VM); } #endif @@ -998,7 +972,7 @@ pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) /* * Mark the kernel pmap `active' on this processor. */ - atomic_setbits_ulong(&pmap_kernel()->pm_cpus, + atomic_or_ulong(&pmap_kernel()->pm_cpus, (1UL << cpu_number())); } @@ -1197,16 +1171,14 @@ pmap_create(void) /* XXX Locking? */ pmap->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen; } - simple_lock_init(&pmap->pm_slock); + mutex_init(&pmap->pm_lock, MUTEX_DEFAULT, IPL_NONE); - simple_lock(&pmap_all_pmaps_slock); + mutex_enter(&pmap_all_pmaps_lock); TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list); - simple_unlock(&pmap_all_pmaps_slock); + mutex_exit(&pmap_all_pmaps_lock); -#ifdef PMAP_NO_LAZY_LEV1MAP i = pmap_lev1map_create(pmap, cpu_number()); KASSERT(i == 0); -#endif return (pmap); } @@ -1220,30 +1192,23 @@ pmap_create(void) void pmap_destroy(pmap_t pmap) { - int refs; #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_destroy(%p)\n", pmap); #endif - PMAP_LOCK(pmap); - refs = --pmap->pm_count; - PMAP_UNLOCK(pmap); - - if (refs > 0) + if (atomic_dec_uint_nv(&pmap->pm_count) > 0) return; /* * Remove it from the global list of all pmaps. 
*/ - simple_lock(&pmap_all_pmaps_slock); + mutex_enter(&pmap_all_pmaps_lock); TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list); - simple_unlock(&pmap_all_pmaps_slock); + mutex_exit(&pmap_all_pmaps_lock); -#ifdef PMAP_NO_LAZY_LEV1MAP pmap_lev1map_destroy(pmap, cpu_number()); -#endif /* * Since the pmap is supposed to contain no valid @@ -1252,6 +1217,7 @@ pmap_destroy(pmap_t pmap) */ KASSERT(pmap->pm_lev1map == kernel_lev1map); + mutex_destroy(&pmap->pm_lock); pool_cache_put(&pmap_pmap_cache, pmap); } @@ -1269,9 +1235,7 @@ pmap_reference(pmap_t pmap) printf("pmap_reference(%p)\n", pmap); #endif - PMAP_LOCK(pmap); - pmap->pm_count++; - PMAP_UNLOCK(pmap); + atomic_inc_uint(&pmap->pm_count); } /* @@ -1486,7 +1450,7 @@ pmap_page_protect(struct vm_page *pg, vm_prot_t prot) case VM_PROT_READ|VM_PROT_EXECUTE: case VM_PROT_READ: PMAP_HEAD_TO_MAP_LOCK(); - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) { PMAP_LOCK(pv->pv_pmap); if (*pv->pv_pte & (PG_KWE | PG_UWE)) { @@ -1499,7 +1463,7 @@ pmap_page_protect(struct vm_page *pg, vm_prot_t prot) } PMAP_UNLOCK(pv->pv_pmap); } - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); PMAP_HEAD_TO_MAP_UNLOCK(); PMAP_TLB_SHOOTNOW(); return; @@ -1510,7 +1474,7 @@ pmap_page_protect(struct vm_page *pg, vm_prot_t prot) } PMAP_HEAD_TO_MAP_LOCK(); - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); for (pv = pg->mdpage.pvh_list; pv != NULL; pv = nextpv) { nextpv = pv->pv_next; pmap = pv->pv_pmap; @@ -1534,7 +1498,7 @@ pmap_page_protect(struct vm_page *pg, vm_prot_t prot) if (needkisync) PMAP_SYNC_ISTREAM_KERNEL(); - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); PMAP_HEAD_TO_MAP_UNLOCK(); } @@ -1688,40 +1652,7 @@ pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) panic("pmap_enter: user pmap, invalid va 0x%lx", va); #endif -#ifdef PMAP_NO_LAZY_LEV1MAP KASSERT(pmap->pm_lev1map != kernel_lev1map); -#else - /* - * If we're still referencing the kernel kernel_lev1map, - * create a new level 1 page table. A reference will be - * added to the level 1 table when the level 2 table is - * created. - */ - if (pmap->pm_lev1map == kernel_lev1map) { - /* - * XXX Yuck. - * We have to unlock the pmap, lock the - * pmap_growkernel_slock, and re-lock the - * pmap here, in order to avoid a deadlock - * with pmap_growkernel(). - * - * Because we unlock, we have a window for - * someone else to add a mapping, thus creating - * a level 1 map; pmap_lev1map_create() checks - * for this condition. 
- */ - PMAP_UNLOCK(pmap); - simple_lock(&pmap_growkernel_slock); - PMAP_LOCK(pmap); - error = pmap_lev1map_create(pmap, cpu_id); - simple_unlock(&pmap_growkernel_slock); - if (error) { - if (flags & PMAP_CANFAIL) - goto out; - panic("pmap_enter: unable to create lev1map"); - } - } -#endif /* PMAP_NO_LAZY_LEV1MAP */ /* * Check to see if the level 1 PTE is valid, and @@ -1890,13 +1821,13 @@ pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) if ((flags & VM_PROT_ALL) & ~prot) panic("pmap_enter: access type exceeds prot"); #endif - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); if (flags & VM_PROT_WRITE) pg->mdpage.pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED); else if (flags & VM_PROT_ALL) pg->mdpage.pvh_attrs |= PGA_REFERENCED; attrs = pg->mdpage.pvh_attrs; - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); /* * Set up referenced/modified emulation for new mapping. @@ -2258,21 +2189,13 @@ pmap_activate(struct lwp *l) printf("pmap_activate(%p)\n", l); #endif -#ifndef PMAP_NO_LAZY_LEV1MAP - PMAP_LOCK(pmap); -#endif - /* Mark the pmap in use by this processor. */ - atomic_setbits_ulong(&pmap->pm_cpus, (1UL << cpu_id)); + atomic_or_ulong(&pmap->pm_cpus, (1UL << cpu_id)); /* Allocate an ASN. */ pmap_asn_alloc(pmap, cpu_id); PMAP_ACTIVATE(pmap, l, cpu_id); - -#ifndef PMAP_NO_LAZY_LEV1MAP - PMAP_UNLOCK(pmap); -#endif } /* @@ -2298,7 +2221,7 @@ pmap_deactivate(struct lwp *l) /* * Mark the pmap no longer in use by this processor. */ - atomic_clearbits_ulong(&pmap->pm_cpus, (1UL << cpu_number())); + atomic_and_ulong(&pmap->pm_cpus, ~(1UL << cpu_number())); } #if defined(MULTIPROCESSOR) @@ -2448,7 +2371,7 @@ pmap_clear_modify(struct vm_page *pg) #endif PMAP_HEAD_TO_MAP_LOCK(); - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); if (pg->mdpage.pvh_attrs & PGA_MODIFIED) { rv = true; @@ -2456,7 +2379,7 @@ pmap_clear_modify(struct vm_page *pg) pg->mdpage.pvh_attrs &= ~PGA_MODIFIED; } - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); PMAP_HEAD_TO_MAP_UNLOCK(); return (rv); @@ -2479,7 +2402,7 @@ pmap_clear_reference(struct vm_page *pg) #endif PMAP_HEAD_TO_MAP_LOCK(); - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); if (pg->mdpage.pvh_attrs & PGA_REFERENCED) { rv = true; @@ -2487,7 +2410,7 @@ pmap_clear_reference(struct vm_page *pg) pg->mdpage.pvh_attrs &= ~PGA_REFERENCED; } - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); PMAP_HEAD_TO_MAP_UNLOCK(); return (rv); @@ -2835,7 +2758,7 @@ pmap_emulate_reference(struct lwp *l, vaddr_t v, int user, int type) pg = PHYS_TO_VM_PAGE(pa); PMAP_HEAD_TO_MAP_LOCK(); - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); if (type == ALPHA_MMCSR_FOW) { pg->mdpage.pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED); @@ -2849,7 +2772,7 @@ pmap_emulate_reference(struct lwp *l, vaddr_t v, int user, int type) } pmap_changebit(pg, 0, ~faultoff, cpu_id); - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); PMAP_HEAD_TO_MAP_UNLOCK(); return (0); } @@ -2868,7 +2791,7 @@ pmap_pv_dump(paddr_t pa) pg = PHYS_TO_VM_PAGE(pa); - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); printf("pa 0x%lx (attrs = 0x%x):\n", pa, pg->mdpage.pvh_attrs); for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) @@ -2876,7 +2799,7 @@ pmap_pv_dump(paddr_t pa) pv->pv_pmap, pv->pv_va); printf("\n"); - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); } 
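The PMAP_MAP_TO_HEAD/PMAP_HEAD_TO_MAP macros earlier in this file now expand to reader and writer acquisitions of pmap_main_lock, where they were previously no-ops. A hedged sketch of the two directions (the function names here are invented for illustration):

#include <sys/rwlock.h>

static krwlock_t pmap_main_lock;	/* rw_init()ed at bootstrap */

/* map->head: many lookups may proceed concurrently. */
static void
map_to_head_example(void)
{
	rw_enter(&pmap_main_lock, RW_READER);
	/* ... lock one pmap, then reach its PV entries ... */
	rw_exit(&pmap_main_lock);
}

/* head->map: the write side excludes all map->head holders. */
static void
head_to_map_example(void)
{
	rw_enter(&pmap_main_lock, RW_WRITER);
	/* ... walk a PV list, locking each owning pmap in turn ... */
	rw_exit(&pmap_main_lock);
}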
#endif @@ -2935,7 +2858,7 @@ pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte, newpv->pv_pte = pte; if (dolock) - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); #ifdef DEBUG { @@ -2959,7 +2882,7 @@ pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte, pg->mdpage.pvh_list = newpv; if (dolock) - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); return 0; } @@ -2975,7 +2898,7 @@ pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, bool dolock) pv_entry_t pv, *pvp; if (dolock) - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); /* * Find the entry to remove. @@ -2993,7 +2916,7 @@ pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, bool dolock) *pvp = pv->pv_next; if (dolock) - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); pmap_pv_free(pv); } @@ -3050,13 +2973,13 @@ pmap_physpage_alloc(int usage, paddr_t *pap) pa = VM_PAGE_TO_PHYS(pg); #ifdef DEBUG - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); if (pg->wire_count != 0) { printf("pmap_physpage_alloc: page 0x%lx has " "%d references\n", pa, pg->wire_count); panic("pmap_physpage_alloc"); } - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); #endif *pap = pa; return (true); @@ -3078,10 +3001,10 @@ pmap_physpage_free(paddr_t pa) panic("pmap_physpage_free: bogus physical page address"); #ifdef DEBUG - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); if (pg->wire_count != 0) panic("pmap_physpage_free: page still has references"); - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); #endif uvm_pagefree(pg); @@ -3102,9 +3025,9 @@ pmap_physpage_addref(void *kva) pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); pg = PHYS_TO_VM_PAGE(pa); - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); rval = ++pg->wire_count; - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); return (rval); } @@ -3124,7 +3047,7 @@ pmap_physpage_delref(void *kva) pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); pg = PHYS_TO_VM_PAGE(pa); - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); #ifdef DIAGNOSTIC /* @@ -3136,7 +3059,7 @@ pmap_physpage_delref(void *kva) rval = --pg->wire_count; - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); return (rval); } @@ -3161,7 +3084,7 @@ pmap_growkernel(vaddr_t maxkvaddr) if (maxkvaddr <= virtual_end) goto out; /* we are OK */ - simple_lock(&pmap_growkernel_slock); + mutex_enter(&pmap_growkernel_lock); va = virtual_end; @@ -3194,7 +3117,7 @@ pmap_growkernel(vaddr_t maxkvaddr) l1idx = l1pte_index(va); /* Update all the user pmaps. */ - simple_lock(&pmap_all_pmaps_slock); + mutex_enter(&pmap_all_pmaps_lock); for (pm = TAILQ_FIRST(&pmap_all_pmaps); pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) { /* Skip the kernel pmap. 
*/ @@ -3209,7 +3132,7 @@ pmap_growkernel(vaddr_t maxkvaddr) pm->pm_lev1map[l1idx] = pte; PMAP_UNLOCK(pm); } - simple_unlock(&pmap_all_pmaps_slock); + mutex_exit(&pmap_all_pmaps_lock); } /* @@ -3235,7 +3158,7 @@ pmap_growkernel(vaddr_t maxkvaddr) virtual_end = va; - simple_unlock(&pmap_growkernel_slock); + mutex_exit(&pmap_growkernel_lock); out: return (virtual_end); @@ -3275,28 +3198,12 @@ pmap_lev1map_create(pmap_t pmap, long cpu_id) panic("pmap_lev1map_create: pmap uses non-reserved ASN"); #endif -#ifdef PMAP_NO_LAZY_LEV1MAP /* Being called from pmap_create() in this case; we can sleep. */ l1pt = pool_cache_get(&pmap_l1pt_cache, PR_WAITOK); -#else - l1pt = pool_cache_get(&pmap_l1pt_cache, PR_NOWAIT); -#endif if (l1pt == NULL) return (ENOMEM); pmap->pm_lev1map = l1pt; - -#ifndef PMAP_NO_LAZY_LEV1MAP /* guaranteed not to be active */ - /* - * The page table base has changed; if the pmap was active, - * reactivate it. - */ - if (PMAP_ISACTIVE(pmap, cpu_id)) { - pmap_asn_alloc(pmap, cpu_id); - PMAP_ACTIVATE(pmap, curlwp, cpu_id); - } - PMAP_LEV1MAP_SHOOTDOWN(pmap, cpu_id); -#endif /* ! PMAP_NO_LAZY_LEV1MAP */ return (0); } @@ -3322,31 +3229,6 @@ pmap_lev1map_destroy(pmap_t pmap, long cpu_id) */ pmap->pm_lev1map = kernel_lev1map; -#ifndef PMAP_NO_LAZY_LEV1MAP /* pmap is being destroyed */ - /* - * The page table base has changed; if the pmap was active, - * reactivate it. Note that allocation of a new ASN is - * not necessary here: - * - * (1) We've gotten here because we've deleted all - * user mappings in the pmap, invalidating the - * TLB entries for them as we go. - * - * (2) kernel_lev1map contains only kernel mappings, which - * were identical in the user pmap, and all of - * those mappings have PG_ASM, so the ASN doesn't - * matter. - * - * We do, however, ensure that the pmap is using the - * reserved ASN, to ensure that no two pmaps never have - * clashing TLB entries. - */ - PMAP_INVALIDATE_ASN(pmap, cpu_id); - if (PMAP_ISACTIVE(pmap, cpu_id)) - PMAP_ACTIVATE(pmap, curlwp, cpu_id); - PMAP_LEV1MAP_SHOOTDOWN(pmap, cpu_id); -#endif /* ! PMAP_NO_LAZY_LEV1MAP */ - /* * Free the old level 1 page table page. */ @@ -3583,15 +3465,7 @@ pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte, long cpu_id) panic("pmap_l1pt_delref: kernel pmap"); #endif - if (pmap_physpage_delref(l1pte) == 0) { -#ifndef PMAP_NO_LAZY_LEV1MAP - /* - * No more level 2 tables left, go back to the global - * kernel_lev1map. - */ - pmap_lev1map_destroy(pmap, cpu_id); -#endif /* ! PMAP_NO_LAZY_LEV1MAP */ - } + (void)pmap_physpage_delref(l1pte); } /******************** Address Space Number management ********************/ @@ -3623,7 +3497,6 @@ pmap_asn_alloc(pmap_t pmap, long cpu_id) * have PG_ASM set. If the pmap eventually gets its own * lev1map, an ASN will be allocated at that time. * - * #ifdef PMAP_NO_LAZY_LEV1MAP * Only the kernel pmap will reference kernel_lev1map. Do the * same old fixups, but note that we no longer need the pmap * to be locked if we're in this mode, since pm_lev1map will @@ -3747,7 +3620,7 @@ pmap_asn_alloc(pmap_t pmap, long cpu_id) * Have a new ASN, so there's no need to sync the I-stream * on the way back out to userspace. 
*/ - atomic_clearbits_ulong(&pmap->pm_needisync, (1UL << cpu_id)); + atomic_and_ulong(&pmap->pm_needisync, ~(1UL << cpu_id)); } #if defined(MULTIPROCESSOR) @@ -3768,10 +3641,8 @@ pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp) struct cpu_info *ci, *self = curcpu(); u_long cpumask; CPU_INFO_ITERATOR cii; - int s; - LOCK_ASSERT((pmap == pmap_kernel()) || - simple_lock_held(&pmap->pm_slock)); + KASSERT((pmap == pmap_kernel()) || mutex_owned(&pmap->pm_lock)); cpumask = 0; @@ -3803,7 +3674,7 @@ pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp) pq = &pmap_tlb_shootdown_q[ci->ci_cpuid]; - PSJQ_LOCK(pq, s); + mutex_spin_enter(&pq->pq_lock); pq->pq_pte |= pte; @@ -3812,7 +3683,7 @@ pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp) * don't really have to do anything else. */ if (pq->pq_tbia) { - PSJQ_UNLOCK(pq, s); + mutex_spin_exit(&pq->pq_lock); continue; } @@ -3832,7 +3703,7 @@ pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp) cpumask |= 1UL << ci->ci_cpuid; - PSJQ_UNLOCK(pq, s); + mutex_spin_exit(&pq->pq_lock); } *cpumaskp |= cpumask; @@ -3863,9 +3734,8 @@ pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep) u_long cpu_mask = (1UL << cpu_id); struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id]; struct pmap_tlb_shootdown_job *pj; - int s; - PSJQ_LOCK(pq, s); + mutex_spin_enter(&pq->pq_lock); if (pq->pq_tbia) { if (pq->pq_pte & PG_ASM) @@ -3885,7 +3755,7 @@ pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep) pq->pq_pte = 0; } - PSJQ_UNLOCK(pq, s); + mutex_spin_exit(&pq->pq_lock); } /* diff --git a/sys/arch/alpha/alpha/trap.c b/sys/arch/alpha/alpha/trap.c index 30117ac7150d..48f4db3581f7 100644 --- a/sys/arch/alpha/alpha/trap.c +++ b/sys/arch/alpha/alpha/trap.c @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.116 2007/10/17 19:52:56 garbled Exp $ */ +/* $NetBSD: trap.c,v 1.117 2008/01/02 11:48:21 ad Exp $ */ /*- * Copyright (c) 2000, 2001 The NetBSD Foundation, Inc. @@ -100,7 +100,7 @@ #include /* RCS ID & Copyright macro defns */ -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.116 2007/10/17 19:52:56 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.117 2008/01/02 11:48:21 ad Exp $"); #include #include @@ -263,9 +263,7 @@ trap(const u_long a0, const u_long a1, const u_long a2, const u_long entry, * and per-process unaligned-access-handling flags). 
*/ if (user) { - KERNEL_LOCK(1, l); i = unaligned_fixup(a0, a1, a2, l); - KERNEL_UNLOCK_LAST(l); if (i == 0) goto out; @@ -360,9 +358,7 @@ trap(const u_long a0, const u_long a1, const u_long a2, const u_long entry, break; case ALPHA_IF_CODE_OPDEC: - KERNEL_LOCK(1, l); i = handle_opdec(l, &ucode); - KERNEL_UNLOCK_LAST(l); KSI_INIT_TRAP(&ksi); if (i == 0) goto out; @@ -392,20 +388,10 @@ trap(const u_long a0, const u_long a1, const u_long a2, const u_long entry, case ALPHA_MMCSR_FOR: case ALPHA_MMCSR_FOE: case ALPHA_MMCSR_FOW: - if (user) - KERNEL_LOCK(1, l); - else - KERNEL_LOCK(1, NULL); - if (pmap_emulate_reference(l, a0, user, a1)) { ftype = VM_PROT_EXECUTE; goto do_fault; } - - if (user) - KERNEL_UNLOCK_LAST(l); - else - KERNEL_UNLOCK_ONE(NULL); goto out; case ALPHA_MMCSR_INVALTRANS: @@ -435,9 +421,7 @@ trap(const u_long a0, const u_long a1, const u_long a2, const u_long entry, #endif } - if (user) - KERNEL_LOCK(1, l); - else { + if (!user) { struct cpu_info *ci = curcpu(); if (l == NULL) { @@ -472,8 +456,6 @@ trap(const u_long a0, const u_long a1, const u_long a2, const u_long entry, */ if (ci->ci_intrdepth != 0) goto dopanic; - - KERNEL_LOCK(1, NULL); } /* @@ -513,16 +495,10 @@ do_fault: rv = EFAULT; } if (rv == 0) { - if (user) - KERNEL_UNLOCK_LAST(l); - else - KERNEL_UNLOCK_ONE(NULL); goto out; } if (user == 0) { - KERNEL_UNLOCK_ONE(NULL); - /* Check for copyin/copyout fault */ if (l != NULL && l->l_addr->u_pcb.pcb_onfault != 0) { @@ -550,7 +526,6 @@ do_fault: ksi.ksi_code = SEGV_ACCERR; else ksi.ksi_code = SEGV_MAPERR; - KERNEL_UNLOCK_LAST(l); break; } @@ -567,9 +542,7 @@ do_fault: #ifdef DEBUG printtrap(a0, a1, a2, entry, framep, 1, user); #endif - KERNEL_LOCK(1, l); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); out: if (user) userret(l); @@ -675,8 +648,6 @@ ast(struct trapframe *framep) if (l == NULL) return; - KERNEL_LOCK(1, l); - uvmexp.softs++; l->l_md.md_tf = framep; @@ -692,7 +663,6 @@ ast(struct trapframe *framep) preempt(); } - KERNEL_UNLOCK_LAST(l); userret(l); } @@ -1247,6 +1217,5 @@ startlwp(void *arg) #endif pool_put(&lwp_uc_pool, uc); - KERNEL_UNLOCK_LAST(l); userret(l); } diff --git a/sys/arch/alpha/include/pmap.h b/sys/arch/alpha/include/pmap.h index 39a391dfc830..3ee1d10eca2b 100644 --- a/sys/arch/alpha/include/pmap.h +++ b/sys/arch/alpha/include/pmap.h @@ -1,7 +1,7 @@ -/* $NetBSD: pmap.h,v 1.68 2007/02/21 22:59:37 thorpej Exp $ */ +/* $NetBSD: pmap.h,v 1.69 2008/01/02 11:48:21 ad Exp $ */ /*- - * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc. + * Copyright (c) 1998, 1999, 2000, 2001, 2007 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -117,7 +117,7 @@ #include "opt_multiprocessor.h" #endif -#include +#include #include #include @@ -144,7 +144,7 @@ struct pmap { TAILQ_ENTRY(pmap) pm_list; /* list of all pmaps */ pt_entry_t *pm_lev1map; /* level 1 map */ int pm_count; /* pmap reference count */ - struct simplelock pm_slock; /* lock on pmap */ + kmutex_t pm_lock; /* lock on pmap */ struct pmap_statistics pm_stats; /* pmap statistics */ unsigned long pm_cpus; /* mask of CPUs using pmap */ unsigned long pm_needisync; /* mask of CPUs needing isync */ @@ -343,8 +343,8 @@ pmap_l3pte(pmap, v, l2pte) * operations, locking the kernel pmap is not necessary. Therefore, * it is not necessary to block interrupts when locking pmap strucutres. 
*/ -#define PMAP_LOCK(pmap) simple_lock(&(pmap)->pm_slock) -#define PMAP_UNLOCK(pmap) simple_unlock(&(pmap)->pm_slock) +#define PMAP_LOCK(pmap) mutex_enter(&(pmap)->pm_lock) +#define PMAP_UNLOCK(pmap) mutex_exit(&(pmap)->pm_lock) /* * Macro for processing deferred I-stream synchronization. diff --git a/sys/arch/alpha/include/vmparam.h b/sys/arch/alpha/include/vmparam.h index 718b13db5f7e..3265f339b371 100644 --- a/sys/arch/alpha/include/vmparam.h +++ b/sys/arch/alpha/include/vmparam.h @@ -1,4 +1,4 @@ -/* $NetBSD: vmparam.h,v 1.29 2005/12/11 12:16:16 christos Exp $ */ +/* $NetBSD: vmparam.h,v 1.30 2008/01/02 11:48:21 ad Exp $ */ /* * Copyright (c) 1992, 1993 @@ -166,14 +166,14 @@ #define __HAVE_VM_PAGE_MD struct vm_page_md { struct pv_entry *pvh_list; /* pv_entry list */ - struct simplelock pvh_slock; /* lock on this head */ + kmutex_t pvh_lock; /* lock on this head */ int pvh_attrs; /* page attributes */ }; #define VM_MDPAGE_INIT(pg) \ do { \ (pg)->mdpage.pvh_list = NULL; \ - simple_lock_init(&(pg)->mdpage.pvh_slock); \ + mutex_init(&(pg)->mdpage.pvh_lock, MUTEX_DEFAULT, IPL_NONE); \ } while (/*CONSTCOND*/0) #endif /* ! _ALPHA_VMPARAM_H_ */ diff --git a/sys/arch/amd64/amd64/fpu.c b/sys/arch/amd64/amd64/fpu.c index fd8cd01319da..22d02c84cd2a 100644 --- a/sys/arch/amd64/amd64/fpu.c +++ b/sys/arch/amd64/amd64/fpu.c @@ -1,4 +1,4 @@ -/* $NetBSD: fpu.c,v 1.20 2007/11/22 16:16:41 bouyer Exp $ */ +/* $NetBSD: fpu.c,v 1.21 2008/01/02 11:48:21 ad Exp $ */ /*- * Copyright (c) 1991 The Regents of the University of California. @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.20 2007/11/22 16:16:41 bouyer Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.21 2008/01/02 11:48:21 ad Exp $"); #include "opt_multiprocessor.h" @@ -179,9 +179,7 @@ fputrap(frame) ksi.ksi_addr = (void *)frame->tf_rip; ksi.ksi_code = x86fpflags_to_ksiginfo(statbits); ksi.ksi_trap = statbits; - KERNEL_LOCK(1, l); (*l->l_proc->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); } static int diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c index a93f04def038..fe78a079dcfe 100644 --- a/sys/arch/amd64/amd64/trap.c +++ b/sys/arch/amd64/amd64/trap.c @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.42 2008/01/01 21:28:40 yamt Exp $ */ +/* $NetBSD: trap.c,v 1.43 2008/01/02 11:48:22 ad Exp $ */ /*- * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. @@ -75,7 +75,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.42 2008/01/01 21:28:40 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.43 2008/01/02 11:48:22 ad Exp $"); #include "opt_ddb.h" #include "opt_kgdb.h" @@ -439,7 +439,6 @@ copyfault: goto copyefault; cr2 = rcr2(); - KERNEL_LOCK(1, NULL); goto faultcommon; case T_PAGEFLT|T_USER: { /* page fault */ @@ -453,7 +452,6 @@ copyfault: if (p->p_emul->e_usertrap != NULL && (*p->p_emul->e_usertrap)(l, cr2, frame) != 0) return; - KERNEL_LOCK(1, l); faultcommon: vm = p->p_vmspace; if (vm == NULL) @@ -496,8 +494,6 @@ faultcommon: uvm_grow(p, va); if (type == T_PAGEFLT) { - KERNEL_UNLOCK_ONE(NULL); - /* * we need to switch pmap now if we're in * the middle of copyin/out. 
@@ -511,7 +507,6 @@ faultcommon: pmap_load(); return; } - KERNEL_UNLOCK_LAST(l); goto out; } KSI_INIT_TRAP(&ksi); @@ -524,10 +519,8 @@ faultcommon: ksi.ksi_code = SEGV_MAPERR; if (type == T_PAGEFLT) { - if (pcb->pcb_onfault != 0) { - KERNEL_UNLOCK_ONE(NULL); + if (pcb->pcb_onfault != 0) goto copyfault; - } printf("uvm_fault(%p, 0x%lx, %d) -> %x\n", map, va, ftype, error); goto we_re_toast; @@ -547,10 +540,6 @@ faultcommon: ksi.ksi_signo = SIGSEGV; } (*p->p_emul->e_trapsignal)(l, &ksi); - if (type == T_PAGEFLT) - KERNEL_UNLOCK_ONE(NULL); - else - KERNEL_UNLOCK_LAST(l); break; } @@ -580,9 +569,7 @@ faultcommon: ksi.ksi_code = TRAP_BRKPT; else ksi.ksi_code = TRAP_TRACE; - KERNEL_LOCK(1, l); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); } break; @@ -621,9 +608,7 @@ out: userret(l); return; trapsignal: - KERNEL_LOCK(1, l); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); userret(l); } @@ -636,9 +621,6 @@ startlwp(void *arg) err = cpu_setmcontext(l, &uc->uc_mcontext, uc->uc_flags); pool_put(&lwp_uc_pool, uc); - - KERNEL_UNLOCK_LAST(l); - userret(l); } diff --git a/sys/arch/amiga/amiga/disksubr.c b/sys/arch/amiga/amiga/disksubr.c index 4482dd44f601..4d5ce55180b5 100644 --- a/sys/arch/amiga/amiga/disksubr.c +++ b/sys/arch/amiga/amiga/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.55 2007/10/17 19:53:12 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.56 2008/01/02 11:48:22 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -66,7 +66,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.55 2007/10/17 19:53:12 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.56 2008/01/02 11:48:22 ad Exp $"); #include #include @@ -190,7 +190,7 @@ readdisklabel(dev, strat, lp, clp) bp->b_blkno = nextb; bp->b_cylinder = bp->b_blkno / lp->d_secpercyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; #ifdef SD_C_ADJUSTS_NR bp->b_blkno *= (lp->d_secsize / DEV_BSIZE); @@ -307,7 +307,7 @@ readdisklabel(dev, strat, lp, clp) bp->b_blkno = nextb; bp->b_cylinder = bp->b_blkno / lp->d_secpercyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; #ifdef SD_C_ADJUSTS_NR bp->b_blkno *= (lp->d_secsize / DEV_BSIZE); @@ -571,7 +571,8 @@ writedisklabel(dev, strat, lp, clp) dlp = (struct disklabel *)((char*)bp->b_data + LABELOFFSET); *dlp = *lp; /* struct assignment */ - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/amiga/dev/fd.c b/sys/arch/amiga/dev/fd.c index 6085b92a0dcb..78bc6a14be01 100644 --- a/sys/arch/amiga/dev/fd.c +++ b/sys/arch/amiga/dev/fd.c @@ -1,4 +1,4 @@ -/* $NetBSD: fd.c,v 1.77 2007/10/17 19:53:15 garbled Exp $ */ +/* $NetBSD: fd.c,v 1.78 2008/01/02 11:48:22 ad Exp $ */ /* * Copyright (c) 1994 Christian E. 
Hopps @@ -33,7 +33,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.77 2007/10/17 19:53:15 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.78 2008/01/02 11:48:22 ad Exp $"); #include #include @@ -912,7 +912,8 @@ fdputdisklabel(struct fd_softc *sc, dev_t dev) bcopy(lp, dlp, sizeof(struct disklabel)); bp->b_blkno = 0; bp->b_cylinder = 0; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_flags &= ~(B_READ); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_WRITE; fdstrategy(bp); error = biowait(bp); diff --git a/sys/arch/arc/arc/disksubr.c b/sys/arch/arc/arc/disksubr.c index c0f9edd0ff6e..0a930b8c0b60 100644 --- a/sys/arch/arc/arc/disksubr.c +++ b/sys/arch/arc/arc/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.27 2007/10/17 19:53:27 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.28 2008/01/02 11:48:22 ad Exp $ */ /* $OpenBSD: disksubr.c,v 1.14 1997/05/08 00:14:29 deraadt Exp $ */ /* NetBSD: disksubr.c,v 1.40 1999/05/06 15:45:51 christos Exp */ @@ -34,7 +34,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.27 2007/10/17 19:53:27 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.28 2008/01/02 11:48:22 ad Exp $"); #include #include @@ -270,7 +270,7 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -321,7 +321,7 @@ nombrpart: i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -475,7 +475,7 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -489,7 +489,8 @@ nombrpart: if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/arm/arm/disksubr.c b/sys/arch/arm/arm/disksubr.c index 1cb2b648eaba..3ba6d9165370 100644 --- a/sys/arch/arm/arm/disksubr.c +++ b/sys/arch/arm/arm/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.19 2007/10/17 19:53:30 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.20 2008/01/02 11:48:23 ad Exp $ */ /* * Copyright (c) 1998 Christopher G. Demetriou. All rights reserved. 
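Nearly every disksubr.c in this commit repeats one recipe: the done bit moves from b_flags (B_DONE) to b_oflags (BO_DONE), so turning a just-read label buffer around for the write-back now clears state in two fields. A condensed sketch of that step, with the label offset passed in rather than taken from a port's headers:

#include <sys/types.h>
#include <sys/buf.h>
#include <sys/disklabel.h>

/* Sketch: rewrite an in-core disklabel using the buffer that read it. */
static int
label_writeback(struct buf *bp, void (*strat)(struct buf *),
    const struct disklabel *lp, size_t labeloffset)
{
	struct disklabel *dlp;

	dlp = (struct disklabel *)((char *)bp->b_data + labeloffset);
	*dlp = *lp;			/* struct assignment */
	bp->b_oflags &= ~BO_DONE;	/* completion state: b_oflags */
	bp->b_flags &= ~B_READ;
	bp->b_flags |= B_WRITE;
	(*strat)(bp);
	return biowait(bp);
}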
@@ -97,7 +97,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.19 2007/10/17 19:53:30 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.20 2008/01/02 11:48:23 ad Exp $"); #include #include @@ -224,7 +224,7 @@ readdisklabel(dev, strat, lp, osdep) i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -376,7 +376,7 @@ writedisklabel(dev, strat, lp, osdep) bp->b_blkno = netbsdpartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -390,7 +390,8 @@ writedisklabel(dev, strat, lp, osdep) if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_flags &= ~(B_READ); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/arm/arm/disksubr_acorn.c b/sys/arch/arm/arm/disksubr_acorn.c index 1224404e1c5b..b1c64dc61a57 100644 --- a/sys/arch/arm/arm/disksubr_acorn.c +++ b/sys/arch/arm/arm/disksubr_acorn.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr_acorn.c,v 1.7 2007/10/17 19:53:30 garbled Exp $ */ +/* $NetBSD: disksubr_acorn.c,v 1.8 2008/01/02 11:48:23 ad Exp $ */ /* * Copyright (c) 1998 Christopher G. Demetriou. All rights reserved. @@ -97,7 +97,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr_acorn.c,v 1.7 2007/10/17 19:53:30 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr_acorn.c,v 1.8 2008/01/02 11:48:23 ad Exp $"); #include #include @@ -259,7 +259,7 @@ filecore_label_read(dev, strat, lp, osdep, msgp, cylp, netbsd_label_offp) bp->b_blkno);*/ bp->b_cylinder = bp->b_blkno / lp->d_secpercyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); diff --git a/sys/arch/atari/atari/disksubr.c b/sys/arch/atari/atari/disksubr.c index b671594a0075..29f415674108 100644 --- a/sys/arch/atari/atari/disksubr.c +++ b/sys/arch/atari/atari/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.34 2007/10/17 19:53:45 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.35 2008/01/02 11:48:23 ad Exp $ */ /* * Copyright (c) 1995 Leo Weppelman. @@ -31,7 +31,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.34 2007/10/17 19:53:45 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.35 2008/01/02 11:48:23 ad Exp $"); #ifndef DISKLABEL_NBDA #define DISKLABEL_NBDA /* required */ @@ -234,7 +234,8 @@ writedisklabel(dev, strat, lp, clp) bb->bb_magic = (blk == 0) ? 
NBDAMAGIC : AHDIMAGIC; BBSETLABEL(bb, lp); - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; bp->b_bcount = BBMINSIZE; bp->b_blkno = blk; diff --git a/sys/arch/atari/dev/hdfd.c b/sys/arch/atari/dev/hdfd.c index d0e6c2832640..bbee5de9d155 100644 --- a/sys/arch/atari/dev/hdfd.c +++ b/sys/arch/atari/dev/hdfd.c @@ -1,4 +1,4 @@ -/* $NetBSD: hdfd.c,v 1.59 2007/10/17 19:53:47 garbled Exp $ */ +/* $NetBSD: hdfd.c,v 1.60 2008/01/02 11:48:23 ad Exp $ */ /*- * Copyright (c) 1996 Leo Weppelman @@ -91,7 +91,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: hdfd.c,v 1.59 2007/10/17 19:53:47 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: hdfd.c,v 1.60 2008/01/02 11:48:23 ad Exp $"); #include "opt_ddb.h" @@ -1531,11 +1531,12 @@ fdformat(dev, finfo, p) struct buf *bp; /* set up a buffer header for fdstrategy() */ - bp = (struct buf *)malloc(sizeof(struct buf), M_TEMP, M_NOWAIT); + bp = getiobuf(NULL, false); if(bp == 0) return ENOBUFS; bzero((void *)bp, sizeof(struct buf)); - bp->b_flags = B_BUSY | B_PHYS | B_FORMAT; + bp->b_flags = B_PHYS | B_FORMAT; + bp->b_cflags |= BC_BUSY; bp->b_proc = p; bp->b_dev = dev; @@ -1557,13 +1558,13 @@ fdformat(dev, finfo, p) fdstrategy(bp); /* ...and wait for it to complete */ - s = splbio(); - while(!(bp->b_flags & B_DONE)) { - rv = tsleep((void *)bp, PRIBIO, "fdform", 20 * hz); + mutex_enter(bp->b_objlock); + while(!(bp->b_oflags & BO_DONE)) { + rv = cv_timedwait(&bp->b_done, bp->b_objlock, 20 * hz); if (rv == EWOULDBLOCK) break; } - splx(s); + mutex_exit(bp->b_objlock); if (rv == EWOULDBLOCK) { /* timed out */ @@ -1572,7 +1573,7 @@ fdformat(dev, finfo, p) } else if (bp->b_error != 0) { rv = bp->b_error; } - free(bp, M_TEMP); + putiobuf(bp); return rv; } diff --git a/sys/arch/atari/dev/md_root.c b/sys/arch/atari/dev/md_root.c index e32c622f821d..db4e55862c5b 100644 --- a/sys/arch/atari/dev/md_root.c +++ b/sys/arch/atari/dev/md_root.c @@ -31,7 +31,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: md_root.c,v 1.24 2007/10/17 19:53:47 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: md_root.c,v 1.25 2008/01/02 11:48:24 ad Exp $"); #include #include @@ -157,7 +157,7 @@ struct md_conf *md; dev_t ld_dev; struct lwp *lwp; { - struct buf buf; + struct buf *buf; int error; const struct bdevsw *bdp; struct disklabel dl; @@ -170,17 +170,16 @@ struct lwp *lwp; /* * Initialize our buffer header: */ - memset(&buf, 0, sizeof(buf)); - buf.b_vnbufs.le_next = NOLIST; - buf.b_flags = B_BUSY; - buf.b_dev = ld_dev; - buf.b_error = 0; - buf.b_proc = lwp->l_proc; + buf = getiobuf(NULL, false); + buf->b_cflags = BC_BUSY; + buf->b_dev = ld_dev; + buf->b_error = 0; + buf->b_proc = lwp->l_proc; /* * Setup read_info: */ - rs.bp = &buf; + rs.bp = buf; rs.nbytes = md->md_size; rs.offset = 0; rs.bufp = md->md_addr; @@ -192,8 +191,10 @@ struct lwp *lwp; /* * Open device and try to get some statistics. 
*/ - if((error = bdp->d_open(ld_dev, FREAD | FNONBLOCK, 0, lwp)) != 0) + if((error = bdp->d_open(ld_dev, FREAD | FNONBLOCK, 0, lwp)) != 0) { + putiobuf(buf); return(error); + } if(bdp->d_ioctl(ld_dev, DIOCGDINFO, (void *)&dl, FREAD, lwp) == 0) { /* Read on a cylinder basis */ rs.chunk = dl.d_secsize * dl.d_secpercyl; @@ -208,6 +209,7 @@ struct lwp *lwp; error = ramd_norm_read(&rs); bdp->d_close(ld_dev,FREAD | FNONBLOCK, 0, lwp); + putiobuf(buf); return(error); } @@ -218,7 +220,6 @@ struct read_info *rsp; long bytes_left; int done, error; struct buf *bp; - int s; int dotc = 0; bytes_left = rsp->nbytes; @@ -226,9 +227,8 @@ struct read_info *rsp; error = 0; while(bytes_left > 0) { - s = splbio(); - bp->b_flags = B_BUSY | B_PHYS | B_READ; - splx(s); + bp->b_cflags = BC_BUSY; + bp->b_flags = B_PHYS | B_READ; bp->b_blkno = btodb(rsp->offset); bp->b_bcount = rsp->chunk; bp->b_data = rsp->bufp; @@ -238,10 +238,7 @@ struct read_info *rsp; (*rsp->strat)(bp); /* Wait for results */ - s = splbio(); - while ((bp->b_flags & B_DONE) == 0) - tsleep((void *) bp, PRIBIO + 1, "ramd_norm_read", 0); - splx(s); + biowait(bp); error = bp->b_error; /* Dot counter */ @@ -300,7 +297,6 @@ int nbyte; static int dotc = 0; struct buf *bp; int nread = 0; - int s; int done, error; @@ -309,9 +305,8 @@ int nbyte; nbyte &= ~(DEV_BSIZE - 1); while(nbyte > 0) { - s = splbio(); - bp->b_flags = B_BUSY | B_PHYS | B_READ; - splx(s); + bp->b_cflags = BC_BUSY; + bp->b_flags = B_PHYS | B_READ; bp->b_blkno = btodb(rsp->offset); bp->b_bcount = min(rsp->chunk, nbyte); bp->b_data = buf; @@ -321,11 +316,7 @@ int nbyte; (*rsp->strat)(bp); /* Wait for results */ - s = splbio(); - while ((bp->b_flags & B_DONE) == 0) - tsleep((void *) bp, PRIBIO + 1, "ramd_norm_read", 0); - error = bp->b_error; - splx(s); + biowait(bp); /* Dot counter */ printf("."); @@ -348,8 +339,6 @@ int nbyte; rsp->offset = 0; } } - s = splbio(); - splx(s); return(nread); } #endif /* support_compression */ diff --git a/sys/arch/cobalt/cobalt/disksubr.c b/sys/arch/cobalt/cobalt/disksubr.c index 8f2ee75dd478..85bd32cbe806 100644 --- a/sys/arch/cobalt/cobalt/disksubr.c +++ b/sys/arch/cobalt/cobalt/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.22 2007/10/17 19:54:08 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.23 2008/01/02 11:48:24 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 
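The md_root.c hunks above retire the open-coded splbio()/tsleep() loop on B_DONE; biowait() now performs the equivalent wait internally, taking b_objlock and sleeping on b_done until BO_DONE is set. A sketch of the simplified read step, with the chunk bookkeeping elided:

#include <sys/buf.h>

/* Sketch: one chunk of the ramdisk load loop after the conversion. */
static int
load_one_chunk(struct buf *bp, void (*strat)(struct buf *),
    daddr_t blkno, void *data, size_t count)
{
	bp->b_cflags = BC_BUSY;
	bp->b_flags = B_PHYS | B_READ;
	bp->b_blkno = blkno;
	bp->b_bcount = count;
	bp->b_data = data;

	(*strat)(bp);
	return biowait(bp);	/* waits for BO_DONE, returns b_error */
}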
@@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.22 2007/10/17 19:54:08 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.23 2008/01/02 11:48:24 ad Exp $"); #include #include @@ -190,7 +190,7 @@ readdisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -227,7 +227,7 @@ readdisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -366,7 +366,7 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -380,7 +380,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/evbmips/evbmips/disksubr.c b/sys/arch/evbmips/evbmips/disksubr.c index 90a960966eb3..a97e06287472 100644 --- a/sys/arch/evbmips/evbmips/disksubr.c +++ b/sys/arch/evbmips/evbmips/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.16 2007/10/17 19:54:15 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.17 2008/01/02 11:48:24 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.16 2007/10/17 19:54:15 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.17 2008/01/02 11:48:24 ad Exp $"); #include #include @@ -111,7 +111,7 @@ readdisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -216,7 +216,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, dlp = (struct disklabel *)((char *)bp->b_data + LABELOFFSET); *dlp = *lp; /* struct assignment */ - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/evbppc/evbppc/disksubr.c b/sys/arch/evbppc/evbppc/disksubr.c index dbbfbd66d033..ffd0604e67e6 100644 --- a/sys/arch/evbppc/evbppc/disksubr.c +++ b/sys/arch/evbppc/evbppc/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.15 2007/10/17 19:54:17 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.16 2008/01/02 11:48:24 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 
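Behind all of this churn, the old overloaded b_flags word has been split three ways: b_flags keeps the I/O description (B_READ, B_WRITE, B_PHYS, ...), b_cflags carries cache and ownership state (BC_BUSY, ...), and b_oflags carries completion state (BO_DONE, BO_DELWRI) protected by bp->b_objlock. A sketch of roughly the completion half that biodone() performs under the new rules; this is simplified, the real function also handles async and delayed-write buffers:

#include <sys/buf.h>
#include <sys/mutex.h>
#include <sys/condvar.h>

static void
mark_buffer_done(struct buf *bp)
{
	mutex_enter(bp->b_objlock);
	bp->b_oflags |= BO_DONE;	/* what waiters test */
	cv_broadcast(&bp->b_done);	/* what biowait() sleeps on */
	mutex_exit(bp->b_objlock);
}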
@@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.15 2007/10/17 19:54:17 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.16 2008/01/02 11:48:24 ad Exp $"); #include #include @@ -121,7 +121,7 @@ readdisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~BO_DONE; bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -246,7 +246,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags = B_BUSY | B_WRITE; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_WRITE; (*strat)(bp); error = biowait(bp); goto done; diff --git a/sys/arch/ews4800mips/ews4800mips/sector.c b/sys/arch/ews4800mips/ews4800mips/sector.c index f7e7040f5a6f..d70d5bab57d1 100644 --- a/sys/arch/ews4800mips/ews4800mips/sector.c +++ b/sys/arch/ews4800mips/ews4800mips/sector.c @@ -1,4 +1,4 @@ -/* $NetBSD: sector.c,v 1.5 2007/10/17 19:54:21 garbled Exp $ */ +/* $NetBSD: sector.c,v 1.6 2008/01/02 11:48:24 ad Exp $ */ /*- * Copyright (c) 2004 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sector.c,v 1.5 2007/10/17 19:54:21 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sector.c,v 1.6 2008/01/02 11:48:24 ad Exp $"); #include #include @@ -98,7 +98,7 @@ sector_read(void *self, uint8_t *buf, daddr_t sector) b->b_blkno = sector; b->b_cylinder = sector / 100; b->b_bcount = DEV_BSIZE; - b->b_flags &= ~(B_DONE); + b->b_oflags &= ~(BO_DONE); b->b_flags |= B_READ; rw->strategy(b); @@ -134,7 +134,8 @@ sector_write(void *self, uint8_t *buf, daddr_t sector) b->b_blkno = sector; b->b_cylinder = sector / 100; b->b_bcount = DEV_BSIZE; - b->b_flags &= ~(B_READ | B_DONE); + b->b_flags &= ~(B_READ); + b->b_oflags &= ~(BO_DONE); b->b_flags |= B_WRITE; memcpy(b->b_data, buf, DEV_BSIZE); rw->strategy(b); diff --git a/sys/arch/hp300/dev/ct.c b/sys/arch/hp300/dev/ct.c index 3c5ecc07763d..eec45a7c8c36 100644 --- a/sys/arch/hp300/dev/ct.c +++ b/sys/arch/hp300/dev/ct.c @@ -1,4 +1,4 @@ -/* $NetBSD: ct.c,v 1.51 2007/10/17 19:54:22 garbled Exp $ */ +/* $NetBSD: ct.c,v 1.52 2008/01/02 11:48:24 ad Exp $ */ /*- * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. @@ -82,7 +82,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ct.c,v 1.51 2007/10/17 19:54:22 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ct.c,v 1.52 2008/01/02 11:48:24 ad Exp $"); #include #include @@ -468,7 +468,7 @@ ctcommand(dev_t dev, int cmd, int cnt) } while (cnt-- > 0) { - bp->b_flags = B_BUSY; + bp->b_cflags = BC_BUSY; if (cmd == MTBSF) { sc->sc_blkno = sc->sc_eofs[sc->sc_eofp]; sc->sc_eofp--; diff --git a/sys/arch/hp300/dev/mt.c b/sys/arch/hp300/dev/mt.c index a548fd214670..ea85e8274b4f 100644 --- a/sys/arch/hp300/dev/mt.c +++ b/sys/arch/hp300/dev/mt.c @@ -1,4 +1,4 @@ -/* $NetBSD: mt.c,v 1.40 2007/10/17 19:54:23 garbled Exp $ */ +/* $NetBSD: mt.c,v 1.41 2008/01/02 11:48:25 ad Exp $ */ /*- * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. 
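ctcommand() above (and mtcommand() just below) now assert ownership of the driver's private command buffer through BC_BUSY in b_cflags rather than B_BUSY in b_flags. A sketch of the shape of such a command submission; B_CMD is the hp300 driver-local flag seen in these files, and the unlocked busy check assumes the same single-opener discipline the drivers rely on:

#include <sys/buf.h>

/* Sketch: submit one command on a driver-private buffer. */
static int
private_buf_command(struct buf *bp, dev_t dev, void (*strat)(struct buf *))
{
	if (bp->b_cflags & BC_BUSY)
		return EBUSY;		/* another command in flight */
	bp->b_cflags = BC_BUSY;		/* ownership: b_cflags */
	bp->b_flags = B_CMD;		/* request type: b_flags */
	bp->b_dev = dev;
	(*strat)(bp);
	(void)biowait(bp);
	bp->b_cflags &= ~BC_BUSY;	/* hand the buffer back */
	return bp->b_error;
}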
@@ -67,7 +67,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: mt.c,v 1.40 2007/10/17 19:54:23 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: mt.c,v 1.41 2008/01/02 11:48:25 ad Exp $"); #include #include @@ -435,13 +435,14 @@ mtcommand(dev_t dev, int cmd, int cnt) int error = 0; #if 1 - if (bp->b_flags & B_BUSY) + if (bp->b_cflags & BC_BUSY) return EBUSY; #endif bp->b_cmd = cmd; bp->b_dev = dev; do { - bp->b_flags = B_BUSY | B_CMD; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_CMD; mtstrategy(bp); biowait(bp); if (bp->b_error != 0) { @@ -450,9 +451,9 @@ mtcommand(dev_t dev, int cmd, int cnt) } } while (--cnt > 0); #if 0 - bp->b_flags = 0 /*&= ~B_BUSY*/; + bp->b_flags = 0 /*&= ~BC_BUSY*/; #else - bp->b_flags &= ~B_BUSY; + bp->b_cflags &= ~BC_BUSY; #endif return error; } diff --git a/sys/arch/hp300/hp300/disksubr.c b/sys/arch/hp300/hp300/disksubr.c index f480b0945d6c..13a019b9972b 100644 --- a/sys/arch/hp300/hp300/disksubr.c +++ b/sys/arch/hp300/hp300/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.26 2007/10/17 19:54:23 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.27 2008/01/02 11:48:25 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988, 1993 @@ -37,7 +37,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.26 2007/10/17 19:54:23 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.27 2008/01/02 11:48:25 ad Exp $"); #include #include @@ -179,7 +179,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/hp700/hp700/disksubr.c b/sys/arch/hp700/hp700/disksubr.c index 17b1d6a594b1..22f54842475a 100644 --- a/sys/arch/hp700/hp700/disksubr.c +++ b/sys/arch/hp700/hp700/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.23 2007/10/17 19:54:26 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.24 2008/01/02 11:48:25 ad Exp $ */ /* $OpenBSD: disksubr.c,v 1.6 2000/10/18 21:00:34 mickey Exp $ */ @@ -68,7 +68,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.23 2007/10/17 19:54:26 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.24 2008/01/02 11:48:25 ad Exp $"); #include #include @@ -101,7 +101,8 @@ readbsdlabel(struct buf *bp, void (*strat)(struct buf *), int cyl, int sec, bp->b_blkno = sec; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; (*strat)(bp); /* if successful, locate disk label within block and validate */ @@ -208,7 +209,8 @@ readliflabel(struct buf *bp, void (*strat)(struct buf *), struct disklabel *lp, /* read LIF volume header */ bp->b_blkno = btodb(HP700_LIF_VOLSTART); bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; bp->b_cylinder = btodb(HP700_LIF_VOLSTART) / lp->d_secpercyl; (*strat)(bp); @@ -231,7 +233,8 @@ readliflabel(struct buf *bp, void (*strat)(struct buf *), struct disklabel *lp, /* read LIF directory */ dbp->b_blkno = btodb(HP700_LIF_DIRSTART); dbp->b_bcount = lp->d_secsize; - dbp->b_flags = B_BUSY | B_READ; + dbp->b_cflags = BC_BUSY; + dbp->b_flags = B_READ; dbp->b_cylinder = (HP700_LIF_DIRSTART) / lp->d_secpercyl; (*strat)(dbp); @@ -367,7 +370,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, *(struct disklabel *)((char *)bp->b_data + labeloffset) = *lp; - bp->b_flags = 
B_BUSY | B_WRITE; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/hpc/hpc/disksubr.c b/sys/arch/hpc/hpc/disksubr.c index 8a1573740672..315b7c6e5d73 100644 --- a/sys/arch/hpc/hpc/disksubr.c +++ b/sys/arch/hpc/hpc/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.19 2007/10/17 19:54:27 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.20 2008/01/02 11:48:25 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.19 2007/10/17 19:54:27 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.20 2008/01/02 11:48:25 ad Exp $"); #include #include @@ -239,7 +239,7 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -276,7 +276,7 @@ nombrpart: i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -424,7 +424,7 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -438,7 +438,8 @@ nombrpart: if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/i386/i386/trap.c b/sys/arch/i386/i386/trap.c index f45070300c22..5ec7041dda7f 100644 --- a/sys/arch/i386/i386/trap.c +++ b/sys/arch/i386/i386/trap.c @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.230 2008/01/01 21:28:40 yamt Exp $ */ +/* $NetBSD: trap.c,v 1.231 2008/01/02 11:48:25 ad Exp $ */ /*- * Copyright (c) 1998, 2000, 2005 The NetBSD Foundation, Inc. @@ -75,7 +75,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.230 2008/01/01 21:28:40 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.231 2008/01/02 11:48:25 ad Exp $"); #include "opt_ddb.h" #include "opt_kgdb.h" @@ -472,25 +472,21 @@ copyfault: return; case T_PROTFLT|T_USER: /* protection fault */ - KERNEL_LOCK(1, l); #ifdef VM86 if (frame->tf_eflags & PSL_VM) { vm86_gpfault(l, type & ~T_USER); - KERNEL_UNLOCK_LAST(l); goto out; } #endif /* If pmap_exec_fixup does something, let's retry the trap. */ if (pmap_exec_fixup(&p->p_vmspace->vm_map, frame, &l->l_addr->u_pcb)) { - KERNEL_UNLOCK_LAST(l); goto out; } KSI_INIT_TRAP(&ksi); ksi.ksi_signo = SIGSEGV; ksi.ksi_addr = (void *)rcr2(); ksi.ksi_code = SEGV_ACCERR; - KERNEL_UNLOCK_LAST(l); goto trapsignal; case T_TSSFLT|T_USER: @@ -621,7 +617,6 @@ copyfault: #endif /* defined(XEN) && !defined(XEN3) */ cr2 = FETCH_CR2; - KERNEL_LOCK(1, NULL); goto faultcommon; case T_PAGEFLT|T_USER: { /* page fault */ @@ -632,7 +627,6 @@ copyfault: extern struct vm_map *kernel_map; cr2 = FETCH_CR2; - KERNEL_LOCK(1, l); faultcommon: vm = p->p_vmspace; if (vm == NULL) @@ -673,8 +667,6 @@ copyfault: uvm_grow(p, va); if (type == T_PAGEFLT) { - KERNEL_UNLOCK_ONE(NULL); - /* * we need to switch pmap now if we're in * the middle of copyin/out. 
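The mt.c, hp300 and hp700 hunks above carry the other half of the flag split: buffer ownership moves from B_BUSY in b_flags to BC_BUSY in the separate b_cflags word. A hedged sketch of the claim/release protocol for a driver-private buffer; mt_claim_buf()/mt_release_buf() are invented names, and no locking is shown because the buffer is assumed private to the driver:

#include <sys/buf.h>
#include <sys/errno.h>

static int
mt_claim_buf(struct buf *bp)
{
        /* old test: if (bp->b_flags & B_BUSY) */
        if (bp->b_cflags & BC_BUSY)
                return EBUSY;
        bp->b_cflags = BC_BUSY;         /* ownership lives in b_cflags */
        bp->b_flags = B_READ;           /* I/O bits stay in b_flags */
        return 0;
}

static void
mt_release_buf(struct buf *bp)
{
        /* releasing must clear b_cflags, not b_flags */
        bp->b_cflags &= ~BC_BUSY;
}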
@@ -692,7 +684,6 @@ copyfault: } return; } - KERNEL_UNLOCK_LAST(l); goto out; } KSI_INIT_TRAP(&ksi); @@ -707,10 +698,8 @@ copyfault: if (type == T_PAGEFLT) { onfault = onfault_handler(pcb, frame); - if (onfault != NULL) { - KERNEL_UNLOCK_ONE(NULL); + if (onfault != NULL) goto copyfault; - } printf("uvm_fault(%p, %#lx, %d) -> %#x\n", map, va, ftype, error); goto we_re_toast; @@ -725,11 +714,6 @@ copyfault: ksi.ksi_signo = SIGSEGV; } (*p->p_emul->e_trapsignal)(l, &ksi); - if (type != T_PAGEFLT) { - KERNEL_UNLOCK_LAST(l); - } else { - KERNEL_UNLOCK_ONE(NULL); - } break; } @@ -761,9 +745,7 @@ copyfault: else ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)frame->tf_eip; - KERNEL_LOCK(1, l); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); } break; @@ -811,9 +793,7 @@ out: return; trapsignal: ksi.ksi_trap = type & ~T_USER; - KERNEL_LOCK(1, l); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); userret(l); } @@ -835,7 +815,5 @@ startlwp(arg) } #endif pool_put(&lwp_uc_pool, uc); - - KERNEL_UNLOCK_LAST(l); userret(l); } diff --git a/sys/arch/mac68k/mac68k/disksubr.c b/sys/arch/mac68k/mac68k/disksubr.c index ebe1ced20868..ad12a6f8626e 100644 --- a/sys/arch/mac68k/mac68k/disksubr.c +++ b/sys/arch/mac68k/mac68k/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.56 2007/10/17 19:55:14 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.57 2008/01/02 11:48:26 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -65,7 +65,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.56 2007/10/17 19:55:14 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.57 2008/01/02 11:48:26 ad Exp $"); #include #include @@ -553,7 +553,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/macppc/macppc/disksubr.c b/sys/arch/macppc/macppc/disksubr.c index bc5c46808662..591714abcea3 100644 --- a/sys/arch/macppc/macppc/disksubr.c +++ b/sys/arch/macppc/macppc/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.42 2007/10/17 19:55:32 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.43 2008/01/02 11:48:26 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -106,7 +106,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.42 2007/10/17 19:55:32 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.43 2008/01/02 11:48:26 ad Exp $"); #include #include @@ -695,8 +695,9 @@ writedisklabel(dev, strat, lp, osdep) if (error != 0) goto done; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_flags &= ~B_READ; bp->b_flags |= B_WRITE; + bp->b_oflags &= ~BO_DONE; memcpy((char *)bp->b_data + osdep->cd_labeloffset, (void *)lp, sizeof *lp); diff --git a/sys/arch/mipsco/mipsco/disksubr.c b/sys/arch/mipsco/mipsco/disksubr.c index 10007b55fe21..cdd1fcb616bf 100644 --- a/sys/arch/mipsco/mipsco/disksubr.c +++ b/sys/arch/mipsco/mipsco/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.22 2007/12/24 15:06:38 ad Exp $ */ +/* $NetBSD: disksubr.c,v 1.23 2008/01/02 11:48:26 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 
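The i386 trap.c hunks above remove the KERNEL_LOCK()/KERNEL_UNLOCK_*() bracketing around uvm_fault() and the trapsignal hooks, so the fault path no longer has to pair an unlock with every early exit. A rough, speculative sketch of the shape the user page-fault path takes after the change; user_pagefault() is an invented wrapper and only the error policy is copied from the diff:

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <uvm/uvm_extern.h>

static void
user_pagefault(struct lwp *l, struct vm_map *map, vaddr_t va, vm_prot_t ftype)
{
        struct proc *p = l->l_proc;
        ksiginfo_t ksi;
        int error;

        /* uvm_fault() is now entered without the big kernel lock */
        error = uvm_fault(map, trunc_page(va), ftype);
        if (error == 0) {
                uvm_grow(p, va);        /* note any stack growth */
                return;
        }
        KSI_INIT_TRAP(&ksi);
        ksi.ksi_signo = (error == ENOMEM) ? SIGKILL : SIGSEGV;
        ksi.ksi_code = (error == EACCES) ? SEGV_ACCERR : SEGV_MAPERR;
        ksi.ksi_addr = (void *)va;
        (*p->p_emul->e_trapsignal)(l, &ksi);    /* also called unlocked */
}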
@@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.22 2007/12/24 15:06:38 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.23 2008/01/02 11:48:26 ad Exp $"); #include #include @@ -226,7 +226,8 @@ writedisklabel(dev, strat, lp, clp) goto ioerror; /* Write MIPS RISC/os label to first sector */ - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_flags &= ~(B_READ); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_WRITE; (*strat)(bp); if ((error = biowait(bp)) != 0) @@ -238,7 +239,8 @@ writedisklabel(dev, strat, lp, clp) bp->b_blkno = LABELSECTOR; bp->b_bcount = lp->d_secsize; bp->b_cylinder = bp->b_blkno / lp->d_secpercyl; - bp->b_flags &= ~(B_READ | B_DONE); + bp->b_flags &= ~(B_READ); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/news68k/news68k/disksubr.c b/sys/arch/news68k/news68k/disksubr.c index f33077194287..bd2c31db7752 100644 --- a/sys/arch/news68k/news68k/disksubr.c +++ b/sys/arch/news68k/news68k/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.30 2007/10/17 19:55:53 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.31 2008/01/02 11:48:26 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.30 2007/10/17 19:55:53 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.31 2008/01/02 11:48:26 ad Exp $"); #include #include @@ -178,7 +178,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/newsmips/newsmips/disksubr.c b/sys/arch/newsmips/newsmips/disksubr.c index 263162137816..eac5526eb436 100644 --- a/sys/arch/newsmips/newsmips/disksubr.c +++ b/sys/arch/newsmips/newsmips/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.27 2007/10/17 19:55:55 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.28 2008/01/02 11:48:26 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.27 2007/10/17 19:55:55 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.28 2008/01/02 11:48:26 ad Exp $"); #include #include @@ -178,7 +178,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_flags &= ~(B_READ); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/playstation2/playstation2/disksubr.c b/sys/arch/playstation2/playstation2/disksubr.c index 675b9232e66a..ad28a68592c6 100644 --- a/sys/arch/playstation2/playstation2/disksubr.c +++ b/sys/arch/playstation2/playstation2/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.15 2007/10/17 19:56:14 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.16 2008/01/02 11:48:26 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 
@@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.15 2007/10/17 19:56:14 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.16 2008/01/02 11:48:26 ad Exp $"); #include #include @@ -188,7 +188,7 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -224,7 +224,7 @@ nombrpart: i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -363,7 +363,7 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -376,7 +376,8 @@ nombrpart: if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/pmax/pmax/disksubr.c b/sys/arch/pmax/pmax/disksubr.c index a0f4c0c29128..86a37de5f238 100644 --- a/sys/arch/pmax/pmax/disksubr.c +++ b/sys/arch/pmax/pmax/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.47 2007/10/17 19:56:15 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.48 2008/01/02 11:48:27 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.47 2007/10/17 19:56:15 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.48 2008/01/02 11:48:27 ad Exp $"); #include "opt_compat_ultrix.h" @@ -281,7 +281,8 @@ writedisklabel(dev, strat, lp, osdep) if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/powerpc/ibm4xx/intr.c b/sys/arch/powerpc/ibm4xx/intr.c index 82ec42977d13..db11b4bfc957 100644 --- a/sys/arch/powerpc/ibm4xx/intr.c +++ b/sys/arch/powerpc/ibm4xx/intr.c @@ -1,4 +1,4 @@ -/* $NetBSD: intr.c,v 1.19 2007/12/03 15:34:11 ad Exp $ */ +/* $NetBSD: intr.c,v 1.20 2008/01/02 11:48:27 ad Exp $ */ /* * Copyright 2002 Wasabi Systems, Inc. @@ -36,7 +36,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.19 2007/12/03 15:34:11 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.20 2008/01/02 11:48:27 ad Exp $"); #include #include @@ -256,13 +256,15 @@ ext_intr(void) disable_irq(i); wrteei(1); - KERNEL_LOCK(1, NULL); ih = intrs[i].is_head; while (ih) { + if (ih->ih_level == IPL_VM) + KERNEL_LOCK(1, NULL); (*ih->ih_fun)(ih->ih_arg); + if (ih->ih_level == IPL_VM) + KERNEL_UNLOCK_ONE(NULL); ih = ih->ih_next; } - KERNEL_UNLOCK_ONE(NULL); mtmsr(msr); if (intrs[i].is_type == IST_LEVEL) diff --git a/sys/arch/powerpc/ibm4xx/trap.c b/sys/arch/powerpc/ibm4xx/trap.c index 04c47ee3f67d..a6080915e91a 100644 --- a/sys/arch/powerpc/ibm4xx/trap.c +++ b/sys/arch/powerpc/ibm4xx/trap.c @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.47 2007/11/28 12:22:28 simonb Exp $ */ +/* $NetBSD: trap.c,v 1.48 2008/01/02 11:48:27 ad Exp $ */ /* * Copyright 2001 Wasabi Systems, Inc.
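In the ibm4xx intr.c hunk above, the big lock is no longer held across the whole handler chain; it is taken per handler, keyed off ih_level, so handlers registered at MP-safe levels run unlocked. A small sketch of that dispatch loop; the struct intrhand here is a minimal stand-in for the port's real one, and the assumption that only IPL_VM handlers still want the big lock is taken from the diff. The unlock must happen before ih is advanced, so the test refers to the handler that just ran:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/intr.h>

struct intrhand {                       /* reduced stand-in */
        int (*ih_fun)(void *);
        void *ih_arg;
        int ih_level;
        struct intrhand *ih_next;
};

static void
dispatch_chain(struct intrhand *ih)
{
        for (; ih != NULL; ih = ih->ih_next) {
                bool biglock = (ih->ih_level == IPL_VM);

                if (biglock)
                        KERNEL_LOCK(1, NULL);
                (*ih->ih_fun)(ih->ih_arg);
                if (biglock)            /* unlock before advancing */
                        KERNEL_UNLOCK_ONE(NULL);
        }
}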
@@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.47 2007/11/28 12:22:28 simonb Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.48 2008/01/02 11:48:27 ad Exp $"); #include "opt_altivec.h" #include "opt_ddb.h" @@ -166,9 +166,7 @@ trap(struct trapframe *frame) ksi.ksi_signo = SIGTRAP; ksi.ksi_trap = EXC_TRC; ksi.ksi_addr = (void *)frame->srr0; - KERNEL_LOCK(1, l); trapsignal(l, &ksi); - KERNEL_UNLOCK_LAST(l); break; /* @@ -183,7 +181,6 @@ trap(struct trapframe *frame) vaddr_t va; struct faultbuf *fb = NULL; - KERNEL_LOCK(1, NULL); va = frame->dar; if (frame->tf_xtra[TF_PID] == KERNEL_PID) { map = kernel_map; @@ -200,7 +197,6 @@ trap(struct trapframe *frame) (ftype & VM_PROT_WRITE) ? "write" : "read", (void *)va, frame->tf_xtra[TF_ESR])); rv = uvm_fault(map, trunc_page(va), ftype); - KERNEL_UNLOCK_ONE(NULL); if (rv == 0) goto done; if ((fb = l->l_addr->u_pcb.pcb_onfault) != NULL) { @@ -221,8 +217,6 @@ trap(struct trapframe *frame) case EXC_DSI|EXC_USER: /* FALLTHROUGH */ case EXC_DTMISS|EXC_USER: - KERNEL_LOCK(1, l); - if (frame->tf_xtra[TF_ESR] & (ESR_DST|ESR_DIZ)) ftype = VM_PROT_WRITE; @@ -234,7 +228,6 @@ trap(struct trapframe *frame) rv = uvm_fault(&p->p_vmspace->vm_map, trunc_page(frame->dar), ftype); if (rv == 0) { - KERNEL_UNLOCK_LAST(l); break; } KSI_INIT_TRAP(&ksi); @@ -250,12 +243,10 @@ trap(struct trapframe *frame) ksi.ksi_signo = SIGKILL; } trapsignal(l, &ksi); - KERNEL_UNLOCK_LAST(l); break; case EXC_ITMISS|EXC_USER: case EXC_ISI|EXC_USER: - KERNEL_LOCK(1, l); ftype = VM_PROT_EXECUTE; DBPRINTF(TDB_ALL, ("trap(EXC_ISI|EXC_USER) at %lx execute fault tf %p\n", @@ -263,7 +254,6 @@ trap(struct trapframe *frame) rv = uvm_fault(&p->p_vmspace->vm_map, trunc_page(frame->srr0), ftype); if (rv == 0) { - KERNEL_UNLOCK_LAST(l); break; } KSI_INIT_TRAP(&ksi); @@ -272,7 +262,6 @@ trap(struct trapframe *frame) ksi.ksi_addr = (void *)frame->srr0; ksi.ksi_code = (rv == EACCES ? SEGV_ACCERR : SEGV_MAPERR); trapsignal(l, &ksi); - KERNEL_UNLOCK_LAST(l); break; case EXC_AST|EXC_USER: @@ -289,7 +278,6 @@ trap(struct trapframe *frame) case EXC_ALI|EXC_USER: - KERNEL_LOCK(1, l); if (fix_unaligned(l, frame) != 0) { KSI_INIT_TRAP(&ksi); ksi.ksi_signo = SIGBUS; @@ -298,7 +286,6 @@ trap(struct trapframe *frame) trapsignal(l, &ksi); } else frame->srr0 += 4; - KERNEL_UNLOCK_LAST(l); break; case EXC_PGM|EXC_USER: @@ -320,9 +307,7 @@ trap(struct trapframe *frame) ksi.ksi_signo = rv; ksi.ksi_trap = EXC_PGM; ksi.ksi_addr = (void *)frame->srr0; - KERNEL_LOCK(1, l); trapsignal(l, &ksi); - KERNEL_UNLOCK_LAST(l); } break; diff --git a/sys/arch/powerpc/oea/pmap.c b/sys/arch/powerpc/oea/pmap.c index 8ca34bb501cc..71ee52fe3116 100644 --- a/sys/arch/powerpc/oea/pmap.c +++ b/sys/arch/powerpc/oea/pmap.c @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.49 2007/12/15 00:39:23 perry Exp $ */ +/* $NetBSD: pmap.c,v 1.50 2008/01/02 11:48:27 ad Exp $ */ /*- * Copyright (c) 2001 The NetBSD Foundation, Inc. * All rights reserved. 
@@ -70,7 +70,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.49 2007/12/15 00:39:23 perry Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.50 2008/01/02 11:48:27 ad Exp $"); #include "opt_ppcarch.h" #include "opt_altivec.h" @@ -83,6 +83,7 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.49 2007/12/15 00:39:23 perry Exp $"); #include #include /* for evcnt */ #include +#include #if __NetBSD_Version__ < 105010000 #include @@ -515,6 +516,8 @@ mfsrin(vaddr_t va) extern void mfmsr64 (register64_t *result); #endif /* PPC_OEA64_BRIDGE */ +#define PMAP_LOCK() KERNEL_LOCK(1, NULL) +#define PMAP_UNLOCK() KERNEL_UNLOCK_ONE(NULL) static inline register_t pmap_interrupts_off(void) @@ -942,6 +945,8 @@ pmap_pte_spill(struct pmap *pm, vaddr_t addr, bool exec) volatile struct pteg *pteg; volatile struct pte *pt; + PMAP_LOCK(); + ptegidx = va_to_pteg(pm, addr); /* @@ -1015,6 +1020,7 @@ pmap_pte_spill(struct pmap *pm, vaddr_t addr, bool exec) TAILQ_REMOVE(pvoh, pvo, pvo_olink); TAILQ_INSERT_TAIL(pvoh, pvo, pvo_olink); } + PMAP_UNLOCK(); return 1; } source_pvo = pvo; @@ -1040,6 +1046,7 @@ pmap_pte_spill(struct pmap *pm, vaddr_t addr, bool exec) if (source_pvo == NULL) { PMAPCOUNT(ptes_unspilled); + PMAP_UNLOCK(); return 0; } @@ -1112,6 +1119,8 @@ pmap_pte_spill(struct pmap *pm, vaddr_t addr, bool exec) PMAP_PVO_CHECK(victim_pvo); PMAP_PVO_CHECK(source_pvo); + + PMAP_UNLOCK(); return 1; } @@ -1228,6 +1237,7 @@ pmap_pinit(pmap_t pm) * Allocate some segment registers for this pmap. */ pm->pm_refs = 1; + PMAP_LOCK(); for (i = 0; i < NPMAPS; i += VSID_NBPW) { static register_t pmap_vsidcontext; register_t hash; @@ -1267,8 +1277,10 @@ pmap_pinit(pmap_t pm) pm->pm_sr[i] = VSID_MAKE(i, hash) | SR_PRKEY | SR_NOEXEC; #endif + PMAP_UNLOCK(); return; } + PMAP_UNLOCK(); panic("pmap_pinit: out of segments"); } @@ -1278,7 +1290,7 @@ pmap_pinit(pmap_t pm) void pmap_reference(pmap_t pm) { - pm->pm_refs++; + atomic_inc_uint(&pm->pm_refs); } /* @@ -1288,7 +1300,7 @@ pmap_reference(pmap_t pm) void pmap_destroy(pmap_t pm) { - if (--pm->pm_refs == 0) { + if (atomic_dec_uint_nv(&pm->pm_refs) == 0) { pmap_release(pm); pool_put(&pmap_pool, pm); } @@ -1306,6 +1318,7 @@ pmap_release(pmap_t pm) KASSERT(pm->pm_stats.resident_count == 0); KASSERT(pm->pm_stats.wired_count == 0); + PMAP_LOCK(); if (pm->pm_sr[0] == 0) panic("pmap_release"); idx = pm->pm_vsid & (NPMAPS-1); @@ -1314,6 +1327,7 @@ pmap_release(pmap_t pm) KASSERT(pmap_vsid_bitmap[idx] & mask); pmap_vsid_bitmap[idx] &= ~mask; + PMAP_UNLOCK(); } /* @@ -1471,6 +1485,8 @@ pmap_pvo_check(const struct pvo_entry *pvo) volatile struct pte *pt; int failed = 0; + PMAP_LOCK(); + if ((uintptr_t)(pvo+1) >= SEGMENT_LENGTH) panic("pmap_pvo_check: pvo %p: invalid address", pvo); @@ -1563,6 +1579,8 @@ pmap_pvo_check(const struct pvo_entry *pvo) if (failed) panic("pmap_pvo_check: pvo %p, pm %p: bugcheck!", pvo, pvo->pvo_pmap); + + PMAP_UNLOCK(); } #endif /* DEBUG || PMAPCHECK */ @@ -1942,6 +1960,8 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) u_int pvo_flags; u_int was_exec = 0; + PMAP_LOCK(); + if (__predict_false(!pmap_initialized)) { pvo_head = &pmap_pvo_kunmanaged; pl = &pmap_upvo_pool; @@ -2035,6 +2055,8 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) DPRINTFN(ENTER, (": error=%d\n", error)); + PMAP_UNLOCK(); + return error; } @@ -2054,6 +2076,8 @@ pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot) DPRINTFN(KENTER, ("pmap_kenter_pa(%#lx,%#lx,%#x)\n", va, pa, prot)); + PMAP_LOCK(); + /* * Assume the page is cache inhibited and 
access is guarded unless * it's in our available memory array. If it is in the memory array, @@ -2083,6 +2107,8 @@ pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot) if (error != 0) panic("pmap_kenter_pa: failed to enter va %#lx pa %#lx: %d", va, pa, error); + + PMAP_UNLOCK(); } void @@ -2107,6 +2133,7 @@ pmap_remove(pmap_t pm, vaddr_t va, vaddr_t endva) register_t msr; int pteidx; + PMAP_LOCK(); LIST_INIT(&pvol); msr = pmap_interrupts_off(); for (; va < endva; va += PAGE_SIZE) { @@ -2117,6 +2144,7 @@ pmap_remove(pmap_t pm, vaddr_t va, vaddr_t endva) } pmap_interrupts_restore(msr); pmap_pvo_free_list(&pvol); + PMAP_UNLOCK(); } /* @@ -2128,6 +2156,7 @@ pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pap) struct pvo_entry *pvo; register_t msr; + PMAP_LOCK(); /* * If this is a kernel pmap lookup, also check the battable @@ -2149,6 +2178,7 @@ pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pap) (~(batu & BAT_BL) << 15) & ~0x1ffffL; if (pap) *pap = (batl & mask) | (va & ~mask); + PMAP_UNLOCK(); return true; } } else { @@ -2161,14 +2191,17 @@ pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pap) (~(batl & BAT601_BSM) << 17) & ~0x1ffffL; if (pap) *pap = (batl & mask) | (va & ~mask); + PMAP_UNLOCK(); return true; } else if (SR601_VALID_P(sr) && SR601_PA_MATCH_P(sr, va)) { if (pap) *pap = va; + PMAP_UNLOCK(); return true; } } + PMAP_UNLOCK(); return false; #elif defined (PPC_OEA64_BRIDGE) panic("%s: pm: %s, va: 0x%08lx\n", __func__, @@ -2187,6 +2220,7 @@ pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pap) | (va & ADDR_POFF); } pmap_interrupts_restore(msr); + PMAP_UNLOCK(); return pvo != NULL; } @@ -2216,6 +2250,8 @@ pmap_protect(pmap_t pm, vaddr_t va, vaddr_t endva, vm_prot_t prot) return; } + PMAP_LOCK(); + msr = pmap_interrupts_off(); for (; va < endva; va += PAGE_SIZE) { pvo = pmap_pvo_find_va(pm, va, &pteidx); @@ -2261,6 +2297,7 @@ pmap_protect(pmap_t pm, vaddr_t va, vaddr_t endva, vm_prot_t prot) PMAP_PVO_CHECK(pvo); /* sanity check */ } pmap_interrupts_restore(msr); + PMAP_UNLOCK(); } void @@ -2269,6 +2306,7 @@ pmap_unwire(pmap_t pm, vaddr_t va) struct pvo_entry *pvo; register_t msr; + PMAP_LOCK(); msr = pmap_interrupts_off(); pvo = pmap_pvo_find_va(pm, va, NULL); if (pvo != NULL) { @@ -2279,6 +2317,7 @@ pmap_unwire(pmap_t pm, vaddr_t va) PMAP_PVO_CHECK(pvo); /* sanity check */ } pmap_interrupts_restore(msr); + PMAP_UNLOCK(); } /* @@ -2292,6 +2331,8 @@ pmap_page_protect(struct vm_page *pg, vm_prot_t prot) volatile struct pte *pt; register_t msr; + PMAP_LOCK(); + KASSERT(prot != VM_PROT_ALL); LIST_INIT(&pvol); msr = pmap_interrupts_off(); @@ -2356,6 +2397,8 @@ pmap_page_protect(struct vm_page *pg, vm_prot_t prot) } pmap_interrupts_restore(msr); pmap_pvo_free_list(&pvol); + + PMAP_UNLOCK(); } /* @@ -2401,8 +2444,12 @@ pmap_query_bit(struct vm_page *pg, int ptebit) volatile struct pte *pt; register_t msr; - if (pmap_attr_fetch(pg) & ptebit) + PMAP_LOCK(); + + if (pmap_attr_fetch(pg) & ptebit) { + PMAP_UNLOCK(); return true; + } msr = pmap_interrupts_off(); LIST_FOREACH(pvo, vm_page_to_pvoh(pg), pvo_vlink) { @@ -2415,6 +2462,7 @@ pmap_query_bit(struct vm_page *pg, int ptebit) pmap_attr_save(pg, ptebit); PMAP_PVO_CHECK(pvo); /* sanity check */ pmap_interrupts_restore(msr); + PMAP_UNLOCK(); return true; } } @@ -2438,11 +2486,13 @@ pmap_query_bit(struct vm_page *pg, int ptebit) pmap_attr_save(pg, ptebit); PMAP_PVO_CHECK(pvo); /* sanity check */ pmap_interrupts_restore(msr); + PMAP_UNLOCK(); return true; } } } pmap_interrupts_restore(msr); + PMAP_UNLOCK(); return false; } @@ -2455,6 +2505,7 @@ 
pmap_clear_bit(struct vm_page *pg, int ptebit) register_t msr; int rv = 0; + PMAP_LOCK(); msr = pmap_interrupts_off(); /* @@ -2523,6 +2574,7 @@ pmap_clear_bit(struct vm_page *pg, int ptebit) PMAPCOUNT(exec_synced_clear_modify); } } + PMAP_UNLOCK(); return (rv & ptebit) != 0; } @@ -2533,6 +2585,7 @@ pmap_procwr(struct proc *p, vaddr_t va, size_t len) size_t offset = va & ADDR_POFF; int s; + PMAP_LOCK(); s = splvm(); while (len > 0) { size_t seglen = PAGE_SIZE - offset; @@ -2549,6 +2602,7 @@ pmap_procwr(struct proc *p, vaddr_t va, size_t len) offset = 0; } splx(s); + PMAP_UNLOCK(); } #if defined(DEBUG) || defined(PMAPCHECK) || defined(DDB) @@ -2822,15 +2876,19 @@ pmap_pool_ualloc(struct pool *pp, int flags) { struct pvo_page *pvop; + if (uvm.page_init_done != true) { + return (void *) uvm_pageboot_alloc(PAGE_SIZE); + } + + PMAP_LOCK(); pvop = SIMPLEQ_FIRST(&pmap_upvop_head); if (pvop != NULL) { pmap_upvop_free--; SIMPLEQ_REMOVE_HEAD(&pmap_upvop_head, pvop_link); + PMAP_UNLOCK(); return pvop; } - if (uvm.page_init_done != true) { - return (void *) uvm_pageboot_alloc(PAGE_SIZE); - } + PMAP_UNLOCK(); return pmap_pool_malloc(pp, flags); } @@ -2840,12 +2898,15 @@ pmap_pool_malloc(struct pool *pp, int flags) struct pvo_page *pvop; struct vm_page *pg; + PMAP_LOCK(); pvop = SIMPLEQ_FIRST(&pmap_mpvop_head); if (pvop != NULL) { pmap_mpvop_free--; SIMPLEQ_REMOVE_HEAD(&pmap_mpvop_head, pvop_link); + PMAP_UNLOCK(); return pvop; } + PMAP_UNLOCK(); again: pg = uvm_pagealloc_strat(NULL, 0, NULL, UVM_PGA_USERESERVE, UVM_PGA_STRAT_ONLY, VM_FREELIST_FIRST256); @@ -2870,11 +2931,13 @@ pmap_pool_ufree(struct pool *pp, void *va) return; } #endif + PMAP_LOCK(); pvop = va; SIMPLEQ_INSERT_HEAD(&pmap_upvop_head, pvop, pvop_link); pmap_upvop_free++; if (pmap_upvop_free > pmap_upvop_maxfree) pmap_upvop_maxfree = pmap_upvop_free; + PMAP_UNLOCK(); } void @@ -2882,11 +2945,13 @@ pmap_pool_mfree(struct pool *pp, void *va) { struct pvo_page *pvop; + PMAP_LOCK(); pvop = va; SIMPLEQ_INSERT_HEAD(&pmap_mpvop_head, pvop, pvop_link); pmap_mpvop_free++; if (pmap_mpvop_free > pmap_mpvop_maxfree) pmap_mpvop_maxfree = pmap_mpvop_free; + PMAP_UNLOCK(); #if 0 uvm_pagefree(PHYS_TO_VM_PAGE((paddr_t) va)); #endif diff --git a/sys/arch/powerpc/powerpc/syscall.c b/sys/arch/powerpc/powerpc/syscall.c index 19b54e89e389..3165a0818793 100644 --- a/sys/arch/powerpc/powerpc/syscall.c +++ b/sys/arch/powerpc/powerpc/syscall.c @@ -1,4 +1,4 @@ -/* $NetBSD: syscall.c,v 1.36 2007/11/05 20:43:04 ad Exp $ */ +/* $NetBSD: syscall.c,v 1.37 2008/01/02 11:48:27 ad Exp $ */ /* * Copyright (C) 2002 Matt Thomas @@ -60,7 +60,7 @@ #define EMULNAME(x) (x) #define EMULNAMEU(x) (x) -__KERNEL_RCSID(0, "$NetBSD: syscall.c,v 1.36 2007/11/05 20:43:04 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: syscall.c,v 1.37 2008/01/02 11:48:27 ad Exp $"); void child_return(void *arg) @@ -135,11 +135,9 @@ EMULNAME(syscall_plain)(struct trapframe *frame) if (argsize > n * sizeof(register_t)) { memcpy(args, params, n * sizeof(register_t)); - KERNEL_LOCK(1, l); error = copyin(MOREARGS(frame->fixreg[1]), args + n, argsize - n * sizeof(register_t)); - KERNEL_UNLOCK_LAST(l); if (error) goto bad; params = args; diff --git a/sys/arch/powerpc/powerpc/trap.c b/sys/arch/powerpc/powerpc/trap.c index 51edc39e67f0..b71d93dfe651 100644 --- a/sys/arch/powerpc/powerpc/trap.c +++ b/sys/arch/powerpc/powerpc/trap.c @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.122 2007/10/24 14:50:39 ad Exp $ */ +/* $NetBSD: trap.c,v 1.123 2008/01/02 11:48:27 ad Exp $ */ /* * Copyright (C) 1995, 1996 Wolfgang Solfrank. 
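The powerpc/oea pmap.c hunks above do not introduce fine-grained locking: PMAP_LOCK()/PMAP_UNLOCK() simply funnel the whole pmap module through the (recursive) big kernel lock. The discipline the diff then has to keep by hand is that every return path inside a locked region unlocks, as the pmap_query_bit() and pmap_extract() hunks show. A compressed sketch of that shape; query_bit_sketch() is invented and pmap_attr_fetch() stands in for the module's real attribute lookup:

#include <sys/param.h>
#include <sys/systm.h>
#include <uvm/uvm_extern.h>

#define PMAP_LOCK()     KERNEL_LOCK(1, NULL)
#define PMAP_UNLOCK()   KERNEL_UNLOCK_ONE(NULL)

extern int pmap_attr_fetch(struct vm_page *);   /* oea pmap internal */

static bool
query_bit_sketch(struct vm_page *pg, int ptebit)
{
        PMAP_LOCK();
        if (pmap_attr_fetch(pg) & ptebit) {
                PMAP_UNLOCK();          /* every early return must unlock */
                return true;
        }
        /* ... otherwise scan the PV lists and PTEs, as the diff does ... */
        PMAP_UNLOCK();
        return false;
}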
@@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.122 2007/10/24 14:50:39 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.123 2008/01/02 11:48:27 ad Exp $"); #include "opt_altivec.h" #include "opt_ddb.h" @@ -112,9 +112,7 @@ trap(struct trapframe *frame) ksi.ksi_trap = EXC_TRC; ksi.ksi_addr = (void *)frame->srr0; ksi.ksi_code = TRAP_TRACE; - KERNEL_LOCK(1, l); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); } break; case EXC_DSI: { @@ -126,7 +124,6 @@ trap(struct trapframe *frame) * Only query UVM if no interrupts are active. */ if (ci->ci_intrdepth < 0) { - KERNEL_LOCK(1, NULL); if ((va >> ADDR_SR_SHFT) == pcb->pcb_kmapsr) { va &= ADDR_PIDX | ADDR_POFF; va |= pcb->pcb_umapsr << ADDR_SR_SHFT; @@ -137,8 +134,6 @@ trap(struct trapframe *frame) vm_map_pmap(map)->pm_ste_evictions > 0 && pmap_ste_spill(vm_map_pmap(map), trunc_page(va), false)) { - /* KERNEL_UNLOCK_LAST(l); */ - KERNEL_UNLOCK_ONE(NULL); return; } #endif @@ -147,8 +142,6 @@ trap(struct trapframe *frame) vm_map_pmap(map)->pm_evictions > 0 && pmap_pte_spill(vm_map_pmap(map), trunc_page(va), false)) { - /* KERNEL_UNLOCK_LAST(l); */ - KERNEL_UNLOCK_ONE(NULL); return; } #if defined(DIAGNOSTIC) && (defined(PPC_OEA) || defined (PPC_OEA64_BRIDGE)) @@ -180,9 +173,7 @@ trap(struct trapframe *frame) */ if (rv == 0) uvm_grow(p, trunc_page(va)); - /* KERNEL_UNLOCK_LAST(l); */ } - KERNEL_UNLOCK_ONE(NULL); if (rv == 0) return; if (rv == EACCES) @@ -210,7 +201,6 @@ trap(struct trapframe *frame) goto brain_damage2; } case EXC_DSI|EXC_USER: - KERNEL_LOCK(1, l); ci->ci_ev_udsi.ev_count++; if (frame->dsisr & DSISR_STORE) ftype = VM_PROT_WRITE; @@ -228,7 +218,6 @@ trap(struct trapframe *frame) vm_map_pmap(map)->pm_ste_evictions > 0 && pmap_ste_spill(vm_map_pmap(map), trunc_page(frame->dar), false)) { - KERNEL_UNLOCK_LAST(l); break; } #endif @@ -237,7 +226,6 @@ trap(struct trapframe *frame) vm_map_pmap(map)->pm_evictions > 0 && pmap_pte_spill(vm_map_pmap(map), trunc_page(frame->dar), false)) { - KERNEL_UNLOCK_LAST(l); break; } @@ -247,7 +235,6 @@ trap(struct trapframe *frame) * Record any stack growth... */ uvm_grow(p, trunc_page(frame->dar)); - KERNEL_UNLOCK_LAST(l); break; } ci->ci_ev_udsi_fatal.ev_count++; @@ -273,7 +260,6 @@ trap(struct trapframe *frame) ksi.ksi_signo = SIGKILL; } (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); break; case EXC_ISI: @@ -284,7 +270,6 @@ trap(struct trapframe *frame) goto brain_damage2; case EXC_ISI|EXC_USER: - KERNEL_LOCK(1, l); ci->ci_ev_isi.ev_count++; /* @@ -297,7 +282,6 @@ trap(struct trapframe *frame) if (vm_map_pmap(map)->pm_ste_evictions > 0 && pmap_ste_spill(vm_map_pmap(map), trunc_page(frame->srr0), true)) { - KERNEL_UNLOCK_LAST(l); break; } #endif @@ -305,14 +289,12 @@ trap(struct trapframe *frame) if (vm_map_pmap(map)->pm_evictions > 0 && pmap_pte_spill(vm_map_pmap(map), trunc_page(frame->srr0), true)) { - KERNEL_UNLOCK_LAST(l); break; } ftype = VM_PROT_EXECUTE; rv = uvm_fault(map, trunc_page(frame->srr0), ftype); if (rv == 0) { - KERNEL_UNLOCK_LAST(l); break; } ci->ci_ev_isi_fatal.ev_count++; @@ -327,7 +309,6 @@ trap(struct trapframe *frame) ksi.ksi_addr = (void *)frame->srr0; ksi.ksi_code = (rv == EACCES ? 
SEGV_ACCERR : SEGV_MAPERR); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); break; case EXC_FPU|EXC_USER: @@ -340,7 +321,6 @@ trap(struct trapframe *frame) case EXC_AST|EXC_USER: ci->ci_astpending = 0; /* we are about to do it */ - KERNEL_LOCK(1, l); uvmexp.softs++; if (l->l_pflag & LP_OWEUPC) { l->l_flag &= ~LP_OWEUPC; @@ -349,11 +329,9 @@ trap(struct trapframe *frame) /* Check whether we are being preempted. */ if (ci->ci_want_resched) preempt(); - KERNEL_UNLOCK_LAST(l); break; case EXC_ALI|EXC_USER: - KERNEL_LOCK(1, l); ci->ci_ev_ali.ev_count++; if (fix_unaligned(l, frame) != 0) { ci->ci_ev_ali_fatal.ev_count++; @@ -371,7 +349,6 @@ trap(struct trapframe *frame) (*p->p_emul->e_trapsignal)(l, &ksi); } else frame->srr0 += 4; - KERNEL_UNLOCK_LAST(l); break; case EXC_PERF|EXC_USER: @@ -384,7 +361,6 @@ trap(struct trapframe *frame) enable_vec(); break; #else - KERNEL_LOCK(1, l); if (cpu_printfataltraps) { printf("trap: pid %d.%d (%s): user VEC trap @ %#lx " "(SRR1=%#lx)\n", @@ -397,7 +373,6 @@ trap(struct trapframe *frame) ksi.ksi_addr = (void *)frame->srr0; ksi.ksi_code = ILL_ILLOPC; (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); break; #endif case EXC_MCHK|EXC_USER: @@ -412,15 +387,12 @@ trap(struct trapframe *frame) ksi.ksi_trap = EXC_MCHK; ksi.ksi_addr = (void *)frame->srr0; ksi.ksi_code = BUS_OBJERR; - KERNEL_LOCK(1, l); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); break; case EXC_PGM|EXC_USER: ci->ci_ev_pgm.ev_count++; if (frame->srr1 & 0x00020000) { /* Bit 14 is set if trap */ - KERNEL_LOCK(1, l); if (p->p_raslist == NULL || ras_lookup(p, (void *)frame->srr0) == (void *) -1) { KSI_INIT_TRAP(&ksi); @@ -433,7 +405,6 @@ trap(struct trapframe *frame) /* skip the trap instruction */ frame->srr0 += 4; } - KERNEL_UNLOCK_LAST(l); } else { KSI_INIT_TRAP(&ksi); ksi.ksi_signo = SIGILL; @@ -454,9 +425,7 @@ trap(struct trapframe *frame) printf("trap: pid %d.%d (%s): user PGM trap @" " %#lx (SRR1=%#lx)\n", p->p_pid, l->l_lid, p->p_comm, frame->srr0, frame->srr1); - KERNEL_LOCK(1, l); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); } break; @@ -918,6 +887,5 @@ startlwp(void *arg) } #endif pool_put(&lwp_uc_pool, uc); - KERNEL_UNLOCK_LAST(l); userret(l, frame); } diff --git a/sys/arch/sandpoint/sandpoint/disksubr.c b/sys/arch/sandpoint/sandpoint/disksubr.c index 15790ccec9cf..a68e531d346b 100644 --- a/sys/arch/sandpoint/sandpoint/disksubr.c +++ b/sys/arch/sandpoint/sandpoint/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.16 2007/10/17 19:56:59 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.17 2008/01/02 11:48:28 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 
@@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.16 2007/10/17 19:56:59 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.17 2008/01/02 11:48:28 ad Exp $"); #include #include @@ -177,7 +177,8 @@ readdisklabel(dev, strat, lp, osdep) /* read master boot record */ bp->b_blkno = MBR_BBSECTOR; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; bp->b_cylinder = MBR_BBSECTOR / lp->d_secpercyl; (*strat)(bp); @@ -246,7 +247,8 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; (*strat)(bp); /* if successful, locate disk label within block and validate */ @@ -282,7 +284,8 @@ nombrpart: i = 0; do { /* read a bad sector table */ - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) bp->b_blkno *= lp->d_secsize / DEV_BSIZE; @@ -401,7 +404,8 @@ writedisklabel(dev, strat, lp, osdep) /* read master boot record */ bp->b_blkno = MBR_BBSECTOR; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; bp->b_cylinder = MBR_BBSECTOR / lp->d_secpercyl; (*strat)(bp); @@ -434,7 +438,8 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; (*strat)(bp); /* if successful, locate disk label within block and validate */ @@ -447,7 +452,8 @@ nombrpart: if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags = B_BUSY | B_WRITE; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_WRITE; (*strat)(bp); error = biowait(bp); goto done; diff --git a/sys/arch/sbmips/sbmips/disksubr.c b/sys/arch/sbmips/sbmips/disksubr.c index a18d1237595b..3e2355963e0e 100644 --- a/sys/arch/sbmips/sbmips/disksubr.c +++ b/sys/arch/sbmips/sbmips/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.16 2007/10/17 19:57:02 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.17 2008/01/02 11:48:28 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 
@@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.16 2007/10/17 19:57:02 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.17 2008/01/02 11:48:28 ad Exp $"); #include #include @@ -146,7 +146,8 @@ readdisklabel(dev, strat, lp, osdep) /* read master boot record */ bp->b_blkno = MBR_BBSECTOR; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags |= B_READ; bp->b_cylinder = MBR_BBSECTOR / lp->d_secpercyl; (*strat)(bp); @@ -194,7 +195,8 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; (*strat)(bp); /* if successful, locate disk label within block and validate */ @@ -230,7 +232,8 @@ nombrpart: i = 0; do { /* read a bad sector table */ - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) bp->b_blkno *= lp->d_secsize / DEV_BSIZE; @@ -349,7 +352,8 @@ writedisklabel(dev, strat, lp, osdep) /* read master boot record */ bp->b_blkno = MBR_BBSECTOR; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; bp->b_cylinder = MBR_BBSECTOR / lp->d_secpercyl; (*strat)(bp); @@ -373,7 +377,8 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; (*strat)(bp); /* if successful, locate disk label within block and validate */ @@ -386,7 +391,8 @@ nombrpart: if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags = B_BUSY | B_WRITE; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_WRITE; (*strat)(bp); error = biowait(bp); goto done; diff --git a/sys/arch/sgimips/sgimips/disksubr.c b/sys/arch/sgimips/sgimips/disksubr.c index ee78f7b9449c..9aab0a033a68 100644 --- a/sys/arch/sgimips/sgimips/disksubr.c +++ b/sys/arch/sgimips/sgimips/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.22 2007/12/24 15:06:38 ad Exp $ */ +/* $NetBSD: disksubr.c,v 1.23 2008/01/02 11:48:28 ad Exp $ */ /* * Copyright (c) 2001 Christopher Sekiya @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.22 2007/12/24 15:06:38 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.23 2008/01/02 11:48:28 ad Exp $"); #include #include @@ -197,7 +197,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, str goto ioerror; /* Write sgimips label to first sector */ - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); if ((error = biowait(bp)) != 0) @@ -209,7 +210,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, str bp->b_blkno = LABELSECTOR; bp->b_bcount = lp->d_secsize; bp->b_cylinder = bp->b_blkno / lp->d_secpercyl; - bp->b_flags &= ~(B_READ | B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/sh3/sh3/disksubr.c b/sys/arch/sh3/sh3/disksubr.c index 0703cca26910..7def17440a0c 100644 --- a/sys/arch/sh3/sh3/disksubr.c +++ b/sys/arch/sh3/sh3/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.26 2007/10/17 19:57:07 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.27 2008/01/02 11:48:28 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 
Regents of the University of California. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.26 2007/10/17 19:57:07 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.27 2008/01/02 11:48:28 ad Exp $"); #include "opt_mbr.h" @@ -410,7 +410,7 @@ readdisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -485,7 +485,7 @@ readdisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -633,7 +633,7 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -681,7 +681,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, goto done; found: - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/sparc/dev/fd.c b/sys/arch/sparc/dev/fd.c index baec3e94b94e..672b93168013 100644 --- a/sys/arch/sparc/dev/fd.c +++ b/sys/arch/sparc/dev/fd.c @@ -1,4 +1,4 @@ -/* $NetBSD: fd.c,v 1.138 2007/11/27 21:56:06 ad Exp $ */ +/* $NetBSD: fd.c,v 1.139 2008/01/02 11:48:28 ad Exp $ */ /*- * Copyright (c) 2000 The NetBSD Foundation, Inc. @@ -108,7 +108,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.138 2007/11/27 21:56:06 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.139 2008/01/02 11:48:28 ad Exp $"); #include "opt_ddb.h" #include "opt_md.h" @@ -2139,12 +2139,13 @@ fdformat(dev_t dev, struct ne7_fd_formb *finfo, struct proc *p) struct buf *bp; /* set up a buffer header for fdstrategy() */ - bp = getiobuf_nowait(); + bp = getiobuf(NULL, false); if (bp == NULL) return (ENOBUFS); bp->b_vp = NULL; - bp->b_flags = B_BUSY | B_PHYS | B_FORMAT; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_PHYS | B_FORMAT; bp->b_proc = p; bp->b_dev = dev; @@ -2322,14 +2323,13 @@ fd_read_md_image(size_t *sizep, void * *addrp) bp->b_error = 0; bp->b_resid = 0; bp->b_proc = NULL; - bp->b_flags = B_BUSY | B_PHYS | B_RAW | B_READ; + bp->b_cflags |= BC_BUSY; + bp->b_flags = B_PHYS | B_RAW | B_READ; bp->b_blkno = btodb(offset); bp->b_bcount = DEV_BSIZE; bp->b_data = addr; fdstrategy(bp); - while ((bp->b_flags & B_DONE) == 0) { - tsleep((void *)bp, PRIBIO + 1, "physio", 0); - } + biowait(bp); if (bp->b_error) panic("fd: mountroot: fdread error %d", bp->b_error); diff --git a/sys/arch/sparc/include/mutex.h b/sys/arch/sparc/include/mutex.h index df7592ff737e..df7a957edb03 100644 --- a/sys/arch/sparc/include/mutex.h +++ b/sys/arch/sparc/include/mutex.h @@ -1,4 +1,4 @@ -/* $NetBSD: mutex.h,v 1.6 2007/11/21 10:19:08 yamt Exp $ */ +/* $NetBSD: mutex.h,v 1.7 2008/01/02 11:48:28 ad Exp $ */ /*- * Copyright (c) 2002, 2007 The NetBSD Foundation, Inc. 
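The sparc fd.c hunks above make two related cleanups: scratch buffers now come from the common getiobuf()/putiobuf() pool rather than hand-rolled allocation, and the open-coded "tsleep until B_DONE" loop becomes a plain biowait(), which performs the equivalent wait on the buffer's own completion state. A minimal sketch of the combined pattern; raw_io() and its argument list are invented for illustration:

#include <sys/param.h>
#include <sys/buf.h>
#include <sys/errno.h>

static int
raw_io(dev_t dev, void *data, size_t len, void (*strat)(struct buf *))
{
        struct buf *bp;
        int error;

        bp = getiobuf(NULL, false);     /* no vnode; do not wait for memory */
        if (bp == NULL)
                return ENOBUFS;
        bp->b_cflags = BC_BUSY;
        bp->b_flags = B_PHYS | B_RAW | B_READ;
        bp->b_dev = dev;
        bp->b_data = data;
        bp->b_bcount = len;
        (*strat)(bp);
        error = biowait(bp);            /* replaces the tsleep-on-B_DONE loop */
        putiobuf(bp);
        return error;
}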
@@ -89,6 +89,8 @@ struct kmutex { #else /* __MUTEX_PRIVATE */ +#include + struct kmutex { union { /* Adaptive mutex */ diff --git a/sys/arch/sparc/include/pmap.h b/sys/arch/sparc/include/pmap.h index b5c0a223810b..22c8ec4e75d0 100644 --- a/sys/arch/sparc/include/pmap.h +++ b/sys/arch/sparc/include/pmap.h @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.h,v 1.81 2007/10/17 19:57:13 garbled Exp $ */ +/* $NetBSD: pmap.h,v 1.82 2008/01/02 11:48:29 ad Exp $ */ /* * Copyright (c) 1996 @@ -143,7 +143,6 @@ struct pmap { union ctxinfo *pm_ctx; /* current context, if any */ int pm_ctxnum; /* current context's number */ u_int pm_cpuset; /* CPU's this pmap has context on */ - struct simplelock pm_lock; /* spinlock */ int pm_refcount; /* just what it says */ struct mmuhd pm_reglist; /* MMU regions on this pmap (4/4c) */ diff --git a/sys/arch/sparc/include/vmparam.h b/sys/arch/sparc/include/vmparam.h index 0453aff2c7c8..3abbe8a2a930 100644 --- a/sys/arch/sparc/include/vmparam.h +++ b/sys/arch/sparc/include/vmparam.h @@ -1,4 +1,4 @@ -/* $NetBSD: vmparam.h,v 1.38 2006/02/07 16:55:31 chs Exp $ */ +/* $NetBSD: vmparam.h,v 1.39 2008/01/02 11:48:29 ad Exp $ */ /* * Copyright (c) 1992, 1993 @@ -142,7 +142,6 @@ struct vm_page_md { vaddr_t pv_va; /* virtual address */ int pv_flags; /* flags (below) */ } pvlisthead; - struct simplelock pv_slock; }; #define VM_MDPAGE_PVHEAD(pg) (&(pg)->mdpage.pvlisthead) @@ -151,7 +150,6 @@ struct vm_page_md { (pg)->mdpage.pvlisthead.pv_pmap = NULL; \ (pg)->mdpage.pvlisthead.pv_va = 0; \ (pg)->mdpage.pvlisthead.pv_flags = 0; \ - simple_lock_init(&(pg)->mdpage.pv_slock); \ } while(/*CONSTCOND*/0) #endif /* _SPARC_VMPARAM_H_ */ diff --git a/sys/arch/sparc/sparc/pmap.c b/sys/arch/sparc/sparc/pmap.c index 0311c479bad9..78a59d785575 100644 --- a/sys/arch/sparc/sparc/pmap.c +++ b/sys/arch/sparc/sparc/pmap.c @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.321 2007/11/16 23:46:20 martin Exp $ */ +/* $NetBSD: pmap.c,v 1.322 2008/01/02 11:48:29 ad Exp $ */ /* * Copyright (c) 1996 @@ -56,12 +56,10 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.321 2007/11/16 23:46:20 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.322 2008/01/02 11:48:29 ad Exp $"); #include "opt_ddb.h" #include "opt_kgdb.h" -#include "opt_lockdebug.h" -#include "opt_multiprocessor.h" #include "opt_sparc_arch.h" #include @@ -69,12 +67,12 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.321 2007/11/16 23:46:20 martin Exp $"); #include #include #include -#include #include #include #include #include #include +#include #include @@ -176,58 +174,8 @@ paddr_t vm_first_phys = (paddr_t)-1; paddr_t vm_last_phys = 0; psize_t vm_num_phys; -/* - * Locking: - * - * This pmap module uses two types of locks: `normal' (sleep) - * locks and `simple' (spin) locks. They are used as follows: - * - * READ/WRITE SPIN LOCKS - * --------------------- - * - * * pmap_main_lock - This lock is used to prevent deadlock and/or - * provide mutex access to the pmap module. Most operations lock - * the pmap first, then PV lists as needed. However, some operations, - * such as pmap_page_protect(), lock the PV lists before locking - * the pmaps. To prevent deadlock, we require a mutex lock on the - * pmap module if locking in the PV->pmap direction. This is - * implemented by acquiring a (shared) read lock on pmap_main_lock - * if locking pmap->PV and a (exclusive) write lock if locking in - * the PV->pmap direction. Since only one thread can hold a write - * lock at a time, this provides the mutex. 
- * - * SIMPLE LOCKS - * ------------ - * - * * pm_slock (per-pmap) - This lock protects all of the members - * of the pmap structure itself. Note that in the case of the - * kernel pmap, interrupts which cause memory allocation *must* - * be blocked while this lock is asserted. - * - * * pv_slock (per-vm_page) - This lock protects the PV list - * for a specified managed page. - * - * All internal functions which operate on a pmap are called - * with the pmap already locked by the caller (which will be - * an interface function). - */ -/* struct lock pmap_main_lock; */ - -#if 0 /* defined(MULTIPROCESSOR) || defined(LOCKDEBUG) */ -#define PMAP_MAP_TO_HEAD_LOCK() \ - spinlockmgr(&pmap_main_lock, LK_SHARED, NULL) -#define PMAP_MAP_TO_HEAD_UNLOCK() \ - spinlockmgr(&pmap_main_lock, LK_RELEASE, NULL) -#define PMAP_HEAD_TO_MAP_LOCK() \ - spinlockmgr(&pmap_main_lock, LK_EXCLUSIVE, NULL) -#define PMAP_HEAD_TO_MAP_UNLOCK() \ - spinlockmgr(&pmap_main_lock, LK_RELEASE, NULL) -#else -#define PMAP_MAP_TO_HEAD_LOCK() /* nothing */ -#define PMAP_MAP_TO_HEAD_UNLOCK() /* nothing */ -#define PMAP_HEAD_TO_MAP_LOCK() /* nothing */ -#define PMAP_HEAD_TO_MAP_UNLOCK() /* nothing */ -#endif /* MULTIPROCESSOR || LOCKDEBUG */ +#define PMAP_LOCK() KERNEL_LOCK(1, NULL) +#define PMAP_UNLOCK() KERNEL_UNLOCK_ONE(NULL) /* * Flags in pvlist.pv_flags. Note that PV_MOD must be 1 and PV_REF must be 2 @@ -365,6 +313,8 @@ struct mmuq region_freelist, region_lru, region_locked; int seginval; /* [4/4c] the invalid segment number */ int reginval; /* [4/3mmu] the invalid region number */ +static kmutex_t demap_lock; + /* * (sun4/4c) * A context is simply a small number that dictates which set of 4096 @@ -387,7 +337,7 @@ union ctxinfo { struct pmap *c_pmap; /* pmap (if busy) */ }; -static struct simplelock ctx_lock; /* lock for below */ +static kmutex_t ctx_lock; /* lock for below */ union ctxinfo *ctxinfo; /* allocated at in pmap_bootstrap */ union ctxinfo *ctx_freelist; /* context free list */ int ctx_kick; /* allocation rover when none free */ @@ -823,8 +773,6 @@ VA2PA(void *addr) * PTE at the same time we are. This is the procedure that is * recommended in the SuperSPARC user's manual. */ -static struct simplelock demap_lock = SIMPLELOCK_INITIALIZER; - int updatepte4m(vaddr_t va, int *pte, int bic, int bis, int ctx, u_int cpuset) { @@ -835,7 +783,7 @@ updatepte4m(vaddr_t va, int *pte, int bic, int bis, int ctx, u_int cpuset) * Can only be one of these happening in the system * at any one time. 
*/ - simple_lock(&demap_lock); + mutex_spin_enter(&demap_lock); /* * The idea is to loop swapping zero into the pte, flushing @@ -854,7 +802,7 @@ updatepte4m(vaddr_t va, int *pte, int bic, int bis, int ctx, u_int cpuset) swapval = (oldval & ~bic) | bis; swap(vpte, swapval); - simple_unlock(&demap_lock); + mutex_spin_exit(&demap_lock); return (oldval); } @@ -1732,7 +1680,6 @@ me_alloc(struct mmuq *mh, struct pmap *newpm, int newvreg, int newvseg) } while (--i > 0); /* update segment tables */ - simple_lock(&pm->pm_lock); if (CTX_USABLE(pm,rp)) { va = VSTOVA(me->me_vreg,me->me_vseg); if (pm != pmap_kernel() || HASSUN4_MMU3L) @@ -1749,7 +1696,6 @@ me_alloc(struct mmuq *mh, struct pmap *newpm, int newvreg, int newvseg) /* off old pmap chain */ TAILQ_REMOVE(&pm->pm_seglist, me, me_pmchain); - simple_unlock(&pm->pm_lock); setcontext4(ctx); /* onto new pmap chain; new pmap is already locked, if needed */ @@ -1897,14 +1843,12 @@ region_alloc(struct mmuq *mh, struct pmap *newpm, int newvr) } /* update region tables */ - simple_lock(&pm->pm_lock); /* what if other CPU takes mmuentry ?? */ if (pm->pm_ctx) setregmap(VRTOVA(me->me_vreg), reginval); rp->rg_smeg = reginval; /* off old pmap chain */ TAILQ_REMOVE(&pm->pm_reglist, me, me_pmchain); - simple_unlock(&pm->pm_lock); setcontext4(ctx); /* done with old context */ /* onto new pmap chain; new pmap is already locked, if needed */ @@ -2055,6 +1999,8 @@ mmu_pagein(struct pmap *pm, vaddr_t va, int prot) struct regmap *rp; struct segmap *sp; + PMAP_LOCK(); + if (prot != VM_PROT_NONE) bits = PG_V | ((prot & VM_PROT_WRITE) ? PG_W : 0); else @@ -2065,8 +2011,10 @@ mmu_pagein(struct pmap *pm, vaddr_t va, int prot) rp = &pm->pm_regmap[vr]; /* return 0 if we have no PMEGs to load */ - if (rp->rg_nsegmap == 0) + if (rp->rg_nsegmap == 0) { + PMAP_UNLOCK(); return (0); + } #ifdef DIAGNOSTIC if (rp->rg_segmap == NULL) @@ -2080,14 +2028,19 @@ mmu_pagein(struct pmap *pm, vaddr_t va, int prot) sp = &rp->rg_segmap[vs]; /* return 0 if we have no PTEs to load */ - if (sp->sg_npte == 0) + if (sp->sg_npte == 0) { + PMAP_UNLOCK(); return (0); + } /* return -1 if the fault is `hard', 0 if not */ - if (sp->sg_pmeg != seginval) + if (sp->sg_pmeg != seginval) { + PMAP_UNLOCK(); return (bits && (getpte4(va) & bits) == bits ? -1 : 0); + } mmu_pagein_seg(pm, sp, va, vr, vs, &segm_lru); + PMAP_UNLOCK(); return (1); } #endif /* SUN4 or SUN4C */ @@ -2103,7 +2056,7 @@ void ctx_alloc(struct pmap *pm) { union ctxinfo *c; - int s, cnum, i, doflush; + int cnum, i, doflush; struct regmap *rp; int gap_start, gap_end; vaddr_t va; @@ -2120,8 +2073,7 @@ ctx_alloc(struct pmap *pm) gap_end = pm->pm_gap_end; } - s = splvm(); - simple_lock(&ctx_lock); + mutex_spin_enter(&ctx_lock); if ((c = ctx_freelist) != NULL) { ctx_freelist = c->c_nextfree; cnum = c - ctxinfo; @@ -2152,7 +2104,6 @@ ctx_alloc(struct pmap *pm) gap_end = c->c_pmap->pm_gap_end; } } - simple_unlock(&ctx_lock); c->c_pmap = pm; pm->pm_ctx = c; @@ -2181,7 +2132,6 @@ ctx_alloc(struct pmap *pm) */ setcontext4(cnum); - splx(s); if (doflush) cache_flush_context(cnum); @@ -2255,7 +2205,6 @@ ctx_alloc(struct pmap *pm) * Note on multi-threaded processes: a context must remain * valid as long as any thread is still running on a CPU. 
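In the sparc pmap above, demap_lock turns from a statically initialized simplelock into a kmutex_t that bootstrap code mutex_init()s at IPL_VM and that updatepte4m() enters with mutex_spin_enter(), so the mutex now also supplies the interrupt blocking that the old splvm()/simple_lock pairs provided. A condensed sketch of the protocol under the new lock, with the hardware swap() step reduced to a plain read-modify-write:

#include <sys/mutex.h>
#include <sys/intr.h>

static kmutex_t demap_lock;

void
demap_init(void)
{
        /* IPL_VM makes this a spin mutex that blocks VM interrupts */
        mutex_init(&demap_lock, MUTEX_DEFAULT, IPL_VM);
}

int
updatepte_sketch(volatile int *vpte, int bic, int bis)
{
        int oldval;

        mutex_spin_enter(&demap_lock);  /* one PTE update protocol at a time */
        oldval = *vpte;
        *vpte = (oldval & ~bic) | bis;
        mutex_spin_exit(&demap_lock);
        return oldval;
}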
*/ - simple_lock(&pm->pm_lock); #if defined(MULTIPROCESSOR) for (i = 0; i < sparc_ncpus; i++) #else @@ -2271,18 +2220,17 @@ ctx_alloc(struct pmap *pm) (pm->pm_reg_ptps_pa[i] >> SRMMU_PPNPASHIFT) | SRMMU_TEPTD); } - simple_unlock(&pm->pm_lock); /* And finally switch to the new context */ (*cpuinfo.pure_vcache_flush)(); setcontext4m(cnum); #endif /* SUN4M || SUN4D */ - splx(s); } + mutex_spin_exit(&ctx_lock); } /* - * Give away a context. Always called in the context of proc0 (reaper) + * Give away a context. */ void ctx_free(struct pmap *pm) @@ -2303,6 +2251,8 @@ ctx_free(struct pmap *pm) } #endif /* SUN4 || SUN4C */ + mutex_spin_enter(&ctx_lock); + #if defined(SUN4M) || defined(SUN4D) if (CPU_HAS_SRMMU) { int i; @@ -2325,10 +2275,9 @@ ctx_free(struct pmap *pm) } #endif - simple_lock(&ctx_lock); c->c_nextfree = ctx_freelist; ctx_freelist = c; - simple_unlock(&ctx_lock); + mutex_spin_exit(&ctx_lock); } @@ -2704,7 +2653,7 @@ pv_syncflags4m(struct vm_page *pg) int tpte; s = splvm(); - PMAP_HEAD_TO_MAP_LOCK(); + PMAP_LOCK(); pv = VM_MDPAGE_PVHEAD(pg); if (pv->pv_pmap == NULL) { /* Page not mapped; pv_flags is already up to date */ @@ -2712,11 +2661,9 @@ pv_syncflags4m(struct vm_page *pg) goto out; } - simple_lock(&pg->mdpage.pv_slock); flags = pv->pv_flags; for (; pv != NULL; pv = pv->pv_next) { pm = pv->pv_pmap; - simple_lock(&pm->pm_lock); va = pv->pv_va; rp = &pm->pm_regmap[VA_VREG(va)]; sp = &rp->rg_segmap[VA_VSEG(va)]; @@ -2736,14 +2683,11 @@ pv_syncflags4m(struct vm_page *pg) SRMMU_PG_M | SRMMU_PG_R, 0, pm->pm_ctxnum, PMAP_CPUSET(pm))); } - - simple_unlock(&pm->pm_lock); } VM_MDPAGE_PVHEAD(pg)->pv_flags = flags; - simple_unlock(&pg->mdpage.pv_slock); out: - PMAP_HEAD_TO_MAP_UNLOCK(); + PMAP_UNLOCK(); splx(s); return (flags); } @@ -2758,7 +2702,6 @@ pv_unlink4m(struct vm_page *pg, struct pmap *pm, vaddr_t va) pv0 = VM_MDPAGE_PVHEAD(pg); - simple_lock(&pg->mdpage.pv_slock); npv = pv0->pv_next; /* * First entry is special (sigh). 
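The ctx_alloc()/ctx_free() hunks above collapse the old splvm()+simple_lock(&ctx_lock) bracketing into one spin mutex held across the whole allocate or free. A stripped-down sketch of the context freelist under that mutex; the two-member union mirrors the ctxinfo declaration shown earlier in the diff, and ctx_get()/ctx_put() are invented names:

#include <sys/mutex.h>

struct pmap;

union ctxinfo {
        union ctxinfo *c_nextfree;      /* free list (if free) */
        struct pmap *c_pmap;            /* pmap (if busy) */
};

static kmutex_t ctx_lock;               /* mutex_init()ed at IPL_SCHED */
static union ctxinfo *ctx_freelist;

static union ctxinfo *
ctx_get(struct pmap *pm)
{
        union ctxinfo *c;

        mutex_spin_enter(&ctx_lock);    /* replaces splvm()+simple_lock() */
        if ((c = ctx_freelist) != NULL) {
                ctx_freelist = c->c_nextfree;
                c->c_pmap = pm;
        }
        mutex_spin_exit(&ctx_lock);
        return c;                       /* NULL: steal one, as ctx_alloc() does */
}

static void
ctx_put(union ctxinfo *c)
{
        mutex_spin_enter(&ctx_lock);
        c->c_nextfree = ctx_freelist;
        ctx_freelist = c;
        mutex_spin_exit(&ctx_lock);
}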
@@ -2785,7 +2728,7 @@ pv_unlink4m(struct vm_page *pg, struct pmap *pm, vaddr_t va) */ pv0->pv_pmap = NULL; pv0->pv_flags &= ~(PV_NC|PV_ANC); - goto out; + return; } } else { struct pvlist *prev; @@ -2795,7 +2738,7 @@ pv_unlink4m(struct vm_page *pg, struct pmap *pm, vaddr_t va) if (npv == NULL) { panic("pv_unlink: pm %p is missing on pg %p", pm, pg); - goto out; + return; } if (npv->pv_pmap == pm && npv->pv_va == va) break; @@ -2813,7 +2756,7 @@ pv_unlink4m(struct vm_page *pg, struct pmap *pm, vaddr_t va) for (npv = pv0->pv_next; npv != NULL; npv = npv->pv_next) if (BADALIAS(va, npv->pv_va) || (npv->pv_flags & PV_NC) != 0) - goto out; + return; #ifdef DEBUG if (pmapdebug & PDB_CACHESTUFF) printf( @@ -2824,9 +2767,6 @@ pv_unlink4m(struct vm_page *pg, struct pmap *pm, vaddr_t va) pv0->pv_flags &= ~PV_ANC; pv_changepte4m(pg, SRMMU_PG_C, 0); } - -out: - simple_unlock(&pg->mdpage.pv_slock); } /* @@ -2843,7 +2783,6 @@ pv_link4m(struct vm_page *pg, struct pmap *pm, vaddr_t va, int error = 0; pv0 = VM_MDPAGE_PVHEAD(pg); - simple_lock(&pg->mdpage.pv_slock); if (pv0->pv_pmap == NULL) { /* no pvlist entries yet */ @@ -2914,7 +2853,6 @@ link_npv: pv0->pv_next = npv; out: - simple_unlock(&pg->mdpage.pv_slock); return (error); } #endif @@ -2932,8 +2870,7 @@ pv_uncache(struct vm_page *pg) int s; s = splvm(); - PMAP_HEAD_TO_MAP_LOCK(); - simple_lock(&pg->mdpage.pv_slock); + PMAP_LOCK(); for (pv = VM_MDPAGE_PVHEAD(pg); pv != NULL; pv = pv->pv_next) pv->pv_flags |= PV_NC; @@ -2946,8 +2883,7 @@ pv_uncache(struct vm_page *pg) if (CPU_HAS_SUNMMU) pv_changepte4_4c(pg, PG_NC, 0); #endif - simple_unlock(&pg->mdpage.pv_slock); - PMAP_HEAD_TO_MAP_UNLOCK(); + PMAP_UNLOCK(); splx(s); } @@ -3050,12 +2986,6 @@ pmap_bootstrap(int nctx, int nregion, int nsegment) nptesg = (NBPSG >> pgshift); #endif - /* - * Initialize the locks. - */ - /* spinlockinit(&pmap_main_lock, "pmaplk", 0); */ - simple_lock_init(&kernel_pmap_store.pm_lock); - /* * Grab physical memory list. */ @@ -3279,12 +3209,14 @@ pmap_bootstrap4_4c(void *top, int nctx, int nregion, int nsegment) p = i; /* retract to first free phys */ + mutex_init(&demap_lock, MUTEX_DEFAULT, IPL_VM); + /* * All contexts are free except the kernel's. * * XXX sun4c could use context 0 for users? 
*/ - simple_lock_init(&ctx_lock); + mutex_init(&ctx_lock, MUTEX_DEFAULT, IPL_SCHED); ci->c_pmap = pmap_kernel(); ctx_freelist = ci + 1; for (i = 1; i < ncontext; i++) { @@ -3767,10 +3699,12 @@ pmap_bootstrap4m(void *top) p = q; /* retract to first free phys */ + mutex_init(&demap_lock, MUTEX_DEFAULT, IPL_VM); + /* * Set up the ctxinfo structures (freelist of contexts) */ - simple_lock_init(&ctx_lock); + mutex_init(&ctx_lock, MUTEX_DEFAULT, IPL_SCHED); ci->c_pmap = pmap_kernel(); ctx_freelist = ci + 1; for (i = 1; i < ncontext; i++) { @@ -4238,7 +4172,6 @@ pmap_pmap_pool_ctor(void *arg, void *object, int flags) qzero((void *)pm->pm_regmap, NUREG * sizeof(struct regmap)); /* pm->pm_ctx = NULL; // already done */ - simple_lock_init(&pm->pm_lock); if (CPU_HAS_SUNMMU) { TAILQ_INIT(&pm->pm_seglist); @@ -4376,16 +4309,12 @@ pmap_create(void) void pmap_destroy(struct pmap *pm) { - int count; #ifdef DEBUG if (pmapdebug & PDB_DESTROY) printf("pmap_destroy[%d](%p)\n", cpu_number(), pm); #endif - simple_lock(&pm->pm_lock); - count = --pm->pm_refcount; - simple_unlock(&pm->pm_lock); - if (count == 0) { + if (atomic_dec_uint_nv(&pm->pm_refcount) == 0) { #ifdef DEBUG pmap_quiet_check(pm); #endif @@ -4399,11 +4328,8 @@ pmap_destroy(struct pmap *pm) void pmap_reference(struct pmap *pm) { - int s = splvm(); - simple_lock(&pm->pm_lock); - pm->pm_refcount++; - simple_unlock(&pm->pm_lock); - splx(s); + + atomic_inc_uint(&pm->pm_refcount); } #if defined(SUN4) || defined(SUN4C) @@ -4584,8 +4510,7 @@ pmap_remove(struct pmap *pm, vaddr_t va, vaddr_t endva) ctx = getcontext(); s = splvm(); /* XXX conservative */ - PMAP_MAP_TO_HEAD_LOCK(); - simple_lock(&pm->pm_lock); + PMAP_LOCK(); for (; va < endva; va = nva) { /* do one virtual segment at a time */ vr = VA_VREG(va); @@ -4596,8 +4521,7 @@ pmap_remove(struct pmap *pm, vaddr_t va, vaddr_t endva) if (pm->pm_regmap[vr].rg_nsegmap != 0) (*rm)(pm, va, nva, vr, vs); } - simple_unlock(&pm->pm_lock); - PMAP_MAP_TO_HEAD_UNLOCK(); + PMAP_UNLOCK(); splx(s); setcontext(ctx); } @@ -5104,7 +5028,7 @@ pmap_protect4_4c(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot) write_user_windows(); ctx = getcontext4(); s = splvm(); - simple_lock(&pm->pm_lock); + PMAP_LOCK(); for (va = sva; va < eva;) { vr = VA_VREG(va); vs = VA_VSEG(va); @@ -5175,7 +5099,7 @@ pmap_protect4_4c(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot) } } } - simple_unlock(&pm->pm_lock); + PMAP_UNLOCK(); splx(s); setcontext4(ctx); } @@ -5292,8 +5216,7 @@ pmap_page_protect4m(struct vm_page *pg, vm_prot_t prot) cpu_number(), VM_PAGE_TO_PHYS(pg), prot); #endif s = splvm(); - PMAP_HEAD_TO_MAP_LOCK(); - simple_lock(&pg->mdpage.pv_slock); + PMAP_LOCK(); if (prot & VM_PROT_READ) { pv_changepte4m(pg, 0, PPROT_WRITE); @@ -5314,7 +5237,6 @@ pmap_page_protect4m(struct vm_page *pg, vm_prot_t prot) flags = pv->pv_flags & ~(PV_NC|PV_ANC); while (pv != NULL) { pm = pv->pv_pmap; - simple_lock(&pm->pm_lock); va = pv->pv_va; vr = VA_VREG(va); vs = VA_VSEG(va); @@ -5361,7 +5283,6 @@ pmap_page_protect4m(struct vm_page *pg, vm_prot_t prot) npv = pv->pv_next; if (pv != VM_MDPAGE_PVHEAD(pg)) pool_put(&pv_pool, pv); - simple_unlock(&pm->pm_lock); pv = npv; } @@ -5371,8 +5292,7 @@ pmap_page_protect4m(struct vm_page *pg, vm_prot_t prot) VM_MDPAGE_PVHEAD(pg)->pv_flags = flags; out: - simple_unlock(&pg->mdpage.pv_slock); - PMAP_HEAD_TO_MAP_UNLOCK(); + PMAP_UNLOCK(); splx(s); } @@ -5406,8 +5326,7 @@ pmap_protect4m(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot) write_user_windows(); s = splvm(); - 
PMAP_MAP_TO_HEAD_LOCK(); - simple_lock(&pm->pm_lock); + PMAP_LOCK(); for (va = sva; va < eva;) { vr = VA_VREG(va); @@ -5456,8 +5375,7 @@ pmap_protect4m(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot) PMAP_CPUSET(pm)); } } - simple_unlock(&pm->pm_lock); - PMAP_MAP_TO_HEAD_UNLOCK(); + PMAP_UNLOCK(); splx(s); } @@ -6006,7 +5924,7 @@ pmap_kremove4_4c(vaddr_t va, vsize_t len) s = splvm(); ctx = getcontext(); - simple_lock(&pm->pm_lock); + PMAP_LOCK(); setcontext4(0); for (; va < endva; va = nva) { /* do one virtual segment at a time */ @@ -6086,7 +6004,7 @@ pmap_kremove4_4c(vaddr_t va, vsize_t len) mmu_pmeg_unlock(sp->sg_pmeg); } } - simple_unlock(&pm->pm_lock); + PMAP_UNLOCK(); setcontext4(ctx); splx(s); } @@ -6215,8 +6133,7 @@ pmap_enk4m(struct pmap *pm, vaddr_t va, vm_prot_t prot, int flags, sp = &rp->rg_segmap[vs]; s = splvm(); /* XXX way too conservative */ - PMAP_MAP_TO_HEAD_LOCK(); - simple_lock(&pm->pm_lock); + PMAP_LOCK(); if (rp->rg_seg_ptps == NULL) /* enter new region */ panic("pmap_enk4m: missing kernel region table for va 0x%lx",va); @@ -6229,8 +6146,7 @@ pmap_enk4m(struct pmap *pm, vaddr_t va, vm_prot_t prot, int flags, if ((tpte & SRMMU_PPNMASK) == (pteproto & SRMMU_PPNMASK)) { /* just changing protection and/or wiring */ pmap_changeprot4m(pm, va, prot, flags); - simple_unlock(&pm->pm_lock); - PMAP_MAP_TO_HEAD_UNLOCK(); + PMAP_UNLOCK(); splx(s); return (0); } @@ -6276,8 +6192,7 @@ printf("pmap_enk4m: changing existing va=>pa entry: va 0x%lx, pteproto 0x%x, " setpgt4m(&sp->sg_pte[VA_SUN4M_VPG(va)], pteproto); pm->pm_stats.resident_count++; out: - simple_unlock(&pm->pm_lock); - PMAP_MAP_TO_HEAD_UNLOCK(); + PMAP_UNLOCK(); splx(s); return (error); } @@ -6304,8 +6219,7 @@ pmap_enu4m(struct pmap *pm, vaddr_t va, vm_prot_t prot, int flags, vs = VA_VSEG(va); rp = &pm->pm_regmap[vr]; s = splvm(); /* XXX conservative */ - PMAP_MAP_TO_HEAD_LOCK(); - simple_lock(&pm->pm_lock); + PMAP_LOCK(); if (rp->rg_segmap == NULL) { /* definitely a new mapping */ @@ -6409,8 +6323,7 @@ pmap_enu4m(struct pmap *pm, vaddr_t va, vm_prot_t prot, int flags, /* just changing prot and/or wiring */ /* caller should call this directly: */ pmap_changeprot4m(pm, va, prot, flags); - simple_unlock(&pm->pm_lock); - PMAP_MAP_TO_HEAD_UNLOCK(); + PMAP_UNLOCK(); splx(s); return (0); } @@ -6479,8 +6392,7 @@ pmap_enu4m(struct pmap *pm, vaddr_t va, vm_prot_t prot, int flags, } out: - simple_unlock(&pm->pm_lock); - PMAP_MAP_TO_HEAD_UNLOCK(); + PMAP_UNLOCK(); splx(s); return (error); } @@ -6733,8 +6645,9 @@ pmap_extract4m(struct pmap *pm, vaddr_t va, paddr_t *pap) * requires interrupt protection. */ s = splvm(); - if (pm != pmap_kernel()) - simple_lock(&pm->pm_lock); + if (pm != pmap_kernel()) { + PMAP_LOCK(); + } rp = &pm->pm_regmap[vr]; if (rp->rg_segmap == NULL) { @@ -6766,9 +6679,9 @@ pmap_extract4m(struct pmap *pm, vaddr_t va, paddr_t *pap) * the middle of the PTE update protocol. So, acquire the * demap lock and retry. 
*/ - simple_lock(&demap_lock); + mutex_spin_enter(&demap_lock); pte = sp->sg_pte[VA_SUN4M_VPG(va)]; - simple_unlock(&demap_lock); + mutex_spin_exit(&demap_lock); if ((pte & SRMMU_TETYPE) != SRMMU_TEPTE) goto out; } @@ -6783,8 +6696,9 @@ pmap_extract4m(struct pmap *pm, vaddr_t va, paddr_t *pap) v = true; out: - if (pm != pmap_kernel()) - simple_unlock(&pm->pm_lock); + if (pm != pmap_kernel()) { + PMAP_UNLOCK(); + } splx(s); return (v); } diff --git a/sys/arch/sparc/sparc/trap.c b/sys/arch/sparc/sparc/trap.c index 36d56844385e..e06ae9b542b0 100644 --- a/sys/arch/sparc/sparc/trap.c +++ b/sys/arch/sparc/sparc/trap.c @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.172 2007/03/04 06:00:47 christos Exp $ */ +/* $NetBSD: trap.c,v 1.173 2008/01/02 11:48:29 ad Exp $ */ /* * Copyright (c) 1996 @@ -49,7 +49,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.172 2007/03/04 06:00:47 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.173 2008/01/02 11:48:29 ad Exp $"); #include "opt_ddb.h" #include "opt_compat_svr4.h" @@ -417,9 +417,7 @@ badtrap: #endif if (fs == NULL) { - KERNEL_LOCK(1, l); fs = malloc(sizeof *fs, M_SUBPROC, M_WAITOK); - KERNEL_UNLOCK_LAST(l); *fs = initfpstate; l->l_md.md_fpstate = fs; } @@ -495,12 +493,10 @@ badtrap: } case T_WINOF: - KERNEL_LOCK(1, l); if (rwindow_save(l)) { mutex_enter(&p->p_smutex); sigexit(l, SIGILL); } - KERNEL_UNLOCK_LAST(l); break; #define read_rw(src, dst) \ @@ -515,7 +511,6 @@ badtrap: * nsaved to -1. If we decide to deliver a signal on * our way out, we will clear nsaved. */ - KERNEL_LOCK(1, l); if (pcb->pcb_uw || pcb->pcb_nsaved) panic("trap T_RWRET 1"); #ifdef DEBUG @@ -531,7 +526,6 @@ badtrap: if (pcb->pcb_nsaved) panic("trap T_RWRET 2"); pcb->pcb_nsaved = -1; /* mark success */ - KERNEL_UNLOCK_LAST(l); break; case T_WINUF: @@ -544,7 +538,6 @@ badtrap: * in the pcb. The restore's window may still be in * the CPU; we need to force it out to the stack. */ - KERNEL_LOCK(1, l); #ifdef DEBUG if (rwindow_debug) printf("cpu%d:%s[%d]: rwindow: T_WINUF 0: pcb<-stack: 0x%x\n", @@ -569,14 +562,11 @@ badtrap: if (pcb->pcb_nsaved) panic("trap T_WINUF"); pcb->pcb_nsaved = -1; /* mark success */ - KERNEL_UNLOCK_LAST(l); break; case T_ALIGN: if ((p->p_md.md_flags & MDP_FIXALIGN) != 0) { - KERNEL_LOCK(1, l); n = fixalign(l, tf); - KERNEL_UNLOCK_LAST(l); if (n == 0) { ADVANCE; break; @@ -598,7 +588,6 @@ badtrap: * will not match once fpu_cleanup does its job, so * we must not save again later.) 
*/ - KERNEL_LOCK(1, l); if (l != cpuinfo.fplwp) panic("fpe without being the FP user"); FPU_LOCK(s); @@ -606,7 +595,6 @@ badtrap: cpuinfo.fplwp = NULL; l->l_md.md_fpu = NULL; FPU_UNLOCK(s); - KERNEL_UNLOCK_LAST(l); /* tf->tf_psr &= ~PSR_EF; */ /* share_fpu will do this */ if ((code = fpu_cleanup(l, l->l_md.md_fpstate)) != 0) { sig = SIGFPE; @@ -658,12 +646,10 @@ badtrap: case T_FLUSHWIN: write_user_windows(); #ifdef probably_slower_since_this_is_usually_false - KERNEL_LOCK(1, l); if (pcb->pcb_nsaved && rwindow_save(p)) { mutex_enter(&p->p_smutex); sigexit(l, SIGILL); } - KERNEL_UNLOCK_LAST(l); #endif ADVANCE; break; @@ -703,10 +689,8 @@ badtrap: break; } if (sig != 0) { - KERNEL_LOCK(1, l); ksi.ksi_signo = sig; trapsignal(l, &ksi); - KERNEL_UNLOCK_LAST(l); } userret(l, pc, sticks); share_fpu(l, tf); @@ -806,9 +790,6 @@ mem_access_fault(unsigned type, int ser, u_int v, int pc, int psr, LWP_CACHE_CREDS(l, p); sticks = p->p_sticks; - if ((psr & PSR_PS) == 0) - KERNEL_LOCK(1, l); - #ifdef FPU_DEBUG if ((tf->tf_psr & PSR_EF) != 0) { if (cpuinfo.fplwp != l) @@ -974,7 +955,6 @@ kfault: } out: if ((psr & PSR_PS) == 0) { - KERNEL_UNLOCK_LAST(l); userret(l, pc, sticks); share_fpu(l, tf); } @@ -1054,11 +1034,6 @@ mem_access_fault4m(unsigned type, u_int sfsr, u_int sfva, struct trapframe *tf) goto out_nounlock; } - if ((psr & PSR_PS) == 0) - KERNEL_LOCK(1, l); - else - KERNEL_LOCK(1, NULL); - /* * Figure out what to pass the VM code. We cannot ignore the sfva * register on text faults, since this might be a trap on an @@ -1188,7 +1163,6 @@ mem_access_fault4m(unsigned type, u_int sfsr, u_int sfva, struct trapframe *tf) if (va >= KERNBASE) { rv = uvm_fault(kernel_map, va, atype); if (rv == 0) { - KERNEL_UNLOCK_ONE(NULL); return; } goto kfault; @@ -1232,7 +1206,6 @@ kfault: tf->tf_pc = onfault; tf->tf_npc = onfault + 4; tf->tf_out[0] = (rv == EACCES) ? EFAULT : rv; - KERNEL_UNLOCK_ONE(NULL); return; } KSI_INIT_TRAP(&ksi); @@ -1255,13 +1228,10 @@ kfault: } out: if ((psr & PSR_PS) == 0) { - KERNEL_UNLOCK_LAST(l); out_nounlock: userret(l, pc, sticks); share_fpu(l, tf); } - else - KERNEL_UNLOCK_ONE(NULL); } #endif /* SUN4M */ @@ -1283,7 +1253,6 @@ startlwp(void *arg) #endif pool_put(&lwp_uc_pool, uc); - KERNEL_UNLOCK_LAST(l); userret(l, l->l_md.md_tf->tf_pc, 0); } diff --git a/sys/arch/sparc64/dev/fdc.c b/sys/arch/sparc64/dev/fdc.c index 9700a86258ac..bec1b2b7addb 100644 --- a/sys/arch/sparc64/dev/fdc.c +++ b/sys/arch/sparc64/dev/fdc.c @@ -1,4 +1,4 @@ -/* $NetBSD: fdc.c,v 1.19 2007/11/28 20:41:35 jnemeth Exp $ */ +/* $NetBSD: fdc.c,v 1.20 2008/01/02 11:48:29 ad Exp $ */ /*- * Copyright (c) 2000 The NetBSD Foundation, Inc. 
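The sparc pmap_reference() and pmap_destroy() hunks above replace the lock, modify, unlock dance around pm_refcount with single atomic operations. A minimal sketch of the pattern, assuming only <sys/atomic.h>; the refobj type and function names are illustrations, not identifiers from the patch:

#include <sys/param.h>
#include <sys/atomic.h>

struct refobj {
	volatile u_int	r_refcnt;	/* shared reference count */
};

/* Take a reference: a single atomic increment, no lock, no splvm(). */
static inline void
refobj_reference(struct refobj *r)
{

	atomic_inc_uint(&r->r_refcnt);
}

/*
 * Drop a reference.  atomic_dec_uint_nv() returns the new value, so
 * exactly one caller observes zero and may tear the object down.
 */
static inline void
refobj_release(struct refobj *r)
{

	if (atomic_dec_uint_nv(&r->r_refcnt) == 0) {
		/* last reference: free the object's resources here */
	}
}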
@@ -108,7 +108,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fdc.c,v 1.19 2007/11/28 20:41:35 jnemeth Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fdc.c,v 1.20 2008/01/02 11:48:29 ad Exp $"); #include "opt_ddb.h" #include "opt_md.h" @@ -2327,12 +2327,13 @@ fdformat(dev_t dev, struct ne7_fd_formb *finfo, struct proc *p) struct buf *bp; /* set up a buffer header for fdstrategy() */ - bp = getiobuf_nowait(); + bp = getiobuf(NULL, false); if (bp == NULL) return ENOBUFS; bp->b_vp = NULL; - bp->b_flags = B_BUSY | B_PHYS | B_FORMAT; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_PHYS | B_FORMAT; bp->b_proc = p; bp->b_dev = dev; @@ -2515,14 +2516,13 @@ fd_read_md_image(size_t *sizep, void **addrp) bp->b_error = 0; bp->b_resid = 0; bp->b_proc = NULL; - bp->b_flags = B_BUSY | B_PHYS | B_RAW | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_PHYS | B_RAW | B_READ; bp->b_blkno = btodb(offset); bp->b_bcount = DEV_BSIZE; bp->b_data = addr; fdstrategy(bp); - while ((bp->b_flags & B_DONE) == 0) { - tsleep((void *)bp, PRIBIO + 1, "physio", 0); - } + biowait(bp); if (bp->b_error) panic("fd: mountroot: fdread error %d", bp->b_error); diff --git a/sys/arch/sparc64/sparc64/pmap.c b/sys/arch/sparc64/sparc64/pmap.c index 9f9d81a526ed..8e3920bfdfa5 100644 --- a/sys/arch/sparc64/sparc64/pmap.c +++ b/sys/arch/sparc64/sparc64/pmap.c @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.202 2007/12/09 20:12:55 martin Exp $ */ +/* $NetBSD: pmap.c,v 1.203 2008/01/02 11:48:30 ad Exp $ */ /* * * Copyright (C) 1996-1999 Eduardo Horvath. @@ -26,7 +26,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.202 2007/12/09 20:12:55 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.203 2008/01/02 11:48:30 ad Exp $"); #undef NO_VCACHE /* Don't forget the locked TLB in dostart */ #define HWREF @@ -45,6 +45,8 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.202 2007/12/09 20:12:55 martin Exp $"); #include #include #include +#include +#include #include @@ -53,7 +55,6 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.202 2007/12/09 20:12:55 martin Exp $"); #include #include #include -#include #include #include "cache.h" @@ -122,8 +123,8 @@ extern int pseg_set(struct pmap *, vaddr_t, int64_t, paddr_t); #define PV_SETVA(pv,va) ((pv)->pv_va = (((va) & PV_VAMASK) | \ (((pv)->pv_va) & PV_MASK))) -struct pool pmap_pmap_pool; -struct pool pmap_pv_pool; +struct pool_cache pmap_cache; +struct pool_cache pmap_pv_cache; pv_entry_t pmap_remove_pv(struct pmap *, vaddr_t, struct vm_page *); void pmap_enter_pv(struct pmap *, vaddr_t, paddr_t, struct vm_page *, @@ -292,6 +293,10 @@ int numctx; static int pmap_get_page(paddr_t *p); static void pmap_free_page(paddr_t pa); +/* + * Global pmap lock. + */ +static kmutex_t pmap_lock; /* * Support for big page sizes. This maps the page size to the @@ -929,7 +934,7 @@ pmap_bootstrap(u_long kernelstart, u_long kernelend) /* * Allocate and clear out pmap_kernel()->pm_segs[] */ - simple_lock_init(&pmap_kernel()->pm_lock); + mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_NONE); pmap_kernel()->pm_refs = 1; pmap_kernel()->pm_ctx = 0; @@ -1170,10 +1175,10 @@ pmap_init() /* * initialize the pmap pools. 
*/ - pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, 0, 0, "pv_entry", - &pool_allocator_nointr, IPL_NONE); + pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap), 0, 0, 0, + "pmappl", NULL, IPL_NONE, NULL, NULL, NULL); + pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0, 0, + "pv_entry", NULL, IPL_NONE, NULL, NULL, NULL); vm_first_phys = avail_start; vm_num_phys = avail_end - avail_start; @@ -1219,7 +1224,7 @@ pmap_growkernel(maxkvaddr) (void *)KERNEND, (void *)maxkvaddr); return (kbreak); } - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); DPRINTF(PDB_GROW, ("pmap_growkernel(%lx...%lx)\n", kbreak, maxkvaddr)); /* Align with the start of a page table */ for (kbreak &= (-1 << PDSHIFT); kbreak < maxkvaddr; @@ -1237,7 +1242,7 @@ pmap_growkernel(maxkvaddr) ENTER_STAT(ptpneeded); } } - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); return (kbreak); } @@ -1251,11 +1256,10 @@ pmap_create() DPRINTF(PDB_CREATE, ("pmap_create()\n")); - pm = pool_get(&pmap_pmap_pool, PR_WAITOK); + pm = pool_cache_get(&pmap_cache, PR_WAITOK); memset(pm, 0, sizeof *pm); DPRINTF(PDB_CREATE, ("pmap_create(): created %p\n", pm)); - simple_lock_init(&pm->pm_lock); pm->pm_refs = 1; TAILQ_INIT(&pm->pm_obj.memq); if (pm != pmap_kernel()) { @@ -1276,9 +1280,7 @@ pmap_reference(pm) struct pmap *pm; { - simple_lock(&pm->pm_lock); - pm->pm_refs++; - simple_unlock(&pm->pm_lock); + atomic_inc_uint(&pm->pm_refs); } /* @@ -1290,12 +1292,8 @@ pmap_destroy(pm) struct pmap *pm; { struct vm_page *pg, *nextpg; - int refs; - simple_lock(&pm->pm_lock); - refs = --pm->pm_refs; - simple_unlock(&pm->pm_lock); - if (refs > 0) { + if (atomic_dec_uint_nv(&pm->pm_refs) > 0) { return; } DPRINTF(PDB_DESTROY, ("pmap_destroy: freeing pmap %p\n", pm)); @@ -1309,7 +1307,7 @@ pmap_destroy(pm) uvm_pagefree(pg); } pmap_free_page((paddr_t)(u_long)pm->pm_segs); - pool_put(&pmap_pmap_pool, pm); + pool_cache_put(&pmap_cache, pm); } /* @@ -1357,7 +1355,7 @@ pmap_collect(pm) if (pm == pmap_kernel()) return; - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); for (i = 0; i < STSZ; i++) { pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], ASI_PHYS_CACHED); @@ -1396,7 +1394,7 @@ pmap_collect(pm) pmap_free_page(pa); } } - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); } /* @@ -1627,7 +1625,7 @@ pmap_enter(pm, va, pa, prot, flags) * entering the same PA again. if it's different remove it. 
*/ - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); data = pseg_get(pm, va); if (data & TLB_V) { wasmapped = TRUE; @@ -1665,17 +1663,17 @@ pmap_enter(pm, va, pa, prot, flags) */ if (pvh->pv_pmap == NULL || (wasmapped && opa == pa)) { if (npv != NULL) { - pool_put(&pmap_pv_pool, npv); + pool_cache_put(&pmap_pv_cache, npv); /* XXXAD defer */ npv = NULL; } if (wasmapped && opa == pa) { dopv = FALSE; } } else if (npv == NULL) { - npv = pool_get(&pmap_pv_pool, PR_NOWAIT); + npv = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); /* XXXAD defer */ if (npv == NULL) { if (flags & PMAP_CANFAIL) { - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); return (ENOMEM); } panic("pmap_enter: no pv entries available"); @@ -1686,7 +1684,7 @@ pmap_enter(pm, va, pa, prot, flags) ENTER_STAT(unmanaged); dopv = FALSE; if (npv != NULL) { - pool_put(&pmap_pv_pool, npv); + pool_cache_put(&pmap_pv_cache, npv); /* XXXAD defer */ npv = NULL; } } @@ -1709,7 +1707,7 @@ pmap_enter(pm, va, pa, prot, flags) #else /* If it needs ref accounting do nothing. */ if (!(flags & VM_PROT_READ)) { - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); return 0; } #endif @@ -1754,9 +1752,9 @@ pmap_enter(pm, va, pa, prot, flags) ptp = 0; if (!pmap_get_page(&ptp)) { if (flags & PMAP_CANFAIL) { - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); if (npv != NULL) { - pool_put(&pmap_pv_pool, npv); + pool_cache_put(&pmap_pv_cache, npv); /* XXXAD defer */ } return (ENOMEM); } else { @@ -1776,7 +1774,7 @@ pmap_enter(pm, va, pa, prot, flags) pmap_enter_pv(pm, va, pa, pg, npv); } - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); #ifdef DEBUG i = ptelookup_va(va); if (pmapdebug & PDB_ENTER) @@ -1876,7 +1874,7 @@ pmap_remove(pm, va, endva) KASSERT(pm != pmap_kernel() || endva < INTSTACK || va > EINTSTACK); KASSERT(pm != pmap_kernel() || endva < kdata || va > ekdata); - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); DPRINTF(PDB_REMOVE, ("pmap_remove(pm=%p, va=%p, endva=%p):", pm, (void *)(u_long)va, (void *)(u_long)endva)); REMOVE_STAT(calls); @@ -1905,7 +1903,7 @@ pmap_remove(pm, va, endva) if (pg) { pv = pmap_remove_pv(pm, va, pg); if (pv != NULL) { - pool_put(&pmap_pv_pool, pv); + pool_cache_put(&pmap_pv_cache, pv); /* XXXAD defer */ } } @@ -1942,13 +1940,13 @@ pmap_remove(pm, va, endva) REMOVE_STAT(tflushes); tlb_flush_pte(va, pm->pm_ctx); } - simple_unlock(&pm->pm_lock); if (flush && pm->pm_refs) { REMOVE_STAT(flushes); blast_dcache(); } DPRINTF(PDB_REMOVE, ("\n")); pv_check(); + mutex_exit(&pmap_lock); } /* @@ -1974,7 +1972,7 @@ pmap_protect(pm, sva, eva, prot) return; } - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); sva = sva & ~PGOFSET; for (; sva < eva; sva += PAGE_SIZE) { #ifdef DEBUG @@ -2030,8 +2028,8 @@ pmap_protect(pm, sva, eva, prot) tsb_invalidate(pm->pm_ctx, sva); tlb_flush_pte(sva, pm->pm_ctx); } - simple_unlock(&pm->pm_lock); pv_check(); + mutex_exit(&pmap_lock); } /* @@ -2066,7 +2064,7 @@ pmap_extract(pm, va, pap) return TRUE; } else { if (pm != pmap_kernel()) { - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); } data = pseg_get(pm, va); pa = data & TLB_PA_MASK; @@ -2100,7 +2098,7 @@ pmap_extract(pm, va, pap) } #endif if (pm != pmap_kernel()) { - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); } } if ((data & TLB_V) == 0) @@ -2123,7 +2121,7 @@ pmap_kprotect(va, prot) int64_t data; int rv; - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); data = pseg_get(pm, va); KASSERT(data & TLB_V); if (prot & VM_PROT_WRITE) { @@ -2136,7 +2134,7 @@ pmap_kprotect(va, prot) 
panic("pmap_kprotect: pseg_set needs spare! rv=%d", rv); tsb_invalidate(pm->pm_ctx, va); tlb_flush_pte(va, pm->pm_ctx); - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); } /* @@ -2355,6 +2353,7 @@ pmap_clear_modify(pg) #if defined(DEBUG) modified = pmap_is_modified(pg); #endif + mutex_enter(&pmap_lock); /* Clear all mappings */ pv = &pg->mdpage.mdpg_pvh; #ifdef DEBUG @@ -2376,7 +2375,6 @@ pmap_clear_modify(pg) struct pmap *pmap = pv->pv_pmap; vaddr_t va = pv->pv_va & PV_VAMASK; - simple_lock(&pmap->pm_lock); /* First clear the mod bit in the PTE and make it R/O */ data = pseg_get(pmap, va); KASSERT(data & TLB_V); @@ -2400,10 +2398,10 @@ pmap_clear_modify(pg) if (pv->pv_va & PV_MOD) changed |= 1; pv->pv_va &= ~(PV_MOD); - simple_unlock(&pmap->pm_lock); } } pv_check(); + mutex_exit(&pmap_lock); #ifdef DEBUG if (pmap_is_modified(pg)) { printf("pmap_clear_modify(): %p still modified!\n", pg); @@ -2432,6 +2430,7 @@ pmap_clear_reference(pg) int referenced = 0; #endif + mutex_enter(&pmap_lock); #ifdef DEBUG DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_clear_reference(%p)\n", pg)); referenced = pmap_is_referenced(pg); @@ -2453,7 +2452,6 @@ pmap_clear_reference(pg) struct pmap *pmap = pv->pv_pmap; vaddr_t va = pv->pv_va & PV_VAMASK; - simple_lock(&pmap->pm_lock); data = pseg_get(pmap, va); KASSERT(data & TLB_V); DPRINTF(PDB_CHANGEPROT, @@ -2480,7 +2478,6 @@ pmap_clear_reference(pg) if (pv->pv_va & PV_REF) changed |= 1; pv->pv_va &= ~(PV_REF); - simple_unlock(&pmap->pm_lock); } } dcache_flush_page(pa); @@ -2497,8 +2494,12 @@ pmap_clear_reference(pg) printf("pmap_clear_reference: referenced %d changed %d\n", referenced, changed); Debugger(); - } else return (referenced); + } else { + mutex_exit(&pmap_lock); + return (referenced); + } #endif + mutex_exit(&pmap_lock); return (changed); } @@ -2622,15 +2623,15 @@ pmap_unwire(pmap, va) return; } #endif - simple_lock(&pmap->pm_lock); + mutex_enter(&pmap_lock); data = pseg_get(pmap, va & PV_VAMASK); KASSERT(data & TLB_V); data &= ~TLB_TSB_LOCK; rv = pseg_set(pmap, va & PV_VAMASK, data, 0); if (rv & 1) panic("pmap_unwire: pseg_set needs spare! 
rv=%d\n", rv); - simple_unlock(&pmap->pm_lock); pv_check(); + mutex_exit(&pmap_lock); } /* @@ -2648,7 +2649,7 @@ pmap_page_protect(pg, prot) int64_t data = 0; int rv; paddr_t pa = VM_PAGE_TO_PHYS(pg); - pv_entry_t pv, npv, firstpv; + pv_entry_t pv, npv, firstpv, freepv = NULL; struct pmap *pmap; vaddr_t va; bool needflush = FALSE; @@ -2656,6 +2657,7 @@ pmap_page_protect(pg, prot) DPRINTF(PDB_CHANGEPROT, ("pmap_page_protect: pg %p prot %x\n", pg, prot)); + mutex_enter(&pmap_lock); pv = &pg->mdpage.mdpg_pvh; if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) { /* copy_on_write */ @@ -2680,7 +2682,6 @@ pmap_page_protect(pg, prot) pmap = pv->pv_pmap; va = pv->pv_va & PV_VAMASK; - simple_lock(&pmap->pm_lock); DPRINTF(PDB_CHANGEPROT | PDB_REF, ("pmap_page_protect: " "RO va %p of pg %p...\n", @@ -2705,7 +2706,6 @@ pmap_page_protect(pg, prot) tsb_invalidate(pmap->pm_ctx, va); tlb_flush_pte(va, pmap->pm_ctx); } - simple_unlock(&pmap->pm_lock); } } } else { @@ -2721,7 +2721,6 @@ pmap_page_protect(pg, prot) va = npv->pv_va & PV_VAMASK; /* We're removing npv from pv->pv_next */ - simple_lock(&pmap->pm_lock); DPRINTF(PDB_CHANGEPROT|PDB_REF|PDB_REMOVE, ("pmap_page_protect: " "demap va %p of pg %p in pmap %p...\n", @@ -2748,11 +2747,11 @@ pmap_page_protect(pg, prot) if (pmap->pm_refs > 0) { needflush = TRUE; } - simple_unlock(&pmap->pm_lock); /* free the pv */ pv->pv_next = npv->pv_next; - pool_put(&pmap_pv_pool, npv); + npv->pv_next = freepv; + freepv = npv; } pv = firstpv; @@ -2768,7 +2767,6 @@ pmap_page_protect(pg, prot) pmap = pv->pv_pmap; va = pv->pv_va & PV_VAMASK; - simple_lock(&pmap->pm_lock); DPRINTF(PDB_CHANGEPROT|PDB_REF|PDB_REMOVE, ("pmap_page_protect: " "demap va %p of pg %p from pm %p...\n", @@ -2793,7 +2791,6 @@ pmap_page_protect(pg, prot) if (pmap->pm_refs > 0) { needflush = TRUE; } - simple_unlock(&pmap->pm_lock); npv = pv->pv_next; /* dump the first pv */ if (npv) { @@ -2801,7 +2798,8 @@ pmap_page_protect(pg, prot) pv->pv_pmap = npv->pv_pmap; pv->pv_va |= npv->pv_va & PV_MASK; pv->pv_next = npv->pv_next; - pool_put(&pmap_pv_pool, npv); + npv->pv_next = freepv; + freepv = npv; } else { pv->pv_pmap = NULL; pv->pv_next = NULL; @@ -2813,6 +2811,13 @@ pmap_page_protect(pg, prot) } /* We should really only flush the pages we demapped. */ pv_check(); + mutex_exit(&pmap_lock); + + /* Catch up on deferred frees. */ + for (; freepv != NULL; freepv = npv) { + npv = freepv->pv_next; + pool_cache_put(&pmap_pv_cache, freepv); + } } #ifdef PMAP_COUNT_DEBUG @@ -2828,7 +2833,7 @@ pmap_count_res(struct pmap *pm) /* Almost the same as pmap_collect() */ /* Don't want one of these pages reused while we're reading it. */ - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); n = 0; for (i = 0; i < STSZ; i++) { pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], @@ -2850,7 +2855,7 @@ pmap_count_res(struct pmap *pm) } } } - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); if (pm->pm_stats.resident_count != n) printf("pmap_count_resident: pm_stats = %ld, counted: %d\n", @@ -2871,7 +2876,7 @@ pmap_count_wired(struct pmap *pm) /* Almost the same as pmap_collect() */ /* Don't want one of these pages reused while we're reading it. 
*/ - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); n = 0; for (i = 0; i < STSZ; i++) { pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], @@ -2893,7 +2898,7 @@ pmap_count_wired(struct pmap *pm) } } } - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); if (pm->pm_stats.wired_count != n) printf("pmap_count_wired: pm_stats = %ld, counted: %d\n", @@ -2921,7 +2926,7 @@ ctx_alloc(struct pmap *pm) KASSERT(pm != pmap_kernel()); KASSERT(pm == curproc->p_vmspace->vm_map.pmap); - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); /* XXXAD ctxswitch */ ctx = pmap_next_ctx++; /* @@ -2949,7 +2954,7 @@ ctx_alloc(struct pmap *pm) ctxbusy[ctx] = pm->pm_physaddr; LIST_INSERT_HEAD(&pmap_ctxlist, pm, pm_list); pm->pm_ctx = ctx; - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); DPRINTF(PDB_CTX_ALLOC, ("ctx_alloc: allocated ctx %d\n", ctx)); return ctx; } @@ -3000,6 +3005,8 @@ pmap_enter_pv(struct pmap *pmap, vaddr_t va, paddr_t pa, struct vm_page *pg, { pv_entry_t pvh; + KASSERT(mutex_owned(&pmap_lock)); + pvh = &pg->mdpage.mdpg_pvh; DPRINTF(PDB_ENTER, ("pmap_enter: pvh %p: was %lx/%p/%p\n", pvh, pvh->pv_va, pvh->pv_pmap, pvh->pv_next)); @@ -3060,6 +3067,8 @@ pmap_remove_pv(struct pmap *pmap, vaddr_t va, struct vm_page *pg) pv_entry_t pvh, npv, pv; int64_t data = 0; + KASSERT(mutex_owned(&pmap_lock)); + pvh = &pg->mdpage.mdpg_pvh; DPRINTF(PDB_REMOVE, ("pmap_remove_pv(pm=%p, va=%p, pg=%p)\n", pmap, @@ -3135,14 +3144,14 @@ pmap_page_cache(struct pmap *pm, paddr_t pa, int mode) vaddr_t va; int rv; + KASSERT(mutex_owned(&pmap_lock)); + DPRINTF(PDB_ENTER, ("pmap_page_uncache(%llx)\n", (unsigned long long)pa)); pg = PHYS_TO_VM_PAGE(pa); pv = &pg->mdpage.mdpg_pvh; while (pv) { va = pv->pv_va & PV_VAMASK; - if (pv->pv_pmap != pm) - simple_lock(&pv->pv_pmap->pm_lock); if (pv->pv_va & PV_NC) { int64_t data; @@ -3174,8 +3183,6 @@ pmap_page_cache(struct pmap *pm, paddr_t pa, int mode) panic("pmap_page_cache: pseg_set needs" " spare! rv=%d\n", rv); } - if (pv->pv_pmap != pm) - simple_unlock(&pv->pv_pmap->pm_lock); if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) { /* Force reload -- cache bits have changed */ tsb_invalidate(pv->pv_pmap->pm_ctx, va); diff --git a/sys/arch/sun3/dev/fd.c b/sys/arch/sun3/dev/fd.c index 4843c883dcdd..76e0dd1d4420 100644 --- a/sys/arch/sun3/dev/fd.c +++ b/sys/arch/sun3/dev/fd.c @@ -1,4 +1,4 @@ -/* $NetBSD: fd.c,v 1.60 2007/12/04 15:12:07 tsutsui Exp $ */ +/* $NetBSD: fd.c,v 1.61 2008/01/02 11:48:30 ad Exp $ */ /*- * Copyright (c) 1990 The Regents of the University of California. 
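pmap_page_protect() above now chains dead pv entries onto a local freepv list while pmap_lock is held and only calls pool_cache_put() once the lock has been dropped. A minimal sketch of that deferred-free idiom; pv_item, obj_lock and pv_cache are assumed names for illustration:

#include <sys/param.h>
#include <sys/mutex.h>
#include <sys/pool.h>

struct pv_item {
	struct pv_item	*pv_next;
};

extern kmutex_t		 obj_lock;	/* assumed: covers the pv list */
extern struct pool_cache pv_cache;	/* assumed: backing allocator */

static void
pv_remove_all(struct pv_item **headp)
{
	struct pv_item *pv, *npv, *freepv = NULL;

	mutex_enter(&obj_lock);
	for (pv = *headp; pv != NULL; pv = npv) {
		npv = pv->pv_next;
		/* Unlink under the lock, but defer the pool_cache_put(). */
		pv->pv_next = freepv;
		freepv = pv;
	}
	*headp = NULL;
	mutex_exit(&obj_lock);

	/* Catch up on deferred frees now that no locks are held. */
	for (pv = freepv; pv != NULL; pv = npv) {
		npv = pv->pv_next;
		pool_cache_put(&pv_cache, pv);
	}
}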
@@ -72,7 +72,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.60 2007/12/04 15:12:07 tsutsui Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.61 2008/01/02 11:48:30 ad Exp $"); #include "opt_ddb.h" @@ -1765,18 +1765,19 @@ fdioctl(dev_t dev, u_long cmd, void *addr, int flag, struct lwp *l) int fdformat(dev_t dev, struct ne7_fd_formb *finfo, struct proc *p) { - int rv = 0, s; + int rv = 0; struct fd_softc *fd = fd_cd.cd_devs[FDUNIT(dev)]; struct fd_type *type = fd->sc_type; struct buf *bp; /* set up a buffer header for fdstrategy() */ - bp = (struct buf *)malloc(sizeof(struct buf), M_TEMP, M_NOWAIT); + bp = getiobuf(NULL, false); if (bp == 0) return (ENOBUFS); memset((void *)bp, 0, sizeof(struct buf)); - bp->b_flags = B_BUSY | B_PHYS | B_FORMAT; + bp->b_flags = B_PHYS | B_FORMAT; + bp->b_cflags = BC_BUSY; bp->b_proc = p; bp->b_dev = dev; @@ -1800,13 +1801,14 @@ fdformat(dev_t dev, struct ne7_fd_formb *finfo, struct proc *p) fdstrategy(bp); /* ...and wait for it to complete */ - s = splbio(); - while (!(bp->b_flags & B_DONE)) { - rv = tsleep((void *)bp, PRIBIO, "fdform", 20 * hz); + /* XXX dodgy */ + mutex_enter(bp->b_objlock); + while (!(bp->b_oflags & BO_DONE)) { + rv = cv_timedwait(&bp->b_done, bp->b_objlock, 20 * hz); if (rv == EWOULDBLOCK) break; } - splx(s); + mutex_exit(bp->b_objlock); if (rv == EWOULDBLOCK) { /* timed out */ @@ -1814,7 +1816,7 @@ rv = EIO; biodone(bp); } else if (bp->b_error != 0) rv = bp->b_error; - free(bp, M_TEMP); + putiobuf(bp); return (rv); } @@ -1943,14 +1945,13 @@ fd_read_md_image(size_t *sizep, void **addrp) bp->b_error = 0; bp->b_resid = 0; bp->b_proc = NULL; - bp->b_flags = B_BUSY | B_PHYS | B_RAW | B_READ; + bp->b_flags = B_PHYS | B_RAW | B_READ; + bp->b_cflags = BC_BUSY; bp->b_blkno = btodb(offset); bp->b_bcount = DEV_BSIZE; bp->b_data = addr; fdstrategy(bp); - while ((bp->b_flags & B_DONE) == 0) { - tsleep((void *)bp, PRIBIO + 1, "physio", 0); - } + biowait(bp); if (bp->b_error) panic("fd: mountroot: fdread error %d", bp->b_error); diff --git a/sys/arch/sun3/dev/xd.c b/sys/arch/sun3/dev/xd.c index b1fc811505de..3fee56a37b88 100644 --- a/sys/arch/sun3/dev/xd.c +++ b/sys/arch/sun3/dev/xd.c @@ -1,4 +1,4 @@ -/* $NetBSD: xd.c,v 1.60 2007/10/17 19:57:45 garbled Exp $ */ +/* $NetBSD: xd.c,v 1.61 2008/01/02 11:48:30 ad Exp $ */ /* * @@ -52,7 +52,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: xd.c,v 1.60 2007/10/17 19:57:45 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: xd.c,v 1.61 2008/01/02 11:48:30 ad Exp $"); #undef XDC_DEBUG /* full debug */ #define XDC_DIAG /* extra sanity checks */ @@ -309,8 +309,8 @@ xddummystrat(struct buf *bp) if (bp->b_bcount != XDFM_BPS) panic("xddummystrat"); memcpy(bp->b_data, xd_labeldata, XDFM_BPS); - bp->b_flags |= B_DONE; - bp->b_flags &= ~B_BUSY; + bp->b_oflags |= BO_DONE; + bp->b_cflags &= ~BC_BUSY; } int diff --git a/sys/arch/sun3/dev/xy.c b/sys/arch/sun3/dev/xy.c index df6e1b4c6410..32632712a4e9 100644 --- a/sys/arch/sun3/dev/xy.c +++ b/sys/arch/sun3/dev/xy.c @@ -1,4 +1,4 @@ -/* $NetBSD: xy.c,v 1.63 2007/10/17 19:57:45 garbled Exp $ */ +/* $NetBSD: xy.c,v 1.64 2008/01/02 11:48:30 ad Exp $ */ /* * @@ -52,7 +52,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: xy.c,v 1.63 2007/10/17 19:57:45 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: xy.c,v 1.64 2008/01/02 11:48:30 ad Exp $"); #undef XYC_DEBUG /* full debug */ #undef XYC_DIAG /* extra sanity checks */ @@ -247,8 +247,8 @@ xydummystrat(struct buf *bp) if (bp->b_bcount != XYFM_BPS) panic("xydummystrat"); memcpy(bp->b_data, xy_labeldata,
XYFM_BPS); - bp->b_flags |= B_DONE; - bp->b_flags &= ~B_BUSY; + bp->b_oflags |= BO_DONE; + bp->b_cflags &= ~BC_BUSY; } int diff --git a/sys/arch/vax/uba/ts.c b/sys/arch/vax/uba/ts.c index 8cf1c5c48742..fef715aa293c 100644 --- a/sys/arch/vax/uba/ts.c +++ b/sys/arch/vax/uba/ts.c @@ -1,4 +1,4 @@ -/* $NetBSD: ts.c,v 1.34 2007/10/17 19:57:58 garbled Exp $ */ +/* $NetBSD: ts.c,v 1.35 2008/01/02 11:48:31 ad Exp $ */ /*- * Copyright (c) 1991 The Regents of the University of California. @@ -66,7 +66,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ts.c,v 1.34 2007/10/17 19:57:58 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ts.c,v 1.35 2008/01/02 11:48:31 ad Exp $"); #define TS11_COMPAT /* don't use extended features provided by TS05 */ @@ -371,27 +371,15 @@ tscommand (dev, cmd, count) int count; { register struct buf *bp; - register int s; trace (("tscommand (%d, %x, %d)\n", TS_UNIT(dev), cmd, count)); - s = splbio(); bp = &ts_cbuf[TS_UNIT(dev)]; - - while (bp->b_flags & B_BUSY) { - /* - * This special check is because B_BUSY never - * gets cleared in the non-waiting rewind case. ??? - */ - if (bp->b_bcount == 0 && (bp->b_flags & B_DONE)) - break; - bp->b_flags |= B_WANTED; - (void) tsleep(bp, PRIBIO, "tscmd", 0); - /* check MOT-flag !!! */ - } - bp->b_flags = B_BUSY | B_READ; - - splx(s); + mutex_enter(&bufcache_lock); + while (bbusy(bp, false, 0) != 0) + ; + mutex_exit(&bufcache_lock); + bp->b_flags |= B_READ; /* * Load the buffer. The b_count field gets used to hold the command @@ -414,8 +402,10 @@ } debug (("tscommand: calling biowait ...\n")); biowait (bp); - if (bp->b_flags & B_WANTED) - wakeup ((void *)bp); + mutex_enter(&bufcache_lock); + bp->b_cflags &= ~BC_WANTED; + cv_broadcast(&bp->b_busy); + mutex_exit(&bufcache_lock); bp->b_error = 0; } diff --git a/sys/arch/vax/vax/cfl.c b/sys/arch/vax/vax/cfl.c index 852db1803bdb..fe854510b909 100644 --- a/sys/arch/vax/vax/cfl.c +++ b/sys/arch/vax/vax/cfl.c @@ -1,4 +1,4 @@ -/* $NetBSD: cfl.c,v 1.16 2007/10/17 19:57:59 garbled Exp $ */ +/* $NetBSD: cfl.c,v 1.17 2008/01/02 11:48:31 ad Exp $ */ /*- * Copyright (c) 1982, 1986 The Regents of the University of California. * All rights reserved.
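The floppy, xd/xy and tape hunks above all converge on one completion protocol: BO_DONE is set and tested under bp->b_objlock, and the b_done condition variable replaces tsleep()/wakeup() on the buffer address. A minimal sketch of both sides of that protocol, assuming an already-initialized struct buf; in-tree code would normally just call biowait() and biodone():

#include <sys/param.h>
#include <sys/buf.h>
#include <sys/mutex.h>
#include <sys/condvar.h>

/* Consumer side: wait up to 'ticks' for the I/O to finish. */
static int
buf_timedwait(struct buf *bp, int ticks)
{
	int error = 0;

	mutex_enter(bp->b_objlock);
	while ((bp->b_oflags & BO_DONE) == 0) {
		error = cv_timedwait(&bp->b_done, bp->b_objlock, ticks);
		if (error == EWOULDBLOCK)
			break;		/* timed out; caller recovers */
	}
	mutex_exit(bp->b_objlock);
	return error;
}

/* Producer side: the heart of what biodone() does for a plain buffer. */
static void
buf_markdone(struct buf *bp)
{

	mutex_enter(bp->b_objlock);
	bp->b_oflags |= BO_DONE;
	cv_broadcast(&bp->b_done);
	mutex_exit(bp->b_objlock);
}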
@@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cfl.c,v 1.16 2007/10/17 19:57:59 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cfl.c,v 1.17 2008/01/02 11:48:31 ad Exp $"); #include #include @@ -195,16 +195,17 @@ cflrw(dev, uio, flag) break; } if (uio->uio_rw == UIO_WRITE) { - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; } else { - bp->b_flags &= ~(B_WRITE|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_WRITE); bp->b_flags |= B_READ; } s = splconsmedia(); cflstart(); - while ((bp->b_flags & B_DONE) == 0) - (void) tsleep(bp, PRIBIO, "cflrw", 0); + biowait(bp); splx(s); if (bp->b_error != 0) { error = bp->b_error; @@ -284,7 +285,6 @@ void cflrint(int ch) { struct buf *bp = cfltab.cfl_buf; - int s; switch (cfltab.cfl_active) { case CFL_NEXT: @@ -292,10 +292,10 @@ cflrint(int ch) cfltab.cfl_active = CFL_GETIN; else { cfltab.cfl_active = CFL_IDLE; - s = splbio(); - bp->b_flags |= B_DONE; - splx(s); - wakeup(bp); + mutex_enter(bp->b_objlock); + bp->b_oflags |= BO_DONE; + cv_broadcast(&bp->b_done); + mutex_exit(bp->b_objlock); } break; @@ -303,10 +303,10 @@ cflrint(int ch) *cfltab.cfl_xaddr++ = ch & 0377; if (--bp->b_bcount==0) { cfltab.cfl_active = CFL_IDLE; - s = splbio(); - bp->b_flags |= B_DONE; - splx(s); - wakeup(bp); + mutex_enter(bp->b_objlock); + bp->b_oflags |= BO_DONE; + cv_broadcast(&bp->b_done); + mutex_exit(bp->b_objlock); } break; } diff --git a/sys/arch/vax/vax/crl.c b/sys/arch/vax/vax/crl.c index 02539cadaa2b..ce2bd24e330e 100644 --- a/sys/arch/vax/vax/crl.c +++ b/sys/arch/vax/vax/crl.c @@ -1,4 +1,4 @@ -/* $NetBSD: crl.c,v 1.23 2007/10/17 19:57:59 garbled Exp $ */ +/* $NetBSD: crl.c,v 1.24 2008/01/02 11:48:31 ad Exp $ */ /*- * Copyright (c) 1982, 1986 The Regents of the University of California. * All rights reserved. @@ -36,7 +36,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: crl.c,v 1.23 2007/10/17 19:57:59 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: crl.c,v 1.24 2008/01/02 11:48:31 ad Exp $"); #include #include @@ -150,16 +150,17 @@ crlrw(dev, uio, flag) break; } if (uio->uio_rw == UIO_WRITE) { - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; } else { - bp->b_flags &= ~(B_WRITE|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_WRITE); bp->b_flags |= B_READ; } s = splconsmedia(); crlstart(); - while ((bp->b_flags & B_DONE) == 0) - (void) tsleep(bp, PRIBIO, "crlrw", 0); + biowait(bp); splx(s); if (bp->b_error != 0) { error = bp->b_error; @@ -231,7 +232,7 @@ crlintr(arg) case CRL_F_READ: case CRL_F_WRITE: - bp->b_flags |= B_DONE; + bp->b_oflags |= BO_DONE; } crltab.crl_active = 0; wakeup((void *)bp); @@ -254,7 +255,7 @@ crlintr(arg) case CRL_S_ABORT: crltab.crl_active = CRL_F_RETSTS; mtpr(STXCS_IE | CRL_F_RETSTS, PR_STXCS); - bp->b_flags |= B_DONE; + bp->b_oflags |= BO_DONE; bp->b_error = EIO; break; @@ -266,9 +267,9 @@ crlintr(arg) case CRL_S_HNDSHK: printf("crl: hndshk error\n"); /* dump out some status too? 
*/ crltab.crl_active = 0; - bp->b_flags |= B_DONE; + bp->b_oflags |= BO_DONE; bp->b_error = EIO; - wakeup((void *)bp); + cv_broadcast(&bp->b_done); break; case CRL_S_HWERR: diff --git a/sys/arch/vax/vax/disksubr.c b/sys/arch/vax/vax/disksubr.c index f043b89b7c12..88dabe37f660 100644 --- a/sys/arch/vax/vax/disksubr.c +++ b/sys/arch/vax/vax/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.45 2007/10/17 19:57:59 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.46 2008/01/02 11:48:31 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.45 2007/10/17 19:57:59 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.46 2008/01/02 11:48:31 ad Exp $"); #include #include @@ -263,7 +263,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), goto done; dlp = (struct disklabel *)((char *)bp->b_data + LABELOFFSET); bcopy(lp, dlp, sizeof(struct disklabel)); - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/x68k/x68k/disksubr.c b/sys/arch/x68k/x68k/disksubr.c index 297fff1b72c0..cd3e959bab05 100644 --- a/sys/arch/x68k/x68k/disksubr.c +++ b/sys/arch/x68k/x68k/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.32 2007/10/17 19:58:04 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.33 2008/01/02 11:48:32 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.32 2007/10/17 19:58:04 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.33 2008/01/02 11:48:32 ad Exp $"); #include "opt_compat_netbsd.h" @@ -138,7 +138,7 @@ dodospart: labelsz = howmany(sizeof(struct cpu_disklabel), lp->d_secsize) * lp->d_secsize; bp->b_bcount = labelsz; /* to support < 512B/sector disks */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); (*strat)(bp); /* if successful, wander through Human68k partition table */ @@ -213,7 +213,7 @@ dobadsect: i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEF_BSIZE) @@ -349,7 +349,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); @@ -368,7 +369,7 @@ dodospart: /* read the x68k disk magic */ bp->b_blkno = DOSBBSECTOR; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_WRITE|B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_cylinder = DOSBBSECTOR / lp->d_secpercyl; (*strat)(bp); @@ -381,7 +382,7 @@ dodospart: labelsz = howmany(sizeof(struct cpu_disklabel), lp->d_secsize) * lp->d_secsize; bp->b_bcount = labelsz; - bp->b_flags &= ~(B_WRITE|B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_cylinder = DOSPARTOFF / lp->d_secpercyl; (*strat)(bp); @@ -441,7 +442,8 @@ dodospart: dp->dp_start = start; dp->dp_size = size; } - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/x86/x86/pmap.c b/sys/arch/x86/x86/pmap.c index c07f4a0f5ad5..fc57b5def559 100644 --- a/sys/arch/x86/x86/pmap.c +++ 
b/sys/arch/x86/x86/pmap.c @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.15 2007/12/20 23:46:11 ad Exp $ */ +/* $NetBSD: pmap.c,v 1.16 2008/01/02 11:48:33 ad Exp $ */ /* * Copyright (c) 2007 Manuel Bouyer. @@ -154,7 +154,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.15 2007/12/20 23:46:11 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.16 2008/01/02 11:48:33 ad Exp $"); #include "opt_user_ldt.h" #include "opt_lockdebug.h" @@ -364,20 +364,6 @@ long nbpd[] = NBPD_INITIALIZER; pd_entry_t *normal_pdes[] = PDES_INITIALIZER; pd_entry_t *alternate_pdes[] = APDES_INITIALIZER; -/* - * locking data structures. to enable the locks, changes from the - * 'vmlocking' cvs branch are required. for now, just stub them out. - */ - -#define rw_enter(a, b) /* nothing */ -#define rw_exit(a) /* nothing */ -#define mutex_enter(a) simple_lock(a) -#define mutex_exit(a) simple_unlock(a) -#define mutex_init(a, b, c) simple_lock_init(a) -#define mutex_owned(a) (1) -#define mutex_destroy(a) /* nothing */ -#define kmutex_t struct simplelock - static kmutex_t pmaps_lock; static krwlock_t pmap_main_lock; @@ -1986,8 +1972,6 @@ pmap_destroy(struct pmap *pmap) * remove it from global list of pmaps */ - KERNEL_LOCK(1, NULL); - mutex_enter(&pmaps_lock); LIST_REMOVE(pmap, pm_list); mutex_exit(&pmaps_lock); @@ -2025,8 +2009,6 @@ pmap_destroy(struct pmap *pmap) for (i = 0; i < PTP_LEVELS - 1; i++) mutex_destroy(&pmap->pm_obj[i].vmobjlock); pool_cache_put(&pmap_cache, pmap); - - KERNEL_UNLOCK_ONE(NULL); } /* diff --git a/sys/arch/xen/xen/xbdback.c b/sys/arch/xen/xen/xbdback.c index 6528902df9f8..6b7518812988 100644 --- a/sys/arch/xen/xen/xbdback.c +++ b/sys/arch/xen/xen/xbdback.c @@ -1,4 +1,4 @@ -/* $NetBSD: xbdback.c,v 1.28 2007/11/26 19:01:27 pooka Exp $ */ +/* $NetBSD: xbdback.c,v 1.29 2008/01/02 11:48:33 ad Exp $ */ /* * Copyright (c) 2005 Manuel Bouyer. 
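With the vmlocking stub macros deleted, pmaps_lock in the x86 pmap is a real kmutex and no longer needs the kernel lock held around it. A minimal sketch of guarding a global pmap list that way; mypmap and the function names are hypothetical:

#include <sys/param.h>
#include <sys/mutex.h>
#include <sys/queue.h>

struct mypmap {
	LIST_ENTRY(mypmap)	pm_list;	/* global list linkage */
};

static LIST_HEAD(, mypmap)	all_pmaps = LIST_HEAD_INITIALIZER(all_pmaps);
static kmutex_t			all_pmaps_lock;

static void
pmap_list_init(void)
{

	/* IPL_NONE: the list is never touched from interrupt context. */
	mutex_init(&all_pmaps_lock, MUTEX_DEFAULT, IPL_NONE);
}

static void
pmap_list_remove(struct mypmap *pm)
{

	mutex_enter(&all_pmaps_lock);
	LIST_REMOVE(pm, pm_list);
	mutex_exit(&all_pmaps_lock);
}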
@@ -853,6 +853,8 @@ xbdback_co_io_gotreq(struct xbdback_instance *xbdi, void *obj) static void * xbdback_co_io_loop(struct xbdback_instance *xbdi, void *obj) { + struct xbdback_io *xio; + (void)obj; if (xbdi->segno < xbdi->xen_req->nr_segments) { unsigned long this_fas, last_fas; @@ -898,7 +900,9 @@ xbdback_co_io_loop(struct xbdback_instance *xbdi, void *obj) if (xbdi->io == NULL) { xbdi->cont = xbdback_co_io_gotio; - return xbdback_pool_get(&xbdback_io_pool, xbdi); + xio = xbdback_pool_get(&xbdback_io_pool, xbdi); + buf_init(&xio->xio_buf); + return xio; } else { xbdi->cont = xbdback_co_io_gotio2; } @@ -929,16 +933,18 @@ xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj) start_offset = blkif_first_sect(xbdi->this_fas) * VBD_BSIZE; if (xbdi->xen_req->operation == BLKIF_OP_WRITE) { - buf_flags = B_WRITE | B_CALL; + buf_flags = B_WRITE; } else { - buf_flags = B_READ | B_CALL; + buf_flags = B_READ; } - BUF_INIT(&xbd_io->xio_buf); xbd_io->xio_buf.b_flags = buf_flags; + xbd_io->xio_buf.b_cflags = 0; + xbd_io->xio_buf.b_oflags = 0; xbd_io->xio_buf.b_iodone = xbdback_iodone; xbd_io->xio_buf.b_proc = NULL; xbd_io->xio_buf.b_vp = xbdi->req_vbd->vp; + xbd_io->xio_buf.b_objlock = &xbdi->req_vbd->vp->v_interlock; xbd_io->xio_buf.b_dev = xbdi->req_vbd->dev; xbd_io->xio_buf.b_blkno = xbdi->next_sector; xbd_io->xio_buf.b_bcount = 0; @@ -1133,12 +1139,14 @@ xbdback_iodone(struct buf *bp) xbdback_pool_put(&xbdback_request_pool, xbd_req); } xbdi_put(xbdi); + buf_destroy(&xbd_io->xio_buf); xbdback_pool_put(&xbdback_io_pool, xbd_io); } static void * xbdback_co_probe(struct xbdback_instance *xbdi, void *obj) { + struct xbdback_io *xio; (void)obj; /* * There should be only one page in the request. Map it and store @@ -1153,7 +1161,9 @@ xbdback_co_probe(struct xbdback_instance *xbdi, void *obj) return xbdi; } xbdi->cont = xbdback_co_probe_gotio; - return xbdback_pool_get(&xbdback_io_pool, xbdi); + xio = xbdback_pool_get(&xbdback_io_pool, xbdi); + buf_init(&xio->xio_buf); + return xio; } static void * @@ -1202,6 +1212,7 @@ xbdback_co_probe_gotvm(struct xbdback_instance *xbdi, void *obj) xbdback_unmap_shm(xbdi->io); XENPRINTF(("xbdback_probe: nreplies=%d\n", i)); xbdback_send_reply(xbdi, req->id, req->operation, i); + buf_destroy(&xbdi->io->xio_buf); xbdback_pool_put(&xbdback_io_pool, xbdi->io); xbdi->io = NULL; xbdi->cont = xbdback_co_main_incr; diff --git a/sys/arch/xen/xen/xbdback_xenbus.c b/sys/arch/xen/xen/xbdback_xenbus.c index 394923b16fdd..f0d06b36fa0f 100644 --- a/sys/arch/xen/xen/xbdback_xenbus.c +++ b/sys/arch/xen/xen/xbdback_xenbus.c @@ -1,4 +1,4 @@ -/* $NetBSD: xbdback_xenbus.c,v 1.11 2007/11/26 19:01:28 pooka Exp $ */ +/* $NetBSD: xbdback_xenbus.c,v 1.12 2008/01/02 11:48:33 ad Exp $ */ /* * Copyright (c) 2006 Manuel Bouyer. 
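xbdback embeds its struct buf in a pool-managed xbdback_io, so each buffer is now constructed with buf_init() before use and torn down with buf_destroy() before the container is recycled, and b_objlock is pointed at the backing vnode's interlock. A minimal sketch of that lifecycle; my_io and its functions are hypothetical names:

#include <sys/param.h>
#include <sys/buf.h>
#include <sys/vnode.h>

/* Hypothetical container with an embedded buffer, like xbdback_io. */
struct my_io {
	struct buf	mio_buf;
};

static void
my_io_submit(struct my_io *io, struct vnode *vp, void (*iodone)(struct buf *))
{
	struct buf *bp = &io->mio_buf;

	buf_init(bp);			/* construct b_done/b_busy before use */
	bp->b_flags = B_READ;
	bp->b_cflags = 0;
	bp->b_oflags = 0;
	bp->b_iodone = iodone;		/* biodone() will call this */
	bp->b_proc = NULL;
	bp->b_vp = vp;
	bp->b_objlock = &vp->v_interlock;
	/* b_dev, b_blkno, b_bcount and b_data set up as usual (omitted) */
	VOP_STRATEGY(vp, bp);
}

static void
my_io_recycle(struct my_io *io)
{

	/* Must precede handing the container back to its pool. */
	buf_destroy(&io->mio_buf);
}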
@@ -953,6 +953,8 @@ xbdback_co_io_gotreq(struct xbdback_instance *xbdi, void *obj) static void * xbdback_co_io_loop(struct xbdback_instance *xbdi, void *obj) { + struct xbdback_io *xio; + (void)obj; if (xbdi->xbdi_segno < xbdi->xbdi_xen_req.nr_segments) { uint8_t this_fs, this_ls, last_fs, last_ls; @@ -1014,7 +1016,9 @@ xbdback_co_io_loop(struct xbdback_instance *xbdi, void *obj) if (xbdi->xbdi_io == NULL) { xbdi->xbdi_cont = xbdback_co_io_gotio; - return xbdback_pool_get(&xbdback_io_pool, xbdi); + xio = xbdback_pool_get(&xbdback_io_pool, xbdi); + buf_init(&xio->xio_buf); + return xio; } else { xbdi->xbdi_cont = xbdback_co_io_gotio2; } @@ -1045,16 +1049,18 @@ xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj) start_offset = xbdi->xbdi_this_fs * VBD_BSIZE; if (xbdi->xbdi_xen_req.operation == BLKIF_OP_WRITE) { - buf_flags = B_WRITE | B_CALL; + buf_flags = B_WRITE; } else { - buf_flags = B_READ | B_CALL; + buf_flags = B_READ; } - BUF_INIT(&xbd_io->xio_buf); xbd_io->xio_buf.b_flags = buf_flags; + xbd_io->xio_buf.b_cflags = 0; + xbd_io->xio_buf.b_oflags = 0; xbd_io->xio_buf.b_iodone = xbdback_iodone; xbd_io->xio_buf.b_proc = NULL; xbd_io->xio_buf.b_vp = xbdi->xbdi_vp; + xbd_io->xio_buf.b_objlock = &xbdi->xbdi_vp->v_interlock; xbd_io->xio_buf.b_dev = xbdi->xbdi_dev; xbd_io->xio_buf.b_blkno = xbdi->xbdi_next_sector; xbd_io->xio_buf.b_bcount = 0; @@ -1249,6 +1255,7 @@ xbdback_iodone(struct buf *bp) xbdback_pool_put(&xbdback_request_pool, xbd_req); } xbdi_put(xbdi); + buf_destroy(&xbd_io->xio_buf); xbdback_pool_put(&xbdback_io_pool, xbd_io); } diff --git a/sys/coda/coda_vnops.c b/sys/coda/coda_vnops.c index 77ef9b11c108..a637338a523f 100644 --- a/sys/coda/coda_vnops.c +++ b/sys/coda/coda_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: coda_vnops.c,v 1.65 2007/12/25 18:33:35 perry Exp $ */ +/* $NetBSD: coda_vnops.c,v 1.66 2008/01/02 11:48:34 ad Exp $ */ /* * @@ -46,7 +46,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: coda_vnops.c,v 1.65 2007/12/25 18:33:35 perry Exp $"); +__KERNEL_RCSID(0, "$NetBSD: coda_vnops.c,v 1.66 2008/01/02 11:48:34 ad Exp $"); #include #include @@ -872,7 +872,7 @@ coda_inactive(void *v) printf("coda_inactive: %p ovp != NULL\n", vp); } VOP_UNLOCK(vp, 0); - vgone(vp); + *ap->a_recycle = true; } MARK_INT_SAT(CODA_INACTIVE_STATS); @@ -2002,7 +2002,7 @@ coda_getpages(void *v) /* Check for control object. */ if (IS_CTL_VP(vp)) { printf("coda_getpages: control object %p\n", vp); - simple_unlock(&vp->v_uobj.vmobjlock); + mutex_exit(&vp->v_uobj.vmobjlock); return(EINVAL); } @@ -2017,7 +2017,7 @@ coda_getpages(void *v) waslocked = VOP_ISLOCKED(vp); /* Drop the vmobject lock. */ - simple_unlock(&vp->v_uobj.vmobjlock); + mutex_exit(&vp->v_uobj.vmobjlock); /* Get container file if not already present. */ if (cp->c_ovp == NULL) { @@ -2065,7 +2065,7 @@ coda_getpages(void *v) ap->a_vp = cp->c_ovp; /* Get the lock on the container vnode, and call getpages on it. */ - simple_lock(&ap->a_vp->v_uobj.vmobjlock); + mutex_enter(&ap->a_vp->v_uobj.vmobjlock); error = VCALL(ap->a_vp, VOFFSET(vop_getpages), ap); /* If we opened the vnode, we must close it. */ @@ -2106,7 +2106,7 @@ coda_putpages(void *v) int error; /* Drop the vmobject lock. */ - simple_unlock(&vp->v_uobj.vmobjlock); + mutex_exit(&vp->v_uobj.vmobjlock); /* Check for control object. */ if (IS_CTL_VP(vp)) { @@ -2127,7 +2127,7 @@ coda_putpages(void *v) ap->a_vp = cp->c_ovp; /* Get the lock on the container vnode, and call putpages on it.
*/ - simple_lock(&ap->a_vp->v_uobj.vmobjlock); + mutex_enter(&ap->a_vp->v_uobj.vmobjlock); error = VCALL(ap->a_vp, VOFFSET(vop_putpages), ap); return error; diff --git a/sys/compat/svr4/svr4_fcntl.c b/sys/compat/svr4/svr4_fcntl.c index 73a15a07fefa..324846a43925 100644 --- a/sys/compat/svr4/svr4_fcntl.c +++ b/sys/compat/svr4/svr4_fcntl.c @@ -1,4 +1,4 @@ -/* $NetBSD: svr4_fcntl.c,v 1.64 2007/12/20 23:03:04 dsl Exp $ */ +/* $NetBSD: svr4_fcntl.c,v 1.65 2008/01/02 11:48:35 ad Exp $ */ /*- * Copyright (c) 1994, 1997 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: svr4_fcntl.c,v 1.64 2007/12/20 23:03:04 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: svr4_fcntl.c,v 1.65 2008/01/02 11:48:35 ad Exp $"); #include #include @@ -253,9 +253,9 @@ fd_revoke(struct lwp *l, int fd, register_t *retval) KAUTH_GENERIC_ISSUSER, NULL)) != 0) goto out; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); revoke = (vp->v_usecount > 1 || (vp->v_iflag & VI_ALIASED)); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); if (revoke) VOP_REVOKE(vp, REVOKEALL); out: diff --git a/sys/dev/ata/ata_raid.c b/sys/dev/ata/ata_raid.c index 7049f2888161..2d30b5d7116a 100644 --- a/sys/dev/ata/ata_raid.c +++ b/sys/dev/ata/ata_raid.c @@ -1,4 +1,4 @@ -/* $NetBSD: ata_raid.c,v 1.23 2007/07/09 21:00:30 ad Exp $ */ +/* $NetBSD: ata_raid.c,v 1.24 2008/01/02 11:48:36 ad Exp $ */ /* * Copyright (c) 2003 Wasabi Systems, Inc. @@ -40,7 +40,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ata_raid.c,v 1.23 2007/07/09 21:00:30 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ata_raid.c,v 1.24 2008/01/02 11:48:36 ad Exp $"); #include #include @@ -294,8 +294,7 @@ ata_raid_config_block_rw(struct vnode *vp, daddr_t blkno, void *tbuf, struct buf *bp; int error; - bp = getiobuf(); - bp->b_vp = vp; + bp = getiobuf(vp, true); bp->b_blkno = blkno; bp->b_bcount = bp->b_resid = size; bp->b_flags = bflags; diff --git a/sys/dev/ata/ld_ataraid.c b/sys/dev/ata/ld_ataraid.c index e44b68882fe4..21a88d86e959 100644 --- a/sys/dev/ata/ld_ataraid.c +++ b/sys/dev/ata/ld_ataraid.c @@ -1,4 +1,4 @@ -/* $NetBSD: ld_ataraid.c,v 1.22 2007/11/26 19:01:36 pooka Exp $ */ +/* $NetBSD: ld_ataraid.c,v 1.23 2008/01/02 11:48:37 ad Exp $ */ /* * Copyright (c) 2003 Wasabi Systems, Inc. @@ -45,7 +45,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ld_ataraid.c,v 1.22 2007/11/26 19:01:36 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ld_ataraid.c,v 1.23 2008/01/02 11:48:37 ad Exp $"); #include "rnd.h" @@ -246,8 +246,10 @@ ld_ataraid_make_cbuf(struct ld_ataraid_softc *sc, struct buf *bp, cbp = CBUF_GET(); if (cbp == NULL) return (NULL); - BUF_INIT(&cbp->cb_buf); - cbp->cb_buf.b_flags = bp->b_flags | B_CALL; + buf_init(&cbp->cb_buf); + cbp->cb_buf.b_flags = bp->b_flags; + cbp->cb_buf.b_oflags = bp->b_oflags; + cbp->cb_buf.b_cflags = bp->b_cflags; cbp->cb_buf.b_iodone = sc->sc_iodone; cbp->cb_buf.b_proc = bp->b_proc; cbp->cb_buf.b_vp = sc->sc_vnodes[comp]; @@ -303,6 +305,7 @@ ld_ataraid_start_span(struct ld_softc *ld, struct buf *bp) /* Free the already allocated component buffers. */ while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); + buf_destroy(&cbp->cb_buf); CBUF_PUT(cbp); } return (EAGAIN); @@ -400,6 +403,7 @@ free_and_exit: /* Free the already allocated component buffers.
*/ while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); + buf_destroy(&cbp->cb_buf); CBUF_PUT(cbp); } return (error); diff --git a/sys/dev/ata/wd.c b/sys/dev/ata/wd.c index dcdc9855a6f3..2f4acf51a015 100644 --- a/sys/dev/ata/wd.c +++ b/sys/dev/ata/wd.c @@ -1,4 +1,4 @@ -/* $NetBSD: wd.c,v 1.354 2007/12/18 15:30:40 joerg Exp $ */ +/* $NetBSD: wd.c,v 1.355 2008/01/02 11:48:37 ad Exp $ */ /* * Copyright (c) 1998, 2001 Manuel Bouyer. All rights reserved. @@ -66,7 +66,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.354 2007/12/18 15:30:40 joerg Exp $"); +__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.355 2008/01/02 11:48:37 ad Exp $"); #include "opt_ata.h" @@ -670,7 +670,9 @@ wd_split_mod15_write(struct buf *bp) * Advance the pointer to the second half and issue that command * using the same opening. */ - bp->b_flags = obp->b_flags | B_CALL; + bp->b_flags = obp->b_flags; + bp->b_oflags = obp->b_oflags; + bp->b_cflags = obp->b_cflags; bp->b_data = (char *)bp->b_data + bp->b_bcount; bp->b_blkno += (bp->b_bcount / 512); bp->b_rawblkno += (bp->b_bcount / 512); @@ -705,7 +707,7 @@ __wdstart(struct wd_softc *wd, struct buf *bp) struct buf *nbp; /* already at splbio */ - nbp = getiobuf_nowait(); + nbp = getiobuf(NULL, false); if (__predict_false(nbp == NULL)) { /* No memory -- fail the iop. */ bp->b_error = ENOMEM; @@ -717,7 +719,6 @@ __wdstart(struct wd_softc *wd, struct buf *bp) nbp->b_error = 0; nbp->b_proc = bp->b_proc; - nbp->b_vp = NULLVP; nbp->b_dev = bp->b_dev; nbp->b_bcount = bp->b_bcount / 2; @@ -727,7 +728,9 @@ __wdstart(struct wd_softc *wd, struct buf *bp) nbp->b_blkno = bp->b_blkno; nbp->b_rawblkno = bp->b_rawblkno; - nbp->b_flags = bp->b_flags | B_CALL; + nbp->b_flags = bp->b_flags; + nbp->b_oflags = bp->b_oflags; + nbp->b_cflags = bp->b_cflags; nbp->b_iodone = wd_split_mod15_write; /* Put ptr to orig buf in b_private and use new buf */ @@ -881,8 +884,7 @@ noerror: if ((wd->sc_wdc_bio.flags & ATA_CORR) || wd->retries > 0) rnd_add_uint32(&wd->rnd_source, bp->b_blkno); #endif /* XXX Yuck, but we don't want to increment openings in this case */ - if (__predict_false((bp->b_flags & B_CALL) != 0 && - bp->b_iodone == wd_split_mod15_write)) + if (__predict_false(bp->b_iodone == wd_split_mod15_write)) biodone(bp); else { biodone(bp); @@ -1957,7 +1959,7 @@ wi_get(void) int s; wi = malloc(sizeof(struct wd_ioctl), M_TEMP, M_WAITOK|M_ZERO); - simple_lock_init(&wi->wi_bp.b_interlock); + buf_init(&wi->wi_bp); s = splbio(); LIST_INSERT_HEAD(&wi_head, wi, wi_list); splx(s); @@ -1976,6 +1978,7 @@ wi_free(struct wd_ioctl *wi) s = splbio(); LIST_REMOVE(wi, wi_list); splx(s); + buf_destroy(&wi->wi_bp); free(wi, M_TEMP); } @@ -2031,7 +2034,7 @@ wdioctlstrategy(struct buf *bp) printf("wdioctlstrategy: " "No matching ioctl request found in queue\n"); error = EINVAL; - goto done; + goto bad; } memset(&ata_c, 0, sizeof(ata_c)); @@ -2043,7 +2046,7 @@ wdioctlstrategy(struct buf *bp) if (bp->b_bcount != wi->wi_atareq.datalen) { printf("physio split wd ioctl request... 
cannot proceed\n"); error = EIO; - goto done; + goto bad; } /* @@ -2055,7 +2058,7 @@ wdioctlstrategy(struct buf *bp) (bp->b_bcount / wi->wi_softc->sc_dk.dk_label->d_secsize) >= (1 << NBBY)) { error = EINVAL; - goto done; + goto bad; } /* @@ -2064,7 +2067,7 @@ wdioctlstrategy(struct buf *bp) if (wi->wi_atareq.timeout == 0) { error = EINVAL; - goto done; + goto bad; } if (wi->wi_atareq.flags & ATACMD_READ) @@ -2092,8 +2095,7 @@ wdioctlstrategy(struct buf *bp) if (wi->wi_softc->atabus->ata_exec_command(wi->wi_softc->drvp, &ata_c) != ATACMD_COMPLETE) { wi->wi_atareq.retsts = ATACMD_ERROR; - error = EIO; - goto done; + goto bad; } if (ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) { @@ -2116,7 +2118,10 @@ wdioctlstrategy(struct buf *bp) } } -done: + bp->b_error = 0; + biodone(bp); + return; +bad: bp->b_error = error; biodone(bp); } diff --git a/sys/dev/ccd.c b/sys/dev/ccd.c index 0088ab2163ba..cc88bd4921f4 100644 --- a/sys/dev/ccd.c +++ b/sys/dev/ccd.c @@ -1,4 +1,4 @@ -/* $NetBSD: ccd.c,v 1.125 2007/12/05 07:06:50 ad Exp $ */ +/* $NetBSD: ccd.c,v 1.126 2008/01/02 11:48:36 ad Exp $ */ /*- * Copyright (c) 1996, 1997, 1998, 1999, 2007 The NetBSD Foundation, Inc. @@ -125,7 +125,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.125 2007/12/05 07:06:50 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.126 2008/01/02 11:48:36 ad Exp $"); #include #include @@ -837,8 +837,10 @@ ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, void *addr, cbp = CCD_GETBUF(); if (cbp == NULL) return (NULL); - BUF_INIT(&cbp->cb_buf); - cbp->cb_buf.b_flags = bp->b_flags | B_CALL; + buf_init(&cbp->cb_buf); + cbp->cb_buf.b_flags = bp->b_flags; + cbp->cb_buf.b_oflags = bp->b_oflags; + cbp->cb_buf.b_cflags = bp->b_cflags; cbp->cb_buf.b_iodone = ccdiodone; cbp->cb_buf.b_proc = bp->b_proc; cbp->cb_buf.b_dev = ci->ci_dev; @@ -924,6 +926,7 @@ ccdiodone(struct buf *vbp) cs->sc_xname, bp->b_error, cbp->cb_comp); } count = cbp->cb_buf.b_bcount; + buf_destroy(&cbp->cb_buf); CCD_PUTBUF(cbp); /* diff --git a/sys/dev/cgd.c b/sys/dev/cgd.c index 680b8c0adc80..bff5c17e45fc 100644 --- a/sys/dev/cgd.c +++ b/sys/dev/cgd.c @@ -1,4 +1,4 @@ -/* $NetBSD: cgd.c,v 1.48 2007/11/26 19:01:34 pooka Exp $ */ +/* $NetBSD: cgd.c,v 1.49 2008/01/02 11:48:36 ad Exp $ */ /*- * Copyright (c) 2002 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cgd.c,v 1.48 2007/11/26 19:01:34 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cgd.c,v 1.49 2008/01/02 11:48:36 ad Exp $"); #include #include @@ -295,6 +295,7 @@ cgdstart(struct dk_softc *dksc, struct buf *bp) void * addr; void * newaddr; daddr_t bn; + struct vnode *vp; DPRINTF_FOLLOW(("cgdstart(%p, %p)\n", dksc, bp)); disk_busy(&dksc->sc_dkdev); /* XXX: put in dksubr.c */ @@ -306,7 +307,7 @@ cgdstart(struct dk_softc *dksc, struct buf *bp) * we can fail quickly if they are unavailable. 
*/ - nbp = getiobuf_nowait(); + nbp = getiobuf(cs->sc_tvn, false); if (nbp == NULL) { disk_unbusy(&dksc->sc_dkdev, 0, (bp->b_flags & B_READ)); return -1; @@ -330,18 +331,22 @@ cgdstart(struct dk_softc *dksc, struct buf *bp) } nbp->b_data = newaddr; - nbp->b_flags = bp->b_flags | B_CALL; + nbp->b_flags = bp->b_flags; + nbp->b_oflags = bp->b_oflags; + nbp->b_cflags = bp->b_cflags; nbp->b_iodone = cgdiodone; nbp->b_proc = bp->b_proc; nbp->b_blkno = bn; - nbp->b_vp = cs->sc_tvn; nbp->b_bcount = bp->b_bcount; nbp->b_private = bp; BIO_COPYPRIO(nbp, bp); if ((nbp->b_flags & B_READ) == 0) { - V_INCR_NUMOUTPUT(nbp->b_vp); + vp = nbp->b_vp; + mutex_enter(&vp->v_interlock); + vp->v_numoutput++; + mutex_exit(&vp->v_interlock); } VOP_STRATEGY(cs->sc_tvn, nbp); return 0; diff --git a/sys/dev/dkwedge/dk.c b/sys/dev/dkwedge/dk.c index 932682a9b5d8..9bb55d29a267 100644 --- a/sys/dev/dkwedge/dk.c +++ b/sys/dev/dkwedge/dk.c @@ -1,4 +1,4 @@ -/* $NetBSD: dk.c,v 1.31 2007/12/09 20:27:56 jmcneill Exp $ */ +/* $NetBSD: dk.c,v 1.32 2008/01/02 11:48:37 ad Exp $ */ /*- * Copyright (c) 2004, 2005, 2006, 2007 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.31 2007/12/09 20:27:56 jmcneill Exp $"); +__KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.32 2008/01/02 11:48:37 ad Exp $"); #include "opt_dkwedge.h" @@ -856,7 +856,7 @@ dkwedge_read(struct disk *pdk, struct vnode *vp, daddr_t blkno, { struct buf b; - BUF_INIT(&b); + buf_init(&b); b.b_vp = vp; b.b_dev = vp->v_rdev; @@ -1037,6 +1037,7 @@ dkstrategy(struct buf *bp) static void dkstart(struct dkwedge_softc *sc) { + struct vnode *vp; struct buf *bp, *nbp; /* Do as much work as has been enqueued. */ @@ -1056,7 +1057,7 @@ dkstart(struct dkwedge_softc *sc) /* Instrumentation. */ disk_busy(&sc->sc_dk); - nbp = getiobuf_nowait(); + nbp = getiobuf(sc->sc_parent->dk_rawvp, false); if (nbp == NULL) { /* * No resources to run this request; leave the @@ -1070,21 +1071,25 @@ dkstart(struct dkwedge_softc *sc) (void) BUFQ_GET(sc->sc_bufq); - BUF_INIT(nbp); nbp->b_data = bp->b_data; - nbp->b_flags = bp->b_flags | B_CALL; + nbp->b_flags = bp->b_flags; + nbp->b_oflags = bp->b_oflags; + nbp->b_cflags = bp->b_cflags; nbp->b_iodone = dkiodone; nbp->b_proc = bp->b_proc; nbp->b_blkno = bp->b_rawblkno; nbp->b_dev = sc->sc_parent->dk_rawvp->v_rdev; - nbp->b_vp = sc->sc_parent->dk_rawvp; nbp->b_bcount = bp->b_bcount; nbp->b_private = bp; BIO_COPYPRIO(nbp, bp); - if ((nbp->b_flags & B_READ) == 0) - V_INCR_NUMOUTPUT(nbp->b_vp); - VOP_STRATEGY(nbp->b_vp, nbp); + vp = nbp->b_vp; + if ((nbp->b_flags & B_READ) == 0) { + mutex_enter(&vp->v_interlock); + vp->v_numoutput++; + mutex_exit(&vp->v_interlock); + } + VOP_STRATEGY(vp, nbp); } } diff --git a/sys/dev/fss.c b/sys/dev/fss.c index 15b71fee8a5d..b432ad8aa6c1 100644 --- a/sys/dev/fss.c +++ b/sys/dev/fss.c @@ -1,4 +1,4 @@ -/* $NetBSD: fss.c,v 1.41 2007/12/08 19:29:41 pooka Exp $ */ +/* $NetBSD: fss.c,v 1.42 2008/01/02 11:48:36 ad Exp $ */ /*- * Copyright (c) 2003 The NetBSD Foundation, Inc. 
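cgd and dkwedge above now account pending writes themselves: v_numoutput is bumped under v_interlock before the buffer is handed to VOP_STRATEGY(), replacing the old V_INCR_NUMOUTPUT() macro. A minimal sketch of dispatching a nested buffer under that rule; nested_write is a hypothetical name and error handling is trimmed:

#include <sys/param.h>
#include <sys/buf.h>
#include <sys/mutex.h>
#include <sys/vnode.h>

/* Clone 'bp' onto 'vp' and dispatch it; sketch only, detail omitted. */
static int
nested_write(struct vnode *vp, struct buf *bp, void (*iodone)(struct buf *))
{
	struct buf *nbp;

	nbp = getiobuf(vp, false);	/* false: may not sleep here */
	if (nbp == NULL)
		return ENOMEM;

	nbp->b_data = bp->b_data;
	nbp->b_flags = bp->b_flags;
	nbp->b_oflags = bp->b_oflags;
	nbp->b_cflags = bp->b_cflags;
	nbp->b_bcount = bp->b_bcount;
	nbp->b_blkno = bp->b_rawblkno;
	nbp->b_iodone = iodone;
	nbp->b_private = bp;

	if ((nbp->b_flags & B_READ) == 0) {
		/* Writes in flight are accounted on the target vnode. */
		mutex_enter(&vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(&vp->v_interlock);
	}
	VOP_STRATEGY(vp, nbp);
	return 0;
}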
@@ -43,7 +43,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.41 2007/12/08 19:29:41 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.42 2008/01/02 11:48:36 ad Exp $"); #include "fss.h" @@ -895,8 +895,8 @@ restart: if (len > MAXPHYS) len = MAXPHYS; - bp = getiobuf(); - bp->b_flags = B_READ|B_CALL; + bp = getiobuf(NULL, true); + bp->b_flags = B_READ; bp->b_bcount = len; bp->b_bufsize = bp->b_bcount; bp->b_error = 0; @@ -904,7 +904,6 @@ restart: bp->b_blkno = dblk; bp->b_proc = NULL; bp->b_dev = sc->sc_bdev; - bp->b_vp = NULLVP; bp->b_private = scp; bp->b_iodone = fss_cluster_iodone; @@ -952,7 +951,7 @@ fss_bs_io(struct fss_softc *sc, fss_io_type rw, data, len, off, UIO_SYSSPACE, IO_UNIT|IO_NODELOCKED, sc->sc_bs_lwp->l_cred, NULL, NULL); if (error == 0) { - simple_lock(&sc->sc_bs_vp->v_interlock); + mutex_enter(&sc->sc_bs_vp->v_interlock); error = VOP_PUTPAGES(sc->sc_bs_vp, trunc_page(off), round_page(off+len), PGO_CLEANIT|PGO_SYNCIO|PGO_FREE); } @@ -1019,7 +1018,7 @@ fss_bs_thread(void *arg) scl = sc->sc_cache+sc->sc_cache_size; - nbp = getiobuf(); + nbp = getiobuf(NULL, true); nfreed = nio = 1; /* Dont sleep the first time */ @@ -1148,7 +1147,7 @@ fss_bs_thread(void *arg) FSS_UNLOCK(sc, s); - BUF_INIT(nbp); + buf_init(nbp); nbp->b_flags = B_READ; nbp->b_bcount = bp->b_bcount; nbp->b_bufsize = bp->b_bcount; @@ -1157,7 +1156,6 @@ fss_bs_thread(void *arg) nbp->b_blkno = bp->b_blkno; nbp->b_proc = bp->b_proc; nbp->b_dev = sc->sc_bdev; - nbp->b_vp = NULLVP; bdev_strategy(nbp); diff --git a/sys/dev/gpib/ct.c b/sys/dev/gpib/ct.c index bcab573fd047..fdc550e55b68 100644 --- a/sys/dev/gpib/ct.c +++ b/sys/dev/gpib/ct.c @@ -1,4 +1,4 @@ -/* $NetBSD: ct.c,v 1.12 2007/10/08 20:12:06 ad Exp $ */ +/* $NetBSD: ct.c,v 1.13 2008/01/02 11:48:37 ad Exp $ */ /*- * Copyright (c) 1996-2003 The NetBSD Foundation, Inc. @@ -128,7 +128,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ct.c,v 1.12 2007/10/08 20:12:06 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ct.c,v 1.13 2008/01/02 11:48:37 ad Exp $"); #include #include @@ -475,6 +475,7 @@ ctcommand(dev, cmd, cnt) sc->sc_bp = bp; sc->sc_cmd = cmd; bp->b_dev = dev; + bp->b_objlock = &buffer_lock; if (cmd == MTFSF) { nbp = (struct buf *)geteblk(MAXBSIZE); bp->b_data = nbp->b_data; @@ -482,7 +483,9 @@ ctcommand(dev, cmd, cnt) } while (cnt-- > 0) { - bp->b_flags = B_BUSY; + bp->b_flags = 0; + bp->b_cflags = BC_BUSY; + bp->b_oflags = 0; if (cmd == MTBSF) { sc->sc_blkno = sc->sc_eofs[sc->sc_eofp]; sc->sc_eofp--; diff --git a/sys/dev/gpib/mt.c b/sys/dev/gpib/mt.c index 026e1531c0a1..a071717fafae 100644 --- a/sys/dev/gpib/mt.c +++ b/sys/dev/gpib/mt.c @@ -1,4 +1,4 @@ -/* $NetBSD: mt.c,v 1.11 2007/07/29 12:15:43 ad Exp $ */ +/* $NetBSD: mt.c,v 1.12 2008/01/02 11:48:37 ad Exp $ */ /*- * Copyright (c) 1996-2003 The NetBSD Foundation, Inc. 
@@ -121,7 +121,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: mt.c,v 1.11 2007/07/29 12:15:43 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: mt.c,v 1.12 2008/01/02 11:48:37 ad Exp $"); #include #include @@ -515,13 +515,16 @@ mtcommand(dev, cmd, cnt) sc = device_lookup(&mt_cd, MTUNIT(dev)); bp = &sc->sc_bufstore; - if (bp->b_flags & B_BUSY) + if (bp->b_cflags & BC_BUSY) return (EBUSY); bp->b_cmd = cmd; bp->b_dev = dev; + bp->b_objlock = &buffer_lock; do { - bp->b_flags = B_BUSY | B_CMD; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_CMD; + bp->b_oflags = 0; mtstrategy(bp); biowait(bp); if (bp->b_error != 0) { @@ -530,9 +533,9 @@ mtcommand(dev, cmd, cnt) } } while (--cnt > 0); #if 0 - bp->b_flags = 0 /*&= ~B_BUSY*/; + bp->b_cflags = 0 /*&= ~BC_BUSY*/; #else - bp->b_flags &= ~B_BUSY; + bp->b_cflags &= ~BC_BUSY; #endif return (error); } diff --git a/sys/dev/isa/fd.c b/sys/dev/isa/fd.c index 35a00734a580..86e351b64685 100644 --- a/sys/dev/isa/fd.c +++ b/sys/dev/isa/fd.c @@ -1,4 +1,4 @@ -/* $NetBSD: fd.c,v 1.77 2007/10/19 12:00:16 ad Exp $ */ +/* $NetBSD: fd.c,v 1.78 2008/01/02 11:48:37 ad Exp $ */ /*- * Copyright (c) 1998, 2003 The NetBSD Foundation, Inc. @@ -88,7 +88,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.77 2007/10/19 12:00:16 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.78 2008/01/02 11:48:37 ad Exp $"); #include "rnd.h" #include "opt_ddb.h" @@ -1518,12 +1518,12 @@ fdformat(dev, finfo, l) struct buf *bp; /* set up a buffer header for fdstrategy() */ - bp = getiobuf_nowait(); + bp = getiobuf(NULL, false); if (bp == NULL) return ENOBUFS; - bp->b_vp = NULL; - bp->b_flags = B_BUSY | B_PHYS | B_FORMAT; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_PHYS | B_FORMAT; bp->b_proc = l->l_proc; bp->b_dev = dev; diff --git a/sys/dev/qbus/ts.c b/sys/dev/qbus/ts.c index 32fcbdaec5fd..19320ad545c4 100644 --- a/sys/dev/qbus/ts.c +++ b/sys/dev/qbus/ts.c @@ -1,4 +1,4 @@ -/* $NetBSD: ts.c,v 1.21 2007/10/19 12:01:09 ad Exp $ */ +/* $NetBSD: ts.c,v 1.22 2008/01/02 11:48:38 ad Exp $ */ /*- * Copyright (c) 1991 The Regents of the University of California. @@ -66,7 +66,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ts.c,v 1.21 2007/10/19 12:01:09 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ts.c,v 1.22 2008/01/02 11:48:38 ad Exp $"); #undef TSDEBUG @@ -321,7 +321,6 @@ void tscommand(struct ts_softc *sc, dev_t dev, int cmd, int count) { struct buf *bp; - int s; #ifdef TSDEBUG printf("tscommand (%x, %d)\n", cmd, count); @@ -329,20 +328,19 @@ tscommand(struct ts_softc *sc, dev_t dev, int cmd, int count) bp = &sc->ts_cbuf; - s = splbio(); - while (bp->b_flags & B_BUSY) { + mutex_enter(&bufcache_lock); + while (bp->b_cflags & BC_BUSY) { /* - * This special check is because B_BUSY never + * This special check is because BC_BUSY never * gets cleared in the non-waiting rewind case. ??? */ - if (bp->b_bcount == 0 && (bp->b_flags & B_DONE)) + if (bp->b_bcount == 0 && (bp->b_oflags & BO_DONE)) break; - bp->b_flags |= B_WANTED; - (void) tsleep(bp, PRIBIO, "tscmd", 0); + (void )bbusy(bp, false, 0); /* check MOT-flag !!! */ } - bp->b_flags = B_BUSY | B_READ; - splx(s); + bp->b_flags = B_READ; + mutex_exit(&bufcache_lock); /* * Load the buffer. The b_count field gets used to hold the command @@ -354,6 +352,8 @@ tscommand(struct ts_softc *sc, dev_t dev, int cmd, int count) bp->b_bcount = count; bp->b_resid = cmd; bp->b_blkno = 0; + bp->b_oflags = 0; + bp->b_objlock = &buffer_lock; tsstrategy(bp); /* * In case of rewind from close, don't wait. 
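ctcommand() and mtcommand() above both gain the line bp->b_objlock = &buffer_lock: every buffer must now name the mutex that guards its BO_DONE state, and a device-private buffer with no vnode points at the global buffer_lock. A hedged userland model, with pthread mutexes in place of kmutex_t:

    #include <pthread.h>
    #include <stdio.h>

    #define BO_DONE 0x01

    static pthread_mutex_t buffer_lock = PTHREAD_MUTEX_INITIALIZER;

    struct toy_buf {                    /* stand-in for struct buf */
        pthread_mutex_t *b_objlock;     /* lock covering b_oflags */
        int b_oflags;
    };

    /* Completion path: BO_DONE may only change under *b_objlock. */
    static void
    buf_mark_done(struct toy_buf *bp)
    {
        pthread_mutex_lock(bp->b_objlock);
        bp->b_oflags |= BO_DONE;
        pthread_mutex_unlock(bp->b_objlock);
    }

    int
    main(void)
    {
        /* A device-private buf with no vnode points at the global
         * lock, as ctcommand()/mtcommand() do with &buffer_lock. */
        struct toy_buf b = { &buffer_lock, 0 };

        buf_mark_done(&b);
        printf("oflags=%x\n", b.b_oflags);
        return 0;
    }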
@@ -362,9 +362,10 @@ tscommand(struct ts_softc *sc, dev_t dev, int cmd, int count) if (count == 0) return; biowait(bp); - if (bp->b_flags & B_WANTED) - wakeup((void *)bp); - bp->b_flags = 0; + mutex_enter(&bufcache_lock); + cv_broadcast(&bp->b_busy); + bp->b_cflags = 0; + mutex_exit(&bufcache_lock); } /* diff --git a/sys/dev/raidframe/rf_diskqueue.c b/sys/dev/raidframe/rf_diskqueue.c index 3a7a7cb16f29..5f557b039e5f 100644 --- a/sys/dev/raidframe/rf_diskqueue.c +++ b/sys/dev/raidframe/rf_diskqueue.c @@ -1,4 +1,4 @@ -/* $NetBSD: rf_diskqueue.c,v 1.49 2007/03/04 06:02:37 christos Exp $ */ +/* $NetBSD: rf_diskqueue.c,v 1.50 2008/01/02 11:48:38 ad Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -66,7 +66,7 @@ ****************************************************************************/ #include -__KERNEL_RCSID(0, "$NetBSD: rf_diskqueue.c,v 1.49 2007/03/04 06:02:37 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: rf_diskqueue.c,v 1.50 2008/01/02 11:48:38 ad Exp $"); #include @@ -449,25 +449,19 @@ rf_CreateDiskQueueData(RF_IoType_t typ, RF_SectorNum_t ssect, int waitflag) { RF_DiskQueueData_t *p; - int s; - s = splbio(); p = pool_get(&rf_pools.dqd, waitflag); - splx(s); if (p == NULL) return (NULL); memset(p, 0, sizeof(RF_DiskQueueData_t)); if (waitflag == PR_WAITOK) { - p->bp = getiobuf(); + p->bp = getiobuf(NULL, true); } else { - p->bp = getiobuf_nowait(); + p->bp = getiobuf(NULL, false); } if (p->bp == NULL) { - /* no memory for the buffer!?!? */ - s = splbio(); pool_put(&rf_pools.dqd, p); - splx(s); return (NULL); } diff --git a/sys/dev/raidframe/rf_netbsdkintf.c b/sys/dev/raidframe/rf_netbsdkintf.c index 2d3d898ec204..608c1440b212 100644 --- a/sys/dev/raidframe/rf_netbsdkintf.c +++ b/sys/dev/raidframe/rf_netbsdkintf.c @@ -1,4 +1,4 @@ -/* $NetBSD: rf_netbsdkintf.c,v 1.241 2007/12/18 01:09:46 oster Exp $ */ +/* $NetBSD: rf_netbsdkintf.c,v 1.242 2008/01/02 11:48:38 ad Exp $ */ /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. * All rights reserved. @@ -146,7 +146,7 @@ ***********************************************************/ #include -__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.241 2007/12/18 01:09:46 oster Exp $"); +__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.242 2008/01/02 11:48:38 ad Exp $"); #include #include @@ -2208,7 +2208,9 @@ InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev, struct proc *b_proc) { /* bp->b_flags = B_PHYS | rw_flag; */ - bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */ + bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */ + bp->b_oflags = 0; + bp->b_cflags = 0; bp->b_bcount = numSect << logBytesPerSector; bp->b_bufsize = bp->b_bcount; bp->b_error = 0; @@ -2223,8 +2225,11 @@ InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev, bp->b_iodone = cbFunc; bp->b_private = cbArg; bp->b_vp = b_vp; + bp->b_objlock = &b_vp->v_interlock; if ((bp->b_flags & B_READ) == 0) { - bp->b_vp->v_numoutput++; + mutex_enter(&b_vp->v_interlock); + b_vp->v_numoutput++; + mutex_exit(&b_vp->v_interlock); } } diff --git a/sys/dev/scsipi/cd.c b/sys/dev/scsipi/cd.c index 7f0f12731dfc..ff43d77c9fb0 100644 --- a/sys/dev/scsipi/cd.c +++ b/sys/dev/scsipi/cd.c @@ -1,4 +1,4 @@ -/* $NetBSD: cd.c,v 1.271 2007/12/09 20:28:22 jmcneill Exp $ */ +/* $NetBSD: cd.c,v 1.272 2008/01/02 11:48:38 ad Exp $ */ /*- * Copyright (c) 1998, 2001, 2003, 2004, 2005 The NetBSD Foundation, Inc. 
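The tscommand() conversion just shown swaps the old B_WANTED flag plus tsleep()/wakeup() handshake for sleeping on the buffer's condition variable under bufcache_lock (the bbusy() call) and broadcasting b_busy at completion. A self-contained pthread analogue of that claim/release cycle, with toy types standing in for the kernel primitives:

    #include <pthread.h>
    #include <stdio.h>

    #define BC_BUSY 0x01

    static pthread_mutex_t bufcache_lock = PTHREAD_MUTEX_INITIALIZER;

    struct toy_buf {
        pthread_cond_t b_busy;   /* kernel: kcondvar_t in struct buf */
        int b_cflags;
    };

    /* Claim the buffer; models the bbusy() wait loop in tscommand(). */
    static void
    buf_acquire(struct toy_buf *bp)
    {
        pthread_mutex_lock(&bufcache_lock);
        while (bp->b_cflags & BC_BUSY)
            pthread_cond_wait(&bp->b_busy, &bufcache_lock);
        bp->b_cflags |= BC_BUSY;
        pthread_mutex_unlock(&bufcache_lock);
    }

    /* Release and wake waiters; models cv_broadcast(&bp->b_busy). */
    static void
    buf_release(struct toy_buf *bp)
    {
        pthread_mutex_lock(&bufcache_lock);
        bp->b_cflags = 0;
        pthread_cond_broadcast(&bp->b_busy);
        pthread_mutex_unlock(&bufcache_lock);
    }

    int
    main(void)
    {
        struct toy_buf b = { PTHREAD_COND_INITIALIZER, 0 };

        buf_acquire(&b);
        buf_release(&b);
        printf("cycled busy/unbusy\n");
        return 0;
    }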
@@ -57,7 +57,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cd.c,v 1.271 2007/12/09 20:28:22 jmcneill Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cd.c,v 1.272 2008/01/02 11:48:38 ad Exp $"); #include "rnd.h" @@ -679,7 +679,7 @@ cdstrategy(struct buf *bp) } blkno = ((blkno * lp->d_secsize) / cd->params.blksize); - nbp = getiobuf_nowait(); + nbp = getiobuf(NULL, false); if (!nbp) { /* No memory -- fail the iop. */ free(bounce, M_DEVBUF); @@ -698,14 +698,12 @@ /* Set up the IOP to the bounce buffer. */ nbp->b_error = 0; nbp->b_proc = bp->b_proc; - nbp->b_vp = NULLVP; - nbp->b_bcount = count; nbp->b_bufsize = count; - nbp->b_rawblkno = blkno; - - nbp->b_flags = bp->b_flags | B_READ | B_CALL; + nbp->b_flags = bp->b_flags | B_READ; + nbp->b_oflags = bp->b_oflags; + nbp->b_cflags = bp->b_cflags; nbp->b_iodone = cdbounce; /* store bounce state in b_private and use new buf */ @@ -970,7 +968,7 @@ cdbounce(struct buf *bp) count = MAXPHYS; } - nbp = getiobuf_nowait(); + nbp = getiobuf(NULL, false); if (!nbp) { /* No memory -- fail the iop. */ bp->b_error = ENOMEM; @@ -980,15 +978,13 @@ /* Set up the IOP to the bounce buffer. */ nbp->b_error = 0; nbp->b_proc = obp->b_proc; - nbp->b_vp = NULLVP; - nbp->b_bcount = count; nbp->b_bufsize = count; nbp->b_data = bp->b_data; - nbp->b_rawblkno = blkno; - - nbp->b_flags = obp->b_flags | B_READ | B_CALL; + nbp->b_flags = obp->b_flags | B_READ; + nbp->b_oflags = obp->b_oflags; + nbp->b_cflags = obp->b_cflags; nbp->b_iodone = cdbounce; /* store bounce state in b_private and use new buf */ diff --git a/sys/dev/scsipi/scsipi_ioctl.c b/sys/dev/scsipi/scsipi_ioctl.c index cdeccae4aab8..a10be17d2f39 100644 --- a/sys/dev/scsipi/scsipi_ioctl.c +++ b/sys/dev/scsipi/scsipi_ioctl.c @@ -44,7 +44,7 @@ -/* $NetBSD: scsipi_ioctl.c,v 1.63 2007/07/29 12:50:23 ad Exp $ */ +/* $NetBSD: scsipi_ioctl.c,v 1.64 2008/01/02 11:48:39 ad Exp $ */ /*- * Copyright (c) 1998, 2004 The NetBSD Foundation, Inc.
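The cd.c hunks above also show the allocator change repeated across the drivers in this patch: the getiobuf()/getiobuf_nowait() pair collapses into a single getiobuf(vp, waitok), which takes the vnode up front so the buffer's object lock can be derived at allocation time. The stub below only mimics that calling convention; it is an illustrative stand-in, not the kernel implementation:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct toy_vnode { int unused; };

    struct toy_buf {
        struct toy_vnode *b_vp;   /* vnode known at allocation time */
    };

    /* Shape-only stand-in for the kernel's getiobuf(vp, waitok). */
    static struct toy_buf *
    toy_getiobuf(struct toy_vnode *vp, bool waitok)
    {
        struct toy_buf *bp;

        do {
            bp = malloc(sizeof(*bp));
        } while (bp == NULL && waitok); /* kernel: sleep for memory */

        if (bp != NULL)
            bp->b_vp = vp;
        return bp;
    }

    int
    main(void)
    {
        /* waitok=false mirrors the old getiobuf_nowait(): may fail. */
        struct toy_buf *bp = toy_getiobuf(NULL, false);

        printf("%s\n", bp != NULL ? "allocated" : "ENOBUFS");
        free(bp);
        return 0;
    }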
@@ -44,7 +44,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: scsipi_ioctl.c,v 1.63 2007/07/29 12:50:23 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: scsipi_ioctl.c,v 1.64 2008/01/02 11:48:39 ad Exp $"); #include "opt_compat_freebsd.h" #include "opt_compat_netbsd.h" @@ -85,7 +85,7 @@ si_get(void) int s; si = malloc(sizeof(struct scsi_ioctl), M_TEMP, M_WAITOK|M_ZERO); - simple_lock_init(&si->si_bp.b_interlock); + buf_init(&si->si_bp); s = splbio(); LIST_INSERT_HEAD(&si_head, si, si_list); splx(s); @@ -100,6 +100,7 @@ si_free(struct scsi_ioctl *si) s = splbio(); LIST_REMOVE(si, si_list); splx(s); + buf_destroy(&si->si_bp); free(si, M_TEMP); } diff --git a/sys/dev/vme/xd.c b/sys/dev/vme/xd.c index dd5cdd962ad1..dc03257323d7 100644 --- a/sys/dev/vme/xd.c +++ b/sys/dev/vme/xd.c @@ -1,4 +1,4 @@ -/* $NetBSD: xd.c,v 1.71 2007/10/19 12:01:23 ad Exp $ */ +/* $NetBSD: xd.c,v 1.72 2008/01/02 11:48:39 ad Exp $ */ /* * @@ -51,7 +51,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: xd.c,v 1.71 2007/10/19 12:01:23 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: xd.c,v 1.72 2008/01/02 11:48:39 ad Exp $"); #undef XDC_DEBUG /* full debug */ #define XDC_DIAG /* extra sanity checks */ @@ -331,8 +331,8 @@ xddummystrat(bp) if (bp->b_bcount != XDFM_BPS) panic("xddummystrat"); bcopy(xd_labeldata, bp->b_data, XDFM_BPS); - bp->b_flags |= B_DONE; - bp->b_flags &= ~B_BUSY; + bp->b_oflags |= BO_DONE; + bp->b_cflags &= ~BC_BUSY; } int diff --git a/sys/dev/vme/xy.c b/sys/dev/vme/xy.c index 479887202daa..3087343f66ac 100644 --- a/sys/dev/vme/xy.c +++ b/sys/dev/vme/xy.c @@ -1,4 +1,4 @@ -/* $NetBSD: xy.c,v 1.74 2007/10/19 12:01:23 ad Exp $ */ +/* $NetBSD: xy.c,v 1.75 2008/01/02 11:48:39 ad Exp $ */ /* * @@ -51,7 +51,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: xy.c,v 1.74 2007/10/19 12:01:23 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: xy.c,v 1.75 2008/01/02 11:48:39 ad Exp $"); #undef XYC_DEBUG /* full debug */ #undef XYC_DIAG /* extra sanity checks */ @@ -247,8 +247,8 @@ xydummystrat(bp) if (bp->b_bcount != XYFM_BPS) panic("xydummystrat"); bcopy(xy_labeldata, bp->b_data, XYFM_BPS); - bp->b_flags |= B_DONE; - bp->b_flags &= ~B_BUSY; + bp->b_oflags |= BO_DONE; + bp->b_cflags &= ~BC_BUSY; } int diff --git a/sys/dev/vnd.c b/sys/dev/vnd.c index 4b110c45eccd..e75ba3bc607f 100644 --- a/sys/dev/vnd.c +++ b/sys/dev/vnd.c @@ -1,4 +1,4 @@ -/* $NetBSD: vnd.c,v 1.174 2007/12/18 23:22:18 riz Exp $ */ +/* $NetBSD: vnd.c,v 1.175 2008/01/02 11:48:36 ad Exp $ */ /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. @@ -137,7 +137,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.174 2007/12/18 23:22:18 riz Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.175 2008/01/02 11:48:36 ad Exp $"); #if defined(_KERNEL_OPT) #include "fs_nfs.h" @@ -620,11 +620,14 @@ vndthread(void *arg) disk_busy(&vnd->sc_dkdev); bp = &vnx->vx_buf; - BUF_INIT(bp); - bp->b_flags = (obp->b_flags & B_READ) | B_CALL; + buf_init(bp); + bp->b_flags = (obp->b_flags & B_READ); + bp->b_oflags = obp->b_oflags; + bp->b_cflags = obp->b_cflags; bp->b_iodone = vndiodone; bp->b_private = obp; bp->b_vp = vnd->sc_vp; + bp->b_objlock = &bp->b_vp->v_interlock; bp->b_data = obp->b_data; bp->b_bcount = obp->b_bcount; BIO_COPYPRIO(bp, obp); @@ -708,8 +711,11 @@ handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp) /* We need to increase the number of outputs on the vnode if * there was any write to it. 
*/ - if (!doread) - V_INCR_NUMOUTPUT(vp); + if (!doread) { + mutex_enter(&vp->v_interlock); + vp->v_numoutput++; + mutex_exit(&vp->v_interlock); + } biodone(bp); } @@ -727,15 +733,15 @@ handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp, int bsize, error, flags, skipped; size_t resid, sz; off_t bn, offset; + struct vnode *vp; flags = obp->b_flags; if (!(flags & B_READ)) { - int s; - - s = splbio(); - V_INCR_NUMOUTPUT(bp->b_vp); - splx(s); + vp = bp->b_vp; + mutex_enter(&vp->v_interlock); + vp->v_numoutput++; + mutex_exit(&vp->v_interlock); } /* convert to a byte offset within the file. */ @@ -756,7 +762,6 @@ handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp, for (offset = 0, resid = bp->b_resid; resid; resid -= sz, offset += sz) { struct buf *nbp; - struct vnode *vp; daddr_t nbn; int off, nra; @@ -792,11 +797,11 @@ handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp, #ifdef DEBUG if (vnddebug & VDB_IO) printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64 - " sz 0x%zx\n", - vnd->sc_vp, vp, (long long)bn, nbn, sz); + " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn, + nbn, sz); #endif - nbp = getiobuf(); + nbp = getiobuf(vp, true); nestiobuf_setup(bp, nbp, offset, sz); nbp->b_blkno = nbn + btodb(off); diff --git a/sys/fs/adosfs/adutil.c b/sys/fs/adosfs/adutil.c index 5e101c160010..d482c1c96424 100644 --- a/sys/fs/adosfs/adutil.c +++ b/sys/fs/adosfs/adutil.c @@ -1,4 +1,4 @@ -/* $NetBSD: adutil.c,v 1.5 2007/10/10 20:42:22 ad Exp $ */ +/* $NetBSD: adutil.c,v 1.6 2008/01/02 11:48:39 ad Exp $ */ /* * Copyright (c) 1994 Christian E. Hopps @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: adutil.c,v 1.5 2007/10/10 20:42:22 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: adutil.c,v 1.6 2008/01/02 11:48:39 ad Exp $"); #include #include @@ -69,7 +69,7 @@ start_over: for (ap = hp->lh_first; ap != NULL; ap = ap->link.le_next) { if (ap->block == an) { vp = ATOV(ap); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); simple_unlock(&adosfs_hashlock); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) goto start_over; diff --git a/sys/fs/adosfs/advnops.c b/sys/fs/adosfs/advnops.c index 1a4fd2e58381..27898c6e94e0 100644 --- a/sys/fs/adosfs/advnops.c +++ b/sys/fs/adosfs/advnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: advnops.c,v 1.26 2007/11/26 19:01:41 pooka Exp $ */ +/* $NetBSD: advnops.c,v 1.27 2008/01/02 11:48:40 ad Exp $ */ /* * Copyright (c) 1994 Christian E. 
Hopps @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: advnops.c,v 1.26 2007/11/26 19:01:41 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: advnops.c,v 1.27 2008/01/02 11:48:40 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -847,15 +847,15 @@ adosfs_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + bool *a_recycle; } */ *sp = v; struct vnode *vp = sp->a_vp; - struct lwp *l = curlwp; #ifdef ADOSFS_DIAGNOSTIC advopprint(sp); #endif VOP_UNLOCK(vp, 0); /* XXX this needs to check if file was deleted */ - vrecycle(vp, NULL, l); + *sp->a_recycle = true; #ifdef ADOSFS_DIAGNOSTIC printf(" 0)"); diff --git a/sys/fs/cd9660/cd9660_node.c b/sys/fs/cd9660/cd9660_node.c index ed431af4bd61..f8c40e9b5e0c 100644 --- a/sys/fs/cd9660/cd9660_node.c +++ b/sys/fs/cd9660/cd9660_node.c @@ -1,4 +1,4 @@ -/* $NetBSD: cd9660_node.c,v 1.19 2007/12/08 14:41:11 ad Exp $ */ +/* $NetBSD: cd9660_node.c,v 1.20 2008/01/02 11:48:40 ad Exp $ */ /*- * Copyright (c) 1982, 1986, 1989, 1994 @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cd9660_node.c,v 1.19 2007/12/08 14:41:11 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cd9660_node.c,v 1.20 2008/01/02 11:48:40 ad Exp $"); #include #include @@ -154,7 +154,7 @@ loop: if (flags == 0) { mutex_exit(&cd9660_ihash_lock); } else { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&cd9660_ihash_lock); if (vget(vp, flags | LK_INTERLOCK)) goto loop; @@ -209,6 +209,7 @@ cd9660_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + bool *a_recycle; } */ *ap = v; struct vnode *vp = ap->a_vp; struct iso_node *ip = VTOI(vp); @@ -217,14 +218,13 @@ cd9660_inactive(v) if (prtactive && vp->v_usecount != 0) vprint("cd9660_inactive: pushing active", vp); - ip->i_flag = 0; - VOP_UNLOCK(vp, 0); /* * If we are done with the inode, reclaim it * so that it can be reused immediately. 
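cd9660_ihashget() above, like the efs, filecore, hfs and msdosfs lookups that follow, now takes the vnode's interlock with mutex_enter() before dropping the hash lock and hands it to vget() via LK_INTERLOCK; a failed vget() means the vnode was reclaimed mid-lookup and the search restarts. A simplified, runnable model of that lock ordering (toy types, and a single attempt in place of the kernel's goto loop):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t hash_lock = PTHREAD_MUTEX_INITIALIZER;

    struct toy_vnode {
        pthread_mutex_t v_interlock;
        bool reclaiming;        /* vnode being torn down */
    };

    /* vget() analogue: consumes the held interlock; fails if doomed. */
    static bool
    toy_vget(struct toy_vnode *vp)
    {
        bool ok = !vp->reclaiming;

        pthread_mutex_unlock(&vp->v_interlock);
        return ok;
    }

    static struct toy_vnode *
    hash_lookup(struct toy_vnode *vp)
    {
        pthread_mutex_lock(&hash_lock);
        /* ...hash chain search would locate vp here... */
        pthread_mutex_lock(&vp->v_interlock); /* before dropping hash lock */
        pthread_mutex_unlock(&hash_lock);
        if (!toy_vget(vp))
            return NULL;    /* kernel: goto loop and search again */
        return vp;
    }

    int
    main(void)
    {
        struct toy_vnode v = { PTHREAD_MUTEX_INITIALIZER, false };

        printf("%s\n", hash_lookup(&v) != NULL ? "got vnode" : "retry");
        return 0;
    }

Taking the interlock before releasing the hash lock closes the window in which the vnode could be freed between the lookup and the vget().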
*/ - if (ip->inode.iso_mode == 0) - vrecycle(vp, (struct simplelock *)0, curlwp); + ip->i_flag = 0; + *ap->a_recycle = (ip->inode.iso_mode == 0); + VOP_UNLOCK(vp, 0); return error; } diff --git a/sys/fs/cd9660/cd9660_vfsops.c b/sys/fs/cd9660/cd9660_vfsops.c index 604764ef0df0..4eed0d8e9171 100644 --- a/sys/fs/cd9660/cd9660_vfsops.c +++ b/sys/fs/cd9660/cd9660_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: cd9660_vfsops.c,v 1.52 2007/12/08 19:29:42 pooka Exp $ */ +/* $NetBSD: cd9660_vfsops.c,v 1.53 2008/01/02 11:48:40 ad Exp $ */ /*- * Copyright (c) 1994 @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cd9660_vfsops.c,v 1.52 2007/12/08 19:29:42 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cd9660_vfsops.c,v 1.53 2008/01/02 11:48:40 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" @@ -420,6 +420,7 @@ iso_mountfs(devvp, mp, l, argp) mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; mp->mnt_stat.f_namemax = MAXNAMLEN; mp->mnt_flag |= MNT_LOCAL; + mp->mnt_iflag |= IMNT_MPSAFE; mp->mnt_dev_bshift = iso_bsize; mp->mnt_fs_bshift = isomp->im_bshift; isomp->im_mountp = mp; @@ -871,7 +872,6 @@ cd9660_vget_internal(mp, ino, vpp, relocated, isodir) vp->v_data = NULL; VOP_UNLOCK(vp, 0); vp->v_op = spec_vnodeop_p; - vrele(vp); vgone(vp); lockmgr(&nvp->v_lock, LK_EXCLUSIVE, &nvp->v_interlock); /* diff --git a/sys/fs/efs/efs_ihash.c b/sys/fs/efs/efs_ihash.c index bac0090e3a16..edd0f2de3e7a 100644 --- a/sys/fs/efs/efs_ihash.c +++ b/sys/fs/efs/efs_ihash.c @@ -1,4 +1,4 @@ -/* $NetBSD: efs_ihash.c,v 1.1 2007/06/29 23:30:28 rumble Exp $ */ +/* $NetBSD: efs_ihash.c,v 1.2 2008/01/02 11:48:40 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 @@ -36,7 +36,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: efs_ihash.c,v 1.1 2007/06/29 23:30:28 rumble Exp $"); +__KERNEL_RCSID(0, "$NetBSD: efs_ihash.c,v 1.2 2008/01/02 11:48:40 ad Exp $"); #include #include @@ -146,7 +146,7 @@ efs_ihashget(dev_t dev, ino_t inum, int flags) if (flags == 0) { mutex_exit(&efs_ihash_lock); } else { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&efs_ihash_lock); if (vget(vp, flags | LK_INTERLOCK)) goto loop; diff --git a/sys/fs/efs/efs_vnops.c b/sys/fs/efs/efs_vnops.c index 407375279cb6..64d5cb0f9ede 100644 --- a/sys/fs/efs/efs_vnops.c +++ b/sys/fs/efs/efs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: efs_vnops.c,v 1.12 2007/11/26 19:01:43 pooka Exp $ */ +/* $NetBSD: efs_vnops.c,v 1.13 2008/01/02 11:48:40 ad Exp $ */ /* * Copyright (c) 2006 Stephen M. 
Rumble @@ -17,7 +17,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: efs_vnops.c,v 1.12 2007/11/26 19:01:43 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: efs_vnops.c,v 1.13 2008/01/02 11:48:40 ad Exp $"); #include #include @@ -559,14 +559,13 @@ efs_inactive(void *v) struct vop_inactive_args /* { const struct vnodeop_desc *a_desc; struct vnode *a_vp; + bool *a_recycle } */ *ap = v; struct efs_inode *eip = EFS_VTOI(ap->a_vp); + *ap->a_recycle = (eip->ei_mode == 0); VOP_UNLOCK(ap->a_vp, 0); - if (eip->ei_mode == 0) - vrecycle(ap->a_vp, NULL, curlwp); - return (0); } diff --git a/sys/fs/filecorefs/filecore_node.c b/sys/fs/filecorefs/filecore_node.c index 228818301796..24a52126f44a 100644 --- a/sys/fs/filecorefs/filecore_node.c +++ b/sys/fs/filecorefs/filecore_node.c @@ -1,4 +1,4 @@ -/* $NetBSD: filecore_node.c,v 1.12 2007/11/26 19:01:44 pooka Exp $ */ +/* $NetBSD: filecore_node.c,v 1.13 2008/01/02 11:48:41 ad Exp $ */ /*- * Copyright (c) 1982, 1986, 1989, 1994 @@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: filecore_node.c,v 1.12 2007/11/26 19:01:44 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: filecore_node.c,v 1.13 2008/01/02 11:48:41 ad Exp $"); #include #include @@ -174,7 +174,7 @@ loop: LIST_FOREACH(ip, &filecorehashtbl[INOHASH(dev, inum)], i_hash) { if (inum == ip->i_number && dev == ip->i_dev) { vp = ITOV(ip); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); simple_unlock(&filecore_ihash_slock); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) goto loop; @@ -226,7 +226,7 @@ filecore_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; - struct lwp *a_l; + bool *a_recycle; } */ *ap = v; struct vnode *vp = ap->a_vp; struct filecore_node *ip = VTOI(vp); @@ -235,14 +235,13 @@ filecore_inactive(v) if (prtactive && vp->v_usecount != 0) vprint("filecore_inactive: pushing active", vp); - ip->i_flag = 0; - VOP_UNLOCK(vp, 0); /* * If we are done with the inode, reclaim it * so that it can be reused immediately. */ - if (filecore_staleinode(ip)) - vrecycle(vp, (struct simplelock *)0, curlwp); + ip->i_flag = 0; + *ap->a_recycle = (filecore_staleinode(ip) != 0); + VOP_UNLOCK(vp, 0); return error; } diff --git a/sys/fs/hfs/hfs_nhash.c b/sys/fs/hfs/hfs_nhash.c index 8ebf878852ff..4ff1e52cd61d 100644 --- a/sys/fs/hfs/hfs_nhash.c +++ b/sys/fs/hfs/hfs_nhash.c @@ -1,4 +1,4 @@ -/* $NetBSD: hfs_nhash.c,v 1.3 2007/12/11 12:04:23 lukem Exp $ */ +/* $NetBSD: hfs_nhash.c,v 1.4 2008/01/02 11:48:41 ad Exp $ */ /*- * Copyright (c) 2005, 2007 The NetBSD Foundation, Inc. @@ -59,7 +59,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: hfs_nhash.c,v 1.3 2007/12/11 12:04:23 lukem Exp $"); +__KERNEL_RCSID(0, "$NetBSD: hfs_nhash.c,v 1.4 2008/01/02 11:48:41 ad Exp $"); #include #include @@ -127,7 +127,7 @@ loop: LIST_FOREACH(hp, hpp, h_hash) { if (cnid == hp->h_rec.cnid && dev == hp->h_dev) { vp = HTOV(hp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); simple_unlock(&hfs_nhash_slock); if (vget(vp, flags | LK_INTERLOCK)) goto loop; diff --git a/sys/fs/hfs/hfs_subr.c b/sys/fs/hfs/hfs_subr.c index 2929b31854d7..5d11b289467c 100644 --- a/sys/fs/hfs/hfs_subr.c +++ b/sys/fs/hfs/hfs_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: hfs_subr.c,v 1.6 2007/11/26 19:01:45 pooka Exp $ */ +/* $NetBSD: hfs_subr.c,v 1.7 2008/01/02 11:48:41 ad Exp $ */ /*- * Copyright (c) 2005, 2007 The NetBSD Foundation, Inc. 
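efs_inactive() and filecore_inactive() above follow the same VOP_INACTIVE conversion as cd9660 and adosfs: instead of calling vrecycle() itself, each filesystem reports through the new a_recycle output whether the node is dead and lets the caller reclaim it, with the decision made before VOP_UNLOCK(). A hypothetical, simplified sketch of the new shape:

    #include <stdbool.h>
    #include <stdio.h>

    struct toy_inode {
        int mode;               /* 0 means the file is gone */
    };

    /*
     * New-style inactive: report recyclability instead of calling
     * vrecycle() directly; the vnode unlock would follow the
     * assignment, as in the hunks above.
     */
    static int
    toy_inactive(struct toy_inode *ip, bool *recyclep)
    {
        *recyclep = (ip->mode == 0);
        return 0;
    }

    int
    main(void)
    {
        struct toy_inode ip = { 0 };
        bool recycle;

        toy_inactive(&ip, &recycle);
        printf("recycle=%d\n", (int)recycle);
        return 0;
    }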
@@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: hfs_subr.c,v 1.6 2007/11/26 19:01:45 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: hfs_subr.c,v 1.7 2008/01/02 11:48:41 ad Exp $"); #include #include @@ -83,7 +83,6 @@ hfs_vinit(struct mount *mp, int (**specops)(void *), int (**fifoops)(void *), vp->v_vflag &= ~VV_LOCKSWORK; VOP_UNLOCK(vp, 0); vp->v_op = specops; - vrele(vp); vgone(vp); lockmgr(&nvp->v_lock, LK_EXCLUSIVE, &nvp->v_interlock); diff --git a/sys/fs/msdosfs/msdosfs_denode.c b/sys/fs/msdosfs/msdosfs_denode.c index bcaea49a9988..46b52412f54f 100644 --- a/sys/fs/msdosfs/msdosfs_denode.c +++ b/sys/fs/msdosfs/msdosfs_denode.c @@ -1,4 +1,4 @@ -/* $NetBSD: msdosfs_denode.c,v 1.29 2007/12/28 17:46:48 reinoud Exp $ */ +/* $NetBSD: msdosfs_denode.c,v 1.30 2008/01/02 11:48:41 ad Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. @@ -48,7 +48,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: msdosfs_denode.c,v 1.29 2007/12/28 17:46:48 reinoud Exp $"); +__KERNEL_RCSID(0, "$NetBSD: msdosfs_denode.c,v 1.30 2008/01/02 11:48:41 ad Exp $"); #include #include @@ -176,7 +176,7 @@ loop: if (flags == 0) { mutex_exit(&msdosfs_ihash_lock); } else { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&msdosfs_ihash_lock); if (vget(vp, flags | LK_INTERLOCK)) goto loop; @@ -668,6 +668,7 @@ msdosfs_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + bool *a_recycle; } */ *ap = v; struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); @@ -704,7 +705,6 @@ msdosfs_inactive(v) } deupdat(dep, 0); out: - VOP_UNLOCK(vp, 0); /* * If we are done with the denode, reclaim it * so that it can be reused immediately. @@ -713,8 +713,8 @@ out: printf("msdosfs_inactive(): v_usecount %d, de_Name[0] %x\n", vp->v_usecount, dep->de_Name[0]); #endif - if (dep->de_Name[0] == SLOT_DELETED) - vrecycle(vp, (struct simplelock *)0, curlwp); + *ap->a_recycle = (dep->de_Name[0] == SLOT_DELETED); + VOP_UNLOCK(vp, 0); return (error); } diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c index b428b3e33a3f..7e162241d9fa 100644 --- a/sys/fs/msdosfs/msdosfs_vfsops.c +++ b/sys/fs/msdosfs/msdosfs_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: msdosfs_vfsops.c,v 1.55 2007/12/08 19:29:43 pooka Exp $ */ +/* $NetBSD: msdosfs_vfsops.c,v 1.56 2008/01/02 11:48:41 ad Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. @@ -48,7 +48,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: msdosfs_vfsops.c,v 1.55 2007/12/08 19:29:43 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: msdosfs_vfsops.c,v 1.56 2008/01/02 11:48:41 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -937,7 +937,7 @@ msdosfs_sync(mp, waitfor, cred) int waitfor; kauth_cred_t cred; { - struct vnode *vp, *nvp; + struct vnode *vp, *mvp; struct denode *dep; struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); int error, allerror = 0; @@ -953,44 +953,47 @@ msdosfs_sync(mp, waitfor, cred) /* update fats here */ } } + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return ENOMEM; /* * Write back each (modified) denode. */ - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); loop: - for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { - /* - * If the vnode that we are about to sync is no longer - * associated with this mount point, start over. 
- */ - if (vp->v_mount != mp) - goto loop; - simple_lock(&vp->v_interlock); - nvp = TAILQ_NEXT(vp, v_mntvnodes); + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + if (vp->v_mount != mp || vismarker(vp)) + continue; + mutex_enter(&vp->v_interlock); dep = VTODE(vp); if (waitfor == MNT_LAZY || vp->v_type == VNON || (((dep->de_flag & (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) && (LIST_EMPTY(&vp->v_dirtyblkhd) && UVM_OBJ_IS_CLEAN(&vp->v_uobj)))) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); continue; } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); if (error) { - simple_lock(&mntvnode_slock); - if (error == ENOENT) + mutex_enter(&mntvnode_lock); + if (error == ENOENT) { + (void)vunmark(mvp); goto loop; + } continue; } if ((error = VOP_FSYNC(vp, cred, waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0)) != 0) allerror = error; vput(vp); - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); + vfree(mvp); + /* * Force stale file system control information to be flushed. */ diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c index 7e44440c72b1..0e473287384a 100644 --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: msdosfs_vnops.c,v 1.45 2007/12/28 17:46:48 reinoud Exp $ */ +/* $NetBSD: msdosfs_vnops.c,v 1.46 2008/01/02 11:48:42 ad Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. @@ -48,7 +48,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: msdosfs_vnops.c,v 1.45 2007/12/28 17:46:48 reinoud Exp $"); +__KERNEL_RCSID(0, "$NetBSD: msdosfs_vnops.c,v 1.46 2008/01/02 11:48:42 ad Exp $"); #include #include @@ -213,10 +213,10 @@ msdosfs_close(v) struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_usecount > 1) DETIMES(dep, NULL, NULL, NULL, dep->de_pmp->pm_gmtoff); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (0); } @@ -659,7 +659,7 @@ msdosfs_write(v) */ if (!async && oldoff >> 16 != uio->uio_offset >> 16) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16, (uio->uio_offset >> 16) << 16, PGO_CLEANIT); } @@ -668,7 +668,7 @@ msdosfs_write(v) /* set final size */ uvm_vnp_setsize(vp, dep->de_FileSize); if (error == 0 && ioflag & IO_SYNC) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(oldoff), round_page(oldoff + bytelen), PGO_CLEANIT | PGO_SYNCIO); } diff --git a/sys/fs/ntfs/ntfs_inode.h b/sys/fs/ntfs/ntfs_inode.h index 2deafe3b03be..6650354fc37a 100644 --- a/sys/fs/ntfs/ntfs_inode.h +++ b/sys/fs/ntfs/ntfs_inode.h @@ -1,4 +1,4 @@ -/* $NetBSD: ntfs_inode.h,v 1.4 2007/03/04 06:03:00 christos Exp $ */ +/* $NetBSD: ntfs_inode.h,v 1.5 2008/01/02 11:48:42 ad Exp $ */ /*- * Copyright (c) 1998, 1999 Semen Ustimenko @@ -73,7 +73,7 @@ struct ntnode { /* locking */ struct lock i_lock; - struct simplelock i_interlock; + kmutex_t i_interlock; int i_usecount; LIST_HEAD(,fnode) i_fnlist; diff --git a/sys/fs/ntfs/ntfs_subr.c b/sys/fs/ntfs/ntfs_subr.c index 7a0927c3d674..b0e2c31a18d6 100644 --- a/sys/fs/ntfs/ntfs_subr.c +++ b/sys/fs/ntfs/ntfs_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: ntfs_subr.c,v 1.33 2007/10/10 20:42:24 ad Exp $ */ +/* $NetBSD: ntfs_subr.c,v 1.34 2008/01/02 11:48:42 ad Exp $ */ 
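The msdosfs_sync() loop above introduces the marker-vnode idiom (valloc/vmark/vunmark/vismarker) that also appears later in the puffs and smbfs sync paths: a dummy node is parked after the current vnode so the iteration can survive the list changing while mntvnode_lock is dropped, instead of restarting from the head on every race. A runnable userland model on a toy doubly-linked list, with a pthread mutex standing in for mntvnode_lock:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct node {
        struct node *prev, *next;
        bool marker;            /* true for iteration markers */
        int val;
    };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    static void
    insert_after(struct node *pos, struct node *n)
    {
        n->prev = pos;
        n->next = pos->next;
        if (pos->next != NULL)
            pos->next->prev = n;
        pos->next = n;
    }

    static void
    remove_node(struct node *n)
    {
        n->prev->next = n->next;
        if (n->next != NULL)
            n->next->prev = n->prev;
    }

    int
    main(void)
    {
        struct node head = {0}, a = {0}, b = {0}, mvp = {0};
        struct node *vp;

        mvp.marker = true;
        a.val = 1;
        b.val = 2;
        insert_after(&head, &b);
        insert_after(&head, &a);        /* list: head, a, b */

        pthread_mutex_lock(&list_lock);
        for (vp = head.next; vp != NULL;) {
            insert_after(vp, &mvp);     /* kernel: vmark(mvp, vp) */
            if (!vp->marker) {          /* kernel: vismarker(vp) */
                /* Drop the lock to do real work on vp; the marker
                 * keeps our place if the list changes meanwhile. */
                pthread_mutex_unlock(&list_lock);
                printf("visiting %d\n", vp->val);
                pthread_mutex_lock(&list_lock);
            }
            vp = mvp.next;              /* kernel: vunmark(mvp) */
            remove_node(&mvp);
        }
        pthread_mutex_unlock(&list_lock);
        return 0;
    }

Other walkers simply skip nodes flagged as markers, which is what the vismarker() test in the kernel loops does.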
/*- * Copyright (c) 1998, 1999 Semen Ustimenko (semenu@FreeBSD.org) @@ -29,7 +29,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ntfs_subr.c,v 1.33 2007/10/10 20:42:24 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ntfs_subr.c,v 1.34 2008/01/02 11:48:42 ad Exp $"); #include #include @@ -385,7 +385,7 @@ ntfs_ntget(ip) dprintf(("ntfs_ntget: get ntnode %llu: %p, usecount: %d\n", (unsigned long long)ip->i_number, ip, ip->i_usecount)); - simple_lock(&ip->i_interlock); + mutex_enter(&ip->i_interlock); ip->i_usecount++; lockmgr(&ip->i_lock, LK_EXCLUSIVE | LK_INTERLOCK, &ip->i_interlock); @@ -445,7 +445,7 @@ ntfs_ntlookup( /* init lock and lock the newborn ntnode */ lockinit(&ip->i_lock, PINOD, "ntnode", 0, LK_EXCLUSIVE); - simple_lock_init(&ip->i_interlock); + mutex_init(&ip->i_interlock, MUTEX_DEFAULT, IPL_NONE); ntfs_ntget(ip); ntfs_nthashins(ip); @@ -475,7 +475,7 @@ ntfs_ntput(ip) dprintf(("ntfs_ntput: rele ntnode %llu: %p, usecount: %d\n", (unsigned long long)ip->i_number, ip, ip->i_usecount)); - simple_lock(&ip->i_interlock); + mutex_enter(&ip->i_interlock); ip->i_usecount--; #ifdef DIAGNOSTIC @@ -501,6 +501,8 @@ ntfs_ntput(ip) LIST_REMOVE(vap,va_list); ntfs_freentvattr(vap); } + mutex_destroy(&ip->i_interlock); + lockdestroy(&ip->i_lock); FREE(ip, M_NTFSNTNODE); } } @@ -512,9 +514,9 @@ void ntfs_ntref(ip) struct ntnode *ip; { - simple_lock(&ip->i_interlock); + mutex_enter(&ip->i_interlock); ip->i_usecount++; - simple_unlock(&ip->i_interlock); + mutex_exit(&ip->i_interlock); dprintf(("ntfs_ntref: ino %llu, usecount: %d\n", (unsigned long long)ip->i_number, ip->i_usecount)); @@ -531,13 +533,13 @@ ntfs_ntrele(ip) dprintf(("ntfs_ntrele: rele ntnode %llu: %p, usecount: %d\n", (unsigned long long)ip->i_number, ip, ip->i_usecount)); - simple_lock(&ip->i_interlock); + mutex_enter(&ip->i_interlock); ip->i_usecount--; if (ip->i_usecount < 0) panic("ntfs_ntrele: ino: %llu usecount: %d ", (unsigned long long)ip->i_number, ip->i_usecount); - simple_unlock(&ip->i_interlock); + mutex_exit(&ip->i_interlock); } /* diff --git a/sys/fs/ptyfs/ptyfs_subr.c b/sys/fs/ptyfs/ptyfs_subr.c index ecf862610c00..ace55860ec6e 100644 --- a/sys/fs/ptyfs/ptyfs_subr.c +++ b/sys/fs/ptyfs/ptyfs_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: ptyfs_subr.c,v 1.11 2007/12/08 19:29:44 pooka Exp $ */ +/* $NetBSD: ptyfs_subr.c,v 1.12 2008/01/02 11:48:43 ad Exp $ */ /* * Copyright (c) 1993 @@ -73,7 +73,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ptyfs_subr.c,v 1.11 2007/12/08 19:29:44 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ptyfs_subr.c,v 1.12 2008/01/02 11:48:43 ad Exp $"); #include #include @@ -239,7 +239,6 @@ ptyfs_allocvp(struct mount *mp, struct vnode **vpp, ptyfstype type, int pty, vp->v_vflag &= ~VV_LOCKSWORK; VOP_UNLOCK(vp, 0); vp->v_op = spec_vnodeop_p; - vrele(vp); vgone(vp); lockmgr(&nvp->v_lock, LK_EXCLUSIVE, &nvp->v_interlock); /* @@ -374,7 +373,7 @@ loop: vp = PTYFSTOV(pp); if (pty == pp->ptyfs_pty && pp->ptyfs_type == type && vp->v_mount == mp) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&ptyfs_used_slock); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) goto loop; diff --git a/sys/fs/ptyfs/ptyfs_vnops.c b/sys/fs/ptyfs/ptyfs_vnops.c index 8d8f5a0ecb23..d700fcf3f3e0 100644 --- a/sys/fs/ptyfs/ptyfs_vnops.c +++ b/sys/fs/ptyfs/ptyfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: ptyfs_vnops.c,v 1.26 2007/11/26 19:01:49 pooka Exp $ */ +/* $NetBSD: ptyfs_vnops.c,v 1.27 2008/01/02 11:48:43 ad Exp $ */ /* * Copyright (c) 1993, 1995 @@ -76,7 +76,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ptyfs_vnops.c,v 1.26 2007/11/26 
19:01:49 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ptyfs_vnops.c,v 1.27 2008/01/02 11:48:43 ad Exp $"); #include #include @@ -745,10 +745,10 @@ ptyfs_close(void *v) struct vnode *vp = ap->a_vp; struct ptyfsnode *ptyfs = VTOPTYFS(vp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_usecount > 1) PTYFS_ITIMES(ptyfs, NULL, NULL, NULL); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); switch (ptyfs->ptyfs_type) { case PTYFSpts: diff --git a/sys/fs/puffs/puffs_msgif.c b/sys/fs/puffs/puffs_msgif.c index 6035ad78e5b9..b18eea97c17c 100644 --- a/sys/fs/puffs/puffs_msgif.c +++ b/sys/fs/puffs/puffs_msgif.c @@ -1,4 +1,4 @@ -/* $NetBSD: puffs_msgif.c,v 1.61 2007/12/05 12:11:56 pooka Exp $ */ +/* $NetBSD: puffs_msgif.c,v 1.62 2008/01/02 11:48:43 ad Exp $ */ /* * Copyright (c) 2005, 2006, 2007 Antti Kantee. All Rights Reserved. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: puffs_msgif.c,v 1.61 2007/12/05 12:11:56 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: puffs_msgif.c,v 1.62 2008/01/02 11:48:43 ad Exp $"); #include #include @@ -927,7 +927,7 @@ puffsop_flush(struct puffs_mount *pmp, struct puffs_flush *pf) break; } - simple_lock(&vp->v_uobj.vmobjlock); + mutex_enter(&vp->v_uobj.vmobjlock); rv = VOP_PUTPAGES(vp, offlo, offhi, flags); break; @@ -1032,18 +1032,18 @@ puffs_msgif_close(void *this) * wait for syncer_mutex. Otherwise the mointpoint can be * wiped out while we wait. */ - simple_lock(&mp->mnt_slock); + mutex_enter(&mp->mnt_mutex); mp->mnt_wcnt++; - simple_unlock(&mp->mnt_slock); + mutex_exit(&mp->mnt_mutex); mutex_enter(&syncer_mutex); - simple_lock(&mp->mnt_slock); + mutex_enter(&mp->mnt_mutex); mp->mnt_wcnt--; if (mp->mnt_wcnt == 0) wakeup(&mp->mnt_wcnt); gone = mp->mnt_iflag & IMNT_GONE; - simple_unlock(&mp->mnt_slock); + mutex_exit(&mp->mnt_mutex); if (gone) { mutex_exit(&syncer_mutex); return 0; diff --git a/sys/fs/puffs/puffs_node.c b/sys/fs/puffs/puffs_node.c index f338f36599af..bf71c3a3f0ed 100644 --- a/sys/fs/puffs/puffs_node.c +++ b/sys/fs/puffs/puffs_node.c @@ -1,4 +1,4 @@ -/* $NetBSD: puffs_node.c,v 1.8 2007/11/17 21:55:29 pooka Exp $ */ +/* $NetBSD: puffs_node.c,v 1.9 2008/01/02 11:48:43 ad Exp $ */ /* * Copyright (c) 2005, 2006, 2007 Antti Kantee. All Rights Reserved. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: puffs_node.c,v 1.8 2007/11/17 21:55:29 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: puffs_node.c,v 1.9 2008/01/02 11:48:43 ad Exp $"); #include #include @@ -136,10 +136,10 @@ puffs_getvnode(struct mount *mp, void *cookie, enum vtype type, */ /* So mp is not dead yet.. good.. 
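puffs_msgif_close() above converts the mnt_slock'd waiter count to the new mnt_mutex: mnt_wcnt pins the mountpoint while the thread sleeps on syncer_mutex, and the last waiter out is woken via wakeup(&mp->mnt_wcnt). A pthread analogue, with a condition variable standing in for the kernel's wakeup channel and a toy mount structure:

    #include <pthread.h>
    #include <stdio.h>

    struct toy_mount {
        pthread_mutex_t mnt_mutex;   /* replaces mnt_slock */
        pthread_cond_t mnt_wcnt_cv;  /* stands in for wakeup(&mnt_wcnt) */
        int mnt_wcnt;                /* threads pinning the mount */
    };

    static void
    mount_pin(struct toy_mount *mp)
    {
        pthread_mutex_lock(&mp->mnt_mutex);
        mp->mnt_wcnt++;              /* keep the mount from going away */
        pthread_mutex_unlock(&mp->mnt_mutex);
    }

    static void
    mount_unpin(struct toy_mount *mp)
    {
        pthread_mutex_lock(&mp->mnt_mutex);
        if (--mp->mnt_wcnt == 0)     /* kernel: wakeup(&mp->mnt_wcnt) */
            pthread_cond_broadcast(&mp->mnt_wcnt_cv);
        pthread_mutex_unlock(&mp->mnt_mutex);
    }

    int
    main(void)
    {
        struct toy_mount m = { PTHREAD_MUTEX_INITIALIZER,
            PTHREAD_COND_INITIALIZER, 0 };

        mount_pin(&m);
        /* ...acquire syncer_mutex and do the real work here... */
        mount_unpin(&m);
        printf("wcnt=%d\n", m.mnt_wcnt);
        return 0;
    }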
inform new vnode of its master */ - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); - simple_unlock(&mntvnode_slock); vp->v_mount = mp; + mutex_exit(&mntvnode_lock); /* * clerical tasks & footwork @@ -167,7 +167,6 @@ puffs_getvnode(struct mount *mp, void *cookie, enum vtype type, */ vp->v_op = spec_vnodeop_p; vp->v_vflag &= ~VV_LOCKSWORK; - vrele(vp); vgone(vp); /* cya */ /* init "new" vnode */ @@ -374,7 +373,7 @@ puffs_makeroot(struct puffs_mount *pmp) mutex_enter(&pmp->pmp_lock); vp = pmp->pmp_root; if (vp) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&pmp->pmp_lock); if (vget(vp, LK_INTERLOCK) == 0) return 0; @@ -452,7 +451,7 @@ puffs_cookie2vnode(struct puffs_mount *pmp, void *cookie, int lock, return PUFFS_NOSUCHCOOKIE; } vp = pnode->pn_vp; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&pmp->pmp_lock); vgetflags = LK_INTERLOCK; diff --git a/sys/fs/puffs/puffs_sys.h b/sys/fs/puffs/puffs_sys.h index b8df5c12f7a7..4f0abeb5eac8 100644 --- a/sys/fs/puffs/puffs_sys.h +++ b/sys/fs/puffs/puffs_sys.h @@ -1,4 +1,4 @@ -/* $NetBSD: puffs_sys.h,v 1.67 2007/12/08 19:57:04 pooka Exp $ */ +/* $NetBSD: puffs_sys.h,v 1.68 2008/01/02 11:48:43 ad Exp $ */ /* * Copyright (c) 2005, 2006 Antti Kantee. All Rights Reserved. @@ -152,7 +152,6 @@ struct puffs_mount { #define PNODE_NOREFS 0x01 /* no backend reference */ -#define PNODE_DYING 0x02 /* NOREF + inactive */ #define PNODE_SUSPEND 0x04 /* issue all operations as FAF */ #define PNODE_DOINACT 0x08 /* if inactive-on-demand, call inactive */ diff --git a/sys/fs/puffs/puffs_vfsops.c b/sys/fs/puffs/puffs_vfsops.c index c5537330eb8f..1ecaf16a00d5 100644 --- a/sys/fs/puffs/puffs_vfsops.c +++ b/sys/fs/puffs/puffs_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: puffs_vfsops.c,v 1.73 2007/12/30 23:04:12 pooka Exp $ */ +/* $NetBSD: puffs_vfsops.c,v 1.74 2008/01/02 11:48:44 ad Exp $ */ /* * Copyright (c) 2005, 2006 Antti Kantee. All Rights Reserved. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: puffs_vfsops.c,v 1.73 2007/12/30 23:04:12 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: puffs_vfsops.c,v 1.74 2008/01/02 11:48:44 ad Exp $"); #include #include @@ -428,7 +428,7 @@ static int pageflush(struct mount *mp, kauth_cred_t cred, int waitfor, int suspending) { struct puffs_node *pn; - struct vnode *vp, *nvp; + struct vnode *vp, *mvp; int error, rv; KASSERT(((waitfor == MNT_WAIT) && suspending) == 0); @@ -438,29 +438,31 @@ pageflush(struct mount *mp, kauth_cred_t cred, int waitfor, int suspending) error = 0; + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return ENOMEM; + /* * Sync all cached data from regular vnodes (which are not * currently locked, see below). After this we call VFS_SYNC * for the fs server, which should handle data and metadata for * all the nodes it knows to exist. 
*/ - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); loop: - for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { - /* check if we're on the right list */ - if (vp->v_mount != mp) - goto loop; + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + if (vp->v_mount != mp || vismarker(vp)) + continue; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); pn = VPTOPP(vp); - nvp = TAILQ_NEXT(vp, v_mntvnodes); - if (vp->v_type != VREG || UVM_OBJ_IS_CLEAN(&vp->v_uobj)) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); continue; } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); /* * Here we try to get a reference to the vnode and to @@ -482,9 +484,11 @@ pageflush(struct mount *mp, kauth_cred_t cred, int waitfor, int suspending) */ rv = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); if (rv) { - simple_lock(&mntvnode_slock); - if (rv == ENOENT) + mutex_enter(&mntvnode_lock); + if (rv == ENOENT) { + (void)vunmark(mvp); goto loop; + } continue; } @@ -510,22 +514,23 @@ pageflush(struct mount *mp, kauth_cred_t cred, int waitfor, int suspending) * TODO: Maybe also hint the user server of this twist? */ if (suspending || waitfor == MNT_LAZY) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); pn->pn_stat |= PNODE_SUSPEND; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } rv = VOP_FSYNC(vp, cred, waitfor, 0, 0); if (suspending || waitfor == MNT_LAZY) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); pn->pn_stat &= ~PNODE_SUSPEND; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } if (rv) error = rv; vput(vp); - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); + vfree(mvp); return error; } diff --git a/sys/fs/puffs/puffs_vnops.c b/sys/fs/puffs/puffs_vnops.c index f6374bc17968..98142db08b01 100644 --- a/sys/fs/puffs/puffs_vnops.c +++ b/sys/fs/puffs/puffs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: puffs_vnops.c,v 1.123 2007/12/30 23:04:12 pooka Exp $ */ +/* $NetBSD: puffs_vnops.c,v 1.124 2008/01/02 11:48:44 ad Exp $ */ /* * Copyright (c) 2005, 2006, 2007 Antti Kantee. All Rights Reserved. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: puffs_vnops.c,v 1.123 2007/12/30 23:04:12 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: puffs_vnops.c,v 1.124 2008/01/02 11:48:44 ad Exp $"); #include #include @@ -1028,16 +1028,13 @@ puffs_vnop_inactive(void *v) } pnode->pn_stat &= ~PNODE_DOINACT; - VOP_UNLOCK(vp, 0); - /* * file server thinks it's gone? 
then don't be afraid care, * node's life was already all it would ever be */ - if (pnode->pn_stat & PNODE_NOREFS) { - pnode->pn_stat |= PNODE_DYING; - vrecycle(vp, NULL, curlwp); - } + *ap->a_recycle = ((pnode->pn_stat & PNODE_NOREFS) != 0); + + VOP_UNLOCK(vp, 0); return 0; } @@ -1291,8 +1288,7 @@ puffs_vnop_fsync(void *v) pn = VPTOPP(vp); /* flush out information from our metacache, see vop_setattr */ - if (pn->pn_stat & PNODE_METACACHE_MASK - && (pn->pn_stat & PNODE_DYING) == 0) { + if (pn->pn_stat & PNODE_METACACHE_MASK) { vattr_null(&va); error = VOP_SETATTR(vp, &va, FSCRED); if (error) @@ -1305,7 +1301,7 @@ puffs_vnop_fsync(void *v) pflags = PGO_CLEANIT; if (ap->a_flags & FSYNC_WAIT) pflags |= PGO_SYNCIO; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), round_page(ap->a_offhi), pflags); if (error) @@ -1317,7 +1313,7 @@ puffs_vnop_fsync(void *v) * has references neither in the kernel or the fs server. * Otherwise we continue to issue fsync() forward. */ - if (!EXISTSOP(pmp, FSYNC) || (pn->pn_stat & PNODE_DYING)) + if (!EXISTSOP(pmp, FSYNC)) return 0; dofaf = (ap->a_flags & FSYNC_WAIT) == 0 || ap->a_flags == FSYNC_LAZY; @@ -1329,10 +1325,10 @@ puffs_vnop_fsync(void *v) * vnode to be reclaimed from the freelist for this fs. */ if (dofaf == 0) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_iflag & VI_XLOCK) dofaf = 1; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } PUFFS_MSG_ALLOC(vn, fsync); @@ -1948,7 +1944,7 @@ puffs_vnop_write(void *v) * that gives userland too much say in the kernel. */ if (oldoff >> 16 != uio->uio_offset >> 16) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, oldoff & ~0xffff, uio->uio_offset & ~0xffff, PGO_CLEANIT | PGO_SYNCIO); @@ -1959,14 +1955,14 @@ puffs_vnop_write(void *v) /* synchronous I/O? */ if (error == 0 && ap->a_ioflag & IO_SYNC) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(origoff), round_page(uio->uio_offset), PGO_CLEANIT | PGO_SYNCIO); /* write through page cache? */ } else if (error == 0 && pmp->pmp_flags & PUFFS_KFLAG_WTCACHE) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(origoff), round_page(uio->uio_offset), PGO_CLEANIT); } @@ -2147,16 +2143,6 @@ puffs_vnop_strategy(void *v) || (BUF_ISWRITE(bp) && !EXISTSOP(pmp, WRITE))) ERROUT(EOPNOTSUPP); - /* - * Short-circuit optimization: don't flush buffer in between - * VOP_INACTIVE and VOP_RECLAIM in case the node has no references. 
- */ - if (pn->pn_stat & PNODE_DYING) { - KASSERT(BUF_ISWRITE(bp)); - bp->b_resid = 0; - goto out; - } - #ifdef DIAGNOSTIC if (bp->b_bcount > pmp->pmp_msg_maxsize - PUFFS_MSGSTRUCT_MAX) panic("puffs_strategy: wildly inappropriate buf bcount %d", @@ -2170,12 +2156,12 @@ puffs_vnop_strategy(void *v) * See puffs_vfsops.c:pageflush() */ if (BUF_ISWRITE(bp)) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_iflag & VI_XLOCK) dofaf = 1; if (pn->pn_stat & PNODE_SUSPEND) dofaf = 1; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } #ifdef DIAGNOSTIC @@ -2239,10 +2225,10 @@ puffs_vnop_strategy(void *v) DPRINTF(("puffs_strategy: write-protecting " "vp %p page %p, offset %" PRId64"\n", vp, vmp, vmp->offset)); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); vmp->flags |= PG_RDONLY; pmap_page_protect(vmp, VM_PROT_READ); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } } @@ -2434,13 +2420,13 @@ puffs_vnop_getpages(void *v) if (locked) ERROUT(EBUSY); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); vattr_null(&va); va.va_size = vp->v_size; error = dosetattr(vp, &va, FSCRED, 0); if (error) ERROUT(error); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); } if (write && PUFFS_WCACHEINFO(pmp)) { @@ -2481,7 +2467,7 @@ puffs_vnop_getpages(void *v) * when the page is actually write-faulted to. */ if (!locked) - simple_lock(&vp->v_uobj.vmobjlock); + mutex_enter(&vp->v_uobj.vmobjlock); for (i = 0, si = 0, streakon = 0; i < npages; i++) { if (pgs[i] == NULL || pgs[i] == PGO_DONTCARE) { if (streakon && write) { @@ -2507,7 +2493,7 @@ puffs_vnop_getpages(void *v) si++; } if (!locked) - simple_unlock(&vp->v_uobj.vmobjlock); + mutex_exit(&vp->v_uobj.vmobjlock); KASSERT(si <= (npages / 2) + 1); @@ -2559,7 +2545,7 @@ puffs_vnop_lock(void *v) */ if (fstrans_is_owner(mp) && fstrans_getstate(mp) == FSTRANS_SUSPENDING){ if (ap->a_flags & LK_INTERLOCK) - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return 0; } @@ -2583,7 +2569,7 @@ puffs_vnop_unlock(void *v) /* XXX: see puffs_lock() */ if (fstrans_is_owner(mp) && fstrans_getstate(mp) == FSTRANS_SUSPENDING){ if (ap->a_flags & LK_INTERLOCK) - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return 0; } diff --git a/sys/fs/smbfs/smbfs_io.c b/sys/fs/smbfs/smbfs_io.c index 378cf3416edc..c33b1e4d75ea 100644 --- a/sys/fs/smbfs/smbfs_io.c +++ b/sys/fs/smbfs/smbfs_io.c @@ -1,4 +1,4 @@ -/* $NetBSD: smbfs_io.c,v 1.28 2007/11/26 19:01:52 pooka Exp $ */ +/* $NetBSD: smbfs_io.c,v 1.29 2008/01/02 11:48:44 ad Exp $ */ /* * Copyright (c) 2000-2001, Boris Popov @@ -36,7 +36,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: smbfs_io.c,v 1.28 2007/11/26 19:01:52 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: smbfs_io.c,v 1.29 2008/01/02 11:48:44 ad Exp $"); #include #include @@ -354,9 +354,9 @@ smbfs_doio(struct buf *bp, kauth_cred_t cr, struct lwp *l) uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT; io.iov_base = bp->b_data; uiop->uio_rw = UIO_WRITE; - bp->b_flags |= B_BUSY; + bp->b_cflags |= BC_BUSY; error = smb_write(smp->sm_share, np->n_fid, uiop, &scred); - bp->b_flags &= ~B_BUSY; + bp->b_cflags &= ~BC_BUSY; #ifndef __NetBSD__ /* XXX */ diff --git a/sys/fs/smbfs/smbfs_node.c b/sys/fs/smbfs/smbfs_node.c index 97ca97451616..c282cec96130 100644 --- a/sys/fs/smbfs/smbfs_node.c +++ b/sys/fs/smbfs/smbfs_node.c @@ -1,4 +1,4 @@ -/* $NetBSD: smbfs_node.c,v 1.34 2007/11/30 11:23:10 pooka Exp $ */ +/* $NetBSD: smbfs_node.c,v 1.35 2008/01/02 11:48:45 
ad Exp $ */ /* * Copyright (c) 2000-2001 Boris Popov @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: smbfs_node.c,v 1.34 2007/11/30 11:23:10 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: smbfs_node.c,v 1.35 2008/01/02 11:48:45 ad Exp $"); #include #include @@ -135,7 +135,7 @@ loop: || memcmp(name, np->n_name, nmlen) != 0) continue; vp = SMBTOV(np); - simple_lock(&(vp)->v_interlock); + mutex_enter(&(vp)->v_interlock); smbfs_hash_unlock(smp); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK) != 0) goto retry; diff --git a/sys/fs/smbfs/smbfs_vfsops.c b/sys/fs/smbfs/smbfs_vfsops.c index 03ca4e594ead..5bf566b1f63e 100644 --- a/sys/fs/smbfs/smbfs_vfsops.c +++ b/sys/fs/smbfs/smbfs_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: smbfs_vfsops.c,v 1.73 2007/11/26 19:01:52 pooka Exp $ */ +/* $NetBSD: smbfs_vfsops.c,v 1.74 2008/01/02 11:48:45 ad Exp $ */ /* * Copyright (c) 2000-2001, Boris Popov @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: smbfs_vfsops.c,v 1.73 2007/11/26 19:01:52 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: smbfs_vfsops.c,v 1.74 2008/01/02 11:48:45 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_quota.h" @@ -394,46 +394,46 @@ smbfs_statvfs(struct mount *mp, struct statvfs *sbp) int smbfs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) { - struct vnode *vp, *nvp; + struct vnode *vp, *mvp; struct smbnode *np; int error, allerror = 0; + + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return ENOMEM; /* * Force stale buffer cache information to be flushed. */ - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); loop: - /* - * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() - * and vclean() can be called indirectly - */ - for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ - if (vp->v_mount != mp) - goto loop; - simple_lock(&vp->v_interlock); - nvp = TAILQ_NEXT(vp, v_mntvnodes); - + if (vp->v_mount != mp || vismarker(vp)) + continue; + mutex_enter(&vp->v_interlock); np = VTOSMB(vp); if (np == NULL) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); continue; } - if ((vp->v_type == VNON || (np->n_flag & NMODIFIED) == 0) && LIST_EMPTY(&vp->v_dirtyblkhd) && - UVM_OBJ_IS_CLEAN(&vp->v_uobj)) { - simple_unlock(&vp->v_interlock); + vp->v_uobj.uo_npages == 0) { + mutex_exit(&vp->v_interlock); continue; } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); if (error) { - simple_lock(&mntvnode_slock); - if (error == ENOENT) + mutex_enter(&mntvnode_lock); + if (error == ENOENT) { + (void)vunmark(mvp); goto loop; + } continue; } error = VOP_FSYNC(vp, cred, @@ -441,9 +441,10 @@ loop: if (error) allerror = error; vput(vp); - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); + vfree(mvp); return (allerror); } diff --git a/sys/fs/sysvbfs/sysvbfs_vfsops.c b/sys/fs/sysvbfs/sysvbfs_vfsops.c index 39fc5739a6fa..334190c6ab5d 100644 --- a/sys/fs/sysvbfs/sysvbfs_vfsops.c +++ b/sys/fs/sysvbfs/sysvbfs_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: sysvbfs_vfsops.c,v 1.20 2007/12/15 00:39:36 perry Exp $ */ +/* $NetBSD: sysvbfs_vfsops.c,v 1.21 2008/01/02 11:48:46 ad Exp $ */ /*- * Copyright (c) 2004 The NetBSD Foundation, Inc. 
@@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sysvbfs_vfsops.c,v 1.20 2007/12/15 00:39:36 perry Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sysvbfs_vfsops.c,v 1.21 2008/01/02 11:48:46 ad Exp $"); #include #include @@ -307,11 +307,12 @@ sysvbfs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) DPRINTF("%s:\n", __func__); error = 0; - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); for (bnode = LIST_FIRST(&bmp->bnode_head); bnode != NULL; bnode = LIST_NEXT(bnode, link)) { - simple_unlock(&mntvnode_slock); v = bnode->vnode; + mutex_enter(&v->v_interlock); + mutex_exit(&mntvnode_lock); err = vget(v, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); if (err == 0) { err = VOP_FSYNC(v, cred, FSYNC_WAIT, 0, 0); @@ -319,9 +320,9 @@ sysvbfs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) } if (err != 0) error = err; - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); return error; } @@ -362,9 +363,9 @@ sysvbfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) vp->v_data = pool_get(&sysvbfs_node_pool, PR_WAITOK); memset(vp->v_data, 0, sizeof(struct sysvbfs_node)); bnode = vp->v_data; - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); LIST_INSERT_HEAD(&bmp->bnode_head, bnode, link); - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); bnode->vnode = vp; bnode->bmp = bmp; bnode->inode = inode; diff --git a/sys/fs/sysvbfs/sysvbfs_vnops.c b/sys/fs/sysvbfs/sysvbfs_vnops.c index 30b41b02d243..ec3dcae3f12f 100644 --- a/sys/fs/sysvbfs/sysvbfs_vnops.c +++ b/sys/fs/sysvbfs/sysvbfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: sysvbfs_vnops.c,v 1.16 2007/12/15 00:39:36 perry Exp $ */ +/* $NetBSD: sysvbfs_vnops.c,v 1.17 2008/01/02 11:48:46 ad Exp $ */ /*- * Copyright (c) 2004 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sysvbfs_vnops.c,v 1.16 2007/12/15 00:39:36 perry Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sysvbfs_vnops.c,v 1.17 2008/01/02 11:48:46 ad Exp $"); #include #include @@ -572,12 +572,13 @@ sysvbfs_inactive(void *arg) { struct vop_inactive_args /* { struct vnode *a_vp; + bool *a_recycle; } */ *a = arg; struct vnode *v = a->a_vp; DPRINTF("%s:\n", __func__); + *a->a_recycle = true; VOP_UNLOCK(v, 0); - vrecycle(v, NULL, curlwp); return 0; } @@ -593,9 +594,9 @@ sysvbfs_reclaim(void *v) struct sysvbfs_node *bnode = vp->v_data; DPRINTF("%s:\n", __func__); - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); LIST_REMOVE(bnode, link); - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); cache_purge(vp); genfs_node_destroy(vp); pool_put(&sysvbfs_node_pool, bnode); diff --git a/sys/fs/tmpfs/tmpfs.h b/sys/fs/tmpfs/tmpfs.h index 0bbe79baf09f..636e41403f94 100644 --- a/sys/fs/tmpfs/tmpfs.h +++ b/sys/fs/tmpfs/tmpfs.h @@ -1,7 +1,7 @@ -/* $NetBSD: tmpfs.h,v 1.29 2007/12/08 19:29:44 pooka Exp $ */ +/* $NetBSD: tmpfs.h,v 1.30 2008/01/02 11:48:46 ad Exp $ */ /* - * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. + * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -221,14 +221,9 @@ struct tmpfs_node { * * May be NULL when the node is unused (that is, no vnode has been * allocated for it or it has been reclaimed). */ + kmutex_t tn_vlock; struct vnode * tn_vnode; - /* Pointer to the node returned by tmpfs_lookup() after doing a - * delete or a rename lookup; its value is only valid in these two - * situations. 
In case we were looking up . or .., it holds a null - * pointer. */ - struct tmpfs_dirent * tn_lookup_dirent; - union { /* Valid when tn_type == VBLK || tn_type == VCHR. */ struct { @@ -298,11 +293,11 @@ struct tmpfs_mount { * used directly as it may be bigger than the current amount of * free memory; in the extreme case, it will hold the SIZE_MAX * value. Instead, use the TMPFS_PAGES_MAX macro. */ - size_t tm_pages_max; + u_int tm_pages_max; /* Number of pages in use by the file system. Cannot be bigger * than the value returned by TMPFS_PAGES_MAX in any case. */ - size_t tm_pages_used; + u_int tm_pages_used; /* Pointer to the node representing the root directory of this * file system. */ @@ -314,28 +309,16 @@ struct tmpfs_mount { * cannot be released until the file system is unmounted. * Otherwise, we could easily run out of memory by creating lots * of empty files and then simply removing them. */ - ino_t tm_nodes_max; + u_int tm_nodes_max; /* Number of nodes currently allocated. This number only grows. * When it reaches tm_nodes_max, no more new nodes can be allocated. * Of course, the old, unused ones can be reused. */ - ino_t tm_nodes_last; + u_int tm_nodes_cnt; - /* Nodes are organized in two different lists. The used list - * contains all nodes that are currently used by the file system; - * i.e., they refer to existing files. The available list contains - * all nodes that are currently available for use by new files. - * Nodes must be kept in this list (instead of deleting them) - * because we need to keep track of their generation number (tn_gen - * field). - * - * Note that nodes are lazily allocated: if the available list is - * empty and we have enough space to create more nodes, they will be - * created and inserted in the used list. Once these are released, - * they will go into the available list, remaining alive until the - * file system is unmounted. */ - struct tmpfs_node_list tm_nodes_used; - struct tmpfs_node_list tm_nodes_avail; + /* Node list. */ + kmutex_t tm_lock; + struct tmpfs_node_list tm_nodes; /* Pools used to store file system meta data. These are not shared * across several instances of tmpfs for the reasons described in @@ -466,7 +449,8 @@ TMPFS_PAGES_MAX(struct tmpfs_mount *tmp) } /* Returns the available space for the given file system. */ -#define TMPFS_PAGES_AVAIL(tmp) (TMPFS_PAGES_MAX(tmp) - (tmp)->tm_pages_used) +#define TMPFS_PAGES_AVAIL(tmp) \ + ((ssize_t)(TMPFS_PAGES_MAX(tmp) - (tmp)->tm_pages_used)) /* --------------------------------------------------------------------- */ diff --git a/sys/fs/tmpfs/tmpfs_pool.c b/sys/fs/tmpfs/tmpfs_pool.c index 47445b30fade..eb10888e4584 100644 --- a/sys/fs/tmpfs/tmpfs_pool.c +++ b/sys/fs/tmpfs/tmpfs_pool.c @@ -1,7 +1,7 @@ -/* $NetBSD: tmpfs_pool.c,v 1.11 2007/11/22 21:08:10 pooka Exp $ */ +/* $NetBSD: tmpfs_pool.c,v 1.12 2008/01/02 11:48:46 ad Exp $ */ /* - * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. + * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -42,10 +42,11 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: tmpfs_pool.c,v 1.11 2007/11/22 21:08:10 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tmpfs_pool.c,v 1.12 2008/01/02 11:48:46 ad Exp $"); #include #include +#include #include @@ -155,15 +156,29 @@ tmpfs_pool_page_alloc(struct pool *pp, int flags) { struct tmpfs_pool *tpp; struct tmpfs_mount *tmp; + u_int pages; + void *page; tpp = (struct tmpfs_pool *)pp; tmp = tpp->tp_mount; - if (TMPFS_PAGES_MAX(tmp) - tmp->tm_pages_used == 0) + pages = atomic_inc_uint_nv(&tmp->tm_pages_used); + if (pages >= TMPFS_PAGES_MAX(tmp)) { + atomic_dec_uint(&tmp->tm_pages_used); return NULL; + } + /* + * tmpfs never specifies PR_WAITOK as we enforce local limits + * on memory allocation. However, we should wait for memory + * to become available if under our limit. XXX The result of + * the TMPFS_PAGES_MAX() check is stale. + */ + page = pool_page_alloc_nointr(pp, flags | PR_WAITOK); + if (page == NULL) { + atomic_dec_uint(&tmp->tm_pages_used); + } - tmp->tm_pages_used += 1; - return pool_page_alloc_nointr(pp, flags); + return page; } /* --------------------------------------------------------------------- */ @@ -177,7 +192,7 @@ tmpfs_pool_page_free(struct pool *pp, void *v) tpp = (struct tmpfs_pool *)pp; tmp = tpp->tp_mount; - tmp->tm_pages_used -= 1; + atomic_dec_uint(&tmp->tm_pages_used); pool_page_free_nointr(pp, v); } diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c index d7ab571bb8e5..cd31e1444fbf 100644 --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -1,7 +1,7 @@ -/* $NetBSD: tmpfs_subr.c,v 1.42 2007/12/08 19:29:45 pooka Exp $ */ +/* $NetBSD: tmpfs_subr.c,v 1.43 2008/01/02 11:48:46 ad Exp $ */ /* - * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. + * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. * All rights reserved. 
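[The tmpfs_pool_page_alloc() hunk above uses an optimistic reservation: the usage counter is bumped first with atomic_inc_uint_nv(), then rolled back if either the limit check or the backing allocation fails, so no lock is needed around the accounting. The same pattern reduced to its core (with the staleness caveat the XXX comment already notes):

	u_int pages;
	void *page;

	pages = atomic_inc_uint_nv(&tmp->tm_pages_used);
	if (pages >= TMPFS_PAGES_MAX(tmp)) {
		atomic_dec_uint(&tmp->tm_pages_used);	/* roll back */
		return NULL;
	}
	page = pool_page_alloc_nointr(pp, flags | PR_WAITOK);
	if (page == NULL)
		atomic_dec_uint(&tmp->tm_pages_used);	/* roll back */
	return page;
]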
* * This code is derived from software contributed to The NetBSD Foundation @@ -42,12 +42,12 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.42 2007/12/08 19:29:45 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.43 2008/01/02 11:48:46 ad Exp $"); #include #include #include -#include +#include #include #include #include @@ -57,6 +57,7 @@ __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.42 2007/12/08 19:29:45 pooka Exp $" #include #include #include +#include #include @@ -66,8 +67,6 @@ __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.42 2007/12/08 19:29:45 pooka Exp $" #include #include -MALLOC_DECLARE(M_TMPFSTMP); - /* --------------------------------------------------------------------- */ /* @@ -109,24 +108,24 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL); nnode = NULL; - if (LIST_EMPTY(&tmp->tm_nodes_avail)) { - KASSERT(tmp->tm_nodes_last <= tmp->tm_nodes_max); - if (tmp->tm_nodes_last == tmp->tm_nodes_max) - return ENOSPC; - - nnode = - (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0); - if (nnode == NULL) - return ENOSPC; - nnode->tn_id = tmp->tm_nodes_last++; - nnode->tn_gen = arc4random(); - } else { - nnode = LIST_FIRST(&tmp->tm_nodes_avail); - LIST_REMOVE(nnode, tn_entries); - nnode->tn_gen++; + if (atomic_inc_uint_nv(&tmp->tm_nodes_cnt) >= tmp->tm_nodes_max) { + atomic_dec_uint(&tmp->tm_nodes_cnt); + return ENOSPC; } - KASSERT(nnode != NULL); - LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); + + nnode = (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0); + if (nnode == NULL) { + atomic_dec_uint(&tmp->tm_nodes_cnt); + return ENOSPC; + } + + /* + * XXX Where the pool is backed by a map larger than (4GB * + * sizeof(*nnode)), this may produce duplicate inode numbers + * for applications that do not understand 64-bit ino_t. + */ + nnode->tn_id = (ino_t)((uintptr_t)nnode / sizeof(*nnode)); + nnode->tn_gen = arc4random(); /* Generic initialization. 
*/ nnode->tn_type = type; @@ -157,11 +156,6 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, nnode->tn_spec.tn_dir.tn_readdir_lastn = 0; nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; nnode->tn_links++; - nnode->tn_spec.tn_dir.tn_parent->tn_links++; - if (parent != NULL) { - KASSERT(parent->tn_vnode != NULL); - VN_KNOTE(parent->tn_vnode, NOTE_LINK); - } break; case VFIFO: @@ -175,8 +169,8 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, nnode->tn_spec.tn_lnk.tn_link = tmpfs_str_pool_get(&tmp->tm_str_pool, nnode->tn_size, 0); if (nnode->tn_spec.tn_lnk.tn_link == NULL) { - nnode->tn_type = VNON; - tmpfs_free_node(tmp, nnode); + atomic_dec_uint(&tmp->tm_nodes_cnt); + TMPFS_POOL_PUT(&tmp->tm_node_pool, nnode); return ENOSPC; } memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size); @@ -192,6 +186,12 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, KASSERT(0); } + mutex_init(&nnode->tn_vlock, MUTEX_DEFAULT, IPL_NONE); + + mutex_enter(&tmp->tm_lock); + LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries); + mutex_exit(&tmp->tm_lock); + *node = nnode; return 0; } @@ -218,56 +218,33 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, void tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) { - ino_t id; - unsigned long gen; - size_t pages; + + if (node->tn_type == VREG) { + atomic_add_int(&tmp->tm_pages_used, + -node->tn_spec.tn_reg.tn_aobj_pages); + } + atomic_dec_uint(&tmp->tm_nodes_cnt); + mutex_enter(&tmp->tm_lock); + LIST_REMOVE(node, tn_entries); + mutex_exit(&tmp->tm_lock); switch (node->tn_type) { - case VNON: - /* Do not do anything. VNON is provided to let the - * allocation routine clean itself easily by avoiding - * duplicating code in it. */ - /* FALLTHROUGH */ - case VBLK: - /* FALLTHROUGH */ - case VCHR: - /* FALLTHROUGH */ - case VDIR: - /* FALLTHROUGH */ - case VFIFO: - /* FALLTHROUGH */ - case VSOCK: - pages = 0; - break; - case VLNK: tmpfs_str_pool_put(&tmp->tm_str_pool, node->tn_spec.tn_lnk.tn_link, node->tn_size); - pages = 0; break; case VREG: if (node->tn_spec.tn_reg.tn_aobj != NULL) uao_detach(node->tn_spec.tn_reg.tn_aobj); - pages = node->tn_spec.tn_reg.tn_aobj_pages; break; default: - KASSERT(0); - pages = 0; /* Shut up gcc when !DIAGNOSTIC. */ break; } - tmp->tm_pages_used -= pages; - - LIST_REMOVE(node, tn_entries); - id = node->tn_id; - gen = node->tn_gen; - memset(node, 0, sizeof(struct tmpfs_node)); - node->tn_id = id; - node->tn_type = VNON; - node->tn_gen = gen; - LIST_INSERT_HEAD(&tmp->tm_nodes_avail, node, tn_entries); + mutex_destroy(&node->tn_vlock); + TMPFS_POOL_PUT(&tmp->tm_node_pool, node); } /* --------------------------------------------------------------------- */ @@ -363,30 +340,37 @@ tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp) struct vnode *nvp; struct vnode *vp; - vp = NULL; - - if (node->tn_vnode != NULL) { - vp = node->tn_vnode; - vget(vp, LK_EXCLUSIVE | LK_RETRY); - error = 0; - goto out; + /* If there is already a vnode, then lock it. */ + for (;;) { + mutex_enter(&node->tn_vlock); + if ((vp = node->tn_vnode) != NULL) { + mutex_enter(&vp->v_interlock); + mutex_exit(&node->tn_vlock); + error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK); + if (error == ENOENT) { + /* vnode was reclaimed. */ + continue; + } + *vpp = vp; + return error; + } + break; } /* Get a new vnode and associate it with our node. 
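[The loop at the top of tmpfs_alloc_vp() above is the standard interlocked-vget retry: take the per-node tn_vlock, grab the vnode interlock, drop the node lock, then call vget() with LK_INTERLOCK. ENOENT means the vnode was reclaimed while we slept, so we look again. A condensed sketch of just that handshake:

	for (;;) {
		mutex_enter(&node->tn_vlock);
		if ((vp = node->tn_vnode) == NULL)
			break;		/* none: fall through and allocate */
		mutex_enter(&vp->v_interlock);
		mutex_exit(&node->tn_vlock);
		error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK);
		if (error == ENOENT)
			continue;	/* reclaimed while sleeping: retry */
		*vpp = vp;
		return error;
	}
	/* tn_vlock is still held here, serializing the new allocation. */
]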
*/ error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp); - if (error != 0) - goto out; - KASSERT(vp != NULL); + if (error != 0) { + mutex_exit(&node->tn_vlock); + return error; + } error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (error != 0) { - vp->v_data = NULL; + mutex_exit(&node->tn_vlock); ungetnewvnode(vp); - vp = NULL; - goto out; + return error; } - vp->v_data = node; vp->v_type = node->tn_type; /* Type-specific initialization. */ @@ -398,24 +382,21 @@ tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp) nvp = checkalias(vp, node->tn_spec.tn_dev.tn_rdev, mp); if (nvp != NULL) { /* Discard unneeded vnode, but save its inode. */ - nvp->v_data = vp->v_data; - vp->v_data = NULL; + nvp->v_data = node; /* XXX spec_vnodeops has no locking, so we have to * do it explicitly. */ vp->v_vflag &= ~VV_LOCKSWORK; VOP_UNLOCK(vp, 0); vp->v_op = spec_vnodeop_p; - vrele(vp); vgone(vp); /* Reinitialize aliased node. */ vp = nvp; error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (error != 0) { - vp->v_data = NULL; - vp = NULL; - goto out; + mutex_exit(&node->tn_vlock); + return error; } } break; @@ -441,11 +422,10 @@ tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp) } uvm_vnp_setsize(vp, node->tn_size); - - error = 0; - -out: - *vpp = node->tn_vnode = vp; + vp->v_data = node; + node->tn_vnode = vp; + mutex_exit(&node->tn_vlock); + *vpp = vp; KASSERT(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp))); KASSERT(*vpp == node->tn_vnode); @@ -466,7 +446,9 @@ tmpfs_free_vp(struct vnode *vp) node = VP_TO_TMPFS_NODE(vp); + mutex_enter(&node->tn_vlock); node->tn_vnode = NULL; + mutex_exit(&node->tn_vlock); vp->v_data = NULL; } @@ -542,13 +524,17 @@ tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, * insert the new node into the directory, an operation that * cannot fail. 
*/ tmpfs_dir_attach(dvp, de); + if (vap->va_type == VDIR) { + VN_KNOTE(dvp, NOTE_LINK); + dnode->tn_links++; + KASSERT(dnode->tn_links <= LINK_MAX); + } out: if (error != 0 || !(cnp->cn_flags & SAVESTART)) PNBUF_PUT(cnp->cn_pnbuf); vput(dvp); - KASSERT(!VOP_ISLOCKED(dvp)); KASSERT(IFF(error == 0, *vpp != NULL)); return error; @@ -667,7 +653,7 @@ tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) TMPFS_VALIDATE_DIR(node); KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); - dentp = malloc(sizeof(struct dirent), M_TMPFSTMP, M_WAITOK | M_ZERO); + dentp = kmem_zalloc(sizeof(struct dirent), KM_SLEEP); dentp->d_fileno = node->tn_id; dentp->d_type = DT_DIR; @@ -686,7 +672,7 @@ tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) node->tn_status |= TMPFS_NODE_ACCESSED; - free(dentp, M_TMPFSTMP); + kmem_free(dentp, sizeof(struct dirent)); return error; } @@ -708,7 +694,7 @@ tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio) TMPFS_VALIDATE_DIR(node); KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); - dentp = malloc(sizeof(struct dirent), M_TMPFSTMP, M_WAITOK | M_ZERO); + dentp = kmem_zalloc(sizeof(struct dirent), KM_SLEEP); dentp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id; dentp->d_type = DT_DIR; @@ -735,7 +721,7 @@ tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio) node->tn_status |= TMPFS_NODE_ACCESSED; - free(dentp, M_TMPFSTMP); + kmem_free(dentp, sizeof(struct dirent)); return error; } @@ -797,7 +783,7 @@ tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp) return EINVAL; } - dentp = malloc(sizeof(struct dirent), M_TMPFSTMP, M_WAITOK | M_ZERO); + dentp = kmem_zalloc(sizeof(struct dirent), KM_SLEEP); /* Read as much entries as possible; i.e., until we reach the end of * the directory or we exhaust uio space. */ @@ -871,7 +857,7 @@ tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp) node->tn_status |= TMPFS_NODE_ACCESSED; - free(dentp, M_TMPFSTMP); + kmem_free(dentp, sizeof(struct dirent)); return error; } @@ -892,7 +878,7 @@ int tmpfs_reg_resize(struct vnode *vp, off_t newsize) { int error; - size_t newpages, oldpages; + u_int newpages, oldpages; struct tmpfs_mount *tmp; struct tmpfs_node *node; off_t oldsize; @@ -913,28 +899,15 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize) newpages = round_page(newsize) / PAGE_SIZE; if (newpages > oldpages && - newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) { + (ssize_t)(newpages - oldpages) > TMPFS_PAGES_AVAIL(tmp)) { error = ENOSPC; goto out; } + atomic_add_int(&tmp->tm_pages_used, newpages - oldpages); if (newsize < oldsize) { int zerolen = MIN(round_page(newsize), node->tn_size) - newsize; - /* - * free "backing store" - */ - - if (newpages < oldpages) { - struct uvm_object *uobj; - - uobj = node->tn_spec.tn_reg.tn_aobj; - - simple_lock(&uobj->vmobjlock); - uao_dropswap_range(uobj, newpages, oldpages); - simple_unlock(&uobj->vmobjlock); - } - /* * zero out the truncated part of the last page. 
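[The dirent buffers above move from malloc(9) with the M_TMPFSTMP type to kmem(9). Unlike free(), kmem_free() must be told the size of the original allocation, so the size expression appears at both ends of the object's life; a minimal sketch:

	struct dirent *dentp;

	dentp = kmem_zalloc(sizeof(*dentp), KM_SLEEP);	/* zeroed, may sleep */
	/* ... fill in the entry and copy it out to the uio ... */
	kmem_free(dentp, sizeof(*dentp));		/* size must match */
]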
*/ @@ -946,7 +919,19 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize) node->tn_size = newsize; uvm_vnp_setsize(vp, newsize); - tmp->tm_pages_used += (newpages - oldpages); + /* + * free "backing store" + */ + + if (newpages < oldpages) { + struct uvm_object *uobj; + + uobj = node->tn_spec.tn_reg.tn_aobj; + + mutex_enter(&uobj->vmobjlock); + uao_dropswap_range(uobj, newpages, oldpages); + mutex_exit(&uobj->vmobjlock); + } error = 0; diff --git a/sys/fs/tmpfs/tmpfs_vfsops.c b/sys/fs/tmpfs/tmpfs_vfsops.c index 02cf3d7eb413..e7af958cc987 100644 --- a/sys/fs/tmpfs/tmpfs_vfsops.c +++ b/sys/fs/tmpfs/tmpfs_vfsops.c @@ -1,7 +1,7 @@ -/* $NetBSD: tmpfs_vfsops.c,v 1.33 2007/12/08 19:29:45 pooka Exp $ */ +/* $NetBSD: tmpfs_vfsops.c,v 1.34 2008/01/02 11:48:47 ad Exp $ */ /* - * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. + * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -49,11 +49,11 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.33 2007/12/08 19:29:45 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.34 2008/01/02 11:48:47 ad Exp $"); #include #include -#include +#include #include #include #include @@ -62,9 +62,6 @@ __KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.33 2007/12/08 19:29:45 pooka Exp #include -MALLOC_JUSTDEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures"); -MALLOC_JUSTDEFINE(M_TMPFSTMP, "tmpfs temp", "tmpfs temporary structures"); - /* --------------------------------------------------------------------- */ static int tmpfs_mount(struct mount *, const char *, void *, size_t *); @@ -149,14 +146,15 @@ tmpfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) KASSERT(nodes >= 3); /* Allocate the tmpfs mount structure and fill it. */ - tmp = (struct tmpfs_mount *)malloc(sizeof(struct tmpfs_mount), - M_TMPFSMNT, M_WAITOK); - KASSERT(tmp != NULL); + tmp = kmem_alloc(sizeof(struct tmpfs_mount), KM_SLEEP); + if (tmp == NULL) + return ENOMEM; tmp->tm_nodes_max = nodes; - tmp->tm_nodes_last = 2; - LIST_INIT(&tmp->tm_nodes_used); - LIST_INIT(&tmp->tm_nodes_avail); + tmp->tm_nodes_cnt = 0; + LIST_INIT(&tmp->tm_nodes); + + mutex_init(&tmp->tm_lock, MUTEX_DEFAULT, IPL_NONE); tmp->tm_pages_max = pages; tmp->tm_pages_used = 0; @@ -171,6 +169,7 @@ tmpfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) args->ta_root_gid, args->ta_root_mode & ALLPERMS, NULL, NULL, VNOVAL, &root); KASSERT(error == 0 && root != NULL); + root->tn_links++; tmp->tm_root = root; mp->mnt_data = tmp; @@ -178,6 +177,7 @@ tmpfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) mp->mnt_stat.f_namemax = MAXNAMLEN; mp->mnt_fs_bshift = PAGE_SHIFT; mp->mnt_dev_bshift = DEV_BSHIFT; + mp->mnt_iflag |= IMNT_MPSAFE; vfs_getnewfsid(mp); return set_statvfs_info(path, UIO_USERSPACE, "tmpfs", UIO_SYSSPACE, @@ -220,7 +220,7 @@ tmpfs_unmount(struct mount *mp, int mntflags) * a directory, we free all its directory entries. Note that after * freeing a node, it will automatically go to the available list, * so we will later have to iterate over it to release its items. 
*/ - node = LIST_FIRST(&tmp->tm_nodes_used); + node = LIST_FIRST(&tmp->tm_nodes); while (node != NULL) { struct tmpfs_node *next; @@ -243,15 +243,6 @@ tmpfs_unmount(struct mount *mp, int mntflags) tmpfs_free_node(tmp, node); node = next; } - node = LIST_FIRST(&tmp->tm_nodes_avail); - while (node != NULL) { - struct tmpfs_node *next; - - next = LIST_NEXT(node, tn_entries); - LIST_REMOVE(node, tn_entries); - TMPFS_POOL_PUT(&tmp->tm_node_pool, node); - node = next; - } tmpfs_pool_destroy(&tmp->tm_dirent_pool); tmpfs_pool_destroy(&tmp->tm_node_pool); @@ -260,7 +251,8 @@ tmpfs_unmount(struct mount *mp, int mntflags) KASSERT(tmp->tm_pages_used == 0); /* Throw away the tmpfs_mount structure. */ - free(mp->mnt_data, M_TMPFSMNT); + mutex_destroy(&tmp->tm_lock); + kmem_free(tmp, sizeof(*tmp)); mp->mnt_data = NULL; return 0; @@ -307,14 +299,17 @@ tmpfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) return EINVAL; found = false; - LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { + mutex_enter(&tmp->tm_lock); + LIST_FOREACH(node, &tmp->tm_nodes, tn_entries) { if (node->tn_id == tfh.tf_id && node->tn_gen == tfh.tf_gen) { found = true; break; } } + mutex_exit(&tmp->tm_lock); + /* XXXAD nothing to prevent 'node' from being removed. */ return found ? tmpfs_alloc_vp(mp, node, vpp) : EINVAL; } @@ -348,9 +343,8 @@ tmpfs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size) static int tmpfs_statvfs(struct mount *mp, struct statvfs *sbp) { - fsfilcnt_t freenodes, usednodes; + fsfilcnt_t freenodes; struct tmpfs_mount *tmp; - struct tmpfs_node *dummy; tmp = VFS_TO_TMPFS(mp); @@ -360,16 +354,10 @@ tmpfs_statvfs(struct mount *mp, struct statvfs *sbp) sbp->f_bavail = sbp->f_bfree = TMPFS_PAGES_AVAIL(tmp); sbp->f_bresvd = 0; - freenodes = MIN(tmp->tm_nodes_max - tmp->tm_nodes_last, + freenodes = MIN(tmp->tm_nodes_max - tmp->tm_nodes_cnt, TMPFS_PAGES_AVAIL(tmp) * PAGE_SIZE / sizeof(struct tmpfs_node)); - LIST_FOREACH(dummy, &tmp->tm_nodes_avail, tn_entries) - freenodes++; - usednodes = 0; - LIST_FOREACH(dummy, &tmp->tm_nodes_used, tn_entries) - usednodes++; - - sbp->f_files = freenodes + usednodes; + sbp->f_files = tmp->tm_nodes_cnt + freenodes; sbp->f_favail = sbp->f_ffree = freenodes; sbp->f_fresvd = 0; @@ -395,8 +383,6 @@ static void tmpfs_init(void) { - malloc_type_attach(M_TMPFSMNT); - malloc_type_attach(M_TMPFSTMP); } /* --------------------------------------------------------------------- */ @@ -405,8 +391,6 @@ static void tmpfs_done(void) { - malloc_type_detach(M_TMPFSTMP); - malloc_type_detach(M_TMPFSMNT); } /* --------------------------------------------------------------------- */ diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c index ee5ba8fb564d..bddf950b5e30 100644 --- a/sys/fs/tmpfs/tmpfs_vnops.c +++ b/sys/fs/tmpfs/tmpfs_vnops.c @@ -1,7 +1,7 @@ -/* $NetBSD: tmpfs_vnops.c,v 1.44 2007/11/26 19:01:55 pooka Exp $ */ +/* $NetBSD: tmpfs_vnops.c,v 1.45 2008/01/02 11:48:47 ad Exp $ */ /* - * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. + * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -42,7 +42,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.44 2007/11/26 19:01:55 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.45 2008/01/02 11:48:47 ad Exp $"); #include #include @@ -169,11 +169,9 @@ tmpfs_lookup(void *v) dnode->tn_spec.tn_dir.tn_parent, vpp); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); - dnode->tn_spec.tn_dir.tn_parent->tn_lookup_dirent = NULL; } else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') { VREF(dvp); *vpp = dvp; - dnode->tn_lookup_dirent = NULL; error = 0; } else { de = tmpfs_dir_lookup(dnode, cnp); @@ -229,8 +227,8 @@ tmpfs_lookup(void *v) error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred); if (error != 0) goto out; - tnode->tn_lookup_dirent = de; - } + } else + de = NULL; /* Allocate a new vnode on the matching entry. */ error = tmpfs_alloc_vp(dvp->v_mount, tnode, vpp); @@ -240,7 +238,8 @@ tmpfs_lookup(void *v) /* Store the result of this lookup in the cache. Avoid this if the * request was for creation, as it does not improve timings on * emprical tests. */ - if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) + if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE && + (cnp->cn_flags & ISDOTDOT) == 0) cache_enter(dvp, *vpp, cnp); out: @@ -651,6 +650,7 @@ tmpfs_remove(void *v) { struct vnode *dvp = ((struct vop_remove_args *)v)->a_dvp; struct vnode *vp = ((struct vop_remove_args *)v)->a_vp; + struct componentname *cnp = (((struct vop_remove_args *)v)->a_cnp); int error; struct tmpfs_dirent *de; @@ -669,8 +669,12 @@ tmpfs_remove(void *v) dnode = VP_TO_TMPFS_DIR(dvp); node = VP_TO_TMPFS_NODE(vp); tmp = VFS_TO_TMPFS(vp->v_mount); - de = node->tn_lookup_dirent; - KASSERT(de != NULL); + de = tmpfs_dir_lookup(dnode, cnp); + if (de == NULL) { + error = ENOENT; + goto out; + } + KASSERT(de->td_node == node); /* Files marked as immutable or append-only cannot be deleted. */ if (node->tn_flags & (IMMUTABLE | APPEND)) { @@ -696,8 +700,6 @@ out: else vput(dvp); - KASSERT(!VOP_ISLOCKED(dvp)); - return error; } @@ -716,7 +718,6 @@ tmpfs_link(void *v) struct tmpfs_node *node; KASSERT(VOP_ISLOCKED(dvp)); - KASSERT(!VOP_ISLOCKED(vp)); KASSERT(cnp->cn_flags & HASBUF); KASSERT(dvp != vp); /* XXX When can this be false? */ @@ -727,7 +728,7 @@ tmpfs_link(void *v) * needs the vnode to be locked. */ error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (error != 0) - goto out; + goto out1; /* XXX: Why aren't the following two tests done by the caller? */ @@ -773,17 +774,12 @@ tmpfs_link(void *v) error = 0; out: - if (VOP_ISLOCKED(vp)) - VOP_UNLOCK(vp, 0); - + VOP_UNLOCK(vp, 0); +out1: PNBUF_PUT(cnp->cn_pnbuf); vput(dvp); - /* XXX Locking status of dvp does not match manual page. */ - KASSERT(!VOP_ISLOCKED(dvp)); - KASSERT(!VOP_ISLOCKED(vp)); - return error; } @@ -801,33 +797,52 @@ tmpfs_rename(void *v) char *newname; int error; - struct tmpfs_dirent *de; + struct tmpfs_dirent *de, *de2; struct tmpfs_mount *tmp; struct tmpfs_node *fdnode; struct tmpfs_node *fnode; struct tmpfs_node *tnode; struct tmpfs_node *tdnode; + size_t namelen; KASSERT(VOP_ISLOCKED(tdvp)); - KASSERT(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp))); + KASSERT(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); KASSERT(fcnp->cn_flags & HASBUF); KASSERT(tcnp->cn_flags & HASBUF); - fdnode = VP_TO_TMPFS_DIR(fdvp); - fnode = VP_TO_TMPFS_NODE(fvp); - tnode = (tvp == NULL) ? 
NULL : VP_TO_TMPFS_NODE(tvp); - de = fnode->tn_lookup_dirent; + newname = NULL; + namelen = 0; + tmp = NULL; - /* Disallow cross-device renames. - * XXX Why isn't this done by the caller? */ + /* Disallow cross-device renames. */ if (fvp->v_mount != tdvp->v_mount || (tvp != NULL && fvp->v_mount != tvp->v_mount)) { error = EXDEV; - goto out; + goto out_unlocked; } - tmp = VFS_TO_TMPFS(tdvp->v_mount); + fnode = VP_TO_TMPFS_NODE(fvp); + fdnode = VP_TO_TMPFS_DIR(fdvp); + tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp); tdnode = VP_TO_TMPFS_DIR(tdvp); + tmp = VFS_TO_TMPFS(tdvp->v_mount); + + /* If we need to move the directory between entries, lock the + * source so that we can safely operate on it. */ + + /* XXX: this is a potential locking order violation! */ + if (fdnode != tdnode) { + error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY); + if (error != 0) + goto out_unlocked; + } + + de = tmpfs_dir_lookup(fdnode, fcnp); + if (de == NULL) { + error = ENOENT; + goto out; + } + KASSERT(de->td_node == fnode); /* If source and target are the same file, there is nothing to do. */ if (fvp == tvp) { @@ -863,44 +878,17 @@ tmpfs_rename(void *v) } } - /* If we need to move the directory between entries, lock the - * source so that we can safely operate on it. */ - - /* XXX: this is a potential locking order violation! */ - if (fdnode != tdnode) { - error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY); - if (error != 0) - goto out; - } - - /* Make sure we have the correct cached dirent */ - fcnp->cn_flags &= ~(MODMASK | SAVESTART); - fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; - if ((error = relookup(fdvp, &fvp, fcnp))) { - goto out_locked; - } - KASSERT(fvp != NULL); - /* Relookup always returns with vpp locked and 1UP referenced */ - VOP_UNLOCK(fvp, 0); - vrele(((struct vop_rename_args *)v)->a_fvp); - - /* Reacquire values. fvp might have changed. Since we only - * used fvp to sanitycheck fcnp values above, we can do this. */ - fnode = VP_TO_TMPFS_NODE(fvp); - de = fnode->tn_lookup_dirent; - /* Ensure that we have enough memory to hold the new name, if it * has to be changed. */ + namelen = tcnp->cn_namelen; if (fcnp->cn_namelen != tcnp->cn_namelen || memcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) { - newname = tmpfs_str_pool_get(&tmp->tm_str_pool, - tcnp->cn_namelen, 0); + newname = tmpfs_str_pool_get(&tmp->tm_str_pool, namelen, 0); if (newname == NULL) { error = ENOSPC; - goto out_locked; + goto out; } - } else - newname = NULL; + } /* If the node is being moved to another directory, we have to do * the move. */ @@ -917,7 +905,7 @@ tmpfs_rename(void *v) while (n != n->tn_spec.tn_dir.tn_parent) { if (n == fnode) { error = EINVAL; - goto out_locked; + goto out; } n = n->tn_spec.tn_dir.tn_parent; } @@ -944,6 +932,37 @@ tmpfs_rename(void *v) VN_KNOTE(fdvp, NOTE_WRITE); } + /* If we are overwriting an entry, we have to remove the old one + * from the target directory. */ + if (tvp != NULL) { + KASSERT(tnode != NULL); + + /* Remove the old entry from the target directory. + * Note! This relies on tmpfs_dir_attach() putting the new + * node on the end of the target's node list. 
*/ + de2 = tmpfs_dir_lookup(tdnode, tcnp); + KASSERT(de2 != NULL); +/* XXXREMOVEME */ + if (de2 == de) { + panic("tmpfs_rename: to self 1"); + } + if (de2->td_node == de->td_node) { + panic("tmpfs_rename: to self 2"); + } + if (de2->td_node != tnode) { + panic("tmpfs_rename: found wrong entry [%s]", + tcnp->cn_nameptr); + } +/* XXXREMOVEME */ + KASSERT(de2->td_node == tnode); + tmpfs_dir_detach(tdvp, de2); + + /* Free the directory entry we just deleted. Note that the + * node referred by it will not be removed until the vnode is + * really reclaimed. */ + tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de2, true); + } + /* If the name has changed, we need to make it effective by changing * it in the directory entry. */ if (newname != NULL) { @@ -952,29 +971,15 @@ tmpfs_rename(void *v) tmpfs_str_pool_put(&tmp->tm_str_pool, de->td_name, de->td_namelen); - de->td_namelen = (uint16_t)tcnp->cn_namelen; - memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen); + de->td_namelen = (uint16_t)namelen; + memcpy(newname, tcnp->cn_nameptr, namelen); de->td_name = newname; + newname = NULL; fnode->tn_status |= TMPFS_NODE_CHANGED; tdnode->tn_status |= TMPFS_NODE_MODIFIED; } - /* If we are overwriting an entry, we have to remove the old one - * from the target directory. */ - if (tvp != NULL) { - KASSERT(tnode != NULL); - - /* Remove the old entry from the target directory. */ - de = tnode->tn_lookup_dirent; - tmpfs_dir_detach(tdvp, de); - - /* Free the directory entry we just deleted. Note that the - * node referred by it will not be removed until the vnode is - * really reclaimed. */ - tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de, true); - } - /* Notify listeners of tdvp about the change in the directory (either * because a new entry was added or because one was removed) and * listeners of fvp about the rename. */ @@ -983,11 +988,11 @@ tmpfs_rename(void *v) error = 0; -out_locked: + out: if (fdnode != tdnode) VOP_UNLOCK(fdvp, 0); -out: + out_unlocked: /* Release target nodes. */ if (tdvp == tvp) vrele(tdvp); @@ -1000,6 +1005,9 @@ out: vrele(fdvp); vrele(fvp); + if (newname != NULL) + tmpfs_str_pool_put(&tmp->tm_str_pool, newname, namelen); + return error; } @@ -1025,6 +1033,7 @@ tmpfs_rmdir(void *v) { struct vnode *dvp = ((struct vop_rmdir_args *)v)->a_dvp; struct vnode *vp = ((struct vop_rmdir_args *)v)->a_vp; + struct componentname *cnp = ((struct vop_rmdir_args *)v)->a_cnp; int error; struct tmpfs_dirent *de; @@ -1051,12 +1060,13 @@ tmpfs_rmdir(void *v) * We checked for that above so this is safe now. */ KASSERT(node->tn_spec.tn_dir.tn_parent == dnode); - /* Get the directory entry associated with node (vp). This was - * filled by tmpfs_lookup while looking up the entry. */ - de = node->tn_lookup_dirent; - KASSERT(TMPFS_DIRENT_MATCHES(de, - ((struct vop_rmdir_args *)v)->a_cnp->cn_nameptr, - ((struct vop_rmdir_args *)v)->a_cnp->cn_namelen)); + /* Get the directory entry associated with node (vp). */ + de = tmpfs_dir_lookup(dnode, cnp); + if (de == NULL) { + error = ENOENT; + goto out; + } + KASSERT(de->td_node == node); /* Check flags to see if we are allowed to remove the directory. */ if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) { @@ -1082,6 +1092,7 @@ tmpfs_rmdir(void *v) * reclaimed. */ tmpfs_free_dirent(tmp, de, true); + KASSERT(node->tn_links == 0); out: /* Release the nodes. 
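[With tn_lookup_dirent removed from struct tmpfs_node, remove/rmdir/rename re-find their directory entry at operation time instead of trusting state cached by an earlier lookup; the entry may legitimately have vanished in between. The shape of that check, as used in the hunks above:

	de = tmpfs_dir_lookup(dnode, cnp);
	if (de == NULL) {
		error = ENOENT;		/* entry went away since lookup */
		goto out;
	}
	KASSERT(de->td_node == node);	/* must still name our vnode */
]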
*/ vput(dvp); @@ -1244,20 +1255,15 @@ int tmpfs_inactive(void *v) { struct vnode *vp = ((struct vop_inactive_args *)v)->a_vp; - nlink_t links; struct tmpfs_node *node; KASSERT(VOP_ISLOCKED(vp)); node = VP_TO_TMPFS_NODE(vp); - links = node->tn_links; - + *((struct vop_inactive_args *)v)->a_recycle = (node->tn_links == 0); VOP_UNLOCK(vp, 0); - if (links == 0) - vrecycle(vp, NULL, curlwp); - return 0; } @@ -1271,8 +1277,6 @@ tmpfs_reclaim(void *v) struct tmpfs_mount *tmp; struct tmpfs_node *node; - KASSERT(!VOP_ISLOCKED(vp)); - node = VP_TO_TMPFS_NODE(vp); tmp = VFS_TO_TMPFS(vp->v_mount); @@ -1285,7 +1289,6 @@ tmpfs_reclaim(void *v) if (node->tn_links == 0) tmpfs_free_node(tmp, node); - KASSERT(!VOP_ISLOCKED(vp)); KASSERT(vp->v_data == NULL); return 0; @@ -1405,7 +1408,7 @@ tmpfs_getpages(void *v) int npages = *count; KASSERT(vp->v_type == VREG); - LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); + KASSERT(mutex_owned(&vp->v_interlock)); node = VP_TO_TMPFS_NODE(vp); uobj = node->tn_spec.tn_reg.tn_aobj; @@ -1414,7 +1417,7 @@ tmpfs_getpages(void *v) if (vp->v_size <= offset + (centeridx << PAGE_SHIFT)) { if ((flags & PGO_LOCKED) == 0) - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return EINVAL; } @@ -1433,7 +1436,7 @@ tmpfs_getpages(void *v) node->tn_status |= TMPFS_NODE_MODIFIED; } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); /* * Make sure that the array on which we will store the @@ -1448,7 +1451,7 @@ tmpfs_getpages(void *v) if (m != NULL) for (i = 0; i < npages; i++) m[i] = NULL; - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); error = (*uobj->pgops->pgo_get)(uobj, offset, m, &npages, centeridx, access_type, advice, flags | PGO_ALLPAGES); #if defined(DEBUG) @@ -1478,19 +1481,19 @@ tmpfs_putpages(void *v) struct tmpfs_node *node; struct uvm_object *uobj; - LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); + KASSERT(mutex_owned(&vp->v_interlock)); node = VP_TO_TMPFS_NODE(vp); if (vp->v_type != VREG) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return 0; } uobj = node->tn_spec.tn_reg.tn_aobj; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); error = (*uobj->pgops->pgo_put)(uobj, offlo, offhi, flags); /* XXX mtime */ diff --git a/sys/fs/udf/udf_subr.c b/sys/fs/udf/udf_subr.c index 42cec501df16..1309569266dc 100644 --- a/sys/fs/udf/udf_subr.c +++ b/sys/fs/udf/udf_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: udf_subr.c,v 1.43 2007/12/11 12:05:27 lukem Exp $ */ +/* $NetBSD: udf_subr.c,v 1.44 2008/01/02 11:48:47 ad Exp $ */ /* * Copyright (c) 2006 Reinoud Zandijk @@ -36,7 +36,7 @@ #include #ifndef lint -__KERNEL_RCSID(0, "$NetBSD: udf_subr.c,v 1.43 2007/12/11 12:05:27 lukem Exp $"); +__KERNEL_RCSID(0, "$NetBSD: udf_subr.c,v 1.44 2008/01/02 11:48:47 ad Exp $"); #endif /* not lint */ @@ -1783,7 +1783,7 @@ loop: unp->loc.loc.part_num == icbptr->loc.part_num) { vp = unp->vnode; assert(vp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&ump->ihash_lock); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) goto loop; @@ -2709,27 +2709,32 @@ udf_read_file_extent(struct udf_node *node, uint32_t from, uint32_t sectors, uint8_t *blob) { - struct buf buf; + struct buf *buf; uint32_t sector_size; + int rv; - BUF_INIT(&buf); + buf = getiobuf(NULL, true); sector_size = node->ump->discinfo.sector_size; - buf.b_bufsize = sectors * sector_size; - buf.b_data = blob; - buf.b_bcount = buf.b_bufsize; - buf.b_resid = buf.b_bcount; - buf.b_flags 
= B_BUSY | B_READ; - buf.b_vp = node->vnode; - buf.b_proc = NULL; + buf->b_bufsize = sectors * sector_size; + buf->b_data = blob; + buf->b_bcount = buf->b_bufsize; + buf->b_resid = buf->b_bcount; + buf->b_cflags = BC_BUSY; + buf->b_flags = B_READ; + buf->b_vp = node->vnode; + buf->b_proc = NULL; - buf.b_blkno = from; - buf.b_lblkno = 0; - BIO_SETPRIO(&buf, BPRIO_TIMELIMITED); + buf->b_blkno = from; + buf->b_lblkno = 0; + BIO_SETPRIO(buf, BPRIO_TIMELIMITED); - udf_read_filebuf(node, &buf); - return biowait(&buf); + udf_read_filebuf(node, buf); + rv = biowait(buf); + putiobuf(buf); + + return rv; } @@ -2835,7 +2840,7 @@ udf_read_filebuf(struct udf_node *node, struct buf *buf) rbuflen = run_length * sector_size; rblk = run_start * (sector_size/DEV_BSIZE); - nestbuf = getiobuf(); + nestbuf = getiobuf(NULL, true); nestiobuf_setup(buf, nestbuf, buf_offset, rbuflen); /* nestbuf is B_ASYNC */ diff --git a/sys/fs/udf/udf_vnops.c b/sys/fs/udf/udf_vnops.c index e50a02d8e5d8..3277616d53a8 100644 --- a/sys/fs/udf/udf_vnops.c +++ b/sys/fs/udf/udf_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: udf_vnops.c,v 1.14 2007/12/11 12:05:28 lukem Exp $ */ +/* $NetBSD: udf_vnops.c,v 1.15 2008/01/02 11:48:47 ad Exp $ */ /* * Copyright (c) 2006 Reinoud Zandijk @@ -36,7 +36,7 @@ #include #ifndef lint -__KERNEL_RCSID(0, "$NetBSD: udf_vnops.c,v 1.14 2007/12/11 12:05:28 lukem Exp $"); +__KERNEL_RCSID(0, "$NetBSD: udf_vnops.c,v 1.15 2008/01/02 11:48:47 ad Exp $"); #endif /* not lint */ @@ -842,11 +842,11 @@ udf_close(void *v) DPRINTF(CALL, ("udf_close called\n")); udf_node = udf_node; /* shut up gcc */ - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_usecount > 1) { /* TODO update times */ } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return 0; } diff --git a/sys/fs/union/union.h b/sys/fs/union/union.h index 219eb9fe816d..aff9f53a4386 100644 --- a/sys/fs/union/union.h +++ b/sys/fs/union/union.h @@ -1,4 +1,4 @@ -/* $NetBSD: union.h,v 1.16 2007/12/08 19:29:45 pooka Exp $ */ +/* $NetBSD: union.h,v 1.17 2008/01/02 11:48:47 ad Exp $ */ /* * Copyright (c) 1994 The Regents of the University of California. @@ -129,8 +129,6 @@ struct union_node { #define UN_ULOCK 0x04 /* Upper node is locked */ #define UN_KLOCK 0x08 /* Keep upper node locked on vput */ #define UN_CACHED 0x10 /* In union cache */ -#define UN_DRAINING 0x20 /* upper node lock is draining */ -#define UN_DRAINED 0x40 /* upper node lock is drained */ extern int union_allocvp(struct vnode **, struct mount *, struct vnode *, struct vnode *, diff --git a/sys/fs/union/union_vnops.c b/sys/fs/union/union_vnops.c index 0a25ac6c232b..6759314faab7 100644 --- a/sys/fs/union/union_vnops.c +++ b/sys/fs/union/union_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: union_vnops.c,v 1.24 2007/12/08 19:29:46 pooka Exp $ */ +/* $NetBSD: union_vnops.c,v 1.25 2008/01/02 11:48:48 ad Exp $ */ /* * Copyright (c) 1992, 1993, 1994, 1995 @@ -72,7 +72,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.24 2007/12/08 19:29:46 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.25 2008/01/02 11:48:48 ad Exp $"); #include #include @@ -1093,7 +1093,7 @@ union_revoke(v) VOP_REVOKE(UPPERVP(vp), ap->a_flags); if (LOWERVP(vp)) VOP_REVOKE(LOWERVP(vp), ap->a_flags); - vgone(vp); + vgone(vp); /* XXXAD?? 
*/ return (0); } @@ -1619,6 +1619,7 @@ union_inactive(v) struct vop_inactive_args /* { const struct vnodeop_desc *a_desc; struct vnode *a_vp; + bool *a_recycle; } */ *ap = v; struct vnode *vp = ap->a_vp; struct union_node *un = VTOUNION(vp); @@ -1644,11 +1645,9 @@ union_inactive(v) un->un_dircache = 0; } + *ap->a_recycle = ((un->un_flags & UN_CACHED) == 0); VOP_UNLOCK(vp, 0); - if ((un->un_flags & UN_CACHED) == 0) - vgone(vp); - return (0); } @@ -1677,9 +1676,6 @@ union_lock(v) int flags = ap->a_flags; struct union_node *un; int error; -#ifdef DIAGNOSTIC - int drain = 0; -#endif /* XXX unionfs can't handle shared locks yet */ if ((flags & LK_TYPE_MASK) == LK_SHARED) { @@ -1699,30 +1695,6 @@ union_lock(v) flags &= ~LK_INTERLOCK; un = VTOUNION(vp); -#ifdef DIAGNOSTIC - if (un->un_flags & (UN_DRAINING|UN_DRAINED)) { - if (un->un_flags & UN_DRAINED) - panic("union: %p: warning: locking decommissioned lock", vp); - if ((flags & LK_TYPE_MASK) != LK_RELEASE) - panic("union: %p: non-release on draining lock: %d", - vp, flags & LK_TYPE_MASK); - un->un_flags &= ~UN_DRAINING; - if ((flags & LK_REENABLE) == 0) - un->un_flags |= UN_DRAINED; - } -#endif - - /* - * Don't pass DRAIN through to sub-vnode lock; keep track of - * DRAIN state at this level, and just get an exclusive lock - * on the underlying vnode. - */ - if ((flags & LK_TYPE_MASK) == LK_DRAIN) { -#ifdef DIAGNOSTIC - drain = 1; -#endif - flags = LK_EXCLUSIVE | (flags & ~LK_TYPE_MASK); - } start: un = VTOUNION(vp); @@ -1764,8 +1736,6 @@ start: un->un_pid = curproc->p_pid; else un->un_pid = -1; - if (drain) - un->un_flags |= UN_DRAINING; #endif un->un_flags |= UN_LOCKED; @@ -1799,8 +1769,6 @@ union_unlock(v) if (curproc && un->un_pid != curproc->p_pid && curproc->p_pid > -1 && un->un_pid > -1) panic("union: unlocking other process's union node"); - if (un->un_flags & UN_DRAINED) - panic("union: %p: warning: unlocking decommissioned lock", ap->a_vp); #endif un->un_flags &= ~UN_LOCKED; @@ -1817,10 +1785,6 @@ union_unlock(v) #ifdef DIAGNOSTIC un->un_pid = 0; - if (un->un_flags & UN_DRAINING) { - un->un_flags |= UN_DRAINED; - un->un_flags &= ~UN_DRAINING; - } #endif genfs_nounlock(ap); @@ -1984,8 +1948,8 @@ union_getpages(v) return EBUSY; } ap->a_vp = OTHERVP(vp); - simple_unlock(&vp->v_interlock); - simple_lock(&ap->a_vp->v_interlock); + mutex_exit(&vp->v_interlock); + mutex_enter(&ap->a_vp->v_interlock); error = VCALL(ap->a_vp, VOFFSET(vop_getpages), ap); return error; } @@ -2008,11 +1972,11 @@ union_putpages(v) */ ap->a_vp = OTHERVP(vp); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); + mutex_enter(&ap->a_vp->v_interlock); if (ap->a_flags & PGO_RECLAIM) { return 0; } - simple_lock(&ap->a_vp->v_interlock); error = VCALL(ap->a_vp, VOFFSET(vop_putpages), ap); return error; } diff --git a/sys/kern/exec_subr.c b/sys/kern/exec_subr.c index 6e8dfd7017d2..f2fc337cd493 100644 --- a/sys/kern/exec_subr.c +++ b/sys/kern/exec_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: exec_subr.c,v 1.57 2007/12/26 22:11:48 christos Exp $ */ +/* $NetBSD: exec_subr.c,v 1.58 2008/01/02 11:48:48 ad Exp $ */ /* * Copyright (c) 1993, 1994, 1996 Christopher G. 
Demetriou @@ -31,7 +31,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: exec_subr.c,v 1.57 2007/12/26 22:11:48 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: exec_subr.c,v 1.58 2008/01/02 11:48:48 ad Exp $"); #include "opt_pax.h" @@ -191,10 +191,10 @@ vmcmd_map_pagedvn(struct lwp *l, struct exec_vmcmd *cmd) if ((vp->v_vflag & VV_MAPPED) == 0) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); vp->v_vflag |= VV_MAPPED; vp->v_iflag |= VI_MAPPED; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); VOP_UNLOCK(vp, 0); } diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index c40c12c05a10..6b2431410ffc 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -1,4 +1,4 @@ -/* $NetBSD: init_main.c,v 1.335 2007/12/31 15:32:10 ad Exp $ */ +/* $NetBSD: init_main.c,v 1.336 2008/01/02 11:48:48 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993 @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.335 2007/12/31 15:32:10 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.336 2008/01/02 11:48:48 ad Exp $"); #include "opt_ipsec.h" #include "opt_ntp.h" @@ -423,6 +423,9 @@ main(void) tty_init(); ttyldisc_init(); + /* Initialize the buffer cache, part 2. */ + bufinit2(); + /* Initialize the disk wedge subsystem. */ dkwedge_init(); @@ -639,17 +642,18 @@ main(void) /* Create the pageout daemon kernel thread. */ uvm_swap_init(); - if (kthread_create(PRI_PGDAEMON, 0, NULL, uvm_pageout, + if (kthread_create(PRI_PGDAEMON, KTHREAD_MPSAFE, NULL, uvm_pageout, NULL, NULL, "pgdaemon")) panic("fork pagedaemon"); /* Create the filesystem syncer kernel thread. */ - if (kthread_create(PRI_IOFLUSH, 0, NULL, sched_sync, NULL, NULL, "ioflush")) + if (kthread_create(PRI_IOFLUSH, KTHREAD_MPSAFE, NULL, sched_sync, + NULL, NULL, "ioflush")) panic("fork syncer"); /* Create the aiodone daemon kernel thread. */ if (workqueue_create(&uvm.aiodone_queue, "aiodoned", - uvm_aiodone_worker, NULL, PRI_VM, IPL_BIO, 0)) + uvm_aiodone_worker, NULL, PRI_VM, IPL_NONE, WQ_MPSAFE)) panic("fork aiodoned"); vmem_rehash_start(); diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 94c018c425c8..7caaa0949b44 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_exec.c,v 1.263 2007/12/31 15:32:10 ad Exp $ */ +/* $NetBSD: kern_exec.c,v 1.264 2008/01/02 11:48:49 ad Exp $ */ /*- * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou @@ -33,7 +33,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.263 2007/12/31 15:32:10 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.264 2008/01/02 11:48:49 ad Exp $"); #include "opt_ktrace.h" #include "opt_syscall_debug.h" @@ -1078,6 +1078,7 @@ execve1(struct lwp *l, const char *path, char * const *args, vrele(pack.ep_interp); /* Acquire the sched-state mutex (exit1() will release it). */ + KERNEL_LOCK(1, NULL); /* XXXSMP */ mutex_enter(&p->p_smutex); exit1(l, W_EXITCODE(error, SIGABRT)); diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 41f31332f981..f23f7c58dc1c 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_exit.c,v 1.197 2007/12/31 15:32:11 ad Exp $ */ +/* $NetBSD: kern_exit.c,v 1.198 2008/01/02 11:48:49 ad Exp $ */ /*- * Copyright (c) 1998, 1999, 2006, 2007 The NetBSD Foundation, Inc. 
@@ -74,7 +74,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.197 2007/12/31 15:32:11 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.198 2008/01/02 11:48:49 ad Exp $"); #include "opt_ktrace.h" #include "opt_perfctrs.h" @@ -173,6 +173,7 @@ sys_exit(struct lwp *l, const struct sys_exit_args *uap, register_t *retval) struct proc *p = l->l_proc; /* Don't call exit1() multiple times in the same process. */ + KERNEL_LOCK(1, NULL); mutex_enter(&p->p_smutex); if (p->p_sflag & PS_WEXIT) { mutex_exit(&p->p_smutex); @@ -375,15 +376,11 @@ exit1(struct lwp *l, int rv) tp->t_pgrp = NULL; tp->t_session = NULL; mutex_spin_exit(&tty_lock); - SESSRELE(sp); mutex_exit(&proclist_lock); (void) ttywait(tp); mutex_enter(&proclist_lock); - /* - * The tty could have been revoked - * if we blocked. - */ + /* The tty could have been revoked. */ vprevoke = sp->s_ttyvp; } else mutex_spin_exit(&tty_lock); @@ -398,9 +395,12 @@ exit1(struct lwp *l, int rv) sp->s_leader = NULL; if (vprevoke != NULL || vprele != NULL) { - mutex_exit(&proclist_lock); - if (vprevoke != NULL) + if (vprevoke != NULL) { + SESSRELE(sp); + mutex_exit(&proclist_lock); VOP_REVOKE(vprevoke, REVOKEALL); + } else + mutex_exit(&proclist_lock); if (vprele != NULL) vrele(vprele); mutex_enter(&proclist_lock); @@ -423,6 +423,7 @@ exit1(struct lwp *l, int rv) KNOTE(&p->p_klist, NOTE_EXIT); + #if PERFCTRS /* * Save final PMC information in parent process & clean up. @@ -678,9 +679,10 @@ do_sys_wait(struct lwp *l, int *pid, int *status, int options, struct proc *child; int error; + KERNEL_LOCK(1, NULL); /* XXXSMP */ mutex_enter(&proclist_lock); - error = find_stopped_child(l->l_proc, *pid, options, &child, status); + KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ if (child == NULL) { mutex_exit(&proclist_lock); @@ -696,9 +698,7 @@ do_sys_wait(struct lwp *l, int *pid, int *status, int options, if (options & WNOWAIT) mutex_exit(&proclist_lock); else { - KERNEL_LOCK(1, l); /* XXXSMP */ proc_free(child, ru); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ } } else { /* Child state must have been SSTOP. 
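[The do_sys_wait() hunk above shows the interim idiom used while subsystems are being made MP-safe: bracket a still-unsafe call with the big lock and tag it XXXSMP so the workaround can be found and removed later. Sketched on the same call:

	KERNEL_LOCK(1, NULL);		/* XXXSMP: path not yet MP-safe */
	error = find_stopped_child(l->l_proc, *pid, options, &child, status);
	KERNEL_UNLOCK_ONE(NULL);	/* XXXSMP */
]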
*/ @@ -906,7 +906,9 @@ proc_free(struct proc *p, struct rusage *ru) kpsignal(parent, &ksi, NULL); mutex_exit(&proclist_mutex); } + KERNEL_LOCK(1, NULL); /* XXXSMP */ cv_broadcast(&parent->p_waitcv); + KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ mutex_exit(&proclist_lock); return; } @@ -990,7 +992,7 @@ proc_free(struct proc *p, struct rusage *ru) if (p->p_textvp) vrele(p->p_textvp); - mutex_destroy(&p->p_raslock); + mutex_destroy(&p->p_auxlock); mutex_destroy(&p->p_mutex); mutex_destroy(&p->p_stmutex); mutex_destroy(&p->p_smutex); diff --git a/sys/kern/kern_fileassoc.c b/sys/kern/kern_fileassoc.c index deea240c91fa..8f11656cd174 100644 --- a/sys/kern/kern_fileassoc.c +++ b/sys/kern/kern_fileassoc.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_fileassoc.c,v 1.29 2007/05/15 19:47:45 elad Exp $ */ +/* $NetBSD: kern_fileassoc.c,v 1.30 2008/01/02 11:48:49 ad Exp $ */ /*- * Copyright (c) 2006 Elad Efrat @@ -28,7 +28,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_fileassoc.c,v 1.29 2007/05/15 19:47:45 elad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_fileassoc.c,v 1.30 2008/01/02 11:48:49 ad Exp $"); #include "opt_fileassoc.h" @@ -528,15 +528,21 @@ fileassoc_file_delete(struct vnode *vp) struct fileassoc_table *tbl; struct fileassoc_hash_entry *mhe; + KERNEL_LOCK(1, NULL); + mhe = fileassoc_file_lookup(vp, NULL); - if (mhe == NULL) + if (mhe == NULL) { + KERNEL_UNLOCK_ONE(NULL); return (ENOENT); + } file_free(mhe); tbl = fileassoc_table_lookup(vp->v_mount); --(tbl->hash_used); /* XXX gc? */ + KERNEL_UNLOCK_ONE(NULL); + return (0); } diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 70d927715937..29ab8ac3d57b 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_fork.c,v 1.154 2007/12/31 15:32:11 ad Exp $ */ +/* $NetBSD: kern_fork.c,v 1.155 2008/01/02 11:48:49 ad Exp $ */ /*- * Copyright (c) 1999, 2001, 2004, 2006, 2007 The NetBSD Foundation, Inc. @@ -74,7 +74,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_fork.c,v 1.154 2007/12/31 15:32:11 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_fork.c,v 1.155 2008/01/02 11:48:49 ad Exp $"); #include "opt_ktrace.h" #include "opt_multiprocessor.h" @@ -315,7 +315,7 @@ fork1(struct lwp *l1, int flags, int exitsig, void *stack, size_t stacksize, /* XXX p_smutex can be IPL_VM except for audio drivers */ mutex_init(&p2->p_smutex, MUTEX_DEFAULT, IPL_SCHED); mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH); - mutex_init(&p2->p_raslock, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE); mutex_init(&p2->p_mutex, MUTEX_DEFAULT, IPL_NONE); rw_init(&p2->p_reflock); cv_init(&p2->p_waitcv, "wait"); diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c index 840497d2c945..6f60f4553097 100644 --- a/sys/kern/kern_kthread.c +++ b/sys/kern/kern_kthread.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_kthread.c,v 1.19 2007/11/06 00:42:41 ad Exp $ */ +/* $NetBSD: kern_kthread.c,v 1.20 2008/01/02 11:48:50 ad Exp $ */ /*- * Copyright (c) 1998, 1999, 2007 The NetBSD Foundation, Inc. 
@@ -38,7 +38,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_kthread.c,v 1.19 2007/11/06 00:42:41 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_kthread.c,v 1.20 2008/01/02 11:48:50 ad Exp $"); #include #include @@ -116,8 +116,8 @@ kthread_create(pri_t pri, int flag, struct cpu_info *ci, } if ((flag & KTHREAD_INTR) != 0) l->l_pflag |= LP_INTR; - if ((flag & KTHREAD_MPSAFE) != 0) - l->l_pflag |= LP_MPSAFE; + if ((flag & KTHREAD_MPSAFE) == 0) + l->l_pflag &= ~LP_MPSAFE; /* * Set the new LWP running, unless the caller has requested diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c index 29d5906e6ede..fd8309e8a7b5 100644 --- a/sys/kern/kern_ktrace.c +++ b/sys/kern/kern_ktrace.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_ktrace.c,v 1.132 2007/12/22 11:38:54 dsl Exp $ */ +/* $NetBSD: kern_ktrace.c,v 1.133 2008/01/02 11:48:50 ad Exp $ */ /*- * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc. @@ -68,7 +68,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_ktrace.c,v 1.132 2007/12/22 11:38:54 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_ktrace.c,v 1.133 2008/01/02 11:48:50 ad Exp $"); #include #include @@ -412,11 +412,9 @@ void ktefree(struct ktrace_entry *kte) { - KERNEL_LOCK(1, curlwp); /* XXXSMP */ if (kte->kte_buf != kte->kte_space) kmem_free(kte->kte_buf, kte->kte_bufsz); pool_put(&kte_pool, kte); - KERNEL_UNLOCK_ONE(curlwp); /* XXXSMP */ } /* @@ -500,18 +498,15 @@ ktealloc(struct ktrace_entry **ktep, void **bufp, lwp_t *l, int type, if (ktrenter(l)) return EAGAIN; - KERNEL_LOCK(1, l); /* XXXSMP */ kte = pool_get(&kte_pool, PR_WAITOK); if (sz > sizeof(kte->kte_space)) { if ((buf = kmem_alloc(sz, KM_SLEEP)) == NULL) { pool_put(&kte_pool, kte); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ ktrexit(l); return ENOMEM; } } else buf = kte->kte_space; - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ kte->kte_bufsz = sz; kte->kte_buf = buf; @@ -1059,7 +1054,7 @@ ktrace_common(lwp_t *curl, int ops, int facs, int pid, struct file *fp) if (ktd == NULL) { ktd = kmem_alloc(sizeof(*ktd), KM_SLEEP); TAILQ_INIT(&ktd->ktd_queue); - callout_init(&ktd->ktd_wakch, 0); + callout_init(&ktd->ktd_wakch, CALLOUT_MPSAFE); cv_init(&ktd->ktd_cv, "ktrwait"); cv_init(&ktd->ktd_sync_cv, "ktrsync"); ktd->ktd_flags = 0; @@ -1081,7 +1076,7 @@ ktrace_common(lwp_t *curl, int ops, int facs, int pid, struct file *fp) if (fp->f_type == DTYPE_PIPE) ktd->ktd_flags |= KTDF_INTERACTIVE; - error = kthread_create(PRI_NONE, 0, NULL, + error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, ktrace_thread, ktd, &ktd->ktd_lwp, "ktrace"); if (error != 0) { kmem_free(ktd, sizeof(*ktd)); diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c index cde8509d10ef..2fb157226857 100644 --- a/sys/kern/kern_lock.c +++ b/sys/kern/kern_lock.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_lock.c,v 1.129 2007/12/06 17:05:08 ad Exp $ */ +/* $NetBSD: kern_lock.c,v 1.130 2008/01/02 11:48:50 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2006, 2007 The NetBSD Foundation, Inc. 
@@ -76,7 +76,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.129 2007/12/06 17:05:08 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.130 2008/01/02 11:48:50 ad Exp $"); #include "opt_multiprocessor.h" @@ -108,6 +108,14 @@ int lock_debug_syslog = 0; /* defaults to printf, but can be patched */ bool kernel_lock_dodebug; __cpu_simple_lock_t kernel_lock; +#ifdef LOCKDEBUG +static lockops_t lockmgr_lockops = { + "lockmgr", + 1, + (void *)nullop +}; +#endif + #if defined(LOCKDEBUG) || defined(DIAGNOSTIC) /* { */ #define COUNT(lkp, l, cpu_id, x) (l)->l_locks += (x) #else @@ -140,9 +148,9 @@ acquire(struct lock **lkpp, int *s, int extflags, lkp->lk_flags |= LK_WAIT_NONZERO; } LOCKSTAT_START_TIMER(lsflag, slptime); - error = ltsleep(drain ? (void *)&lkp->lk_flags : (void *)lkp, + error = mtsleep(drain ? (void *)&lkp->lk_flags : (void *)lkp, lkp->lk_prio, lkp->lk_wmesg, lkp->lk_timo, - &lkp->lk_interlock); + __UNVOLATILE(&lkp->lk_interlock)); LOCKSTAT_STOP_TIMER(lsflag, slptime); LOCKSTAT_EVENT_RA(lsflag, (void *)(uintptr_t)lkp, LB_LOCKMGR | LB_SLEEP1, 1, slptime, ra); @@ -237,20 +245,26 @@ lockinit(struct lock *lkp, pri_t prio, const char *wmesg, int timo, int flags) memset(lkp, 0, sizeof(struct lock)); lkp->lk_flags = flags & LK_EXTFLG_MASK; - simple_lock_init(&lkp->lk_interlock); + mutex_init(&lkp->lk_interlock, MUTEX_DEFAULT, IPL_NONE); lkp->lk_lockholder = LK_NOPROC; lkp->lk_prio = prio; lkp->lk_timo = timo; lkp->lk_wmesg = wmesg; lkp->lk_lock_addr = 0; lkp->lk_unlock_addr = 0; + + if (LOCKDEBUG_ALLOC(lkp, &lockmgr_lockops, + (uintptr_t)__builtin_return_address(0))) { + lkp->lk_flags |= LK_DODEBUG; + } } void lockdestroy(struct lock *lkp) { - /* nothing yet */ + LOCKDEBUG_FREE(((lkp->lk_flags & LK_DODEBUG) != 0), lkp); + mutex_destroy(&lkp->lk_interlock); } /* @@ -275,7 +289,7 @@ lockstatus(struct lock *lkp) lid = l->l_lid; } - simple_lock(&lkp->lk_interlock); + mutex_enter(&lkp->lk_interlock); if (lkp->lk_exclusivecount != 0) { if (WEHOLDIT(lkp, pid, lid, cpu_num)) lock_type = LK_EXCLUSIVE; @@ -285,7 +299,7 @@ lockstatus(struct lock *lkp) lock_type = LK_SHARED; else if (lkp->lk_flags & LK_WANT_EXCL) lock_type = LK_EXCLOTHER; - simple_unlock(&lkp->lk_interlock); + mutex_exit(&lkp->lk_interlock); return (lock_type); } @@ -312,7 +326,7 @@ lockstatus(struct lock *lkp) * accepted shared locks to go away. 
*/ int -lockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp) +lockmgr(struct lock *lkp, u_int flags, kmutex_t *interlkp) { int error; pid_t pid; @@ -329,9 +343,9 @@ lockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp) KASSERT((flags & LK_RETRY) == 0); KASSERT((l->l_pflag & LP_INTR) == 0 || panicstr != NULL); - simple_lock(&lkp->lk_interlock); + mutex_enter(&lkp->lk_interlock); if (flags & LK_INTERLOCK) - simple_unlock(interlkp); + mutex_exit(interlkp); extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK; if (l == NULL) { @@ -438,9 +452,6 @@ lockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp) lockpanic(lkp, "lockmgr: locking against myself"); } lkp->lk_exclusivecount++; - if (extflags & LK_SETRECURSE && - lkp->lk_recurselevel == 0) - lkp->lk_recurselevel = lkp->lk_exclusivecount; COUNT(lkp, l, cpu_num, 1); break; } @@ -479,8 +490,6 @@ lockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp) if (lkp->lk_exclusivecount != 0) lockpanic(lkp, "lockmgr: non-zero exclusive count"); lkp->lk_exclusivecount = 1; - if (extflags & LK_SETRECURSE) - lkp->lk_recurselevel = 1; COUNT(lkp, l, cpu_num, 1); break; @@ -549,14 +558,11 @@ lockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp) lkp->lk_lock_addr = RETURN_ADDRESS; #endif lkp->lk_exclusivecount = 1; - /* XXX unlikely that we'd want this */ - if (extflags & LK_SETRECURSE) - lkp->lk_recurselevel = 1; COUNT(lkp, l, cpu_num, 1); break; default: - simple_unlock(&lkp->lk_interlock); + mutex_exit(&lkp->lk_interlock); lockpanic(lkp, "lockmgr: unknown locktype request %d", flags & LK_TYPE_MASK); /* NOTREACHED */ @@ -575,7 +581,7 @@ lockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp) if (error && lock_shutdown_noblock) lockpanic(lkp, "lockmgr: deadlock (see previous panic)"); - simple_unlock(&lkp->lk_interlock); + mutex_exit(&lkp->lk_interlock); return (error); } diff --git a/sys/kern/kern_lwp.c b/sys/kern/kern_lwp.c index 9802c5bc4275..826f00a258e2 100644 --- a/sys/kern/kern_lwp.c +++ b/sys/kern/kern_lwp.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_lwp.c,v 1.87 2007/12/26 16:01:36 ad Exp $ */ +/* $NetBSD: kern_lwp.c,v 1.88 2008/01/02 11:48:50 ad Exp $ */ /*- * Copyright (c) 2001, 2006, 2007 The NetBSD Foundation, Inc. @@ -205,7 +205,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.87 2007/12/26 16:01:36 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.88 2008/01/02 11:48:50 ad Exp $"); #include "opt_ddb.h" #include "opt_multiprocessor.h" @@ -581,6 +581,7 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, bool inmem, int flags, l2->l_mutex = l1->l_cpu->ci_schedstate.spc_mutex; l2->l_cpu = l1->l_cpu; l2->l_flag = inmem ? LW_INMEM : 0; + l2->l_pflag = LP_MPSAFE; if (p2->p_flag & PK_SYSTEM) { /* @@ -706,6 +707,7 @@ lwp_exit(struct lwp *l) mutex_enter(&p->p_smutex); if (p->p_nlwps - p->p_nzlwps == 1) { KASSERT(current == true); + /* XXXSMP kernel_lock not held */ exit1(l, 0); /* NOTREACHED */ } @@ -913,8 +915,6 @@ lwp_free(struct lwp *l, bool recycle, bool last) * * We don't recycle the VM resources at this time. 
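[In the lockmgr() conversion above the interlock becomes a kmutex, and the ltsleep() call in acquire() becomes mtsleep(), which atomically releases the given mutex while sleeping and re-acquires it before returning. A sketch of the resulting wait loop, using LK_WANT_EXCL purely as an example condition:

	mutex_enter(&lkp->lk_interlock);
	while ((lkp->lk_flags & LK_WANT_EXCL) != 0) {
		error = mtsleep(lkp, lkp->lk_prio, lkp->lk_wmesg,
		    lkp->lk_timo, &lkp->lk_interlock);	/* drops/retakes */
		if (error != 0)
			break;
	}
	mutex_exit(&lkp->lk_interlock);
]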
*/ - KERNEL_LOCK(1, curlwp); /* XXXSMP */ - if (l->l_lwpctl != NULL) lwp_ctl_free(l); sched_lwp_exit(l); @@ -929,7 +929,6 @@ lwp_free(struct lwp *l, bool recycle, bool last) KASSERT(l->l_inheritedprio == -1); if (!recycle) pool_cache_put(lwp_cache, l); - KERNEL_UNLOCK_ONE(curlwp); /* XXXSMP */ } /* @@ -1089,11 +1088,8 @@ lwp_update_creds(struct lwp *l) kauth_cred_hold(p->p_cred); l->l_cred = p->p_cred; mutex_exit(&p->p_mutex); - if (oc != NULL) { - KERNEL_LOCK(1, l); /* XXXSMP */ + if (oc != NULL) kauth_cred_free(oc); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ - } } /* @@ -1234,12 +1230,10 @@ lwp_userret(struct lwp *l) */ if ((l->l_flag & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) == LW_PENDSIG) { - KERNEL_LOCK(1, l); /* XXXSMP pool_put() below */ mutex_enter(&p->p_smutex); while ((sig = issignal(l)) != 0) postsig(sig); mutex_exit(&p->p_smutex); - KERNEL_UNLOCK_LAST(l); /* XXXSMP */ } /* @@ -1264,7 +1258,6 @@ lwp_userret(struct lwp *l) /* Process is exiting. */ if ((l->l_flag & LW_WEXIT) != 0) { - KERNEL_LOCK(1, l); lwp_exit(l); KASSERT(0); /* NOTREACHED */ diff --git a/sys/kern/kern_physio.c b/sys/kern/kern_physio.c index 09243ce7202a..eab604ce4294 100644 --- a/sys/kern/kern_physio.c +++ b/sys/kern/kern_physio.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_physio.c,v 1.85 2007/11/06 00:42:42 ad Exp $ */ +/* $NetBSD: kern_physio.c,v 1.86 2008/01/02 11:48:51 ad Exp $ */ /*- * Copyright (c) 1982, 1986, 1990, 1993 @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_physio.c,v 1.85 2007/11/06 00:42:42 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_physio.c,v 1.86 2008/01/02 11:48:51 ad Exp $"); #include #include @@ -116,7 +116,7 @@ struct physio_stat { }; /* abuse these flags of struct buf */ -#define B_DONTFREE B_AGE +#define BC_DONTFREE BC_AGE /* * allocate a buffer structure for use in physical I/O. @@ -126,9 +126,9 @@ getphysbuf(void) { struct buf *bp; - bp = getiobuf(); + bp = getiobuf(NULL, true); bp->b_error = 0; - bp->b_flags = B_BUSY; + bp->b_cflags = BC_BUSY; return(bp); } @@ -139,12 +139,12 @@ static void putphysbuf(struct buf *bp) { - if ((bp->b_flags & B_DONTFREE) != 0) { + if ((bp->b_cflags & BC_DONTFREE) != 0) { return; } - if (__predict_false(bp->b_flags & B_WANTED)) - panic("putphysbuf: private buf B_WANTED"); + if (__predict_false(bp->b_cflags & BC_WANTED)) + panic("putphysbuf: private buf BC_WANTED"); putiobuf(bp); } @@ -237,7 +237,7 @@ physio_init(void) KASSERT(physio_workqueue == NULL); error = workqueue_create(&physio_workqueue, "physiod", - physio_done, NULL, PRI_BIO, IPL_BIO, 0); + physio_done, NULL, PRI_BIO, IPL_BIO, WQ_MPSAFE); return error; } @@ -257,8 +257,7 @@ physio(void (*strategy)(struct buf *), struct buf *obp, dev_t dev, int flags, struct iovec *iovp; struct lwp *l = curlwp; struct proc *p = l->l_proc; - int i, s; - int error; + int i, error; struct buf *bp = NULL; struct physio_stat *ps; int concurrency = PHYSIO_CONCURRENCY - 1; @@ -285,24 +284,12 @@ physio(void (*strategy)(struct buf *), struct buf *obp, dev_t dev, int flags, /* Make sure we have a buffer, creating one if necessary. 
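getphysbuf() above now delegates to getiobuf(), which in this patch takes two arguments: an optional vnode to associate with the buffer and a wait flag. A hedged sketch of the allocate/release pairing as kern_physio.c now uses it, with the transfer setup elided:

    struct buf *bp;

    /* No vnode; waitok = true, so the call sleeps rather than fail. */
    bp = getiobuf(NULL, true);
    bp->b_cflags = BC_BUSY;         /* ownership now lives in b_cflags */

    /* ... fill in b_dev, b_data, b_bcount and start the transfer ... */

    putiobuf(bp);                   /* return the buffer to the I/O buf pool */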
*/ if (obp != NULL) { /* [raise the processor priority level to splbio;] */ - s = splbio(); - simple_lock(&obp->b_interlock); - - /* [while the buffer is marked busy] */ - while (obp->b_flags & B_BUSY) { - /* [mark the buffer wanted] */ - obp->b_flags |= B_WANTED; - /* [wait until the buffer is available] */ - ltsleep(obp, PRIBIO+1, "physbuf", 0, &obp->b_interlock); - } - + mutex_enter(&bufcache_lock); + while (bbusy(obp, false, 0) == EPASSTHROUGH) + ; /* Mark it busy, so nobody else will use it. */ - obp->b_flags = B_BUSY | B_DONTFREE; - - /* [lower the priority level] */ - simple_unlock(&obp->b_interlock); - splx(s); - + obp->b_cflags |= BC_DONTFREE; + mutex_exit(&bufcache_lock); concurrency = 0; /* see "XXXkludge" comment below */ } @@ -334,7 +321,6 @@ physio(void (*strategy)(struct buf *), struct buf *obp, dev_t dev, int flags, bp->b_dev = dev; bp->b_proc = p; bp->b_private = ps; - bp->b_vp = NULL; /* * [mark the buffer busy for physical I/O] @@ -343,8 +329,9 @@ physio(void (*strategy)(struct buf *), struct buf *obp, dev_t dev, int flags, * "Set by physio for raw transfers.", in addition * to the "busy" and read/write flag.) */ - bp->b_flags = (bp->b_flags & B_DONTFREE) | - B_BUSY | B_PHYS | B_RAW | B_CALL | flags; + bp->b_oflags = 0; + bp->b_cflags = (bp->b_cflags & BC_DONTFREE) | BC_BUSY; + bp->b_flags = flags | B_PHYS | B_RAW; bp->b_iodone = physio_biodone; /* [set up the buffer for a maximum-sized transfer] */ @@ -442,23 +429,19 @@ done_locked: * Also, if we had to steal it, give it back. */ if (obp != NULL) { - KASSERT((obp->b_flags & B_BUSY) != 0); - KASSERT((obp->b_flags & B_DONTFREE) != 0); + KASSERT((obp->b_cflags & BC_BUSY) != 0); + KASSERT((obp->b_cflags & BC_DONTFREE) != 0); /* * [if another process is waiting for the raw I/O buffer, * wake up processes waiting to do physical I/O; */ - s = splbio(); - simple_lock(&obp->b_interlock); - obp->b_flags &= - ~(B_BUSY | B_PHYS | B_RAW | B_CALL | B_DONTFREE); - if ((obp->b_flags & B_WANTED) != 0) { - obp->b_flags &= ~B_WANTED; - wakeup(obp); - } - simple_unlock(&obp->b_interlock); - splx(s); + mutex_enter(&bufcache_lock); + obp->b_cflags &= ~(BC_DONTFREE | BC_BUSY | BC_WANTED); + obp->b_flags &= ~(B_PHYS | B_RAW); + obp->b_iodone = NULL; + cv_broadcast(&obp->b_busy); + mutex_exit(&bufcache_lock); } uvm_lwp_rele(l); diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index f6767c898788..a2f55e0d923b 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_proc.c,v 1.128 2007/12/26 16:01:36 ad Exp $ */ +/* $NetBSD: kern_proc.c,v 1.129 2008/01/02 11:48:51 ad Exp $ */ /*- * Copyright (c) 1999, 2006, 2007 The NetBSD Foundation, Inc. 
@@ -69,7 +69,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.128 2007/12/26 16:01:36 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.129 2008/01/02 11:48:51 ad Exp $"); #include "opt_kstack.h" #include "opt_maxuprc.h" @@ -365,7 +365,7 @@ proc0_init(void) mutex_init(&p->p_smutex, MUTEX_DEFAULT, IPL_SCHED); mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH); - mutex_init(&p->p_raslock, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE); mutex_init(&p->p_mutex, MUTEX_DEFAULT, IPL_NONE); mutex_init(&l->l_swaplock, MUTEX_DEFAULT, IPL_NONE); diff --git a/sys/kern/kern_ras.c b/sys/kern/kern_ras.c index 16e0b6235174..aea8bba4628f 100644 --- a/sys/kern/kern_ras.c +++ b/sys/kern/kern_ras.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_ras.c,v 1.24 2007/12/20 23:03:09 dsl Exp $ */ +/* $NetBSD: kern_ras.c,v 1.25 2008/01/02 11:48:51 ad Exp $ */ /*- * Copyright (c) 2002, 2006, 2007 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_ras.c,v 1.24 2007/12/20 23:03:09 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_ras.c,v 1.25 2008/01/02 11:48:51 ad Exp $"); #include #include @@ -166,7 +166,7 @@ ras_purgeall(void) p = curproc; - mutex_enter(&p->p_raslock); + mutex_enter(&p->p_auxlock); if ((rp = p->p_raslist) != NULL) { p->p_raslist = NULL; ras_sync(); @@ -175,7 +175,7 @@ ras_purgeall(void) pool_put(&ras_pool, rp); } } - mutex_exit(&p->p_raslock); + mutex_exit(&p->p_auxlock); return 0; } @@ -211,7 +211,7 @@ ras_install(void *addr, size_t len) nras = 0; p = curproc; - mutex_enter(&p->p_raslock); + mutex_enter(&p->p_auxlock); for (rp = p->p_raslist; rp != NULL; rp = rp->ras_next) { if (++nras >= ras_per_proc) { error = EINVAL; @@ -226,9 +226,9 @@ ras_install(void *addr, size_t len) newrp->ras_next = p->p_raslist; p->p_raslist = newrp; ras_sync(); - mutex_exit(&p->p_raslock); + mutex_exit(&p->p_auxlock); } else { - mutex_exit(&p->p_raslock); + mutex_exit(&p->p_auxlock); pool_put(&ras_pool, newrp); } @@ -249,7 +249,7 @@ ras_purge(void *addr, size_t len) endaddr = (char *)addr + len; p = curproc; - mutex_enter(&p->p_raslock); + mutex_enter(&p->p_auxlock); link = &p->p_raslist; for (rp = *link; rp != NULL; link = &rp->ras_next, rp = *link) { if (addr == rp->ras_startaddr && endaddr == rp->ras_endaddr) @@ -258,11 +258,11 @@ ras_purge(void *addr, size_t len) if (rp != NULL) { *link = rp->ras_next; ras_sync(); - mutex_exit(&p->p_raslock); + mutex_exit(&p->p_auxlock); pool_put(&ras_pool, rp); return 0; } else { - mutex_exit(&p->p_raslock); + mutex_exit(&p->p_auxlock); return ESRCH; } } diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 9335e69f0272..0a2f57735cc2 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_synch.c,v 1.213 2007/12/27 22:13:19 ad Exp $ */ +/* $NetBSD: kern_synch.c,v 1.214 2008/01/02 11:48:51 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc. 
@@ -75,7 +75,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.213 2007/12/27 22:13:19 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.214 2008/01/02 11:48:51 ad Exp $"); #include "opt_kstack.h" #include "opt_lockdebug.h" @@ -945,7 +945,7 @@ sched_init(void) { cv_init(&lbolt, "lbolt"); - callout_init(&sched_pstats_ch, 0); + callout_init(&sched_pstats_ch, CALLOUT_MPSAFE); callout_setfunc(&sched_pstats_ch, sched_pstats, NULL); sched_setup(); sched_pstats(NULL); diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 7b214e920d47..e191167ec842 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_sysctl.c,v 1.211 2007/12/20 23:03:09 dsl Exp $ */ +/* $NetBSD: kern_sysctl.c,v 1.212 2008/01/02 11:48:52 ad Exp $ */ /*- * Copyright (c) 2003 The NetBSD Foundation, Inc. @@ -75,7 +75,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_sysctl.c,v 1.211 2007/12/20 23:03:09 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_sysctl.c,v 1.212 2008/01/02 11:48:52 ad Exp $"); #include "opt_defcorename.h" #include "ksyms.h" @@ -304,9 +304,12 @@ sys___sysctl(struct lwp *l, const struct sys___sysctl_args *uap, register_t *ret /* * wire old so that copyout() is less likely to fail? */ + KERNEL_LOCK(1, NULL); /* XXXSMP */ error = sysctl_lock(l, SCARG(uap, old), savelen); - if (error) + if (error) { + KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ return (error); + } /* * do sysctl work (NULL means main built-in default tree) @@ -320,6 +323,7 @@ sys___sysctl(struct lwp *l, const struct sys___sysctl_args *uap, register_t *ret * release the sysctl lock */ sysctl_unlock(l); + KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ /* * set caller's oldlen to new value even in the face of an diff --git a/sys/kern/kern_verifiedexec.c b/sys/kern/kern_verifiedexec.c index 1a08896ea655..705ca5af34c7 100644 --- a/sys/kern/kern_verifiedexec.c +++ b/sys/kern/kern_verifiedexec.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_verifiedexec.c,v 1.104 2007/12/08 19:29:48 pooka Exp $ */ +/* $NetBSD: kern_verifiedexec.c,v 1.105 2008/01/02 11:48:52 ad Exp $ */ /*- * Copyright (c) 2005, 2006 Elad Efrat @@ -29,7 +29,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_verifiedexec.c,v 1.104 2007/12/08 19:29:48 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_verifiedexec.c,v 1.105 2008/01/02 11:48:52 ad Exp $"); #include "opt_veriexec.h" @@ -106,6 +106,7 @@ struct veriexec_table_entry { static int veriexec_verbose; int veriexec_strict; +static int veriexec_bypass = 1; static char *veriexec_fp_names = NULL; static size_t veriexec_name_max = 0; @@ -668,11 +669,18 @@ veriexec_verify(struct lwp *l, struct vnode *vp, const u_char *name, int flag, struct veriexec_file_entry *vfe; int r; + if (veriexec_bypass) + return 0; + + KERNEL_LOCK(1, NULL); + r = veriexec_file_verify(l, vp, name, flag, &vfe); if (found != NULL) *found = (vfe != NULL) ? true : false; + KERNEL_UNLOCK_ONE(NULL); + return (r); } @@ -755,9 +763,16 @@ int veriexec_removechk(struct lwp *l, struct vnode *vp, const char *pathbuf) { struct veriexec_file_entry *vfe; + int error; + + if (veriexec_bypass) + return 0; + + KERNEL_LOCK(1, NULL); vfe = veriexec_get(vp); if (vfe == NULL) { + KERNEL_UNLOCK_ONE(NULL); /* Lockdown mode: Deny access to non-monitored files. */ if (veriexec_strict >= VERIEXEC_LOCKDOWN) return (EPERM); @@ -770,9 +785,12 @@ veriexec_removechk(struct lwp *l, struct vnode *vp, const char *pathbuf) /* IDS mode: Deny removal of monitored files. 
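CALLOUT_MPSAFE above follows the same convention as WQ_MPSAFE and SOFTINT_MPSAFE in the neighbouring hunks: a creation-time flag telling the framework not to wrap the callback in the kernel lock, which makes the handler responsible for its own locking. A sketch of a callout set up this way; all ex_* names are hypothetical:

    #include <sys/callout.h>
    #include <sys/kernel.h>
    #include <sys/mutex.h>

    static callout_t ex_ch;
    static kmutex_t ex_lock;

    static void
    ex_tick(void *arg)
    {
            /* Runs without the kernel lock; take our own mutex. */
            mutex_enter(&ex_lock);
            /* ... periodic work ... */
            mutex_exit(&ex_lock);
            callout_schedule(&ex_ch, hz);
    }

    static void
    ex_start(void)
    {
            callout_init(&ex_ch, CALLOUT_MPSAFE);
            callout_setfunc(&ex_ch, ex_tick, NULL);
            callout_schedule(&ex_ch, hz);
    }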
*/ if (veriexec_strict >= VERIEXEC_IDS) - return (EPERM); + error = EPERM; + else + error = veriexec_file_delete(l, vp); - return (veriexec_file_delete(l, vp)); + KERNEL_UNLOCK_ONE(NULL); + return error; } /* @@ -788,11 +806,17 @@ veriexec_renamechk(struct lwp *l, struct vnode *fromvp, const char *fromname, { struct veriexec_file_entry *vfe, *tvfe; + if (veriexec_bypass) + return 0; + + KERNEL_LOCK(1, NULL); + if (veriexec_strict >= VERIEXEC_LOCKDOWN) { log(LOG_ALERT, "Veriexec: Preventing rename of `%s' to " "`%s', uid=%u, pid=%u: Lockdown mode.\n", fromname, toname, kauth_cred_geteuid(l->l_cred), l->l_proc->p_pid); + KERNEL_UNLOCK_ONE(NULL); return (EPERM); } @@ -810,6 +834,7 @@ veriexec_renamechk(struct lwp *l, struct vnode *fromvp, const char *fromname, l->l_proc->p_pid, (vfe != NULL && tvfe != NULL) ? "files" : "file"); + KERNEL_UNLOCK_ONE(NULL); return (EPERM); } @@ -838,6 +863,7 @@ veriexec_renamechk(struct lwp *l, struct vnode *fromvp, const char *fromname, kauth_cred_geteuid(l->l_cred), l->l_proc->p_pid); } + KERNEL_UNLOCK_ONE(NULL); return (0); } @@ -1214,6 +1240,7 @@ veriexec_file_add(struct lwp *l, prop_dictionary_t dict) } veriexec_file_report(NULL, "New entry.", file, NULL, REPORT_DEBUG); + veriexec_bypass = 0; out: vrele(nid.ni_vp); @@ -1289,9 +1316,11 @@ veriexec_unmountchk(struct mount *mp) { int error; - if (doing_shutdown) + if (veriexec_bypass || doing_shutdown) return (0); + KERNEL_LOCK(1, NULL); + switch (veriexec_strict) { case VERIEXEC_LEARNING: error = 0; @@ -1329,6 +1358,7 @@ veriexec_unmountchk(struct mount *mp) break; } + KERNEL_UNLOCK_ONE(NULL); return (error); } @@ -1338,6 +1368,11 @@ veriexec_openchk(struct lwp *l, struct vnode *vp, const char *path, int fmode) struct veriexec_file_entry *vfe = NULL; int error = 0; + if (veriexec_bypass) + return 0; + + KERNEL_LOCK(1, NULL); + if (vp == NULL) { /* If no creation requested, let this fail normally. */ if (!(fmode & O_CREAT)) @@ -1369,6 +1404,7 @@ veriexec_openchk(struct lwp *l, struct vnode *vp, const char *path, int fmode) } out: + KERNEL_UNLOCK_ONE(NULL); return (error); } diff --git a/sys/kern/subr_disk.c b/sys/kern/subr_disk.c index de35711df912..1b37050adc19 100644 --- a/sys/kern/subr_disk.c +++ b/sys/kern/subr_disk.c @@ -1,4 +1,4 @@ -/* $NetBSD: subr_disk.c,v 1.89 2007/10/08 16:41:15 ad Exp $ */ +/* $NetBSD: subr_disk.c,v 1.90 2008/01/02 11:48:52 ad Exp $ */ /*- * Copyright (c) 1996, 1997, 1999, 2000 The NetBSD Foundation, Inc. @@ -74,7 +74,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: subr_disk.c,v 1.89 2007/10/08 16:41:15 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: subr_disk.c,v 1.90 2008/01/02 11:48:52 ad Exp $"); #include #include @@ -391,7 +391,8 @@ disk_read_sectors(void (*strat)(struct buf *), const struct disklabel *lp, { bp->b_blkno = sector; bp->b_bcount = count * lp->d_secsize; - bp->b_flags = (bp->b_flags & ~(B_WRITE | B_DONE)) | B_READ; + bp->b_flags = (bp->b_flags & ~B_WRITE) | B_READ; + bp->b_oflags &= ~BO_DONE; bp->b_cylinder = sector / lp->d_secpercyl; (*strat)(bp); return biowait(bp); diff --git a/sys/kern/subr_disk_mbr.c b/sys/kern/subr_disk_mbr.c index de8767e580e7..4118bf71aa87 100644 --- a/sys/kern/subr_disk_mbr.c +++ b/sys/kern/subr_disk_mbr.c @@ -1,4 +1,4 @@ -/* $NetBSD: subr_disk_mbr.c,v 1.30 2007/10/08 18:04:05 ad Exp $ */ +/* $NetBSD: subr_disk_mbr.c,v 1.31 2008/01/02 11:48:53 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 
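The disklabel hunks above are mechanical fallout of splitting struct buf's flag word three ways, each part with its own owner: b_flags stays with whoever holds the buffer (B_READ, B_WRITE, B_PHYS, ...), b_cflags is protected by bufcache_lock (BC_BUSY, BC_WANTED, BC_AGE, ...), and b_oflags is protected by the object lock (BO_DONE, BO_DELWRI). Resetting a buffer for reuse therefore touches two words where it used to touch one:

    /* Reissue bp as a read: direction is b_flags, completion is b_oflags. */
    bp->b_flags = (bp->b_flags & ~B_WRITE) | B_READ;
    bp->b_oflags &= ~BO_DONE;
    (*strat)(bp);
    error = biowait(bp);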
@@ -54,7 +54,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: subr_disk_mbr.c,v 1.30 2007/10/08 18:04:05 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: subr_disk_mbr.c,v 1.31 2008/01/02 11:48:53 ad Exp $"); #include #include @@ -463,7 +463,8 @@ validate_label(mbr_args_t *a, uint label_sector) case UPDATE_LABEL: case WRITE_LABEL: *dlp = *a->lp; - a->bp->b_flags &= ~(B_READ|B_DONE); + a->bp->b_oflags &= ~BO_DONE; + a->bp->b_flags &= ~B_READ; a->bp->b_flags |= B_WRITE; (*a->strat)(a->bp); error = biowait(a->bp); diff --git a/sys/kern/subr_log.c b/sys/kern/subr_log.c index 8a324e902afc..38bbcdb020e8 100644 --- a/sys/kern/subr_log.c +++ b/sys/kern/subr_log.c @@ -1,4 +1,4 @@ -/* $NetBSD: subr_log.c,v 1.45 2007/12/25 00:00:00 ad Exp $ */ +/* $NetBSD: subr_log.c,v 1.46 2008/01/02 11:48:53 ad Exp $ */ /*- * Copyright (c) 2007 The NetBSD Foundation, Inc. @@ -72,7 +72,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: subr_log.c,v 1.45 2007/12/25 00:00:00 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: subr_log.c,v 1.46 2008/01/02 11:48:53 ad Exp $"); #include #include @@ -140,7 +140,7 @@ loginit(void) mutex_init(&log_lock, MUTEX_DEFAULT, IPL_VM); selinit(&log_selp); cv_init(&log_cv, "klog"); - log_sih = softint_establish(SOFTINT_CLOCK, + log_sih = softint_establish(SOFTINT_CLOCK | SOFTINT_MPSAFE, logsoftintr, NULL); } diff --git a/sys/kern/subr_pool.c b/sys/kern/subr_pool.c index 7f999e61947f..2a3c02fda0b4 100644 --- a/sys/kern/subr_pool.c +++ b/sys/kern/subr_pool.c @@ -1,4 +1,4 @@ -/* $NetBSD: subr_pool.c,v 1.145 2007/12/26 16:01:36 ad Exp $ */ +/* $NetBSD: subr_pool.c,v 1.146 2008/01/02 11:48:53 ad Exp $ */ /*- * Copyright (c) 1997, 1999, 2000, 2002, 2007 The NetBSD Foundation, Inc. @@ -38,7 +38,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.145 2007/12/26 16:01:36 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.146 2008/01/02 11:48:53 ad Exp $"); #include "opt_ddb.h" #include "opt_pool.h" @@ -2606,17 +2606,21 @@ pool_cache_put_slow(pool_cache_cpu_t *cc, int *s, void *object, paddr_t pa) /* * If there's an empty group, release our full * group back to the cache. Install the empty - * group as cc_current and return. + * group and return. */ - if ((cur = cc->cc_current) != NULL) { - KASSERT(cur->pcg_avail == pcg->pcg_size); - cur->pcg_next = pc->pc_fullgroups; - pc->pc_fullgroups = cur; - pc->pc_nfull++; - } KASSERT(pcg->pcg_avail == 0); - cc->cc_current = pcg; pc->pc_emptygroups = pcg->pcg_next; + if (cc->cc_previous == NULL) { + cc->cc_previous = pcg; + } else { + if ((cur = cc->cc_current) != NULL) { + KASSERT(cur->pcg_avail == pcg->pcg_size); + cur->pcg_next = pc->pc_fullgroups; + pc->pc_fullgroups = cur; + pc->pc_nfull++; + } + cc->cc_current = pcg; + } pc->pc_hits++; pc->pc_nempty--; mutex_exit(&pc->pc_lock); @@ -2637,7 +2641,9 @@ pool_cache_put_slow(pool_cache_cpu_t *cc, int *s, void *object, paddr_t pa) * object away. */ nobj = pc->pc_pcgsize; - if (nobj == PCG_NOBJECTS_LARGE) { + if (pool_cache_disable) { + pcg = NULL; + } else if (nobj == PCG_NOBJECTS_LARGE) { pcg = pool_get(&pcg_large_pool, PR_NOWAIT); } else { pcg = pool_get(&pcg_normal_pool, PR_NOWAIT); diff --git a/sys/kern/sys_lwp.c b/sys/kern/sys_lwp.c index 194af997dafe..d2ff88bb86be 100644 --- a/sys/kern/sys_lwp.c +++ b/sys/kern/sys_lwp.c @@ -1,4 +1,4 @@ -/* $NetBSD: sys_lwp.c,v 1.32 2007/12/20 23:03:10 dsl Exp $ */ +/* $NetBSD: sys_lwp.c,v 1.33 2008/01/02 11:48:53 ad Exp $ */ /*- * Copyright (c) 2001, 2006, 2007 The NetBSD Foundation, Inc.
@@ -42,7 +42,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.32 2007/12/20 23:03:10 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.33 2008/01/02 11:48:53 ad Exp $"); #include #include @@ -657,18 +657,14 @@ sys__lwp_unpark_all(struct lwp *l, const struct sys__lwp_unpark_all_args *uap, r if (sz <= sizeof(targets)) tp = targets; else { - KERNEL_LOCK(1, l); /* XXXSMP */ tp = kmem_alloc(sz, KM_SLEEP); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ if (tp == NULL) return ENOMEM; } error = copyin(SCARG(uap, targets), tp, sz); if (error != 0) { if (tp != targets) { - KERNEL_LOCK(1, l); /* XXXSMP */ kmem_free(tp, sz); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ } return error; } @@ -727,11 +723,8 @@ sys__lwp_unpark_all(struct lwp *l, const struct sys__lwp_unpark_all_args *uap, r } sleepq_unlock(sq); - if (tp != targets) { - KERNEL_LOCK(1, l); /* XXXSMP */ + if (tp != targets) kmem_free(tp, sz); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ - } if (swapin) uvm_kick_scheduler(); diff --git a/sys/kern/sys_sig.c b/sys/kern/sys_sig.c index 4c1ebbfce22a..8a8b7b797a86 100644 --- a/sys/kern/sys_sig.c +++ b/sys/kern/sys_sig.c @@ -1,4 +1,4 @@ -/* $NetBSD: sys_sig.c,v 1.9 2007/12/20 23:03:12 dsl Exp $ */ +/* $NetBSD: sys_sig.c,v 1.10 2008/01/02 11:48:53 ad Exp $ */ /*- * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc. @@ -73,7 +73,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sys_sig.c,v 1.9 2007/12/20 23:03:12 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sys_sig.c,v 1.10 2008/01/02 11:48:53 ad Exp $"); #include "opt_ptrace.h" #include "opt_compat_netbsd.h" @@ -670,9 +670,7 @@ __sigtimedwait1(struct lwp *l, const struct sys___sigtimedwait_args *uap, regist /* * Allocate a ksi up front. We can't sleep with the mutex held. */ - KERNEL_LOCK(1, l); /* XXXSMP ksiginfo_alloc() -> pool_get() */ ksi = ksiginfo_alloc(p, NULL, PR_WAITOK); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ if (ksi == NULL) return (ENOMEM); @@ -750,9 +748,7 @@ __sigtimedwait1(struct lwp *l, const struct sys___sigtimedwait_args *uap, regist error = (*put_info)(&ksi->ksi_info, SCARG(uap, info), sizeof(ksi->ksi_info)); - KERNEL_LOCK(1, l); /* XXXSMP ksiginfo_free() -> pool_put() */ ksiginfo_free(ksi); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ return error; } diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c index 7d00b6db8283..0195e2fcb377 100644 --- a/sys/kern/syscalls.c +++ b/sys/kern/syscalls.c @@ -1,4 +1,4 @@ -/* $NetBSD: syscalls.c,v 1.195 2007/12/20 23:07:25 dsl Exp $ */ +/* $NetBSD: syscalls.c,v 1.196 2008/01/02 11:48:54 ad Exp $ */ /* * System call names. @@ -8,7 +8,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: syscalls.c,v 1.195 2007/12/20 23:07:25 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: syscalls.c,v 1.196 2008/01/02 11:48:54 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_nfsserver.h" diff --git a/sys/kern/sysv_sem.c b/sys/kern/sysv_sem.c index fc6fd9a4191b..f5eaeccbadd3 100644 --- a/sys/kern/sysv_sem.c +++ b/sys/kern/sysv_sem.c @@ -1,4 +1,4 @@ -/* $NetBSD: sysv_sem.c,v 1.78 2007/12/20 23:03:12 dsl Exp $ */ +/* $NetBSD: sysv_sem.c,v 1.79 2008/01/02 11:48:54 ad Exp $ */ /*- * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc. 
@@ -46,7 +46,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sysv_sem.c,v 1.78 2007/12/20 23:03:12 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sysv_sem.c,v 1.79 2008/01/02 11:48:54 ad Exp $"); #define SYSVSEM @@ -764,9 +764,7 @@ restart: if (nsops <= SMALL_SOPS) { sops = small_sops; } else if (nsops <= seminfo.semopm) { - KERNEL_LOCK(1, l); /* XXXSMP */ sops = kmem_alloc(nsops * sizeof(*sops), KM_SLEEP); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ } else { SEM_PRINTF(("too many sops (max=%d, nsops=%zd)\n", seminfo.semopm, nsops)); @@ -777,11 +775,8 @@ restart: if (error) { SEM_PRINTF(("error = %d from copyin(%p, %p, %zd)\n", error, SCARG(uap, sops), &sops, nsops * sizeof(sops[0]))); - if (sops != small_sops) { - KERNEL_LOCK(1, l); /* XXXSMP */ + if (sops != small_sops) kmem_free(sops, nsops * sizeof(*sops)); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ - } return error; } @@ -1010,11 +1005,8 @@ done: out: mutex_exit(&semlock); - if (sops != small_sops) { - KERNEL_LOCK(1, l); /* XXXSMP */ + if (sops != small_sops) kmem_free(sops, nsops * sizeof(*sops)); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ - } return error; } diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c index 995b533c5c79..845bb52f2ffd 100644 --- a/sys/kern/sysv_shm.c +++ b/sys/kern/sysv_shm.c @@ -1,4 +1,4 @@ -/* $NetBSD: sysv_shm.c,v 1.101 2007/12/20 23:03:12 dsl Exp $ */ +/* $NetBSD: sysv_shm.c,v 1.102 2008/01/02 11:48:54 ad Exp $ */ /*- * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc. @@ -6,7 +6,7 @@ * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, - * NASA Ames Research Center. + * NASA Ames Research Center, and by Mindaugas Rasiukevicius. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -68,15 +68,15 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.101 2007/12/20 23:03:12 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.102 2008/01/02 11:48:54 ad Exp $"); #define SYSVSHM #include #include +#include #include #include -#include #include #include #include @@ -89,20 +89,6 @@ __KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.101 2007/12/20 23:03:12 dsl Exp $"); #include #include -static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments"); - -/* - * Provides the following externally accessible functions: - * - * shminit(void); initialization - * shmexit(struct vmspace *) cleanup - * shmfork(struct vmspace *, struct vmspace *) fork handling - * - * Structures: - * shmsegs (an array of 'struct shmid_ds') - * per proc array of 'struct shmmap_state' - */ - int shm_nused; struct shmid_ds *shmsegs; @@ -112,11 +98,14 @@ struct shmmap_entry { int shmid; }; -static kmutex_t shm_lock; -static int shm_last_free, shm_committed, shm_use_phys; +static kmutex_t shm_lock; +static kcondvar_t * shm_cv; +static struct pool shmmap_entry_pool; +static int shm_last_free, shm_committed, shm_use_phys; -static POOL_INIT(shmmap_entry_pool, sizeof(struct shmmap_entry), 0, 0, 0, - "shmmp", &pool_allocator_nointr, IPL_NONE); +static kcondvar_t shm_realloc_cv; +static bool shm_realloc_state; +static u_int shm_realloc_disable; struct shmmap_state { unsigned int nitems; @@ -124,102 +113,101 @@ struct shmmap_state { SLIST_HEAD(, shmmap_entry) entries; }; -static int shm_find_segment_by_key(key_t); -static void shm_deallocate_segment(struct shmid_ds *); -static void shm_delete_mapping(struct vmspace *, struct shmmap_state *, - struct shmmap_entry *); -static int 
shmget_existing(struct lwp *, const struct sys_shmget_args *, - int, int, register_t *); -static int shmget_allocate_segment(struct lwp *, const struct sys_shmget_args *, - int, register_t *); -static struct shmmap_state *shmmap_getprivate(struct proc *); -static struct shmmap_entry *shm_find_mapping(struct shmmap_state *, vaddr_t); +#ifdef SHMDEBUG +#define SHMPRINTF(a) printf a +#else +#define SHMPRINTF(a) +#endif + static int shmrealloc(int); -static int -shm_find_segment_by_key(key_t key) -{ - int i; - - for (i = 0; i < shminfo.shmmni; i++) - if ((shmsegs[i].shm_perm.mode & SHMSEG_ALLOCATED) && - shmsegs[i].shm_perm._key == key) - return i; - return -1; -} - +/* + * Find the shared memory segment by the identifier. + * => must be called with shm_lock held; + */ static struct shmid_ds * shm_find_segment_by_shmid(int shmid) { int segnum; struct shmid_ds *shmseg; + KASSERT(mutex_owned(&shm_lock)); + segnum = IPCID_TO_IX(shmid); if (segnum < 0 || segnum >= shminfo.shmmni) return NULL; shmseg = &shmsegs[segnum]; if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0) return NULL; - if ((shmseg->shm_perm.mode & (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED) + if ((shmseg->shm_perm.mode & + (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED) return NULL; if (shmseg->shm_perm._seq != IPCID_TO_SEQ(shmid)) return NULL; + return shmseg; } +/* + * Free memory segment. + * => must be called with shm_lock held; + */ static void -shm_deallocate_segment(struct shmid_ds *shmseg) -{ - struct uvm_object *uobj = shmseg->_shm_internal; - size_t size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; - -#ifdef SHMDEBUG - printf("shm freeing key 0x%lx seq 0x%x\n", - shmseg->shm_perm._key, shmseg->shm_perm._seq); -#endif - - (*uobj->pgops->pgo_detach)(uobj); - shmseg->_shm_internal = NULL; - shm_committed -= btoc(size); - shmseg->shm_perm.mode = SHMSEG_FREE; - shm_nused--; -} - -static void -shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s, - struct shmmap_entry *shmmap_se) +shm_free_segment(int segnum) { struct shmid_ds *shmseg; - int segnum; size_t size; + bool wanted; + + KASSERT(mutex_owned(&shm_lock)); - segnum = IPCID_TO_IX(shmmap_se->shmid); -#ifdef DEBUG - if (segnum < 0 || segnum >= shminfo.shmmni) - panic("shm_delete_mapping: vmspace %p state %p entry %p - " - "entry segment ID bad (%d)", - vm, shmmap_s, shmmap_se, segnum); -#endif shmseg = &shmsegs[segnum]; + SHMPRINTF(("shm freeing key 0x%lx seq 0x%x\n", + shmseg->shm_perm._key, shmseg->shm_perm._seq)); + size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; - uvm_deallocate(&vm->vm_map, shmmap_se->va, size); - SLIST_REMOVE(&shmmap_s->entries, shmmap_se, shmmap_entry, next); - shmmap_s->nitems--; - pool_put(&shmmap_entry_pool, shmmap_se); - shmseg->shm_dtime = time_second; - if ((--shmseg->shm_nattch <= 0) && - (shmseg->shm_perm.mode & SHMSEG_REMOVED)) { - shm_deallocate_segment(shmseg); - shm_last_free = segnum; - } + wanted = (shmseg->shm_perm.mode & SHMSEG_WANTED); + + shmseg->_shm_internal = NULL; + shm_committed -= btoc(size); + shm_nused--; + shmseg->shm_perm.mode = SHMSEG_FREE; + shm_last_free = segnum; + if (wanted == true) + cv_broadcast(&shm_cv[segnum]); } /* - * Get a non-shared shm map for that vmspace. - * 3 cases: - * - no shm map present: create a fresh one - * - a shm map with refcount=1, just used by ourselves: fine - * - a shared shm map: copy to a fresh one and adjust refcounts + * Delete entry from the shm map. 
+ * => must be called with shm_lock held; + */ +static struct uvm_object * +shm_delete_mapping(struct shmmap_state *shmmap_s, + struct shmmap_entry *shmmap_se) +{ + struct uvm_object *uobj = NULL; + struct shmid_ds *shmseg; + int segnum; + + KASSERT(mutex_owned(&shm_lock)); + + segnum = IPCID_TO_IX(shmmap_se->shmid); + shmseg = &shmsegs[segnum]; + SLIST_REMOVE(&shmmap_s->entries, shmmap_se, shmmap_entry, next); + shmmap_s->nitems--; + shmseg->shm_dtime = time_second; + if ((--shmseg->shm_nattch <= 0) && + (shmseg->shm_perm.mode & SHMSEG_REMOVED)) { + uobj = shmseg->_shm_internal; + shm_free_segment(segnum); + } + + return uobj; +} + +/* + * Get a non-shared shm map for that vmspace. Note that memory + * allocation might be performed with the lock held. */ static struct shmmap_state * shmmap_getprivate(struct proc *p) @@ -227,23 +215,26 @@ shmmap_getprivate(struct proc *p) struct shmmap_state *oshmmap_s, *shmmap_s; struct shmmap_entry *oshmmap_se, *shmmap_se; + KASSERT(mutex_owned(&shm_lock)); + + /* 1. A shm map with refcnt = 1, used by ourselves, thus return */ oshmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm; if (oshmmap_s && oshmmap_s->nrefs == 1) - return (oshmmap_s); + return oshmmap_s; - shmmap_s = malloc(sizeof(struct shmmap_state), M_SHM, M_WAITOK); - memset(shmmap_s, 0, sizeof(struct shmmap_state)); + /* 2. No shm map present - create a fresh one */ + shmmap_s = kmem_zalloc(sizeof(struct shmmap_state), KM_SLEEP); shmmap_s->nrefs = 1; SLIST_INIT(&shmmap_s->entries); p->p_vmspace->vm_shm = (void *)shmmap_s; - if (!oshmmap_s) - return (shmmap_s); + if (oshmmap_s == NULL) + return shmmap_s; -#ifdef SHMDEBUG - printf("shmmap_getprivate: vm %p split (%d entries), was used by %d\n", - p->p_vmspace, oshmmap_s->nitems, oshmmap_s->nrefs); -#endif + SHMPRINTF(("shmmap_getprivate: vm %p split (%d entries), was used by %d\n", + p->p_vmspace, oshmmap_s->nitems, oshmmap_s->nrefs)); + + /* 3. A shared shm map, copy to a fresh one and adjust refcounts */ SLIST_FOREACH(oshmmap_se, &oshmmap_s->entries, next) { shmmap_se = pool_get(&shmmap_entry_pool, PR_WAITOK); shmmap_se->va = oshmmap_se->va; @@ -252,21 +243,71 @@ shmmap_getprivate(struct proc *p) } shmmap_s->nitems = oshmmap_s->nitems; oshmmap_s->nrefs--; - return (shmmap_s); + + return shmmap_s; } -static struct shmmap_entry * -shm_find_mapping(struct shmmap_state *map, vaddr_t va) +/* + * Lock/unlock the memory.
+ * => must be called with shm_lock held; + * => called from one place, thus, inline; + */ +static inline int +shm_memlock(struct lwp *l, struct shmid_ds *shmseg, int shmid, int cmd) { + struct proc *p = l->l_proc; struct shmmap_entry *shmmap_se; + struct shmmap_state *shmmap_s; + size_t size; + int error; - SLIST_FOREACH(shmmap_se, &map->entries, next) { - if (shmmap_se->va == va) - return shmmap_se; + KASSERT(mutex_owned(&shm_lock)); + shmmap_s = shmmap_getprivate(p); + + /* Find our shared memory address by shmid */ + SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) { + if (shmmap_se->shmid != shmid) + continue; + + size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; + + if (cmd == SHM_LOCK && + (shmseg->shm_perm.mode & SHMSEG_WIRED) == 0) { + /* Wire the object and map, then tag it */ + error = uobj_wirepages(shmseg->_shm_internal, 0, + round_page(shmseg->shm_segsz)); + if (error) + return EIO; + error = uvm_map_pageable(&p->p_vmspace->vm_map, + shmmap_se->va, shmmap_se->va + size, false, 0); + if (error) { + uobj_unwirepages(shmseg->_shm_internal, 0, + round_page(shmseg->shm_segsz)); + if (error == EFAULT) + error = ENOMEM; + return error; + } + shmseg->shm_perm.mode |= SHMSEG_WIRED; + + } else if (cmd == SHM_UNLOCK && + (shmseg->shm_perm.mode & SHMSEG_WIRED) != 0) { + /* Unwire the object and map, then untag it */ + uobj_unwirepages(shmseg->_shm_internal, 0, + round_page(shmseg->shm_segsz)); + error = uvm_map_pageable(&p->p_vmspace->vm_map, + shmmap_se->va, shmmap_se->va + size, true, 0); + if (error) + return EIO; + shmseg->shm_perm.mode &= ~SHMSEG_WIRED; + } } + return 0; } +/* + * Unmap shared memory. + */ int sys_shmdt(struct lwp *l, const struct sys_shmdt_args *uap, register_t *retval) { @@ -274,32 +315,64 @@ sys_shmdt(struct lwp *l, const struct sys_shmdt_args *uap, register_t *retval) syscallarg(const void *) shmaddr; } */ struct proc *p = l->l_proc; - struct shmmap_state *shmmap_s, *shmmap_s1; + struct shmmap_state *shmmap_s1, *shmmap_s; struct shmmap_entry *shmmap_se; + struct uvm_object *uobj; + struct shmid_ds *shmseg; + size_t size; - shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm; - if (shmmap_s == NULL) + mutex_enter(&shm_lock); + /* In case of reallocation, we will wait for completion */ + while (__predict_false(shm_realloc_state)) + cv_wait(&shm_realloc_cv, &shm_lock); + + shmmap_s1 = (struct shmmap_state *)p->p_vmspace->vm_shm; + if (shmmap_s1 == NULL) { + mutex_exit(&shm_lock); return EINVAL; - - shmmap_se = shm_find_mapping(shmmap_s, (vaddr_t)SCARG(uap, shmaddr)); - if (!shmmap_se) - return EINVAL; - - shmmap_s1 = shmmap_getprivate(p); - if (shmmap_s1 != shmmap_s) { - /* map has been copied, lookup entry in new map */ - shmmap_se = shm_find_mapping(shmmap_s1, - (vaddr_t)SCARG(uap, shmaddr)); - KASSERT(shmmap_se != NULL); } -#ifdef SHMDEBUG - printf("shmdt: vm %p: remove %d @%lx\n", - p->p_vmspace, shmmap_se->shmid, shmmap_se->va); -#endif - shm_delete_mapping(p->p_vmspace, shmmap_s1, shmmap_se); + + /* Find the map entry */ + SLIST_FOREACH(shmmap_se, &shmmap_s1->entries, next) + if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr)) + break; + if (shmmap_se == NULL) { + mutex_exit(&shm_lock); + return EINVAL; + } + + shmmap_s = shmmap_getprivate(p); + if (shmmap_s != shmmap_s1) { + /* Map has been copied, lookup entry in new map */ + SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) + if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr)) + break; + if (shmmap_se == NULL) { + mutex_exit(&shm_lock); + return EINVAL; + } + } + + SHMPRINTF(("shmdt: vm %p: remove %d 
@%lx\n", + p->p_vmspace, shmmap_se->shmid, shmmap_se->va)); + + /* Delete the entry from shm map */ + uobj = shm_delete_mapping(shmmap_s, shmmap_se); + shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)]; + size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; + mutex_exit(&shm_lock); + + uvm_deallocate(&p->p_vmspace->vm_map, shmmap_se->va, size); + if (uobj != NULL) + uao_detach(uobj); + pool_put(&shmmap_entry_pool, shmmap_se); + return 0; } +/* + * Map shared memory. + */ int sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval) { @@ -313,23 +386,37 @@ sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval) kauth_cred_t cred = l->l_cred; struct shmid_ds *shmseg; struct shmmap_state *shmmap_s; + struct shmmap_entry *shmmap_se; struct uvm_object *uobj; + struct vmspace *vm; vaddr_t attach_va; vm_prot_t prot; vsize_t size; - struct shmmap_entry *shmmap_se; + + /* Allocate a new map entry and set it */ + shmmap_se = pool_get(&shmmap_entry_pool, PR_WAITOK); + + mutex_enter(&shm_lock); + /* In case of reallocation, we will wait for completion */ + while (__predict_false(shm_realloc_state)) + cv_wait(&shm_realloc_cv, &shm_lock); shmseg = shm_find_segment_by_shmid(SCARG(uap, shmid)); - if (shmseg == NULL) - return EINVAL; + if (shmseg == NULL) { + error = EINVAL; + goto err; + } error = ipcperm(cred, &shmseg->shm_perm, - (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W); + (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W); if (error) - return error; + goto err; - shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm; - if (shmmap_s && shmmap_s->nitems >= shminfo.shmseg) - return EMFILE; + vm = p->p_vmspace; + shmmap_s = (struct shmmap_state *)vm->vm_shm; + if (shmmap_s && shmmap_s->nitems >= shminfo.shmseg) { + error = EMFILE; + goto err; + } size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; prot = VM_PROT_READ; @@ -342,52 +429,81 @@ sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval) (vaddr_t)SCARG(uap, shmaddr) & ~(SHMLBA-1); else if (((vaddr_t)SCARG(uap, shmaddr) & (SHMLBA-1)) == 0) attach_va = (vaddr_t)SCARG(uap, shmaddr); - else - return EINVAL; + else { + error = EINVAL; + goto err; + } } else { /* This is just a hint to uvm_mmap() about where to put it. */ attach_va = p->p_emul->e_vm_default_addr(p, - (vaddr_t)p->p_vmspace->vm_daddr, size); + (vaddr_t)vm->vm_daddr, size); } + + /* + * Create a map entry, add it to the list and increase the counters. + * The lock will be dropped before the mapping is established, so + * disable reallocation. + */ + shmmap_s = shmmap_getprivate(p); + SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next); + shmmap_s->nitems++; + shmseg->shm_lpid = p->p_pid; + shmseg->shm_nattch++; + shm_realloc_disable++; + mutex_exit(&shm_lock); + + /* + * Add a reference to the memory object, map it to the + * address space, and lock the memory, if needed.
+ */ uobj = shmseg->_shm_internal; - (*uobj->pgops->pgo_reference)(uobj); - error = uvm_map(&p->p_vmspace->vm_map, &attach_va, size, - uobj, 0, 0, + uao_reference(uobj); + error = uvm_map(&vm->vm_map, &attach_va, size, uobj, 0, 0, UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, flags)); if (error) - goto out; - /* Lock the memory */ + goto err_detach; if (shm_use_phys || (shmseg->shm_perm.mode & SHMSEG_WIRED)) { - /* Wire the map */ - error = uvm_map_pageable(&p->p_vmspace->vm_map, attach_va, + error = uvm_map_pageable(&vm->vm_map, attach_va, attach_va + size, false, 0); if (error) { if (error == EFAULT) error = ENOMEM; - goto out; + uvm_deallocate(&vm->vm_map, attach_va, size); + goto err_detach; } } - shmmap_se = pool_get(&shmmap_entry_pool, PR_WAITOK); + /* Set the new address, and update the time */ + mutex_enter(&shm_lock); shmmap_se->va = attach_va; shmmap_se->shmid = SCARG(uap, shmid); - shmmap_s = shmmap_getprivate(p); -#ifdef SHMDEBUG - printf("shmat: vm %p: add %d @%lx\n", p->p_vmspace, shmmap_se->shmid, attach_va); -#endif - SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next); - shmmap_s->nitems++; - shmseg->shm_lpid = p->p_pid; shmseg->shm_atime = time_second; - shmseg->shm_nattch++; - + shm_realloc_disable--; retval[0] = attach_va; - return 0; -out: - (*uobj->pgops->pgo_detach)(uobj); + SHMPRINTF(("shmat: vm %p: add %d @%lx\n", + p->p_vmspace, shmmap_se->shmid, attach_va)); +err: + cv_broadcast(&shm_realloc_cv); + mutex_exit(&shm_lock); + if (error && shmmap_se) + pool_put(&shmmap_entry_pool, shmmap_se); + return error; + +err_detach: + uao_detach(uobj); + mutex_enter(&shm_lock); + uobj = shm_delete_mapping(shmmap_s, shmmap_se); + shm_realloc_disable--; + cv_broadcast(&shm_realloc_cv); + mutex_exit(&shm_lock); + if (uobj != NULL) + uao_detach(uobj); + pool_put(&shmmap_entry_pool, shmmap_se); return error; } +/* + * Shared memory control operations. 
+ */ int sys___shmctl13(struct lwp *l, const struct sys___shmctl13_args *uap, register_t *retval) { @@ -400,11 +516,10 @@ sys___shmctl13(struct lwp *l, const struct sys___shmctl13_args *uap, register_t int cmd, error; cmd = SCARG(uap, cmd); - if (cmd == IPC_SET) { error = copyin(SCARG(uap, buf), &shmbuf, sizeof(shmbuf)); if (error) - return (error); + return error; } error = shmctl1(l, SCARG(uap, shmid), cmd, @@ -413,33 +528,37 @@ sys___shmctl13(struct lwp *l, const struct sys___shmctl13_args *uap, register_t if (error == 0 && cmd == IPC_STAT) error = copyout(&shmbuf, SCARG(uap, buf), sizeof(shmbuf)); - return (error); + return error; } int shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf) { + struct uvm_object *uobj = NULL; kauth_cred_t cred = l->l_cred; - struct proc *p = l->l_proc; struct shmid_ds *shmseg; - struct shmmap_entry *shmmap_se; - struct shmmap_state *shmmap_s; int error = 0; - size_t size; + + mutex_enter(&shm_lock); + /* In case of reallocation, we will wait for completion */ + while (__predict_false(shm_realloc_state)) + cv_wait(&shm_realloc_cv, &shm_lock); shmseg = shm_find_segment_by_shmid(shmid); - if (shmseg == NULL) + if (shmseg == NULL) { + mutex_exit(&shm_lock); return EINVAL; + } switch (cmd) { case IPC_STAT: if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_R)) != 0) - return error; + break; memcpy(shmbuf, shmseg, sizeof(struct shmid_ds)); break; case IPC_SET: if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0) - return error; + break; shmseg->shm_perm.uid = shmbuf->shm_perm.uid; shmseg->shm_perm.gid = shmbuf->shm_perm.gid; shmseg->shm_perm.mode = @@ -449,82 +568,55 @@ shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf) break; case IPC_RMID: if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0) - return error; + break; shmseg->shm_perm._key = IPC_PRIVATE; shmseg->shm_perm.mode |= SHMSEG_REMOVED; if (shmseg->shm_nattch <= 0) { - shm_deallocate_segment(shmseg); - shm_last_free = IPCID_TO_IX(shmid); + uobj = shmseg->_shm_internal; + shm_free_segment(IPCID_TO_IX(shmid)); } break; case SHM_LOCK: case SHM_UNLOCK: if ((error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL)) != 0) - return error; - shmmap_s = shmmap_getprivate(p); - /* Find our shared memory address by shmid */ - SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) { - if (shmmap_se->shmid != shmid) - continue; - - size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; - - if (cmd == SHM_LOCK && - !(shmseg->shm_perm.mode & SHMSEG_WIRED)) { - /* Wire the entire object */ - error = uobj_wirepages(shmseg->_shm_internal, 0, - round_page(shmseg->shm_segsz)); - if (error) - return EIO; - /* Wire the map */ - error = uvm_map_pageable(&p->p_vmspace->vm_map, - shmmap_se->va, shmmap_se->va + size, false, - 0); - if (error) { - uobj_unwirepages(shmseg->_shm_internal, - 0, round_page(shmseg->shm_segsz)); - if (error == EFAULT) - error = ENOMEM; - return error; - } - /* Tag as wired */ - shmseg->shm_perm.mode |= SHMSEG_WIRED; - - } else if (cmd == SHM_UNLOCK && - (shmseg->shm_perm.mode & SHMSEG_WIRED)) { - /* Unwire the object */ - uobj_unwirepages(shmseg->_shm_internal, 0, - round_page(shmseg->shm_segsz)); - error = uvm_map_pageable(&p->p_vmspace->vm_map, - shmmap_se->va, shmmap_se->va + size, true, - 0); - if (error) { - /* - * In fact, uvm_map_pageable could fail - * only if arguments are invalid, - * otherwise it should always return 0. 
- */ - return EIO; - } - /* Tag as unwired */ - shmseg->shm_perm.mode &= ~SHMSEG_WIRED; - } - } + break; + error = shm_memlock(l, shmseg, shmid, cmd); break; default: - return EINVAL; + error = EINVAL; } - return 0; + + mutex_exit(&shm_lock); + if (uobj != NULL) + uao_detach(uobj); + return error; } -static int +/* + * Try to take an already existing segment. + * => must be called with shm_lock held; + * => called from one place, thus, inline; + */ +static inline int shmget_existing(struct lwp *l, const struct sys_shmget_args *uap, int mode, - int segnum, register_t *retval) + register_t *retval) { struct shmid_ds *shmseg; kauth_cred_t cred = l->l_cred; - int error; + int segnum, error; +again: + KASSERT(mutex_owned(&shm_lock)); + + /* Find segment by key */ + for (segnum = 0; segnum < shminfo.shmmni; segnum++) + if ((shmsegs[segnum].shm_perm.mode & SHMSEG_ALLOCATED) && + shmsegs[segnum].shm_perm._key == SCARG(uap, key)) + break; + if (segnum == shminfo.shmmni) { + /* Not found */ + return -1; + } shmseg = &shmsegs[segnum]; if (shmseg->shm_perm.mode & SHMSEG_REMOVED) { @@ -534,100 +626,26 @@ shmget_existing(struct lwp *l, const struct sys_shmget_args *uap, int mode, * allocation failed or it was freed). */ shmseg->shm_perm.mode |= SHMSEG_WANTED; - error = tsleep((void *)shmseg, PLOCK | PCATCH, "shmget", 0); + error = cv_wait_sig(&shm_cv[segnum], &shm_lock); if (error) return error; - return EAGAIN; + goto again; } - if ((error = ipcperm(cred, &shmseg->shm_perm, mode)) != 0) + + /* Check the permission, segment size and appropriate flag */ + error = ipcperm(cred, &shmseg->shm_perm, mode); + if (error) return error; if (SCARG(uap, size) && SCARG(uap, size) > shmseg->shm_segsz) return EINVAL; if ((SCARG(uap, shmflg) & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL)) return EEXIST; + *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm); return 0; } -static int -shmget_allocate_segment(struct lwp *l, const struct sys_shmget_args *uap, int mode, - register_t *retval) -{ - int i, segnum, shmid, size; - kauth_cred_t cred = l->l_cred; - struct shmid_ds *shmseg; - int error = 0; - - if (SCARG(uap, size) < shminfo.shmmin || - SCARG(uap, size) > shminfo.shmmax) - return EINVAL; - if (shm_nused >= shminfo.shmmni) /* any shmids left? */ - return ENOSPC; - size = (SCARG(uap, size) + PGOFSET) & ~PGOFSET; - if (shm_committed + btoc(size) > shminfo.shmall) - return ENOMEM; - if (shm_last_free < 0) { - for (i = 0; i < shminfo.shmmni; i++) - if (shmsegs[i].shm_perm.mode & SHMSEG_FREE) - break; - if (i == shminfo.shmmni) - panic("shmseg free count inconsistent"); - segnum = i; - } else { - segnum = shm_last_free; - shm_last_free = -1; - } - shmseg = &shmsegs[segnum]; - /* - * In case we sleep in malloc(), mark the segment present but deleted - * so that noone else tries to create the same key. 
- */ - shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED; - shmseg->shm_perm._key = SCARG(uap, key); - shmseg->shm_perm._seq = (shmseg->shm_perm._seq + 1) & 0x7fff; - shmid = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm); - - shmseg->_shm_internal = uao_create(size, 0); - - shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_geteuid(cred); - shmseg->shm_perm.cgid = shmseg->shm_perm.gid = kauth_cred_getegid(cred); - shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) | - (mode & (ACCESSPERMS|SHMSEG_RMLINGER)) | SHMSEG_ALLOCATED; - shmseg->shm_segsz = SCARG(uap, size); - shmseg->shm_cpid = l->l_proc->p_pid; - shmseg->shm_lpid = shmseg->shm_nattch = 0; - shmseg->shm_atime = shmseg->shm_dtime = 0; - shmseg->shm_ctime = time_second; - shm_committed += btoc(size); - shm_nused++; - - *retval = shmid; - if (shmseg->shm_perm.mode & SHMSEG_WANTED) { - /* - * Somebody else wanted this key while we were asleep. Wake - * them up now. - */ - shmseg->shm_perm.mode &= ~SHMSEG_WANTED; - wakeup((void *)shmseg); - } - - /* Lock the memory */ - if (shm_use_phys) { - /* Wire the entire object */ - error = uobj_wirepages(shmseg->_shm_internal, 0, - round_page(shmseg->shm_segsz)); - if (error) { - shm_deallocate_segment(shmseg); - } else { - /* Tag as wired */ - shmseg->shm_perm.mode |= SHMSEG_WIRED; - } - } - - return error; -} - int sys_shmget(struct lwp *l, const struct sys_shmget_args *uap, register_t *retval) { @@ -636,30 +654,129 @@ sys_shmget(struct lwp *l, const struct sys_shmget_args *uap, register_t *retval) syscallarg(int) size; syscallarg(int) shmflg; } */ - int segnum, mode, error; + struct shmid_ds *shmseg; + kauth_cred_t cred = l->l_cred; + key_t key = SCARG(uap, key); + int error, mode, segnum, size; + bool lockmem; mode = SCARG(uap, shmflg) & ACCESSPERMS; if (SCARG(uap, shmflg) & _SHM_RMLINGER) mode |= SHMSEG_RMLINGER; -#ifdef SHMDEBUG - printf("shmget: key 0x%lx size 0x%x shmflg 0x%x mode 0x%x\n", - SCARG(uap, key), SCARG(uap, size), SCARG(uap, shmflg), mode); -#endif + SHMPRINTF(("shmget: key 0x%lx size 0x%x shmflg 0x%x mode 0x%x\n", + SCARG(uap, key), SCARG(uap, size), SCARG(uap, shmflg), mode)); - if (SCARG(uap, key) != IPC_PRIVATE) { -again: - segnum = shm_find_segment_by_key(SCARG(uap, key)); - if (segnum >= 0) { - error = shmget_existing(l, uap, mode, segnum, retval); - if (error == EAGAIN) - goto again; + mutex_enter(&shm_lock); + /* In case of reallocation, we will wait for completion */ + while (__predict_false(shm_realloc_state)) + cv_wait(&shm_realloc_cv, &shm_lock); + + if (key != IPC_PRIVATE) { + error = shmget_existing(l, uap, mode, retval); + if (error != -1) { + mutex_exit(&shm_lock); return error; } - if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) + if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) { + mutex_exit(&shm_lock); return ENOENT; + } } - return shmget_allocate_segment(l, uap, mode, retval); + error = 0; + + /* + * Check the limits.
+ */ + size = SCARG(uap, size); + if (size < shminfo.shmmin || size > shminfo.shmmax) { + mutex_exit(&shm_lock); + return EINVAL; + } + if (shm_nused >= shminfo.shmmni) { + mutex_exit(&shm_lock); + return ENOSPC; + } + size = (size + PGOFSET) & ~PGOFSET; + if (shm_committed + btoc(size) > shminfo.shmall) { + mutex_exit(&shm_lock); + return ENOMEM; + } + + /* Find the first available segment */ + if (shm_last_free < 0) { + for (segnum = 0; segnum < shminfo.shmmni; segnum++) + if (shmsegs[segnum].shm_perm.mode & SHMSEG_FREE) + break; + KASSERT(segnum < shminfo.shmmni); + } else { + segnum = shm_last_free; + shm_last_free = -1; + } + + /* + * Initialize the segment. + * We will drop the lock while allocating the memory, thus mark the + * segment present, but removed, so that no other thread can take it. + * Also, disable reallocation while the lock is dropped. + */ + shmseg = &shmsegs[segnum]; + shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED; + shm_committed += btoc(size); + shm_nused++; + lockmem = shm_use_phys; + shm_realloc_disable++; + mutex_exit(&shm_lock); + + /* Allocate the memory object and lock it if needed */ + shmseg->_shm_internal = uao_create(size, 0); + if (lockmem) { + /* Wire the pages and tag it */ + error = uobj_wirepages(shmseg->_shm_internal, 0, + round_page(size)); + if (error) { + mutex_enter(&shm_lock); + shm_free_segment(segnum); + shm_realloc_disable--; + mutex_exit(&shm_lock); + return error; + } + } + + /* + * Note that while the segment is marked, there is no need to hold + * the lock while setting it up (except for shm_perm.mode). + */ + shmseg->shm_perm._key = SCARG(uap, key); + shmseg->shm_perm._seq = (shmseg->shm_perm._seq + 1) & 0x7fff; + *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm); + + shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_geteuid(cred); + shmseg->shm_perm.cgid = shmseg->shm_perm.gid = kauth_cred_getegid(cred); + shmseg->shm_segsz = SCARG(uap, size); + shmseg->shm_cpid = l->l_proc->p_pid; + shmseg->shm_lpid = shmseg->shm_nattch = 0; + shmseg->shm_atime = shmseg->shm_dtime = 0; + shmseg->shm_ctime = time_second; + + /* + * Segment is initialized. + * Enter the lock, mark as allocated, and notify waiters (if any). + * Also, clear the reallocation state. + */ + mutex_enter(&shm_lock); + shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) | + (mode & (ACCESSPERMS | SHMSEG_RMLINGER)) | + SHMSEG_ALLOCATED | (lockmem ?
SHMSEG_WIRED : 0); + if (shmseg->shm_perm.mode & SHMSEG_WANTED) { + shmseg->shm_perm.mode &= ~SHMSEG_WANTED; + cv_broadcast(&shm_cv[segnum]); + } + shm_realloc_disable--; + cv_broadcast(&shm_realloc_cv); + mutex_exit(&shm_lock); + + return error; } void @@ -668,20 +785,16 @@ shmfork(struct vmspace *vm1, struct vmspace *vm2) struct shmmap_state *shmmap_s; struct shmmap_entry *shmmap_se; + SHMPRINTF(("shmfork %p->%p\n", vm1, vm2)); + mutex_enter(&shm_lock); vm2->vm_shm = vm1->vm_shm; - - if (vm1->vm_shm == NULL) - return; - -#ifdef SHMDEBUG - printf("shmfork %p->%p\n", vm1, vm2); -#endif - - shmmap_s = (struct shmmap_state *)vm1->vm_shm; - - SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) - shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch++; - shmmap_s->nrefs++; + if (vm1->vm_shm) { + shmmap_s = (struct shmmap_state *)vm1->vm_shm; + SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) + shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch++; + shmmap_s->nrefs++; + } + mutex_exit(&shm_lock); } void @@ -689,56 +802,118 @@ shmexit(struct vmspace *vm) { struct shmmap_state *shmmap_s; struct shmmap_entry *shmmap_se; + struct uvm_object **uobj; + size_t *size; + u_int i, n; + SLIST_HEAD(, shmmap_entry) tmp_entries; + + mutex_enter(&shm_lock); shmmap_s = (struct shmmap_state *)vm->vm_shm; - if (shmmap_s == NULL) + if (shmmap_s == NULL) { + mutex_exit(&shm_lock); return; + } vm->vm_shm = NULL; if (--shmmap_s->nrefs > 0) { -#ifdef SHMDEBUG - printf("shmexit: vm %p drop ref (%d entries), now used by %d\n", - vm, shmmap_s->nitems, shmmap_s->nrefs); -#endif + SHMPRINTF(("shmexit: vm %p drop ref (%d entries), refs = %d\n", + vm, shmmap_s->nitems, shmmap_s->nrefs)); SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch--; + mutex_exit(&shm_lock); return; } -#ifdef SHMDEBUG - printf("shmexit: vm %p cleanup (%d entries)\n", vm, shmmap_s->nitems); -#endif - while (!SLIST_EMPTY(&shmmap_s->entries)) { - shmmap_se = SLIST_FIRST(&shmmap_s->entries); - shm_delete_mapping(vm, shmmap_s, shmmap_se); + KASSERT(shmmap_s->nrefs == 0); + n = shmmap_s->nitems; + SHMPRINTF(("shmexit: vm %p cleanup (%d entries)\n", vm, n)); + mutex_exit(&shm_lock); + if (n == 0) { + kmem_free(shmmap_s, sizeof(struct shmmap_state)); + return; } - KASSERT(shmmap_s->nitems == 0); - free(shmmap_s, M_SHM); + + /* Allocate the arrays */ + SLIST_INIT(&tmp_entries); + uobj = kmem_zalloc(n * sizeof(void *), KM_SLEEP); + size = kmem_zalloc(n * sizeof(size_t), KM_SLEEP); + + /* Delete the entry from shm map */ + i = 0; + mutex_enter(&shm_lock); + while (!SLIST_EMPTY(&shmmap_s->entries)) { + struct shmid_ds *shmseg; + + shmmap_se = SLIST_FIRST(&shmmap_s->entries); + shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)]; + size[i] = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; + uobj[i] = shm_delete_mapping(shmmap_s, shmmap_se); + SLIST_INSERT_HEAD(&tmp_entries, shmmap_se, next); + i++; + } + mutex_exit(&shm_lock); + + /* Unmap all segments, free the entries */ + i = 0; + while (!SLIST_EMPTY(&tmp_entries)) { + KASSERT(i < n); + shmmap_se = SLIST_FIRST(&tmp_entries); + SLIST_REMOVE(&tmp_entries, shmmap_se, shmmap_entry, next); + uvm_deallocate(&vm->vm_map, shmmap_se->va, size[i]); + if (uobj[i] != NULL) + uao_detach(uobj[i]); + pool_put(&shmmap_entry_pool, shmmap_se); + i++; + } + + kmem_free(uobj, n * sizeof(void *)); + kmem_free(size, n * sizeof(size_t)); + kmem_free(shmmap_s, sizeof(struct shmmap_state)); } static int shmrealloc(int newshmni) { - int i, sz; + int i, lsegid, sz; vaddr_t v; - struct shmid_ds 
*newshmsegs; + struct shmid_ds *oldshmsegs, *newshmsegs; + kcondvar_t *newshm_cv; - /* XXX: Would be good to have a upper limit */ if (newshmni < 1) return EINVAL; - /* We can't reallocate less memory than we use */ - if (shm_nused > newshmni) - return EPERM; - /* Allocate new memory area */ - sz = newshmni * sizeof(struct shmid_ds); - v = uvm_km_alloc(kernel_map, round_page(sz), 0, UVM_KMF_WIRED); + sz = ALIGN(newshmni * sizeof(struct shmid_ds)) + + ALIGN(newshmni * sizeof(kcondvar_t)); + v = uvm_km_alloc(kernel_map, round_page(sz), 0, + UVM_KMF_WIRED|UVM_KMF_ZERO); if (v == 0) return ENOMEM; + mutex_enter(&shm_lock); + while (shm_realloc_state || shm_realloc_disable) + cv_wait(&shm_realloc_cv, &shm_lock); + + /* + * Get the number of the last used segment. Fail if we are + * trying to reallocate less memory than is currently in use. + */ + lsegid = 0; + for (i = 0; i < shminfo.shmmni; i++) + if ((shmsegs[i].shm_perm.mode & SHMSEG_FREE) == 0) + lsegid = i; + if (lsegid >= newshmni) { + mutex_exit(&shm_lock); + uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED); + return EBUSY; + } + shm_realloc_state = true; + newshmsegs = (void *)v; + newshm_cv = (void *)((uintptr_t)newshmsegs + + ALIGN(newshmni * sizeof(struct shmid_ds))); /* Copy all memory to the new area */ for (i = 0; i < shm_nused; i++) @@ -747,14 +922,25 @@ shmrealloc(int newshmni) /* Mark all new segments as free, if there are any */ for (; i < newshmni; i++) { + cv_init(&newshm_cv[i], "shmwait"); newshmsegs[i].shm_perm.mode = SHMSEG_FREE; newshmsegs[i].shm_perm._seq = 0; } - sz = shminfo.shmmni * sizeof(struct shmid_ds); - uvm_km_free(kernel_map, (vaddr_t)shmsegs, sz, UVM_KMF_WIRED); - shmsegs = newshmsegs; + oldshmsegs = shmsegs; + sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) + + ALIGN(shminfo.shmmni * sizeof(kcondvar_t)); + shminfo.shmmni = newshmni; + shmsegs = newshmsegs; + shm_cv = newshm_cv; + + /* Reallocation completed - notify all waiters, if any */ + shm_realloc_state = false; + cv_broadcast(&shm_realloc_cv); + mutex_exit(&shm_lock); + + uvm_km_free(kernel_map, (vaddr_t)oldshmsegs, sz, UVM_KMF_WIRED); return 0; } @@ -765,23 +951,33 @@ shminit(void) vaddr_t v; mutex_init(&shm_lock, MUTEX_DEFAULT, IPL_NONE); + pool_init(&shmmap_entry_pool, sizeof(struct shmmap_entry), 0, 0, 0, + "shmmp", &pool_allocator_nointr, IPL_NONE); + cv_init(&shm_realloc_cv, "shmrealc"); - /* Allocate pageable memory for our structures */ - sz = shminfo.shmmni * sizeof(struct shmid_ds); - v = uvm_km_alloc(kernel_map, round_page(sz), 0, UVM_KMF_WIRED); + /* Allocate the wired memory for our structures */ + sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) + + ALIGN(shminfo.shmmni * sizeof(kcondvar_t)); + v = uvm_km_alloc(kernel_map, round_page(sz), 0, + UVM_KMF_WIRED|UVM_KMF_ZERO); if (v == 0) panic("sysv_shm: cannot allocate memory"); shmsegs = (void *)v; + shm_cv = (void *)((uintptr_t)shmsegs + + ALIGN(shminfo.shmmni * sizeof(struct shmid_ds))); shminfo.shmmax *= PAGE_SIZE; for (i = 0; i < shminfo.shmmni; i++) { + cv_init(&shm_cv[i], "shmwait"); shmsegs[i].shm_perm.mode = SHMSEG_FREE; shmsegs[i].shm_perm._seq = 0; } shm_last_free = 0; shm_nused = 0; shm_committed = 0; + shm_realloc_disable = 0; + shm_realloc_state = false; } static int @@ -797,13 +993,7 @@ sysctl_ipc_shmmni(SYSCTLFN_ARGS) if (error || newp == NULL) return error; - mutex_enter(&shm_lock); - error = shmrealloc(newsize); - if (error == 0) - shminfo.shmmni = newsize; - mutex_exit(&shm_lock); - - return error; + return shmrealloc(newsize); } static int @@ -813,12 +1003,12 @@ sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS) struct sysctlnode
node; node = *rnode; node.sysctl_data = &newsize; + newsize = shminfo.shmall; error = sysctl_lookup(SYSCTLFN_CALL(&node)); if (error || newp == NULL) return error; - /* XXX: Would be good to have a upper limit */ if (newsize < 1) return EINVAL; @@ -830,47 +1020,42 @@ sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS) SYSCTL_SETUP(sysctl_ipc_shm_setup, "sysctl kern.ipc subtree setup") { + sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "kern", NULL, NULL, 0, NULL, 0, CTL_KERN, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "ipc", SYSCTL_DESCR("SysV IPC options"), NULL, 0, NULL, 0, CTL_KERN, KERN_SYSVIPC, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT | CTLFLAG_READONLY, CTLTYPE_INT, "shmmax", SYSCTL_DESCR("Max shared memory segment size in bytes"), NULL, 0, &shminfo.shmmax, 0, CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAX, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_INT, "shmmni", SYSCTL_DESCR("Max number of shared memory identifiers"), sysctl_ipc_shmmni, 0, &shminfo.shmmni, 0, CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMNI, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_INT, "shmseg", SYSCTL_DESCR("Max shared memory segments per process"), NULL, 0, &shminfo.shmseg, 0, CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMSEG, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_INT, "shmmaxpgs", SYSCTL_DESCR("Max amount of shared memory in pages"), sysctl_ipc_shmmaxpgs, 0, &shminfo.shmall, 0, CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAXPGS, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_INT, "shm_use_phys", diff --git a/sys/kern/tty.c b/sys/kern/tty.c index 2c9f7358947a..c0a3b1313207 100644 --- a/sys/kern/tty.c +++ b/sys/kern/tty.c @@ -1,4 +1,4 @@ -/* $NetBSD: tty.c,v 1.210 2007/12/31 21:11:13 ad Exp $ */ +/* $NetBSD: tty.c,v 1.211 2008/01/02 11:48:55 ad Exp $ */ /*- * Copyright (c) 1982, 1986, 1990, 1991, 1993 @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: tty.c,v 1.210 2007/12/31 21:11:13 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tty.c,v 1.211 2008/01/02 11:48:55 ad Exp $"); #include #include @@ -1905,6 +1905,7 @@ ttwrite(struct tty *tp, struct uio *uio, int flag) goto loop; } } + /* * Hang the process if it's in the background. */ diff --git a/sys/kern/tty_ptm.c b/sys/kern/tty_ptm.c index 42a0f8e0f6b0..f88354ce373d 100644 --- a/sys/kern/tty_ptm.c +++ b/sys/kern/tty_ptm.c @@ -1,4 +1,4 @@ -/* $NetBSD: tty_ptm.c,v 1.21 2007/11/26 19:02:05 pooka Exp $ */ +/* $NetBSD: tty_ptm.c,v 1.22 2008/01/02 11:48:55 ad Exp $ */ /*- * Copyright (c) 2004 The NetBSD Foundation, Inc. 
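Both shm sysctl handlers above share one shape: copy the node, point it at a local, seed the local with the live value, run sysctl_lookup(), and only commit after validating. A minimal sketch of that shape, with example_value standing in for the real variable (hypothetical name; the lower-bound check is borrowed from the handlers above):

	static int example_value = 1;

	static int
	sysctl_example(SYSCTLFN_ARGS)
	{
		int error, newsize;
		struct sysctlnode node;

		node = *rnode;
		node.sysctl_data = &newsize;
		newsize = example_value;	/* seed with the live value */
		error = sysctl_lookup(SYSCTLFN_CALL(&node));
		if (error || newp == NULL)
			return error;		/* error, or read-only query */

		if (newsize < 1)		/* validate before committing */
			return EINVAL;
		example_value = newsize;
		return 0;
	}

Note that shmrealloc() is now called without shm_lock held; it takes the lock itself and publishes the new segment array before anyone can observe the updated shminfo.shmmni.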
@@ -34,7 +34,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: tty_ptm.c,v 1.21 2007/11/26 19:02:05 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tty_ptm.c,v 1.22 2008/01/02 11:48:55 ad Exp $"); #include "opt_ptm.h" @@ -224,10 +224,10 @@ pty_grant_slave(struct lwp *l, dev_t dev) return error; } } - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); revoke = (vp->v_usecount > 1 || (vp->v_iflag & VI_ALIASED) || (vp->v_iflag & VI_LAYER)); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); VOP_UNLOCK(vp, 0); if (revoke) VOP_REVOKE(vp, REVOKEALL); diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 47a835a760a5..2018a2378dfc 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1,4 +1,40 @@ -/* $NetBSD: vfs_bio.c,v 1.182 2007/12/24 15:00:20 ad Exp $ */ +/* $NetBSD: vfs_bio.c,v 1.183 2008/01/02 11:48:55 ad Exp $ */ + +/*- + * Copyright (c) 2007 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Andrew Doran. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ /*- * Copyright (c) 1982, 1986, 1989, 1993 @@ -78,7 +114,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.182 2007/12/24 15:00:20 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.183 2008/01/02 11:48:55 ad Exp $"); #include "fs_ffs.h" #include "opt_bufcache.h" @@ -95,6 +131,8 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.182 2007/12/24 15:00:20 ad Exp $"); #include #include #include +#include +#include #include @@ -123,21 +161,25 @@ static void buf_setwm(void); static int buf_trim(void); static void *bufpool_page_alloc(struct pool *, int); static void bufpool_page_free(struct pool *, void *); -static inline struct buf *bio_doread(struct vnode *, daddr_t, int, +static buf_t *bio_doread(struct vnode *, daddr_t, int, kauth_cred_t, int); -static struct buf *getnewbuf(int, int, int); +static buf_t *getnewbuf(int, int, int); static int buf_lotsfree(void); static int buf_canrelease(void); -static inline u_long buf_mempoolidx(u_long); -static inline u_long buf_roundsize(u_long); -static inline void *buf_malloc(size_t); +static u_long buf_mempoolidx(u_long); +static u_long buf_roundsize(u_long); +static void *buf_malloc(size_t); static void buf_mrelease(void *, size_t); -static inline void binsheadfree(struct buf *, struct bqueue *); -static inline void binstailfree(struct buf *, struct bqueue *); +static void binsheadfree(buf_t *, struct bqueue *); +static void binstailfree(buf_t *, struct bqueue *); int count_lock_queue(void); /* XXX */ #ifdef DEBUG -static int checkfreelist(struct buf *, struct bqueue *); +static int checkfreelist(buf_t *, struct bqueue *); #endif +static void biointr(void *); +static void biodone2(buf_t *); +static void bref(buf_t *); +static void brele(buf_t *); /* * Definitions for the buffer hash lists. @@ -146,14 +188,7 @@ static int checkfreelist(struct buf *, struct bqueue *); (&bufhashtbl[(((long)(dvp) >> 8) + (int)(lbn)) & bufhash]) LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash; u_long bufhash; - -struct bio_ops *bioopsp; /* can be overriden by ffs_softdep */ - -/* - * Insq/Remq for the buffer hash lists. - */ -#define binshash(bp, dp) LIST_INSERT_HEAD(dp, bp, b_hash) -#define bremhash(bp) LIST_REMOVE(bp, b_hash) +struct bio_ops *bioopsp; /* I/O operation notification */ /* * Definitions for the buffer free lists. @@ -165,23 +200,25 @@ struct bio_ops *bioopsp; /* can be overriden by ffs_softdep */ #define BQ_AGE 2 /* rubbish */ struct bqueue { - TAILQ_HEAD(, buf) bq_queue; - uint64_t bq_bytes; + TAILQ_HEAD(, buf) bq_queue; + uint64_t bq_bytes; + buf_t *bq_marker; } bufqueues[BQUEUES]; -int needbuffer; + +static kcondvar_t needbuffer_cv; /* * Buffer queue lock. - * Take this lock first if also taking some buffer's b_interlock. */ -struct simplelock bqueue_slock = SIMPLELOCK_INITIALIZER; +kmutex_t bufcache_lock; +kmutex_t buffer_lock; -/* - * Buffer pools for I/O buffers. - */ -static struct pool bufpool; -static struct pool bufiopool; +/* Software ISR for completed transfers. */ +static void *biodone_sih; +/* Buffer pool for I/O buffers. */ +static pool_cache_t buf_cache; +static pool_cache_t bufio_cache; /* XXX - somewhat gross.. 
*/
 #if MAXBSIZE == 0x2000
@@ -231,10 +268,10 @@ static struct pool_allocator bufmempool_allocator = {
 };
 
 /* Buffer memory management variables */
-uint64_t bufmem_valimit;
-uint64_t bufmem_hiwater;
-uint64_t bufmem_lowater;
-uint64_t bufmem;
+u_long bufmem_valimit;
+u_long bufmem_hiwater;
+u_long bufmem_lowater;
+u_long bufmem;
 
 /*
  * MD code can call this to set a hard limit on the amount
@@ -269,14 +306,18 @@ buf_setwm(void)
 #ifdef DEBUG
 int debug_verify_freelist = 0;
 static int
-checkfreelist(struct buf *bp, struct bqueue *dp)
+checkfreelist(buf_t *bp, struct bqueue *dp)
 {
-	struct buf *b;
+	buf_t *b;
+
+	if (!debug_verify_freelist)
+		return 1;
 
 	TAILQ_FOREACH(b, &dp->bq_queue, b_freelist) {
 		if (b == bp)
 			return 1;
 	}
+	return 0;
 }
 #endif
@@ -285,8 +326,8 @@ checkfreelist(struct buf *bp, struct bqueue *dp)
  * Insq/Remq for the buffer hash lists.
  * Call with buffer queue locked.
 */
-static inline void
-binsheadfree(struct buf *bp, struct bqueue *dp)
+static void
+binsheadfree(buf_t *bp, struct bqueue *dp)
 {
 
 	KASSERT(bp->b_freelistindex == -1);
@@ -295,8 +336,8 @@ binsheadfree(struct buf *bp, struct bqueue *dp)
 	bp->b_freelistindex = dp - bufqueues;
 }
 
-static inline void
-binstailfree(struct buf *bp, struct bqueue *dp)
+static void
+binstailfree(buf_t *bp, struct bqueue *dp)
 {
 
 	KASSERT(bp->b_freelistindex == -1);
@@ -306,24 +347,61 @@ binstailfree(struct buf *bp, struct bqueue *dp)
 }
 
 void
-bremfree(struct buf *bp)
+bremfree(buf_t *bp)
 {
 	struct bqueue *dp;
 	int bqidx = bp->b_freelistindex;
 
-	LOCK_ASSERT(simple_lock_held(&bqueue_slock));
+	KASSERT(mutex_owned(&bufcache_lock));
 
 	KASSERT(bqidx != -1);
 	dp = &bufqueues[bqidx];
-	KDASSERT(!debug_verify_freelist || checkfreelist(bp, dp));
+	KDASSERT(checkfreelist(bp, dp));
 	KASSERT(dp->bq_bytes >= bp->b_bufsize);
 	TAILQ_REMOVE(&dp->bq_queue, bp, b_freelist);
 	dp->bq_bytes -= bp->b_bufsize;
+
+	/* For the sysctl helper. */
+	if (bp == dp->bq_marker)
+		dp->bq_marker = NULL;
+
 #if defined(DIAGNOSTIC)
 	bp->b_freelistindex = -1;
 #endif /* defined(DIAGNOSTIC) */
 }
 
+/*
+ * Add a reference to a buffer structure that came from buf_cache.
+ */
+static inline void
+bref(buf_t *bp)
+{
+
+	KASSERT(mutex_owned(&bufcache_lock));
+	KASSERT(bp->b_refcnt > 0);
+
+	bp->b_refcnt++;
+}
+
+/*
+ * Free an unused buffer structure that came from buf_cache.
+ */
+static inline void
+brele(buf_t *bp)
+{
+
+	KASSERT(mutex_owned(&bufcache_lock));
+	KASSERT(bp->b_refcnt > 0);
+
+	if (bp->b_refcnt-- == 1) {
+		buf_destroy(bp);
+#ifdef DEBUG
+		memset((char *)bp, 0, sizeof(*bp));
+#endif
+		pool_cache_put(buf_cache, bp);
+	}
+}
+
 u_long
 buf_memcalc(void)
 {
@@ -369,6 +447,10 @@ bufinit(void)
 	int use_std;
 	u_int i;
 
+	mutex_init(&bufcache_lock, MUTEX_DEFAULT, IPL_NONE);
+	mutex_init(&buffer_lock, MUTEX_DEFAULT, IPL_NONE);
+	cv_init(&needbuffer_cv, "needbuf");
+
 	/*
 	 * Initialize buffer cache memory parameters.
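A note on the pair just added: b_refcnt counts references to the buf structure itself, independent of BC_BUSY ownership of the data, and the post-decrement test in brele() frees on the last release. The pattern they exist for, as bbusy() uses near the end of this file, is pinning a buffer across a sleep so that a concurrent release cannot destroy the condvars underneath the sleeper. A minimal sketch (example_wait is a hypothetical caller):

	static void
	example_wait(buf_t *bp)
	{

		KASSERT(mutex_owned(&bufcache_lock));

		bp->b_cflags |= BC_WANTED;
		bref(bp);	/* pin: bp may be released while we sleep */
		(void)cv_timedwait(&bp->b_busy, &bufcache_lock, hz);
		brele(bp);	/* unpin: frees bp if we held the last ref */
	}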
*/ @@ -395,10 +477,10 @@ bufinit(void) use_std = 1; #endif - pool_init(&bufpool, sizeof(struct buf), 0, 0, 0, "bufpl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&bufiopool, sizeof(struct buf), 0, 0, 0, "biopl", - NULL, IPL_BIO); + buf_cache = pool_cache_init(sizeof(buf_t), 0, 0, 0, + "bufpl", NULL, IPL_SOFTBIO, NULL, NULL, NULL); + bufio_cache = pool_cache_init(sizeof(buf_t), 0, 0, 0, + "biopl", NULL, IPL_BIO, NULL, NULL, NULL); bufmempool_allocator.pa_backingmap = buf_map; for (i = 0; i < NMEMPOOLS; i++) { @@ -435,6 +517,16 @@ bufinit(void) bufhashtbl = hashinit(nbuf, HASH_LIST, M_CACHE, M_WAITOK, &bufhash); } +void +bufinit2(void) +{ + + biodone_sih = softint_establish(SOFTINT_BIO | SOFTINT_MPSAFE, biointr, + NULL); + if (biodone_sih == NULL) + panic("bufinit2: can't establish soft interrupt"); +} + static int buf_lotsfree(void) { @@ -478,15 +570,14 @@ buf_lotsfree(void) * Return estimate of bytes we think need to be * released to help resolve low memory conditions. * - * => called at splbio. - * => called with bqueue_slock held. + * => called with bufcache_lock held. */ static int buf_canrelease(void) { int pagedemand, ninvalid = 0; - LOCK_ASSERT(simple_lock_held(&bqueue_slock)); + KASSERT(mutex_owned(&bufcache_lock)); if (bufmem < bufmem_lowater) return 0; @@ -506,7 +597,7 @@ buf_canrelease(void) /* * Buffer memory allocation helper functions */ -static inline u_long +static u_long buf_mempoolidx(u_long size) { u_int n = 0; @@ -522,19 +613,18 @@ buf_mempoolidx(u_long size) return n; } -static inline u_long +static u_long buf_roundsize(u_long size) { /* Round up to nearest power of 2 */ return (1 << (buf_mempoolidx(size) + MEMPOOL_INDEX_OFFSET)); } -static inline void * +static void * buf_malloc(size_t size) { u_int n = buf_mempoolidx(size); void *addr; - int s; while (1) { addr = pool_get(&bmempools[n], PR_NOWAIT); @@ -542,16 +632,20 @@ buf_malloc(size_t size) break; /* No memory, see if we can free some. If so, try again */ - if (buf_drain(1) > 0) + mutex_enter(&bufcache_lock); + if (buf_drain(1) > 0) { + mutex_exit(&bufcache_lock); continue; + } + + if (curlwp == uvm.pagedaemon_lwp) { + mutex_exit(&bufcache_lock); + return NULL; + } /* Wait for buffers to arrive on the LRU queue */ - s = splbio(); - simple_lock(&bqueue_slock); - needbuffer = 1; - ltsleep(&needbuffer, PNORELOCK | (PRIBIO + 1), - "buf_malloc", 0, &bqueue_slock); - splx(s); + cv_timedwait(&needbuffer_cv, &bufcache_lock, hz / 4); + mutex_exit(&bufcache_lock); } return addr; @@ -567,11 +661,11 @@ buf_mrelease(void *addr, size_t size) /* * bread()/breadn() helper. */ -static inline struct buf * +static buf_t * bio_doread(struct vnode *vp, daddr_t blkno, int size, kauth_cred_t cred, int async) { - struct buf *bp; + buf_t *bp; struct mount *mp; bp = getblk(vp, blkno, size, 0, 0); @@ -584,10 +678,10 @@ bio_doread(struct vnode *vp, daddr_t blkno, int size, kauth_cred_t cred, /* * If buffer does not have data valid, start a read. - * Note that if buffer is B_INVAL, getblk() won't return it. + * Note that if buffer is BC_INVAL, getblk() won't return it. * Therefore, it's valid if its I/O has completed or been delayed. */ - if (!ISSET(bp->b_flags, (B_DONE | B_DELWRI))) { + if (!ISSET(bp->b_oflags, (BO_DONE | BO_DELWRI))) { /* Start I/O for the buffer. */ SET(bp->b_flags, B_READ | async); if (async) @@ -598,9 +692,8 @@ bio_doread(struct vnode *vp, daddr_t blkno, int size, kauth_cred_t cred, /* Pay for the read. 
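bufinit() above now draws buf structures from pool_cache(9) rather than raw pools; the cache layer can keep per-CPU lists of constructed objects, so pool_cache_get()/pool_cache_put() usually avoid the global pool lock. The lifecycle under the names this patch introduces, roughly:

	buf_t *bp;

	/* once, at initialization, as in bufinit() above */
	buf_cache = pool_cache_init(sizeof(buf_t), 0, 0, 0,
	    "bufpl", NULL, IPL_SOFTBIO, NULL, NULL, NULL);

	/* per buffer; PR_NOWAIT because callers may hold bufcache_lock */
	bp = pool_cache_get(buf_cache, PR_NOWAIT);
	if (bp != NULL) {
		buf_init(bp);		/* condvars and default fields */
		/* ... use the buffer ... */
		buf_destroy(bp);
		pool_cache_put(buf_cache, bp);
	}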
*/ curproc->p_stats->p_ru.ru_inblock++; - } else if (async) { + } else if (async) brelse(bp, 0); - } if (vp->v_type == VBLK) mp = vp->v_specmountpoint; @@ -628,9 +721,9 @@ bio_doread(struct vnode *vp, daddr_t blkno, int size, kauth_cred_t cred, */ int bread(struct vnode *vp, daddr_t blkno, int size, kauth_cred_t cred, - struct buf **bpp) + buf_t **bpp) { - struct buf *bp; + buf_t *bp; /* Get buffer for block. */ bp = *bpp = bio_doread(vp, blkno, size, cred, 0); @@ -645,9 +738,9 @@ bread(struct vnode *vp, daddr_t blkno, int size, kauth_cred_t cred, */ int breadn(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablks, - int *rasizes, int nrablks, kauth_cred_t cred, struct buf **bpp) + int *rasizes, int nrablks, kauth_cred_t cred, buf_t **bpp) { - struct buf *bp; + buf_t *bp; int i; bp = *bpp = bio_doread(vp, blkno, size, cred, 0); @@ -655,14 +748,18 @@ breadn(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablks, /* * For each of the read-ahead blocks, start a read, if necessary. */ + mutex_enter(&bufcache_lock); for (i = 0; i < nrablks; i++) { /* If it's in the cache, just go on to next one. */ if (incore(vp, rablks[i])) continue; /* Get a buffer for the read-ahead block */ + mutex_exit(&bufcache_lock); (void) bio_doread(vp, rablks[i], rasizes[i], cred, B_ASYNC); + mutex_enter(&bufcache_lock); } + mutex_exit(&bufcache_lock); /* Otherwise, we had to start a read for it; wait until it's valid. */ return (biowait(bp)); @@ -675,7 +772,7 @@ breadn(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablks, */ int breada(struct vnode *vp, daddr_t blkno, int size, daddr_t rablkno, - int rabsize, kauth_cred_t cred, struct buf **bpp) + int rabsize, kauth_cred_t cred, buf_t **bpp) { return (breadn(vp, blkno, size, &rablkno, &rabsize, 1, cred, bpp)); @@ -685,16 +782,17 @@ breada(struct vnode *vp, daddr_t blkno, int size, daddr_t rablkno, * Block write. Described in Bach (p.56) */ int -bwrite(struct buf *bp) +bwrite(buf_t *bp) { - int rv, sync, wasdelayed, s; + int rv, sync, wasdelayed; struct vnode *vp; struct mount *mp; - KASSERT(ISSET(bp->b_flags, B_BUSY)); + KASSERT(ISSET(bp->b_cflags, BC_BUSY)); vp = bp->b_vp; if (vp != NULL) { + KASSERT(bp->b_objlock == &vp->v_interlock); if (vp->v_type == VBLK) mp = vp->v_specmountpoint; else @@ -728,28 +826,24 @@ bwrite(struct buf *bp) mp->mnt_stat.f_asyncwrites++; } - s = splbio(); - simple_lock(&bp->b_interlock); - - wasdelayed = ISSET(bp->b_flags, B_DELWRI); - - CLR(bp->b_flags, (B_READ | B_DONE | B_DELWRI)); - bp->b_error = 0; - /* * Pay for the I/O operation and make sure the buf is on the correct * vnode queue. */ + CLR(bp->b_flags, B_READ); + mutex_enter(bp->b_objlock); + wasdelayed = ISSET(bp->b_oflags, BO_DELWRI); + CLR(bp->b_oflags, BO_DONE | BO_DELWRI); + bp->b_error = 0; if (wasdelayed) reassignbuf(bp, bp->b_vp); else curproc->p_stats->p_ru.ru_oublock++; + if (vp != NULL) + vp->v_numoutput++; + mutex_exit(bp->b_objlock); - /* Initiate disk write. Make sure the appropriate party is charged. */ - V_INCR_NUMOUTPUT(bp->b_vp); - simple_unlock(&bp->b_interlock); - splx(s); - + /* Initiate disk write. */ if (sync) BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); else @@ -792,9 +886,10 @@ vn_bwrite(void *v) * Described in Leffler, et al. (pp. 208-213). */ void -bdwrite(struct buf *bp) +bdwrite(buf_t *bp) { - int s; + + KASSERT(ISSET(bp->b_cflags, BC_BUSY)); /* If this is a tape block, write the block now. 
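Worth spelling out, because every hunk in this file leans on it: the old b_flags word is now split three ways. Judging from the assertions in this patch, B_* bits left in b_flags belong to whoever holds the buffer busy, BC_* bits in b_cflags are covered by bufcache_lock, and BO_* bits in b_oflags are covered by bp->b_objlock. A completion path in miniature (hypothetical helper, mirroring what biodone2() does further down):

	static void
	example_mark_done(buf_t *bp)
	{

		KASSERT(ISSET(bp->b_cflags, BC_BUSY));	/* we own the buffer */

		mutex_enter(bp->b_objlock);
		SET(bp->b_oflags, BO_DONE);
		cv_broadcast(&bp->b_done);	/* wake sleepers in biowait() */
		mutex_exit(bp->b_objlock);
	}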
*/
 	if (bdev_type(bp->b_dev) == D_TAPE) {
@@ -808,21 +903,17 @@
 	 * (2) Charge for the write,
 	 * (3) Make sure it's on its vnode's correct block list.
 	 */
-	s = splbio();
-	simple_lock(&bp->b_interlock);
+	KASSERT(bp->b_vp == NULL || bp->b_objlock == &bp->b_vp->v_interlock);
 
-	KASSERT(ISSET(bp->b_flags, B_BUSY));
-
-	if (!ISSET(bp->b_flags, B_DELWRI)) {
-		SET(bp->b_flags, B_DELWRI);
+	mutex_enter(bp->b_objlock);
+	if (!ISSET(bp->b_oflags, BO_DELWRI)) {
+		SET(bp->b_oflags, BO_DELWRI);
 		curproc->p_stats->p_ru.ru_oublock++;
 		reassignbuf(bp, bp->b_vp);
 	}
 
-	/* Otherwise, the "write" is done, so mark and release the buffer. */
-	CLR(bp->b_flags, B_DONE);
-	simple_unlock(&bp->b_interlock);
-	splx(s);
+	CLR(bp->b_oflags, BO_DONE);
+	mutex_exit(bp->b_objlock);
 
 	brelse(bp, 0);
 }
@@ -831,72 +922,64 @@ bdwrite(struct buf *bp)
  * Asynchronous block write; just an asynchronous bwrite().
 */
 void
-bawrite(struct buf *bp)
+bawrite(buf_t *bp)
 {
-	int s;
 
-	s = splbio();
-	simple_lock(&bp->b_interlock);
-
-	KASSERT(ISSET(bp->b_flags, B_BUSY));
+	KASSERT(ISSET(bp->b_cflags, BC_BUSY));
 
 	SET(bp->b_flags, B_ASYNC);
-	simple_unlock(&bp->b_interlock);
-	splx(s);
 	VOP_BWRITE(bp);
 }
 
 /*
  * Same as first half of bdwrite, mark buffer dirty, but do not release it.
- * Call at splbio() and with the buffer interlock locked.
- * Note: called only from biodone() through ffs softdep's bioopsp->io_complete()
+ * Call with the buffer interlock held.
+ *
+ * Note: called only from biodone() through ffs softdep's io_complete()
 */
 void
-bdirty(struct buf *bp)
+bdirty(buf_t *bp)
 {
 
-	LOCK_ASSERT(simple_lock_held(&bp->b_interlock));
-	KASSERT(ISSET(bp->b_flags, B_BUSY));
+	KASSERT(mutex_owned(&bufcache_lock));
+	KASSERT(bp->b_objlock == &bp->b_vp->v_interlock);
+	KASSERT(mutex_owned(bp->b_objlock));
+	KASSERT(ISSET(bp->b_cflags, BC_BUSY));
 
-	CLR(bp->b_flags, B_AGE);
+	CLR(bp->b_cflags, BC_AGE);
 
-	if (!ISSET(bp->b_flags, B_DELWRI)) {
-		SET(bp->b_flags, B_DELWRI);
+	if (!ISSET(bp->b_oflags, BO_DELWRI)) {
+		SET(bp->b_oflags, BO_DELWRI);
 		curproc->p_stats->p_ru.ru_oublock++;
 		reassignbuf(bp, bp->b_vp);
 	}
 }
 
+
 /*
  * Release a buffer on to the free lists.
  * Described in Bach (p. 46).
 */
 void
-brelse(struct buf *bp, int set)
+brelsel(buf_t *bp, int set)
 {
 	struct bqueue *bufq;
-	int s;
+	struct vnode *vp;
 
-	/* Block disk interrupts. */
-	s = splbio();
-	simple_lock(&bqueue_slock);
-	simple_lock(&bp->b_interlock);
+	KASSERT(mutex_owned(&bufcache_lock));
 
-	bp->b_flags |= set;
+	SET(bp->b_cflags, set);
 
-	KASSERT(ISSET(bp->b_flags, B_BUSY));
-	KASSERT(!ISSET(bp->b_flags, B_CALL));
+	KASSERT(ISSET(bp->b_cflags, BC_BUSY));
+	KASSERT(bp->b_iodone == NULL);
 
 	/* Wake up any processes waiting for any buffer to become free. */
-	if (needbuffer) {
-		needbuffer = 0;
-		wakeup(&needbuffer);
-	}
+	cv_signal(&needbuffer_cv);
 
-	/* Wake up any proceeses waiting for _this_ buffer to become free. */
-	if (ISSET(bp->b_flags, B_WANTED)) {
-		CLR(bp->b_flags, B_WANTED|B_AGE);
-		wakeup(bp);
+	/* Wake up any processes waiting for _this_ buffer to become free. */
+	if (ISSET(bp->b_cflags, BC_WANTED) != 0) {
+		CLR(bp->b_cflags, BC_WANTED|BC_AGE);
+		cv_broadcast(&bp->b_busy);
 	}
 
 	/*
@@ -904,46 +987,54 @@ brelse(struct buf *bp, int set)
 	 */
 
 	/* If it's locked, don't report an error; try again later. */
-	if (ISSET(bp->b_flags, B_LOCKED) && bp->b_error != 0)
+	if (ISSET(bp->b_cflags, BC_LOCKED))
 		bp->b_error = 0;
 
 	/* If it's not cacheable, or an error, mark it invalid. */
-	if (ISSET(bp->b_flags, B_NOCACHE) || bp->b_error != 0)
-		SET(bp->b_flags, B_INVAL);
+	if (ISSET(bp->b_cflags, BC_NOCACHE) || bp->b_error != 0)
+		SET(bp->b_cflags, BC_INVAL);
 
-	if (ISSET(bp->b_flags, B_VFLUSH)) {
+	if (ISSET(bp->b_cflags, BC_VFLUSH)) {
 		/*
 		 * This is a delayed write buffer that was just flushed to
 		 * disk.  It is still on the LRU queue.  If it's become
 		 * invalid, then we need to move it to a different queue;
 		 * otherwise leave it in its current position.
 		 */
-		CLR(bp->b_flags, B_VFLUSH);
-		if (!ISSET(bp->b_flags, B_INVAL|B_LOCKED|B_AGE) &&
+		CLR(bp->b_cflags, BC_VFLUSH);
+		if (!ISSET(bp->b_cflags, BC_INVAL|BC_LOCKED|BC_AGE) &&
 		    bp->b_error == 0) {
-			KDASSERT(!debug_verify_freelist || checkfreelist(bp, &bufqueues[BQ_LRU]));
+			KDASSERT(checkfreelist(bp, &bufqueues[BQ_LRU]));
 			goto already_queued;
 		} else {
 			bremfree(bp);
 		}
 	}
 
 	KDASSERT(!debug_verify_freelist || !checkfreelist(bp, &bufqueues[BQ_AGE]));
 	KDASSERT(!debug_verify_freelist || !checkfreelist(bp, &bufqueues[BQ_LRU]));
 	KDASSERT(!debug_verify_freelist || !checkfreelist(bp, &bufqueues[BQ_LOCKED]));
 
-	if ((bp->b_bufsize <= 0) || ISSET(bp->b_flags, B_INVAL)) {
+	if ((bp->b_bufsize <= 0) || ISSET(bp->b_cflags, BC_INVAL)) {
 		/*
 		 * If it's invalid or empty, dissociate it from its vnode
 		 * and put on the head of the appropriate queue.
 		 */
-		if (LIST_FIRST(&bp->b_dep) != NULL && bioopsp)
-			bioopsp->io_deallocate(bp);
-		CLR(bp->b_flags, B_DONE|B_DELWRI);
-		if (bp->b_vp) {
+		if (bioopsp != NULL)
+			(*bioopsp->io_deallocate)(bp);
+
+		mutex_enter(bp->b_objlock);
+		CLR(bp->b_oflags, BO_DONE|BO_DELWRI);
+		if ((vp = bp->b_vp) != NULL) {
+			KASSERT(bp->b_objlock == &vp->v_interlock);
 			reassignbuf(bp, bp->b_vp);
 			brelvp(bp);
+			mutex_exit(&vp->v_interlock);
+		} else {
+			KASSERT(bp->b_objlock == &buffer_lock);
+			mutex_exit(bp->b_objlock);
 		}
+
 		if (bp->b_bufsize <= 0)
 			/* no data */
 			goto already_queued;
@@ -951,7 +1042,7 @@ brelse(struct buf *bp, int set)
 			/* invalid data */
 			bufq = &bufqueues[BQ_AGE];
 		binsheadfree(bp, bufq);
-	} else {
+	} else {
 		/*
 		 * It has valid data.  Put it on the end of the appropriate
 		 * queue, so that it'll stick around for as long as possible.
 		 * If buf is AGE, but has dependencies, must put it on last
 		 * bufqueue to be scanned, ie LRU.  This protects against the
 		 * livelock where BQ_AGE only has buffers with dependencies,
 		 * and we thus never get to the dependent buffers in BQ_LRU.
 		 */
-		if (ISSET(bp->b_flags, B_LOCKED))
+		if (ISSET(bp->b_cflags, BC_LOCKED)) {
 			/* locked in core */
 			bufq = &bufqueues[BQ_LOCKED];
-		else if (!ISSET(bp->b_flags, B_AGE))
+		} else if (!ISSET(bp->b_cflags, BC_AGE)) {
 			/* valid data */
 			bufq = &bufqueues[BQ_LRU];
-		else {
+		} else {
 			/* stale but valid data */
 			int has_deps;
 
-			if (LIST_FIRST(&bp->b_dep) != NULL && bioopsp)
-				has_deps = bioopsp->io_countdeps(bp, 0);
+			if (bioopsp != NULL)
+				has_deps = (*bioopsp->io_countdeps)(bp, 0);
 			else
 				has_deps = 0;
 			bufq = has_deps ? &bufqueues[BQ_LRU] :
@@ -979,22 +1070,22 @@ brelse(struct buf *bp, int set)
 		}
 		binstailfree(bp, bufq);
 	}
-
 already_queued:
 	/* Unlock the buffer. */
-	CLR(bp->b_flags, B_AGE|B_ASYNC|B_BUSY|B_NOCACHE);
-	SET(bp->b_flags, B_CACHE);
+	CLR(bp->b_cflags, BC_AGE|BC_BUSY|BC_NOCACHE);
+	CLR(bp->b_flags, B_ASYNC);
 
-	/* Allow disk interrupts. 
*/ - simple_unlock(&bp->b_interlock); - simple_unlock(&bqueue_slock); - splx(s); - if (bp->b_bufsize <= 0) { -#ifdef DEBUG - memset((char *)bp, 0, sizeof(*bp)); -#endif - pool_put(&bufpool, bp); - } + if (bp->b_bufsize <= 0) + brele(bp); +} + +void +brelse(buf_t *bp, int set) +{ + + mutex_enter(&bufcache_lock); + brelsel(bp, set); + mutex_exit(&bufcache_lock); } /* @@ -1004,16 +1095,20 @@ already_queued: * we normally don't return the buffer, unless the caller explicitly * wants us to. */ -struct buf * +buf_t * incore(struct vnode *vp, daddr_t blkno) { - struct buf *bp; + buf_t *bp; + + KASSERT(mutex_owned(&bufcache_lock)); /* Search hash chain */ LIST_FOREACH(bp, BUFHASH(vp, blkno), b_hash) { if (bp->b_lblkno == blkno && bp->b_vp == vp && - !ISSET(bp->b_flags, B_INVAL)) - return (bp); + !ISSET(bp->b_cflags, BC_INVAL)) { + KASSERT(bp->b_objlock == &vp->v_interlock); + return (bp); + } } return (NULL); @@ -1027,65 +1122,63 @@ incore(struct vnode *vp, daddr_t blkno) * correct size. It is up to the caller to insure that the * cached blocks be of the correct size. */ -struct buf * +buf_t * getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo) { - struct buf *bp; - int s, err; - int preserve; + int err, preserve; + buf_t *bp; -start: - s = splbio(); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); + loop: bp = incore(vp, blkno); if (bp != NULL) { - simple_lock(&bp->b_interlock); - if (ISSET(bp->b_flags, B_BUSY)) { - simple_unlock(&bqueue_slock); - if (curlwp == uvm.pagedaemon_lwp) { - simple_unlock(&bp->b_interlock); - splx(s); - return NULL; - } - SET(bp->b_flags, B_WANTED); - err = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, - "getblk", slptimeo, &bp->b_interlock); - splx(s); - if (err) - return (NULL); - goto start; + err = bbusy(bp, ((slpflag & PCATCH) != 0), slptimeo); + if (err != 0) { + if (err == EPASSTHROUGH) + goto loop; + mutex_exit(&bufcache_lock); + return (NULL); } #ifdef DIAGNOSTIC - if (ISSET(bp->b_flags, B_DONE|B_DELWRI) && + if (ISSET(bp->b_oflags, BO_DONE|BO_DELWRI) && bp->b_bcount < size && vp->v_type != VBLK) panic("getblk: block size invariant failed"); #endif - SET(bp->b_flags, B_BUSY); bremfree(bp); preserve = 1; } else { - if ((bp = getnewbuf(slpflag, slptimeo, 0)) == NULL) { - simple_unlock(&bqueue_slock); - splx(s); - goto start; + if ((bp = getnewbuf(slpflag, slptimeo, 0)) == NULL) + goto loop; + + if (incore(vp, blkno) != NULL) { + /* The block has come into memory in the meantime. */ + brelsel(bp, 0); + goto loop; } - binshash(bp, BUFHASH(vp, blkno)); + LIST_INSERT_HEAD(BUFHASH(vp, blkno), bp, b_hash); bp->b_blkno = bp->b_lblkno = bp->b_rawblkno = blkno; + mutex_enter(&vp->v_interlock); bgetvp(vp, bp); + mutex_exit(&vp->v_interlock); preserve = 0; } - simple_unlock(&bp->b_interlock); - simple_unlock(&bqueue_slock); - splx(s); + mutex_exit(&bufcache_lock); + /* - * LFS can't track total size of B_LOCKED buffer (locked_queue_bytes) + * LFS can't track total size of BC_LOCKED buffer (locked_queue_bytes) * if we re-size buffers here. */ - if (ISSET(bp->b_flags, B_LOCKED)) { + if (ISSET(bp->b_cflags, BC_LOCKED)) { KASSERT(bp->b_bufsize >= size); } else { - allocbuf(bp, size, preserve); + if (allocbuf(bp, size, preserve)) { + mutex_enter(&bufcache_lock); + LIST_REMOVE(bp, b_hash); + mutex_exit(&bufcache_lock); + brelse(bp, BC_INVAL); + return NULL; + } } BIO_SETPRIO(bp, BPRIO_DEFAULT); return (bp); @@ -1094,24 +1187,22 @@ start: /* * Get an empty, disassociated buffer of given size. 
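getblk() above is the canonical consumer of bbusy(), defined at the end of this file. The contract is worth noting: 0 means the buffer was marked BC_BUSY for the caller without sleeping; EPASSTHROUGH means the caller slept, so the cache may have changed and the lookup must be redone; anything else is a timeout, a signal, or pagedaemon deadlock avoidance. Callers therefore loop, along these lines (sketch):

	mutex_enter(&bufcache_lock);
	for (;;) {
		if ((bp = incore(vp, blkno)) == NULL)
			break;			/* not cached */
		error = bbusy(bp, false, 0);
		if (error == 0)
			break;			/* bp is busy and ours */
		if (error != EPASSTHROUGH) {
			bp = NULL;		/* give up */
			break;
		}
		/* EPASSTHROUGH: we slept, the world changed; rescan */
	}
	mutex_exit(&bufcache_lock);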
*/ -struct buf * +buf_t * geteblk(int size) { - struct buf *bp; - int s; + buf_t *bp; + int error; - s = splbio(); - simple_lock(&bqueue_slock); - while ((bp = getnewbuf(0, 0, 0)) == 0) + mutex_enter(&bufcache_lock); + while ((bp = getnewbuf(0, 0, 0)) == NULL) ; - SET(bp->b_flags, B_INVAL); - binshash(bp, &invalhash); - simple_unlock(&bqueue_slock); - simple_unlock(&bp->b_interlock); - splx(s); + SET(bp->b_cflags, BC_INVAL); + LIST_INSERT_HEAD(&invalhash, bp, b_hash); + mutex_exit(&bufcache_lock); BIO_SETPRIO(bp, BPRIO_DEFAULT); - allocbuf(bp, size, 0); + error = allocbuf(bp, size, 0); + KASSERT(error == 0); return (bp); } @@ -1123,12 +1214,12 @@ geteblk(int size) * start a write. If the buffer grows, it's the callers * responsibility to fill out the buffer's additional contents. */ -void -allocbuf(struct buf *bp, int size, int preserve) +int +allocbuf(buf_t *bp, int size, int preserve) { vsize_t oldsize, desired_size; void *addr; - int s, delta; + int delta; desired_size = buf_roundsize(size); if (desired_size > MAXBSIZE) @@ -1138,13 +1229,15 @@ allocbuf(struct buf *bp, int size, int preserve) oldsize = bp->b_bufsize; if (oldsize == desired_size) - return; + return 0; /* * If we want a buffer of a different size, re-allocate the * buffer's memory; copy old content only if needed. */ addr = buf_malloc(desired_size); + if (addr == NULL) + return ENOMEM; if (preserve) memcpy(addr, bp->b_data, MIN(oldsize,desired_size)); if (bp->b_data != NULL) @@ -1153,12 +1246,11 @@ allocbuf(struct buf *bp, int size, int preserve) bp->b_bufsize = desired_size; /* - * Update overall buffer memory counter (protected by bqueue_slock) + * Update overall buffer memory counter (protected by bufcache_lock) */ delta = (long)desired_size - (long)oldsize; - s = splbio(); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); if ((bufmem += delta) > bufmem_hiwater) { /* * Need to trim overall memory usage. @@ -1166,20 +1258,16 @@ allocbuf(struct buf *bp, int size, int preserve) while (buf_canrelease()) { if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) { - simple_unlock(&bqueue_slock); - splx(s); + mutex_exit(&bufcache_lock); preempt(); - s = splbio(); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); } - if (buf_trim() == 0) break; } } - - simple_unlock(&bqueue_slock); - splx(s); + mutex_exit(&bufcache_lock); + return 0; } /* @@ -1187,38 +1275,42 @@ allocbuf(struct buf *bp, int size, int preserve) * Select something from a free list. * Preference is to AGE list, then LRU list. * - * Called at splbio and with buffer queues locked. + * Called with the buffer queues locked. * Return buffer locked. */ -struct buf * +buf_t * getnewbuf(int slpflag, int slptimeo, int from_bufq) { - struct buf *bp; + buf_t *bp; + struct vnode *vp; -start: - LOCK_ASSERT(simple_lock_held(&bqueue_slock)); + start: + KASSERT(mutex_owned(&bufcache_lock)); /* - * Get a new buffer from the pool; but use NOWAIT because - * we have the buffer queues locked. + * Get a new buffer from the pool. 
*/ - if (!from_bufq && buf_lotsfree() && - (bp = pool_get(&bufpool, PR_NOWAIT)) != NULL) { - memset((char *)bp, 0, sizeof(*bp)); - BUF_INIT(bp); - bp->b_dev = NODEV; - bp->b_vnbufs.le_next = NOLIST; - bp->b_flags = B_BUSY; - simple_lock(&bp->b_interlock); + if (!from_bufq && buf_lotsfree()) { + mutex_exit(&bufcache_lock); + bp = pool_cache_get(buf_cache, PR_NOWAIT); + if (bp != NULL) { + memset((char *)bp, 0, sizeof(*bp)); + buf_init(bp); + bp->b_dev = NODEV; + bp->b_vnbufs.le_next = NOLIST; + bp->b_cflags = BC_BUSY; + bp->b_refcnt = 1; + mutex_enter(&bufcache_lock); #if defined(DIAGNOSTIC) - bp->b_freelistindex = -1; + bp->b_freelistindex = -1; #endif /* defined(DIAGNOSTIC) */ - return (bp); + return (bp); + } + mutex_enter(&bufcache_lock); } if ((bp = TAILQ_FIRST(&bufqueues[BQ_AGE].bq_queue)) != NULL || (bp = TAILQ_FIRST(&bufqueues[BQ_LRU].bq_queue)) != NULL) { - simple_lock(&bp->b_interlock); bremfree(bp); } else { /* @@ -1226,9 +1318,12 @@ start: */ if (!from_bufq || curlwp != uvm.pagedaemon_lwp) { /* wait for a free buffer of any kind */ - needbuffer = 1; - ltsleep(&needbuffer, slpflag|(PRIBIO + 1), - "getnewbuf", slptimeo, &bqueue_slock); + if ((slpflag & PCATCH) != 0) + (void)cv_timedwait_sig(&needbuffer_cv, + &bufcache_lock, slptimeo); + else + (void)cv_timedwait(&needbuffer_cv, + &bufcache_lock, slptimeo); } return (NULL); } @@ -1238,95 +1333,100 @@ start: panic("buffer %p: on queue but empty", bp); #endif - if (ISSET(bp->b_flags, B_VFLUSH)) { + if (ISSET(bp->b_cflags, BC_VFLUSH)) { /* * This is a delayed write buffer being flushed to disk. Make * sure it gets aged out of the queue when it's finished, and * leave it off the LRU queue. */ - CLR(bp->b_flags, B_VFLUSH); - SET(bp->b_flags, B_AGE); - simple_unlock(&bp->b_interlock); + CLR(bp->b_cflags, BC_VFLUSH); + SET(bp->b_cflags, BC_AGE); goto start; } /* Buffer is no longer on free lists. */ - SET(bp->b_flags, B_BUSY); + SET(bp->b_cflags, BC_BUSY); /* * If buffer was a delayed write, start it and return NULL * (since we might sleep while starting the write). */ - if (ISSET(bp->b_flags, B_DELWRI)) { + if (ISSET(bp->b_oflags, BO_DELWRI)) { /* * This buffer has gone through the LRU, so make sure it gets * reused ASAP. */ - SET(bp->b_flags, B_AGE); - simple_unlock(&bp->b_interlock); - simple_unlock(&bqueue_slock); + SET(bp->b_cflags, BC_AGE); + mutex_exit(&bufcache_lock); bawrite(bp); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); return (NULL); } - /* disassociate us from our vnode, if we had one... */ - if (bp->b_vp) - brelvp(bp); - - if (LIST_FIRST(&bp->b_dep) != NULL && bioopsp) - bioopsp->io_deallocate(bp); + vp = bp->b_vp; + if (bioopsp != NULL) + (*bioopsp->io_deallocate)(bp); /* clear out various other fields */ - bp->b_flags = B_BUSY; + bp->b_cflags = BC_BUSY; + bp->b_oflags = 0; + bp->b_flags = 0; bp->b_dev = NODEV; - bp->b_blkno = bp->b_lblkno = bp->b_rawblkno = 0; + bp->b_blkno = 0; + bp->b_lblkno = 0; + bp->b_rawblkno = 0; bp->b_iodone = 0; bp->b_error = 0; bp->b_resid = 0; bp->b_bcount = 0; - bremhash(bp); + LIST_REMOVE(bp, b_hash); + + /* Disassociate us from our vnode, if we had one... */ + if (vp != NULL) { + mutex_enter(&vp->v_interlock); + brelvp(bp); + mutex_exit(&vp->v_interlock); + } + return (bp); } /* * Attempt to free an aged buffer off the queues. - * Called at splbio and with queue lock held. + * Called with queue lock held. * Returns the amount of buffer memory freed. 
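The lock contract for the trim path also changed: buf_drain() and buf_trim() are now entered with bufcache_lock held by the caller instead of taking splbio and bqueue_slock themselves. The sysctl code later in this file shrinks the cache toward a new high-water mark with exactly this loop (sketch of that call site):

	long excess;

	mutex_enter(&bufcache_lock);
	while ((excess = bufmem - bufmem_hiwater) >= 0) {
		/* drain in small steps rather than all at once */
		if (buf_drain(excess / (2 * 1024)) <= 0)
			break;		/* nothing more could be freed */
	}
	mutex_exit(&bufcache_lock);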
*/ static int buf_trim(void) { - struct buf *bp; + buf_t *bp; long size = 0; + KASSERT(mutex_owned(&bufcache_lock)); + /* Instruct getnewbuf() to get buffers off the queues */ if ((bp = getnewbuf(PCATCH, 1, 1)) == NULL) return 0; - KASSERT(!ISSET(bp->b_flags, B_WANTED)); - simple_unlock(&bp->b_interlock); + KASSERT((bp->b_cflags & BC_WANTED) == 0); size = bp->b_bufsize; bufmem -= size; - simple_unlock(&bqueue_slock); if (size > 0) { buf_mrelease(bp->b_data, size); bp->b_bcount = bp->b_bufsize = 0; } /* brelse() will return the buffer to the global buffer pool */ - brelse(bp, 0); - simple_lock(&bqueue_slock); + brelsel(bp, 0); return size; } int buf_drain(int n) { - int s, size = 0, sz; + int size = 0, sz; - s = splbio(); - simple_lock(&bqueue_slock); + KASSERT(mutex_owned(&bufcache_lock)); while (size < n && bufmem > bufmem_lowater) { sz = buf_trim(); @@ -1335,8 +1435,6 @@ buf_drain(int n) size += sz; } - simple_unlock(&bqueue_slock); - splx(s); return size; } @@ -1345,18 +1443,15 @@ buf_drain(int n) * When they do, extract and return the I/O's error value. */ int -biowait(struct buf *bp) +biowait(buf_t *bp) { - int s, error; - s = splbio(); - simple_lock(&bp->b_interlock); - while (!ISSET(bp->b_flags, B_DONE | B_DELWRI)) - ltsleep(bp, PRIBIO + 1, "biowait", 0, &bp->b_interlock); - error = bp->b_error; - simple_unlock(&bp->b_interlock); - splx(s); - return (error); + mutex_enter(bp->b_objlock); + while (!ISSET(bp->b_oflags, BO_DONE | BO_DELWRI)) + cv_wait(&bp->b_done, bp->b_objlock); + mutex_exit(bp->b_objlock); + + return bp->b_error; } /* @@ -1376,43 +1471,81 @@ biowait(struct buf *bp) * for the vn device, that puts malloc'd buffers on the free lists!) */ void -biodone(struct buf *bp) +biodone(buf_t *bp) { - int s = splbio(); + int s; - simple_lock(&bp->b_interlock); - if (ISSET(bp->b_flags, B_DONE)) - panic("biodone already"); - CLR(bp->b_flags, B_COWDONE); - SET(bp->b_flags, B_DONE); /* note that it's done */ + KASSERT(!ISSET(bp->b_oflags, BO_DONE)); + + if (cpu_intr_p()) { + /* From interrupt mode: defer to a soft interrupt. */ + s = splvm(); + TAILQ_INSERT_TAIL(&curcpu()->ci_data.cpu_biodone, bp, b_actq); + softint_schedule(biodone_sih); + splx(s); + } else { + /* Process now - the buffer may be freed soon. */ + biodone2(bp); + } +} + +static void +biodone2(buf_t *bp) +{ + void (*callout)(buf_t *); + + if (bioopsp != NULL) + (*bioopsp->io_complete)(bp); + + mutex_enter(bp->b_objlock); + /* Note that the transfer is done. */ + if (ISSET(bp->b_oflags, BO_DONE)) + panic("biodone2 already"); + CLR(bp->b_oflags, BO_COWDONE); + SET(bp->b_oflags, BO_DONE); BIO_SETPRIO(bp, BPRIO_DEFAULT); - if (LIST_FIRST(&bp->b_dep) != NULL && bioopsp) - bioopsp->io_complete(bp); - - if (!ISSET(bp->b_flags, B_READ)) /* wake up reader */ + /* Wake up waiting writers. */ + if (!ISSET(bp->b_flags, B_READ)) vwakeup(bp); - /* - * If necessary, call out. Unlock the buffer before calling - * iodone() as the buffer isn't valid any more when it return. - */ - if (ISSET(bp->b_flags, B_CALL)) { - CLR(bp->b_flags, B_CALL); /* but note callout done */ - simple_unlock(&bp->b_interlock); - (*bp->b_iodone)(bp); + if ((callout = bp->b_iodone) != NULL) { + /* Note callout done, then call out. */ + KERNEL_LOCK(1, NULL); /* XXXSMP */ + bp->b_iodone = NULL; + mutex_exit(bp->b_objlock); + (*callout)(bp); + KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ + } else if (ISSET(bp->b_flags, B_ASYNC)) { + /* If async, release. 
*/ + mutex_exit(bp->b_objlock); + brelse(bp, 0); } else { - if (ISSET(bp->b_flags, B_ASYNC)) { /* if async, release */ - simple_unlock(&bp->b_interlock); - brelse(bp, 0); - } else { /* or just wakeup the buffer */ - CLR(bp->b_flags, B_WANTED); - wakeup(bp); - simple_unlock(&bp->b_interlock); - } + /* Otherwise just wake up waiters in biowait(). */ + cv_broadcast(&bp->b_done); + mutex_exit(bp->b_objlock); } +} - splx(s); +static void +biointr(void *cookie) +{ + struct cpu_info *ci; + buf_t *bp; + int s; + + ci = curcpu(); + + while (!TAILQ_EMPTY(&ci->ci_data.cpu_biodone)) { + KASSERT(curcpu() == ci); + + s = splvm(); + bp = TAILQ_FIRST(&ci->ci_data.cpu_biodone); + TAILQ_REMOVE(&ci->ci_data.cpu_biodone, bp, b_actq); + splx(s); + + biodone2(bp); + } } /* @@ -1421,13 +1554,13 @@ biodone(struct buf *bp) int count_lock_queue(void) { - struct buf *bp; + buf_t *bp; int n = 0; - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); TAILQ_FOREACH(bp, &bufqueues[BQ_LOCKED].bq_queue, b_freelist) n++; - simple_unlock(&bqueue_slock); + mutex_exit(&bufcache_lock); return (n); } @@ -1438,18 +1571,17 @@ count_lock_queue(void) int buf_syncwait(void) { - struct buf *bp; - int iter, nbusy, nbusy_prev = 0, dcount, s, ihash; + buf_t *bp; + int iter, nbusy, nbusy_prev = 0, dcount, ihash; dcount = 10000; for (iter = 0; iter < 20;) { - s = splbio(); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); nbusy = 0; for (ihash = 0; ihash < bufhash+1; ihash++) { LIST_FOREACH(bp, &bufhashtbl[ihash], b_hash) { - if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY) - nbusy++; + if ((bp->b_cflags & (BC_BUSY|BC_INVAL)) == BC_BUSY) + nbusy += ((bp->b_flags & B_READ) == 0); /* * With soft updates, some buffers that are * written will be remarked as dirty until other @@ -1457,26 +1589,21 @@ buf_syncwait(void) */ if (bp->b_vp && bp->b_vp->v_mount && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP) - && (bp->b_flags & B_DELWRI)) { - simple_lock(&bp->b_interlock); + && (bp->b_oflags & BO_DELWRI)) { bremfree(bp); - bp->b_flags |= B_BUSY; + bp->b_cflags |= BC_BUSY; nbusy++; - simple_unlock(&bp->b_interlock); - simple_unlock(&bqueue_slock); + mutex_exit(&bufcache_lock); bawrite(bp); if (dcount-- <= 0) { printf("softdep "); - splx(s); goto fail; } - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); } } } - - simple_unlock(&bqueue_slock); - splx(s); + mutex_exit(&bufcache_lock); if (nbusy == 0) break; @@ -1495,14 +1622,13 @@ buf_syncwait(void) fail:; #if defined(DEBUG) || defined(DEBUG_HALT_BUSY) printf("giving up\nPrinting vnodes for busy buffers\n"); - s = splbio(); for (ihash = 0; ihash < bufhash+1; ihash++) { LIST_FOREACH(bp, &bufhashtbl[ihash], b_hash) { - if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY) + if ((bp->b_cflags & (BC_BUSY|BC_INVAL)) == BC_BUSY && + (bp->b_flags & B_READ) == 0) vprint(NULL, bp->b_vp); } } - splx(s); #endif } @@ -1510,10 +1636,10 @@ fail:; } static void -sysctl_fillbuf(struct buf *i, struct buf_sysctl *o) +sysctl_fillbuf(buf_t *i, struct buf_sysctl *o) { - o->b_flags = i->b_flags; + o->b_flags = i->b_flags | i->b_cflags | i->b_oflags; o->b_error = i->b_error; o->b_prio = i->b_prio; o->b_dev = i->b_dev; @@ -1534,12 +1660,13 @@ sysctl_fillbuf(struct buf *i, struct buf_sysctl *o) static int sysctl_dobuf(SYSCTLFN_ARGS) { - struct buf *bp; + buf_t *bp; struct buf_sysctl bs; + struct bqueue *bq; char *dp; u_int i, op, arg; size_t len, needed, elem_size, out_size; - int error, s, elem_count; + int error, elem_count, retries; if (namelen == 1 && name[0] == CTL_QUERY) return 
(sysctl_query(SYSCTLFN_CALL(rnode))); @@ -1547,6 +1674,8 @@ sysctl_dobuf(SYSCTLFN_ARGS) if (namelen != 4) return (EINVAL); + retries = 100; + retry: dp = oldp; len = (oldp != NULL) ? *oldlenp : 0; op = name[0]; @@ -1569,15 +1698,32 @@ sysctl_dobuf(SYSCTLFN_ARGS) error = 0; needed = 0; - s = splbio(); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); for (i = 0; i < BQUEUES; i++) { - TAILQ_FOREACH(bp, &bufqueues[i].bq_queue, b_freelist) { + bq = &bufqueues[i]; + TAILQ_FOREACH(bp, &bq->bq_queue, b_freelist) { + bq->bq_marker = bp; if (len >= elem_size && elem_count > 0) { sysctl_fillbuf(bp, &bs); + mutex_exit(&bufcache_lock); error = copyout(&bs, dp, out_size); + mutex_enter(&bufcache_lock); if (error) - goto cleanup; + break; + if (bq->bq_marker != bp) { + /* + * This sysctl node is only for + * statistics. Retry; if the + * queue keeps changing, then + * bail out. + */ + if (retries-- == 0) { + error = EAGAIN; + break; + } + mutex_exit(&bufcache_lock); + goto retry; + } dp += elem_size; len -= elem_size; } @@ -1587,34 +1733,22 @@ sysctl_dobuf(SYSCTLFN_ARGS) elem_count--; } } + if (error != 0) + break; } -cleanup: - simple_unlock(&bqueue_slock); - splx(s); + mutex_exit(&bufcache_lock); *oldlenp = needed; if (oldp == NULL) - *oldlenp += KERN_BUFSLOP * sizeof(struct buf); + *oldlenp += KERN_BUFSLOP * sizeof(buf_t); return (error); } -static void -sysctl_bufvm_common(void) -{ - int64_t t; - - /* Drain until below new high water mark */ - while ((t = (int64_t)bufmem - (int64_t)bufmem_hiwater) >= 0) { - if (buf_drain(t / (2 * 1024)) <= 0) - break; - } -} - static int -sysctl_bufcache_update(SYSCTLFN_ARGS) +sysctl_bufvm_update(SYSCTLFN_ARGS) { - int t, error; + int t, error, rv; struct sysctlnode node; node = *rnode; @@ -1624,32 +1758,14 @@ sysctl_bufcache_update(SYSCTLFN_ARGS) if (error || newp == NULL) return (error); - if (t < 0 || t > 100) - return EINVAL; - bufcache = t; - buf_setwm(); - - sysctl_bufvm_common(); - return 0; -} - -static int -sysctl_bufvm_update(SYSCTLFN_ARGS) -{ - int64_t t; - int error; - struct sysctlnode node; - - node = *rnode; - node.sysctl_data = &t; - t = *(int64_t *)rnode->sysctl_data; - error = sysctl_lookup(SYSCTLFN_CALL(&node)); - if (error || newp == NULL) - return (error); - if (t < 0) return EINVAL; - if (rnode->sysctl_data == &bufmem_lowater) { + if (rnode->sysctl_data == &bufcache) { + if (t > 100) + return (EINVAL); + bufcache = t; + buf_setwm(); + } else if (rnode->sysctl_data == &bufmem_lowater) { if (bufmem_hiwater - t < 16) return (EINVAL); bufmem_lowater = t; @@ -1660,7 +1776,14 @@ sysctl_bufvm_update(SYSCTLFN_ARGS) } else return (EINVAL); - sysctl_bufvm_common(); + /* Drain until below new high water mark */ + mutex_enter(&bufcache_lock); + while ((t = bufmem - bufmem_hiwater) >= 0) { + rv = buf_drain(t / (2 * 1024)); + if (rv <= 0) + break; + } + mutex_exit(&bufcache_lock); return 0; } @@ -1695,25 +1818,25 @@ SYSCTL_SETUP(sysctl_vm_buf_setup, "sysctl vm.buf* subtree setup") CTLTYPE_INT, "bufcache", SYSCTL_DESCR("Percentage of physical memory to use for " "buffer cache"), - sysctl_bufcache_update, 0, &bufcache, 0, + sysctl_bufvm_update, 0, &bufcache, 0, CTL_VM, CTL_CREATE, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READONLY, - CTLTYPE_QUAD, "bufmem", + CTLTYPE_INT, "bufmem", SYSCTL_DESCR("Amount of kernel memory used by buffer " "cache"), NULL, 0, &bufmem, 0, CTL_VM, CTL_CREATE, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, - CTLTYPE_QUAD, "bufmem_lowater", + CTLTYPE_INT, 
"bufmem_lowater", SYSCTL_DESCR("Minimum amount of kernel memory to " "reserve for buffer cache"), sysctl_bufvm_update, 0, &bufmem_lowater, 0, CTL_VM, CTL_CREATE, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, - CTLTYPE_QUAD, "bufmem_hiwater", + CTLTYPE_INT, "bufmem_hiwater", SYSCTL_DESCR("Maximum amount of kernel memory to use " "for buffer cache"), sysctl_bufvm_update, 0, &bufmem_hiwater, 0, @@ -1729,8 +1852,8 @@ SYSCTL_SETUP(sysctl_vm_buf_setup, "sysctl vm.buf* subtree setup") void vfs_bufstats(void) { - int s, i, j, count; - struct buf *bp; + int i, j, count; + buf_t *bp; struct bqueue *dp; int counts[(MAXBSIZE / PAGE_SIZE) + 1]; static const char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE" }; @@ -1739,12 +1862,10 @@ vfs_bufstats(void) count = 0; for (j = 0; j <= MAXBSIZE/PAGE_SIZE; j++) counts[j] = 0; - s = splbio(); TAILQ_FOREACH(bp, &dp->bq_queue, b_freelist) { counts[bp->b_bufsize/PAGE_SIZE]++; count++; } - splx(s); printf("%s: total-%d", bname[i], count); for (j = 0; j <= MAXBSIZE/PAGE_SIZE; j++) if (counts[j] != 0) @@ -1756,43 +1877,31 @@ vfs_bufstats(void) /* ------------------------------ */ -static struct buf * -getiobuf1(int prflags) +buf_t * +getiobuf(struct vnode *vp, bool waitok) { - struct buf *bp; - int s; + buf_t *bp; - s = splbio(); - bp = pool_get(&bufiopool, prflags); - splx(s); - if (bp != NULL) { - BUF_INIT(bp); - } + bp = pool_cache_get(bufio_cache, (waitok ? PR_WAITOK : PR_NOWAIT)); + if (bp == NULL) + return bp; + + buf_init(bp); + + if ((bp->b_vp = vp) == NULL) + bp->b_objlock = &buffer_lock; + else + bp->b_objlock = &vp->v_interlock; + return bp; } -struct buf * -getiobuf(void) -{ - - return getiobuf1(PR_WAITOK); -} - -struct buf * -getiobuf_nowait(void) -{ - - return getiobuf1(PR_NOWAIT); -} - void -putiobuf(struct buf *bp) +putiobuf(buf_t *bp) { - int s; - s = splbio(); - pool_put(&bufiopool, bp); - splx(s); + buf_destroy(bp); + pool_cache_put(bufio_cache, bp); } /* @@ -1800,9 +1909,9 @@ putiobuf(struct buf *bp) */ void -nestiobuf_iodone(struct buf *bp) +nestiobuf_iodone(buf_t *bp) { - struct buf *mbp = bp->b_private; + buf_t *mbp = bp->b_private; int error; int donebytes; @@ -1810,9 +1919,8 @@ nestiobuf_iodone(struct buf *bp) KASSERT(mbp != bp); error = 0; - if (bp->b_error != 0) { - error = bp->b_error; - } else if ((bp->b_bcount < bp->b_bufsize) || (bp->b_resid > 0)) { + if (bp->b_error == 0 && + (bp->b_bcount < bp->b_bufsize || bp->b_resid > 0)) { /* * Not all got transfered, raise an error. We have no way to * propagate these conditions to mbp. 
@@ -1836,14 +1944,16 @@
 */
 void
-nestiobuf_setup(struct buf *mbp, struct buf *bp, int offset, size_t size)
+nestiobuf_setup(buf_t *mbp, buf_t *bp, int offset, size_t size)
 {
 	const int b_read = mbp->b_flags & B_READ;
 	struct vnode *vp = mbp->b_vp;
 
 	KASSERT(mbp->b_bcount >= offset + size);
 	bp->b_vp = vp;
-	bp->b_flags = B_BUSY | B_CALL | B_ASYNC | b_read;
+	bp->b_objlock = mbp->b_objlock;
+	bp->b_cflags = BC_BUSY;
+	bp->b_flags = B_ASYNC | b_read;
 	bp->b_iodone = nestiobuf_iodone;
 	bp->b_data = (char *)mbp->b_data + offset;
 	bp->b_resid = bp->b_bcount = size;
@@ -1851,11 +1961,9 @@ nestiobuf_setup(struct buf *mbp, struct buf *bp, int offset, size_t size)
 	bp->b_private = mbp;
 	BIO_COPYPRIO(bp, mbp);
 	if (!b_read && vp != NULL) {
-		int s;
-
-		s = splbio();
-		V_INCR_NUMOUTPUT(vp);
-		splx(s);
+		mutex_enter(&vp->v_interlock);
+		vp->v_numoutput++;
+		mutex_exit(&vp->v_interlock);
 	}
 }
 
@@ -1867,24 +1975,73 @@ nestiobuf_setup(struct buf *mbp, struct buf *bp, int offset, size_t size)
 */
 void
-nestiobuf_done(struct buf *mbp, int donebytes, int error)
+nestiobuf_done(buf_t *mbp, int donebytes, int error)
 {
-	int s;
 
 	if (donebytes == 0) {
 		return;
 	}
-	s = splbio();
+	mutex_enter(mbp->b_objlock);
 	KASSERT(mbp->b_resid >= donebytes);
-	if (error) {
-		mbp->b_error = error;
-	}
 	mbp->b_resid -= donebytes;
+	if (error)
+		mbp->b_error = error;
 	if (mbp->b_resid == 0) {
-		if (mbp->b_error != 0) {
-			mbp->b_resid = mbp->b_bcount; /* be conservative */
-		}
+		mutex_exit(mbp->b_objlock);
 		biodone(mbp);
-	}
-	splx(s);
+	} else
+		mutex_exit(mbp->b_objlock);
+}
+
+void
+buf_init(buf_t *bp)
+{
+
+	LIST_INIT(&bp->b_dep);
+	cv_init(&bp->b_busy, "biolock");
+	cv_init(&bp->b_done, "biowait");
+	bp->b_dev = NODEV;
+	bp->b_error = 0;
+	bp->b_flags = 0;
+	bp->b_cflags = 0;
+	bp->b_oflags = 0;
+	bp->b_objlock = &buffer_lock;
+	bp->b_iodone = NULL;
+	BIO_SETPRIO(bp, BPRIO_DEFAULT);
+}
+
+void
+buf_destroy(buf_t *bp)
+{
+
+	cv_destroy(&bp->b_done);
+	cv_destroy(&bp->b_busy);
+}
+
+int
+bbusy(buf_t *bp, bool intr, int timo)
+{
+	int error;
+
+	KASSERT(mutex_owned(&bufcache_lock));
+
+	if ((bp->b_cflags & BC_BUSY) != 0) {
+		if (curlwp == uvm.pagedaemon_lwp)
+			return EDEADLK;
+		bp->b_cflags |= BC_WANTED;
+		bref(bp);
+		if (intr) {
+			error = cv_timedwait_sig(&bp->b_busy, &bufcache_lock,
+			    timo);
+		} else {
+			error = cv_timedwait(&bp->b_busy, &bufcache_lock,
+			    timo);
+		}
+		brele(bp);
+		if (error != 0)
+			return error;
+		return EPASSTHROUGH;
+	}
+	bp->b_cflags |= BC_BUSY;
+
+	return 0;
+}
diff --git a/sys/kern/vfs_lockf.c b/sys/kern/vfs_lockf.c
index ddc32d343ebd..140222688931 100644
--- a/sys/kern/vfs_lockf.c
+++ b/sys/kern/vfs_lockf.c
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_lockf.c,v 1.60 2007/07/09 21:10:57 ad Exp $	*/
+/*	$NetBSD: vfs_lockf.c,v 1.61 2008/01/02 11:48:56 ad Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1989, 1993
@@ -35,7 +35,7 @@
 */
 
 #include 
-__KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.60 2007/07/09 21:10:57 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.61 2008/01/02 11:48:56 ad Exp $");
 
 #include 
 #include 
@@ -74,6 +74,7 @@ struct lockf {
 	struct locklist lf_blkhd;	/* List of requests blocked on this lock */
 	TAILQ_ENTRY(lockf) lf_block;	/* A request waiting for a lock */
 	uid_t lf_uid;			/* User ID responsible */
+	kcondvar_t lf_cv;		/* Signalling */
 };
 
 /* Maximum length of sleep chains to traverse to try and detect deadlock. 
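nestiobuf_setup() and nestiobuf_done() let one logical transfer be split across subsidiary buffers; the master's biodone() fires once its b_resid drains to zero. Rough usage, splitting a read into two chunks (illustrative only; the children are disposed of by nestiobuf_iodone(), whose tail lies outside this excerpt):

	mbp = getiobuf(vp, true);		/* master */
	mbp->b_cflags = BC_BUSY;
	mbp->b_flags = B_READ;
	mbp->b_data = addr;
	mbp->b_bcount = mbp->b_resid = 2 * chunk;

	for (i = 0; i < 2; i++) {
		bp = getiobuf(NULL, true);	/* child */
		nestiobuf_setup(mbp, bp, i * chunk, chunk);
		bp->b_blkno = blkno + btodb(i * chunk);
		VOP_STRATEGY(vp, bp);
	}
	error = biowait(mbp);	/* returns when both children are done */
	putiobuf(mbp);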
*/ @@ -202,6 +203,7 @@ lf_alloc(uid_t uid, int allowfail) mutex_exit(&uip->ui_lock); lock = pool_get(&lockfpool, PR_WAITOK); lock->lf_uid = uid; + cv_init(&lock->lf_cv, "lockf"); return lock; } @@ -214,6 +216,7 @@ lf_free(struct lockf *lock) mutex_enter(&uip->ui_lock); uip->ui_lockcnt--; mutex_exit(&uip->ui_lock); + cv_destroy(&lock->lf_cv); pool_put(&lockfpool, lock); } @@ -389,7 +392,7 @@ lf_wakelock(struct lockf *listhead) if (lockf_debug & 2) lf_print("lf_wakelock: awakening", wakelock); #endif - wakeup(wakelock); + cv_broadcast(&wakelock->lf_cv); } } @@ -417,7 +420,7 @@ lf_clearlock(struct lockf *unlock, struct lockf **sparelock) #endif /* LOCKF_DEBUG */ prev = head; while ((ovcase = lf_findoverlap(lf, unlock, SELF, - &prev, &overlap)) != 0) { + &prev, &overlap)) != 0) { /* * Wakeup the list of locks to be retried. */ @@ -494,13 +497,13 @@ lf_getblock(struct lockf *lock) */ static int lf_setlock(struct lockf *lock, struct lockf **sparelock, - struct simplelock *interlock) + kmutex_t *interlock) { struct lockf *block; struct lockf **head = lock->lf_head; struct lockf **prev, *overlap, *ltmp; static char lockstr[] = "lockf"; - int ovcase, priority, needtolink, error; + int ovcase, needtolink, error; #ifdef LOCKF_DEBUG if (lockf_debug & 1) @@ -508,12 +511,12 @@ lf_setlock(struct lockf *lock, struct lockf **sparelock, #endif /* LOCKF_DEBUG */ /* - * Set the priority + * XXX Here we used to set the sleep priority so that writers + * took priority. That's of dubious use, and is not possible + * with condition variables. Need to find a better way to ensure + * fairness. */ - priority = PLOCK; - if (lock->lf_type == F_WRLCK) - priority += 4; - priority |= PCATCH; + /* * Scan lock list for this file looking for locks that would block us. */ @@ -610,7 +613,7 @@ lf_setlock(struct lockf *lock, struct lockf **sparelock, lf_printlist("lf_setlock", block); } #endif /* LOCKF_DEBUG */ - error = ltsleep(lock, priority, lockstr, 0, interlock); + error = cv_wait_sig(&lock->lf_cv, interlock); /* * We may have been awakened by a signal (in @@ -800,7 +803,7 @@ lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size) struct flock *fl = ap->a_fl; struct lockf *lock = NULL; struct lockf *sparelock; - struct simplelock *interlock = &ap->a_vp->v_interlock; + kmutex_t *interlock = &ap->a_vp->v_interlock; off_t start, end; int error = 0; @@ -828,7 +831,7 @@ lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size) return EINVAL; /* - * Allocate locks before acquiring the simple lock. We need two + * Allocate locks before acquiring the interlock. We need two * locks in the worst case. */ switch (ap->a_op) { @@ -865,7 +868,7 @@ lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size) goto quit; } - simple_lock(interlock); + mutex_enter(interlock); /* * Avoid the common case of unlocking when inode has no locks. @@ -927,7 +930,7 @@ lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size) } quit_unlock: - simple_unlock(interlock); + mutex_exit(interlock); quit: if (lock) lf_free(lock); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 4bc650dd7970..e4d57567820e 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_subr.c,v 1.308 2007/12/01 10:36:47 yamt Exp $ */ +/* $NetBSD: vfs_subr.c,v 1.309 2008/01/02 11:48:56 ad Exp $ */ /*- * Copyright (c) 1997, 1998, 2004, 2005, 2007 The NetBSD Foundation, Inc. 
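The vfs_lockf.c conversion above is the cleanest example in this commit of replacing sleep-on-address with a per-object condvar: each struct lockf carries its own lf_cv, initialized and destroyed with the lock, and a wakeup addresses exactly the waiters queued on that lock rather than hashing on a kernel address. The full lifecycle, pulled together from the hunks above:

	/* allocate: each lock gets its own condvar */
	lock = pool_get(&lockfpool, PR_WAITOK);
	cv_init(&lock->lf_cv, "lockf");

	/* block: formerly ltsleep(lock, priority, ...) on the address */
	error = cv_wait_sig(&lock->lf_cv, interlock);

	/* release: wake every request queued behind this lock */
	cv_broadcast(&wakelock->lf_cv);

	/* free */
	cv_destroy(&lock->lf_cv);
	pool_put(&lockfpool, lock);

The lost write-priority boost is the one regression the XXX comment above flags: condvars carry no sleep priority, so fairness between readers and writers has to come from queue order instead.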
@@ -82,7 +82,7 @@
 */
 
 #include 
-__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.308 2007/12/01 10:36:47 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.309 2008/01/02 11:48:56 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_ddb.h"
@@ -107,6 +107,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.308 2007/12/01 10:36:47 yamt Exp $");
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -120,14 +121,16 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.308 2007/12/01 10:36:47 yamt Exp $");
 extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */
 extern int vfs_magiclinks;	/* 1 => expand "magic" symlinks */
 
-/* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
-struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
-struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
+static vnodelst_t vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
+static vnodelst_t vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
+static vnodelst_t vrele_list = TAILQ_HEAD_INITIALIZER(vrele_list);
 
-struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
+static int vrele_pending;
+static kmutex_t vrele_lock;
+static kcondvar_t vrele_cv;
+static lwp_t *vrele_lwp;
 
-POOL_INIT(vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
-    &pool_allocator_nointr, IPL_NONE);
+static pool_cache_t vnode_cache;
 
 MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
 
@@ -135,27 +138,53 @@ MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
  * Local declarations.
 */
 
-static void insmntque(struct vnode *, struct mount *);
-static int getdevvp(dev_t, struct vnode **, enum vtype);
-static void vclean(struct vnode *, int, struct lwp *);
-static struct vnode *getcleanvnode(struct lwp *);
+static void vrele_thread(void *);
+static void insmntque(vnode_t *, struct mount *);
+static int getdevvp(dev_t, vnode_t **, enum vtype);
+static vnode_t *getcleanvnode(void);
+void vpanic(vnode_t *, const char *);
+
+#ifdef DIAGNOSTIC
+void
+vpanic(vnode_t *vp, const char *msg)
+{
+
+	vprint(NULL, vp);
+	panic("%s\n", msg);
+}
+#else
+#define	vpanic(vp, msg)	/* nothing */
+#endif
+
+void
+vn_init1(void)
+{
+
+	vnode_cache = pool_cache_init(sizeof(struct vnode), 0, 0, 0, "vnodepl",
+	    NULL, IPL_NONE, NULL, NULL, NULL);
+	KASSERT(vnode_cache != NULL);
+
+	/* Create deferred release thread. */
+	mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
+	cv_init(&vrele_cv, "vrele");
+	if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
+	    NULL, &vrele_lwp, "vrele"))
+		panic("fork vrele");
+}
 
 int
 vfs_drainvnodes(long target, struct lwp *l)
 {
 
-	simple_lock(&vnode_free_list_slock);
 	while (numvnodes > target) {
-		struct vnode *vp;
+		vnode_t *vp;
 
-		vp = getcleanvnode(l);
+		mutex_enter(&vnode_free_list_lock);
+		vp = getcleanvnode();
 		if (vp == NULL)
 			return EBUSY; /* give up */
-		pool_put(&vnode_pool, vp);
-		simple_lock(&vnode_free_list_slock);
-		numvnodes--;
+		ungetnewvnode(vp);
 	}
-	simple_unlock(&vnode_free_list_slock);
 
 	return 0;
 }
 
@@ -163,58 +192,95 @@ vfs_drainvnodes(long target, struct lwp *l)
 
 /*
 * grab a vnode from freelist and clean it. 
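vn_init1() above creates the deferred-release thread, but vrele_thread() itself falls outside this excerpt. Under the variables declared here, such a worker would take roughly this shape; treat it as a sketch of the idea (drain vrele_list where it is safe to sleep and take vnode locks), not as the committed body:

	static void
	vrele_thread(void *cookie)
	{
		vnode_t *vp;

		for (;;) {
			mutex_enter(&vrele_lock);
			while (TAILQ_EMPTY(&vrele_list))
				cv_wait(&vrele_cv, &vrele_lock);
			vp = TAILQ_FIRST(&vrele_list);
			TAILQ_REMOVE(&vrele_list, vp, v_freelist);
			vrele_pending--;
			mutex_exit(&vrele_lock);

			vrele(vp);
		}
	}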
*/ -struct vnode * -getcleanvnode(struct lwp *l) +vnode_t * +getcleanvnode(void) { - struct vnode *vp; - struct freelst *listhd; + vnode_t *vp; + vnodelst_t *listhd; - LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock)); + KASSERT(mutex_owned(&vnode_free_list_lock)); +retry: listhd = &vnode_free_list; try_nextlist: TAILQ_FOREACH(vp, listhd, v_freelist) { - if (!simple_lock_try(&vp->v_interlock)) + /* + * It's safe to test v_usecount and v_iflag + * without holding the interlock here, since + * these vnodes should never appear on the + * lists. + */ + if (vp->v_usecount != 0) { + vpanic(vp, "free vnode isn't"); + } + if ((vp->v_iflag & VI_CLEAN) != 0) { + vpanic(vp, "clean vnode on freelist"); + } + if (vp->v_freelisthd != listhd) { + printf("vnode sez %p, listhd %p\n", vp->v_freelisthd, listhd); + vpanic(vp, "list head mismatch"); + } + if (!mutex_tryenter(&vp->v_interlock)) continue; /* - * as our lwp might hold the underlying vnode locked, - * don't try to reclaim the VLAYER vnode if it's locked. + * Our lwp might hold the underlying vnode + * locked, so don't try to reclaim a VI_LAYER + * node if it's locked. */ if ((vp->v_iflag & VI_XLOCK) == 0 && ((vp->v_iflag & VI_LAYER) == 0 || VOP_ISLOCKED(vp) == 0)) { break; } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } - if (vp == NULLVP) { + if (vp == NULL) { if (listhd == &vnode_free_list) { listhd = &vnode_hold_list; goto try_nextlist; } - simple_unlock(&vnode_free_list_slock); - return NULLVP; + mutex_exit(&vnode_free_list_lock); + return NULL; } - if (vp->v_usecount) - panic("free vnode isn't, vp %p", vp); + /* Remove it from the freelist. */ TAILQ_REMOVE(listhd, vp, v_freelist); - /* see comment on why 0xdeadb is set at end of vgone (below) */ - vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb; - simple_unlock(&vnode_free_list_slock); + vp->v_freelisthd = NULL; + mutex_exit(&vnode_free_list_lock); - if (vp->v_type != VBAD) - vgonel(vp, l); - else - simple_unlock(&vp->v_interlock); -#ifdef DIAGNOSTIC - if (vp->v_data || vp->v_uobj.uo_npages || - TAILQ_FIRST(&vp->v_uobj.memq)) - panic("cleaned vnode isn't, vp %p", vp); - if (vp->v_numoutput) - panic("clean vnode has pending I/O's, vp %p", vp); -#endif - KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); + /* + * The vnode is still associated with a file system, so we must + * clean it out before reusing it. We need to add a reference + * before doing this. If the vnode gains another reference while + * being cleaned out then we lose - retry. + */ + vp->v_usecount++; + vclean(vp, DOCLOSE); + if (vp->v_usecount == 1) { + /* We're about to dirty it. */ + vp->v_iflag &= ~VI_CLEAN; + mutex_exit(&vp->v_interlock); + } else { + /* + * Don't return to freelist - the holder of the last + * reference will destroy it. + */ + vp->v_usecount--; + mutex_exit(&vp->v_interlock); + mutex_enter(&vnode_free_list_lock); + goto retry; + } + + if (vp->v_data != NULL || vp->v_uobj.uo_npages != 0 || + !TAILQ_EMPTY(&vp->v_uobj.memq)) { + vpanic(vp, "cleaned vnode isn't"); + } + if (vp->v_numoutput != 0) { + vpanic(vp, "clean vnode has pending I/O's"); + } + if ((vp->v_iflag & VI_ONWORKLST) != 0) { + vpanic(vp, "clean vnode on syncer list"); + } return vp; } @@ -244,11 +310,11 @@ vfs_busy(struct mount *mp, int flags, kmutex_t *interlkp) * wakeup needs to be done is at the release of the * exclusive lock at the end of dounmount. 
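getcleanvnode() above scans the freelists with mutex_tryenter(), skipping any vnode whose interlock is contended instead of sleeping for it; that sidesteps lock-order problems between vnode_free_list_lock and the per-vnode interlocks. The same scan shape on a hypothetical list (struct node and the n_* names are illustrative, not from the patch):

	#include <sys/param.h>
	#include <sys/mutex.h>
	#include <sys/queue.h>

	struct node {
		TAILQ_ENTRY(node) n_entry;
		kmutex_t	n_lock;
		bool		n_usable;
	};

	static TAILQ_HEAD(, node) nodelist = TAILQ_HEAD_INITIALIZER(nodelist);
	static kmutex_t nodelist_lock;

	/* Return a usable node, locked and off the list, or NULL. */
	static struct node *
	take_first_usable(void)
	{
		struct node *np;

		mutex_enter(&nodelist_lock);
		TAILQ_FOREACH(np, &nodelist, n_entry) {
			if (!mutex_tryenter(&np->n_lock))
				continue;	/* contended: skip, keep scanning */
			if (np->n_usable)
				break;		/* found one; n_lock still held */
			mutex_exit(&np->n_lock);
		}
		if (np != NULL)
			TAILQ_REMOVE(&nodelist, np, n_entry);
		mutex_exit(&nodelist_lock);
		return np;
	}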
 */
-	simple_lock(&mp->mnt_slock);
+	mutex_enter(&mp->mnt_mutex);
 	mp->mnt_wcnt++;
-	ltsleep((void *)mp, PVFS, "vfs_busy", 0, &mp->mnt_slock);
+	mtsleep((void *)mp, PVFS, "vfs_busy", 0, &mp->mnt_mutex);
 	n = --mp->mnt_wcnt;
-	simple_unlock(&mp->mnt_slock);
+	mutex_exit(&mp->mnt_mutex);
 	gone = mp->mnt_iflag & IMNT_GONE;
 
 	if (n == 0)
@@ -259,11 +325,9 @@ vfs_busy(struct mount *mp, int flags, kmutex_t *interlkp)
 		return (ENOENT);
 	}
 	lkflags = LK_SHARED;
-	if (interlkp) {
-		/* lkflags |= LK_INTERLOCK; XXX */
-		mutex_exit(interlkp);	/* XXX */
-	}
-	if (lockmgr(&mp->mnt_lock, lkflags, NULL))
+	if (interlkp)
+		lkflags |= LK_INTERLOCK;
+	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
 		panic("vfs_busy: unexpected lock failure");
 	return (0);
 }
@@ -291,27 +355,29 @@ vfs_rootmountalloc(const char *fstypename, const char *devname,
 	struct vfsops *vfsp = NULL;
 	struct mount *mp;
 
+	mutex_enter(&vfs_list_lock);
 	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
 		if (!strncmp(vfsp->vfs_name, fstypename,
 		    sizeof(mp->mnt_stat.f_fstypename)))
 			break;
-
 	if (vfsp == NULL)
 		return (ENODEV);
+	vfsp->vfs_refcount++;
+	mutex_exit(&vfs_list_lock);
+
 	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
 	memset((char *)mp, 0, (u_long)sizeof(struct mount));
 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
-	simple_lock_init(&mp->mnt_slock);
+	mutex_init(&mp->mnt_mutex, MUTEX_DEFAULT, IPL_NONE);
 	(void)vfs_busy(mp, LK_NOWAIT, 0);
 	TAILQ_INIT(&mp->mnt_vnodelist);
 	mp->mnt_op = vfsp;
 	mp->mnt_flag = MNT_RDONLY;
-	mp->mnt_vnodecovered = NULLVP;
-	vfsp->vfs_refcount++;
+	mp->mnt_vnodecovered = NULL;
 	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
 	    sizeof(mp->mnt_stat.f_fstypename));
 	mp->mnt_stat.f_mntonname[0] = '/';
 	mp->mnt_stat.f_mntonname[1] = '\0';
 	mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
 	    '\0';
 	(void)copystr(devname, mp->mnt_stat.f_mntfromname,
@@ -321,7 +387,6 @@ vfs_rootmountalloc(const char *fstypename, const char *devname,
 	return (0);
 }
 
-
 /*
  * Routines having to do with the management of the vnode table.
  */
@@ -332,12 +397,11 @@ extern int (**dead_vnodeop_p)(void *);
  */
 int
 getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
-    struct vnode **vpp)
+    vnode_t **vpp)
 {
 	struct uvm_object *uobj;
-	struct lwp *l = curlwp;		/* XXX */
 	static int toggle;
-	struct vnode *vp;
+	vnode_t *vp;
 	int error = 0, tryalloc;
 
 try_again:
@@ -374,7 +438,7 @@ getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
 
 	vp = NULL;
 
-	simple_lock(&vnode_free_list_slock);
+	mutex_enter(&vnode_free_list_lock);
 
 	toggle ^= 1;
 	if (numvnodes > 2 * desiredvnodes)
@@ -384,25 +448,19 @@
 	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
 	    (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
 
-	if (tryalloc &&
-	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
+	if (tryalloc) {
 		numvnodes++;
-		simple_unlock(&vnode_free_list_slock);
-		memset(vp, 0, sizeof(*vp));
-		UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 1);
-		/*
-		 * done by memset() above.
-		 *	LIST_INIT(&vp->v_nclist);
-		 *	LIST_INIT(&vp->v_dnclist);
-		 */
-	} else {
-		vp = getcleanvnode(l);
-		/*
-		 * Unless this is a bad time of the month, at most
-		 * the first NCPUS items on the free list are
-		 * locked, so this is close enough to being empty.
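vfs_rootmountalloc() above now performs the vfs_list lookup under vfs_list_lock and takes the vfs_refcount reference before dropping the lock, so the entry cannot disappear between lookup and use. That pin-while-locked idiom on a hypothetical table (struct entry and the e_*/table_* names are invented for the example):

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/mutex.h>
	#include <sys/queue.h>

	struct entry {
		LIST_ENTRY(entry) e_list;
		const char	*e_name;
		int		e_refcount;	/* protected by table_lock */
	};

	static LIST_HEAD(, entry) table = LIST_HEAD_INITIALIZER(table);
	static kmutex_t table_lock;

	static struct entry *
	table_lookup_ref(const char *name)
	{
		struct entry *ep;

		mutex_enter(&table_lock);
		LIST_FOREACH(ep, &table, e_list) {
			if (strcmp(ep->e_name, name) == 0) {
				ep->e_refcount++;	/* pin before unlocking */
				break;
			}
		}
		mutex_exit(&table_lock);
		return ep;	/* caller later drops e_refcount under table_lock */
	}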
- */ - if (vp == NULLVP) { + mutex_exit(&vnode_free_list_lock); + if ((vp = valloc(NULL)) == NULL) { + mutex_enter(&vnode_free_list_lock); + numvnodes--; + } else + vp->v_usecount = 1; + } + + if (vp == NULL) { + vp = getcleanvnode(); + if (vp == NULL) { if (mp && error != EDEADLK) vfs_unbusy(mp); if (tryalloc) { @@ -415,23 +473,24 @@ getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *), *vpp = 0; return (ENFILE); } - vp->v_usecount = 1; vp->v_iflag = 0; vp->v_vflag = 0; vp->v_uflag = 0; vp->v_socket = NULL; } - vp->v_type = VNON; - vp->v_vnlock = &vp->v_lock; - lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); + + KASSERT(vp->v_usecount == 1); + KASSERT(vp->v_freelisthd == NULL); KASSERT(LIST_EMPTY(&vp->v_nclist)); KASSERT(LIST_EMPTY(&vp->v_dnclist)); + + vp->v_type = VNON; + vp->v_vnlock = &vp->v_lock; vp->v_tag = tag; vp->v_op = vops; insmntque(vp, mp); *vpp = vp; vp->v_data = 0; - simple_lock_init(&vp->v_interlock); /* * initialize uvm_object within vnode. @@ -443,8 +502,13 @@ getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *), KASSERT(TAILQ_FIRST(&uobj->memq) == NULL); vp->v_size = vp->v_writesize = VSIZENOTSET; - if (mp && error != EDEADLK) - vfs_unbusy(mp); + if (mp != NULL) { + if ((mp->mnt_iflag & IMNT_MPSAFE) != 0) + vp->v_vflag |= VV_MPSAFE; + if (error != EDEADLK) + vfs_unbusy(mp); + } + return (0); } @@ -454,34 +518,103 @@ getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *), * of a locking race. */ void -ungetnewvnode(struct vnode *vp) +ungetnewvnode(vnode_t *vp) { -#ifdef DIAGNOSTIC - if (vp->v_usecount != 1) - panic("ungetnewvnode: busy vnode"); -#endif - vp->v_usecount--; - insmntque(vp, NULL); - vp->v_type = VBAD; - simple_lock(&vp->v_interlock); + KASSERT(vp->v_usecount == 1); + KASSERT(vp->v_data == NULL); + KASSERT(vp->v_freelisthd == NULL); + + mutex_enter(&vp->v_interlock); + vp->v_iflag |= VI_CLEAN; + vrelel(vp, 0, 0); +} + +/* + * Allocate a new, uninitialized vnode. If 'mp' is non-NULL, this is a + * marker vnode and we are prepared to wait for the allocation. + */ +vnode_t * +valloc(struct mount *mp) +{ + vnode_t *vp; + + vp = pool_cache_get(vnode_cache, (mp != NULL ? PR_WAITOK : PR_NOWAIT)); + if (vp == NULL) { + return NULL; + } + + memset(vp, 0, sizeof(*vp)); + UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 0); + cv_init(&vp->v_cv, "vnode"); /* - * Insert at head of LRU list + * done by memset() above. + * LIST_INIT(&vp->v_nclist); + * LIST_INIT(&vp->v_dnclist); */ - simple_lock(&vnode_free_list_slock); - if (vp->v_holdcnt > 0) - TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist); - else - TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); - simple_unlock(&vnode_free_list_slock); - simple_unlock(&vp->v_interlock); + + if (mp != NULL) { + vp->v_mount = mp; + vp->v_type = VBAD; + vp->v_iflag = VI_MARKER; + } else { + lockinit(&vp->v_lock, PVFS, "vnlock", 0, 0); + } + + return vp; +} + +/* + * Free an unused, unreferenced vnode. + */ +void +vfree(vnode_t *vp) +{ + + KASSERT(vp->v_usecount == 0); + + if ((vp->v_iflag & VI_MARKER) == 0) { + lockdestroy(&vp->v_lock); + mutex_enter(&vnode_free_list_lock); + numvnodes--; + mutex_exit(&vnode_free_list_lock); + } + + UVM_OBJ_DESTROY(&vp->v_uobj); + cv_destroy(&vp->v_cv); + pool_cache_put(vnode_cache, vp); +} + +/* + * Remove a vnode from its freelist. + */ +static inline void +vremfree(vnode_t *vp) +{ + + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT(vp->v_usecount == 0); + + /* + * Note that the reference count must not change until + * the vnode is removed. 
+ */ + mutex_enter(&vnode_free_list_lock); + if (vp->v_holdcnt > 0) { + KASSERT(vp->v_freelisthd == &vnode_hold_list); + } else { + KASSERT(vp->v_freelisthd == &vnode_free_list); + } + TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); + vp->v_freelisthd = NULL; + mutex_exit(&vnode_free_list_lock); } /* * Move a vnode from one mount queue to another. */ static void -insmntque(struct vnode *vp, struct mount *mp) +insmntque(vnode_t *vp, struct mount *mp) { #ifdef DIAGNOSTIC @@ -493,7 +626,7 @@ insmntque(struct vnode *vp, struct mount *mp) } #endif - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); /* * Delete from old mount point vnode list, if on one. */ @@ -504,7 +637,7 @@ insmntque(struct vnode *vp, struct mount *mp) */ if ((vp->v_mount = mp) != NULL) TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); } /* @@ -513,7 +646,7 @@ insmntque(struct vnode *vp, struct mount *mp) * Also used for memory file system special devices. */ int -bdevvp(dev_t dev, struct vnode **vpp) +bdevvp(dev_t dev, vnode_t **vpp) { return (getdevvp(dev, vpp, VBLK)); @@ -524,7 +657,7 @@ bdevvp(dev_t dev, struct vnode **vpp) * Used for kernfs and some console handling. */ int -cdevvp(dev_t dev, struct vnode **vpp) +cdevvp(dev_t dev, vnode_t **vpp) { return (getdevvp(dev, vpp, VCHR)); @@ -536,10 +669,10 @@ cdevvp(dev_t dev, struct vnode **vpp) * and by cdevvp (character device) for console and kernfs. */ static int -getdevvp(dev_t dev, struct vnode **vpp, enum vtype type) +getdevvp(dev_t dev, vnode_t **vpp, enum vtype type) { - struct vnode *vp; - struct vnode *nvp; + vnode_t *vp; + vnode_t *nvp; int error; if (dev == NODEV) { @@ -553,6 +686,7 @@ getdevvp(dev_t dev, struct vnode **vpp, enum vtype type) } vp = nvp; vp->v_type = type; + vp->v_vflag |= VV_MPSAFE; uvm_vnp_setsize(vp, 0); if ((nvp = checkalias(vp, dev, NULL)) != 0) { vput(vp); @@ -570,29 +704,34 @@ getdevvp(dev_t dev, struct vnode **vpp, enum vtype type) * the existing contents and return the aliased vnode. The * caller is responsible for filling it with its new contents. */ -struct vnode * -checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp) +vnode_t * +checkalias(vnode_t *nvp, dev_t nvp_rdev, struct mount *mp) { - struct lwp *l = curlwp; /* XXX */ - struct vnode *vp; - struct vnode **vpp; + vnode_t *vp; + vnode_t **vpp; if (nvp->v_type != VBLK && nvp->v_type != VCHR) - return (NULLVP); + return (NULL); vpp = &speclisth[SPECHASH(nvp_rdev)]; loop: - simple_lock(&spechash_slock); + mutex_enter(&spechash_lock); for (vp = *vpp; vp; vp = vp->v_specnext) { + if (vp->v_specinfo == NULL) { + vpanic(vp, "checkalias: no specinfo"); + } if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ - simple_lock(&vp->v_interlock); - simple_unlock(&spechash_slock); + mutex_enter(&vp->v_interlock); + mutex_exit(&spechash_lock); if (vp->v_usecount == 0) { - vgonel(vp, l); + vremfree(vp); + vp->v_usecount++; + vclean(vp, DOCLOSE); + vrelel(vp, 1, 1); goto loop; } /* @@ -603,11 +742,12 @@ loop: */ if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) goto loop; + mutex_enter(&spechash_lock); if (vp->v_specinfo == NULL) { + mutex_exit(&spechash_lock); vput(vp); goto loop; } - simple_lock(&spechash_slock); break; } if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) { @@ -615,8 +755,10 @@ loop: sizeof(struct specinfo), M_VNODE, M_NOWAIT); /* XXX Erg. 
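bdevvp() and cdevvp() above keep their contract: on success they return a referenced but unlocked vnode for the device. A typical caller under the new locking might look like this sketch (the surrounding error handling is illustrative, not from the patch):

	vnode_t *vp;
	int error;

	error = bdevvp(dev, &vp);	/* referenced, unlocked on success */
	if (error != 0)
		return error;
	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error == 0) {
		/* ... operate on the device vnode ... */
		VOP_UNLOCK(vp, 0);
	}
	vrele(vp);			/* drop the bdevvp() reference */
	return error;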
*/ if (nvp->v_specinfo == NULL) { - simple_unlock(&spechash_slock); + mutex_exit(&spechash_lock); uvm_wait("checkalias"); + if (vp != NULL) + vput(vp); goto loop; } @@ -624,24 +766,27 @@ loop: nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; nvp->v_specmountpoint = NULL; - simple_unlock(&spechash_slock); + mutex_exit(&spechash_lock); nvp->v_speclockf = NULL; *vpp = nvp; - if (vp != NULLVP) { + if (vp != NULL) { + /* XXX locking */ nvp->v_iflag |= VI_ALIASED; vp->v_iflag |= VI_ALIASED; vput(vp); } - return (NULLVP); + return (NULL); } - simple_unlock(&spechash_slock); + mutex_exit(&spechash_lock); VOP_UNLOCK(vp, 0); - simple_lock(&vp->v_interlock); - vclean(vp, 0, l); + mutex_enter(&vp->v_interlock); + vclean(vp, 0); + mutex_exit(&vp->v_interlock); vp->v_op = nvp->v_op; vp->v_tag = nvp->v_tag; vp->v_vnlock = &vp->v_lock; + lockdestroy(vp->v_vnlock); lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); nvp->v_type = VNON; insmntque(vp, mp); @@ -657,50 +802,48 @@ loop: * longer usable (possibly having been changed to a new file system type). */ int -vget(struct vnode *vp, int flags) +vget(vnode_t *vp, int flags) { int error; + KASSERT((vp->v_iflag & VI_MARKER) == 0); + + if ((flags & LK_INTERLOCK) == 0) + mutex_enter(&vp->v_interlock); + + /* + * Before adding a reference, we must remove the vnode + * from its freelist. + */ + if (vp->v_usecount == 0) { + vremfree(vp); + } + if (++vp->v_usecount == 0) { + vpanic(vp, "vget: usecount overflow"); + } + /* * If the vnode is in the process of being cleaned out for * another use, we wait for the cleaning to finish and then * return failure. Cleaning is determined by checking that * the VI_XLOCK flag is set. */ - - if ((flags & LK_INTERLOCK) == 0) - simple_lock(&vp->v_interlock); - if ((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0) { + if ((vp->v_iflag & VI_XLOCK) != 0) { if (flags & LK_NOWAIT) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return EBUSY; } - vp->v_iflag |= VI_XWANT; - ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock); + vwait(vp, VI_XLOCK); + vrelel(vp, 1, 0); return (ENOENT); } - if (vp->v_usecount == 0) { - simple_lock(&vnode_free_list_slock); - if (vp->v_holdcnt > 0) - TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); - else - TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); - simple_unlock(&vnode_free_list_slock); - } - vp->v_usecount++; -#ifdef DIAGNOSTIC - if (vp->v_usecount == 0) { - vprint("vget", vp); - panic("vget: usecount overflow, vp %p", vp); - } -#endif if (flags & LK_TYPE_MASK) { if ((error = vn_lock(vp, flags | LK_INTERLOCK))) { vrele(vp); } return (error); } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (0); } @@ -708,109 +851,222 @@ vget(struct vnode *vp, int flags) * vput(), just unlock and vrele() */ void -vput(struct vnode *vp) +vput(vnode_t *vp) { -#ifdef DIAGNOSTIC - if (vp == NULL) - panic("vput: null vp"); -#endif - simple_lock(&vp->v_interlock); - vp->v_usecount--; - if (vp->v_usecount > 0) { - simple_unlock(&vp->v_interlock); - VOP_UNLOCK(vp, 0); - return; - } -#ifdef DIAGNOSTIC - if (vp->v_usecount < 0 || vp->v_writecount != 0) { - vprint("vput: bad ref count", vp); - panic("vput: ref cnt"); - } -#endif - /* - * Insert at tail of LRU list. 
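The reworked vget() above removes the vnode from its freelist before taking the reference, and turns the old VI_XWANT/ltsleep wait into vwait(). From the caller's side the idiom is unchanged: find the vnode under some list lock, take v_interlock, hand the interlock to vget() with LK_INTERLOCK, and retry on failure. A sketch, where hash_lookup(), struct key and table_lock are hypothetical stand-ins for a file system's own hash table:

	extern kmutex_t table_lock;		/* hypothetical */
	vnode_t *hash_lookup(const struct key *);	/* hypothetical */

	vnode_t *
	lookup_and_get(const struct key *key)
	{
		vnode_t *vp;

	loop:
		mutex_enter(&table_lock);
		vp = hash_lookup(key);
		if (vp == NULL) {
			mutex_exit(&table_lock);
			return NULL;
		}
		mutex_enter(&vp->v_interlock);
		mutex_exit(&table_lock);
		/* vget() consumes v_interlock; fails if the vnode is dying. */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK) != 0)
			goto loop;
		return vp;		/* referenced and locked */
	}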
- */ - simple_lock(&vnode_free_list_slock); - if (vp->v_holdcnt > 0) - TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); - else - TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); - simple_unlock(&vnode_free_list_slock); - if (vp->v_iflag & VI_EXECMAP) { - atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages); - atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages); - } - vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP|VI_MAPPED); - vp->v_vflag &= ~VV_MAPPED; - simple_unlock(&vp->v_interlock); - VOP_INACTIVE(vp); + KASSERT((vp->v_iflag & VI_MARKER) == 0); + + VOP_UNLOCK(vp, 0); + vrele(vp); } /* - * Vnode release. - * If count drops to zero, call inactive routine and return to freelist. + * Vnode release. If reference count drops to zero, call inactive + * routine and either return to freelist or free to the pool. */ -static void -do_vrele(struct vnode *vp, int doinactive, int onhead) +void +vrelel(vnode_t *vp, int doinactive, int onhead) { + bool recycle, defer; + int error; -#ifdef DIAGNOSTIC - if (vp == NULL) - panic("vrele: null vp"); -#endif - simple_lock(&vp->v_interlock); - vp->v_usecount--; - if (vp->v_usecount > 0) { - simple_unlock(&vp->v_interlock); + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT((vp->v_iflag & VI_MARKER) == 0); + + if (vp->v_op == dead_vnodeop_p && (vp->v_iflag & VI_CLEAN) == 0) { + vpanic(vp, "dead but not clean"); + } + + /* + * If not the last reference, just drop the reference count + * and unlock. + */ + if (vp->v_usecount > 1) { + vp->v_usecount--; + vp->v_iflag |= VI_INACTREDO; + mutex_exit(&vp->v_interlock); return; } -#ifdef DIAGNOSTIC - if (vp->v_usecount < 0 || vp->v_writecount != 0) { - vprint("vrele: bad ref count", vp); - panic("vrele: ref cnt vp %p", vp); + if (vp->v_usecount <= 0 || vp->v_writecount != 0) { + vpanic(vp, "vput: bad ref count"); } -#endif + /* - * Insert at tail of LRU list. + * If not clean, deactivate the vnode, but preserve + * our reference across the call to VOP_INACTIVE(). */ - simple_lock(&vnode_free_list_slock); - if (vp->v_holdcnt > 0) { - TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); - } else { - if (onhead) - TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); - else - TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); - } - simple_unlock(&vnode_free_list_slock); - if (vp->v_iflag & VI_EXECMAP) { - atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages); - atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages); - } - vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP|VI_MAPPED); - vp->v_vflag &= ~VV_MAPPED; + retry: + if ((vp->v_iflag & VI_CLEAN) == 0) { + recycle = false; + /* + * XXX This ugly block can be largely eliminated if + * locking is pushed down into the file systems. + */ + if (curlwp == uvm.pagedaemon_lwp) { + /* The pagedaemon can't wait around; defer. */ + defer = true; + } else if (curlwp == vrele_lwp) { + /* We have to try harder. */ + vp->v_iflag &= ~VI_INACTREDO; + error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | + LK_RETRY); + if (error != 0) { + /* XXX */ + vpanic(vp, "vrele: unable to lock %p"); + } + defer = false; + } else if ((vp->v_iflag & VI_LAYER) != 0) { + /* + * Acquiring the stack's lock in vclean() even + * for an honest vput/vrele is dangerous because + * our caller may hold other vnode locks; defer. + */ + defer = true; + } else { + /* If we can't acquire the lock, then defer. 
*/ + vp->v_iflag &= ~VI_INACTREDO; + error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | + LK_NOWAIT); + if (error != 0) { + defer = true; + mutex_enter(&vp->v_interlock); + } else { + defer = false; + } + } - if (doinactive) { - if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0) - VOP_INACTIVE(vp); + if (defer) { + /* + * Defer reclaim to the kthread; it's not safe to + * clean it here. We donate it our last reference. + */ + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT((vp->v_iflag & VI_INACTPEND) == 0); + KASSERT(vp->v_usecount == 1); + vp->v_iflag |= VI_INACTPEND; + mutex_enter(&vrele_lock); + TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist); + if (++vrele_pending > (desiredvnodes >> 8)) + cv_signal(&vrele_cv); + mutex_exit(&vrele_lock); + mutex_exit(&vp->v_interlock); + return; + } + + /* + * The vnode may gain another reference while being + * deactivated. Note that VOP_INACTIVE() will drop + * the vnode lock. + */ + VOP_INACTIVE(vp, &recycle); + mutex_enter(&vp->v_interlock); + if (vp->v_usecount > 1) { + vp->v_usecount--; + mutex_exit(&vp->v_interlock); + return; + } + + /* + * If we grew another reference while VOP_INACTIVE() + * was underway, then retry. + */ + if ((vp->v_iflag & VI_INACTREDO) != 0) { + goto retry; + } + + /* Take care of space accounting. */ + if (vp->v_iflag & VI_EXECMAP) { + atomic_add_int(&uvmexp.execpages, + -vp->v_uobj.uo_npages); + atomic_add_int(&uvmexp.filepages, + vp->v_uobj.uo_npages); + } + vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP|VI_MAPPED); + vp->v_vflag &= ~VV_MAPPED; + + /* + * Recycle the vnode if the file is now unused (unlinked), + * otherwise just free it. + */ + if (recycle) { + vclean(vp, DOCLOSE); + } + KASSERT(vp->v_usecount > 0); + } + + if (--vp->v_usecount != 0) { + /* Gained another reference while being reclaimed. */ + mutex_exit(&vp->v_interlock); + return; + } + + if ((vp->v_iflag & VI_CLEAN) != 0) { + /* + * It's clean so destroy it. It isn't referenced + * anywhere since it has been reclaimed. + */ + KASSERT(vp->v_holdcnt == 0); + KASSERT(vp->v_writecount == 0); + mutex_exit(&vp->v_interlock); + insmntque(vp, NULL); + vfree(vp); } else { - simple_unlock(&vp->v_interlock); + /* + * Otherwise, put it back onto the freelist. It + * can't be destroyed while still associated with + * a file system. + */ + mutex_enter(&vnode_free_list_lock); + if (vp->v_holdcnt > 0) { + vp->v_freelisthd = &vnode_hold_list; + } else { + vp->v_freelisthd = &vnode_free_list; + } + TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); + mutex_exit(&vnode_free_list_lock); + mutex_exit(&vp->v_interlock); } } void -vrele(struct vnode *vp) +vrele(vnode_t *vp) { - do_vrele(vp, 1, 0); + KASSERT((vp->v_iflag & VI_MARKER) == 0); + + mutex_enter(&vp->v_interlock); + vrelel(vp, 1, 0); } -void -vrele2(struct vnode *vp, int onhead) +static void +vrele_thread(void *cookie) { + vnode_t *vp; - do_vrele(vp, 0, onhead); + for (;;) { + mutex_enter(&vrele_lock); + while (TAILQ_EMPTY(&vrele_list)) { + cv_timedwait(&vrele_cv, &vrele_lock, hz); + } + vp = TAILQ_FIRST(&vrele_list); + TAILQ_REMOVE(&vrele_list, vp, v_freelist); + vrele_pending--; + mutex_exit(&vrele_lock); + + /* + * If not the last reference, then ignore the vnode + * and look for more work. 
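The defer branch above, together with vrele_thread(), is a conventional hand-off: the producer donates its reference, queues the object, and pokes the worker; the worker drains the queue at its leisure. The same skeleton for a generic work item (struct work and the work_* names are invented for the example):

	#include <sys/param.h>
	#include <sys/mutex.h>
	#include <sys/condvar.h>
	#include <sys/queue.h>

	struct work {
		TAILQ_ENTRY(work) wk_entry;
	};

	static TAILQ_HEAD(, work) work_list = TAILQ_HEAD_INITIALIZER(work_list);
	static kmutex_t work_lock;
	static kcondvar_t work_cv;

	void work_process(struct work *);	/* hypothetical payload */

	static void
	work_defer(struct work *wk)
	{

		mutex_enter(&work_lock);
		TAILQ_INSERT_TAIL(&work_list, wk, wk_entry);
		cv_signal(&work_cv);		/* one worker, one wakeup */
		mutex_exit(&work_lock);
	}

	static void
	work_thread(void *cookie)
	{
		struct work *wk;

		for (;;) {
			mutex_enter(&work_lock);
			while (TAILQ_EMPTY(&work_list))
				cv_wait(&work_cv, &work_lock);
			wk = TAILQ_FIRST(&work_list);
			TAILQ_REMOVE(&work_list, wk, wk_entry);
			mutex_exit(&work_lock);
			work_process(wk);
		}
	}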
+ */ + mutex_enter(&vp->v_interlock); + KASSERT((vp->v_iflag & VI_INACTPEND) != 0); + vp->v_iflag &= ~VI_INACTPEND; + if (vp->v_usecount > 1) { + vp->v_usecount--; + mutex_exit(&vp->v_interlock); + continue; + } + vrelel(vp, 1, 0); + } } /* @@ -818,30 +1074,20 @@ vrele2(struct vnode *vp, int onhead) * Called with v_interlock held. */ void -vholdl(struct vnode *vp) +vholdl(vnode_t *vp) { - /* - * If it is on the freelist and the hold count is currently - * zero, move it to the hold list. The test of the back - * pointer and the use reference count of zero is because - * it will be removed from a free list by getnewvnode, - * but will not have its reference count incremented until - * after calling vgone. If the reference count were - * incremented first, vgone would (incorrectly) try to - * close the previous instance of the underlying object. - * So, the back pointer is explicitly set to `0xdeadb' in - * getnewvnode after removing it from a freelist to ensure - * that we do not try to move it here. - */ - if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && - vp->v_holdcnt == 0 && vp->v_usecount == 0) { - simple_lock(&vnode_free_list_slock); - TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); - TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); - simple_unlock(&vnode_free_list_slock); + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT((vp->v_iflag & VI_MARKER) == 0); + + if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) { + mutex_enter(&vnode_free_list_lock); + KASSERT(vp->v_freelisthd == &vnode_free_list); + TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); + vp->v_freelisthd = &vnode_hold_list; + TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); + mutex_exit(&vnode_free_list_lock); } - vp->v_holdcnt++; } /* @@ -849,54 +1095,45 @@ vholdl(struct vnode *vp) * Called with v_interlock held. */ void -holdrelel(struct vnode *vp) +holdrelel(vnode_t *vp) { - if (vp->v_holdcnt <= 0) - panic("holdrelel: holdcnt vp %p", vp); + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT((vp->v_iflag & VI_MARKER) == 0); + + if (vp->v_holdcnt <= 0) { + vpanic(vp, "holdrelel: holdcnt vp %p"); + } + vp->v_holdcnt--; - - /* - * If it is on the holdlist and the hold count drops to - * zero, move it to the free list. The test of the back - * pointer and the use reference count of zero is because - * it will be removed from a free list by getnewvnode, - * but will not have its reference count incremented until - * after calling vgone. If the reference count were - * incremented first, vgone would (incorrectly) try to - * close the previous instance of the underlying object. - * So, the back pointer is explicitly set to `0xdeadb' in - * getnewvnode after removing it from a freelist to ensure - * that we do not try to move it here. - */ - - if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && - vp->v_holdcnt == 0 && vp->v_usecount == 0) { - simple_lock(&vnode_free_list_slock); - TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); - TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); - simple_unlock(&vnode_free_list_slock); + if (vp->v_holdcnt == 0 && vp->v_usecount == 0) { + mutex_enter(&vnode_free_list_lock); + KASSERT(vp->v_freelisthd == &vnode_hold_list); + TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); + vp->v_freelisthd = &vnode_free_list; + TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); + mutex_exit(&vnode_free_list_lock); } } /* - * Vnode reference. + * Vnode reference, where a reference is already held by some other + * object (for example, a file structure). 
*/ void -vref(struct vnode *vp) +vref(vnode_t *vp) { - simple_lock(&vp->v_interlock); - if (vp->v_usecount <= 0) - panic("vref used where vget required, vp %p", vp); - vp->v_usecount++; -#ifdef DIAGNOSTIC - if (vp->v_usecount == 0) { - vprint("vref", vp); - panic("vref: usecount overflow, vp %p", vp); + KASSERT((vp->v_iflag & VI_MARKER) == 0); + + mutex_enter(&vp->v_interlock); + if (vp->v_usecount <= 0) { + vpanic(vp, "vref used where vget required"); } -#endif - simple_unlock(&vp->v_interlock); + if (++vp->v_usecount == 0) { + vpanic(vp, "vref: usecount overflow"); + } + mutex_exit(&vp->v_interlock); } /* @@ -918,33 +1155,35 @@ struct ctldebug debug1 = { "busyprt", &busyprt }; #endif int -vflush(struct mount *mp, struct vnode *skipvp, int flags) +vflush(struct mount *mp, vnode_t *skipvp, int flags) { - struct lwp *l = curlwp; /* XXX */ - struct vnode *vp, *nvp; + vnode_t *vp, *mvp; int busy = 0; - simple_lock(&mntvnode_slock); -loop: + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return (ENOMEM); + + mutex_enter(&mntvnode_lock); /* * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() * and vclean() are called */ - for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { - if (vp->v_mount != mp) - goto loop; - nvp = TAILQ_NEXT(vp, v_mntvnodes); + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + if (vp->v_mount != mp || vismarker(vp)) + continue; /* * Skip over a selected vnode. */ if (vp == skipvp) continue; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); /* - * Skip over a vnodes marked VV_SYSTEM. + * Skip over a vnodes marked VSYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); continue; } /* @@ -953,7 +1192,7 @@ loop: */ if ((flags & WRITECLOSE) && (vp->v_writecount == 0 || vp->v_type != VREG)) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); continue; } /* @@ -961,36 +1200,42 @@ loop: * out the vnode data structures and we are done. */ if (vp->v_usecount == 0) { - simple_unlock(&mntvnode_slock); - vgonel(vp, l); - simple_lock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); + vremfree(vp); + vp->v_usecount++; + vclean(vp, DOCLOSE); + vrelel(vp, 1, 0); + mutex_enter(&mntvnode_lock); continue; } /* * If FORCECLOSE is set, forcibly close the vnode. * For block or character devices, revert to an * anonymous device. For all other files, just kill them. + * XXXAD what? */ if (flags & FORCECLOSE) { - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); + vp->v_usecount++; if (vp->v_type != VBLK && vp->v_type != VCHR) { - vgonel(vp, l); + vclean(vp, DOCLOSE); } else { - vclean(vp, 0, l); + vclean(vp, 0); vp->v_op = spec_vnodeop_p; - insmntque(vp, (struct mount *)0); } - simple_lock(&mntvnode_slock); + vrelel(vp, 1, 0); + mutex_enter(&mntvnode_lock); continue; } #ifdef DEBUG if (busyprt) vprint("vflush: busy vnode", vp); #endif - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); busy++; } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); + vfree(mvp); if (busy) return (EBUSY); return (0); @@ -998,58 +1243,45 @@ loop: /* * Disassociate the underlying file system from a vnode. + * + * Must be called with the interlock held, and will return with it held. 
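vflush() above, like checkalias() earlier and vcount()/vrecycle() later in the patch, replaces the old vgone()/vgonel() call with one spelled-out idiom for destroying an unreferenced vnode. Pulled out on its own, with the preconditions stated (a sketch of the idiom, not a new function in the patch):

	/* Preconditions: vp is unreferenced and v_interlock is held. */
	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT(vp->v_usecount == 0);

	vremfree(vp);		/* off the freelist before referencing */
	vp->v_usecount++;	/* pin it across vclean() */
	vclean(vp, DOCLOSE);	/* disassociate from the file system */
	vrelel(vp, 0, 0);	/* drops the reference and v_interlock */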
*/ -static void -vclean(struct vnode *vp, int flags, struct lwp *l) +void +vclean(vnode_t *vp, int flags) { - int active; + lwp_t *l = curlwp; + bool recycle, active; + struct specinfo *si; - LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT((vp->v_iflag & VI_MARKER) == 0); + KASSERT(vp->v_usecount != 0); - /* - * Check to see if the vnode is in use. - * If so we have to reference it before we clean it out - * so that its count cannot fall to zero and generate a - * race against ourselves to recycle it. - */ + /* If cleaning is already in progress wait until done and return. */ + if (vp->v_iflag & VI_XLOCK) { + vwait(vp, VI_XLOCK); + return; + } - if ((active = vp->v_usecount) != 0) { - vp->v_usecount++; -#ifdef DIAGNOSTIC - if (vp->v_usecount == 0) { - vprint("vclean", vp); - panic("vclean: usecount overflow"); - } -#endif + /* If already clean, nothing to do. */ + if ((vp->v_iflag & VI_CLEAN) != 0) { + return; } /* - * Prevent the vnode from being recycled or - * brought into use while we clean it out. + * Prevent the vnode from being recycled or brought into use + * while we clean it out. */ - if (vp->v_iflag & VI_XLOCK) - panic("vclean: deadlock, vp %p", vp); vp->v_iflag |= VI_XLOCK; if (vp->v_iflag & VI_EXECMAP) { atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages); atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages); } vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP); + active = (vp->v_usecount > 1); - /* - * Even if the count is zero, the VOP_INACTIVE routine may still - * have the object locked while it cleans it out. For - * active vnodes, it ensures that no other activity can - * occur while the underlying object is being cleaned out. - * - * We drain the lock to make sure we are the last one trying to - * get it and immediately resurrect the lock. Future accesses - * for locking this _vnode_ will be protected by VI_XLOCK. However, - * upper layers might be using the _lock_ in case the file system - * exported it and might access it while the vnode lingers in - * deadfs. - */ - VOP_LOCK(vp, LK_DRAIN | LK_RESURRECT | LK_INTERLOCK); + /* XXXAD should not lock vnode under layer */ + VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK); /* * Clean out any cached data associated with the vnode. 
@@ -1058,7 +1290,7 @@ vclean(struct vnode *vp, int flags, struct lwp *l) */ if (flags & DOCLOSE) { int error; - struct vnode *vq, *vx; + vnode_t *vq, *vx; error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0); if (error) @@ -1066,12 +1298,13 @@ vclean(struct vnode *vp, int flags, struct lwp *l) KASSERT(error == 0); KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); + /* XXXAD close should not happen on layered vnode */ if (active) VOP_CLOSE(vp, FNONBLOCK, NOCRED); if ((vp->v_type == VBLK || vp->v_type == VCHR) && - vp->v_specinfo != 0) { - simple_lock(&spechash_slock); + vp->v_specinfo != NULL) { + mutex_enter(&spechash_lock); if (vp->v_hashchain != NULL) { if (*vp->v_hashchain == vp) { *vp->v_hashchain = vp->v_specnext; @@ -1088,8 +1321,8 @@ vclean(struct vnode *vp, int flags, struct lwp *l) } if (vp->v_iflag & VI_ALIASED) { vx = NULL; - for (vq = *vp->v_hashchain; vq; - vq = vq->v_specnext) { + for (vq = *vp->v_hashchain; vq; + vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; @@ -1104,9 +1337,10 @@ vclean(struct vnode *vp, int flags, struct lwp *l) vp->v_iflag &= ~VI_ALIASED; } } - simple_unlock(&spechash_slock); - FREE(vp->v_specinfo, M_VNODE); + si = vp->v_specinfo; vp->v_specinfo = NULL; + mutex_exit(&spechash_lock); + FREE(si, M_VNODE); } } @@ -1116,7 +1350,7 @@ vclean(struct vnode *vp, int flags, struct lwp *l) * VOP_INACTIVE will unlock the vnode. */ if (active) { - VOP_INACTIVE(vp); + VOP_INACTIVE(vp, &recycle); } else { /* * Any other processes trying to obtain this lock must first @@ -1124,38 +1358,10 @@ vclean(struct vnode *vp, int flags, struct lwp *l) */ VOP_UNLOCK(vp, 0); } - /* - * Reclaim the vnode. - */ - if (VOP_RECLAIM(vp)) - panic("vclean: cannot reclaim, vp %p", vp); - if (active) { - /* - * Inline copy of vrele() since VOP_INACTIVE - * has already been called. - */ - simple_lock(&vp->v_interlock); - if (--vp->v_usecount <= 0) { -#ifdef DIAGNOSTIC - if (vp->v_usecount < 0 || vp->v_writecount != 0) { - vprint("vclean: bad ref count", vp); - panic("vclean: ref cnt"); - } -#endif - /* - * Insert at tail of LRU list. - */ - simple_unlock(&vp->v_interlock); - simple_lock(&vnode_free_list_slock); -#ifdef DIAGNOSTIC - if (vp->v_holdcnt > 0) - panic("vclean: not clean, vp %p", vp); -#endif - TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); - simple_unlock(&vnode_free_list_slock); - } else - simple_unlock(&vp->v_interlock); + /* Disassociate the underlying file system from the vnode. */ + if (VOP_RECLAIM(vp)) { + vpanic(vp, "vclean: cannot reclaim"); } KASSERT(vp->v_uobj.uo_npages == 0); @@ -1165,22 +1371,18 @@ vclean(struct vnode *vp, int flags, struct lwp *l) } cache_purge(vp); - /* - * Done with purge, notify sleepers of the grim news. - */ + /* Done with purge, notify sleepers of the grim news. */ vp->v_op = dead_vnodeop_p; vp->v_tag = VT_NON; - vp->v_vnlock = NULL; - simple_lock(&vp->v_interlock); - VN_KNOTE(vp, NOTE_REVOKE); /* FreeBSD has this in vn_pollgone() */ + mutex_enter(&vp->v_interlock); + vp->v_vnlock = &vp->v_lock; + VN_KNOTE(vp, NOTE_REVOKE); vp->v_iflag &= ~VI_XLOCK; + vp->v_iflag |= VI_CLEAN; vp->v_vflag &= ~VV_LOCKSWORK; - if (vp->v_iflag & VI_XWANT) { - vp->v_iflag &= ~VI_XWANT; - simple_unlock(&vp->v_interlock); - wakeup((void *)vp); - } else - simple_unlock(&vp->v_interlock); + cv_broadcast(&vp->v_cv); + + KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); } /* @@ -1188,111 +1390,48 @@ vclean(struct vnode *vp, int flags, struct lwp *l) * Release the passed interlock if the vnode will be recycled. 
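vclean() above signals completion with cv_broadcast(&vp->v_cv) after setting VI_CLEAN, so any thread that finds VI_XLOCK set waits via vwait() rather than the old VI_XWANT/ltsleep pair. The waiter's side, as vget() uses it (a sketch; the caller must already hold a reference so vp stays valid across the sleep):

	mutex_enter(&vp->v_interlock);
	if ((vp->v_iflag & VI_XLOCK) != 0) {
		/* vwait() sleeps on v_cv until VI_XLOCK clears. */
		vwait(vp, VI_XLOCK);
		vrelel(vp, 1, 0);	/* vnode is now dead; drop our ref */
		return ENOENT;
	}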
*/ int -vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct lwp *l) +vrecycle(vnode_t *vp, kmutex_t *inter_lkp, struct lwp *l) { - simple_lock(&vp->v_interlock); - if (vp->v_usecount == 0) { - if (inter_lkp) - simple_unlock(inter_lkp); - vgonel(vp, l); - return (1); + KASSERT((vp->v_iflag & VI_MARKER) == 0); + + mutex_enter(&vp->v_interlock); + if (vp->v_usecount != 0) { + mutex_exit(&vp->v_interlock); + return (0); } - simple_unlock(&vp->v_interlock); - return (0); + if (inter_lkp) + mutex_exit(inter_lkp); + vremfree(vp); + vp->v_usecount++; + vclean(vp, DOCLOSE); + vrelel(vp, 0, 0); + return (1); } /* - * Eliminate all activity associated with a vnode - * in preparation for reuse. + * Eliminate all activity associated with a vnode in preparation for + * reuse. Drops a reference from the vnode. */ void -vgone(struct vnode *vp) -{ - struct lwp *l = curlwp; /* XXX */ - - simple_lock(&vp->v_interlock); - vgonel(vp, l); -} - -/* - * vgone, with the vp interlock held. - */ -void -vgonel(struct vnode *vp, struct lwp *l) +vgone(vnode_t *vp) { - LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); - - /* - * If a vgone (or vclean) is already in progress, - * wait until it is done and return. - */ - - if (vp->v_iflag & VI_XLOCK) { - vp->v_iflag |= VI_XWANT; - ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock); - return; - } - - /* - * Clean out the filesystem specific data. - */ - - vclean(vp, DOCLOSE, l); - KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); - - /* - * Delete from old mount point vnode list, if on one. - */ - - if (vp->v_mount != NULL) - insmntque(vp, (struct mount *)0); - - /* - * The test of the back pointer and the reference count of - * zero is because it will be removed from the free list by - * getcleanvnode, but will not have its reference count - * incremented until after calling vgone. If the reference - * count were incremented first, vgone would (incorrectly) - * try to close the previous instance of the underlying object. - * So, the back pointer is explicitly set to `0xdeadb' in - * getnewvnode after removing it from the freelist to ensure - * that we do not try to move it here. - */ - - vp->v_type = VBAD; - if (vp->v_usecount == 0) { - bool dofree; - - simple_lock(&vnode_free_list_slock); - if (vp->v_holdcnt > 0) - panic("vgonel: not clean, vp %p", vp); - /* - * if it isn't on the freelist, we're called by getcleanvnode - * and vnode is being re-used. otherwise, we'll free it. - */ - dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb; - if (dofree) { - TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); - numvnodes--; - } - simple_unlock(&vnode_free_list_slock); - if (dofree) - pool_put(&vnode_pool, vp); - } + mutex_enter(&vp->v_interlock); + vclean(vp, DOCLOSE); + vrelel(vp, 0, 0); } /* * Lookup a vnode by device number. 
*/ int -vfinddev(dev_t dev, enum vtype type, struct vnode **vpp) +vfinddev(dev_t dev, enum vtype type, vnode_t **vpp) { - struct vnode *vp; + vnode_t *vp; int rc = 0; - simple_lock(&spechash_slock); + mutex_enter(&spechash_lock); for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { if (dev != vp->v_rdev || type != vp->v_type) continue; @@ -1300,7 +1439,7 @@ vfinddev(dev_t dev, enum vtype type, struct vnode **vpp) rc = 1; break; } - simple_unlock(&spechash_slock); + mutex_exit(&spechash_lock); return (rc); } @@ -1311,7 +1450,7 @@ vfinddev(dev_t dev, enum vtype type, struct vnode **vpp) void vdevgone(int maj, int minl, int minh, enum vtype type) { - struct vnode *vp; + vnode_t *vp; int mn; vp = NULL; /* XXX gcc */ @@ -1325,15 +1464,21 @@ vdevgone(int maj, int minl, int minh, enum vtype type) * Calculate the total number of references to a special device. */ int -vcount(struct vnode *vp) +vcount(vnode_t *vp) { - struct vnode *vq, *vnext; + vnode_t *vq, *vnext; int count; loop: - if ((vp->v_iflag & VI_ALIASED) == 0) - return (vp->v_usecount); - simple_lock(&spechash_slock); + mutex_enter(&spechash_lock); + mutex_enter(&vp->v_interlock); + if ((vp->v_iflag & VI_ALIASED) == 0) { + count = vp->v_usecount - ((vp->v_iflag & VI_INACTPEND) != 0); + mutex_exit(&vp->v_interlock); + mutex_exit(&spechash_lock); + return (count); + } + mutex_exit(&vp->v_interlock); for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { vnext = vq->v_specnext; if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) @@ -1341,19 +1486,23 @@ loop: /* * Alias, but not in use, so flush it out. */ + mutex_enter(&vq->v_interlock); if (vq->v_usecount == 0 && vq != vp && (vq->v_iflag & VI_XLOCK) == 0) { - simple_unlock(&spechash_slock); - vgone(vq); + mutex_exit(&spechash_lock); + vremfree(vq); + vq->v_usecount++; + vclean(vq, DOCLOSE); + vrelel(vq, 1, 0); goto loop; } count += vq->v_usecount; + mutex_exit(&vq->v_interlock); } - simple_unlock(&spechash_slock); + mutex_exit(&spechash_lock); return (count); } - /* * sysctl helper routine to return list of supported fstypes */ @@ -1463,7 +1612,7 @@ sysctl_kern_vnode(SYSCTLFN_ARGS) char *where = oldp; size_t *sizep = oldlenp; struct mount *mp, *nmp; - struct vnode *vp; + vnode_t *vp, *mvp; char *bp = where, *savebp; char *ewhere; int error; @@ -1473,14 +1622,15 @@ sysctl_kern_vnode(SYSCTLFN_ARGS) if (newp != NULL) return (EPERM); -#define VPTRSZ sizeof(struct vnode *) -#define VNODESZ sizeof(struct vnode) +#define VPTRSZ sizeof(vnode_t *) +#define VNODESZ sizeof(vnode_t) if (where == NULL) { *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); return (0); } ewhere = where + *sizep; + mutex_enter(&mountlist_lock); for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; mp = nmp) { @@ -1489,37 +1639,44 @@ sysctl_kern_vnode(SYSCTLFN_ARGS) continue; } savebp = bp; -again: - simple_lock(&mntvnode_slock); - TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return (ENOMEM); + mutex_enter(&mntvnode_lock); + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); /* * Check that the vp is still associated with * this filesystem. RACE: could have been * recycled onto the same filesystem. 
*/ - if (vp->v_mount != mp) { - simple_unlock(&mntvnode_slock); - if (kinfo_vdebug) - printf("kinfo: vp changed\n"); - bp = savebp; - goto again; - } + if (vp->v_mount != mp || vismarker(vp)) + continue; if (bp + VPTRSZ + VNODESZ > ewhere) { - simple_unlock(&mntvnode_slock); + (void)vunmark(mvp); + mutex_exit(&mntvnode_lock); + vfree(mvp); *sizep = bp - where; return (ENOMEM); } - simple_unlock(&mntvnode_slock); + /* XXXAD copy to temporary buffer */ + mutex_exit(&mntvnode_lock); if ((error = copyout((void *)&vp, bp, VPTRSZ)) || - (error = copyout((void *)vp, bp + VPTRSZ, VNODESZ))) + (error = copyout((void *)vp, bp + VPTRSZ, VNODESZ))) { + mutex_enter(&mntvnode_lock); + (void)vunmark(mvp); + mutex_exit(&mntvnode_lock); + vfree(mvp); return (error); + } bp += VPTRSZ + VNODESZ; - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); mutex_enter(&mountlist_lock); nmp = CIRCLEQ_NEXT(mp, mnt_list); vfs_unbusy(mp); + vfree(mvp); } mutex_exit(&mountlist_lock); @@ -1527,13 +1684,32 @@ again: return (0); } +/* + * Remove clean vnodes from a mountpoint's vnode list. + */ +void +vfs_scrubvnlist(struct mount *mp) +{ + vnode_t *vp, *nvp; + + mutex_enter(&mntvnode_lock); + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { + nvp = TAILQ_NEXT(vp, v_mntvnodes); + mutex_enter(&vp->v_interlock); + if ((vp->v_iflag & VI_CLEAN) != 0) + TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes); + mutex_exit(&vp->v_interlock); + } + mutex_exit(&mntvnode_lock); +} + /* * Check to see if a filesystem is mounted on a block device. */ int -vfs_mountedon(struct vnode *vp) +vfs_mountedon(vnode_t *vp) { - struct vnode *vq; + vnode_t *vq; int error = 0; if (vp->v_type != VBLK) @@ -1541,7 +1717,7 @@ vfs_mountedon(struct vnode *vp) if (vp->v_specmountpoint != NULL) return (EBUSY); if (vp->v_iflag & VI_ALIASED) { - simple_lock(&spechash_slock); + mutex_enter(&spechash_lock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) @@ -1551,7 +1727,7 @@ vfs_mountedon(struct vnode *vp) break; } } - simple_unlock(&spechash_slock); + mutex_exit(&spechash_lock); } return (error); } @@ -1595,8 +1771,6 @@ vfs_unmountall(struct lwp *l) printf("WARNING: some file systems would not unmount\n"); } -extern struct simplelock bqueue_slock; /* XXX */ - /* * Sync and unmount file systems before shutting down. */ diff --git a/sys/kern/vfs_subr2.c b/sys/kern/vfs_subr2.c index f190c34d5278..d66b502417f5 100644 --- a/sys/kern/vfs_subr2.c +++ b/sys/kern/vfs_subr2.c @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_subr2.c,v 1.8 2007/11/26 19:02:08 pooka Exp $ */ +/* $NetBSD: vfs_subr2.c,v 1.9 2008/01/02 11:48:56 ad Exp $ */ /*- * Copyright (c) 1997, 1998, 2004, 2005, 2007 The NetBSD Foundation, Inc. 
@@ -82,7 +82,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: vfs_subr2.c,v 1.8 2007/11/26 19:02:08 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_subr2.c,v 1.9 2008/01/02 11:48:56 ad Exp $"); #include "opt_ddb.h" @@ -95,6 +95,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_subr2.c,v 1.8 2007/11/26 19:02:08 pooka Exp $"); #include #include #include +#include #include #include @@ -124,13 +125,11 @@ int prtactive = 0; /* 1 => print out reclaim of active vnodes */ kmutex_t mountlist_lock; kmutex_t mntid_lock; -struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER; -struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER; +kmutex_t mntvnode_lock; +kmutex_t vnode_free_list_lock; +kmutex_t spechash_lock; kmutex_t vfs_list_lock; -/* XXX - gross; single global lock to protect v_numoutput */ -struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER; - struct mntlist mountlist = /* mounted filesystem list */ CIRCLEQ_HEAD_INITIALIZER(mountlist); @@ -146,7 +145,7 @@ struct device *root_device; /* root device */ void printlockedvnodes(void); #endif -long numvnodes; +u_int numvnodes; /* * Initialize the vnode management data structures. @@ -157,14 +156,16 @@ vntblinit(void) mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE); mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&spechash_lock, MUTEX_DEFAULT, IPL_NONE); mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE); mount_specificdata_domain = specificdata_domain_create(); - /* - * Initialize the filesystem syncer. - */ + /* Initialize the filesystem syncer. */ vn_initialize_syncerd(); + vn_init1(); } /* @@ -195,9 +196,68 @@ vfs_destroy(struct mount *mp) { specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); + mutex_destroy(&mp->mnt_mutex); + lockdestroy(&mp->mnt_lock); free(mp, M_MOUNT); } +/* + * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or + * recycled. + */ +void +vwait(vnode_t *vp, int flags) +{ + + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT(vp->v_usecount != 0); + + while ((vp->v_iflag & flags) != 0) + cv_wait(&vp->v_cv, &vp->v_interlock); +} + +/* + * Insert a marker vnode into a mount's vnode list, after the + * specified vnode. mntvnode_lock must be held. + */ +void +vmark(vnode_t *mvp, vnode_t *vp) +{ + struct mount *mp; + + mp = mvp->v_mount; + + KASSERT(mutex_owned(&mntvnode_lock)); + KASSERT((mvp->v_iflag & VI_MARKER) != 0); + KASSERT(vp->v_mount == mp); + + TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vp, mvp, v_mntvnodes); +} + +/* + * Remove a marker vnode from a mount's vnode list, and return + * a pointer to the next vnode in the list. mntvnode_lock must + * be held. + */ +vnode_t * +vunmark(vnode_t *mvp) +{ + vnode_t *vp; + struct mount *mp; + + mp = mvp->v_mount; + + KASSERT(mutex_owned(&mntvnode_lock)); + KASSERT((mvp->v_iflag & VI_MARKER) != 0); + + vp = TAILQ_NEXT(mvp, v_mntvnodes); + TAILQ_REMOVE(&mp->mnt_vnodelist, mvp, v_mntvnodes); + + KASSERT(vp == NULL || vp->v_mount == mp); + + return vp; +} + /* * Update outstanding I/O count and do wakeup if requested. */ @@ -206,20 +266,16 @@ vwakeup(struct buf *bp) { struct vnode *vp; - if ((vp = bp->b_vp) != NULL) { - /* XXX global lock hack - * can't use v_interlock here since this is called - * in interrupt context from biodone(). 
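vmark()/vunmark() above are the piece that lets vflush() and sysctl_kern_vnode() drop mntvnode_lock mid-scan: the marker vnode holds the iterator's position while real vnodes come and go. The complete traversal skeleton, matching the loops in vfs_subr.c with the per-vnode work elided:

	vnode_t *vp, *mvp;

	mvp = valloc(mp);	/* marker: allocated PR_WAITOK for mp != NULL */
	mutex_enter(&mntvnode_lock);
	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
	    vp = vunmark(mvp)) {
		vmark(mvp, vp);
		if (vp->v_mount != mp || vismarker(vp))
			continue;
		/*
		 * mntvnode_lock may be dropped here to work on vp;
		 * the marker keeps our place in the list.
		 */
	}
	mutex_exit(&mntvnode_lock);
	vfree(mvp);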
- */ - simple_lock(&global_v_numoutput_slock); - if (--vp->v_numoutput < 0) - panic("vwakeup: neg numoutput, vp %p", vp); - if ((vp->v_iflag & VI_BWAIT) && vp->v_numoutput <= 0) { - vp->v_iflag &= ~VI_BWAIT; - wakeup((void *)&vp->v_numoutput); - } - simple_unlock(&global_v_numoutput_slock); - } + if ((vp = bp->b_vp) == NULL) + return; + + KASSERT(bp->b_objlock == &vp->v_interlock); + KASSERT(mutex_owned(bp->b_objlock)); + + if (--vp->v_numoutput < 0) + panic("vwakeup: neg numoutput, vp %p", vp); + if (vp->v_numoutput == 0) + cv_broadcast(&vp->v_cv); } /* @@ -229,15 +285,15 @@ vwakeup(struct buf *bp) */ int vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l, - int slpflag, int slptimeo) + bool catch, int slptimeo) { struct buf *bp, *nbp; - int s, error; + int error; int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO | - (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0); + (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0); /* XXXUBC this doesn't look at flags or slp* */ - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, 0, 0, flushflags); if (error) { return error; @@ -247,67 +303,48 @@ vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l, error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0); if (error) return (error); -#ifdef DIAGNOSTIC - s = splbio(); - if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd)) - panic("vinvalbuf: dirty bufs, vp %p", vp); - splx(s); -#endif + KASSERT(vp->v_numoutput == 0 && LIST_EMPTY(&vp->v_dirtyblkhd)); } - s = splbio(); - + mutex_enter(&bufcache_lock); restart: - for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { - nbp = LIST_NEXT(bp, b_vnbufs); - simple_lock(&bp->b_interlock); - if (bp->b_flags & B_BUSY) { - bp->b_flags |= B_WANTED; - error = ltsleep((void *)bp, - slpflag | (PRIBIO + 1) | PNORELOCK, - "vinvalbuf", slptimeo, &bp->b_interlock); - if (error) { - splx(s); - return (error); - } - goto restart; - } - bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; - simple_unlock(&bp->b_interlock); - brelse(bp, 0); - } - for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); - simple_lock(&bp->b_interlock); - if (bp->b_flags & B_BUSY) { - bp->b_flags |= B_WANTED; - error = ltsleep((void *)bp, - slpflag | (PRIBIO + 1) | PNORELOCK, - "vinvalbuf", slptimeo, &bp->b_interlock); - if (error) { - splx(s); - return (error); - } - goto restart; + error = bbusy(bp, catch, slptimeo); + if (error != 0) { + if (error == EPASSTHROUGH) + goto restart; + mutex_exit(&bufcache_lock); + return (error); + } + brelsel(bp, BC_INVAL | BC_VFLUSH); + } + + for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { + nbp = LIST_NEXT(bp, b_vnbufs); + error = bbusy(bp, catch, slptimeo); + if (error != 0) { + if (error == EPASSTHROUGH) + goto restart; + mutex_exit(&bufcache_lock); + return (error); } /* * XXX Since there are no node locks for NFS, I believe * there is a slight chance that a delayed write will * occur while sleeping just above, so check for it. 
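The vinvalbuf() rework above trades the open-coded B_BUSY/B_WANTED sleep for bbusy()/brelsel() under bufcache_lock. One list's worth of that loop, isolated as a sketch (vp, bp, nbp and error are assumed declared as in vinvalbuf() itself):

	mutex_enter(&bufcache_lock);
restart:
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp != NULL; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		error = bbusy(bp, false, 0);	/* may sleep, dropping the lock */
		if (error != 0) {
			if (error == EPASSTHROUGH)
				goto restart;	/* bp was reused; rescan */
			mutex_exit(&bufcache_lock);
			return error;
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);	/* lock still held */
	}
	mutex_exit(&bufcache_lock);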
*/ - if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { + if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) { #ifdef DEBUG printf("buffer still DELWRI\n"); #endif - bp->b_flags |= B_BUSY | B_VFLUSH; - simple_unlock(&bp->b_interlock); + bp->b_cflags |= BC_BUSY | BC_VFLUSH; + mutex_exit(&bufcache_lock); VOP_BWRITE(bp); + mutex_enter(&bufcache_lock); goto restart; } - bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; - simple_unlock(&bp->b_interlock); - brelse(bp, 0); + brelsel(bp, BC_INVAL | BC_VFLUSH); } #ifdef DIAGNOSTIC @@ -315,7 +352,7 @@ restart: panic("vinvalbuf: flush failed, vp %p", vp); #endif - splx(s); + mutex_exit(&bufcache_lock); return (0); } @@ -326,91 +363,78 @@ restart: * buffers from being queued. */ int -vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo) +vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo) { struct buf *bp, *nbp; - int s, error; + int error; voff_t off; off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO); if (error) { return error; } - s = splbio(); - + mutex_enter(&bufcache_lock); restart: - for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { - nbp = LIST_NEXT(bp, b_vnbufs); - if (bp->b_lblkno < lbn) - continue; - simple_lock(&bp->b_interlock); - if (bp->b_flags & B_BUSY) { - bp->b_flags |= B_WANTED; - error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, - "vtruncbuf", slptimeo, &bp->b_interlock); - if (error) { - splx(s); - return (error); - } - goto restart; - } - bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; - simple_unlock(&bp->b_interlock); - brelse(bp, 0); - } - for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); if (bp->b_lblkno < lbn) continue; - simple_lock(&bp->b_interlock); - if (bp->b_flags & B_BUSY) { - bp->b_flags |= B_WANTED; - error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, - "vtruncbuf", slptimeo, &bp->b_interlock); - if (error) { - splx(s); - return (error); - } - goto restart; + error = bbusy(bp, catch, slptimeo); + if (error != 0) { + if (error == EPASSTHROUGH) + goto restart; + mutex_exit(&bufcache_lock); + return (error); } - bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; - simple_unlock(&bp->b_interlock); - brelse(bp, 0); + brelsel(bp, BC_INVAL | BC_VFLUSH); } - splx(s); + for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { + nbp = LIST_NEXT(bp, b_vnbufs); + if (bp->b_lblkno < lbn) + continue; + error = bbusy(bp, catch, slptimeo); + if (error != 0) { + if (error == EPASSTHROUGH) + goto restart; + mutex_exit(&bufcache_lock); + return (error); + } + brelsel(bp, BC_INVAL | BC_VFLUSH); + } + mutex_exit(&bufcache_lock); return (0); } +/* + * Flush all dirty buffers from a vnode. + * Called with the underlying vnode locked, which should prevent new dirty + * buffers from being queued. + */ void vflushbuf(struct vnode *vp, int sync) { struct buf *bp, *nbp; int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? 
PGO_SYNCIO : 0); - int s; + bool dirty; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); (void) VOP_PUTPAGES(vp, 0, 0, flags); loop: - s = splbio(); + mutex_enter(&bufcache_lock); for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); - simple_lock(&bp->b_interlock); - if ((bp->b_flags & B_BUSY)) { - simple_unlock(&bp->b_interlock); + if ((bp->b_cflags & BC_BUSY)) continue; - } - if ((bp->b_flags & B_DELWRI) == 0) + if ((bp->b_oflags & BO_DELWRI) == 0) panic("vflushbuf: not dirty, bp %p", bp); - bp->b_flags |= B_BUSY | B_VFLUSH; - simple_unlock(&bp->b_interlock); - splx(s); + bp->b_cflags |= BC_BUSY | BC_VFLUSH; + mutex_exit(&bufcache_lock); /* * Wait for I/O associated with indirect blocks to complete, * since there is no way to quickly wait for them below. @@ -421,46 +445,49 @@ loop: (void) bwrite(bp); goto loop; } - if (sync == 0) { - splx(s); + mutex_exit(&bufcache_lock); + + if (sync == 0) return; - } - simple_lock(&global_v_numoutput_slock); - while (vp->v_numoutput) { - vp->v_iflag |= VI_BWAIT; - ltsleep((void *)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0, - &global_v_numoutput_slock); - } - simple_unlock(&global_v_numoutput_slock); - splx(s); - if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { + + mutex_enter(&vp->v_interlock); + while (vp->v_numoutput != 0) + cv_wait(&vp->v_cv, &vp->v_interlock); + dirty = !LIST_EMPTY(&vp->v_dirtyblkhd); + mutex_exit(&vp->v_interlock); + + if (dirty) { vprint("vflushbuf: dirty", vp); goto loop; } } /* - * Associate a buffer with a vnode. + * Associate a buffer with a vnode. There must already be a hold on + * the vnode. */ void bgetvp(struct vnode *vp, struct buf *bp) { - int s; - if (bp->b_vp) - panic("bgetvp: not free, bp %p", bp); - VHOLD(vp); - s = splbio(); + KASSERT(bp->b_vp == NULL); + KASSERT(bp->b_objlock == &buffer_lock); + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT(mutex_owned(&bufcache_lock)); + KASSERT((bp->b_cflags & BC_BUSY) != 0); + + vholdl(vp); bp->b_vp = vp; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; + /* * Insert onto list for new vnode. */ bufinsvn(bp, &vp->v_cleanblkhd); - splx(s); + bp->b_objlock = &vp->v_interlock; } /* @@ -469,14 +496,14 @@ bgetvp(struct vnode *vp, struct buf *bp) void brelvp(struct buf *bp) { - struct vnode *vp; - int s; + struct vnode *vp = bp->b_vp; - if (bp->b_vp == NULL) - panic("brelvp: vp NULL, bp %p", bp); + KASSERT(vp != NULL); + KASSERT(bp->b_objlock == &vp->v_interlock); + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT(mutex_owned(&bufcache_lock)); + KASSERT((bp->b_cflags & BC_BUSY) != 0); - s = splbio(); - vp = bp->b_vp; /* * Delete from old vnode list, if on one. */ @@ -489,50 +516,54 @@ brelvp(struct buf *bp) vn_syncer_remove_from_worklist(vp); } + bp->b_objlock = &buffer_lock; bp->b_vp = NULL; - HOLDRELE(vp); - splx(s); + holdrelel(vp); } /* - * Reassign a buffer from one vnode to another. + * Reassign a buffer from one vnode list to another. + * The list reassignment must be within the same vnode. * Used to assign file specific control information - * (indirect blocks) to the vnode to which they belong. - * - * This function must be called at splbio(). + * (indirect blocks) to the list to which they belong. 
*/ void -reassignbuf(struct buf *bp, struct vnode *newvp) +reassignbuf(struct buf *bp, struct vnode *vp) { struct buflists *listheadp; int delayx; + KASSERT(bp->b_objlock == &vp->v_interlock); + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT((bp->b_cflags & BC_BUSY) != 0); + /* * Delete from old vnode list, if on one. */ if (LIST_NEXT(bp, b_vnbufs) != NOLIST) bufremvn(bp); + /* * If dirty, put on list of dirty buffers; * otherwise insert onto list of clean buffers. */ - if ((bp->b_flags & B_DELWRI) == 0) { - listheadp = &newvp->v_cleanblkhd; - if (TAILQ_EMPTY(&newvp->v_uobj.memq) && - (newvp->v_iflag & VI_ONWORKLST) && - LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) { - newvp->v_iflag &= ~VI_WRMAPDIRTY; - vn_syncer_remove_from_worklist(newvp); + if ((bp->b_oflags & BO_DELWRI) == 0) { + listheadp = &vp->v_cleanblkhd; + if (TAILQ_EMPTY(&vp->v_uobj.memq) && + (vp->v_iflag & VI_ONWORKLST) && + LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { + vp->v_iflag &= ~VI_WRMAPDIRTY; + vn_syncer_remove_from_worklist(vp); } } else { - listheadp = &newvp->v_dirtyblkhd; - if ((newvp->v_iflag & VI_ONWORKLST) == 0) { - switch (newvp->v_type) { + listheadp = &vp->v_dirtyblkhd; + if ((vp->v_iflag & VI_ONWORKLST) == 0) { + switch (vp->v_type) { case VDIR: delayx = dirdelay; break; case VBLK: - if (newvp->v_specmountpoint != NULL) { + if (vp->v_specmountpoint != NULL) { delayx = metadelay; break; } @@ -541,9 +572,9 @@ reassignbuf(struct buf *bp, struct vnode *newvp) delayx = filedelay; break; } - if (!newvp->v_mount || - (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0) - vn_syncer_add_to_worklist(newvp, delayx); + if (!vp->v_mount || + (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) + vn_syncer_add_to_worklist(vp, delayx); } } bufinsvn(bp, listheadp); @@ -654,11 +685,11 @@ vprint(const char *label, struct vnode *vp) printf("%s: ", label); printf("vnode @ %p, flags (%s)\n\ttag %s(%d), type %s(%d), " "usecount %d, writecount %ld, holdcount %ld\n" - "\tmount %p, data %p\n", vp, bf, + "\tfreelisthd %p, mount %p, data %p\n", vp, bf, ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag, ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type, vp->v_usecount, vp->v_writecount, vp->v_holdcnt, - vp->v_mount, vp->v_data); + vp->v_freelisthd, vp->v_mount, vp->v_data); if (vp->v_data != NULL) { printf("\t"); VOP_PRINT(vp); @@ -972,14 +1003,15 @@ vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...)) PRIx64 " dev 0x%x\n", bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev); - bitmask_snprintf(bp->b_flags, buf_flagbits, bf, sizeof(bf)); + bitmask_snprintf(bp->b_flags | bp->b_oflags | bp->b_cflags, + buf_flagbits, bf, sizeof(bf)); (*pr)(" error %d flags 0x%s\n", bp->b_error, bf); (*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n", bp->b_bufsize, bp->b_bcount, bp->b_resid); (*pr)(" data %p saveaddr %p dep %p\n", bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep)); - (*pr)(" iodone %p\n", bp->b_iodone); + (*pr)(" iodone %p objlock %p\n", bp->b_iodone, bp->b_objlock); } diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 1b3dbf6e24c5..fadd0dc24264 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_syscalls.c,v 1.337 2007/12/26 16:01:37 ad Exp $ */ +/* $NetBSD: vfs_syscalls.c,v 1.338 2008/01/02 11:48:56 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.337 2007/12/26 16:01:37 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.338 2008/01/02 11:48:56 ad Exp $"); #include "opt_compat_netbsd.h" 
#include "opt_compat_43.h" @@ -305,7 +305,7 @@ mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, TAILQ_INIT(&mp->mnt_vnodelist); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); - simple_lock_init(&mp->mnt_slock); + mutex_init(&mp->mnt_mutex, MUTEX_DEFAULT, IPL_NONE); (void)vfs_busy(mp, LK_NOWAIT, 0); mp->mnt_vnodecovered = vp; @@ -665,8 +665,7 @@ dounmount(struct mount *mp, int flags, struct lwp *l) mp->mnt_iflag |= IMNT_UNMOUNT; mp->mnt_unmounter = l; - mutex_exit(&mountlist_lock); /* XXX */ - lockmgr(&mp->mnt_lock, LK_DRAIN, NULL); + lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_lock); async = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; @@ -690,20 +689,20 @@ dounmount(struct mount *mp, int flags, struct lwp *l) mp->mnt_iflag &= ~IMNT_UNMOUNT; mp->mnt_unmounter = NULL; mp->mnt_flag |= async; - mutex_exit(&mountlist_lock); /* XXX */ - lockmgr(&mp->mnt_lock, LK_RELEASE | LK_REENABLE, - NULL); + lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, + &mountlist_lock); if (used_syncer) mutex_exit(&syncer_mutex); - simple_lock(&mp->mnt_slock); + mutex_enter(&mp->mnt_mutex); while (mp->mnt_wcnt > 0) { wakeup(mp); - ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1", - 0, &mp->mnt_slock); + mtsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1", + 0, &mp->mnt_mutex); } - simple_unlock(&mp->mnt_slock); + mutex_exit(&mp->mnt_mutex); return (error); } + vfs_scrubvnlist(mp); mutex_enter(&mountlist_lock); CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) @@ -711,18 +710,17 @@ dounmount(struct mount *mp, int flags, struct lwp *l) if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) panic("unmount: dangling vnode"); mp->mnt_iflag |= IMNT_GONE; - mutex_exit(&mountlist_lock); - lockmgr(&mp->mnt_lock, LK_RELEASE, NULL); + lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_lock); if (coveredvp != NULLVP) vrele(coveredvp); if (used_syncer) mutex_exit(&syncer_mutex); - simple_lock(&mp->mnt_slock); + mutex_enter(&mp->mnt_mutex); while (mp->mnt_wcnt > 0) { wakeup(mp); - ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_slock); + mtsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_mutex); } - simple_unlock(&mp->mnt_slock); + mutex_exit(&mp->mnt_mutex); vfs_hooks_unmount(mp); vfs_delref(mp->mnt_op); vfs_destroy(mp); @@ -3582,9 +3580,9 @@ sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) (error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) != 0) goto out; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); revoke = (vp->v_usecount > 1 || (vp->v_iflag & (VI_ALIASED|VI_LAYER))); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); if (revoke) VOP_REVOKE(vp, REVOKEALL); out: diff --git a/sys/kern/vfs_trans.c b/sys/kern/vfs_trans.c index aa00af5024cd..989229f037d6 100644 --- a/sys/kern/vfs_trans.c +++ b/sys/kern/vfs_trans.c @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_trans.c,v 1.15 2007/12/02 13:56:16 hannken Exp $ */ +/* $NetBSD: vfs_trans.c,v 1.16 2008/01/02 11:48:57 ad Exp $ */ /*- * Copyright (c) 2007 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.15 2007/12/02 13:56:16 hannken Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.16 2008/01/02 11:48:57 ad Exp $"); /* * File system transaction operations. 
@@ -51,7 +51,6 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.15 2007/12/02 13:56:16 hannken Exp $ #include #include -#include #include #include #include @@ -63,6 +62,11 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.15 2007/12/02 13:56:16 hannken Exp $ #include #include +struct fscow_handler { + SLIST_ENTRY(fscow_handler) ch_list; + int (*ch_func)(void *, struct buf *, bool); + void *ch_arg; +}; struct fstrans_lwp_info { struct fstrans_lwp_info *fli_succ; struct mount *fli_mount; @@ -73,21 +77,17 @@ struct fstrans_mount_info { enum fstrans_state fmi_state; krwlock_t fmi_shared_lock; krwlock_t fmi_lazy_lock; + krwlock_t fmi_cow_lock; + SLIST_HEAD(, fscow_handler) fmi_cow_handler; }; static specificdata_key_t lwp_data_key; -static specificdata_key_t mount_data_key; -static specificdata_key_t mount_cow_key; static kmutex_t vfs_suspend_lock; /* Serialize suspensions. */ -static kmutex_t fstrans_init_lock; POOL_INIT(fstrans_pl, sizeof(struct fstrans_lwp_info), 0, 0, 0, "fstrans", NULL, IPL_NONE); static void fstrans_lwp_dtor(void *); -static void fstrans_mount_dtor(void *); -static void fscow_mount_dtor(void *); -static struct fstrans_mount_info *fstrans_mount_init(struct mount *); /* * Initialize @@ -99,13 +99,8 @@ fstrans_init(void) error = lwp_specific_key_create(&lwp_data_key, fstrans_lwp_dtor); KASSERT(error == 0); - error = mount_specific_key_create(&mount_data_key, fstrans_mount_dtor); - KASSERT(error == 0); - error = mount_specific_key_create(&mount_cow_key, fscow_mount_dtor); - KASSERT(error == 0); mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE); - mutex_init(&fstrans_init_lock, MUTEX_DEFAULT, IPL_NONE); } /* @@ -124,44 +119,47 @@ fstrans_lwp_dtor(void *arg) } } +/* + * Allocate mount state + */ +int +fstrans_mount(struct mount *mp) +{ + struct fstrans_mount_info *new; + + if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL) + return ENOMEM; + new->fmi_state = FSTRANS_NORMAL; + rw_init(&new->fmi_lazy_lock); + rw_init(&new->fmi_shared_lock); + SLIST_INIT(&new->fmi_cow_handler); + rw_init(&new->fmi_cow_lock); + + mp->mnt_transinfo = new; + mp->mnt_iflag |= IMNT_HAS_TRANS; + + return 0; +} + /* * Deallocate mount state */ -static void -fstrans_mount_dtor(void *arg) +void +fstrans_unmount(struct mount *mp) { - struct fstrans_mount_info *fmi = arg; + struct fstrans_mount_info *fmi; + + if ((fmi = mp->mnt_transinfo) == NULL) + return; KASSERT(fmi->fmi_state == FSTRANS_NORMAL); rw_destroy(&fmi->fmi_lazy_lock); rw_destroy(&fmi->fmi_shared_lock); - free(fmi, M_MOUNT); -} - -/* - * Create mount info for this mount - */ -static struct fstrans_mount_info * -fstrans_mount_init(struct mount *mp) -{ - struct fstrans_mount_info *new; - - mutex_enter(&fstrans_init_lock); - - if ((new = mount_getspecific(mp, mount_data_key)) != NULL) { - mutex_exit(&fstrans_init_lock); - return new; - } - - new = malloc(sizeof(*new), M_MOUNT, M_WAITOK); - new->fmi_state = FSTRANS_NORMAL; - rw_init(&new->fmi_lazy_lock); - rw_init(&new->fmi_shared_lock); - - mount_setspecific(mp, mount_data_key, new); - mutex_exit(&fstrans_init_lock); - - return new; + KASSERT(SLIST_EMPTY(&fmi->fmi_cow_handler)); + rw_destroy(&fmi->fmi_cow_lock); + kmem_free(fmi, sizeof(*fmi)); + mp->mnt_iflag &= ~IMNT_HAS_TRANS; + mp->mnt_transinfo = NULL; } /* @@ -209,8 +207,7 @@ _fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait) KASSERT(new_fli->fli_mount == NULL); KASSERT(new_fli->fli_count == 0); - if ((fmi = mount_getspecific(mp, mount_data_key)) == NULL) - fmi = fstrans_mount_init(mp); + fmi = mp->mnt_transinfo; 
if (lock_type == FSTRANS_LAZY) lock_p = &fmi->fmi_lazy_lock; @@ -255,7 +252,7 @@ fstrans_done(struct mount *mp) KASSERT(fli->fli_mount == mp); KASSERT(fli->fli_count == 0); fli->fli_mount = NULL; - fmi = mount_getspecific(mp, mount_data_key); + fmi = mp->mnt_transinfo; KASSERT(fmi != NULL); if (fli->fli_lock_type == FSTRANS_LAZY) rw_exit(&fmi->fmi_lazy_lock); @@ -296,8 +293,7 @@ fstrans_setstate(struct mount *mp, enum fstrans_state new_state) { struct fstrans_mount_info *fmi; - if ((fmi = mount_getspecific(mp, mount_data_key)) == NULL) - fmi = fstrans_mount_init(mp); + fmi = mp->mnt_transinfo; switch (new_state) { case FSTRANS_SUSPENDING: @@ -344,8 +340,7 @@ fstrans_getstate(struct mount *mp) { struct fstrans_mount_info *fmi; - if ((fmi = mount_getspecific(mp, mount_data_key)) == NULL) - return FSTRANS_NORMAL; + fmi = mp->mnt_transinfo; return fmi->fmi_state; } @@ -432,7 +427,7 @@ fstrans_print_mount(struct mount *mp, int verbose) { struct fstrans_mount_info *fmi; - fmi = mount_getspecific(mp, mount_data_key); + fmi = mp->mnt_transinfo; if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL)) return; @@ -483,76 +478,25 @@ fstrans_dump(int full) } #endif /* defined(DDB) */ - -struct fscow_handler { - SLIST_ENTRY(fscow_handler) ch_list; - int (*ch_func)(void *, struct buf *, bool); - void *ch_arg; -}; - -struct fscow_mount_info { - krwlock_t cmi_lock; - SLIST_HEAD(, fscow_handler) cmi_handler; -}; - -/* - * Deallocate mount state - */ -static void -fscow_mount_dtor(void *arg) -{ - struct fscow_mount_info *cmi = arg; - - KASSERT(SLIST_EMPTY(&cmi->cmi_handler)); - rw_destroy(&cmi->cmi_lock); - kmem_free(cmi, sizeof(*cmi)); -} - -/* - * Create mount info for this mount - */ -static struct fscow_mount_info * -fscow_mount_init(struct mount *mp) -{ - struct fscow_mount_info *new; - - mutex_enter(&fstrans_init_lock); - - if ((new = mount_getspecific(mp, mount_cow_key)) != NULL) { - mutex_exit(&fstrans_init_lock); - return new; - } - - if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) != NULL) { - SLIST_INIT(&new->cmi_handler); - rw_init(&new->cmi_lock); - mount_setspecific(mp, mount_cow_key, new); - } - - mutex_exit(&fstrans_init_lock); - - return new; -} - int fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool), void *arg) { - struct fscow_mount_info *cmi; + struct fstrans_mount_info *fmi; struct fscow_handler *new; - if ((cmi = mount_getspecific(mp, mount_cow_key)) == NULL) - cmi = fscow_mount_init(mp); - if (cmi == NULL) - return ENOMEM; + if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0) + return EINVAL; + + fmi = mp->mnt_transinfo; if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL) return ENOMEM; new->ch_func = func; new->ch_arg = arg; - rw_enter(&cmi->cmi_lock, RW_WRITER); - SLIST_INSERT_HEAD(&cmi->cmi_handler, new, ch_list); - rw_exit(&cmi->cmi_lock); + rw_enter(&fmi->fmi_cow_lock, RW_WRITER); + SLIST_INSERT_HEAD(&fmi->fmi_cow_handler, new, ch_list); + rw_exit(&fmi->fmi_cow_lock); return 0; } @@ -561,21 +505,23 @@ int fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool), void *arg) { - struct fscow_mount_info *cmi; + struct fstrans_mount_info *fmi; struct fscow_handler *hp = NULL; - if ((cmi = mount_getspecific(mp, mount_cow_key)) == NULL) + if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0) return EINVAL; - rw_enter(&cmi->cmi_lock, RW_WRITER); - SLIST_FOREACH(hp, &cmi->cmi_handler, ch_list) + fmi = mp->mnt_transinfo; + + rw_enter(&fmi->fmi_cow_lock, RW_WRITER); + SLIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) if (hp->ch_func == func && 
hp->ch_arg == arg) break; if (hp != NULL) { - SLIST_REMOVE(&cmi->cmi_handler, hp, fscow_handler, ch_list); + SLIST_REMOVE(&fmi->fmi_cow_handler, hp, fscow_handler, ch_list); kmem_free(hp, sizeof(*hp)); } - rw_exit(&cmi->cmi_lock); + rw_exit(&fmi->fmi_cow_lock); return hp ? 0 : EINVAL; } @@ -585,10 +531,10 @@ fscow_run(struct buf *bp, bool data_valid) { int error = 0; struct mount *mp; - struct fscow_mount_info *cmi; + struct fstrans_mount_info *fmi; struct fscow_handler *hp; - if ((bp->b_flags & B_COWDONE)) + if ((bp->b_oflags & BO_COWDONE)) goto done; if (bp->b_vp == NULL) goto done; @@ -596,21 +542,23 @@ fscow_run(struct buf *bp, bool data_valid) mp = bp->b_vp->v_specmountpoint; else mp = bp->b_vp->v_mount; - if (mp == NULL) + if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0) goto done; - if ((cmi = mount_getspecific(mp, mount_cow_key)) == NULL) - goto done; + fmi = mp->mnt_transinfo; - rw_enter(&cmi->cmi_lock, RW_READER); - SLIST_FOREACH(hp, &cmi->cmi_handler, ch_list) + rw_enter(&fmi->fmi_cow_lock, RW_READER); + SLIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0) break; - rw_exit(&cmi->cmi_lock); + rw_exit(&fmi->fmi_cow_lock); -done: - if (error == 0) - bp->b_flags |= B_COWDONE; + done: + if (error == 0) { + mutex_enter(bp->b_objlock); + bp->b_oflags |= BO_COWDONE; + mutex_exit(bp->b_objlock); + } return error; } diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 67e2c87375c4..5738a12b5a2d 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_vnops.c,v 1.148 2007/12/08 19:29:50 pooka Exp $ */ +/* $NetBSD: vfs_vnops.c,v 1.149 2008/01/02 11:48:57 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.148 2007/12/08 19:29:50 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.149 2008/01/02 11:48:57 ad Exp $"); #include "fs_union.h" #include "veriexec.h" @@ -194,9 +194,9 @@ vn_open(struct nameidata *ndp, int fmode, int cmode) if ((error = VOP_OPEN(vp, fmode, cred)) != 0) goto bad; if (fmode & FWRITE) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); vp->v_writecount++; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } bad: @@ -255,7 +255,7 @@ void vn_markexec(struct vnode *vp) { - LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); + KASSERT(mutex_owned(&vp->v_interlock)); if ((vp->v_iflag & VI_EXECMAP) == 0) { atomic_add_int(&uvmexp.filepages, -vp->v_uobj.uo_npages); @@ -272,15 +272,15 @@ int vn_marktext(struct vnode *vp) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_writecount != 0) { KASSERT((vp->v_iflag & VI_TEXT) == 0); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (ETXTBSY); } vp->v_iflag |= VI_TEXT; vn_markexec(vp); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (0); } @@ -294,7 +294,7 @@ vn_close(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l) { int error; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (flags & FWRITE) vp->v_writecount--; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK); @@ -676,21 +676,20 @@ vn_lock(struct vnode *vp, int flags) || (vp->v_iflag & VI_ONWORKLST) != 0); #endif KASSERT((flags & - ~(LK_INTERLOCK|LK_SHARED|LK_EXCLUSIVE|LK_DRAIN|LK_NOWAIT|LK_RETRY| - LK_SETRECURSE|LK_CANRECURSE)) + ~(LK_INTERLOCK|LK_SHARED|LK_EXCLUSIVE|LK_NOWAIT|LK_RETRY| + LK_CANRECURSE)) == 0); do { if ((flags & LK_INTERLOCK) == 
0)
-			simple_lock(&vp->v_interlock);
+			mutex_enter(&vp->v_interlock);
 		if (vp->v_iflag & VI_XLOCK) {
 			if (flags & LK_NOWAIT) {
-				simple_unlock(&vp->v_interlock);
+				mutex_exit(&vp->v_interlock);
 				return EBUSY;
 			}
-			vp->v_iflag |= VI_XWANT;
-			ltsleep(vp, PINOD | PNORELOCK,
-			    "vn_lock", 0, &vp->v_interlock);
+			vwait(vp, VI_XLOCK);
+			mutex_exit(&vp->v_interlock);
 			error = ENOENT;
 		} else {
 			error = VOP_LOCK(vp,
@@ -720,13 +719,13 @@ vn_closefile(struct file *fp, struct lwp *l)
 u_int
 vn_setrecurse(struct vnode *vp)
 {
-	struct lock *lkp = &vp->v_lock;
+	struct lock *lkp = vp->v_vnlock;
 	u_int retval;
 
-	simple_lock(&lkp->lk_interlock);
+	mutex_enter(&lkp->lk_interlock);
 	retval = lkp->lk_flags & LK_CANRECURSE;
 	lkp->lk_flags |= LK_CANRECURSE;
-	simple_unlock(&lkp->lk_interlock);
+	mutex_exit(&lkp->lk_interlock);
 
 	return retval;
 }
@@ -737,12 +736,12 @@ vn_setrecurse(struct vnode *vp)
 void
 vn_restorerecurse(struct vnode *vp, u_int flags)
 {
-	struct lock *lkp = &vp->v_lock;
+	struct lock *lkp = vp->v_vnlock;
 
-	simple_lock(&lkp->lk_interlock);
+	mutex_enter(&lkp->lk_interlock);
 	lkp->lk_flags &= ~LK_CANRECURSE;
 	lkp->lk_flags |= flags;
-	simple_unlock(&lkp->lk_interlock);
+	mutex_exit(&lkp->lk_interlock);
 }
 
 /*
@@ -842,23 +841,23 @@ vn_ra_allocctx(struct vnode *vp)
 {
 	struct uvm_ractx *ra = NULL;
 
+	KASSERT(mutex_owned(&vp->v_interlock));
+
 	if (vp->v_type != VREG) {
 		return;
 	}
 	if (vp->v_ractx != NULL) {
 		return;
 	}
-	simple_lock(&vp->v_interlock);
 	if (vp->v_ractx == NULL) {
-		simple_unlock(&vp->v_interlock);
+		mutex_exit(&vp->v_interlock);
 		ra = uvm_ra_allocctx();
-		simple_lock(&vp->v_interlock);
+		mutex_enter(&vp->v_interlock);
 		if (ra != NULL && vp->v_ractx == NULL) {
 			vp->v_ractx = ra;
 			ra = NULL;
 		}
 	}
-	simple_unlock(&vp->v_interlock);
 	if (ra != NULL) {
 		uvm_ra_freectx(ra);
 	}
diff --git a/sys/kern/vnode_if.sh b/sys/kern/vnode_if.sh
index 919364ed9cc3..b50cc0fadec7 100644
--- a/sys/kern/vnode_if.sh
+++ b/sys/kern/vnode_if.sh
@@ -29,7 +29,7 @@ copyright="\
  * SUCH DAMAGE.
  */
 "
-SCRIPT_ID='$NetBSD: vnode_if.sh,v 1.47 2007/11/26 19:02:11 pooka Exp $'
+SCRIPT_ID='$NetBSD: vnode_if.sh,v 1.48 2008/01/02 11:48:57 ad Exp $'
 
 # Script to produce VFS front-end sugar.
 #
@@ -220,6 +220,8 @@ BEGIN {
 	vop_offset = 1;	# start at 1, to count the 'default' op
 
 	printf("\n/* Special cases: */\n#include <sys/buf.h>\n");
+	printf("#ifndef _KERNEL\n#include <stdbool.h>\n#endif\n\n");
+
 	argc=1;
 	argtype[0]="struct buf *";
 	argname[0]="bp";
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index 5479cac10487..d70a0ec9be2a 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -1,4 +1,4 @@
-#	$NetBSD: vnode_if.src,v 1.55 2007/11/26 19:02:11 pooka Exp $
+#	$NetBSD: vnode_if.src,v 1.56 2008/01/02 11:48:58 ad Exp $
 #
 # Copyright (c) 1992, 1993
 #	The Regents of the University of California.  All rights reserved.
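
Given the vn_lock() rework above, the caller-side idiom with the new kmutex interlock looks roughly like this; a sketch under the assumption that callers still use LK_INTERLOCK to hand an already-held v_interlock to vn_lock():

	/* Hypothetical caller; not part of the patch. */
	mutex_enter(&vp->v_interlock);
	/* ... inspect v_iflag, v_usecount, etc. under the interlock ... */
	error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK);
	if (error != 0)
		return error;	/* vn_lock() has released the interlock */
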
@@ -373,6 +373,7 @@ vop_abortop { # vop_inactive { IN LOCKED=YES WILLUNLOCK struct vnode *vp; + INOUT bool *recycle; }; # diff --git a/sys/lib/libsa/ufs.c b/sys/lib/libsa/ufs.c index 055ed409d0b3..12061eff6cec 100644 --- a/sys/lib/libsa/ufs.c +++ b/sys/lib/libsa/ufs.c @@ -1,4 +1,4 @@ -/* $NetBSD: ufs.c,v 1.52 2007/12/01 17:44:16 tsutsui Exp $ */ +/* $NetBSD: ufs.c,v 1.53 2008/01/02 11:48:58 ad Exp $ */ /*- * Copyright (c) 1993 @@ -68,6 +68,7 @@ #include #ifdef LIBSA_LFS #include +#include #include /* XXX for MNAMELEN */ #include #else diff --git a/sys/miscfs/deadfs/dead_vnops.c b/sys/miscfs/deadfs/dead_vnops.c index 96fbb5bf1230..ff9f25c98712 100644 --- a/sys/miscfs/deadfs/dead_vnops.c +++ b/sys/miscfs/deadfs/dead_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: dead_vnops.c,v 1.45 2007/10/10 20:42:28 ad Exp $ */ +/* $NetBSD: dead_vnops.c,v 1.46 2008/01/02 11:48:58 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: dead_vnops.c,v 1.45 2007/10/10 20:42:28 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: dead_vnops.c,v 1.46 2008/01/02 11:48:58 ad Exp $"); #include #include @@ -300,7 +300,7 @@ dead_getpages(void *v) } */ *ap = v; if ((ap->a_flags & PGO_LOCKED) == 0) - simple_unlock(&ap->a_vp->v_interlock); + mutex_exit(&ap->a_vp->v_interlock); return (EFAULT); } @@ -317,13 +317,12 @@ chkvnlock(vp, interlock) int locked = 0; if (!interlock) - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); while (vp->v_iflag & VI_XLOCK) { - vp->v_iflag |= VI_XWANT; - (void) tsleep(vp, PINOD, "deadchk", 0); + vwait(vp, VI_XLOCK); locked = 1; } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (locked); } diff --git a/sys/miscfs/fdesc/fdesc_vfsops.c b/sys/miscfs/fdesc/fdesc_vfsops.c index a2a140a0c721..8af010c5410e 100644 --- a/sys/miscfs/fdesc/fdesc_vfsops.c +++ b/sys/miscfs/fdesc/fdesc_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: fdesc_vfsops.c,v 1.71 2007/11/26 19:02:12 pooka Exp $ */ +/* $NetBSD: fdesc_vfsops.c,v 1.72 2008/01/02 11:48:58 ad Exp $ */ /* * Copyright (c) 1992, 1993, 1995 @@ -41,7 +41,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fdesc_vfsops.c,v 1.71 2007/11/26 19:02:12 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fdesc_vfsops.c,v 1.72 2008/01/02 11:48:58 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" @@ -128,11 +128,7 @@ fdesc_unmount(struct mount *mp, int mntflags) return (error); /* - * Release reference on underlying root vnode - */ - vrele(rtvp); - /* - * And blow it away for future re-use + * Blow it away for future re-use */ vgone(rtvp); /* diff --git a/sys/miscfs/genfs/genfs_io.c b/sys/miscfs/genfs/genfs_io.c index 13da88a4d761..802ec58e1c76 100644 --- a/sys/miscfs/genfs/genfs_io.c +++ b/sys/miscfs/genfs/genfs_io.c @@ -1,4 +1,4 @@ -/* $NetBSD: genfs_io.c,v 1.1 2007/10/17 16:45:00 pooka Exp $ */ +/* $NetBSD: genfs_io.c,v 1.2 2008/01/02 11:48:59 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -31,7 +31,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.1 2007/10/17 16:45:00 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.2 2008/01/02 11:48:59 ad Exp $"); #include #include @@ -81,9 +81,9 @@ genfs_rel_pages(struct vm_page **pgs, int npages) pg->flags |= PG_RELEASED; } } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_page_unbusy(pgs, npages); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } /* @@ -178,7 +178,7 @@ startover: if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) { if ((flags & PGO_LOCKED) == 0) { - simple_unlock(&uobj->vmobjlock); + 
mutex_exit(&uobj->vmobjlock); } UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x", origoffset, *ap->a_count, memeof,0); @@ -255,7 +255,7 @@ startover: error = (ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0); goto out_err; } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); /* * find the requested pages and make some simple checks. @@ -312,7 +312,7 @@ startover: } else { rw_enter(&gp->g_glock, RW_READER); } - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); if (vp->v_size < origvsize) { rw_exit(&gp->g_glock); if (pgs != pgs_onstack) @@ -325,7 +325,7 @@ startover: rw_exit(&gp->g_glock); KASSERT(async != 0); genfs_rel_pages(&pgs[ridx], orignpages); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); error = EBUSY; goto out_err; } @@ -392,12 +392,12 @@ startover: rw_exit(&gp->g_glock); KASSERT(async != 0); genfs_rel_pages(pgs, npages); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); error = EBUSY; goto out_err; } } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); /* * read the desired page(s). @@ -411,13 +411,18 @@ startover: kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK); - mbp = getiobuf(); + mbp = getiobuf(vp, true); mbp->b_bufsize = totalbytes; mbp->b_data = (void *)kva; mbp->b_resid = mbp->b_bcount = bytes; - mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL|B_ASYNC : 0); - mbp->b_iodone = (async ? uvm_aio_biodone : 0); - mbp->b_vp = vp; + mbp->b_cflags = BC_BUSY; + if (async) { + mbp->b_flags = B_READ | B_ASYNC; + mbp->b_iodone = uvm_aio_biodone; + } else { + mbp->b_flags = B_READ; + mbp->b_iodone = NULL; + } if (async) BIO_SETPRIO(mbp, BPRIO_TIMELIMITED); else @@ -545,7 +550,7 @@ startover: if (offset == startoffset && iobytes == bytes) { bp = mbp; } else { - bp = getiobuf(); + bp = getiobuf(vp, true); nestiobuf_setup(mbp, bp, offset - startoffset, iobytes); } bp->b_lblkno = 0; @@ -600,7 +605,7 @@ loopdone: } } rw_exit(&gp->g_glock); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); /* * we're almost done! release the pages... 
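
The getpages hunks above show the new three-way buffer flag split in action. Restated as a compact sketch (mirroring the code above, not additional patch content): b_cflags holds buffer-cache state (BC_*, protected by bufcache_lock), b_oflags holds owner state (BO_*, protected by b_objlock), and b_flags keeps the plain I/O bits:

	struct buf *mbp;

	mbp = getiobuf(vp, true);		/* second arg: may sleep */
	mbp->b_cflags = BC_BUSY;		/* cache state; buf not yet visible */
	if (async) {
		mbp->b_flags = B_READ | B_ASYNC;	/* I/O bits stay in b_flags */
		mbp->b_iodone = uvm_aio_biodone;	/* async completion callback */
	} else {
		mbp->b_flags = B_READ;
		mbp->b_iodone = NULL;			/* caller will biowait() */
	}
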
@@ -620,10 +625,10 @@ loopdone: pgs[i]->flags |= PG_RELEASED; } } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_page_unbusy(pgs, npages); - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uobj->vmobjlock); UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0); goto out_err; } @@ -631,7 +636,7 @@ loopdone: out: UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0); error = 0; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { pg = pgs[i]; if (pg == NULL) { @@ -663,8 +668,8 @@ out: UVM_PAGE_OWN(pg, NULL); } } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uobj->vmobjlock); if (ap->a_m != NULL) { memcpy(ap->a_m, &pgs[ridx], orignpages * sizeof(struct vm_page *)); @@ -745,11 +750,11 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, struct vm_page **busypg) { struct uvm_object *uobj = &vp->v_uobj; - struct simplelock *slock = &uobj->vmobjlock; + kmutex_t *slock = &uobj->vmobjlock; off_t off; /* Even for strange MAXPHYS, the shift rounds down to a page */ #define maxpages (MAXPHYS >> PAGE_SHIFT) - int i, s, error, npages, nback; + int i, error, npages, nback; int freeflag; struct vm_page *pgs[maxpages], *pg, *nextpg, *tpg, curmp, endmp; bool wasclean, by_list, needs_clean, yld; @@ -774,14 +779,12 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, KASSERT((vp->v_iflag & VI_ONWORKLST) != 0 || (vp->v_iflag & VI_WRMAPDIRTY) == 0); if (uobj->uo_npages == 0) { - s = splbio(); if (vp->v_iflag & VI_ONWORKLST) { vp->v_iflag &= ~VI_WRMAPDIRTY; if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL) vn_syncer_remove_from_worklist(vp); } - splx(s); - simple_unlock(slock); + mutex_exit(slock); return (0); } @@ -790,7 +793,7 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, */ if ((flags & PGO_CLEANIT) != 0) { - simple_unlock(slock); + mutex_exit(slock); if (pagedaemon) { error = fstrans_start_nowait(vp->v_mount, FSTRANS_LAZY); if (error) @@ -798,15 +801,11 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, } else fstrans_start(vp->v_mount, FSTRANS_LAZY); has_trans = true; - simple_lock(slock); + mutex_enter(slock); } error = 0; - s = splbio(); - simple_lock(&global_v_numoutput_slock); wasclean = (vp->v_numoutput == 0); - simple_unlock(&global_v_numoutput_slock); - splx(s); off = startoff; if (endoff == 0 || flags & PGO_ALLPAGES) { endoff = trunc_page(LLONG_MAX); @@ -917,13 +916,13 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, TAILQ_NEXT(&curmp, listq), 0,0,0); } if (yld) { - simple_unlock(slock); + mutex_exit(slock); preempt(); - simple_lock(slock); + mutex_enter(slock); } else { pg->flags |= PG_WANTED; UVM_UNLOCK_AND_WAIT(pg, slock, 0, "genput", 0); - simple_lock(slock); + mutex_enter(slock); } if (by_list) { UVMHIST_LOG(ubchist, "after next %p", @@ -1035,7 +1034,7 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, */ if (flags & (PGO_DEACTIVATE|PGO_FREE)) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); } for (i = 0; i < npages; i++) { tpg = pgs[i]; @@ -1052,7 +1051,7 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, if (tpg->flags & PG_BUSY) { tpg->flags |= freeflag; if (pagedaemon) { - uvmexp.paging++; + uvm_pageout_start(1); uvm_pagedequeue(tpg); } } else { @@ -1071,7 +1070,7 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, 
} } if (flags & (PGO_DEACTIVATE|PGO_FREE)) { - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } if (needs_clean) { modified = true; @@ -1085,9 +1084,9 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, TAILQ_INSERT_AFTER(&uobj->memq, pg, &curmp, listq); } - simple_unlock(slock); + mutex_exit(slock); error = GOP_WRITE(vp, pgs, npages, flags); - simple_lock(slock); + mutex_enter(slock); if (by_list) { pg = TAILQ_NEXT(&curmp, listq); TAILQ_REMOVE(&uobj->memq, &curmp, listq); @@ -1135,35 +1134,23 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, * and we're doing sync i/o, wait for all writes to finish. */ - s = splbio(); if (cleanall && wasclean && gp->g_dirtygen == dirtygen && (vp->v_iflag & VI_ONWORKLST) != 0) { vp->v_iflag &= ~VI_WRMAPDIRTY; if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL) vn_syncer_remove_from_worklist(vp); } - splx(s); #if !defined(DEBUG) skip_scan: #endif /* !defined(DEBUG) */ - if (!wasclean && !async) { - s = splbio(); - /* - * XXX - we want simple_unlock(&global_v_numoutput_slock); - * but the slot in ltsleep() is taken! - * XXX - try to recover from missed wakeups with a timeout.. - * must think of something better. - */ - while (vp->v_numoutput != 0) { - vp->v_iflag |= VI_BWAIT; - UVM_UNLOCK_AND_WAIT(&vp->v_numoutput, slock, false, - "genput2", hz); - simple_lock(slock); - } - splx(s); + + /* Wait for output to complete. */ + if (!wasclean && !async && vp->v_numoutput != 0) { + while (vp->v_numoutput != 0) + cv_wait(&vp->v_cv, slock); } - simple_unlock(slock); + mutex_exit(slock); if (has_trans) fstrans_done(vp->v_mount); @@ -1235,21 +1222,24 @@ genfs_do_io(struct vnode *vp, off_t off, vaddr_t kva, size_t len, int flags, KASSERT(bytes != 0); if (write) { - s = splbio(); - simple_lock(&global_v_numoutput_slock); + mutex_enter(&vp->v_interlock); vp->v_numoutput += 2; - simple_unlock(&global_v_numoutput_slock); - splx(s); + mutex_exit(&vp->v_interlock); } - mbp = getiobuf(); + mbp = getiobuf(vp, true); UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x", vp, mbp, vp->v_numoutput, bytes); mbp->b_bufsize = len; mbp->b_data = (void *)kva; mbp->b_resid = mbp->b_bcount = bytes; - mbp->b_flags = B_BUSY | brw | B_AGE | (async ? (B_CALL | B_ASYNC) : 0); - mbp->b_iodone = iodone; - mbp->b_vp = vp; + mbp->b_cflags = BC_BUSY | BC_AGE; + if (async) { + mbp->b_flags = brw | B_ASYNC; + mbp->b_iodone = iodone; + } else { + mbp->b_flags = brw; + mbp->b_iodone = NULL; + } if (curlwp == uvm.pagedaemon_lwp) BIO_SETPRIO(mbp, BPRIO_TIMELIMITED); else if (async) @@ -1287,7 +1277,7 @@ genfs_do_io(struct vnode *vp, off_t off, vaddr_t kva, size_t len, int flags, } else { UVMHIST_LOG(ubchist, "vp %p bp %p num now %d", vp, bp, vp->v_numoutput, 0); - bp = getiobuf(); + bp = getiobuf(vp, true); nestiobuf_setup(mbp, bp, offset - startoffset, iobytes); } bp->b_lblkno = 0; @@ -1334,7 +1324,7 @@ genfs_null_putpages(void *v) struct vnode *vp = ap->a_vp; KASSERT(vp->v_uobj.uo_npages == 0); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (0); } @@ -1378,16 +1368,16 @@ genfs_compat_getpages(void *v) return (ap->a_m[ap->a_centeridx] == NULL ? 
EBUSY : 0); } if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= vp->v_size) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return (EINVAL); } if ((ap->a_flags & PGO_SYNCIO) == 0) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return 0; } npages = orignpages; uvn_findpages(uobj, origoffset, &npages, pgs, UFP_ALL); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK); for (i = 0; i < npages; i++) { @@ -1413,8 +1403,8 @@ genfs_compat_getpages(void *v) } } uvm_pagermapout(kva, npages); - simple_lock(&uobj->vmobjlock); - uvm_lock_pageq(); + mutex_enter(&uobj->vmobjlock); + mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { pg = pgs[i]; if (error && (pg->flags & PG_FAKE) != 0) { @@ -1427,8 +1417,8 @@ genfs_compat_getpages(void *v) if (error) { uvm_page_unbusy(pgs, npages); } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uobj->vmobjlock); return (error); } @@ -1442,7 +1432,7 @@ genfs_compat_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, kauth_cred_t cred = curlwp->l_cred; struct buf *bp; vaddr_t kva; - int s, error; + int error; offset = pgs[0]->offset; kva = uvm_pagermapin(pgs, npages, @@ -1459,13 +1449,12 @@ genfs_compat_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, /* XXX vn_lock */ error = VOP_WRITE(vp, &uio, 0, cred); - s = splbio(); - V_INCR_NUMOUTPUT(vp); - splx(s); + mutex_enter(&vp->v_interlock); + vp->v_numoutput++; + mutex_exit(&vp->v_interlock); - bp = getiobuf(); - bp->b_flags = B_BUSY | B_WRITE | B_AGE; - bp->b_vp = vp; + bp = getiobuf(vp, true); + bp->b_cflags = BC_BUSY | BC_AGE; bp->b_lblkno = offset >> vp->v_mount->mnt_fs_bshift; bp->b_data = (char *)kva; bp->b_bcount = npages << PAGE_SHIFT; @@ -1568,15 +1557,14 @@ genfs_directio(struct vnode *vp, struct uio *uio, int ioflag) static void genfs_dio_iodone(struct buf *bp) { - int s; KASSERT((bp->b_flags & B_ASYNC) == 0); - s = splbio(); - if ((bp->b_flags & (B_READ | B_AGE)) == B_AGE) { + if ((bp->b_flags & B_READ) == 0 && (bp->b_cflags & BC_AGE) != 0) { + mutex_enter(bp->b_objlock); vwakeup(bp); + mutex_exit(bp->b_objlock); } putiobuf(bp); - splx(s); } /* @@ -1631,7 +1619,7 @@ genfs_do_directio(struct vmspace *vs, vaddr_t uva, size_t len, struct vnode *vp, spoff = trunc_page(off); epoff = round_page(off + len); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, spoff, epoff, pgoflags); if (error) { return error; @@ -1683,3 +1671,4 @@ genfs_do_directio(struct vmspace *vs, vaddr_t uva, size_t len, struct vnode *vp, uvm_vsunlock(vs, (void *)uva, len); return error; } + diff --git a/sys/miscfs/genfs/genfs_vnops.c b/sys/miscfs/genfs/genfs_vnops.c index a548160741d5..35f6791b4a2f 100644 --- a/sys/miscfs/genfs/genfs_vnops.c +++ b/sys/miscfs/genfs/genfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: genfs_vnops.c,v 1.159 2007/12/05 17:19:59 pooka Exp $ */ +/* $NetBSD: genfs_vnops.c,v 1.160 2008/01/02 11:48:59 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -31,7 +31,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.159 2007/12/05 17:19:59 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.160 2008/01/02 11:48:59 ad Exp $"); #include #include @@ -219,56 +219,46 @@ genfs_revoke(void *v) struct vnode *a_vp; int a_flags; } */ *ap = v; - struct vnode *vp, *vq; - struct lwp *l = curlwp; /* XXX */ + struct vnode *vp, *vq, **vpp; + enum vtype type; + 
dev_t dev; #ifdef DIAGNOSTIC if ((ap->a_flags & REVOKEALL) == 0) panic("genfs_revoke: not revokeall"); #endif - vp = ap->a_vp; - simple_lock(&vp->v_interlock); - if (vp->v_iflag & VI_ALIASED) { - /* - * If a vgone (or vclean) is already in progress, - * wait until it is done and return. - */ - if (vp->v_iflag & VI_XLOCK) { - vp->v_iflag |= VI_XWANT; - ltsleep(vp, PINOD|PNORELOCK, "vop_revokeall", 0, - &vp->v_interlock); - return (0); - } - /* - * Ensure that vp will not be vgone'd while we - * are eliminating its aliases. - */ - vp->v_iflag |= VI_XLOCK; - simple_unlock(&vp->v_interlock); - while (vp->v_iflag & VI_ALIASED) { - simple_lock(&spechash_slock); - for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { - if (vq->v_rdev != vp->v_rdev || - vq->v_type != vp->v_type || vp == vq) - continue; - simple_unlock(&spechash_slock); - vgone(vq); - break; - } - if (vq == NULLVP) - simple_unlock(&spechash_slock); - } - /* - * Remove the lock so that vgone below will - * really eliminate the vnode after which time - * vgone will awaken any sleepers. - */ - simple_lock(&vp->v_interlock); - vp->v_iflag &= ~VI_XLOCK; + mutex_enter(&vp->v_interlock); + if ((vp->v_iflag & VI_CLEAN) != 0) { + mutex_exit(&vp->v_interlock); + return (0); + } else { + dev = vp->v_rdev; + type = vp->v_type; + mutex_exit(&vp->v_interlock); } - vgonel(vp, l); + + if (type != VBLK && type != VCHR) + return (0); + + vpp = &speclisth[SPECHASH(dev)]; + mutex_enter(&spechash_lock); + for (vq = *vpp; vq != NULL;) { + if (vq->v_rdev != dev || vq->v_type != type) { + vq = vq->v_specnext; + continue; + } + mutex_enter(&vq->v_interlock); + mutex_exit(&spechash_lock); + vq->v_usecount++; + vclean(vq, DOCLOSE); + vrelel(vq, 1, 0); + mutex_enter(&spechash_lock); + vq = *vpp; + } + mutex_exit(&spechash_lock); + return (0); } @@ -334,7 +324,7 @@ genfs_nolock(void *v) * the interlock here. */ if (ap->a_flags & LK_INTERLOCK) - simple_unlock(&ap->a_vp->v_interlock); + mutex_exit(&ap->a_vp->v_interlock); return (0); } diff --git a/sys/miscfs/genfs/layer_subr.c b/sys/miscfs/genfs/layer_subr.c index 39b55aef4ca7..75bf9c017dc1 100644 --- a/sys/miscfs/genfs/layer_subr.c +++ b/sys/miscfs/genfs/layer_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: layer_subr.c,v 1.22 2007/10/10 20:42:28 ad Exp $ */ +/* $NetBSD: layer_subr.c,v 1.23 2008/01/02 11:48:59 ad Exp $ */ /* * Copyright (c) 1999 National Aeronautics & Space Administration @@ -68,7 +68,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: layer_subr.c,v 1.22 2007/10/10 20:42:28 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: layer_subr.c,v 1.23 2008/01/02 11:48:59 ad Exp $"); #include #include @@ -77,7 +77,9 @@ __KERNEL_RCSID(0, "$NetBSD: layer_subr.c,v 1.22 2007/10/10 20:42:28 ad Exp $"); #include #include #include +#include #include + #include #include #include @@ -157,8 +159,9 @@ loop: * the layer vp's lock separately afterward, but only * if it does not share the lower vp's lock. 
*/ + mutex_enter(&vp->v_interlock); mutex_exit(&lmp->layerm_hashlock); - error = vget(vp, 0); + error = vget(vp, LK_INTERLOCK); if (error) { mutex_enter(&lmp->layerm_hashlock); goto loop; @@ -189,15 +192,19 @@ layer_node_alloc(mp, lowervp, vpp) int error; extern int (**dead_vnodeop_p)(void *); - if ((error = getnewvnode(lmp->layerm_tag, mp, lmp->layerm_vnodeop_p, - &vp)) != 0) + error = getnewvnode(lmp->layerm_tag, mp, lmp->layerm_vnodeop_p, &vp); + if (error != 0) return (error); vp->v_type = lowervp->v_type; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); vp->v_iflag |= VI_LAYER; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); - xp = malloc(lmp->layerm_size, M_TEMP, M_WAITOK); + xp = kmem_alloc(lmp->layerm_size, KM_SLEEP); + if (xp == NULL) { + ungetnewvnode(vp); + return ENOMEM; + } if (vp->v_type == VBLK || vp->v_type == VCHR) { MALLOC(vp->v_specinfo, struct specinfo *, sizeof(struct specinfo), M_VNODE, M_WAITOK); @@ -206,6 +213,8 @@ layer_node_alloc(mp, lowervp, vpp) } vp->v_data = xp; + vp->v_vflag = (vp->v_vflag & ~VV_MPSAFE) | + (lowervp->v_vflag & VV_MPSAFE); xp->layer_vnode = vp; xp->layer_lowervp = lowervp; xp->layer_flags = 0; @@ -220,7 +229,7 @@ layer_node_alloc(mp, lowervp, vpp) *vpp = nvp; /* free the substructures we've allocated. */ - FREE(xp, M_TEMP); + kmem_free(xp, lmp->layerm_size); if (vp->v_type == VBLK || vp->v_type == VCHR) FREE(vp->v_specinfo, M_VNODE); diff --git a/sys/miscfs/genfs/layer_vnops.c b/sys/miscfs/genfs/layer_vnops.c index 61d9666f3044..ff59798b820d 100644 --- a/sys/miscfs/genfs/layer_vnops.c +++ b/sys/miscfs/genfs/layer_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: layer_vnops.c,v 1.33 2007/12/22 00:48:46 dyoung Exp $ */ +/* $NetBSD: layer_vnops.c,v 1.34 2008/01/02 11:49:00 ad Exp $ */ /* * Copyright (c) 1999 National Aeronautics & Space Administration @@ -232,7 +232,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: layer_vnops.c,v 1.33 2007/12/22 00:48:46 dyoung Exp $"); +__KERNEL_RCSID(0, "$NetBSD: layer_vnops.c,v 1.34 2008/01/02 11:49:00 ad Exp $"); #include #include @@ -241,7 +241,7 @@ __KERNEL_RCSID(0, "$NetBSD: layer_vnops.c,v 1.33 2007/12/22 00:48:46 dyoung Exp #include #include #include -#include +#include #include #include @@ -619,12 +619,7 @@ layer_lock(v) * going away doesn't mean the struct lock below us is. * LK_EXCLUSIVE is fine. */ - if ((flags & LK_TYPE_MASK) == LK_DRAIN) { - return(lockmgr(vp->v_vnlock, - (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE, - &vp->v_interlock)); - } else - return(lockmgr(vp->v_vnlock, flags, &vp->v_interlock)); + return (lockmgr(vp->v_vnlock, flags, &vp->v_interlock)); } else { /* * Ahh well. It would be nice if the fs we're over would @@ -634,19 +629,14 @@ layer_lock(v) * on "..", we have to lock the lower node, then lock our * node. Most of the time it won't matter that we lock our * node (as any locking would need the lower one locked - * first). But we can LK_DRAIN the upper lock as a step - * towards decomissioning it. + * first). 
*/ lowervp = LAYERVPTOLOWERVP(vp); if (flags & LK_INTERLOCK) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); flags &= ~LK_INTERLOCK; } - if ((flags & LK_TYPE_MASK) == LK_DRAIN) { - error = VOP_LOCK(lowervp, - (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE); - } else - error = VOP_LOCK(lowervp, flags); + error = VOP_LOCK(lowervp, flags); if (error) return (error); if ((error = lockmgr(&vp->v_lock, flags, &vp->v_interlock))) { @@ -675,7 +665,7 @@ layer_unlock(v) &vp->v_interlock)); } else { if (flags & LK_INTERLOCK) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); flags &= ~LK_INTERLOCK; } VOP_UNLOCK(LAYERVPTOLOWERVP(vp), flags); @@ -740,10 +730,17 @@ layer_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; - struct lwp *a_l; + bool *a_recycle; } */ *ap = v; struct vnode *vp = ap->a_vp; + /* + * ..., but don't cache the device node. Also, if we did a + * remove, don't cache the node. + */ + *ap->a_recycle = (vp->v_type == VBLK || vp->v_type == VCHR + || (VTOLAYER(vp)->layer_flags & LAYERFS_REMOVED)); + /* * Do nothing (and _don't_ bypass). * Wait to vrele lowervp until reclaim, @@ -758,13 +755,6 @@ layer_inactive(v) */ VOP_UNLOCK(vp, 0); - /* - * ..., but don't cache the device node. Also, if we did a - * remove, don't cache the node. - */ - if (vp->v_type == VBLK || vp->v_type == VCHR - || (VTOLAYER(vp)->layer_flags & LAYERFS_REMOVED)) - vgone(vp); return (0); } @@ -861,9 +851,7 @@ layer_reclaim(v) /* * Note: in vop_reclaim, the node's struct lock has been * decomissioned, so we have to be careful about calling - * VOP's on ourself. Even if we turned a LK_DRAIN into an - * LK_EXCLUSIVE in layer_lock, we still must be careful as VXLOCK is - * set. + * VOP's on ourself. We must be careful as VXLOCK is set. */ /* After this assignment, this node will not be re-used. 
*/ if ((vp == lmp->layerm_rootvp)) { @@ -879,9 +867,10 @@ layer_reclaim(v) mutex_enter(&lmp->layerm_hashlock); LIST_REMOVE(xp, layer_hash); mutex_exit(&lmp->layerm_hashlock); - FREE(vp->v_data, M_TEMP); + kmem_free(vp->v_data, lmp->layerm_size); vp->v_data = NULL; vrele(lowervp); + return (0); } @@ -971,8 +960,8 @@ layer_getpages(v) return EBUSY; } ap->a_vp = LAYERVPTOLOWERVP(vp); - simple_unlock(&vp->v_interlock); - simple_lock(&ap->a_vp->v_interlock); + mutex_exit(&vp->v_interlock); + mutex_enter(&ap->a_vp->v_interlock); error = VCALL(ap->a_vp, VOFFSET(vop_getpages), ap); return error; } @@ -995,11 +984,11 @@ layer_putpages(v) */ ap->a_vp = LAYERVPTOLOWERVP(vp); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); if (ap->a_flags & PGO_RECLAIM) { return 0; } - simple_lock(&ap->a_vp->v_interlock); + mutex_enter(&ap->a_vp->v_interlock); error = VCALL(ap->a_vp, VOFFSET(vop_putpages), ap); return error; } diff --git a/sys/miscfs/kernfs/kernfs_subr.c b/sys/miscfs/kernfs/kernfs_subr.c index 4084fd88dc4d..82dc21085a02 100644 --- a/sys/miscfs/kernfs/kernfs_subr.c +++ b/sys/miscfs/kernfs/kernfs_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: kernfs_subr.c,v 1.13 2007/10/10 20:42:29 ad Exp $ */ +/* $NetBSD: kernfs_subr.c,v 1.14 2008/01/02 11:49:00 ad Exp $ */ /* * Copyright (c) 1993 @@ -73,7 +73,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kernfs_subr.c,v 1.13 2007/10/10 20:42:29 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kernfs_subr.c,v 1.14 2008/01/02 11:49:00 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_ipsec.h" @@ -326,7 +326,7 @@ kernfs_hashget(type, mp, kt, value) vp = KERNFSTOV(pp); if (pp->kfs_type == type && vp->v_mount == mp && pp->kfs_kt == kt && pp->kfs_value == value) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&kfs_ihash_lock); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) goto loop; diff --git a/sys/miscfs/kernfs/kernfs_vnops.c b/sys/miscfs/kernfs/kernfs_vnops.c index c29294824f4d..f04492d5cdca 100644 --- a/sys/miscfs/kernfs/kernfs_vnops.c +++ b/sys/miscfs/kernfs/kernfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: kernfs_vnops.c,v 1.133 2007/11/26 19:02:14 pooka Exp $ */ +/* $NetBSD: kernfs_vnops.c,v 1.134 2008/01/02 11:49:00 ad Exp $ */ /* * Copyright (c) 1992, 1993 @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kernfs_vnops.c,v 1.133 2007/11/26 19:02:14 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kernfs_vnops.c,v 1.134 2008/01/02 11:49:00 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_ipsec.h" @@ -1407,6 +1407,7 @@ kernfs_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + bool *a_recycle; } */ *ap = v; struct vnode *vp = ap->a_vp; const struct kernfs_node *kfs = VTOKERN(ap->a_vp); @@ -1415,7 +1416,7 @@ kernfs_inactive(v) struct secpolicy *sp; #endif - VOP_UNLOCK(vp, 0); + *ap->a_recycle = false; switch (kfs->kfs_type) { #ifdef IPSEC case KFSipsecsa: @@ -1423,21 +1424,21 @@ kernfs_inactive(v) if (m) m_freem(m); else - vgone(vp); + *ap->a_recycle = true; break; case KFSipsecsp: sp = key_getspbyid(kfs->kfs_value); if (sp) key_freesp(sp); else { - /* should never happen as we hold a refcnt */ - vgone(vp); + *ap->a_recycle = true; } break; #endif default: break; } + VOP_UNLOCK(vp, 0); return (0); } diff --git a/sys/miscfs/nullfs/null_vfsops.c b/sys/miscfs/nullfs/null_vfsops.c index 953dd1296038..f5ced049b943 100644 --- a/sys/miscfs/nullfs/null_vfsops.c +++ b/sys/miscfs/nullfs/null_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: null_vfsops.c,v 1.71 2007/12/08 19:29:51 pooka Exp $ */ +/* $NetBSD: null_vfsops.c,v 1.72 2008/01/02 11:49:00 ad Exp $ 
*/ /* * Copyright (c) 1999 National Aeronautics & Space Administration @@ -74,7 +74,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: null_vfsops.c,v 1.71 2007/12/08 19:29:51 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: null_vfsops.c,v 1.72 2008/01/02 11:49:00 ad Exp $"); #include #include @@ -183,18 +183,21 @@ nullfs_mount(mp, path, data, data_len) free(nmp, M_UFSMNT); /* XXX */ return (error); } - /* - * Unlock the node - */ - vp->v_vflag |= VV_ROOT; - VOP_UNLOCK(vp, 0); - /* * Keep a held reference to the root vnode. * It is vrele'd in nullfs_unmount. */ + vp->v_vflag |= VV_ROOT; nmp->nullm_rootvp = vp; + /* We don't need kernel_lock. */ + mp->mnt_iflag |= IMNT_MPSAFE; + + /* + * Unlock the node + */ + VOP_UNLOCK(vp, 0); + error = set_statvfs_info(path, UIO_USERSPACE, args->la.target, UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l); #ifdef NULLFS_DIAGNOSTIC @@ -231,12 +234,7 @@ nullfs_unmount(struct mount *mp, int mntflags) vprint("alias root of lower", null_rootvp); #endif /* - * Release reference on underlying root vnode - */ - vrele(null_rootvp); - - /* - * And blow it away for future re-use + * Blow it away for future re-use */ vgone(null_rootvp); diff --git a/sys/miscfs/overlay/overlay_vfsops.c b/sys/miscfs/overlay/overlay_vfsops.c index cc4b8650a2f9..d380c539d263 100644 --- a/sys/miscfs/overlay/overlay_vfsops.c +++ b/sys/miscfs/overlay/overlay_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: overlay_vfsops.c,v 1.46 2007/12/08 15:12:15 ad Exp $ */ +/* $NetBSD: overlay_vfsops.c,v 1.47 2008/01/02 11:49:01 ad Exp $ */ /* * Copyright (c) 1999, 2000 National Aeronautics & Space Administration @@ -74,7 +74,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: overlay_vfsops.c,v 1.46 2007/12/08 15:12:15 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: overlay_vfsops.c,v 1.47 2008/01/02 11:49:01 ad Exp $"); #include #include @@ -220,11 +220,7 @@ ov_unmount(struct mount *mp, int mntflags) vprint("alias root of lower", overlay_rootvp); #endif /* - * Release reference on underlying root vnode - */ - vrele(overlay_rootvp); - /* - * And blow it away for future re-use + * Blow it away for future re-use */ vgone(overlay_rootvp); /* diff --git a/sys/miscfs/portal/portal_vfsops.c b/sys/miscfs/portal/portal_vfsops.c index 4becf27fca2a..7ad19298eec7 100644 --- a/sys/miscfs/portal/portal_vfsops.c +++ b/sys/miscfs/portal/portal_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: portal_vfsops.c,v 1.68 2007/11/26 19:02:15 pooka Exp $ */ +/* $NetBSD: portal_vfsops.c,v 1.69 2008/01/02 11:49:01 ad Exp $ */ /* * Copyright (c) 1992, 1993, 1995 @@ -40,7 +40,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: portal_vfsops.c,v 1.68 2007/11/26 19:02:15 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: portal_vfsops.c,v 1.69 2008/01/02 11:49:01 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" @@ -179,11 +179,7 @@ portal_unmount(struct mount *mp, int mntflags) return (error); /* - * Release reference on underlying root vnode - */ - vrele(rtvp); - /* - * And blow it away for future re-use + * Blow it away for future re-use */ vgone(rtvp); /* diff --git a/sys/miscfs/procfs/procfs_subr.c b/sys/miscfs/procfs/procfs_subr.c index 3dfa4d23c951..cbc814af01c4 100644 --- a/sys/miscfs/procfs/procfs_subr.c +++ b/sys/miscfs/procfs/procfs_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: procfs_subr.c,v 1.82 2007/11/07 00:23:38 ad Exp $ */ +/* $NetBSD: procfs_subr.c,v 1.83 2008/01/02 11:49:01 ad Exp $ */ /*- * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc. 
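
A recurring simplification in the *_unmount() hunks above (fdesc, nullfs, overlay, portal, and umapfs later on) is dropping the vrele() that used to precede vgone() on the held root vnode. Before/after, as removed above; the reading that the reworked vgone() now disposes of the caller's reference itself is an inference from these removals, not something the patch states:

	/* Before: release our held reference, then destroy the vnode. */
	vrele(rtvp);
	vgone(rtvp);

	/* After: vgone() alone; it is assumed to consume the reference. */
	vgone(rtvp);
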
@@ -109,7 +109,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: procfs_subr.c,v 1.82 2007/11/07 00:23:38 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: procfs_subr.c,v 1.83 2008/01/02 11:49:01 ad Exp $"); #include #include @@ -619,7 +619,7 @@ loop: if (flags == 0) { mutex_exit(&pfs_ihash_lock); } else { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&pfs_ihash_lock); if (vget(vp, flags | LK_INTERLOCK)) goto loop; @@ -679,17 +679,17 @@ procfs_revoke_vnodes(p, arg) for (pfs = LIST_FIRST(ppp); pfs; pfs = pnext) { vp = PFSTOV(pfs); pnext = LIST_NEXT(pfs, pfs_hash); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_usecount > 0 && pfs->pfs_pid == p->p_pid && vp->v_mount == mp) { vp->v_usecount++; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); mutex_exit(&pfs_ihash_lock); VOP_REVOKE(vp, REVOKEALL); vrele(vp); mutex_enter(&pfs_ihash_lock); } else { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } } mutex_exit(&pfs_ihash_lock); diff --git a/sys/miscfs/procfs/procfs_vnops.c b/sys/miscfs/procfs/procfs_vnops.c index 38b8130c209a..d8e9190fd5f3 100644 --- a/sys/miscfs/procfs/procfs_vnops.c +++ b/sys/miscfs/procfs/procfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: procfs_vnops.c,v 1.163 2007/11/26 19:02:16 pooka Exp $ */ +/* $NetBSD: procfs_vnops.c,v 1.164 2008/01/02 11:49:01 ad Exp $ */ /*- * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc. @@ -112,7 +112,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.163 2007/11/26 19:02:16 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.164 2008/01/02 11:49:01 ad Exp $"); #include #include @@ -424,17 +424,13 @@ procfs_inactive(v) } */ *ap = v; struct vnode *vp = ap->a_vp; struct pfsnode *pfs = VTOPFS(vp); - bool recycle; mutex_enter(&proclist_lock); - recycle = (p_find(pfs->pfs_pid, PFIND_LOCKED) == NULL); + *ap->a_recycle = (p_find(pfs->pfs_pid, PFIND_LOCKED) == NULL); mutex_exit(&proclist_lock); VOP_UNLOCK(vp, 0); - if (recycle) - vgone(vp); - return (0); } diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c index cd899defadf9..ea4cd5e39efd 100644 --- a/sys/miscfs/specfs/spec_vnops.c +++ b/sys/miscfs/specfs/spec_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: spec_vnops.c,v 1.110 2007/12/02 13:56:18 hannken Exp $ */ +/* $NetBSD: spec_vnops.c,v 1.111 2008/01/02 11:49:02 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: spec_vnops.c,v 1.110 2007/12/02 13:56:18 hannken Exp $"); +__KERNEL_RCSID(0, "$NetBSD: spec_vnops.c,v 1.111 2008/01/02 11:49:02 ad Exp $"); #include #include @@ -417,11 +417,11 @@ spec_ioctl(void *v) vp = ap->a_vp; dev = NODEV; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specinfo) { dev = vp->v_rdev; } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); if (dev == NODEV) { return ENXIO; } @@ -460,11 +460,11 @@ spec_poll(void *v) vp = ap->a_vp; dev = NODEV; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specinfo) { dev = vp->v_rdev; } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); if (dev == NODEV) { return POLLERR; } diff --git a/sys/miscfs/syncfs/sync_subr.c b/sys/miscfs/syncfs/sync_subr.c index 40339351bdf9..14d85bf4625d 100644 --- a/sys/miscfs/syncfs/sync_subr.c +++ b/sys/miscfs/syncfs/sync_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: sync_subr.c,v 1.33 2007/12/08 15:47:32 ad Exp $ */ +/* $NetBSD: sync_subr.c,v 1.34 
2008/01/02 11:49:02 ad Exp $ */ /* * Copyright 1997 Marshall Kirk McKusick. All Rights Reserved. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sync_subr.c,v 1.33 2007/12/08 15:47:32 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sync_subr.c,v 1.34 2008/01/02 11:49:02 ad Exp $"); #include #include @@ -86,7 +86,7 @@ vn_initialize_syncerd() TAILQ_INIT(&syncer_workitem_pending[i]); mutex_init(&syncer_mutex, MUTEX_DEFAULT, IPL_NONE); - mutex_init(&syncer_data_lock, MUTEX_DEFAULT, IPL_VM); /* XXX vmlocking */ + mutex_init(&syncer_data_lock, MUTEX_DEFAULT, IPL_NONE); cv_init(&syncer_cv, "syncer"); } @@ -140,7 +140,7 @@ vn_syncer_add1(vp, delayx) * position of the vnode. syncer_data_lock * does not protect v_iflag. */ - /* notyet KASSERT(mutex_owned(&vp->v_interlock)); */ + KASSERT(mutex_owned(&vp->v_interlock)); vp->v_iflag |= VI_ONWORKLST; } @@ -158,7 +158,7 @@ vn_syncer_add_to_worklist(vp, delayx) int delayx; { - /* notyet KASSERT(mutex_owned(&vp->v_interlock)); */ + KASSERT(mutex_owned(&vp->v_interlock)); mutex_enter(&syncer_data_lock); vn_syncer_add1(vp, delayx); @@ -174,7 +174,7 @@ vn_syncer_remove_from_worklist(vp) { struct synclist *slp; - /* not yet KASSERT(mutex_owned(&vp->v_interlock)); */ + KASSERT(mutex_owned(&vp->v_interlock)); mutex_enter(&syncer_data_lock); @@ -216,7 +216,7 @@ sched_sync(void *v) while ((vp = TAILQ_FIRST(slp)) != NULL) { /* We are locking in the wrong direction. */ - if (simple_lock_try(&vp->v_interlock)) { + if (mutex_tryenter(&vp->v_interlock)) { mutex_exit(&syncer_data_lock); if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK) == 0) { diff --git a/sys/miscfs/syncfs/sync_vnops.c b/sys/miscfs/syncfs/sync_vnops.c index 2356523e5936..453c921f07f1 100644 --- a/sys/miscfs/syncfs/sync_vnops.c +++ b/sys/miscfs/syncfs/sync_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: sync_vnops.c,v 1.19 2007/11/26 19:02:18 pooka Exp $ */ +/* $NetBSD: sync_vnops.c,v 1.20 2008/01/02 11:49:02 ad Exp $ */ /* * Copyright 1997 Marshall Kirk McKusick. All Rights Reserved. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sync_vnops.c,v 1.19 2007/11/26 19:02:18 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sync_vnops.c,v 1.20 2008/01/02 11:49:02 ad Exp $"); #include #include @@ -97,7 +97,9 @@ vfs_allocate_syncvnode(mp) } next = start; } + mutex_enter(&vp->v_interlock); vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); + mutex_exit(&vp->v_interlock); mp->mnt_syncer = vp; return (0); } @@ -113,9 +115,10 @@ vfs_deallocate_syncvnode(mp) vp = mp->mnt_syncer; mp->mnt_syncer = NULL; + mutex_enter(&vp->v_interlock); vn_syncer_remove_from_worklist(vp); vp->v_writecount = 0; - vrele(vp); + mutex_exit(&vp->v_interlock); vgone(vp); } @@ -146,7 +149,9 @@ sync_fsync(v) /* * Move ourselves to the back of the sync list. 
*/ + mutex_enter(&syncvp->v_interlock); vn_syncer_add_to_worklist(syncvp, syncdelay); + mutex_exit(&syncvp->v_interlock); /* * Walk the list of vnodes pushing all that are dirty and diff --git a/sys/miscfs/umapfs/umap_vfsops.c b/sys/miscfs/umapfs/umap_vfsops.c index b4b87ef4000b..c0e3dc50a48b 100644 --- a/sys/miscfs/umapfs/umap_vfsops.c +++ b/sys/miscfs/umapfs/umap_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: umap_vfsops.c,v 1.73 2007/12/08 19:29:51 pooka Exp $ */ +/* $NetBSD: umap_vfsops.c,v 1.74 2008/01/02 11:49:02 ad Exp $ */ /* * Copyright (c) 1992, 1993 @@ -41,7 +41,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: umap_vfsops.c,v 1.73 2007/12/08 19:29:51 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: umap_vfsops.c,v 1.74 2008/01/02 11:49:02 ad Exp $"); #include #include @@ -256,18 +256,14 @@ umapfs_unmount(struct mount *mp, int mntflags) vprint("alias root of lower", rtvp); #endif /* - * Release reference on underlying root vnode - */ - vrele(rtvp); - /* - * And blow it away for future re-use + * Blow it away for future re-use */ vgone(rtvp); /* * Finally, throw away the umap_mount structure */ mutex_destroy(&amp->umapm_hashlock); - free(mp->mnt_data, M_UFSMNT); /* XXX */ + free(amp, M_UFSMNT); /* XXX */ mp->mnt_data = 0; return (0); } diff --git a/sys/netsmb/smb_iod.c b/sys/netsmb/smb_iod.c index 77cb7606edd1..bd38598f07aa 100644 --- a/sys/netsmb/smb_iod.c +++ b/sys/netsmb/smb_iod.c @@ -1,4 +1,4 @@ -/* $NetBSD: smb_iod.c,v 1.27 2007/07/09 21:11:15 ad Exp $ */ +/* $NetBSD: smb_iod.c,v 1.28 2008/01/02 11:49:02 ad Exp $ */ /* * Copyright (c) 2000-2001 Boris Popov @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: smb_iod.c,v 1.27 2007/07/09 21:11:15 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: smb_iod.c,v 1.28 2008/01/02 11:49:02 ad Exp $"); #include #include @@ -409,7 +409,7 @@ smb_iod_request(struct smbiod *iod, int event, void *ident) return 0; } smb_iod_wakeup(iod); - ltsleep(evp, PWAIT | PNORELOCK, "smbevw", 0, SMB_IOD_EVLOCKPTR(iod)); + mtsleep(evp, PWAIT | PNORELOCK, "smbevw", 0, SMB_IOD_EVLOCKPTR(iod)); error = evp->ev_error; free(evp, M_SMBIOD); return error; @@ -475,7 +475,7 @@ smb_iod_addrq(struct smb_rq *rqp) break; iod->iod_muxwant++; /* XXX use interruptible sleep? */ - ltsleep(&iod->iod_muxwant, PWAIT, "smbmux", + mtsleep(&iod->iod_muxwant, PWAIT, "smbmux", 0, SMB_IOD_RQLOCKPTR(iod)); } iod->iod_muxcnt++; @@ -501,7 +501,7 @@ smb_iod_removerq(struct smb_rq *rqp) SMB_IOD_RQLOCK(iod); while (rqp->sr_flags & SMBR_XLOCK) { rqp->sr_flags |= SMBR_XLOCKWANT; - ltsleep(rqp, PWAIT, "smbxrm", 0, SMB_IOD_RQLOCKPTR(iod)); + mtsleep(rqp, PWAIT, "smbxrm", 0, SMB_IOD_RQLOCKPTR(iod)); } SIMPLEQ_REMOVE(&iod->iod_rqlist, rqp, smb_rq, sr_link); iod->iod_muxcnt--; @@ -535,7 +535,7 @@ smb_iod_waitrq(struct smb_rq *rqp) SMBRQ_SLOCK(rqp); if (rqp->sr_rpgen == rqp->sr_rplast) { /* XXX interruptible sleep?
*/ - ltsleep(&rqp->sr_state, PWAIT, "smbwrq", 0, + mtsleep(&rqp->sr_state, PWAIT, "smbwrq", 0, SMBRQ_SLOCKPTR(rqp)); } rqp->sr_rplast++; diff --git a/sys/netsmb/smb_rq.c b/sys/netsmb/smb_rq.c index 5d80b52f945c..562c961336aa 100644 --- a/sys/netsmb/smb_rq.c +++ b/sys/netsmb/smb_rq.c @@ -1,4 +1,4 @@ -/* $NetBSD: smb_rq.c,v 1.28 2007/03/12 18:18:36 ad Exp $ */ +/* $NetBSD: smb_rq.c,v 1.29 2008/01/02 11:49:03 ad Exp $ */ /* * Copyright (c) 2000-2001, Boris Popov @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: smb_rq.c,v 1.28 2007/03/12 18:18:36 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: smb_rq.c,v 1.29 2008/01/02 11:49:03 ad Exp $"); #include #include @@ -206,7 +206,7 @@ smb_rq_enqueue(struct smb_rq *rqp) SMBS_ST_LOCK(ssp); if (ssp->ss_flags & SMBS_RECONNECTING) { SMBS_ST_UNLOCK(ssp); - error = ltsleep(&ssp->ss_vcgenid, + error = mtsleep(&ssp->ss_vcgenid, PWAIT | PCATCH | PNORELOCK, "smbtrcn", hz, SMBS_ST_LOCKPTR(ssp)); if (error && error != EWOULDBLOCK) diff --git a/sys/netsmb/smb_subr.h b/sys/netsmb/smb_subr.h index 591b808c7208..aae8d2f2ca7f 100644 --- a/sys/netsmb/smb_subr.h +++ b/sys/netsmb/smb_subr.h @@ -1,4 +1,4 @@ -/* $NetBSD: smb_subr.h,v 1.16 2006/08/17 17:11:29 christos Exp $ */ +/* $NetBSD: smb_subr.h,v 1.17 2008/01/02 11:49:03 ad Exp $ */ /* * Copyright (c) 2000-2001, Boris Popov @@ -79,11 +79,11 @@ void m_dumpm(struct mbuf *m); * Compatibility wrappers for simple locks */ -#define smb_slock simplelock -#define smb_sl_init(mtx, desc) simple_lock_init(mtx) -#define smb_sl_destroy(mtx) /*simple_lock_destroy(mtx)*/ -#define smb_sl_lock(mtx) simple_lock(mtx) -#define smb_sl_unlock(mtx) simple_unlock(mtx) +#define smb_slock kmutex +#define smb_sl_init(mtx, desc) mutex_init((mtx), MUTEX_DEFAULT, IPL_NONE) +#define smb_sl_destroy(mtx) mutex_destroy(mtx) +#define smb_sl_lock(mtx) mutex_enter(mtx) +#define smb_sl_unlock(mtx) mutex_exit(mtx) #define SMB_STRFREE(p) do { if (p) smb_strfree(p); } while(0) diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c index 9fd6863944c7..30bdaf4318d0 100644 --- a/sys/nfs/nfs_bio.c +++ b/sys/nfs/nfs_bio.c @@ -1,4 +1,4 @@ -/* $NetBSD: nfs_bio.c,v 1.171 2007/12/04 17:42:30 yamt Exp $ */ +/* $NetBSD: nfs_bio.c,v 1.172 2008/01/02 11:49:03 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.171 2007/12/04 17:42:30 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.172 2008/01/02 11:49:03 ad Exp $"); #include "opt_nfs.h" #include "opt_ddb.h" @@ -183,7 +183,7 @@ nfs_bioread(vp, uio, ioflag, cred, cflag) bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, l); if (!bp) return (EINTR); - if ((bp->b_flags & B_DONE) == 0) { + if ((bp->b_oflags & BO_DONE) == 0) { bp->b_flags |= B_READ; error = nfs_doio(bp); if (error) { @@ -227,7 +227,7 @@ diragain: bp = nfs_getcacheblk(vp, NFSDC_BLKNO(ndp), NFS_DIRBLKSIZ, l); if (!bp) return (EINTR); - if ((bp->b_flags & B_DONE) == 0) { + if ((bp->b_oflags & BO_DONE) == 0) { bp->b_flags |= B_READ; bp->b_dcookie = ndp->dc_blkcookie; error = nfs_doio(bp); @@ -393,7 +393,7 @@ diragain: rabp = nfs_getcacheblk(vp, NFSDC_BLKNO(nndp), NFS_DIRBLKSIZ, l); if (rabp) { - if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) { + if ((rabp->b_oflags & (BO_DONE | BO_DELWRI)) == 0) { rabp->b_dcookie = nndp->dc_cookie; rabp->b_flags |= (B_READ | B_ASYNC); if (nfs_asyncio(rabp)) { @@ -541,7 +541,7 @@ nfs_write(v) * backout size and free pages past eof. 
*/ np->n_size = oldsize; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); (void)VOP_PUTPAGES(vp, round_page(vp->v_size), 0, PGO_SYNCIO | PGO_FREE); } @@ -561,7 +561,7 @@ nfs_write(v) if ((oldoff & ~(nmp->nm_wsize - 1)) != (uio->uio_offset & ~(nmp->nm_wsize - 1))) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(oldoff & ~(nmp->nm_wsize - 1)), round_page((uio->uio_offset + nmp->nm_wsize - 1) & @@ -571,7 +571,7 @@ nfs_write(v) if (wrotedata) VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0)); if (error == 0 && (ioflag & IO_SYNC) != 0) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(origoff & ~(nmp->nm_wsize - 1)), round_page((uio->uio_offset + nmp->nm_wsize - 1) & @@ -624,27 +624,28 @@ nfs_vinvalbuf(vp, flags, cred, l, intrflg) { struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); - int error = 0, slpflag, slptimeo; + int error = 0, slptimeo; + bool catch; if ((nmp->nm_flag & NFSMNT_INT) == 0) intrflg = 0; if (intrflg) { - slpflag = PCATCH; + catch = true; slptimeo = 2 * hz; } else { - slpflag = 0; + catch = false; slptimeo = 0; } /* * First wait for any other process doing a flush to complete. */ - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); while (np->n_flag & NFLUSHINPROG) { np->n_flag |= NFLUSHWANT; - error = ltsleep(&np->n_flag, PRIBIO + 2, "nfsvinval", + error = mtsleep(&np->n_flag, PRIBIO + 2, "nfsvinval", slptimeo, &vp->v_interlock); if (error && intrflg && nfs_sigintr(nmp, NULL, l)) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return EINTR; } } @@ -653,8 +654,8 @@ nfs_vinvalbuf(vp, flags, cred, l, intrflg) * Now, flush as required. */ np->n_flag |= NFLUSHINPROG; - simple_unlock(&vp->v_interlock); - error = vinvalbuf(vp, flags, cred, l, slpflag, 0); + mutex_exit(&vp->v_interlock); + error = vinvalbuf(vp, flags, cred, l, catch, 0); while (error) { if (intrflg && nfs_sigintr(nmp, NULL, l)) { error = EINTR; @@ -662,7 +663,7 @@ nfs_vinvalbuf(vp, flags, cred, l, intrflg) } error = vinvalbuf(vp, flags, cred, l, 0, slptimeo); } - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (error == 0) np->n_flag &= ~NMODIFIED; np->n_flag &= ~NFLUSHINPROG; @@ -670,7 +671,7 @@ nfs_vinvalbuf(vp, flags, cred, l, intrflg) np->n_flag &= ~NFLUSHWANT; wakeup(&np->n_flag); } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return error; } @@ -918,9 +919,7 @@ nfs_doio_read(bp, uiop) printf("nfs_doio: type %x unexpected\n", vp->v_type); break; } - if (error) { - bp->b_error = error; - } + bp->b_error = error; return error; } @@ -968,7 +967,7 @@ again: /* * this page belongs to our object. */ - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); /* * write out the page stably if it's about to * be released because we can't resend it @@ -985,19 +984,19 @@ again: */ if ((pgs[i]->flags & PG_NEEDCOMMIT) == 0) needcommit = false; - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } else { iomode = NFSV3WRITE_FILESYNC; needcommit = false; } } if (!needcommit && iomode == NFSV3WRITE_UNSTABLE) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); for (i = 0; i < npages; i++) { pgs[i]->flags |= PG_NEEDCOMMIT | PG_RDONLY; pmap_page_protect(pgs[i], VM_PROT_READ); } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); pageprotected = true; /* pages can't be modified during i/o. 
*/ } else pageprotected = false; @@ -1049,11 +1048,11 @@ again: * pages are now on stable storage. */ uiop->uio_resid = 0; - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); for (i = 0; i < npages; i++) { pgs[i]->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY); } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return 0; } else if (error == NFSERR_STALEWRITEVERF) { nfs_clearcommit(vp->v_mount); @@ -1098,11 +1097,11 @@ again: * re-dirty pages so that they will be passed * to us later again. */ - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); for (i = 0; i < npages; i++) { pgs[i]->flags &= ~PG_CLEAN; } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } mutex_exit(&np->n_commitlock); } else @@ -1114,11 +1113,11 @@ again: mutex_enter(&np->n_commitlock); nfs_del_committed_range(vp, off, cnt); mutex_exit(&np->n_commitlock); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); for (i = 0; i < npages; i++) { pgs[i]->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY); } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } else { /* * we got an error. @@ -1165,9 +1164,7 @@ nfs_doio_phys(bp, uiop) nfs_clearcommit(bp->b_vp->v_mount); } } - if (error) { - bp->b_error = error; - } + bp->b_error = error; return error; } @@ -1274,7 +1271,7 @@ nfs_getpages(v) if (!write && (np->n_flag & NMODIFIED) == 0 && pgs != NULL) { if (!locked) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); } for (i = 0; i < npages; i++) { pg = pgs[i]; @@ -1284,7 +1281,7 @@ nfs_getpages(v) pg->flags |= PG_RDONLY; } if (!locked) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } } if (!write) { @@ -1311,9 +1308,9 @@ nfs_getpages(v) * available and put back original pgs array. */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_page_unbusy(pgs, npages); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); *ap->a_count = 0; memcpy(pgs, opgs, npages * sizeof(struct vm_pages *)); @@ -1325,7 +1322,7 @@ nfs_getpages(v) } np->n_flag |= NMODIFIED; if (!locked) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); } for (i = 0; i < npages; i++) { pg = pgs[i]; @@ -1335,7 +1332,7 @@ nfs_getpages(v) pg->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY); } if (!locked) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } if (v3) { mutex_exit(&np->n_commitlock); diff --git a/sys/nfs/nfs_node.c b/sys/nfs/nfs_node.c index 65967245b50e..73ed3bdde487 100644 --- a/sys/nfs/nfs_node.c +++ b/sys/nfs/nfs_node.c @@ -1,4 +1,4 @@ -/* $NetBSD: nfs_node.c,v 1.96 2007/11/26 19:02:20 pooka Exp $ */ +/* $NetBSD: nfs_node.c,v 1.97 2008/01/02 11:49:03 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: nfs_node.c,v 1.96 2007/11/26 19:02:20 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nfs_node.c,v 1.97 2008/01/02 11:49:03 ad Exp $"); #include "opt_nfs.h" @@ -224,12 +224,11 @@ nfs_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + bool *a_recycle; } */ *ap = v; struct nfsnode *np; struct sillyrename *sp; - struct lwp *l = curlwp; struct vnode *vp = ap->a_vp; - bool removed; np = VTONFS(vp); if (prtactive && vp->v_usecount != 0) @@ -240,8 +239,8 @@ nfs_inactive(v) } else sp = NULL; if (sp != NULL) - nfs_vinvalbuf(vp, 0, sp->s_cred, l, 1); - removed = (np->n_flag & NREMOVED) != 0; + nfs_vinvalbuf(vp, 0, sp->s_cred, curlwp, 1); + *ap->a_recycle = (np->n_flag & NREMOVED) != 0; np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT | NEOFVALID | NTRUNCDELAYED); @@ -251,10 
+250,6 @@ nfs_inactive(v) VOP_UNLOCK(vp, 0); - /* XXXMP only kernel_lock protects vp */ - if (removed) - vrecycle(vp, NULL, l); - if (sp != NULL) { int error; diff --git a/sys/nfs/nfs_socket.c b/sys/nfs/nfs_socket.c index 65a96dd321d4..b463e4b66fe3 100644 --- a/sys/nfs/nfs_socket.c +++ b/sys/nfs/nfs_socket.c @@ -1,4 +1,4 @@ -/* $NetBSD: nfs_socket.c,v 1.165 2007/12/04 17:42:31 yamt Exp $ */ +/* $NetBSD: nfs_socket.c,v 1.166 2008/01/02 11:49:03 ad Exp $ */ /* * Copyright (c) 1989, 1991, 1993, 1995 @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: nfs_socket.c,v 1.165 2007/12/04 17:42:31 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nfs_socket.c,v 1.166 2008/01/02 11:49:03 ad Exp $"); #include "fs_nfs.h" #include "opt_nfs.h" @@ -2242,7 +2242,9 @@ nfsrv_rcv(struct nfssvc_sock *slp) auio.uio_resid = 1000000000; /* not need to setup uio_vmspace */ flags = MSG_DONTWAIT; + KERNEL_LOCK(1, curlwp); error = (*so->so_receive)(so, &nam, &auio, &mp, NULL, &flags); + KERNEL_UNLOCK_ONE(curlwp); if (error || mp == NULL) { if (error == EWOULDBLOCK) setflags |= SLP_A_NEEDQ; @@ -2278,8 +2280,10 @@ nfsrv_rcv(struct nfssvc_sock *slp) auio.uio_resid = 1000000000; /* not need to setup uio_vmspace */ flags = MSG_DONTWAIT; + KERNEL_LOCK(1, curlwp); error = (*so->so_receive)(so, &nam, &auio, &mp, NULL, &flags); + KERNEL_UNLOCK_ONE(curlwp); if (mp) { if (nam) { m = nam; diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c index 30e033ab2490..d5ddbb902808 100644 --- a/sys/nfs/nfs_subs.c +++ b/sys/nfs/nfs_subs.c @@ -1,4 +1,4 @@ -/* $NetBSD: nfs_subs.c,v 1.194 2007/12/08 19:29:51 pooka Exp $ */ +/* $NetBSD: nfs_subs.c,v 1.195 2008/01/02 11:49:04 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -70,7 +70,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: nfs_subs.c,v 1.194 2007/12/08 19:29:51 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nfs_subs.c,v 1.195 2008/01/02 11:49:04 ad Exp $"); #include "fs_nfs.h" #include "opt_nfs.h" @@ -1162,9 +1162,9 @@ nfs_dirhash(off) } #define _NFSDC_MTX(np) (&NFSTOV(np)->v_interlock) -#define NFSDC_LOCK(np) simple_lock(_NFSDC_MTX(np)) -#define NFSDC_UNLOCK(np) simple_unlock(_NFSDC_MTX(np)) -#define NFSDC_ASSERT_LOCKED(np) LOCK_ASSERT(simple_lock_held(_NFSDC_MTX(np))) +#define NFSDC_LOCK(np) mutex_enter(_NFSDC_MTX(np)) +#define NFSDC_UNLOCK(np) mutex_exit(_NFSDC_MTX(np)) +#define NFSDC_ASSERT_LOCKED(np) KASSERT(mutex_owned(_NFSDC_MTX(np))) void nfs_initdircache(vp) @@ -1689,7 +1689,6 @@ nfs_loadattrcache(vpp, fp, vaper, flags) vp->v_data = NULL; VOP_UNLOCK(vp, 0); vp->v_op = spec_vnodeop_p; - vrele(vp); vgone(vp); lockmgr(&nvp->v_lock, LK_EXCLUSIVE, &nvp->v_interlock); @@ -1772,7 +1771,7 @@ nfs_loadattrcache(vpp, fp, vaper, flags) np->n_flag |= NTRUNCDELAYED; } else { genfs_node_wrlock(vp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); (void)VOP_PUTPAGES(vp, 0, 0, PGO_SYNCIO | PGO_CLEANIT | PGO_FREE | PGO_ALLPAGES); @@ -1849,7 +1848,7 @@ nfs_delayedtruncate(vp) if (np->n_flag & NTRUNCDELAYED) { np->n_flag &= ~NTRUNCDELAYED; genfs_node_wrlock(vp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); (void)VOP_PUTPAGES(vp, 0, 0, PGO_SYNCIO | PGO_CLEANIT | PGO_FREE | PGO_ALLPAGES); uvm_vnp_setsize(vp, np->n_size); @@ -2652,7 +2651,7 @@ nfs_clearcommit(mp) struct nfsmount *nmp = VFSTONFS(mp); rw_enter(&nmp->nm_writeverflock, RW_WRITER); - + mutex_enter(&mntvnode_lock); TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { KASSERT(vp->v_mount == mp); if (vp->v_type != VREG) @@ -2662,12 +2661,13 @@ nfs_clearcommit(mp) np->n_pushedhi = 0; np->n_commitflags &= 
~(NFS_COMMIT_PUSH_VALID | NFS_COMMIT_PUSHED_VALID); - simple_lock(&vp->v_uobj.vmobjlock); + mutex_enter(&vp->v_uobj.vmobjlock); TAILQ_FOREACH(pg, &vp->v_uobj.memq, listq) { pg->flags &= ~PG_NEEDCOMMIT; } - simple_unlock(&vp->v_uobj.vmobjlock); + mutex_exit(&vp->v_uobj.vmobjlock); } + mutex_exit(&mntvnode_lock); mutex_enter(&nmp->nm_lock); nmp->nm_iflag &= ~NFSMNT_STALEWRITEVERF; mutex_exit(&nmp->nm_lock); diff --git a/sys/nfs/nfs_syscalls.c b/sys/nfs/nfs_syscalls.c index bd05e5c5046b..adab0ae4e28b 100644 --- a/sys/nfs/nfs_syscalls.c +++ b/sys/nfs/nfs_syscalls.c @@ -1,4 +1,4 @@ -/* $NetBSD: nfs_syscalls.c,v 1.128 2007/12/20 23:03:14 dsl Exp $ */ +/* $NetBSD: nfs_syscalls.c,v 1.129 2008/01/02 11:49:04 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: nfs_syscalls.c,v 1.128 2007/12/20 23:03:14 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nfs_syscalls.c,v 1.129 2008/01/02 11:49:04 ad Exp $"); #include "fs_nfs.h" #include "opt_nfs.h" @@ -479,11 +479,13 @@ nfssvc_addsock(fp, mynam) slp->ns_aflags = SLP_A_NEEDQ; slp->ns_gflags = 0; slp->ns_sflags = 0; + KERNEL_LOCK(1, curlwp); s = splsoftnet(); so->so_upcallarg = (void *)slp; so->so_upcall = nfsrv_soupcall; so->so_rcv.sb_flags |= SB_UPCALL; splx(s); + KERNEL_UNLOCK_ONE(curlwp); nfsrv_wakenfsd(slp); return (0); } @@ -855,12 +857,14 @@ nfsrv_zapsock(slp) so = slp->ns_so; KASSERT(so != NULL); + KERNEL_LOCK(1, curlwp); s = splsoftnet(); so->so_upcall = NULL; so->so_upcallarg = NULL; so->so_rcv.sb_flags &= ~SB_UPCALL; splx(s); soshutdown(so, SHUT_RDWR); + KERNEL_UNLOCK_ONE(curlwp); if (slp->ns_nam) m_free(slp->ns_nam); @@ -1045,10 +1049,8 @@ nfssvc_iod(void *arg) struct nfs_iod *myiod; struct nfsmount *nmp; - KERNEL_LOCK(1, curlwp); myiod = kmem_alloc(sizeof(*myiod), KM_SLEEP); mutex_init(&myiod->nid_lock, MUTEX_DEFAULT, IPL_NONE); - KERNEL_UNLOCK_LAST(curlwp); cv_init(&myiod->nid_cv, "nfsiod"); myiod->nid_exiting = false; myiod->nid_mount = NULL; @@ -1123,10 +1125,8 @@ quit: mutex_exit(&myiod->nid_lock); cv_destroy(&myiod->nid_cv); - KERNEL_LOCK(1, curlwp); mutex_destroy(&myiod->nid_lock); kmem_free(myiod, sizeof(*myiod)); - KERNEL_UNLOCK_LAST(curlwp); kthread_exit(0); } @@ -1166,10 +1166,8 @@ nfs_set_niothreads(int newval) */ mutex_exit(&nfs_iodlist_lock); - KERNEL_LOCK(1, curlwp); error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, nfssvc_iod, NULL, NULL, "nfsio"); - KERNEL_UNLOCK_LAST(curlwp); mutex_enter(&nfs_iodlist_lock); if (error) { /* give up */ diff --git a/sys/nfs/nfs_vfsops.c b/sys/nfs/nfs_vfsops.c index 6eea244eeb18..676c173d444e 100644 --- a/sys/nfs/nfs_vfsops.c +++ b/sys/nfs/nfs_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: nfs_vfsops.c,v 1.188 2007/11/26 19:02:21 pooka Exp $ */ +/* $NetBSD: nfs_vfsops.c,v 1.189 2008/01/02 11:49:04 ad Exp $ */ /* * Copyright (c) 1989, 1993, 1995 @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: nfs_vfsops.c,v 1.188 2007/11/26 19:02:21 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nfs_vfsops.c,v 1.189 2008/01/02 11:49:04 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" @@ -893,7 +893,6 @@ nfs_unmount(struct mount *mp, int mntflags) * There are two reference counts to get rid of here * (see comment in mountnfs()). */ - vrele(vp); vput(vp); vgone(vp); nfs_disconnect(nmp); @@ -945,37 +944,46 @@ nfs_sync(mp, waitfor, cred) int waitfor; kauth_cred_t cred; { - struct vnode *vp, *nvp; + struct vnode *vp, *mvp; int error, allerror = 0; /* * Force stale buffer cache information to be flushed. 
*/ + if ((mvp = valloc(mp)) == NULL) + return (ENOMEM); loop: /* * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() * and vclean() can be called indirectly */ - for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { - /* - * If the vnode that we are about to sync is no longer - * associated with this mount point, start over. - */ - if (vp->v_mount != mp) - goto loop; - nvp = TAILQ_NEXT(vp, v_mntvnodes); + mutex_enter(&mntvnode_lock); + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + if (vp->v_mount != mp || vismarker(vp)) + continue; + mutex_enter(&vp->v_interlock); + /* XXX MNT_LAZY cannot be right? */ if (waitfor == MNT_LAZY || VOP_ISLOCKED(vp) || (LIST_EMPTY(&vp->v_dirtyblkhd) && - UVM_OBJ_IS_CLEAN(&vp->v_uobj))) + UVM_OBJ_IS_CLEAN(&vp->v_uobj))) { + mutex_exit(&vp->v_interlock); continue; - if (vget(vp, LK_EXCLUSIVE)) + } + mutex_exit(&mntvnode_lock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { + (void)vunmark(mvp); goto loop; + } error = VOP_FSYNC(vp, cred, waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0); if (error) allerror = error; vput(vp); + mutex_enter(&mntvnode_lock); } + mutex_exit(&mntvnode_lock); + vfree(mvp); return (allerror); } diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c index 282a69793891..07141498ae74 100644 --- a/sys/nfs/nfs_vnops.c +++ b/sys/nfs/nfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: nfs_vnops.c,v 1.262 2007/12/17 16:04:31 yamt Exp $ */ +/* $NetBSD: nfs_vnops.c,v 1.263 2008/01/02 11:49:04 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: nfs_vnops.c,v 1.262 2007/12/17 16:04:31 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nfs_vnops.c,v 1.263 2008/01/02 11:49:04 ad Exp $"); #include "opt_inet.h" #include "opt_nfs.h" @@ -3284,7 +3284,7 @@ nfs_flush(struct vnode *vp, kauth_cred_t cred, int waitfor, struct lwp *l, int flushflags = PGO_ALLPAGES|PGO_CLEANIT|PGO_SYNCIO; UVMHIST_FUNC("nfs_flush"); UVMHIST_CALLED(ubchist); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, 0, 0, flushflags); if (np->n_flag & NWRITEERR) { error = np->n_error; diff --git a/sys/rump/fs/lib/libp2k/p2k.c b/sys/rump/fs/lib/libp2k/p2k.c index b21da382d4f6..5f20ba799cff 100644 --- a/sys/rump/fs/lib/libp2k/p2k.c +++ b/sys/rump/fs/lib/libp2k/p2k.c @@ -1,4 +1,4 @@ -/* $NetBSD: p2k.c,v 1.32 2008/01/01 22:31:42 pooka Exp $ */ +/* $NetBSD: p2k.c,v 1.33 2008/01/02 11:49:05 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved. @@ -683,11 +683,12 @@ int p2k_node_inactive(struct puffs_usermount *pu, void *opc) { struct vnode *vp = opc; + bool recycle; int rv; (void) RUMP_VOP_PUTPAGES(vp, 0, 0, PGO_ALLPAGES); VLE(vp); - rv = RUMP_VOP_INACTIVE(vp); + rv = RUMP_VOP_INACTIVE(vp, &recycle); if (vp->v_usecount == 0) puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N1); diff --git a/sys/rump/fs/lib/libukfs/ukfs.c b/sys/rump/fs/lib/libukfs/ukfs.c index 6aa218c2b15e..bab096f94459 100644 --- a/sys/rump/fs/lib/libukfs/ukfs.c +++ b/sys/rump/fs/lib/libukfs/ukfs.c @@ -1,4 +1,4 @@ -/* $NetBSD: ukfs.c,v 1.15 2007/11/26 19:02:23 pooka Exp $ */ +/* $NetBSD: ukfs.c,v 1.16 2008/01/02 11:49:05 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved. 
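The nfs_sync() rewrite above introduces the pattern this merge uses for every walk of a mount's vnode list: mntvnode_lock guards the list, and a marker vnode (valloc()/vmark()/vunmark(), declared in the sys/sys/vnode.h hunks further down) holds the iteration position while the lock is dropped around per-vnode work. A condensed sketch of the idiom follows; do_one_vnode() is a hypothetical callback and the error handling of the real loop is trimmed.

	#include <sys/param.h>
	#include <sys/mount.h>
	#include <sys/mutex.h>
	#include <sys/vnode.h>

	static void
	do_one_vnode(struct vnode *vp)
	{

		/* hypothetical per-vnode work, e.g. a VOP_FSYNC() call */
	}

	void
	example_vnode_walk(struct mount *mp)
	{
		struct vnode *vp, *mvp;

		if ((mvp = valloc(mp)) == NULL)		/* marker vnode */
			return;
		mutex_enter(&mntvnode_lock);
		for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
		    vp = vunmark(mvp)) {
			vmark(mvp, vp);			/* remember position */
			if (vp->v_mount != mp || vismarker(vp))
				continue;		/* skip other markers */
			mutex_enter(&vp->v_interlock);
			mutex_exit(&mntvnode_lock);	/* marker keeps place */
			if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0) {
				do_one_vnode(vp);
				vput(vp);
			}
			mutex_enter(&mntvnode_lock);
		}
		mutex_exit(&mntvnode_lock);
		vfree(mvp);
	}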
@@ -155,6 +155,7 @@ ukfs_release(struct ukfs *fs, int dounmount) void ukfs_ll_recycle(struct vnode *vp) { + bool recycle; /* XXXXX */ if (vp == NULL || rump_vp_getref(vp)) @@ -162,7 +163,7 @@ ukfs_ll_recycle(struct vnode *vp) VLE(vp); RUMP_VOP_FSYNC(vp, NULL, 0, 0, 0); - RUMP_VOP_INACTIVE(vp); + RUMP_VOP_INACTIVE(vp, &recycle); rump_recyclenode(vp); rump_putnode(vp); } diff --git a/sys/rump/include/machine/intr.h b/sys/rump/include/machine/intr.h index 0bd30eddda83..4f073ac3f70b 100644 --- a/sys/rump/include/machine/intr.h +++ b/sys/rump/include/machine/intr.h @@ -1,4 +1,4 @@ -/* $NetBSD: intr.h,v 1.7 2007/12/03 15:34:33 ad Exp $ */ +/* $NetBSD: intr.h,v 1.8 2008/01/02 11:49:05 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved. @@ -47,6 +47,7 @@ void rump_splx(int); #define splx(x) rump_splx(x) #define IPL_NONE 0 +#define IPL_SOFTBIO 0 #define IPL_SCHED 0 #define IPL_VM 0 diff --git a/sys/rump/librump/rumpkern/fstrans_stub.c b/sys/rump/librump/rumpkern/fstrans_stub.c index 1b0d2c409b46..77bd01e6318a 100644 --- a/sys/rump/librump/rumpkern/fstrans_stub.c +++ b/sys/rump/librump/rumpkern/fstrans_stub.c @@ -1,4 +1,4 @@ -/* $NetBSD: fstrans_stub.c,v 1.3 2007/12/02 18:24:34 hannken Exp $ */ +/* $NetBSD: fstrans_stub.c,v 1.4 2008/01/02 11:49:05 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved. @@ -87,3 +87,16 @@ fscow_run(struct buf *bp, bool data_valid) return 0; } + +int +fstrans_mount(struct mount *mp) +{ + + return 0; +} + +void +fstrans_unmount(struct mount *mp) +{ + +} diff --git a/sys/rump/librump/rumpkern/genfs_io.c b/sys/rump/librump/rumpkern/genfs_io.c index 81c9320a6c9f..47d6ad601613 100644 --- a/sys/rump/librump/rumpkern/genfs_io.c +++ b/sys/rump/librump/rumpkern/genfs_io.c @@ -1,4 +1,4 @@ -/* $NetBSD: genfs_io.c,v 1.6 2007/11/07 18:59:18 pooka Exp $ */ +/* $NetBSD: genfs_io.c,v 1.7 2008/01/02 11:49:05 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved. @@ -192,17 +192,17 @@ genfs_getpages(void *v) continue; } - bp = getiobuf(); + bp = getiobuf(vp, true); bp->b_data = tmpbuf + bufoff; bp->b_bcount = xfersize; bp->b_blkno = bn; bp->b_lblkno = 0; - bp->b_flags = B_READ | B_BUSY; - bp->b_vp = vp; + bp->b_flags = B_READ; + bp->b_cflags = BC_BUSY; if (async) { - bp->b_flags |= B_ASYNC | B_CALL; + bp->b_flags |= B_ASYNC; bp->b_iodone = uvm_aio_biodone; } @@ -374,7 +374,7 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, if (bn == -1) continue; - bp = getiobuf(); + bp = getiobuf(vp, true); /* only write max what we are allowed to write */ bp->b_bcount = xfersize; @@ -394,11 +394,11 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, bp->b_lblkno = 0; bp->b_blkno = bn + (((smallest+bufoff)&(bsize-1))>>DEV_BSHIFT); bp->b_data = databuf + bufoff; - bp->b_vp = vp; - bp->b_flags = B_WRITE | B_BUSY; - bp->b_iodone = uvm_aio_biodone; + bp->b_flags = B_WRITE; + bp->b_cflags |= BC_BUSY; + if (async) { - bp->b_flags |= B_CALL | B_ASYNC; + bp->b_flags |= B_ASYNC; bp->b_iodone = uvm_aio_biodone; } diff --git a/sys/rump/librump/rumpkern/intr.c b/sys/rump/librump/rumpkern/intr.c new file mode 100644 index 000000000000..45a8f73eb880 --- /dev/null +++ b/sys/rump/librump/rumpkern/intr.c @@ -0,0 +1,87 @@ +/* $NetBSD: intr.c,v 1.2 2008/01/02 11:49:06 ad Exp $ */ + +/*- + * Copyright (c) 2007 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Andrew Doran. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include "rump.h" +#include "rumpuser.h" + +struct v_dodgy { + void (*func)(void *); + void *arg; +}; + +void * +softint_establish(u_int flags, void (*func)(void *), void *arg) +{ + struct v_dodgy *vd; + + vd = kmem_alloc(sizeof(*vd), KM_SLEEP); + if (vd != NULL) { + vd->func = func; + vd->arg = arg; + } + return vd; +} + +void +softint_disestablish(void *arg) +{ + + kmem_free(arg, sizeof(struct v_dodgy)); +} + +void +softint_schedule(void *arg) +{ + struct v_dodgy *vd; + + vd = arg; + (*(vd->func))(vd->arg); +} + +bool +cpu_intr_p(void) +{ + + return false; +} diff --git a/sys/rump/librump/rumpkern/locks.c b/sys/rump/librump/rumpkern/locks.c index 21559aa6f25a..00819d826553 100644 --- a/sys/rump/librump/rumpkern/locks.c +++ b/sys/rump/librump/rumpkern/locks.c @@ -1,4 +1,4 @@ -/* $NetBSD: locks.c,v 1.6 2008/01/01 22:03:24 pooka Exp $ */ +/* $NetBSD: locks.c,v 1.7 2008/01/02 11:49:06 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved.
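For context, the softint(9) entry points stubbed out above are used as below from MI code; in the rump shim, softint_schedule() simply runs the handler synchronously in the calling thread. A minimal usage sketch: the example_* names are hypothetical, and the SOFTINT_BIO level is arbitrary here since the stub ignores its flags argument.

	#include <sys/intr.h>

	static int example_count;

	static void
	example_handler(void *arg)
	{
		int *countp = arg;

		(*countp)++;
	}

	void
	example_softint_usage(void)
	{
		void *sih;

		/* The stub only records func/arg in its handle. */
		sih = softint_establish(SOFTINT_BIO, example_handler,
		    &example_count);

		/* In rump this runs example_handler(&example_count) now. */
		softint_schedule(sih);

		softint_disestablish(sih);
	}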
@@ -185,6 +185,14 @@ cv_wait_sig(kcondvar_t *cv, kmutex_t *mtx) return 0; } +int +cv_wait_sig(kcondvar_t *cv, kmutex_t *mtx) +{ + + rumpuser_cv_wait(RUMPCV(cv), mtx->kmtx_mtx); + return 0; +} + int cv_timedwait(kcondvar_t *cv, kmutex_t *mtx, int ticks) { @@ -203,6 +211,15 @@ cv_timedwait_sig(kcondvar_t *cv, kmutex_t *mtx, int ticks) return rumpuser_cv_timedwait(RUMPCV(cv), mtx->kmtx_mtx, ticks); } +int +cv_timedwait_sig(kcondvar_t *cv, kmutex_t *mtx, int ticks) +{ + extern int hz; + + KASSERT(hz == 100); + return rumpuser_cv_timedwait(RUMPCV(cv), mtx->kmtx_mtx, ticks); +} + void cv_signal(kcondvar_t *cv) { diff --git a/sys/rump/librump/rumpkern/ltsleep.c b/sys/rump/librump/rumpkern/ltsleep.c index 019a1a8629a0..19263f2c82cc 100644 --- a/sys/rump/librump/rumpkern/ltsleep.c +++ b/sys/rump/librump/rumpkern/ltsleep.c @@ -1,4 +1,4 @@ -/* $NetBSD: ltsleep.c,v 1.3 2007/11/07 18:59:18 pooka Exp $ */ +/* $NetBSD: ltsleep.c,v 1.4 2008/01/02 11:49:06 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved. @@ -83,6 +83,44 @@ ltsleep(wchan_t ident, pri_t prio, const char *wmesg, int timo, return 0; } +int +mtsleep(wchan_t ident, pri_t prio, const char *wmesg, int timo, + kmutex_t *lock) +{ + struct ltsleeper lts; + int iplrecurse; + + lts.id = ident; + cv_init(<s.cv, NULL); + + mutex_enter(&sleepermtx); + LIST_INSERT_HEAD(&sleepers, <s, entries); + + /* release spl */ + iplrecurse = rumpuser_whatis_ipl(); + while (iplrecurse--) + rumpuser_rw_exit(&rumpspl); + + /* protected by sleepermtx */ + mutex_exit(lock); + cv_wait(<s.cv, &sleepermtx); + + /* retake ipl */ + iplrecurse = rumpuser_whatis_ipl(); + while (iplrecurse--) + rumpuser_rw_enter(&rumpspl, 0); + + LIST_REMOVE(<s, entries); + mutex_exit(&sleepermtx); + + cv_destroy(<s.cv); + + if ((prio & PNORELOCK) == 0) + mutex_enter(lock); + + return 0; +} + void wakeup(wchan_t ident) { diff --git a/sys/rump/librump/rumpkern/vfs.c b/sys/rump/librump/rumpkern/vfs.c index b646623b6db8..536021a9a461 100644 --- a/sys/rump/librump/rumpkern/vfs.c +++ b/sys/rump/librump/rumpkern/vfs.c @@ -1,4 +1,4 @@ -/* $NetBSD: vfs.c,v 1.21 2007/11/26 19:02:24 pooka Exp $ */ +/* $NetBSD: vfs.c,v 1.22 2008/01/02 11:49:06 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved. 
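The ltsleep() to mtsleep() substitutions applied in the netsmb and nfs hunks above, and backed by the rump implementation just added, are mechanical: the wait channel, priority, timeout, and PNORELOCK semantics are unchanged, but the interlock argument is now a kmutex_t * instead of a struct simplelock *. A schematic example of the converted idiom; the sc_* softc is hypothetical.

	#include <sys/param.h>
	#include <sys/mutex.h>
	#include <sys/proc.h>

	struct sc_softc {
		kmutex_t	sc_lock;
		int		sc_busy;
	};

	static int
	sc_wait(struct sc_softc *sc)
	{
		int error = 0;

		mutex_enter(&sc->sc_lock);
		while (sc->sc_busy && error == 0) {
			/* Drops sc_lock while asleep, retakes it on wakeup. */
			error = mtsleep(&sc->sc_busy, PWAIT, "scwait", 0,
			    &sc->sc_lock);
		}
		mutex_exit(&sc->sc_lock);
		return error;
	}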
@@ -70,6 +70,13 @@ const struct vnodeopv_entry_desc fifo_vnodeop_entries[] = { const struct vnodeopv_desc fifo_vnodeop_opv_desc = { &fifo_vnodeop_p, fifo_vnodeop_entries }; +struct vnode *speclisth[SPECHSZ]; + +void +vn_init1(void) +{ + +} int getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *), @@ -145,11 +152,24 @@ vrele(struct vnode *vp) } void -vrele2(struct vnode *vp, int onhead) +vrelel(struct vnode *vp, int doinactive, int onhead) { } +void +vrele2(struct vnode *vp, bool onhead) +{ + +} + +void +vfree(vnode_t *vp) +{ + + /* XXX */ +} + void vput(struct vnode *vp) { @@ -164,6 +184,13 @@ vgone(struct vnode *vp) vgonel(vp, curlwp); } +void +vclean(struct vnode *vp, int flag) +{ + + vgonel(vp, curlwp); +} + void vgonel(struct vnode *vp, struct lwp *l) { @@ -183,18 +210,19 @@ holdrelel(struct vnode *vp) } int -vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct lwp *l) +vrecycle(struct vnode *vp, kmutex_t *inter_lkp, struct lwp *l) { struct mount *mp = vp->v_mount; + bool recycle; if (vp->v_usecount == 1) { vp->v_usecount = 0; simple_lock(&vp->v_interlock); if (inter_lkp) - simple_unlock(inter_lkp); + mutex_exit(inter_lkp); VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK); vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0); - VOP_INACTIVE(vp); + VOP_INACTIVE(vp, &recycle); VOP_RECLAIM(vp); TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes); diff --git a/sys/rump/librump/rumpkern/vm.c b/sys/rump/librump/rumpkern/vm.c index 4921b9961dd0..28746e80f0a3 100644 --- a/sys/rump/librump/rumpkern/vm.c +++ b/sys/rump/librump/rumpkern/vm.c @@ -1,4 +1,4 @@ -/* $NetBSD: vm.c,v 1.24 2007/12/01 10:45:42 yamt Exp $ */ +/* $NetBSD: vm.c,v 1.25 2008/01/02 11:49:06 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved. @@ -79,6 +79,8 @@ const struct uvm_pagerops aobj_pager = { .pgo_put = ao_put, }; +kmutex_t uvm_pageqlock; + struct uvmexp uvmexp; struct uvm uvm; @@ -423,6 +425,7 @@ rumpvm_init() mutex_init(&rvamtx, MUTEX_DEFAULT, 0); mutex_init(&uwinmtx, MUTEX_DEFAULT, 0); + mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0); } void @@ -531,7 +534,7 @@ void uvm_aio_aiodone(struct buf *bp) { - if ((bp->b_flags & (B_READ | B_NOCACHE)) == 0 && bioopsp) + if (((bp->b_flags | bp->b_cflags) & (B_READ | BC_NOCACHE)) == 0 && bioopsp) bioopsp->io_pageiodone(bp); } @@ -668,3 +671,27 @@ uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr, return (struct vm_map *)417416; } + +void +uvm_pageout_start(int npages) +{ + + uvmexp.paging += npages; +} + +void +uvm_pageout_done(int npages) +{ + + uvmexp.paging -= npages; + + /* + * wake up either of pagedaemon or LWPs waiting for it. + */ + + if (uvmexp.free <= uvmexp.reserve_kernel) { + wakeup(&uvm.pagedaemon); + } else { + wakeup(&uvmexp.free); + } +} diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 7de2890da56b..2a5aab671ed5 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -1,12 +1,12 @@ -/* $NetBSD: buf.h,v 1.101 2007/12/24 15:11:19 ad Exp $ */ +/* $NetBSD: buf.h,v 1.102 2008/01/02 11:49:07 ad Exp $ */ /*- - * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2007 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, - * NASA Ames Research Center. + * NASA Ames Research Center, and by Andrew Doran. 
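Together with the genfs_io.c hunks above, this shows the post-merge shape of driver-level I/O: getiobuf() now takes the associated vnode and a wait flag, ownership is asserted through BC_BUSY in b_cflags rather than B_BUSY in b_flags, and completion is reported through BO_DONE in b_oflags. A minimal synchronous read, as a sketch only: the helper name is hypothetical and error paths are trimmed.

	#include <sys/buf.h>
	#include <sys/vnode.h>

	int
	example_read_block(struct vnode *devvp, daddr_t blkno, void *data,
	    int size)
	{
		buf_t *bp;
		int error;

		bp = getiobuf(devvp, true);	/* waitok: never NULL */
		bp->b_flags = B_READ;
		bp->b_cflags = BC_BUSY;
		bp->b_blkno = blkno;
		bp->b_bcount = size;
		bp->b_data = data;

		VOP_STRATEGY(devvp, bp);
		error = biowait(bp);		/* sleeps until BO_DONE */
		putiobuf(bp);
		return error;
	}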
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -78,7 +78,8 @@ #include #include -#include +#include +#include #if defined(_KERNEL) #include #endif /* defined(_KERNEL) */ @@ -95,7 +96,6 @@ struct kauth_cred; */ LIST_HEAD(workhead, worklist); - /* * These are currently used only by the soft dependency code, hence * are stored once in a global variable. If other subsystems wanted @@ -113,40 +113,48 @@ struct bio_ops { void (*io_pageiodone)(struct buf *); }; +extern kmutex_t bufcache_lock; +extern kmutex_t buffer_lock; + /* * The buffer header describes an I/O operation in the kernel. + * + * Field markings and the corresponding locks: + * + * b owner (thread that holds BC_BUSY) and/or thread calling biodone() + * c bufcache_lock + * o b_objlock + * + * For buffers associated with a vnode, b_objlock points to vp->v_interlock. + * If not associated with a vnode, it points to the generic buffer_lock. */ struct buf { union { - TAILQ_ENTRY(buf) u_actq; /* Device driver queue when active. */ + TAILQ_ENTRY(buf) u_actq; #if defined(_KERNEL) /* u_work is smaller than u_actq. XXX */ struct work u_work; #endif /* defined(_KERNEL) */ - } b_u; + } b_u; /* b: device driver queue */ #define b_actq b_u.u_actq #define b_work b_u.u_work - struct simplelock b_interlock; /* Lock for b_flags changes */ - volatile int b_flags; /* B_* flags. */ - int b_error; /* Errno value. */ - int b_prio; /* Hint for buffer queue discipline. */ - int b_bufsize; /* Allocated buffer size. */ - int b_bcount; /* Valid bytes in buffer. */ - int b_resid; /* Remaining I/O. */ - dev_t b_dev; /* Device associated with buffer. */ - void *b_data; /* Memory, superblocks, indirect etc. */ - daddr_t b_blkno; /* Underlying physical block number - (partition relative) */ - daddr_t b_rawblkno; /* Raw underlying physical block - number (not partition relative) */ - /* Function to call upon completion. */ - void (*b_iodone)(struct buf *); - struct proc *b_proc; /* Associated proc if B_PHYS set. */ - struct vnode *b_vp; /* File vnode. */ - struct workhead b_dep; /* List of filesystem dependencies. */ - void *b_saveaddr; /* Original b_addr for physio. */ + void (*b_iodone)(struct buf *);/* b: call when done */ + int b_error; /* b: errno value. */ + int b_resid; /* b: remaining I/O. */ + u_int b_flags; /* b: B_* flags */ + int b_prio; /* b: priority for queue */ + int b_bufsize; /* b: allocated size */ + int b_bcount; /* b: valid bytes in buffer */ + dev_t b_dev; /* b: associated device */ + void *b_data; /* b: fs private data */ + daddr_t b_blkno; /* b: physical block number + (partition relative) */ + daddr_t b_rawblkno; /* b: raw physical block number + (volume relative) */ + struct proc *b_proc; /* b: proc if B_PHYS */ + void *b_saveaddr; /* b: saved b_data for physio */ /* - * private data for owner. + * b: private data for owner. * - buffer cache buffers are owned by corresponding filesystem. * - non-buffer cache buffers are owned by subsystem which * allocated them. (filesystem, disk driver, etc) @@ -154,24 +162,21 @@ struct buf { void *b_private; off_t b_dcookie; /* NFS: Offset cookie if dir block */ - /* - * buffer cache specific data - */ - LIST_ENTRY(buf) b_hash; /* Hash chain. */ - LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */ - TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */ - daddr_t b_lblkno; /* Logical block number. */ - int b_freelistindex; /* Free list index.
(BQ_) */ -}; + kcondvar_t b_busy; /* c: threads waiting on buf */ + u_int b_refcnt; /* c: refcount for b_busy */ + struct workhead b_dep; /* c: softdep */ + LIST_ENTRY(buf) b_hash; /* c: hash chain */ + LIST_ENTRY(buf) b_vnbufs; /* c: associated vnode */ + TAILQ_ENTRY(buf) b_freelist; /* c: position if not active */ + daddr_t b_lblkno; /* c: logical block number */ + int b_freelistindex;/* c: free list index (BQ_) */ + u_int b_cflags; /* c: BC_* flags */ + struct vnode *b_vp; /* c: file vnode */ -#define BUF_INIT(bp) \ -do { \ - LIST_INIT(&(bp)->b_dep); \ - simple_lock_init(&(bp)->b_interlock); \ - (bp)->b_dev = NODEV; \ - (bp)->b_error = 0; \ - BIO_SETPRIO((bp), BPRIO_DEFAULT); \ -} while (/*CONSTCOND*/0) + kcondvar_t b_done; /* o: waiting on completion */ + u_int b_oflags; /* o: BO_* flags */ + kmutex_t *b_objlock; /* o: completion lock */ +}; /* * For portability with historic industry practice, the cylinder number has @@ -180,43 +185,39 @@ do { \ #define b_cylinder b_resid /* Cylinder number for disksort(). */ /* - * These flags are kept in b_flags. + * These flags are kept in b_cflags (owned by buffer cache). */ -#define B_AGE 0x00000001 /* Move to age queue when I/O done. */ +#define BC_AGE 0x00000001 /* Move to age queue when I/O done. */ +#define BC_BUSY 0x00000010 /* I/O in progress. */ +#define BC_SCANNED 0x00000020 /* Block already pushed during sync */ +#define BC_INVAL 0x00002000 /* Does not contain valid info. */ +#define BC_LOCKED 0x00004000 /* Locked in core (not reusable). */ +#define BC_NOCACHE 0x00008000 /* Do not cache block after use. */ +#define BC_WANTED 0x00800000 /* Process wants this buffer. */ +#define BC_VFLUSH 0x04000000 /* Buffer is being synced. */ + +/* + * These flags are kept in b_oflags (owned by associated object). + */ +#define BO_DELWRI 0x00000080 /* Delay I/O until buffer reused. */ +#define BO_DONE 0x00000200 /* I/O completed. */ +#define BO_COWDONE 0x00000400 /* Copy-on-write already done. */ + +/* + * These flags are kept in b_flags (owned by buffer holder). + */ +#define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */ #define B_ASYNC 0x00000004 /* Start I/O, do not wait. */ -#define B_BAD 0x00000008 /* Bad block revectoring in progress. */ -#define B_BUSY 0x00000010 /* I/O in progress. */ -#define B_SCANNED 0x00000020 /* Block already pushed during sync */ -#define B_CALL 0x00000040 /* Call b_iodone from biodone. */ -#define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */ -#define B_DIRTY 0x00000100 /* Dirty page to be pushed out async. */ -#define B_DONE 0x00000200 /* I/O completed. */ -#define B_COWDONE 0x00000400 /* Copy-on-write already done. */ #define B_GATHERED 0x00001000 /* LFS: already in a segment. */ -#define B_INVAL 0x00002000 /* Does not contain valid info. */ -#define B_LOCKED 0x00004000 /* Locked in core (not reusable). */ -#define B_NOCACHE 0x00008000 /* Do not cache block after use. */ -#define B_CACHE 0x00020000 /* Bread found us in the cache. */ #define B_PHYS 0x00040000 /* I/O to user memory. */ #define B_RAW 0x00080000 /* Set by physio for raw transfers. */ #define B_READ 0x00100000 /* Read buffer. */ -#define B_TAPE 0x00200000 /* Magnetic tape I/O. */ -#define B_WANTED 0x00800000 /* Process wants this buffer. */ -#define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */ -#define B_FSPRIVATE 0x01000000 /* File system private flag. */ #define B_DEVPRIVATE 0x02000000 /* Device driver private flag. */ -#define B_VFLUSH 0x04000000 /* Buffer is being synced. 
*/ #define BUF_FLAGBITS \ - "\20\1AGE\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI" \ - "\11DIRTY\12DONE\13COWDONE\15GATHERED\16INVAL\17LOCKED\20NOCACHE" \ - "\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED\31FSPRIVATE\32DEVPRIVATE" \ - "\33VFLUSH" - -/* XXX Compat for vmlocking branch. */ -#define BC_AGE B_AGE -#define BC_INVAL B_INVAL -#define BC_NOCACHE B_NOCACHE + "\20\1AGE\3ASYNC\4BAD\5BUSY\6SCANNED\10DELWRI" \ + "\12DONE\13COWDONE\15GATHERED\16INVAL\17LOCKED\20NOCACHE" \ + "\23PHYS\24RAW\25READ\32DEVPRIVATE\33VFLUSH" /* Avoid weird code due to B_WRITE being a "pseudo flag" */ #define BUF_ISREAD(bp) (((bp)->b_flags & B_READ) == B_READ) @@ -233,7 +234,7 @@ struct cluster_save { long bs_bufsize; /* Saved b_bufsize. */ void *bs_saveaddr; /* Saved b_addr. */ int bs_nchildren; /* Number of associated buffers. */ - struct buf **bs_children; /* List of associated buffers. */ + struct buf *bs_children; /* List of associated buffers. */ }; /* @@ -266,46 +267,50 @@ extern struct bio_ops *bioopsp; extern u_int nbuf; /* The number of buffer headers */ __BEGIN_DECLS -void allocbuf(struct buf *, int, int); -void bawrite(struct buf *); -void bdirty(struct buf *); -void bdwrite(struct buf *); -void biodone(struct buf *); -int biowait(struct buf *); -int bread(struct vnode *, daddr_t, int, struct kauth_cred *, struct buf **); +int allocbuf(buf_t *, int, int); +void bawrite(buf_t *); +void bdirty(buf_t *); +void bdwrite(buf_t *); +void biodone(buf_t *); +int biowait(buf_t *); +int bread(struct vnode *, daddr_t, int, struct kauth_cred *, buf_t **); int breada(struct vnode *, daddr_t, int, daddr_t, int, struct kauth_cred *, - struct buf **); + buf_t **); int breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int, - struct kauth_cred *, struct buf **); -void brelse(struct buf *, int); -void bremfree(struct buf *); + struct kauth_cred *, buf_t **); +void brelsel(buf_t *, int); +void brelse(buf_t *, int); +void bremfree(buf_t *); void bufinit(void); -int bwrite(struct buf *); -struct buf *getblk(struct vnode *, daddr_t, int, int, int); -struct buf *geteblk(int); -struct buf *incore(struct vnode *, daddr_t); +void bufinit2(void); +int bwrite(buf_t *); +buf_t *getblk(struct vnode *, daddr_t, int, int, int); +buf_t *geteblk(int); +buf_t *incore(struct vnode *, daddr_t); -void minphys(struct buf *); -int physio(void (*)(struct buf *), struct buf *, dev_t, int, - void (*)(struct buf *), struct uio *); +void minphys(buf_t *); +int physio(void (*)(buf_t *), buf_t *, dev_t, int, + void (*)(buf_t *), struct uio *); -void brelvp(struct buf *); -void reassignbuf(struct buf *, struct vnode *); -void bgetvp(struct vnode *, struct buf *); +void brelvp(buf_t *); +void reassignbuf(buf_t *, struct vnode *); +void bgetvp(struct vnode *, buf_t *); int buf_syncwait(void); u_long buf_memcalc(void); int buf_drain(int); int buf_setvalimit(vsize_t); #ifdef DDB -void vfs_buf_print(struct buf *, int, void (*)(const char *, ...)); +void vfs_buf_print(buf_t *, int, void (*)(const char *, ...)); #endif -struct buf *getiobuf(void); -struct buf *getiobuf_nowait(void); -void putiobuf(struct buf *); +buf_t *getiobuf(struct vnode *, bool); +void putiobuf(buf_t *); +void buf_init(buf_t *); +void buf_destroy(buf_t *); +int bbusy(buf_t *, bool, int); -void nestiobuf_iodone(struct buf *); -void nestiobuf_setup(struct buf *, struct buf *, int, size_t); -void nestiobuf_done(struct buf *, int, int); +void nestiobuf_iodone(buf_t *); +void nestiobuf_setup(buf_t *, buf_t *, int, size_t); +void nestiobuf_done(buf_t *, int, int); __END_DECLS #endif /* 
_KERNEL */ diff --git a/sys/sys/fstrans.h b/sys/sys/fstrans.h index bcef8c1352aa..3aa16e2b5551 100644 --- a/sys/sys/fstrans.h +++ b/sys/sys/fstrans.h @@ -1,4 +1,4 @@ -/* $NetBSD: fstrans.h,v 1.7 2007/12/02 13:56:19 hannken Exp $ */ +/* $NetBSD: fstrans.h,v 1.8 2008/01/02 11:49:07 ad Exp $ */ /*- * Copyright (c) 2007 The NetBSD Foundation, Inc. @@ -72,6 +72,8 @@ do { \ int _fstrans_start(struct mount *, enum fstrans_lock_type, int); void fstrans_done(struct mount *); int fstrans_is_owner(struct mount *); +int fstrans_mount(struct mount *); +void fstrans_unmount(struct mount *); int fstrans_setstate(struct mount *, enum fstrans_state); enum fstrans_state fstrans_getstate(struct mount *); diff --git a/sys/sys/lock.h b/sys/sys/lock.h index 36e7bbd6473b..4d85721ca9d8 100644 --- a/sys/sys/lock.h +++ b/sys/sys/lock.h @@ -1,4 +1,4 @@ -/* $NetBSD: lock.h,v 1.76 2007/12/06 17:05:07 ad Exp $ */ +/* $NetBSD: lock.h,v 1.77 2008/01/02 11:49:07 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2006, 2007 The NetBSD Foundation, Inc. @@ -82,6 +82,7 @@ #include #include +#include #include #include @@ -90,9 +91,9 @@ * The general lock structure. */ struct lock { - struct simplelock lk_interlock;/* lock on remaining fields */ u_int lk_flags; /* see below */ int lk_sharecount; /* # of accepted shared locks */ + kmutex_t lk_interlock; /* lock on structure */ short lk_exclusivecount; /* # of recursive exclusive locks */ short lk_recurselevel; /* lvl above which recursion ok */ int lk_waitcount; /* # of sleepers */ @@ -147,7 +148,6 @@ struct lock { #define LK_SLEEPFAIL 0x00000020 /* sleep, then return failure */ #define LK_CANRECURSE 0x00000040 /* this may be recursive lock attempt */ #define LK_REENABLE 0x00000080 /* lock is be reenabled after drain */ -#define LK_SETRECURSE 0x00100000 /* other locks while we have it OK */ #define LK_RECURSEFAIL 0x00200000 /* attempt at recursive lock fails */ #define LK_RESURRECT 0x00800000 /* immediately reenable drained lock */ /* @@ -160,6 +160,7 @@ struct lock { #define LK_WAITDRAIN 0x00000800 /* process waiting for lock to drain */ #define LK_DRAINING 0x00004000 /* lock is being drained */ #define LK_DRAINED 0x00008000 /* lock has been decommissioned */ +#define LK_DODEBUG 0x00010000 /* has lockdebug bits */ /* * Internal state flags corresponding to lk_sharecount, and lk_waitcount */ @@ -177,7 +178,6 @@ struct lock { #define __LK_FLAG_BITS \ "\20" \ "\22LK_RECURSEFAIL" \ - "\21LK_SETRECURSE" \ "\20LK_WAIT_NOZERO" \ "\19LK_SHARE_NOZERO" \ "\18LK_RETRY" \ @@ -211,7 +211,7 @@ struct proc; void lockinit(struct lock *, pri_t, const char *, int, int); void lockdestroy(struct lock *); -int lockmgr(struct lock *, u_int flags, struct simplelock *); +int lockmgr(struct lock *, u_int flags, kmutex_t *); void transferlockers(struct lock *, struct lock *); int lockstatus(struct lock *); void lockmgr_printinfo(struct lock *); diff --git a/sys/sys/mount.h b/sys/sys/mount.h index d175bdc345dc..b8a17691a843 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -1,4 +1,4 @@ -/* $NetBSD: mount.h,v 1.168 2007/12/24 14:58:38 ad Exp $ */ +/* $NetBSD: mount.h,v 1.169 2008/01/02 11:49:07 ad Exp $ */ /* * Copyright (c) 1989, 1991, 1993 @@ -40,13 +40,16 @@ #include #endif /* _NETBSD_SOURCE */ #endif + +#ifndef _STANDALONE #include #include #include #include #include +#include #include -#include +#endif /* !_STANDALONE */ /* * file system statistics @@ -89,13 +92,13 @@ #define MOUNT_EFS "efs" /* SGI's Extent Filesystem */ #define MOUNT_ZFS "zfs" /* Sun ZFS */ +#ifndef _STANDALONE + /* * Structure per 
mounted file system. Each mounted file system has an * array of operations and an instance record. The file systems are * put on a doubly linked list. */ -TAILQ_HEAD(vnodelst, vnode); - struct mount { CIRCLEQ_ENTRY(mount) mnt_list; /* mount list */ struct vfsops *mnt_op; /* operations on fs */ @@ -111,7 +114,8 @@ struct mount { void *mnt_data; /* private data */ int mnt_wcnt; /* count of vfs_busy waiters */ struct lwp *mnt_unmounter; /* who is unmounting */ - struct simplelock mnt_slock; /* mutex for wcnt */ + kmutex_t mnt_mutex; /* mutex for wcnt */ + void *mnt_transinfo; /* for FS-internal use */ specificdata_reference mnt_specdataref; /* subsystem specific data */ }; @@ -133,7 +137,6 @@ struct mount { #define VFS_MAGICLINKS 4 /* expand 'magic' symlinks */ #define VFSGEN_MAXID 5 /* number of valid vfs.generic ids */ -#ifndef _STANDALONE /* * USE THE SAME NAMES AS MOUNT_*! * @@ -334,9 +337,9 @@ int vfs_stdextattrctl(struct mount *, int, struct vnode *, extern CIRCLEQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ extern struct vfsops *vfssw[]; /* filesystem type table */ extern int nvfssw; -extern kmutex_t mountlist_lock; -extern struct simplelock spechash_slock; -extern kmutex_t vfs_list_lock; +extern kmutex_t mountlist_lock; +extern kmutex_t spechash_lock; +extern kmutex_t vfs_list_lock; long makefstype(const char *); int dounmount(struct mount *, int, struct lwp *); diff --git a/sys/sys/proc.h b/sys/sys/proc.h index bf2bc686dc1f..4b7cde525dba 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -1,4 +1,4 @@ -/* $NetBSD: proc.h,v 1.267 2007/12/31 15:32:14 ad Exp $ */ +/* $NetBSD: proc.h,v 1.268 2008/01/02 11:49:07 ad Exp $ */ /*- * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc. @@ -203,19 +203,19 @@ struct emul { * Field markings and the corresponding locks (not yet fully implemented, * more a statement of intent): * + * a: p_auxlock * k: ktrace_mutex * m: proclist_mutex * l: proclist_lock * s: p_smutex * t: p_stmutex * p: p_mutex - * r: p_raslock * (: unlocked, stable */ struct proc { LIST_ENTRY(proc) p_list; /* l, m: List of all processes */ - kmutex_t p_raslock; /* :: RAS modification lock */ + kmutex_t p_auxlock; /* :: secondary, longer term lock */ kmutex_t p_mutex; /* :: general mutex */ kmutex_t p_smutex; /* :: mutex on scheduling state */ kmutex_t p_stmutex; /* :: mutex on profiling state */ @@ -254,7 +254,7 @@ struct proc { LIST_ENTRY(proc) p_sibling; /* l: List of sibling processes. */ LIST_HEAD(, proc) p_children; /* l: List of children. */ LIST_HEAD(, lwp) p_lwps; /* s: List of LWPs. */ - struct ras *p_raslist; /* r: List of RAS entries */ + struct ras *p_raslist; /* a: List of RAS entries */ /* The following fields are all zeroed upon creation in fork. */ #define p_startzero p_nlwps diff --git a/sys/sys/shm.h b/sys/sys/shm.h index 4f967d73ae94..3c6d1f189b81 100644 --- a/sys/sys/shm.h +++ b/sys/sys/shm.h @@ -1,4 +1,4 @@ -/* $NetBSD: shm.h,v 1.42 2006/11/25 21:40:06 christos Exp $ */ +/* $NetBSD: shm.h,v 1.43 2008/01/02 11:49:07 ad Exp $ */ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. 
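The b/c/o field markings introduced in the sys/sys/buf.h hunks above reduce to a simple completion protocol: b_oflags is covered by b_objlock (the vnode's v_interlock for vnode-backed buffers), biodone() sets BO_DONE and signals the b_done condvar, and waiters block on that condvar. Roughly what a biowait()-style wait now looks like; a simplified sketch without the error and delayed-write handling of the real function.

	#include <sys/buf.h>
	#include <sys/condvar.h>
	#include <sys/mutex.h>

	static int
	example_biowait(buf_t *bp)
	{

		mutex_enter(bp->b_objlock);
		while ((bp->b_oflags & BO_DONE) == 0)
			cv_wait(&bp->b_done, bp->b_objlock);
		mutex_exit(bp->b_objlock);

		return bp->b_error;
	}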
@@ -171,8 +171,8 @@ extern struct shminfo shminfo; extern struct shmid_ds *shmsegs; extern int shm_nused; -#define SHMSEG_FREE 0x0200 -#define SHMSEG_REMOVED 0x0400 +#define SHMSEG_FREE 0x0200 +#define SHMSEG_REMOVED 0x0400 #define SHMSEG_ALLOCATED 0x0800 #define SHMSEG_WANTED 0x1000 #define SHMSEG_RMLINGER 0x2000 diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 4a37b5a37f7c..532065400dfd 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -1,4 +1,4 @@ -/* $NetBSD: vnode.h,v 1.179 2007/12/25 18:33:49 perry Exp $ */ +/* $NetBSD: vnode.h,v 1.180 2008/01/02 11:49:07 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -37,6 +37,7 @@ #include #include #include +#include /* XXX: clean up includes later */ #include /* XXX */ @@ -86,57 +87,77 @@ enum vtagtype { "VT_FILECORE", "VT_NTFS", "VT_VFS", "VT_OVERLAY", "VT_SMBFS", "VT_PTYFS", \ "VT_TMPFS", "VT_UDF", "VT_SYSVBFS", "VT_PUFFS", "VT_HFS", "VT_EFS", "VT_ZFS" +struct vnode; +struct buf; + LIST_HEAD(buflists, buf); +TAILQ_HEAD(vnodelst, vnode); /* * Reading or writing any of these items requires holding the appropriate - * lock. [XXX documented on the vmlocking branch.] + * lock. Field markings and the corresponding locks: + * + * : stable, reference to the vnode is required + * f vnode_free_list_lock, or vrele_lock if VI_INACTPEND + * i v_interlock + * m mntvnode_lock + * n namecache_lock + * s syncer_data_lock + * u locked by underlying filesystem + * v v_vnlock + * x v_interlock + bufcache_lock to modify, either to inspect * * Each underlying filesystem allocates its own private area and hangs * it from v_data. */ struct vnode { - struct uvm_object v_uobj; /* the VM object */ -#define v_usecount v_uobj.uo_refs -#define v_interlock v_uobj.vmobjlock - voff_t v_size; /* size of file */ - voff_t v_writesize; /* new size after write */ + struct uvm_object v_uobj; /* i: the VM object */ + kcondvar_t v_cv; /* i: synchronization */ + int v_waitcnt; /* i: # waiters for VXLOCK */ + voff_t v_size; /* i: size of file */ + voff_t v_writesize; /* i: new size after write */ int v_iflag; /* i: VI_* flags */ int v_vflag; /* v: VV_* flags */ int v_uflag; /* u: VU_* flags */ - int v_numoutput; /* number of pending writes */ - long v_writecount; /* reference count of writers */ - long v_holdcnt; /* page & buffer references */ - struct mount *v_mount; /* ptr to vfs we are in */ - int (**v_op)(void *); /* vnode operations vector */ - TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */ - TAILQ_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */ - struct buflists v_cleanblkhd; /* clean blocklist head */ - struct buflists v_dirtyblkhd; /* dirty blocklist head */ - int v_synclist_slot; /* synclist slot index */ - TAILQ_ENTRY(vnode) v_synclist; /* vnodes with dirty buffers */ - LIST_HEAD(, namecache) v_dnclist; /* namecaches for children */ - LIST_HEAD(, namecache) v_nclist; /* namecaches for our parent */ + int v_numoutput; /* i: # of pending writes */ + long v_writecount; /* i: ref count of writers */ + long v_holdcnt; /* i: page & buffer refs */ + struct mount *v_mount; /* v: ptr to vfs we are in */ + int (**v_op)(void *); /* :: vnode operations vector */ + TAILQ_ENTRY(vnode) v_freelist; /* f: vnode freelist */ + struct vnodelst *v_freelisthd; /* f: which freelist?
*/ + TAILQ_ENTRY(vnode) v_mntvnodes; /* m: vnodes for mount point */ + struct buflists v_cleanblkhd; /* x: clean blocklist head */ + struct buflists v_dirtyblkhd; /* x: dirty blocklist head */ + int v_synclist_slot; /* s: synclist slot index */ + TAILQ_ENTRY(vnode) v_synclist; /* s: vnodes with dirty bufs */ + LIST_HEAD(, namecache) v_dnclist; /* n: namecaches (children) */ + LIST_HEAD(, namecache) v_nclist; /* n: namecaches (parent) */ union { - struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */ - struct socket *vu_socket; /* unix ipc (VSOCK) */ - struct specinfo *vu_specinfo; /* device (VCHR, VBLK) */ - struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */ - struct uvm_ractx *vu_ractx; /* read-ahead context (VREG) */ + struct mount *vu_mountedhere;/* v: ptr to vfs (VDIR) */ + struct socket *vu_socket; /* v: unix ipc (VSOCK) */ + struct specinfo *vu_specinfo; /* v: device (VCHR, VBLK) */ + struct fifoinfo *vu_fifoinfo; /* v: fifo (VFIFO) */ + struct uvm_ractx *vu_ractx; /* i: read-ahead ctx (VREG) */ } v_un; - enum vtype v_type; /* vnode type */ - enum vtagtype v_tag; /* type of underlying data */ - struct lock v_lock; /* lock for this vnode */ - struct lock *v_vnlock; /* pointer to lock */ - void *v_data; /* private data for fs */ - struct klist v_klist; /* knotes attached to vnode */ + enum vtype v_type; /* :: vnode type */ + enum vtagtype v_tag; /* :: type of underlying data */ + struct lock v_lock; /* v: lock for this vnode */ + struct lock *v_vnlock; /* v: pointer to lock */ + void *v_data; /* :: private data for fs */ + struct klist v_klist; /* i: notes attached to vnode */ }; +#define v_usecount v_uobj.uo_refs +#define v_interlock v_uobj.vmobjlock #define v_mountedhere v_un.vu_mountedhere #define v_socket v_un.vu_socket #define v_specinfo v_un.vu_specinfo #define v_fifoinfo v_un.vu_fifoinfo #define v_ractx v_un.vu_ractx +typedef struct vnodelst vnodelst_t; +typedef struct vnode vnode_t; + /* * All vnode locking operations should use vp->v_vnlock. For leaf filesystems * (such as ffs, lfs, msdosfs, etc), vp->v_vnlock = &vp->v_lock. For @@ -178,9 +199,8 @@ struct vnode { #define VI_LAYER 0x00020000 /* vnode is on a layer filesystem */ #define VI_MAPPED 0x00040000 /* duplicate of VV_MAPPED */ #define VI_CLEAN 0x00080000 /* has been reclaimed */ -#define VI_XWANT 0x00100000 /* process is waiting for vnode */ -#define VI_BWAIT 0x00200000 /* waiting for output to complete */ -#define VI_FREEING 0x00400000 /* vnode is being freed */ +#define VI_INACTPEND 0x00100000 /* inactivation is pending */ +#define VI_INACTREDO 0x00200000 /* need to redo VOP_INACTIVE() */ /* * The third set are locked by the underlying file system. @@ -190,7 +210,7 @@ struct vnode { #define VNODE_FLAGBITS \ "\20\1ROOT\2SYSTEM\3ISTTY\4MAPPED\5MPSAFE\6LOCKSWORK\11TEXT\12EXECMAP" \ "\13WRMAP\14WRMAPDIRTY\15XLOCK\16ALIASED\17ONWORKLST\20MARKER" \ - "\22LAYER\23MAPPED\24CLEAN\25XWANT\26BWAIT\31DIROP" + "\22LAYER\23MAPPED\24CLEAN\25INACTPEND\26INACTREDO\31DIROP" #define VSIZENOTSET ((voff_t)-1) @@ -229,18 +249,6 @@ struct vattr { #ifdef _KERNEL -/* - * Use a global lock for all v_numoutput updates. - * Define a convenience macro to increment by one. - * Note: the only place where v_numoutput is decremented is in vwakeup(). - */ -extern struct simplelock global_v_numoutput_slock; -#define V_INCR_NUMOUTPUT(vp) do { \ - simple_lock(&global_v_numoutput_slock); \ - (vp)->v_numoutput++; \ - simple_unlock(&global_v_numoutput_slock); \ -} while (/*CONSTCOND*/ 0) - /* * Flags for ioflag. 
*/ @@ -309,10 +317,7 @@ extern const int vttoif_tab[]; #define HOLDRELE(vp) holdrele(vp) #define VHOLD(vp) vhold(vp) #define VREF(vp) vref(vp) -TAILQ_HEAD(freelst, vnode); -extern struct freelst vnode_hold_list; /* free vnodes referencing buffers */ -extern struct freelst vnode_free_list; /* vnode free list */ -extern struct simplelock vnode_free_list_slock; +extern kmutex_t vnode_free_list_lock; void holdrelel(struct vnode *); void vholdl(struct vnode *); @@ -330,9 +335,9 @@ static __inline void holdrele(struct vnode *vp) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); holdrelel(vp); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } /* @@ -342,9 +347,16 @@ static __inline void vhold(struct vnode *vp) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); vholdl(vp); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); +} + +static __inline bool +vismarker(struct vnode *vp) +{ + + return (vp->v_iflag & VI_MARKER) != 0; } #define NULLVP ((struct vnode *)NULL) @@ -356,7 +368,7 @@ vhold(struct vnode *vp) */ extern struct vnode *rootvnode; /* root (i.e. "/") vnode */ extern int desiredvnodes; /* number of vnodes desired */ -extern long numvnodes; /* current number of vnodes */ +extern u_int numvnodes; /* current number of vnodes */ extern time_t syncdelay; /* max time to delay syncing data */ extern time_t filedelay; /* time to delay syncing files */ extern time_t dirdelay; /* time to delay syncing directories */ @@ -440,7 +452,7 @@ extern struct vnodeop_desc *vnodeop_descs[]; /* * Interlock for scanning list of vnodes attached to a mountpoint */ -extern struct simplelock mntvnode_slock; +extern kmutex_t mntvnode_lock; /* * Union filesystem hook for vn_readdir(). @@ -545,14 +557,22 @@ void vflushbuf(struct vnode *, int); int vget(struct vnode *, int); void vgone(struct vnode *); void vgonel(struct vnode *, struct lwp *); -int vinvalbuf(struct vnode *, int, kauth_cred_t, struct lwp *, int, int); +int vinvalbuf(struct vnode *, int, kauth_cred_t, struct lwp *, bool, int); void vprint(const char *, struct vnode *); void vput(struct vnode *); -int vrecycle(struct vnode *, struct simplelock *, struct lwp *); +int vrecycle(struct vnode *, kmutex_t *, struct lwp *); void vrele(struct vnode *); -void vrele2(struct vnode *, int); -int vtruncbuf(struct vnode *, daddr_t, int, int); +void vrele2(struct vnode *, bool); +int vtruncbuf(struct vnode *, daddr_t, bool, int); void vwakeup(struct buf *); +void vwait(struct vnode *, int); +void vclean(struct vnode *, int); +void vrelel(struct vnode *, int, int); +struct vnode *valloc(struct mount *); +void vfree(struct vnode *); +void vmark(struct vnode *, struct vnode *); +struct vnode *vunmark(struct vnode *); +void vn_init1(void); /* see vnsubr(9) */ int vn_bwrite(void *); diff --git a/sys/ufs/ext2fs/ext2fs_bmap.c b/sys/ufs/ext2fs/ext2fs_bmap.c index 258b727ec85a..ae77d7ad7ac8 100644 --- a/sys/ufs/ext2fs/ext2fs_bmap.c +++ b/sys/ufs/ext2fs/ext2fs_bmap.c @@ -1,4 +1,4 @@ -/* $NetBSD: ext2fs_bmap.c,v 1.22 2007/10/08 18:01:27 ad Exp $ */ +/* $NetBSD: ext2fs_bmap.c,v 1.23 2008/01/02 11:49:08 ad Exp $ */ /* * Copyright (c) 1989, 1991, 1993 @@ -70,7 +70,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ext2fs_bmap.c,v 1.22 2007/10/08 18:01:27 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ext2fs_bmap.c,v 1.23 2008/01/02 11:49:08 ad Exp $"); #include #include @@ -141,7 +141,7 @@ ext2fs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap, int *nump, int *runp) { struct inode *ip; - struct buf *bp; 
+ struct buf *bp, *cbp; struct ufsmount *ump; struct mount *mp; struct indir a[NIADDR+1], *xap; @@ -208,8 +208,15 @@ ext2fs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap, */ metalbn = xap->in_lbn; - if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn) + if (metalbn == bn) break; + if (daddr == 0) { + mutex_enter(&bufcache_lock); + cbp = incore(vp, metalbn); + mutex_exit(&bufcache_lock); + if (cbp == NULL) + break; + } /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. @@ -229,7 +236,7 @@ ext2fs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap, return (ENOMEM); } - if (bp->b_flags & (B_DONE | B_DELWRI)) { + if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { trace(TR_BREADHIT, pack(vp, size), metalbn); } #ifdef DIAGNOSTIC diff --git a/sys/ufs/ext2fs/ext2fs_inode.c b/sys/ufs/ext2fs/ext2fs_inode.c index ac7b7649e57c..e697a08c9a64 100644 --- a/sys/ufs/ext2fs/ext2fs_inode.c +++ b/sys/ufs/ext2fs/ext2fs_inode.c @@ -1,4 +1,4 @@ -/* $NetBSD: ext2fs_inode.c,v 1.62 2007/12/08 19:29:53 pooka Exp $ */ +/* $NetBSD: ext2fs_inode.c,v 1.63 2008/01/02 11:49:08 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -65,7 +65,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ext2fs_inode.c,v 1.62 2007/12/08 19:29:53 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ext2fs_inode.c,v 1.63 2008/01/02 11:49:08 ad Exp $"); #include #include @@ -141,6 +141,7 @@ ext2fs_inactive(void *v) { struct vop_inactive_args /* { struct vnode *a_vp; + bool *a_recycle; } */ *ap = v; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); @@ -154,24 +155,23 @@ ext2fs_inactive(void *v) error = 0; if (ip->i_e2fs_nlink == 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { + /* Defer final inode free and update to reclaim.*/ if (ext2fs_size(ip) != 0) { error = ext2fs_truncate(vp, (off_t)0, 0, NOCRED); } ip->i_e2fs_dtime = time_second; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - ext2fs_vfree(vp, ip->i_number, ip->i_e2fs_mode); - } - if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) { + ip->i_flag |= IN_CHANGE | IN_UPDATE | IN_MODIFIED; + ip->i_omode = 1; + } else if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) { ext2fs_update(vp, NULL, NULL, 0); } out: - VOP_UNLOCK(vp, 0); /* * If we are done with the inode, reclaim it * so that it can be reused immediately. */ - if (ip->i_e2fs_dtime != 0) - vrecycle(vp, NULL, curlwp); + *ap->a_recycle = (ip->i_e2fs_dtime != 0); + VOP_UNLOCK(vp, 0); return (error); } @@ -478,7 +478,7 @@ ext2fs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, daddr_t lastbn, */ vp = ITOV(ip); bp = getblk(vp, lbn, (int)fs->e2fs_bsize, 0, 0); - if (bp->b_flags & (B_DONE | B_DELWRI)) { + if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { /* Braces must be here in case trace evaluates to nothing. 
*/ trace(TR_BREADHIT, pack(vp, fs->e2fs_bsize), lbn); } else { diff --git a/sys/ufs/ext2fs/ext2fs_readwrite.c b/sys/ufs/ext2fs/ext2fs_readwrite.c index af41db29d5e7..2085d0c6252e 100644 --- a/sys/ufs/ext2fs/ext2fs_readwrite.c +++ b/sys/ufs/ext2fs/ext2fs_readwrite.c @@ -1,4 +1,4 @@ -/* $NetBSD: ext2fs_readwrite.c,v 1.49 2007/12/08 19:29:53 pooka Exp $ */ +/* $NetBSD: ext2fs_readwrite.c,v 1.50 2008/01/02 11:49:08 ad Exp $ */ /*- * Copyright (c) 1993 @@ -65,7 +65,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ext2fs_readwrite.c,v 1.49 2007/12/08 19:29:53 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ext2fs_readwrite.c,v 1.50 2008/01/02 11:49:08 ad Exp $"); #include #include @@ -336,13 +336,13 @@ ext2fs_write(void *v) */ if (!async && oldoff >> 16 != uio->uio_offset >> 16) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16, (uio->uio_offset >> 16) << 16, PGO_CLEANIT); } } if (error == 0 && ioflag & IO_SYNC) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(oldoff), round_page(blkroundup(fs, uio->uio_offset)), PGO_CLEANIT | PGO_SYNCIO); diff --git a/sys/ufs/ext2fs/ext2fs_vfsops.c b/sys/ufs/ext2fs/ext2fs_vfsops.c index c8fb5df5c872..9138dd4d7cce 100644 --- a/sys/ufs/ext2fs/ext2fs_vfsops.c +++ b/sys/ufs/ext2fs/ext2fs_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: ext2fs_vfsops.c,v 1.125 2007/12/08 19:29:53 pooka Exp $ */ +/* $NetBSD: ext2fs_vfsops.c,v 1.126 2008/01/02 11:49:08 ad Exp $ */ /* * Copyright (c) 1989, 1991, 1993, 1994 @@ -65,7 +65,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ext2fs_vfsops.c,v 1.125 2007/12/08 19:29:53 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ext2fs_vfsops.c,v 1.126 2008/01/02 11:49:08 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" @@ -157,6 +157,7 @@ static const struct genfs_ops ext2fs_genfsops = { static const struct ufs_ops ext2fs_ufsops = { .uo_itimes = ext2fs_itimes, .uo_update = ext2fs_update, + .uo_vfree = ext2fs_vfree, }; /* @@ -465,7 +466,7 @@ int ext2fs_reload(struct mount *mountp, kauth_cred_t cred) { struct lwp *l = curlwp; - struct vnode *vp, *nvp, *devvp; + struct vnode *vp, *mvp, *devvp; struct inode *ip; struct buf *bp; struct m_ext2fs *fs; @@ -476,6 +477,7 @@ ext2fs_reload(struct mount *mountp, kauth_cred_t cred) if ((mountp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); + /* * Step 1: invalidate all cached meta-data. */ @@ -542,30 +544,37 @@ ext2fs_reload(struct mount *mountp, kauth_cred_t cred) brelse(bp, 0); } -loop: + /* Allocate a marker vnode. */ + if ((mvp = valloc(mountp)) == NULL) + return (ENOMEM); /* * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() * and vclean() can be called indirectly */ - simple_lock(&mntvnode_slock); - for (vp = TAILQ_FIRST(&mountp->mnt_vnodelist); vp; vp = nvp) { - if (vp->v_mount != mountp) { - simple_unlock(&mntvnode_slock); - goto loop; - } + mutex_enter(&mntvnode_lock); +loop: + for (vp = TAILQ_FIRST(&mountp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + if (vp->v_mount != mountp || vismarker(vp)) + continue; /* * Step 4: invalidate all inactive vnodes. */ - if (vrecycle(vp, &mntvnode_slock, l)) + if (vrecycle(vp, &mntvnode_lock, l)) { + mutex_enter(&mntvnode_lock); + (void)vunmark(mvp); goto loop; + } /* * Step 5: invalidate all cached file data. 
*/ - simple_lock(&vp->v_interlock); - nvp = TAILQ_NEXT(vp, v_mntvnodes); - simple_unlock(&mntvnode_slock); - if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) + mutex_enter(&vp->v_interlock); + mutex_exit(&mntvnode_lock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { + mutex_enter(&mntvnode_lock); + (void)vunmark(mvp); goto loop; + } if (vinvalbuf(vp, 0, cred, l, 0, 0)) panic("ext2fs_reload: dirty2"); /* @@ -576,17 +585,20 @@ loop: (int)fs->e2fs_bsize, NOCRED, &bp); if (error) { vput(vp); - return (error); + mutex_enter(&mntvnode_lock); + (void)vunmark(mvp); + break; } cp = (char *)bp->b_data + (ino_to_fsbo(fs, ip->i_number) * EXT2_DINODE_SIZE); e2fs_iload((struct ext2fs_dinode *)cp, ip->i_din.e2fs_din); brelse(bp, 0); vput(vp); - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); - return (0); + mutex_exit(&mntvnode_lock); + vfree(mvp); + return (error); } /* @@ -844,7 +856,7 @@ ext2fs_statvfs(struct mount *mp, struct statvfs *sbp) int ext2fs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) { - struct vnode *vp, *nvp; + struct vnode *vp, *mvp; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct m_ext2fs *fs; @@ -855,40 +867,45 @@ printf("fs = %s\n", fs->e2fs_fsmnt); panic("update: rofs mod"); } + + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return (ENOMEM); + /* * Write back each (modified) inode. */ - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); loop: /* * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() * and vclean() can be called indirectly */ - for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { - /* - * If the vnode that we are about to sync is no longer - * associated with this mount point, start over. - */ - if (vp->v_mount != mp) - goto loop; - simple_lock(&vp->v_interlock); - nvp = TAILQ_NEXT(vp, v_mntvnodes); + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + if (vp->v_mount != mp || vismarker(vp)) + continue; + mutex_enter(&vp->v_interlock); ip = VTOI(vp); - if (vp->v_type == VNON || + if (ip == NULL || (vp->v_iflag & (VI_XLOCK|VI_CLEAN)) != 0 || + vp->v_type == VNON || ((ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 && LIST_EMPTY(&vp->v_dirtyblkhd) && UVM_OBJ_IS_CLEAN(&vp->v_uobj))) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); continue; } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); if (error) { - simple_lock(&mntvnode_slock); - if (error == ENOENT) + mutex_enter(&mntvnode_lock); + if (error == ENOENT) { + (void)vunmark(mvp); goto loop; + } continue; } if (vp->v_type == VREG && waitfor == MNT_LAZY) @@ -899,9 +916,10 @@ loop: if (error) allerror = error; vput(vp); - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); + vfree(mvp); /* * Force stale file system control information to be flushed. 
*/ diff --git a/sys/ufs/ext2fs/ext2fs_vnops.c b/sys/ufs/ext2fs/ext2fs_vnops.c index 33cc4e79129a..ae571b88c42d 100644 --- a/sys/ufs/ext2fs/ext2fs_vnops.c +++ b/sys/ufs/ext2fs/ext2fs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: ext2fs_vnops.c,v 1.77 2007/12/08 19:29:54 pooka Exp $ */ +/* $NetBSD: ext2fs_vnops.c,v 1.78 2008/01/02 11:49:08 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -70,7 +70,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ext2fs_vnops.c,v 1.77 2007/12/08 19:29:54 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ext2fs_vnops.c,v 1.78 2008/01/02 11:49:08 ad Exp $"); #include #include @@ -189,7 +189,7 @@ ext2fs_mknod(void *v) * checked to see if it is an alias of an existing entry in * the inode cache. */ - vput(*vpp); + VOP_UNLOCK(*vpp, 0); (*vpp)->v_type = VNON; vgone(*vpp); error = VFS_VGET(mp, ino, vpp); @@ -1377,7 +1377,6 @@ ext2fs_vinit(struct mount *mntp, int (**specops)(void *), vp->v_vflag &= ~VV_LOCKSWORK; VOP_UNLOCK(vp, 0); vp->v_op = spec_vnodeop_p; - vrele(vp); vgone(vp); lockmgr(&nvp->v_lock, LK_EXCLUSIVE, &nvp->v_interlock); /* @@ -1488,6 +1487,13 @@ ext2fs_reclaim(void *v) struct inode *ip = VTOI(vp); int error; + /* + * The inode must be freed and updated before being removed + * from its hash chain. Other threads trying to gain a hold + * on the inode will be stalled because it is locked (VI_XLOCK). + */ + if (ip->i_omode == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) + ext2fs_vfree(vp, ip->i_number, ip->i_e2fs_mode); if ((error = ufs_reclaim(vp)) != 0) return (error); if (ip->i_din.e2fs_din != NULL) diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index 1d58cef77481..4152cb864712 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_alloc.c,v 1.104 2007/11/01 06:31:59 hannken Exp $ */ +/* $NetBSD: ffs_alloc.c,v 1.105 2008/01/02 11:49:08 ad Exp $ */ /* * Copyright (c) 2002 Networks Associates Technology, Inc. 
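[Editorial sketch, not part of the commit.] The rewritten scan loops above (ext2fs_reload(), ext2fs_sync(), and the ffs_snapshot() loop further down) share one idea: because vgone()/vclean() can be reached indirectly, a plain TAILQ_FOREACH over mnt_vnodelist is unsafe, so a marker vnode (valloc()/vmark()/vunmark()/vismarker()) holds the iterator's place while mntvnode_lock is dropped. The following is a minimal, compilable userland sketch of that protocol; struct xvnode, xvmark() and xvunmark() are hypothetical stand-ins for the kernel types, and no locking is shown.

#include <stdbool.h>
#include <stdio.h>
#include <sys/queue.h>

/* Hypothetical miniature of the marker-vnode protocol. */
struct xvnode {
	TAILQ_ENTRY(xvnode) v_entry;
	bool	v_marker;		/* stands in for VI_MARKER */
};
TAILQ_HEAD(xvlist, xvnode);

/* Park the marker right behind vp so the list may change under us. */
static void
xvmark(struct xvlist *head, struct xvnode *mvp, struct xvnode *vp)
{

	TAILQ_INSERT_AFTER(head, vp, mvp, v_entry);
}

/* Pull the marker out and return whatever node now follows it. */
static struct xvnode *
xvunmark(struct xvlist *head, struct xvnode *mvp)
{
	struct xvnode *next;

	next = TAILQ_NEXT(mvp, v_entry);
	TAILQ_REMOVE(head, mvp, v_entry);
	return next;
}

int
main(void)
{
	struct xvlist head = TAILQ_HEAD_INITIALIZER(head);
	struct xvnode nodes[4], marker = { .v_marker = true };
	struct xvnode *vp;
	int i;

	for (i = 0; i < 4; i++) {
		nodes[i].v_marker = false;
		TAILQ_INSERT_TAIL(&head, &nodes[i], v_entry);
	}
	/*
	 * The iteration shape used in the loops above: mark, (drop
	 * the list lock, work on vp, retake the lock), then unmark
	 * to find the next node even if vp was removed meanwhile.
	 */
	for (vp = TAILQ_FIRST(&head); vp != NULL;
	    vp = xvunmark(&head, &marker)) {
		xvmark(&head, &marker, vp);
		if (vp->v_marker)
			continue;	/* skip other threads' markers */
		printf("visit node %d\n", (int)(vp - nodes));
	}
	return 0;
}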
@@ -41,7 +41,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.104 2007/11/01 06:31:59 hannken Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.105 2008/01/02 11:49:08 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -138,14 +138,14 @@ ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size, voff_t off = trunc_page(lblktosize(fs, lbn)); voff_t endoff = round_page(lblktosize(fs, lbn) + size); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); while (off < endoff) { pg = uvm_pagelookup(uobj, off); KASSERT(pg != NULL); KASSERT(pg->owner == curproc->p_pid); off += PAGE_SIZE; } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } #endif @@ -226,7 +226,7 @@ ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize, voff_t off = trunc_page(lblktosize(fs, lbprev)); voff_t endoff = round_page(lblktosize(fs, lbprev) + osize); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); while (off < endoff) { pg = uvm_pagelookup(uobj, off); KASSERT(pg != NULL); @@ -234,7 +234,7 @@ ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize, KASSERT((pg->flags & PG_CLEAN) == 0); off += PAGE_SIZE; } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } #endif @@ -295,8 +295,10 @@ ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize, if (bp->b_blkno != fsbtodb(fs, bno)) panic("bad blockno"); allocbuf(bp, nsize, 1); - bp->b_flags |= B_DONE; memset((char *)bp->b_data + osize, 0, nsize - osize); + mutex_enter(bp->b_objlock); + bp->b_oflags |= BO_DONE; + mutex_exit(bp->b_objlock); *bpp = bp; } if (blknop != NULL) { @@ -375,8 +377,10 @@ ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize, if (bpp != NULL) { bp->b_blkno = fsbtodb(fs, bno); allocbuf(bp, nsize, 1); - bp->b_flags |= B_DONE; memset((char *)bp->b_data + osize, 0, (u_int)nsize - osize); + mutex_enter(bp->b_objlock); + bp->b_oflags |= BO_DONE; + mutex_exit(bp->b_objlock); *bpp = bp; } if (blknop != NULL) { diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c index 436312f0f1e5..c6636c702c70 100644 --- a/sys/ufs/ffs/ffs_balloc.c +++ b/sys/ufs/ffs/ffs_balloc.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_balloc.c,v 1.47 2007/12/08 15:21:19 ad Exp $ */ +/* $NetBSD: ffs_balloc.c,v 1.48 2008/01/02 11:49:09 ad Exp $ */ /* * Copyright (c) 2002 Networks Associates Technology, Inc. 
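[Editorial sketch, not part of the commit.] The ffs_realloccg() hunks above illustrate the buffer-flag split applied throughout this change: the old single b_flags word is divided into b_flags (owned by the I/O path), b_oflags (BO_DONE, BO_DELWRI; serialized by the buffer's b_objlock) and b_cflags (BC_BUSY, BC_NOCACHE; serialized by the global bufcache_lock), so completion state can no longer be set racily. A compilable userland sketch of the same discipline follows; struct xbuf and the pthread mutexes are illustrative stand-ins for struct buf and kmutex_t, not kernel API. Build with -lpthread.

#include <pthread.h>

#define	BO_DONE		0x01	/* I/O complete; guarded by b_objlock */
#define	BO_DELWRI	0x02	/* delayed write pending; b_objlock */
#define	BC_BUSY		0x01	/* owned by a thread; bufcache_lock */
#define	BC_NOCACHE	0x02	/* discard after use; bufcache_lock */

static pthread_mutex_t bufcache_lock = PTHREAD_MUTEX_INITIALIZER;

struct xbuf {
	pthread_mutex_t	b_objlock;	/* guards b_oflags */
	int		b_oflags;	/* BO_*: owner/completion state */
	int		b_cflags;	/* BC_*: cache state */
};

/* Mark I/O complete, as the ffs_realloccg() hunks above now do. */
static void
xbuf_mark_done(struct xbuf *bp)
{

	pthread_mutex_lock(&bp->b_objlock);
	bp->b_oflags |= BO_DONE;
	pthread_mutex_unlock(&bp->b_objlock);
}

/* Claim the buffer, as the ffs_snapshot() flush loop below now does. */
static void
xbuf_mark_busy(struct xbuf *bp)
{

	pthread_mutex_lock(&bufcache_lock);
	bp->b_cflags |= BC_BUSY;
	pthread_mutex_unlock(&bufcache_lock);
}

int
main(void)
{
	struct xbuf b;

	pthread_mutex_init(&b.b_objlock, NULL);
	b.b_oflags = 0;
	b.b_cflags = 0;
	xbuf_mark_busy(&b);
	xbuf_mark_done(&b);
	return (b.b_oflags & BO_DONE) ? 0 : 1;
}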
@@ -41,7 +41,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.47 2007/12/08 15:21:19 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.48 2008/01/02 11:49:09 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -460,13 +460,13 @@ fail: } bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0, 0); - if (bp->b_flags & B_DELWRI) { + if (bp->b_oflags & BO_DELWRI) { nb = fsbtodb(fs, cgtod(fs, dtog(fs, dbtofsb(fs, bp->b_blkno)))); bwrite(bp); bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize, 0, 0); - if (bp->b_flags & B_DELWRI) { + if (bp->b_oflags & BO_DELWRI) { bwrite(bp); } else { brelse(bp, BC_INVAL); @@ -627,8 +627,10 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred, brelse(bp, 0); return (error); } + mutex_enter(&bp->b_interlock); bp->b_blkno = fsbtodb(fs, nb); bp->b_xflags |= BX_ALTDATA; + mutex_exit(&bp->b_interlock); *bpp = bp; return (0); } @@ -1026,13 +1028,13 @@ fail: } bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0, 0); - if (bp->b_flags & B_DELWRI) { + if (bp->b_oflags & BO_DELWRI) { nb = fsbtodb(fs, cgtod(fs, dtog(fs, dbtofsb(fs, bp->b_blkno)))); bwrite(bp); bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize, 0, 0); - if (bp->b_flags & B_DELWRI) { + if (bp->b_oflags & BO_DELWRI) { bwrite(bp); } else { brelse(bp, BC_INVAL); diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index 3ce72f20ba5f..97a1e9cffc19 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_extern.h,v 1.61 2007/12/08 19:29:54 pooka Exp $ */ +/* $NetBSD: ffs_extern.h,v 1.62 2008/01/02 11:49:09 ad Exp $ */ /*- * Copyright (c) 1991, 1993, 1994 @@ -131,6 +131,7 @@ int ffs_deleteextattr(void *); int ffs_lock(void *); int ffs_unlock(void *); int ffs_islocked(void *); +int ffs_full_fsync(struct vnode *, int); #ifdef SYSCTL_SETUP_PROTO SYSCTL_SETUP_PROTO(sysctl_vfs_ffs_setup); @@ -155,6 +156,7 @@ void softdep_initialize(void); void softdep_reinitialize(void); int softdep_mount(struct vnode *, struct mount *, struct fs *, kauth_cred_t); +void softdep_unmount(struct mount *); int softdep_flushworklist(struct mount *, int *, struct lwp *); int softdep_flushfiles(struct mount *, int, struct lwp *); void softdep_update_inodeblock(struct inode *, struct buf *, int); @@ -171,7 +173,7 @@ void softdep_setup_allocindir_page(struct inode *, daddr_t, struct buf *, int, daddr_t, daddr_t, struct buf *); void softdep_fsync_mountdev(struct vnode *); -int softdep_sync_metadata(void *); +int softdep_sync_metadata(struct vnode *); extern int (**ffs_vnodeop_p)(void *); extern int (**ffs_specop_p)(void *); diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index 9a7d9979d77a..0550b06e4a31 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_inode.c,v 1.92 2007/12/08 19:29:54 pooka Exp $ */ +/* $NetBSD: ffs_inode.c,v 1.93 2008/01/02 11:49:09 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_inode.c,v 1.92 2007/12/08 19:29:54 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_inode.c,v 1.93 2008/01/02 11:49:09 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -123,9 +123,18 @@ ffs_update(struct vnode *vp, const struct timespec *acc, return (error); } ip->i_flag &= ~(IN_MODIFIED | IN_ACCESSED); - if (DOINGSOFTDEP(vp)) + if (DOINGSOFTDEP(vp)) { + if (ip->i_omode != 0) { + /* + * XXX If the inode has been unlinked, wait + * for the update (and so dependencies) to + * flush. 
Ensures that the slate is clean + * when the inode is reused. + */ + waitfor |= UPDATE_WAIT; + } softdep_update_inodeblock(ip, bp, waitfor); - else if (ip->i_ffs_effnlink != ip->i_nlink) + } else if (ip->i_ffs_effnlink != ip->i_nlink) panic("ffs_update: bad link cnt"); if (fs->fs_magic == FS_UFS1_MAGIC) { cp = (char *)bp->b_data + @@ -231,7 +240,7 @@ ffs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred) if (error) return error; if (ioflag & IO_SYNC) { - simple_lock(&ovp->v_interlock); + mutex_enter(&ovp->v_interlock); VOP_PUTPAGES(ovp, trunc_page(osize & fs->fs_bmask), round_page(eob), PGO_CLEANIT | PGO_SYNCIO); @@ -281,7 +290,7 @@ ffs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred) osize); uvm_vnp_zerorange(ovp, length, eoz - length); if (round_page(eoz) > round_page(length)) { - simple_lock(&ovp->v_interlock); + mutex_enter(&ovp->v_interlock); error = VOP_PUTPAGES(ovp, round_page(length), round_page(eoz), PGO_CLEANIT | PGO_DEACTIVATE | @@ -559,7 +568,7 @@ ffs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, daddr_t lastbn, */ vp = ITOV(ip); bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0); - if (bp->b_flags & (B_DONE | B_DELWRI)) { + if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { /* Braces must be here in case trace evaluates to nothing. */ trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn); } else { diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c index 8fbe6970e257..de6ab812a9ae 100644 --- a/sys/ufs/ffs/ffs_snapshot.c +++ b/sys/ufs/ffs/ffs_snapshot.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_snapshot.c,v 1.56 2007/12/08 19:29:54 pooka Exp $ */ +/* $NetBSD: ffs_snapshot.c,v 1.57 2008/01/02 11:49:09 ad Exp $ */ /* * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved. @@ -38,7 +38,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.56 2007/12/08 19:29:54 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.57 2008/01/02 11:49:09 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -78,10 +78,10 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.56 2007/12/08 19:29:54 pooka Exp #define ufs2_daddr_t int64_t #define ufs_lbn_t daddr_t #define VI_MTX(v) (&(v)->v_interlock) -#define VI_LOCK(v) simple_lock(&(v)->v_interlock) -#define VI_UNLOCK(v) simple_unlock(&(v)->v_interlock) -#define MNT_ILOCK(v) simple_lock(&mntvnode_slock) -#define MNT_IUNLOCK(v) simple_unlock(&mntvnode_slock) +#define VI_LOCK(v) mutex_enter(&(v)->v_interlock) +#define VI_UNLOCK(v) mutex_exit(&(v)->v_interlock) +#define MNT_ILOCK(v) mutex_enter(&mntvnode_lock) +#define MNT_IUNLOCK(v) mutex_exit(&mntvnode_lock) #if !defined(FFS_NO_SNAPSHOT) static int cgaccount(int, struct vnode *, void *, int); @@ -204,7 +204,7 @@ ffs_snapshot(struct mount *mp, struct vnode *vp, #else /* defined(FFS_NO_SNAPSHOT) */ ufs2_daddr_t numblks, blkno, *blkp, snaplistsize = 0, *snapblklist; int error, ns, cg, snaploc; - int i, s, size, len, loc; + int i, size, len, loc; int flag = mp->mnt_flag; struct timeval starttime; #ifdef DEBUG @@ -220,7 +220,7 @@ ffs_snapshot(struct mount *mp, struct vnode *vp, struct inode *ip, *xp; struct buf *bp, *ibp, *nbp; struct vattr vat; - struct vnode *xvp, *nvp, *devvp; + struct vnode *xvp, *mvp, *devvp; struct snap_info *si; ns = UFS_FSNEEDSWAP(fs); @@ -426,29 +426,36 @@ ffs_snapshot(struct mount *mp, struct vnode *vp, */ snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) + FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */; + /* Allocate a marker vnode */ + if ((mvp = valloc(mp)) == NULL) { + 
error = ENOMEM; + goto out1; + } MNT_ILOCK(mp); -loop: /* * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() * and vclean() can be called indirectly */ - for (xvp = TAILQ_FIRST(&mp->mnt_vnodelist); xvp; xvp = nvp) { + for (xvp = TAILQ_FIRST(&mp->mnt_vnodelist); xvp; xvp = vunmark(mvp)) { + vmark(mvp, xvp); /* * Make sure this vnode wasn't reclaimed in getnewvnode(). * Start over if it has (it won't be on the list anymore). */ - if (xvp->v_mount != mp) - goto loop; + if (xvp->v_mount != mp || vismarker(xvp)) + continue; VI_LOCK(xvp); - nvp = TAILQ_NEXT(xvp, v_mntvnodes); - MNT_IUNLOCK(mp); if ((xvp->v_iflag & VI_XLOCK) || xvp->v_usecount == 0 || xvp->v_type == VNON || (VTOI(xvp)->i_flags & SF_SNAPSHOT)) { VI_UNLOCK(xvp); - MNT_ILOCK(mp); continue; } + MNT_IUNLOCK(mp); + /* + * XXXAD should increase vnode ref count to prevent it + * disappearing or being recycled. + */ VI_UNLOCK(xvp); #ifdef DEBUG if (snapdebug) @@ -492,11 +499,13 @@ loop: xp->i_mode); if (error) { free(copy_fs->fs_csp, M_UFSMNT); + (void)vunmark(mvp); goto out1; } MNT_ILOCK(mp); } MNT_IUNLOCK(mp); + vfree(mvp); /* * If there already exist snapshots on this filesystem, grab a * reference to their shared lock. If this is the first snapshot @@ -702,34 +711,27 @@ out: * Clean all dirty buffers now to avoid UBC inconsistencies. */ if (!error) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, 0, 0, PGO_ALLPAGES|PGO_CLEANIT|PGO_SYNCIO|PGO_FREE); } if (!error) { - s = splbio(); + mutex_enter(&bufcache_lock); for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); - simple_lock(&bp->b_interlock); - splx(s); - if ((bp->b_flags & (B_DELWRI|B_BUSY)) != B_DELWRI) - panic("ffs_snapshot: not dirty or busy, bp %p", - bp); - bp->b_flags |= B_BUSY|B_VFLUSH; + bp->b_cflags |= BC_BUSY|BC_VFLUSH; if (LIST_FIRST(&bp->b_dep) == NULL) - bp->b_flags |= B_NOCACHE; - simple_unlock(&bp->b_interlock); + bp->b_cflags |= BC_NOCACHE; + mutex_exit(&bufcache_lock); bwrite(bp); - s = splbio(); + mutex_enter(&bufcache_lock); } - simple_lock(&global_v_numoutput_slock); - while (vp->v_numoutput) { - vp->v_iflag |= VI_BWAIT; - ltsleep((void *)&vp->v_numoutput, PRIBIO+1, - "snapflushbuf", 0, &global_v_numoutput_slock); - } - simple_unlock(&global_v_numoutput_slock); - splx(s); + mutex_exit(&bufcache_lock); + + mutex_enter(&vp->v_interlock); + while (vp->v_numoutput > 0) + cv_wait(&vp->v_cv, &vp->v_interlock); + mutex_exit(&vp->v_interlock); } if (sbbuf) free(sbbuf, M_UFSMNT); @@ -958,7 +960,7 @@ indiracct_ufs1(struct vnode *snapvp, struct vnode *cancelvp, int level, */ bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0); bp->b_blkno = fsbtodb(fs, blkno); - if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 && + if ((bp->b_oflags & (BO_DONE | BO_DELWRI)) == 0 && (error = readfsblk(bp->b_vp, bp->b_data, fragstoblks(fs, blkno)))) { brelse(bp, 0); return (error); @@ -1226,7 +1228,7 @@ indiracct_ufs2(struct vnode *snapvp, struct vnode *cancelvp, int level, */ bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0); bp->b_blkno = fsbtodb(fs, blkno); - if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 && + if ((bp->b_oflags & (BO_DONE | BO_DELWRI)) == 0 && (error = readfsblk(bp->b_vp, bp->b_data, fragstoblks(fs, blkno)))) { brelse(bp, 0); return (error); @@ -2063,7 +2065,7 @@ readfsblk(struct vnode *vp, void *data, ufs2_daddr_t lbn) struct fs *fs = ip->i_fs; struct buf *nbp; - nbp = getiobuf(); + nbp = getiobuf(NULL, true); nbp->b_flags = B_READ; nbp->b_bcount = nbp->b_bufsize = fs->fs_bsize; nbp->b_error 
= 0; @@ -2071,7 +2073,6 @@ readfsblk(struct vnode *vp, void *data, ufs2_daddr_t lbn) nbp->b_blkno = nbp->b_rawblkno = fsbtodb(fs, blkstofrags(fs, lbn)); nbp->b_proc = NULL; nbp->b_dev = ip->i_devvp->v_rdev; - nbp->b_vp = NULLVP; bdev_strategy(nbp); @@ -2101,7 +2102,7 @@ readvnblk(struct vnode *vp, void *data, ufs2_daddr_t lbn) if (bn != (daddr_t)-1) { offset = dbtob(bn); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(offset), round_page(offset+fs->fs_bsize), PGO_CLEANIT|PGO_SYNCIO|PGO_FREE); @@ -2131,7 +2132,7 @@ writevnblk(struct vnode *vp, void *data, ufs2_daddr_t lbn) offset = lblktosize(fs, (off_t)lbn); s = cow_enter(); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(offset), round_page(offset+fs->fs_bsize), PGO_CLEANIT|PGO_SYNCIO|PGO_FREE); if (error == 0) @@ -2142,7 +2143,10 @@ writevnblk(struct vnode *vp, void *data, ufs2_daddr_t lbn) return error; bcopy(data, bp->b_data, fs->fs_bsize); - bp->b_flags |= B_NOCACHE; + mutex_enter(&bufcache_lock); + /* XXX Shouldn't need to lock for this, NOCACHE is only read later. */ + bp->b_cflags |= BC_NOCACHE; + mutex_exit(&bufcache_lock); return bwrite(bp); } diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index 713fff1a5d8d..3dfdc44e45aa 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_softdep.c,v 1.102 2007/12/08 19:29:55 pooka Exp $ */ +/* $NetBSD: ffs_softdep.c,v 1.103 2008/01/02 11:49:09 ad Exp $ */ /* * Copyright 1998 Marshall Kirk McKusick. All Rights Reserved. @@ -33,7 +33,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_softdep.c,v 1.102 2007/12/08 19:29:55 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_softdep.c,v 1.103 2008/01/02 11:49:09 ad Exp $"); #include #include @@ -62,11 +62,8 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_softdep.c,v 1.102 2007/12/08 19:29:55 pooka Exp #include -static struct pool sdpcpool; u_int softdep_lockedbufs; -extern struct simplelock bqueue_slock; /* XXX */ - MALLOC_JUSTDEFINE(M_PAGEDEP, "pagedep", "file page dependencies"); MALLOC_JUSTDEFINE(M_INODEDEP, "inodedep", "Inode depependencies"); MALLOC_JUSTDEFINE(M_NEWBLK, "newblk", "New block allocation"); @@ -131,13 +128,14 @@ LIST_HEAD(, buf) pcbphashhead[PCBPHASHSIZE]; * Internal function prototypes. 
*/ static void softdep_error(const char *, int); -static void drain_output(struct vnode *, int); +static void drain_output(struct vnode *); static int getdirtybuf(struct buf **, int); static void clear_remove(struct lwp *); static void clear_inodedeps(struct lwp *); static int flush_pagedep_deps(struct vnode *, struct mount *, struct diraddhd *); static int flush_inodedep_deps(struct fs *, ino_t); +static int flush_deplist(struct allocdirectlst *, int, int *); static int handle_written_filepage(struct pagedep *, struct buf *); static void diradd_inode_written(struct diradd *, struct inodedep *); static int handle_written_inodeblock(struct inodedep *, struct buf *); @@ -180,7 +178,7 @@ static int inodedep_lookup(struct fs *, ino_t, int, struct inodedep **); static int pagedep_lookup(struct inode *, daddr_t, int, struct pagedep **); static void pause_timer(void *); -static int request_cleanup(int, int); +static int request_cleanup(int); static void add_to_worklist(struct worklist *); static struct buf *softdep_setup_pagecache(struct inode *, daddr_t, long); @@ -188,9 +186,7 @@ static void softdep_collect_pagecache(struct inode *); static void softdep_free_pagecache(struct inode *); static struct vnode *softdep_lookupvp(struct fs *, ino_t); static struct buf *softdep_lookup_pcbp(struct vnode *, daddr_t); -#ifdef UVMHIST void softdep_pageiodone1(struct buf *); -#endif void softdep_pageiodone(struct buf *); void softdep_flush_vnode(struct vnode *, daddr_t); static void softdep_trackbufs(int, bool); @@ -220,149 +216,46 @@ static struct bio_ops bioops_softdep = { softdep_pageiodone, /* io_pageiodone */ }; -/* - * Locking primitives. - * - * For a uniprocessor, all we need to do is protect against disk - * interrupts. For a multiprocessor, this lock would have to be - * a mutex. A single mutex is used throughout this file, though - * finer grain locking could be used if contention warranted it. - * - * For a multiprocessor, the sleep call would accept a lock and - * release it after the sleep processing was complete. In a uniprocessor - * implementation there is no such interlock, so we simple mark - * the places where it needs to be done with the `interlocked' form - * of the lock calls. Since the uniprocessor sleep already interlocks - * the spl, there is nothing that really needs to be done. 
- */ -#ifndef /* NOT */ DEBUG -static struct lockit { - int lkt_spl; -} lk = { 0 }; -#define ACQUIRE_LOCK(lk) (lk)->lkt_spl = splbio() -#define FREE_LOCK(lk) splx((lk)->lkt_spl) -#define ACQUIRE_LOCK_INTERLOCKED(lk, s) (lk)->lkt_spl = s -#define FREE_LOCK_INTERLOCKED(lk) (lk)->lkt_spl - -#else /* DEBUG */ -static struct lockit { - int lkt_spl; - lwp_t *lkt_held; -} lk = { 0, NULL }; -static int lockcnt; - -static void acquire_lock(struct lockit *); -static void free_lock(struct lockit *); -static void acquire_lock_interlocked(struct lockit *, int); -static int free_lock_interlocked(struct lockit *); - -#define ACQUIRE_LOCK(lk) acquire_lock(lk) -#define FREE_LOCK(lk) free_lock(lk) -#define ACQUIRE_LOCK_INTERLOCKED(lk, s) acquire_lock_interlocked(lk, s) -#define FREE_LOCK_INTERLOCKED(lk) free_lock_interlocked(lk) - -static void -acquire_lock(lkp) - struct lockit *lkp; -{ - if (lkp->lkt_held != NULL) { - if (lkp->lkt_held == curlwp) - panic("softdep_lock: locking against myself"); - else - panic("softdep_lock: lock held by %p", lkp->lkt_held); - } - lkp->lkt_spl = splbio(); - lkp->lkt_held = curlwp; - lockcnt++; -} - -static void -free_lock(lkp) - struct lockit *lkp; -{ - - if (lkp->lkt_held == NULL) - panic("softdep_unlock: lock not held"); - lkp->lkt_held = NULL; - splx(lkp->lkt_spl); -} - -static void -acquire_lock_interlocked(lkp, s) - struct lockit *lkp; - int s; -{ - if (lkp->lkt_held != NULL) { - if (lkp->lkt_held == curlwp) - panic("softdep_lock_interlocked: locking against self"); - else - panic("softdep_lock_interlocked: lock held by %p", - lkp->lkt_held); - } - lkp->lkt_spl = s; - lkp->lkt_held = curlwp; - lockcnt++; -} - -static int -free_lock_interlocked(lkp) - struct lockit *lkp; -{ - if (lkp->lkt_held == NULL) - panic("softdep_unlock_interlocked: lock not held"); - lkp->lkt_held = NULL; - return lkp->lkt_spl; -} -#endif /* DEBUG */ +static kcondvar_t softdep_tb_cv; /* * Place holder for real semaphores. */ struct sema { - lwp_t *holder; - const char *name; + kcondvar_t cv; int value; - int prio; + struct lwp *holder; int timo; }; -static void sema_init(struct sema *, const char *, int, int); -static int sema_get(struct sema *, struct lockit *); +static void sema_init(struct sema *, const char *, int); +static int sema_get(struct sema *, kmutex_t *); static void sema_release(struct sema *); static void -sema_init(semap, name, prio, timo) +sema_init(semap, name, timo) struct sema *semap; const char *name; - int prio, timo; + int timo; { semap->holder = NULL; semap->value = 0; - semap->name = name; - semap->prio = prio; semap->timo = timo; + cv_init(&semap->cv, name); } static int sema_get(semap, interlock) struct sema *semap; - struct lockit *interlock; + kmutex_t *interlock; { - int s = 0; /* Quell an uninitialized variable warning */ if (semap->value++ > 0) { - if (interlock != NULL) - s = FREE_LOCK_INTERLOCKED(interlock); - tsleep((void *)semap, semap->prio, semap->name, semap->timo); - if (interlock != NULL) { - ACQUIRE_LOCK_INTERLOCKED(interlock, s); - FREE_LOCK(interlock); - } + cv_wait(&semap->cv, interlock); return (0); } semap->holder = curlwp; - if (interlock != NULL) - FREE_LOCK(interlock); + mutex_exit(interlock); return (1); } @@ -375,7 +268,7 @@ sema_release(semap) panic("sema_release: not held"); if (--semap->value > 0) { semap->value = 0; - wakeup(semap); + cv_broadcast(&semap->cv); } semap->holder = NULL; } @@ -384,118 +277,100 @@ sema_release(semap) * Memory management. 
*/ -static struct pool pagedep_pool; -static struct pool inodedep_pool; -static struct pool newblk_pool; -static struct pool bmsafemap_pool; -static struct pool allocdirect_pool; -static struct pool indirdep_pool; -static struct pool allocindir_pool; -static struct pool freefrag_pool; -static struct pool freeblks_pool; -static struct pool freefile_pool; -static struct pool diradd_pool; -static struct pool mkdir_pool; -static struct pool dirrem_pool; -static struct pool newdirblk_pool; +static pool_cache_t softdep_small_cache; +static pool_cache_t softdep_medium_cache; +static pool_cache_t softdep_large_cache; -static inline void +static inline void * +softdep_alloc(const int type) +{ + + switch (type) { + case D_BMSAFEMAP: + case D_INDIRDEP: + case D_FREEFRAG: + case D_FREEFILE: + case D_DIRADD: + case D_MKDIR: + case D_DIRREM: + case D_NEWDIRBLK: + return pool_cache_get(softdep_small_cache, PR_WAITOK); + + case D_ALLOCDIRECT: + case D_ALLOCINDIR: + case D_PAGEDEP: + case D_INODEDEP: + return pool_cache_get(softdep_medium_cache, PR_WAITOK); + + case D_FREEBLKS: + return pool_cache_get(softdep_large_cache, PR_WAITOK); + + default: + panic("softdep_alloc"); + } +} + +static void softdep_free(struct worklist *item, int type) { switch (type) { - - case D_PAGEDEP: - pool_put(&pagedep_pool, item); - return; - - case D_INODEDEP: - pool_put(&inodedep_pool, item); - return; - case D_BMSAFEMAP: - pool_put(&bmsafemap_pool, item); + case D_INDIRDEP: + case D_FREEFRAG: + case D_FREEFILE: + case D_DIRADD: + case D_MKDIR: + case D_DIRREM: + case D_NEWDIRBLK: + pool_cache_put(softdep_small_cache, item); return; case D_ALLOCDIRECT: - pool_put(&allocdirect_pool, item); - return; - - case D_INDIRDEP: - pool_put(&indirdep_pool, item); - return; - case D_ALLOCINDIR: - pool_put(&allocindir_pool, item); + case D_PAGEDEP: + case D_INODEDEP: + pool_cache_put(softdep_medium_cache, item); return; - case D_FREEFRAG: - pool_put(&freefrag_pool, item); - return; case D_FREEBLKS: - pool_put(&freeblks_pool, item); + pool_cache_put(softdep_large_cache, item); return; - - case D_FREEFILE: - pool_put(&freefile_pool, item); - return; - - case D_DIRADD: - pool_put(&diradd_pool, item); - return; - - case D_MKDIR: - pool_put(&mkdir_pool, item); - return; - - case D_DIRREM: - pool_put(&dirrem_pool, item); - return; - - case D_NEWDIRBLK: - pool_put(&newdirblk_pool, item); - return; - } panic("softdep_free: unknown type %d", type); } +static kmutex_t freequeue_lock; struct workhead softdep_freequeue; -static inline void -softdep_freequeue_add(struct worklist *item) -{ - int s; - - s = splbio(); - LIST_INSERT_HEAD(&softdep_freequeue, item, wk_list); - splx(s); -} - -static inline void +static void softdep_freequeue_process(void) { struct worklist *wk; - while ((wk = LIST_FIRST(&softdep_freequeue)) != NULL) { + while (!LIST_EMPTY(&softdep_freequeue)) { + mutex_enter(&freequeue_lock); + if ((wk = LIST_FIRST(&softdep_freequeue)) == NULL) { + mutex_exit(&freequeue_lock); + return; + } LIST_REMOVE(wk, wk_list); - FREE_LOCK(&lk); + mutex_exit(&freequeue_lock); softdep_free(wk, wk->wk_type); - ACQUIRE_LOCK(&lk); } } static char emerginoblk[MAXBSIZE]; static int emerginoblk_inuse; static const struct buf *emerginoblk_origbp; -static struct simplelock emerginoblk_slock = SIMPLELOCK_INITIALIZER; +static kmutex_t emerginoblk_lock; +static kcondvar_t emerginoblk_cv; -static inline void * +static void * inodedep_allocdino(struct inodedep *inodedep, const struct buf *origbp, size_t size) { void *vp; - int s; KASSERT(inodedep->id_savedino1 == 
NULL); @@ -506,17 +381,14 @@ inodedep_allocdino(struct inodedep *inodedep, const struct buf *origbp, if (vp) return vp; - s = splbio(); - simple_lock(&emerginoblk_slock); + mutex_enter(&emerginoblk_lock); while (emerginoblk_inuse && emerginoblk_origbp != origbp) - ltsleep(&emerginoblk_inuse, PVM, "emdino", 0, - &emerginoblk_slock); + cv_wait(&emerginoblk_cv, &emerginoblk_lock); emerginoblk_origbp = origbp; emerginoblk_inuse++; KASSERT(emerginoblk_inuse <= sizeof(emerginoblk) / MIN(sizeof(struct ufs1_dinode), sizeof(struct ufs2_dinode))); - simple_unlock(&emerginoblk_slock); - splx(s); + mutex_exit(&emerginoblk_lock); KASSERT(inodedep->id_savedino1 == NULL); @@ -528,7 +400,7 @@ inodedep_allocdino(struct inodedep *inodedep, const struct buf *origbp, return vp; } -static inline void +static void inodedep_freedino(struct inodedep *inodedep) { void *vp = inodedep->id_savedino1; @@ -537,17 +409,12 @@ inodedep_freedino(struct inodedep *inodedep) KASSERT(vp != NULL); if (__predict_false((void *)&emerginoblk[0] <= vp && vp < (void *)&emerginoblk[MAXBSIZE])) { - int s; - KASSERT(emerginoblk_inuse > 0); - s = splbio(); - simple_lock(&emerginoblk_slock); + mutex_enter(&emerginoblk_lock); emerginoblk_inuse--; if (emerginoblk_inuse == 0) - wakeup(&emerginoblk_inuse); - simple_unlock(&emerginoblk_slock); - splx(s); - + cv_broadcast(&emerginoblk_cv); + mutex_exit(&emerginoblk_lock); return; } @@ -556,39 +423,31 @@ inodedep_freedino(struct inodedep *inodedep) /* * Worklist queue management. - * These routines require that the lock be held. */ -#ifndef /* NOT */ DEBUG -#define WORKLIST_INSERT(head, item) do { \ - (item)->wk_state |= ONWORKLIST; \ - LIST_INSERT_HEAD(head, item, wk_list); \ -} while (0) -#define WORKLIST_REMOVE(item) do { \ - (item)->wk_state &= ~ONWORKLIST; \ - LIST_REMOVE(item, wk_list); \ -} while (0) -#define WORKITEM_FREE(item, type) \ - softdep_freequeue_add((struct worklist *)item) - -#else /* DEBUG */ -static void worklist_insert(struct workhead *, struct worklist *); -static void worklist_remove(struct worklist *); -static void workitem_free(struct worklist *, int); - -#define WORKLIST_INSERT(head, item) worklist_insert(head, item) -#define WORKLIST_REMOVE(item) worklist_remove(item) -#define WORKITEM_FREE(item, type) workitem_free((struct worklist *)item, type) +static void worklist_insert(struct workhead *, struct worklist *); +static void worklist_remove(struct worklist *); +static void workitem_free(void *, int); static void worklist_insert(head, item) struct workhead *head; struct worklist *item; { +#ifdef DIAGNOSTIC + struct worklist *test; + + if (item->wk_type == D_FREEFILE) { + LIST_FOREACH(test, head, wk_list) { + if (test->wk_type == D_FREEFILE) { + panic("worklist_insert: freefile"); + } + } + } +#endif + + KASSERT(mutex_owned(&bufcache_lock)); + KASSERT((item->wk_state & ONWORKLIST) == 0); - if (lk.lkt_held == NULL) - panic("worklist_insert: lock not held"); - if (item->wk_state & ONWORKLIST) - panic("worklist_insert: already on list"); item->wk_state |= ONWORKLIST; LIST_INSERT_HEAD(head, item, wk_list); } @@ -598,23 +457,24 @@ worklist_remove(item) struct worklist *item; { - if (lk.lkt_held == NULL) - panic("worklist_remove: lock not held"); - if ((item->wk_state & ONWORKLIST) == 0) - panic("worklist_remove: not on list"); + KASSERT(mutex_owned(&bufcache_lock)); + KASSERT((item->wk_state & ONWORKLIST) != 0); + item->wk_state &= ~ONWORKLIST; LIST_REMOVE(item, wk_list); } static void -workitem_free(struct worklist *item, int type) +workitem_free(void *object, int type) { 
+ struct worklist *item = object; - if (item->wk_state & ONWORKLIST) - panic("workitem_free: still on list"); - softdep_freequeue_add(item); + KASSERT((item->wk_state & ONWORKLIST) == 0); + + mutex_enter(&freequeue_lock); + LIST_INSERT_HEAD(&softdep_freequeue, item, wk_list); + mutex_exit(&freequeue_lock); } -#endif /* DEBUG */ /* * Workitem queue management @@ -623,9 +483,11 @@ static struct workhead softdep_workitem_pending; static struct worklist *worklist_tail; static int softdep_worklist_busy; /* 1 => trying to do unmount */ static int softdep_worklist_req; /* serialized waiters */ +static kcondvar_t softdep_worklist_cv; static int max_softdeps; /* maximum number of structs before slowdown */ static int tickdelay = 2; /* number of ticks to pause during slowdown */ static int proc_waiting; /* tracks whether we have a timeout posted */ +static kcondvar_t proc_wait_cv; static callout_t pause_timer_ch; static lwp_t *filesys_syncer; /* filesystem syncer thread */ static int req_clear_inodedeps; /* syncer process flush some inodedeps */ @@ -703,9 +565,8 @@ softdep_process_worklist(matchmnt) * First process any items on the delayed-free queue. */ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); softdep_freequeue_process(); - FREE_LOCK(&lk); /* * Record the process identifier of our caller so that we can give @@ -713,6 +574,7 @@ softdep_process_worklist(matchmnt) */ filesys_syncer = l; matchcnt = 0; + /* * There is no danger of having multiple processes run this * code. It is single threaded solely so that softdep_flushfiles @@ -720,8 +582,10 @@ softdep_process_worklist(matchmnt) * related to its mount point that are in the list. */ if (matchmnt == NULL) { - if (softdep_worklist_busy < 0) + if (softdep_worklist_busy < 0) { + mutex_exit(&bufcache_lock); return (-1); + } softdep_worklist_busy += 1; } @@ -731,14 +595,13 @@ softdep_process_worklist(matchmnt) if (req_clear_inodedeps) { clear_inodedeps(l); req_clear_inodedeps = 0; - wakeup(&proc_waiting); + cv_broadcast(&proc_wait_cv); } if (req_clear_remove) { clear_remove(l); req_clear_remove = 0; - wakeup(&proc_waiting); + cv_broadcast(&proc_wait_cv); } - ACQUIRE_LOCK(&lk); while ((wk = LIST_FIRST(&softdep_workitem_pending)) != 0) { /* * Remove the item to be processed. If we are removing the last @@ -747,14 +610,14 @@ softdep_process_worklist(matchmnt) * we just run down the list to find it rather than tracking it * in the above loop. */ - WORKLIST_REMOVE(wk); + worklist_remove(wk); if (wk == worklist_tail) { LIST_FOREACH(wkend, &softdep_workitem_pending, wk_list) if (LIST_NEXT(wkend, wk_list) == NULL) break; worklist_tail = wkend; } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); switch (wk->wk_type) { case D_DIRREM: @@ -794,13 +657,13 @@ softdep_process_worklist(matchmnt) TYPENAME(wk->wk_type)); /* NOTREACHED */ } + mutex_enter(&bufcache_lock); /* * If a umount operation wants to run the worklist * accurately, abort. */ if (softdep_worklist_req && matchmnt == NULL) { - ACQUIRE_LOCK(&lk); matchcnt = -1; break; } @@ -810,27 +673,26 @@ softdep_process_worklist(matchmnt) if (req_clear_inodedeps) { clear_inodedeps(l); req_clear_inodedeps = 0; - wakeup(&proc_waiting); + cv_broadcast(&proc_wait_cv); } if (req_clear_remove) { clear_remove(l); req_clear_remove = 0; - wakeup(&proc_waiting); + cv_broadcast(&proc_wait_cv); } /* * Process any new items on the delayed-free queue. 
*/ - ACQUIRE_LOCK(&lk); softdep_freequeue_process(); } if (matchmnt == NULL) { softdep_worklist_busy -= 1; if (softdep_worklist_req && softdep_worklist_busy == 0) - wakeup(&softdep_worklist_req); + cv_broadcast(&softdep_worklist_cv); } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return (matchcnt); } @@ -844,10 +706,10 @@ softdep_move_dependencies(oldbp, newbp) { struct worklist *wk, *wktail; + mutex_enter(&bufcache_lock); if (LIST_FIRST(&newbp->b_dep) != NULL) panic("softdep_move_dependencies: need merge code"); wktail = 0; - ACQUIRE_LOCK(&lk); while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) { LIST_REMOVE(wk, wk_list); if (wktail == 0) @@ -856,7 +718,7 @@ softdep_move_dependencies(oldbp, newbp) LIST_INSERT_AFTER(wktail, wk, wk_list); wktail = wk; } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } /* @@ -877,12 +739,15 @@ softdep_flushworklist(oldmnt, countp, l) /* * Await our turn to clear out the queue. */ + mutex_enter(&bufcache_lock); while (softdep_worklist_busy) { softdep_worklist_req += 1; - tsleep(&softdep_worklist_req, PRIBIO, "softflush", 0); + cv_wait(&softdep_worklist_cv, &bufcache_lock); softdep_worklist_req -= 1; } softdep_worklist_busy = -1; + mutex_exit(&bufcache_lock); + /* * Alternately flush the block device associated with the mount * point and process any dependencies that the flushing @@ -899,9 +764,13 @@ softdep_flushworklist(oldmnt, countp, l) if (error) break; } + + mutex_enter(&bufcache_lock); softdep_worklist_busy = 0; if (softdep_worklist_req) - wakeup(&softdep_worklist_req); + cv_broadcast(&softdep_worklist_cv); + mutex_exit(&bufcache_lock); + return (error); } @@ -985,7 +854,6 @@ static struct sema pagedep_in_progress; * when asked to allocate but not associated with any buffer. * If not found, allocate if DEPALLOC flag is passed. * Found or allocated entry is returned in pagedeppp. - * This routine must be called with splbio interrupts blocked. */ static int pagedep_lookup(ip, lbn, flags, pagedeppp) @@ -999,10 +867,8 @@ pagedep_lookup(ip, lbn, flags, pagedeppp) struct mount *mp; int i; -#ifdef DEBUG - if (lk.lkt_held == NULL) - panic("pagedep_lookup: lock not held"); -#endif + KASSERT(mutex_owned(&bufcache_lock)); + mp = ITOV(ip)->v_mount; pagedephd = &pagedep_hashtbl[PAGEDEP_HASH(mp, ip->i_number, lbn)]; top: @@ -1023,11 +889,9 @@ top: *pagedeppp = NULL; return (0); } - if (sema_get(&pagedep_in_progress, &lk) == 0) { - ACQUIRE_LOCK(&lk); + if (sema_get(&pagedep_in_progress, &bufcache_lock) == 0) goto top; - } - pagedep = pool_get(&pagedep_pool, PR_WAITOK); + pagedep = pool_cache_get(softdep_medium_cache, PR_WAITOK); bzero(pagedep, sizeof(struct pagedep)); pagedep->pd_list.wk_type = D_PAGEDEP; pagedep->pd_mnt = mp; @@ -1037,7 +901,7 @@ top: LIST_INIT(&pagedep->pd_pendinghd); for (i = 0; i < DAHASHSZ; i++) LIST_INIT(&pagedep->pd_diraddhd[i]); - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash); sema_release(&pagedep_in_progress); *pagedeppp = pagedep; @@ -1058,7 +922,6 @@ static struct sema inodedep_in_progress; * Look up a inodedep. Return 1 if found, 0 if not found. * If not found, allocate if DEPALLOC flag is passed. * Found or allocated entry is returned in inodedeppp. - * This routine must be called with splbio interrupts blocked. 
*/ static int inodedep_lookup(fs, inum, flags, inodedeppp) @@ -1071,10 +934,8 @@ inodedep_lookup(fs, inum, flags, inodedeppp) struct inodedep_hashhead *inodedephd; int firsttry; -#ifdef DEBUG - if (lk.lkt_held == NULL) - panic("inodedep_lookup: lock not held"); -#endif + KASSERT(mutex_owned(&bufcache_lock)); + firsttry = 1; inodedephd = &inodedep_hashtbl[INODEDEP_HASH(fs, inum)]; top: @@ -1094,16 +955,14 @@ top: * If we are over our limit, try to improve the situation. */ if (num_inodedep > max_softdeps && firsttry && speedup_syncer() == 0 && - request_cleanup(FLUSH_INODES, 1)) { + request_cleanup(FLUSH_INODES)) { firsttry = 0; goto top; } - if (sema_get(&inodedep_in_progress, &lk) == 0) { - ACQUIRE_LOCK(&lk); + if (sema_get(&inodedep_in_progress, &bufcache_lock) == 0) goto top; - } num_inodedep += 1; - inodedep = pool_get(&inodedep_pool, PR_WAITOK); + inodedep = softdep_alloc(D_INODEDEP); inodedep->id_list.wk_type = D_INODEDEP; inodedep->id_fs = fs; inodedep->id_ino = inum; @@ -1117,7 +976,7 @@ top: LIST_INIT(&inodedep->id_bufwait); TAILQ_INIT(&inodedep->id_inoupdt); TAILQ_INIT(&inodedep->id_newinoupdt); - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); LIST_INSERT_HEAD(inodedephd, inodedep, id_hash); sema_release(&inodedep_in_progress); *inodedeppp = inodedep; @@ -1149,6 +1008,8 @@ newblk_lookup(fs, newblkno, flags, newblkpp) struct newblk *newblk; struct newblk_hashhead *newblkhd; + KASSERT(mutex_owned(&bufcache_lock)); + newblkhd = NEWBLK_HASH(fs, newblkno); top: for (newblk = LIST_FIRST(newblkhd); newblk; @@ -1163,12 +1024,13 @@ top: *newblkpp = NULL; return (0); } - if (sema_get(&newblk_in_progress, 0) == 0) + if (sema_get(&newblk_in_progress, &bufcache_lock) == 0) goto top; - newblk = pool_get(&newblk_pool, PR_WAITOK); + newblk = pool_cache_get(softdep_small_cache, PR_WAITOK); newblk->nb_state = 0; newblk->nb_fs = fs; newblk->nb_newblkno = newblkno; + mutex_enter(&bufcache_lock); LIST_INSERT_HEAD(newblkhd, newblk, nb_hash); sema_release(&newblk_in_progress); *newblkpp = newblk; @@ -1184,56 +1046,71 @@ softdep_initialize() { int i; + mutex_init(&emerginoblk_lock, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&freequeue_lock, MUTEX_DEFAULT, IPL_NONE); + cv_init(&softdep_tb_cv, "softdbuf"); + cv_init(&proc_wait_cv, "softdep"); + cv_init(&emerginoblk_cv, "emdino"); + cv_init(&softdep_worklist_cv, "softflsh"); + callout_init(&pause_timer_ch, CALLOUT_MPSAFE); + bioopsp = &bioops_softdep; malloc_type_attach(M_PAGEDEP); malloc_type_attach(M_INODEDEP); malloc_type_attach(M_NEWBLK); - callout_init(&pause_timer_ch, CALLOUT_MPSAFE); - pool_init(&sdpcpool, sizeof(struct buf), 0, 0, 0, "sdpcpool", - &pool_allocator_nointr, IPL_NONE); - pool_init(&pagedep_pool, sizeof(struct pagedep), 0, 0, 0, "pagedeppl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&inodedep_pool, sizeof(struct inodedep), 0, 0, 0,"inodedeppl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&newblk_pool, sizeof(struct newblk), 0, 0, 0, "newblkpl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&bmsafemap_pool, sizeof(struct bmsafemap), 0, 0, 0, - "bmsafemappl", &pool_allocator_nointr, IPL_NONE); - pool_init(&allocdirect_pool, sizeof(struct allocdirect), 0, 0, 0, - "allocdirectpl", &pool_allocator_nointr, IPL_NONE); - pool_init(&indirdep_pool, sizeof(struct indirdep), 0, 0, 0,"indirdeppl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&allocindir_pool, sizeof(struct allocindir), 0, 0, 0, - "allocindirpl", &pool_allocator_nointr, IPL_NONE); - pool_init(&freefrag_pool, sizeof(struct freefrag), 0, 0, 0, - "freefragpl", 
&pool_allocator_nointr, IPL_NONE); - pool_init(&freeblks_pool, sizeof(struct freeblks), 0, 0, 0, - "freeblkspl", &pool_allocator_nointr, IPL_NONE); - pool_init(&freefile_pool, sizeof(struct freefile), 0, 0, 0, - "freefilepl", &pool_allocator_nointr, IPL_NONE); - pool_init(&diradd_pool, sizeof(struct diradd), 0, 0, 0, "diraddpl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&mkdir_pool, sizeof(struct mkdir), 0, 0, 0, "mkdirpl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&dirrem_pool, sizeof(struct dirrem), 0, 0, 0, "dirrempl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&newdirblk_pool, sizeof (struct newdirblk), 0, 0, 0, - "newdirblkpl", &pool_allocator_nointr, IPL_NONE); + i = sizeof(struct freeblks); + if (i < sizeof(struct buf)) + i = sizeof(struct buf); + softdep_large_cache = pool_cache_init(i, 0, 0, 0, "sdeplarge", NULL, + IPL_NONE, NULL, NULL, NULL); + KASSERT(softdep_large_cache != NULL); /* XXX */ + + i = sizeof(struct allocdirect); + if (i < sizeof(struct allocindir)) + i = sizeof(struct allocindir); + if (i < sizeof(struct pagedep)) + i = sizeof(struct pagedep); + if (i < sizeof(struct inodedep)) + i = sizeof(struct inodedep); + softdep_medium_cache = pool_cache_init(i, 0, 0, 0, "sdepmedium", NULL, + IPL_NONE, NULL, NULL, NULL); + KASSERT(softdep_medium_cache != NULL); /* XXX */ + + i = sizeof(struct newblk); + if (i < sizeof(struct bmsafemap)) + i = sizeof(struct bmsafemap); + if (i < sizeof(struct indirdep)) + i = sizeof(struct indirdep); + if (i < sizeof(struct freefrag)) + i = sizeof(struct freefrag); + if (i < sizeof(struct freefile)) + i = sizeof(struct freefile); + if (i < sizeof(struct diradd)) + i = sizeof(struct diradd); + if (i < sizeof(struct mkdir)) + i = sizeof(struct mkdir); + if (i < sizeof(struct dirrem)) + i = sizeof(struct dirrem); + if (i < sizeof(struct newdirblk)) + i = sizeof(struct newdirblk); + softdep_small_cache = pool_cache_init(i, 0, 0, 0, "sdepsmall", NULL, + IPL_NONE, NULL, NULL, NULL); + KASSERT(softdep_small_cache != NULL); /* XXX */ LIST_INIT(&mkdirlisthd); LIST_INIT(&softdep_workitem_pending); max_softdeps = desiredvnodes / 4; pagedep_hashtbl = hashinit(max_softdeps / 2, HASH_LIST, M_PAGEDEP, M_WAITOK, &pagedep_hash); - sema_init(&pagedep_in_progress, "pagedep", PRIBIO, 0); + sema_init(&pagedep_in_progress, "pagedep", 0); inodedep_hashtbl = hashinit(max_softdeps / 2, HASH_LIST, M_INODEDEP, M_WAITOK, &inodedep_hash); - sema_init(&inodedep_in_progress, "inodedep", PRIBIO, 0); + sema_init(&inodedep_in_progress, "inodedep", 0); newblk_hashtbl = hashinit(64, HASH_LIST, M_NEWBLK, M_WAITOK, &newblk_hash); - sema_init(&newblk_in_progress, "newblk", PRIBIO, 0); + sema_init(&newblk_in_progress, "newblk", 0); for (i = 0; i < PCBPHASHSIZE; i++) { LIST_INIT(&pcbphashhead[i]); } @@ -1259,7 +1136,7 @@ softdep_reinitialize() max_softdeps = desiredvnodes * 4; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); oldhash1 = pagedep_hashtbl; oldmask1 = pagedep_hash; pagedep_hashtbl = hash1; @@ -1283,7 +1160,7 @@ softdep_reinitialize() LIST_INSERT_HEAD(&hash2[val], inodedep, id_hash); } } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); hashdone(oldhash1, M_PAGEDEP); hashdone(oldhash2, M_INODEDEP); } @@ -1312,7 +1189,8 @@ softdep_mount(devvp, mp, fs, cred) int needswap = UFS_FSNEEDSWAP(fs); #endif - mp->mnt_flag &= ~MNT_ASYNC; + mp->mnt_flag &= ~MNT_ASYNC; /* XXXSMP */ + /* * When doing soft updates, the counters in the * superblock may have gotten out of sync, so we have @@ -1321,6 +1199,7 @@ softdep_mount(devvp, mp, fs, cred) if ((fs->fs_clean & 
FS_ISCLEAN) || (fs->fs_fmod != 0 && (fs->fs_clean & FS_WASCLEAN))) return (0); + bzero(&cstotal, sizeof cstotal); for (cyl = 0; cyl < fs->fs_ncg; cyl++) { if ((error = bread(devvp, fsbtodb(fs, cgtod(fs, cyl)), @@ -1344,6 +1223,12 @@ softdep_mount(devvp, mp, fs, cred) return (0); } +void +softdep_unmount(struct mount *mp) +{ + +} + /* * Protecting the freemaps (or bitmaps). * @@ -1394,14 +1279,14 @@ softdep_setup_inomapdep(bp, ip, newinum) * Otherwise add it to the dependency list for the buffer holding * the cylinder group map from which it was allocated. */ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (inodedep_lookup(ip->i_fs, newinum, DEPALLOC, &inodedep) != 0) panic("softdep_setup_inomapdep: found inode"); inodedep->id_buf = bp; inodedep->id_state &= ~DEPCOMPLETE; bmsafemap = bmsafemap_lookup(bp); LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } /* @@ -1422,19 +1307,18 @@ softdep_setup_blkmapdep(bp, fs, newblkno) * Add it to the dependency list for the buffer holding * the cylinder group map from which it was allocated. */ + mutex_enter(&bufcache_lock); if (newblk_lookup(fs, newblkno, DEPALLOC, &newblk) != 0) panic("softdep_setup_blkmapdep: found block"); - ACQUIRE_LOCK(&lk); newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(bp); LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } /* * Find the bmsafemap associated with a cylinder group buffer. * If none exists, create one. The buffer must be locked when - * this routine is called and this routine must be called with - * splbio interrupts blocked. + * this routine is called. */ static struct bmsafemap * bmsafemap_lookup(bp) @@ -1443,15 +1327,13 @@ bmsafemap_lookup(bp) struct bmsafemap *bmsafemap; struct worklist *wk; -#ifdef DEBUG - if (lk.lkt_held == NULL) - panic("bmsafemap_lookup: lock not held"); -#endif + KASSERT(mutex_owned(&bufcache_lock)); + for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) if (wk->wk_type == D_BMSAFEMAP) return (WK_BMSAFEMAP(wk)); - FREE_LOCK(&lk); - bmsafemap = pool_get(&bmsafemap_pool, PR_WAITOK); + mutex_exit(&bufcache_lock); + bmsafemap = softdep_alloc(D_BMSAFEMAP); bmsafemap->sm_list.wk_type = D_BMSAFEMAP; bmsafemap->sm_list.wk_state = 0; bmsafemap->sm_buf = bp; @@ -1459,8 +1341,8 @@ bmsafemap_lookup(bp) LIST_INIT(&bmsafemap->sm_allocindirhd); LIST_INIT(&bmsafemap->sm_inodedephd); LIST_INIT(&bmsafemap->sm_newblkhd); - ACQUIRE_LOCK(&lk); - WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list); + mutex_enter(&bufcache_lock); + worklist_insert(&bp->b_dep, &bmsafemap->sm_list); return (bmsafemap); } @@ -1511,7 +1393,7 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) struct newblk *newblk; UVMHIST_FUNC("softdep_setup_allocdirect"); UVMHIST_CALLED(ubchist); - adp = pool_get(&allocdirect_pool, PR_WAITOK); + adp = softdep_alloc(D_ALLOCDIRECT); bzero(adp, sizeof(struct allocdirect)); adp->ad_list.wk_type = D_ALLOCDIRECT; adp->ad_lbn = lbn; @@ -1525,6 +1407,7 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) else adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize); + mutex_enter(&bufcache_lock); if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0) panic("softdep_setup_allocdirect: lost block"); @@ -1542,7 +1425,6 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) UVMHIST_LOG(ubchist, "bp = %p, size = %ld -> %ld", bp, oldsize, newsize, 0); } - ACQUIRE_LOCK(&lk); (void) 
inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep); adp->ad_inodedep = inodedep; @@ -1556,8 +1438,8 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps); } LIST_REMOVE(newblk, nb_hash); - pool_put(&newblk_pool, newblk); - WORKLIST_INSERT(&bp->b_dep, &adp->ad_list); + pool_cache_put(softdep_small_cache, newblk); + worklist_insert(&bp->b_dep, &adp->ad_list); if (lbn >= NDADDR) { /* allocating an indirect block */ if (oldblkno != 0) @@ -1572,7 +1454,7 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) */ if ((ip->i_mode & IFMT) == IFDIR && pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) - WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); + worklist_insert(&bp->b_dep, &pagedep->pd_list); } /* * The list of allocdirects must be kept in sorted and ascending @@ -1593,7 +1475,7 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) TAILQ_INSERT_TAIL(adphead, adp, ad_next); if (oldadp != NULL && oldadp->ad_lbn == lbn) allocdirect_merge(adphead, adp, oldadp); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return; } for (oldadp = TAILQ_FIRST(adphead); oldadp; @@ -1607,12 +1489,11 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) TAILQ_INSERT_BEFORE(oldadp, adp, ad_next); if (oldadp->ad_lbn == lbn) allocdirect_merge(adphead, adp, oldadp); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } /* * Replace an old allocdirect dependency with a newer one. - * This routine must be called with splbio interrupts blocked. */ static void allocdirect_merge(adphead, newadp, oldadp) @@ -1624,10 +1505,8 @@ allocdirect_merge(adphead, newadp, oldadp) struct freefrag *freefrag; struct newdirblk *newdirblk; -#ifdef DEBUG - if (lk.lkt_held == NULL) - panic("allocdirect_merge: lock not held"); -#endif + KASSERT(mutex_owned(&bufcache_lock)); + if (newadp->ad_oldblkno != oldadp->ad_newblkno || newadp->ad_oldsize != oldadp->ad_newsize || newadp->ad_lbn >= NDADDR) @@ -1667,10 +1546,10 @@ allocdirect_merge(adphead, newadp, oldadp) */ if ((wk = LIST_FIRST(&oldadp->ad_newdirblk)) != NULL) { newdirblk = WK_NEWDIRBLK(wk); - WORKLIST_REMOVE(&newdirblk->db_list); + worklist_remove(&newdirblk->db_list); if (LIST_FIRST(&oldadp->ad_newdirblk) != NULL) panic("allocdirect_merge: extra newdirblk"); - WORKLIST_INSERT(&newadp->ad_newdirblk, &newdirblk->db_list); + worklist_insert(&newadp->ad_newdirblk, &newdirblk->db_list); } free_allocdirect(adphead, oldadp, 0); } @@ -1692,7 +1571,7 @@ newfreefrag(ip, blkno, size) fs = ip->i_fs; if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag) panic("newfreefrag: frag size"); - freefrag = pool_get(&freefrag_pool, PR_WAITOK); + freefrag = softdep_alloc(D_FREEFRAG); freefrag->ff_list.wk_type = D_FREEFRAG; freefrag->ff_state = ip->i_uid & ~ONWORKLIST; /* XXX - used below */ freefrag->ff_inum = ip->i_number; @@ -1715,7 +1594,7 @@ handle_workitem_freefrag(freefrag) ffs_blkfree(ump->um_fs, ump->um_devvp, freefrag->ff_blkno, freefrag->ff_fragsize, freefrag->ff_inum); - pool_put(&freefrag_pool, freefrag); + pool_cache_put(softdep_small_cache, freefrag); } /* @@ -1755,7 +1634,7 @@ newallocindir(ip, ptrno, newblkno, oldblkno) { struct allocindir *aip; - aip = pool_get(&allocindir_pool, PR_WAITOK); + aip = softdep_alloc(D_ALLOCINDIR); bzero(aip, sizeof(struct allocindir)); aip->ai_list.wk_type = D_ALLOCINDIR; aip->ai_state = ATTACHED; @@ -1784,10 +1663,10 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, 
nbp) struct pagedep *pagedep; aip = newallocindir(ip, ptrno, newblkno, oldblkno); + mutex_enter(&bufcache_lock); if (nbp == NULL) { nbp = softdep_setup_pagecache(ip, lbn, ip->i_fs->fs_bsize); } - ACQUIRE_LOCK(&lk); /* * If we are allocating a directory page, then we must @@ -1796,10 +1675,10 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp) */ if ((ip->i_mode & IFMT) == IFDIR && pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) - WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list); - WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); - FREE_LOCK(&lk); + worklist_insert(&nbp->b_dep, &pagedep->pd_list); + worklist_insert(&nbp->b_dep, &aip->ai_list); setup_allocindir_phase2(bp, ip, aip); + mutex_exit(&bufcache_lock); } /* @@ -1817,10 +1696,10 @@ softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno) struct allocindir *aip; aip = newallocindir(ip, ptrno, newblkno, 0); - ACQUIRE_LOCK(&lk); - WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); - FREE_LOCK(&lk); + mutex_enter(&bufcache_lock); + worklist_insert(&nbp->b_dep, &aip->ai_list); setup_allocindir_phase2(bp, ip, aip); + mutex_exit(&bufcache_lock); } /* @@ -1840,10 +1719,11 @@ setup_allocindir_phase2(bp, ip, aip) struct freefrag *freefrag; struct newblk *newblk; + KASSERT(mutex_owned(&bufcache_lock)); + if (bp->b_lblkno >= 0) panic("setup_allocindir_phase2: not indir blk"); for (indirdep = NULL, newindirdep = NULL; ; ) { - ACQUIRE_LOCK(&lk); for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) { if (wk->wk_type != D_INDIRDEP) @@ -1853,15 +1733,13 @@ setup_allocindir_phase2(bp, ip, aip) } if (indirdep == NULL && newindirdep) { indirdep = newindirdep; - WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list); + worklist_insert(&bp->b_dep, &indirdep->ir_list); newindirdep = NULL; } - FREE_LOCK(&lk); if (indirdep) { if (newblk_lookup(ip->i_fs, aip->ai_newblkno, 0, &newblk) == 0) panic("setup_allocindir: lost block"); - ACQUIRE_LOCK(&lk); if (newblk->nb_state == DEPCOMPLETE) { aip->ai_state |= DEPCOMPLETE; aip->ai_buf = NULL; @@ -1873,7 +1751,7 @@ setup_allocindir_phase2(bp, ip, aip) aip, ai_deps); } LIST_REMOVE(newblk, nb_hash); - pool_put(&newblk_pool, newblk); + pool_cache_put(softdep_small_cache, newblk); aip->ai_indirdep = indirdep; /* * Check to see if there is an existing dependency @@ -1905,20 +1783,25 @@ setup_allocindir_phase2(bp, ip, aip) else ((int64_t *)indirdep->ir_savebp->b_data) [aip->ai_offset] = aip->ai_oldblkno; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if (freefrag != NULL) handle_workitem_freefrag(freefrag); - } + } else + mutex_exit(&bufcache_lock); if (newindirdep) { if (indirdep->ir_savebp != NULL) { - brelse(newindirdep->ir_savebp, 0); + mutex_enter(&bufcache_lock); + brelsel(newindirdep->ir_savebp, 0); softdep_trackbufs(-1, false); + mutex_exit(&bufcache_lock); } - WORKITEM_FREE(newindirdep, D_INDIRDEP); + workitem_free(newindirdep, D_INDIRDEP); } - if (indirdep) + if (indirdep) { + mutex_enter(&bufcache_lock); break; - newindirdep = pool_get(&indirdep_pool, PR_WAITOK); + } + newindirdep = softdep_alloc(D_INDIRDEP); newindirdep->ir_list.wk_type = D_INDIRDEP; newindirdep->ir_state = ATTACHED; if (ip->i_ump->um_fstype == UFS1) @@ -1929,11 +1812,12 @@ setup_allocindir_phase2(bp, ip, aip) VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL); } - softdep_trackbufs(1, true); newindirdep->ir_savebp = getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, 0); newindirdep->ir_savebp->b_flags |= B_ASYNC; bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount); + mutex_enter(&bufcache_lock); + 
softdep_trackbufs(1, true); } } @@ -1978,6 +1862,7 @@ softdep_setup_freeblocks( struct vnode *vp = ITOV(ip); struct buf *bp; struct fs *fs = ip->i_fs; + struct ufsmount *ump = ip->i_ump; int i, error, delayx; #ifdef FFS_EI const int needswap = UFS_FSNEEDSWAP(fs); @@ -1985,7 +1870,7 @@ softdep_setup_freeblocks( if (length != 0) panic("softdep_setup_freeblocks: non-zero length"); - freeblks = pool_get(&freeblks_pool, PR_WAITOK); + freeblks = softdep_alloc(D_FREEBLKS); bzero(freeblks, sizeof(struct freeblks)); freeblks->fb_list.wk_type = D_FREEBLKS; freeblks->fb_uid = ip->i_uid; @@ -2025,8 +1910,12 @@ softdep_setup_freeblocks( * accounted for then (see softdep_filereleased()). If the * file is merely being truncated, then we account for it now. */ - if ((ip->i_flag & IN_SPACECOUNTED) == 0) + if ((ip->i_flag & IN_SPACECOUNTED) == 0) { + mutex_enter(&ump->um_lock); fs->fs_pendingblocks += freeblks->fb_chkcnt; + mutex_exit(&ump->um_lock); + } + /* * Push the zero'ed inode to to its disk buffer so that we are free * to delete its dependencies below. Once the dependencies are gone @@ -2062,7 +1951,7 @@ softdep_setup_freeblocks( /* * Find and eliminate any inode dependencies. */ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); (void) inodedep_lookup(fs, ip->i_number, DEPALLOC, &inodedep); if ((inodedep->id_state & IOSTARTED) != 0) panic("softdep_setup_freeblocks: inode busy"); @@ -2075,7 +1964,7 @@ softdep_setup_freeblocks( */ delayx = (inodedep->id_state & DEPCOMPLETE); if (delayx) - WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list); + worklist_insert(&inodedep->id_bufwait, &freeblks->fb_list); /* * Because the file length has been truncated to zero, any * pending block allocation dependency structures associated @@ -2094,7 +1983,7 @@ softdep_setup_freeblocks( merge_inode_lists(inodedep); while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0) free_allocdirect(&inodedep->id_inoupdt, adp, delayx); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); bdwrite(bp); /* * We must wait for any I/O in progress to finish so that @@ -2102,20 +1991,18 @@ softdep_setup_freeblocks( * Once they are all there, walk the list and get rid of * any dependencies. 
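+ *
+ * The wait itself is drain_output(), whose second argument is gone in
+ * this change.  With splbio out of the picture it presumably blocks on
+ * the vnode's interlock and condvar, along these lines (a sketch only;
+ * v_numoutput is the real output counter, while the condvar name v_cv
+ * is this note's assumption):
+ *
+ *	static void
+ *	drain_output(struct vnode *vp)
+ *	{
+ *
+ *		mutex_enter(&vp->v_interlock);
+ *		while (vp->v_numoutput > 0)
+ *			cv_wait(&vp->v_cv, &vp->v_interlock);
+ *		mutex_exit(&vp->v_interlock);
+ *	}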
*/ - ACQUIRE_LOCK(&lk); - drain_output(vp, 1); + drain_output(vp); + mutex_enter(&bufcache_lock); while (getdirtybuf(&vp->v_dirtyblkhd.lh_first, MNT_WAIT)) { bp = vp->v_dirtyblkhd.lh_first; (void) inodedep_lookup(fs, ip->i_number, 0, &inodedep); deallocate_dependencies(bp, inodedep); - FREE_LOCK(&lk); - brelse(bp, BC_INVAL | BC_NOCACHE); - ACQUIRE_LOCK(&lk); + brelsel(bp, BC_INVAL | BC_NOCACHE); } softdep_free_pagecache(ip); if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0) (void) free_inodedep(inodedep); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); /* * If the inode has never been written to disk (delayx == 0), * then we can process the freeblks now that we have deleted @@ -2174,8 +2061,8 @@ deallocate_dependencies(bp, inodedep) panic("deallocate_dependencies: not indir"); bcopy(bp->b_data, indirdep->ir_savebp->b_data, bp->b_bcount); - WORKLIST_REMOVE(wk); - WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, wk); + worklist_remove(wk); + worklist_insert(&indirdep->ir_savebp->b_dep, wk); continue; case D_PAGEDEP: @@ -2205,7 +2092,7 @@ deallocate_dependencies(bp, inodedep) ALLCOMPLETE) add_to_worklist(&dirrem->dm_list); else - WORKLIST_INSERT(&inodedep->id_bufwait, + worklist_insert(&inodedep->id_bufwait, &dirrem->dm_list); } if ((pagedep->pd_state & NEWBLOCK) != 0) { @@ -2215,17 +2102,17 @@ deallocate_dependencies(bp, inodedep) pagedep) break; if (wk != NULL) { - WORKLIST_REMOVE(wk); + worklist_remove(wk); free_newdirblk(WK_NEWDIRBLK(wk)); } else { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("deallocate_dependencies: " "lost pagedep"); } } - WORKLIST_REMOVE(&pagedep->pd_list); + worklist_remove(&pagedep->pd_list); LIST_REMOVE(pagedep, pd_hash); - WORKITEM_FREE(pagedep, D_PAGEDEP); + workitem_free(pagedep, D_PAGEDEP); continue; case D_ALLOCINDIR: @@ -2248,7 +2135,6 @@ deallocate_dependencies(bp, inodedep) /* * Free an allocdirect. Generate a new freefrag work request if appropriate. - * This routine must be called with splbio interrupts blocked. */ static void free_allocdirect(adphead, adp, delayx) @@ -2259,39 +2145,36 @@ free_allocdirect(adphead, adp, delayx) struct newdirblk *newdirblk; struct worklist *wk; -#ifdef DEBUG - if (lk.lkt_held == NULL) - panic("free_allocdirect: lock not held"); -#endif + KASSERT(mutex_owned(&bufcache_lock)); + if ((adp->ad_state & DEPCOMPLETE) == 0) LIST_REMOVE(adp, ad_deps); TAILQ_REMOVE(adphead, adp, ad_next); if ((adp->ad_state & COMPLETE) == 0) - WORKLIST_REMOVE(&adp->ad_list); + worklist_remove(&adp->ad_list); if (adp->ad_freefrag != NULL) { if (delayx) - WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, + worklist_insert(&adp->ad_inodedep->id_bufwait, &adp->ad_freefrag->ff_list); else add_to_worklist(&adp->ad_freefrag->ff_list); } if ((wk = LIST_FIRST(&adp->ad_newdirblk)) != NULL) { newdirblk = WK_NEWDIRBLK(wk); - WORKLIST_REMOVE(&newdirblk->db_list); + worklist_remove(&newdirblk->db_list); if (LIST_FIRST(&adp->ad_newdirblk) != NULL) panic("free_allocdirect: extra newdirblk"); if (delayx) - WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, + worklist_insert(&adp->ad_inodedep->id_bufwait, &newdirblk->db_list); else free_newdirblk(newdirblk); } - WORKITEM_FREE(adp, D_ALLOCDIRECT); + workitem_free(adp, D_ALLOCDIRECT); } /* * Free a newdirblk. Clear the NEWBLOCK flag on its associated pagedep. - * This routine must be called with splbio interrupts blocked. 
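+ *
+ * The caller's obligation is now the buffer cache lock, which the
+ * KASSERT below checks in place of the old DEBUG panic; a call site
+ * looks roughly like this sketch:
+ *
+ *	mutex_enter(&bufcache_lock);
+ *	free_newdirblk(newdirblk);
+ *	mutex_exit(&bufcache_lock);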
*/ static void free_newdirblk(newdirblk) @@ -2301,10 +2184,8 @@ free_newdirblk(newdirblk) struct newdirblk *newdirblk; { struct pagedep *pagedep; struct diradd *dap; int i; -#ifdef DEBUG - if (lk.lkt_held == NULL) - panic("free_newdirblk: lock not held"); -#endif + KASSERT(mutex_owned(&bufcache_lock)); + /* * If the pagedep is still linked onto the directory buffer * dependency chain, then some of the entries on the @@ -2328,9 +2209,9 @@ break; if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0) { LIST_REMOVE(pagedep, pd_hash); - WORKITEM_FREE(pagedep, D_PAGEDEP); + workitem_free(pagedep, D_PAGEDEP); } - WORKITEM_FREE(newdirblk, D_NEWDIRBLK); + workitem_free(newdirblk, D_NEWDIRBLK); } /* @@ -2341,13 +2222,14 @@ void softdep_freefile(struct vnode *pvp, ino_t ino, int mode) { struct inode *ip = VTOI(pvp); + struct ufsmount *ump = ip->i_ump; struct inodedep *inodedep; struct freefile *freefile; /* * This sets up the inode de-allocation dependency. */ - freefile = pool_get(&freefile_pool, PR_WAITOK); + freefile = softdep_alloc(D_FREEFILE); freefile->fx_list.wk_type = D_FREEFILE; freefile->fx_list.wk_state = 0; freefile->fx_mode = mode; @@ -2355,8 +2237,11 @@ softdep_freefile(struct vnode *pvp, ino_t ino, int mode) freefile->fx_devvp = ip->i_devvp; freefile->fx_fs = ip->i_fs; freefile->fx_mnt = ITOV(ip)->v_mount; - if ((ip->i_flag & IN_SPACECOUNTED) == 0) + if ((ip->i_flag & IN_SPACECOUNTED) == 0) { + mutex_enter(&ump->um_lock); ip->i_fs->fs_pendinginodes += 1; + mutex_exit(&ump->um_lock); + } /* * If the inodedep does not exist, then the zero'ed inode has @@ -2364,21 +2249,21 @@ softdep_freefile(struct vnode *pvp, ino_t ino, int mode) * written to disk, then the on-disk inode is zero'ed. In either * case we can free the file immediately. */ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) == 0 || check_inode_unwritten(inodedep)) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); handle_workitem_freefile(freefile); return; } - WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list); - FREE_LOCK(&lk); + worklist_insert(&inodedep->id_inowait, &freefile->fx_list); + mutex_exit(&bufcache_lock); + ip->i_flag |= IN_MODIFIED; } /* * Check to see if an inode has never been written to disk. If * so free the inodedep and return success, otherwise return failure. - * This routine must be called with splbio interrupts blocked. * * If we still have a bitmap dependency, then the inode has never * been written to disk. 
Drop the dependency as it is no longer @@ -2395,6 +2280,8 @@ check_inode_unwritten(inodedep) struct inodedep *inodedep; { + KASSERT(mutex_owned(&bufcache_lock)); + if ((inodedep->id_state & DEPCOMPLETE) != 0 || LIST_FIRST(&inodedep->id_pendinghd) != NULL || LIST_FIRST(&inodedep->id_bufwait) != NULL || @@ -2407,7 +2294,7 @@ check_inode_unwritten(inodedep) LIST_REMOVE(inodedep, id_deps); inodedep->id_buf = NULL; if (inodedep->id_state & ONWORKLIST) - WORKLIST_REMOVE(&inodedep->id_list); + worklist_remove(&inodedep->id_list); if (inodedep->id_savedino1 != NULL) { inodedep_freedino(inodedep); } @@ -2424,6 +2311,8 @@ free_inodedep(inodedep) struct inodedep *inodedep; { + KASSERT(mutex_owned(&bufcache_lock)); + if ((inodedep->id_state & ONWORKLIST) != 0 || (inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE || LIST_FIRST(&inodedep->id_pendinghd) != NULL || @@ -2434,7 +2323,7 @@ free_inodedep(inodedep) inodedep->id_nlinkdelta != 0 || inodedep->id_savedino1 != NULL) return (0); LIST_REMOVE(inodedep, id_hash); - WORKITEM_FREE(inodedep, D_INODEDEP); + workitem_free(inodedep, D_INODEDEP); num_inodedep -= 1; return (1); } @@ -2458,9 +2347,11 @@ handle_workitem_freeblocks(freeblks) int64_t blocksreleased = 0; int error, allerror = 0; daddr_t baselbns[NIADDR], tmpval; + struct ufsmount *ump; - devvp = freeblks->fb_ump->um_devvp; - fs = freeblks->fb_ump->um_fs; + ump = freeblks->fb_ump; + devvp = ump->um_devvp; + fs = ump->um_fs; tmpval = 1; baselbns[0] = NDADDR; for (i = 1; i < NIADDR; i++) { @@ -2492,7 +2383,9 @@ handle_workitem_freeblocks(freeblks) continue; bsize = sblksize(fs, freeblks->fb_oldsize, i); ffs_blkfree(fs, devvp, bn, bsize, freeblks->fb_previousinum); + mutex_enter(&ump->um_lock); fs->fs_pendingblocks -= btodb(bsize); + mutex_exit(&ump->um_lock); blocksreleased += btodb(bsize); } @@ -2502,7 +2395,7 @@ handle_workitem_freeblocks(freeblks) if (allerror) softdep_error("handle_workitem_freeblks", allerror); #endif /* DIAGNOSTIC */ - WORKITEM_FREE(freeblks, D_FREEBLKS); + workitem_free(freeblks, D_FREEBLKS); } /* @@ -2524,13 +2417,14 @@ indir_trunc(freeblks, dbn, level, lbn, countp) int32_t *bap1 = NULL; int64_t *bap2 = NULL; daddr_t nb; - struct fs *fs = freeblks->fb_ump->um_fs; + struct ufsmount *ump = freeblks->fb_ump; + struct fs *fs = ump->um_fs; struct worklist *wk; struct indirdep *indirdep; daddr_t lbnadd; int i, nblocks, ufs1fmt; int error, allerror = 0; - struct vnode *devvp = freeblks->fb_ump->um_devvp; + struct vnode *devvp = ump->um_devvp; #ifdef FFS_EI const int needswap = UFS_FSNEEDSWAP(fs); #endif @@ -2550,21 +2444,21 @@ indir_trunc(freeblks, dbn, level, lbn, countp) * a complete copy of the indirect block in memory for our use. * Otherwise we have to read the blocks in from the disk. 
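+ *
+ * Note that incore() examines buffer cache state, so the lookup below
+ * now runs under bufcache_lock, and the release at the end of this
+ * routine uses brelsel() because the lock is already held.  brelse()
+ * is assumed here to be nothing more than the locked wrapper:
+ *
+ *	void
+ *	brelse(struct buf *bp, int set)
+ *	{
+ *
+ *		mutex_enter(&bufcache_lock);
+ *		brelsel(bp, set);
+ *		mutex_exit(&bufcache_lock);
+ *	}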
*/ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if ((bp = incore(devvp, dbn)) != NULL && (wk = LIST_FIRST(&bp->b_dep)) != NULL) { if (wk->wk_type != D_INDIRDEP || (indirdep = WK_INDIRDEP(wk))->ir_savebp != bp || (indirdep->ir_state & GOINGAWAY) == 0) panic("indir_trunc: lost indirdep"); - WORKLIST_REMOVE(wk); - WORKITEM_FREE(indirdep, D_INDIRDEP); + worklist_remove(wk); + mutex_exit(&bufcache_lock); + workitem_free(indirdep, D_INDIRDEP); if (LIST_FIRST(&bp->b_dep) != NULL) panic("indir_trunc: dangling dep"); - FREE_LOCK(&lk); } else { - FREE_LOCK(&lk); softdep_trackbufs(1, false); + mutex_exit(&bufcache_lock); error = bread(devvp, dbn, (int)fs->fs_bsize, NOCRED, &bp); if (error) return (error); @@ -2596,17 +2490,20 @@ indir_trunc(freeblks, dbn, level, lbn, countp) } ffs_blkfree(fs, devvp, nb, fs->fs_bsize, freeblks->fb_previousinum); + mutex_enter(&ump->um_lock); fs->fs_pendingblocks -= nblocks; + mutex_exit(&ump->um_lock); *countp += nblocks; } - brelse(bp, BC_INVAL | BC_NOCACHE); + mutex_enter(&bufcache_lock); + brelsel(bp, BC_INVAL | BC_NOCACHE); softdep_trackbufs(-1, false); + mutex_exit(&bufcache_lock); return (allerror); } /* * Free an allocindir. - * This routine must be called with splbio interrupts blocked. */ static void free_allocindir(aip, inodedep) @@ -2615,23 +2512,21 @@ free_allocindir(aip, inodedep) { struct freefrag *freefrag; -#ifdef DEBUG - if (lk.lkt_held == NULL) - panic("free_allocindir: lock not held"); -#endif + KASSERT(mutex_owned(&bufcache_lock)); + if ((aip->ai_state & DEPCOMPLETE) == 0) LIST_REMOVE(aip, ai_deps); if (aip->ai_state & ONWORKLIST) - WORKLIST_REMOVE(&aip->ai_list); + worklist_remove(&aip->ai_list); LIST_REMOVE(aip, ai_next); if ((freefrag = aip->ai_freefrag) != NULL) { if (inodedep == NULL) add_to_worklist(&freefrag->ff_list); else - WORKLIST_INSERT(&inodedep->id_bufwait, + worklist_insert(&inodedep->id_bufwait, &freefrag->ff_list); } - WORKITEM_FREE(aip, D_ALLOCINDIR); + workitem_free(aip, D_ALLOCINDIR); } /* @@ -2688,27 +2583,27 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) fs = dp->i_fs; lbn = lblkno(fs, diroffset); offset = blkoff(fs, diroffset); - dap = pool_get(&diradd_pool, PR_WAITOK); + dap = softdep_alloc(D_DIRADD); bzero(dap, sizeof(struct diradd)); dap->da_list.wk_type = D_DIRADD; dap->da_offset = offset; dap->da_newinum = newinum; dap->da_state = ATTACHED; if (isnewblk && lbn < NDADDR && fragoff(fs, diroffset) == 0) { - newdirblk = pool_get(&newdirblk_pool, PR_WAITOK); + newdirblk = softdep_alloc(D_NEWDIRBLK); newdirblk->db_list.wk_type = D_NEWDIRBLK; newdirblk->db_state = 0; } if (newdirbp == NULL) { dap->da_state |= DEPCOMPLETE; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); } else { dap->da_state |= MKDIR_BODY | MKDIR_PARENT; - mkdir1 = pool_get(&mkdir_pool, PR_WAITOK); + mkdir1 = softdep_alloc(D_MKDIR); mkdir1->md_list.wk_type = D_MKDIR; mkdir1->md_state = MKDIR_BODY; mkdir1->md_diradd = dap; - mkdir2 = pool_get(&mkdir_pool, PR_WAITOK); + mkdir2 = softdep_alloc(D_MKDIR); mkdir2->md_list.wk_type = D_MKDIR; mkdir2->md_state = MKDIR_PARENT; mkdir2->md_diradd = dap; @@ -2716,29 +2611,29 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) * Dependency on "." and ".." being written to disk. 
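+ *
+ * (Two mkdir work items record this: MKDIR_BODY stands for the block
+ * holding "." and ".." reaching the disk, MKDIR_PARENT for the
+ * parent's incremented link count reaching the disk.  The parent's
+ * entry for the new directory may not be committed until both have
+ * completed.)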
*/ mkdir1->md_buf = newdirbp; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs); - WORKLIST_INSERT(&newdirbp->b_dep, &mkdir1->md_list); - FREE_LOCK(&lk); + worklist_insert(&newdirbp->b_dep, &mkdir1->md_list); + mutex_exit(&bufcache_lock); bdwrite(newdirbp); /* * Dependency on link count increase for parent directory */ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0 || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { dap->da_state &= ~MKDIR_PARENT; - WORKITEM_FREE(mkdir2, D_MKDIR); + workitem_free(mkdir2, D_MKDIR); } else { LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs); - WORKLIST_INSERT(&inodedep->id_bufwait,&mkdir2->md_list); + worklist_insert(&inodedep->id_bufwait,&mkdir2->md_list); } } /* * Link into parent directory pagedep to await its being written. */ if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0) - WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); + worklist_insert(&bp->b_dep, &pagedep->pd_list); dap->da_pagedep = pagedep; LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap, da_pdlist); @@ -2751,7 +2646,7 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) diradd_inode_written(dap, inodedep); else - WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); + worklist_insert(&inodedep->id_bufwait, &dap->da_list); if (isnewblk) { /* * Directories growing into indirect blocks are rare @@ -2761,7 +2656,7 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) * new directory entry to disk. */ if (lbn >= NDADDR) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); /* * We only have a new allocation when at the * beginning of a new block, not when we are @@ -2778,12 +2673,12 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) * are already tracking this block. */ if (fragoff(fs, diroffset) != 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return (0); } if ((pagedep->pd_state & NEWBLOCK) != 0) { - WORKITEM_FREE(newdirblk, D_NEWDIRBLK); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); + workitem_free(newdirblk, D_NEWDIRBLK); return (0); } /* @@ -2793,14 +2688,14 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) panic("softdep_setup_directory_add: lost inodedep"); adp = TAILQ_LAST(&inodedep->id_newinoupdt, allocdirectlst); if (adp == NULL || adp->ad_lbn != lbn) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("softdep_setup_directory_add: lost entry"); } pagedep->pd_state |= NEWBLOCK; newdirblk->db_pagedep = pagedep; - WORKLIST_INSERT(&adp->ad_newdirblk, &newdirblk->db_list); + worklist_insert(&adp->ad_newdirblk, &newdirblk->db_list); } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return (0); } @@ -2824,7 +2719,7 @@ softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize) struct diradd *dap; daddr_t lbn; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); lbn = lblkno(dp->i_fs, dp->i_offset); offset = blkoff(dp->i_fs, dp->i_offset); if (pagedep_lookup(dp, lbn, 0, &pagedep) == 0) @@ -2854,12 +2749,11 @@ softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize) } done: bcopy(oldloc, newloc, entrysize); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } /* - * Free a diradd dependency structure. This routine must be called - * with splbio interrupts blocked. + * Free a diradd dependency structure. 
*/ static void free_diradd(dap) @@ -2870,11 +2764,9 @@ free_diradd(dap) struct inodedep *inodedep; struct mkdir *mkdir, *nextmd; -#ifdef DEBUG - if (lk.lkt_held == NULL) - panic("free_diradd: lock not held"); -#endif - WORKLIST_REMOVE(&dap->da_list); + KASSERT(mutex_owned(&bufcache_lock)); + + worklist_remove(&dap->da_list); LIST_REMOVE(dap, da_pdlist); if ((dap->da_state & DIRCHG) == 0) { pagedep = dap->da_pagedep; @@ -2893,14 +2785,14 @@ free_diradd(dap) if (mkdir->md_diradd != dap) continue; dap->da_state &= ~mkdir->md_state; - WORKLIST_REMOVE(&mkdir->md_list); + worklist_remove(&mkdir->md_list); LIST_REMOVE(mkdir, md_mkdirs); - WORKITEM_FREE(mkdir, D_MKDIR); + workitem_free(mkdir, D_MKDIR); } if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) panic("free_diradd: unfound ref"); } - WORKITEM_FREE(dap, D_DIRADD); + workitem_free(dap, D_DIRADD); } /* @@ -2948,7 +2840,7 @@ softdep_setup_remove(bp, dp, ip, isrmdir) if ((dirrem->dm_state & COMPLETE) == 0) { LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem, dm_next); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } else { u_int ipflag, dpflag; struct vnode *vp = ITOV(ip); @@ -2958,7 +2850,7 @@ softdep_setup_remove(bp, dp, ip, isrmdir) LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, prevdirrem, dm_next); dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); ipflag = vn_setrecurse(vp); dpflag = vn_setrecurse(dvp); handle_workitem_remove(dirrem); @@ -2991,16 +2883,8 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp) */ if (ip == NULL) panic("newdirrem: whiteout"); - /* - * If we are over our limit, try to improve the situation. - * Limiting the number of dirrem structures will also limit - * the number of freefile and freeblks structures. - */ - if (num_dirrem > max_softdeps / 2 && speedup_syncer() == 0) - (void) request_cleanup(FLUSH_REMOVE, 0); - num_dirrem += 1; - dirrem = pool_get(&dirrem_pool, PR_WAITOK); + dirrem = softdep_alloc(D_DIRREM); bzero(dirrem, sizeof(struct dirrem)); dirrem->dm_list.wk_type = D_DIRREM; dirrem->dm_state = isrmdir ? RMDIR : 0; @@ -3008,11 +2892,20 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp) dirrem->dm_oldinum = ip->i_number; *prevdirremp = NULL; - ACQUIRE_LOCK(&lk); + /* + * If we are over our limit, try to improve the situation. + * Limiting the number of dirrem structures will also limit + * the number of freefile and freeblks structures. + */ + mutex_enter(&bufcache_lock); + num_dirrem += 1; + if (num_dirrem > max_softdeps / 2 && speedup_syncer() == 0) { + (void) request_cleanup(FLUSH_REMOVE); + } lbn = lblkno(dp->i_fs, dp->i_offset); offset = blkoff(dp->i_fs, dp->i_offset); if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0) - WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); + worklist_insert(&bp->b_dep, &pagedep->pd_list); dirrem->dm_pagedep = pagedep; /* * Check for a diradd dependency for the same directory entry. @@ -3097,7 +2990,7 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) * Whiteouts do not need diradd dependencies. 
*/ if (newinum != WINO) { - dap = pool_get(&diradd_pool, PR_WAITOK); + dap = softdep_alloc(D_DIRADD); bzero(dap, sizeof(struct diradd)); dap->da_list.wk_type = D_DIRADD; dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE; @@ -3137,7 +3030,7 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) dirrem->dm_dirinum = pagedep->pd_ino; add_to_worklist(&dirrem->dm_list); } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return; } @@ -3175,13 +3068,13 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { dap->da_state |= COMPLETE; LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); - WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); + worklist_insert(&inodedep->id_pendinghd, &dap->da_list); } else { LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap, da_pdlist); - WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); + worklist_insert(&inodedep->id_bufwait, &dap->da_list); } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } /* @@ -3196,12 +3089,12 @@ softdep_change_linkcnt(ip) { struct inodedep *inodedep; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); (void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep); if (ip->i_nlink < ip->i_ffs_effnlink) panic("softdep_change_linkcnt: bad delta"); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_ffs_effnlink; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } /* @@ -3216,6 +3109,7 @@ softdep_releasefile(ip) struct inode *ip; /* inode with the zero effective link count */ { struct inodedep *inodedep; + struct ufsmount *ump; if (ip->i_ffs_effnlink > 0) panic("softdep_filerelease: file still referenced"); @@ -3236,12 +3130,16 @@ softdep_releasefile(ip) * If we are tracking an nlinkdelta, we have to also remember * whether we accounted for the freed space yet. 
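+ *
+ * The pending counts in the superblock are now covered by the
+ * per-mount lock rather than by splbio, so each update below takes
+ * the form:
+ *
+ *	mutex_enter(&ump->um_lock);
+ *	fs->fs_pendingblocks += DIP(ip, blocks);
+ *	mutex_exit(&ump->um_lock);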
*/ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if ((inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep))) inodedep->id_state |= SPACECOUNTED; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); + + ump = ip->i_ump; + mutex_enter(&ump->um_lock); ip->i_fs->fs_pendingblocks += DIP(ip, blocks); ip->i_fs->fs_pendinginodes += 1; + mutex_exit(&ump->um_lock); ip->i_flag |= IN_SPACECOUNTED; } @@ -3265,7 +3163,7 @@ handle_workitem_remove(dirrem) return; } ip = VTOI(vp); - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) == 0) panic("handle_workitem_remove: lost inodedep"); /* @@ -3278,10 +3176,10 @@ handle_workitem_remove(dirrem) if (ip->i_nlink < ip->i_ffs_effnlink) panic("handle_workitem_remove: bad file delta"); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_ffs_effnlink; - FREE_LOCK(&lk); - vput(vp); num_dirrem -= 1; - WORKITEM_FREE(dirrem, D_DIRREM); + workitem_free(dirrem, D_DIRREM); + mutex_exit(&bufcache_lock); + vput(vp); return; } /* @@ -3297,7 +3195,7 @@ handle_workitem_remove(dirrem) if (ip->i_nlink < ip->i_ffs_effnlink) panic("handle_workitem_remove: bad dir delta"); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_ffs_effnlink; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if ((error = ffs_truncate(vp, (off_t)0, 0, l->l_cred)) != 0) softdep_error("handle_workitem_remove: truncate", error); /* @@ -3307,8 +3205,10 @@ handle_workitem_remove(dirrem) */ if (dirrem->dm_state & DIRCHG) { vput(vp); + mutex_enter(&bufcache_lock); num_dirrem -= 1; - WORKITEM_FREE(dirrem, D_DIRREM); + mutex_exit(&bufcache_lock); + workitem_free(dirrem, D_DIRREM); return; } /* @@ -3317,19 +3217,19 @@ handle_workitem_remove(dirrem) * written to disk, then the on-disk inode is zero'ed. In either * case we can remove the file immediately. */ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); dirrem->dm_state = 0; oldinum = dirrem->dm_oldinum; dirrem->dm_oldinum = dirrem->dm_dirinum; if (inodedep_lookup(ip->i_fs, oldinum, 0, &inodedep) == 0 || check_inode_unwritten(inodedep)) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); vput(vp); handle_workitem_remove(dirrem); return; } - WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list); - FREE_LOCK(&lk); + worklist_insert(&inodedep->id_inowait, &dirrem->dm_list); + mutex_exit(&bufcache_lock); ip->i_flag |= IN_CHANGE; ffs_update(vp, NULL, NULL, 0); vput(vp); @@ -3356,19 +3256,25 @@ handle_workitem_freefile(freefile) #ifdef DEBUG struct inodedep *idp; #endif + struct ufsmount *ump; int error; #ifdef DEBUG - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (inodedep_lookup(freefile->fx_fs, freefile->fx_oldinum, 0, &idp)) panic("handle_workitem_freefile: inodedep survived"); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); #endif + + ump = VFSTOUFS(freefile->fx_mnt); + mutex_enter(&ump->um_lock); freefile->fx_fs->fs_pendinginodes -= 1; + mutex_exit(&ump->um_lock); + if ((error = ffs_freefile(freefile->fx_fs, freefile->fx_devvp, freefile->fx_oldinum, freefile->fx_mode)) != 0) softdep_error("handle_workitem_freefile", error); - WORKITEM_FREE(freefile, D_FREEFILE); + workitem_free(freefile, D_FREEFILE); } /* @@ -3411,6 +3317,7 @@ softdep_disk_io_initiation(bp) /* * Do any necessary pre-I/O processing. */ + mutex_enter(&bufcache_lock); for (wk = LIST_FIRST(&bp->b_dep); wk; wk = nextwk) { nextwk = LIST_NEXT(wk, wk_list); switch (wk->wk_type) { @@ -3437,24 +3344,21 @@ softdep_disk_io_initiation(bp) * dependency can be freed. 
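+ *
+ * (The indirdep scheme works by substitution: ir_savebp holds a safe
+ * copy of the indirect block in which only committed block pointers
+ * appear.  At write initiation the buffer's data is swapped for the
+ * safe copy; completion swaps the up-to-date version back and
+ * redirties the buffer if dependencies remain.)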
*/ if (LIST_FIRST(&indirdep->ir_deplisthd) == NULL) { - brelse(indirdep->ir_savebp, BC_INVAL | BC_NOCACHE); - softdep_trackbufs(-1, false); - - /* inline expand WORKLIST_REMOVE(wk); */ + /* inline expand worklist_remove(wk); */ wk->wk_state &= ~ONWORKLIST; LIST_REMOVE(wk, wk_list); - WORKITEM_FREE(indirdep, D_INDIRDEP); + brelsel(indirdep->ir_savebp, BC_INVAL | BC_NOCACHE); + softdep_trackbufs(-1, false); + workitem_free(indirdep, D_INDIRDEP); continue; } /* * Replace up-to-date version with safe version. */ - ACQUIRE_LOCK(&lk); indirdep->ir_state &= ~ATTACHED; indirdep->ir_state |= UNDONE; indirdep->ir_saveddata = bp->b_data; bp->b_data = indirdep->ir_savebp->b_data; - FREE_LOCK(&lk); continue; case D_MKDIR: @@ -3469,6 +3373,7 @@ softdep_disk_io_initiation(bp) /* NOTREACHED */ } } + mutex_exit(&bufcache_lock); } /* @@ -3489,6 +3394,8 @@ initiate_write_filepage(pagedep, bp) const int needswap = UFS_FSNEEDSWAP(VFSTOUFS(pagedep->pd_mnt)->um_fs); #endif + KASSERT(mutex_owned(&bufcache_lock)); + if (pagedep->pd_state & IOSTARTED) { /* * This can only happen if there is a driver that does not @@ -3499,7 +3406,6 @@ initiate_write_filepage(pagedep, bp) return; } pagedep->pd_state |= IOSTARTED; - ACQUIRE_LOCK(&lk); for (i = 0; i < DAHASHSZ; i++) { for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); dap; dap = LIST_NEXT(dap, da_pdlist)) { @@ -3520,7 +3426,6 @@ initiate_write_filepage(pagedep, bp) dap->da_state |= UNDONE; } } - FREE_LOCK(&lk); } /* @@ -3545,6 +3450,8 @@ initiate_write_inodeblock_ufs1(inodedep, bp) const int needswap = UFS_FSNEEDSWAP(fs); #endif + KASSERT(mutex_owned(&bufcache_lock)); + if (inodedep->id_state & IOSTARTED) panic("initiate_write_inodeblock: already started"); inodedep->id_state |= IOSTARTED; @@ -3573,7 +3480,6 @@ initiate_write_inodeblock_ufs1(inodedep, bp) /* * Set the dependencies to busy. */ - ACQUIRE_LOCK(&lk); for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef DIAGNOSTIC @@ -3635,7 +3541,6 @@ initiate_write_inodeblock_ufs1(inodedep, bp) dp->di_ib[i] = 0; } dp->di_size = ufs_rw64(dp->di_size, needswap); - FREE_LOCK(&lk); return; } /* @@ -3665,7 +3570,6 @@ initiate_write_inodeblock_ufs1(inodedep, bp) */ for (; adp; adp = TAILQ_NEXT(adp, ad_next)) dp->di_ib[adp->ad_lbn - NDADDR] = 0; - FREE_LOCK(&lk); } static void @@ -3684,6 +3588,8 @@ initiate_write_inodeblock_ufs2(inodedep, bp) const int needswap = UFS_FSNEEDSWAP(fs); #endif + KASSERT(mutex_owned(&bufcache_lock)); + if (inodedep->id_state & IOSTARTED) panic("initiate_write_inodeblock_ufs2: already started"); inodedep->id_state |= IOSTARTED; @@ -3710,7 +3616,6 @@ initiate_write_inodeblock_ufs2(inodedep, bp) inodedep->id_savedsize = dp->di_size; if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL) return; - ACQUIRE_LOCK(&lk); #ifdef notyet inodedep->id_savedextsize = dp->di_extsize; @@ -3720,17 +3625,16 @@ initiate_write_inodeblock_ufs2(inodedep, bp) /* * Set the ext data dependencies to busy. 
*/ - ACQUIRE_LOCK(&lk); for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef DIAGNOSTIC if (deplist != 0 && prevlbn >= adp->ad_lbn) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("softdep_write_inodeblock: lbn order"); } prevlbn = adp->ad_lbn; if (dp->di_extb[adp->ad_lbn] != adp->ad_newblkno) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("%s: direct pointer #%jd mismatch %jd != %jd", "softdep_write_inodeblock", (intmax_t)adp->ad_lbn, @@ -3739,7 +3643,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp) } deplist |= 1 << adp->ad_lbn; if ((adp->ad_state & ATTACHED) == 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); } @@ -3763,7 +3667,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp) for (i = adp->ad_lbn + 1; i < NXADDR; i++) { #ifdef DIAGNOSTIC if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("softdep_write_inodeblock: lost dep1"); } #endif /* DIAGNOSTIC */ @@ -3794,14 +3698,14 @@ initiate_write_inodeblock_ufs2(inodedep, bp) adp = TAILQ_NEXT(adp, ad_next)) { #ifdef DIAGNOSTIC if (deplist != 0 && prevlbn >= adp->ad_lbn) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("softdep_write_inodeblock: lbn order"); } prevlbn = adp->ad_lbn; if (adp->ad_lbn < NDADDR && ufs_rw64(dp->di_db[adp->ad_lbn], needswap) != adp->ad_newblkno) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("%s: direct pointer #%" PRId64 " mismatch %" PRId64 " != %" PRId64, "softdep_write_inodeblock", @@ -3812,7 +3716,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp) if (adp->ad_lbn >= NDADDR && ufs_rw64(dp->di_ib[adp->ad_lbn - NDADDR], needswap) != adp->ad_newblkno) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("%s: indirect pointer #%" PRId64 " mismatch %" PRId64 " != %" PRId64, "softdep_write_inodeblock", @@ -3822,7 +3726,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp) } deplist |= 1 << adp->ad_lbn; if ((adp->ad_state & ATTACHED) == 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); } @@ -3848,7 +3752,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp) for (i = adp->ad_lbn + 1; i < NDADDR; i++) { #ifdef DIAGNOSTIC if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("softdep_write_inodeblock: lost dep2"); } #endif /* DIAGNOSTIC */ @@ -3858,14 +3762,13 @@ initiate_write_inodeblock_ufs2(inodedep, bp) #ifdef DIAGNOSTIC if (dp->di_ib[i] != 0 && (deplist & ((1 << NDADDR) << i)) == 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("softdep_write_inodeblock: lost dep3"); } #endif /* DIAGNOSTIC */ dp->di_ib[i] = 0; } dp->di_size = ufs_rw64(dp->di_size, needswap); - FREE_LOCK(&lk); return; } /* @@ -3895,7 +3798,6 @@ initiate_write_inodeblock_ufs2(inodedep, bp) */ for (; adp; adp = TAILQ_NEXT(adp, ad_next)) dp->di_ib[adp->ad_lbn - NDADDR] = 0; - FREE_LOCK(&lk); } /* @@ -3919,31 +3821,37 @@ softdep_disk_write_complete(bp) struct inodedep *inodedep; struct bmsafemap *bmsafemap; + if ((bp->b_flags & B_READ) != 0) { + KASSERT(LIST_EMPTY(&bp->b_dep)); + return; + } + + /* Avoid taking bufcache_lock if not doing softdep. */ + if (bp->b_vp == NULL || bp->b_vp->v_mount == NULL || + !DOINGSOFTDEP(bp->b_vp)) + return; + /* * If an error occurred while doing the write, then the data * has not hit the disk and the dependencies cannot be unrolled. 
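+ *
+ * (BC_INVAL is now tested in b_cflags: the BC_* flags belong to the
+ * buffer cache and are covered by bufcache_lock, while the BO_* flags
+ * in b_oflags belong to the owning object and are covered by
+ * b_objlock.)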
*/ - if (bp->b_error != 0 && (bp->b_flags & B_INVAL) == 0) + if (bp->b_error != 0 && (bp->b_cflags & BC_INVAL) == 0) return; -#ifdef DEBUG - if (lk.lkt_held != NULL) - panic("softdep_disk_write_complete: lock is held"); - lk.lkt_held = (struct lwp *)1; -#endif + mutex_enter(&bufcache_lock); LIST_INIT(&reattach); while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { - WORKLIST_REMOVE(wk); + worklist_remove(wk); switch (wk->wk_type) { case D_PAGEDEP: if (handle_written_filepage(WK_PAGEDEP(wk), bp)) - WORKLIST_INSERT(&reattach, wk); + worklist_insert(&reattach, wk); continue; case D_INODEDEP: if (handle_written_inodeblock(WK_INODEDEP(wk), bp)) - WORKLIST_INSERT(&reattach, wk); + worklist_insert(&reattach, wk); continue; case D_BMSAFEMAP: @@ -3973,7 +3881,7 @@ softdep_disk_write_complete(bp) LIST_REMOVE(inodedep, id_deps); inodedep->id_buf = NULL; } - WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); + workitem_free(bmsafemap, D_BMSAFEMAP); continue; case D_MKDIR: @@ -4005,10 +3913,12 @@ softdep_disk_write_complete(bp) if (aip == LIST_FIRST(&indirdep->ir_donehd)) panic("disk_write_complete: not gone"); } - WORKLIST_INSERT(&reattach, wk); - if ((bp->b_flags & B_DELWRI) == 0) + worklist_insert(&reattach, wk); + mutex_enter(bp->b_objlock); + if ((bp->b_oflags & BO_DELWRI) == 0) stat_indir_blk_ptrs++; bdirty(bp); + mutex_exit(bp->b_objlock); continue; default: @@ -4021,20 +3931,15 @@ softdep_disk_write_complete(bp) * Reattach any requests that must be redone. */ while ((wk = LIST_FIRST(&reattach)) != NULL) { - WORKLIST_REMOVE(wk); - WORKLIST_INSERT(&bp->b_dep, wk); + worklist_remove(wk); + worklist_insert(&bp->b_dep, wk); } -#ifdef DEBUG - if (lk.lkt_held != (struct lwp *)1) - panic("softdep_disk_write_complete: lock lost"); - lk.lkt_held = NULL; -#endif + mutex_exit(&bufcache_lock); } /* * Called from within softdep_disk_write_complete above. Note that - * this routine is always called from interrupt level with further - * splbio interrupts blocked. + * this routine is always called from interrupt level. */ static void handle_allocdirect_partdone(adp) @@ -4045,6 +3950,8 @@ handle_allocdirect_partdone(adp) long bsize; int delayx; + KASSERT(mutex_owned(&bufcache_lock)); + if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE) return; if (adp->ad_buf != NULL) @@ -4108,8 +4015,7 @@ handle_allocdirect_partdone(adp) /* * Called from within softdep_disk_write_complete above. Note that - * this routine is always called from interrupt level with further - * splbio interrupts blocked. + * this routine is always called from interrupt level. */ static void handle_allocindir_partdone(aip) @@ -4117,6 +4023,8 @@ handle_allocindir_partdone(aip) { struct indirdep *indirdep; + KASSERT(mutex_owned(&bufcache_lock)); + if ((aip->ai_state & ALLCOMPLETE) != ALLCOMPLETE) return; if (aip->ai_buf != NULL) @@ -4136,14 +4044,13 @@ handle_allocindir_partdone(aip) LIST_REMOVE(aip, ai_next); if (aip->ai_freefrag != NULL) add_to_worklist(&aip->ai_freefrag->ff_list); - WORKITEM_FREE(aip, D_ALLOCINDIR); + workitem_free(aip, D_ALLOCINDIR); } /* * Called from within softdep_disk_write_complete above to restore * in-memory inode block contents to their most up-to-date state. Note - * that this routine is always called from interrupt level with further - * splbio interrupts blocked. + * that this routine is always called from interrupt level. 
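+ * Where a rollback was in effect the buffer is marked dirty again so
+ * that the up-to-date contents get written out in a later pass; the
+ * BO_DELWRI test and bdirty() now happen under b_objlock.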
*/ static int handle_written_inodeblock(inodedep, bp) @@ -4159,6 +4066,8 @@ handle_written_inodeblock(inodedep, bp) const int needswap = UFS_FSNEEDSWAP(inodedep->id_fs); #endif + KASSERT(mutex_owned(&bufcache_lock)); + if ((inodedep->id_state & IOSTARTED) == 0) panic("handle_written_inodeblock: not started"); inodedep->id_state &= ~IOSTARTED; @@ -4185,9 +4094,11 @@ handle_written_inodeblock(inodedep, bp) else *dp2 = *inodedep->id_savedino2; inodedep_freedino(inodedep); - if ((bp->b_flags & B_DELWRI) == 0) + mutex_enter(bp->b_objlock); + if ((bp->b_oflags & BO_DELWRI) == 0) stat_inode_bitmap++; bdirty(bp); + mutex_exit(bp->b_objlock); return (1); } /* @@ -4255,7 +4166,7 @@ handle_written_inodeblock(inodedep, bp) adp->ad_state |= ATTACHED; hadchanges = 1; } - if (hadchanges && (bp->b_flags & B_DELWRI) == 0) + if (hadchanges && (bp->b_oflags & BO_DELWRI) == 0) stat_direct_blk_ptrs++; /* * Reset the file size to its most up-to-date value. @@ -4276,13 +4187,16 @@ handle_written_inodeblock(inodedep, bp) } } inodedep->id_savedsize = -1; + /* * If there were any rollbacks in the inode block, then it must be * marked dirty so that its will eventually get written back in * its correct form. */ if (hadchanges) { + mutex_enter(bp->b_objlock); bdirty(bp); + mutex_exit(bp->b_objlock); } /* * Process any allocdirects that completed during the update. @@ -4298,7 +4212,7 @@ handle_written_inodeblock(inodedep, bp) */ filefree = NULL; while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) { - WORKLIST_REMOVE(wk); + worklist_remove(wk); switch (wk->wk_type) { case D_FREEFILE: @@ -4354,7 +4268,6 @@ handle_written_inodeblock(inodedep, bp) /* * Process a diradd entry after its dependent inode has been written. - * This routine must be called with splbio interrupts blocked. */ static void diradd_inode_written(dap, inodedep) @@ -4363,6 +4276,8 @@ diradd_inode_written(dap, inodedep) { struct pagedep *pagedep; + KASSERT(mutex_owned(&bufcache_lock)); + dap->da_state |= COMPLETE; if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { if (dap->da_state & DIRCHG) @@ -4372,7 +4287,7 @@ diradd_inode_written(dap, inodedep) LIST_REMOVE(dap, da_pdlist); LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); } - WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); + worklist_insert(&inodedep->id_pendinghd, &dap->da_list); } /* @@ -4401,15 +4316,14 @@ handle_written_mkdir(mkdir, type) LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); } LIST_REMOVE(mkdir, md_mkdirs); - WORKITEM_FREE(mkdir, D_MKDIR); + workitem_free(mkdir, D_MKDIR); } /* * Called from within softdep_disk_write_complete above. * A write operation was just completed. Removed inodes can * now be freed and associated block pointers may be committed. - * Note that this routine is always called from interrupt level - * with further splbio interrupts blocked. + * Note that this routine is always called from interrupt level. */ static int handle_written_filepage(pagedep, bp) @@ -4424,6 +4338,8 @@ handle_written_filepage(pagedep, bp) const int needswap = UFS_FSNEEDSWAP(VFSTOUFS(pagedep->pd_mnt)->um_fs); #endif + KASSERT(mutex_owned(&bufcache_lock)); + if ((pagedep->pd_state & IOSTARTED) == 0) panic("handle_written_filepage: not started"); pagedep->pd_state &= ~IOSTARTED; @@ -4477,9 +4393,11 @@ handle_written_filepage(pagedep, bp) * its correct form. 
*/ if (chgs) { - if ((bp->b_flags & B_DELWRI) == 0) + mutex_enter(bp->b_objlock); + if ((bp->b_oflags & BO_DELWRI) == 0) stat_dir_entry++; bdirty(bp); + mutex_exit(bp->b_objlock); return (1); } /* @@ -4490,7 +4408,7 @@ handle_written_filepage(pagedep, bp) */ if ((pagedep->pd_state & NEWBLOCK) == 0) { LIST_REMOVE(pagedep, pd_hash); - WORKITEM_FREE(pagedep, D_PAGEDEP); + workitem_free(pagedep, D_PAGEDEP); } return (0); } @@ -4522,15 +4440,15 @@ softdep_load_inodeblock(ip) * Check for alternate nlink count. */ ip->i_ffs_effnlink = ip->i_nlink; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return; } ip->i_ffs_effnlink -= inodedep->id_nlinkdelta; if (inodedep->id_state & SPACECOUNTED) ip->i_flag |= IN_SPACECOUNTED; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } /* @@ -4560,11 +4478,11 @@ softdep_update_inodeblock(ip, bp, waitfor) * if there is no existing inodedep, then there are no dependencies * to track. */ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) { if (ip->i_ffs_effnlink != ip->i_nlink) panic("softdep_update_inodeblock: bad link count"); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return; } if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_ffs_effnlink) @@ -4575,7 +4493,7 @@ softdep_update_inodeblock(ip, bp, waitfor) */ inodedep->id_state &= ~COMPLETE; if ((inodedep->id_state & ONWORKLIST) == 0) { - WORKLIST_INSERT(&bp->b_dep, &inodedep->id_list); + worklist_insert(&bp->b_dep, &inodedep->id_list); } /* * Any new dependencies associated with the incore inode must @@ -4593,8 +4511,8 @@ softdep_update_inodeblock(ip, bp, waitfor) * processed when the buffer I/O completes. */ while ((wk = LIST_FIRST(&inodedep->id_inowait)) != NULL) { - WORKLIST_REMOVE(wk); - WORKLIST_INSERT(&inodedep->id_bufwait, wk); + worklist_remove(wk); + worklist_insert(&inodedep->id_bufwait, wk); } /* * Newly allocated inodes cannot be written until the bitmap @@ -4604,11 +4522,11 @@ softdep_update_inodeblock(ip, bp, waitfor) * to be written so that the update can be done. */ if ((inodedep->id_state & DEPCOMPLETE) != 0 || waitfor == 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return; } gotit = getdirtybuf(&inodedep->id_buf, MNT_WAIT); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if (gotit && (error = VOP_BWRITE(inodedep->id_buf)) != 0) softdep_error("softdep_update_inodeblock: bwrite", error); if ((inodedep->id_state & DEPCOMPLETE) == 0) @@ -4617,8 +4535,7 @@ softdep_update_inodeblock(ip, bp, waitfor) /* * Merge the new inode dependency list (id_newinoupdt) into the old - * inode dependency list (id_inoupdt). This routine must be called - * with splbio interrupts blocked. + * inode dependency list (id_inoupdt). */ static void merge_inode_lists(inodedep) @@ -4626,6 +4543,8 @@ merge_inode_lists(inodedep) { struct allocdirect *listadp, *newadp; + KASSERT(mutex_owned(&bufcache_lock)); + listadp = TAILQ_FIRST(&inodedep->id_inoupdt); newadp = TAILQ_FIRST(&inodedep->id_newinoupdt); while (listadp && newadp) { @@ -4674,9 +4593,9 @@ softdep_fsync(vp, f) ip = VTOI(vp); fs = ip->i_fs; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return (0); } if (LIST_FIRST(&inodedep->id_inowait) != NULL || @@ -4726,7 +4645,7 @@ softdep_fsync(vp, f) * requesting the lock on our parent. 
See the comment in * ufs_lookup for details on possible races. */ - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); VOP_UNLOCK(vp, 0); error = VFS_VGET(mnt, parentino, &pvp); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); @@ -4764,11 +4683,11 @@ softdep_fsync(vp, f) vput(pvp); if (error != 0) return (error); - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0) break; } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if (f & FSYNC_CACHE) { /* * If requested, make sure all of these changes don't @@ -4795,46 +4714,37 @@ softdep_fsync_mountdev(vp) if (vp->v_type != VBLK) panic("softdep_fsync_mountdev: vnode not VBLK"); - ACQUIRE_LOCK(&lk); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; - simple_lock(&bp->b_interlock); + KASSERT(bp->b_objlock == &vp->v_interlock); /* * If it is already scheduled, skip to the next buffer. */ - if (bp->b_flags & B_BUSY) { - simple_unlock(&bp->b_interlock); + if (bp->b_cflags & BC_BUSY) { continue; } - if ((bp->b_flags & B_DELWRI) == 0) + if ((bp->b_oflags & BO_DELWRI) == 0) panic("softdep_fsync_mountdev: not dirty"); /* * We are only interested in bitmaps with outstanding * dependencies. */ if ((wk = LIST_FIRST(&bp->b_dep)) == NULL || - wk->wk_type != D_BMSAFEMAP) { - simple_unlock(&bp->b_interlock); + wk->wk_type != D_BMSAFEMAP) continue; - } bremfree(bp); - simple_unlock(&bqueue_slock); - bp->b_flags |= B_BUSY; - simple_unlock(&bp->b_interlock); - FREE_LOCK(&lk); + bp->b_cflags |= BC_BUSY; + mutex_exit(&bufcache_lock); (void) bawrite(bp); - ACQUIRE_LOCK(&lk); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); /* - * Since we may have slept during the I/O, we need - * to start from a known point. + * Since we unlocked, we need to start from a known point. */ nbp = vp->v_dirtyblkhd.lh_first; } - simple_unlock(&bqueue_slock); - drain_output(vp, 1); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); + drain_output(vp); } /* @@ -4844,17 +4754,8 @@ softdep_fsync_mountdev(vp) * associated with the file. If any I/O errors occur, they are returned. */ int -softdep_sync_metadata(v) - void *v; +softdep_sync_metadata(struct vnode *vp) { - struct vop_fsync_args /* { - struct vnode *a_vp; - kauth_cred_t a_cred; - int a_waitfor; - off_t a_offlo; - off_t a_offhi; - } */ *ap = v; - struct vnode *vp = ap->a_vp; struct inodedep *inodedep; struct pagedep *pagedep; struct allocdirect *adp; @@ -4877,10 +4778,10 @@ softdep_sync_metadata(v) /* * Ensure that any direct block dependencies have been cleared. 
*/ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); error = flush_inodedep_deps(VTOI(vp)->i_fs, VTOI(vp)->i_number); if (error) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return (error); } /* @@ -4923,14 +4824,14 @@ loop: nbp = adp->ad_buf; if (getdirtybuf(&nbp, waitfor) == 0) break; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if (waitfor == MNT_NOWAIT) { bawrite(nbp); } else if ((error = VOP_BWRITE(nbp)) != 0) { bawrite(bp); return (error); } - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); break; case D_ALLOCINDIR: @@ -4940,14 +4841,14 @@ loop: nbp = aip->ai_buf; if (getdirtybuf(&nbp, waitfor) == 0) break; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if (waitfor == MNT_NOWAIT) { bawrite(nbp); } else if ((error = VOP_BWRITE(nbp)) != 0) { bawrite(bp); return (error); } - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); break; case D_INDIRDEP: @@ -4959,12 +4860,12 @@ loop: nbp = aip->ai_buf; if (getdirtybuf(&nbp, MNT_WAIT) == 0) goto restart; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if ((error = VOP_BWRITE(nbp)) != 0) { bawrite(bp); return (error); } - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); goto restart; } break; @@ -4972,7 +4873,7 @@ loop: case D_INODEDEP: if ((error = flush_inodedep_deps(WK_INODEDEP(wk)->id_fs, WK_INODEDEP(wk)->id_ino)) != 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); bawrite(bp); return (error); } @@ -4993,7 +4894,7 @@ loop: error = flush_pagedep_deps(vp, pagedep->pd_mnt, &pagedep->pd_diraddhd[i]); if (error) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); bawrite(bp); return (error); } @@ -5011,14 +4912,14 @@ loop: nbp = WK_MKDIR(wk)->md_buf; if (getdirtybuf(&nbp, waitfor) == 0) break; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if (waitfor == MNT_NOWAIT) { bawrite(nbp); } else if ((error = VOP_BWRITE(nbp)) != 0) { bawrite(bp); return (error); } - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); break; case D_BMSAFEMAP: @@ -5052,15 +4953,15 @@ loop: } (void) getdirtybuf(&bp->b_vnbufs.le_next, MNT_WAIT); nbp = bp->b_vnbufs.le_next; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if (must_sync) { if ((error = VOP_BWRITE(bp)) != 0) return error; } else bawrite(bp); - ACQUIRE_LOCK(&lk); if (nbp != NULL) { bp = nbp; + mutex_enter(&bufcache_lock); goto loop; } /* @@ -5069,15 +4970,10 @@ loop: * Once they are all there, proceed with the second pass * which will wait for the I/O as per above. */ - drain_output(vp, 1); - /* - * The brief unlock is to allow any pent up dependency - * processing to be done. - */ + drain_output(vp); if (waitfor == MNT_NOWAIT) { waitfor = MNT_WAIT; - FREE_LOCK(&lk); - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); goto top; } @@ -5087,7 +4983,6 @@ loop: * devices, we may need to do further work. */ if (vp->v_dirtyblkhd.lh_first != NULL) { - FREE_LOCK(&lk); /* * If we are trying to sync a block device, some of its buffers * may contain metadata that cannot be written until the @@ -5098,11 +4993,11 @@ loop: if (vp->v_type == VBLK && vp->v_specmountpoint && !VOP_ISLOCKED(vp) && (error = VFS_SYNC(vp->v_specmountpoint, MNT_WAIT, - ap->a_cred)) != 0) + curlwp->l_cred)) != 0) return (error); - ACQUIRE_LOCK(&lk); } + mutex_enter(&bufcache_lock); clean: /* * If there is still an inodedep, we know that the inode has pending @@ -5112,13 +5007,12 @@ clean: */ if (inodedep_lookup(VTOI(vp)->i_fs, VTOI(vp)->i_number, 0, &inodedep)) VTOI(vp)->i_flag |= IN_MODIFIED; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return (0); } /* * Flush the dependencies associated with an inodedep. 
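+ * Called with bufcache_lock held.  The lock may be dropped and
+ * retaken while buffers are flushed, so the dependency lists are
+ * rescanned from the start after every write.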
- * Called with splbio blocked. */ static int flush_inodedep_deps(fs, ino) @@ -5126,11 +5020,11 @@ flush_inodedep_deps(fs, ino) ino_t ino; { struct inodedep *inodedep; - struct allocdirect *adp; int error, waitfor; - struct buf *bp; struct vnode *vp; + KASSERT(mutex_owned(&bufcache_lock)); + vp = softdep_lookupvp(fs, ino); /* @@ -5143,12 +5037,10 @@ flush_inodedep_deps(fs, ino) * usual case we will be blocking against a write that we * initiated, so when it is done the dependency will have been * resolved. Thus the second pass is expected to end quickly. - * We give a brief window at the top of the loop to allow - * any pending I/O to complete. */ - for (waitfor = MNT_NOWAIT; ; ) { - FREE_LOCK(&lk); - ACQUIRE_LOCK(&lk); + for (error = 0, waitfor = MNT_NOWAIT; ; ) { + if (error) + return (error); if (inodedep_lookup(fs, ino, 0, &inodedep) == 0) return (0); @@ -5162,15 +5054,15 @@ flush_inodedep_deps(fs, ino) */ if (vp != NULL) { - FREE_LOCK(&lk); - simple_lock(&vp->v_interlock); + mutex_exit(&bufcache_lock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, 0, 0, PGO_ALLPAGES | PGO_CLEANIT | (waitfor == MNT_NOWAIT ? 0: PGO_SYNCIO)); if (waitfor == MNT_WAIT) { - drain_output(vp, 0); + drain_output(vp); } - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (error) { return error; } @@ -5184,52 +5076,13 @@ flush_inodedep_deps(fs, ino) */ KASSERT(TAILQ_EMPTY(&inodedep->id_inoupdt)); KASSERT(TAILQ_EMPTY(&inodedep->id_newinoupdt)); + break; } - for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; - adp = TAILQ_NEXT(adp, ad_next)) { - if (adp->ad_state & DEPCOMPLETE) - continue; - bp = adp->ad_buf; - if (getdirtybuf(&bp, waitfor) == 0) { - if (waitfor == MNT_NOWAIT) - continue; - break; - } - FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { - bawrite(bp); - } else if ((error = VOP_BWRITE(bp)) != 0) { - ACQUIRE_LOCK(&lk); - return (error); - } - ACQUIRE_LOCK(&lk); - break; - } - if (adp != NULL) - continue; - for (adp = TAILQ_FIRST(&inodedep->id_newinoupdt); adp; - adp = TAILQ_NEXT(adp, ad_next)) { - if (adp->ad_state & DEPCOMPLETE) - continue; - bp = adp->ad_buf; - if (getdirtybuf(&bp, waitfor) == 0) { - if (waitfor == MNT_NOWAIT) - continue; - break; - } - FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { - bawrite(bp); - } else if ((error = VOP_BWRITE(bp)) != 0) { - ACQUIRE_LOCK(&lk); - return (error); - } - ACQUIRE_LOCK(&lk); - break; - } - if (adp != NULL) - continue; + if (flush_deplist(&inodedep->id_inoupdt, waitfor, &error) || + flush_deplist(&inodedep->id_newinoupdt, waitfor, &error)) + continue; + /* * If pass2, we are done, otherwise do pass 2. */ @@ -5237,6 +5090,7 @@ flush_inodedep_deps(fs, ino) break; waitfor = MNT_WAIT; } + /* * Try freeing inodedep in case all dependencies have been removed. */ @@ -5245,9 +5099,45 @@ flush_inodedep_deps(fs, ino) return (0); } +/* + * Flush an inode dependency list. 
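+ * Returns non-zero when it wrote or failed to busy a buffer, in which
+ * case the caller must rescan (any write error is left in *errorp);
+ * returns zero after a pass that found nothing left to write.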
+ */ +static int +flush_deplist(listhead, waitfor, errorp) + struct allocdirectlst *listhead; + int waitfor; + int *errorp; +{ + struct allocdirect *adp; + struct buf *bp; + + KASSERT(mutex_owned(&bufcache_lock)); + + TAILQ_FOREACH(adp, listhead, ad_next) { + if (adp->ad_state & DEPCOMPLETE) + continue; + bp = adp->ad_buf; + if (getdirtybuf(&bp, waitfor) == 0) { + if (waitfor == MNT_NOWAIT) + continue; + return (1); + } + mutex_exit(&bufcache_lock); + if (waitfor == MNT_NOWAIT) { + bawrite(bp); + } else if ((*errorp = VOP_BWRITE(bp)) != 0) { + mutex_enter(&bufcache_lock); + return (1); + } + mutex_enter(&bufcache_lock); + return (1); + } + + return (0); +} + /* * Eliminate a pagedep dependency by flushing out all its diradd dependencies. - * Called with splbio blocked. */ static int flush_pagedep_deps(pvp, mp, diraddhdp) @@ -5265,6 +5155,8 @@ flush_pagedep_deps(pvp, mp, diraddhdp) ino_t inum; u_int ipflag; + KASSERT(mutex_owned(&bufcache_lock)); + ump = VFSTOUFS(mp); while ((dap = LIST_FIRST(diraddhdp)) != NULL) { /* @@ -5272,12 +5164,12 @@ flush_pagedep_deps(pvp, mp, diraddhdp) * has a MKDIR_PARENT dependency. */ if (dap->da_state & MKDIR_PARENT) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); VTOI(pvp)->i_flag |= IN_MODIFIED; error = ffs_update(pvp, NULL, NULL, UPDATE_WAIT); if (error) break; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); /* * If that cleared dependencies, go on to next. */ @@ -5300,7 +5192,7 @@ flush_pagedep_deps(pvp, mp, diraddhdp) */ inum = dap->da_newinum; if (dap->da_state & MKDIR_BODY) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); ipflag = vn_setrecurse(pvp); /* XXX */ if ((error = VFS_VGET(mp, inum, &vp)) != 0) break; @@ -5311,10 +5203,10 @@ flush_pagedep_deps(pvp, mp, diraddhdp) vput(vp); break; } - drain_output(vp, 0); + drain_output(vp); vput(vp); vn_restorerecurse(pvp, ipflag); - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); /* * If that cleared dependencies, go on to next. */ @@ -5341,11 +5233,11 @@ flush_pagedep_deps(pvp, mp, diraddhdp) */ if ((inodedep->id_state & DEPCOMPLETE) == 0) { gotit = getdirtybuf(&inodedep->id_buf, MNT_WAIT); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if (gotit && (error = VOP_BWRITE(inodedep->id_buf)) != 0) break; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (dap != LIST_FIRST(diraddhdp)) continue; } @@ -5353,14 +5245,14 @@ flush_pagedep_deps(pvp, mp, diraddhdp) * If the inode is still sitting in a buffer waiting * to be written, push it to disk. */ - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if ((error = bread(ump->um_devvp, fsbtodb(ump->um_fs, ino_to_fsba(ump->um_fs, inum)), (int)ump->um_fs->fs_bsize, NOCRED, &bp)) != 0) break; if ((error = VOP_BWRITE(bp)) != 0) break; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); /* * If we have failed to get rid of all the dependencies * then something is seriously wrong. @@ -5369,7 +5261,7 @@ flush_pagedep_deps(pvp, mp, diraddhdp) panic("flush_pagedep_deps: flush failed"); } if (error) - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); return (error); } @@ -5380,12 +5272,12 @@ flush_pagedep_deps(pvp, mp, diraddhdp) * many dependencies in progress. */ static int -request_cleanup(resource, islocked) +request_cleanup(resource) int resource; - int islocked; { - lwp_t *l = curlwp; - int s; + struct lwp *l = curlwp; + + KASSERT(mutex_owned(&bufcache_lock)); /* * We never hold up the filesystem syncer process. @@ -5421,19 +5313,17 @@ request_cleanup(resource, islocked) * Hopefully the syncer daemon will catch up and awaken us. 
* We wait at most tickdelay before proceeding in any case. */ - if (islocked == 0) - ACQUIRE_LOCK(&lk); if (proc_waiting++ == 0) callout_reset(&pause_timer_ch, tickdelay > 2 ? tickdelay : 2, pause_timer, NULL); - s = FREE_LOCK_INTERLOCKED(&lk); - (void) tsleep((void *)&proc_waiting, PPAUSE, "softupdate", 0); - ACQUIRE_LOCK_INTERLOCKED(&lk, s); + cv_wait(&proc_wait_cv, &bufcache_lock); if (--proc_waiting) callout_reset(&pause_timer_ch, tickdelay > 2 ? tickdelay : 2, pause_timer, NULL); else { + mutex_exit(&bufcache_lock); callout_stop(&pause_timer_ch); + mutex_enter(&bufcache_lock); #if 0 switch (resource) { @@ -5447,8 +5337,7 @@ request_cleanup(resource, islocked) } #endif } - if (islocked == 0) - FREE_LOCK(&lk); + return (1); } @@ -5460,8 +5349,9 @@ void pause_timer(void *arg) { - /* XXX was wakeup_one(), but makes no difference in uniprocessor */ - wakeup(&proc_waiting); + mutex_enter(&bufcache_lock); + cv_broadcast(&proc_wait_cv); + mutex_exit(&bufcache_lock); } /* @@ -5480,7 +5370,8 @@ clear_remove(l) int error, cnt; ino_t ino; - ACQUIRE_LOCK(&lk); + KASSERT(mutex_owned(&bufcache_lock)); + for (cnt = 0; cnt < pagedep_hash; cnt++) { pagedephd = &pagedep_hashtbl[next++]; if (next >= pagedep_hash) @@ -5490,19 +5381,19 @@ clear_remove(l) continue; mp = pagedep->pd_mnt; ino = pagedep->pd_ino; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if ((error = VFS_VGET(mp, ino, &vp)) != 0) { softdep_error("clear_remove: vget", error); return; } if ((error = VOP_FSYNC(vp, l->l_cred, 0, 0, 0))) softdep_error("clear_remove: fsync", error); - drain_output(vp, 0); + drain_output(vp); vput(vp); + mutex_enter(&bufcache_lock); return; } } - FREE_LOCK(&lk); } /* @@ -5522,7 +5413,8 @@ clear_inodedeps(l) int error, cnt; ino_t firstino, lastino, ino; - ACQUIRE_LOCK(&lk); + KASSERT(mutex_owned(&bufcache_lock)); + /* * Pick a random inode dependency to be cleared. * We will then gather up all the inodes in its block @@ -5539,10 +5431,12 @@ clear_inodedeps(l) * Ugly code to find mount point given pointer to superblock. */ fs = inodedep->id_fs; + mutex_enter(&mountlist_lock); CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) { if ((mp->mnt_flag & MNT_SOFTDEP) && fs == VFSTOUFS(mp)->um_fs) break; } + mutex_exit(&mountlist_lock); /* * Find the last inode in the block with dependencies. @@ -5559,7 +5453,7 @@ clear_inodedeps(l) for (ino = firstino; ino <= lastino; ino++) { if (inodedep_lookup(fs, ino, 0, &inodedep) == 0) continue; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if ((error = VFS_VGET(mp, ino, &vp)) != 0) { softdep_error("clear_inodedeps: vget", error); return; @@ -5571,12 +5465,11 @@ clear_inodedeps(l) } else { if ((error = VOP_FSYNC(vp, l->l_cred, 0, 0, 0))) softdep_error("clear_inodedeps: fsync2", error); - drain_output(vp, 0); + drain_output(vp); } vput(vp); - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); } - FREE_LOCK(&lk); } /* @@ -5597,8 +5490,12 @@ softdep_count_dependencies(bp, wantcount) struct diradd *dap; int i, retval; + KASSERT(mutex_owned(&bufcache_lock)); + + if (LIST_EMPTY(&bp->b_dep)) + return 0; + retval = 0; - ACQUIRE_LOCK(&lk); for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) { switch (wk->wk_type) { @@ -5656,13 +5553,11 @@ softdep_count_dependencies(bp, wantcount) } } out: - FREE_LOCK(&lk); return retval; } /* * Acquire exclusive access to a buffer. - * Must be called with splbio blocked. * Return 1 if buffer was acquired. 
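+ * Must be called with bufcache_lock held.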
*/ static int @@ -5672,42 +5567,30 @@ getdirtybuf(bpp, waitfor) { struct buf *bp; -again: - for (;;) { - int s; + KASSERT(mutex_owned(&bufcache_lock)); + for (;;) { if ((bp = *bpp) == NULL) return (0); - simple_lock(&bp->b_interlock); - if ((bp->b_flags & B_BUSY) == 0) + if ((bp->b_cflags & BC_BUSY) == 0) break; - if (waitfor != MNT_WAIT) { - simple_unlock(&bp->b_interlock); + if (waitfor != MNT_WAIT) return (0); - } - bp->b_flags |= B_WANTED; - s = FREE_LOCK_INTERLOCKED(&lk); - (void) ltsleep(bp, (PRIBIO + 1) | PNORELOCK, "softgetdbuf", 0, - &bp->b_interlock); - ACQUIRE_LOCK_INTERLOCKED(&lk, s); + (void)bbusy(bp, false, 0); } - LOCK_ASSERT(simple_lock_held(&bp->b_interlock)); - if ((bp->b_flags & B_DELWRI) == 0) { - simple_unlock(&bp->b_interlock); + mutex_enter(bp->b_objlock); + if ((bp->b_oflags & BO_DELWRI) == 0) { + mutex_exit(bp->b_objlock); return (0); } - if (!simple_lock_try(&bqueue_slock)) { - simple_unlock(&bp->b_interlock); - goto again; - } #if 1 - bp->b_flags |= B_BUSY; + bp->b_cflags |= BC_BUSY; bremfree(bp); #else - bp->b_flags |= B_BUSY | B_VFLUSH; + bp->b_cflags |= BC_BUSY | BC_VFLUSH; #endif - simple_unlock(&bqueue_slock); - simple_unlock(&bp->b_interlock); + mutex_exit(bp->b_objlock); + return (1); } @@ -5716,26 +5599,14 @@ again: * Must be called with vnode locked. */ static void -drain_output(vp, islocked) +drain_output(vp) struct vnode *vp; - int islocked; { - if (!islocked) - ACQUIRE_LOCK(&lk); - simple_lock(&global_v_numoutput_slock); - while (vp->v_numoutput) { - int s; - - vp->v_iflag |= VI_BWAIT; - s = FREE_LOCK_INTERLOCKED(&lk); - ltsleep((void *)&vp->v_numoutput, PRIBIO + 1, "drainvp", 0, - &global_v_numoutput_slock); - ACQUIRE_LOCK_INTERLOCKED(&lk, s); - } - simple_unlock(&global_v_numoutput_slock); - if (!islocked) - FREE_LOCK(&lk); + mutex_enter(&vp->v_interlock); + while (vp->v_numoutput) + cv_wait(&vp->v_cv, &vp->v_interlock); + mutex_exit(&vp->v_interlock); } /* @@ -5748,6 +5619,10 @@ softdep_deallocate_dependencies(bp) struct buf *bp; { + KASSERT(mutex_owned(&bufcache_lock)); + + if (LIST_EMPTY(&bp->b_dep)) + return; if (bp->b_error == 0) panic("softdep_deallocate_dependencies: dangling deps"); softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error); @@ -5780,6 +5655,8 @@ softdep_setup_pagecache(ip, lbn, size) struct buf *bp; UVMHIST_FUNC("softdep_setup_pagecache"); UVMHIST_CALLED(ubchist); + KASSERT(mutex_owned(&bufcache_lock)); + /* * Enter pagecache dependency buf in hash. 
* Always reset b_resid to be the full amount of data in the block @@ -5793,10 +5670,12 @@ softdep_setup_pagecache(ip, lbn, size) bp = softdep_lookup_pcbp(vp, lbn); if (bp == NULL) { - bp = pool_get(&sdpcpool, PR_WAITOK); + mutex_exit(&bufcache_lock); + bp = pool_cache_get(softdep_large_cache, PR_WAITOK); bp->b_vp = vp; bp->b_lblkno = lbn; LIST_INIT(&bp->b_dep); + mutex_enter(&bufcache_lock); LIST_INSERT_HEAD(&pcbphashhead[PCBPHASH(vp, lbn)], bp, b_hash); LIST_INSERT_HEAD(&ip->i_pcbufhd, bp, b_vnbufs); } @@ -5837,7 +5716,7 @@ softdep_free_pagecache(ip) nextbp = LIST_NEXT(bp, b_vnbufs); LIST_REMOVE(bp, b_vnbufs); KASSERT(LIST_FIRST(&bp->b_dep) == NULL); - pool_put(&sdpcpool, bp); + pool_cache_put(softdep_large_cache, bp); } } @@ -5848,13 +5727,20 @@ softdep_lookupvp(fs, ino) { struct mount *mp; extern struct vfsops ffs_vfsops; + vnode_t *vp; + mutex_enter(&mountlist_lock); CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_op == &ffs_vfsops && VFSTOUFS(mp)->um_fs == fs) { - return (ufs_ihashlookup(VFSTOUFS(mp)->um_dev, ino)); + mutex_exit(&mountlist_lock); + mutex_enter(&ufs_ihash_lock); + vp = ufs_ihashlookup(VFSTOUFS(mp)->um_dev, ino); + mutex_exit(&ufs_ihash_lock); + return vp; } } + mutex_exit(&mountlist_lock); return (NULL); } @@ -5863,18 +5749,19 @@ static void softdep_trackbufs(int delta, bool throttle) { + KASSERT(mutex_owned(&bufcache_lock)); + if (delta < 0) { if (softdep_lockedbufs < nbuf >> 2) { - wakeup(&softdep_lockedbufs); + cv_broadcast(&softdep_tb_cv); } KASSERT(softdep_lockedbufs >= -delta); softdep_lockedbufs += delta; return; } - while (throttle && softdep_lockedbufs >= nbuf >> 2) { speedup_syncer(); - tsleep(&softdep_lockedbufs, PRIBIO, "softdbufs", 0); + cv_wait(&softdep_tb_cv, &bufcache_lock); } softdep_lockedbufs += delta; } @@ -5886,6 +5773,8 @@ softdep_lookup_pcbp(vp, lbn) { struct buf *bp; + KASSERT(mutex_owned(&bufcache_lock)); + LIST_FOREACH(bp, &pcbphashhead[PCBPHASH(vp, lbn)], b_hash) { if (bp->b_vp == vp && bp->b_lblkno == lbn) { break; @@ -5901,10 +5790,13 @@ softdep_lookup_pcbp(vp, lbn) void softdep_pageiodone(bp) struct buf *bp; -#ifdef UVMHIST { struct vnode *vp = bp->b_vp; + if (vp == NULL) { + /* XXX LFS */ + return; + } if (DOINGSOFTDEP(vp)) softdep_pageiodone1(bp); } @@ -5912,7 +5804,6 @@ softdep_pageiodone(bp) void softdep_pageiodone1(bp) struct buf *bp; -#endif { int npages = bp->b_bufsize >> PAGE_SHIFT; struct vnode *vp = bp->b_vp; @@ -5932,7 +5823,7 @@ softdep_pageiodone1(bp) bshift = vp->v_mount->mnt_fs_bshift; bsize = 1 << bshift; asize = MIN(PAGE_SIZE, bsize); - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); for (i = 0; i < npages; i++) { pg = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT)); if (pg == NULL) { @@ -5978,7 +5869,7 @@ softdep_pageiodone1(bp) KASSERT(LIST_FIRST(&pcbp->b_dep) != NULL); while ((wk = LIST_FIRST(&pcbp->b_dep))) { - WORKLIST_REMOVE(wk); + worklist_remove(wk); switch (wk->wk_type) { case D_ALLOCDIRECT: adp = WK_ALLOCDIRECT(wk); @@ -6000,9 +5891,9 @@ softdep_pageiodone1(bp) } LIST_REMOVE(pcbp, b_hash); LIST_REMOVE(pcbp, b_vnbufs); - pool_put(&sdpcpool, pcbp); + pool_cache_put(softdep_large_cache, pcbp); pcbp = NULL; } } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } diff --git a/sys/ufs/ffs/ffs_softdep.stub.c b/sys/ufs/ffs/ffs_softdep.stub.c index c118f3805446..b1fd1c2080de 100644 --- a/sys/ufs/ffs/ffs_softdep.stub.c +++ b/sys/ufs/ffs/ffs_softdep.stub.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_softdep.stub.c,v 1.21 2007/03/04 06:03:45 christos Exp $ */ +/* $NetBSD: ffs_softdep.stub.c,v 1.22 2008/01/02 11:49:09 ad 
Exp $ */ /* * Copyright 1997 Marshall Kirk McKusick. All Rights Reserved. @@ -34,7 +34,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_softdep.stub.c,v 1.21 2007/03/04 06:03:45 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_softdep.stub.c,v 1.22 2008/01/02 11:49:09 ad Exp $"); #include #include @@ -204,7 +204,7 @@ softdep_fsync_mountdev(struct vnode *vp) } int -softdep_sync_metadata(void *v) +softdep_sync_metadata(struct vnode *vp) { return (0); } @@ -214,3 +214,10 @@ softdep_releasefile(struct inode *ip) { panic("softdep_releasefile called"); } + +void +softdep_unmount(struct mount *mp) +{ + + return; +} diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index 5cbf19504ffb..9113bcda52c6 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_vfsops.c,v 1.213 2007/12/20 16:18:57 dyoung Exp $ */ +/* $NetBSD: ffs_vfsops.c,v 1.214 2008/01/02 11:49:09 ad Exp $ */ /* * Copyright (c) 1989, 1991, 1993, 1994 @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.213 2007/12/20 16:18:57 dyoung Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.214 2008/01/02 11:49:09 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -482,7 +482,7 @@ fail: int ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l) { - struct vnode *vp, *nvp, *devvp; + struct vnode *vp, *mvp, *devvp; struct inode *ip; void *space; struct buf *bp; @@ -647,30 +647,36 @@ ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l) *lp++ = fs->fs_contigsumsize; } -loop: + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return ENOMEM; /* * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() * and vclean() can be called indirectly */ - simple_lock(&mntvnode_slock); - for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { - if (vp->v_mount != mp) { - simple_unlock(&mntvnode_slock); - goto loop; - } + mutex_enter(&mntvnode_lock); + loop: + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + if (vp->v_mount != mp || vismarker(vp)) + continue; /* * Step 4: invalidate all inactive vnodes. */ - if (vrecycle(vp, &mntvnode_slock, l)) + if (vrecycle(vp, &mntvnode_lock, l)) { + mutex_enter(&mntvnode_lock); + (void)vunmark(mvp); goto loop; + } /* * Step 5: invalidate all cached file data. */ - simple_lock(&vp->v_interlock); - nvp = TAILQ_NEXT(vp, v_mntvnodes); - simple_unlock(&mntvnode_slock); - if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) + mutex_enter(&vp->v_interlock); + mutex_exit(&mntvnode_lock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { + (void)vunmark(mvp); goto loop; + } if (vinvalbuf(vp, 0, cred, l, 0, 0)) panic("ffs_reload: dirty2"); /* @@ -682,16 +688,18 @@ loop: if (error) { brelse(bp, 0); vput(vp); - return (error); + (void)vunmark(mvp); + break; } ffs_load_inode(bp, ip, fs, ip->i_number); ip->i_ffs_effnlink = ip->i_nlink; brelse(bp, 0); vput(vp); - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); - return (0); + mutex_exit(&mntvnode_lock); + vfree(mvp); + return (error); } /* @@ -743,6 +751,10 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) sblockloc = 0; fstype = 0; + error = fstrans_mount(mp); + if (error) + return error; + /* * Try reading the superblock in each of its possible locations. 
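+ * (On failure, the "out:" path below must also release the
+ * fstrans reference taken above, via fstrans_unmount().)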
*/ @@ -974,7 +986,7 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) mp->mnt_fs_bshift = fs->fs_bshift; mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */ mp->mnt_flag |= MNT_LOCAL; - mp->mnt_iflag |= IMNT_HAS_TRANS; + mp->mnt_iflag |= IMNT_MPSAFE; #ifdef FFS_EI if (needswap) ump->um_flags |= UFS_NEEDSWAP; @@ -1016,6 +1028,7 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) #endif /* UFS_EXTATTR */ return (0); out: + fstrans_unmount(mp); if (fs) free(fs, M_UFSMNT); devvp->v_specmountpoint = NULL; @@ -1214,10 +1227,12 @@ ffs_unmount(struct mount *mp, int mntflags) free(fs, M_UFSMNT); if (ump->um_oldfscompat != NULL) free(ump->um_oldfscompat, M_UFSMNT); + softdep_unmount(mp); mutex_destroy(&ump->um_lock); free(ump, M_UFSMNT); mp->mnt_data = NULL; mp->mnt_flag &= ~MNT_LOCAL; + fstrans_unmount(mp); return (0); } @@ -1313,7 +1328,7 @@ int ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) { struct lwp *l = curlwp; - struct vnode *vp, *nvp; + struct vnode *vp, *mvp; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; @@ -1324,46 +1339,53 @@ ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) printf("fs = %s\n", fs->fs_fsmnt); panic("update: rofs mod"); } + + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return (ENOMEM); + fstrans_start(mp, FSTRANS_SHARED); /* * Write back each (modified) inode. */ - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); loop: /* * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() * and vclean() can be called indirectly */ - for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ - if (vp->v_mount != mp) - goto loop; - simple_lock(&vp->v_interlock); - nvp = TAILQ_NEXT(vp, v_mntvnodes); + if (vp->v_mount != mp || vismarker(vp)) + continue; + mutex_enter(&vp->v_interlock); ip = VTOI(vp); - if (vp->v_type == VNON || - ((ip->i_flag & - (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 && - LIST_EMPTY(&vp->v_dirtyblkhd) && - UVM_OBJ_IS_CLEAN(&vp->v_uobj))) + if (ip == NULL || (vp->v_iflag & (VI_XLOCK|VI_CLEAN)) != 0 || + vp->v_type == VNON || ((ip->i_flag & + (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 && + LIST_EMPTY(&vp->v_dirtyblkhd) && + UVM_OBJ_IS_CLEAN(&vp->v_uobj))) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); continue; } if (vp->v_type == VBLK && fstrans_getstate(mp) == FSTRANS_SUSPENDING) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); continue; } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); if (error) { - simple_lock(&mntvnode_slock); - if (error == ENOENT) + mutex_enter(&mntvnode_lock); + if (error == ENOENT) { + (void)vunmark(mvp); goto loop; + } continue; } if (vp->v_type == VREG && waitfor == MNT_LAZY) @@ -1374,9 +1396,9 @@ loop: if (error) allerror = error; vput(vp); - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); /* * Force stale file system control information to be flushed. 
*/ @@ -1385,7 +1407,7 @@ loop: allerror = error; /* Flushed work items may create new vnodes to clean */ if (allerror == 0 && count) { - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); goto loop; } } @@ -1397,7 +1419,7 @@ loop: allerror = error; VOP_UNLOCK(ump->um_devvp, 0); if (allerror == 0 && waitfor == MNT_WAIT) { - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); goto loop; } } @@ -1414,6 +1436,7 @@ loop: allerror = error; } fstrans_done(mp); + vfree(mvp); return (allerror); } diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 0f5e29982299..4253d5a5cc41 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_vnops.c,v 1.93 2007/11/26 19:02:30 pooka Exp $ */ +/* $NetBSD: ffs_vnops.c,v 1.94 2008/01/02 11:49:10 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.93 2007/11/26 19:02:30 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.94 2008/01/02 11:49:10 ad Exp $"); #include #include @@ -64,8 +64,6 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.93 2007/11/26 19:02:30 pooka Exp $") #include -static int ffs_full_fsync(void *); - /* Global vfs data structures for ufs. */ int (**ffs_vnodeop_p)(void *); const struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { @@ -246,7 +244,7 @@ ffs_fsync(void *v) struct lwp *a_l; } */ *ap = v; struct buf *bp; - int s, num, error, i; + int num, error, i; struct indir ia[NIADDR + 1]; int bsize; daddr_t blk_high; @@ -260,7 +258,7 @@ ffs_fsync(void *v) */ if ((ap->a_offlo == 0 && ap->a_offhi == 0) || DOINGSOFTDEP(vp) || (vp->v_type != VREG)) { - error = ffs_full_fsync(v); + error = ffs_full_fsync(vp, ap->a_flags); goto out; } @@ -273,7 +271,7 @@ ffs_fsync(void *v) * First, flush all pages in range. */ - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), round_page(ap->a_offhi), PGO_CLEANIT | ((ap->a_flags & FSYNC_WAIT) ? PGO_SYNCIO : 0)); @@ -285,40 +283,32 @@ ffs_fsync(void *v) * Then, flush indirect blocks. */ - s = splbio(); if (blk_high >= NDADDR) { error = ufs_getlbns(vp, blk_high, ia, &num); - if (error) { - splx(s); + if (error) goto out; - } + + mutex_enter(&bufcache_lock); for (i = 0; i < num; i++) { - bp = incore(vp, ia[i].in_lbn); - if (bp != NULL) { - simple_lock(&bp->b_interlock); - if (!(bp->b_flags & B_BUSY) && (bp->b_flags & B_DELWRI)) { - bp->b_flags |= B_BUSY | B_VFLUSH; - simple_unlock(&bp->b_interlock); - splx(s); - bawrite(bp); - s = splbio(); - } else { - simple_unlock(&bp->b_interlock); - } - } + if ((bp = incore(vp, ia[i].in_lbn)) == NULL) + continue; + if ((bp->b_cflags & BC_BUSY) != 0 || + (bp->b_oflags & BO_DELWRI) == 0) + continue; + bp->b_cflags |= BC_BUSY | BC_VFLUSH; + mutex_exit(&bufcache_lock); + bawrite(bp); + mutex_enter(&bufcache_lock); } + mutex_exit(&bufcache_lock); } if (ap->a_flags & FSYNC_WAIT) { - simple_lock(&global_v_numoutput_slock); - while (vp->v_numoutput > 0) { - vp->v_iflag |= VI_BWAIT; - ltsleep(&vp->v_numoutput, PRIBIO + 1, "fsync_range", 0, - &global_v_numoutput_slock); - } - simple_unlock(&global_v_numoutput_slock); + mutex_enter(&vp->v_interlock); + while (vp->v_numoutput > 0) + cv_wait(&vp->v_cv, &vp->v_interlock); + mutex_exit(&vp->v_interlock); } - splx(s); error = ffs_update(vp, NULL, NULL, ((ap->a_flags & (FSYNC_WAIT | FSYNC_DATAONLY)) == FSYNC_WAIT) @@ -339,27 +329,20 @@ out: * Synch an open file. 
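+ * (With this change ffs_full_fsync() is extern and takes the
+ * vnode and flag bits directly instead of a vop_fsync_args
+ * block, so internal callers need not fabricate VOP arguments.)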
*/ /* ARGSUSED */ -static int -ffs_full_fsync(void *v) +int +ffs_full_fsync(struct vnode *vp, int flags) { - struct vop_fsync_args /* { - struct vnode *a_vp; - kauth_cred_t a_cred; - int a_flags; - off_t a_offlo; - off_t a_offhi; - struct lwp *a_l; - } */ *ap = v; - struct vnode *vp = ap->a_vp; struct buf *bp, *nbp; - int s, error, passes, skipmeta, inodedeps_only, waitfor; + int error, passes, skipmeta, inodedeps_only, waitfor; if (vp->v_type == VBLK && vp->v_specmountpoint != NULL && (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP)) softdep_fsync_mountdev(vp); - inodedeps_only = DOINGSOFTDEP(vp) && (ap->a_flags & FSYNC_RECLAIM) + mutex_enter(&vp->v_interlock); + + inodedeps_only = DOINGSOFTDEP(vp) && (flags & FSYNC_RECLAIM) && UVM_OBJ_IS_CLEAN(&vp->v_uobj) && LIST_EMPTY(&vp->v_dirtyblkhd); /* @@ -367,79 +350,72 @@ ffs_full_fsync(void *v) */ if (vp->v_type == VREG || vp->v_type == VBLK) { - simple_lock(&vp->v_interlock); error = VOP_PUTPAGES(vp, 0, 0, PGO_ALLPAGES | PGO_CLEANIT | - ((ap->a_flags & FSYNC_WAIT) ? PGO_SYNCIO : 0) | + ((flags & FSYNC_WAIT) ? PGO_SYNCIO : 0) | (fstrans_getstate(vp->v_mount) == FSTRANS_SUSPENDING ? PGO_FREE : 0)); if (error) { return error; } - } + } else + mutex_exit(&vp->v_interlock); passes = NIADDR + 1; skipmeta = 0; - if (ap->a_flags & FSYNC_WAIT) + if (flags & FSYNC_WAIT) skipmeta = 1; - s = splbio(); loop: - LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) - bp->b_flags &= ~B_SCANNED; + mutex_enter(&bufcache_lock); + LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { + bp->b_cflags &= ~BC_SCANNED; + } for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); - simple_lock(&bp->b_interlock); - if (bp->b_flags & (B_BUSY | B_SCANNED)) { - simple_unlock(&bp->b_interlock); + if (bp->b_cflags & (BC_BUSY | BC_SCANNED)) continue; - } - if ((bp->b_flags & B_DELWRI) == 0) + if ((bp->b_oflags & BO_DELWRI) == 0) panic("ffs_fsync: not dirty"); - if (skipmeta && bp->b_lblkno < 0) { - simple_unlock(&bp->b_interlock); + if (skipmeta && bp->b_lblkno < 0) continue; - } - simple_unlock(&bp->b_interlock); - bp->b_flags |= B_BUSY | B_VFLUSH | B_SCANNED; - splx(s); + bp->b_cflags |= BC_BUSY | BC_VFLUSH | BC_SCANNED; + mutex_exit(&bufcache_lock); /* * On our final pass through, do all I/O synchronously * so that we can find out if our flush is failing * because of write errors. */ - if (passes > 0 || !(ap->a_flags & FSYNC_WAIT)) + if (passes > 0 || !(flags & FSYNC_WAIT)) (void) bawrite(bp); else if ((error = bwrite(bp)) != 0) return (error); - s = splbio(); /* - * Since we may have slept during the I/O, we need + * Since we unlocked during the I/O, we need * to start from a known point. */ + mutex_enter(&bufcache_lock); nbp = LIST_FIRST(&vp->v_dirtyblkhd); } + mutex_exit(&bufcache_lock); if (skipmeta) { skipmeta = 0; goto loop; } - if (ap->a_flags & FSYNC_WAIT) { - simple_lock(&global_v_numoutput_slock); + + if (flags & FSYNC_WAIT) { + mutex_enter(&vp->v_interlock); while (vp->v_numoutput) { - vp->v_iflag |= VI_BWAIT; - (void) ltsleep(&vp->v_numoutput, PRIBIO + 1, - "ffsfsync", 0, &global_v_numoutput_slock); + cv_wait(&vp->v_cv, &vp->v_interlock); } - simple_unlock(&global_v_numoutput_slock); - splx(s); + mutex_exit(&vp->v_interlock); /* * Ensure that any filesystem metadata associated * with the vnode has been written. 
*/ - if ((error = softdep_sync_metadata(ap)) != 0) + if ((error = softdep_sync_metadata(vp)) != 0) return (error); - s = splbio(); if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { /* * Block devices associated with filesystems may @@ -459,15 +435,14 @@ loop: #endif } } - splx(s); if (inodedeps_only) waitfor = 0; else - waitfor = (ap->a_flags & FSYNC_WAIT) ? UPDATE_WAIT : 0; + waitfor = (flags & FSYNC_WAIT) ? UPDATE_WAIT : 0; error = ffs_update(vp, NULL, NULL, waitfor); - if (error == 0 && ap->a_flags & FSYNC_CACHE) { + if (error == 0 && flags & FSYNC_CACHE) { int i = 0; VOP_IOCTL(VTOI(vp)->i_devvp, DIOCCACHESYNC, &i, FWRITE, curlwp->l_cred); @@ -490,9 +465,18 @@ ffs_reclaim(void *v) struct inode *ip = VTOI(vp); struct mount *mp = vp->v_mount; struct ufsmount *ump = ip->i_ump; + void *data; int error; fstrans_start(mp, FSTRANS_LAZY); + /* + * The inode must be freed and updated before being removed + * from its hash chain. Other threads trying to gain a hold + * on the inode will be stalled because it is locked (VI_XLOCK). + */ + if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { + ffs_vfree(vp, ip->i_number, ip->i_omode); + } if ((error = ufs_reclaim(vp)) != 0) { fstrans_done(mp); return (error); @@ -503,13 +487,20 @@ ffs_reclaim(void *v) else pool_put(&ffs_dinode2_pool, ip->i_din.ffs2_din); } + /* + * To interlock with ffs_sync(). + */ + genfs_node_destroy(vp); + mutex_enter(&vp->v_interlock); + data = vp->v_data; + vp->v_data = NULL; + mutex_exit(&vp->v_interlock); + /* * XXX MFS ends up here, too, to free an inode. Should we create * XXX a separate pool for MFS inodes? */ - genfs_node_destroy(vp); - pool_put(&ffs_inode_pool, vp->v_data); - vp->v_data = NULL; + pool_put(&ffs_inode_pool, data); fstrans_done(mp); return (0); } @@ -543,7 +534,7 @@ ffs_getpages(void *v) blkoff(fs, *ap->a_count << PAGE_SHIFT) != 0) && DOINGSOFTDEP(ap->a_vp)) { if ((ap->a_flags & PGO_LOCKED) == 0) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } return EINVAL; } @@ -751,18 +742,15 @@ ffs_lock(void *v) fstrans_is_owner(mp) && fstrans_getstate(mp) == FSTRANS_SUSPENDING) { if ((flags & LK_INTERLOCK) != 0) - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return 0; } - if ((flags & LK_TYPE_MASK) == LK_DRAIN) - return (lockmgr(vp->v_vnlock, flags, &vp->v_interlock)); - KASSERT((flags & ~(LK_SHARED | LK_EXCLUSIVE | LK_SLEEPFAIL | - LK_INTERLOCK | LK_NOWAIT | LK_SETRECURSE | LK_CANRECURSE)) == 0); + LK_INTERLOCK | LK_NOWAIT | LK_CANRECURSE)) == 0); for (;;) { if ((flags & LK_INTERLOCK) == 0) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); flags |= LK_INTERLOCK; } lkp = vp->v_vnlock; @@ -800,7 +788,7 @@ ffs_unlock(void *v) fstrans_is_owner(mp) && fstrans_getstate(mp) == FSTRANS_SUSPENDING) { if ((ap->a_flags & LK_INTERLOCK) != 0) - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return 0; } return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, diff --git a/sys/ufs/lfs/lfs.h b/sys/ufs/lfs/lfs.h index e11ef3259e4e..2868d2e9312e 100644 --- a/sys/ufs/lfs/lfs.h +++ b/sys/ufs/lfs/lfs.h @@ -1,4 +1,4 @@ -/* $NetBSD: lfs.h,v 1.122 2007/10/10 20:42:34 ad Exp $ */ +/* $NetBSD: lfs.h,v 1.123 2008/01/02 11:49:10 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. @@ -70,6 +70,7 @@ #define _UFS_LFS_LFS_H_ #include +#include /* * Compile-time options for LFS. 
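The LFS_LOCK_BUF()/LFS_UNLOCK_BUF() rework in the next hunk shows the new lock hierarchy in miniature: the caller already holds bufcache_lock, lfs_lock is taken inside it only around the global locked-queue counters, and the old (b_flags & B_CALL) test for callback buffers becomes a b_iodone check. Expanded as a plain function, the new LFS_LOCK_BUF() body is roughly the following (a review sketch only; the function form, its name, and the includes are illustrative, not part of the patch):

	#include <sys/buf.h>
	#include <sys/mutex.h>

	static void
	lfs_lock_buf_sketch(struct buf *bp)
	{

		KASSERT(mutex_owned(&bufcache_lock));
		/* Buffers with an iodone callback are never counted. */
		if ((bp->b_cflags & BC_LOCKED) == 0 && bp->b_iodone == NULL) {
			/* lfs_lock nests inside bufcache_lock. */
			mutex_enter(&lfs_lock);
			++locked_queue_count;
			locked_queue_bytes += bp->b_bufsize;
			mutex_exit(&lfs_lock);
		}
		bp->b_cflags |= BC_LOCKED;
	}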
@@ -191,34 +192,35 @@ typedef struct lfs_res_blk { #define IS_IFILE(bp) (VTOI(bp->b_vp)->i_number == LFS_IFILE_INUM) # define LFS_LOCK_BUF(bp) do { \ - if (((bp)->b_flags & (B_LOCKED | B_CALL)) == 0) { \ - simple_lock(&lfs_subsys_lock); \ + KASSERT(mutex_owned(&bufcache_lock)); \ + if (((bp)->b_cflags & BC_LOCKED) == 0 && bp->b_iodone == NULL) {\ + mutex_enter(&lfs_lock); \ ++locked_queue_count; \ locked_queue_bytes += bp->b_bufsize; \ - simple_unlock(&lfs_subsys_lock); \ + mutex_exit(&lfs_lock); \ } \ - (bp)->b_flags |= B_LOCKED; \ + (bp)->b_cflags |= BC_LOCKED; \ } while (0) # define LFS_UNLOCK_BUF(bp) do { \ - if (((bp)->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED) { \ - simple_lock(&lfs_subsys_lock); \ + KASSERT(mutex_owned(&bufcache_lock)); \ + if (((bp)->b_cflags & BC_LOCKED) != 0 && bp->b_iodone == NULL) {\ + mutex_enter(&lfs_lock); \ --locked_queue_count; \ locked_queue_bytes -= bp->b_bufsize; \ if (locked_queue_count < LFS_WAIT_BUFS && \ locked_queue_bytes < LFS_WAIT_BYTES) \ wakeup(&locked_queue_count); \ - simple_unlock(&lfs_subsys_lock); \ + mutex_exit(&lfs_lock); \ } \ - (bp)->b_flags &= ~B_LOCKED; \ + (bp)->b_cflags &= ~BC_LOCKED; \ } while (0) #ifdef _KERNEL extern u_long bufmem_lowater, bufmem_hiwater; /* XXX */ -# define LFS_IS_MALLOC_BUF(bp) (((bp)->b_flags & B_CALL) && \ - (bp)->b_iodone == lfs_callback) +# define LFS_IS_MALLOC_BUF(bp) ((bp)->b_iodone == lfs_callback) # ifdef DEBUG # define LFS_DEBUG_COUNTLOCKED(m) do { \ @@ -247,7 +249,7 @@ extern struct lfs_log_entry lfs_log[LFS_LOGLENGTH]; # define LFS_ENTER_LOG(theop, thefile, theline, lbn, theflags, thepid) do {\ int _s; \ \ - simple_lock(&lfs_subsys_lock); \ + mutex_enter(&lfs_lock); \ _s = splbio(); \ lfs_log[lfs_lognum].op = theop; \ lfs_log[lfs_lognum].file = thefile; \ @@ -257,7 +259,7 @@ extern struct lfs_log_entry lfs_log[LFS_LOGLENGTH]; lfs_log[lfs_lognum].flags = (theflags); \ lfs_lognum = (lfs_lognum + 1) % LFS_LOGLENGTH; \ splx(_s); \ - simple_unlock(&lfs_subsys_lock); \ + mutex_exit(&lfs_lock); \ } while (0) # define LFS_BCLEAN_LOG(fs, bp) do { \ @@ -316,7 +318,6 @@ struct lfid { #define IN_ALLMOD (IN_MODIFIED|IN_ACCESS|IN_CHANGE|IN_UPDATE|IN_MODIFY|IN_ACCESSED|IN_CLEANING) #define LFS_SET_UINO(ip, flags) do { \ - simple_lock(&(ip)->i_lfs->lfs_interlock); \ if (((flags) & IN_ACCESSED) && !((ip)->i_flag & IN_ACCESSED)) \ ++(ip)->i_lfs->lfs_uinodes; \ if (((flags) & IN_CLEANING) && !((ip)->i_flag & IN_CLEANING)) \ @@ -324,11 +325,9 @@ struct lfid { if (((flags) & IN_MODIFIED) && !((ip)->i_flag & IN_MODIFIED)) \ ++(ip)->i_lfs->lfs_uinodes; \ (ip)->i_flag |= (flags); \ - simple_unlock(&(ip)->i_lfs->lfs_interlock); \ } while (0) #define LFS_CLR_UINO(ip, flags) do { \ - simple_lock(&(ip)->i_lfs->lfs_interlock); \ if (((flags) & IN_ACCESSED) && ((ip)->i_flag & IN_ACCESSED)) \ --(ip)->i_lfs->lfs_uinodes; \ if (((flags) & IN_CLEANING) && ((ip)->i_flag & IN_CLEANING)) \ @@ -339,7 +338,6 @@ struct lfid { if ((ip)->i_lfs->lfs_uinodes < 0) { \ panic("lfs_uinodes < 0"); \ } \ - simple_unlock(&(ip)->i_lfs->lfs_interlock); \ } while (0) #define LFS_ITIMES(ip, acc, mod, cre) \ @@ -396,15 +394,11 @@ struct segusage_v1 { #ifdef _KERNEL # define SHARE_IFLOCK(F) \ do { \ - simple_lock(&(F)->lfs_interlock); \ - lockmgr(&(F)->lfs_iflock, LK_SHARED, &(F)->lfs_interlock); \ - simple_unlock(&(F)->lfs_interlock); \ + rw_enter(&(F)->lfs_iflock, RW_READER); \ } while(0) # define UNSHARE_IFLOCK(F) \ do { \ - simple_lock(&(F)->lfs_interlock); \ - lockmgr(&(F)->lfs_iflock, LK_RELEASE, &(F)->lfs_interlock); \ - 
simple_unlock(&(F)->lfs_interlock); \ + rw_exit(&(F)->lfs_iflock); \ } while(0) #else /* ! _KERNEL */ # define SHARE_IFLOCK(F) @@ -529,7 +523,7 @@ typedef struct _cleanerinfo { * Synchronize the Ifile cleaner info with current avail and bfree. */ #define LFS_SYNC_CLEANERINFO(cip, fs, bp, w) do { \ - simple_lock(&(fs)->lfs_interlock); \ + mutex_enter(&lfs_lock); \ if ((w) || (cip)->bfree != (fs)->lfs_bfree || \ (cip)->avail != (fs)->lfs_avail - (fs)->lfs_ravail - \ (fs)->lfs_favail) { \ @@ -539,10 +533,10 @@ typedef struct _cleanerinfo { if (((bp)->b_flags & B_GATHERED) == 0) { \ (fs)->lfs_flags |= LFS_IFDIRTY; \ } \ - simple_unlock(&(fs)->lfs_interlock); \ + mutex_exit(&lfs_lock); \ (void) LFS_BWRITE_LOG(bp); /* Ifile */ \ } else { \ - simple_unlock(&(fs)->lfs_interlock); \ + mutex_exit(&lfs_lock); \ brelse(bp, 0); \ } \ } while (0) @@ -566,9 +560,9 @@ typedef struct _cleanerinfo { LFS_CLEANERINFO((CIP), (FS), (BP)); \ (CIP)->free_head = (VAL); \ LFS_BWRITE_LOG(BP); \ - simple_lock(&fs->lfs_interlock); \ + mutex_enter(&lfs_lock); \ (FS)->lfs_flags |= LFS_IFDIRTY; \ - simple_unlock(&fs->lfs_interlock); \ + mutex_exit(&lfs_lock); \ } \ } while (0) @@ -582,9 +576,9 @@ typedef struct _cleanerinfo { LFS_CLEANERINFO((CIP), (FS), (BP)); \ (CIP)->free_tail = (VAL); \ LFS_BWRITE_LOG(BP); \ - simple_lock(&fs->lfs_interlock); \ + mutex_enter(&lfs_lock); \ (FS)->lfs_flags |= LFS_IFDIRTY; \ - simple_unlock(&fs->lfs_interlock); \ + mutex_exit(&lfs_lock); \ } while (0) /* @@ -825,8 +819,9 @@ struct lfs { size_t lfs_devbsize; /* Device block size */ size_t lfs_devbshift; /* Device block shift */ krwlock_t lfs_fraglock; - struct lock lfs_iflock; /* Ifile lock */ - struct lock lfs_stoplock; /* Wrap lock */ + krwlock_t lfs_iflock; /* Ifile lock */ + kcondvar_t lfs_stopcv; /* Wrap lock */ + struct lwp *lfs_stoplwp; pid_t lfs_rfpid; /* Process ID of roll-forward agent */ int lfs_nadirop; /* number of active dirop nodes */ long lfs_ravail; /* blocks pre-reserved for writing */ @@ -845,8 +840,7 @@ struct lfs { #endif /* _KERNEL */ #define LFS_MAX_CLEANIND 64 int32_t lfs_cleanint[LFS_MAX_CLEANIND]; /* Active cleaning intervals */ - int lfs_cleanind; /* Index into intervals */ - struct simplelock lfs_interlock; /* lock for lfs_seglock */ + int lfs_cleanind; /* Index into intervals */ int lfs_sleepers; /* # procs sleeping this fs */ int lfs_pages; /* dirty pages blaming this fs */ lfs_bm_t *lfs_ino_bitmap; /* Inuse inodes bitmap */ diff --git a/sys/ufs/lfs/lfs_alloc.c b/sys/ufs/lfs/lfs_alloc.c index 1528ae95b113..ef1e88fb2bf0 100644 --- a/sys/ufs/lfs/lfs_alloc.c +++ b/sys/ufs/lfs/lfs_alloc.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_alloc.c,v 1.104 2007/12/12 18:36:10 he Exp $ */ +/* $NetBSD: lfs_alloc.c,v 1.105 2008/01/02 11:49:10 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc. @@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.104 2007/12/12 18:36:10 he Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.105 2008/01/02 11:49:10 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -256,9 +256,9 @@ lfs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred, #endif /* DIAGNOSTIC */ /* Set superblock modified bit and increment file count. 
*/ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_fmod = 1; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); ++fs->lfs_nfiles; VOP_UNLOCK(fs->lfs_ivnode, 0); @@ -285,7 +285,9 @@ lfs_ialloc(struct lfs *fs, struct vnode *pvp, ino_t new_ino, int new_gen, lfs_vcreate(pvp->v_mount, new_ino, vp); ip = VTOI(vp); + mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_CHANGE); + mutex_exit(&lfs_lock); /* on-disk structure has been zeroed out by lfs_vcreate */ ip->i_din.ffs1_din->di_inumber = new_ino; @@ -434,7 +436,6 @@ lfs_vfree(struct vnode *vp, ino_t ino, int mode) struct lfs *fs; daddr_t old_iaddr; ino_t otail; - int s; /* Get the inode number and file system. */ ip = VTOI(vp); @@ -445,28 +446,25 @@ lfs_vfree(struct vnode *vp, ino_t ino, int mode) DLOG((DLOG_ALLOC, "lfs_vfree: free ino %lld\n", (long long)ino)); /* Drain of pending writes */ - simple_lock(&vp->v_interlock); - s = splbio(); - if (fs->lfs_version > 1 && WRITEINPROG(vp)) - ltsleep(vp, (PRIBIO+1), "lfs_vfree", 0, &vp->v_interlock); - splx(s); - simple_unlock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); + while (fs->lfs_version > 1 && WRITEINPROG(vp)) { + cv_wait(&vp->v_cv, &vp->v_interlock); + } + mutex_exit(&vp->v_interlock); lfs_seglock(fs, SEGM_PROT); vn_lock(fs->lfs_ivnode, LK_EXCLUSIVE); lfs_unmark_vnode(vp); + mutex_enter(&lfs_lock); if (vp->v_uflag & VU_DIROP) { vp->v_uflag &= ~VU_DIROP; - simple_lock(&fs->lfs_interlock); - simple_lock(&lfs_subsys_lock); --lfs_dirvcount; - simple_unlock(&lfs_subsys_lock); --fs->lfs_dirvcount; TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); - simple_unlock(&fs->lfs_interlock); wakeup(&fs->lfs_dirvcount); wakeup(&lfs_dirvcount); + mutex_exit(&lfs_lock); lfs_vunref(vp); /* @@ -487,10 +485,13 @@ lfs_vfree(struct vnode *vp, ino_t ino, int mode) /* * If it's not a dirop, we can finalize right away. */ + mutex_exit(&lfs_lock); lfs_finalize_ino_seguse(fs, ip); } + mutex_enter(&lfs_lock); LFS_CLR_UINO(ip, IN_ACCESSED|IN_CLEANING|IN_MODIFIED); + mutex_exit(&lfs_lock); ip->i_flag &= ~IN_ALLMOD; ip->i_lfs_iflags |= LFSI_DELETED; @@ -582,9 +583,9 @@ lfs_vfree(struct vnode *vp, ino_t ino, int mode) } /* Set superblock modified bit and decrement file count. */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_fmod = 1; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); --fs->lfs_nfiles; VOP_UNLOCK(fs->lfs_ivnode, 0); diff --git a/sys/ufs/lfs/lfs_balloc.c b/sys/ufs/lfs/lfs_balloc.c index 796976856d56..c9f745f652dd 100644 --- a/sys/ufs/lfs/lfs_balloc.c +++ b/sys/ufs/lfs/lfs_balloc.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_balloc.c,v 1.63 2007/10/08 18:01:29 ad Exp $ */ +/* $NetBSD: lfs_balloc.c,v 1.64 2008/01/02 11:49:10 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 
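The lfs_balloc.c hunks below repeat the conversion pattern used throughout this patch: splbio()/simple_lock()/ltsleep() sequences become kmutex/condvar waits, with the global lfs_lock replacing both fs->lfs_interlock and lfs_subsys_lock. The write-drain loop just converted in lfs_vfree() above reduces to the following shape (a sketch; the helper name and includes are invented for illustration):

	#include <sys/condvar.h>
	#include <sys/mutex.h>
	#include <sys/vnode.h>

	static void
	lfs_drain_writes_sketch(struct lfs *fs, struct vnode *vp)
	{

		/* v_interlock is now a kmutex; no splbio protection needed. */
		mutex_enter(&vp->v_interlock);
		while (fs->lfs_version > 1 && WRITEINPROG(vp))
			cv_wait(&vp->v_cv, &vp->v_interlock);
		mutex_exit(&vp->v_interlock);
	}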
@@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.63 2007/10/08 18:01:29 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.64 2008/01/02 11:49:10 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -198,9 +198,9 @@ lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred, clrbuf(bp); } ip->i_lfs_effnblks += bb; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_bfree -= bb; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); ip->i_ffs1_db[lbn] = UNWRITTEN; } else { if (nsize <= osize) { @@ -242,9 +242,9 @@ lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred, } } if (ISSPACE(fs, bcount, cred)) { - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_bfree -= bcount; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); ip->i_lfs_effnblks += bcount; } else { return ENOSPC; @@ -266,7 +266,7 @@ lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred, if (!indirs[i].in_exists) { clrbuf(ibp); ibp->b_blkno = UNWRITTEN; - } else if (!(ibp->b_flags & (B_DELWRI | B_DONE))) { + } else if (!(ibp->b_oflags & (BO_DELWRI | BO_DONE))) { ibp->b_blkno = fsbtodb(fs, idaddr); ibp->b_flags |= B_READ; VOP_STRATEGY(vp, ibp); @@ -351,7 +351,7 @@ lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred, #endif VOP_BWRITE(ibp); } - } else if (bpp && !(bp->b_flags & (B_DONE|B_DELWRI))) { + } else if (bpp && !(bp->b_oflags & (BO_DONE|BO_DELWRI))) { /* * Not a brand new block, also not in the cache; * read it in from disk. @@ -434,7 +434,7 @@ lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn, struct buf * * release both and start over after waiting. */ - if (bpp && ((*bpp)->b_flags & B_DELWRI)) { + if (bpp && ((*bpp)->b_oflags & BO_DELWRI)) { if (!lfs_fits(fs, bb)) { if (bpp) brelse(*bpp, 0); @@ -448,9 +448,9 @@ lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn, struct buf * fs->lfs_avail -= bb; } - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_bfree -= bb; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); ip->i_lfs_effnblks += bb; ip->i_flag |= IN_CHANGE | IN_UPDATE; @@ -459,10 +459,11 @@ lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn, struct buf * allocbuf(*bpp, nsize, 1); /* Adjust locked-list accounting */ - if (((*bpp)->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED) { - simple_lock(&lfs_subsys_lock); + if (((*bpp)->b_cflags & BC_LOCKED) != 0 && + (*bpp)->b_iodone == NULL) { + mutex_enter(&lfs_lock); locked_queue_bytes += (*bpp)->b_bufsize - obufsize; - simple_unlock(&lfs_subsys_lock); + mutex_exit(&lfs_lock); } bzero((char *)((*bpp)->b_data) + osize, (u_int)(nsize - osize)); @@ -512,21 +513,20 @@ lfs_register_block(struct vnode *vp, daddr_t lbn) lbp = (struct lbnentry *)pool_get(&lfs_lbnentry_pool, PR_WAITOK); lbp->lbn = lbn; + mutex_enter(&lfs_lock); if (SPLAY_INSERT(lfs_splay, &ip->i_lfs_lbtree, lbp) != NULL) { + mutex_exit(&lfs_lock); /* Already there */ pool_put(&lfs_lbnentry_pool, lbp); return; } ++ip->i_lfs_nbtree; - simple_lock(&fs->lfs_interlock); fs->lfs_favail += btofsb(fs, (1 << fs->lfs_bshift)); fs->lfs_pages += fs->lfs_bsize >> PAGE_SHIFT; - simple_lock(&lfs_subsys_lock); ++locked_fakequeue_count; lfs_subsys_pages += fs->lfs_bsize >> PAGE_SHIFT; - simple_unlock(&lfs_subsys_lock); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } static void @@ -534,19 +534,18 @@ lfs_do_deregister(struct lfs *fs, struct inode *ip, struct 
lbnentry *lbp) { ASSERT_MAYBE_SEGLOCK(fs); + mutex_enter(&lfs_lock); --ip->i_lfs_nbtree; SPLAY_REMOVE(lfs_splay, &ip->i_lfs_lbtree, lbp); - pool_put(&lfs_lbnentry_pool, lbp); - simple_lock(&fs->lfs_interlock); if (fs->lfs_favail > btofsb(fs, (1 << fs->lfs_bshift))) fs->lfs_favail -= btofsb(fs, (1 << fs->lfs_bshift)); fs->lfs_pages -= fs->lfs_bsize >> PAGE_SHIFT; - simple_lock(&lfs_subsys_lock); if (locked_fakequeue_count > 0) --locked_fakequeue_count; lfs_subsys_pages -= fs->lfs_bsize >> PAGE_SHIFT; - simple_unlock(&lfs_subsys_lock); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); + + pool_put(&lfs_lbnentry_pool, lbp); } void diff --git a/sys/ufs/lfs/lfs_bio.c b/sys/ufs/lfs/lfs_bio.c index 3ae40e4d8204..4d9ebab01c22 100644 --- a/sys/ufs/lfs/lfs_bio.c +++ b/sys/ufs/lfs/lfs_bio.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_bio.c,v 1.106 2007/10/11 19:53:43 ad Exp $ */ +/* $NetBSD: lfs_bio.c,v 1.107 2008/01/02 11:49:10 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. @@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.106 2007/10/11 19:53:43 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.107 2008/01/02 11:49:10 ad Exp $"); #include #include @@ -95,7 +95,7 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.106 2007/10/11 19:53:43 ad Exp $"); * No write cost accounting is done. * This is almost certainly wrong for synchronous operations and NFS. * - * protected by lfs_subsys_lock. + * protected by lfs_lock. */ int locked_queue_count = 0; /* Count of locked-down buffers. */ long locked_queue_bytes = 0L; /* Total size of locked buffers. */ @@ -103,8 +103,11 @@ int lfs_subsys_pages = 0L; /* Total number LFS-written pages */ int lfs_fs_pagetrip = 0; /* # of pages to trip per-fs write */ int lfs_writing = 0; /* Set if already kicked off a writer because of buffer space */ -/* Lock for aboves */ -struct simplelock lfs_subsys_lock = SIMPLELOCK_INITIALIZER; + +/* Lock and condition variables for above. 
*/ +kcondvar_t locked_queue_cv; +kcondvar_t lfs_writing_cv; +kmutex_t lfs_lock; extern int lfs_dostats; @@ -125,7 +128,7 @@ lfs_fits_buf(struct lfs *fs, int n, int bytes) int count_fit, bytes_fit; ASSERT_NO_SEGLOCK(fs); - LOCK_ASSERT(simple_lock_held(&lfs_subsys_lock)); + KASSERT(mutex_owned(&lfs_lock)); count_fit = (locked_queue_count + locked_queue_rcount + n < LFS_WAIT_BUFS); @@ -157,16 +160,16 @@ lfs_reservebuf(struct lfs *fs, struct vnode *vp, KASSERT(locked_queue_rcount >= 0); KASSERT(locked_queue_rbytes >= 0); - simple_lock(&lfs_subsys_lock); + mutex_enter(&lfs_lock); while (n > 0 && !lfs_fits_buf(fs, n, bytes)) { int error; lfs_flush(fs, 0, 0); - error = ltsleep(&locked_queue_count, PCATCH | PUSER, - "lfsresbuf", hz * LFS_BUFWAIT, &lfs_subsys_lock); + error = cv_timedwait_sig(&locked_queue_cv, &lfs_lock, + hz * LFS_BUFWAIT); if (error && error != EWOULDBLOCK) { - simple_unlock(&lfs_subsys_lock); + mutex_exit(&lfs_lock); return error; } } @@ -174,7 +177,7 @@ lfs_reservebuf(struct lfs *fs, struct vnode *vp, locked_queue_rcount += n; locked_queue_rbytes += bytes; - simple_unlock(&lfs_subsys_lock); + mutex_exit(&lfs_lock); KASSERT(locked_queue_rcount >= 0); KASSERT(locked_queue_rbytes >= 0); @@ -208,9 +211,9 @@ lfs_reserveavail(struct lfs *fs, struct vnode *vp, ASSERT_MAYBE_SEGLOCK(fs); slept = 0; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (fsb > 0 && !lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail)) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); #if 0 /* * XXX ideally, we should unlock vnodes here @@ -241,19 +244,19 @@ lfs_reserveavail(struct lfs *fs, struct vnode *vp, LFS_SYNC_CLEANERINFO(cip, fs, bp, 0); lfs_wakeup_cleaner(fs); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); /* Cleaner might have run while we were reading, check again */ if (lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail)) break; - error = ltsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_reserve", - 0, &fs->lfs_interlock); + error = mtsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_reserve", + 0, &lfs_lock); #if 0 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX use lockstatus */ vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY); /* XXX use lockstatus */ #endif if (error) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return error; } } @@ -263,7 +266,7 @@ lfs_reserveavail(struct lfs *fs, struct vnode *vp, } #endif fs->lfs_ravail += fsb; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return 0; } @@ -282,12 +285,12 @@ lfs_reserve(struct lfs *fs, struct vnode *vp, struct vnode *vp2, int fsb) ASSERT_MAYBE_SEGLOCK(fs); if (vp2) { /* Make sure we're not in the process of reclaiming vp2 */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while(fs->lfs_flags & LFS_UNDIROP) { - ltsleep(&fs->lfs_flags, PRIBIO + 1, "lfsrundirop", 0, - &fs->lfs_interlock); + mtsleep(&fs->lfs_flags, PRIBIO + 1, "lfsrundirop", 0, + &lfs_lock); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } KASSERT(fsb < 0 || VOP_ISLOCKED(vp)); @@ -322,8 +325,10 @@ lfs_reserve(struct lfs *fs, struct vnode *vp, struct vnode *vp2, int fsb) * vref vnodes here so that cleaner doesn't try to reuse them. 
* (see XXX comment in lfs_reserveavail) */ + mutex_enter(&vp->v_interlock); lfs_vref(vp); if (vp2 != NULL) { + mutex_enter(&vp2->v_interlock); lfs_vref(vp2); } @@ -401,13 +406,13 @@ lfs_availwait(struct lfs *fs, int fsb) ASSERT_NO_SEGLOCK(fs); /* Push cleaner blocks through regardless */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (LFS_SEGLOCK_HELD(fs) && fs->lfs_sp->seg_flags & (SEGM_CLEAN | SEGM_FORCE_CKP)) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return 0; } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); while (!lfs_fits(fs, fsb)) { /* @@ -441,15 +446,17 @@ lfs_bwrite_ext(struct buf *bp, int flags) { struct lfs *fs; struct inode *ip; - int fsb, s; + struct vnode *vp; + int fsb; - fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs; + vp = bp->b_vp; + fs = VFSTOUFS(vp->v_mount)->um_lfs; ASSERT_MAYBE_SEGLOCK(fs); - KASSERT(bp->b_flags & B_BUSY); + KASSERT(bp->b_cflags & BC_BUSY); KASSERT(flags & BW_CLEAN || !LFS_IS_MALLOC_BUF(bp)); - KASSERT((bp->b_flags & (B_DELWRI|B_LOCKED)) != B_DELWRI); - KASSERT((bp->b_flags & (B_DELWRI|B_LOCKED)) != B_LOCKED); + KASSERT(((bp->b_oflags | bp->b_cflags) & (BO_DELWRI|BC_LOCKED)) + != BO_DELWRI); /* * Don't write *any* blocks if we're mounted read-only, or @@ -458,13 +465,16 @@ lfs_bwrite_ext(struct buf *bp, int flags) * In particular the cleaner can't write blocks either. */ if (fs->lfs_ronly || (fs->lfs_pflags & LFS_PF_CLEAN)) { - bp->b_flags &= ~(B_DELWRI | B_READ); + bp->b_oflags &= ~BO_DELWRI; + bp->b_flags |= B_READ; bp->b_error = 0; + mutex_enter(&bufcache_lock); LFS_UNLOCK_BUF(bp); if (LFS_IS_MALLOC_BUF(bp)) - bp->b_flags &= ~B_BUSY; + bp->b_cflags &= ~BC_BUSY; else - brelse(bp, 0); + brelsel(bp, 0); + mutex_exit(&bufcache_lock); return (fs->lfs_ronly ? EROFS : 0); } @@ -472,7 +482,7 @@ lfs_bwrite_ext(struct buf *bp, int flags) * Set the delayed write flag and use reassignbuf to move the buffer * from the clean list to the dirty one. * - * Set the B_LOCKED flag and unlock the buffer, causing brelse to move + * Set the BC_LOCKED flag and unlock the buffer, causing brelse to move * the buffer onto the LOCKED free list. This is necessary, otherwise * getnewbuf() would try to reclaim the buffers using bawrite, which * isn't going to work. @@ -482,56 +492,59 @@ lfs_bwrite_ext(struct buf *bp, int flags) * enough space reserved so that there's room to write meta-data * blocks. */ - if (!(bp->b_flags & B_LOCKED)) { + if ((bp->b_cflags & BC_LOCKED) == 0) { fsb = fragstofsb(fs, numfrags(fs, bp->b_bcount)); - ip = VTOI(bp->b_vp); + ip = VTOI(vp); + mutex_enter(&lfs_lock); if (flags & BW_CLEAN) { LFS_SET_UINO(ip, IN_CLEANING); } else { LFS_SET_UINO(ip, IN_MODIFIED); } + mutex_exit(&lfs_lock); fs->lfs_avail -= fsb; - bp->b_flags |= B_DELWRI; + mutex_enter(&bufcache_lock); + mutex_enter(&vp->v_interlock); + bp->b_oflags = (bp->b_oflags | BO_DELWRI) & ~BO_DONE; LFS_LOCK_BUF(bp); - bp->b_flags &= ~(B_READ | B_DONE); + bp->b_flags &= ~B_READ; bp->b_error = 0; - s = splbio(); reassignbuf(bp, bp->b_vp); - splx(s); + mutex_exit(&vp->v_interlock); + } else { + mutex_enter(&bufcache_lock); } - if (bp->b_flags & B_CALL) - bp->b_flags &= ~B_BUSY; + if (bp->b_iodone != NULL) + bp->b_cflags &= ~BC_BUSY; else - brelse(bp, 0); + brelsel(bp, 0); + mutex_exit(&bufcache_lock); return (0); } /* - * Called and return with the lfs_interlock held, but no other simple_locks - * held. + * Called and return with the lfs_lock held. 
*/ void lfs_flush_fs(struct lfs *fs, int flags) { ASSERT_NO_SEGLOCK(fs); - LOCK_ASSERT(simple_lock_held(&fs->lfs_interlock)); + KASSERT(mutex_owned(&lfs_lock)); if (fs->lfs_ronly) return; - simple_lock(&lfs_subsys_lock); if (lfs_dostats) ++lfs_stats.flush_invoked; - simple_unlock(&lfs_subsys_lock); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); lfs_writer_enter(fs, "fldirop"); lfs_segwrite(fs->lfs_ivnode->v_mount, flags); lfs_writer_leave(fs); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_favail = 0; /* XXX */ } @@ -542,7 +555,7 @@ lfs_flush_fs(struct lfs *fs, int flags) * XXX We have one static count of locked buffers; * XXX need to think more about the multiple filesystem case. * - * Called and return with lfs_subsys_lock held. + * Called and return with lfs_lock held. * If fs != NULL, we hold the segment lock for fs. */ void @@ -552,7 +565,7 @@ lfs_flush(struct lfs *fs, int flags, int only_onefs) struct mount *mp, *nmp; struct lfs *tfs; - LOCK_ASSERT(simple_lock_held(&lfs_subsys_lock)); + KASSERT(mutex_owned(&lfs_lock)); KDASSERT(fs == NULL || !LFS_SEGLOCK_HELD(fs)); if (lfs_dostats) @@ -563,20 +576,19 @@ lfs_flush(struct lfs *fs, int flags, int only_onefs) return; } while (lfs_writing) - ltsleep(&lfs_writing, PRIBIO + 1, "lfsflush", 0, - &lfs_subsys_lock); + cv_wait(&lfs_writing_cv, &lfs_lock); lfs_writing = 1; - simple_unlock(&lfs_subsys_lock); + mutex_exit(&lfs_lock); if (only_onefs) { KASSERT(fs != NULL); if (vfs_busy(fs->lfs_ivnode->v_mount, LK_NOWAIT, &mountlist_lock)) goto errout; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); lfs_flush_fs(fs, flags); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); vfs_unbusy(fs->lfs_ivnode->v_mount); } else { locked_fakequeue_count = 0; @@ -588,12 +600,12 @@ lfs_flush(struct lfs *fs, int flags, int only_onefs) nmp = CIRCLEQ_NEXT(mp, mnt_list); continue; } - if (strncmp(mp->mnt_stat.f_fstypename, MOUNT_LFS, + if (strncmp(&mp->mnt_stat.f_fstypename[0], MOUNT_LFS, sizeof(mp->mnt_stat.f_fstypename)) == 0) { tfs = VFSTOUFS(mp)->um_lfs; - simple_lock(&tfs->lfs_interlock); + mutex_enter(&lfs_lock); lfs_flush_fs(tfs, flags); - simple_unlock(&tfs->lfs_interlock); + mutex_exit(&lfs_lock); } mutex_enter(&mountlist_lock); nmp = CIRCLEQ_NEXT(mp, mnt_list); @@ -605,7 +617,7 @@ lfs_flush(struct lfs *fs, int flags, int only_onefs) wakeup(&lfs_subsys_pages); errout: - simple_lock(&lfs_subsys_lock); + mutex_enter(&lfs_lock); KASSERT(lfs_writing); lfs_writing = 0; wakeup(&lfs_writing); @@ -645,8 +657,7 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags) * If we would flush below, but dirops are active, sleep. * Note that a dirop cannot ever reach this code! 
*/ - simple_lock(&fs->lfs_interlock); - simple_lock(&lfs_subsys_lock); + mutex_enter(&lfs_lock); while (fs->lfs_dirops > 0 && (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS || locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES || @@ -654,12 +665,10 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags) fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) || lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0)) { - simple_unlock(&lfs_subsys_lock); ++fs->lfs_diropwait; - ltsleep(&fs->lfs_writer, PRIBIO+1, "bufdirop", 0, - &fs->lfs_interlock); + mtsleep(&fs->lfs_writer, PRIBIO+1, "bufdirop", 0, + &lfs_lock); --fs->lfs_diropwait; - simple_lock(&lfs_subsys_lock); } #ifdef DEBUG @@ -697,7 +706,6 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags) lfs_subsys_pages > LFS_MAX_PAGES || fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) || lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0) { - simple_unlock(&fs->lfs_interlock); lfs_flush(fs, flags, 0); } else if (lfs_fs_pagetrip && fs->lfs_pages > lfs_fs_pagetrip) { /* @@ -706,9 +714,7 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags) */ ++fs->lfs_pdflush; wakeup(&lfs_writer_daemon); - simple_unlock(&fs->lfs_interlock); - } else - simple_unlock(&fs->lfs_interlock); + } while (locked_queue_count + INOCOUNT(fs) > LFS_WAIT_BUFS || locked_queue_bytes + INOBYTES(fs) > LFS_WAIT_BYTES || @@ -720,8 +726,8 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags) ++lfs_stats.wait_exceeded; DLOG((DLOG_AVAIL, "lfs_check: waiting: count=%d, bytes=%ld\n", locked_queue_count, locked_queue_bytes)); - error = ltsleep(&locked_queue_count, PCATCH | PUSER, - "buffers", hz * LFS_BUFWAIT, &lfs_subsys_lock); + error = cv_timedwait_sig(&locked_queue_cv, &lfs_lock, + hz * LFS_BUFWAIT); if (error != EWOULDBLOCK) break; @@ -736,7 +742,7 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags) lfs_flush(fs, flags | SEGM_CKP, 0); } } - simple_unlock(&lfs_subsys_lock); + mutex_exit(&lfs_lock); return (error); } @@ -748,12 +754,11 @@ lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size, int typ { struct buf *bp; size_t nbytes; - int s; ASSERT_MAYBE_SEGLOCK(fs); nbytes = roundup(size, fsbtob(fs, 1)); - bp = getiobuf(); + bp = getiobuf(NULL, true); if (nbytes) { bp->b_data = lfs_malloc(fs, nbytes, type); /* memset(bp->b_data, 0, nbytes); */ @@ -764,10 +769,6 @@ lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size, int typ if (bp == NULL) panic("bp is NULL after malloc in lfs_newbuf"); #endif - bp->b_vp = NULL; - s = splbio(); - bgetvp(vp, bp); - splx(s); bp->b_bufsize = size; bp->b_bcount = size; @@ -776,25 +777,34 @@ lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size, int typ bp->b_error = 0; bp->b_resid = 0; bp->b_iodone = lfs_callback; - bp->b_flags = B_BUSY | B_CALL | B_NOCACHE; + bp->b_cflags = BC_BUSY | BC_NOCACHE; bp->b_private = fs; + mutex_enter(&bufcache_lock); + mutex_enter(&vp->v_interlock); + bgetvp(vp, bp); + mutex_exit(&vp->v_interlock); + mutex_exit(&bufcache_lock); + return (bp); } void lfs_freebuf(struct lfs *fs, struct buf *bp) { - int s; + struct vnode *vp; - s = splbio(); - if (bp->b_vp) + if ((vp = bp->b_vp) != NULL) { + mutex_enter(&bufcache_lock); + mutex_enter(&vp->v_interlock); brelvp(bp); - if (!(bp->b_flags & B_INVAL)) { /* B_INVAL indicates a "fake" buffer */ + mutex_exit(&vp->v_interlock); + mutex_exit(&bufcache_lock); + } + if (!(bp->b_cflags & BC_INVAL)) { /* BC_INVAL indicates a "fake" buffer */ lfs_free(fs, bp->b_data, LFS_NB_UNKNOWN); bp->b_data = NULL; } - splx(s); putiobuf(bp); } @@ 
-809,7 +819,6 @@ lfs_freebuf(struct lfs *fs, struct buf *bp) #define BQ_EMPTY 3 /* buffer headers with no memory */ extern TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES]; -extern struct simplelock bqueue_slock; /* * Count buffers on the "locked" queue, and compare it to a pro-forma count. @@ -821,12 +830,10 @@ lfs_countlocked(int *count, long *bytes, const char *msg) struct buf *bp; int n = 0; long int size = 0L; - int s; - s = splbio(); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); TAILQ_FOREACH(bp, &bufqueues[BQ_LOCKED], b_freelist) { - KASSERT(!(bp->b_flags & B_CALL)); + KASSERT(bp->b_iodone == NULL); n++; size += bp->b_bufsize; #ifdef DIAGNOSTIC @@ -849,8 +856,7 @@ lfs_countlocked(int *count, long *bytes, const char *msg) } *count = n; *bytes = size; - simple_unlock(&bqueue_slock); - splx(s); + mutex_exit(&bufcache_lock); return; } diff --git a/sys/ufs/lfs/lfs_debug.c b/sys/ufs/lfs/lfs_debug.c index 0787ee670551..8e86e14fe609 100644 --- a/sys/ufs/lfs/lfs_debug.c +++ b/sys/ufs/lfs/lfs_debug.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_debug.c,v 1.35 2007/12/12 03:49:03 lukem Exp $ */ +/* $NetBSD: lfs_debug.c,v 1.36 2008/01/02 11:49:10 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. @@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_debug.c,v 1.35 2007/12/12 03:49:03 lukem Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_debug.c,v 1.36 2008/01/02 11:49:10 ad Exp $"); #ifdef DEBUG @@ -95,7 +95,7 @@ int lfs_bwrite_log(struct buf *bp, const char *file, int line) a.a_desc = VDESC(vop_bwrite); a.a_bp = bp; - if (!(bp->b_flags & (B_DELWRI | B_GATHERED))) { + if (!(bp->b_flags & B_GATHERED) && !(bp->b_oflags & BO_DELWRI)) { LFS_ENTER_LOG("write", file, line, bp->b_lblkno, bp->b_flags, curproc->p_pid); } diff --git a/sys/ufs/lfs/lfs_extern.h b/sys/ufs/lfs/lfs_extern.h index f691c9531fc3..c64762c606da 100644 --- a/sys/ufs/lfs/lfs_extern.h +++ b/sys/ufs/lfs/lfs_extern.h @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_extern.h,v 1.93 2007/12/08 19:29:55 pooka Exp $ */ +/* $NetBSD: lfs_extern.h,v 1.94 2008/01/02 11:49:11 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. @@ -121,8 +121,10 @@ extern int locked_queue_count; extern long locked_queue_bytes; extern int lfs_subsys_pages; extern int lfs_dirvcount; -extern struct simplelock lfs_subsys_lock; +extern kmutex_t lfs_lock; extern int lfs_debug_log_subsys[]; +extern kcondvar_t lfs_writing_cv; +extern kcondvar_t locked_queue_cv; __BEGIN_DECLS /* lfs_alloc.c */ @@ -181,7 +183,7 @@ int lfs_vflush(struct vnode *); int lfs_segwrite(struct mount *, int); int lfs_writefile(struct lfs *, struct segment *, struct vnode *); int lfs_writeinode(struct lfs *, struct segment *, struct inode *); -int lfs_gatherblock(struct segment *, struct buf *, int *); +int lfs_gatherblock(struct segment *, struct buf *, kmutex_t *); int lfs_gather(struct lfs *, struct segment *, struct vnode *, int (*match )(struct lfs *, struct buf *)); void lfs_update_single(struct lfs *, struct segment *, struct vnode *, daddr_t, int32_t, int); diff --git a/sys/ufs/lfs/lfs_inode.c b/sys/ufs/lfs/lfs_inode.c index a529be55f309..4c3bb3e8d44d 100644 --- a/sys/ufs/lfs/lfs_inode.c +++ b/sys/ufs/lfs/lfs_inode.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_inode.c,v 1.115 2007/12/08 19:29:55 pooka Exp $ */ +/* $NetBSD: lfs_inode.c,v 1.116 2008/01/02 11:49:11 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 
@@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.115 2007/12/08 19:29:55 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.116 2008/01/02 11:49:11 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -98,7 +98,7 @@ static int lfs_update_seguse(struct lfs *, struct inode *ip, long, size_t); static int lfs_indirtrunc (struct inode *, daddr_t, daddr_t, daddr_t, int, long *, long *, long *, size_t *); static int lfs_blkfree (struct lfs *, struct inode *, daddr_t, size_t, long *, size_t *); -static int lfs_vtruncbuf(struct vnode *, daddr_t, int, int); +static int lfs_vtruncbuf(struct vnode *, daddr_t, bool, int); /* Search a block for a specific dinode. */ struct ufs1_dinode * @@ -134,7 +134,6 @@ lfs_update(struct vnode *vp, const struct timespec *acc, { struct inode *ip; struct lfs *fs = VFSTOUFS(vp->v_mount)->um_lfs; - int s; int flags; ASSERT_NO_SEGLOCK(fs); @@ -149,16 +148,14 @@ lfs_update(struct vnode *vp, const struct timespec *acc, * will cause a panic. So, we must wait until any pending write * for our inode completes, if we are called with UPDATE_WAIT set. */ - s = splbio(); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); while ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT && WRITEINPROG(vp)) { DLOG((DLOG_SEG, "lfs_update: sleeping on ino %d" " (in progress)\n", ip->i_number)); - ltsleep(vp, (PRIBIO+1), "lfs_update", 0, &vp->v_interlock); + cv_wait(&vp->v_cv, &vp->v_interlock); } - simple_unlock(&vp->v_interlock); - splx(s); + mutex_exit(&vp->v_interlock); LFS_ITIMES(ip, acc, mod, NULL); if (updflags & UPDATE_CLOSE) flags = ip->i_flag & (IN_MODIFIED | IN_ACCESSED | IN_CLEANING); @@ -170,24 +167,25 @@ lfs_update(struct vnode *vp, const struct timespec *acc, /* If sync, push back the vnode and any dirty blocks it may have. */ if ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT) { /* Avoid flushing VU_DIROP. */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); ++fs->lfs_diropwait; while (vp->v_uflag & VU_DIROP) { DLOG((DLOG_DIROP, "lfs_update: sleeping on inode %d" " (dirops)\n", ip->i_number)); DLOG((DLOG_DIROP, "lfs_update: vflags 0x%x, iflags" - " 0x%x\n", vp->v_uflag|vp->v_iflag|vp->v_vflag, + " 0x%x\n", + vp->v_iflag | vp->v_vflag | vp->v_uflag, ip->i_flag)); if (fs->lfs_dirops == 0) lfs_flush_fs(fs, SEGM_SYNC); else - ltsleep(&fs->lfs_writer, PRIBIO+1, "lfs_fsync", - 0, &fs->lfs_interlock); + mtsleep(&fs->lfs_writer, PRIBIO+1, "lfs_fsync", + 0, &lfs_lock); /* XXX KS - by falling out here, are we writing the vn twice? 
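
lfs_update() above now parks on the vnode's own condvar under v_interlock while a write is in progress, rather than ltsleep at splbio. The shape of that wait, reduced to a sketch (WRITEINPROG() stands in for the real predicate):

	/* Sketch: predicate and locking as in lfs_update() above. */
	static void
	wait_inode_write(struct vnode *vp)
	{
		mutex_enter(&vp->v_interlock);
		while (WRITEINPROG(vp))		/* re-checked on every wakeup */
			cv_wait(&vp->v_cv, &vp->v_interlock);
		mutex_exit(&vp->v_interlock);
	}

The I/O completion side (lfs_cluster_aiodone, later in this patch) performs the matching cv_broadcast(&vp->v_cv).
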
*/ } --fs->lfs_diropwait; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return lfs_vflush(vp); } return 0; @@ -285,7 +283,7 @@ lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred) if (error) return error; if (ioflag & IO_SYNC) { - simple_lock(&ovp->v_interlock); + mutex_enter(&ovp->v_interlock); VOP_PUTPAGES(ovp, trunc_page(osize & fs->lfs_bmask), round_page(eob), @@ -364,12 +362,12 @@ lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred) memset((char *)bp->b_data + offset, 0, (u_int)(size - offset)); allocbuf(bp, size, 1); - if ((bp->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED) { - simple_lock(&lfs_subsys_lock); + if ((bp->b_cflags & BC_LOCKED) != 0 && bp->b_iodone == NULL) { + mutex_enter(&lfs_lock); locked_queue_bytes -= obufsize - bp->b_bufsize; - simple_unlock(&lfs_subsys_lock); + mutex_exit(&lfs_lock); } - if (bp->b_flags & B_DELWRI) + if (bp->b_oflags & BO_DELWRI) fs->lfs_avail += odb - btofsb(fs, size); (void) VOP_BWRITE(bp); } else { /* vp->v_type == VREG && length < osize && offset != 0 */ @@ -399,7 +397,7 @@ lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred) eoz = MIN(lblktosize(fs, xlbn) + size, osize); uvm_vnp_zerorange(ovp, length, eoz - length); if (round_page(eoz) > round_page(length)) { - simple_lock(&ovp->v_interlock); + mutex_enter(&ovp->v_interlock); error = VOP_PUTPAGES(ovp, round_page(length), round_page(eoz), PGO_CLEANIT | PGO_DEACTIVATE | @@ -446,7 +444,7 @@ lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred) newblks[i] = 0; oip->i_size = oip->i_ffs1_size = osize; - error = lfs_vtruncbuf(ovp, lastblock + 1, 0, 0); + error = lfs_vtruncbuf(ovp, lastblock + 1, false, 0); if (error && !allerror) allerror = error; @@ -564,9 +562,9 @@ done: oip->i_size = oip->i_ffs1_size = length; oip->i_lfs_effnblks -= blocksreleased; oip->i_ffs1_blocks -= real_released; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_bfree += blocksreleased; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); #ifdef DIAGNOSTIC if (oip->i_size == 0 && (oip->i_ffs1_blocks != 0 || oip->i_lfs_effnblks != 0)) { @@ -579,12 +577,12 @@ done: /* * If we truncated to zero, take us off the paging queue. */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (oip->i_size == 0 && oip->i_flags & IN_PAGING) { oip->i_flags &= ~IN_PAGING; TAILQ_REMOVE(&fs->lfs_pchainhd, oip, i_lfs_pchain); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); oip->i_flag |= IN_CHANGE; #ifdef QUOTA @@ -736,7 +734,7 @@ lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, */ vp = ITOV(ip); bp = getblk(vp, lbn, (int)fs->lfs_bsize, 0, 0); - if (bp->b_flags & (B_DONE | B_DELWRI)) { + if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { /* Braces must be here in case trace evaluates to nothing. */ trace(TR_BREADHIT, pack(vp, fs->lfs_bsize), lbn); } else { @@ -812,12 +810,14 @@ lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, if (copy != NULL) { lfs_free(fs, copy, LFS_NB_IBLOCK); } else { - if (bp->b_flags & B_DELWRI) { + mutex_enter(&bufcache_lock); + if (bp->b_oflags & BO_DELWRI) { LFS_UNLOCK_BUF(bp); fs->lfs_avail += btofsb(fs, bp->b_bcount); wakeup(&fs->lfs_avail); } - brelse(bp, BC_INVAL); + brelsel(bp, BC_INVAL); + mutex_exit(&bufcache_lock); } *countp = blocksreleased; @@ -832,77 +832,69 @@ lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, * invalidating blocks. 
*/ static int -lfs_vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo) +lfs_vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo) { struct buf *bp, *nbp; - int s, error; + int error; struct lfs *fs; voff_t off; off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO); if (error) return error; fs = VTOI(vp)->i_lfs; - s = splbio(); ASSERT_SEGLOCK(fs); -restart: + + mutex_enter(&bufcache_lock); +restart: for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); if (bp->b_lblkno < lbn) continue; - simple_lock(&bp->b_interlock); - if (bp->b_flags & B_BUSY) { - bp->b_flags |= B_WANTED; - error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, - "lfs_vtruncbuf", slptimeo, &bp->b_interlock); - if (error) { - splx(s); - return (error); - } + error = bbusy(bp, catch, slptimeo); + if (error == EPASSTHROUGH) goto restart; + if (error != 0) { + mutex_exit(&bufcache_lock); + return (error); } - bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; - if (bp->b_flags & B_DELWRI) { - bp->b_flags &= ~B_DELWRI; + mutex_enter(bp->b_objlock); + if (bp->b_oflags & BO_DELWRI) { + bp->b_oflags &= ~BO_DELWRI; fs->lfs_avail += btofsb(fs, bp->b_bcount); wakeup(&fs->lfs_avail); } + mutex_exit(bp->b_objlock); LFS_UNLOCK_BUF(bp); - simple_unlock(&bp->b_interlock); - brelse(bp, 0); + brelsel(bp, BC_INVAL | BC_VFLUSH); } for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); if (bp->b_lblkno < lbn) continue; - simple_lock(&bp->b_interlock); - if (bp->b_flags & B_BUSY) { - bp->b_flags |= B_WANTED; - error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, - "lfs_vtruncbuf", slptimeo, &bp->b_interlock); - if (error) { - splx(s); - return (error); - } + error = bbusy(bp, catch, slptimeo); + if (error == EPASSTHROUGH) goto restart; + if (error != 0) { + mutex_exit(&bufcache_lock); + return (error); } - bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; - if (bp->b_flags & B_DELWRI) { - bp->b_flags &= ~B_DELWRI; + mutex_enter(bp->b_objlock); + if (bp->b_oflags & BO_DELWRI) { + bp->b_oflags &= ~BO_DELWRI; fs->lfs_avail += btofsb(fs, bp->b_bcount); wakeup(&fs->lfs_avail); } + mutex_exit(bp->b_objlock); LFS_UNLOCK_BUF(bp); - simple_unlock(&bp->b_interlock); - brelse(bp, 0); + brelsel(bp, BC_INVAL | BC_VFLUSH); } - - splx(s); + mutex_exit(&bufcache_lock); return (0); } diff --git a/sys/ufs/lfs/lfs_itimes.c b/sys/ufs/lfs/lfs_itimes.c index 7807a626239a..07fb28a1e326 100644 --- a/sys/ufs/lfs/lfs_itimes.c +++ b/sys/ufs/lfs/lfs_itimes.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_itimes.c,v 1.10 2006/06/23 14:13:02 yamt Exp $ */ +/* $NetBSD: lfs_itimes.c,v 1.11 2008/01/02 11:49:11 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. @@ -36,7 +36,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
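
lfs_vtruncbuf() above adopts the bbusy(9) protocol: EPASSTHROUGH means the buffer was busy and the list may have changed while we slept, so the whole scan restarts. One of the two loops, condensed into a sketch (the BO_DELWRI accounting is elided):

	/* Condensed from lfs_vtruncbuf() above; accounting elided. */
	static int
	drain_clean_bufs(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo)
	{
		struct buf *bp, *nbp;
		int error;

		mutex_enter(&bufcache_lock);
	restart:
		for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp != NULL; bp = nbp) {
			nbp = LIST_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno < lbn)
				continue;
			error = bbusy(bp, catch, slptimeo);
			if (error == EPASSTHROUGH)	/* lost a race; rescan */
				goto restart;
			if (error != 0) {
				mutex_exit(&bufcache_lock);
				return error;
			}
			brelsel(bp, BC_INVAL | BC_VFLUSH);	/* drops BC_BUSY */
		}
		mutex_exit(&bufcache_lock);
		return 0;
	}
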
*/ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_itimes.c,v 1.10 2006/06/23 14:13:02 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_itimes.c,v 1.11 2008/01/02 11:49:11 ad Exp $"); #include #include @@ -87,11 +87,13 @@ lfs_itimes(struct inode *ip, const struct timespec *acc, ifp->if_atime_sec = acc->tv_sec; ifp->if_atime_nsec = acc->tv_nsec; LFS_BWRITE_LOG(ibp); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_flags |= LFS_IFDIRTY; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } else { + mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_ACCESSED); + mutex_exit(&lfs_lock); } } if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFY)) { @@ -112,10 +114,12 @@ lfs_itimes(struct inode *ip, const struct timespec *acc, ip->i_ffs1_ctime = cre->tv_sec; ip->i_ffs1_ctimensec = cre->tv_nsec; } + mutex_enter(&lfs_lock); if (ip->i_flag & (IN_CHANGE | IN_UPDATE)) LFS_SET_UINO(ip, IN_MODIFIED); if (ip->i_flag & IN_MODIFY) LFS_SET_UINO(ip, IN_ACCESSED); + mutex_exit(&lfs_lock); } ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY); } diff --git a/sys/ufs/lfs/lfs_rfw.c b/sys/ufs/lfs/lfs_rfw.c index b3fdb9417693..ba043928a853 100644 --- a/sys/ufs/lfs/lfs_rfw.c +++ b/sys/ufs/lfs/lfs_rfw.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_rfw.c,v 1.8 2007/12/12 18:35:21 he Exp $ */ +/* $NetBSD: lfs_rfw.c,v 1.9 2008/01/02 11:49:11 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_rfw.c,v 1.8 2007/12/12 18:35:21 he Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_rfw.c,v 1.9 2008/01/02 11:49:11 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -199,15 +199,13 @@ lfs_rf_valloc(struct lfs *fs, ino_t ino, int vers, struct lwp *l, lfs_unmark_vnode(vp); (void)lfs_vunref(vp); vp->v_uflag &= ~VU_DIROP; - simple_lock(&fs->lfs_interlock); - simple_lock(&lfs_subsys_lock); + mutex_enter(&lfs_lock); --lfs_dirvcount; - simple_unlock(&lfs_subsys_lock); --fs->lfs_dirvcount; TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); wakeup(&lfs_dirvcount); wakeup(&fs->lfs_dirvcount); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } *vpp = vp; return error; @@ -247,7 +245,7 @@ update_meta(struct lfs *fs, ino_t ino, int vers, daddr_t lbn, return (error); } /* No need to write, the block is already on disk */ - if (bp->b_flags & B_DELWRI) { + if (bp->b_oflags & BO_DELWRI) { LFS_UNLOCK_BUF(bp); fs->lfs_avail += btofsb(fs, bp->b_bcount); } @@ -570,12 +568,12 @@ check_segsum(struct lfs *fs, daddr_t offset, u_int64_t nextserial, if (flags & CHECK_UPDATE) { fs->lfs_avail -= (offset - oldoffset); /* Don't clog the buffer queue */ - simple_lock(&lfs_subsys_lock); + mutex_enter(&lfs_lock); if (locked_queue_count > LFS_MAX_BUFS || locked_queue_bytes > LFS_MAX_BYTES) { lfs_flush(fs, SEGM_CKP, 0); } - simple_unlock(&lfs_subsys_lock); + mutex_exit(&lfs_lock); } err2: diff --git a/sys/ufs/lfs/lfs_segment.c b/sys/ufs/lfs/lfs_segment.c index 771be8a31c73..feee8059e78f 100644 --- a/sys/ufs/lfs/lfs_segment.c +++ b/sys/ufs/lfs/lfs_segment.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_segment.c,v 1.206 2007/10/10 20:42:35 ad Exp $ */ +/* $NetBSD: lfs_segment.c,v 1.207 2008/01/02 11:49:11 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 
@@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.206 2007/10/10 20:42:35 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.207 2008/01/02 11:49:11 ad Exp $"); #ifdef DEBUG # define vndebug(vp, str) do { \ @@ -116,8 +116,7 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.206 2007/10/10 20:42:35 ad Exp $") MALLOC_JUSTDEFINE(M_SEGMENT, "LFS segment", "Segment for LFS"); extern int count_lock_queue(void); -extern struct simplelock vnode_free_list_slock; /* XXX */ -extern struct simplelock bqueue_slock; /* XXX */ +extern kmutex_t vnode_free_list_lock; /* XXX */ static void lfs_generic_callback(struct buf *, void (*)(struct buf *)); static void lfs_free_aiodone(struct buf *); @@ -203,7 +202,7 @@ lfs_vflush(struct vnode *vp) struct lfs *fs; struct segment *sp; struct buf *bp, *nbp, *tbp, *tnbp; - int error, s; + int error; int flushed; int relock; int loopcount; @@ -216,14 +215,16 @@ lfs_vflush(struct vnode *vp) ASSERT_NO_SEGLOCK(fs); if (ip->i_flag & IN_CLEANING) { ivndebug(vp,"vflush/in_cleaning"); + mutex_enter(&lfs_lock); LFS_CLR_UINO(ip, IN_CLEANING); LFS_SET_UINO(ip, IN_MODIFIED); + mutex_exit(&lfs_lock); /* * Toss any cleaning buffers that have real counterparts * to avoid losing new data. */ - s = splbio(); + mutex_enter(&vp->v_interlock); for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); if (!LFS_IS_MALLOC_BUF(bp)) @@ -239,7 +240,6 @@ lfs_vflush(struct vnode *vp) struct vm_page *pg; voff_t off; - simple_lock(&vp->v_interlock); for (off = lblktosize(fs, bp->b_lblkno); off < lblktosize(fs, bp->b_lblkno + 1); off += PAGE_SIZE) { @@ -251,13 +251,13 @@ lfs_vflush(struct vnode *vp) fs->lfs_avail += btofsb(fs, bp->b_bcount); wakeup(&fs->lfs_avail); + mutex_exit(&vp->v_interlock); lfs_freebuf(fs, bp); + mutex_enter(&vp->v_interlock); bp = NULL; - simple_unlock(&vp->v_interlock); - goto nextbp; + break; } } - simple_unlock(&vp->v_interlock); } for (tbp = LIST_FIRST(&vp->v_dirtyblkhd); tbp; tbp = tnbp) @@ -270,26 +270,24 @@ lfs_vflush(struct vnode *vp) fs->lfs_avail += btofsb(fs, bp->b_bcount); wakeup(&fs->lfs_avail); + mutex_exit(&vp->v_interlock); lfs_freebuf(fs, bp); + mutex_enter(&vp->v_interlock); bp = NULL; break; } } - nextbp: - ; } - splx(s); + } else { + mutex_enter(&vp->v_interlock); } /* If the node is being written, wait until that is done */ - simple_lock(&vp->v_interlock); - s = splbio(); - if (WRITEINPROG(vp)) { + while (WRITEINPROG(vp)) { ivndebug(vp,"vflush/writeinprog"); - ltsleep(vp, (PRIBIO+1), "lfs_vw", 0, &vp->v_interlock); + cv_wait(&vp->v_cv, &vp->v_interlock); } - splx(s); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); /* Protect against VI_XLOCK deadlock in vinvalbuf() */ lfs_seglock(fs, SEGM_SYNC); @@ -298,40 +296,41 @@ lfs_vflush(struct vnode *vp) if (ip->i_lfs_iflags & LFSI_DELETED) { DLOG((DLOG_VNODE, "lfs_vflush: ino %d freed, not flushing\n", ip->i_number)); - s = splbio(); /* Drain v_numoutput */ - simple_lock(&global_v_numoutput_slock); + mutex_enter(&vp->v_interlock); while (vp->v_numoutput > 0) { - vp->v_iflag |= VI_BWAIT; - ltsleep(&vp->v_numoutput, PRIBIO + 1, "lfs_vf4", 0, - &global_v_numoutput_slock); + cv_wait(&vp->v_cv, &vp->v_interlock); } - simple_unlock(&global_v_numoutput_slock); KASSERT(vp->v_numoutput == 0); + mutex_exit(&vp->v_interlock); + mutex_enter(&bufcache_lock); for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); KASSERT((bp->b_flags & B_GATHERED) == 0); - if (bp->b_flags & B_DELWRI) { /* XXX always true? 
*/ + if (bp->b_oflags & BO_DELWRI) { /* XXX always true? */ fs->lfs_avail += btofsb(fs, bp->b_bcount); wakeup(&fs->lfs_avail); } /* Copied from lfs_writeseg */ - if (bp->b_flags & B_CALL) { + if (bp->b_iodone != NULL) { + mutex_exit(&bufcache_lock); biodone(bp); + mutex_enter(&bufcache_lock); } else { bremfree(bp); LFS_UNLOCK_BUF(bp); - bp->b_flags &= - ~(B_READ | B_DELWRI | B_GATHERED); - bp->b_flags |= B_DONE; + mutex_enter(&vp->v_interlock); + bp->b_flags &= ~(B_READ | B_GATHERED); + bp->b_oflags = (bp->b_oflags & ~BO_DELWRI) | BO_DONE; bp->b_error = 0; reassignbuf(bp, vp); + mutex_exit(&vp->v_interlock); brelse(bp, 0); } } - splx(s); + mutex_exit(&bufcache_lock); LFS_CLR_UINO(ip, IN_CLEANING); LFS_CLR_UINO(ip, IN_MODIFIED | IN_ACCESSED); ip->i_flag &= ~IN_ALLMOD; @@ -352,18 +351,13 @@ lfs_vflush(struct vnode *vp) lfs_segunlock(fs); /* Make sure that any pending buffers get written */ - s = splbio(); - simple_lock(&global_v_numoutput_slock); + mutex_enter(&vp->v_interlock); while (vp->v_numoutput > 0) { - vp->v_iflag |= VI_BWAIT; - ltsleep(&vp->v_numoutput, PRIBIO + 1, "lfs_vf3", 0, - &global_v_numoutput_slock); + cv_wait(&vp->v_cv, &vp->v_interlock); } - simple_unlock(&global_v_numoutput_slock); - splx(s); - KASSERT(LIST_FIRST(&vp->v_dirtyblkhd) == NULL); KASSERT(vp->v_numoutput == 0); + mutex_exit(&vp->v_interlock); return error; } @@ -408,7 +402,9 @@ lfs_vflush(struct vnode *vp) */ KDASSERT(ip->i_number != LFS_IFILE_INUM); lfs_writeinode(fs, sp, ip); + mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_MODIFIED); + mutex_exit(&lfs_lock); lfs_writeseg(fs, sp); lfs_segunlock(fs); lfs_segunlock_relock(fs); @@ -456,29 +452,24 @@ lfs_vflush(struct vnode *vp) * We compare the iocount against 1, not 0, because it is * artificially incremented by lfs_seglock(). */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (fs->lfs_seglock > 1) { while (fs->lfs_iocount > 1) - (void)ltsleep(&fs->lfs_iocount, PRIBIO + 1, - "lfs_vflush", 0, &fs->lfs_interlock); + (void)mtsleep(&fs->lfs_iocount, PRIBIO + 1, + "lfs_vflush", 0, &lfs_lock); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); lfs_segunlock(fs); /* Wait for these buffers to be recovered by aiodoned */ - s = splbio(); - simple_lock(&global_v_numoutput_slock); + mutex_enter(&vp->v_interlock); while (vp->v_numoutput > 0) { - vp->v_iflag |= VI_BWAIT; - ltsleep(&vp->v_numoutput, PRIBIO + 1, "lfs_vf2", 0, - &global_v_numoutput_slock); + cv_wait(&vp->v_cv, &vp->v_interlock); } - simple_unlock(&global_v_numoutput_slock); - splx(s); - KASSERT(LIST_FIRST(&vp->v_dirtyblkhd) == NULL); KASSERT(vp->v_numoutput == 0); + mutex_exit(&vp->v_interlock); fs->lfs_flushvp = NULL; KASSERT(fs->lfs_flushvp_fakevref == 0); @@ -497,6 +488,7 @@ lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op) ASSERT_SEGLOCK(fs); loop: /* start at last (newest) vnode. */ + mutex_enter(&mntvnode_lock); TAILQ_FOREACH_REVERSE(vp, &mp->mnt_vnodelist, vnodelst, v_mntvnodes) { /* * If the vnode that we are about to sync is no longer @@ -509,11 +501,15 @@ lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op) * due to our own previous putpages. * Start actual segment write here to avoid deadlock. 
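
The freed-inode path above shows the new three-way flag split: B_* bits stay in b_flags (buffer owner), BO_* bits move to b_oflags (covered by bp->b_objlock), and BC_* bits to b_cflags (covered by bufcache_lock); the old B_CALL bit disappears in favour of testing b_iodone directly. Hypothetical helpers, purely to document which lock covers which word:

	/* Illustrative only; these helpers are not part of the kernel API. */
	static bool
	buf_is_done(struct buf *bp)		/* BO_*: object lock */
	{
		KASSERT(mutex_owned(bp->b_objlock));
		return (bp->b_oflags & BO_DONE) != 0;
	}

	static bool
	buf_is_busy(struct buf *bp)		/* BC_*: bufcache_lock */
	{
		KASSERT(mutex_owned(&bufcache_lock));
		return (bp->b_cflags & BC_BUSY) != 0;
	}

	static bool
	buf_is_async(struct buf *bp)		/* replaces the old B_CALL test */
	{
		return bp->b_iodone != NULL;
	}
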
*/ + mutex_exit(&mntvnode_lock); (void)lfs_writeseg(fs, sp); goto loop; } - if (vp->v_type == VNON) { + mutex_enter(&vp->v_interlock); + if (vp->v_type == VNON || vismarker(vp) || + (vp->v_iflag & VI_CLEAN) != 0) { + mutex_exit(&vp->v_interlock); continue; } @@ -521,11 +517,13 @@ lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op) if ((op == VN_DIROP && !(vp->v_uflag & VU_DIROP)) || (op != VN_DIROP && op != VN_CLEAN && (vp->v_uflag & VU_DIROP))) { + mutex_exit(&vp->v_interlock); vndebug(vp,"dirop"); continue; } if (op == VN_EMPTY && !VPISEMPTY(vp)) { + mutex_exit(&vp->v_interlock); vndebug(vp,"empty"); continue; } @@ -533,12 +531,15 @@ lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op) if (op == VN_CLEAN && ip->i_number != LFS_IFILE_INUM && vp != fs->lfs_flushvp && !(ip->i_flag & IN_CLEANING)) { + mutex_exit(&vp->v_interlock); vndebug(vp,"cleaning"); continue; } + mutex_exit(&mntvnode_lock); if (lfs_vref(vp)) { vndebug(vp,"vref"); + mutex_enter(&mntvnode_lock); continue; } @@ -566,11 +567,16 @@ lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op) lfs_writeseg(fs, sp); if (!VPISEMPTY(vp) && !WRITEINPROG(vp) && - !(ip->i_flag & IN_ALLMOD)) + !(ip->i_flag & IN_ALLMOD)) { + mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_MODIFIED); + mutex_exit(&lfs_lock); + } + mutex_enter(&mntvnode_lock); break; } error = 0; /* XXX not quite right */ + mutex_enter(&mntvnode_lock); continue; } @@ -578,7 +584,9 @@ lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op) if (WRITEINPROG(vp)) { ivndebug(vp,"writevnodes/write2"); } else if (!(ip->i_flag & IN_ALLMOD)) { + mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_MODIFIED); + mutex_exit(&lfs_lock); } } (void) lfs_writeinode(fs, sp, ip); @@ -590,7 +598,10 @@ lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op) lfs_vunref_head(vp); else lfs_vunref(vp); + + mutex_enter(&mntvnode_lock); } + mutex_exit(&mntvnode_lock); return error; } @@ -606,7 +617,7 @@ lfs_segwrite(struct mount *mp, int flags) struct segment *sp; struct vnode *vp; SEGUSE *segusep; - int do_ckp, did_ckp, error, s; + int do_ckp, did_ckp, error; unsigned n, segleft, maxseg, sn, i, curseg; int writer_set = 0; int dirty; @@ -709,7 +720,7 @@ lfs_segwrite(struct mount *mp, int flags) } } - LOCK_ASSERT(LFS_SEGLOCK_HELD(fs)); + KASSERT(LFS_SEGLOCK_HELD(fs)); did_ckp = 0; if (do_ckp || fs->lfs_doifile) { @@ -720,9 +731,9 @@ lfs_segwrite(struct mount *mp, int flags) #ifdef DEBUG LFS_ENTER_LOG("pretend", __FILE__, __LINE__, 0, 0, curproc->p_pid); #endif - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_flags &= ~LFS_IFDIRTY; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); ip = VTOI(vp); @@ -748,9 +759,9 @@ lfs_segwrite(struct mount *mp, int flags) redo = lfs_writeinode(fs, sp, ip); #endif redo += lfs_writeseg(fs, sp); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); redo += (fs->lfs_flags & LFS_IFDIRTY); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); #ifdef DEBUG if (++loopcount > 2) log(LOG_NOTICE, "lfs_segwrite: looping count=%d\n", @@ -765,7 +776,7 @@ lfs_segwrite(struct mount *mp, int flags) * for other parts of the Ifile to be dirty after the loop * above, since we hold the segment lock. 
*/ - s = splbio(); + mutex_enter(&vp->v_interlock); if (LIST_EMPTY(&vp->v_dirtyblkhd)) { LFS_CLR_UINO(ip, IN_ALLMOD); } @@ -786,7 +797,7 @@ lfs_segwrite(struct mount *mp, int flags) panic("dirty blocks"); } #endif - splx(s); + mutex_exit(&vp->v_interlock); VOP_UNLOCK(vp, 0); } else { (void) lfs_writeseg(fs, sp); @@ -866,7 +877,7 @@ lfs_writefile(struct lfs *fs, struct segment *sp, struct vnode *vp) * everything we've got. */ if (!IS_FLUSHING(fs, vp)) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, 0, 0, PGO_CLEANIT | PGO_ALLPAGES | PGO_LOCKED); } @@ -952,7 +963,7 @@ lfs_update_iaddr(struct lfs *fs, struct segment *sp, struct inode *ip, daddr_t n if (sntod(fs, sn) + btofsb(fs, fs->lfs_sumsize) == fs->lfs_offset) { LFS_SEGENTRY(sup, fs, sn, bp); - KASSERT(bp->b_flags & B_DELWRI); + KASSERT(bp->b_oflags & BO_DELWRI); LFS_WRITESEGENTRY(sup, fs, sn, bp); /* fs->lfs_flags |= LFS_IFDIRTY; */ redo_ifile |= 1; @@ -1011,9 +1022,9 @@ lfs_update_iaddr(struct lfs *fs, struct segment *sp, struct inode *ip, daddr_t n redo_ifile |= (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED)); if (redo_ifile) { - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_flags |= LFS_IFDIRTY; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* Don't double-account */ fs->lfs_idaddr = 0x0; } @@ -1240,14 +1251,16 @@ lfs_writeinode(struct lfs *fs, struct segment *sp, struct inode *ip) (sp->ninodes % INOPB(fs)); /* Not dirty any more */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_flags &= ~LFS_IFDIRTY; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } if (gotblk) { + mutex_enter(&bufcache_lock); LFS_LOCK_BUF(bp); - brelse(bp, 0); + brelsel(bp, 0); + mutex_exit(&bufcache_lock); } /* Increment inode count in segment summary block. */ @@ -1264,7 +1277,7 @@ lfs_writeinode(struct lfs *fs, struct segment *sp, struct inode *ip) } int -lfs_gatherblock(struct segment *sp, struct buf *bp, int *sptr) +lfs_gatherblock(struct segment *sp, struct buf *bp, kmutex_t *mptr) { struct lfs *fs; int vers; @@ -1283,8 +1296,8 @@ lfs_gatherblock(struct segment *sp, struct buf *bp, int *sptr) blksinblk = howmany(bp->b_bcount, fs->lfs_bsize); if (sp->sum_bytes_left < sizeof(int32_t) * blksinblk || sp->seg_bytes_left < bp->b_bcount) { - if (sptr) - splx(*sptr); + if (mptr) + mutex_exit(mptr); lfs_updatemeta(sp); vers = sp->fip->fi_version; @@ -1293,8 +1306,8 @@ lfs_gatherblock(struct segment *sp, struct buf *bp, int *sptr) /* Add the current file to the segment summary. */ lfs_acquire_finfo(fs, VTOI(sp->vp)->i_number, vers); - if (sptr) - *sptr = splbio(); + if (mptr) + mutex_enter(mptr); return (1); } @@ -1325,14 +1338,14 @@ lfs_gather(struct lfs *fs, struct segment *sp, struct vnode *vp, int (*match)(struct lfs *, struct buf *)) { struct buf *bp, *nbp; - int s, count = 0; + int count = 0; ASSERT_SEGLOCK(fs); if (vp->v_type == VBLK) return 0; KASSERT(sp->vp == NULL); sp->vp = vp; - s = splbio(); + mutex_enter(&bufcache_lock); #ifndef LFS_NO_BACKBUF_HACK /* This is a hack to see if ordering the blocks in LFS makes a difference. 
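
lfs_gatherblock() now takes the caller's kmutex_t * where it used to take a saved spl, so it can drop bufcache_lock around the blocking lfs_updatemeta() call and re-take it before returning. The hand-off pattern, sketched with the segment accounting omitted (gather_one() is a stand-in, not the real function body):

	/* Sketch of the lock hand-off in lfs_gatherblock() below. */
	static int
	gather_one(struct segment *sp, struct buf *bp, kmutex_t *mptr)
	{
		KASSERT(mptr == NULL || mutex_owned(mptr));

		if (sp->seg_bytes_left < bp->b_bcount) {
			if (mptr != NULL)
				mutex_exit(mptr);	/* about to sleep */
			lfs_updatemeta(sp);		/* flush what we have */
			if (mptr != NULL)
				mutex_enter(mptr);
			return 1;			/* caller rescans list */
		}
		/* ...account the block while the lock is still held... */
		return 0;
	}

Returning 1 sends lfs_gather() back to its loop: label, mirroring the goto loop in the hunk below.
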
*/ @@ -1356,10 +1369,12 @@ loop: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); #endif /* LFS_NO_BACKBUF_HACK */ - if ((bp->b_flags & (B_BUSY|B_GATHERED)) || !match(fs, bp)) { + if ((bp->b_cflags & BC_BUSY) != 0 || + (bp->b_flags & B_GATHERED) != 0 || !match(fs, bp)) { #ifdef DEBUG if (vp == fs->lfs_ivnode && - (bp->b_flags & (B_BUSY|B_GATHERED)) == B_BUSY) + (bp->b_cflags & BC_BUSY) != 0 && + (bp->b_flags & B_GATHERED) == 0) log(LOG_NOTICE, "lfs_gather: ifile lbn %" PRId64 " busy (%x) at 0x%x", bp->b_lblkno, bp->b_flags, @@ -1369,29 +1384,29 @@ loop: } #ifdef DIAGNOSTIC # ifdef LFS_USE_B_INVAL - if ((bp->b_flags & (B_CALL|B_INVAL)) == B_INVAL) { + if ((bp->b_flags & BC_INVAL) != 0 && bp->b_iodone == NULL) { DLOG((DLOG_SEG, "lfs_gather: lbn %" PRId64 - " is B_INVAL\n", bp->b_lblkno)); + " is BC_INVAL\n", bp->b_lblkno)); VOP_PRINT(bp->b_vp); } # endif /* LFS_USE_B_INVAL */ - if (!(bp->b_flags & B_DELWRI)) - panic("lfs_gather: bp not B_DELWRI"); - if (!(bp->b_flags & B_LOCKED)) { + if (!(bp->b_oflags & BO_DELWRI)) + panic("lfs_gather: bp not BO_DELWRI"); + if (!(bp->b_cflags & BC_LOCKED)) { DLOG((DLOG_SEG, "lfs_gather: lbn %" PRId64 - " blk %" PRId64 " not B_LOCKED\n", + " blk %" PRId64 " not BC_LOCKED\n", bp->b_lblkno, dbtofsb(fs, bp->b_blkno))); VOP_PRINT(bp->b_vp); - panic("lfs_gather: bp not B_LOCKED"); + panic("lfs_gather: bp not BC_LOCKED"); } #endif - if (lfs_gatherblock(sp, bp, &s)) { + if (lfs_gatherblock(sp, bp, &bufcache_lock)) { goto loop; } count++; } - splx(s); + mutex_exit(&bufcache_lock); lfs_updatemeta(sp); KASSERT(sp->vp == vp); sp->vp = NULL; @@ -1538,9 +1553,9 @@ lfs_update_single(struct lfs *fs, struct segment *sp, ip->i_number, lbn, daddr)); sup->su_nbytes -= osize; if (!(bp->b_flags & B_GATHERED)) { - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_flags |= LFS_IFDIRTY; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } LFS_WRITESEGENTRY(sup, fs, oldsn, bp); } @@ -1764,12 +1779,12 @@ lfs_initseg(struct lfs *fs) fs->lfs_cleanint[fs->lfs_cleanind] = fs->lfs_offset; if (++fs->lfs_cleanind >= LFS_MAX_CLEANIND) { /* "1" is the artificial inc in lfs_seglock */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (fs->lfs_iocount > 1) { - ltsleep(&fs->lfs_iocount, PRIBIO + 1, - "lfs_initseg", 0, &fs->lfs_interlock); + mtsleep(&fs->lfs_iocount, PRIBIO + 1, + "lfs_initseg", 0, &lfs_lock); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); fs->lfs_cleanind = 0; } } @@ -1846,7 +1861,7 @@ lfs_newseg(struct lfs *fs) ASSERT_SEGLOCK(fs); /* Honor LFCNWRAPSTOP */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (fs->lfs_nextseg < fs->lfs_curseg && fs->lfs_nowrap) { if (fs->lfs_wrappass) { log(LOG_NOTICE, "%s: wrappass=%d\n", @@ -1857,11 +1872,11 @@ lfs_newseg(struct lfs *fs) fs->lfs_wrapstatus = LFS_WRAP_WAITING; wakeup(&fs->lfs_nowrap); log(LOG_NOTICE, "%s: waiting at log wrap\n", fs->lfs_fsmnt); - ltsleep(&fs->lfs_wrappass, PVFS, "newseg", 10 * hz, - &fs->lfs_interlock); + mtsleep(&fs->lfs_wrappass, PVFS, "newseg", 10 * hz, + &lfs_lock); } fs->lfs_wrapstatus = LFS_WRAP_GOING; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); LFS_SEGENTRY(sup, fs, dtosn(fs, fs->lfs_nextseg), bp); DLOG((DLOG_SU, "lfs_newseg: seg %d := 0 in newseg\n", @@ -1936,13 +1951,11 @@ lfs_newclusterbuf(struct lfs *fs, struct vnode *vp, daddr_t addr, } /* Get an empty buffer header, or maybe one with something on it */ - bp = getiobuf(); - bp->b_flags = B_BUSY | B_CALL; + bp = 
getiobuf(vp, true); bp->b_dev = NODEV; bp->b_blkno = bp->b_lblkno = addr; bp->b_iodone = lfs_cluster_callback; bp->b_private = cl; - bp->b_vp = vp; return bp; } @@ -1950,10 +1963,10 @@ lfs_newclusterbuf(struct lfs *fs, struct vnode *vp, daddr_t addr, int lfs_writeseg(struct lfs *fs, struct segment *sp) { - struct buf **bpp, *bp, *cbp, *newbp; + struct buf **bpp, *bp, *cbp, *newbp, *unbusybp; SEGUSE *sup; SEGSUM *ssp; - int i, s; + int i; int do_again, nblocks, byteoffset; size_t el_size; struct lfs_cluster *cl; @@ -2041,30 +2054,26 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) * there are any, replace them with copies that have UNASSIGNED * instead. */ + mutex_enter(&bufcache_lock); for (bpp = sp->bpp, i = nblocks - 1; i--;) { ++bpp; bp = *bpp; - if (bp->b_flags & B_CALL) { /* UBC or malloced buffer */ - bp->b_flags |= B_BUSY; + if (bp->b_iodone != NULL) { /* UBC or malloced buffer */ + bp->b_cflags |= BC_BUSY; continue; } - simple_lock(&bp->b_interlock); - s = splbio(); - while (bp->b_flags & B_BUSY) { + while (bp->b_cflags & BC_BUSY) { DLOG((DLOG_SEG, "lfs_writeseg: avoiding potential" " data summary corruption for ino %d, lbn %" PRId64 "\n", VTOI(bp->b_vp)->i_number, bp->b_lblkno)); - bp->b_flags |= B_WANTED; - ltsleep(bp, (PRIBIO + 1), "lfs_writeseg", 0, - &bp->b_interlock); - splx(s); - s = splbio(); + bp->b_cflags |= BC_WANTED; + cv_wait(&bp->b_busy, &bufcache_lock); } - bp->b_flags |= B_BUSY; - splx(s); - simple_unlock(&bp->b_interlock); + bp->b_cflags |= BC_BUSY; + mutex_exit(&bufcache_lock); + unbusybp = NULL; /* * Check and replace indirect block UNWRITTEN bogosity. @@ -2105,20 +2114,14 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) *bpp = newbp; bp->b_flags &= ~B_GATHERED; bp->b_error = 0; - if (bp->b_flags & B_CALL) { + if (bp->b_iodone != NULL) { DLOG((DLOG_SEG, "lfs_writeseg: " "indir bp should not be B_CALL\n")); - s = splbio(); biodone(bp); - splx(s); bp = NULL; } else { /* Still on free list, leave it there */ - s = splbio(); - bp->b_flags &= ~B_BUSY; - if (bp->b_flags & B_WANTED) - wakeup(bp); - splx(s); + unbusybp = bp; /* * We have to re-decrement lfs_avail * since this block is going to come @@ -2132,7 +2135,15 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) lfs_freebuf(fs, newbp); } } + mutex_enter(&bufcache_lock); + if (unbusybp != NULL) { + unbusybp->b_cflags &= ~BC_BUSY; + if (unbusybp->b_cflags & BC_WANTED) + cv_broadcast(&bp->b_busy); + } } + mutex_exit(&bufcache_lock); + /* * Compute checksum across data and then across summary; the first * block (the summary block) is skipped. 
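
Where lfs_writeseg() used B_WANTED plus ltsleep on the buffer address, it now sleeps on the buffer's own b_busy condvar under bufcache_lock, as the hunk above shows. The idiom in isolation (a distillation, not a new kernel function):

	/* Distilled from the lfs_writeseg() loop above. */
	static void
	acquire_buf(struct buf *bp)
	{
		mutex_enter(&bufcache_lock);
		while (bp->b_cflags & BC_BUSY) {
			bp->b_cflags |= BC_WANTED;
			cv_wait(&bp->b_busy, &bufcache_lock);
		}
		bp->b_cflags |= BC_BUSY;
		mutex_exit(&bufcache_lock);
	}

The release side clears BC_BUSY and, if BC_WANTED is set, broadcasts b_busy, as in the unbusybp handling above.
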
@@ -2149,8 +2160,8 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
 		for (byteoffset = 0; byteoffset < (*bpp)->b_bcount;
 		     byteoffset += fs->lfs_bsize) {
 #ifdef LFS_USE_B_INVAL
-			if (((*bpp)->b_flags & (B_CALL | B_INVAL)) ==
-			    (B_CALL | B_INVAL)) {
+			if (((*bpp)->b_cflags & BC_INVAL) != 0 &&
+			    (*bpp)->b_iodone != NULL) {
 				if (copyin((void *)(*bpp)->b_saveaddr +
 					   byteoffset, dp, el_size)) {
 					panic("lfs_writeseg: copyin failed [1]:"
@@ -2177,12 +2188,12 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
 	ssp->ss_sumsum = cksum(&ssp->ss_datasum,
 	    fs->lfs_sumsize - sizeof(ssp->ss_sumsum));
-	simple_lock(&fs->lfs_interlock);
+	mutex_enter(&lfs_lock);
 	fs->lfs_bfree -= (btofsb(fs, ninos * fs->lfs_ibsize) +
 			  btofsb(fs, fs->lfs_sumsize));
 	fs->lfs_dmeta += (btofsb(fs, ninos * fs->lfs_ibsize) +
 			  btofsb(fs, fs->lfs_sumsize));
-	simple_unlock(&fs->lfs_interlock);
+	mutex_exit(&lfs_lock);
 
 	/*
 	 * When we simply write the blocks we lose a rotation for every block
@@ -2202,7 +2213,8 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
 		cbp = lfs_newclusterbuf(fs, devvp, (*bpp)->b_blkno, i);
 		cl = cbp->b_private;
 
-		cbp->b_flags |= B_ASYNC | B_BUSY;
+		cbp->b_flags |= B_ASYNC;
+		cbp->b_cflags |= BC_BUSY;
 		cbp->b_bcount = 0;
 
 #if defined(DEBUG) && defined(DIAGNOSTIC)
@@ -2218,9 +2230,9 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
 		/*
 		 * Construct the cluster.
 		 */
-		simple_lock(&fs->lfs_interlock);
+		mutex_enter(&lfs_lock);
 		++fs->lfs_iocount;
-		simple_unlock(&fs->lfs_interlock);
+		mutex_exit(&lfs_lock);
 		while (i && cbp->b_bcount < CHUNKSIZE) {
 			bp = *bpp;
@@ -2261,8 +2273,8 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
 			 * from the buffer indicated.
 			 * XXX == what do I do on an error?
 			 */
-			if ((bp->b_flags & (B_CALL|B_INVAL)) ==
-			    (B_CALL|B_INVAL)) {
+			if ((bp->b_cflags & BC_INVAL) != 0 &&
+			    bp->b_iodone != NULL) {
 				if (copyin(bp->b_saveaddr, p, bp->b_bcount))
 					panic("lfs_writeseg: "
 					    "copyin failed [2]");
@@ -2277,14 +2289,18 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
 			cbp->b_bcount += bp->b_bcount;
 			cl->bufsize += bp->b_bcount;
 
-			bp->b_flags &= ~(B_READ | B_DELWRI | B_DONE);
+			bp->b_flags &= ~B_READ;
 			bp->b_error = 0;
 			cl->bpp[cl->bufcount++] = bp;
+
 			vp = bp->b_vp;
-			s = splbio();
+			mutex_enter(&bufcache_lock);
+			mutex_enter(&vp->v_interlock);
+			bp->b_oflags &= ~(BO_DELWRI | BO_DONE);
 			reassignbuf(bp, vp);
-			V_INCR_NUMOUTPUT(vp);
-			splx(s);
+			vp->v_numoutput++;
+			mutex_exit(&vp->v_interlock);
+			mutex_exit(&bufcache_lock);
 
 			bpp++;
 			i--;
@@ -2293,9 +2309,9 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
 			BIO_SETPRIO(cbp, BPRIO_TIMECRITICAL);
 		else
 			BIO_SETPRIO(cbp, BPRIO_TIMELIMITED);
-		s = splbio();
-		V_INCR_NUMOUTPUT(devvp);
-		splx(s);
+		mutex_enter(&devvp->v_interlock);
+		devvp->v_numoutput++;
+		mutex_exit(&devvp->v_interlock);
 		VOP_STRATEGY(devvp, cbp);
 		curproc->p_stats->p_ru.ru_oublock++;
 	}
@@ -2318,8 +2334,8 @@ void
 lfs_writesuper(struct lfs *fs, daddr_t daddr)
 {
 	struct buf *bp;
-	int s;
 	struct vnode *devvp = VTOI(fs->lfs_ivnode)->i_devvp;
+	int s;
 
 	ASSERT_MAYBE_SEGLOCK(fs);
 #ifdef DIAGNOSTIC
@@ -2330,15 +2346,15 @@ lfs_writesuper(struct lfs *fs, daddr_t daddr)
 	 * progress, we risk not having a complete checkpoint if we crash.
 	 * So, block here if a superblock write is in progress.
*/ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); s = splbio(); while (fs->lfs_sbactive) { - ltsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs sb", 0, - &fs->lfs_interlock); + mtsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs sb", 0, + &lfs_lock); } fs->lfs_sbactive = daddr; splx(s); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* Set timestamp of this version of the superblock */ if (fs->lfs_version == 1) @@ -2353,8 +2369,9 @@ lfs_writesuper(struct lfs *fs, daddr_t daddr) LFS_SBPAD - sizeof(struct dlfs)); *(struct dlfs *)bp->b_data = fs->lfs_dlfs; - bp->b_flags |= B_BUSY | B_CALL | B_ASYNC; - bp->b_flags &= ~(B_DONE | B_READ | B_DELWRI); + bp->b_cflags |= BC_BUSY; + bp->b_flags = (bp->b_flags & ~B_READ) | B_ASYNC; + bp->b_oflags &= ~(BO_DONE | BO_DELWRI); bp->b_error = 0; bp->b_iodone = lfs_supercallback; @@ -2363,12 +2380,14 @@ lfs_writesuper(struct lfs *fs, daddr_t daddr) else BIO_SETPRIO(bp, BPRIO_TIMELIMITED); curproc->p_stats->p_ru.ru_oublock++; - s = splbio(); - V_INCR_NUMOUTPUT(bp->b_vp); - splx(s); - simple_lock(&fs->lfs_interlock); + + mutex_enter(&devvp->v_interlock); + devvp->v_numoutput++; + mutex_exit(&devvp->v_interlock); + + mutex_enter(&lfs_lock); ++fs->lfs_iocount; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); VOP_STRATEGY(devvp, bp); } @@ -2437,9 +2456,11 @@ lfs_free_aiodone(struct buf *bp) { struct lfs *fs; + KERNEL_LOCK(1, curlwp); fs = bp->b_private; ASSERT_NO_SEGLOCK(fs); lfs_freebuf(fs, bp); + KERNEL_UNLOCK_LAST(curlwp); } static void @@ -2447,15 +2468,17 @@ lfs_super_aiodone(struct buf *bp) { struct lfs *fs; + KERNEL_LOCK(1, curlwp); fs = bp->b_private; ASSERT_NO_SEGLOCK(fs); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_sbactive = 0; if (--fs->lfs_iocount <= 1) wakeup(&fs->lfs_iocount); - simple_unlock(&fs->lfs_interlock); wakeup(&fs->lfs_sbactive); + mutex_exit(&lfs_lock); lfs_freebuf(fs, bp); + KERNEL_UNLOCK_LAST(curlwp); } static void @@ -2464,9 +2487,11 @@ lfs_cluster_aiodone(struct buf *bp) struct lfs_cluster *cl; struct lfs *fs; struct buf *tbp, *fbp; - struct vnode *vp, *devvp; + struct vnode *vp, *devvp, *ovp; struct inode *ip; - int s, error; + int error; + + KERNEL_LOCK(1, curlwp); error = bp->b_error; cl = bp->b_private; @@ -2477,7 +2502,7 @@ lfs_cluster_aiodone(struct buf *bp) /* Put the pages back, and release the buffer */ while (cl->bufcount--) { tbp = cl->bpp[cl->bufcount]; - KASSERT(tbp->b_flags & B_BUSY); + KASSERT(tbp->b_cflags & BC_BUSY); if (error) { tbp->b_error = error; } @@ -2493,47 +2518,50 @@ lfs_cluster_aiodone(struct buf *bp) LFS_BCLEAN_LOG(fs, tbp); - if (!(tbp->b_flags & B_CALL)) { - KASSERT(tbp->b_flags & B_LOCKED); - s = splbio(); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); + if (tbp->b_iodone == NULL) { + KASSERT(tbp->b_cflags & BC_LOCKED); bremfree(tbp); - simple_unlock(&bqueue_slock); - if (vp) + if (vp) { + mutex_enter(&vp->v_interlock); reassignbuf(tbp, vp); - splx(s); + mutex_exit(&vp->v_interlock); + } tbp->b_flags |= B_ASYNC; /* for biodone */ } - if ((tbp->b_flags & (B_LOCKED | B_DELWRI)) == B_LOCKED) + if (((tbp->b_cflags | tbp->b_oflags) & + (BC_LOCKED | BO_DELWRI)) == BC_LOCKED) LFS_UNLOCK_BUF(tbp); - if (tbp->b_flags & B_DONE) { + if (tbp->b_oflags & BO_DONE) { DLOG((DLOG_SEG, "blk %d biodone already (flags %lx)\n", cl->bufcount, (long)tbp->b_flags)); } - if ((tbp->b_flags & B_CALL) && !LFS_IS_MALLOC_BUF(tbp)) { + if (tbp->b_iodone != NULL && !LFS_IS_MALLOC_BUF(tbp)) { /* * A buffer from the page daemon. 
			 * We use the same iodone as it does,
			 * so we must manually disassociate its
			 * buffers from the vp.
			 */
-			if (tbp->b_vp) {
+			if ((ovp = tbp->b_vp) != NULL) {
 				/* This is just silly */
-				s = splbio();
+				mutex_enter(&ovp->v_interlock);
 				brelvp(tbp);
+				mutex_exit(&ovp->v_interlock);
 				tbp->b_vp = vp;
-				splx(s);
+				tbp->b_objlock = &vp->v_interlock;
 			}
 			/* Put it back the way it was */
 			tbp->b_flags |= B_ASYNC;
-			/* Master buffers have B_AGE */
+			/* Master buffers have BC_AGE */
 			if (tbp->b_private == tbp)
-				tbp->b_flags |= B_AGE;
+				tbp->b_cflags |= BC_AGE;
 		}
-		s = splbio();
+		mutex_exit(&bufcache_lock);
+
 		biodone(tbp);
 
 		/*
@@ -2546,7 +2574,8 @@ lfs_cluster_aiodone(struct buf *bp)
 		 * XXX KS - Shouldn't we set *both* if both types
 		 * of blocks are present (traverse the dirty list?)
 		 */
-		simple_lock(&global_v_numoutput_slock);
+		mutex_enter(&lfs_lock);
+		mutex_enter(&vp->v_interlock);
 		if (vp != devvp && vp->v_numoutput == 0 &&
 		    (fbp = LIST_FIRST(&vp->v_dirtyblkhd)) != NULL) {
 			ip = VTOI(vp);
@@ -2557,9 +2586,9 @@ lfs_cluster_aiodone(struct buf *bp)
 			else
 				LFS_SET_UINO(ip, IN_MODIFIED);
 		}
-		simple_unlock(&global_v_numoutput_slock);
-		splx(s);
-		wakeup(vp);
+		cv_broadcast(&vp->v_cv);
+		mutex_exit(&vp->v_interlock);
+		mutex_exit(&lfs_lock);
 	}
 
 	/* Fix up the cluster buffer, and release it */
@@ -2572,14 +2601,16 @@ lfs_cluster_aiodone(struct buf *bp)
 		if (--cl->seg->seg_iocount == 0)
 			wakeup(&cl->seg->seg_iocount);
 	}
-	simple_lock(&fs->lfs_interlock);
+	mutex_enter(&lfs_lock);
 #ifdef DIAGNOSTIC
 	if (fs->lfs_iocount == 0)
 		panic("lfs_cluster_aiodone: zero iocount");
 #endif
 	if (--fs->lfs_iocount <= 1)
 		wakeup(&fs->lfs_iocount);
-	simple_unlock(&fs->lfs_interlock);
+	mutex_exit(&lfs_lock);
+
+	KERNEL_UNLOCK_LAST(curlwp);
 
 	pool_put(&fs->lfs_bpppool, cl->bpp);
 	cl->bpp = NULL;
@@ -2696,7 +2727,7 @@ lfs_shellsort(struct buf **bp_array, int32_t *lb_array, int nmemb, int size)
 }
 
 /*
- * Call vget with LK_NOWAIT. If we are the one who holds VI_XLOCK/VI_FREEING,
+ * Call vget with LK_NOWAIT. If we are the one who holds VI_XLOCK,
  * however, we must press on. Just fake success in that case.
 */
 int
@@ -2705,6 +2736,8 @@ lfs_vref(struct vnode *vp)
 	int error;
 	struct lfs *fs;
 
+	KASSERT(mutex_owned(&vp->v_interlock));
+
 	fs = VTOI(vp)->i_lfs;
 	ASSERT_MAYBE_SEGLOCK(fs);
 
@@ -2714,7 +2747,7 @@ lfs_vref(struct vnode *vp)
 	 * being able to flush all of the pages from this vnode, which
 	 * will cause it to panic. So, return 0 if a flush is in progress.
 	 */
-	error = vget(vp, LK_NOWAIT);
+	error = vget(vp, LK_NOWAIT | LK_INTERLOCK);
 	if (error == EBUSY && IS_FLUSHING(VTOI(vp)->i_lfs, vp)) {
 		++fs->lfs_flushvp_fakevref;
 		return 0;
@@ -2743,7 +2776,7 @@ lfs_vunref(struct vnode *vp)
 	}
 
 	/* does not call inactive */
-	vrele2(vp, 0);
+	vrele(vp);	/* XXXAD fix later */
 }
 
 /*
@@ -2761,7 +2794,7 @@ lfs_vunref_head(struct vnode *vp)
 	ASSERT_SEGLOCK(VTOI(vp)->i_lfs);
 
 	/* does not call inactive, inserts non-held vnode at head of freelist */
-	vrele2(vp, 1);
+	vrele(vp);	/* XXXAD fix later */
 }
 
diff --git a/sys/ufs/lfs/lfs_subr.c b/sys/ufs/lfs/lfs_subr.c
index 01e2cda3b94e..b9b44155b581 100644
--- a/sys/ufs/lfs/lfs_subr.c
+++ b/sys/ufs/lfs/lfs_subr.c
@@ -1,4 +1,4 @@
-/*	$NetBSD: lfs_subr.c,v 1.71 2007/10/10 20:42:35 ad Exp $	*/
+/*	$NetBSD: lfs_subr.c,v 1.72 2008/01/02 11:49:12 ad Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.71 2007/10/10 20:42:35 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.72 2008/01/02 11:49:12 ad Exp $"); #include #include @@ -165,16 +165,16 @@ lfs_free_resblks(struct lfs *fs) pool_destroy(&fs->lfs_segpool); pool_destroy(&fs->lfs_clpool); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); for (i = 0; i < LFS_N_TOTAL; i++) { while (fs->lfs_resblk[i].inuse) - ltsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0, - &fs->lfs_interlock); + mtsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0, + &lfs_lock); if (fs->lfs_resblk[i].p != NULL) free(fs->lfs_resblk[i].p, M_SEGMENT); } free(fs->lfs_resblk, M_SEGMENT); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } static unsigned int @@ -216,7 +216,7 @@ lfs_malloc(struct lfs *fs, size_t size, int type) * and several indirect blocks. */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); /* skip over blocks of other types */ for (i = 0, start = 0; i < type; i++) start += lfs_res_qty[i]; @@ -231,19 +231,19 @@ lfs_malloc(struct lfs *fs, size_t size, int type) s = splbio(); LIST_INSERT_HEAD(&fs->lfs_reshash[h], re, res); splx(s); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return r; } } DLOG((DLOG_MALLOC, "sleeping on %s (%d)\n", lfs_res_names[type], lfs_res_qty[type])); - ltsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0, - &fs->lfs_interlock); + mtsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0, + &lfs_lock); DLOG((DLOG_MALLOC, "done sleeping on %s\n", lfs_res_names[type])); } /* NOTREACHED */ - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return r; } @@ -259,7 +259,7 @@ lfs_free(struct lfs *fs, void *p, int type) ASSERT_MAYBE_SEGLOCK(fs); h = lfs_mhash(p); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); s = splbio(); LIST_FOREACH(re, &fs->lfs_reshash[h], res) { if (re->p == p) { @@ -268,7 +268,7 @@ lfs_free(struct lfs *fs, void *p, int type) re->inuse = 0; wakeup(&fs->lfs_resblk); splx(s); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return; } } @@ -279,7 +279,7 @@ lfs_free(struct lfs *fs, void *p, int type) } #endif splx(s); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* * If we didn't find it, free it. @@ -296,21 +296,21 @@ lfs_seglock(struct lfs *fs, unsigned long flags) { struct segment *sp; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (fs->lfs_seglock) { if (fs->lfs_lockpid == curproc->p_pid && fs->lfs_locklwp == curlwp->l_lid) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); ++fs->lfs_seglock; fs->lfs_sp->seg_flags |= flags; return 0; } else if (flags & SEGM_PAGEDAEMON) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return EWOULDBLOCK; } else { while (fs->lfs_seglock) { - (void)ltsleep(&fs->lfs_seglock, PRIBIO + 1, - "lfs_seglock", 0, &fs->lfs_interlock); + (void)mtsleep(&fs->lfs_seglock, PRIBIO + 1, + "lfs_seglock", 0, &lfs_lock); } } } @@ -318,7 +318,7 @@ lfs_seglock(struct lfs *fs, unsigned long flags) fs->lfs_seglock = 1; fs->lfs_lockpid = curproc->p_pid; fs->lfs_locklwp = curlwp->l_lid; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); fs->lfs_cleanind = 0; #ifdef DEBUG @@ -340,9 +340,9 @@ lfs_seglock(struct lfs *fs, unsigned long flags) * so we artificially increment it by one until we've scheduled all of * the writes we intend to do. 
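
lfs_malloc() above keeps the wakeup()-based hand-off but swaps ltsleep for mtsleep(9), which atomically releases the named kmutex instead of a simplelock. The reserve-pool consumer in outline; try_grab_resblk() is a hypothetical helper standing in for the inline search over fs->lfs_resblk:

	/* Sketch; try_grab_resblk() is hypothetical, not kernel API. */
	static void *
	take_reserve(struct lfs *fs)
	{
		void *p;

		mutex_enter(&lfs_lock);
		while ((p = try_grab_resblk(fs)) == NULL)
			/* sleeps, then re-acquires lfs_lock on wakeup */
			mtsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0,
			    &lfs_lock);
		mutex_exit(&lfs_lock);
		return p;
	}

lfs_free() provides the matching wakeup(&fs->lfs_resblk) once a block returns to the pool.
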
*/ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); ++fs->lfs_iocount; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return 0; } @@ -356,52 +356,37 @@ lfs_unmark_dirop(struct lfs *fs) int doit; ASSERT_NO_SEGLOCK(fs); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); doit = !(fs->lfs_flags & LFS_UNDIROP); if (doit) fs->lfs_flags |= LFS_UNDIROP; if (!doit) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return; } for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) { nip = TAILQ_NEXT(ip, i_lfs_dchain); - simple_unlock(&fs->lfs_interlock); vp = ITOV(ip); - - simple_lock(&vp->v_interlock); - if (VOP_ISLOCKED(vp) == LK_EXCLOTHER) { - simple_lock(&fs->lfs_interlock); - simple_unlock(&vp->v_interlock); + if (VOP_ISLOCKED(vp) == LK_EXCLOTHER) continue; - } if ((VTOI(vp)->i_flag & (IN_ADIROP | IN_ALLMOD)) == 0) { - simple_lock(&fs->lfs_interlock); - simple_lock(&lfs_subsys_lock); --lfs_dirvcount; - simple_unlock(&lfs_subsys_lock); --fs->lfs_dirvcount; vp->v_uflag &= ~VU_DIROP; TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); - simple_unlock(&fs->lfs_interlock); wakeup(&lfs_dirvcount); - simple_unlock(&vp->v_interlock); - simple_lock(&fs->lfs_interlock); fs->lfs_unlockvp = vp; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); vrele(vp); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_unlockvp = NULL; - simple_unlock(&fs->lfs_interlock); - } else - simple_unlock(&vp->v_interlock); - simple_lock(&fs->lfs_interlock); + } } fs->lfs_flags &= ~LFS_UNDIROP; - simple_unlock(&fs->lfs_interlock); wakeup(&fs->lfs_flags); + mutex_exit(&lfs_lock); } static void @@ -426,13 +411,13 @@ lfs_auto_segclean(struct lfs *fs) (SEGUSE_DIRTY | SEGUSE_EMPTY)) { /* Make sure the sb is written before we clean */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); s = splbio(); while (waited == 0 && fs->lfs_sbactive) - ltsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs asb", - 0, &fs->lfs_interlock); + mtsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs asb", + 0, &lfs_lock); splx(s); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); waited = 1; if ((error = lfs_do_segclean(fs, i)) != 0) { @@ -458,13 +443,13 @@ lfs_segunlock(struct lfs *fs) sp = fs->lfs_sp; - simple_lock(&fs->lfs_interlock); - LOCK_ASSERT(LFS_SEGLOCK_HELD(fs)); + mutex_enter(&lfs_lock); + KASSERT(LFS_SEGLOCK_HELD(fs)); if (fs->lfs_seglock == 1) { if ((sp->seg_flags & (SEGM_PROT | SEGM_CLEAN)) == 0 && LFS_STARVED_FOR_SEGS(fs) == 0) do_unmark_dirop = 1; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); sync = sp->seg_flags & SEGM_SYNC; ckp = sp->seg_flags & SEGM_CKP; @@ -494,13 +479,13 @@ lfs_segunlock(struct lfs *fs) * At the moment, the user's process hangs around so we can * sleep. 
*/ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (--fs->lfs_iocount == 0) { LFS_DEBUG_COUNTLOCKED("lfs_segunlock"); } if (fs->lfs_iocount <= 1) wakeup(&fs->lfs_iocount); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* * If we're not checkpointing, we don't have to block * other processes to wait for a synchronous write @@ -510,11 +495,11 @@ lfs_segunlock(struct lfs *fs) #ifdef DEBUG LFS_ENTER_LOG("segunlock_std", __FILE__, __LINE__, 0, 0, curproc->p_pid); #endif - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); --fs->lfs_seglock; fs->lfs_lockpid = 0; fs->lfs_locklwp = 0; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); wakeup(&fs->lfs_seglock); } /* @@ -524,16 +509,16 @@ lfs_segunlock(struct lfs *fs) * superblocks to make sure that the checkpoint described * by a superblock completed. */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (ckp && sync && fs->lfs_iocount) - (void)ltsleep(&fs->lfs_iocount, PRIBIO + 1, - "lfs_iocount", 0, &fs->lfs_interlock); + (void)mtsleep(&fs->lfs_iocount, PRIBIO + 1, + "lfs_iocount", 0, &lfs_lock); while (sync && sp->seg_iocount) { - (void)ltsleep(&sp->seg_iocount, PRIBIO + 1, - "seg_iocount", 0, &fs->lfs_interlock); + (void)mtsleep(&sp->seg_iocount, PRIBIO + 1, + "seg_iocount", 0, &lfs_lock); DLOG((DLOG_SEG, "sleeping on iocount %x == %d\n", sp, sp->seg_iocount)); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); if (sync) pool_put(&fs->lfs_segpool, sp); @@ -554,11 +539,11 @@ lfs_segunlock(struct lfs *fs) #ifdef DEBUG LFS_ENTER_LOG("segunlock_ckp", __FILE__, __LINE__, 0, 0, curproc->p_pid); #endif - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); --fs->lfs_seglock; fs->lfs_lockpid = 0; fs->lfs_locklwp = 0; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); wakeup(&fs->lfs_seglock); } /* Reenable fragment size changes */ @@ -566,11 +551,11 @@ lfs_segunlock(struct lfs *fs) if (do_unmark_dirop) lfs_unmark_dirop(fs); } else if (fs->lfs_seglock == 0) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); panic ("Seglock not held"); } else { --fs->lfs_seglock; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } } @@ -585,22 +570,22 @@ lfs_writer_enter(struct lfs *fs, const char *wmesg) int error = 0; ASSERT_MAYBE_SEGLOCK(fs); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); /* disallow dirops during flush */ fs->lfs_writer++; while (fs->lfs_dirops > 0) { ++fs->lfs_diropwait; - error = ltsleep(&fs->lfs_writer, PRIBIO+1, wmesg, 0, - &fs->lfs_interlock); + error = mtsleep(&fs->lfs_writer, PRIBIO+1, wmesg, 0, + &lfs_lock); --fs->lfs_diropwait; } if (error) fs->lfs_writer--; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return error; } @@ -611,9 +596,9 @@ lfs_writer_leave(struct lfs *fs) bool dowakeup; ASSERT_MAYBE_SEGLOCK(fs); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); dowakeup = !(--fs->lfs_writer); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); if (dowakeup) wakeup(&fs->lfs_dirops); } @@ -651,11 +636,11 @@ lfs_segunlock_relock(struct lfs *fs) /* Wait for the cleaner */ lfs_wakeup_cleaner(fs); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (LFS_STARVED_FOR_SEGS(fs)) - ltsleep(&fs->lfs_avail, PRIBIO, "relock", 0, - &fs->lfs_interlock); - simple_unlock(&fs->lfs_interlock); + mtsleep(&fs->lfs_avail, PRIBIO, "relock", 0, + &lfs_lock); + mutex_exit(&lfs_lock); /* Put the segment lock back the way it was. 
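
lfs_writer_enter()/lfs_writer_leave() above form a simple gate: a writer bumps lfs_writer and waits out active dirops; the last writer out wakes them. The leave side in miniature, with the wakeup deliberately issued after the mutex is dropped, exactly as in the patch:

	/* Mirrors lfs_writer_leave() above. */
	static void
	writer_leave(struct lfs *fs)
	{
		bool dowakeup;

		mutex_enter(&lfs_lock);
		dowakeup = (--fs->lfs_writer == 0);
		mutex_exit(&lfs_lock);
		if (dowakeup)
			wakeup(&fs->lfs_dirops);	/* wakeup(9) needs no lock */
	}
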
*/ while(n--) diff --git a/sys/ufs/lfs/lfs_syscalls.c b/sys/ufs/lfs/lfs_syscalls.c index 750609162e77..85d60ae1fece 100644 --- a/sys/ufs/lfs/lfs_syscalls.c +++ b/sys/ufs/lfs/lfs_syscalls.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_syscalls.c,v 1.125 2007/12/20 23:03:14 dsl Exp $ */ +/* $NetBSD: lfs_syscalls.c,v 1.126 2008/01/02 11:49:12 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc. @@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.125 2007/12/20 23:03:14 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.126 2008/01/02 11:49:12 ad Exp $"); #ifndef LFS # define LFS /* for prototypes in syscallargs.h */ @@ -379,8 +379,11 @@ lfs_markv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, /* XXX but only write the inode if it's the right one */ if (blkp->bi_inode != LFS_IFILE_INUM) { LFS_IENTRY(ifp, fs, blkp->bi_inode, bp); - if (ifp->if_daddr == blkp->bi_daddr) + if (ifp->if_daddr == blkp->bi_daddr) { + mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_CLEANING); + mutex_exit(&lfs_lock); + } brelse(bp, 0); } continue; @@ -443,7 +446,7 @@ lfs_markv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, panic("lfs_markv: partial indirect block?" " size=%d\n", blkp->bi_size); bp = getblk(vp, blkp->bi_lbn, blkp->bi_size, 0, 0); - if (!(bp->b_flags & (B_DONE|B_DELWRI))) { /* B_CACHE */ + if (!(bp->b_oflags & (BO_DONE|BO_DELWRI))) { /* * The block in question was not found * in the cache; i.e., the block that @@ -724,15 +727,19 @@ lfs_bmapv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt) * A regular call to VFS_VGET could deadlock * here. Instead, we try an unlocked access. */ + mutex_enter(&ufs_ihash_lock); vp = ufs_ihashlookup(ump->um_dev, blkp->bi_inode); if (vp != NULL && !(vp->v_iflag & VI_XLOCK)) { ip = VTOI(vp); + mutex_enter(&vp->v_interlock); + mutex_exit(&ufs_ihash_lock); if (lfs_vref(vp)) { v_daddr = LFS_UNUSED_DADDR; continue; } numrefed++; } else { + mutex_exit(&ufs_ihash_lock); /* * Don't VFS_VGET if we're being unmounted, * since we hold vfs_busy(). 
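
lfs_vref() now requires the caller to hold v_interlock and passes LK_INTERLOCK to vget(), which releases the interlock whether or not the reference is obtained. The lookup-then-ref dance from lfs_bmapv() above, condensed into one sketch (lookup_and_ref() is illustrative, not a real function):

	/* Condensed from the lfs_bmapv() hunk above. */
	static struct vnode *
	lookup_and_ref(struct ufsmount *ump, ino_t ino)
	{
		struct vnode *vp;

		mutex_enter(&ufs_ihash_lock);
		vp = ufs_ihashlookup(ump->um_dev, ino);
		if (vp == NULL) {
			mutex_exit(&ufs_ihash_lock);
			return NULL;
		}
		/* Interlock first, then drop the hash lock: no reclaim race. */
		mutex_enter(&vp->v_interlock);
		mutex_exit(&ufs_ihash_lock);
		if (lfs_vref(vp) != 0)		/* consumes v_interlock */
			return NULL;		/* vnode was being recycled */
		return vp;
	}
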
@@ -902,14 +909,14 @@ lfs_do_segclean(struct lfs *fs, unsigned long segnum) if (fs->lfs_version > 1 && segnum == 0 && fs->lfs_start < btofsb(fs, LFS_LABELPAD)) fs->lfs_avail -= btofsb(fs, LFS_LABELPAD) - fs->lfs_start; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_bfree += sup->su_nsums * btofsb(fs, fs->lfs_sumsize) + btofsb(fs, sup->su_ninos * fs->lfs_ibsize); fs->lfs_dmeta -= sup->su_nsums * btofsb(fs, fs->lfs_sumsize) + btofsb(fs, sup->su_ninos * fs->lfs_ibsize); if (fs->lfs_dmeta < 0) fs->lfs_dmeta = 0; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); sup->su_flags &= ~SEGUSE_DIRTY; LFS_WRITESEGENTRY(sup, fs, segnum, bp); @@ -918,10 +925,10 @@ lfs_do_segclean(struct lfs *fs, unsigned long segnum) --cip->dirty; fs->lfs_nclean = cip->clean; cip->bfree = fs->lfs_bfree; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); cip->avail = fs->lfs_avail - fs->lfs_ravail - fs->lfs_favail; wakeup(&fs->lfs_avail); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); (void) LFS_BWRITE_LOG(bp); if (lfs_dostats) @@ -1008,21 +1015,29 @@ extern kmutex_t ufs_hashlock; int lfs_fasthashget(dev_t dev, ino_t ino, struct vnode **vpp) { - if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) { - if ((*vpp)->v_iflag & VI_XLOCK) { + struct vnode *vp; + + mutex_enter(&ufs_ihash_lock); + if ((vp = ufs_ihashlookup(dev, ino)) != NULL) { + mutex_enter(&vp->v_interlock); + mutex_exit(&ufs_ihash_lock); + if (vp->v_iflag & VI_XLOCK) { DLOG((DLOG_CLEAN, "lfs_fastvget: ino %d VI_XLOCK\n", ino)); lfs_stats.clean_vnlocked++; + mutex_exit(&vp->v_interlock); return EAGAIN; } - if (lfs_vref(*vpp)) { + if (lfs_vref(vp)) { DLOG((DLOG_CLEAN, "lfs_fastvget: lfs_vref failed" " for ino %d\n", ino)); lfs_stats.clean_inlocked++; return EAGAIN; } - } else - *vpp = NULL; + } else { + mutex_exit(&ufs_ihash_lock); + } + *vpp = vp; return (0); } @@ -1048,12 +1063,12 @@ lfs_fastvget(struct mount *mp, ino_t ino, daddr_t daddr, struct vnode **vpp, * Wait until the filesystem is fully mounted before allowing vget * to complete. This prevents possible problems with roll-forward. */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (fs->lfs_flags & LFS_NOTYET) { - ltsleep(&fs->lfs_flags, PRIBIO+1, "lfs_fnotyet", 0, - &fs->lfs_interlock); + mtsleep(&fs->lfs_flags, PRIBIO+1, "lfs_fnotyet", 0, + &lfs_lock); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* * This is playing fast and loose. Someone may have the inode @@ -1191,9 +1206,9 @@ lfs_fakebuf(struct lfs *fs, struct vnode *vp, int lbn, size_t size, void *uaddr) KDASSERT(bp->b_iodone == lfs_callback); #if 0 - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); ++fs->lfs_iocount; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); #endif bp->b_bufsize = size; bp->b_bcount = size; diff --git a/sys/ufs/lfs/lfs_vfsops.c b/sys/ufs/lfs/lfs_vfsops.c index f9da548f8db5..4c4c22526104 100644 --- a/sys/ufs/lfs/lfs_vfsops.c +++ b/sys/ufs/lfs/lfs_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_vfsops.c,v 1.251 2007/12/12 02:56:04 lukem Exp $ */ +/* $NetBSD: lfs_vfsops.c,v 1.252 2008/01/02 11:49:12 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc. 
@@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.251 2007/12/12 02:56:04 lukem Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.252 2008/01/02 11:49:12 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_lfs.h" @@ -204,10 +204,10 @@ lfs_writerd(void *arg) lfs_writer_daemon = curproc->p_pid; - simple_lock(&lfs_subsys_lock); + mutex_enter(&lfs_lock); for (;;) { - ltsleep(&lfs_writer_daemon, PVM | PNORELOCK, "lfswriter", hz/10, - &lfs_subsys_lock); + mtsleep(&lfs_writer_daemon, PVM | PNORELOCK, "lfswriter", hz/10, + &lfs_lock); /* * Look through the list of LFSs to see if any of them @@ -223,7 +223,7 @@ lfs_writerd(void *arg) if (strncmp(mp->mnt_stat.f_fstypename, MOUNT_LFS, sizeof(mp->mnt_stat.f_fstypename)) == 0) { fs = VFSTOUFS(mp)->um_lfs; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fsflags = 0; if ((fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) || lfs_dirvcount > LFS_MAX_DIROP) && @@ -233,15 +233,15 @@ lfs_writerd(void *arg) DLOG((DLOG_FLUSH, "lfs_writerd: pdflush set\n")); fs->lfs_pdflush = 0; lfs_flush_fs(fs, fsflags); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } else if (!TAILQ_EMPTY(&fs->lfs_pchainhd)) { DLOG((DLOG_FLUSH, "lfs_writerd: pchain non-empty\n")); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); lfs_writer_enter(fs, "wrdirop"); lfs_flush_pchain(fs); lfs_writer_leave(fs); } else - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } mutex_enter(&mountlist_lock); @@ -253,7 +253,7 @@ lfs_writerd(void *arg) /* * If global state wants a flush, flush everything. */ - simple_lock(&lfs_subsys_lock); + mutex_enter(&lfs_lock); loopcount = 0; if (lfs_do_flush || locked_queue_count > LFS_MAX_BUFS || locked_queue_bytes > LFS_MAX_BYTES || @@ -303,7 +303,9 @@ lfs_init() #ifdef DEBUG memset(lfs_log, 0, sizeof(lfs_log)); #endif - simple_lock_init(&lfs_subsys_lock); + mutex_init(&lfs_lock, MUTEX_DEFAULT, IPL_NONE); + cv_init(&locked_queue_cv, "lfsbuf"); + cv_init(&lfs_writing_cv, "lfsflush"); } void @@ -315,8 +317,10 @@ lfs_reinit() void lfs_done() { - ufs_done(); + mutex_destroy(&lfs_lock); + cv_destroy(&locked_queue_cv); + cv_destroy(&lfs_writing_cv); pool_destroy(&lfs_inode_pool); pool_destroy(&lfs_dinode_pool); pool_destroy(&lfs_inoext_pool); @@ -735,10 +739,9 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) fs->lfs_pdflush = 0; fs->lfs_sleepers = 0; fs->lfs_pages = 0; - simple_lock_init(&fs->lfs_interlock); rw_init(&fs->lfs_fraglock); - lockinit(&fs->lfs_iflock, PINOD, "lfs_iflock", 0, 0); - lockinit(&fs->lfs_stoplock, PINOD, "lfs_stoplock", 0, 0); + rw_init(&fs->lfs_iflock); + cv_init(&fs->lfs_stopcv, "lfsstop"); /* Set the file system readonly/modify bits. 
*/ fs->lfs_ronly = ronly; @@ -902,7 +905,7 @@ lfs_unmount(struct mount *mp, int mntflags) struct ufsmount *ump; struct lfs *fs; int error, flags, ronly; - int s; + vnode_t *vp; flags = 0; if (mntflags & MNT_FORCE) @@ -917,11 +920,11 @@ lfs_unmount(struct mount *mp, int mntflags) /* wake up the cleaner so it can die */ lfs_wakeup_cleaner(fs); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (fs->lfs_sleepers) - ltsleep(&fs->lfs_sleepers, PRIBIO + 1, "lfs_sleepers", 0, - &fs->lfs_interlock); - simple_unlock(&fs->lfs_interlock); + mtsleep(&fs->lfs_sleepers, PRIBIO + 1, "lfs_sleepers", 0, + &lfs_lock); + mutex_exit(&lfs_lock); #ifdef QUOTA if (mp->mnt_flag & MNT_QUOTA) { @@ -944,23 +947,23 @@ lfs_unmount(struct mount *mp, int mntflags) return (error); if ((error = VFS_SYNC(mp, 1, l->l_cred)) != 0) return (error); - s = splbio(); - if (LIST_FIRST(&fs->lfs_ivnode->v_dirtyblkhd)) + vp = fs->lfs_ivnode; + mutex_enter(&vp->v_interlock); + if (LIST_FIRST(&vp->v_dirtyblkhd)) panic("lfs_unmount: still dirty blocks on ifile vnode"); - splx(s); + mutex_exit(&vp->v_interlock); /* Explicitly write the superblock, to update serial and pflags */ fs->lfs_pflags |= LFS_PF_CLEAN; lfs_writesuper(fs, fs->lfs_sboffs[0]); lfs_writesuper(fs, fs->lfs_sboffs[1]); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (fs->lfs_iocount) - ltsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs_umount", 0, - &fs->lfs_interlock); - simple_unlock(&fs->lfs_interlock); + mtsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs_umount", 0, + &lfs_lock); + mutex_exit(&lfs_lock); /* Finish with the Ifile, now that we're done with it */ - vrele(fs->lfs_ivnode); vgone(fs->lfs_ivnode); ronly = !fs->lfs_ronly; @@ -982,7 +985,9 @@ lfs_unmount(struct mount *mp, int mntflags) free(fs->lfs_suflags[1], M_SEGMENT); free(fs->lfs_suflags, M_SEGMENT); lfs_free_resblks(fs); + cv_destroy(&fs->lfs_stopcv); rw_destroy(&fs->lfs_fraglock); + rw_destroy(&fs->lfs_iflock); free(fs, M_UFSMNT); free(ump, M_UFSMNT); @@ -1058,12 +1063,12 @@ lfs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) * XXX than a segment and lfs_nextseg is close to the end of * XXX the log, we'll likely block. */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (fs->lfs_nowrap && fs->lfs_nextseg < fs->lfs_curseg) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return 0; } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); lfs_writer_enter(fs, "lfs_dirops"); @@ -1109,11 +1114,11 @@ lfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) * If the filesystem is not completely mounted yet, suspend * any access requests (wait for roll-forward to complete). 
*/ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while ((fs->lfs_flags & LFS_NOTYET) && curproc->p_pid != fs->lfs_rfpid) - ltsleep(&fs->lfs_flags, PRIBIO+1, "lfs_notyet", 0, - &fs->lfs_interlock); - simple_unlock(&fs->lfs_interlock); + mtsleep(&fs->lfs_flags, PRIBIO+1, "lfs_notyet", 0, + &lfs_lock); + mutex_exit(&lfs_lock); retry: if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL) @@ -1201,7 +1206,7 @@ retry: #ifdef DEBUG /* If the seglock is held look at the bpp to see what is there anyway */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (fs->lfs_seglock > 0) { struct buf **bpp; struct ufs1_dinode *dp; @@ -1222,18 +1227,18 @@ retry: } } } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); #endif /* DEBUG */ panic("lfs_vget: dinode not found"); } - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (fs->lfs_iocount) { DLOG((DLOG_VNODE, "lfs_vget: dinode %d not found, retrying...\n", ino)); - (void)ltsleep(&fs->lfs_iocount, PRIBIO + 1, - "lfs ifind", 1, &fs->lfs_interlock); + (void)mtsleep(&fs->lfs_iocount, PRIBIO + 1, + "lfs ifind", 1, &lfs_lock); } else retries = LFS_IFIND_RETRIES; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); goto again; } *ip->i_din.ffs1_din = *dip; @@ -1264,6 +1269,7 @@ lfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) IFILE *ifp; int32_t daddr; struct lfs *fs; + vnode_t *vp; if (fhp->fid_len != sizeof(struct lfid)) return EINVAL; @@ -1281,7 +1287,10 @@ lfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) fs->lfs_cleansz - fs->lfs_segtabsz) * fs->lfs_ifpb) return ESTALE; - if (ufs_ihashlookup(VFSTOUFS(mp)->um_dev, lfh.lfid_ino) == NULLVP) { + mutex_enter(&ufs_ihash_lock); + vp = ufs_ihashlookup(VFSTOUFS(mp)->um_dev, lfh.lfid_ino); + mutex_exit(&ufs_ihash_lock); + if (vp == NULL) { LFS_IENTRY(ifp, fs, lfh.lfid_ino, bp); daddr = ifp->if_daddr; brelse(bp, 0); @@ -1525,7 +1534,7 @@ static int lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags) { - int i, s, error, run, haveeof = 0; + int i, error, run, haveeof = 0; int fs_bshift; vaddr_t kva; off_t eof, offset, startoffset = 0; @@ -1607,10 +1616,10 @@ lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, KASSERT(!(pgs[i]->flags & PG_PAGEOUT)); pgs[i]->flags &= ~PG_DELWRI; pgs[i]->flags |= PG_PAGEOUT; - uvmexp.paging++; - uvm_lock_pageq(); + uvm_pageout_start(1); + mutex_enter(&uvm_pageqlock); uvm_pageunwire(pgs[i]); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } } @@ -1658,21 +1667,18 @@ lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, UVMPAGER_MAPIN_WAITOK); } - s = splbio(); - simple_lock(&global_v_numoutput_slock); + mutex_enter(&vp->v_interlock); vp->v_numoutput += 2; /* one for biodone, one for aiodone */ - simple_unlock(&global_v_numoutput_slock); - splx(s); + mutex_exit(&vp->v_interlock); - mbp = getiobuf(); + mbp = getiobuf(NULL, true); UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x", vp, mbp, vp->v_numoutput, bytes); mbp->b_bufsize = npages << PAGE_SHIFT; mbp->b_data = (void *)kva; mbp->b_resid = mbp->b_bcount = bytes; - mbp->b_flags = B_BUSY|B_WRITE|B_AGE|B_CALL; + mbp->b_cflags = BC_BUSY|BC_AGE; mbp->b_iodone = uvm_aio_biodone; - mbp->b_vp = vp; bp = NULL; for (offset = startoffset; @@ -1722,27 +1728,26 @@ lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, if (offset == startoffset && iobytes == bytes) { bp = mbp; /* correct overcount if there is no second buffer */ - s = splbio(); - 
simple_lock(&global_v_numoutput_slock); + mutex_enter(&vp->v_interlock); --vp->v_numoutput; - simple_unlock(&global_v_numoutput_slock); - splx(s); + mutex_exit(&vp->v_interlock); } else { - bp = getiobuf(); + bp = getiobuf(NULL, true); UVMHIST_LOG(ubchist, "vp %p bp %p num now %d", vp, bp, vp->v_numoutput, 0); bp->b_data = (char *)kva + (vaddr_t)(offset - pg->offset); bp->b_resid = bp->b_bcount = iobytes; - bp->b_flags = B_BUSY|B_WRITE|B_CALL; + bp->b_cflags = BC_BUSY; bp->b_iodone = uvm_aio_biodone1; } /* XXX This is silly ... is this necessary? */ - bp->b_vp = NULL; - s = splbio(); + mutex_enter(&bufcache_lock); + mutex_enter(&vp->v_interlock); bgetvp(vp, bp); - splx(s); + mutex_exit(&vp->v_interlock); + mutex_exit(&bufcache_lock); bp->b_lblkno = lblkno(fs, offset); bp->b_private = mbp; @@ -1756,15 +1761,15 @@ lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, if (skipbytes) { UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0); - s = splbio(); + mutex_enter(mbp->b_objlock); if (error) { mbp->b_error = error; } mbp->b_resid -= skipbytes; + mutex_exit(mbp->b_objlock); if (mbp->b_resid == 0) { biodone(mbp); } - splx(s); } UVMHIST_LOG(ubchist, "returning 0", 0,0,0,0); return (0); @@ -1774,7 +1779,7 @@ lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, * We can't write the pages, for whatever reason. * Clean up after ourselves, and make the caller try again. */ - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); /* Tell why we're here, if we know */ if (ip->i_lfs_iflags & LFSI_NO_GOP_WRITE) { @@ -1791,12 +1796,12 @@ lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, DLOG((DLOG_PAGE, "lfs_gop_write: seglock not held\n")); } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { pg = pgs[i]; if (pg->flags & PG_PAGEOUT) - uvmexp.paging--; + uvm_pageout_done(1); if (pg->flags & PG_DELWRI) { uvm_pageunwire(pg); } @@ -1815,8 +1820,8 @@ lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, } /* uvm_pageunbusy takes care of PG_BUSY, PG_WANTED */ uvm_page_unbusy(pgs, npages); - uvm_unlock_pageq(); - simple_unlock(&vp->v_interlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&vp->v_interlock); return EAGAIN; } @@ -1974,9 +1979,7 @@ lfs_resize_fs(struct lfs *fs, int newnsegs) * is holding Ifile buffers, so we get each one, to drain them. * (XXX this could be done better.) */ - simple_lock(&fs->lfs_interlock); - lockmgr(&fs->lfs_iflock, LK_EXCLUSIVE, &fs->lfs_interlock); - simple_unlock(&fs->lfs_interlock); + rw_enter(&fs->lfs_iflock, RW_WRITER); vn_lock(ivp, LK_EXCLUSIVE | LK_RETRY); for (i = 0; i < ilast; i++) { bread(ivp, i, fs->lfs_bsize, NOCRED, &bp); @@ -2092,9 +2095,7 @@ lfs_resize_fs(struct lfs *fs, int newnsegs) /* Let Ifile accesses proceed */ VOP_UNLOCK(ivp, 0); - simple_lock(&fs->lfs_interlock); - lockmgr(&fs->lfs_iflock, LK_RELEASE, &fs->lfs_interlock); - simple_unlock(&fs->lfs_interlock); + rw_exit(&fs->lfs_iflock); out: lfs_segunlock(fs); diff --git a/sys/ufs/lfs/lfs_vnops.c b/sys/ufs/lfs/lfs_vnops.c index 16c3a2fceabe..2d9506f1b54c 100644 --- a/sys/ufs/lfs/lfs_vnops.c +++ b/sys/ufs/lfs/lfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_vnops.c,v 1.213 2007/11/26 19:02:32 pooka Exp $ */ +/* $NetBSD: lfs_vnops.c,v 1.214 2008/01/02 11:49:12 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 
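lfs_iflock, previously a lockmgr lock taken through an interlock dance, is now a krwlock_t: rw_init() at mount, rw_enter(RW_WRITER)/rw_exit() around Ifile draining in lfs_resize_fs() above, rw_destroy() at unmount. A sketch of the rwlock(9) lifecycle as the patch uses it; the standalone variable is illustrative:

	#include <sys/rwlock.h>

	krwlock_t iflock;

	rw_init(&iflock);		/* lfs_mountfs() */

	rw_enter(&iflock, RW_WRITER);	/* exclusive, e.g. to drain Ifile */
	/* ... operate on the Ifile ... */
	rw_exit(&iflock);

	rw_destroy(&iflock);		/* lfs_unmount() */

Unlike the old lockmgr call, rw_enter() needs no separate interlock, which is why the simple_lock/lockmgr/simple_unlock triples collapse to single calls.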
@@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.213 2007/11/26 19:02:32 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.214 2008/01/02 11:49:12 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_compat_netbsd.h" @@ -293,16 +293,14 @@ lfs_fsync(void *v) */ if (ap->a_flags & FSYNC_LAZY) { if (lfs_ignore_lazy_sync == 0) { - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (!(ip->i_flags & IN_PAGING)) { ip->i_flags |= IN_PAGING; TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain); } - simple_unlock(&fs->lfs_interlock); - simple_lock(&lfs_subsys_lock); wakeup(&lfs_writer_daemon); - simple_unlock(&lfs_subsys_lock); + mutex_exit(&lfs_lock); } return 0; } @@ -318,15 +316,15 @@ lfs_fsync(void *v) wait = (ap->a_flags & FSYNC_WAIT); do { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), round_page(ap->a_offhi), PGO_CLEANIT | (wait ? PGO_SYNCIO : 0)); if (error == EAGAIN) { - simple_lock(&fs->lfs_interlock); - ltsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_fsync", - hz / 100 + 1, &fs->lfs_interlock); - simple_unlock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); + mtsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_fsync", + hz / 100 + 1, &lfs_lock); + mutex_exit(&lfs_lock); } } while (error == EAGAIN); if (error) @@ -365,7 +363,9 @@ lfs_inactive(void *v) * Streamline this process by not giving it more dirty blocks. */ if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) { + mutex_enter(&lfs_lock); LFS_CLR_UINO(VTOI(ap->a_vp), IN_ALLMOD); + mutex_exit(&lfs_lock); VOP_UNLOCK(ap->a_vp, 0); return 0; } @@ -410,46 +410,43 @@ lfs_set_dirop(struct vnode *dvp, struct vnode *vp) if ((error = lfs_reserve(fs, dvp, vp, LFS_NRESERVE(fs))) != 0) return (error); - restart: - simple_lock(&fs->lfs_interlock); + restart: + mutex_enter(&lfs_lock); if (fs->lfs_dirops == 0) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); lfs_check(dvp, LFS_UNUSED_LBN, 0); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); } while (fs->lfs_writer) { - error = ltsleep(&fs->lfs_dirops, (PRIBIO + 1) | PCATCH, - "lfs_sdirop", 0, &fs->lfs_interlock); + error = mtsleep(&fs->lfs_dirops, (PRIBIO + 1) | PCATCH, + "lfs_sdirop", 0, &lfs_lock); if (error == EINTR) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); goto unreserve; } } - simple_lock(&lfs_subsys_lock); if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) { wakeup(&lfs_writer_daemon); - simple_unlock(&lfs_subsys_lock); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); preempt(); goto restart; } if (lfs_dirvcount > LFS_MAX_DIROP) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, " "dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount)); - if ((error = ltsleep(&lfs_dirvcount, - PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0, - &lfs_subsys_lock)) != 0) { + if ((error = mtsleep(&lfs_dirvcount, + PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0, + &lfs_lock)) != 0) { goto unreserve; } goto restart; } - simple_unlock(&lfs_subsys_lock); ++fs->lfs_dirops; fs->lfs_doifile = 1; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* Hold a reference so SET_ENDOP will be happy */ vref(dvp); @@ -501,7 +498,7 @@ lfs_set_dirop_create(struct vnode *dvp, struct vnode **vpp) #define SET_ENDOP_BASE(fs, dvp, str) \ do { \ - simple_lock(&(fs)->lfs_interlock); \ + mutex_enter(&lfs_lock); \ --(fs)->lfs_dirops; \ if (!(fs)->lfs_dirops) { \ if ((fs)->lfs_nadirop) { \ @@ -510,10 +507,10 
@@ lfs_set_dirop_create(struct vnode *dvp, struct vnode **vpp) (fs)->lfs_nadirop); \ } \ wakeup(&(fs)->lfs_writer); \ - simple_unlock(&(fs)->lfs_interlock); \ + mutex_exit(&lfs_lock); \ lfs_check((dvp), LFS_UNUSED_LBN, 0); \ } else \ - simple_unlock(&(fs)->lfs_interlock); \ + mutex_exit(&lfs_lock); \ } while(0) #define SET_ENDOP_CREATE(fs, dvp, nvpp, str) \ do { \ @@ -548,14 +545,13 @@ lfs_mark_vnode(struct vnode *vp) struct inode *ip = VTOI(vp); struct lfs *fs = ip->i_lfs; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (!(ip->i_flag & IN_ADIROP)) { if (!(vp->v_uflag & VU_DIROP)) { + mutex_enter(&vp->v_interlock); (void)lfs_vref(vp); - simple_lock(&lfs_subsys_lock); ++lfs_dirvcount; ++fs->lfs_dirvcount; - simple_unlock(&lfs_subsys_lock); TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain); vp->v_uflag |= VU_DIROP; } @@ -563,7 +559,7 @@ lfs_mark_vnode(struct vnode *vp) ip->i_flag |= IN_ADIROP; } else KASSERT(vp->v_uflag & VU_DIROP); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } void @@ -573,9 +569,9 @@ lfs_unmark_vnode(struct vnode *vp) if (ip && (ip->i_flag & IN_ADIROP)) { KASSERT(vp->v_uflag & VU_DIROP); - simple_lock(&ip->i_lfs->lfs_interlock); + mutex_enter(&lfs_lock); --ip->i_lfs->lfs_nadirop; - simple_unlock(&ip->i_lfs->lfs_interlock); + mutex_exit(&lfs_lock); ip->i_flag &= ~IN_ADIROP; } } @@ -668,7 +664,6 @@ lfs_mknod(void *v) /* Used to be vput, but that causes us to call VOP_INACTIVE twice. */ VOP_UNLOCK(*vpp, 0); - lfs_vunref(*vpp); (*vpp)->v_type = VNON; vgone(*vpp); error = VFS_VGET(mp, ino, vpp); @@ -971,10 +966,11 @@ lfs_setattr(void *v) static int lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor) { - if (lockstatus(&fs->lfs_stoplock) != LK_EXCLUSIVE) + if (fs->lfs_stoplwp != curlwp) return EBUSY; - lockmgr(&fs->lfs_stoplock, LK_RELEASE, &fs->lfs_interlock); + fs->lfs_stoplwp = NULL; + cv_signal(&fs->lfs_stopcv); KASSERT(fs->lfs_nowrap > 0); if (fs->lfs_nowrap <= 0) { @@ -987,8 +983,8 @@ lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor) lfs_wakeup_cleaner(fs); } if (waitfor) { - ltsleep(&fs->lfs_nextseg, PCATCH | PUSER, - "segment", 0, &fs->lfs_interlock); + mtsleep(&fs->lfs_nextseg, PCATCH | PUSER, "segment", + 0, &lfs_lock); } return 0; @@ -1011,11 +1007,11 @@ lfs_close(void *v) struct lfs *fs = ip->i_lfs; if ((ip->i_number == ROOTINO || ip->i_number == LFS_IFILE_INUM) && - lockstatus(&fs->lfs_stoplock) == LK_EXCLUSIVE) { - simple_lock(&fs->lfs_interlock); + fs->lfs_stoplwp == curlwp) { + mutex_enter(&lfs_lock); log(LOG_NOTICE, "lfs_close: releasing log wrap control\n"); lfs_wrapgo(fs, ip, 0); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } if (vp == ip->i_lfs->lfs_ivnode && @@ -1093,7 +1089,9 @@ lfs_reclaim(void *v) KASSERT(ip->i_nlink == ip->i_ffs_effnlink); + mutex_enter(&lfs_lock); LFS_CLR_UINO(ip, IN_ALLMOD); + mutex_exit(&lfs_lock); if ((error = ufs_reclaim(vp))) return (error); @@ -1101,7 +1099,7 @@ lfs_reclaim(void *v) * Take us off the paging and/or dirop queues if we were on them. * We shouldn't be on them. 
*/ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (ip->i_flags & IN_PAGING) { log(LOG_WARNING, "%s: reclaimed vnode is IN_PAGING\n", fs->lfs_fsmnt); @@ -1113,7 +1111,7 @@ lfs_reclaim(void *v) vp->v_uflag &= ~VU_DIROP; TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); pool_put(&lfs_dinode_pool, ip->i_din.ffs1_din); lfs_deregister_all(vp); @@ -1167,6 +1165,7 @@ lfs_strategy(void *v) NULL); if (error) { bp->b_error = error; + bp->b_resid = bp->b_bcount; biodone(bp); return (error); } @@ -1174,14 +1173,15 @@ lfs_strategy(void *v) clrbuf(bp); } if ((long)bp->b_blkno < 0) { /* block is not on disk */ + bp->b_resid = bp->b_bcount; biodone(bp); return (0); } slept = 1; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (slept && fs->lfs_seglock) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* * Look through list of intervals. * There will only be intervals to look through @@ -1205,29 +1205,29 @@ lfs_strategy(void *v) DLOG((DLOG_CLEAN, "lfs_strategy: sleeping on ino %d lbn %" PRId64 "\n", ip->i_number, bp->b_lblkno)); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (LFS_SEGLOCK_HELD(fs) && fs->lfs_iocount) { /* Cleaner can't wait for itself */ - ltsleep(&fs->lfs_iocount, + mtsleep(&fs->lfs_iocount, (PRIBIO + 1) | PNORELOCK, "clean2", 0, - &fs->lfs_interlock); + &lfs_lock); slept = 1; break; } else if (fs->lfs_seglock) { - ltsleep(&fs->lfs_seglock, + mtsleep(&fs->lfs_seglock, (PRIBIO + 1) | PNORELOCK, "clean1", 0, - &fs->lfs_interlock); + &lfs_lock); slept = 1; break; } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } } - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); vp = ip->i_devvp; VOP_STRATEGY(vp, bp); @@ -1249,12 +1249,12 @@ lfs_flush_dirops(struct lfs *fs) if (fs->lfs_ronly) return; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (TAILQ_FIRST(&fs->lfs_dchainhd) == NULL) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return; } else - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); if (lfs_dostats) ++lfs_stats.flush_invoked; @@ -1281,10 +1281,10 @@ lfs_flush_dirops(struct lfs *fs) * no dirops are active. * */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) { nip = TAILQ_NEXT(ip, i_lfs_dchain); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); vp = ITOV(ip); KASSERT((ip->i_flag & IN_ADIROP) == 0); @@ -1299,8 +1299,8 @@ lfs_flush_dirops(struct lfs *fs) * make sure that we don't clear IN_MODIFIED * unnecessarily. 
*/ - if (vp->v_iflag & (VI_XLOCK | VI_FREEING)) { - simple_lock(&fs->lfs_interlock); + if (vp->v_iflag & VI_XLOCK) { + mutex_enter(&lfs_lock); continue; } waslocked = VOP_ISLOCKED(vp); @@ -1309,16 +1309,18 @@ lfs_flush_dirops(struct lfs *fs) lfs_writefile(fs, sp, vp); if (!VPISEMPTY(vp) && !WRITEINPROG(vp) && !(ip->i_flag & IN_ALLMOD)) { + mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_MODIFIED); + mutex_exit(&lfs_lock); } } KDASSERT(ip->i_number != LFS_IFILE_INUM); (void) lfs_writeinode(fs, sp, ip); + mutex_enter(&lfs_lock); if (waslocked == LK_EXCLOTHER) LFS_SET_UINO(ip, IN_MODIFIED); - simple_lock(&fs->lfs_interlock); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* We've written all the dirops there are */ ((SEGSUM *)(sp->segsum))->ss_flags &= ~(SS_CONT); lfs_finalize_fs_seguse(fs); @@ -1347,12 +1349,12 @@ lfs_flush_pchain(struct lfs *fs) if (fs->lfs_ronly) return; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (TAILQ_FIRST(&fs->lfs_pchainhd) == NULL) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return; } else - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* Get dirops out of the way */ lfs_flush_dirops(fs); @@ -1373,8 +1375,8 @@ lfs_flush_pchain(struct lfs *fs) * We're very conservative about what we write; we want to be * fast and async. */ - simple_lock(&fs->lfs_interlock); - top: + mutex_enter(&lfs_lock); + top: for (ip = TAILQ_FIRST(&fs->lfs_pchainhd); ip != NULL; ip = nip) { nip = TAILQ_NEXT(ip, i_lfs_pchain); vp = ITOV(ip); @@ -1382,24 +1384,31 @@ lfs_flush_pchain(struct lfs *fs) if (!(ip->i_flags & IN_PAGING)) goto top; - if ((vp->v_iflag|vp->v_uflag) & (VI_XLOCK|VU_DIROP)) + mutex_enter(&vp->v_interlock); + if ((vp->v_iflag & VI_XLOCK) || (vp->v_uflag & VU_DIROP) != 0) { + mutex_exit(&vp->v_interlock); continue; - if (vp->v_type != VREG) + } + if (vp->v_type != VREG) { + mutex_exit(&vp->v_interlock); continue; + } if (lfs_vref(vp)) continue; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); if (VOP_ISLOCKED(vp)) { lfs_vunref(vp); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); continue; } error = lfs_writefile(fs, sp, vp); if (!VPISEMPTY(vp) && !WRITEINPROG(vp) && !(ip->i_flag & IN_ALLMOD)) { + mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_MODIFIED); + mutex_exit(&lfs_lock); } KDASSERT(ip->i_number != LFS_IFILE_INUM); (void) lfs_writeinode(fs, sp, ip); @@ -1408,12 +1417,12 @@ lfs_flush_pchain(struct lfs *fs) if (error == EAGAIN) { lfs_writeseg(fs, sp); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); break; } - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); (void) lfs_writeseg(fs, sp); lfs_segunlock(fs); } @@ -1470,201 +1479,206 @@ lfs_fcntl(void *v) switch (ap->a_command) { case LFCNSEGWAITALL: case LFCNSEGWAITALL_COMPAT: - fsidp = NULL; - /* FALLSTHROUGH */ + fsidp = NULL; + /* FALLSTHROUGH */ case LFCNSEGWAIT: case LFCNSEGWAIT_COMPAT: - tvp = (struct timeval *)ap->a_data; - simple_lock(&fs->lfs_interlock); - ++fs->lfs_sleepers; - simple_unlock(&fs->lfs_interlock); + tvp = (struct timeval *)ap->a_data; + mutex_enter(&lfs_lock); + ++fs->lfs_sleepers; + mutex_exit(&lfs_lock); - error = lfs_segwait(fsidp, tvp); + error = lfs_segwait(fsidp, tvp); - simple_lock(&fs->lfs_interlock); - if (--fs->lfs_sleepers == 0) - wakeup(&fs->lfs_sleepers); - simple_unlock(&fs->lfs_interlock); - return error; + mutex_enter(&lfs_lock); + if (--fs->lfs_sleepers == 0) + wakeup(&fs->lfs_sleepers); + mutex_exit(&lfs_lock); + 
return error; case LFCNBMAPV: case LFCNMARKV: - blkvp = *(struct lfs_fcntl_markv *)ap->a_data; + blkvp = *(struct lfs_fcntl_markv *)ap->a_data; - blkcnt = blkvp.blkcnt; - if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT) - return (EINVAL); - blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV); - if ((error = copyin(blkvp.blkiov, blkiov, - blkcnt * sizeof(BLOCK_INFO))) != 0) { - lfs_free(fs, blkiov, LFS_NB_BLKIOV); - return error; - } + blkcnt = blkvp.blkcnt; + if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT) + return (EINVAL); + blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV); + if ((error = copyin(blkvp.blkiov, blkiov, + blkcnt * sizeof(BLOCK_INFO))) != 0) { + lfs_free(fs, blkiov, LFS_NB_BLKIOV); + return error; + } - simple_lock(&fs->lfs_interlock); - ++fs->lfs_sleepers; - simple_unlock(&fs->lfs_interlock); - if (ap->a_command == LFCNBMAPV) - error = lfs_bmapv(l->l_proc, fsidp, blkiov, blkcnt); - else /* LFCNMARKV */ - error = lfs_markv(l->l_proc, fsidp, blkiov, blkcnt); - if (error == 0) - error = copyout(blkiov, blkvp.blkiov, - blkcnt * sizeof(BLOCK_INFO)); - simple_lock(&fs->lfs_interlock); - if (--fs->lfs_sleepers == 0) - wakeup(&fs->lfs_sleepers); - simple_unlock(&fs->lfs_interlock); - lfs_free(fs, blkiov, LFS_NB_BLKIOV); - return error; + mutex_enter(&lfs_lock); + ++fs->lfs_sleepers; + mutex_exit(&lfs_lock); + if (ap->a_command == LFCNBMAPV) + error = lfs_bmapv(l->l_proc, fsidp, blkiov, blkcnt); + else /* LFCNMARKV */ + error = lfs_markv(l->l_proc, fsidp, blkiov, blkcnt); + if (error == 0) + error = copyout(blkiov, blkvp.blkiov, + blkcnt * sizeof(BLOCK_INFO)); + mutex_enter(&lfs_lock); + if (--fs->lfs_sleepers == 0) + wakeup(&fs->lfs_sleepers); + mutex_exit(&lfs_lock); + lfs_free(fs, blkiov, LFS_NB_BLKIOV); + return error; case LFCNRECLAIM: - /* - * Flush dirops and write Ifile, allowing empty segments - * to be immediately reclaimed. - */ - lfs_writer_enter(fs, "pndirop"); - off = fs->lfs_offset; - lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP); - lfs_flush_dirops(fs); - LFS_CLEANERINFO(cip, fs, bp); - oclean = cip->clean; - LFS_SYNC_CLEANERINFO(cip, fs, bp, 1); - lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP); - fs->lfs_sp->seg_flags |= SEGM_PROT; - lfs_segunlock(fs); - lfs_writer_leave(fs); + /* + * Flush dirops and write Ifile, allowing empty segments + * to be immediately reclaimed. 
+ */ + lfs_writer_enter(fs, "pndirop"); + off = fs->lfs_offset; + lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP); + lfs_flush_dirops(fs); + LFS_CLEANERINFO(cip, fs, bp); + oclean = cip->clean; + LFS_SYNC_CLEANERINFO(cip, fs, bp, 1); + lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP); + fs->lfs_sp->seg_flags |= SEGM_PROT; + lfs_segunlock(fs); + lfs_writer_leave(fs); #ifdef DEBUG - LFS_CLEANERINFO(cip, fs, bp); - DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64 - " blocks, cleaned %" PRId32 " segments (activesb %d)\n", - fs->lfs_offset - off, cip->clean - oclean, - fs->lfs_activesb)); - LFS_SYNC_CLEANERINFO(cip, fs, bp, 0); + LFS_CLEANERINFO(cip, fs, bp); + DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64 + " blocks, cleaned %" PRId32 " segments (activesb %d)\n", + fs->lfs_offset - off, cip->clean - oclean, + fs->lfs_activesb)); + LFS_SYNC_CLEANERINFO(cip, fs, bp, 0); #endif - return 0; + return 0; #ifdef COMPAT_30 case LFCNIFILEFH_COMPAT: - /* Return the filehandle of the Ifile */ - if ((error = kauth_authorize_generic(l->l_cred, - KAUTH_GENERIC_ISSUSER, NULL)) != 0) - return (error); - fhp = (struct fhandle *)ap->a_data; - fhp->fh_fsid = *fsidp; - fh_size = 16; /* former VFS_MAXFIDSIZ */ - return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); + /* Return the filehandle of the Ifile */ + if ((error = kauth_authorize_generic(l->l_cred, + KAUTH_GENERIC_ISSUSER, NULL)) != 0) + return (error); + fhp = (struct fhandle *)ap->a_data; + fhp->fh_fsid = *fsidp; + fh_size = 16; /* former VFS_MAXFIDSIZ */ + return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); #endif case LFCNIFILEFH_COMPAT2: case LFCNIFILEFH: - /* Return the filehandle of the Ifile */ - fhp = (struct fhandle *)ap->a_data; - fhp->fh_fsid = *fsidp; - fh_size = sizeof(struct lfs_fhandle) - - offsetof(fhandle_t, fh_fid); - return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); + /* Return the filehandle of the Ifile */ + fhp = (struct fhandle *)ap->a_data; + fhp->fh_fsid = *fsidp; + fh_size = sizeof(struct lfs_fhandle) - + offsetof(fhandle_t, fh_fid); + return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); case LFCNREWIND: - /* Move lfs_offset to the lowest-numbered segment */ - return lfs_rewind(fs, *(int *)ap->a_data); + /* Move lfs_offset to the lowest-numbered segment */ + return lfs_rewind(fs, *(int *)ap->a_data); case LFCNINVAL: - /* Mark a segment SEGUSE_INVAL */ - LFS_SEGENTRY(sup, fs, *(int *)ap->a_data, bp); - if (sup->su_nbytes > 0) { - brelse(bp, 0); - lfs_unset_inval_all(fs); - return EBUSY; - } - sup->su_flags |= SEGUSE_INVAL; - VOP_BWRITE(bp); - return 0; + /* Mark a segment SEGUSE_INVAL */ + LFS_SEGENTRY(sup, fs, *(int *)ap->a_data, bp); + if (sup->su_nbytes > 0) { + brelse(bp, 0); + lfs_unset_inval_all(fs); + return EBUSY; + } + sup->su_flags |= SEGUSE_INVAL; + VOP_BWRITE(bp); + return 0; case LFCNRESIZE: - /* Resize the filesystem */ - return lfs_resize_fs(fs, *(int *)ap->a_data); + /* Resize the filesystem */ + return lfs_resize_fs(fs, *(int *)ap->a_data); case LFCNWRAPSTOP: case LFCNWRAPSTOP_COMPAT: - /* - * Hold lfs_newseg at segment 0; if requested, sleep until - * the filesystem wraps around. To support external agents - * (dump, fsck-based regression test) that need to look at - * a snapshot of the filesystem, without necessarily - * requiring that all fs activity stops. - */ - if (lockstatus(&fs->lfs_stoplock)) - return EALREADY; + /* + * Hold lfs_newseg at segment 0; if requested, sleep until + * the filesystem wraps around. 
To support external agents + * (dump, fsck-based regression test) that need to look at + * a snapshot of the filesystem, without necessarily + * requiring that all fs activity stops. + */ + if (fs->lfs_stoplwp == curlwp) + return EALREADY; - simple_lock(&fs->lfs_interlock); - lockmgr(&fs->lfs_stoplock, LK_EXCLUSIVE, &fs->lfs_interlock); - if (fs->lfs_nowrap == 0) - log(LOG_NOTICE, "%s: disabled log wrap\n", fs->lfs_fsmnt); - ++fs->lfs_nowrap; - if (*(int *)ap->a_data == 1 || - ap->a_command == LFCNWRAPSTOP_COMPAT) { - log(LOG_NOTICE, "LFCNSTOPWRAP waiting for log wrap\n"); - error = ltsleep(&fs->lfs_nowrap, PCATCH | PUSER, - "segwrap", 0, &fs->lfs_interlock); - log(LOG_NOTICE, "LFCNSTOPWRAP done waiting\n"); - if (error) { - lfs_wrapgo(fs, VTOI(ap->a_vp), 0); - } - } - simple_unlock(&fs->lfs_interlock); - return 0; + mutex_enter(&lfs_lock); + while (fs->lfs_stoplwp != NULL) + cv_wait(&fs->lfs_stopcv, &lfs_lock); + fs->lfs_stoplwp = curlwp; + if (fs->lfs_nowrap == 0) + log(LOG_NOTICE, "%s: disabled log wrap\n", fs->lfs_fsmnt); + ++fs->lfs_nowrap; + if (*(int *)ap->a_data == 1 || + ap->a_command == LFCNWRAPSTOP_COMPAT) { + log(LOG_NOTICE, "LFCNSTOPWRAP waiting for log wrap\n"); + error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER, + "segwrap", 0, &lfs_lock); + log(LOG_NOTICE, "LFCNSTOPWRAP done waiting\n"); + if (error) { + lfs_wrapgo(fs, VTOI(ap->a_vp), 0); + } + } + mutex_exit(&lfs_lock); + return 0; case LFCNWRAPGO: case LFCNWRAPGO_COMPAT: - /* - * Having done its work, the agent wakes up the writer. - * If the argument is 1, it sleeps until a new segment - * is selected. - */ - simple_lock(&fs->lfs_interlock); - error = lfs_wrapgo(fs, VTOI(ap->a_vp), - (ap->a_command == LFCNWRAPGO_COMPAT ? 1 : - *((int *)ap->a_data))); - simple_unlock(&fs->lfs_interlock); - return error; + /* + * Having done its work, the agent wakes up the writer. + * If the argument is 1, it sleeps until a new segment + * is selected. + */ + mutex_enter(&lfs_lock); + error = lfs_wrapgo(fs, VTOI(ap->a_vp), + (ap->a_command == LFCNWRAPGO_COMPAT ? 
1 : + *((int *)ap->a_data))); + mutex_exit(&lfs_lock); + return error; case LFCNWRAPPASS: - if (lockstatus(&fs->lfs_stoplock) != LK_EXCLUSIVE) - return EALREADY; - if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT)) - return EALREADY; - simple_lock(&fs->lfs_interlock); - if (fs->lfs_nowrap == 0) { - simple_unlock(&fs->lfs_interlock); - return EBUSY; - } - fs->lfs_wrappass = 1; - wakeup(&fs->lfs_wrappass); - /* Wait for the log to wrap, if asked */ - if (*(int *)ap->a_data) { - lfs_vref(ap->a_vp); - VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT; - log(LOG_NOTICE, "LFCNPASS waiting for log wrap\n"); - error = ltsleep(&fs->lfs_nowrap, PCATCH | PUSER, - "segwrap", 0, &fs->lfs_interlock); - log(LOG_NOTICE, "LFCNPASS done waiting\n"); - VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT; - lfs_vunref(ap->a_vp); - } - simple_unlock(&fs->lfs_interlock); - return error; + if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT)) + return EALREADY; + mutex_enter(&lfs_lock); + if (fs->lfs_stoplwp != curlwp) { + mutex_exit(&lfs_lock); + return EALREADY; + } + if (fs->lfs_nowrap == 0) { + mutex_exit(&lfs_lock); + return EBUSY; + } + fs->lfs_wrappass = 1; + wakeup(&fs->lfs_wrappass); + /* Wait for the log to wrap, if asked */ + if (*(int *)ap->a_data) { + mutex_enter(&ap->a_vp->v_interlock); + lfs_vref(ap->a_vp); + VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT; + log(LOG_NOTICE, "LFCNPASS waiting for log wrap\n"); + error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER, + "segwrap", 0, &lfs_lock); + log(LOG_NOTICE, "LFCNPASS done waiting\n"); + VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT; + lfs_vunref(ap->a_vp); + } + mutex_exit(&lfs_lock); + return error; case LFCNWRAPSTATUS: - simple_lock(&fs->lfs_interlock); - *(int *)ap->a_data = fs->lfs_wrapstatus; - simple_unlock(&fs->lfs_interlock); - return 0; + mutex_enter(&lfs_lock); + *(int *)ap->a_data = fs->lfs_wrapstatus; + mutex_exit(&lfs_lock); + return 0; default: - return ufs_fcntl(v); + return ufs_fcntl(v); } return 0; } @@ -1688,7 +1702,9 @@ lfs_getpages(void *v) return EPERM; } if ((ap->a_access_type & VM_PROT_WRITE) != 0) { + mutex_enter(&lfs_lock); LFS_SET_UINO(VTOI(ap->a_vp), IN_MODIFIED); + mutex_exit(&lfs_lock); } /* @@ -1728,7 +1744,7 @@ wait_for_page(struct vnode *vp, struct vm_page *pg, const char *label) pg->flags |= PG_WANTED; UVM_UNLOCK_AND_WAIT(pg, &vp->v_interlock, 0, "lfsput", 0); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); } /* @@ -1756,7 +1772,7 @@ write_and_wait(struct lfs *fs, struct vnode *vp, struct vm_page *pg, return; while (pg->flags & PG_BUSY) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); if (sp->cbpp - sp->bpp > 1) { /* Write gathered pages */ lfs_updatemeta(sp); @@ -1771,7 +1787,7 @@ write_and_wait(struct lfs *fs, struct vnode *vp, struct vm_page *pg, ip->i_gen); } ++count; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); wait_for_page(vp, pg, label); } if (label != NULL && count > 1) @@ -1916,9 +1932,9 @@ check_dirty(struct lfs *fs, struct vnode *vp, * Wire the page so that * pdaemon doesn't see it again. 
*/ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pagewire(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* Suspended write flag */ pg->flags |= PG_DELWRI; @@ -2003,7 +2019,6 @@ lfs_putpages(void *v) struct segment *sp; off_t origoffset, startoffset, endoffset, origendoffset, blkeof; off_t off, max_endoffset; - int s; bool seglocked, sync, pagedaemon; struct vm_page *pg, *busypg; UVMHIST_FUNC("lfs_putpages"); UVMHIST_CALLED(ubchist); @@ -2019,7 +2034,7 @@ lfs_putpages(void *v) /* Putpages does nothing for metadata. */ if (vp == fs->lfs_ivnode || vp->v_type != VREG) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return 0; } @@ -2027,23 +2042,21 @@ lfs_putpages(void *v) * If there are no pages, don't do anything. */ if (vp->v_uobj.uo_npages == 0) { - s = splbio(); if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_iflag & VI_ONWORKLST) && LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { vp->v_iflag &= ~VI_WRMAPDIRTY; vn_syncer_remove_from_worklist(vp); } - splx(s); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); /* Remove us from paging queue, if we were on it */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (ip->i_flags & IN_PAGING) { ip->i_flags &= ~IN_PAGING; TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return 0; } @@ -2065,15 +2078,15 @@ lfs_putpages(void *v) pg->flags |= PG_WANTED; UVM_UNLOCK_AND_WAIT(pg, &vp->v_interlock, 0, "lfsput2", 0); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } ap->a_offlo = blkeof; if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return 0; } } @@ -2099,7 +2112,7 @@ lfs_putpages(void *v) KASSERT(startoffset > 0 || endoffset >= startoffset); if (startoffset == endoffset) { /* Nothing to do, why were we called? */ - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); DLOG((DLOG_PAGE, "lfs_putpages: startoffset = endoffset = %" PRId64 "\n", startoffset)); return 0; @@ -2133,7 +2146,7 @@ lfs_putpages(void *v) ap->a_flags, 1, NULL); if (r < 0) { /* Pages are busy with another process */ - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return EDEADLK; } if (r > 0) /* Some pages are dirty */ @@ -2153,7 +2166,7 @@ lfs_putpages(void *v) return r; /* One of the pages was busy. Start over. */ - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); wait_for_page(vp, busypg, "dirtyclean"); #ifdef DEBUG ++debug_n_dirtyclean; @@ -2174,16 +2187,14 @@ lfs_putpages(void *v) * notice the pager inode queue and act on that. */ if (pagedaemon) { - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (!(ip->i_flags & IN_PAGING)) { ip->i_flags |= IN_PAGING; TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain); } - simple_lock(&lfs_subsys_lock); wakeup(&lfs_writer_daemon); - simple_unlock(&lfs_subsys_lock); - simple_unlock(&fs->lfs_interlock); - simple_unlock(&vp->v_interlock); + mutex_exit(&lfs_lock); + mutex_exit(&vp->v_interlock); preempt(); return EWOULDBLOCK; } @@ -2200,19 +2211,19 @@ lfs_putpages(void *v) DLOG((DLOG_PAGE, "lfs_putpages: flushing VU_DIROP\n")); locked = (VOP_ISLOCKED(vp) == LK_EXCLUSIVE); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); lfs_writer_enter(fs, "ppdirop"); if (locked) VOP_UNLOCK(vp, 0); /* XXX why? 
*/ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); lfs_flush_fs(fs, sync ? SEGM_SYNC : 0); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (locked) { VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); } lfs_writer_leave(fs); @@ -2242,11 +2253,11 @@ lfs_putpages(void *v) */ seglocked = (ap->a_flags & PGO_LOCKED) != 0; if (!seglocked) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); error = lfs_seglock(fs, SEGM_PROT | (sync ? SEGM_SYNC : 0)); if (error != 0) return error; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); lfs_acquire_finfo(fs, ip->i_number, ip->i_gen); } sp = fs->lfs_sp; @@ -2273,9 +2284,9 @@ lfs_putpages(void *v) busypg = NULL; if (check_dirty(fs, vp, startoffset, endoffset, blkeof, ap->a_flags, 0, &busypg) < 0) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); write_and_wait(fs, vp, busypg, seglocked, NULL); if (!seglocked) { lfs_release_finfo(fs); @@ -2295,7 +2306,7 @@ lfs_putpages(void *v) ip->i_number, fs->lfs_offset, dtosn(fs, fs->lfs_offset))); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); write_and_wait(fs, vp, busypg, seglocked, "again"); } #ifdef DEBUG @@ -2343,12 +2354,12 @@ lfs_putpages(void *v) * Remove us from paging queue if we wrote all our pages. */ if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) { - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (ip->i_flags & IN_PAGING) { ip->i_flags &= ~IN_PAGING; TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } /* @@ -2365,17 +2376,13 @@ lfs_putpages(void *v) * aiodoned might not have got around to our buffers yet. */ if (sync) { - s = splbio(); - simple_lock(&global_v_numoutput_slock); + mutex_enter(&vp->v_interlock); while (vp->v_numoutput > 0) { DLOG((DLOG_PAGE, "lfs_putpages: ino %d sleeping on" " num %d\n", ip->i_number, vp->v_numoutput)); - vp->v_iflag |= VI_BWAIT; - ltsleep(&vp->v_numoutput, PRIBIO + 1, "lfs_vn", 0, - &global_v_numoutput_slock); + cv_wait(&vp->v_cv, &vp->v_interlock); } - simple_unlock(&global_v_numoutput_slock); - splx(s); + mutex_exit(&vp->v_interlock); } return error; } diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h index 79a050ead6aa..4b79ed6233ac 100644 --- a/sys/ufs/ufs/inode.h +++ b/sys/ufs/ufs/inode.h @@ -1,4 +1,4 @@ -/* $NetBSD: inode.h,v 1.48 2007/04/09 12:21:24 pooka Exp $ */ +/* $NetBSD: inode.h,v 1.49 2008/01/02 11:49:13 ad Exp $ */ /* * Copyright (c) 1982, 1989, 1993 @@ -128,6 +128,7 @@ struct inode { int32_t i_gen; /* Generation number. */ u_int32_t i_uid; /* File owner. */ u_int32_t i_gid; /* File group. 
*/ + u_int16_t i_omode; /* Old mode, for ufs_reclaim */ struct dirhash *i_dirhash; /* Hashing for large directories */ diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c index 47aa03b04863..2d22ad2ab16a 100644 --- a/sys/ufs/ufs/ufs_bmap.c +++ b/sys/ufs/ufs/ufs_bmap.c @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_bmap.c,v 1.46 2007/10/08 18:01:31 ad Exp $ */ +/* $NetBSD: ufs_bmap.c,v 1.47 2008/01/02 11:49:13 ad Exp $ */ /* * Copyright (c) 1989, 1991, 1993 @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ufs_bmap.c,v 1.46 2007/10/08 18:01:31 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ufs_bmap.c,v 1.47 2008/01/02 11:49:13 ad Exp $"); #include #include @@ -119,7 +119,7 @@ ufs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap, int *nump, int *runp, ufs_issequential_callback_t is_sequential) { struct inode *ip; - struct buf *bp; + struct buf *bp, *cbp; struct ufsmount *ump; struct mount *mp; struct indir a[NIADDR + 1], *xap; @@ -219,8 +219,16 @@ ufs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap, */ metalbn = xap->in_lbn; - if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn) + if (metalbn == bn) break; + if (daddr == 0) { + mutex_enter(&bufcache_lock); + cbp = incore(vp, metalbn); + mutex_exit(&bufcache_lock); + if (cbp == NULL) + break; + } + /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. @@ -240,7 +248,7 @@ ufs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap, return (ENOMEM); } - if (bp->b_flags & (B_DONE | B_DELWRI)) { + if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { trace(TR_BREADHIT, pack(vp, size), metalbn); } #ifdef DIAGNOSTIC diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h index e59f13cc60bb..c64b52d57e16 100644 --- a/sys/ufs/ufs/ufs_extern.h +++ b/sys/ufs/ufs/ufs_extern.h @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_extern.h,v 1.55 2007/11/26 19:02:34 pooka Exp $ */ +/* $NetBSD: ufs_extern.h,v 1.56 2008/01/02 11:49:13 ad Exp $ */ /*- * Copyright (c) 1991, 1993, 1994 @@ -34,6 +34,8 @@ #ifndef _UFS_UFS_EXTERN_H_ #define _UFS_UFS_EXTERN_H_ +#include + struct buf; struct componentname; struct direct; @@ -186,4 +188,6 @@ void softdep_releasefile(struct inode *); __END_DECLS +extern kmutex_t ufs_ihash_lock; + #endif /* !_UFS_UFS_EXTERN_H_ */ diff --git a/sys/ufs/ufs/ufs_ihash.c b/sys/ufs/ufs/ufs_ihash.c index b9bd3cd897c5..c8928e2513bb 100644 --- a/sys/ufs/ufs/ufs_ihash.c +++ b/sys/ufs/ufs/ufs_ihash.c @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_ihash.c,v 1.23 2007/05/28 23:42:56 ad Exp $ */ +/* $NetBSD: ufs_ihash.c,v 1.24 2008/01/02 11:49:13 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ufs_ihash.c,v 1.23 2007/05/28 23:42:56 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ufs_ihash.c,v 1.24 2008/01/02 11:49:13 ad Exp $"); #include #include @@ -116,13 +116,13 @@ ufs_ihashlookup(dev_t dev, ino_t inum) struct inode *ip; struct ihashhead *ipp; - mutex_enter(&ufs_ihash_lock); + KASSERT(mutex_owned(&ufs_ihash_lock)); + ipp = &ihashtbl[INOHASH(dev, inum)]; LIST_FOREACH(ip, ipp, i_hash) { if (inum == ip->i_number && dev == ip->i_dev) break; } - mutex_exit(&ufs_ihash_lock); if (ip) return (ITOV(ip)); return (NULLVP); @@ -148,7 +148,7 @@ ufs_ihashget(dev_t dev, ino_t inum, int flags) if (flags == 0) { mutex_exit(&ufs_ihash_lock); } else { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&ufs_ihash_lock); if (vget(vp, flags | LK_INTERLOCK)) goto loop; diff --git 
a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c index 36cf59066bec..e43c4ac864ad 100644 --- a/sys/ufs/ufs/ufs_inode.c +++ b/sys/ufs/ufs/ufs_inode.c @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_inode.c,v 1.71 2007/12/08 19:29:56 pooka Exp $ */ +/* $NetBSD: ufs_inode.c,v 1.72 2008/01/02 11:49:14 ad Exp $ */ /* * Copyright (c) 1991, 1993 @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.71 2007/12/08 19:29:56 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.72 2008/01/02 11:49:14 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -77,16 +77,15 @@ ufs_inactive(void *v) { struct vop_inactive_args /* { struct vnode *a_vp; - struct lwp *a_l; + struct bool *a_recycle; } */ *ap = v; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct mount *transmp; - struct lwp *l = curlwp; mode_t mode; int error = 0; - if (prtactive && vp->v_usecount != 0) + if (prtactive && vp->v_usecount > 1) vprint("ufs_inactive: pushing active", vp); transmp = vp->v_mount; @@ -118,28 +117,24 @@ ufs_inactive(void *v) DIP_ASSIGN(ip, rdev, 0); mode = ip->i_mode; ip->i_mode = 0; + ip->i_omode = mode; DIP_ASSIGN(ip, mode, 0); ip->i_flag |= IN_CHANGE | IN_UPDATE; - simple_lock(&vp->v_interlock); - vp->v_iflag |= VI_FREEING; - simple_unlock(&vp->v_interlock); if (DOINGSOFTDEP(vp)) softdep_change_linkcnt(ip); - UFS_VFREE(vp, ip->i_number, mode); - } - - if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) { + /* + * Defer final inode free and update to ufs_reclaim(). + */ + } else if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) { UFS_UPDATE(vp, NULL, NULL, 0); } out: - VOP_UNLOCK(vp, 0); /* * If we are done with the inode, reclaim it * so that it can be reused immediately. */ - - if (ip->i_mode == 0) - vrecycle(vp, NULL, l); + *ap->a_recycle = (ip->i_mode == 0); + VOP_UNLOCK(vp, 0); fstrans_done(transmp); return (error); } @@ -152,15 +147,12 @@ ufs_reclaim(struct vnode *vp) { struct inode *ip = VTOI(vp); - if (prtactive && vp->v_usecount != 0) + if (prtactive && vp->v_usecount > 1) vprint("ufs_reclaim: pushing active", vp); UFS_UPDATE(vp, NULL, NULL, UPDATE_CLOSE); - - /* - * Remove the inode from its hash chain. - */ ufs_ihashrem(ip); + /* * Purge old data structures associated with the inode. */ @@ -223,23 +215,23 @@ ufs_balloc_range(struct vnode *vp, off_t off, off_t len, kauth_cred_t cred, pgssize = npages * sizeof(struct vm_page *); pgs = kmem_zalloc(pgssize, KM_SLEEP); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0, VM_PROT_WRITE, 0, PGO_SYNCIO|PGO_PASTEOF|PGO_NOBLOCKALLOC|PGO_NOTIMESTAMP); if (error) { goto out; } - simple_lock(&uobj->vmobjlock); - uvm_lock_pageq(); + mutex_enter(&uobj->vmobjlock); + mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { UVMHIST_LOG(ubchist, "got pgs[%d] %p", i, pgs[i],0,0); KASSERT((pgs[i]->flags & PG_RELEASED) == 0); pgs[i]->flags &= ~PG_CLEAN; uvm_pageactivate(pgs[i]); } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uobj->vmobjlock); /* * adjust off to be block-aligned. 
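ufs_balloc_range() above shows the page-queue lock conversion: uvm_lock_pageq()/uvm_unlock_pageq() become mutex_enter()/mutex_exit() on the global uvm_pageqlock (declared in the uvm.h hunk later in this patch). A condensed sketch of the nesting as it appears in the hunk above, with the object lock taken before the page queues; the page array is illustrative:

	mutex_enter(&uobj->vmobjlock);	/* object lock first */
	mutex_enter(&uvm_pageqlock);	/* then the page queues */
	for (i = 0; i < npages; i++)
		uvm_pageactivate(pgs[i]);
	mutex_exit(&uvm_pageqlock);
	mutex_exit(&uobj->vmobjlock);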
@@ -263,7 +255,7 @@ ufs_balloc_range(struct vnode *vp, off_t off, off_t len, kauth_cred_t cred, */ GOP_SIZE(vp, off + len, &eob, 0); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); for (i = 0; i < npages; i++) { if (error) { pgs[i]->flags |= PG_RELEASED; @@ -273,15 +265,15 @@ ufs_balloc_range(struct vnode *vp, off_t off, off_t len, kauth_cred_t cred, } } if (error) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_page_unbusy(pgs, npages); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } else { uvm_page_unbusy(pgs, npages); } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); out: - kmem_free(pgs, pgssize); + kmem_free(pgs, pgssize); return error; } diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c index 2eb031aa34a0..5c1cdc049f5e 100644 --- a/sys/ufs/ufs/ufs_quota.c +++ b/sys/ufs/ufs/ufs_quota.c @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_quota.c,v 1.52 2007/12/08 19:29:57 pooka Exp $ */ +/* $NetBSD: ufs_quota.c,v 1.53 2008/01/02 11:49:14 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1990, 1993, 1995 @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ufs_quota.c,v 1.52 2007/12/08 19:29:57 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ufs_quota.c,v 1.53 2008/01/02 11:49:14 ad Exp $"); #include #include @@ -412,8 +412,7 @@ int quotaon(struct lwp *l, struct mount *mp, int type, void *fname) { struct ufsmount *ump = VFSTOUFS(mp); - struct vnode *vp, **vpp; - struct vnode *nextvp; + struct vnode *vp, **vpp, *mvp; struct dquot *dq; int error; struct nameidata nd; @@ -453,29 +452,43 @@ quotaon(struct lwp *l, struct mount *mp, int type, void *fname) ump->um_itime[type] = dq->dq_itime; dqrele(NULLVP, dq); } + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) { + error = ENOMEM; + goto out; + } /* * Search vnodes associated with this mount point, * adding references to quota file being opened. * NB: only need to add dquot's for inodes being modified. */ + mutex_enter(&mntvnode_lock); again: - TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { - nextvp = TAILQ_NEXT(vp, v_mntvnodes); - if (vp->v_mount != mp) - goto again; - if (vp->v_type == VNON ||vp->v_writecount == 0) + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + mutex_enter(&vp->v_interlock); + if (vp->v_mount != mp || vismarker(vp) || + vp->v_type == VNON || vp->v_writecount == 0) { + mutex_exit(&vp->v_interlock); continue; - if (vget(vp, LK_EXCLUSIVE)) + } + mutex_exit(&mntvnode_lock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { + mutex_enter(&mntvnode_lock); + (void)vunmark(mvp); goto again; + } if ((error = getinoquota(VTOI(vp))) != 0) { vput(vp); + mutex_enter(&mntvnode_lock); + (void)vunmark(mvp); break; } vput(vp); - /* if the list changed, start again */ - if (TAILQ_NEXT(vp, v_mntvnodes) != nextvp) - goto again; } + mutex_exit(&mntvnode_lock); + vfree(mvp); + out: mutex_enter(&dqlock); ump->um_qflags[type] &= ~QTF_OPENING; cv_broadcast(&dqcv); @@ -492,18 +505,23 @@ int quotaoff(struct lwp *l, struct mount *mp, int type) { struct vnode *vp; - struct vnode *qvp, *nextvp; + struct vnode *qvp, *mvp; struct ufsmount *ump = VFSTOUFS(mp); struct dquot *dq; struct inode *ip; kauth_cred_t cred; int i, error; + /* Allocate a marker vnode. 
*/ + if ((mvp = valloc(mp)) == NULL) + return ENOMEM; + mutex_enter(&dqlock); while ((ump->um_qflags[type] & (QTF_CLOSING | QTF_OPENING)) != 0) cv_wait(&dqcv, &dqlock); if ((qvp = ump->um_quotas[type]) == NULLVP) { mutex_exit(&dqlock); + vfree(mvp); return (0); } ump->um_qflags[type] |= QTF_CLOSING; @@ -512,24 +530,29 @@ quotaoff(struct lwp *l, struct mount *mp, int type) * Search vnodes associated with this mount point, * deleting any references to quota file being closed. */ + mutex_enter(&mntvnode_lock); again: - TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { - nextvp = TAILQ_NEXT(vp, v_mntvnodes); - if (vp->v_mount != mp) - goto again; - if (vp->v_type == VNON) + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + mutex_enter(&vp->v_interlock); + if (vp->v_mount != mp || vismarker(vp) || vp->v_type == VNON) { + mutex_exit(&vp->v_interlock); continue; - if (vget(vp, LK_EXCLUSIVE)) + } + mutex_exit(&mntvnode_lock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { + mutex_enter(&mntvnode_lock); + (void)vunmark(mvp); goto again; + } ip = VTOI(vp); dq = ip->i_dquot[type]; ip->i_dquot[type] = NODQUOT; dqrele(vp, dq); vput(vp); - /* if the list changed, start again */ - if (TAILQ_NEXT(vp, v_mntvnodes) != nextvp) - goto again; + mutex_enter(&mntvnode_lock); } + mutex_exit(&mntvnode_lock); #ifdef DIAGNOSTIC dqflush(qvp); #endif @@ -669,7 +692,7 @@ int qsync(struct mount *mp) { struct ufsmount *ump = VFSTOUFS(mp); - struct vnode *vp, *nextvp; + struct vnode *vp, *mvp; struct dquot *dq; int i, error; @@ -682,25 +705,32 @@ qsync(struct mount *mp) break; if (i == MAXQUOTAS) return (0); + + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return (ENOMEM); + /* * Search vnodes associated with this mount point, * synchronizing any modified dquot structures. 
*/ - simple_lock(&mntvnode_slock); -again: - TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { - nextvp = TAILQ_NEXT(vp, v_mntvnodes); - if (vp->v_mount != mp) - goto again; - if (vp->v_type == VNON) + mutex_enter(&mntvnode_lock); + again: + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + mutex_enter(&vp->v_interlock); + if (vp->v_mount != mp || vismarker(vp) || vp->v_type == VNON) { + mutex_exit(&vp->v_interlock); continue; - simple_lock(&vp->v_interlock); - simple_unlock(&mntvnode_slock); + } + mutex_exit(&mntvnode_lock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); if (error) { - simple_lock(&mntvnode_slock); - if (error == ENOENT) + mutex_enter(&mntvnode_lock); + if (error == ENOENT) { + (void)vunmark(mvp); goto again; + } continue; } for (i = 0; i < MAXQUOTAS; i++) { @@ -713,12 +743,10 @@ again: mutex_exit(&dq->dq_interlock); } vput(vp); - simple_lock(&mntvnode_slock); - /* if the list changed, start again */ - if (TAILQ_NEXT(vp, v_mntvnodes) != nextvp) - goto again; + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); + vfree(mvp); return (0); } diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c index 3a7d6de3d3c4..280b22de641d 100644 --- a/sys/ufs/ufs/ufs_readwrite.c +++ b/sys/ufs/ufs/ufs_readwrite.c @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_readwrite.c,v 1.85 2007/12/08 19:29:57 pooka Exp $ */ +/* $NetBSD: ufs_readwrite.c,v 1.86 2008/01/02 11:49:14 ad Exp $ */ /*- * Copyright (c) 1993 @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.85 2007/12/08 19:29:57 pooka Exp $"); +__KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.86 2008/01/02 11:49:14 ad Exp $"); #ifdef LFS_READWRITE #define FS struct lfs @@ -311,7 +311,7 @@ WRITE(void *v) if (error) goto out; if (flags & B_SYNC) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); VOP_PUTPAGES(vp, trunc_page(osize & fs->fs_bmask), round_page(eob), PGO_CLEANIT | PGO_SYNCIO); } @@ -406,7 +406,7 @@ WRITE(void *v) #ifndef LFS_READWRITE if (!async && oldoff >> 16 != uio->uio_offset >> 16) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16, (uio->uio_offset >> 16) << 16, PGO_CLEANIT); if (error) @@ -415,7 +415,7 @@ WRITE(void *v) #endif } if (error == 0 && ioflag & IO_SYNC) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(origoff & fs->fs_bmask), round_page(blkroundup(fs, uio->uio_offset)), PGO_CLEANIT | PGO_SYNCIO); @@ -423,7 +423,7 @@ WRITE(void *v) goto out; bcache: - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); VOP_PUTPAGES(vp, trunc_page(origoff), round_page(origoff + resid), PGO_CLEANIT | PGO_FREE | PGO_SYNCIO); while (uio->uio_resid > 0) { diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 102443c79a92..973280db494b 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_vnops.c,v 1.160 2007/12/08 19:29:57 pooka Exp $ */ +/* $NetBSD: ufs_vnops.c,v 1.161 2008/01/02 11:49:14 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993, 1995 @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.160 2007/12/08 19:29:57 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.161 2008/01/02 11:49:14 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -166,7 +166,7 @@ ufs_mknod(void *v) * checked to see if it is an alias of an existing entry in * the inode cache. 
*/ - vput(*vpp); + VOP_UNLOCK(*vpp, 0); (*vpp)->v_type = VNON; vgone(*vpp); error = VFS_VGET(mp, ino, vpp); @@ -222,10 +222,10 @@ ufs_close(void *v) vp = ap->a_vp; ip = VTOI(vp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_usecount > 1) UFS_ITIMES(vp, NULL, NULL, NULL); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (0); } @@ -1888,10 +1888,10 @@ ufsspec_close(void *v) vp = ap->a_vp; ip = VTOI(vp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_usecount > 1) UFS_ITIMES(vp, NULL, NULL, NULL); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap)); } @@ -1953,10 +1953,10 @@ ufsfifo_close(void *v) vp = ap->a_vp; ip = VTOI(vp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (ap->a_vp->v_usecount > 1) UFS_ITIMES(vp, NULL, NULL, NULL); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap)); } @@ -2059,7 +2059,6 @@ ufs_vinit(struct mount *mntp, int (**specops)(void *), int (**fifoops)(void *), vp->v_vflag &= ~VV_LOCKSWORK; VOP_UNLOCK(vp, 0); vp->v_op = spec_vnodeop_p; - vrele(vp); vgone(vp); lockmgr(&nvp->v_lock, LK_EXCLUSIVE, &nvp->v_interlock); /* diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h index 801b5abc7e9e..8df5932e28ca 100644 --- a/sys/uvm/uvm.h +++ b/sys/uvm/uvm.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm.h,v 1.52 2007/07/21 19:21:53 ad Exp $ */ +/* $NetBSD: uvm.h,v 1.53 2008/01/02 11:49:15 ad Exp $ */ /* * @@ -84,7 +84,6 @@ struct uvm { /* vm_page queues */ struct pgfreelist page_free[VM_NFREELIST]; /* unallocated pages */ int page_free_nextcolor; /* next color to allocate from */ - struct simplelock pageqlock; /* lock for active/inactive page q */ bool page_init_done; /* TRUE if uvm_page_init() finished */ bool page_idle_zero; /* TRUE if we should try to zero pages in the idle loop */ @@ -120,8 +119,8 @@ extern struct uvm_object *uvm_kernel_object; * locks (made globals for lockstat). */ +extern kmutex_t uvm_pageqlock; /* lock for active/inactive page q */ extern kmutex_t uvm_fpageqlock; /* lock for free page q */ -extern kmutex_t uvm_pagedaemon_lock; extern kmutex_t uvm_kentry_lock; extern kmutex_t uvm_swap_data_lock; extern kmutex_t uvm_scheduler_mutex; @@ -171,7 +170,7 @@ extern struct evcnt uvm_ra_miss; #define UVM_UNLOCK_AND_WAIT(event, slock, intr, msg, timo) \ do { \ - (void) ltsleep(event, PVM | PNORELOCK | (intr ? PCATCH : 0), \ + (void) mtsleep(event, PVM | PNORELOCK | (intr ? 
PCATCH : 0), \ msg, timo, slock); \ } while (/*CONSTCOND*/ 0) diff --git a/sys/uvm/uvm_amap.c b/sys/uvm/uvm_amap.c index a64d24d49d8f..b539d33b58a0 100644 --- a/sys/uvm/uvm_amap.c +++ b/sys/uvm/uvm_amap.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_amap.c,v 1.83 2007/12/08 15:46:31 ad Exp $ */ +/* $NetBSD: uvm_amap.c,v 1.84 2008/01/02 11:49:15 ad Exp $ */ /* * @@ -42,7 +42,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.83 2007/12/08 15:46:31 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.84 2008/01/02 11:49:15 ad Exp $"); #include "opt_uvmhist.h" @@ -704,11 +704,11 @@ amap_wipeout(struct vm_amap *amap) if (anon == NULL || anon->an_ref == 0) panic("amap_wipeout: corrupt amap"); - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); UVMHIST_LOG(maphist," processing anon 0x%x, ref=%d", anon, anon->an_ref, 0, 0); refs = --anon->an_ref; - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); if (refs == 0) { /* @@ -860,9 +860,9 @@ amap_copy(struct vm_map *map, struct vm_map_entry *entry, int flags, srcamap->am_anon[entry->aref.ar_pageoff + lcv]; if (amap->am_anon[lcv] == NULL) continue; - simple_lock(&amap->am_anon[lcv]->an_lock); + mutex_enter(&amap->am_anon[lcv]->an_lock); amap->am_anon[lcv]->an_ref++; - simple_unlock(&amap->am_anon[lcv]->an_lock); + mutex_exit(&amap->am_anon[lcv]->an_lock); amap->am_bckptr[lcv] = amap->am_nused; amap->am_slots[amap->am_nused] = lcv; amap->am_nused++; @@ -947,7 +947,7 @@ ReStart: slot = amap->am_slots[lcv]; anon = amap->am_anon[slot]; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); /* * If the anon has only one ref, we must have already copied it. @@ -958,7 +958,7 @@ ReStart: if (anon->an_ref == 1) { KASSERT(anon->an_page != NULL || anon->an_swslot != 0); - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); continue; } @@ -976,7 +976,7 @@ ReStart: */ if (pg->loan_count != 0) { - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); continue; } KASSERT(pg->uanon == anon && pg->uobject == NULL); @@ -1011,10 +1011,10 @@ ReStart: if (nanon) { nanon->an_ref--; - simple_unlock(&nanon->an_lock); + mutex_exit(&nanon->an_lock); uvm_anfree(nanon); } - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); amap_unlock(amap); uvm_wait("cownowpage"); goto ReStart; @@ -1034,13 +1034,13 @@ ReStart: * locked the whole time it can't be PG_RELEASED or PG_WANTED. 
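[editor's note] The UVM_UNLOCK_AND_WAIT change in uvm.h above is the template for much of what follows: ltsleep() took a simplelock, while mtsleep() takes the kmutex and, like any condition-variable wait, releases it and blocks as one atomic step, so a wakeup cannot fire in the gap between unlock and sleep. (With PNORELOCK the lock is not retaken on wakeup, which is why callers in these hunks re-enter vmobjlock explicitly afterwards.) A hedged userland sketch of the atomic part, with invented names:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool done;

static void
wait_for_done(void)
{
	pthread_mutex_lock(&lock);
	while (!done) {
		/*
		 * Releases 'lock' and blocks in one step, then
		 * reacquires it before returning: the same
		 * unlock-and-sleep guarantee mtsleep() provides.
		 */
		pthread_cond_wait(&cond, &lock);
	}
	pthread_mutex_unlock(&lock);
}

static void
signal_done(void)
{
	pthread_mutex_lock(&lock);
	done = true;
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
}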
*/ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageactivate(npg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); npg->flags &= ~(PG_BUSY|PG_FAKE); UVM_PAGE_OWN(npg, NULL); - simple_unlock(&nanon->an_lock); - simple_unlock(&anon->an_lock); + mutex_exit(&nanon->an_lock); + mutex_exit(&anon->an_lock); } amap_unlock(amap); } @@ -1253,9 +1253,9 @@ amap_wiperange(struct vm_amap *amap, int slotoff, int slots) * drop anon reference count */ - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); refs = --anon->an_ref; - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); if (refs == 0) { /* @@ -1334,11 +1334,11 @@ amap_swap_off(int startslot, int endslot) slot = am->am_slots[i]; anon = am->am_anon[slot]; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); swslot = anon->an_swslot; if (swslot < startslot || endslot <= swslot) { - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); continue; } diff --git a/sys/uvm/uvm_anon.c b/sys/uvm/uvm_anon.c index 3b0f9c94bcaf..9d78bdd76c95 100644 --- a/sys/uvm/uvm_anon.c +++ b/sys/uvm/uvm_anon.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_anon.c,v 1.49 2007/12/20 23:50:00 ad Exp $ */ +/* $NetBSD: uvm_anon.c,v 1.50 2008/01/02 11:49:15 ad Exp $ */ /* * @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v 1.49 2007/12/20 23:50:00 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v 1.50 2008/01/02 11:49:15 ad Exp $"); #include "opt_uvmhist.h" @@ -75,7 +75,7 @@ uvm_anon_ctor(void *arg, void *object, int flags) struct vm_anon *anon = object; anon->an_ref = 0; - simple_lock_init(&anon->an_lock); + mutex_init(&anon->an_lock, MUTEX_DEFAULT, IPL_NONE); anon->an_page = NULL; #if defined(VMSWAP) anon->an_swslot = 0; @@ -87,8 +87,9 @@ uvm_anon_ctor(void *arg, void *object, int flags) static void uvm_anon_dtor(void *arg, void *object) { + struct vm_anon *anon = object; - /* nothing yet */ + mutex_destroy(&anon->an_lock); } /* @@ -104,13 +105,12 @@ uvm_analloc(void) anon = pool_cache_get(&uvm_anon_cache, PR_NOWAIT); if (anon) { KASSERT(anon->an_ref == 0); - LOCK_ASSERT(simple_lock_held(&anon->an_lock) == 0); KASSERT(anon->an_page == NULL); #if defined(VMSWAP) KASSERT(anon->an_swslot == 0); #endif /* defined(VMSWAP) */ anon->an_ref = 1; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); } return anon; } @@ -132,6 +132,7 @@ uvm_anfree(struct vm_anon *anon) UVMHIST_LOG(maphist,"(anon=0x%x)", anon, 0,0,0); KASSERT(anon->an_ref == 0); + KASSERT(!mutex_owned(&anon->an_lock)); /* * get page @@ -146,9 +147,9 @@ uvm_anfree(struct vm_anon *anon) */ if (pg && pg->loan_count) { - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); pg = uvm_anon_lockloanpg(anon); - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); } /* @@ -164,12 +165,12 @@ uvm_anfree(struct vm_anon *anon) */ if (pg->uobject) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); KASSERT(pg->loan_count > 0); pg->loan_count--; pg->uanon = NULL; - uvm_unlock_pageq(); - simple_unlock(&pg->uobject->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&pg->uobject->vmobjlock); } else { /* @@ -177,7 +178,7 @@ uvm_anfree(struct vm_anon *anon) */ KASSERT((pg->flags & PG_RELEASED) == 0); - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); pmap_page_protect(pg, VM_PROT_NONE); /* @@ -187,13 +188,13 @@ uvm_anfree(struct vm_anon *anon) if (pg->flags & PG_BUSY) { pg->flags |= PG_RELEASED; - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); return; } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - 
uvm_unlock_pageq(); - simple_unlock(&anon->an_lock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&anon->an_lock); UVMHIST_LOG(maphist, "anon 0x%x, page 0x%x: " "freed now!", anon, pg, 0, 0); } @@ -280,7 +281,7 @@ uvm_anon_lockloanpg(struct vm_anon *anon) struct vm_page *pg; bool locked = false; - LOCK_ASSERT(simple_lock_held(&anon->an_lock)); + KASSERT(mutex_owned(&anon->an_lock)); /* * loop while we have a resident page that has a non-zero loan count. @@ -301,15 +302,15 @@ uvm_anon_lockloanpg(struct vm_anon *anon) */ if (pg->uobject) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->uobject) { locked = - simple_lock_try(&pg->uobject->vmobjlock); + mutex_tryenter(&pg->uobject->vmobjlock); } else { /* object disowned before we got PQ lock */ locked = true; } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* * if we didn't get a lock (try lock failed), then we @@ -317,14 +318,16 @@ uvm_anon_lockloanpg(struct vm_anon *anon) */ if (!locked) { - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); /* * someone locking the object has a chance to * lock us right now */ + /* XXX Better than yielding but inadequate. */ + kpause("livelock", false, 1, NULL); - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); continue; } } @@ -335,10 +338,10 @@ uvm_anon_lockloanpg(struct vm_anon *anon) */ if (pg->uobject == NULL && (pg->pqflags & PQ_ANON) == 0) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); pg->pqflags |= PQ_ANON; pg->loan_count--; - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } break; } @@ -362,7 +365,7 @@ uvm_anon_pagein(struct vm_anon *anon) int rv; /* locked: anon */ - LOCK_ASSERT(simple_lock_held(&anon->an_lock)); + KASSERT(mutex_owned(&anon->an_lock)); rv = uvmfault_anonget(NULL, NULL, anon); @@ -407,10 +410,10 @@ uvm_anon_pagein(struct vm_anon *anon) */ pmap_clear_reference(pg); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->wire_count == 0) uvm_pagedeactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); if (pg->flags & PG_WANTED) { wakeup(pg); @@ -421,9 +424,9 @@ uvm_anon_pagein(struct vm_anon *anon) * unlock the anon and we're done. 
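[editor's note] uvm_anon_lockloanpg just above shows the commit's standard answer to out-of-order locking: the anon lock is already held, the object lock would normally be taken first, so the code uses mutex_tryenter and, on failure, drops its own lock and kpause()s for a tick to let the other thread through. The XXX comment is honest that this is a stopgap. A generic sketch of the back-off shape with pthreads; the locks a and b are illustrative only:

#include <pthread.h>
#include <time.h>

static pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;

/* Take b while holding a, although the canonical order is b then a. */
static void
lock_out_of_order(void)
{
	const struct timespec tick = { 0, 1000000 };	/* ~1 ms back-off */

	pthread_mutex_lock(&a);
	while (pthread_mutex_trylock(&b) != 0) {
		/* Blocking here could deadlock: back off instead. */
		pthread_mutex_unlock(&a);
		nanosleep(&tick, NULL);
		pthread_mutex_lock(&a);
		/* State guarded by a may have changed; re-check it. */
	}
	/* Both held, in the "wrong" order but safely. */
	pthread_mutex_unlock(&b);
	pthread_mutex_unlock(&a);
}

The re-check after reacquiring matters: in the kernel loop the code re-reads an_page each time around, since the loan may have been resolved while the anon lock was dropped.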
*/ - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); if (uobj) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } return false; } @@ -441,8 +444,7 @@ uvm_anon_release(struct vm_anon *anon) { struct vm_page *pg = anon->an_page; - LOCK_ASSERT(simple_lock_held(&anon->an_lock)); - + KASSERT(mutex_owned(&anon->an_lock)); KASSERT(pg != NULL); KASSERT((pg->flags & PG_RELEASED) != 0); KASSERT((pg->flags & PG_BUSY) != 0); @@ -451,10 +453,10 @@ uvm_anon_release(struct vm_anon *anon) KASSERT(pg->loan_count == 0); KASSERT(anon->an_ref == 0); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - uvm_unlock_pageq(); - simple_unlock(&anon->an_lock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&anon->an_lock); KASSERT(anon->an_page == NULL); diff --git a/sys/uvm/uvm_anon.h b/sys/uvm/uvm_anon.h index 26f85f7d6aca..0ba015e9dedb 100644 --- a/sys/uvm/uvm_anon.h +++ b/sys/uvm/uvm_anon.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_anon.h,v 1.24 2007/02/21 23:00:12 thorpej Exp $ */ +/* $NetBSD: uvm_anon.h,v 1.25 2008/01/02 11:49:15 ad Exp $ */ /* * @@ -53,7 +53,7 @@ struct vm_anon { int an_ref; /* reference count [an_lock] */ - struct simplelock an_lock; /* lock for an_ref */ + kmutex_t an_lock; /* lock for an_ref */ struct vm_page *an_page;/* if in RAM [an_lock] */ #if defined(VMSWAP) || 1 /* XXX libkvm */ int an_swslot; /* drum swap slot # (if != 0) diff --git a/sys/uvm/uvm_aobj.c b/sys/uvm/uvm_aobj.c index 9c6ed771b295..078b0f012557 100644 --- a/sys/uvm/uvm_aobj.c +++ b/sys/uvm/uvm_aobj.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_aobj.c,v 1.95 2007/12/01 10:40:27 yamt Exp $ */ +/* $NetBSD: uvm_aobj.c,v 1.96 2008/01/02 11:49:15 ad Exp $ */ /* * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and @@ -43,7 +43,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.95 2007/12/01 10:40:27 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.96 2008/01/02 11:49:15 ad Exp $"); #include "opt_uvmhist.h" @@ -398,11 +398,12 @@ uao_free(struct uvm_aobj *aobj) { int swpgonlydelta = 0; + #if defined(VMSWAP) uao_dropswap_range1(aobj, 0, 0); #endif /* defined(VMSWAP) */ - simple_unlock(&aobj->u_obj.vmobjlock); + mutex_exit(&aobj->u_obj.vmobjlock); #if defined(VMSWAP) if (UAO_USES_SWHASH(aobj)) { @@ -426,6 +427,7 @@ uao_free(struct uvm_aobj *aobj) * finally free the aobj itself */ + UVM_OBJ_DESTROY(&aobj->u_obj); pool_put(&uvm_aobj_pool, aobj); /* @@ -552,8 +554,7 @@ uao_init(void) return; uao_initialized = true; LIST_INIT(&uao_list); - /* XXXSMP should be adaptive but vmobjlock needs to be too */ - mutex_init(&uao_list_lock, MUTEX_SPIN, IPL_NONE); + mutex_init(&uao_list_lock, MUTEX_DEFAULT, IPL_NONE); } /* @@ -566,9 +567,9 @@ uao_init(void) void uao_reference(struct uvm_object *uobj) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); uao_reference_locked(uobj); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } /* @@ -607,7 +608,7 @@ uao_reference_locked(struct uvm_object *uobj) void uao_detach(struct uvm_object *uobj) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); uao_detach_locked(uobj); } @@ -632,14 +633,14 @@ uao_detach_locked(struct uvm_object *uobj) */ if (UVM_OBJ_IS_KERN_OBJECT(uobj)) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return; } UVMHIST_LOG(maphist," (uobj=0x%x) ref=%d", uobj,uobj->uo_refs,0,0); uobj->uo_refs--; if (uobj->uo_refs) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0); return; } @@ -659,22 +660,22 @@ 
uao_detach_locked(struct uvm_object *uobj) * free the page itself. */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL) { pmap_page_protect(pg, VM_PROT_NONE); if (pg->flags & PG_BUSY) { pg->flags |= PG_WANTED; - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, false, "uao_det", 0); - simple_lock(&uobj->vmobjlock); - uvm_lock_pageq(); + mutex_enter(&uobj->vmobjlock); + mutex_enter(&uvm_pageqlock); continue; } uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT); uvm_pagefree(pg); } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* * finally, free the aobj itself. @@ -731,6 +732,8 @@ uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) voff_t curoff; UVMHIST_FUNC("uao_put"); UVMHIST_CALLED(maphist); + KASSERT(mutex_owned(&uobj->vmobjlock)); + curoff = 0; if (flags & PGO_ALLPAGES) { start = 0; @@ -761,7 +764,7 @@ uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) */ if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return 0; } @@ -792,7 +795,7 @@ uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) nextpg = NULL; /* Quell compiler warning */ } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); /* locked: both page queues and uobj */ for (;;) { @@ -855,11 +858,11 @@ uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) TAILQ_INSERT_BEFORE(pg, &curmp, listq); } pg->flags |= PG_WANTED; - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0, "uao_put", 0); - simple_lock(&uobj->vmobjlock); - uvm_lock_pageq(); + mutex_enter(&uobj->vmobjlock); + mutex_enter(&uvm_pageqlock); if (by_list) { nextpg = TAILQ_NEXT(&curmp, listq); TAILQ_REMOVE(&uobj->memq, &curmp, @@ -880,14 +883,12 @@ uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) continue; } } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); if (by_list) { TAILQ_REMOVE(&uobj->memq, &endmp, listq); - } - simple_unlock(&uobj->vmobjlock); - if (by_list) { uvm_lwp_rele(curlwp); } + mutex_exit(&uobj->vmobjlock); return 0; } @@ -1061,11 +1062,11 @@ gotpage: /* out of RAM? */ if (ptmp == NULL) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); UVMHIST_LOG(pdhist, "sleeping, ptmp == NULL\n",0,0,0,0); uvm_wait("uao_getpage"); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); continue; } @@ -1092,7 +1093,7 @@ gotpage: ptmp->flags,0,0,0); UVM_UNLOCK_AND_WAIT(ptmp, &uobj->vmobjlock, false, "uao_get", 0); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); continue; } @@ -1147,9 +1148,9 @@ gotpage: * unlock object for i/o, relock when done. */ - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); error = uvm_swap_get(ptmp, swslot, PGO_SYNCIO); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); /* * I/O done. check for errors. @@ -1174,10 +1175,10 @@ gotpage: uvm_swap_markbad(swslot, 1); } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pagefree(ptmp); - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uobj->vmobjlock); return error; } #else /* defined(VMSWAP) */ @@ -1210,7 +1211,7 @@ gotpage: */ done: - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0); return 0; } @@ -1263,8 +1264,10 @@ restart: * so this should be a rare case. 
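[editor's note] uao_detach_locked and uao_put above show the canonical busy-page dance under the new locks: on finding PG_BUSY, set PG_WANTED, drop the page-queue lock, sleep on the page with UVM_UNLOCK_AND_WAIT (which also releases vmobjlock), then retake both locks in the fixed order and rescan. A condensed sketch of just that control flow, with a single flag standing in for PG_BUSY and pthread locks for vmobjlock/pageqlock:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t obj_lock = PTHREAD_MUTEX_INITIALIZER;	/* vmobjlock */
static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;	/* pageqlock */
static pthread_cond_t page_cv = PTHREAD_COND_INITIALIZER;
static bool page_busy;

static void
drain_object(void)
{
	pthread_mutex_lock(&obj_lock);
	pthread_mutex_lock(&queue_lock);
	for (;;) {
		if (page_busy) {
			/* Drop the inner lock before sleeping. */
			pthread_mutex_unlock(&queue_lock);
			/* The wait releases obj_lock atomically. */
			while (page_busy)
				pthread_cond_wait(&page_cv, &obj_lock);
			/* Retake in the fixed order, then rescan. */
			pthread_mutex_lock(&queue_lock);
			continue;
		}
		/* ...free the page under both locks here... */
		break;
	}
	pthread_mutex_unlock(&queue_lock);
	pthread_mutex_unlock(&obj_lock);
}

/* The unbusy side: clear the flag and wake sleepers. */
static void
unbusy_page(void)
{
	pthread_mutex_lock(&obj_lock);
	page_busy = false;
	pthread_cond_broadcast(&page_cv);
	pthread_mutex_unlock(&obj_lock);
}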
*/ - if (!simple_lock_try(&aobj->u_obj.vmobjlock)) { + if (!mutex_tryenter(&aobj->u_obj.vmobjlock)) { mutex_exit(&uao_list_lock); + /* XXX Better than yielding but inadequate. */ + kpause("livelock", false, 1, NULL); goto restart; } @@ -1412,7 +1415,7 @@ uao_pagein_page(struct uvm_aobj *aobj, int pageidx) * relock and finish up. */ - simple_lock(&aobj->u_obj.vmobjlock); + mutex_enter(&aobj->u_obj.vmobjlock); switch (rv) { case 0: break; @@ -1441,10 +1444,10 @@ uao_pagein_page(struct uvm_aobj *aobj, int pageidx) /* * make sure it's on a page queue. */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->wire_count == 0) uvm_pageenqueue(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); if (pg->flags & PG_WANTED) { wakeup(pg); @@ -1467,7 +1470,7 @@ uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; - LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock)); + KASSERT(mutex_owned(&uobj->vmobjlock)); uao_dropswap_range1(aobj, start, end); } diff --git a/sys/uvm/uvm_bio.c b/sys/uvm/uvm_bio.c index ce96314f294e..7f99e25093cf 100644 --- a/sys/uvm/uvm_bio.c +++ b/sys/uvm/uvm_bio.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_bio.c,v 1.63 2007/12/01 10:40:27 yamt Exp $ */ +/* $NetBSD: uvm_bio.c,v 1.64 2008/01/02 11:49:15 ad Exp $ */ /* * Copyright (c) 1998 Chuck Silvers. @@ -34,7 +34,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.63 2007/12/01 10:40:27 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.64 2008/01/02 11:49:15 ad Exp $"); #include "opt_uvmhist.h" #include "opt_ubc.h" @@ -286,7 +286,7 @@ ubc_fault(struct uvm_faultinfo *ufi, vaddr_t ign1, struct vm_page **ign2, again: memset(pgs, 0, sizeof (pgs)); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); UVMHIST_LOG(ubchist, "slot_offset 0x%x writeoff 0x%x writelen 0x%x ", slot_offset, umap->writeoff, umap->writelen, 0); @@ -336,16 +336,16 @@ again: } uobj = pg->uobject; - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); if (pg->flags & PG_WANTED) { wakeup(pg); } KASSERT((pg->flags & PG_FAKE) == 0); if (pg->flags & PG_RELEASED) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uobj->vmobjlock); continue; } if (pg->loan_count != 0) { @@ -363,7 +363,7 @@ again: newpg = uvm_loanbreak(pg); if (newpg == NULL) { uvm_page_unbusy(&pg, 1); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); uvm_wait("ubc_loanbrk"); continue; /* will re-fault */ } @@ -386,12 +386,12 @@ again: mask = rdonly ? 
~VM_PROT_WRITE : VM_PROT_ALL; error = pmap_enter(ufi->orig_map->pmap, va, VM_PAGE_TO_PHYS(pg), prot & mask, PMAP_CANFAIL | (access_type & mask)); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); pg->flags &= ~(PG_BUSY|PG_WANTED); UVM_PAGE_OWN(pg, NULL); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); if (error) { UVMHIST_LOG(ubchist, "pmap_enter fail %d", error, 0, 0, 0); @@ -452,13 +452,13 @@ ubc_alloc(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, int advice, */ again: - simple_lock(&ubc_object.uobj.vmobjlock); + mutex_enter(&ubc_object.uobj.vmobjlock); umap = ubc_find_mapping(uobj, umap_offset); if (umap == NULL) { UBC_EVCNT_INCR(wincachemiss); umap = TAILQ_FIRST(UBC_QUEUE(offset)); if (umap == NULL) { - simple_unlock(&ubc_object.uobj.vmobjlock); + mutex_exit(&ubc_object.uobj.vmobjlock); kpause("ubc_alloc", false, hz, NULL); goto again; } @@ -501,7 +501,7 @@ again: umap->refcount++; umap->advice = advice; - simple_unlock(&ubc_object.uobj.vmobjlock); + mutex_exit(&ubc_object.uobj.vmobjlock); UVMHIST_LOG(ubchist, "umap %p refs %d va %p flags 0x%x", umap, umap->refcount, va, flags); @@ -522,7 +522,7 @@ again: } again_faultbusy: memset(pgs, 0, sizeof(pgs)); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); error = (*uobj->pgops->pgo_get)(uobj, trunc_page(offset), pgs, &npages, 0, VM_PROT_READ | VM_PROT_WRITE, advice, gpflags); UVMHIST_LOG(ubchist, "faultbusy getpages %d", error, 0, 0, 0); @@ -534,17 +534,17 @@ again_faultbusy: KASSERT(pg->uobject == uobj); if (pg->loan_count != 0) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); if (pg->loan_count != 0) { pg = uvm_loanbreak(pg); } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); if (pg == NULL) { pmap_kremove(va, ubc_winsize); pmap_update(pmap_kernel()); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); uvm_page_unbusy(pgs, npages); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); uvm_wait("ubc_alloc"); goto again_faultbusy; } @@ -598,7 +598,7 @@ ubc_release(void *va, int flags) memset((char *)umapva + endoff, 0, zerolen); } umap->flags &= ~UMAP_PAGES_LOCKED; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { rv = pmap_extract(pmap_kernel(), umapva + slot_offset + (i << PAGE_SHIFT), &pa); @@ -608,18 +608,18 @@ ubc_release(void *va, int flags) KASSERT(pgs[i]->loan_count == 0); uvm_pageactivate(pgs[i]); } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); pmap_kremove(umapva, ubc_winsize); pmap_update(pmap_kernel()); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); uvm_page_unbusy(pgs, npages); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); unmapped = true; } else { unmapped = false; } - simple_lock(&ubc_object.uobj.vmobjlock); + mutex_enter(&ubc_object.uobj.vmobjlock); umap->writeoff = 0; umap->writelen = 0; umap->refcount--; @@ -649,7 +649,7 @@ ubc_release(void *va, int flags) } } UVMHIST_LOG(ubchist, "umap %p refs %d", umap, umap->refcount, 0, 0); - simple_unlock(&ubc_object.uobj.vmobjlock); + mutex_exit(&ubc_object.uobj.vmobjlock); } /* @@ -684,6 +684,7 @@ ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo, int advice, * do it now. it's safe to use memset here * because we just mapped the pages above. 
*/ + printf("%s: error=%d\n", __func__, error); memset(win, 0, bytelen); } ubc_release(win, flags); @@ -712,7 +713,7 @@ ubc_flush(struct uvm_object *uobj, voff_t start, voff_t end) UVMHIST_LOG(ubchist, "uobj %p start 0x%lx end 0x%lx", uobj, start, end, 0); - simple_lock(&ubc_object.uobj.vmobjlock); + mutex_enter(&ubc_object.uobj.vmobjlock); for (umap = ubc_object.umap; umap < &ubc_object.umap[ubc_nwins]; umap++) { @@ -738,6 +739,6 @@ ubc_flush(struct uvm_object *uobj, voff_t start, voff_t end) TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap, inactive); } pmap_update(pmap_kernel()); - simple_unlock(&ubc_object.uobj.vmobjlock); + mutex_exit(&ubc_object.uobj.vmobjlock); } #endif /* notused */ diff --git a/sys/uvm/uvm_device.c b/sys/uvm/uvm_device.c index 102786711a7c..0af531c94645 100644 --- a/sys/uvm/uvm_device.c +++ b/sys/uvm/uvm_device.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_device.c,v 1.52 2007/12/08 15:33:09 ad Exp $ */ +/* $NetBSD: uvm_device.c,v 1.53 2008/01/02 11:49:16 ad Exp $ */ /* * @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_device.c,v 1.52 2007/12/08 15:33:09 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_device.c,v 1.53 2008/01/02 11:49:16 ad Exp $"); #include "opt_uvmhist.h" @@ -61,7 +61,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_device.c,v 1.52 2007/12/08 15:33:09 ad Exp $"); LIST_HEAD(udv_list_struct, uvm_device); static struct udv_list_struct udv_list; -static struct simplelock udv_lock; +static kmutex_t udv_lock; /* * functions @@ -99,7 +99,7 @@ static void udv_init(void) { LIST_INIT(&udv_list); - simple_lock_init(&udv_lock); + mutex_init(&udv_lock, MUTEX_DEFAULT, IPL_NONE); } /* @@ -171,7 +171,7 @@ udv_attach(void *arg, vm_prot_t accessprot, * first, attempt to find it on the main list */ - simple_lock(&udv_lock); + mutex_enter(&udv_lock); LIST_FOREACH(lcv, &udv_list, u_list) { if (device == lcv->u_device) break; @@ -197,21 +197,21 @@ udv_attach(void *arg, vm_prot_t accessprot, /* we are now holding it */ lcv->u_flags |= UVM_DEVICE_HOLD; - simple_unlock(&udv_lock); + mutex_exit(&udv_lock); /* * bump reference count, unhold, return. */ - simple_lock(&lcv->u_obj.vmobjlock); + mutex_enter(&lcv->u_obj.vmobjlock); lcv->u_obj.uo_refs++; - simple_unlock(&lcv->u_obj.vmobjlock); + mutex_exit(&lcv->u_obj.vmobjlock); - simple_lock(&udv_lock); + mutex_enter(&udv_lock); if (lcv->u_flags & UVM_DEVICE_WANTED) wakeup(lcv); lcv->u_flags &= ~(UVM_DEVICE_WANTED|UVM_DEVICE_HOLD); - simple_unlock(&udv_lock); + mutex_exit(&udv_lock); return(&lcv->u_obj); } @@ -219,11 +219,11 @@ udv_attach(void *arg, vm_prot_t accessprot, * did not find it on main list. need to malloc a new one. 
*/ - simple_unlock(&udv_lock); + mutex_exit(&udv_lock); /* NOTE: we could sleep in the following malloc() */ MALLOC(udv, struct uvm_device *, sizeof(*udv), M_TEMP, M_WAITOK); - simple_lock(&udv_lock); + mutex_enter(&udv_lock); /* * now we have to double check to make sure no one added it @@ -241,7 +241,7 @@ udv_attach(void *arg, vm_prot_t accessprot, */ if (lcv) { - simple_unlock(&udv_lock); + mutex_exit(&udv_lock); FREE(udv, M_TEMP); continue; } @@ -255,7 +255,7 @@ udv_attach(void *arg, vm_prot_t accessprot, udv->u_flags = 0; udv->u_device = device; LIST_INSERT_HEAD(&udv_list, udv, u_list); - simple_unlock(&udv_lock); + mutex_exit(&udv_lock); return(&udv->u_obj); } /*NOTREACHED*/ @@ -276,11 +276,11 @@ udv_reference(struct uvm_object *uobj) { UVMHIST_FUNC("udv_reference"); UVMHIST_CALLED(maphist); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); uobj->uo_refs++; UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)", uobj, uobj->uo_refs,0,0); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } /* @@ -301,10 +301,10 @@ udv_detach(struct uvm_object *uobj) * loop until done */ again: - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); if (uobj->uo_refs > 1) { uobj->uo_refs--; - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); UVMHIST_LOG(maphist," <- done, uobj=0x%x, ref=%d", uobj,uobj->uo_refs,0,0); return; @@ -314,10 +314,10 @@ again: * is it being held? if so, wait until others are done. */ - simple_lock(&udv_lock); + mutex_enter(&udv_lock); if (udv->u_flags & UVM_DEVICE_HOLD) { udv->u_flags |= UVM_DEVICE_WANTED; - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); UVM_UNLOCK_AND_WAIT(udv, &udv_lock, false, "udv_detach",0); goto again; } @@ -329,8 +329,9 @@ again: LIST_REMOVE(udv, u_list); if (udv->u_flags & UVM_DEVICE_WANTED) wakeup(udv); - simple_unlock(&udv_lock); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&udv_lock); + mutex_exit(&uobj->vmobjlock); + UVM_OBJ_DESTROY(uobj); FREE(udv, M_TEMP); UVMHIST_LOG(maphist," <- done, freed uobj=0x%x", uobj,0,0,0); } diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h index 6f061f3c344a..ac6804b76f98 100644 --- a/sys/uvm/uvm_extern.h +++ b/sys/uvm/uvm_extern.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_extern.h,v 1.142 2007/12/26 22:11:53 christos Exp $ */ +/* $NetBSD: uvm_extern.h,v 1.143 2008/01/02 11:49:16 ad Exp $ */ /* * @@ -696,6 +696,8 @@ void uvm_aio_aiodone(struct buf *); void uvm_pageout(void *); struct work; void uvm_aiodone_worker(struct work *, void *); +void uvm_pageout_start(int); +void uvm_pageout_done(int); void uvm_estimatepageable(int *, int *); /* uvm_pglist.c */ diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c index 35492408cea1..a9ac94fef028 100644 --- a/sys/uvm/uvm_fault.c +++ b/sys/uvm/uvm_fault.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_fault.c,v 1.121 2007/10/11 19:53:43 ad Exp $ */ +/* $NetBSD: uvm_fault.c,v 1.122 2008/01/02 11:49:16 ad Exp $ */ /* * @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.121 2007/10/11 19:53:43 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.122 2008/01/02 11:49:16 ad Exp $"); #include "opt_uvmhist.h" @@ -198,17 +198,17 @@ uvmfault_anonflush(struct vm_anon **anons, int n) for (lcv = 0 ; lcv < n ; lcv++) { if (anons[lcv] == NULL) continue; - simple_lock(&anons[lcv]->an_lock); + mutex_enter(&anons[lcv]->an_lock); pg = anons[lcv]->an_page; if (pg && (pg->flags & PG_BUSY) == 0) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->wire_count == 0) { pmap_clear_reference(pg); 
uvm_pagedeactivate(pg); } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } - simple_unlock(&anons[lcv]->an_lock); + mutex_exit(&anons[lcv]->an_lock); } } @@ -290,7 +290,7 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, int error; UVMHIST_FUNC("uvmfault_anonget"); UVMHIST_CALLED(maphist); - LOCK_ASSERT(simple_lock_held(&anon->an_lock)); + KASSERT(mutex_owned(&anon->an_lock)); error = 0; uvmexp.fltanget++; @@ -409,7 +409,7 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, amap_lock(amap); } if (locked || we_own) - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); /* * if we own the page (i.e. we set PG_BUSY), then we need @@ -449,15 +449,15 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, * pmap_page_protect it... */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); if (locked) uvmfault_unlockall(ufi, amap, NULL, anon); else - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); UVMHIST_LOG(maphist, "<- ERROR", 0,0,0,0); return error; } @@ -490,13 +490,13 @@ released: * we've successfully read the page, activate it. */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE); UVM_PAGE_OWN(pg, NULL); if (!locked) - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); #else /* defined(VMSWAP) */ panic("%s: we_own", __func__); #endif /* defined(VMSWAP) */ @@ -583,13 +583,16 @@ uvmfault_promote(struct uvm_faultinfo *ufi, KASSERT(uobjpage != NULL); KASSERT(uobjpage == PGO_DONTCARE || (uobjpage->flags & PG_BUSY) != 0); KASSERT(mutex_owned(&amap->am_l)); - LOCK_ASSERT(oanon == NULL || simple_lock_held(&oanon->an_lock)); - LOCK_ASSERT(uobj == NULL || simple_lock_held(&uobj->vmobjlock)); + KASSERT(oanon == NULL || mutex_owned(&oanon->an_lock)); + KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock)); +#if 0 + KASSERT(*spare == NULL || !mutex_owned(&(*spare)->an_lock)); +#endif if (*spare != NULL) { anon = *spare; *spare = NULL; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); } else if (ufi->map != kernel_map) { anon = uvm_analloc(); } else { @@ -606,7 +609,7 @@ uvmfault_promote(struct uvm_faultinfo *ufi, if (*spare == NULL) { goto nomem; } - simple_unlock(&(*spare)->an_lock); + mutex_exit(&(*spare)->an_lock); error = ERESTART; goto done; } @@ -632,7 +635,7 @@ uvmfault_promote(struct uvm_faultinfo *ufi, if (pg == NULL) { /* save anon for the next try. */ if (anon != NULL) { - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); *spare = anon; } @@ -911,7 +914,7 @@ ReFault: /* flush object? */ if (uobj) { uoff = (startva - ufi.entry->start) + ufi.entry->offset; - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); (void) (uobj->pgops->pgo_put)(uobj, uoff, uoff + (nback << PAGE_SHIFT), PGO_DEACTIVATE); } @@ -965,13 +968,13 @@ ReFault: continue; } anon = anons[lcv]; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); /* ignore loaned pages */ if (anon->an_page && anon->an_page->loan_count == 0 && (anon->an_page->flags & PG_BUSY) == 0) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageenqueue(anon->an_page); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); UVMHIST_LOG(maphist, " MAPPING: n anon: pm=0x%x, va=0x%x, pg=0x%x", ufi.orig_map->pmap, currva, anon->an_page, 0); @@ -990,7 +993,7 @@ ReFault: PMAP_CANFAIL | (VM_MAPENT_ISWIRED(ufi.entry) ? 
PMAP_WIRED : 0)); } - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); pmap_update(ufi.orig_map->pmap); } @@ -1017,8 +1020,7 @@ ReFault: */ if (uobj && shadowed == false && uobj->pgops->pgo_fault != NULL) { - simple_lock(&uobj->vmobjlock); - + mutex_enter(&uobj->vmobjlock); /* locked: maps(read), amap (if there), uobj */ error = uobj->pgops->pgo_fault(&ufi, startva, pages, npages, centeridx, access_type, PGO_LOCKED|PGO_SYNCIO); @@ -1042,8 +1044,7 @@ ReFault: */ if (uobj && shadowed == false) { - simple_lock(&uobj->vmobjlock); - + mutex_enter(&uobj->vmobjlock); /* locked (!shadowed): maps(read), amap (if there), uobj */ /* * the following call to pgo_get does _not_ change locking state @@ -1099,9 +1100,9 @@ ReFault: * we can just directly enter the pages. */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageenqueue(curpg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); UVMHIST_LOG(maphist, " MAPPING: n obj: pm=0x%x, va=0x%x, pg=0x%x", ufi.orig_map->pmap, currva, curpg, 0); @@ -1153,7 +1154,7 @@ ReFault: KASSERT(mutex_owned(&amap->am_l)); } else { KASSERT(amap == NULL || mutex_owned(&amap->am_l)); - LOCK_ASSERT(uobj == NULL || simple_lock_held(&uobj->vmobjlock)); + KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock)); KASSERT(uobjpage == NULL || (uobjpage->flags & PG_BUSY) != 0); } @@ -1187,11 +1188,11 @@ ReFault: anon = anons[centeridx]; UVMHIST_LOG(maphist, " case 1 fault: anon=0x%x", anon, 0,0,0); - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); /* locked: maps(read), amap, anon */ KASSERT(mutex_owned(&amap->am_l)); - LOCK_ASSERT(simple_lock_held(&anon->an_lock)); + KASSERT(mutex_owned(&anon->an_lock)); /* * no matter if we have case 1A or case 1B we are going to need to @@ -1231,8 +1232,8 @@ ReFault: /* locked: maps(read), amap, anon, uobj(if one) */ KASSERT(mutex_owned(&amap->am_l)); - LOCK_ASSERT(simple_lock_held(&anon->an_lock)); - LOCK_ASSERT(uobj == NULL || simple_lock_held(&uobj->vmobjlock)); + KASSERT(mutex_owned(&anon->an_lock)); + KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock)); /* * special handling for loaned pages @@ -1284,7 +1285,7 @@ ReFault: /* force reload */ pmap_page_protect(anon->an_page, VM_PROT_NONE); - uvm_lock_pageq(); /* KILL loan */ + mutex_enter(&uvm_pageqlock); /* KILL loan */ anon->an_page->uanon = NULL; /* in case we owned */ @@ -1302,7 +1303,7 @@ ReFault: } if (uobj) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); uobj = NULL; } @@ -1312,7 +1313,7 @@ ReFault: pg->pqflags |= PQ_ANON; uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); pg->flags &= ~(PG_BUSY|PG_FAKE); UVM_PAGE_OWN(pg, NULL); @@ -1353,9 +1354,9 @@ ReFault: } pg = anon->an_page; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); pg->flags &= ~(PG_BUSY|PG_FAKE); UVM_PAGE_OWN(pg, NULL); @@ -1380,8 +1381,8 @@ ReFault: /* locked: maps(read), amap, oanon, anon (if different from oanon) */ KASSERT(mutex_owned(&amap->am_l)); - LOCK_ASSERT(simple_lock_held(&anon->an_lock)); - LOCK_ASSERT(simple_lock_held(&oanon->an_lock)); + KASSERT(mutex_owned(&anon->an_lock)); + KASSERT(mutex_owned(&oanon->an_lock)); /* * now map the page in. @@ -1402,7 +1403,7 @@ ReFault: */ if (anon != oanon) - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); uvmfault_unlockall(&ufi, amap, uobj, oanon); if (!uvm_reclaimable()) { UVMHIST_LOG(maphist, @@ -1420,7 +1421,7 @@ ReFault: * ... update the page queues. 
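[editor's note] The uvmfault_promote hunks above show another recurring shape: a caller-provided *spare anon, so that when an allocation has to happen after the fault path drops its locks, the fresh object survives the ERESTART and is consumed on the retry instead of being leaked or redone. A hedged sketch of just that protocol; resource, try_promote, and the result enum are invented for illustration:

#include <stdlib.h>

struct resource { int dummy; };
enum result { OP_DONE, OP_RETRY, OP_FAIL };

/*
 * The attempt reports OP_RETRY when it had to allocate in a context
 * where its locks were (notionally) dropped; the allocation is parked
 * in *spare and reused verbatim on the next pass.
 */
static enum result
try_promote(struct resource **spare)
{
	struct resource *r;

	if (*spare != NULL) {
		r = *spare;			/* reuse saved allocation */
		*spare = NULL;
	} else {
		r = malloc(sizeof(*r));		/* locks dropped here */
		if (r == NULL)
			return OP_FAIL;		/* ENOMEM */
		*spare = r;			/* park it for the retry */
		return OP_RETRY;		/* ERESTART */
	}
	/* ...proceed with r under freshly revalidated locks... */
	free(r);
	return OP_DONE;
}

static enum result
promote(void)
{
	struct resource *spare = NULL;
	enum result rv;

	while ((rv = try_promote(&spare)) == OP_RETRY)
		continue;			/* refault and retry */
	free(spare);				/* NULL is fine */
	return rv;
}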
*/ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (wire_fault) { uvm_pagewire(pg); @@ -1436,14 +1437,14 @@ ReFault: } else { uvm_pageactivate(pg); } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* * done case 1! finish up by unlocking everything and returning success */ if (anon != oanon) - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); uvmfault_unlockall(&ufi, amap, uobj, oanon); pmap_update(ufi.orig_map->pmap); error = 0; @@ -1459,7 +1460,7 @@ Case2: * maps(read), amap(if there), uobj(if !null), uobjpage(if !null) */ KASSERT(amap == NULL || mutex_owned(&amap->am_l)); - LOCK_ASSERT(uobj == NULL || simple_lock_held(&uobj->vmobjlock)); + KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock)); KASSERT(uobjpage == NULL || (uobjpage->flags & PG_BUSY) != 0); /* @@ -1527,9 +1528,9 @@ Case2: /* locked: uobjpage */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageactivate(uobjpage); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* * re-verify the state of the world by first trying to relock @@ -1540,7 +1541,7 @@ Case2: if (locked && amap) amap_lock(amap); uobj = uobjpage->uobject; - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); /* locked(locked): maps(read), amap(if !null), uobj, uobjpage */ /* locked(!locked): uobj, uobjpage */ @@ -1577,7 +1578,7 @@ Case2: } uobjpage->flags &= ~(PG_BUSY|PG_WANTED); UVM_PAGE_OWN(uobjpage, NULL); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); goto ReFault; } @@ -1595,7 +1596,7 @@ Case2: * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj) */ KASSERT(amap == NULL || mutex_owned(&amap->am_l)); - LOCK_ASSERT(uobj == NULL || simple_lock_held(&uobj->vmobjlock)); + KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock)); KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0); /* @@ -1722,7 +1723,7 @@ Case2: wakeup(uobjpage); uobjpage->flags &= ~(PG_BUSY|PG_WANTED); UVM_PAGE_OWN(uobjpage, NULL); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); uobj = NULL; UVMHIST_LOG(maphist, @@ -1750,9 +1751,9 @@ Case2: * note: pg is either the uobjpage or the new page in the new anon */ KASSERT(amap == NULL || mutex_owned(&amap->am_l)); - LOCK_ASSERT(uobj == NULL || simple_lock_held(&uobj->vmobjlock)); + KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock)); KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0); - LOCK_ASSERT(anon == NULL || simple_lock_held(&anon->an_lock)); + KASSERT(anon == NULL || mutex_owned(&anon->an_lock)); KASSERT((pg->flags & PG_BUSY) != 0); /* @@ -1801,7 +1802,7 @@ Case2: goto ReFault; } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (wire_fault) { uvm_pagewire(pg); if (pg->pqflags & PQ_AOBJ) { @@ -1820,7 +1821,7 @@ Case2: } else { uvm_pageactivate(pg); } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); if (pg->flags & PG_WANTED) wakeup(pg); @@ -1924,7 +1925,7 @@ uvm_fault_unwire_locked(struct vm_map *map, vaddr_t start, vaddr_t end) * we can call uvm_pageunwire. */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); /* * find the beginning map entry for the region. 
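[editor's note] Throughout these hunks uvm_lock_pageq()/uvm_unlock_pageq() become explicit mutex_enter/mutex_exit on the new global uvm_pageqlock declared in uvm_init.c: one kmutex now visibly guards the active/inactive page queues instead of a simplelock hidden behind wrapper macros, and uvm_fault_unwire_locked above holds it across its whole walk. A toy model of a single lock guarding LRU queues, all names illustrative; it assumes the page already sits on one of the two queues:

#include <sys/queue.h>
#include <pthread.h>
#include <stdbool.h>

struct page {
	TAILQ_ENTRY(page) pageq;
	bool active;
};
TAILQ_HEAD(pglist, page);

static struct pglist active_q = TAILQ_HEAD_INITIALIZER(active_q);
static struct pglist inactive_q = TAILQ_HEAD_INITIALIZER(inactive_q);
static pthread_mutex_t pageqlock = PTHREAD_MUTEX_INITIALIZER;

/* Promote a queued page to the head of the active queue. */
static void
page_activate(struct page *pg)
{
	pthread_mutex_lock(&pageqlock);
	if (pg->active)
		TAILQ_REMOVE(&active_q, pg, pageq);
	else
		TAILQ_REMOVE(&inactive_q, pg, pageq);
	TAILQ_INSERT_HEAD(&active_q, pg, pageq);
	pg->active = true;
	pthread_mutex_unlock(&pageqlock);
}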
@@ -1961,5 +1962,5 @@ uvm_fault_unwire_locked(struct vm_map *map, vaddr_t start, vaddr_t end) uvm_pageunwire(pg); } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } diff --git a/sys/uvm/uvm_fault_i.h b/sys/uvm/uvm_fault_i.h index d31200ca208c..5102c97217d4 100644 --- a/sys/uvm/uvm_fault_i.h +++ b/sys/uvm/uvm_fault_i.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_fault_i.h,v 1.23 2007/02/22 06:05:01 thorpej Exp $ */ +/* $NetBSD: uvm_fault_i.h,v 1.24 2008/01/02 11:49:16 ad Exp $ */ /* * @@ -76,9 +76,9 @@ uvmfault_unlockall(struct uvm_faultinfo *ufi, struct vm_amap *amap, { if (anon) - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); if (uobj) - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); if (amap) amap_unlock(amap); uvmfault_unlockmaps(ufi, false); diff --git a/sys/uvm/uvm_glue.c b/sys/uvm/uvm_glue.c index 8410634ec50c..f21219085cfc 100644 --- a/sys/uvm/uvm_glue.c +++ b/sys/uvm/uvm_glue.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_glue.c,v 1.113 2007/11/06 00:42:46 ad Exp $ */ +/* $NetBSD: uvm_glue.c,v 1.114 2008/01/02 11:49:16 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_glue.c,v 1.113 2007/11/06 00:42:46 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_glue.c,v 1.114 2008/01/02 11:49:16 ad Exp $"); #include "opt_coredump.h" #include "opt_kgdb.h" @@ -86,6 +86,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_glue.c,v 1.113 2007/11/06 00:42:46 ad Exp $"); #include #include #include +#include #include @@ -809,10 +810,15 @@ void uvm_lwp_hold(struct lwp *l) { - /* XXXSMP mutex_enter(&l->l_swaplock); */ - if (l->l_holdcnt++ == 0 && (l->l_flag & LW_INMEM) == 0) - uvm_swapin(l); - /* XXXSMP mutex_exit(&l->l_swaplock); */ + if (l == curlwp) { + atomic_inc_uint(&l->l_holdcnt); + } else { + mutex_enter(&l->l_swaplock); + if (atomic_inc_uint_nv(&l->l_holdcnt) == 1 && + (l->l_flag & LW_INMEM) == 0) + uvm_swapin(l); + mutex_exit(&l->l_swaplock); + } } /* @@ -826,9 +832,7 @@ uvm_lwp_rele(struct lwp *l) KASSERT(l->l_holdcnt != 0); - /* XXXSMP mutex_enter(&l->l_swaplock); */ - l->l_holdcnt--; - /* XXXSMP mutex_exit(&l->l_swaplock); */ + atomic_dec_uint(&l->l_holdcnt); } #ifdef COREDUMP diff --git a/sys/uvm/uvm_init.c b/sys/uvm/uvm_init.c index 7e5d328d09b1..d0f0e9c257c5 100644 --- a/sys/uvm/uvm_init.c +++ b/sys/uvm/uvm_init.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_init.c,v 1.30 2007/11/14 11:04:08 yamt Exp $ */ +/* $NetBSD: uvm_init.c,v 1.31 2008/01/02 11:49:17 ad Exp $ */ /* * @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_init.c,v 1.30 2007/11/14 11:04:08 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_init.c,v 1.31 2008/01/02 11:49:17 ad Exp $"); #include #include @@ -54,6 +54,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_init.c,v 1.30 2007/11/14 11:04:08 yamt Exp $"); #include #include +#include /* * struct uvm: we store most global vars in this structure to make them @@ -64,8 +65,8 @@ struct uvm uvm; /* decl */ struct uvmexp uvmexp; /* decl */ struct uvm_object *uvm_kernel_object; +kmutex_t uvm_pageqlock; kmutex_t uvm_fpageqlock; -kmutex_t uvm_pagedaemon_lock; kmutex_t uvm_kentry_lock; kmutex_t uvm_swap_data_lock; kmutex_t uvm_scheduler_mutex; @@ -175,4 +176,10 @@ uvm_init(void) */ uvm_anon_init(); + + /* + * init readahead module + */ + + uvm_ra_init(); } diff --git a/sys/uvm/uvm_km.c b/sys/uvm/uvm_km.c index d7edbb0d068e..b5d08b842d77 100644 --- a/sys/uvm/uvm_km.c +++ b/sys/uvm/uvm_km.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_km.c,v 1.96 2007/07/21 20:52:59 ad Exp $ */ +/* $NetBSD: uvm_km.c,v 1.97 2008/01/02 
11:49:17 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -90,10 +90,8 @@ * * the vm system has several standard kernel submaps, including: * kmem_map => contains only wired kernel memory for the kernel - * malloc. *** access to kmem_map must be protected - * by splvm() because we are allowed to call malloc() - * at interrupt time *** - * mb_map => memory for large mbufs, *** protected by splvm *** + * malloc. + * mb_map => memory for large mbufs, * pager_map => used to map "buf" structures into kernel space * exec_map => used during exec to handle exec args * etc... @@ -130,7 +128,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_km.c,v 1.96 2007/07/21 20:52:59 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_km.c,v 1.97 2008/01/02 11:49:17 ad Exp $"); #include "opt_uvmhist.h" @@ -221,6 +219,7 @@ km_vacache_init(struct vm_map *map, const char *name, size_t size) KASSERT(VM_MAP_IS_KERNEL(map)); KASSERT(size < (vm_map_max(map) - vm_map_min(map)) / 2); /* sanity */ + vmk = vm_map_to_kernel(map); pp = &vmk->vmk_vacache; pa = &vmk->vmk_vacache_allocator; @@ -265,16 +264,8 @@ void uvm_km_va_drain(struct vm_map *map, uvm_flag_t flags) { struct vm_map_kernel *vmk = vm_map_to_kernel(map); - const bool intrsafe = (map->flags & VM_MAP_INTRSAFE) != 0; - int s = 0xdeadbeaf; /* XXX: gcc */ - if (intrsafe) { - s = splvm(); - } callback_run_roundrobin(&vmk->vmk_reclaim_callback, NULL); - if (intrsafe) { - splx(s); - } } /* @@ -420,7 +411,7 @@ uvm_km_pgremove(vaddr_t startva, vaddr_t endva) KASSERT(startva < endva); KASSERT(endva <= VM_MAX_KERNEL_ADDRESS); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); for (curoff = start; curoff < end; curoff = nextoff) { nextoff = curoff + PAGE_SIZE; @@ -429,7 +420,7 @@ uvm_km_pgremove(vaddr_t startva, vaddr_t endva) pg->flags |= PG_WANTED; UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0, "km_pgrm", 0); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); nextoff = curoff; continue; } @@ -444,12 +435,12 @@ uvm_km_pgremove(vaddr_t startva, vaddr_t endva) } uao_dropswap(uobj, curoff >> PAGE_SHIFT); if (pg != NULL) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); if (swpgonlydelta > 0) { mutex_enter(&uvm_swap_data_lock); @@ -511,10 +502,10 @@ uvm_km_check_empty(vaddr_t start, vaddr_t end, bool intrsafe) if (!intrsafe) { const struct vm_page *pg; - simple_lock(&uvm_kernel_object->vmobjlock); + mutex_enter(&uvm_kernel_object->vmobjlock); pg = uvm_pagelookup(uvm_kernel_object, va - vm_map_min(kernel_map)); - simple_unlock(&uvm_kernel_object->vmobjlock); + mutex_exit(&uvm_kernel_object->vmobjlock); if (pg) { panic("uvm_km_check_empty: " "has page hashed at %p", (const void *)va); @@ -693,17 +684,11 @@ uvm_km_alloc_poolpage_cache(struct vm_map *map, bool waitok) struct vm_page *pg; struct pool *pp = &vm_map_to_kernel(map)->vmk_vacache; vaddr_t va; - int s = 0xdeadbeaf; /* XXX: gcc */ - const bool intrsafe = (map->flags & VM_MAP_INTRSAFE) != 0; if ((map->flags & VM_MAP_VACACHE) == 0) return uvm_km_alloc_poolpage(map, waitok); - if (intrsafe) - s = splvm(); va = (vaddr_t)pool_get(pp, waitok ? 
PR_WAITOK : PR_NOWAIT); - if (intrsafe) - splx(s); if (va == 0) return 0; KASSERT(!pmap_extract(pmap_kernel(), va, NULL)); @@ -714,11 +699,7 @@ again: uvm_wait("plpg"); goto again; } else { - if (intrsafe) - s = splvm(); pool_put(pp, (void *)va); - if (intrsafe) - splx(s); return 0; } } @@ -751,15 +732,9 @@ uvm_km_alloc_poolpage(struct vm_map *map, bool waitok) return (va); #else vaddr_t va; - int s = 0xdeadbeaf; /* XXX: gcc */ - const bool intrsafe = (map->flags & VM_MAP_INTRSAFE) != 0; - if (intrsafe) - s = splvm(); va = uvm_km_alloc(map, PAGE_SIZE, 0, (waitok ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK) | UVM_KMF_WIRED); - if (intrsafe) - splx(s); return (va); #endif /* PMAP_MAP_POOLPAGE */ } @@ -778,8 +753,6 @@ uvm_km_free_poolpage_cache(struct vm_map *map, vaddr_t addr) uvm_km_free_poolpage(map, addr); #else struct pool *pp; - int s = 0xdeadbeaf; /* XXX: gcc */ - const bool intrsafe = (map->flags & VM_MAP_INTRSAFE) != 0; if ((map->flags & VM_MAP_VACACHE) == 0) { uvm_km_free_poolpage(map, addr); @@ -794,11 +767,7 @@ uvm_km_free_poolpage_cache(struct vm_map *map, vaddr_t addr) #endif KASSERT(!pmap_extract(pmap_kernel(), addr, NULL)); pp = &vm_map_to_kernel(map)->vmk_vacache; - if (intrsafe) - s = splvm(); pool_put(pp, (void *)addr); - if (intrsafe) - splx(s); #endif } @@ -812,13 +781,6 @@ uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr) pa = PMAP_UNMAP_POOLPAGE(addr); uvm_pagefree(PHYS_TO_VM_PAGE(pa)); #else - int s = 0xdeadbeaf; /* XXX: gcc */ - const bool intrsafe = (map->flags & VM_MAP_INTRSAFE) != 0; - - if (intrsafe) - s = splvm(); uvm_km_free(map, addr, PAGE_SIZE, UVM_KMF_WIRED); - if (intrsafe) - splx(s); #endif /* PMAP_UNMAP_POOLPAGE */ } diff --git a/sys/uvm/uvm_loan.c b/sys/uvm/uvm_loan.c index 30a2733f76f2..1127a9c5a6fc 100644 --- a/sys/uvm/uvm_loan.c +++ b/sys/uvm/uvm_loan.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_loan.c,v 1.69 2007/12/01 10:40:28 yamt Exp $ */ +/* $NetBSD: uvm_loan.c,v 1.70 2008/01/02 11:49:17 ad Exp $ */ /* * @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.69 2007/12/01 10:40:28 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.70 2008/01/02 11:49:17 ad Exp $"); #include #include @@ -185,6 +185,8 @@ uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags) /* locked: if (rv > 0) => map, amap, uobj [o.w. unlocked] */ KASSERT(rv > 0 || aref->ar_amap == NULL || !mutex_owned(&aref->ar_amap->am_l)); + KASSERT(rv > 0 || uobj == NULL || + !mutex_owned(&uobj->vmobjlock)); /* total failure */ if (rv < 0) { @@ -357,7 +359,7 @@ uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags, */ if (flags & UVM_LOAN_TOANON) { - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); pg = anon->an_page; if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) { if (pg->wire_count > 0) { @@ -372,7 +374,7 @@ uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags, anon->an_ref++; **output = anon; (*output)++; - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); UVMHIST_LOG(loanhist, "->A done", 0,0,0,0); return (1); } @@ -383,7 +385,7 @@ uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags, * this for us. 
*/ - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon); /* @@ -414,9 +416,9 @@ uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags, */ pg = anon->an_page; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->wire_count > 0) { - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); UVMHIST_LOG(loanhist, "->K wired %p", pg,0,0,0); KASSERT(pg->uobject == NULL); uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, @@ -428,14 +430,14 @@ uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags, } pg->loan_count++; uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); **output = pg; (*output)++; /* unlock anon and return success */ if (pg->uobject) - simple_unlock(&pg->uobject->vmobjlock); - simple_unlock(&anon->an_lock); + mutex_exit(&pg->uobject->vmobjlock); + mutex_exit(&anon->an_lock); UVMHIST_LOG(loanhist, "->K done", 0,0,0,0); return (1); } @@ -463,12 +465,12 @@ uvm_loanpage(struct vm_page **pgpp, int npages) KASSERT(pg->uobject != NULL); KASSERT(pg->uobject == pgpp[0]->uobject); KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT))); - LOCK_ASSERT(simple_lock_held(&pg->uobject->vmobjlock)); + KASSERT(mutex_owned(&pg->uobject->vmobjlock)); KASSERT(pg->flags & PG_BUSY); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->wire_count > 0) { - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0); error = EBUSY; break; @@ -478,7 +480,7 @@ uvm_loanpage(struct vm_page **pgpp, int npages) } pg->loan_count++; uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } uvm_page_unbusy(pgpp, npages); @@ -487,11 +489,11 @@ uvm_loanpage(struct vm_page **pgpp, int npages) /* * backout what we've done */ - struct simplelock *slock = &pgpp[0]->uobject->vmobjlock; + kmutex_t *slock = &pgpp[0]->uobject->vmobjlock; - simple_unlock(slock); + mutex_exit(slock); uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE); - simple_lock(slock); + mutex_enter(slock); } UVMHIST_LOG(loanhist, "done %d", error,0,0,0); @@ -519,7 +521,7 @@ uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages, struct vm_page **pgpp; int error; int i; - struct simplelock *slock; + kmutex_t *slock; pgpp = origpgpp; for (ndone = 0; ndone < orignpages; ) { @@ -528,7 +530,7 @@ uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages, int npendloan = 0xdead; /* XXX gcc */ reget: npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); error = (*uobj->pgops->pgo_get)(uobj, pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0, VM_PROT_READ, 0, PGO_SYNCIO); @@ -544,7 +546,7 @@ reget: /* loan and unbusy pages */ slock = NULL; for (i = 0; i < npages; i++) { - struct simplelock *nextslock; /* slock for next page */ + kmutex_t *nextslock; /* slock for next page */ struct vm_page *pg = *pgpp; /* XXX assuming that the page is owned by uobj */ @@ -556,7 +558,7 @@ reget: KASSERT(npendloan > 0); error = uvm_loanpage(pgpp - npendloan, npendloan); - simple_unlock(slock); + mutex_exit(slock); if (error) goto fail; ndone += npendloan; @@ -564,23 +566,23 @@ reget: } slock = nextslock; npendloan = 0; - simple_lock(slock); + mutex_enter(slock); } if ((pg->flags & PG_RELEASED) != 0) { /* * release pages and try again. 
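[editor's note] uvm_loanuobjpages above loans a run of pages but only drops and retakes the owner's vmobjlock when the owning object changes from one page to the next, flushing the pending batch with uvm_loanpage first. A sketch of that batch-by-lock loop; item, owner_lock, and process_batch are invented for illustration:

#include <pthread.h>
#include <stddef.h>

struct item {
	pthread_mutex_t *owner_lock;	/* lock protecting this item */
};

/* Stub: the real work (loan/unbusy) happens with the owner lock held. */
static void
process_batch(struct item **batch, int n)
{
	(void)batch; (void)n;
}

static void
process_all(struct item **items, int n)
{
	pthread_mutex_t *held = NULL;
	int i, pending = 0;

	for (i = 0; i < n; i++) {
		pthread_mutex_t *need = items[i]->owner_lock;

		if (need != held) {
			if (held != NULL) {
				/* Flush items batched under 'held'. */
				process_batch(&items[i] - pending, pending);
				pthread_mutex_unlock(held);
				pending = 0;
			}
			held = need;
			pthread_mutex_lock(held);
		}
		pending++;
	}
	if (held != NULL) {
		process_batch(&items[n] - pending, pending);
		pthread_mutex_unlock(held);
	}
}

The win is the same as in the kernel loop: consecutive pages almost always share one object, so lock traffic drops to one acquire/release per object run rather than per page.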
*/ - simple_unlock(slock); + mutex_exit(slock); for (; i < npages; i++) { pg = pgpp[i]; slock = &pg->uobject->vmobjlock; - simple_lock(slock); - uvm_lock_pageq(); + mutex_enter(slock); + mutex_enter(&uvm_pageqlock); uvm_page_unbusy(&pg, 1); - uvm_unlock_pageq(); - simple_unlock(slock); + mutex_exit(&uvm_pageqlock); + mutex_exit(slock); } goto reget; } @@ -592,7 +594,7 @@ reget: KASSERT(slock != NULL); KASSERT(npendloan > 0); error = uvm_loanpage(pgpp - npendloan, npendloan); - simple_unlock(slock); + mutex_exit(slock); if (error) goto fail; ndone += npendloan; @@ -636,7 +638,7 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) * XXXCDC: duplicate code with uvm_fault(). */ - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); if (uobj->pgops->pgo_get) { /* try locked pgo_get */ npages = 1; pg = NULL; @@ -687,7 +689,7 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) if (locked && amap) amap_lock(amap); uobj = pg->uobject; - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); /* * verify that the page has not be released and re-verify @@ -712,18 +714,18 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) wakeup(pg); } if (pg->flags & PG_RELEASED) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uobj->vmobjlock); return (0); } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); pg->flags &= ~(PG_BUSY|PG_WANTED); UVM_PAGE_OWN(pg, NULL); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return (0); } } @@ -741,7 +743,7 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) uvmfault_unlockall(ufi, amap, uobj, NULL); return (-1); } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); **output = pg; (*output)++; return (1); @@ -756,15 +758,15 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) if (pg->uanon) { anon = pg->uanon; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); anon->an_ref++; - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); if (pg->flags & PG_WANTED) { wakeup(pg); } pg->flags &= ~(PG_WANTED|PG_BUSY); UVM_PAGE_OWN(pg, NULL); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); **output = anon; (*output)++; return (1); @@ -780,14 +782,14 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) } anon->an_page = pg; pg->uanon = anon; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->wire_count > 0) { - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0); pg->uanon = NULL; anon->an_page = NULL; anon->an_ref--; - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); uvm_anfree(anon); goto fail; } @@ -796,14 +798,14 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) } pg->loan_count++; uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); if (pg->flags & PG_WANTED) { wakeup(pg); } pg->flags &= ~(PG_WANTED|PG_BUSY); UVM_PAGE_OWN(pg, NULL); - simple_unlock(&uobj->vmobjlock); - simple_unlock(&anon->an_lock); + mutex_exit(&uobj->vmobjlock); + mutex_exit(&anon->an_lock); **output = anon; (*output)++; return (1); @@ -844,7 +846,7 @@ uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags) UVMHIST_FUNC(__func__); 
UVMHIST_CALLED(loanhist); again: - simple_lock(&uvm_loanzero_object.vmobjlock); + mutex_enter(&uvm_loanzero_object.vmobjlock); /* * first, get ahold of our single zero page. @@ -854,7 +856,7 @@ again: TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) { while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL, UVM_PGA_ZERO)) == NULL) { - simple_unlock(&uvm_loanzero_object.vmobjlock); + mutex_exit(&uvm_loanzero_object.vmobjlock); uvmfault_unlockall(ufi, amap, NULL, NULL); uvm_wait("loanzero"); if (!uvmfault_relock(ufi)) { @@ -869,17 +871,17 @@ again: /* got a zero'd page. */ pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE); pg->flags |= PG_RDONLY; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); UVM_PAGE_OWN(pg, NULL); } if ((flags & UVM_LOAN_TOANON) == 0) { /* loaning to kernel-page */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); pg->loan_count++; - uvm_unlock_pageq(); - simple_unlock(&uvm_loanzero_object.vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uvm_loanzero_object.vmobjlock); **output = pg; (*output)++; return (1); @@ -893,10 +895,10 @@ again: if (pg->uanon) { anon = pg->uanon; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); anon->an_ref++; - simple_unlock(&anon->an_lock); - simple_unlock(&uvm_loanzero_object.vmobjlock); + mutex_exit(&anon->an_lock); + mutex_exit(&uvm_loanzero_object.vmobjlock); **output = anon; (*output)++; return (1); @@ -909,18 +911,18 @@ again: anon = uvm_analloc(); if (anon == NULL) { /* out of swap causes us to fail */ - simple_unlock(&uvm_loanzero_object.vmobjlock); + mutex_exit(&uvm_loanzero_object.vmobjlock); uvmfault_unlockall(ufi, amap, NULL, NULL); return (-1); } anon->an_page = pg; pg->uanon = anon; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); pg->loan_count++; uvm_pageactivate(pg); - uvm_unlock_pageq(); - simple_unlock(&anon->an_lock); - simple_unlock(&uvm_loanzero_object.vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&anon->an_lock); + mutex_exit(&uvm_loanzero_object.vmobjlock); **output = anon; (*output)++; return (1); @@ -942,9 +944,9 @@ uvm_unloananon(struct vm_anon **aloans, int nanons) int refs; anon = *aloans++; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); refs = --anon->an_ref; - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); if (refs == 0) { uvm_anfree(anon); @@ -962,9 +964,9 @@ static void uvm_unloanpage(struct vm_page **ploans, int npages) { struct vm_page *pg; - struct simplelock *slock; + kmutex_t *slock; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); while (npages-- > 0) { pg = *ploans++; @@ -981,11 +983,13 @@ uvm_unloanpage(struct vm_page **ploans, int npages) } else { slock = &pg->uanon->an_lock; } - if (simple_lock_try(slock)) { + if (mutex_tryenter(slock)) { break; } - uvm_unlock_pageq(); - uvm_lock_pageq(); + mutex_exit(&uvm_pageqlock); + /* XXX Better than yielding but inadequate. 
*/ + kpause("livelock", false, 1, NULL); + mutex_enter(&uvm_pageqlock); slock = NULL; } @@ -1014,10 +1018,10 @@ uvm_unloanpage(struct vm_page **ploans, int npages) uvm_pagefree(pg); } if (slock != NULL) { - simple_unlock(slock); + mutex_exit(slock); } } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } /* @@ -1053,7 +1057,7 @@ ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) */ if ((flags & PGO_FREE) == 0) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return 0; } @@ -1066,14 +1070,14 @@ ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) KASSERT(pg != NULL); KASSERT(TAILQ_NEXT(pg, listq) == NULL); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->uanon) uvm_pageactivate(pg); else uvm_pagedequeue(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return 0; } @@ -1089,7 +1093,7 @@ void uvm_loan_init(void) { - simple_lock_init(&uvm_loanzero_object.vmobjlock); + mutex_init(&uvm_loanzero_object.vmobjlock, MUTEX_DEFAULT, IPL_NONE); TAILQ_INIT(&uvm_loanzero_object.memq); uvm_loanzero_object.pgops = &ulz_pager; @@ -1113,7 +1117,7 @@ uvm_loanbreak(struct vm_page *uobjpage) #endif KASSERT(uobj != NULL); - LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock)); + KASSERT(mutex_owned(&uobj->vmobjlock)); KASSERT(uobjpage->flags & PG_BUSY); /* alloc new un-owned page */ @@ -1148,7 +1152,7 @@ uvm_loanbreak(struct vm_page *uobjpage) uobjpage->flags &= ~(PG_WANTED|PG_BUSY); UVM_PAGE_OWN(uobjpage, NULL); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); /* * replace uobjpage with new page. @@ -1171,7 +1175,7 @@ uvm_loanbreak(struct vm_page *uobjpage) /* install new page */ uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* * done! loan is broken and "pg" is diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c index 2d7e6e761eb8..d6f2a1227f2d 100644 --- a/sys/uvm/uvm_map.c +++ b/sys/uvm/uvm_map.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_map.c,v 1.247 2007/12/13 02:45:11 yamt Exp $ */ +/* $NetBSD: uvm_map.c,v 1.248 2008/01/02 11:49:18 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.247 2007/12/13 02:45:11 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.248 2008/01/02 11:49:18 ad Exp $"); #include "opt_ddb.h" #include "opt_uvmhist.h" @@ -88,6 +88,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.247 2007/12/13 02:45:11 yamt Exp $"); #include #include #include +#include #ifdef SYSVSHM #include @@ -139,18 +140,16 @@ UVMMAP_EVCNT_DEFINE(ukh_free) const char vmmapbsy[] = "vmmapbsy"; /* - * pool for vmspace structures. + * cache for vmspace structures. */ -POOL_INIT(uvm_vmspace_pool, sizeof(struct vmspace), 0, 0, 0, "vmsppl", - &pool_allocator_nointr, IPL_NONE); +static struct pool_cache uvm_vmspace_cache; /* - * pool for dynamically-allocated map entries. + * cache for dynamically-allocated map entries. */ -POOL_INIT(uvm_map_entry_pool, sizeof(struct vm_map_entry), 0, 0, 0, "vmmpepl", - &pool_allocator_nointr, IPL_NONE); +static struct pool_cache uvm_map_entry_cache; MALLOC_DEFINE(M_VMMAP, "VM map", "VM map structures"); MALLOC_DEFINE(M_VMPMAP, "VM pmap", "VM pmap"); @@ -229,13 +228,10 @@ extern struct vm_map *pager_map; /* XXX */ /* * SAVE_HINT: saves the specified entry as the hint for future lookups. * - * => map need not be locked (protected by hint_lock). + * => map need not be locked. 
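[Reviewer note: the kpause() above replaces what used to be a pointless unlock/relock spin. Combined with uvm_unloanpage()'s owner selection, the pattern is roughly the following sketch; names follow the patch, the loop shape is simplified.]

	kmutex_t *slock;

	/* the lock that owns a page: its object's, else its anon's */
	if (pg->uobject != NULL)
		slock = &pg->uobject->vmobjlock;
	else
		slock = &pg->uanon->an_lock;

	while (!mutex_tryenter(slock)) {
		/* can't spin with uvm_pageqlock held; back off a tick */
		mutex_exit(&uvm_pageqlock);
		kpause("livelock", false, 1, NULL);
		mutex_enter(&uvm_pageqlock);
	}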
*/ -#define SAVE_HINT(map,check,value) do { \ - mutex_enter(&(map)->hint_lock); \ - if ((map)->hint == (check)) \ - (map)->hint = (value); \ - mutex_exit(&(map)->hint_lock); \ +#define SAVE_HINT(map, check, value) do { \ + atomic_cas_ptr(&(map)->hint, (check), (value)); \ } while (/*CONSTCOND*/ 0) /* @@ -538,7 +534,8 @@ vm_map_lock(struct vm_map *map) KASSERT(map->busy != curlwp); mutex_enter(&map->misc_lock); rw_exit(&map->lock); - cv_wait(&map->cv, &map->misc_lock); + if (map->busy != NULL) + cv_wait(&map->cv, &map->misc_lock); mutex_exit(&map->misc_lock); } @@ -578,6 +575,7 @@ vm_map_unlock(struct vm_map *map) mutex_spin_exit(&map->mutex); else { KASSERT(rw_write_held(&map->lock)); + KASSERT(map->busy == NULL); rw_exit(&map->lock); } } @@ -621,12 +619,80 @@ vm_map_unbusy(struct vm_map *map) * o writers are blocked out with a read or write hold * o at any time, only one thread owns the set of values */ - map->busy = NULL; mutex_enter(&map->misc_lock); + map->busy = NULL; cv_broadcast(&map->cv); mutex_exit(&map->misc_lock); } +/* + * vm_map_lock_read: acquire a shared (read) lock on a map. + */ + +void +vm_map_lock_read(struct vm_map *map) +{ + + KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); + + rw_enter(&map->lock, RW_READER); +} + +/* + * vm_map_unlock_read: release a shared lock on a map. + */ + +void +vm_map_unlock_read(struct vm_map *map) +{ + + KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); + + rw_exit(&map->lock); +} + +/* + * vm_map_downgrade: downgrade an exclusive lock to a shared lock. + */ + +void +vm_map_downgrade(struct vm_map *map) +{ + + rw_downgrade(&map->lock); +} + +/* + * vm_map_busy: mark a map as busy. + * + * => the caller must hold the map write locked + */ + +void +vm_map_busy(struct vm_map *map) +{ + + KASSERT(rw_write_held(&map->lock)); + KASSERT(map->busy == NULL); + + map->busy = curlwp; +} + +/* + * vm_map_locked_p: return true if the map is write locked. + */ + +bool +vm_map_locked_p(struct vm_map *map) +{ + + if ((map->flags & VM_MAP_INTRSAFE) != 0) { + return mutex_owned(&map->mutex); + } else { + return rw_write_held(&map->lock); + } +} + /* * uvm_mapent_alloc: allocate a map entry */ @@ -641,7 +707,7 @@ uvm_mapent_alloc(struct vm_map *map, int flags) if (VM_MAP_USE_KMAPENT(map)) { me = uvm_kmapent_alloc(map, flags); } else { - me = pool_get(&uvm_map_entry_pool, pflags); + me = pool_cache_get(&uvm_map_entry_cache, pflags); if (__predict_false(me == NULL)) return NULL; me->flags = 0; @@ -654,6 +720,8 @@ uvm_mapent_alloc(struct vm_map *map, int flags) /* * uvm_mapent_alloc_split: allocate a map entry for clipping. + * + * => map must be locked by caller if UVM_MAP_QUANTUM is set. */ static struct vm_map_entry * @@ -669,11 +737,10 @@ uvm_mapent_alloc_split(struct vm_map *map, if (old_entry->flags & UVM_MAP_QUANTUM) { struct vm_map_kernel *vmk = vm_map_to_kernel(map); - mutex_spin_enter(&uvm_kentry_lock); + KASSERT(vm_map_locked_p(map)); me = vmk->vmk_merged_entries; KASSERT(me); vmk->vmk_merged_entries = me->next; - mutex_spin_exit(&uvm_kentry_lock); KASSERT(me->flags & UVM_MAP_QUANTUM); } else { me = uvm_mapent_alloc(map, flags); @@ -696,7 +763,7 @@ uvm_mapent_free(struct vm_map_entry *me) if (me->flags & UVM_MAP_KERNEL) { uvm_kmapent_free(me); } else { - pool_put(&uvm_map_entry_pool, me); + pool_cache_put(&uvm_map_entry_cache, me); } } @@ -705,6 +772,7 @@ uvm_mapent_free(struct vm_map_entry *me) * * => keep the entry if needed. * => caller shouldn't hold map locked if VM_MAP_USE_KMAPENT(map) is true. + * => map should be locked if UVM_MAP_QUANTUM is set. 
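[Reviewer note: the lock-free SAVE_HINT above works because atomic_cas_ptr() installs the new hint only if the old value is still the expected entry; a stale updater simply loses the race and the hint stays valid. An equivalent open-coded form, for illustration only:]

	/* atomic_cas_ptr() returns the previous value */
	if (atomic_cas_ptr(&map->hint, check, value) == check) {
		/* we won the race; the hint now points at "value" */
	} else {
		/* a concurrent lookup moved the hint first; keep theirs */
	}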
*/ static void @@ -719,15 +787,14 @@ uvm_mapent_free_merged(struct vm_map *map, struct vm_map_entry *me) */ struct vm_map_kernel *vmk; + KASSERT(vm_map_locked_p(map)); KASSERT(VM_MAP_IS_KERNEL(map)); KASSERT(!VM_MAP_USE_KMAPENT(map) || (me->flags & UVM_MAP_KERNEL)); vmk = vm_map_to_kernel(map); - mutex_spin_enter(&uvm_kentry_lock); me->next = vmk->vmk_merged_entries; vmk->vmk_merged_entries = me; - mutex_spin_exit(&uvm_kentry_lock); } else { uvm_mapent_free(me); } @@ -837,8 +904,7 @@ uvm_map_unreference_amap(struct vm_map_entry *entry, int flags) /* - * uvm_map_init: init mapping system at boot time. note that we allocate - * and init the static pool of struct vm_map_entry *'s for the kernel here. + * uvm_map_init: init mapping system at boot time. */ void @@ -864,6 +930,15 @@ uvm_map_init(void) */ mutex_init(&uvm_kentry_lock, MUTEX_DRIVER, IPL_VM); + + /* + * initialize caches. + */ + + pool_cache_bootstrap(&uvm_map_entry_cache, sizeof(struct vm_map_entry), + 0, 0, 0, "vmmpepl", NULL, IPL_NONE, NULL, NULL, NULL); + pool_cache_bootstrap(&uvm_vmspace_cache, sizeof(struct vmspace), + 0, 0, 0, "vmsppl", NULL, IPL_NONE, NULL, NULL, NULL); } /* @@ -1103,7 +1178,8 @@ uvm_map_prepare(struct vm_map *map, vaddr_t start, vsize_t size, retry: if (vm_map_lock_try(map) == false) { - if (flags & UVM_FLAG_TRYLOCK) { + if ((flags & UVM_FLAG_TRYLOCK) != 0 && + (map->flags & VM_MAP_INTRSAFE) == 0) { return EAGAIN; } vm_map_lock(map); /* could sleep here */ @@ -1505,19 +1581,27 @@ nomerge: error = 0; done: - vm_map_unlock(map); - if (new_entry) { - if (error == 0) { - KDASSERT(merged); - uvm_mapent_free_merged(map, new_entry); - } else { - uvm_mapent_free(new_entry); - } + if ((flags & UVM_FLAG_QUANTUM) == 0) { + /* + * vmk_merged_entries is locked by the map's lock. + */ + vm_map_unlock(map); + } + if (new_entry && error == 0) { + KDASSERT(merged); + uvm_mapent_free_merged(map, new_entry); + new_entry = NULL; } if (dead) { KDASSERT(merged); uvm_mapent_free_merged(map, dead); } + if ((flags & UVM_FLAG_QUANTUM) != 0) { + vm_map_unlock(map); + } + if (new_entry != NULL) { + uvm_mapent_free(new_entry); + } return error; } @@ -1572,9 +1656,7 @@ uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, * list, or from the hint. */ - mutex_enter(&map->hint_lock); cur = map->hint; - mutex_exit(&map->hint_lock); if (cur == &map->header) cur = cur->next; @@ -3007,9 +3089,9 @@ uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, if (UVM_OBJ_IS_VNODE(uobj) && (current->protection & VM_PROT_EXECUTE)) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); vn_markexec((struct vnode *) uobj); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } } } @@ -3724,10 +3806,10 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) if (anon == NULL) continue; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); pg = anon->an_page; if (pg == NULL) { - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); continue; } @@ -3747,18 +3829,18 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) * at all in these cases. 
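[Reviewer note: pool_cache_bootstrap() is used above, presumably because uvm_map_init() runs early in boot against statically allocated cache storage. The general get/put pattern, sketched with placeholder names:]

static struct pool_cache example_cache;		/* hypothetical cache */

void
example_init(void)
{

	pool_cache_bootstrap(&example_cache, sizeof(struct vm_map_entry),
	    0, 0, 0, "exmplpl", NULL, IPL_NONE, NULL, NULL, NULL);
}

struct vm_map_entry *
example_get(void)
{

	return pool_cache_get(&example_cache, PR_WAITOK);
}

void
example_put(struct vm_map_entry *me)
{

	pool_cache_put(&example_cache, me);
}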
*/ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->loan_count != 0 || pg->wire_count != 0) { - uvm_unlock_pageq(); - simple_unlock(&anon->an_lock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&anon->an_lock); continue; } KASSERT(pg->uanon == anon); pmap_clear_reference(pg); uvm_pagedeactivate(pg); - uvm_unlock_pageq(); - simple_unlock(&anon->an_lock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&anon->an_lock); continue; case PGO_FREE: @@ -3773,12 +3855,12 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) /* skip the page if it's wired */ if (pg->wire_count != 0) { - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); continue; } amap_unadd(¤t->aref, offset); refs = --anon->an_ref; - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); if (refs == 0) uvm_anfree(anon); continue; @@ -3797,7 +3879,7 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) uoff = current->offset + (start - current->start); size = MIN(end, current->end) - start; if (uobj != NULL) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); if (uobj->pgops->pgo_put != NULL) error = (uobj->pgops->pgo_put)(uobj, uoff, uoff + size, flags | PGO_CLEANIT); @@ -3868,7 +3950,7 @@ uvmspace_alloc(vaddr_t vmin, vaddr_t vmax) struct vmspace *vm; UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist); - vm = pool_get(&uvm_vmspace_pool, PR_WAITOK); + vm = pool_cache_get(&uvm_vmspace_cache, PR_WAITOK); uvmspace_init(vm, NULL, vmin, vmax); UVMHIST_LOG(maphist,"<- done (vm=0x%x)", vm,0,0,0); return (vm); @@ -4078,11 +4160,10 @@ uvmspace_free(struct vmspace *vm) KASSERT(map->nentries == 0); KASSERT(map->size == 0); mutex_destroy(&map->misc_lock); - mutex_destroy(&map->hint_lock); mutex_destroy(&map->mutex); rw_destroy(&map->lock); pmap_destroy(map->pmap); - pool_put(&uvm_vmspace_pool, vm); + pool_cache_put(&uvm_vmspace_cache, vm); } /* @@ -4806,7 +4887,7 @@ uvm_object_printit(struct uvm_object *uobj, bool full, int cnt = 0; (*pr)("OBJECT %p: locked=%d, pgops=%p, npages=%d, ", - uobj, uobj->vmobjlock.lock_data, uobj->pgops, uobj->uo_npages); + uobj, mutex_owned(&uobj->vmobjlock), uobj->pgops, uobj->uo_npages); if (UVM_OBJ_IS_KERN_OBJECT(uobj)) (*pr)("refs=\n"); else @@ -4995,12 +5076,6 @@ uvm_map_setup(struct vm_map *map, vaddr_t vmin, vaddr_t vmax, int flags) cv_init(&map->cv, "vm_map"); mutex_init(&map->misc_lock, MUTEX_DRIVER, ipl); mutex_init(&map->mutex, MUTEX_DRIVER, ipl); - - /* - * The hint lock can get acquired with the pagequeue - * lock held, so must be at IPL_VM. - */ - mutex_init(&map->hint_lock, MUTEX_DRIVER, IPL_VM); } diff --git a/sys/uvm/uvm_map.h b/sys/uvm/uvm_map.h index ab68df60055a..442025c520f1 100644 --- a/sys/uvm/uvm_map.h +++ b/sys/uvm/uvm_map.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_map.h,v 1.58 2007/07/22 21:07:47 he Exp $ */ +/* $NetBSD: uvm_map.h,v 1.59 2008/01/02 11:49:18 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -214,7 +214,6 @@ struct vm_map { struct lwp * busy; /* LWP holding map busy */ kmutex_t mutex; /* INTRSAFE lock */ kmutex_t misc_lock; /* Lock for ref_count, cv */ - kmutex_t hint_lock; /* lock for hint storage */ kcondvar_t cv; /* For signalling */ int flags; /* flags */ RB_HEAD(uvm_tree, vm_map_entry) rbhead; /* Tree for entries */ @@ -362,58 +361,11 @@ void vm_map_lock(struct vm_map *); void vm_map_unlock(struct vm_map *); void vm_map_upgrade(struct vm_map *); void vm_map_unbusy(struct vm_map *); - -/* - * vm_map_lock_read: acquire a shared (read) lock on a map. 
- */ - -static inline void -vm_map_lock_read(struct vm_map *map) -{ - - KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); - - rw_enter(&map->lock, RW_READER); -} - -/* - * vm_map_unlock_read: release a shared lock on a map. - */ - -static inline void -vm_map_unlock_read(struct vm_map *map) -{ - - KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); - - rw_exit(&map->lock); -} -/* - * vm_map_downgrade: downgrade an exclusive lock to a shared lock. - */ - -static inline void -vm_map_downgrade(struct vm_map *map) -{ - - rw_downgrade(&map->lock); -} - -/* - * vm_map_busy: mark a map as busy. - * - * => the caller must hold the map write locked - */ - -static inline void -vm_map_busy(struct vm_map *map) -{ - - KASSERT(rw_write_held(&map->lock)); - KASSERT(map->busy == NULL); - - map->busy = curlwp; -} +void vm_map_lock_read(struct vm_map *); +void vm_map_unlock_read(struct vm_map *); +void vm_map_downgrade(struct vm_map *); +void vm_map_busy(struct vm_map *); +bool vm_map_locked_p(struct vm_map *); #endif /* _KERNEL */ diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c index 700528500bf7..04e95063fa2a 100644 --- a/sys/uvm/uvm_mmap.c +++ b/sys/uvm/uvm_mmap.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_mmap.c,v 1.120 2007/12/26 22:11:53 christos Exp $ */ +/* $NetBSD: uvm_mmap.c,v 1.121 2008/01/02 11:49:18 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -51,7 +51,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.120 2007/12/26 22:11:53 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.121 2008/01/02 11:49:18 ad Exp $"); #include "opt_compat_netbsd.h" #include "opt_pax.h" @@ -231,7 +231,7 @@ sys_mincore(struct lwp *l, const struct sys_mincore_args *uap, register_t *retva if (amap != NULL) amap_lock(amap); if (uobj != NULL) - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) { pgi = 0; @@ -267,7 +267,7 @@ sys_mincore(struct lwp *l, const struct sys_mincore_args *uap, register_t *retva (void) subyte(vec, pgi); } if (uobj != NULL) - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); if (amap != NULL) amap_unlock(amap); } @@ -1167,9 +1167,9 @@ uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit) * then mark it as text. */ if (prot & PROT_EXEC) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&vp->v_interlock); vn_markexec(vp); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&vp->v_interlock); } } else { int i = maxprot; @@ -1199,22 +1199,22 @@ uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit) * with direct I/O. 
*/ - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); needwritemap = (vp->v_iflag & VI_WRMAP) == 0 && (flags & MAP_SHARED) != 0 && (maxprot & VM_PROT_WRITE) != 0; if ((vp->v_iflag & VI_MAPPED) == 0 || needwritemap) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); vp->v_iflag |= VI_MAPPED; vp->v_vflag |= VV_MAPPED; if (needwritemap) { vp->v_iflag |= VI_WRMAP; } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); VOP_UNLOCK(vp, 0); } else - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } uvmflag = UVM_MAPFLAG(prot, maxprot, diff --git a/sys/uvm/uvm_mremap.c b/sys/uvm/uvm_mremap.c index ac91ef6a77e0..946532a7e5bf 100644 --- a/sys/uvm/uvm_mremap.c +++ b/sys/uvm/uvm_mremap.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_mremap.c,v 1.9 2007/12/20 23:03:15 dsl Exp $ */ +/* $NetBSD: uvm_mremap.c,v 1.10 2008/01/02 11:49:18 ad Exp $ */ /*- * Copyright (c)2006 YAMAMOTO Takashi, @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_mremap.c,v 1.9 2007/12/20 23:03:15 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_mremap.c,v 1.10 2008/01/02 11:49:18 ad Exp $"); #include #include @@ -78,10 +78,10 @@ uvm_mapent_extend(struct vm_map *map, vaddr_t endva, vsize_t size) error = E2BIG; /* XXX */ goto done; } - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); KASSERT(uobj->uo_refs > 0); uobj->uo_refs++; - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); reserved_entry->object.uvm_obj = uobj; reserved_entry->offset = newoffset; } diff --git a/sys/uvm/uvm_object.c b/sys/uvm/uvm_object.c index 041fdb812b86..e7c65e6592ee 100644 --- a/sys/uvm/uvm_object.c +++ b/sys/uvm/uvm_object.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_object.c,v 1.3 2007/02/17 20:45:36 rmind Exp $ */ +/* $NetBSD: uvm_object.c,v 1.4 2008/01/02 11:49:18 ad Exp $ */ /* * Copyright (c) 2006 The NetBSD Foundation, Inc. 
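[Reviewer note: the VI_MAPPED/VI_WRMAP update above uses the usual vnode interlock hand-off: LK_INTERLOCK tells vn_lock() to release v_interlock while acquiring the vnode lock, and the interlock is retaken to write the flags. In isolation, the shape is:]

	mutex_enter(&vp->v_interlock);
	if ((vp->v_iflag & VI_MAPPED) == 0) {
		/* vn_lock() consumes the interlock with LK_INTERLOCK */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK);
		mutex_enter(&vp->v_interlock);
		vp->v_iflag |= VI_MAPPED;
		mutex_exit(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
	} else
		mutex_exit(&vp->v_interlock);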
@@ -45,7 +45,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_object.c,v 1.3 2007/02/17 20:45:36 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_object.c,v 1.4 2008/01/02 11:49:18 ad Exp $"); #include "opt_uvmhist.h" @@ -74,7 +74,7 @@ uobj_wirepages(struct uvm_object *uobj, off_t start, off_t end) left = (end - start) >> PAGE_SHIFT; - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); while (left) { npages = MIN(FETCH_PAGECOUNT, left); @@ -88,7 +88,7 @@ uobj_wirepages(struct uvm_object *uobj, off_t start, off_t end) if (error) goto error; - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); for (i = 0; i < npages; i++) { KASSERT(pgs[i] != NULL); @@ -101,9 +101,9 @@ uobj_wirepages(struct uvm_object *uobj, off_t start, off_t end) while (pgs[i]->loan_count) { pg = uvm_loanbreak(pgs[i]); if (!pg) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); uvm_wait("uobjwirepg"); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); continue; } } @@ -117,11 +117,11 @@ uobj_wirepages(struct uvm_object *uobj, off_t start, off_t end) } /* Wire the pages */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { uvm_pagewire(pgs[i]); } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* Unbusy the pages */ uvm_page_unbusy(pgs, npages); @@ -129,7 +129,7 @@ uobj_wirepages(struct uvm_object *uobj, off_t start, off_t end) left -= npages; offset += npages << PAGE_SHIFT; } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return 0; @@ -154,8 +154,8 @@ uobj_unwirepages(struct uvm_object *uobj, off_t start, off_t end) struct vm_page *pg; off_t offset; - simple_lock(&uobj->vmobjlock); - uvm_lock_pageq(); + mutex_enter(&uobj->vmobjlock); + mutex_enter(&uvm_pageqlock); for (offset = start; offset < end; offset += PAGE_SIZE) { pg = uvm_pagelookup(uobj, offset); @@ -164,6 +164,6 @@ uobj_unwirepages(struct uvm_object *uobj, off_t start, off_t end) uvm_pageunwire(pg); } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uobj->vmobjlock); } diff --git a/sys/uvm/uvm_object.h b/sys/uvm/uvm_object.h index 2dba7daf8f3c..64170daf2f79 100644 --- a/sys/uvm/uvm_object.h +++ b/sys/uvm/uvm_object.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_object.h,v 1.23 2007/12/01 10:40:28 yamt Exp $ */ +/* $NetBSD: uvm_object.h,v 1.24 2008/01/02 11:49:18 ad Exp $ */ /* * @@ -46,7 +46,7 @@ */ struct uvm_object { - struct simplelock vmobjlock; /* lock on memq */ + kmutex_t vmobjlock; /* lock on memq */ const struct uvm_pagerops *pgops; /* pager ops */ struct pglist memq; /* pages in this object */ int uo_npages; /* # of pages in memq */ @@ -104,13 +104,18 @@ extern const struct uvm_pagerops aobj_pager; #define UVM_OBJ_INIT(uobj, ops, refs) \ do { \ - simple_lock_init(&(uobj)->vmobjlock); \ + mutex_init(&(uobj)->vmobjlock, MUTEX_DEFAULT, IPL_NONE);\ (uobj)->pgops = (ops); \ TAILQ_INIT(&(uobj)->memq); \ (uobj)->uo_npages = 0; \ (uobj)->uo_refs = (refs); \ } while (/* CONSTCOND */ 0) +#define UVM_OBJ_DESTROY(uobj) \ + do { \ + mutex_destroy(&(uobj)->vmobjlock); \ + } while (/* CONSTCOND */ 0) + #endif /* _KERNEL */ #endif /* _UVM_UVM_OBJECT_H_ */ diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c index 9e9edfcb79d2..a1e042c352f2 100644 --- a/sys/uvm/uvm_page.c +++ b/sys/uvm/uvm_page.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_page.c,v 1.126 2007/11/29 18:07:11 ad Exp $ */ +/* $NetBSD: uvm_page.c,v 1.127 2008/01/02 11:49:19 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. 
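[Reviewer note: with vmobjlock now a kmutex_t, a uvm_object embedded in a larger structure needs explicit teardown, which is what the new UVM_OBJ_DESTROY() provides. A sketch of the expected lifecycle; the surrounding code is hypothetical.]

	struct uvm_object uobj;

	UVM_OBJ_INIT(&uobj, &aobj_pager, 1);	/* mutex_init + list init */
	/* ... lock uobj.vmobjlock around memq and uo_npages ... */
	UVM_OBJ_DESTROY(&uobj);			/* mutex_destroy */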
@@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.126 2007/11/29 18:07:11 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.127 2008/01/02 11:49:19 ad Exp $"); #include "opt_uvmhist.h" #include "opt_readahead.h" @@ -191,7 +191,7 @@ uvm_pageinsert_after(struct vm_page *pg, struct vm_page *where) kmutex_t *lock; u_int hash; - LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock)); + KASSERT(mutex_owned(&uobj->vmobjlock)); KASSERT((pg->flags & PG_TABLED) == 0); KASSERT(where == NULL || (where->flags & PG_TABLED)); KASSERT(where == NULL || (where->uobject == uobj)); @@ -248,7 +248,7 @@ uvm_pageremove(struct vm_page *pg) kmutex_t *lock; u_int hash; - LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock)); + KASSERT(mutex_owned(&uobj->vmobjlock)); KASSERT(pg->flags & PG_TABLED); hash = uvm_pagehash(uobj, pg->offset); @@ -315,7 +315,7 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp) */ uvmpdpol_init(); - simple_lock_init(&uvm.pageqlock); + mutex_init(&uvm_pageqlock, MUTEX_DRIVER, IPL_NONE); mutex_init(&uvm_fpageqlock, MUTEX_DRIVER, IPL_VM); /* @@ -444,12 +444,6 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp) *kvm_startp += PAGE_SIZE; #endif /* DEBUG */ - /* - * init locks for kernel threads - */ - - mutex_init(&uvm_pagedaemon_lock, MUTEX_DEFAULT, IPL_NONE); - /* * init various thresholds. */ @@ -1079,8 +1073,8 @@ uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon, KASSERT(obj == NULL || anon == NULL); KASSERT(anon == NULL || off == 0); KASSERT(off == trunc_page(off)); - LOCK_ASSERT(obj == NULL || simple_lock_held(&obj->vmobjlock)); - LOCK_ASSERT(anon == NULL || simple_lock_held(&anon->an_lock)); + KASSERT(obj == NULL || mutex_owned(&obj->vmobjlock)); + KASSERT(anon == NULL || mutex_owned(&anon->an_lock)); mutex_spin_enter(&uvm_fpageqlock); @@ -1244,7 +1238,7 @@ uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg) KASSERT(oldpg->uobject != NULL); KASSERT((newpg->flags & PG_TABLED) == 0); KASSERT(newpg->uobject == NULL); - LOCK_ASSERT(simple_lock_held(&oldpg->uobject->vmobjlock)); + KASSERT(mutex_owned(&oldpg->uobject->vmobjlock)); newpg->uobject = oldpg->uobject; newpg->offset = oldpg->offset; @@ -1338,12 +1332,12 @@ uvm_pagefree(struct vm_page *pg) #endif /* DEBUG */ KASSERT((pg->flags & PG_PAGEOUT) == 0); - LOCK_ASSERT(simple_lock_held(&uvm.pageqlock) || + KASSERT(mutex_owned(&uvm_pageqlock) || !uvmpdpol_pageisqueued_p(pg)); - LOCK_ASSERT(pg->uobject == NULL || - simple_lock_held(&pg->uobject->vmobjlock)); - LOCK_ASSERT(pg->uobject != NULL || pg->uanon == NULL || - simple_lock_held(&pg->uanon->an_lock)); + KASSERT(pg->uobject == NULL || + mutex_owned(&pg->uobject->vmobjlock)); + KASSERT(pg->uobject != NULL || pg->uanon == NULL || + mutex_owned(&pg->uanon->an_lock)); /* * if the page is loaned, resolve the loan instead of freeing. 
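[Reviewer note: the assertion changes in this file are all the same mechanical translation, shown once here. mutex_owned() answers whether the current LWP (or CPU, for spin mutexes) holds the lock, so it is suitable only for assertions, never for locking decisions.]

	/* old: checked only under LOCKDEBUG */
	LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock));
	/* new: checked under DIAGNOSTIC */
	KASSERT(mutex_owned(&uobj->vmobjlock));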
@@ -1475,11 +1469,11 @@ uvm_page_unbusy(struct vm_page **pgs, int npgs) continue; } - LOCK_ASSERT(pg->uobject == NULL || - simple_lock_held(&pg->uobject->vmobjlock)); - LOCK_ASSERT(pg->uobject != NULL || + KASSERT(pg->uobject == NULL || + mutex_owned(&pg->uobject->vmobjlock)); + KASSERT(pg->uobject != NULL || (pg->uanon != NULL && - simple_lock_held(&pg->uanon->an_lock))); + mutex_owned(&pg->uanon->an_lock))); KASSERT(pg->flags & PG_BUSY); KASSERT((pg->flags & PG_PAGEOUT) == 0); @@ -1521,9 +1515,9 @@ uvm_page_own(struct vm_page *pg, const char *tag) uobj = pg->uobject; anon = pg->uanon; if (uobj != NULL) { - LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock)); + KASSERT(mutex_owned(&uobj->vmobjlock)); } else if (anon != NULL) { - LOCK_ASSERT(simple_lock_held(&anon->an_lock)); + KASSERT(mutex_owned(&anon->an_lock)); } KASSERT((pg->flags & PG_WANTED) == 0); @@ -1575,7 +1569,6 @@ uvm_pageidlezero(void) int free_list, firstbucket; static int nextbucket; - KERNEL_LOCK(1, NULL); mutex_spin_enter(&uvm_fpageqlock); firstbucket = nextbucket; do { @@ -1598,7 +1591,6 @@ uvm_pageidlezero(void) pg, pageq); uvmexp.free--; mutex_spin_exit(&uvm_fpageqlock); - KERNEL_UNLOCK_LAST(NULL); #ifdef PMAP_PAGEIDLEZERO if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) { @@ -1609,7 +1601,6 @@ uvm_pageidlezero(void) * process now ready to run. */ - KERNEL_LOCK(1, NULL); mutex_spin_enter(&uvm_fpageqlock); TAILQ_INSERT_HEAD(&pgfl->pgfl_buckets[ nextbucket].pgfl_queues[ @@ -1623,7 +1614,6 @@ uvm_pageidlezero(void) #endif /* PMAP_PAGEIDLEZERO */ pg->flags |= PG_ZERO; - KERNEL_LOCK(1, NULL); mutex_spin_enter(&uvm_fpageqlock); TAILQ_INSERT_HEAD(&pgfl->pgfl_buckets[ nextbucket].pgfl_queues[PGFL_ZEROS], @@ -1636,7 +1626,6 @@ uvm_pageidlezero(void) } while (nextbucket != firstbucket); quit: mutex_spin_exit(&uvm_fpageqlock); - KERNEL_UNLOCK_LAST(NULL); } /* @@ -1654,7 +1643,7 @@ uvm_pagelookup(struct uvm_object *obj, voff_t off) kmutex_t *lock; u_int hash; - LOCK_ASSERT(simple_lock_held(&obj->vmobjlock)); + KASSERT(mutex_owned(&obj->vmobjlock)); hash = uvm_pagehash(obj, off); buck = &uvm.page_hash[hash]; @@ -1681,7 +1670,7 @@ uvm_pagelookup(struct uvm_object *obj, voff_t off) void uvm_pagewire(struct vm_page *pg) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); #if defined(READAHEAD_STATS) if ((pg->pqflags & PQ_READAHEAD) != 0) { uvm_ra_hit.ev_count++; @@ -1705,7 +1694,7 @@ uvm_pagewire(struct vm_page *pg) void uvm_pageunwire(struct vm_page *pg) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); pg->wire_count--; if (pg->wire_count == 0) { uvm_pageactivate(pg); @@ -1726,7 +1715,7 @@ void uvm_pagedeactivate(struct vm_page *pg) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); KASSERT(pg->wire_count != 0 || uvmpdpol_pageisqueued_p(pg)); uvmpdpol_pagedeactivate(pg); } @@ -1741,7 +1730,7 @@ void uvm_pageactivate(struct vm_page *pg) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); #if defined(READAHEAD_STATS) if ((pg->pqflags & PQ_READAHEAD) != 0) { uvm_ra_hit.ev_count++; @@ -1763,7 +1752,7 @@ uvm_pagedequeue(struct vm_page *pg) { if (uvmpdpol_pageisqueued_p(pg)) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); } uvmpdpol_pagedequeue(pg); @@ -1778,7 +1767,7 @@ void uvm_pageenqueue(struct vm_page *pg) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); if (pg->wire_count != 0) { return; } diff --git a/sys/uvm/uvm_page.h b/sys/uvm/uvm_page.h index 6a4d16f95ed1..f5ab89f9420d 100644 --- a/sys/uvm/uvm_page.h +++ b/sys/uvm/uvm_page.h 
@@ -1,4 +1,4 @@ -/* $NetBSD: uvm_page.h,v 1.49 2007/07/21 19:21:55 ad Exp $ */ +/* $NetBSD: uvm_page.h,v 1.50 2008/01/02 11:49:19 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -288,10 +288,6 @@ static int vm_physseg_find(paddr_t, int *); #define UVM_PAGE_HASH_PENALTY 4 /* XXX: a guess */ -#define uvm_lock_pageq() simple_lock(&uvm.pageqlock) -#define uvm_unlock_pageq() simple_unlock(&uvm.pageqlock) -#define UVM_LOCK_ASSERT_PAGEQ() LOCK_ASSERT(simple_lock_held(&uvm.pageqlock)) - #define uvm_pagehash(obj,off) \ (((unsigned long)obj+(unsigned long)atop(off)) & uvm.page_hashmask) diff --git a/sys/uvm/uvm_pager.c b/sys/uvm/uvm_pager.c index b48c01ea6c2c..eb5d2b9ee3ac 100644 --- a/sys/uvm/uvm_pager.c +++ b/sys/uvm/uvm_pager.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_pager.c,v 1.89 2007/12/01 10:40:28 yamt Exp $ */ +/* $NetBSD: uvm_pager.c,v 1.90 2008/01/02 11:49:19 ad Exp $ */ /* * @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_pager.c,v 1.89 2007/12/01 10:40:28 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_pager.c,v 1.90 2008/01/02 11:49:19 ad Exp $"); #include "opt_uvmhist.h" #include "opt_readahead.h" @@ -173,8 +173,9 @@ ReStart: if (pdaemon) { mutex_enter(&pager_map_wanted_lock); if (emerginuse) { - mtsleep(&emergva, PVM | PNORELOCK, "emergva", - 0, &pager_map_wanted_lock); + UVM_UNLOCK_AND_WAIT(&emergva, + &pager_map_wanted_lock, false, + "emergva", 0); goto ReStart; } emerginuse = true; @@ -191,8 +192,8 @@ ReStart: mutex_enter(&pager_map_wanted_lock); pager_map_wanted = true; UVMHIST_LOG(maphist, " SLEEPING on pager_map",0,0,0,0); - mtsleep(pager_map, PVM | PNORELOCK, "pager_map", 0, - &pager_map_wanted_lock); + UVM_UNLOCK_AND_WAIT(pager_map, &pager_map_wanted_lock, false, + "pager_map", 0); goto ReStart; } @@ -257,7 +258,7 @@ uvm_pagermapout(vaddr_t kva, int npages) /* * interrupt-context iodone handler for nested i/o bufs. * - * => must be at splbio(). + * => the buffer is private so need not be locked here */ void @@ -266,8 +267,9 @@ uvm_aio_biodone1(struct buf *bp) struct buf *mbp = bp->b_private; KASSERT(mbp != bp); - if (bp->b_error != 0) + if (bp->b_error != 0) { mbp->b_error = bp->b_error; + } mbp->b_resid -= bp->b_bcount; putiobuf(bp); if (mbp->b_resid == 0) { @@ -278,8 +280,6 @@ uvm_aio_biodone1(struct buf *bp) /* * interrupt-context iodone handler for single-buf i/os * or the top-level buf of a nested-buf i/o. - * - * => must be at splbio(). 
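[Reviewer note: UVM_UNLOCK_AND_WAIT() above subsumes the old mtsleep(... PNORELOCK ...) calls: it releases the interlock and sleeps as one atomic step, so a wakeup cannot slip in between. The waiter/waker pairing, as a sketch:]

	/* waiter */
	mutex_enter(&pager_map_wanted_lock);
	pager_map_wanted = true;
	UVM_UNLOCK_AND_WAIT(pager_map, &pager_map_wanted_lock, false,
	    "pager_map", 0);

	/* waker, e.g. from uvm_pagermapout() */
	mutex_enter(&pager_map_wanted_lock);
	if (pager_map_wanted) {
		pager_map_wanted = false;
		wakeup(pager_map);
	}
	mutex_exit(&pager_map_wanted_lock);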
*/ void @@ -302,18 +302,18 @@ uvm_aio_aiodone(struct buf *bp) int npages = bp->b_bufsize >> PAGE_SHIFT; struct vm_page *pg, *pgs[npages]; struct uvm_object *uobj; - struct simplelock *slock; - int s, i, error, swslot; + kmutex_t *slock; + int i, error, swslot; + int pageout_done = 0; bool write, swap; UVMHIST_FUNC("uvm_aio_aiodone"); UVMHIST_CALLED(ubchist); UVMHIST_LOG(ubchist, "bp %p", bp, 0,0,0); error = bp->b_error; write = (bp->b_flags & B_READ) == 0; - /* XXXUBC B_NOCACHE is for swap pager, should be done differently */ - if (write && !(bp->b_flags & B_NOCACHE) && bioopsp) { - bioopsp->io_pageiodone(bp); - } + /* XXXUBC BC_NOCACHE is for swap pager, should be done differently */ + if (write && !(bp->b_cflags & BC_NOCACHE) && bioopsp != NULL) + (*bioopsp->io_pageiodone)(bp); uobj = NULL; for (i = 0; i < npages; i++) { @@ -330,8 +330,8 @@ uvm_aio_aiodone(struct buf *bp) if (!swap) { uobj = pg->uobject; slock = &uobj->vmobjlock; - simple_lock(slock); - uvm_lock_pageq(); + mutex_enter(slock); + mutex_enter(&uvm_pageqlock); } else { #if defined(VMSWAP) if (error) { @@ -365,8 +365,8 @@ uvm_aio_aiodone(struct buf *bp) } else { slock = &pg->uanon->an_lock; } - simple_lock(slock); - uvm_lock_pageq(); + mutex_enter(slock); + mutex_enter(&uvm_pageqlock); } #endif /* defined(VMSWAP) */ @@ -387,7 +387,7 @@ uvm_aio_aiodone(struct buf *bp) } else if (error == ENOMEM) { if (pg->flags & PG_PAGEOUT) { pg->flags &= ~PG_PAGEOUT; - uvmexp.paging--; + pageout_done++; } pg->flags &= ~PG_CLEAN; uvm_pageactivate(pg); @@ -437,7 +437,7 @@ uvm_aio_aiodone(struct buf *bp) if (pg->flags & PG_PAGEOUT) { pg->flags &= ~PG_PAGEOUT; - uvmexp.paging--; + pageout_done++; uvmexp.pdfreed++; pg->flags |= PG_RELEASED; } @@ -450,20 +450,21 @@ uvm_aio_aiodone(struct buf *bp) if (swap) { if (pg->uobject == NULL && pg->uanon->an_ref == 0 && (pg->flags & PG_RELEASED) != 0) { - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); uvm_anon_release(pg->uanon); } else { uvm_page_unbusy(&pg, 1); - uvm_unlock_pageq(); - simple_unlock(slock); + mutex_exit(&uvm_pageqlock); + mutex_exit(slock); } } #endif /* defined(VMSWAP) */ } + uvm_pageout_done(pageout_done); if (!swap) { uvm_page_unbusy(pgs, npages); - uvm_unlock_pageq(); - simple_unlock(slock); + mutex_exit(&uvm_pageqlock); + mutex_exit(slock); } else { #if defined(VMSWAP) KASSERT(write); @@ -483,12 +484,12 @@ uvm_aio_aiodone(struct buf *bp) uvmexp.pdpending--; #endif /* defined(VMSWAP) */ } - s = splbio(); - if (write && (bp->b_flags & B_AGE) != 0) { + if (write && (bp->b_cflags & BC_AGE) != 0) { + mutex_enter(bp->b_objlock); vwakeup(bp); + mutex_exit(bp->b_objlock); } putiobuf(bp); - splx(s); } /* diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c index cb6803d81641..d0cacf7fede1 100644 --- a/sys/uvm/uvm_pdaemon.c +++ b/sys/uvm/uvm_pdaemon.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_pdaemon.c,v 1.88 2007/11/07 00:23:46 ad Exp $ */ +/* $NetBSD: uvm_pdaemon.c,v 1.89 2008/01/02 11:49:19 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.88 2007/11/07 00:23:46 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.89 2008/01/02 11:49:19 ad Exp $"); #include "opt_uvmhist.h" #include "opt_readahead.h" @@ -93,8 +93,9 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.88 2007/11/07 00:23:46 ad Exp $"); * queue too quickly to for them to be referenced and avoid being freed. 
*/ -#define UVMPD_NUMDIRTYREACTS 16 +#define UVMPD_NUMDIRTYREACTS 16 +#define UVMPD_NUMTRYLOCKOWNER 16 /* * local prototypes @@ -104,6 +105,8 @@ static void uvmpd_scan(void); static void uvmpd_scan_queue(void); static void uvmpd_tune(void); +unsigned int uvm_pagedaemon_waiters; + /* * XXX hack to avoid hangs when large processes fork. */ @@ -120,7 +123,8 @@ void uvm_wait(const char *wmsg) { int timo = 0; - int s = splbio(); + + mutex_spin_enter(&uvm_fpageqlock); /* * check for page daemon going to sleep (waiting for itself) @@ -152,23 +156,24 @@ uvm_wait(const char *wmsg) #endif } - mutex_enter(&uvm_pagedaemon_lock); + uvm_pagedaemon_waiters++; wakeup(&uvm.pagedaemon); /* wake the daemon! */ - mtsleep(&uvmexp.free, PVM, wmsg, timo, &uvm_pagedaemon_lock); - mutex_exit(&uvm_pagedaemon_lock); - - splx(s); + UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm_fpageqlock, false, wmsg, timo); } /* * uvm_kick_pdaemon: perform checks to determine if we need to * give the pagedaemon a nudge, and do so if necessary. + * + * => called with uvm_fpageqlock held. */ void uvm_kick_pdaemon(void) { + KASSERT(mutex_owned(&uvm_fpageqlock)); + if (uvmexp.free + uvmexp.paging < uvmexp.freemin || (uvmexp.free + uvmexp.paging < uvmexp.freetarg && uvmpdpol_needsscan_p())) { @@ -231,33 +236,40 @@ uvm_pageout(void *arg) */ uvm.pagedaemon_lwp = curlwp; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); npages = uvmexp.npages; uvmpd_tune(); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* * main loop */ for (;;) { - mutex_enter(&uvm_pagedaemon_lock); + bool needsscan; - UVMHIST_LOG(pdhist," <>",0,0,0,0); - mtsleep(&uvm.pagedaemon, PVM | PNORELOCK, "pgdaemon", 0, - &uvm_pagedaemon_lock); - uvmexp.pdwoke++; - UVMHIST_LOG(pdhist," <>",0,0,0,0); + mutex_spin_enter(&uvm_fpageqlock); + if (uvm_pagedaemon_waiters == 0 || uvmexp.paging > 0) { + UVMHIST_LOG(pdhist," <>",0,0,0,0); + UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon, + &uvm_fpageqlock, false, "pgdaemon", 0); + uvmexp.pdwoke++; + UVMHIST_LOG(pdhist," <>",0,0,0,0); + } else { + mutex_spin_exit(&uvm_fpageqlock); + } /* * now lock page queues and recompute inactive count */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (npages != uvmexp.npages || extrapages != uvm_extrapages) { npages = uvmexp.npages; extrapages = uvm_extrapages; + mutex_spin_enter(&uvm_fpageqlock); uvmpd_tune(); + mutex_spin_exit(&uvm_fpageqlock); } uvmpdpol_tune(); @@ -266,6 +278,7 @@ uvm_pageout(void *arg) * Estimate a hint. Note that bufmem are returned to * system only when entire pool page is empty. */ + mutex_spin_enter(&uvm_fpageqlock); bufcnt = uvmexp.freetarg - uvmexp.free; if (bufcnt < 0) bufcnt = 0; @@ -273,30 +286,33 @@ uvm_pageout(void *arg) UVMHIST_LOG(pdhist," free/ftarg=%d/%d", uvmexp.free, uvmexp.freetarg, 0,0); + needsscan = uvmexp.free + uvmexp.paging < uvmexp.freetarg || + uvmpdpol_needsscan_p(); + mutex_spin_exit(&uvm_fpageqlock); + /* * scan if needed */ - - if (uvmexp.free + uvmexp.paging < uvmexp.freetarg || - uvmpdpol_needsscan_p()) { + if (needsscan) uvmpd_scan(); - } /* * if there's any free memory to be had, * wake up any waiters. */ + mutex_spin_enter(&uvm_fpageqlock); if (uvmexp.free > uvmexp.reserve_kernel || uvmexp.paging == 0) { wakeup(&uvmexp.free); + uvm_pagedaemon_waiters = 0; } + mutex_spin_exit(&uvm_fpageqlock); /* * scan done. unlock page queues (the only lock we are holding) */ - - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* * start draining pool resources now that we're not @@ -307,7 +323,9 @@ uvm_pageout(void *arg) /* * kill unused metadata buffers. 
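[Reviewer note: the uvm_wait() rework above folds the old uvm_pagedaemon_lock into uvm_fpageqlock, so the waiter count, the daemon wakeup, and the sleep all happen under one lock and no wakeup can be lost. In outline, using uvm_wait()'s own parameters:]

	mutex_spin_enter(&uvm_fpageqlock);
	uvm_pagedaemon_waiters++;
	wakeup(&uvm.pagedaemon);	/* nudge the daemon */
	/* atomically drop the lock and sleep on free memory */
	UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm_fpageqlock, false,
	    wmsg, timo);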
*/ + mutex_enter(&bufcache_lock); buf_drain(bufcnt << PAGE_SHIFT); + mutex_exit(&bufcache_lock); /* * free any cached u-areas we don't need @@ -330,7 +348,6 @@ uvm_pageout(void *arg) void uvm_aiodone_worker(struct work *wk, void *dummy) { - int free; struct buf *bp = (void *)wk; KASSERT(&bp->b_work == wk); @@ -339,17 +356,37 @@ uvm_aiodone_worker(struct work *wk, void *dummy) * process an i/o that's done. */ - free = uvmexp.free; (*bp->b_iodone)(bp); - if (free <= uvmexp.reserve_kernel) { - mutex_spin_enter(&uvm_fpageqlock); +} + +void +uvm_pageout_start(int npages) +{ + + mutex_spin_enter(&uvm_fpageqlock); + uvmexp.paging += npages; + mutex_spin_exit(&uvm_fpageqlock); +} + +void +uvm_pageout_done(int npages) +{ + + mutex_spin_enter(&uvm_fpageqlock); + KASSERT(uvmexp.paging >= npages); + uvmexp.paging -= npages; + + /* + * wake up either of pagedaemon or LWPs waiting for it. + */ + + if (uvmexp.free <= uvmexp.reserve_kernel) { wakeup(&uvm.pagedaemon); - mutex_spin_exit(&uvm_fpageqlock); } else { - mutex_enter(&uvm_pagedaemon_lock); wakeup(&uvmexp.free); - mutex_exit(&uvm_pagedaemon_lock); + uvm_pagedaemon_waiters = 0; } + mutex_spin_exit(&uvm_fpageqlock); } /* @@ -357,16 +394,17 @@ uvm_aiodone_worker(struct work *wk, void *dummy) * * => called with pageq locked. * => resolve orphaned O->A loaned page. - * => return the locked simplelock on success. otherwise, return NULL. + * => return the locked mutex on success. otherwise, return NULL. */ -struct simplelock * +kmutex_t * uvmpd_trylockowner(struct vm_page *pg) { struct uvm_object *uobj = pg->uobject; - struct simplelock *slock; + kmutex_t *slock; + + KASSERT(mutex_owned(&uvm_pageqlock)); - UVM_LOCK_ASSERT_PAGEQ(); if (uobj != NULL) { slock = &uobj->vmobjlock; } else { @@ -376,7 +414,7 @@ uvmpd_trylockowner(struct vm_page *pg) slock = &anon->an_lock; } - if (!simple_lock_try(slock)) { + if (!mutex_tryenter(slock)) { return NULL; } @@ -410,6 +448,7 @@ swapcluster_init(struct swapcluster *swc) { swc->swc_slot = 0; + swc->swc_nused = 0; } static int @@ -449,12 +488,12 @@ swapcluster_add(struct swapcluster *swc, struct vm_page *pg) slot = swc->swc_slot + swc->swc_nused; uobj = pg->uobject; if (uobj == NULL) { - LOCK_ASSERT(simple_lock_held(&pg->uanon->an_lock)); + KASSERT(mutex_owned(&pg->uanon->an_lock)); pg->uanon->an_swslot = slot; } else { int result; - LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock)); + KASSERT(mutex_owned(&uobj->vmobjlock)); result = uao_set_swslot(uobj, pg->offset >> PAGE_SHIFT, slot); if (result == -1) { return ENOMEM; @@ -500,6 +539,7 @@ swapcluster_flush(struct swapcluster *swc, bool now) */ uvmexp.pdpageouts++; + uvm_pageout_start(nused); error = uvm_swap_put(slot, swc->swc_pages, nused, 0); KASSERT(error == 0); @@ -509,6 +549,14 @@ swapcluster_flush(struct swapcluster *swc, bool now) */ swc->swc_slot = 0; + swc->swc_nused = 0; +} + +static int +swapcluster_nused(struct swapcluster *swc) +{ + + return swc->swc_nused; } /* @@ -551,7 +599,7 @@ uvmpd_dropswap(struct vm_page *pg) bool uvmpd_trydropswap(struct vm_page *pg) { - struct simplelock *slock; + kmutex_t *slock; bool result; if ((pg->flags & PG_BUSY) != 0) { @@ -572,13 +620,13 @@ uvmpd_trydropswap(struct vm_page *pg) */ if ((pg->flags & PG_BUSY) != 0) { - simple_unlock(slock); + mutex_exit(slock); return false; } result = uvmpd_dropswap(pg); - simple_unlock(slock); + mutex_exit(slock); return result; } @@ -605,7 +653,8 @@ uvmpd_scan_queue(void) struct swapcluster swc; #endif /* defined(VMSWAP) */ int dirtyreacts; - struct simplelock *slock; + int lockownerfail; + 
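[Reviewer note: uvm_pageout_start() and uvm_pageout_done() above centralize the uvmexp.paging accounting that used to be scattered bare increments and decrements. The intended bracketing, sketched around a swap write; error handling is elided:]

	uvm_pageout_start(npages);	/* uvmexp.paging += npages */
	error = uvm_swap_put(startslot, pgs, npages, 0);
	/* ... I/O completes; later, from the aiodone path: */
	uvm_pageout_done(npages);	/* decrement, then wake either the
					   pagedaemon or uvmexp.free sleepers */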
kmutex_t *slock; UVMHIST_FUNC("uvmpd_scan_queue"); UVMHIST_CALLED(pdhist); /* @@ -619,6 +668,7 @@ uvmpd_scan_queue(void) #endif /* defined(VMSWAP) */ dirtyreacts = 0; + lockownerfail = 0; uvmpdpol_scaninit(); while (/* CONSTCOND */ 1) { @@ -627,7 +677,11 @@ uvmpd_scan_queue(void) * see if we've met the free target. */ - if (uvmexp.free + uvmexp.paging >= uvmexp.freetarg << 2 || + if (uvmexp.free + uvmexp.paging +#if defined(VMSWAP) + + swapcluster_nused(&swc) +#endif /* defined(VMSWAP) */ + >= uvmexp.freetarg << 2 || dirtyreacts == UVMPD_NUMDIRTYREACTS) { UVMHIST_LOG(pdhist," met free target: " "exit loop", 0, 0, 0, 0); @@ -666,10 +720,24 @@ uvmpd_scan_queue(void) slock = uvmpd_trylockowner(p); if (slock == NULL) { + /* + * yield cpu to make a chance for an LWP holding + * the lock run. otherwise we can busy-loop too long + * if the page queue is filled with a lot of pages + * from few objects. + */ + lockownerfail++; + if (lockownerfail > UVMPD_NUMTRYLOCKOWNER) { + mutex_exit(&uvm_pageqlock); + /* XXX Better than yielding but inadequate. */ + kpause("livelock", false, 1, NULL); + mutex_enter(&uvm_pageqlock); + lockownerfail = 0; + } continue; } if (p->flags & PG_BUSY) { - simple_unlock(slock); + mutex_exit(slock); uvmexp.pdbusy++; continue; } @@ -702,10 +770,10 @@ uvmpd_scan_queue(void) if ((p->pqflags & PQ_SWAPBACKED) == 0) { KASSERT(uobj != NULL); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); (void) (uobj->pgops->pgo_put)(uobj, p->offset, p->offset + PAGE_SIZE, PGO_CLEANIT|PGO_FREE); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); continue; } @@ -741,7 +809,7 @@ uvmpd_scan_queue(void) } else { slot = uao_find_swslot(uobj, pageidx); } - simple_unlock(slock); + mutex_exit(slock); if (slot > 0) { /* this page is now only in swap. */ @@ -760,7 +828,7 @@ uvmpd_scan_queue(void) */ if (uvmexp.free + uvmexp.paging > uvmexp.freetarg << 2) { - simple_unlock(slock); + mutex_exit(slock); continue; } @@ -782,7 +850,7 @@ uvmpd_scan_queue(void) if (uvm_swapisfull()) { dirtyreacts++; uvm_pageactivate(p); - simple_unlock(slock); + mutex_exit(slock); continue; } @@ -791,7 +859,7 @@ uvmpd_scan_queue(void) */ if (swapcluster_allocslots(&swc)) { - simple_unlock(slock); + mutex_exit(slock); dirtyreacts++; /* XXX */ continue; } @@ -808,11 +876,10 @@ uvmpd_scan_queue(void) UVM_PAGE_OWN(p, "scan_queue"); p->flags |= PG_PAGEOUT; - uvmexp.paging++; uvm_pagedequeue(p); uvmexp.pgswapout++; - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* * add the new page to the cluster. @@ -821,17 +888,16 @@ uvmpd_scan_queue(void) if (swapcluster_add(&swc, p)) { p->flags &= ~(PG_BUSY|PG_PAGEOUT); UVM_PAGE_OWN(p, NULL); - uvm_lock_pageq(); - uvmexp.paging--; + mutex_enter(&uvm_pageqlock); dirtyreacts++; uvm_pageactivate(p); - simple_unlock(slock); + mutex_exit(slock); continue; } - simple_unlock(slock); + mutex_exit(slock); swapcluster_flush(&swc, false); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); /* * the pageout is in progress. 
bump counters and set up @@ -842,14 +908,14 @@ uvmpd_scan_queue(void) #else /* defined(VMSWAP) */ uvm_pageactivate(p); - simple_unlock(slock); + mutex_exit(slock); #endif /* defined(VMSWAP) */ } #if defined(VMSWAP) - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); swapcluster_flush(&swc, true); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); #endif /* defined(VMSWAP) */ } @@ -879,9 +945,9 @@ uvmpd_scan(void) uvmexp.pdswout++; UVMHIST_LOG(pdhist," free %d < target %d: swapout", uvmexp.free, uvmexp.freetarg, 0, 0); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); uvm_swapout_threads(); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); } #endif diff --git a/sys/uvm/uvm_pdaemon.h b/sys/uvm/uvm_pdaemon.h index 6f98bdf58650..55a30d15d07d 100644 --- a/sys/uvm/uvm_pdaemon.h +++ b/sys/uvm/uvm_pdaemon.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_pdaemon.h,v 1.14 2007/02/21 23:00:14 thorpej Exp $ */ +/* $NetBSD: uvm_pdaemon.h,v 1.15 2008/01/02 11:49:20 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -82,7 +82,7 @@ void uvm_wait(const char *); bool uvm_reclaimable(void); -struct simplelock *uvmpd_trylockowner(struct vm_page *); +kmutex_t *uvmpd_trylockowner(struct vm_page *); bool uvmpd_trydropswap(struct vm_page *); #endif /* _KERNEL */ diff --git a/sys/uvm/uvm_pdpolicy_clock.c b/sys/uvm/uvm_pdpolicy_clock.c index 0f9c87bd3d32..93d403eb8cca 100644 --- a/sys/uvm/uvm_pdpolicy_clock.c +++ b/sys/uvm/uvm_pdpolicy_clock.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_pdpolicy_clock.c,v 1.8 2007/02/22 06:05:01 thorpej Exp $ */ +/* $NetBSD: uvm_pdpolicy_clock.c,v 1.9 2008/01/02 11:49:20 ad Exp $ */ /* NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $ */ /* @@ -74,7 +74,7 @@ #else /* defined(PDSIM) */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.8 2007/02/22 06:05:01 thorpej Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.9 2008/01/02 11:49:20 ad Exp $"); #include #include @@ -175,7 +175,7 @@ uvmpdpol_selectvictim(void) struct uvmpdpol_scanstate *ss = &pdpol_scanstate; struct vm_page *pg; - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); while (/* CONSTCOND */ 1) { struct vm_anon *anon; @@ -287,7 +287,7 @@ void uvmpdpol_pagedeactivate(struct vm_page *pg) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); if (pg->pqflags & PQ_ACTIVE) { TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pageq); pg->pqflags &= ~PQ_ACTIVE; @@ -317,13 +317,13 @@ uvmpdpol_pagedequeue(struct vm_page *pg) { if (pg->pqflags & PQ_ACTIVE) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pageq); pg->pqflags &= ~PQ_ACTIVE; KASSERT(pdpol_state.s_active > 0); pdpol_state.s_active--; } else if (pg->pqflags & PQ_INACTIVE) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pageq); pg->pqflags &= ~PQ_INACTIVE; KASSERT(pdpol_state.s_inactive > 0); diff --git a/sys/uvm/uvm_pdpolicy_clockpro.c b/sys/uvm/uvm_pdpolicy_clockpro.c index 7c8128ebe60c..84a75b967045 100644 --- a/sys/uvm/uvm_pdpolicy_clockpro.c +++ b/sys/uvm/uvm_pdpolicy_clockpro.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_pdpolicy_clockpro.c,v 1.9 2007/08/01 14:49:55 yamt Exp $ */ +/* $NetBSD: uvm_pdpolicy_clockpro.c,v 1.10 2008/01/02 11:49:20 ad Exp $ */ /*- * Copyright (c)2005, 2006 YAMAMOTO Takashi, @@ -43,7 +43,7 @@ #else /* defined(PDSIM) */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.9 2007/08/01 14:49:55 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: 
uvm_pdpolicy_clockpro.c,v 1.10 2008/01/02 11:49:20 ad Exp $"); #include "opt_ddb.h" @@ -712,7 +712,7 @@ clockpro_pageenqueue(struct vm_page *pg) bool speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */ KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0); - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); check_sanity(); KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE); s->s_npages++; diff --git a/sys/uvm/uvm_readahead.c b/sys/uvm/uvm_readahead.c index 1154e2e367cc..5318b08778f2 100644 --- a/sys/uvm/uvm_readahead.c +++ b/sys/uvm/uvm_readahead.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_readahead.c,v 1.4 2007/05/11 12:11:09 tsutsui Exp $ */ +/* $NetBSD: uvm_readahead.c,v 1.5 2008/01/02 11:49:20 ad Exp $ */ /*- * Copyright (c)2003, 2005 YAMAMOTO Takashi, @@ -40,7 +40,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_readahead.c,v 1.4 2007/05/11 12:11:09 tsutsui Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_readahead.c,v 1.5 2008/01/02 11:49:20 ad Exp $"); #include #include @@ -83,21 +83,32 @@ static off_t ra_startio(struct uvm_object *, off_t, size_t); static struct uvm_ractx *ra_allocctx(void); static void ra_freectx(struct uvm_ractx *); -static POOL_INIT(ractx_pool, sizeof(struct uvm_ractx), 0, 0, 0, "ractx", - &pool_allocator_nointr, IPL_NONE); +static struct pool_cache ractx_cache; + +/* + * uvm_ra_init: initialize readahead module. + */ + +void +uvm_ra_init(void) +{ + + pool_cache_bootstrap(&ractx_cache, sizeof(struct uvm_ractx), 0, 0, 0, + "ractx", NULL, IPL_NONE, NULL, NULL, NULL); +} static struct uvm_ractx * ra_allocctx(void) { - return pool_get(&ractx_pool, PR_NOWAIT); + return pool_cache_get(&ractx_cache, PR_NOWAIT); } static void ra_freectx(struct uvm_ractx *ra) { - pool_put(&ractx_pool, ra); + pool_cache_put(&ractx_cache, ra); } /* @@ -134,11 +145,11 @@ ra_startio(struct uvm_object *uobj, off_t off, size_t sz) * use UVM_ADV_RANDOM to avoid recursion. */ - simple_lock(&uobj->vmobjlock); error = (*uobj->pgops->pgo_get)(uobj, off, NULL, &npages, 0, VM_PROT_READ, UVM_ADV_RANDOM, 0); DPRINTF(("%s: off=%" PRIu64 ", bytelen=%zu -> %d\n", __func__, off, bytelen, error)); + mutex_enter(&uobj->vmobjlock); if (error != 0 && error != EBUSY) { if (error != EINVAL) { /* maybe past EOF */ DPRINTF(("%s: error=%d\n", __func__, error)); @@ -188,6 +199,7 @@ uvm_ra_freectx(struct uvm_ractx *ra) * uvm_ra_request: update a read-ahead context and start i/o if appropriate. * * => called when [reqoff, reqoff+reqsize) is requested. + * => object must be locked by caller, will return locked. */ void @@ -195,15 +207,12 @@ uvm_ra_request(struct uvm_ractx *ra, int advice, struct uvm_object *uobj, off_t reqoff, size_t reqsize) { + KASSERT(mutex_owned(&uobj->vmobjlock)); + if (ra == NULL || advice == UVM_ADV_RANDOM) { return; } - /* - * XXX needs locking? maybe. - * but the worst effect is merely a bad read-ahead. - */ - if (advice == UVM_ADV_SEQUENTIAL) { /* @@ -293,12 +302,6 @@ do_readahead: #if defined(DIAGNOSTIC) if (rasize > RA_WINSIZE_MAX) { - - /* - * shouldn't happen as far as we're protected by - * kernel_lock. 
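[Reviewer note: the readahead change relies on the pgo_get() locking contract rather than wrapping the call itself: uvm_ra_request() now asserts the object locked on entry, pgo_get() with flags 0 releases the lock during I/O, and ra_startio() retakes it so the caller's view is unchanged. Schematically:]

	KASSERT(mutex_owned(&uobj->vmobjlock));
	error = (*uobj->pgops->pgo_get)(uobj, off, NULL, &npages, 0,
	    VM_PROT_READ, UVM_ADV_RANDOM, 0);	/* drops vmobjlock */
	mutex_enter(&uobj->vmobjlock);		/* restore for the caller */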
- */
-		printf("%s: corrupted context", __func__);
 		rasize = RA_WINSIZE_MAX;
 	}
diff --git a/sys/uvm/uvm_readahead.h b/sys/uvm/uvm_readahead.h
index 43878de2cd61..fffe2e26dcfa 100644
--- a/sys/uvm/uvm_readahead.h
+++ b/sys/uvm/uvm_readahead.h
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_readahead.h,v 1.2 2005/11/29 23:37:59 yamt Exp $ */
+/* $NetBSD: uvm_readahead.h,v 1.3 2008/01/02 11:49:21 ad Exp $ */
 
 /*-
  * Copyright (c)2003, 2005 YAMAMOTO Takashi,
@@ -32,6 +32,7 @@
 struct uvm_object;
 struct uvm_ractx;
 
+void uvm_ra_init(void);
 struct uvm_ractx *uvm_ra_allocctx(void);
 void uvm_ra_freectx(struct uvm_ractx *);
 void uvm_ra_request(struct uvm_ractx *, int, struct uvm_object *, off_t,
diff --git a/sys/uvm/uvm_stat.h b/sys/uvm/uvm_stat.h
index 1feb2d4c8ad0..5b288638a080 100644
--- a/sys/uvm/uvm_stat.h
+++ b/sys/uvm/uvm_stat.h
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_stat.h,v 1.39 2006/02/16 20:17:20 perry Exp $ */
+/* $NetBSD: uvm_stat.h,v 1.40 2008/01/02 11:49:21 ad Exp $ */
 
 /*
  *
@@ -70,7 +70,7 @@ struct uvm_history {
 	int f;			/* next free one */
 	int unused;		/* old location of struct simplelock */
 	struct uvm_history_ent *e;	/* the malloc'd entries */
-	struct simplelock l;	/* lock on this history */
+	kmutex_t l;		/* lock on this history */
 };
 
 LIST_HEAD(uvm_history_head, uvm_history);
@@ -117,7 +117,7 @@ do { \
 	(NAME).namelen = strlen(__STRING(NAME)); \
 	(NAME).n = (N); \
 	(NAME).f = 0; \
-	simple_lock_init(&(NAME).l); \
+	mutex_init(&(NAME).l, MUTEX_SPIN, IPL_HIGH); \
 	(NAME).e = (struct uvm_history_ent *) \
 		malloc(sizeof(struct uvm_history_ent) * (N), M_TEMP, \
 		    M_WAITOK); \
@@ -131,7 +131,7 @@ do { \
 	(NAME).namelen = strlen(__STRING(NAME)); \
 	(NAME).n = sizeof(BUF) / sizeof(struct uvm_history_ent); \
 	(NAME).f = 0; \
-	simple_lock_init(&(NAME).l); \
+	mutex_init(&(NAME).l, MUTEX_SPIN, IPL_HIGH); \
 	(NAME).e = (struct uvm_history_ent *) (BUF); \
 	memset((NAME).e, 0, sizeof(struct uvm_history_ent) * (NAME).n); \
 	LIST_INSERT_HEAD(&uvm_histories, &(NAME), list); \
@@ -152,12 +152,11 @@ do { \
 #define UVMHIST_LOG(NAME,FMT,A,B,C,D) \
 do { \
-	int _i_, _s_ = splhigh(); \
-	simple_lock(&(NAME).l); \
+	int _i_; \
+	mutex_enter(&(NAME).l); \
 	_i_ = (NAME).f; \
 	(NAME).f = (_i_ + 1 < (NAME).n) ? _i_ + 1 : 0; \
-	simple_unlock(&(NAME).l); \
-	splx(_s_); \
+	mutex_exit(&(NAME).l); \
 	if (!cold) \
 		microtime(&(NAME).e[_i_].tv); \
 	(NAME).e[_i_].cpunum = cpu_number(); \
@@ -176,11 +175,9 @@ do { \
 #define UVMHIST_CALLED(NAME) \
 do { \
 	{ \
-		int _s = splhigh(); \
-		simple_lock(&(NAME).l); \
+		mutex_enter(&(NAME).l); \
 		_uvmhist_call = _uvmhist_cnt++; \
-		simple_unlock(&(NAME).l); \
-		splx(_s); \
+		mutex_exit(&(NAME).l); \
 	} \
 	UVMHIST_LOG(NAME,"called!", 0, 0, 0, 0); \
 } while (/*CONSTCOND*/ 0)
diff --git a/sys/uvm/uvm_swap.c b/sys/uvm/uvm_swap.c
index e2ea33f8cf46..3b1c7c66436f 100644
--- a/sys/uvm/uvm_swap.c
+++ b/sys/uvm/uvm_swap.c
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_swap.c,v 1.133 2007/12/20 23:03:15 dsl Exp $ */
+/* $NetBSD: uvm_swap.c,v 1.134 2008/01/02 11:49:21 ad Exp $ */
 
 /*
  * Copyright (c) 1995, 1996, 1997 Matthew R.
Green @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_swap.c,v 1.133 2007/12/20 23:03:15 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_swap.c,v 1.134 2008/01/02 11:49:21 ad Exp $"); #include "fs_nfs.h" #include "opt_uvmhist.h" @@ -188,26 +188,6 @@ POOL_INIT(vndxfer_pool, sizeof(struct vndxfer), 0, 0, 0, "swp vnx", NULL, POOL_INIT(vndbuf_pool, sizeof(struct vndbuf), 0, 0, 0, "swp vnd", NULL, IPL_BIO); -#define getvndxfer(vnx) do { \ - int sp = splbio(); \ - vnx = pool_get(&vndxfer_pool, PR_WAITOK); \ - splx(sp); \ -} while (/*CONSTCOND*/ 0) - -#define putvndxfer(vnx) { \ - pool_put(&vndxfer_pool, (void *)(vnx)); \ -} - -#define getvndbuf(vbp) do { \ - int sp = splbio(); \ - vbp = pool_get(&vndbuf_pool, PR_WAITOK); \ - splx(sp); \ -} while (/*CONSTCOND*/ 0) - -#define putvndbuf(vbp) { \ - pool_put(&vndbuf_pool, (void *)(vbp)); \ -} - /* * local variables */ @@ -269,8 +249,7 @@ uvm_swap_init(void) uvmexp.nswapdev = 0; rw_init(&swap_syscall_lock); cv_init(&uvm.scheduler_cv, "schedule"); - /* XXXSMP should be adaptive, but needs vmobjlock replaced */ - mutex_init(&uvm_swap_data_lock, MUTEX_SPIN, IPL_NONE); + mutex_init(&uvm_swap_data_lock, MUTEX_DEFAULT, IPL_NONE); /* XXXSMP should be at IPL_VM, but for audio interrupt handlers. */ mutex_init(&uvm_scheduler_mutex, MUTEX_SPIN, IPL_SCHED); @@ -1103,7 +1082,7 @@ swstrategy(struct buf *bp) { struct swapdev *sdp; struct vnode *vp; - int s, pageno, bn; + int pageno, bn; UVMHIST_FUNC("swstrategy"); UVMHIST_CALLED(pdhist); /* @@ -1139,9 +1118,10 @@ swstrategy(struct buf *bp) * to sw_reg_strategy(). */ - switch (sdp->swd_vp->v_type) { + vp = sdp->swd_vp; /* swapdev vnode pointer */ + switch (vp->v_type) { default: - panic("swstrategy: vnode type 0x%x", sdp->swd_vp->v_type); + panic("swstrategy: vnode type 0x%x", vp->v_type); case VBLK: @@ -1149,9 +1129,7 @@ swstrategy(struct buf *bp) * must convert "bp" from an I/O on /dev/drum to an I/O * on the swapdev (sdp). */ - s = splbio(); bp->b_blkno = bn; /* swapdev block number */ - vp = sdp->swd_vp; /* swapdev vnode pointer */ bp->b_dev = sdp->swd_dev; /* swapdev dev_t */ /* @@ -1159,15 +1137,19 @@ swstrategy(struct buf *bp) * drum's v_numoutput counter to the swapdevs. */ if ((bp->b_flags & B_READ) == 0) { + mutex_enter(bp->b_objlock); vwakeup(bp); /* kills one 'v_numoutput' on drum */ - V_INCR_NUMOUTPUT(vp); /* put it on swapdev */ + mutex_exit(bp->b_objlock); + mutex_enter(&vp->v_interlock); + vp->v_numoutput++; /* put it on swapdev */ + mutex_exit(&vp->v_interlock); } /* * finally plug in swapdev vnode and start I/O */ bp->b_vp = vp; - splx(s); + bp->b_objlock = &vp->v_interlock; VOP_STRATEGY(vp, bp); return; @@ -1234,7 +1216,7 @@ sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn) * allocate a vndxfer head for this transfer and point it to * our buffer. */ - getvndxfer(vnx); + vnx = pool_get(&vndxfer_pool, PR_WAITOK); vnx->vx_flags = VX_BUSY; vnx->vx_error = 0; vnx->vx_pending = 0; @@ -1309,9 +1291,11 @@ sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn) * at the front of the nbp structure so that you can * cast pointers between the two structure easily. 
*/ - getvndbuf(nbp); - BUF_INIT(&nbp->vb_buf); - nbp->vb_buf.b_flags = bp->b_flags | B_CALL; + nbp = pool_get(&vndbuf_pool, PR_WAITOK); + buf_init(&nbp->vb_buf); + nbp->vb_buf.b_flags = bp->b_flags; + nbp->vb_buf.b_cflags = bp->b_cflags; + nbp->vb_buf.b_oflags = bp->b_oflags; nbp->vb_buf.b_bcount = sz; nbp->vb_buf.b_bufsize = sz; nbp->vb_buf.b_error = 0; @@ -1321,6 +1305,7 @@ sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn) nbp->vb_buf.b_rawblkno = nbp->vb_buf.b_blkno; nbp->vb_buf.b_iodone = sw_reg_biodone; nbp->vb_buf.b_vp = vp; + nbp->vb_buf.b_objlock = &vp->v_interlock; if (vp->v_type == VBLK) { nbp->vb_buf.b_dev = vp->v_rdev; } @@ -1332,12 +1317,14 @@ sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn) */ s = splbio(); if (vnx->vx_error != 0) { - putvndbuf(nbp); + buf_destroy(&nbp->vb_buf); + pool_put(&vndbuf_pool, nbp); goto out; } vnx->vx_pending++; /* sort it in and start I/O if we are not over our limit */ + /* XXXAD locking */ BUFQ_PUT(sdp->swd_tab, &nbp->vb_buf); sw_reg_start(sdp); splx(s); @@ -1354,9 +1341,9 @@ sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn) out: /* Arrive here at splbio */ vnx->vx_flags &= ~VX_BUSY; if (vnx->vx_pending == 0) { - if (vnx->vx_error != 0) - bp->b_error = vnx->vx_error; - putvndxfer(vnx); + error = vnx->vx_error; + pool_put(&vndxfer_pool, vnx); + bp->b_error = error; biodone(bp); } splx(s); @@ -1371,6 +1358,7 @@ static void sw_reg_start(struct swapdev *sdp) { struct buf *bp; + struct vnode *vp; UVMHIST_FUNC("sw_reg_start"); UVMHIST_CALLED(pdhist); /* recursion control */ @@ -1388,10 +1376,14 @@ sw_reg_start(struct swapdev *sdp) UVMHIST_LOG(pdhist, "sw_reg_start: bp %p vp %p blkno %p cnt %lx", bp, bp->b_vp, bp->b_blkno, bp->b_bcount); - if ((bp->b_flags & B_READ) == 0) - V_INCR_NUMOUTPUT(bp->b_vp); - - VOP_STRATEGY(bp->b_vp, bp); + vp = bp->b_vp; + KASSERT(bp->b_objlock == &vp->v_interlock); + if ((bp->b_flags & B_READ) == 0) { + mutex_enter(&vp->v_interlock); + vp->v_numoutput++; + mutex_exit(&vp->v_interlock); + } + VOP_STRATEGY(vp, bp); } sdp->swd_flags &= ~SWF_BUSY; } @@ -1437,7 +1429,7 @@ sw_reg_iodone(struct work *wk, void *dummy) if (vbp->vb_buf.b_error != 0) { /* pass error upward */ - error = vbp->vb_buf.b_error; + error = vbp->vb_buf.b_error ? 
vbp->vb_buf.b_error : EIO; UVMHIST_LOG(pdhist, " got error=%d !", error, 0, 0, 0); vnx->vx_error = error; } @@ -1445,7 +1437,8 @@ sw_reg_iodone(struct work *wk, void *dummy) /* * kill vbp structure */ - putvndbuf(vbp); + buf_destroy(&vbp->vb_buf); + pool_put(&vndbuf_pool, vbp); /* * wrap up this transaction if it has run to completion or, in @@ -1453,18 +1446,19 @@ sw_reg_iodone(struct work *wk, void *dummy) */ if (vnx->vx_error != 0) { /* pass error upward */ - pbp->b_error = vnx->vx_error; + error = vnx->vx_error; if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) { - putvndxfer(vnx); + pbp->b_error = error; biodone(pbp); + pool_put(&vndxfer_pool, vnx); } } else if (pbp->b_resid == 0) { KASSERT(vnx->vx_pending == 0); if ((vnx->vx_flags & VX_BUSY) == 0) { UVMHIST_LOG(pdhist, " iodone error=%d !", pbp, vnx->vx_error, 0, 0); - putvndxfer(vnx); biodone(pbp); + pool_put(&vndxfer_pool, vnx); } } @@ -1690,7 +1684,7 @@ uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags) daddr_t startblk; struct buf *bp; vaddr_t kva; - int error, s, mapinflags; + int error, mapinflags; bool write, async; UVMHIST_FUNC("uvm_swap_io"); UVMHIST_CALLED(pdhist); @@ -1719,19 +1713,19 @@ uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags) * now allocate a buf for the i/o. */ - bp = getiobuf(); + bp = getiobuf(swapdev_vp, true); /* * fill in the bp/sbp. we currently route our i/o through * /dev/drum's vnode [swapdev_vp]. */ - bp->b_flags = B_BUSY | B_NOCACHE | (flags & (B_READ|B_ASYNC)); + bp->b_cflags = BC_BUSY | BC_NOCACHE; + bp->b_flags = (flags & (B_READ|B_ASYNC)); bp->b_proc = &proc0; /* XXX */ bp->b_vnbufs.le_next = NOLIST; bp->b_data = (void *)kva; bp->b_blkno = startblk; - bp->b_vp = swapdev_vp; bp->b_bufsize = bp->b_bcount = npages << PAGE_SHIFT; /* @@ -1739,9 +1733,9 @@ uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags) */ if (write) { - s = splbio(); - V_INCR_NUMOUTPUT(swapdev_vp); - splx(s); + mutex_enter(&swapdev_vp->v_interlock); + swapdev_vp->v_numoutput++; + mutex_exit(&swapdev_vp->v_interlock); } /* @@ -1749,7 +1743,6 @@ uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags) */ if (async) { - bp->b_flags |= B_CALL; bp->b_iodone = uvm_aio_biodone; UVMHIST_LOG(pdhist, "doing async!", 0, 0, 0, 0); if (curlwp == uvm.pagedaemon_lwp) @@ -1757,6 +1750,7 @@ uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags) else BIO_SETPRIO(bp, BPRIO_TIMELIMITED); } else { + bp->b_iodone = NULL; BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); } UVMHIST_LOG(pdhist, @@ -1787,11 +1781,13 @@ uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags) * now dispose of the buf and we're done. */ - s = splbio(); - if (write) + if (write) { + mutex_enter(&swapdev_vp->v_interlock); vwakeup(bp); + mutex_exit(&swapdev_vp->v_interlock); + } putiobuf(bp); - splx(s); UVMHIST_LOG(pdhist, "<- done (sync) error=%d", error, 0, 0, 0); + return (error); } diff --git a/sys/uvm/uvm_unix.c b/sys/uvm/uvm_unix.c index b269db273c91..b899219d2f37 100644 --- a/sys/uvm/uvm_unix.c +++ b/sys/uvm/uvm_unix.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_unix.c,v 1.39 2007/12/20 23:03:15 dsl Exp $ */ +/* $NetBSD: uvm_unix.c,v 1.40 2008/01/02 11:49:21 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. 
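[Note: taken together, the swstrategy() and uvm_swap_io() hunks above converge on one pattern: I/O buffers come from getiobuf()/putiobuf(), busy state lives in b_cflags as BC_BUSY, and the vnode's v_numoutput counter is manipulated under v_interlock rather than splbio(). Below is a condensed sketch of a synchronous swap write under the new rules; the function name and the bare-bones error handling are invented for illustration, and it assumes, as the uvm_swap_io() hunk suggests, that getiobuf(vp, true) associates the buffer with vp and its interlock:]

    static int
    swap_write_sketch(struct vnode *vp, void *kva, int bytes, daddr_t blkno)
    {
            struct buf *bp;
            int error;

            bp = getiobuf(vp, true);          /* may sleep for a free buf */
            bp->b_cflags = BC_BUSY | BC_NOCACHE;
            bp->b_flags = 0;                  /* a write: B_READ clear */
            bp->b_data = kva;
            bp->b_blkno = blkno;
            bp->b_bufsize = bp->b_bcount = bytes;
            bp->b_iodone = NULL;              /* synchronous: no callback */

            /* count the pending write under the interlock, not splbio() */
            mutex_enter(&vp->v_interlock);
            vp->v_numoutput++;
            mutex_exit(&vp->v_interlock);

            VOP_STRATEGY(vp, bp);
            error = biowait(bp);

            /* vwakeup() drops v_numoutput and is now called locked */
            mutex_enter(&vp->v_interlock);
            vwakeup(bp);
            mutex_exit(&vp->v_interlock);
            putiobuf(bp);
            return error;
    }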
@@ -50,7 +50,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_unix.c,v 1.39 2007/12/20 23:03:15 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_unix.c,v 1.40 2008/01/02 11:49:21 ad Exp $"); #include "opt_pax.h" @@ -83,15 +83,20 @@ sys_obreak(struct lwp *l, const struct sys_obreak_args *uap, register_t *retval) vaddr_t new, old; int error; + mutex_enter(&p->p_auxlock); old = (vaddr_t)vm->vm_daddr; new = round_page((vaddr_t)SCARG(uap, nsize)); - if ((new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur && new > old) + if ((new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur && new > old) { + mutex_exit(&p->p_auxlock); return (ENOMEM); + } old = round_page(old + ptoa(vm->vm_dsize)); - if (new == old) + if (new == old) { + mutex_exit(&p->p_auxlock); return (0); + } /* * grow or shrink? @@ -114,6 +119,7 @@ sys_obreak(struct lwp *l, const struct sys_obreak_args *uap, register_t *retval) if (error) { uprintf("sbrk: grow %ld failed, error = %d\n", new - old, error); + mutex_exit(&p->p_auxlock); return (error); } vm->vm_dsize += atop(new - old); @@ -121,6 +127,8 @@ sys_obreak(struct lwp *l, const struct sys_obreak_args *uap, register_t *retval) uvm_deallocate(&vm->vm_map, new, old - new); vm->vm_dsize -= atop(old - new); } + mutex_exit(&p->p_auxlock); + return (0); } diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c index 322cf48251a8..c72eab9c15c4 100644 --- a/sys/uvm/uvm_vnode.c +++ b/sys/uvm/uvm_vnode.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_vnode.c,v 1.89 2007/12/01 10:40:28 yamt Exp $ */ +/* $NetBSD: uvm_vnode.c,v 1.90 2008/01/02 11:49:21 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -50,7 +50,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_vnode.c,v 1.89 2007/12/01 10:40:28 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_vnode.c,v 1.90 2008/01/02 11:49:21 ad Exp $"); #include "fs_nfs.h" #include "opt_uvmhist.h" @@ -148,8 +148,9 @@ uvn_put(struct uvm_object *uobj, voff_t offlo, voff_t offhi, int flags) struct vnode *vp = (struct vnode *)uobj; int error; - LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); + KASSERT(mutex_owned(&vp->v_interlock)); error = VOP_PUTPAGES(vp, offlo, offhi, flags); + return error; } @@ -179,19 +180,16 @@ uvn_get(struct uvm_object *uobj, voff_t offset, UVMHIST_LOG(ubchist, "vp %p off 0x%x", vp, (int)offset, 0,0); if ((access_type & VM_PROT_WRITE) == 0 && (flags & PGO_LOCKED) == 0) { - simple_unlock(&vp->v_interlock); vn_ra_allocctx(vp); uvm_ra_request(vp->v_ractx, advice, uobj, offset, *npagesp << PAGE_SHIFT); - simple_lock(&vp->v_interlock); } error = VOP_GETPAGES(vp, offset, pps, npagesp, centeridx, access_type, advice, flags); - LOCK_ASSERT(((flags & PGO_LOCKED) != 0 && - simple_lock_held(&vp->v_interlock)) || - (flags & PGO_LOCKED) == 0); + KASSERT(((flags & PGO_LOCKED) != 0 && mutex_owned(&vp->v_interlock)) || + (flags & PGO_LOCKED) == 0); return error; } @@ -265,9 +263,9 @@ uvn_findpage(struct uvm_object *uobj, voff_t offset, struct vm_page **pgp, UVMHIST_LOG(ubchist, "nowait",0,0,0,0); return 0; } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); uvm_wait("uvn_fp1"); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); continue; } UVMHIST_LOG(ubchist, "alloced %p", pg,0,0,0); @@ -287,7 +285,7 @@ uvn_findpage(struct uvm_object *uobj, voff_t offset, struct vm_page **pgp, UVMHIST_LOG(ubchist, "wait %p", pg,0,0,0); UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0, "uvn_fp2", 0); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); continue; } @@ -337,7 +335,7 @@ uvm_vnp_setsize(struct vnode *vp, voff_t 
newsize) voff_t oldsize; UVMHIST_FUNC("uvm_vnp_setsize"); UVMHIST_CALLED(ubchist); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x", vp, vp->v_size, newsize, 0); @@ -356,24 +354,24 @@ uvm_vnp_setsize(struct vnode *vp, voff_t newsize) if (oldsize > pgend) { (void) uvn_put(uobj, pgend, 0, PGO_FREE | PGO_SYNCIO); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); } vp->v_size = vp->v_writesize = newsize; - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } void uvm_vnp_setwritesize(struct vnode *vp, voff_t newsize) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); KASSERT(newsize != VSIZENOTSET); KASSERT(vp->v_size != VSIZENOTSET); KASSERT(vp->v_writesize != VSIZENOTSET); KASSERT(vp->v_size <= vp->v_writesize); KASSERT(vp->v_size <= newsize); vp->v_writesize = newsize; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } /*
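[Note: the uvm_vnode.c hunks complete the pattern: every simple_lock()/simple_unlock() on vmobjlock and v_interlock becomes mutex_enter()/mutex_exit(), and LOCK_ASSERT(simple_lock_held(...)) becomes KASSERT(mutex_owned(...)). The uvn_findpage() change deserves a sketch, since it shows the rule the whole diff follows: the object lock must be released around anything that sleeps, then retaken before the lookup is retried. The helper below is an illustrative reduction of that loop (it omits the busy-page case), not code from the patch:]

    /*
     * Find or allocate the page at `offset'.  Caller holds
     * uobj->vmobjlock; it is dropped and retaken around the sleep,
     * so the lookup must be retried from scratch afterwards.
     */
    static struct vm_page *
    uvn_getpage_sketch(struct uvm_object *uobj, voff_t offset)
    {
            struct vm_page *pg;

            KASSERT(mutex_owned(&uobj->vmobjlock));
            for (;;) {
                    pg = uvm_pagelookup(uobj, offset);
                    if (pg == NULL)
                            pg = uvm_pagealloc(uobj, offset, NULL, 0);
                    if (pg != NULL)
                            return pg;
                    mutex_exit(&uobj->vmobjlock);   /* can't sleep holding it */
                    uvm_wait("uvn_fp1");            /* wait for free pages */
                    mutex_enter(&uobj->vmobjlock);  /* retake, then retry */
            }
    }

[The busy-page branch in the real uvn_findpage() does the equivalent dance through UVM_UNLOCK_AND_WAIT(), which releases vmobjlock as it sleeps; that is why the hunk retakes the mutex with mutex_enter() before continuing the loop.]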