diff --git a/sys/arch/acorn32/mainbus/fd.c b/sys/arch/acorn32/mainbus/fd.c index d34f23df5a5f..42379dad0f08 100644 --- a/sys/arch/acorn32/mainbus/fd.c +++ b/sys/arch/acorn32/mainbus/fd.c @@ -1,4 +1,4 @@ -/* $NetBSD: fd.c,v 1.34 2007/10/25 12:48:11 yamt Exp $ */ +/* $NetBSD: fd.c,v 1.35 2008/01/02 11:48:20 ad Exp $ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. @@ -89,7 +89,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.34 2007/10/25 12:48:11 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.35 2008/01/02 11:48:20 ad Exp $"); #include "opt_ddb.h" @@ -1513,11 +1513,11 @@ fdformat(dev, finfo, l) struct buf *bp; /* set up a buffer header for fdstrategy() */ - bp = (struct buf *)malloc(sizeof(struct buf), M_TEMP, M_NOWAIT); + bp = getiobuf(NULL, false); if(bp == 0) return ENOBUFS; - memset((void *)bp, 0, sizeof(struct buf)); - bp->b_flags = B_BUSY | B_PHYS | B_FORMAT; + bp->b_flags = B_PHYS | B_FORMAT; + bp->b_cflags |= BC_BUSY; bp->b_proc = l->l_proc; bp->b_dev = dev; @@ -1540,21 +1540,22 @@ fdformat(dev, finfo, l) fdstrategy(bp); /* ...and wait for it to complete */ - s = splbio(); - while(!(bp->b_flags & B_DONE)) { - rv = tsleep((void *)bp, PRIBIO, "fdform", 20 * hz); + /* XXX very dodgy */ + mutex_enter(bp->b_objlock); + while (!(bp->b_oflags & BO_DONE)) { + rv = cv_timedwait(&bp->b_done, bp->b_objlock, 20 * hz); if (rv == EWOULDBLOCK) break; } - splx(s); - + mutex_exit(bp->b_objlock); + if (rv == EWOULDBLOCK) { /* timed out */ rv = EIO; biodone(bp); } else if (bp->b_error != 0) rv = bp->b_error; - free(bp, M_TEMP); + putiobuf(bp); return rv; } diff --git a/sys/arch/algor/algor/disksubr.c b/sys/arch/algor/algor/disksubr.c index d9effccf1115..198f56c9305b 100644 --- a/sys/arch/algor/algor/disksubr.c +++ b/sys/arch/algor/algor/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.16 2007/10/17 19:52:54 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.17 2008/01/02 11:48:20 ad Exp $ */ /* * Copyright (c) 1994, 1995, 1996 Carnegie-Mellon University. @@ -29,7 +29,7 @@ #include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.16 2007/10/17 19:52:54 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.17 2008/01/02 11:48:20 ad Exp $"); #include #include @@ -105,7 +105,7 @@ readdisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -219,7 +219,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, dlp = (struct disklabel *)((char*)bp->b_data + LABELOFFSET); *dlp = *lp; /* struct assignment */ - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/alpha/alpha/disksubr.c b/sys/arch/alpha/alpha/disksubr.c index 597bf76363d1..7d5cf1633f33 100644 --- a/sys/arch/alpha/alpha/disksubr.c +++ b/sys/arch/alpha/alpha/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.35 2007/10/17 19:52:55 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.36 2008/01/02 11:48:21 ad Exp $ */ /* * Copyright (c) 1994, 1995, 1996 Carnegie-Mellon University.
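The fdformat() hunks above swap a hand-rolled struct buf for the iobuf pool and wait for completion on the buffer's condition variable under b_objlock, instead of tsleep()ing on B_DONE at splbio(). A minimal sketch of the new idiom, assuming thread context; the strategy routine is passed in and the request setup is elided, so this illustrates the pattern rather than reproducing the driver's code:

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/mutex.h>
#include <sys/condvar.h>

/* Sketch: issue one request on a private iobuf and wait for BO_DONE. */
static int
issue_and_timedwait(dev_t dev, void (*strategy)(struct buf *))
{
	struct buf *bp;
	int rv = 0;

	bp = getiobuf(NULL, false);	/* no-wait allocation, may fail */
	if (bp == NULL)
		return ENOBUFS;
	bp->b_flags = B_PHYS;		/* I/O description lives in b_flags */
	bp->b_cflags |= BC_BUSY;	/* ownership lives in b_cflags */
	bp->b_dev = dev;
	/* ... block number, byte count and data pointer elided ... */

	(*strategy)(bp);

	/* biodone() sets BO_DONE under b_objlock and signals b_done. */
	mutex_enter(bp->b_objlock);
	while ((bp->b_oflags & BO_DONE) == 0) {
		rv = cv_timedwait(&bp->b_done, bp->b_objlock, 20 * hz);
		if (rv == EWOULDBLOCK)	/* timed out */
			break;
	}
	mutex_exit(bp->b_objlock);

	if (rv == EWOULDBLOCK)
		rv = EIO;	/* simplified: a real driver must reap the in-flight request */
	else if (bp->b_error != 0)
		rv = bp->b_error;
	putiobuf(bp);
	return rv;
}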
@@ -29,7 +29,7 @@ #include /* RCS ID & Copyright macro defns */ -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.35 2007/10/17 19:52:55 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.36 2008/01/02 11:48:21 ad Exp $"); #include #include @@ -110,7 +110,7 @@ readdisklabel(dev, strat, lp, clp) i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~BO_DONE; bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -244,8 +244,9 @@ writedisklabel(dev, strat, lp, clp) dp[63] = sum; } - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_flags &= ~B_READ; bp->b_flags |= B_WRITE; + bp->b_oflags &= ~BO_DONE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/alpha/alpha/pmap.c b/sys/arch/alpha/alpha/pmap.c index 7b17e921c021..04079b405538 100644 --- a/sys/arch/alpha/alpha/pmap.c +++ b/sys/arch/alpha/alpha/pmap.c @@ -1,7 +1,7 @@ -/* $NetBSD: pmap.c,v 1.228 2007/11/07 00:23:14 ad Exp $ */ +/* $NetBSD: pmap.c,v 1.229 2008/01/02 11:48:21 ad Exp $ */ /*- - * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc. + * Copyright (c) 1998, 1999, 2000, 2001, 2007 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -145,7 +145,7 @@ #include /* RCS ID & Copyright macro defns */ -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.228 2007/11/07 00:23:14 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.229 2008/01/02 11:48:21 ad Exp $"); #include #include @@ -154,14 +154,12 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.228 2007/11/07 00:23:14 ad Exp $"); #include #include #include -#ifdef SYSVSHM #include -#endif +#include +#include #include -#include -#include #if defined(_PMAP_MAY_USE_PROM_CONSOLE) || defined(MULTIPROCESSOR) #include #endif @@ -326,11 +324,8 @@ static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS]; /* * Locking: * - * This pmap module uses two types of locks: `normal' (sleep) - * locks and `simple' (spin) locks. They are used as follows: - * - * READ/WRITE SPIN LOCKS - * --------------------- + * READ/WRITE LOCKS + * ---------------- * * * pmap_main_lock - This lock is used to prevent deadlock and/or * provide mutex access to the pmap module. Most operations lock @@ -343,10 +338,10 @@ static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS]; * the PV->pmap direction. Since only one thread can hold a write * lock at a time, this provides the mutex. * - * SIMPLE LOCKS - * ------------ + * MUTEXES + * ------- * - * * pm_slock (per-pmap) - This lock protects all of the members + * * pm_lock (per-pmap) - This lock protects all of the members * of the pmap structure itself. This lock will be asserted * in pmap_activate() and pmap_deactivate() from a critical * section of mi_switch(), and must never sleep. Note that @@ -354,27 +349,27 @@ static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS]; * memory allocation *must* be blocked while this lock is * asserted. * - * * pvh_slock (per-vm_page) - This lock protects the PV list + * * pvh_lock (per-vm_page) - This lock protects the PV list * for a specified managed page. * - * * pmap_all_pmaps_slock - This lock protects the global list of - * all pmaps. Note that a pm_slock must never be held while this + * * pmap_all_pmaps_lock - This lock protects the global list of + * all pmaps. Note that a pm_lock must never be held while this * lock is held. 
* - * * pmap_growkernel_slock - This lock protects pmap_growkernel() + * * pmap_growkernel_lock - This lock protects pmap_growkernel() * and the virtual_end variable. * - * There is a lock ordering constraint for pmap_growkernel_slock. + * There is a lock ordering constraint for pmap_growkernel_lock. * pmap_growkernel() acquires the locks in the following order: * - * pmap_growkernel_slock -> pmap_all_pmaps_slock -> - * pmap->pm_slock + * pmap_growkernel_lock -> pmap_all_pmaps_lock -> + * pmap->pm_lock * - * But pmap_lev1map_create() is called with pmap->pm_slock held, - * and also needs to acquire the pmap_growkernel_slock. So, + * But pmap_lev1map_create() is called with pmap->pm_lock held, + * and also needs to acquire the pmap_growkernel_lock. So, * we require that the caller of pmap_lev1map_create() (currently, - * the only caller is pmap_enter()) acquire pmap_growkernel_slock - * before acquring pmap->pm_slock. + * the only caller is pmap_enter()) acquire pmap_growkernel_lock + * before acquring pmap->pm_lock. * * Address space number management (global ASN counters and per-pmap * ASN state) are not locked; they use arrays of values indexed @@ -384,14 +379,14 @@ static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS]; * with the pmap already locked by the caller (which will be * an interface function). */ -/* static struct lock pmap_main_lock; */ -static struct simplelock pmap_all_pmaps_slock; -static struct simplelock pmap_growkernel_slock; +static krwlock_t pmap_main_lock; +static kmutex_t pmap_all_pmaps_lock; +static kmutex_t pmap_growkernel_lock; -#define PMAP_MAP_TO_HEAD_LOCK() /* nothing */ -#define PMAP_MAP_TO_HEAD_UNLOCK() /* nothing */ -#define PMAP_HEAD_TO_MAP_LOCK() /* nothing */ -#define PMAP_HEAD_TO_MAP_UNLOCK() /* nothing */ +#define PMAP_MAP_TO_HEAD_LOCK() rw_enter(&pmap_main_lock, RW_READER) +#define PMAP_MAP_TO_HEAD_UNLOCK() rw_exit(&pmap_main_lock) +#define PMAP_HEAD_TO_MAP_LOCK() rw_enter(&pmap_main_lock, RW_WRITER) +#define PMAP_HEAD_TO_MAP_UNLOCK() rw_exit(&pmap_main_lock) #if defined(MULTIPROCESSOR) /* @@ -421,21 +416,9 @@ static struct pmap_tlb_shootdown_q { int pq_pte; /* aggregate PTE bits */ int pq_count; /* number of pending requests */ int pq_tbia; /* pending global flush */ - struct simplelock pq_slock; /* spin lock on queue */ + kmutex_t pq_lock; /* spin lock on queue */ } pmap_tlb_shootdown_q[ALPHA_MAXPROCS]; -#define PSJQ_LOCK(pq, s) \ -do { \ - s = splvm(); \ - simple_lock(&(pq)->pq_slock); \ -} while (/*CONSTCOND*/0) - -#define PSJQ_UNLOCK(pq, s) \ -do { \ - simple_unlock(&(pq)->pq_slock); \ - splx(s); \ -} while (/*CONSTCOND*/0) - /* If we have more pending jobs than this, we just nail the whole TLB. */ #define PMAP_TLB_SHOOTDOWN_MAXJOBS 6 @@ -511,16 +494,6 @@ static void pmap_physpage_free(paddr_t); static int pmap_physpage_addref(void *); static int pmap_physpage_delref(void *); -/* - * Define PMAP_NO_LAZY_LEV1MAP in order to have a lev1map allocated - * in pmap_create(), rather than when the first mapping is entered. - * This causes pmaps to use an extra page of memory if no mappings - * are entered in them, but in practice this is probably not going - * to be a problem, and it allows us to avoid locking pmaps in - * pmap_activate(). - */ -#define PMAP_NO_LAZY_LEV1MAP - /* * PMAP_ISACTIVE{,_TEST}: * @@ -919,8 +892,8 @@ pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) (i*PAGE_SIZE*NPTEPG))] = pte; } - /* Initialize the pmap_growkernel_slock. */ - simple_lock_init(&pmap_growkernel_slock); + /* Initialize the pmap_growkernel_lock. 
*/ + mutex_init(&pmap_growkernel_lock, MUTEX_DEFAULT, IPL_NONE); /* * Set up level three page table (lev3map) @@ -952,8 +925,8 @@ pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) /* * Initialize the locks. */ - /* spinlockinit(&pmap_main_lock, "pmaplk", 0); */ - simple_lock_init(&pmap_all_pmaps_slock); + rw_init(&pmap_main_lock); + mutex_init(&pmap_all_pmaps_lock, MUTEX_DEFAULT, IPL_NONE); /* * Initialize kernel pmap. Note that all kernel mappings @@ -970,7 +943,7 @@ pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) pmap_kernel()->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen; } - simple_lock_init(&pmap_kernel()->pm_slock); + mutex_init(&pmap_kernel()->pm_lock, MUTEX_DEFAULT, IPL_NONE); TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list); #if defined(MULTIPROCESSOR) @@ -982,7 +955,8 @@ pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) IPL_VM); for (i = 0; i < ALPHA_MAXPROCS; i++) { TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head); - simple_lock_init(&pmap_tlb_shootdown_q[i].pq_slock); + mutex_init(&pmap_tlb_shootdown_q[i].pq_lock, MUTEX_DEFAULT, + IPL_VM); } #endif @@ -998,7 +972,7 @@ pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids) /* * Mark the kernel pmap `active' on this processor. */ - atomic_setbits_ulong(&pmap_kernel()->pm_cpus, + atomic_or_ulong(&pmap_kernel()->pm_cpus, (1UL << cpu_number())); } @@ -1197,16 +1171,14 @@ pmap_create(void) /* XXX Locking? */ pmap->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen; } - simple_lock_init(&pmap->pm_slock); + mutex_init(&pmap->pm_lock, MUTEX_DEFAULT, IPL_NONE); - simple_lock(&pmap_all_pmaps_slock); + mutex_enter(&pmap_all_pmaps_lock); TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list); - simple_unlock(&pmap_all_pmaps_slock); + mutex_exit(&pmap_all_pmaps_lock); -#ifdef PMAP_NO_LAZY_LEV1MAP i = pmap_lev1map_create(pmap, cpu_number()); KASSERT(i == 0); -#endif return (pmap); } @@ -1220,30 +1192,23 @@ pmap_create(void) void pmap_destroy(pmap_t pmap) { - int refs; #ifdef DEBUG if (pmapdebug & PDB_FOLLOW) printf("pmap_destroy(%p)\n", pmap); #endif - PMAP_LOCK(pmap); - refs = --pmap->pm_count; - PMAP_UNLOCK(pmap); - - if (refs > 0) + if (atomic_dec_uint_nv(&pmap->pm_count) > 0) return; /* * Remove it from the global list of all pmaps. 
*/ - simple_lock(&pmap_all_pmaps_slock); + mutex_enter(&pmap_all_pmaps_lock); TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list); - simple_unlock(&pmap_all_pmaps_slock); + mutex_exit(&pmap_all_pmaps_lock); -#ifdef PMAP_NO_LAZY_LEV1MAP pmap_lev1map_destroy(pmap, cpu_number()); -#endif /* * Since the pmap is supposed to contain no valid @@ -1252,6 +1217,7 @@ pmap_destroy(pmap_t pmap) */ KASSERT(pmap->pm_lev1map == kernel_lev1map); + mutex_destroy(&pmap->pm_lock); pool_cache_put(&pmap_pmap_cache, pmap); } @@ -1269,9 +1235,7 @@ pmap_reference(pmap_t pmap) printf("pmap_reference(%p)\n", pmap); #endif - PMAP_LOCK(pmap); - pmap->pm_count++; - PMAP_UNLOCK(pmap); + atomic_inc_uint(&pmap->pm_count); } /* @@ -1486,7 +1450,7 @@ pmap_page_protect(struct vm_page *pg, vm_prot_t prot) case VM_PROT_READ|VM_PROT_EXECUTE: case VM_PROT_READ: PMAP_HEAD_TO_MAP_LOCK(); - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) { PMAP_LOCK(pv->pv_pmap); if (*pv->pv_pte & (PG_KWE | PG_UWE)) { @@ -1499,7 +1463,7 @@ pmap_page_protect(struct vm_page *pg, vm_prot_t prot) } PMAP_UNLOCK(pv->pv_pmap); } - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); PMAP_HEAD_TO_MAP_UNLOCK(); PMAP_TLB_SHOOTNOW(); return; @@ -1510,7 +1474,7 @@ pmap_page_protect(struct vm_page *pg, vm_prot_t prot) } PMAP_HEAD_TO_MAP_LOCK(); - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); for (pv = pg->mdpage.pvh_list; pv != NULL; pv = nextpv) { nextpv = pv->pv_next; pmap = pv->pv_pmap; @@ -1534,7 +1498,7 @@ pmap_page_protect(struct vm_page *pg, vm_prot_t prot) if (needkisync) PMAP_SYNC_ISTREAM_KERNEL(); - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); PMAP_HEAD_TO_MAP_UNLOCK(); } @@ -1688,40 +1652,7 @@ pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) panic("pmap_enter: user pmap, invalid va 0x%lx", va); #endif -#ifdef PMAP_NO_LAZY_LEV1MAP KASSERT(pmap->pm_lev1map != kernel_lev1map); -#else - /* - * If we're still referencing the kernel kernel_lev1map, - * create a new level 1 page table. A reference will be - * added to the level 1 table when the level 2 table is - * created. - */ - if (pmap->pm_lev1map == kernel_lev1map) { - /* - * XXX Yuck. - * We have to unlock the pmap, lock the - * pmap_growkernel_slock, and re-lock the - * pmap here, in order to avoid a deadlock - * with pmap_growkernel(). - * - * Because we unlock, we have a window for - * someone else to add a mapping, thus creating - * a level 1 map; pmap_lev1map_create() checks - * for this condition. 
- */ - PMAP_UNLOCK(pmap); - simple_lock(&pmap_growkernel_slock); - PMAP_LOCK(pmap); - error = pmap_lev1map_create(pmap, cpu_id); - simple_unlock(&pmap_growkernel_slock); - if (error) { - if (flags & PMAP_CANFAIL) - goto out; - panic("pmap_enter: unable to create lev1map"); - } - } -#endif /* PMAP_NO_LAZY_LEV1MAP */ /* * Check to see if the level 1 PTE is valid, and @@ -1890,13 +1821,13 @@ pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) if ((flags & VM_PROT_ALL) & ~prot) panic("pmap_enter: access type exceeds prot"); #endif - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); if (flags & VM_PROT_WRITE) pg->mdpage.pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED); else if (flags & VM_PROT_ALL) pg->mdpage.pvh_attrs |= PGA_REFERENCED; attrs = pg->mdpage.pvh_attrs; - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); /* * Set up referenced/modified emulation for new mapping. @@ -2258,21 +2189,13 @@ pmap_activate(struct lwp *l) printf("pmap_activate(%p)\n", l); #endif -#ifndef PMAP_NO_LAZY_LEV1MAP - PMAP_LOCK(pmap); -#endif - /* Mark the pmap in use by this processor. */ - atomic_setbits_ulong(&pmap->pm_cpus, (1UL << cpu_id)); + atomic_or_ulong(&pmap->pm_cpus, (1UL << cpu_id)); /* Allocate an ASN. */ pmap_asn_alloc(pmap, cpu_id); PMAP_ACTIVATE(pmap, l, cpu_id); - -#ifndef PMAP_NO_LAZY_LEV1MAP - PMAP_UNLOCK(pmap); -#endif } /* @@ -2298,7 +2221,7 @@ pmap_deactivate(struct lwp *l) /* * Mark the pmap no longer in use by this processor. */ - atomic_clearbits_ulong(&pmap->pm_cpus, (1UL << cpu_number())); + atomic_and_ulong(&pmap->pm_cpus, ~(1UL << cpu_number())); } #if defined(MULTIPROCESSOR) @@ -2448,7 +2371,7 @@ pmap_clear_modify(struct vm_page *pg) #endif PMAP_HEAD_TO_MAP_LOCK(); - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); if (pg->mdpage.pvh_attrs & PGA_MODIFIED) { rv = true; @@ -2456,7 +2379,7 @@ pmap_clear_modify(struct vm_page *pg) pg->mdpage.pvh_attrs &= ~PGA_MODIFIED; } - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); PMAP_HEAD_TO_MAP_UNLOCK(); return (rv); @@ -2479,7 +2402,7 @@ pmap_clear_reference(struct vm_page *pg) #endif PMAP_HEAD_TO_MAP_LOCK(); - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); if (pg->mdpage.pvh_attrs & PGA_REFERENCED) { rv = true; @@ -2487,7 +2410,7 @@ pmap_clear_reference(struct vm_page *pg) pg->mdpage.pvh_attrs &= ~PGA_REFERENCED; } - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); PMAP_HEAD_TO_MAP_UNLOCK(); return (rv); @@ -2835,7 +2758,7 @@ pmap_emulate_reference(struct lwp *l, vaddr_t v, int user, int type) pg = PHYS_TO_VM_PAGE(pa); PMAP_HEAD_TO_MAP_LOCK(); - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); if (type == ALPHA_MMCSR_FOW) { pg->mdpage.pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED); @@ -2849,7 +2772,7 @@ pmap_emulate_reference(struct lwp *l, vaddr_t v, int user, int type) } pmap_changebit(pg, 0, ~faultoff, cpu_id); - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); PMAP_HEAD_TO_MAP_UNLOCK(); return (0); } @@ -2868,7 +2791,7 @@ pmap_pv_dump(paddr_t pa) pg = PHYS_TO_VM_PAGE(pa); - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); printf("pa 0x%lx (attrs = 0x%x):\n", pa, pg->mdpage.pvh_attrs); for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) @@ -2876,7 +2799,7 @@ pmap_pv_dump(paddr_t pa) pv->pv_pmap, pv->pv_va); printf("\n"); - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); } 
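The PMAP_MAP_TO_HEAD/PMAP_HEAD_TO_MAP macros earlier in this file now expand to reader and writer acquisitions of pmap_main_lock, where they were previously no-ops. A hedged sketch of the two directions (the function names here are invented for illustration):

#include <sys/rwlock.h>

static krwlock_t pmap_main_lock;	/* rw_init()ed at bootstrap */

/* map->head: many lookups may proceed concurrently. */
static void
map_to_head_example(void)
{
	rw_enter(&pmap_main_lock, RW_READER);
	/* ... lock one pmap, then reach its PV entries ... */
	rw_exit(&pmap_main_lock);
}

/* head->map: the write side excludes all map->head holders. */
static void
head_to_map_example(void)
{
	rw_enter(&pmap_main_lock, RW_WRITER);
	/* ... walk a PV list, locking each owning pmap in turn ... */
	rw_exit(&pmap_main_lock);
}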
#endif @@ -2935,7 +2858,7 @@ pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte, newpv->pv_pte = pte; if (dolock) - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); #ifdef DEBUG { @@ -2959,7 +2882,7 @@ pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte, pg->mdpage.pvh_list = newpv; if (dolock) - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); return 0; } @@ -2975,7 +2898,7 @@ pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, bool dolock) pv_entry_t pv, *pvp; if (dolock) - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); /* * Find the entry to remove. @@ -2993,7 +2916,7 @@ pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, bool dolock) *pvp = pv->pv_next; if (dolock) - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); pmap_pv_free(pv); } @@ -3050,13 +2973,13 @@ pmap_physpage_alloc(int usage, paddr_t *pap) pa = VM_PAGE_TO_PHYS(pg); #ifdef DEBUG - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); if (pg->wire_count != 0) { printf("pmap_physpage_alloc: page 0x%lx has " "%d references\n", pa, pg->wire_count); panic("pmap_physpage_alloc"); } - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); #endif *pap = pa; return (true); @@ -3078,10 +3001,10 @@ pmap_physpage_free(paddr_t pa) panic("pmap_physpage_free: bogus physical page address"); #ifdef DEBUG - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); if (pg->wire_count != 0) panic("pmap_physpage_free: page still has references"); - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); #endif uvm_pagefree(pg); @@ -3102,9 +3025,9 @@ pmap_physpage_addref(void *kva) pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); pg = PHYS_TO_VM_PAGE(pa); - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); rval = ++pg->wire_count; - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); return (rval); } @@ -3124,7 +3047,7 @@ pmap_physpage_delref(void *kva) pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); pg = PHYS_TO_VM_PAGE(pa); - simple_lock(&pg->mdpage.pvh_slock); + mutex_enter(&pg->mdpage.pvh_lock); #ifdef DIAGNOSTIC /* @@ -3136,7 +3059,7 @@ pmap_physpage_delref(void *kva) rval = --pg->wire_count; - simple_unlock(&pg->mdpage.pvh_slock); + mutex_exit(&pg->mdpage.pvh_lock); return (rval); } @@ -3161,7 +3084,7 @@ pmap_growkernel(vaddr_t maxkvaddr) if (maxkvaddr <= virtual_end) goto out; /* we are OK */ - simple_lock(&pmap_growkernel_slock); + mutex_enter(&pmap_growkernel_lock); va = virtual_end; @@ -3194,7 +3117,7 @@ pmap_growkernel(vaddr_t maxkvaddr) l1idx = l1pte_index(va); /* Update all the user pmaps. */ - simple_lock(&pmap_all_pmaps_slock); + mutex_enter(&pmap_all_pmaps_lock); for (pm = TAILQ_FIRST(&pmap_all_pmaps); pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) { /* Skip the kernel pmap. 
*/ @@ -3209,7 +3132,7 @@ pmap_growkernel(vaddr_t maxkvaddr) pm->pm_lev1map[l1idx] = pte; PMAP_UNLOCK(pm); } - simple_unlock(&pmap_all_pmaps_slock); + mutex_exit(&pmap_all_pmaps_lock); } /* @@ -3235,7 +3158,7 @@ pmap_growkernel(vaddr_t maxkvaddr) virtual_end = va; - simple_unlock(&pmap_growkernel_slock); + mutex_exit(&pmap_growkernel_lock); out: return (virtual_end); @@ -3275,28 +3198,12 @@ pmap_lev1map_create(pmap_t pmap, long cpu_id) panic("pmap_lev1map_create: pmap uses non-reserved ASN"); #endif -#ifdef PMAP_NO_LAZY_LEV1MAP /* Being called from pmap_create() in this case; we can sleep. */ l1pt = pool_cache_get(&pmap_l1pt_cache, PR_WAITOK); -#else - l1pt = pool_cache_get(&pmap_l1pt_cache, PR_NOWAIT); -#endif if (l1pt == NULL) return (ENOMEM); pmap->pm_lev1map = l1pt; - -#ifndef PMAP_NO_LAZY_LEV1MAP /* guaranteed not to be active */ - /* - * The page table base has changed; if the pmap was active, - * reactivate it. - */ - if (PMAP_ISACTIVE(pmap, cpu_id)) { - pmap_asn_alloc(pmap, cpu_id); - PMAP_ACTIVATE(pmap, curlwp, cpu_id); - } - PMAP_LEV1MAP_SHOOTDOWN(pmap, cpu_id); -#endif /* ! PMAP_NO_LAZY_LEV1MAP */ return (0); } @@ -3322,31 +3229,6 @@ pmap_lev1map_destroy(pmap_t pmap, long cpu_id) */ pmap->pm_lev1map = kernel_lev1map; -#ifndef PMAP_NO_LAZY_LEV1MAP /* pmap is being destroyed */ - /* - * The page table base has changed; if the pmap was active, - * reactivate it. Note that allocation of a new ASN is - * not necessary here: - * - * (1) We've gotten here because we've deleted all - * user mappings in the pmap, invalidating the - * TLB entries for them as we go. - * - * (2) kernel_lev1map contains only kernel mappings, which - * were identical in the user pmap, and all of - * those mappings have PG_ASM, so the ASN doesn't - * matter. - * - * We do, however, ensure that the pmap is using the - * reserved ASN, to ensure that no two pmaps never have - * clashing TLB entries. - */ - PMAP_INVALIDATE_ASN(pmap, cpu_id); - if (PMAP_ISACTIVE(pmap, cpu_id)) - PMAP_ACTIVATE(pmap, curlwp, cpu_id); - PMAP_LEV1MAP_SHOOTDOWN(pmap, cpu_id); -#endif /* ! PMAP_NO_LAZY_LEV1MAP */ - /* * Free the old level 1 page table page. */ @@ -3583,15 +3465,7 @@ pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte, long cpu_id) panic("pmap_l1pt_delref: kernel pmap"); #endif - if (pmap_physpage_delref(l1pte) == 0) { -#ifndef PMAP_NO_LAZY_LEV1MAP - /* - * No more level 2 tables left, go back to the global - * kernel_lev1map. - */ - pmap_lev1map_destroy(pmap, cpu_id); -#endif /* ! PMAP_NO_LAZY_LEV1MAP */ - } + (void)pmap_physpage_delref(l1pte); } /******************** Address Space Number management ********************/ @@ -3623,7 +3497,6 @@ pmap_asn_alloc(pmap_t pmap, long cpu_id) * have PG_ASM set. If the pmap eventually gets its own * lev1map, an ASN will be allocated at that time. * - * #ifdef PMAP_NO_LAZY_LEV1MAP * Only the kernel pmap will reference kernel_lev1map. Do the * same old fixups, but note that we no longer need the pmap * to be locked if we're in this mode, since pm_lev1map will @@ -3747,7 +3620,7 @@ pmap_asn_alloc(pmap_t pmap, long cpu_id) * Have a new ASN, so there's no need to sync the I-stream * on the way back out to userspace. 
*/ - atomic_clearbits_ulong(&pmap->pm_needisync, (1UL << cpu_id)); + atomic_and_ulong(&pmap->pm_needisync, ~(1UL << cpu_id)); } #if defined(MULTIPROCESSOR) @@ -3768,10 +3641,8 @@ pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp) struct cpu_info *ci, *self = curcpu(); u_long cpumask; CPU_INFO_ITERATOR cii; - int s; - LOCK_ASSERT((pmap == pmap_kernel()) || - simple_lock_held(&pmap->pm_slock)); + KASSERT((pmap == pmap_kernel()) || mutex_owned(&pmap->pm_lock)); cpumask = 0; @@ -3803,7 +3674,7 @@ pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp) pq = &pmap_tlb_shootdown_q[ci->ci_cpuid]; - PSJQ_LOCK(pq, s); + mutex_spin_enter(&pq->pq_lock); pq->pq_pte |= pte; @@ -3812,7 +3683,7 @@ pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp) * don't really have to do anything else. */ if (pq->pq_tbia) { - PSJQ_UNLOCK(pq, s); + mutex_spin_exit(&pq->pq_lock); continue; } @@ -3832,7 +3703,7 @@ pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp) cpumask |= 1UL << ci->ci_cpuid; - PSJQ_UNLOCK(pq, s); + mutex_spin_exit(&pq->pq_lock); } *cpumaskp |= cpumask; @@ -3863,9 +3734,8 @@ pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep) u_long cpu_mask = (1UL << cpu_id); struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id]; struct pmap_tlb_shootdown_job *pj; - int s; - PSJQ_LOCK(pq, s); + mutex_spin_enter(&pq->pq_lock); if (pq->pq_tbia) { if (pq->pq_pte & PG_ASM) @@ -3885,7 +3755,7 @@ pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep) pq->pq_pte = 0; } - PSJQ_UNLOCK(pq, s); + mutex_spin_exit(&pq->pq_lock); } /* diff --git a/sys/arch/alpha/alpha/trap.c b/sys/arch/alpha/alpha/trap.c index 30117ac7150d..48f4db3581f7 100644 --- a/sys/arch/alpha/alpha/trap.c +++ b/sys/arch/alpha/alpha/trap.c @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.116 2007/10/17 19:52:56 garbled Exp $ */ +/* $NetBSD: trap.c,v 1.117 2008/01/02 11:48:21 ad Exp $ */ /*- * Copyright (c) 2000, 2001 The NetBSD Foundation, Inc. @@ -100,7 +100,7 @@ #include /* RCS ID & Copyright macro defns */ -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.116 2007/10/17 19:52:56 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.117 2008/01/02 11:48:21 ad Exp $"); #include #include @@ -263,9 +263,7 @@ trap(const u_long a0, const u_long a1, const u_long a2, const u_long entry, * and per-process unaligned-access-handling flags). 
*/ if (user) { - KERNEL_LOCK(1, l); i = unaligned_fixup(a0, a1, a2, l); - KERNEL_UNLOCK_LAST(l); if (i == 0) goto out; @@ -360,9 +358,7 @@ trap(const u_long a0, const u_long a1, const u_long a2, const u_long entry, break; case ALPHA_IF_CODE_OPDEC: - KERNEL_LOCK(1, l); i = handle_opdec(l, &ucode); - KERNEL_UNLOCK_LAST(l); KSI_INIT_TRAP(&ksi); if (i == 0) goto out; @@ -392,20 +388,10 @@ trap(const u_long a0, const u_long a1, const u_long a2, const u_long entry, case ALPHA_MMCSR_FOR: case ALPHA_MMCSR_FOE: case ALPHA_MMCSR_FOW: - if (user) - KERNEL_LOCK(1, l); - else - KERNEL_LOCK(1, NULL); - if (pmap_emulate_reference(l, a0, user, a1)) { ftype = VM_PROT_EXECUTE; goto do_fault; } - - if (user) - KERNEL_UNLOCK_LAST(l); - else - KERNEL_UNLOCK_ONE(NULL); goto out; case ALPHA_MMCSR_INVALTRANS: @@ -435,9 +421,7 @@ trap(const u_long a0, const u_long a1, const u_long a2, const u_long entry, #endif } - if (user) - KERNEL_LOCK(1, l); - else { + if (!user) { struct cpu_info *ci = curcpu(); if (l == NULL) { @@ -472,8 +456,6 @@ trap(const u_long a0, const u_long a1, const u_long a2, const u_long entry, */ if (ci->ci_intrdepth != 0) goto dopanic; - - KERNEL_LOCK(1, NULL); } /* @@ -513,16 +495,10 @@ do_fault: rv = EFAULT; } if (rv == 0) { - if (user) - KERNEL_UNLOCK_LAST(l); - else - KERNEL_UNLOCK_ONE(NULL); goto out; } if (user == 0) { - KERNEL_UNLOCK_ONE(NULL); - /* Check for copyin/copyout fault */ if (l != NULL && l->l_addr->u_pcb.pcb_onfault != 0) { @@ -550,7 +526,6 @@ do_fault: ksi.ksi_code = SEGV_ACCERR; else ksi.ksi_code = SEGV_MAPERR; - KERNEL_UNLOCK_LAST(l); break; } @@ -567,9 +542,7 @@ do_fault: #ifdef DEBUG printtrap(a0, a1, a2, entry, framep, 1, user); #endif - KERNEL_LOCK(1, l); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); out: if (user) userret(l); @@ -675,8 +648,6 @@ ast(struct trapframe *framep) if (l == NULL) return; - KERNEL_LOCK(1, l); - uvmexp.softs++; l->l_md.md_tf = framep; @@ -692,7 +663,6 @@ ast(struct trapframe *framep) preempt(); } - KERNEL_UNLOCK_LAST(l); userret(l); } @@ -1247,6 +1217,5 @@ startlwp(void *arg) #endif pool_put(&lwp_uc_pool, uc); - KERNEL_UNLOCK_LAST(l); userret(l); } diff --git a/sys/arch/alpha/include/pmap.h b/sys/arch/alpha/include/pmap.h index 39a391dfc830..3ee1d10eca2b 100644 --- a/sys/arch/alpha/include/pmap.h +++ b/sys/arch/alpha/include/pmap.h @@ -1,7 +1,7 @@ -/* $NetBSD: pmap.h,v 1.68 2007/02/21 22:59:37 thorpej Exp $ */ +/* $NetBSD: pmap.h,v 1.69 2008/01/02 11:48:21 ad Exp $ */ /*- - * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc. + * Copyright (c) 1998, 1999, 2000, 2001, 2007 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -117,7 +117,7 @@ #include "opt_multiprocessor.h" #endif -#include +#include #include #include @@ -144,7 +144,7 @@ struct pmap { TAILQ_ENTRY(pmap) pm_list; /* list of all pmaps */ pt_entry_t *pm_lev1map; /* level 1 map */ int pm_count; /* pmap reference count */ - struct simplelock pm_slock; /* lock on pmap */ + kmutex_t pm_lock; /* lock on pmap */ struct pmap_statistics pm_stats; /* pmap statistics */ unsigned long pm_cpus; /* mask of CPUs using pmap */ unsigned long pm_needisync; /* mask of CPUs needing isync */ @@ -343,8 +343,8 @@ pmap_l3pte(pmap, v, l2pte) * operations, locking the kernel pmap is not necessary. Therefore, * it is not necessary to block interrupts when locking pmap strucutres. 
*/ -#define PMAP_LOCK(pmap) simple_lock(&(pmap)->pm_slock) -#define PMAP_UNLOCK(pmap) simple_unlock(&(pmap)->pm_slock) +#define PMAP_LOCK(pmap) mutex_enter(&(pmap)->pm_lock) +#define PMAP_UNLOCK(pmap) mutex_exit(&(pmap)->pm_lock) /* * Macro for processing deferred I-stream synchronization. diff --git a/sys/arch/alpha/include/vmparam.h b/sys/arch/alpha/include/vmparam.h index 718b13db5f7e..3265f339b371 100644 --- a/sys/arch/alpha/include/vmparam.h +++ b/sys/arch/alpha/include/vmparam.h @@ -1,4 +1,4 @@ -/* $NetBSD: vmparam.h,v 1.29 2005/12/11 12:16:16 christos Exp $ */ +/* $NetBSD: vmparam.h,v 1.30 2008/01/02 11:48:21 ad Exp $ */ /* * Copyright (c) 1992, 1993 @@ -166,14 +166,14 @@ #define __HAVE_VM_PAGE_MD struct vm_page_md { struct pv_entry *pvh_list; /* pv_entry list */ - struct simplelock pvh_slock; /* lock on this head */ + kmutex_t pvh_lock; /* lock on this head */ int pvh_attrs; /* page attributes */ }; #define VM_MDPAGE_INIT(pg) \ do { \ (pg)->mdpage.pvh_list = NULL; \ - simple_lock_init(&(pg)->mdpage.pvh_slock); \ + mutex_init(&(pg)->mdpage.pvh_lock, MUTEX_DEFAULT, IPL_NONE); \ } while (/*CONSTCOND*/0) #endif /* ! _ALPHA_VMPARAM_H_ */ diff --git a/sys/arch/amd64/amd64/fpu.c b/sys/arch/amd64/amd64/fpu.c index fd8cd01319da..22d02c84cd2a 100644 --- a/sys/arch/amd64/amd64/fpu.c +++ b/sys/arch/amd64/amd64/fpu.c @@ -1,4 +1,4 @@ -/* $NetBSD: fpu.c,v 1.20 2007/11/22 16:16:41 bouyer Exp $ */ +/* $NetBSD: fpu.c,v 1.21 2008/01/02 11:48:21 ad Exp $ */ /*- * Copyright (c) 1991 The Regents of the University of California. @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.20 2007/11/22 16:16:41 bouyer Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.21 2008/01/02 11:48:21 ad Exp $"); #include "opt_multiprocessor.h" @@ -179,9 +179,7 @@ fputrap(frame) ksi.ksi_addr = (void *)frame->tf_rip; ksi.ksi_code = x86fpflags_to_ksiginfo(statbits); ksi.ksi_trap = statbits; - KERNEL_LOCK(1, l); (*l->l_proc->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); } static int diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c index a93f04def038..fe78a079dcfe 100644 --- a/sys/arch/amd64/amd64/trap.c +++ b/sys/arch/amd64/amd64/trap.c @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.42 2008/01/01 21:28:40 yamt Exp $ */ +/* $NetBSD: trap.c,v 1.43 2008/01/02 11:48:22 ad Exp $ */ /*- * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. @@ -75,7 +75,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.42 2008/01/01 21:28:40 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.43 2008/01/02 11:48:22 ad Exp $"); #include "opt_ddb.h" #include "opt_kgdb.h" @@ -439,7 +439,6 @@ copyfault: goto copyefault; cr2 = rcr2(); - KERNEL_LOCK(1, NULL); goto faultcommon; case T_PAGEFLT|T_USER: { /* page fault */ @@ -453,7 +452,6 @@ copyfault: if (p->p_emul->e_usertrap != NULL && (*p->p_emul->e_usertrap)(l, cr2, frame) != 0) return; - KERNEL_LOCK(1, l); faultcommon: vm = p->p_vmspace; if (vm == NULL) @@ -496,8 +494,6 @@ faultcommon: uvm_grow(p, va); if (type == T_PAGEFLT) { - KERNEL_UNLOCK_ONE(NULL); - /* * we need to switch pmap now if we're in * the middle of copyin/out. 
@@ -511,7 +507,6 @@ faultcommon: pmap_load(); return; } - KERNEL_UNLOCK_LAST(l); goto out; } KSI_INIT_TRAP(&ksi); @@ -524,10 +519,8 @@ faultcommon: ksi.ksi_code = SEGV_MAPERR; if (type == T_PAGEFLT) { - if (pcb->pcb_onfault != 0) { - KERNEL_UNLOCK_ONE(NULL); + if (pcb->pcb_onfault != 0) goto copyfault; - } printf("uvm_fault(%p, 0x%lx, %d) -> %x\n", map, va, ftype, error); goto we_re_toast; @@ -547,10 +540,6 @@ faultcommon: ksi.ksi_signo = SIGSEGV; } (*p->p_emul->e_trapsignal)(l, &ksi); - if (type == T_PAGEFLT) - KERNEL_UNLOCK_ONE(NULL); - else - KERNEL_UNLOCK_LAST(l); break; } @@ -580,9 +569,7 @@ faultcommon: ksi.ksi_code = TRAP_BRKPT; else ksi.ksi_code = TRAP_TRACE; - KERNEL_LOCK(1, l); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); } break; @@ -621,9 +608,7 @@ out: userret(l); return; trapsignal: - KERNEL_LOCK(1, l); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); userret(l); } @@ -636,9 +621,6 @@ startlwp(void *arg) err = cpu_setmcontext(l, &uc->uc_mcontext, uc->uc_flags); pool_put(&lwp_uc_pool, uc); - - KERNEL_UNLOCK_LAST(l); - userret(l); } diff --git a/sys/arch/amiga/amiga/disksubr.c b/sys/arch/amiga/amiga/disksubr.c index 4482dd44f601..4d5ce55180b5 100644 --- a/sys/arch/amiga/amiga/disksubr.c +++ b/sys/arch/amiga/amiga/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.55 2007/10/17 19:53:12 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.56 2008/01/02 11:48:22 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -66,7 +66,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.55 2007/10/17 19:53:12 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.56 2008/01/02 11:48:22 ad Exp $"); #include #include @@ -190,7 +190,7 @@ readdisklabel(dev, strat, lp, clp) bp->b_blkno = nextb; bp->b_cylinder = bp->b_blkno / lp->d_secpercyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; #ifdef SD_C_ADJUSTS_NR bp->b_blkno *= (lp->d_secsize / DEV_BSIZE); @@ -307,7 +307,7 @@ readdisklabel(dev, strat, lp, clp) bp->b_blkno = nextb; bp->b_cylinder = bp->b_blkno / lp->d_secpercyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; #ifdef SD_C_ADJUSTS_NR bp->b_blkno *= (lp->d_secsize / DEV_BSIZE); @@ -571,7 +571,8 @@ writedisklabel(dev, strat, lp, clp) dlp = (struct disklabel *)((char*)bp->b_data + LABELOFFSET); *dlp = *lp; /* struct assignment */ - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/amiga/dev/fd.c b/sys/arch/amiga/dev/fd.c index 6085b92a0dcb..78bc6a14be01 100644 --- a/sys/arch/amiga/dev/fd.c +++ b/sys/arch/amiga/dev/fd.c @@ -1,4 +1,4 @@ -/* $NetBSD: fd.c,v 1.77 2007/10/17 19:53:15 garbled Exp $ */ +/* $NetBSD: fd.c,v 1.78 2008/01/02 11:48:22 ad Exp $ */ /* * Copyright (c) 1994 Christian E. 
Hopps @@ -33,7 +33,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.77 2007/10/17 19:53:15 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.78 2008/01/02 11:48:22 ad Exp $"); #include #include @@ -912,7 +912,8 @@ fdputdisklabel(struct fd_softc *sc, dev_t dev) bcopy(lp, dlp, sizeof(struct disklabel)); bp->b_blkno = 0; bp->b_cylinder = 0; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_flags &= ~(B_READ); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_WRITE; fdstrategy(bp); error = biowait(bp); diff --git a/sys/arch/arc/arc/disksubr.c b/sys/arch/arc/arc/disksubr.c index c0f9edd0ff6e..0a930b8c0b60 100644 --- a/sys/arch/arc/arc/disksubr.c +++ b/sys/arch/arc/arc/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.27 2007/10/17 19:53:27 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.28 2008/01/02 11:48:22 ad Exp $ */ /* $OpenBSD: disksubr.c,v 1.14 1997/05/08 00:14:29 deraadt Exp $ */ /* NetBSD: disksubr.c,v 1.40 1999/05/06 15:45:51 christos Exp */ @@ -34,7 +34,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.27 2007/10/17 19:53:27 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.28 2008/01/02 11:48:22 ad Exp $"); #include #include @@ -270,7 +270,7 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -321,7 +321,7 @@ nombrpart: i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -475,7 +475,7 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -489,7 +489,8 @@ nombrpart: if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/arm/arm/disksubr.c b/sys/arch/arm/arm/disksubr.c index 1cb2b648eaba..3ba6d9165370 100644 --- a/sys/arch/arm/arm/disksubr.c +++ b/sys/arch/arm/arm/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.19 2007/10/17 19:53:30 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.20 2008/01/02 11:48:23 ad Exp $ */ /* * Copyright (c) 1998 Christopher G. Demetriou. All rights reserved. 
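Nearly every disksubr.c in this commit repeats one recipe: the done bit moves from b_flags (B_DONE) to b_oflags (BO_DONE), so turning a just-read label buffer around for the write-back now clears state in two fields. A condensed sketch of that step, with the label offset passed in rather than taken from a port's headers:

#include <sys/types.h>
#include <sys/buf.h>
#include <sys/disklabel.h>

/* Sketch: rewrite an in-core disklabel using the buffer that read it. */
static int
label_writeback(struct buf *bp, void (*strat)(struct buf *),
    const struct disklabel *lp, size_t labeloffset)
{
	struct disklabel *dlp;

	dlp = (struct disklabel *)((char *)bp->b_data + labeloffset);
	*dlp = *lp;			/* struct assignment */
	bp->b_oflags &= ~BO_DONE;	/* completion state: b_oflags */
	bp->b_flags &= ~B_READ;
	bp->b_flags |= B_WRITE;
	(*strat)(bp);
	return biowait(bp);
}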
@@ -97,7 +97,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.19 2007/10/17 19:53:30 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.20 2008/01/02 11:48:23 ad Exp $"); #include #include @@ -224,7 +224,7 @@ readdisklabel(dev, strat, lp, osdep) i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -376,7 +376,7 @@ writedisklabel(dev, strat, lp, osdep) bp->b_blkno = netbsdpartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -390,7 +390,8 @@ writedisklabel(dev, strat, lp, osdep) if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_flags &= ~(B_READ); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/arm/arm/disksubr_acorn.c b/sys/arch/arm/arm/disksubr_acorn.c index 1224404e1c5b..b1c64dc61a57 100644 --- a/sys/arch/arm/arm/disksubr_acorn.c +++ b/sys/arch/arm/arm/disksubr_acorn.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr_acorn.c,v 1.7 2007/10/17 19:53:30 garbled Exp $ */ +/* $NetBSD: disksubr_acorn.c,v 1.8 2008/01/02 11:48:23 ad Exp $ */ /* * Copyright (c) 1998 Christopher G. Demetriou. All rights reserved. @@ -97,7 +97,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr_acorn.c,v 1.7 2007/10/17 19:53:30 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr_acorn.c,v 1.8 2008/01/02 11:48:23 ad Exp $"); #include #include @@ -259,7 +259,7 @@ filecore_label_read(dev, strat, lp, osdep, msgp, cylp, netbsd_label_offp) bp->b_blkno);*/ bp->b_cylinder = bp->b_blkno / lp->d_secpercyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); diff --git a/sys/arch/atari/atari/disksubr.c b/sys/arch/atari/atari/disksubr.c index b671594a0075..29f415674108 100644 --- a/sys/arch/atari/atari/disksubr.c +++ b/sys/arch/atari/atari/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.34 2007/10/17 19:53:45 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.35 2008/01/02 11:48:23 ad Exp $ */ /* * Copyright (c) 1995 Leo Weppelman. @@ -31,7 +31,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.34 2007/10/17 19:53:45 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.35 2008/01/02 11:48:23 ad Exp $"); #ifndef DISKLABEL_NBDA #define DISKLABEL_NBDA /* required */ @@ -234,7 +234,8 @@ writedisklabel(dev, strat, lp, clp) bb->bb_magic = (blk == 0) ? 
NBDAMAGIC : AHDIMAGIC; BBSETLABEL(bb, lp); - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; bp->b_bcount = BBMINSIZE; bp->b_blkno = blk; diff --git a/sys/arch/atari/dev/hdfd.c b/sys/arch/atari/dev/hdfd.c index d0e6c2832640..bbee5de9d155 100644 --- a/sys/arch/atari/dev/hdfd.c +++ b/sys/arch/atari/dev/hdfd.c @@ -1,4 +1,4 @@ -/* $NetBSD: hdfd.c,v 1.59 2007/10/17 19:53:47 garbled Exp $ */ +/* $NetBSD: hdfd.c,v 1.60 2008/01/02 11:48:23 ad Exp $ */ /*- * Copyright (c) 1996 Leo Weppelman @@ -91,7 +91,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: hdfd.c,v 1.59 2007/10/17 19:53:47 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: hdfd.c,v 1.60 2008/01/02 11:48:23 ad Exp $"); #include "opt_ddb.h" @@ -1531,11 +1531,12 @@ fdformat(dev, finfo, p) struct buf *bp; /* set up a buffer header for fdstrategy() */ - bp = (struct buf *)malloc(sizeof(struct buf), M_TEMP, M_NOWAIT); + bp = getiobuf(NULL, false); if(bp == 0) return ENOBUFS; bzero((void *)bp, sizeof(struct buf)); - bp->b_flags = B_BUSY | B_PHYS | B_FORMAT; + bp->b_flags = B_PHYS | B_FORMAT; + bp->b_cflags |= BC_BUSY; bp->b_proc = p; bp->b_dev = dev; @@ -1557,13 +1558,13 @@ fdformat(dev, finfo, p) fdstrategy(bp); /* ...and wait for it to complete */ - s = splbio(); - while(!(bp->b_flags & B_DONE)) { - rv = tsleep((void *)bp, PRIBIO, "fdform", 20 * hz); + mutex_enter(bp->b_objlock); + while(!(bp->b_oflags & BO_DONE)) { + rv = cv_timedwait(&bp->b_done, bp->b_objlock, 20 * hz); if (rv == EWOULDBLOCK) break; } - splx(s); + mutex_exit(bp->b_objlock); if (rv == EWOULDBLOCK) { /* timed out */ @@ -1572,7 +1573,7 @@ fdformat(dev, finfo, p) } else if (bp->b_error != 0) { rv = bp->b_error; } - free(bp, M_TEMP); + putiobuf(bp); return rv; } diff --git a/sys/arch/atari/dev/md_root.c b/sys/arch/atari/dev/md_root.c index e32c622f821d..db4e55862c5b 100644 --- a/sys/arch/atari/dev/md_root.c +++ b/sys/arch/atari/dev/md_root.c @@ -31,7 +31,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: md_root.c,v 1.24 2007/10/17 19:53:47 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: md_root.c,v 1.25 2008/01/02 11:48:24 ad Exp $"); #include #include @@ -157,7 +157,7 @@ struct md_conf *md; dev_t ld_dev; struct lwp *lwp; { - struct buf buf; + struct buf *buf; int error; const struct bdevsw *bdp; struct disklabel dl; @@ -170,17 +170,16 @@ struct lwp *lwp; /* * Initialize our buffer header: */ - memset(&buf, 0, sizeof(buf)); - buf.b_vnbufs.le_next = NOLIST; - buf.b_flags = B_BUSY; - buf.b_dev = ld_dev; - buf.b_error = 0; - buf.b_proc = lwp->l_proc; + buf = getiobuf(NULL, false); + buf->b_cflags = BC_BUSY; + buf->b_dev = ld_dev; + buf->b_error = 0; + buf->b_proc = lwp->l_proc; /* * Setup read_info: */ - rs.bp = &buf; + rs.bp = buf; rs.nbytes = md->md_size; rs.offset = 0; rs.bufp = md->md_addr; @@ -192,8 +191,10 @@ struct lwp *lwp; /* * Open device and try to get some statistics. 
*/ - if((error = bdp->d_open(ld_dev, FREAD | FNONBLOCK, 0, lwp)) != 0) + if((error = bdp->d_open(ld_dev, FREAD | FNONBLOCK, 0, lwp)) != 0) { + putiobuf(buf); return(error); + } if(bdp->d_ioctl(ld_dev, DIOCGDINFO, (void *)&dl, FREAD, lwp) == 0) { /* Read on a cylinder basis */ rs.chunk = dl.d_secsize * dl.d_secpercyl; @@ -208,6 +209,7 @@ struct lwp *lwp; error = ramd_norm_read(&rs); bdp->d_close(ld_dev,FREAD | FNONBLOCK, 0, lwp); + putiobuf(buf); return(error); } @@ -218,7 +220,6 @@ struct read_info *rsp; long bytes_left; int done, error; struct buf *bp; - int s; int dotc = 0; bytes_left = rsp->nbytes; @@ -226,9 +227,8 @@ struct read_info *rsp; error = 0; while(bytes_left > 0) { - s = splbio(); - bp->b_flags = B_BUSY | B_PHYS | B_READ; - splx(s); + bp->b_cflags = BC_BUSY; + bp->b_flags = B_PHYS | B_READ; bp->b_blkno = btodb(rsp->offset); bp->b_bcount = rsp->chunk; bp->b_data = rsp->bufp; @@ -238,10 +238,7 @@ struct read_info *rsp; (*rsp->strat)(bp); /* Wait for results */ - s = splbio(); - while ((bp->b_flags & B_DONE) == 0) - tsleep((void *) bp, PRIBIO + 1, "ramd_norm_read", 0); - splx(s); + biowait(bp); error = bp->b_error; /* Dot counter */ @@ -300,7 +297,6 @@ int nbyte; static int dotc = 0; struct buf *bp; int nread = 0; - int s; int done, error; @@ -309,9 +305,8 @@ int nbyte; nbyte &= ~(DEV_BSIZE - 1); while(nbyte > 0) { - s = splbio(); - bp->b_flags = B_BUSY | B_PHYS | B_READ; - splx(s); + bp->b_cflags = BC_BUSY; + bp->b_flags = B_PHYS | B_READ; bp->b_blkno = btodb(rsp->offset); bp->b_bcount = min(rsp->chunk, nbyte); bp->b_data = buf; @@ -321,11 +316,7 @@ int nbyte; (*rsp->strat)(bp); /* Wait for results */ - s = splbio(); - while ((bp->b_flags & B_DONE) == 0) - tsleep((void *) bp, PRIBIO + 1, "ramd_norm_read", 0); - error = bp->b_error; - splx(s); + biowait(bp); /* Dot counter */ printf("."); @@ -348,8 +339,6 @@ int nbyte; rsp->offset = 0; } } - s = splbio(); - splx(s); return(nread); } #endif /* support_compression */ diff --git a/sys/arch/cobalt/cobalt/disksubr.c b/sys/arch/cobalt/cobalt/disksubr.c index 8f2ee75dd478..85bd32cbe806 100644 --- a/sys/arch/cobalt/cobalt/disksubr.c +++ b/sys/arch/cobalt/cobalt/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.22 2007/10/17 19:54:08 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.23 2008/01/02 11:48:24 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 
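The md_root.c hunks above retire the open-coded splbio()/tsleep() loop on B_DONE; biowait() now performs the equivalent wait internally, taking b_objlock and sleeping on b_done until BO_DONE is set. A sketch of the simplified read step, with the chunk bookkeeping elided:

#include <sys/buf.h>

/* Sketch: one chunk of the ramdisk load loop after the conversion. */
static int
load_one_chunk(struct buf *bp, void (*strat)(struct buf *),
    daddr_t blkno, void *data, size_t count)
{
	bp->b_cflags = BC_BUSY;
	bp->b_flags = B_PHYS | B_READ;
	bp->b_blkno = blkno;
	bp->b_bcount = count;
	bp->b_data = data;

	(*strat)(bp);
	return biowait(bp);	/* waits for BO_DONE, returns b_error */
}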
@@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.22 2007/10/17 19:54:08 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.23 2008/01/02 11:48:24 ad Exp $"); #include #include @@ -190,7 +190,7 @@ readdisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -227,7 +227,7 @@ readdisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -366,7 +366,7 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -380,7 +380,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/evbmips/evbmips/disksubr.c b/sys/arch/evbmips/evbmips/disksubr.c index 90a960966eb3..a97e06287472 100644 --- a/sys/arch/evbmips/evbmips/disksubr.c +++ b/sys/arch/evbmips/evbmips/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.16 2007/10/17 19:54:15 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.17 2008/01/02 11:48:24 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.16 2007/10/17 19:54:15 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.17 2008/01/02 11:48:24 ad Exp $"); #include #include @@ -111,7 +111,7 @@ readdisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -216,7 +216,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, dlp = (struct disklabel *)((char *)bp->b_data + LABELOFFSET); *dlp = *lp; /* struct assignment */ - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/evbppc/evbppc/disksubr.c b/sys/arch/evbppc/evbppc/disksubr.c index dbbfbd66d033..ffd0604e67e6 100644 --- a/sys/arch/evbppc/evbppc/disksubr.c +++ b/sys/arch/evbppc/evbppc/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.15 2007/10/17 19:54:17 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.16 2008/01/02 11:48:24 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 
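Behind all of this churn, the old overloaded b_flags word has been split three ways: b_flags keeps the I/O description (B_READ, B_WRITE, B_PHYS, ...), b_cflags carries cache and ownership state (BC_BUSY, ...), and b_oflags carries completion state (BO_DONE, BO_DELWRI) protected by bp->b_objlock. A sketch of roughly the completion half that biodone() performs under the new rules; this is simplified, the real function also handles async and delayed-write buffers:

#include <sys/buf.h>
#include <sys/mutex.h>
#include <sys/condvar.h>

static void
mark_buffer_done(struct buf *bp)
{
	mutex_enter(bp->b_objlock);
	bp->b_oflags |= BO_DONE;	/* what waiters test */
	cv_broadcast(&bp->b_done);	/* what biowait() sleeps on */
	mutex_exit(bp->b_objlock);
}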
@@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.15 2007/10/17 19:54:17 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.16 2008/01/02 11:48:24 ad Exp $"); #include #include @@ -121,7 +121,7 @@ readdisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~BO_DONE; bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -246,7 +246,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags = B_BUSY | B_WRITE; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_WRITE; (*strat)(bp); error = biowait(bp); goto done; diff --git a/sys/arch/ews4800mips/ews4800mips/sector.c b/sys/arch/ews4800mips/ews4800mips/sector.c index f7e7040f5a6f..d70d5bab57d1 100644 --- a/sys/arch/ews4800mips/ews4800mips/sector.c +++ b/sys/arch/ews4800mips/ews4800mips/sector.c @@ -1,4 +1,4 @@ -/* $NetBSD: sector.c,v 1.5 2007/10/17 19:54:21 garbled Exp $ */ +/* $NetBSD: sector.c,v 1.6 2008/01/02 11:48:24 ad Exp $ */ /*- * Copyright (c) 2004 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sector.c,v 1.5 2007/10/17 19:54:21 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sector.c,v 1.6 2008/01/02 11:48:24 ad Exp $"); #include #include @@ -98,7 +98,7 @@ sector_read(void *self, uint8_t *buf, daddr_t sector) b->b_blkno = sector; b->b_cylinder = sector / 100; b->b_bcount = DEV_BSIZE; - b->b_flags &= ~(B_DONE); + b->b_oflags &= ~(BO_DONE); b->b_flags |= B_READ; rw->strategy(b); @@ -134,7 +134,8 @@ sector_write(void *self, uint8_t *buf, daddr_t sector) b->b_blkno = sector; b->b_cylinder = sector / 100; b->b_bcount = DEV_BSIZE; - b->b_flags &= ~(B_READ | B_DONE); + b->b_flags &= ~(B_READ); + b->b_oflags &= ~(BO_DONE); b->b_flags |= B_WRITE; memcpy(b->b_data, buf, DEV_BSIZE); rw->strategy(b); diff --git a/sys/arch/hp300/dev/ct.c b/sys/arch/hp300/dev/ct.c index 3c5ecc07763d..eec45a7c8c36 100644 --- a/sys/arch/hp300/dev/ct.c +++ b/sys/arch/hp300/dev/ct.c @@ -1,4 +1,4 @@ -/* $NetBSD: ct.c,v 1.51 2007/10/17 19:54:22 garbled Exp $ */ +/* $NetBSD: ct.c,v 1.52 2008/01/02 11:48:24 ad Exp $ */ /*- * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. @@ -82,7 +82,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ct.c,v 1.51 2007/10/17 19:54:22 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ct.c,v 1.52 2008/01/02 11:48:24 ad Exp $"); #include #include @@ -468,7 +468,7 @@ ctcommand(dev_t dev, int cmd, int cnt) } while (cnt-- > 0) { - bp->b_flags = B_BUSY; + bp->b_cflags = BC_BUSY; if (cmd == MTBSF) { sc->sc_blkno = sc->sc_eofs[sc->sc_eofp]; sc->sc_eofp--; diff --git a/sys/arch/hp300/dev/mt.c b/sys/arch/hp300/dev/mt.c index a548fd214670..ea85e8274b4f 100644 --- a/sys/arch/hp300/dev/mt.c +++ b/sys/arch/hp300/dev/mt.c @@ -1,4 +1,4 @@ -/* $NetBSD: mt.c,v 1.40 2007/10/17 19:54:23 garbled Exp $ */ +/* $NetBSD: mt.c,v 1.41 2008/01/02 11:48:25 ad Exp $ */ /*- * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. 
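ctcommand() above (and mtcommand() just below) now assert ownership of the driver's private command buffer through BC_BUSY in b_cflags rather than B_BUSY in b_flags. A sketch of the shape of such a command submission; B_CMD is the hp300 driver-local flag seen in these files, and the unlocked busy check assumes the same single-opener discipline the drivers rely on:

#include <sys/buf.h>

/* Sketch: submit one command on a driver-private buffer. */
static int
private_buf_command(struct buf *bp, dev_t dev, void (*strat)(struct buf *))
{
	if (bp->b_cflags & BC_BUSY)
		return EBUSY;		/* another command in flight */
	bp->b_cflags = BC_BUSY;		/* ownership: b_cflags */
	bp->b_flags = B_CMD;		/* request type: b_flags */
	bp->b_dev = dev;
	(*strat)(bp);
	(void)biowait(bp);
	bp->b_cflags &= ~BC_BUSY;	/* hand the buffer back */
	return bp->b_error;
}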
@@ -67,7 +67,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: mt.c,v 1.40 2007/10/17 19:54:23 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: mt.c,v 1.41 2008/01/02 11:48:25 ad Exp $"); #include #include @@ -435,13 +435,14 @@ mtcommand(dev_t dev, int cmd, int cnt) int error = 0; #if 1 - if (bp->b_flags & B_BUSY) + if (bp->b_cflags & BC_BUSY) return EBUSY; #endif bp->b_cmd = cmd; bp->b_dev = dev; do { - bp->b_flags = B_BUSY | B_CMD; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_CMD; mtstrategy(bp); biowait(bp); if (bp->b_error != 0) { @@ -450,9 +451,9 @@ mtcommand(dev_t dev, int cmd, int cnt) } } while (--cnt > 0); #if 0 - bp->b_flags = 0 /*&= ~B_BUSY*/; + bp->b_flags = 0 /*&= ~BC_BUSY*/; #else - bp->b_flags &= ~B_BUSY; + bp->b_cflags &= ~BC_BUSY; #endif return error; } diff --git a/sys/arch/hp300/hp300/disksubr.c b/sys/arch/hp300/hp300/disksubr.c index f480b0945d6c..13a019b9972b 100644 --- a/sys/arch/hp300/hp300/disksubr.c +++ b/sys/arch/hp300/hp300/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.26 2007/10/17 19:54:23 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.27 2008/01/02 11:48:25 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988, 1993 @@ -37,7 +37,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.26 2007/10/17 19:54:23 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.27 2008/01/02 11:48:25 ad Exp $"); #include #include @@ -179,7 +179,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/hp700/hp700/disksubr.c b/sys/arch/hp700/hp700/disksubr.c index 17b1d6a594b1..22f54842475a 100644 --- a/sys/arch/hp700/hp700/disksubr.c +++ b/sys/arch/hp700/hp700/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.23 2007/10/17 19:54:26 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.24 2008/01/02 11:48:25 ad Exp $ */ /* $OpenBSD: disksubr.c,v 1.6 2000/10/18 21:00:34 mickey Exp $ */ @@ -68,7 +68,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.23 2007/10/17 19:54:26 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.24 2008/01/02 11:48:25 ad Exp $"); #include #include @@ -101,7 +101,8 @@ readbsdlabel(struct buf *bp, void (*strat)(struct buf *), int cyl, int sec, bp->b_blkno = sec; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; (*strat)(bp); /* if successful, locate disk label within block and validate */ @@ -208,7 +209,8 @@ readliflabel(struct buf *bp, void (*strat)(struct buf *), struct disklabel *lp, /* read LIF volume header */ bp->b_blkno = btodb(HP700_LIF_VOLSTART); bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; bp->b_cylinder = btodb(HP700_LIF_VOLSTART) / lp->d_secpercyl; (*strat)(bp); @@ -231,7 +233,8 @@ readliflabel(struct buf *bp, void (*strat)(struct buf *), struct disklabel *lp, /* read LIF directory */ dbp->b_blkno = btodb(HP700_LIF_DIRSTART); dbp->b_bcount = lp->d_secsize; - dbp->b_flags = B_BUSY | B_READ; + dbp->b_cflags = BC_BUSY; + dbp->b_flags = B_READ; dbp->b_cylinder = (HP700_LIF_DIRSTART) / lp->d_secpercyl; (*strat)(dbp); @@ -367,7 +370,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, *(struct disklabel *)((char *)bp->b_data + labeloffset) = *lp; - bp->b_flags = 
B_BUSY | B_WRITE; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/hpc/hpc/disksubr.c b/sys/arch/hpc/hpc/disksubr.c index 8a1573740672..315b7c6e5d73 100644 --- a/sys/arch/hpc/hpc/disksubr.c +++ b/sys/arch/hpc/hpc/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.19 2007/10/17 19:54:27 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.20 2008/01/02 11:48:25 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.19 2007/10/17 19:54:27 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.20 2008/01/02 11:48:25 ad Exp $"); #include #include @@ -239,7 +239,7 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -276,7 +276,7 @@ nombrpart: i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -424,7 +424,7 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -438,7 +438,8 @@ nombrpart: if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/i386/i386/trap.c b/sys/arch/i386/i386/trap.c index f45070300c22..5ec7041dda7f 100644 --- a/sys/arch/i386/i386/trap.c +++ b/sys/arch/i386/i386/trap.c @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.230 2008/01/01 21:28:40 yamt Exp $ */ +/* $NetBSD: trap.c,v 1.231 2008/01/02 11:48:25 ad Exp $ */ /*- * Copyright (c) 1998, 2000, 2005 The NetBSD Foundation, Inc. @@ -75,7 +75,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.230 2008/01/01 21:28:40 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.231 2008/01/02 11:48:25 ad Exp $"); #include "opt_ddb.h" #include "opt_kgdb.h" @@ -472,25 +472,21 @@ copyfault: return; case T_PROTFLT|T_USER: /* protection fault */ - KERNEL_LOCK(1, l); #ifdef VM86 if (frame->tf_eflags & PSL_VM) { vm86_gpfault(l, type & ~T_USER); - KERNEL_UNLOCK_LAST(l); goto out; } #endif /* If pmap_exec_fixup does something, let's retry the trap. */ if (pmap_exec_fixup(&p->p_vmspace->vm_map, frame, &l->l_addr->u_pcb)) { - KERNEL_UNLOCK_LAST(l); goto out; } KSI_INIT_TRAP(&ksi); ksi.ksi_signo = SIGSEGV; ksi.ksi_addr = (void *)rcr2(); ksi.ksi_code = SEGV_ACCERR; - KERNEL_UNLOCK_LAST(l); goto trapsignal; case T_TSSFLT|T_USER: @@ -621,7 +617,6 @@ copyfault: #endif /* defined(XEN) && !defined(XEN3) */ cr2 = FETCH_CR2; - KERNEL_LOCK(1, NULL); goto faultcommon; case T_PAGEFLT|T_USER: { /* page fault */ @@ -632,7 +627,6 @@ copyfault: extern struct vm_map *kernel_map; cr2 = FETCH_CR2; - KERNEL_LOCK(1, l); faultcommon: vm = p->p_vmspace; if (vm == NULL) @@ -673,8 +667,6 @@ copyfault: uvm_grow(p, va); if (type == T_PAGEFLT) { - KERNEL_UNLOCK_ONE(NULL); - /* * we need to switch pmap now if we're in * the middle of copyin/out. 
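The mt.c, hp300 and hp700 hunks above carry the other half of the flag split: buffer ownership moves from B_BUSY in b_flags to BC_BUSY in the separate b_cflags word. A hedged sketch of the claim/release protocol for a driver-private buffer; mt_claim_buf()/mt_release_buf() are invented names, and no locking is shown because the buffer is assumed private to the driver:

#include <sys/buf.h>
#include <sys/errno.h>

static int
mt_claim_buf(struct buf *bp)
{
        /* old test: if (bp->b_flags & B_BUSY) */
        if (bp->b_cflags & BC_BUSY)
                return EBUSY;
        bp->b_cflags = BC_BUSY;         /* ownership lives in b_cflags */
        bp->b_flags = B_READ;           /* I/O bits stay in b_flags */
        return 0;
}

static void
mt_release_buf(struct buf *bp)
{
        /* releasing must clear b_cflags, not b_flags */
        bp->b_cflags &= ~BC_BUSY;
}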
@@ -692,7 +684,6 @@ copyfault: } return; } - KERNEL_UNLOCK_LAST(l); goto out; } KSI_INIT_TRAP(&ksi); @@ -707,10 +698,8 @@ copyfault: if (type == T_PAGEFLT) { onfault = onfault_handler(pcb, frame); - if (onfault != NULL) { - KERNEL_UNLOCK_ONE(NULL); + if (onfault != NULL) goto copyfault; - } printf("uvm_fault(%p, %#lx, %d) -> %#x\n", map, va, ftype, error); goto we_re_toast; @@ -725,11 +714,6 @@ copyfault: ksi.ksi_signo = SIGSEGV; } (*p->p_emul->e_trapsignal)(l, &ksi); - if (type != T_PAGEFLT) { - KERNEL_UNLOCK_LAST(l); - } else { - KERNEL_UNLOCK_ONE(NULL); - } break; } @@ -761,9 +745,7 @@ copyfault: else ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)frame->tf_eip; - KERNEL_LOCK(1, l); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); } break; @@ -811,9 +793,7 @@ out: return; trapsignal: ksi.ksi_trap = type & ~T_USER; - KERNEL_LOCK(1, l); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); userret(l); } @@ -835,7 +815,5 @@ startlwp(arg) } #endif pool_put(&lwp_uc_pool, uc); - - KERNEL_UNLOCK_LAST(l); userret(l); } diff --git a/sys/arch/mac68k/mac68k/disksubr.c b/sys/arch/mac68k/mac68k/disksubr.c index ebe1ced20868..ad12a6f8626e 100644 --- a/sys/arch/mac68k/mac68k/disksubr.c +++ b/sys/arch/mac68k/mac68k/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.56 2007/10/17 19:55:14 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.57 2008/01/02 11:48:26 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -65,7 +65,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.56 2007/10/17 19:55:14 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.57 2008/01/02 11:48:26 ad Exp $"); #include #include @@ -553,7 +553,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/macppc/macppc/disksubr.c b/sys/arch/macppc/macppc/disksubr.c index bc5c46808662..591714abcea3 100644 --- a/sys/arch/macppc/macppc/disksubr.c +++ b/sys/arch/macppc/macppc/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.42 2007/10/17 19:55:32 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.43 2008/01/02 11:48:26 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -106,7 +106,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.42 2007/10/17 19:55:32 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.43 2008/01/02 11:48:26 ad Exp $"); #include #include @@ -695,8 +695,9 @@ writedisklabel(dev, strat, lp, osdep) if (error != 0) goto done; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_flags &= ~B_READ; bp->b_flags |= B_WRITE; + bp->b_oflags &= ~BO_DONE; memcpy((char *)bp->b_data + osdep->cd_labeloffset, (void *)lp, sizeof *lp); diff --git a/sys/arch/mipsco/mipsco/disksubr.c b/sys/arch/mipsco/mipsco/disksubr.c index 10007b55fe21..cdd1fcb616bf 100644 --- a/sys/arch/mipsco/mipsco/disksubr.c +++ b/sys/arch/mipsco/mipsco/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.22 2007/12/24 15:06:38 ad Exp $ */ +/* $NetBSD: disksubr.c,v 1.23 2008/01/02 11:48:26 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 
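The i386 trap.c hunks above remove the KERNEL_LOCK()/KERNEL_UNLOCK_*() bracketing around uvm_fault() and the trapsignal hooks, so the fault path no longer has to pair an unlock with every early exit. A rough, speculative sketch of the shape the user page-fault path takes after the change; user_pagefault() is an invented wrapper and only the error policy is copied from the diff:

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <uvm/uvm_extern.h>

static void
user_pagefault(struct lwp *l, struct vm_map *map, vaddr_t va, vm_prot_t ftype)
{
        struct proc *p = l->l_proc;
        ksiginfo_t ksi;
        int error;

        /* uvm_fault() is now entered without the big kernel lock */
        error = uvm_fault(map, trunc_page(va), ftype);
        if (error == 0) {
                uvm_grow(p, va);        /* note any stack growth */
                return;
        }
        KSI_INIT_TRAP(&ksi);
        ksi.ksi_signo = (error == ENOMEM) ? SIGKILL : SIGSEGV;
        ksi.ksi_code = (error == EACCES) ? SEGV_ACCERR : SEGV_MAPERR;
        ksi.ksi_addr = (void *)va;
        (*p->p_emul->e_trapsignal)(l, &ksi);    /* also called unlocked */
}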
@@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.22 2007/12/24 15:06:38 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.23 2008/01/02 11:48:26 ad Exp $"); #include #include @@ -226,7 +226,8 @@ writedisklabel(dev, strat, lp, clp) goto ioerror; /* Write MIPS RISC/os label to first sector */ - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_flags &= ~(B_READ); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_WRITE; (*strat)(bp); if ((error = biowait(bp)) != 0) @@ -238,7 +239,8 @@ writedisklabel(dev, strat, lp, clp) bp->b_blkno = LABELSECTOR; bp->b_bcount = lp->d_secsize; bp->b_cylinder = bp->b_blkno / lp->d_secpercyl; - bp->b_flags &= ~(B_READ | B_DONE); + bp->b_flags &= ~(B_READ); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/news68k/news68k/disksubr.c b/sys/arch/news68k/news68k/disksubr.c index f33077194287..bd2c31db7752 100644 --- a/sys/arch/news68k/news68k/disksubr.c +++ b/sys/arch/news68k/news68k/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.30 2007/10/17 19:55:53 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.31 2008/01/02 11:48:26 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.30 2007/10/17 19:55:53 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.31 2008/01/02 11:48:26 ad Exp $"); #include #include @@ -178,7 +178,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/newsmips/newsmips/disksubr.c b/sys/arch/newsmips/newsmips/disksubr.c index 263162137816..eac5526eb436 100644 --- a/sys/arch/newsmips/newsmips/disksubr.c +++ b/sys/arch/newsmips/newsmips/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.27 2007/10/17 19:55:55 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.28 2008/01/02 11:48:26 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.27 2007/10/17 19:55:55 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.28 2008/01/02 11:48:26 ad Exp $"); #include #include @@ -178,7 +178,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_flags &= ~(B_READ); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/playstation2/playstation2/disksubr.c b/sys/arch/playstation2/playstation2/disksubr.c index 675b9232e66a..ad28a68592c6 100644 --- a/sys/arch/playstation2/playstation2/disksubr.c +++ b/sys/arch/playstation2/playstation2/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.15 2007/10/17 19:56:14 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.16 2008/01/02 11:48:26 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 
@@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.15 2007/10/17 19:56:14 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.16 2008/01/02 11:48:26 ad Exp $"); #include #include @@ -188,7 +188,7 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -224,7 +224,7 @@ nombrpart: i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -363,7 +363,7 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -376,7 +376,8 @@ nombrpart: if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/pmax/pmax/disksubr.c b/sys/arch/pmax/pmax/disksubr.c index a0f4c0c29128..86a37de5f238 100644 --- a/sys/arch/pmax/pmax/disksubr.c +++ b/sys/arch/pmax/pmax/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.47 2007/10/17 19:56:15 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.48 2008/01/02 11:48:27 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.47 2007/10/17 19:56:15 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.48 2008/01/02 11:48:27 ad Exp $"); #include "opt_compat_ultrix.h" @@ -281,7 +281,8 @@ writedisklabel(dev, strat, lp, osdep) if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/powerpc/ibm4xx/intr.c b/sys/arch/powerpc/ibm4xx/intr.c index 82ec42977d13..db11b4bfc957 100644 --- a/sys/arch/powerpc/ibm4xx/intr.c +++ b/sys/arch/powerpc/ibm4xx/intr.c @@ -1,4 +1,4 @@ -/* $NetBSD: intr.c,v 1.19 2007/12/03 15:34:11 ad Exp $ */ +/* $NetBSD: intr.c,v 1.20 2008/01/02 11:48:27 ad Exp $ */ /* * Copyright 2002 Wasabi Systems, Inc. @@ -36,7 +36,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.19 2007/12/03 15:34:11 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.20 2008/01/02 11:48:27 ad Exp $"); #include #include @@ -256,13 +256,15 @@ ext_intr(void) disable_irq(i); wrteei(1); - KERNEL_LOCK(1, NULL); ih = intrs[i].is_head; while (ih) { + if (ih->ih_level == IPL_VM) + KERNEL_LOCK(1, NULL); (*ih->ih_fun)(ih->ih_arg); + if (ih->ih_level == IPL_VM) + KERNEL_UNLOCK_ONE(NULL); ih = ih->ih_next; } - KERNEL_UNLOCK_ONE(NULL); mtmsr(msr); if (intrs[i].is_type == IST_LEVEL) diff --git a/sys/arch/powerpc/ibm4xx/trap.c b/sys/arch/powerpc/ibm4xx/trap.c index 04c47ee3f67d..a6080915e91a 100644 --- a/sys/arch/powerpc/ibm4xx/trap.c +++ b/sys/arch/powerpc/ibm4xx/trap.c @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.47 2007/11/28 12:22:28 simonb Exp $ */ +/* $NetBSD: trap.c,v 1.48 2008/01/02 11:48:27 ad Exp $ */ /* * Copyright 2001 Wasabi Systems, Inc.
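In the ibm4xx intr.c hunk above, the big lock is no longer held across the whole handler chain; it is taken per handler, keyed off ih_level, so handlers registered at MP-safe levels run unlocked. A small sketch of that dispatch loop; the struct intrhand here is a minimal stand-in for the port's real one, and the assumption that only IPL_VM handlers still want the big lock is taken from the diff. The unlock must happen before ih is advanced, so the test refers to the handler that just ran:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/intr.h>

struct intrhand {                       /* reduced stand-in */
        int (*ih_fun)(void *);
        void *ih_arg;
        int ih_level;
        struct intrhand *ih_next;
};

static void
dispatch_chain(struct intrhand *ih)
{
        for (; ih != NULL; ih = ih->ih_next) {
                bool biglock = (ih->ih_level == IPL_VM);

                if (biglock)
                        KERNEL_LOCK(1, NULL);
                (*ih->ih_fun)(ih->ih_arg);
                if (biglock)            /* unlock before advancing */
                        KERNEL_UNLOCK_ONE(NULL);
        }
}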
@@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.47 2007/11/28 12:22:28 simonb Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.48 2008/01/02 11:48:27 ad Exp $"); #include "opt_altivec.h" #include "opt_ddb.h" @@ -166,9 +166,7 @@ trap(struct trapframe *frame) ksi.ksi_signo = SIGTRAP; ksi.ksi_trap = EXC_TRC; ksi.ksi_addr = (void *)frame->srr0; - KERNEL_LOCK(1, l); trapsignal(l, &ksi); - KERNEL_UNLOCK_LAST(l); break; /* @@ -183,7 +181,6 @@ trap(struct trapframe *frame) vaddr_t va; struct faultbuf *fb = NULL; - KERNEL_LOCK(1, NULL); va = frame->dar; if (frame->tf_xtra[TF_PID] == KERNEL_PID) { map = kernel_map; @@ -200,7 +197,6 @@ trap(struct trapframe *frame) (ftype & VM_PROT_WRITE) ? "write" : "read", (void *)va, frame->tf_xtra[TF_ESR])); rv = uvm_fault(map, trunc_page(va), ftype); - KERNEL_UNLOCK_ONE(NULL); if (rv == 0) goto done; if ((fb = l->l_addr->u_pcb.pcb_onfault) != NULL) { @@ -221,8 +217,6 @@ trap(struct trapframe *frame) case EXC_DSI|EXC_USER: /* FALLTHROUGH */ case EXC_DTMISS|EXC_USER: - KERNEL_LOCK(1, l); - if (frame->tf_xtra[TF_ESR] & (ESR_DST|ESR_DIZ)) ftype = VM_PROT_WRITE; @@ -234,7 +228,6 @@ trap(struct trapframe *frame) rv = uvm_fault(&p->p_vmspace->vm_map, trunc_page(frame->dar), ftype); if (rv == 0) { - KERNEL_UNLOCK_LAST(l); break; } KSI_INIT_TRAP(&ksi); @@ -250,12 +243,10 @@ trap(struct trapframe *frame) ksi.ksi_signo = SIGKILL; } trapsignal(l, &ksi); - KERNEL_UNLOCK_LAST(l); break; case EXC_ITMISS|EXC_USER: case EXC_ISI|EXC_USER: - KERNEL_LOCK(1, l); ftype = VM_PROT_EXECUTE; DBPRINTF(TDB_ALL, ("trap(EXC_ISI|EXC_USER) at %lx execute fault tf %p\n", @@ -263,7 +254,6 @@ trap(struct trapframe *frame) rv = uvm_fault(&p->p_vmspace->vm_map, trunc_page(frame->srr0), ftype); if (rv == 0) { - KERNEL_UNLOCK_LAST(l); break; } KSI_INIT_TRAP(&ksi); @@ -272,7 +262,6 @@ trap(struct trapframe *frame) ksi.ksi_addr = (void *)frame->srr0; ksi.ksi_code = (rv == EACCES ? SEGV_ACCERR : SEGV_MAPERR); trapsignal(l, &ksi); - KERNEL_UNLOCK_LAST(l); break; case EXC_AST|EXC_USER: @@ -289,7 +278,6 @@ trap(struct trapframe *frame) case EXC_ALI|EXC_USER: - KERNEL_LOCK(1, l); if (fix_unaligned(l, frame) != 0) { KSI_INIT_TRAP(&ksi); ksi.ksi_signo = SIGBUS; @@ -298,7 +286,6 @@ trap(struct trapframe *frame) trapsignal(l, &ksi); } else frame->srr0 += 4; - KERNEL_UNLOCK_LAST(l); break; case EXC_PGM|EXC_USER: @@ -320,9 +307,7 @@ trap(struct trapframe *frame) ksi.ksi_signo = rv; ksi.ksi_trap = EXC_PGM; ksi.ksi_addr = (void *)frame->srr0; - KERNEL_LOCK(1, l); trapsignal(l, &ksi); - KERNEL_UNLOCK_LAST(l); } break; diff --git a/sys/arch/powerpc/oea/pmap.c b/sys/arch/powerpc/oea/pmap.c index 8ca34bb501cc..71ee52fe3116 100644 --- a/sys/arch/powerpc/oea/pmap.c +++ b/sys/arch/powerpc/oea/pmap.c @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.49 2007/12/15 00:39:23 perry Exp $ */ +/* $NetBSD: pmap.c,v 1.50 2008/01/02 11:48:27 ad Exp $ */ /*- * Copyright (c) 2001 The NetBSD Foundation, Inc. * All rights reserved. 
@@ -70,7 +70,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.49 2007/12/15 00:39:23 perry Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.50 2008/01/02 11:48:27 ad Exp $"); #include "opt_ppcarch.h" #include "opt_altivec.h" @@ -83,6 +83,7 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.49 2007/12/15 00:39:23 perry Exp $"); #include #include /* for evcnt */ #include +#include #if __NetBSD_Version__ < 105010000 #include @@ -515,6 +516,8 @@ mfsrin(vaddr_t va) extern void mfmsr64 (register64_t *result); #endif /* PPC_OEA64_BRIDGE */ +#define PMAP_LOCK() KERNEL_LOCK(1, NULL) +#define PMAP_UNLOCK() KERNEL_UNLOCK_ONE(NULL) static inline register_t pmap_interrupts_off(void) @@ -942,6 +945,8 @@ pmap_pte_spill(struct pmap *pm, vaddr_t addr, bool exec) volatile struct pteg *pteg; volatile struct pte *pt; + PMAP_LOCK(); + ptegidx = va_to_pteg(pm, addr); /* @@ -1015,6 +1020,7 @@ pmap_pte_spill(struct pmap *pm, vaddr_t addr, bool exec) TAILQ_REMOVE(pvoh, pvo, pvo_olink); TAILQ_INSERT_TAIL(pvoh, pvo, pvo_olink); } + PMAP_UNLOCK(); return 1; } source_pvo = pvo; @@ -1040,6 +1046,7 @@ pmap_pte_spill(struct pmap *pm, vaddr_t addr, bool exec) if (source_pvo == NULL) { PMAPCOUNT(ptes_unspilled); + PMAP_UNLOCK(); return 0; } @@ -1112,6 +1119,8 @@ pmap_pte_spill(struct pmap *pm, vaddr_t addr, bool exec) PMAP_PVO_CHECK(victim_pvo); PMAP_PVO_CHECK(source_pvo); + + PMAP_UNLOCK(); return 1; } @@ -1228,6 +1237,7 @@ pmap_pinit(pmap_t pm) * Allocate some segment registers for this pmap. */ pm->pm_refs = 1; + PMAP_LOCK(); for (i = 0; i < NPMAPS; i += VSID_NBPW) { static register_t pmap_vsidcontext; register_t hash; @@ -1267,8 +1277,10 @@ pmap_pinit(pmap_t pm) pm->pm_sr[i] = VSID_MAKE(i, hash) | SR_PRKEY | SR_NOEXEC; #endif + PMAP_UNLOCK(); return; } + PMAP_UNLOCK(); panic("pmap_pinit: out of segments"); } @@ -1278,7 +1290,7 @@ pmap_pinit(pmap_t pm) void pmap_reference(pmap_t pm) { - pm->pm_refs++; + atomic_inc_uint(&pm->pm_refs); } /* @@ -1288,7 +1300,7 @@ pmap_reference(pmap_t pm) void pmap_destroy(pmap_t pm) { - if (--pm->pm_refs == 0) { + if (atomic_dec_uint_nv(&pm->pm_refs) == 0) { pmap_release(pm); pool_put(&pmap_pool, pm); } @@ -1306,6 +1318,7 @@ pmap_release(pmap_t pm) KASSERT(pm->pm_stats.resident_count == 0); KASSERT(pm->pm_stats.wired_count == 0); + PMAP_LOCK(); if (pm->pm_sr[0] == 0) panic("pmap_release"); idx = pm->pm_vsid & (NPMAPS-1); @@ -1314,6 +1327,7 @@ pmap_release(pmap_t pm) KASSERT(pmap_vsid_bitmap[idx] & mask); pmap_vsid_bitmap[idx] &= ~mask; + PMAP_UNLOCK(); } /* @@ -1471,6 +1485,8 @@ pmap_pvo_check(const struct pvo_entry *pvo) volatile struct pte *pt; int failed = 0; + PMAP_LOCK(); + if ((uintptr_t)(pvo+1) >= SEGMENT_LENGTH) panic("pmap_pvo_check: pvo %p: invalid address", pvo); @@ -1563,6 +1579,8 @@ pmap_pvo_check(const struct pvo_entry *pvo) if (failed) panic("pmap_pvo_check: pvo %p, pm %p: bugcheck!", pvo, pvo->pvo_pmap); + + PMAP_UNLOCK(); } #endif /* DEBUG || PMAPCHECK */ @@ -1942,6 +1960,8 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) u_int pvo_flags; u_int was_exec = 0; + PMAP_LOCK(); + if (__predict_false(!pmap_initialized)) { pvo_head = &pmap_pvo_kunmanaged; pl = &pmap_upvo_pool; @@ -2035,6 +2055,8 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) DPRINTFN(ENTER, (": error=%d\n", error)); + PMAP_UNLOCK(); + return error; } @@ -2054,6 +2076,8 @@ pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot) DPRINTFN(KENTER, ("pmap_kenter_pa(%#lx,%#lx,%#x)\n", va, pa, prot)); + PMAP_LOCK(); + /* * Assume the page is cache inhibited and 
access is guarded unless * it's in our available memory array. If it is in the memory array, @@ -2083,6 +2107,8 @@ pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot) if (error != 0) panic("pmap_kenter_pa: failed to enter va %#lx pa %#lx: %d", va, pa, error); + + PMAP_UNLOCK(); } void @@ -2107,6 +2133,7 @@ pmap_remove(pmap_t pm, vaddr_t va, vaddr_t endva) register_t msr; int pteidx; + PMAP_LOCK(); LIST_INIT(&pvol); msr = pmap_interrupts_off(); for (; va < endva; va += PAGE_SIZE) { @@ -2117,6 +2144,7 @@ pmap_remove(pmap_t pm, vaddr_t va, vaddr_t endva) } pmap_interrupts_restore(msr); pmap_pvo_free_list(&pvol); + PMAP_UNLOCK(); } /* @@ -2128,6 +2156,7 @@ pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pap) struct pvo_entry *pvo; register_t msr; + PMAP_LOCK(); /* * If this is a kernel pmap lookup, also check the battable @@ -2149,6 +2178,7 @@ pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pap) (~(batu & BAT_BL) << 15) & ~0x1ffffL; if (pap) *pap = (batl & mask) | (va & ~mask); + PMAP_UNLOCK(); return true; } } else { @@ -2161,14 +2191,17 @@ pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pap) (~(batl & BAT601_BSM) << 17) & ~0x1ffffL; if (pap) *pap = (batl & mask) | (va & ~mask); + PMAP_UNLOCK(); return true; } else if (SR601_VALID_P(sr) && SR601_PA_MATCH_P(sr, va)) { if (pap) *pap = va; + PMAP_UNLOCK(); return true; } } + PMAP_UNLOCK(); return false; #elif defined (PPC_OEA64_BRIDGE) panic("%s: pm: %s, va: 0x%08lx\n", __func__, @@ -2187,6 +2220,7 @@ pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pap) | (va & ADDR_POFF); } pmap_interrupts_restore(msr); + PMAP_UNLOCK(); return pvo != NULL; } @@ -2216,6 +2250,8 @@ pmap_protect(pmap_t pm, vaddr_t va, vaddr_t endva, vm_prot_t prot) return; } + PMAP_LOCK(); + msr = pmap_interrupts_off(); for (; va < endva; va += PAGE_SIZE) { pvo = pmap_pvo_find_va(pm, va, &pteidx); @@ -2261,6 +2297,7 @@ pmap_protect(pmap_t pm, vaddr_t va, vaddr_t endva, vm_prot_t prot) PMAP_PVO_CHECK(pvo); /* sanity check */ } pmap_interrupts_restore(msr); + PMAP_UNLOCK(); } void @@ -2269,6 +2306,7 @@ pmap_unwire(pmap_t pm, vaddr_t va) struct pvo_entry *pvo; register_t msr; + PMAP_LOCK(); msr = pmap_interrupts_off(); pvo = pmap_pvo_find_va(pm, va, NULL); if (pvo != NULL) { @@ -2279,6 +2317,7 @@ pmap_unwire(pmap_t pm, vaddr_t va) PMAP_PVO_CHECK(pvo); /* sanity check */ } pmap_interrupts_restore(msr); + PMAP_UNLOCK(); } /* @@ -2292,6 +2331,8 @@ pmap_page_protect(struct vm_page *pg, vm_prot_t prot) volatile struct pte *pt; register_t msr; + PMAP_LOCK(); + KASSERT(prot != VM_PROT_ALL); LIST_INIT(&pvol); msr = pmap_interrupts_off(); @@ -2356,6 +2397,8 @@ pmap_page_protect(struct vm_page *pg, vm_prot_t prot) } pmap_interrupts_restore(msr); pmap_pvo_free_list(&pvol); + + PMAP_UNLOCK(); } /* @@ -2401,8 +2444,12 @@ pmap_query_bit(struct vm_page *pg, int ptebit) volatile struct pte *pt; register_t msr; - if (pmap_attr_fetch(pg) & ptebit) + PMAP_LOCK(); + + if (pmap_attr_fetch(pg) & ptebit) { + PMAP_UNLOCK(); return true; + } msr = pmap_interrupts_off(); LIST_FOREACH(pvo, vm_page_to_pvoh(pg), pvo_vlink) { @@ -2415,6 +2462,7 @@ pmap_query_bit(struct vm_page *pg, int ptebit) pmap_attr_save(pg, ptebit); PMAP_PVO_CHECK(pvo); /* sanity check */ pmap_interrupts_restore(msr); + PMAP_UNLOCK(); return true; } } @@ -2438,11 +2486,13 @@ pmap_query_bit(struct vm_page *pg, int ptebit) pmap_attr_save(pg, ptebit); PMAP_PVO_CHECK(pvo); /* sanity check */ pmap_interrupts_restore(msr); + PMAP_UNLOCK(); return true; } } } pmap_interrupts_restore(msr); + PMAP_UNLOCK(); return false; } @@ -2455,6 +2505,7 @@ 
pmap_clear_bit(struct vm_page *pg, int ptebit) register_t msr; int rv = 0; + PMAP_LOCK(); msr = pmap_interrupts_off(); /* @@ -2523,6 +2574,7 @@ pmap_clear_bit(struct vm_page *pg, int ptebit) PMAPCOUNT(exec_synced_clear_modify); } } + PMAP_UNLOCK(); return (rv & ptebit) != 0; } @@ -2533,6 +2585,7 @@ pmap_procwr(struct proc *p, vaddr_t va, size_t len) size_t offset = va & ADDR_POFF; int s; + PMAP_LOCK(); s = splvm(); while (len > 0) { size_t seglen = PAGE_SIZE - offset; @@ -2549,6 +2602,7 @@ pmap_procwr(struct proc *p, vaddr_t va, size_t len) offset = 0; } splx(s); + PMAP_UNLOCK(); } #if defined(DEBUG) || defined(PMAPCHECK) || defined(DDB) @@ -2822,15 +2876,19 @@ pmap_pool_ualloc(struct pool *pp, int flags) { struct pvo_page *pvop; + if (uvm.page_init_done != true) { + return (void *) uvm_pageboot_alloc(PAGE_SIZE); + } + + PMAP_LOCK(); pvop = SIMPLEQ_FIRST(&pmap_upvop_head); if (pvop != NULL) { pmap_upvop_free--; SIMPLEQ_REMOVE_HEAD(&pmap_upvop_head, pvop_link); + PMAP_UNLOCK(); return pvop; } - if (uvm.page_init_done != true) { - return (void *) uvm_pageboot_alloc(PAGE_SIZE); - } + PMAP_UNLOCK(); return pmap_pool_malloc(pp, flags); } @@ -2840,12 +2898,15 @@ pmap_pool_malloc(struct pool *pp, int flags) struct pvo_page *pvop; struct vm_page *pg; + PMAP_LOCK(); pvop = SIMPLEQ_FIRST(&pmap_mpvop_head); if (pvop != NULL) { pmap_mpvop_free--; SIMPLEQ_REMOVE_HEAD(&pmap_mpvop_head, pvop_link); + PMAP_UNLOCK(); return pvop; } + PMAP_UNLOCK(); again: pg = uvm_pagealloc_strat(NULL, 0, NULL, UVM_PGA_USERESERVE, UVM_PGA_STRAT_ONLY, VM_FREELIST_FIRST256); @@ -2870,11 +2931,13 @@ pmap_pool_ufree(struct pool *pp, void *va) return; } #endif + PMAP_LOCK(); pvop = va; SIMPLEQ_INSERT_HEAD(&pmap_upvop_head, pvop, pvop_link); pmap_upvop_free++; if (pmap_upvop_free > pmap_upvop_maxfree) pmap_upvop_maxfree = pmap_upvop_free; + PMAP_UNLOCK(); } void @@ -2882,11 +2945,13 @@ pmap_pool_mfree(struct pool *pp, void *va) { struct pvo_page *pvop; + PMAP_LOCK(); pvop = va; SIMPLEQ_INSERT_HEAD(&pmap_mpvop_head, pvop, pvop_link); pmap_mpvop_free++; if (pmap_mpvop_free > pmap_mpvop_maxfree) pmap_mpvop_maxfree = pmap_mpvop_free; + PMAP_UNLOCK(); #if 0 uvm_pagefree(PHYS_TO_VM_PAGE((paddr_t) va)); #endif diff --git a/sys/arch/powerpc/powerpc/syscall.c b/sys/arch/powerpc/powerpc/syscall.c index 19b54e89e389..3165a0818793 100644 --- a/sys/arch/powerpc/powerpc/syscall.c +++ b/sys/arch/powerpc/powerpc/syscall.c @@ -1,4 +1,4 @@ -/* $NetBSD: syscall.c,v 1.36 2007/11/05 20:43:04 ad Exp $ */ +/* $NetBSD: syscall.c,v 1.37 2008/01/02 11:48:27 ad Exp $ */ /* * Copyright (C) 2002 Matt Thomas @@ -60,7 +60,7 @@ #define EMULNAME(x) (x) #define EMULNAMEU(x) (x) -__KERNEL_RCSID(0, "$NetBSD: syscall.c,v 1.36 2007/11/05 20:43:04 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: syscall.c,v 1.37 2008/01/02 11:48:27 ad Exp $"); void child_return(void *arg) @@ -135,11 +135,9 @@ EMULNAME(syscall_plain)(struct trapframe *frame) if (argsize > n * sizeof(register_t)) { memcpy(args, params, n * sizeof(register_t)); - KERNEL_LOCK(1, l); error = copyin(MOREARGS(frame->fixreg[1]), args + n, argsize - n * sizeof(register_t)); - KERNEL_UNLOCK_LAST(l); if (error) goto bad; params = args; diff --git a/sys/arch/powerpc/powerpc/trap.c b/sys/arch/powerpc/powerpc/trap.c index 51edc39e67f0..b71d93dfe651 100644 --- a/sys/arch/powerpc/powerpc/trap.c +++ b/sys/arch/powerpc/powerpc/trap.c @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.122 2007/10/24 14:50:39 ad Exp $ */ +/* $NetBSD: trap.c,v 1.123 2008/01/02 11:48:27 ad Exp $ */ /* * Copyright (C) 1995, 1996 Wolfgang Solfrank. 
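The powerpc/oea pmap.c hunks above do not introduce fine-grained locking: PMAP_LOCK()/PMAP_UNLOCK() simply funnel the whole pmap module through the (recursive) big kernel lock. The discipline the diff then has to keep by hand is that every return path inside a locked region unlocks, as the pmap_query_bit() and pmap_extract() hunks show. A compressed sketch of that shape; query_bit_sketch() is invented and pmap_attr_fetch() stands in for the module's real attribute lookup:

#include <sys/param.h>
#include <sys/systm.h>
#include <uvm/uvm_extern.h>

#define PMAP_LOCK()     KERNEL_LOCK(1, NULL)
#define PMAP_UNLOCK()   KERNEL_UNLOCK_ONE(NULL)

extern int pmap_attr_fetch(struct vm_page *);   /* oea pmap internal */

static bool
query_bit_sketch(struct vm_page *pg, int ptebit)
{
        PMAP_LOCK();
        if (pmap_attr_fetch(pg) & ptebit) {
                PMAP_UNLOCK();          /* every early return must unlock */
                return true;
        }
        /* ... otherwise scan the PV lists and PTEs, as the diff does ... */
        PMAP_UNLOCK();
        return false;
}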
@@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.122 2007/10/24 14:50:39 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.123 2008/01/02 11:48:27 ad Exp $"); #include "opt_altivec.h" #include "opt_ddb.h" @@ -112,9 +112,7 @@ trap(struct trapframe *frame) ksi.ksi_trap = EXC_TRC; ksi.ksi_addr = (void *)frame->srr0; ksi.ksi_code = TRAP_TRACE; - KERNEL_LOCK(1, l); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); } break; case EXC_DSI: { @@ -126,7 +124,6 @@ trap(struct trapframe *frame) * Only query UVM if no interrupts are active. */ if (ci->ci_intrdepth < 0) { - KERNEL_LOCK(1, NULL); if ((va >> ADDR_SR_SHFT) == pcb->pcb_kmapsr) { va &= ADDR_PIDX | ADDR_POFF; va |= pcb->pcb_umapsr << ADDR_SR_SHFT; @@ -137,8 +134,6 @@ trap(struct trapframe *frame) vm_map_pmap(map)->pm_ste_evictions > 0 && pmap_ste_spill(vm_map_pmap(map), trunc_page(va), false)) { - /* KERNEL_UNLOCK_LAST(l); */ - KERNEL_UNLOCK_ONE(NULL); return; } #endif @@ -147,8 +142,6 @@ trap(struct trapframe *frame) vm_map_pmap(map)->pm_evictions > 0 && pmap_pte_spill(vm_map_pmap(map), trunc_page(va), false)) { - /* KERNEL_UNLOCK_LAST(l); */ - KERNEL_UNLOCK_ONE(NULL); return; } #if defined(DIAGNOSTIC) && (defined(PPC_OEA) || defined (PPC_OEA64_BRIDGE)) @@ -180,9 +173,7 @@ trap(struct trapframe *frame) */ if (rv == 0) uvm_grow(p, trunc_page(va)); - /* KERNEL_UNLOCK_LAST(l); */ } - KERNEL_UNLOCK_ONE(NULL); if (rv == 0) return; if (rv == EACCES) @@ -210,7 +201,6 @@ trap(struct trapframe *frame) goto brain_damage2; } case EXC_DSI|EXC_USER: - KERNEL_LOCK(1, l); ci->ci_ev_udsi.ev_count++; if (frame->dsisr & DSISR_STORE) ftype = VM_PROT_WRITE; @@ -228,7 +218,6 @@ trap(struct trapframe *frame) vm_map_pmap(map)->pm_ste_evictions > 0 && pmap_ste_spill(vm_map_pmap(map), trunc_page(frame->dar), false)) { - KERNEL_UNLOCK_LAST(l); break; } #endif @@ -237,7 +226,6 @@ trap(struct trapframe *frame) vm_map_pmap(map)->pm_evictions > 0 && pmap_pte_spill(vm_map_pmap(map), trunc_page(frame->dar), false)) { - KERNEL_UNLOCK_LAST(l); break; } @@ -247,7 +235,6 @@ trap(struct trapframe *frame) * Record any stack growth... */ uvm_grow(p, trunc_page(frame->dar)); - KERNEL_UNLOCK_LAST(l); break; } ci->ci_ev_udsi_fatal.ev_count++; @@ -273,7 +260,6 @@ trap(struct trapframe *frame) ksi.ksi_signo = SIGKILL; } (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); break; case EXC_ISI: @@ -284,7 +270,6 @@ trap(struct trapframe *frame) goto brain_damage2; case EXC_ISI|EXC_USER: - KERNEL_LOCK(1, l); ci->ci_ev_isi.ev_count++; /* @@ -297,7 +282,6 @@ trap(struct trapframe *frame) if (vm_map_pmap(map)->pm_ste_evictions > 0 && pmap_ste_spill(vm_map_pmap(map), trunc_page(frame->srr0), true)) { - KERNEL_UNLOCK_LAST(l); break; } #endif @@ -305,14 +289,12 @@ trap(struct trapframe *frame) if (vm_map_pmap(map)->pm_evictions > 0 && pmap_pte_spill(vm_map_pmap(map), trunc_page(frame->srr0), true)) { - KERNEL_UNLOCK_LAST(l); break; } ftype = VM_PROT_EXECUTE; rv = uvm_fault(map, trunc_page(frame->srr0), ftype); if (rv == 0) { - KERNEL_UNLOCK_LAST(l); break; } ci->ci_ev_isi_fatal.ev_count++; @@ -327,7 +309,6 @@ trap(struct trapframe *frame) ksi.ksi_addr = (void *)frame->srr0; ksi.ksi_code = (rv == EACCES ? 
SEGV_ACCERR : SEGV_MAPERR); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); break; case EXC_FPU|EXC_USER: @@ -340,7 +321,6 @@ trap(struct trapframe *frame) case EXC_AST|EXC_USER: ci->ci_astpending = 0; /* we are about to do it */ - KERNEL_LOCK(1, l); uvmexp.softs++; if (l->l_pflag & LP_OWEUPC) { l->l_flag &= ~LP_OWEUPC; @@ -349,11 +329,9 @@ trap(struct trapframe *frame) /* Check whether we are being preempted. */ if (ci->ci_want_resched) preempt(); - KERNEL_UNLOCK_LAST(l); break; case EXC_ALI|EXC_USER: - KERNEL_LOCK(1, l); ci->ci_ev_ali.ev_count++; if (fix_unaligned(l, frame) != 0) { ci->ci_ev_ali_fatal.ev_count++; @@ -371,7 +349,6 @@ trap(struct trapframe *frame) (*p->p_emul->e_trapsignal)(l, &ksi); } else frame->srr0 += 4; - KERNEL_UNLOCK_LAST(l); break; case EXC_PERF|EXC_USER: @@ -384,7 +361,6 @@ trap(struct trapframe *frame) enable_vec(); break; #else - KERNEL_LOCK(1, l); if (cpu_printfataltraps) { printf("trap: pid %d.%d (%s): user VEC trap @ %#lx " "(SRR1=%#lx)\n", @@ -397,7 +373,6 @@ trap(struct trapframe *frame) ksi.ksi_addr = (void *)frame->srr0; ksi.ksi_code = ILL_ILLOPC; (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); break; #endif case EXC_MCHK|EXC_USER: @@ -412,15 +387,12 @@ trap(struct trapframe *frame) ksi.ksi_trap = EXC_MCHK; ksi.ksi_addr = (void *)frame->srr0; ksi.ksi_code = BUS_OBJERR; - KERNEL_LOCK(1, l); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); break; case EXC_PGM|EXC_USER: ci->ci_ev_pgm.ev_count++; if (frame->srr1 & 0x00020000) { /* Bit 14 is set if trap */ - KERNEL_LOCK(1, l); if (p->p_raslist == NULL || ras_lookup(p, (void *)frame->srr0) == (void *) -1) { KSI_INIT_TRAP(&ksi); @@ -433,7 +405,6 @@ trap(struct trapframe *frame) /* skip the trap instruction */ frame->srr0 += 4; } - KERNEL_UNLOCK_LAST(l); } else { KSI_INIT_TRAP(&ksi); ksi.ksi_signo = SIGILL; @@ -454,9 +425,7 @@ trap(struct trapframe *frame) printf("trap: pid %d.%d (%s): user PGM trap @" " %#lx (SRR1=%#lx)\n", p->p_pid, l->l_lid, p->p_comm, frame->srr0, frame->srr1); - KERNEL_LOCK(1, l); (*p->p_emul->e_trapsignal)(l, &ksi); - KERNEL_UNLOCK_LAST(l); } break; @@ -918,6 +887,5 @@ startlwp(void *arg) } #endif pool_put(&lwp_uc_pool, uc); - KERNEL_UNLOCK_LAST(l); userret(l, frame); } diff --git a/sys/arch/sandpoint/sandpoint/disksubr.c b/sys/arch/sandpoint/sandpoint/disksubr.c index 15790ccec9cf..a68e531d346b 100644 --- a/sys/arch/sandpoint/sandpoint/disksubr.c +++ b/sys/arch/sandpoint/sandpoint/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.16 2007/10/17 19:56:59 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.17 2008/01/02 11:48:28 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 
@@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.16 2007/10/17 19:56:59 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.17 2008/01/02 11:48:28 ad Exp $"); #include #include @@ -177,7 +177,8 @@ readdisklabel(dev, strat, lp, osdep) /* read master boot record */ bp->b_blkno = MBR_BBSECTOR; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; bp->b_cylinder = MBR_BBSECTOR / lp->d_secpercyl; (*strat)(bp); @@ -246,7 +247,8 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; (*strat)(bp); /* if successful, locate disk label within block and validate */ @@ -282,7 +284,8 @@ nombrpart: i = 0; do { /* read a bad sector table */ - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) bp->b_blkno *= lp->d_secsize / DEV_BSIZE; @@ -401,7 +404,8 @@ writedisklabel(dev, strat, lp, osdep) /* read master boot record */ bp->b_blkno = MBR_BBSECTOR; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; bp->b_cylinder = MBR_BBSECTOR / lp->d_secpercyl; (*strat)(bp); @@ -434,7 +438,8 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; (*strat)(bp); /* if successful, locate disk label within block and validate */ @@ -447,7 +452,8 @@ nombrpart: if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags = B_BUSY | B_WRITE; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_WRITE; (*strat)(bp); error = biowait(bp); goto done; diff --git a/sys/arch/sbmips/sbmips/disksubr.c b/sys/arch/sbmips/sbmips/disksubr.c index a18d1237595b..3e2355963e0e 100644 --- a/sys/arch/sbmips/sbmips/disksubr.c +++ b/sys/arch/sbmips/sbmips/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.16 2007/10/17 19:57:02 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.17 2008/01/02 11:48:28 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 
@@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.16 2007/10/17 19:57:02 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.17 2008/01/02 11:48:28 ad Exp $"); #include #include @@ -146,7 +146,8 @@ readdisklabel(dev, strat, lp, osdep) /* read master boot record */ bp->b_blkno = MBR_BBSECTOR; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags |= B_READ; bp->b_cylinder = MBR_BBSECTOR / lp->d_secpercyl; (*strat)(bp); @@ -194,7 +195,8 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; (*strat)(bp); /* if successful, locate disk label within block and validate */ @@ -230,7 +232,8 @@ nombrpart: i = 0; do { /* read a bad sector table */ - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) bp->b_blkno *= lp->d_secsize / DEV_BSIZE; @@ -349,7 +352,8 @@ writedisklabel(dev, strat, lp, osdep) /* read master boot record */ bp->b_blkno = MBR_BBSECTOR; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; bp->b_cylinder = MBR_BBSECTOR / lp->d_secpercyl; (*strat)(bp); @@ -373,7 +377,8 @@ nombrpart: bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags = B_BUSY | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_READ; (*strat)(bp); /* if successful, locate disk label within block and validate */ @@ -386,7 +391,8 @@ nombrpart: if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags = B_BUSY | B_WRITE; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_WRITE; (*strat)(bp); error = biowait(bp); goto done; diff --git a/sys/arch/sgimips/sgimips/disksubr.c b/sys/arch/sgimips/sgimips/disksubr.c index ee78f7b9449c..9aab0a033a68 100644 --- a/sys/arch/sgimips/sgimips/disksubr.c +++ b/sys/arch/sgimips/sgimips/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.22 2007/12/24 15:06:38 ad Exp $ */ +/* $NetBSD: disksubr.c,v 1.23 2008/01/02 11:48:28 ad Exp $ */ /* * Copyright (c) 2001 Christopher Sekiya @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.22 2007/12/24 15:06:38 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.23 2008/01/02 11:48:28 ad Exp $"); #include #include @@ -197,7 +197,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, str goto ioerror; /* Write sgimips label to first sector */ - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); if ((error = biowait(bp)) != 0) @@ -209,7 +210,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, str bp->b_blkno = LABELSECTOR; bp->b_bcount = lp->d_secsize; bp->b_cylinder = bp->b_blkno / lp->d_secpercyl; - bp->b_flags &= ~(B_READ | B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/sh3/sh3/disksubr.c b/sys/arch/sh3/sh3/disksubr.c index 0703cca26910..7def17440a0c 100644 --- a/sys/arch/sh3/sh3/disksubr.c +++ b/sys/arch/sh3/sh3/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.26 2007/10/17 19:57:07 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.27 2008/01/02 11:48:28 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 
Regents of the University of California. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.26 2007/10/17 19:57:07 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.27 2008/01/02 11:48:28 ad Exp $"); #include "opt_mbr.h" @@ -410,7 +410,7 @@ readdisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -485,7 +485,7 @@ readdisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEV_BSIZE) @@ -633,7 +633,7 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, bp->b_blkno = dospartoff + LABELSECTOR; bp->b_cylinder = cyl; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; (*strat)(bp); @@ -681,7 +681,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), struct disklabel *lp, goto done; found: - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/sparc/dev/fd.c b/sys/arch/sparc/dev/fd.c index baec3e94b94e..672b93168013 100644 --- a/sys/arch/sparc/dev/fd.c +++ b/sys/arch/sparc/dev/fd.c @@ -1,4 +1,4 @@ -/* $NetBSD: fd.c,v 1.138 2007/11/27 21:56:06 ad Exp $ */ +/* $NetBSD: fd.c,v 1.139 2008/01/02 11:48:28 ad Exp $ */ /*- * Copyright (c) 2000 The NetBSD Foundation, Inc. @@ -108,7 +108,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.138 2007/11/27 21:56:06 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.139 2008/01/02 11:48:28 ad Exp $"); #include "opt_ddb.h" #include "opt_md.h" @@ -2139,12 +2139,13 @@ fdformat(dev_t dev, struct ne7_fd_formb *finfo, struct proc *p) struct buf *bp; /* set up a buffer header for fdstrategy() */ - bp = getiobuf_nowait(); + bp = getiobuf(NULL, false); if (bp == NULL) return (ENOBUFS); bp->b_vp = NULL; - bp->b_flags = B_BUSY | B_PHYS | B_FORMAT; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_PHYS | B_FORMAT; bp->b_proc = p; bp->b_dev = dev; @@ -2322,14 +2323,13 @@ fd_read_md_image(size_t *sizep, void * *addrp) bp->b_error = 0; bp->b_resid = 0; bp->b_proc = NULL; - bp->b_flags = B_BUSY | B_PHYS | B_RAW | B_READ; + bp->b_cflags |= BC_BUSY; + bp->b_flags = B_PHYS | B_RAW | B_READ; bp->b_blkno = btodb(offset); bp->b_bcount = DEV_BSIZE; bp->b_data = addr; fdstrategy(bp); - while ((bp->b_flags & B_DONE) == 0) { - tsleep((void *)bp, PRIBIO + 1, "physio", 0); - } + biowait(bp); if (bp->b_error) panic("fd: mountroot: fdread error %d", bp->b_error); diff --git a/sys/arch/sparc/include/mutex.h b/sys/arch/sparc/include/mutex.h index df7592ff737e..df7a957edb03 100644 --- a/sys/arch/sparc/include/mutex.h +++ b/sys/arch/sparc/include/mutex.h @@ -1,4 +1,4 @@ -/* $NetBSD: mutex.h,v 1.6 2007/11/21 10:19:08 yamt Exp $ */ +/* $NetBSD: mutex.h,v 1.7 2008/01/02 11:48:28 ad Exp $ */ /*- * Copyright (c) 2002, 2007 The NetBSD Foundation, Inc. 
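The sparc fd.c hunks above make two related cleanups: scratch buffers now come from the common getiobuf()/putiobuf() pool rather than hand-rolled allocation, and the open-coded "tsleep until B_DONE" loop becomes a plain biowait(), which performs the equivalent wait on the buffer's own completion state. A minimal sketch of the combined pattern; raw_io() and its argument list are invented for illustration:

#include <sys/param.h>
#include <sys/buf.h>
#include <sys/errno.h>

static int
raw_io(dev_t dev, void *data, size_t len, void (*strat)(struct buf *))
{
        struct buf *bp;
        int error;

        bp = getiobuf(NULL, false);     /* no vnode; do not wait for memory */
        if (bp == NULL)
                return ENOBUFS;
        bp->b_cflags = BC_BUSY;
        bp->b_flags = B_PHYS | B_RAW | B_READ;
        bp->b_dev = dev;
        bp->b_data = data;
        bp->b_bcount = len;
        (*strat)(bp);
        error = biowait(bp);            /* replaces the tsleep-on-B_DONE loop */
        putiobuf(bp);
        return error;
}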
@@ -89,6 +89,8 @@ struct kmutex { #else /* __MUTEX_PRIVATE */ +#include + struct kmutex { union { /* Adaptive mutex */ diff --git a/sys/arch/sparc/include/pmap.h b/sys/arch/sparc/include/pmap.h index b5c0a223810b..22c8ec4e75d0 100644 --- a/sys/arch/sparc/include/pmap.h +++ b/sys/arch/sparc/include/pmap.h @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.h,v 1.81 2007/10/17 19:57:13 garbled Exp $ */ +/* $NetBSD: pmap.h,v 1.82 2008/01/02 11:48:29 ad Exp $ */ /* * Copyright (c) 1996 @@ -143,7 +143,6 @@ struct pmap { union ctxinfo *pm_ctx; /* current context, if any */ int pm_ctxnum; /* current context's number */ u_int pm_cpuset; /* CPU's this pmap has context on */ - struct simplelock pm_lock; /* spinlock */ int pm_refcount; /* just what it says */ struct mmuhd pm_reglist; /* MMU regions on this pmap (4/4c) */ diff --git a/sys/arch/sparc/include/vmparam.h b/sys/arch/sparc/include/vmparam.h index 0453aff2c7c8..3abbe8a2a930 100644 --- a/sys/arch/sparc/include/vmparam.h +++ b/sys/arch/sparc/include/vmparam.h @@ -1,4 +1,4 @@ -/* $NetBSD: vmparam.h,v 1.38 2006/02/07 16:55:31 chs Exp $ */ +/* $NetBSD: vmparam.h,v 1.39 2008/01/02 11:48:29 ad Exp $ */ /* * Copyright (c) 1992, 1993 @@ -142,7 +142,6 @@ struct vm_page_md { vaddr_t pv_va; /* virtual address */ int pv_flags; /* flags (below) */ } pvlisthead; - struct simplelock pv_slock; }; #define VM_MDPAGE_PVHEAD(pg) (&(pg)->mdpage.pvlisthead) @@ -151,7 +150,6 @@ struct vm_page_md { (pg)->mdpage.pvlisthead.pv_pmap = NULL; \ (pg)->mdpage.pvlisthead.pv_va = 0; \ (pg)->mdpage.pvlisthead.pv_flags = 0; \ - simple_lock_init(&(pg)->mdpage.pv_slock); \ } while(/*CONSTCOND*/0) #endif /* _SPARC_VMPARAM_H_ */ diff --git a/sys/arch/sparc/sparc/pmap.c b/sys/arch/sparc/sparc/pmap.c index 0311c479bad9..78a59d785575 100644 --- a/sys/arch/sparc/sparc/pmap.c +++ b/sys/arch/sparc/sparc/pmap.c @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.321 2007/11/16 23:46:20 martin Exp $ */ +/* $NetBSD: pmap.c,v 1.322 2008/01/02 11:48:29 ad Exp $ */ /* * Copyright (c) 1996 @@ -56,12 +56,10 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.321 2007/11/16 23:46:20 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.322 2008/01/02 11:48:29 ad Exp $"); #include "opt_ddb.h" #include "opt_kgdb.h" -#include "opt_lockdebug.h" -#include "opt_multiprocessor.h" #include "opt_sparc_arch.h" #include @@ -69,12 +67,12 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.321 2007/11/16 23:46:20 martin Exp $"); #include #include #include -#include #include #include #include #include #include +#include #include @@ -176,58 +174,8 @@ paddr_t vm_first_phys = (paddr_t)-1; paddr_t vm_last_phys = 0; psize_t vm_num_phys; -/* - * Locking: - * - * This pmap module uses two types of locks: `normal' (sleep) - * locks and `simple' (spin) locks. They are used as follows: - * - * READ/WRITE SPIN LOCKS - * --------------------- - * - * * pmap_main_lock - This lock is used to prevent deadlock and/or - * provide mutex access to the pmap module. Most operations lock - * the pmap first, then PV lists as needed. However, some operations, - * such as pmap_page_protect(), lock the PV lists before locking - * the pmaps. To prevent deadlock, we require a mutex lock on the - * pmap module if locking in the PV->pmap direction. This is - * implemented by acquiring a (shared) read lock on pmap_main_lock - * if locking pmap->PV and a (exclusive) write lock if locking in - * the PV->pmap direction. Since only one thread can hold a write - * lock at a time, this provides the mutex. 
- * - * SIMPLE LOCKS - * ------------ - * - * * pm_slock (per-pmap) - This lock protects all of the members - * of the pmap structure itself. Note that in the case of the - * kernel pmap, interrupts which cause memory allocation *must* - * be blocked while this lock is asserted. - * - * * pv_slock (per-vm_page) - This lock protects the PV list - * for a specified managed page. - * - * All internal functions which operate on a pmap are called - * with the pmap already locked by the caller (which will be - * an interface function). - */ -/* struct lock pmap_main_lock; */ - -#if 0 /* defined(MULTIPROCESSOR) || defined(LOCKDEBUG) */ -#define PMAP_MAP_TO_HEAD_LOCK() \ - spinlockmgr(&pmap_main_lock, LK_SHARED, NULL) -#define PMAP_MAP_TO_HEAD_UNLOCK() \ - spinlockmgr(&pmap_main_lock, LK_RELEASE, NULL) -#define PMAP_HEAD_TO_MAP_LOCK() \ - spinlockmgr(&pmap_main_lock, LK_EXCLUSIVE, NULL) -#define PMAP_HEAD_TO_MAP_UNLOCK() \ - spinlockmgr(&pmap_main_lock, LK_RELEASE, NULL) -#else -#define PMAP_MAP_TO_HEAD_LOCK() /* nothing */ -#define PMAP_MAP_TO_HEAD_UNLOCK() /* nothing */ -#define PMAP_HEAD_TO_MAP_LOCK() /* nothing */ -#define PMAP_HEAD_TO_MAP_UNLOCK() /* nothing */ -#endif /* MULTIPROCESSOR || LOCKDEBUG */ +#define PMAP_LOCK() KERNEL_LOCK(1, NULL) +#define PMAP_UNLOCK() KERNEL_UNLOCK_ONE(NULL) /* * Flags in pvlist.pv_flags. Note that PV_MOD must be 1 and PV_REF must be 2 @@ -365,6 +313,8 @@ struct mmuq region_freelist, region_lru, region_locked; int seginval; /* [4/4c] the invalid segment number */ int reginval; /* [4/3mmu] the invalid region number */ +static kmutex_t demap_lock; + /* * (sun4/4c) * A context is simply a small number that dictates which set of 4096 @@ -387,7 +337,7 @@ union ctxinfo { struct pmap *c_pmap; /* pmap (if busy) */ }; -static struct simplelock ctx_lock; /* lock for below */ +static kmutex_t ctx_lock; /* lock for below */ union ctxinfo *ctxinfo; /* allocated at in pmap_bootstrap */ union ctxinfo *ctx_freelist; /* context free list */ int ctx_kick; /* allocation rover when none free */ @@ -823,8 +773,6 @@ VA2PA(void *addr) * PTE at the same time we are. This is the procedure that is * recommended in the SuperSPARC user's manual. */ -static struct simplelock demap_lock = SIMPLELOCK_INITIALIZER; - int updatepte4m(vaddr_t va, int *pte, int bic, int bis, int ctx, u_int cpuset) { @@ -835,7 +783,7 @@ updatepte4m(vaddr_t va, int *pte, int bic, int bis, int ctx, u_int cpuset) * Can only be one of these happening in the system * at any one time. 
*/ - simple_lock(&demap_lock); + mutex_spin_enter(&demap_lock); /* * The idea is to loop swapping zero into the pte, flushing @@ -854,7 +802,7 @@ updatepte4m(vaddr_t va, int *pte, int bic, int bis, int ctx, u_int cpuset) swapval = (oldval & ~bic) | bis; swap(vpte, swapval); - simple_unlock(&demap_lock); + mutex_spin_exit(&demap_lock); return (oldval); } @@ -1732,7 +1680,6 @@ me_alloc(struct mmuq *mh, struct pmap *newpm, int newvreg, int newvseg) } while (--i > 0); /* update segment tables */ - simple_lock(&pm->pm_lock); if (CTX_USABLE(pm,rp)) { va = VSTOVA(me->me_vreg,me->me_vseg); if (pm != pmap_kernel() || HASSUN4_MMU3L) @@ -1749,7 +1696,6 @@ me_alloc(struct mmuq *mh, struct pmap *newpm, int newvreg, int newvseg) /* off old pmap chain */ TAILQ_REMOVE(&pm->pm_seglist, me, me_pmchain); - simple_unlock(&pm->pm_lock); setcontext4(ctx); /* onto new pmap chain; new pmap is already locked, if needed */ @@ -1897,14 +1843,12 @@ region_alloc(struct mmuq *mh, struct pmap *newpm, int newvr) } /* update region tables */ - simple_lock(&pm->pm_lock); /* what if other CPU takes mmuentry ?? */ if (pm->pm_ctx) setregmap(VRTOVA(me->me_vreg), reginval); rp->rg_smeg = reginval; /* off old pmap chain */ TAILQ_REMOVE(&pm->pm_reglist, me, me_pmchain); - simple_unlock(&pm->pm_lock); setcontext4(ctx); /* done with old context */ /* onto new pmap chain; new pmap is already locked, if needed */ @@ -2055,6 +1999,8 @@ mmu_pagein(struct pmap *pm, vaddr_t va, int prot) struct regmap *rp; struct segmap *sp; + PMAP_LOCK(); + if (prot != VM_PROT_NONE) bits = PG_V | ((prot & VM_PROT_WRITE) ? PG_W : 0); else @@ -2065,8 +2011,10 @@ mmu_pagein(struct pmap *pm, vaddr_t va, int prot) rp = &pm->pm_regmap[vr]; /* return 0 if we have no PMEGs to load */ - if (rp->rg_nsegmap == 0) + if (rp->rg_nsegmap == 0) { + PMAP_UNLOCK(); return (0); + } #ifdef DIAGNOSTIC if (rp->rg_segmap == NULL) @@ -2080,14 +2028,19 @@ mmu_pagein(struct pmap *pm, vaddr_t va, int prot) sp = &rp->rg_segmap[vs]; /* return 0 if we have no PTEs to load */ - if (sp->sg_npte == 0) + if (sp->sg_npte == 0) { + PMAP_UNLOCK(); return (0); + } /* return -1 if the fault is `hard', 0 if not */ - if (sp->sg_pmeg != seginval) + if (sp->sg_pmeg != seginval) { + PMAP_UNLOCK(); return (bits && (getpte4(va) & bits) == bits ? -1 : 0); + } mmu_pagein_seg(pm, sp, va, vr, vs, &segm_lru); + PMAP_UNLOCK(); return (1); } #endif /* SUN4 or SUN4C */ @@ -2103,7 +2056,7 @@ void ctx_alloc(struct pmap *pm) { union ctxinfo *c; - int s, cnum, i, doflush; + int cnum, i, doflush; struct regmap *rp; int gap_start, gap_end; vaddr_t va; @@ -2120,8 +2073,7 @@ ctx_alloc(struct pmap *pm) gap_end = pm->pm_gap_end; } - s = splvm(); - simple_lock(&ctx_lock); + mutex_spin_enter(&ctx_lock); if ((c = ctx_freelist) != NULL) { ctx_freelist = c->c_nextfree; cnum = c - ctxinfo; @@ -2152,7 +2104,6 @@ ctx_alloc(struct pmap *pm) gap_end = c->c_pmap->pm_gap_end; } } - simple_unlock(&ctx_lock); c->c_pmap = pm; pm->pm_ctx = c; @@ -2181,7 +2132,6 @@ ctx_alloc(struct pmap *pm) */ setcontext4(cnum); - splx(s); if (doflush) cache_flush_context(cnum); @@ -2255,7 +2205,6 @@ ctx_alloc(struct pmap *pm) * Note on multi-threaded processes: a context must remain * valid as long as any thread is still running on a CPU. 
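In the sparc pmap above, demap_lock turns from a statically initialized simplelock into a kmutex_t that bootstrap code mutex_init()s at IPL_VM and that updatepte4m() enters with mutex_spin_enter(), so the mutex now also supplies the interrupt blocking that the old splvm()/simple_lock pairs provided. A condensed sketch of the protocol under the new lock, with the hardware swap() step reduced to a plain read-modify-write:

#include <sys/mutex.h>
#include <sys/intr.h>

static kmutex_t demap_lock;

void
demap_init(void)
{
        /* IPL_VM makes this a spin mutex that blocks VM interrupts */
        mutex_init(&demap_lock, MUTEX_DEFAULT, IPL_VM);
}

int
updatepte_sketch(volatile int *vpte, int bic, int bis)
{
        int oldval;

        mutex_spin_enter(&demap_lock);  /* one PTE update protocol at a time */
        oldval = *vpte;
        *vpte = (oldval & ~bic) | bis;
        mutex_spin_exit(&demap_lock);
        return oldval;
}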
*/ - simple_lock(&pm->pm_lock); #if defined(MULTIPROCESSOR) for (i = 0; i < sparc_ncpus; i++) #else @@ -2271,18 +2220,17 @@ ctx_alloc(struct pmap *pm) (pm->pm_reg_ptps_pa[i] >> SRMMU_PPNPASHIFT) | SRMMU_TEPTD); } - simple_unlock(&pm->pm_lock); /* And finally switch to the new context */ (*cpuinfo.pure_vcache_flush)(); setcontext4m(cnum); #endif /* SUN4M || SUN4D */ - splx(s); } + mutex_spin_exit(&ctx_lock); } /* - * Give away a context. Always called in the context of proc0 (reaper) + * Give away a context. */ void ctx_free(struct pmap *pm) @@ -2303,6 +2251,8 @@ ctx_free(struct pmap *pm) } #endif /* SUN4 || SUN4C */ + mutex_spin_enter(&ctx_lock); + #if defined(SUN4M) || defined(SUN4D) if (CPU_HAS_SRMMU) { int i; @@ -2325,10 +2275,9 @@ ctx_free(struct pmap *pm) } #endif - simple_lock(&ctx_lock); c->c_nextfree = ctx_freelist; ctx_freelist = c; - simple_unlock(&ctx_lock); + mutex_spin_exit(&ctx_lock); } @@ -2704,7 +2653,7 @@ pv_syncflags4m(struct vm_page *pg) int tpte; s = splvm(); - PMAP_HEAD_TO_MAP_LOCK(); + PMAP_LOCK(); pv = VM_MDPAGE_PVHEAD(pg); if (pv->pv_pmap == NULL) { /* Page not mapped; pv_flags is already up to date */ @@ -2712,11 +2661,9 @@ pv_syncflags4m(struct vm_page *pg) goto out; } - simple_lock(&pg->mdpage.pv_slock); flags = pv->pv_flags; for (; pv != NULL; pv = pv->pv_next) { pm = pv->pv_pmap; - simple_lock(&pm->pm_lock); va = pv->pv_va; rp = &pm->pm_regmap[VA_VREG(va)]; sp = &rp->rg_segmap[VA_VSEG(va)]; @@ -2736,14 +2683,11 @@ pv_syncflags4m(struct vm_page *pg) SRMMU_PG_M | SRMMU_PG_R, 0, pm->pm_ctxnum, PMAP_CPUSET(pm))); } - - simple_unlock(&pm->pm_lock); } VM_MDPAGE_PVHEAD(pg)->pv_flags = flags; - simple_unlock(&pg->mdpage.pv_slock); out: - PMAP_HEAD_TO_MAP_UNLOCK(); + PMAP_UNLOCK(); splx(s); return (flags); } @@ -2758,7 +2702,6 @@ pv_unlink4m(struct vm_page *pg, struct pmap *pm, vaddr_t va) pv0 = VM_MDPAGE_PVHEAD(pg); - simple_lock(&pg->mdpage.pv_slock); npv = pv0->pv_next; /* * First entry is special (sigh). 
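The ctx_alloc()/ctx_free() hunks above collapse the old splvm()+simple_lock(&ctx_lock) bracketing into one spin mutex held across the whole allocate or free. A stripped-down sketch of the context freelist under that mutex; the two-member union mirrors the ctxinfo declaration shown earlier in the diff, and ctx_get()/ctx_put() are invented names:

#include <sys/mutex.h>

struct pmap;

union ctxinfo {
        union ctxinfo *c_nextfree;      /* free list (if free) */
        struct pmap *c_pmap;            /* pmap (if busy) */
};

static kmutex_t ctx_lock;               /* mutex_init()ed at IPL_SCHED */
static union ctxinfo *ctx_freelist;

static union ctxinfo *
ctx_get(struct pmap *pm)
{
        union ctxinfo *c;

        mutex_spin_enter(&ctx_lock);    /* replaces splvm()+simple_lock() */
        if ((c = ctx_freelist) != NULL) {
                ctx_freelist = c->c_nextfree;
                c->c_pmap = pm;
        }
        mutex_spin_exit(&ctx_lock);
        return c;                       /* NULL: steal one, as ctx_alloc() does */
}

static void
ctx_put(union ctxinfo *c)
{
        mutex_spin_enter(&ctx_lock);
        c->c_nextfree = ctx_freelist;
        ctx_freelist = c;
        mutex_spin_exit(&ctx_lock);
}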
@@ -2785,7 +2728,7 @@ pv_unlink4m(struct vm_page *pg, struct pmap *pm, vaddr_t va) */ pv0->pv_pmap = NULL; pv0->pv_flags &= ~(PV_NC|PV_ANC); - goto out; + return; } } else { struct pvlist *prev; @@ -2795,7 +2738,7 @@ pv_unlink4m(struct vm_page *pg, struct pmap *pm, vaddr_t va) if (npv == NULL) { panic("pv_unlink: pm %p is missing on pg %p", pm, pg); - goto out; + return; } if (npv->pv_pmap == pm && npv->pv_va == va) break; @@ -2813,7 +2756,7 @@ pv_unlink4m(struct vm_page *pg, struct pmap *pm, vaddr_t va) for (npv = pv0->pv_next; npv != NULL; npv = npv->pv_next) if (BADALIAS(va, npv->pv_va) || (npv->pv_flags & PV_NC) != 0) - goto out; + return; #ifdef DEBUG if (pmapdebug & PDB_CACHESTUFF) printf( @@ -2824,9 +2767,6 @@ pv_unlink4m(struct vm_page *pg, struct pmap *pm, vaddr_t va) pv0->pv_flags &= ~PV_ANC; pv_changepte4m(pg, SRMMU_PG_C, 0); } - -out: - simple_unlock(&pg->mdpage.pv_slock); } /* @@ -2843,7 +2783,6 @@ pv_link4m(struct vm_page *pg, struct pmap *pm, vaddr_t va, int error = 0; pv0 = VM_MDPAGE_PVHEAD(pg); - simple_lock(&pg->mdpage.pv_slock); if (pv0->pv_pmap == NULL) { /* no pvlist entries yet */ @@ -2914,7 +2853,6 @@ link_npv: pv0->pv_next = npv; out: - simple_unlock(&pg->mdpage.pv_slock); return (error); } #endif @@ -2932,8 +2870,7 @@ pv_uncache(struct vm_page *pg) int s; s = splvm(); - PMAP_HEAD_TO_MAP_LOCK(); - simple_lock(&pg->mdpage.pv_slock); + PMAP_LOCK(); for (pv = VM_MDPAGE_PVHEAD(pg); pv != NULL; pv = pv->pv_next) pv->pv_flags |= PV_NC; @@ -2946,8 +2883,7 @@ pv_uncache(struct vm_page *pg) if (CPU_HAS_SUNMMU) pv_changepte4_4c(pg, PG_NC, 0); #endif - simple_unlock(&pg->mdpage.pv_slock); - PMAP_HEAD_TO_MAP_UNLOCK(); + PMAP_UNLOCK(); splx(s); } @@ -3050,12 +2986,6 @@ pmap_bootstrap(int nctx, int nregion, int nsegment) nptesg = (NBPSG >> pgshift); #endif - /* - * Initialize the locks. - */ - /* spinlockinit(&pmap_main_lock, "pmaplk", 0); */ - simple_lock_init(&kernel_pmap_store.pm_lock); - /* * Grab physical memory list. */ @@ -3279,12 +3209,14 @@ pmap_bootstrap4_4c(void *top, int nctx, int nregion, int nsegment) p = i; /* retract to first free phys */ + mutex_init(&demap_lock, MUTEX_DEFAULT, IPL_VM); + /* * All contexts are free except the kernel's. * * XXX sun4c could use context 0 for users? 
*/ - simple_lock_init(&ctx_lock); + mutex_init(&ctx_lock, MUTEX_DEFAULT, IPL_SCHED); ci->c_pmap = pmap_kernel(); ctx_freelist = ci + 1; for (i = 1; i < ncontext; i++) { @@ -3767,10 +3699,12 @@ pmap_bootstrap4m(void *top) p = q; /* retract to first free phys */ + mutex_init(&demap_lock, MUTEX_DEFAULT, IPL_VM); + /* * Set up the ctxinfo structures (freelist of contexts) */ - simple_lock_init(&ctx_lock); + mutex_init(&ctx_lock, MUTEX_DEFAULT, IPL_SCHED); ci->c_pmap = pmap_kernel(); ctx_freelist = ci + 1; for (i = 1; i < ncontext; i++) { @@ -4238,7 +4172,6 @@ pmap_pmap_pool_ctor(void *arg, void *object, int flags) qzero((void *)pm->pm_regmap, NUREG * sizeof(struct regmap)); /* pm->pm_ctx = NULL; // already done */ - simple_lock_init(&pm->pm_lock); if (CPU_HAS_SUNMMU) { TAILQ_INIT(&pm->pm_seglist); @@ -4376,16 +4309,12 @@ pmap_create(void) void pmap_destroy(struct pmap *pm) { - int count; #ifdef DEBUG if (pmapdebug & PDB_DESTROY) printf("pmap_destroy[%d](%p)\n", cpu_number(), pm); #endif - simple_lock(&pm->pm_lock); - count = --pm->pm_refcount; - simple_unlock(&pm->pm_lock); - if (count == 0) { + if (atomic_dec_uint_nv(&pm->pm_refcount) == 0) { #ifdef DEBUG pmap_quiet_check(pm); #endif @@ -4399,11 +4328,8 @@ pmap_destroy(struct pmap *pm) void pmap_reference(struct pmap *pm) { - int s = splvm(); - simple_lock(&pm->pm_lock); - pm->pm_refcount++; - simple_unlock(&pm->pm_lock); - splx(s); + + atomic_inc_uint(&pm->pm_refcount); } #if defined(SUN4) || defined(SUN4C) @@ -4584,8 +4510,7 @@ pmap_remove(struct pmap *pm, vaddr_t va, vaddr_t endva) ctx = getcontext(); s = splvm(); /* XXX conservative */ - PMAP_MAP_TO_HEAD_LOCK(); - simple_lock(&pm->pm_lock); + PMAP_LOCK(); for (; va < endva; va = nva) { /* do one virtual segment at a time */ vr = VA_VREG(va); @@ -4596,8 +4521,7 @@ pmap_remove(struct pmap *pm, vaddr_t va, vaddr_t endva) if (pm->pm_regmap[vr].rg_nsegmap != 0) (*rm)(pm, va, nva, vr, vs); } - simple_unlock(&pm->pm_lock); - PMAP_MAP_TO_HEAD_UNLOCK(); + PMAP_UNLOCK(); splx(s); setcontext(ctx); } @@ -5104,7 +5028,7 @@ pmap_protect4_4c(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot) write_user_windows(); ctx = getcontext4(); s = splvm(); - simple_lock(&pm->pm_lock); + PMAP_LOCK(); for (va = sva; va < eva;) { vr = VA_VREG(va); vs = VA_VSEG(va); @@ -5175,7 +5099,7 @@ pmap_protect4_4c(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot) } } } - simple_unlock(&pm->pm_lock); + PMAP_UNLOCK(); splx(s); setcontext4(ctx); } @@ -5292,8 +5216,7 @@ pmap_page_protect4m(struct vm_page *pg, vm_prot_t prot) cpu_number(), VM_PAGE_TO_PHYS(pg), prot); #endif s = splvm(); - PMAP_HEAD_TO_MAP_LOCK(); - simple_lock(&pg->mdpage.pv_slock); + PMAP_LOCK(); if (prot & VM_PROT_READ) { pv_changepte4m(pg, 0, PPROT_WRITE); @@ -5314,7 +5237,6 @@ pmap_page_protect4m(struct vm_page *pg, vm_prot_t prot) flags = pv->pv_flags & ~(PV_NC|PV_ANC); while (pv != NULL) { pm = pv->pv_pmap; - simple_lock(&pm->pm_lock); va = pv->pv_va; vr = VA_VREG(va); vs = VA_VSEG(va); @@ -5361,7 +5283,6 @@ pmap_page_protect4m(struct vm_page *pg, vm_prot_t prot) npv = pv->pv_next; if (pv != VM_MDPAGE_PVHEAD(pg)) pool_put(&pv_pool, pv); - simple_unlock(&pm->pm_lock); pv = npv; } @@ -5371,8 +5292,7 @@ pmap_page_protect4m(struct vm_page *pg, vm_prot_t prot) VM_MDPAGE_PVHEAD(pg)->pv_flags = flags; out: - simple_unlock(&pg->mdpage.pv_slock); - PMAP_HEAD_TO_MAP_UNLOCK(); + PMAP_UNLOCK(); splx(s); } @@ -5406,8 +5326,7 @@ pmap_protect4m(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot) write_user_windows(); s = splvm(); - 
PMAP_MAP_TO_HEAD_LOCK(); - simple_lock(&pm->pm_lock); + PMAP_LOCK(); for (va = sva; va < eva;) { vr = VA_VREG(va); @@ -5456,8 +5375,7 @@ pmap_protect4m(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot) PMAP_CPUSET(pm)); } } - simple_unlock(&pm->pm_lock); - PMAP_MAP_TO_HEAD_UNLOCK(); + PMAP_UNLOCK(); splx(s); } @@ -6006,7 +5924,7 @@ pmap_kremove4_4c(vaddr_t va, vsize_t len) s = splvm(); ctx = getcontext(); - simple_lock(&pm->pm_lock); + PMAP_LOCK(); setcontext4(0); for (; va < endva; va = nva) { /* do one virtual segment at a time */ @@ -6086,7 +6004,7 @@ pmap_kremove4_4c(vaddr_t va, vsize_t len) mmu_pmeg_unlock(sp->sg_pmeg); } } - simple_unlock(&pm->pm_lock); + PMAP_UNLOCK(); setcontext4(ctx); splx(s); } @@ -6215,8 +6133,7 @@ pmap_enk4m(struct pmap *pm, vaddr_t va, vm_prot_t prot, int flags, sp = &rp->rg_segmap[vs]; s = splvm(); /* XXX way too conservative */ - PMAP_MAP_TO_HEAD_LOCK(); - simple_lock(&pm->pm_lock); + PMAP_LOCK(); if (rp->rg_seg_ptps == NULL) /* enter new region */ panic("pmap_enk4m: missing kernel region table for va 0x%lx",va); @@ -6229,8 +6146,7 @@ pmap_enk4m(struct pmap *pm, vaddr_t va, vm_prot_t prot, int flags, if ((tpte & SRMMU_PPNMASK) == (pteproto & SRMMU_PPNMASK)) { /* just changing protection and/or wiring */ pmap_changeprot4m(pm, va, prot, flags); - simple_unlock(&pm->pm_lock); - PMAP_MAP_TO_HEAD_UNLOCK(); + PMAP_UNLOCK(); splx(s); return (0); } @@ -6276,8 +6192,7 @@ printf("pmap_enk4m: changing existing va=>pa entry: va 0x%lx, pteproto 0x%x, " setpgt4m(&sp->sg_pte[VA_SUN4M_VPG(va)], pteproto); pm->pm_stats.resident_count++; out: - simple_unlock(&pm->pm_lock); - PMAP_MAP_TO_HEAD_UNLOCK(); + PMAP_UNLOCK(); splx(s); return (error); } @@ -6304,8 +6219,7 @@ pmap_enu4m(struct pmap *pm, vaddr_t va, vm_prot_t prot, int flags, vs = VA_VSEG(va); rp = &pm->pm_regmap[vr]; s = splvm(); /* XXX conservative */ - PMAP_MAP_TO_HEAD_LOCK(); - simple_lock(&pm->pm_lock); + PMAP_LOCK(); if (rp->rg_segmap == NULL) { /* definitely a new mapping */ @@ -6409,8 +6323,7 @@ pmap_enu4m(struct pmap *pm, vaddr_t va, vm_prot_t prot, int flags, /* just changing prot and/or wiring */ /* caller should call this directly: */ pmap_changeprot4m(pm, va, prot, flags); - simple_unlock(&pm->pm_lock); - PMAP_MAP_TO_HEAD_UNLOCK(); + PMAP_UNLOCK(); splx(s); return (0); } @@ -6479,8 +6392,7 @@ pmap_enu4m(struct pmap *pm, vaddr_t va, vm_prot_t prot, int flags, } out: - simple_unlock(&pm->pm_lock); - PMAP_MAP_TO_HEAD_UNLOCK(); + PMAP_UNLOCK(); splx(s); return (error); } @@ -6733,8 +6645,9 @@ pmap_extract4m(struct pmap *pm, vaddr_t va, paddr_t *pap) * requires interrupt protection. */ s = splvm(); - if (pm != pmap_kernel()) - simple_lock(&pm->pm_lock); + if (pm != pmap_kernel()) { + PMAP_LOCK(); + } rp = &pm->pm_regmap[vr]; if (rp->rg_segmap == NULL) { @@ -6766,9 +6679,9 @@ pmap_extract4m(struct pmap *pm, vaddr_t va, paddr_t *pap) * the middle of the PTE update protocol. So, acquire the * demap lock and retry. 
*/ - simple_lock(&demap_lock); + mutex_spin_enter(&demap_lock); pte = sp->sg_pte[VA_SUN4M_VPG(va)]; - simple_unlock(&demap_lock); + mutex_spin_exit(&demap_lock); if ((pte & SRMMU_TETYPE) != SRMMU_TEPTE) goto out; } @@ -6783,8 +6696,9 @@ pmap_extract4m(struct pmap *pm, vaddr_t va, paddr_t *pap) v = true; out: - if (pm != pmap_kernel()) - simple_unlock(&pm->pm_lock); + if (pm != pmap_kernel()) { + PMAP_UNLOCK(); + } splx(s); return (v); } diff --git a/sys/arch/sparc/sparc/trap.c b/sys/arch/sparc/sparc/trap.c index 36d56844385e..e06ae9b542b0 100644 --- a/sys/arch/sparc/sparc/trap.c +++ b/sys/arch/sparc/sparc/trap.c @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.172 2007/03/04 06:00:47 christos Exp $ */ +/* $NetBSD: trap.c,v 1.173 2008/01/02 11:48:29 ad Exp $ */ /* * Copyright (c) 1996 @@ -49,7 +49,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.172 2007/03/04 06:00:47 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.173 2008/01/02 11:48:29 ad Exp $"); #include "opt_ddb.h" #include "opt_compat_svr4.h" @@ -417,9 +417,7 @@ badtrap: #endif if (fs == NULL) { - KERNEL_LOCK(1, l); fs = malloc(sizeof *fs, M_SUBPROC, M_WAITOK); - KERNEL_UNLOCK_LAST(l); *fs = initfpstate; l->l_md.md_fpstate = fs; } @@ -495,12 +493,10 @@ badtrap: } case T_WINOF: - KERNEL_LOCK(1, l); if (rwindow_save(l)) { mutex_enter(&p->p_smutex); sigexit(l, SIGILL); } - KERNEL_UNLOCK_LAST(l); break; #define read_rw(src, dst) \ @@ -515,7 +511,6 @@ badtrap: * nsaved to -1. If we decide to deliver a signal on * our way out, we will clear nsaved. */ - KERNEL_LOCK(1, l); if (pcb->pcb_uw || pcb->pcb_nsaved) panic("trap T_RWRET 1"); #ifdef DEBUG @@ -531,7 +526,6 @@ badtrap: if (pcb->pcb_nsaved) panic("trap T_RWRET 2"); pcb->pcb_nsaved = -1; /* mark success */ - KERNEL_UNLOCK_LAST(l); break; case T_WINUF: @@ -544,7 +538,6 @@ badtrap: * in the pcb. The restore's window may still be in * the CPU; we need to force it out to the stack. */ - KERNEL_LOCK(1, l); #ifdef DEBUG if (rwindow_debug) printf("cpu%d:%s[%d]: rwindow: T_WINUF 0: pcb<-stack: 0x%x\n", @@ -569,14 +562,11 @@ badtrap: if (pcb->pcb_nsaved) panic("trap T_WINUF"); pcb->pcb_nsaved = -1; /* mark success */ - KERNEL_UNLOCK_LAST(l); break; case T_ALIGN: if ((p->p_md.md_flags & MDP_FIXALIGN) != 0) { - KERNEL_LOCK(1, l); n = fixalign(l, tf); - KERNEL_UNLOCK_LAST(l); if (n == 0) { ADVANCE; break; @@ -598,7 +588,6 @@ badtrap: * will not match once fpu_cleanup does its job, so * we must not save again later.) 
*/ - KERNEL_LOCK(1, l); if (l != cpuinfo.fplwp) panic("fpe without being the FP user"); FPU_LOCK(s); @@ -606,7 +595,6 @@ badtrap: cpuinfo.fplwp = NULL; l->l_md.md_fpu = NULL; FPU_UNLOCK(s); - KERNEL_UNLOCK_LAST(l); /* tf->tf_psr &= ~PSR_EF; */ /* share_fpu will do this */ if ((code = fpu_cleanup(l, l->l_md.md_fpstate)) != 0) { sig = SIGFPE; @@ -658,12 +646,10 @@ badtrap: case T_FLUSHWIN: write_user_windows(); #ifdef probably_slower_since_this_is_usually_false - KERNEL_LOCK(1, l); if (pcb->pcb_nsaved && rwindow_save(p)) { mutex_enter(&p->p_smutex); sigexit(l, SIGILL); } - KERNEL_UNLOCK_LAST(l); #endif ADVANCE; break; @@ -703,10 +689,8 @@ badtrap: break; } if (sig != 0) { - KERNEL_LOCK(1, l); ksi.ksi_signo = sig; trapsignal(l, &ksi); - KERNEL_UNLOCK_LAST(l); } userret(l, pc, sticks); share_fpu(l, tf); @@ -806,9 +790,6 @@ mem_access_fault(unsigned type, int ser, u_int v, int pc, int psr, LWP_CACHE_CREDS(l, p); sticks = p->p_sticks; - if ((psr & PSR_PS) == 0) - KERNEL_LOCK(1, l); - #ifdef FPU_DEBUG if ((tf->tf_psr & PSR_EF) != 0) { if (cpuinfo.fplwp != l) @@ -974,7 +955,6 @@ kfault: } out: if ((psr & PSR_PS) == 0) { - KERNEL_UNLOCK_LAST(l); userret(l, pc, sticks); share_fpu(l, tf); } @@ -1054,11 +1034,6 @@ mem_access_fault4m(unsigned type, u_int sfsr, u_int sfva, struct trapframe *tf) goto out_nounlock; } - if ((psr & PSR_PS) == 0) - KERNEL_LOCK(1, l); - else - KERNEL_LOCK(1, NULL); - /* * Figure out what to pass the VM code. We cannot ignore the sfva * register on text faults, since this might be a trap on an @@ -1188,7 +1163,6 @@ mem_access_fault4m(unsigned type, u_int sfsr, u_int sfva, struct trapframe *tf) if (va >= KERNBASE) { rv = uvm_fault(kernel_map, va, atype); if (rv == 0) { - KERNEL_UNLOCK_ONE(NULL); return; } goto kfault; @@ -1232,7 +1206,6 @@ kfault: tf->tf_pc = onfault; tf->tf_npc = onfault + 4; tf->tf_out[0] = (rv == EACCES) ? EFAULT : rv; - KERNEL_UNLOCK_ONE(NULL); return; } KSI_INIT_TRAP(&ksi); @@ -1255,13 +1228,10 @@ kfault: } out: if ((psr & PSR_PS) == 0) { - KERNEL_UNLOCK_LAST(l); out_nounlock: userret(l, pc, sticks); share_fpu(l, tf); } - else - KERNEL_UNLOCK_ONE(NULL); } #endif /* SUN4M */ @@ -1283,7 +1253,6 @@ startlwp(void *arg) #endif pool_put(&lwp_uc_pool, uc); - KERNEL_UNLOCK_LAST(l); userret(l, l->l_md.md_tf->tf_pc, 0); } diff --git a/sys/arch/sparc64/dev/fdc.c b/sys/arch/sparc64/dev/fdc.c index 9700a86258ac..bec1b2b7addb 100644 --- a/sys/arch/sparc64/dev/fdc.c +++ b/sys/arch/sparc64/dev/fdc.c @@ -1,4 +1,4 @@ -/* $NetBSD: fdc.c,v 1.19 2007/11/28 20:41:35 jnemeth Exp $ */ +/* $NetBSD: fdc.c,v 1.20 2008/01/02 11:48:29 ad Exp $ */ /*- * Copyright (c) 2000 The NetBSD Foundation, Inc. 
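The sparc pmap_reference() and pmap_destroy() hunks above replace the lock, modify, unlock dance around pm_refcount with single atomic operations. A minimal sketch of the pattern, assuming only <sys/atomic.h>; the refobj type and function names are illustrations, not identifiers from the patch:

#include <sys/param.h>
#include <sys/atomic.h>

struct refobj {
	volatile u_int	r_refcnt;	/* shared reference count */
};

/* Take a reference: a single atomic increment, no lock, no splvm(). */
static inline void
refobj_reference(struct refobj *r)
{

	atomic_inc_uint(&r->r_refcnt);
}

/*
 * Drop a reference.  atomic_dec_uint_nv() returns the new value, so
 * exactly one caller observes zero and may tear the object down.
 */
static inline void
refobj_release(struct refobj *r)
{

	if (atomic_dec_uint_nv(&r->r_refcnt) == 0) {
		/* last reference: free the object's resources here */
	}
}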
@@ -108,7 +108,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fdc.c,v 1.19 2007/11/28 20:41:35 jnemeth Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fdc.c,v 1.20 2008/01/02 11:48:29 ad Exp $"); #include "opt_ddb.h" #include "opt_md.h" @@ -2327,12 +2327,13 @@ fdformat(dev_t dev, struct ne7_fd_formb *finfo, struct proc *p) struct buf *bp; /* set up a buffer header for fdstrategy() */ - bp = getiobuf_nowait(); + bp = getiobuf(NULL, false); if (bp == NULL) return ENOBUFS; bp->b_vp = NULL; - bp->b_flags = B_BUSY | B_PHYS | B_FORMAT; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_PHYS | B_FORMAT; bp->b_proc = p; bp->b_dev = dev; @@ -2515,14 +2516,13 @@ fd_read_md_image(size_t *sizep, void **addrp) bp->b_error = 0; bp->b_resid = 0; bp->b_proc = NULL; - bp->b_flags = B_BUSY | B_PHYS | B_RAW | B_READ; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_PHYS | B_RAW | B_READ; bp->b_blkno = btodb(offset); bp->b_bcount = DEV_BSIZE; bp->b_data = addr; fdstrategy(bp); - while ((bp->b_flags & B_DONE) == 0) { - tsleep((void *)bp, PRIBIO + 1, "physio", 0); - } + biowait(bp); if (bp->b_error) panic("fd: mountroot: fdread error %d", bp->b_error); diff --git a/sys/arch/sparc64/sparc64/pmap.c b/sys/arch/sparc64/sparc64/pmap.c index 9f9d81a526ed..8e3920bfdfa5 100644 --- a/sys/arch/sparc64/sparc64/pmap.c +++ b/sys/arch/sparc64/sparc64/pmap.c @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.202 2007/12/09 20:12:55 martin Exp $ */ +/* $NetBSD: pmap.c,v 1.203 2008/01/02 11:48:30 ad Exp $ */ /* * * Copyright (C) 1996-1999 Eduardo Horvath. @@ -26,7 +26,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.202 2007/12/09 20:12:55 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.203 2008/01/02 11:48:30 ad Exp $"); #undef NO_VCACHE /* Don't forget the locked TLB in dostart */ #define HWREF @@ -45,6 +45,8 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.202 2007/12/09 20:12:55 martin Exp $"); #include #include #include +#include +#include #include @@ -53,7 +55,6 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.202 2007/12/09 20:12:55 martin Exp $"); #include #include #include -#include #include #include "cache.h" @@ -122,8 +123,8 @@ extern int pseg_set(struct pmap *, vaddr_t, int64_t, paddr_t); #define PV_SETVA(pv,va) ((pv)->pv_va = (((va) & PV_VAMASK) | \ (((pv)->pv_va) & PV_MASK))) -struct pool pmap_pmap_pool; -struct pool pmap_pv_pool; +struct pool_cache pmap_cache; +struct pool_cache pmap_pv_cache; pv_entry_t pmap_remove_pv(struct pmap *, vaddr_t, struct vm_page *); void pmap_enter_pv(struct pmap *, vaddr_t, paddr_t, struct vm_page *, @@ -292,6 +293,10 @@ int numctx; static int pmap_get_page(paddr_t *p); static void pmap_free_page(paddr_t pa); +/* + * Global pmap lock. + */ +static kmutex_t pmap_lock; /* * Support for big page sizes. This maps the page size to the @@ -929,7 +934,7 @@ pmap_bootstrap(u_long kernelstart, u_long kernelend) /* * Allocate and clear out pmap_kernel()->pm_segs[] */ - simple_lock_init(&pmap_kernel()->pm_lock); + mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_NONE); pmap_kernel()->pm_refs = 1; pmap_kernel()->pm_ctx = 0; @@ -1170,10 +1175,10 @@ pmap_init() /* * initialize the pmap pools. 
*/ - pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, 0, 0, "pv_entry", - &pool_allocator_nointr, IPL_NONE); + pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap), 0, 0, 0, + "pmappl", NULL, IPL_NONE, NULL, NULL, NULL); + pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0, 0, + "pv_entry", NULL, IPL_NONE, NULL, NULL, NULL); vm_first_phys = avail_start; vm_num_phys = avail_end - avail_start; @@ -1219,7 +1224,7 @@ pmap_growkernel(maxkvaddr) (void *)KERNEND, (void *)maxkvaddr); return (kbreak); } - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); DPRINTF(PDB_GROW, ("pmap_growkernel(%lx...%lx)\n", kbreak, maxkvaddr)); /* Align with the start of a page table */ for (kbreak &= (-1 << PDSHIFT); kbreak < maxkvaddr; @@ -1237,7 +1242,7 @@ pmap_growkernel(maxkvaddr) ENTER_STAT(ptpneeded); } } - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); return (kbreak); } @@ -1251,11 +1256,10 @@ pmap_create() DPRINTF(PDB_CREATE, ("pmap_create()\n")); - pm = pool_get(&pmap_pmap_pool, PR_WAITOK); + pm = pool_cache_get(&pmap_cache, PR_WAITOK); memset(pm, 0, sizeof *pm); DPRINTF(PDB_CREATE, ("pmap_create(): created %p\n", pm)); - simple_lock_init(&pm->pm_lock); pm->pm_refs = 1; TAILQ_INIT(&pm->pm_obj.memq); if (pm != pmap_kernel()) { @@ -1276,9 +1280,7 @@ pmap_reference(pm) struct pmap *pm; { - simple_lock(&pm->pm_lock); - pm->pm_refs++; - simple_unlock(&pm->pm_lock); + atomic_inc_uint(&pm->pm_refs); } /* @@ -1290,12 +1292,8 @@ pmap_destroy(pm) struct pmap *pm; { struct vm_page *pg, *nextpg; - int refs; - simple_lock(&pm->pm_lock); - refs = --pm->pm_refs; - simple_unlock(&pm->pm_lock); - if (refs > 0) { + if (atomic_dec_uint_nv(&pm->pm_refs) > 0) { return; } DPRINTF(PDB_DESTROY, ("pmap_destroy: freeing pmap %p\n", pm)); @@ -1309,7 +1307,7 @@ pmap_destroy(pm) uvm_pagefree(pg); } pmap_free_page((paddr_t)(u_long)pm->pm_segs); - pool_put(&pmap_pmap_pool, pm); + pool_cache_put(&pmap_cache, pm); } /* @@ -1357,7 +1355,7 @@ pmap_collect(pm) if (pm == pmap_kernel()) return; - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); for (i = 0; i < STSZ; i++) { pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], ASI_PHYS_CACHED); @@ -1396,7 +1394,7 @@ pmap_collect(pm) pmap_free_page(pa); } } - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); } /* @@ -1627,7 +1625,7 @@ pmap_enter(pm, va, pa, prot, flags) * entering the same PA again. if it's different remove it. 
*/ - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); data = pseg_get(pm, va); if (data & TLB_V) { wasmapped = TRUE; @@ -1665,17 +1663,17 @@ pmap_enter(pm, va, pa, prot, flags) */ if (pvh->pv_pmap == NULL || (wasmapped && opa == pa)) { if (npv != NULL) { - pool_put(&pmap_pv_pool, npv); + pool_cache_put(&pmap_pv_cache, npv); /* XXXAD defer */ npv = NULL; } if (wasmapped && opa == pa) { dopv = FALSE; } } else if (npv == NULL) { - npv = pool_get(&pmap_pv_pool, PR_NOWAIT); + npv = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); /* XXXAD defer */ if (npv == NULL) { if (flags & PMAP_CANFAIL) { - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); return (ENOMEM); } panic("pmap_enter: no pv entries available"); @@ -1686,7 +1684,7 @@ pmap_enter(pm, va, pa, prot, flags) ENTER_STAT(unmanaged); dopv = FALSE; if (npv != NULL) { - pool_put(&pmap_pv_pool, npv); + pool_cache_put(&pmap_pv_cache, npv); /* XXXAD defer */ npv = NULL; } } @@ -1709,7 +1707,7 @@ pmap_enter(pm, va, pa, prot, flags) #else /* If it needs ref accounting do nothing. */ if (!(flags & VM_PROT_READ)) { - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); return 0; } #endif @@ -1754,9 +1752,9 @@ pmap_enter(pm, va, pa, prot, flags) ptp = 0; if (!pmap_get_page(&ptp)) { if (flags & PMAP_CANFAIL) { - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); if (npv != NULL) { - pool_put(&pmap_pv_pool, npv); + pool_cache_put(&pmap_pv_cache, npv); /* XXXAD defer */ } return (ENOMEM); } else { @@ -1776,7 +1774,7 @@ pmap_enter(pm, va, pa, prot, flags) pmap_enter_pv(pm, va, pa, pg, npv); } - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); #ifdef DEBUG i = ptelookup_va(va); if (pmapdebug & PDB_ENTER) @@ -1876,7 +1874,7 @@ pmap_remove(pm, va, endva) KASSERT(pm != pmap_kernel() || endva < INTSTACK || va > EINTSTACK); KASSERT(pm != pmap_kernel() || endva < kdata || va > ekdata); - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); DPRINTF(PDB_REMOVE, ("pmap_remove(pm=%p, va=%p, endva=%p):", pm, (void *)(u_long)va, (void *)(u_long)endva)); REMOVE_STAT(calls); @@ -1905,7 +1903,7 @@ pmap_remove(pm, va, endva) if (pg) { pv = pmap_remove_pv(pm, va, pg); if (pv != NULL) { - pool_put(&pmap_pv_pool, pv); + pool_cache_put(&pmap_pv_cache, pv); /* XXXAD defer */ } } @@ -1942,13 +1940,13 @@ pmap_remove(pm, va, endva) REMOVE_STAT(tflushes); tlb_flush_pte(va, pm->pm_ctx); } - simple_unlock(&pm->pm_lock); if (flush && pm->pm_refs) { REMOVE_STAT(flushes); blast_dcache(); } DPRINTF(PDB_REMOVE, ("\n")); pv_check(); + mutex_exit(&pmap_lock); } /* @@ -1974,7 +1972,7 @@ pmap_protect(pm, sva, eva, prot) return; } - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); sva = sva & ~PGOFSET; for (; sva < eva; sva += PAGE_SIZE) { #ifdef DEBUG @@ -2030,8 +2028,8 @@ pmap_protect(pm, sva, eva, prot) tsb_invalidate(pm->pm_ctx, sva); tlb_flush_pte(sva, pm->pm_ctx); } - simple_unlock(&pm->pm_lock); pv_check(); + mutex_exit(&pmap_lock); } /* @@ -2066,7 +2064,7 @@ pmap_extract(pm, va, pap) return TRUE; } else { if (pm != pmap_kernel()) { - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); } data = pseg_get(pm, va); pa = data & TLB_PA_MASK; @@ -2100,7 +2098,7 @@ pmap_extract(pm, va, pap) } #endif if (pm != pmap_kernel()) { - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); } } if ((data & TLB_V) == 0) @@ -2123,7 +2121,7 @@ pmap_kprotect(va, prot) int64_t data; int rv; - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); data = pseg_get(pm, va); KASSERT(data & TLB_V); if (prot & VM_PROT_WRITE) { @@ -2136,7 +2134,7 @@ pmap_kprotect(va, prot) 
panic("pmap_kprotect: pseg_set needs spare! rv=%d", rv); tsb_invalidate(pm->pm_ctx, va); tlb_flush_pte(va, pm->pm_ctx); - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); } /* @@ -2355,6 +2353,7 @@ pmap_clear_modify(pg) #if defined(DEBUG) modified = pmap_is_modified(pg); #endif + mutex_enter(&pmap_lock); /* Clear all mappings */ pv = &pg->mdpage.mdpg_pvh; #ifdef DEBUG @@ -2376,7 +2375,6 @@ pmap_clear_modify(pg) struct pmap *pmap = pv->pv_pmap; vaddr_t va = pv->pv_va & PV_VAMASK; - simple_lock(&pmap->pm_lock); /* First clear the mod bit in the PTE and make it R/O */ data = pseg_get(pmap, va); KASSERT(data & TLB_V); @@ -2400,10 +2398,10 @@ pmap_clear_modify(pg) if (pv->pv_va & PV_MOD) changed |= 1; pv->pv_va &= ~(PV_MOD); - simple_unlock(&pmap->pm_lock); } } pv_check(); + mutex_exit(&pmap_lock); #ifdef DEBUG if (pmap_is_modified(pg)) { printf("pmap_clear_modify(): %p still modified!\n", pg); @@ -2432,6 +2430,7 @@ pmap_clear_reference(pg) int referenced = 0; #endif + mutex_enter(&pmap_lock); #ifdef DEBUG DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_clear_reference(%p)\n", pg)); referenced = pmap_is_referenced(pg); @@ -2453,7 +2452,6 @@ pmap_clear_reference(pg) struct pmap *pmap = pv->pv_pmap; vaddr_t va = pv->pv_va & PV_VAMASK; - simple_lock(&pmap->pm_lock); data = pseg_get(pmap, va); KASSERT(data & TLB_V); DPRINTF(PDB_CHANGEPROT, @@ -2480,7 +2478,6 @@ pmap_clear_reference(pg) if (pv->pv_va & PV_REF) changed |= 1; pv->pv_va &= ~(PV_REF); - simple_unlock(&pmap->pm_lock); } } dcache_flush_page(pa); @@ -2497,8 +2494,12 @@ pmap_clear_reference(pg) printf("pmap_clear_reference: referenced %d changed %d\n", referenced, changed); Debugger(); - } else return (referenced); + } else { + mutex_exit(&pmap_lock); + return (referenced); + } #endif + mutex_exit(&pmap_lock); return (changed); } @@ -2622,15 +2623,15 @@ pmap_unwire(pmap, va) return; } #endif - simple_lock(&pmap->pm_lock); + mutex_enter(&pmap_lock); data = pseg_get(pmap, va & PV_VAMASK); KASSERT(data & TLB_V); data &= ~TLB_TSB_LOCK; rv = pseg_set(pmap, va & PV_VAMASK, data, 0); if (rv & 1) panic("pmap_unwire: pseg_set needs spare! 
rv=%d\n", rv); - simple_unlock(&pmap->pm_lock); pv_check(); + mutex_exit(&pmap_lock); } /* @@ -2648,7 +2649,7 @@ pmap_page_protect(pg, prot) int64_t data = 0; int rv; paddr_t pa = VM_PAGE_TO_PHYS(pg); - pv_entry_t pv, npv, firstpv; + pv_entry_t pv, npv, firstpv, freepv = NULL; struct pmap *pmap; vaddr_t va; bool needflush = FALSE; @@ -2656,6 +2657,7 @@ pmap_page_protect(pg, prot) DPRINTF(PDB_CHANGEPROT, ("pmap_page_protect: pg %p prot %x\n", pg, prot)); + mutex_enter(&pmap_lock); pv = &pg->mdpage.mdpg_pvh; if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) { /* copy_on_write */ @@ -2680,7 +2682,6 @@ pmap_page_protect(pg, prot) pmap = pv->pv_pmap; va = pv->pv_va & PV_VAMASK; - simple_lock(&pmap->pm_lock); DPRINTF(PDB_CHANGEPROT | PDB_REF, ("pmap_page_protect: " "RO va %p of pg %p...\n", @@ -2705,7 +2706,6 @@ pmap_page_protect(pg, prot) tsb_invalidate(pmap->pm_ctx, va); tlb_flush_pte(va, pmap->pm_ctx); } - simple_unlock(&pmap->pm_lock); } } } else { @@ -2721,7 +2721,6 @@ pmap_page_protect(pg, prot) va = npv->pv_va & PV_VAMASK; /* We're removing npv from pv->pv_next */ - simple_lock(&pmap->pm_lock); DPRINTF(PDB_CHANGEPROT|PDB_REF|PDB_REMOVE, ("pmap_page_protect: " "demap va %p of pg %p in pmap %p...\n", @@ -2748,11 +2747,11 @@ pmap_page_protect(pg, prot) if (pmap->pm_refs > 0) { needflush = TRUE; } - simple_unlock(&pmap->pm_lock); /* free the pv */ pv->pv_next = npv->pv_next; - pool_put(&pmap_pv_pool, npv); + npv->pv_next = freepv; + freepv = npv; } pv = firstpv; @@ -2768,7 +2767,6 @@ pmap_page_protect(pg, prot) pmap = pv->pv_pmap; va = pv->pv_va & PV_VAMASK; - simple_lock(&pmap->pm_lock); DPRINTF(PDB_CHANGEPROT|PDB_REF|PDB_REMOVE, ("pmap_page_protect: " "demap va %p of pg %p from pm %p...\n", @@ -2793,7 +2791,6 @@ pmap_page_protect(pg, prot) if (pmap->pm_refs > 0) { needflush = TRUE; } - simple_unlock(&pmap->pm_lock); npv = pv->pv_next; /* dump the first pv */ if (npv) { @@ -2801,7 +2798,8 @@ pmap_page_protect(pg, prot) pv->pv_pmap = npv->pv_pmap; pv->pv_va |= npv->pv_va & PV_MASK; pv->pv_next = npv->pv_next; - pool_put(&pmap_pv_pool, npv); + npv->pv_next = freepv; + freepv = npv; } else { pv->pv_pmap = NULL; pv->pv_next = NULL; @@ -2813,6 +2811,13 @@ pmap_page_protect(pg, prot) } /* We should really only flush the pages we demapped. */ pv_check(); + mutex_exit(&pmap_lock); + + /* Catch up on deferred frees. */ + for (; freepv != NULL; freepv = npv) { + npv = freepv->pv_next; + pool_cache_put(&pmap_pv_cache, freepv); + } } #ifdef PMAP_COUNT_DEBUG @@ -2828,7 +2833,7 @@ pmap_count_res(struct pmap *pm) /* Almost the same as pmap_collect() */ /* Don't want one of these pages reused while we're reading it. */ - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); n = 0; for (i = 0; i < STSZ; i++) { pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], @@ -2850,7 +2855,7 @@ pmap_count_res(struct pmap *pm) } } } - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); if (pm->pm_stats.resident_count != n) printf("pmap_count_resident: pm_stats = %ld, counted: %d\n", @@ -2871,7 +2876,7 @@ pmap_count_wired(struct pmap *pm) /* Almost the same as pmap_collect() */ /* Don't want one of these pages reused while we're reading it. 
*/ - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); n = 0; for (i = 0; i < STSZ; i++) { pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], @@ -2893,7 +2898,7 @@ pmap_count_wired(struct pmap *pm) } } } - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); if (pm->pm_stats.wired_count != n) printf("pmap_count_wired: pm_stats = %ld, counted: %d\n", @@ -2921,7 +2926,7 @@ ctx_alloc(struct pmap *pm) KASSERT(pm != pmap_kernel()); KASSERT(pm == curproc->p_vmspace->vm_map.pmap); - simple_lock(&pm->pm_lock); + mutex_enter(&pmap_lock); /* XXXAD ctxswitch */ ctx = pmap_next_ctx++; /* @@ -2949,7 +2954,7 @@ ctx_alloc(struct pmap *pm) ctxbusy[ctx] = pm->pm_physaddr; LIST_INSERT_HEAD(&pmap_ctxlist, pm, pm_list); pm->pm_ctx = ctx; - simple_unlock(&pm->pm_lock); + mutex_exit(&pmap_lock); DPRINTF(PDB_CTX_ALLOC, ("ctx_alloc: allocated ctx %d\n", ctx)); return ctx; } @@ -3000,6 +3005,8 @@ pmap_enter_pv(struct pmap *pmap, vaddr_t va, paddr_t pa, struct vm_page *pg, { pv_entry_t pvh; + KASSERT(mutex_owned(&pmap_lock)); + pvh = &pg->mdpage.mdpg_pvh; DPRINTF(PDB_ENTER, ("pmap_enter: pvh %p: was %lx/%p/%p\n", pvh, pvh->pv_va, pvh->pv_pmap, pvh->pv_next)); @@ -3060,6 +3067,8 @@ pmap_remove_pv(struct pmap *pmap, vaddr_t va, struct vm_page *pg) pv_entry_t pvh, npv, pv; int64_t data = 0; + KASSERT(mutex_owned(&pmap_lock)); + pvh = &pg->mdpage.mdpg_pvh; DPRINTF(PDB_REMOVE, ("pmap_remove_pv(pm=%p, va=%p, pg=%p)\n", pmap, @@ -3135,14 +3144,14 @@ pmap_page_cache(struct pmap *pm, paddr_t pa, int mode) vaddr_t va; int rv; + KASSERT(mutex_owned(&pmap_lock)); + DPRINTF(PDB_ENTER, ("pmap_page_uncache(%llx)\n", (unsigned long long)pa)); pg = PHYS_TO_VM_PAGE(pa); pv = &pg->mdpage.mdpg_pvh; while (pv) { va = pv->pv_va & PV_VAMASK; - if (pv->pv_pmap != pm) - simple_lock(&pv->pv_pmap->pm_lock); if (pv->pv_va & PV_NC) { int64_t data; @@ -3174,8 +3183,6 @@ pmap_page_cache(struct pmap *pm, paddr_t pa, int mode) panic("pmap_page_cache: pseg_set needs" " spare! rv=%d\n", rv); } - if (pv->pv_pmap != pm) - simple_unlock(&pv->pv_pmap->pm_lock); if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) { /* Force reload -- cache bits have changed */ tsb_invalidate(pv->pv_pmap->pm_ctx, va); diff --git a/sys/arch/sun3/dev/fd.c b/sys/arch/sun3/dev/fd.c index 4843c883dcdd..76e0dd1d4420 100644 --- a/sys/arch/sun3/dev/fd.c +++ b/sys/arch/sun3/dev/fd.c @@ -1,4 +1,4 @@ -/* $NetBSD: fd.c,v 1.60 2007/12/04 15:12:07 tsutsui Exp $ */ +/* $NetBSD: fd.c,v 1.61 2008/01/02 11:48:30 ad Exp $ */ /*- * Copyright (c) 1990 The Regents of the University of California. 
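pmap_page_protect() above now chains dead pv entries onto a local freepv list while pmap_lock is held and only calls pool_cache_put() once the lock has been dropped. A minimal sketch of that deferred-free idiom; pv_item, obj_lock and pv_cache are assumed names for illustration:

#include <sys/param.h>
#include <sys/mutex.h>
#include <sys/pool.h>

struct pv_item {
	struct pv_item	*pv_next;
};

extern kmutex_t		 obj_lock;	/* assumed: covers the pv list */
extern struct pool_cache pv_cache;	/* assumed: backing allocator */

static void
pv_remove_all(struct pv_item **headp)
{
	struct pv_item *pv, *npv, *freepv = NULL;

	mutex_enter(&obj_lock);
	for (pv = *headp; pv != NULL; pv = npv) {
		npv = pv->pv_next;
		/* Unlink under the lock, but defer the pool_cache_put(). */
		pv->pv_next = freepv;
		freepv = pv;
	}
	*headp = NULL;
	mutex_exit(&obj_lock);

	/* Catch up on deferred frees now that no locks are held. */
	for (pv = freepv; pv != NULL; pv = npv) {
		npv = pv->pv_next;
		pool_cache_put(&pv_cache, pv);
	}
}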
@@ -72,7 +72,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.60 2007/12/04 15:12:07 tsutsui Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.61 2008/01/02 11:48:30 ad Exp $"); #include "opt_ddb.h" @@ -1765,18 +1765,19 @@ fdioctl(dev_t dev, u_long cmd, void *addr, int flag, struct lwp *l) int fdformat(dev_t dev, struct ne7_fd_formb *finfo, struct proc *p) { - int rv = 0, s; + int rv = 0; struct fd_softc *fd = fd_cd.cd_devs[FDUNIT(dev)]; struct fd_type *type = fd->sc_type; struct buf *bp; /* set up a buffer header for fdstrategy() */ - bp = (struct buf *)malloc(sizeof(struct buf), M_TEMP, M_NOWAIT); + bp = getiobuf(NULL, false); if (bp == 0) return (ENOBUFS); memset((void *)bp, 0, sizeof(struct buf)); - bp->b_flags = B_BUSY | B_PHYS | B_FORMAT; + bp->b_flags = B_PHYS | B_FORMAT; + bp->b_cflags = BC_BUSY; bp->b_proc = p; bp->b_dev = dev; @@ -1800,13 +1801,14 @@ fdformat(dev_t dev, struct ne7_fd_formb *finfo, struct proc *p) fdstrategy(bp); /* ...and wait for it to complete */ - s = splbio(); - while (!(bp->b_flags & B_DONE)) { - rv = tsleep((void *)bp, PRIBIO, "fdform", 20 * hz); + /* XXX dodgy */ + mutex_enter(bp->b_objlock); + while (!(bp->b_oflags & BO_DONE)) { + rv = cv_timedwait(&bp->b_done, bp->b_objlock, 20 * hz); if (rv == EWOULDBLOCK) break; } - splx(s); + mutex_exit(bp->b_objlock); if (rv == EWOULDBLOCK) { /* timed out */ @@ -1814,7 +1816,7 @@ rv = EIO; biodone(bp); } else if (bp->b_error != 0) rv = bp->b_error; - free(bp, M_TEMP); + putiobuf(bp); return (rv); } @@ -1943,14 +1945,13 @@ fd_read_md_image(size_t *sizep, void **addrp) bp->b_error = 0; bp->b_resid = 0; bp->b_proc = NULL; - bp->b_flags = B_BUSY | B_PHYS | B_RAW | B_READ; + bp->b_flags = B_PHYS | B_RAW | B_READ; + bp->b_cflags = BC_BUSY; bp->b_blkno = btodb(offset); bp->b_bcount = DEV_BSIZE; bp->b_data = addr; fdstrategy(bp); - while ((bp->b_flags & B_DONE) == 0) { - tsleep((void *)bp, PRIBIO + 1, "physio", 0); - } + biowait(bp); if (bp->b_error) panic("fd: mountroot: fdread error %d", bp->b_error); diff --git a/sys/arch/sun3/dev/xd.c b/sys/arch/sun3/dev/xd.c index b1fc811505de..3fee56a37b88 100644 --- a/sys/arch/sun3/dev/xd.c +++ b/sys/arch/sun3/dev/xd.c @@ -1,4 +1,4 @@ -/* $NetBSD: xd.c,v 1.60 2007/10/17 19:57:45 garbled Exp $ */ +/* $NetBSD: xd.c,v 1.61 2008/01/02 11:48:30 ad Exp $ */ /* * @@ -52,7 +52,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: xd.c,v 1.60 2007/10/17 19:57:45 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: xd.c,v 1.61 2008/01/02 11:48:30 ad Exp $"); #undef XDC_DEBUG /* full debug */ #define XDC_DIAG /* extra sanity checks */ @@ -309,8 +309,8 @@ xddummystrat(struct buf *bp) if (bp->b_bcount != XDFM_BPS) panic("xddummystrat"); memcpy(bp->b_data, xd_labeldata, XDFM_BPS); - bp->b_flags |= B_DONE; - bp->b_flags &= ~B_BUSY; + bp->b_oflags |= BO_DONE; + bp->b_cflags &= ~BC_BUSY; } int diff --git a/sys/arch/sun3/dev/xy.c b/sys/arch/sun3/dev/xy.c index df6e1b4c6410..32632712a4e9 100644 --- a/sys/arch/sun3/dev/xy.c +++ b/sys/arch/sun3/dev/xy.c @@ -1,4 +1,4 @@ -/* $NetBSD: xy.c,v 1.63 2007/10/17 19:57:45 garbled Exp $ */ +/* $NetBSD: xy.c,v 1.64 2008/01/02 11:48:30 ad Exp $ */ /* * @@ -52,7 +52,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: xy.c,v 1.63 2007/10/17 19:57:45 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: xy.c,v 1.64 2008/01/02 11:48:30 ad Exp $"); #undef XYC_DEBUG /* full debug */ #undef XYC_DIAG /* extra sanity checks */ @@ -247,8 +247,8 @@ xydummystrat(struct buf *bp) if (bp->b_bcount != XYFM_BPS) panic("xydummystrat"); memcpy(bp->b_data, xy_labeldata,
XYFM_BPS); - bp->b_flags |= B_DONE; - bp->b_flags &= ~B_BUSY; + bp->b_oflags |= BO_DONE; + bp->b_cflags &= ~BC_BUSY; } int diff --git a/sys/arch/vax/uba/ts.c b/sys/arch/vax/uba/ts.c index 8cf1c5c48742..fef715aa293c 100644 --- a/sys/arch/vax/uba/ts.c +++ b/sys/arch/vax/uba/ts.c @@ -1,4 +1,4 @@ -/* $NetBSD: ts.c,v 1.34 2007/10/17 19:57:58 garbled Exp $ */ +/* $NetBSD: ts.c,v 1.35 2008/01/02 11:48:31 ad Exp $ */ /*- * Copyright (c) 1991 The Regents of the University of California. @@ -66,7 +66,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ts.c,v 1.34 2007/10/17 19:57:58 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ts.c,v 1.35 2008/01/02 11:48:31 ad Exp $"); #define TS11_COMPAT /* don't use extended features provided by TS05 */ @@ -371,27 +371,15 @@ tscommand (dev, cmd, count) int count; { register struct buf *bp; - register int s; trace (("tscommand (%d, %x, %d)\n", TS_UNIT(dev), cmd, count)); - s = splbio(); bp = &ts_cbuf[TS_UNIT(dev)]; - - while (bp->b_flags & B_BUSY) { - /* - * This special check is because B_BUSY never - * gets cleared in the non-waiting rewind case. ??? - */ - if (bp->b_bcount == 0 && (bp->b_flags & B_DONE)) - break; - bp->b_flags |= B_WANTED; - (void) tsleep(bp, PRIBIO, "tscmd", 0); - /* check MOT-flag !!! */ - } - bp->b_flags = B_BUSY | B_READ; - - splx(s); + mutex_enter(&bufcache_lock); + while (bbusy(bp, false, 0) != 0) + ; + mutex_exit(&bufcache_lock); + bp->b_flags |= B_READ; /* * Load the buffer. The b_count field gets used to hold the command @@ -414,8 +402,10 @@ } debug (("tscommand: calling biowait ...\n")); biowait (bp); - if (bp->b_flags & B_WANTED) - wakeup ((void *)bp); + mutex_enter(&bufcache_lock); + bp->b_cflags &= ~BC_WANTED; + cv_broadcast(&bp->b_busy); + mutex_exit(&bufcache_lock); bp->b_error = 0; } diff --git a/sys/arch/vax/vax/cfl.c b/sys/arch/vax/vax/cfl.c index 852db1803bdb..fe854510b909 100644 --- a/sys/arch/vax/vax/cfl.c +++ b/sys/arch/vax/vax/cfl.c @@ -1,4 +1,4 @@ -/* $NetBSD: cfl.c,v 1.16 2007/10/17 19:57:59 garbled Exp $ */ +/* $NetBSD: cfl.c,v 1.17 2008/01/02 11:48:31 ad Exp $ */ /*- * Copyright (c) 1982, 1986 The Regents of the University of California. * All rights reserved.
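The floppy, xd/xy and tape hunks above all converge on one completion protocol: BO_DONE is set and tested under bp->b_objlock, and the b_done condition variable replaces tsleep()/wakeup() on the buffer address. A minimal sketch of both sides of that protocol, assuming an already-initialized struct buf; in-tree code would normally just call biowait() and biodone():

#include <sys/param.h>
#include <sys/buf.h>
#include <sys/mutex.h>
#include <sys/condvar.h>

/* Consumer side: wait up to 'ticks' for the I/O to finish. */
static int
buf_timedwait(struct buf *bp, int ticks)
{
	int error = 0;

	mutex_enter(bp->b_objlock);
	while ((bp->b_oflags & BO_DONE) == 0) {
		error = cv_timedwait(&bp->b_done, bp->b_objlock, ticks);
		if (error == EWOULDBLOCK)
			break;		/* timed out; caller recovers */
	}
	mutex_exit(bp->b_objlock);
	return error;
}

/* Producer side: the heart of what biodone() does for a plain buffer. */
static void
buf_markdone(struct buf *bp)
{

	mutex_enter(bp->b_objlock);
	bp->b_oflags |= BO_DONE;
	cv_broadcast(&bp->b_done);
	mutex_exit(bp->b_objlock);
}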
@@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cfl.c,v 1.16 2007/10/17 19:57:59 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cfl.c,v 1.17 2008/01/02 11:48:31 ad Exp $"); #include #include @@ -195,16 +195,17 @@ cflrw(dev, uio, flag) break; } if (uio->uio_rw == UIO_WRITE) { - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; } else { - bp->b_flags &= ~(B_WRITE|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_WRITE); bp->b_flags |= B_READ; } s = splconsmedia(); cflstart(); - while ((bp->b_flags & B_DONE) == 0) - (void) tsleep(bp, PRIBIO, "cflrw", 0); + biowait(bp); splx(s); if (bp->b_error != 0) { error = bp->b_error; @@ -284,7 +285,6 @@ void cflrint(int ch) { struct buf *bp = cfltab.cfl_buf; - int s; switch (cfltab.cfl_active) { case CFL_NEXT: @@ -292,10 +292,10 @@ cflrint(int ch) cfltab.cfl_active = CFL_GETIN; else { cfltab.cfl_active = CFL_IDLE; - s = splbio(); - bp->b_flags |= B_DONE; - splx(s); - wakeup(bp); + mutex_enter(bp->b_objlock); + bp->b_oflags |= BO_DONE; + cv_broadcast(&bp->b_done); + mutex_exit(bp->b_objlock); } break; @@ -303,10 +303,10 @@ cflrint(int ch) *cfltab.cfl_xaddr++ = ch & 0377; if (--bp->b_bcount==0) { cfltab.cfl_active = CFL_IDLE; - s = splbio(); - bp->b_flags |= B_DONE; - splx(s); - wakeup(bp); + mutex_enter(bp->b_objlock); + bp->b_oflags |= BO_DONE; + cv_broadcast(&bp->b_done); + mutex_exit(bp->b_objlock); } break; } diff --git a/sys/arch/vax/vax/crl.c b/sys/arch/vax/vax/crl.c index 02539cadaa2b..ce2bd24e330e 100644 --- a/sys/arch/vax/vax/crl.c +++ b/sys/arch/vax/vax/crl.c @@ -1,4 +1,4 @@ -/* $NetBSD: crl.c,v 1.23 2007/10/17 19:57:59 garbled Exp $ */ +/* $NetBSD: crl.c,v 1.24 2008/01/02 11:48:31 ad Exp $ */ /*- * Copyright (c) 1982, 1986 The Regents of the University of California. * All rights reserved. @@ -36,7 +36,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: crl.c,v 1.23 2007/10/17 19:57:59 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: crl.c,v 1.24 2008/01/02 11:48:31 ad Exp $"); #include #include @@ -150,16 +150,17 @@ crlrw(dev, uio, flag) break; } if (uio->uio_rw == UIO_WRITE) { - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; } else { - bp->b_flags &= ~(B_WRITE|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_WRITE); bp->b_flags |= B_READ; } s = splconsmedia(); crlstart(); - while ((bp->b_flags & B_DONE) == 0) - (void) tsleep(bp, PRIBIO, "crlrw", 0); + biowait(bp); splx(s); if (bp->b_error != 0) { error = bp->b_error; @@ -231,7 +232,7 @@ crlintr(arg) case CRL_F_READ: case CRL_F_WRITE: - bp->b_flags |= B_DONE; + bp->b_oflags |= BO_DONE; } crltab.crl_active = 0; wakeup((void *)bp); @@ -254,7 +255,7 @@ crlintr(arg) case CRL_S_ABORT: crltab.crl_active = CRL_F_RETSTS; mtpr(STXCS_IE | CRL_F_RETSTS, PR_STXCS); - bp->b_flags |= B_DONE; + bp->b_oflags |= BO_DONE; bp->b_error = EIO; break; @@ -266,9 +267,9 @@ crlintr(arg) case CRL_S_HNDSHK: printf("crl: hndshk error\n"); /* dump out some status too? 
*/ crltab.crl_active = 0; - bp->b_flags |= B_DONE; + bp->b_oflags |= BO_DONE; bp->b_error = EIO; - wakeup((void *)bp); + cv_broadcast(&bp->b_done); break; case CRL_S_HWERR: diff --git a/sys/arch/vax/vax/disksubr.c b/sys/arch/vax/vax/disksubr.c index f043b89b7c12..88dabe37f660 100644 --- a/sys/arch/vax/vax/disksubr.c +++ b/sys/arch/vax/vax/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.45 2007/10/17 19:57:59 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.46 2008/01/02 11:48:31 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.45 2007/10/17 19:57:59 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.46 2008/01/02 11:48:31 ad Exp $"); #include #include @@ -263,7 +263,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), goto done; dlp = (struct disklabel *)((char *)bp->b_data + LABELOFFSET); bcopy(lp, dlp, sizeof(struct disklabel)); - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/x68k/x68k/disksubr.c b/sys/arch/x68k/x68k/disksubr.c index 297fff1b72c0..cd3e959bab05 100644 --- a/sys/arch/x68k/x68k/disksubr.c +++ b/sys/arch/x68k/x68k/disksubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: disksubr.c,v 1.32 2007/10/17 19:58:04 garbled Exp $ */ +/* $NetBSD: disksubr.c,v 1.33 2008/01/02 11:48:32 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.32 2007/10/17 19:58:04 garbled Exp $"); +__KERNEL_RCSID(0, "$NetBSD: disksubr.c,v 1.33 2008/01/02 11:48:32 ad Exp $"); #include "opt_compat_netbsd.h" @@ -138,7 +138,7 @@ dodospart: labelsz = howmany(sizeof(struct cpu_disklabel), lp->d_secsize) * lp->d_secsize; bp->b_bcount = labelsz; /* to support < 512B/sector disks */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); (*strat)(bp); /* if successful, wander through Human68k partition table */ @@ -213,7 +213,7 @@ dobadsect: i = 0; do { /* read a bad sector table */ - bp->b_flags &= ~(B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_blkno = lp->d_secperunit - lp->d_nsectors + i; if (lp->d_secsize > DEF_BSIZE) @@ -349,7 +349,8 @@ writedisklabel(dev_t dev, void (*strat)(struct buf *), if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && dkcksum(dlp) == 0) { *dlp = *lp; - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); @@ -368,7 +369,7 @@ dodospart: /* read the x68k disk magic */ bp->b_blkno = DOSBBSECTOR; bp->b_bcount = lp->d_secsize; - bp->b_flags &= ~(B_WRITE|B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_cylinder = DOSBBSECTOR / lp->d_secpercyl; (*strat)(bp); @@ -381,7 +382,7 @@ dodospart: labelsz = howmany(sizeof(struct cpu_disklabel), lp->d_secsize) * lp->d_secsize; bp->b_bcount = labelsz; - bp->b_flags &= ~(B_WRITE|B_DONE); + bp->b_oflags &= ~(BO_DONE); bp->b_flags |= B_READ; bp->b_cylinder = DOSPARTOFF / lp->d_secpercyl; (*strat)(bp); @@ -441,7 +442,8 @@ dodospart: dp->dp_start = start; dp->dp_size = size; } - bp->b_flags &= ~(B_READ|B_DONE); + bp->b_oflags &= ~(BO_DONE); + bp->b_flags &= ~(B_READ); bp->b_flags |= B_WRITE; (*strat)(bp); error = biowait(bp); diff --git a/sys/arch/x86/x86/pmap.c b/sys/arch/x86/x86/pmap.c index c07f4a0f5ad5..fc57b5def559 100644 --- a/sys/arch/x86/x86/pmap.c +++ 
b/sys/arch/x86/x86/pmap.c @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.15 2007/12/20 23:46:11 ad Exp $ */ +/* $NetBSD: pmap.c,v 1.16 2008/01/02 11:48:33 ad Exp $ */ /* * Copyright (c) 2007 Manuel Bouyer. @@ -154,7 +154,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.15 2007/12/20 23:46:11 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.16 2008/01/02 11:48:33 ad Exp $"); #include "opt_user_ldt.h" #include "opt_lockdebug.h" @@ -364,20 +364,6 @@ long nbpd[] = NBPD_INITIALIZER; pd_entry_t *normal_pdes[] = PDES_INITIALIZER; pd_entry_t *alternate_pdes[] = APDES_INITIALIZER; -/* - * locking data structures. to enable the locks, changes from the - * 'vmlocking' cvs branch are required. for now, just stub them out. - */ - -#define rw_enter(a, b) /* nothing */ -#define rw_exit(a) /* nothing */ -#define mutex_enter(a) simple_lock(a) -#define mutex_exit(a) simple_unlock(a) -#define mutex_init(a, b, c) simple_lock_init(a) -#define mutex_owned(a) (1) -#define mutex_destroy(a) /* nothing */ -#define kmutex_t struct simplelock - static kmutex_t pmaps_lock; static krwlock_t pmap_main_lock; @@ -1986,8 +1972,6 @@ pmap_destroy(struct pmap *pmap) * remove it from global list of pmaps */ - KERNEL_LOCK(1, NULL); - mutex_enter(&pmaps_lock); LIST_REMOVE(pmap, pm_list); mutex_exit(&pmaps_lock); @@ -2025,8 +2009,6 @@ pmap_destroy(struct pmap *pmap) for (i = 0; i < PTP_LEVELS - 1; i++) mutex_destroy(&pmap->pm_obj[i].vmobjlock); pool_cache_put(&pmap_cache, pmap); - - KERNEL_UNLOCK_ONE(NULL); } /* diff --git a/sys/arch/xen/xen/xbdback.c b/sys/arch/xen/xen/xbdback.c index 6528902df9f8..6b7518812988 100644 --- a/sys/arch/xen/xen/xbdback.c +++ b/sys/arch/xen/xen/xbdback.c @@ -1,4 +1,4 @@ -/* $NetBSD: xbdback.c,v 1.28 2007/11/26 19:01:27 pooka Exp $ */ +/* $NetBSD: xbdback.c,v 1.29 2008/01/02 11:48:33 ad Exp $ */ /* * Copyright (c) 2005 Manuel Bouyer. 
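With the vmlocking stub macros deleted, pmaps_lock in the x86 pmap is a real kmutex and no longer needs the kernel lock held around it. A minimal sketch of guarding a global pmap list that way; mypmap and the function names are hypothetical:

#include <sys/param.h>
#include <sys/mutex.h>
#include <sys/queue.h>

struct mypmap {
	LIST_ENTRY(mypmap)	pm_list;	/* global list linkage */
};

static LIST_HEAD(, mypmap)	all_pmaps = LIST_HEAD_INITIALIZER(all_pmaps);
static kmutex_t			all_pmaps_lock;

static void
pmap_list_init(void)
{

	/* IPL_NONE: the list is never touched from interrupt context. */
	mutex_init(&all_pmaps_lock, MUTEX_DEFAULT, IPL_NONE);
}

static void
pmap_list_remove(struct mypmap *pm)
{

	mutex_enter(&all_pmaps_lock);
	LIST_REMOVE(pm, pm_list);
	mutex_exit(&all_pmaps_lock);
}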
@@ -853,6 +853,8 @@ xbdback_co_io_gotreq(struct xbdback_instance *xbdi, void *obj) static void * xbdback_co_io_loop(struct xbdback_instance *xbdi, void *obj) { + struct xbdback_io *xio; + (void)obj; if (xbdi->segno < xbdi->xen_req->nr_segments) { unsigned long this_fas, last_fas; @@ -898,7 +900,9 @@ xbdback_co_io_loop(struct xbdback_instance *xbdi, void *obj) if (xbdi->io == NULL) { xbdi->cont = xbdback_co_io_gotio; - return xbdback_pool_get(&xbdback_io_pool, xbdi); + xio = xbdback_pool_get(&xbdback_io_pool, xbdi); + buf_init(&xio->xio_buf); + return xio; } else { xbdi->cont = xbdback_co_io_gotio2; } @@ -929,16 +933,18 @@ xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj) start_offset = blkif_first_sect(xbdi->this_fas) * VBD_BSIZE; if (xbdi->xen_req->operation == BLKIF_OP_WRITE) { - buf_flags = B_WRITE | B_CALL; + buf_flags = B_WRITE; } else { - buf_flags = B_READ | B_CALL; + buf_flags = B_READ; } - BUF_INIT(&xbd_io->xio_buf); xbd_io->xio_buf.b_flags = buf_flags; + xbd_io->xio_buf.b_cflags = 0; + xbd_io->xio_buf.b_oflags = 0; xbd_io->xio_buf.b_iodone = xbdback_iodone; xbd_io->xio_buf.b_proc = NULL; xbd_io->xio_buf.b_vp = xbdi->req_vbd->vp; + xbd_io->xio_buf.b_objlock = &xbdi->req_vbd->vp->v_interlock; xbd_io->xio_buf.b_dev = xbdi->req_vbd->dev; xbd_io->xio_buf.b_blkno = xbdi->next_sector; xbd_io->xio_buf.b_bcount = 0; @@ -1133,12 +1139,14 @@ xbdback_iodone(struct buf *bp) xbdback_pool_put(&xbdback_request_pool, xbd_req); } xbdi_put(xbdi); + buf_destroy(&xbd_io->xio_buf); xbdback_pool_put(&xbdback_io_pool, xbd_io); } static void * xbdback_co_probe(struct xbdback_instance *xbdi, void *obj) { + struct xbdback_io *xio; (void)obj; /* * There should be only one page in the request. Map it and store @@ -1153,7 +1161,9 @@ xbdback_co_probe(struct xbdback_instance *xbdi, void *obj) return xbdi; } xbdi->cont = xbdback_co_probe_gotio; - return xbdback_pool_get(&xbdback_io_pool, xbdi); + xio = xbdback_pool_get(&xbdback_io_pool, xbdi); + buf_init(&xio->xio_buf); + return xio; } static void * @@ -1202,6 +1212,7 @@ xbdback_co_probe_gotvm(struct xbdback_instance *xbdi, void *obj) xbdback_unmap_shm(xbdi->io); XENPRINTF(("xbdback_probe: nreplies=%d\n", i)); xbdback_send_reply(xbdi, req->id, req->operation, i); + buf_destroy(&xbdi->io->xio_buf); xbdback_pool_put(&xbdback_io_pool, xbdi->io); xbdi->io = NULL; xbdi->cont = xbdback_co_main_incr; diff --git a/sys/arch/xen/xen/xbdback_xenbus.c b/sys/arch/xen/xen/xbdback_xenbus.c index 394923b16fdd..f0d06b36fa0f 100644 --- a/sys/arch/xen/xen/xbdback_xenbus.c +++ b/sys/arch/xen/xen/xbdback_xenbus.c @@ -1,4 +1,4 @@ -/* $NetBSD: xbdback_xenbus.c,v 1.11 2007/11/26 19:01:28 pooka Exp $ */ +/* $NetBSD: xbdback_xenbus.c,v 1.12 2008/01/02 11:48:33 ad Exp $ */ /* * Copyright (c) 2006 Manuel Bouyer. 
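xbdback embeds its struct buf in a pool-managed xbdback_io, so each buffer is now constructed with buf_init() before use and torn down with buf_destroy() before the container is recycled, and b_objlock is pointed at the backing vnode's interlock. A minimal sketch of that lifecycle; my_io and its functions are hypothetical names:

#include <sys/param.h>
#include <sys/buf.h>
#include <sys/vnode.h>

/* Hypothetical container with an embedded buffer, like xbdback_io. */
struct my_io {
	struct buf	mio_buf;
};

static void
my_io_submit(struct my_io *io, struct vnode *vp, void (*iodone)(struct buf *))
{
	struct buf *bp = &io->mio_buf;

	buf_init(bp);			/* construct b_done/b_busy before use */
	bp->b_flags = B_READ;
	bp->b_cflags = 0;
	bp->b_oflags = 0;
	bp->b_iodone = iodone;		/* biodone() will call this */
	bp->b_proc = NULL;
	bp->b_vp = vp;
	bp->b_objlock = &vp->v_interlock;
	/* b_dev, b_blkno, b_bcount and b_data set up as usual (omitted) */
	VOP_STRATEGY(vp, bp);
}

static void
my_io_recycle(struct my_io *io)
{

	/* Must precede handing the container back to its pool. */
	buf_destroy(&io->mio_buf);
}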
@@ -953,6 +953,8 @@ xbdback_co_io_gotreq(struct xbdback_instance *xbdi, void *obj) static void * xbdback_co_io_loop(struct xbdback_instance *xbdi, void *obj) { + struct xbdback_io *xio; + (void)obj; if (xbdi->xbdi_segno < xbdi->xbdi_xen_req.nr_segments) { uint8_t this_fs, this_ls, last_fs, last_ls; @@ -1014,7 +1016,9 @@ xbdback_co_io_loop(struct xbdback_instance *xbdi, void *obj) if (xbdi->xbdi_io == NULL) { xbdi->xbdi_cont = xbdback_co_io_gotio; - return xbdback_pool_get(&xbdback_io_pool, xbdi); + xio = xbdback_pool_get(&xbdback_io_pool, xbdi); + buf_init(&xio->xio_buf); + return xio; } else { xbdi->xbdi_cont = xbdback_co_io_gotio2; } @@ -1045,16 +1049,18 @@ xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj) start_offset = xbdi->xbdi_this_fs * VBD_BSIZE; if (xbdi->xbdi_xen_req.operation == BLKIF_OP_WRITE) { - buf_flags = B_WRITE | B_CALL; + buf_flags = B_WRITE; } else { - buf_flags = B_READ | B_CALL; + buf_flags = B_READ; } - BUF_INIT(&xbd_io->xio_buf); xbd_io->xio_buf.b_flags = buf_flags; + xbd_io->xio_buf.b_cflags = 0; + xbd_io->xio_buf.b_oflags = 0; xbd_io->xio_buf.b_iodone = xbdback_iodone; xbd_io->xio_buf.b_proc = NULL; xbd_io->xio_buf.b_vp = xbdi->xbdi_vp; + xbd_io->xio_buf.b_objlock = &xbdi->xbdi_vp->v_interlock; xbd_io->xio_buf.b_dev = xbdi->xbdi_dev; xbd_io->xio_buf.b_blkno = xbdi->xbdi_next_sector; xbd_io->xio_buf.b_bcount = 0; @@ -1249,6 +1255,7 @@ xbdback_iodone(struct buf *bp) xbdback_pool_put(&xbdback_request_pool, xbd_req); } xbdi_put(xbdi); + buf_destroy(&xbd_io->xio_buf); xbdback_pool_put(&xbdback_io_pool, xbd_io); } diff --git a/sys/coda/coda_vnops.c b/sys/coda/coda_vnops.c index 77ef9b11c108..a637338a523f 100644 --- a/sys/coda/coda_vnops.c +++ b/sys/coda/coda_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: coda_vnops.c,v 1.65 2007/12/25 18:33:35 perry Exp $ */ +/* $NetBSD: coda_vnops.c,v 1.66 2008/01/02 11:48:34 ad Exp $ */ /* * @@ -46,7 +46,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: coda_vnops.c,v 1.65 2007/12/25 18:33:35 perry Exp $"); +__KERNEL_RCSID(0, "$NetBSD: coda_vnops.c,v 1.66 2008/01/02 11:48:34 ad Exp $"); #include #include @@ -872,7 +872,7 @@ coda_inactive(void *v) printf("coda_inactive: %p ovp != NULL\n", vp); } VOP_UNLOCK(vp, 0); - vgone(vp); + *ap->a_recycle = true; } MARK_INT_SAT(CODA_INACTIVE_STATS); @@ -2002,7 +2002,7 @@ coda_getpages(void *v) /* Check for control object. */ if (IS_CTL_VP(vp)) { printf("coda_getpages: control object %p\n", vp); - simple_unlock(&vp->v_uobj.vmobjlock); + mutex_exit(&vp->v_uobj.vmobjlock); return(EINVAL); } @@ -2017,7 +2017,7 @@ coda_getpages(void *v) waslocked = VOP_ISLOCKED(vp); /* Drop the vmobject lock. */ - simple_unlock(&vp->v_uobj.vmobjlock); + mutex_exit(&vp->v_uobj.vmobjlock); /* Get container file if not already present. */ if (cp->c_ovp == NULL) { @@ -2065,7 +2065,7 @@ coda_getpages(void *v) ap->a_vp = cp->c_ovp; /* Get the lock on the container vnode, and call getpages on it. */ - simple_lock(&ap->a_vp->v_uobj.vmobjlock); + mutex_enter(&ap->a_vp->v_uobj.vmobjlock); error = VCALL(ap->a_vp, VOFFSET(vop_getpages), ap); /* If we opened the vnode, we must close it. */ @@ -2106,7 +2106,7 @@ coda_putpages(void *v) int error; /* Drop the vmobject lock. */ - simple_unlock(&vp->v_uobj.vmobjlock); + mutex_exit(&vp->v_uobj.vmobjlock); /* Check for control object. */ if (IS_CTL_VP(vp)) { @@ -2127,7 +2127,7 @@ coda_putpages(void *v) ap->a_vp = cp->c_ovp; /* Get the lock on the container vnode, and call putpages on it.
*/ - simple_lock(&ap->a_vp->v_uobj.vmobjlock); + mutex_enter(&ap->a_vp->v_uobj.vmobjlock); error = VCALL(ap->a_vp, VOFFSET(vop_putpages), ap); return error; diff --git a/sys/compat/svr4/svr4_fcntl.c b/sys/compat/svr4/svr4_fcntl.c index 73a15a07fefa..324846a43925 100644 --- a/sys/compat/svr4/svr4_fcntl.c +++ b/sys/compat/svr4/svr4_fcntl.c @@ -1,4 +1,4 @@ -/* $NetBSD: svr4_fcntl.c,v 1.64 2007/12/20 23:03:04 dsl Exp $ */ +/* $NetBSD: svr4_fcntl.c,v 1.65 2008/01/02 11:48:35 ad Exp $ */ /*- * Copyright (c) 1994, 1997 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: svr4_fcntl.c,v 1.64 2007/12/20 23:03:04 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: svr4_fcntl.c,v 1.65 2008/01/02 11:48:35 ad Exp $"); #include #include @@ -253,9 +253,9 @@ fd_revoke(struct lwp *l, int fd, register_t *retval) KAUTH_GENERIC_ISSUSER, NULL)) != 0) goto out; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); revoke = (vp->v_usecount > 1 || (vp->v_iflag & VI_ALIASED)); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); if (revoke) VOP_REVOKE(vp, REVOKEALL); out: diff --git a/sys/dev/ata/ata_raid.c b/sys/dev/ata/ata_raid.c index 7049f2888161..2d30b5d7116a 100644 --- a/sys/dev/ata/ata_raid.c +++ b/sys/dev/ata/ata_raid.c @@ -1,4 +1,4 @@ -/* $NetBSD: ata_raid.c,v 1.23 2007/07/09 21:00:30 ad Exp $ */ +/* $NetBSD: ata_raid.c,v 1.24 2008/01/02 11:48:36 ad Exp $ */ /* * Copyright (c) 2003 Wasabi Systems, Inc. @@ -40,7 +40,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ata_raid.c,v 1.23 2007/07/09 21:00:30 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ata_raid.c,v 1.24 2008/01/02 11:48:36 ad Exp $"); #include #include @@ -294,8 +294,7 @@ ata_raid_config_block_rw(struct vnode *vp, daddr_t blkno, void *tbuf, struct buf *bp; int error; - bp = getiobuf(); - bp->b_vp = vp; + bp = getiobuf(vp, true); bp->b_blkno = blkno; bp->b_bcount = bp->b_resid = size; bp->b_flags = bflags; diff --git a/sys/dev/ata/ld_ataraid.c b/sys/dev/ata/ld_ataraid.c index e44b68882fe4..21a88d86e959 100644 --- a/sys/dev/ata/ld_ataraid.c +++ b/sys/dev/ata/ld_ataraid.c @@ -1,4 +1,4 @@ -/* $NetBSD: ld_ataraid.c,v 1.22 2007/11/26 19:01:36 pooka Exp $ */ +/* $NetBSD: ld_ataraid.c,v 1.23 2008/01/02 11:48:37 ad Exp $ */ /* * Copyright (c) 2003 Wasabi Systems, Inc. @@ -45,7 +45,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ld_ataraid.c,v 1.22 2007/11/26 19:01:36 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ld_ataraid.c,v 1.23 2008/01/02 11:48:37 ad Exp $"); #include "rnd.h" @@ -246,8 +246,10 @@ ld_ataraid_make_cbuf(struct ld_ataraid_softc *sc, struct buf *bp, cbp = CBUF_GET(); if (cbp == NULL) return (NULL); - BUF_INIT(&cbp->cb_buf); - cbp->cb_buf.b_flags = bp->b_flags | B_CALL; + buf_init(&cbp->cb_buf); + cbp->cb_buf.b_flags = bp->b_flags; + cbp->cb_buf.b_oflags = bp->b_oflags; + cbp->cb_buf.b_cflags = bp->b_cflags; cbp->cb_buf.b_iodone = sc->sc_iodone; cbp->cb_buf.b_proc = bp->b_proc; cbp->cb_buf.b_vp = sc->sc_vnodes[comp]; @@ -303,6 +305,7 @@ ld_ataraid_start_span(struct ld_softc *ld, struct buf *bp) /* Free the already allocated component buffers. */ while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); + buf_destroy(&cbp->cb_buf); CBUF_PUT(cbp); } return (EAGAIN); @@ -400,6 +403,7 @@ free_and_exit: /* Free the already allocated component buffers.
*/ while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); + buf_destroy(&cbp->cb_buf); CBUF_PUT(cbp); } return (error); diff --git a/sys/dev/ata/wd.c b/sys/dev/ata/wd.c index dcdc9855a6f3..2f4acf51a015 100644 --- a/sys/dev/ata/wd.c +++ b/sys/dev/ata/wd.c @@ -1,4 +1,4 @@ -/* $NetBSD: wd.c,v 1.354 2007/12/18 15:30:40 joerg Exp $ */ +/* $NetBSD: wd.c,v 1.355 2008/01/02 11:48:37 ad Exp $ */ /* * Copyright (c) 1998, 2001 Manuel Bouyer. All rights reserved. @@ -66,7 +66,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.354 2007/12/18 15:30:40 joerg Exp $"); +__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.355 2008/01/02 11:48:37 ad Exp $"); #include "opt_ata.h" @@ -670,7 +670,9 @@ wd_split_mod15_write(struct buf *bp) * Advance the pointer to the second half and issue that command * using the same opening. */ - bp->b_flags = obp->b_flags | B_CALL; + bp->b_flags = obp->b_flags; + bp->b_oflags = obp->b_oflags; + bp->b_cflags = obp->b_cflags; bp->b_data = (char *)bp->b_data + bp->b_bcount; bp->b_blkno += (bp->b_bcount / 512); bp->b_rawblkno += (bp->b_bcount / 512); @@ -705,7 +707,7 @@ __wdstart(struct wd_softc *wd, struct buf *bp) struct buf *nbp; /* already at splbio */ - nbp = getiobuf_nowait(); + nbp = getiobuf(NULL, false); if (__predict_false(nbp == NULL)) { /* No memory -- fail the iop. */ bp->b_error = ENOMEM; @@ -717,7 +719,6 @@ __wdstart(struct wd_softc *wd, struct buf *bp) nbp->b_error = 0; nbp->b_proc = bp->b_proc; - nbp->b_vp = NULLVP; nbp->b_dev = bp->b_dev; nbp->b_bcount = bp->b_bcount / 2; @@ -727,7 +728,9 @@ __wdstart(struct wd_softc *wd, struct buf *bp) nbp->b_blkno = bp->b_blkno; nbp->b_rawblkno = bp->b_rawblkno; - nbp->b_flags = bp->b_flags | B_CALL; + nbp->b_flags = bp->b_flags; + nbp->b_oflags = bp->b_oflags; + nbp->b_cflags = bp->b_cflags; nbp->b_iodone = wd_split_mod15_write; /* Put ptr to orig buf in b_private and use new buf */ @@ -881,8 +884,7 @@ noerror: if ((wd->sc_wdc_bio.flags & ATA_CORR) || wd->retries > 0) rnd_add_uint32(&wd->rnd_source, bp->b_blkno); #endif /* XXX Yuck, but we don't want to increment openings in this case */ - if (__predict_false((bp->b_flags & B_CALL) != 0 && - bp->b_iodone == wd_split_mod15_write)) + if (__predict_false(bp->b_iodone == wd_split_mod15_write)) biodone(bp); else { biodone(bp); @@ -1957,7 +1959,7 @@ wi_get(void) int s; wi = malloc(sizeof(struct wd_ioctl), M_TEMP, M_WAITOK|M_ZERO); - simple_lock_init(&wi->wi_bp.b_interlock); + buf_init(&wi->wi_bp); s = splbio(); LIST_INSERT_HEAD(&wi_head, wi, wi_list); splx(s); @@ -1976,6 +1978,7 @@ wi_free(struct wd_ioctl *wi) s = splbio(); LIST_REMOVE(wi, wi_list); splx(s); + buf_destroy(&wi->wi_bp); free(wi, M_TEMP); } @@ -2031,7 +2034,7 @@ wdioctlstrategy(struct buf *bp) printf("wdioctlstrategy: " "No matching ioctl request found in queue\n"); error = EINVAL; - goto done; + goto bad; } memset(&ata_c, 0, sizeof(ata_c)); @@ -2043,7 +2046,7 @@ wdioctlstrategy(struct buf *bp) if (bp->b_bcount != wi->wi_atareq.datalen) { printf("physio split wd ioctl request... 
cannot proceed\n"); error = EIO; - goto done; + goto bad; } /* @@ -2055,7 +2058,7 @@ wdioctlstrategy(struct buf *bp) (bp->b_bcount / wi->wi_softc->sc_dk.dk_label->d_secsize) >= (1 << NBBY)) { error = EINVAL; - goto done; + goto bad; } /* @@ -2064,7 +2067,7 @@ wdioctlstrategy(struct buf *bp) if (wi->wi_atareq.timeout == 0) { error = EINVAL; - goto done; + goto bad; } if (wi->wi_atareq.flags & ATACMD_READ) @@ -2092,8 +2095,7 @@ wdioctlstrategy(struct buf *bp) if (wi->wi_softc->atabus->ata_exec_command(wi->wi_softc->drvp, &ata_c) != ATACMD_COMPLETE) { wi->wi_atareq.retsts = ATACMD_ERROR; - error = EIO; - goto done; + goto bad; } if (ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) { @@ -2116,7 +2118,10 @@ wdioctlstrategy(struct buf *bp) } } -done: + bp->b_error = 0; + biodone(bp); + return; +bad: bp->b_error = error; biodone(bp); } diff --git a/sys/dev/ccd.c b/sys/dev/ccd.c index 0088ab2163ba..cc88bd4921f4 100644 --- a/sys/dev/ccd.c +++ b/sys/dev/ccd.c @@ -1,4 +1,4 @@ -/* $NetBSD: ccd.c,v 1.125 2007/12/05 07:06:50 ad Exp $ */ +/* $NetBSD: ccd.c,v 1.126 2008/01/02 11:48:36 ad Exp $ */ /*- * Copyright (c) 1996, 1997, 1998, 1999, 2007 The NetBSD Foundation, Inc. @@ -125,7 +125,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.125 2007/12/05 07:06:50 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.126 2008/01/02 11:48:36 ad Exp $"); #include #include @@ -837,8 +837,10 @@ ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, void *addr, cbp = CCD_GETBUF(); if (cbp == NULL) return (NULL); - BUF_INIT(&cbp->cb_buf); - cbp->cb_buf.b_flags = bp->b_flags | B_CALL; + buf_init(&cbp->cb_buf); + cbp->cb_buf.b_flags = bp->b_flags; + cbp->cb_buf.b_oflags = bp->b_oflags; + cbp->cb_buf.b_cflags = bp->b_cflags; cbp->cb_buf.b_iodone = ccdiodone; cbp->cb_buf.b_proc = bp->b_proc; cbp->cb_buf.b_dev = ci->ci_dev; @@ -924,6 +926,7 @@ ccdiodone(struct buf *vbp) cs->sc_xname, bp->b_error, cbp->cb_comp); } count = cbp->cb_buf.b_bcount; + buf_destroy(&cbp->cb_buf); CCD_PUTBUF(cbp); /* diff --git a/sys/dev/cgd.c b/sys/dev/cgd.c index 680b8c0adc80..bff5c17e45fc 100644 --- a/sys/dev/cgd.c +++ b/sys/dev/cgd.c @@ -1,4 +1,4 @@ -/* $NetBSD: cgd.c,v 1.48 2007/11/26 19:01:34 pooka Exp $ */ +/* $NetBSD: cgd.c,v 1.49 2008/01/02 11:48:36 ad Exp $ */ /*- * Copyright (c) 2002 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cgd.c,v 1.48 2007/11/26 19:01:34 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cgd.c,v 1.49 2008/01/02 11:48:36 ad Exp $"); #include #include @@ -295,6 +295,7 @@ cgdstart(struct dk_softc *dksc, struct buf *bp) void * addr; void * newaddr; daddr_t bn; + struct vnode *vp; DPRINTF_FOLLOW(("cgdstart(%p, %p)\n", dksc, bp)); disk_busy(&dksc->sc_dkdev); /* XXX: put in dksubr.c */ @@ -306,7 +307,7 @@ cgdstart(struct dk_softc *dksc, struct buf *bp) * we can fail quickly if they are unavailable. 
*/ - nbp = getiobuf_nowait(); + nbp = getiobuf(cs->sc_tvn, false); if (nbp == NULL) { disk_unbusy(&dksc->sc_dkdev, 0, (bp->b_flags & B_READ)); return -1; @@ -330,18 +331,22 @@ cgdstart(struct dk_softc *dksc, struct buf *bp) } nbp->b_data = newaddr; - nbp->b_flags = bp->b_flags | B_CALL; + nbp->b_flags = bp->b_flags; + nbp->b_oflags = bp->b_oflags; + nbp->b_cflags = bp->b_cflags; nbp->b_iodone = cgdiodone; nbp->b_proc = bp->b_proc; nbp->b_blkno = bn; - nbp->b_vp = cs->sc_tvn; nbp->b_bcount = bp->b_bcount; nbp->b_private = bp; BIO_COPYPRIO(nbp, bp); if ((nbp->b_flags & B_READ) == 0) { - V_INCR_NUMOUTPUT(nbp->b_vp); + vp = nbp->b_vp; + mutex_enter(&vp->v_interlock); + vp->v_numoutput++; + mutex_exit(&vp->v_interlock); } VOP_STRATEGY(cs->sc_tvn, nbp); return 0; diff --git a/sys/dev/dkwedge/dk.c b/sys/dev/dkwedge/dk.c index 932682a9b5d8..9bb55d29a267 100644 --- a/sys/dev/dkwedge/dk.c +++ b/sys/dev/dkwedge/dk.c @@ -1,4 +1,4 @@ -/* $NetBSD: dk.c,v 1.31 2007/12/09 20:27:56 jmcneill Exp $ */ +/* $NetBSD: dk.c,v 1.32 2008/01/02 11:48:37 ad Exp $ */ /*- * Copyright (c) 2004, 2005, 2006, 2007 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.31 2007/12/09 20:27:56 jmcneill Exp $"); +__KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.32 2008/01/02 11:48:37 ad Exp $"); #include "opt_dkwedge.h" @@ -856,7 +856,7 @@ dkwedge_read(struct disk *pdk, struct vnode *vp, daddr_t blkno, { struct buf b; - BUF_INIT(&b); + buf_init(&b); b.b_vp = vp; b.b_dev = vp->v_rdev; @@ -1037,6 +1037,7 @@ dkstrategy(struct buf *bp) static void dkstart(struct dkwedge_softc *sc) { + struct vnode *vp; struct buf *bp, *nbp; /* Do as much work as has been enqueued. */ @@ -1056,7 +1057,7 @@ dkstart(struct dkwedge_softc *sc) /* Instrumentation. */ disk_busy(&sc->sc_dk); - nbp = getiobuf_nowait(); + nbp = getiobuf(sc->sc_parent->dk_rawvp, false); if (nbp == NULL) { /* * No resources to run this request; leave the @@ -1070,21 +1071,25 @@ dkstart(struct dkwedge_softc *sc) (void) BUFQ_GET(sc->sc_bufq); - BUF_INIT(nbp); nbp->b_data = bp->b_data; - nbp->b_flags = bp->b_flags | B_CALL; + nbp->b_flags = bp->b_flags; + nbp->b_oflags = bp->b_oflags; + nbp->b_cflags = bp->b_cflags; nbp->b_iodone = dkiodone; nbp->b_proc = bp->b_proc; nbp->b_blkno = bp->b_rawblkno; nbp->b_dev = sc->sc_parent->dk_rawvp->v_rdev; - nbp->b_vp = sc->sc_parent->dk_rawvp; nbp->b_bcount = bp->b_bcount; nbp->b_private = bp; BIO_COPYPRIO(nbp, bp); - if ((nbp->b_flags & B_READ) == 0) - V_INCR_NUMOUTPUT(nbp->b_vp); - VOP_STRATEGY(nbp->b_vp, nbp); + vp = nbp->b_vp; + if ((nbp->b_flags & B_READ) == 0) { + mutex_enter(&vp->v_interlock); + vp->v_numoutput++; + mutex_exit(&vp->v_interlock); + } + VOP_STRATEGY(vp, nbp); } } diff --git a/sys/dev/fss.c b/sys/dev/fss.c index 15b71fee8a5d..b432ad8aa6c1 100644 --- a/sys/dev/fss.c +++ b/sys/dev/fss.c @@ -1,4 +1,4 @@ -/* $NetBSD: fss.c,v 1.41 2007/12/08 19:29:41 pooka Exp $ */ +/* $NetBSD: fss.c,v 1.42 2008/01/02 11:48:36 ad Exp $ */ /*- * Copyright (c) 2003 The NetBSD Foundation, Inc. 
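cgd and dkwedge above now account pending writes themselves: v_numoutput is bumped under v_interlock before the buffer is handed to VOP_STRATEGY(), replacing the old V_INCR_NUMOUTPUT() macro. A minimal sketch of dispatching a nested buffer under that rule; nested_write is a hypothetical name and error handling is trimmed:

#include <sys/param.h>
#include <sys/buf.h>
#include <sys/mutex.h>
#include <sys/vnode.h>

/* Clone 'bp' onto 'vp' and dispatch it; sketch only, detail omitted. */
static int
nested_write(struct vnode *vp, struct buf *bp, void (*iodone)(struct buf *))
{
	struct buf *nbp;

	nbp = getiobuf(vp, false);	/* false: may not sleep here */
	if (nbp == NULL)
		return ENOMEM;

	nbp->b_data = bp->b_data;
	nbp->b_flags = bp->b_flags;
	nbp->b_oflags = bp->b_oflags;
	nbp->b_cflags = bp->b_cflags;
	nbp->b_bcount = bp->b_bcount;
	nbp->b_blkno = bp->b_rawblkno;
	nbp->b_iodone = iodone;
	nbp->b_private = bp;

	if ((nbp->b_flags & B_READ) == 0) {
		/* Writes in flight are accounted on the target vnode. */
		mutex_enter(&vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(&vp->v_interlock);
	}
	VOP_STRATEGY(vp, nbp);
	return 0;
}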
@@ -43,7 +43,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.41 2007/12/08 19:29:41 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fss.c,v 1.42 2008/01/02 11:48:36 ad Exp $"); #include "fss.h" @@ -895,8 +895,8 @@ restart: if (len > MAXPHYS) len = MAXPHYS; - bp = getiobuf(); - bp->b_flags = B_READ|B_CALL; + bp = getiobuf(NULL, true); + bp->b_flags = B_READ; bp->b_bcount = len; bp->b_bufsize = bp->b_bcount; bp->b_error = 0; @@ -904,7 +904,6 @@ restart: bp->b_blkno = dblk; bp->b_proc = NULL; bp->b_dev = sc->sc_bdev; - bp->b_vp = NULLVP; bp->b_private = scp; bp->b_iodone = fss_cluster_iodone; @@ -952,7 +951,7 @@ fss_bs_io(struct fss_softc *sc, fss_io_type rw, data, len, off, UIO_SYSSPACE, IO_UNIT|IO_NODELOCKED, sc->sc_bs_lwp->l_cred, NULL, NULL); if (error == 0) { - simple_lock(&sc->sc_bs_vp->v_interlock); + mutex_enter(&sc->sc_bs_vp->v_interlock); error = VOP_PUTPAGES(sc->sc_bs_vp, trunc_page(off), round_page(off+len), PGO_CLEANIT|PGO_SYNCIO|PGO_FREE); } @@ -1019,7 +1018,7 @@ fss_bs_thread(void *arg) scl = sc->sc_cache+sc->sc_cache_size; - nbp = getiobuf(); + nbp = getiobuf(NULL, true); nfreed = nio = 1; /* Dont sleep the first time */ @@ -1148,7 +1147,7 @@ fss_bs_thread(void *arg) FSS_UNLOCK(sc, s); - BUF_INIT(nbp); + buf_init(nbp); nbp->b_flags = B_READ; nbp->b_bcount = bp->b_bcount; nbp->b_bufsize = bp->b_bcount; @@ -1157,7 +1156,6 @@ fss_bs_thread(void *arg) nbp->b_blkno = bp->b_blkno; nbp->b_proc = bp->b_proc; nbp->b_dev = sc->sc_bdev; - nbp->b_vp = NULLVP; bdev_strategy(nbp); diff --git a/sys/dev/gpib/ct.c b/sys/dev/gpib/ct.c index bcab573fd047..fdc550e55b68 100644 --- a/sys/dev/gpib/ct.c +++ b/sys/dev/gpib/ct.c @@ -1,4 +1,4 @@ -/* $NetBSD: ct.c,v 1.12 2007/10/08 20:12:06 ad Exp $ */ +/* $NetBSD: ct.c,v 1.13 2008/01/02 11:48:37 ad Exp $ */ /*- * Copyright (c) 1996-2003 The NetBSD Foundation, Inc. @@ -128,7 +128,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ct.c,v 1.12 2007/10/08 20:12:06 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ct.c,v 1.13 2008/01/02 11:48:37 ad Exp $"); #include #include @@ -475,6 +475,7 @@ ctcommand(dev, cmd, cnt) sc->sc_bp = bp; sc->sc_cmd = cmd; bp->b_dev = dev; + bp->b_objlock = &buffer_lock; if (cmd == MTFSF) { nbp = (struct buf *)geteblk(MAXBSIZE); bp->b_data = nbp->b_data; @@ -482,7 +483,9 @@ ctcommand(dev, cmd, cnt) } while (cnt-- > 0) { - bp->b_flags = B_BUSY; + bp->b_flags = 0; + bp->b_cflags = BC_BUSY; + bp->b_oflags = 0; if (cmd == MTBSF) { sc->sc_blkno = sc->sc_eofs[sc->sc_eofp]; sc->sc_eofp--; diff --git a/sys/dev/gpib/mt.c b/sys/dev/gpib/mt.c index 026e1531c0a1..a071717fafae 100644 --- a/sys/dev/gpib/mt.c +++ b/sys/dev/gpib/mt.c @@ -1,4 +1,4 @@ -/* $NetBSD: mt.c,v 1.11 2007/07/29 12:15:43 ad Exp $ */ +/* $NetBSD: mt.c,v 1.12 2008/01/02 11:48:37 ad Exp $ */ /*- * Copyright (c) 1996-2003 The NetBSD Foundation, Inc. 
@@ -121,7 +121,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: mt.c,v 1.11 2007/07/29 12:15:43 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: mt.c,v 1.12 2008/01/02 11:48:37 ad Exp $"); #include #include @@ -515,13 +515,16 @@ mtcommand(dev, cmd, cnt) sc = device_lookup(&mt_cd, MTUNIT(dev)); bp = &sc->sc_bufstore; - if (bp->b_flags & B_BUSY) + if (bp->b_cflags & BC_BUSY) return (EBUSY); bp->b_cmd = cmd; bp->b_dev = dev; + bp->b_objlock = &buffer_lock; do { - bp->b_flags = B_BUSY | B_CMD; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_CMD; + bp->b_oflags = 0; mtstrategy(bp); biowait(bp); if (bp->b_error != 0) { @@ -530,9 +533,9 @@ mtcommand(dev, cmd, cnt) } } while (--cnt > 0); #if 0 - bp->b_flags = 0 /*&= ~B_BUSY*/; + bp->b_cflags = 0 /*&= ~BC_BUSY*/; #else - bp->b_flags &= ~B_BUSY; + bp->b_cflags &= ~BC_BUSY; #endif return (error); } diff --git a/sys/dev/isa/fd.c b/sys/dev/isa/fd.c index 35a00734a580..86e351b64685 100644 --- a/sys/dev/isa/fd.c +++ b/sys/dev/isa/fd.c @@ -1,4 +1,4 @@ -/* $NetBSD: fd.c,v 1.77 2007/10/19 12:00:16 ad Exp $ */ +/* $NetBSD: fd.c,v 1.78 2008/01/02 11:48:37 ad Exp $ */ /*- * Copyright (c) 1998, 2003 The NetBSD Foundation, Inc. @@ -88,7 +88,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.77 2007/10/19 12:00:16 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fd.c,v 1.78 2008/01/02 11:48:37 ad Exp $"); #include "rnd.h" #include "opt_ddb.h" @@ -1518,12 +1518,12 @@ fdformat(dev, finfo, l) struct buf *bp; /* set up a buffer header for fdstrategy() */ - bp = getiobuf_nowait(); + bp = getiobuf(NULL, false); if (bp == NULL) return ENOBUFS; - bp->b_vp = NULL; - bp->b_flags = B_BUSY | B_PHYS | B_FORMAT; + bp->b_cflags = BC_BUSY; + bp->b_flags = B_PHYS | B_FORMAT; bp->b_proc = l->l_proc; bp->b_dev = dev; diff --git a/sys/dev/qbus/ts.c b/sys/dev/qbus/ts.c index 32fcbdaec5fd..19320ad545c4 100644 --- a/sys/dev/qbus/ts.c +++ b/sys/dev/qbus/ts.c @@ -1,4 +1,4 @@ -/* $NetBSD: ts.c,v 1.21 2007/10/19 12:01:09 ad Exp $ */ +/* $NetBSD: ts.c,v 1.22 2008/01/02 11:48:38 ad Exp $ */ /*- * Copyright (c) 1991 The Regents of the University of California. @@ -66,7 +66,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ts.c,v 1.21 2007/10/19 12:01:09 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ts.c,v 1.22 2008/01/02 11:48:38 ad Exp $"); #undef TSDEBUG @@ -321,7 +321,6 @@ void tscommand(struct ts_softc *sc, dev_t dev, int cmd, int count) { struct buf *bp; - int s; #ifdef TSDEBUG printf("tscommand (%x, %d)\n", cmd, count); @@ -329,20 +328,19 @@ tscommand(struct ts_softc *sc, dev_t dev, int cmd, int count) bp = &sc->ts_cbuf; - s = splbio(); - while (bp->b_flags & B_BUSY) { + mutex_enter(&bufcache_lock); + while (bp->b_cflags & BC_BUSY) { /* - * This special check is because B_BUSY never + * This special check is because BC_BUSY never * gets cleared in the non-waiting rewind case. ??? */ - if (bp->b_bcount == 0 && (bp->b_flags & B_DONE)) + if (bp->b_bcount == 0 && (bp->b_oflags & BO_DONE)) break; - bp->b_flags |= B_WANTED; - (void) tsleep(bp, PRIBIO, "tscmd", 0); + (void )bbusy(bp, false, 0); /* check MOT-flag !!! */ } - bp->b_flags = B_BUSY | B_READ; - splx(s); + bp->b_flags = B_READ; + mutex_exit(&bufcache_lock); /* * Load the buffer. The b_count field gets used to hold the command @@ -354,6 +352,8 @@ tscommand(struct ts_softc *sc, dev_t dev, int cmd, int count) bp->b_bcount = count; bp->b_resid = cmd; bp->b_blkno = 0; + bp->b_oflags = 0; + bp->b_objlock = &buffer_lock; tsstrategy(bp); /* * In case of rewind from close, don't wait. 
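ctcommand() and mtcommand() above both gain the line bp->b_objlock = &buffer_lock: every buffer must now name the mutex that guards its BO_DONE state, and a device-private buffer with no vnode points at the global buffer_lock. A hedged userland model, with pthread mutexes in place of kmutex_t:

    #include <pthread.h>
    #include <stdio.h>

    #define BO_DONE 0x01

    static pthread_mutex_t buffer_lock = PTHREAD_MUTEX_INITIALIZER;

    struct toy_buf {                    /* stand-in for struct buf */
        pthread_mutex_t *b_objlock;     /* lock covering b_oflags */
        int b_oflags;
    };

    /* Completion path: BO_DONE may only change under *b_objlock. */
    static void
    buf_mark_done(struct toy_buf *bp)
    {
        pthread_mutex_lock(bp->b_objlock);
        bp->b_oflags |= BO_DONE;
        pthread_mutex_unlock(bp->b_objlock);
    }

    int
    main(void)
    {
        /* A device-private buf with no vnode points at the global
         * lock, as ctcommand()/mtcommand() do with &buffer_lock. */
        struct toy_buf b = { &buffer_lock, 0 };

        buf_mark_done(&b);
        printf("oflags=%x\n", b.b_oflags);
        return 0;
    }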
@@ -362,9 +362,10 @@ tscommand(struct ts_softc *sc, dev_t dev, int cmd, int count) if (count == 0) return; biowait(bp); - if (bp->b_flags & B_WANTED) - wakeup((void *)bp); - bp->b_flags = 0; + mutex_enter(&bufcache_lock); + cv_broadcast(&bp->b_busy); + bp->b_cflags = 0; + mutex_exit(&bufcache_lock); } /* diff --git a/sys/dev/raidframe/rf_diskqueue.c b/sys/dev/raidframe/rf_diskqueue.c index 3a7a7cb16f29..5f557b039e5f 100644 --- a/sys/dev/raidframe/rf_diskqueue.c +++ b/sys/dev/raidframe/rf_diskqueue.c @@ -1,4 +1,4 @@ -/* $NetBSD: rf_diskqueue.c,v 1.49 2007/03/04 06:02:37 christos Exp $ */ +/* $NetBSD: rf_diskqueue.c,v 1.50 2008/01/02 11:48:38 ad Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. @@ -66,7 +66,7 @@ ****************************************************************************/ #include -__KERNEL_RCSID(0, "$NetBSD: rf_diskqueue.c,v 1.49 2007/03/04 06:02:37 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: rf_diskqueue.c,v 1.50 2008/01/02 11:48:38 ad Exp $"); #include @@ -449,25 +449,19 @@ rf_CreateDiskQueueData(RF_IoType_t typ, RF_SectorNum_t ssect, int waitflag) { RF_DiskQueueData_t *p; - int s; - s = splbio(); p = pool_get(&rf_pools.dqd, waitflag); - splx(s); if (p == NULL) return (NULL); memset(p, 0, sizeof(RF_DiskQueueData_t)); if (waitflag == PR_WAITOK) { - p->bp = getiobuf(); + p->bp = getiobuf(NULL, true); } else { - p->bp = getiobuf_nowait(); + p->bp = getiobuf(NULL, false); } if (p->bp == NULL) { - /* no memory for the buffer!?!? */ - s = splbio(); pool_put(&rf_pools.dqd, p); - splx(s); return (NULL); } diff --git a/sys/dev/raidframe/rf_netbsdkintf.c b/sys/dev/raidframe/rf_netbsdkintf.c index 2d3d898ec204..608c1440b212 100644 --- a/sys/dev/raidframe/rf_netbsdkintf.c +++ b/sys/dev/raidframe/rf_netbsdkintf.c @@ -1,4 +1,4 @@ -/* $NetBSD: rf_netbsdkintf.c,v 1.241 2007/12/18 01:09:46 oster Exp $ */ +/* $NetBSD: rf_netbsdkintf.c,v 1.242 2008/01/02 11:48:38 ad Exp $ */ /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. * All rights reserved. @@ -146,7 +146,7 @@ ***********************************************************/ #include -__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.241 2007/12/18 01:09:46 oster Exp $"); +__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.242 2008/01/02 11:48:38 ad Exp $"); #include #include @@ -2208,7 +2208,9 @@ InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev, struct proc *b_proc) { /* bp->b_flags = B_PHYS | rw_flag; */ - bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */ + bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */ + bp->b_oflags = 0; + bp->b_cflags = 0; bp->b_bcount = numSect << logBytesPerSector; bp->b_bufsize = bp->b_bcount; bp->b_error = 0; @@ -2223,8 +2225,11 @@ InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev, bp->b_iodone = cbFunc; bp->b_private = cbArg; bp->b_vp = b_vp; + bp->b_objlock = &b_vp->v_interlock; if ((bp->b_flags & B_READ) == 0) { - bp->b_vp->v_numoutput++; + mutex_enter(&b_vp->v_interlock); + b_vp->v_numoutput++; + mutex_exit(&b_vp->v_interlock); } } diff --git a/sys/dev/scsipi/cd.c b/sys/dev/scsipi/cd.c index 7f0f12731dfc..ff43d77c9fb0 100644 --- a/sys/dev/scsipi/cd.c +++ b/sys/dev/scsipi/cd.c @@ -1,4 +1,4 @@ -/* $NetBSD: cd.c,v 1.271 2007/12/09 20:28:22 jmcneill Exp $ */ +/* $NetBSD: cd.c,v 1.272 2008/01/02 11:48:38 ad Exp $ */ /*- * Copyright (c) 1998, 2001, 2003, 2004, 2005 The NetBSD Foundation, Inc. 
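The tscommand() conversion just shown swaps the old B_WANTED flag plus tsleep()/wakeup() handshake for sleeping on the buffer's condition variable under bufcache_lock (the bbusy() call) and broadcasting b_busy at completion. A self-contained pthread analogue of that claim/release cycle, with toy types standing in for the kernel primitives:

    #include <pthread.h>
    #include <stdio.h>

    #define BC_BUSY 0x01

    static pthread_mutex_t bufcache_lock = PTHREAD_MUTEX_INITIALIZER;

    struct toy_buf {
        pthread_cond_t b_busy;   /* kernel: kcondvar_t in struct buf */
        int b_cflags;
    };

    /* Claim the buffer; models the bbusy() wait loop in tscommand(). */
    static void
    buf_acquire(struct toy_buf *bp)
    {
        pthread_mutex_lock(&bufcache_lock);
        while (bp->b_cflags & BC_BUSY)
            pthread_cond_wait(&bp->b_busy, &bufcache_lock);
        bp->b_cflags |= BC_BUSY;
        pthread_mutex_unlock(&bufcache_lock);
    }

    /* Release and wake waiters; models cv_broadcast(&bp->b_busy). */
    static void
    buf_release(struct toy_buf *bp)
    {
        pthread_mutex_lock(&bufcache_lock);
        bp->b_cflags = 0;
        pthread_cond_broadcast(&bp->b_busy);
        pthread_mutex_unlock(&bufcache_lock);
    }

    int
    main(void)
    {
        struct toy_buf b = { PTHREAD_COND_INITIALIZER, 0 };

        buf_acquire(&b);
        buf_release(&b);
        printf("cycled busy/unbusy\n");
        return 0;
    }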
@@ -57,7 +57,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cd.c,v 1.271 2007/12/09 20:28:22 jmcneill Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cd.c,v 1.272 2008/01/02 11:48:38 ad Exp $"); #include "rnd.h" @@ -679,7 +679,7 @@ cdstrategy(struct buf *bp) } blkno = ((blkno * lp->d_secsize) / cd->params.blksize); - nbp = getiobuf_nowait(); + nbp = getiobuf(NULL, false); if (!nbp) { /* No memory -- fail the iop. */ free(bounce, M_DEVBUF); @@ -698,14 +698,12 @@ /* Set up the IOP to the bounce buffer. */ nbp->b_error = 0; nbp->b_proc = bp->b_proc; - nbp->b_vp = NULLVP; - nbp->b_bcount = count; nbp->b_bufsize = count; - nbp->b_rawblkno = blkno; - - nbp->b_flags = bp->b_flags | B_READ | B_CALL; + nbp->b_flags = bp->b_flags | B_READ; + nbp->b_oflags = bp->b_oflags; + nbp->b_cflags = bp->b_cflags; nbp->b_iodone = cdbounce; /* store bounce state in b_private and use new buf */ @@ -970,7 +968,7 @@ cdbounce(struct buf *bp) count = MAXPHYS; } - nbp = getiobuf_nowait(); + nbp = getiobuf(NULL, false); if (!nbp) { /* No memory -- fail the iop. */ bp->b_error = ENOMEM; @@ -980,15 +978,13 @@ /* Set up the IOP to the bounce buffer. */ nbp->b_error = 0; nbp->b_proc = obp->b_proc; - nbp->b_vp = NULLVP; - nbp->b_bcount = count; nbp->b_bufsize = count; nbp->b_data = bp->b_data; - nbp->b_rawblkno = blkno; - - nbp->b_flags = obp->b_flags | B_READ | B_CALL; + nbp->b_flags = obp->b_flags | B_READ; + nbp->b_oflags = obp->b_oflags; + nbp->b_cflags = obp->b_cflags; nbp->b_iodone = cdbounce; /* store bounce state in b_private and use new buf */ diff --git a/sys/dev/scsipi/scsipi_ioctl.c b/sys/dev/scsipi/scsipi_ioctl.c index cdeccae4aab8..a10be17d2f39 100644 --- a/sys/dev/scsipi/scsipi_ioctl.c +++ b/sys/dev/scsipi/scsipi_ioctl.c @@ -44,7 +44,7 @@ -/* $NetBSD: scsipi_ioctl.c,v 1.63 2007/07/29 12:50:23 ad Exp $ */ +/* $NetBSD: scsipi_ioctl.c,v 1.64 2008/01/02 11:48:39 ad Exp $ */ /*- * Copyright (c) 1998, 2004 The NetBSD Foundation, Inc.
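The cd.c hunks above also show the allocator change repeated across the drivers in this patch: the getiobuf()/getiobuf_nowait() pair collapses into a single getiobuf(vp, waitok), which takes the vnode up front so the buffer's object lock can be derived at allocation time. The stub below only mimics that calling convention; it is an illustrative stand-in, not the kernel implementation:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct toy_vnode { int unused; };

    struct toy_buf {
        struct toy_vnode *b_vp;   /* vnode known at allocation time */
    };

    /* Shape-only stand-in for the kernel's getiobuf(vp, waitok). */
    static struct toy_buf *
    toy_getiobuf(struct toy_vnode *vp, bool waitok)
    {
        struct toy_buf *bp;

        do {
            bp = malloc(sizeof(*bp));
        } while (bp == NULL && waitok); /* kernel: sleep for memory */

        if (bp != NULL)
            bp->b_vp = vp;
        return bp;
    }

    int
    main(void)
    {
        /* waitok=false mirrors the old getiobuf_nowait(): may fail. */
        struct toy_buf *bp = toy_getiobuf(NULL, false);

        printf("%s\n", bp != NULL ? "allocated" : "ENOBUFS");
        free(bp);
        return 0;
    }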
@@ -44,7 +44,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: scsipi_ioctl.c,v 1.63 2007/07/29 12:50:23 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: scsipi_ioctl.c,v 1.64 2008/01/02 11:48:39 ad Exp $"); #include "opt_compat_freebsd.h" #include "opt_compat_netbsd.h" @@ -85,7 +85,7 @@ si_get(void) int s; si = malloc(sizeof(struct scsi_ioctl), M_TEMP, M_WAITOK|M_ZERO); - simple_lock_init(&si->si_bp.b_interlock); + buf_init(&si->si_bp); s = splbio(); LIST_INSERT_HEAD(&si_head, si, si_list); splx(s); @@ -100,6 +100,7 @@ si_free(struct scsi_ioctl *si) s = splbio(); LIST_REMOVE(si, si_list); splx(s); + buf_destroy(&si->si_bp); free(si, M_TEMP); } diff --git a/sys/dev/vme/xd.c b/sys/dev/vme/xd.c index dd5cdd962ad1..dc03257323d7 100644 --- a/sys/dev/vme/xd.c +++ b/sys/dev/vme/xd.c @@ -1,4 +1,4 @@ -/* $NetBSD: xd.c,v 1.71 2007/10/19 12:01:23 ad Exp $ */ +/* $NetBSD: xd.c,v 1.72 2008/01/02 11:48:39 ad Exp $ */ /* * @@ -51,7 +51,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: xd.c,v 1.71 2007/10/19 12:01:23 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: xd.c,v 1.72 2008/01/02 11:48:39 ad Exp $"); #undef XDC_DEBUG /* full debug */ #define XDC_DIAG /* extra sanity checks */ @@ -331,8 +331,8 @@ xddummystrat(bp) if (bp->b_bcount != XDFM_BPS) panic("xddummystrat"); bcopy(xd_labeldata, bp->b_data, XDFM_BPS); - bp->b_flags |= B_DONE; - bp->b_flags &= ~B_BUSY; + bp->b_oflags |= BO_DONE; + bp->b_cflags &= ~BC_BUSY; } int diff --git a/sys/dev/vme/xy.c b/sys/dev/vme/xy.c index 479887202daa..3087343f66ac 100644 --- a/sys/dev/vme/xy.c +++ b/sys/dev/vme/xy.c @@ -1,4 +1,4 @@ -/* $NetBSD: xy.c,v 1.74 2007/10/19 12:01:23 ad Exp $ */ +/* $NetBSD: xy.c,v 1.75 2008/01/02 11:48:39 ad Exp $ */ /* * @@ -51,7 +51,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: xy.c,v 1.74 2007/10/19 12:01:23 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: xy.c,v 1.75 2008/01/02 11:48:39 ad Exp $"); #undef XYC_DEBUG /* full debug */ #undef XYC_DIAG /* extra sanity checks */ @@ -247,8 +247,8 @@ xydummystrat(bp) if (bp->b_bcount != XYFM_BPS) panic("xydummystrat"); bcopy(xy_labeldata, bp->b_data, XYFM_BPS); - bp->b_flags |= B_DONE; - bp->b_flags &= ~B_BUSY; + bp->b_oflags |= BO_DONE; + bp->b_cflags &= ~BC_BUSY; } int diff --git a/sys/dev/vnd.c b/sys/dev/vnd.c index 4b110c45eccd..e75ba3bc607f 100644 --- a/sys/dev/vnd.c +++ b/sys/dev/vnd.c @@ -1,4 +1,4 @@ -/* $NetBSD: vnd.c,v 1.174 2007/12/18 23:22:18 riz Exp $ */ +/* $NetBSD: vnd.c,v 1.175 2008/01/02 11:48:36 ad Exp $ */ /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. @@ -137,7 +137,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.174 2007/12/18 23:22:18 riz Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.175 2008/01/02 11:48:36 ad Exp $"); #if defined(_KERNEL_OPT) #include "fs_nfs.h" @@ -620,11 +620,14 @@ vndthread(void *arg) disk_busy(&vnd->sc_dkdev); bp = &vnx->vx_buf; - BUF_INIT(bp); - bp->b_flags = (obp->b_flags & B_READ) | B_CALL; + buf_init(bp); + bp->b_flags = (obp->b_flags & B_READ); + bp->b_oflags = obp->b_oflags; + bp->b_cflags = obp->b_cflags; bp->b_iodone = vndiodone; bp->b_private = obp; bp->b_vp = vnd->sc_vp; + bp->b_objlock = &bp->b_vp->v_interlock; bp->b_data = obp->b_data; bp->b_bcount = obp->b_bcount; BIO_COPYPRIO(bp, obp); @@ -708,8 +711,11 @@ handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp) /* We need to increase the number of outputs on the vnode if * there was any write to it. 
*/ - if (!doread) - V_INCR_NUMOUTPUT(vp); + if (!doread) { + mutex_enter(&vp->v_interlock); + vp->v_numoutput++; + mutex_exit(&vp->v_interlock); + } biodone(bp); } @@ -727,15 +733,15 @@ handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp, int bsize, error, flags, skipped; size_t resid, sz; off_t bn, offset; + struct vnode *vp; flags = obp->b_flags; if (!(flags & B_READ)) { - int s; - - s = splbio(); - V_INCR_NUMOUTPUT(bp->b_vp); - splx(s); + vp = bp->b_vp; + mutex_enter(&vp->v_interlock); + vp->v_numoutput++; + mutex_exit(&vp->v_interlock); } /* convert to a byte offset within the file. */ @@ -756,7 +762,6 @@ handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp, for (offset = 0, resid = bp->b_resid; resid; resid -= sz, offset += sz) { struct buf *nbp; - struct vnode *vp; daddr_t nbn; int off, nra; @@ -792,11 +797,11 @@ handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp, #ifdef DEBUG if (vnddebug & VDB_IO) printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64 - " sz 0x%zx\n", - vnd->sc_vp, vp, (long long)bn, nbn, sz); + " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn, + nbn, sz); #endif - nbp = getiobuf(); + nbp = getiobuf(vp, true); nestiobuf_setup(bp, nbp, offset, sz); nbp->b_blkno = nbn + btodb(off); diff --git a/sys/fs/adosfs/adutil.c b/sys/fs/adosfs/adutil.c index 5e101c160010..d482c1c96424 100644 --- a/sys/fs/adosfs/adutil.c +++ b/sys/fs/adosfs/adutil.c @@ -1,4 +1,4 @@ -/* $NetBSD: adutil.c,v 1.5 2007/10/10 20:42:22 ad Exp $ */ +/* $NetBSD: adutil.c,v 1.6 2008/01/02 11:48:39 ad Exp $ */ /* * Copyright (c) 1994 Christian E. Hopps @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: adutil.c,v 1.5 2007/10/10 20:42:22 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: adutil.c,v 1.6 2008/01/02 11:48:39 ad Exp $"); #include #include @@ -69,7 +69,7 @@ start_over: for (ap = hp->lh_first; ap != NULL; ap = ap->link.le_next) { if (ap->block == an) { vp = ATOV(ap); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); simple_unlock(&adosfs_hashlock); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) goto start_over; diff --git a/sys/fs/adosfs/advnops.c b/sys/fs/adosfs/advnops.c index 1a4fd2e58381..27898c6e94e0 100644 --- a/sys/fs/adosfs/advnops.c +++ b/sys/fs/adosfs/advnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: advnops.c,v 1.26 2007/11/26 19:01:41 pooka Exp $ */ +/* $NetBSD: advnops.c,v 1.27 2008/01/02 11:48:40 ad Exp $ */ /* * Copyright (c) 1994 Christian E. 
Hopps @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: advnops.c,v 1.26 2007/11/26 19:01:41 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: advnops.c,v 1.27 2008/01/02 11:48:40 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -847,15 +847,15 @@ adosfs_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + bool *a_recycle; } */ *sp = v; struct vnode *vp = sp->a_vp; - struct lwp *l = curlwp; #ifdef ADOSFS_DIAGNOSTIC advopprint(sp); #endif VOP_UNLOCK(vp, 0); /* XXX this needs to check if file was deleted */ - vrecycle(vp, NULL, l); + *sp->a_recycle = true; #ifdef ADOSFS_DIAGNOSTIC printf(" 0)"); diff --git a/sys/fs/cd9660/cd9660_node.c b/sys/fs/cd9660/cd9660_node.c index ed431af4bd61..f8c40e9b5e0c 100644 --- a/sys/fs/cd9660/cd9660_node.c +++ b/sys/fs/cd9660/cd9660_node.c @@ -1,4 +1,4 @@ -/* $NetBSD: cd9660_node.c,v 1.19 2007/12/08 14:41:11 ad Exp $ */ +/* $NetBSD: cd9660_node.c,v 1.20 2008/01/02 11:48:40 ad Exp $ */ /*- * Copyright (c) 1982, 1986, 1989, 1994 @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cd9660_node.c,v 1.19 2007/12/08 14:41:11 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cd9660_node.c,v 1.20 2008/01/02 11:48:40 ad Exp $"); #include #include @@ -154,7 +154,7 @@ loop: if (flags == 0) { mutex_exit(&cd9660_ihash_lock); } else { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&cd9660_ihash_lock); if (vget(vp, flags | LK_INTERLOCK)) goto loop; @@ -209,6 +209,7 @@ cd9660_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + bool *a_recycle; } */ *ap = v; struct vnode *vp = ap->a_vp; struct iso_node *ip = VTOI(vp); @@ -217,14 +218,13 @@ cd9660_inactive(v) if (prtactive && vp->v_usecount != 0) vprint("cd9660_inactive: pushing active", vp); - ip->i_flag = 0; - VOP_UNLOCK(vp, 0); /* * If we are done with the inode, reclaim it * so that it can be reused immediately. 
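cd9660_ihashget() above, like the efs, filecore, hfs and msdosfs lookups that follow, now takes the vnode's interlock with mutex_enter() before dropping the hash lock and hands it to vget() via LK_INTERLOCK; a failed vget() means the vnode was reclaimed mid-lookup and the search restarts. A simplified, runnable model of that lock ordering (toy types, and a single attempt in place of the kernel's goto loop):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t hash_lock = PTHREAD_MUTEX_INITIALIZER;

    struct toy_vnode {
        pthread_mutex_t v_interlock;
        bool reclaiming;        /* vnode being torn down */
    };

    /* vget() analogue: consumes the held interlock; fails if doomed. */
    static bool
    toy_vget(struct toy_vnode *vp)
    {
        bool ok = !vp->reclaiming;

        pthread_mutex_unlock(&vp->v_interlock);
        return ok;
    }

    static struct toy_vnode *
    hash_lookup(struct toy_vnode *vp)
    {
        pthread_mutex_lock(&hash_lock);
        /* ...hash chain search would locate vp here... */
        pthread_mutex_lock(&vp->v_interlock); /* before dropping hash lock */
        pthread_mutex_unlock(&hash_lock);
        if (!toy_vget(vp))
            return NULL;    /* kernel: goto loop and search again */
        return vp;
    }

    int
    main(void)
    {
        struct toy_vnode v = { PTHREAD_MUTEX_INITIALIZER, false };

        printf("%s\n", hash_lookup(&v) != NULL ? "got vnode" : "retry");
        return 0;
    }

Taking the interlock before releasing the hash lock closes the window in which the vnode could be freed between the lookup and the vget().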
*/ - if (ip->inode.iso_mode == 0) - vrecycle(vp, (struct simplelock *)0, curlwp); + ip->i_flag = 0; + *ap->a_recycle = (ip->inode.iso_mode == 0); + VOP_UNLOCK(vp, 0); return error; } diff --git a/sys/fs/cd9660/cd9660_vfsops.c b/sys/fs/cd9660/cd9660_vfsops.c index 604764ef0df0..4eed0d8e9171 100644 --- a/sys/fs/cd9660/cd9660_vfsops.c +++ b/sys/fs/cd9660/cd9660_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: cd9660_vfsops.c,v 1.52 2007/12/08 19:29:42 pooka Exp $ */ +/* $NetBSD: cd9660_vfsops.c,v 1.53 2008/01/02 11:48:40 ad Exp $ */ /*- * Copyright (c) 1994 @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cd9660_vfsops.c,v 1.52 2007/12/08 19:29:42 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cd9660_vfsops.c,v 1.53 2008/01/02 11:48:40 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" @@ -420,6 +420,7 @@ iso_mountfs(devvp, mp, l, argp) mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; mp->mnt_stat.f_namemax = MAXNAMLEN; mp->mnt_flag |= MNT_LOCAL; + mp->mnt_iflag |= IMNT_MPSAFE; mp->mnt_dev_bshift = iso_bsize; mp->mnt_fs_bshift = isomp->im_bshift; isomp->im_mountp = mp; @@ -871,7 +872,6 @@ cd9660_vget_internal(mp, ino, vpp, relocated, isodir) vp->v_data = NULL; VOP_UNLOCK(vp, 0); vp->v_op = spec_vnodeop_p; - vrele(vp); vgone(vp); lockmgr(&nvp->v_lock, LK_EXCLUSIVE, &nvp->v_interlock); /* diff --git a/sys/fs/efs/efs_ihash.c b/sys/fs/efs/efs_ihash.c index bac0090e3a16..edd0f2de3e7a 100644 --- a/sys/fs/efs/efs_ihash.c +++ b/sys/fs/efs/efs_ihash.c @@ -1,4 +1,4 @@ -/* $NetBSD: efs_ihash.c,v 1.1 2007/06/29 23:30:28 rumble Exp $ */ +/* $NetBSD: efs_ihash.c,v 1.2 2008/01/02 11:48:40 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 @@ -36,7 +36,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: efs_ihash.c,v 1.1 2007/06/29 23:30:28 rumble Exp $"); +__KERNEL_RCSID(0, "$NetBSD: efs_ihash.c,v 1.2 2008/01/02 11:48:40 ad Exp $"); #include #include @@ -146,7 +146,7 @@ efs_ihashget(dev_t dev, ino_t inum, int flags) if (flags == 0) { mutex_exit(&efs_ihash_lock); } else { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&efs_ihash_lock); if (vget(vp, flags | LK_INTERLOCK)) goto loop; diff --git a/sys/fs/efs/efs_vnops.c b/sys/fs/efs/efs_vnops.c index 407375279cb6..64d5cb0f9ede 100644 --- a/sys/fs/efs/efs_vnops.c +++ b/sys/fs/efs/efs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: efs_vnops.c,v 1.12 2007/11/26 19:01:43 pooka Exp $ */ +/* $NetBSD: efs_vnops.c,v 1.13 2008/01/02 11:48:40 ad Exp $ */ /* * Copyright (c) 2006 Stephen M. 
Rumble @@ -17,7 +17,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: efs_vnops.c,v 1.12 2007/11/26 19:01:43 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: efs_vnops.c,v 1.13 2008/01/02 11:48:40 ad Exp $"); #include #include @@ -559,14 +559,13 @@ efs_inactive(void *v) struct vop_inactive_args /* { const struct vnodeop_desc *a_desc; struct vnode *a_vp; + bool *a_recycle } */ *ap = v; struct efs_inode *eip = EFS_VTOI(ap->a_vp); + *ap->a_recycle = (eip->ei_mode == 0); VOP_UNLOCK(ap->a_vp, 0); - if (eip->ei_mode == 0) - vrecycle(ap->a_vp, NULL, curlwp); - return (0); } diff --git a/sys/fs/filecorefs/filecore_node.c b/sys/fs/filecorefs/filecore_node.c index 228818301796..24a52126f44a 100644 --- a/sys/fs/filecorefs/filecore_node.c +++ b/sys/fs/filecorefs/filecore_node.c @@ -1,4 +1,4 @@ -/* $NetBSD: filecore_node.c,v 1.12 2007/11/26 19:01:44 pooka Exp $ */ +/* $NetBSD: filecore_node.c,v 1.13 2008/01/02 11:48:41 ad Exp $ */ /*- * Copyright (c) 1982, 1986, 1989, 1994 @@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: filecore_node.c,v 1.12 2007/11/26 19:01:44 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: filecore_node.c,v 1.13 2008/01/02 11:48:41 ad Exp $"); #include #include @@ -174,7 +174,7 @@ loop: LIST_FOREACH(ip, &filecorehashtbl[INOHASH(dev, inum)], i_hash) { if (inum == ip->i_number && dev == ip->i_dev) { vp = ITOV(ip); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); simple_unlock(&filecore_ihash_slock); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) goto loop; @@ -226,7 +226,7 @@ filecore_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; - struct lwp *a_l; + bool *a_recycle; } */ *ap = v; struct vnode *vp = ap->a_vp; struct filecore_node *ip = VTOI(vp); @@ -235,14 +235,13 @@ filecore_inactive(v) if (prtactive && vp->v_usecount != 0) vprint("filecore_inactive: pushing active", vp); - ip->i_flag = 0; - VOP_UNLOCK(vp, 0); /* * If we are done with the inode, reclaim it * so that it can be reused immediately. */ - if (filecore_staleinode(ip)) - vrecycle(vp, (struct simplelock *)0, curlwp); + ip->i_flag = 0; + *ap->a_recycle = (filecore_staleinode(ip) != 0); + VOP_UNLOCK(vp, 0); return error; } diff --git a/sys/fs/hfs/hfs_nhash.c b/sys/fs/hfs/hfs_nhash.c index 8ebf878852ff..4ff1e52cd61d 100644 --- a/sys/fs/hfs/hfs_nhash.c +++ b/sys/fs/hfs/hfs_nhash.c @@ -1,4 +1,4 @@ -/* $NetBSD: hfs_nhash.c,v 1.3 2007/12/11 12:04:23 lukem Exp $ */ +/* $NetBSD: hfs_nhash.c,v 1.4 2008/01/02 11:48:41 ad Exp $ */ /*- * Copyright (c) 2005, 2007 The NetBSD Foundation, Inc. @@ -59,7 +59,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: hfs_nhash.c,v 1.3 2007/12/11 12:04:23 lukem Exp $"); +__KERNEL_RCSID(0, "$NetBSD: hfs_nhash.c,v 1.4 2008/01/02 11:48:41 ad Exp $"); #include #include @@ -127,7 +127,7 @@ loop: LIST_FOREACH(hp, hpp, h_hash) { if (cnid == hp->h_rec.cnid && dev == hp->h_dev) { vp = HTOV(hp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); simple_unlock(&hfs_nhash_slock); if (vget(vp, flags | LK_INTERLOCK)) goto loop; diff --git a/sys/fs/hfs/hfs_subr.c b/sys/fs/hfs/hfs_subr.c index 2929b31854d7..5d11b289467c 100644 --- a/sys/fs/hfs/hfs_subr.c +++ b/sys/fs/hfs/hfs_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: hfs_subr.c,v 1.6 2007/11/26 19:01:45 pooka Exp $ */ +/* $NetBSD: hfs_subr.c,v 1.7 2008/01/02 11:48:41 ad Exp $ */ /*- * Copyright (c) 2005, 2007 The NetBSD Foundation, Inc. 
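efs_inactive() and filecore_inactive() above follow the same VOP_INACTIVE conversion as cd9660 and adosfs: instead of calling vrecycle() itself, each filesystem reports through the new a_recycle output whether the node is dead and lets the caller reclaim it, with the decision made before VOP_UNLOCK(). A hypothetical, simplified sketch of the new shape:

    #include <stdbool.h>
    #include <stdio.h>

    struct toy_inode {
        int mode;               /* 0 means the file is gone */
    };

    /*
     * New-style inactive: report recyclability instead of calling
     * vrecycle() directly; the vnode unlock would follow the
     * assignment, as in the hunks above.
     */
    static int
    toy_inactive(struct toy_inode *ip, bool *recyclep)
    {
        *recyclep = (ip->mode == 0);
        return 0;
    }

    int
    main(void)
    {
        struct toy_inode ip = { 0 };
        bool recycle;

        toy_inactive(&ip, &recycle);
        printf("recycle=%d\n", (int)recycle);
        return 0;
    }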
@@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: hfs_subr.c,v 1.6 2007/11/26 19:01:45 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: hfs_subr.c,v 1.7 2008/01/02 11:48:41 ad Exp $"); #include #include @@ -83,7 +83,6 @@ hfs_vinit(struct mount *mp, int (**specops)(void *), int (**fifoops)(void *), vp->v_vflag &= ~VV_LOCKSWORK; VOP_UNLOCK(vp, 0); vp->v_op = specops; - vrele(vp); vgone(vp); lockmgr(&nvp->v_lock, LK_EXCLUSIVE, &nvp->v_interlock); diff --git a/sys/fs/msdosfs/msdosfs_denode.c b/sys/fs/msdosfs/msdosfs_denode.c index bcaea49a9988..46b52412f54f 100644 --- a/sys/fs/msdosfs/msdosfs_denode.c +++ b/sys/fs/msdosfs/msdosfs_denode.c @@ -1,4 +1,4 @@ -/* $NetBSD: msdosfs_denode.c,v 1.29 2007/12/28 17:46:48 reinoud Exp $ */ +/* $NetBSD: msdosfs_denode.c,v 1.30 2008/01/02 11:48:41 ad Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. @@ -48,7 +48,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: msdosfs_denode.c,v 1.29 2007/12/28 17:46:48 reinoud Exp $"); +__KERNEL_RCSID(0, "$NetBSD: msdosfs_denode.c,v 1.30 2008/01/02 11:48:41 ad Exp $"); #include #include @@ -176,7 +176,7 @@ loop: if (flags == 0) { mutex_exit(&msdosfs_ihash_lock); } else { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&msdosfs_ihash_lock); if (vget(vp, flags | LK_INTERLOCK)) goto loop; @@ -668,6 +668,7 @@ msdosfs_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + bool *a_recycle; } */ *ap = v; struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); @@ -704,7 +705,6 @@ msdosfs_inactive(v) } deupdat(dep, 0); out: - VOP_UNLOCK(vp, 0); /* * If we are done with the denode, reclaim it * so that it can be reused immediately. @@ -713,8 +713,8 @@ out: printf("msdosfs_inactive(): v_usecount %d, de_Name[0] %x\n", vp->v_usecount, dep->de_Name[0]); #endif - if (dep->de_Name[0] == SLOT_DELETED) - vrecycle(vp, (struct simplelock *)0, curlwp); + *ap->a_recycle = (dep->de_Name[0] == SLOT_DELETED); + VOP_UNLOCK(vp, 0); return (error); } diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c index b428b3e33a3f..7e162241d9fa 100644 --- a/sys/fs/msdosfs/msdosfs_vfsops.c +++ b/sys/fs/msdosfs/msdosfs_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: msdosfs_vfsops.c,v 1.55 2007/12/08 19:29:43 pooka Exp $ */ +/* $NetBSD: msdosfs_vfsops.c,v 1.56 2008/01/02 11:48:41 ad Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. @@ -48,7 +48,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: msdosfs_vfsops.c,v 1.55 2007/12/08 19:29:43 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: msdosfs_vfsops.c,v 1.56 2008/01/02 11:48:41 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -937,7 +937,7 @@ msdosfs_sync(mp, waitfor, cred) int waitfor; kauth_cred_t cred; { - struct vnode *vp, *nvp; + struct vnode *vp, *mvp; struct denode *dep; struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); int error, allerror = 0; @@ -953,44 +953,47 @@ msdosfs_sync(mp, waitfor, cred) /* update fats here */ } } + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return ENOMEM; /* * Write back each (modified) denode. */ - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); loop: - for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { - /* - * If the vnode that we are about to sync is no longer - * associated with this mount point, start over. 
- */ - if (vp->v_mount != mp) - goto loop; - simple_lock(&vp->v_interlock); - nvp = TAILQ_NEXT(vp, v_mntvnodes); + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + if (vp->v_mount != mp || vismarker(vp)) + continue; + mutex_enter(&vp->v_interlock); dep = VTODE(vp); if (waitfor == MNT_LAZY || vp->v_type == VNON || (((dep->de_flag & (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) && (LIST_EMPTY(&vp->v_dirtyblkhd) && UVM_OBJ_IS_CLEAN(&vp->v_uobj)))) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); continue; } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); if (error) { - simple_lock(&mntvnode_slock); - if (error == ENOENT) + mutex_enter(&mntvnode_lock); + if (error == ENOENT) { + (void)vunmark(mvp); goto loop; + } continue; } if ((error = VOP_FSYNC(vp, cred, waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0)) != 0) allerror = error; vput(vp); - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); + vfree(mvp); + /* * Force stale file system control information to be flushed. */ diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c index 7e44440c72b1..0e473287384a 100644 --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: msdosfs_vnops.c,v 1.45 2007/12/28 17:46:48 reinoud Exp $ */ +/* $NetBSD: msdosfs_vnops.c,v 1.46 2008/01/02 11:48:42 ad Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. @@ -48,7 +48,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: msdosfs_vnops.c,v 1.45 2007/12/28 17:46:48 reinoud Exp $"); +__KERNEL_RCSID(0, "$NetBSD: msdosfs_vnops.c,v 1.46 2008/01/02 11:48:42 ad Exp $"); #include #include @@ -213,10 +213,10 @@ msdosfs_close(v) struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_usecount > 1) DETIMES(dep, NULL, NULL, NULL, dep->de_pmp->pm_gmtoff); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (0); } @@ -659,7 +659,7 @@ msdosfs_write(v) */ if (!async && oldoff >> 16 != uio->uio_offset >> 16) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16, (uio->uio_offset >> 16) << 16, PGO_CLEANIT); } @@ -668,7 +668,7 @@ msdosfs_write(v) /* set final size */ uvm_vnp_setsize(vp, dep->de_FileSize); if (error == 0 && ioflag & IO_SYNC) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(oldoff), round_page(oldoff + bytelen), PGO_CLEANIT | PGO_SYNCIO); } diff --git a/sys/fs/ntfs/ntfs_inode.h b/sys/fs/ntfs/ntfs_inode.h index 2deafe3b03be..6650354fc37a 100644 --- a/sys/fs/ntfs/ntfs_inode.h +++ b/sys/fs/ntfs/ntfs_inode.h @@ -1,4 +1,4 @@ -/* $NetBSD: ntfs_inode.h,v 1.4 2007/03/04 06:03:00 christos Exp $ */ +/* $NetBSD: ntfs_inode.h,v 1.5 2008/01/02 11:48:42 ad Exp $ */ /*- * Copyright (c) 1998, 1999 Semen Ustimenko @@ -73,7 +73,7 @@ struct ntnode { /* locking */ struct lock i_lock; - struct simplelock i_interlock; + kmutex_t i_interlock; int i_usecount; LIST_HEAD(,fnode) i_fnlist; diff --git a/sys/fs/ntfs/ntfs_subr.c b/sys/fs/ntfs/ntfs_subr.c index 7a0927c3d674..b0e2c31a18d6 100644 --- a/sys/fs/ntfs/ntfs_subr.c +++ b/sys/fs/ntfs/ntfs_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: ntfs_subr.c,v 1.33 2007/10/10 20:42:24 ad Exp $ */ +/* $NetBSD: ntfs_subr.c,v 1.34 2008/01/02 11:48:42 ad Exp $ */ 
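The msdosfs_sync() loop above introduces the marker-vnode idiom (valloc/vmark/vunmark/vismarker) that also appears later in the puffs and smbfs sync paths: a dummy node is parked after the current vnode so the iteration can survive the list changing while mntvnode_lock is dropped, instead of restarting from the head on every race. A runnable userland model on a toy doubly-linked list, with a pthread mutex standing in for mntvnode_lock:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct node {
        struct node *prev, *next;
        bool marker;            /* true for iteration markers */
        int val;
    };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    static void
    insert_after(struct node *pos, struct node *n)
    {
        n->prev = pos;
        n->next = pos->next;
        if (pos->next != NULL)
            pos->next->prev = n;
        pos->next = n;
    }

    static void
    remove_node(struct node *n)
    {
        n->prev->next = n->next;
        if (n->next != NULL)
            n->next->prev = n->prev;
    }

    int
    main(void)
    {
        struct node head = {0}, a = {0}, b = {0}, mvp = {0};
        struct node *vp;

        mvp.marker = true;
        a.val = 1;
        b.val = 2;
        insert_after(&head, &b);
        insert_after(&head, &a);        /* list: head, a, b */

        pthread_mutex_lock(&list_lock);
        for (vp = head.next; vp != NULL;) {
            insert_after(vp, &mvp);     /* kernel: vmark(mvp, vp) */
            if (!vp->marker) {          /* kernel: vismarker(vp) */
                /* Drop the lock to do real work on vp; the marker
                 * keeps our place if the list changes meanwhile. */
                pthread_mutex_unlock(&list_lock);
                printf("visiting %d\n", vp->val);
                pthread_mutex_lock(&list_lock);
            }
            vp = mvp.next;              /* kernel: vunmark(mvp) */
            remove_node(&mvp);
        }
        pthread_mutex_unlock(&list_lock);
        return 0;
    }

Other walkers simply skip nodes flagged as markers, which is what the vismarker() test in the kernel loops does.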
/*- * Copyright (c) 1998, 1999 Semen Ustimenko (semenu@FreeBSD.org) @@ -29,7 +29,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ntfs_subr.c,v 1.33 2007/10/10 20:42:24 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ntfs_subr.c,v 1.34 2008/01/02 11:48:42 ad Exp $"); #include #include @@ -385,7 +385,7 @@ ntfs_ntget(ip) dprintf(("ntfs_ntget: get ntnode %llu: %p, usecount: %d\n", (unsigned long long)ip->i_number, ip, ip->i_usecount)); - simple_lock(&ip->i_interlock); + mutex_enter(&ip->i_interlock); ip->i_usecount++; lockmgr(&ip->i_lock, LK_EXCLUSIVE | LK_INTERLOCK, &ip->i_interlock); @@ -445,7 +445,7 @@ ntfs_ntlookup( /* init lock and lock the newborn ntnode */ lockinit(&ip->i_lock, PINOD, "ntnode", 0, LK_EXCLUSIVE); - simple_lock_init(&ip->i_interlock); + mutex_init(&ip->i_interlock, MUTEX_DEFAULT, IPL_NONE); ntfs_ntget(ip); ntfs_nthashins(ip); @@ -475,7 +475,7 @@ ntfs_ntput(ip) dprintf(("ntfs_ntput: rele ntnode %llu: %p, usecount: %d\n", (unsigned long long)ip->i_number, ip, ip->i_usecount)); - simple_lock(&ip->i_interlock); + mutex_enter(&ip->i_interlock); ip->i_usecount--; #ifdef DIAGNOSTIC @@ -501,6 +501,8 @@ ntfs_ntput(ip) LIST_REMOVE(vap,va_list); ntfs_freentvattr(vap); } + mutex_destroy(&ip->i_interlock); + lockdestroy(&ip->i_lock); FREE(ip, M_NTFSNTNODE); } } @@ -512,9 +514,9 @@ void ntfs_ntref(ip) struct ntnode *ip; { - simple_lock(&ip->i_interlock); + mutex_enter(&ip->i_interlock); ip->i_usecount++; - simple_unlock(&ip->i_interlock); + mutex_exit(&ip->i_interlock); dprintf(("ntfs_ntref: ino %llu, usecount: %d\n", (unsigned long long)ip->i_number, ip->i_usecount)); @@ -531,13 +533,13 @@ ntfs_ntrele(ip) dprintf(("ntfs_ntrele: rele ntnode %llu: %p, usecount: %d\n", (unsigned long long)ip->i_number, ip, ip->i_usecount)); - simple_lock(&ip->i_interlock); + mutex_enter(&ip->i_interlock); ip->i_usecount--; if (ip->i_usecount < 0) panic("ntfs_ntrele: ino: %llu usecount: %d ", (unsigned long long)ip->i_number, ip->i_usecount); - simple_unlock(&ip->i_interlock); + mutex_exit(&ip->i_interlock); } /* diff --git a/sys/fs/ptyfs/ptyfs_subr.c b/sys/fs/ptyfs/ptyfs_subr.c index ecf862610c00..ace55860ec6e 100644 --- a/sys/fs/ptyfs/ptyfs_subr.c +++ b/sys/fs/ptyfs/ptyfs_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: ptyfs_subr.c,v 1.11 2007/12/08 19:29:44 pooka Exp $ */ +/* $NetBSD: ptyfs_subr.c,v 1.12 2008/01/02 11:48:43 ad Exp $ */ /* * Copyright (c) 1993 @@ -73,7 +73,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ptyfs_subr.c,v 1.11 2007/12/08 19:29:44 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ptyfs_subr.c,v 1.12 2008/01/02 11:48:43 ad Exp $"); #include #include @@ -239,7 +239,6 @@ ptyfs_allocvp(struct mount *mp, struct vnode **vpp, ptyfstype type, int pty, vp->v_vflag &= ~VV_LOCKSWORK; VOP_UNLOCK(vp, 0); vp->v_op = spec_vnodeop_p; - vrele(vp); vgone(vp); lockmgr(&nvp->v_lock, LK_EXCLUSIVE, &nvp->v_interlock); /* @@ -374,7 +373,7 @@ loop: vp = PTYFSTOV(pp); if (pty == pp->ptyfs_pty && pp->ptyfs_type == type && vp->v_mount == mp) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&ptyfs_used_slock); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) goto loop; diff --git a/sys/fs/ptyfs/ptyfs_vnops.c b/sys/fs/ptyfs/ptyfs_vnops.c index 8d8f5a0ecb23..d700fcf3f3e0 100644 --- a/sys/fs/ptyfs/ptyfs_vnops.c +++ b/sys/fs/ptyfs/ptyfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: ptyfs_vnops.c,v 1.26 2007/11/26 19:01:49 pooka Exp $ */ +/* $NetBSD: ptyfs_vnops.c,v 1.27 2008/01/02 11:48:43 ad Exp $ */ /* * Copyright (c) 1993, 1995 @@ -76,7 +76,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ptyfs_vnops.c,v 1.26 2007/11/26 
19:01:49 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ptyfs_vnops.c,v 1.27 2008/01/02 11:48:43 ad Exp $"); #include #include @@ -745,10 +745,10 @@ ptyfs_close(void *v) struct vnode *vp = ap->a_vp; struct ptyfsnode *ptyfs = VTOPTYFS(vp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_usecount > 1) PTYFS_ITIMES(ptyfs, NULL, NULL, NULL); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); switch (ptyfs->ptyfs_type) { case PTYFSpts: diff --git a/sys/fs/puffs/puffs_msgif.c b/sys/fs/puffs/puffs_msgif.c index 6035ad78e5b9..b18eea97c17c 100644 --- a/sys/fs/puffs/puffs_msgif.c +++ b/sys/fs/puffs/puffs_msgif.c @@ -1,4 +1,4 @@ -/* $NetBSD: puffs_msgif.c,v 1.61 2007/12/05 12:11:56 pooka Exp $ */ +/* $NetBSD: puffs_msgif.c,v 1.62 2008/01/02 11:48:43 ad Exp $ */ /* * Copyright (c) 2005, 2006, 2007 Antti Kantee. All Rights Reserved. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: puffs_msgif.c,v 1.61 2007/12/05 12:11:56 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: puffs_msgif.c,v 1.62 2008/01/02 11:48:43 ad Exp $"); #include #include @@ -927,7 +927,7 @@ puffsop_flush(struct puffs_mount *pmp, struct puffs_flush *pf) break; } - simple_lock(&vp->v_uobj.vmobjlock); + mutex_enter(&vp->v_uobj.vmobjlock); rv = VOP_PUTPAGES(vp, offlo, offhi, flags); break; @@ -1032,18 +1032,18 @@ puffs_msgif_close(void *this) * wait for syncer_mutex. Otherwise the mointpoint can be * wiped out while we wait. */ - simple_lock(&mp->mnt_slock); + mutex_enter(&mp->mnt_mutex); mp->mnt_wcnt++; - simple_unlock(&mp->mnt_slock); + mutex_exit(&mp->mnt_mutex); mutex_enter(&syncer_mutex); - simple_lock(&mp->mnt_slock); + mutex_enter(&mp->mnt_mutex); mp->mnt_wcnt--; if (mp->mnt_wcnt == 0) wakeup(&mp->mnt_wcnt); gone = mp->mnt_iflag & IMNT_GONE; - simple_unlock(&mp->mnt_slock); + mutex_exit(&mp->mnt_mutex); if (gone) { mutex_exit(&syncer_mutex); return 0; diff --git a/sys/fs/puffs/puffs_node.c b/sys/fs/puffs/puffs_node.c index f338f36599af..bf71c3a3f0ed 100644 --- a/sys/fs/puffs/puffs_node.c +++ b/sys/fs/puffs/puffs_node.c @@ -1,4 +1,4 @@ -/* $NetBSD: puffs_node.c,v 1.8 2007/11/17 21:55:29 pooka Exp $ */ +/* $NetBSD: puffs_node.c,v 1.9 2008/01/02 11:48:43 ad Exp $ */ /* * Copyright (c) 2005, 2006, 2007 Antti Kantee. All Rights Reserved. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: puffs_node.c,v 1.8 2007/11/17 21:55:29 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: puffs_node.c,v 1.9 2008/01/02 11:48:43 ad Exp $"); #include #include @@ -136,10 +136,10 @@ puffs_getvnode(struct mount *mp, void *cookie, enum vtype type, */ /* So mp is not dead yet.. good.. 
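puffs_msgif_close() above converts the mnt_slock'd waiter count to the new mnt_mutex: mnt_wcnt pins the mountpoint while the thread sleeps on syncer_mutex, and the last waiter out is woken via wakeup(&mp->mnt_wcnt). A pthread analogue, with a condition variable standing in for the kernel's wakeup channel and a toy mount structure:

    #include <pthread.h>
    #include <stdio.h>

    struct toy_mount {
        pthread_mutex_t mnt_mutex;   /* replaces mnt_slock */
        pthread_cond_t mnt_wcnt_cv;  /* stands in for wakeup(&mnt_wcnt) */
        int mnt_wcnt;                /* threads pinning the mount */
    };

    static void
    mount_pin(struct toy_mount *mp)
    {
        pthread_mutex_lock(&mp->mnt_mutex);
        mp->mnt_wcnt++;              /* keep the mount from going away */
        pthread_mutex_unlock(&mp->mnt_mutex);
    }

    static void
    mount_unpin(struct toy_mount *mp)
    {
        pthread_mutex_lock(&mp->mnt_mutex);
        if (--mp->mnt_wcnt == 0)     /* kernel: wakeup(&mp->mnt_wcnt) */
            pthread_cond_broadcast(&mp->mnt_wcnt_cv);
        pthread_mutex_unlock(&mp->mnt_mutex);
    }

    int
    main(void)
    {
        struct toy_mount m = { PTHREAD_MUTEX_INITIALIZER,
            PTHREAD_COND_INITIALIZER, 0 };

        mount_pin(&m);
        /* ...acquire syncer_mutex and do the real work here... */
        mount_unpin(&m);
        printf("wcnt=%d\n", m.mnt_wcnt);
        return 0;
    }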
inform new vnode of its master */ - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); - simple_unlock(&mntvnode_slock); vp->v_mount = mp; + mutex_exit(&mntvnode_lock); /* * clerical tasks & footwork @@ -167,7 +167,6 @@ puffs_getvnode(struct mount *mp, void *cookie, enum vtype type, */ vp->v_op = spec_vnodeop_p; vp->v_vflag &= ~VV_LOCKSWORK; - vrele(vp); vgone(vp); /* cya */ /* init "new" vnode */ @@ -374,7 +373,7 @@ puffs_makeroot(struct puffs_mount *pmp) mutex_enter(&pmp->pmp_lock); vp = pmp->pmp_root; if (vp) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&pmp->pmp_lock); if (vget(vp, LK_INTERLOCK) == 0) return 0; @@ -452,7 +451,7 @@ puffs_cookie2vnode(struct puffs_mount *pmp, void *cookie, int lock, return PUFFS_NOSUCHCOOKIE; } vp = pnode->pn_vp; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&pmp->pmp_lock); vgetflags = LK_INTERLOCK; diff --git a/sys/fs/puffs/puffs_sys.h b/sys/fs/puffs/puffs_sys.h index b8df5c12f7a7..4f0abeb5eac8 100644 --- a/sys/fs/puffs/puffs_sys.h +++ b/sys/fs/puffs/puffs_sys.h @@ -1,4 +1,4 @@ -/* $NetBSD: puffs_sys.h,v 1.67 2007/12/08 19:57:04 pooka Exp $ */ +/* $NetBSD: puffs_sys.h,v 1.68 2008/01/02 11:48:43 ad Exp $ */ /* * Copyright (c) 2005, 2006 Antti Kantee. All Rights Reserved. @@ -152,7 +152,6 @@ struct puffs_mount { #define PNODE_NOREFS 0x01 /* no backend reference */ -#define PNODE_DYING 0x02 /* NOREF + inactive */ #define PNODE_SUSPEND 0x04 /* issue all operations as FAF */ #define PNODE_DOINACT 0x08 /* if inactive-on-demand, call inactive */ diff --git a/sys/fs/puffs/puffs_vfsops.c b/sys/fs/puffs/puffs_vfsops.c index c5537330eb8f..1ecaf16a00d5 100644 --- a/sys/fs/puffs/puffs_vfsops.c +++ b/sys/fs/puffs/puffs_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: puffs_vfsops.c,v 1.73 2007/12/30 23:04:12 pooka Exp $ */ +/* $NetBSD: puffs_vfsops.c,v 1.74 2008/01/02 11:48:44 ad Exp $ */ /* * Copyright (c) 2005, 2006 Antti Kantee. All Rights Reserved. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: puffs_vfsops.c,v 1.73 2007/12/30 23:04:12 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: puffs_vfsops.c,v 1.74 2008/01/02 11:48:44 ad Exp $"); #include #include @@ -428,7 +428,7 @@ static int pageflush(struct mount *mp, kauth_cred_t cred, int waitfor, int suspending) { struct puffs_node *pn; - struct vnode *vp, *nvp; + struct vnode *vp, *mvp; int error, rv; KASSERT(((waitfor == MNT_WAIT) && suspending) == 0); @@ -438,29 +438,31 @@ pageflush(struct mount *mp, kauth_cred_t cred, int waitfor, int suspending) error = 0; + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return ENOMEM; + /* * Sync all cached data from regular vnodes (which are not * currently locked, see below). After this we call VFS_SYNC * for the fs server, which should handle data and metadata for * all the nodes it knows to exist. 
*/ - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); loop: - for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { - /* check if we're on the right list */ - if (vp->v_mount != mp) - goto loop; + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + if (vp->v_mount != mp || vismarker(vp)) + continue; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); pn = VPTOPP(vp); - nvp = TAILQ_NEXT(vp, v_mntvnodes); - if (vp->v_type != VREG || UVM_OBJ_IS_CLEAN(&vp->v_uobj)) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); continue; } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); /* * Here we try to get a reference to the vnode and to @@ -482,9 +484,11 @@ pageflush(struct mount *mp, kauth_cred_t cred, int waitfor, int suspending) */ rv = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); if (rv) { - simple_lock(&mntvnode_slock); - if (rv == ENOENT) + mutex_enter(&mntvnode_lock); + if (rv == ENOENT) { + (void)vunmark(mvp); goto loop; + } continue; } @@ -510,22 +514,23 @@ pageflush(struct mount *mp, kauth_cred_t cred, int waitfor, int suspending) * TODO: Maybe also hint the user server of this twist? */ if (suspending || waitfor == MNT_LAZY) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); pn->pn_stat |= PNODE_SUSPEND; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } rv = VOP_FSYNC(vp, cred, waitfor, 0, 0); if (suspending || waitfor == MNT_LAZY) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); pn->pn_stat &= ~PNODE_SUSPEND; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } if (rv) error = rv; vput(vp); - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); + vfree(mvp); return error; } diff --git a/sys/fs/puffs/puffs_vnops.c b/sys/fs/puffs/puffs_vnops.c index f6374bc17968..98142db08b01 100644 --- a/sys/fs/puffs/puffs_vnops.c +++ b/sys/fs/puffs/puffs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: puffs_vnops.c,v 1.123 2007/12/30 23:04:12 pooka Exp $ */ +/* $NetBSD: puffs_vnops.c,v 1.124 2008/01/02 11:48:44 ad Exp $ */ /* * Copyright (c) 2005, 2006, 2007 Antti Kantee. All Rights Reserved. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: puffs_vnops.c,v 1.123 2007/12/30 23:04:12 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: puffs_vnops.c,v 1.124 2008/01/02 11:48:44 ad Exp $"); #include #include @@ -1028,16 +1028,13 @@ puffs_vnop_inactive(void *v) } pnode->pn_stat &= ~PNODE_DOINACT; - VOP_UNLOCK(vp, 0); - /* * file server thinks it's gone? 
then don't be afraid care, * node's life was already all it would ever be */ - if (pnode->pn_stat & PNODE_NOREFS) { - pnode->pn_stat |= PNODE_DYING; - vrecycle(vp, NULL, curlwp); - } + *ap->a_recycle = ((pnode->pn_stat & PNODE_NOREFS) != 0); + + VOP_UNLOCK(vp, 0); return 0; } @@ -1291,8 +1288,7 @@ puffs_vnop_fsync(void *v) pn = VPTOPP(vp); /* flush out information from our metacache, see vop_setattr */ - if (pn->pn_stat & PNODE_METACACHE_MASK - && (pn->pn_stat & PNODE_DYING) == 0) { + if (pn->pn_stat & PNODE_METACACHE_MASK) { vattr_null(&va); error = VOP_SETATTR(vp, &va, FSCRED); if (error) @@ -1305,7 +1301,7 @@ puffs_vnop_fsync(void *v) pflags = PGO_CLEANIT; if (ap->a_flags & FSYNC_WAIT) pflags |= PGO_SYNCIO; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), round_page(ap->a_offhi), pflags); if (error) @@ -1317,7 +1313,7 @@ puffs_vnop_fsync(void *v) * has references neither in the kernel or the fs server. * Otherwise we continue to issue fsync() forward. */ - if (!EXISTSOP(pmp, FSYNC) || (pn->pn_stat & PNODE_DYING)) + if (!EXISTSOP(pmp, FSYNC)) return 0; dofaf = (ap->a_flags & FSYNC_WAIT) == 0 || ap->a_flags == FSYNC_LAZY; @@ -1329,10 +1325,10 @@ puffs_vnop_fsync(void *v) * vnode to be reclaimed from the freelist for this fs. */ if (dofaf == 0) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_iflag & VI_XLOCK) dofaf = 1; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } PUFFS_MSG_ALLOC(vn, fsync); @@ -1948,7 +1944,7 @@ puffs_vnop_write(void *v) * that gives userland too much say in the kernel. */ if (oldoff >> 16 != uio->uio_offset >> 16) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, oldoff & ~0xffff, uio->uio_offset & ~0xffff, PGO_CLEANIT | PGO_SYNCIO); @@ -1959,14 +1955,14 @@ puffs_vnop_write(void *v) /* synchronous I/O? */ if (error == 0 && ap->a_ioflag & IO_SYNC) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(origoff), round_page(uio->uio_offset), PGO_CLEANIT | PGO_SYNCIO); /* write through page cache? */ } else if (error == 0 && pmp->pmp_flags & PUFFS_KFLAG_WTCACHE) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(origoff), round_page(uio->uio_offset), PGO_CLEANIT); } @@ -2147,16 +2143,6 @@ puffs_vnop_strategy(void *v) || (BUF_ISWRITE(bp) && !EXISTSOP(pmp, WRITE))) ERROUT(EOPNOTSUPP); - /* - * Short-circuit optimization: don't flush buffer in between - * VOP_INACTIVE and VOP_RECLAIM in case the node has no references. 
- */ - if (pn->pn_stat & PNODE_DYING) { - KASSERT(BUF_ISWRITE(bp)); - bp->b_resid = 0; - goto out; - } - #ifdef DIAGNOSTIC if (bp->b_bcount > pmp->pmp_msg_maxsize - PUFFS_MSGSTRUCT_MAX) panic("puffs_strategy: wildly inappropriate buf bcount %d", @@ -2170,12 +2156,12 @@ puffs_vnop_strategy(void *v) * See puffs_vfsops.c:pageflush() */ if (BUF_ISWRITE(bp)) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_iflag & VI_XLOCK) dofaf = 1; if (pn->pn_stat & PNODE_SUSPEND) dofaf = 1; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } #ifdef DIAGNOSTIC @@ -2239,10 +2225,10 @@ puffs_vnop_strategy(void *v) DPRINTF(("puffs_strategy: write-protecting " "vp %p page %p, offset %" PRId64"\n", vp, vmp, vmp->offset)); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); vmp->flags |= PG_RDONLY; pmap_page_protect(vmp, VM_PROT_READ); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } } @@ -2434,13 +2420,13 @@ puffs_vnop_getpages(void *v) if (locked) ERROUT(EBUSY); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); vattr_null(&va); va.va_size = vp->v_size; error = dosetattr(vp, &va, FSCRED, 0); if (error) ERROUT(error); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); } if (write && PUFFS_WCACHEINFO(pmp)) { @@ -2481,7 +2467,7 @@ puffs_vnop_getpages(void *v) * when the page is actually write-faulted to. */ if (!locked) - simple_lock(&vp->v_uobj.vmobjlock); + mutex_enter(&vp->v_uobj.vmobjlock); for (i = 0, si = 0, streakon = 0; i < npages; i++) { if (pgs[i] == NULL || pgs[i] == PGO_DONTCARE) { if (streakon && write) { @@ -2507,7 +2493,7 @@ puffs_vnop_getpages(void *v) si++; } if (!locked) - simple_unlock(&vp->v_uobj.vmobjlock); + mutex_exit(&vp->v_uobj.vmobjlock); KASSERT(si <= (npages / 2) + 1); @@ -2559,7 +2545,7 @@ puffs_vnop_lock(void *v) */ if (fstrans_is_owner(mp) && fstrans_getstate(mp) == FSTRANS_SUSPENDING){ if (ap->a_flags & LK_INTERLOCK) - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return 0; } @@ -2583,7 +2569,7 @@ puffs_vnop_unlock(void *v) /* XXX: see puffs_lock() */ if (fstrans_is_owner(mp) && fstrans_getstate(mp) == FSTRANS_SUSPENDING){ if (ap->a_flags & LK_INTERLOCK) - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return 0; } diff --git a/sys/fs/smbfs/smbfs_io.c b/sys/fs/smbfs/smbfs_io.c index 378cf3416edc..c33b1e4d75ea 100644 --- a/sys/fs/smbfs/smbfs_io.c +++ b/sys/fs/smbfs/smbfs_io.c @@ -1,4 +1,4 @@ -/* $NetBSD: smbfs_io.c,v 1.28 2007/11/26 19:01:52 pooka Exp $ */ +/* $NetBSD: smbfs_io.c,v 1.29 2008/01/02 11:48:44 ad Exp $ */ /* * Copyright (c) 2000-2001, Boris Popov @@ -36,7 +36,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: smbfs_io.c,v 1.28 2007/11/26 19:01:52 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: smbfs_io.c,v 1.29 2008/01/02 11:48:44 ad Exp $"); #include #include @@ -354,9 +354,9 @@ smbfs_doio(struct buf *bp, kauth_cred_t cr, struct lwp *l) uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT; io.iov_base = bp->b_data; uiop->uio_rw = UIO_WRITE; - bp->b_flags |= B_BUSY; + bp->b_cflags |= BC_BUSY; error = smb_write(smp->sm_share, np->n_fid, uiop, &scred); - bp->b_flags &= ~B_BUSY; + bp->b_cflags &= ~BC_BUSY; #ifndef __NetBSD__ /* XXX */ diff --git a/sys/fs/smbfs/smbfs_node.c b/sys/fs/smbfs/smbfs_node.c index 97ca97451616..c282cec96130 100644 --- a/sys/fs/smbfs/smbfs_node.c +++ b/sys/fs/smbfs/smbfs_node.c @@ -1,4 +1,4 @@ -/* $NetBSD: smbfs_node.c,v 1.34 2007/11/30 11:23:10 pooka Exp $ */ +/* $NetBSD: smbfs_node.c,v 1.35 2008/01/02 11:48:45 
ad Exp $ */ /* * Copyright (c) 2000-2001 Boris Popov @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: smbfs_node.c,v 1.34 2007/11/30 11:23:10 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: smbfs_node.c,v 1.35 2008/01/02 11:48:45 ad Exp $"); #include #include @@ -135,7 +135,7 @@ loop: || memcmp(name, np->n_name, nmlen) != 0) continue; vp = SMBTOV(np); - simple_lock(&(vp)->v_interlock); + mutex_enter(&(vp)->v_interlock); smbfs_hash_unlock(smp); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK) != 0) goto retry; diff --git a/sys/fs/smbfs/smbfs_vfsops.c b/sys/fs/smbfs/smbfs_vfsops.c index 03ca4e594ead..5bf566b1f63e 100644 --- a/sys/fs/smbfs/smbfs_vfsops.c +++ b/sys/fs/smbfs/smbfs_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: smbfs_vfsops.c,v 1.73 2007/11/26 19:01:52 pooka Exp $ */ +/* $NetBSD: smbfs_vfsops.c,v 1.74 2008/01/02 11:48:45 ad Exp $ */ /* * Copyright (c) 2000-2001, Boris Popov @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: smbfs_vfsops.c,v 1.73 2007/11/26 19:01:52 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: smbfs_vfsops.c,v 1.74 2008/01/02 11:48:45 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_quota.h" @@ -394,46 +394,46 @@ smbfs_statvfs(struct mount *mp, struct statvfs *sbp) int smbfs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) { - struct vnode *vp, *nvp; + struct vnode *vp, *mvp; struct smbnode *np; int error, allerror = 0; + + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return ENOMEM; /* * Force stale buffer cache information to be flushed. */ - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); loop: - /* - * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() - * and vclean() can be called indirectly - */ - for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ - if (vp->v_mount != mp) - goto loop; - simple_lock(&vp->v_interlock); - nvp = TAILQ_NEXT(vp, v_mntvnodes); - + if (vp->v_mount != mp || vismarker(vp)) + continue; + mutex_enter(&vp->v_interlock); np = VTOSMB(vp); if (np == NULL) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); continue; } - if ((vp->v_type == VNON || (np->n_flag & NMODIFIED) == 0) && LIST_EMPTY(&vp->v_dirtyblkhd) && - UVM_OBJ_IS_CLEAN(&vp->v_uobj)) { - simple_unlock(&vp->v_interlock); + vp->v_uobj.uo_npages == 0) { + mutex_exit(&vp->v_interlock); continue; } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); if (error) { - simple_lock(&mntvnode_slock); - if (error == ENOENT) + mutex_enter(&mntvnode_lock); + if (error == ENOENT) { + (void)vunmark(mvp); goto loop; + } continue; } error = VOP_FSYNC(vp, cred, @@ -441,9 +441,10 @@ loop: if (error) allerror = error; vput(vp); - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); + vfree(mvp); return (allerror); } diff --git a/sys/fs/sysvbfs/sysvbfs_vfsops.c b/sys/fs/sysvbfs/sysvbfs_vfsops.c index 39fc5739a6fa..334190c6ab5d 100644 --- a/sys/fs/sysvbfs/sysvbfs_vfsops.c +++ b/sys/fs/sysvbfs/sysvbfs_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: sysvbfs_vfsops.c,v 1.20 2007/12/15 00:39:36 perry Exp $ */ +/* $NetBSD: sysvbfs_vfsops.c,v 1.21 2008/01/02 11:48:46 ad Exp $ */ /*- * Copyright (c) 2004 The NetBSD Foundation, Inc. 
@@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sysvbfs_vfsops.c,v 1.20 2007/12/15 00:39:36 perry Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sysvbfs_vfsops.c,v 1.21 2008/01/02 11:48:46 ad Exp $"); #include #include @@ -307,11 +307,12 @@ sysvbfs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) DPRINTF("%s:\n", __func__); error = 0; - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); for (bnode = LIST_FIRST(&bmp->bnode_head); bnode != NULL; bnode = LIST_NEXT(bnode, link)) { - simple_unlock(&mntvnode_slock); v = bnode->vnode; + mutex_enter(&v->v_interlock); + mutex_exit(&mntvnode_lock); err = vget(v, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); if (err == 0) { err = VOP_FSYNC(v, cred, FSYNC_WAIT, 0, 0); @@ -319,9 +320,9 @@ sysvbfs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) } if (err != 0) error = err; - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); return error; } @@ -362,9 +363,9 @@ sysvbfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) vp->v_data = pool_get(&sysvbfs_node_pool, PR_WAITOK); memset(vp->v_data, 0, sizeof(struct sysvbfs_node)); bnode = vp->v_data; - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); LIST_INSERT_HEAD(&bmp->bnode_head, bnode, link); - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); bnode->vnode = vp; bnode->bmp = bmp; bnode->inode = inode; diff --git a/sys/fs/sysvbfs/sysvbfs_vnops.c b/sys/fs/sysvbfs/sysvbfs_vnops.c index 30b41b02d243..ec3dcae3f12f 100644 --- a/sys/fs/sysvbfs/sysvbfs_vnops.c +++ b/sys/fs/sysvbfs/sysvbfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: sysvbfs_vnops.c,v 1.16 2007/12/15 00:39:36 perry Exp $ */ +/* $NetBSD: sysvbfs_vnops.c,v 1.17 2008/01/02 11:48:46 ad Exp $ */ /*- * Copyright (c) 2004 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sysvbfs_vnops.c,v 1.16 2007/12/15 00:39:36 perry Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sysvbfs_vnops.c,v 1.17 2008/01/02 11:48:46 ad Exp $"); #include #include @@ -572,12 +572,13 @@ sysvbfs_inactive(void *arg) { struct vop_inactive_args /* { struct vnode *a_vp; + bool *a_recycle; } */ *a = arg; struct vnode *v = a->a_vp; DPRINTF("%s:\n", __func__); + *a->a_recycle = true; VOP_UNLOCK(v, 0); - vrecycle(v, NULL, curlwp); return 0; } @@ -593,9 +594,9 @@ sysvbfs_reclaim(void *v) struct sysvbfs_node *bnode = vp->v_data; DPRINTF("%s:\n", __func__); - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); LIST_REMOVE(bnode, link); - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); cache_purge(vp); genfs_node_destroy(vp); pool_put(&sysvbfs_node_pool, bnode); diff --git a/sys/fs/tmpfs/tmpfs.h b/sys/fs/tmpfs/tmpfs.h index 0bbe79baf09f..636e41403f94 100644 --- a/sys/fs/tmpfs/tmpfs.h +++ b/sys/fs/tmpfs/tmpfs.h @@ -1,7 +1,7 @@ -/* $NetBSD: tmpfs.h,v 1.29 2007/12/08 19:29:44 pooka Exp $ */ +/* $NetBSD: tmpfs.h,v 1.30 2008/01/02 11:48:46 ad Exp $ */ /* - * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. + * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -221,14 +221,9 @@ struct tmpfs_node { * * May be NULL when the node is unused (that is, no vnode has been * allocated for it or it has been reclaimed). */ + kmutex_t tn_vlock; struct vnode * tn_vnode; - /* Pointer to the node returned by tmpfs_lookup() after doing a - * delete or a rename lookup; its value is only valid in these two - * situations. 
In case we were looking up . or .., it holds a null - * pointer. */ - struct tmpfs_dirent * tn_lookup_dirent; - union { /* Valid when tn_type == VBLK || tn_type == VCHR. */ struct { @@ -298,11 +293,11 @@ struct tmpfs_mount { * used directly as it may be bigger than the current amount of * free memory; in the extreme case, it will hold the SIZE_MAX * value. Instead, use the TMPFS_PAGES_MAX macro. */ - size_t tm_pages_max; + u_int tm_pages_max; /* Number of pages in use by the file system. Cannot be bigger * than the value returned by TMPFS_PAGES_MAX in any case. */ - size_t tm_pages_used; + u_int tm_pages_used; /* Pointer to the node representing the root directory of this * file system. */ @@ -314,28 +309,16 @@ struct tmpfs_mount { * cannot be released until the file system is unmounted. * Otherwise, we could easily run out of memory by creating lots * of empty files and then simply removing them. */ - ino_t tm_nodes_max; + u_int tm_nodes_max; /* Number of nodes currently allocated. This number only grows. * When it reaches tm_nodes_max, no more new nodes can be allocated. * Of course, the old, unused ones can be reused. */ - ino_t tm_nodes_last; + u_int tm_nodes_cnt; - /* Nodes are organized in two different lists. The used list - * contains all nodes that are currently used by the file system; - * i.e., they refer to existing files. The available list contains - * all nodes that are currently available for use by new files. - * Nodes must be kept in this list (instead of deleting them) - * because we need to keep track of their generation number (tn_gen - * field). - * - * Note that nodes are lazily allocated: if the available list is - * empty and we have enough space to create more nodes, they will be - * created and inserted in the used list. Once these are released, - * they will go into the available list, remaining alive until the - * file system is unmounted. */ - struct tmpfs_node_list tm_nodes_used; - struct tmpfs_node_list tm_nodes_avail; + /* Node list. */ + kmutex_t tm_lock; + struct tmpfs_node_list tm_nodes; /* Pools used to store file system meta data. These are not shared * across several instances of tmpfs for the reasons described in @@ -466,7 +449,8 @@ TMPFS_PAGES_MAX(struct tmpfs_mount *tmp) } /* Returns the available space for the given file system. */ -#define TMPFS_PAGES_AVAIL(tmp) (TMPFS_PAGES_MAX(tmp) - (tmp)->tm_pages_used) +#define TMPFS_PAGES_AVAIL(tmp) \ + ((ssize_t)(TMPFS_PAGES_MAX(tmp) - (tmp)->tm_pages_used)) /* --------------------------------------------------------------------- */ diff --git a/sys/fs/tmpfs/tmpfs_pool.c b/sys/fs/tmpfs/tmpfs_pool.c index 47445b30fade..eb10888e4584 100644 --- a/sys/fs/tmpfs/tmpfs_pool.c +++ b/sys/fs/tmpfs/tmpfs_pool.c @@ -1,7 +1,7 @@ -/* $NetBSD: tmpfs_pool.c,v 1.11 2007/11/22 21:08:10 pooka Exp $ */ +/* $NetBSD: tmpfs_pool.c,v 1.12 2008/01/02 11:48:46 ad Exp $ */ /* - * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. + * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -42,10 +42,11 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: tmpfs_pool.c,v 1.11 2007/11/22 21:08:10 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tmpfs_pool.c,v 1.12 2008/01/02 11:48:46 ad Exp $"); #include #include +#include #include @@ -155,15 +156,29 @@ tmpfs_pool_page_alloc(struct pool *pp, int flags) { struct tmpfs_pool *tpp; struct tmpfs_mount *tmp; + u_int pages; + void *page; tpp = (struct tmpfs_pool *)pp; tmp = tpp->tp_mount; - if (TMPFS_PAGES_MAX(tmp) - tmp->tm_pages_used == 0) + pages = atomic_inc_uint_nv(&tmp->tm_pages_used); + if (pages >= TMPFS_PAGES_MAX(tmp)) { + atomic_dec_uint(&tmp->tm_pages_used); return NULL; + } + /* + * tmpfs never specifies PR_WAITOK as we enforce local limits + * on memory allocation. However, we should wait for memory + * to become available if under our limit. XXX The result of + * the TMPFS_PAGES_MAX() check is stale. + */ + page = pool_page_alloc_nointr(pp, flags | PR_WAITOK); + if (page == NULL) { + atomic_dec_uint(&tmp->tm_pages_used); + } - tmp->tm_pages_used += 1; - return pool_page_alloc_nointr(pp, flags); + return page; } /* --------------------------------------------------------------------- */ @@ -177,7 +192,7 @@ tmpfs_pool_page_free(struct pool *pp, void *v) tpp = (struct tmpfs_pool *)pp; tmp = tpp->tp_mount; - tmp->tm_pages_used -= 1; + atomic_dec_uint(&tmp->tm_pages_used); pool_page_free_nointr(pp, v); } diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c index d7ab571bb8e5..cd31e1444fbf 100644 --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -1,7 +1,7 @@ -/* $NetBSD: tmpfs_subr.c,v 1.42 2007/12/08 19:29:45 pooka Exp $ */ +/* $NetBSD: tmpfs_subr.c,v 1.43 2008/01/02 11:48:46 ad Exp $ */ /* - * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. + * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. * All rights reserved. 
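[The tmpfs_pool_page_alloc() hunk above uses an optimistic reservation: the usage counter is bumped first with atomic_inc_uint_nv(), then rolled back if either the limit check or the backing allocation fails, so no lock is needed around the accounting. The same pattern reduced to its core (with the staleness caveat the XXX comment already notes):

	u_int pages;
	void *page;

	pages = atomic_inc_uint_nv(&tmp->tm_pages_used);
	if (pages >= TMPFS_PAGES_MAX(tmp)) {
		atomic_dec_uint(&tmp->tm_pages_used);	/* roll back */
		return NULL;
	}
	page = pool_page_alloc_nointr(pp, flags | PR_WAITOK);
	if (page == NULL)
		atomic_dec_uint(&tmp->tm_pages_used);	/* roll back */
	return page;
]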
* * This code is derived from software contributed to The NetBSD Foundation @@ -42,12 +42,12 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.42 2007/12/08 19:29:45 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.43 2008/01/02 11:48:46 ad Exp $"); #include #include #include -#include +#include #include #include #include @@ -57,6 +57,7 @@ __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.42 2007/12/08 19:29:45 pooka Exp $" #include #include #include +#include #include @@ -66,8 +67,6 @@ __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.42 2007/12/08 19:29:45 pooka Exp $" #include #include -MALLOC_DECLARE(M_TMPFSTMP); - /* --------------------------------------------------------------------- */ /* @@ -109,24 +108,24 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL); nnode = NULL; - if (LIST_EMPTY(&tmp->tm_nodes_avail)) { - KASSERT(tmp->tm_nodes_last <= tmp->tm_nodes_max); - if (tmp->tm_nodes_last == tmp->tm_nodes_max) - return ENOSPC; - - nnode = - (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0); - if (nnode == NULL) - return ENOSPC; - nnode->tn_id = tmp->tm_nodes_last++; - nnode->tn_gen = arc4random(); - } else { - nnode = LIST_FIRST(&tmp->tm_nodes_avail); - LIST_REMOVE(nnode, tn_entries); - nnode->tn_gen++; + if (atomic_inc_uint_nv(&tmp->tm_nodes_cnt) >= tmp->tm_nodes_max) { + atomic_dec_uint(&tmp->tm_nodes_cnt); + return ENOSPC; } - KASSERT(nnode != NULL); - LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); + + nnode = (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0); + if (nnode == NULL) { + atomic_dec_uint(&tmp->tm_nodes_cnt); + return ENOSPC; + } + + /* + * XXX Where the pool is backed by a map larger than (4GB * + * sizeof(*nnode)), this may produce duplicate inode numbers + * for applications that do not understand 64-bit ino_t. + */ + nnode->tn_id = (ino_t)((uintptr_t)nnode / sizeof(*nnode)); + nnode->tn_gen = arc4random(); /* Generic initialization. 
*/ nnode->tn_type = type; @@ -157,11 +156,6 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, nnode->tn_spec.tn_dir.tn_readdir_lastn = 0; nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL; nnode->tn_links++; - nnode->tn_spec.tn_dir.tn_parent->tn_links++; - if (parent != NULL) { - KASSERT(parent->tn_vnode != NULL); - VN_KNOTE(parent->tn_vnode, NOTE_LINK); - } break; case VFIFO: @@ -175,8 +169,8 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, nnode->tn_spec.tn_lnk.tn_link = tmpfs_str_pool_get(&tmp->tm_str_pool, nnode->tn_size, 0); if (nnode->tn_spec.tn_lnk.tn_link == NULL) { - nnode->tn_type = VNON; - tmpfs_free_node(tmp, nnode); + atomic_dec_uint(&tmp->tm_nodes_cnt); + TMPFS_POOL_PUT(&tmp->tm_node_pool, nnode); return ENOSPC; } memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size); @@ -192,6 +186,12 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, KASSERT(0); } + mutex_init(&nnode->tn_vlock, MUTEX_DEFAULT, IPL_NONE); + + mutex_enter(&tmp->tm_lock); + LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries); + mutex_exit(&tmp->tm_lock); + *node = nnode; return 0; } @@ -218,56 +218,33 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, void tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) { - ino_t id; - unsigned long gen; - size_t pages; + + if (node->tn_type == VREG) { + atomic_add_int(&tmp->tm_pages_used, + -node->tn_spec.tn_reg.tn_aobj_pages); + } + atomic_dec_uint(&tmp->tm_nodes_cnt); + mutex_enter(&tmp->tm_lock); + LIST_REMOVE(node, tn_entries); + mutex_exit(&tmp->tm_lock); switch (node->tn_type) { - case VNON: - /* Do not do anything. VNON is provided to let the - * allocation routine clean itself easily by avoiding - * duplicating code in it. */ - /* FALLTHROUGH */ - case VBLK: - /* FALLTHROUGH */ - case VCHR: - /* FALLTHROUGH */ - case VDIR: - /* FALLTHROUGH */ - case VFIFO: - /* FALLTHROUGH */ - case VSOCK: - pages = 0; - break; - case VLNK: tmpfs_str_pool_put(&tmp->tm_str_pool, node->tn_spec.tn_lnk.tn_link, node->tn_size); - pages = 0; break; case VREG: if (node->tn_spec.tn_reg.tn_aobj != NULL) uao_detach(node->tn_spec.tn_reg.tn_aobj); - pages = node->tn_spec.tn_reg.tn_aobj_pages; break; default: - KASSERT(0); - pages = 0; /* Shut up gcc when !DIAGNOSTIC. */ break; } - tmp->tm_pages_used -= pages; - - LIST_REMOVE(node, tn_entries); - id = node->tn_id; - gen = node->tn_gen; - memset(node, 0, sizeof(struct tmpfs_node)); - node->tn_id = id; - node->tn_type = VNON; - node->tn_gen = gen; - LIST_INSERT_HEAD(&tmp->tm_nodes_avail, node, tn_entries); + mutex_destroy(&node->tn_vlock); + TMPFS_POOL_PUT(&tmp->tm_node_pool, node); } /* --------------------------------------------------------------------- */ @@ -363,30 +340,37 @@ tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp) struct vnode *nvp; struct vnode *vp; - vp = NULL; - - if (node->tn_vnode != NULL) { - vp = node->tn_vnode; - vget(vp, LK_EXCLUSIVE | LK_RETRY); - error = 0; - goto out; + /* If there is already a vnode, then lock it. */ + for (;;) { + mutex_enter(&node->tn_vlock); + if ((vp = node->tn_vnode) != NULL) { + mutex_enter(&vp->v_interlock); + mutex_exit(&node->tn_vlock); + error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK); + if (error == ENOENT) { + /* vnode was reclaimed. */ + continue; + } + *vpp = vp; + return error; + } + break; } /* Get a new vnode and associate it with our node. 
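[The loop at the top of tmpfs_alloc_vp() above is the standard interlocked-vget retry: take the per-node tn_vlock, grab the vnode interlock, drop the node lock, then call vget() with LK_INTERLOCK. ENOENT means the vnode was reclaimed while we slept, so we look again. A condensed sketch of just that handshake:

	for (;;) {
		mutex_enter(&node->tn_vlock);
		if ((vp = node->tn_vnode) == NULL)
			break;		/* none: fall through and allocate */
		mutex_enter(&vp->v_interlock);
		mutex_exit(&node->tn_vlock);
		error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK);
		if (error == ENOENT)
			continue;	/* reclaimed while sleeping: retry */
		*vpp = vp;
		return error;
	}
	/* tn_vlock is still held here, serializing the new allocation. */
]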
*/ error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp); - if (error != 0) - goto out; - KASSERT(vp != NULL); + if (error != 0) { + mutex_exit(&node->tn_vlock); + return error; + } error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (error != 0) { - vp->v_data = NULL; + mutex_exit(&node->tn_vlock); ungetnewvnode(vp); - vp = NULL; - goto out; + return error; } - vp->v_data = node; vp->v_type = node->tn_type; /* Type-specific initialization. */ @@ -398,24 +382,21 @@ tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp) nvp = checkalias(vp, node->tn_spec.tn_dev.tn_rdev, mp); if (nvp != NULL) { /* Discard unneeded vnode, but save its inode. */ - nvp->v_data = vp->v_data; - vp->v_data = NULL; + nvp->v_data = node; /* XXX spec_vnodeops has no locking, so we have to * do it explicitly. */ vp->v_vflag &= ~VV_LOCKSWORK; VOP_UNLOCK(vp, 0); vp->v_op = spec_vnodeop_p; - vrele(vp); vgone(vp); /* Reinitialize aliased node. */ vp = nvp; error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (error != 0) { - vp->v_data = NULL; - vp = NULL; - goto out; + mutex_exit(&node->tn_vlock); + return error; } } break; @@ -441,11 +422,10 @@ tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp) } uvm_vnp_setsize(vp, node->tn_size); - - error = 0; - -out: - *vpp = node->tn_vnode = vp; + vp->v_data = node; + node->tn_vnode = vp; + mutex_exit(&node->tn_vlock); + *vpp = vp; KASSERT(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp))); KASSERT(*vpp == node->tn_vnode); @@ -466,7 +446,9 @@ tmpfs_free_vp(struct vnode *vp) node = VP_TO_TMPFS_NODE(vp); + mutex_enter(&node->tn_vlock); node->tn_vnode = NULL; + mutex_exit(&node->tn_vlock); vp->v_data = NULL; } @@ -542,13 +524,17 @@ tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, * insert the new node into the directory, an operation that * cannot fail. 
*/ tmpfs_dir_attach(dvp, de); + if (vap->va_type == VDIR) { + VN_KNOTE(dvp, NOTE_LINK); + dnode->tn_links++; + KASSERT(dnode->tn_links <= LINK_MAX); + } out: if (error != 0 || !(cnp->cn_flags & SAVESTART)) PNBUF_PUT(cnp->cn_pnbuf); vput(dvp); - KASSERT(!VOP_ISLOCKED(dvp)); KASSERT(IFF(error == 0, *vpp != NULL)); return error; @@ -667,7 +653,7 @@ tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) TMPFS_VALIDATE_DIR(node); KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); - dentp = malloc(sizeof(struct dirent), M_TMPFSTMP, M_WAITOK | M_ZERO); + dentp = kmem_zalloc(sizeof(struct dirent), KM_SLEEP); dentp->d_fileno = node->tn_id; dentp->d_type = DT_DIR; @@ -686,7 +672,7 @@ tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) node->tn_status |= TMPFS_NODE_ACCESSED; - free(dentp, M_TMPFSTMP); + kmem_free(dentp, sizeof(struct dirent)); return error; } @@ -708,7 +694,7 @@ tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio) TMPFS_VALIDATE_DIR(node); KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); - dentp = malloc(sizeof(struct dirent), M_TMPFSTMP, M_WAITOK | M_ZERO); + dentp = kmem_zalloc(sizeof(struct dirent), KM_SLEEP); dentp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id; dentp->d_type = DT_DIR; @@ -735,7 +721,7 @@ tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio) node->tn_status |= TMPFS_NODE_ACCESSED; - free(dentp, M_TMPFSTMP); + kmem_free(dentp, sizeof(struct dirent)); return error; } @@ -797,7 +783,7 @@ tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp) return EINVAL; } - dentp = malloc(sizeof(struct dirent), M_TMPFSTMP, M_WAITOK | M_ZERO); + dentp = kmem_zalloc(sizeof(struct dirent), KM_SLEEP); /* Read as much entries as possible; i.e., until we reach the end of * the directory or we exhaust uio space. */ @@ -871,7 +857,7 @@ tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp) node->tn_status |= TMPFS_NODE_ACCESSED; - free(dentp, M_TMPFSTMP); + kmem_free(dentp, sizeof(struct dirent)); return error; } @@ -892,7 +878,7 @@ int tmpfs_reg_resize(struct vnode *vp, off_t newsize) { int error; - size_t newpages, oldpages; + u_int newpages, oldpages; struct tmpfs_mount *tmp; struct tmpfs_node *node; off_t oldsize; @@ -913,28 +899,15 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize) newpages = round_page(newsize) / PAGE_SIZE; if (newpages > oldpages && - newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) { + (ssize_t)(newpages - oldpages) > TMPFS_PAGES_AVAIL(tmp)) { error = ENOSPC; goto out; } + atomic_add_int(&tmp->tm_pages_used, newpages - oldpages); if (newsize < oldsize) { int zerolen = MIN(round_page(newsize), node->tn_size) - newsize; - /* - * free "backing store" - */ - - if (newpages < oldpages) { - struct uvm_object *uobj; - - uobj = node->tn_spec.tn_reg.tn_aobj; - - simple_lock(&uobj->vmobjlock); - uao_dropswap_range(uobj, newpages, oldpages); - simple_unlock(&uobj->vmobjlock); - } - /* * zero out the truncated part of the last page. 
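[The dirent buffers above move from malloc(9) with the M_TMPFSTMP type to kmem(9). Unlike free(), kmem_free() must be told the size of the original allocation, so the size expression appears at both ends of the object's life; a minimal sketch:

	struct dirent *dentp;

	dentp = kmem_zalloc(sizeof(*dentp), KM_SLEEP);	/* zeroed, may sleep */
	/* ... fill in the entry and copy it out to the uio ... */
	kmem_free(dentp, sizeof(*dentp));		/* size must match */
]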
*/ @@ -946,7 +919,19 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize) node->tn_size = newsize; uvm_vnp_setsize(vp, newsize); - tmp->tm_pages_used += (newpages - oldpages); + /* + * free "backing store" + */ + + if (newpages < oldpages) { + struct uvm_object *uobj; + + uobj = node->tn_spec.tn_reg.tn_aobj; + + mutex_enter(&uobj->vmobjlock); + uao_dropswap_range(uobj, newpages, oldpages); + mutex_exit(&uobj->vmobjlock); + } error = 0; diff --git a/sys/fs/tmpfs/tmpfs_vfsops.c b/sys/fs/tmpfs/tmpfs_vfsops.c index 02cf3d7eb413..e7af958cc987 100644 --- a/sys/fs/tmpfs/tmpfs_vfsops.c +++ b/sys/fs/tmpfs/tmpfs_vfsops.c @@ -1,7 +1,7 @@ -/* $NetBSD: tmpfs_vfsops.c,v 1.33 2007/12/08 19:29:45 pooka Exp $ */ +/* $NetBSD: tmpfs_vfsops.c,v 1.34 2008/01/02 11:48:47 ad Exp $ */ /* - * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. + * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -49,11 +49,11 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.33 2007/12/08 19:29:45 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.34 2008/01/02 11:48:47 ad Exp $"); #include #include -#include +#include #include #include #include @@ -62,9 +62,6 @@ __KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.33 2007/12/08 19:29:45 pooka Exp #include -MALLOC_JUSTDEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures"); -MALLOC_JUSTDEFINE(M_TMPFSTMP, "tmpfs temp", "tmpfs temporary structures"); - /* --------------------------------------------------------------------- */ static int tmpfs_mount(struct mount *, const char *, void *, size_t *); @@ -149,14 +146,15 @@ tmpfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) KASSERT(nodes >= 3); /* Allocate the tmpfs mount structure and fill it. */ - tmp = (struct tmpfs_mount *)malloc(sizeof(struct tmpfs_mount), - M_TMPFSMNT, M_WAITOK); - KASSERT(tmp != NULL); + tmp = kmem_alloc(sizeof(struct tmpfs_mount), KM_SLEEP); + if (tmp == NULL) + return ENOMEM; tmp->tm_nodes_max = nodes; - tmp->tm_nodes_last = 2; - LIST_INIT(&tmp->tm_nodes_used); - LIST_INIT(&tmp->tm_nodes_avail); + tmp->tm_nodes_cnt = 0; + LIST_INIT(&tmp->tm_nodes); + + mutex_init(&tmp->tm_lock, MUTEX_DEFAULT, IPL_NONE); tmp->tm_pages_max = pages; tmp->tm_pages_used = 0; @@ -171,6 +169,7 @@ tmpfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) args->ta_root_gid, args->ta_root_mode & ALLPERMS, NULL, NULL, VNOVAL, &root); KASSERT(error == 0 && root != NULL); + root->tn_links++; tmp->tm_root = root; mp->mnt_data = tmp; @@ -178,6 +177,7 @@ tmpfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) mp->mnt_stat.f_namemax = MAXNAMLEN; mp->mnt_fs_bshift = PAGE_SHIFT; mp->mnt_dev_bshift = DEV_BSHIFT; + mp->mnt_iflag |= IMNT_MPSAFE; vfs_getnewfsid(mp); return set_statvfs_info(path, UIO_USERSPACE, "tmpfs", UIO_SYSSPACE, @@ -220,7 +220,7 @@ tmpfs_unmount(struct mount *mp, int mntflags) * a directory, we free all its directory entries. Note that after * freeing a node, it will automatically go to the available list, * so we will later have to iterate over it to release its items. 
*/ - node = LIST_FIRST(&tmp->tm_nodes_used); + node = LIST_FIRST(&tmp->tm_nodes); while (node != NULL) { struct tmpfs_node *next; @@ -243,15 +243,6 @@ tmpfs_unmount(struct mount *mp, int mntflags) tmpfs_free_node(tmp, node); node = next; } - node = LIST_FIRST(&tmp->tm_nodes_avail); - while (node != NULL) { - struct tmpfs_node *next; - - next = LIST_NEXT(node, tn_entries); - LIST_REMOVE(node, tn_entries); - TMPFS_POOL_PUT(&tmp->tm_node_pool, node); - node = next; - } tmpfs_pool_destroy(&tmp->tm_dirent_pool); tmpfs_pool_destroy(&tmp->tm_node_pool); @@ -260,7 +251,8 @@ tmpfs_unmount(struct mount *mp, int mntflags) KASSERT(tmp->tm_pages_used == 0); /* Throw away the tmpfs_mount structure. */ - free(mp->mnt_data, M_TMPFSMNT); + mutex_destroy(&tmp->tm_lock); + kmem_free(tmp, sizeof(*tmp)); mp->mnt_data = NULL; return 0; @@ -307,14 +299,17 @@ tmpfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) return EINVAL; found = false; - LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { + mutex_enter(&tmp->tm_lock); + LIST_FOREACH(node, &tmp->tm_nodes, tn_entries) { if (node->tn_id == tfh.tf_id && node->tn_gen == tfh.tf_gen) { found = true; break; } } + mutex_exit(&tmp->tm_lock); + /* XXXAD nothing to prevent 'node' from being removed. */ return found ? tmpfs_alloc_vp(mp, node, vpp) : EINVAL; } @@ -348,9 +343,8 @@ tmpfs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size) static int tmpfs_statvfs(struct mount *mp, struct statvfs *sbp) { - fsfilcnt_t freenodes, usednodes; + fsfilcnt_t freenodes; struct tmpfs_mount *tmp; - struct tmpfs_node *dummy; tmp = VFS_TO_TMPFS(mp); @@ -360,16 +354,10 @@ tmpfs_statvfs(struct mount *mp, struct statvfs *sbp) sbp->f_bavail = sbp->f_bfree = TMPFS_PAGES_AVAIL(tmp); sbp->f_bresvd = 0; - freenodes = MIN(tmp->tm_nodes_max - tmp->tm_nodes_last, + freenodes = MIN(tmp->tm_nodes_max - tmp->tm_nodes_cnt, TMPFS_PAGES_AVAIL(tmp) * PAGE_SIZE / sizeof(struct tmpfs_node)); - LIST_FOREACH(dummy, &tmp->tm_nodes_avail, tn_entries) - freenodes++; - usednodes = 0; - LIST_FOREACH(dummy, &tmp->tm_nodes_used, tn_entries) - usednodes++; - - sbp->f_files = freenodes + usednodes; + sbp->f_files = tmp->tm_nodes_cnt + freenodes; sbp->f_favail = sbp->f_ffree = freenodes; sbp->f_fresvd = 0; @@ -395,8 +383,6 @@ static void tmpfs_init(void) { - malloc_type_attach(M_TMPFSMNT); - malloc_type_attach(M_TMPFSTMP); } /* --------------------------------------------------------------------- */ @@ -405,8 +391,6 @@ static void tmpfs_done(void) { - malloc_type_detach(M_TMPFSTMP); - malloc_type_detach(M_TMPFSMNT); } /* --------------------------------------------------------------------- */ diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c index ee5ba8fb564d..bddf950b5e30 100644 --- a/sys/fs/tmpfs/tmpfs_vnops.c +++ b/sys/fs/tmpfs/tmpfs_vnops.c @@ -1,7 +1,7 @@ -/* $NetBSD: tmpfs_vnops.c,v 1.44 2007/11/26 19:01:55 pooka Exp $ */ +/* $NetBSD: tmpfs_vnops.c,v 1.45 2008/01/02 11:48:47 ad Exp $ */ /* - * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. + * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -42,7 +42,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.44 2007/11/26 19:01:55 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.45 2008/01/02 11:48:47 ad Exp $"); #include #include @@ -169,11 +169,9 @@ tmpfs_lookup(void *v) dnode->tn_spec.tn_dir.tn_parent, vpp); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); - dnode->tn_spec.tn_dir.tn_parent->tn_lookup_dirent = NULL; } else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') { VREF(dvp); *vpp = dvp; - dnode->tn_lookup_dirent = NULL; error = 0; } else { de = tmpfs_dir_lookup(dnode, cnp); @@ -229,8 +227,8 @@ tmpfs_lookup(void *v) error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred); if (error != 0) goto out; - tnode->tn_lookup_dirent = de; - } + } else + de = NULL; /* Allocate a new vnode on the matching entry. */ error = tmpfs_alloc_vp(dvp->v_mount, tnode, vpp); @@ -240,7 +238,8 @@ tmpfs_lookup(void *v) /* Store the result of this lookup in the cache. Avoid this if the * request was for creation, as it does not improve timings on * emprical tests. */ - if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) + if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE && + (cnp->cn_flags & ISDOTDOT) == 0) cache_enter(dvp, *vpp, cnp); out: @@ -651,6 +650,7 @@ tmpfs_remove(void *v) { struct vnode *dvp = ((struct vop_remove_args *)v)->a_dvp; struct vnode *vp = ((struct vop_remove_args *)v)->a_vp; + struct componentname *cnp = (((struct vop_remove_args *)v)->a_cnp); int error; struct tmpfs_dirent *de; @@ -669,8 +669,12 @@ tmpfs_remove(void *v) dnode = VP_TO_TMPFS_DIR(dvp); node = VP_TO_TMPFS_NODE(vp); tmp = VFS_TO_TMPFS(vp->v_mount); - de = node->tn_lookup_dirent; - KASSERT(de != NULL); + de = tmpfs_dir_lookup(dnode, cnp); + if (de == NULL) { + error = ENOENT; + goto out; + } + KASSERT(de->td_node == node); /* Files marked as immutable or append-only cannot be deleted. */ if (node->tn_flags & (IMMUTABLE | APPEND)) { @@ -696,8 +700,6 @@ out: else vput(dvp); - KASSERT(!VOP_ISLOCKED(dvp)); - return error; } @@ -716,7 +718,6 @@ tmpfs_link(void *v) struct tmpfs_node *node; KASSERT(VOP_ISLOCKED(dvp)); - KASSERT(!VOP_ISLOCKED(vp)); KASSERT(cnp->cn_flags & HASBUF); KASSERT(dvp != vp); /* XXX When can this be false? */ @@ -727,7 +728,7 @@ tmpfs_link(void *v) * needs the vnode to be locked. */ error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (error != 0) - goto out; + goto out1; /* XXX: Why aren't the following two tests done by the caller? */ @@ -773,17 +774,12 @@ tmpfs_link(void *v) error = 0; out: - if (VOP_ISLOCKED(vp)) - VOP_UNLOCK(vp, 0); - + VOP_UNLOCK(vp, 0); +out1: PNBUF_PUT(cnp->cn_pnbuf); vput(dvp); - /* XXX Locking status of dvp does not match manual page. */ - KASSERT(!VOP_ISLOCKED(dvp)); - KASSERT(!VOP_ISLOCKED(vp)); - return error; } @@ -801,33 +797,52 @@ tmpfs_rename(void *v) char *newname; int error; - struct tmpfs_dirent *de; + struct tmpfs_dirent *de, *de2; struct tmpfs_mount *tmp; struct tmpfs_node *fdnode; struct tmpfs_node *fnode; struct tmpfs_node *tnode; struct tmpfs_node *tdnode; + size_t namelen; KASSERT(VOP_ISLOCKED(tdvp)); - KASSERT(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp))); + KASSERT(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); KASSERT(fcnp->cn_flags & HASBUF); KASSERT(tcnp->cn_flags & HASBUF); - fdnode = VP_TO_TMPFS_DIR(fdvp); - fnode = VP_TO_TMPFS_NODE(fvp); - tnode = (tvp == NULL) ? 
NULL : VP_TO_TMPFS_NODE(tvp); - de = fnode->tn_lookup_dirent; + newname = NULL; + namelen = 0; + tmp = NULL; - /* Disallow cross-device renames. - * XXX Why isn't this done by the caller? */ + /* Disallow cross-device renames. */ if (fvp->v_mount != tdvp->v_mount || (tvp != NULL && fvp->v_mount != tvp->v_mount)) { error = EXDEV; - goto out; + goto out_unlocked; } - tmp = VFS_TO_TMPFS(tdvp->v_mount); + fnode = VP_TO_TMPFS_NODE(fvp); + fdnode = VP_TO_TMPFS_DIR(fdvp); + tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp); tdnode = VP_TO_TMPFS_DIR(tdvp); + tmp = VFS_TO_TMPFS(tdvp->v_mount); + + /* If we need to move the directory between entries, lock the + * source so that we can safely operate on it. */ + + /* XXX: this is a potential locking order violation! */ + if (fdnode != tdnode) { + error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY); + if (error != 0) + goto out_unlocked; + } + + de = tmpfs_dir_lookup(fdnode, fcnp); + if (de == NULL) { + error = ENOENT; + goto out; + } + KASSERT(de->td_node == fnode); /* If source and target are the same file, there is nothing to do. */ if (fvp == tvp) { @@ -863,44 +878,17 @@ tmpfs_rename(void *v) } } - /* If we need to move the directory between entries, lock the - * source so that we can safely operate on it. */ - - /* XXX: this is a potential locking order violation! */ - if (fdnode != tdnode) { - error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY); - if (error != 0) - goto out; - } - - /* Make sure we have the correct cached dirent */ - fcnp->cn_flags &= ~(MODMASK | SAVESTART); - fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; - if ((error = relookup(fdvp, &fvp, fcnp))) { - goto out_locked; - } - KASSERT(fvp != NULL); - /* Relookup always returns with vpp locked and 1UP referenced */ - VOP_UNLOCK(fvp, 0); - vrele(((struct vop_rename_args *)v)->a_fvp); - - /* Reacquire values. fvp might have changed. Since we only - * used fvp to sanitycheck fcnp values above, we can do this. */ - fnode = VP_TO_TMPFS_NODE(fvp); - de = fnode->tn_lookup_dirent; - /* Ensure that we have enough memory to hold the new name, if it * has to be changed. */ + namelen = tcnp->cn_namelen; if (fcnp->cn_namelen != tcnp->cn_namelen || memcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) { - newname = tmpfs_str_pool_get(&tmp->tm_str_pool, - tcnp->cn_namelen, 0); + newname = tmpfs_str_pool_get(&tmp->tm_str_pool, namelen, 0); if (newname == NULL) { error = ENOSPC; - goto out_locked; + goto out; } - } else - newname = NULL; + } /* If the node is being moved to another directory, we have to do * the move. */ @@ -917,7 +905,7 @@ tmpfs_rename(void *v) while (n != n->tn_spec.tn_dir.tn_parent) { if (n == fnode) { error = EINVAL; - goto out_locked; + goto out; } n = n->tn_spec.tn_dir.tn_parent; } @@ -944,6 +932,37 @@ tmpfs_rename(void *v) VN_KNOTE(fdvp, NOTE_WRITE); } + /* If we are overwriting an entry, we have to remove the old one + * from the target directory. */ + if (tvp != NULL) { + KASSERT(tnode != NULL); + + /* Remove the old entry from the target directory. + * Note! This relies on tmpfs_dir_attach() putting the new + * node on the end of the target's node list. 
*/ + de2 = tmpfs_dir_lookup(tdnode, tcnp); + KASSERT(de2 != NULL); +/* XXXREMOVEME */ + if (de2 == de) { + panic("tmpfs_rename: to self 1"); + } + if (de2->td_node == de->td_node) { + panic("tmpfs_rename: to self 2"); + } + if (de2->td_node != tnode) { + panic("tmpfs_rename: found wrong entry [%s]", + tcnp->cn_nameptr); + } +/* XXXREMOVEME */ + KASSERT(de2->td_node == tnode); + tmpfs_dir_detach(tdvp, de2); + + /* Free the directory entry we just deleted. Note that the + * node referred by it will not be removed until the vnode is + * really reclaimed. */ + tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de2, true); + } + /* If the name has changed, we need to make it effective by changing * it in the directory entry. */ if (newname != NULL) { @@ -952,29 +971,15 @@ tmpfs_rename(void *v) tmpfs_str_pool_put(&tmp->tm_str_pool, de->td_name, de->td_namelen); - de->td_namelen = (uint16_t)tcnp->cn_namelen; - memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen); + de->td_namelen = (uint16_t)namelen; + memcpy(newname, tcnp->cn_nameptr, namelen); de->td_name = newname; + newname = NULL; fnode->tn_status |= TMPFS_NODE_CHANGED; tdnode->tn_status |= TMPFS_NODE_MODIFIED; } - /* If we are overwriting an entry, we have to remove the old one - * from the target directory. */ - if (tvp != NULL) { - KASSERT(tnode != NULL); - - /* Remove the old entry from the target directory. */ - de = tnode->tn_lookup_dirent; - tmpfs_dir_detach(tdvp, de); - - /* Free the directory entry we just deleted. Note that the - * node referred by it will not be removed until the vnode is - * really reclaimed. */ - tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de, true); - } - /* Notify listeners of tdvp about the change in the directory (either * because a new entry was added or because one was removed) and * listeners of fvp about the rename. */ @@ -983,11 +988,11 @@ tmpfs_rename(void *v) error = 0; -out_locked: + out: if (fdnode != tdnode) VOP_UNLOCK(fdvp, 0); -out: + out_unlocked: /* Release target nodes. */ if (tdvp == tvp) vrele(tdvp); @@ -1000,6 +1005,9 @@ out: vrele(fdvp); vrele(fvp); + if (newname != NULL) + tmpfs_str_pool_put(&tmp->tm_str_pool, newname, namelen); + return error; } @@ -1025,6 +1033,7 @@ tmpfs_rmdir(void *v) { struct vnode *dvp = ((struct vop_rmdir_args *)v)->a_dvp; struct vnode *vp = ((struct vop_rmdir_args *)v)->a_vp; + struct componentname *cnp = ((struct vop_rmdir_args *)v)->a_cnp; int error; struct tmpfs_dirent *de; @@ -1051,12 +1060,13 @@ tmpfs_rmdir(void *v) * We checked for that above so this is safe now. */ KASSERT(node->tn_spec.tn_dir.tn_parent == dnode); - /* Get the directory entry associated with node (vp). This was - * filled by tmpfs_lookup while looking up the entry. */ - de = node->tn_lookup_dirent; - KASSERT(TMPFS_DIRENT_MATCHES(de, - ((struct vop_rmdir_args *)v)->a_cnp->cn_nameptr, - ((struct vop_rmdir_args *)v)->a_cnp->cn_namelen)); + /* Get the directory entry associated with node (vp). */ + de = tmpfs_dir_lookup(dnode, cnp); + if (de == NULL) { + error = ENOENT; + goto out; + } + KASSERT(de->td_node == node); /* Check flags to see if we are allowed to remove the directory. */ if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) { @@ -1082,6 +1092,7 @@ tmpfs_rmdir(void *v) * reclaimed. */ tmpfs_free_dirent(tmp, de, true); + KASSERT(node->tn_links == 0); out: /* Release the nodes. 
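[With tn_lookup_dirent removed from struct tmpfs_node, remove/rmdir/rename re-find their directory entry at operation time instead of trusting state cached by an earlier lookup; the entry may legitimately have vanished in between. The shape of that check, as used in the hunks above:

	de = tmpfs_dir_lookup(dnode, cnp);
	if (de == NULL) {
		error = ENOENT;		/* entry went away since lookup */
		goto out;
	}
	KASSERT(de->td_node == node);	/* must still name our vnode */
]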
*/ vput(dvp); @@ -1244,20 +1255,15 @@ int tmpfs_inactive(void *v) { struct vnode *vp = ((struct vop_inactive_args *)v)->a_vp; - nlink_t links; struct tmpfs_node *node; KASSERT(VOP_ISLOCKED(vp)); node = VP_TO_TMPFS_NODE(vp); - links = node->tn_links; - + *((struct vop_inactive_args *)v)->a_recycle = (node->tn_links == 0); VOP_UNLOCK(vp, 0); - if (links == 0) - vrecycle(vp, NULL, curlwp); - return 0; } @@ -1271,8 +1277,6 @@ tmpfs_reclaim(void *v) struct tmpfs_mount *tmp; struct tmpfs_node *node; - KASSERT(!VOP_ISLOCKED(vp)); - node = VP_TO_TMPFS_NODE(vp); tmp = VFS_TO_TMPFS(vp->v_mount); @@ -1285,7 +1289,6 @@ tmpfs_reclaim(void *v) if (node->tn_links == 0) tmpfs_free_node(tmp, node); - KASSERT(!VOP_ISLOCKED(vp)); KASSERT(vp->v_data == NULL); return 0; @@ -1405,7 +1408,7 @@ tmpfs_getpages(void *v) int npages = *count; KASSERT(vp->v_type == VREG); - LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); + KASSERT(mutex_owned(&vp->v_interlock)); node = VP_TO_TMPFS_NODE(vp); uobj = node->tn_spec.tn_reg.tn_aobj; @@ -1414,7 +1417,7 @@ tmpfs_getpages(void *v) if (vp->v_size <= offset + (centeridx << PAGE_SHIFT)) { if ((flags & PGO_LOCKED) == 0) - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return EINVAL; } @@ -1433,7 +1436,7 @@ tmpfs_getpages(void *v) node->tn_status |= TMPFS_NODE_MODIFIED; } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); /* * Make sure that the array on which we will store the @@ -1448,7 +1451,7 @@ tmpfs_getpages(void *v) if (m != NULL) for (i = 0; i < npages; i++) m[i] = NULL; - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); error = (*uobj->pgops->pgo_get)(uobj, offset, m, &npages, centeridx, access_type, advice, flags | PGO_ALLPAGES); #if defined(DEBUG) @@ -1478,19 +1481,19 @@ tmpfs_putpages(void *v) struct tmpfs_node *node; struct uvm_object *uobj; - LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); + KASSERT(mutex_owned(&vp->v_interlock)); node = VP_TO_TMPFS_NODE(vp); if (vp->v_type != VREG) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return 0; } uobj = node->tn_spec.tn_reg.tn_aobj; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); error = (*uobj->pgops->pgo_put)(uobj, offlo, offhi, flags); /* XXX mtime */ diff --git a/sys/fs/udf/udf_subr.c b/sys/fs/udf/udf_subr.c index 42cec501df16..1309569266dc 100644 --- a/sys/fs/udf/udf_subr.c +++ b/sys/fs/udf/udf_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: udf_subr.c,v 1.43 2007/12/11 12:05:27 lukem Exp $ */ +/* $NetBSD: udf_subr.c,v 1.44 2008/01/02 11:48:47 ad Exp $ */ /* * Copyright (c) 2006 Reinoud Zandijk @@ -36,7 +36,7 @@ #include #ifndef lint -__KERNEL_RCSID(0, "$NetBSD: udf_subr.c,v 1.43 2007/12/11 12:05:27 lukem Exp $"); +__KERNEL_RCSID(0, "$NetBSD: udf_subr.c,v 1.44 2008/01/02 11:48:47 ad Exp $"); #endif /* not lint */ @@ -1783,7 +1783,7 @@ loop: unp->loc.loc.part_num == icbptr->loc.part_num) { vp = unp->vnode; assert(vp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&ump->ihash_lock); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) goto loop; @@ -2709,27 +2709,32 @@ udf_read_file_extent(struct udf_node *node, uint32_t from, uint32_t sectors, uint8_t *blob) { - struct buf buf; + struct buf *buf; uint32_t sector_size; + int rv; - BUF_INIT(&buf); + buf = getiobuf(NULL, true); sector_size = node->ump->discinfo.sector_size; - buf.b_bufsize = sectors * sector_size; - buf.b_data = blob; - buf.b_bcount = buf.b_bufsize; - buf.b_resid = buf.b_bcount; - buf.b_flags 
= B_BUSY | B_READ; - buf.b_vp = node->vnode; - buf.b_proc = NULL; + buf->b_bufsize = sectors * sector_size; + buf->b_data = blob; + buf->b_bcount = buf->b_bufsize; + buf->b_resid = buf->b_bcount; + buf->b_cflags = BC_BUSY; + buf->b_flags = B_READ; + buf->b_vp = node->vnode; + buf->b_proc = NULL; - buf.b_blkno = from; - buf.b_lblkno = 0; - BIO_SETPRIO(&buf, BPRIO_TIMELIMITED); + buf->b_blkno = from; + buf->b_lblkno = 0; + BIO_SETPRIO(buf, BPRIO_TIMELIMITED); - udf_read_filebuf(node, &buf); - return biowait(&buf); + udf_read_filebuf(node, buf); + rv = biowait(buf); + putiobuf(buf); + + return rv; } @@ -2835,7 +2840,7 @@ udf_read_filebuf(struct udf_node *node, struct buf *buf) rbuflen = run_length * sector_size; rblk = run_start * (sector_size/DEV_BSIZE); - nestbuf = getiobuf(); + nestbuf = getiobuf(NULL, true); nestiobuf_setup(buf, nestbuf, buf_offset, rbuflen); /* nestbuf is B_ASYNC */ diff --git a/sys/fs/udf/udf_vnops.c b/sys/fs/udf/udf_vnops.c index e50a02d8e5d8..3277616d53a8 100644 --- a/sys/fs/udf/udf_vnops.c +++ b/sys/fs/udf/udf_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: udf_vnops.c,v 1.14 2007/12/11 12:05:28 lukem Exp $ */ +/* $NetBSD: udf_vnops.c,v 1.15 2008/01/02 11:48:47 ad Exp $ */ /* * Copyright (c) 2006 Reinoud Zandijk @@ -36,7 +36,7 @@ #include #ifndef lint -__KERNEL_RCSID(0, "$NetBSD: udf_vnops.c,v 1.14 2007/12/11 12:05:28 lukem Exp $"); +__KERNEL_RCSID(0, "$NetBSD: udf_vnops.c,v 1.15 2008/01/02 11:48:47 ad Exp $"); #endif /* not lint */ @@ -842,11 +842,11 @@ udf_close(void *v) DPRINTF(CALL, ("udf_close called\n")); udf_node = udf_node; /* shut up gcc */ - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_usecount > 1) { /* TODO update times */ } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return 0; } diff --git a/sys/fs/union/union.h b/sys/fs/union/union.h index 219eb9fe816d..aff9f53a4386 100644 --- a/sys/fs/union/union.h +++ b/sys/fs/union/union.h @@ -1,4 +1,4 @@ -/* $NetBSD: union.h,v 1.16 2007/12/08 19:29:45 pooka Exp $ */ +/* $NetBSD: union.h,v 1.17 2008/01/02 11:48:47 ad Exp $ */ /* * Copyright (c) 1994 The Regents of the University of California. @@ -129,8 +129,6 @@ struct union_node { #define UN_ULOCK 0x04 /* Upper node is locked */ #define UN_KLOCK 0x08 /* Keep upper node locked on vput */ #define UN_CACHED 0x10 /* In union cache */ -#define UN_DRAINING 0x20 /* upper node lock is draining */ -#define UN_DRAINED 0x40 /* upper node lock is drained */ extern int union_allocvp(struct vnode **, struct mount *, struct vnode *, struct vnode *, diff --git a/sys/fs/union/union_vnops.c b/sys/fs/union/union_vnops.c index 0a25ac6c232b..6759314faab7 100644 --- a/sys/fs/union/union_vnops.c +++ b/sys/fs/union/union_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: union_vnops.c,v 1.24 2007/12/08 19:29:46 pooka Exp $ */ +/* $NetBSD: union_vnops.c,v 1.25 2008/01/02 11:48:48 ad Exp $ */ /* * Copyright (c) 1992, 1993, 1994, 1995 @@ -72,7 +72,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.24 2007/12/08 19:29:46 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.25 2008/01/02 11:48:48 ad Exp $"); #include #include @@ -1093,7 +1093,7 @@ union_revoke(v) VOP_REVOKE(UPPERVP(vp), ap->a_flags); if (LOWERVP(vp)) VOP_REVOKE(LOWERVP(vp), ap->a_flags); - vgone(vp); + vgone(vp); /* XXXAD?? 
*/ return (0); } @@ -1619,6 +1619,7 @@ union_inactive(v) struct vop_inactive_args /* { const struct vnodeop_desc *a_desc; struct vnode *a_vp; + bool *a_recycle; } */ *ap = v; struct vnode *vp = ap->a_vp; struct union_node *un = VTOUNION(vp); @@ -1644,11 +1645,9 @@ union_inactive(v) un->un_dircache = 0; } + *ap->a_recycle = ((un->un_flags & UN_CACHED) == 0); VOP_UNLOCK(vp, 0); - if ((un->un_flags & UN_CACHED) == 0) - vgone(vp); - return (0); } @@ -1677,9 +1676,6 @@ union_lock(v) int flags = ap->a_flags; struct union_node *un; int error; -#ifdef DIAGNOSTIC - int drain = 0; -#endif /* XXX unionfs can't handle shared locks yet */ if ((flags & LK_TYPE_MASK) == LK_SHARED) { @@ -1699,30 +1695,6 @@ union_lock(v) flags &= ~LK_INTERLOCK; un = VTOUNION(vp); -#ifdef DIAGNOSTIC - if (un->un_flags & (UN_DRAINING|UN_DRAINED)) { - if (un->un_flags & UN_DRAINED) - panic("union: %p: warning: locking decommissioned lock", vp); - if ((flags & LK_TYPE_MASK) != LK_RELEASE) - panic("union: %p: non-release on draining lock: %d", - vp, flags & LK_TYPE_MASK); - un->un_flags &= ~UN_DRAINING; - if ((flags & LK_REENABLE) == 0) - un->un_flags |= UN_DRAINED; - } -#endif - - /* - * Don't pass DRAIN through to sub-vnode lock; keep track of - * DRAIN state at this level, and just get an exclusive lock - * on the underlying vnode. - */ - if ((flags & LK_TYPE_MASK) == LK_DRAIN) { -#ifdef DIAGNOSTIC - drain = 1; -#endif - flags = LK_EXCLUSIVE | (flags & ~LK_TYPE_MASK); - } start: un = VTOUNION(vp); @@ -1764,8 +1736,6 @@ start: un->un_pid = curproc->p_pid; else un->un_pid = -1; - if (drain) - un->un_flags |= UN_DRAINING; #endif un->un_flags |= UN_LOCKED; @@ -1799,8 +1769,6 @@ union_unlock(v) if (curproc && un->un_pid != curproc->p_pid && curproc->p_pid > -1 && un->un_pid > -1) panic("union: unlocking other process's union node"); - if (un->un_flags & UN_DRAINED) - panic("union: %p: warning: unlocking decommissioned lock", ap->a_vp); #endif un->un_flags &= ~UN_LOCKED; @@ -1817,10 +1785,6 @@ union_unlock(v) #ifdef DIAGNOSTIC un->un_pid = 0; - if (un->un_flags & UN_DRAINING) { - un->un_flags |= UN_DRAINED; - un->un_flags &= ~UN_DRAINING; - } #endif genfs_nounlock(ap); @@ -1984,8 +1948,8 @@ union_getpages(v) return EBUSY; } ap->a_vp = OTHERVP(vp); - simple_unlock(&vp->v_interlock); - simple_lock(&ap->a_vp->v_interlock); + mutex_exit(&vp->v_interlock); + mutex_enter(&ap->a_vp->v_interlock); error = VCALL(ap->a_vp, VOFFSET(vop_getpages), ap); return error; } @@ -2008,11 +1972,11 @@ union_putpages(v) */ ap->a_vp = OTHERVP(vp); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); + mutex_enter(&ap->a_vp->v_interlock); if (ap->a_flags & PGO_RECLAIM) { return 0; } - simple_lock(&ap->a_vp->v_interlock); error = VCALL(ap->a_vp, VOFFSET(vop_putpages), ap); return error; } diff --git a/sys/kern/exec_subr.c b/sys/kern/exec_subr.c index 6e8dfd7017d2..f2fc337cd493 100644 --- a/sys/kern/exec_subr.c +++ b/sys/kern/exec_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: exec_subr.c,v 1.57 2007/12/26 22:11:48 christos Exp $ */ +/* $NetBSD: exec_subr.c,v 1.58 2008/01/02 11:48:48 ad Exp $ */ /* * Copyright (c) 1993, 1994, 1996 Christopher G. 
Demetriou @@ -31,7 +31,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: exec_subr.c,v 1.57 2007/12/26 22:11:48 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: exec_subr.c,v 1.58 2008/01/02 11:48:48 ad Exp $"); #include "opt_pax.h" @@ -191,10 +191,10 @@ vmcmd_map_pagedvn(struct lwp *l, struct exec_vmcmd *cmd) if ((vp->v_vflag & VV_MAPPED) == 0) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); vp->v_vflag |= VV_MAPPED; vp->v_iflag |= VI_MAPPED; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); VOP_UNLOCK(vp, 0); } diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index c40c12c05a10..6b2431410ffc 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -1,4 +1,4 @@ -/* $NetBSD: init_main.c,v 1.335 2007/12/31 15:32:10 ad Exp $ */ +/* $NetBSD: init_main.c,v 1.336 2008/01/02 11:48:48 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993 @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.335 2007/12/31 15:32:10 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.336 2008/01/02 11:48:48 ad Exp $"); #include "opt_ipsec.h" #include "opt_ntp.h" @@ -423,6 +423,9 @@ main(void) tty_init(); ttyldisc_init(); + /* Initialize the buffer cache, part 2. */ + bufinit2(); + /* Initialize the disk wedge subsystem. */ dkwedge_init(); @@ -639,17 +642,18 @@ main(void) /* Create the pageout daemon kernel thread. */ uvm_swap_init(); - if (kthread_create(PRI_PGDAEMON, 0, NULL, uvm_pageout, + if (kthread_create(PRI_PGDAEMON, KTHREAD_MPSAFE, NULL, uvm_pageout, NULL, NULL, "pgdaemon")) panic("fork pagedaemon"); /* Create the filesystem syncer kernel thread. */ - if (kthread_create(PRI_IOFLUSH, 0, NULL, sched_sync, NULL, NULL, "ioflush")) + if (kthread_create(PRI_IOFLUSH, KTHREAD_MPSAFE, NULL, sched_sync, + NULL, NULL, "ioflush")) panic("fork syncer"); /* Create the aiodone daemon kernel thread. */ if (workqueue_create(&uvm.aiodone_queue, "aiodoned", - uvm_aiodone_worker, NULL, PRI_VM, IPL_BIO, 0)) + uvm_aiodone_worker, NULL, PRI_VM, IPL_NONE, WQ_MPSAFE)) panic("fork aiodoned"); vmem_rehash_start(); diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 94c018c425c8..7caaa0949b44 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_exec.c,v 1.263 2007/12/31 15:32:10 ad Exp $ */ +/* $NetBSD: kern_exec.c,v 1.264 2008/01/02 11:48:49 ad Exp $ */ /*- * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou @@ -33,7 +33,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.263 2007/12/31 15:32:10 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.264 2008/01/02 11:48:49 ad Exp $"); #include "opt_ktrace.h" #include "opt_syscall_debug.h" @@ -1078,6 +1078,7 @@ execve1(struct lwp *l, const char *path, char * const *args, vrele(pack.ep_interp); /* Acquire the sched-state mutex (exit1() will release it). */ + KERNEL_LOCK(1, NULL); /* XXXSMP */ mutex_enter(&p->p_smutex); exit1(l, W_EXITCODE(error, SIGABRT)); diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 41f31332f981..f23f7c58dc1c 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_exit.c,v 1.197 2007/12/31 15:32:11 ad Exp $ */ +/* $NetBSD: kern_exit.c,v 1.198 2008/01/02 11:48:49 ad Exp $ */ /*- * Copyright (c) 1998, 1999, 2006, 2007 The NetBSD Foundation, Inc. 
@@ -74,7 +74,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.197 2007/12/31 15:32:11 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.198 2008/01/02 11:48:49 ad Exp $"); #include "opt_ktrace.h" #include "opt_perfctrs.h" @@ -173,6 +173,7 @@ sys_exit(struct lwp *l, const struct sys_exit_args *uap, register_t *retval) struct proc *p = l->l_proc; /* Don't call exit1() multiple times in the same process. */ + KERNEL_LOCK(1, NULL); mutex_enter(&p->p_smutex); if (p->p_sflag & PS_WEXIT) { mutex_exit(&p->p_smutex); @@ -375,15 +376,11 @@ exit1(struct lwp *l, int rv) tp->t_pgrp = NULL; tp->t_session = NULL; mutex_spin_exit(&tty_lock); - SESSRELE(sp); mutex_exit(&proclist_lock); (void) ttywait(tp); mutex_enter(&proclist_lock); - /* - * The tty could have been revoked - * if we blocked. - */ + /* The tty could have been revoked. */ vprevoke = sp->s_ttyvp; } else mutex_spin_exit(&tty_lock); @@ -398,9 +395,12 @@ exit1(struct lwp *l, int rv) sp->s_leader = NULL; if (vprevoke != NULL || vprele != NULL) { - mutex_exit(&proclist_lock); - if (vprevoke != NULL) + if (vprevoke != NULL) { + SESSRELE(sp); + mutex_exit(&proclist_lock); VOP_REVOKE(vprevoke, REVOKEALL); + } else + mutex_exit(&proclist_lock); if (vprele != NULL) vrele(vprele); mutex_enter(&proclist_lock); @@ -423,6 +423,7 @@ exit1(struct lwp *l, int rv) KNOTE(&p->p_klist, NOTE_EXIT); + #if PERFCTRS /* * Save final PMC information in parent process & clean up. @@ -678,9 +679,10 @@ do_sys_wait(struct lwp *l, int *pid, int *status, int options, struct proc *child; int error; + KERNEL_LOCK(1, NULL); /* XXXSMP */ mutex_enter(&proclist_lock); - error = find_stopped_child(l->l_proc, *pid, options, &child, status); + KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ if (child == NULL) { mutex_exit(&proclist_lock); @@ -696,9 +698,7 @@ do_sys_wait(struct lwp *l, int *pid, int *status, int options, if (options & WNOWAIT) mutex_exit(&proclist_lock); else { - KERNEL_LOCK(1, l); /* XXXSMP */ proc_free(child, ru); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ } } else { /* Child state must have been SSTOP. 
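[The do_sys_wait() hunk above shows the interim idiom used while subsystems are being made MP-safe: bracket a still-unsafe call with the big lock and tag it XXXSMP so the workaround can be found and removed later. Sketched on the same call:

	KERNEL_LOCK(1, NULL);		/* XXXSMP: path not yet MP-safe */
	error = find_stopped_child(l->l_proc, *pid, options, &child, status);
	KERNEL_UNLOCK_ONE(NULL);	/* XXXSMP */
]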
*/ @@ -906,7 +906,9 @@ proc_free(struct proc *p, struct rusage *ru) kpsignal(parent, &ksi, NULL); mutex_exit(&proclist_mutex); } + KERNEL_LOCK(1, NULL); /* XXXSMP */ cv_broadcast(&parent->p_waitcv); + KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ mutex_exit(&proclist_lock); return; } @@ -990,7 +992,7 @@ proc_free(struct proc *p, struct rusage *ru) if (p->p_textvp) vrele(p->p_textvp); - mutex_destroy(&p->p_raslock); + mutex_destroy(&p->p_auxlock); mutex_destroy(&p->p_mutex); mutex_destroy(&p->p_stmutex); mutex_destroy(&p->p_smutex); diff --git a/sys/kern/kern_fileassoc.c b/sys/kern/kern_fileassoc.c index deea240c91fa..8f11656cd174 100644 --- a/sys/kern/kern_fileassoc.c +++ b/sys/kern/kern_fileassoc.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_fileassoc.c,v 1.29 2007/05/15 19:47:45 elad Exp $ */ +/* $NetBSD: kern_fileassoc.c,v 1.30 2008/01/02 11:48:49 ad Exp $ */ /*- * Copyright (c) 2006 Elad Efrat @@ -28,7 +28,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_fileassoc.c,v 1.29 2007/05/15 19:47:45 elad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_fileassoc.c,v 1.30 2008/01/02 11:48:49 ad Exp $"); #include "opt_fileassoc.h" @@ -528,15 +528,21 @@ fileassoc_file_delete(struct vnode *vp) struct fileassoc_table *tbl; struct fileassoc_hash_entry *mhe; + KERNEL_LOCK(1, NULL); + mhe = fileassoc_file_lookup(vp, NULL); - if (mhe == NULL) + if (mhe == NULL) { + KERNEL_UNLOCK_ONE(NULL); return (ENOENT); + } file_free(mhe); tbl = fileassoc_table_lookup(vp->v_mount); --(tbl->hash_used); /* XXX gc? */ + KERNEL_UNLOCK_ONE(NULL); + return (0); } diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 70d927715937..29ab8ac3d57b 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_fork.c,v 1.154 2007/12/31 15:32:11 ad Exp $ */ +/* $NetBSD: kern_fork.c,v 1.155 2008/01/02 11:48:49 ad Exp $ */ /*- * Copyright (c) 1999, 2001, 2004, 2006, 2007 The NetBSD Foundation, Inc. @@ -74,7 +74,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_fork.c,v 1.154 2007/12/31 15:32:11 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_fork.c,v 1.155 2008/01/02 11:48:49 ad Exp $"); #include "opt_ktrace.h" #include "opt_multiprocessor.h" @@ -315,7 +315,7 @@ fork1(struct lwp *l1, int flags, int exitsig, void *stack, size_t stacksize, /* XXX p_smutex can be IPL_VM except for audio drivers */ mutex_init(&p2->p_smutex, MUTEX_DEFAULT, IPL_SCHED); mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH); - mutex_init(&p2->p_raslock, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE); mutex_init(&p2->p_mutex, MUTEX_DEFAULT, IPL_NONE); rw_init(&p2->p_reflock); cv_init(&p2->p_waitcv, "wait"); diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c index 840497d2c945..6f60f4553097 100644 --- a/sys/kern/kern_kthread.c +++ b/sys/kern/kern_kthread.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_kthread.c,v 1.19 2007/11/06 00:42:41 ad Exp $ */ +/* $NetBSD: kern_kthread.c,v 1.20 2008/01/02 11:48:50 ad Exp $ */ /*- * Copyright (c) 1998, 1999, 2007 The NetBSD Foundation, Inc. 
@@ -38,7 +38,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_kthread.c,v 1.19 2007/11/06 00:42:41 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_kthread.c,v 1.20 2008/01/02 11:48:50 ad Exp $"); #include #include @@ -116,8 +116,8 @@ kthread_create(pri_t pri, int flag, struct cpu_info *ci, } if ((flag & KTHREAD_INTR) != 0) l->l_pflag |= LP_INTR; - if ((flag & KTHREAD_MPSAFE) != 0) - l->l_pflag |= LP_MPSAFE; + if ((flag & KTHREAD_MPSAFE) == 0) + l->l_pflag &= ~LP_MPSAFE; /* * Set the new LWP running, unless the caller has requested diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c index 29d5906e6ede..fd8309e8a7b5 100644 --- a/sys/kern/kern_ktrace.c +++ b/sys/kern/kern_ktrace.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_ktrace.c,v 1.132 2007/12/22 11:38:54 dsl Exp $ */ +/* $NetBSD: kern_ktrace.c,v 1.133 2008/01/02 11:48:50 ad Exp $ */ /*- * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc. @@ -68,7 +68,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_ktrace.c,v 1.132 2007/12/22 11:38:54 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_ktrace.c,v 1.133 2008/01/02 11:48:50 ad Exp $"); #include #include @@ -412,11 +412,9 @@ void ktefree(struct ktrace_entry *kte) { - KERNEL_LOCK(1, curlwp); /* XXXSMP */ if (kte->kte_buf != kte->kte_space) kmem_free(kte->kte_buf, kte->kte_bufsz); pool_put(&kte_pool, kte); - KERNEL_UNLOCK_ONE(curlwp); /* XXXSMP */ } /* @@ -500,18 +498,15 @@ ktealloc(struct ktrace_entry **ktep, void **bufp, lwp_t *l, int type, if (ktrenter(l)) return EAGAIN; - KERNEL_LOCK(1, l); /* XXXSMP */ kte = pool_get(&kte_pool, PR_WAITOK); if (sz > sizeof(kte->kte_space)) { if ((buf = kmem_alloc(sz, KM_SLEEP)) == NULL) { pool_put(&kte_pool, kte); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ ktrexit(l); return ENOMEM; } } else buf = kte->kte_space; - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ kte->kte_bufsz = sz; kte->kte_buf = buf; @@ -1059,7 +1054,7 @@ ktrace_common(lwp_t *curl, int ops, int facs, int pid, struct file *fp) if (ktd == NULL) { ktd = kmem_alloc(sizeof(*ktd), KM_SLEEP); TAILQ_INIT(&ktd->ktd_queue); - callout_init(&ktd->ktd_wakch, 0); + callout_init(&ktd->ktd_wakch, CALLOUT_MPSAFE); cv_init(&ktd->ktd_cv, "ktrwait"); cv_init(&ktd->ktd_sync_cv, "ktrsync"); ktd->ktd_flags = 0; @@ -1081,7 +1076,7 @@ ktrace_common(lwp_t *curl, int ops, int facs, int pid, struct file *fp) if (fp->f_type == DTYPE_PIPE) ktd->ktd_flags |= KTDF_INTERACTIVE; - error = kthread_create(PRI_NONE, 0, NULL, + error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, ktrace_thread, ktd, &ktd->ktd_lwp, "ktrace"); if (error != 0) { kmem_free(ktd, sizeof(*ktd)); diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c index cde8509d10ef..2fb157226857 100644 --- a/sys/kern/kern_lock.c +++ b/sys/kern/kern_lock.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_lock.c,v 1.129 2007/12/06 17:05:08 ad Exp $ */ +/* $NetBSD: kern_lock.c,v 1.130 2008/01/02 11:48:50 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2006, 2007 The NetBSD Foundation, Inc. 
@@ -76,7 +76,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.129 2007/12/06 17:05:08 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.130 2008/01/02 11:48:50 ad Exp $"); #include "opt_multiprocessor.h" @@ -108,6 +108,14 @@ int lock_debug_syslog = 0; /* defaults to printf, but can be patched */ bool kernel_lock_dodebug; __cpu_simple_lock_t kernel_lock; +#ifdef LOCKDEBUG +static lockops_t lockmgr_lockops = { + "lockmgr", + 1, + (void *)nullop +}; +#endif + #if defined(LOCKDEBUG) || defined(DIAGNOSTIC) /* { */ #define COUNT(lkp, l, cpu_id, x) (l)->l_locks += (x) #else @@ -140,9 +148,9 @@ acquire(struct lock **lkpp, int *s, int extflags, lkp->lk_flags |= LK_WAIT_NONZERO; } LOCKSTAT_START_TIMER(lsflag, slptime); - error = ltsleep(drain ? (void *)&lkp->lk_flags : (void *)lkp, + error = mtsleep(drain ? (void *)&lkp->lk_flags : (void *)lkp, lkp->lk_prio, lkp->lk_wmesg, lkp->lk_timo, - &lkp->lk_interlock); + __UNVOLATILE(&lkp->lk_interlock)); LOCKSTAT_STOP_TIMER(lsflag, slptime); LOCKSTAT_EVENT_RA(lsflag, (void *)(uintptr_t)lkp, LB_LOCKMGR | LB_SLEEP1, 1, slptime, ra); @@ -237,20 +245,26 @@ lockinit(struct lock *lkp, pri_t prio, const char *wmesg, int timo, int flags) memset(lkp, 0, sizeof(struct lock)); lkp->lk_flags = flags & LK_EXTFLG_MASK; - simple_lock_init(&lkp->lk_interlock); + mutex_init(&lkp->lk_interlock, MUTEX_DEFAULT, IPL_NONE); lkp->lk_lockholder = LK_NOPROC; lkp->lk_prio = prio; lkp->lk_timo = timo; lkp->lk_wmesg = wmesg; lkp->lk_lock_addr = 0; lkp->lk_unlock_addr = 0; + + if (LOCKDEBUG_ALLOC(lkp, &lockmgr_lockops, + (uintptr_t)__builtin_return_address(0))) { + lkp->lk_flags |= LK_DODEBUG; + } } void lockdestroy(struct lock *lkp) { - /* nothing yet */ + LOCKDEBUG_FREE(((lkp->lk_flags & LK_DODEBUG) != 0), lkp); + mutex_destroy(&lkp->lk_interlock); } /* @@ -275,7 +289,7 @@ lockstatus(struct lock *lkp) lid = l->l_lid; } - simple_lock(&lkp->lk_interlock); + mutex_enter(&lkp->lk_interlock); if (lkp->lk_exclusivecount != 0) { if (WEHOLDIT(lkp, pid, lid, cpu_num)) lock_type = LK_EXCLUSIVE; @@ -285,7 +299,7 @@ lockstatus(struct lock *lkp) lock_type = LK_SHARED; else if (lkp->lk_flags & LK_WANT_EXCL) lock_type = LK_EXCLOTHER; - simple_unlock(&lkp->lk_interlock); + mutex_exit(&lkp->lk_interlock); return (lock_type); } @@ -312,7 +326,7 @@ lockstatus(struct lock *lkp) * accepted shared locks to go away. 
*/ int -lockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp) +lockmgr(struct lock *lkp, u_int flags, kmutex_t *interlkp) { int error; pid_t pid; @@ -329,9 +343,9 @@ lockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp) KASSERT((flags & LK_RETRY) == 0); KASSERT((l->l_pflag & LP_INTR) == 0 || panicstr != NULL); - simple_lock(&lkp->lk_interlock); + mutex_enter(&lkp->lk_interlock); if (flags & LK_INTERLOCK) - simple_unlock(interlkp); + mutex_exit(interlkp); extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK; if (l == NULL) { @@ -438,9 +452,6 @@ lockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp) lockpanic(lkp, "lockmgr: locking against myself"); } lkp->lk_exclusivecount++; - if (extflags & LK_SETRECURSE && - lkp->lk_recurselevel == 0) - lkp->lk_recurselevel = lkp->lk_exclusivecount; COUNT(lkp, l, cpu_num, 1); break; } @@ -479,8 +490,6 @@ lockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp) if (lkp->lk_exclusivecount != 0) lockpanic(lkp, "lockmgr: non-zero exclusive count"); lkp->lk_exclusivecount = 1; - if (extflags & LK_SETRECURSE) - lkp->lk_recurselevel = 1; COUNT(lkp, l, cpu_num, 1); break; @@ -549,14 +558,11 @@ lockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp) lkp->lk_lock_addr = RETURN_ADDRESS; #endif lkp->lk_exclusivecount = 1; - /* XXX unlikely that we'd want this */ - if (extflags & LK_SETRECURSE) - lkp->lk_recurselevel = 1; COUNT(lkp, l, cpu_num, 1); break; default: - simple_unlock(&lkp->lk_interlock); + mutex_exit(&lkp->lk_interlock); lockpanic(lkp, "lockmgr: unknown locktype request %d", flags & LK_TYPE_MASK); /* NOTREACHED */ @@ -575,7 +581,7 @@ lockmgr(struct lock *lkp, u_int flags, struct simplelock *interlkp) if (error && lock_shutdown_noblock) lockpanic(lkp, "lockmgr: deadlock (see previous panic)"); - simple_unlock(&lkp->lk_interlock); + mutex_exit(&lkp->lk_interlock); return (error); } diff --git a/sys/kern/kern_lwp.c b/sys/kern/kern_lwp.c index 9802c5bc4275..826f00a258e2 100644 --- a/sys/kern/kern_lwp.c +++ b/sys/kern/kern_lwp.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_lwp.c,v 1.87 2007/12/26 16:01:36 ad Exp $ */ +/* $NetBSD: kern_lwp.c,v 1.88 2008/01/02 11:48:50 ad Exp $ */ /*- * Copyright (c) 2001, 2006, 2007 The NetBSD Foundation, Inc. @@ -205,7 +205,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.87 2007/12/26 16:01:36 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.88 2008/01/02 11:48:50 ad Exp $"); #include "opt_ddb.h" #include "opt_multiprocessor.h" @@ -581,6 +581,7 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, bool inmem, int flags, l2->l_mutex = l1->l_cpu->ci_schedstate.spc_mutex; l2->l_cpu = l1->l_cpu; l2->l_flag = inmem ? LW_INMEM : 0; + l2->l_pflag = LP_MPSAFE; if (p2->p_flag & PK_SYSTEM) { /* @@ -706,6 +707,7 @@ lwp_exit(struct lwp *l) mutex_enter(&p->p_smutex); if (p->p_nlwps - p->p_nzlwps == 1) { KASSERT(current == true); + /* XXXSMP kernel_lock not held */ exit1(l, 0); /* NOTREACHED */ } @@ -913,8 +915,6 @@ lwp_free(struct lwp *l, bool recycle, bool last) * * We don't recycle the VM resources at this time. 
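[In the lockmgr() conversion above the interlock becomes a kmutex, and the ltsleep() call in acquire() becomes mtsleep(), which atomically releases the given mutex while sleeping and re-acquires it before returning. A sketch of the resulting wait loop, using LK_WANT_EXCL purely as an example condition:

	mutex_enter(&lkp->lk_interlock);
	while ((lkp->lk_flags & LK_WANT_EXCL) != 0) {
		error = mtsleep(lkp, lkp->lk_prio, lkp->lk_wmesg,
		    lkp->lk_timo, &lkp->lk_interlock);	/* drops/retakes */
		if (error != 0)
			break;
	}
	mutex_exit(&lkp->lk_interlock);
]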
*/ - KERNEL_LOCK(1, curlwp); /* XXXSMP */ - if (l->l_lwpctl != NULL) lwp_ctl_free(l); sched_lwp_exit(l); @@ -929,7 +929,6 @@ lwp_free(struct lwp *l, bool recycle, bool last) KASSERT(l->l_inheritedprio == -1); if (!recycle) pool_cache_put(lwp_cache, l); - KERNEL_UNLOCK_ONE(curlwp); /* XXXSMP */ } /* @@ -1089,11 +1088,8 @@ lwp_update_creds(struct lwp *l) kauth_cred_hold(p->p_cred); l->l_cred = p->p_cred; mutex_exit(&p->p_mutex); - if (oc != NULL) { - KERNEL_LOCK(1, l); /* XXXSMP */ + if (oc != NULL) kauth_cred_free(oc); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ - } } /* @@ -1234,12 +1230,10 @@ lwp_userret(struct lwp *l) */ if ((l->l_flag & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) == LW_PENDSIG) { - KERNEL_LOCK(1, l); /* XXXSMP pool_put() below */ mutex_enter(&p->p_smutex); while ((sig = issignal(l)) != 0) postsig(sig); mutex_exit(&p->p_smutex); - KERNEL_UNLOCK_LAST(l); /* XXXSMP */ } /* @@ -1264,7 +1258,6 @@ lwp_userret(struct lwp *l) /* Process is exiting. */ if ((l->l_flag & LW_WEXIT) != 0) { - KERNEL_LOCK(1, l); lwp_exit(l); KASSERT(0); /* NOTREACHED */ diff --git a/sys/kern/kern_physio.c b/sys/kern/kern_physio.c index 09243ce7202a..eab604ce4294 100644 --- a/sys/kern/kern_physio.c +++ b/sys/kern/kern_physio.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_physio.c,v 1.85 2007/11/06 00:42:42 ad Exp $ */ +/* $NetBSD: kern_physio.c,v 1.86 2008/01/02 11:48:51 ad Exp $ */ /*- * Copyright (c) 1982, 1986, 1990, 1993 @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_physio.c,v 1.85 2007/11/06 00:42:42 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_physio.c,v 1.86 2008/01/02 11:48:51 ad Exp $"); #include #include @@ -116,7 +116,7 @@ struct physio_stat { }; /* abuse these flags of struct buf */ -#define B_DONTFREE B_AGE +#define BC_DONTFREE BC_AGE /* * allocate a buffer structure for use in physical I/O. @@ -126,9 +126,9 @@ getphysbuf(void) { struct buf *bp; - bp = getiobuf(); + bp = getiobuf(NULL, true); bp->b_error = 0; - bp->b_flags = B_BUSY; + bp->b_cflags = BC_BUSY; return(bp); } @@ -139,12 +139,12 @@ static void putphysbuf(struct buf *bp) { - if ((bp->b_flags & B_DONTFREE) != 0) { + if ((bp->b_cflags & BC_DONTFREE) != 0) { return; } - if (__predict_false(bp->b_flags & B_WANTED)) - panic("putphysbuf: private buf B_WANTED"); + if (__predict_false(bp->b_cflags & BC_WANTED)) + panic("putphysbuf: private buf BC_WANTED"); putiobuf(bp); } @@ -237,7 +237,7 @@ physio_init(void) KASSERT(physio_workqueue == NULL); error = workqueue_create(&physio_workqueue, "physiod", - physio_done, NULL, PRI_BIO, IPL_BIO, 0); + physio_done, NULL, PRI_BIO, IPL_BIO, WQ_MPSAFE); return error; } @@ -257,8 +257,7 @@ physio(void (*strategy)(struct buf *), struct buf *obp, dev_t dev, int flags, struct iovec *iovp; struct lwp *l = curlwp; struct proc *p = l->l_proc; - int i, s; - int error; + int i, error; struct buf *bp = NULL; struct physio_stat *ps; int concurrency = PHYSIO_CONCURRENCY - 1; @@ -285,24 +284,12 @@ physio(void (*strategy)(struct buf *), struct buf *obp, dev_t dev, int flags, /* Make sure we have a buffer, creating one if necessary. 
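getphysbuf() above now delegates to getiobuf(), which in this patch takes two arguments: an optional vnode to associate with the buffer and a wait flag. A hedged sketch of the allocate/release pairing as kern_physio.c now uses it, with the transfer setup elided:

    struct buf *bp;

    /* No vnode; waitok = true, so the call sleeps rather than fail. */
    bp = getiobuf(NULL, true);
    bp->b_cflags = BC_BUSY;         /* ownership now lives in b_cflags */

    /* ... fill in b_dev, b_data, b_bcount and start the transfer ... */

    putiobuf(bp);                   /* return the buffer to the I/O buf pool */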
*/ if (obp != NULL) { /* [raise the processor priority level to splbio;] */ - s = splbio(); - simple_lock(&obp->b_interlock); - - /* [while the buffer is marked busy] */ - while (obp->b_flags & B_BUSY) { - /* [mark the buffer wanted] */ - obp->b_flags |= B_WANTED; - /* [wait until the buffer is available] */ - ltsleep(obp, PRIBIO+1, "physbuf", 0, &obp->b_interlock); - } - + mutex_enter(&bufcache_lock); + while (bbusy(obp, false, 0) == EPASSTHROUGH) + ; /* Mark it busy, so nobody else will use it. */ - obp->b_flags = B_BUSY | B_DONTFREE; - - /* [lower the priority level] */ - simple_unlock(&obp->b_interlock); - splx(s); - + obp->b_cflags |= BC_DONTFREE; + mutex_exit(&bufcache_lock); concurrency = 0; /* see "XXXkludge" comment below */ } @@ -334,7 +321,6 @@ physio(void (*strategy)(struct buf *), struct buf *obp, dev_t dev, int flags, bp->b_dev = dev; bp->b_proc = p; bp->b_private = ps; - bp->b_vp = NULL; /* * [mark the buffer busy for physical I/O] @@ -343,8 +329,9 @@ physio(void (*strategy)(struct buf *), struct buf *obp, dev_t dev, int flags, * "Set by physio for raw transfers.", in addition * to the "busy" and read/write flag.) */ - bp->b_flags = (bp->b_flags & B_DONTFREE) | - B_BUSY | B_PHYS | B_RAW | B_CALL | flags; + bp->b_oflags = 0; + bp->b_cflags = (bp->b_cflags & BC_DONTFREE) | BC_BUSY; + bp->b_flags = flags | B_PHYS | B_RAW; bp->b_iodone = physio_biodone; /* [set up the buffer for a maximum-sized transfer] */ @@ -442,23 +429,19 @@ done_locked: * Also, if we had to steal it, give it back. */ if (obp != NULL) { - KASSERT((obp->b_flags & B_BUSY) != 0); - KASSERT((obp->b_flags & B_DONTFREE) != 0); + KASSERT((obp->b_cflags & BC_BUSY) != 0); + KASSERT((obp->b_cflags & BC_DONTFREE) != 0); /* * [if another process is waiting for the raw I/O buffer, * wake up processes waiting to do physical I/O; */ - s = splbio(); - simple_lock(&obp->b_interlock); - obp->b_flags &= - ~(B_BUSY | B_PHYS | B_RAW | B_CALL | B_DONTFREE); - if ((obp->b_flags & B_WANTED) != 0) { - obp->b_flags &= ~B_WANTED; - wakeup(obp); - } - simple_unlock(&obp->b_interlock); - splx(s); + mutex_enter(&bufcache_lock); + obp->b_cflags &= ~(BC_DONTFREE | BC_BUSY | BC_WANTED); + obp->b_flags &= ~(B_PHYS | B_RAW); + obp->b_iodone = NULL; + cv_broadcast(&obp->b_busy); + mutex_exit(&bufcache_lock); } uvm_lwp_rele(l); diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index f6767c898788..a2f55e0d923b 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_proc.c,v 1.128 2007/12/26 16:01:36 ad Exp $ */ +/* $NetBSD: kern_proc.c,v 1.129 2008/01/02 11:48:51 ad Exp $ */ /*- * Copyright (c) 1999, 2006, 2007 The NetBSD Foundation, Inc. 
@@ -69,7 +69,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.128 2007/12/26 16:01:36 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.129 2008/01/02 11:48:51 ad Exp $"); #include "opt_kstack.h" #include "opt_maxuprc.h" @@ -365,7 +365,7 @@ proc0_init(void) mutex_init(&p->p_smutex, MUTEX_DEFAULT, IPL_SCHED); mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH); - mutex_init(&p->p_raslock, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE); mutex_init(&p->p_mutex, MUTEX_DEFAULT, IPL_NONE); mutex_init(&l->l_swaplock, MUTEX_DEFAULT, IPL_NONE); diff --git a/sys/kern/kern_ras.c b/sys/kern/kern_ras.c index 16e0b6235174..aea8bba4628f 100644 --- a/sys/kern/kern_ras.c +++ b/sys/kern/kern_ras.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_ras.c,v 1.24 2007/12/20 23:03:09 dsl Exp $ */ +/* $NetBSD: kern_ras.c,v 1.25 2008/01/02 11:48:51 ad Exp $ */ /*- * Copyright (c) 2002, 2006, 2007 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_ras.c,v 1.24 2007/12/20 23:03:09 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_ras.c,v 1.25 2008/01/02 11:48:51 ad Exp $"); #include #include @@ -166,7 +166,7 @@ ras_purgeall(void) p = curproc; - mutex_enter(&p->p_raslock); + mutex_enter(&p->p_auxlock); if ((rp = p->p_raslist) != NULL) { p->p_raslist = NULL; ras_sync(); @@ -175,7 +175,7 @@ ras_purgeall(void) pool_put(&ras_pool, rp); } } - mutex_exit(&p->p_raslock); + mutex_exit(&p->p_auxlock); return 0; } @@ -211,7 +211,7 @@ ras_install(void *addr, size_t len) nras = 0; p = curproc; - mutex_enter(&p->p_raslock); + mutex_enter(&p->p_auxlock); for (rp = p->p_raslist; rp != NULL; rp = rp->ras_next) { if (++nras >= ras_per_proc) { error = EINVAL; @@ -226,9 +226,9 @@ ras_install(void *addr, size_t len) newrp->ras_next = p->p_raslist; p->p_raslist = newrp; ras_sync(); - mutex_exit(&p->p_raslock); + mutex_exit(&p->p_auxlock); } else { - mutex_exit(&p->p_raslock); + mutex_exit(&p->p_auxlock); pool_put(&ras_pool, newrp); } @@ -249,7 +249,7 @@ ras_purge(void *addr, size_t len) endaddr = (char *)addr + len; p = curproc; - mutex_enter(&p->p_raslock); + mutex_enter(&p->p_auxlock); link = &p->p_raslist; for (rp = *link; rp != NULL; link = &rp->ras_next, rp = *link) { if (addr == rp->ras_startaddr && endaddr == rp->ras_endaddr) @@ -258,11 +258,11 @@ ras_purge(void *addr, size_t len) if (rp != NULL) { *link = rp->ras_next; ras_sync(); - mutex_exit(&p->p_raslock); + mutex_exit(&p->p_auxlock); pool_put(&ras_pool, rp); return 0; } else { - mutex_exit(&p->p_raslock); + mutex_exit(&p->p_auxlock); return ESRCH; } } diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 9335e69f0272..0a2f57735cc2 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_synch.c,v 1.213 2007/12/27 22:13:19 ad Exp $ */ +/* $NetBSD: kern_synch.c,v 1.214 2008/01/02 11:48:51 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc. 
@@ -75,7 +75,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.213 2007/12/27 22:13:19 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.214 2008/01/02 11:48:51 ad Exp $"); #include "opt_kstack.h" #include "opt_lockdebug.h" @@ -945,7 +945,7 @@ sched_init(void) { cv_init(&lbolt, "lbolt"); - callout_init(&sched_pstats_ch, 0); + callout_init(&sched_pstats_ch, CALLOUT_MPSAFE); callout_setfunc(&sched_pstats_ch, sched_pstats, NULL); sched_setup(); sched_pstats(NULL); diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 7b214e920d47..e191167ec842 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_sysctl.c,v 1.211 2007/12/20 23:03:09 dsl Exp $ */ +/* $NetBSD: kern_sysctl.c,v 1.212 2008/01/02 11:48:52 ad Exp $ */ /*- * Copyright (c) 2003 The NetBSD Foundation, Inc. @@ -75,7 +75,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_sysctl.c,v 1.211 2007/12/20 23:03:09 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_sysctl.c,v 1.212 2008/01/02 11:48:52 ad Exp $"); #include "opt_defcorename.h" #include "ksyms.h" @@ -304,9 +304,12 @@ sys___sysctl(struct lwp *l, const struct sys___sysctl_args *uap, register_t *ret /* * wire old so that copyout() is less likely to fail? */ + KERNEL_LOCK(1, NULL); /* XXXSMP */ error = sysctl_lock(l, SCARG(uap, old), savelen); - if (error) + if (error) { + KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ return (error); + } /* * do sysctl work (NULL means main built-in default tree) @@ -320,6 +323,7 @@ sys___sysctl(struct lwp *l, const struct sys___sysctl_args *uap, register_t *ret * release the sysctl lock */ sysctl_unlock(l); + KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ /* * set caller's oldlen to new value even in the face of an diff --git a/sys/kern/kern_verifiedexec.c b/sys/kern/kern_verifiedexec.c index 1a08896ea655..705ca5af34c7 100644 --- a/sys/kern/kern_verifiedexec.c +++ b/sys/kern/kern_verifiedexec.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_verifiedexec.c,v 1.104 2007/12/08 19:29:48 pooka Exp $ */ +/* $NetBSD: kern_verifiedexec.c,v 1.105 2008/01/02 11:48:52 ad Exp $ */ /*- * Copyright (c) 2005, 2006 Elad Efrat @@ -29,7 +29,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_verifiedexec.c,v 1.104 2007/12/08 19:29:48 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_verifiedexec.c,v 1.105 2008/01/02 11:48:52 ad Exp $"); #include "opt_veriexec.h" @@ -106,6 +106,7 @@ struct veriexec_table_entry { static int veriexec_verbose; int veriexec_strict; +static int veriexec_bypass = 1; static char *veriexec_fp_names = NULL; static size_t veriexec_name_max = 0; @@ -668,11 +669,18 @@ veriexec_verify(struct lwp *l, struct vnode *vp, const u_char *name, int flag, struct veriexec_file_entry *vfe; int r; + if (veriexec_bypass) + return 0; + + KERNEL_LOCK(1, NULL); + r = veriexec_file_verify(l, vp, name, flag, &vfe); if (found != NULL) *found = (vfe != NULL) ? true : false; + KERNEL_UNLOCK_ONE(NULL); + return (r); } @@ -755,9 +763,16 @@ int veriexec_removechk(struct lwp *l, struct vnode *vp, const char *pathbuf) { struct veriexec_file_entry *vfe; + int error; + + if (veriexec_bypass) + return 0; + + KERNEL_LOCK(1, NULL); vfe = veriexec_get(vp); if (vfe == NULL) { + KERNEL_UNLOCK_ONE(NULL); /* Lockdown mode: Deny access to non-monitored files. */ if (veriexec_strict >= VERIEXEC_LOCKDOWN) return (EPERM); @@ -770,9 +785,12 @@ veriexec_removechk(struct lwp *l, struct vnode *vp, const char *pathbuf) /* IDS mode: Deny removal of monitored files. 
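CALLOUT_MPSAFE above follows the same convention as WQ_MPSAFE and SOFTINT_MPSAFE in the neighbouring hunks: a creation-time flag telling the framework not to wrap the callback in the kernel lock, which makes the handler responsible for its own locking. A sketch of a callout set up this way; all ex_* names are hypothetical:

    #include <sys/callout.h>
    #include <sys/kernel.h>
    #include <sys/mutex.h>

    static callout_t ex_ch;
    static kmutex_t ex_lock;

    static void
    ex_tick(void *arg)
    {
            /* Runs without the kernel lock; take our own mutex. */
            mutex_enter(&ex_lock);
            /* ... periodic work ... */
            mutex_exit(&ex_lock);
            callout_schedule(&ex_ch, hz);
    }

    static void
    ex_start(void)
    {
            callout_init(&ex_ch, CALLOUT_MPSAFE);
            callout_setfunc(&ex_ch, ex_tick, NULL);
            callout_schedule(&ex_ch, hz);
    }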
*/ if (veriexec_strict >= VERIEXEC_IDS) - return (EPERM); + error = EPERM; + else + error = veriexec_file_delete(l, vp); - return (veriexec_file_delete(l, vp)); + KERNEL_UNLOCK_ONE(NULL); + return error; } /* @@ -788,11 +806,17 @@ veriexec_renamechk(struct lwp *l, struct vnode *fromvp, const char *fromname, { struct veriexec_file_entry *vfe, *tvfe; + if (veriexec_bypass) + return 0; + + KERNEL_LOCK(1, NULL); + if (veriexec_strict >= VERIEXEC_LOCKDOWN) { log(LOG_ALERT, "Veriexec: Preventing rename of `%s' to " "`%s', uid=%u, pid=%u: Lockdown mode.\n", fromname, toname, kauth_cred_geteuid(l->l_cred), l->l_proc->p_pid); + KERNEL_UNLOCK_ONE(NULL); return (EPERM); } @@ -810,6 +834,7 @@ veriexec_renamechk(struct lwp *l, struct vnode *fromvp, const char *fromname, l->l_proc->p_pid, (vfe != NULL && tvfe != NULL) ? "files" : "file"); + KERNEL_UNLOCK_ONE(NULL); return (EPERM); } @@ -838,6 +863,7 @@ veriexec_renamechk(struct lwp *l, struct vnode *fromvp, const char *fromname, kauth_cred_geteuid(l->l_cred), l->l_proc->p_pid); } + KERNEL_UNLOCK_ONE(NULL); return (0); } @@ -1214,6 +1240,7 @@ veriexec_file_add(struct lwp *l, prop_dictionary_t dict) } veriexec_file_report(NULL, "New entry.", file, NULL, REPORT_DEBUG); + veriexec_bypass = 0; out: vrele(nid.ni_vp); @@ -1289,9 +1316,11 @@ veriexec_unmountchk(struct mount *mp) { int error; - if (doing_shutdown) + if (veriexec_bypass || doing_shutdown) return (0); + KERNEL_LOCK(1, NULL); + switch (veriexec_strict) { case VERIEXEC_LEARNING: error = 0; @@ -1329,6 +1358,7 @@ veriexec_unmountchk(struct mount *mp) break; } + KERNEL_UNLOCK_ONE(NULL); return (error); } @@ -1338,6 +1368,11 @@ veriexec_openchk(struct lwp *l, struct vnode *vp, const char *path, int fmode) struct veriexec_file_entry *vfe = NULL; int error = 0; + if (veriexec_bypass) + return 0; + + KERNEL_LOCK(1, NULL); + if (vp == NULL) { /* If no creation requested, let this fail normally. */ if (!(fmode & O_CREAT)) @@ -1369,6 +1404,7 @@ veriexec_openchk(struct lwp *l, struct vnode *vp, const char *path, int fmode) } out: + KERNEL_UNLOCK_ONE(NULL); return (error); } diff --git a/sys/kern/subr_disk.c b/sys/kern/subr_disk.c index de35711df912..1b37050adc19 100644 --- a/sys/kern/subr_disk.c +++ b/sys/kern/subr_disk.c @@ -1,4 +1,4 @@ -/* $NetBSD: subr_disk.c,v 1.89 2007/10/08 16:41:15 ad Exp $ */ +/* $NetBSD: subr_disk.c,v 1.90 2008/01/02 11:48:52 ad Exp $ */ /*- * Copyright (c) 1996, 1997, 1999, 2000 The NetBSD Foundation, Inc. @@ -74,7 +74,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: subr_disk.c,v 1.89 2007/10/08 16:41:15 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: subr_disk.c,v 1.90 2008/01/02 11:48:52 ad Exp $"); #include #include @@ -391,7 +391,8 @@ disk_read_sectors(void (*strat)(struct buf *), const struct disklabel *lp, { bp->b_blkno = sector; bp->b_bcount = count * lp->d_secsize; - bp->b_flags = (bp->b_flags & ~(B_WRITE | B_DONE)) | B_READ; + bp->b_flags = (bp->b_flags & ~B_WRITE) | B_READ; + bp->b_oflags &= ~BO_DONE; bp->b_cylinder = sector / lp->d_secpercyl; (*strat)(bp); return biowait(bp); diff --git a/sys/kern/subr_disk_mbr.c b/sys/kern/subr_disk_mbr.c index de8767e580e7..4118bf71aa87 100644 --- a/sys/kern/subr_disk_mbr.c +++ b/sys/kern/subr_disk_mbr.c @@ -1,4 +1,4 @@ -/* $NetBSD: subr_disk_mbr.c,v 1.30 2007/10/08 18:04:05 ad Exp $ */ +/* $NetBSD: subr_disk_mbr.c,v 1.31 2008/01/02 11:48:53 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 
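The disklabel hunks above are mechanical fallout of splitting struct buf's flag word three ways, each part with its own owner: b_flags stays with whoever holds the buffer (B_READ, B_WRITE, B_PHYS, ...), b_cflags is protected by bufcache_lock (BC_BUSY, BC_WANTED, BC_AGE, ...), and b_oflags is protected by the object lock (BO_DONE, BO_DELWRI). Resetting a buffer for reuse therefore touches two words where it used to touch one:

    /* Reissue bp as a read: direction is b_flags, completion is b_oflags. */
    bp->b_flags = (bp->b_flags & ~B_WRITE) | B_READ;
    bp->b_oflags &= ~BO_DONE;
    (*strat)(bp);
    error = biowait(bp);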
@@ -54,7 +54,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: subr_disk_mbr.c,v 1.30 2007/10/08 18:04:05 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: subr_disk_mbr.c,v 1.31 2008/01/02 11:48:53 ad Exp $"); #include #include @@ -463,7 +463,8 @@ validate_label(mbr_args_t *a, uint label_sector) case UPDATE_LABEL: case WRITE_LABEL: *dlp = *a->lp; - a->bp->b_flags &= ~(B_READ|B_DONE); + a->bp->b_oflags &= ~BO_DONE; + a->bp->b_flags &= ~B_READ; a->bp->b_flags |= B_WRITE; (*a->strat)(a->bp); error = biowait(a->bp); diff --git a/sys/kern/subr_log.c b/sys/kern/subr_log.c index 8a324e902afc..38bbcdb020e8 100644 --- a/sys/kern/subr_log.c +++ b/sys/kern/subr_log.c @@ -1,4 +1,4 @@ -/* $NetBSD: subr_log.c,v 1.45 2007/12/25 00:00:00 ad Exp $ */ +/* $NetBSD: subr_log.c,v 1.46 2008/01/02 11:48:53 ad Exp $ */ /*- * Copyright (c) 2007 The NetBSD Foundation, Inc. @@ -72,7 +72,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: subr_log.c,v 1.45 2007/12/25 00:00:00 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: subr_log.c,v 1.46 2008/01/02 11:48:53 ad Exp $"); #include #include @@ -140,7 +140,7 @@ loginit(void) mutex_init(&log_lock, MUTEX_DEFAULT, IPL_VM); selinit(&log_selp); cv_init(&log_cv, "klog"); - log_sih = softint_establish(SOFTINT_CLOCK, + log_sih = softint_establish(SOFTINT_CLOCK | SOFTINT_MPSAFE, logsoftintr, NULL); } diff --git a/sys/kern/subr_pool.c b/sys/kern/subr_pool.c index 7f999e61947f..2a3c02fda0b4 100644 --- a/sys/kern/subr_pool.c +++ b/sys/kern/subr_pool.c @@ -1,4 +1,4 @@ -/* $NetBSD: subr_pool.c,v 1.145 2007/12/26 16:01:36 ad Exp $ */ +/* $NetBSD: subr_pool.c,v 1.146 2008/01/02 11:48:53 ad Exp $ */ /*- * Copyright (c) 1997, 1999, 2000, 2002, 2007 The NetBSD Foundation, Inc. @@ -38,7 +38,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.145 2007/12/26 16:01:36 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.146 2008/01/02 11:48:53 ad Exp $"); #include "opt_ddb.h" #include "opt_pool.h" @@ -2606,17 +2606,21 @@ pool_cache_put_slow(pool_cache_cpu_t *cc, int *s, void *object, paddr_t pa) /* * If there's an empty group, release our full * group back to the cache. Install the empty - * group as cc_current and return. + * group and return. */ - if ((cur = cc->cc_current) != NULL) { - KASSERT(cur->pcg_avail == pcg->pcg_size); - cur->pcg_next = pc->pc_fullgroups; - pc->pc_fullgroups = cur; - pc->pc_nfull++; - } KASSERT(pcg->pcg_avail == 0); - cc->cc_current = pcg; pc->pc_emptygroups = pcg->pcg_next; + if (cc->cc_previous == NULL) { + cc->cc_previous = pcg; + } else { + if ((cur = cc->cc_current) != NULL) { + KASSERT(cur->pcg_avail == pcg->pcg_size); + cur->pcg_next = pc->pc_fullgroups; + pc->pc_fullgroups = cur; + pc->pc_nfull++; + } + cc->cc_current = pcg; + } pc->pc_hits++; pc->pc_nempty--; mutex_exit(&pc->pc_lock); @@ -2637,7 +2641,9 @@ pool_cache_put_slow(pool_cache_cpu_t *cc, int *s, void *object, paddr_t pa) * object away. */ nobj = pc->pc_pcgsize; - if (nobj == PCG_NOBJECTS_LARGE) { + if (pool_cache_disable) { + pcg = NULL; + } else if (nobj == PCG_NOBJECTS_LARGE) { pcg = pool_get(&pcg_large_pool, PR_NOWAIT); } else { pcg = pool_get(&pcg_normal_pool, PR_NOWAIT); diff --git a/sys/kern/sys_lwp.c b/sys/kern/sys_lwp.c index 194af997dafe..d2ff88bb86be 100644 --- a/sys/kern/sys_lwp.c +++ b/sys/kern/sys_lwp.c @@ -1,4 +1,4 @@ -/* $NetBSD: sys_lwp.c,v 1.32 2007/12/20 23:03:10 dsl Exp $ */ +/* $NetBSD: sys_lwp.c,v 1.33 2008/01/02 11:48:53 ad Exp $ */ /*- * Copyright (c) 2001, 2006, 2007 The NetBSD Foundation, Inc.
@@ -42,7 +42,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.32 2007/12/20 23:03:10 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.33 2008/01/02 11:48:53 ad Exp $"); #include #include @@ -657,18 +657,14 @@ sys__lwp_unpark_all(struct lwp *l, const struct sys__lwp_unpark_all_args *uap, r if (sz <= sizeof(targets)) tp = targets; else { - KERNEL_LOCK(1, l); /* XXXSMP */ tp = kmem_alloc(sz, KM_SLEEP); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ if (tp == NULL) return ENOMEM; } error = copyin(SCARG(uap, targets), tp, sz); if (error != 0) { if (tp != targets) { - KERNEL_LOCK(1, l); /* XXXSMP */ kmem_free(tp, sz); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ } return error; } @@ -727,11 +723,8 @@ sys__lwp_unpark_all(struct lwp *l, const struct sys__lwp_unpark_all_args *uap, r } sleepq_unlock(sq); - if (tp != targets) { - KERNEL_LOCK(1, l); /* XXXSMP */ + if (tp != targets) kmem_free(tp, sz); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ - } if (swapin) uvm_kick_scheduler(); diff --git a/sys/kern/sys_sig.c b/sys/kern/sys_sig.c index 4c1ebbfce22a..8a8b7b797a86 100644 --- a/sys/kern/sys_sig.c +++ b/sys/kern/sys_sig.c @@ -1,4 +1,4 @@ -/* $NetBSD: sys_sig.c,v 1.9 2007/12/20 23:03:12 dsl Exp $ */ +/* $NetBSD: sys_sig.c,v 1.10 2008/01/02 11:48:53 ad Exp $ */ /*- * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc. @@ -73,7 +73,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sys_sig.c,v 1.9 2007/12/20 23:03:12 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sys_sig.c,v 1.10 2008/01/02 11:48:53 ad Exp $"); #include "opt_ptrace.h" #include "opt_compat_netbsd.h" @@ -670,9 +670,7 @@ __sigtimedwait1(struct lwp *l, const struct sys___sigtimedwait_args *uap, regist /* * Allocate a ksi up front. We can't sleep with the mutex held. */ - KERNEL_LOCK(1, l); /* XXXSMP ksiginfo_alloc() -> pool_get() */ ksi = ksiginfo_alloc(p, NULL, PR_WAITOK); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ if (ksi == NULL) return (ENOMEM); @@ -750,9 +748,7 @@ __sigtimedwait1(struct lwp *l, const struct sys___sigtimedwait_args *uap, regist error = (*put_info)(&ksi->ksi_info, SCARG(uap, info), sizeof(ksi->ksi_info)); - KERNEL_LOCK(1, l); /* XXXSMP ksiginfo_free() -> pool_put() */ ksiginfo_free(ksi); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ return error; } diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c index 7d00b6db8283..0195e2fcb377 100644 --- a/sys/kern/syscalls.c +++ b/sys/kern/syscalls.c @@ -1,4 +1,4 @@ -/* $NetBSD: syscalls.c,v 1.195 2007/12/20 23:07:25 dsl Exp $ */ +/* $NetBSD: syscalls.c,v 1.196 2008/01/02 11:48:54 ad Exp $ */ /* * System call names. @@ -8,7 +8,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: syscalls.c,v 1.195 2007/12/20 23:07:25 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: syscalls.c,v 1.196 2008/01/02 11:48:54 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_nfsserver.h" diff --git a/sys/kern/sysv_sem.c b/sys/kern/sysv_sem.c index fc6fd9a4191b..f5eaeccbadd3 100644 --- a/sys/kern/sysv_sem.c +++ b/sys/kern/sysv_sem.c @@ -1,4 +1,4 @@ -/* $NetBSD: sysv_sem.c,v 1.78 2007/12/20 23:03:12 dsl Exp $ */ +/* $NetBSD: sysv_sem.c,v 1.79 2008/01/02 11:48:54 ad Exp $ */ /*- * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc. 
@@ -46,7 +46,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sysv_sem.c,v 1.78 2007/12/20 23:03:12 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sysv_sem.c,v 1.79 2008/01/02 11:48:54 ad Exp $"); #define SYSVSEM @@ -764,9 +764,7 @@ restart: if (nsops <= SMALL_SOPS) { sops = small_sops; } else if (nsops <= seminfo.semopm) { - KERNEL_LOCK(1, l); /* XXXSMP */ sops = kmem_alloc(nsops * sizeof(*sops), KM_SLEEP); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ } else { SEM_PRINTF(("too many sops (max=%d, nsops=%zd)\n", seminfo.semopm, nsops)); @@ -777,11 +775,8 @@ restart: if (error) { SEM_PRINTF(("error = %d from copyin(%p, %p, %zd)\n", error, SCARG(uap, sops), &sops, nsops * sizeof(sops[0]))); - if (sops != small_sops) { - KERNEL_LOCK(1, l); /* XXXSMP */ + if (sops != small_sops) kmem_free(sops, nsops * sizeof(*sops)); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ - } return error; } @@ -1010,11 +1005,8 @@ done: out: mutex_exit(&semlock); - if (sops != small_sops) { - KERNEL_LOCK(1, l); /* XXXSMP */ + if (sops != small_sops) kmem_free(sops, nsops * sizeof(*sops)); - KERNEL_UNLOCK_ONE(l); /* XXXSMP */ - } return error; } diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c index 995b533c5c79..845bb52f2ffd 100644 --- a/sys/kern/sysv_shm.c +++ b/sys/kern/sysv_shm.c @@ -1,4 +1,4 @@ -/* $NetBSD: sysv_shm.c,v 1.101 2007/12/20 23:03:12 dsl Exp $ */ +/* $NetBSD: sysv_shm.c,v 1.102 2008/01/02 11:48:54 ad Exp $ */ /*- * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc. @@ -6,7 +6,7 @@ * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, - * NASA Ames Research Center. + * NASA Ames Research Center, and by Mindaugas Rasiukevicius. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -68,15 +68,15 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.101 2007/12/20 23:03:12 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.102 2008/01/02 11:48:54 ad Exp $"); #define SYSVSHM #include #include +#include #include #include -#include #include #include #include @@ -89,20 +89,6 @@ __KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.101 2007/12/20 23:03:12 dsl Exp $"); #include #include -static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments"); - -/* - * Provides the following externally accessible functions: - * - * shminit(void); initialization - * shmexit(struct vmspace *) cleanup - * shmfork(struct vmspace *, struct vmspace *) fork handling - * - * Structures: - * shmsegs (an array of 'struct shmid_ds') - * per proc array of 'struct shmmap_state' - */ - int shm_nused; struct shmid_ds *shmsegs; @@ -112,11 +98,14 @@ struct shmmap_entry { int shmid; }; -static kmutex_t shm_lock; -static int shm_last_free, shm_committed, shm_use_phys; +static kmutex_t shm_lock; +static kcondvar_t * shm_cv; +static struct pool shmmap_entry_pool; +static int shm_last_free, shm_committed, shm_use_phys; -static POOL_INIT(shmmap_entry_pool, sizeof(struct shmmap_entry), 0, 0, 0, - "shmmp", &pool_allocator_nointr, IPL_NONE); +static kcondvar_t shm_realloc_cv; +static bool shm_realloc_state; +static u_int shm_realloc_disable; struct shmmap_state { unsigned int nitems; @@ -124,102 +113,101 @@ struct shmmap_state { SLIST_HEAD(, shmmap_entry) entries; }; -static int shm_find_segment_by_key(key_t); -static void shm_deallocate_segment(struct shmid_ds *); -static void shm_delete_mapping(struct vmspace *, struct shmmap_state *, - struct shmmap_entry *); -static int 
shmget_existing(struct lwp *, const struct sys_shmget_args *, - int, int, register_t *); -static int shmget_allocate_segment(struct lwp *, const struct sys_shmget_args *, - int, register_t *); -static struct shmmap_state *shmmap_getprivate(struct proc *); -static struct shmmap_entry *shm_find_mapping(struct shmmap_state *, vaddr_t); +#ifdef SHMDEBUG +#define SHMPRINTF(a) printf a +#else +#define SHMPRINTF(a) +#endif + static int shmrealloc(int); -static int -shm_find_segment_by_key(key_t key) -{ - int i; - - for (i = 0; i < shminfo.shmmni; i++) - if ((shmsegs[i].shm_perm.mode & SHMSEG_ALLOCATED) && - shmsegs[i].shm_perm._key == key) - return i; - return -1; -} - +/* + * Find the shared memory segment by the identifier. + * => must be called with shm_lock held; + */ static struct shmid_ds * shm_find_segment_by_shmid(int shmid) { int segnum; struct shmid_ds *shmseg; + KASSERT(mutex_owned(&shm_lock)); + segnum = IPCID_TO_IX(shmid); if (segnum < 0 || segnum >= shminfo.shmmni) return NULL; shmseg = &shmsegs[segnum]; if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0) return NULL; - if ((shmseg->shm_perm.mode & (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED) + if ((shmseg->shm_perm.mode & + (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED) return NULL; if (shmseg->shm_perm._seq != IPCID_TO_SEQ(shmid)) return NULL; + return shmseg; } +/* + * Free memory segment. + * => must be called with shm_lock held; + */ static void -shm_deallocate_segment(struct shmid_ds *shmseg) -{ - struct uvm_object *uobj = shmseg->_shm_internal; - size_t size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; - -#ifdef SHMDEBUG - printf("shm freeing key 0x%lx seq 0x%x\n", - shmseg->shm_perm._key, shmseg->shm_perm._seq); -#endif - - (*uobj->pgops->pgo_detach)(uobj); - shmseg->_shm_internal = NULL; - shm_committed -= btoc(size); - shmseg->shm_perm.mode = SHMSEG_FREE; - shm_nused--; -} - -static void -shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s, - struct shmmap_entry *shmmap_se) +shm_free_segment(int segnum) { struct shmid_ds *shmseg; - int segnum; size_t size; + bool wanted; + + KASSERT(mutex_owned(&shm_lock)); - segnum = IPCID_TO_IX(shmmap_se->shmid); -#ifdef DEBUG - if (segnum < 0 || segnum >= shminfo.shmmni) - panic("shm_delete_mapping: vmspace %p state %p entry %p - " - "entry segment ID bad (%d)", - vm, shmmap_s, shmmap_se, segnum); -#endif shmseg = &shmsegs[segnum]; + SHMPRINTF(("shm freeing key 0x%lx seq 0x%x\n", + shmseg->shm_perm._key, shmseg->shm_perm._seq)); + size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; - uvm_deallocate(&vm->vm_map, shmmap_se->va, size); - SLIST_REMOVE(&shmmap_s->entries, shmmap_se, shmmap_entry, next); - shmmap_s->nitems--; - pool_put(&shmmap_entry_pool, shmmap_se); - shmseg->shm_dtime = time_second; - if ((--shmseg->shm_nattch <= 0) && - (shmseg->shm_perm.mode & SHMSEG_REMOVED)) { - shm_deallocate_segment(shmseg); - shm_last_free = segnum; - } + wanted = (shmseg->shm_perm.mode & SHMSEG_WANTED); + + shmseg->_shm_internal = NULL; + shm_committed -= btoc(size); + shm_nused--; + shmseg->shm_perm.mode = SHMSEG_FREE; + shm_last_free = segnum; + if (wanted == true) + cv_broadcast(&shm_cv[segnum]); } /* - * Get a non-shared shm map for that vmspace. - * 3 cases: - * - no shm map present: create a fresh one - * - a shm map with refcount=1, just used by ourselves: fine - * - a shared shm map: copy to a fresh one and adjust refcounts + * Delete entry from the shm map. 
+ * => must be called with shm_lock held; + */ +static struct uvm_object * +shm_delete_mapping(struct shmmap_state *shmmap_s, + struct shmmap_entry *shmmap_se) +{ + struct uvm_object *uobj = NULL; + struct shmid_ds *shmseg; + int segnum; + + KASSERT(mutex_owned(&shm_lock)); + + segnum = IPCID_TO_IX(shmmap_se->shmid); + shmseg = &shmsegs[segnum]; + SLIST_REMOVE(&shmmap_s->entries, shmmap_se, shmmap_entry, next); + shmmap_s->nitems--; + shmseg->shm_dtime = time_second; + if ((--shmseg->shm_nattch <= 0) && + (shmseg->shm_perm.mode & SHMSEG_REMOVED)) { + uobj = shmseg->_shm_internal; + shm_free_segment(segnum); + } + + return uobj; +} + +/* + * Get a non-shared shm map for that vmspace. Note that memory + * allocation might be performed with the lock held. */ static struct shmmap_state * shmmap_getprivate(struct proc *p) @@ -227,23 +215,26 @@ shmmap_getprivate(struct proc *p) struct shmmap_state *oshmmap_s, *shmmap_s; struct shmmap_entry *oshmmap_se, *shmmap_se; + KASSERT(mutex_owned(&shm_lock)); + + /* 1. A shm map with refcnt = 1, used by ourselves, thus return */ oshmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm; if (oshmmap_s && oshmmap_s->nrefs == 1) - return (oshmmap_s); + return oshmmap_s; - shmmap_s = malloc(sizeof(struct shmmap_state), M_SHM, M_WAITOK); - memset(shmmap_s, 0, sizeof(struct shmmap_state)); + /* 2. No shm map present - create a fresh one */ + shmmap_s = kmem_zalloc(sizeof(struct shmmap_state), KM_SLEEP); shmmap_s->nrefs = 1; SLIST_INIT(&shmmap_s->entries); p->p_vmspace->vm_shm = (void *)shmmap_s; - if (!oshmmap_s) - return (shmmap_s); + if (oshmmap_s == NULL) + return shmmap_s; -#ifdef SHMDEBUG - printf("shmmap_getprivate: vm %p split (%d entries), was used by %d\n", - p->p_vmspace, oshmmap_s->nitems, oshmmap_s->nrefs); -#endif + SHMPRINTF(("shmmap_getprivate: vm %p split (%d entries), was used by %d\n", + p->p_vmspace, oshmmap_s->nitems, oshmmap_s->nrefs)); + + /* 3. A shared shm map, copy to a fresh one and adjust refcounts */ SLIST_FOREACH(oshmmap_se, &oshmmap_s->entries, next) { shmmap_se = pool_get(&shmmap_entry_pool, PR_WAITOK); shmmap_se->va = oshmmap_se->va; @@ -252,21 +243,71 @@ shmmap_getprivate(struct proc *p) } shmmap_s->nitems = oshmmap_s->nitems; oshmmap_s->nrefs--; - return (shmmap_s); + + return shmmap_s; } -static struct shmmap_entry * -shm_find_mapping(struct shmmap_state *map, vaddr_t va) +/* + * Lock/unlock the memory.
+ * => must be called with shm_lock held; + * => called from one place, thus, inline; + */ +static inline int +shm_memlock(struct lwp *l, struct shmid_ds *shmseg, int shmid, int cmd) { + struct proc *p = l->l_proc; struct shmmap_entry *shmmap_se; + struct shmmap_state *shmmap_s; + size_t size; + int error; - SLIST_FOREACH(shmmap_se, &map->entries, next) { - if (shmmap_se->va == va) - return shmmap_se; + KASSERT(mutex_owned(&shm_lock)); + shmmap_s = shmmap_getprivate(p); + + /* Find our shared memory address by shmid */ + SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) { + if (shmmap_se->shmid != shmid) + continue; + + size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; + + if (cmd == SHM_LOCK && + (shmseg->shm_perm.mode & SHMSEG_WIRED) == 0) { + /* Wire the object and map, then tag it */ + error = uobj_wirepages(shmseg->_shm_internal, 0, + round_page(shmseg->shm_segsz)); + if (error) + return EIO; + error = uvm_map_pageable(&p->p_vmspace->vm_map, + shmmap_se->va, shmmap_se->va + size, false, 0); + if (error) { + uobj_unwirepages(shmseg->_shm_internal, 0, + round_page(shmseg->shm_segsz)); + if (error == EFAULT) + error = ENOMEM; + return error; + } + shmseg->shm_perm.mode |= SHMSEG_WIRED; + + } else if (cmd == SHM_UNLOCK && + (shmseg->shm_perm.mode & SHMSEG_WIRED) != 0) { + /* Unwire the object and map, then untag it */ + uobj_unwirepages(shmseg->_shm_internal, 0, + round_page(shmseg->shm_segsz)); + error = uvm_map_pageable(&p->p_vmspace->vm_map, + shmmap_se->va, shmmap_se->va + size, true, 0); + if (error) + return EIO; + shmseg->shm_perm.mode &= ~SHMSEG_WIRED; + } } + return 0; } +/* + * Unmap shared memory. + */ int sys_shmdt(struct lwp *l, const struct sys_shmdt_args *uap, register_t *retval) { @@ -274,32 +315,64 @@ sys_shmdt(struct lwp *l, const struct sys_shmdt_args *uap, register_t *retval) syscallarg(const void *) shmaddr; } */ struct proc *p = l->l_proc; - struct shmmap_state *shmmap_s, *shmmap_s1; + struct shmmap_state *shmmap_s1, *shmmap_s; struct shmmap_entry *shmmap_se; + struct uvm_object *uobj; + struct shmid_ds *shmseg; + size_t size; - shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm; - if (shmmap_s == NULL) + mutex_enter(&shm_lock); + /* In case of reallocation, we will wait for completion */ + while (__predict_false(shm_realloc_state)) + cv_wait(&shm_realloc_cv, &shm_lock); + + shmmap_s1 = (struct shmmap_state *)p->p_vmspace->vm_shm; + if (shmmap_s1 == NULL) { + mutex_exit(&shm_lock); return EINVAL; - - shmmap_se = shm_find_mapping(shmmap_s, (vaddr_t)SCARG(uap, shmaddr)); - if (!shmmap_se) - return EINVAL; - - shmmap_s1 = shmmap_getprivate(p); - if (shmmap_s1 != shmmap_s) { - /* map has been copied, lookup entry in new map */ - shmmap_se = shm_find_mapping(shmmap_s1, - (vaddr_t)SCARG(uap, shmaddr)); - KASSERT(shmmap_se != NULL); } -#ifdef SHMDEBUG - printf("shmdt: vm %p: remove %d @%lx\n", - p->p_vmspace, shmmap_se->shmid, shmmap_se->va); -#endif - shm_delete_mapping(p->p_vmspace, shmmap_s1, shmmap_se); + + /* Find the map entry */ + SLIST_FOREACH(shmmap_se, &shmmap_s1->entries, next) + if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr)) + break; + if (shmmap_se == NULL) { + mutex_exit(&shm_lock); + return EINVAL; + } + + shmmap_s = shmmap_getprivate(p); + if (shmmap_s != shmmap_s1) { + /* Map has been copied, lookup entry in new map */ + SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) + if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr)) + break; + if (shmmap_se == NULL) { + mutex_exit(&shm_lock); + return EINVAL; + } + } + + SHMPRINTF(("shmdt: vm %p: remove %d 
@%lx\n", + p->p_vmspace, shmmap_se->shmid, shmmap_se->va)); + + /* Delete the entry from shm map */ + uobj = shm_delete_mapping(shmmap_s, shmmap_se); + shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)]; + size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; + mutex_exit(&shm_lock); + + uvm_deallocate(&p->p_vmspace->vm_map, shmmap_se->va, size); + if (uobj != NULL) + uao_detach(uobj); + pool_put(&shmmap_entry_pool, shmmap_se); + return 0; } +/* + * Map shared memory. + */ int sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval) { @@ -313,23 +386,37 @@ sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval) kauth_cred_t cred = l->l_cred; struct shmid_ds *shmseg; struct shmmap_state *shmmap_s; + struct shmmap_entry *shmmap_se; struct uvm_object *uobj; + struct vmspace *vm; vaddr_t attach_va; vm_prot_t prot; vsize_t size; - struct shmmap_entry *shmmap_se; + + /* Allocate a new map entry and set it */ + shmmap_se = pool_get(&shmmap_entry_pool, PR_WAITOK); + + mutex_enter(&shm_lock); + /* In case of reallocation, we will wait for completion */ + while (__predict_false(shm_realloc_state)) + cv_wait(&shm_realloc_cv, &shm_lock); shmseg = shm_find_segment_by_shmid(SCARG(uap, shmid)); - if (shmseg == NULL) - return EINVAL; + if (shmseg == NULL) { + error = EINVAL; + goto err; + } error = ipcperm(cred, &shmseg->shm_perm, - (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W); + (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W); if (error) - return error; + goto err; - shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm; - if (shmmap_s && shmmap_s->nitems >= shminfo.shmseg) - return EMFILE; + vm = p->p_vmspace; + shmmap_s = (struct shmmap_state *)vm->vm_shm; + if (shmmap_s && shmmap_s->nitems >= shminfo.shmseg) { + error = EMFILE; + goto err; + } size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; prot = VM_PROT_READ; @@ -342,52 +429,81 @@ sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval) (vaddr_t)SCARG(uap, shmaddr) & ~(SHMLBA-1); else if (((vaddr_t)SCARG(uap, shmaddr) & (SHMLBA-1)) == 0) attach_va = (vaddr_t)SCARG(uap, shmaddr); - else - return EINVAL; + else { + error = EINVAL; + goto err; + } } else { /* This is just a hint to uvm_mmap() about where to put it. */ attach_va = p->p_emul->e_vm_default_addr(p, - (vaddr_t)p->p_vmspace->vm_daddr, size); + (vaddr_t)vm->vm_daddr, size); } + + /* + * Create a map entry, add it to the list and increase the counters. + * The lock will be dropped before the mapping is established, so + * disable reallocation. + */ + shmmap_s = shmmap_getprivate(p); + SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next); + shmmap_s->nitems++; + shmseg->shm_lpid = p->p_pid; + shmseg->shm_nattch++; + shm_realloc_disable++; + mutex_exit(&shm_lock); + + /* + * Add a reference to the memory object, map it to the + * address space, and lock the memory, if needed.
+ */ uobj = shmseg->_shm_internal; - (*uobj->pgops->pgo_reference)(uobj); - error = uvm_map(&p->p_vmspace->vm_map, &attach_va, size, - uobj, 0, 0, + uao_reference(uobj); + error = uvm_map(&vm->vm_map, &attach_va, size, uobj, 0, 0, UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, flags)); if (error) - goto out; - /* Lock the memory */ + goto err_detach; if (shm_use_phys || (shmseg->shm_perm.mode & SHMSEG_WIRED)) { - /* Wire the map */ - error = uvm_map_pageable(&p->p_vmspace->vm_map, attach_va, + error = uvm_map_pageable(&vm->vm_map, attach_va, attach_va + size, false, 0); if (error) { if (error == EFAULT) error = ENOMEM; - goto out; + uvm_deallocate(&vm->vm_map, attach_va, size); + goto err_detach; } } - shmmap_se = pool_get(&shmmap_entry_pool, PR_WAITOK); + /* Set the new address, and update the time */ + mutex_enter(&shm_lock); shmmap_se->va = attach_va; shmmap_se->shmid = SCARG(uap, shmid); - shmmap_s = shmmap_getprivate(p); -#ifdef SHMDEBUG - printf("shmat: vm %p: add %d @%lx\n", p->p_vmspace, shmmap_se->shmid, attach_va); -#endif - SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next); - shmmap_s->nitems++; - shmseg->shm_lpid = p->p_pid; shmseg->shm_atime = time_second; - shmseg->shm_nattch++; - + shm_realloc_disable--; retval[0] = attach_va; - return 0; -out: - (*uobj->pgops->pgo_detach)(uobj); + SHMPRINTF(("shmat: vm %p: add %d @%lx\n", + p->p_vmspace, shmmap_se->shmid, attach_va)); +err: + cv_broadcast(&shm_realloc_cv); + mutex_exit(&shm_lock); + if (error && shmmap_se) + pool_put(&shmmap_entry_pool, shmmap_se); + return error; + +err_detach: + uao_detach(uobj); + mutex_enter(&shm_lock); + uobj = shm_delete_mapping(shmmap_s, shmmap_se); + shm_realloc_disable--; + cv_broadcast(&shm_realloc_cv); + mutex_exit(&shm_lock); + if (uobj != NULL) + uao_detach(uobj); + pool_put(&shmmap_entry_pool, shmmap_se); return error; } +/* + * Shared memory control operations. 
+ */ int sys___shmctl13(struct lwp *l, const struct sys___shmctl13_args *uap, register_t *retval) { @@ -400,11 +516,10 @@ sys___shmctl13(struct lwp *l, const struct sys___shmctl13_args *uap, register_t int cmd, error; cmd = SCARG(uap, cmd); - if (cmd == IPC_SET) { error = copyin(SCARG(uap, buf), &shmbuf, sizeof(shmbuf)); if (error) - return (error); + return error; } error = shmctl1(l, SCARG(uap, shmid), cmd, @@ -413,33 +528,37 @@ sys___shmctl13(struct lwp *l, const struct sys___shmctl13_args *uap, register_t if (error == 0 && cmd == IPC_STAT) error = copyout(&shmbuf, SCARG(uap, buf), sizeof(shmbuf)); - return (error); + return error; } int shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf) { + struct uvm_object *uobj = NULL; kauth_cred_t cred = l->l_cred; - struct proc *p = l->l_proc; struct shmid_ds *shmseg; - struct shmmap_entry *shmmap_se; - struct shmmap_state *shmmap_s; int error = 0; - size_t size; + + mutex_enter(&shm_lock); + /* In case of reallocation, we will wait for completion */ + while (__predict_false(shm_realloc_state)) + cv_wait(&shm_realloc_cv, &shm_lock); shmseg = shm_find_segment_by_shmid(shmid); - if (shmseg == NULL) + if (shmseg == NULL) { + mutex_exit(&shm_lock); return EINVAL; + } switch (cmd) { case IPC_STAT: if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_R)) != 0) - return error; + break; memcpy(shmbuf, shmseg, sizeof(struct shmid_ds)); break; case IPC_SET: if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0) - return error; + break; shmseg->shm_perm.uid = shmbuf->shm_perm.uid; shmseg->shm_perm.gid = shmbuf->shm_perm.gid; shmseg->shm_perm.mode = @@ -449,82 +568,55 @@ shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf) break; case IPC_RMID: if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0) - return error; + break; shmseg->shm_perm._key = IPC_PRIVATE; shmseg->shm_perm.mode |= SHMSEG_REMOVED; if (shmseg->shm_nattch <= 0) { - shm_deallocate_segment(shmseg); - shm_last_free = IPCID_TO_IX(shmid); + uobj = shmseg->_shm_internal; + shm_free_segment(IPCID_TO_IX(shmid)); } break; case SHM_LOCK: case SHM_UNLOCK: if ((error = kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL)) != 0) - return error; - shmmap_s = shmmap_getprivate(p); - /* Find our shared memory address by shmid */ - SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) { - if (shmmap_se->shmid != shmid) - continue; - - size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; - - if (cmd == SHM_LOCK && - !(shmseg->shm_perm.mode & SHMSEG_WIRED)) { - /* Wire the entire object */ - error = uobj_wirepages(shmseg->_shm_internal, 0, - round_page(shmseg->shm_segsz)); - if (error) - return EIO; - /* Wire the map */ - error = uvm_map_pageable(&p->p_vmspace->vm_map, - shmmap_se->va, shmmap_se->va + size, false, - 0); - if (error) { - uobj_unwirepages(shmseg->_shm_internal, - 0, round_page(shmseg->shm_segsz)); - if (error == EFAULT) - error = ENOMEM; - return error; - } - /* Tag as wired */ - shmseg->shm_perm.mode |= SHMSEG_WIRED; - - } else if (cmd == SHM_UNLOCK && - (shmseg->shm_perm.mode & SHMSEG_WIRED)) { - /* Unwire the object */ - uobj_unwirepages(shmseg->_shm_internal, 0, - round_page(shmseg->shm_segsz)); - error = uvm_map_pageable(&p->p_vmspace->vm_map, - shmmap_se->va, shmmap_se->va + size, true, - 0); - if (error) { - /* - * In fact, uvm_map_pageable could fail - * only if arguments are invalid, - * otherwise it should always return 0. 
- */ - return EIO; - } - /* Tag as unwired */ - shmseg->shm_perm.mode &= ~SHMSEG_WIRED; - } - } + break; + error = shm_memlock(l, shmseg, shmid, cmd); break; default: - return EINVAL; + error = EINVAL; } - return 0; + + mutex_exit(&shm_lock); + if (uobj != NULL) + uao_detach(uobj); + return error; } -static int +/* + * Try to take an already existing segment. + * => must be called with shm_lock held; + * => called from one place, thus, inline; + */ +static inline int shmget_existing(struct lwp *l, const struct sys_shmget_args *uap, int mode, - int segnum, register_t *retval) + register_t *retval) { struct shmid_ds *shmseg; kauth_cred_t cred = l->l_cred; - int error; + int segnum, error; +again: + KASSERT(mutex_owned(&shm_lock)); + + /* Find segment by key */ + for (segnum = 0; segnum < shminfo.shmmni; segnum++) + if ((shmsegs[segnum].shm_perm.mode & SHMSEG_ALLOCATED) && + shmsegs[segnum].shm_perm._key == SCARG(uap, key)) + break; + if (segnum == shminfo.shmmni) { + /* Not found */ + return -1; + } shmseg = &shmsegs[segnum]; if (shmseg->shm_perm.mode & SHMSEG_REMOVED) { @@ -534,100 +626,26 @@ shmget_existing(struct lwp *l, const struct sys_shmget_args *uap, int mode, * allocation failed or it was freed). */ shmseg->shm_perm.mode |= SHMSEG_WANTED; - error = tsleep((void *)shmseg, PLOCK | PCATCH, "shmget", 0); + error = cv_wait_sig(&shm_cv[segnum], &shm_lock); if (error) return error; - return EAGAIN; + goto again; } - if ((error = ipcperm(cred, &shmseg->shm_perm, mode)) != 0) + + /* Check the permission, segment size and appropriate flag */ + error = ipcperm(cred, &shmseg->shm_perm, mode); + if (error) return error; if (SCARG(uap, size) && SCARG(uap, size) > shmseg->shm_segsz) return EINVAL; if ((SCARG(uap, shmflg) & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL)) return EEXIST; + *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm); return 0; } -static int -shmget_allocate_segment(struct lwp *l, const struct sys_shmget_args *uap, int mode, - register_t *retval) -{ - int i, segnum, shmid, size; - kauth_cred_t cred = l->l_cred; - struct shmid_ds *shmseg; - int error = 0; - - if (SCARG(uap, size) < shminfo.shmmin || - SCARG(uap, size) > shminfo.shmmax) - return EINVAL; - if (shm_nused >= shminfo.shmmni) /* any shmids left? */ - return ENOSPC; - size = (SCARG(uap, size) + PGOFSET) & ~PGOFSET; - if (shm_committed + btoc(size) > shminfo.shmall) - return ENOMEM; - if (shm_last_free < 0) { - for (i = 0; i < shminfo.shmmni; i++) - if (shmsegs[i].shm_perm.mode & SHMSEG_FREE) - break; - if (i == shminfo.shmmni) - panic("shmseg free count inconsistent"); - segnum = i; - } else { - segnum = shm_last_free; - shm_last_free = -1; - } - shmseg = &shmsegs[segnum]; - /* - * In case we sleep in malloc(), mark the segment present but deleted - * so that noone else tries to create the same key. 
- */ - shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED; - shmseg->shm_perm._key = SCARG(uap, key); - shmseg->shm_perm._seq = (shmseg->shm_perm._seq + 1) & 0x7fff; - shmid = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm); - - shmseg->_shm_internal = uao_create(size, 0); - - shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_geteuid(cred); - shmseg->shm_perm.cgid = shmseg->shm_perm.gid = kauth_cred_getegid(cred); - shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) | - (mode & (ACCESSPERMS|SHMSEG_RMLINGER)) | SHMSEG_ALLOCATED; - shmseg->shm_segsz = SCARG(uap, size); - shmseg->shm_cpid = l->l_proc->p_pid; - shmseg->shm_lpid = shmseg->shm_nattch = 0; - shmseg->shm_atime = shmseg->shm_dtime = 0; - shmseg->shm_ctime = time_second; - shm_committed += btoc(size); - shm_nused++; - - *retval = shmid; - if (shmseg->shm_perm.mode & SHMSEG_WANTED) { - /* - * Somebody else wanted this key while we were asleep. Wake - * them up now. - */ - shmseg->shm_perm.mode &= ~SHMSEG_WANTED; - wakeup((void *)shmseg); - } - - /* Lock the memory */ - if (shm_use_phys) { - /* Wire the entire object */ - error = uobj_wirepages(shmseg->_shm_internal, 0, - round_page(shmseg->shm_segsz)); - if (error) { - shm_deallocate_segment(shmseg); - } else { - /* Tag as wired */ - shmseg->shm_perm.mode |= SHMSEG_WIRED; - } - } - - return error; -} - int sys_shmget(struct lwp *l, const struct sys_shmget_args *uap, register_t *retval) { @@ -636,30 +654,129 @@ sys_shmget(struct lwp *l, const struct sys_shmget_args *uap, register_t *retval) syscallarg(int) size; syscallarg(int) shmflg; } */ - int segnum, mode, error; + struct shmid_ds *shmseg; + kauth_cred_t cred = l->l_cred; + key_t key = SCARG(uap, key); + int error, mode, segnum, size; + bool lockmem; mode = SCARG(uap, shmflg) & ACCESSPERMS; if (SCARG(uap, shmflg) & _SHM_RMLINGER) mode |= SHMSEG_RMLINGER; -#ifdef SHMDEBUG - printf("shmget: key 0x%lx size 0x%x shmflg 0x%x mode 0x%x\n", - SCARG(uap, key), SCARG(uap, size), SCARG(uap, shmflg), mode); -#endif + SHMPRINTF(("shmget: key 0x%lx size 0x%x shmflg 0x%x mode 0x%x\n", + SCARG(uap, key), SCARG(uap, size), SCARG(uap, shmflg), mode)); - if (SCARG(uap, key) != IPC_PRIVATE) { -again: - segnum = shm_find_segment_by_key(SCARG(uap, key)); - if (segnum >= 0) { - error = shmget_existing(l, uap, mode, segnum, retval); - if (error == EAGAIN) - goto again; + mutex_enter(&shm_lock); + /* In case of reallocation, we will wait for completion */ + while (__predict_false(shm_realloc_state)) + cv_wait(&shm_realloc_cv, &shm_lock); + + if (key != IPC_PRIVATE) { + error = shmget_existing(l, uap, mode, retval); + if (error != -1) { + mutex_exit(&shm_lock); return error; } - if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) + if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) { + mutex_exit(&shm_lock); return ENOENT; + } } - return shmget_allocate_segment(l, uap, mode, retval); + error = 0; + + /* + * Check the limits.
+ */ + size = SCARG(uap, size); + if (size < shminfo.shmmin || size > shminfo.shmmax) { + mutex_exit(&shm_lock); + return EINVAL; + } + if (shm_nused >= shminfo.shmmni) { + mutex_exit(&shm_lock); + return ENOSPC; + } + size = (size + PGOFSET) & ~PGOFSET; + if (shm_committed + btoc(size) > shminfo.shmall) { + mutex_exit(&shm_lock); + return ENOMEM; + } + + /* Find the first available segment */ + if (shm_last_free < 0) { + for (segnum = 0; segnum < shminfo.shmmni; segnum++) + if (shmsegs[segnum].shm_perm.mode & SHMSEG_FREE) + break; + KASSERT(segnum < shminfo.shmmni); + } else { + segnum = shm_last_free; + shm_last_free = -1; + } + + /* + * Initialize the segment. + * We will drop the lock while allocating the memory, thus mark the + * segment present, but removed, so that no other thread can take it. + * Also, disable reallocation while the lock is dropped. + */ + shmseg = &shmsegs[segnum]; + shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED; + shm_committed += btoc(size); + shm_nused++; + lockmem = shm_use_phys; + shm_realloc_disable++; + mutex_exit(&shm_lock); + + /* Allocate the memory object and lock it if needed */ + shmseg->_shm_internal = uao_create(size, 0); + if (lockmem) { + /* Wire the pages and tag it */ + error = uobj_wirepages(shmseg->_shm_internal, 0, + round_page(size)); + if (error) { + mutex_enter(&shm_lock); + shm_free_segment(segnum); + shm_realloc_disable--; + mutex_exit(&shm_lock); + return error; + } + } + + /* + * Note that while the segment is marked, there is no need to hold + * the lock while setting it up (except for shm_perm.mode). + */ + shmseg->shm_perm._key = SCARG(uap, key); + shmseg->shm_perm._seq = (shmseg->shm_perm._seq + 1) & 0x7fff; + *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm); + + shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_geteuid(cred); + shmseg->shm_perm.cgid = shmseg->shm_perm.gid = kauth_cred_getegid(cred); + shmseg->shm_segsz = SCARG(uap, size); + shmseg->shm_cpid = l->l_proc->p_pid; + shmseg->shm_lpid = shmseg->shm_nattch = 0; + shmseg->shm_atime = shmseg->shm_dtime = 0; + shmseg->shm_ctime = time_second; + + /* + * Segment is initialized. + * Enter the lock, mark as allocated, and notify waiters (if any). + * Also, clear the reallocation state. + */ + mutex_enter(&shm_lock); + shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) | + (mode & (ACCESSPERMS | SHMSEG_RMLINGER)) | + SHMSEG_ALLOCATED | (lockmem ?
SHMSEG_WIRED : 0); + if (shmseg->shm_perm.mode & SHMSEG_WANTED) { + shmseg->shm_perm.mode &= ~SHMSEG_WANTED; + cv_broadcast(&shm_cv[segnum]); + } + shm_realloc_disable--; + cv_broadcast(&shm_realloc_cv); + mutex_exit(&shm_lock); + + return error; } void @@ -668,20 +785,16 @@ shmfork(struct vmspace *vm1, struct vmspace *vm2) struct shmmap_state *shmmap_s; struct shmmap_entry *shmmap_se; + SHMPRINTF(("shmfork %p->%p\n", vm1, vm2)); + mutex_enter(&shm_lock); vm2->vm_shm = vm1->vm_shm; - - if (vm1->vm_shm == NULL) - return; - -#ifdef SHMDEBUG - printf("shmfork %p->%p\n", vm1, vm2); -#endif - - shmmap_s = (struct shmmap_state *)vm1->vm_shm; - - SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) - shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch++; - shmmap_s->nrefs++; + if (vm1->vm_shm) { + shmmap_s = (struct shmmap_state *)vm1->vm_shm; + SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) + shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch++; + shmmap_s->nrefs++; + } + mutex_exit(&shm_lock); } void @@ -689,56 +802,118 @@ shmexit(struct vmspace *vm) { struct shmmap_state *shmmap_s; struct shmmap_entry *shmmap_se; + struct uvm_object **uobj; + size_t *size; + u_int i, n; + SLIST_HEAD(, shmmap_entry) tmp_entries; + + mutex_enter(&shm_lock); shmmap_s = (struct shmmap_state *)vm->vm_shm; - if (shmmap_s == NULL) + if (shmmap_s == NULL) { + mutex_exit(&shm_lock); return; + } vm->vm_shm = NULL; if (--shmmap_s->nrefs > 0) { -#ifdef SHMDEBUG - printf("shmexit: vm %p drop ref (%d entries), now used by %d\n", - vm, shmmap_s->nitems, shmmap_s->nrefs); -#endif + SHMPRINTF(("shmexit: vm %p drop ref (%d entries), refs = %d\n", + vm, shmmap_s->nitems, shmmap_s->nrefs)); SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch--; + mutex_exit(&shm_lock); return; } -#ifdef SHMDEBUG - printf("shmexit: vm %p cleanup (%d entries)\n", vm, shmmap_s->nitems); -#endif - while (!SLIST_EMPTY(&shmmap_s->entries)) { - shmmap_se = SLIST_FIRST(&shmmap_s->entries); - shm_delete_mapping(vm, shmmap_s, shmmap_se); + KASSERT(shmmap_s->nrefs == 0); + n = shmmap_s->nitems; + SHMPRINTF(("shmexit: vm %p cleanup (%d entries)\n", vm, n)); + mutex_exit(&shm_lock); + if (n == 0) { + kmem_free(shmmap_s, sizeof(struct shmmap_state)); + return; } - KASSERT(shmmap_s->nitems == 0); - free(shmmap_s, M_SHM); + + /* Allocate the arrays */ + SLIST_INIT(&tmp_entries); + uobj = kmem_zalloc(n * sizeof(void *), KM_SLEEP); + size = kmem_zalloc(n * sizeof(size_t), KM_SLEEP); + + /* Delete the entry from shm map */ + i = 0; + mutex_enter(&shm_lock); + while (!SLIST_EMPTY(&shmmap_s->entries)) { + struct shmid_ds *shmseg; + + shmmap_se = SLIST_FIRST(&shmmap_s->entries); + shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)]; + size[i] = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; + uobj[i] = shm_delete_mapping(shmmap_s, shmmap_se); + SLIST_INSERT_HEAD(&tmp_entries, shmmap_se, next); + i++; + } + mutex_exit(&shm_lock); + + /* Unmap all segments, free the entries */ + i = 0; + while (!SLIST_EMPTY(&tmp_entries)) { + KASSERT(i < n); + shmmap_se = SLIST_FIRST(&tmp_entries); + SLIST_REMOVE(&tmp_entries, shmmap_se, shmmap_entry, next); + uvm_deallocate(&vm->vm_map, shmmap_se->va, size[i]); + if (uobj[i] != NULL) + uao_detach(uobj[i]); + pool_put(&shmmap_entry_pool, shmmap_se); + i++; + } + + kmem_free(uobj, n * sizeof(void *)); + kmem_free(size, n * sizeof(size_t)); + kmem_free(shmmap_s, sizeof(struct shmmap_state)); } static int shmrealloc(int newshmni) { - int i, sz; + int i, lsegid, sz; vaddr_t v; - struct shmid_ds 
*newshmsegs; + struct shmid_ds *oldshmsegs, *newshmsegs; + kcondvar_t *newshm_cv; - /* XXX: Would be good to have a upper limit */ if (newshmni < 1) return EINVAL; - /* We can't reallocate less memory than we use */ - if (shm_nused > newshmni) - return EPERM; - /* Allocate new memory area */ - sz = newshmni * sizeof(struct shmid_ds); - v = uvm_km_alloc(kernel_map, round_page(sz), 0, UVM_KMF_WIRED); + sz = ALIGN(newshmni * sizeof(struct shmid_ds)) + + ALIGN(newshmni * sizeof(kcondvar_t)); + v = uvm_km_alloc(kernel_map, round_page(sz), 0, + UVM_KMF_WIRED|UVM_KMF_ZERO); if (v == 0) return ENOMEM; + mutex_enter(&shm_lock); + while (shm_realloc_state || shm_realloc_disable) + cv_wait(&shm_realloc_cv, &shm_lock); + + /* + * Get the number of the last used segment. Fail if we are + * trying to reallocate less memory than is currently in use. + */ + lsegid = 0; + for (i = 0; i < shminfo.shmmni; i++) + if ((shmsegs[i].shm_perm.mode & SHMSEG_FREE) == 0) + lsegid = i; + if (lsegid >= newshmni) { + mutex_exit(&shm_lock); + uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED); + return EBUSY; + } + shm_realloc_state = true; + newshmsegs = (void *)v; + newshm_cv = (void *)((uintptr_t)newshmsegs + + ALIGN(newshmni * sizeof(struct shmid_ds))); /* Copy all memory to the new area */ for (i = 0; i < shm_nused; i++) @@ -747,14 +922,25 @@ shmrealloc(int newshmni) /* Mark all new segments as free, if there are any */ for (; i < newshmni; i++) { + cv_init(&newshm_cv[i], "shmwait"); newshmsegs[i].shm_perm.mode = SHMSEG_FREE; newshmsegs[i].shm_perm._seq = 0; } - sz = shminfo.shmmni * sizeof(struct shmid_ds); - uvm_km_free(kernel_map, (vaddr_t)shmsegs, sz, UVM_KMF_WIRED); - shmsegs = newshmsegs; + oldshmsegs = shmsegs; + sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) + + ALIGN(shminfo.shmmni * sizeof(kcondvar_t)); + shminfo.shmmni = newshmni; + shmsegs = newshmsegs; + shm_cv = newshm_cv; + + /* Reallocation completed - notify all waiters, if any */ + shm_realloc_state = false; + cv_broadcast(&shm_realloc_cv); + mutex_exit(&shm_lock); + + uvm_km_free(kernel_map, (vaddr_t)oldshmsegs, sz, UVM_KMF_WIRED); return 0; } @@ -765,23 +951,33 @@ shminit(void) vaddr_t v; mutex_init(&shm_lock, MUTEX_DEFAULT, IPL_NONE); + pool_init(&shmmap_entry_pool, sizeof(struct shmmap_entry), 0, 0, 0, + "shmmp", &pool_allocator_nointr, IPL_NONE); + cv_init(&shm_realloc_cv, "shmrealc"); - /* Allocate pageable memory for our structures */ - sz = shminfo.shmmni * sizeof(struct shmid_ds); - v = uvm_km_alloc(kernel_map, round_page(sz), 0, UVM_KMF_WIRED); + /* Allocate the wired memory for our structures */ + sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) + + ALIGN(shminfo.shmmni * sizeof(kcondvar_t)); + v = uvm_km_alloc(kernel_map, round_page(sz), 0, + UVM_KMF_WIRED|UVM_KMF_ZERO); if (v == 0) panic("sysv_shm: cannot allocate memory"); shmsegs = (void *)v; + shm_cv = (void *)((uintptr_t)shmsegs + + ALIGN(shminfo.shmmni * sizeof(struct shmid_ds))); shminfo.shmmax *= PAGE_SIZE; for (i = 0; i < shminfo.shmmni; i++) { + cv_init(&shm_cv[i], "shmwait"); shmsegs[i].shm_perm.mode = SHMSEG_FREE; shmsegs[i].shm_perm._seq = 0; } shm_last_free = 0; shm_nused = 0; shm_committed = 0; + shm_realloc_disable = 0; + shm_realloc_state = false; } static int @@ -797,13 +993,7 @@ sysctl_ipc_shmmni(SYSCTLFN_ARGS) if (error || newp == NULL) return error; - mutex_enter(&shm_lock); - error = shmrealloc(newsize); - if (error == 0) - shminfo.shmmni = newsize; - mutex_exit(&shm_lock); - - return error; + return shmrealloc(newsize); } static int @@ -813,12 +1003,12 @@ sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS) struct sysctlnode
node; node = *rnode; node.sysctl_data = &newsize; + newsize = shminfo.shmall; error = sysctl_lookup(SYSCTLFN_CALL(&node)); if (error || newp == NULL) return error; - /* XXX: Would be good to have a upper limit */ if (newsize < 1) return EINVAL; @@ -830,47 +1020,42 @@ sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS) SYSCTL_SETUP(sysctl_ipc_shm_setup, "sysctl kern.ipc subtree setup") { + sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "kern", NULL, NULL, 0, NULL, 0, CTL_KERN, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "ipc", SYSCTL_DESCR("SysV IPC options"), NULL, 0, NULL, 0, CTL_KERN, KERN_SYSVIPC, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT | CTLFLAG_READONLY, CTLTYPE_INT, "shmmax", SYSCTL_DESCR("Max shared memory segment size in bytes"), NULL, 0, &shminfo.shmmax, 0, CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAX, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_INT, "shmmni", SYSCTL_DESCR("Max number of shared memory identifiers"), sysctl_ipc_shmmni, 0, &shminfo.shmmni, 0, CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMNI, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_INT, "shmseg", SYSCTL_DESCR("Max shared memory segments per process"), NULL, 0, &shminfo.shmseg, 0, CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMSEG, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_INT, "shmmaxpgs", SYSCTL_DESCR("Max amount of shared memory in pages"), sysctl_ipc_shmmaxpgs, 0, &shminfo.shmall, 0, CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAXPGS, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_INT, "shm_use_phys", diff --git a/sys/kern/tty.c b/sys/kern/tty.c index 2c9f7358947a..c0a3b1313207 100644 --- a/sys/kern/tty.c +++ b/sys/kern/tty.c @@ -1,4 +1,4 @@ -/* $NetBSD: tty.c,v 1.210 2007/12/31 21:11:13 ad Exp $ */ +/* $NetBSD: tty.c,v 1.211 2008/01/02 11:48:55 ad Exp $ */ /*- * Copyright (c) 1982, 1986, 1990, 1991, 1993 @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: tty.c,v 1.210 2007/12/31 21:11:13 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tty.c,v 1.211 2008/01/02 11:48:55 ad Exp $"); #include #include @@ -1905,6 +1905,7 @@ ttwrite(struct tty *tp, struct uio *uio, int flag) goto loop; } } + /* * Hang the process if it's in the background. */ diff --git a/sys/kern/tty_ptm.c b/sys/kern/tty_ptm.c index 42a0f8e0f6b0..f88354ce373d 100644 --- a/sys/kern/tty_ptm.c +++ b/sys/kern/tty_ptm.c @@ -1,4 +1,4 @@ -/* $NetBSD: tty_ptm.c,v 1.21 2007/11/26 19:02:05 pooka Exp $ */ +/* $NetBSD: tty_ptm.c,v 1.22 2008/01/02 11:48:55 ad Exp $ */ /*- * Copyright (c) 2004 The NetBSD Foundation, Inc. 
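Both shm sysctl handlers above share one shape: copy the node, point it at a local, seed the local with the live value, run sysctl_lookup(), and only commit after validating. A minimal sketch of that shape, with example_value standing in for the real variable (hypothetical name; the lower-bound check is borrowed from the handlers above):

	static int example_value = 1;

	static int
	sysctl_example(SYSCTLFN_ARGS)
	{
		int error, newsize;
		struct sysctlnode node;

		node = *rnode;
		node.sysctl_data = &newsize;
		newsize = example_value;	/* seed with the live value */
		error = sysctl_lookup(SYSCTLFN_CALL(&node));
		if (error || newp == NULL)
			return error;		/* error, or read-only query */

		if (newsize < 1)		/* validate before committing */
			return EINVAL;
		example_value = newsize;
		return 0;
	}

Note that shmrealloc() is now called without shm_lock held; it takes the lock itself and publishes the new segment array before anyone can observe the updated shminfo.shmmni.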
@@ -34,7 +34,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: tty_ptm.c,v 1.21 2007/11/26 19:02:05 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tty_ptm.c,v 1.22 2008/01/02 11:48:55 ad Exp $"); #include "opt_ptm.h" @@ -224,10 +224,10 @@ pty_grant_slave(struct lwp *l, dev_t dev) return error; } } - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); revoke = (vp->v_usecount > 1 || (vp->v_iflag & VI_ALIASED) || (vp->v_iflag & VI_LAYER)); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); VOP_UNLOCK(vp, 0); if (revoke) VOP_REVOKE(vp, REVOKEALL); diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 47a835a760a5..2018a2378dfc 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1,4 +1,40 @@ -/* $NetBSD: vfs_bio.c,v 1.182 2007/12/24 15:00:20 ad Exp $ */ +/* $NetBSD: vfs_bio.c,v 1.183 2008/01/02 11:48:55 ad Exp $ */ + +/*- + * Copyright (c) 2007 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Andrew Doran. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ /*- * Copyright (c) 1982, 1986, 1989, 1993 @@ -78,7 +114,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.182 2007/12/24 15:00:20 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.183 2008/01/02 11:48:55 ad Exp $"); #include "fs_ffs.h" #include "opt_bufcache.h" @@ -95,6 +131,8 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.182 2007/12/24 15:00:20 ad Exp $"); #include #include #include +#include +#include #include @@ -123,21 +161,25 @@ static void buf_setwm(void); static int buf_trim(void); static void *bufpool_page_alloc(struct pool *, int); static void bufpool_page_free(struct pool *, void *); -static inline struct buf *bio_doread(struct vnode *, daddr_t, int, +static buf_t *bio_doread(struct vnode *, daddr_t, int, kauth_cred_t, int); -static struct buf *getnewbuf(int, int, int); +static buf_t *getnewbuf(int, int, int); static int buf_lotsfree(void); static int buf_canrelease(void); -static inline u_long buf_mempoolidx(u_long); -static inline u_long buf_roundsize(u_long); -static inline void *buf_malloc(size_t); +static u_long buf_mempoolidx(u_long); +static u_long buf_roundsize(u_long); +static void *buf_malloc(size_t); static void buf_mrelease(void *, size_t); -static inline void binsheadfree(struct buf *, struct bqueue *); -static inline void binstailfree(struct buf *, struct bqueue *); +static void binsheadfree(buf_t *, struct bqueue *); +static void binstailfree(buf_t *, struct bqueue *); int count_lock_queue(void); /* XXX */ #ifdef DEBUG -static int checkfreelist(struct buf *, struct bqueue *); +static int checkfreelist(buf_t *, struct bqueue *); #endif +static void biointr(void *); +static void biodone2(buf_t *); +static void bref(buf_t *); +static void brele(buf_t *); /* * Definitions for the buffer hash lists. @@ -146,14 +188,7 @@ static int checkfreelist(struct buf *, struct bqueue *); (&bufhashtbl[(((long)(dvp) >> 8) + (int)(lbn)) & bufhash]) LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash; u_long bufhash; - -struct bio_ops *bioopsp; /* can be overriden by ffs_softdep */ - -/* - * Insq/Remq for the buffer hash lists. - */ -#define binshash(bp, dp) LIST_INSERT_HEAD(dp, bp, b_hash) -#define bremhash(bp) LIST_REMOVE(bp, b_hash) +struct bio_ops *bioopsp; /* I/O operation notification */ /* * Definitions for the buffer free lists. @@ -165,23 +200,25 @@ struct bio_ops *bioopsp; /* can be overriden by ffs_softdep */ #define BQ_AGE 2 /* rubbish */ struct bqueue { - TAILQ_HEAD(, buf) bq_queue; - uint64_t bq_bytes; + TAILQ_HEAD(, buf) bq_queue; + uint64_t bq_bytes; + buf_t *bq_marker; } bufqueues[BQUEUES]; -int needbuffer; + +static kcondvar_t needbuffer_cv; /* * Buffer queue lock. - * Take this lock first if also taking some buffer's b_interlock. */ -struct simplelock bqueue_slock = SIMPLELOCK_INITIALIZER; +kmutex_t bufcache_lock; +kmutex_t buffer_lock; -/* - * Buffer pools for I/O buffers. - */ -static struct pool bufpool; -static struct pool bufiopool; +/* Software ISR for completed transfers. */ +static void *biodone_sih; +/* Buffer pool for I/O buffers. */ +static pool_cache_t buf_cache; +static pool_cache_t bufio_cache; /* XXX - somewhat gross.. 
*/
 #if MAXBSIZE == 0x2000
@@ -231,10 +268,10 @@ static struct pool_allocator bufmempool_allocator = {
 };
 
 /* Buffer memory management variables */
-uint64_t bufmem_valimit;
-uint64_t bufmem_hiwater;
-uint64_t bufmem_lowater;
-uint64_t bufmem;
+u_long bufmem_valimit;
+u_long bufmem_hiwater;
+u_long bufmem_lowater;
+u_long bufmem;
 
 /*
  * MD code can call this to set a hard limit on the amount
@@ -269,14 +306,18 @@ buf_setwm(void)
 #ifdef DEBUG
 int debug_verify_freelist = 0;
 static int
-checkfreelist(struct buf *bp, struct bqueue *dp)
+checkfreelist(buf_t *bp, struct bqueue *dp)
 {
-	struct buf *b;
+	buf_t *b;
+
+	if (!debug_verify_freelist)
+		return 1;
 
 	TAILQ_FOREACH(b, &dp->bq_queue, b_freelist) {
 		if (b == bp)
 			return 1;
 	}
+	return 0;
 }
 #endif
@@ -285,8 +326,8 @@ checkfreelist(struct buf *bp, struct bqueue *dp)
  * Insq/Remq for the buffer hash lists.
  * Call with buffer queue locked.
 */
-static inline void
-binsheadfree(struct buf *bp, struct bqueue *dp)
+static void
+binsheadfree(buf_t *bp, struct bqueue *dp)
 {
 
 	KASSERT(bp->b_freelistindex == -1);
@@ -295,8 +336,8 @@ binsheadfree(struct buf *bp, struct bqueue *dp)
 	bp->b_freelistindex = dp - bufqueues;
 }
 
-static inline void
-binstailfree(struct buf *bp, struct bqueue *dp)
+static void
+binstailfree(buf_t *bp, struct bqueue *dp)
 {
 
 	KASSERT(bp->b_freelistindex == -1);
@@ -306,24 +347,61 @@ binstailfree(struct buf *bp, struct bqueue *dp)
 }
 
 void
-bremfree(struct buf *bp)
+bremfree(buf_t *bp)
 {
 	struct bqueue *dp;
 	int bqidx = bp->b_freelistindex;
 
-	LOCK_ASSERT(simple_lock_held(&bqueue_slock));
+	KASSERT(mutex_owned(&bufcache_lock));
 
 	KASSERT(bqidx != -1);
 	dp = &bufqueues[bqidx];
-	KDASSERT(!debug_verify_freelist || checkfreelist(bp, dp));
+	KDASSERT(checkfreelist(bp, dp));
 	KASSERT(dp->bq_bytes >= bp->b_bufsize);
 	TAILQ_REMOVE(&dp->bq_queue, bp, b_freelist);
 	dp->bq_bytes -= bp->b_bufsize;
+
+	/* For the sysctl helper. */
+	if (bp == dp->bq_marker)
+		dp->bq_marker = NULL;
+
 #if defined(DIAGNOSTIC)
 	bp->b_freelistindex = -1;
 #endif /* defined(DIAGNOSTIC) */
 }
 
+/*
+ * Add a reference to a buffer structure that came from buf_cache.
+ */
+static inline void
+bref(buf_t *bp)
+{
+
+	KASSERT(mutex_owned(&bufcache_lock));
+	KASSERT(bp->b_refcnt > 0);
+
+	bp->b_refcnt++;
+}
+
+/*
+ * Free an unused buffer structure that came from buf_cache.
+ */
+static inline void
+brele(buf_t *bp)
+{
+
+	KASSERT(mutex_owned(&bufcache_lock));
+	KASSERT(bp->b_refcnt > 0);
+
+	if (bp->b_refcnt-- == 1) {
+		buf_destroy(bp);
+#ifdef DEBUG
+		memset((char *)bp, 0, sizeof(*bp));
+#endif
+		pool_cache_put(buf_cache, bp);
+	}
+}
+
 u_long
 buf_memcalc(void)
 {
@@ -369,6 +447,10 @@ bufinit(void)
 	int use_std;
 	u_int i;
 
+	mutex_init(&bufcache_lock, MUTEX_DEFAULT, IPL_NONE);
+	mutex_init(&buffer_lock, MUTEX_DEFAULT, IPL_NONE);
+	cv_init(&needbuffer_cv, "needbuf");
+
 	/*
 	 * Initialize buffer cache memory parameters.
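A note on the pair just added: b_refcnt counts references to the buf structure itself, independent of BC_BUSY ownership of the data, and the post-decrement test in brele() frees on the last release. The pattern they exist for, as bbusy() uses near the end of this file, is pinning a buffer across a sleep so that a concurrent release cannot destroy the condvars underneath the sleeper. A minimal sketch (example_wait is a hypothetical caller):

	static void
	example_wait(buf_t *bp)
	{

		KASSERT(mutex_owned(&bufcache_lock));

		bp->b_cflags |= BC_WANTED;
		bref(bp);	/* pin: bp may be released while we sleep */
		(void)cv_timedwait(&bp->b_busy, &bufcache_lock, hz);
		brele(bp);	/* unpin: frees bp if we held the last ref */
	}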
*/ @@ -395,10 +477,10 @@ bufinit(void) use_std = 1; #endif - pool_init(&bufpool, sizeof(struct buf), 0, 0, 0, "bufpl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&bufiopool, sizeof(struct buf), 0, 0, 0, "biopl", - NULL, IPL_BIO); + buf_cache = pool_cache_init(sizeof(buf_t), 0, 0, 0, + "bufpl", NULL, IPL_SOFTBIO, NULL, NULL, NULL); + bufio_cache = pool_cache_init(sizeof(buf_t), 0, 0, 0, + "biopl", NULL, IPL_BIO, NULL, NULL, NULL); bufmempool_allocator.pa_backingmap = buf_map; for (i = 0; i < NMEMPOOLS; i++) { @@ -435,6 +517,16 @@ bufinit(void) bufhashtbl = hashinit(nbuf, HASH_LIST, M_CACHE, M_WAITOK, &bufhash); } +void +bufinit2(void) +{ + + biodone_sih = softint_establish(SOFTINT_BIO | SOFTINT_MPSAFE, biointr, + NULL); + if (biodone_sih == NULL) + panic("bufinit2: can't establish soft interrupt"); +} + static int buf_lotsfree(void) { @@ -478,15 +570,14 @@ buf_lotsfree(void) * Return estimate of bytes we think need to be * released to help resolve low memory conditions. * - * => called at splbio. - * => called with bqueue_slock held. + * => called with bufcache_lock held. */ static int buf_canrelease(void) { int pagedemand, ninvalid = 0; - LOCK_ASSERT(simple_lock_held(&bqueue_slock)); + KASSERT(mutex_owned(&bufcache_lock)); if (bufmem < bufmem_lowater) return 0; @@ -506,7 +597,7 @@ buf_canrelease(void) /* * Buffer memory allocation helper functions */ -static inline u_long +static u_long buf_mempoolidx(u_long size) { u_int n = 0; @@ -522,19 +613,18 @@ buf_mempoolidx(u_long size) return n; } -static inline u_long +static u_long buf_roundsize(u_long size) { /* Round up to nearest power of 2 */ return (1 << (buf_mempoolidx(size) + MEMPOOL_INDEX_OFFSET)); } -static inline void * +static void * buf_malloc(size_t size) { u_int n = buf_mempoolidx(size); void *addr; - int s; while (1) { addr = pool_get(&bmempools[n], PR_NOWAIT); @@ -542,16 +632,20 @@ buf_malloc(size_t size) break; /* No memory, see if we can free some. If so, try again */ - if (buf_drain(1) > 0) + mutex_enter(&bufcache_lock); + if (buf_drain(1) > 0) { + mutex_exit(&bufcache_lock); continue; + } + + if (curlwp == uvm.pagedaemon_lwp) { + mutex_exit(&bufcache_lock); + return NULL; + } /* Wait for buffers to arrive on the LRU queue */ - s = splbio(); - simple_lock(&bqueue_slock); - needbuffer = 1; - ltsleep(&needbuffer, PNORELOCK | (PRIBIO + 1), - "buf_malloc", 0, &bqueue_slock); - splx(s); + cv_timedwait(&needbuffer_cv, &bufcache_lock, hz / 4); + mutex_exit(&bufcache_lock); } return addr; @@ -567,11 +661,11 @@ buf_mrelease(void *addr, size_t size) /* * bread()/breadn() helper. */ -static inline struct buf * +static buf_t * bio_doread(struct vnode *vp, daddr_t blkno, int size, kauth_cred_t cred, int async) { - struct buf *bp; + buf_t *bp; struct mount *mp; bp = getblk(vp, blkno, size, 0, 0); @@ -584,10 +678,10 @@ bio_doread(struct vnode *vp, daddr_t blkno, int size, kauth_cred_t cred, /* * If buffer does not have data valid, start a read. - * Note that if buffer is B_INVAL, getblk() won't return it. + * Note that if buffer is BC_INVAL, getblk() won't return it. * Therefore, it's valid if its I/O has completed or been delayed. */ - if (!ISSET(bp->b_flags, (B_DONE | B_DELWRI))) { + if (!ISSET(bp->b_oflags, (BO_DONE | BO_DELWRI))) { /* Start I/O for the buffer. */ SET(bp->b_flags, B_READ | async); if (async) @@ -598,9 +692,8 @@ bio_doread(struct vnode *vp, daddr_t blkno, int size, kauth_cred_t cred, /* Pay for the read. 
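bufinit() above now draws buf structures from pool_cache(9) rather than raw pools; the cache layer can keep per-CPU lists of constructed objects, so pool_cache_get()/pool_cache_put() usually avoid the global pool lock. The lifecycle under the names this patch introduces, roughly:

	buf_t *bp;

	/* once, at initialization, as in bufinit() above */
	buf_cache = pool_cache_init(sizeof(buf_t), 0, 0, 0,
	    "bufpl", NULL, IPL_SOFTBIO, NULL, NULL, NULL);

	/* per buffer; PR_NOWAIT because callers may hold bufcache_lock */
	bp = pool_cache_get(buf_cache, PR_NOWAIT);
	if (bp != NULL) {
		buf_init(bp);		/* condvars and default fields */
		/* ... use the buffer ... */
		buf_destroy(bp);
		pool_cache_put(buf_cache, bp);
	}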
*/ curproc->p_stats->p_ru.ru_inblock++; - } else if (async) { + } else if (async) brelse(bp, 0); - } if (vp->v_type == VBLK) mp = vp->v_specmountpoint; @@ -628,9 +721,9 @@ bio_doread(struct vnode *vp, daddr_t blkno, int size, kauth_cred_t cred, */ int bread(struct vnode *vp, daddr_t blkno, int size, kauth_cred_t cred, - struct buf **bpp) + buf_t **bpp) { - struct buf *bp; + buf_t *bp; /* Get buffer for block. */ bp = *bpp = bio_doread(vp, blkno, size, cred, 0); @@ -645,9 +738,9 @@ bread(struct vnode *vp, daddr_t blkno, int size, kauth_cred_t cred, */ int breadn(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablks, - int *rasizes, int nrablks, kauth_cred_t cred, struct buf **bpp) + int *rasizes, int nrablks, kauth_cred_t cred, buf_t **bpp) { - struct buf *bp; + buf_t *bp; int i; bp = *bpp = bio_doread(vp, blkno, size, cred, 0); @@ -655,14 +748,18 @@ breadn(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablks, /* * For each of the read-ahead blocks, start a read, if necessary. */ + mutex_enter(&bufcache_lock); for (i = 0; i < nrablks; i++) { /* If it's in the cache, just go on to next one. */ if (incore(vp, rablks[i])) continue; /* Get a buffer for the read-ahead block */ + mutex_exit(&bufcache_lock); (void) bio_doread(vp, rablks[i], rasizes[i], cred, B_ASYNC); + mutex_enter(&bufcache_lock); } + mutex_exit(&bufcache_lock); /* Otherwise, we had to start a read for it; wait until it's valid. */ return (biowait(bp)); @@ -675,7 +772,7 @@ breadn(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablks, */ int breada(struct vnode *vp, daddr_t blkno, int size, daddr_t rablkno, - int rabsize, kauth_cred_t cred, struct buf **bpp) + int rabsize, kauth_cred_t cred, buf_t **bpp) { return (breadn(vp, blkno, size, &rablkno, &rabsize, 1, cred, bpp)); @@ -685,16 +782,17 @@ breada(struct vnode *vp, daddr_t blkno, int size, daddr_t rablkno, * Block write. Described in Bach (p.56) */ int -bwrite(struct buf *bp) +bwrite(buf_t *bp) { - int rv, sync, wasdelayed, s; + int rv, sync, wasdelayed; struct vnode *vp; struct mount *mp; - KASSERT(ISSET(bp->b_flags, B_BUSY)); + KASSERT(ISSET(bp->b_cflags, BC_BUSY)); vp = bp->b_vp; if (vp != NULL) { + KASSERT(bp->b_objlock == &vp->v_interlock); if (vp->v_type == VBLK) mp = vp->v_specmountpoint; else @@ -728,28 +826,24 @@ bwrite(struct buf *bp) mp->mnt_stat.f_asyncwrites++; } - s = splbio(); - simple_lock(&bp->b_interlock); - - wasdelayed = ISSET(bp->b_flags, B_DELWRI); - - CLR(bp->b_flags, (B_READ | B_DONE | B_DELWRI)); - bp->b_error = 0; - /* * Pay for the I/O operation and make sure the buf is on the correct * vnode queue. */ + CLR(bp->b_flags, B_READ); + mutex_enter(bp->b_objlock); + wasdelayed = ISSET(bp->b_oflags, BO_DELWRI); + CLR(bp->b_oflags, BO_DONE | BO_DELWRI); + bp->b_error = 0; if (wasdelayed) reassignbuf(bp, bp->b_vp); else curproc->p_stats->p_ru.ru_oublock++; + if (vp != NULL) + vp->v_numoutput++; + mutex_exit(bp->b_objlock); - /* Initiate disk write. Make sure the appropriate party is charged. */ - V_INCR_NUMOUTPUT(bp->b_vp); - simple_unlock(&bp->b_interlock); - splx(s); - + /* Initiate disk write. */ if (sync) BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); else @@ -792,9 +886,10 @@ vn_bwrite(void *v) * Described in Leffler, et al. (pp. 208-213). */ void -bdwrite(struct buf *bp) +bdwrite(buf_t *bp) { - int s; + + KASSERT(ISSET(bp->b_cflags, BC_BUSY)); /* If this is a tape block, write the block now. 
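Worth spelling out, because every hunk in this file leans on it: the old b_flags word is now split three ways. Judging from the assertions in this patch, B_* bits left in b_flags belong to whoever holds the buffer busy, BC_* bits in b_cflags are covered by bufcache_lock, and BO_* bits in b_oflags are covered by bp->b_objlock. A completion path in miniature (hypothetical helper, mirroring what biodone2() does further down):

	static void
	example_mark_done(buf_t *bp)
	{

		KASSERT(ISSET(bp->b_cflags, BC_BUSY));	/* we own the buffer */

		mutex_enter(bp->b_objlock);
		SET(bp->b_oflags, BO_DONE);
		cv_broadcast(&bp->b_done);	/* wake sleepers in biowait() */
		mutex_exit(bp->b_objlock);
	}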
*/
 	if (bdev_type(bp->b_dev) == D_TAPE) {
@@ -808,21 +903,17 @@
 	 * (2) Charge for the write,
 	 * (3) Make sure it's on its vnode's correct block list.
 	 */
-	s = splbio();
-	simple_lock(&bp->b_interlock);
+	KASSERT(bp->b_vp == NULL || bp->b_objlock == &bp->b_vp->v_interlock);
 
-	KASSERT(ISSET(bp->b_flags, B_BUSY));
-
-	if (!ISSET(bp->b_flags, B_DELWRI)) {
-		SET(bp->b_flags, B_DELWRI);
+	mutex_enter(bp->b_objlock);
+	if (!ISSET(bp->b_oflags, BO_DELWRI)) {
+		SET(bp->b_oflags, BO_DELWRI);
 		curproc->p_stats->p_ru.ru_oublock++;
 		reassignbuf(bp, bp->b_vp);
 	}
 
-	/* Otherwise, the "write" is done, so mark and release the buffer. */
-	CLR(bp->b_flags, B_DONE);
-	simple_unlock(&bp->b_interlock);
-	splx(s);
+	CLR(bp->b_oflags, BO_DONE);
+	mutex_exit(bp->b_objlock);
 
 	brelse(bp, 0);
 }
@@ -831,72 +922,64 @@ bdwrite(struct buf *bp)
  * Asynchronous block write; just an asynchronous bwrite().
 */
 void
-bawrite(struct buf *bp)
+bawrite(buf_t *bp)
 {
-	int s;
 
-	s = splbio();
-	simple_lock(&bp->b_interlock);
-
-	KASSERT(ISSET(bp->b_flags, B_BUSY));
+	KASSERT(ISSET(bp->b_cflags, BC_BUSY));
 
 	SET(bp->b_flags, B_ASYNC);
-	simple_unlock(&bp->b_interlock);
-	splx(s);
 	VOP_BWRITE(bp);
 }
 
 /*
  * Same as first half of bdwrite, mark buffer dirty, but do not release it.
- * Call at splbio() and with the buffer interlock locked.
- * Note: called only from biodone() through ffs softdep's bioopsp->io_complete()
+ * Call with the buffer interlock held.
+ *
+ * Note: called only from biodone() through ffs softdep's io_complete()
 */
 void
-bdirty(struct buf *bp)
+bdirty(buf_t *bp)
 {
 
-	LOCK_ASSERT(simple_lock_held(&bp->b_interlock));
-	KASSERT(ISSET(bp->b_flags, B_BUSY));
+	KASSERT(mutex_owned(&bufcache_lock));
+	KASSERT(bp->b_objlock == &bp->b_vp->v_interlock);
+	KASSERT(mutex_owned(bp->b_objlock));
+	KASSERT(ISSET(bp->b_cflags, BC_BUSY));
 
-	CLR(bp->b_flags, B_AGE);
+	CLR(bp->b_cflags, BC_AGE);
 
-	if (!ISSET(bp->b_flags, B_DELWRI)) {
-		SET(bp->b_flags, B_DELWRI);
+	if (!ISSET(bp->b_oflags, BO_DELWRI)) {
+		SET(bp->b_oflags, BO_DELWRI);
 		curproc->p_stats->p_ru.ru_oublock++;
 		reassignbuf(bp, bp->b_vp);
 	}
 }
 
+
 /*
  * Release a buffer on to the free lists.
  * Described in Bach (p. 46).
 */
 void
-brelse(struct buf *bp, int set)
+brelsel(buf_t *bp, int set)
 {
 	struct bqueue *bufq;
-	int s;
+	struct vnode *vp;
 
-	/* Block disk interrupts. */
-	s = splbio();
-	simple_lock(&bqueue_slock);
-	simple_lock(&bp->b_interlock);
+	KASSERT(mutex_owned(&bufcache_lock));
 
-	bp->b_flags |= set;
+	SET(bp->b_cflags, set);
 
-	KASSERT(ISSET(bp->b_flags, B_BUSY));
-	KASSERT(!ISSET(bp->b_flags, B_CALL));
+	KASSERT(ISSET(bp->b_cflags, BC_BUSY));
+	KASSERT(bp->b_iodone == NULL);
 
 	/* Wake up any processes waiting for any buffer to become free. */
-	if (needbuffer) {
-		needbuffer = 0;
-		wakeup(&needbuffer);
-	}
+	cv_signal(&needbuffer_cv);
 
-	/* Wake up any proceeses waiting for _this_ buffer to become free. */
-	if (ISSET(bp->b_flags, B_WANTED)) {
-		CLR(bp->b_flags, B_WANTED|B_AGE);
-		wakeup(bp);
+	/* Wake up any processes waiting for _this_ buffer to become free. */
+	if (ISSET(bp->b_cflags, BC_WANTED) != 0) {
+		CLR(bp->b_cflags, BC_WANTED|BC_AGE);
+		cv_broadcast(&bp->b_busy);
 	}
 
 	/*
@@ -904,46 +987,54 @@ brelse(struct buf *bp, int set)
 	 */
 
 	/* If it's locked, don't report an error; try again later. */
-	if (ISSET(bp->b_flags, B_LOCKED) && bp->b_error != 0)
+	if (ISSET(bp->b_cflags, BC_LOCKED))
 		bp->b_error = 0;
 
 	/* If it's not cacheable, or an error, mark it invalid. */
-	if (ISSET(bp->b_flags, B_NOCACHE) || bp->b_error != 0)
-		SET(bp->b_flags, B_INVAL);
+	if (ISSET(bp->b_cflags, BC_NOCACHE) || bp->b_error != 0)
+		SET(bp->b_cflags, BC_INVAL);
 
-	if (ISSET(bp->b_flags, B_VFLUSH)) {
+	if (ISSET(bp->b_cflags, BC_VFLUSH)) {
 		/*
 		 * This is a delayed write buffer that was just flushed to
 		 * disk.  It is still on the LRU queue.  If it's become
 		 * invalid, then we need to move it to a different queue;
 		 * otherwise leave it in its current position.
 		 */
-		CLR(bp->b_flags, B_VFLUSH);
-		if (!ISSET(bp->b_flags, B_INVAL|B_LOCKED|B_AGE) &&
+		CLR(bp->b_cflags, BC_VFLUSH);
+		if (!ISSET(bp->b_cflags, BC_INVAL|BC_LOCKED|BC_AGE) &&
 		    bp->b_error == 0) {
-			KDASSERT(!debug_verify_freelist || checkfreelist(bp, &bufqueues[BQ_LRU]));
+			KDASSERT(checkfreelist(bp, &bufqueues[BQ_LRU]));
 			goto already_queued;
 		} else {
 			bremfree(bp);
 		}
 	}
 
 	KDASSERT(!debug_verify_freelist || !checkfreelist(bp, &bufqueues[BQ_AGE]));
 	KDASSERT(!debug_verify_freelist || !checkfreelist(bp, &bufqueues[BQ_LRU]));
 	KDASSERT(!debug_verify_freelist || !checkfreelist(bp, &bufqueues[BQ_LOCKED]));
 
-	if ((bp->b_bufsize <= 0) || ISSET(bp->b_flags, B_INVAL)) {
+	if ((bp->b_bufsize <= 0) || ISSET(bp->b_cflags, BC_INVAL)) {
 		/*
 		 * If it's invalid or empty, dissociate it from its vnode
 		 * and put on the head of the appropriate queue.
 		 */
-		if (LIST_FIRST(&bp->b_dep) != NULL && bioopsp)
-			bioopsp->io_deallocate(bp);
-		CLR(bp->b_flags, B_DONE|B_DELWRI);
-		if (bp->b_vp) {
+		if (bioopsp != NULL)
+			(*bioopsp->io_deallocate)(bp);
+
+		mutex_enter(bp->b_objlock);
+		CLR(bp->b_oflags, BO_DONE|BO_DELWRI);
+		if ((vp = bp->b_vp) != NULL) {
+			KASSERT(bp->b_objlock == &vp->v_interlock);
 			reassignbuf(bp, bp->b_vp);
 			brelvp(bp);
+			mutex_exit(&vp->v_interlock);
+		} else {
+			KASSERT(bp->b_objlock == &buffer_lock);
+			mutex_exit(bp->b_objlock);
 		}
+
 		if (bp->b_bufsize <= 0)
 			/* no data */
 			goto already_queued;
@@ -951,7 +1042,7 @@ brelse(struct buf *bp, int set)
 			/* invalid data */
 			bufq = &bufqueues[BQ_AGE];
 		binsheadfree(bp, bufq);
-	} else {
+	} else {
 		/*
 		 * It has valid data.  Put it on the end of the appropriate
 		 * queue, so that it'll stick around for as long as possible.
 		 * If buf is AGE, but has dependencies, must put it on last
 		 * bufqueue to be scanned, ie LRU.  This protects against the
 		 * livelock where BQ_AGE only has buffers with dependencies,
 		 * and we thus never get to the dependent buffers in BQ_LRU.
 		 */
-		if (ISSET(bp->b_flags, B_LOCKED))
+		if (ISSET(bp->b_cflags, BC_LOCKED)) {
 			/* locked in core */
 			bufq = &bufqueues[BQ_LOCKED];
-		else if (!ISSET(bp->b_flags, B_AGE))
+		} else if (!ISSET(bp->b_cflags, BC_AGE)) {
 			/* valid data */
 			bufq = &bufqueues[BQ_LRU];
-		else {
+		} else {
 			/* stale but valid data */
 			int has_deps;
 
-			if (LIST_FIRST(&bp->b_dep) != NULL && bioopsp)
-				has_deps = bioopsp->io_countdeps(bp, 0);
+			if (bioopsp != NULL)
+				has_deps = (*bioopsp->io_countdeps)(bp, 0);
 			else
 				has_deps = 0;
 			bufq = has_deps ? &bufqueues[BQ_LRU] :
@@ -979,22 +1070,22 @@ brelse(struct buf *bp, int set)
 		}
 		binstailfree(bp, bufq);
 	}
-
 already_queued:
 	/* Unlock the buffer. */
-	CLR(bp->b_flags, B_AGE|B_ASYNC|B_BUSY|B_NOCACHE);
-	SET(bp->b_flags, B_CACHE);
+	CLR(bp->b_cflags, BC_AGE|BC_BUSY|BC_NOCACHE);
+	CLR(bp->b_flags, B_ASYNC);
 
-	/* Allow disk interrupts. 
*/ - simple_unlock(&bp->b_interlock); - simple_unlock(&bqueue_slock); - splx(s); - if (bp->b_bufsize <= 0) { -#ifdef DEBUG - memset((char *)bp, 0, sizeof(*bp)); -#endif - pool_put(&bufpool, bp); - } + if (bp->b_bufsize <= 0) + brele(bp); +} + +void +brelse(buf_t *bp, int set) +{ + + mutex_enter(&bufcache_lock); + brelsel(bp, set); + mutex_exit(&bufcache_lock); } /* @@ -1004,16 +1095,20 @@ already_queued: * we normally don't return the buffer, unless the caller explicitly * wants us to. */ -struct buf * +buf_t * incore(struct vnode *vp, daddr_t blkno) { - struct buf *bp; + buf_t *bp; + + KASSERT(mutex_owned(&bufcache_lock)); /* Search hash chain */ LIST_FOREACH(bp, BUFHASH(vp, blkno), b_hash) { if (bp->b_lblkno == blkno && bp->b_vp == vp && - !ISSET(bp->b_flags, B_INVAL)) - return (bp); + !ISSET(bp->b_cflags, BC_INVAL)) { + KASSERT(bp->b_objlock == &vp->v_interlock); + return (bp); + } } return (NULL); @@ -1027,65 +1122,63 @@ incore(struct vnode *vp, daddr_t blkno) * correct size. It is up to the caller to insure that the * cached blocks be of the correct size. */ -struct buf * +buf_t * getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo) { - struct buf *bp; - int s, err; - int preserve; + int err, preserve; + buf_t *bp; -start: - s = splbio(); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); + loop: bp = incore(vp, blkno); if (bp != NULL) { - simple_lock(&bp->b_interlock); - if (ISSET(bp->b_flags, B_BUSY)) { - simple_unlock(&bqueue_slock); - if (curlwp == uvm.pagedaemon_lwp) { - simple_unlock(&bp->b_interlock); - splx(s); - return NULL; - } - SET(bp->b_flags, B_WANTED); - err = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, - "getblk", slptimeo, &bp->b_interlock); - splx(s); - if (err) - return (NULL); - goto start; + err = bbusy(bp, ((slpflag & PCATCH) != 0), slptimeo); + if (err != 0) { + if (err == EPASSTHROUGH) + goto loop; + mutex_exit(&bufcache_lock); + return (NULL); } #ifdef DIAGNOSTIC - if (ISSET(bp->b_flags, B_DONE|B_DELWRI) && + if (ISSET(bp->b_oflags, BO_DONE|BO_DELWRI) && bp->b_bcount < size && vp->v_type != VBLK) panic("getblk: block size invariant failed"); #endif - SET(bp->b_flags, B_BUSY); bremfree(bp); preserve = 1; } else { - if ((bp = getnewbuf(slpflag, slptimeo, 0)) == NULL) { - simple_unlock(&bqueue_slock); - splx(s); - goto start; + if ((bp = getnewbuf(slpflag, slptimeo, 0)) == NULL) + goto loop; + + if (incore(vp, blkno) != NULL) { + /* The block has come into memory in the meantime. */ + brelsel(bp, 0); + goto loop; } - binshash(bp, BUFHASH(vp, blkno)); + LIST_INSERT_HEAD(BUFHASH(vp, blkno), bp, b_hash); bp->b_blkno = bp->b_lblkno = bp->b_rawblkno = blkno; + mutex_enter(&vp->v_interlock); bgetvp(vp, bp); + mutex_exit(&vp->v_interlock); preserve = 0; } - simple_unlock(&bp->b_interlock); - simple_unlock(&bqueue_slock); - splx(s); + mutex_exit(&bufcache_lock); + /* - * LFS can't track total size of B_LOCKED buffer (locked_queue_bytes) + * LFS can't track total size of BC_LOCKED buffer (locked_queue_bytes) * if we re-size buffers here. */ - if (ISSET(bp->b_flags, B_LOCKED)) { + if (ISSET(bp->b_cflags, BC_LOCKED)) { KASSERT(bp->b_bufsize >= size); } else { - allocbuf(bp, size, preserve); + if (allocbuf(bp, size, preserve)) { + mutex_enter(&bufcache_lock); + LIST_REMOVE(bp, b_hash); + mutex_exit(&bufcache_lock); + brelse(bp, BC_INVAL); + return NULL; + } } BIO_SETPRIO(bp, BPRIO_DEFAULT); return (bp); @@ -1094,24 +1187,22 @@ start: /* * Get an empty, disassociated buffer of given size. 
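getblk() above is the canonical consumer of bbusy(), defined at the end of this file. The contract is worth noting: 0 means the buffer was marked BC_BUSY for the caller without sleeping; EPASSTHROUGH means the caller slept, so the cache may have changed and the lookup must be redone; anything else is a timeout, a signal, or pagedaemon deadlock avoidance. Callers therefore loop, along these lines (sketch):

	mutex_enter(&bufcache_lock);
	for (;;) {
		if ((bp = incore(vp, blkno)) == NULL)
			break;			/* not cached */
		error = bbusy(bp, false, 0);
		if (error == 0)
			break;			/* bp is busy and ours */
		if (error != EPASSTHROUGH) {
			bp = NULL;		/* give up */
			break;
		}
		/* EPASSTHROUGH: we slept, the world changed; rescan */
	}
	mutex_exit(&bufcache_lock);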
*/ -struct buf * +buf_t * geteblk(int size) { - struct buf *bp; - int s; + buf_t *bp; + int error; - s = splbio(); - simple_lock(&bqueue_slock); - while ((bp = getnewbuf(0, 0, 0)) == 0) + mutex_enter(&bufcache_lock); + while ((bp = getnewbuf(0, 0, 0)) == NULL) ; - SET(bp->b_flags, B_INVAL); - binshash(bp, &invalhash); - simple_unlock(&bqueue_slock); - simple_unlock(&bp->b_interlock); - splx(s); + SET(bp->b_cflags, BC_INVAL); + LIST_INSERT_HEAD(&invalhash, bp, b_hash); + mutex_exit(&bufcache_lock); BIO_SETPRIO(bp, BPRIO_DEFAULT); - allocbuf(bp, size, 0); + error = allocbuf(bp, size, 0); + KASSERT(error == 0); return (bp); } @@ -1123,12 +1214,12 @@ geteblk(int size) * start a write. If the buffer grows, it's the callers * responsibility to fill out the buffer's additional contents. */ -void -allocbuf(struct buf *bp, int size, int preserve) +int +allocbuf(buf_t *bp, int size, int preserve) { vsize_t oldsize, desired_size; void *addr; - int s, delta; + int delta; desired_size = buf_roundsize(size); if (desired_size > MAXBSIZE) @@ -1138,13 +1229,15 @@ allocbuf(struct buf *bp, int size, int preserve) oldsize = bp->b_bufsize; if (oldsize == desired_size) - return; + return 0; /* * If we want a buffer of a different size, re-allocate the * buffer's memory; copy old content only if needed. */ addr = buf_malloc(desired_size); + if (addr == NULL) + return ENOMEM; if (preserve) memcpy(addr, bp->b_data, MIN(oldsize,desired_size)); if (bp->b_data != NULL) @@ -1153,12 +1246,11 @@ allocbuf(struct buf *bp, int size, int preserve) bp->b_bufsize = desired_size; /* - * Update overall buffer memory counter (protected by bqueue_slock) + * Update overall buffer memory counter (protected by bufcache_lock) */ delta = (long)desired_size - (long)oldsize; - s = splbio(); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); if ((bufmem += delta) > bufmem_hiwater) { /* * Need to trim overall memory usage. @@ -1166,20 +1258,16 @@ allocbuf(struct buf *bp, int size, int preserve) while (buf_canrelease()) { if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) { - simple_unlock(&bqueue_slock); - splx(s); + mutex_exit(&bufcache_lock); preempt(); - s = splbio(); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); } - if (buf_trim() == 0) break; } } - - simple_unlock(&bqueue_slock); - splx(s); + mutex_exit(&bufcache_lock); + return 0; } /* @@ -1187,38 +1275,42 @@ allocbuf(struct buf *bp, int size, int preserve) * Select something from a free list. * Preference is to AGE list, then LRU list. * - * Called at splbio and with buffer queues locked. + * Called with the buffer queues locked. * Return buffer locked. */ -struct buf * +buf_t * getnewbuf(int slpflag, int slptimeo, int from_bufq) { - struct buf *bp; + buf_t *bp; + struct vnode *vp; -start: - LOCK_ASSERT(simple_lock_held(&bqueue_slock)); + start: + KASSERT(mutex_owned(&bufcache_lock)); /* - * Get a new buffer from the pool; but use NOWAIT because - * we have the buffer queues locked. + * Get a new buffer from the pool. 
*/ - if (!from_bufq && buf_lotsfree() && - (bp = pool_get(&bufpool, PR_NOWAIT)) != NULL) { - memset((char *)bp, 0, sizeof(*bp)); - BUF_INIT(bp); - bp->b_dev = NODEV; - bp->b_vnbufs.le_next = NOLIST; - bp->b_flags = B_BUSY; - simple_lock(&bp->b_interlock); + if (!from_bufq && buf_lotsfree()) { + mutex_exit(&bufcache_lock); + bp = pool_cache_get(buf_cache, PR_NOWAIT); + if (bp != NULL) { + memset((char *)bp, 0, sizeof(*bp)); + buf_init(bp); + bp->b_dev = NODEV; + bp->b_vnbufs.le_next = NOLIST; + bp->b_cflags = BC_BUSY; + bp->b_refcnt = 1; + mutex_enter(&bufcache_lock); #if defined(DIAGNOSTIC) - bp->b_freelistindex = -1; + bp->b_freelistindex = -1; #endif /* defined(DIAGNOSTIC) */ - return (bp); + return (bp); + } + mutex_enter(&bufcache_lock); } if ((bp = TAILQ_FIRST(&bufqueues[BQ_AGE].bq_queue)) != NULL || (bp = TAILQ_FIRST(&bufqueues[BQ_LRU].bq_queue)) != NULL) { - simple_lock(&bp->b_interlock); bremfree(bp); } else { /* @@ -1226,9 +1318,12 @@ start: */ if (!from_bufq || curlwp != uvm.pagedaemon_lwp) { /* wait for a free buffer of any kind */ - needbuffer = 1; - ltsleep(&needbuffer, slpflag|(PRIBIO + 1), - "getnewbuf", slptimeo, &bqueue_slock); + if ((slpflag & PCATCH) != 0) + (void)cv_timedwait_sig(&needbuffer_cv, + &bufcache_lock, slptimeo); + else + (void)cv_timedwait(&needbuffer_cv, + &bufcache_lock, slptimeo); } return (NULL); } @@ -1238,95 +1333,100 @@ start: panic("buffer %p: on queue but empty", bp); #endif - if (ISSET(bp->b_flags, B_VFLUSH)) { + if (ISSET(bp->b_cflags, BC_VFLUSH)) { /* * This is a delayed write buffer being flushed to disk. Make * sure it gets aged out of the queue when it's finished, and * leave it off the LRU queue. */ - CLR(bp->b_flags, B_VFLUSH); - SET(bp->b_flags, B_AGE); - simple_unlock(&bp->b_interlock); + CLR(bp->b_cflags, BC_VFLUSH); + SET(bp->b_cflags, BC_AGE); goto start; } /* Buffer is no longer on free lists. */ - SET(bp->b_flags, B_BUSY); + SET(bp->b_cflags, BC_BUSY); /* * If buffer was a delayed write, start it and return NULL * (since we might sleep while starting the write). */ - if (ISSET(bp->b_flags, B_DELWRI)) { + if (ISSET(bp->b_oflags, BO_DELWRI)) { /* * This buffer has gone through the LRU, so make sure it gets * reused ASAP. */ - SET(bp->b_flags, B_AGE); - simple_unlock(&bp->b_interlock); - simple_unlock(&bqueue_slock); + SET(bp->b_cflags, BC_AGE); + mutex_exit(&bufcache_lock); bawrite(bp); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); return (NULL); } - /* disassociate us from our vnode, if we had one... */ - if (bp->b_vp) - brelvp(bp); - - if (LIST_FIRST(&bp->b_dep) != NULL && bioopsp) - bioopsp->io_deallocate(bp); + vp = bp->b_vp; + if (bioopsp != NULL) + (*bioopsp->io_deallocate)(bp); /* clear out various other fields */ - bp->b_flags = B_BUSY; + bp->b_cflags = BC_BUSY; + bp->b_oflags = 0; + bp->b_flags = 0; bp->b_dev = NODEV; - bp->b_blkno = bp->b_lblkno = bp->b_rawblkno = 0; + bp->b_blkno = 0; + bp->b_lblkno = 0; + bp->b_rawblkno = 0; bp->b_iodone = 0; bp->b_error = 0; bp->b_resid = 0; bp->b_bcount = 0; - bremhash(bp); + LIST_REMOVE(bp, b_hash); + + /* Disassociate us from our vnode, if we had one... */ + if (vp != NULL) { + mutex_enter(&vp->v_interlock); + brelvp(bp); + mutex_exit(&vp->v_interlock); + } + return (bp); } /* * Attempt to free an aged buffer off the queues. - * Called at splbio and with queue lock held. + * Called with queue lock held. * Returns the amount of buffer memory freed. 
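The lock contract for the trim path also changed: buf_drain() and buf_trim() are now entered with bufcache_lock held by the caller instead of taking splbio and bqueue_slock themselves. The sysctl code later in this file shrinks the cache toward a new high-water mark with exactly this loop (sketch of that call site):

	long excess;

	mutex_enter(&bufcache_lock);
	while ((excess = bufmem - bufmem_hiwater) >= 0) {
		/* drain in small steps rather than all at once */
		if (buf_drain(excess / (2 * 1024)) <= 0)
			break;		/* nothing more could be freed */
	}
	mutex_exit(&bufcache_lock);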
*/ static int buf_trim(void) { - struct buf *bp; + buf_t *bp; long size = 0; + KASSERT(mutex_owned(&bufcache_lock)); + /* Instruct getnewbuf() to get buffers off the queues */ if ((bp = getnewbuf(PCATCH, 1, 1)) == NULL) return 0; - KASSERT(!ISSET(bp->b_flags, B_WANTED)); - simple_unlock(&bp->b_interlock); + KASSERT((bp->b_cflags & BC_WANTED) == 0); size = bp->b_bufsize; bufmem -= size; - simple_unlock(&bqueue_slock); if (size > 0) { buf_mrelease(bp->b_data, size); bp->b_bcount = bp->b_bufsize = 0; } /* brelse() will return the buffer to the global buffer pool */ - brelse(bp, 0); - simple_lock(&bqueue_slock); + brelsel(bp, 0); return size; } int buf_drain(int n) { - int s, size = 0, sz; + int size = 0, sz; - s = splbio(); - simple_lock(&bqueue_slock); + KASSERT(mutex_owned(&bufcache_lock)); while (size < n && bufmem > bufmem_lowater) { sz = buf_trim(); @@ -1335,8 +1435,6 @@ buf_drain(int n) size += sz; } - simple_unlock(&bqueue_slock); - splx(s); return size; } @@ -1345,18 +1443,15 @@ buf_drain(int n) * When they do, extract and return the I/O's error value. */ int -biowait(struct buf *bp) +biowait(buf_t *bp) { - int s, error; - s = splbio(); - simple_lock(&bp->b_interlock); - while (!ISSET(bp->b_flags, B_DONE | B_DELWRI)) - ltsleep(bp, PRIBIO + 1, "biowait", 0, &bp->b_interlock); - error = bp->b_error; - simple_unlock(&bp->b_interlock); - splx(s); - return (error); + mutex_enter(bp->b_objlock); + while (!ISSET(bp->b_oflags, BO_DONE | BO_DELWRI)) + cv_wait(&bp->b_done, bp->b_objlock); + mutex_exit(bp->b_objlock); + + return bp->b_error; } /* @@ -1376,43 +1471,81 @@ biowait(struct buf *bp) * for the vn device, that puts malloc'd buffers on the free lists!) */ void -biodone(struct buf *bp) +biodone(buf_t *bp) { - int s = splbio(); + int s; - simple_lock(&bp->b_interlock); - if (ISSET(bp->b_flags, B_DONE)) - panic("biodone already"); - CLR(bp->b_flags, B_COWDONE); - SET(bp->b_flags, B_DONE); /* note that it's done */ + KASSERT(!ISSET(bp->b_oflags, BO_DONE)); + + if (cpu_intr_p()) { + /* From interrupt mode: defer to a soft interrupt. */ + s = splvm(); + TAILQ_INSERT_TAIL(&curcpu()->ci_data.cpu_biodone, bp, b_actq); + softint_schedule(biodone_sih); + splx(s); + } else { + /* Process now - the buffer may be freed soon. */ + biodone2(bp); + } +} + +static void +biodone2(buf_t *bp) +{ + void (*callout)(buf_t *); + + if (bioopsp != NULL) + (*bioopsp->io_complete)(bp); + + mutex_enter(bp->b_objlock); + /* Note that the transfer is done. */ + if (ISSET(bp->b_oflags, BO_DONE)) + panic("biodone2 already"); + CLR(bp->b_oflags, BO_COWDONE); + SET(bp->b_oflags, BO_DONE); BIO_SETPRIO(bp, BPRIO_DEFAULT); - if (LIST_FIRST(&bp->b_dep) != NULL && bioopsp) - bioopsp->io_complete(bp); - - if (!ISSET(bp->b_flags, B_READ)) /* wake up reader */ + /* Wake up waiting writers. */ + if (!ISSET(bp->b_flags, B_READ)) vwakeup(bp); - /* - * If necessary, call out. Unlock the buffer before calling - * iodone() as the buffer isn't valid any more when it return. - */ - if (ISSET(bp->b_flags, B_CALL)) { - CLR(bp->b_flags, B_CALL); /* but note callout done */ - simple_unlock(&bp->b_interlock); - (*bp->b_iodone)(bp); + if ((callout = bp->b_iodone) != NULL) { + /* Note callout done, then call out. */ + KERNEL_LOCK(1, NULL); /* XXXSMP */ + bp->b_iodone = NULL; + mutex_exit(bp->b_objlock); + (*callout)(bp); + KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ + } else if (ISSET(bp->b_flags, B_ASYNC)) { + /* If async, release. 
*/ + mutex_exit(bp->b_objlock); + brelse(bp, 0); } else { - if (ISSET(bp->b_flags, B_ASYNC)) { /* if async, release */ - simple_unlock(&bp->b_interlock); - brelse(bp, 0); - } else { /* or just wakeup the buffer */ - CLR(bp->b_flags, B_WANTED); - wakeup(bp); - simple_unlock(&bp->b_interlock); - } + /* Otherwise just wake up waiters in biowait(). */ + cv_broadcast(&bp->b_done); + mutex_exit(bp->b_objlock); } +} - splx(s); +static void +biointr(void *cookie) +{ + struct cpu_info *ci; + buf_t *bp; + int s; + + ci = curcpu(); + + while (!TAILQ_EMPTY(&ci->ci_data.cpu_biodone)) { + KASSERT(curcpu() == ci); + + s = splvm(); + bp = TAILQ_FIRST(&ci->ci_data.cpu_biodone); + TAILQ_REMOVE(&ci->ci_data.cpu_biodone, bp, b_actq); + splx(s); + + biodone2(bp); + } } /* @@ -1421,13 +1554,13 @@ biodone(struct buf *bp) int count_lock_queue(void) { - struct buf *bp; + buf_t *bp; int n = 0; - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); TAILQ_FOREACH(bp, &bufqueues[BQ_LOCKED].bq_queue, b_freelist) n++; - simple_unlock(&bqueue_slock); + mutex_exit(&bufcache_lock); return (n); } @@ -1438,18 +1571,17 @@ count_lock_queue(void) int buf_syncwait(void) { - struct buf *bp; - int iter, nbusy, nbusy_prev = 0, dcount, s, ihash; + buf_t *bp; + int iter, nbusy, nbusy_prev = 0, dcount, ihash; dcount = 10000; for (iter = 0; iter < 20;) { - s = splbio(); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); nbusy = 0; for (ihash = 0; ihash < bufhash+1; ihash++) { LIST_FOREACH(bp, &bufhashtbl[ihash], b_hash) { - if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY) - nbusy++; + if ((bp->b_cflags & (BC_BUSY|BC_INVAL)) == BC_BUSY) + nbusy += ((bp->b_flags & B_READ) == 0); /* * With soft updates, some buffers that are * written will be remarked as dirty until other @@ -1457,26 +1589,21 @@ buf_syncwait(void) */ if (bp->b_vp && bp->b_vp->v_mount && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP) - && (bp->b_flags & B_DELWRI)) { - simple_lock(&bp->b_interlock); + && (bp->b_oflags & BO_DELWRI)) { bremfree(bp); - bp->b_flags |= B_BUSY; + bp->b_cflags |= BC_BUSY; nbusy++; - simple_unlock(&bp->b_interlock); - simple_unlock(&bqueue_slock); + mutex_exit(&bufcache_lock); bawrite(bp); if (dcount-- <= 0) { printf("softdep "); - splx(s); goto fail; } - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); } } } - - simple_unlock(&bqueue_slock); - splx(s); + mutex_exit(&bufcache_lock); if (nbusy == 0) break; @@ -1495,14 +1622,13 @@ buf_syncwait(void) fail:; #if defined(DEBUG) || defined(DEBUG_HALT_BUSY) printf("giving up\nPrinting vnodes for busy buffers\n"); - s = splbio(); for (ihash = 0; ihash < bufhash+1; ihash++) { LIST_FOREACH(bp, &bufhashtbl[ihash], b_hash) { - if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY) + if ((bp->b_cflags & (BC_BUSY|BC_INVAL)) == BC_BUSY && + (bp->b_flags & B_READ) == 0) vprint(NULL, bp->b_vp); } } - splx(s); #endif } @@ -1510,10 +1636,10 @@ fail:; } static void -sysctl_fillbuf(struct buf *i, struct buf_sysctl *o) +sysctl_fillbuf(buf_t *i, struct buf_sysctl *o) { - o->b_flags = i->b_flags; + o->b_flags = i->b_flags | i->b_cflags | i->b_oflags; o->b_error = i->b_error; o->b_prio = i->b_prio; o->b_dev = i->b_dev; @@ -1534,12 +1660,13 @@ sysctl_fillbuf(struct buf *i, struct buf_sysctl *o) static int sysctl_dobuf(SYSCTLFN_ARGS) { - struct buf *bp; + buf_t *bp; struct buf_sysctl bs; + struct bqueue *bq; char *dp; u_int i, op, arg; size_t len, needed, elem_size, out_size; - int error, s, elem_count; + int error, elem_count, retries; if (namelen == 1 && name[0] == CTL_QUERY) return 
(sysctl_query(SYSCTLFN_CALL(rnode))); @@ -1547,6 +1674,8 @@ sysctl_dobuf(SYSCTLFN_ARGS) if (namelen != 4) return (EINVAL); + retries = 100; + retry: dp = oldp; len = (oldp != NULL) ? *oldlenp : 0; op = name[0]; @@ -1569,15 +1698,32 @@ sysctl_dobuf(SYSCTLFN_ARGS) error = 0; needed = 0; - s = splbio(); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); for (i = 0; i < BQUEUES; i++) { - TAILQ_FOREACH(bp, &bufqueues[i].bq_queue, b_freelist) { + bq = &bufqueues[i]; + TAILQ_FOREACH(bp, &bq->bq_queue, b_freelist) { + bq->bq_marker = bp; if (len >= elem_size && elem_count > 0) { sysctl_fillbuf(bp, &bs); + mutex_exit(&bufcache_lock); error = copyout(&bs, dp, out_size); + mutex_enter(&bufcache_lock); if (error) - goto cleanup; + break; + if (bq->bq_marker != bp) { + /* + * This sysctl node is only for + * statistics. Retry; if the + * queue keeps changing, then + * bail out. + */ + if (retries-- == 0) { + error = EAGAIN; + break; + } + mutex_exit(&bufcache_lock); + goto retry; + } dp += elem_size; len -= elem_size; } @@ -1587,34 +1733,22 @@ sysctl_dobuf(SYSCTLFN_ARGS) elem_count--; } } + if (error != 0) + break; } -cleanup: - simple_unlock(&bqueue_slock); - splx(s); + mutex_exit(&bufcache_lock); *oldlenp = needed; if (oldp == NULL) - *oldlenp += KERN_BUFSLOP * sizeof(struct buf); + *oldlenp += KERN_BUFSLOP * sizeof(buf_t); return (error); } -static void -sysctl_bufvm_common(void) -{ - int64_t t; - - /* Drain until below new high water mark */ - while ((t = (int64_t)bufmem - (int64_t)bufmem_hiwater) >= 0) { - if (buf_drain(t / (2 * 1024)) <= 0) - break; - } -} - static int -sysctl_bufcache_update(SYSCTLFN_ARGS) +sysctl_bufvm_update(SYSCTLFN_ARGS) { - int t, error; + int t, error, rv; struct sysctlnode node; node = *rnode; @@ -1624,32 +1758,14 @@ sysctl_bufcache_update(SYSCTLFN_ARGS) if (error || newp == NULL) return (error); - if (t < 0 || t > 100) - return EINVAL; - bufcache = t; - buf_setwm(); - - sysctl_bufvm_common(); - return 0; -} - -static int -sysctl_bufvm_update(SYSCTLFN_ARGS) -{ - int64_t t; - int error; - struct sysctlnode node; - - node = *rnode; - node.sysctl_data = &t; - t = *(int64_t *)rnode->sysctl_data; - error = sysctl_lookup(SYSCTLFN_CALL(&node)); - if (error || newp == NULL) - return (error); - if (t < 0) return EINVAL; - if (rnode->sysctl_data == &bufmem_lowater) { + if (rnode->sysctl_data == &bufcache) { + if (t > 100) + return (EINVAL); + bufcache = t; + buf_setwm(); + } else if (rnode->sysctl_data == &bufmem_lowater) { if (bufmem_hiwater - t < 16) return (EINVAL); bufmem_lowater = t; @@ -1660,7 +1776,14 @@ sysctl_bufvm_update(SYSCTLFN_ARGS) } else return (EINVAL); - sysctl_bufvm_common(); + /* Drain until below new high water mark */ + mutex_enter(&bufcache_lock); + while ((t = bufmem - bufmem_hiwater) >= 0) { + rv = buf_drain(t / (2 * 1024)); + if (rv <= 0) + break; + } + mutex_exit(&bufcache_lock); return 0; } @@ -1695,25 +1818,25 @@ SYSCTL_SETUP(sysctl_vm_buf_setup, "sysctl vm.buf* subtree setup") CTLTYPE_INT, "bufcache", SYSCTL_DESCR("Percentage of physical memory to use for " "buffer cache"), - sysctl_bufcache_update, 0, &bufcache, 0, + sysctl_bufvm_update, 0, &bufcache, 0, CTL_VM, CTL_CREATE, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READONLY, - CTLTYPE_QUAD, "bufmem", + CTLTYPE_INT, "bufmem", SYSCTL_DESCR("Amount of kernel memory used by buffer " "cache"), NULL, 0, &bufmem, 0, CTL_VM, CTL_CREATE, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, - CTLTYPE_QUAD, "bufmem_lowater", + CTLTYPE_INT, 
"bufmem_lowater", SYSCTL_DESCR("Minimum amount of kernel memory to " "reserve for buffer cache"), sysctl_bufvm_update, 0, &bufmem_lowater, 0, CTL_VM, CTL_CREATE, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, - CTLTYPE_QUAD, "bufmem_hiwater", + CTLTYPE_INT, "bufmem_hiwater", SYSCTL_DESCR("Maximum amount of kernel memory to use " "for buffer cache"), sysctl_bufvm_update, 0, &bufmem_hiwater, 0, @@ -1729,8 +1852,8 @@ SYSCTL_SETUP(sysctl_vm_buf_setup, "sysctl vm.buf* subtree setup") void vfs_bufstats(void) { - int s, i, j, count; - struct buf *bp; + int i, j, count; + buf_t *bp; struct bqueue *dp; int counts[(MAXBSIZE / PAGE_SIZE) + 1]; static const char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE" }; @@ -1739,12 +1862,10 @@ vfs_bufstats(void) count = 0; for (j = 0; j <= MAXBSIZE/PAGE_SIZE; j++) counts[j] = 0; - s = splbio(); TAILQ_FOREACH(bp, &dp->bq_queue, b_freelist) { counts[bp->b_bufsize/PAGE_SIZE]++; count++; } - splx(s); printf("%s: total-%d", bname[i], count); for (j = 0; j <= MAXBSIZE/PAGE_SIZE; j++) if (counts[j] != 0) @@ -1756,43 +1877,31 @@ vfs_bufstats(void) /* ------------------------------ */ -static struct buf * -getiobuf1(int prflags) +buf_t * +getiobuf(struct vnode *vp, bool waitok) { - struct buf *bp; - int s; + buf_t *bp; - s = splbio(); - bp = pool_get(&bufiopool, prflags); - splx(s); - if (bp != NULL) { - BUF_INIT(bp); - } + bp = pool_cache_get(bufio_cache, (waitok ? PR_WAITOK : PR_NOWAIT)); + if (bp == NULL) + return bp; + + buf_init(bp); + + if ((bp->b_vp = vp) == NULL) + bp->b_objlock = &buffer_lock; + else + bp->b_objlock = &vp->v_interlock; + return bp; } -struct buf * -getiobuf(void) -{ - - return getiobuf1(PR_WAITOK); -} - -struct buf * -getiobuf_nowait(void) -{ - - return getiobuf1(PR_NOWAIT); -} - void -putiobuf(struct buf *bp) +putiobuf(buf_t *bp) { - int s; - s = splbio(); - pool_put(&bufiopool, bp); - splx(s); + buf_destroy(bp); + pool_cache_put(bufio_cache, bp); } /* @@ -1800,9 +1909,9 @@ putiobuf(struct buf *bp) */ void -nestiobuf_iodone(struct buf *bp) +nestiobuf_iodone(buf_t *bp) { - struct buf *mbp = bp->b_private; + buf_t *mbp = bp->b_private; int error; int donebytes; @@ -1810,9 +1919,8 @@ nestiobuf_iodone(struct buf *bp) KASSERT(mbp != bp); error = 0; - if (bp->b_error != 0) { - error = bp->b_error; - } else if ((bp->b_bcount < bp->b_bufsize) || (bp->b_resid > 0)) { + if (bp->b_error == 0 && + (bp->b_bcount < bp->b_bufsize || bp->b_resid > 0)) { /* * Not all got transfered, raise an error. We have no way to * propagate these conditions to mbp. 
@@ -1836,14 +1944,16 @@
 */
 void
-nestiobuf_setup(struct buf *mbp, struct buf *bp, int offset, size_t size)
+nestiobuf_setup(buf_t *mbp, buf_t *bp, int offset, size_t size)
 {
 	const int b_read = mbp->b_flags & B_READ;
 	struct vnode *vp = mbp->b_vp;
 
 	KASSERT(mbp->b_bcount >= offset + size);
 	bp->b_vp = vp;
-	bp->b_flags = B_BUSY | B_CALL | B_ASYNC | b_read;
+	bp->b_objlock = mbp->b_objlock;
+	bp->b_cflags = BC_BUSY;
+	bp->b_flags = B_ASYNC | b_read;
 	bp->b_iodone = nestiobuf_iodone;
 	bp->b_data = (char *)mbp->b_data + offset;
 	bp->b_resid = bp->b_bcount = size;
@@ -1851,11 +1961,9 @@ nestiobuf_setup(struct buf *mbp, struct buf *bp, int offset, size_t size)
 	bp->b_private = mbp;
 	BIO_COPYPRIO(bp, mbp);
 	if (!b_read && vp != NULL) {
-		int s;
-
-		s = splbio();
-		V_INCR_NUMOUTPUT(vp);
-		splx(s);
+		mutex_enter(&vp->v_interlock);
+		vp->v_numoutput++;
+		mutex_exit(&vp->v_interlock);
 	}
 }
 
@@ -1867,24 +1975,73 @@ nestiobuf_setup(struct buf *mbp, struct buf *bp, int offset, size_t size)
 */
 void
-nestiobuf_done(struct buf *mbp, int donebytes, int error)
+nestiobuf_done(buf_t *mbp, int donebytes, int error)
 {
-	int s;
 
 	if (donebytes == 0) {
 		return;
 	}
-	s = splbio();
+	mutex_enter(mbp->b_objlock);
 	KASSERT(mbp->b_resid >= donebytes);
-	if (error) {
-		mbp->b_error = error;
-	}
 	mbp->b_resid -= donebytes;
+	if (error)
+		mbp->b_error = error;
 	if (mbp->b_resid == 0) {
-		if (mbp->b_error != 0) {
-			mbp->b_resid = mbp->b_bcount; /* be conservative */
-		}
+		mutex_exit(mbp->b_objlock);
 		biodone(mbp);
-	}
-	splx(s);
+	} else
+		mutex_exit(mbp->b_objlock);
+}
+
+void
+buf_init(buf_t *bp)
+{
+
+	LIST_INIT(&bp->b_dep);
+	cv_init(&bp->b_busy, "biolock");
+	cv_init(&bp->b_done, "biowait");
+	bp->b_dev = NODEV;
+	bp->b_error = 0;
+	bp->b_flags = 0;
+	bp->b_cflags = 0;
+	bp->b_oflags = 0;
+	bp->b_objlock = &buffer_lock;
+	bp->b_iodone = NULL;
+	BIO_SETPRIO(bp, BPRIO_DEFAULT);
+}
+
+void
+buf_destroy(buf_t *bp)
+{
+
+	cv_destroy(&bp->b_done);
+	cv_destroy(&bp->b_busy);
+}
+
+int
+bbusy(buf_t *bp, bool intr, int timo)
+{
+	int error;
+
+	KASSERT(mutex_owned(&bufcache_lock));
+
+	if ((bp->b_cflags & BC_BUSY) != 0) {
+		if (curlwp == uvm.pagedaemon_lwp)
+			return EDEADLK;
+		bp->b_cflags |= BC_WANTED;
+		bref(bp);
+		if (intr) {
+			error = cv_timedwait_sig(&bp->b_busy, &bufcache_lock,
+			    timo);
+		} else {
+			error = cv_timedwait(&bp->b_busy, &bufcache_lock,
+			    timo);
+		}
+		brele(bp);
+		if (error != 0)
+			return error;
+		return EPASSTHROUGH;
+	}
+	bp->b_cflags |= BC_BUSY;
+
+	return 0;
+}
diff --git a/sys/kern/vfs_lockf.c b/sys/kern/vfs_lockf.c
index ddc32d343ebd..140222688931 100644
--- a/sys/kern/vfs_lockf.c
+++ b/sys/kern/vfs_lockf.c
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_lockf.c,v 1.60 2007/07/09 21:10:57 ad Exp $	*/
+/*	$NetBSD: vfs_lockf.c,v 1.61 2008/01/02 11:48:56 ad Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1989, 1993
@@ -35,7 +35,7 @@
 */
 
 #include 
-__KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.60 2007/07/09 21:10:57 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.61 2008/01/02 11:48:56 ad Exp $");
 
 #include 
 #include 
@@ -74,6 +74,7 @@ struct lockf {
 	struct locklist lf_blkhd;	/* List of requests blocked on this lock */
 	TAILQ_ENTRY(lockf) lf_block;	/* A request waiting for a lock */
 	uid_t lf_uid;			/* User ID responsible */
+	kcondvar_t lf_cv;		/* Signalling */
 };
 
 /* Maximum length of sleep chains to traverse to try and detect deadlock. 
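nestiobuf_setup() and nestiobuf_done() let one logical transfer be split across subsidiary buffers; the master's biodone() fires once its b_resid drains to zero. Rough usage, splitting a read into two chunks (illustrative only; the children are disposed of by nestiobuf_iodone(), whose tail lies outside this excerpt):

	mbp = getiobuf(vp, true);		/* master */
	mbp->b_cflags = BC_BUSY;
	mbp->b_flags = B_READ;
	mbp->b_data = addr;
	mbp->b_bcount = mbp->b_resid = 2 * chunk;

	for (i = 0; i < 2; i++) {
		bp = getiobuf(NULL, true);	/* child */
		nestiobuf_setup(mbp, bp, i * chunk, chunk);
		bp->b_blkno = blkno + btodb(i * chunk);
		VOP_STRATEGY(vp, bp);
	}
	error = biowait(mbp);	/* returns when both children are done */
	putiobuf(mbp);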
*/ @@ -202,6 +203,7 @@ lf_alloc(uid_t uid, int allowfail) mutex_exit(&uip->ui_lock); lock = pool_get(&lockfpool, PR_WAITOK); lock->lf_uid = uid; + cv_init(&lock->lf_cv, "lockf"); return lock; } @@ -214,6 +216,7 @@ lf_free(struct lockf *lock) mutex_enter(&uip->ui_lock); uip->ui_lockcnt--; mutex_exit(&uip->ui_lock); + cv_destroy(&lock->lf_cv); pool_put(&lockfpool, lock); } @@ -389,7 +392,7 @@ lf_wakelock(struct lockf *listhead) if (lockf_debug & 2) lf_print("lf_wakelock: awakening", wakelock); #endif - wakeup(wakelock); + cv_broadcast(&wakelock->lf_cv); } } @@ -417,7 +420,7 @@ lf_clearlock(struct lockf *unlock, struct lockf **sparelock) #endif /* LOCKF_DEBUG */ prev = head; while ((ovcase = lf_findoverlap(lf, unlock, SELF, - &prev, &overlap)) != 0) { + &prev, &overlap)) != 0) { /* * Wakeup the list of locks to be retried. */ @@ -494,13 +497,13 @@ lf_getblock(struct lockf *lock) */ static int lf_setlock(struct lockf *lock, struct lockf **sparelock, - struct simplelock *interlock) + kmutex_t *interlock) { struct lockf *block; struct lockf **head = lock->lf_head; struct lockf **prev, *overlap, *ltmp; static char lockstr[] = "lockf"; - int ovcase, priority, needtolink, error; + int ovcase, needtolink, error; #ifdef LOCKF_DEBUG if (lockf_debug & 1) @@ -508,12 +511,12 @@ lf_setlock(struct lockf *lock, struct lockf **sparelock, #endif /* LOCKF_DEBUG */ /* - * Set the priority + * XXX Here we used to set the sleep priority so that writers + * took priority. That's of dubious use, and is not possible + * with condition variables. Need to find a better way to ensure + * fairness. */ - priority = PLOCK; - if (lock->lf_type == F_WRLCK) - priority += 4; - priority |= PCATCH; + /* * Scan lock list for this file looking for locks that would block us. */ @@ -610,7 +613,7 @@ lf_setlock(struct lockf *lock, struct lockf **sparelock, lf_printlist("lf_setlock", block); } #endif /* LOCKF_DEBUG */ - error = ltsleep(lock, priority, lockstr, 0, interlock); + error = cv_wait_sig(&lock->lf_cv, interlock); /* * We may have been awakened by a signal (in @@ -800,7 +803,7 @@ lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size) struct flock *fl = ap->a_fl; struct lockf *lock = NULL; struct lockf *sparelock; - struct simplelock *interlock = &ap->a_vp->v_interlock; + kmutex_t *interlock = &ap->a_vp->v_interlock; off_t start, end; int error = 0; @@ -828,7 +831,7 @@ lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size) return EINVAL; /* - * Allocate locks before acquiring the simple lock. We need two + * Allocate locks before acquiring the interlock. We need two * locks in the worst case. */ switch (ap->a_op) { @@ -865,7 +868,7 @@ lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size) goto quit; } - simple_lock(interlock); + mutex_enter(interlock); /* * Avoid the common case of unlocking when inode has no locks. @@ -927,7 +930,7 @@ lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size) } quit_unlock: - simple_unlock(interlock); + mutex_exit(interlock); quit: if (lock) lf_free(lock); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 4bc650dd7970..e4d57567820e 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_subr.c,v 1.308 2007/12/01 10:36:47 yamt Exp $ */ +/* $NetBSD: vfs_subr.c,v 1.309 2008/01/02 11:48:56 ad Exp $ */ /*- * Copyright (c) 1997, 1998, 2004, 2005, 2007 The NetBSD Foundation, Inc. 
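The vfs_lockf.c conversion above is the cleanest example in this commit of replacing sleep-on-address with a per-object condvar: each struct lockf carries its own lf_cv, initialized and destroyed with the lock, and a wakeup addresses exactly the waiters queued on that lock rather than hashing on a kernel address. The full lifecycle, pulled together from the hunks above:

	/* allocate: each lock gets its own condvar */
	lock = pool_get(&lockfpool, PR_WAITOK);
	cv_init(&lock->lf_cv, "lockf");

	/* block: formerly ltsleep(lock, priority, ...) on the address */
	error = cv_wait_sig(&lock->lf_cv, interlock);

	/* release: wake every request queued behind this lock */
	cv_broadcast(&wakelock->lf_cv);

	/* free */
	cv_destroy(&lock->lf_cv);
	pool_put(&lockfpool, lock);

The lost write-priority boost is the one regression the XXX comment above flags: condvars carry no sleep priority, so fairness between readers and writers has to come from queue order instead.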
@@ -82,7 +82,7 @@
 */
 
 #include 
-__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.308 2007/12/01 10:36:47 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.309 2008/01/02 11:48:56 ad Exp $");
 
 #include "opt_inet.h"
 #include "opt_ddb.h"
@@ -107,6 +107,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.308 2007/12/01 10:36:47 yamt Exp $");
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -120,14 +121,16 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.308 2007/12/01 10:36:47 yamt Exp $");
 extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */
 extern int vfs_magiclinks;	/* 1 => expand "magic" symlinks */
 
-/* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
-struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
-struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
+static vnodelst_t vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
+static vnodelst_t vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
+static vnodelst_t vrele_list = TAILQ_HEAD_INITIALIZER(vrele_list);
 
-struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
+static int vrele_pending;
+static kmutex_t vrele_lock;
+static kcondvar_t vrele_cv;
+static lwp_t *vrele_lwp;
 
-POOL_INIT(vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
-    &pool_allocator_nointr, IPL_NONE);
+static pool_cache_t vnode_cache;
 
 MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
 
@@ -135,27 +138,53 @@ MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
  * Local declarations.
 */
 
-static void insmntque(struct vnode *, struct mount *);
-static int getdevvp(dev_t, struct vnode **, enum vtype);
-static void vclean(struct vnode *, int, struct lwp *);
-static struct vnode *getcleanvnode(struct lwp *);
+static void vrele_thread(void *);
+static void insmntque(vnode_t *, struct mount *);
+static int getdevvp(dev_t, vnode_t **, enum vtype);
+static vnode_t *getcleanvnode(void);
+void vpanic(vnode_t *, const char *);
+
+#ifdef DIAGNOSTIC
+void
+vpanic(vnode_t *vp, const char *msg)
+{
+
+	vprint(NULL, vp);
+	panic("%s\n", msg);
+}
+#else
+#define	vpanic(vp, msg)	/* nothing */
+#endif
+
+void
+vn_init1(void)
+{
+
+	vnode_cache = pool_cache_init(sizeof(struct vnode), 0, 0, 0, "vnodepl",
+	    NULL, IPL_NONE, NULL, NULL, NULL);
+	KASSERT(vnode_cache != NULL);
+
+	/* Create deferred release thread. */
+	mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
+	cv_init(&vrele_cv, "vrele");
+	if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
+	    NULL, &vrele_lwp, "vrele"))
+		panic("fork vrele");
+}
 
 int
 vfs_drainvnodes(long target, struct lwp *l)
 {
 
-	simple_lock(&vnode_free_list_slock);
 	while (numvnodes > target) {
-		struct vnode *vp;
+		vnode_t *vp;
 
-		vp = getcleanvnode(l);
+		mutex_enter(&vnode_free_list_lock);
+		vp = getcleanvnode();
 		if (vp == NULL)
 			return EBUSY; /* give up */
-		pool_put(&vnode_pool, vp);
-		simple_lock(&vnode_free_list_slock);
-		numvnodes--;
+		ungetnewvnode(vp);
 	}
-	simple_unlock(&vnode_free_list_slock);
 
 	return 0;
 }
 
@@ -163,58 +192,95 @@ vfs_drainvnodes(long target, struct lwp *l)
 
 /*
 * grab a vnode from freelist and clean it. 
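vn_init1() above creates the deferred-release thread, but vrele_thread() itself falls outside this excerpt. Under the variables declared here, such a worker would take roughly this shape; treat it as a sketch of the idea (drain vrele_list where it is safe to sleep and take vnode locks), not as the committed body:

	static void
	vrele_thread(void *cookie)
	{
		vnode_t *vp;

		for (;;) {
			mutex_enter(&vrele_lock);
			while (TAILQ_EMPTY(&vrele_list))
				cv_wait(&vrele_cv, &vrele_lock);
			vp = TAILQ_FIRST(&vrele_list);
			TAILQ_REMOVE(&vrele_list, vp, v_freelist);
			vrele_pending--;
			mutex_exit(&vrele_lock);

			vrele(vp);
		}
	}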
*/ -struct vnode * -getcleanvnode(struct lwp *l) +vnode_t * +getcleanvnode(void) { - struct vnode *vp; - struct freelst *listhd; + vnode_t *vp; + vnodelst_t *listhd; - LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock)); + KASSERT(mutex_owned(&vnode_free_list_lock)); +retry: listhd = &vnode_free_list; try_nextlist: TAILQ_FOREACH(vp, listhd, v_freelist) { - if (!simple_lock_try(&vp->v_interlock)) + /* + * It's safe to test v_usecount and v_iflag + * without holding the interlock here, since + * these vnodes should never appear on the + * lists. + */ + if (vp->v_usecount != 0) { + vpanic(vp, "free vnode isn't"); + } + if ((vp->v_iflag & VI_CLEAN) != 0) { + vpanic(vp, "clean vnode on freelist"); + } + if (vp->v_freelisthd != listhd) { + printf("vnode sez %p, listhd %p\n", vp->v_freelisthd, listhd); + vpanic(vp, "list head mismatch"); + } + if (!mutex_tryenter(&vp->v_interlock)) continue; /* - * as our lwp might hold the underlying vnode locked, - * don't try to reclaim the VLAYER vnode if it's locked. + * Our lwp might hold the underlying vnode + * locked, so don't try to reclaim a VI_LAYER + * node if it's locked. */ if ((vp->v_iflag & VI_XLOCK) == 0 && ((vp->v_iflag & VI_LAYER) == 0 || VOP_ISLOCKED(vp) == 0)) { break; } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } - if (vp == NULLVP) { + if (vp == NULL) { if (listhd == &vnode_free_list) { listhd = &vnode_hold_list; goto try_nextlist; } - simple_unlock(&vnode_free_list_slock); - return NULLVP; + mutex_exit(&vnode_free_list_lock); + return NULL; } - if (vp->v_usecount) - panic("free vnode isn't, vp %p", vp); + /* Remove it from the freelist. */ TAILQ_REMOVE(listhd, vp, v_freelist); - /* see comment on why 0xdeadb is set at end of vgone (below) */ - vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb; - simple_unlock(&vnode_free_list_slock); + vp->v_freelisthd = NULL; + mutex_exit(&vnode_free_list_lock); - if (vp->v_type != VBAD) - vgonel(vp, l); - else - simple_unlock(&vp->v_interlock); -#ifdef DIAGNOSTIC - if (vp->v_data || vp->v_uobj.uo_npages || - TAILQ_FIRST(&vp->v_uobj.memq)) - panic("cleaned vnode isn't, vp %p", vp); - if (vp->v_numoutput) - panic("clean vnode has pending I/O's, vp %p", vp); -#endif - KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); + /* + * The vnode is still associated with a file system, so we must + * clean it out before reusing it. We need to add a reference + * before doing this. If the vnode gains another reference while + * being cleaned out then we lose - retry. + */ + vp->v_usecount++; + vclean(vp, DOCLOSE); + if (vp->v_usecount == 1) { + /* We're about to dirty it. */ + vp->v_iflag &= ~VI_CLEAN; + mutex_exit(&vp->v_interlock); + } else { + /* + * Don't return to freelist - the holder of the last + * reference will destroy it. + */ + vp->v_usecount--; + mutex_exit(&vp->v_interlock); + mutex_enter(&vnode_free_list_lock); + goto retry; + } + + if (vp->v_data != NULL || vp->v_uobj.uo_npages != 0 || + !TAILQ_EMPTY(&vp->v_uobj.memq)) { + vpanic(vp, "cleaned vnode isn't"); + } + if (vp->v_numoutput != 0) { + vpanic(vp, "clean vnode has pending I/O's"); + } + if ((vp->v_iflag & VI_ONWORKLST) != 0) { + vpanic(vp, "clean vnode on syncer list"); + } return vp; } @@ -244,11 +310,11 @@ vfs_busy(struct mount *mp, int flags, kmutex_t *interlkp) * wakeup needs to be done is at the release of the * exclusive lock at the end of dounmount. 
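getcleanvnode() above scans the freelists with mutex_tryenter(), skipping any vnode whose interlock is contended instead of sleeping for it; that sidesteps lock-order problems between vnode_free_list_lock and the per-vnode interlocks. The same scan shape on a hypothetical list (struct node and the n_* names are illustrative, not from the patch):

	#include <sys/param.h>
	#include <sys/mutex.h>
	#include <sys/queue.h>

	struct node {
		TAILQ_ENTRY(node) n_entry;
		kmutex_t	n_lock;
		bool		n_usable;
	};

	static TAILQ_HEAD(, node) nodelist = TAILQ_HEAD_INITIALIZER(nodelist);
	static kmutex_t nodelist_lock;

	/* Return a usable node, locked and off the list, or NULL. */
	static struct node *
	take_first_usable(void)
	{
		struct node *np;

		mutex_enter(&nodelist_lock);
		TAILQ_FOREACH(np, &nodelist, n_entry) {
			if (!mutex_tryenter(&np->n_lock))
				continue;	/* contended: skip, keep scanning */
			if (np->n_usable)
				break;		/* found one; n_lock still held */
			mutex_exit(&np->n_lock);
		}
		if (np != NULL)
			TAILQ_REMOVE(&nodelist, np, n_entry);
		mutex_exit(&nodelist_lock);
		return np;
	}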
 */
-	simple_lock(&mp->mnt_slock);
+	mutex_enter(&mp->mnt_mutex);
 	mp->mnt_wcnt++;
-	ltsleep((void *)mp, PVFS, "vfs_busy", 0, &mp->mnt_slock);
+	mtsleep((void *)mp, PVFS, "vfs_busy", 0, &mp->mnt_mutex);
 	n = --mp->mnt_wcnt;
-	simple_unlock(&mp->mnt_slock);
+	mutex_exit(&mp->mnt_mutex);
 	gone = mp->mnt_iflag & IMNT_GONE;
 
 	if (n == 0)
@@ -259,11 +325,9 @@ vfs_busy(struct mount *mp, int flags, kmutex_t *interlkp)
 		return (ENOENT);
 	}
 	lkflags = LK_SHARED;
-	if (interlkp) {
-		/* lkflags |= LK_INTERLOCK; XXX */
-		mutex_exit(interlkp);	/* XXX */
-	}
-	if (lockmgr(&mp->mnt_lock, lkflags, NULL))
+	if (interlkp)
+		lkflags |= LK_INTERLOCK;
+	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
 		panic("vfs_busy: unexpected lock failure");
 	return (0);
 }
@@ -291,27 +355,29 @@ vfs_rootmountalloc(const char *fstypename, const char *devname,
 	struct vfsops *vfsp = NULL;
 	struct mount *mp;
 
+	mutex_enter(&vfs_list_lock);
 	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
 		if (!strncmp(vfsp->vfs_name, fstypename,
 		    sizeof(mp->mnt_stat.f_fstypename)))
 			break;
-
 	if (vfsp == NULL)
 		return (ENODEV);
+	vfsp->vfs_refcount++;
+	mutex_exit(&vfs_list_lock);
+
 	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
 	memset((char *)mp, 0, (u_long)sizeof(struct mount));
 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
-	simple_lock_init(&mp->mnt_slock);
+	mutex_init(&mp->mnt_mutex, MUTEX_DEFAULT, IPL_NONE);
 	(void)vfs_busy(mp, LK_NOWAIT, 0);
 	TAILQ_INIT(&mp->mnt_vnodelist);
 	mp->mnt_op = vfsp;
 	mp->mnt_flag = MNT_RDONLY;
-	mp->mnt_vnodecovered = NULLVP;
-	vfsp->vfs_refcount++;
+	mp->mnt_vnodecovered = NULL;
 	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
 	    sizeof(mp->mnt_stat.f_fstypename));
 	mp->mnt_stat.f_mntonname[0] = '/';
 	mp->mnt_stat.f_mntonname[1] = '\0';
 	mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
 	    '\0';
 	(void)copystr(devname, mp->mnt_stat.f_mntfromname,
@@ -321,7 +387,6 @@ vfs_rootmountalloc(const char *fstypename, const char *devname,
 	return (0);
 }
 
-
 /*
  * Routines having to do with the management of the vnode table.
  */
@@ -332,12 +397,11 @@ extern int (**dead_vnodeop_p)(void *);
  */
 int
 getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
-    struct vnode **vpp)
+    vnode_t **vpp)
 {
 	struct uvm_object *uobj;
-	struct lwp *l = curlwp;		/* XXX */
 	static int toggle;
-	struct vnode *vp;
+	vnode_t *vp;
 	int error = 0, tryalloc;
 
 try_again:
@@ -374,7 +438,7 @@ getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
 
 	vp = NULL;
 
-	simple_lock(&vnode_free_list_slock);
+	mutex_enter(&vnode_free_list_lock);
 
 	toggle ^= 1;
 	if (numvnodes > 2 * desiredvnodes)
@@ -384,25 +448,19 @@
 	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
 	    (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
 
-	if (tryalloc &&
-	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
+	if (tryalloc) {
 		numvnodes++;
-		simple_unlock(&vnode_free_list_slock);
-		memset(vp, 0, sizeof(*vp));
-		UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 1);
-		/*
-		 * done by memset() above.
-		 *	LIST_INIT(&vp->v_nclist);
-		 *	LIST_INIT(&vp->v_dnclist);
-		 */
-	} else {
-		vp = getcleanvnode(l);
-		/*
-		 * Unless this is a bad time of the month, at most
-		 * the first NCPUS items on the free list are
-		 * locked, so this is close enough to being empty.
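vfs_rootmountalloc() above now performs the vfs_list lookup under vfs_list_lock and takes the vfs_refcount reference before dropping the lock, so the entry cannot disappear between lookup and use. That pin-while-locked idiom on a hypothetical table (struct entry and the e_*/table_* names are invented for the example):

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/mutex.h>
	#include <sys/queue.h>

	struct entry {
		LIST_ENTRY(entry) e_list;
		const char	*e_name;
		int		e_refcount;	/* protected by table_lock */
	};

	static LIST_HEAD(, entry) table = LIST_HEAD_INITIALIZER(table);
	static kmutex_t table_lock;

	static struct entry *
	table_lookup_ref(const char *name)
	{
		struct entry *ep;

		mutex_enter(&table_lock);
		LIST_FOREACH(ep, &table, e_list) {
			if (strcmp(ep->e_name, name) == 0) {
				ep->e_refcount++;	/* pin before unlocking */
				break;
			}
		}
		mutex_exit(&table_lock);
		return ep;	/* caller later drops e_refcount under table_lock */
	}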
- */ - if (vp == NULLVP) { + mutex_exit(&vnode_free_list_lock); + if ((vp = valloc(NULL)) == NULL) { + mutex_enter(&vnode_free_list_lock); + numvnodes--; + } else + vp->v_usecount = 1; + } + + if (vp == NULL) { + vp = getcleanvnode(); + if (vp == NULL) { if (mp && error != EDEADLK) vfs_unbusy(mp); if (tryalloc) { @@ -415,23 +473,24 @@ getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *), *vpp = 0; return (ENFILE); } - vp->v_usecount = 1; vp->v_iflag = 0; vp->v_vflag = 0; vp->v_uflag = 0; vp->v_socket = NULL; } - vp->v_type = VNON; - vp->v_vnlock = &vp->v_lock; - lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); + + KASSERT(vp->v_usecount == 1); + KASSERT(vp->v_freelisthd == NULL); KASSERT(LIST_EMPTY(&vp->v_nclist)); KASSERT(LIST_EMPTY(&vp->v_dnclist)); + + vp->v_type = VNON; + vp->v_vnlock = &vp->v_lock; vp->v_tag = tag; vp->v_op = vops; insmntque(vp, mp); *vpp = vp; vp->v_data = 0; - simple_lock_init(&vp->v_interlock); /* * initialize uvm_object within vnode. @@ -443,8 +502,13 @@ getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *), KASSERT(TAILQ_FIRST(&uobj->memq) == NULL); vp->v_size = vp->v_writesize = VSIZENOTSET; - if (mp && error != EDEADLK) - vfs_unbusy(mp); + if (mp != NULL) { + if ((mp->mnt_iflag & IMNT_MPSAFE) != 0) + vp->v_vflag |= VV_MPSAFE; + if (error != EDEADLK) + vfs_unbusy(mp); + } + return (0); } @@ -454,34 +518,103 @@ getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *), * of a locking race. */ void -ungetnewvnode(struct vnode *vp) +ungetnewvnode(vnode_t *vp) { -#ifdef DIAGNOSTIC - if (vp->v_usecount != 1) - panic("ungetnewvnode: busy vnode"); -#endif - vp->v_usecount--; - insmntque(vp, NULL); - vp->v_type = VBAD; - simple_lock(&vp->v_interlock); + KASSERT(vp->v_usecount == 1); + KASSERT(vp->v_data == NULL); + KASSERT(vp->v_freelisthd == NULL); + + mutex_enter(&vp->v_interlock); + vp->v_iflag |= VI_CLEAN; + vrelel(vp, 0, 0); +} + +/* + * Allocate a new, uninitialized vnode. If 'mp' is non-NULL, this is a + * marker vnode and we are prepared to wait for the allocation. + */ +vnode_t * +valloc(struct mount *mp) +{ + vnode_t *vp; + + vp = pool_cache_get(vnode_cache, (mp != NULL ? PR_WAITOK : PR_NOWAIT)); + if (vp == NULL) { + return NULL; + } + + memset(vp, 0, sizeof(*vp)); + UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 0); + cv_init(&vp->v_cv, "vnode"); /* - * Insert at head of LRU list + * done by memset() above. + * LIST_INIT(&vp->v_nclist); + * LIST_INIT(&vp->v_dnclist); */ - simple_lock(&vnode_free_list_slock); - if (vp->v_holdcnt > 0) - TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist); - else - TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); - simple_unlock(&vnode_free_list_slock); - simple_unlock(&vp->v_interlock); + + if (mp != NULL) { + vp->v_mount = mp; + vp->v_type = VBAD; + vp->v_iflag = VI_MARKER; + } else { + lockinit(&vp->v_lock, PVFS, "vnlock", 0, 0); + } + + return vp; +} + +/* + * Free an unused, unreferenced vnode. + */ +void +vfree(vnode_t *vp) +{ + + KASSERT(vp->v_usecount == 0); + + if ((vp->v_iflag & VI_MARKER) == 0) { + lockdestroy(&vp->v_lock); + mutex_enter(&vnode_free_list_lock); + numvnodes--; + mutex_exit(&vnode_free_list_lock); + } + + UVM_OBJ_DESTROY(&vp->v_uobj); + cv_destroy(&vp->v_cv); + pool_cache_put(vnode_cache, vp); +} + +/* + * Remove a vnode from its freelist. + */ +static inline void +vremfree(vnode_t *vp) +{ + + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT(vp->v_usecount == 0); + + /* + * Note that the reference count must not change until + * the vnode is removed. 
+ */ + mutex_enter(&vnode_free_list_lock); + if (vp->v_holdcnt > 0) { + KASSERT(vp->v_freelisthd == &vnode_hold_list); + } else { + KASSERT(vp->v_freelisthd == &vnode_free_list); + } + TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); + vp->v_freelisthd = NULL; + mutex_exit(&vnode_free_list_lock); } /* * Move a vnode from one mount queue to another. */ static void -insmntque(struct vnode *vp, struct mount *mp) +insmntque(vnode_t *vp, struct mount *mp) { #ifdef DIAGNOSTIC @@ -493,7 +626,7 @@ insmntque(struct vnode *vp, struct mount *mp) } #endif - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); /* * Delete from old mount point vnode list, if on one. */ @@ -504,7 +637,7 @@ insmntque(struct vnode *vp, struct mount *mp) */ if ((vp->v_mount = mp) != NULL) TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); } /* @@ -513,7 +646,7 @@ insmntque(struct vnode *vp, struct mount *mp) * Also used for memory file system special devices. */ int -bdevvp(dev_t dev, struct vnode **vpp) +bdevvp(dev_t dev, vnode_t **vpp) { return (getdevvp(dev, vpp, VBLK)); @@ -524,7 +657,7 @@ bdevvp(dev_t dev, struct vnode **vpp) * Used for kernfs and some console handling. */ int -cdevvp(dev_t dev, struct vnode **vpp) +cdevvp(dev_t dev, vnode_t **vpp) { return (getdevvp(dev, vpp, VCHR)); @@ -536,10 +669,10 @@ cdevvp(dev_t dev, struct vnode **vpp) * and by cdevvp (character device) for console and kernfs. */ static int -getdevvp(dev_t dev, struct vnode **vpp, enum vtype type) +getdevvp(dev_t dev, vnode_t **vpp, enum vtype type) { - struct vnode *vp; - struct vnode *nvp; + vnode_t *vp; + vnode_t *nvp; int error; if (dev == NODEV) { @@ -553,6 +686,7 @@ getdevvp(dev_t dev, struct vnode **vpp, enum vtype type) } vp = nvp; vp->v_type = type; + vp->v_vflag |= VV_MPSAFE; uvm_vnp_setsize(vp, 0); if ((nvp = checkalias(vp, dev, NULL)) != 0) { vput(vp); @@ -570,29 +704,34 @@ getdevvp(dev_t dev, struct vnode **vpp, enum vtype type) * the existing contents and return the aliased vnode. The * caller is responsible for filling it with its new contents. */ -struct vnode * -checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp) +vnode_t * +checkalias(vnode_t *nvp, dev_t nvp_rdev, struct mount *mp) { - struct lwp *l = curlwp; /* XXX */ - struct vnode *vp; - struct vnode **vpp; + vnode_t *vp; + vnode_t **vpp; if (nvp->v_type != VBLK && nvp->v_type != VCHR) - return (NULLVP); + return (NULL); vpp = &speclisth[SPECHASH(nvp_rdev)]; loop: - simple_lock(&spechash_slock); + mutex_enter(&spechash_lock); for (vp = *vpp; vp; vp = vp->v_specnext) { + if (vp->v_specinfo == NULL) { + vpanic(vp, "checkalias: no specinfo"); + } if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ - simple_lock(&vp->v_interlock); - simple_unlock(&spechash_slock); + mutex_enter(&vp->v_interlock); + mutex_exit(&spechash_lock); if (vp->v_usecount == 0) { - vgonel(vp, l); + vremfree(vp); + vp->v_usecount++; + vclean(vp, DOCLOSE); + vrelel(vp, 1, 1); goto loop; } /* @@ -603,11 +742,12 @@ loop: */ if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) goto loop; + mutex_enter(&spechash_lock); if (vp->v_specinfo == NULL) { + mutex_exit(&spechash_lock); vput(vp); goto loop; } - simple_lock(&spechash_slock); break; } if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) { @@ -615,8 +755,10 @@ loop: sizeof(struct specinfo), M_VNODE, M_NOWAIT); /* XXX Erg. 
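bdevvp() and cdevvp() above keep their contract: on success they return a referenced but unlocked vnode for the device. A typical caller under the new locking might look like this sketch (the surrounding error handling is illustrative, not from the patch):

	vnode_t *vp;
	int error;

	error = bdevvp(dev, &vp);	/* referenced, unlocked on success */
	if (error != 0)
		return error;
	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error == 0) {
		/* ... operate on the device vnode ... */
		VOP_UNLOCK(vp, 0);
	}
	vrele(vp);			/* drop the bdevvp() reference */
	return error;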
*/ if (nvp->v_specinfo == NULL) { - simple_unlock(&spechash_slock); + mutex_exit(&spechash_lock); uvm_wait("checkalias"); + if (vp != NULL) + vput(vp); goto loop; } @@ -624,24 +766,27 @@ loop: nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; nvp->v_specmountpoint = NULL; - simple_unlock(&spechash_slock); + mutex_exit(&spechash_lock); nvp->v_speclockf = NULL; *vpp = nvp; - if (vp != NULLVP) { + if (vp != NULL) { + /* XXX locking */ nvp->v_iflag |= VI_ALIASED; vp->v_iflag |= VI_ALIASED; vput(vp); } - return (NULLVP); + return (NULL); } - simple_unlock(&spechash_slock); + mutex_exit(&spechash_lock); VOP_UNLOCK(vp, 0); - simple_lock(&vp->v_interlock); - vclean(vp, 0, l); + mutex_enter(&vp->v_interlock); + vclean(vp, 0); + mutex_exit(&vp->v_interlock); vp->v_op = nvp->v_op; vp->v_tag = nvp->v_tag; vp->v_vnlock = &vp->v_lock; + lockdestroy(vp->v_vnlock); lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); nvp->v_type = VNON; insmntque(vp, mp); @@ -657,50 +802,48 @@ loop: * longer usable (possibly having been changed to a new file system type). */ int -vget(struct vnode *vp, int flags) +vget(vnode_t *vp, int flags) { int error; + KASSERT((vp->v_iflag & VI_MARKER) == 0); + + if ((flags & LK_INTERLOCK) == 0) + mutex_enter(&vp->v_interlock); + + /* + * Before adding a reference, we must remove the vnode + * from its freelist. + */ + if (vp->v_usecount == 0) { + vremfree(vp); + } + if (++vp->v_usecount == 0) { + vpanic(vp, "vget: usecount overflow"); + } + /* * If the vnode is in the process of being cleaned out for * another use, we wait for the cleaning to finish and then * return failure. Cleaning is determined by checking that * the VI_XLOCK flag is set. */ - - if ((flags & LK_INTERLOCK) == 0) - simple_lock(&vp->v_interlock); - if ((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0) { + if ((vp->v_iflag & VI_XLOCK) != 0) { if (flags & LK_NOWAIT) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return EBUSY; } - vp->v_iflag |= VI_XWANT; - ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock); + vwait(vp, VI_XLOCK); + vrelel(vp, 1, 0); return (ENOENT); } - if (vp->v_usecount == 0) { - simple_lock(&vnode_free_list_slock); - if (vp->v_holdcnt > 0) - TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); - else - TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); - simple_unlock(&vnode_free_list_slock); - } - vp->v_usecount++; -#ifdef DIAGNOSTIC - if (vp->v_usecount == 0) { - vprint("vget", vp); - panic("vget: usecount overflow, vp %p", vp); - } -#endif if (flags & LK_TYPE_MASK) { if ((error = vn_lock(vp, flags | LK_INTERLOCK))) { vrele(vp); } return (error); } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (0); } @@ -708,109 +851,222 @@ vget(struct vnode *vp, int flags) * vput(), just unlock and vrele() */ void -vput(struct vnode *vp) +vput(vnode_t *vp) { -#ifdef DIAGNOSTIC - if (vp == NULL) - panic("vput: null vp"); -#endif - simple_lock(&vp->v_interlock); - vp->v_usecount--; - if (vp->v_usecount > 0) { - simple_unlock(&vp->v_interlock); - VOP_UNLOCK(vp, 0); - return; - } -#ifdef DIAGNOSTIC - if (vp->v_usecount < 0 || vp->v_writecount != 0) { - vprint("vput: bad ref count", vp); - panic("vput: ref cnt"); - } -#endif - /* - * Insert at tail of LRU list. 
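The reworked vget() above removes the vnode from its freelist before taking the reference, and turns the old VI_XWANT/ltsleep wait into vwait(). From the caller's side the idiom is unchanged: find the vnode under some list lock, take v_interlock, hand the interlock to vget() with LK_INTERLOCK, and retry on failure. A sketch, where hash_lookup(), struct key and table_lock are hypothetical stand-ins for a file system's own hash table:

	extern kmutex_t table_lock;		/* hypothetical */
	vnode_t *hash_lookup(const struct key *);	/* hypothetical */

	vnode_t *
	lookup_and_get(const struct key *key)
	{
		vnode_t *vp;

	loop:
		mutex_enter(&table_lock);
		vp = hash_lookup(key);
		if (vp == NULL) {
			mutex_exit(&table_lock);
			return NULL;
		}
		mutex_enter(&vp->v_interlock);
		mutex_exit(&table_lock);
		/* vget() consumes v_interlock; fails if the vnode is dying. */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK) != 0)
			goto loop;
		return vp;		/* referenced and locked */
	}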
- */ - simple_lock(&vnode_free_list_slock); - if (vp->v_holdcnt > 0) - TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); - else - TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); - simple_unlock(&vnode_free_list_slock); - if (vp->v_iflag & VI_EXECMAP) { - atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages); - atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages); - } - vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP|VI_MAPPED); - vp->v_vflag &= ~VV_MAPPED; - simple_unlock(&vp->v_interlock); - VOP_INACTIVE(vp); + KASSERT((vp->v_iflag & VI_MARKER) == 0); + + VOP_UNLOCK(vp, 0); + vrele(vp); } /* - * Vnode release. - * If count drops to zero, call inactive routine and return to freelist. + * Vnode release. If reference count drops to zero, call inactive + * routine and either return to freelist or free to the pool. */ -static void -do_vrele(struct vnode *vp, int doinactive, int onhead) +void +vrelel(vnode_t *vp, int doinactive, int onhead) { + bool recycle, defer; + int error; -#ifdef DIAGNOSTIC - if (vp == NULL) - panic("vrele: null vp"); -#endif - simple_lock(&vp->v_interlock); - vp->v_usecount--; - if (vp->v_usecount > 0) { - simple_unlock(&vp->v_interlock); + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT((vp->v_iflag & VI_MARKER) == 0); + + if (vp->v_op == dead_vnodeop_p && (vp->v_iflag & VI_CLEAN) == 0) { + vpanic(vp, "dead but not clean"); + } + + /* + * If not the last reference, just drop the reference count + * and unlock. + */ + if (vp->v_usecount > 1) { + vp->v_usecount--; + vp->v_iflag |= VI_INACTREDO; + mutex_exit(&vp->v_interlock); return; } -#ifdef DIAGNOSTIC - if (vp->v_usecount < 0 || vp->v_writecount != 0) { - vprint("vrele: bad ref count", vp); - panic("vrele: ref cnt vp %p", vp); + if (vp->v_usecount <= 0 || vp->v_writecount != 0) { + vpanic(vp, "vput: bad ref count"); } -#endif + /* - * Insert at tail of LRU list. + * If not clean, deactivate the vnode, but preserve + * our reference across the call to VOP_INACTIVE(). */ - simple_lock(&vnode_free_list_slock); - if (vp->v_holdcnt > 0) { - TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); - } else { - if (onhead) - TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); - else - TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); - } - simple_unlock(&vnode_free_list_slock); - if (vp->v_iflag & VI_EXECMAP) { - atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages); - atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages); - } - vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP|VI_MAPPED); - vp->v_vflag &= ~VV_MAPPED; + retry: + if ((vp->v_iflag & VI_CLEAN) == 0) { + recycle = false; + /* + * XXX This ugly block can be largely eliminated if + * locking is pushed down into the file systems. + */ + if (curlwp == uvm.pagedaemon_lwp) { + /* The pagedaemon can't wait around; defer. */ + defer = true; + } else if (curlwp == vrele_lwp) { + /* We have to try harder. */ + vp->v_iflag &= ~VI_INACTREDO; + error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | + LK_RETRY); + if (error != 0) { + /* XXX */ + vpanic(vp, "vrele: unable to lock %p"); + } + defer = false; + } else if ((vp->v_iflag & VI_LAYER) != 0) { + /* + * Acquiring the stack's lock in vclean() even + * for an honest vput/vrele is dangerous because + * our caller may hold other vnode locks; defer. + */ + defer = true; + } else { + /* If we can't acquire the lock, then defer. 
*/ + vp->v_iflag &= ~VI_INACTREDO; + error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | + LK_NOWAIT); + if (error != 0) { + defer = true; + mutex_enter(&vp->v_interlock); + } else { + defer = false; + } + } - if (doinactive) { - if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0) - VOP_INACTIVE(vp); + if (defer) { + /* + * Defer reclaim to the kthread; it's not safe to + * clean it here. We donate it our last reference. + */ + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT((vp->v_iflag & VI_INACTPEND) == 0); + KASSERT(vp->v_usecount == 1); + vp->v_iflag |= VI_INACTPEND; + mutex_enter(&vrele_lock); + TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist); + if (++vrele_pending > (desiredvnodes >> 8)) + cv_signal(&vrele_cv); + mutex_exit(&vrele_lock); + mutex_exit(&vp->v_interlock); + return; + } + + /* + * The vnode may gain another reference while being + * deactivated. Note that VOP_INACTIVE() will drop + * the vnode lock. + */ + VOP_INACTIVE(vp, &recycle); + mutex_enter(&vp->v_interlock); + if (vp->v_usecount > 1) { + vp->v_usecount--; + mutex_exit(&vp->v_interlock); + return; + } + + /* + * If we grew another reference while VOP_INACTIVE() + * was underway, then retry. + */ + if ((vp->v_iflag & VI_INACTREDO) != 0) { + goto retry; + } + + /* Take care of space accounting. */ + if (vp->v_iflag & VI_EXECMAP) { + atomic_add_int(&uvmexp.execpages, + -vp->v_uobj.uo_npages); + atomic_add_int(&uvmexp.filepages, + vp->v_uobj.uo_npages); + } + vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP|VI_MAPPED); + vp->v_vflag &= ~VV_MAPPED; + + /* + * Recycle the vnode if the file is now unused (unlinked), + * otherwise just free it. + */ + if (recycle) { + vclean(vp, DOCLOSE); + } + KASSERT(vp->v_usecount > 0); + } + + if (--vp->v_usecount != 0) { + /* Gained another reference while being reclaimed. */ + mutex_exit(&vp->v_interlock); + return; + } + + if ((vp->v_iflag & VI_CLEAN) != 0) { + /* + * It's clean so destroy it. It isn't referenced + * anywhere since it has been reclaimed. + */ + KASSERT(vp->v_holdcnt == 0); + KASSERT(vp->v_writecount == 0); + mutex_exit(&vp->v_interlock); + insmntque(vp, NULL); + vfree(vp); } else { - simple_unlock(&vp->v_interlock); + /* + * Otherwise, put it back onto the freelist. It + * can't be destroyed while still associated with + * a file system. + */ + mutex_enter(&vnode_free_list_lock); + if (vp->v_holdcnt > 0) { + vp->v_freelisthd = &vnode_hold_list; + } else { + vp->v_freelisthd = &vnode_free_list; + } + TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); + mutex_exit(&vnode_free_list_lock); + mutex_exit(&vp->v_interlock); } } void -vrele(struct vnode *vp) +vrele(vnode_t *vp) { - do_vrele(vp, 1, 0); + KASSERT((vp->v_iflag & VI_MARKER) == 0); + + mutex_enter(&vp->v_interlock); + vrelel(vp, 1, 0); } -void -vrele2(struct vnode *vp, int onhead) +static void +vrele_thread(void *cookie) { + vnode_t *vp; - do_vrele(vp, 0, onhead); + for (;;) { + mutex_enter(&vrele_lock); + while (TAILQ_EMPTY(&vrele_list)) { + cv_timedwait(&vrele_cv, &vrele_lock, hz); + } + vp = TAILQ_FIRST(&vrele_list); + TAILQ_REMOVE(&vrele_list, vp, v_freelist); + vrele_pending--; + mutex_exit(&vrele_lock); + + /* + * If not the last reference, then ignore the vnode + * and look for more work. 
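The defer branch above, together with vrele_thread(), is a conventional hand-off: the producer donates its reference, queues the object, and pokes the worker; the worker drains the queue at its leisure. The same skeleton for a generic work item (struct work and the work_* names are invented for the example):

	#include <sys/param.h>
	#include <sys/mutex.h>
	#include <sys/condvar.h>
	#include <sys/queue.h>

	struct work {
		TAILQ_ENTRY(work) wk_entry;
	};

	static TAILQ_HEAD(, work) work_list = TAILQ_HEAD_INITIALIZER(work_list);
	static kmutex_t work_lock;
	static kcondvar_t work_cv;

	void work_process(struct work *);	/* hypothetical payload */

	static void
	work_defer(struct work *wk)
	{

		mutex_enter(&work_lock);
		TAILQ_INSERT_TAIL(&work_list, wk, wk_entry);
		cv_signal(&work_cv);		/* one worker, one wakeup */
		mutex_exit(&work_lock);
	}

	static void
	work_thread(void *cookie)
	{
		struct work *wk;

		for (;;) {
			mutex_enter(&work_lock);
			while (TAILQ_EMPTY(&work_list))
				cv_wait(&work_cv, &work_lock);
			wk = TAILQ_FIRST(&work_list);
			TAILQ_REMOVE(&work_list, wk, wk_entry);
			mutex_exit(&work_lock);
			work_process(wk);
		}
	}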
+ */ + mutex_enter(&vp->v_interlock); + KASSERT((vp->v_iflag & VI_INACTPEND) != 0); + vp->v_iflag &= ~VI_INACTPEND; + if (vp->v_usecount > 1) { + vp->v_usecount--; + mutex_exit(&vp->v_interlock); + continue; + } + vrelel(vp, 1, 0); + } } /* @@ -818,30 +1074,20 @@ vrele2(struct vnode *vp, int onhead) * Called with v_interlock held. */ void -vholdl(struct vnode *vp) +vholdl(vnode_t *vp) { - /* - * If it is on the freelist and the hold count is currently - * zero, move it to the hold list. The test of the back - * pointer and the use reference count of zero is because - * it will be removed from a free list by getnewvnode, - * but will not have its reference count incremented until - * after calling vgone. If the reference count were - * incremented first, vgone would (incorrectly) try to - * close the previous instance of the underlying object. - * So, the back pointer is explicitly set to `0xdeadb' in - * getnewvnode after removing it from a freelist to ensure - * that we do not try to move it here. - */ - if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && - vp->v_holdcnt == 0 && vp->v_usecount == 0) { - simple_lock(&vnode_free_list_slock); - TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); - TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); - simple_unlock(&vnode_free_list_slock); + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT((vp->v_iflag & VI_MARKER) == 0); + + if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) { + mutex_enter(&vnode_free_list_lock); + KASSERT(vp->v_freelisthd == &vnode_free_list); + TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); + vp->v_freelisthd = &vnode_hold_list; + TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); + mutex_exit(&vnode_free_list_lock); } - vp->v_holdcnt++; } /* @@ -849,54 +1095,45 @@ vholdl(struct vnode *vp) * Called with v_interlock held. */ void -holdrelel(struct vnode *vp) +holdrelel(vnode_t *vp) { - if (vp->v_holdcnt <= 0) - panic("holdrelel: holdcnt vp %p", vp); + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT((vp->v_iflag & VI_MARKER) == 0); + + if (vp->v_holdcnt <= 0) { + vpanic(vp, "holdrelel: holdcnt vp %p"); + } + vp->v_holdcnt--; - - /* - * If it is on the holdlist and the hold count drops to - * zero, move it to the free list. The test of the back - * pointer and the use reference count of zero is because - * it will be removed from a free list by getnewvnode, - * but will not have its reference count incremented until - * after calling vgone. If the reference count were - * incremented first, vgone would (incorrectly) try to - * close the previous instance of the underlying object. - * So, the back pointer is explicitly set to `0xdeadb' in - * getnewvnode after removing it from a freelist to ensure - * that we do not try to move it here. - */ - - if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && - vp->v_holdcnt == 0 && vp->v_usecount == 0) { - simple_lock(&vnode_free_list_slock); - TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); - TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); - simple_unlock(&vnode_free_list_slock); + if (vp->v_holdcnt == 0 && vp->v_usecount == 0) { + mutex_enter(&vnode_free_list_lock); + KASSERT(vp->v_freelisthd == &vnode_hold_list); + TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); + vp->v_freelisthd = &vnode_free_list; + TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); + mutex_exit(&vnode_free_list_lock); } } /* - * Vnode reference. + * Vnode reference, where a reference is already held by some other + * object (for example, a file structure). 
*/ void -vref(struct vnode *vp) +vref(vnode_t *vp) { - simple_lock(&vp->v_interlock); - if (vp->v_usecount <= 0) - panic("vref used where vget required, vp %p", vp); - vp->v_usecount++; -#ifdef DIAGNOSTIC - if (vp->v_usecount == 0) { - vprint("vref", vp); - panic("vref: usecount overflow, vp %p", vp); + KASSERT((vp->v_iflag & VI_MARKER) == 0); + + mutex_enter(&vp->v_interlock); + if (vp->v_usecount <= 0) { + vpanic(vp, "vref used where vget required"); } -#endif - simple_unlock(&vp->v_interlock); + if (++vp->v_usecount == 0) { + vpanic(vp, "vref: usecount overflow"); + } + mutex_exit(&vp->v_interlock); } /* @@ -918,33 +1155,35 @@ struct ctldebug debug1 = { "busyprt", &busyprt }; #endif int -vflush(struct mount *mp, struct vnode *skipvp, int flags) +vflush(struct mount *mp, vnode_t *skipvp, int flags) { - struct lwp *l = curlwp; /* XXX */ - struct vnode *vp, *nvp; + vnode_t *vp, *mvp; int busy = 0; - simple_lock(&mntvnode_slock); -loop: + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return (ENOMEM); + + mutex_enter(&mntvnode_lock); /* * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() * and vclean() are called */ - for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { - if (vp->v_mount != mp) - goto loop; - nvp = TAILQ_NEXT(vp, v_mntvnodes); + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + if (vp->v_mount != mp || vismarker(vp)) + continue; /* * Skip over a selected vnode. */ if (vp == skipvp) continue; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); /* - * Skip over a vnodes marked VV_SYSTEM. + * Skip over a vnodes marked VSYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); continue; } /* @@ -953,7 +1192,7 @@ loop: */ if ((flags & WRITECLOSE) && (vp->v_writecount == 0 || vp->v_type != VREG)) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); continue; } /* @@ -961,36 +1200,42 @@ loop: * out the vnode data structures and we are done. */ if (vp->v_usecount == 0) { - simple_unlock(&mntvnode_slock); - vgonel(vp, l); - simple_lock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); + vremfree(vp); + vp->v_usecount++; + vclean(vp, DOCLOSE); + vrelel(vp, 1, 0); + mutex_enter(&mntvnode_lock); continue; } /* * If FORCECLOSE is set, forcibly close the vnode. * For block or character devices, revert to an * anonymous device. For all other files, just kill them. + * XXXAD what? */ if (flags & FORCECLOSE) { - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); + vp->v_usecount++; if (vp->v_type != VBLK && vp->v_type != VCHR) { - vgonel(vp, l); + vclean(vp, DOCLOSE); } else { - vclean(vp, 0, l); + vclean(vp, 0); vp->v_op = spec_vnodeop_p; - insmntque(vp, (struct mount *)0); } - simple_lock(&mntvnode_slock); + vrelel(vp, 1, 0); + mutex_enter(&mntvnode_lock); continue; } #ifdef DEBUG if (busyprt) vprint("vflush: busy vnode", vp); #endif - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); busy++; } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); + vfree(mvp); if (busy) return (EBUSY); return (0); @@ -998,58 +1243,45 @@ loop: /* * Disassociate the underlying file system from a vnode. + * + * Must be called with the interlock held, and will return with it held. 
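vflush() above, like checkalias() earlier and vcount()/vrecycle() later in the patch, replaces the old vgone()/vgonel() call with one spelled-out idiom for destroying an unreferenced vnode. Pulled out on its own, with the preconditions stated (a sketch of the idiom, not a new function in the patch):

	/* Preconditions: vp is unreferenced and v_interlock is held. */
	KASSERT(mutex_owned(&vp->v_interlock));
	KASSERT(vp->v_usecount == 0);

	vremfree(vp);		/* off the freelist before referencing */
	vp->v_usecount++;	/* pin it across vclean() */
	vclean(vp, DOCLOSE);	/* disassociate from the file system */
	vrelel(vp, 0, 0);	/* drops the reference and v_interlock */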
*/ -static void -vclean(struct vnode *vp, int flags, struct lwp *l) +void +vclean(vnode_t *vp, int flags) { - int active; + lwp_t *l = curlwp; + bool recycle, active; + struct specinfo *si; - LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT((vp->v_iflag & VI_MARKER) == 0); + KASSERT(vp->v_usecount != 0); - /* - * Check to see if the vnode is in use. - * If so we have to reference it before we clean it out - * so that its count cannot fall to zero and generate a - * race against ourselves to recycle it. - */ + /* If cleaning is already in progress wait until done and return. */ + if (vp->v_iflag & VI_XLOCK) { + vwait(vp, VI_XLOCK); + return; + } - if ((active = vp->v_usecount) != 0) { - vp->v_usecount++; -#ifdef DIAGNOSTIC - if (vp->v_usecount == 0) { - vprint("vclean", vp); - panic("vclean: usecount overflow"); - } -#endif + /* If already clean, nothing to do. */ + if ((vp->v_iflag & VI_CLEAN) != 0) { + return; } /* - * Prevent the vnode from being recycled or - * brought into use while we clean it out. + * Prevent the vnode from being recycled or brought into use + * while we clean it out. */ - if (vp->v_iflag & VI_XLOCK) - panic("vclean: deadlock, vp %p", vp); vp->v_iflag |= VI_XLOCK; if (vp->v_iflag & VI_EXECMAP) { atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages); atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages); } vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP); + active = (vp->v_usecount > 1); - /* - * Even if the count is zero, the VOP_INACTIVE routine may still - * have the object locked while it cleans it out. For - * active vnodes, it ensures that no other activity can - * occur while the underlying object is being cleaned out. - * - * We drain the lock to make sure we are the last one trying to - * get it and immediately resurrect the lock. Future accesses - * for locking this _vnode_ will be protected by VI_XLOCK. However, - * upper layers might be using the _lock_ in case the file system - * exported it and might access it while the vnode lingers in - * deadfs. - */ - VOP_LOCK(vp, LK_DRAIN | LK_RESURRECT | LK_INTERLOCK); + /* XXXAD should not lock vnode under layer */ + VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK); /* * Clean out any cached data associated with the vnode. 
@@ -1058,7 +1290,7 @@ vclean(struct vnode *vp, int flags, struct lwp *l) */ if (flags & DOCLOSE) { int error; - struct vnode *vq, *vx; + vnode_t *vq, *vx; error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0); if (error) @@ -1066,12 +1298,13 @@ vclean(struct vnode *vp, int flags, struct lwp *l) KASSERT(error == 0); KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); + /* XXXAD close should not happen on layered vnode */ if (active) VOP_CLOSE(vp, FNONBLOCK, NOCRED); if ((vp->v_type == VBLK || vp->v_type == VCHR) && - vp->v_specinfo != 0) { - simple_lock(&spechash_slock); + vp->v_specinfo != NULL) { + mutex_enter(&spechash_lock); if (vp->v_hashchain != NULL) { if (*vp->v_hashchain == vp) { *vp->v_hashchain = vp->v_specnext; @@ -1088,8 +1321,8 @@ vclean(struct vnode *vp, int flags, struct lwp *l) } if (vp->v_iflag & VI_ALIASED) { vx = NULL; - for (vq = *vp->v_hashchain; vq; - vq = vq->v_specnext) { + for (vq = *vp->v_hashchain; vq; + vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; @@ -1104,9 +1337,10 @@ vclean(struct vnode *vp, int flags, struct lwp *l) vp->v_iflag &= ~VI_ALIASED; } } - simple_unlock(&spechash_slock); - FREE(vp->v_specinfo, M_VNODE); + si = vp->v_specinfo; vp->v_specinfo = NULL; + mutex_exit(&spechash_lock); + FREE(si, M_VNODE); } } @@ -1116,7 +1350,7 @@ vclean(struct vnode *vp, int flags, struct lwp *l) * VOP_INACTIVE will unlock the vnode. */ if (active) { - VOP_INACTIVE(vp); + VOP_INACTIVE(vp, &recycle); } else { /* * Any other processes trying to obtain this lock must first @@ -1124,38 +1358,10 @@ vclean(struct vnode *vp, int flags, struct lwp *l) */ VOP_UNLOCK(vp, 0); } - /* - * Reclaim the vnode. - */ - if (VOP_RECLAIM(vp)) - panic("vclean: cannot reclaim, vp %p", vp); - if (active) { - /* - * Inline copy of vrele() since VOP_INACTIVE - * has already been called. - */ - simple_lock(&vp->v_interlock); - if (--vp->v_usecount <= 0) { -#ifdef DIAGNOSTIC - if (vp->v_usecount < 0 || vp->v_writecount != 0) { - vprint("vclean: bad ref count", vp); - panic("vclean: ref cnt"); - } -#endif - /* - * Insert at tail of LRU list. - */ - simple_unlock(&vp->v_interlock); - simple_lock(&vnode_free_list_slock); -#ifdef DIAGNOSTIC - if (vp->v_holdcnt > 0) - panic("vclean: not clean, vp %p", vp); -#endif - TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); - simple_unlock(&vnode_free_list_slock); - } else - simple_unlock(&vp->v_interlock); + /* Disassociate the underlying file system from the vnode. */ + if (VOP_RECLAIM(vp)) { + vpanic(vp, "vclean: cannot reclaim"); } KASSERT(vp->v_uobj.uo_npages == 0); @@ -1165,22 +1371,18 @@ vclean(struct vnode *vp, int flags, struct lwp *l) } cache_purge(vp); - /* - * Done with purge, notify sleepers of the grim news. - */ + /* Done with purge, notify sleepers of the grim news. */ vp->v_op = dead_vnodeop_p; vp->v_tag = VT_NON; - vp->v_vnlock = NULL; - simple_lock(&vp->v_interlock); - VN_KNOTE(vp, NOTE_REVOKE); /* FreeBSD has this in vn_pollgone() */ + mutex_enter(&vp->v_interlock); + vp->v_vnlock = &vp->v_lock; + VN_KNOTE(vp, NOTE_REVOKE); vp->v_iflag &= ~VI_XLOCK; + vp->v_iflag |= VI_CLEAN; vp->v_vflag &= ~VV_LOCKSWORK; - if (vp->v_iflag & VI_XWANT) { - vp->v_iflag &= ~VI_XWANT; - simple_unlock(&vp->v_interlock); - wakeup((void *)vp); - } else - simple_unlock(&vp->v_interlock); + cv_broadcast(&vp->v_cv); + + KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); } /* @@ -1188,111 +1390,48 @@ vclean(struct vnode *vp, int flags, struct lwp *l) * Release the passed interlock if the vnode will be recycled. 
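vclean() above signals completion with cv_broadcast(&vp->v_cv) after setting VI_CLEAN, so any thread that finds VI_XLOCK set waits via vwait() rather than the old VI_XWANT/ltsleep pair. The waiter's side, as vget() uses it (a sketch; the caller must already hold a reference so vp stays valid across the sleep):

	mutex_enter(&vp->v_interlock);
	if ((vp->v_iflag & VI_XLOCK) != 0) {
		/* vwait() sleeps on v_cv until VI_XLOCK clears. */
		vwait(vp, VI_XLOCK);
		vrelel(vp, 1, 0);	/* vnode is now dead; drop our ref */
		return ENOENT;
	}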
*/ int -vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct lwp *l) +vrecycle(vnode_t *vp, kmutex_t *inter_lkp, struct lwp *l) { - simple_lock(&vp->v_interlock); - if (vp->v_usecount == 0) { - if (inter_lkp) - simple_unlock(inter_lkp); - vgonel(vp, l); - return (1); + KASSERT((vp->v_iflag & VI_MARKER) == 0); + + mutex_enter(&vp->v_interlock); + if (vp->v_usecount != 0) { + mutex_exit(&vp->v_interlock); + return (0); } - simple_unlock(&vp->v_interlock); - return (0); + if (inter_lkp) + mutex_exit(inter_lkp); + vremfree(vp); + vp->v_usecount++; + vclean(vp, DOCLOSE); + vrelel(vp, 0, 0); + return (1); } /* - * Eliminate all activity associated with a vnode - * in preparation for reuse. + * Eliminate all activity associated with a vnode in preparation for + * reuse. Drops a reference from the vnode. */ void -vgone(struct vnode *vp) -{ - struct lwp *l = curlwp; /* XXX */ - - simple_lock(&vp->v_interlock); - vgonel(vp, l); -} - -/* - * vgone, with the vp interlock held. - */ -void -vgonel(struct vnode *vp, struct lwp *l) +vgone(vnode_t *vp) { - LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); - - /* - * If a vgone (or vclean) is already in progress, - * wait until it is done and return. - */ - - if (vp->v_iflag & VI_XLOCK) { - vp->v_iflag |= VI_XWANT; - ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock); - return; - } - - /* - * Clean out the filesystem specific data. - */ - - vclean(vp, DOCLOSE, l); - KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); - - /* - * Delete from old mount point vnode list, if on one. - */ - - if (vp->v_mount != NULL) - insmntque(vp, (struct mount *)0); - - /* - * The test of the back pointer and the reference count of - * zero is because it will be removed from the free list by - * getcleanvnode, but will not have its reference count - * incremented until after calling vgone. If the reference - * count were incremented first, vgone would (incorrectly) - * try to close the previous instance of the underlying object. - * So, the back pointer is explicitly set to `0xdeadb' in - * getnewvnode after removing it from the freelist to ensure - * that we do not try to move it here. - */ - - vp->v_type = VBAD; - if (vp->v_usecount == 0) { - bool dofree; - - simple_lock(&vnode_free_list_slock); - if (vp->v_holdcnt > 0) - panic("vgonel: not clean, vp %p", vp); - /* - * if it isn't on the freelist, we're called by getcleanvnode - * and vnode is being re-used. otherwise, we'll free it. - */ - dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb; - if (dofree) { - TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); - numvnodes--; - } - simple_unlock(&vnode_free_list_slock); - if (dofree) - pool_put(&vnode_pool, vp); - } + mutex_enter(&vp->v_interlock); + vclean(vp, DOCLOSE); + vrelel(vp, 0, 0); } /* * Lookup a vnode by device number. 
*/ int -vfinddev(dev_t dev, enum vtype type, struct vnode **vpp) +vfinddev(dev_t dev, enum vtype type, vnode_t **vpp) { - struct vnode *vp; + vnode_t *vp; int rc = 0; - simple_lock(&spechash_slock); + mutex_enter(&spechash_lock); for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { if (dev != vp->v_rdev || type != vp->v_type) continue; @@ -1300,7 +1439,7 @@ vfinddev(dev_t dev, enum vtype type, struct vnode **vpp) rc = 1; break; } - simple_unlock(&spechash_slock); + mutex_exit(&spechash_lock); return (rc); } @@ -1311,7 +1450,7 @@ vfinddev(dev_t dev, enum vtype type, struct vnode **vpp) void vdevgone(int maj, int minl, int minh, enum vtype type) { - struct vnode *vp; + vnode_t *vp; int mn; vp = NULL; /* XXX gcc */ @@ -1325,15 +1464,21 @@ vdevgone(int maj, int minl, int minh, enum vtype type) * Calculate the total number of references to a special device. */ int -vcount(struct vnode *vp) +vcount(vnode_t *vp) { - struct vnode *vq, *vnext; + vnode_t *vq, *vnext; int count; loop: - if ((vp->v_iflag & VI_ALIASED) == 0) - return (vp->v_usecount); - simple_lock(&spechash_slock); + mutex_enter(&spechash_lock); + mutex_enter(&vp->v_interlock); + if ((vp->v_iflag & VI_ALIASED) == 0) { + count = vp->v_usecount - ((vp->v_iflag & VI_INACTPEND) != 0); + mutex_exit(&vp->v_interlock); + mutex_exit(&spechash_lock); + return (count); + } + mutex_exit(&vp->v_interlock); for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { vnext = vq->v_specnext; if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) @@ -1341,19 +1486,23 @@ loop: /* * Alias, but not in use, so flush it out. */ + mutex_enter(&vq->v_interlock); if (vq->v_usecount == 0 && vq != vp && (vq->v_iflag & VI_XLOCK) == 0) { - simple_unlock(&spechash_slock); - vgone(vq); + mutex_exit(&spechash_lock); + vremfree(vq); + vq->v_usecount++; + vclean(vq, DOCLOSE); + vrelel(vq, 1, 0); goto loop; } count += vq->v_usecount; + mutex_exit(&vq->v_interlock); } - simple_unlock(&spechash_slock); + mutex_exit(&spechash_lock); return (count); } - /* * sysctl helper routine to return list of supported fstypes */ @@ -1463,7 +1612,7 @@ sysctl_kern_vnode(SYSCTLFN_ARGS) char *where = oldp; size_t *sizep = oldlenp; struct mount *mp, *nmp; - struct vnode *vp; + vnode_t *vp, *mvp; char *bp = where, *savebp; char *ewhere; int error; @@ -1473,14 +1622,15 @@ sysctl_kern_vnode(SYSCTLFN_ARGS) if (newp != NULL) return (EPERM); -#define VPTRSZ sizeof(struct vnode *) -#define VNODESZ sizeof(struct vnode) +#define VPTRSZ sizeof(vnode_t *) +#define VNODESZ sizeof(vnode_t) if (where == NULL) { *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); return (0); } ewhere = where + *sizep; + mutex_enter(&mountlist_lock); for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; mp = nmp) { @@ -1489,37 +1639,44 @@ sysctl_kern_vnode(SYSCTLFN_ARGS) continue; } savebp = bp; -again: - simple_lock(&mntvnode_slock); - TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return (ENOMEM); + mutex_enter(&mntvnode_lock); + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); /* * Check that the vp is still associated with * this filesystem. RACE: could have been * recycled onto the same filesystem. 
*/ - if (vp->v_mount != mp) { - simple_unlock(&mntvnode_slock); - if (kinfo_vdebug) - printf("kinfo: vp changed\n"); - bp = savebp; - goto again; - } + if (vp->v_mount != mp || vismarker(vp)) + continue; if (bp + VPTRSZ + VNODESZ > ewhere) { - simple_unlock(&mntvnode_slock); + (void)vunmark(mvp); + mutex_exit(&mntvnode_lock); + vfree(mvp); *sizep = bp - where; return (ENOMEM); } - simple_unlock(&mntvnode_slock); + /* XXXAD copy to temporary buffer */ + mutex_exit(&mntvnode_lock); if ((error = copyout((void *)&vp, bp, VPTRSZ)) || - (error = copyout((void *)vp, bp + VPTRSZ, VNODESZ))) + (error = copyout((void *)vp, bp + VPTRSZ, VNODESZ))) { + mutex_enter(&mntvnode_lock); + (void)vunmark(mvp); + mutex_exit(&mntvnode_lock); + vfree(mvp); return (error); + } bp += VPTRSZ + VNODESZ; - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); mutex_enter(&mountlist_lock); nmp = CIRCLEQ_NEXT(mp, mnt_list); vfs_unbusy(mp); + vfree(mvp); } mutex_exit(&mountlist_lock); @@ -1527,13 +1684,32 @@ again: return (0); } +/* + * Remove clean vnodes from a mountpoint's vnode list. + */ +void +vfs_scrubvnlist(struct mount *mp) +{ + vnode_t *vp, *nvp; + + mutex_enter(&mntvnode_lock); + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { + nvp = TAILQ_NEXT(vp, v_mntvnodes); + mutex_enter(&vp->v_interlock); + if ((vp->v_iflag & VI_CLEAN) != 0) + TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes); + mutex_exit(&vp->v_interlock); + } + mutex_exit(&mntvnode_lock); +} + /* * Check to see if a filesystem is mounted on a block device. */ int -vfs_mountedon(struct vnode *vp) +vfs_mountedon(vnode_t *vp) { - struct vnode *vq; + vnode_t *vq; int error = 0; if (vp->v_type != VBLK) @@ -1541,7 +1717,7 @@ vfs_mountedon(struct vnode *vp) if (vp->v_specmountpoint != NULL) return (EBUSY); if (vp->v_iflag & VI_ALIASED) { - simple_lock(&spechash_slock); + mutex_enter(&spechash_lock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) @@ -1551,7 +1727,7 @@ vfs_mountedon(struct vnode *vp) break; } } - simple_unlock(&spechash_slock); + mutex_exit(&spechash_lock); } return (error); } @@ -1595,8 +1771,6 @@ vfs_unmountall(struct lwp *l) printf("WARNING: some file systems would not unmount\n"); } -extern struct simplelock bqueue_slock; /* XXX */ - /* * Sync and unmount file systems before shutting down. */ diff --git a/sys/kern/vfs_subr2.c b/sys/kern/vfs_subr2.c index f190c34d5278..d66b502417f5 100644 --- a/sys/kern/vfs_subr2.c +++ b/sys/kern/vfs_subr2.c @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_subr2.c,v 1.8 2007/11/26 19:02:08 pooka Exp $ */ +/* $NetBSD: vfs_subr2.c,v 1.9 2008/01/02 11:48:56 ad Exp $ */ /*- * Copyright (c) 1997, 1998, 2004, 2005, 2007 The NetBSD Foundation, Inc. 
@@ -82,7 +82,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: vfs_subr2.c,v 1.8 2007/11/26 19:02:08 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_subr2.c,v 1.9 2008/01/02 11:48:56 ad Exp $"); #include "opt_ddb.h" @@ -95,6 +95,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_subr2.c,v 1.8 2007/11/26 19:02:08 pooka Exp $"); #include #include #include +#include #include #include @@ -124,13 +125,11 @@ int prtactive = 0; /* 1 => print out reclaim of active vnodes */ kmutex_t mountlist_lock; kmutex_t mntid_lock; -struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER; -struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER; +kmutex_t mntvnode_lock; +kmutex_t vnode_free_list_lock; +kmutex_t spechash_lock; kmutex_t vfs_list_lock; -/* XXX - gross; single global lock to protect v_numoutput */ -struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER; - struct mntlist mountlist = /* mounted filesystem list */ CIRCLEQ_HEAD_INITIALIZER(mountlist); @@ -146,7 +145,7 @@ struct device *root_device; /* root device */ void printlockedvnodes(void); #endif -long numvnodes; +u_int numvnodes; /* * Initialize the vnode management data structures. @@ -157,14 +156,16 @@ vntblinit(void) mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE); mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&spechash_lock, MUTEX_DEFAULT, IPL_NONE); mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE); mount_specificdata_domain = specificdata_domain_create(); - /* - * Initialize the filesystem syncer. - */ + /* Initialize the filesystem syncer. */ vn_initialize_syncerd(); + vn_init1(); } /* @@ -195,9 +196,68 @@ vfs_destroy(struct mount *mp) { specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); + mutex_destroy(&mp->mnt_mutex); + lockdestroy(&mp->mnt_lock); free(mp, M_MOUNT); } +/* + * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or + * recycled. + */ +void +vwait(vnode_t *vp, int flags) +{ + + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT(vp->v_usecount != 0); + + while ((vp->v_iflag & flags) != 0) + cv_wait(&vp->v_cv, &vp->v_interlock); +} + +/* + * Insert a marker vnode into a mount's vnode list, after the + * specified vnode. mntvnode_lock must be held. + */ +void +vmark(vnode_t *mvp, vnode_t *vp) +{ + struct mount *mp; + + mp = mvp->v_mount; + + KASSERT(mutex_owned(&mntvnode_lock)); + KASSERT((mvp->v_iflag & VI_MARKER) != 0); + KASSERT(vp->v_mount == mp); + + TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vp, mvp, v_mntvnodes); +} + +/* + * Remove a marker vnode from a mount's vnode list, and return + * a pointer to the next vnode in the list. mntvnode_lock must + * be held. + */ +vnode_t * +vunmark(vnode_t *mvp) +{ + vnode_t *vp; + struct mount *mp; + + mp = mvp->v_mount; + + KASSERT(mutex_owned(&mntvnode_lock)); + KASSERT((mvp->v_iflag & VI_MARKER) != 0); + + vp = TAILQ_NEXT(mvp, v_mntvnodes); + TAILQ_REMOVE(&mp->mnt_vnodelist, mvp, v_mntvnodes); + + KASSERT(vp == NULL || vp->v_mount == mp); + + return vp; +} + /* * Update outstanding I/O count and do wakeup if requested. */ @@ -206,20 +266,16 @@ vwakeup(struct buf *bp) { struct vnode *vp; - if ((vp = bp->b_vp) != NULL) { - /* XXX global lock hack - * can't use v_interlock here since this is called - * in interrupt context from biodone(). 
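vmark()/vunmark() above are the piece that lets vflush() and sysctl_kern_vnode() drop mntvnode_lock mid-scan: the marker vnode holds the iterator's position while real vnodes come and go. The complete traversal skeleton, matching the loops in vfs_subr.c with the per-vnode work elided:

	vnode_t *vp, *mvp;

	mvp = valloc(mp);	/* marker: allocated PR_WAITOK for mp != NULL */
	mutex_enter(&mntvnode_lock);
	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
	    vp = vunmark(mvp)) {
		vmark(mvp, vp);
		if (vp->v_mount != mp || vismarker(vp))
			continue;
		/*
		 * mntvnode_lock may be dropped here to work on vp;
		 * the marker keeps our place in the list.
		 */
	}
	mutex_exit(&mntvnode_lock);
	vfree(mvp);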
- */ - simple_lock(&global_v_numoutput_slock); - if (--vp->v_numoutput < 0) - panic("vwakeup: neg numoutput, vp %p", vp); - if ((vp->v_iflag & VI_BWAIT) && vp->v_numoutput <= 0) { - vp->v_iflag &= ~VI_BWAIT; - wakeup((void *)&vp->v_numoutput); - } - simple_unlock(&global_v_numoutput_slock); - } + if ((vp = bp->b_vp) == NULL) + return; + + KASSERT(bp->b_objlock == &vp->v_interlock); + KASSERT(mutex_owned(bp->b_objlock)); + + if (--vp->v_numoutput < 0) + panic("vwakeup: neg numoutput, vp %p", vp); + if (vp->v_numoutput == 0) + cv_broadcast(&vp->v_cv); } /* @@ -229,15 +285,15 @@ vwakeup(struct buf *bp) */ int vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l, - int slpflag, int slptimeo) + bool catch, int slptimeo) { struct buf *bp, *nbp; - int s, error; + int error; int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO | - (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0); + (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0); /* XXXUBC this doesn't look at flags or slp* */ - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, 0, 0, flushflags); if (error) { return error; @@ -247,67 +303,48 @@ vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l, error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0); if (error) return (error); -#ifdef DIAGNOSTIC - s = splbio(); - if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd)) - panic("vinvalbuf: dirty bufs, vp %p", vp); - splx(s); -#endif + KASSERT(vp->v_numoutput == 0 && LIST_EMPTY(&vp->v_dirtyblkhd)); } - s = splbio(); - + mutex_enter(&bufcache_lock); restart: - for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { - nbp = LIST_NEXT(bp, b_vnbufs); - simple_lock(&bp->b_interlock); - if (bp->b_flags & B_BUSY) { - bp->b_flags |= B_WANTED; - error = ltsleep((void *)bp, - slpflag | (PRIBIO + 1) | PNORELOCK, - "vinvalbuf", slptimeo, &bp->b_interlock); - if (error) { - splx(s); - return (error); - } - goto restart; - } - bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; - simple_unlock(&bp->b_interlock); - brelse(bp, 0); - } - for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); - simple_lock(&bp->b_interlock); - if (bp->b_flags & B_BUSY) { - bp->b_flags |= B_WANTED; - error = ltsleep((void *)bp, - slpflag | (PRIBIO + 1) | PNORELOCK, - "vinvalbuf", slptimeo, &bp->b_interlock); - if (error) { - splx(s); - return (error); - } - goto restart; + error = bbusy(bp, catch, slptimeo); + if (error != 0) { + if (error == EPASSTHROUGH) + goto restart; + mutex_exit(&bufcache_lock); + return (error); + } + brelsel(bp, BC_INVAL | BC_VFLUSH); + } + + for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { + nbp = LIST_NEXT(bp, b_vnbufs); + error = bbusy(bp, catch, slptimeo); + if (error != 0) { + if (error == EPASSTHROUGH) + goto restart; + mutex_exit(&bufcache_lock); + return (error); } /* * XXX Since there are no node locks for NFS, I believe * there is a slight chance that a delayed write will * occur while sleeping just above, so check for it. 
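The vinvalbuf() rework above trades the open-coded B_BUSY/B_WANTED sleep for bbusy()/brelsel() under bufcache_lock. One list's worth of that loop, isolated as a sketch (vp, bp, nbp and error are assumed declared as in vinvalbuf() itself):

	mutex_enter(&bufcache_lock);
restart:
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp != NULL; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		error = bbusy(bp, false, 0);	/* may sleep, dropping the lock */
		if (error != 0) {
			if (error == EPASSTHROUGH)
				goto restart;	/* bp was reused; rescan */
			mutex_exit(&bufcache_lock);
			return error;
		}
		brelsel(bp, BC_INVAL | BC_VFLUSH);	/* lock still held */
	}
	mutex_exit(&bufcache_lock);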
*/ - if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { + if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) { #ifdef DEBUG printf("buffer still DELWRI\n"); #endif - bp->b_flags |= B_BUSY | B_VFLUSH; - simple_unlock(&bp->b_interlock); + bp->b_cflags |= BC_BUSY | BC_VFLUSH; + mutex_exit(&bufcache_lock); VOP_BWRITE(bp); + mutex_enter(&bufcache_lock); goto restart; } - bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; - simple_unlock(&bp->b_interlock); - brelse(bp, 0); + brelsel(bp, BC_INVAL | BC_VFLUSH); } #ifdef DIAGNOSTIC @@ -315,7 +352,7 @@ restart: panic("vinvalbuf: flush failed, vp %p", vp); #endif - splx(s); + mutex_exit(&bufcache_lock); return (0); } @@ -326,91 +363,78 @@ restart: * buffers from being queued. */ int -vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo) +vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo) { struct buf *bp, *nbp; - int s, error; + int error; voff_t off; off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO); if (error) { return error; } - s = splbio(); - + mutex_enter(&bufcache_lock); restart: - for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { - nbp = LIST_NEXT(bp, b_vnbufs); - if (bp->b_lblkno < lbn) - continue; - simple_lock(&bp->b_interlock); - if (bp->b_flags & B_BUSY) { - bp->b_flags |= B_WANTED; - error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, - "vtruncbuf", slptimeo, &bp->b_interlock); - if (error) { - splx(s); - return (error); - } - goto restart; - } - bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; - simple_unlock(&bp->b_interlock); - brelse(bp, 0); - } - for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); if (bp->b_lblkno < lbn) continue; - simple_lock(&bp->b_interlock); - if (bp->b_flags & B_BUSY) { - bp->b_flags |= B_WANTED; - error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, - "vtruncbuf", slptimeo, &bp->b_interlock); - if (error) { - splx(s); - return (error); - } - goto restart; + error = bbusy(bp, catch, slptimeo); + if (error != 0) { + if (error == EPASSTHROUGH) + goto restart; + mutex_exit(&bufcache_lock); + return (error); } - bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; - simple_unlock(&bp->b_interlock); - brelse(bp, 0); + brelsel(bp, BC_INVAL | BC_VFLUSH); } - splx(s); + for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { + nbp = LIST_NEXT(bp, b_vnbufs); + if (bp->b_lblkno < lbn) + continue; + error = bbusy(bp, catch, slptimeo); + if (error != 0) { + if (error == EPASSTHROUGH) + goto restart; + mutex_exit(&bufcache_lock); + return (error); + } + brelsel(bp, BC_INVAL | BC_VFLUSH); + } + mutex_exit(&bufcache_lock); return (0); } +/* + * Flush all dirty buffers from a vnode. + * Called with the underlying vnode locked, which should prevent new dirty + * buffers from being queued. + */ void vflushbuf(struct vnode *vp, int sync) { struct buf *bp, *nbp; int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? 
PGO_SYNCIO : 0); - int s; + bool dirty; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); (void) VOP_PUTPAGES(vp, 0, 0, flags); loop: - s = splbio(); + mutex_enter(&bufcache_lock); for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); - simple_lock(&bp->b_interlock); - if ((bp->b_flags & B_BUSY)) { - simple_unlock(&bp->b_interlock); + if ((bp->b_cflags & BC_BUSY)) continue; - } - if ((bp->b_flags & B_DELWRI) == 0) + if ((bp->b_oflags & BO_DELWRI) == 0) panic("vflushbuf: not dirty, bp %p", bp); - bp->b_flags |= B_BUSY | B_VFLUSH; - simple_unlock(&bp->b_interlock); - splx(s); + bp->b_cflags |= BC_BUSY | BC_VFLUSH; + mutex_exit(&bufcache_lock); /* * Wait for I/O associated with indirect blocks to complete, * since there is no way to quickly wait for them below. @@ -421,46 +445,49 @@ loop: (void) bwrite(bp); goto loop; } - if (sync == 0) { - splx(s); + mutex_exit(&bufcache_lock); + + if (sync == 0) return; - } - simple_lock(&global_v_numoutput_slock); - while (vp->v_numoutput) { - vp->v_iflag |= VI_BWAIT; - ltsleep((void *)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0, - &global_v_numoutput_slock); - } - simple_unlock(&global_v_numoutput_slock); - splx(s); - if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { + + mutex_enter(&vp->v_interlock); + while (vp->v_numoutput != 0) + cv_wait(&vp->v_cv, &vp->v_interlock); + dirty = !LIST_EMPTY(&vp->v_dirtyblkhd); + mutex_exit(&vp->v_interlock); + + if (dirty) { vprint("vflushbuf: dirty", vp); goto loop; } } /* - * Associate a buffer with a vnode. + * Associate a buffer with a vnode. There must already be a hold on + * the vnode. */ void bgetvp(struct vnode *vp, struct buf *bp) { - int s; - if (bp->b_vp) - panic("bgetvp: not free, bp %p", bp); - VHOLD(vp); - s = splbio(); + KASSERT(bp->b_vp == NULL); + KASSERT(bp->b_objlock == &buffer_lock); + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT(mutex_owned(&bufcache_lock)); + KASSERT((bp->b_cflags & BC_BUSY) != 0); + + vholdl(vp); bp->b_vp = vp; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; + /* * Insert onto list for new vnode. */ bufinsvn(bp, &vp->v_cleanblkhd); - splx(s); + bp->b_objlock = &vp->v_interlock; } /* @@ -469,14 +496,14 @@ bgetvp(struct vnode *vp, struct buf *bp) void brelvp(struct buf *bp) { - struct vnode *vp; - int s; + struct vnode *vp = bp->b_vp; - if (bp->b_vp == NULL) - panic("brelvp: vp NULL, bp %p", bp); + KASSERT(vp != NULL); + KASSERT(bp->b_objlock == &vp->v_interlock); + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT(mutex_owned(&bufcache_lock)); + KASSERT((bp->b_cflags & BC_BUSY) != 0); - s = splbio(); - vp = bp->b_vp; /* * Delete from old vnode list, if on one. */ @@ -489,50 +516,54 @@ brelvp(struct buf *bp) vn_syncer_remove_from_worklist(vp); } + bp->b_objlock = &buffer_lock; bp->b_vp = NULL; - HOLDRELE(vp); - splx(s); + holdrelel(vp); } /* - * Reassign a buffer from one vnode to another. + * Reassign a buffer from one vnode list to another. + * The list reassignment must be within the same vnode. * Used to assign file specific control information - * (indirect blocks) to the vnode to which they belong. - * - * This function must be called at splbio(). + * (indirect blocks) to the list to which they belong. 
*/ void -reassignbuf(struct buf *bp, struct vnode *newvp) +reassignbuf(struct buf *bp, struct vnode *vp) { struct buflists *listheadp; int delayx; + KASSERT(bp->b_objlock == &vp->v_interlock); + KASSERT(mutex_owned(&vp->v_interlock)); + KASSERT((bp->b_cflags & BC_BUSY) != 0); + /* * Delete from old vnode list, if on one. */ if (LIST_NEXT(bp, b_vnbufs) != NOLIST) bufremvn(bp); + /* * If dirty, put on list of dirty buffers; * otherwise insert onto list of clean buffers. */ - if ((bp->b_flags & B_DELWRI) == 0) { - listheadp = &newvp->v_cleanblkhd; - if (TAILQ_EMPTY(&newvp->v_uobj.memq) && - (newvp->v_iflag & VI_ONWORKLST) && - LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) { - newvp->v_iflag &= ~VI_WRMAPDIRTY; - vn_syncer_remove_from_worklist(newvp); + if ((bp->b_oflags & BO_DELWRI) == 0) { + listheadp = &vp->v_cleanblkhd; + if (TAILQ_EMPTY(&vp->v_uobj.memq) && + (vp->v_iflag & VI_ONWORKLST) && + LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { + vp->v_iflag &= ~VI_WRMAPDIRTY; + vn_syncer_remove_from_worklist(vp); } } else { - listheadp = &newvp->v_dirtyblkhd; - if ((newvp->v_iflag & VI_ONWORKLST) == 0) { - switch (newvp->v_type) { + listheadp = &vp->v_dirtyblkhd; + if ((vp->v_iflag & VI_ONWORKLST) == 0) { + switch (vp->v_type) { case VDIR: delayx = dirdelay; break; case VBLK: - if (newvp->v_specmountpoint != NULL) { + if (vp->v_specmountpoint != NULL) { delayx = metadelay; break; } @@ -541,9 +572,9 @@ reassignbuf(struct buf *bp, struct vnode *newvp) delayx = filedelay; break; } - if (!newvp->v_mount || - (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0) - vn_syncer_add_to_worklist(newvp, delayx); + if (!vp->v_mount || + (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) + vn_syncer_add_to_worklist(vp, delayx); } } bufinsvn(bp, listheadp); @@ -654,11 +685,11 @@ vprint(const char *label, struct vnode *vp) printf("%s: ", label); printf("vnode @ %p, flags (%s)\n\ttag %s(%d), type %s(%d), " "usecount %d, writecount %ld, holdcount %ld\n" - "\tmount %p, data %p\n", vp, bf, + "\tfreelisthd %p, mount %p, data %p\n", vp, bf, ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag, ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type, vp->v_usecount, vp->v_writecount, vp->v_holdcnt, - vp->v_mount, vp->v_data); + vp->v_freelisthd, vp->v_mount, vp->v_data); if (vp->v_data != NULL) { printf("\t"); VOP_PRINT(vp); @@ -972,14 +1003,15 @@ vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...)) PRIx64 " dev 0x%x\n", bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev); - bitmask_snprintf(bp->b_flags, buf_flagbits, bf, sizeof(bf)); + bitmask_snprintf(bp->b_flags | bp->b_oflags | bp->b_cflags, + buf_flagbits, bf, sizeof(bf)); (*pr)(" error %d flags 0x%s\n", bp->b_error, bf); (*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n", bp->b_bufsize, bp->b_bcount, bp->b_resid); (*pr)(" data %p saveaddr %p dep %p\n", bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep)); - (*pr)(" iodone %p\n", bp->b_iodone); + (*pr)(" iodone %p objlock %p\n", bp->b_iodone, bp->b_objlock); } diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 1b3dbf6e24c5..fadd0dc24264 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_syscalls.c,v 1.337 2007/12/26 16:01:37 ad Exp $ */ +/* $NetBSD: vfs_syscalls.c,v 1.338 2008/01/02 11:48:56 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.337 2007/12/26 16:01:37 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.338 2008/01/02 11:48:56 ad Exp $"); #include "opt_compat_netbsd.h" 
#include "opt_compat_43.h" @@ -305,7 +305,7 @@ mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops, TAILQ_INIT(&mp->mnt_vnodelist); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); - simple_lock_init(&mp->mnt_slock); + mutex_init(&mp->mnt_mutex, MUTEX_DEFAULT, IPL_NONE); (void)vfs_busy(mp, LK_NOWAIT, 0); mp->mnt_vnodecovered = vp; @@ -665,8 +665,7 @@ dounmount(struct mount *mp, int flags, struct lwp *l) mp->mnt_iflag |= IMNT_UNMOUNT; mp->mnt_unmounter = l; - mutex_exit(&mountlist_lock); /* XXX */ - lockmgr(&mp->mnt_lock, LK_DRAIN, NULL); + lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_lock); async = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; @@ -690,20 +689,20 @@ dounmount(struct mount *mp, int flags, struct lwp *l) mp->mnt_iflag &= ~IMNT_UNMOUNT; mp->mnt_unmounter = NULL; mp->mnt_flag |= async; - mutex_exit(&mountlist_lock); /* XXX */ - lockmgr(&mp->mnt_lock, LK_RELEASE | LK_REENABLE, - NULL); + lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, + &mountlist_lock); if (used_syncer) mutex_exit(&syncer_mutex); - simple_lock(&mp->mnt_slock); + mutex_enter(&mp->mnt_mutex); while (mp->mnt_wcnt > 0) { wakeup(mp); - ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1", - 0, &mp->mnt_slock); + mtsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1", + 0, &mp->mnt_mutex); } - simple_unlock(&mp->mnt_slock); + mutex_exit(&mp->mnt_mutex); return (error); } + vfs_scrubvnlist(mp); mutex_enter(&mountlist_lock); CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) @@ -711,18 +710,17 @@ dounmount(struct mount *mp, int flags, struct lwp *l) if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) panic("unmount: dangling vnode"); mp->mnt_iflag |= IMNT_GONE; - mutex_exit(&mountlist_lock); - lockmgr(&mp->mnt_lock, LK_RELEASE, NULL); + lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_lock); if (coveredvp != NULLVP) vrele(coveredvp); if (used_syncer) mutex_exit(&syncer_mutex); - simple_lock(&mp->mnt_slock); + mutex_enter(&mp->mnt_mutex); while (mp->mnt_wcnt > 0) { wakeup(mp); - ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_slock); + mtsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_mutex); } - simple_unlock(&mp->mnt_slock); + mutex_exit(&mp->mnt_mutex); vfs_hooks_unmount(mp); vfs_delref(mp->mnt_op); vfs_destroy(mp); @@ -3582,9 +3580,9 @@ sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) (error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) != 0) goto out; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); revoke = (vp->v_usecount > 1 || (vp->v_iflag & (VI_ALIASED|VI_LAYER))); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); if (revoke) VOP_REVOKE(vp, REVOKEALL); out: diff --git a/sys/kern/vfs_trans.c b/sys/kern/vfs_trans.c index aa00af5024cd..989229f037d6 100644 --- a/sys/kern/vfs_trans.c +++ b/sys/kern/vfs_trans.c @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_trans.c,v 1.15 2007/12/02 13:56:16 hannken Exp $ */ +/* $NetBSD: vfs_trans.c,v 1.16 2008/01/02 11:48:57 ad Exp $ */ /*- * Copyright (c) 2007 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.15 2007/12/02 13:56:16 hannken Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.16 2008/01/02 11:48:57 ad Exp $"); /* * File system transaction operations. 
@@ -51,7 +51,6 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.15 2007/12/02 13:56:16 hannken Exp $ #include #include -#include #include #include #include @@ -63,6 +62,11 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.15 2007/12/02 13:56:16 hannken Exp $ #include #include +struct fscow_handler { + SLIST_ENTRY(fscow_handler) ch_list; + int (*ch_func)(void *, struct buf *, bool); + void *ch_arg; +}; struct fstrans_lwp_info { struct fstrans_lwp_info *fli_succ; struct mount *fli_mount; @@ -73,21 +77,17 @@ struct fstrans_mount_info { enum fstrans_state fmi_state; krwlock_t fmi_shared_lock; krwlock_t fmi_lazy_lock; + krwlock_t fmi_cow_lock; + SLIST_HEAD(, fscow_handler) fmi_cow_handler; }; static specificdata_key_t lwp_data_key; -static specificdata_key_t mount_data_key; -static specificdata_key_t mount_cow_key; static kmutex_t vfs_suspend_lock; /* Serialize suspensions. */ -static kmutex_t fstrans_init_lock; POOL_INIT(fstrans_pl, sizeof(struct fstrans_lwp_info), 0, 0, 0, "fstrans", NULL, IPL_NONE); static void fstrans_lwp_dtor(void *); -static void fstrans_mount_dtor(void *); -static void fscow_mount_dtor(void *); -static struct fstrans_mount_info *fstrans_mount_init(struct mount *); /* * Initialize @@ -99,13 +99,8 @@ fstrans_init(void) error = lwp_specific_key_create(&lwp_data_key, fstrans_lwp_dtor); KASSERT(error == 0); - error = mount_specific_key_create(&mount_data_key, fstrans_mount_dtor); - KASSERT(error == 0); - error = mount_specific_key_create(&mount_cow_key, fscow_mount_dtor); - KASSERT(error == 0); mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE); - mutex_init(&fstrans_init_lock, MUTEX_DEFAULT, IPL_NONE); } /* @@ -124,44 +119,47 @@ fstrans_lwp_dtor(void *arg) } } +/* + * Allocate mount state + */ +int +fstrans_mount(struct mount *mp) +{ + struct fstrans_mount_info *new; + + if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL) + return ENOMEM; + new->fmi_state = FSTRANS_NORMAL; + rw_init(&new->fmi_lazy_lock); + rw_init(&new->fmi_shared_lock); + SLIST_INIT(&new->fmi_cow_handler); + rw_init(&new->fmi_cow_lock); + + mp->mnt_transinfo = new; + mp->mnt_iflag |= IMNT_HAS_TRANS; + + return 0; +} + /* * Deallocate mount state */ -static void -fstrans_mount_dtor(void *arg) +void +fstrans_unmount(struct mount *mp) { - struct fstrans_mount_info *fmi = arg; + struct fstrans_mount_info *fmi; + + if ((fmi = mp->mnt_transinfo) == NULL) + return; KASSERT(fmi->fmi_state == FSTRANS_NORMAL); rw_destroy(&fmi->fmi_lazy_lock); rw_destroy(&fmi->fmi_shared_lock); - free(fmi, M_MOUNT); -} - -/* - * Create mount info for this mount - */ -static struct fstrans_mount_info * -fstrans_mount_init(struct mount *mp) -{ - struct fstrans_mount_info *new; - - mutex_enter(&fstrans_init_lock); - - if ((new = mount_getspecific(mp, mount_data_key)) != NULL) { - mutex_exit(&fstrans_init_lock); - return new; - } - - new = malloc(sizeof(*new), M_MOUNT, M_WAITOK); - new->fmi_state = FSTRANS_NORMAL; - rw_init(&new->fmi_lazy_lock); - rw_init(&new->fmi_shared_lock); - - mount_setspecific(mp, mount_data_key, new); - mutex_exit(&fstrans_init_lock); - - return new; + KASSERT(SLIST_EMPTY(&fmi->fmi_cow_handler)); + rw_destroy(&fmi->fmi_cow_lock); + kmem_free(fmi, sizeof(*fmi)); + mp->mnt_iflag &= ~IMNT_HAS_TRANS; + mp->mnt_transinfo = NULL; } /* @@ -209,8 +207,7 @@ _fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait) KASSERT(new_fli->fli_mount == NULL); KASSERT(new_fli->fli_count == 0); - if ((fmi = mount_getspecific(mp, mount_data_key)) == NULL) - fmi = fstrans_mount_init(mp); + fmi = mp->mnt_transinfo; 
if (lock_type == FSTRANS_LAZY) lock_p = &fmi->fmi_lazy_lock; @@ -255,7 +252,7 @@ fstrans_done(struct mount *mp) KASSERT(fli->fli_mount == mp); KASSERT(fli->fli_count == 0); fli->fli_mount = NULL; - fmi = mount_getspecific(mp, mount_data_key); + fmi = mp->mnt_transinfo; KASSERT(fmi != NULL); if (fli->fli_lock_type == FSTRANS_LAZY) rw_exit(&fmi->fmi_lazy_lock); @@ -296,8 +293,7 @@ fstrans_setstate(struct mount *mp, enum fstrans_state new_state) { struct fstrans_mount_info *fmi; - if ((fmi = mount_getspecific(mp, mount_data_key)) == NULL) - fmi = fstrans_mount_init(mp); + fmi = mp->mnt_transinfo; switch (new_state) { case FSTRANS_SUSPENDING: @@ -344,8 +340,7 @@ fstrans_getstate(struct mount *mp) { struct fstrans_mount_info *fmi; - if ((fmi = mount_getspecific(mp, mount_data_key)) == NULL) - return FSTRANS_NORMAL; + fmi = mp->mnt_transinfo; return fmi->fmi_state; } @@ -432,7 +427,7 @@ fstrans_print_mount(struct mount *mp, int verbose) { struct fstrans_mount_info *fmi; - fmi = mount_getspecific(mp, mount_data_key); + fmi = mp->mnt_transinfo; if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL)) return; @@ -483,76 +478,25 @@ fstrans_dump(int full) } #endif /* defined(DDB) */ - -struct fscow_handler { - SLIST_ENTRY(fscow_handler) ch_list; - int (*ch_func)(void *, struct buf *, bool); - void *ch_arg; -}; - -struct fscow_mount_info { - krwlock_t cmi_lock; - SLIST_HEAD(, fscow_handler) cmi_handler; -}; - -/* - * Deallocate mount state - */ -static void -fscow_mount_dtor(void *arg) -{ - struct fscow_mount_info *cmi = arg; - - KASSERT(SLIST_EMPTY(&cmi->cmi_handler)); - rw_destroy(&cmi->cmi_lock); - kmem_free(cmi, sizeof(*cmi)); -} - -/* - * Create mount info for this mount - */ -static struct fscow_mount_info * -fscow_mount_init(struct mount *mp) -{ - struct fscow_mount_info *new; - - mutex_enter(&fstrans_init_lock); - - if ((new = mount_getspecific(mp, mount_cow_key)) != NULL) { - mutex_exit(&fstrans_init_lock); - return new; - } - - if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) != NULL) { - SLIST_INIT(&new->cmi_handler); - rw_init(&new->cmi_lock); - mount_setspecific(mp, mount_cow_key, new); - } - - mutex_exit(&fstrans_init_lock); - - return new; -} - int fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool), void *arg) { - struct fscow_mount_info *cmi; + struct fstrans_mount_info *fmi; struct fscow_handler *new; - if ((cmi = mount_getspecific(mp, mount_cow_key)) == NULL) - cmi = fscow_mount_init(mp); - if (cmi == NULL) - return ENOMEM; + if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0) + return EINVAL; + + fmi = mp->mnt_transinfo; if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL) return ENOMEM; new->ch_func = func; new->ch_arg = arg; - rw_enter(&cmi->cmi_lock, RW_WRITER); - SLIST_INSERT_HEAD(&cmi->cmi_handler, new, ch_list); - rw_exit(&cmi->cmi_lock); + rw_enter(&fmi->fmi_cow_lock, RW_WRITER); + SLIST_INSERT_HEAD(&fmi->fmi_cow_handler, new, ch_list); + rw_exit(&fmi->fmi_cow_lock); return 0; } @@ -561,21 +505,23 @@ int fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool), void *arg) { - struct fscow_mount_info *cmi; + struct fstrans_mount_info *fmi; struct fscow_handler *hp = NULL; - if ((cmi = mount_getspecific(mp, mount_cow_key)) == NULL) + if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0) return EINVAL; - rw_enter(&cmi->cmi_lock, RW_WRITER); - SLIST_FOREACH(hp, &cmi->cmi_handler, ch_list) + fmi = mp->mnt_transinfo; + + rw_enter(&fmi->fmi_cow_lock, RW_WRITER); + SLIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) if (hp->ch_func == func && 
hp->ch_arg == arg) break; if (hp != NULL) { - SLIST_REMOVE(&cmi->cmi_handler, hp, fscow_handler, ch_list); + SLIST_REMOVE(&fmi->fmi_cow_handler, hp, fscow_handler, ch_list); kmem_free(hp, sizeof(*hp)); } - rw_exit(&cmi->cmi_lock); + rw_exit(&fmi->fmi_cow_lock); return hp ? 0 : EINVAL; } @@ -585,10 +531,10 @@ fscow_run(struct buf *bp, bool data_valid) { int error = 0; struct mount *mp; - struct fscow_mount_info *cmi; + struct fstrans_mount_info *fmi; struct fscow_handler *hp; - if ((bp->b_flags & B_COWDONE)) + if ((bp->b_oflags & BO_COWDONE)) goto done; if (bp->b_vp == NULL) goto done; @@ -596,21 +542,23 @@ fscow_run(struct buf *bp, bool data_valid) mp = bp->b_vp->v_specmountpoint; else mp = bp->b_vp->v_mount; - if (mp == NULL) + if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0) goto done; - if ((cmi = mount_getspecific(mp, mount_cow_key)) == NULL) - goto done; + fmi = mp->mnt_transinfo; - rw_enter(&cmi->cmi_lock, RW_READER); - SLIST_FOREACH(hp, &cmi->cmi_handler, ch_list) + rw_enter(&fmi->fmi_cow_lock, RW_READER); + SLIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list) if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0) break; - rw_exit(&cmi->cmi_lock); + rw_exit(&fmi->fmi_cow_lock); -done: - if (error == 0) - bp->b_flags |= B_COWDONE; + done: + if (error == 0) { + mutex_enter(bp->b_objlock); + bp->b_oflags |= BO_COWDONE; + mutex_exit(bp->b_objlock); + } return error; } diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 67e2c87375c4..5738a12b5a2d 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_vnops.c,v 1.148 2007/12/08 19:29:50 pooka Exp $ */ +/* $NetBSD: vfs_vnops.c,v 1.149 2008/01/02 11:48:57 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.148 2007/12/08 19:29:50 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.149 2008/01/02 11:48:57 ad Exp $"); #include "fs_union.h" #include "veriexec.h" @@ -194,9 +194,9 @@ vn_open(struct nameidata *ndp, int fmode, int cmode) if ((error = VOP_OPEN(vp, fmode, cred)) != 0) goto bad; if (fmode & FWRITE) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); vp->v_writecount++; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } bad: @@ -255,7 +255,7 @@ void vn_markexec(struct vnode *vp) { - LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); + KASSERT(mutex_owned(&vp->v_interlock)); if ((vp->v_iflag & VI_EXECMAP) == 0) { atomic_add_int(&uvmexp.filepages, -vp->v_uobj.uo_npages); @@ -272,15 +272,15 @@ int vn_marktext(struct vnode *vp) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_writecount != 0) { KASSERT((vp->v_iflag & VI_TEXT) == 0); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (ETXTBSY); } vp->v_iflag |= VI_TEXT; vn_markexec(vp); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (0); } @@ -294,7 +294,7 @@ vn_close(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l) { int error; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (flags & FWRITE) vp->v_writecount--; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK); @@ -676,21 +676,20 @@ vn_lock(struct vnode *vp, int flags) || (vp->v_iflag & VI_ONWORKLST) != 0); #endif KASSERT((flags & - ~(LK_INTERLOCK|LK_SHARED|LK_EXCLUSIVE|LK_DRAIN|LK_NOWAIT|LK_RETRY| - LK_SETRECURSE|LK_CANRECURSE)) + ~(LK_INTERLOCK|LK_SHARED|LK_EXCLUSIVE|LK_NOWAIT|LK_RETRY| + LK_CANRECURSE)) == 0); do { if ((flags & LK_INTERLOCK) == 
0)
-			simple_lock(&vp->v_interlock);
+			mutex_enter(&vp->v_interlock);
 		if (vp->v_iflag & VI_XLOCK) {
 			if (flags & LK_NOWAIT) {
-				simple_unlock(&vp->v_interlock);
+				mutex_exit(&vp->v_interlock);
 				return EBUSY;
 			}
-			vp->v_iflag |= VI_XWANT;
-			ltsleep(vp, PINOD | PNORELOCK,
-			    "vn_lock", 0, &vp->v_interlock);
+			vwait(vp, VI_XLOCK);
+			mutex_exit(&vp->v_interlock);
 			error = ENOENT;
 		} else {
 			error = VOP_LOCK(vp,
@@ -720,13 +719,13 @@ vn_closefile(struct file *fp, struct lwp *l)
 u_int
 vn_setrecurse(struct vnode *vp)
 {
-	struct lock *lkp = &vp->v_lock;
+	struct lock *lkp = vp->v_vnlock;
 	u_int retval;
 
-	simple_lock(&lkp->lk_interlock);
+	mutex_enter(&lkp->lk_interlock);
 	retval = lkp->lk_flags & LK_CANRECURSE;
 	lkp->lk_flags |= LK_CANRECURSE;
-	simple_unlock(&lkp->lk_interlock);
+	mutex_exit(&lkp->lk_interlock);
 
 	return retval;
 }
@@ -737,12 +736,12 @@ vn_setrecurse(struct vnode *vp)
 void
 vn_restorerecurse(struct vnode *vp, u_int flags)
 {
-	struct lock *lkp = &vp->v_lock;
+	struct lock *lkp = vp->v_vnlock;
 
-	simple_lock(&lkp->lk_interlock);
+	mutex_enter(&lkp->lk_interlock);
 	lkp->lk_flags &= ~LK_CANRECURSE;
 	lkp->lk_flags |= flags;
-	simple_unlock(&lkp->lk_interlock);
+	mutex_exit(&lkp->lk_interlock);
 }
 
 /*
@@ -842,23 +841,23 @@ vn_ra_allocctx(struct vnode *vp)
 {
 	struct uvm_ractx *ra = NULL;
 
+	KASSERT(mutex_owned(&vp->v_interlock));
+
 	if (vp->v_type != VREG) {
 		return;
 	}
 	if (vp->v_ractx != NULL) {
 		return;
 	}
-	simple_lock(&vp->v_interlock);
 	if (vp->v_ractx == NULL) {
-		simple_unlock(&vp->v_interlock);
+		mutex_exit(&vp->v_interlock);
 		ra = uvm_ra_allocctx();
-		simple_lock(&vp->v_interlock);
+		mutex_enter(&vp->v_interlock);
 		if (ra != NULL && vp->v_ractx == NULL) {
 			vp->v_ractx = ra;
 			ra = NULL;
 		}
 	}
-	simple_unlock(&vp->v_interlock);
 	if (ra != NULL) {
 		uvm_ra_freectx(ra);
 	}
diff --git a/sys/kern/vnode_if.sh b/sys/kern/vnode_if.sh
index 919364ed9cc3..b50cc0fadec7 100644
--- a/sys/kern/vnode_if.sh
+++ b/sys/kern/vnode_if.sh
@@ -29,7 +29,7 @@ copyright="\
  * SUCH DAMAGE.
  */
 "
-SCRIPT_ID='$NetBSD: vnode_if.sh,v 1.47 2007/11/26 19:02:11 pooka Exp $'
+SCRIPT_ID='$NetBSD: vnode_if.sh,v 1.48 2008/01/02 11:48:57 ad Exp $'
 
 # Script to produce VFS front-end sugar.
 #
@@ -220,6 +220,8 @@ BEGIN {
 	vop_offset = 1;	# start at 1, to count the 'default' op
 
 	printf("\n/* Special cases: */\n#include <sys/buf.h>\n");
+	printf("#ifndef _KERNEL\n#include <stdbool.h>\n#endif\n\n");
+
 	argc=1;
 	argtype[0]="struct buf *";
 	argname[0]="bp";
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index 5479cac10487..d70a0ec9be2a 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -1,4 +1,4 @@
-#	$NetBSD: vnode_if.src,v 1.55 2007/11/26 19:02:11 pooka Exp $
+#	$NetBSD: vnode_if.src,v 1.56 2008/01/02 11:48:58 ad Exp $
 #
 # Copyright (c) 1992, 1993
 #	The Regents of the University of California.  All rights reserved.
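
Given the vn_lock() rework above, the caller-side idiom with the new kmutex interlock looks roughly like this; a sketch under the assumption that callers still use LK_INTERLOCK to hand an already-held v_interlock to vn_lock():

	/* Hypothetical caller; not part of the patch. */
	mutex_enter(&vp->v_interlock);
	/* ... inspect v_iflag, v_usecount, etc. under the interlock ... */
	error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK);
	if (error != 0)
		return error;	/* vn_lock() has released the interlock */
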
@@ -373,6 +373,7 @@ vop_abortop { # vop_inactive { IN LOCKED=YES WILLUNLOCK struct vnode *vp; + INOUT bool *recycle; }; # diff --git a/sys/lib/libsa/ufs.c b/sys/lib/libsa/ufs.c index 055ed409d0b3..12061eff6cec 100644 --- a/sys/lib/libsa/ufs.c +++ b/sys/lib/libsa/ufs.c @@ -1,4 +1,4 @@ -/* $NetBSD: ufs.c,v 1.52 2007/12/01 17:44:16 tsutsui Exp $ */ +/* $NetBSD: ufs.c,v 1.53 2008/01/02 11:48:58 ad Exp $ */ /*- * Copyright (c) 1993 @@ -68,6 +68,7 @@ #include #ifdef LIBSA_LFS #include +#include #include /* XXX for MNAMELEN */ #include #else diff --git a/sys/miscfs/deadfs/dead_vnops.c b/sys/miscfs/deadfs/dead_vnops.c index 96fbb5bf1230..ff9f25c98712 100644 --- a/sys/miscfs/deadfs/dead_vnops.c +++ b/sys/miscfs/deadfs/dead_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: dead_vnops.c,v 1.45 2007/10/10 20:42:28 ad Exp $ */ +/* $NetBSD: dead_vnops.c,v 1.46 2008/01/02 11:48:58 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: dead_vnops.c,v 1.45 2007/10/10 20:42:28 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: dead_vnops.c,v 1.46 2008/01/02 11:48:58 ad Exp $"); #include #include @@ -300,7 +300,7 @@ dead_getpages(void *v) } */ *ap = v; if ((ap->a_flags & PGO_LOCKED) == 0) - simple_unlock(&ap->a_vp->v_interlock); + mutex_exit(&ap->a_vp->v_interlock); return (EFAULT); } @@ -317,13 +317,12 @@ chkvnlock(vp, interlock) int locked = 0; if (!interlock) - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); while (vp->v_iflag & VI_XLOCK) { - vp->v_iflag |= VI_XWANT; - (void) tsleep(vp, PINOD, "deadchk", 0); + vwait(vp, VI_XLOCK); locked = 1; } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (locked); } diff --git a/sys/miscfs/fdesc/fdesc_vfsops.c b/sys/miscfs/fdesc/fdesc_vfsops.c index a2a140a0c721..8af010c5410e 100644 --- a/sys/miscfs/fdesc/fdesc_vfsops.c +++ b/sys/miscfs/fdesc/fdesc_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: fdesc_vfsops.c,v 1.71 2007/11/26 19:02:12 pooka Exp $ */ +/* $NetBSD: fdesc_vfsops.c,v 1.72 2008/01/02 11:48:58 ad Exp $ */ /* * Copyright (c) 1992, 1993, 1995 @@ -41,7 +41,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fdesc_vfsops.c,v 1.71 2007/11/26 19:02:12 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fdesc_vfsops.c,v 1.72 2008/01/02 11:48:58 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" @@ -128,11 +128,7 @@ fdesc_unmount(struct mount *mp, int mntflags) return (error); /* - * Release reference on underlying root vnode - */ - vrele(rtvp); - /* - * And blow it away for future re-use + * Blow it away for future re-use */ vgone(rtvp); /* diff --git a/sys/miscfs/genfs/genfs_io.c b/sys/miscfs/genfs/genfs_io.c index 13da88a4d761..802ec58e1c76 100644 --- a/sys/miscfs/genfs/genfs_io.c +++ b/sys/miscfs/genfs/genfs_io.c @@ -1,4 +1,4 @@ -/* $NetBSD: genfs_io.c,v 1.1 2007/10/17 16:45:00 pooka Exp $ */ +/* $NetBSD: genfs_io.c,v 1.2 2008/01/02 11:48:59 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -31,7 +31,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.1 2007/10/17 16:45:00 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.2 2008/01/02 11:48:59 ad Exp $"); #include #include @@ -81,9 +81,9 @@ genfs_rel_pages(struct vm_page **pgs, int npages) pg->flags |= PG_RELEASED; } } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_page_unbusy(pgs, npages); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } /* @@ -178,7 +178,7 @@ startover: if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) { if ((flags & PGO_LOCKED) == 0) { - simple_unlock(&uobj->vmobjlock); + 
mutex_exit(&uobj->vmobjlock); } UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x", origoffset, *ap->a_count, memeof,0); @@ -255,7 +255,7 @@ startover: error = (ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0); goto out_err; } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); /* * find the requested pages and make some simple checks. @@ -312,7 +312,7 @@ startover: } else { rw_enter(&gp->g_glock, RW_READER); } - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); if (vp->v_size < origvsize) { rw_exit(&gp->g_glock); if (pgs != pgs_onstack) @@ -325,7 +325,7 @@ startover: rw_exit(&gp->g_glock); KASSERT(async != 0); genfs_rel_pages(&pgs[ridx], orignpages); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); error = EBUSY; goto out_err; } @@ -392,12 +392,12 @@ startover: rw_exit(&gp->g_glock); KASSERT(async != 0); genfs_rel_pages(pgs, npages); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); error = EBUSY; goto out_err; } } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); /* * read the desired page(s). @@ -411,13 +411,18 @@ startover: kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK); - mbp = getiobuf(); + mbp = getiobuf(vp, true); mbp->b_bufsize = totalbytes; mbp->b_data = (void *)kva; mbp->b_resid = mbp->b_bcount = bytes; - mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL|B_ASYNC : 0); - mbp->b_iodone = (async ? uvm_aio_biodone : 0); - mbp->b_vp = vp; + mbp->b_cflags = BC_BUSY; + if (async) { + mbp->b_flags = B_READ | B_ASYNC; + mbp->b_iodone = uvm_aio_biodone; + } else { + mbp->b_flags = B_READ; + mbp->b_iodone = NULL; + } if (async) BIO_SETPRIO(mbp, BPRIO_TIMELIMITED); else @@ -545,7 +550,7 @@ startover: if (offset == startoffset && iobytes == bytes) { bp = mbp; } else { - bp = getiobuf(); + bp = getiobuf(vp, true); nestiobuf_setup(mbp, bp, offset - startoffset, iobytes); } bp->b_lblkno = 0; @@ -600,7 +605,7 @@ loopdone: } } rw_exit(&gp->g_glock); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); /* * we're almost done! release the pages... 
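
The getpages hunks above show the new three-way buffer flag split in action. Restated as a compact sketch (mirroring the code above, not additional patch content): b_cflags holds buffer-cache state (BC_*, protected by bufcache_lock), b_oflags holds owner state (BO_*, protected by b_objlock), and b_flags keeps the plain I/O bits:

	struct buf *mbp;

	mbp = getiobuf(vp, true);		/* second arg: may sleep */
	mbp->b_cflags = BC_BUSY;		/* cache state; buf not yet visible */
	if (async) {
		mbp->b_flags = B_READ | B_ASYNC;	/* I/O bits stay in b_flags */
		mbp->b_iodone = uvm_aio_biodone;	/* async completion callback */
	} else {
		mbp->b_flags = B_READ;
		mbp->b_iodone = NULL;			/* caller will biowait() */
	}
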
@@ -620,10 +625,10 @@ loopdone: pgs[i]->flags |= PG_RELEASED; } } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_page_unbusy(pgs, npages); - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uobj->vmobjlock); UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0); goto out_err; } @@ -631,7 +636,7 @@ loopdone: out: UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0); error = 0; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { pg = pgs[i]; if (pg == NULL) { @@ -663,8 +668,8 @@ out: UVM_PAGE_OWN(pg, NULL); } } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uobj->vmobjlock); if (ap->a_m != NULL) { memcpy(ap->a_m, &pgs[ridx], orignpages * sizeof(struct vm_page *)); @@ -745,11 +750,11 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, struct vm_page **busypg) { struct uvm_object *uobj = &vp->v_uobj; - struct simplelock *slock = &uobj->vmobjlock; + kmutex_t *slock = &uobj->vmobjlock; off_t off; /* Even for strange MAXPHYS, the shift rounds down to a page */ #define maxpages (MAXPHYS >> PAGE_SHIFT) - int i, s, error, npages, nback; + int i, error, npages, nback; int freeflag; struct vm_page *pgs[maxpages], *pg, *nextpg, *tpg, curmp, endmp; bool wasclean, by_list, needs_clean, yld; @@ -774,14 +779,12 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, KASSERT((vp->v_iflag & VI_ONWORKLST) != 0 || (vp->v_iflag & VI_WRMAPDIRTY) == 0); if (uobj->uo_npages == 0) { - s = splbio(); if (vp->v_iflag & VI_ONWORKLST) { vp->v_iflag &= ~VI_WRMAPDIRTY; if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL) vn_syncer_remove_from_worklist(vp); } - splx(s); - simple_unlock(slock); + mutex_exit(slock); return (0); } @@ -790,7 +793,7 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, */ if ((flags & PGO_CLEANIT) != 0) { - simple_unlock(slock); + mutex_exit(slock); if (pagedaemon) { error = fstrans_start_nowait(vp->v_mount, FSTRANS_LAZY); if (error) @@ -798,15 +801,11 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, } else fstrans_start(vp->v_mount, FSTRANS_LAZY); has_trans = true; - simple_lock(slock); + mutex_enter(slock); } error = 0; - s = splbio(); - simple_lock(&global_v_numoutput_slock); wasclean = (vp->v_numoutput == 0); - simple_unlock(&global_v_numoutput_slock); - splx(s); off = startoff; if (endoff == 0 || flags & PGO_ALLPAGES) { endoff = trunc_page(LLONG_MAX); @@ -917,13 +916,13 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, TAILQ_NEXT(&curmp, listq), 0,0,0); } if (yld) { - simple_unlock(slock); + mutex_exit(slock); preempt(); - simple_lock(slock); + mutex_enter(slock); } else { pg->flags |= PG_WANTED; UVM_UNLOCK_AND_WAIT(pg, slock, 0, "genput", 0); - simple_lock(slock); + mutex_enter(slock); } if (by_list) { UVMHIST_LOG(ubchist, "after next %p", @@ -1035,7 +1034,7 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, */ if (flags & (PGO_DEACTIVATE|PGO_FREE)) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); } for (i = 0; i < npages; i++) { tpg = pgs[i]; @@ -1052,7 +1051,7 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, if (tpg->flags & PG_BUSY) { tpg->flags |= freeflag; if (pagedaemon) { - uvmexp.paging++; + uvm_pageout_start(1); uvm_pagedequeue(tpg); } } else { @@ -1071,7 +1070,7 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, 
} } if (flags & (PGO_DEACTIVATE|PGO_FREE)) { - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } if (needs_clean) { modified = true; @@ -1085,9 +1084,9 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, TAILQ_INSERT_AFTER(&uobj->memq, pg, &curmp, listq); } - simple_unlock(slock); + mutex_exit(slock); error = GOP_WRITE(vp, pgs, npages, flags); - simple_lock(slock); + mutex_enter(slock); if (by_list) { pg = TAILQ_NEXT(&curmp, listq); TAILQ_REMOVE(&uobj->memq, &curmp, listq); @@ -1135,35 +1134,23 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, * and we're doing sync i/o, wait for all writes to finish. */ - s = splbio(); if (cleanall && wasclean && gp->g_dirtygen == dirtygen && (vp->v_iflag & VI_ONWORKLST) != 0) { vp->v_iflag &= ~VI_WRMAPDIRTY; if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL) vn_syncer_remove_from_worklist(vp); } - splx(s); #if !defined(DEBUG) skip_scan: #endif /* !defined(DEBUG) */ - if (!wasclean && !async) { - s = splbio(); - /* - * XXX - we want simple_unlock(&global_v_numoutput_slock); - * but the slot in ltsleep() is taken! - * XXX - try to recover from missed wakeups with a timeout.. - * must think of something better. - */ - while (vp->v_numoutput != 0) { - vp->v_iflag |= VI_BWAIT; - UVM_UNLOCK_AND_WAIT(&vp->v_numoutput, slock, false, - "genput2", hz); - simple_lock(slock); - } - splx(s); + + /* Wait for output to complete. */ + if (!wasclean && !async && vp->v_numoutput != 0) { + while (vp->v_numoutput != 0) + cv_wait(&vp->v_cv, slock); } - simple_unlock(slock); + mutex_exit(slock); if (has_trans) fstrans_done(vp->v_mount); @@ -1235,21 +1222,24 @@ genfs_do_io(struct vnode *vp, off_t off, vaddr_t kva, size_t len, int flags, KASSERT(bytes != 0); if (write) { - s = splbio(); - simple_lock(&global_v_numoutput_slock); + mutex_enter(&vp->v_interlock); vp->v_numoutput += 2; - simple_unlock(&global_v_numoutput_slock); - splx(s); + mutex_exit(&vp->v_interlock); } - mbp = getiobuf(); + mbp = getiobuf(vp, true); UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x", vp, mbp, vp->v_numoutput, bytes); mbp->b_bufsize = len; mbp->b_data = (void *)kva; mbp->b_resid = mbp->b_bcount = bytes; - mbp->b_flags = B_BUSY | brw | B_AGE | (async ? (B_CALL | B_ASYNC) : 0); - mbp->b_iodone = iodone; - mbp->b_vp = vp; + mbp->b_cflags = BC_BUSY | BC_AGE; + if (async) { + mbp->b_flags = brw | B_ASYNC; + mbp->b_iodone = iodone; + } else { + mbp->b_flags = brw; + mbp->b_iodone = NULL; + } if (curlwp == uvm.pagedaemon_lwp) BIO_SETPRIO(mbp, BPRIO_TIMELIMITED); else if (async) @@ -1287,7 +1277,7 @@ genfs_do_io(struct vnode *vp, off_t off, vaddr_t kva, size_t len, int flags, } else { UVMHIST_LOG(ubchist, "vp %p bp %p num now %d", vp, bp, vp->v_numoutput, 0); - bp = getiobuf(); + bp = getiobuf(vp, true); nestiobuf_setup(mbp, bp, offset - startoffset, iobytes); } bp->b_lblkno = 0; @@ -1334,7 +1324,7 @@ genfs_null_putpages(void *v) struct vnode *vp = ap->a_vp; KASSERT(vp->v_uobj.uo_npages == 0); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (0); } @@ -1378,16 +1368,16 @@ genfs_compat_getpages(void *v) return (ap->a_m[ap->a_centeridx] == NULL ? 
EBUSY : 0); } if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= vp->v_size) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return (EINVAL); } if ((ap->a_flags & PGO_SYNCIO) == 0) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return 0; } npages = orignpages; uvn_findpages(uobj, origoffset, &npages, pgs, UFP_ALL); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK); for (i = 0; i < npages; i++) { @@ -1413,8 +1403,8 @@ genfs_compat_getpages(void *v) } } uvm_pagermapout(kva, npages); - simple_lock(&uobj->vmobjlock); - uvm_lock_pageq(); + mutex_enter(&uobj->vmobjlock); + mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { pg = pgs[i]; if (error && (pg->flags & PG_FAKE) != 0) { @@ -1427,8 +1417,8 @@ genfs_compat_getpages(void *v) if (error) { uvm_page_unbusy(pgs, npages); } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uobj->vmobjlock); return (error); } @@ -1442,7 +1432,7 @@ genfs_compat_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, kauth_cred_t cred = curlwp->l_cred; struct buf *bp; vaddr_t kva; - int s, error; + int error; offset = pgs[0]->offset; kva = uvm_pagermapin(pgs, npages, @@ -1459,13 +1449,12 @@ genfs_compat_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, /* XXX vn_lock */ error = VOP_WRITE(vp, &uio, 0, cred); - s = splbio(); - V_INCR_NUMOUTPUT(vp); - splx(s); + mutex_enter(&vp->v_interlock); + vp->v_numoutput++; + mutex_exit(&vp->v_interlock); - bp = getiobuf(); - bp->b_flags = B_BUSY | B_WRITE | B_AGE; - bp->b_vp = vp; + bp = getiobuf(vp, true); + bp->b_cflags = BC_BUSY | BC_AGE; bp->b_lblkno = offset >> vp->v_mount->mnt_fs_bshift; bp->b_data = (char *)kva; bp->b_bcount = npages << PAGE_SHIFT; @@ -1568,15 +1557,14 @@ genfs_directio(struct vnode *vp, struct uio *uio, int ioflag) static void genfs_dio_iodone(struct buf *bp) { - int s; KASSERT((bp->b_flags & B_ASYNC) == 0); - s = splbio(); - if ((bp->b_flags & (B_READ | B_AGE)) == B_AGE) { + if ((bp->b_flags & B_READ) == 0 && (bp->b_cflags & BC_AGE) != 0) { + mutex_enter(bp->b_objlock); vwakeup(bp); + mutex_exit(bp->b_objlock); } putiobuf(bp); - splx(s); } /* @@ -1631,7 +1619,7 @@ genfs_do_directio(struct vmspace *vs, vaddr_t uva, size_t len, struct vnode *vp, spoff = trunc_page(off); epoff = round_page(off + len); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, spoff, epoff, pgoflags); if (error) { return error; @@ -1683,3 +1671,4 @@ genfs_do_directio(struct vmspace *vs, vaddr_t uva, size_t len, struct vnode *vp, uvm_vsunlock(vs, (void *)uva, len); return error; } + diff --git a/sys/miscfs/genfs/genfs_vnops.c b/sys/miscfs/genfs/genfs_vnops.c index a548160741d5..35f6791b4a2f 100644 --- a/sys/miscfs/genfs/genfs_vnops.c +++ b/sys/miscfs/genfs/genfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: genfs_vnops.c,v 1.159 2007/12/05 17:19:59 pooka Exp $ */ +/* $NetBSD: genfs_vnops.c,v 1.160 2008/01/02 11:48:59 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -31,7 +31,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.159 2007/12/05 17:19:59 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.160 2008/01/02 11:48:59 ad Exp $"); #include #include @@ -219,56 +219,46 @@ genfs_revoke(void *v) struct vnode *a_vp; int a_flags; } */ *ap = v; - struct vnode *vp, *vq; - struct lwp *l = curlwp; /* XXX */ + struct vnode *vp, *vq, **vpp; + enum vtype type; + 
dev_t dev; #ifdef DIAGNOSTIC if ((ap->a_flags & REVOKEALL) == 0) panic("genfs_revoke: not revokeall"); #endif - vp = ap->a_vp; - simple_lock(&vp->v_interlock); - if (vp->v_iflag & VI_ALIASED) { - /* - * If a vgone (or vclean) is already in progress, - * wait until it is done and return. - */ - if (vp->v_iflag & VI_XLOCK) { - vp->v_iflag |= VI_XWANT; - ltsleep(vp, PINOD|PNORELOCK, "vop_revokeall", 0, - &vp->v_interlock); - return (0); - } - /* - * Ensure that vp will not be vgone'd while we - * are eliminating its aliases. - */ - vp->v_iflag |= VI_XLOCK; - simple_unlock(&vp->v_interlock); - while (vp->v_iflag & VI_ALIASED) { - simple_lock(&spechash_slock); - for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { - if (vq->v_rdev != vp->v_rdev || - vq->v_type != vp->v_type || vp == vq) - continue; - simple_unlock(&spechash_slock); - vgone(vq); - break; - } - if (vq == NULLVP) - simple_unlock(&spechash_slock); - } - /* - * Remove the lock so that vgone below will - * really eliminate the vnode after which time - * vgone will awaken any sleepers. - */ - simple_lock(&vp->v_interlock); - vp->v_iflag &= ~VI_XLOCK; + mutex_enter(&vp->v_interlock); + if ((vp->v_iflag & VI_CLEAN) != 0) { + mutex_exit(&vp->v_interlock); + return (0); + } else { + dev = vp->v_rdev; + type = vp->v_type; + mutex_exit(&vp->v_interlock); } - vgonel(vp, l); + + if (type != VBLK && type != VCHR) + return (0); + + vpp = &speclisth[SPECHASH(dev)]; + mutex_enter(&spechash_lock); + for (vq = *vpp; vq != NULL;) { + if (vq->v_rdev != dev || vq->v_type != type) { + vq = vq->v_specnext; + continue; + } + mutex_enter(&vq->v_interlock); + mutex_exit(&spechash_lock); + vq->v_usecount++; + vclean(vq, DOCLOSE); + vrelel(vq, 1, 0); + mutex_enter(&spechash_lock); + vq = *vpp; + } + mutex_exit(&spechash_lock); + return (0); } @@ -334,7 +324,7 @@ genfs_nolock(void *v) * the interlock here. */ if (ap->a_flags & LK_INTERLOCK) - simple_unlock(&ap->a_vp->v_interlock); + mutex_exit(&ap->a_vp->v_interlock); return (0); } diff --git a/sys/miscfs/genfs/layer_subr.c b/sys/miscfs/genfs/layer_subr.c index 39b55aef4ca7..75bf9c017dc1 100644 --- a/sys/miscfs/genfs/layer_subr.c +++ b/sys/miscfs/genfs/layer_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: layer_subr.c,v 1.22 2007/10/10 20:42:28 ad Exp $ */ +/* $NetBSD: layer_subr.c,v 1.23 2008/01/02 11:48:59 ad Exp $ */ /* * Copyright (c) 1999 National Aeronautics & Space Administration @@ -68,7 +68,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: layer_subr.c,v 1.22 2007/10/10 20:42:28 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: layer_subr.c,v 1.23 2008/01/02 11:48:59 ad Exp $"); #include #include @@ -77,7 +77,9 @@ __KERNEL_RCSID(0, "$NetBSD: layer_subr.c,v 1.22 2007/10/10 20:42:28 ad Exp $"); #include #include #include +#include #include + #include #include #include @@ -157,8 +159,9 @@ loop: * the layer vp's lock separately afterward, but only * if it does not share the lower vp's lock. 
*/ + mutex_enter(&vp->v_interlock); mutex_exit(&lmp->layerm_hashlock); - error = vget(vp, 0); + error = vget(vp, LK_INTERLOCK); if (error) { mutex_enter(&lmp->layerm_hashlock); goto loop; @@ -189,15 +192,19 @@ layer_node_alloc(mp, lowervp, vpp) int error; extern int (**dead_vnodeop_p)(void *); - if ((error = getnewvnode(lmp->layerm_tag, mp, lmp->layerm_vnodeop_p, - &vp)) != 0) + error = getnewvnode(lmp->layerm_tag, mp, lmp->layerm_vnodeop_p, &vp); + if (error != 0) return (error); vp->v_type = lowervp->v_type; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); vp->v_iflag |= VI_LAYER; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); - xp = malloc(lmp->layerm_size, M_TEMP, M_WAITOK); + xp = kmem_alloc(lmp->layerm_size, KM_SLEEP); + if (xp == NULL) { + ungetnewvnode(vp); + return ENOMEM; + } if (vp->v_type == VBLK || vp->v_type == VCHR) { MALLOC(vp->v_specinfo, struct specinfo *, sizeof(struct specinfo), M_VNODE, M_WAITOK); @@ -206,6 +213,8 @@ layer_node_alloc(mp, lowervp, vpp) } vp->v_data = xp; + vp->v_vflag = (vp->v_vflag & ~VV_MPSAFE) | + (lowervp->v_vflag & VV_MPSAFE); xp->layer_vnode = vp; xp->layer_lowervp = lowervp; xp->layer_flags = 0; @@ -220,7 +229,7 @@ layer_node_alloc(mp, lowervp, vpp) *vpp = nvp; /* free the substructures we've allocated. */ - FREE(xp, M_TEMP); + kmem_free(xp, lmp->layerm_size); if (vp->v_type == VBLK || vp->v_type == VCHR) FREE(vp->v_specinfo, M_VNODE); diff --git a/sys/miscfs/genfs/layer_vnops.c b/sys/miscfs/genfs/layer_vnops.c index 61d9666f3044..ff59798b820d 100644 --- a/sys/miscfs/genfs/layer_vnops.c +++ b/sys/miscfs/genfs/layer_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: layer_vnops.c,v 1.33 2007/12/22 00:48:46 dyoung Exp $ */ +/* $NetBSD: layer_vnops.c,v 1.34 2008/01/02 11:49:00 ad Exp $ */ /* * Copyright (c) 1999 National Aeronautics & Space Administration @@ -232,7 +232,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: layer_vnops.c,v 1.33 2007/12/22 00:48:46 dyoung Exp $"); +__KERNEL_RCSID(0, "$NetBSD: layer_vnops.c,v 1.34 2008/01/02 11:49:00 ad Exp $"); #include #include @@ -241,7 +241,7 @@ __KERNEL_RCSID(0, "$NetBSD: layer_vnops.c,v 1.33 2007/12/22 00:48:46 dyoung Exp #include #include #include -#include +#include #include #include @@ -619,12 +619,7 @@ layer_lock(v) * going away doesn't mean the struct lock below us is. * LK_EXCLUSIVE is fine. */ - if ((flags & LK_TYPE_MASK) == LK_DRAIN) { - return(lockmgr(vp->v_vnlock, - (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE, - &vp->v_interlock)); - } else - return(lockmgr(vp->v_vnlock, flags, &vp->v_interlock)); + return (lockmgr(vp->v_vnlock, flags, &vp->v_interlock)); } else { /* * Ahh well. It would be nice if the fs we're over would @@ -634,19 +629,14 @@ layer_lock(v) * on "..", we have to lock the lower node, then lock our * node. Most of the time it won't matter that we lock our * node (as any locking would need the lower one locked - * first). But we can LK_DRAIN the upper lock as a step - * towards decomissioning it. + * first). 
*/ lowervp = LAYERVPTOLOWERVP(vp); if (flags & LK_INTERLOCK) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); flags &= ~LK_INTERLOCK; } - if ((flags & LK_TYPE_MASK) == LK_DRAIN) { - error = VOP_LOCK(lowervp, - (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE); - } else - error = VOP_LOCK(lowervp, flags); + error = VOP_LOCK(lowervp, flags); if (error) return (error); if ((error = lockmgr(&vp->v_lock, flags, &vp->v_interlock))) { @@ -675,7 +665,7 @@ layer_unlock(v) &vp->v_interlock)); } else { if (flags & LK_INTERLOCK) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); flags &= ~LK_INTERLOCK; } VOP_UNLOCK(LAYERVPTOLOWERVP(vp), flags); @@ -740,10 +730,17 @@ layer_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; - struct lwp *a_l; + bool *a_recycle; } */ *ap = v; struct vnode *vp = ap->a_vp; + /* + * ..., but don't cache the device node. Also, if we did a + * remove, don't cache the node. + */ + *ap->a_recycle = (vp->v_type == VBLK || vp->v_type == VCHR + || (VTOLAYER(vp)->layer_flags & LAYERFS_REMOVED)); + /* * Do nothing (and _don't_ bypass). * Wait to vrele lowervp until reclaim, @@ -758,13 +755,6 @@ layer_inactive(v) */ VOP_UNLOCK(vp, 0); - /* - * ..., but don't cache the device node. Also, if we did a - * remove, don't cache the node. - */ - if (vp->v_type == VBLK || vp->v_type == VCHR - || (VTOLAYER(vp)->layer_flags & LAYERFS_REMOVED)) - vgone(vp); return (0); } @@ -861,9 +851,7 @@ layer_reclaim(v) /* * Note: in vop_reclaim, the node's struct lock has been * decomissioned, so we have to be careful about calling - * VOP's on ourself. Even if we turned a LK_DRAIN into an - * LK_EXCLUSIVE in layer_lock, we still must be careful as VXLOCK is - * set. + * VOP's on ourself. We must be careful as VXLOCK is set. */ /* After this assignment, this node will not be re-used. 
*/ if ((vp == lmp->layerm_rootvp)) { @@ -879,9 +867,10 @@ layer_reclaim(v) mutex_enter(&lmp->layerm_hashlock); LIST_REMOVE(xp, layer_hash); mutex_exit(&lmp->layerm_hashlock); - FREE(vp->v_data, M_TEMP); + kmem_free(vp->v_data, lmp->layerm_size); vp->v_data = NULL; vrele(lowervp); + return (0); } @@ -971,8 +960,8 @@ layer_getpages(v) return EBUSY; } ap->a_vp = LAYERVPTOLOWERVP(vp); - simple_unlock(&vp->v_interlock); - simple_lock(&ap->a_vp->v_interlock); + mutex_exit(&vp->v_interlock); + mutex_enter(&ap->a_vp->v_interlock); error = VCALL(ap->a_vp, VOFFSET(vop_getpages), ap); return error; } @@ -995,11 +984,11 @@ layer_putpages(v) */ ap->a_vp = LAYERVPTOLOWERVP(vp); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); if (ap->a_flags & PGO_RECLAIM) { return 0; } - simple_lock(&ap->a_vp->v_interlock); + mutex_enter(&ap->a_vp->v_interlock); error = VCALL(ap->a_vp, VOFFSET(vop_putpages), ap); return error; } diff --git a/sys/miscfs/kernfs/kernfs_subr.c b/sys/miscfs/kernfs/kernfs_subr.c index 4084fd88dc4d..82dc21085a02 100644 --- a/sys/miscfs/kernfs/kernfs_subr.c +++ b/sys/miscfs/kernfs/kernfs_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: kernfs_subr.c,v 1.13 2007/10/10 20:42:29 ad Exp $ */ +/* $NetBSD: kernfs_subr.c,v 1.14 2008/01/02 11:49:00 ad Exp $ */ /* * Copyright (c) 1993 @@ -73,7 +73,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kernfs_subr.c,v 1.13 2007/10/10 20:42:29 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kernfs_subr.c,v 1.14 2008/01/02 11:49:00 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_ipsec.h" @@ -326,7 +326,7 @@ kernfs_hashget(type, mp, kt, value) vp = KERNFSTOV(pp); if (pp->kfs_type == type && vp->v_mount == mp && pp->kfs_kt == kt && pp->kfs_value == value) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&kfs_ihash_lock); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) goto loop; diff --git a/sys/miscfs/kernfs/kernfs_vnops.c b/sys/miscfs/kernfs/kernfs_vnops.c index c29294824f4d..f04492d5cdca 100644 --- a/sys/miscfs/kernfs/kernfs_vnops.c +++ b/sys/miscfs/kernfs/kernfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: kernfs_vnops.c,v 1.133 2007/11/26 19:02:14 pooka Exp $ */ +/* $NetBSD: kernfs_vnops.c,v 1.134 2008/01/02 11:49:00 ad Exp $ */ /* * Copyright (c) 1992, 1993 @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kernfs_vnops.c,v 1.133 2007/11/26 19:02:14 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kernfs_vnops.c,v 1.134 2008/01/02 11:49:00 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_ipsec.h" @@ -1407,6 +1407,7 @@ kernfs_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + bool *a_recycle; } */ *ap = v; struct vnode *vp = ap->a_vp; const struct kernfs_node *kfs = VTOKERN(ap->a_vp); @@ -1415,7 +1416,7 @@ kernfs_inactive(v) struct secpolicy *sp; #endif - VOP_UNLOCK(vp, 0); + *ap->a_recycle = false; switch (kfs->kfs_type) { #ifdef IPSEC case KFSipsecsa: @@ -1423,21 +1424,21 @@ kernfs_inactive(v) if (m) m_freem(m); else - vgone(vp); + *ap->a_recycle = true; break; case KFSipsecsp: sp = key_getspbyid(kfs->kfs_value); if (sp) key_freesp(sp); else { - /* should never happen as we hold a refcnt */ - vgone(vp); + *ap->a_recycle = true; } break; #endif default: break; } + VOP_UNLOCK(vp, 0); return (0); } diff --git a/sys/miscfs/nullfs/null_vfsops.c b/sys/miscfs/nullfs/null_vfsops.c index 953dd1296038..f5ced049b943 100644 --- a/sys/miscfs/nullfs/null_vfsops.c +++ b/sys/miscfs/nullfs/null_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: null_vfsops.c,v 1.71 2007/12/08 19:29:51 pooka Exp $ */ +/* $NetBSD: null_vfsops.c,v 1.72 2008/01/02 11:49:00 ad Exp $ 
*/ /* * Copyright (c) 1999 National Aeronautics & Space Administration @@ -74,7 +74,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: null_vfsops.c,v 1.71 2007/12/08 19:29:51 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: null_vfsops.c,v 1.72 2008/01/02 11:49:00 ad Exp $"); #include #include @@ -183,18 +183,21 @@ nullfs_mount(mp, path, data, data_len) free(nmp, M_UFSMNT); /* XXX */ return (error); } - /* - * Unlock the node - */ - vp->v_vflag |= VV_ROOT; - VOP_UNLOCK(vp, 0); - /* * Keep a held reference to the root vnode. * It is vrele'd in nullfs_unmount. */ + vp->v_vflag |= VV_ROOT; nmp->nullm_rootvp = vp; + /* We don't need kernel_lock. */ + mp->mnt_iflag |= IMNT_MPSAFE; + + /* + * Unlock the node + */ + VOP_UNLOCK(vp, 0); + error = set_statvfs_info(path, UIO_USERSPACE, args->la.target, UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l); #ifdef NULLFS_DIAGNOSTIC @@ -231,12 +234,7 @@ nullfs_unmount(struct mount *mp, int mntflags) vprint("alias root of lower", null_rootvp); #endif /* - * Release reference on underlying root vnode - */ - vrele(null_rootvp); - - /* - * And blow it away for future re-use + * Blow it away for future re-use */ vgone(null_rootvp); diff --git a/sys/miscfs/overlay/overlay_vfsops.c b/sys/miscfs/overlay/overlay_vfsops.c index cc4b8650a2f9..d380c539d263 100644 --- a/sys/miscfs/overlay/overlay_vfsops.c +++ b/sys/miscfs/overlay/overlay_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: overlay_vfsops.c,v 1.46 2007/12/08 15:12:15 ad Exp $ */ +/* $NetBSD: overlay_vfsops.c,v 1.47 2008/01/02 11:49:01 ad Exp $ */ /* * Copyright (c) 1999, 2000 National Aeronautics & Space Administration @@ -74,7 +74,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: overlay_vfsops.c,v 1.46 2007/12/08 15:12:15 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: overlay_vfsops.c,v 1.47 2008/01/02 11:49:01 ad Exp $"); #include #include @@ -220,11 +220,7 @@ ov_unmount(struct mount *mp, int mntflags) vprint("alias root of lower", overlay_rootvp); #endif /* - * Release reference on underlying root vnode - */ - vrele(overlay_rootvp); - /* - * And blow it away for future re-use + * Blow it away for future re-use */ vgone(overlay_rootvp); /* diff --git a/sys/miscfs/portal/portal_vfsops.c b/sys/miscfs/portal/portal_vfsops.c index 4becf27fca2a..7ad19298eec7 100644 --- a/sys/miscfs/portal/portal_vfsops.c +++ b/sys/miscfs/portal/portal_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: portal_vfsops.c,v 1.68 2007/11/26 19:02:15 pooka Exp $ */ +/* $NetBSD: portal_vfsops.c,v 1.69 2008/01/02 11:49:01 ad Exp $ */ /* * Copyright (c) 1992, 1993, 1995 @@ -40,7 +40,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: portal_vfsops.c,v 1.68 2007/11/26 19:02:15 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: portal_vfsops.c,v 1.69 2008/01/02 11:49:01 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" @@ -179,11 +179,7 @@ portal_unmount(struct mount *mp, int mntflags) return (error); /* - * Release reference on underlying root vnode - */ - vrele(rtvp); - /* - * And blow it away for future re-use + * Blow it away for future re-use */ vgone(rtvp); /* diff --git a/sys/miscfs/procfs/procfs_subr.c b/sys/miscfs/procfs/procfs_subr.c index 3dfa4d23c951..cbc814af01c4 100644 --- a/sys/miscfs/procfs/procfs_subr.c +++ b/sys/miscfs/procfs/procfs_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: procfs_subr.c,v 1.82 2007/11/07 00:23:38 ad Exp $ */ +/* $NetBSD: procfs_subr.c,v 1.83 2008/01/02 11:49:01 ad Exp $ */ /*- * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc. 
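
A recurring simplification in the *_unmount() hunks above (fdesc, nullfs, overlay, portal, and umapfs later on) is dropping the vrele() that used to precede vgone() on the held root vnode. Before/after, as removed above; the reading that the reworked vgone() now disposes of the caller's reference itself is an inference from these removals, not something the patch states:

	/* Before: release our held reference, then destroy the vnode. */
	vrele(rtvp);
	vgone(rtvp);

	/* After: vgone() alone; it is assumed to consume the reference. */
	vgone(rtvp);
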
@@ -109,7 +109,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: procfs_subr.c,v 1.82 2007/11/07 00:23:38 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: procfs_subr.c,v 1.83 2008/01/02 11:49:01 ad Exp $"); #include #include @@ -619,7 +619,7 @@ loop: if (flags == 0) { mutex_exit(&pfs_ihash_lock); } else { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&pfs_ihash_lock); if (vget(vp, flags | LK_INTERLOCK)) goto loop; @@ -679,17 +679,17 @@ procfs_revoke_vnodes(p, arg) for (pfs = LIST_FIRST(ppp); pfs; pfs = pnext) { vp = PFSTOV(pfs); pnext = LIST_NEXT(pfs, pfs_hash); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_usecount > 0 && pfs->pfs_pid == p->p_pid && vp->v_mount == mp) { vp->v_usecount++; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); mutex_exit(&pfs_ihash_lock); VOP_REVOKE(vp, REVOKEALL); vrele(vp); mutex_enter(&pfs_ihash_lock); } else { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } } mutex_exit(&pfs_ihash_lock); diff --git a/sys/miscfs/procfs/procfs_vnops.c b/sys/miscfs/procfs/procfs_vnops.c index 38b8130c209a..d8e9190fd5f3 100644 --- a/sys/miscfs/procfs/procfs_vnops.c +++ b/sys/miscfs/procfs/procfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: procfs_vnops.c,v 1.163 2007/11/26 19:02:16 pooka Exp $ */ +/* $NetBSD: procfs_vnops.c,v 1.164 2008/01/02 11:49:01 ad Exp $ */ /*- * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc. @@ -112,7 +112,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.163 2007/11/26 19:02:16 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.164 2008/01/02 11:49:01 ad Exp $"); #include #include @@ -424,17 +424,13 @@ procfs_inactive(v) } */ *ap = v; struct vnode *vp = ap->a_vp; struct pfsnode *pfs = VTOPFS(vp); - bool recycle; mutex_enter(&proclist_lock); - recycle = (p_find(pfs->pfs_pid, PFIND_LOCKED) == NULL); + *ap->a_recycle = (p_find(pfs->pfs_pid, PFIND_LOCKED) == NULL); mutex_exit(&proclist_lock); VOP_UNLOCK(vp, 0); - if (recycle) - vgone(vp); - return (0); } diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c index cd899defadf9..ea4cd5e39efd 100644 --- a/sys/miscfs/specfs/spec_vnops.c +++ b/sys/miscfs/specfs/spec_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: spec_vnops.c,v 1.110 2007/12/02 13:56:18 hannken Exp $ */ +/* $NetBSD: spec_vnops.c,v 1.111 2008/01/02 11:49:02 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: spec_vnops.c,v 1.110 2007/12/02 13:56:18 hannken Exp $"); +__KERNEL_RCSID(0, "$NetBSD: spec_vnops.c,v 1.111 2008/01/02 11:49:02 ad Exp $"); #include #include @@ -417,11 +417,11 @@ spec_ioctl(void *v) vp = ap->a_vp; dev = NODEV; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specinfo) { dev = vp->v_rdev; } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); if (dev == NODEV) { return ENXIO; } @@ -460,11 +460,11 @@ spec_poll(void *v) vp = ap->a_vp; dev = NODEV; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if ((vp->v_iflag & VI_XLOCK) == 0 && vp->v_specinfo) { dev = vp->v_rdev; } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); if (dev == NODEV) { return POLLERR; } diff --git a/sys/miscfs/syncfs/sync_subr.c b/sys/miscfs/syncfs/sync_subr.c index 40339351bdf9..14d85bf4625d 100644 --- a/sys/miscfs/syncfs/sync_subr.c +++ b/sys/miscfs/syncfs/sync_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: sync_subr.c,v 1.33 2007/12/08 15:47:32 ad Exp $ */ +/* $NetBSD: sync_subr.c,v 1.34 
2008/01/02 11:49:02 ad Exp $ */ /* * Copyright 1997 Marshall Kirk McKusick. All Rights Reserved. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sync_subr.c,v 1.33 2007/12/08 15:47:32 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sync_subr.c,v 1.34 2008/01/02 11:49:02 ad Exp $"); #include #include @@ -86,7 +86,7 @@ vn_initialize_syncerd() TAILQ_INIT(&syncer_workitem_pending[i]); mutex_init(&syncer_mutex, MUTEX_DEFAULT, IPL_NONE); - mutex_init(&syncer_data_lock, MUTEX_DEFAULT, IPL_VM); /* XXX vmlocking */ + mutex_init(&syncer_data_lock, MUTEX_DEFAULT, IPL_NONE); cv_init(&syncer_cv, "syncer"); } @@ -140,7 +140,7 @@ vn_syncer_add1(vp, delayx) * position of the vnode. syncer_data_lock * does not protect v_iflag. */ - /* notyet KASSERT(mutex_owned(&vp->v_interlock)); */ + KASSERT(mutex_owned(&vp->v_interlock)); vp->v_iflag |= VI_ONWORKLST; } @@ -158,7 +158,7 @@ vn_syncer_add_to_worklist(vp, delayx) int delayx; { - /* notyet KASSERT(mutex_owned(&vp->v_interlock)); */ + KASSERT(mutex_owned(&vp->v_interlock)); mutex_enter(&syncer_data_lock); vn_syncer_add1(vp, delayx); @@ -174,7 +174,7 @@ vn_syncer_remove_from_worklist(vp) { struct synclist *slp; - /* not yet KASSERT(mutex_owned(&vp->v_interlock)); */ + KASSERT(mutex_owned(&vp->v_interlock)); mutex_enter(&syncer_data_lock); @@ -216,7 +216,7 @@ sched_sync(void *v) while ((vp = TAILQ_FIRST(slp)) != NULL) { /* We are locking in the wrong direction. */ - if (simple_lock_try(&vp->v_interlock)) { + if (mutex_tryenter(&vp->v_interlock)) { mutex_exit(&syncer_data_lock); if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK) == 0) { diff --git a/sys/miscfs/syncfs/sync_vnops.c b/sys/miscfs/syncfs/sync_vnops.c index 2356523e5936..453c921f07f1 100644 --- a/sys/miscfs/syncfs/sync_vnops.c +++ b/sys/miscfs/syncfs/sync_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: sync_vnops.c,v 1.19 2007/11/26 19:02:18 pooka Exp $ */ +/* $NetBSD: sync_vnops.c,v 1.20 2008/01/02 11:49:02 ad Exp $ */ /* * Copyright 1997 Marshall Kirk McKusick. All Rights Reserved. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sync_vnops.c,v 1.19 2007/11/26 19:02:18 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sync_vnops.c,v 1.20 2008/01/02 11:49:02 ad Exp $"); #include #include @@ -97,7 +97,9 @@ vfs_allocate_syncvnode(mp) } next = start; } + mutex_enter(&vp->v_interlock); vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); + mutex_exit(&vp->v_interlock); mp->mnt_syncer = vp; return (0); } @@ -113,9 +115,10 @@ vfs_deallocate_syncvnode(mp) vp = mp->mnt_syncer; mp->mnt_syncer = NULL; + mutex_enter(&vp->v_interlock); vn_syncer_remove_from_worklist(vp); vp->v_writecount = 0; - vrele(vp); + mutex_exit(&vp->v_interlock); vgone(vp); } @@ -146,7 +149,9 @@ sync_fsync(v) /* * Move ourselves to the back of the sync list. 
*/ + mutex_enter(&syncvp->v_interlock); vn_syncer_add_to_worklist(syncvp, syncdelay); + mutex_exit(&syncvp->v_interlock); /* * Walk the list of vnodes pushing all that are dirty and diff --git a/sys/miscfs/umapfs/umap_vfsops.c b/sys/miscfs/umapfs/umap_vfsops.c index b4b87ef4000b..c0e3dc50a48b 100644 --- a/sys/miscfs/umapfs/umap_vfsops.c +++ b/sys/miscfs/umapfs/umap_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: umap_vfsops.c,v 1.73 2007/12/08 19:29:51 pooka Exp $ */ +/* $NetBSD: umap_vfsops.c,v 1.74 2008/01/02 11:49:02 ad Exp $ */ /* * Copyright (c) 1992, 1993 @@ -41,7 +41,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: umap_vfsops.c,v 1.73 2007/12/08 19:29:51 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: umap_vfsops.c,v 1.74 2008/01/02 11:49:02 ad Exp $"); #include #include @@ -256,18 +256,14 @@ umapfs_unmount(struct mount *mp, int mntflags) vprint("alias root of lower", rtvp); #endif /* - * Release reference on underlying root vnode - */ - vrele(rtvp); - /* - * And blow it away for future re-use + * Blow it away for future re-use */ vgone(rtvp); /* * Finally, throw away the umap_mount structure */ mutex_destroy(&amp->umapm_hashlock); - free(mp->mnt_data, M_UFSMNT); /* XXX */ + free(amp, M_UFSMNT); /* XXX */ mp->mnt_data = 0; return (0); } diff --git a/sys/netsmb/smb_iod.c b/sys/netsmb/smb_iod.c index 77cb7606edd1..bd38598f07aa 100644 --- a/sys/netsmb/smb_iod.c +++ b/sys/netsmb/smb_iod.c @@ -1,4 +1,4 @@ -/* $NetBSD: smb_iod.c,v 1.27 2007/07/09 21:11:15 ad Exp $ */ +/* $NetBSD: smb_iod.c,v 1.28 2008/01/02 11:49:02 ad Exp $ */ /* * Copyright (c) 2000-2001 Boris Popov @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: smb_iod.c,v 1.27 2007/07/09 21:11:15 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: smb_iod.c,v 1.28 2008/01/02 11:49:02 ad Exp $"); #include #include @@ -409,7 +409,7 @@ smb_iod_request(struct smbiod *iod, int event, void *ident) return 0; } smb_iod_wakeup(iod); - ltsleep(evp, PWAIT | PNORELOCK, "smbevw", 0, SMB_IOD_EVLOCKPTR(iod)); + mtsleep(evp, PWAIT | PNORELOCK, "smbevw", 0, SMB_IOD_EVLOCKPTR(iod)); error = evp->ev_error; free(evp, M_SMBIOD); return error; @@ -475,7 +475,7 @@ smb_iod_addrq(struct smb_rq *rqp) break; iod->iod_muxwant++; /* XXX use interruptible sleep? */ - ltsleep(&iod->iod_muxwant, PWAIT, "smbmux", + mtsleep(&iod->iod_muxwant, PWAIT, "smbmux", 0, SMB_IOD_RQLOCKPTR(iod)); } iod->iod_muxcnt++; @@ -501,7 +501,7 @@ smb_iod_removerq(struct smb_rq *rqp) SMB_IOD_RQLOCK(iod); while (rqp->sr_flags & SMBR_XLOCK) { rqp->sr_flags |= SMBR_XLOCKWANT; - ltsleep(rqp, PWAIT, "smbxrm", 0, SMB_IOD_RQLOCKPTR(iod)); + mtsleep(rqp, PWAIT, "smbxrm", 0, SMB_IOD_RQLOCKPTR(iod)); } SIMPLEQ_REMOVE(&iod->iod_rqlist, rqp, smb_rq, sr_link); iod->iod_muxcnt--; @@ -535,7 +535,7 @@ smb_iod_waitrq(struct smb_rq *rqp) SMBRQ_SLOCK(rqp); if (rqp->sr_rpgen == rqp->sr_rplast) { /* XXX interruptible sleep?
*/ - ltsleep(&rqp->sr_state, PWAIT, "smbwrq", 0, + mtsleep(&rqp->sr_state, PWAIT, "smbwrq", 0, SMBRQ_SLOCKPTR(rqp)); } rqp->sr_rplast++; diff --git a/sys/netsmb/smb_rq.c b/sys/netsmb/smb_rq.c index 5d80b52f945c..562c961336aa 100644 --- a/sys/netsmb/smb_rq.c +++ b/sys/netsmb/smb_rq.c @@ -1,4 +1,4 @@ -/* $NetBSD: smb_rq.c,v 1.28 2007/03/12 18:18:36 ad Exp $ */ +/* $NetBSD: smb_rq.c,v 1.29 2008/01/02 11:49:03 ad Exp $ */ /* * Copyright (c) 2000-2001, Boris Popov @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: smb_rq.c,v 1.28 2007/03/12 18:18:36 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: smb_rq.c,v 1.29 2008/01/02 11:49:03 ad Exp $"); #include #include @@ -206,7 +206,7 @@ smb_rq_enqueue(struct smb_rq *rqp) SMBS_ST_LOCK(ssp); if (ssp->ss_flags & SMBS_RECONNECTING) { SMBS_ST_UNLOCK(ssp); - error = ltsleep(&ssp->ss_vcgenid, + error = mtsleep(&ssp->ss_vcgenid, PWAIT | PCATCH | PNORELOCK, "smbtrcn", hz, SMBS_ST_LOCKPTR(ssp)); if (error && error != EWOULDBLOCK) diff --git a/sys/netsmb/smb_subr.h b/sys/netsmb/smb_subr.h index 591b808c7208..aae8d2f2ca7f 100644 --- a/sys/netsmb/smb_subr.h +++ b/sys/netsmb/smb_subr.h @@ -1,4 +1,4 @@ -/* $NetBSD: smb_subr.h,v 1.16 2006/08/17 17:11:29 christos Exp $ */ +/* $NetBSD: smb_subr.h,v 1.17 2008/01/02 11:49:03 ad Exp $ */ /* * Copyright (c) 2000-2001, Boris Popov @@ -79,11 +79,11 @@ void m_dumpm(struct mbuf *m); * Compatibility wrappers for simple locks */ -#define smb_slock simplelock -#define smb_sl_init(mtx, desc) simple_lock_init(mtx) -#define smb_sl_destroy(mtx) /*simple_lock_destroy(mtx)*/ -#define smb_sl_lock(mtx) simple_lock(mtx) -#define smb_sl_unlock(mtx) simple_unlock(mtx) +#define smb_slock kmutex +#define smb_sl_init(mtx, desc) mutex_init((mtx), MUTEX_DEFAULT, IPL_NONE) +#define smb_sl_destroy(mtx) mutex_destroy(mtx) +#define smb_sl_lock(mtx) mutex_enter(mtx) +#define smb_sl_unlock(mtx) mutex_exit(mtx) #define SMB_STRFREE(p) do { if (p) smb_strfree(p); } while(0) diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c index 9fd6863944c7..30bdaf4318d0 100644 --- a/sys/nfs/nfs_bio.c +++ b/sys/nfs/nfs_bio.c @@ -1,4 +1,4 @@ -/* $NetBSD: nfs_bio.c,v 1.171 2007/12/04 17:42:30 yamt Exp $ */ +/* $NetBSD: nfs_bio.c,v 1.172 2008/01/02 11:49:03 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.171 2007/12/04 17:42:30 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.172 2008/01/02 11:49:03 ad Exp $"); #include "opt_nfs.h" #include "opt_ddb.h" @@ -183,7 +183,7 @@ nfs_bioread(vp, uio, ioflag, cred, cflag) bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, l); if (!bp) return (EINTR); - if ((bp->b_flags & B_DONE) == 0) { + if ((bp->b_oflags & BO_DONE) == 0) { bp->b_flags |= B_READ; error = nfs_doio(bp); if (error) { @@ -227,7 +227,7 @@ diragain: bp = nfs_getcacheblk(vp, NFSDC_BLKNO(ndp), NFS_DIRBLKSIZ, l); if (!bp) return (EINTR); - if ((bp->b_flags & B_DONE) == 0) { + if ((bp->b_oflags & BO_DONE) == 0) { bp->b_flags |= B_READ; bp->b_dcookie = ndp->dc_blkcookie; error = nfs_doio(bp); @@ -393,7 +393,7 @@ diragain: rabp = nfs_getcacheblk(vp, NFSDC_BLKNO(nndp), NFS_DIRBLKSIZ, l); if (rabp) { - if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) { + if ((rabp->b_oflags & (BO_DONE | BO_DELWRI)) == 0) { rabp->b_dcookie = nndp->dc_cookie; rabp->b_flags |= (B_READ | B_ASYNC); if (nfs_asyncio(rabp)) { @@ -541,7 +541,7 @@ nfs_write(v) * backout size and free pages past eof. 
*/ np->n_size = oldsize; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); (void)VOP_PUTPAGES(vp, round_page(vp->v_size), 0, PGO_SYNCIO | PGO_FREE); } @@ -561,7 +561,7 @@ nfs_write(v) if ((oldoff & ~(nmp->nm_wsize - 1)) != (uio->uio_offset & ~(nmp->nm_wsize - 1))) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(oldoff & ~(nmp->nm_wsize - 1)), round_page((uio->uio_offset + nmp->nm_wsize - 1) & @@ -571,7 +571,7 @@ nfs_write(v) if (wrotedata) VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0)); if (error == 0 && (ioflag & IO_SYNC) != 0) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(origoff & ~(nmp->nm_wsize - 1)), round_page((uio->uio_offset + nmp->nm_wsize - 1) & @@ -624,27 +624,28 @@ nfs_vinvalbuf(vp, flags, cred, l, intrflg) { struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); - int error = 0, slpflag, slptimeo; + int error = 0, slptimeo; + bool catch; if ((nmp->nm_flag & NFSMNT_INT) == 0) intrflg = 0; if (intrflg) { - slpflag = PCATCH; + catch = true; slptimeo = 2 * hz; } else { - slpflag = 0; + catch = false; slptimeo = 0; } /* * First wait for any other process doing a flush to complete. */ - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); while (np->n_flag & NFLUSHINPROG) { np->n_flag |= NFLUSHWANT; - error = ltsleep(&np->n_flag, PRIBIO + 2, "nfsvinval", + error = mtsleep(&np->n_flag, PRIBIO + 2, "nfsvinval", slptimeo, &vp->v_interlock); if (error && intrflg && nfs_sigintr(nmp, NULL, l)) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return EINTR; } } @@ -653,8 +654,8 @@ nfs_vinvalbuf(vp, flags, cred, l, intrflg) * Now, flush as required. */ np->n_flag |= NFLUSHINPROG; - simple_unlock(&vp->v_interlock); - error = vinvalbuf(vp, flags, cred, l, slpflag, 0); + mutex_exit(&vp->v_interlock); + error = vinvalbuf(vp, flags, cred, l, catch, 0); while (error) { if (intrflg && nfs_sigintr(nmp, NULL, l)) { error = EINTR; @@ -662,7 +663,7 @@ nfs_vinvalbuf(vp, flags, cred, l, intrflg) } error = vinvalbuf(vp, flags, cred, l, 0, slptimeo); } - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (error == 0) np->n_flag &= ~NMODIFIED; np->n_flag &= ~NFLUSHINPROG; @@ -670,7 +671,7 @@ nfs_vinvalbuf(vp, flags, cred, l, intrflg) np->n_flag &= ~NFLUSHWANT; wakeup(&np->n_flag); } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return error; } @@ -918,9 +919,7 @@ nfs_doio_read(bp, uiop) printf("nfs_doio: type %x unexpected\n", vp->v_type); break; } - if (error) { - bp->b_error = error; - } + bp->b_error = error; return error; } @@ -968,7 +967,7 @@ again: /* * this page belongs to our object. */ - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); /* * write out the page stably if it's about to * be released because we can't resend it @@ -985,19 +984,19 @@ again: */ if ((pgs[i]->flags & PG_NEEDCOMMIT) == 0) needcommit = false; - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } else { iomode = NFSV3WRITE_FILESYNC; needcommit = false; } } if (!needcommit && iomode == NFSV3WRITE_UNSTABLE) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); for (i = 0; i < npages; i++) { pgs[i]->flags |= PG_NEEDCOMMIT | PG_RDONLY; pmap_page_protect(pgs[i], VM_PROT_READ); } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); pageprotected = true; /* pages can't be modified during i/o. 
*/ } else pageprotected = false; @@ -1049,11 +1048,11 @@ again: * pages are now on stable storage. */ uiop->uio_resid = 0; - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); for (i = 0; i < npages; i++) { pgs[i]->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY); } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return 0; } else if (error == NFSERR_STALEWRITEVERF) { nfs_clearcommit(vp->v_mount); @@ -1098,11 +1097,11 @@ again: * re-dirty pages so that they will be passed * to us later again. */ - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); for (i = 0; i < npages; i++) { pgs[i]->flags &= ~PG_CLEAN; } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } mutex_exit(&np->n_commitlock); } else @@ -1114,11 +1113,11 @@ again: mutex_enter(&np->n_commitlock); nfs_del_committed_range(vp, off, cnt); mutex_exit(&np->n_commitlock); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); for (i = 0; i < npages; i++) { pgs[i]->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY); } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } else { /* * we got an error. @@ -1165,9 +1164,7 @@ nfs_doio_phys(bp, uiop) nfs_clearcommit(bp->b_vp->v_mount); } } - if (error) { - bp->b_error = error; - } + bp->b_error = error; return error; } @@ -1274,7 +1271,7 @@ nfs_getpages(v) if (!write && (np->n_flag & NMODIFIED) == 0 && pgs != NULL) { if (!locked) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); } for (i = 0; i < npages; i++) { pg = pgs[i]; @@ -1284,7 +1281,7 @@ nfs_getpages(v) pg->flags |= PG_RDONLY; } if (!locked) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } } if (!write) { @@ -1311,9 +1308,9 @@ nfs_getpages(v) * available and put back original pgs array. */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_page_unbusy(pgs, npages); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); *ap->a_count = 0; memcpy(pgs, opgs, npages * sizeof(struct vm_pages *)); @@ -1325,7 +1322,7 @@ nfs_getpages(v) } np->n_flag |= NMODIFIED; if (!locked) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); } for (i = 0; i < npages; i++) { pg = pgs[i]; @@ -1335,7 +1332,7 @@ nfs_getpages(v) pg->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY); } if (!locked) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } if (v3) { mutex_exit(&np->n_commitlock); diff --git a/sys/nfs/nfs_node.c b/sys/nfs/nfs_node.c index 65967245b50e..73ed3bdde487 100644 --- a/sys/nfs/nfs_node.c +++ b/sys/nfs/nfs_node.c @@ -1,4 +1,4 @@ -/* $NetBSD: nfs_node.c,v 1.96 2007/11/26 19:02:20 pooka Exp $ */ +/* $NetBSD: nfs_node.c,v 1.97 2008/01/02 11:49:03 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: nfs_node.c,v 1.96 2007/11/26 19:02:20 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nfs_node.c,v 1.97 2008/01/02 11:49:03 ad Exp $"); #include "opt_nfs.h" @@ -224,12 +224,11 @@ nfs_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + bool *a_recycle; } */ *ap = v; struct nfsnode *np; struct sillyrename *sp; - struct lwp *l = curlwp; struct vnode *vp = ap->a_vp; - bool removed; np = VTONFS(vp); if (prtactive && vp->v_usecount != 0) @@ -240,8 +239,8 @@ nfs_inactive(v) } else sp = NULL; if (sp != NULL) - nfs_vinvalbuf(vp, 0, sp->s_cred, l, 1); - removed = (np->n_flag & NREMOVED) != 0; + nfs_vinvalbuf(vp, 0, sp->s_cred, curlwp, 1); + *ap->a_recycle = (np->n_flag & NREMOVED) != 0; np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT | NEOFVALID | NTRUNCDELAYED); @@ -251,10 
+250,6 @@ nfs_inactive(v) VOP_UNLOCK(vp, 0); - /* XXXMP only kernel_lock protects vp */ - if (removed) - vrecycle(vp, NULL, l); - if (sp != NULL) { int error; diff --git a/sys/nfs/nfs_socket.c b/sys/nfs/nfs_socket.c index 65a96dd321d4..b463e4b66fe3 100644 --- a/sys/nfs/nfs_socket.c +++ b/sys/nfs/nfs_socket.c @@ -1,4 +1,4 @@ -/* $NetBSD: nfs_socket.c,v 1.165 2007/12/04 17:42:31 yamt Exp $ */ +/* $NetBSD: nfs_socket.c,v 1.166 2008/01/02 11:49:03 ad Exp $ */ /* * Copyright (c) 1989, 1991, 1993, 1995 @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: nfs_socket.c,v 1.165 2007/12/04 17:42:31 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nfs_socket.c,v 1.166 2008/01/02 11:49:03 ad Exp $"); #include "fs_nfs.h" #include "opt_nfs.h" @@ -2242,7 +2242,9 @@ nfsrv_rcv(struct nfssvc_sock *slp) auio.uio_resid = 1000000000; /* not need to setup uio_vmspace */ flags = MSG_DONTWAIT; + KERNEL_LOCK(1, curlwp); error = (*so->so_receive)(so, &nam, &auio, &mp, NULL, &flags); + KERNEL_UNLOCK_ONE(curlwp); if (error || mp == NULL) { if (error == EWOULDBLOCK) setflags |= SLP_A_NEEDQ; @@ -2278,8 +2280,10 @@ nfsrv_rcv(struct nfssvc_sock *slp) auio.uio_resid = 1000000000; /* not need to setup uio_vmspace */ flags = MSG_DONTWAIT; + KERNEL_LOCK(1, curlwp); error = (*so->so_receive)(so, &nam, &auio, &mp, NULL, &flags); + KERNEL_UNLOCK_ONE(curlwp); if (mp) { if (nam) { m = nam; diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c index 30e033ab2490..d5ddbb902808 100644 --- a/sys/nfs/nfs_subs.c +++ b/sys/nfs/nfs_subs.c @@ -1,4 +1,4 @@ -/* $NetBSD: nfs_subs.c,v 1.194 2007/12/08 19:29:51 pooka Exp $ */ +/* $NetBSD: nfs_subs.c,v 1.195 2008/01/02 11:49:04 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -70,7 +70,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: nfs_subs.c,v 1.194 2007/12/08 19:29:51 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nfs_subs.c,v 1.195 2008/01/02 11:49:04 ad Exp $"); #include "fs_nfs.h" #include "opt_nfs.h" @@ -1162,9 +1162,9 @@ nfs_dirhash(off) } #define _NFSDC_MTX(np) (&NFSTOV(np)->v_interlock) -#define NFSDC_LOCK(np) simple_lock(_NFSDC_MTX(np)) -#define NFSDC_UNLOCK(np) simple_unlock(_NFSDC_MTX(np)) -#define NFSDC_ASSERT_LOCKED(np) LOCK_ASSERT(simple_lock_held(_NFSDC_MTX(np))) +#define NFSDC_LOCK(np) mutex_enter(_NFSDC_MTX(np)) +#define NFSDC_UNLOCK(np) mutex_exit(_NFSDC_MTX(np)) +#define NFSDC_ASSERT_LOCKED(np) KASSERT(mutex_owned(_NFSDC_MTX(np))) void nfs_initdircache(vp) @@ -1689,7 +1689,6 @@ nfs_loadattrcache(vpp, fp, vaper, flags) vp->v_data = NULL; VOP_UNLOCK(vp, 0); vp->v_op = spec_vnodeop_p; - vrele(vp); vgone(vp); lockmgr(&nvp->v_lock, LK_EXCLUSIVE, &nvp->v_interlock); @@ -1772,7 +1771,7 @@ nfs_loadattrcache(vpp, fp, vaper, flags) np->n_flag |= NTRUNCDELAYED; } else { genfs_node_wrlock(vp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); (void)VOP_PUTPAGES(vp, 0, 0, PGO_SYNCIO | PGO_CLEANIT | PGO_FREE | PGO_ALLPAGES); @@ -1849,7 +1848,7 @@ nfs_delayedtruncate(vp) if (np->n_flag & NTRUNCDELAYED) { np->n_flag &= ~NTRUNCDELAYED; genfs_node_wrlock(vp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); (void)VOP_PUTPAGES(vp, 0, 0, PGO_SYNCIO | PGO_CLEANIT | PGO_FREE | PGO_ALLPAGES); uvm_vnp_setsize(vp, np->n_size); @@ -2652,7 +2651,7 @@ nfs_clearcommit(mp) struct nfsmount *nmp = VFSTONFS(mp); rw_enter(&nmp->nm_writeverflock, RW_WRITER); - + mutex_enter(&mntvnode_lock); TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { KASSERT(vp->v_mount == mp); if (vp->v_type != VREG) @@ -2662,12 +2661,13 @@ nfs_clearcommit(mp) np->n_pushedhi = 0; np->n_commitflags &= 
~(NFS_COMMIT_PUSH_VALID | NFS_COMMIT_PUSHED_VALID); - simple_lock(&vp->v_uobj.vmobjlock); + mutex_enter(&vp->v_uobj.vmobjlock); TAILQ_FOREACH(pg, &vp->v_uobj.memq, listq) { pg->flags &= ~PG_NEEDCOMMIT; } - simple_unlock(&vp->v_uobj.vmobjlock); + mutex_exit(&vp->v_uobj.vmobjlock); } + mutex_exit(&mntvnode_lock); mutex_enter(&nmp->nm_lock); nmp->nm_iflag &= ~NFSMNT_STALEWRITEVERF; mutex_exit(&nmp->nm_lock); diff --git a/sys/nfs/nfs_syscalls.c b/sys/nfs/nfs_syscalls.c index bd05e5c5046b..adab0ae4e28b 100644 --- a/sys/nfs/nfs_syscalls.c +++ b/sys/nfs/nfs_syscalls.c @@ -1,4 +1,4 @@ -/* $NetBSD: nfs_syscalls.c,v 1.128 2007/12/20 23:03:14 dsl Exp $ */ +/* $NetBSD: nfs_syscalls.c,v 1.129 2008/01/02 11:49:04 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: nfs_syscalls.c,v 1.128 2007/12/20 23:03:14 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nfs_syscalls.c,v 1.129 2008/01/02 11:49:04 ad Exp $"); #include "fs_nfs.h" #include "opt_nfs.h" @@ -479,11 +479,13 @@ nfssvc_addsock(fp, mynam) slp->ns_aflags = SLP_A_NEEDQ; slp->ns_gflags = 0; slp->ns_sflags = 0; + KERNEL_LOCK(1, curlwp); s = splsoftnet(); so->so_upcallarg = (void *)slp; so->so_upcall = nfsrv_soupcall; so->so_rcv.sb_flags |= SB_UPCALL; splx(s); + KERNEL_UNLOCK_ONE(curlwp); nfsrv_wakenfsd(slp); return (0); } @@ -855,12 +857,14 @@ nfsrv_zapsock(slp) so = slp->ns_so; KASSERT(so != NULL); + KERNEL_LOCK(1, curlwp); s = splsoftnet(); so->so_upcall = NULL; so->so_upcallarg = NULL; so->so_rcv.sb_flags &= ~SB_UPCALL; splx(s); soshutdown(so, SHUT_RDWR); + KERNEL_UNLOCK_ONE(curlwp); if (slp->ns_nam) m_free(slp->ns_nam); @@ -1045,10 +1049,8 @@ nfssvc_iod(void *arg) struct nfs_iod *myiod; struct nfsmount *nmp; - KERNEL_LOCK(1, curlwp); myiod = kmem_alloc(sizeof(*myiod), KM_SLEEP); mutex_init(&myiod->nid_lock, MUTEX_DEFAULT, IPL_NONE); - KERNEL_UNLOCK_LAST(curlwp); cv_init(&myiod->nid_cv, "nfsiod"); myiod->nid_exiting = false; myiod->nid_mount = NULL; @@ -1123,10 +1125,8 @@ quit: mutex_exit(&myiod->nid_lock); cv_destroy(&myiod->nid_cv); - KERNEL_LOCK(1, curlwp); mutex_destroy(&myiod->nid_lock); kmem_free(myiod, sizeof(*myiod)); - KERNEL_UNLOCK_LAST(curlwp); kthread_exit(0); } @@ -1166,10 +1166,8 @@ nfs_set_niothreads(int newval) */ mutex_exit(&nfs_iodlist_lock); - KERNEL_LOCK(1, curlwp); error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, nfssvc_iod, NULL, NULL, "nfsio"); - KERNEL_UNLOCK_LAST(curlwp); mutex_enter(&nfs_iodlist_lock); if (error) { /* give up */ diff --git a/sys/nfs/nfs_vfsops.c b/sys/nfs/nfs_vfsops.c index 6eea244eeb18..676c173d444e 100644 --- a/sys/nfs/nfs_vfsops.c +++ b/sys/nfs/nfs_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: nfs_vfsops.c,v 1.188 2007/11/26 19:02:21 pooka Exp $ */ +/* $NetBSD: nfs_vfsops.c,v 1.189 2008/01/02 11:49:04 ad Exp $ */ /* * Copyright (c) 1989, 1993, 1995 @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: nfs_vfsops.c,v 1.188 2007/11/26 19:02:21 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nfs_vfsops.c,v 1.189 2008/01/02 11:49:04 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" @@ -893,7 +893,6 @@ nfs_unmount(struct mount *mp, int mntflags) * There are two reference counts to get rid of here * (see comment in mountnfs()). */ - vrele(vp); vput(vp); vgone(vp); nfs_disconnect(nmp); @@ -945,37 +944,46 @@ nfs_sync(mp, waitfor, cred) int waitfor; kauth_cred_t cred; { - struct vnode *vp, *nvp; + struct vnode *vp, *mvp; int error, allerror = 0; /* * Force stale buffer cache information to be flushed. 
*/ + if ((mvp = valloc(mp)) == NULL) + return (ENOMEM); loop: /* * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() * and vclean() can be called indirectly */ - for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { - /* - * If the vnode that we are about to sync is no longer - * associated with this mount point, start over. - */ - if (vp->v_mount != mp) - goto loop; - nvp = TAILQ_NEXT(vp, v_mntvnodes); + mutex_enter(&mntvnode_lock); + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + if (vp->v_mount != mp || vismarker(vp)) + continue; + mutex_enter(&vp->v_interlock); + /* XXX MNT_LAZY cannot be right? */ if (waitfor == MNT_LAZY || VOP_ISLOCKED(vp) || (LIST_EMPTY(&vp->v_dirtyblkhd) && - UVM_OBJ_IS_CLEAN(&vp->v_uobj))) + UVM_OBJ_IS_CLEAN(&vp->v_uobj))) { + mutex_exit(&vp->v_interlock); continue; - if (vget(vp, LK_EXCLUSIVE)) + } + mutex_exit(&mntvnode_lock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { + (void)vunmark(mvp); goto loop; + } error = VOP_FSYNC(vp, cred, waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0); if (error) allerror = error; vput(vp); + mutex_enter(&mntvnode_lock); } + mutex_exit(&mntvnode_lock); + vfree(mvp); return (allerror); } diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c index 282a69793891..07141498ae74 100644 --- a/sys/nfs/nfs_vnops.c +++ b/sys/nfs/nfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: nfs_vnops.c,v 1.262 2007/12/17 16:04:31 yamt Exp $ */ +/* $NetBSD: nfs_vnops.c,v 1.263 2008/01/02 11:49:04 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: nfs_vnops.c,v 1.262 2007/12/17 16:04:31 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nfs_vnops.c,v 1.263 2008/01/02 11:49:04 ad Exp $"); #include "opt_inet.h" #include "opt_nfs.h" @@ -3284,7 +3284,7 @@ nfs_flush(struct vnode *vp, kauth_cred_t cred, int waitfor, struct lwp *l, int flushflags = PGO_ALLPAGES|PGO_CLEANIT|PGO_SYNCIO; UVMHIST_FUNC("nfs_flush"); UVMHIST_CALLED(ubchist); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, 0, 0, flushflags); if (np->n_flag & NWRITEERR) { error = np->n_error; diff --git a/sys/rump/fs/lib/libp2k/p2k.c b/sys/rump/fs/lib/libp2k/p2k.c index b21da382d4f6..5f20ba799cff 100644 --- a/sys/rump/fs/lib/libp2k/p2k.c +++ b/sys/rump/fs/lib/libp2k/p2k.c @@ -1,4 +1,4 @@ -/* $NetBSD: p2k.c,v 1.32 2008/01/01 22:31:42 pooka Exp $ */ +/* $NetBSD: p2k.c,v 1.33 2008/01/02 11:49:05 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved. @@ -683,11 +683,12 @@ int p2k_node_inactive(struct puffs_usermount *pu, void *opc) { struct vnode *vp = opc; + bool recycle; int rv; (void) RUMP_VOP_PUTPAGES(vp, 0, 0, PGO_ALLPAGES); VLE(vp); - rv = RUMP_VOP_INACTIVE(vp); + rv = RUMP_VOP_INACTIVE(vp, &recycle); if (vp->v_usecount == 0) puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N1); diff --git a/sys/rump/fs/lib/libukfs/ukfs.c b/sys/rump/fs/lib/libukfs/ukfs.c index 6aa218c2b15e..bab096f94459 100644 --- a/sys/rump/fs/lib/libukfs/ukfs.c +++ b/sys/rump/fs/lib/libukfs/ukfs.c @@ -1,4 +1,4 @@ -/* $NetBSD: ukfs.c,v 1.15 2007/11/26 19:02:23 pooka Exp $ */ +/* $NetBSD: ukfs.c,v 1.16 2008/01/02 11:49:05 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved. 
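The nfs_sync() rewrite above introduces the pattern this merge uses for every walk of a mount's vnode list: mntvnode_lock guards the list, and a marker vnode (valloc()/vmark()/vunmark(), declared in the sys/sys/vnode.h hunks further down) holds the iteration position while the lock is dropped around per-vnode work. A condensed sketch of the idiom follows; do_one_vnode() is a hypothetical callback and the error handling of the real loop is trimmed.

	#include <sys/param.h>
	#include <sys/mount.h>
	#include <sys/mutex.h>
	#include <sys/vnode.h>

	static void
	do_one_vnode(struct vnode *vp)
	{

		/* hypothetical per-vnode work, e.g. a VOP_FSYNC() call */
	}

	void
	example_vnode_walk(struct mount *mp)
	{
		struct vnode *vp, *mvp;

		if ((mvp = valloc(mp)) == NULL)		/* marker vnode */
			return;
		mutex_enter(&mntvnode_lock);
		for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
		    vp = vunmark(mvp)) {
			vmark(mvp, vp);			/* remember position */
			if (vp->v_mount != mp || vismarker(vp))
				continue;		/* skip other markers */
			mutex_enter(&vp->v_interlock);
			mutex_exit(&mntvnode_lock);	/* marker keeps place */
			if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0) {
				do_one_vnode(vp);
				vput(vp);
			}
			mutex_enter(&mntvnode_lock);
		}
		mutex_exit(&mntvnode_lock);
		vfree(mvp);
	}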
@@ -155,6 +155,7 @@ ukfs_release(struct ukfs *fs, int dounmount) void ukfs_ll_recycle(struct vnode *vp) { + bool recycle; /* XXXXX */ if (vp == NULL || rump_vp_getref(vp)) @@ -162,7 +163,7 @@ ukfs_ll_recycle(struct vnode *vp) VLE(vp); RUMP_VOP_FSYNC(vp, NULL, 0, 0, 0); - RUMP_VOP_INACTIVE(vp); + RUMP_VOP_INACTIVE(vp, &recycle); rump_recyclenode(vp); rump_putnode(vp); } diff --git a/sys/rump/include/machine/intr.h b/sys/rump/include/machine/intr.h index 0bd30eddda83..4f073ac3f70b 100644 --- a/sys/rump/include/machine/intr.h +++ b/sys/rump/include/machine/intr.h @@ -1,4 +1,4 @@ -/* $NetBSD: intr.h,v 1.7 2007/12/03 15:34:33 ad Exp $ */ +/* $NetBSD: intr.h,v 1.8 2008/01/02 11:49:05 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved. @@ -47,6 +47,7 @@ void rump_splx(int); #define splx(x) rump_splx(x) #define IPL_NONE 0 +#define IPL_SOFTBIO 0 #define IPL_SCHED 0 #define IPL_VM 0 diff --git a/sys/rump/librump/rumpkern/fstrans_stub.c b/sys/rump/librump/rumpkern/fstrans_stub.c index 1b0d2c409b46..77bd01e6318a 100644 --- a/sys/rump/librump/rumpkern/fstrans_stub.c +++ b/sys/rump/librump/rumpkern/fstrans_stub.c @@ -1,4 +1,4 @@ -/* $NetBSD: fstrans_stub.c,v 1.3 2007/12/02 18:24:34 hannken Exp $ */ +/* $NetBSD: fstrans_stub.c,v 1.4 2008/01/02 11:49:05 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved. @@ -87,3 +87,16 @@ fscow_run(struct buf *bp, bool data_valid) return 0; } + +int +fstrans_mount(struct mount *mp) +{ + + return 0; +} + +void +fstrans_unmount(struct mount *mp) +{ + +} diff --git a/sys/rump/librump/rumpkern/genfs_io.c b/sys/rump/librump/rumpkern/genfs_io.c index 81c9320a6c9f..47d6ad601613 100644 --- a/sys/rump/librump/rumpkern/genfs_io.c +++ b/sys/rump/librump/rumpkern/genfs_io.c @@ -1,4 +1,4 @@ -/* $NetBSD: genfs_io.c,v 1.6 2007/11/07 18:59:18 pooka Exp $ */ +/* $NetBSD: genfs_io.c,v 1.7 2008/01/02 11:49:05 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved. @@ -192,17 +192,17 @@ genfs_getpages(void *v) continue; } - bp = getiobuf(); + bp = getiobuf(vp, true); bp->b_data = tmpbuf + bufoff; bp->b_bcount = xfersize; bp->b_blkno = bn; bp->b_lblkno = 0; - bp->b_flags = B_READ | B_BUSY; - bp->b_vp = vp; + bp->b_flags = B_READ; + bp->b_cflags = BC_BUSY; if (async) { - bp->b_flags |= B_ASYNC | B_CALL; + bp->b_flags |= B_ASYNC; bp->b_iodone = uvm_aio_biodone; } @@ -374,7 +374,7 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, if (bn == -1) continue; - bp = getiobuf(); + bp = getiobuf(vp, true); /* only write max what we are allowed to write */ bp->b_bcount = xfersize; @@ -394,11 +394,11 @@ genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags, bp->b_lblkno = 0; bp->b_blkno = bn + (((smallest+bufoff)&(bsize-1))>>DEV_BSHIFT); bp->b_data = databuf + bufoff; - bp->b_vp = vp; - bp->b_flags = B_WRITE | B_BUSY; - bp->b_iodone = uvm_aio_biodone; + bp->b_flags = B_WRITE; + bp->b_cflags |= BC_BUSY; + if (async) { - bp->b_flags |= B_CALL | B_ASYNC; + bp->b_flags |= B_ASYNC; bp->b_iodone = uvm_aio_biodone; } diff --git a/sys/rump/librump/rumpkern/intr.c b/sys/rump/librump/rumpkern/intr.c new file mode 100644 index 000000000000..45a8f73eb880 --- /dev/null +++ b/sys/rump/librump/rumpkern/intr.c @@ -0,0 +1,87 @@ +/* $NetBSD: intr.c,v 1.2 2008/01/02 11:49:06 ad Exp $ */ + +/*- + * Copyright (c) 2007 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Andrew Doran. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include "rump.h" +#include "rumpuser.h" + +struct v_dodgy { + void (*func)(void *); + void *arg; +}; + +void * +softint_establish(u_int flags, void (*func)(void *), void *arg) +{ + struct v_dodgy *vd; + + vd = kmem_alloc(sizeof(*vd), KM_SLEEP); + if (vd != NULL) { + vd->func = func; + vd->arg = arg; + } + return vd; +} + +void +softint_disestablish(void *arg) +{ + + kmem_free(arg, sizeof(struct v_dodgy)); +} + +void +softint_schedule(void *arg) +{ + struct v_dodgy *vd; + + vd = arg; + (*(vd->func))(vd->arg); +} + +bool +cpu_intr_p(void) +{ + + return false; +} diff --git a/sys/rump/librump/rumpkern/locks.c b/sys/rump/librump/rumpkern/locks.c index 21559aa6f25a..00819d826553 100644 --- a/sys/rump/librump/rumpkern/locks.c +++ b/sys/rump/librump/rumpkern/locks.c @@ -1,4 +1,4 @@ -/* $NetBSD: locks.c,v 1.6 2008/01/01 22:03:24 pooka Exp $ */ +/* $NetBSD: locks.c,v 1.7 2008/01/02 11:49:06 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved.
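For context, the softint(9) entry points stubbed out above are used as below from MI code; in the rump shim, softint_schedule() simply runs the handler synchronously in the calling thread. A minimal usage sketch: the example_* names are hypothetical, and the SOFTINT_BIO level is arbitrary here since the stub ignores its flags argument.

	#include <sys/intr.h>

	static int example_count;

	static void
	example_handler(void *arg)
	{
		int *countp = arg;

		(*countp)++;
	}

	void
	example_softint_usage(void)
	{
		void *sih;

		/* The stub only records func/arg in its handle. */
		sih = softint_establish(SOFTINT_BIO, example_handler,
		    &example_count);

		/* In rump this runs example_handler(&example_count) now. */
		softint_schedule(sih);

		softint_disestablish(sih);
	}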
@@ -185,6 +185,14 @@ cv_wait_sig(kcondvar_t *cv, kmutex_t *mtx) return 0; } +int +cv_wait_sig(kcondvar_t *cv, kmutex_t *mtx) +{ + + rumpuser_cv_wait(RUMPCV(cv), mtx->kmtx_mtx); + return 0; +} + int cv_timedwait(kcondvar_t *cv, kmutex_t *mtx, int ticks) { @@ -203,6 +211,15 @@ cv_timedwait_sig(kcondvar_t *cv, kmutex_t *mtx, int ticks) return rumpuser_cv_timedwait(RUMPCV(cv), mtx->kmtx_mtx, ticks); } +int +cv_timedwait_sig(kcondvar_t *cv, kmutex_t *mtx, int ticks) +{ + extern int hz; + + KASSERT(hz == 100); + return rumpuser_cv_timedwait(RUMPCV(cv), mtx->kmtx_mtx, ticks); +} + void cv_signal(kcondvar_t *cv) { diff --git a/sys/rump/librump/rumpkern/ltsleep.c b/sys/rump/librump/rumpkern/ltsleep.c index 019a1a8629a0..19263f2c82cc 100644 --- a/sys/rump/librump/rumpkern/ltsleep.c +++ b/sys/rump/librump/rumpkern/ltsleep.c @@ -1,4 +1,4 @@ -/* $NetBSD: ltsleep.c,v 1.3 2007/11/07 18:59:18 pooka Exp $ */ +/* $NetBSD: ltsleep.c,v 1.4 2008/01/02 11:49:06 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved. @@ -83,6 +83,44 @@ ltsleep(wchan_t ident, pri_t prio, const char *wmesg, int timo, return 0; } +int +mtsleep(wchan_t ident, pri_t prio, const char *wmesg, int timo, + kmutex_t *lock) +{ + struct ltsleeper lts; + int iplrecurse; + + lts.id = ident; + cv_init(<s.cv, NULL); + + mutex_enter(&sleepermtx); + LIST_INSERT_HEAD(&sleepers, <s, entries); + + /* release spl */ + iplrecurse = rumpuser_whatis_ipl(); + while (iplrecurse--) + rumpuser_rw_exit(&rumpspl); + + /* protected by sleepermtx */ + mutex_exit(lock); + cv_wait(<s.cv, &sleepermtx); + + /* retake ipl */ + iplrecurse = rumpuser_whatis_ipl(); + while (iplrecurse--) + rumpuser_rw_enter(&rumpspl, 0); + + LIST_REMOVE(<s, entries); + mutex_exit(&sleepermtx); + + cv_destroy(<s.cv); + + if ((prio & PNORELOCK) == 0) + mutex_enter(lock); + + return 0; +} + void wakeup(wchan_t ident) { diff --git a/sys/rump/librump/rumpkern/vfs.c b/sys/rump/librump/rumpkern/vfs.c index b646623b6db8..536021a9a461 100644 --- a/sys/rump/librump/rumpkern/vfs.c +++ b/sys/rump/librump/rumpkern/vfs.c @@ -1,4 +1,4 @@ -/* $NetBSD: vfs.c,v 1.21 2007/11/26 19:02:24 pooka Exp $ */ +/* $NetBSD: vfs.c,v 1.22 2008/01/02 11:49:06 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved. 
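The ltsleep() to mtsleep() substitutions applied in the netsmb and nfs hunks above, and backed by the rump implementation just added, are mechanical: the wait channel, priority, timeout, and PNORELOCK semantics are unchanged, but the interlock argument is now a kmutex_t * instead of a struct simplelock *. A schematic example of the converted idiom; the sc_* softc is hypothetical.

	#include <sys/param.h>
	#include <sys/mutex.h>
	#include <sys/proc.h>

	struct sc_softc {
		kmutex_t	sc_lock;
		int		sc_busy;
	};

	static int
	sc_wait(struct sc_softc *sc)
	{
		int error = 0;

		mutex_enter(&sc->sc_lock);
		while (sc->sc_busy && error == 0) {
			/* Drops sc_lock while asleep, retakes it on wakeup. */
			error = mtsleep(&sc->sc_busy, PWAIT, "scwait", 0,
			    &sc->sc_lock);
		}
		mutex_exit(&sc->sc_lock);
		return error;
	}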
@@ -70,6 +70,13 @@ const struct vnodeopv_entry_desc fifo_vnodeop_entries[] = { const struct vnodeopv_desc fifo_vnodeop_opv_desc = { &fifo_vnodeop_p, fifo_vnodeop_entries }; +struct vnode *speclisth[SPECHSZ]; + +void +vn_init1(void) +{ + +} int getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *), @@ -145,11 +152,24 @@ vrele(struct vnode *vp) } void -vrele2(struct vnode *vp, int onhead) +vrelel(struct vnode *vp, int doinactive, int onhead) { } +void +vrele2(struct vnode *vp, bool onhead) +{ + +} + +void +vfree(vnode_t *vp) +{ + + /* XXX */ +} + void vput(struct vnode *vp) { @@ -164,6 +184,13 @@ vgone(struct vnode *vp) vgonel(vp, curlwp); } +void +vclean(struct vnode *vp, int flag) +{ + + vgonel(vp, curlwp); +} + void vgonel(struct vnode *vp, struct lwp *l) { @@ -183,18 +210,19 @@ holdrelel(struct vnode *vp) } int -vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct lwp *l) +vrecycle(struct vnode *vp, kmutex_t *inter_lkp, struct lwp *l) { struct mount *mp = vp->v_mount; + bool recycle; if (vp->v_usecount == 1) { vp->v_usecount = 0; simple_lock(&vp->v_interlock); if (inter_lkp) - simple_unlock(inter_lkp); + mutex_exit(inter_lkp); VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK); vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0); - VOP_INACTIVE(vp); + VOP_INACTIVE(vp, &recycle); VOP_RECLAIM(vp); TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes); diff --git a/sys/rump/librump/rumpkern/vm.c b/sys/rump/librump/rumpkern/vm.c index 4921b9961dd0..28746e80f0a3 100644 --- a/sys/rump/librump/rumpkern/vm.c +++ b/sys/rump/librump/rumpkern/vm.c @@ -1,4 +1,4 @@ -/* $NetBSD: vm.c,v 1.24 2007/12/01 10:45:42 yamt Exp $ */ +/* $NetBSD: vm.c,v 1.25 2008/01/02 11:49:06 ad Exp $ */ /* * Copyright (c) 2007 Antti Kantee. All Rights Reserved. @@ -79,6 +79,8 @@ const struct uvm_pagerops aobj_pager = { .pgo_put = ao_put, }; +kmutex_t uvm_pageqlock; + struct uvmexp uvmexp; struct uvm uvm; @@ -423,6 +425,7 @@ rumpvm_init() mutex_init(&rvamtx, MUTEX_DEFAULT, 0); mutex_init(&uwinmtx, MUTEX_DEFAULT, 0); + mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0); } void @@ -531,7 +534,7 @@ void uvm_aio_aiodone(struct buf *bp) { - if ((bp->b_flags & (B_READ | B_NOCACHE)) == 0 && bioopsp) + if (((bp->b_flags | bp->b_cflags) & (B_READ | BC_NOCACHE)) == 0 && bioopsp) bioopsp->io_pageiodone(bp); } @@ -668,3 +671,27 @@ uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr, return (struct vm_map *)417416; } + +void +uvm_pageout_start(int npages) +{ + + uvmexp.paging += npages; +} + +void +uvm_pageout_done(int npages) +{ + + uvmexp.paging -= npages; + + /* + * wake up either of pagedaemon or LWPs waiting for it. + */ + + if (uvmexp.free <= uvmexp.reserve_kernel) { + wakeup(&uvm.pagedaemon); + } else { + wakeup(&uvmexp.free); + } +} diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 7de2890da56b..2a5aab671ed5 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -1,12 +1,12 @@ -/* $NetBSD: buf.h,v 1.101 2007/12/24 15:11:19 ad Exp $ */ +/* $NetBSD: buf.h,v 1.102 2008/01/02 11:49:07 ad Exp $ */ /*- - * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2007 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, - * NASA Ames Research Center. + * NASA Ames Research Center, and by Andrew Doran. 
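Together with the genfs_io.c hunks above, this shows the post-merge shape of driver-level I/O: getiobuf() now takes the associated vnode and a wait flag, ownership is asserted through BC_BUSY in b_cflags rather than B_BUSY in b_flags, and completion is reported through BO_DONE in b_oflags. A minimal synchronous read, as a sketch only: the helper name is hypothetical and error paths are trimmed.

	#include <sys/buf.h>
	#include <sys/vnode.h>

	int
	example_read_block(struct vnode *devvp, daddr_t blkno, void *data,
	    int size)
	{
		buf_t *bp;
		int error;

		bp = getiobuf(devvp, true);	/* waitok: never NULL */
		bp->b_flags = B_READ;
		bp->b_cflags = BC_BUSY;
		bp->b_blkno = blkno;
		bp->b_bcount = size;
		bp->b_data = data;

		VOP_STRATEGY(devvp, bp);
		error = biowait(bp);		/* sleeps until BO_DONE */
		putiobuf(bp);
		return error;
	}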
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -78,7 +78,8 @@ #include #include -#include +#include +#include #if defined(_KERNEL) #include #endif /* defined(_KERNEL) */ @@ -95,7 +96,6 @@ struct kauth_cred; */ LIST_HEAD(workhead, worklist); - /* * These are currently used only by the soft dependency code, hence * are stored once in a global variable. If other subsystems wanted @@ -113,40 +113,48 @@ struct bio_ops { void (*io_pageiodone)(struct buf *); }; +extern kmutex_t bufcache_lock; +extern kmutex_t buffer_lock; + /* * The buffer header describes an I/O operation in the kernel. + * + * Field markings and the corresponding locks: + * + * b owner (thread that holds BC_BUSY) and/or thread calling biodone() + * c bufcache_lock + * o b_objlock + * + * For buffers associated with a vnode, b_objlock points to vp->v_interlock. + * If not associated with a vnode, it points to the generic buffer_lock. */ struct buf { union { - TAILQ_ENTRY(buf) u_actq; /* Device driver queue when active. */ + TAILQ_ENTRY(buf) u_actq; #if defined(_KERNEL) /* u_work is smaller than u_actq. XXX */ struct work u_work; #endif /* defined(_KERNEL) */ - } b_u; + } b_u; /* b: device driver queue */ #define b_actq b_u.u_actq #define b_work b_u.u_work - struct simplelock b_interlock; /* Lock for b_flags changes */ - volatile int b_flags; /* B_* flags. */ - int b_error; /* Errno value. */ - int b_prio; /* Hint for buffer queue discipline. */ - int b_bufsize; /* Allocated buffer size. */ - int b_bcount; /* Valid bytes in buffer. */ - int b_resid; /* Remaining I/O. */ - dev_t b_dev; /* Device associated with buffer. */ - void *b_data; /* Memory, superblocks, indirect etc. */ - daddr_t b_blkno; /* Underlying physical block number - (partition relative) */ - daddr_t b_rawblkno; /* Raw underlying physical block - number (not partition relative) */ - /* Function to call upon completion. */ - void (*b_iodone)(struct buf *); - struct proc *b_proc; /* Associated proc if B_PHYS set. */ - struct vnode *b_vp; /* File vnode. */ - struct workhead b_dep; /* List of filesystem dependencies. */ - void *b_saveaddr; /* Original b_addr for physio. */ + void (*b_iodone)(struct buf *);/* b: call when done */ + int b_error; /* b: errno value. */ + int b_resid; /* b: remaining I/O. */ + u_int b_flags; /* b: B_* flags */ + int b_prio; /* b: priority for queue */ + int b_bufsize; /* b: allocated size */ + int b_bcount; /* b: valid bytes in buffer */ + dev_t b_dev; /* b: associated device */ + void *b_data; /* b: fs private data */ + daddr_t b_blkno; /* b: physical block number + (partition relative) */ + daddr_t b_rawblkno; /* b: raw physical block number + (volume relative) */ + struct proc *b_proc; /* b: proc if B_PHYS */ + void *b_saveaddr; /* b: saved b_data for physio */ /* - * private data for owner. + * b: private data for owner. * - buffer cache buffers are owned by corresponding filesystem. * - non-buffer cache buffers are owned by subsystem which * allocated them. (filesystem, disk driver, etc) @@ -154,24 +162,21 @@ struct buf { void *b_private; off_t b_dcookie; /* NFS: Offset cookie if dir block */ - /* - * buffer cache specific data - */ - LIST_ENTRY(buf) b_hash; /* Hash chain. */ - LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */ - TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */ - daddr_t b_lblkno; /* Logical block number. */ - int b_freelistindex; /* Free list index.
(BQ_) */ -}; + kcondvar_t b_busy; /* c: threads waiting on buf */ + u_int b_refcnt; /* c: refcount for b_busy */ + struct workhead b_dep; /* c: softdep */ + LIST_ENTRY(buf) b_hash; /* c: hash chain */ + LIST_ENTRY(buf) b_vnbufs; /* c: associated vnode */ + TAILQ_ENTRY(buf) b_freelist; /* c: position if not active */ + daddr_t b_lblkno; /* c: logical block number */ + int b_freelistindex;/* c: free list index (BQ_) */ + u_int b_cflags; /* c: BC_* flags */ + struct vnode *b_vp; /* c: file vnode */ -#define BUF_INIT(bp) \ -do { \ - LIST_INIT(&(bp)->b_dep); \ - simple_lock_init(&(bp)->b_interlock); \ - (bp)->b_dev = NODEV; \ - (bp)->b_error = 0; \ - BIO_SETPRIO((bp), BPRIO_DEFAULT); \ -} while (/*CONSTCOND*/0) + kcondvar_t b_done; /* o: waiting on completion */ + u_int b_oflags; /* o: BO_* flags */ + kmutex_t *b_objlock; /* o: completion lock */ +}; /* * For portability with historic industry practice, the cylinder number has @@ -180,43 +185,39 @@ do { \ #define b_cylinder b_resid /* Cylinder number for disksort(). */ /* - * These flags are kept in b_flags. + * These flags are kept in b_cflags (owned by buffer cache). */ -#define B_AGE 0x00000001 /* Move to age queue when I/O done. */ +#define BC_AGE 0x00000001 /* Move to age queue when I/O done. */ +#define BC_BUSY 0x00000010 /* I/O in progress. */ +#define BC_SCANNED 0x00000020 /* Block already pushed during sync */ +#define BC_INVAL 0x00002000 /* Does not contain valid info. */ +#define BC_LOCKED 0x00004000 /* Locked in core (not reusable). */ +#define BC_NOCACHE 0x00008000 /* Do not cache block after use. */ +#define BC_WANTED 0x00800000 /* Process wants this buffer. */ +#define BC_VFLUSH 0x04000000 /* Buffer is being synced. */ + +/* + * These flags are kept in b_oflags (owned by associated object). + */ +#define BO_DELWRI 0x00000080 /* Delay I/O until buffer reused. */ +#define BO_DONE 0x00000200 /* I/O completed. */ +#define BO_COWDONE 0x00000400 /* Copy-on-write already done. */ + +/* + * These flags are kept in b_flags (owned by buffer holder). + */ +#define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */ #define B_ASYNC 0x00000004 /* Start I/O, do not wait. */ -#define B_BAD 0x00000008 /* Bad block revectoring in progress. */ -#define B_BUSY 0x00000010 /* I/O in progress. */ -#define B_SCANNED 0x00000020 /* Block already pushed during sync */ -#define B_CALL 0x00000040 /* Call b_iodone from biodone. */ -#define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */ -#define B_DIRTY 0x00000100 /* Dirty page to be pushed out async. */ -#define B_DONE 0x00000200 /* I/O completed. */ -#define B_COWDONE 0x00000400 /* Copy-on-write already done. */ #define B_GATHERED 0x00001000 /* LFS: already in a segment. */ -#define B_INVAL 0x00002000 /* Does not contain valid info. */ -#define B_LOCKED 0x00004000 /* Locked in core (not reusable). */ -#define B_NOCACHE 0x00008000 /* Do not cache block after use. */ -#define B_CACHE 0x00020000 /* Bread found us in the cache. */ #define B_PHYS 0x00040000 /* I/O to user memory. */ #define B_RAW 0x00080000 /* Set by physio for raw transfers. */ #define B_READ 0x00100000 /* Read buffer. */ -#define B_TAPE 0x00200000 /* Magnetic tape I/O. */ -#define B_WANTED 0x00800000 /* Process wants this buffer. */ -#define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */ -#define B_FSPRIVATE 0x01000000 /* File system private flag. */ #define B_DEVPRIVATE 0x02000000 /* Device driver private flag. */ -#define B_VFLUSH 0x04000000 /* Buffer is being synced. 
*/ #define BUF_FLAGBITS \ - "\20\1AGE\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI" \ - "\11DIRTY\12DONE\13COWDONE\15GATHERED\16INVAL\17LOCKED\20NOCACHE" \ - "\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED\31FSPRIVATE\32DEVPRIVATE" \ - "\33VFLUSH" - -/* XXX Compat for vmlocking branch. */ -#define BC_AGE B_AGE -#define BC_INVAL B_INVAL -#define BC_NOCACHE B_NOCACHE + "\20\1AGE\3ASYNC\4BAD\5BUSY\6SCANNED\10DELWRI" \ + "\12DONE\13COWDONE\15GATHERED\16INVAL\17LOCKED\20NOCACHE" \ + "\23PHYS\24RAW\25READ\32DEVPRIVATE\33VFLUSH" /* Avoid weird code due to B_WRITE being a "pseudo flag" */ #define BUF_ISREAD(bp) (((bp)->b_flags & B_READ) == B_READ) @@ -233,7 +234,7 @@ struct cluster_save { long bs_bufsize; /* Saved b_bufsize. */ void *bs_saveaddr; /* Saved b_addr. */ int bs_nchildren; /* Number of associated buffers. */ - struct buf **bs_children; /* List of associated buffers. */ + struct buf *bs_children; /* List of associated buffers. */ }; /* @@ -266,46 +267,50 @@ extern struct bio_ops *bioopsp; extern u_int nbuf; /* The number of buffer headers */ __BEGIN_DECLS -void allocbuf(struct buf *, int, int); -void bawrite(struct buf *); -void bdirty(struct buf *); -void bdwrite(struct buf *); -void biodone(struct buf *); -int biowait(struct buf *); -int bread(struct vnode *, daddr_t, int, struct kauth_cred *, struct buf **); +int allocbuf(buf_t *, int, int); +void bawrite(buf_t *); +void bdirty(buf_t *); +void bdwrite(buf_t *); +void biodone(buf_t *); +int biowait(buf_t *); +int bread(struct vnode *, daddr_t, int, struct kauth_cred *, buf_t **); int breada(struct vnode *, daddr_t, int, daddr_t, int, struct kauth_cred *, - struct buf **); + buf_t **); int breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int, - struct kauth_cred *, struct buf **); -void brelse(struct buf *, int); -void bremfree(struct buf *); + struct kauth_cred *, buf_t **); +void brelsel(buf_t *, int); +void brelse(buf_t *, int); +void bremfree(buf_t *); void bufinit(void); -int bwrite(struct buf *); -struct buf *getblk(struct vnode *, daddr_t, int, int, int); -struct buf *geteblk(int); -struct buf *incore(struct vnode *, daddr_t); +void bufinit2(void); +int bwrite(buf_t *); +buf_t *getblk(struct vnode *, daddr_t, int, int, int); +buf_t *geteblk(int); +buf_t *incore(struct vnode *, daddr_t); -void minphys(struct buf *); -int physio(void (*)(struct buf *), struct buf *, dev_t, int, - void (*)(struct buf *), struct uio *); +void minphys(buf_t *); +int physio(void (*)(buf_t *), buf_t *, dev_t, int, + void (*)(buf_t *), struct uio *); -void brelvp(struct buf *); -void reassignbuf(struct buf *, struct vnode *); -void bgetvp(struct vnode *, struct buf *); +void brelvp(buf_t *); +void reassignbuf(buf_t *, struct vnode *); +void bgetvp(struct vnode *, buf_t *); int buf_syncwait(void); u_long buf_memcalc(void); int buf_drain(int); int buf_setvalimit(vsize_t); #ifdef DDB -void vfs_buf_print(struct buf *, int, void (*)(const char *, ...)); +void vfs_buf_print(buf_t *, int, void (*)(const char *, ...)); #endif -struct buf *getiobuf(void); -struct buf *getiobuf_nowait(void); -void putiobuf(struct buf *); +buf_t *getiobuf(struct vnode *, bool); +void putiobuf(buf_t *); +void buf_init(buf_t *); +void buf_destroy(buf_t *); +int bbusy(buf_t *, bool, int); -void nestiobuf_iodone(struct buf *); -void nestiobuf_setup(struct buf *, struct buf *, int, size_t); -void nestiobuf_done(struct buf *, int, int); +void nestiobuf_iodone(buf_t *); +void nestiobuf_setup(buf_t *, buf_t *, int, size_t); +void nestiobuf_done(buf_t *, int, int); __END_DECLS #endif /* 
_KERNEL */ diff --git a/sys/sys/fstrans.h b/sys/sys/fstrans.h index bcef8c1352aa..3aa16e2b5551 100644 --- a/sys/sys/fstrans.h +++ b/sys/sys/fstrans.h @@ -1,4 +1,4 @@ -/* $NetBSD: fstrans.h,v 1.7 2007/12/02 13:56:19 hannken Exp $ */ +/* $NetBSD: fstrans.h,v 1.8 2008/01/02 11:49:07 ad Exp $ */ /*- * Copyright (c) 2007 The NetBSD Foundation, Inc. @@ -72,6 +72,8 @@ do { \ int _fstrans_start(struct mount *, enum fstrans_lock_type, int); void fstrans_done(struct mount *); int fstrans_is_owner(struct mount *); +int fstrans_mount(struct mount *); +void fstrans_unmount(struct mount *); int fstrans_setstate(struct mount *, enum fstrans_state); enum fstrans_state fstrans_getstate(struct mount *); diff --git a/sys/sys/lock.h b/sys/sys/lock.h index 36e7bbd6473b..4d85721ca9d8 100644 --- a/sys/sys/lock.h +++ b/sys/sys/lock.h @@ -1,4 +1,4 @@ -/* $NetBSD: lock.h,v 1.76 2007/12/06 17:05:07 ad Exp $ */ +/* $NetBSD: lock.h,v 1.77 2008/01/02 11:49:07 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2006, 2007 The NetBSD Foundation, Inc. @@ -82,6 +82,7 @@ #include #include +#include #include #include @@ -90,9 +91,9 @@ * The general lock structure. */ struct lock { - struct simplelock lk_interlock;/* lock on remaining fields */ u_int lk_flags; /* see below */ int lk_sharecount; /* # of accepted shared locks */ + kmutex_t lk_interlock; /* lock on structure */ short lk_exclusivecount; /* # of recursive exclusive locks */ short lk_recurselevel; /* lvl above which recursion ok */ int lk_waitcount; /* # of sleepers */ @@ -147,7 +148,6 @@ struct lock { #define LK_SLEEPFAIL 0x00000020 /* sleep, then return failure */ #define LK_CANRECURSE 0x00000040 /* this may be recursive lock attempt */ #define LK_REENABLE 0x00000080 /* lock is be reenabled after drain */ -#define LK_SETRECURSE 0x00100000 /* other locks while we have it OK */ #define LK_RECURSEFAIL 0x00200000 /* attempt at recursive lock fails */ #define LK_RESURRECT 0x00800000 /* immediately reenable drained lock */ /* @@ -160,6 +160,7 @@ struct lock { #define LK_WAITDRAIN 0x00000800 /* process waiting for lock to drain */ #define LK_DRAINING 0x00004000 /* lock is being drained */ #define LK_DRAINED 0x00008000 /* lock has been decommissioned */ +#define LK_DODEBUG 0x00010000 /* has lockdebug bits */ /* * Internal state flags corresponding to lk_sharecount, and lk_waitcount */ @@ -177,7 +178,6 @@ struct lock { #define __LK_FLAG_BITS \ "\20" \ "\22LK_RECURSEFAIL" \ - "\21LK_SETRECURSE" \ "\20LK_WAIT_NOZERO" \ "\19LK_SHARE_NOZERO" \ "\18LK_RETRY" \ @@ -211,7 +211,7 @@ struct proc; void lockinit(struct lock *, pri_t, const char *, int, int); void lockdestroy(struct lock *); -int lockmgr(struct lock *, u_int flags, struct simplelock *); +int lockmgr(struct lock *, u_int flags, kmutex_t *); void transferlockers(struct lock *, struct lock *); int lockstatus(struct lock *); void lockmgr_printinfo(struct lock *); diff --git a/sys/sys/mount.h b/sys/sys/mount.h index d175bdc345dc..b8a17691a843 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -1,4 +1,4 @@ -/* $NetBSD: mount.h,v 1.168 2007/12/24 14:58:38 ad Exp $ */ +/* $NetBSD: mount.h,v 1.169 2008/01/02 11:49:07 ad Exp $ */ /* * Copyright (c) 1989, 1991, 1993 @@ -40,13 +40,16 @@ #include #endif /* _NETBSD_SOURCE */ #endif + +#ifndef _STANDALONE #include #include #include #include #include +#include #include -#include +#endif /* !_STANDALONE */ /* * file system statistics @@ -89,13 +92,13 @@ #define MOUNT_EFS "efs" /* SGI's Extent Filesystem */ #define MOUNT_ZFS "zfs" /* Sun ZFS */ +#ifndef _STANDALONE + /* * Structure per 
mounted file system. Each mounted file system has an * array of operations and an instance record. The file systems are * put on a doubly linked list. */ -TAILQ_HEAD(vnodelst, vnode); - struct mount { CIRCLEQ_ENTRY(mount) mnt_list; /* mount list */ struct vfsops *mnt_op; /* operations on fs */ @@ -111,7 +114,8 @@ struct mount { void *mnt_data; /* private data */ int mnt_wcnt; /* count of vfs_busy waiters */ struct lwp *mnt_unmounter; /* who is unmounting */ - struct simplelock mnt_slock; /* mutex for wcnt */ + kmutex_t mnt_mutex; /* mutex for wcnt */ + void *mnt_transinfo; /* for FS-internal use */ specificdata_reference mnt_specdataref; /* subsystem specific data */ }; @@ -133,7 +137,6 @@ struct mount { #define VFS_MAGICLINKS 4 /* expand 'magic' symlinks */ #define VFSGEN_MAXID 5 /* number of valid vfs.generic ids */ -#ifndef _STANDALONE /* * USE THE SAME NAMES AS MOUNT_*! * @@ -334,9 +337,9 @@ int vfs_stdextattrctl(struct mount *, int, struct vnode *, extern CIRCLEQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ extern struct vfsops *vfssw[]; /* filesystem type table */ extern int nvfssw; -extern kmutex_t mountlist_lock; -extern struct simplelock spechash_slock; -extern kmutex_t vfs_list_lock; +extern kmutex_t mountlist_lock; +extern kmutex_t spechash_lock; +extern kmutex_t vfs_list_lock; long makefstype(const char *); int dounmount(struct mount *, int, struct lwp *); diff --git a/sys/sys/proc.h b/sys/sys/proc.h index bf2bc686dc1f..4b7cde525dba 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -1,4 +1,4 @@ -/* $NetBSD: proc.h,v 1.267 2007/12/31 15:32:14 ad Exp $ */ +/* $NetBSD: proc.h,v 1.268 2008/01/02 11:49:07 ad Exp $ */ /*- * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc. @@ -203,19 +203,19 @@ struct emul { * Field markings and the corresponding locks (not yet fully implemented, * more a statement of intent): * + * a: p_auxlock * k: ktrace_mutex * m: proclist_mutex * l: proclist_lock * s: p_smutex * t: p_stmutex * p: p_mutex - * r: p_raslock * (: unlocked, stable */ struct proc { LIST_ENTRY(proc) p_list; /* l, m: List of all processes */ - kmutex_t p_raslock; /* :: RAS modification lock */ + kmutex_t p_auxlock; /* :: secondary, longer term lock */ kmutex_t p_mutex; /* :: general mutex */ kmutex_t p_smutex; /* :: mutex on scheduling state */ kmutex_t p_stmutex; /* :: mutex on profiling state */ @@ -254,7 +254,7 @@ struct proc { LIST_ENTRY(proc) p_sibling; /* l: List of sibling processes. */ LIST_HEAD(, proc) p_children; /* l: List of children. */ LIST_HEAD(, lwp) p_lwps; /* s: List of LWPs. */ - struct ras *p_raslist; /* r: List of RAS entries */ + struct ras *p_raslist; /* a: List of RAS entries */ /* The following fields are all zeroed upon creation in fork. */ #define p_startzero p_nlwps diff --git a/sys/sys/shm.h b/sys/sys/shm.h index 4f967d73ae94..3c6d1f189b81 100644 --- a/sys/sys/shm.h +++ b/sys/sys/shm.h @@ -1,4 +1,4 @@ -/* $NetBSD: shm.h,v 1.42 2006/11/25 21:40:06 christos Exp $ */ +/* $NetBSD: shm.h,v 1.43 2008/01/02 11:49:07 ad Exp $ */ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. 
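The b/c/o field markings introduced in the sys/sys/buf.h hunks above reduce to a simple completion protocol: b_oflags is covered by b_objlock (the vnode's v_interlock for vnode-backed buffers), biodone() sets BO_DONE and signals the b_done condvar, and waiters block on that condvar. Roughly what a biowait()-style wait now looks like; a simplified sketch without the error and delayed-write handling of the real function.

	#include <sys/buf.h>
	#include <sys/condvar.h>
	#include <sys/mutex.h>

	static int
	example_biowait(buf_t *bp)
	{

		mutex_enter(bp->b_objlock);
		while ((bp->b_oflags & BO_DONE) == 0)
			cv_wait(&bp->b_done, bp->b_objlock);
		mutex_exit(bp->b_objlock);

		return bp->b_error;
	}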
@@ -171,8 +171,8 @@ extern struct shminfo shminfo; extern struct shmid_ds *shmsegs; extern int shm_nused; -#define SHMSEG_FREE 0x0200 -#define SHMSEG_REMOVED 0x0400 +#define SHMSEG_FREE 0x0200 +#define SHMSEG_REMOVED 0x0400 #define SHMSEG_ALLOCATED 0x0800 #define SHMSEG_WANTED 0x1000 #define SHMSEG_RMLINGER 0x2000 diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 4a37b5a37f7c..532065400dfd 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -1,4 +1,4 @@ -/* $NetBSD: vnode.h,v 1.179 2007/12/25 18:33:49 perry Exp $ */ +/* $NetBSD: vnode.h,v 1.180 2008/01/02 11:49:07 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -37,6 +37,7 @@ #include #include #include +#include /* XXX: clean up includes later */ #include /* XXX */ @@ -86,57 +87,77 @@ enum vtagtype { "VT_FILECORE", "VT_NTFS", "VT_VFS", "VT_OVERLAY", "VT_SMBFS", "VT_PTYFS", \ "VT_TMPFS", "VT_UDF", "VT_SYSVBFS", "VT_PUFFS", "VT_HFS", "VT_EFS", "VT_ZFS" +struct vnode; +struct buf; + LIST_HEAD(buflists, buf); +TAILQ_HEAD(vnodelst, vnode); /* * Reading or writing any of these items requires holding the appropriate - * lock. [XXX documented on the vmlocking branch.] + * lock. Field markings and the corresponding locks: + * + * : stable, reference to the vnode is required + * f vnode_free_list_lock, or vrele_lock if VI_INACTPEND + * i v_interlock + * m mntvnode_lock + * n namecache_lock + * s syncer_data_lock + * u locked by underlying filesystem + * v v_vnlock + * x v_interlock + bufcache_lock to modify, either to inspect * * Each underlying filesystem allocates its own private area and hangs * it from v_data. */ struct vnode { - struct uvm_object v_uobj; /* the VM object */ -#define v_usecount v_uobj.uo_refs -#define v_interlock v_uobj.vmobjlock - voff_t v_size; /* size of file */ - voff_t v_writesize; /* new size after write */ + struct uvm_object v_uobj; /* i: the VM object */ + kcondvar_t v_cv; /* i: synchronization */ + int v_waitcnt; /* i: # waiters for VXLOCK */ + voff_t v_size; /* i: size of file */ + voff_t v_writesize; /* i: new size after write */ int v_iflag; /* i: VI_* flags */ int v_vflag; /* v: VV_* flags */ int v_uflag; /* u: VU_* flags */ - int v_numoutput; /* number of pending writes */ - long v_writecount; /* reference count of writers */ - long v_holdcnt; /* page & buffer references */ - struct mount *v_mount; /* ptr to vfs we are in */ - int (**v_op)(void *); /* vnode operations vector */ - TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */ - TAILQ_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */ - struct buflists v_cleanblkhd; /* clean blocklist head */ - struct buflists v_dirtyblkhd; /* dirty blocklist head */ - int v_synclist_slot; /* synclist slot index */ - TAILQ_ENTRY(vnode) v_synclist; /* vnodes with dirty buffers */ - LIST_HEAD(, namecache) v_dnclist; /* namecaches for children */ - LIST_HEAD(, namecache) v_nclist; /* namecaches for our parent */ + int v_numoutput; /* i: # of pending writes */ + long v_writecount; /* i: ref count of writers */ + long v_holdcnt; /* i: page & buffer refs */ + struct mount *v_mount; /* v: ptr to vfs we are in */ + int (**v_op)(void *); /* :: vnode operations vector */ + TAILQ_ENTRY(vnode) v_freelist; /* f: vnode freelist */ + struct vnodelst *v_freelisthd; /* f: which freelist?
*/ + TAILQ_ENTRY(vnode) v_mntvnodes; /* m: vnodes for mount point */ + struct buflists v_cleanblkhd; /* x: clean blocklist head */ + struct buflists v_dirtyblkhd; /* x: dirty blocklist head */ + int v_synclist_slot; /* s: synclist slot index */ + TAILQ_ENTRY(vnode) v_synclist; /* s: vnodes with dirty bufs */ + LIST_HEAD(, namecache) v_dnclist; /* n: namecaches (children) */ + LIST_HEAD(, namecache) v_nclist; /* n: namecaches (parent) */ union { - struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */ - struct socket *vu_socket; /* unix ipc (VSOCK) */ - struct specinfo *vu_specinfo; /* device (VCHR, VBLK) */ - struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */ - struct uvm_ractx *vu_ractx; /* read-ahead context (VREG) */ + struct mount *vu_mountedhere;/* v: ptr to vfs (VDIR) */ + struct socket *vu_socket; /* v: unix ipc (VSOCK) */ + struct specinfo *vu_specinfo; /* v: device (VCHR, VBLK) */ + struct fifoinfo *vu_fifoinfo; /* v: fifo (VFIFO) */ + struct uvm_ractx *vu_ractx; /* i: read-ahead ctx (VREG) */ } v_un; - enum vtype v_type; /* vnode type */ - enum vtagtype v_tag; /* type of underlying data */ - struct lock v_lock; /* lock for this vnode */ - struct lock *v_vnlock; /* pointer to lock */ - void *v_data; /* private data for fs */ - struct klist v_klist; /* knotes attached to vnode */ + enum vtype v_type; /* :: vnode type */ + enum vtagtype v_tag; /* :: type of underlying data */ + struct lock v_lock; /* v: lock for this vnode */ + struct lock *v_vnlock; /* v: pointer to lock */ + void *v_data; /* :: private data for fs */ + struct klist v_klist; /* i: notes attached to vnode */ }; +#define v_usecount v_uobj.uo_refs +#define v_interlock v_uobj.vmobjlock #define v_mountedhere v_un.vu_mountedhere #define v_socket v_un.vu_socket #define v_specinfo v_un.vu_specinfo #define v_fifoinfo v_un.vu_fifoinfo #define v_ractx v_un.vu_ractx +typedef struct vnodelst vnodelst_t; +typedef struct vnode vnode_t; + /* * All vnode locking operations should use vp->v_vnlock. For leaf filesystems * (such as ffs, lfs, msdosfs, etc), vp->v_vnlock = &vp->v_lock. For @@ -178,9 +199,8 @@ struct vnode { #define VI_LAYER 0x00020000 /* vnode is on a layer filesystem */ #define VI_MAPPED 0x00040000 /* duplicate of VV_MAPPED */ #define VI_CLEAN 0x00080000 /* has been reclaimed */ -#define VI_XWANT 0x00100000 /* process is waiting for vnode */ -#define VI_BWAIT 0x00200000 /* waiting for output to complete */ -#define VI_FREEING 0x00400000 /* vnode is being freed */ +#define VI_INACTPEND 0x00100000 /* inactivation is pending */ +#define VI_INACTREDO 0x00200000 /* need to redo VOP_INACTIVE() */ /* * The third set are locked by the underlying file system. @@ -190,7 +210,7 @@ struct vnode { #define VNODE_FLAGBITS \ "\20\1ROOT\2SYSTEM\3ISTTY\4MAPPED\5MPSAFE\6LOCKSWORK\11TEXT\12EXECMAP" \ "\13WRMAP\14WRMAPDIRTY\15XLOCK\16ALIASED\17ONWORKLST\20MARKER" \ - "\22LAYER\23MAPPED\24CLEAN\25XWANT\26BWAIT\31DIROP" + "\22LAYER\23MAPPED\24CLEAN\25INACTPEND\26INACTREDO\31DIROP" #define VSIZENOTSET ((voff_t)-1) @@ -229,18 +249,6 @@ struct vattr { #ifdef _KERNEL -/* - * Use a global lock for all v_numoutput updates. - * Define a convenience macro to increment by one. - * Note: the only place where v_numoutput is decremented is in vwakeup(). - */ -extern struct simplelock global_v_numoutput_slock; -#define V_INCR_NUMOUTPUT(vp) do { \ - simple_lock(&global_v_numoutput_slock); \ - (vp)->v_numoutput++; \ - simple_unlock(&global_v_numoutput_slock); \ -} while (/*CONSTCOND*/ 0) - /* * Flags for ioflag. 
*/ @@ -309,10 +317,7 @@ extern const int vttoif_tab[]; #define HOLDRELE(vp) holdrele(vp) #define VHOLD(vp) vhold(vp) #define VREF(vp) vref(vp) -TAILQ_HEAD(freelst, vnode); -extern struct freelst vnode_hold_list; /* free vnodes referencing buffers */ -extern struct freelst vnode_free_list; /* vnode free list */ -extern struct simplelock vnode_free_list_slock; +extern kmutex_t vnode_free_list_lock; void holdrelel(struct vnode *); void vholdl(struct vnode *); @@ -330,9 +335,9 @@ static __inline void holdrele(struct vnode *vp) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); holdrelel(vp); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } /* @@ -342,9 +347,16 @@ static __inline void vhold(struct vnode *vp) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); vholdl(vp); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); +} + +static __inline bool +vismarker(struct vnode *vp) +{ + + return (vp->v_iflag & VI_MARKER) != 0; } #define NULLVP ((struct vnode *)NULL) @@ -356,7 +368,7 @@ vhold(struct vnode *vp) */ extern struct vnode *rootvnode; /* root (i.e. "/") vnode */ extern int desiredvnodes; /* number of vnodes desired */ -extern long numvnodes; /* current number of vnodes */ +extern u_int numvnodes; /* current number of vnodes */ extern time_t syncdelay; /* max time to delay syncing data */ extern time_t filedelay; /* time to delay syncing files */ extern time_t dirdelay; /* time to delay syncing directories */ @@ -440,7 +452,7 @@ extern struct vnodeop_desc *vnodeop_descs[]; /* * Interlock for scanning list of vnodes attached to a mountpoint */ -extern struct simplelock mntvnode_slock; +extern kmutex_t mntvnode_lock; /* * Union filesystem hook for vn_readdir(). @@ -545,14 +557,22 @@ void vflushbuf(struct vnode *, int); int vget(struct vnode *, int); void vgone(struct vnode *); void vgonel(struct vnode *, struct lwp *); -int vinvalbuf(struct vnode *, int, kauth_cred_t, struct lwp *, int, int); +int vinvalbuf(struct vnode *, int, kauth_cred_t, struct lwp *, bool, int); void vprint(const char *, struct vnode *); void vput(struct vnode *); -int vrecycle(struct vnode *, struct simplelock *, struct lwp *); +int vrecycle(struct vnode *, kmutex_t *, struct lwp *); void vrele(struct vnode *); -void vrele2(struct vnode *, int); -int vtruncbuf(struct vnode *, daddr_t, int, int); +void vrele2(struct vnode *, bool); +int vtruncbuf(struct vnode *, daddr_t, bool, int); void vwakeup(struct buf *); +void vwait(struct vnode *, int); +void vclean(struct vnode *, int); +void vrelel(struct vnode *, int, int); +struct vnode *valloc(struct mount *); +void vfree(struct vnode *); +void vmark(struct vnode *, struct vnode *); +struct vnode *vunmark(struct vnode *); +void vn_init1(void); /* see vnsubr(9) */ int vn_bwrite(void *); diff --git a/sys/ufs/ext2fs/ext2fs_bmap.c b/sys/ufs/ext2fs/ext2fs_bmap.c index 258b727ec85a..ae77d7ad7ac8 100644 --- a/sys/ufs/ext2fs/ext2fs_bmap.c +++ b/sys/ufs/ext2fs/ext2fs_bmap.c @@ -1,4 +1,4 @@ -/* $NetBSD: ext2fs_bmap.c,v 1.22 2007/10/08 18:01:27 ad Exp $ */ +/* $NetBSD: ext2fs_bmap.c,v 1.23 2008/01/02 11:49:08 ad Exp $ */ /* * Copyright (c) 1989, 1991, 1993 @@ -70,7 +70,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ext2fs_bmap.c,v 1.22 2007/10/08 18:01:27 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ext2fs_bmap.c,v 1.23 2008/01/02 11:49:08 ad Exp $"); #include #include @@ -141,7 +141,7 @@ ext2fs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap, int *nump, int *runp) { struct inode *ip; - struct buf *bp; 
+ struct buf *bp, *cbp; struct ufsmount *ump; struct mount *mp; struct indir a[NIADDR+1], *xap; @@ -208,8 +208,15 @@ ext2fs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap, */ metalbn = xap->in_lbn; - if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn) + if (metalbn == bn) break; + if (daddr == 0) { + mutex_enter(&bufcache_lock); + cbp = incore(vp, metalbn); + mutex_exit(&bufcache_lock); + if (cbp == NULL) + break; + } /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. @@ -229,7 +236,7 @@ ext2fs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap, return (ENOMEM); } - if (bp->b_flags & (B_DONE | B_DELWRI)) { + if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { trace(TR_BREADHIT, pack(vp, size), metalbn); } #ifdef DIAGNOSTIC diff --git a/sys/ufs/ext2fs/ext2fs_inode.c b/sys/ufs/ext2fs/ext2fs_inode.c index ac7b7649e57c..e697a08c9a64 100644 --- a/sys/ufs/ext2fs/ext2fs_inode.c +++ b/sys/ufs/ext2fs/ext2fs_inode.c @@ -1,4 +1,4 @@ -/* $NetBSD: ext2fs_inode.c,v 1.62 2007/12/08 19:29:53 pooka Exp $ */ +/* $NetBSD: ext2fs_inode.c,v 1.63 2008/01/02 11:49:08 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -65,7 +65,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ext2fs_inode.c,v 1.62 2007/12/08 19:29:53 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ext2fs_inode.c,v 1.63 2008/01/02 11:49:08 ad Exp $"); #include #include @@ -141,6 +141,7 @@ ext2fs_inactive(void *v) { struct vop_inactive_args /* { struct vnode *a_vp; + bool *a_recycle; } */ *ap = v; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); @@ -154,24 +155,23 @@ ext2fs_inactive(void *v) error = 0; if (ip->i_e2fs_nlink == 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { + /* Defer final inode free and update to reclaim.*/ if (ext2fs_size(ip) != 0) { error = ext2fs_truncate(vp, (off_t)0, 0, NOCRED); } ip->i_e2fs_dtime = time_second; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - ext2fs_vfree(vp, ip->i_number, ip->i_e2fs_mode); - } - if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) { + ip->i_flag |= IN_CHANGE | IN_UPDATE | IN_MODIFIED; + ip->i_omode = 1; + } else if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) { ext2fs_update(vp, NULL, NULL, 0); } out: - VOP_UNLOCK(vp, 0); /* * If we are done with the inode, reclaim it * so that it can be reused immediately. */ - if (ip->i_e2fs_dtime != 0) - vrecycle(vp, NULL, curlwp); + *ap->a_recycle = (ip->i_e2fs_dtime != 0); + VOP_UNLOCK(vp, 0); return (error); } @@ -478,7 +478,7 @@ ext2fs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, daddr_t lastbn, */ vp = ITOV(ip); bp = getblk(vp, lbn, (int)fs->e2fs_bsize, 0, 0); - if (bp->b_flags & (B_DONE | B_DELWRI)) { + if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { /* Braces must be here in case trace evaluates to nothing. 
*/ trace(TR_BREADHIT, pack(vp, fs->e2fs_bsize), lbn); } else { diff --git a/sys/ufs/ext2fs/ext2fs_readwrite.c b/sys/ufs/ext2fs/ext2fs_readwrite.c index af41db29d5e7..2085d0c6252e 100644 --- a/sys/ufs/ext2fs/ext2fs_readwrite.c +++ b/sys/ufs/ext2fs/ext2fs_readwrite.c @@ -1,4 +1,4 @@ -/* $NetBSD: ext2fs_readwrite.c,v 1.49 2007/12/08 19:29:53 pooka Exp $ */ +/* $NetBSD: ext2fs_readwrite.c,v 1.50 2008/01/02 11:49:08 ad Exp $ */ /*- * Copyright (c) 1993 @@ -65,7 +65,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ext2fs_readwrite.c,v 1.49 2007/12/08 19:29:53 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ext2fs_readwrite.c,v 1.50 2008/01/02 11:49:08 ad Exp $"); #include #include @@ -336,13 +336,13 @@ ext2fs_write(void *v) */ if (!async && oldoff >> 16 != uio->uio_offset >> 16) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16, (uio->uio_offset >> 16) << 16, PGO_CLEANIT); } } if (error == 0 && ioflag & IO_SYNC) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(oldoff), round_page(blkroundup(fs, uio->uio_offset)), PGO_CLEANIT | PGO_SYNCIO); diff --git a/sys/ufs/ext2fs/ext2fs_vfsops.c b/sys/ufs/ext2fs/ext2fs_vfsops.c index c8fb5df5c872..9138dd4d7cce 100644 --- a/sys/ufs/ext2fs/ext2fs_vfsops.c +++ b/sys/ufs/ext2fs/ext2fs_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: ext2fs_vfsops.c,v 1.125 2007/12/08 19:29:53 pooka Exp $ */ +/* $NetBSD: ext2fs_vfsops.c,v 1.126 2008/01/02 11:49:08 ad Exp $ */ /* * Copyright (c) 1989, 1991, 1993, 1994 @@ -65,7 +65,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ext2fs_vfsops.c,v 1.125 2007/12/08 19:29:53 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ext2fs_vfsops.c,v 1.126 2008/01/02 11:49:08 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" @@ -157,6 +157,7 @@ static const struct genfs_ops ext2fs_genfsops = { static const struct ufs_ops ext2fs_ufsops = { .uo_itimes = ext2fs_itimes, .uo_update = ext2fs_update, + .uo_vfree = ext2fs_vfree, }; /* @@ -465,7 +466,7 @@ int ext2fs_reload(struct mount *mountp, kauth_cred_t cred) { struct lwp *l = curlwp; - struct vnode *vp, *nvp, *devvp; + struct vnode *vp, *mvp, *devvp; struct inode *ip; struct buf *bp; struct m_ext2fs *fs; @@ -476,6 +477,7 @@ ext2fs_reload(struct mount *mountp, kauth_cred_t cred) if ((mountp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); + /* * Step 1: invalidate all cached meta-data. */ @@ -542,30 +544,37 @@ ext2fs_reload(struct mount *mountp, kauth_cred_t cred) brelse(bp, 0); } -loop: + /* Allocate a marker vnode. */ + if ((mvp = valloc(mountp)) == NULL) + return (ENOMEM); /* * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() * and vclean() can be called indirectly */ - simple_lock(&mntvnode_slock); - for (vp = TAILQ_FIRST(&mountp->mnt_vnodelist); vp; vp = nvp) { - if (vp->v_mount != mountp) { - simple_unlock(&mntvnode_slock); - goto loop; - } + mutex_enter(&mntvnode_lock); +loop: + for (vp = TAILQ_FIRST(&mountp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + if (vp->v_mount != mountp || vismarker(vp)) + continue; /* * Step 4: invalidate all inactive vnodes. */ - if (vrecycle(vp, &mntvnode_slock, l)) + if (vrecycle(vp, &mntvnode_lock, l)) { + mutex_enter(&mntvnode_lock); + (void)vunmark(mvp); goto loop; + } /* * Step 5: invalidate all cached file data. 
*/ - simple_lock(&vp->v_interlock); - nvp = TAILQ_NEXT(vp, v_mntvnodes); - simple_unlock(&mntvnode_slock); - if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) + mutex_enter(&vp->v_interlock); + mutex_exit(&mntvnode_lock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { + mutex_enter(&mntvnode_lock); + (void)vunmark(mvp); goto loop; + } if (vinvalbuf(vp, 0, cred, l, 0, 0)) panic("ext2fs_reload: dirty2"); /* @@ -576,17 +585,20 @@ loop: (int)fs->e2fs_bsize, NOCRED, &bp); if (error) { vput(vp); - return (error); + mutex_enter(&mntvnode_lock); + (void)vunmark(mvp); + break; } cp = (char *)bp->b_data + (ino_to_fsbo(fs, ip->i_number) * EXT2_DINODE_SIZE); e2fs_iload((struct ext2fs_dinode *)cp, ip->i_din.e2fs_din); brelse(bp, 0); vput(vp); - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); - return (0); + mutex_exit(&mntvnode_lock); + vfree(mvp); + return (error); } /* @@ -844,7 +856,7 @@ ext2fs_statvfs(struct mount *mp, struct statvfs *sbp) int ext2fs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) { - struct vnode *vp, *nvp; + struct vnode *vp, *mvp; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct m_ext2fs *fs; @@ -855,40 +867,45 @@ printf("fs = %s\n", fs->e2fs_fsmnt); panic("update: rofs mod"); } + + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return (ENOMEM); + /* * Write back each (modified) inode. */ - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); loop: /* * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() * and vclean() can be called indirectly */ - for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { - /* - * If the vnode that we are about to sync is no longer - * associated with this mount point, start over. - */ - if (vp->v_mount != mp) - goto loop; - simple_lock(&vp->v_interlock); - nvp = TAILQ_NEXT(vp, v_mntvnodes); + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + if (vp->v_mount != mp || vismarker(vp)) + continue; + mutex_enter(&vp->v_interlock); ip = VTOI(vp); - if (vp->v_type == VNON || + if (ip == NULL || (vp->v_iflag & (VI_XLOCK|VI_CLEAN)) != 0 || + vp->v_type == VNON || ((ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 && LIST_EMPTY(&vp->v_dirtyblkhd) && UVM_OBJ_IS_CLEAN(&vp->v_uobj))) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); continue; } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); if (error) { - simple_lock(&mntvnode_slock); - if (error == ENOENT) + mutex_enter(&mntvnode_lock); + if (error == ENOENT) { + (void)vunmark(mvp); goto loop; + } continue; } if (vp->v_type == VREG && waitfor == MNT_LAZY) @@ -899,9 +916,10 @@ loop: if (error) allerror = error; vput(vp); - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); + vfree(mvp); /* * Force stale file system control information to be flushed. 
*/ diff --git a/sys/ufs/ext2fs/ext2fs_vnops.c b/sys/ufs/ext2fs/ext2fs_vnops.c index 33cc4e79129a..ae571b88c42d 100644 --- a/sys/ufs/ext2fs/ext2fs_vnops.c +++ b/sys/ufs/ext2fs/ext2fs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: ext2fs_vnops.c,v 1.77 2007/12/08 19:29:54 pooka Exp $ */ +/* $NetBSD: ext2fs_vnops.c,v 1.78 2008/01/02 11:49:08 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -70,7 +70,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ext2fs_vnops.c,v 1.77 2007/12/08 19:29:54 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ext2fs_vnops.c,v 1.78 2008/01/02 11:49:08 ad Exp $"); #include #include @@ -189,7 +189,7 @@ ext2fs_mknod(void *v) * checked to see if it is an alias of an existing entry in * the inode cache. */ - vput(*vpp); + VOP_UNLOCK(*vpp, 0); (*vpp)->v_type = VNON; vgone(*vpp); error = VFS_VGET(mp, ino, vpp); @@ -1377,7 +1377,6 @@ ext2fs_vinit(struct mount *mntp, int (**specops)(void *), vp->v_vflag &= ~VV_LOCKSWORK; VOP_UNLOCK(vp, 0); vp->v_op = spec_vnodeop_p; - vrele(vp); vgone(vp); lockmgr(&nvp->v_lock, LK_EXCLUSIVE, &nvp->v_interlock); /* @@ -1488,6 +1487,13 @@ ext2fs_reclaim(void *v) struct inode *ip = VTOI(vp); int error; + /* + * The inode must be freed and updated before being removed + * from its hash chain. Other threads trying to gain a hold + * on the inode will be stalled because it is locked (VI_XLOCK). + */ + if (ip->i_omode == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) + ext2fs_vfree(vp, ip->i_number, ip->i_e2fs_mode); if ((error = ufs_reclaim(vp)) != 0) return (error); if (ip->i_din.e2fs_din != NULL) diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index 1d58cef77481..4152cb864712 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_alloc.c,v 1.104 2007/11/01 06:31:59 hannken Exp $ */ +/* $NetBSD: ffs_alloc.c,v 1.105 2008/01/02 11:49:08 ad Exp $ */ /* * Copyright (c) 2002 Networks Associates Technology, Inc. 
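[Editorial sketch, not part of the commit.] The rewritten scan loops above (ext2fs_reload(), ext2fs_sync(), and the ffs_snapshot() loop further down) share one idea: because vgone()/vclean() can be reached indirectly, a plain TAILQ_FOREACH over mnt_vnodelist is unsafe, so a marker vnode (valloc()/vmark()/vunmark()/vismarker()) holds the iterator's place while mntvnode_lock is dropped. The following is a minimal, compilable userland sketch of that protocol; struct xvnode, xvmark() and xvunmark() are hypothetical stand-ins for the kernel types, and no locking is shown.

#include <stdbool.h>
#include <stdio.h>
#include <sys/queue.h>

/* Hypothetical miniature of the marker-vnode protocol. */
struct xvnode {
	TAILQ_ENTRY(xvnode) v_entry;
	bool	v_marker;		/* stands in for VI_MARKER */
};
TAILQ_HEAD(xvlist, xvnode);

/* Park the marker right behind vp so the list may change under us. */
static void
xvmark(struct xvlist *head, struct xvnode *mvp, struct xvnode *vp)
{

	TAILQ_INSERT_AFTER(head, vp, mvp, v_entry);
}

/* Pull the marker out and return whatever node now follows it. */
static struct xvnode *
xvunmark(struct xvlist *head, struct xvnode *mvp)
{
	struct xvnode *next;

	next = TAILQ_NEXT(mvp, v_entry);
	TAILQ_REMOVE(head, mvp, v_entry);
	return next;
}

int
main(void)
{
	struct xvlist head = TAILQ_HEAD_INITIALIZER(head);
	struct xvnode nodes[4], marker = { .v_marker = true };
	struct xvnode *vp;
	int i;

	for (i = 0; i < 4; i++) {
		nodes[i].v_marker = false;
		TAILQ_INSERT_TAIL(&head, &nodes[i], v_entry);
	}
	/*
	 * The iteration shape used in the loops above: mark, (drop
	 * the list lock, work on vp, retake the lock), then unmark
	 * to find the next node even if vp was removed meanwhile.
	 */
	for (vp = TAILQ_FIRST(&head); vp != NULL;
	    vp = xvunmark(&head, &marker)) {
		xvmark(&head, &marker, vp);
		if (vp->v_marker)
			continue;	/* skip other threads' markers */
		printf("visit node %d\n", (int)(vp - nodes));
	}
	return 0;
}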
@@ -41,7 +41,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.104 2007/11/01 06:31:59 hannken Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.105 2008/01/02 11:49:08 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -138,14 +138,14 @@ ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size, voff_t off = trunc_page(lblktosize(fs, lbn)); voff_t endoff = round_page(lblktosize(fs, lbn) + size); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); while (off < endoff) { pg = uvm_pagelookup(uobj, off); KASSERT(pg != NULL); KASSERT(pg->owner == curproc->p_pid); off += PAGE_SIZE; } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } #endif @@ -226,7 +226,7 @@ ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize, voff_t off = trunc_page(lblktosize(fs, lbprev)); voff_t endoff = round_page(lblktosize(fs, lbprev) + osize); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); while (off < endoff) { pg = uvm_pagelookup(uobj, off); KASSERT(pg != NULL); @@ -234,7 +234,7 @@ ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize, KASSERT((pg->flags & PG_CLEAN) == 0); off += PAGE_SIZE; } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } #endif @@ -295,8 +295,10 @@ ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize, if (bp->b_blkno != fsbtodb(fs, bno)) panic("bad blockno"); allocbuf(bp, nsize, 1); - bp->b_flags |= B_DONE; memset((char *)bp->b_data + osize, 0, nsize - osize); + mutex_enter(bp->b_objlock); + bp->b_oflags |= BO_DONE; + mutex_exit(bp->b_objlock); *bpp = bp; } if (blknop != NULL) { @@ -375,8 +377,10 @@ ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize, if (bpp != NULL) { bp->b_blkno = fsbtodb(fs, bno); allocbuf(bp, nsize, 1); - bp->b_flags |= B_DONE; memset((char *)bp->b_data + osize, 0, (u_int)nsize - osize); + mutex_enter(bp->b_objlock); + bp->b_oflags |= BO_DONE; + mutex_exit(bp->b_objlock); *bpp = bp; } if (blknop != NULL) { diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c index 436312f0f1e5..c6636c702c70 100644 --- a/sys/ufs/ffs/ffs_balloc.c +++ b/sys/ufs/ffs/ffs_balloc.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_balloc.c,v 1.47 2007/12/08 15:21:19 ad Exp $ */ +/* $NetBSD: ffs_balloc.c,v 1.48 2008/01/02 11:49:09 ad Exp $ */ /* * Copyright (c) 2002 Networks Associates Technology, Inc. 
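[Editorial sketch, not part of the commit.] The ffs_realloccg() hunks above illustrate the buffer-flag split applied throughout this change: the old single b_flags word is divided into b_flags (owned by the I/O path), b_oflags (BO_DONE, BO_DELWRI; serialized by the buffer's b_objlock) and b_cflags (BC_BUSY, BC_NOCACHE; serialized by the global bufcache_lock), so completion state can no longer be set racily. A compilable userland sketch of the same discipline follows; struct xbuf and the pthread mutexes are illustrative stand-ins for struct buf and kmutex_t, not kernel API. Build with -lpthread.

#include <pthread.h>

#define	BO_DONE		0x01	/* I/O complete; guarded by b_objlock */
#define	BO_DELWRI	0x02	/* delayed write pending; b_objlock */
#define	BC_BUSY		0x01	/* owned by a thread; bufcache_lock */
#define	BC_NOCACHE	0x02	/* discard after use; bufcache_lock */

static pthread_mutex_t bufcache_lock = PTHREAD_MUTEX_INITIALIZER;

struct xbuf {
	pthread_mutex_t	b_objlock;	/* guards b_oflags */
	int		b_oflags;	/* BO_*: owner/completion state */
	int		b_cflags;	/* BC_*: cache state */
};

/* Mark I/O complete, as the ffs_realloccg() hunks above now do. */
static void
xbuf_mark_done(struct xbuf *bp)
{

	pthread_mutex_lock(&bp->b_objlock);
	bp->b_oflags |= BO_DONE;
	pthread_mutex_unlock(&bp->b_objlock);
}

/* Claim the buffer, as the ffs_snapshot() flush loop below now does. */
static void
xbuf_mark_busy(struct xbuf *bp)
{

	pthread_mutex_lock(&bufcache_lock);
	bp->b_cflags |= BC_BUSY;
	pthread_mutex_unlock(&bufcache_lock);
}

int
main(void)
{
	struct xbuf b;

	pthread_mutex_init(&b.b_objlock, NULL);
	b.b_oflags = 0;
	b.b_cflags = 0;
	xbuf_mark_busy(&b);
	xbuf_mark_done(&b);
	return (b.b_oflags & BO_DONE) ? 0 : 1;
}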
@@ -41,7 +41,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.47 2007/12/08 15:21:19 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.48 2008/01/02 11:49:09 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -460,13 +460,13 @@ fail: } bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0, 0); - if (bp->b_flags & B_DELWRI) { + if (bp->b_oflags & BO_DELWRI) { nb = fsbtodb(fs, cgtod(fs, dtog(fs, dbtofsb(fs, bp->b_blkno)))); bwrite(bp); bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize, 0, 0); - if (bp->b_flags & B_DELWRI) { + if (bp->b_oflags & BO_DELWRI) { bwrite(bp); } else { brelse(bp, BC_INVAL); @@ -627,8 +627,10 @@ ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred, brelse(bp, 0); return (error); } + mutex_enter(&bp->b_interlock); bp->b_blkno = fsbtodb(fs, nb); bp->b_xflags |= BX_ALTDATA; + mutex_exit(&bp->b_interlock); *bpp = bp; return (0); } @@ -1026,13 +1028,13 @@ fail: } bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0, 0); - if (bp->b_flags & B_DELWRI) { + if (bp->b_oflags & BO_DELWRI) { nb = fsbtodb(fs, cgtod(fs, dtog(fs, dbtofsb(fs, bp->b_blkno)))); bwrite(bp); bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize, 0, 0); - if (bp->b_flags & B_DELWRI) { + if (bp->b_oflags & BO_DELWRI) { bwrite(bp); } else { brelse(bp, BC_INVAL); diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index 3ce72f20ba5f..97a1e9cffc19 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_extern.h,v 1.61 2007/12/08 19:29:54 pooka Exp $ */ +/* $NetBSD: ffs_extern.h,v 1.62 2008/01/02 11:49:09 ad Exp $ */ /*- * Copyright (c) 1991, 1993, 1994 @@ -131,6 +131,7 @@ int ffs_deleteextattr(void *); int ffs_lock(void *); int ffs_unlock(void *); int ffs_islocked(void *); +int ffs_full_fsync(struct vnode *, int); #ifdef SYSCTL_SETUP_PROTO SYSCTL_SETUP_PROTO(sysctl_vfs_ffs_setup); @@ -155,6 +156,7 @@ void softdep_initialize(void); void softdep_reinitialize(void); int softdep_mount(struct vnode *, struct mount *, struct fs *, kauth_cred_t); +void softdep_unmount(struct mount *); int softdep_flushworklist(struct mount *, int *, struct lwp *); int softdep_flushfiles(struct mount *, int, struct lwp *); void softdep_update_inodeblock(struct inode *, struct buf *, int); @@ -171,7 +173,7 @@ void softdep_setup_allocindir_page(struct inode *, daddr_t, struct buf *, int, daddr_t, daddr_t, struct buf *); void softdep_fsync_mountdev(struct vnode *); -int softdep_sync_metadata(void *); +int softdep_sync_metadata(struct vnode *); extern int (**ffs_vnodeop_p)(void *); extern int (**ffs_specop_p)(void *); diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index 9a7d9979d77a..0550b06e4a31 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_inode.c,v 1.92 2007/12/08 19:29:54 pooka Exp $ */ +/* $NetBSD: ffs_inode.c,v 1.93 2008/01/02 11:49:09 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_inode.c,v 1.92 2007/12/08 19:29:54 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_inode.c,v 1.93 2008/01/02 11:49:09 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -123,9 +123,18 @@ ffs_update(struct vnode *vp, const struct timespec *acc, return (error); } ip->i_flag &= ~(IN_MODIFIED | IN_ACCESSED); - if (DOINGSOFTDEP(vp)) + if (DOINGSOFTDEP(vp)) { + if (ip->i_omode != 0) { + /* + * XXX If the inode has been unlinked, wait + * for the update (and so dependencies) to + * flush. 
Ensures that the slate is clean + * when the inode is reused. + */ + waitfor |= UPDATE_WAIT; + } softdep_update_inodeblock(ip, bp, waitfor); - else if (ip->i_ffs_effnlink != ip->i_nlink) + } else if (ip->i_ffs_effnlink != ip->i_nlink) panic("ffs_update: bad link cnt"); if (fs->fs_magic == FS_UFS1_MAGIC) { cp = (char *)bp->b_data + @@ -231,7 +240,7 @@ ffs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred) if (error) return error; if (ioflag & IO_SYNC) { - simple_lock(&ovp->v_interlock); + mutex_enter(&ovp->v_interlock); VOP_PUTPAGES(ovp, trunc_page(osize & fs->fs_bmask), round_page(eob), PGO_CLEANIT | PGO_SYNCIO); @@ -281,7 +290,7 @@ ffs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred) osize); uvm_vnp_zerorange(ovp, length, eoz - length); if (round_page(eoz) > round_page(length)) { - simple_lock(&ovp->v_interlock); + mutex_enter(&ovp->v_interlock); error = VOP_PUTPAGES(ovp, round_page(length), round_page(eoz), PGO_CLEANIT | PGO_DEACTIVATE | @@ -559,7 +568,7 @@ ffs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, daddr_t lastbn, */ vp = ITOV(ip); bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0); - if (bp->b_flags & (B_DONE | B_DELWRI)) { + if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { /* Braces must be here in case trace evaluates to nothing. */ trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn); } else { diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c index 8fbe6970e257..de6ab812a9ae 100644 --- a/sys/ufs/ffs/ffs_snapshot.c +++ b/sys/ufs/ffs/ffs_snapshot.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_snapshot.c,v 1.56 2007/12/08 19:29:54 pooka Exp $ */ +/* $NetBSD: ffs_snapshot.c,v 1.57 2008/01/02 11:49:09 ad Exp $ */ /* * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved. @@ -38,7 +38,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.56 2007/12/08 19:29:54 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.57 2008/01/02 11:49:09 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -78,10 +78,10 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_snapshot.c,v 1.56 2007/12/08 19:29:54 pooka Exp #define ufs2_daddr_t int64_t #define ufs_lbn_t daddr_t #define VI_MTX(v) (&(v)->v_interlock) -#define VI_LOCK(v) simple_lock(&(v)->v_interlock) -#define VI_UNLOCK(v) simple_unlock(&(v)->v_interlock) -#define MNT_ILOCK(v) simple_lock(&mntvnode_slock) -#define MNT_IUNLOCK(v) simple_unlock(&mntvnode_slock) +#define VI_LOCK(v) mutex_enter(&(v)->v_interlock) +#define VI_UNLOCK(v) mutex_exit(&(v)->v_interlock) +#define MNT_ILOCK(v) mutex_enter(&mntvnode_lock) +#define MNT_IUNLOCK(v) mutex_exit(&mntvnode_lock) #if !defined(FFS_NO_SNAPSHOT) static int cgaccount(int, struct vnode *, void *, int); @@ -204,7 +204,7 @@ ffs_snapshot(struct mount *mp, struct vnode *vp, #else /* defined(FFS_NO_SNAPSHOT) */ ufs2_daddr_t numblks, blkno, *blkp, snaplistsize = 0, *snapblklist; int error, ns, cg, snaploc; - int i, s, size, len, loc; + int i, size, len, loc; int flag = mp->mnt_flag; struct timeval starttime; #ifdef DEBUG @@ -220,7 +220,7 @@ ffs_snapshot(struct mount *mp, struct vnode *vp, struct inode *ip, *xp; struct buf *bp, *ibp, *nbp; struct vattr vat; - struct vnode *xvp, *nvp, *devvp; + struct vnode *xvp, *mvp, *devvp; struct snap_info *si; ns = UFS_FSNEEDSWAP(fs); @@ -426,29 +426,36 @@ ffs_snapshot(struct mount *mp, struct vnode *vp, */ snaplistsize = fs->fs_ncg + howmany(fs->fs_cssize, fs->fs_bsize) + FSMAXSNAP + 1 /* superblock */ + 1 /* last block */ + 1 /* size */; + /* Allocate a marker vnode */ + if ((mvp = valloc(mp)) == NULL) { + 
error = ENOMEM; + goto out1; + } MNT_ILOCK(mp); -loop: /* * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() * and vclean() can be called indirectly */ - for (xvp = TAILQ_FIRST(&mp->mnt_vnodelist); xvp; xvp = nvp) { + for (xvp = TAILQ_FIRST(&mp->mnt_vnodelist); xvp; xvp = vunmark(mvp)) { + vmark(mvp, xvp); /* * Make sure this vnode wasn't reclaimed in getnewvnode(). * Start over if it has (it won't be on the list anymore). */ - if (xvp->v_mount != mp) - goto loop; + if (xvp->v_mount != mp || vismarker(xvp)) + continue; VI_LOCK(xvp); - nvp = TAILQ_NEXT(xvp, v_mntvnodes); - MNT_IUNLOCK(mp); if ((xvp->v_iflag & VI_XLOCK) || xvp->v_usecount == 0 || xvp->v_type == VNON || (VTOI(xvp)->i_flags & SF_SNAPSHOT)) { VI_UNLOCK(xvp); - MNT_ILOCK(mp); continue; } + MNT_IUNLOCK(mp); + /* + * XXXAD should increase vnode ref count to prevent it + * disappearing or being recycled. + */ VI_UNLOCK(xvp); #ifdef DEBUG if (snapdebug) @@ -492,11 +499,13 @@ loop: xp->i_mode); if (error) { free(copy_fs->fs_csp, M_UFSMNT); + (void)vunmark(mvp); goto out1; } MNT_ILOCK(mp); } MNT_IUNLOCK(mp); + vfree(mvp); /* * If there already exist snapshots on this filesystem, grab a * reference to their shared lock. If this is the first snapshot @@ -702,34 +711,27 @@ out: * Clean all dirty buffers now to avoid UBC inconsistencies. */ if (!error) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, 0, 0, PGO_ALLPAGES|PGO_CLEANIT|PGO_SYNCIO|PGO_FREE); } if (!error) { - s = splbio(); + mutex_enter(&bufcache_lock); for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); - simple_lock(&bp->b_interlock); - splx(s); - if ((bp->b_flags & (B_DELWRI|B_BUSY)) != B_DELWRI) - panic("ffs_snapshot: not dirty or busy, bp %p", - bp); - bp->b_flags |= B_BUSY|B_VFLUSH; + bp->b_cflags |= BC_BUSY|BC_VFLUSH; if (LIST_FIRST(&bp->b_dep) == NULL) - bp->b_flags |= B_NOCACHE; - simple_unlock(&bp->b_interlock); + bp->b_cflags |= BC_NOCACHE; + mutex_exit(&bufcache_lock); bwrite(bp); - s = splbio(); + mutex_enter(&bufcache_lock); } - simple_lock(&global_v_numoutput_slock); - while (vp->v_numoutput) { - vp->v_iflag |= VI_BWAIT; - ltsleep((void *)&vp->v_numoutput, PRIBIO+1, - "snapflushbuf", 0, &global_v_numoutput_slock); - } - simple_unlock(&global_v_numoutput_slock); - splx(s); + mutex_exit(&bufcache_lock); + + mutex_enter(&vp->v_interlock); + while (vp->v_numoutput > 0) + cv_wait(&vp->v_cv, &vp->v_interlock); + mutex_exit(&vp->v_interlock); } if (sbbuf) free(sbbuf, M_UFSMNT); @@ -958,7 +960,7 @@ indiracct_ufs1(struct vnode *snapvp, struct vnode *cancelvp, int level, */ bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0); bp->b_blkno = fsbtodb(fs, blkno); - if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 && + if ((bp->b_oflags & (BO_DONE | BO_DELWRI)) == 0 && (error = readfsblk(bp->b_vp, bp->b_data, fragstoblks(fs, blkno)))) { brelse(bp, 0); return (error); @@ -1226,7 +1228,7 @@ indiracct_ufs2(struct vnode *snapvp, struct vnode *cancelvp, int level, */ bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0); bp->b_blkno = fsbtodb(fs, blkno); - if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 && + if ((bp->b_oflags & (BO_DONE | BO_DELWRI)) == 0 && (error = readfsblk(bp->b_vp, bp->b_data, fragstoblks(fs, blkno)))) { brelse(bp, 0); return (error); @@ -2063,7 +2065,7 @@ readfsblk(struct vnode *vp, void *data, ufs2_daddr_t lbn) struct fs *fs = ip->i_fs; struct buf *nbp; - nbp = getiobuf(); + nbp = getiobuf(NULL, true); nbp->b_flags = B_READ; nbp->b_bcount = nbp->b_bufsize = fs->fs_bsize; nbp->b_error 
= 0; @@ -2071,7 +2073,6 @@ readfsblk(struct vnode *vp, void *data, ufs2_daddr_t lbn) nbp->b_blkno = nbp->b_rawblkno = fsbtodb(fs, blkstofrags(fs, lbn)); nbp->b_proc = NULL; nbp->b_dev = ip->i_devvp->v_rdev; - nbp->b_vp = NULLVP; bdev_strategy(nbp); @@ -2101,7 +2102,7 @@ readvnblk(struct vnode *vp, void *data, ufs2_daddr_t lbn) if (bn != (daddr_t)-1) { offset = dbtob(bn); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(offset), round_page(offset+fs->fs_bsize), PGO_CLEANIT|PGO_SYNCIO|PGO_FREE); @@ -2131,7 +2132,7 @@ writevnblk(struct vnode *vp, void *data, ufs2_daddr_t lbn) offset = lblktosize(fs, (off_t)lbn); s = cow_enter(); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(offset), round_page(offset+fs->fs_bsize), PGO_CLEANIT|PGO_SYNCIO|PGO_FREE); if (error == 0) @@ -2142,7 +2143,10 @@ writevnblk(struct vnode *vp, void *data, ufs2_daddr_t lbn) return error; bcopy(data, bp->b_data, fs->fs_bsize); - bp->b_flags |= B_NOCACHE; + mutex_enter(&bufcache_lock); + /* XXX Shouldn't need to lock for this, NOCACHE is only read later. */ + bp->b_cflags |= BC_NOCACHE; + mutex_exit(&bufcache_lock); return bwrite(bp); } diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index 713fff1a5d8d..3dfdc44e45aa 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_softdep.c,v 1.102 2007/12/08 19:29:55 pooka Exp $ */ +/* $NetBSD: ffs_softdep.c,v 1.103 2008/01/02 11:49:09 ad Exp $ */ /* * Copyright 1998 Marshall Kirk McKusick. All Rights Reserved. @@ -33,7 +33,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_softdep.c,v 1.102 2007/12/08 19:29:55 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_softdep.c,v 1.103 2008/01/02 11:49:09 ad Exp $"); #include #include @@ -62,11 +62,8 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_softdep.c,v 1.102 2007/12/08 19:29:55 pooka Exp #include -static struct pool sdpcpool; u_int softdep_lockedbufs; -extern struct simplelock bqueue_slock; /* XXX */ - MALLOC_JUSTDEFINE(M_PAGEDEP, "pagedep", "file page dependencies"); MALLOC_JUSTDEFINE(M_INODEDEP, "inodedep", "Inode depependencies"); MALLOC_JUSTDEFINE(M_NEWBLK, "newblk", "New block allocation"); @@ -131,13 +128,14 @@ LIST_HEAD(, buf) pcbphashhead[PCBPHASHSIZE]; * Internal function prototypes. 
*/ static void softdep_error(const char *, int); -static void drain_output(struct vnode *, int); +static void drain_output(struct vnode *); static int getdirtybuf(struct buf **, int); static void clear_remove(struct lwp *); static void clear_inodedeps(struct lwp *); static int flush_pagedep_deps(struct vnode *, struct mount *, struct diraddhd *); static int flush_inodedep_deps(struct fs *, ino_t); +static int flush_deplist(struct allocdirectlst *, int, int *); static int handle_written_filepage(struct pagedep *, struct buf *); static void diradd_inode_written(struct diradd *, struct inodedep *); static int handle_written_inodeblock(struct inodedep *, struct buf *); @@ -180,7 +178,7 @@ static int inodedep_lookup(struct fs *, ino_t, int, struct inodedep **); static int pagedep_lookup(struct inode *, daddr_t, int, struct pagedep **); static void pause_timer(void *); -static int request_cleanup(int, int); +static int request_cleanup(int); static void add_to_worklist(struct worklist *); static struct buf *softdep_setup_pagecache(struct inode *, daddr_t, long); @@ -188,9 +186,7 @@ static void softdep_collect_pagecache(struct inode *); static void softdep_free_pagecache(struct inode *); static struct vnode *softdep_lookupvp(struct fs *, ino_t); static struct buf *softdep_lookup_pcbp(struct vnode *, daddr_t); -#ifdef UVMHIST void softdep_pageiodone1(struct buf *); -#endif void softdep_pageiodone(struct buf *); void softdep_flush_vnode(struct vnode *, daddr_t); static void softdep_trackbufs(int, bool); @@ -220,149 +216,46 @@ static struct bio_ops bioops_softdep = { softdep_pageiodone, /* io_pageiodone */ }; -/* - * Locking primitives. - * - * For a uniprocessor, all we need to do is protect against disk - * interrupts. For a multiprocessor, this lock would have to be - * a mutex. A single mutex is used throughout this file, though - * finer grain locking could be used if contention warranted it. - * - * For a multiprocessor, the sleep call would accept a lock and - * release it after the sleep processing was complete. In a uniprocessor - * implementation there is no such interlock, so we simple mark - * the places where it needs to be done with the `interlocked' form - * of the lock calls. Since the uniprocessor sleep already interlocks - * the spl, there is nothing that really needs to be done. 
- */ -#ifndef /* NOT */ DEBUG -static struct lockit { - int lkt_spl; -} lk = { 0 }; -#define ACQUIRE_LOCK(lk) (lk)->lkt_spl = splbio() -#define FREE_LOCK(lk) splx((lk)->lkt_spl) -#define ACQUIRE_LOCK_INTERLOCKED(lk, s) (lk)->lkt_spl = s -#define FREE_LOCK_INTERLOCKED(lk) (lk)->lkt_spl - -#else /* DEBUG */ -static struct lockit { - int lkt_spl; - lwp_t *lkt_held; -} lk = { 0, NULL }; -static int lockcnt; - -static void acquire_lock(struct lockit *); -static void free_lock(struct lockit *); -static void acquire_lock_interlocked(struct lockit *, int); -static int free_lock_interlocked(struct lockit *); - -#define ACQUIRE_LOCK(lk) acquire_lock(lk) -#define FREE_LOCK(lk) free_lock(lk) -#define ACQUIRE_LOCK_INTERLOCKED(lk, s) acquire_lock_interlocked(lk, s) -#define FREE_LOCK_INTERLOCKED(lk) free_lock_interlocked(lk) - -static void -acquire_lock(lkp) - struct lockit *lkp; -{ - if (lkp->lkt_held != NULL) { - if (lkp->lkt_held == curlwp) - panic("softdep_lock: locking against myself"); - else - panic("softdep_lock: lock held by %p", lkp->lkt_held); - } - lkp->lkt_spl = splbio(); - lkp->lkt_held = curlwp; - lockcnt++; -} - -static void -free_lock(lkp) - struct lockit *lkp; -{ - - if (lkp->lkt_held == NULL) - panic("softdep_unlock: lock not held"); - lkp->lkt_held = NULL; - splx(lkp->lkt_spl); -} - -static void -acquire_lock_interlocked(lkp, s) - struct lockit *lkp; - int s; -{ - if (lkp->lkt_held != NULL) { - if (lkp->lkt_held == curlwp) - panic("softdep_lock_interlocked: locking against self"); - else - panic("softdep_lock_interlocked: lock held by %p", - lkp->lkt_held); - } - lkp->lkt_spl = s; - lkp->lkt_held = curlwp; - lockcnt++; -} - -static int -free_lock_interlocked(lkp) - struct lockit *lkp; -{ - if (lkp->lkt_held == NULL) - panic("softdep_unlock_interlocked: lock not held"); - lkp->lkt_held = NULL; - return lkp->lkt_spl; -} -#endif /* DEBUG */ +static kcondvar_t softdep_tb_cv; /* * Place holder for real semaphores. */ struct sema { - lwp_t *holder; - const char *name; + kcondvar_t cv; int value; - int prio; + struct lwp *holder; int timo; }; -static void sema_init(struct sema *, const char *, int, int); -static int sema_get(struct sema *, struct lockit *); +static void sema_init(struct sema *, const char *, int); +static int sema_get(struct sema *, kmutex_t *); static void sema_release(struct sema *); static void -sema_init(semap, name, prio, timo) +sema_init(semap, name, timo) struct sema *semap; const char *name; - int prio, timo; + int timo; { semap->holder = NULL; semap->value = 0; - semap->name = name; - semap->prio = prio; semap->timo = timo; + cv_init(&semap->cv, name); } static int sema_get(semap, interlock) struct sema *semap; - struct lockit *interlock; + kmutex_t *interlock; { - int s = 0; /* Quell an uninitialized variable warning */ if (semap->value++ > 0) { - if (interlock != NULL) - s = FREE_LOCK_INTERLOCKED(interlock); - tsleep((void *)semap, semap->prio, semap->name, semap->timo); - if (interlock != NULL) { - ACQUIRE_LOCK_INTERLOCKED(interlock, s); - FREE_LOCK(interlock); - } + cv_wait(&semap->cv, interlock); return (0); } semap->holder = curlwp; - if (interlock != NULL) - FREE_LOCK(interlock); + mutex_exit(interlock); return (1); } @@ -375,7 +268,7 @@ sema_release(semap) panic("sema_release: not held"); if (--semap->value > 0) { semap->value = 0; - wakeup(semap); + cv_broadcast(&semap->cv); } semap->holder = NULL; } @@ -384,118 +277,100 @@ sema_release(semap) * Memory management. 
*/ -static struct pool pagedep_pool; -static struct pool inodedep_pool; -static struct pool newblk_pool; -static struct pool bmsafemap_pool; -static struct pool allocdirect_pool; -static struct pool indirdep_pool; -static struct pool allocindir_pool; -static struct pool freefrag_pool; -static struct pool freeblks_pool; -static struct pool freefile_pool; -static struct pool diradd_pool; -static struct pool mkdir_pool; -static struct pool dirrem_pool; -static struct pool newdirblk_pool; +static pool_cache_t softdep_small_cache; +static pool_cache_t softdep_medium_cache; +static pool_cache_t softdep_large_cache; -static inline void +static inline void * +softdep_alloc(const int type) +{ + + switch (type) { + case D_BMSAFEMAP: + case D_INDIRDEP: + case D_FREEFRAG: + case D_FREEFILE: + case D_DIRADD: + case D_MKDIR: + case D_DIRREM: + case D_NEWDIRBLK: + return pool_cache_get(softdep_small_cache, PR_WAITOK); + + case D_ALLOCDIRECT: + case D_ALLOCINDIR: + case D_PAGEDEP: + case D_INODEDEP: + return pool_cache_get(softdep_medium_cache, PR_WAITOK); + + case D_FREEBLKS: + return pool_cache_get(softdep_large_cache, PR_WAITOK); + + default: + panic("softdep_alloc"); + } +} + +static void softdep_free(struct worklist *item, int type) { switch (type) { - - case D_PAGEDEP: - pool_put(&pagedep_pool, item); - return; - - case D_INODEDEP: - pool_put(&inodedep_pool, item); - return; - case D_BMSAFEMAP: - pool_put(&bmsafemap_pool, item); + case D_INDIRDEP: + case D_FREEFRAG: + case D_FREEFILE: + case D_DIRADD: + case D_MKDIR: + case D_DIRREM: + case D_NEWDIRBLK: + pool_cache_put(softdep_small_cache, item); return; case D_ALLOCDIRECT: - pool_put(&allocdirect_pool, item); - return; - - case D_INDIRDEP: - pool_put(&indirdep_pool, item); - return; - case D_ALLOCINDIR: - pool_put(&allocindir_pool, item); + case D_PAGEDEP: + case D_INODEDEP: + pool_cache_put(softdep_medium_cache, item); return; - case D_FREEFRAG: - pool_put(&freefrag_pool, item); - return; case D_FREEBLKS: - pool_put(&freeblks_pool, item); + pool_cache_put(softdep_large_cache, item); return; - - case D_FREEFILE: - pool_put(&freefile_pool, item); - return; - - case D_DIRADD: - pool_put(&diradd_pool, item); - return; - - case D_MKDIR: - pool_put(&mkdir_pool, item); - return; - - case D_DIRREM: - pool_put(&dirrem_pool, item); - return; - - case D_NEWDIRBLK: - pool_put(&newdirblk_pool, item); - return; - } panic("softdep_free: unknown type %d", type); } +static kmutex_t freequeue_lock; struct workhead softdep_freequeue; -static inline void -softdep_freequeue_add(struct worklist *item) -{ - int s; - - s = splbio(); - LIST_INSERT_HEAD(&softdep_freequeue, item, wk_list); - splx(s); -} - -static inline void +static void softdep_freequeue_process(void) { struct worklist *wk; - while ((wk = LIST_FIRST(&softdep_freequeue)) != NULL) { + while (!LIST_EMPTY(&softdep_freequeue)) { + mutex_enter(&freequeue_lock); + if ((wk = LIST_FIRST(&softdep_freequeue)) == NULL) { + mutex_exit(&freequeue_lock); + return; + } LIST_REMOVE(wk, wk_list); - FREE_LOCK(&lk); + mutex_exit(&freequeue_lock); softdep_free(wk, wk->wk_type); - ACQUIRE_LOCK(&lk); } } static char emerginoblk[MAXBSIZE]; static int emerginoblk_inuse; static const struct buf *emerginoblk_origbp; -static struct simplelock emerginoblk_slock = SIMPLELOCK_INITIALIZER; +static kmutex_t emerginoblk_lock; +static kcondvar_t emerginoblk_cv; -static inline void * +static void * inodedep_allocdino(struct inodedep *inodedep, const struct buf *origbp, size_t size) { void *vp; - int s; KASSERT(inodedep->id_savedino1 == 
NULL); @@ -506,17 +381,14 @@ inodedep_allocdino(struct inodedep *inodedep, const struct buf *origbp, if (vp) return vp; - s = splbio(); - simple_lock(&emerginoblk_slock); + mutex_enter(&emerginoblk_lock); while (emerginoblk_inuse && emerginoblk_origbp != origbp) - ltsleep(&emerginoblk_inuse, PVM, "emdino", 0, - &emerginoblk_slock); + cv_wait(&emerginoblk_cv, &emerginoblk_lock); emerginoblk_origbp = origbp; emerginoblk_inuse++; KASSERT(emerginoblk_inuse <= sizeof(emerginoblk) / MIN(sizeof(struct ufs1_dinode), sizeof(struct ufs2_dinode))); - simple_unlock(&emerginoblk_slock); - splx(s); + mutex_exit(&emerginoblk_lock); KASSERT(inodedep->id_savedino1 == NULL); @@ -528,7 +400,7 @@ inodedep_allocdino(struct inodedep *inodedep, const struct buf *origbp, return vp; } -static inline void +static void inodedep_freedino(struct inodedep *inodedep) { void *vp = inodedep->id_savedino1; @@ -537,17 +409,12 @@ inodedep_freedino(struct inodedep *inodedep) KASSERT(vp != NULL); if (__predict_false((void *)&emerginoblk[0] <= vp && vp < (void *)&emerginoblk[MAXBSIZE])) { - int s; - KASSERT(emerginoblk_inuse > 0); - s = splbio(); - simple_lock(&emerginoblk_slock); + mutex_enter(&emerginoblk_lock); emerginoblk_inuse--; if (emerginoblk_inuse == 0) - wakeup(&emerginoblk_inuse); - simple_unlock(&emerginoblk_slock); - splx(s); - + cv_broadcast(&emerginoblk_cv); + mutex_exit(&emerginoblk_lock); return; } @@ -556,39 +423,31 @@ inodedep_freedino(struct inodedep *inodedep) /* * Worklist queue management. - * These routines require that the lock be held. */ -#ifndef /* NOT */ DEBUG -#define WORKLIST_INSERT(head, item) do { \ - (item)->wk_state |= ONWORKLIST; \ - LIST_INSERT_HEAD(head, item, wk_list); \ -} while (0) -#define WORKLIST_REMOVE(item) do { \ - (item)->wk_state &= ~ONWORKLIST; \ - LIST_REMOVE(item, wk_list); \ -} while (0) -#define WORKITEM_FREE(item, type) \ - softdep_freequeue_add((struct worklist *)item) - -#else /* DEBUG */ -static void worklist_insert(struct workhead *, struct worklist *); -static void worklist_remove(struct worklist *); -static void workitem_free(struct worklist *, int); - -#define WORKLIST_INSERT(head, item) worklist_insert(head, item) -#define WORKLIST_REMOVE(item) worklist_remove(item) -#define WORKITEM_FREE(item, type) workitem_free((struct worklist *)item, type) +static void worklist_insert(struct workhead *, struct worklist *); +static void worklist_remove(struct worklist *); +static void workitem_free(void *, int); static void worklist_insert(head, item) struct workhead *head; struct worklist *item; { +#ifdef DIAGNOSTIC + struct worklist *test; + + if (item->wk_type == D_FREEFILE) { + LIST_FOREACH(test, head, wk_list) { + if (test->wk_type == D_FREEFILE) { + panic("worklist_insert: freefile"); + } + } + } +#endif + + KASSERT(mutex_owned(&bufcache_lock)); + KASSERT((item->wk_state & ONWORKLIST) == 0); - if (lk.lkt_held == NULL) - panic("worklist_insert: lock not held"); - if (item->wk_state & ONWORKLIST) - panic("worklist_insert: already on list"); item->wk_state |= ONWORKLIST; LIST_INSERT_HEAD(head, item, wk_list); } @@ -598,23 +457,24 @@ worklist_remove(item) struct worklist *item; { - if (lk.lkt_held == NULL) - panic("worklist_remove: lock not held"); - if ((item->wk_state & ONWORKLIST) == 0) - panic("worklist_remove: not on list"); + KASSERT(mutex_owned(&bufcache_lock)); + KASSERT((item->wk_state & ONWORKLIST) != 0); + item->wk_state &= ~ONWORKLIST; LIST_REMOVE(item, wk_list); } static void -workitem_free(struct worklist *item, int type) +workitem_free(void *object, int type) { 
+ struct worklist *item = object; - if (item->wk_state & ONWORKLIST) - panic("workitem_free: still on list"); - softdep_freequeue_add(item); + KASSERT((item->wk_state & ONWORKLIST) == 0); + + mutex_enter(&freequeue_lock); + LIST_INSERT_HEAD(&softdep_freequeue, item, wk_list); + mutex_exit(&freequeue_lock); } -#endif /* DEBUG */ /* * Workitem queue management @@ -623,9 +483,11 @@ static struct workhead softdep_workitem_pending; static struct worklist *worklist_tail; static int softdep_worklist_busy; /* 1 => trying to do unmount */ static int softdep_worklist_req; /* serialized waiters */ +static kcondvar_t softdep_worklist_cv; static int max_softdeps; /* maximum number of structs before slowdown */ static int tickdelay = 2; /* number of ticks to pause during slowdown */ static int proc_waiting; /* tracks whether we have a timeout posted */ +static kcondvar_t proc_wait_cv; static callout_t pause_timer_ch; static lwp_t *filesys_syncer; /* filesystem syncer thread */ static int req_clear_inodedeps; /* syncer process flush some inodedeps */ @@ -703,9 +565,8 @@ softdep_process_worklist(matchmnt) * First process any items on the delayed-free queue. */ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); softdep_freequeue_process(); - FREE_LOCK(&lk); /* * Record the process identifier of our caller so that we can give @@ -713,6 +574,7 @@ softdep_process_worklist(matchmnt) */ filesys_syncer = l; matchcnt = 0; + /* * There is no danger of having multiple processes run this * code. It is single threaded solely so that softdep_flushfiles @@ -720,8 +582,10 @@ softdep_process_worklist(matchmnt) * related to its mount point that are in the list. */ if (matchmnt == NULL) { - if (softdep_worklist_busy < 0) + if (softdep_worklist_busy < 0) { + mutex_exit(&bufcache_lock); return (-1); + } softdep_worklist_busy += 1; } @@ -731,14 +595,13 @@ softdep_process_worklist(matchmnt) if (req_clear_inodedeps) { clear_inodedeps(l); req_clear_inodedeps = 0; - wakeup(&proc_waiting); + cv_broadcast(&proc_wait_cv); } if (req_clear_remove) { clear_remove(l); req_clear_remove = 0; - wakeup(&proc_waiting); + cv_broadcast(&proc_wait_cv); } - ACQUIRE_LOCK(&lk); while ((wk = LIST_FIRST(&softdep_workitem_pending)) != 0) { /* * Remove the item to be processed. If we are removing the last @@ -747,14 +610,14 @@ softdep_process_worklist(matchmnt) * we just run down the list to find it rather than tracking it * in the above loop. */ - WORKLIST_REMOVE(wk); + worklist_remove(wk); if (wk == worklist_tail) { LIST_FOREACH(wkend, &softdep_workitem_pending, wk_list) if (LIST_NEXT(wkend, wk_list) == NULL) break; worklist_tail = wkend; } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); switch (wk->wk_type) { case D_DIRREM: @@ -794,13 +657,13 @@ softdep_process_worklist(matchmnt) TYPENAME(wk->wk_type)); /* NOTREACHED */ } + mutex_enter(&bufcache_lock); /* * If a umount operation wants to run the worklist * accurately, abort. */ if (softdep_worklist_req && matchmnt == NULL) { - ACQUIRE_LOCK(&lk); matchcnt = -1; break; } @@ -810,27 +673,26 @@ softdep_process_worklist(matchmnt) if (req_clear_inodedeps) { clear_inodedeps(l); req_clear_inodedeps = 0; - wakeup(&proc_waiting); + cv_broadcast(&proc_wait_cv); } if (req_clear_remove) { clear_remove(l); req_clear_remove = 0; - wakeup(&proc_waiting); + cv_broadcast(&proc_wait_cv); } /* * Process any new items on the delayed-free queue. 
*/ - ACQUIRE_LOCK(&lk); softdep_freequeue_process(); } if (matchmnt == NULL) { softdep_worklist_busy -= 1; if (softdep_worklist_req && softdep_worklist_busy == 0) - wakeup(&softdep_worklist_req); + cv_broadcast(&softdep_worklist_cv); } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return (matchcnt); } @@ -844,10 +706,10 @@ softdep_move_dependencies(oldbp, newbp) { struct worklist *wk, *wktail; + mutex_enter(&bufcache_lock); if (LIST_FIRST(&newbp->b_dep) != NULL) panic("softdep_move_dependencies: need merge code"); wktail = 0; - ACQUIRE_LOCK(&lk); while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) { LIST_REMOVE(wk, wk_list); if (wktail == 0) @@ -856,7 +718,7 @@ softdep_move_dependencies(oldbp, newbp) LIST_INSERT_AFTER(wktail, wk, wk_list); wktail = wk; } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } /* @@ -877,12 +739,15 @@ softdep_flushworklist(oldmnt, countp, l) /* * Await our turn to clear out the queue. */ + mutex_enter(&bufcache_lock); while (softdep_worklist_busy) { softdep_worklist_req += 1; - tsleep(&softdep_worklist_req, PRIBIO, "softflush", 0); + cv_wait(&softdep_worklist_cv, &bufcache_lock); softdep_worklist_req -= 1; } softdep_worklist_busy = -1; + mutex_exit(&bufcache_lock); + /* * Alternately flush the block device associated with the mount * point and process any dependencies that the flushing @@ -899,9 +764,13 @@ softdep_flushworklist(oldmnt, countp, l) if (error) break; } + + mutex_enter(&bufcache_lock); softdep_worklist_busy = 0; if (softdep_worklist_req) - wakeup(&softdep_worklist_req); + cv_broadcast(&softdep_worklist_cv); + mutex_exit(&bufcache_lock); + return (error); } @@ -985,7 +854,6 @@ static struct sema pagedep_in_progress; * when asked to allocate but not associated with any buffer. * If not found, allocate if DEPALLOC flag is passed. * Found or allocated entry is returned in pagedeppp. - * This routine must be called with splbio interrupts blocked. */ static int pagedep_lookup(ip, lbn, flags, pagedeppp) @@ -999,10 +867,8 @@ pagedep_lookup(ip, lbn, flags, pagedeppp) struct mount *mp; int i; -#ifdef DEBUG - if (lk.lkt_held == NULL) - panic("pagedep_lookup: lock not held"); -#endif + KASSERT(mutex_owned(&bufcache_lock)); + mp = ITOV(ip)->v_mount; pagedephd = &pagedep_hashtbl[PAGEDEP_HASH(mp, ip->i_number, lbn)]; top: @@ -1023,11 +889,9 @@ top: *pagedeppp = NULL; return (0); } - if (sema_get(&pagedep_in_progress, &lk) == 0) { - ACQUIRE_LOCK(&lk); + if (sema_get(&pagedep_in_progress, &bufcache_lock) == 0) goto top; - } - pagedep = pool_get(&pagedep_pool, PR_WAITOK); + pagedep = pool_cache_get(softdep_medium_cache, PR_WAITOK); bzero(pagedep, sizeof(struct pagedep)); pagedep->pd_list.wk_type = D_PAGEDEP; pagedep->pd_mnt = mp; @@ -1037,7 +901,7 @@ top: LIST_INIT(&pagedep->pd_pendinghd); for (i = 0; i < DAHASHSZ; i++) LIST_INIT(&pagedep->pd_diraddhd[i]); - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash); sema_release(&pagedep_in_progress); *pagedeppp = pagedep; @@ -1058,7 +922,6 @@ static struct sema inodedep_in_progress; * Look up a inodedep. Return 1 if found, 0 if not found. * If not found, allocate if DEPALLOC flag is passed. * Found or allocated entry is returned in inodedeppp. - * This routine must be called with splbio interrupts blocked. 
*/ static int inodedep_lookup(fs, inum, flags, inodedeppp) @@ -1071,10 +934,8 @@ inodedep_lookup(fs, inum, flags, inodedeppp) struct inodedep_hashhead *inodedephd; int firsttry; -#ifdef DEBUG - if (lk.lkt_held == NULL) - panic("inodedep_lookup: lock not held"); -#endif + KASSERT(mutex_owned(&bufcache_lock)); + firsttry = 1; inodedephd = &inodedep_hashtbl[INODEDEP_HASH(fs, inum)]; top: @@ -1094,16 +955,14 @@ top: * If we are over our limit, try to improve the situation. */ if (num_inodedep > max_softdeps && firsttry && speedup_syncer() == 0 && - request_cleanup(FLUSH_INODES, 1)) { + request_cleanup(FLUSH_INODES)) { firsttry = 0; goto top; } - if (sema_get(&inodedep_in_progress, &lk) == 0) { - ACQUIRE_LOCK(&lk); + if (sema_get(&inodedep_in_progress, &bufcache_lock) == 0) goto top; - } num_inodedep += 1; - inodedep = pool_get(&inodedep_pool, PR_WAITOK); + inodedep = softdep_alloc(D_INODEDEP); inodedep->id_list.wk_type = D_INODEDEP; inodedep->id_fs = fs; inodedep->id_ino = inum; @@ -1117,7 +976,7 @@ top: LIST_INIT(&inodedep->id_bufwait); TAILQ_INIT(&inodedep->id_inoupdt); TAILQ_INIT(&inodedep->id_newinoupdt); - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); LIST_INSERT_HEAD(inodedephd, inodedep, id_hash); sema_release(&inodedep_in_progress); *inodedeppp = inodedep; @@ -1149,6 +1008,8 @@ newblk_lookup(fs, newblkno, flags, newblkpp) struct newblk *newblk; struct newblk_hashhead *newblkhd; + KASSERT(mutex_owned(&bufcache_lock)); + newblkhd = NEWBLK_HASH(fs, newblkno); top: for (newblk = LIST_FIRST(newblkhd); newblk; @@ -1163,12 +1024,13 @@ top: *newblkpp = NULL; return (0); } - if (sema_get(&newblk_in_progress, 0) == 0) + if (sema_get(&newblk_in_progress, &bufcache_lock) == 0) goto top; - newblk = pool_get(&newblk_pool, PR_WAITOK); + newblk = pool_cache_get(softdep_small_cache, PR_WAITOK); newblk->nb_state = 0; newblk->nb_fs = fs; newblk->nb_newblkno = newblkno; + mutex_enter(&bufcache_lock); LIST_INSERT_HEAD(newblkhd, newblk, nb_hash); sema_release(&newblk_in_progress); *newblkpp = newblk; @@ -1184,56 +1046,71 @@ softdep_initialize() { int i; + mutex_init(&emerginoblk_lock, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&freequeue_lock, MUTEX_DEFAULT, IPL_NONE); + cv_init(&softdep_tb_cv, "softdbuf"); + cv_init(&proc_wait_cv, "softdep"); + cv_init(&emerginoblk_cv, "emdino"); + cv_init(&softdep_worklist_cv, "softflsh"); + callout_init(&pause_timer_ch, CALLOUT_MPSAFE); + bioopsp = &bioops_softdep; malloc_type_attach(M_PAGEDEP); malloc_type_attach(M_INODEDEP); malloc_type_attach(M_NEWBLK); - callout_init(&pause_timer_ch, CALLOUT_MPSAFE); - pool_init(&sdpcpool, sizeof(struct buf), 0, 0, 0, "sdpcpool", - &pool_allocator_nointr, IPL_NONE); - pool_init(&pagedep_pool, sizeof(struct pagedep), 0, 0, 0, "pagedeppl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&inodedep_pool, sizeof(struct inodedep), 0, 0, 0,"inodedeppl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&newblk_pool, sizeof(struct newblk), 0, 0, 0, "newblkpl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&bmsafemap_pool, sizeof(struct bmsafemap), 0, 0, 0, - "bmsafemappl", &pool_allocator_nointr, IPL_NONE); - pool_init(&allocdirect_pool, sizeof(struct allocdirect), 0, 0, 0, - "allocdirectpl", &pool_allocator_nointr, IPL_NONE); - pool_init(&indirdep_pool, sizeof(struct indirdep), 0, 0, 0,"indirdeppl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&allocindir_pool, sizeof(struct allocindir), 0, 0, 0, - "allocindirpl", &pool_allocator_nointr, IPL_NONE); - pool_init(&freefrag_pool, sizeof(struct freefrag), 0, 0, 0, - "freefragpl", 
&pool_allocator_nointr, IPL_NONE); - pool_init(&freeblks_pool, sizeof(struct freeblks), 0, 0, 0, - "freeblkspl", &pool_allocator_nointr, IPL_NONE); - pool_init(&freefile_pool, sizeof(struct freefile), 0, 0, 0, - "freefilepl", &pool_allocator_nointr, IPL_NONE); - pool_init(&diradd_pool, sizeof(struct diradd), 0, 0, 0, "diraddpl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&mkdir_pool, sizeof(struct mkdir), 0, 0, 0, "mkdirpl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&dirrem_pool, sizeof(struct dirrem), 0, 0, 0, "dirrempl", - &pool_allocator_nointr, IPL_NONE); - pool_init(&newdirblk_pool, sizeof (struct newdirblk), 0, 0, 0, - "newdirblkpl", &pool_allocator_nointr, IPL_NONE); + i = sizeof(struct freeblks); + if (i < sizeof(struct buf)) + i = sizeof(struct buf); + softdep_large_cache = pool_cache_init(i, 0, 0, 0, "sdeplarge", NULL, + IPL_NONE, NULL, NULL, NULL); + KASSERT(softdep_large_cache != NULL); /* XXX */ + + i = sizeof(struct allocdirect); + if (i < sizeof(struct allocindir)) + i = sizeof(struct allocindir); + if (i < sizeof(struct pagedep)) + i = sizeof(struct pagedep); + if (i < sizeof(struct inodedep)) + i = sizeof(struct inodedep); + softdep_medium_cache = pool_cache_init(i, 0, 0, 0, "sdepmedium", NULL, + IPL_NONE, NULL, NULL, NULL); + KASSERT(softdep_medium_cache != NULL); /* XXX */ + + i = sizeof(struct newblk); + if (i < sizeof(struct bmsafemap)) + i = sizeof(struct bmsafemap); + if (i < sizeof(struct indirdep)) + i = sizeof(struct indirdep); + if (i < sizeof(struct freefrag)) + i = sizeof(struct freefrag); + if (i < sizeof(struct freefile)) + i = sizeof(struct freefile); + if (i < sizeof(struct diradd)) + i = sizeof(struct diradd); + if (i < sizeof(struct mkdir)) + i = sizeof(struct mkdir); + if (i < sizeof(struct dirrem)) + i = sizeof(struct dirrem); + if (i < sizeof(struct newdirblk)) + i = sizeof(struct newdirblk); + softdep_small_cache = pool_cache_init(i, 0, 0, 0, "sdepsmall", NULL, + IPL_NONE, NULL, NULL, NULL); + KASSERT(softdep_small_cache != NULL); /* XXX */ LIST_INIT(&mkdirlisthd); LIST_INIT(&softdep_workitem_pending); max_softdeps = desiredvnodes / 4; pagedep_hashtbl = hashinit(max_softdeps / 2, HASH_LIST, M_PAGEDEP, M_WAITOK, &pagedep_hash); - sema_init(&pagedep_in_progress, "pagedep", PRIBIO, 0); + sema_init(&pagedep_in_progress, "pagedep", 0); inodedep_hashtbl = hashinit(max_softdeps / 2, HASH_LIST, M_INODEDEP, M_WAITOK, &inodedep_hash); - sema_init(&inodedep_in_progress, "inodedep", PRIBIO, 0); + sema_init(&inodedep_in_progress, "inodedep", 0); newblk_hashtbl = hashinit(64, HASH_LIST, M_NEWBLK, M_WAITOK, &newblk_hash); - sema_init(&newblk_in_progress, "newblk", PRIBIO, 0); + sema_init(&newblk_in_progress, "newblk", 0); for (i = 0; i < PCBPHASHSIZE; i++) { LIST_INIT(&pcbphashhead[i]); } @@ -1259,7 +1136,7 @@ softdep_reinitialize() max_softdeps = desiredvnodes * 4; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); oldhash1 = pagedep_hashtbl; oldmask1 = pagedep_hash; pagedep_hashtbl = hash1; @@ -1283,7 +1160,7 @@ softdep_reinitialize() LIST_INSERT_HEAD(&hash2[val], inodedep, id_hash); } } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); hashdone(oldhash1, M_PAGEDEP); hashdone(oldhash2, M_INODEDEP); } @@ -1312,7 +1189,8 @@ softdep_mount(devvp, mp, fs, cred) int needswap = UFS_FSNEEDSWAP(fs); #endif - mp->mnt_flag &= ~MNT_ASYNC; + mp->mnt_flag &= ~MNT_ASYNC; /* XXXSMP */ + /* * When doing soft updates, the counters in the * superblock may have gotten out of sync, so we have @@ -1321,6 +1199,7 @@ softdep_mount(devvp, mp, fs, cred) if ((fs->fs_clean & 
FS_ISCLEAN) || (fs->fs_fmod != 0 && (fs->fs_clean & FS_WASCLEAN))) return (0); + bzero(&cstotal, sizeof cstotal); for (cyl = 0; cyl < fs->fs_ncg; cyl++) { if ((error = bread(devvp, fsbtodb(fs, cgtod(fs, cyl)), @@ -1344,6 +1223,12 @@ softdep_mount(devvp, mp, fs, cred) return (0); } +void +softdep_unmount(struct mount *mp) +{ + +} + /* * Protecting the freemaps (or bitmaps). * @@ -1394,14 +1279,14 @@ softdep_setup_inomapdep(bp, ip, newinum) * Otherwise add it to the dependency list for the buffer holding * the cylinder group map from which it was allocated. */ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (inodedep_lookup(ip->i_fs, newinum, DEPALLOC, &inodedep) != 0) panic("softdep_setup_inomapdep: found inode"); inodedep->id_buf = bp; inodedep->id_state &= ~DEPCOMPLETE; bmsafemap = bmsafemap_lookup(bp); LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } /* @@ -1422,19 +1307,18 @@ softdep_setup_blkmapdep(bp, fs, newblkno) * Add it to the dependency list for the buffer holding * the cylinder group map from which it was allocated. */ + mutex_enter(&bufcache_lock); if (newblk_lookup(fs, newblkno, DEPALLOC, &newblk) != 0) panic("softdep_setup_blkmapdep: found block"); - ACQUIRE_LOCK(&lk); newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(bp); LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } /* * Find the bmsafemap associated with a cylinder group buffer. * If none exists, create one. The buffer must be locked when - * this routine is called and this routine must be called with - * splbio interrupts blocked. + * this routine is called. */ static struct bmsafemap * bmsafemap_lookup(bp) @@ -1443,15 +1327,13 @@ bmsafemap_lookup(bp) struct bmsafemap *bmsafemap; struct worklist *wk; -#ifdef DEBUG - if (lk.lkt_held == NULL) - panic("bmsafemap_lookup: lock not held"); -#endif + KASSERT(mutex_owned(&bufcache_lock)); + for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) if (wk->wk_type == D_BMSAFEMAP) return (WK_BMSAFEMAP(wk)); - FREE_LOCK(&lk); - bmsafemap = pool_get(&bmsafemap_pool, PR_WAITOK); + mutex_exit(&bufcache_lock); + bmsafemap = softdep_alloc(D_BMSAFEMAP); bmsafemap->sm_list.wk_type = D_BMSAFEMAP; bmsafemap->sm_list.wk_state = 0; bmsafemap->sm_buf = bp; @@ -1459,8 +1341,8 @@ bmsafemap_lookup(bp) LIST_INIT(&bmsafemap->sm_allocindirhd); LIST_INIT(&bmsafemap->sm_inodedephd); LIST_INIT(&bmsafemap->sm_newblkhd); - ACQUIRE_LOCK(&lk); - WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list); + mutex_enter(&bufcache_lock); + worklist_insert(&bp->b_dep, &bmsafemap->sm_list); return (bmsafemap); } @@ -1511,7 +1393,7 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) struct newblk *newblk; UVMHIST_FUNC("softdep_setup_allocdirect"); UVMHIST_CALLED(ubchist); - adp = pool_get(&allocdirect_pool, PR_WAITOK); + adp = softdep_alloc(D_ALLOCDIRECT); bzero(adp, sizeof(struct allocdirect)); adp->ad_list.wk_type = D_ALLOCDIRECT; adp->ad_lbn = lbn; @@ -1525,6 +1407,7 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) else adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize); + mutex_enter(&bufcache_lock); if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0) panic("softdep_setup_allocdirect: lost block"); @@ -1542,7 +1425,6 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) UVMHIST_LOG(ubchist, "bp = %p, size = %ld -> %ld", bp, oldsize, newsize, 0); } - ACQUIRE_LOCK(&lk); (void) 
inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep); adp->ad_inodedep = inodedep; @@ -1556,8 +1438,8 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps); } LIST_REMOVE(newblk, nb_hash); - pool_put(&newblk_pool, newblk); - WORKLIST_INSERT(&bp->b_dep, &adp->ad_list); + pool_cache_put(softdep_small_cache, newblk); + worklist_insert(&bp->b_dep, &adp->ad_list); if (lbn >= NDADDR) { /* allocating an indirect block */ if (oldblkno != 0) @@ -1572,7 +1454,7 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) */ if ((ip->i_mode & IFMT) == IFDIR && pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) - WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); + worklist_insert(&bp->b_dep, &pagedep->pd_list); } /* * The list of allocdirects must be kept in sorted and ascending @@ -1593,7 +1475,7 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) TAILQ_INSERT_TAIL(adphead, adp, ad_next); if (oldadp != NULL && oldadp->ad_lbn == lbn) allocdirect_merge(adphead, adp, oldadp); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return; } for (oldadp = TAILQ_FIRST(adphead); oldadp; @@ -1607,12 +1489,11 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) TAILQ_INSERT_BEFORE(oldadp, adp, ad_next); if (oldadp->ad_lbn == lbn) allocdirect_merge(adphead, adp, oldadp); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } /* * Replace an old allocdirect dependency with a newer one. - * This routine must be called with splbio interrupts blocked. */ static void allocdirect_merge(adphead, newadp, oldadp) @@ -1624,10 +1505,8 @@ allocdirect_merge(adphead, newadp, oldadp) struct freefrag *freefrag; struct newdirblk *newdirblk; -#ifdef DEBUG - if (lk.lkt_held == NULL) - panic("allocdirect_merge: lock not held"); -#endif + KASSERT(mutex_owned(&bufcache_lock)); + if (newadp->ad_oldblkno != oldadp->ad_newblkno || newadp->ad_oldsize != oldadp->ad_newsize || newadp->ad_lbn >= NDADDR) @@ -1667,10 +1546,10 @@ allocdirect_merge(adphead, newadp, oldadp) */ if ((wk = LIST_FIRST(&oldadp->ad_newdirblk)) != NULL) { newdirblk = WK_NEWDIRBLK(wk); - WORKLIST_REMOVE(&newdirblk->db_list); + worklist_remove(&newdirblk->db_list); if (LIST_FIRST(&oldadp->ad_newdirblk) != NULL) panic("allocdirect_merge: extra newdirblk"); - WORKLIST_INSERT(&newadp->ad_newdirblk, &newdirblk->db_list); + worklist_insert(&newadp->ad_newdirblk, &newdirblk->db_list); } free_allocdirect(adphead, oldadp, 0); } @@ -1692,7 +1571,7 @@ newfreefrag(ip, blkno, size) fs = ip->i_fs; if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag) panic("newfreefrag: frag size"); - freefrag = pool_get(&freefrag_pool, PR_WAITOK); + freefrag = softdep_alloc(D_FREEFRAG); freefrag->ff_list.wk_type = D_FREEFRAG; freefrag->ff_state = ip->i_uid & ~ONWORKLIST; /* XXX - used below */ freefrag->ff_inum = ip->i_number; @@ -1715,7 +1594,7 @@ handle_workitem_freefrag(freefrag) ffs_blkfree(ump->um_fs, ump->um_devvp, freefrag->ff_blkno, freefrag->ff_fragsize, freefrag->ff_inum); - pool_put(&freefrag_pool, freefrag); + pool_cache_put(softdep_small_cache, freefrag); } /* @@ -1755,7 +1634,7 @@ newallocindir(ip, ptrno, newblkno, oldblkno) { struct allocindir *aip; - aip = pool_get(&allocindir_pool, PR_WAITOK); + aip = softdep_alloc(D_ALLOCINDIR); bzero(aip, sizeof(struct allocindir)); aip->ai_list.wk_type = D_ALLOCINDIR; aip->ai_state = ATTACHED; @@ -1784,10 +1663,10 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, 
nbp) struct pagedep *pagedep; aip = newallocindir(ip, ptrno, newblkno, oldblkno); + mutex_enter(&bufcache_lock); if (nbp == NULL) { nbp = softdep_setup_pagecache(ip, lbn, ip->i_fs->fs_bsize); } - ACQUIRE_LOCK(&lk); /* * If we are allocating a directory page, then we must @@ -1796,10 +1675,10 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp) */ if ((ip->i_mode & IFMT) == IFDIR && pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) - WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list); - WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); - FREE_LOCK(&lk); + worklist_insert(&nbp->b_dep, &pagedep->pd_list); + worklist_insert(&nbp->b_dep, &aip->ai_list); setup_allocindir_phase2(bp, ip, aip); + mutex_exit(&bufcache_lock); } /* @@ -1817,10 +1696,10 @@ softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno) struct allocindir *aip; aip = newallocindir(ip, ptrno, newblkno, 0); - ACQUIRE_LOCK(&lk); - WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); - FREE_LOCK(&lk); + mutex_enter(&bufcache_lock); + worklist_insert(&nbp->b_dep, &aip->ai_list); setup_allocindir_phase2(bp, ip, aip); + mutex_exit(&bufcache_lock); } /* @@ -1840,10 +1719,11 @@ setup_allocindir_phase2(bp, ip, aip) struct freefrag *freefrag; struct newblk *newblk; + KASSERT(mutex_owned(&bufcache_lock)); + if (bp->b_lblkno >= 0) panic("setup_allocindir_phase2: not indir blk"); for (indirdep = NULL, newindirdep = NULL; ; ) { - ACQUIRE_LOCK(&lk); for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) { if (wk->wk_type != D_INDIRDEP) @@ -1853,15 +1733,13 @@ setup_allocindir_phase2(bp, ip, aip) } if (indirdep == NULL && newindirdep) { indirdep = newindirdep; - WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list); + worklist_insert(&bp->b_dep, &indirdep->ir_list); newindirdep = NULL; } - FREE_LOCK(&lk); if (indirdep) { if (newblk_lookup(ip->i_fs, aip->ai_newblkno, 0, &newblk) == 0) panic("setup_allocindir: lost block"); - ACQUIRE_LOCK(&lk); if (newblk->nb_state == DEPCOMPLETE) { aip->ai_state |= DEPCOMPLETE; aip->ai_buf = NULL; @@ -1873,7 +1751,7 @@ setup_allocindir_phase2(bp, ip, aip) aip, ai_deps); } LIST_REMOVE(newblk, nb_hash); - pool_put(&newblk_pool, newblk); + pool_cache_put(softdep_small_cache, newblk); aip->ai_indirdep = indirdep; /* * Check to see if there is an existing dependency @@ -1905,20 +1783,25 @@ setup_allocindir_phase2(bp, ip, aip) else ((int64_t *)indirdep->ir_savebp->b_data) [aip->ai_offset] = aip->ai_oldblkno; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if (freefrag != NULL) handle_workitem_freefrag(freefrag); - } + } else + mutex_exit(&bufcache_lock); if (newindirdep) { if (indirdep->ir_savebp != NULL) { - brelse(newindirdep->ir_savebp, 0); + mutex_enter(&bufcache_lock); + brelsel(newindirdep->ir_savebp, 0); softdep_trackbufs(-1, false); + mutex_exit(&bufcache_lock); } - WORKITEM_FREE(newindirdep, D_INDIRDEP); + workitem_free(newindirdep, D_INDIRDEP); } - if (indirdep) + if (indirdep) { + mutex_enter(&bufcache_lock); break; - newindirdep = pool_get(&indirdep_pool, PR_WAITOK); + } + newindirdep = softdep_alloc(D_INDIRDEP); newindirdep->ir_list.wk_type = D_INDIRDEP; newindirdep->ir_state = ATTACHED; if (ip->i_ump->um_fstype == UFS1) @@ -1929,11 +1812,12 @@ setup_allocindir_phase2(bp, ip, aip) VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL); } - softdep_trackbufs(1, true); newindirdep->ir_savebp = getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, 0); newindirdep->ir_savebp->b_flags |= B_ASYNC; bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount); + mutex_enter(&bufcache_lock); + 
softdep_trackbufs(1, true); } } @@ -1978,6 +1862,7 @@ softdep_setup_freeblocks( struct vnode *vp = ITOV(ip); struct buf *bp; struct fs *fs = ip->i_fs; + struct ufsmount *ump = ip->i_ump; int i, error, delayx; #ifdef FFS_EI const int needswap = UFS_FSNEEDSWAP(fs); @@ -1985,7 +1870,7 @@ softdep_setup_freeblocks( if (length != 0) panic("softdep_setup_freeblocks: non-zero length"); - freeblks = pool_get(&freeblks_pool, PR_WAITOK); + freeblks = softdep_alloc(D_FREEBLKS); bzero(freeblks, sizeof(struct freeblks)); freeblks->fb_list.wk_type = D_FREEBLKS; freeblks->fb_uid = ip->i_uid; @@ -2025,8 +1910,12 @@ softdep_setup_freeblocks( * accounted for then (see softdep_filereleased()). If the * file is merely being truncated, then we account for it now. */ - if ((ip->i_flag & IN_SPACECOUNTED) == 0) + if ((ip->i_flag & IN_SPACECOUNTED) == 0) { + mutex_enter(&ump->um_lock); fs->fs_pendingblocks += freeblks->fb_chkcnt; + mutex_exit(&ump->um_lock); + } + /* * Push the zero'ed inode to to its disk buffer so that we are free * to delete its dependencies below. Once the dependencies are gone @@ -2062,7 +1951,7 @@ softdep_setup_freeblocks( /* * Find and eliminate any inode dependencies. */ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); (void) inodedep_lookup(fs, ip->i_number, DEPALLOC, &inodedep); if ((inodedep->id_state & IOSTARTED) != 0) panic("softdep_setup_freeblocks: inode busy"); @@ -2075,7 +1964,7 @@ softdep_setup_freeblocks( */ delayx = (inodedep->id_state & DEPCOMPLETE); if (delayx) - WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list); + worklist_insert(&inodedep->id_bufwait, &freeblks->fb_list); /* * Because the file length has been truncated to zero, any * pending block allocation dependency structures associated @@ -2094,7 +1983,7 @@ softdep_setup_freeblocks( merge_inode_lists(inodedep); while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0) free_allocdirect(&inodedep->id_inoupdt, adp, delayx); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); bdwrite(bp); /* * We must wait for any I/O in progress to finish so that @@ -2102,20 +1991,18 @@ softdep_setup_freeblocks( * Once they are all there, walk the list and get rid of * any dependencies. 
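+ *
+ * The wait itself is drain_output(), whose second argument is gone in
+ * this change.  With splbio out of the picture it presumably blocks on
+ * the vnode's interlock and condvar, along these lines (a sketch only;
+ * v_numoutput is the real output counter, while the condvar name v_cv
+ * is this note's assumption):
+ *
+ *	static void
+ *	drain_output(struct vnode *vp)
+ *	{
+ *
+ *		mutex_enter(&vp->v_interlock);
+ *		while (vp->v_numoutput > 0)
+ *			cv_wait(&vp->v_cv, &vp->v_interlock);
+ *		mutex_exit(&vp->v_interlock);
+ *	}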
*/ - ACQUIRE_LOCK(&lk); - drain_output(vp, 1); + drain_output(vp); + mutex_enter(&bufcache_lock); while (getdirtybuf(&vp->v_dirtyblkhd.lh_first, MNT_WAIT)) { bp = vp->v_dirtyblkhd.lh_first; (void) inodedep_lookup(fs, ip->i_number, 0, &inodedep); deallocate_dependencies(bp, inodedep); - FREE_LOCK(&lk); - brelse(bp, BC_INVAL | BC_NOCACHE); - ACQUIRE_LOCK(&lk); + brelsel(bp, BC_INVAL | BC_NOCACHE); } softdep_free_pagecache(ip); if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0) (void) free_inodedep(inodedep); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); /* * If the inode has never been written to disk (delayx == 0), * then we can process the freeblks now that we have deleted @@ -2174,8 +2061,8 @@ deallocate_dependencies(bp, inodedep) panic("deallocate_dependencies: not indir"); bcopy(bp->b_data, indirdep->ir_savebp->b_data, bp->b_bcount); - WORKLIST_REMOVE(wk); - WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, wk); + worklist_remove(wk); + worklist_insert(&indirdep->ir_savebp->b_dep, wk); continue; case D_PAGEDEP: @@ -2205,7 +2092,7 @@ deallocate_dependencies(bp, inodedep) ALLCOMPLETE) add_to_worklist(&dirrem->dm_list); else - WORKLIST_INSERT(&inodedep->id_bufwait, + worklist_insert(&inodedep->id_bufwait, &dirrem->dm_list); } if ((pagedep->pd_state & NEWBLOCK) != 0) { @@ -2215,17 +2102,17 @@ deallocate_dependencies(bp, inodedep) pagedep) break; if (wk != NULL) { - WORKLIST_REMOVE(wk); + worklist_remove(wk); free_newdirblk(WK_NEWDIRBLK(wk)); } else { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("deallocate_dependencies: " "lost pagedep"); } } - WORKLIST_REMOVE(&pagedep->pd_list); + worklist_remove(&pagedep->pd_list); LIST_REMOVE(pagedep, pd_hash); - WORKITEM_FREE(pagedep, D_PAGEDEP); + workitem_free(pagedep, D_PAGEDEP); continue; case D_ALLOCINDIR: @@ -2248,7 +2135,6 @@ deallocate_dependencies(bp, inodedep) /* * Free an allocdirect. Generate a new freefrag work request if appropriate. - * This routine must be called with splbio interrupts blocked. */ static void free_allocdirect(adphead, adp, delayx) @@ -2259,39 +2145,36 @@ free_allocdirect(adphead, adp, delayx) struct newdirblk *newdirblk; struct worklist *wk; -#ifdef DEBUG - if (lk.lkt_held == NULL) - panic("free_allocdirect: lock not held"); -#endif + KASSERT(mutex_owned(&bufcache_lock)); + if ((adp->ad_state & DEPCOMPLETE) == 0) LIST_REMOVE(adp, ad_deps); TAILQ_REMOVE(adphead, adp, ad_next); if ((adp->ad_state & COMPLETE) == 0) - WORKLIST_REMOVE(&adp->ad_list); + worklist_remove(&adp->ad_list); if (adp->ad_freefrag != NULL) { if (delayx) - WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, + worklist_insert(&adp->ad_inodedep->id_bufwait, &adp->ad_freefrag->ff_list); else add_to_worklist(&adp->ad_freefrag->ff_list); } if ((wk = LIST_FIRST(&adp->ad_newdirblk)) != NULL) { newdirblk = WK_NEWDIRBLK(wk); - WORKLIST_REMOVE(&newdirblk->db_list); + worklist_remove(&newdirblk->db_list); if (LIST_FIRST(&adp->ad_newdirblk) != NULL) panic("free_allocdirect: extra newdirblk"); if (delayx) - WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, + worklist_insert(&adp->ad_inodedep->id_bufwait, &newdirblk->db_list); else free_newdirblk(newdirblk); } - WORKITEM_FREE(adp, D_ALLOCDIRECT); + workitem_free(adp, D_ALLOCDIRECT); } /* * Free a newdirblk. Clear the NEWBLOCK flag on its associated pagedep. - * This routine must be called with splbio interrupts blocked. 
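+ *
+ * The caller's obligation is now the buffer cache lock, which the
+ * KASSERT below checks in place of the old DEBUG panic; a call site
+ * looks roughly like this sketch:
+ *
+ *	mutex_enter(&bufcache_lock);
+ *	free_newdirblk(newdirblk);
+ *	mutex_exit(&bufcache_lock);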
*/ static void free_newdirblk(newdirblk) @@ -2301,10 +2184,8 @@ free_newdirblk(newdirblk) struct newdirblk *newdirblk; { struct pagedep *pagedep; struct diradd *dap; int i; -#ifdef DEBUG - if (lk.lkt_held == NULL) - panic("free_newdirblk: lock not held"); -#endif + KASSERT(mutex_owned(&bufcache_lock)); + /* * If the pagedep is still linked onto the directory buffer * dependency chain, then some of the entries on the @@ -2328,9 +2209,9 @@ break; if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0) { LIST_REMOVE(pagedep, pd_hash); - WORKITEM_FREE(pagedep, D_PAGEDEP); + workitem_free(pagedep, D_PAGEDEP); } - WORKITEM_FREE(newdirblk, D_NEWDIRBLK); + workitem_free(newdirblk, D_NEWDIRBLK); } /* @@ -2341,13 +2222,14 @@ void softdep_freefile(struct vnode *pvp, ino_t ino, int mode) { struct inode *ip = VTOI(pvp); + struct ufsmount *ump = ip->i_ump; struct inodedep *inodedep; struct freefile *freefile; /* * This sets up the inode de-allocation dependency. */ - freefile = pool_get(&freefile_pool, PR_WAITOK); + freefile = softdep_alloc(D_FREEFILE); freefile->fx_list.wk_type = D_FREEFILE; freefile->fx_list.wk_state = 0; freefile->fx_mode = mode; @@ -2355,8 +2237,11 @@ softdep_freefile(struct vnode *pvp, ino_t ino, int mode) freefile->fx_devvp = ip->i_devvp; freefile->fx_fs = ip->i_fs; freefile->fx_mnt = ITOV(ip)->v_mount; - if ((ip->i_flag & IN_SPACECOUNTED) == 0) + if ((ip->i_flag & IN_SPACECOUNTED) == 0) { + mutex_enter(&ump->um_lock); ip->i_fs->fs_pendinginodes += 1; + mutex_exit(&ump->um_lock); + } /* * If the inodedep does not exist, then the zero'ed inode has @@ -2364,21 +2249,21 @@ softdep_freefile(struct vnode *pvp, ino_t ino, int mode) * written to disk, then the on-disk inode is zero'ed. In either * case we can free the file immediately. */ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) == 0 || check_inode_unwritten(inodedep)) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); handle_workitem_freefile(freefile); return; } - WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list); - FREE_LOCK(&lk); + worklist_insert(&inodedep->id_inowait, &freefile->fx_list); + mutex_exit(&bufcache_lock); + ip->i_flag |= IN_MODIFIED; } /* * Check to see if an inode has never been written to disk. If * so free the inodedep and return success, otherwise return failure. - * This routine must be called with splbio interrupts blocked. * * If we still have a bitmap dependency, then the inode has never * been written to disk. 
Drop the dependency as it is no longer @@ -2395,6 +2280,8 @@ check_inode_unwritten(inodedep) struct inodedep *inodedep; { + KASSERT(mutex_owned(&bufcache_lock)); + if ((inodedep->id_state & DEPCOMPLETE) != 0 || LIST_FIRST(&inodedep->id_pendinghd) != NULL || LIST_FIRST(&inodedep->id_bufwait) != NULL || @@ -2407,7 +2294,7 @@ check_inode_unwritten(inodedep) LIST_REMOVE(inodedep, id_deps); inodedep->id_buf = NULL; if (inodedep->id_state & ONWORKLIST) - WORKLIST_REMOVE(&inodedep->id_list); + worklist_remove(&inodedep->id_list); if (inodedep->id_savedino1 != NULL) { inodedep_freedino(inodedep); } @@ -2424,6 +2311,8 @@ free_inodedep(inodedep) struct inodedep *inodedep; { + KASSERT(mutex_owned(&bufcache_lock)); + if ((inodedep->id_state & ONWORKLIST) != 0 || (inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE || LIST_FIRST(&inodedep->id_pendinghd) != NULL || @@ -2434,7 +2323,7 @@ free_inodedep(inodedep) inodedep->id_nlinkdelta != 0 || inodedep->id_savedino1 != NULL) return (0); LIST_REMOVE(inodedep, id_hash); - WORKITEM_FREE(inodedep, D_INODEDEP); + workitem_free(inodedep, D_INODEDEP); num_inodedep -= 1; return (1); } @@ -2458,9 +2347,11 @@ handle_workitem_freeblocks(freeblks) int64_t blocksreleased = 0; int error, allerror = 0; daddr_t baselbns[NIADDR], tmpval; + struct ufsmount *ump; - devvp = freeblks->fb_ump->um_devvp; - fs = freeblks->fb_ump->um_fs; + ump = freeblks->fb_ump; + devvp = ump->um_devvp; + fs = ump->um_fs; tmpval = 1; baselbns[0] = NDADDR; for (i = 1; i < NIADDR; i++) { @@ -2492,7 +2383,9 @@ handle_workitem_freeblocks(freeblks) continue; bsize = sblksize(fs, freeblks->fb_oldsize, i); ffs_blkfree(fs, devvp, bn, bsize, freeblks->fb_previousinum); + mutex_enter(&ump->um_lock); fs->fs_pendingblocks -= btodb(bsize); + mutex_exit(&ump->um_lock); blocksreleased += btodb(bsize); } @@ -2502,7 +2395,7 @@ handle_workitem_freeblocks(freeblks) if (allerror) softdep_error("handle_workitem_freeblks", allerror); #endif /* DIAGNOSTIC */ - WORKITEM_FREE(freeblks, D_FREEBLKS); + workitem_free(freeblks, D_FREEBLKS); } /* @@ -2524,13 +2417,14 @@ indir_trunc(freeblks, dbn, level, lbn, countp) int32_t *bap1 = NULL; int64_t *bap2 = NULL; daddr_t nb; - struct fs *fs = freeblks->fb_ump->um_fs; + struct ufsmount *ump = freeblks->fb_ump; + struct fs *fs = ump->um_fs; struct worklist *wk; struct indirdep *indirdep; daddr_t lbnadd; int i, nblocks, ufs1fmt; int error, allerror = 0; - struct vnode *devvp = freeblks->fb_ump->um_devvp; + struct vnode *devvp = ump->um_devvp; #ifdef FFS_EI const int needswap = UFS_FSNEEDSWAP(fs); #endif @@ -2550,21 +2444,21 @@ indir_trunc(freeblks, dbn, level, lbn, countp) * a complete copy of the indirect block in memory for our use. * Otherwise we have to read the blocks in from the disk. 
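+ *
+ * Note that incore() examines buffer cache state, so the lookup below
+ * now runs under bufcache_lock, and the release at the end of this
+ * routine uses brelsel() because the lock is already held.  brelse()
+ * is assumed here to be nothing more than the locked wrapper:
+ *
+ *	void
+ *	brelse(struct buf *bp, int set)
+ *	{
+ *
+ *		mutex_enter(&bufcache_lock);
+ *		brelsel(bp, set);
+ *		mutex_exit(&bufcache_lock);
+ *	}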
*/ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if ((bp = incore(devvp, dbn)) != NULL && (wk = LIST_FIRST(&bp->b_dep)) != NULL) { if (wk->wk_type != D_INDIRDEP || (indirdep = WK_INDIRDEP(wk))->ir_savebp != bp || (indirdep->ir_state & GOINGAWAY) == 0) panic("indir_trunc: lost indirdep"); - WORKLIST_REMOVE(wk); - WORKITEM_FREE(indirdep, D_INDIRDEP); + worklist_remove(wk); + mutex_exit(&bufcache_lock); + workitem_free(indirdep, D_INDIRDEP); if (LIST_FIRST(&bp->b_dep) != NULL) panic("indir_trunc: dangling dep"); - FREE_LOCK(&lk); } else { - FREE_LOCK(&lk); softdep_trackbufs(1, false); + mutex_exit(&bufcache_lock); error = bread(devvp, dbn, (int)fs->fs_bsize, NOCRED, &bp); if (error) return (error); @@ -2596,17 +2490,20 @@ indir_trunc(freeblks, dbn, level, lbn, countp) } ffs_blkfree(fs, devvp, nb, fs->fs_bsize, freeblks->fb_previousinum); + mutex_enter(&ump->um_lock); fs->fs_pendingblocks -= nblocks; + mutex_exit(&ump->um_lock); *countp += nblocks; } - brelse(bp, BC_INVAL | BC_NOCACHE); + mutex_enter(&bufcache_lock); + brelsel(bp, BC_INVAL | BC_NOCACHE); softdep_trackbufs(-1, false); + mutex_exit(&bufcache_lock); return (allerror); } /* * Free an allocindir. - * This routine must be called with splbio interrupts blocked. */ static void free_allocindir(aip, inodedep) @@ -2615,23 +2512,21 @@ free_allocindir(aip, inodedep) { struct freefrag *freefrag; -#ifdef DEBUG - if (lk.lkt_held == NULL) - panic("free_allocindir: lock not held"); -#endif + KASSERT(mutex_owned(&bufcache_lock)); + if ((aip->ai_state & DEPCOMPLETE) == 0) LIST_REMOVE(aip, ai_deps); if (aip->ai_state & ONWORKLIST) - WORKLIST_REMOVE(&aip->ai_list); + worklist_remove(&aip->ai_list); LIST_REMOVE(aip, ai_next); if ((freefrag = aip->ai_freefrag) != NULL) { if (inodedep == NULL) add_to_worklist(&freefrag->ff_list); else - WORKLIST_INSERT(&inodedep->id_bufwait, + worklist_insert(&inodedep->id_bufwait, &freefrag->ff_list); } - WORKITEM_FREE(aip, D_ALLOCINDIR); + workitem_free(aip, D_ALLOCINDIR); } /* @@ -2688,27 +2583,27 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) fs = dp->i_fs; lbn = lblkno(fs, diroffset); offset = blkoff(fs, diroffset); - dap = pool_get(&diradd_pool, PR_WAITOK); + dap = softdep_alloc(D_DIRADD); bzero(dap, sizeof(struct diradd)); dap->da_list.wk_type = D_DIRADD; dap->da_offset = offset; dap->da_newinum = newinum; dap->da_state = ATTACHED; if (isnewblk && lbn < NDADDR && fragoff(fs, diroffset) == 0) { - newdirblk = pool_get(&newdirblk_pool, PR_WAITOK); + newdirblk = softdep_alloc(D_NEWDIRBLK); newdirblk->db_list.wk_type = D_NEWDIRBLK; newdirblk->db_state = 0; } if (newdirbp == NULL) { dap->da_state |= DEPCOMPLETE; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); } else { dap->da_state |= MKDIR_BODY | MKDIR_PARENT; - mkdir1 = pool_get(&mkdir_pool, PR_WAITOK); + mkdir1 = softdep_alloc(D_MKDIR); mkdir1->md_list.wk_type = D_MKDIR; mkdir1->md_state = MKDIR_BODY; mkdir1->md_diradd = dap; - mkdir2 = pool_get(&mkdir_pool, PR_WAITOK); + mkdir2 = softdep_alloc(D_MKDIR); mkdir2->md_list.wk_type = D_MKDIR; mkdir2->md_state = MKDIR_PARENT; mkdir2->md_diradd = dap; @@ -2716,29 +2611,29 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) * Dependency on "." and ".." being written to disk. 
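+ *
+ * (Two mkdir work items record this: MKDIR_BODY stands for the block
+ * holding "." and ".." reaching the disk, MKDIR_PARENT for the
+ * parent's incremented link count reaching the disk.  The parent's
+ * entry for the new directory may not be committed until both have
+ * completed.)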
*/ mkdir1->md_buf = newdirbp; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs); - WORKLIST_INSERT(&newdirbp->b_dep, &mkdir1->md_list); - FREE_LOCK(&lk); + worklist_insert(&newdirbp->b_dep, &mkdir1->md_list); + mutex_exit(&bufcache_lock); bdwrite(newdirbp); /* * Dependency on link count increase for parent directory */ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0 || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { dap->da_state &= ~MKDIR_PARENT; - WORKITEM_FREE(mkdir2, D_MKDIR); + workitem_free(mkdir2, D_MKDIR); } else { LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs); - WORKLIST_INSERT(&inodedep->id_bufwait,&mkdir2->md_list); + worklist_insert(&inodedep->id_bufwait,&mkdir2->md_list); } } /* * Link into parent directory pagedep to await its being written. */ if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0) - WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); + worklist_insert(&bp->b_dep, &pagedep->pd_list); dap->da_pagedep = pagedep; LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap, da_pdlist); @@ -2751,7 +2646,7 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) diradd_inode_written(dap, inodedep); else - WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); + worklist_insert(&inodedep->id_bufwait, &dap->da_list); if (isnewblk) { /* * Directories growing into indirect blocks are rare @@ -2761,7 +2656,7 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) * new directory entry to disk. */ if (lbn >= NDADDR) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); /* * We only have a new allocation when at the * beginning of a new block, not when we are @@ -2778,12 +2673,12 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) * are already tracking this block. */ if (fragoff(fs, diroffset) != 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return (0); } if ((pagedep->pd_state & NEWBLOCK) != 0) { - WORKITEM_FREE(newdirblk, D_NEWDIRBLK); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); + workitem_free(newdirblk, D_NEWDIRBLK); return (0); } /* @@ -2793,14 +2688,14 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) panic("softdep_setup_directory_add: lost inodedep"); adp = TAILQ_LAST(&inodedep->id_newinoupdt, allocdirectlst); if (adp == NULL || adp->ad_lbn != lbn) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("softdep_setup_directory_add: lost entry"); } pagedep->pd_state |= NEWBLOCK; newdirblk->db_pagedep = pagedep; - WORKLIST_INSERT(&adp->ad_newdirblk, &newdirblk->db_list); + worklist_insert(&adp->ad_newdirblk, &newdirblk->db_list); } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return (0); } @@ -2824,7 +2719,7 @@ softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize) struct diradd *dap; daddr_t lbn; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); lbn = lblkno(dp->i_fs, dp->i_offset); offset = blkoff(dp->i_fs, dp->i_offset); if (pagedep_lookup(dp, lbn, 0, &pagedep) == 0) @@ -2854,12 +2749,11 @@ softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize) } done: bcopy(oldloc, newloc, entrysize); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } /* - * Free a diradd dependency structure. This routine must be called - * with splbio interrupts blocked. + * Free a diradd dependency structure. 
*/ static void free_diradd(dap) @@ -2870,11 +2764,9 @@ free_diradd(dap) struct inodedep *inodedep; struct mkdir *mkdir, *nextmd; -#ifdef DEBUG - if (lk.lkt_held == NULL) - panic("free_diradd: lock not held"); -#endif - WORKLIST_REMOVE(&dap->da_list); + KASSERT(mutex_owned(&bufcache_lock)); + + worklist_remove(&dap->da_list); LIST_REMOVE(dap, da_pdlist); if ((dap->da_state & DIRCHG) == 0) { pagedep = dap->da_pagedep; @@ -2893,14 +2785,14 @@ free_diradd(dap) if (mkdir->md_diradd != dap) continue; dap->da_state &= ~mkdir->md_state; - WORKLIST_REMOVE(&mkdir->md_list); + worklist_remove(&mkdir->md_list); LIST_REMOVE(mkdir, md_mkdirs); - WORKITEM_FREE(mkdir, D_MKDIR); + workitem_free(mkdir, D_MKDIR); } if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) panic("free_diradd: unfound ref"); } - WORKITEM_FREE(dap, D_DIRADD); + workitem_free(dap, D_DIRADD); } /* @@ -2948,7 +2840,7 @@ softdep_setup_remove(bp, dp, ip, isrmdir) if ((dirrem->dm_state & COMPLETE) == 0) { LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem, dm_next); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } else { u_int ipflag, dpflag; struct vnode *vp = ITOV(ip); @@ -2958,7 +2850,7 @@ softdep_setup_remove(bp, dp, ip, isrmdir) LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, prevdirrem, dm_next); dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); ipflag = vn_setrecurse(vp); dpflag = vn_setrecurse(dvp); handle_workitem_remove(dirrem); @@ -2991,16 +2883,8 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp) */ if (ip == NULL) panic("newdirrem: whiteout"); - /* - * If we are over our limit, try to improve the situation. - * Limiting the number of dirrem structures will also limit - * the number of freefile and freeblks structures. - */ - if (num_dirrem > max_softdeps / 2 && speedup_syncer() == 0) - (void) request_cleanup(FLUSH_REMOVE, 0); - num_dirrem += 1; - dirrem = pool_get(&dirrem_pool, PR_WAITOK); + dirrem = softdep_alloc(D_DIRREM); bzero(dirrem, sizeof(struct dirrem)); dirrem->dm_list.wk_type = D_DIRREM; dirrem->dm_state = isrmdir ? RMDIR : 0; @@ -3008,11 +2892,20 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp) dirrem->dm_oldinum = ip->i_number; *prevdirremp = NULL; - ACQUIRE_LOCK(&lk); + /* + * If we are over our limit, try to improve the situation. + * Limiting the number of dirrem structures will also limit + * the number of freefile and freeblks structures. + */ + mutex_enter(&bufcache_lock); + num_dirrem += 1; + if (num_dirrem > max_softdeps / 2 && speedup_syncer() == 0) { + (void) request_cleanup(FLUSH_REMOVE); + } lbn = lblkno(dp->i_fs, dp->i_offset); offset = blkoff(dp->i_fs, dp->i_offset); if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0) - WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); + worklist_insert(&bp->b_dep, &pagedep->pd_list); dirrem->dm_pagedep = pagedep; /* * Check for a diradd dependency for the same directory entry. @@ -3097,7 +2990,7 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) * Whiteouts do not need diradd dependencies. 
*/ if (newinum != WINO) { - dap = pool_get(&diradd_pool, PR_WAITOK); + dap = softdep_alloc(D_DIRADD); bzero(dap, sizeof(struct diradd)); dap->da_list.wk_type = D_DIRADD; dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE; @@ -3137,7 +3030,7 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) dirrem->dm_dirinum = pagedep->pd_ino; add_to_worklist(&dirrem->dm_list); } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return; } @@ -3175,13 +3068,13 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { dap->da_state |= COMPLETE; LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); - WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); + worklist_insert(&inodedep->id_pendinghd, &dap->da_list); } else { LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap, da_pdlist); - WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); + worklist_insert(&inodedep->id_bufwait, &dap->da_list); } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } /* @@ -3196,12 +3089,12 @@ softdep_change_linkcnt(ip) { struct inodedep *inodedep; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); (void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep); if (ip->i_nlink < ip->i_ffs_effnlink) panic("softdep_change_linkcnt: bad delta"); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_ffs_effnlink; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } /* @@ -3216,6 +3109,7 @@ softdep_releasefile(ip) struct inode *ip; /* inode with the zero effective link count */ { struct inodedep *inodedep; + struct ufsmount *ump; if (ip->i_ffs_effnlink > 0) panic("softdep_filerelease: file still referenced"); @@ -3236,12 +3130,16 @@ softdep_releasefile(ip) * If we are tracking an nlinkdelta, we have to also remember * whether we accounted for the freed space yet. 
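+ *
+ * The pending counts in the superblock are now covered by the
+ * per-mount lock rather than by splbio, so each update below takes
+ * the form:
+ *
+ *	mutex_enter(&ump->um_lock);
+ *	fs->fs_pendingblocks += DIP(ip, blocks);
+ *	mutex_exit(&ump->um_lock);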
*/ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if ((inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep))) inodedep->id_state |= SPACECOUNTED; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); + + ump = ip->i_ump; + mutex_enter(&ump->um_lock); ip->i_fs->fs_pendingblocks += DIP(ip, blocks); ip->i_fs->fs_pendinginodes += 1; + mutex_exit(&ump->um_lock); ip->i_flag |= IN_SPACECOUNTED; } @@ -3265,7 +3163,7 @@ handle_workitem_remove(dirrem) return; } ip = VTOI(vp); - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) == 0) panic("handle_workitem_remove: lost inodedep"); /* @@ -3278,10 +3176,10 @@ handle_workitem_remove(dirrem) if (ip->i_nlink < ip->i_ffs_effnlink) panic("handle_workitem_remove: bad file delta"); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_ffs_effnlink; - FREE_LOCK(&lk); - vput(vp); num_dirrem -= 1; - WORKITEM_FREE(dirrem, D_DIRREM); + workitem_free(dirrem, D_DIRREM); + mutex_exit(&bufcache_lock); + vput(vp); return; } /* @@ -3297,7 +3195,7 @@ handle_workitem_remove(dirrem) if (ip->i_nlink < ip->i_ffs_effnlink) panic("handle_workitem_remove: bad dir delta"); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_ffs_effnlink; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if ((error = ffs_truncate(vp, (off_t)0, 0, l->l_cred)) != 0) softdep_error("handle_workitem_remove: truncate", error); /* @@ -3307,8 +3205,10 @@ handle_workitem_remove(dirrem) */ if (dirrem->dm_state & DIRCHG) { vput(vp); + mutex_enter(&bufcache_lock); num_dirrem -= 1; - WORKITEM_FREE(dirrem, D_DIRREM); + mutex_exit(&bufcache_lock); + workitem_free(dirrem, D_DIRREM); return; } /* @@ -3317,19 +3217,19 @@ handle_workitem_remove(dirrem) * written to disk, then the on-disk inode is zero'ed. In either * case we can remove the file immediately. */ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); dirrem->dm_state = 0; oldinum = dirrem->dm_oldinum; dirrem->dm_oldinum = dirrem->dm_dirinum; if (inodedep_lookup(ip->i_fs, oldinum, 0, &inodedep) == 0 || check_inode_unwritten(inodedep)) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); vput(vp); handle_workitem_remove(dirrem); return; } - WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list); - FREE_LOCK(&lk); + worklist_insert(&inodedep->id_inowait, &dirrem->dm_list); + mutex_exit(&bufcache_lock); ip->i_flag |= IN_CHANGE; ffs_update(vp, NULL, NULL, 0); vput(vp); @@ -3356,19 +3256,25 @@ handle_workitem_freefile(freefile) #ifdef DEBUG struct inodedep *idp; #endif + struct ufsmount *ump; int error; #ifdef DEBUG - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (inodedep_lookup(freefile->fx_fs, freefile->fx_oldinum, 0, &idp)) panic("handle_workitem_freefile: inodedep survived"); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); #endif + + ump = VFSTOUFS(freefile->fx_mnt); + mutex_enter(&ump->um_lock); freefile->fx_fs->fs_pendinginodes -= 1; + mutex_exit(&ump->um_lock); + if ((error = ffs_freefile(freefile->fx_fs, freefile->fx_devvp, freefile->fx_oldinum, freefile->fx_mode)) != 0) softdep_error("handle_workitem_freefile", error); - WORKITEM_FREE(freefile, D_FREEFILE); + workitem_free(freefile, D_FREEFILE); } /* @@ -3411,6 +3317,7 @@ softdep_disk_io_initiation(bp) /* * Do any necessary pre-I/O processing. */ + mutex_enter(&bufcache_lock); for (wk = LIST_FIRST(&bp->b_dep); wk; wk = nextwk) { nextwk = LIST_NEXT(wk, wk_list); switch (wk->wk_type) { @@ -3437,24 +3344,21 @@ softdep_disk_io_initiation(bp) * dependency can be freed. 
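+ *
+ * (The indirdep scheme works by substitution: ir_savebp holds a safe
+ * copy of the indirect block in which only committed block pointers
+ * appear.  At write initiation the buffer's data is swapped for the
+ * safe copy; completion swaps the up-to-date version back and
+ * redirties the buffer if dependencies remain.)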
*/ if (LIST_FIRST(&indirdep->ir_deplisthd) == NULL) { - brelse(indirdep->ir_savebp, BC_INVAL | BC_NOCACHE); - softdep_trackbufs(-1, false); - - /* inline expand WORKLIST_REMOVE(wk); */ + /* inline expand worklist_remove(wk); */ wk->wk_state &= ~ONWORKLIST; LIST_REMOVE(wk, wk_list); - WORKITEM_FREE(indirdep, D_INDIRDEP); + brelsel(indirdep->ir_savebp, BC_INVAL | BC_NOCACHE); + softdep_trackbufs(-1, false); + workitem_free(indirdep, D_INDIRDEP); continue; } /* * Replace up-to-date version with safe version. */ - ACQUIRE_LOCK(&lk); indirdep->ir_state &= ~ATTACHED; indirdep->ir_state |= UNDONE; indirdep->ir_saveddata = bp->b_data; bp->b_data = indirdep->ir_savebp->b_data; - FREE_LOCK(&lk); continue; case D_MKDIR: @@ -3469,6 +3373,7 @@ softdep_disk_io_initiation(bp) /* NOTREACHED */ } } + mutex_exit(&bufcache_lock); } /* @@ -3489,6 +3394,8 @@ initiate_write_filepage(pagedep, bp) const int needswap = UFS_FSNEEDSWAP(VFSTOUFS(pagedep->pd_mnt)->um_fs); #endif + KASSERT(mutex_owned(&bufcache_lock)); + if (pagedep->pd_state & IOSTARTED) { /* * This can only happen if there is a driver that does not @@ -3499,7 +3406,6 @@ initiate_write_filepage(pagedep, bp) return; } pagedep->pd_state |= IOSTARTED; - ACQUIRE_LOCK(&lk); for (i = 0; i < DAHASHSZ; i++) { for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); dap; dap = LIST_NEXT(dap, da_pdlist)) { @@ -3520,7 +3426,6 @@ initiate_write_filepage(pagedep, bp) dap->da_state |= UNDONE; } } - FREE_LOCK(&lk); } /* @@ -3545,6 +3450,8 @@ initiate_write_inodeblock_ufs1(inodedep, bp) const int needswap = UFS_FSNEEDSWAP(fs); #endif + KASSERT(mutex_owned(&bufcache_lock)); + if (inodedep->id_state & IOSTARTED) panic("initiate_write_inodeblock: already started"); inodedep->id_state |= IOSTARTED; @@ -3573,7 +3480,6 @@ initiate_write_inodeblock_ufs1(inodedep, bp) /* * Set the dependencies to busy. */ - ACQUIRE_LOCK(&lk); for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef DIAGNOSTIC @@ -3635,7 +3541,6 @@ initiate_write_inodeblock_ufs1(inodedep, bp) dp->di_ib[i] = 0; } dp->di_size = ufs_rw64(dp->di_size, needswap); - FREE_LOCK(&lk); return; } /* @@ -3665,7 +3570,6 @@ initiate_write_inodeblock_ufs1(inodedep, bp) */ for (; adp; adp = TAILQ_NEXT(adp, ad_next)) dp->di_ib[adp->ad_lbn - NDADDR] = 0; - FREE_LOCK(&lk); } static void @@ -3684,6 +3588,8 @@ initiate_write_inodeblock_ufs2(inodedep, bp) const int needswap = UFS_FSNEEDSWAP(fs); #endif + KASSERT(mutex_owned(&bufcache_lock)); + if (inodedep->id_state & IOSTARTED) panic("initiate_write_inodeblock_ufs2: already started"); inodedep->id_state |= IOSTARTED; @@ -3710,7 +3616,6 @@ initiate_write_inodeblock_ufs2(inodedep, bp) inodedep->id_savedsize = dp->di_size; if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL) return; - ACQUIRE_LOCK(&lk); #ifdef notyet inodedep->id_savedextsize = dp->di_extsize; @@ -3720,17 +3625,16 @@ initiate_write_inodeblock_ufs2(inodedep, bp) /* * Set the ext data dependencies to busy. 
*/ - ACQUIRE_LOCK(&lk); for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef DIAGNOSTIC if (deplist != 0 && prevlbn >= adp->ad_lbn) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("softdep_write_inodeblock: lbn order"); } prevlbn = adp->ad_lbn; if (dp->di_extb[adp->ad_lbn] != adp->ad_newblkno) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("%s: direct pointer #%jd mismatch %jd != %jd", "softdep_write_inodeblock", (intmax_t)adp->ad_lbn, @@ -3739,7 +3643,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp) } deplist |= 1 << adp->ad_lbn; if ((adp->ad_state & ATTACHED) == 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); } @@ -3763,7 +3667,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp) for (i = adp->ad_lbn + 1; i < NXADDR; i++) { #ifdef DIAGNOSTIC if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("softdep_write_inodeblock: lost dep1"); } #endif /* DIAGNOSTIC */ @@ -3794,14 +3698,14 @@ initiate_write_inodeblock_ufs2(inodedep, bp) adp = TAILQ_NEXT(adp, ad_next)) { #ifdef DIAGNOSTIC if (deplist != 0 && prevlbn >= adp->ad_lbn) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("softdep_write_inodeblock: lbn order"); } prevlbn = adp->ad_lbn; if (adp->ad_lbn < NDADDR && ufs_rw64(dp->di_db[adp->ad_lbn], needswap) != adp->ad_newblkno) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("%s: direct pointer #%" PRId64 " mismatch %" PRId64 " != %" PRId64, "softdep_write_inodeblock", @@ -3812,7 +3716,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp) if (adp->ad_lbn >= NDADDR && ufs_rw64(dp->di_ib[adp->ad_lbn - NDADDR], needswap) != adp->ad_newblkno) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("%s: indirect pointer #%" PRId64 " mismatch %" PRId64 " != %" PRId64, "softdep_write_inodeblock", @@ -3822,7 +3726,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp) } deplist |= 1 << adp->ad_lbn; if ((adp->ad_state & ATTACHED) == 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); } @@ -3848,7 +3752,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp) for (i = adp->ad_lbn + 1; i < NDADDR; i++) { #ifdef DIAGNOSTIC if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("softdep_write_inodeblock: lost dep2"); } #endif /* DIAGNOSTIC */ @@ -3858,14 +3762,13 @@ initiate_write_inodeblock_ufs2(inodedep, bp) #ifdef DIAGNOSTIC if (dp->di_ib[i] != 0 && (deplist & ((1 << NDADDR) << i)) == 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); panic("softdep_write_inodeblock: lost dep3"); } #endif /* DIAGNOSTIC */ dp->di_ib[i] = 0; } dp->di_size = ufs_rw64(dp->di_size, needswap); - FREE_LOCK(&lk); return; } /* @@ -3895,7 +3798,6 @@ initiate_write_inodeblock_ufs2(inodedep, bp) */ for (; adp; adp = TAILQ_NEXT(adp, ad_next)) dp->di_ib[adp->ad_lbn - NDADDR] = 0; - FREE_LOCK(&lk); } /* @@ -3919,31 +3821,37 @@ softdep_disk_write_complete(bp) struct inodedep *inodedep; struct bmsafemap *bmsafemap; + if ((bp->b_flags & B_READ) != 0) { + KASSERT(LIST_EMPTY(&bp->b_dep)); + return; + } + + /* Avoid taking bufcache_lock if not doing softdep. */ + if (bp->b_vp == NULL || bp->b_vp->v_mount == NULL || + !DOINGSOFTDEP(bp->b_vp)) + return; + /* * If an error occurred while doing the write, then the data * has not hit the disk and the dependencies cannot be unrolled. 
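+ *
+ * (BC_INVAL is now tested in b_cflags: the BC_* flags belong to the
+ * buffer cache and are covered by bufcache_lock, while the BO_* flags
+ * in b_oflags belong to the owning object and are covered by
+ * b_objlock.)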
*/ - if (bp->b_error != 0 && (bp->b_flags & B_INVAL) == 0) + if (bp->b_error != 0 && (bp->b_cflags & BC_INVAL) == 0) return; -#ifdef DEBUG - if (lk.lkt_held != NULL) - panic("softdep_disk_write_complete: lock is held"); - lk.lkt_held = (struct lwp *)1; -#endif + mutex_enter(&bufcache_lock); LIST_INIT(&reattach); while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { - WORKLIST_REMOVE(wk); + worklist_remove(wk); switch (wk->wk_type) { case D_PAGEDEP: if (handle_written_filepage(WK_PAGEDEP(wk), bp)) - WORKLIST_INSERT(&reattach, wk); + worklist_insert(&reattach, wk); continue; case D_INODEDEP: if (handle_written_inodeblock(WK_INODEDEP(wk), bp)) - WORKLIST_INSERT(&reattach, wk); + worklist_insert(&reattach, wk); continue; case D_BMSAFEMAP: @@ -3973,7 +3881,7 @@ softdep_disk_write_complete(bp) LIST_REMOVE(inodedep, id_deps); inodedep->id_buf = NULL; } - WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); + workitem_free(bmsafemap, D_BMSAFEMAP); continue; case D_MKDIR: @@ -4005,10 +3913,12 @@ softdep_disk_write_complete(bp) if (aip == LIST_FIRST(&indirdep->ir_donehd)) panic("disk_write_complete: not gone"); } - WORKLIST_INSERT(&reattach, wk); - if ((bp->b_flags & B_DELWRI) == 0) + worklist_insert(&reattach, wk); + mutex_enter(bp->b_objlock); + if ((bp->b_oflags & BO_DELWRI) == 0) stat_indir_blk_ptrs++; bdirty(bp); + mutex_exit(bp->b_objlock); continue; default: @@ -4021,20 +3931,15 @@ softdep_disk_write_complete(bp) * Reattach any requests that must be redone. */ while ((wk = LIST_FIRST(&reattach)) != NULL) { - WORKLIST_REMOVE(wk); - WORKLIST_INSERT(&bp->b_dep, wk); + worklist_remove(wk); + worklist_insert(&bp->b_dep, wk); } -#ifdef DEBUG - if (lk.lkt_held != (struct lwp *)1) - panic("softdep_disk_write_complete: lock lost"); - lk.lkt_held = NULL; -#endif + mutex_exit(&bufcache_lock); } /* * Called from within softdep_disk_write_complete above. Note that - * this routine is always called from interrupt level with further - * splbio interrupts blocked. + * this routine is always called from interrupt level. */ static void handle_allocdirect_partdone(adp) @@ -4045,6 +3950,8 @@ handle_allocdirect_partdone(adp) long bsize; int delayx; + KASSERT(mutex_owned(&bufcache_lock)); + if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE) return; if (adp->ad_buf != NULL) @@ -4108,8 +4015,7 @@ handle_allocdirect_partdone(adp) /* * Called from within softdep_disk_write_complete above. Note that - * this routine is always called from interrupt level with further - * splbio interrupts blocked. + * this routine is always called from interrupt level. */ static void handle_allocindir_partdone(aip) @@ -4117,6 +4023,8 @@ handle_allocindir_partdone(aip) { struct indirdep *indirdep; + KASSERT(mutex_owned(&bufcache_lock)); + if ((aip->ai_state & ALLCOMPLETE) != ALLCOMPLETE) return; if (aip->ai_buf != NULL) @@ -4136,14 +4044,13 @@ handle_allocindir_partdone(aip) LIST_REMOVE(aip, ai_next); if (aip->ai_freefrag != NULL) add_to_worklist(&aip->ai_freefrag->ff_list); - WORKITEM_FREE(aip, D_ALLOCINDIR); + workitem_free(aip, D_ALLOCINDIR); } /* * Called from within softdep_disk_write_complete above to restore * in-memory inode block contents to their most up-to-date state. Note - * that this routine is always called from interrupt level with further - * splbio interrupts blocked. + * that this routine is always called from interrupt level. 
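+ * Where a rollback was in effect the buffer is marked dirty again so
+ * that the up-to-date contents get written out in a later pass; the
+ * BO_DELWRI test and bdirty() now happen under b_objlock.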
*/ static int handle_written_inodeblock(inodedep, bp) @@ -4159,6 +4066,8 @@ handle_written_inodeblock(inodedep, bp) const int needswap = UFS_FSNEEDSWAP(inodedep->id_fs); #endif + KASSERT(mutex_owned(&bufcache_lock)); + if ((inodedep->id_state & IOSTARTED) == 0) panic("handle_written_inodeblock: not started"); inodedep->id_state &= ~IOSTARTED; @@ -4185,9 +4094,11 @@ handle_written_inodeblock(inodedep, bp) else *dp2 = *inodedep->id_savedino2; inodedep_freedino(inodedep); - if ((bp->b_flags & B_DELWRI) == 0) + mutex_enter(bp->b_objlock); + if ((bp->b_oflags & BO_DELWRI) == 0) stat_inode_bitmap++; bdirty(bp); + mutex_exit(bp->b_objlock); return (1); } /* @@ -4255,7 +4166,7 @@ handle_written_inodeblock(inodedep, bp) adp->ad_state |= ATTACHED; hadchanges = 1; } - if (hadchanges && (bp->b_flags & B_DELWRI) == 0) + if (hadchanges && (bp->b_oflags & BO_DELWRI) == 0) stat_direct_blk_ptrs++; /* * Reset the file size to its most up-to-date value. @@ -4276,13 +4187,16 @@ handle_written_inodeblock(inodedep, bp) } } inodedep->id_savedsize = -1; + /* * If there were any rollbacks in the inode block, then it must be * marked dirty so that its will eventually get written back in * its correct form. */ if (hadchanges) { + mutex_enter(bp->b_objlock); bdirty(bp); + mutex_exit(bp->b_objlock); } /* * Process any allocdirects that completed during the update. @@ -4298,7 +4212,7 @@ handle_written_inodeblock(inodedep, bp) */ filefree = NULL; while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) { - WORKLIST_REMOVE(wk); + worklist_remove(wk); switch (wk->wk_type) { case D_FREEFILE: @@ -4354,7 +4268,6 @@ handle_written_inodeblock(inodedep, bp) /* * Process a diradd entry after its dependent inode has been written. - * This routine must be called with splbio interrupts blocked. */ static void diradd_inode_written(dap, inodedep) @@ -4363,6 +4276,8 @@ diradd_inode_written(dap, inodedep) { struct pagedep *pagedep; + KASSERT(mutex_owned(&bufcache_lock)); + dap->da_state |= COMPLETE; if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { if (dap->da_state & DIRCHG) @@ -4372,7 +4287,7 @@ diradd_inode_written(dap, inodedep) LIST_REMOVE(dap, da_pdlist); LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); } - WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); + worklist_insert(&inodedep->id_pendinghd, &dap->da_list); } /* @@ -4401,15 +4316,14 @@ handle_written_mkdir(mkdir, type) LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); } LIST_REMOVE(mkdir, md_mkdirs); - WORKITEM_FREE(mkdir, D_MKDIR); + workitem_free(mkdir, D_MKDIR); } /* * Called from within softdep_disk_write_complete above. * A write operation was just completed. Removed inodes can * now be freed and associated block pointers may be committed. - * Note that this routine is always called from interrupt level - * with further splbio interrupts blocked. + * Note that this routine is always called from interrupt level. */ static int handle_written_filepage(pagedep, bp) @@ -4424,6 +4338,8 @@ handle_written_filepage(pagedep, bp) const int needswap = UFS_FSNEEDSWAP(VFSTOUFS(pagedep->pd_mnt)->um_fs); #endif + KASSERT(mutex_owned(&bufcache_lock)); + if ((pagedep->pd_state & IOSTARTED) == 0) panic("handle_written_filepage: not started"); pagedep->pd_state &= ~IOSTARTED; @@ -4477,9 +4393,11 @@ handle_written_filepage(pagedep, bp) * its correct form. 
*/ if (chgs) { - if ((bp->b_flags & B_DELWRI) == 0) + mutex_enter(bp->b_objlock); + if ((bp->b_oflags & BO_DELWRI) == 0) stat_dir_entry++; bdirty(bp); + mutex_exit(bp->b_objlock); return (1); } /* @@ -4490,7 +4408,7 @@ handle_written_filepage(pagedep, bp) */ if ((pagedep->pd_state & NEWBLOCK) == 0) { LIST_REMOVE(pagedep, pd_hash); - WORKITEM_FREE(pagedep, D_PAGEDEP); + workitem_free(pagedep, D_PAGEDEP); } return (0); } @@ -4522,15 +4440,15 @@ softdep_load_inodeblock(ip) * Check for alternate nlink count. */ ip->i_ffs_effnlink = ip->i_nlink; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return; } ip->i_ffs_effnlink -= inodedep->id_nlinkdelta; if (inodedep->id_state & SPACECOUNTED) ip->i_flag |= IN_SPACECOUNTED; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } /* @@ -4560,11 +4478,11 @@ softdep_update_inodeblock(ip, bp, waitfor) * if there is no existing inodedep, then there are no dependencies * to track. */ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) { if (ip->i_ffs_effnlink != ip->i_nlink) panic("softdep_update_inodeblock: bad link count"); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return; } if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_ffs_effnlink) @@ -4575,7 +4493,7 @@ softdep_update_inodeblock(ip, bp, waitfor) */ inodedep->id_state &= ~COMPLETE; if ((inodedep->id_state & ONWORKLIST) == 0) { - WORKLIST_INSERT(&bp->b_dep, &inodedep->id_list); + worklist_insert(&bp->b_dep, &inodedep->id_list); } /* * Any new dependencies associated with the incore inode must @@ -4593,8 +4511,8 @@ softdep_update_inodeblock(ip, bp, waitfor) * processed when the buffer I/O completes. */ while ((wk = LIST_FIRST(&inodedep->id_inowait)) != NULL) { - WORKLIST_REMOVE(wk); - WORKLIST_INSERT(&inodedep->id_bufwait, wk); + worklist_remove(wk); + worklist_insert(&inodedep->id_bufwait, wk); } /* * Newly allocated inodes cannot be written until the bitmap @@ -4604,11 +4522,11 @@ softdep_update_inodeblock(ip, bp, waitfor) * to be written so that the update can be done. */ if ((inodedep->id_state & DEPCOMPLETE) != 0 || waitfor == 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return; } gotit = getdirtybuf(&inodedep->id_buf, MNT_WAIT); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if (gotit && (error = VOP_BWRITE(inodedep->id_buf)) != 0) softdep_error("softdep_update_inodeblock: bwrite", error); if ((inodedep->id_state & DEPCOMPLETE) == 0) @@ -4617,8 +4535,7 @@ softdep_update_inodeblock(ip, bp, waitfor) /* * Merge the new inode dependency list (id_newinoupdt) into the old - * inode dependency list (id_inoupdt). This routine must be called - * with splbio interrupts blocked. + * inode dependency list (id_inoupdt). */ static void merge_inode_lists(inodedep) @@ -4626,6 +4543,8 @@ merge_inode_lists(inodedep) { struct allocdirect *listadp, *newadp; + KASSERT(mutex_owned(&bufcache_lock)); + listadp = TAILQ_FIRST(&inodedep->id_inoupdt); newadp = TAILQ_FIRST(&inodedep->id_newinoupdt); while (listadp && newadp) { @@ -4674,9 +4593,9 @@ softdep_fsync(vp, f) ip = VTOI(vp); fs = ip->i_fs; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return (0); } if (LIST_FIRST(&inodedep->id_inowait) != NULL || @@ -4726,7 +4645,7 @@ softdep_fsync(vp, f) * requesting the lock on our parent. 
See the comment in * ufs_lookup for details on possible races. */ - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); VOP_UNLOCK(vp, 0); error = VFS_VGET(mnt, parentino, &pvp); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); @@ -4764,11 +4683,11 @@ softdep_fsync(vp, f) vput(pvp); if (error != 0) return (error); - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0) break; } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if (f & FSYNC_CACHE) { /* * If requested, make sure all of these changes don't @@ -4795,46 +4714,37 @@ softdep_fsync_mountdev(vp) if (vp->v_type != VBLK) panic("softdep_fsync_mountdev: vnode not VBLK"); - ACQUIRE_LOCK(&lk); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; - simple_lock(&bp->b_interlock); + KASSERT(bp->b_objlock == &vp->v_interlock); /* * If it is already scheduled, skip to the next buffer. */ - if (bp->b_flags & B_BUSY) { - simple_unlock(&bp->b_interlock); + if (bp->b_cflags & BC_BUSY) { continue; } - if ((bp->b_flags & B_DELWRI) == 0) + if ((bp->b_oflags & BO_DELWRI) == 0) panic("softdep_fsync_mountdev: not dirty"); /* * We are only interested in bitmaps with outstanding * dependencies. */ if ((wk = LIST_FIRST(&bp->b_dep)) == NULL || - wk->wk_type != D_BMSAFEMAP) { - simple_unlock(&bp->b_interlock); + wk->wk_type != D_BMSAFEMAP) continue; - } bremfree(bp); - simple_unlock(&bqueue_slock); - bp->b_flags |= B_BUSY; - simple_unlock(&bp->b_interlock); - FREE_LOCK(&lk); + bp->b_cflags |= BC_BUSY; + mutex_exit(&bufcache_lock); (void) bawrite(bp); - ACQUIRE_LOCK(&lk); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); /* - * Since we may have slept during the I/O, we need - * to start from a known point. + * Since we unlocked, we need to start from a known point. */ nbp = vp->v_dirtyblkhd.lh_first; } - simple_unlock(&bqueue_slock); - drain_output(vp, 1); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); + drain_output(vp); } /* @@ -4844,17 +4754,8 @@ softdep_fsync_mountdev(vp) * associated with the file. If any I/O errors occur, they are returned. */ int -softdep_sync_metadata(v) - void *v; +softdep_sync_metadata(struct vnode *vp) { - struct vop_fsync_args /* { - struct vnode *a_vp; - kauth_cred_t a_cred; - int a_waitfor; - off_t a_offlo; - off_t a_offhi; - } */ *ap = v; - struct vnode *vp = ap->a_vp; struct inodedep *inodedep; struct pagedep *pagedep; struct allocdirect *adp; @@ -4877,10 +4778,10 @@ softdep_sync_metadata(v) /* * Ensure that any direct block dependencies have been cleared. 
*/ - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); error = flush_inodedep_deps(VTOI(vp)->i_fs, VTOI(vp)->i_number); if (error) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return (error); } /* @@ -4923,14 +4824,14 @@ loop: nbp = adp->ad_buf; if (getdirtybuf(&nbp, waitfor) == 0) break; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if (waitfor == MNT_NOWAIT) { bawrite(nbp); } else if ((error = VOP_BWRITE(nbp)) != 0) { bawrite(bp); return (error); } - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); break; case D_ALLOCINDIR: @@ -4940,14 +4841,14 @@ loop: nbp = aip->ai_buf; if (getdirtybuf(&nbp, waitfor) == 0) break; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if (waitfor == MNT_NOWAIT) { bawrite(nbp); } else if ((error = VOP_BWRITE(nbp)) != 0) { bawrite(bp); return (error); } - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); break; case D_INDIRDEP: @@ -4959,12 +4860,12 @@ loop: nbp = aip->ai_buf; if (getdirtybuf(&nbp, MNT_WAIT) == 0) goto restart; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if ((error = VOP_BWRITE(nbp)) != 0) { bawrite(bp); return (error); } - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); goto restart; } break; @@ -4972,7 +4873,7 @@ loop: case D_INODEDEP: if ((error = flush_inodedep_deps(WK_INODEDEP(wk)->id_fs, WK_INODEDEP(wk)->id_ino)) != 0) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); bawrite(bp); return (error); } @@ -4993,7 +4894,7 @@ loop: error = flush_pagedep_deps(vp, pagedep->pd_mnt, &pagedep->pd_diraddhd[i]); if (error) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); bawrite(bp); return (error); } @@ -5011,14 +4912,14 @@ loop: nbp = WK_MKDIR(wk)->md_buf; if (getdirtybuf(&nbp, waitfor) == 0) break; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if (waitfor == MNT_NOWAIT) { bawrite(nbp); } else if ((error = VOP_BWRITE(nbp)) != 0) { bawrite(bp); return (error); } - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); break; case D_BMSAFEMAP: @@ -5052,15 +4953,15 @@ loop: } (void) getdirtybuf(&bp->b_vnbufs.le_next, MNT_WAIT); nbp = bp->b_vnbufs.le_next; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if (must_sync) { if ((error = VOP_BWRITE(bp)) != 0) return error; } else bawrite(bp); - ACQUIRE_LOCK(&lk); if (nbp != NULL) { bp = nbp; + mutex_enter(&bufcache_lock); goto loop; } /* @@ -5069,15 +4970,10 @@ loop: * Once they are all there, proceed with the second pass * which will wait for the I/O as per above. */ - drain_output(vp, 1); - /* - * The brief unlock is to allow any pent up dependency - * processing to be done. - */ + drain_output(vp); if (waitfor == MNT_NOWAIT) { waitfor = MNT_WAIT; - FREE_LOCK(&lk); - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); goto top; } @@ -5087,7 +4983,6 @@ loop: * devices, we may need to do further work. */ if (vp->v_dirtyblkhd.lh_first != NULL) { - FREE_LOCK(&lk); /* * If we are trying to sync a block device, some of its buffers * may contain metadata that cannot be written until the @@ -5098,11 +4993,11 @@ loop: if (vp->v_type == VBLK && vp->v_specmountpoint && !VOP_ISLOCKED(vp) && (error = VFS_SYNC(vp->v_specmountpoint, MNT_WAIT, - ap->a_cred)) != 0) + curlwp->l_cred)) != 0) return (error); - ACQUIRE_LOCK(&lk); } + mutex_enter(&bufcache_lock); clean: /* * If there is still an inodedep, we know that the inode has pending @@ -5112,13 +5007,12 @@ clean: */ if (inodedep_lookup(VTOI(vp)->i_fs, VTOI(vp)->i_number, 0, &inodedep)) VTOI(vp)->i_flag |= IN_MODIFIED; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); return (0); } /* * Flush the dependencies associated with an inodedep. 
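+ * Called with bufcache_lock held.  The lock may be dropped and
+ * retaken while buffers are flushed, so the dependency lists are
+ * rescanned from the start after every write.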
- * Called with splbio blocked. */ static int flush_inodedep_deps(fs, ino) @@ -5126,11 +5020,11 @@ flush_inodedep_deps(fs, ino) ino_t ino; { struct inodedep *inodedep; - struct allocdirect *adp; int error, waitfor; - struct buf *bp; struct vnode *vp; + KASSERT(mutex_owned(&bufcache_lock)); + vp = softdep_lookupvp(fs, ino); /* @@ -5143,12 +5037,10 @@ flush_inodedep_deps(fs, ino) * usual case we will be blocking against a write that we * initiated, so when it is done the dependency will have been * resolved. Thus the second pass is expected to end quickly. - * We give a brief window at the top of the loop to allow - * any pending I/O to complete. */ - for (waitfor = MNT_NOWAIT; ; ) { - FREE_LOCK(&lk); - ACQUIRE_LOCK(&lk); + for (error = 0, waitfor = MNT_NOWAIT; ; ) { + if (error) + return (error); if (inodedep_lookup(fs, ino, 0, &inodedep) == 0) return (0); @@ -5162,15 +5054,15 @@ flush_inodedep_deps(fs, ino) */ if (vp != NULL) { - FREE_LOCK(&lk); - simple_lock(&vp->v_interlock); + mutex_exit(&bufcache_lock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, 0, 0, PGO_ALLPAGES | PGO_CLEANIT | (waitfor == MNT_NOWAIT ? 0: PGO_SYNCIO)); if (waitfor == MNT_WAIT) { - drain_output(vp, 0); + drain_output(vp); } - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (error) { return error; } @@ -5184,52 +5076,13 @@ flush_inodedep_deps(fs, ino) */ KASSERT(TAILQ_EMPTY(&inodedep->id_inoupdt)); KASSERT(TAILQ_EMPTY(&inodedep->id_newinoupdt)); + break; } - for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; - adp = TAILQ_NEXT(adp, ad_next)) { - if (adp->ad_state & DEPCOMPLETE) - continue; - bp = adp->ad_buf; - if (getdirtybuf(&bp, waitfor) == 0) { - if (waitfor == MNT_NOWAIT) - continue; - break; - } - FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { - bawrite(bp); - } else if ((error = VOP_BWRITE(bp)) != 0) { - ACQUIRE_LOCK(&lk); - return (error); - } - ACQUIRE_LOCK(&lk); - break; - } - if (adp != NULL) - continue; - for (adp = TAILQ_FIRST(&inodedep->id_newinoupdt); adp; - adp = TAILQ_NEXT(adp, ad_next)) { - if (adp->ad_state & DEPCOMPLETE) - continue; - bp = adp->ad_buf; - if (getdirtybuf(&bp, waitfor) == 0) { - if (waitfor == MNT_NOWAIT) - continue; - break; - } - FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { - bawrite(bp); - } else if ((error = VOP_BWRITE(bp)) != 0) { - ACQUIRE_LOCK(&lk); - return (error); - } - ACQUIRE_LOCK(&lk); - break; - } - if (adp != NULL) - continue; + if (flush_deplist(&inodedep->id_inoupdt, waitfor, &error) || + flush_deplist(&inodedep->id_newinoupdt, waitfor, &error)) + continue; + /* * If pass2, we are done, otherwise do pass 2. */ @@ -5237,6 +5090,7 @@ flush_inodedep_deps(fs, ino) break; waitfor = MNT_WAIT; } + /* * Try freeing inodedep in case all dependencies have been removed. */ @@ -5245,9 +5099,45 @@ flush_inodedep_deps(fs, ino) return (0); } +/* + * Flush an inode dependency list. 
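+ * Returns non-zero when it wrote or failed to busy a buffer, in which
+ * case the caller must rescan (any write error is left in *errorp);
+ * returns zero after a pass that found nothing left to write.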
+ */ +static int +flush_deplist(listhead, waitfor, errorp) + struct allocdirectlst *listhead; + int waitfor; + int *errorp; +{ + struct allocdirect *adp; + struct buf *bp; + + KASSERT(mutex_owned(&bufcache_lock)); + + TAILQ_FOREACH(adp, listhead, ad_next) { + if (adp->ad_state & DEPCOMPLETE) + continue; + bp = adp->ad_buf; + if (getdirtybuf(&bp, waitfor) == 0) { + if (waitfor == MNT_NOWAIT) + continue; + return (1); + } + mutex_exit(&bufcache_lock); + if (waitfor == MNT_NOWAIT) { + bawrite(bp); + } else if ((*errorp = VOP_BWRITE(bp)) != 0) { + mutex_enter(&bufcache_lock); + return (1); + } + mutex_enter(&bufcache_lock); + return (1); + } + + return (0); +} + /* * Eliminate a pagedep dependency by flushing out all its diradd dependencies. - * Called with splbio blocked. */ static int flush_pagedep_deps(pvp, mp, diraddhdp) @@ -5265,6 +5155,8 @@ flush_pagedep_deps(pvp, mp, diraddhdp) ino_t inum; u_int ipflag; + KASSERT(mutex_owned(&bufcache_lock)); + ump = VFSTOUFS(mp); while ((dap = LIST_FIRST(diraddhdp)) != NULL) { /* @@ -5272,12 +5164,12 @@ flush_pagedep_deps(pvp, mp, diraddhdp) * has a MKDIR_PARENT dependency. */ if (dap->da_state & MKDIR_PARENT) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); VTOI(pvp)->i_flag |= IN_MODIFIED; error = ffs_update(pvp, NULL, NULL, UPDATE_WAIT); if (error) break; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); /* * If that cleared dependencies, go on to next. */ @@ -5300,7 +5192,7 @@ flush_pagedep_deps(pvp, mp, diraddhdp) */ inum = dap->da_newinum; if (dap->da_state & MKDIR_BODY) { - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); ipflag = vn_setrecurse(pvp); /* XXX */ if ((error = VFS_VGET(mp, inum, &vp)) != 0) break; @@ -5311,10 +5203,10 @@ flush_pagedep_deps(pvp, mp, diraddhdp) vput(vp); break; } - drain_output(vp, 0); + drain_output(vp); vput(vp); vn_restorerecurse(pvp, ipflag); - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); /* * If that cleared dependencies, go on to next. */ @@ -5341,11 +5233,11 @@ flush_pagedep_deps(pvp, mp, diraddhdp) */ if ((inodedep->id_state & DEPCOMPLETE) == 0) { gotit = getdirtybuf(&inodedep->id_buf, MNT_WAIT); - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if (gotit && (error = VOP_BWRITE(inodedep->id_buf)) != 0) break; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); if (dap != LIST_FIRST(diraddhdp)) continue; } @@ -5353,14 +5245,14 @@ flush_pagedep_deps(pvp, mp, diraddhdp) * If the inode is still sitting in a buffer waiting * to be written, push it to disk. */ - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if ((error = bread(ump->um_devvp, fsbtodb(ump->um_fs, ino_to_fsba(ump->um_fs, inum)), (int)ump->um_fs->fs_bsize, NOCRED, &bp)) != 0) break; if ((error = VOP_BWRITE(bp)) != 0) break; - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); /* * If we have failed to get rid of all the dependencies * then something is seriously wrong. @@ -5369,7 +5261,7 @@ flush_pagedep_deps(pvp, mp, diraddhdp) panic("flush_pagedep_deps: flush failed"); } if (error) - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); return (error); } @@ -5380,12 +5272,12 @@ flush_pagedep_deps(pvp, mp, diraddhdp) * many dependencies in progress. */ static int -request_cleanup(resource, islocked) +request_cleanup(resource) int resource; - int islocked; { - lwp_t *l = curlwp; - int s; + struct lwp *l = curlwp; + + KASSERT(mutex_owned(&bufcache_lock)); /* * We never hold up the filesystem syncer process. @@ -5421,19 +5313,17 @@ request_cleanup(resource, islocked) * Hopefully the syncer daemon will catch up and awaken us. 
* We wait at most tickdelay before proceeding in any case. */ - if (islocked == 0) - ACQUIRE_LOCK(&lk); if (proc_waiting++ == 0) callout_reset(&pause_timer_ch, tickdelay > 2 ? tickdelay : 2, pause_timer, NULL); - s = FREE_LOCK_INTERLOCKED(&lk); - (void) tsleep((void *)&proc_waiting, PPAUSE, "softupdate", 0); - ACQUIRE_LOCK_INTERLOCKED(&lk, s); + cv_wait(&proc_wait_cv, &bufcache_lock); if (--proc_waiting) callout_reset(&pause_timer_ch, tickdelay > 2 ? tickdelay : 2, pause_timer, NULL); else { + mutex_exit(&bufcache_lock); callout_stop(&pause_timer_ch); + mutex_enter(&bufcache_lock); #if 0 switch (resource) { @@ -5447,8 +5337,7 @@ request_cleanup(resource, islocked) } #endif } - if (islocked == 0) - FREE_LOCK(&lk); + return (1); } @@ -5460,8 +5349,9 @@ void pause_timer(void *arg) { - /* XXX was wakeup_one(), but makes no difference in uniprocessor */ - wakeup(&proc_waiting); + mutex_enter(&bufcache_lock); + cv_broadcast(&proc_wait_cv); + mutex_exit(&bufcache_lock); } /* @@ -5480,7 +5370,8 @@ clear_remove(l) int error, cnt; ino_t ino; - ACQUIRE_LOCK(&lk); + KASSERT(mutex_owned(&bufcache_lock)); + for (cnt = 0; cnt < pagedep_hash; cnt++) { pagedephd = &pagedep_hashtbl[next++]; if (next >= pagedep_hash) @@ -5490,19 +5381,19 @@ clear_remove(l) continue; mp = pagedep->pd_mnt; ino = pagedep->pd_ino; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if ((error = VFS_VGET(mp, ino, &vp)) != 0) { softdep_error("clear_remove: vget", error); return; } if ((error = VOP_FSYNC(vp, l->l_cred, 0, 0, 0))) softdep_error("clear_remove: fsync", error); - drain_output(vp, 0); + drain_output(vp); vput(vp); + mutex_enter(&bufcache_lock); return; } } - FREE_LOCK(&lk); } /* @@ -5522,7 +5413,8 @@ clear_inodedeps(l) int error, cnt; ino_t firstino, lastino, ino; - ACQUIRE_LOCK(&lk); + KASSERT(mutex_owned(&bufcache_lock)); + /* * Pick a random inode dependency to be cleared. * We will then gather up all the inodes in its block @@ -5539,10 +5431,12 @@ clear_inodedeps(l) * Ugly code to find mount point given pointer to superblock. */ fs = inodedep->id_fs; + mutex_enter(&mountlist_lock); CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) { if ((mp->mnt_flag & MNT_SOFTDEP) && fs == VFSTOUFS(mp)->um_fs) break; } + mutex_exit(&mountlist_lock); /* * Find the last inode in the block with dependencies. @@ -5559,7 +5453,7 @@ clear_inodedeps(l) for (ino = firstino; ino <= lastino; ino++) { if (inodedep_lookup(fs, ino, 0, &inodedep) == 0) continue; - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); if ((error = VFS_VGET(mp, ino, &vp)) != 0) { softdep_error("clear_inodedeps: vget", error); return; @@ -5571,12 +5465,11 @@ clear_inodedeps(l) } else { if ((error = VOP_FSYNC(vp, l->l_cred, 0, 0, 0))) softdep_error("clear_inodedeps: fsync2", error); - drain_output(vp, 0); + drain_output(vp); } vput(vp); - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); } - FREE_LOCK(&lk); } /* @@ -5597,8 +5490,12 @@ softdep_count_dependencies(bp, wantcount) struct diradd *dap; int i, retval; + KASSERT(mutex_owned(&bufcache_lock)); + + if (LIST_EMPTY(&bp->b_dep)) + return 0; + retval = 0; - ACQUIRE_LOCK(&lk); for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) { switch (wk->wk_type) { @@ -5656,13 +5553,11 @@ softdep_count_dependencies(bp, wantcount) } } out: - FREE_LOCK(&lk); return retval; } /* * Acquire exclusive access to a buffer. - * Must be called with splbio blocked. * Return 1 if buffer was acquired. 
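+ * Must be called with bufcache_lock held.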
*/ static int @@ -5672,42 +5567,30 @@ getdirtybuf(bpp, waitfor) { struct buf *bp; -again: - for (;;) { - int s; + KASSERT(mutex_owned(&bufcache_lock)); + for (;;) { if ((bp = *bpp) == NULL) return (0); - simple_lock(&bp->b_interlock); - if ((bp->b_flags & B_BUSY) == 0) + if ((bp->b_cflags & BC_BUSY) == 0) break; - if (waitfor != MNT_WAIT) { - simple_unlock(&bp->b_interlock); + if (waitfor != MNT_WAIT) return (0); - } - bp->b_flags |= B_WANTED; - s = FREE_LOCK_INTERLOCKED(&lk); - (void) ltsleep(bp, (PRIBIO + 1) | PNORELOCK, "softgetdbuf", 0, - &bp->b_interlock); - ACQUIRE_LOCK_INTERLOCKED(&lk, s); + (void)bbusy(bp, false, 0); } - LOCK_ASSERT(simple_lock_held(&bp->b_interlock)); - if ((bp->b_flags & B_DELWRI) == 0) { - simple_unlock(&bp->b_interlock); + mutex_enter(bp->b_objlock); + if ((bp->b_oflags & BO_DELWRI) == 0) { + mutex_exit(bp->b_objlock); return (0); } - if (!simple_lock_try(&bqueue_slock)) { - simple_unlock(&bp->b_interlock); - goto again; - } #if 1 - bp->b_flags |= B_BUSY; + bp->b_cflags |= BC_BUSY; bremfree(bp); #else - bp->b_flags |= B_BUSY | B_VFLUSH; + bp->b_cflags |= BC_BUSY | BC_VFLUSH; #endif - simple_unlock(&bqueue_slock); - simple_unlock(&bp->b_interlock); + mutex_exit(bp->b_objlock); + return (1); } @@ -5716,26 +5599,14 @@ again: * Must be called with vnode locked. */ static void -drain_output(vp, islocked) +drain_output(vp) struct vnode *vp; - int islocked; { - if (!islocked) - ACQUIRE_LOCK(&lk); - simple_lock(&global_v_numoutput_slock); - while (vp->v_numoutput) { - int s; - - vp->v_iflag |= VI_BWAIT; - s = FREE_LOCK_INTERLOCKED(&lk); - ltsleep((void *)&vp->v_numoutput, PRIBIO + 1, "drainvp", 0, - &global_v_numoutput_slock); - ACQUIRE_LOCK_INTERLOCKED(&lk, s); - } - simple_unlock(&global_v_numoutput_slock); - if (!islocked) - FREE_LOCK(&lk); + mutex_enter(&vp->v_interlock); + while (vp->v_numoutput) + cv_wait(&vp->v_cv, &vp->v_interlock); + mutex_exit(&vp->v_interlock); } /* @@ -5748,6 +5619,10 @@ softdep_deallocate_dependencies(bp) struct buf *bp; { + KASSERT(mutex_owned(&bufcache_lock)); + + if (LIST_EMPTY(&bp->b_dep)) + return; if (bp->b_error == 0) panic("softdep_deallocate_dependencies: dangling deps"); softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error); @@ -5780,6 +5655,8 @@ softdep_setup_pagecache(ip, lbn, size) struct buf *bp; UVMHIST_FUNC("softdep_setup_pagecache"); UVMHIST_CALLED(ubchist); + KASSERT(mutex_owned(&bufcache_lock)); + /* * Enter pagecache dependency buf in hash. 
* Always reset b_resid to be the full amount of data in the block @@ -5793,10 +5670,12 @@ softdep_setup_pagecache(ip, lbn, size) bp = softdep_lookup_pcbp(vp, lbn); if (bp == NULL) { - bp = pool_get(&sdpcpool, PR_WAITOK); + mutex_exit(&bufcache_lock); + bp = pool_cache_get(softdep_large_cache, PR_WAITOK); bp->b_vp = vp; bp->b_lblkno = lbn; LIST_INIT(&bp->b_dep); + mutex_enter(&bufcache_lock); LIST_INSERT_HEAD(&pcbphashhead[PCBPHASH(vp, lbn)], bp, b_hash); LIST_INSERT_HEAD(&ip->i_pcbufhd, bp, b_vnbufs); } @@ -5837,7 +5716,7 @@ softdep_free_pagecache(ip) nextbp = LIST_NEXT(bp, b_vnbufs); LIST_REMOVE(bp, b_vnbufs); KASSERT(LIST_FIRST(&bp->b_dep) == NULL); - pool_put(&sdpcpool, bp); + pool_cache_put(softdep_large_cache, bp); } } @@ -5848,13 +5727,20 @@ softdep_lookupvp(fs, ino) { struct mount *mp; extern struct vfsops ffs_vfsops; + vnode_t *vp; + mutex_enter(&mountlist_lock); CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_op == &ffs_vfsops && VFSTOUFS(mp)->um_fs == fs) { - return (ufs_ihashlookup(VFSTOUFS(mp)->um_dev, ino)); + mutex_exit(&mountlist_lock); + mutex_enter(&ufs_ihash_lock); + vp = ufs_ihashlookup(VFSTOUFS(mp)->um_dev, ino); + mutex_exit(&ufs_ihash_lock); + return vp; } } + mutex_exit(&mountlist_lock); return (NULL); } @@ -5863,18 +5749,19 @@ static void softdep_trackbufs(int delta, bool throttle) { + KASSERT(mutex_owned(&bufcache_lock)); + if (delta < 0) { if (softdep_lockedbufs < nbuf >> 2) { - wakeup(&softdep_lockedbufs); + cv_broadcast(&softdep_tb_cv); } KASSERT(softdep_lockedbufs >= -delta); softdep_lockedbufs += delta; return; } - while (throttle && softdep_lockedbufs >= nbuf >> 2) { speedup_syncer(); - tsleep(&softdep_lockedbufs, PRIBIO, "softdbufs", 0); + cv_wait(&softdep_tb_cv, &bufcache_lock); } softdep_lockedbufs += delta; } @@ -5886,6 +5773,8 @@ softdep_lookup_pcbp(vp, lbn) { struct buf *bp; + KASSERT(mutex_owned(&bufcache_lock)); + LIST_FOREACH(bp, &pcbphashhead[PCBPHASH(vp, lbn)], b_hash) { if (bp->b_vp == vp && bp->b_lblkno == lbn) { break; @@ -5901,10 +5790,13 @@ softdep_lookup_pcbp(vp, lbn) void softdep_pageiodone(bp) struct buf *bp; -#ifdef UVMHIST { struct vnode *vp = bp->b_vp; + if (vp == NULL) { + /* XXX LFS */ + return; + } if (DOINGSOFTDEP(vp)) softdep_pageiodone1(bp); } @@ -5912,7 +5804,6 @@ softdep_pageiodone(bp) void softdep_pageiodone1(bp) struct buf *bp; -#endif { int npages = bp->b_bufsize >> PAGE_SHIFT; struct vnode *vp = bp->b_vp; @@ -5932,7 +5823,7 @@ softdep_pageiodone1(bp) bshift = vp->v_mount->mnt_fs_bshift; bsize = 1 << bshift; asize = MIN(PAGE_SIZE, bsize); - ACQUIRE_LOCK(&lk); + mutex_enter(&bufcache_lock); for (i = 0; i < npages; i++) { pg = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT)); if (pg == NULL) { @@ -5978,7 +5869,7 @@ softdep_pageiodone1(bp) KASSERT(LIST_FIRST(&pcbp->b_dep) != NULL); while ((wk = LIST_FIRST(&pcbp->b_dep))) { - WORKLIST_REMOVE(wk); + worklist_remove(wk); switch (wk->wk_type) { case D_ALLOCDIRECT: adp = WK_ALLOCDIRECT(wk); @@ -6000,9 +5891,9 @@ softdep_pageiodone1(bp) } LIST_REMOVE(pcbp, b_hash); LIST_REMOVE(pcbp, b_vnbufs); - pool_put(&sdpcpool, pcbp); + pool_cache_put(softdep_large_cache, pcbp); pcbp = NULL; } } - FREE_LOCK(&lk); + mutex_exit(&bufcache_lock); } diff --git a/sys/ufs/ffs/ffs_softdep.stub.c b/sys/ufs/ffs/ffs_softdep.stub.c index c118f3805446..b1fd1c2080de 100644 --- a/sys/ufs/ffs/ffs_softdep.stub.c +++ b/sys/ufs/ffs/ffs_softdep.stub.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_softdep.stub.c,v 1.21 2007/03/04 06:03:45 christos Exp $ */ +/* $NetBSD: ffs_softdep.stub.c,v 1.22 2008/01/02 11:49:09 ad 
Exp $ */ /* * Copyright 1997 Marshall Kirk McKusick. All Rights Reserved. @@ -34,7 +34,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_softdep.stub.c,v 1.21 2007/03/04 06:03:45 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_softdep.stub.c,v 1.22 2008/01/02 11:49:09 ad Exp $"); #include #include @@ -204,7 +204,7 @@ softdep_fsync_mountdev(struct vnode *vp) } int -softdep_sync_metadata(void *v) +softdep_sync_metadata(struct vnode *vp) { return (0); } @@ -214,3 +214,10 @@ softdep_releasefile(struct inode *ip) { panic("softdep_releasefile called"); } + +void +softdep_unmount(struct mount *mp) +{ + + return; +} diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index 5cbf19504ffb..9113bcda52c6 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_vfsops.c,v 1.213 2007/12/20 16:18:57 dyoung Exp $ */ +/* $NetBSD: ffs_vfsops.c,v 1.214 2008/01/02 11:49:09 ad Exp $ */ /* * Copyright (c) 1989, 1991, 1993, 1994 @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.213 2007/12/20 16:18:57 dyoung Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.214 2008/01/02 11:49:09 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -482,7 +482,7 @@ fail: int ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l) { - struct vnode *vp, *nvp, *devvp; + struct vnode *vp, *mvp, *devvp; struct inode *ip; void *space; struct buf *bp; @@ -647,30 +647,36 @@ ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l) *lp++ = fs->fs_contigsumsize; } -loop: + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return ENOMEM; /* * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() * and vclean() can be called indirectly */ - simple_lock(&mntvnode_slock); - for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { - if (vp->v_mount != mp) { - simple_unlock(&mntvnode_slock); - goto loop; - } + mutex_enter(&mntvnode_lock); + loop: + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + if (vp->v_mount != mp || vismarker(vp)) + continue; /* * Step 4: invalidate all inactive vnodes. */ - if (vrecycle(vp, &mntvnode_slock, l)) + if (vrecycle(vp, &mntvnode_lock, l)) { + mutex_enter(&mntvnode_lock); + (void)vunmark(mvp); goto loop; + } /* * Step 5: invalidate all cached file data. */ - simple_lock(&vp->v_interlock); - nvp = TAILQ_NEXT(vp, v_mntvnodes); - simple_unlock(&mntvnode_slock); - if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) + mutex_enter(&vp->v_interlock); + mutex_exit(&mntvnode_lock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { + (void)vunmark(mvp); goto loop; + } if (vinvalbuf(vp, 0, cred, l, 0, 0)) panic("ffs_reload: dirty2"); /* @@ -682,16 +688,18 @@ loop: if (error) { brelse(bp, 0); vput(vp); - return (error); + (void)vunmark(mvp); + break; } ffs_load_inode(bp, ip, fs, ip->i_number); ip->i_ffs_effnlink = ip->i_nlink; brelse(bp, 0); vput(vp); - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); - return (0); + mutex_exit(&mntvnode_lock); + vfree(mvp); + return (error); } /* @@ -743,6 +751,10 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) sblockloc = 0; fstype = 0; + error = fstrans_mount(mp); + if (error) + return error; + /* * Try reading the superblock in each of its possible locations. 
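+ * (On failure, the "out:" path below must also release the
+ * fstrans reference taken above, via fstrans_unmount().)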
*/ @@ -974,7 +986,7 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) mp->mnt_fs_bshift = fs->fs_bshift; mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */ mp->mnt_flag |= MNT_LOCAL; - mp->mnt_iflag |= IMNT_HAS_TRANS; + mp->mnt_iflag |= IMNT_MPSAFE; #ifdef FFS_EI if (needswap) ump->um_flags |= UFS_NEEDSWAP; @@ -1016,6 +1028,7 @@ ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) #endif /* UFS_EXTATTR */ return (0); out: + fstrans_unmount(mp); if (fs) free(fs, M_UFSMNT); devvp->v_specmountpoint = NULL; @@ -1214,10 +1227,12 @@ ffs_unmount(struct mount *mp, int mntflags) free(fs, M_UFSMNT); if (ump->um_oldfscompat != NULL) free(ump->um_oldfscompat, M_UFSMNT); + softdep_unmount(mp); mutex_destroy(&ump->um_lock); free(ump, M_UFSMNT); mp->mnt_data = NULL; mp->mnt_flag &= ~MNT_LOCAL; + fstrans_unmount(mp); return (0); } @@ -1313,7 +1328,7 @@ int ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) { struct lwp *l = curlwp; - struct vnode *vp, *nvp; + struct vnode *vp, *mvp; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; @@ -1324,46 +1339,53 @@ ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) printf("fs = %s\n", fs->fs_fsmnt); panic("update: rofs mod"); } + + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return (ENOMEM); + fstrans_start(mp, FSTRANS_SHARED); /* * Write back each (modified) inode. */ - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); loop: /* * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() * and vclean() can be called indirectly */ - for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ - if (vp->v_mount != mp) - goto loop; - simple_lock(&vp->v_interlock); - nvp = TAILQ_NEXT(vp, v_mntvnodes); + if (vp->v_mount != mp || vismarker(vp)) + continue; + mutex_enter(&vp->v_interlock); ip = VTOI(vp); - if (vp->v_type == VNON || - ((ip->i_flag & - (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 && - LIST_EMPTY(&vp->v_dirtyblkhd) && - UVM_OBJ_IS_CLEAN(&vp->v_uobj))) + if (ip == NULL || (vp->v_iflag & (VI_XLOCK|VI_CLEAN)) != 0 || + vp->v_type == VNON || ((ip->i_flag & + (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 && + LIST_EMPTY(&vp->v_dirtyblkhd) && + UVM_OBJ_IS_CLEAN(&vp->v_uobj))) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); continue; } if (vp->v_type == VBLK && fstrans_getstate(mp) == FSTRANS_SUSPENDING) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); continue; } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); if (error) { - simple_lock(&mntvnode_slock); - if (error == ENOENT) + mutex_enter(&mntvnode_lock); + if (error == ENOENT) { + (void)vunmark(mvp); goto loop; + } continue; } if (vp->v_type == VREG && waitfor == MNT_LAZY) @@ -1374,9 +1396,9 @@ loop: if (error) allerror = error; vput(vp); - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); /* * Force stale file system control information to be flushed. 
*/ @@ -1385,7 +1407,7 @@ loop: allerror = error; /* Flushed work items may create new vnodes to clean */ if (allerror == 0 && count) { - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); goto loop; } } @@ -1397,7 +1419,7 @@ loop: allerror = error; VOP_UNLOCK(ump->um_devvp, 0); if (allerror == 0 && waitfor == MNT_WAIT) { - simple_lock(&mntvnode_slock); + mutex_enter(&mntvnode_lock); goto loop; } } @@ -1414,6 +1436,7 @@ loop: allerror = error; } fstrans_done(mp); + vfree(mvp); return (allerror); } diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 0f5e29982299..4253d5a5cc41 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_vnops.c,v 1.93 2007/11/26 19:02:30 pooka Exp $ */ +/* $NetBSD: ffs_vnops.c,v 1.94 2008/01/02 11:49:10 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.93 2007/11/26 19:02:30 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.94 2008/01/02 11:49:10 ad Exp $"); #include #include @@ -64,8 +64,6 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.93 2007/11/26 19:02:30 pooka Exp $") #include -static int ffs_full_fsync(void *); - /* Global vfs data structures for ufs. */ int (**ffs_vnodeop_p)(void *); const struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { @@ -246,7 +244,7 @@ ffs_fsync(void *v) struct lwp *a_l; } */ *ap = v; struct buf *bp; - int s, num, error, i; + int num, error, i; struct indir ia[NIADDR + 1]; int bsize; daddr_t blk_high; @@ -260,7 +258,7 @@ ffs_fsync(void *v) */ if ((ap->a_offlo == 0 && ap->a_offhi == 0) || DOINGSOFTDEP(vp) || (vp->v_type != VREG)) { - error = ffs_full_fsync(v); + error = ffs_full_fsync(vp, ap->a_flags); goto out; } @@ -273,7 +271,7 @@ ffs_fsync(void *v) * First, flush all pages in range. */ - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), round_page(ap->a_offhi), PGO_CLEANIT | ((ap->a_flags & FSYNC_WAIT) ? PGO_SYNCIO : 0)); @@ -285,40 +283,32 @@ ffs_fsync(void *v) * Then, flush indirect blocks. */ - s = splbio(); if (blk_high >= NDADDR) { error = ufs_getlbns(vp, blk_high, ia, &num); - if (error) { - splx(s); + if (error) goto out; - } + + mutex_enter(&bufcache_lock); for (i = 0; i < num; i++) { - bp = incore(vp, ia[i].in_lbn); - if (bp != NULL) { - simple_lock(&bp->b_interlock); - if (!(bp->b_flags & B_BUSY) && (bp->b_flags & B_DELWRI)) { - bp->b_flags |= B_BUSY | B_VFLUSH; - simple_unlock(&bp->b_interlock); - splx(s); - bawrite(bp); - s = splbio(); - } else { - simple_unlock(&bp->b_interlock); - } - } + if ((bp = incore(vp, ia[i].in_lbn)) == NULL) + continue; + if ((bp->b_cflags & BC_BUSY) != 0 || + (bp->b_oflags & BO_DELWRI) == 0) + continue; + bp->b_cflags |= BC_BUSY | BC_VFLUSH; + mutex_exit(&bufcache_lock); + bawrite(bp); + mutex_enter(&bufcache_lock); } + mutex_exit(&bufcache_lock); } if (ap->a_flags & FSYNC_WAIT) { - simple_lock(&global_v_numoutput_slock); - while (vp->v_numoutput > 0) { - vp->v_iflag |= VI_BWAIT; - ltsleep(&vp->v_numoutput, PRIBIO + 1, "fsync_range", 0, - &global_v_numoutput_slock); - } - simple_unlock(&global_v_numoutput_slock); + mutex_enter(&vp->v_interlock); + while (vp->v_numoutput > 0) + cv_wait(&vp->v_cv, &vp->v_interlock); + mutex_exit(&vp->v_interlock); } - splx(s); error = ffs_update(vp, NULL, NULL, ((ap->a_flags & (FSYNC_WAIT | FSYNC_DATAONLY)) == FSYNC_WAIT) @@ -339,27 +329,20 @@ out: * Synch an open file. 
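+ * (With this change ffs_full_fsync() is extern and takes the
+ * vnode and flag bits directly instead of a vop_fsync_args
+ * block, so internal callers need not fabricate VOP arguments.)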
*/ /* ARGSUSED */ -static int -ffs_full_fsync(void *v) +int +ffs_full_fsync(struct vnode *vp, int flags) { - struct vop_fsync_args /* { - struct vnode *a_vp; - kauth_cred_t a_cred; - int a_flags; - off_t a_offlo; - off_t a_offhi; - struct lwp *a_l; - } */ *ap = v; - struct vnode *vp = ap->a_vp; struct buf *bp, *nbp; - int s, error, passes, skipmeta, inodedeps_only, waitfor; + int error, passes, skipmeta, inodedeps_only, waitfor; if (vp->v_type == VBLK && vp->v_specmountpoint != NULL && (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP)) softdep_fsync_mountdev(vp); - inodedeps_only = DOINGSOFTDEP(vp) && (ap->a_flags & FSYNC_RECLAIM) + mutex_enter(&vp->v_interlock); + + inodedeps_only = DOINGSOFTDEP(vp) && (flags & FSYNC_RECLAIM) && UVM_OBJ_IS_CLEAN(&vp->v_uobj) && LIST_EMPTY(&vp->v_dirtyblkhd); /* @@ -367,79 +350,72 @@ ffs_full_fsync(void *v) */ if (vp->v_type == VREG || vp->v_type == VBLK) { - simple_lock(&vp->v_interlock); error = VOP_PUTPAGES(vp, 0, 0, PGO_ALLPAGES | PGO_CLEANIT | - ((ap->a_flags & FSYNC_WAIT) ? PGO_SYNCIO : 0) | + ((flags & FSYNC_WAIT) ? PGO_SYNCIO : 0) | (fstrans_getstate(vp->v_mount) == FSTRANS_SUSPENDING ? PGO_FREE : 0)); if (error) { return error; } - } + } else + mutex_exit(&vp->v_interlock); passes = NIADDR + 1; skipmeta = 0; - if (ap->a_flags & FSYNC_WAIT) + if (flags & FSYNC_WAIT) skipmeta = 1; - s = splbio(); loop: - LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) - bp->b_flags &= ~B_SCANNED; + mutex_enter(&bufcache_lock); + LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { + bp->b_cflags &= ~BC_SCANNED; + } for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); - simple_lock(&bp->b_interlock); - if (bp->b_flags & (B_BUSY | B_SCANNED)) { - simple_unlock(&bp->b_interlock); + if (bp->b_cflags & (BC_BUSY | BC_SCANNED)) continue; - } - if ((bp->b_flags & B_DELWRI) == 0) + if ((bp->b_oflags & BO_DELWRI) == 0) panic("ffs_fsync: not dirty"); - if (skipmeta && bp->b_lblkno < 0) { - simple_unlock(&bp->b_interlock); + if (skipmeta && bp->b_lblkno < 0) continue; - } - simple_unlock(&bp->b_interlock); - bp->b_flags |= B_BUSY | B_VFLUSH | B_SCANNED; - splx(s); + bp->b_cflags |= BC_BUSY | BC_VFLUSH | BC_SCANNED; + mutex_exit(&bufcache_lock); /* * On our final pass through, do all I/O synchronously * so that we can find out if our flush is failing * because of write errors. */ - if (passes > 0 || !(ap->a_flags & FSYNC_WAIT)) + if (passes > 0 || !(flags & FSYNC_WAIT)) (void) bawrite(bp); else if ((error = bwrite(bp)) != 0) return (error); - s = splbio(); /* - * Since we may have slept during the I/O, we need + * Since we unlocked during the I/O, we need * to start from a known point. */ + mutex_enter(&bufcache_lock); nbp = LIST_FIRST(&vp->v_dirtyblkhd); } + mutex_exit(&bufcache_lock); if (skipmeta) { skipmeta = 0; goto loop; } - if (ap->a_flags & FSYNC_WAIT) { - simple_lock(&global_v_numoutput_slock); + + if (flags & FSYNC_WAIT) { + mutex_enter(&vp->v_interlock); while (vp->v_numoutput) { - vp->v_iflag |= VI_BWAIT; - (void) ltsleep(&vp->v_numoutput, PRIBIO + 1, - "ffsfsync", 0, &global_v_numoutput_slock); + cv_wait(&vp->v_cv, &vp->v_interlock); } - simple_unlock(&global_v_numoutput_slock); - splx(s); + mutex_exit(&vp->v_interlock); /* * Ensure that any filesystem metadata associated * with the vnode has been written. 
*/ - if ((error = softdep_sync_metadata(ap)) != 0) + if ((error = softdep_sync_metadata(vp)) != 0) return (error); - s = splbio(); if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { /* * Block devices associated with filesystems may @@ -459,15 +435,14 @@ loop: #endif } } - splx(s); if (inodedeps_only) waitfor = 0; else - waitfor = (ap->a_flags & FSYNC_WAIT) ? UPDATE_WAIT : 0; + waitfor = (flags & FSYNC_WAIT) ? UPDATE_WAIT : 0; error = ffs_update(vp, NULL, NULL, waitfor); - if (error == 0 && ap->a_flags & FSYNC_CACHE) { + if (error == 0 && flags & FSYNC_CACHE) { int i = 0; VOP_IOCTL(VTOI(vp)->i_devvp, DIOCCACHESYNC, &i, FWRITE, curlwp->l_cred); @@ -490,9 +465,18 @@ ffs_reclaim(void *v) struct inode *ip = VTOI(vp); struct mount *mp = vp->v_mount; struct ufsmount *ump = ip->i_ump; + void *data; int error; fstrans_start(mp, FSTRANS_LAZY); + /* + * The inode must be freed and updated before being removed + * from its hash chain. Other threads trying to gain a hold + * on the inode will be stalled because it is locked (VI_XLOCK). + */ + if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { + ffs_vfree(vp, ip->i_number, ip->i_omode); + } if ((error = ufs_reclaim(vp)) != 0) { fstrans_done(mp); return (error); @@ -503,13 +487,20 @@ ffs_reclaim(void *v) else pool_put(&ffs_dinode2_pool, ip->i_din.ffs2_din); } + /* + * To interlock with ffs_sync(). + */ + genfs_node_destroy(vp); + mutex_enter(&vp->v_interlock); + data = vp->v_data; + vp->v_data = NULL; + mutex_exit(&vp->v_interlock); + /* * XXX MFS ends up here, too, to free an inode. Should we create * XXX a separate pool for MFS inodes? */ - genfs_node_destroy(vp); - pool_put(&ffs_inode_pool, vp->v_data); - vp->v_data = NULL; + pool_put(&ffs_inode_pool, data); fstrans_done(mp); return (0); } @@ -543,7 +534,7 @@ ffs_getpages(void *v) blkoff(fs, *ap->a_count << PAGE_SHIFT) != 0) && DOINGSOFTDEP(ap->a_vp)) { if ((ap->a_flags & PGO_LOCKED) == 0) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } return EINVAL; } @@ -751,18 +742,15 @@ ffs_lock(void *v) fstrans_is_owner(mp) && fstrans_getstate(mp) == FSTRANS_SUSPENDING) { if ((flags & LK_INTERLOCK) != 0) - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return 0; } - if ((flags & LK_TYPE_MASK) == LK_DRAIN) - return (lockmgr(vp->v_vnlock, flags, &vp->v_interlock)); - KASSERT((flags & ~(LK_SHARED | LK_EXCLUSIVE | LK_SLEEPFAIL | - LK_INTERLOCK | LK_NOWAIT | LK_SETRECURSE | LK_CANRECURSE)) == 0); + LK_INTERLOCK | LK_NOWAIT | LK_CANRECURSE)) == 0); for (;;) { if ((flags & LK_INTERLOCK) == 0) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); flags |= LK_INTERLOCK; } lkp = vp->v_vnlock; @@ -800,7 +788,7 @@ ffs_unlock(void *v) fstrans_is_owner(mp) && fstrans_getstate(mp) == FSTRANS_SUSPENDING) { if ((ap->a_flags & LK_INTERLOCK) != 0) - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return 0; } return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, diff --git a/sys/ufs/lfs/lfs.h b/sys/ufs/lfs/lfs.h index e11ef3259e4e..2868d2e9312e 100644 --- a/sys/ufs/lfs/lfs.h +++ b/sys/ufs/lfs/lfs.h @@ -1,4 +1,4 @@ -/* $NetBSD: lfs.h,v 1.122 2007/10/10 20:42:34 ad Exp $ */ +/* $NetBSD: lfs.h,v 1.123 2008/01/02 11:49:10 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. @@ -70,6 +70,7 @@ #define _UFS_LFS_LFS_H_ #include +#include /* * Compile-time options for LFS. 
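The LFS_LOCK_BUF()/LFS_UNLOCK_BUF() rework in the next hunk shows the new lock hierarchy in miniature: the caller already holds bufcache_lock, lfs_lock is taken inside it only around the global locked-queue counters, and the old (b_flags & B_CALL) test for callback buffers becomes a b_iodone check. Expanded as a plain function, the new LFS_LOCK_BUF() body is roughly the following (a review sketch only; the function form, its name, and the includes are illustrative, not part of the patch):

	#include <sys/buf.h>
	#include <sys/mutex.h>

	static void
	lfs_lock_buf_sketch(struct buf *bp)
	{

		KASSERT(mutex_owned(&bufcache_lock));
		/* Buffers with an iodone callback are never counted. */
		if ((bp->b_cflags & BC_LOCKED) == 0 && bp->b_iodone == NULL) {
			/* lfs_lock nests inside bufcache_lock. */
			mutex_enter(&lfs_lock);
			++locked_queue_count;
			locked_queue_bytes += bp->b_bufsize;
			mutex_exit(&lfs_lock);
		}
		bp->b_cflags |= BC_LOCKED;
	}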
@@ -191,34 +192,35 @@ typedef struct lfs_res_blk { #define IS_IFILE(bp) (VTOI(bp->b_vp)->i_number == LFS_IFILE_INUM) # define LFS_LOCK_BUF(bp) do { \ - if (((bp)->b_flags & (B_LOCKED | B_CALL)) == 0) { \ - simple_lock(&lfs_subsys_lock); \ + KASSERT(mutex_owned(&bufcache_lock)); \ + if (((bp)->b_cflags & BC_LOCKED) == 0 && bp->b_iodone == NULL) {\ + mutex_enter(&lfs_lock); \ ++locked_queue_count; \ locked_queue_bytes += bp->b_bufsize; \ - simple_unlock(&lfs_subsys_lock); \ + mutex_exit(&lfs_lock); \ } \ - (bp)->b_flags |= B_LOCKED; \ + (bp)->b_cflags |= BC_LOCKED; \ } while (0) # define LFS_UNLOCK_BUF(bp) do { \ - if (((bp)->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED) { \ - simple_lock(&lfs_subsys_lock); \ + KASSERT(mutex_owned(&bufcache_lock)); \ + if (((bp)->b_cflags & BC_LOCKED) != 0 && bp->b_iodone == NULL) {\ + mutex_enter(&lfs_lock); \ --locked_queue_count; \ locked_queue_bytes -= bp->b_bufsize; \ if (locked_queue_count < LFS_WAIT_BUFS && \ locked_queue_bytes < LFS_WAIT_BYTES) \ wakeup(&locked_queue_count); \ - simple_unlock(&lfs_subsys_lock); \ + mutex_exit(&lfs_lock); \ } \ - (bp)->b_flags &= ~B_LOCKED; \ + (bp)->b_cflags &= ~BC_LOCKED; \ } while (0) #ifdef _KERNEL extern u_long bufmem_lowater, bufmem_hiwater; /* XXX */ -# define LFS_IS_MALLOC_BUF(bp) (((bp)->b_flags & B_CALL) && \ - (bp)->b_iodone == lfs_callback) +# define LFS_IS_MALLOC_BUF(bp) ((bp)->b_iodone == lfs_callback) # ifdef DEBUG # define LFS_DEBUG_COUNTLOCKED(m) do { \ @@ -247,7 +249,7 @@ extern struct lfs_log_entry lfs_log[LFS_LOGLENGTH]; # define LFS_ENTER_LOG(theop, thefile, theline, lbn, theflags, thepid) do {\ int _s; \ \ - simple_lock(&lfs_subsys_lock); \ + mutex_enter(&lfs_lock); \ _s = splbio(); \ lfs_log[lfs_lognum].op = theop; \ lfs_log[lfs_lognum].file = thefile; \ @@ -257,7 +259,7 @@ extern struct lfs_log_entry lfs_log[LFS_LOGLENGTH]; lfs_log[lfs_lognum].flags = (theflags); \ lfs_lognum = (lfs_lognum + 1) % LFS_LOGLENGTH; \ splx(_s); \ - simple_unlock(&lfs_subsys_lock); \ + mutex_exit(&lfs_lock); \ } while (0) # define LFS_BCLEAN_LOG(fs, bp) do { \ @@ -316,7 +318,6 @@ struct lfid { #define IN_ALLMOD (IN_MODIFIED|IN_ACCESS|IN_CHANGE|IN_UPDATE|IN_MODIFY|IN_ACCESSED|IN_CLEANING) #define LFS_SET_UINO(ip, flags) do { \ - simple_lock(&(ip)->i_lfs->lfs_interlock); \ if (((flags) & IN_ACCESSED) && !((ip)->i_flag & IN_ACCESSED)) \ ++(ip)->i_lfs->lfs_uinodes; \ if (((flags) & IN_CLEANING) && !((ip)->i_flag & IN_CLEANING)) \ @@ -324,11 +325,9 @@ struct lfid { if (((flags) & IN_MODIFIED) && !((ip)->i_flag & IN_MODIFIED)) \ ++(ip)->i_lfs->lfs_uinodes; \ (ip)->i_flag |= (flags); \ - simple_unlock(&(ip)->i_lfs->lfs_interlock); \ } while (0) #define LFS_CLR_UINO(ip, flags) do { \ - simple_lock(&(ip)->i_lfs->lfs_interlock); \ if (((flags) & IN_ACCESSED) && ((ip)->i_flag & IN_ACCESSED)) \ --(ip)->i_lfs->lfs_uinodes; \ if (((flags) & IN_CLEANING) && ((ip)->i_flag & IN_CLEANING)) \ @@ -339,7 +338,6 @@ struct lfid { if ((ip)->i_lfs->lfs_uinodes < 0) { \ panic("lfs_uinodes < 0"); \ } \ - simple_unlock(&(ip)->i_lfs->lfs_interlock); \ } while (0) #define LFS_ITIMES(ip, acc, mod, cre) \ @@ -396,15 +394,11 @@ struct segusage_v1 { #ifdef _KERNEL # define SHARE_IFLOCK(F) \ do { \ - simple_lock(&(F)->lfs_interlock); \ - lockmgr(&(F)->lfs_iflock, LK_SHARED, &(F)->lfs_interlock); \ - simple_unlock(&(F)->lfs_interlock); \ + rw_enter(&(F)->lfs_iflock, RW_READER); \ } while(0) # define UNSHARE_IFLOCK(F) \ do { \ - simple_lock(&(F)->lfs_interlock); \ - lockmgr(&(F)->lfs_iflock, LK_RELEASE, &(F)->lfs_interlock); \ - 
simple_unlock(&(F)->lfs_interlock); \ + rw_exit(&(F)->lfs_iflock); \ } while(0) #else /* ! _KERNEL */ # define SHARE_IFLOCK(F) @@ -529,7 +523,7 @@ typedef struct _cleanerinfo { * Synchronize the Ifile cleaner info with current avail and bfree. */ #define LFS_SYNC_CLEANERINFO(cip, fs, bp, w) do { \ - simple_lock(&(fs)->lfs_interlock); \ + mutex_enter(&lfs_lock); \ if ((w) || (cip)->bfree != (fs)->lfs_bfree || \ (cip)->avail != (fs)->lfs_avail - (fs)->lfs_ravail - \ (fs)->lfs_favail) { \ @@ -539,10 +533,10 @@ typedef struct _cleanerinfo { if (((bp)->b_flags & B_GATHERED) == 0) { \ (fs)->lfs_flags |= LFS_IFDIRTY; \ } \ - simple_unlock(&(fs)->lfs_interlock); \ + mutex_exit(&lfs_lock); \ (void) LFS_BWRITE_LOG(bp); /* Ifile */ \ } else { \ - simple_unlock(&(fs)->lfs_interlock); \ + mutex_exit(&lfs_lock); \ brelse(bp, 0); \ } \ } while (0) @@ -566,9 +560,9 @@ typedef struct _cleanerinfo { LFS_CLEANERINFO((CIP), (FS), (BP)); \ (CIP)->free_head = (VAL); \ LFS_BWRITE_LOG(BP); \ - simple_lock(&fs->lfs_interlock); \ + mutex_enter(&lfs_lock); \ (FS)->lfs_flags |= LFS_IFDIRTY; \ - simple_unlock(&fs->lfs_interlock); \ + mutex_exit(&lfs_lock); \ } \ } while (0) @@ -582,9 +576,9 @@ typedef struct _cleanerinfo { LFS_CLEANERINFO((CIP), (FS), (BP)); \ (CIP)->free_tail = (VAL); \ LFS_BWRITE_LOG(BP); \ - simple_lock(&fs->lfs_interlock); \ + mutex_enter(&lfs_lock); \ (FS)->lfs_flags |= LFS_IFDIRTY; \ - simple_unlock(&fs->lfs_interlock); \ + mutex_exit(&lfs_lock); \ } while (0) /* @@ -825,8 +819,9 @@ struct lfs { size_t lfs_devbsize; /* Device block size */ size_t lfs_devbshift; /* Device block shift */ krwlock_t lfs_fraglock; - struct lock lfs_iflock; /* Ifile lock */ - struct lock lfs_stoplock; /* Wrap lock */ + krwlock_t lfs_iflock; /* Ifile lock */ + kcondvar_t lfs_stopcv; /* Wrap lock */ + struct lwp *lfs_stoplwp; pid_t lfs_rfpid; /* Process ID of roll-forward agent */ int lfs_nadirop; /* number of active dirop nodes */ long lfs_ravail; /* blocks pre-reserved for writing */ @@ -845,8 +840,7 @@ struct lfs { #endif /* _KERNEL */ #define LFS_MAX_CLEANIND 64 int32_t lfs_cleanint[LFS_MAX_CLEANIND]; /* Active cleaning intervals */ - int lfs_cleanind; /* Index into intervals */ - struct simplelock lfs_interlock; /* lock for lfs_seglock */ + int lfs_cleanind; /* Index into intervals */ int lfs_sleepers; /* # procs sleeping this fs */ int lfs_pages; /* dirty pages blaming this fs */ lfs_bm_t *lfs_ino_bitmap; /* Inuse inodes bitmap */ diff --git a/sys/ufs/lfs/lfs_alloc.c b/sys/ufs/lfs/lfs_alloc.c index 1528ae95b113..ef1e88fb2bf0 100644 --- a/sys/ufs/lfs/lfs_alloc.c +++ b/sys/ufs/lfs/lfs_alloc.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_alloc.c,v 1.104 2007/12/12 18:36:10 he Exp $ */ +/* $NetBSD: lfs_alloc.c,v 1.105 2008/01/02 11:49:10 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc. @@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.104 2007/12/12 18:36:10 he Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.105 2008/01/02 11:49:10 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -256,9 +256,9 @@ lfs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred, #endif /* DIAGNOSTIC */ /* Set superblock modified bit and increment file count. 
*/ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_fmod = 1; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); ++fs->lfs_nfiles; VOP_UNLOCK(fs->lfs_ivnode, 0); @@ -285,7 +285,9 @@ lfs_ialloc(struct lfs *fs, struct vnode *pvp, ino_t new_ino, int new_gen, lfs_vcreate(pvp->v_mount, new_ino, vp); ip = VTOI(vp); + mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_CHANGE); + mutex_exit(&lfs_lock); /* on-disk structure has been zeroed out by lfs_vcreate */ ip->i_din.ffs1_din->di_inumber = new_ino; @@ -434,7 +436,6 @@ lfs_vfree(struct vnode *vp, ino_t ino, int mode) struct lfs *fs; daddr_t old_iaddr; ino_t otail; - int s; /* Get the inode number and file system. */ ip = VTOI(vp); @@ -445,28 +446,25 @@ lfs_vfree(struct vnode *vp, ino_t ino, int mode) DLOG((DLOG_ALLOC, "lfs_vfree: free ino %lld\n", (long long)ino)); /* Drain of pending writes */ - simple_lock(&vp->v_interlock); - s = splbio(); - if (fs->lfs_version > 1 && WRITEINPROG(vp)) - ltsleep(vp, (PRIBIO+1), "lfs_vfree", 0, &vp->v_interlock); - splx(s); - simple_unlock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); + while (fs->lfs_version > 1 && WRITEINPROG(vp)) { + cv_wait(&vp->v_cv, &vp->v_interlock); + } + mutex_exit(&vp->v_interlock); lfs_seglock(fs, SEGM_PROT); vn_lock(fs->lfs_ivnode, LK_EXCLUSIVE); lfs_unmark_vnode(vp); + mutex_enter(&lfs_lock); if (vp->v_uflag & VU_DIROP) { vp->v_uflag &= ~VU_DIROP; - simple_lock(&fs->lfs_interlock); - simple_lock(&lfs_subsys_lock); --lfs_dirvcount; - simple_unlock(&lfs_subsys_lock); --fs->lfs_dirvcount; TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); - simple_unlock(&fs->lfs_interlock); wakeup(&fs->lfs_dirvcount); wakeup(&lfs_dirvcount); + mutex_exit(&lfs_lock); lfs_vunref(vp); /* @@ -487,10 +485,13 @@ lfs_vfree(struct vnode *vp, ino_t ino, int mode) /* * If it's not a dirop, we can finalize right away. */ + mutex_exit(&lfs_lock); lfs_finalize_ino_seguse(fs, ip); } + mutex_enter(&lfs_lock); LFS_CLR_UINO(ip, IN_ACCESSED|IN_CLEANING|IN_MODIFIED); + mutex_exit(&lfs_lock); ip->i_flag &= ~IN_ALLMOD; ip->i_lfs_iflags |= LFSI_DELETED; @@ -582,9 +583,9 @@ lfs_vfree(struct vnode *vp, ino_t ino, int mode) } /* Set superblock modified bit and decrement file count. */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_fmod = 1; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); --fs->lfs_nfiles; VOP_UNLOCK(fs->lfs_ivnode, 0); diff --git a/sys/ufs/lfs/lfs_balloc.c b/sys/ufs/lfs/lfs_balloc.c index 796976856d56..c9f745f652dd 100644 --- a/sys/ufs/lfs/lfs_balloc.c +++ b/sys/ufs/lfs/lfs_balloc.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_balloc.c,v 1.63 2007/10/08 18:01:29 ad Exp $ */ +/* $NetBSD: lfs_balloc.c,v 1.64 2008/01/02 11:49:10 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 
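The lfs_balloc.c hunks below repeat the conversion pattern used throughout this patch: splbio()/simple_lock()/ltsleep() sequences become kmutex/condvar waits, with the global lfs_lock replacing both fs->lfs_interlock and lfs_subsys_lock. The write-drain loop just converted in lfs_vfree() above reduces to the following shape (a sketch; the helper name and includes are invented for illustration):

	#include <sys/condvar.h>
	#include <sys/mutex.h>
	#include <sys/vnode.h>

	static void
	lfs_drain_writes_sketch(struct lfs *fs, struct vnode *vp)
	{

		/* v_interlock is now a kmutex; no splbio protection needed. */
		mutex_enter(&vp->v_interlock);
		while (fs->lfs_version > 1 && WRITEINPROG(vp))
			cv_wait(&vp->v_cv, &vp->v_interlock);
		mutex_exit(&vp->v_interlock);
	}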
@@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.63 2007/10/08 18:01:29 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.64 2008/01/02 11:49:10 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -198,9 +198,9 @@ lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred, clrbuf(bp); } ip->i_lfs_effnblks += bb; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_bfree -= bb; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); ip->i_ffs1_db[lbn] = UNWRITTEN; } else { if (nsize <= osize) { @@ -242,9 +242,9 @@ lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred, } } if (ISSPACE(fs, bcount, cred)) { - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_bfree -= bcount; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); ip->i_lfs_effnblks += bcount; } else { return ENOSPC; @@ -266,7 +266,7 @@ lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred, if (!indirs[i].in_exists) { clrbuf(ibp); ibp->b_blkno = UNWRITTEN; - } else if (!(ibp->b_flags & (B_DELWRI | B_DONE))) { + } else if (!(ibp->b_oflags & (BO_DELWRI | BO_DONE))) { ibp->b_blkno = fsbtodb(fs, idaddr); ibp->b_flags |= B_READ; VOP_STRATEGY(vp, ibp); @@ -351,7 +351,7 @@ lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred, #endif VOP_BWRITE(ibp); } - } else if (bpp && !(bp->b_flags & (B_DONE|B_DELWRI))) { + } else if (bpp && !(bp->b_oflags & (BO_DONE|BO_DELWRI))) { /* * Not a brand new block, also not in the cache; * read it in from disk. @@ -434,7 +434,7 @@ lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn, struct buf * * release both and start over after waiting. */ - if (bpp && ((*bpp)->b_flags & B_DELWRI)) { + if (bpp && ((*bpp)->b_oflags & BO_DELWRI)) { if (!lfs_fits(fs, bb)) { if (bpp) brelse(*bpp, 0); @@ -448,9 +448,9 @@ lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn, struct buf * fs->lfs_avail -= bb; } - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_bfree -= bb; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); ip->i_lfs_effnblks += bb; ip->i_flag |= IN_CHANGE | IN_UPDATE; @@ -459,10 +459,11 @@ lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn, struct buf * allocbuf(*bpp, nsize, 1); /* Adjust locked-list accounting */ - if (((*bpp)->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED) { - simple_lock(&lfs_subsys_lock); + if (((*bpp)->b_cflags & BC_LOCKED) != 0 && + (*bpp)->b_iodone == NULL) { + mutex_enter(&lfs_lock); locked_queue_bytes += (*bpp)->b_bufsize - obufsize; - simple_unlock(&lfs_subsys_lock); + mutex_exit(&lfs_lock); } bzero((char *)((*bpp)->b_data) + osize, (u_int)(nsize - osize)); @@ -512,21 +513,20 @@ lfs_register_block(struct vnode *vp, daddr_t lbn) lbp = (struct lbnentry *)pool_get(&lfs_lbnentry_pool, PR_WAITOK); lbp->lbn = lbn; + mutex_enter(&lfs_lock); if (SPLAY_INSERT(lfs_splay, &ip->i_lfs_lbtree, lbp) != NULL) { + mutex_exit(&lfs_lock); /* Already there */ pool_put(&lfs_lbnentry_pool, lbp); return; } ++ip->i_lfs_nbtree; - simple_lock(&fs->lfs_interlock); fs->lfs_favail += btofsb(fs, (1 << fs->lfs_bshift)); fs->lfs_pages += fs->lfs_bsize >> PAGE_SHIFT; - simple_lock(&lfs_subsys_lock); ++locked_fakequeue_count; lfs_subsys_pages += fs->lfs_bsize >> PAGE_SHIFT; - simple_unlock(&lfs_subsys_lock); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } static void @@ -534,19 +534,18 @@ lfs_do_deregister(struct lfs *fs, struct inode *ip, struct 
lbnentry *lbp) { ASSERT_MAYBE_SEGLOCK(fs); + mutex_enter(&lfs_lock); --ip->i_lfs_nbtree; SPLAY_REMOVE(lfs_splay, &ip->i_lfs_lbtree, lbp); - pool_put(&lfs_lbnentry_pool, lbp); - simple_lock(&fs->lfs_interlock); if (fs->lfs_favail > btofsb(fs, (1 << fs->lfs_bshift))) fs->lfs_favail -= btofsb(fs, (1 << fs->lfs_bshift)); fs->lfs_pages -= fs->lfs_bsize >> PAGE_SHIFT; - simple_lock(&lfs_subsys_lock); if (locked_fakequeue_count > 0) --locked_fakequeue_count; lfs_subsys_pages -= fs->lfs_bsize >> PAGE_SHIFT; - simple_unlock(&lfs_subsys_lock); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); + + pool_put(&lfs_lbnentry_pool, lbp); } void diff --git a/sys/ufs/lfs/lfs_bio.c b/sys/ufs/lfs/lfs_bio.c index 3ae40e4d8204..4d9ebab01c22 100644 --- a/sys/ufs/lfs/lfs_bio.c +++ b/sys/ufs/lfs/lfs_bio.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_bio.c,v 1.106 2007/10/11 19:53:43 ad Exp $ */ +/* $NetBSD: lfs_bio.c,v 1.107 2008/01/02 11:49:10 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. @@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.106 2007/10/11 19:53:43 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.107 2008/01/02 11:49:10 ad Exp $"); #include #include @@ -95,7 +95,7 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.106 2007/10/11 19:53:43 ad Exp $"); * No write cost accounting is done. * This is almost certainly wrong for synchronous operations and NFS. * - * protected by lfs_subsys_lock. + * protected by lfs_lock. */ int locked_queue_count = 0; /* Count of locked-down buffers. */ long locked_queue_bytes = 0L; /* Total size of locked buffers. */ @@ -103,8 +103,11 @@ int lfs_subsys_pages = 0L; /* Total number LFS-written pages */ int lfs_fs_pagetrip = 0; /* # of pages to trip per-fs write */ int lfs_writing = 0; /* Set if already kicked off a writer because of buffer space */ -/* Lock for aboves */ -struct simplelock lfs_subsys_lock = SIMPLELOCK_INITIALIZER; + +/* Lock and condition variables for above. 
*/ +kcondvar_t locked_queue_cv; +kcondvar_t lfs_writing_cv; +kmutex_t lfs_lock; extern int lfs_dostats; @@ -125,7 +128,7 @@ lfs_fits_buf(struct lfs *fs, int n, int bytes) int count_fit, bytes_fit; ASSERT_NO_SEGLOCK(fs); - LOCK_ASSERT(simple_lock_held(&lfs_subsys_lock)); + KASSERT(mutex_owned(&lfs_lock)); count_fit = (locked_queue_count + locked_queue_rcount + n < LFS_WAIT_BUFS); @@ -157,16 +160,16 @@ lfs_reservebuf(struct lfs *fs, struct vnode *vp, KASSERT(locked_queue_rcount >= 0); KASSERT(locked_queue_rbytes >= 0); - simple_lock(&lfs_subsys_lock); + mutex_enter(&lfs_lock); while (n > 0 && !lfs_fits_buf(fs, n, bytes)) { int error; lfs_flush(fs, 0, 0); - error = ltsleep(&locked_queue_count, PCATCH | PUSER, - "lfsresbuf", hz * LFS_BUFWAIT, &lfs_subsys_lock); + error = cv_timedwait_sig(&locked_queue_cv, &lfs_lock, + hz * LFS_BUFWAIT); if (error && error != EWOULDBLOCK) { - simple_unlock(&lfs_subsys_lock); + mutex_exit(&lfs_lock); return error; } } @@ -174,7 +177,7 @@ lfs_reservebuf(struct lfs *fs, struct vnode *vp, locked_queue_rcount += n; locked_queue_rbytes += bytes; - simple_unlock(&lfs_subsys_lock); + mutex_exit(&lfs_lock); KASSERT(locked_queue_rcount >= 0); KASSERT(locked_queue_rbytes >= 0); @@ -208,9 +211,9 @@ lfs_reserveavail(struct lfs *fs, struct vnode *vp, ASSERT_MAYBE_SEGLOCK(fs); slept = 0; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (fsb > 0 && !lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail)) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); #if 0 /* * XXX ideally, we should unlock vnodes here @@ -241,19 +244,19 @@ lfs_reserveavail(struct lfs *fs, struct vnode *vp, LFS_SYNC_CLEANERINFO(cip, fs, bp, 0); lfs_wakeup_cleaner(fs); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); /* Cleaner might have run while we were reading, check again */ if (lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail)) break; - error = ltsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_reserve", - 0, &fs->lfs_interlock); + error = mtsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_reserve", + 0, &lfs_lock); #if 0 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX use lockstatus */ vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY); /* XXX use lockstatus */ #endif if (error) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return error; } } @@ -263,7 +266,7 @@ lfs_reserveavail(struct lfs *fs, struct vnode *vp, } #endif fs->lfs_ravail += fsb; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return 0; } @@ -282,12 +285,12 @@ lfs_reserve(struct lfs *fs, struct vnode *vp, struct vnode *vp2, int fsb) ASSERT_MAYBE_SEGLOCK(fs); if (vp2) { /* Make sure we're not in the process of reclaiming vp2 */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while(fs->lfs_flags & LFS_UNDIROP) { - ltsleep(&fs->lfs_flags, PRIBIO + 1, "lfsrundirop", 0, - &fs->lfs_interlock); + mtsleep(&fs->lfs_flags, PRIBIO + 1, "lfsrundirop", 0, + &lfs_lock); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } KASSERT(fsb < 0 || VOP_ISLOCKED(vp)); @@ -322,8 +325,10 @@ lfs_reserve(struct lfs *fs, struct vnode *vp, struct vnode *vp2, int fsb) * vref vnodes here so that cleaner doesn't try to reuse them. 
* (see XXX comment in lfs_reserveavail) */ + mutex_enter(&vp->v_interlock); lfs_vref(vp); if (vp2 != NULL) { + mutex_enter(&vp2->v_interlock); lfs_vref(vp2); } @@ -401,13 +406,13 @@ lfs_availwait(struct lfs *fs, int fsb) ASSERT_NO_SEGLOCK(fs); /* Push cleaner blocks through regardless */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (LFS_SEGLOCK_HELD(fs) && fs->lfs_sp->seg_flags & (SEGM_CLEAN | SEGM_FORCE_CKP)) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return 0; } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); while (!lfs_fits(fs, fsb)) { /* @@ -441,15 +446,17 @@ lfs_bwrite_ext(struct buf *bp, int flags) { struct lfs *fs; struct inode *ip; - int fsb, s; + struct vnode *vp; + int fsb; - fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs; + vp = bp->b_vp; + fs = VFSTOUFS(vp->v_mount)->um_lfs; ASSERT_MAYBE_SEGLOCK(fs); - KASSERT(bp->b_flags & B_BUSY); + KASSERT(bp->b_cflags & BC_BUSY); KASSERT(flags & BW_CLEAN || !LFS_IS_MALLOC_BUF(bp)); - KASSERT((bp->b_flags & (B_DELWRI|B_LOCKED)) != B_DELWRI); - KASSERT((bp->b_flags & (B_DELWRI|B_LOCKED)) != B_LOCKED); + KASSERT(((bp->b_oflags | bp->b_cflags) & (BO_DELWRI|BC_LOCKED)) + != BO_DELWRI); /* * Don't write *any* blocks if we're mounted read-only, or @@ -458,13 +465,16 @@ lfs_bwrite_ext(struct buf *bp, int flags) * In particular the cleaner can't write blocks either. */ if (fs->lfs_ronly || (fs->lfs_pflags & LFS_PF_CLEAN)) { - bp->b_flags &= ~(B_DELWRI | B_READ); + bp->b_oflags &= ~BO_DELWRI; + bp->b_flags |= B_READ; bp->b_error = 0; + mutex_enter(&bufcache_lock); LFS_UNLOCK_BUF(bp); if (LFS_IS_MALLOC_BUF(bp)) - bp->b_flags &= ~B_BUSY; + bp->b_cflags &= ~BC_BUSY; else - brelse(bp, 0); + brelsel(bp, 0); + mutex_exit(&bufcache_lock); return (fs->lfs_ronly ? EROFS : 0); } @@ -472,7 +482,7 @@ lfs_bwrite_ext(struct buf *bp, int flags) * Set the delayed write flag and use reassignbuf to move the buffer * from the clean list to the dirty one. * - * Set the B_LOCKED flag and unlock the buffer, causing brelse to move + * Set the BC_LOCKED flag and unlock the buffer, causing brelse to move * the buffer onto the LOCKED free list. This is necessary, otherwise * getnewbuf() would try to reclaim the buffers using bawrite, which * isn't going to work. @@ -482,56 +492,59 @@ lfs_bwrite_ext(struct buf *bp, int flags) * enough space reserved so that there's room to write meta-data * blocks. */ - if (!(bp->b_flags & B_LOCKED)) { + if ((bp->b_cflags & BC_LOCKED) == 0) { fsb = fragstofsb(fs, numfrags(fs, bp->b_bcount)); - ip = VTOI(bp->b_vp); + ip = VTOI(vp); + mutex_enter(&lfs_lock); if (flags & BW_CLEAN) { LFS_SET_UINO(ip, IN_CLEANING); } else { LFS_SET_UINO(ip, IN_MODIFIED); } + mutex_exit(&lfs_lock); fs->lfs_avail -= fsb; - bp->b_flags |= B_DELWRI; + mutex_enter(&bufcache_lock); + mutex_enter(&vp->v_interlock); + bp->b_oflags = (bp->b_oflags | BO_DELWRI) & ~BO_DONE; LFS_LOCK_BUF(bp); - bp->b_flags &= ~(B_READ | B_DONE); + bp->b_flags &= ~B_READ; bp->b_error = 0; - s = splbio(); reassignbuf(bp, bp->b_vp); - splx(s); + mutex_exit(&vp->v_interlock); + } else { + mutex_enter(&bufcache_lock); } - if (bp->b_flags & B_CALL) - bp->b_flags &= ~B_BUSY; + if (bp->b_iodone != NULL) + bp->b_cflags &= ~BC_BUSY; else - brelse(bp, 0); + brelsel(bp, 0); + mutex_exit(&bufcache_lock); return (0); } /* - * Called and return with the lfs_interlock held, but no other simple_locks - * held. + * Called and return with the lfs_lock held. 
*/ void lfs_flush_fs(struct lfs *fs, int flags) { ASSERT_NO_SEGLOCK(fs); - LOCK_ASSERT(simple_lock_held(&fs->lfs_interlock)); + KASSERT(mutex_owned(&lfs_lock)); if (fs->lfs_ronly) return; - simple_lock(&lfs_subsys_lock); if (lfs_dostats) ++lfs_stats.flush_invoked; - simple_unlock(&lfs_subsys_lock); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); lfs_writer_enter(fs, "fldirop"); lfs_segwrite(fs->lfs_ivnode->v_mount, flags); lfs_writer_leave(fs); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_favail = 0; /* XXX */ } @@ -542,7 +555,7 @@ lfs_flush_fs(struct lfs *fs, int flags) * XXX We have one static count of locked buffers; * XXX need to think more about the multiple filesystem case. * - * Called and return with lfs_subsys_lock held. + * Called and return with lfs_lock held. * If fs != NULL, we hold the segment lock for fs. */ void @@ -552,7 +565,7 @@ lfs_flush(struct lfs *fs, int flags, int only_onefs) struct mount *mp, *nmp; struct lfs *tfs; - LOCK_ASSERT(simple_lock_held(&lfs_subsys_lock)); + KASSERT(mutex_owned(&lfs_lock)); KDASSERT(fs == NULL || !LFS_SEGLOCK_HELD(fs)); if (lfs_dostats) @@ -563,20 +576,19 @@ lfs_flush(struct lfs *fs, int flags, int only_onefs) return; } while (lfs_writing) - ltsleep(&lfs_writing, PRIBIO + 1, "lfsflush", 0, - &lfs_subsys_lock); + cv_wait(&lfs_writing_cv, &lfs_lock); lfs_writing = 1; - simple_unlock(&lfs_subsys_lock); + mutex_exit(&lfs_lock); if (only_onefs) { KASSERT(fs != NULL); if (vfs_busy(fs->lfs_ivnode->v_mount, LK_NOWAIT, &mountlist_lock)) goto errout; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); lfs_flush_fs(fs, flags); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); vfs_unbusy(fs->lfs_ivnode->v_mount); } else { locked_fakequeue_count = 0; @@ -588,12 +600,12 @@ lfs_flush(struct lfs *fs, int flags, int only_onefs) nmp = CIRCLEQ_NEXT(mp, mnt_list); continue; } - if (strncmp(mp->mnt_stat.f_fstypename, MOUNT_LFS, + if (strncmp(&mp->mnt_stat.f_fstypename[0], MOUNT_LFS, sizeof(mp->mnt_stat.f_fstypename)) == 0) { tfs = VFSTOUFS(mp)->um_lfs; - simple_lock(&tfs->lfs_interlock); + mutex_enter(&lfs_lock); lfs_flush_fs(tfs, flags); - simple_unlock(&tfs->lfs_interlock); + mutex_exit(&lfs_lock); } mutex_enter(&mountlist_lock); nmp = CIRCLEQ_NEXT(mp, mnt_list); @@ -605,7 +617,7 @@ lfs_flush(struct lfs *fs, int flags, int only_onefs) wakeup(&lfs_subsys_pages); errout: - simple_lock(&lfs_subsys_lock); + mutex_enter(&lfs_lock); KASSERT(lfs_writing); lfs_writing = 0; wakeup(&lfs_writing); @@ -645,8 +657,7 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags) * If we would flush below, but dirops are active, sleep. * Note that a dirop cannot ever reach this code! 
*/ - simple_lock(&fs->lfs_interlock); - simple_lock(&lfs_subsys_lock); + mutex_enter(&lfs_lock); while (fs->lfs_dirops > 0 && (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS || locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES || @@ -654,12 +665,10 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags) fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) || lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0)) { - simple_unlock(&lfs_subsys_lock); ++fs->lfs_diropwait; - ltsleep(&fs->lfs_writer, PRIBIO+1, "bufdirop", 0, - &fs->lfs_interlock); + mtsleep(&fs->lfs_writer, PRIBIO+1, "bufdirop", 0, + &lfs_lock); --fs->lfs_diropwait; - simple_lock(&lfs_subsys_lock); } #ifdef DEBUG @@ -697,7 +706,6 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags) lfs_subsys_pages > LFS_MAX_PAGES || fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) || lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0) { - simple_unlock(&fs->lfs_interlock); lfs_flush(fs, flags, 0); } else if (lfs_fs_pagetrip && fs->lfs_pages > lfs_fs_pagetrip) { /* @@ -706,9 +714,7 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags) */ ++fs->lfs_pdflush; wakeup(&lfs_writer_daemon); - simple_unlock(&fs->lfs_interlock); - } else - simple_unlock(&fs->lfs_interlock); + } while (locked_queue_count + INOCOUNT(fs) > LFS_WAIT_BUFS || locked_queue_bytes + INOBYTES(fs) > LFS_WAIT_BYTES || @@ -720,8 +726,8 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags) ++lfs_stats.wait_exceeded; DLOG((DLOG_AVAIL, "lfs_check: waiting: count=%d, bytes=%ld\n", locked_queue_count, locked_queue_bytes)); - error = ltsleep(&locked_queue_count, PCATCH | PUSER, - "buffers", hz * LFS_BUFWAIT, &lfs_subsys_lock); + error = cv_timedwait_sig(&locked_queue_cv, &lfs_lock, + hz * LFS_BUFWAIT); if (error != EWOULDBLOCK) break; @@ -736,7 +742,7 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags) lfs_flush(fs, flags | SEGM_CKP, 0); } } - simple_unlock(&lfs_subsys_lock); + mutex_exit(&lfs_lock); return (error); } @@ -748,12 +754,11 @@ lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size, int typ { struct buf *bp; size_t nbytes; - int s; ASSERT_MAYBE_SEGLOCK(fs); nbytes = roundup(size, fsbtob(fs, 1)); - bp = getiobuf(); + bp = getiobuf(NULL, true); if (nbytes) { bp->b_data = lfs_malloc(fs, nbytes, type); /* memset(bp->b_data, 0, nbytes); */ @@ -764,10 +769,6 @@ lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size, int typ if (bp == NULL) panic("bp is NULL after malloc in lfs_newbuf"); #endif - bp->b_vp = NULL; - s = splbio(); - bgetvp(vp, bp); - splx(s); bp->b_bufsize = size; bp->b_bcount = size; @@ -776,25 +777,34 @@ lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size, int typ bp->b_error = 0; bp->b_resid = 0; bp->b_iodone = lfs_callback; - bp->b_flags = B_BUSY | B_CALL | B_NOCACHE; + bp->b_cflags = BC_BUSY | BC_NOCACHE; bp->b_private = fs; + mutex_enter(&bufcache_lock); + mutex_enter(&vp->v_interlock); + bgetvp(vp, bp); + mutex_exit(&vp->v_interlock); + mutex_exit(&bufcache_lock); + return (bp); } void lfs_freebuf(struct lfs *fs, struct buf *bp) { - int s; + struct vnode *vp; - s = splbio(); - if (bp->b_vp) + if ((vp = bp->b_vp) != NULL) { + mutex_enter(&bufcache_lock); + mutex_enter(&vp->v_interlock); brelvp(bp); - if (!(bp->b_flags & B_INVAL)) { /* B_INVAL indicates a "fake" buffer */ + mutex_exit(&vp->v_interlock); + mutex_exit(&bufcache_lock); + } + if (!(bp->b_cflags & BC_INVAL)) { /* BC_INVAL indicates a "fake" buffer */ lfs_free(fs, bp->b_data, LFS_NB_UNKNOWN); bp->b_data = NULL; } - splx(s); putiobuf(bp); } @@ 
-809,7 +819,6 @@ lfs_freebuf(struct lfs *fs, struct buf *bp) #define BQ_EMPTY 3 /* buffer headers with no memory */ extern TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES]; -extern struct simplelock bqueue_slock; /* * Count buffers on the "locked" queue, and compare it to a pro-forma count. @@ -821,12 +830,10 @@ lfs_countlocked(int *count, long *bytes, const char *msg) struct buf *bp; int n = 0; long int size = 0L; - int s; - s = splbio(); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); TAILQ_FOREACH(bp, &bufqueues[BQ_LOCKED], b_freelist) { - KASSERT(!(bp->b_flags & B_CALL)); + KASSERT(bp->b_iodone == NULL); n++; size += bp->b_bufsize; #ifdef DIAGNOSTIC @@ -849,8 +856,7 @@ lfs_countlocked(int *count, long *bytes, const char *msg) } *count = n; *bytes = size; - simple_unlock(&bqueue_slock); - splx(s); + mutex_exit(&bufcache_lock); return; } diff --git a/sys/ufs/lfs/lfs_debug.c b/sys/ufs/lfs/lfs_debug.c index 0787ee670551..8e86e14fe609 100644 --- a/sys/ufs/lfs/lfs_debug.c +++ b/sys/ufs/lfs/lfs_debug.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_debug.c,v 1.35 2007/12/12 03:49:03 lukem Exp $ */ +/* $NetBSD: lfs_debug.c,v 1.36 2008/01/02 11:49:10 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. @@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_debug.c,v 1.35 2007/12/12 03:49:03 lukem Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_debug.c,v 1.36 2008/01/02 11:49:10 ad Exp $"); #ifdef DEBUG @@ -95,7 +95,7 @@ int lfs_bwrite_log(struct buf *bp, const char *file, int line) a.a_desc = VDESC(vop_bwrite); a.a_bp = bp; - if (!(bp->b_flags & (B_DELWRI | B_GATHERED))) { + if (!(bp->b_flags & B_GATHERED) && !(bp->b_oflags & BO_DELWRI)) { LFS_ENTER_LOG("write", file, line, bp->b_lblkno, bp->b_flags, curproc->p_pid); } diff --git a/sys/ufs/lfs/lfs_extern.h b/sys/ufs/lfs/lfs_extern.h index f691c9531fc3..c64762c606da 100644 --- a/sys/ufs/lfs/lfs_extern.h +++ b/sys/ufs/lfs/lfs_extern.h @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_extern.h,v 1.93 2007/12/08 19:29:55 pooka Exp $ */ +/* $NetBSD: lfs_extern.h,v 1.94 2008/01/02 11:49:11 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. @@ -121,8 +121,10 @@ extern int locked_queue_count; extern long locked_queue_bytes; extern int lfs_subsys_pages; extern int lfs_dirvcount; -extern struct simplelock lfs_subsys_lock; +extern kmutex_t lfs_lock; extern int lfs_debug_log_subsys[]; +extern kcondvar_t lfs_writing_cv; +extern kcondvar_t locked_queue_cv; __BEGIN_DECLS /* lfs_alloc.c */ @@ -181,7 +183,7 @@ int lfs_vflush(struct vnode *); int lfs_segwrite(struct mount *, int); int lfs_writefile(struct lfs *, struct segment *, struct vnode *); int lfs_writeinode(struct lfs *, struct segment *, struct inode *); -int lfs_gatherblock(struct segment *, struct buf *, int *); +int lfs_gatherblock(struct segment *, struct buf *, kmutex_t *); int lfs_gather(struct lfs *, struct segment *, struct vnode *, int (*match )(struct lfs *, struct buf *)); void lfs_update_single(struct lfs *, struct segment *, struct vnode *, daddr_t, int32_t, int); diff --git a/sys/ufs/lfs/lfs_inode.c b/sys/ufs/lfs/lfs_inode.c index a529be55f309..4c3bb3e8d44d 100644 --- a/sys/ufs/lfs/lfs_inode.c +++ b/sys/ufs/lfs/lfs_inode.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_inode.c,v 1.115 2007/12/08 19:29:55 pooka Exp $ */ +/* $NetBSD: lfs_inode.c,v 1.116 2008/01/02 11:49:11 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 
@@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.115 2007/12/08 19:29:55 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.116 2008/01/02 11:49:11 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -98,7 +98,7 @@ static int lfs_update_seguse(struct lfs *, struct inode *ip, long, size_t); static int lfs_indirtrunc (struct inode *, daddr_t, daddr_t, daddr_t, int, long *, long *, long *, size_t *); static int lfs_blkfree (struct lfs *, struct inode *, daddr_t, size_t, long *, size_t *); -static int lfs_vtruncbuf(struct vnode *, daddr_t, int, int); +static int lfs_vtruncbuf(struct vnode *, daddr_t, bool, int); /* Search a block for a specific dinode. */ struct ufs1_dinode * @@ -134,7 +134,6 @@ lfs_update(struct vnode *vp, const struct timespec *acc, { struct inode *ip; struct lfs *fs = VFSTOUFS(vp->v_mount)->um_lfs; - int s; int flags; ASSERT_NO_SEGLOCK(fs); @@ -149,16 +148,14 @@ lfs_update(struct vnode *vp, const struct timespec *acc, * will cause a panic. So, we must wait until any pending write * for our inode completes, if we are called with UPDATE_WAIT set. */ - s = splbio(); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); while ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT && WRITEINPROG(vp)) { DLOG((DLOG_SEG, "lfs_update: sleeping on ino %d" " (in progress)\n", ip->i_number)); - ltsleep(vp, (PRIBIO+1), "lfs_update", 0, &vp->v_interlock); + cv_wait(&vp->v_cv, &vp->v_interlock); } - simple_unlock(&vp->v_interlock); - splx(s); + mutex_exit(&vp->v_interlock); LFS_ITIMES(ip, acc, mod, NULL); if (updflags & UPDATE_CLOSE) flags = ip->i_flag & (IN_MODIFIED | IN_ACCESSED | IN_CLEANING); @@ -170,24 +167,25 @@ lfs_update(struct vnode *vp, const struct timespec *acc, /* If sync, push back the vnode and any dirty blocks it may have. */ if ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT) { /* Avoid flushing VU_DIROP. */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); ++fs->lfs_diropwait; while (vp->v_uflag & VU_DIROP) { DLOG((DLOG_DIROP, "lfs_update: sleeping on inode %d" " (dirops)\n", ip->i_number)); DLOG((DLOG_DIROP, "lfs_update: vflags 0x%x, iflags" - " 0x%x\n", vp->v_uflag|vp->v_iflag|vp->v_vflag, + " 0x%x\n", + vp->v_iflag | vp->v_vflag | vp->v_uflag, ip->i_flag)); if (fs->lfs_dirops == 0) lfs_flush_fs(fs, SEGM_SYNC); else - ltsleep(&fs->lfs_writer, PRIBIO+1, "lfs_fsync", - 0, &fs->lfs_interlock); + mtsleep(&fs->lfs_writer, PRIBIO+1, "lfs_fsync", + 0, &lfs_lock); /* XXX KS - by falling out here, are we writing the vn twice? 
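
lfs_update() above now parks on the vnode's own condvar under v_interlock while a write is in progress, rather than ltsleep at splbio. The shape of that wait, reduced to a sketch (WRITEINPROG() stands in for the real predicate):

	/* Sketch: predicate and locking as in lfs_update() above. */
	static void
	wait_inode_write(struct vnode *vp)
	{
		mutex_enter(&vp->v_interlock);
		while (WRITEINPROG(vp))		/* re-checked on every wakeup */
			cv_wait(&vp->v_cv, &vp->v_interlock);
		mutex_exit(&vp->v_interlock);
	}

The I/O completion side (lfs_cluster_aiodone, later in this patch) performs the matching cv_broadcast(&vp->v_cv).
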
*/ } --fs->lfs_diropwait; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return lfs_vflush(vp); } return 0; @@ -285,7 +283,7 @@ lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred) if (error) return error; if (ioflag & IO_SYNC) { - simple_lock(&ovp->v_interlock); + mutex_enter(&ovp->v_interlock); VOP_PUTPAGES(ovp, trunc_page(osize & fs->lfs_bmask), round_page(eob), @@ -364,12 +362,12 @@ lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred) memset((char *)bp->b_data + offset, 0, (u_int)(size - offset)); allocbuf(bp, size, 1); - if ((bp->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED) { - simple_lock(&lfs_subsys_lock); + if ((bp->b_cflags & BC_LOCKED) != 0 && bp->b_iodone == NULL) { + mutex_enter(&lfs_lock); locked_queue_bytes -= obufsize - bp->b_bufsize; - simple_unlock(&lfs_subsys_lock); + mutex_exit(&lfs_lock); } - if (bp->b_flags & B_DELWRI) + if (bp->b_oflags & BO_DELWRI) fs->lfs_avail += odb - btofsb(fs, size); (void) VOP_BWRITE(bp); } else { /* vp->v_type == VREG && length < osize && offset != 0 */ @@ -399,7 +397,7 @@ lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred) eoz = MIN(lblktosize(fs, xlbn) + size, osize); uvm_vnp_zerorange(ovp, length, eoz - length); if (round_page(eoz) > round_page(length)) { - simple_lock(&ovp->v_interlock); + mutex_enter(&ovp->v_interlock); error = VOP_PUTPAGES(ovp, round_page(length), round_page(eoz), PGO_CLEANIT | PGO_DEACTIVATE | @@ -446,7 +444,7 @@ lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred) newblks[i] = 0; oip->i_size = oip->i_ffs1_size = osize; - error = lfs_vtruncbuf(ovp, lastblock + 1, 0, 0); + error = lfs_vtruncbuf(ovp, lastblock + 1, false, 0); if (error && !allerror) allerror = error; @@ -564,9 +562,9 @@ done: oip->i_size = oip->i_ffs1_size = length; oip->i_lfs_effnblks -= blocksreleased; oip->i_ffs1_blocks -= real_released; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_bfree += blocksreleased; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); #ifdef DIAGNOSTIC if (oip->i_size == 0 && (oip->i_ffs1_blocks != 0 || oip->i_lfs_effnblks != 0)) { @@ -579,12 +577,12 @@ done: /* * If we truncated to zero, take us off the paging queue. */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (oip->i_size == 0 && oip->i_flags & IN_PAGING) { oip->i_flags &= ~IN_PAGING; TAILQ_REMOVE(&fs->lfs_pchainhd, oip, i_lfs_pchain); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); oip->i_flag |= IN_CHANGE; #ifdef QUOTA @@ -736,7 +734,7 @@ lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, */ vp = ITOV(ip); bp = getblk(vp, lbn, (int)fs->lfs_bsize, 0, 0); - if (bp->b_flags & (B_DONE | B_DELWRI)) { + if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { /* Braces must be here in case trace evaluates to nothing. */ trace(TR_BREADHIT, pack(vp, fs->lfs_bsize), lbn); } else { @@ -812,12 +810,14 @@ lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, if (copy != NULL) { lfs_free(fs, copy, LFS_NB_IBLOCK); } else { - if (bp->b_flags & B_DELWRI) { + mutex_enter(&bufcache_lock); + if (bp->b_oflags & BO_DELWRI) { LFS_UNLOCK_BUF(bp); fs->lfs_avail += btofsb(fs, bp->b_bcount); wakeup(&fs->lfs_avail); } - brelse(bp, BC_INVAL); + brelsel(bp, BC_INVAL); + mutex_exit(&bufcache_lock); } *countp = blocksreleased; @@ -832,77 +832,69 @@ lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, * invalidating blocks. 
*/ static int -lfs_vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo) +lfs_vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo) { struct buf *bp, *nbp; - int s, error; + int error; struct lfs *fs; voff_t off; off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO); if (error) return error; fs = VTOI(vp)->i_lfs; - s = splbio(); ASSERT_SEGLOCK(fs); -restart: + + mutex_enter(&bufcache_lock); +restart: for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); if (bp->b_lblkno < lbn) continue; - simple_lock(&bp->b_interlock); - if (bp->b_flags & B_BUSY) { - bp->b_flags |= B_WANTED; - error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, - "lfs_vtruncbuf", slptimeo, &bp->b_interlock); - if (error) { - splx(s); - return (error); - } + error = bbusy(bp, catch, slptimeo); + if (error == EPASSTHROUGH) goto restart; + if (error != 0) { + mutex_exit(&bufcache_lock); + return (error); } - bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; - if (bp->b_flags & B_DELWRI) { - bp->b_flags &= ~B_DELWRI; + mutex_enter(bp->b_objlock); + if (bp->b_oflags & BO_DELWRI) { + bp->b_oflags &= ~BO_DELWRI; fs->lfs_avail += btofsb(fs, bp->b_bcount); wakeup(&fs->lfs_avail); } + mutex_exit(bp->b_objlock); LFS_UNLOCK_BUF(bp); - simple_unlock(&bp->b_interlock); - brelse(bp, 0); + brelsel(bp, BC_INVAL | BC_VFLUSH); } for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); if (bp->b_lblkno < lbn) continue; - simple_lock(&bp->b_interlock); - if (bp->b_flags & B_BUSY) { - bp->b_flags |= B_WANTED; - error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, - "lfs_vtruncbuf", slptimeo, &bp->b_interlock); - if (error) { - splx(s); - return (error); - } + error = bbusy(bp, catch, slptimeo); + if (error == EPASSTHROUGH) goto restart; + if (error != 0) { + mutex_exit(&bufcache_lock); + return (error); } - bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; - if (bp->b_flags & B_DELWRI) { - bp->b_flags &= ~B_DELWRI; + mutex_enter(bp->b_objlock); + if (bp->b_oflags & BO_DELWRI) { + bp->b_oflags &= ~BO_DELWRI; fs->lfs_avail += btofsb(fs, bp->b_bcount); wakeup(&fs->lfs_avail); } + mutex_exit(bp->b_objlock); LFS_UNLOCK_BUF(bp); - simple_unlock(&bp->b_interlock); - brelse(bp, 0); + brelsel(bp, BC_INVAL | BC_VFLUSH); } - - splx(s); + mutex_exit(&bufcache_lock); return (0); } diff --git a/sys/ufs/lfs/lfs_itimes.c b/sys/ufs/lfs/lfs_itimes.c index 7807a626239a..07fb28a1e326 100644 --- a/sys/ufs/lfs/lfs_itimes.c +++ b/sys/ufs/lfs/lfs_itimes.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_itimes.c,v 1.10 2006/06/23 14:13:02 yamt Exp $ */ +/* $NetBSD: lfs_itimes.c,v 1.11 2008/01/02 11:49:11 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. @@ -36,7 +36,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
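
lfs_vtruncbuf() above adopts the bbusy(9) protocol: EPASSTHROUGH means the buffer was busy and the list may have changed while we slept, so the whole scan restarts. One of the two loops, condensed into a sketch (the BO_DELWRI accounting is elided):

	/* Condensed from lfs_vtruncbuf() above; accounting elided. */
	static int
	drain_clean_bufs(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo)
	{
		struct buf *bp, *nbp;
		int error;

		mutex_enter(&bufcache_lock);
	restart:
		for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp != NULL; bp = nbp) {
			nbp = LIST_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno < lbn)
				continue;
			error = bbusy(bp, catch, slptimeo);
			if (error == EPASSTHROUGH)	/* lost a race; rescan */
				goto restart;
			if (error != 0) {
				mutex_exit(&bufcache_lock);
				return error;
			}
			brelsel(bp, BC_INVAL | BC_VFLUSH);	/* drops BC_BUSY */
		}
		mutex_exit(&bufcache_lock);
		return 0;
	}
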
*/ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_itimes.c,v 1.10 2006/06/23 14:13:02 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_itimes.c,v 1.11 2008/01/02 11:49:11 ad Exp $"); #include #include @@ -87,11 +87,13 @@ lfs_itimes(struct inode *ip, const struct timespec *acc, ifp->if_atime_sec = acc->tv_sec; ifp->if_atime_nsec = acc->tv_nsec; LFS_BWRITE_LOG(ibp); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_flags |= LFS_IFDIRTY; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } else { + mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_ACCESSED); + mutex_exit(&lfs_lock); } } if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFY)) { @@ -112,10 +114,12 @@ lfs_itimes(struct inode *ip, const struct timespec *acc, ip->i_ffs1_ctime = cre->tv_sec; ip->i_ffs1_ctimensec = cre->tv_nsec; } + mutex_enter(&lfs_lock); if (ip->i_flag & (IN_CHANGE | IN_UPDATE)) LFS_SET_UINO(ip, IN_MODIFIED); if (ip->i_flag & IN_MODIFY) LFS_SET_UINO(ip, IN_ACCESSED); + mutex_exit(&lfs_lock); } ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY); } diff --git a/sys/ufs/lfs/lfs_rfw.c b/sys/ufs/lfs/lfs_rfw.c index b3fdb9417693..ba043928a853 100644 --- a/sys/ufs/lfs/lfs_rfw.c +++ b/sys/ufs/lfs/lfs_rfw.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_rfw.c,v 1.8 2007/12/12 18:35:21 he Exp $ */ +/* $NetBSD: lfs_rfw.c,v 1.9 2008/01/02 11:49:11 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_rfw.c,v 1.8 2007/12/12 18:35:21 he Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_rfw.c,v 1.9 2008/01/02 11:49:11 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -199,15 +199,13 @@ lfs_rf_valloc(struct lfs *fs, ino_t ino, int vers, struct lwp *l, lfs_unmark_vnode(vp); (void)lfs_vunref(vp); vp->v_uflag &= ~VU_DIROP; - simple_lock(&fs->lfs_interlock); - simple_lock(&lfs_subsys_lock); + mutex_enter(&lfs_lock); --lfs_dirvcount; - simple_unlock(&lfs_subsys_lock); --fs->lfs_dirvcount; TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); wakeup(&lfs_dirvcount); wakeup(&fs->lfs_dirvcount); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } *vpp = vp; return error; @@ -247,7 +245,7 @@ update_meta(struct lfs *fs, ino_t ino, int vers, daddr_t lbn, return (error); } /* No need to write, the block is already on disk */ - if (bp->b_flags & B_DELWRI) { + if (bp->b_oflags & BO_DELWRI) { LFS_UNLOCK_BUF(bp); fs->lfs_avail += btofsb(fs, bp->b_bcount); } @@ -570,12 +568,12 @@ check_segsum(struct lfs *fs, daddr_t offset, u_int64_t nextserial, if (flags & CHECK_UPDATE) { fs->lfs_avail -= (offset - oldoffset); /* Don't clog the buffer queue */ - simple_lock(&lfs_subsys_lock); + mutex_enter(&lfs_lock); if (locked_queue_count > LFS_MAX_BUFS || locked_queue_bytes > LFS_MAX_BYTES) { lfs_flush(fs, SEGM_CKP, 0); } - simple_unlock(&lfs_subsys_lock); + mutex_exit(&lfs_lock); } err2: diff --git a/sys/ufs/lfs/lfs_segment.c b/sys/ufs/lfs/lfs_segment.c index 771be8a31c73..feee8059e78f 100644 --- a/sys/ufs/lfs/lfs_segment.c +++ b/sys/ufs/lfs/lfs_segment.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_segment.c,v 1.206 2007/10/10 20:42:35 ad Exp $ */ +/* $NetBSD: lfs_segment.c,v 1.207 2008/01/02 11:49:11 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 
@@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.206 2007/10/10 20:42:35 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.207 2008/01/02 11:49:11 ad Exp $"); #ifdef DEBUG # define vndebug(vp, str) do { \ @@ -116,8 +116,7 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.206 2007/10/10 20:42:35 ad Exp $") MALLOC_JUSTDEFINE(M_SEGMENT, "LFS segment", "Segment for LFS"); extern int count_lock_queue(void); -extern struct simplelock vnode_free_list_slock; /* XXX */ -extern struct simplelock bqueue_slock; /* XXX */ +extern kmutex_t vnode_free_list_lock; /* XXX */ static void lfs_generic_callback(struct buf *, void (*)(struct buf *)); static void lfs_free_aiodone(struct buf *); @@ -203,7 +202,7 @@ lfs_vflush(struct vnode *vp) struct lfs *fs; struct segment *sp; struct buf *bp, *nbp, *tbp, *tnbp; - int error, s; + int error; int flushed; int relock; int loopcount; @@ -216,14 +215,16 @@ lfs_vflush(struct vnode *vp) ASSERT_NO_SEGLOCK(fs); if (ip->i_flag & IN_CLEANING) { ivndebug(vp,"vflush/in_cleaning"); + mutex_enter(&lfs_lock); LFS_CLR_UINO(ip, IN_CLEANING); LFS_SET_UINO(ip, IN_MODIFIED); + mutex_exit(&lfs_lock); /* * Toss any cleaning buffers that have real counterparts * to avoid losing new data. */ - s = splbio(); + mutex_enter(&vp->v_interlock); for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); if (!LFS_IS_MALLOC_BUF(bp)) @@ -239,7 +240,6 @@ lfs_vflush(struct vnode *vp) struct vm_page *pg; voff_t off; - simple_lock(&vp->v_interlock); for (off = lblktosize(fs, bp->b_lblkno); off < lblktosize(fs, bp->b_lblkno + 1); off += PAGE_SIZE) { @@ -251,13 +251,13 @@ lfs_vflush(struct vnode *vp) fs->lfs_avail += btofsb(fs, bp->b_bcount); wakeup(&fs->lfs_avail); + mutex_exit(&vp->v_interlock); lfs_freebuf(fs, bp); + mutex_enter(&vp->v_interlock); bp = NULL; - simple_unlock(&vp->v_interlock); - goto nextbp; + break; } } - simple_unlock(&vp->v_interlock); } for (tbp = LIST_FIRST(&vp->v_dirtyblkhd); tbp; tbp = tnbp) @@ -270,26 +270,24 @@ lfs_vflush(struct vnode *vp) fs->lfs_avail += btofsb(fs, bp->b_bcount); wakeup(&fs->lfs_avail); + mutex_exit(&vp->v_interlock); lfs_freebuf(fs, bp); + mutex_enter(&vp->v_interlock); bp = NULL; break; } } - nextbp: - ; } - splx(s); + } else { + mutex_enter(&vp->v_interlock); } /* If the node is being written, wait until that is done */ - simple_lock(&vp->v_interlock); - s = splbio(); - if (WRITEINPROG(vp)) { + while (WRITEINPROG(vp)) { ivndebug(vp,"vflush/writeinprog"); - ltsleep(vp, (PRIBIO+1), "lfs_vw", 0, &vp->v_interlock); + cv_wait(&vp->v_cv, &vp->v_interlock); } - splx(s); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); /* Protect against VI_XLOCK deadlock in vinvalbuf() */ lfs_seglock(fs, SEGM_SYNC); @@ -298,40 +296,41 @@ lfs_vflush(struct vnode *vp) if (ip->i_lfs_iflags & LFSI_DELETED) { DLOG((DLOG_VNODE, "lfs_vflush: ino %d freed, not flushing\n", ip->i_number)); - s = splbio(); /* Drain v_numoutput */ - simple_lock(&global_v_numoutput_slock); + mutex_enter(&vp->v_interlock); while (vp->v_numoutput > 0) { - vp->v_iflag |= VI_BWAIT; - ltsleep(&vp->v_numoutput, PRIBIO + 1, "lfs_vf4", 0, - &global_v_numoutput_slock); + cv_wait(&vp->v_cv, &vp->v_interlock); } - simple_unlock(&global_v_numoutput_slock); KASSERT(vp->v_numoutput == 0); + mutex_exit(&vp->v_interlock); + mutex_enter(&bufcache_lock); for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); KASSERT((bp->b_flags & B_GATHERED) == 0); - if (bp->b_flags & B_DELWRI) { /* XXX always true? 
*/ + if (bp->b_oflags & BO_DELWRI) { /* XXX always true? */ fs->lfs_avail += btofsb(fs, bp->b_bcount); wakeup(&fs->lfs_avail); } /* Copied from lfs_writeseg */ - if (bp->b_flags & B_CALL) { + if (bp->b_iodone != NULL) { + mutex_exit(&bufcache_lock); biodone(bp); + mutex_enter(&bufcache_lock); } else { bremfree(bp); LFS_UNLOCK_BUF(bp); - bp->b_flags &= - ~(B_READ | B_DELWRI | B_GATHERED); - bp->b_flags |= B_DONE; + mutex_enter(&vp->v_interlock); + bp->b_flags &= ~(B_READ | B_GATHERED); + bp->b_oflags = (bp->b_oflags & ~BO_DELWRI) | BO_DONE; bp->b_error = 0; reassignbuf(bp, vp); + mutex_exit(&vp->v_interlock); brelse(bp, 0); } } - splx(s); + mutex_exit(&bufcache_lock); LFS_CLR_UINO(ip, IN_CLEANING); LFS_CLR_UINO(ip, IN_MODIFIED | IN_ACCESSED); ip->i_flag &= ~IN_ALLMOD; @@ -352,18 +351,13 @@ lfs_vflush(struct vnode *vp) lfs_segunlock(fs); /* Make sure that any pending buffers get written */ - s = splbio(); - simple_lock(&global_v_numoutput_slock); + mutex_enter(&vp->v_interlock); while (vp->v_numoutput > 0) { - vp->v_iflag |= VI_BWAIT; - ltsleep(&vp->v_numoutput, PRIBIO + 1, "lfs_vf3", 0, - &global_v_numoutput_slock); + cv_wait(&vp->v_cv, &vp->v_interlock); } - simple_unlock(&global_v_numoutput_slock); - splx(s); - KASSERT(LIST_FIRST(&vp->v_dirtyblkhd) == NULL); KASSERT(vp->v_numoutput == 0); + mutex_exit(&vp->v_interlock); return error; } @@ -408,7 +402,9 @@ lfs_vflush(struct vnode *vp) */ KDASSERT(ip->i_number != LFS_IFILE_INUM); lfs_writeinode(fs, sp, ip); + mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_MODIFIED); + mutex_exit(&lfs_lock); lfs_writeseg(fs, sp); lfs_segunlock(fs); lfs_segunlock_relock(fs); @@ -456,29 +452,24 @@ lfs_vflush(struct vnode *vp) * We compare the iocount against 1, not 0, because it is * artificially incremented by lfs_seglock(). */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (fs->lfs_seglock > 1) { while (fs->lfs_iocount > 1) - (void)ltsleep(&fs->lfs_iocount, PRIBIO + 1, - "lfs_vflush", 0, &fs->lfs_interlock); + (void)mtsleep(&fs->lfs_iocount, PRIBIO + 1, + "lfs_vflush", 0, &lfs_lock); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); lfs_segunlock(fs); /* Wait for these buffers to be recovered by aiodoned */ - s = splbio(); - simple_lock(&global_v_numoutput_slock); + mutex_enter(&vp->v_interlock); while (vp->v_numoutput > 0) { - vp->v_iflag |= VI_BWAIT; - ltsleep(&vp->v_numoutput, PRIBIO + 1, "lfs_vf2", 0, - &global_v_numoutput_slock); + cv_wait(&vp->v_cv, &vp->v_interlock); } - simple_unlock(&global_v_numoutput_slock); - splx(s); - KASSERT(LIST_FIRST(&vp->v_dirtyblkhd) == NULL); KASSERT(vp->v_numoutput == 0); + mutex_exit(&vp->v_interlock); fs->lfs_flushvp = NULL; KASSERT(fs->lfs_flushvp_fakevref == 0); @@ -497,6 +488,7 @@ lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op) ASSERT_SEGLOCK(fs); loop: /* start at last (newest) vnode. */ + mutex_enter(&mntvnode_lock); TAILQ_FOREACH_REVERSE(vp, &mp->mnt_vnodelist, vnodelst, v_mntvnodes) { /* * If the vnode that we are about to sync is no longer @@ -509,11 +501,15 @@ lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op) * due to our own previous putpages. * Start actual segment write here to avoid deadlock. 
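
The freed-inode path above shows the new three-way flag split: B_* bits stay in b_flags (buffer owner), BO_* bits move to b_oflags (covered by bp->b_objlock), and BC_* bits to b_cflags (covered by bufcache_lock); the old B_CALL bit disappears in favour of testing b_iodone directly. Hypothetical helpers, purely to document which lock covers which word:

	/* Illustrative only; these helpers are not part of the kernel API. */
	static bool
	buf_is_done(struct buf *bp)		/* BO_*: object lock */
	{
		KASSERT(mutex_owned(bp->b_objlock));
		return (bp->b_oflags & BO_DONE) != 0;
	}

	static bool
	buf_is_busy(struct buf *bp)		/* BC_*: bufcache_lock */
	{
		KASSERT(mutex_owned(&bufcache_lock));
		return (bp->b_cflags & BC_BUSY) != 0;
	}

	static bool
	buf_is_async(struct buf *bp)		/* replaces the old B_CALL test */
	{
		return bp->b_iodone != NULL;
	}
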
*/ + mutex_exit(&mntvnode_lock); (void)lfs_writeseg(fs, sp); goto loop; } - if (vp->v_type == VNON) { + mutex_enter(&vp->v_interlock); + if (vp->v_type == VNON || vismarker(vp) || + (vp->v_iflag & VI_CLEAN) != 0) { + mutex_exit(&vp->v_interlock); continue; } @@ -521,11 +517,13 @@ lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op) if ((op == VN_DIROP && !(vp->v_uflag & VU_DIROP)) || (op != VN_DIROP && op != VN_CLEAN && (vp->v_uflag & VU_DIROP))) { + mutex_exit(&vp->v_interlock); vndebug(vp,"dirop"); continue; } if (op == VN_EMPTY && !VPISEMPTY(vp)) { + mutex_exit(&vp->v_interlock); vndebug(vp,"empty"); continue; } @@ -533,12 +531,15 @@ lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op) if (op == VN_CLEAN && ip->i_number != LFS_IFILE_INUM && vp != fs->lfs_flushvp && !(ip->i_flag & IN_CLEANING)) { + mutex_exit(&vp->v_interlock); vndebug(vp,"cleaning"); continue; } + mutex_exit(&mntvnode_lock); if (lfs_vref(vp)) { vndebug(vp,"vref"); + mutex_enter(&mntvnode_lock); continue; } @@ -566,11 +567,16 @@ lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op) lfs_writeseg(fs, sp); if (!VPISEMPTY(vp) && !WRITEINPROG(vp) && - !(ip->i_flag & IN_ALLMOD)) + !(ip->i_flag & IN_ALLMOD)) { + mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_MODIFIED); + mutex_exit(&lfs_lock); + } + mutex_enter(&mntvnode_lock); break; } error = 0; /* XXX not quite right */ + mutex_enter(&mntvnode_lock); continue; } @@ -578,7 +584,9 @@ lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op) if (WRITEINPROG(vp)) { ivndebug(vp,"writevnodes/write2"); } else if (!(ip->i_flag & IN_ALLMOD)) { + mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_MODIFIED); + mutex_exit(&lfs_lock); } } (void) lfs_writeinode(fs, sp, ip); @@ -590,7 +598,10 @@ lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op) lfs_vunref_head(vp); else lfs_vunref(vp); + + mutex_enter(&mntvnode_lock); } + mutex_exit(&mntvnode_lock); return error; } @@ -606,7 +617,7 @@ lfs_segwrite(struct mount *mp, int flags) struct segment *sp; struct vnode *vp; SEGUSE *segusep; - int do_ckp, did_ckp, error, s; + int do_ckp, did_ckp, error; unsigned n, segleft, maxseg, sn, i, curseg; int writer_set = 0; int dirty; @@ -709,7 +720,7 @@ lfs_segwrite(struct mount *mp, int flags) } } - LOCK_ASSERT(LFS_SEGLOCK_HELD(fs)); + KASSERT(LFS_SEGLOCK_HELD(fs)); did_ckp = 0; if (do_ckp || fs->lfs_doifile) { @@ -720,9 +731,9 @@ lfs_segwrite(struct mount *mp, int flags) #ifdef DEBUG LFS_ENTER_LOG("pretend", __FILE__, __LINE__, 0, 0, curproc->p_pid); #endif - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_flags &= ~LFS_IFDIRTY; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); ip = VTOI(vp); @@ -748,9 +759,9 @@ lfs_segwrite(struct mount *mp, int flags) redo = lfs_writeinode(fs, sp, ip); #endif redo += lfs_writeseg(fs, sp); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); redo += (fs->lfs_flags & LFS_IFDIRTY); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); #ifdef DEBUG if (++loopcount > 2) log(LOG_NOTICE, "lfs_segwrite: looping count=%d\n", @@ -765,7 +776,7 @@ lfs_segwrite(struct mount *mp, int flags) * for other parts of the Ifile to be dirty after the loop * above, since we hold the segment lock. 
*/ - s = splbio(); + mutex_enter(&vp->v_interlock); if (LIST_EMPTY(&vp->v_dirtyblkhd)) { LFS_CLR_UINO(ip, IN_ALLMOD); } @@ -786,7 +797,7 @@ lfs_segwrite(struct mount *mp, int flags) panic("dirty blocks"); } #endif - splx(s); + mutex_exit(&vp->v_interlock); VOP_UNLOCK(vp, 0); } else { (void) lfs_writeseg(fs, sp); @@ -866,7 +877,7 @@ lfs_writefile(struct lfs *fs, struct segment *sp, struct vnode *vp) * everything we've got. */ if (!IS_FLUSHING(fs, vp)) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, 0, 0, PGO_CLEANIT | PGO_ALLPAGES | PGO_LOCKED); } @@ -952,7 +963,7 @@ lfs_update_iaddr(struct lfs *fs, struct segment *sp, struct inode *ip, daddr_t n if (sntod(fs, sn) + btofsb(fs, fs->lfs_sumsize) == fs->lfs_offset) { LFS_SEGENTRY(sup, fs, sn, bp); - KASSERT(bp->b_flags & B_DELWRI); + KASSERT(bp->b_oflags & BO_DELWRI); LFS_WRITESEGENTRY(sup, fs, sn, bp); /* fs->lfs_flags |= LFS_IFDIRTY; */ redo_ifile |= 1; @@ -1011,9 +1022,9 @@ lfs_update_iaddr(struct lfs *fs, struct segment *sp, struct inode *ip, daddr_t n redo_ifile |= (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED)); if (redo_ifile) { - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_flags |= LFS_IFDIRTY; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* Don't double-account */ fs->lfs_idaddr = 0x0; } @@ -1240,14 +1251,16 @@ lfs_writeinode(struct lfs *fs, struct segment *sp, struct inode *ip) (sp->ninodes % INOPB(fs)); /* Not dirty any more */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_flags &= ~LFS_IFDIRTY; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } if (gotblk) { + mutex_enter(&bufcache_lock); LFS_LOCK_BUF(bp); - brelse(bp, 0); + brelsel(bp, 0); + mutex_exit(&bufcache_lock); } /* Increment inode count in segment summary block. */ @@ -1264,7 +1277,7 @@ lfs_writeinode(struct lfs *fs, struct segment *sp, struct inode *ip) } int -lfs_gatherblock(struct segment *sp, struct buf *bp, int *sptr) +lfs_gatherblock(struct segment *sp, struct buf *bp, kmutex_t *mptr) { struct lfs *fs; int vers; @@ -1283,8 +1296,8 @@ lfs_gatherblock(struct segment *sp, struct buf *bp, int *sptr) blksinblk = howmany(bp->b_bcount, fs->lfs_bsize); if (sp->sum_bytes_left < sizeof(int32_t) * blksinblk || sp->seg_bytes_left < bp->b_bcount) { - if (sptr) - splx(*sptr); + if (mptr) + mutex_exit(mptr); lfs_updatemeta(sp); vers = sp->fip->fi_version; @@ -1293,8 +1306,8 @@ lfs_gatherblock(struct segment *sp, struct buf *bp, int *sptr) /* Add the current file to the segment summary. */ lfs_acquire_finfo(fs, VTOI(sp->vp)->i_number, vers); - if (sptr) - *sptr = splbio(); + if (mptr) + mutex_enter(mptr); return (1); } @@ -1325,14 +1338,14 @@ lfs_gather(struct lfs *fs, struct segment *sp, struct vnode *vp, int (*match)(struct lfs *, struct buf *)) { struct buf *bp, *nbp; - int s, count = 0; + int count = 0; ASSERT_SEGLOCK(fs); if (vp->v_type == VBLK) return 0; KASSERT(sp->vp == NULL); sp->vp = vp; - s = splbio(); + mutex_enter(&bufcache_lock); #ifndef LFS_NO_BACKBUF_HACK /* This is a hack to see if ordering the blocks in LFS makes a difference. 
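
lfs_gatherblock() now takes the caller's kmutex_t * where it used to take a saved spl, so it can drop bufcache_lock around the blocking lfs_updatemeta() call and re-take it before returning. The hand-off pattern, sketched with the segment accounting omitted (gather_one() is a stand-in, not the real function body):

	/* Sketch of the lock hand-off in lfs_gatherblock() below. */
	static int
	gather_one(struct segment *sp, struct buf *bp, kmutex_t *mptr)
	{
		KASSERT(mptr == NULL || mutex_owned(mptr));

		if (sp->seg_bytes_left < bp->b_bcount) {
			if (mptr != NULL)
				mutex_exit(mptr);	/* about to sleep */
			lfs_updatemeta(sp);		/* flush what we have */
			if (mptr != NULL)
				mutex_enter(mptr);
			return 1;			/* caller rescans list */
		}
		/* ...account the block while the lock is still held... */
		return 0;
	}

Returning 1 sends lfs_gather() back to its loop: label, mirroring the goto loop in the hunk below.
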
*/ @@ -1356,10 +1369,12 @@ loop: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); #endif /* LFS_NO_BACKBUF_HACK */ - if ((bp->b_flags & (B_BUSY|B_GATHERED)) || !match(fs, bp)) { + if ((bp->b_cflags & BC_BUSY) != 0 || + (bp->b_flags & B_GATHERED) != 0 || !match(fs, bp)) { #ifdef DEBUG if (vp == fs->lfs_ivnode && - (bp->b_flags & (B_BUSY|B_GATHERED)) == B_BUSY) + (bp->b_cflags & BC_BUSY) != 0 && + (bp->b_flags & B_GATHERED) == 0) log(LOG_NOTICE, "lfs_gather: ifile lbn %" PRId64 " busy (%x) at 0x%x", bp->b_lblkno, bp->b_flags, @@ -1369,29 +1384,29 @@ loop: } #ifdef DIAGNOSTIC # ifdef LFS_USE_B_INVAL - if ((bp->b_flags & (B_CALL|B_INVAL)) == B_INVAL) { + if ((bp->b_flags & BC_INVAL) != 0 && bp->b_iodone == NULL) { DLOG((DLOG_SEG, "lfs_gather: lbn %" PRId64 - " is B_INVAL\n", bp->b_lblkno)); + " is BC_INVAL\n", bp->b_lblkno)); VOP_PRINT(bp->b_vp); } # endif /* LFS_USE_B_INVAL */ - if (!(bp->b_flags & B_DELWRI)) - panic("lfs_gather: bp not B_DELWRI"); - if (!(bp->b_flags & B_LOCKED)) { + if (!(bp->b_oflags & BO_DELWRI)) + panic("lfs_gather: bp not BO_DELWRI"); + if (!(bp->b_cflags & BC_LOCKED)) { DLOG((DLOG_SEG, "lfs_gather: lbn %" PRId64 - " blk %" PRId64 " not B_LOCKED\n", + " blk %" PRId64 " not BC_LOCKED\n", bp->b_lblkno, dbtofsb(fs, bp->b_blkno))); VOP_PRINT(bp->b_vp); - panic("lfs_gather: bp not B_LOCKED"); + panic("lfs_gather: bp not BC_LOCKED"); } #endif - if (lfs_gatherblock(sp, bp, &s)) { + if (lfs_gatherblock(sp, bp, &bufcache_lock)) { goto loop; } count++; } - splx(s); + mutex_exit(&bufcache_lock); lfs_updatemeta(sp); KASSERT(sp->vp == vp); sp->vp = NULL; @@ -1538,9 +1553,9 @@ lfs_update_single(struct lfs *fs, struct segment *sp, ip->i_number, lbn, daddr)); sup->su_nbytes -= osize; if (!(bp->b_flags & B_GATHERED)) { - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_flags |= LFS_IFDIRTY; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } LFS_WRITESEGENTRY(sup, fs, oldsn, bp); } @@ -1764,12 +1779,12 @@ lfs_initseg(struct lfs *fs) fs->lfs_cleanint[fs->lfs_cleanind] = fs->lfs_offset; if (++fs->lfs_cleanind >= LFS_MAX_CLEANIND) { /* "1" is the artificial inc in lfs_seglock */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (fs->lfs_iocount > 1) { - ltsleep(&fs->lfs_iocount, PRIBIO + 1, - "lfs_initseg", 0, &fs->lfs_interlock); + mtsleep(&fs->lfs_iocount, PRIBIO + 1, + "lfs_initseg", 0, &lfs_lock); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); fs->lfs_cleanind = 0; } } @@ -1846,7 +1861,7 @@ lfs_newseg(struct lfs *fs) ASSERT_SEGLOCK(fs); /* Honor LFCNWRAPSTOP */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (fs->lfs_nextseg < fs->lfs_curseg && fs->lfs_nowrap) { if (fs->lfs_wrappass) { log(LOG_NOTICE, "%s: wrappass=%d\n", @@ -1857,11 +1872,11 @@ lfs_newseg(struct lfs *fs) fs->lfs_wrapstatus = LFS_WRAP_WAITING; wakeup(&fs->lfs_nowrap); log(LOG_NOTICE, "%s: waiting at log wrap\n", fs->lfs_fsmnt); - ltsleep(&fs->lfs_wrappass, PVFS, "newseg", 10 * hz, - &fs->lfs_interlock); + mtsleep(&fs->lfs_wrappass, PVFS, "newseg", 10 * hz, + &lfs_lock); } fs->lfs_wrapstatus = LFS_WRAP_GOING; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); LFS_SEGENTRY(sup, fs, dtosn(fs, fs->lfs_nextseg), bp); DLOG((DLOG_SU, "lfs_newseg: seg %d := 0 in newseg\n", @@ -1936,13 +1951,11 @@ lfs_newclusterbuf(struct lfs *fs, struct vnode *vp, daddr_t addr, } /* Get an empty buffer header, or maybe one with something on it */ - bp = getiobuf(); - bp->b_flags = B_BUSY | B_CALL; + bp = 
getiobuf(vp, true); bp->b_dev = NODEV; bp->b_blkno = bp->b_lblkno = addr; bp->b_iodone = lfs_cluster_callback; bp->b_private = cl; - bp->b_vp = vp; return bp; } @@ -1950,10 +1963,10 @@ lfs_newclusterbuf(struct lfs *fs, struct vnode *vp, daddr_t addr, int lfs_writeseg(struct lfs *fs, struct segment *sp) { - struct buf **bpp, *bp, *cbp, *newbp; + struct buf **bpp, *bp, *cbp, *newbp, *unbusybp; SEGUSE *sup; SEGSUM *ssp; - int i, s; + int i; int do_again, nblocks, byteoffset; size_t el_size; struct lfs_cluster *cl; @@ -2041,30 +2054,26 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) * there are any, replace them with copies that have UNASSIGNED * instead. */ + mutex_enter(&bufcache_lock); for (bpp = sp->bpp, i = nblocks - 1; i--;) { ++bpp; bp = *bpp; - if (bp->b_flags & B_CALL) { /* UBC or malloced buffer */ - bp->b_flags |= B_BUSY; + if (bp->b_iodone != NULL) { /* UBC or malloced buffer */ + bp->b_cflags |= BC_BUSY; continue; } - simple_lock(&bp->b_interlock); - s = splbio(); - while (bp->b_flags & B_BUSY) { + while (bp->b_cflags & BC_BUSY) { DLOG((DLOG_SEG, "lfs_writeseg: avoiding potential" " data summary corruption for ino %d, lbn %" PRId64 "\n", VTOI(bp->b_vp)->i_number, bp->b_lblkno)); - bp->b_flags |= B_WANTED; - ltsleep(bp, (PRIBIO + 1), "lfs_writeseg", 0, - &bp->b_interlock); - splx(s); - s = splbio(); + bp->b_cflags |= BC_WANTED; + cv_wait(&bp->b_busy, &bufcache_lock); } - bp->b_flags |= B_BUSY; - splx(s); - simple_unlock(&bp->b_interlock); + bp->b_cflags |= BC_BUSY; + mutex_exit(&bufcache_lock); + unbusybp = NULL; /* * Check and replace indirect block UNWRITTEN bogosity. @@ -2105,20 +2114,14 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) *bpp = newbp; bp->b_flags &= ~B_GATHERED; bp->b_error = 0; - if (bp->b_flags & B_CALL) { + if (bp->b_iodone != NULL) { DLOG((DLOG_SEG, "lfs_writeseg: " "indir bp should not be B_CALL\n")); - s = splbio(); biodone(bp); - splx(s); bp = NULL; } else { /* Still on free list, leave it there */ - s = splbio(); - bp->b_flags &= ~B_BUSY; - if (bp->b_flags & B_WANTED) - wakeup(bp); - splx(s); + unbusybp = bp; /* * We have to re-decrement lfs_avail * since this block is going to come @@ -2132,7 +2135,15 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) lfs_freebuf(fs, newbp); } } + mutex_enter(&bufcache_lock); + if (unbusybp != NULL) { + unbusybp->b_cflags &= ~BC_BUSY; + if (unbusybp->b_cflags & BC_WANTED) + cv_broadcast(&bp->b_busy); + } } + mutex_exit(&bufcache_lock); + /* * Compute checksum across data and then across summary; the first * block (the summary block) is skipped. 
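
Where lfs_writeseg() used B_WANTED plus ltsleep on the buffer address, it now sleeps on the buffer's own b_busy condvar under bufcache_lock, as the hunk above shows. The idiom in isolation (a distillation, not a new kernel function):

	/* Distilled from the lfs_writeseg() loop above. */
	static void
	acquire_buf(struct buf *bp)
	{
		mutex_enter(&bufcache_lock);
		while (bp->b_cflags & BC_BUSY) {
			bp->b_cflags |= BC_WANTED;
			cv_wait(&bp->b_busy, &bufcache_lock);
		}
		bp->b_cflags |= BC_BUSY;
		mutex_exit(&bufcache_lock);
	}

The release side clears BC_BUSY and, if BC_WANTED is set, broadcasts b_busy, as in the unbusybp handling above.
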
@@ -2149,8 +2160,8 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
 		for (byteoffset = 0; byteoffset < (*bpp)->b_bcount;
 		     byteoffset += fs->lfs_bsize) {
 #ifdef LFS_USE_B_INVAL
-			if (((*bpp)->b_flags & (B_CALL | B_INVAL)) ==
-			    (B_CALL | B_INVAL)) {
+			if (((*bpp)->b_cflags & BC_INVAL) != 0 &&
+			    (*bpp)->b_iodone != NULL) {
 				if (copyin((void *)(*bpp)->b_saveaddr +
 					   byteoffset, dp, el_size)) {
 					panic("lfs_writeseg: copyin failed [1]:"
@@ -2177,12 +2188,12 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
 	ssp->ss_sumsum = cksum(&ssp->ss_datasum,
 	    fs->lfs_sumsize - sizeof(ssp->ss_sumsum));
-	simple_lock(&fs->lfs_interlock);
+	mutex_enter(&lfs_lock);
 	fs->lfs_bfree -= (btofsb(fs, ninos * fs->lfs_ibsize) +
 			  btofsb(fs, fs->lfs_sumsize));
 	fs->lfs_dmeta += (btofsb(fs, ninos * fs->lfs_ibsize) +
 			  btofsb(fs, fs->lfs_sumsize));
-	simple_unlock(&fs->lfs_interlock);
+	mutex_exit(&lfs_lock);
 
 	/*
 	 * When we simply write the blocks we lose a rotation for every block
@@ -2202,7 +2213,8 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
 		cbp = lfs_newclusterbuf(fs, devvp, (*bpp)->b_blkno, i);
 		cl = cbp->b_private;
 
-		cbp->b_flags |= B_ASYNC | B_BUSY;
+		cbp->b_flags |= B_ASYNC;
+		cbp->b_cflags |= BC_BUSY;
 		cbp->b_bcount = 0;
 
 #if defined(DEBUG) && defined(DIAGNOSTIC)
@@ -2218,9 +2230,9 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
 		/*
 		 * Construct the cluster.
 		 */
-		simple_lock(&fs->lfs_interlock);
+		mutex_enter(&lfs_lock);
 		++fs->lfs_iocount;
-		simple_unlock(&fs->lfs_interlock);
+		mutex_exit(&lfs_lock);
 		while (i && cbp->b_bcount < CHUNKSIZE) {
 			bp = *bpp;
@@ -2261,8 +2273,8 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
 			 * from the buffer indicated.
 			 * XXX == what do I do on an error?
 			 */
-			if ((bp->b_flags & (B_CALL|B_INVAL)) ==
-			    (B_CALL|B_INVAL)) {
+			if ((bp->b_cflags & BC_INVAL) != 0 &&
+			    bp->b_iodone != NULL) {
 				if (copyin(bp->b_saveaddr, p, bp->b_bcount))
 					panic("lfs_writeseg: "
 					    "copyin failed [2]");
@@ -2277,14 +2289,18 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
 			cbp->b_bcount += bp->b_bcount;
 			cl->bufsize += bp->b_bcount;
 
-			bp->b_flags &= ~(B_READ | B_DELWRI | B_DONE);
+			bp->b_flags &= ~B_READ;
 			bp->b_error = 0;
 			cl->bpp[cl->bufcount++] = bp;
+
 			vp = bp->b_vp;
-			s = splbio();
+			mutex_enter(&bufcache_lock);
+			mutex_enter(&vp->v_interlock);
+			bp->b_oflags &= ~(BO_DELWRI | BO_DONE);
 			reassignbuf(bp, vp);
-			V_INCR_NUMOUTPUT(vp);
-			splx(s);
+			vp->v_numoutput++;
+			mutex_exit(&vp->v_interlock);
+			mutex_exit(&bufcache_lock);
 
 			bpp++;
 			i--;
@@ -2293,9 +2309,9 @@ lfs_writeseg(struct lfs *fs, struct segment *sp)
 			BIO_SETPRIO(cbp, BPRIO_TIMECRITICAL);
 		else
 			BIO_SETPRIO(cbp, BPRIO_TIMELIMITED);
-		s = splbio();
-		V_INCR_NUMOUTPUT(devvp);
-		splx(s);
+		mutex_enter(&devvp->v_interlock);
+		devvp->v_numoutput++;
+		mutex_exit(&devvp->v_interlock);
 		VOP_STRATEGY(devvp, cbp);
 		curproc->p_stats->p_ru.ru_oublock++;
 	}
@@ -2318,8 +2334,8 @@ void
 lfs_writesuper(struct lfs *fs, daddr_t daddr)
 {
 	struct buf *bp;
-	int s;
 	struct vnode *devvp = VTOI(fs->lfs_ivnode)->i_devvp;
+	int s;
 
 	ASSERT_MAYBE_SEGLOCK(fs);
 #ifdef DIAGNOSTIC
@@ -2330,15 +2346,15 @@ lfs_writesuper(struct lfs *fs, daddr_t daddr)
 	 * progress, we risk not having a complete checkpoint if we crash.
 	 * So, block here if a superblock write is in progress.
*/ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); s = splbio(); while (fs->lfs_sbactive) { - ltsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs sb", 0, - &fs->lfs_interlock); + mtsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs sb", 0, + &lfs_lock); } fs->lfs_sbactive = daddr; splx(s); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* Set timestamp of this version of the superblock */ if (fs->lfs_version == 1) @@ -2353,8 +2369,9 @@ lfs_writesuper(struct lfs *fs, daddr_t daddr) LFS_SBPAD - sizeof(struct dlfs)); *(struct dlfs *)bp->b_data = fs->lfs_dlfs; - bp->b_flags |= B_BUSY | B_CALL | B_ASYNC; - bp->b_flags &= ~(B_DONE | B_READ | B_DELWRI); + bp->b_cflags |= BC_BUSY; + bp->b_flags = (bp->b_flags & ~B_READ) | B_ASYNC; + bp->b_oflags &= ~(BO_DONE | BO_DELWRI); bp->b_error = 0; bp->b_iodone = lfs_supercallback; @@ -2363,12 +2380,14 @@ lfs_writesuper(struct lfs *fs, daddr_t daddr) else BIO_SETPRIO(bp, BPRIO_TIMELIMITED); curproc->p_stats->p_ru.ru_oublock++; - s = splbio(); - V_INCR_NUMOUTPUT(bp->b_vp); - splx(s); - simple_lock(&fs->lfs_interlock); + + mutex_enter(&devvp->v_interlock); + devvp->v_numoutput++; + mutex_exit(&devvp->v_interlock); + + mutex_enter(&lfs_lock); ++fs->lfs_iocount; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); VOP_STRATEGY(devvp, bp); } @@ -2437,9 +2456,11 @@ lfs_free_aiodone(struct buf *bp) { struct lfs *fs; + KERNEL_LOCK(1, curlwp); fs = bp->b_private; ASSERT_NO_SEGLOCK(fs); lfs_freebuf(fs, bp); + KERNEL_UNLOCK_LAST(curlwp); } static void @@ -2447,15 +2468,17 @@ lfs_super_aiodone(struct buf *bp) { struct lfs *fs; + KERNEL_LOCK(1, curlwp); fs = bp->b_private; ASSERT_NO_SEGLOCK(fs); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_sbactive = 0; if (--fs->lfs_iocount <= 1) wakeup(&fs->lfs_iocount); - simple_unlock(&fs->lfs_interlock); wakeup(&fs->lfs_sbactive); + mutex_exit(&lfs_lock); lfs_freebuf(fs, bp); + KERNEL_UNLOCK_LAST(curlwp); } static void @@ -2464,9 +2487,11 @@ lfs_cluster_aiodone(struct buf *bp) struct lfs_cluster *cl; struct lfs *fs; struct buf *tbp, *fbp; - struct vnode *vp, *devvp; + struct vnode *vp, *devvp, *ovp; struct inode *ip; - int s, error; + int error; + + KERNEL_LOCK(1, curlwp); error = bp->b_error; cl = bp->b_private; @@ -2477,7 +2502,7 @@ lfs_cluster_aiodone(struct buf *bp) /* Put the pages back, and release the buffer */ while (cl->bufcount--) { tbp = cl->bpp[cl->bufcount]; - KASSERT(tbp->b_flags & B_BUSY); + KASSERT(tbp->b_cflags & BC_BUSY); if (error) { tbp->b_error = error; } @@ -2493,47 +2518,50 @@ lfs_cluster_aiodone(struct buf *bp) LFS_BCLEAN_LOG(fs, tbp); - if (!(tbp->b_flags & B_CALL)) { - KASSERT(tbp->b_flags & B_LOCKED); - s = splbio(); - simple_lock(&bqueue_slock); + mutex_enter(&bufcache_lock); + if (tbp->b_iodone == NULL) { + KASSERT(tbp->b_cflags & BC_LOCKED); bremfree(tbp); - simple_unlock(&bqueue_slock); - if (vp) + if (vp) { + mutex_enter(&vp->v_interlock); reassignbuf(tbp, vp); - splx(s); + mutex_exit(&vp->v_interlock); + } tbp->b_flags |= B_ASYNC; /* for biodone */ } - if ((tbp->b_flags & (B_LOCKED | B_DELWRI)) == B_LOCKED) + if (((tbp->b_cflags | tbp->b_oflags) & + (BC_LOCKED | BO_DELWRI)) == BC_LOCKED) LFS_UNLOCK_BUF(tbp); - if (tbp->b_flags & B_DONE) { + if (tbp->b_oflags & BO_DONE) { DLOG((DLOG_SEG, "blk %d biodone already (flags %lx)\n", cl->bufcount, (long)tbp->b_flags)); } - if ((tbp->b_flags & B_CALL) && !LFS_IS_MALLOC_BUF(tbp)) { + if (tbp->b_iodone != NULL && !LFS_IS_MALLOC_BUF(tbp)) { /* * A buffer from the page daemon. 
			 * We use the same iodone as it does,
			 * so we must manually disassociate its
			 * buffers from the vp.
			 */
-			if (tbp->b_vp) {
+			if ((ovp = tbp->b_vp) != NULL) {
 				/* This is just silly */
-				s = splbio();
+				mutex_enter(&ovp->v_interlock);
 				brelvp(tbp);
+				mutex_exit(&ovp->v_interlock);
 				tbp->b_vp = vp;
-				splx(s);
+				tbp->b_objlock = &vp->v_interlock;
 			}
 			/* Put it back the way it was */
 			tbp->b_flags |= B_ASYNC;
-			/* Master buffers have B_AGE */
+			/* Master buffers have BC_AGE */
 			if (tbp->b_private == tbp)
-				tbp->b_flags |= B_AGE;
+				tbp->b_cflags |= BC_AGE;
 		}
-		s = splbio();
+		mutex_exit(&bufcache_lock);
+
 		biodone(tbp);
 
 		/*
@@ -2546,7 +2574,8 @@ lfs_cluster_aiodone(struct buf *bp)
 		 * XXX KS - Shouldn't we set *both* if both types
 		 * of blocks are present (traverse the dirty list?)
 		 */
-		simple_lock(&global_v_numoutput_slock);
+		mutex_enter(&lfs_lock);
+		mutex_enter(&vp->v_interlock);
 		if (vp != devvp && vp->v_numoutput == 0 &&
 		    (fbp = LIST_FIRST(&vp->v_dirtyblkhd)) != NULL) {
 			ip = VTOI(vp);
@@ -2557,9 +2586,9 @@ lfs_cluster_aiodone(struct buf *bp)
 			else
 				LFS_SET_UINO(ip, IN_MODIFIED);
 		}
-		simple_unlock(&global_v_numoutput_slock);
-		splx(s);
-		wakeup(vp);
+		cv_broadcast(&vp->v_cv);
+		mutex_exit(&vp->v_interlock);
+		mutex_exit(&lfs_lock);
 	}
 
 	/* Fix up the cluster buffer, and release it */
@@ -2572,14 +2601,16 @@ lfs_cluster_aiodone(struct buf *bp)
 		if (--cl->seg->seg_iocount == 0)
 			wakeup(&cl->seg->seg_iocount);
 	}
-	simple_lock(&fs->lfs_interlock);
+	mutex_enter(&lfs_lock);
 #ifdef DIAGNOSTIC
 	if (fs->lfs_iocount == 0)
 		panic("lfs_cluster_aiodone: zero iocount");
 #endif
 	if (--fs->lfs_iocount <= 1)
 		wakeup(&fs->lfs_iocount);
-	simple_unlock(&fs->lfs_interlock);
+	mutex_exit(&lfs_lock);
+
+	KERNEL_UNLOCK_LAST(curlwp);
 
 	pool_put(&fs->lfs_bpppool, cl->bpp);
 	cl->bpp = NULL;
@@ -2696,7 +2727,7 @@ lfs_shellsort(struct buf **bp_array, int32_t *lb_array, int nmemb, int size)
 }
 
 /*
- * Call vget with LK_NOWAIT. If we are the one who holds VI_XLOCK/VI_FREEING,
+ * Call vget with LK_NOWAIT. If we are the one who holds VI_XLOCK,
  * however, we must press on. Just fake success in that case.
 */
 int
@@ -2705,6 +2736,8 @@ lfs_vref(struct vnode *vp)
 	int error;
 	struct lfs *fs;
 
+	KASSERT(mutex_owned(&vp->v_interlock));
+
 	fs = VTOI(vp)->i_lfs;
 	ASSERT_MAYBE_SEGLOCK(fs);
 
@@ -2714,7 +2747,7 @@ lfs_vref(struct vnode *vp)
 	 * being able to flush all of the pages from this vnode, which
 	 * will cause it to panic. So, return 0 if a flush is in progress.
 	 */
-	error = vget(vp, LK_NOWAIT);
+	error = vget(vp, LK_NOWAIT | LK_INTERLOCK);
 	if (error == EBUSY && IS_FLUSHING(VTOI(vp)->i_lfs, vp)) {
 		++fs->lfs_flushvp_fakevref;
 		return 0;
@@ -2743,7 +2776,7 @@ lfs_vunref(struct vnode *vp)
 	}
 
 	/* does not call inactive */
-	vrele2(vp, 0);
+	vrele(vp);	/* XXXAD fix later */
 }
 
 /*
@@ -2761,7 +2794,7 @@ lfs_vunref_head(struct vnode *vp)
 	ASSERT_SEGLOCK(VTOI(vp)->i_lfs);
 
 	/* does not call inactive, inserts non-held vnode at head of freelist */
-	vrele2(vp, 1);
+	vrele(vp);	/* XXXAD fix later */
 }
 
diff --git a/sys/ufs/lfs/lfs_subr.c b/sys/ufs/lfs/lfs_subr.c
index 01e2cda3b94e..b9b44155b581 100644
--- a/sys/ufs/lfs/lfs_subr.c
+++ b/sys/ufs/lfs/lfs_subr.c
@@ -1,4 +1,4 @@
-/*	$NetBSD: lfs_subr.c,v 1.71 2007/10/10 20:42:35 ad Exp $	*/
+/*	$NetBSD: lfs_subr.c,v 1.72 2008/01/02 11:49:12 ad Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.71 2007/10/10 20:42:35 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.72 2008/01/02 11:49:12 ad Exp $"); #include #include @@ -165,16 +165,16 @@ lfs_free_resblks(struct lfs *fs) pool_destroy(&fs->lfs_segpool); pool_destroy(&fs->lfs_clpool); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); for (i = 0; i < LFS_N_TOTAL; i++) { while (fs->lfs_resblk[i].inuse) - ltsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0, - &fs->lfs_interlock); + mtsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0, + &lfs_lock); if (fs->lfs_resblk[i].p != NULL) free(fs->lfs_resblk[i].p, M_SEGMENT); } free(fs->lfs_resblk, M_SEGMENT); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } static unsigned int @@ -216,7 +216,7 @@ lfs_malloc(struct lfs *fs, size_t size, int type) * and several indirect blocks. */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); /* skip over blocks of other types */ for (i = 0, start = 0; i < type; i++) start += lfs_res_qty[i]; @@ -231,19 +231,19 @@ lfs_malloc(struct lfs *fs, size_t size, int type) s = splbio(); LIST_INSERT_HEAD(&fs->lfs_reshash[h], re, res); splx(s); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return r; } } DLOG((DLOG_MALLOC, "sleeping on %s (%d)\n", lfs_res_names[type], lfs_res_qty[type])); - ltsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0, - &fs->lfs_interlock); + mtsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0, + &lfs_lock); DLOG((DLOG_MALLOC, "done sleeping on %s\n", lfs_res_names[type])); } /* NOTREACHED */ - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return r; } @@ -259,7 +259,7 @@ lfs_free(struct lfs *fs, void *p, int type) ASSERT_MAYBE_SEGLOCK(fs); h = lfs_mhash(p); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); s = splbio(); LIST_FOREACH(re, &fs->lfs_reshash[h], res) { if (re->p == p) { @@ -268,7 +268,7 @@ lfs_free(struct lfs *fs, void *p, int type) re->inuse = 0; wakeup(&fs->lfs_resblk); splx(s); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return; } } @@ -279,7 +279,7 @@ lfs_free(struct lfs *fs, void *p, int type) } #endif splx(s); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* * If we didn't find it, free it. @@ -296,21 +296,21 @@ lfs_seglock(struct lfs *fs, unsigned long flags) { struct segment *sp; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (fs->lfs_seglock) { if (fs->lfs_lockpid == curproc->p_pid && fs->lfs_locklwp == curlwp->l_lid) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); ++fs->lfs_seglock; fs->lfs_sp->seg_flags |= flags; return 0; } else if (flags & SEGM_PAGEDAEMON) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return EWOULDBLOCK; } else { while (fs->lfs_seglock) { - (void)ltsleep(&fs->lfs_seglock, PRIBIO + 1, - "lfs_seglock", 0, &fs->lfs_interlock); + (void)mtsleep(&fs->lfs_seglock, PRIBIO + 1, + "lfs_seglock", 0, &lfs_lock); } } } @@ -318,7 +318,7 @@ lfs_seglock(struct lfs *fs, unsigned long flags) fs->lfs_seglock = 1; fs->lfs_lockpid = curproc->p_pid; fs->lfs_locklwp = curlwp->l_lid; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); fs->lfs_cleanind = 0; #ifdef DEBUG @@ -340,9 +340,9 @@ lfs_seglock(struct lfs *fs, unsigned long flags) * so we artificially increment it by one until we've scheduled all of * the writes we intend to do. 
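
lfs_malloc() above keeps the wakeup()-based hand-off but swaps ltsleep for mtsleep(9), which atomically releases the named kmutex instead of a simplelock. The reserve-pool consumer in outline; try_grab_resblk() is a hypothetical helper standing in for the inline search over fs->lfs_resblk:

	/* Sketch; try_grab_resblk() is hypothetical, not kernel API. */
	static void *
	take_reserve(struct lfs *fs)
	{
		void *p;

		mutex_enter(&lfs_lock);
		while ((p = try_grab_resblk(fs)) == NULL)
			/* sleeps, then re-acquires lfs_lock on wakeup */
			mtsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0,
			    &lfs_lock);
		mutex_exit(&lfs_lock);
		return p;
	}

lfs_free() provides the matching wakeup(&fs->lfs_resblk) once a block returns to the pool.
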
*/ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); ++fs->lfs_iocount; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return 0; } @@ -356,52 +356,37 @@ lfs_unmark_dirop(struct lfs *fs) int doit; ASSERT_NO_SEGLOCK(fs); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); doit = !(fs->lfs_flags & LFS_UNDIROP); if (doit) fs->lfs_flags |= LFS_UNDIROP; if (!doit) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return; } for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) { nip = TAILQ_NEXT(ip, i_lfs_dchain); - simple_unlock(&fs->lfs_interlock); vp = ITOV(ip); - - simple_lock(&vp->v_interlock); - if (VOP_ISLOCKED(vp) == LK_EXCLOTHER) { - simple_lock(&fs->lfs_interlock); - simple_unlock(&vp->v_interlock); + if (VOP_ISLOCKED(vp) == LK_EXCLOTHER) continue; - } if ((VTOI(vp)->i_flag & (IN_ADIROP | IN_ALLMOD)) == 0) { - simple_lock(&fs->lfs_interlock); - simple_lock(&lfs_subsys_lock); --lfs_dirvcount; - simple_unlock(&lfs_subsys_lock); --fs->lfs_dirvcount; vp->v_uflag &= ~VU_DIROP; TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); - simple_unlock(&fs->lfs_interlock); wakeup(&lfs_dirvcount); - simple_unlock(&vp->v_interlock); - simple_lock(&fs->lfs_interlock); fs->lfs_unlockvp = vp; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); vrele(vp); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_unlockvp = NULL; - simple_unlock(&fs->lfs_interlock); - } else - simple_unlock(&vp->v_interlock); - simple_lock(&fs->lfs_interlock); + } } fs->lfs_flags &= ~LFS_UNDIROP; - simple_unlock(&fs->lfs_interlock); wakeup(&fs->lfs_flags); + mutex_exit(&lfs_lock); } static void @@ -426,13 +411,13 @@ lfs_auto_segclean(struct lfs *fs) (SEGUSE_DIRTY | SEGUSE_EMPTY)) { /* Make sure the sb is written before we clean */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); s = splbio(); while (waited == 0 && fs->lfs_sbactive) - ltsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs asb", - 0, &fs->lfs_interlock); + mtsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs asb", + 0, &lfs_lock); splx(s); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); waited = 1; if ((error = lfs_do_segclean(fs, i)) != 0) { @@ -458,13 +443,13 @@ lfs_segunlock(struct lfs *fs) sp = fs->lfs_sp; - simple_lock(&fs->lfs_interlock); - LOCK_ASSERT(LFS_SEGLOCK_HELD(fs)); + mutex_enter(&lfs_lock); + KASSERT(LFS_SEGLOCK_HELD(fs)); if (fs->lfs_seglock == 1) { if ((sp->seg_flags & (SEGM_PROT | SEGM_CLEAN)) == 0 && LFS_STARVED_FOR_SEGS(fs) == 0) do_unmark_dirop = 1; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); sync = sp->seg_flags & SEGM_SYNC; ckp = sp->seg_flags & SEGM_CKP; @@ -494,13 +479,13 @@ lfs_segunlock(struct lfs *fs) * At the moment, the user's process hangs around so we can * sleep. 
*/ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (--fs->lfs_iocount == 0) { LFS_DEBUG_COUNTLOCKED("lfs_segunlock"); } if (fs->lfs_iocount <= 1) wakeup(&fs->lfs_iocount); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* * If we're not checkpointing, we don't have to block * other processes to wait for a synchronous write @@ -510,11 +495,11 @@ lfs_segunlock(struct lfs *fs) #ifdef DEBUG LFS_ENTER_LOG("segunlock_std", __FILE__, __LINE__, 0, 0, curproc->p_pid); #endif - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); --fs->lfs_seglock; fs->lfs_lockpid = 0; fs->lfs_locklwp = 0; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); wakeup(&fs->lfs_seglock); } /* @@ -524,16 +509,16 @@ lfs_segunlock(struct lfs *fs) * superblocks to make sure that the checkpoint described * by a superblock completed. */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (ckp && sync && fs->lfs_iocount) - (void)ltsleep(&fs->lfs_iocount, PRIBIO + 1, - "lfs_iocount", 0, &fs->lfs_interlock); + (void)mtsleep(&fs->lfs_iocount, PRIBIO + 1, + "lfs_iocount", 0, &lfs_lock); while (sync && sp->seg_iocount) { - (void)ltsleep(&sp->seg_iocount, PRIBIO + 1, - "seg_iocount", 0, &fs->lfs_interlock); + (void)mtsleep(&sp->seg_iocount, PRIBIO + 1, + "seg_iocount", 0, &lfs_lock); DLOG((DLOG_SEG, "sleeping on iocount %x == %d\n", sp, sp->seg_iocount)); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); if (sync) pool_put(&fs->lfs_segpool, sp); @@ -554,11 +539,11 @@ lfs_segunlock(struct lfs *fs) #ifdef DEBUG LFS_ENTER_LOG("segunlock_ckp", __FILE__, __LINE__, 0, 0, curproc->p_pid); #endif - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); --fs->lfs_seglock; fs->lfs_lockpid = 0; fs->lfs_locklwp = 0; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); wakeup(&fs->lfs_seglock); } /* Reenable fragment size changes */ @@ -566,11 +551,11 @@ lfs_segunlock(struct lfs *fs) if (do_unmark_dirop) lfs_unmark_dirop(fs); } else if (fs->lfs_seglock == 0) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); panic ("Seglock not held"); } else { --fs->lfs_seglock; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } } @@ -585,22 +570,22 @@ lfs_writer_enter(struct lfs *fs, const char *wmesg) int error = 0; ASSERT_MAYBE_SEGLOCK(fs); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); /* disallow dirops during flush */ fs->lfs_writer++; while (fs->lfs_dirops > 0) { ++fs->lfs_diropwait; - error = ltsleep(&fs->lfs_writer, PRIBIO+1, wmesg, 0, - &fs->lfs_interlock); + error = mtsleep(&fs->lfs_writer, PRIBIO+1, wmesg, 0, + &lfs_lock); --fs->lfs_diropwait; } if (error) fs->lfs_writer--; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return error; } @@ -611,9 +596,9 @@ lfs_writer_leave(struct lfs *fs) bool dowakeup; ASSERT_MAYBE_SEGLOCK(fs); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); dowakeup = !(--fs->lfs_writer); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); if (dowakeup) wakeup(&fs->lfs_dirops); } @@ -651,11 +636,11 @@ lfs_segunlock_relock(struct lfs *fs) /* Wait for the cleaner */ lfs_wakeup_cleaner(fs); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (LFS_STARVED_FOR_SEGS(fs)) - ltsleep(&fs->lfs_avail, PRIBIO, "relock", 0, - &fs->lfs_interlock); - simple_unlock(&fs->lfs_interlock); + mtsleep(&fs->lfs_avail, PRIBIO, "relock", 0, + &lfs_lock); + mutex_exit(&lfs_lock); /* Put the segment lock back the way it was. 
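
lfs_writer_enter()/lfs_writer_leave() above form a simple gate: a writer bumps lfs_writer and waits out active dirops; the last writer out wakes them. The leave side in miniature, with the wakeup deliberately issued after the mutex is dropped, exactly as in the patch:

	/* Mirrors lfs_writer_leave() above. */
	static void
	writer_leave(struct lfs *fs)
	{
		bool dowakeup;

		mutex_enter(&lfs_lock);
		dowakeup = (--fs->lfs_writer == 0);
		mutex_exit(&lfs_lock);
		if (dowakeup)
			wakeup(&fs->lfs_dirops);	/* wakeup(9) needs no lock */
	}
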
*/ while(n--) diff --git a/sys/ufs/lfs/lfs_syscalls.c b/sys/ufs/lfs/lfs_syscalls.c index 750609162e77..85d60ae1fece 100644 --- a/sys/ufs/lfs/lfs_syscalls.c +++ b/sys/ufs/lfs/lfs_syscalls.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_syscalls.c,v 1.125 2007/12/20 23:03:14 dsl Exp $ */ +/* $NetBSD: lfs_syscalls.c,v 1.126 2008/01/02 11:49:12 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc. @@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.125 2007/12/20 23:03:14 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.126 2008/01/02 11:49:12 ad Exp $"); #ifndef LFS # define LFS /* for prototypes in syscallargs.h */ @@ -379,8 +379,11 @@ lfs_markv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, /* XXX but only write the inode if it's the right one */ if (blkp->bi_inode != LFS_IFILE_INUM) { LFS_IENTRY(ifp, fs, blkp->bi_inode, bp); - if (ifp->if_daddr == blkp->bi_daddr) + if (ifp->if_daddr == blkp->bi_daddr) { + mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_CLEANING); + mutex_exit(&lfs_lock); + } brelse(bp, 0); } continue; @@ -443,7 +446,7 @@ lfs_markv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, panic("lfs_markv: partial indirect block?" " size=%d\n", blkp->bi_size); bp = getblk(vp, blkp->bi_lbn, blkp->bi_size, 0, 0); - if (!(bp->b_flags & (B_DONE|B_DELWRI))) { /* B_CACHE */ + if (!(bp->b_oflags & (BO_DONE|BO_DELWRI))) { /* * The block in question was not found * in the cache; i.e., the block that @@ -724,15 +727,19 @@ lfs_bmapv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt) * A regular call to VFS_VGET could deadlock * here. Instead, we try an unlocked access. */ + mutex_enter(&ufs_ihash_lock); vp = ufs_ihashlookup(ump->um_dev, blkp->bi_inode); if (vp != NULL && !(vp->v_iflag & VI_XLOCK)) { ip = VTOI(vp); + mutex_enter(&vp->v_interlock); + mutex_exit(&ufs_ihash_lock); if (lfs_vref(vp)) { v_daddr = LFS_UNUSED_DADDR; continue; } numrefed++; } else { + mutex_exit(&ufs_ihash_lock); /* * Don't VFS_VGET if we're being unmounted, * since we hold vfs_busy(). 
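
lfs_vref() now requires the caller to hold v_interlock and passes LK_INTERLOCK to vget(), which releases the interlock whether or not the reference is obtained. The lookup-then-ref dance from lfs_bmapv() above, condensed into one sketch (lookup_and_ref() is illustrative, not a real function):

	/* Condensed from the lfs_bmapv() hunk above. */
	static struct vnode *
	lookup_and_ref(struct ufsmount *ump, ino_t ino)
	{
		struct vnode *vp;

		mutex_enter(&ufs_ihash_lock);
		vp = ufs_ihashlookup(ump->um_dev, ino);
		if (vp == NULL) {
			mutex_exit(&ufs_ihash_lock);
			return NULL;
		}
		/* Interlock first, then drop the hash lock: no reclaim race. */
		mutex_enter(&vp->v_interlock);
		mutex_exit(&ufs_ihash_lock);
		if (lfs_vref(vp) != 0)		/* consumes v_interlock */
			return NULL;		/* vnode was being recycled */
		return vp;
	}
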
@@ -902,14 +909,14 @@ lfs_do_segclean(struct lfs *fs, unsigned long segnum) if (fs->lfs_version > 1 && segnum == 0 && fs->lfs_start < btofsb(fs, LFS_LABELPAD)) fs->lfs_avail -= btofsb(fs, LFS_LABELPAD) - fs->lfs_start; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fs->lfs_bfree += sup->su_nsums * btofsb(fs, fs->lfs_sumsize) + btofsb(fs, sup->su_ninos * fs->lfs_ibsize); fs->lfs_dmeta -= sup->su_nsums * btofsb(fs, fs->lfs_sumsize) + btofsb(fs, sup->su_ninos * fs->lfs_ibsize); if (fs->lfs_dmeta < 0) fs->lfs_dmeta = 0; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); sup->su_flags &= ~SEGUSE_DIRTY; LFS_WRITESEGENTRY(sup, fs, segnum, bp); @@ -918,10 +925,10 @@ lfs_do_segclean(struct lfs *fs, unsigned long segnum) --cip->dirty; fs->lfs_nclean = cip->clean; cip->bfree = fs->lfs_bfree; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); cip->avail = fs->lfs_avail - fs->lfs_ravail - fs->lfs_favail; wakeup(&fs->lfs_avail); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); (void) LFS_BWRITE_LOG(bp); if (lfs_dostats) @@ -1008,21 +1015,29 @@ extern kmutex_t ufs_hashlock; int lfs_fasthashget(dev_t dev, ino_t ino, struct vnode **vpp) { - if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) { - if ((*vpp)->v_iflag & VI_XLOCK) { + struct vnode *vp; + + mutex_enter(&ufs_ihash_lock); + if ((vp = ufs_ihashlookup(dev, ino)) != NULL) { + mutex_enter(&vp->v_interlock); + mutex_exit(&ufs_ihash_lock); + if (vp->v_iflag & VI_XLOCK) { DLOG((DLOG_CLEAN, "lfs_fastvget: ino %d VI_XLOCK\n", ino)); lfs_stats.clean_vnlocked++; + mutex_exit(&vp->v_interlock); return EAGAIN; } - if (lfs_vref(*vpp)) { + if (lfs_vref(vp)) { DLOG((DLOG_CLEAN, "lfs_fastvget: lfs_vref failed" " for ino %d\n", ino)); lfs_stats.clean_inlocked++; return EAGAIN; } - } else - *vpp = NULL; + } else { + mutex_exit(&ufs_ihash_lock); + } + *vpp = vp; return (0); } @@ -1048,12 +1063,12 @@ lfs_fastvget(struct mount *mp, ino_t ino, daddr_t daddr, struct vnode **vpp, * Wait until the filesystem is fully mounted before allowing vget * to complete. This prevents possible problems with roll-forward. */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (fs->lfs_flags & LFS_NOTYET) { - ltsleep(&fs->lfs_flags, PRIBIO+1, "lfs_fnotyet", 0, - &fs->lfs_interlock); + mtsleep(&fs->lfs_flags, PRIBIO+1, "lfs_fnotyet", 0, + &lfs_lock); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* * This is playing fast and loose. Someone may have the inode @@ -1191,9 +1206,9 @@ lfs_fakebuf(struct lfs *fs, struct vnode *vp, int lbn, size_t size, void *uaddr) KDASSERT(bp->b_iodone == lfs_callback); #if 0 - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); ++fs->lfs_iocount; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); #endif bp->b_bufsize = size; bp->b_bcount = size; diff --git a/sys/ufs/lfs/lfs_vfsops.c b/sys/ufs/lfs/lfs_vfsops.c index f9da548f8db5..4c4c22526104 100644 --- a/sys/ufs/lfs/lfs_vfsops.c +++ b/sys/ufs/lfs/lfs_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_vfsops.c,v 1.251 2007/12/12 02:56:04 lukem Exp $ */ +/* $NetBSD: lfs_vfsops.c,v 1.252 2008/01/02 11:49:12 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc. 
@@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.251 2007/12/12 02:56:04 lukem Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.252 2008/01/02 11:49:12 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_lfs.h" @@ -204,10 +204,10 @@ lfs_writerd(void *arg) lfs_writer_daemon = curproc->p_pid; - simple_lock(&lfs_subsys_lock); + mutex_enter(&lfs_lock); for (;;) { - ltsleep(&lfs_writer_daemon, PVM | PNORELOCK, "lfswriter", hz/10, - &lfs_subsys_lock); + mtsleep(&lfs_writer_daemon, PVM | PNORELOCK, "lfswriter", hz/10, + &lfs_lock); /* * Look through the list of LFSs to see if any of them @@ -223,7 +223,7 @@ lfs_writerd(void *arg) if (strncmp(mp->mnt_stat.f_fstypename, MOUNT_LFS, sizeof(mp->mnt_stat.f_fstypename)) == 0) { fs = VFSTOUFS(mp)->um_lfs; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); fsflags = 0; if ((fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) || lfs_dirvcount > LFS_MAX_DIROP) && @@ -233,15 +233,15 @@ lfs_writerd(void *arg) DLOG((DLOG_FLUSH, "lfs_writerd: pdflush set\n")); fs->lfs_pdflush = 0; lfs_flush_fs(fs, fsflags); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } else if (!TAILQ_EMPTY(&fs->lfs_pchainhd)) { DLOG((DLOG_FLUSH, "lfs_writerd: pchain non-empty\n")); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); lfs_writer_enter(fs, "wrdirop"); lfs_flush_pchain(fs); lfs_writer_leave(fs); } else - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } mutex_enter(&mountlist_lock); @@ -253,7 +253,7 @@ lfs_writerd(void *arg) /* * If global state wants a flush, flush everything. */ - simple_lock(&lfs_subsys_lock); + mutex_enter(&lfs_lock); loopcount = 0; if (lfs_do_flush || locked_queue_count > LFS_MAX_BUFS || locked_queue_bytes > LFS_MAX_BYTES || @@ -303,7 +303,9 @@ lfs_init() #ifdef DEBUG memset(lfs_log, 0, sizeof(lfs_log)); #endif - simple_lock_init(&lfs_subsys_lock); + mutex_init(&lfs_lock, MUTEX_DEFAULT, IPL_NONE); + cv_init(&locked_queue_cv, "lfsbuf"); + cv_init(&lfs_writing_cv, "lfsflush"); } void @@ -315,8 +317,10 @@ lfs_reinit() void lfs_done() { - ufs_done(); + mutex_destroy(&lfs_lock); + cv_destroy(&locked_queue_cv); + cv_destroy(&lfs_writing_cv); pool_destroy(&lfs_inode_pool); pool_destroy(&lfs_dinode_pool); pool_destroy(&lfs_inoext_pool); @@ -735,10 +739,9 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) fs->lfs_pdflush = 0; fs->lfs_sleepers = 0; fs->lfs_pages = 0; - simple_lock_init(&fs->lfs_interlock); rw_init(&fs->lfs_fraglock); - lockinit(&fs->lfs_iflock, PINOD, "lfs_iflock", 0, 0); - lockinit(&fs->lfs_stoplock, PINOD, "lfs_stoplock", 0, 0); + rw_init(&fs->lfs_iflock); + cv_init(&fs->lfs_stopcv, "lfsstop"); /* Set the file system readonly/modify bits. 
*/ fs->lfs_ronly = ronly; @@ -902,7 +905,7 @@ lfs_unmount(struct mount *mp, int mntflags) struct ufsmount *ump; struct lfs *fs; int error, flags, ronly; - int s; + vnode_t *vp; flags = 0; if (mntflags & MNT_FORCE) @@ -917,11 +920,11 @@ lfs_unmount(struct mount *mp, int mntflags) /* wake up the cleaner so it can die */ lfs_wakeup_cleaner(fs); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (fs->lfs_sleepers) - ltsleep(&fs->lfs_sleepers, PRIBIO + 1, "lfs_sleepers", 0, - &fs->lfs_interlock); - simple_unlock(&fs->lfs_interlock); + mtsleep(&fs->lfs_sleepers, PRIBIO + 1, "lfs_sleepers", 0, + &lfs_lock); + mutex_exit(&lfs_lock); #ifdef QUOTA if (mp->mnt_flag & MNT_QUOTA) { @@ -944,23 +947,23 @@ lfs_unmount(struct mount *mp, int mntflags) return (error); if ((error = VFS_SYNC(mp, 1, l->l_cred)) != 0) return (error); - s = splbio(); - if (LIST_FIRST(&fs->lfs_ivnode->v_dirtyblkhd)) + vp = fs->lfs_ivnode; + mutex_enter(&vp->v_interlock); + if (LIST_FIRST(&vp->v_dirtyblkhd)) panic("lfs_unmount: still dirty blocks on ifile vnode"); - splx(s); + mutex_exit(&vp->v_interlock); /* Explicitly write the superblock, to update serial and pflags */ fs->lfs_pflags |= LFS_PF_CLEAN; lfs_writesuper(fs, fs->lfs_sboffs[0]); lfs_writesuper(fs, fs->lfs_sboffs[1]); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (fs->lfs_iocount) - ltsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs_umount", 0, - &fs->lfs_interlock); - simple_unlock(&fs->lfs_interlock); + mtsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs_umount", 0, + &lfs_lock); + mutex_exit(&lfs_lock); /* Finish with the Ifile, now that we're done with it */ - vrele(fs->lfs_ivnode); vgone(fs->lfs_ivnode); ronly = !fs->lfs_ronly; @@ -982,7 +985,9 @@ lfs_unmount(struct mount *mp, int mntflags) free(fs->lfs_suflags[1], M_SEGMENT); free(fs->lfs_suflags, M_SEGMENT); lfs_free_resblks(fs); + cv_destroy(&fs->lfs_stopcv); rw_destroy(&fs->lfs_fraglock); + rw_destroy(&fs->lfs_iflock); free(fs, M_UFSMNT); free(ump, M_UFSMNT); @@ -1058,12 +1063,12 @@ lfs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) * XXX than a segment and lfs_nextseg is close to the end of * XXX the log, we'll likely block. */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (fs->lfs_nowrap && fs->lfs_nextseg < fs->lfs_curseg) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return 0; } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); lfs_writer_enter(fs, "lfs_dirops"); @@ -1109,11 +1114,11 @@ lfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) * If the filesystem is not completely mounted yet, suspend * any access requests (wait for roll-forward to complete). 
*/ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while ((fs->lfs_flags & LFS_NOTYET) && curproc->p_pid != fs->lfs_rfpid) - ltsleep(&fs->lfs_flags, PRIBIO+1, "lfs_notyet", 0, - &fs->lfs_interlock); - simple_unlock(&fs->lfs_interlock); + mtsleep(&fs->lfs_flags, PRIBIO+1, "lfs_notyet", 0, + &lfs_lock); + mutex_exit(&lfs_lock); retry: if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL) @@ -1201,7 +1206,7 @@ retry: #ifdef DEBUG /* If the seglock is held look at the bpp to see what is there anyway */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (fs->lfs_seglock > 0) { struct buf **bpp; struct ufs1_dinode *dp; @@ -1222,18 +1227,18 @@ retry: } } } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); #endif /* DEBUG */ panic("lfs_vget: dinode not found"); } - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (fs->lfs_iocount) { DLOG((DLOG_VNODE, "lfs_vget: dinode %d not found, retrying...\n", ino)); - (void)ltsleep(&fs->lfs_iocount, PRIBIO + 1, - "lfs ifind", 1, &fs->lfs_interlock); + (void)mtsleep(&fs->lfs_iocount, PRIBIO + 1, + "lfs ifind", 1, &lfs_lock); } else retries = LFS_IFIND_RETRIES; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); goto again; } *ip->i_din.ffs1_din = *dip; @@ -1264,6 +1269,7 @@ lfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) IFILE *ifp; int32_t daddr; struct lfs *fs; + vnode_t *vp; if (fhp->fid_len != sizeof(struct lfid)) return EINVAL; @@ -1281,7 +1287,10 @@ lfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) fs->lfs_cleansz - fs->lfs_segtabsz) * fs->lfs_ifpb) return ESTALE; - if (ufs_ihashlookup(VFSTOUFS(mp)->um_dev, lfh.lfid_ino) == NULLVP) { + mutex_enter(&ufs_ihash_lock); + vp = ufs_ihashlookup(VFSTOUFS(mp)->um_dev, lfh.lfid_ino); + mutex_exit(&ufs_ihash_lock); + if (vp == NULL) { LFS_IENTRY(ifp, fs, lfh.lfid_ino, bp); daddr = ifp->if_daddr; brelse(bp, 0); @@ -1525,7 +1534,7 @@ static int lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags) { - int i, s, error, run, haveeof = 0; + int i, error, run, haveeof = 0; int fs_bshift; vaddr_t kva; off_t eof, offset, startoffset = 0; @@ -1607,10 +1616,10 @@ lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, KASSERT(!(pgs[i]->flags & PG_PAGEOUT)); pgs[i]->flags &= ~PG_DELWRI; pgs[i]->flags |= PG_PAGEOUT; - uvmexp.paging++; - uvm_lock_pageq(); + uvm_pageout_start(1); + mutex_enter(&uvm_pageqlock); uvm_pageunwire(pgs[i]); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } } @@ -1658,21 +1667,18 @@ lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, UVMPAGER_MAPIN_WAITOK); } - s = splbio(); - simple_lock(&global_v_numoutput_slock); + mutex_enter(&vp->v_interlock); vp->v_numoutput += 2; /* one for biodone, one for aiodone */ - simple_unlock(&global_v_numoutput_slock); - splx(s); + mutex_exit(&vp->v_interlock); - mbp = getiobuf(); + mbp = getiobuf(NULL, true); UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x", vp, mbp, vp->v_numoutput, bytes); mbp->b_bufsize = npages << PAGE_SHIFT; mbp->b_data = (void *)kva; mbp->b_resid = mbp->b_bcount = bytes; - mbp->b_flags = B_BUSY|B_WRITE|B_AGE|B_CALL; + mbp->b_cflags = BC_BUSY|BC_AGE; mbp->b_iodone = uvm_aio_biodone; - mbp->b_vp = vp; bp = NULL; for (offset = startoffset; @@ -1722,27 +1728,26 @@ lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, if (offset == startoffset && iobytes == bytes) { bp = mbp; /* correct overcount if there is no second buffer */ - s = splbio(); - 
simple_lock(&global_v_numoutput_slock); + mutex_enter(&vp->v_interlock); --vp->v_numoutput; - simple_unlock(&global_v_numoutput_slock); - splx(s); + mutex_exit(&vp->v_interlock); } else { - bp = getiobuf(); + bp = getiobuf(NULL, true); UVMHIST_LOG(ubchist, "vp %p bp %p num now %d", vp, bp, vp->v_numoutput, 0); bp->b_data = (char *)kva + (vaddr_t)(offset - pg->offset); bp->b_resid = bp->b_bcount = iobytes; - bp->b_flags = B_BUSY|B_WRITE|B_CALL; + bp->b_cflags = BC_BUSY; bp->b_iodone = uvm_aio_biodone1; } /* XXX This is silly ... is this necessary? */ - bp->b_vp = NULL; - s = splbio(); + mutex_enter(&bufcache_lock); + mutex_enter(&vp->v_interlock); bgetvp(vp, bp); - splx(s); + mutex_exit(&vp->v_interlock); + mutex_exit(&bufcache_lock); bp->b_lblkno = lblkno(fs, offset); bp->b_private = mbp; @@ -1756,15 +1761,15 @@ lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, if (skipbytes) { UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0); - s = splbio(); + mutex_enter(mbp->b_objlock); if (error) { mbp->b_error = error; } mbp->b_resid -= skipbytes; + mutex_exit(mbp->b_objlock); if (mbp->b_resid == 0) { biodone(mbp); } - splx(s); } UVMHIST_LOG(ubchist, "returning 0", 0,0,0,0); return (0); @@ -1774,7 +1779,7 @@ lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, * We can't write the pages, for whatever reason. * Clean up after ourselves, and make the caller try again. */ - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); /* Tell why we're here, if we know */ if (ip->i_lfs_iflags & LFSI_NO_GOP_WRITE) { @@ -1791,12 +1796,12 @@ lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, DLOG((DLOG_PAGE, "lfs_gop_write: seglock not held\n")); } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { pg = pgs[i]; if (pg->flags & PG_PAGEOUT) - uvmexp.paging--; + uvm_pageout_done(1); if (pg->flags & PG_DELWRI) { uvm_pageunwire(pg); } @@ -1815,8 +1820,8 @@ lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, } /* uvm_pageunbusy takes care of PG_BUSY, PG_WANTED */ uvm_page_unbusy(pgs, npages); - uvm_unlock_pageq(); - simple_unlock(&vp->v_interlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&vp->v_interlock); return EAGAIN; } @@ -1974,9 +1979,7 @@ lfs_resize_fs(struct lfs *fs, int newnsegs) * is holding Ifile buffers, so we get each one, to drain them. * (XXX this could be done better.) */ - simple_lock(&fs->lfs_interlock); - lockmgr(&fs->lfs_iflock, LK_EXCLUSIVE, &fs->lfs_interlock); - simple_unlock(&fs->lfs_interlock); + rw_enter(&fs->lfs_iflock, RW_WRITER); vn_lock(ivp, LK_EXCLUSIVE | LK_RETRY); for (i = 0; i < ilast; i++) { bread(ivp, i, fs->lfs_bsize, NOCRED, &bp); @@ -2092,9 +2095,7 @@ lfs_resize_fs(struct lfs *fs, int newnsegs) /* Let Ifile accesses proceed */ VOP_UNLOCK(ivp, 0); - simple_lock(&fs->lfs_interlock); - lockmgr(&fs->lfs_iflock, LK_RELEASE, &fs->lfs_interlock); - simple_unlock(&fs->lfs_interlock); + rw_exit(&fs->lfs_iflock); out: lfs_segunlock(fs); diff --git a/sys/ufs/lfs/lfs_vnops.c b/sys/ufs/lfs/lfs_vnops.c index 16c3a2fceabe..2d9506f1b54c 100644 --- a/sys/ufs/lfs/lfs_vnops.c +++ b/sys/ufs/lfs/lfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_vnops.c,v 1.213 2007/11/26 19:02:32 pooka Exp $ */ +/* $NetBSD: lfs_vnops.c,v 1.214 2008/01/02 11:49:12 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 
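lfs_iflock, previously a lockmgr lock taken through an interlock dance, is now a krwlock_t: rw_init() at mount, rw_enter(RW_WRITER)/rw_exit() around Ifile draining in lfs_resize_fs() above, rw_destroy() at unmount. A sketch of the rwlock(9) lifecycle as the patch uses it; the standalone variable is illustrative:

	#include <sys/rwlock.h>

	krwlock_t iflock;

	rw_init(&iflock);		/* lfs_mountfs() */

	rw_enter(&iflock, RW_WRITER);	/* exclusive, e.g. to drain Ifile */
	/* ... operate on the Ifile ... */
	rw_exit(&iflock);

	rw_destroy(&iflock);		/* lfs_unmount() */

Unlike the old lockmgr call, rw_enter() needs no separate interlock, which is why the simple_lock/lockmgr/simple_unlock triples collapse to single calls.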
@@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.213 2007/11/26 19:02:32 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.214 2008/01/02 11:49:12 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_compat_netbsd.h" @@ -293,16 +293,14 @@ lfs_fsync(void *v) */ if (ap->a_flags & FSYNC_LAZY) { if (lfs_ignore_lazy_sync == 0) { - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (!(ip->i_flags & IN_PAGING)) { ip->i_flags |= IN_PAGING; TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain); } - simple_unlock(&fs->lfs_interlock); - simple_lock(&lfs_subsys_lock); wakeup(&lfs_writer_daemon); - simple_unlock(&lfs_subsys_lock); + mutex_exit(&lfs_lock); } return 0; } @@ -318,15 +316,15 @@ lfs_fsync(void *v) wait = (ap->a_flags & FSYNC_WAIT); do { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), round_page(ap->a_offhi), PGO_CLEANIT | (wait ? PGO_SYNCIO : 0)); if (error == EAGAIN) { - simple_lock(&fs->lfs_interlock); - ltsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_fsync", - hz / 100 + 1, &fs->lfs_interlock); - simple_unlock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); + mtsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_fsync", + hz / 100 + 1, &lfs_lock); + mutex_exit(&lfs_lock); } } while (error == EAGAIN); if (error) @@ -365,7 +363,9 @@ lfs_inactive(void *v) * Streamline this process by not giving it more dirty blocks. */ if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) { + mutex_enter(&lfs_lock); LFS_CLR_UINO(VTOI(ap->a_vp), IN_ALLMOD); + mutex_exit(&lfs_lock); VOP_UNLOCK(ap->a_vp, 0); return 0; } @@ -410,46 +410,43 @@ lfs_set_dirop(struct vnode *dvp, struct vnode *vp) if ((error = lfs_reserve(fs, dvp, vp, LFS_NRESERVE(fs))) != 0) return (error); - restart: - simple_lock(&fs->lfs_interlock); + restart: + mutex_enter(&lfs_lock); if (fs->lfs_dirops == 0) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); lfs_check(dvp, LFS_UNUSED_LBN, 0); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); } while (fs->lfs_writer) { - error = ltsleep(&fs->lfs_dirops, (PRIBIO + 1) | PCATCH, - "lfs_sdirop", 0, &fs->lfs_interlock); + error = mtsleep(&fs->lfs_dirops, (PRIBIO + 1) | PCATCH, + "lfs_sdirop", 0, &lfs_lock); if (error == EINTR) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); goto unreserve; } } - simple_lock(&lfs_subsys_lock); if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) { wakeup(&lfs_writer_daemon); - simple_unlock(&lfs_subsys_lock); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); preempt(); goto restart; } if (lfs_dirvcount > LFS_MAX_DIROP) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, " "dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount)); - if ((error = ltsleep(&lfs_dirvcount, - PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0, - &lfs_subsys_lock)) != 0) { + if ((error = mtsleep(&lfs_dirvcount, + PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0, + &lfs_lock)) != 0) { goto unreserve; } goto restart; } - simple_unlock(&lfs_subsys_lock); ++fs->lfs_dirops; fs->lfs_doifile = 1; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* Hold a reference so SET_ENDOP will be happy */ vref(dvp); @@ -501,7 +498,7 @@ lfs_set_dirop_create(struct vnode *dvp, struct vnode **vpp) #define SET_ENDOP_BASE(fs, dvp, str) \ do { \ - simple_lock(&(fs)->lfs_interlock); \ + mutex_enter(&lfs_lock); \ --(fs)->lfs_dirops; \ if (!(fs)->lfs_dirops) { \ if ((fs)->lfs_nadirop) { \ @@ -510,10 +507,10 
@@ lfs_set_dirop_create(struct vnode *dvp, struct vnode **vpp) (fs)->lfs_nadirop); \ } \ wakeup(&(fs)->lfs_writer); \ - simple_unlock(&(fs)->lfs_interlock); \ + mutex_exit(&lfs_lock); \ lfs_check((dvp), LFS_UNUSED_LBN, 0); \ } else \ - simple_unlock(&(fs)->lfs_interlock); \ + mutex_exit(&lfs_lock); \ } while(0) #define SET_ENDOP_CREATE(fs, dvp, nvpp, str) \ do { \ @@ -548,14 +545,13 @@ lfs_mark_vnode(struct vnode *vp) struct inode *ip = VTOI(vp); struct lfs *fs = ip->i_lfs; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (!(ip->i_flag & IN_ADIROP)) { if (!(vp->v_uflag & VU_DIROP)) { + mutex_enter(&vp->v_interlock); (void)lfs_vref(vp); - simple_lock(&lfs_subsys_lock); ++lfs_dirvcount; ++fs->lfs_dirvcount; - simple_unlock(&lfs_subsys_lock); TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain); vp->v_uflag |= VU_DIROP; } @@ -563,7 +559,7 @@ lfs_mark_vnode(struct vnode *vp) ip->i_flag |= IN_ADIROP; } else KASSERT(vp->v_uflag & VU_DIROP); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } void @@ -573,9 +569,9 @@ lfs_unmark_vnode(struct vnode *vp) if (ip && (ip->i_flag & IN_ADIROP)) { KASSERT(vp->v_uflag & VU_DIROP); - simple_lock(&ip->i_lfs->lfs_interlock); + mutex_enter(&lfs_lock); --ip->i_lfs->lfs_nadirop; - simple_unlock(&ip->i_lfs->lfs_interlock); + mutex_exit(&lfs_lock); ip->i_flag &= ~IN_ADIROP; } } @@ -668,7 +664,6 @@ lfs_mknod(void *v) /* Used to be vput, but that causes us to call VOP_INACTIVE twice. */ VOP_UNLOCK(*vpp, 0); - lfs_vunref(*vpp); (*vpp)->v_type = VNON; vgone(*vpp); error = VFS_VGET(mp, ino, vpp); @@ -971,10 +966,11 @@ lfs_setattr(void *v) static int lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor) { - if (lockstatus(&fs->lfs_stoplock) != LK_EXCLUSIVE) + if (fs->lfs_stoplwp != curlwp) return EBUSY; - lockmgr(&fs->lfs_stoplock, LK_RELEASE, &fs->lfs_interlock); + fs->lfs_stoplwp = NULL; + cv_signal(&fs->lfs_stopcv); KASSERT(fs->lfs_nowrap > 0); if (fs->lfs_nowrap <= 0) { @@ -987,8 +983,8 @@ lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor) lfs_wakeup_cleaner(fs); } if (waitfor) { - ltsleep(&fs->lfs_nextseg, PCATCH | PUSER, - "segment", 0, &fs->lfs_interlock); + mtsleep(&fs->lfs_nextseg, PCATCH | PUSER, "segment", + 0, &lfs_lock); } return 0; @@ -1011,11 +1007,11 @@ lfs_close(void *v) struct lfs *fs = ip->i_lfs; if ((ip->i_number == ROOTINO || ip->i_number == LFS_IFILE_INUM) && - lockstatus(&fs->lfs_stoplock) == LK_EXCLUSIVE) { - simple_lock(&fs->lfs_interlock); + fs->lfs_stoplwp == curlwp) { + mutex_enter(&lfs_lock); log(LOG_NOTICE, "lfs_close: releasing log wrap control\n"); lfs_wrapgo(fs, ip, 0); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } if (vp == ip->i_lfs->lfs_ivnode && @@ -1093,7 +1089,9 @@ lfs_reclaim(void *v) KASSERT(ip->i_nlink == ip->i_ffs_effnlink); + mutex_enter(&lfs_lock); LFS_CLR_UINO(ip, IN_ALLMOD); + mutex_exit(&lfs_lock); if ((error = ufs_reclaim(vp))) return (error); @@ -1101,7 +1099,7 @@ lfs_reclaim(void *v) * Take us off the paging and/or dirop queues if we were on them. * We shouldn't be on them. 
*/ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (ip->i_flags & IN_PAGING) { log(LOG_WARNING, "%s: reclaimed vnode is IN_PAGING\n", fs->lfs_fsmnt); @@ -1113,7 +1111,7 @@ lfs_reclaim(void *v) vp->v_uflag &= ~VU_DIROP; TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); pool_put(&lfs_dinode_pool, ip->i_din.ffs1_din); lfs_deregister_all(vp); @@ -1167,6 +1165,7 @@ lfs_strategy(void *v) NULL); if (error) { bp->b_error = error; + bp->b_resid = bp->b_bcount; biodone(bp); return (error); } @@ -1174,14 +1173,15 @@ lfs_strategy(void *v) clrbuf(bp); } if ((long)bp->b_blkno < 0) { /* block is not on disk */ + bp->b_resid = bp->b_bcount; biodone(bp); return (0); } slept = 1; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); while (slept && fs->lfs_seglock) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* * Look through list of intervals. * There will only be intervals to look through @@ -1205,29 +1205,29 @@ lfs_strategy(void *v) DLOG((DLOG_CLEAN, "lfs_strategy: sleeping on ino %d lbn %" PRId64 "\n", ip->i_number, bp->b_lblkno)); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (LFS_SEGLOCK_HELD(fs) && fs->lfs_iocount) { /* Cleaner can't wait for itself */ - ltsleep(&fs->lfs_iocount, + mtsleep(&fs->lfs_iocount, (PRIBIO + 1) | PNORELOCK, "clean2", 0, - &fs->lfs_interlock); + &lfs_lock); slept = 1; break; } else if (fs->lfs_seglock) { - ltsleep(&fs->lfs_seglock, + mtsleep(&fs->lfs_seglock, (PRIBIO + 1) | PNORELOCK, "clean1", 0, - &fs->lfs_interlock); + &lfs_lock); slept = 1; break; } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } } - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); vp = ip->i_devvp; VOP_STRATEGY(vp, bp); @@ -1249,12 +1249,12 @@ lfs_flush_dirops(struct lfs *fs) if (fs->lfs_ronly) return; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (TAILQ_FIRST(&fs->lfs_dchainhd) == NULL) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return; } else - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); if (lfs_dostats) ++lfs_stats.flush_invoked; @@ -1281,10 +1281,10 @@ lfs_flush_dirops(struct lfs *fs) * no dirops are active. * */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) { nip = TAILQ_NEXT(ip, i_lfs_dchain); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); vp = ITOV(ip); KASSERT((ip->i_flag & IN_ADIROP) == 0); @@ -1299,8 +1299,8 @@ lfs_flush_dirops(struct lfs *fs) * make sure that we don't clear IN_MODIFIED * unnecessarily. 
*/ - if (vp->v_iflag & (VI_XLOCK | VI_FREEING)) { - simple_lock(&fs->lfs_interlock); + if (vp->v_iflag & VI_XLOCK) { + mutex_enter(&lfs_lock); continue; } waslocked = VOP_ISLOCKED(vp); @@ -1309,16 +1309,18 @@ lfs_flush_dirops(struct lfs *fs) lfs_writefile(fs, sp, vp); if (!VPISEMPTY(vp) && !WRITEINPROG(vp) && !(ip->i_flag & IN_ALLMOD)) { + mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_MODIFIED); + mutex_exit(&lfs_lock); } } KDASSERT(ip->i_number != LFS_IFILE_INUM); (void) lfs_writeinode(fs, sp, ip); + mutex_enter(&lfs_lock); if (waslocked == LK_EXCLOTHER) LFS_SET_UINO(ip, IN_MODIFIED); - simple_lock(&fs->lfs_interlock); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* We've written all the dirops there are */ ((SEGSUM *)(sp->segsum))->ss_flags &= ~(SS_CONT); lfs_finalize_fs_seguse(fs); @@ -1347,12 +1349,12 @@ lfs_flush_pchain(struct lfs *fs) if (fs->lfs_ronly) return; - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (TAILQ_FIRST(&fs->lfs_pchainhd) == NULL) { - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return; } else - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); /* Get dirops out of the way */ lfs_flush_dirops(fs); @@ -1373,8 +1375,8 @@ lfs_flush_pchain(struct lfs *fs) * We're very conservative about what we write; we want to be * fast and async. */ - simple_lock(&fs->lfs_interlock); - top: + mutex_enter(&lfs_lock); + top: for (ip = TAILQ_FIRST(&fs->lfs_pchainhd); ip != NULL; ip = nip) { nip = TAILQ_NEXT(ip, i_lfs_pchain); vp = ITOV(ip); @@ -1382,24 +1384,31 @@ lfs_flush_pchain(struct lfs *fs) if (!(ip->i_flags & IN_PAGING)) goto top; - if ((vp->v_iflag|vp->v_uflag) & (VI_XLOCK|VU_DIROP)) + mutex_enter(&vp->v_interlock); + if ((vp->v_iflag & VI_XLOCK) || (vp->v_uflag & VU_DIROP) != 0) { + mutex_exit(&vp->v_interlock); continue; - if (vp->v_type != VREG) + } + if (vp->v_type != VREG) { + mutex_exit(&vp->v_interlock); continue; + } if (lfs_vref(vp)) continue; - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); if (VOP_ISLOCKED(vp)) { lfs_vunref(vp); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); continue; } error = lfs_writefile(fs, sp, vp); if (!VPISEMPTY(vp) && !WRITEINPROG(vp) && !(ip->i_flag & IN_ALLMOD)) { + mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_MODIFIED); + mutex_exit(&lfs_lock); } KDASSERT(ip->i_number != LFS_IFILE_INUM); (void) lfs_writeinode(fs, sp, ip); @@ -1408,12 +1417,12 @@ lfs_flush_pchain(struct lfs *fs) if (error == EAGAIN) { lfs_writeseg(fs, sp); - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); break; } - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); (void) lfs_writeseg(fs, sp); lfs_segunlock(fs); } @@ -1470,201 +1479,206 @@ lfs_fcntl(void *v) switch (ap->a_command) { case LFCNSEGWAITALL: case LFCNSEGWAITALL_COMPAT: - fsidp = NULL; - /* FALLSTHROUGH */ + fsidp = NULL; + /* FALLSTHROUGH */ case LFCNSEGWAIT: case LFCNSEGWAIT_COMPAT: - tvp = (struct timeval *)ap->a_data; - simple_lock(&fs->lfs_interlock); - ++fs->lfs_sleepers; - simple_unlock(&fs->lfs_interlock); + tvp = (struct timeval *)ap->a_data; + mutex_enter(&lfs_lock); + ++fs->lfs_sleepers; + mutex_exit(&lfs_lock); - error = lfs_segwait(fsidp, tvp); + error = lfs_segwait(fsidp, tvp); - simple_lock(&fs->lfs_interlock); - if (--fs->lfs_sleepers == 0) - wakeup(&fs->lfs_sleepers); - simple_unlock(&fs->lfs_interlock); - return error; + mutex_enter(&lfs_lock); + if (--fs->lfs_sleepers == 0) + wakeup(&fs->lfs_sleepers); + mutex_exit(&lfs_lock); + 
return error; case LFCNBMAPV: case LFCNMARKV: - blkvp = *(struct lfs_fcntl_markv *)ap->a_data; + blkvp = *(struct lfs_fcntl_markv *)ap->a_data; - blkcnt = blkvp.blkcnt; - if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT) - return (EINVAL); - blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV); - if ((error = copyin(blkvp.blkiov, blkiov, - blkcnt * sizeof(BLOCK_INFO))) != 0) { - lfs_free(fs, blkiov, LFS_NB_BLKIOV); - return error; - } + blkcnt = blkvp.blkcnt; + if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT) + return (EINVAL); + blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV); + if ((error = copyin(blkvp.blkiov, blkiov, + blkcnt * sizeof(BLOCK_INFO))) != 0) { + lfs_free(fs, blkiov, LFS_NB_BLKIOV); + return error; + } - simple_lock(&fs->lfs_interlock); - ++fs->lfs_sleepers; - simple_unlock(&fs->lfs_interlock); - if (ap->a_command == LFCNBMAPV) - error = lfs_bmapv(l->l_proc, fsidp, blkiov, blkcnt); - else /* LFCNMARKV */ - error = lfs_markv(l->l_proc, fsidp, blkiov, blkcnt); - if (error == 0) - error = copyout(blkiov, blkvp.blkiov, - blkcnt * sizeof(BLOCK_INFO)); - simple_lock(&fs->lfs_interlock); - if (--fs->lfs_sleepers == 0) - wakeup(&fs->lfs_sleepers); - simple_unlock(&fs->lfs_interlock); - lfs_free(fs, blkiov, LFS_NB_BLKIOV); - return error; + mutex_enter(&lfs_lock); + ++fs->lfs_sleepers; + mutex_exit(&lfs_lock); + if (ap->a_command == LFCNBMAPV) + error = lfs_bmapv(l->l_proc, fsidp, blkiov, blkcnt); + else /* LFCNMARKV */ + error = lfs_markv(l->l_proc, fsidp, blkiov, blkcnt); + if (error == 0) + error = copyout(blkiov, blkvp.blkiov, + blkcnt * sizeof(BLOCK_INFO)); + mutex_enter(&lfs_lock); + if (--fs->lfs_sleepers == 0) + wakeup(&fs->lfs_sleepers); + mutex_exit(&lfs_lock); + lfs_free(fs, blkiov, LFS_NB_BLKIOV); + return error; case LFCNRECLAIM: - /* - * Flush dirops and write Ifile, allowing empty segments - * to be immediately reclaimed. - */ - lfs_writer_enter(fs, "pndirop"); - off = fs->lfs_offset; - lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP); - lfs_flush_dirops(fs); - LFS_CLEANERINFO(cip, fs, bp); - oclean = cip->clean; - LFS_SYNC_CLEANERINFO(cip, fs, bp, 1); - lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP); - fs->lfs_sp->seg_flags |= SEGM_PROT; - lfs_segunlock(fs); - lfs_writer_leave(fs); + /* + * Flush dirops and write Ifile, allowing empty segments + * to be immediately reclaimed. 
+ */ + lfs_writer_enter(fs, "pndirop"); + off = fs->lfs_offset; + lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP); + lfs_flush_dirops(fs); + LFS_CLEANERINFO(cip, fs, bp); + oclean = cip->clean; + LFS_SYNC_CLEANERINFO(cip, fs, bp, 1); + lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP); + fs->lfs_sp->seg_flags |= SEGM_PROT; + lfs_segunlock(fs); + lfs_writer_leave(fs); #ifdef DEBUG - LFS_CLEANERINFO(cip, fs, bp); - DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64 - " blocks, cleaned %" PRId32 " segments (activesb %d)\n", - fs->lfs_offset - off, cip->clean - oclean, - fs->lfs_activesb)); - LFS_SYNC_CLEANERINFO(cip, fs, bp, 0); + LFS_CLEANERINFO(cip, fs, bp); + DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64 + " blocks, cleaned %" PRId32 " segments (activesb %d)\n", + fs->lfs_offset - off, cip->clean - oclean, + fs->lfs_activesb)); + LFS_SYNC_CLEANERINFO(cip, fs, bp, 0); #endif - return 0; + return 0; #ifdef COMPAT_30 case LFCNIFILEFH_COMPAT: - /* Return the filehandle of the Ifile */ - if ((error = kauth_authorize_generic(l->l_cred, - KAUTH_GENERIC_ISSUSER, NULL)) != 0) - return (error); - fhp = (struct fhandle *)ap->a_data; - fhp->fh_fsid = *fsidp; - fh_size = 16; /* former VFS_MAXFIDSIZ */ - return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); + /* Return the filehandle of the Ifile */ + if ((error = kauth_authorize_generic(l->l_cred, + KAUTH_GENERIC_ISSUSER, NULL)) != 0) + return (error); + fhp = (struct fhandle *)ap->a_data; + fhp->fh_fsid = *fsidp; + fh_size = 16; /* former VFS_MAXFIDSIZ */ + return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); #endif case LFCNIFILEFH_COMPAT2: case LFCNIFILEFH: - /* Return the filehandle of the Ifile */ - fhp = (struct fhandle *)ap->a_data; - fhp->fh_fsid = *fsidp; - fh_size = sizeof(struct lfs_fhandle) - - offsetof(fhandle_t, fh_fid); - return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); + /* Return the filehandle of the Ifile */ + fhp = (struct fhandle *)ap->a_data; + fhp->fh_fsid = *fsidp; + fh_size = sizeof(struct lfs_fhandle) - + offsetof(fhandle_t, fh_fid); + return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); case LFCNREWIND: - /* Move lfs_offset to the lowest-numbered segment */ - return lfs_rewind(fs, *(int *)ap->a_data); + /* Move lfs_offset to the lowest-numbered segment */ + return lfs_rewind(fs, *(int *)ap->a_data); case LFCNINVAL: - /* Mark a segment SEGUSE_INVAL */ - LFS_SEGENTRY(sup, fs, *(int *)ap->a_data, bp); - if (sup->su_nbytes > 0) { - brelse(bp, 0); - lfs_unset_inval_all(fs); - return EBUSY; - } - sup->su_flags |= SEGUSE_INVAL; - VOP_BWRITE(bp); - return 0; + /* Mark a segment SEGUSE_INVAL */ + LFS_SEGENTRY(sup, fs, *(int *)ap->a_data, bp); + if (sup->su_nbytes > 0) { + brelse(bp, 0); + lfs_unset_inval_all(fs); + return EBUSY; + } + sup->su_flags |= SEGUSE_INVAL; + VOP_BWRITE(bp); + return 0; case LFCNRESIZE: - /* Resize the filesystem */ - return lfs_resize_fs(fs, *(int *)ap->a_data); + /* Resize the filesystem */ + return lfs_resize_fs(fs, *(int *)ap->a_data); case LFCNWRAPSTOP: case LFCNWRAPSTOP_COMPAT: - /* - * Hold lfs_newseg at segment 0; if requested, sleep until - * the filesystem wraps around. To support external agents - * (dump, fsck-based regression test) that need to look at - * a snapshot of the filesystem, without necessarily - * requiring that all fs activity stops. - */ - if (lockstatus(&fs->lfs_stoplock)) - return EALREADY; + /* + * Hold lfs_newseg at segment 0; if requested, sleep until + * the filesystem wraps around. 
To support external agents + * (dump, fsck-based regression test) that need to look at + * a snapshot of the filesystem, without necessarily + * requiring that all fs activity stops. + */ + if (fs->lfs_stoplwp == curlwp) + return EALREADY; - simple_lock(&fs->lfs_interlock); - lockmgr(&fs->lfs_stoplock, LK_EXCLUSIVE, &fs->lfs_interlock); - if (fs->lfs_nowrap == 0) - log(LOG_NOTICE, "%s: disabled log wrap\n", fs->lfs_fsmnt); - ++fs->lfs_nowrap; - if (*(int *)ap->a_data == 1 || - ap->a_command == LFCNWRAPSTOP_COMPAT) { - log(LOG_NOTICE, "LFCNSTOPWRAP waiting for log wrap\n"); - error = ltsleep(&fs->lfs_nowrap, PCATCH | PUSER, - "segwrap", 0, &fs->lfs_interlock); - log(LOG_NOTICE, "LFCNSTOPWRAP done waiting\n"); - if (error) { - lfs_wrapgo(fs, VTOI(ap->a_vp), 0); - } - } - simple_unlock(&fs->lfs_interlock); - return 0; + mutex_enter(&lfs_lock); + while (fs->lfs_stoplwp != NULL) + cv_wait(&fs->lfs_stopcv, &lfs_lock); + fs->lfs_stoplwp = curlwp; + if (fs->lfs_nowrap == 0) + log(LOG_NOTICE, "%s: disabled log wrap\n", fs->lfs_fsmnt); + ++fs->lfs_nowrap; + if (*(int *)ap->a_data == 1 || + ap->a_command == LFCNWRAPSTOP_COMPAT) { + log(LOG_NOTICE, "LFCNSTOPWRAP waiting for log wrap\n"); + error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER, + "segwrap", 0, &lfs_lock); + log(LOG_NOTICE, "LFCNSTOPWRAP done waiting\n"); + if (error) { + lfs_wrapgo(fs, VTOI(ap->a_vp), 0); + } + } + mutex_exit(&lfs_lock); + return 0; case LFCNWRAPGO: case LFCNWRAPGO_COMPAT: - /* - * Having done its work, the agent wakes up the writer. - * If the argument is 1, it sleeps until a new segment - * is selected. - */ - simple_lock(&fs->lfs_interlock); - error = lfs_wrapgo(fs, VTOI(ap->a_vp), - (ap->a_command == LFCNWRAPGO_COMPAT ? 1 : - *((int *)ap->a_data))); - simple_unlock(&fs->lfs_interlock); - return error; + /* + * Having done its work, the agent wakes up the writer. + * If the argument is 1, it sleeps until a new segment + * is selected. + */ + mutex_enter(&lfs_lock); + error = lfs_wrapgo(fs, VTOI(ap->a_vp), + (ap->a_command == LFCNWRAPGO_COMPAT ? 
1 : + *((int *)ap->a_data))); + mutex_exit(&lfs_lock); + return error; case LFCNWRAPPASS: - if (lockstatus(&fs->lfs_stoplock) != LK_EXCLUSIVE) - return EALREADY; - if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT)) - return EALREADY; - simple_lock(&fs->lfs_interlock); - if (fs->lfs_nowrap == 0) { - simple_unlock(&fs->lfs_interlock); - return EBUSY; - } - fs->lfs_wrappass = 1; - wakeup(&fs->lfs_wrappass); - /* Wait for the log to wrap, if asked */ - if (*(int *)ap->a_data) { - lfs_vref(ap->a_vp); - VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT; - log(LOG_NOTICE, "LFCNPASS waiting for log wrap\n"); - error = ltsleep(&fs->lfs_nowrap, PCATCH | PUSER, - "segwrap", 0, &fs->lfs_interlock); - log(LOG_NOTICE, "LFCNPASS done waiting\n"); - VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT; - lfs_vunref(ap->a_vp); - } - simple_unlock(&fs->lfs_interlock); - return error; + if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT)) + return EALREADY; + mutex_enter(&lfs_lock); + if (fs->lfs_stoplwp != curlwp) { + mutex_exit(&lfs_lock); + return EALREADY; + } + if (fs->lfs_nowrap == 0) { + mutex_exit(&lfs_lock); + return EBUSY; + } + fs->lfs_wrappass = 1; + wakeup(&fs->lfs_wrappass); + /* Wait for the log to wrap, if asked */ + if (*(int *)ap->a_data) { + mutex_enter(&ap->a_vp->v_interlock); + lfs_vref(ap->a_vp); + VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT; + log(LOG_NOTICE, "LFCNPASS waiting for log wrap\n"); + error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER, + "segwrap", 0, &lfs_lock); + log(LOG_NOTICE, "LFCNPASS done waiting\n"); + VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT; + lfs_vunref(ap->a_vp); + } + mutex_exit(&lfs_lock); + return error; case LFCNWRAPSTATUS: - simple_lock(&fs->lfs_interlock); - *(int *)ap->a_data = fs->lfs_wrapstatus; - simple_unlock(&fs->lfs_interlock); - return 0; + mutex_enter(&lfs_lock); + *(int *)ap->a_data = fs->lfs_wrapstatus; + mutex_exit(&lfs_lock); + return 0; default: - return ufs_fcntl(v); + return ufs_fcntl(v); } return 0; } @@ -1688,7 +1702,9 @@ lfs_getpages(void *v) return EPERM; } if ((ap->a_access_type & VM_PROT_WRITE) != 0) { + mutex_enter(&lfs_lock); LFS_SET_UINO(VTOI(ap->a_vp), IN_MODIFIED); + mutex_exit(&lfs_lock); } /* @@ -1728,7 +1744,7 @@ wait_for_page(struct vnode *vp, struct vm_page *pg, const char *label) pg->flags |= PG_WANTED; UVM_UNLOCK_AND_WAIT(pg, &vp->v_interlock, 0, "lfsput", 0); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); } /* @@ -1756,7 +1772,7 @@ write_and_wait(struct lfs *fs, struct vnode *vp, struct vm_page *pg, return; while (pg->flags & PG_BUSY) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); if (sp->cbpp - sp->bpp > 1) { /* Write gathered pages */ lfs_updatemeta(sp); @@ -1771,7 +1787,7 @@ write_and_wait(struct lfs *fs, struct vnode *vp, struct vm_page *pg, ip->i_gen); } ++count; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); wait_for_page(vp, pg, label); } if (label != NULL && count > 1) @@ -1916,9 +1932,9 @@ check_dirty(struct lfs *fs, struct vnode *vp, * Wire the page so that * pdaemon doesn't see it again. 
*/ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pagewire(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* Suspended write flag */ pg->flags |= PG_DELWRI; @@ -2003,7 +2019,6 @@ lfs_putpages(void *v) struct segment *sp; off_t origoffset, startoffset, endoffset, origendoffset, blkeof; off_t off, max_endoffset; - int s; bool seglocked, sync, pagedaemon; struct vm_page *pg, *busypg; UVMHIST_FUNC("lfs_putpages"); UVMHIST_CALLED(ubchist); @@ -2019,7 +2034,7 @@ lfs_putpages(void *v) /* Putpages does nothing for metadata. */ if (vp == fs->lfs_ivnode || vp->v_type != VREG) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return 0; } @@ -2027,23 +2042,21 @@ lfs_putpages(void *v) * If there are no pages, don't do anything. */ if (vp->v_uobj.uo_npages == 0) { - s = splbio(); if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_iflag & VI_ONWORKLST) && LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { vp->v_iflag &= ~VI_WRMAPDIRTY; vn_syncer_remove_from_worklist(vp); } - splx(s); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); /* Remove us from paging queue, if we were on it */ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (ip->i_flags & IN_PAGING) { ip->i_flags &= ~IN_PAGING; TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); return 0; } @@ -2065,15 +2078,15 @@ lfs_putpages(void *v) pg->flags |= PG_WANTED; UVM_UNLOCK_AND_WAIT(pg, &vp->v_interlock, 0, "lfsput2", 0); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } ap->a_offlo = blkeof; if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return 0; } } @@ -2099,7 +2112,7 @@ lfs_putpages(void *v) KASSERT(startoffset > 0 || endoffset >= startoffset); if (startoffset == endoffset) { /* Nothing to do, why were we called? */ - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); DLOG((DLOG_PAGE, "lfs_putpages: startoffset = endoffset = %" PRId64 "\n", startoffset)); return 0; @@ -2133,7 +2146,7 @@ lfs_putpages(void *v) ap->a_flags, 1, NULL); if (r < 0) { /* Pages are busy with another process */ - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return EDEADLK; } if (r > 0) /* Some pages are dirty */ @@ -2153,7 +2166,7 @@ lfs_putpages(void *v) return r; /* One of the pages was busy. Start over. */ - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); wait_for_page(vp, busypg, "dirtyclean"); #ifdef DEBUG ++debug_n_dirtyclean; @@ -2174,16 +2187,14 @@ lfs_putpages(void *v) * notice the pager inode queue and act on that. */ if (pagedaemon) { - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (!(ip->i_flags & IN_PAGING)) { ip->i_flags |= IN_PAGING; TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain); } - simple_lock(&lfs_subsys_lock); wakeup(&lfs_writer_daemon); - simple_unlock(&lfs_subsys_lock); - simple_unlock(&fs->lfs_interlock); - simple_unlock(&vp->v_interlock); + mutex_exit(&lfs_lock); + mutex_exit(&vp->v_interlock); preempt(); return EWOULDBLOCK; } @@ -2200,19 +2211,19 @@ lfs_putpages(void *v) DLOG((DLOG_PAGE, "lfs_putpages: flushing VU_DIROP\n")); locked = (VOP_ISLOCKED(vp) == LK_EXCLUSIVE); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); lfs_writer_enter(fs, "ppdirop"); if (locked) VOP_UNLOCK(vp, 0); /* XXX why? 
*/ - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); lfs_flush_fs(fs, sync ? SEGM_SYNC : 0); - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (locked) { VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); } lfs_writer_leave(fs); @@ -2242,11 +2253,11 @@ lfs_putpages(void *v) */ seglocked = (ap->a_flags & PGO_LOCKED) != 0; if (!seglocked) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); error = lfs_seglock(fs, SEGM_PROT | (sync ? SEGM_SYNC : 0)); if (error != 0) return error; - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); lfs_acquire_finfo(fs, ip->i_number, ip->i_gen); } sp = fs->lfs_sp; @@ -2273,9 +2284,9 @@ lfs_putpages(void *v) busypg = NULL; if (check_dirty(fs, vp, startoffset, endoffset, blkeof, ap->a_flags, 0, &busypg) < 0) { - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); write_and_wait(fs, vp, busypg, seglocked, NULL); if (!seglocked) { lfs_release_finfo(fs); @@ -2295,7 +2306,7 @@ lfs_putpages(void *v) ip->i_number, fs->lfs_offset, dtosn(fs, fs->lfs_offset))); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); write_and_wait(fs, vp, busypg, seglocked, "again"); } #ifdef DEBUG @@ -2343,12 +2354,12 @@ lfs_putpages(void *v) * Remove us from paging queue if we wrote all our pages. */ if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) { - simple_lock(&fs->lfs_interlock); + mutex_enter(&lfs_lock); if (ip->i_flags & IN_PAGING) { ip->i_flags &= ~IN_PAGING; TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); } - simple_unlock(&fs->lfs_interlock); + mutex_exit(&lfs_lock); } /* @@ -2365,17 +2376,13 @@ lfs_putpages(void *v) * aiodoned might not have got around to our buffers yet. */ if (sync) { - s = splbio(); - simple_lock(&global_v_numoutput_slock); + mutex_enter(&vp->v_interlock); while (vp->v_numoutput > 0) { DLOG((DLOG_PAGE, "lfs_putpages: ino %d sleeping on" " num %d\n", ip->i_number, vp->v_numoutput)); - vp->v_iflag |= VI_BWAIT; - ltsleep(&vp->v_numoutput, PRIBIO + 1, "lfs_vn", 0, - &global_v_numoutput_slock); + cv_wait(&vp->v_cv, &vp->v_interlock); } - simple_unlock(&global_v_numoutput_slock); - splx(s); + mutex_exit(&vp->v_interlock); } return error; } diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h index 79a050ead6aa..4b79ed6233ac 100644 --- a/sys/ufs/ufs/inode.h +++ b/sys/ufs/ufs/inode.h @@ -1,4 +1,4 @@ -/* $NetBSD: inode.h,v 1.48 2007/04/09 12:21:24 pooka Exp $ */ +/* $NetBSD: inode.h,v 1.49 2008/01/02 11:49:13 ad Exp $ */ /* * Copyright (c) 1982, 1989, 1993 @@ -128,6 +128,7 @@ struct inode { int32_t i_gen; /* Generation number. */ u_int32_t i_uid; /* File owner. */ u_int32_t i_gid; /* File group. 
*/ + u_int16_t i_omode; /* Old mode, for ufs_reclaim */ struct dirhash *i_dirhash; /* Hashing for large directories */ diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c index 47aa03b04863..2d22ad2ab16a 100644 --- a/sys/ufs/ufs/ufs_bmap.c +++ b/sys/ufs/ufs/ufs_bmap.c @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_bmap.c,v 1.46 2007/10/08 18:01:31 ad Exp $ */ +/* $NetBSD: ufs_bmap.c,v 1.47 2008/01/02 11:49:13 ad Exp $ */ /* * Copyright (c) 1989, 1991, 1993 @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ufs_bmap.c,v 1.46 2007/10/08 18:01:31 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ufs_bmap.c,v 1.47 2008/01/02 11:49:13 ad Exp $"); #include #include @@ -119,7 +119,7 @@ ufs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap, int *nump, int *runp, ufs_issequential_callback_t is_sequential) { struct inode *ip; - struct buf *bp; + struct buf *bp, *cbp; struct ufsmount *ump; struct mount *mp; struct indir a[NIADDR + 1], *xap; @@ -219,8 +219,16 @@ ufs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap, */ metalbn = xap->in_lbn; - if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn) + if (metalbn == bn) break; + if (daddr == 0) { + mutex_enter(&bufcache_lock); + cbp = incore(vp, metalbn); + mutex_exit(&bufcache_lock); + if (cbp == NULL) + break; + } + /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. @@ -240,7 +248,7 @@ ufs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap, return (ENOMEM); } - if (bp->b_flags & (B_DONE | B_DELWRI)) { + if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { trace(TR_BREADHIT, pack(vp, size), metalbn); } #ifdef DIAGNOSTIC diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h index e59f13cc60bb..c64b52d57e16 100644 --- a/sys/ufs/ufs/ufs_extern.h +++ b/sys/ufs/ufs/ufs_extern.h @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_extern.h,v 1.55 2007/11/26 19:02:34 pooka Exp $ */ +/* $NetBSD: ufs_extern.h,v 1.56 2008/01/02 11:49:13 ad Exp $ */ /*- * Copyright (c) 1991, 1993, 1994 @@ -34,6 +34,8 @@ #ifndef _UFS_UFS_EXTERN_H_ #define _UFS_UFS_EXTERN_H_ +#include + struct buf; struct componentname; struct direct; @@ -186,4 +188,6 @@ void softdep_releasefile(struct inode *); __END_DECLS +extern kmutex_t ufs_ihash_lock; + #endif /* !_UFS_UFS_EXTERN_H_ */ diff --git a/sys/ufs/ufs/ufs_ihash.c b/sys/ufs/ufs/ufs_ihash.c index b9bd3cd897c5..c8928e2513bb 100644 --- a/sys/ufs/ufs/ufs_ihash.c +++ b/sys/ufs/ufs/ufs_ihash.c @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_ihash.c,v 1.23 2007/05/28 23:42:56 ad Exp $ */ +/* $NetBSD: ufs_ihash.c,v 1.24 2008/01/02 11:49:13 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ufs_ihash.c,v 1.23 2007/05/28 23:42:56 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ufs_ihash.c,v 1.24 2008/01/02 11:49:13 ad Exp $"); #include #include @@ -116,13 +116,13 @@ ufs_ihashlookup(dev_t dev, ino_t inum) struct inode *ip; struct ihashhead *ipp; - mutex_enter(&ufs_ihash_lock); + KASSERT(mutex_owned(&ufs_ihash_lock)); + ipp = &ihashtbl[INOHASH(dev, inum)]; LIST_FOREACH(ip, ipp, i_hash) { if (inum == ip->i_number && dev == ip->i_dev) break; } - mutex_exit(&ufs_ihash_lock); if (ip) return (ITOV(ip)); return (NULLVP); @@ -148,7 +148,7 @@ ufs_ihashget(dev_t dev, ino_t inum, int flags) if (flags == 0) { mutex_exit(&ufs_ihash_lock); } else { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); mutex_exit(&ufs_ihash_lock); if (vget(vp, flags | LK_INTERLOCK)) goto loop; diff --git 
a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c index 36cf59066bec..e43c4ac864ad 100644 --- a/sys/ufs/ufs/ufs_inode.c +++ b/sys/ufs/ufs/ufs_inode.c @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_inode.c,v 1.71 2007/12/08 19:29:56 pooka Exp $ */ +/* $NetBSD: ufs_inode.c,v 1.72 2008/01/02 11:49:14 ad Exp $ */ /* * Copyright (c) 1991, 1993 @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.71 2007/12/08 19:29:56 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.72 2008/01/02 11:49:14 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -77,16 +77,15 @@ ufs_inactive(void *v) { struct vop_inactive_args /* { struct vnode *a_vp; - struct lwp *a_l; + struct bool *a_recycle; } */ *ap = v; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct mount *transmp; - struct lwp *l = curlwp; mode_t mode; int error = 0; - if (prtactive && vp->v_usecount != 0) + if (prtactive && vp->v_usecount > 1) vprint("ufs_inactive: pushing active", vp); transmp = vp->v_mount; @@ -118,28 +117,24 @@ ufs_inactive(void *v) DIP_ASSIGN(ip, rdev, 0); mode = ip->i_mode; ip->i_mode = 0; + ip->i_omode = mode; DIP_ASSIGN(ip, mode, 0); ip->i_flag |= IN_CHANGE | IN_UPDATE; - simple_lock(&vp->v_interlock); - vp->v_iflag |= VI_FREEING; - simple_unlock(&vp->v_interlock); if (DOINGSOFTDEP(vp)) softdep_change_linkcnt(ip); - UFS_VFREE(vp, ip->i_number, mode); - } - - if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) { + /* + * Defer final inode free and update to ufs_reclaim(). + */ + } else if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) { UFS_UPDATE(vp, NULL, NULL, 0); } out: - VOP_UNLOCK(vp, 0); /* * If we are done with the inode, reclaim it * so that it can be reused immediately. */ - - if (ip->i_mode == 0) - vrecycle(vp, NULL, l); + *ap->a_recycle = (ip->i_mode == 0); + VOP_UNLOCK(vp, 0); fstrans_done(transmp); return (error); } @@ -152,15 +147,12 @@ ufs_reclaim(struct vnode *vp) { struct inode *ip = VTOI(vp); - if (prtactive && vp->v_usecount != 0) + if (prtactive && vp->v_usecount > 1) vprint("ufs_reclaim: pushing active", vp); UFS_UPDATE(vp, NULL, NULL, UPDATE_CLOSE); - - /* - * Remove the inode from its hash chain. - */ ufs_ihashrem(ip); + /* * Purge old data structures associated with the inode. */ @@ -223,23 +215,23 @@ ufs_balloc_range(struct vnode *vp, off_t off, off_t len, kauth_cred_t cred, pgssize = npages * sizeof(struct vm_page *); pgs = kmem_zalloc(pgssize, KM_SLEEP); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0, VM_PROT_WRITE, 0, PGO_SYNCIO|PGO_PASTEOF|PGO_NOBLOCKALLOC|PGO_NOTIMESTAMP); if (error) { goto out; } - simple_lock(&uobj->vmobjlock); - uvm_lock_pageq(); + mutex_enter(&uobj->vmobjlock); + mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { UVMHIST_LOG(ubchist, "got pgs[%d] %p", i, pgs[i],0,0); KASSERT((pgs[i]->flags & PG_RELEASED) == 0); pgs[i]->flags &= ~PG_CLEAN; uvm_pageactivate(pgs[i]); } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uobj->vmobjlock); /* * adjust off to be block-aligned. 
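ufs_balloc_range() above shows the page-queue lock conversion: uvm_lock_pageq()/uvm_unlock_pageq() become mutex_enter()/mutex_exit() on the global uvm_pageqlock (declared in the uvm.h hunk later in this patch). A condensed sketch of the nesting as it appears in the hunk above, with the object lock taken before the page queues; the page array is illustrative:

	mutex_enter(&uobj->vmobjlock);	/* object lock first */
	mutex_enter(&uvm_pageqlock);	/* then the page queues */
	for (i = 0; i < npages; i++)
		uvm_pageactivate(pgs[i]);
	mutex_exit(&uvm_pageqlock);
	mutex_exit(&uobj->vmobjlock);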
@@ -263,7 +255,7 @@ ufs_balloc_range(struct vnode *vp, off_t off, off_t len, kauth_cred_t cred, */ GOP_SIZE(vp, off + len, &eob, 0); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); for (i = 0; i < npages; i++) { if (error) { pgs[i]->flags |= PG_RELEASED; @@ -273,15 +265,15 @@ ufs_balloc_range(struct vnode *vp, off_t off, off_t len, kauth_cred_t cred, } } if (error) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_page_unbusy(pgs, npages); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } else { uvm_page_unbusy(pgs, npages); } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); out: - kmem_free(pgs, pgssize); + kmem_free(pgs, pgssize); return error; } diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c index 2eb031aa34a0..5c1cdc049f5e 100644 --- a/sys/ufs/ufs/ufs_quota.c +++ b/sys/ufs/ufs/ufs_quota.c @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_quota.c,v 1.52 2007/12/08 19:29:57 pooka Exp $ */ +/* $NetBSD: ufs_quota.c,v 1.53 2008/01/02 11:49:14 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1990, 1993, 1995 @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ufs_quota.c,v 1.52 2007/12/08 19:29:57 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ufs_quota.c,v 1.53 2008/01/02 11:49:14 ad Exp $"); #include #include @@ -412,8 +412,7 @@ int quotaon(struct lwp *l, struct mount *mp, int type, void *fname) { struct ufsmount *ump = VFSTOUFS(mp); - struct vnode *vp, **vpp; - struct vnode *nextvp; + struct vnode *vp, **vpp, *mvp; struct dquot *dq; int error; struct nameidata nd; @@ -453,29 +452,43 @@ quotaon(struct lwp *l, struct mount *mp, int type, void *fname) ump->um_itime[type] = dq->dq_itime; dqrele(NULLVP, dq); } + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) { + error = ENOMEM; + goto out; + } /* * Search vnodes associated with this mount point, * adding references to quota file being opened. * NB: only need to add dquot's for inodes being modified. */ + mutex_enter(&mntvnode_lock); again: - TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { - nextvp = TAILQ_NEXT(vp, v_mntvnodes); - if (vp->v_mount != mp) - goto again; - if (vp->v_type == VNON ||vp->v_writecount == 0) + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + mutex_enter(&vp->v_interlock); + if (vp->v_mount != mp || vismarker(vp) || + vp->v_type == VNON || vp->v_writecount == 0) { + mutex_exit(&vp->v_interlock); continue; - if (vget(vp, LK_EXCLUSIVE)) + } + mutex_exit(&mntvnode_lock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { + mutex_enter(&mntvnode_lock); + (void)vunmark(mvp); goto again; + } if ((error = getinoquota(VTOI(vp))) != 0) { vput(vp); + mutex_enter(&mntvnode_lock); + (void)vunmark(mvp); break; } vput(vp); - /* if the list changed, start again */ - if (TAILQ_NEXT(vp, v_mntvnodes) != nextvp) - goto again; } + mutex_exit(&mntvnode_lock); + vfree(mvp); + out: mutex_enter(&dqlock); ump->um_qflags[type] &= ~QTF_OPENING; cv_broadcast(&dqcv); @@ -492,18 +505,23 @@ int quotaoff(struct lwp *l, struct mount *mp, int type) { struct vnode *vp; - struct vnode *qvp, *nextvp; + struct vnode *qvp, *mvp; struct ufsmount *ump = VFSTOUFS(mp); struct dquot *dq; struct inode *ip; kauth_cred_t cred; int i, error; + /* Allocate a marker vnode. 
*/ + if ((mvp = valloc(mp)) == NULL) + return ENOMEM; + mutex_enter(&dqlock); while ((ump->um_qflags[type] & (QTF_CLOSING | QTF_OPENING)) != 0) cv_wait(&dqcv, &dqlock); if ((qvp = ump->um_quotas[type]) == NULLVP) { mutex_exit(&dqlock); + vfree(mvp); return (0); } ump->um_qflags[type] |= QTF_CLOSING; @@ -512,24 +530,29 @@ quotaoff(struct lwp *l, struct mount *mp, int type) * Search vnodes associated with this mount point, * deleting any references to quota file being closed. */ + mutex_enter(&mntvnode_lock); again: - TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { - nextvp = TAILQ_NEXT(vp, v_mntvnodes); - if (vp->v_mount != mp) - goto again; - if (vp->v_type == VNON) + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + mutex_enter(&vp->v_interlock); + if (vp->v_mount != mp || vismarker(vp) || vp->v_type == VNON) { + mutex_exit(&vp->v_interlock); continue; - if (vget(vp, LK_EXCLUSIVE)) + } + mutex_exit(&mntvnode_lock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { + mutex_enter(&mntvnode_lock); + (void)vunmark(mvp); goto again; + } ip = VTOI(vp); dq = ip->i_dquot[type]; ip->i_dquot[type] = NODQUOT; dqrele(vp, dq); vput(vp); - /* if the list changed, start again */ - if (TAILQ_NEXT(vp, v_mntvnodes) != nextvp) - goto again; + mutex_enter(&mntvnode_lock); } + mutex_exit(&mntvnode_lock); #ifdef DIAGNOSTIC dqflush(qvp); #endif @@ -669,7 +692,7 @@ int qsync(struct mount *mp) { struct ufsmount *ump = VFSTOUFS(mp); - struct vnode *vp, *nextvp; + struct vnode *vp, *mvp; struct dquot *dq; int i, error; @@ -682,25 +705,32 @@ qsync(struct mount *mp) break; if (i == MAXQUOTAS) return (0); + + /* Allocate a marker vnode. */ + if ((mvp = valloc(mp)) == NULL) + return (ENOMEM); + /* * Search vnodes associated with this mount point, * synchronizing any modified dquot structures. 
*/ - simple_lock(&mntvnode_slock); -again: - TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { - nextvp = TAILQ_NEXT(vp, v_mntvnodes); - if (vp->v_mount != mp) - goto again; - if (vp->v_type == VNON) + mutex_enter(&mntvnode_lock); + again: + for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { + vmark(mvp, vp); + mutex_enter(&vp->v_interlock); + if (vp->v_mount != mp || vismarker(vp) || vp->v_type == VNON) { + mutex_exit(&vp->v_interlock); continue; - simple_lock(&vp->v_interlock); - simple_unlock(&mntvnode_slock); + } + mutex_exit(&mntvnode_lock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK); if (error) { - simple_lock(&mntvnode_slock); - if (error == ENOENT) + mutex_enter(&mntvnode_lock); + if (error == ENOENT) { + (void)vunmark(mvp); goto again; + } continue; } for (i = 0; i < MAXQUOTAS; i++) { @@ -713,12 +743,10 @@ again: mutex_exit(&dq->dq_interlock); } vput(vp); - simple_lock(&mntvnode_slock); - /* if the list changed, start again */ - if (TAILQ_NEXT(vp, v_mntvnodes) != nextvp) - goto again; + mutex_enter(&mntvnode_lock); } - simple_unlock(&mntvnode_slock); + mutex_exit(&mntvnode_lock); + vfree(mvp); return (0); } diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c index 3a7d6de3d3c4..280b22de641d 100644 --- a/sys/ufs/ufs/ufs_readwrite.c +++ b/sys/ufs/ufs/ufs_readwrite.c @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_readwrite.c,v 1.85 2007/12/08 19:29:57 pooka Exp $ */ +/* $NetBSD: ufs_readwrite.c,v 1.86 2008/01/02 11:49:14 ad Exp $ */ /*- * Copyright (c) 1993 @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.85 2007/12/08 19:29:57 pooka Exp $"); +__KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.86 2008/01/02 11:49:14 ad Exp $"); #ifdef LFS_READWRITE #define FS struct lfs @@ -311,7 +311,7 @@ WRITE(void *v) if (error) goto out; if (flags & B_SYNC) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); VOP_PUTPAGES(vp, trunc_page(osize & fs->fs_bmask), round_page(eob), PGO_CLEANIT | PGO_SYNCIO); } @@ -406,7 +406,7 @@ WRITE(void *v) #ifndef LFS_READWRITE if (!async && oldoff >> 16 != uio->uio_offset >> 16) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16, (uio->uio_offset >> 16) << 16, PGO_CLEANIT); if (error) @@ -415,7 +415,7 @@ WRITE(void *v) #endif } if (error == 0 && ioflag & IO_SYNC) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(origoff & fs->fs_bmask), round_page(blkroundup(fs, uio->uio_offset)), PGO_CLEANIT | PGO_SYNCIO); @@ -423,7 +423,7 @@ WRITE(void *v) goto out; bcache: - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); VOP_PUTPAGES(vp, trunc_page(origoff), round_page(origoff + resid), PGO_CLEANIT | PGO_FREE | PGO_SYNCIO); while (uio->uio_resid > 0) { diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 102443c79a92..973280db494b 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_vnops.c,v 1.160 2007/12/08 19:29:57 pooka Exp $ */ +/* $NetBSD: ufs_vnops.c,v 1.161 2008/01/02 11:49:14 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993, 1995 @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.160 2007/12/08 19:29:57 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.161 2008/01/02 11:49:14 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -166,7 +166,7 @@ ufs_mknod(void *v) * checked to see if it is an alias of an existing entry in * the inode cache. 
*/ - vput(*vpp); + VOP_UNLOCK(*vpp, 0); (*vpp)->v_type = VNON; vgone(*vpp); error = VFS_VGET(mp, ino, vpp); @@ -222,10 +222,10 @@ ufs_close(void *v) vp = ap->a_vp; ip = VTOI(vp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_usecount > 1) UFS_ITIMES(vp, NULL, NULL, NULL); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (0); } @@ -1888,10 +1888,10 @@ ufsspec_close(void *v) vp = ap->a_vp; ip = VTOI(vp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (vp->v_usecount > 1) UFS_ITIMES(vp, NULL, NULL, NULL); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap)); } @@ -1953,10 +1953,10 @@ ufsfifo_close(void *v) vp = ap->a_vp; ip = VTOI(vp); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); if (ap->a_vp->v_usecount > 1) UFS_ITIMES(vp, NULL, NULL, NULL); - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap)); } @@ -2059,7 +2059,6 @@ ufs_vinit(struct mount *mntp, int (**specops)(void *), int (**fifoops)(void *), vp->v_vflag &= ~VV_LOCKSWORK; VOP_UNLOCK(vp, 0); vp->v_op = spec_vnodeop_p; - vrele(vp); vgone(vp); lockmgr(&nvp->v_lock, LK_EXCLUSIVE, &nvp->v_interlock); /* diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h index 801b5abc7e9e..8df5932e28ca 100644 --- a/sys/uvm/uvm.h +++ b/sys/uvm/uvm.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm.h,v 1.52 2007/07/21 19:21:53 ad Exp $ */ +/* $NetBSD: uvm.h,v 1.53 2008/01/02 11:49:15 ad Exp $ */ /* * @@ -84,7 +84,6 @@ struct uvm { /* vm_page queues */ struct pgfreelist page_free[VM_NFREELIST]; /* unallocated pages */ int page_free_nextcolor; /* next color to allocate from */ - struct simplelock pageqlock; /* lock for active/inactive page q */ bool page_init_done; /* TRUE if uvm_page_init() finished */ bool page_idle_zero; /* TRUE if we should try to zero pages in the idle loop */ @@ -120,8 +119,8 @@ extern struct uvm_object *uvm_kernel_object; * locks (made globals for lockstat). */ +extern kmutex_t uvm_pageqlock; /* lock for active/inactive page q */ extern kmutex_t uvm_fpageqlock; /* lock for free page q */ -extern kmutex_t uvm_pagedaemon_lock; extern kmutex_t uvm_kentry_lock; extern kmutex_t uvm_swap_data_lock; extern kmutex_t uvm_scheduler_mutex; @@ -171,7 +170,7 @@ extern struct evcnt uvm_ra_miss; #define UVM_UNLOCK_AND_WAIT(event, slock, intr, msg, timo) \ do { \ - (void) ltsleep(event, PVM | PNORELOCK | (intr ? PCATCH : 0), \ + (void) mtsleep(event, PVM | PNORELOCK | (intr ? 
PCATCH : 0), \ msg, timo, slock); \ } while (/*CONSTCOND*/ 0) diff --git a/sys/uvm/uvm_amap.c b/sys/uvm/uvm_amap.c index a64d24d49d8f..b539d33b58a0 100644 --- a/sys/uvm/uvm_amap.c +++ b/sys/uvm/uvm_amap.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_amap.c,v 1.83 2007/12/08 15:46:31 ad Exp $ */ +/* $NetBSD: uvm_amap.c,v 1.84 2008/01/02 11:49:15 ad Exp $ */ /* * @@ -42,7 +42,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.83 2007/12/08 15:46:31 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.84 2008/01/02 11:49:15 ad Exp $"); #include "opt_uvmhist.h" @@ -704,11 +704,11 @@ amap_wipeout(struct vm_amap *amap) if (anon == NULL || anon->an_ref == 0) panic("amap_wipeout: corrupt amap"); - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); UVMHIST_LOG(maphist," processing anon 0x%x, ref=%d", anon, anon->an_ref, 0, 0); refs = --anon->an_ref; - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); if (refs == 0) { /* @@ -860,9 +860,9 @@ amap_copy(struct vm_map *map, struct vm_map_entry *entry, int flags, srcamap->am_anon[entry->aref.ar_pageoff + lcv]; if (amap->am_anon[lcv] == NULL) continue; - simple_lock(&amap->am_anon[lcv]->an_lock); + mutex_enter(&amap->am_anon[lcv]->an_lock); amap->am_anon[lcv]->an_ref++; - simple_unlock(&amap->am_anon[lcv]->an_lock); + mutex_exit(&amap->am_anon[lcv]->an_lock); amap->am_bckptr[lcv] = amap->am_nused; amap->am_slots[amap->am_nused] = lcv; amap->am_nused++; @@ -947,7 +947,7 @@ ReStart: slot = amap->am_slots[lcv]; anon = amap->am_anon[slot]; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); /* * If the anon has only one ref, we must have already copied it. @@ -958,7 +958,7 @@ ReStart: if (anon->an_ref == 1) { KASSERT(anon->an_page != NULL || anon->an_swslot != 0); - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); continue; } @@ -976,7 +976,7 @@ ReStart: */ if (pg->loan_count != 0) { - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); continue; } KASSERT(pg->uanon == anon && pg->uobject == NULL); @@ -1011,10 +1011,10 @@ ReStart: if (nanon) { nanon->an_ref--; - simple_unlock(&nanon->an_lock); + mutex_exit(&nanon->an_lock); uvm_anfree(nanon); } - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); amap_unlock(amap); uvm_wait("cownowpage"); goto ReStart; @@ -1034,13 +1034,13 @@ ReStart: * locked the whole time it can't be PG_RELEASED or PG_WANTED. 
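[editor's note] The UVM_UNLOCK_AND_WAIT change in uvm.h above is the template for much of what follows: ltsleep() took a simplelock, while mtsleep() takes the kmutex and, like any condition-variable wait, releases it and blocks as one atomic step, so a wakeup cannot fire in the gap between unlock and sleep. (With PNORELOCK the lock is not retaken on wakeup, which is why callers in these hunks re-enter vmobjlock explicitly afterwards.) A hedged userland sketch of the atomic part, with invented names:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool done;

static void
wait_for_done(void)
{
	pthread_mutex_lock(&lock);
	while (!done) {
		/*
		 * Releases 'lock' and blocks in one step, then
		 * reacquires it before returning: the same
		 * unlock-and-sleep guarantee mtsleep() provides.
		 */
		pthread_cond_wait(&cond, &lock);
	}
	pthread_mutex_unlock(&lock);
}

static void
signal_done(void)
{
	pthread_mutex_lock(&lock);
	done = true;
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
}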
*/ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageactivate(npg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); npg->flags &= ~(PG_BUSY|PG_FAKE); UVM_PAGE_OWN(npg, NULL); - simple_unlock(&nanon->an_lock); - simple_unlock(&anon->an_lock); + mutex_exit(&nanon->an_lock); + mutex_exit(&anon->an_lock); } amap_unlock(amap); } @@ -1253,9 +1253,9 @@ amap_wiperange(struct vm_amap *amap, int slotoff, int slots) * drop anon reference count */ - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); refs = --anon->an_ref; - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); if (refs == 0) { /* @@ -1334,11 +1334,11 @@ amap_swap_off(int startslot, int endslot) slot = am->am_slots[i]; anon = am->am_anon[slot]; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); swslot = anon->an_swslot; if (swslot < startslot || endslot <= swslot) { - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); continue; } diff --git a/sys/uvm/uvm_anon.c b/sys/uvm/uvm_anon.c index 3b0f9c94bcaf..9d78bdd76c95 100644 --- a/sys/uvm/uvm_anon.c +++ b/sys/uvm/uvm_anon.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_anon.c,v 1.49 2007/12/20 23:50:00 ad Exp $ */ +/* $NetBSD: uvm_anon.c,v 1.50 2008/01/02 11:49:15 ad Exp $ */ /* * @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v 1.49 2007/12/20 23:50:00 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v 1.50 2008/01/02 11:49:15 ad Exp $"); #include "opt_uvmhist.h" @@ -75,7 +75,7 @@ uvm_anon_ctor(void *arg, void *object, int flags) struct vm_anon *anon = object; anon->an_ref = 0; - simple_lock_init(&anon->an_lock); + mutex_init(&anon->an_lock, MUTEX_DEFAULT, IPL_NONE); anon->an_page = NULL; #if defined(VMSWAP) anon->an_swslot = 0; @@ -87,8 +87,9 @@ uvm_anon_ctor(void *arg, void *object, int flags) static void uvm_anon_dtor(void *arg, void *object) { + struct vm_anon *anon = object; - /* nothing yet */ + mutex_destroy(&anon->an_lock); } /* @@ -104,13 +105,12 @@ uvm_analloc(void) anon = pool_cache_get(&uvm_anon_cache, PR_NOWAIT); if (anon) { KASSERT(anon->an_ref == 0); - LOCK_ASSERT(simple_lock_held(&anon->an_lock) == 0); KASSERT(anon->an_page == NULL); #if defined(VMSWAP) KASSERT(anon->an_swslot == 0); #endif /* defined(VMSWAP) */ anon->an_ref = 1; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); } return anon; } @@ -132,6 +132,7 @@ uvm_anfree(struct vm_anon *anon) UVMHIST_LOG(maphist,"(anon=0x%x)", anon, 0,0,0); KASSERT(anon->an_ref == 0); + KASSERT(!mutex_owned(&anon->an_lock)); /* * get page @@ -146,9 +147,9 @@ uvm_anfree(struct vm_anon *anon) */ if (pg && pg->loan_count) { - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); pg = uvm_anon_lockloanpg(anon); - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); } /* @@ -164,12 +165,12 @@ uvm_anfree(struct vm_anon *anon) */ if (pg->uobject) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); KASSERT(pg->loan_count > 0); pg->loan_count--; pg->uanon = NULL; - uvm_unlock_pageq(); - simple_unlock(&pg->uobject->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&pg->uobject->vmobjlock); } else { /* @@ -177,7 +178,7 @@ uvm_anfree(struct vm_anon *anon) */ KASSERT((pg->flags & PG_RELEASED) == 0); - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); pmap_page_protect(pg, VM_PROT_NONE); /* @@ -187,13 +188,13 @@ uvm_anfree(struct vm_anon *anon) if (pg->flags & PG_BUSY) { pg->flags |= PG_RELEASED; - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); return; } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - 
uvm_unlock_pageq(); - simple_unlock(&anon->an_lock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&anon->an_lock); UVMHIST_LOG(maphist, "anon 0x%x, page 0x%x: " "freed now!", anon, pg, 0, 0); } @@ -280,7 +281,7 @@ uvm_anon_lockloanpg(struct vm_anon *anon) struct vm_page *pg; bool locked = false; - LOCK_ASSERT(simple_lock_held(&anon->an_lock)); + KASSERT(mutex_owned(&anon->an_lock)); /* * loop while we have a resident page that has a non-zero loan count. @@ -301,15 +302,15 @@ uvm_anon_lockloanpg(struct vm_anon *anon) */ if (pg->uobject) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->uobject) { locked = - simple_lock_try(&pg->uobject->vmobjlock); + mutex_tryenter(&pg->uobject->vmobjlock); } else { /* object disowned before we got PQ lock */ locked = true; } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* * if we didn't get a lock (try lock failed), then we @@ -317,14 +318,16 @@ uvm_anon_lockloanpg(struct vm_anon *anon) */ if (!locked) { - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); /* * someone locking the object has a chance to * lock us right now */ + /* XXX Better than yielding but inadequate. */ + kpause("livelock", false, 1, NULL); - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); continue; } } @@ -335,10 +338,10 @@ uvm_anon_lockloanpg(struct vm_anon *anon) */ if (pg->uobject == NULL && (pg->pqflags & PQ_ANON) == 0) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); pg->pqflags |= PQ_ANON; pg->loan_count--; - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } break; } @@ -362,7 +365,7 @@ uvm_anon_pagein(struct vm_anon *anon) int rv; /* locked: anon */ - LOCK_ASSERT(simple_lock_held(&anon->an_lock)); + KASSERT(mutex_owned(&anon->an_lock)); rv = uvmfault_anonget(NULL, NULL, anon); @@ -407,10 +410,10 @@ uvm_anon_pagein(struct vm_anon *anon) */ pmap_clear_reference(pg); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->wire_count == 0) uvm_pagedeactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); if (pg->flags & PG_WANTED) { wakeup(pg); @@ -421,9 +424,9 @@ uvm_anon_pagein(struct vm_anon *anon) * unlock the anon and we're done. 
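[editor's note] uvm_anon_lockloanpg just above shows the commit's standard answer to out-of-order locking: the anon lock is already held, the object lock would normally be taken first, so the code uses mutex_tryenter and, on failure, drops its own lock and kpause()s for a tick to let the other thread through. The XXX comment is honest that this is a stopgap. A generic sketch of the back-off shape with pthreads; the locks a and b are illustrative only:

#include <pthread.h>
#include <time.h>

static pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;

/* Take b while holding a, although the canonical order is b then a. */
static void
lock_out_of_order(void)
{
	const struct timespec tick = { 0, 1000000 };	/* ~1 ms back-off */

	pthread_mutex_lock(&a);
	while (pthread_mutex_trylock(&b) != 0) {
		/* Blocking here could deadlock: back off instead. */
		pthread_mutex_unlock(&a);
		nanosleep(&tick, NULL);
		pthread_mutex_lock(&a);
		/* State guarded by a may have changed; re-check it. */
	}
	/* Both held, in the "wrong" order but safely. */
	pthread_mutex_unlock(&b);
	pthread_mutex_unlock(&a);
}

The re-check after reacquiring matters: in the kernel loop the code re-reads an_page each time around, since the loan may have been resolved while the anon lock was dropped.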
*/ - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); if (uobj) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } return false; } @@ -441,8 +444,7 @@ uvm_anon_release(struct vm_anon *anon) { struct vm_page *pg = anon->an_page; - LOCK_ASSERT(simple_lock_held(&anon->an_lock)); - + KASSERT(mutex_owned(&anon->an_lock)); KASSERT(pg != NULL); KASSERT((pg->flags & PG_RELEASED) != 0); KASSERT((pg->flags & PG_BUSY) != 0); @@ -451,10 +453,10 @@ uvm_anon_release(struct vm_anon *anon) KASSERT(pg->loan_count == 0); KASSERT(anon->an_ref == 0); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - uvm_unlock_pageq(); - simple_unlock(&anon->an_lock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&anon->an_lock); KASSERT(anon->an_page == NULL); diff --git a/sys/uvm/uvm_anon.h b/sys/uvm/uvm_anon.h index 26f85f7d6aca..0ba015e9dedb 100644 --- a/sys/uvm/uvm_anon.h +++ b/sys/uvm/uvm_anon.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_anon.h,v 1.24 2007/02/21 23:00:12 thorpej Exp $ */ +/* $NetBSD: uvm_anon.h,v 1.25 2008/01/02 11:49:15 ad Exp $ */ /* * @@ -53,7 +53,7 @@ struct vm_anon { int an_ref; /* reference count [an_lock] */ - struct simplelock an_lock; /* lock for an_ref */ + kmutex_t an_lock; /* lock for an_ref */ struct vm_page *an_page;/* if in RAM [an_lock] */ #if defined(VMSWAP) || 1 /* XXX libkvm */ int an_swslot; /* drum swap slot # (if != 0) diff --git a/sys/uvm/uvm_aobj.c b/sys/uvm/uvm_aobj.c index 9c6ed771b295..078b0f012557 100644 --- a/sys/uvm/uvm_aobj.c +++ b/sys/uvm/uvm_aobj.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_aobj.c,v 1.95 2007/12/01 10:40:27 yamt Exp $ */ +/* $NetBSD: uvm_aobj.c,v 1.96 2008/01/02 11:49:15 ad Exp $ */ /* * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and @@ -43,7 +43,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.95 2007/12/01 10:40:27 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.96 2008/01/02 11:49:15 ad Exp $"); #include "opt_uvmhist.h" @@ -398,11 +398,12 @@ uao_free(struct uvm_aobj *aobj) { int swpgonlydelta = 0; + #if defined(VMSWAP) uao_dropswap_range1(aobj, 0, 0); #endif /* defined(VMSWAP) */ - simple_unlock(&aobj->u_obj.vmobjlock); + mutex_exit(&aobj->u_obj.vmobjlock); #if defined(VMSWAP) if (UAO_USES_SWHASH(aobj)) { @@ -426,6 +427,7 @@ uao_free(struct uvm_aobj *aobj) * finally free the aobj itself */ + UVM_OBJ_DESTROY(&aobj->u_obj); pool_put(&uvm_aobj_pool, aobj); /* @@ -552,8 +554,7 @@ uao_init(void) return; uao_initialized = true; LIST_INIT(&uao_list); - /* XXXSMP should be adaptive but vmobjlock needs to be too */ - mutex_init(&uao_list_lock, MUTEX_SPIN, IPL_NONE); + mutex_init(&uao_list_lock, MUTEX_DEFAULT, IPL_NONE); } /* @@ -566,9 +567,9 @@ uao_init(void) void uao_reference(struct uvm_object *uobj) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); uao_reference_locked(uobj); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } /* @@ -607,7 +608,7 @@ uao_reference_locked(struct uvm_object *uobj) void uao_detach(struct uvm_object *uobj) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); uao_detach_locked(uobj); } @@ -632,14 +633,14 @@ uao_detach_locked(struct uvm_object *uobj) */ if (UVM_OBJ_IS_KERN_OBJECT(uobj)) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return; } UVMHIST_LOG(maphist," (uobj=0x%x) ref=%d", uobj,uobj->uo_refs,0,0); uobj->uo_refs--; if (uobj->uo_refs) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0); return; } @@ -659,22 +660,22 @@ 
uao_detach_locked(struct uvm_object *uobj) * free the page itself. */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL) { pmap_page_protect(pg, VM_PROT_NONE); if (pg->flags & PG_BUSY) { pg->flags |= PG_WANTED; - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, false, "uao_det", 0); - simple_lock(&uobj->vmobjlock); - uvm_lock_pageq(); + mutex_enter(&uobj->vmobjlock); + mutex_enter(&uvm_pageqlock); continue; } uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT); uvm_pagefree(pg); } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* * finally, free the aobj itself. @@ -731,6 +732,8 @@ uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) voff_t curoff; UVMHIST_FUNC("uao_put"); UVMHIST_CALLED(maphist); + KASSERT(mutex_owned(&uobj->vmobjlock)); + curoff = 0; if (flags & PGO_ALLPAGES) { start = 0; @@ -761,7 +764,7 @@ uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) */ if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return 0; } @@ -792,7 +795,7 @@ uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) nextpg = NULL; /* Quell compiler warning */ } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); /* locked: both page queues and uobj */ for (;;) { @@ -855,11 +858,11 @@ uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) TAILQ_INSERT_BEFORE(pg, &curmp, listq); } pg->flags |= PG_WANTED; - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0, "uao_put", 0); - simple_lock(&uobj->vmobjlock); - uvm_lock_pageq(); + mutex_enter(&uobj->vmobjlock); + mutex_enter(&uvm_pageqlock); if (by_list) { nextpg = TAILQ_NEXT(&curmp, listq); TAILQ_REMOVE(&uobj->memq, &curmp, @@ -880,14 +883,12 @@ uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) continue; } } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); if (by_list) { TAILQ_REMOVE(&uobj->memq, &endmp, listq); - } - simple_unlock(&uobj->vmobjlock); - if (by_list) { uvm_lwp_rele(curlwp); } + mutex_exit(&uobj->vmobjlock); return 0; } @@ -1061,11 +1062,11 @@ gotpage: /* out of RAM? */ if (ptmp == NULL) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); UVMHIST_LOG(pdhist, "sleeping, ptmp == NULL\n",0,0,0,0); uvm_wait("uao_getpage"); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); continue; } @@ -1092,7 +1093,7 @@ gotpage: ptmp->flags,0,0,0); UVM_UNLOCK_AND_WAIT(ptmp, &uobj->vmobjlock, false, "uao_get", 0); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); continue; } @@ -1147,9 +1148,9 @@ gotpage: * unlock object for i/o, relock when done. */ - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); error = uvm_swap_get(ptmp, swslot, PGO_SYNCIO); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); /* * I/O done. check for errors. @@ -1174,10 +1175,10 @@ gotpage: uvm_swap_markbad(swslot, 1); } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pagefree(ptmp); - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uobj->vmobjlock); return error; } #else /* defined(VMSWAP) */ @@ -1210,7 +1211,7 @@ gotpage: */ done: - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0); return 0; } @@ -1263,8 +1264,10 @@ restart: * so this should be a rare case. 
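[editor's note] uao_detach_locked and uao_put above show the canonical busy-page dance under the new locks: on finding PG_BUSY, set PG_WANTED, drop the page-queue lock, sleep on the page with UVM_UNLOCK_AND_WAIT (which also releases vmobjlock), then retake both locks in the fixed order and rescan. A condensed sketch of just that control flow, with a single flag standing in for PG_BUSY and pthread locks for vmobjlock/pageqlock:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t obj_lock = PTHREAD_MUTEX_INITIALIZER;	/* vmobjlock */
static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;	/* pageqlock */
static pthread_cond_t page_cv = PTHREAD_COND_INITIALIZER;
static bool page_busy;

static void
drain_object(void)
{
	pthread_mutex_lock(&obj_lock);
	pthread_mutex_lock(&queue_lock);
	for (;;) {
		if (page_busy) {
			/* Drop the inner lock before sleeping. */
			pthread_mutex_unlock(&queue_lock);
			/* The wait releases obj_lock atomically. */
			while (page_busy)
				pthread_cond_wait(&page_cv, &obj_lock);
			/* Retake in the fixed order, then rescan. */
			pthread_mutex_lock(&queue_lock);
			continue;
		}
		/* ...free the page under both locks here... */
		break;
	}
	pthread_mutex_unlock(&queue_lock);
	pthread_mutex_unlock(&obj_lock);
}

/* The unbusy side: clear the flag and wake sleepers. */
static void
unbusy_page(void)
{
	pthread_mutex_lock(&obj_lock);
	page_busy = false;
	pthread_cond_broadcast(&page_cv);
	pthread_mutex_unlock(&obj_lock);
}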
*/ - if (!simple_lock_try(&aobj->u_obj.vmobjlock)) { + if (!mutex_tryenter(&aobj->u_obj.vmobjlock)) { mutex_exit(&uao_list_lock); + /* XXX Better than yielding but inadequate. */ + kpause("livelock", false, 1, NULL); goto restart; } @@ -1412,7 +1415,7 @@ uao_pagein_page(struct uvm_aobj *aobj, int pageidx) * relock and finish up. */ - simple_lock(&aobj->u_obj.vmobjlock); + mutex_enter(&aobj->u_obj.vmobjlock); switch (rv) { case 0: break; @@ -1441,10 +1444,10 @@ uao_pagein_page(struct uvm_aobj *aobj, int pageidx) /* * make sure it's on a page queue. */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->wire_count == 0) uvm_pageenqueue(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); if (pg->flags & PG_WANTED) { wakeup(pg); @@ -1467,7 +1470,7 @@ uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end) { struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; - LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock)); + KASSERT(mutex_owned(&uobj->vmobjlock)); uao_dropswap_range1(aobj, start, end); } diff --git a/sys/uvm/uvm_bio.c b/sys/uvm/uvm_bio.c index ce96314f294e..7f99e25093cf 100644 --- a/sys/uvm/uvm_bio.c +++ b/sys/uvm/uvm_bio.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_bio.c,v 1.63 2007/12/01 10:40:27 yamt Exp $ */ +/* $NetBSD: uvm_bio.c,v 1.64 2008/01/02 11:49:15 ad Exp $ */ /* * Copyright (c) 1998 Chuck Silvers. @@ -34,7 +34,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.63 2007/12/01 10:40:27 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.64 2008/01/02 11:49:15 ad Exp $"); #include "opt_uvmhist.h" #include "opt_ubc.h" @@ -286,7 +286,7 @@ ubc_fault(struct uvm_faultinfo *ufi, vaddr_t ign1, struct vm_page **ign2, again: memset(pgs, 0, sizeof (pgs)); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); UVMHIST_LOG(ubchist, "slot_offset 0x%x writeoff 0x%x writelen 0x%x ", slot_offset, umap->writeoff, umap->writelen, 0); @@ -336,16 +336,16 @@ again: } uobj = pg->uobject; - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); if (pg->flags & PG_WANTED) { wakeup(pg); } KASSERT((pg->flags & PG_FAKE) == 0); if (pg->flags & PG_RELEASED) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uobj->vmobjlock); continue; } if (pg->loan_count != 0) { @@ -363,7 +363,7 @@ again: newpg = uvm_loanbreak(pg); if (newpg == NULL) { uvm_page_unbusy(&pg, 1); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); uvm_wait("ubc_loanbrk"); continue; /* will re-fault */ } @@ -386,12 +386,12 @@ again: mask = rdonly ? 
~VM_PROT_WRITE : VM_PROT_ALL; error = pmap_enter(ufi->orig_map->pmap, va, VM_PAGE_TO_PHYS(pg), prot & mask, PMAP_CANFAIL | (access_type & mask)); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); pg->flags &= ~(PG_BUSY|PG_WANTED); UVM_PAGE_OWN(pg, NULL); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); if (error) { UVMHIST_LOG(ubchist, "pmap_enter fail %d", error, 0, 0, 0); @@ -452,13 +452,13 @@ ubc_alloc(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, int advice, */ again: - simple_lock(&ubc_object.uobj.vmobjlock); + mutex_enter(&ubc_object.uobj.vmobjlock); umap = ubc_find_mapping(uobj, umap_offset); if (umap == NULL) { UBC_EVCNT_INCR(wincachemiss); umap = TAILQ_FIRST(UBC_QUEUE(offset)); if (umap == NULL) { - simple_unlock(&ubc_object.uobj.vmobjlock); + mutex_exit(&ubc_object.uobj.vmobjlock); kpause("ubc_alloc", false, hz, NULL); goto again; } @@ -501,7 +501,7 @@ again: umap->refcount++; umap->advice = advice; - simple_unlock(&ubc_object.uobj.vmobjlock); + mutex_exit(&ubc_object.uobj.vmobjlock); UVMHIST_LOG(ubchist, "umap %p refs %d va %p flags 0x%x", umap, umap->refcount, va, flags); @@ -522,7 +522,7 @@ again: } again_faultbusy: memset(pgs, 0, sizeof(pgs)); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); error = (*uobj->pgops->pgo_get)(uobj, trunc_page(offset), pgs, &npages, 0, VM_PROT_READ | VM_PROT_WRITE, advice, gpflags); UVMHIST_LOG(ubchist, "faultbusy getpages %d", error, 0, 0, 0); @@ -534,17 +534,17 @@ again_faultbusy: KASSERT(pg->uobject == uobj); if (pg->loan_count != 0) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); if (pg->loan_count != 0) { pg = uvm_loanbreak(pg); } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); if (pg == NULL) { pmap_kremove(va, ubc_winsize); pmap_update(pmap_kernel()); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); uvm_page_unbusy(pgs, npages); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); uvm_wait("ubc_alloc"); goto again_faultbusy; } @@ -598,7 +598,7 @@ ubc_release(void *va, int flags) memset((char *)umapva + endoff, 0, zerolen); } umap->flags &= ~UMAP_PAGES_LOCKED; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { rv = pmap_extract(pmap_kernel(), umapva + slot_offset + (i << PAGE_SHIFT), &pa); @@ -608,18 +608,18 @@ ubc_release(void *va, int flags) KASSERT(pgs[i]->loan_count == 0); uvm_pageactivate(pgs[i]); } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); pmap_kremove(umapva, ubc_winsize); pmap_update(pmap_kernel()); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); uvm_page_unbusy(pgs, npages); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); unmapped = true; } else { unmapped = false; } - simple_lock(&ubc_object.uobj.vmobjlock); + mutex_enter(&ubc_object.uobj.vmobjlock); umap->writeoff = 0; umap->writelen = 0; umap->refcount--; @@ -649,7 +649,7 @@ ubc_release(void *va, int flags) } } UVMHIST_LOG(ubchist, "umap %p refs %d", umap, umap->refcount, 0, 0); - simple_unlock(&ubc_object.uobj.vmobjlock); + mutex_exit(&ubc_object.uobj.vmobjlock); } /* @@ -684,6 +684,7 @@ ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo, int advice, * do it now. it's safe to use memset here * because we just mapped the pages above. 
*/ + printf("%s: error=%d\n", __func__, error); memset(win, 0, bytelen); } ubc_release(win, flags); @@ -712,7 +713,7 @@ ubc_flush(struct uvm_object *uobj, voff_t start, voff_t end) UVMHIST_LOG(ubchist, "uobj %p start 0x%lx end 0x%lx", uobj, start, end, 0); - simple_lock(&ubc_object.uobj.vmobjlock); + mutex_enter(&ubc_object.uobj.vmobjlock); for (umap = ubc_object.umap; umap < &ubc_object.umap[ubc_nwins]; umap++) { @@ -738,6 +739,6 @@ ubc_flush(struct uvm_object *uobj, voff_t start, voff_t end) TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap, inactive); } pmap_update(pmap_kernel()); - simple_unlock(&ubc_object.uobj.vmobjlock); + mutex_exit(&ubc_object.uobj.vmobjlock); } #endif /* notused */ diff --git a/sys/uvm/uvm_device.c b/sys/uvm/uvm_device.c index 102786711a7c..0af531c94645 100644 --- a/sys/uvm/uvm_device.c +++ b/sys/uvm/uvm_device.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_device.c,v 1.52 2007/12/08 15:33:09 ad Exp $ */ +/* $NetBSD: uvm_device.c,v 1.53 2008/01/02 11:49:16 ad Exp $ */ /* * @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_device.c,v 1.52 2007/12/08 15:33:09 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_device.c,v 1.53 2008/01/02 11:49:16 ad Exp $"); #include "opt_uvmhist.h" @@ -61,7 +61,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_device.c,v 1.52 2007/12/08 15:33:09 ad Exp $"); LIST_HEAD(udv_list_struct, uvm_device); static struct udv_list_struct udv_list; -static struct simplelock udv_lock; +static kmutex_t udv_lock; /* * functions @@ -99,7 +99,7 @@ static void udv_init(void) { LIST_INIT(&udv_list); - simple_lock_init(&udv_lock); + mutex_init(&udv_lock, MUTEX_DEFAULT, IPL_NONE); } /* @@ -171,7 +171,7 @@ udv_attach(void *arg, vm_prot_t accessprot, * first, attempt to find it on the main list */ - simple_lock(&udv_lock); + mutex_enter(&udv_lock); LIST_FOREACH(lcv, &udv_list, u_list) { if (device == lcv->u_device) break; @@ -197,21 +197,21 @@ udv_attach(void *arg, vm_prot_t accessprot, /* we are now holding it */ lcv->u_flags |= UVM_DEVICE_HOLD; - simple_unlock(&udv_lock); + mutex_exit(&udv_lock); /* * bump reference count, unhold, return. */ - simple_lock(&lcv->u_obj.vmobjlock); + mutex_enter(&lcv->u_obj.vmobjlock); lcv->u_obj.uo_refs++; - simple_unlock(&lcv->u_obj.vmobjlock); + mutex_exit(&lcv->u_obj.vmobjlock); - simple_lock(&udv_lock); + mutex_enter(&udv_lock); if (lcv->u_flags & UVM_DEVICE_WANTED) wakeup(lcv); lcv->u_flags &= ~(UVM_DEVICE_WANTED|UVM_DEVICE_HOLD); - simple_unlock(&udv_lock); + mutex_exit(&udv_lock); return(&lcv->u_obj); } @@ -219,11 +219,11 @@ udv_attach(void *arg, vm_prot_t accessprot, * did not find it on main list. need to malloc a new one. 
*/ - simple_unlock(&udv_lock); + mutex_exit(&udv_lock); /* NOTE: we could sleep in the following malloc() */ MALLOC(udv, struct uvm_device *, sizeof(*udv), M_TEMP, M_WAITOK); - simple_lock(&udv_lock); + mutex_enter(&udv_lock); /* * now we have to double check to make sure no one added it @@ -241,7 +241,7 @@ udv_attach(void *arg, vm_prot_t accessprot, */ if (lcv) { - simple_unlock(&udv_lock); + mutex_exit(&udv_lock); FREE(udv, M_TEMP); continue; } @@ -255,7 +255,7 @@ udv_attach(void *arg, vm_prot_t accessprot, udv->u_flags = 0; udv->u_device = device; LIST_INSERT_HEAD(&udv_list, udv, u_list); - simple_unlock(&udv_lock); + mutex_exit(&udv_lock); return(&udv->u_obj); } /*NOTREACHED*/ @@ -276,11 +276,11 @@ udv_reference(struct uvm_object *uobj) { UVMHIST_FUNC("udv_reference"); UVMHIST_CALLED(maphist); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); uobj->uo_refs++; UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)", uobj, uobj->uo_refs,0,0); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } /* @@ -301,10 +301,10 @@ udv_detach(struct uvm_object *uobj) * loop until done */ again: - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); if (uobj->uo_refs > 1) { uobj->uo_refs--; - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); UVMHIST_LOG(maphist," <- done, uobj=0x%x, ref=%d", uobj,uobj->uo_refs,0,0); return; @@ -314,10 +314,10 @@ again: * is it being held? if so, wait until others are done. */ - simple_lock(&udv_lock); + mutex_enter(&udv_lock); if (udv->u_flags & UVM_DEVICE_HOLD) { udv->u_flags |= UVM_DEVICE_WANTED; - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); UVM_UNLOCK_AND_WAIT(udv, &udv_lock, false, "udv_detach",0); goto again; } @@ -329,8 +329,9 @@ again: LIST_REMOVE(udv, u_list); if (udv->u_flags & UVM_DEVICE_WANTED) wakeup(udv); - simple_unlock(&udv_lock); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&udv_lock); + mutex_exit(&uobj->vmobjlock); + UVM_OBJ_DESTROY(uobj); FREE(udv, M_TEMP); UVMHIST_LOG(maphist," <- done, freed uobj=0x%x", uobj,0,0,0); } diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h index 6f061f3c344a..ac6804b76f98 100644 --- a/sys/uvm/uvm_extern.h +++ b/sys/uvm/uvm_extern.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_extern.h,v 1.142 2007/12/26 22:11:53 christos Exp $ */ +/* $NetBSD: uvm_extern.h,v 1.143 2008/01/02 11:49:16 ad Exp $ */ /* * @@ -696,6 +696,8 @@ void uvm_aio_aiodone(struct buf *); void uvm_pageout(void *); struct work; void uvm_aiodone_worker(struct work *, void *); +void uvm_pageout_start(int); +void uvm_pageout_done(int); void uvm_estimatepageable(int *, int *); /* uvm_pglist.c */ diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c index 35492408cea1..a9ac94fef028 100644 --- a/sys/uvm/uvm_fault.c +++ b/sys/uvm/uvm_fault.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_fault.c,v 1.121 2007/10/11 19:53:43 ad Exp $ */ +/* $NetBSD: uvm_fault.c,v 1.122 2008/01/02 11:49:16 ad Exp $ */ /* * @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.121 2007/10/11 19:53:43 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.122 2008/01/02 11:49:16 ad Exp $"); #include "opt_uvmhist.h" @@ -198,17 +198,17 @@ uvmfault_anonflush(struct vm_anon **anons, int n) for (lcv = 0 ; lcv < n ; lcv++) { if (anons[lcv] == NULL) continue; - simple_lock(&anons[lcv]->an_lock); + mutex_enter(&anons[lcv]->an_lock); pg = anons[lcv]->an_page; if (pg && (pg->flags & PG_BUSY) == 0) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->wire_count == 0) { pmap_clear_reference(pg); 
uvm_pagedeactivate(pg); } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } - simple_unlock(&anons[lcv]->an_lock); + mutex_exit(&anons[lcv]->an_lock); } } @@ -290,7 +290,7 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, int error; UVMHIST_FUNC("uvmfault_anonget"); UVMHIST_CALLED(maphist); - LOCK_ASSERT(simple_lock_held(&anon->an_lock)); + KASSERT(mutex_owned(&anon->an_lock)); error = 0; uvmexp.fltanget++; @@ -409,7 +409,7 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, amap_lock(amap); } if (locked || we_own) - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); /* * if we own the page (i.e. we set PG_BUSY), then we need @@ -449,15 +449,15 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, * pmap_page_protect it... */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); if (locked) uvmfault_unlockall(ufi, amap, NULL, anon); else - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); UVMHIST_LOG(maphist, "<- ERROR", 0,0,0,0); return error; } @@ -490,13 +490,13 @@ released: * we've successfully read the page, activate it. */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE); UVM_PAGE_OWN(pg, NULL); if (!locked) - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); #else /* defined(VMSWAP) */ panic("%s: we_own", __func__); #endif /* defined(VMSWAP) */ @@ -583,13 +583,16 @@ uvmfault_promote(struct uvm_faultinfo *ufi, KASSERT(uobjpage != NULL); KASSERT(uobjpage == PGO_DONTCARE || (uobjpage->flags & PG_BUSY) != 0); KASSERT(mutex_owned(&amap->am_l)); - LOCK_ASSERT(oanon == NULL || simple_lock_held(&oanon->an_lock)); - LOCK_ASSERT(uobj == NULL || simple_lock_held(&uobj->vmobjlock)); + KASSERT(oanon == NULL || mutex_owned(&oanon->an_lock)); + KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock)); +#if 0 + KASSERT(*spare == NULL || !mutex_owned(&(*spare)->an_lock)); +#endif if (*spare != NULL) { anon = *spare; *spare = NULL; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); } else if (ufi->map != kernel_map) { anon = uvm_analloc(); } else { @@ -606,7 +609,7 @@ uvmfault_promote(struct uvm_faultinfo *ufi, if (*spare == NULL) { goto nomem; } - simple_unlock(&(*spare)->an_lock); + mutex_exit(&(*spare)->an_lock); error = ERESTART; goto done; } @@ -632,7 +635,7 @@ uvmfault_promote(struct uvm_faultinfo *ufi, if (pg == NULL) { /* save anon for the next try. */ if (anon != NULL) { - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); *spare = anon; } @@ -911,7 +914,7 @@ ReFault: /* flush object? */ if (uobj) { uoff = (startva - ufi.entry->start) + ufi.entry->offset; - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); (void) (uobj->pgops->pgo_put)(uobj, uoff, uoff + (nback << PAGE_SHIFT), PGO_DEACTIVATE); } @@ -965,13 +968,13 @@ ReFault: continue; } anon = anons[lcv]; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); /* ignore loaned pages */ if (anon->an_page && anon->an_page->loan_count == 0 && (anon->an_page->flags & PG_BUSY) == 0) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageenqueue(anon->an_page); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); UVMHIST_LOG(maphist, " MAPPING: n anon: pm=0x%x, va=0x%x, pg=0x%x", ufi.orig_map->pmap, currva, anon->an_page, 0); @@ -990,7 +993,7 @@ ReFault: PMAP_CANFAIL | (VM_MAPENT_ISWIRED(ufi.entry) ? 
PMAP_WIRED : 0)); } - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); pmap_update(ufi.orig_map->pmap); } @@ -1017,8 +1020,7 @@ ReFault: */ if (uobj && shadowed == false && uobj->pgops->pgo_fault != NULL) { - simple_lock(&uobj->vmobjlock); - + mutex_enter(&uobj->vmobjlock); /* locked: maps(read), amap (if there), uobj */ error = uobj->pgops->pgo_fault(&ufi, startva, pages, npages, centeridx, access_type, PGO_LOCKED|PGO_SYNCIO); @@ -1042,8 +1044,7 @@ ReFault: */ if (uobj && shadowed == false) { - simple_lock(&uobj->vmobjlock); - + mutex_enter(&uobj->vmobjlock); /* locked (!shadowed): maps(read), amap (if there), uobj */ /* * the following call to pgo_get does _not_ change locking state @@ -1099,9 +1100,9 @@ ReFault: * we can just directly enter the pages. */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageenqueue(curpg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); UVMHIST_LOG(maphist, " MAPPING: n obj: pm=0x%x, va=0x%x, pg=0x%x", ufi.orig_map->pmap, currva, curpg, 0); @@ -1153,7 +1154,7 @@ ReFault: KASSERT(mutex_owned(&amap->am_l)); } else { KASSERT(amap == NULL || mutex_owned(&amap->am_l)); - LOCK_ASSERT(uobj == NULL || simple_lock_held(&uobj->vmobjlock)); + KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock)); KASSERT(uobjpage == NULL || (uobjpage->flags & PG_BUSY) != 0); } @@ -1187,11 +1188,11 @@ ReFault: anon = anons[centeridx]; UVMHIST_LOG(maphist, " case 1 fault: anon=0x%x", anon, 0,0,0); - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); /* locked: maps(read), amap, anon */ KASSERT(mutex_owned(&amap->am_l)); - LOCK_ASSERT(simple_lock_held(&anon->an_lock)); + KASSERT(mutex_owned(&anon->an_lock)); /* * no matter if we have case 1A or case 1B we are going to need to @@ -1231,8 +1232,8 @@ ReFault: /* locked: maps(read), amap, anon, uobj(if one) */ KASSERT(mutex_owned(&amap->am_l)); - LOCK_ASSERT(simple_lock_held(&anon->an_lock)); - LOCK_ASSERT(uobj == NULL || simple_lock_held(&uobj->vmobjlock)); + KASSERT(mutex_owned(&anon->an_lock)); + KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock)); /* * special handling for loaned pages @@ -1284,7 +1285,7 @@ ReFault: /* force reload */ pmap_page_protect(anon->an_page, VM_PROT_NONE); - uvm_lock_pageq(); /* KILL loan */ + mutex_enter(&uvm_pageqlock); /* KILL loan */ anon->an_page->uanon = NULL; /* in case we owned */ @@ -1302,7 +1303,7 @@ ReFault: } if (uobj) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); uobj = NULL; } @@ -1312,7 +1313,7 @@ ReFault: pg->pqflags |= PQ_ANON; uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); pg->flags &= ~(PG_BUSY|PG_FAKE); UVM_PAGE_OWN(pg, NULL); @@ -1353,9 +1354,9 @@ ReFault: } pg = anon->an_page; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); pg->flags &= ~(PG_BUSY|PG_FAKE); UVM_PAGE_OWN(pg, NULL); @@ -1380,8 +1381,8 @@ ReFault: /* locked: maps(read), amap, oanon, anon (if different from oanon) */ KASSERT(mutex_owned(&amap->am_l)); - LOCK_ASSERT(simple_lock_held(&anon->an_lock)); - LOCK_ASSERT(simple_lock_held(&oanon->an_lock)); + KASSERT(mutex_owned(&anon->an_lock)); + KASSERT(mutex_owned(&oanon->an_lock)); /* * now map the page in. @@ -1402,7 +1403,7 @@ ReFault: */ if (anon != oanon) - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); uvmfault_unlockall(&ufi, amap, uobj, oanon); if (!uvm_reclaimable()) { UVMHIST_LOG(maphist, @@ -1420,7 +1421,7 @@ ReFault: * ... update the page queues. 
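[editor's note] The uvmfault_promote hunks above show another recurring shape: a caller-provided *spare anon, so that when an allocation has to happen after the fault path drops its locks, the fresh object survives the ERESTART and is consumed on the retry instead of being leaked or redone. A hedged sketch of just that protocol; resource, try_promote, and the result enum are invented for illustration:

#include <stdlib.h>

struct resource { int dummy; };
enum result { OP_DONE, OP_RETRY, OP_FAIL };

/*
 * The attempt reports OP_RETRY when it had to allocate in a context
 * where its locks were (notionally) dropped; the allocation is parked
 * in *spare and reused verbatim on the next pass.
 */
static enum result
try_promote(struct resource **spare)
{
	struct resource *r;

	if (*spare != NULL) {
		r = *spare;			/* reuse saved allocation */
		*spare = NULL;
	} else {
		r = malloc(sizeof(*r));		/* locks dropped here */
		if (r == NULL)
			return OP_FAIL;		/* ENOMEM */
		*spare = r;			/* park it for the retry */
		return OP_RETRY;		/* ERESTART */
	}
	/* ...proceed with r under freshly revalidated locks... */
	free(r);
	return OP_DONE;
}

static enum result
promote(void)
{
	struct resource *spare = NULL;
	enum result rv;

	while ((rv = try_promote(&spare)) == OP_RETRY)
		continue;			/* refault and retry */
	free(spare);				/* NULL is fine */
	return rv;
}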
*/ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (wire_fault) { uvm_pagewire(pg); @@ -1436,14 +1437,14 @@ ReFault: } else { uvm_pageactivate(pg); } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* * done case 1! finish up by unlocking everything and returning success */ if (anon != oanon) - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); uvmfault_unlockall(&ufi, amap, uobj, oanon); pmap_update(ufi.orig_map->pmap); error = 0; @@ -1459,7 +1460,7 @@ Case2: * maps(read), amap(if there), uobj(if !null), uobjpage(if !null) */ KASSERT(amap == NULL || mutex_owned(&amap->am_l)); - LOCK_ASSERT(uobj == NULL || simple_lock_held(&uobj->vmobjlock)); + KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock)); KASSERT(uobjpage == NULL || (uobjpage->flags & PG_BUSY) != 0); /* @@ -1527,9 +1528,9 @@ Case2: /* locked: uobjpage */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageactivate(uobjpage); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* * re-verify the state of the world by first trying to relock @@ -1540,7 +1541,7 @@ Case2: if (locked && amap) amap_lock(amap); uobj = uobjpage->uobject; - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); /* locked(locked): maps(read), amap(if !null), uobj, uobjpage */ /* locked(!locked): uobj, uobjpage */ @@ -1577,7 +1578,7 @@ Case2: } uobjpage->flags &= ~(PG_BUSY|PG_WANTED); UVM_PAGE_OWN(uobjpage, NULL); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); goto ReFault; } @@ -1595,7 +1596,7 @@ Case2: * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj) */ KASSERT(amap == NULL || mutex_owned(&amap->am_l)); - LOCK_ASSERT(uobj == NULL || simple_lock_held(&uobj->vmobjlock)); + KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock)); KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0); /* @@ -1722,7 +1723,7 @@ Case2: wakeup(uobjpage); uobjpage->flags &= ~(PG_BUSY|PG_WANTED); UVM_PAGE_OWN(uobjpage, NULL); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); uobj = NULL; UVMHIST_LOG(maphist, @@ -1750,9 +1751,9 @@ Case2: * note: pg is either the uobjpage or the new page in the new anon */ KASSERT(amap == NULL || mutex_owned(&amap->am_l)); - LOCK_ASSERT(uobj == NULL || simple_lock_held(&uobj->vmobjlock)); + KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock)); KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0); - LOCK_ASSERT(anon == NULL || simple_lock_held(&anon->an_lock)); + KASSERT(anon == NULL || mutex_owned(&anon->an_lock)); KASSERT((pg->flags & PG_BUSY) != 0); /* @@ -1801,7 +1802,7 @@ Case2: goto ReFault; } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (wire_fault) { uvm_pagewire(pg); if (pg->pqflags & PQ_AOBJ) { @@ -1820,7 +1821,7 @@ Case2: } else { uvm_pageactivate(pg); } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); if (pg->flags & PG_WANTED) wakeup(pg); @@ -1924,7 +1925,7 @@ uvm_fault_unwire_locked(struct vm_map *map, vaddr_t start, vaddr_t end) * we can call uvm_pageunwire. */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); /* * find the beginning map entry for the region. 
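[editor's note] Throughout these hunks uvm_lock_pageq()/uvm_unlock_pageq() become explicit mutex_enter/mutex_exit on the new global uvm_pageqlock declared in uvm_init.c: one kmutex now visibly guards the active/inactive page queues instead of a simplelock hidden behind wrapper macros, and uvm_fault_unwire_locked above holds it across its whole walk. A toy model of a single lock guarding LRU queues, all names illustrative; it assumes the page already sits on one of the two queues:

#include <sys/queue.h>
#include <pthread.h>
#include <stdbool.h>

struct page {
	TAILQ_ENTRY(page) pageq;
	bool active;
};
TAILQ_HEAD(pglist, page);

static struct pglist active_q = TAILQ_HEAD_INITIALIZER(active_q);
static struct pglist inactive_q = TAILQ_HEAD_INITIALIZER(inactive_q);
static pthread_mutex_t pageqlock = PTHREAD_MUTEX_INITIALIZER;

/* Promote a queued page to the head of the active queue. */
static void
page_activate(struct page *pg)
{
	pthread_mutex_lock(&pageqlock);
	if (pg->active)
		TAILQ_REMOVE(&active_q, pg, pageq);
	else
		TAILQ_REMOVE(&inactive_q, pg, pageq);
	TAILQ_INSERT_HEAD(&active_q, pg, pageq);
	pg->active = true;
	pthread_mutex_unlock(&pageqlock);
}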
@@ -1961,5 +1962,5 @@ uvm_fault_unwire_locked(struct vm_map *map, vaddr_t start, vaddr_t end) uvm_pageunwire(pg); } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } diff --git a/sys/uvm/uvm_fault_i.h b/sys/uvm/uvm_fault_i.h index d31200ca208c..5102c97217d4 100644 --- a/sys/uvm/uvm_fault_i.h +++ b/sys/uvm/uvm_fault_i.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_fault_i.h,v 1.23 2007/02/22 06:05:01 thorpej Exp $ */ +/* $NetBSD: uvm_fault_i.h,v 1.24 2008/01/02 11:49:16 ad Exp $ */ /* * @@ -76,9 +76,9 @@ uvmfault_unlockall(struct uvm_faultinfo *ufi, struct vm_amap *amap, { if (anon) - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); if (uobj) - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); if (amap) amap_unlock(amap); uvmfault_unlockmaps(ufi, false); diff --git a/sys/uvm/uvm_glue.c b/sys/uvm/uvm_glue.c index 8410634ec50c..f21219085cfc 100644 --- a/sys/uvm/uvm_glue.c +++ b/sys/uvm/uvm_glue.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_glue.c,v 1.113 2007/11/06 00:42:46 ad Exp $ */ +/* $NetBSD: uvm_glue.c,v 1.114 2008/01/02 11:49:16 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_glue.c,v 1.113 2007/11/06 00:42:46 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_glue.c,v 1.114 2008/01/02 11:49:16 ad Exp $"); #include "opt_coredump.h" #include "opt_kgdb.h" @@ -86,6 +86,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_glue.c,v 1.113 2007/11/06 00:42:46 ad Exp $"); #include #include #include +#include #include @@ -809,10 +810,15 @@ void uvm_lwp_hold(struct lwp *l) { - /* XXXSMP mutex_enter(&l->l_swaplock); */ - if (l->l_holdcnt++ == 0 && (l->l_flag & LW_INMEM) == 0) - uvm_swapin(l); - /* XXXSMP mutex_exit(&l->l_swaplock); */ + if (l == curlwp) { + atomic_inc_uint(&l->l_holdcnt); + } else { + mutex_enter(&l->l_swaplock); + if (atomic_inc_uint_nv(&l->l_holdcnt) == 1 && + (l->l_flag & LW_INMEM) == 0) + uvm_swapin(l); + mutex_exit(&l->l_swaplock); + } } /* @@ -826,9 +832,7 @@ uvm_lwp_rele(struct lwp *l) KASSERT(l->l_holdcnt != 0); - /* XXXSMP mutex_enter(&l->l_swaplock); */ - l->l_holdcnt--; - /* XXXSMP mutex_exit(&l->l_swaplock); */ + atomic_dec_uint(&l->l_holdcnt); } #ifdef COREDUMP diff --git a/sys/uvm/uvm_init.c b/sys/uvm/uvm_init.c index 7e5d328d09b1..d0f0e9c257c5 100644 --- a/sys/uvm/uvm_init.c +++ b/sys/uvm/uvm_init.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_init.c,v 1.30 2007/11/14 11:04:08 yamt Exp $ */ +/* $NetBSD: uvm_init.c,v 1.31 2008/01/02 11:49:17 ad Exp $ */ /* * @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_init.c,v 1.30 2007/11/14 11:04:08 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_init.c,v 1.31 2008/01/02 11:49:17 ad Exp $"); #include #include @@ -54,6 +54,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_init.c,v 1.30 2007/11/14 11:04:08 yamt Exp $"); #include #include +#include /* * struct uvm: we store most global vars in this structure to make them @@ -64,8 +65,8 @@ struct uvm uvm; /* decl */ struct uvmexp uvmexp; /* decl */ struct uvm_object *uvm_kernel_object; +kmutex_t uvm_pageqlock; kmutex_t uvm_fpageqlock; -kmutex_t uvm_pagedaemon_lock; kmutex_t uvm_kentry_lock; kmutex_t uvm_swap_data_lock; kmutex_t uvm_scheduler_mutex; @@ -175,4 +176,10 @@ uvm_init(void) */ uvm_anon_init(); + + /* + * init readahead module + */ + + uvm_ra_init(); } diff --git a/sys/uvm/uvm_km.c b/sys/uvm/uvm_km.c index d7edbb0d068e..b5d08b842d77 100644 --- a/sys/uvm/uvm_km.c +++ b/sys/uvm/uvm_km.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_km.c,v 1.96 2007/07/21 20:52:59 ad Exp $ */ +/* $NetBSD: uvm_km.c,v 1.97 2008/01/02 
11:49:17 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -90,10 +90,8 @@ * * the vm system has several standard kernel submaps, including: * kmem_map => contains only wired kernel memory for the kernel - * malloc. *** access to kmem_map must be protected - * by splvm() because we are allowed to call malloc() - * at interrupt time *** - * mb_map => memory for large mbufs, *** protected by splvm *** + * malloc. + * mb_map => memory for large mbufs, * pager_map => used to map "buf" structures into kernel space * exec_map => used during exec to handle exec args * etc... @@ -130,7 +128,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_km.c,v 1.96 2007/07/21 20:52:59 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_km.c,v 1.97 2008/01/02 11:49:17 ad Exp $"); #include "opt_uvmhist.h" @@ -221,6 +219,7 @@ km_vacache_init(struct vm_map *map, const char *name, size_t size) KASSERT(VM_MAP_IS_KERNEL(map)); KASSERT(size < (vm_map_max(map) - vm_map_min(map)) / 2); /* sanity */ + vmk = vm_map_to_kernel(map); pp = &vmk->vmk_vacache; pa = &vmk->vmk_vacache_allocator; @@ -265,16 +264,8 @@ void uvm_km_va_drain(struct vm_map *map, uvm_flag_t flags) { struct vm_map_kernel *vmk = vm_map_to_kernel(map); - const bool intrsafe = (map->flags & VM_MAP_INTRSAFE) != 0; - int s = 0xdeadbeaf; /* XXX: gcc */ - if (intrsafe) { - s = splvm(); - } callback_run_roundrobin(&vmk->vmk_reclaim_callback, NULL); - if (intrsafe) { - splx(s); - } } /* @@ -420,7 +411,7 @@ uvm_km_pgremove(vaddr_t startva, vaddr_t endva) KASSERT(startva < endva); KASSERT(endva <= VM_MAX_KERNEL_ADDRESS); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); for (curoff = start; curoff < end; curoff = nextoff) { nextoff = curoff + PAGE_SIZE; @@ -429,7 +420,7 @@ uvm_km_pgremove(vaddr_t startva, vaddr_t endva) pg->flags |= PG_WANTED; UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0, "km_pgrm", 0); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); nextoff = curoff; continue; } @@ -444,12 +435,12 @@ uvm_km_pgremove(vaddr_t startva, vaddr_t endva) } uao_dropswap(uobj, curoff >> PAGE_SHIFT); if (pg != NULL) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); if (swpgonlydelta > 0) { mutex_enter(&uvm_swap_data_lock); @@ -511,10 +502,10 @@ uvm_km_check_empty(vaddr_t start, vaddr_t end, bool intrsafe) if (!intrsafe) { const struct vm_page *pg; - simple_lock(&uvm_kernel_object->vmobjlock); + mutex_enter(&uvm_kernel_object->vmobjlock); pg = uvm_pagelookup(uvm_kernel_object, va - vm_map_min(kernel_map)); - simple_unlock(&uvm_kernel_object->vmobjlock); + mutex_exit(&uvm_kernel_object->vmobjlock); if (pg) { panic("uvm_km_check_empty: " "has page hashed at %p", (const void *)va); @@ -693,17 +684,11 @@ uvm_km_alloc_poolpage_cache(struct vm_map *map, bool waitok) struct vm_page *pg; struct pool *pp = &vm_map_to_kernel(map)->vmk_vacache; vaddr_t va; - int s = 0xdeadbeaf; /* XXX: gcc */ - const bool intrsafe = (map->flags & VM_MAP_INTRSAFE) != 0; if ((map->flags & VM_MAP_VACACHE) == 0) return uvm_km_alloc_poolpage(map, waitok); - if (intrsafe) - s = splvm(); va = (vaddr_t)pool_get(pp, waitok ? 
PR_WAITOK : PR_NOWAIT); - if (intrsafe) - splx(s); if (va == 0) return 0; KASSERT(!pmap_extract(pmap_kernel(), va, NULL)); @@ -714,11 +699,7 @@ again: uvm_wait("plpg"); goto again; } else { - if (intrsafe) - s = splvm(); pool_put(pp, (void *)va); - if (intrsafe) - splx(s); return 0; } } @@ -751,15 +732,9 @@ uvm_km_alloc_poolpage(struct vm_map *map, bool waitok) return (va); #else vaddr_t va; - int s = 0xdeadbeaf; /* XXX: gcc */ - const bool intrsafe = (map->flags & VM_MAP_INTRSAFE) != 0; - if (intrsafe) - s = splvm(); va = uvm_km_alloc(map, PAGE_SIZE, 0, (waitok ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK) | UVM_KMF_WIRED); - if (intrsafe) - splx(s); return (va); #endif /* PMAP_MAP_POOLPAGE */ } @@ -778,8 +753,6 @@ uvm_km_free_poolpage_cache(struct vm_map *map, vaddr_t addr) uvm_km_free_poolpage(map, addr); #else struct pool *pp; - int s = 0xdeadbeaf; /* XXX: gcc */ - const bool intrsafe = (map->flags & VM_MAP_INTRSAFE) != 0; if ((map->flags & VM_MAP_VACACHE) == 0) { uvm_km_free_poolpage(map, addr); @@ -794,11 +767,7 @@ uvm_km_free_poolpage_cache(struct vm_map *map, vaddr_t addr) #endif KASSERT(!pmap_extract(pmap_kernel(), addr, NULL)); pp = &vm_map_to_kernel(map)->vmk_vacache; - if (intrsafe) - s = splvm(); pool_put(pp, (void *)addr); - if (intrsafe) - splx(s); #endif } @@ -812,13 +781,6 @@ uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr) pa = PMAP_UNMAP_POOLPAGE(addr); uvm_pagefree(PHYS_TO_VM_PAGE(pa)); #else - int s = 0xdeadbeaf; /* XXX: gcc */ - const bool intrsafe = (map->flags & VM_MAP_INTRSAFE) != 0; - - if (intrsafe) - s = splvm(); uvm_km_free(map, addr, PAGE_SIZE, UVM_KMF_WIRED); - if (intrsafe) - splx(s); #endif /* PMAP_UNMAP_POOLPAGE */ } diff --git a/sys/uvm/uvm_loan.c b/sys/uvm/uvm_loan.c index 30a2733f76f2..1127a9c5a6fc 100644 --- a/sys/uvm/uvm_loan.c +++ b/sys/uvm/uvm_loan.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_loan.c,v 1.69 2007/12/01 10:40:28 yamt Exp $ */ +/* $NetBSD: uvm_loan.c,v 1.70 2008/01/02 11:49:17 ad Exp $ */ /* * @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.69 2007/12/01 10:40:28 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.70 2008/01/02 11:49:17 ad Exp $"); #include #include @@ -185,6 +185,8 @@ uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags) /* locked: if (rv > 0) => map, amap, uobj [o.w. unlocked] */ KASSERT(rv > 0 || aref->ar_amap == NULL || !mutex_owned(&aref->ar_amap->am_l)); + KASSERT(rv > 0 || uobj == NULL || + !mutex_owned(&uobj->vmobjlock)); /* total failure */ if (rv < 0) { @@ -357,7 +359,7 @@ uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags, */ if (flags & UVM_LOAN_TOANON) { - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); pg = anon->an_page; if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) { if (pg->wire_count > 0) { @@ -372,7 +374,7 @@ uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags, anon->an_ref++; **output = anon; (*output)++; - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); UVMHIST_LOG(loanhist, "->A done", 0,0,0,0); return (1); } @@ -383,7 +385,7 @@ uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags, * this for us. 
*/ - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon); /* @@ -414,9 +416,9 @@ uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags, */ pg = anon->an_page; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->wire_count > 0) { - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); UVMHIST_LOG(loanhist, "->K wired %p", pg,0,0,0); KASSERT(pg->uobject == NULL); uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, @@ -428,14 +430,14 @@ uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags, } pg->loan_count++; uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); **output = pg; (*output)++; /* unlock anon and return success */ if (pg->uobject) - simple_unlock(&pg->uobject->vmobjlock); - simple_unlock(&anon->an_lock); + mutex_exit(&pg->uobject->vmobjlock); + mutex_exit(&anon->an_lock); UVMHIST_LOG(loanhist, "->K done", 0,0,0,0); return (1); } @@ -463,12 +465,12 @@ uvm_loanpage(struct vm_page **pgpp, int npages) KASSERT(pg->uobject != NULL); KASSERT(pg->uobject == pgpp[0]->uobject); KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT))); - LOCK_ASSERT(simple_lock_held(&pg->uobject->vmobjlock)); + KASSERT(mutex_owned(&pg->uobject->vmobjlock)); KASSERT(pg->flags & PG_BUSY); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->wire_count > 0) { - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0); error = EBUSY; break; @@ -478,7 +480,7 @@ uvm_loanpage(struct vm_page **pgpp, int npages) } pg->loan_count++; uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } uvm_page_unbusy(pgpp, npages); @@ -487,11 +489,11 @@ uvm_loanpage(struct vm_page **pgpp, int npages) /* * backout what we've done */ - struct simplelock *slock = &pgpp[0]->uobject->vmobjlock; + kmutex_t *slock = &pgpp[0]->uobject->vmobjlock; - simple_unlock(slock); + mutex_exit(slock); uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE); - simple_lock(slock); + mutex_enter(slock); } UVMHIST_LOG(loanhist, "done %d", error,0,0,0); @@ -519,7 +521,7 @@ uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages, struct vm_page **pgpp; int error; int i; - struct simplelock *slock; + kmutex_t *slock; pgpp = origpgpp; for (ndone = 0; ndone < orignpages; ) { @@ -528,7 +530,7 @@ uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages, int npendloan = 0xdead; /* XXX gcc */ reget: npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); error = (*uobj->pgops->pgo_get)(uobj, pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0, VM_PROT_READ, 0, PGO_SYNCIO); @@ -544,7 +546,7 @@ reget: /* loan and unbusy pages */ slock = NULL; for (i = 0; i < npages; i++) { - struct simplelock *nextslock; /* slock for next page */ + kmutex_t *nextslock; /* slock for next page */ struct vm_page *pg = *pgpp; /* XXX assuming that the page is owned by uobj */ @@ -556,7 +558,7 @@ reget: KASSERT(npendloan > 0); error = uvm_loanpage(pgpp - npendloan, npendloan); - simple_unlock(slock); + mutex_exit(slock); if (error) goto fail; ndone += npendloan; @@ -564,23 +566,23 @@ reget: } slock = nextslock; npendloan = 0; - simple_lock(slock); + mutex_enter(slock); } if ((pg->flags & PG_RELEASED) != 0) { /* * release pages and try again. 
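[editor's note] uvm_loanuobjpages above loans a run of pages but only drops and retakes the owner's vmobjlock when the owning object changes from one page to the next, flushing the pending batch with uvm_loanpage first. A sketch of that batch-by-lock loop; item, owner_lock, and process_batch are invented for illustration:

#include <pthread.h>
#include <stddef.h>

struct item {
	pthread_mutex_t *owner_lock;	/* lock protecting this item */
};

/* Stub: the real work (loan/unbusy) happens with the owner lock held. */
static void
process_batch(struct item **batch, int n)
{
	(void)batch; (void)n;
}

static void
process_all(struct item **items, int n)
{
	pthread_mutex_t *held = NULL;
	int i, pending = 0;

	for (i = 0; i < n; i++) {
		pthread_mutex_t *need = items[i]->owner_lock;

		if (need != held) {
			if (held != NULL) {
				/* Flush items batched under 'held'. */
				process_batch(&items[i] - pending, pending);
				pthread_mutex_unlock(held);
				pending = 0;
			}
			held = need;
			pthread_mutex_lock(held);
		}
		pending++;
	}
	if (held != NULL) {
		process_batch(&items[n] - pending, pending);
		pthread_mutex_unlock(held);
	}
}

The win is the same as in the kernel loop: consecutive pages almost always share one object, so lock traffic drops to one acquire/release per object run rather than per page.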
*/ - simple_unlock(slock); + mutex_exit(slock); for (; i < npages; i++) { pg = pgpp[i]; slock = &pg->uobject->vmobjlock; - simple_lock(slock); - uvm_lock_pageq(); + mutex_enter(slock); + mutex_enter(&uvm_pageqlock); uvm_page_unbusy(&pg, 1); - uvm_unlock_pageq(); - simple_unlock(slock); + mutex_exit(&uvm_pageqlock); + mutex_exit(slock); } goto reget; } @@ -592,7 +594,7 @@ reget: KASSERT(slock != NULL); KASSERT(npendloan > 0); error = uvm_loanpage(pgpp - npendloan, npendloan); - simple_unlock(slock); + mutex_exit(slock); if (error) goto fail; ndone += npendloan; @@ -636,7 +638,7 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) * XXXCDC: duplicate code with uvm_fault(). */ - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); if (uobj->pgops->pgo_get) { /* try locked pgo_get */ npages = 1; pg = NULL; @@ -687,7 +689,7 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) if (locked && amap) amap_lock(amap); uobj = pg->uobject; - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); /* * verify that the page has not be released and re-verify @@ -712,18 +714,18 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) wakeup(pg); } if (pg->flags & PG_RELEASED) { - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uobj->vmobjlock); return (0); } - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); pg->flags &= ~(PG_BUSY|PG_WANTED); UVM_PAGE_OWN(pg, NULL); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return (0); } } @@ -741,7 +743,7 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) uvmfault_unlockall(ufi, amap, uobj, NULL); return (-1); } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); **output = pg; (*output)++; return (1); @@ -756,15 +758,15 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) if (pg->uanon) { anon = pg->uanon; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); anon->an_ref++; - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); if (pg->flags & PG_WANTED) { wakeup(pg); } pg->flags &= ~(PG_WANTED|PG_BUSY); UVM_PAGE_OWN(pg, NULL); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); **output = anon; (*output)++; return (1); @@ -780,14 +782,14 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) } anon->an_page = pg; pg->uanon = anon; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->wire_count > 0) { - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0); pg->uanon = NULL; anon->an_page = NULL; anon->an_ref--; - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); uvm_anfree(anon); goto fail; } @@ -796,14 +798,14 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va) } pg->loan_count++; uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); if (pg->flags & PG_WANTED) { wakeup(pg); } pg->flags &= ~(PG_WANTED|PG_BUSY); UVM_PAGE_OWN(pg, NULL); - simple_unlock(&uobj->vmobjlock); - simple_unlock(&anon->an_lock); + mutex_exit(&uobj->vmobjlock); + mutex_exit(&anon->an_lock); **output = anon; (*output)++; return (1); @@ -844,7 +846,7 @@ uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags) UVMHIST_FUNC(__func__); 
UVMHIST_CALLED(loanhist); again: - simple_lock(&uvm_loanzero_object.vmobjlock); + mutex_enter(&uvm_loanzero_object.vmobjlock); /* * first, get ahold of our single zero page. @@ -854,7 +856,7 @@ again: TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) { while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL, UVM_PGA_ZERO)) == NULL) { - simple_unlock(&uvm_loanzero_object.vmobjlock); + mutex_exit(&uvm_loanzero_object.vmobjlock); uvmfault_unlockall(ufi, amap, NULL, NULL); uvm_wait("loanzero"); if (!uvmfault_relock(ufi)) { @@ -869,17 +871,17 @@ again: /* got a zero'd page. */ pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE); pg->flags |= PG_RDONLY; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); UVM_PAGE_OWN(pg, NULL); } if ((flags & UVM_LOAN_TOANON) == 0) { /* loaning to kernel-page */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); pg->loan_count++; - uvm_unlock_pageq(); - simple_unlock(&uvm_loanzero_object.vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uvm_loanzero_object.vmobjlock); **output = pg; (*output)++; return (1); @@ -893,10 +895,10 @@ again: if (pg->uanon) { anon = pg->uanon; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); anon->an_ref++; - simple_unlock(&anon->an_lock); - simple_unlock(&uvm_loanzero_object.vmobjlock); + mutex_exit(&anon->an_lock); + mutex_exit(&uvm_loanzero_object.vmobjlock); **output = anon; (*output)++; return (1); @@ -909,18 +911,18 @@ again: anon = uvm_analloc(); if (anon == NULL) { /* out of swap causes us to fail */ - simple_unlock(&uvm_loanzero_object.vmobjlock); + mutex_exit(&uvm_loanzero_object.vmobjlock); uvmfault_unlockall(ufi, amap, NULL, NULL); return (-1); } anon->an_page = pg; pg->uanon = anon; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); pg->loan_count++; uvm_pageactivate(pg); - uvm_unlock_pageq(); - simple_unlock(&anon->an_lock); - simple_unlock(&uvm_loanzero_object.vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&anon->an_lock); + mutex_exit(&uvm_loanzero_object.vmobjlock); **output = anon; (*output)++; return (1); @@ -942,9 +944,9 @@ uvm_unloananon(struct vm_anon **aloans, int nanons) int refs; anon = *aloans++; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); refs = --anon->an_ref; - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); if (refs == 0) { uvm_anfree(anon); @@ -962,9 +964,9 @@ static void uvm_unloanpage(struct vm_page **ploans, int npages) { struct vm_page *pg; - struct simplelock *slock; + kmutex_t *slock; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); while (npages-- > 0) { pg = *ploans++; @@ -981,11 +983,13 @@ uvm_unloanpage(struct vm_page **ploans, int npages) } else { slock = &pg->uanon->an_lock; } - if (simple_lock_try(slock)) { + if (mutex_tryenter(slock)) { break; } - uvm_unlock_pageq(); - uvm_lock_pageq(); + mutex_exit(&uvm_pageqlock); + /* XXX Better than yielding but inadequate. 
*/ + kpause("livelock", false, 1, NULL); + mutex_enter(&uvm_pageqlock); slock = NULL; } @@ -1014,10 +1018,10 @@ uvm_unloanpage(struct vm_page **ploans, int npages) uvm_pagefree(pg); } if (slock != NULL) { - simple_unlock(slock); + mutex_exit(slock); } } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); } /* @@ -1053,7 +1057,7 @@ ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) */ if ((flags & PGO_FREE) == 0) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return 0; } @@ -1066,14 +1070,14 @@ ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) KASSERT(pg != NULL); KASSERT(TAILQ_NEXT(pg, listq) == NULL); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->uanon) uvm_pageactivate(pg); else uvm_pagedequeue(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return 0; } @@ -1089,7 +1093,7 @@ void uvm_loan_init(void) { - simple_lock_init(&uvm_loanzero_object.vmobjlock); + mutex_init(&uvm_loanzero_object.vmobjlock, MUTEX_DEFAULT, IPL_NONE); TAILQ_INIT(&uvm_loanzero_object.memq); uvm_loanzero_object.pgops = &ulz_pager; @@ -1113,7 +1117,7 @@ uvm_loanbreak(struct vm_page *uobjpage) #endif KASSERT(uobj != NULL); - LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock)); + KASSERT(mutex_owned(&uobj->vmobjlock)); KASSERT(uobjpage->flags & PG_BUSY); /* alloc new un-owned page */ @@ -1148,7 +1152,7 @@ uvm_loanbreak(struct vm_page *uobjpage) uobjpage->flags &= ~(PG_WANTED|PG_BUSY); UVM_PAGE_OWN(uobjpage, NULL); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); /* * replace uobjpage with new page. @@ -1171,7 +1175,7 @@ uvm_loanbreak(struct vm_page *uobjpage) /* install new page */ uvm_pageactivate(pg); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* * done! loan is broken and "pg" is diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c index 2d7e6e761eb8..d6f2a1227f2d 100644 --- a/sys/uvm/uvm_map.c +++ b/sys/uvm/uvm_map.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_map.c,v 1.247 2007/12/13 02:45:11 yamt Exp $ */ +/* $NetBSD: uvm_map.c,v 1.248 2008/01/02 11:49:18 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.247 2007/12/13 02:45:11 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.248 2008/01/02 11:49:18 ad Exp $"); #include "opt_ddb.h" #include "opt_uvmhist.h" @@ -88,6 +88,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.247 2007/12/13 02:45:11 yamt Exp $"); #include #include #include +#include #ifdef SYSVSHM #include @@ -139,18 +140,16 @@ UVMMAP_EVCNT_DEFINE(ukh_free) const char vmmapbsy[] = "vmmapbsy"; /* - * pool for vmspace structures. + * cache for vmspace structures. */ -POOL_INIT(uvm_vmspace_pool, sizeof(struct vmspace), 0, 0, 0, "vmsppl", - &pool_allocator_nointr, IPL_NONE); +static struct pool_cache uvm_vmspace_cache; /* - * pool for dynamically-allocated map entries. + * cache for dynamically-allocated map entries. */ -POOL_INIT(uvm_map_entry_pool, sizeof(struct vm_map_entry), 0, 0, 0, "vmmpepl", - &pool_allocator_nointr, IPL_NONE); +static struct pool_cache uvm_map_entry_cache; MALLOC_DEFINE(M_VMMAP, "VM map", "VM map structures"); MALLOC_DEFINE(M_VMPMAP, "VM pmap", "VM pmap"); @@ -229,13 +228,10 @@ extern struct vm_map *pager_map; /* XXX */ /* * SAVE_HINT: saves the specified entry as the hint for future lookups. * - * => map need not be locked (protected by hint_lock). + * => map need not be locked. 
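[Reviewer note: the kpause() above replaces what used to be a pointless unlock/relock spin. Combined with uvm_unloanpage()'s owner selection, the pattern is roughly the following sketch; names follow the patch, the loop shape is simplified.]

	kmutex_t *slock;

	/* the lock that owns a page: its object's, else its anon's */
	if (pg->uobject != NULL)
		slock = &pg->uobject->vmobjlock;
	else
		slock = &pg->uanon->an_lock;

	while (!mutex_tryenter(slock)) {
		/* can't spin with uvm_pageqlock held; back off a tick */
		mutex_exit(&uvm_pageqlock);
		kpause("livelock", false, 1, NULL);
		mutex_enter(&uvm_pageqlock);
	}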
*/ -#define SAVE_HINT(map,check,value) do { \ - mutex_enter(&(map)->hint_lock); \ - if ((map)->hint == (check)) \ - (map)->hint = (value); \ - mutex_exit(&(map)->hint_lock); \ +#define SAVE_HINT(map, check, value) do { \ + atomic_cas_ptr(&(map)->hint, (check), (value)); \ } while (/*CONSTCOND*/ 0) /* @@ -538,7 +534,8 @@ vm_map_lock(struct vm_map *map) KASSERT(map->busy != curlwp); mutex_enter(&map->misc_lock); rw_exit(&map->lock); - cv_wait(&map->cv, &map->misc_lock); + if (map->busy != NULL) + cv_wait(&map->cv, &map->misc_lock); mutex_exit(&map->misc_lock); } @@ -578,6 +575,7 @@ vm_map_unlock(struct vm_map *map) mutex_spin_exit(&map->mutex); else { KASSERT(rw_write_held(&map->lock)); + KASSERT(map->busy == NULL); rw_exit(&map->lock); } } @@ -621,12 +619,80 @@ vm_map_unbusy(struct vm_map *map) * o writers are blocked out with a read or write hold * o at any time, only one thread owns the set of values */ - map->busy = NULL; mutex_enter(&map->misc_lock); + map->busy = NULL; cv_broadcast(&map->cv); mutex_exit(&map->misc_lock); } +/* + * vm_map_lock_read: acquire a shared (read) lock on a map. + */ + +void +vm_map_lock_read(struct vm_map *map) +{ + + KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); + + rw_enter(&map->lock, RW_READER); +} + +/* + * vm_map_unlock_read: release a shared lock on a map. + */ + +void +vm_map_unlock_read(struct vm_map *map) +{ + + KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); + + rw_exit(&map->lock); +} + +/* + * vm_map_downgrade: downgrade an exclusive lock to a shared lock. + */ + +void +vm_map_downgrade(struct vm_map *map) +{ + + rw_downgrade(&map->lock); +} + +/* + * vm_map_busy: mark a map as busy. + * + * => the caller must hold the map write locked + */ + +void +vm_map_busy(struct vm_map *map) +{ + + KASSERT(rw_write_held(&map->lock)); + KASSERT(map->busy == NULL); + + map->busy = curlwp; +} + +/* + * vm_map_locked_p: return true if the map is write locked. + */ + +bool +vm_map_locked_p(struct vm_map *map) +{ + + if ((map->flags & VM_MAP_INTRSAFE) != 0) { + return mutex_owned(&map->mutex); + } else { + return rw_write_held(&map->lock); + } +} + /* * uvm_mapent_alloc: allocate a map entry */ @@ -641,7 +707,7 @@ uvm_mapent_alloc(struct vm_map *map, int flags) if (VM_MAP_USE_KMAPENT(map)) { me = uvm_kmapent_alloc(map, flags); } else { - me = pool_get(&uvm_map_entry_pool, pflags); + me = pool_cache_get(&uvm_map_entry_cache, pflags); if (__predict_false(me == NULL)) return NULL; me->flags = 0; @@ -654,6 +720,8 @@ uvm_mapent_alloc(struct vm_map *map, int flags) /* * uvm_mapent_alloc_split: allocate a map entry for clipping. + * + * => map must be locked by caller if UVM_MAP_QUANTUM is set. */ static struct vm_map_entry * @@ -669,11 +737,10 @@ uvm_mapent_alloc_split(struct vm_map *map, if (old_entry->flags & UVM_MAP_QUANTUM) { struct vm_map_kernel *vmk = vm_map_to_kernel(map); - mutex_spin_enter(&uvm_kentry_lock); + KASSERT(vm_map_locked_p(map)); me = vmk->vmk_merged_entries; KASSERT(me); vmk->vmk_merged_entries = me->next; - mutex_spin_exit(&uvm_kentry_lock); KASSERT(me->flags & UVM_MAP_QUANTUM); } else { me = uvm_mapent_alloc(map, flags); @@ -696,7 +763,7 @@ uvm_mapent_free(struct vm_map_entry *me) if (me->flags & UVM_MAP_KERNEL) { uvm_kmapent_free(me); } else { - pool_put(&uvm_map_entry_pool, me); + pool_cache_put(&uvm_map_entry_cache, me); } } @@ -705,6 +772,7 @@ uvm_mapent_free(struct vm_map_entry *me) * * => keep the entry if needed. * => caller shouldn't hold map locked if VM_MAP_USE_KMAPENT(map) is true. + * => map should be locked if UVM_MAP_QUANTUM is set. 
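[Reviewer note: the lock-free SAVE_HINT above works because atomic_cas_ptr() installs the new hint only if the old value is still the expected entry; a stale updater simply loses the race and the hint stays valid. An equivalent open-coded form, for illustration only:]

	/* atomic_cas_ptr() returns the previous value */
	if (atomic_cas_ptr(&map->hint, check, value) == check) {
		/* we won the race; the hint now points at "value" */
	} else {
		/* a concurrent lookup moved the hint first; keep theirs */
	}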
*/ static void @@ -719,15 +787,14 @@ uvm_mapent_free_merged(struct vm_map *map, struct vm_map_entry *me) */ struct vm_map_kernel *vmk; + KASSERT(vm_map_locked_p(map)); KASSERT(VM_MAP_IS_KERNEL(map)); KASSERT(!VM_MAP_USE_KMAPENT(map) || (me->flags & UVM_MAP_KERNEL)); vmk = vm_map_to_kernel(map); - mutex_spin_enter(&uvm_kentry_lock); me->next = vmk->vmk_merged_entries; vmk->vmk_merged_entries = me; - mutex_spin_exit(&uvm_kentry_lock); } else { uvm_mapent_free(me); } @@ -837,8 +904,7 @@ uvm_map_unreference_amap(struct vm_map_entry *entry, int flags) /* - * uvm_map_init: init mapping system at boot time. note that we allocate - * and init the static pool of struct vm_map_entry *'s for the kernel here. + * uvm_map_init: init mapping system at boot time. */ void @@ -864,6 +930,15 @@ uvm_map_init(void) */ mutex_init(&uvm_kentry_lock, MUTEX_DRIVER, IPL_VM); + + /* + * initialize caches. + */ + + pool_cache_bootstrap(&uvm_map_entry_cache, sizeof(struct vm_map_entry), + 0, 0, 0, "vmmpepl", NULL, IPL_NONE, NULL, NULL, NULL); + pool_cache_bootstrap(&uvm_vmspace_cache, sizeof(struct vmspace), + 0, 0, 0, "vmsppl", NULL, IPL_NONE, NULL, NULL, NULL); } /* @@ -1103,7 +1178,8 @@ uvm_map_prepare(struct vm_map *map, vaddr_t start, vsize_t size, retry: if (vm_map_lock_try(map) == false) { - if (flags & UVM_FLAG_TRYLOCK) { + if ((flags & UVM_FLAG_TRYLOCK) != 0 && + (map->flags & VM_MAP_INTRSAFE) == 0) { return EAGAIN; } vm_map_lock(map); /* could sleep here */ @@ -1505,19 +1581,27 @@ nomerge: error = 0; done: - vm_map_unlock(map); - if (new_entry) { - if (error == 0) { - KDASSERT(merged); - uvm_mapent_free_merged(map, new_entry); - } else { - uvm_mapent_free(new_entry); - } + if ((flags & UVM_FLAG_QUANTUM) == 0) { + /* + * vmk_merged_entries is locked by the map's lock. + */ + vm_map_unlock(map); + } + if (new_entry && error == 0) { + KDASSERT(merged); + uvm_mapent_free_merged(map, new_entry); + new_entry = NULL; } if (dead) { KDASSERT(merged); uvm_mapent_free_merged(map, dead); } + if ((flags & UVM_FLAG_QUANTUM) != 0) { + vm_map_unlock(map); + } + if (new_entry != NULL) { + uvm_mapent_free(new_entry); + } return error; } @@ -1572,9 +1656,7 @@ uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, * list, or from the hint. */ - mutex_enter(&map->hint_lock); cur = map->hint; - mutex_exit(&map->hint_lock); if (cur == &map->header) cur = cur->next; @@ -3007,9 +3089,9 @@ uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, if (UVM_OBJ_IS_VNODE(uobj) && (current->protection & VM_PROT_EXECUTE)) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); vn_markexec((struct vnode *) uobj); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } } } @@ -3724,10 +3806,10 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) if (anon == NULL) continue; - simple_lock(&anon->an_lock); + mutex_enter(&anon->an_lock); pg = anon->an_page; if (pg == NULL) { - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); continue; } @@ -3747,18 +3829,18 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) * at all in these cases. 
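[Reviewer note: pool_cache_bootstrap() is used above, presumably because uvm_map_init() runs early in boot against statically allocated cache storage. The general get/put pattern, sketched with placeholder names:]

static struct pool_cache example_cache;		/* hypothetical cache */

void
example_init(void)
{

	pool_cache_bootstrap(&example_cache, sizeof(struct vm_map_entry),
	    0, 0, 0, "exmplpl", NULL, IPL_NONE, NULL, NULL, NULL);
}

struct vm_map_entry *
example_get(void)
{

	return pool_cache_get(&example_cache, PR_WAITOK);
}

void
example_put(struct vm_map_entry *me)
{

	pool_cache_put(&example_cache, me);
}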
*/ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (pg->loan_count != 0 || pg->wire_count != 0) { - uvm_unlock_pageq(); - simple_unlock(&anon->an_lock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&anon->an_lock); continue; } KASSERT(pg->uanon == anon); pmap_clear_reference(pg); uvm_pagedeactivate(pg); - uvm_unlock_pageq(); - simple_unlock(&anon->an_lock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&anon->an_lock); continue; case PGO_FREE: @@ -3773,12 +3855,12 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) /* skip the page if it's wired */ if (pg->wire_count != 0) { - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); continue; } amap_unadd(¤t->aref, offset); refs = --anon->an_ref; - simple_unlock(&anon->an_lock); + mutex_exit(&anon->an_lock); if (refs == 0) uvm_anfree(anon); continue; @@ -3797,7 +3879,7 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) uoff = current->offset + (start - current->start); size = MIN(end, current->end) - start; if (uobj != NULL) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); if (uobj->pgops->pgo_put != NULL) error = (uobj->pgops->pgo_put)(uobj, uoff, uoff + size, flags | PGO_CLEANIT); @@ -3868,7 +3950,7 @@ uvmspace_alloc(vaddr_t vmin, vaddr_t vmax) struct vmspace *vm; UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist); - vm = pool_get(&uvm_vmspace_pool, PR_WAITOK); + vm = pool_cache_get(&uvm_vmspace_cache, PR_WAITOK); uvmspace_init(vm, NULL, vmin, vmax); UVMHIST_LOG(maphist,"<- done (vm=0x%x)", vm,0,0,0); return (vm); @@ -4078,11 +4160,10 @@ uvmspace_free(struct vmspace *vm) KASSERT(map->nentries == 0); KASSERT(map->size == 0); mutex_destroy(&map->misc_lock); - mutex_destroy(&map->hint_lock); mutex_destroy(&map->mutex); rw_destroy(&map->lock); pmap_destroy(map->pmap); - pool_put(&uvm_vmspace_pool, vm); + pool_cache_put(&uvm_vmspace_cache, vm); } /* @@ -4806,7 +4887,7 @@ uvm_object_printit(struct uvm_object *uobj, bool full, int cnt = 0; (*pr)("OBJECT %p: locked=%d, pgops=%p, npages=%d, ", - uobj, uobj->vmobjlock.lock_data, uobj->pgops, uobj->uo_npages); + uobj, mutex_owned(&uobj->vmobjlock), uobj->pgops, uobj->uo_npages); if (UVM_OBJ_IS_KERN_OBJECT(uobj)) (*pr)("refs=\n"); else @@ -4995,12 +5076,6 @@ uvm_map_setup(struct vm_map *map, vaddr_t vmin, vaddr_t vmax, int flags) cv_init(&map->cv, "vm_map"); mutex_init(&map->misc_lock, MUTEX_DRIVER, ipl); mutex_init(&map->mutex, MUTEX_DRIVER, ipl); - - /* - * The hint lock can get acquired with the pagequeue - * lock held, so must be at IPL_VM. - */ - mutex_init(&map->hint_lock, MUTEX_DRIVER, IPL_VM); } diff --git a/sys/uvm/uvm_map.h b/sys/uvm/uvm_map.h index ab68df60055a..442025c520f1 100644 --- a/sys/uvm/uvm_map.h +++ b/sys/uvm/uvm_map.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_map.h,v 1.58 2007/07/22 21:07:47 he Exp $ */ +/* $NetBSD: uvm_map.h,v 1.59 2008/01/02 11:49:18 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -214,7 +214,6 @@ struct vm_map { struct lwp * busy; /* LWP holding map busy */ kmutex_t mutex; /* INTRSAFE lock */ kmutex_t misc_lock; /* Lock for ref_count, cv */ - kmutex_t hint_lock; /* lock for hint storage */ kcondvar_t cv; /* For signalling */ int flags; /* flags */ RB_HEAD(uvm_tree, vm_map_entry) rbhead; /* Tree for entries */ @@ -362,58 +361,11 @@ void vm_map_lock(struct vm_map *); void vm_map_unlock(struct vm_map *); void vm_map_upgrade(struct vm_map *); void vm_map_unbusy(struct vm_map *); - -/* - * vm_map_lock_read: acquire a shared (read) lock on a map. 
- */ - -static inline void -vm_map_lock_read(struct vm_map *map) -{ - - KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); - - rw_enter(&map->lock, RW_READER); -} - -/* - * vm_map_unlock_read: release a shared lock on a map. - */ - -static inline void -vm_map_unlock_read(struct vm_map *map) -{ - - KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); - - rw_exit(&map->lock); -} -/* - * vm_map_downgrade: downgrade an exclusive lock to a shared lock. - */ - -static inline void -vm_map_downgrade(struct vm_map *map) -{ - - rw_downgrade(&map->lock); -} - -/* - * vm_map_busy: mark a map as busy. - * - * => the caller must hold the map write locked - */ - -static inline void -vm_map_busy(struct vm_map *map) -{ - - KASSERT(rw_write_held(&map->lock)); - KASSERT(map->busy == NULL); - - map->busy = curlwp; -} +void vm_map_lock_read(struct vm_map *); +void vm_map_unlock_read(struct vm_map *); +void vm_map_downgrade(struct vm_map *); +void vm_map_busy(struct vm_map *); +bool vm_map_locked_p(struct vm_map *); #endif /* _KERNEL */ diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c index 700528500bf7..04e95063fa2a 100644 --- a/sys/uvm/uvm_mmap.c +++ b/sys/uvm/uvm_mmap.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_mmap.c,v 1.120 2007/12/26 22:11:53 christos Exp $ */ +/* $NetBSD: uvm_mmap.c,v 1.121 2008/01/02 11:49:18 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -51,7 +51,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.120 2007/12/26 22:11:53 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.121 2008/01/02 11:49:18 ad Exp $"); #include "opt_compat_netbsd.h" #include "opt_pax.h" @@ -231,7 +231,7 @@ sys_mincore(struct lwp *l, const struct sys_mincore_args *uap, register_t *retva if (amap != NULL) amap_lock(amap); if (uobj != NULL) - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) { pgi = 0; @@ -267,7 +267,7 @@ sys_mincore(struct lwp *l, const struct sys_mincore_args *uap, register_t *retva (void) subyte(vec, pgi); } if (uobj != NULL) - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); if (amap != NULL) amap_unlock(amap); } @@ -1167,9 +1167,9 @@ uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit) * then mark it as text. */ if (prot & PROT_EXEC) { - simple_lock(&uobj->vmobjlock); + mutex_enter(&vp->v_interlock); vn_markexec(vp); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&vp->v_interlock); } } else { int i = maxprot; @@ -1199,22 +1199,22 @@ uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit) * with direct I/O. 
*/ - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); needwritemap = (vp->v_iflag & VI_WRMAP) == 0 && (flags & MAP_SHARED) != 0 && (maxprot & VM_PROT_WRITE) != 0; if ((vp->v_iflag & VI_MAPPED) == 0 || needwritemap) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK); - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); vp->v_iflag |= VI_MAPPED; vp->v_vflag |= VV_MAPPED; if (needwritemap) { vp->v_iflag |= VI_WRMAP; } - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); VOP_UNLOCK(vp, 0); } else - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } uvmflag = UVM_MAPFLAG(prot, maxprot, diff --git a/sys/uvm/uvm_mremap.c b/sys/uvm/uvm_mremap.c index ac91ef6a77e0..946532a7e5bf 100644 --- a/sys/uvm/uvm_mremap.c +++ b/sys/uvm/uvm_mremap.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_mremap.c,v 1.9 2007/12/20 23:03:15 dsl Exp $ */ +/* $NetBSD: uvm_mremap.c,v 1.10 2008/01/02 11:49:18 ad Exp $ */ /*- * Copyright (c)2006 YAMAMOTO Takashi, @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_mremap.c,v 1.9 2007/12/20 23:03:15 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_mremap.c,v 1.10 2008/01/02 11:49:18 ad Exp $"); #include #include @@ -78,10 +78,10 @@ uvm_mapent_extend(struct vm_map *map, vaddr_t endva, vsize_t size) error = E2BIG; /* XXX */ goto done; } - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); KASSERT(uobj->uo_refs > 0); uobj->uo_refs++; - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); reserved_entry->object.uvm_obj = uobj; reserved_entry->offset = newoffset; } diff --git a/sys/uvm/uvm_object.c b/sys/uvm/uvm_object.c index 041fdb812b86..e7c65e6592ee 100644 --- a/sys/uvm/uvm_object.c +++ b/sys/uvm/uvm_object.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_object.c,v 1.3 2007/02/17 20:45:36 rmind Exp $ */ +/* $NetBSD: uvm_object.c,v 1.4 2008/01/02 11:49:18 ad Exp $ */ /* * Copyright (c) 2006 The NetBSD Foundation, Inc. 
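[Reviewer note: the VI_MAPPED/VI_WRMAP update above uses the usual vnode interlock hand-off: LK_INTERLOCK tells vn_lock() to release v_interlock while acquiring the vnode lock, and the interlock is retaken to write the flags. In isolation, the shape is:]

	mutex_enter(&vp->v_interlock);
	if ((vp->v_iflag & VI_MAPPED) == 0) {
		/* vn_lock() consumes the interlock with LK_INTERLOCK */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK);
		mutex_enter(&vp->v_interlock);
		vp->v_iflag |= VI_MAPPED;
		mutex_exit(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
	} else
		mutex_exit(&vp->v_interlock);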
@@ -45,7 +45,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_object.c,v 1.3 2007/02/17 20:45:36 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_object.c,v 1.4 2008/01/02 11:49:18 ad Exp $"); #include "opt_uvmhist.h" @@ -74,7 +74,7 @@ uobj_wirepages(struct uvm_object *uobj, off_t start, off_t end) left = (end - start) >> PAGE_SHIFT; - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); while (left) { npages = MIN(FETCH_PAGECOUNT, left); @@ -88,7 +88,7 @@ uobj_wirepages(struct uvm_object *uobj, off_t start, off_t end) if (error) goto error; - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); for (i = 0; i < npages; i++) { KASSERT(pgs[i] != NULL); @@ -101,9 +101,9 @@ uobj_wirepages(struct uvm_object *uobj, off_t start, off_t end) while (pgs[i]->loan_count) { pg = uvm_loanbreak(pgs[i]); if (!pg) { - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); uvm_wait("uobjwirepg"); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); continue; } } @@ -117,11 +117,11 @@ uobj_wirepages(struct uvm_object *uobj, off_t start, off_t end) } /* Wire the pages */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { uvm_pagewire(pgs[i]); } - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* Unbusy the pages */ uvm_page_unbusy(pgs, npages); @@ -129,7 +129,7 @@ uobj_wirepages(struct uvm_object *uobj, off_t start, off_t end) left -= npages; offset += npages << PAGE_SHIFT; } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); return 0; @@ -154,8 +154,8 @@ uobj_unwirepages(struct uvm_object *uobj, off_t start, off_t end) struct vm_page *pg; off_t offset; - simple_lock(&uobj->vmobjlock); - uvm_lock_pageq(); + mutex_enter(&uobj->vmobjlock); + mutex_enter(&uvm_pageqlock); for (offset = start; offset < end; offset += PAGE_SIZE) { pg = uvm_pagelookup(uobj, offset); @@ -164,6 +164,6 @@ uobj_unwirepages(struct uvm_object *uobj, off_t start, off_t end) uvm_pageunwire(pg); } - uvm_unlock_pageq(); - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uvm_pageqlock); + mutex_exit(&uobj->vmobjlock); } diff --git a/sys/uvm/uvm_object.h b/sys/uvm/uvm_object.h index 2dba7daf8f3c..64170daf2f79 100644 --- a/sys/uvm/uvm_object.h +++ b/sys/uvm/uvm_object.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_object.h,v 1.23 2007/12/01 10:40:28 yamt Exp $ */ +/* $NetBSD: uvm_object.h,v 1.24 2008/01/02 11:49:18 ad Exp $ */ /* * @@ -46,7 +46,7 @@ */ struct uvm_object { - struct simplelock vmobjlock; /* lock on memq */ + kmutex_t vmobjlock; /* lock on memq */ const struct uvm_pagerops *pgops; /* pager ops */ struct pglist memq; /* pages in this object */ int uo_npages; /* # of pages in memq */ @@ -104,13 +104,18 @@ extern const struct uvm_pagerops aobj_pager; #define UVM_OBJ_INIT(uobj, ops, refs) \ do { \ - simple_lock_init(&(uobj)->vmobjlock); \ + mutex_init(&(uobj)->vmobjlock, MUTEX_DEFAULT, IPL_NONE);\ (uobj)->pgops = (ops); \ TAILQ_INIT(&(uobj)->memq); \ (uobj)->uo_npages = 0; \ (uobj)->uo_refs = (refs); \ } while (/* CONSTCOND */ 0) +#define UVM_OBJ_DESTROY(uobj) \ + do { \ + mutex_destroy(&(uobj)->vmobjlock); \ + } while (/* CONSTCOND */ 0) + #endif /* _KERNEL */ #endif /* _UVM_UVM_OBJECT_H_ */ diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c index 9e9edfcb79d2..a1e042c352f2 100644 --- a/sys/uvm/uvm_page.c +++ b/sys/uvm/uvm_page.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_page.c,v 1.126 2007/11/29 18:07:11 ad Exp $ */ +/* $NetBSD: uvm_page.c,v 1.127 2008/01/02 11:49:19 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. 
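[Reviewer note: with vmobjlock now a kmutex_t, a uvm_object embedded in a larger structure needs explicit teardown, which is what the new UVM_OBJ_DESTROY() provides. A sketch of the expected lifecycle; the surrounding code is hypothetical.]

	struct uvm_object uobj;

	UVM_OBJ_INIT(&uobj, &aobj_pager, 1);	/* mutex_init + list init */
	/* ... lock uobj.vmobjlock around memq and uo_npages ... */
	UVM_OBJ_DESTROY(&uobj);			/* mutex_destroy */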
@@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.126 2007/11/29 18:07:11 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.127 2008/01/02 11:49:19 ad Exp $"); #include "opt_uvmhist.h" #include "opt_readahead.h" @@ -191,7 +191,7 @@ uvm_pageinsert_after(struct vm_page *pg, struct vm_page *where) kmutex_t *lock; u_int hash; - LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock)); + KASSERT(mutex_owned(&uobj->vmobjlock)); KASSERT((pg->flags & PG_TABLED) == 0); KASSERT(where == NULL || (where->flags & PG_TABLED)); KASSERT(where == NULL || (where->uobject == uobj)); @@ -248,7 +248,7 @@ uvm_pageremove(struct vm_page *pg) kmutex_t *lock; u_int hash; - LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock)); + KASSERT(mutex_owned(&uobj->vmobjlock)); KASSERT(pg->flags & PG_TABLED); hash = uvm_pagehash(uobj, pg->offset); @@ -315,7 +315,7 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp) */ uvmpdpol_init(); - simple_lock_init(&uvm.pageqlock); + mutex_init(&uvm_pageqlock, MUTEX_DRIVER, IPL_NONE); mutex_init(&uvm_fpageqlock, MUTEX_DRIVER, IPL_VM); /* @@ -444,12 +444,6 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp) *kvm_startp += PAGE_SIZE; #endif /* DEBUG */ - /* - * init locks for kernel threads - */ - - mutex_init(&uvm_pagedaemon_lock, MUTEX_DEFAULT, IPL_NONE); - /* * init various thresholds. */ @@ -1079,8 +1073,8 @@ uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon, KASSERT(obj == NULL || anon == NULL); KASSERT(anon == NULL || off == 0); KASSERT(off == trunc_page(off)); - LOCK_ASSERT(obj == NULL || simple_lock_held(&obj->vmobjlock)); - LOCK_ASSERT(anon == NULL || simple_lock_held(&anon->an_lock)); + KASSERT(obj == NULL || mutex_owned(&obj->vmobjlock)); + KASSERT(anon == NULL || mutex_owned(&anon->an_lock)); mutex_spin_enter(&uvm_fpageqlock); @@ -1244,7 +1238,7 @@ uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg) KASSERT(oldpg->uobject != NULL); KASSERT((newpg->flags & PG_TABLED) == 0); KASSERT(newpg->uobject == NULL); - LOCK_ASSERT(simple_lock_held(&oldpg->uobject->vmobjlock)); + KASSERT(mutex_owned(&oldpg->uobject->vmobjlock)); newpg->uobject = oldpg->uobject; newpg->offset = oldpg->offset; @@ -1338,12 +1332,12 @@ uvm_pagefree(struct vm_page *pg) #endif /* DEBUG */ KASSERT((pg->flags & PG_PAGEOUT) == 0); - LOCK_ASSERT(simple_lock_held(&uvm.pageqlock) || + KASSERT(mutex_owned(&uvm_pageqlock) || !uvmpdpol_pageisqueued_p(pg)); - LOCK_ASSERT(pg->uobject == NULL || - simple_lock_held(&pg->uobject->vmobjlock)); - LOCK_ASSERT(pg->uobject != NULL || pg->uanon == NULL || - simple_lock_held(&pg->uanon->an_lock)); + KASSERT(pg->uobject == NULL || + mutex_owned(&pg->uobject->vmobjlock)); + KASSERT(pg->uobject != NULL || pg->uanon == NULL || + mutex_owned(&pg->uanon->an_lock)); /* * if the page is loaned, resolve the loan instead of freeing. 
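[Reviewer note: the assertion changes in this file are all the same mechanical translation, shown once here. mutex_owned() answers whether the current LWP (or CPU, for spin mutexes) holds the lock, so it is suitable only for assertions, never for locking decisions.]

	/* old: checked only under LOCKDEBUG */
	LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock));
	/* new: checked under DIAGNOSTIC */
	KASSERT(mutex_owned(&uobj->vmobjlock));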
@@ -1475,11 +1469,11 @@ uvm_page_unbusy(struct vm_page **pgs, int npgs) continue; } - LOCK_ASSERT(pg->uobject == NULL || - simple_lock_held(&pg->uobject->vmobjlock)); - LOCK_ASSERT(pg->uobject != NULL || + KASSERT(pg->uobject == NULL || + mutex_owned(&pg->uobject->vmobjlock)); + KASSERT(pg->uobject != NULL || (pg->uanon != NULL && - simple_lock_held(&pg->uanon->an_lock))); + mutex_owned(&pg->uanon->an_lock))); KASSERT(pg->flags & PG_BUSY); KASSERT((pg->flags & PG_PAGEOUT) == 0); @@ -1521,9 +1515,9 @@ uvm_page_own(struct vm_page *pg, const char *tag) uobj = pg->uobject; anon = pg->uanon; if (uobj != NULL) { - LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock)); + KASSERT(mutex_owned(&uobj->vmobjlock)); } else if (anon != NULL) { - LOCK_ASSERT(simple_lock_held(&anon->an_lock)); + KASSERT(mutex_owned(&anon->an_lock)); } KASSERT((pg->flags & PG_WANTED) == 0); @@ -1575,7 +1569,6 @@ uvm_pageidlezero(void) int free_list, firstbucket; static int nextbucket; - KERNEL_LOCK(1, NULL); mutex_spin_enter(&uvm_fpageqlock); firstbucket = nextbucket; do { @@ -1598,7 +1591,6 @@ uvm_pageidlezero(void) pg, pageq); uvmexp.free--; mutex_spin_exit(&uvm_fpageqlock); - KERNEL_UNLOCK_LAST(NULL); #ifdef PMAP_PAGEIDLEZERO if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) { @@ -1609,7 +1601,6 @@ uvm_pageidlezero(void) * process now ready to run. */ - KERNEL_LOCK(1, NULL); mutex_spin_enter(&uvm_fpageqlock); TAILQ_INSERT_HEAD(&pgfl->pgfl_buckets[ nextbucket].pgfl_queues[ @@ -1623,7 +1614,6 @@ uvm_pageidlezero(void) #endif /* PMAP_PAGEIDLEZERO */ pg->flags |= PG_ZERO; - KERNEL_LOCK(1, NULL); mutex_spin_enter(&uvm_fpageqlock); TAILQ_INSERT_HEAD(&pgfl->pgfl_buckets[ nextbucket].pgfl_queues[PGFL_ZEROS], @@ -1636,7 +1626,6 @@ uvm_pageidlezero(void) } while (nextbucket != firstbucket); quit: mutex_spin_exit(&uvm_fpageqlock); - KERNEL_UNLOCK_LAST(NULL); } /* @@ -1654,7 +1643,7 @@ uvm_pagelookup(struct uvm_object *obj, voff_t off) kmutex_t *lock; u_int hash; - LOCK_ASSERT(simple_lock_held(&obj->vmobjlock)); + KASSERT(mutex_owned(&obj->vmobjlock)); hash = uvm_pagehash(obj, off); buck = &uvm.page_hash[hash]; @@ -1681,7 +1670,7 @@ uvm_pagelookup(struct uvm_object *obj, voff_t off) void uvm_pagewire(struct vm_page *pg) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); #if defined(READAHEAD_STATS) if ((pg->pqflags & PQ_READAHEAD) != 0) { uvm_ra_hit.ev_count++; @@ -1705,7 +1694,7 @@ uvm_pagewire(struct vm_page *pg) void uvm_pageunwire(struct vm_page *pg) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); pg->wire_count--; if (pg->wire_count == 0) { uvm_pageactivate(pg); @@ -1726,7 +1715,7 @@ void uvm_pagedeactivate(struct vm_page *pg) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); KASSERT(pg->wire_count != 0 || uvmpdpol_pageisqueued_p(pg)); uvmpdpol_pagedeactivate(pg); } @@ -1741,7 +1730,7 @@ void uvm_pageactivate(struct vm_page *pg) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); #if defined(READAHEAD_STATS) if ((pg->pqflags & PQ_READAHEAD) != 0) { uvm_ra_hit.ev_count++; @@ -1763,7 +1752,7 @@ uvm_pagedequeue(struct vm_page *pg) { if (uvmpdpol_pageisqueued_p(pg)) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); } uvmpdpol_pagedequeue(pg); @@ -1778,7 +1767,7 @@ void uvm_pageenqueue(struct vm_page *pg) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); if (pg->wire_count != 0) { return; } diff --git a/sys/uvm/uvm_page.h b/sys/uvm/uvm_page.h index 6a4d16f95ed1..f5ab89f9420d 100644 --- a/sys/uvm/uvm_page.h +++ b/sys/uvm/uvm_page.h 
@@ -1,4 +1,4 @@ -/* $NetBSD: uvm_page.h,v 1.49 2007/07/21 19:21:55 ad Exp $ */ +/* $NetBSD: uvm_page.h,v 1.50 2008/01/02 11:49:19 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -288,10 +288,6 @@ static int vm_physseg_find(paddr_t, int *); #define UVM_PAGE_HASH_PENALTY 4 /* XXX: a guess */ -#define uvm_lock_pageq() simple_lock(&uvm.pageqlock) -#define uvm_unlock_pageq() simple_unlock(&uvm.pageqlock) -#define UVM_LOCK_ASSERT_PAGEQ() LOCK_ASSERT(simple_lock_held(&uvm.pageqlock)) - #define uvm_pagehash(obj,off) \ (((unsigned long)obj+(unsigned long)atop(off)) & uvm.page_hashmask) diff --git a/sys/uvm/uvm_pager.c b/sys/uvm/uvm_pager.c index b48c01ea6c2c..eb5d2b9ee3ac 100644 --- a/sys/uvm/uvm_pager.c +++ b/sys/uvm/uvm_pager.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_pager.c,v 1.89 2007/12/01 10:40:28 yamt Exp $ */ +/* $NetBSD: uvm_pager.c,v 1.90 2008/01/02 11:49:19 ad Exp $ */ /* * @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_pager.c,v 1.89 2007/12/01 10:40:28 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_pager.c,v 1.90 2008/01/02 11:49:19 ad Exp $"); #include "opt_uvmhist.h" #include "opt_readahead.h" @@ -173,8 +173,9 @@ ReStart: if (pdaemon) { mutex_enter(&pager_map_wanted_lock); if (emerginuse) { - mtsleep(&emergva, PVM | PNORELOCK, "emergva", - 0, &pager_map_wanted_lock); + UVM_UNLOCK_AND_WAIT(&emergva, + &pager_map_wanted_lock, false, + "emergva", 0); goto ReStart; } emerginuse = true; @@ -191,8 +192,8 @@ ReStart: mutex_enter(&pager_map_wanted_lock); pager_map_wanted = true; UVMHIST_LOG(maphist, " SLEEPING on pager_map",0,0,0,0); - mtsleep(pager_map, PVM | PNORELOCK, "pager_map", 0, - &pager_map_wanted_lock); + UVM_UNLOCK_AND_WAIT(pager_map, &pager_map_wanted_lock, false, + "pager_map", 0); goto ReStart; } @@ -257,7 +258,7 @@ uvm_pagermapout(vaddr_t kva, int npages) /* * interrupt-context iodone handler for nested i/o bufs. * - * => must be at splbio(). + * => the buffer is private so need not be locked here */ void @@ -266,8 +267,9 @@ uvm_aio_biodone1(struct buf *bp) struct buf *mbp = bp->b_private; KASSERT(mbp != bp); - if (bp->b_error != 0) + if (bp->b_error != 0) { mbp->b_error = bp->b_error; + } mbp->b_resid -= bp->b_bcount; putiobuf(bp); if (mbp->b_resid == 0) { @@ -278,8 +280,6 @@ uvm_aio_biodone1(struct buf *bp) /* * interrupt-context iodone handler for single-buf i/os * or the top-level buf of a nested-buf i/o. - * - * => must be at splbio(). 
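[Reviewer note: UVM_UNLOCK_AND_WAIT() above subsumes the old mtsleep(... PNORELOCK ...) calls: it releases the interlock and sleeps as one atomic step, so a wakeup cannot slip in between. The waiter/waker pairing, as a sketch:]

	/* waiter */
	mutex_enter(&pager_map_wanted_lock);
	pager_map_wanted = true;
	UVM_UNLOCK_AND_WAIT(pager_map, &pager_map_wanted_lock, false,
	    "pager_map", 0);

	/* waker, e.g. from uvm_pagermapout() */
	mutex_enter(&pager_map_wanted_lock);
	if (pager_map_wanted) {
		pager_map_wanted = false;
		wakeup(pager_map);
	}
	mutex_exit(&pager_map_wanted_lock);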
*/ void @@ -302,18 +302,18 @@ uvm_aio_aiodone(struct buf *bp) int npages = bp->b_bufsize >> PAGE_SHIFT; struct vm_page *pg, *pgs[npages]; struct uvm_object *uobj; - struct simplelock *slock; - int s, i, error, swslot; + kmutex_t *slock; + int i, error, swslot; + int pageout_done = 0; bool write, swap; UVMHIST_FUNC("uvm_aio_aiodone"); UVMHIST_CALLED(ubchist); UVMHIST_LOG(ubchist, "bp %p", bp, 0,0,0); error = bp->b_error; write = (bp->b_flags & B_READ) == 0; - /* XXXUBC B_NOCACHE is for swap pager, should be done differently */ - if (write && !(bp->b_flags & B_NOCACHE) && bioopsp) { - bioopsp->io_pageiodone(bp); - } + /* XXXUBC BC_NOCACHE is for swap pager, should be done differently */ + if (write && !(bp->b_cflags & BC_NOCACHE) && bioopsp != NULL) + (*bioopsp->io_pageiodone)(bp); uobj = NULL; for (i = 0; i < npages; i++) { @@ -330,8 +330,8 @@ uvm_aio_aiodone(struct buf *bp) if (!swap) { uobj = pg->uobject; slock = &uobj->vmobjlock; - simple_lock(slock); - uvm_lock_pageq(); + mutex_enter(slock); + mutex_enter(&uvm_pageqlock); } else { #if defined(VMSWAP) if (error) { @@ -365,8 +365,8 @@ uvm_aio_aiodone(struct buf *bp) } else { slock = &pg->uanon->an_lock; } - simple_lock(slock); - uvm_lock_pageq(); + mutex_enter(slock); + mutex_enter(&uvm_pageqlock); } #endif /* defined(VMSWAP) */ @@ -387,7 +387,7 @@ uvm_aio_aiodone(struct buf *bp) } else if (error == ENOMEM) { if (pg->flags & PG_PAGEOUT) { pg->flags &= ~PG_PAGEOUT; - uvmexp.paging--; + pageout_done++; } pg->flags &= ~PG_CLEAN; uvm_pageactivate(pg); @@ -437,7 +437,7 @@ uvm_aio_aiodone(struct buf *bp) if (pg->flags & PG_PAGEOUT) { pg->flags &= ~PG_PAGEOUT; - uvmexp.paging--; + pageout_done++; uvmexp.pdfreed++; pg->flags |= PG_RELEASED; } @@ -450,20 +450,21 @@ uvm_aio_aiodone(struct buf *bp) if (swap) { if (pg->uobject == NULL && pg->uanon->an_ref == 0 && (pg->flags & PG_RELEASED) != 0) { - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); uvm_anon_release(pg->uanon); } else { uvm_page_unbusy(&pg, 1); - uvm_unlock_pageq(); - simple_unlock(slock); + mutex_exit(&uvm_pageqlock); + mutex_exit(slock); } } #endif /* defined(VMSWAP) */ } + uvm_pageout_done(pageout_done); if (!swap) { uvm_page_unbusy(pgs, npages); - uvm_unlock_pageq(); - simple_unlock(slock); + mutex_exit(&uvm_pageqlock); + mutex_exit(slock); } else { #if defined(VMSWAP) KASSERT(write); @@ -483,12 +484,12 @@ uvm_aio_aiodone(struct buf *bp) uvmexp.pdpending--; #endif /* defined(VMSWAP) */ } - s = splbio(); - if (write && (bp->b_flags & B_AGE) != 0) { + if (write && (bp->b_cflags & BC_AGE) != 0) { + mutex_enter(bp->b_objlock); vwakeup(bp); + mutex_exit(bp->b_objlock); } putiobuf(bp); - splx(s); } /* diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c index cb6803d81641..d0cacf7fede1 100644 --- a/sys/uvm/uvm_pdaemon.c +++ b/sys/uvm/uvm_pdaemon.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_pdaemon.c,v 1.88 2007/11/07 00:23:46 ad Exp $ */ +/* $NetBSD: uvm_pdaemon.c,v 1.89 2008/01/02 11:49:19 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.88 2007/11/07 00:23:46 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.89 2008/01/02 11:49:19 ad Exp $"); #include "opt_uvmhist.h" #include "opt_readahead.h" @@ -93,8 +93,9 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.88 2007/11/07 00:23:46 ad Exp $"); * queue too quickly to for them to be referenced and avoid being freed. 
*/ -#define UVMPD_NUMDIRTYREACTS 16 +#define UVMPD_NUMDIRTYREACTS 16 +#define UVMPD_NUMTRYLOCKOWNER 16 /* * local prototypes @@ -104,6 +105,8 @@ static void uvmpd_scan(void); static void uvmpd_scan_queue(void); static void uvmpd_tune(void); +unsigned int uvm_pagedaemon_waiters; + /* * XXX hack to avoid hangs when large processes fork. */ @@ -120,7 +123,8 @@ void uvm_wait(const char *wmsg) { int timo = 0; - int s = splbio(); + + mutex_spin_enter(&uvm_fpageqlock); /* * check for page daemon going to sleep (waiting for itself) @@ -152,23 +156,24 @@ uvm_wait(const char *wmsg) #endif } - mutex_enter(&uvm_pagedaemon_lock); + uvm_pagedaemon_waiters++; wakeup(&uvm.pagedaemon); /* wake the daemon! */ - mtsleep(&uvmexp.free, PVM, wmsg, timo, &uvm_pagedaemon_lock); - mutex_exit(&uvm_pagedaemon_lock); - - splx(s); + UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm_fpageqlock, false, wmsg, timo); } /* * uvm_kick_pdaemon: perform checks to determine if we need to * give the pagedaemon a nudge, and do so if necessary. + * + * => called with uvm_fpageqlock held. */ void uvm_kick_pdaemon(void) { + KASSERT(mutex_owned(&uvm_fpageqlock)); + if (uvmexp.free + uvmexp.paging < uvmexp.freemin || (uvmexp.free + uvmexp.paging < uvmexp.freetarg && uvmpdpol_needsscan_p())) { @@ -231,33 +236,40 @@ uvm_pageout(void *arg) */ uvm.pagedaemon_lwp = curlwp; - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); npages = uvmexp.npages; uvmpd_tune(); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* * main loop */ for (;;) { - mutex_enter(&uvm_pagedaemon_lock); + bool needsscan; - UVMHIST_LOG(pdhist," <>",0,0,0,0); - mtsleep(&uvm.pagedaemon, PVM | PNORELOCK, "pgdaemon", 0, - &uvm_pagedaemon_lock); - uvmexp.pdwoke++; - UVMHIST_LOG(pdhist," <>",0,0,0,0); + mutex_spin_enter(&uvm_fpageqlock); + if (uvm_pagedaemon_waiters == 0 || uvmexp.paging > 0) { + UVMHIST_LOG(pdhist," <>",0,0,0,0); + UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon, + &uvm_fpageqlock, false, "pgdaemon", 0); + uvmexp.pdwoke++; + UVMHIST_LOG(pdhist," <>",0,0,0,0); + } else { + mutex_spin_exit(&uvm_fpageqlock); + } /* * now lock page queues and recompute inactive count */ - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); if (npages != uvmexp.npages || extrapages != uvm_extrapages) { npages = uvmexp.npages; extrapages = uvm_extrapages; + mutex_spin_enter(&uvm_fpageqlock); uvmpd_tune(); + mutex_spin_exit(&uvm_fpageqlock); } uvmpdpol_tune(); @@ -266,6 +278,7 @@ uvm_pageout(void *arg) * Estimate a hint. Note that bufmem are returned to * system only when entire pool page is empty. */ + mutex_spin_enter(&uvm_fpageqlock); bufcnt = uvmexp.freetarg - uvmexp.free; if (bufcnt < 0) bufcnt = 0; @@ -273,30 +286,33 @@ uvm_pageout(void *arg) UVMHIST_LOG(pdhist," free/ftarg=%d/%d", uvmexp.free, uvmexp.freetarg, 0,0); + needsscan = uvmexp.free + uvmexp.paging < uvmexp.freetarg || + uvmpdpol_needsscan_p(); + mutex_spin_exit(&uvm_fpageqlock); + /* * scan if needed */ - - if (uvmexp.free + uvmexp.paging < uvmexp.freetarg || - uvmpdpol_needsscan_p()) { + if (needsscan) uvmpd_scan(); - } /* * if there's any free memory to be had, * wake up any waiters. */ + mutex_spin_enter(&uvm_fpageqlock); if (uvmexp.free > uvmexp.reserve_kernel || uvmexp.paging == 0) { wakeup(&uvmexp.free); + uvm_pagedaemon_waiters = 0; } + mutex_spin_exit(&uvm_fpageqlock); /* * scan done. unlock page queues (the only lock we are holding) */ - - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* * start draining pool resources now that we're not @@ -307,7 +323,9 @@ uvm_pageout(void *arg) /* * kill unused metadata buffers. 
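[Reviewer note: the uvm_wait() rework above folds the old uvm_pagedaemon_lock into uvm_fpageqlock, so the waiter count, the daemon wakeup, and the sleep all happen under one lock and no wakeup can be lost. In outline, using uvm_wait()'s own parameters:]

	mutex_spin_enter(&uvm_fpageqlock);
	uvm_pagedaemon_waiters++;
	wakeup(&uvm.pagedaemon);	/* nudge the daemon */
	/* atomically drop the lock and sleep on free memory */
	UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm_fpageqlock, false,
	    wmsg, timo);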
*/ + mutex_enter(&bufcache_lock); buf_drain(bufcnt << PAGE_SHIFT); + mutex_exit(&bufcache_lock); /* * free any cached u-areas we don't need @@ -330,7 +348,6 @@ uvm_pageout(void *arg) void uvm_aiodone_worker(struct work *wk, void *dummy) { - int free; struct buf *bp = (void *)wk; KASSERT(&bp->b_work == wk); @@ -339,17 +356,37 @@ uvm_aiodone_worker(struct work *wk, void *dummy) * process an i/o that's done. */ - free = uvmexp.free; (*bp->b_iodone)(bp); - if (free <= uvmexp.reserve_kernel) { - mutex_spin_enter(&uvm_fpageqlock); +} + +void +uvm_pageout_start(int npages) +{ + + mutex_spin_enter(&uvm_fpageqlock); + uvmexp.paging += npages; + mutex_spin_exit(&uvm_fpageqlock); +} + +void +uvm_pageout_done(int npages) +{ + + mutex_spin_enter(&uvm_fpageqlock); + KASSERT(uvmexp.paging >= npages); + uvmexp.paging -= npages; + + /* + * wake up either of pagedaemon or LWPs waiting for it. + */ + + if (uvmexp.free <= uvmexp.reserve_kernel) { wakeup(&uvm.pagedaemon); - mutex_spin_exit(&uvm_fpageqlock); } else { - mutex_enter(&uvm_pagedaemon_lock); wakeup(&uvmexp.free); - mutex_exit(&uvm_pagedaemon_lock); + uvm_pagedaemon_waiters = 0; } + mutex_spin_exit(&uvm_fpageqlock); } /* @@ -357,16 +394,17 @@ uvm_aiodone_worker(struct work *wk, void *dummy) * * => called with pageq locked. * => resolve orphaned O->A loaned page. - * => return the locked simplelock on success. otherwise, return NULL. + * => return the locked mutex on success. otherwise, return NULL. */ -struct simplelock * +kmutex_t * uvmpd_trylockowner(struct vm_page *pg) { struct uvm_object *uobj = pg->uobject; - struct simplelock *slock; + kmutex_t *slock; + + KASSERT(mutex_owned(&uvm_pageqlock)); - UVM_LOCK_ASSERT_PAGEQ(); if (uobj != NULL) { slock = &uobj->vmobjlock; } else { @@ -376,7 +414,7 @@ uvmpd_trylockowner(struct vm_page *pg) slock = &anon->an_lock; } - if (!simple_lock_try(slock)) { + if (!mutex_tryenter(slock)) { return NULL; } @@ -410,6 +448,7 @@ swapcluster_init(struct swapcluster *swc) { swc->swc_slot = 0; + swc->swc_nused = 0; } static int @@ -449,12 +488,12 @@ swapcluster_add(struct swapcluster *swc, struct vm_page *pg) slot = swc->swc_slot + swc->swc_nused; uobj = pg->uobject; if (uobj == NULL) { - LOCK_ASSERT(simple_lock_held(&pg->uanon->an_lock)); + KASSERT(mutex_owned(&pg->uanon->an_lock)); pg->uanon->an_swslot = slot; } else { int result; - LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock)); + KASSERT(mutex_owned(&uobj->vmobjlock)); result = uao_set_swslot(uobj, pg->offset >> PAGE_SHIFT, slot); if (result == -1) { return ENOMEM; @@ -500,6 +539,7 @@ swapcluster_flush(struct swapcluster *swc, bool now) */ uvmexp.pdpageouts++; + uvm_pageout_start(nused); error = uvm_swap_put(slot, swc->swc_pages, nused, 0); KASSERT(error == 0); @@ -509,6 +549,14 @@ swapcluster_flush(struct swapcluster *swc, bool now) */ swc->swc_slot = 0; + swc->swc_nused = 0; +} + +static int +swapcluster_nused(struct swapcluster *swc) +{ + + return swc->swc_nused; } /* @@ -551,7 +599,7 @@ uvmpd_dropswap(struct vm_page *pg) bool uvmpd_trydropswap(struct vm_page *pg) { - struct simplelock *slock; + kmutex_t *slock; bool result; if ((pg->flags & PG_BUSY) != 0) { @@ -572,13 +620,13 @@ uvmpd_trydropswap(struct vm_page *pg) */ if ((pg->flags & PG_BUSY) != 0) { - simple_unlock(slock); + mutex_exit(slock); return false; } result = uvmpd_dropswap(pg); - simple_unlock(slock); + mutex_exit(slock); return result; } @@ -605,7 +653,8 @@ uvmpd_scan_queue(void) struct swapcluster swc; #endif /* defined(VMSWAP) */ int dirtyreacts; - struct simplelock *slock; + int lockownerfail; + 
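[Reviewer note: uvm_pageout_start() and uvm_pageout_done() above centralize the uvmexp.paging accounting that used to be scattered bare increments and decrements. The intended bracketing, sketched around a swap write; error handling is elided:]

	uvm_pageout_start(npages);	/* uvmexp.paging += npages */
	error = uvm_swap_put(startslot, pgs, npages, 0);
	/* ... I/O completes; later, from the aiodone path: */
	uvm_pageout_done(npages);	/* decrement, then wake either the
					   pagedaemon or uvmexp.free sleepers */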
kmutex_t *slock; UVMHIST_FUNC("uvmpd_scan_queue"); UVMHIST_CALLED(pdhist); /* @@ -619,6 +668,7 @@ uvmpd_scan_queue(void) #endif /* defined(VMSWAP) */ dirtyreacts = 0; + lockownerfail = 0; uvmpdpol_scaninit(); while (/* CONSTCOND */ 1) { @@ -627,7 +677,11 @@ uvmpd_scan_queue(void) * see if we've met the free target. */ - if (uvmexp.free + uvmexp.paging >= uvmexp.freetarg << 2 || + if (uvmexp.free + uvmexp.paging +#if defined(VMSWAP) + + swapcluster_nused(&swc) +#endif /* defined(VMSWAP) */ + >= uvmexp.freetarg << 2 || dirtyreacts == UVMPD_NUMDIRTYREACTS) { UVMHIST_LOG(pdhist," met free target: " "exit loop", 0, 0, 0, 0); @@ -666,10 +720,24 @@ uvmpd_scan_queue(void) slock = uvmpd_trylockowner(p); if (slock == NULL) { + /* + * yield cpu to make a chance for an LWP holding + * the lock run. otherwise we can busy-loop too long + * if the page queue is filled with a lot of pages + * from few objects. + */ + lockownerfail++; + if (lockownerfail > UVMPD_NUMTRYLOCKOWNER) { + mutex_exit(&uvm_pageqlock); + /* XXX Better than yielding but inadequate. */ + kpause("livelock", false, 1, NULL); + mutex_enter(&uvm_pageqlock); + lockownerfail = 0; + } continue; } if (p->flags & PG_BUSY) { - simple_unlock(slock); + mutex_exit(slock); uvmexp.pdbusy++; continue; } @@ -702,10 +770,10 @@ uvmpd_scan_queue(void) if ((p->pqflags & PQ_SWAPBACKED) == 0) { KASSERT(uobj != NULL); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); (void) (uobj->pgops->pgo_put)(uobj, p->offset, p->offset + PAGE_SIZE, PGO_CLEANIT|PGO_FREE); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); continue; } @@ -741,7 +809,7 @@ uvmpd_scan_queue(void) } else { slot = uao_find_swslot(uobj, pageidx); } - simple_unlock(slock); + mutex_exit(slock); if (slot > 0) { /* this page is now only in swap. */ @@ -760,7 +828,7 @@ uvmpd_scan_queue(void) */ if (uvmexp.free + uvmexp.paging > uvmexp.freetarg << 2) { - simple_unlock(slock); + mutex_exit(slock); continue; } @@ -782,7 +850,7 @@ uvmpd_scan_queue(void) if (uvm_swapisfull()) { dirtyreacts++; uvm_pageactivate(p); - simple_unlock(slock); + mutex_exit(slock); continue; } @@ -791,7 +859,7 @@ uvmpd_scan_queue(void) */ if (swapcluster_allocslots(&swc)) { - simple_unlock(slock); + mutex_exit(slock); dirtyreacts++; /* XXX */ continue; } @@ -808,11 +876,10 @@ uvmpd_scan_queue(void) UVM_PAGE_OWN(p, "scan_queue"); p->flags |= PG_PAGEOUT; - uvmexp.paging++; uvm_pagedequeue(p); uvmexp.pgswapout++; - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); /* * add the new page to the cluster. @@ -821,17 +888,16 @@ uvmpd_scan_queue(void) if (swapcluster_add(&swc, p)) { p->flags &= ~(PG_BUSY|PG_PAGEOUT); UVM_PAGE_OWN(p, NULL); - uvm_lock_pageq(); - uvmexp.paging--; + mutex_enter(&uvm_pageqlock); dirtyreacts++; uvm_pageactivate(p); - simple_unlock(slock); + mutex_exit(slock); continue; } - simple_unlock(slock); + mutex_exit(slock); swapcluster_flush(&swc, false); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); /* * the pageout is in progress. 
bump counters and set up @@ -842,14 +908,14 @@ uvmpd_scan_queue(void) #else /* defined(VMSWAP) */ uvm_pageactivate(p); - simple_unlock(slock); + mutex_exit(slock); #endif /* defined(VMSWAP) */ } #if defined(VMSWAP) - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); swapcluster_flush(&swc, true); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); #endif /* defined(VMSWAP) */ } @@ -879,9 +945,9 @@ uvmpd_scan(void) uvmexp.pdswout++; UVMHIST_LOG(pdhist," free %d < target %d: swapout", uvmexp.free, uvmexp.freetarg, 0, 0); - uvm_unlock_pageq(); + mutex_exit(&uvm_pageqlock); uvm_swapout_threads(); - uvm_lock_pageq(); + mutex_enter(&uvm_pageqlock); } #endif diff --git a/sys/uvm/uvm_pdaemon.h b/sys/uvm/uvm_pdaemon.h index 6f98bdf58650..55a30d15d07d 100644 --- a/sys/uvm/uvm_pdaemon.h +++ b/sys/uvm/uvm_pdaemon.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_pdaemon.h,v 1.14 2007/02/21 23:00:14 thorpej Exp $ */ +/* $NetBSD: uvm_pdaemon.h,v 1.15 2008/01/02 11:49:20 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -82,7 +82,7 @@ void uvm_wait(const char *); bool uvm_reclaimable(void); -struct simplelock *uvmpd_trylockowner(struct vm_page *); +kmutex_t *uvmpd_trylockowner(struct vm_page *); bool uvmpd_trydropswap(struct vm_page *); #endif /* _KERNEL */ diff --git a/sys/uvm/uvm_pdpolicy_clock.c b/sys/uvm/uvm_pdpolicy_clock.c index 0f9c87bd3d32..93d403eb8cca 100644 --- a/sys/uvm/uvm_pdpolicy_clock.c +++ b/sys/uvm/uvm_pdpolicy_clock.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_pdpolicy_clock.c,v 1.8 2007/02/22 06:05:01 thorpej Exp $ */ +/* $NetBSD: uvm_pdpolicy_clock.c,v 1.9 2008/01/02 11:49:20 ad Exp $ */ /* NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $ */ /* @@ -74,7 +74,7 @@ #else /* defined(PDSIM) */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.8 2007/02/22 06:05:01 thorpej Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.9 2008/01/02 11:49:20 ad Exp $"); #include #include @@ -175,7 +175,7 @@ uvmpdpol_selectvictim(void) struct uvmpdpol_scanstate *ss = &pdpol_scanstate; struct vm_page *pg; - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); while (/* CONSTCOND */ 1) { struct vm_anon *anon; @@ -287,7 +287,7 @@ void uvmpdpol_pagedeactivate(struct vm_page *pg) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); if (pg->pqflags & PQ_ACTIVE) { TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pageq); pg->pqflags &= ~PQ_ACTIVE; @@ -317,13 +317,13 @@ uvmpdpol_pagedequeue(struct vm_page *pg) { if (pg->pqflags & PQ_ACTIVE) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pageq); pg->pqflags &= ~PQ_ACTIVE; KASSERT(pdpol_state.s_active > 0); pdpol_state.s_active--; } else if (pg->pqflags & PQ_INACTIVE) { - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pageq); pg->pqflags &= ~PQ_INACTIVE; KASSERT(pdpol_state.s_inactive > 0); diff --git a/sys/uvm/uvm_pdpolicy_clockpro.c b/sys/uvm/uvm_pdpolicy_clockpro.c index 7c8128ebe60c..84a75b967045 100644 --- a/sys/uvm/uvm_pdpolicy_clockpro.c +++ b/sys/uvm/uvm_pdpolicy_clockpro.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_pdpolicy_clockpro.c,v 1.9 2007/08/01 14:49:55 yamt Exp $ */ +/* $NetBSD: uvm_pdpolicy_clockpro.c,v 1.10 2008/01/02 11:49:20 ad Exp $ */ /*- * Copyright (c)2005, 2006 YAMAMOTO Takashi, @@ -43,7 +43,7 @@ #else /* defined(PDSIM) */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.9 2007/08/01 14:49:55 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: 
uvm_pdpolicy_clockpro.c,v 1.10 2008/01/02 11:49:20 ad Exp $"); #include "opt_ddb.h" @@ -712,7 +712,7 @@ clockpro_pageenqueue(struct vm_page *pg) bool speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */ KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0); - UVM_LOCK_ASSERT_PAGEQ(); + KASSERT(mutex_owned(&uvm_pageqlock)); check_sanity(); KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE); s->s_npages++; diff --git a/sys/uvm/uvm_readahead.c b/sys/uvm/uvm_readahead.c index 1154e2e367cc..5318b08778f2 100644 --- a/sys/uvm/uvm_readahead.c +++ b/sys/uvm/uvm_readahead.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_readahead.c,v 1.4 2007/05/11 12:11:09 tsutsui Exp $ */ +/* $NetBSD: uvm_readahead.c,v 1.5 2008/01/02 11:49:20 ad Exp $ */ /*- * Copyright (c)2003, 2005 YAMAMOTO Takashi, @@ -40,7 +40,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_readahead.c,v 1.4 2007/05/11 12:11:09 tsutsui Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_readahead.c,v 1.5 2008/01/02 11:49:20 ad Exp $"); #include #include @@ -83,21 +83,32 @@ static off_t ra_startio(struct uvm_object *, off_t, size_t); static struct uvm_ractx *ra_allocctx(void); static void ra_freectx(struct uvm_ractx *); -static POOL_INIT(ractx_pool, sizeof(struct uvm_ractx), 0, 0, 0, "ractx", - &pool_allocator_nointr, IPL_NONE); +static struct pool_cache ractx_cache; + +/* + * uvm_ra_init: initialize readahead module. + */ + +void +uvm_ra_init(void) +{ + + pool_cache_bootstrap(&ractx_cache, sizeof(struct uvm_ractx), 0, 0, 0, + "ractx", NULL, IPL_NONE, NULL, NULL, NULL); +} static struct uvm_ractx * ra_allocctx(void) { - return pool_get(&ractx_pool, PR_NOWAIT); + return pool_cache_get(&ractx_cache, PR_NOWAIT); } static void ra_freectx(struct uvm_ractx *ra) { - pool_put(&ractx_pool, ra); + pool_cache_put(&ractx_cache, ra); } /* @@ -134,11 +145,11 @@ ra_startio(struct uvm_object *uobj, off_t off, size_t sz) * use UVM_ADV_RANDOM to avoid recursion. */ - simple_lock(&uobj->vmobjlock); error = (*uobj->pgops->pgo_get)(uobj, off, NULL, &npages, 0, VM_PROT_READ, UVM_ADV_RANDOM, 0); DPRINTF(("%s: off=%" PRIu64 ", bytelen=%zu -> %d\n", __func__, off, bytelen, error)); + mutex_enter(&uobj->vmobjlock); if (error != 0 && error != EBUSY) { if (error != EINVAL) { /* maybe past EOF */ DPRINTF(("%s: error=%d\n", __func__, error)); @@ -188,6 +199,7 @@ uvm_ra_freectx(struct uvm_ractx *ra) * uvm_ra_request: update a read-ahead context and start i/o if appropriate. * * => called when [reqoff, reqoff+reqsize) is requested. + * => object must be locked by caller, will return locked. */ void @@ -195,15 +207,12 @@ uvm_ra_request(struct uvm_ractx *ra, int advice, struct uvm_object *uobj, off_t reqoff, size_t reqsize) { + KASSERT(mutex_owned(&uobj->vmobjlock)); + if (ra == NULL || advice == UVM_ADV_RANDOM) { return; } - /* - * XXX needs locking? maybe. - * but the worst effect is merely a bad read-ahead. - */ - if (advice == UVM_ADV_SEQUENTIAL) { /* @@ -293,12 +302,6 @@ do_readahead: #if defined(DIAGNOSTIC) if (rasize > RA_WINSIZE_MAX) { - - /* - * shouldn't happen as far as we're protected by - * kernel_lock. 
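[Reviewer note: the readahead change relies on the pgo_get() locking contract rather than wrapping the call itself: uvm_ra_request() now asserts the object locked on entry, pgo_get() with flags 0 releases the lock during I/O, and ra_startio() retakes it so the caller's view is unchanged. Schematically:]

	KASSERT(mutex_owned(&uobj->vmobjlock));
	error = (*uobj->pgops->pgo_get)(uobj, off, NULL, &npages, 0,
	    VM_PROT_READ, UVM_ADV_RANDOM, 0);	/* drops vmobjlock */
	mutex_enter(&uobj->vmobjlock);		/* restore for the caller */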
- */
-		printf("%s: corrupted context", __func__);
 		rasize = RA_WINSIZE_MAX;
 	}
diff --git a/sys/uvm/uvm_readahead.h b/sys/uvm/uvm_readahead.h
index 43878de2cd61..fffe2e26dcfa 100644
--- a/sys/uvm/uvm_readahead.h
+++ b/sys/uvm/uvm_readahead.h
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_readahead.h,v 1.2 2005/11/29 23:37:59 yamt Exp $ */
+/* $NetBSD: uvm_readahead.h,v 1.3 2008/01/02 11:49:21 ad Exp $ */
 
 /*-
  * Copyright (c)2003, 2005 YAMAMOTO Takashi,
@@ -32,6 +32,7 @@
 struct uvm_object;
 struct uvm_ractx;
 
+void uvm_ra_init(void);
 struct uvm_ractx *uvm_ra_allocctx(void);
 void uvm_ra_freectx(struct uvm_ractx *);
 void uvm_ra_request(struct uvm_ractx *, int, struct uvm_object *, off_t,
diff --git a/sys/uvm/uvm_stat.h b/sys/uvm/uvm_stat.h
index 1feb2d4c8ad0..5b288638a080 100644
--- a/sys/uvm/uvm_stat.h
+++ b/sys/uvm/uvm_stat.h
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_stat.h,v 1.39 2006/02/16 20:17:20 perry Exp $ */
+/* $NetBSD: uvm_stat.h,v 1.40 2008/01/02 11:49:21 ad Exp $ */
 
 /*
  *
@@ -70,7 +70,7 @@ struct uvm_history {
 	int f;			/* next free one */
 	int unused;		/* old location of struct simplelock */
 	struct uvm_history_ent *e;	/* the malloc'd entries */
-	struct simplelock l;	/* lock on this history */
+	kmutex_t l;		/* lock on this history */
 };
 
 LIST_HEAD(uvm_history_head, uvm_history);
@@ -117,7 +117,7 @@ do { \
 	(NAME).namelen = strlen(__STRING(NAME)); \
 	(NAME).n = (N); \
 	(NAME).f = 0; \
-	simple_lock_init(&(NAME).l); \
+	mutex_init(&(NAME).l, MUTEX_SPIN, IPL_HIGH); \
 	(NAME).e = (struct uvm_history_ent *) \
 		malloc(sizeof(struct uvm_history_ent) * (N), M_TEMP, \
 		    M_WAITOK); \
@@ -131,7 +131,7 @@ do { \
 	(NAME).namelen = strlen(__STRING(NAME)); \
 	(NAME).n = sizeof(BUF) / sizeof(struct uvm_history_ent); \
 	(NAME).f = 0; \
-	simple_lock_init(&(NAME).l); \
+	mutex_init(&(NAME).l, MUTEX_SPIN, IPL_HIGH); \
 	(NAME).e = (struct uvm_history_ent *) (BUF); \
 	memset((NAME).e, 0, sizeof(struct uvm_history_ent) * (NAME).n); \
 	LIST_INSERT_HEAD(&uvm_histories, &(NAME), list); \
@@ -152,12 +152,11 @@ do { \
 #define UVMHIST_LOG(NAME,FMT,A,B,C,D) \
 do { \
-	int _i_, _s_ = splhigh(); \
-	simple_lock(&(NAME).l); \
+	int _i_; \
+	mutex_enter(&(NAME).l); \
 	_i_ = (NAME).f; \
 	(NAME).f = (_i_ + 1 < (NAME).n) ? _i_ + 1 : 0; \
-	simple_unlock(&(NAME).l); \
-	splx(_s_); \
+	mutex_exit(&(NAME).l); \
 	if (!cold) \
 		microtime(&(NAME).e[_i_].tv); \
 	(NAME).e[_i_].cpunum = cpu_number(); \
@@ -176,11 +175,9 @@ do { \
 #define UVMHIST_CALLED(NAME) \
 do { \
 	{ \
-		int _s = splhigh(); \
-		simple_lock(&(NAME).l); \
+		mutex_enter(&(NAME).l); \
 		_uvmhist_call = _uvmhist_cnt++; \
-		simple_unlock(&(NAME).l); \
-		splx(_s); \
+		mutex_exit(&(NAME).l); \
 	} \
 	UVMHIST_LOG(NAME,"called!", 0, 0, 0, 0); \
 } while (/*CONSTCOND*/ 0)
diff --git a/sys/uvm/uvm_swap.c b/sys/uvm/uvm_swap.c
index e2ea33f8cf46..3b1c7c66436f 100644
--- a/sys/uvm/uvm_swap.c
+++ b/sys/uvm/uvm_swap.c
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_swap.c,v 1.133 2007/12/20 23:03:15 dsl Exp $ */
+/* $NetBSD: uvm_swap.c,v 1.134 2008/01/02 11:49:21 ad Exp $ */
 
 /*
  * Copyright (c) 1995, 1996, 1997 Matthew R.
Green @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_swap.c,v 1.133 2007/12/20 23:03:15 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_swap.c,v 1.134 2008/01/02 11:49:21 ad Exp $"); #include "fs_nfs.h" #include "opt_uvmhist.h" @@ -188,26 +188,6 @@ POOL_INIT(vndxfer_pool, sizeof(struct vndxfer), 0, 0, 0, "swp vnx", NULL, POOL_INIT(vndbuf_pool, sizeof(struct vndbuf), 0, 0, 0, "swp vnd", NULL, IPL_BIO); -#define getvndxfer(vnx) do { \ - int sp = splbio(); \ - vnx = pool_get(&vndxfer_pool, PR_WAITOK); \ - splx(sp); \ -} while (/*CONSTCOND*/ 0) - -#define putvndxfer(vnx) { \ - pool_put(&vndxfer_pool, (void *)(vnx)); \ -} - -#define getvndbuf(vbp) do { \ - int sp = splbio(); \ - vbp = pool_get(&vndbuf_pool, PR_WAITOK); \ - splx(sp); \ -} while (/*CONSTCOND*/ 0) - -#define putvndbuf(vbp) { \ - pool_put(&vndbuf_pool, (void *)(vbp)); \ -} - /* * local variables */ @@ -269,8 +249,7 @@ uvm_swap_init(void) uvmexp.nswapdev = 0; rw_init(&swap_syscall_lock); cv_init(&uvm.scheduler_cv, "schedule"); - /* XXXSMP should be adaptive, but needs vmobjlock replaced */ - mutex_init(&uvm_swap_data_lock, MUTEX_SPIN, IPL_NONE); + mutex_init(&uvm_swap_data_lock, MUTEX_DEFAULT, IPL_NONE); /* XXXSMP should be at IPL_VM, but for audio interrupt handlers. */ mutex_init(&uvm_scheduler_mutex, MUTEX_SPIN, IPL_SCHED); @@ -1103,7 +1082,7 @@ swstrategy(struct buf *bp) { struct swapdev *sdp; struct vnode *vp; - int s, pageno, bn; + int pageno, bn; UVMHIST_FUNC("swstrategy"); UVMHIST_CALLED(pdhist); /* @@ -1139,9 +1118,10 @@ swstrategy(struct buf *bp) * to sw_reg_strategy(). */ - switch (sdp->swd_vp->v_type) { + vp = sdp->swd_vp; /* swapdev vnode pointer */ + switch (vp->v_type) { default: - panic("swstrategy: vnode type 0x%x", sdp->swd_vp->v_type); + panic("swstrategy: vnode type 0x%x", vp->v_type); case VBLK: @@ -1149,9 +1129,7 @@ swstrategy(struct buf *bp) * must convert "bp" from an I/O on /dev/drum to an I/O * on the swapdev (sdp). */ - s = splbio(); bp->b_blkno = bn; /* swapdev block number */ - vp = sdp->swd_vp; /* swapdev vnode pointer */ bp->b_dev = sdp->swd_dev; /* swapdev dev_t */ /* @@ -1159,15 +1137,19 @@ swstrategy(struct buf *bp) * drum's v_numoutput counter to the swapdevs. */ if ((bp->b_flags & B_READ) == 0) { + mutex_enter(bp->b_objlock); vwakeup(bp); /* kills one 'v_numoutput' on drum */ - V_INCR_NUMOUTPUT(vp); /* put it on swapdev */ + mutex_exit(bp->b_objlock); + mutex_enter(&vp->v_interlock); + vp->v_numoutput++; /* put it on swapdev */ + mutex_exit(&vp->v_interlock); } /* * finally plug in swapdev vnode and start I/O */ bp->b_vp = vp; - splx(s); + bp->b_objlock = &vp->v_interlock; VOP_STRATEGY(vp, bp); return; @@ -1234,7 +1216,7 @@ sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn) * allocate a vndxfer head for this transfer and point it to * our buffer. */ - getvndxfer(vnx); + vnx = pool_get(&vndxfer_pool, PR_WAITOK); vnx->vx_flags = VX_BUSY; vnx->vx_error = 0; vnx->vx_pending = 0; @@ -1309,9 +1291,11 @@ sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn) * at the front of the nbp structure so that you can * cast pointers between the two structure easily. 
*/ - getvndbuf(nbp); - BUF_INIT(&nbp->vb_buf); - nbp->vb_buf.b_flags = bp->b_flags | B_CALL; + nbp = pool_get(&vndbuf_pool, PR_WAITOK); + buf_init(&nbp->vb_buf); + nbp->vb_buf.b_flags = bp->b_flags; + nbp->vb_buf.b_cflags = bp->b_cflags; + nbp->vb_buf.b_oflags = bp->b_oflags; nbp->vb_buf.b_bcount = sz; nbp->vb_buf.b_bufsize = sz; nbp->vb_buf.b_error = 0; @@ -1321,6 +1305,7 @@ sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn) nbp->vb_buf.b_rawblkno = nbp->vb_buf.b_blkno; nbp->vb_buf.b_iodone = sw_reg_biodone; nbp->vb_buf.b_vp = vp; + nbp->vb_buf.b_objlock = &vp->v_interlock; if (vp->v_type == VBLK) { nbp->vb_buf.b_dev = vp->v_rdev; } @@ -1332,12 +1317,14 @@ sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn) */ s = splbio(); if (vnx->vx_error != 0) { - putvndbuf(nbp); + buf_destroy(&nbp->vb_buf); + pool_put(&vndbuf_pool, nbp); goto out; } vnx->vx_pending++; /* sort it in and start I/O if we are not over our limit */ + /* XXXAD locking */ BUFQ_PUT(sdp->swd_tab, &nbp->vb_buf); sw_reg_start(sdp); splx(s); @@ -1354,9 +1341,9 @@ sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn) out: /* Arrive here at splbio */ vnx->vx_flags &= ~VX_BUSY; if (vnx->vx_pending == 0) { - if (vnx->vx_error != 0) - bp->b_error = vnx->vx_error; - putvndxfer(vnx); + error = vnx->vx_error; + pool_put(&vndxfer_pool, vnx); + bp->b_error = error; biodone(bp); } splx(s); @@ -1371,6 +1358,7 @@ static void sw_reg_start(struct swapdev *sdp) { struct buf *bp; + struct vnode *vp; UVMHIST_FUNC("sw_reg_start"); UVMHIST_CALLED(pdhist); /* recursion control */ @@ -1388,10 +1376,14 @@ sw_reg_start(struct swapdev *sdp) UVMHIST_LOG(pdhist, "sw_reg_start: bp %p vp %p blkno %p cnt %lx", bp, bp->b_vp, bp->b_blkno, bp->b_bcount); - if ((bp->b_flags & B_READ) == 0) - V_INCR_NUMOUTPUT(bp->b_vp); - - VOP_STRATEGY(bp->b_vp, bp); + vp = bp->b_vp; + KASSERT(bp->b_objlock == &vp->v_interlock); + if ((bp->b_flags & B_READ) == 0) { + mutex_enter(&vp->v_interlock); + vp->v_numoutput++; + mutex_exit(&vp->v_interlock); + } + VOP_STRATEGY(vp, bp); } sdp->swd_flags &= ~SWF_BUSY; } @@ -1437,7 +1429,7 @@ sw_reg_iodone(struct work *wk, void *dummy) if (vbp->vb_buf.b_error != 0) { /* pass error upward */ - error = vbp->vb_buf.b_error; + error = vbp->vb_buf.b_error ? 
vbp->vb_buf.b_error : EIO; UVMHIST_LOG(pdhist, " got error=%d !", error, 0, 0, 0); vnx->vx_error = error; } @@ -1445,7 +1437,8 @@ sw_reg_iodone(struct work *wk, void *dummy) /* * kill vbp structure */ - putvndbuf(vbp); + buf_destroy(&vbp->vb_buf); + pool_put(&vndbuf_pool, vbp); /* * wrap up this transaction if it has run to completion or, in @@ -1453,18 +1446,19 @@ sw_reg_iodone(struct work *wk, void *dummy) */ if (vnx->vx_error != 0) { /* pass error upward */ - pbp->b_error = vnx->vx_error; + error = vnx->vx_error; if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) { - putvndxfer(vnx); + pbp->b_error = error; biodone(pbp); + pool_put(&vndxfer_pool, vnx); } } else if (pbp->b_resid == 0) { KASSERT(vnx->vx_pending == 0); if ((vnx->vx_flags & VX_BUSY) == 0) { UVMHIST_LOG(pdhist, " iodone error=%d !", pbp, vnx->vx_error, 0, 0); - putvndxfer(vnx); biodone(pbp); + pool_put(&vndxfer_pool, vnx); } } @@ -1690,7 +1684,7 @@ uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags) daddr_t startblk; struct buf *bp; vaddr_t kva; - int error, s, mapinflags; + int error, mapinflags; bool write, async; UVMHIST_FUNC("uvm_swap_io"); UVMHIST_CALLED(pdhist); @@ -1719,19 +1713,19 @@ uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags) * now allocate a buf for the i/o. */ - bp = getiobuf(); + bp = getiobuf(swapdev_vp, true); /* * fill in the bp/sbp. we currently route our i/o through * /dev/drum's vnode [swapdev_vp]. */ - bp->b_flags = B_BUSY | B_NOCACHE | (flags & (B_READ|B_ASYNC)); + bp->b_cflags = BC_BUSY | BC_NOCACHE; + bp->b_flags = (flags & (B_READ|B_ASYNC)); bp->b_proc = &proc0; /* XXX */ bp->b_vnbufs.le_next = NOLIST; bp->b_data = (void *)kva; bp->b_blkno = startblk; - bp->b_vp = swapdev_vp; bp->b_bufsize = bp->b_bcount = npages << PAGE_SHIFT; /* @@ -1739,9 +1733,9 @@ uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags) */ if (write) { - s = splbio(); - V_INCR_NUMOUTPUT(swapdev_vp); - splx(s); + mutex_enter(&swapdev_vp->v_interlock); + swapdev_vp->v_numoutput++; + mutex_exit(&swapdev_vp->v_interlock); } /* @@ -1749,7 +1743,6 @@ uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags) */ if (async) { - bp->b_flags |= B_CALL; bp->b_iodone = uvm_aio_biodone; UVMHIST_LOG(pdhist, "doing async!", 0, 0, 0, 0); if (curlwp == uvm.pagedaemon_lwp) @@ -1757,6 +1750,7 @@ uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags) else BIO_SETPRIO(bp, BPRIO_TIMELIMITED); } else { + bp->b_iodone = NULL; BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); } UVMHIST_LOG(pdhist, @@ -1787,11 +1781,13 @@ uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags) * now dispose of the buf and we're done. */ - s = splbio(); - if (write) + if (write) { + mutex_enter(&swapdev_vp->v_interlock); vwakeup(bp); + mutex_exit(&swapdev_vp->v_interlock); + } putiobuf(bp); - splx(s); UVMHIST_LOG(pdhist, "<- done (sync) error=%d", error, 0, 0, 0); + return (error); } diff --git a/sys/uvm/uvm_unix.c b/sys/uvm/uvm_unix.c index b269db273c91..b899219d2f37 100644 --- a/sys/uvm/uvm_unix.c +++ b/sys/uvm/uvm_unix.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_unix.c,v 1.39 2007/12/20 23:03:15 dsl Exp $ */ +/* $NetBSD: uvm_unix.c,v 1.40 2008/01/02 11:49:21 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. 
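[Note: taken together, the swstrategy() and uvm_swap_io() hunks above converge on one pattern: I/O buffers come from getiobuf()/putiobuf(), busy state lives in b_cflags as BC_BUSY, and the vnode's v_numoutput counter is manipulated under v_interlock rather than splbio(). Below is a condensed sketch of a synchronous swap write under the new rules; the function name and the bare-bones error handling are invented for illustration, and it assumes, as the uvm_swap_io() hunk suggests, that getiobuf(vp, true) associates the buffer with vp and its interlock:]

    static int
    swap_write_sketch(struct vnode *vp, void *kva, int bytes, daddr_t blkno)
    {
            struct buf *bp;
            int error;

            bp = getiobuf(vp, true);          /* may sleep for a free buf */
            bp->b_cflags = BC_BUSY | BC_NOCACHE;
            bp->b_flags = 0;                  /* a write: B_READ clear */
            bp->b_data = kva;
            bp->b_blkno = blkno;
            bp->b_bufsize = bp->b_bcount = bytes;
            bp->b_iodone = NULL;              /* synchronous: no callback */

            /* count the pending write under the interlock, not splbio() */
            mutex_enter(&vp->v_interlock);
            vp->v_numoutput++;
            mutex_exit(&vp->v_interlock);

            VOP_STRATEGY(vp, bp);
            error = biowait(bp);

            /* vwakeup() drops v_numoutput and is now called locked */
            mutex_enter(&vp->v_interlock);
            vwakeup(bp);
            mutex_exit(&vp->v_interlock);
            putiobuf(bp);
            return error;
    }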
@@ -50,7 +50,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_unix.c,v 1.39 2007/12/20 23:03:15 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_unix.c,v 1.40 2008/01/02 11:49:21 ad Exp $"); #include "opt_pax.h" @@ -83,15 +83,20 @@ sys_obreak(struct lwp *l, const struct sys_obreak_args *uap, register_t *retval) vaddr_t new, old; int error; + mutex_enter(&p->p_auxlock); old = (vaddr_t)vm->vm_daddr; new = round_page((vaddr_t)SCARG(uap, nsize)); - if ((new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur && new > old) + if ((new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur && new > old) { + mutex_exit(&p->p_auxlock); return (ENOMEM); + } old = round_page(old + ptoa(vm->vm_dsize)); - if (new == old) + if (new == old) { + mutex_exit(&p->p_auxlock); return (0); + } /* * grow or shrink? @@ -114,6 +119,7 @@ sys_obreak(struct lwp *l, const struct sys_obreak_args *uap, register_t *retval) if (error) { uprintf("sbrk: grow %ld failed, error = %d\n", new - old, error); + mutex_exit(&p->p_auxlock); return (error); } vm->vm_dsize += atop(new - old); @@ -121,6 +127,8 @@ sys_obreak(struct lwp *l, const struct sys_obreak_args *uap, register_t *retval) uvm_deallocate(&vm->vm_map, new, old - new); vm->vm_dsize -= atop(old - new); } + mutex_exit(&p->p_auxlock); + return (0); } diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c index 322cf48251a8..c72eab9c15c4 100644 --- a/sys/uvm/uvm_vnode.c +++ b/sys/uvm/uvm_vnode.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_vnode.c,v 1.89 2007/12/01 10:40:28 yamt Exp $ */ +/* $NetBSD: uvm_vnode.c,v 1.90 2008/01/02 11:49:21 ad Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -50,7 +50,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_vnode.c,v 1.89 2007/12/01 10:40:28 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_vnode.c,v 1.90 2008/01/02 11:49:21 ad Exp $"); #include "fs_nfs.h" #include "opt_uvmhist.h" @@ -148,8 +148,9 @@ uvn_put(struct uvm_object *uobj, voff_t offlo, voff_t offhi, int flags) struct vnode *vp = (struct vnode *)uobj; int error; - LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); + KASSERT(mutex_owned(&vp->v_interlock)); error = VOP_PUTPAGES(vp, offlo, offhi, flags); + return error; } @@ -179,19 +180,16 @@ uvn_get(struct uvm_object *uobj, voff_t offset, UVMHIST_LOG(ubchist, "vp %p off 0x%x", vp, (int)offset, 0,0); if ((access_type & VM_PROT_WRITE) == 0 && (flags & PGO_LOCKED) == 0) { - simple_unlock(&vp->v_interlock); vn_ra_allocctx(vp); uvm_ra_request(vp->v_ractx, advice, uobj, offset, *npagesp << PAGE_SHIFT); - simple_lock(&vp->v_interlock); } error = VOP_GETPAGES(vp, offset, pps, npagesp, centeridx, access_type, advice, flags); - LOCK_ASSERT(((flags & PGO_LOCKED) != 0 && - simple_lock_held(&vp->v_interlock)) || - (flags & PGO_LOCKED) == 0); + KASSERT(((flags & PGO_LOCKED) != 0 && mutex_owned(&vp->v_interlock)) || + (flags & PGO_LOCKED) == 0); return error; } @@ -265,9 +263,9 @@ uvn_findpage(struct uvm_object *uobj, voff_t offset, struct vm_page **pgp, UVMHIST_LOG(ubchist, "nowait",0,0,0,0); return 0; } - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); uvm_wait("uvn_fp1"); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); continue; } UVMHIST_LOG(ubchist, "alloced %p", pg,0,0,0); @@ -287,7 +285,7 @@ uvn_findpage(struct uvm_object *uobj, voff_t offset, struct vm_page **pgp, UVMHIST_LOG(ubchist, "wait %p", pg,0,0,0); UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0, "uvn_fp2", 0); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); continue; } @@ -337,7 +335,7 @@ uvm_vnp_setsize(struct vnode *vp, voff_t 
newsize) voff_t oldsize; UVMHIST_FUNC("uvm_vnp_setsize"); UVMHIST_CALLED(ubchist); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x", vp, vp->v_size, newsize, 0); @@ -356,24 +354,24 @@ uvm_vnp_setsize(struct vnode *vp, voff_t newsize) if (oldsize > pgend) { (void) uvn_put(uobj, pgend, 0, PGO_FREE | PGO_SYNCIO); - simple_lock(&uobj->vmobjlock); + mutex_enter(&uobj->vmobjlock); } vp->v_size = vp->v_writesize = newsize; - simple_unlock(&uobj->vmobjlock); + mutex_exit(&uobj->vmobjlock); } void uvm_vnp_setwritesize(struct vnode *vp, voff_t newsize) { - simple_lock(&vp->v_interlock); + mutex_enter(&vp->v_interlock); KASSERT(newsize != VSIZENOTSET); KASSERT(vp->v_size != VSIZENOTSET); KASSERT(vp->v_writesize != VSIZENOTSET); KASSERT(vp->v_size <= vp->v_writesize); KASSERT(vp->v_size <= newsize); vp->v_writesize = newsize; - simple_unlock(&vp->v_interlock); + mutex_exit(&vp->v_interlock); } /*
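[Note: the uvm_vnode.c hunks complete the pattern: every simple_lock()/simple_unlock() on vmobjlock and v_interlock becomes mutex_enter()/mutex_exit(), and LOCK_ASSERT(simple_lock_held(...)) becomes KASSERT(mutex_owned(...)). The uvn_findpage() change deserves a sketch, since it shows the rule the whole diff follows: the object lock must be released around anything that sleeps, then retaken before the lookup is retried. The helper below is an illustrative reduction of that loop (it omits the busy-page case), not code from the patch:]

    /*
     * Find or allocate the page at `offset'.  Caller holds
     * uobj->vmobjlock; it is dropped and retaken around the sleep,
     * so the lookup must be retried from scratch afterwards.
     */
    static struct vm_page *
    uvn_getpage_sketch(struct uvm_object *uobj, voff_t offset)
    {
            struct vm_page *pg;

            KASSERT(mutex_owned(&uobj->vmobjlock));
            for (;;) {
                    pg = uvm_pagelookup(uobj, offset);
                    if (pg == NULL)
                            pg = uvm_pagealloc(uobj, offset, NULL, 0);
                    if (pg != NULL)
                            return pg;
                    mutex_exit(&uobj->vmobjlock);   /* can't sleep holding it */
                    uvm_wait("uvn_fp1");            /* wait for free pages */
                    mutex_enter(&uobj->vmobjlock);  /* retake, then retry */
            }
    }

[The busy-page branch in the real uvn_findpage() does the equivalent dance through UVM_UNLOCK_AND_WAIT(), which releases vmobjlock as it sleeps; that is why the hunk retakes the mutex with mutex_enter() before continuing the loop.]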