- use cpu-local temporary mapping for P->V operations and kill pmap_main_lock.

fixes a part of PR/37193 from Andrew Doran.
- use pmap_stats_update_bypte in some more places.

some comments and fixes from Andrew Doran and Manuel Bouyer.
This commit is contained in:
yamt 2008-01-15 11:12:03 +00:00
parent 8409a2d11e
commit 1788a8e60c

View File

@ -1,4 +1,4 @@
/* $NetBSD: pmap.c,v 1.27 2008/01/13 11:34:18 yamt Exp $ */ /* $NetBSD: pmap.c,v 1.28 2008/01/15 11:12:03 yamt Exp $ */
/* /*
* Copyright (c) 2007 Manuel Bouyer. * Copyright (c) 2007 Manuel Bouyer.
@ -154,7 +154,7 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.27 2008/01/13 11:34:18 yamt Exp $"); __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.28 2008/01/15 11:12:03 yamt Exp $");
#include "opt_user_ldt.h" #include "opt_user_ldt.h"
#include "opt_lockdebug.h" #include "opt_lockdebug.h"
@ -283,21 +283,6 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.27 2008/01/13 11:34:18 yamt Exp $");
* *
* we have the following locks that we must contend with: * we have the following locks that we must contend with:
* *
* RW locks:
*
* - pmap_main_lock
* this lock is used to prevent deadlock and/or provide mutex
* access to the pmap system. most operations lock the pmap
* structure first, then they lock the pv_lists (if needed).
* however, some operations such as pmap_page_protect lock
* the pv_lists and then lock pmaps. in order to prevent a
* cycle, we require a mutex lock when locking the pv_lists
* first. thus, the "pmap = >pv_list" lockers must gain a
* read-lock on pmap_main_lock before locking the pmap. and
* the "pv_list => pmap" lockers must gain a write-lock on
* pmap_main_lock before locking. since only one thread
* can write-lock a lock at a time, this provides mutex.
*
* mutexes: * mutexes:
* *
* - pmap lock (per pmap, part of uvm_object) * - pmap lock (per pmap, part of uvm_object)
@ -346,7 +331,6 @@ pd_entry_t *normal_pdes[] = PDES_INITIALIZER;
pd_entry_t *alternate_pdes[] = APDES_INITIALIZER; pd_entry_t *alternate_pdes[] = APDES_INITIALIZER;
static kmutex_t pmaps_lock; static kmutex_t pmaps_lock;
static krwlock_t pmap_main_lock;
static vaddr_t pmap_maxkvaddr; static vaddr_t pmap_maxkvaddr;
@ -1326,7 +1310,6 @@ pmap_bootstrap(vaddr_t kva_start)
* again is never taken from interrupt context. * again is never taken from interrupt context.
*/ */
rw_init(&pmap_main_lock);
mutex_init(&pmaps_lock, MUTEX_DEFAULT, IPL_NONE); mutex_init(&pmaps_lock, MUTEX_DEFAULT, IPL_NONE);
LIST_INIT(&pmaps); LIST_INIT(&pmaps);
pmap_cpu_init_early(curcpu()); pmap_cpu_init_early(curcpu());
@ -1532,6 +1515,8 @@ pmap_enter_pv(struct pv_head *pvh,
vaddr_t va, vaddr_t va,
struct vm_page *ptp) /* PTP in pmap that maps this VA */ struct vm_page *ptp) /* PTP in pmap that maps this VA */
{ {
KASSERT(mutex_owned(&pvh->pvh_lock));
pve->pv_pmap = pmap; pve->pv_pmap = pmap;
pve->pv_va = va; pve->pv_va = va;
pve->pv_ptp = ptp; /* NULL for kernel pmap */ pve->pv_ptp = ptp; /* NULL for kernel pmap */
@ -1553,6 +1538,7 @@ pmap_remove_pv(struct pv_head *pvh, struct pmap *pmap, vaddr_t va)
{ {
struct pv_entry tmp, *pve; struct pv_entry tmp, *pve;
KASSERT(mutex_owned(&pvh->pvh_lock));
tmp.pv_pmap = pmap; tmp.pv_pmap = pmap;
tmp.pv_va = va; tmp.pv_va = va;
pve = SPLAY_FIND(pvtree, &pvh->pvh_root, &tmp); pve = SPLAY_FIND(pvtree, &pvh->pvh_root, &tmp);
@ -2678,6 +2664,56 @@ pmap_copy_page(paddr_t srcpa, paddr_t dstpa)
#endif #endif
} }
static pt_entry_t *
pmap_map_ptp(struct vm_page *ptp)
{
pt_entry_t *ptppte;
void *ptpva;
int id;
id = cpu_number();
ptppte = PTESLEW(ptp_pte, id);
ptpva = VASLEW(ptpp, id);
pmap_pte_set(ptppte, pmap_pa2pte(VM_PAGE_TO_PHYS(ptp)) | PG_V | PG_M |
#if !defined(XEN)
PG_RW |
#endif
PG_U | PG_k);
pmap_pte_flush();
pmap_update_pg((vaddr_t)ptpva);
return (pt_entry_t *)ptpva;
}
static void
pmap_unmap_ptp(void)
{
#if defined(DIAGNOSTIC) || defined(XEN)
pt_entry_t *pte;
pte = PTESLEW(ptp_pte, cpu_number());
pmap_pte_set(pte, 0);
pmap_pte_flush();
#endif
}
static pt_entry_t *
pmap_map_pte(struct vm_page *ptp, vaddr_t va)
{
if (ptp == NULL) {
return kvtopte(va);
}
return pmap_map_ptp(ptp) + pl1_pi(va);
}
static void
pmap_unmap_pte(void)
{
pmap_unmap_ptp();
}
/* /*
* p m a p r e m o v e f u n c t i o n s * p m a p r e m o v e f u n c t i o n s
* *
@ -2888,12 +2924,6 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
struct vm_page *ptp, *empty_ptps = NULL; struct vm_page *ptp, *empty_ptps = NULL;
struct pmap *pmap2; struct pmap *pmap2;
/*
* we lock in the pmap => pv_head direction
*/
rw_enter(&pmap_main_lock, RW_READER);
pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */
/* /*
@ -2995,7 +3025,6 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
} }
pmap_tlb_shootwait(); pmap_tlb_shootwait();
pmap_unmap_ptes(pmap, pmap2); /* unlock pmap */ pmap_unmap_ptes(pmap, pmap2); /* unlock pmap */
rw_exit(&pmap_main_lock);
/* Now we can free unused PVs and ptps */ /* Now we can free unused PVs and ptps */
if (pv_tofree) if (pv_tofree)
@ -3017,15 +3046,11 @@ void
pmap_page_remove(struct vm_page *pg) pmap_page_remove(struct vm_page *pg)
{ {
struct pv_head *pvh; struct pv_head *pvh;
struct pv_entry *pve, *npve, *killlist = NULL; struct pv_entry *pve, *killlist = NULL;
pt_entry_t *ptes, opte;
pd_entry_t **pdes;
#ifdef DIAGNOSTIC
pd_entry_t pde;
#endif
struct vm_page *empty_ptps = NULL; struct vm_page *empty_ptps = NULL;
struct vm_page *ptp; struct vm_page *ptp;
struct pmap *pmap2; pt_entry_t expect;
int count;
#ifdef DIAGNOSTIC #ifdef DIAGNOSTIC
int bank, off; int bank, off;
@ -3040,37 +3065,44 @@ pmap_page_remove(struct vm_page *pg)
return; return;
} }
/* set pv_head => pmap locking */ expect = pmap_pa2pte(VM_PAGE_TO_PHYS(pg)) | PG_V;
rw_enter(&pmap_main_lock, RW_WRITER); count = SPINLOCK_BACKOFF_MIN;
startover:
for (pve = SPLAY_MIN(pvtree, &pvh->pvh_root); pve != NULL; pve = npve) { mutex_spin_enter(&pvh->pvh_lock);
npve = SPLAY_NEXT(pvtree, &pvh->pvh_root, pve); while ((pve = SPLAY_MIN(pvtree, &pvh->pvh_root)) != NULL) {
struct pmap *pmap = pve->pv_pmap;
/* locks pmap */ pt_entry_t *ptep;
pmap_map_ptes(pve->pv_pmap, &pmap2, &ptes, &pdes); pt_entry_t opte;
#ifdef DIAGNOSTIC
if (pve->pv_ptp && pmap_pdes_valid(pve->pv_va, pdes, &pde) &&
pmap_pte2pa(pde) != VM_PAGE_TO_PHYS(pve->pv_ptp)) {
printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n",
pg, pve->pv_va, pve->pv_ptp);
printf("pmap_page_remove: PTP's phys addr: "
"actual=%lx, recorded=%lx\n",
(unsigned long)pmap_pte2pa(pde),
(unsigned long)VM_PAGE_TO_PHYS(pve->pv_ptp));
panic("pmap_page_remove: mapped managed page has "
"invalid pv_ptp field");
}
#endif
/* atomically save the old PTE and zap! it */ /* atomically save the old PTE and zap! it */
opte = pmap_pte_testset(&ptes[pl1_i(pve->pv_va)], 0); ptep = pmap_map_pte(pve->pv_ptp, pve->pv_va);
KASSERT(pmap_valid_entry(opte)); do {
KDASSERT(pmap_pte2pa(opte) == VM_PAGE_TO_PHYS(pg)); opte = *ptep;
if ((opte & (PG_FRAME | PG_V)) != expect) {
#if defined(MULTIPROCESSOR)
int hold_count;
#endif /* defined(MULTIPROCESSOR) */
if (opte & PG_W) /*
pve->pv_pmap->pm_stats.wired_count--; * we lost a race with a V->P operation like
pve->pv_pmap->pm_stats.resident_count--; * pmap_remove(). wait for the competitor
* reflecting pte bits into mp_attrs.
*
* issue a redundant TLB shootdown so that
* we can wait for its completion.
*/
pmap_unmap_pte();
pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, 0,
PG_G);
mutex_spin_exit(&pvh->pvh_lock);
KERNEL_UNLOCK_ALL(curlwp, &hold_count);
SPINLOCK_BACKOFF(count);
KERNEL_LOCK(hold_count, curlwp);
goto startover;
}
} while (pmap_pte_cas(ptep, opte, 0) != opte);
pmap_unmap_pte();
/* Shootdown only if referenced */ /* Shootdown only if referenced */
if (opte & PG_U) if (opte & PG_U)
@ -3079,21 +3111,41 @@ pmap_page_remove(struct vm_page *pg)
/* sync R/M bits */ /* sync R/M bits */
pg->mdpage.mp_attrs |= (opte & (PG_U|PG_M)); pg->mdpage.mp_attrs |= (opte & (PG_U|PG_M));
if (pve->pv_ptp) {
pmap_reference(pmap);
}
SPLAY_REMOVE(pvtree, &pvh->pvh_root, pve); /* remove it */
mutex_spin_exit(&pvh->pvh_lock);
/* update the PTP reference count. free if last reference. */ /* update the PTP reference count. free if last reference. */
if (pve->pv_ptp) { if (pve->pv_ptp) {
struct pmap *pmap2;
pt_entry_t *ptes;
pd_entry_t **pdes;
KASSERT(pmap != pmap_kernel());
crit_enter();
pmap_tlb_shootwait();
crit_exit();
pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
pmap_stats_update_bypte(pmap, 0, opte);
pve->pv_ptp->wire_count--; pve->pv_ptp->wire_count--;
if (pve->pv_ptp->wire_count <= 1) { if (pve->pv_ptp->wire_count <= 1) {
pmap_free_ptp(pve->pv_pmap, pve->pv_ptp, pmap_free_ptp(pve->pv_pmap, pve->pv_ptp,
pve->pv_va, ptes, pdes, &empty_ptps); pve->pv_va, ptes, pdes, &empty_ptps);
} }
pmap_unmap_ptes(pmap, pmap2);
pmap_destroy(pmap);
} else {
pmap_stats_update_bypte(pmap, 0, opte);
} }
pmap_unmap_ptes(pve->pv_pmap, pmap2); /* unlocks pmap */
SPLAY_REMOVE(pvtree, &pvh->pvh_root, pve); /* remove it */
SPLAY_RIGHT(pve, pv_node) = killlist; /* mark it for death */ SPLAY_RIGHT(pve, pv_node) = killlist; /* mark it for death */
killlist = pve; killlist = pve;
mutex_spin_enter(&pvh->pvh_lock);
} }
rw_exit(&pmap_main_lock); mutex_spin_exit(&pvh->pvh_lock);
crit_enter(); crit_enter();
pmap_tlb_shootwait(); pmap_tlb_shootwait();
@ -3127,8 +3179,9 @@ pmap_test_attrs(struct vm_page *pg, unsigned testbits)
int *myattrs; int *myattrs;
struct pv_head *pvh; struct pv_head *pvh;
struct pv_entry *pve; struct pv_entry *pve;
struct pmap *pmap2;
pt_entry_t pte; pt_entry_t pte;
pt_entry_t expect;
int result;
#if DIAGNOSTIC #if DIAGNOSTIC
int bank, off; int bank, off;
@ -3155,27 +3208,30 @@ pmap_test_attrs(struct vm_page *pg, unsigned testbits)
} }
/* nope, gonna have to do it the hard way */ /* nope, gonna have to do it the hard way */
rw_enter(&pmap_main_lock, RW_WRITER);
expect = pmap_pa2pte(VM_PAGE_TO_PHYS(pg)) | PG_V;
mutex_spin_enter(&pvh->pvh_lock);
for (pve = SPLAY_MIN(pvtree, &pvh->pvh_root); for (pve = SPLAY_MIN(pvtree, &pvh->pvh_root);
pve != NULL && (*myattrs & testbits) == 0; pve != NULL && (*myattrs & testbits) == 0;
pve = SPLAY_NEXT(pvtree, &pvh->pvh_root, pve)) { pve = SPLAY_NEXT(pvtree, &pvh->pvh_root, pve)) {
pt_entry_t *ptes; pt_entry_t *ptep;
pd_entry_t **pdes;
pmap_map_ptes(pve->pv_pmap, &pmap2, &ptes, &pdes); ptep = pmap_map_pte(pve->pv_ptp, pve->pv_va);
pte = ptes[pl1_i(pve->pv_va)]; pte = *ptep;
pmap_unmap_ptes(pve->pv_pmap, pmap2); pmap_unmap_pte();
*myattrs |= pte; if ((pte & (PG_FRAME | PG_V)) == expect) {
*myattrs |= pte;
}
} }
result = *myattrs & testbits;
mutex_spin_exit(&pvh->pvh_lock);
/* /*
* note that we will exit the for loop with a non-null pve if * note that we will exit the for loop with a non-null pve if
* we have found the bits we are testing for. * we have found the bits we are testing for.
*/ */
rw_exit(&pmap_main_lock); return result != 0;
return((*myattrs & testbits) != 0);
} }
/* /*
@ -3192,10 +3248,9 @@ pmap_clear_attrs(struct vm_page *pg, unsigned clearbits)
uint32_t result; uint32_t result;
struct pv_head *pvh; struct pv_head *pvh;
struct pv_entry *pve; struct pv_entry *pve;
pt_entry_t *ptes, opte;
int *myattrs; int *myattrs;
struct pmap *pmap2; pt_entry_t expect;
int count;
#ifdef DIAGNOSTIC #ifdef DIAGNOSTIC
int bank, off; int bank, off;
@ -3204,33 +3259,47 @@ pmap_clear_attrs(struct vm_page *pg, unsigned clearbits)
panic("pmap_change_attrs: unmanaged page?"); panic("pmap_change_attrs: unmanaged page?");
#endif #endif
mdpg = &pg->mdpage; mdpg = &pg->mdpage;
rw_enter(&pmap_main_lock, RW_WRITER);
pvh = &mdpg->mp_pvhead; pvh = &mdpg->mp_pvhead;
myattrs = &mdpg->mp_attrs; myattrs = &mdpg->mp_attrs;
result = *myattrs & clearbits; expect = pmap_pa2pte(VM_PAGE_TO_PHYS(pg)) | PG_V;
result = 0;
count = SPINLOCK_BACKOFF_MIN;
startover:
mutex_spin_enter(&pvh->pvh_lock);
result |= *myattrs & clearbits;
*myattrs &= ~clearbits; *myattrs &= ~clearbits;
SPLAY_FOREACH(pve, pvtree, &pvh->pvh_root) { SPLAY_FOREACH(pve, pvtree, &pvh->pvh_root) {
pt_entry_t *ptep; pt_entry_t *ptep;
pd_entry_t **pdes; pt_entry_t opte;
/* locks pmap */ ptep = pmap_map_pte(pve->pv_ptp, pve->pv_va);
pmap_map_ptes(pve->pv_pmap, &pmap2, &ptes, &pdes); retry:
#ifdef DIAGNOSTIC
if (!pmap_pdes_valid(pve->pv_va, pdes, NULL))
panic("pmap_change_attrs: mapping without PTP "
"detected");
#endif
ptep = &ptes[pl1_i(pve->pv_va)];
opte = *ptep; opte = *ptep;
KASSERT(pmap_valid_entry(opte)); if ((opte & (PG_FRAME | PG_V)) != expect) {
KDASSERT(pmap_pte2pa(opte) == VM_PAGE_TO_PHYS(pg)); #if defined(MULTIPROCESSOR)
int hold_count;
#endif /* defined(MULTIPROCESSOR) */
/*
* we lost a race with a V->P operation like
* pmap_remove(). wait for the competitor
* reflecting pte bits into mp_attrs.
*
* issue a redundant TLB shootdown so that
* we can wait for its completion.
*/
pmap_unmap_pte();
pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, 0, PG_G);
mutex_spin_exit(&pvh->pvh_lock);
KERNEL_UNLOCK_ALL(curlwp, &hold_count);
SPINLOCK_BACKOFF(count);
KERNEL_LOCK(hold_count, curlwp);
goto startover;
}
if (opte & clearbits) { if (opte & clearbits) {
/* We need to do something */ /* We need to do something */
if (clearbits == PG_RW) { if (clearbits == PG_RW) {
result |= PG_RW;
/* /*
* On write protect we might not need to flush * On write protect we might not need to flush
@ -3238,8 +3307,12 @@ pmap_clear_attrs(struct vm_page *pg, unsigned clearbits)
*/ */
/* First zap the RW bit! */ /* First zap the RW bit! */
pmap_pte_clearbits(ptep, PG_RW); if (pmap_pte_cas(ptep, opte, opte & ~PG_RW)
opte = *ptep; != opte) {
goto retry;
}
opte &= ~PG_RW;
result |= PG_RW;
/* /*
* Then test if it is not cached as RW the TLB * Then test if it is not cached as RW the TLB
@ -3254,7 +3327,10 @@ pmap_clear_attrs(struct vm_page *pg, unsigned clearbits)
*/ */
/* zap! */ /* zap! */
opte = pmap_pte_testset(ptep, (opte & ~(PG_U | PG_M))); if (pmap_pte_cas(ptep, opte, opte & ~(PG_U | PG_M))
!= opte) {
goto retry;
}
result |= (opte & clearbits); result |= (opte & clearbits);
*myattrs |= (opte & ~(clearbits)); *myattrs |= (opte & ~(clearbits));
@ -3262,10 +3338,9 @@ pmap_clear_attrs(struct vm_page *pg, unsigned clearbits)
pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, 0, opte); pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, 0, opte);
} }
no_tlb_shootdown: no_tlb_shootdown:
pmap_unmap_ptes(pve->pv_pmap, pmap2); /* unlocks pmap */ pmap_unmap_pte();
} }
mutex_spin_exit(&pvh->pvh_lock);
rw_exit(&pmap_main_lock);
crit_enter(); crit_enter();
pmap_tlb_shootwait(); pmap_tlb_shootwait();
@ -3390,14 +3465,18 @@ pmap_unwire(struct pmap *pmap, vaddr_t va)
pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */
if (pmap_pdes_valid(va, pdes, NULL)) { if (pmap_pdes_valid(va, pdes, NULL)) {
pt_entry_t *ptep = &ptes[pl1_i(va)];
pt_entry_t opte = *ptep;
#ifdef DIAGNOSTIC #ifdef DIAGNOSTIC
if (!pmap_valid_entry(ptes[pl1_i(va)])) if (!pmap_valid_entry(opte))
panic("pmap_unwire: invalid (unmapped) va 0x%lx", va); panic("pmap_unwire: invalid (unmapped) va 0x%lx", va);
#endif #endif
if ((ptes[pl1_i(va)] & PG_W) != 0) { if ((opte & PG_W) != 0) {
pmap_pte_clearbits(&ptes[pl1_i(va)], PG_W); pt_entry_t npte = opte & ~PG_W;
pmap->pm_stats.wired_count--;
opte = pmap_pte_testset(ptep, npte);
pmap_stats_update_bypte(pmap, npte, opte);
} }
#ifdef DIAGNOSTIC #ifdef DIAGNOSTIC
else { else {
@ -3512,9 +3591,6 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot,
/* get a pve. */ /* get a pve. */
freepve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); freepve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT);
/* get lock */
rw_enter(&pmap_main_lock, RW_READER);
pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */
if (pmap == pmap_kernel()) { if (pmap == pmap_kernel()) {
ptp = NULL; ptp = NULL;
@ -3716,7 +3792,6 @@ shootdown_now:
out: out:
pmap_unmap_ptes(pmap, pmap2); pmap_unmap_ptes(pmap, pmap2);
rw_exit(&pmap_main_lock);
if (freepve != NULL) { if (freepve != NULL) {
/* put back the pv, we don't need it. */ /* put back the pv, we don't need it. */
@ -3964,7 +4039,6 @@ pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
* we lock in the pmap => pv_head direction * we lock in the pmap => pv_head direction
*/ */
rw_enter(&pmap_main_lock, RW_READER);
pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */
/* /*
@ -3992,7 +4066,6 @@ pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
} }
} }
pmap_unmap_ptes(pmap, pmap2); pmap_unmap_ptes(pmap, pmap2);
rw_exit(&pmap_main_lock);
} }
#endif #endif