Several changes, developed and tested concurrently:

* Provide POSIX 1003.1b mlockall(2) and munlockall(2) system calls.
  MCL_CURRENT is presently implemented.  MCL_FUTURE is not fully
  implemented.  Also, the same one-unlock-for-every-lock caveat
  currently applies here as it does to mlock(2).  This will be
  addressed in a future commit.
* Provide the mincore(2) system call, with the same semantics as
  Solaris.
* Clean up the error recovery in uvm_map_pageable().
* Fix a bug where a process would hang if attempting to mlock a
  zero-fill region where none of the pages in that region are resident.
  [ This fix has been submitted for inclusion in 1.4.1 ]
Author: thorpej
Date:   1999-06-15 23:27:47 +00:00
Commit: c5a43ae10c (parent 10b0c75443)
5 changed files with 463 additions and 35 deletions
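
For context (not part of the commit), a minimal userland sketch of the new POSIX 1003.1b interface described above. It assumes the process's RLIMIT_MEMLOCK limit (or superuser privilege, on ports without pmap_wired_count) permits the wiring; both calls return 0 on success and -1 with errno set on failure.

#include <sys/mman.h>
#include <stdio.h>

int
main(void)
{
	/*
	 * Lock every page currently mapped into the process.  Per the
	 * commit message, MCL_FUTURE is not yet fully implemented, so
	 * only MCL_CURRENT is requested here.
	 */
	if (mlockall(MCL_CURRENT) == -1) {
		perror("mlockall");
		return (1);
	}

	/* ... latency-sensitive work runs here without page faults ... */

	if (munlockall() == -1) {
		perror("munlockall");
		return (1);
	}
	return (0);
}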

File: mman.h

@@ -1,4 +1,4 @@
/* $NetBSD: mman.h,v 1.21 1999/04/27 20:13:06 cgd Exp $ */
/* $NetBSD: mman.h,v 1.22 1999/06/15 23:27:48 thorpej Exp $ */
/*-
* Copyright (c) 1982, 1986, 1993
@@ -91,6 +91,12 @@ typedef _BSD_SIZE_T_ size_t;
#define MS_INVALIDATE 0x02 /* invalidate cached data */
#define MS_SYNC 0x04 /* perform synchronous writes */
/*
* Flags to mlockall
*/
#define MCL_CURRENT 0x01 /* lock all pages currently mapped */
#define MCL_FUTURE 0x02 /* lock all pages mapped in the future */
#if !defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE)
/*
* Advice to madvise
@@ -119,8 +125,11 @@ int msync __P((void *, size_t, int)) __RENAME(__msync13);
#endif
int mlock __P((const void *, size_t));
int munlock __P((const void *, size_t));
int mlockall __P((int));
int munlockall __P((void));
#if !defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE)
int madvise __P((void *, size_t, int));
int mincore __P((void *, size_t, char *));
int minherit __P((void *, size_t, int));
#endif
__END_DECLS
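
A usage illustration (not part of the diff): sys_mlockall(), added later in this commit, rejects a zero flags argument and any bits outside MCL_CURRENT|MCL_FUTURE with EINVAL. A hypothetical test of that validation, which makes no assumption about whether the locking itself would succeed:

#include <sys/mman.h>
#include <errno.h>
#include <stdio.h>

int
main(void)
{
	/*
	 * Both calls should fail flag validation before any wiring is
	 * attempted; 0x10 is simply a bit outside MCL_CURRENT|MCL_FUTURE.
	 */
	if (mlockall(0) == -1 && errno == EINVAL)
		printf("mlockall(0): EINVAL, as expected\n");
	if (mlockall(MCL_CURRENT | 0x10) == -1 && errno == EINVAL)
		printf("mlockall(MCL_CURRENT|0x10): EINVAL, as expected\n");
	return (0);
}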

File: uvm_extern.h

@@ -1,4 +1,4 @@
/* $NetBSD: uvm_extern.h,v 1.27 1999/05/26 19:16:36 thorpej Exp $ */
/* $NetBSD: uvm_extern.h,v 1.28 1999/06/15 23:27:47 thorpej Exp $ */
/*
*
@@ -319,6 +319,7 @@ int uvm_map __P((vm_map_t, vaddr_t *, vsize_t,
struct uvm_object *, vaddr_t, uvm_flag_t));
int uvm_map_pageable __P((vm_map_t, vaddr_t,
vaddr_t, boolean_t));
int uvm_map_pageable_all __P((vm_map_t, int, vsize_t));
boolean_t uvm_map_checkprot __P((vm_map_t, vaddr_t,
vaddr_t, vm_prot_t));
int uvm_map_protect __P((vm_map_t, vaddr_t,

File: uvm_map.c

@@ -1,4 +1,4 @@
/* $NetBSD: uvm_map.c,v 1.53 1999/06/07 16:31:42 thorpej Exp $ */
/* $NetBSD: uvm_map.c,v 1.54 1999/06/15 23:27:47 thorpej Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -1990,8 +1990,7 @@ uvm_map_pageable(map, start, end, new_pageable)
vaddr_t start, end;
boolean_t new_pageable;
{
vm_map_entry_t entry, start_entry;
vaddr_t failed = 0;
vm_map_entry_t entry, start_entry, failed_entry;
int rv;
UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist);
UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_pageable=0x%x)",
@@ -2025,7 +2024,7 @@ uvm_map_pageable(map, start, end, new_pageable)
* handle wiring and unwiring seperately.
*/
if (new_pageable) { /* unwire */
if (new_pageable) { /* unwire */
UVM_MAP_CLIP_START(map, entry, start);
@@ -2060,11 +2059,9 @@ uvm_map_pageable(map, start, end, new_pageable)
entry = start_entry;
while ((entry != &map->header) && (entry->start < end)) {
UVM_MAP_CLIP_END(map, entry, end);
entry->wired_count--;
if (entry->wired_count == 0)
uvm_map_entry_unwire(map, entry);
entry = entry->next;
}
vm_map_unlock(map);
@@ -2100,7 +2097,7 @@ uvm_map_pageable(map, start, end, new_pageable)
while ((entry != &map->header) && (entry->start < end)) {
if (entry->wired_count == 0) { /* not already wired? */
/*
* perform actions of vm_map_lookup that need the
* write lock on the map: create an anonymous map
@@ -2108,22 +2105,17 @@ uvm_map_pageable(map, start, end, new_pageable)
* for a zero-fill region. (XXXCDC: submap case
* ok?)
*/
if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */
/*
* XXXCDC: protection vs. max_protection??
* (wirefault uses max?)
* XXXCDC: used to do it always if
* uvm_obj == NULL (wrong?)
*/
if ( UVM_ET_ISNEEDSCOPY(entry) &&
(entry->protection & VM_PROT_WRITE) != 0) {
if (UVM_ET_ISNEEDSCOPY(entry) &&
((entry->protection & VM_PROT_WRITE) ||
(entry->object.uvm_obj == NULL))) {
amap_copy(map, entry, M_WAITOK, TRUE,
start, end);
/* XXXCDC: wait OK? */
}
}
} /* wired_count == 0 */
} /* wired_count == 0 */
UVM_MAP_CLIP_START(map, entry, start);
UVM_MAP_CLIP_END(map, entry, end);
entry->wired_count++;
@@ -2131,8 +2123,10 @@ uvm_map_pageable(map, start, end, new_pageable)
/*
* Check for holes
*/
if (entry->end < end && (entry->next == &map->header ||
entry->next->start > entry->end)) {
if (entry->protection == VM_PROT_NONE ||
(entry->end < end &&
(entry->next == &map->header ||
entry->next->start > entry->end))) {
/*
* found one. amap creation actions do not need to
* be undone, but the wired counts need to be restored.
@@ -2182,16 +2176,24 @@ uvm_map_pageable(map, start, end, new_pageable)
* first drop the wiring count on all the entries
* which haven't actually been wired yet.
*/
failed = entry->start;
while (entry != &map->header && entry->start < end)
failed_entry = entry;
while (entry != &map->header && entry->start < end) {
entry->wired_count--;
entry = entry->next;
}
/*
* now, unlock the map, and unwire all the pages that
* were successfully wired above.
* now, unwire all the entries that were successfully
* wired above.
*/
entry = start_entry;
while (entry != failed_entry) {
entry->wired_count--;
if (entry->wired_count == 0)
uvm_map_entry_unwire(map, entry);
entry = entry->next;
}
vm_map_unlock(map);
(void) uvm_map_pageable(map, start, failed, TRUE);
UVMHIST_LOG(maphist, "<- done (RV=%d)", rv,0,0,0);
return(rv);
}
@@ -2203,6 +2205,214 @@ uvm_map_pageable(map, start, end, new_pageable)
return(KERN_SUCCESS);
}
/*
* uvm_map_pageable_all: special case of uvm_map_pageable - affects
* all mapped regions.
*
* => map must not be locked.
* => if no flags are specified, all regions are unwired.
* => XXXJRT: has some of the same problems as uvm_map_pageable() above.
*/
int
uvm_map_pageable_all(map, flags, limit)
vm_map_t map;
int flags;
vsize_t limit;
{
vm_map_entry_t entry, failed_entry;
vsize_t size;
int rv;
UVMHIST_FUNC("uvm_map_pageable_all"); UVMHIST_CALLED(maphist);
UVMHIST_LOG(maphist,"(map=0x%x,flags=0x%x)", map, flags, 0, 0);
#ifdef DIAGNOSTIC
if ((map->flags & VM_MAP_PAGEABLE) == 0)
panic("uvm_map_pageable_all: map %p not pageable", map);
#endif
vm_map_lock(map);
/*
* handle wiring and unwiring separately.
*/
if (flags == 0) { /* unwire */
/*
* Decrement the wiring count on the entries. If they
* reach zero, unwire them.
*
* Note, uvm_fault_unwire() (called via uvm_map_entry_unwire())
* does not lock the map, so we don't have to do anything
* special regarding locking here.
*/
for (entry = map->header.next; entry != &map->header;
entry = entry->next) {
if (entry->wired_count) {
if (--entry->wired_count == 0)
uvm_map_entry_unwire(map, entry);
}
}
map->flags &= ~VM_MAP_WIREFUTURE;
vm_map_unlock(map);
UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0);
return (KERN_SUCCESS);
/*
* end of unwire case!
*/
}
if (flags & MCL_FUTURE) {
/*
* must wire all future mappings; remember this.
*/
map->flags |= VM_MAP_WIREFUTURE;
}
if ((flags & MCL_CURRENT) == 0) {
/*
* no more work to do!
*/
UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0);
vm_map_unlock(map);
return (KERN_SUCCESS);
}
/*
* wire case: in three passes [XXXCDC: ugly block of code here]
*
* 1: holding the write lock, count all pages mapped by non-wired
* entries. if this would cause us to go over our limit, we fail.
*
* 2: still holding the write lock, we create any anonymous maps that
* need to be created. then we increment its wiring count.
*
* 3: we downgrade to a read lock, and call uvm_fault_wire to fault
* in the pages for any newly wired area (wired count is 1).
*
* downgrading to a read lock for uvm_fault_wire avoids a possible
* deadlock with another thread that may have faulted on one of
* the pages to be wired (it would mark the page busy, blocking
* us, then in turn block on the map lock that we hold). because
* of problems in the recursive lock package, we cannot upgrade
* to a write lock in vm_map_lookup. thus, any actions that
* require the write lock must be done beforehand. because we
* keep the read lock on the map, the copy-on-write status of the
* entries we modify here cannot change.
*/
for (size = 0, entry = map->header.next; entry != &map->header;
entry = entry->next) {
if (entry->protection != VM_PROT_NONE &&
entry->wired_count == 0) { /* not already wired? */
size += entry->end - entry->start;
}
}
if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
vm_map_unlock(map);
return (KERN_NO_SPACE); /* XXX overloaded */
}
/* XXX non-pmap_wired_count case must be handled by caller */
#ifdef pmap_wired_count
if (limit != 0 &&
(size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) {
vm_map_unlock(map);
return (KERN_NO_SPACE); /* XXX overloaded */
}
#endif
/*
* Pass 2.
*/
for (entry = map->header.next; entry != &map->header;
entry = entry->next) {
if (entry->protection == VM_PROT_NONE)
continue;
if (entry->wired_count == 0) { /* not already wired? */
/*
* perform actions of vm_map_lookup that need the
* write lock on the map: create an anonymous map
* for a copy-on-write region, or an anonymous map
* for a zero-fill region. (XXXCDC: submap case
* ok?)
*/
if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */
if (UVM_ET_ISNEEDSCOPY(entry) &&
((entry->protection & VM_PROT_WRITE) ||
(entry->object.uvm_obj == NULL))) {
amap_copy(map, entry, M_WAITOK, TRUE,
entry->start, entry->end);
/* XXXCDC: wait OK? */
}
}
} /* wired_count == 0 */
entry->wired_count++;
}
/*
* Pass 3.
*/
vm_map_downgrade(map);
rv = KERN_SUCCESS;
for (entry = map->header.next; entry != &map->header;
entry = entry->next) {
if (entry->wired_count == 1) {
rv = uvm_fault_wire(map, entry->start, entry->end,
entry->protection);
if (rv) {
/*
* wiring failed. break out of the loop.
* we'll clean up the map below, once we
* have a write lock again.
*/
break;
}
}
}
if (rv) { /* failed? */
/*
* Get back an exclusive (write) lock.
*/
vm_map_upgrade(map);
/*
* first drop the wiring count on all the entries
* which haven't actually been wired yet.
*/
failed_entry = entry;
for (/* nothing */; entry != &map->header;
entry = entry->next)
entry->wired_count--;
/*
* now, unwire all the entries that were successfully
* wired above.
*/
for (entry = map->header.next; entry != failed_entry;
entry = entry->next) {
entry->wired_count--;
if (entry->wired_count == 0)
uvm_map_entry_unwire(map, entry);
}
vm_map_unlock(map);
UVMHIST_LOG(maphist,"<- done (RV=%d)", rv,0,0,0);
return (rv);
}
/* We are holding a read lock here. */
vm_map_unlock_read(map);
UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0);
return (KERN_SUCCESS);
}
/*
* uvm_map_clean: push dirty pages off to backing store.
*
@@ -2480,6 +2690,14 @@ uvmspace_exec(p)
shmexit(ovm);
#endif
/*
* POSIX 1003.1b -- "lock future mappings" is revoked
* when a process execs another program image.
*/
vm_map_lock(map);
map->flags &= ~VM_MAP_WIREFUTURE;
vm_map_unlock(map);
/*
* now unmap the old program
*/
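
The hang fix called out in the commit message corresponds to the amap_copy() change above, which now also covers zero-fill regions (uvm_obj == NULL). A minimal userland sketch of the previously hanging case (not part of the commit; it assumes RLIMIT_MEMLOCK or privilege allows mlock(2) on the region):

#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	size_t len = 4 * (size_t)sysconf(_SC_PAGESIZE);
	void *p;

	/* A fresh anonymous (zero-fill) mapping: none of its pages
	 * are resident yet. */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, (off_t)0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return (1);
	}

	/* Per the commit message, wiring such a region used to hang
	 * the process; with this change the pages are faulted in and
	 * mlock() returns. */
	if (mlock(p, len) == -1) {
		perror("mlock");
		return (1);
	}

	(void)munlock(p, len);
	(void)munmap(p, len);
	return (0);
}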

File: uvm_mmap.c

@@ -1,4 +1,4 @@
/* $NetBSD: uvm_mmap.c,v 1.21 1999/05/23 06:27:13 mrg Exp $ */
/* $NetBSD: uvm_mmap.c,v 1.22 1999/06/15 23:27:47 thorpej Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -130,15 +130,140 @@ sys_mincore(p, v, retval)
void *v;
register_t *retval;
{
#if 0
struct sys_mincore_args /* {
syscallarg(caddr_t) addr;
syscallarg(void *) addr;
syscallarg(size_t) len;
syscallarg(char *) vec;
} */ *uap = v;
#endif
vm_page_t m;
char *vec, pgi;
struct uvm_object *uobj;
struct vm_amap *amap;
struct vm_anon *anon;
vm_map_entry_t entry;
vaddr_t start, end, lim;
vm_map_t map;
vsize_t len;
int error = 0, npgs;
return (ENOSYS);
map = &p->p_vmspace->vm_map;
start = (vaddr_t)SCARG(uap, addr);
len = SCARG(uap, len);
vec = SCARG(uap, vec);
if (start & PAGE_MASK)
return (EINVAL);
len = round_page(len);
end = start + len;
if (end <= start)
return (EINVAL);
npgs = len >> PAGE_SHIFT;
if (uvm_useracc(vec, npgs, B_WRITE) == FALSE)
return (EFAULT);
/*
* Lock down vec, so our returned status isn't outdated by
* storing the status byte for a page.
*/
uvm_vslock(p, vec, npgs, VM_PROT_WRITE);
vm_map_lock_read(map);
if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
error = ENOMEM;
goto out;
}
for (/* nothing */;
entry != &map->header && entry->start < end;
entry = entry->next) {
#ifdef DIAGNOSTIC
if (UVM_ET_ISSUBMAP(entry))
panic("mincore: user map has submap");
if (start < entry->start)
panic("mincore: hole");
#endif
/* Make sure there are no holes. */
if (entry->end < end &&
(entry->next == &map->header ||
entry->next->start > entry->end)) {
error = ENOMEM;
goto out;
}
lim = end < entry->end ? end : entry->end;
/*
* Special case for mapped devices; these are always
* considered resident.
*/
if (UVM_ET_ISOBJ(entry)) {
extern struct uvm_pagerops uvm_deviceops; /* XXX */
#ifdef DIAGNOSTIC
if (UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj))
panic("mincore: user map has kernel object");
#endif
if (entry->object.uvm_obj->pgops == &uvm_deviceops) {
for (/* nothing */; start < lim;
start += PAGE_SIZE, vec++)
subyte(vec, 1);
continue;
}
}
uobj = entry->object.uvm_obj; /* top layer */
amap = entry->aref.ar_amap; /* bottom layer */
if (amap != NULL)
amap_lock(amap);
if (uobj != NULL)
simple_lock(&uobj->vmobjlock);
for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
pgi = 0;
if (amap != NULL) {
/* Check the top layer first. */
anon = amap_lookup(&entry->aref,
start - entry->start);
/* Don't need to lock anon here. */
if (anon != NULL && anon->u.an_page != NULL) {
/*
* Anon has the page for this entry
* offset.
*/
pgi = 1;
}
}
if (uobj != NULL && pgi == 0) {
/* Check the bottom layer. */
m = uvm_pagelookup(uobj,
entry->offset + (start - entry->start));
if (m != NULL) {
/*
* Object has the page for this entry
* offset.
*/
pgi = 1;
}
}
(void) subyte(vec, pgi);
}
if (uobj != NULL)
simple_unlock(&uobj->vmobjlock);
if (amap != NULL)
amap_unlock(amap);
}
out:
vm_map_unlock_read(map);
uvm_vsunlock(p, SCARG(uap, vec), npgs);
return (error);
}
#if 0
@@ -816,6 +941,73 @@ sys_munlock(p, v, retval)
return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
/*
* sys_mlockall: lock all pages mapped into an address space.
*/
int
sys_mlockall(p, v, retval)
struct proc *p;
void *v;
register_t *retval;
{
struct sys_mlockall_args /* {
syscallarg(int) flags;
} */ *uap = v;
vsize_t limit;
int error, flags;
flags = SCARG(uap, flags);
if (flags == 0 ||
(flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
return (EINVAL);
#ifdef pmap_wired_count
/* Actually checked in uvm_map_pageable_all() */
limit = p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur;
#else
limit = 0;
if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
return (error);
#endif
error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags, limit);
switch (error) {
case KERN_SUCCESS:
error = 0;
break;
case KERN_NO_SPACE: /* XXX overloaded */
error = ENOMEM;
break;
default:
/*
* "Some or all of the memory could not be locked when
* the call was made."
*/
error = EAGAIN;
}
return (error);
}
/*
* sys_munlockall: unlock all pages mapped into an address space.
*/
int
sys_munlockall(p, v, retval)
struct proc *p;
void *v;
register_t *retval;
{
(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
return (0);
}
/*
* uvm_mmap: internal version of mmap
*
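
A minimal userland sketch of the new mincore(2) call (not part of the commit). Matching the Solaris semantics mentioned in the commit message and the sys_mincore() loop above, it reports one status byte per page, nonzero meaning the page is resident:

#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
	size_t npgs = 8, i, resident = 0;
	size_t len = npgs * pgsz;
	char *p, *vec;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, (off_t)0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return (1);
	}
	/* Touch the first half so those pages become resident. */
	memset(p, 0xa5, len / 2);

	vec = malloc(npgs);
	if (vec == NULL)
		return (1);

	/* One status byte per page; nonzero means resident. */
	if (mincore(p, len, vec) == -1) {
		perror("mincore");
		return (1);
	}
	for (i = 0; i < npgs; i++)
		if (vec[i])
			resident++;
	printf("%lu of %lu pages resident\n",
	    (unsigned long)resident, (unsigned long)npgs);
	return (0);
}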

File: vm_map.h

@@ -1,4 +1,4 @@
/* $NetBSD: vm_map.h,v 1.29 1999/06/07 16:34:04 thorpej Exp $ */
/* $NetBSD: vm_map.h,v 1.30 1999/06/15 23:27:48 thorpej Exp $ */
/*
* Copyright (c) 1991, 1993
@@ -140,15 +140,23 @@ struct vm_map {
vm_map_entry_t hint; /* hint for quick lookups */
simple_lock_data_t hint_lock; /* lock for hint storage */
vm_map_entry_t first_free; /* First free space hint */
int flags; /* flags (read-only) */
/*
* Locking note: read-only flags need not be locked to read
* them; they are set once at map creation time, and never
* changed again. Only read-write flags require that the
* appropriate map lock be acquired before reading or writing
* the flag.
*/
int flags; /* flags */
unsigned int timestamp; /* Version number */
#define min_offset header.start
#define max_offset header.end
};
/* vm_map flags */
#define VM_MAP_PAGEABLE 0x01 /* entries are pageable */
#define VM_MAP_INTRSAFE 0x02 /* interrupt safe map */
#define VM_MAP_PAGEABLE 0x01 /* ro: entries are pageable */
#define VM_MAP_INTRSAFE 0x02 /* ro: interrupt safe map */
#define VM_MAP_WIREFUTURE 0x04 /* rw: wire future mappings */
/*
* Interrupt-safe maps must also be kept on a special list,