1169 lines
31 KiB
Groff
1169 lines
31 KiB
Groff
.\" $NetBSD: uvm.9,v 1.97 2009/03/12 13:13:16 wiz Exp $
|
|
.\"
|
|
.\" Copyright (c) 1998 Matthew R. Green
|
|
.\" All rights reserved.
|
|
.\"
|
|
.\" Redistribution and use in source and binary forms, with or without
|
|
.\" modification, are permitted provided that the following conditions
|
|
.\" are met:
|
|
.\" 1. Redistributions of source code must retain the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer.
|
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer in the
|
|
.\" documentation and/or other materials provided with the distribution.
|
|
.\"
|
|
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
.\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
.\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
|
.\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
.\" SUCH DAMAGE.
|
|
.\"
|
|
.Dd March 12, 2009
|
|
.Dt UVM 9
|
|
.Os
|
|
.Sh NAME
|
|
.Nm uvm
|
|
.Nd virtual memory system external interface
|
|
.Sh SYNOPSIS
|
|
.In sys/param.h
|
|
.In uvm/uvm.h
|
|
.Sh DESCRIPTION
|
|
The UVM virtual memory system manages access to the computer's memory
|
|
resources.
|
|
User processes and the kernel access these resources through
|
|
UVM's external interface.
|
|
UVM's external interface includes functions that:
|
|
.Pp
|
|
.Bl -hyphen -compact
|
|
.It
|
|
initialize UVM sub-systems
|
|
.It
|
|
manage virtual address spaces
|
|
.It
|
|
resolve page faults
|
|
.It
|
|
memory map files and devices
|
|
.It
|
|
perform uio-based I/O to virtual memory
|
|
.It
|
|
allocate and free kernel virtual memory
|
|
.It
|
|
allocate and free physical memory
|
|
.El
|
|
.Pp
|
|
In addition to exporting these services, UVM has two kernel-level processes:
|
|
pagedaemon and swapper.
|
|
The pagedaemon process sleeps until physical memory becomes scarce.
|
|
When that happens, pagedaemon is awoken.
|
|
It scans physical memory, paging out and freeing memory that has not
|
|
been recently used.
|
|
The swapper process swaps in runnable processes that are currently swapped
|
|
out, if there is room.
|
|
.Pp
|
|
There are also several miscellaneous functions.
|
|
.Sh INITIALIZATION
|
|
.Bl -ohang
|
|
.It Ft void
|
|
.Fn uvm_init "void" ;
|
|
.It Ft void
|
|
.Fn uvm_init_limits "struct lwp *l" ;
|
|
.It Ft void
|
|
.Fn uvm_setpagesize "void" ;
|
|
.It Ft void
|
|
.Fn uvm_swap_init "void" ;
|
|
.El
|
|
.Pp
|
|
.Fn uvm_init
|
|
sets up the UVM system at system boot time, after the
|
|
console has been set up.
|
|
It initializes global state, the page, map, kernel virtual memory state,
|
|
machine-dependent physical map, kernel memory allocator,
|
|
pager and anonymous memory sub-systems, and then enables
|
|
paging of kernel objects.
|
|
.Pp
|
|
.Fn uvm_init_limits
|
|
initializes process limits for the named process.
|
|
This is for use by the system startup for process zero, before any
|
|
other processes are created.
|
|
.Pp
|
|
.Fn uvm_setpagesize
|
|
initializes the uvmexp members pagesize (if not already done by
|
|
machine-dependent code), pageshift and pagemask.
|
|
It should be called by machine-dependent code early in the
|
|
.Fn pmap_init
|
|
call (see
|
|
.Xr pmap 9 ) .
|
|
.Pp
|
|
.Fn uvm_swap_init
|
|
initializes the swap sub-system.
|
|
.Sh VIRTUAL ADDRESS SPACE MANAGEMENT
|
|
.Bl -ohang
|
|
.It Ft int
|
|
.Fn uvm_map "struct vm_map *map" "vaddr_t *startp" "vsize_t size" "struct uvm_object *uobj" "voff_t uoffset" "vsize_t align" "uvm_flag_t flags" ;
|
|
.It Ft void
|
|
.Fn uvm_unmap "struct vm_map *map" "vaddr_t start" "vaddr_t end" ;
|
|
.It Ft int
|
|
.Fn uvm_map_pageable "struct vm_map *map" "vaddr_t start" "vaddr_t end" "bool new_pageable" "int lockflags" ;
|
|
.It Ft bool
|
|
.Fn uvm_map_checkprot "struct vm_map *map" "vaddr_t start" "vaddr_t end" "vm_prot_t protection" ;
|
|
.It Ft int
|
|
.Fn uvm_map_protect "struct vm_map *map" "vaddr_t start" "vaddr_t end" "vm_prot_t new_prot" "bool set_max" ;
|
|
.It Ft int
|
|
.Fn uvm_deallocate "struct vm_map *map" "vaddr_t start" "vsize_t size" ;
|
|
.It Ft struct vmspace *
|
|
.Fn uvmspace_alloc "vaddr_t min" "vaddr_t max" "int pageable" ;
|
|
.It Ft void
|
|
.Fn uvmspace_exec "struct lwp *l" "vaddr_t start" "vaddr_t end" ;
|
|
.It Ft struct vmspace *
|
|
.Fn uvmspace_fork "struct vmspace *vm" ;
|
|
.It Ft void
|
|
.Fn uvmspace_free "struct vmspace *vm1" ;
|
|
.It Ft void
|
|
.Fn uvmspace_share "struct proc *p1" "struct proc *p2" ;
|
|
.It Ft void
|
|
.Fn uvmspace_unshare "struct lwp *l" ;
|
|
.It Ft bool
|
|
.Fn uvm_uarea_alloc "vaddr_t *uaddrp" ;
|
|
.It Ft void
|
|
.Fn uvm_uarea_free "vaddr_t uaddr" ;
|
|
.El
|
|
.Pp
|
|
.Fn uvm_map
|
|
establishes a valid mapping in map
|
|
.Fa map ,
|
|
which must be unlocked.
|
|
The new mapping has size
|
|
.Fa size ,
|
|
which must be a multiple of
|
|
.Dv PAGE_SIZE .
|
|
The
|
|
.Fa uobj
|
|
and
|
|
.Fa uoffset
|
|
arguments can have four meanings.
|
|
When
|
|
.Fa uobj
|
|
is
|
|
.Dv NULL
|
|
and
|
|
.Fa uoffset
|
|
is
|
|
.Dv UVM_UNKNOWN_OFFSET ,
|
|
.Fn uvm_map
|
|
does not use the machine-dependent
|
|
.Dv PMAP_PREFER
|
|
function.
|
|
If
|
|
.Fa uoffset
|
|
is any other value, it is used as the hint to
|
|
.Dv PMAP_PREFER .
|
|
When
|
|
.Fa uobj
|
|
is not
|
|
.Dv NULL
|
|
and
|
|
.Fa uoffset
|
|
is
|
|
.Dv UVM_UNKNOWN_OFFSET ,
|
|
.Fn uvm_map
|
|
finds the offset based upon the virtual address, passed as
|
|
.Fa startp .
|
|
If
|
|
.Fa uoffset
|
|
is any other value, we are doing a normal mapping at this offset.
|
|
The start address of the map will be returned in
|
|
.Fa startp .
|
|
.Pp
|
|
.Fa align
|
|
specifies alignment of mapping unless
|
|
.Dv UVM_FLAG_FIXED
|
|
is specified in
|
|
.Fa flags .
|
|
.Fa align
|
|
must be a power of 2.
|
|
.Pp
|
|
.Fa flags
|
|
passed to
|
|
.Fn uvm_map
|
|
are typically created using the
|
|
.Fn UVM_MAPFLAG "vm_prot_t prot" "vm_prot_t maxprot" "vm_inherit_t inh" "int advice" "int flags"
|
|
macro, which uses the following values.
|
|
The values that
|
|
.Fa prot
|
|
and
|
|
.Fa maxprot
|
|
can take are:
|
|
.Bd -literal
|
|
#define UVM_PROT_MASK 0x07 /* protection mask */
|
|
#define UVM_PROT_NONE 0x00 /* protection none */
|
|
#define UVM_PROT_ALL 0x07 /* everything */
|
|
#define UVM_PROT_READ 0x01 /* read */
|
|
#define UVM_PROT_WRITE 0x02 /* write */
|
|
#define UVM_PROT_EXEC 0x04 /* exec */
|
|
#define UVM_PROT_R 0x01 /* read */
|
|
#define UVM_PROT_W 0x02 /* write */
|
|
#define UVM_PROT_RW 0x03 /* read-write */
|
|
#define UVM_PROT_X 0x04 /* exec */
|
|
#define UVM_PROT_RX 0x05 /* read-exec */
|
|
#define UVM_PROT_WX 0x06 /* write-exec */
|
|
#define UVM_PROT_RWX 0x07 /* read-write-exec */
|
|
.Ed
|
|
.Pp
|
|
The values that
|
|
.Fa inh
|
|
can take are:
|
|
.Bd -literal
|
|
#define UVM_INH_MASK 0x30 /* inherit mask */
|
|
#define UVM_INH_SHARE 0x00 /* "share" */
|
|
#define UVM_INH_COPY 0x10 /* "copy" */
|
|
#define UVM_INH_NONE 0x20 /* "none" */
|
|
#define UVM_INH_DONATE 0x30 /* "donate" \*[Lt]\*[Lt] not used */
|
|
.Ed
|
|
.Pp
|
|
The values that
|
|
.Fa advice
|
|
can take are:
|
|
.Bd -literal
|
|
#define UVM_ADV_NORMAL 0x0 /* 'normal' */
|
|
#define UVM_ADV_RANDOM 0x1 /* 'random' */
|
|
#define UVM_ADV_SEQUENTIAL 0x2 /* 'sequential' */
|
|
#define UVM_ADV_MASK 0x7 /* mask */
|
|
.Ed
|
|
.Pp
|
|
The values that
|
|
.Fa flags
|
|
can take are:
|
|
.Bd -literal
|
|
#define UVM_FLAG_FIXED 0x010000 /* find space */
|
|
#define UVM_FLAG_OVERLAY 0x020000 /* establish overlay */
|
|
#define UVM_FLAG_NOMERGE 0x040000 /* don't merge map entries */
|
|
#define UVM_FLAG_COPYONW 0x080000 /* set copy_on_write flag */
|
|
#define UVM_FLAG_AMAPPAD 0x100000 /* for bss: pad amap to reduce malloc() */
|
|
#define UVM_FLAG_TRYLOCK 0x200000 /* fail if we can not lock map */
|
|
.Ed
|
|
.Pp
|
|
The
|
|
.Dv UVM_MAPFLAG
|
|
macro arguments can be combined with an or operator.
|
|
There are several special purpose macros for checking protection
|
|
combinations, e.g., the
|
|
.Dv UVM_PROT_WX
|
|
macro.
|
|
There are also some additional macros to extract bits from the flags.
|
|
The
|
|
.Dv UVM_PROTECTION ,
|
|
.Dv UVM_INHERIT ,
|
|
.Dv UVM_MAXPROTECTION
|
|
and
|
|
.Dv UVM_ADVICE
|
|
macros return the protection, inheritance, maximum protection and advice,
|
|
respectively.
|
|
.Fn uvm_map
|
|
returns a standard UVM return value.
|
|
.Pp
|
|
.Fn uvm_unmap
|
|
removes a valid mapping,
|
|
from
|
|
.Fa start
|
|
to
|
|
.Fa end ,
|
|
in map
|
|
.Fa map ,
|
|
which must be unlocked.
|
|
.Pp
|
|
.Fn uvm_map_pageable
|
|
changes the pageability of the pages in the range from
|
|
.Fa start
|
|
to
|
|
.Fa end
|
|
in map
|
|
.Fa map
|
|
to
|
|
.Fa new_pageable .
|
|
.Fn uvm_map_pageable
|
|
returns a standard UVM return value.
|
|
.Pp
|
|
.Fn uvm_map_checkprot
|
|
checks the protection of the range from
|
|
.Fa start
|
|
to
|
|
.Fa end
|
|
in map
|
|
.Fa map
|
|
against
|
|
.Fa protection .
|
|
This returns either
|
|
.Dv true
|
|
or
|
|
.Dv false .
|
|
.Pp
|
|
.Fn uvm_map_protect
|
|
changes the protection of the range from
|
|
.Fa start
|
|
to
|
|
.Fa end
|
|
in map
|
|
.Fa map
|
|
to
|
|
.Fa new_prot ,
|
|
also setting the maximum protection of the region to
|
|
.Fa new_prot
|
|
if
|
|
.Fa set_max
|
|
is true.
|
|
This function returns a standard UVM return value.
|
|
.Pp
|
|
.Fn uvm_deallocate
|
|
deallocates kernel memory in map
|
|
.Fa map
|
|
from address
|
|
.Fa start
|
|
to
|
|
.Fa start + size .
|
|
.Pp
|
|
.Fn uvmspace_alloc
|
|
allocates and returns a new address space, with ranges from
|
|
.Fa min
|
|
to
|
|
.Fa max ,
|
|
setting the pageability of the address space to
|
|
.Fa pageable .
|
|
.Pp
|
|
.Fn uvmspace_exec
|
|
either reuses the address space of lwp
|
|
.Fa l
|
|
if there are no other references to it, or creates
|
|
a new one with
|
|
.Fn uvmspace_alloc .
|
|
The range of valid addresses in the address space is reset to
|
|
.Fa start
|
|
through
|
|
.Fa end .
|
|
.Pp
|
|
.Fn uvmspace_fork
|
|
creates and returns a new address space based upon the
|
|
.Fa vm
|
|
address space, typically used when allocating an address space for a
|
|
child process.
|
|
.Pp
|
|
.Fn uvmspace_free
|
|
lowers the reference count on the address space
|
|
.Fa vm1 ,
|
|
freeing the data structures if there are no other references.
|
|
.Pp
|
|
.Fn uvmspace_share
|
|
causes process
|
|
.Fa p2
|
|
to share the address space of
|
|
.Fa p1 .
|
|
.Pp
|
|
.Fn uvmspace_unshare
|
|
ensures that lwp
|
|
.Fa l
|
|
has its own, unshared address space, by creating a new one if
|
|
necessary by calling
|
|
.Fn uvmspace_fork .
|
|
.Pp
|
|
.Fn uvm_uarea_alloc
|
|
allocates virtual space for a u-area (i.e., a kernel stack) and stores
|
|
its virtual address in
|
|
.Fa *uaddrp .
|
|
The return value is
|
|
.Dv true
|
|
if the u-area is already backed by wired physical memory, otherwise
|
|
.Dv false .
|
|
.Pp
|
|
.Fn uvm_uarea_free
|
|
frees a u-area allocated with
|
|
.Fn uvm_uarea_alloc ,
|
|
freeing both the virtual space and any physical pages which may have been
|
|
allocated to back that virtual space later.
|
|
.Sh PAGE FAULT HANDLING
|
|
.Bl -ohang
|
|
.It Ft int
|
|
.Fn uvm_fault "struct vm_map *orig_map" "vaddr_t vaddr" "vm_prot_t access_type" ;
|
|
.El
|
|
.Pp
|
|
.Fn uvm_fault
|
|
is the main entry point for faults.
|
|
It takes
|
|
.Fa orig_map
|
|
as the map the fault originated in, a
|
|
.Fa vaddr
|
|
offset into the map where the fault occurred, and
|
|
.Fa access_type
|
|
describing the type of access requested.
|
|
.Fn uvm_fault
|
|
returns a standard UVM return value.
|
|
.Sh MEMORY MAPPING FILES AND DEVICES
|
|
.Bl -ohang
|
|
.It Ft void
|
|
.Fn uvm_vnp_setsize "struct vnode *vp" "voff_t newsize" ;
|
|
.It Ft void *
|
|
.Fn ubc_alloc "struct uvm_object *uobj" "voff_t offset" "vsize_t *lenp" \
|
|
"int advice" "int flags" ;
|
|
.It Ft void
|
|
.Fn ubc_release "void *va" "int flags" ;
|
|
.It Ft int
|
|
.Fn ubc_uiomove "struct uvm_object *uobj" "struct uio *uio" "vsize_t todo" \
|
|
"int advice" "int flags" ;
|
|
.El
|
|
.Pp
|
|
.Fn uvm_vnp_setsize
|
|
sets the size of vnode
|
|
.Fa vp
|
|
to
|
|
.Fa newsize .
|
|
Caller must hold a reference to the vnode.
|
|
If the vnode shrinks, pages no longer used are discarded.
|
|
.Pp
|
|
.Fn ubc_alloc
|
|
creates a kernel mapping of
|
|
.Fa uobj
|
|
starting at offset
|
|
.Fa offset .
|
|
The desired length of the mapping is pointed to by
|
|
.Fa lenp ,
|
|
but the actual mapping may be smaller than this.
|
|
.Fa lenp
|
|
is updated to contain the actual length mapped.
|
|
.Fa advice
|
|
is the access pattern hint, which must be one of
|
|
.Pp
|
|
.Bl -tag -offset indent -width "UVM_ADV_SEQUENTIAL" -compact
|
|
.It UVM_ADV_NORMAL
|
|
No hint
|
|
.It UVM_ADV_RANDOM
|
|
Random access hint
|
|
.It UVM_ADV_SEQUENTIAL
|
|
Sequential access hint (from lower offset to higher offset)
|
|
.El
|
|
.Pp
|
|
The possible
|
|
.Fa flags
|
|
are
|
|
.Pp
|
|
.Bl -tag -offset indent -width "UVM_ADV_SEQUENTIAL" -compact
|
|
.It UBC_READ
|
|
Mapping will be accessed for read.
|
|
.It UBC_WRITE
|
|
Mapping will be accessed for write.
|
|
.It UBC_FAULTBUSY
|
|
Fault in the window's pages during the mapping operation.
|
|
Makes sense only for write.
|
|
.El
|
|
.Pp
|
|
Once the mapping is created, it must be accessed only by methods that can
|
|
handle faults, such as
|
|
.Fn uiomove
|
|
or
|
|
.Fn kcopy .
|
|
Page faults on the mapping will result in the object's pager
|
|
method being called to resolve the fault.
|
|
.Pp
|
|
.Fn ubc_release
|
|
frees the mapping at
|
|
.Fa va
|
|
for reuse.
|
|
The mapping may be cached to speed future accesses to the same region
|
|
of the object.
|
|
The flags can be any of
|
|
.Pp
|
|
.Bl -tag -offset indent -width "UVM_ADV_SEQUENTIAL" -compact
|
|
.It UBC_UNMAP
|
|
Do not cache mapping.
|
|
.El
|
|
.Pp
|
|
.Fn ubc_uiomove
|
|
allocates a UBC memory window, performs I/O on it and unmaps the window.
|
|
The
|
|
.Fa advice
|
|
parameter takes the same values as the respective parameter in
|
|
.Fn ubc_alloc
|
|
and the
|
|
.Fa flags
|
|
parameter takes the same arguments as
|
|
.Fn ubc_alloc
|
|
and
|
|
.Fn ubc_release .
|
|
Additionally, the flag
|
|
.Dv UBC_PARTIALOK
|
|
can be provided to indicate that it is acceptable to return if an error
|
|
occurs mid-transfer.
|
|
.Sh VIRTUAL MEMORY I/O
|
|
.Bl -ohang
|
|
.It Ft int
|
|
.Fn uvm_io "struct vm_map *map" "struct uio *uio" ;
|
|
.El
|
|
.Pp
|
|
.Fn uvm_io
|
|
performs the I/O described in
|
|
.Fa uio
|
|
on the memory described in
|
|
.Fa map .
|
|
.Sh ALLOCATION OF KERNEL MEMORY
|
|
.Bl -ohang
|
|
.It Ft vaddr_t
|
|
.Fn uvm_km_alloc "struct vm_map *map" "vsize_t size" "vsize_t align" "uvm_flag_t flags" ;
|
|
.It Ft void
|
|
.Fn uvm_km_free "struct vm_map *map" "vaddr_t addr" "vsize_t size" "uvm_flag_t flags" ;
|
|
.It Ft struct vm_map *
|
|
.Fn uvm_km_suballoc "struct vm_map *map" "vaddr_t *min" "vaddr_t *max" \
|
|
"vsize_t size" "int flags" "bool fixed" "struct vm_map *submap" ;
|
|
.El
|
|
.Pp
|
|
.Fn uvm_km_alloc
|
|
allocates
|
|
.Fa size
|
|
bytes of kernel memory in map
|
|
.Fa map .
|
|
The first address of the allocated memory range will be aligned according to the
|
|
.Fa align
|
|
argument
|
|
.Pq specify 0 if no alignment is necessary .
|
|
The alignment must be a multiple of page size.
|
|
The
|
|
.Fa flags
|
|
is a bitwise inclusive OR of the allocation type and operation flags.
|
|
.Pp
|
|
The allocation type should be one of:
|
|
.Bl -tag -width UVM_KMF_PAGEABLE
|
|
.It UVM_KMF_WIRED
|
|
Wired memory.
|
|
.It UVM_KMF_PAGEABLE
|
|
Demand-paged zero-filled memory.
|
|
.It UVM_KMF_VAONLY
|
|
Virtual address only.
|
|
No physical pages are mapped in the allocated region.
|
|
If necessary, it's the caller's responsibility to enter page mappings.
|
|
It's also the caller's responsibility to clean up the mappings before freeing
|
|
the address range.
|
|
.El
|
|
.Pp
|
|
The following operation flags are available:
|
|
.Bl -tag -width UVM_KMF_PAGEABLE
|
|
.It UVM_KMF_CANFAIL
|
|
Can fail even if
|
|
.Dv UVM_KMF_NOWAIT
|
|
is not specified and
|
|
.Dv UVM_KMF_WAITVA
|
|
is specified.
|
|
.It UVM_KMF_ZERO
|
|
Request zero-filled memory.
|
|
Only supported for
|
|
.Dv UVM_KMF_WIRED .
|
|
Shouldn't be used with other types.
|
|
.It UVM_KMF_TRYLOCK
|
|
Fail if we can't lock the map.
|
|
.It UVM_KMF_NOWAIT
|
|
Fail immediately if no memory is available.
|
|
.It UVM_KMF_WAITVA
|
|
Sleep to wait for the virtual address resources if needed.
|
|
.El
|
|
.Pp
|
|
(If neither
|
|
.Dv UVM_KMF_NOWAIT
|
|
nor
|
|
.Dv UVM_KMF_CANFAIL
|
|
is specified and
|
|
.Dv UVM_KMF_WAITVA
|
|
is specified,
|
|
.Fn uvm_km_alloc
|
|
will never fail, but rather sleep indefinitely until the allocation succeeds.)
|
|
.Pp
|
|
Pageability of the pages allocated with
|
|
.Dv UVM_KMF_PAGEABLE
|
|
can be changed by
|
|
.Fn uvm_map_pageable .
|
|
In that case, the entire range must be changed atomically.
|
|
Changing a part of the range is not supported.
|
|
.Pp
|
|
.Fn uvm_km_free
|
|
frees the memory range allocated by
|
|
.Fn uvm_km_alloc .
|
|
.Fa addr
|
|
must be an address returned by
|
|
.Fn uvm_km_alloc .
|
|
.Fa map
|
|
and
|
|
.Fa size
|
|
must be the same as the ones used for the corresponding
|
|
.Fn uvm_km_alloc .
|
|
.Fa flags
|
|
must be the allocation type used for the corresponding
|
|
.Fn uvm_km_alloc .
|
|
.Pp
|
|
.Fn uvm_km_free
|
|
is the only way to free memory ranges allocated by
|
|
.Fn uvm_km_alloc .
|
|
.Fn uvm_unmap
|
|
must not be used.
|
|
.Pp
|
|
.Fn uvm_km_suballoc
|
|
allocates a submap from
|
|
.Fa map ,
|
|
creating a new map if
|
|
.Fa submap
|
|
is
|
|
.Dv NULL .
|
|
The addresses of the submap can be specified exactly by setting the
|
|
.Fa fixed
|
|
argument to true, which causes the
|
|
.Fa min
|
|
argument to specify the beginning of the address in the submap.
|
|
If
|
|
.Fa fixed
|
|
is false, any address of size
|
|
.Fa size
|
|
will be allocated from
|
|
.Fa map
|
|
and the start and end addresses returned in
|
|
.Fa min
|
|
and
|
|
.Fa max .
|
|
The
|
|
.Fa flags
|
|
are used to initialize the created submap.
|
|
The following flags could be set:
|
|
.Bl -tag -width VM_MAP_PAGEABLE
|
|
.It VM_MAP_PAGEABLE
|
|
Entries in the map may be paged out.
|
|
.It VM_MAP_INTRSAFE
|
|
Map should be interrupt-safe.
|
|
.It VM_MAP_TOPDOWN
|
|
A top-down mapping should be arranged.
|
|
.El
|
|
.Sh ALLOCATION OF PHYSICAL MEMORY
|
|
.Bl -ohang
|
|
.It Ft struct vm_page *
|
|
.Fn uvm_pagealloc "struct uvm_object *uobj" "voff_t off" "struct vm_anon *anon" "int flags" ;
|
|
.It Ft void
|
|
.Fn uvm_pagerealloc "struct vm_page *pg" "struct uvm_object *newobj" "voff_t newoff" ;
|
|
.It Ft void
|
|
.Fn uvm_pagefree "struct vm_page *pg" ;
|
|
.It Ft int
|
|
.Fn uvm_pglistalloc "psize_t size" "paddr_t low" "paddr_t high" "paddr_t alignment" "paddr_t boundary" "struct pglist *rlist" "int nsegs" "int waitok" ;
|
|
.It Ft void
|
|
.Fn uvm_pglistfree "struct pglist *list" ;
|
|
.It Ft void
|
|
.Fn uvm_page_physload "paddr_t start" "paddr_t end" "paddr_t avail_start" "paddr_t avail_end" "int free_list" ;
|
|
.El
|
|
.Pp
|
|
.Fn uvm_pagealloc
|
|
allocates a page of memory at offset
|
|
.Fa off
|
|
in either the object
|
|
.Fa uobj
|
|
or the anonymous memory
|
|
.Fa anon ,
|
|
which must be locked by the caller.
|
|
Only one of
|
|
.Fa uobj
|
|
and
|
|
.Fa anon
|
|
can be non
|
|
.Dv NULL .
|
|
Returns
|
|
.Dv NULL
|
|
when no page can be found.
|
|
The flags can be any of
|
|
.Bd -literal
|
|
#define UVM_PGA_USERESERVE 0x0001 /* ok to use reserve pages */
|
|
#define UVM_PGA_ZERO 0x0002 /* returned page must be zero'd */
|
|
.Ed
|
|
.Pp
|
|
.Dv UVM_PGA_USERESERVE
|
|
means to allocate a page even if that will result in the number of free pages
|
|
being lower than
|
|
.Dv uvmexp.reserve_pagedaemon
|
|
(if the current thread is the pagedaemon) or
|
|
.Dv uvmexp.reserve_kernel
|
|
(if the current thread is not the pagedaemon).
|
|
.Dv UVM_PGA_ZERO
|
|
causes the returned page to be filled with zeroes, either by allocating it
|
|
from a pool of pre-zeroed pages or by zeroing it in-line as necessary.
|
|
.Pp
|
|
.Fn uvm_pagerealloc
|
|
reallocates page
|
|
.Fa pg
|
|
to a new object
|
|
.Fa newobj ,
|
|
at a new offset
|
|
.Fa newoff .
|
|
.Pp
|
|
.Fn uvm_pagefree
|
|
frees the physical page
|
|
.Fa pg .
|
|
If the content of the page is known to be zero-filled,
|
|
caller should set
|
|
.Dv PG_ZERO
|
|
in pg-\*[Gt]flags so that the page allocator will use
|
|
the page to serve future
|
|
.Dv UVM_PGA_ZERO
|
|
requests efficiently.
|
|
.Pp
|
|
.Fn uvm_pglistalloc
|
|
allocates a list of pages for size
|
|
.Fa size
|
|
bytes under various constraints.
|
|
.Fa low
|
|
and
|
|
.Fa high
|
|
describe the lowest and highest addresses acceptable for the list.
|
|
If
|
|
.Fa alignment
|
|
is non-zero, it describes the required alignment of the list, in
|
|
power-of-two notation.
|
|
If
|
|
.Fa boundary
|
|
is non-zero, no segment of the list may cross this power-of-two
|
|
boundary, relative to zero.
|
|
.Fa nsegs
|
|
is the maximum number of physically contiguous segments.
|
|
If
|
|
.Fa waitok
|
|
is non-zero, the function may sleep until enough memory is available.
|
|
(It also may give up in some situations, so a non-zero
|
|
.Fa waitok
|
|
does not imply that
|
|
.Fn uvm_pglistalloc
|
|
cannot return an error.)
|
|
The allocated memory is returned in the
|
|
.Fa rlist
|
|
list; the caller has to provide storage only, the list is initialized by
|
|
.Fn uvm_pglistalloc .
|
|
.Pp
|
|
.Fn uvm_pglistfree
|
|
frees the list of pages pointed to by
|
|
.Fa list .
|
|
If the content of the page is known to be zero-filled,
|
|
caller should set
|
|
.Dv PG_ZERO
|
|
in pg-\*[Gt]flags so that the page allocator will use
|
|
the page to serve future
|
|
.Dv UVM_PGA_ZERO
|
|
requests efficiently.
|
|
.Pp
|
|
.Fn uvm_page_physload
|
|
loads physical memory segments into VM space on the specified
|
|
.Fa free_list .
|
|
It must be called at system boot time to set up physical memory
|
|
management pages.
|
|
The arguments describe the
|
|
.Fa start
|
|
and
|
|
.Fa end
|
|
of the physical addresses of the segment, and the available start and end
|
|
addresses of pages not already in use.
|
|
If a system has memory banks of
|
|
different speeds the slower memory should be given a higher
|
|
.Fa free_list
|
|
value.
|
|
.\" XXX expand on "system boot time"!
|
|
.Sh PROCESSES
|
|
.Bl -ohang
|
|
.It Ft void
|
|
.Fn uvm_pageout "void" ;
|
|
.It Ft void
|
|
.Fn uvm_scheduler "void" ;
|
|
.It Ft void
|
|
.Fn uvm_swapin "struct lwp *l" ;
|
|
.El
|
|
.Pp
|
|
.Fn uvm_pageout
|
|
is the main loop for the page daemon.
|
|
.Pp
|
|
.Fn uvm_scheduler
|
|
is the process zero main loop, which is to be called after the
|
|
system has finished starting other processes.
|
|
It handles the swapping in of runnable, swapped out processes in priority
|
|
order.
|
|
.Pp
|
|
.Fn uvm_swapin
|
|
swaps in the named lwp.
|
|
.Sh PAGE LOAN
|
|
.Bl -ohang
|
|
.It Ft int
|
|
.Fn uvm_loan "struct vm_map *map" "vaddr_t start" "vsize_t len" "void *v" "int flags" ;
|
|
.It Ft void
|
|
.Fn uvm_unloan "void *v" "int npages" "int flags" ;
|
|
.El
|
|
.Pp
|
|
.Fn uvm_loan
|
|
loans pages in a map out to anons or to the kernel.
|
|
.Fa map
|
|
should be unlocked,
|
|
.Fa start
|
|
and
|
|
.Fa len
|
|
should be multiples of
|
|
.Dv PAGE_SIZE .
|
|
Argument
|
|
.Fa flags
|
|
should be one of
|
|
.Bd -literal
|
|
#define UVM_LOAN_TOANON 0x01 /* loan to anons */
|
|
#define UVM_LOAN_TOPAGE 0x02 /* loan to kernel */
|
|
.Ed
|
|
.Pp
|
|
.Fa v
|
|
should be a pointer to an array of pointers to
|
|
.Li struct vm_anon
|
|
or
|
|
.Li struct vm_page ,
|
|
as appropriate.
|
|
The caller has to allocate memory for the array and
|
|
ensure it's big enough to hold
|
|
.Fa len / PAGE_SIZE
|
|
pointers.
|
|
Returns 0 for success, or appropriate error number otherwise.
|
|
Note that wired pages can't be loaned out and
|
|
.Fn uvm_loan
|
|
will fail in that case.
|
|
.Pp
|
|
.Fn uvm_unloan
|
|
kills loans on pages or anons.
|
|
The
|
|
.Fa v
|
|
must point to the array of pointers initialized by a previous call to
|
|
.Fn uvm_loan .
|
|
.Fa npages
|
|
should match the number of pages allocated for the loan; this also matches
|
|
the number of items in the array.
|
|
Argument
|
|
.Fa flags
|
|
should be one of
|
|
.Bd -literal
|
|
#define UVM_LOAN_TOANON 0x01 /* loan to anons */
|
|
#define UVM_LOAN_TOPAGE 0x02 /* loan to kernel */
|
|
.Ed
|
|
.Pp
|
|
and should match what was used for the previous call to
|
|
.Fn uvm_loan .
|
|
.Sh MISCELLANEOUS FUNCTIONS
|
|
.Bl -ohang
|
|
.It Ft struct uvm_object *
|
|
.Fn uao_create "vsize_t size" "int flags" ;
|
|
.It Ft void
|
|
.Fn uao_detach "struct uvm_object *uobj" ;
|
|
.It Ft void
|
|
.Fn uao_reference "struct uvm_object *uobj" ;
|
|
.It Ft bool
|
|
.Fn uvm_chgkprot "void *addr" "size_t len" "int rw" ;
|
|
.It Ft bool
|
|
.Fn uvm_kernacc "void *addr" "size_t len" "int rw" ;
|
|
.It Ft int
|
|
.Fn uvm_vslock "struct vmspace *vs" "void *addr" "size_t len" "vm_prot_t prot" ;
|
|
.It Ft void
|
|
.Fn uvm_vsunlock "struct vmspace *vs" "void *addr" "size_t len" ;
|
|
.It Ft void
|
|
.Fn uvm_meter "void" ;
|
|
.It Ft void
|
|
.Fn uvm_fork "struct lwp *l1" "struct lwp *l2" "bool shared" ;
|
|
.It Ft int
|
|
.Fn uvm_grow "struct proc *p" "vaddr_t sp" ;
|
|
.It Ft void
|
|
.Fn uvn_findpages "struct uvm_object *uobj" "voff_t offset" "int *npagesp" "struct vm_page **pps" "int flags" ;
|
|
.It Ft void
|
|
.Fn uvm_swap_stats "int cmd" "struct swapent *sep" "int sec" "register_t *retval" ;
|
|
.El
|
|
.Pp
|
|
The
|
|
.Fn uao_create ,
|
|
.Fn uao_detach ,
|
|
and
|
|
.Fn uao_reference
|
|
functions operate on anonymous memory objects, such as those used to support
|
|
System V shared memory.
|
|
.Fn uao_create
|
|
returns an object of size
|
|
.Fa size
|
|
with flags:
|
|
.Bd -literal
|
|
#define UAO_FLAG_KERNOBJ 0x1 /* create kernel object */
|
|
#define UAO_FLAG_KERNSWAP 0x2 /* enable kernel swap */
|
|
.Ed
|
|
.Pp
|
|
which can only be used once each at system boot time.
|
|
.Fn uao_reference
|
|
creates an additional reference to the named anonymous memory object.
|
|
.Fn uao_detach
|
|
removes a reference from the named anonymous memory object, destroying
|
|
it if removing the last reference.
|
|
.Pp
|
|
.Fn uvm_chgkprot
|
|
changes the protection of kernel memory from
|
|
.Fa addr
|
|
to
|
|
.Fa addr + len
|
|
to the value of
|
|
.Fa rw .
|
|
This is primarily useful for debuggers, for setting breakpoints.
|
|
This function is only available with options
|
|
.Dv KGDB .
|
|
.Pp
|
|
.Fn uvm_kernacc
|
|
checks the access at address
|
|
.Fa addr
|
|
to
|
|
.Fa addr + len
|
|
for
|
|
.Fa rw
|
|
access in the kernel address space.
|
|
.Pp
|
|
.Fn uvm_vslock
|
|
and
|
|
.Fn uvm_vsunlock
|
|
control the wiring and unwiring of pages in the address space
|
|
.Fa vs
|
|
from
|
|
.Fa addr
|
|
to
|
|
.Fa addr + len .
|
|
These functions are normally used to wire memory for I/O.
|
|
.Pp
|
|
.Fn uvm_meter
|
|
calculates the load average and wakes up the swapper if necessary.
|
|
.Pp
|
|
.Fn uvm_fork
|
|
forks a virtual address space for process' (old)
|
|
.Fa l1
|
|
and (new)
|
|
.Fa l2 .
|
|
If the
|
|
.Fa shared
|
|
argument is true, l1 shares its address space with l2,
|
|
otherwise a new address space is created.
|
|
This function currently has no return value, and thus cannot fail.
|
|
In the future, this function will be changed to allow it to
|
|
fail in low memory conditions.
|
|
.Pp
|
|
.Fn uvm_grow
|
|
increases the stack segment of process
|
|
.Fa p
|
|
to include
|
|
.Fa sp .
|
|
.Pp
|
|
.Fn uvn_findpages
|
|
looks up or creates pages in
|
|
.Fa uobj
|
|
at offset
|
|
.Fa offset ,
|
|
marks them busy and returns them in the
|
|
.Fa pps
|
|
array.
|
|
Currently
|
|
.Fa uobj
|
|
must be a vnode object.
|
|
The number of pages requested is pointed to by
|
|
.Fa npagesp ,
|
|
and this value is updated with the actual number of pages returned.
|
|
The flags can be
|
|
.Bd -literal
|
|
#define UFP_ALL 0x00 /* return all pages requested */
|
|
#define UFP_NOWAIT 0x01 /* don't sleep */
|
|
#define UFP_NOALLOC 0x02 /* don't allocate new pages */
|
|
#define UFP_NOCACHE 0x04 /* don't return pages which already exist */
|
|
#define UFP_NORDONLY 0x08 /* don't return PG_READONLY pages */
|
|
.Ed
|
|
.Pp
|
|
.Dv UFP_ALL
|
|
is a pseudo-flag meaning all requested pages should be returned.
|
|
.Dv UFP_NOWAIT
|
|
means that we must not sleep.
|
|
.Dv UFP_NOALLOC
|
|
causes any pages which do not already exist to be skipped.
|
|
.Dv UFP_NOCACHE
|
|
causes any pages which do already exist to be skipped.
|
|
.Dv UFP_NORDONLY
|
|
causes any pages which are marked PG_READONLY to be skipped.
|
|
.Pp
|
|
.Fn uvm_swap_stats
|
|
implements the
|
|
.Dv SWAP_STATS
|
|
and
|
|
.Dv SWAP_OSTATS
|
|
operations of the
|
|
.Xr swapctl 2
|
|
system call.
|
|
.Fa cmd
|
|
is the requested command,
|
|
.Dv SWAP_STATS
|
|
or
|
|
.Dv SWAP_OSTATS .
|
|
The function will copy no more than
|
|
.Fa sec
|
|
entries into the array pointed to by
|
|
.Fa sep .
|
|
On return,
|
|
.Fa retval
|
|
holds the actual number of entries copied into the array.
|
|
.Sh SYSCTL
|
|
UVM provides support for the
|
|
.Dv CTL_VM
|
|
domain of the
|
|
.Xr sysctl 3
|
|
hierarchy.
|
|
It handles the
|
|
.Dv VM_LOADAVG ,
|
|
.Dv VM_METER ,
|
|
.Dv VM_UVMEXP ,
|
|
and
|
|
.Dv VM_UVMEXP2
|
|
nodes, which return the current load averages, calculate current VM
|
|
totals, returns the uvmexp structure, and a kernel version independent
|
|
view of the uvmexp structure, respectively.
|
|
It also exports a number of tunables that control how much VM space is
|
|
allowed to be consumed by various tasks.
|
|
The load averages are typically accessed from userland using the
|
|
.Xr getloadavg 3
|
|
function.
|
|
The uvmexp structure has all global state of the UVM system,
|
|
and has the following members:
|
|
.Bd -literal
|
|
/* vm_page constants */
|
|
int pagesize; /* size of a page (PAGE_SIZE): must be power of 2 */
|
|
int pagemask; /* page mask */
|
|
int pageshift; /* page shift */
|
|
|
|
/* vm_page counters */
|
|
int npages; /* number of pages we manage */
|
|
int free; /* number of free pages */
|
|
int active; /* number of active pages */
|
|
int inactive; /* number of pages that we free'd but may want back */
|
|
int paging; /* number of pages in the process of being paged out */
|
|
int wired; /* number of wired pages */
|
|
int reserve_pagedaemon; /* number of pages reserved for pagedaemon */
|
|
int reserve_kernel; /* number of pages reserved for kernel */
|
|
|
|
/* pageout params */
|
|
int freemin; /* min number of free pages */
|
|
int freetarg; /* target number of free pages */
|
|
int inactarg; /* target number of inactive pages */
|
|
int wiredmax; /* max number of wired pages */
|
|
|
|
/* swap */
|
|
int nswapdev; /* number of configured swap devices in system */
|
|
int swpages; /* number of PAGE_SIZE'ed swap pages */
|
|
int swpginuse; /* number of swap pages in use */
|
|
int nswget; /* number of times fault calls uvm_swap_get() */
|
|
int nanon; /* number total of anon's in system */
|
|
int nfreeanon; /* number of free anon's */
|
|
|
|
/* stat counters */
|
|
int faults; /* page fault count */
|
|
int traps; /* trap count */
|
|
int intrs; /* interrupt count */
|
|
int swtch; /* context switch count */
|
|
int softs; /* software interrupt count */
|
|
int syscalls; /* system calls */
|
|
int pageins; /* pagein operation count */
|
|
/* pageouts are in pdpageouts below */
|
|
int swapins; /* swapins */
|
|
int swapouts; /* swapouts */
|
|
int pgswapin; /* pages swapped in */
|
|
int pgswapout; /* pages swapped out */
|
|
int forks; /* forks */
|
|
int forks_ppwait; /* forks where parent waits */
|
|
int forks_sharevm; /* forks where vmspace is shared */
|
|
|
|
/* fault subcounters */
|
|
int fltnoram; /* number of times fault was out of ram */
|
|
int fltnoanon; /* number of times fault was out of anons */
|
|
int fltpgwait; /* number of times fault had to wait on a page */
|
|
int fltpgrele; /* number of times fault found a released page */
|
|
int fltrelck; /* number of times fault relock called */
|
|
int fltrelckok; /* number of times fault relock is a success */
|
|
int fltanget; /* number of times fault gets anon page */
|
|
int fltanretry; /* number of times fault retries an anon get */
|
|
int fltamcopy; /* number of times fault clears "needs copy" */
|
|
int fltnamap; /* number of times fault maps a neighbor anon page */
|
|
int fltnomap; /* number of times fault maps a neighbor obj page */
|
|
int fltlget; /* number of times fault does a locked pgo_get */
|
|
int fltget; /* number of times fault does an unlocked get */
|
|
int flt_anon; /* number of times fault is on anon page (case 1a) */
|
|
int flt_acow; /* number of times fault is anon cow (case 1b) */
|
|
int flt_obj; /* number of times fault is on object page (2a) */
|
|
int flt_prcopy; /* number of times fault promotes with copy (2b) */
|
|
int flt_przero; /* number of times fault promotes with zerofill (2b) */
|
|
|
|
/* daemon counters */
|
|
int pdwoke; /* number of times daemon woke up */
|
|
int pdrevs; /* number of times daemon rev'd clock hand */
|
|
int pdswout; /* number of times daemon called for swapout */
|
|
int pdfreed; /* number of pages daemon freed since boot */
|
|
int pdscans; /* number of pages daemon scanned since boot */
|
|
int pdanscan; /* number of anonymous pages scanned by daemon */
|
|
int pdobscan; /* number of object pages scanned by daemon */
|
|
int pdreact; /* number of pages daemon reactivated since boot */
|
|
int pdbusy; /* number of times daemon found a busy page */
|
|
int pdpageouts; /* number of times daemon started a pageout */
|
|
int pdpending; /* number of times daemon got a pending pageout */
|
|
int pddeact; /* number of pages daemon deactivates */
|
|
.Ed
|
|
.Sh NOTES
|
|
.Fn uvm_chgkprot
|
|
is only available if the kernel has been compiled with options
|
|
.Dv KGDB .
|
|
.Pp
|
|
All structures and types whose names begin with
|
|
.Dq vm_
|
|
will be renamed to
|
|
.Dq uvm_ .
|
|
.Sh SEE ALSO
|
|
.Xr swapctl 2 ,
|
|
.Xr getloadavg 3 ,
|
|
.Xr kvm 3 ,
|
|
.Xr sysctl 3 ,
|
|
.Xr ddb 4 ,
|
|
.Xr options 4 ,
|
|
.Xr memoryallocators 9 ,
|
|
.Xr pmap 9
|
|
.Sh HISTORY
|
|
UVM is a new VM system developed at Washington University in St. Louis
|
|
(Missouri).
|
|
UVM's roots lie partly in the Mach-based
|
|
.Bx 4.4
|
|
VM system, the
|
|
.Fx
|
|
VM system, and the SunOS 4 VM system.
|
|
UVM's basic structure is based on the
|
|
.Bx 4.4
|
|
VM system.
|
|
UVM's new anonymous memory system is based on the
|
|
anonymous memory system found in the SunOS 4 VM (as described in papers
|
|
published by Sun Microsystems, Inc.).
|
|
UVM also includes a number of features new to
|
|
.Bx
|
|
including page loanout, map entry passing, simplified
|
|
copy-on-write, and clustered anonymous memory pageout.
|
|
UVM is further documented in an August 1998 dissertation by
|
|
Charles D. Cranor.
|
|
.Pp
|
|
UVM appeared in
|
|
.Nx 1.4 .
|
|
.Sh AUTHORS
|
|
Charles D. Cranor
|
|
.Aq chuck@ccrc.wustl.edu
|
|
designed and implemented UVM.
|
|
.Pp
|
|
Matthew Green
|
|
.Aq mrg@eterna.com.au
|
|
wrote the swap-space management code and handled the logistical issues
|
|
involved with merging UVM into the
|
|
.Nx
|
|
source tree.
|
|
.Pp
|
|
Chuck Silvers
|
|
.Aq chuq@chuq.com
|
|
implemented the aobj pager, thus allowing UVM to support System V shared
|
|
memory and process swapping.
|
|
He also designed and implemented the UBC part of UVM, which uses UVM pages
|
|
to cache vnode data rather than the traditional buffer cache buffers.
|