NetBSD/sys/uvm/uvm_bio.c

776 lines
20 KiB
C
Raw Normal View History

/* $NetBSD: uvm_bio.c,v 1.71 2010/11/30 10:55:25 hannken Exp $ */
2001-05-25 08:06:11 +04:00
/*
* Copyright (c) 1998 Chuck Silvers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
/*
* uvm_bio.c: buffered i/o object mapping cache
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.71 2010/11/30 10:55:25 hannken Exp $");
#include "opt_uvmhist.h"
2006-09-30 19:38:06 +04:00
#include "opt_ubc.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <uvm/uvm.h>
/*
* global data structures
*/
/*
* local functions
*/
2005-06-27 06:19:48 +04:00
static int ubc_fault(struct uvm_faultinfo *, vaddr_t, struct vm_page **,
int, int, vm_prot_t, int);
2005-06-27 06:19:48 +04:00
static struct ubc_map *ubc_find_mapping(struct uvm_object *, voff_t);
/*
* local data structues
*/
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
#define UBC_HASH(uobj, offset) \
(((((u_long)(uobj)) >> 8) + (((u_long)(offset)) >> PAGE_SHIFT)) & \
ubc_object.hashmask)
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
#define UBC_QUEUE(offset) \
(&ubc_object.inactive[(((u_long)(offset)) >> ubc_winshift) & \
(UBC_NQUEUES - 1)])
#define UBC_UMAP_ADDR(u) \
(vaddr_t)(ubc_object.kva + (((u) - ubc_object.umap) << ubc_winshift))
#define UMAP_PAGES_LOCKED 0x0001
#define UMAP_MAPPING_CACHED 0x0002
struct ubc_map
{
struct uvm_object * uobj; /* mapped object */
voff_t offset; /* offset into uobj */
voff_t writeoff; /* write offset */
vsize_t writelen; /* write len */
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
int refcount; /* refcount on mapping */
int flags; /* extra state */
2005-11-30 01:52:02 +03:00
int advice;
LIST_ENTRY(ubc_map) hash; /* hash table */
TAILQ_ENTRY(ubc_map) inactive; /* inactive queue */
};
static struct ubc_object
{
struct uvm_object uobj; /* glue for uvm_map() */
char *kva; /* where ubc_object is mapped */
struct ubc_map *umap; /* array of ubc_map's */
LIST_HEAD(, ubc_map) *hash; /* hashtable for cached ubc_map's */
u_long hashmask; /* mask for hashtable */
TAILQ_HEAD(ubc_inactive_head, ubc_map) *inactive;
/* inactive queues for ubc_map's */
} ubc_object;
2007-12-01 13:40:27 +03:00
const struct uvm_pagerops ubc_pager = {
2006-09-04 01:33:33 +04:00
.pgo_fault = ubc_fault,
/* ... rest are NULL */
};
int ubc_nwins = UBC_NWINS;
int ubc_winshift = UBC_WINSHIFT;
int ubc_winsize;
#if defined(PMAP_PREFER)
int ubc_nqueues;
#define UBC_NQUEUES ubc_nqueues
#else
#define UBC_NQUEUES 1
#endif
2006-09-30 19:38:06 +04:00
#if defined(UBC_STATS)
#define UBC_EVCNT_DEFINE(name) \
struct evcnt ubc_evcnt_##name = \
EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "ubc", #name); \
EVCNT_ATTACH_STATIC(ubc_evcnt_##name);
#define UBC_EVCNT_INCR(name) ubc_evcnt_##name.ev_count++
#else /* defined(UBC_STATS) */
#define UBC_EVCNT_DEFINE(name) /* nothing */
#define UBC_EVCNT_INCR(name) /* nothing */
#endif /* defined(UBC_STATS) */
UBC_EVCNT_DEFINE(wincachehit)
UBC_EVCNT_DEFINE(wincachemiss)
2007-05-07 16:39:45 +04:00
UBC_EVCNT_DEFINE(faultbusy)
2006-09-30 19:38:06 +04:00
/*
* ubc_init
*
* init pager private data structures.
*/
void
ubc_init(void)
{
struct ubc_map *umap;
vaddr_t va;
int i;
2001-06-13 10:06:19 +04:00
/*
* Make sure ubc_winshift is sane.
*/
if (ubc_winshift < PAGE_SHIFT)
ubc_winshift = PAGE_SHIFT;
/*
* init ubc_object.
* alloc and init ubc_map's.
* init inactive queues.
* alloc and init hashtable.
* map in ubc_object.
*/
UVM_OBJ_INIT(&ubc_object.uobj, &ubc_pager, UVM_OBJ_KERN);
ubc_object.umap = kmem_zalloc(ubc_nwins * sizeof(struct ubc_map),
KM_SLEEP);
if (ubc_object.umap == NULL)
panic("ubc_init: failed to allocate ubc_map");
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
if (ubc_winshift < PAGE_SHIFT) {
ubc_winshift = PAGE_SHIFT;
}
va = (vaddr_t)1L;
#ifdef PMAP_PREFER
PMAP_PREFER(0, &va, 0, 0); /* kernel is never topdown */
ubc_nqueues = va >> ubc_winshift;
if (ubc_nqueues == 0) {
ubc_nqueues = 1;
}
#endif
ubc_winsize = 1 << ubc_winshift;
ubc_object.inactive = kmem_alloc(UBC_NQUEUES *
sizeof(struct ubc_inactive_head), KM_SLEEP);
if (ubc_object.inactive == NULL)
panic("ubc_init: failed to allocate inactive queue heads");
for (i = 0; i < UBC_NQUEUES; i++) {
TAILQ_INIT(&ubc_object.inactive[i]);
}
for (i = 0; i < ubc_nwins; i++) {
umap = &ubc_object.umap[i];
TAILQ_INSERT_TAIL(&ubc_object.inactive[i & (UBC_NQUEUES - 1)],
umap, inactive);
}
ubc_object.hash = hashinit(ubc_nwins, HASH_LIST, true,
&ubc_object.hashmask);
for (i = 0; i <= ubc_object.hashmask; i++) {
LIST_INIT(&ubc_object.hash[i]);
}
if (uvm_map(kernel_map, (vaddr_t *)&ubc_object.kva,
ubc_nwins << ubc_winshift, &ubc_object.uobj, 0, (vsize_t)va,
UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) {
panic("ubc_init: failed to map ubc_object");
}
UVMHIST_INIT(ubchist, 300);
}
/*
* ubc_fault_page: helper of ubc_fault to handle a single page.
*
* => Caller has UVM object locked.
*/
static inline int
ubc_fault_page(const struct uvm_faultinfo *ufi, const struct ubc_map *umap,
struct vm_page *pg, vm_prot_t prot, vm_prot_t access_type, vaddr_t va)
{
struct uvm_object *uobj;
vm_prot_t mask;
int error;
bool rdonly;
uobj = pg->uobject;
KASSERT(mutex_owned(&uobj->vmobjlock));
if (pg->flags & PG_WANTED) {
wakeup(pg);
}
KASSERT((pg->flags & PG_FAKE) == 0);
if (pg->flags & PG_RELEASED) {
mutex_enter(&uvm_pageqlock);
uvm_pagefree(pg);
mutex_exit(&uvm_pageqlock);
return 0;
}
if (pg->loan_count != 0) {
/*
* Avoid unneeded loan break, if possible.
*/
if ((access_type & VM_PROT_WRITE) == 0) {
prot &= ~VM_PROT_WRITE;
}
if (prot & VM_PROT_WRITE) {
struct vm_page *newpg;
newpg = uvm_loanbreak(pg);
if (newpg == NULL) {
uvm_page_unbusy(&pg, 1);
return ENOMEM;
}
pg = newpg;
}
}
/*
* Note that a page whose backing store is partially allocated
* is marked as PG_RDONLY.
*/
KASSERT((pg->flags & PG_RDONLY) == 0 ||
(access_type & VM_PROT_WRITE) == 0 ||
pg->offset < umap->writeoff ||
pg->offset + PAGE_SIZE > umap->writeoff + umap->writelen);
rdonly = ((access_type & VM_PROT_WRITE) == 0 &&
(pg->flags & PG_RDONLY) != 0) ||
UVM_OBJ_NEEDS_WRITEFAULT(uobj);
mask = rdonly ? ~VM_PROT_WRITE : VM_PROT_ALL;
error = pmap_enter(ufi->orig_map->pmap, va, VM_PAGE_TO_PHYS(pg),
prot & mask, PMAP_CANFAIL | (access_type & mask));
mutex_enter(&uvm_pageqlock);
uvm_pageactivate(pg);
mutex_exit(&uvm_pageqlock);
pg->flags &= ~(PG_BUSY|PG_WANTED);
UVM_PAGE_OWN(pg, NULL);
return error;
}
/*
* ubc_fault: fault routine for ubc mapping
*/
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
2005-06-27 06:19:48 +04:00
static int
ubc_fault(struct uvm_faultinfo *ufi, vaddr_t ign1, struct vm_page **ign2,
int ign3, int ign4, vm_prot_t access_type, int flags)
{
struct uvm_object *uobj;
struct ubc_map *umap;
vaddr_t va, eva, ubc_offset, slot_offset;
struct vm_page *pgs[ubc_winsize >> PAGE_SHIFT];
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
int i, error, npages;
vm_prot_t prot;
UVMHIST_FUNC("ubc_fault"); UVMHIST_CALLED(ubchist);
/*
* no need to try with PGO_LOCKED...
* we don't need to have the map locked since we know that
* no one will mess with it until our reference is released.
*/
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
if (flags & PGO_LOCKED) {
uvmfault_unlockall(ufi, NULL, &ubc_object.uobj, NULL);
flags &= ~PGO_LOCKED;
}
va = ufi->orig_rvaddr;
ubc_offset = va - (vaddr_t)ubc_object.kva;
umap = &ubc_object.umap[ubc_offset >> ubc_winshift];
KASSERT(umap->refcount != 0);
2006-10-19 13:34:00 +04:00
KASSERT((umap->flags & UMAP_PAGES_LOCKED) == 0);
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
slot_offset = ubc_offset & (ubc_winsize - 1);
/*
* some platforms cannot write to individual bytes atomically, so
* software has to do read/modify/write of larger quantities instead.
* this means that the access_type for "write" operations
* can be VM_PROT_READ, which confuses us mightily.
2005-02-27 01:31:44 +03:00
*
* deal with this by resetting access_type based on the info
* that ubc_alloc() stores for us.
*/
access_type = umap->writelen ? VM_PROT_WRITE : VM_PROT_READ;
UVMHIST_LOG(ubchist, "va 0x%lx ubc_offset 0x%lx access_type %d",
va, ubc_offset, access_type, 0);
#ifdef DIAGNOSTIC
if ((access_type & VM_PROT_WRITE) != 0) {
if (slot_offset < trunc_page(umap->writeoff) ||
umap->writeoff + umap->writelen <= slot_offset) {
panic("ubc_fault: out of range write");
}
}
#endif
/* no umap locking needed since we have a ref on the umap */
uobj = umap->uobj;
if ((access_type & VM_PROT_WRITE) == 0) {
npages = (ubc_winsize - slot_offset) >> PAGE_SHIFT;
} else {
npages = (round_page(umap->offset + umap->writeoff +
umap->writelen) - (umap->offset + slot_offset))
>> PAGE_SHIFT;
flags |= PGO_PASTEOF;
}
again:
memset(pgs, 0, sizeof (pgs));
2008-01-02 14:48:20 +03:00
mutex_enter(&uobj->vmobjlock);
UVMHIST_LOG(ubchist, "slot_offset 0x%x writeoff 0x%x writelen 0x%x ",
slot_offset, umap->writeoff, umap->writelen, 0);
UVMHIST_LOG(ubchist, "getpages uobj %p offset 0x%x npages %d",
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
uobj, umap->offset + slot_offset, npages, 0);
error = (*uobj->pgops->pgo_get)(uobj, umap->offset + slot_offset, pgs,
2005-11-30 01:52:02 +03:00
&npages, 0, access_type, umap->advice, flags | PGO_NOBLOCKALLOC |
PGO_NOTIMESTAMP);
UVMHIST_LOG(ubchist, "getpages error %d npages %d", error, npages, 0,
0);
if (error == EAGAIN) {
kpause("ubc_fault", false, hz >> 2, NULL);
goto again;
}
if (error) {
return error;
}
/*
* For virtually-indexed, virtually-tagged caches we should avoid
* creating writable mappings when we do not absolutely need them,
* since the "compatible alias" trick does not work on such caches.
* Otherwise, we can always map the pages writable.
*/
#ifdef PMAP_CACHE_VIVT
prot = VM_PROT_READ | access_type;
#else
prot = VM_PROT_READ | VM_PROT_WRITE;
#endif
/*
* Note: in the common case, all returned pages would have the same
* UVM object. However, due to layered file-systems and e.g. tmpfs,
* returned pages may have different objects. We "remember" the
* last object in the loop to reduce locking overhead and to perform
* pmap_update() before object unlock.
*/
uobj = NULL;
va = ufi->orig_rvaddr;
eva = ufi->orig_rvaddr + (npages << PAGE_SHIFT);
UVMHIST_LOG(ubchist, "va 0x%lx eva 0x%lx", va, eva, 0, 0);
for (i = 0; va < eva; i++, va += PAGE_SIZE) {
struct vm_page *pg;
UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i], 0, 0);
pg = pgs[i];
if (pg == NULL || pg == PGO_DONTCARE) {
continue;
}
if (__predict_false(pg->uobject != uobj)) {
/* Check for the first iteration and error cases. */
if (uobj != NULL) {
/* Must make VA visible before the unlock. */
pmap_update(ufi->orig_map->pmap);
mutex_exit(&uobj->vmobjlock);
}
uobj = pg->uobject;
mutex_enter(&uobj->vmobjlock);
}
error = ubc_fault_page(ufi, umap, pg, prot, access_type, va);
if (error) {
/*
* Flush (there might be pages entered), drop the lock,
* "forget" the object and perform uvm_wait().
* Note: page will re-fault.
*/
pmap_update(ufi->orig_map->pmap);
mutex_exit(&uobj->vmobjlock);
uobj = NULL;
uvm_wait("ubc_fault");
}
}
if (__predict_true(uobj != NULL)) {
pmap_update(ufi->orig_map->pmap);
mutex_exit(&uobj->vmobjlock);
}
return 0;
}
/*
* local functions
*/
2005-06-27 06:19:48 +04:00
static struct ubc_map *
ubc_find_mapping(struct uvm_object *uobj, voff_t offset)
{
struct ubc_map *umap;
LIST_FOREACH(umap, &ubc_object.hash[UBC_HASH(uobj, offset)], hash) {
if (umap->uobj == uobj && umap->offset == offset) {
return umap;
}
}
return NULL;
}
/*
* ubc interface functions
*/
/*
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
* ubc_alloc: allocate a file mapping window
*/
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
void *
2005-11-30 01:52:02 +03:00
ubc_alloc(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, int advice,
int flags)
{
vaddr_t slot_offset, va;
struct ubc_map *umap;
voff_t umap_offset;
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
int error;
UVMHIST_FUNC("ubc_alloc"); UVMHIST_CALLED(ubchist);
UVMHIST_LOG(ubchist, "uobj %p offset 0x%lx len 0x%lx",
uobj, offset, *lenp, 0);
KASSERT(*lenp > 0);
umap_offset = (offset & ~((voff_t)ubc_winsize - 1));
slot_offset = (vaddr_t)(offset & ((voff_t)ubc_winsize - 1));
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
*lenp = MIN(*lenp, ubc_winsize - slot_offset);
/*
* the object is always locked here, so we don't need to add a ref.
*/
again:
2008-01-02 14:48:20 +03:00
mutex_enter(&ubc_object.uobj.vmobjlock);
umap = ubc_find_mapping(uobj, umap_offset);
if (umap == NULL) {
2006-09-30 19:38:06 +04:00
UBC_EVCNT_INCR(wincachemiss);
umap = TAILQ_FIRST(UBC_QUEUE(offset));
if (umap == NULL) {
2008-01-02 14:48:20 +03:00
mutex_exit(&ubc_object.uobj.vmobjlock);
kpause("ubc_alloc", false, hz, NULL);
goto again;
}
/*
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
* remove from old hash (if any), add to new hash.
*/
if (umap->uobj != NULL) {
LIST_REMOVE(umap, hash);
}
umap->uobj = uobj;
umap->offset = umap_offset;
LIST_INSERT_HEAD(&ubc_object.hash[UBC_HASH(uobj, umap_offset)],
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
umap, hash);
va = UBC_UMAP_ADDR(umap);
if (umap->flags & UMAP_MAPPING_CACHED) {
umap->flags &= ~UMAP_MAPPING_CACHED;
pmap_remove(pmap_kernel(), va, va + ubc_winsize);
pmap_update(pmap_kernel());
}
} else {
2006-09-30 19:38:06 +04:00
UBC_EVCNT_INCR(wincachehit);
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
va = UBC_UMAP_ADDR(umap);
}
if (umap->refcount == 0) {
TAILQ_REMOVE(UBC_QUEUE(offset), umap, inactive);
}
#ifdef DIAGNOSTIC
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
if ((flags & UBC_WRITE) && (umap->writeoff || umap->writelen)) {
panic("ubc_alloc: concurrent writes uobj %p", uobj);
}
#endif
if (flags & UBC_WRITE) {
umap->writeoff = slot_offset;
umap->writelen = *lenp;
}
umap->refcount++;
2005-11-30 01:52:02 +03:00
umap->advice = advice;
2008-01-02 14:48:20 +03:00
mutex_exit(&ubc_object.uobj.vmobjlock);
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
UVMHIST_LOG(ubchist, "umap %p refs %d va %p flags 0x%x",
umap, umap->refcount, va, flags);
if (flags & UBC_FAULTBUSY) {
int npages = (*lenp + PAGE_SIZE - 1) >> PAGE_SHIFT;
struct vm_page *pgs[npages];
int gpflags =
PGO_SYNCIO|PGO_OVERWRITE|PGO_PASTEOF|PGO_NOBLOCKALLOC|
PGO_NOTIMESTAMP;
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
int i;
KDASSERT(flags & UBC_WRITE);
2007-05-07 16:39:45 +04:00
KASSERT(umap->refcount == 1);
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
2007-05-07 16:39:45 +04:00
UBC_EVCNT_INCR(faultbusy);
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
if (umap->flags & UMAP_MAPPING_CACHED) {
umap->flags &= ~UMAP_MAPPING_CACHED;
pmap_remove(pmap_kernel(), va, va + ubc_winsize);
}
again_faultbusy:
memset(pgs, 0, sizeof(pgs));
2008-01-02 14:48:20 +03:00
mutex_enter(&uobj->vmobjlock);
error = (*uobj->pgops->pgo_get)(uobj, trunc_page(offset), pgs,
2005-11-30 01:52:02 +03:00
&npages, 0, VM_PROT_READ | VM_PROT_WRITE, advice, gpflags);
UVMHIST_LOG(ubchist, "faultbusy getpages %d", error, 0, 0, 0);
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
if (error) {
goto out;
}
for (i = 0; i < npages; i++) {
struct vm_page *pg = pgs[i];
KASSERT(pg->uobject == uobj);
if (pg->loan_count != 0) {
2008-01-02 14:48:20 +03:00
mutex_enter(&uobj->vmobjlock);
if (pg->loan_count != 0) {
pg = uvm_loanbreak(pg);
}
2008-01-02 14:48:20 +03:00
mutex_exit(&uobj->vmobjlock);
if (pg == NULL) {
pmap_kremove(va, ubc_winsize);
pmap_update(pmap_kernel());
2008-01-02 14:48:20 +03:00
mutex_enter(&uobj->vmobjlock);
uvm_page_unbusy(pgs, npages);
2008-01-02 14:48:20 +03:00
mutex_exit(&uobj->vmobjlock);
uvm_wait("ubc_alloc");
goto again_faultbusy;
}
pgs[i] = pg;
}
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
pmap_kenter_pa(va + slot_offset + (i << PAGE_SHIFT),
VM_PAGE_TO_PHYS(pg),
VM_PROT_READ | VM_PROT_WRITE, 0);
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
}
pmap_update(pmap_kernel());
umap->flags |= UMAP_PAGES_LOCKED;
2007-05-07 16:39:45 +04:00
} else {
KASSERT((umap->flags & UMAP_PAGES_LOCKED) == 0);
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
}
out:
return (void *)(va + slot_offset);
}
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
/*
* ubc_release: free a file mapping window.
*/
void
2005-06-27 06:19:48 +04:00
ubc_release(void *va, int flags)
{
struct ubc_map *umap;
struct uvm_object *uobj;
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
vaddr_t umapva;
bool unmapped;
UVMHIST_FUNC("ubc_release"); UVMHIST_CALLED(ubchist);
UVMHIST_LOG(ubchist, "va %p", va, 0, 0, 0);
umap = &ubc_object.umap[((char *)va - ubc_object.kva) >> ubc_winshift];
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
umapva = UBC_UMAP_ADDR(umap);
uobj = umap->uobj;
KASSERT(uobj != NULL);
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
if (umap->flags & UMAP_PAGES_LOCKED) {
int slot_offset = umap->writeoff;
int endoff = umap->writeoff + umap->writelen;
int zerolen = round_page(endoff) - endoff;
int npages = (int)(round_page(umap->writeoff + umap->writelen)
- trunc_page(umap->writeoff)) >> PAGE_SHIFT;
struct vm_page *pgs[npages];
paddr_t pa;
int i;
bool rv;
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
2007-05-07 16:39:45 +04:00
KASSERT((umap->flags & UMAP_MAPPING_CACHED) == 0);
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
if (zerolen) {
memset((char *)umapva + endoff, 0, zerolen);
}
umap->flags &= ~UMAP_PAGES_LOCKED;
mutex_enter(&uobj->vmobjlock);
2008-01-02 14:48:20 +03:00
mutex_enter(&uvm_pageqlock);
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
for (i = 0; i < npages; i++) {
rv = pmap_extract(pmap_kernel(),
umapva + slot_offset + (i << PAGE_SHIFT), &pa);
KASSERT(rv);
pgs[i] = PHYS_TO_VM_PAGE(pa);
pgs[i]->flags &= ~(PG_FAKE|PG_CLEAN);
KASSERT(pgs[i]->loan_count == 0);
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
uvm_pageactivate(pgs[i]);
}
2008-01-02 14:48:20 +03:00
mutex_exit(&uvm_pageqlock);
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
pmap_kremove(umapva, ubc_winsize);
pmap_update(pmap_kernel());
uvm_page_unbusy(pgs, npages);
2008-01-02 14:48:20 +03:00
mutex_exit(&uobj->vmobjlock);
2007-02-22 09:05:00 +03:00
unmapped = true;
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
} else {
2007-02-22 09:05:00 +03:00
unmapped = false;
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
}
2008-01-02 14:48:20 +03:00
mutex_enter(&ubc_object.uobj.vmobjlock);
umap->writeoff = 0;
umap->writelen = 0;
umap->refcount--;
if (umap->refcount == 0) {
if (flags & UBC_UNMAP) {
/*
* Invalidate any cached mappings if requested.
* This is typically used to avoid leaving
* incompatible cache aliases around indefinitely.
*/
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
pmap_remove(pmap_kernel(), umapva,
umapva + ubc_winsize);
umap->flags &= ~UMAP_MAPPING_CACHED;
pmap_update(pmap_kernel());
LIST_REMOVE(umap, hash);
umap->uobj = NULL;
TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap,
inactive);
} else {
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). - eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
if (!unmapped) {
umap->flags |= UMAP_MAPPING_CACHED;
}
TAILQ_INSERT_TAIL(UBC_QUEUE(umap->offset), umap,
inactive);
}
}
UVMHIST_LOG(ubchist, "umap %p refs %d", umap, umap->refcount, 0, 0);
2008-01-02 14:48:20 +03:00
mutex_exit(&ubc_object.uobj.vmobjlock);
}
/*
* ubc_uiomove: move data to/from an object.
*/
int
ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo, int advice,
int flags)
{
voff_t off;
const bool overwrite = (flags & UBC_FAULTBUSY) != 0;
int error;
KASSERT(todo <= uio->uio_resid);
KASSERT(((flags & UBC_WRITE) != 0 && uio->uio_rw == UIO_WRITE) ||
((flags & UBC_READ) != 0 && uio->uio_rw == UIO_READ));
off = uio->uio_offset;
error = 0;
while (todo > 0) {
vsize_t bytelen = todo;
void *win;
win = ubc_alloc(uobj, off, &bytelen, advice, flags);
if (error == 0) {
error = uiomove(win, bytelen, uio);
}
if (error != 0 && overwrite) {
/*
* if we haven't initialized the pages yet,
* do it now. it's safe to use memset here
* because we just mapped the pages above.
*/
2008-01-02 14:48:20 +03:00
printf("%s: error=%d\n", __func__, error);
memset(win, 0, bytelen);
}
ubc_release(win, flags);
off += bytelen;
todo -= bytelen;
if (error != 0 && (flags & UBC_PARTIALOK) != 0) {
break;
}
}
return error;
}
/*
* uvm_vnp_zerorange: set a range of bytes in a file to zero.
*/
void
uvm_vnp_zerorange(struct vnode *vp, off_t off, size_t len)
{
void *win;
int flags;
/*
* XXXUBC invent kzero() and use it
*/
while (len) {
vsize_t bytelen = len;
win = ubc_alloc(&vp->v_uobj, off, &bytelen, UVM_ADV_NORMAL,
UBC_WRITE);
memset(win, 0, bytelen);
flags = UBC_WANT_UNMAP(vp) ? UBC_UNMAP : 0;
ubc_release(win, flags);
off += bytelen;
len -= bytelen;
}
}