From beae54ff5a36257473fa8fbb862d5892ecf23392 Mon Sep 17 00:00:00 2001 From: matt Date: Fri, 18 Jan 2013 06:42:16 +0000 Subject: [PATCH] Contain support routines used to create and destroy lists of physical pages from lists of pages or ranges of virtual address. By using these physical maps, the kernel can avoid mapping physical I/O in the kernel's address space in most cases. --- sys/kern/subr_physmap.c | 387 ++++++++++++++++++++++++++++++++++++++++ sys/sys/physmap.h | 67 +++++++ 2 files changed, 454 insertions(+) create mode 100644 sys/kern/subr_physmap.c create mode 100644 sys/sys/physmap.h diff --git a/sys/kern/subr_physmap.c b/sys/kern/subr_physmap.c new file mode 100644 index 000000000000..d41a6d1b8097 --- /dev/null +++ b/sys/kern/subr_physmap.c @@ -0,0 +1,387 @@ +/*- + * Copyright (c) 2013 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Matt Thomas of 3am Software Foundry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +__KERNEL_RCSID(1, "$NetBSD: subr_physmap.c,v 1.1 2013/01/18 06:42:16 matt Exp $"); + +#include +#include +#include + +#include + +/* + * This file contains support routines used to create and destroy lists of + * physical pages from lists of pages or ranges of virtual addresses. By using + * these physical maps, the kernel can avoid mapping physical I/O in the + * kernel's address space in most cases. + */ + +/* + * State of one walk over a physmap. physmap_map_init() allocates it and + * returns it as an opaque cookie; physmap_map() advances it one segment at a + * time and physmap_map_fini() frees it. + */ +typedef struct { + physmap_t *pc_physmap; /* map being walked */ + physmap_segment_t *pc_segs; /* next segment physmap_map() will map */ + vsize_t pc_offset; /* byte offset to start at within *pc_segs */ + vsize_t pc_klen; /* size in bytes of the current KVA window */ + vaddr_t pc_kva; /* base of the current KVA window, 0 if none */ + u_int pc_nsegs; /* number of segments not yet mapped */ + vm_prot_t pc_prot; /* protection passed to pmap_kenter_pa() */ + bool pc_direct_mapped; /* window came from mm_md_direct_mapped_phys() and is not freed */ +} physmap_cookie_t; + +/* + * Allocate a physmap structure that requires "maxsegs" segments. 
+ * The map is zero-filled, so it starts out with pm_nsegs == 0, and it must
+ * be released with physmap_destroy().
+ */
+static physmap_t *
+physmap_alloc(size_t maxsegs)
+{
+	KASSERT(maxsegs > 0);
+
+	const size_t mapsize = offsetof(physmap_t, pm_segs[maxsegs]);
+	physmap_t * const map = kmem_zalloc(mapsize, KM_SLEEP);
+
+	map->pm_maxsegs = maxsegs;
+	/*
+	 * pm_maxsegs is narrower than size_t.  physmap_destroy() recomputes
+	 * the allocation size from it, so a truncated count would free the
+	 * wrong number of bytes.
+	 */
+	KASSERT(map->pm_maxsegs == maxsegs);
+
+	return map;
+}
+
+/*
+ * Append the physical memory backing the virtual range [va, va + len) of
+ * "pmap" to "map".  Memory that is physically contiguous with the end of the
+ * previous segment (including one left by an earlier call) is merged into
+ * that segment instead of starting a new one.
+ *
+ * Returns 0 on success, or EFAULT if some page of the range has no mapping
+ * in "pmap".  A zero-length range adds nothing.  pm_nsegs is only updated on
+ * success; after a failure the segment array may be partially modified, and
+ * the callers in this file destroy the map.
+ *
+ * NOTE(review): this relies on pmap_extract() returning the physical address
+ * of "va" itself (page frame plus in-page offset) -- confirm for the target
+ * pmap.
+ */
+static int
+physmap_fill(physmap_t *map, pmap_t pmap, vaddr_t va, vsize_t len)
+{
+	size_t nsegs = map->pm_nsegs;
+	physmap_segment_t *ps = NULL;
+	paddr_t lastaddr = 0;
+
+	if (nsegs > 0) {
+		/*
+		 * Start from the last existing segment since we have to see
+		 * if we can merge virtual addresses that are physically
+		 * contiguous into as few segments as possible.
+		 */
+		ps = &map->pm_segs[nsegs - 1];
+		lastaddr = ps->ps_addr + ps->ps_len;
+	}
+
+	while (len > 0) {
+		/*
+		 * Never cross a page boundary in one step: every page has
+		 * to be looked up on its own.
+		 */
+		vsize_t chunk = PAGE_SIZE - (va - trunc_page(va));
+		if (chunk > len) {
+			chunk = len;
+		}
+
+		paddr_t curaddr;
+		if (!pmap_extract(pmap, va, &curaddr)) {
+			return EFAULT;
+		}
+
+		if (ps == NULL || curaddr != lastaddr) {
+			/*
+			 * Not contiguous with what we have so far; start a
+			 * new segment.
+			 */
+			KASSERT(nsegs < map->pm_maxsegs);
+			ps = &map->pm_segs[nsegs++];
+			ps->ps_addr = curaddr;
+			ps->ps_len = 0;
+		}
+		ps->ps_len += chunk;
+		lastaddr = curaddr + chunk;
+
+		va += chunk;
+		len -= chunk;
+	}
+
+	map->pm_nsegs = nsegs;
+	return 0;
+}
+
+/*
+ * Create a physmap and populate it with the pages that are used to map a
+ * linear range of virtual addresses.  It is assumed that uvm_vslock has been
+ * called to lock these pages into memory.
+ */ +int +physmap_create_linear(physmap_t **map_p, const struct vmspace *vs, vaddr_t va, + vsize_t len) +{ + const size_t maxsegs = atop(round_page(va + len) - trunc_page(va)); + physmap_t * const map = physmap_alloc(maxsegs); + int error = physmap_fill(map, vs->vm_map.pmap, va, len); + if (error) { + physmap_destroy(map); + *map_p = NULL; + return error; + } + *map_p = map; + return 0; +} + +/* + * Create a physmap and populate it with the pages that are contained in an + * iovec array. It is assumed that uvm_vslock has been called to lock these + * pages into memory. + */ +int +physmap_create_iov(physmap_t **map_p, const struct vmspace *vs, + struct iovec *iov, size_t iovlen) +{ + size_t maxsegs = 0; + for (size_t i = 0; i < iovlen; i++) { + const vaddr_t start = (vaddr_t) iov[i].iov_base; + const vaddr_t end = start + iov[i].iov_len; + maxsegs += atop(round_page(end) - trunc_page(start)); + } + physmap_t * const map = physmap_alloc(maxsegs); + + for (size_t i = 0; i < iovlen; i++) { + int error = physmap_fill(map, vs->vm_map.pmap, + (vaddr_t) iov[i].iov_base, iov[i].iov_len); + if (error) { + physmap_destroy(map); + *map_p = NULL; + return error; + } + } + *map_p = map; + return 0; +} + +/* + * This uses a list of vm_page structure to create a physmap. + */ +physmap_t * +physmap_create_pagelist(struct vm_page **pgs, size_t npgs) +{ + physmap_t * const map = physmap_alloc(npgs); + + physmap_segment_t *ps = map->pm_segs; + + /* + * Initialize the first segment. + */ + paddr_t lastaddr = VM_PAGE_TO_PHYS(pgs[0]); + ps->ps_addr = lastaddr; + ps->ps_len = PAGE_SIZE; + + for (pgs++; npgs-- > 1; pgs++) { + /* + * lastaddr needs to be increased by a page. + */ + lastaddr += PAGE_SIZE; + paddr_t curaddr = VM_PAGE_TO_PHYS(*pgs); + if (curaddr != lastaddr) { + /* + * If the addresses are not the same, we need to use + * a new segemnt. Set its address and update lastaddr. 
+ */ + ps++; + ps->ps_addr = curaddr; + lastaddr = curaddr; + } + /* + * Increase this segment's length by a page + */ + ps->ps_len += PAGE_SIZE; + } + + map->pm_nsegs = ps + 1 - map->pm_segs; + return map; +} + +void +physmap_destroy(physmap_t *map) +{ + const size_t mapsize = offsetof(physmap_t, pm_segs[map->pm_maxsegs]); + + kmem_free(map, mapsize); +} + +void * +physmap_map_init(physmap_t *map, size_t offset, vm_prot_t prot) +{ + physmap_cookie_t * const pc = kmem_zalloc(sizeof(*pc), KM_SLEEP); + + KASSERT(prot == VM_PROT_READ || prot == (VM_PROT_READ|VM_PROT_WRITE)); + + pc->pc_physmap = map; + pc->pc_segs = map->pm_segs; + pc->pc_nsegs = map->pm_nsegs; + pc->pc_prot = prot; + pc->pc_klen = 0; + pc->pc_kva = 0; + pc->pc_direct_mapped = false; + + /* + * Skip to the first segment we are interested in. + */ + while (offset >= pc->pc_segs->ps_len) { + offset -= pc->pc_segs->ps_len; + pc->pc_segs++; + pc->pc_nsegs--; + } + + pc->pc_offset = offset; + + return pc; +} + +size_t +physmap_map(void *cookie, vaddr_t *kvap) +{ + physmap_cookie_t * const pc = cookie; + + /* + * If there is currently a non-direct mapped KVA region allocated, + * free it now. + */ + if (pc->pc_kva != 0 && !pc->pc_direct_mapped) { + pmap_kremove(pc->pc_kva, pc->pc_klen); + uvm_km_free(kernel_map, pc->pc_kva, pc->pc_klen, + UVM_KMF_VAONLY); + pmap_update(pmap_kernel()); + } + + /* + * If there are no more segments to process, return 0 indicating + * we are done. + */ + if (pc->pc_nsegs == 0) { + return 0; + } + + /* + * Get starting physical address of this segment and its length. + */ + paddr_t pa = pc->pc_segs->ps_addr + pc->pc_offset; + const size_t koff = pa & PAGE_MASK; + const size_t len = pc->pc_segs->ps_len - pc->pc_offset; + + /* + * Now that we have the starting offset in the page, reset to the + * beginning of the page. + */ + pa = trunc_page(pa); + + /* + * We are now done with this segment; advance to the next one. 
+ */ + pc->pc_segs++; + pc->pc_nsegs--; + pc->pc_offset = 0; + + /* + * Find out how many pages we are mapping. + */ + pc->pc_klen = round_page(len); +#ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS + /* + * Always try to direct map it since that's nearly zero cost. + */ + pc->pc_direct_mapped = mm_md_direct_mapped_phys(pa, &pc->pc_kva); +#endif + if (!pc->pc_direct_mapped) { + /* + * If we can't direct map it, we have to allocate some KVA + * so we map it via the kernel_map. + */ + pc->pc_kva = uvm_km_alloc(kernel_map, pc->pc_klen, + atop(pa) & uvmexp.ncolors, + UVM_KMF_VAONLY | UVM_KMF_WAITVA | UVM_KMF_COLORMATCH); + KASSERT(pc->pc_kva != 0); + + /* + * Setup mappings for this segment. + */ + for (size_t poff = 0; poff < pc->pc_klen; poff += PAGE_SIZE) { + pmap_kenter_pa(pc->pc_kva + poff, pa + poff, + pc->pc_prot, 0); + } + /* + * Make them real. + */ + pmap_update(pmap_kernel()); + } + /* + * Return the starting KVA (including offset into the page) and + * the length of this segment. + */ + *kvap = pc->pc_kva + koff; + return len; +} + +void +physmap_map_fini(void *cookie) +{ + physmap_cookie_t * const pc = cookie; + + /* + * If there is currently a non-direct mapped KVA region allocated, + * free it now. + */ + if (pc->pc_kva != 0 && !pc->pc_direct_mapped) { + pmap_kremove(pc->pc_kva, pc->pc_klen); + uvm_km_free(kernel_map, pc->pc_kva, pc->pc_klen, + UVM_KMF_VAONLY); + pmap_update(pmap_kernel()); + } + + /* + * Free the cookie. + */ + kmem_free(pc, sizeof(*pc)); +} + +/* + * genio needs to zero pages past the EOF or without backing storage (think + * sparse files). But since we are using physmaps, there is no kva to use with + * memset so we need a helper to obtain a kva and memset the desired memory. 
+ */ +void +physmap_zero(physmap_t *map, size_t offset, size_t len) +{ + void * const cookie = physmap_map_init(map, offset, + VM_PROT_READ|VM_PROT_WRITE); + + for (;;) { + vaddr_t kva; + size_t seglen = physmap_map(cookie, &kva); + KASSERT(seglen != 0); + if (seglen > len) + seglen = len; + memset((void *)kva, 0, seglen); + if (seglen == len) + break; + } + + physmap_map_fini(cookie); +} diff --git a/sys/sys/physmap.h b/sys/sys/physmap.h new file mode 100644 index 000000000000..ebeab3ff5883 --- /dev/null +++ b/sys/sys/physmap.h @@ -0,0 +1,67 @@ +/*- + * Copyright (c) 2013 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Matt Thomas of 3am Software Foundry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_PHYSMAP_H_ +#define _SYS_PHYSMAP_H_ + +#include + +typedef struct { + paddr_t ps_addr; + psize_t ps_len; +} physmap_segment_t; + +/* typedef is in */ +struct physmap { + uint16_t pm_nsegs; + uint16_t pm_maxsegs; + physmap_segment_t pm_segs[0]; +}; + +#ifdef _KERNEL +#include +#include + +int physmap_create_iov(physmap_t **, const struct vmspace *, + struct iovec *, size_t); +int physmap_create_linear(physmap_t **, const struct vmspace *, + vaddr_t, vsize_t); +physmap_t * + physmap_create_pagelist(struct vm_page **, size_t); + +void physmap_destroy(physmap_t *); + +void * physmap_map_init(physmap_t *, size_t, vm_prot_t); +size_t physmap_map(void *, vaddr_t *); +void physmap_map_fini(void *); + +void physmap_zero(physmap_t *, size_t, size_t); +#endif /* _KERNEL */ + +#endif /* _SYS_PHYSMAP_H_ */