diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 0d147c8e9190..b97e9b79d8dd 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -1,4 +1,4 @@ -/* $NetBSD: sys_pipe.c,v 1.145 2018/05/19 11:39:37 jdolecek Exp $ */ +/* $NetBSD: sys_pipe.c,v 1.146 2018/06/10 17:54:51 jdolecek Exp $ */ /*- * Copyright (c) 2003, 2007, 2008, 2009 The NetBSD Foundation, Inc. @@ -68,7 +68,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sys_pipe.c,v 1.145 2018/05/19 11:39:37 jdolecek Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sys_pipe.c,v 1.146 2018/06/10 17:54:51 jdolecek Exp $"); #include #include @@ -102,6 +102,12 @@ __KERNEL_RCSID(0, "$NetBSD: sys_pipe.c,v 1.145 2018/05/19 11:39:37 jdolecek Exp #ifndef PIPE_NODIRECT #include + +#if !defined(PMAP_DIRECT) +# define PIPE_NODIRECT /* Direct map interface not available */ +#endif + +bool pipe_direct = true; #endif static int pipe_read(file_t *, off_t *, struct uio *, kauth_cred_t, int); @@ -135,20 +141,6 @@ static const struct fileops pipeops = { #define MINPIPESIZE (PIPE_SIZE / 3) #define MAXPIPESIZE (2 * PIPE_SIZE / 3) -/* - * Maximum amount of kva for pipes -- this is kind-of a soft limit, but - * is there so that on large systems, we don't exhaust it. - */ -#define MAXPIPEKVA (8 * 1024 * 1024) -static u_int maxpipekva = MAXPIPEKVA; - -/* - * Limit for direct transfers, we cannot, of course limit - * the amount of kva for pipes in general though. - */ -#define LIMITPIPEKVA (16 * 1024 * 1024) -static u_int limitpipekva = LIMITPIPEKVA; - /* * Limit the number of "big" pipes */ @@ -177,6 +169,7 @@ static void pipe_dtor(void *, void *); #ifndef PIPE_NODIRECT static int pipe_loan_alloc(struct pipe *, int); static void pipe_loan_free(struct pipe *); +static int pipe_direct_process_read(void *, size_t, void *); #endif /* PIPE_NODIRECT */ static pool_cache_t pipe_wr_cache; @@ -446,6 +439,16 @@ pipeselwakeup(struct pipe *selp, struct pipe *sigp, int code) fownsignal(sigp->pipe_pgid, SIGIO, code, band, selp); } +#ifndef PIPE_NODIRECT +static int +pipe_direct_process_read(void *va, size_t len, void *arg) +{ + struct uio *uio = (struct uio *)arg; + + return uiomove(va, len, uio); +} +#endif + static int pipe_read(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred, int flags) @@ -507,30 +510,39 @@ again: #ifndef PIPE_NODIRECT if ((rpipe->pipe_state & PIPE_DIRECTR) != 0) { struct pipemapping * const rmap = &rpipe->pipe_map; + voff_t pgoff; + u_int pgst, npages; + /* * Direct copy, bypassing a kernel buffer. */ - void *va; - KASSERT(rpipe->pipe_state & PIPE_DIRECTW); - size = rmap->cnt; - if (size > uio->uio_resid) - size = uio->uio_resid; + size = MIN(rmap->cnt, uio->uio_resid); + + if (size > 0) { + KASSERT(size > 0); + mutex_exit(lock); + + pgst = rmap->pos >> PAGE_SHIFT; + pgoff = rmap->pos & PAGE_MASK; + npages = (size + pgoff + PAGE_SIZE - 1) >> PAGE_SHIFT; + KASSERTMSG(npages > 0 && (pgst + npages) <= rmap->npages, "npages %u pgst %u rmap->npages %u", npages, pgst, rmap->npages); + + error = uvm_direct_process(&rmap->pgs[pgst], npages, + pgoff, size, pipe_direct_process_read, uio); + mutex_enter(lock); + + nread += size; + rmap->pos += size; + rmap->cnt -= size; + } - va = (char *)rmap->kva + rmap->pos; - mutex_exit(lock); - error = uiomove(va, size, uio); - mutex_enter(lock); - if (error) - break; - nread += size; - rmap->pos += size; - rmap->cnt -= size; if (rmap->cnt == 0) { rpipe->pipe_state &= ~PIPE_DIRECTR; cv_broadcast(&rpipe->pipe_wcv); } + continue; } #endif @@ -630,18 +642,20 @@ static int pipe_loan_alloc(struct pipe *wpipe, int npages) { struct pipemapping * const wmap = &wpipe->pipe_map; - const vsize_t len = ptoa(npages); - atomic_add_int(&amountpipekva, len); - wmap->kva = uvm_km_alloc(kernel_map, len, 0, - UVM_KMF_COLORMATCH | UVM_KMF_VAONLY | UVM_KMF_WAITVA); - if (wmap->kva == 0) { - atomic_add_int(&amountpipekva, -len); - return (ENOMEM); + KASSERT(wmap->npages == 0); + + if (npages > wmap->maxpages) { + pipe_loan_free(wpipe); + + wmap->pgs = kmem_alloc(npages * sizeof(struct vm_page *), KM_NOSLEEP); + if (wmap->pgs == NULL) + return ENOMEM; + wmap->maxpages = npages; } wmap->npages = npages; - wmap->pgs = kmem_alloc(npages * sizeof(struct vm_page *), KM_SLEEP); + return (0); } @@ -652,18 +666,16 @@ static void pipe_loan_free(struct pipe *wpipe) { struct pipemapping * const wmap = &wpipe->pipe_map; - const vsize_t len = ptoa(wmap->npages); - uvm_km_free(kernel_map, wmap->kva, len, UVM_KMF_VAONLY); - wmap->kva = 0; - atomic_add_int(&amountpipekva, -len); - kmem_free(wmap->pgs, wmap->npages * sizeof(struct vm_page *)); - wmap->pgs = NULL; -#if 0 + if (wmap->maxpages > 0) { + kmem_free(wmap->pgs, wmap->maxpages * sizeof(struct vm_page *)); + wmap->pgs = NULL; + wmap->maxpages = 0; + } + wmap->npages = 0; wmap->pos = 0; wmap->cnt = 0; -#endif } /* @@ -681,20 +693,18 @@ pipe_direct_write(file_t *fp, struct pipe *wpipe, struct uio *uio) { struct pipemapping * const wmap = &wpipe->pipe_map; kmutex_t * const lock = wpipe->pipe_lock; - struct vm_page **pgs; vaddr_t bbase, base, bend; vsize_t blen, bcnt; int error, npages; voff_t bpos; - u_int starting_color; - KASSERT(mutex_owned(wpipe->pipe_lock)); + KASSERT(mutex_owned(lock)); KASSERT(wmap->cnt == 0); mutex_exit(lock); /* - * Handle first PIPE_CHUNK_SIZE bytes of buffer. Deal with buffers + * Handle first PIPE_DIRECT_CHUNK bytes of buffer. Deal with buffers * not aligned to PAGE_SIZE. */ bbase = (vaddr_t)uio->uio_iov->iov_base; @@ -711,43 +721,28 @@ pipe_direct_write(file_t *fp, struct pipe *wpipe, struct uio *uio) bcnt = uio->uio_iov->iov_len; } npages = atop(blen); - starting_color = atop(base) & uvmexp.colormask; - /* - * Free the old kva if we need more pages than we have - * allocated. - */ - if (wmap->kva != 0 && starting_color + npages > wmap->npages) - pipe_loan_free(wpipe); + KASSERT((wpipe->pipe_state & (PIPE_DIRECTW | PIPE_DIRECTR)) == 0); + KASSERT(wmap->npages == 0); - /* Allocate new kva. */ - if (wmap->kva == 0) { - error = pipe_loan_alloc(wpipe, starting_color + npages); - if (error) { - mutex_enter(lock); - return (error); - } + /* Make sure page array is big enough */ + error = pipe_loan_alloc(wpipe, npages); + if (error) { + mutex_enter(lock); + return (error); } /* Loan the write buffer memory from writer process */ - pgs = wmap->pgs + starting_color; error = uvm_loan(&uio->uio_vmspace->vm_map, base, blen, - pgs, UVM_LOAN_TOPAGE); + wmap->pgs, UVM_LOAN_TOPAGE); if (error) { pipe_loan_free(wpipe); mutex_enter(lock); return (ENOMEM); /* so that caller fallback to ordinary write */ } - /* Enter the loaned pages to kva */ - vaddr_t kva = wpipe->pipe_map.kva; - for (int j = 0; j < npages; j++, kva += PAGE_SIZE) { - pmap_kenter_pa(kva, VM_PAGE_TO_PHYS(pgs[j]), VM_PROT_READ, 0); - } - pmap_update(pmap_kernel()); - /* Now we can put the pipe in direct write mode */ - wmap->pos = bpos + ptoa(starting_color); + wmap->pos = bpos; wmap->cnt = bcnt; /* @@ -783,17 +778,13 @@ pipe_direct_write(file_t *fp, struct pipe *wpipe, struct uio *uio) /* Acquire the pipe lock and cleanup */ (void)pipelock(wpipe, false); + mutex_exit(lock); - - if (pgs != NULL) { - pmap_kremove(wpipe->pipe_map.kva, blen); - pmap_update(pmap_kernel()); - uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE); - } - if (error || amountpipekva > maxpipekva) - pipe_loan_free(wpipe); - + /* XXX what happens if the writer process exits without waiting for reader? + * XXX FreeBSD does a clone in this case */ + uvm_unloan(wmap->pgs, npages, UVM_LOAN_TOPAGE); mutex_enter(lock); + if (error) { pipeselwakeup(wpipe, wpipe, POLL_ERR); @@ -914,7 +905,7 @@ pipe_write(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred, */ if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && (fp->f_flag & FNONBLOCK) == 0 && - (wpipe->pipe_map.kva || (amountpipekva < limitpipekva))) { + pipe_direct) { error = pipe_direct_write(fp, wpipe, uio); /* @@ -1271,12 +1262,8 @@ pipe_free_kmem(struct pipe *pipe) pipe->pipe_buffer.buffer = NULL; } #ifndef PIPE_NODIRECT - if (pipe->pipe_map.kva != 0) { + if (pipe->pipe_map.npages > 0) pipe_loan_free(pipe); - pipe->pipe_map.cnt = 0; - pipe->pipe_map.pos = 0; - pipe->pipe_map.npages = 0; - } #endif /* !PIPE_NODIRECT */ } @@ -1507,19 +1494,6 @@ SYSCTL_SETUP(sysctl_kern_pipe_setup, "sysctl kern.pipe subtree setup") NULL, 0, NULL, 0, CTL_KERN, KERN_PIPE, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT|CTLFLAG_READWRITE, - CTLTYPE_INT, "maxkvasz", - SYSCTL_DESCR("Maximum amount of kernel memory to be " - "used for pipes"), - NULL, 0, &maxpipekva, 0, - CTL_KERN, KERN_PIPE, KERN_PIPE_MAXKVASZ, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT|CTLFLAG_READWRITE, - CTLTYPE_INT, "maxloankvasz", - SYSCTL_DESCR("Limit for direct transfers via page loan"), - NULL, 0, &limitpipekva, 0, - CTL_KERN, KERN_PIPE, KERN_PIPE_LIMITKVA, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "maxbigpipes", diff --git a/sys/sys/pipe.h b/sys/sys/pipe.h index 06cf6d28342e..cc9bdea31cb3 100644 --- a/sys/sys/pipe.h +++ b/sys/sys/pipe.h @@ -1,4 +1,4 @@ -/* $NetBSD: pipe.h,v 1.34 2018/05/19 11:39:37 jdolecek Exp $ */ +/* $NetBSD: pipe.h,v 1.35 2018/06/10 17:54:51 jdolecek Exp $ */ /* * Copyright (c) 1996 John S. Dyson @@ -43,10 +43,9 @@ #endif /* - * Maximum size of kva for direct write transfer. If the amount + * Maximum size of transfer for direct write transfer. If the amount * of data in buffer is larger, it would be transferred in chunks of this - * size. This kva memory is freed after use if amount of pipe kva memory - * is bigger than limitpipekva. + * size. */ #ifndef PIPE_DIRECT_CHUNK #define PIPE_DIRECT_CHUNK (1*1024*1024) @@ -77,10 +76,10 @@ struct pipebuf { * Information to support direct transfers between processes for pipes. */ struct pipemapping { - vaddr_t kva; /* kernel virtual address */ vsize_t cnt; /* number of chars in buffer */ voff_t pos; /* current position within page */ - int npages; /* how many pages allocated */ + u_int npages; /* how many pages available */ + u_int maxpages; /* how many pages allocated */ struct vm_page **pgs; /* pointers to the pages */ }; @@ -124,8 +123,8 @@ struct pipe { /* * KERN_PIPE subtypes */ -#define KERN_PIPE_MAXKVASZ 1 /* maximum kva size */ -#define KERN_PIPE_LIMITKVA 2 /* */ +#define KERN_PIPE_MAXKVASZ 1 /* maximum kva size (obsolete) */ +#define KERN_PIPE_LIMITKVA 2 /* limit kva for laons (obsolete) */ #define KERN_PIPE_MAXBIGPIPES 3 /* maximum # of "big" pipes */ #define KERN_PIPE_NBIGPIPES 4 /* current number of "big" p. */ #define KERN_PIPE_KVASIZE 5 /* current pipe kva size */