Add port of high performance pipe implementation written by John S. Dyson

for the FreeBSD project. Besides a huge speed boost compared with socketpair-based
pipes, this implementation also uses pageable kernel memory instead of mbufs.

Significant differences to FreeBSD version:
* uses uvm_loan() facility for direct write
* async/SIGIO handling is also correct for a sync writer with an async reader
* limits settable via sysctl, amountpipekva and nbigpipes available via sysctl
* pipes are unidirectional - for now this is enforced only at the file
	descriptor level; the code will be updated to take advantage of it
	eventually
* uses lockmgr(9)-based locks instead of home brew variant
* scatter-gather write is handled correctly for direct write case, data
  is transferred by PIPE_DIRECT_CHUNK bytes maximum, to avoid running out of kva

All FreeBSD/NetBSD specific code is within appropriate #ifdef, in preparation
to feed changes back to FreeBSD tree.

This pipe implementation is optional for now; add 'options NEW_PIPE'
to your kernel config to use it.
This commit is contained in:
jdolecek 2001-06-16 12:00:02 +00:00
parent 02d39f046b
commit ee882e3a09
10 changed files with 901 additions and 242 deletions

View File

@ -1,4 +1,4 @@
.\" $NetBSD: sysctl.8,v 1.66 2001/03/27 02:24:39 itojun Exp $
.\" $NetBSD: sysctl.8,v 1.67 2001/06/16 12:00:04 jdolecek Exp $
.\"
.\" Copyright (c) 1993
.\" The Regents of the University of California. All rights reserved.
@ -198,7 +198,11 @@ privilege can change the value.
.It kern.osrevision integer no
.It kern.ostype string no
.It kern.path_max integer no
.It kern.pipe_buf integer no
.It kern.pipe.maxkvasz integer yes
.It kern.pipe.maxloankvasz integer yes
.It kern.pipe.maxbigpipes integer yes
.It kern.pipe.nbigpipes integer no
.It kern.pipe.kvasize integer no
.It kern.posix1version integer no
.It kern.rawpartition integer no
.It kern.saved_ids integer no

View File

@ -1,4 +1,4 @@
/* $NetBSD: sysctl.c,v 1.46 2001/03/09 01:02:11 chs Exp $ */
/* $NetBSD: sysctl.c,v 1.47 2001/06/16 12:00:04 jdolecek Exp $ */
/*
* Copyright (c) 1993
@ -44,7 +44,7 @@ __COPYRIGHT(
#if 0
static char sccsid[] = "@(#)sysctl.c 8.1 (Berkeley) 6/6/93";
#else
__RCSID("$NetBSD: sysctl.c,v 1.46 2001/03/09 01:02:11 chs Exp $");
__RCSID("$NetBSD: sysctl.c,v 1.47 2001/06/16 12:00:04 jdolecek Exp $");
#endif
#endif /* not lint */
@ -181,6 +181,7 @@ static int sysctl_key __P((char *, char **, int[], int, int *));
static int sysctl_vfs __P((char *, char **, int[], int, int *));
static int sysctl_vfsgen __P((char *, char **, int[], int, int *));
static int sysctl_mbuf __P((char *, char **, int[], int, int *));
static int sysctl_pipe __P((char *, char **, int[], int, int *));
static int sysctl_proc __P((char *, char **, int[], int, int *));
static int findname __P((char *, char *, char **, struct list *));
static void usage __P((void));
@ -397,6 +398,11 @@ parse(string, flags)
case KERN_CONSDEV:
special |= CONSDEV;
break;
case KERN_PIPE:
len = sysctl_pipe(string, &bufp, mib, flags, &type);
if (len < 0)
return;
break;
}
break;
@ -1119,6 +1125,33 @@ sysctl_mbuf(string, bufpp, mib, flags, typep)
return (3);
}
struct ctlname pipenames[] = CTL_PIPE_NAMES;
struct list pipevars = { pipenames, KERN_PIPE_MAXID };
/*
 * Handle kern.pipe requests.
 *
 * Looks up the third-level name found via *bufpp in the kern.pipe name
 * table (pipevars).  On success the matching index is stored in mib[2],
 * the variable's ctl_type is stored in *typep, and 3 is returned (the
 * caller keeps it as the mib length).  When *bufpp is NULL the request
 * is handed to listall() for the whole table and -1 is returned; -1 is
 * also returned when findname() fails.  The flags argument is unused.
 */
static int
sysctl_pipe(string, bufpp, mib, flags, typep)
	char *string;
	char **bufpp;
	int mib[];
	int flags;
	int *typep;
{
	struct list *names = &pipevars;
	int slot;

	/* No further name component: let listall() walk the table. */
	if (*bufpp == NULL) {
		listall(string, names);
		return (-1);
	}

	slot = findname(string, "third", bufpp, names);
	if (slot == -1)
		return (-1);

	mib[2] = slot;
	*typep = names->list[slot].ctl_type;
	return (3);
}
/*
* Scan a list of names searching for a particular name.
*/

View File

@ -1,4 +1,4 @@
# $NetBSD: files,v 1.446 2001/06/13 06:01:44 simonb Exp $
# $NetBSD: files,v 1.447 2001/06/16 12:00:03 jdolecek Exp $
# @(#)files.newconf 7.5 (Berkeley) 5/10/93
@ -11,6 +11,7 @@ defopt MAXUPRC
defopt RTC_OFFSET
defopt DEFCORENAME
defopt UCONSOLE
defopt NEW_PIPE
defopt MULTIPROCESSOR
@ -863,6 +864,7 @@ file kern/subr_prof.c
file kern/subr_rmap.c
file kern/subr_xxx.c
file kern/sys_generic.c
file kern/sys_pipe.c new_pipe
file kern/sys_process.c
file kern/sys_socket.c
file kern/syscalls.c syscall_debug

View File

@ -1,4 +1,4 @@
/* $NetBSD: init_main.c,v 1.191 2001/06/08 12:53:30 mrg Exp $ */
/* $NetBSD: init_main.c,v 1.192 2001/06/16 12:00:02 jdolecek Exp $ */
/*
* Copyright (c) 1995 Christopher G. Demetriou. All rights reserved.
@ -46,6 +46,7 @@
#include "opt_sysv.h"
#include "opt_maxuprc.h"
#include "opt_multiprocessor.h"
#include "opt_new_pipe.h"
#include "opt_syscall_debug.h"
#include "rnd.h"
@ -91,6 +92,9 @@
#if NRND > 0
#include <sys/rnd.h>
#endif
#ifdef NEW_PIPE
#include <sys/pipe.h>
#endif
#include <sys/syscall.h>
#include <sys/syscallargs.h>
@ -489,6 +493,11 @@ main(void)
/* Initialize exec structures */
exec_init(1);
#ifdef NEW_PIPE
/* Initialize pipe structures */
pipe_init();
#endif
/*
* Okay, now we can let init(8) exec! It's off to userland!
*/

View File

@ -1,4 +1,4 @@
/* $NetBSD: kern_sysctl.c,v 1.88 2001/04/26 06:07:13 enami Exp $ */
/* $NetBSD: kern_sysctl.c,v 1.89 2001/06/16 12:00:02 jdolecek Exp $ */
/*-
* Copyright (c) 1982, 1986, 1989, 1993
@ -45,6 +45,7 @@
#include "opt_ddb.h"
#include "opt_insecure.h"
#include "opt_defcorename.h"
#include "opt_new_pipe.h"
#include "opt_sysv.h"
#include "pty.h"
@ -334,6 +335,7 @@ kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
case KERN_MBUF:
case KERN_PROC_ARGS:
case KERN_SYSVIPC_INFO:
case KERN_PIPE:
/* Not terminal. */
break;
default:
@ -534,6 +536,11 @@ kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
#if NPTY > 0
case KERN_MAXPTYS:
return sysctl_pty(oldp, oldlenp, newp, newlen);
#endif
#ifdef NEW_PIPE
case KERN_PIPE:
return (sysctl_dopipe(name + 1, namelen - 1, oldp, oldlenp,
newp, newlen));
#endif
default:
return (EOPNOTSUPP);

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
/* $NetBSD: uipc_syscalls.c,v 1.59 2001/06/14 20:32:47 thorpej Exp $ */
/* $NetBSD: uipc_syscalls.c,v 1.60 2001/06/16 12:00:02 jdolecek Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1990, 1993
@ -36,6 +36,7 @@
*/
#include "opt_ktrace.h"
#include "opt_new_pipe.h"
/*
* Though COMPAT_OLDSOCK is needed only for COMPAT_43, SunOS, Linux,
@ -912,6 +913,7 @@ sys_getsockopt(struct proc *p, void *v, register_t *retval)
return (error);
}
#ifndef NEW_PIPE
/* ARGSUSED */
int
sys_pipe(struct proc *p, void *v, register_t *retval)
@ -965,6 +967,7 @@ sys_pipe(struct proc *p, void *v, register_t *retval)
(void)soclose(rso);
return (error);
}
#endif /* !NEW_PIPE */
/*
* Get socket name.

View File

@ -1,4 +1,4 @@
/* $NetBSD: malloc.h,v 1.62 2001/05/30 12:07:05 mrg Exp $ */
/* $NetBSD: malloc.h,v 1.63 2001/06/16 12:00:03 jdolecek Exp $ */
/*
* Copyright (c) 1987, 1993
@ -171,7 +171,8 @@
#define M_EMULDATA 115 /* Per-process emulation data */
#define M_1394CTL 116 /* IEEE 1394 control structures */
#define M_1394DATA 117 /* IEEE 1394 data buffers */
#define M_LAST 118 /* Must be last type + 1 */
#define M_PIPE 118 /* Pipe structures */
#define M_LAST 119 /* Must be last type + 1 */
#define INITKMEMNAMES { \
"free", /* 0 M_FREE */ \
@ -292,7 +293,8 @@
"emuldata", /* 115 M_EMULDATA */ \
"1394ctl", /* 116 M_1394CTL */ \
"1394data", /* 117 M_1394DATA */ \
NULL, /* 118 */ \
"pipe", /* 118 M_PIPE */ \
NULL, /* 119 */ \
}
struct kmemstats {

View File

@ -25,11 +25,13 @@
#define _SYS_PIPE_H_
#ifndef _KERNEL
#include <sys/time.h> /* for struct timespec */
#ifdef __FreeBSD__
#include <sys/time.h> /* for struct timeval */
#include <sys/selinfo.h> /* for struct selinfo */
#include <vm/vm.h> /* for vm_page_t */
#include <machine/param.h> /* for PAGE_SIZE */
#endif
#endif
/*
* Pipe buffer size, keep moderate in value, pipes take kva space.
@ -42,15 +44,27 @@
#define BIG_PIPE_SIZE (64*1024)
#endif
/*
* Maximum size of kva for direct write transfer. If the amount
* of data in the buffer is larger, it is transferred in chunks of this
* size. This kva memory is freed after use if the amount of pipe kva
* memory is bigger than limitpipekva.
*/
#ifndef PIPE_DIRECT_CHUNK
#define PIPE_DIRECT_CHUNK (1*1024*1024)
#endif
/*
* PIPE_MINDIRECT MUST be smaller than PIPE_SIZE and MUST be bigger
* than PIPE_BUF.
*/
#ifndef PIPE_MINDIRECT
#if defined(__FreeBSD__)
#define PIPE_MINDIRECT 8192
#elif defined(__NetBSD__)
#define PIPE_MINDIRECT PAGE_SIZE
#endif
#endif
#define PIPENPAGES (BIG_PIPE_SIZE / PAGE_SIZE + 1)
/*
* Pipe buffer information.
@ -58,17 +72,21 @@
* Buffered write is active when the buffer.cnt field is set.
*/
struct pipebuf {
u_int cnt; /* number of chars currently in buffer */
size_t cnt; /* number of chars currently in buffer */
u_int in; /* in pointer */
u_int out; /* out pointer */
u_int size; /* size of buffer */
size_t size; /* size of buffer */
caddr_t buffer; /* kva of buffer */
#ifdef __FreeBSD__
struct vm_object *object; /* VM object containing buffer */
#endif
};
/*
* Information to support direct transfers between processes for pipes.
*/
#if defined(__FreeBSD__)
#define PIPENPAGES (BIG_PIPE_SIZE / PAGE_SIZE + 1)
struct pipemapping {
vm_offset_t kva; /* kernel virtual address */
vm_size_t cnt; /* number of chars in buffer */
@ -76,6 +94,15 @@ struct pipemapping {
int npages; /* number of pages */
vm_page_t ms[PIPENPAGES]; /* pages in source process */
};
#elif defined(__NetBSD__)
/*
 * NetBSD variant of the mapping used for direct transfers between
 * processes.  Unlike the FreeBSD variant, which embeds a fixed
 * ms[PIPENPAGES] array, the page list here is reached through a pointer.
 */
struct pipemapping {
vaddr_t kva; /* kernel virtual address */
vsize_t cnt; /* number of chars in buffer */
voff_t pos; /* current position within page */
int npages; /* how many pages allocated */
struct vm_page **ms; /* page pointers; presumably the pages in the source
			process, as in the FreeBSD variant - TODO confirm */
};
#endif
/*
* Bits in pipe_state.
@ -83,13 +110,17 @@ struct pipemapping {
#define PIPE_ASYNC 0x004 /* Async? I/O. */
#define PIPE_WANTR 0x008 /* Reader wants some characters. */
#define PIPE_WANTW 0x010 /* Writer wants space to put characters. */
#define PIPE_WANT 0x020 /* Pipe is wanted to be run-down. */
#define PIPE_WANTCLOSE 0x020 /* Pipe is wanted to be run-down. */
#define PIPE_SEL 0x040 /* Pipe has a select active. */
#define PIPE_EOF 0x080 /* Pipe is in EOF condition. */
#define PIPE_LOCK 0x100 /* Process has exclusive access to pointers/data. */
#define PIPE_LWANT 0x200 /* Process wants exclusive access to pointers/data. */
#define PIPE_DIRECTW 0x400 /* Pipe direct write active. */
#define PIPE_DIRECTOK 0x800 /* Direct mode ok. */
#define PIPE_SIGNALR 0x800 /* Do selwakeup() on read(2) */
#ifdef __NetBSD__
#define PIPE_MOREW 0x2000 /* Writer has more data to write. */
#endif
/*
* Per-pipe data structure.
@ -99,13 +130,25 @@ struct pipe {
struct pipebuf pipe_buffer; /* data storage */
struct pipemapping pipe_map; /* pipe mapping for direct I/O */
struct selinfo pipe_sel; /* for compat with select */
#ifdef __FreeBSD__
struct timespec pipe_atime; /* time of last access */
struct timespec pipe_mtime; /* time of last modify */
struct timespec pipe_ctime; /* time of status change */
struct sigio *pipe_sigio; /* information for async I/O */
#elif defined(__NetBSD__)
struct timeval pipe_atime; /* time of last access */
struct timeval pipe_mtime; /* time of last modify */
struct timeval pipe_ctime; /* time of status change */
gid_t pipe_pgid; /* process group for sigio */
struct lock pipe_lock; /* pipe lock */
#endif
struct pipe *pipe_peer; /* link with other direction */
u_int pipe_state; /* pipe status info */
int pipe_busy; /* busy flag, mostly to handle rundown sanely */
};
#ifdef __NetBSD__
void pipe_init __P((void));
#endif
#endif /* !_SYS_PIPE_H_ */

View File

@ -1,4 +1,4 @@
/* $NetBSD: sysctl.h,v 1.61 2001/05/04 15:50:13 simonb Exp $ */
/* $NetBSD: sysctl.h,v 1.62 2001/06/16 12:00:03 jdolecek Exp $ */
/*
* Copyright (c) 1989, 1993
@ -173,7 +173,8 @@ struct ctlname {
#define KERN_MSGBUF 53 /* kernel message buffer */
#define KERN_CONSDEV 54 /* dev_t: console terminal device */
#define KERN_MAXPTYS 55 /* int: maximum number of ptys */
#define KERN_MAXID 56 /* number of valid kern ids */
#define KERN_PIPE 56 /* node: pipe limits */
#define KERN_MAXID 57 /* number of valid kern ids */
#define CTL_KERN_NAMES { \
{ 0, 0 }, \
@ -232,6 +233,7 @@ struct ctlname {
{ "msgbuf", CTLTYPE_STRUCT }, \
{ "consdev", CTLTYPE_STRUCT }, \
{ "maxptys", CTLTYPE_INT }, \
{ "pipe", CTLTYPE_NODE }, \
}
/*
@ -428,6 +430,25 @@ struct kinfo_proc2 {
#define KERN_SYSVIPC_SEM_INFO 2 /* seminfo and semid_ds */
#define KERN_SYSVIPC_SHM_INFO 3 /* shminfo and shmid_ds */
/*
* KERN_PIPE subtypes
*/
#define KERN_PIPE_MAXKVASZ 1 /* maximum kva size */
#define KERN_PIPE_LIMITKVA 2 /* */
#define KERN_PIPE_MAXBIGPIPES 3 /* maximum # of "big" pipes */
#define KERN_PIPE_NBIGPIPES 4 /* current number of "big" p. */
#define KERN_PIPE_KVASIZE 5 /* current pipe kva size */
#define KERN_PIPE_MAXID 6
#define CTL_PIPE_NAMES { \
{ 0, 0 }, \
{ "maxkvasz", CTLTYPE_INT }, \
{ "maxloankvasz", CTLTYPE_INT }, \
{ "maxbigpipes", CTLTYPE_INT }, \
{ "nbigpipes", CTLTYPE_INT }, \
{ "kvasize", CTLTYPE_INT }, \
}
/*
* CTL_HW identifiers
*/
@ -652,6 +673,9 @@ int sysctl_ntptime __P((void *, size_t *));
int sysctl_doprof __P((int *, u_int, void *, size_t *, void *, size_t));
#endif
int sysctl_dombuf __P((int *, u_int, void *, size_t *, void *, size_t));
#ifdef NEW_PIPE
int sysctl_dopipe __P((int *, u_int, void *, size_t *, void *, size_t));
#endif
void fill_eproc __P((struct proc *, struct eproc *));