added Berkeley sed with mods to use GNU regex

1993-04-13 23:49:12 +00:00 · 1993-04-13 23:49:12 +00:00 · bb106b2b49
parent 5594a0292f
commit bb106b2b49
13 changed files with 3887 additions and 0 deletions
--- a/usr.bin/sed/Makefile
+++ b/usr.bin/sed/Makefile
@ -0,0 +1,8 @@
+#	@(#)Makefile	5.1 (Berkeley) 8/24/92
+
+# Build the sed program from the four C sources listed in SRCS.
+PROG=	sed
+# GNU_REGEX enables the GNU-regex-specific pattern buffer setup that
+# compile.c guards with #ifdef GNU_REGEX; the matching library is
+# supplied through LDADD below.
+CFLAGS+=-DGNU_REGEX
+SRCS=	compile.c main.c misc.c process.c
+LDADD=-lgnuregex
+
+.include <bsd.prog.mk>
--- a/usr.bin/sed/POSIX
+++ b/usr.bin/sed/POSIX
@ -0,0 +1,205 @@
+#	@(#)POSIX	5.9 (Berkeley) 8/28/92
+
+Comments on the IEEE P1003.2 Draft 12
+     Part 2: Shell and Utilities
+  Section 4.55: sed - Stream editor
+
+Diomidis Spinellis <dds@doc.ic.ac.uk>
+Keith Bostic <bostic@cs.berkeley.edu>
+
+In the following paragraphs, "wrong" usually means "inconsistent with
+historic practice", as most of the following comments refer to
+undocumented inconsistencies between the historical versions of sed and
+the POSIX 1003.2 standard.  All the comments are notes taken while
+implementing a POSIX-compatible version of sed, and should not be
+interpreted as official opinions or criticism towards the POSIX committee.
+All uses of "POSIX" refer to section 4.55, Draft 12 of POSIX 1003.2.
+
+ 1.	32V and BSD derived implementations of sed strip the text
+	arguments of the a, c and i commands of their initial blanks,
+	i.e.
+
+	#!/bin/sed -f
+	a\
+		foo\
+		\  indent\
+		bar
+
+	produces:
+
+	foo
+	  indent
+	bar
+
+	POSIX does not specify this behavior as the System V versions of
+	sed do not do this stripping.  The argument against stripping is
+	that it is difficult to write sed scripts that have leading blanks
+	if they are stripped.  The argument for stripping is that it is
+	difficult to write readable sed scripts unless indentation is allowed
+	and ignored, and leading whitespace is obtainable by entering a
+	backslash in front of it.  This implementation follows the BSD
+	historic practice.
+
+ 2.	Historical versions of sed required that the w flag be the last
+	flag to an s command as it takes an additional argument.  This
+	is obvious, but not specified in POSIX.
+
+ 3.	Historical versions of sed required that whitespace follow a w
+	flag to an s command.  This is not specified in POSIX.  This
+	implementation permits whitespace but does not require it.
+
+ 4.	Historical versions of sed permitted any number of whitespace
+	characters to follow the w command.  This is not specified in
+	POSIX.  This implementation permits whitespace but does not
+	require it.
+
+ 5.	The rule for the l command differs from historic practice.  Table
+	2-15 includes the various ANSI C escape sequences, including \\
+	for backslash.  Some historical versions of sed displayed two
+	digit octal numbers, too, not three as specified by POSIX.  POSIX
+	is a cleanup, and is followed by this implementation.
+
+ 6.	The POSIX specification for ! does not state that, when ! is
+	followed by a single command, that command must not carry its own
+	address specification, whereas the commands in a command list may.
+	The specification for ! implies that "3!/hello/p" works, and it
+	never has, historically.  Note,
+
+		3!{
+			/hello/p
+		}
+
+	does work.
+
+ 7.	POSIX does not specify what happens with consecutive ! commands
+	(e.g. /foo/!!!p).  Historic implementations allow any number of
+	!'s without changing the behaviour.  (It seems logical that each
+	one might reverse the behaviour.)  This implementation follows
+	historic practice.
+
+ 8.	Historic versions of sed permitted commands to be separated
+	by semi-colons, e.g. "sed -ne '1p;2p;3q'" printed the first
+	three lines of a file.  This is not specified by POSIX.
+	Note, the ; command separator is not allowed for the commands
+	a, c, i, w, r, :, b, t, # and at the end of a w flag in the s
+	command.  This implementation follows historic practice and
+	implements the ; separator.
+
+ 9.	Historic versions of sed terminated the script if EOF was reached
+	during the execution of the 'n' command, i.e.:
+
+	sed -e '
+	n
+	i\
+	hello
+	' </dev/null
+
+	did not produce any output.  POSIX does not specify this behavior.
+	This implementation follows historic practice.
+
+10.	POSIX does not specify that the q command causes all lines that
+	have been appended to be output and that the pattern space is
+	printed before exiting.  This implementation follows historic
+	practice.
+
+11.	Historical implementations do not output the change text of a c
+	command in the case of an address range whose first line number
+	is greater than the second (e.g. 3,1).  POSIX requires that the
+	text be output.  Since the historic behavior doesn't seem to have
+	any particular purpose, this implementation follows the POSIX
+	behavior.
+
+12.	POSIX does not specify whether address ranges are checked and
+	reset if a command is not executed due to a jump.  The following
+	program will behave in different ways depending on whether the
+	'c' command is triggered at the third line, i.e. will the text
+	be output even though line 3 of the input will never logically
+	encounter that command.
+
+	2,4b
+	1,3c\
+		text
+
+	Historic implementations, and this implementation, do not output
+	the text in the above example.  The general rule, therefore,
+	is that a range whose second address is never matched extends to
+	the end of the input.
+
+13.	Historical implementations allow an output suppressing #n at the
+	beginning of -e arguments as well as in a script file.  POSIX
+	does not specify this.  This implementation follows historical
+	practice.
+
+14.	POSIX does not explicitly specify how sed behaves if no script is
+	specified.  Since the sed Synopsis permits this form of the command,
+	and the language in the Description section states that the input
+	is output, it seems reasonable that it behave like the cat(1)
+	command.  Historic sed implementations behave differently for "ls |
+	sed", where they produce no output, and "ls | sed -e#", where they
+	behave like cat.  This implementation behaves like cat in both cases.
+
+15.	The POSIX requirement to open all w files at the beginning makes
+	sed behave nonintuitively when the w commands are preceded by
+	addresses or are within conditional blocks.  This implementation
+	follows historic practice and POSIX, by default, and provides the
+	-a option which opens the files only when they are needed.
+
+16.	POSIX does not specify how escape sequences other than \n and \D
+	(where D is the delimiter character) are to be treated.  This is
+	reasonable, however, it also doesn't state that the backslash is
+	to be discarded from the output regardless.  A strict reading of
+	POSIX would be that "echo xyz | sed 's/./\a/'" would display "\ayz".
+	As historic sed implementations always discarded the backslash,
+	this implementation does as well.
+
+17.	POSIX specifies that an address can be "empty".  This implies
+	that constructs like ",d" or "1,d" and ",5d" are allowed.  This
+	is not true for historic implementations or this implementation
+	of sed.
+
+18.	The b t and : commands are documented in POSIX to ignore leading
+	white space, but no mention is made of trailing white space.
+	Historic implementations of sed assigned different locations to
+	the labels "x" and "x ".  This is not useful, and leads to subtle
+	programming errors, but it is historic practice and changing it
+	could theoretically break working scripts.  This implementation
+	follows historic practice.
+
+19.	Although POSIX specifies that reading from files that do not exist
+	from within the script must not terminate the script, it does not
+	specify what happens if a write command fails.  Historic practice
+	is to fail immediately if the file cannot be opened or written.
+	This implementation follows historic practice.
+
+20.	Historic practice is that the \n construct can be used for either
+	string1 or string2 of the y command.  This is not specified by
+	POSIX.  This implementation follows historic practice.
+
+21.	POSIX does not specify if the "Nth occurrence" of an RE in a
+	substitute command is an overlapping or a non-overlapping one,
+	i.e. what is the result of s/a*/A/2 on the pattern "aaaaa aaaaa".
+	Historical practice is to drop core or only do non-overlapping
+	RE's.  This implementation only does non-overlapping RE's.
+
+22.	Historic implementations of sed ignore the RE delimiter characters
+	within character classes.  This is not specified in POSIX.  This
+	implementation follows historic practice.
+
+23.	Historic implementations handle empty RE's in a special way: the
+	empty RE is interpreted as if it were the last RE encountered,
+	whether in an address or elsewhere.  POSIX does not document this
+	behavior.  For example the command:
+
+		sed -e /abc/s//XXX/
+
+	substitutes XXX for the pattern abc.  The semantics of "the last
+	RE" can be defined in two different ways:
+
+	1. The last RE encountered when compiling (lexical/static scope).
+	2. The last RE encountered while running (dynamic scope).
+
+	While many historical implementations fail on programs depending
+	on scope differences, the SunOS version exhibited dynamic scope
+	behaviour.  This implementation does dynamic scoping, as this seems
+	the most useful and in order to remain consistent with historical
+	practice.
--- a/usr.bin/sed/compile.c
+++ b/usr.bin/sed/compile.c
@ -0,0 +1,714 @@
+/*-
+ * Copyright (c) 1992 Diomidis Spinellis.
+ * Copyright (c) 1992 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Diomidis Spinellis of Imperial College, University of London.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+static char sccsid[] = "@(#)compile.c	5.6 (Berkeley) 11/2/92";
+#endif /* not lint */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "defs.h"
+#include "extern.h"
+
+static char	 *compile_addr __P((char *, struct s_addr *));
+static char	 *compile_delimited __P((char *, char *));
+static char	 *compile_flags __P((char *, struct s_subst *));
+static char	 *compile_re __P((char *, regex_t **));
+static char	 *compile_subst __P((char *, struct s_subst *));
+static char	 *compile_text __P((void));
+static char	 *compile_tr __P((char *, char **));
+static struct s_command
+		**compile_stream __P((char *, struct s_command **, char *));
+static char	 *duptoeol __P((char *));
+static struct s_command
+		 *findlabel __P((struct s_command *, struct s_command *));
+static void	  fixuplabel __P((struct s_command *, struct s_command *,
+		  	struct s_command *));
+
+/*
+ * Command specification.  This is used to drive the command parser:
+ * compile_stream() looks the command character up in cmd_fmts[], rejects
+ * the command if more addresses were given than naddr allows, and then
+ * switches on args to parse whatever follows the command character.
+ */
+struct s_format {
+	char code;				/* Command code */
+	int naddr;				/* Max. number of address args */
+	enum e_args args;			/* Argument type */
+};
+
+/*
+ * Table of every command character the parser accepts, with the maximum
+ * number of addresses it may take and the kind of argument that follows.
+ * The final entry, whose code is '\0', is a sentinel: the lookup loop in
+ * compile_stream() stops when it reaches a zero code, so it must stay last.
+ */
+static struct s_format cmd_fmts[] = {
+	{'{', 2, GROUP},
+	{'a', 1, TEXT},
+	{'b', 2, BRANCH},
+	{'c', 2, TEXT},
+	{'d', 2, EMPTY},
+	{'D', 2, EMPTY},
+	{'g', 2, EMPTY},
+	{'G', 2, EMPTY},
+	{'h', 2, EMPTY},
+	{'H', 2, EMPTY},
+	{'i', 1, TEXT},
+	{'l', 2, EMPTY},
+	{'n', 2, EMPTY},
+	{'N', 2, EMPTY},
+	{'p', 2, EMPTY},
+	{'P', 2, EMPTY},
+	{'q', 1, EMPTY},
+	{'r', 1, RFILE},
+	{'s', 2, SUBST},
+	{'t', 2, BRANCH},
+	{'w', 2, WFILE},
+	{'x', 2, EMPTY},
+	{'y', 2, TR},
+	{'!', 2, NONSEL},
+	{':', 0, LABEL},
+	{'#', 0, COMMENT},
+	{'=', 1, EMPTY},
+	{'\0', 0, COMMENT},
+};
+
+/* The compiled program. */
+struct s_command *prog;
+
+/*
+ * Entry point of the script compiler.  Parses the complete script into
+ * the global list prog, resolves the branch labels, and then sizes the
+ * appends and match arrays from the counters collected along the way
+ * (appendnum and maxnsub).
+ */
+void
+compile()
+{
+	struct s_command **tail;
+
+	/* compile_stream() hands back the still-open tail link; close it. */
+	tail = compile_stream(NULL, &prog, NULL);
+	*tail = NULL;
+
+	/* Label fixup also counts the a and r commands into appendnum. */
+	fixuplabel(prog, prog, NULL);
+
+	appends = xmalloc(appendnum * sizeof(struct s_appends));
+	match = xmalloc(sizeof(regmatch_t) * (maxnsub + 1));
+}
+
+/*
+ * Advance the local variable p past any ASCII white space.  The macro
+ * expects a char pointer named p to be in scope at the point of use and
+ * does nothing when p is NULL.  Note that isspace() also matches '\n',
+ * so a trailing newline is skipped as well.
+ */
+#define EATSPACE() do {							\
+	if (p)								\
+		while (*p && isascii(*p) && isspace(*p))		\
+			p++;						\
+	} while (0)
+
+/*
+ * Compile script text into a chain of s_command nodes.
+ *
+ * terminator is NULL at the top level and non-NULL while the commands of
+ * a { group are being compiled; it decides whether a '}' or an EOF is
+ * legal.  link is the address of the pointer that receives the next
+ * command.  p, when not NULL, is the rest of a line that still has to be
+ * parsed before a new line is read with cu_fgets().
+ *
+ * Returns the address of the last, not yet filled in, next pointer so
+ * that the caller can terminate or continue the chain.
+ *
+ * lbuf is static, so the recursive calls made for nested groups all
+ * share the same line buffer.
+ */
+static struct s_command **
+compile_stream(terminator, link, p)
+	char *terminator;
+	struct s_command **link;
+	register char *p;
+{
+	static char lbuf[_POSIX2_LINE_MAX + 1];	/* To save stack */
+	struct s_command *cmd, *cmd2;
+	struct s_format *fp;
+	int naddr;				/* Number of addresses */
+
+	/* The caller already has text for us: parse it before reading more. */
+	if (p != NULL)
+		goto semicolon;
+	for (;;) {
+		if ((p = cu_fgets(lbuf, sizeof(lbuf))) == NULL) {
+			if (terminator != NULL)
+				err(COMPILE, "unexpected EOF (pending }'s)");
+			return (link);
+		}
+
+		/* Re-entered after a ';' to parse another command on the line. */
+semicolon:	EATSPACE();
+		/* Skip comment lines and lines with nothing (left) on them. */
+		if (p && (*p == '#' || *p == '\0'))
+			continue;
+		if (*p == '}') {
+			if (terminator == NULL)
+				err(COMPILE, "unexpected }");
+			return (link);
+		}
+		*link = cmd = xmalloc(sizeof(struct s_command));
+		link = &cmd->next;
+		cmd->nonsel = cmd->inrange = 0;
+		/* First parse the addresses */
+		naddr = 0;
+		cmd->a1 = cmd->a2 = NULL;
+
+/* Valid characters to start an address */
+#define	addrchar(c)	(strchr("0123456789/\\$", (c)))
+		if (addrchar(*p)) {
+			naddr++;
+			cmd->a1 = xmalloc(sizeof(struct s_addr));
+			p = compile_addr(p, cmd->a1);
+			EATSPACE();				/* EXTENSION */
+			if (*p == ',') {
+				naddr++;
+				p++;
+				EATSPACE();			/* EXTENSION */
+				cmd->a2 = xmalloc(sizeof(struct s_addr));
+				p = compile_addr(p, cmd->a2);
+			}
+		}
+
+nonsel:		/* Now parse the command */
+		EATSPACE();
+		if (!*p)
+			err(COMPILE, "command expected");
+		cmd->code = *p;
+		/* Linear search; the table ends with an entry whose code is 0. */
+		for (fp = cmd_fmts; fp->code; fp++)
+			if (fp->code == *p)
+				break;
+		if (!fp->code)
+			err(COMPILE, "invalid command code %c", *p);
+		if (naddr > fp->naddr)
+			err(COMPILE,
+"command %c expects up to %d address(es), found %d", *p, fp->naddr, naddr);
+		switch (fp->args) {
+		case NONSEL:			/* ! */
+			/* Each ! toggles nonsel; the real command follows. */
+			cmd->nonsel = ! cmd->nonsel;
+			p++;
+			goto nonsel;
+		case GROUP:			/* { */
+			p++;
+			EATSPACE();
+			if (!*p)
+				p = NULL;
+			/*
+			 * Compile the group body recursively into cmd->u.c and
+			 * hang a '}' marker node on its end.  The same marker
+			 * also becomes cmd->next, and compilation carries on
+			 * behind it.
+			 */
+			cmd2 = xmalloc(sizeof(struct s_command));
+			cmd2->code = '}';
+			*compile_stream("}", &cmd->u.c, p) = cmd2;
+			cmd->next = cmd2;
+			link = &cmd2->next;
+			break;
+		case EMPTY:		/* d D g G h H l n N p P q x = \0 */
+			p++;
+			EATSPACE();
+			if (*p == ';') {
+				p++;
+				link = &cmd->next;
+				goto semicolon;
+			}
+			if (*p)
+				err(COMPILE,
+"extra characters at the end of %c command", cmd->code);
+			break;
+		case TEXT:			/* a c i */
+			p++;
+			EATSPACE();
+			if (*p != '\\')
+				err(COMPILE,
+"command %c expects \\ followed by text", cmd->code);
+			p++;
+			EATSPACE();
+			if (*p)
+				err(COMPILE,
+"extra characters after \\ at the end of %c command", cmd->code);
+			/* The text itself is read from the following line(s). */
+			cmd->t = compile_text();
+			break;
+		case COMMENT:			/* \0 # */
+			break;
+		case WFILE:			/* w */
+			p++;
+			EATSPACE();
+			if (*p == '\0')
+				err(COMPILE, "filename expected");
+			/* duptoeol() also NUL-terminates p, so open(p) is safe. */
+			cmd->t = duptoeol(p);
+			if (aflag)
+				cmd->u.fd = -1;
+			else if ((cmd->u.fd = open(p, 
+			    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
+			    DEFFILEMODE)) == -1)
+				err(FATAL, "%s: %s\n", p, strerror(errno));
+			break;
+		case RFILE:			/* r */
+			p++;
+			EATSPACE();
+			if (*p == '\0')
+				err(COMPILE, "filename expected");
+			else
+				cmd->t = duptoeol(p);
+			break;
+		case BRANCH:			/* b t */
+			p++;
+			EATSPACE();
+			/* A NULL label is handled specially by fixuplabel(). */
+			if (*p == '\0')
+				cmd->t = NULL;
+			else
+				cmd->t = duptoeol(p);
+			break;
+		case LABEL:			/* : */
+			p++;
+			EATSPACE();
+			cmd->t = duptoeol(p);
+			if (strlen(p) == 0)
+				err(COMPILE, "empty label");
+			break;
+		case SUBST:			/* s */
+			p++;
+			if (*p == '\0' || *p == '\\')
+				err(COMPILE,
+"substitute pattern can not be delimited by newline or backslash");
+			cmd->u.s = xmalloc(sizeof(struct s_subst));
+			p = compile_re(p, &cmd->u.s->re);
+			if (p == NULL)
+				err(COMPILE, "unterminated substitute pattern");
+			/* Back up onto the delimiter: compile_subst() wants it. */
+			--p;
+			p = compile_subst(p, cmd->u.s);
+			p = compile_flags(p, cmd->u.s);
+			EATSPACE();
+			if (*p == ';') {
+				p++;
+				link = &cmd->next;
+				goto semicolon;
+			}
+			break;
+		case TR:			/* y */
+			p++;
+			p = compile_tr(p, (char **)&cmd->u.y);
+			EATSPACE();
+			if (*p == ';') {
+				p++;
+				link = &cmd->next;
+				goto semicolon;
+			}
+			if (*p)
+				err(COMPILE,
+"extra text at the end of a transform command");
+			break;
+		}
+	}
+}
+
+/*
+ * Get a delimited string.  P points to the delimiter of the string; d points
+ * to a buffer area.  Newline and delimiter escapes are processed; other
+ * escapes are ignored, i.e. copied through unchanged.
+ *
+ * Returns a pointer to the first character after the final delimiter or NULL
+ * in the case of a non-terminated string.  The character array d is filled
+ * with the processed string; it is NUL-terminated only when the closing
+ * delimiter was found.
+ *
+ * No bounds checking is done on d: the caller must supply a buffer that is
+ * at least as large as the string p points into.
+ */
+static char *
+compile_delimited(p, d)
+	char *p, *d;
+{
+	char c;
+
+	c = *p++;
+	if (c == '\0')
+		return (NULL);
+	else if (c == '\\')
+		err(COMPILE, "\\ can not be used as a string delimiter");
+	else if (c == '\n')
+		err(COMPILE, "newline can not be used as a string delimiter");
+	while (*p) {
+		if (*p == '\\' && p[1] == c)
+			/* Escaped delimiter: drop the backslash, copy it below. */
+			p++;
+		else if (*p == '\\' && p[1] == 'n') {
+			*d++ = '\n';
+			p += 2;
+			continue;
+		} else if (*p == '\\' && p[1] == '\\')
+			/* Keep both backslashes: one here, one below. */
+			*d++ = *p++;
+		else if (*p == c) {
+			*d = '\0';
+			return (p + 1);
+		}
+		*d++ = *p++;
+	}
+	return (NULL);
+}
+
+/*
+ * Get a regular expression.  P points to the delimiter of the regular
+ * expression; repp points to the address of a regexp pointer.  Newline
+ * and delimiter escapes are processed; other escapes are ignored.
+ *
+ * Returns a pointer to the first character after the final delimiter
+ * or NULL in the case of a non terminated regular expression.
+ *
+ * On success *repp is set to the compiled regular expression, or to NULL
+ * if the expression was empty.  On a non terminated expression *repp is
+ * set to NULL as well and nothing is allocated; the caller is expected
+ * to report the error.  The expression is compiled with cflags 0.
+ * maxnsub is raised to the number of subexpressions if that is larger.
+ */
+static char *
+compile_re(p, repp)
+	char *p;
+	regex_t **repp;
+{
+	int eval;
+	char re[_POSIX2_LINE_MAX + 1];
+
+	p = compile_delimited(p, re);
+	if (p == NULL) {
+		/*
+		 * re[] was never terminated, so it must not be looked at,
+		 * and there is no compiled pattern whose re_nsub could be
+		 * consulted.
+		 */
+		*repp = NULL;
+		return (NULL);
+	}
+	if (strlen(re) == 0) {
+		*repp = NULL;
+		return (p);
+	}
+	*repp = xmalloc(sizeof(regex_t));
+#ifdef GNU_REGEX
+	/* initialize pattern buffer */
+	(*repp)->buffer = NULL;
+	(*repp)->allocated = 0L;
+	/* xmalloc, like every other allocation here, so it is never NULL */
+	(*repp)->fastmap = xmalloc(FASTMAP_SIZE);
+	(*repp)->translate = 0;
+#endif
+	if ((eval = regcomp(*repp, re, 0)) != 0)
+		err(COMPILE, "RE error: %s", strregerror(eval, *repp));
+	if (maxnsub < (*repp)->re_nsub)
+		maxnsub = (*repp)->re_nsub;
+	return (p);
+}
+
+/*
+ * Compile the replacement string of an s command.
+ *
+ * p points to the delimiter that precedes the replacement text; s is the
+ * substitute structure being filled in, whose re member must already be
+ * set.  The text is collected up to the next unescaped delimiter and may
+ * span several script lines, which are read with cu_fgets().
+ *
+ * The backslash is kept in front of the digits 1-9, of & and of another
+ * backslash; in front of any other character it is dropped.  A reference
+ * to a subexpression the RE does not define is an error.  An unescaped
+ * newline inside the text is an error.
+ *
+ * On return s->new holds a saved copy of the text, s->maxbref the largest
+ * back reference used and s->linenum the current script line number.
+ * Returns a pointer to the first character after the closing delimiter,
+ * or NULL if p points at an empty string.
+ */
+static char *
+compile_subst(p, s)
+	char *p;
+	struct s_subst *s;
+{
+	static char lbuf[_POSIX2_LINE_MAX + 1];
+	int asize, ref, size;
+	char c, *text, *op, *sp;
+
+	c = *p++;			/* Terminator character */
+	if (c == '\0')
+		return (NULL);
+
+	s->maxbref = 0;
+	s->linenum = linenum;
+	asize = 2 * _POSIX2_LINE_MAX + 1;
+	text = xmalloc(asize);
+	size = 0;
+	do {
+		/* text may have moved when it was grown; recompute. */
+		op = sp = text + size;
+		for (; *p; p++) {
+			if (*p == '\\') {
+				p++;
+				/*
+				 * A backslash as the very last character:
+				 * stop here, otherwise strchr() would match
+				 * its own terminator and the scan would run
+				 * past the end of the line.
+				 */
+				if (*p == '\0')
+					break;
+				if (strchr("123456789", *p) != NULL) {
+					*sp++ = '\\';
+					ref = *p - '0';
+					if (s->re != NULL &&
+					    ref > s->re->re_nsub)
+						err(COMPILE,
+"\\%c not defined in the RE", *p);
+					if (s->maxbref < ref)
+						s->maxbref = ref;
+				} else if (*p == '&' || *p == '\\')
+					*sp++ = '\\';
+			} else if (*p == c) {
+				p++;
+				*sp++ = '\0';
+				size += sp - op;
+				s->new = xrealloc(text, size);
+				return (p);
+			} else if (*p == '\n') {
+				err(COMPILE,
+"unescaped newline inside substitute pattern");
+				/* NOTREACHED */
+			}
+			*sp++ = *p;
+		}
+		size += sp - op;
+		/* Keep room for one more full line, preserving the text. */
+		if (asize - size < _POSIX2_LINE_MAX + 1) {
+			asize *= 2;
+			text = xrealloc(text, asize);
+		}
+	} while (cu_fgets(p = lbuf, sizeof(lbuf)));
+	err(COMPILE, "unterminated substitute in regular expression");
+	/* NOTREACHED */
+}
+
+/*
+ * Compile the flags of the s command.
+ *
+ * p points just behind the closing delimiter of the replacement text.
+ * Recognised flags are g, p, an occurrence count starting with 1-9, and
+ * w followed by a file name that extends to the end of the line.  At most
+ * one of g and the count may be given.  White space between flags is
+ * skipped.
+ *
+ * Fills in s->n (1 by default, 0 for g, otherwise the count), s->p,
+ * s->wfile and s->wfd.  Unless aflag is set the w file is opened here.
+ *
+ * Returns a pointer to the character that ended the flags: a NUL, a
+ * newline or a ';'.
+ */
+static char *
+compile_flags(p, s)
+	char *p;
+	struct s_subst *s;
+{
+	int gn;			/* True if we have seen g or n */
+	char wfile[_POSIX2_LINE_MAX + 1], *q;
+
+	s->n = 1;				/* Default */
+	s->p = 0;
+	s->wfile = NULL;
+	s->wfd = -1;
+	for (gn = 0;;) {
+		EATSPACE();			/* EXTENSION */
+		switch (*p) {
+		case 'g':
+			if (gn)
+				err(COMPILE,
+"more than one number or 'g' in substitute flags");
+			gn = 1;
+			s->n = 0;
+			break;
+		case '\0':
+		case '\n':
+		case ';':
+			return (p);
+		case 'p':
+			s->p = 1;
+			break;
+		case '1': case '2': case '3':
+		case '4': case '5': case '6':
+		case '7': case '8': case '9':
+			if (gn)
+				err(COMPILE,
+"more than one number or 'g' in substitute flags");
+			gn = 1;
+			/* XXX Check for overflow */
+			s->n = (int)strtol(p, &p, 10);
+			/*
+			 * strtol() has already moved p to the first character
+			 * behind the number.  Skip the p++ at the bottom of
+			 * the loop, or the next flag, a ';' or even the
+			 * terminating NUL would be stepped over.
+			 */
+			continue;
+		case 'w':
+			p++;
+#ifdef HISTORIC_PRACTICE
+			if (*p != ' ') {
+				err(WARNING, "space missing before w wfile");
+				return (p);
+			}
+#endif
+			EATSPACE();
+			/* The file name is everything up to the end of line. */
+			q = wfile;
+			while (*p) {
+				if (*p == '\n')
+					break;
+				*q++ = *p++;
+			}
+			*q = '\0';
+			if (q == wfile)
+				err(COMPILE, "no wfile specified");
+			s->wfile = strdup(wfile);
+			if (!aflag && (s->wfd = open(wfile,
+			    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
+			    DEFFILEMODE)) == -1)
+				err(FATAL, "%s: %s\n", wfile, strerror(errno));
+			return (p);
+		default:
+			err(COMPILE,
+			    "bad flag in substitute command: '%c'", *p);
+			break;
+		}
+		p++;
+	}
+}
+
+/*
+ * Compile a translation set of strings into a lookup table.
+ *
+ * p points to the delimiter in front of the source string of a y command.
+ * Both strings are read with compile_delimited() and must have the same
+ * length.  *transtab is set to a newly allocated table of UCHAR_MAX + 1
+ * entries, indexed by the input character, that maps every character to
+ * itself except for those named in the source string, which map to the
+ * character at the same position in the target string.
+ *
+ * Returns a pointer to the first non-blank character after the closing
+ * delimiter of the target string.
+ */
+static char *
+compile_tr(p, transtab)
+	char *p;
+	char **transtab;
+{
+	int i;
+	char *lt, *op, *np;
+	char old[_POSIX2_LINE_MAX + 1];
+	char new[_POSIX2_LINE_MAX + 1];
+
+	if (*p == '\0' || *p == '\\')
+		err(COMPILE,
+"transform pattern can not be delimited by newline or backslash");
+	p = compile_delimited(p, old);
+	if (p == NULL) {
+		err(COMPILE, "unterminated transform source string");
+		return (NULL);
+	}
+	/* The delimiter closing the source also opens the target string. */
+	p = compile_delimited(--p, new);
+	if (p == NULL) {
+		err(COMPILE, "unterminated transform target string");
+		return (NULL);
+	}
+	EATSPACE();
+	if (strlen(new) != strlen(old)) {
+		err(COMPILE, "transform strings are not the same length");
+		return (NULL);
+	}
+	/*
+	 * We assume characters are 8 bits.  The table needs one entry for
+	 * each value from 0 up to and including UCHAR_MAX.
+	 */
+	lt = xmalloc(UCHAR_MAX + 1);
+	for (i = 0; i <= UCHAR_MAX; i++)
+		lt[i] = (char)i;
+	for (op = old, np = new; *op; op++, np++)
+		lt[(u_char)*op] = *np;
+	*transtab = lt;
+	return (p);
+}
+
+/*
+ * Compile the text following an a, c or i command.
+ *
+ * Lines are read with cu_fgets().  Leading white space on each line is
+ * dropped and a backslash is removed while the character after it is
+ * kept.  The text continues on the next line as long as the character
+ * before the final one of the line read is a backslash.
+ *
+ * Returns a newly allocated, NUL-terminated copy of the text.
+ */
+static char *
+compile_text()
+{
+	int asize, size;
+	char *text, *p, *op, *s;
+	char lbuf[_POSIX2_LINE_MAX + 1];
+
+	asize = 2 * _POSIX2_LINE_MAX + 1;
+	text = xmalloc(asize);
+	size = 0;
+	while (cu_fgets(lbuf, sizeof(lbuf))) {
+		op = s = text + size;
+		p = lbuf;
+		EATSPACE();
+		for (; *p; p++) {
+			/*
+			 * Drop the backslash of an escape, but never step
+			 * onto the terminating NUL: the loop increment
+			 * would then run past the end of the line.
+			 */
+			if (*p == '\\' && p[1] != '\0')
+				p++;
+			*s++ = *p;
+		}
+		size += s - op;
+		/*
+		 * p is at the end of the line now.  Only look two characters
+		 * back if the line is long enough, so that nothing in front
+		 * of lbuf is read for an empty or one character line.
+		 */
+		if (p - lbuf < 2 || p[-2] != '\\')
+			break;
+		/* Keep room for one more full line, preserving the text. */
+		if (asize - size < _POSIX2_LINE_MAX + 1) {
+			asize *= 2;
+			text = xrealloc(text, asize);
+		}
+	}
+	/* Terminate here so that text cut short by EOF is terminated too. */
+	text[size] = '\0';
+	return (xrealloc(text, size + 1));
+}
+
+/*
+ * Parse one address starting at p and record it in *a.
+ *
+ * Three forms are understood: a context address (/RE/, or \cREc with an
+ * arbitrary delimiter c), the last line ($), and a decimal line number.
+ * Returns a pointer to the first character behind the address.
+ */
+static char *
+compile_addr(p, a)
+	char *p;
+	struct s_addr *a;
+{
+	char *rest;
+
+	/* Context address; a leading backslash introduces the delimiter. */
+	if (*p == '\\' || *p == '/') {
+		if (*p == '\\')
+			p++;
+		rest = compile_re(p, &a->u.r);
+		if (rest == NULL)
+			err(COMPILE, "unterminated regular expression");
+		a->type = AT_RE;
+		return (rest);
+	}
+
+	/* Last line */
+	if (*p == '$') {
+		a->type = AT_LAST;
+		return (p + 1);
+	}
+
+	/* Line number */
+	if (*p >= '0' && *p <= '9') {
+		a->type = AT_LINE;
+		a->u.l = strtol(p, &rest, 10);
+		return (rest);
+	}
+
+	err(COMPILE, "expected context address");
+	return (NULL);
+}
+
+/*
+ * Duplicate the text from s up to, but not including, the first newline
+ * or the end of the string.  The input is cut off at that point as a
+ * side effect, so s itself is a terminated string afterwards.
+ */
+static char *
+duptoeol(s)
+	register char *s;
+{
+	char *eol, *copy;
+	size_t nbytes;
+
+	eol = s;
+	while (*eol != '\0' && *eol != '\n')
+		eol++;
+	*eol = '\0';
+
+	/* Copy the text together with its terminator. */
+	nbytes = eol - s + 1;
+	copy = xmalloc(nbytes);
+	return (memmove(copy, s, nbytes));
+}
+
+/*
+ * Look for a ':' command whose label equals the text of command l.  The
+ * search walks the list that starts at cp and descends into the bodies
+ * of '{' groups.  The command l itself never matches.  Returns the
+ * command found, or NULL if there is none.
+ */
+static struct s_command *
+findlabel(l, cp)
+	struct s_command *l, *cp;
+{
+	struct s_command *found;
+
+	while (cp != NULL) {
+		switch (cp->code) {
+		case ':':
+			if (cp != l && strcmp(l->t, cp->t) == 0)
+				return (cp);
+			break;
+		case '{':
+			found = findlabel(l, cp->u.c);
+			if (found != NULL)
+				return (found);
+			break;
+		}
+		cp = cp->next;
+	}
+	return (NULL);
+}
+
+/*
+ * Second pass over the compiled script, from cp up to but excluding end:
+ *  - report labels that are defined more than once;
+ *  - count the a and r commands in appendnum;
+ *  - replace the label text of every b and t command by a pointer to
+ *    the command carrying that label and free the text (a branch
+ *    without a label gets a NULL target);
+ *  - recurse into the body of each '{' group.
+ * Root is the start of the whole script and is where every label search
+ * begins.
+ * TODO: Remove } nodes
+ */
+static void
+fixuplabel(root, cp, end)
+	struct s_command *root, *cp, *end;
+{
+	struct s_command *target;
+
+	while (cp != end) {
+		if (cp->code == ':') {
+			if (findlabel(cp, root))
+				err(COMPILE2, "duplicate label %s", cp->t);
+		} else if (cp->code == 'a' || cp->code == 'r') {
+			appendnum++;
+		} else if (cp->code == 'b' || cp->code == 't') {
+			if (cp->t == NULL) {
+				cp->u.c = NULL;
+			} else {
+				target = findlabel(cp, root);
+				if (target == NULL)
+					err(COMPILE2,
+					    "undefined label '%s'", cp->t);
+				free(cp->t);
+				cp->u.c = target;
+			}
+		} else if (cp->code == '{') {
+			/* The group body ends at the node that follows cp. */
+			fixuplabel(root, cp->u.c, cp->next);
+		}
+		cp = cp->next;
+	}
+}
--- a/usr.bin/sed/defs.h
+++ b/usr.bin/sed/defs.h
@ -0,0 +1,148 @@
+/*-
+ * Copyright (c) 1992 Diomidis Spinellis.
+ * Copyright (c) 1992 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Diomidis Spinellis of Imperial College, University of London.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)defs.h	5.3 (Berkeley) 8/28/92
+ */
+
+/*
+ * Types of address specifications
+ */
+enum e_atype {
+	AT_RE,					/* Lines that match an RE */
+	AT_LINE,				/* Specific line */
+	AT_LAST,				/* Last line */
+};
+
+/*
+ * Format of an address.  Which member of u is meaningful depends on
+ * type: l for AT_LINE, r for AT_RE; AT_LAST uses neither.
+ */
+struct s_addr {
+	enum e_atype type;			/* Address type */
+	union {
+		u_long l;			/* Line number */
+		regex_t *r;			/* Regular expression; NULL if empty */
+	} u;
+};
+
+/*
+ * Substitution command
+ */
+struct s_subst {
+	int n;					/* Occurrence to subst.; 0 for g flag */
+	int p;					/* True if p flag */
+	char *wfile;				/* NULL if no wfile */
+	int wfd;				/* Cached file descriptor; -1 if not open */
+	regex_t *re;				/* Regular expression; NULL if empty */
+	int maxbref;				/* Largest backreference. */
+	u_long linenum;				/* Line number. */
+	char *new;				/* Replacement text */
+};
+
+
+/*
+ * An internally compiled command.
+ * Initially, the label of a b or t command is stored as text in t; on a
+ * second pass that text is freed and u.c is set to point to the command
+ * carrying the label.
+ */
+struct s_command {
+	struct s_command *next;			/* Pointer to next command */
+	struct s_addr *a1, *a2;			/* Start and end address */
+	char *t;				/* Text for : a c i r w (and b t labels) */
+	union {
+		struct s_command *c;		/* Command(s) for b t { */
+		struct s_subst *s;		/* Substitute command */
+		u_char *y;			/* Replace command array */
+		int fd;				/* File descriptor for w */
+	} u;
+	char code;				/* Command code */
+	u_int nonsel:1;				/* True if ! */
+	u_int inrange:1;			/* True if in range */
+};
+
+/*
+ * Types of command arguments recognised by the parser.  Each entry of
+ * the command table in compile.c carries one of these values, and the
+ * parser switches on it to handle the rest of the command.
+ */
+enum e_args {
+	EMPTY,			/* d D g G h H l n N p P q x = */
+	TEXT,			/* a c i */
+	NONSEL,			/* ! */
+	GROUP,			/* { */
+	COMMENT,		/* # and the \0 table terminator */
+	BRANCH,			/* b t */
+	LABEL,			/* : */
+	RFILE,			/* r */
+	WFILE,			/* w */
+	SUBST,			/* s */
+	TR			/* y */
+};
+
+/*
+ * Structure containing things to append before a line is read.
+ * compile() allocates an array of these, one per a and r command found
+ * in the script.
+ */
+struct s_appends {
+	/* NOTE(review): presumably selects whether s is text or a file name. */
+	enum {AP_STRING, AP_FILE} type;
+	char *s;
+};
+
+/*
+ * How new text is combined with the current contents of a space.
+ * NOTE(review): the users of these flags are not in this file; confirm
+ * the exact semantics against the code that copies into a SPACE.
+ */
+enum e_spflag {
+	APPEND,					/* Append to the contents. */
+	APPENDNL,				/* Append, with newline. */
+	REPLACE,				/* Replace the contents. */
+};
+
+/*
+ * Structure for a space (process, hold, otherwise).
+ * NOTE(review): space presumably points somewhere into the memory that
+ * back refers to; confirm against the code that manages these.
+ */
+typedef struct {
+	char *space;		/* Current space pointer. */
+	size_t len;		/* Current length. */
+	int deleted;		/* If deleted. */
+	char *back;		/* Backing memory. */
+	size_t blen;		/* Backing memory length. */
+} SPACE;
+
+/*
+ * Error severity codes, passed as the first argument of err().
+ * NOTE(review): COMPILE and COMPILE2 have the same value, so err()
+ * cannot tell them apart by value; confirm whether that is intended.
+ */
+#define	FATAL		0	/* Exit immediately with 1 */
+#define	ERROR		1	/* Continue, but change exit value */
+#define	WARNING		2	/* Just print the warning */
+#define	COMPILE		3	/* Print error, count and finish script */
+#define	COMPILE2	3	/* Print error, count and finish script */
+
+/*
+ * Only needed with the GNU regex library: compile_re() allocates a
+ * fastmap of this many bytes for every regular expression it compiles.
+ */
+#ifdef GNU_REGEX
+# define FASTMAP_SIZE 256	/* size of fastmap for ASCII char set */
+#endif
--- a/usr.bin/sed/extern.h
+++ b/usr.bin/sed/extern.h
@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 1992 Diomidis Spinellis.
+ * Copyright (c) 1992 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Diomidis Spinellis of Imperial College, University of London.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)extern.h	5.5 (Berkeley) 8/30/92
+ */
+
+/* Global state shared between the source files. */
+extern struct s_command *prog;		/* Head of the compiled command list. */
+extern struct s_appends *appends;	/* Pending 'a'/'r' output requests. */
+extern regmatch_t *match;		/* Match offsets filled in by regexec(). */
+extern size_t maxnsub;			/* regexec() is given maxnsub + 1 slots. */
+extern u_long linenum;			/* Current script or input line number. */
+extern int appendnum;			/* Size of the appends array. */
+extern int lastline;			/* True on the last line of the last file. */
+extern int aflag, eflag, nflag;		/* Set by the -a, -e and -n options. */
+extern char *fname;			/* Current file name, for diagnostics. */
+
+/* Functions shared between the source files. */
+void	 compile __P((void));
+char	*cu_fgets __P((char *, int));
+void	 err __P((int, const char *, ...));
+int	 mf_fgets __P((SPACE *, enum e_spflag));
+void	 process __P((void));
+char	*strregerror __P((int, regex_t *));
+void	*xmalloc __P((u_int));
+void	*xrealloc __P((void *, u_int));
+void	 cfclose __P((struct s_command *, struct s_command *));
+void	 cspace __P((SPACE *, char *, size_t, enum e_spflag));
--- a/usr.bin/sed/hanoi.sed
+++ b/usr.bin/sed/hanoi.sed
@ -0,0 +1,102 @@
+# Towers of Hanoi in sed.
+#
+#	@(#)hanoi.sed	5.1 (Berkeley) 10/10/90
+#
+#
+# Ex:
+# Run "sed -f hanoi.sed", and enter:
+#
+#	:abcd: : :<CR><CR>
+#
+# (note -- TWO carriage returns, a peculiarity of sed), this will output the
+# sequence of states involved in moving 4 rings, the largest called "a" and
+# the smallest called "d", from the first to the second of three towers, so
+# that the rings on any tower at any time are in descending order of size.
+# You can start with a different arrangement and a different number of rings,
+# say :ce:b:ax: and it will give the shortest procedure for moving them all
+# to the middle tower.  The rules are: the names of the rings must all be
+# lower-case letters, they must be input within 3 fields (representing the
+# towers) and delimited by 4 colons, such that the letters within each field
+# are in alphabetical order (i.e. rings are in descending order of size).
+#
+# For the benefit of anyone who wants to figure out the script, an "internal"
+# line of the form
+#		b:0abx:1a2b3 :2   :3x2
+# has the following meaning: the material after the three markers :1, :2,
+# and :3 represents the three towers; in this case the current set-up is
+# ":ab :   :x  :".  The numbers after a, b and x in these fields indicate
+# that the next time it gets a chance, it will move a to tower 2, move b
+# to tower 3, and move x to tower 2.  The string after :0 just keeps track
+# of the alphabetical order of the names of the rings.  The b at the
+# beginning means that it is now dealing with ring b (either about to move
+# it, or re-evaluating where it should next be moved to).
+#
+# Although this version is "limited" to 26 rings because of the size of the
+# alphabet, one could write a script using the same idea in which the rings
+# were represented by arbitrary [strings][within][brackets], and in place of
+# the built-in line of the script giving the order of the letters of the
+# alphabet, it would accept from the user a line giving the ordering to be
+# assumed, e.g. [ucbvax][decvax][hplabs][foo][bar].
+#
+#			George Bergman
+#			Math, UC Berkeley 94720 USA
+
+# cleaning, diagnostics
+# Remove every run of spaces from the input line.
+s/  *//g
+# Ignore lines that are now empty.
+/^$/d
+# Reject lines containing anything other than lower-case letters and colons.
+/[^a-z:]/{a\
+Illegal characters: use only a-z and ":".  Try again.
+d
+}
+# Reject lines that are not exactly three colon-delimited fields of letters.
+/^:[a-z]*:[a-z]*:[a-z]*:$/!{a\
+Incorrect format: use\
+\	: string1 : string2 : string3 :<CR><CR>\
+Try again.
+d
+}
+# Reject lines in which the same letter (ring) appears twice.
+/\([a-z]\).*\1/{a\
+Repeated letters not allowed.  Try again.
+d
+}
+# initial formatting
+h
+s/[a-z]/ /g
+G
+s/^:\( *\):\( *\):\( *\):\n:\([a-z]*\):\([a-z]*\):\([a-z]*\):$/:1\4\2\3:2\5\1\3:3\6\1\2:0/
+s/[a-z]/&2/g
+s/^/abcdefghijklmnopqrstuvwxyz/
+:a
+s/^\(.\).*\1.*/&\1/
+s/.//
+/^[^:]/ba
+s/\([^0]*\)\(:0.*\)/\2\1:/
+s/^[^0]*0\(.\)/\1&/
+:b
+# outputting current state without markers
+h
+s/.*:1/:/
+s/[123]//gp
+g
+:c
+# establishing destinations
+/^\(.\).*\1:1/td
+/^\(.\).*:1[^:]*\11/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\31/
+/^\(.\).*:1[^:]*\12/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\33/
+/^\(.\).*:1[^:]*\13/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\32/
+/^\(.\).*:2[^:]*\11/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\33/
+/^\(.\).*:2[^:]*\12/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\32/
+/^\(.\).*:2[^:]*\13/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\31/
+/^\(.\).*:3[^:]*\11/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\32/
+/^\(.\).*:3[^:]*\12/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\31/
+/^\(.\).*:3[^:]*\13/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\33/
+bc
+# iterate back to find smallest out-of-place ring
+:d
+s/^\(.\)\(:0[^:]*\([^:]\)\1.*:\([123]\)[^:]*\1\)\4/\3\2\4/
+td
+# move said ring (right, resp. left)
+s/^\(.\)\(.*\)\1\([23]\)\(.*:\3[^ ]*\) /\1\2 \4\1\3/
+s/^\(.\)\(.*:\([12]\)[^ ]*\) \(.*\)\1\3/\1\2\1\3\4 /
+tb
+s/.*/Done!  Try another, or end with ^D./p
+d
--- a/usr.bin/sed/main.c
+++ b/usr.bin/sed/main.c
@ -0,0 +1,352 @@
+/*-
+ * Copyright (c) 1992 Diomidis Spinellis.
+ * Copyright (c) 1992 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Diomidis Spinellis of Imperial College, University of London.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+char copyright[] =
+"@(#) Copyright (c) 1992 The Regents of the University of California.\n\
+ All rights reserved.\n";
+#endif /* not lint */
+
+#ifndef lint
+static char sccsid[] = "@(#)main.c	5.6 (Berkeley) 8/30/92";
+#endif /* not lint */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <regex.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "defs.h"
+#include "extern.h"
+
+/*
+ * Linked list of units (strings and files) to be compiled
+ */
+struct s_compunit {
+	struct s_compunit *next;
+	/*
+	 * CU_FILE units come from -f (s is the script file name); CU_STRING
+	 * units come from -e or the first operand (s is the script text).
+	 */
+	enum e_cut {CU_FILE, CU_STRING} type;
+	char *s;			/* Pointer to string or fname */
+};
+
+/*
+ * Linked list pointer to compilation units and pointer to current
+ * next pointer.
+ */
+static struct s_compunit *script, **cu_nextp = &script;
+
+/*
+ * Linked list of files to be processed
+ */
+struct s_flist {
+	char *fname;		/* Input file name; NULL means standard input. */
+	struct s_flist *next;
+};
+
+/*
+ * Linked list pointer to files and pointer to current
+ * next pointer.
+ */
+static struct s_flist *files, **fl_nextp = &files;
+
+int aflag, eflag, nflag;
+
+/*
+ * Current file and line number; line numbers restart across compilation
+ * units, but span across input files.
+ */
+char *fname;			/* File name. */
+u_long linenum;
+int lastline;			/* TRUE on the last line of the last file */
+
+static void add_compunit __P((enum e_cut, char *));
+static void add_file __P((char *));
+
+/*
+ * main --
+ *	Parse the command line, compile the script and run it over the input.
+ *
+ *	-a and -n set aflag and nflag.  Each -e adds a script string and each
+ *	-f a script file, in command-line order.  When neither -e nor -f is
+ *	given, the first operand is taken as the script.  Remaining operands
+ *	are input files; with none, standard input is read.
+ */
+int
+main(argc, argv)
+	int argc;
+	char *argv[];
+{
+	int c, fflag;
+
+	fflag = 0;
+	while ((c = getopt(argc, argv, "ae:f:n")) != EOF)
+		switch (c) {
+		case 'a':
+			aflag = 1;
+			break;
+		case 'e':
+			eflag = 1;
+			add_compunit(CU_STRING, optarg);
+			break;
+		case 'f':
+			fflag = 1;
+			add_compunit(CU_FILE, optarg);
+			break;
+		case 'n':
+			nflag = 1;
+			break;
+		default:
+		case '?':
+			(void)fprintf(stderr,
+"usage:\tsed script [-an] [file ...]\n\tsed [-an] [-e script] ... [-f script_file] ... [file ...]\n");
+			exit(1);
+		}
+	argc -= optind;
+	argv += optind;
+
+	/* First usage case; script is the first arg */
+	if (!eflag && !fflag && *argv) {
+		add_compunit(CU_STRING, *argv);
+		argv++;
+	}
+
+	compile();
+
+	/* Continue with first and start second usage */
+	if (*argv)
+		for (; *argv; argv++)
+			add_file(*argv);
+	else
+		add_file(NULL);		/* NULL file name selects stdin. */
+	process();
+	cfclose(prog, NULL);
+	/* A failed flush of buffered output must not go unnoticed. */
+	if (fclose(stdout))
+		err(FATAL, "stdout: %s", strerror(errno));
+	exit (0);
+}
+
+/*
+ * Like fgets, but go through the chain of compilation units chaining them
+ * together.  Empty strings and files are ignored.
+ */
+/*
+ * buf and n are the destination buffer and its size, as for fgets().
+ * Returns buf, or NULL once every compilation unit has been consumed.
+ * Side effects: fname and linenum are updated for use in diagnostics, and
+ * nflag is set when the first line of a unit starts with "#n".
+ */
+char *
+cu_fgets(buf, n)
+	char *buf;
+	int n;
+{
+	static enum {ST_EOF, ST_FILE, ST_STRING} state = ST_EOF;
+	static FILE *f;		/* Current open file */
+	static char *s;		/* Current pointer inside string */
+	static char string_ident[30];
+	char *p;
+
+again:
+	switch (state) {
+	case ST_EOF:
+		/* No unit is open: start the next one, or report the end. */
+		if (script == NULL)
+			return (NULL);
+		linenum = 0;
+		switch (script->type) {
+		case CU_FILE:
+			if ((f = fopen(script->s, "r")) == NULL)
+				err(FATAL,
+				    "%s: %s", script->s, strerror(errno));
+			fname = script->s;
+			state = ST_FILE;
+			goto again;
+		case CU_STRING:
+			/*
+			 * Use the quoted script text as the "file name" in
+			 * diagnostics; when it does not fit, end it with
+			 * an ellipsis and a closing quote.
+			 */
+			if ((snprintf(string_ident,
+			    sizeof(string_ident), "\"%s\"", script->s)) >=
+			    sizeof(string_ident) - 1)
+				(void)strcpy(string_ident +
+				    sizeof(string_ident) - 6, " ...\"");
+			fname = string_ident;
+			s = script->s;
+			state = ST_STRING;
+			goto again;
+		}
+	case ST_FILE:
+		if ((p = fgets(buf, n, f)) != NULL) {
+			linenum++;
+			if (linenum == 1 && buf[0] == '#' && buf[1] == 'n')
+				nflag = 1;
+			return (p);
+		}
+		/* End of this script file: move on to the next unit. */
+		script = script->next;
+		(void)fclose(f);
+		state = ST_EOF;
+		goto again;
+	case ST_STRING:
+		if (linenum == 0 && s[0] == '#' && s[1] == 'n')
+			nflag = 1;
+		p = buf;
+		for (;;) {
+			/* Buffer full: hand back what has been copied. */
+			if (n-- <= 1) {
+				*p = '\0';
+				linenum++;
+				return (buf);
+			}
+			switch (*s) {
+			case '\0':
+				state = ST_EOF;
+				if (s == script->s) {
+					/* Empty string unit: skip it. */
+					script = script->next;
+					goto again;
+				} else {
+					/* Final line, without a newline. */
+					script = script->next;
+					*p = '\0';
+					linenum++;
+					return (buf);
+				}
+			case '\n':
+				*p++ = '\n';
+				*p = '\0';
+				s++;
+				linenum++;
+				return (buf);
+			default:
+				*p++ = *s++;
+			}
+		}
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Like fgets, but go through the list of files chaining them together.
+ * Set len to the length of the line.
+ */
+/*
+ * mf_fgets --
+ *	Read the next input line into the space sp, combining it with the
+ *	current contents as directed by spflag.  Input files are opened one
+ *	after another; empty files are skipped.  One character of look-ahead
+ *	is used so that lastline can be set while the final line is still
+ *	being processed.
+ *
+ *	Returns 1 if a line was read, 0 if there is no more input.
+ */
+int
+mf_fgets(sp, spflag)
+	SPACE *sp;
+	enum e_spflag spflag;
+{
+	static FILE *f;		/* Current open file */
+	size_t len;
+	int c;			/* Must be an int to tell EOF from a byte. */
+	char *p;
+
+	if (f == NULL)
+		/* Advance to first non-empty file */
+		for (;;) {
+			if (files == NULL) {
+				lastline = 1;
+				return (0);
+			}
+			if (files->fname == NULL) {
+				f = stdin;
+				fname = "stdin";
+			} else {
+				fname = files->fname;
+				if ((f = fopen(fname, "r")) == NULL)
+					err(FATAL, "%s: %s",
+					    fname, strerror(errno));
+			}
+			if ((c = getc(f)) != EOF) {
+				(void)ungetc(c, f);
+				break;
+			}
+			(void)fclose(f);
+			files = files->next;
+		}
+
+	/* The previous call already consumed the last line. */
+	if (lastline) {
+		sp->len = 0;
+		return (0);
+	}
+
+	/*
+	 * Use fgetline so that we can handle essentially infinite input
+	 * data.  Can't use the pointer into the stdio buffer as the process
+	 * space because the ungetc() can cause it to move.
+	 */
+	p = fgetline(f, &len);
+	if (ferror(f))
+		err(FATAL, "%s: %s", fname, strerror(errno ? errno : EIO));
+	cspace(sp, p, len, spflag);
+
+	linenum++;
+	/* Advance to next non-empty file */
+	while ((c = getc(f)) == EOF) {
+		(void)fclose(f);
+		files = files->next;
+		if (files == NULL) {
+			lastline = 1;
+			return (1);
+		}
+		if (files->fname == NULL) {
+			f = stdin;
+			fname = "stdin";
+		} else {
+			fname = files->fname;
+			if ((f = fopen(fname, "r")) == NULL)
+				err(FATAL, "%s: %s", fname, strerror(errno));
+		}
+	}
+	(void)ungetc(c, f);
+	return (1);
+}
+
+/*
+ * Add a compilation unit to the linked list
+ */
+static void
+add_compunit(type, s)
+	enum e_cut type;
+	char *s;
+{
+	struct s_compunit *unit;
+
+	/* Build the new unit; it becomes the tail of the list. */
+	unit = xmalloc(sizeof(*unit));
+	unit->next = NULL;
+	unit->s = s;
+	unit->type = type;
+
+	/* Hook it in and remember where the following unit will go. */
+	*cu_nextp = unit;
+	cu_nextp = &unit->next;
+}
+
+/*
+ * Add a file to the linked list
+ */
+static void
+add_file(s)
+	char *s;
+{
+	struct s_flist *entry;
+
+	/* Build the new entry; it becomes the tail of the list. */
+	entry = xmalloc(sizeof(*entry));
+	entry->fname = s;
+	entry->next = NULL;
+
+	/* Hook it in and remember where the following entry will go. */
+	*fl_nextp = entry;
+	fl_nextp = &entry->next;
+}
--- a/usr.bin/sed/math.sed
+++ b/usr.bin/sed/math.sed
@ -0,0 +1,163 @@
+#
+#	@(#)math.sed	5.1 (Berkeley) 2/20/91
+#
+# Addition and multiplication in sed.
+# ++ for a limited time only do (expr) too!!!
+#
+# Kevin S Braunsdorf, PUCC UNIX Group, ksb@cc.purdue.edu.
+#
+# Ex:
+#	echo "4+7*3" | sed -f %f
+
+# make sure the expression is well formed
+s/[ 	]//g
+/[+*\/-]$/{
+	a\
+	poorly formed expression, operator on the end
+	q
+}
+/^[+*\/]/{
+	a\
+	poorly formed expression, leading operator
+	q
+}
+
+# fill hold space with done token
+x
+s/^.*/done/
+x
+
+# main loop, process operators (*, + and () )
+: loop
+/^\+/{
+	s///
+	b loop
+}
+/^\(.*\)(\([^)]*\))\(.*\)$/{
+	H
+	s//\2/
+	x
+	s/^\(.*\)\n\(.*\)(\([^()]*\))\(.*\)$/()\2@\4@\1/
+	x
+	b loop
+}
+/^[0-9]*\*/b mul
+/^\([0-9]*\)\+\([0-9+*]*\*[0-9]*\)$/{
+	s//\2+\1/
+	b loop
+}
+/^[0-9]*\+/{
+	s/$/=/
+	b add
+}
+x
+/^done$/{
+	x
+	p
+	d
+}
+/^()/{
+	s///
+	x
+	G
+	s/\(.*\)\n\([^@]*\)@\([^@]*\)@\(.*\)/\2\1\3/
+	x
+	s/[^@]*@[^@]*@\(.*\)/\1/
+	x
+	b loop
+}
+i\
+help, stack problem
+p
+x
+p
+q
+
+# turn mul into add until 1*x -> x
+: mul
+# Left operand is 1 (leading zeros allowed): delete the "1*" prefix, since
+# the empty RE in s/// reuses the address RE, and resume the main loop.
+/^0*1\*/{
+	s///
+	b loop
+}
+# Left operand ends in 0: drop that 0 and append a 0 to the right operand.
+/^\([0-9]*\)0\*/{
+	s/^\([0-9]*\)0\*\([0-9]*\)/\1*\20/
+	b mul
+}
+# Otherwise decrement the last digit of the left operand by one.  The
+# digits are tried in ascending order so only one substitution applies.
+s/^\([0-9]*\)1\*/\10*/
+s/^\([0-9]*\)2\*/\11*/
+s/^\([0-9]*\)3\*/\12*/
+s/^\([0-9]*\)4\*/\13*/
+s/^\([0-9]*\)5\*/\14*/
+s/^\([0-9]*\)6\*/\15*/
+s/^\([0-9]*\)7\*/\16*/
+s/^\([0-9]*\)8\*/\17*/
+s/^\([0-9]*\)9\*/\18*/
+# Compensate for the decrement by adding one more copy of the text that
+# follows the "*".
+s/\*\([0-9*]*\)/*\1+\1/
+b mul
+
+# get rid of a plus term until 0+x -> x
+: add
+/^\+\([0-9+*]*\)=/{
+	s//\1/
+	b loop
+}
+/^\([0-9*]*\)\+=/{
+	s//\1/
+	b loop
+}
+/^\([0-9]*\)\+\([0-9*+]*\)\+=/{
+	s//\2+\1/
+	b loop
+}
+/^\([0-9]*\)0\+\([0-9]*\)\([0-9]\)=/{
+	s//\1+\2=\3/
+	b add
+}
+/^\([0-9]*\)\([0-9]\)\+\([0-9]*\)0=/{
+	s//\1+\3=\2/
+	b add
+}
+/^\([0-9]*\)0\+\([0-9*+]*\)\+\([0-9]*\)\([0-9]\)=/{
+	s//\1+\2+\3=\4/
+	b add
+}
+/^\([0-9]*\)\([0-9]\)\+\([0-9*+]*\)\+\([0-9]*\)0=/{
+	s//\1+\3+\4=\2/
+	b add
+}
+s/^\([0-9]*\)1\+/\10+/
+s/^\([0-9]*\)2\+/\11+/
+s/^\([0-9]*\)3\+/\12+/
+s/^\([0-9]*\)4\+/\13+/
+s/^\([0-9]*\)5\+/\14+/
+s/^\([0-9]*\)6\+/\15+/
+s/^\([0-9]*\)7\+/\16+/
+s/^\([0-9]*\)8\+/\17+/
+s/^\([0-9]*\)9\+/\18+/
+
+s/9=\([0-9]*\)$/_=\1/
+s/8=\([0-9]*\)$/9=\1/
+s/7=\([0-9]*\)$/8=\1/
+s/6=\([0-9]*\)$/7=\1/
+s/5=\([0-9]*\)$/6=\1/
+s/4=\([0-9]*\)$/5=\1/
+s/3=\([0-9]*\)$/4=\1/
+s/2=\([0-9]*\)$/3=\1/
+s/1=\([0-9]*\)$/2=\1/
+/_/{
+	s//_0/
+	: inc
+	s/9_/_0/
+	s/8_/9/
+	s/7_/8/
+	s/6_/7/
+	s/5_/6/
+	s/4_/5/
+	s/3_/4/
+	s/2_/3/
+	s/1_/2/
+	s/0_/1/
+	s/\+_/+1/
+	/_/b inc
+}
+b add
--- a/usr.bin/sed/misc.c
+++ b/usr.bin/sed/misc.c
@ -0,0 +1,141 @@
+/*-
+ * Copyright (c) 1992 Diomidis Spinellis.
+ * Copyright (c) 1992 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Diomidis Spinellis of Imperial College, University of London.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+static char sccsid[] = "@(#)misc.c	5.3 (Berkeley) 8/26/92";
+#endif /* not lint */
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "defs.h"
+#include "extern.h"
+
+/*
+ * malloc with result test
+ */
+void *
+xmalloc(size)
+	u_int size;
+{
+	void *mem;
+
+	mem = malloc(size);
+	/* Allocation failure is fatal; err() does not return here. */
+	if (mem == NULL)
+		err(FATAL, "%s", strerror(errno));
+	return (mem);
+}
+
+/*
+ * realloc with result test
+ */
+void *
+xrealloc(p, size)
+	void *p;
+	u_int size;
+{
+	void *mem;
+
+	/* Compatibility hack: a NULL pointer means a plain allocation. */
+	if (p == NULL)
+		return (xmalloc(size));
+
+	mem = realloc(p, size);
+	/* Allocation failure is fatal; err() does not return here. */
+	if (mem == NULL)
+		err(FATAL, "%s", strerror(errno));
+	return (mem);
+}
+
+/*
+ * Return a string for a regular expression error passed.  This is a overkill,
+ * because of the silly semantics of regerror (we can never know the size of
+ * the buffer).
+ */
+char *
+strregerror(errcode, preg)
+	int errcode;
+	regex_t *preg;
+{
+	static char *msg;	/* Buffer handed out by the previous call. */
+	size_t need;
+
+	/* The returned string is only valid until the next call. */
+	if (msg != NULL)
+		free(msg);
+
+	/* Probe with a zero-sized buffer to learn the required size. */
+	need = regerror(errcode, preg, "", 0);
+	msg = xmalloc(need);
+	(void)regerror(errcode, preg, msg, need);
+	return (msg);
+}
+
+#if __STDC__
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+/*
+ * Error reporting function
+ */
+/*
+ * err --
+ *	Print "sed: " followed by the printf-style message and a newline on
+ *	stderr.  For WARNING and COMPILE (and COMPILE2, which has the same
+ *	value) the message is prefixed with the current linenum and fname.
+ *	Returns only for WARNING; every other severity exits with status 1.
+ *
+ *	A newline is appended here, so fmt should not end with one.
+ */
+void
+#if __STDC__
+err(int severity, const char *fmt, ...)
+#else
+err(severity, fmt, va_alist)
+	int severity;
+	char *fmt;
+        va_dcl
+#endif
+{
+	va_list ap;
+#if __STDC__
+	va_start(ap, fmt);
+#else
+	va_start(ap);
+#endif
+	(void)fprintf(stderr, "sed: ");
+	switch (severity) {
+	case WARNING:
+	case COMPILE:
+		/* These severities report the line and file. */
+		(void)fprintf(stderr, "%lu: %s: ", linenum, fname);
+	}
+	(void)vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	(void)fprintf(stderr, "\n");
+	if (severity == WARNING)
+		return;
+	exit(1);
+	/* NOTREACHED */
+}
--- a/usr.bin/sed/process.c
+++ b/usr.bin/sed/process.c
@ -0,0 +1,607 @@
+/*-
+ * Copyright (c) 1992 Diomidis Spinellis.
+ * Copyright (c) 1992 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Diomidis Spinellis of Imperial College, University of London.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+static char sccsid[] = "@(#)process.c	5.10 (Berkeley) 12/2/92";
+#endif /* not lint */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/uio.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "defs.h"
+#include "extern.h"
+
+static SPACE HS, PS, SS;
+#define	pd		PS.deleted
+#define	ps		PS.space
+#define	psl		PS.len
+#define	hs		HS.space
+#define	hsl		HS.len
+
+static inline int	 applies __P((struct s_command *));
+static void		 flush_appends __P((void));
+static void		 lputs __P((char *));
+static inline int	 regexec_e __P((regex_t *, const char *, int, int));
+static void		 regsub __P((SPACE *, char *, char *));
+static int		 substitute __P((struct s_command *));
+
+struct s_appends *appends;	/* Array of pointers to strings to append. */
+static int appendx;		/* Index into appends array. */
+int appendnum;			/* Size of appends array. */
+
+static int lastaddr;		/* Set by applies if last address of a range. */
+static int sdone;		/* If any substitutes since last line input. */
+				/* Iov structure for 'w' commands. */
+static struct iovec iov[2] = { NULL, 0, "\n", 1 };
+
+static regex_t *defpreg;
+size_t maxnsub;
+regmatch_t *match;
+
+/*
+ * process --
+ *	Run the compiled script (prog) over every input line.  Each line is
+ *	read into the pattern space (PS) and the command list is walked;
+ *	commands whose addresses do not select the line are skipped.  Unless
+ *	-n was given or the pattern space was deleted, the pattern space is
+ *	printed at the end of the cycle, followed by any queued appends.
+ */
+void
+process()
+{
+	struct s_command *cp;
+	SPACE tspace;
+	size_t len;
+	int r;
+	char oldc, *p;
+
+	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
+		pd = 0;
+		cp = prog;
+redirect:
+		while (cp != NULL) {
+			if (!applies(cp)) {
+				cp = cp->next;
+				continue;
+			}
+			switch (cp->code) {
+			case '{':
+				cp = cp->u.c;
+				goto redirect;
+			case 'a':
+				/* Queue the text; it is printed by flush_appends(). */
+				if (appendx >= appendnum)
+					appends = xrealloc(appends,
+					    sizeof(struct s_appends) *
+					    (appendnum *= 2));
+				appends[appendx].type = AP_STRING;
+				appends[appendx].s = cp->t;
+				appendx++;
+				break;
+			case 'b':
+				cp = cp->u.c;
+				goto redirect;
+			case 'c':
+				pd = 1;
+				psl = 0;
+				/* For a range, print only at its last line. */
+				if (cp->a2 == NULL || lastaddr)
+					(void)printf("%s", cp->t);
+				break;
+			case 'd':
+				pd = 1;
+				goto new;
+			case 'D':
+				if (pd)
+					goto new;
+				if ((p = strchr(ps, '\n')) == NULL)
+					pd = 1;
+				else {
+					/*
+					 * Drop everything up to and including
+					 * the first newline, i.e. (p - ps) + 1
+					 * bytes.  One extra byte is moved so
+					 * that the terminating NUL follows
+					 * the remaining text.
+					 */
+					psl -= (p - ps) + 1;
+					memmove(ps, p + 1, psl + 1);
+				}
+				goto new;
+			case 'g':
+				cspace(&PS, hs, hsl, REPLACE);
+				break;
+			case 'G':
+				cspace(&PS, hs, hsl, APPENDNL);
+				break;
+			case 'h':
+				cspace(&HS, ps, psl, REPLACE);
+				break;
+			case 'H':
+				cspace(&HS, ps, psl, APPENDNL);
+				break;
+			case 'i':
+				(void)printf("%s", cp->t);
+				break;
+			case 'l':
+				lputs(ps);
+				break;
+			case 'n':
+				if (!nflag && !pd)
+					(void)printf("%s\n", ps);
+				flush_appends();
+				r = mf_fgets(&PS, REPLACE);
+#ifdef HISTORIC_PRACTICE
+				if (!r)
+					exit(0);
+#endif
+				pd = 0;
+				break;
+			case 'N':
+				flush_appends();
+				if (!mf_fgets(&PS, APPENDNL)) {
+					if (!nflag && !pd)
+						(void)printf("%s\n", ps);
+					exit(0);
+				}
+				break;
+			case 'p':
+				if (pd)
+					break;
+				(void)printf("%s\n", ps);
+				break;
+			case 'P':
+				if (pd)
+					break;
+				/* Temporarily cut the space at the first newline. */
+				if ((p = strchr(ps, '\n')) != NULL) {
+					oldc = *p;
+					*p = '\0';
+				}
+				(void)printf("%s\n", ps);
+				if (p != NULL)
+					*p = oldc;
+				break;
+			case 'q':
+				if (!nflag && !pd)
+					(void)printf("%s\n", ps);
+				flush_appends();
+				exit(0);
+			case 'r':
+				/* Queue the file; it is copied by flush_appends(). */
+				if (appendx >= appendnum)
+					appends = xrealloc(appends,
+					    sizeof(struct s_appends) *
+					    (appendnum *= 2));
+				appends[appendx].type = AP_FILE;
+				appends[appendx].s = cp->t;
+				appendx++;
+				break;
+			case 's':
+				sdone |= substitute(cp);
+				break;
+			case 't':
+				if (sdone) {
+					sdone = 0;
+					cp = cp->u.c;
+					goto redirect;
+				}
+				break;
+			case 'w':
+				if (pd)
+					break;
+				/*
+				 * The file is opened on first use.  err()
+				 * appends the newline itself, so the format
+				 * must not end with one.
+				 */
+				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
+				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
+				    DEFFILEMODE)) == -1)
+					err(FATAL, "%s: %s",
+					    cp->t, strerror(errno));
+				iov[0].iov_base = ps;
+				iov[0].iov_len = psl;
+				if (writev(cp->u.fd, iov, 2) != psl + 1)
+					err(FATAL, "%s: %s",
+					    cp->t, strerror(errno));
+				break;
+			case 'x':
+				tspace = PS;
+				PS = HS;
+				HS = tspace;
+				break;
+			case 'y':
+				if (pd)
+					break;
+				/*
+				 * Index the table as unsigned: a plain char
+				 * may be negative for bytes above 0x7f.
+				 */
+				for (p = ps, len = psl; len--; ++p)
+					*p = cp->u.y[(u_char)*p];
+				break;
+			case ':':
+			case '}':
+				break;
+			case '=':
+				(void)printf("%lu\n", linenum);
+			}
+			cp = cp->next;
+		} /* for all cp */
+
+new:		if (!nflag && !pd)
+			(void)printf("%s\n", ps);
+		flush_appends();
+	} /* for all lines */
+}
+
+/*
+ * TRUE if the address passed matches the current program state
+ * (lastline, linenumber, ps).
+ */
+/* The expansion is parenthesized so it is safe inside larger expressions. */
+#define	MATCH(a)							\
+	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1) :		\
+	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline)
+
+/*
+ * Return TRUE if the command applies to the current line.  Sets the inrange
+ * flag to process ranges.  Interprets the non-select (``!'') flag.
+ */
+static inline int
+applies(cp)
+	struct s_command *cp;
+{
+	int selected;
+
+	lastaddr = 0;
+	if (cp->a1 == NULL && cp->a2 == NULL) {
+		/* No address at all: every line is selected. */
+		selected = 1;
+	} else if (cp->a2 == NULL) {
+		/* Single address. */
+		selected = MATCH(cp->a1);
+	} else if (cp->inrange) {
+		/* Inside a range: look for the line that ends it. */
+		if (MATCH(cp->a2)) {
+			cp->inrange = 0;
+			lastaddr = 1;
+		}
+		selected = 1;
+	} else if (MATCH(cp->a1)) {
+		/*
+		 * If the second address is a number less than or
+		 * equal to the line number first selected, only
+		 * one line shall be selected.
+		 *	-- POSIX 1003.2
+		 */
+		if (cp->a2->type == AT_LINE && linenum >= cp->a2->u.l)
+			lastaddr = 1;
+		else
+			cp->inrange = 1;
+		selected = 1;
+	} else {
+		selected = 0;
+	}
+
+	/* The ``!'' flag inverts the selection. */
+	return (cp->nonsel ? !selected : selected);
+}
+
+/*
+ * substitute --
+ *	Do substitutions in the pattern space.  Currently, we build a
+ *	copy of the new pattern space in the substitute space structure
+ *	and then swap them.
+ */
+/*
+ * Returns 1 if a substitution was made, 0 if the RE did not match (or, for
+ * an Nth-occurrence substitute, matched fewer than N times).
+ */
+static int
+substitute(cp)
+	struct s_command *cp;
+{
+	SPACE tspace;
+	regex_t *re;
+	size_t re_off;
+	int n;
+	char *s;
+
+	s = ps;
+	re = cp->u.s->re;
+	/* An empty RE means "reuse the last RE"; check the back references. */
+	if (re == NULL) {
+		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
+			linenum = cp->u.s->linenum;
+			err(COMPILE, "\\%d not defined in the RE",
+			    cp->u.s->maxbref);
+		}
+	}
+	if (!regexec_e(re, s, 0, 0))
+		return (0);
+
+	SS.len = 0;				/* Clean substitute space. */
+	n = cp->u.s->n;
+	switch (n) {
+	case 0:					/* Global */
+		/*
+		 * NOTE(review): when the RE matches the empty string at the
+		 * current position, rm_eo is 0 and s does not advance, so
+		 * this loop looks like it cannot terminate -- confirm.
+		 */
+		do {
+			/* Locate start of replaced string. */
+			re_off = match[0].rm_so;
+			/* Copy leading retained string. */
+			cspace(&SS, s, re_off, APPEND);
+			/* Add in regular expression. */
+			regsub(&SS, s, cp->u.s->new);
+			/* Move past this match. */
+			s += match[0].rm_eo;
+		} while(regexec_e(re, s, REG_NOTBOL, 0));
+		/* Copy trailing retained string. */
+		cspace(&SS, s, strlen(s), APPEND);
+		break;
+	default:				/* Nth occurrence */
+		/* Skip the first n - 1 matches. */
+		while (--n) {
+			s += match[0].rm_eo;
+			if (!regexec_e(re, s, REG_NOTBOL, 0))
+				return (0);
+		}
+		/* FALLTHROUGH */
+	case 1:					/* 1st occurrence */
+		/* Locate start of replaced string. */
+		re_off = match[0].rm_so + (s - ps);
+		/* Copy leading retained string. */
+		cspace(&SS, ps, re_off, APPEND);
+		/* Add in regular expression. */
+		regsub(&SS, s, cp->u.s->new);
+		/* Copy trailing retained string. */
+		s += match[0].rm_eo;
+		cspace(&SS, s, strlen(s), APPEND);
+		break;
+	}
+
+	/*
+	 * Swap the substitute space and the pattern space, and make sure
+	 * that any leftover pointers into stdio memory get lost.
+	 */
+	tspace = PS;
+	PS = SS;
+	SS = tspace;
+	SS.space = SS.back;
+
+	/* Handle the 'p' flag. */
+	if (cp->u.s->p)
+		(void)printf("%s\n", ps);
+
+	/*
+	 * Handle the 'w' flag.  The file is opened on first use.  err()
+	 * appends the newline itself, so the format must not end with one.
+	 */
+	if (cp->u.s->wfile && !pd) {
+		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
+		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
+			err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
+		iov[0].iov_base = ps;
+		iov[0].iov_len = psl;
+		if (writev(cp->u.s->wfd, iov, 2) != psl + 1)
+			err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
+	}
+	return (1);
+}
+
+/*
+ * Flush append requests.  Always called before reading a line,
+ * therefore it also resets the substitution done (sdone) flag.
+ */
+static void
+flush_appends()
+{
+	struct s_appends *ap;
+	FILE *in;
+	size_t nread;
+	int i;
+	char buf[8 * 1024];
+
+	for (i = 0; i < appendx; i++) {
+		ap = &appends[i];
+		if (ap->type == AP_STRING) {
+			(void)printf("%s", ap->s);
+			continue;
+		}
+		if (ap->type != AP_FILE)
+			continue;
+		/*
+		 * Read files probably shouldn't be cached.  Since
+		 * it's not an error to read a non-existent file,
+		 * it's possible that another program is interacting
+		 * with the sed script through the file system.  It
+		 * would be truly bizarre, but possible.  It's probably
+		 * not that big a performance win, anyhow.
+		 */
+		in = fopen(ap->s, "r");
+		if (in == NULL)
+			continue;
+		/* Copy the file to stdout one buffer at a time. */
+		while ((nread = fread(buf, 1, sizeof(buf), in)) != 0)
+			(void)fwrite(buf, 1, nread, stdout);
+		(void)fclose(in);
+	}
+	if (ferror(stdout))
+		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
+	/* The queue is empty again and a new line is about to be read. */
+	appendx = sdone = 0;
+}
+
+/*
+ * lputs --
+ *	Write the NUL-terminated string s to standard output in a visually
+ *	unambiguous form.  Printable ASCII characters other than backslash
+ *	are written as-is; backslash, alert, backspace, form-feed, newline,
+ *	carriage-return, tab and vertical tab are written as two-character
+ *	C-style escapes; every other byte is written as a backslash followed
+ *	by three octal digits.  Long output is folded with a backslash and a
+ *	newline, and the end of the string is marked with a '$'.
+ *
+ *	The fold width is computed once and cached: a positive COLUMNS
+ *	environment value wins, then the terminal width reported by
+ *	TIOCGWINSZ, then a default of 60.
+ */
+static void
+lputs(s)
+	register char *s;
+{
+	register int count;
+	register char *escapes, *p;
+	struct winsize win;
+	static int termwidth = -1;
+
+	if (termwidth == -1) {
+		/*
+		 * A COLUMNS value that is non-numeric, zero or negative is
+		 * ignored; using it would fold before every character (0)
+		 * or leave the width permanently uncached (-1).
+		 */
+		termwidth = 0;
+		if ((p = getenv("COLUMNS")) != NULL)
+			termwidth = atoi(p);
+		if (termwidth <= 0) {
+			if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
+			    win.ws_col > 0)
+				termwidth = win.ws_col;
+			else
+				termwidth = 60;
+		}
+	}
+
+	/* count tracks the columns already used on the current line. */
+	for (count = 0; *s; ++s) {
+		if (count >= termwidth) {
+			(void)printf("\\\n");
+			count = 0;
+		}
+		if (isascii(*s) && isprint(*s) && *s != '\\') {
+			(void)putchar(*s);
+			count++;
+		} else {
+			/*
+			 * escapes and the literal below are parallel tables:
+			 * the position of *s in escapes selects the letter
+			 * that follows the backslash.
+			 */
+			escapes = "\\\a\b\f\n\r\t\v";
+			(void)putchar('\\');
+			if ((p = strchr(escapes, *s)) != NULL) {
+				(void)putchar("\\abfnrtv"[p - escapes]);
+				count += 2;
+			} else {
+				(void)printf("%03o", (u_char)*s);
+				count += 4;
+			}
+		}
+	}
+	(void)putchar('$');
+	(void)putchar('\n');
+	if (ferror(stdout))
+		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
+}
+
+/*
+ * regexec_e --
+ *	Run regexec() with sed's "empty RE means the last RE used" rule.
+ *	A non-NULL preg becomes the new default expression; a NULL preg
+ *	reuses the default, and it is a fatal error if none has been set.
+ *	When nomatch is non-zero no subexpression offsets are requested;
+ *	otherwise up to maxnsub + 1 entries of the global match[] array
+ *	are filled in.
+ *
+ *	Returns 1 on a match and 0 on REG_NOMATCH.  Any other regexec()
+ *	result is reported through err(FATAL, ...), which is expected not
+ *	to return.
+ */
+static inline int
+regexec_e(preg, string, eflags, nomatch)
+	regex_t *preg;
+	const char *string;
+	int eflags, nomatch;
+{
+	int rval;
+
+	if (preg != NULL)
+		defpreg = preg;
+	else if (defpreg == NULL)
+		err(FATAL, "first RE may not be empty");
+
+	rval = regexec(defpreg, string,
+	    nomatch ? 0 : maxnsub + 1, match, eflags);
+	if (rval == 0)
+		return (1);
+	if (rval == REG_NOMATCH)
+		return (0);
+
+	err(FATAL, "RE error: %s", strregerror(rval, defpreg));
+	/* NOTREACHED */
+}
+
+/*
+ * regsub --
+ *	Perform substitutions after a regexp match: append the expansion of
+ *	the replacement template src to the space sp, keeping sp->len up to
+ *	date and leaving the result NUL-terminated.
+ *
+ *	In src, '&' stands for the whole match and "\N" (N a digit) for the
+ *	N'th subexpression; their text is copied out of string using the
+ *	offsets in the global match[] array.  "\\" and "\&" yield a literal
+ *	backslash or ampersand.  A reference to a subexpression whose
+ *	offsets are -1 expands to nothing.
+ *
+ * Based on a routine by Henry Spencer
+ */
+static void
+regsub(sp, string, src)
+	SPACE *sp;
+	char *string, *src;
+{
+	register int len, no;
+	register char c, *dst;
+
+	/*
+	 * Make room for reqlen more bytes plus a terminator, refreshing dst
+	 * because the buffer may move.  The test is written as an addition:
+	 * the former "sp->blen - (reqlen) - 1" wraps around to a huge value
+	 * when the fields are unsigned and the buffer is smaller than the
+	 * request, which suppressed the reallocation.
+	 * NOTE(review): SPACE is declared elsewhere; confirm len/blen types.
+	 */
+#define	NEEDSP(reqlen)							\
+	do {								\
+		if (sp->len + (reqlen) + 1 >= sp->blen) {		\
+			sp->blen += (reqlen) + 1024;			\
+			sp->space = sp->back =				\
+			    xrealloc(sp->back, sp->blen);		\
+			dst = sp->space + sp->len;			\
+		}							\
+	} while (0)
+
+	dst = sp->space + sp->len;
+	while ((c = *src++) != '\0') {
+		if (c == '&')
+			no = 0;
+		/* The cast keeps isdigit() defined for high-bit chars. */
+		else if (c == '\\' && isdigit((unsigned char)*src))
+			no = *src++ - '0';
+		else
+			no = -1;
+		if (no < 0) {		/* Ordinary character. */
+			if (c == '\\' && (*src == '\\' || *src == '&'))
+				c = *src++;
+			NEEDSP(1);
+			*dst++ = c;
+			++sp->len;
+		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
+			len = match[no].rm_eo - match[no].rm_so;
+			NEEDSP(len);
+			memmove(dst, string + match[no].rm_so, len);
+			dst += len;
+			sp->len += len;
+		}
+	}
+	NEEDSP(1);
+	*dst = '\0';
+#undef	NEEDSP
+}
+
+/*
+ * cspace --
+ *	Copy len bytes from p into the space sp, allocating new space as
+ *	necessary, and NUL-terminate the result.
+ *
+ *	spflag selects how the bytes are added:
+ *	  APPENDNL	a newline is added to sp first, then the bytes;
+ *	  REPLACE	the existing contents are discarded first;
+ *	  otherwise	the bytes are appended to the existing contents.
+ */
+void
+cspace(sp, p, len, spflag)
+	SPACE *sp;
+	char *p;
+	size_t len;
+	enum e_spflag spflag;
+{
+	size_t tlen;
+
+	/*
+	 * Make sure SPACE has enough memory and ramp up quickly.  Appends
+	 * need two extra bytes, one for the newline, one for a terminating
+	 * NUL.
+	 *
+	 * The conditional must be parenthesized: '+' binds tighter than
+	 * '==', which binds tighter than '?:', so without the parentheses
+	 * the whole sum is compared against APPENDNL and tlen is only ever
+	 * 1 or 2, which means the buffer is never grown.
+	 */
+	tlen = sp->len + len + (spflag == APPENDNL ? 2 : 1);
+	if (tlen > sp->blen) {
+		sp->blen = tlen + 1024;
+		sp->space = sp->back = xrealloc(sp->back, sp->blen);
+	}
+
+	if (spflag == APPENDNL)
+		sp->space[sp->len++] = '\n';
+	else if (spflag == REPLACE)
+		sp->len = 0;
+
+	memmove(sp->space + sp->len, p, len);
+	sp->space[sp->len += len] = '\0';
+}
+
+/*
+ * cfclose --
+ *	Walk the command list from cp up to (but not including) end, close
+ *	every cached file descriptor that is open, and mark it closed (-1).
+ *	A failing close() is reported as a fatal error.  The descriptors
+ *	examined are those of 's' commands (the w flag file) and 'w'
+ *	commands; a '{' command is descended into recursively, stopping at
+ *	the command that follows it in the list.
+ */
+void
+cfclose(cp, end)
+	register struct s_command *cp, *end;
+{
+
+	while (cp != end) {
+		if (cp->code == 's') {
+			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
+				err(FATAL,
+				    "%s: %s", cp->u.s->wfile, strerror(errno));
+			cp->u.s->wfd = -1;
+		} else if (cp->code == 'w') {
+			if (cp->u.fd != -1 && close(cp->u.fd))
+				err(FATAL, "%s: %s", cp->t, strerror(errno));
+			cp->u.fd = -1;
+		} else if (cp->code == '{') {
+			cfclose(cp->u.c, cp->next);
+		}
+		cp = cp->next;
+	}
+}
--- a/usr.bin/sed/sed.0
+++ b/usr.bin/sed/sed.0
@ -0,0 +1,330 @@
+SED(1)                       UNIX Reference Manual                      SED(1)
+
+NNAAMMEE
+     sseedd - stream editor
+
+SSYYNNOOPPSSIISS
+     sseedd [--aann] _c_o_m_m_a_n_d [_f_i_l_e ...]
+     sseedd [--aann] [--ee _c_o_m_m_a_n_d] [--ff _c_o_m_m_a_n_d__f_i_l_e] [_f_i_l_e ...]
+
+DDEESSCCRRIIPPTTIIOONN
+     The sseedd utility reads the specified files, or the standard input if no
+     files are specified, modifying the input as specified by a list of com-
+     mands.  The input is then written to the standard output.
+
+     A single command may be specified as the first argument to sseedd. Multiple
+     commands may be specified by using the --ee or --ff options.  All commands
+     are applied to the input in the order they are specified regardless of
+     their origin.
+
+     The following options are available:
+
+     --aa      The files listed as parameters for the ``w'' functions are creat-
+             ed (or truncated) before any processing begins, by default.  The
+             --aa option causes sseedd to delay opening each file until a command
+             containing the related ``w'' function is applied to a line of in-
+             put.
+
+     --ee _c_o_m_m_a_n_d
+             Append the editing commands specified by the _c_o_m_m_a_n_d argument to
+             the list of commands.
+
+     --ff _c_o_m_m_a_n_d__f_i_l_e
+             Append the editing commands found in the file _c_o_m_m_a_n_d__f_i_l_e to the
+             list of commands.  The editing commands should each be listed on
+             a separate line.
+
+     --nn      By default, each line of input is echoed to the standard output
+             after all of the commands have been applied to it.  The --nn option
+             suppresses this behavior.
+
+     The form of a sseedd command is as follows:
+
+           [address[,address]]function[arguments]
+
+     Whitespace may be inserted before the first address and the function por-
+     tions of the command.
+
+     Normally, sseedd cyclically copies a line of input, not including its termi-
+     nating newline character, into a _p_a_t_t_e_r_n _s_p_a_c_e, (unless there is some-
+     thing left after a ``D'' function), applies all of the commands with ad-
+     dresses that select that pattern space, copies the pattern space to the
+     standard output, appending a newline, and deletes the pattern space.
+
+     Some of the functions use a _h_o_l_d _s_p_a_c_e to save all or part of the pattern
+     space for subsequent retrieval.
+
+SSeedd AAddddrreesssseess
+     An address is not required, but if specified must be a number (that
+     counts input lines cumulatively across input files), a dollar (``$'')
+     character that addresses the last line of input, or a context address
+     (which consists of a regular expression preceded and followed by a delim-
+     iter).
+
+     A command line with no addresses selects every pattern space.
+
+
+     A command line with one address selects all of the pattern spaces that
+     match the address.
+
+     A command line with two addresses selects the inclusive range from the
+     first pattern space that matches the first address through the next pat-
+     tern space that matches the second.  (If the second address is a number
+     less than or equal to the line number first selected, only that line is
+     selected.)  Starting at the first line following the selected range, sseedd
+     starts looking again for the first address.
+
+     Editing commands can be applied to non-selected pattern spaces by use of
+     the exclamation character (``!'') function.
+
+SSeedd RReegguullaarr EExxpprreessssiioonnss
+     The sseedd regular expressions are basic regular expressions (BRE's, see
+     regex(3) for more information).  In addition, sseedd has the following two
+     additions to BRE's:
+
+     1.   In a context address, any character other than a backslash (``\'')
+          or newline character may be used to delimit the regular expression.
+          Also, putting a backslash character before the delimiting character
+          causes the character to be treated literally.  For example, in the
+          context address \xabc\xdefx, the RE delimiter is an ``x'' and the
+          second ``x'' stands for itself, so that the regular expression is
+          ``abcxdef''.
+
+     2.   The escape sequence \n matches a newline character embedded in the
+          pattern space.  You can't, however, use a literal newline character
+          in an address or in the substitute command.
+
+     One special feature of sseedd regular expressions is that they can default
+     to the last regular expression used.  If a regular expression is empty,
+     i.e. just the delimiter characters are specified, the last regular ex-
+     pression encountered is used instead.  The last regular expression is de-
+     fined as the last regular expression used as part of an address or sub-
+     stitute command, and at run-time, not compile-time.  For example, the
+     command ``/abc/s//XXX/'' will substitute ``XXX'' for the pattern ``abc''.
+
+SSeedd FFuunnccttiioonnss
+     In the following list of commands, the maximum number of permissible ad-
+     dresses for each command is indicated by [0addr], [1addr], or [2addr],
+     representing zero, one, or two addresses.
+
+     The argument _t_e_x_t consists of one or more lines.  To embed a newline in
+     the text, precede it with a backslash.  Other backslashes in text are
+     deleted and the following character taken literally.
+
+     The ``r'' and ``w'' functions take an optional file parameter, which
+     should be separated from the function letter by white space.  Each file
+     given as an argument to sseedd is created (or its contents truncated) before
+     any input processing begins.
+
+     The ``b'', ``r'', ``s'', ``t'', ``w'', ``y'', ``!'', and ``:'' functions
+     all accept additional arguments.  The following synopses indicate which
+     arguments have to be separated from the function letters by white space
+     characters.
+
+     Two of the functions take a function-list.  This is a list of sseedd func-
+     tions separated by newlines, as follows:
+
+           { function
+             function
+             ...
+             function
+           }
+
+     The ``{'' can be preceded by white space and can be followed by white
+     space.  The function can be preceded by white space.  The terminating
+     ``}'' must be preceded by a newline and optional white space.
+
+     [2addr] function-list
+             Execute function-list only when the pattern space is selected.
+
+     [1addr]a\
+     text
+             Write _t_e_x_t to standard output immediately before each attempt to
+             read a line of input, whether by executing the ``N'' function or
+             by beginning a new cycle.
+
+     [2addr]b[label]
+             Branch to the ``:'' function with the specified label.  If the
+             label is not specified, branch to the end of the script.
+
+     [2addr]c\
+     text
+             Delete the pattern space.  With 0 or 1 address or at the end of a
+             2-address range, _t_e_x_t is written to the standard output.
+
+     [2addr]d
+             Delete the pattern space and start the next cycle.
+
+     [2addr]D
+             Delete the initial segment of the pattern space through the first
+             newline character and start the next cycle.
+
+     [2addr]g
+             Replace the contents of the pattern space with the contents of
+             the hold space.
+
+     [2addr]G
+             Append a newline character followed by the contents of the hold
+             space to the pattern space.
+
+     [2addr]h
+             Replace the contents of the hold space with the contents of the
+             pattern space.
+
+     [2addr]H
+             Append a newline character followed by the contents of the pat-
+             tern space to the hold space.
+
+     [1addr]i\
+     text
+             Write _t_e_x_t to the standard output.
+
+     [2addr]l
+             (The letter ell.)  Write the pattern space to the standard output
+             in a visually unambiguous form.  This form is as follows:
+
+                   backslash          \
+                   alert              \a
+                   form-feed          \f
+                   newline            \n
+                   carriage-return    \r
+                   tab                \t
+                   vertical tab       \v
+
+             Nonprintable characters are written as three-digit octal numbers
+             (with a preceding backslash) for each byte in the character (most
+             significant byte first).  Long lines are folded, with the point
+             of folding indicated by displaying a backslash followed by a new-
+             line.  The end of each line is marked with a ``$''.
+
+     [2addr]n
+             Write the pattern space to the standard output if the default
+             output has not been suppressed, and replace the pattern space
+             with the next line of input.
+
+     [2addr]N
+             Append the next line of input to the pattern space, using an em-
+             bedded newline character to separate the appended material from
+             the original contents.  Note that the current line number
+             changes.
+
+     [2addr]p
+             Write the pattern space to standard output.
+
+     [2addr]P
+             Write the pattern space, up to the first newline character to the
+             standard output.
+
+     [1addr]q
+             Branch to the end of the script and quit without starting a new
+             cycle.
+
+     [1addr]r file
+             Copy the contents of _f_i_l_e to the standard output immediately be-
+             fore the next attempt to read a line of input.  If _f_i_l_e cannot be
+             read for any reason, it is silently ignored and no error condi-
+             tion is set.
+
+     [2addr]s/regular expression/replacement/flags
+             Substitute the replacement string for the first instance of the
+             regular expression in the pattern space.  Any character other
+             than backslash or newline can be used instead of a slash to de-
+             limit the RE and the replacement.  Within the RE and the replace-
+             ment, the RE delimiter itself can be used as a literal character
+             if it is preceded by a backslash.
+
+             An ampersand (``&'') appearing in the replacement is replaced by
+             the string matching the RE.  The special meaning of ``&'' in this
+             context can be suppressed by preceding it by backslash.  The
+             string ``\#'', where ``#'' is a digit, is replaced by the text
+             matched by the corresponding backreference expression (see
+             re_format(7)).
+
+             A line can be split by substituting a newline character into it.
+             To specify a newline character in the replacement string, precede
+             it with a backslash.
+
+             The value of _f_l_a_g_s in the substitute function is zero or more of
+             the following:
+
+                   0 ... 9
+                           Make the substitution only for the N'th occurrence
+                           of the regular expression in the pattern space.
+
+                   g       Make the substitution for all non-overlapping
+                           matches of the regular expression, not just the
+                           first one.
+
+                   p       Write the pattern space to standard output if a re-
+                           placement was made.  If the replacement string is
+                           identical to that which it replaces, it is still
+                           considered to have been a replacement.
+
+                   w _f_i_l_e  Append the pattern space to _f_i_l_e if a replacement
+                           was made.  If the replacement string is identical
+                           to that which it replaces, it is still considered
+                           to have been a replacement.
+
+     [2addr]t [label]
+             Branch to the ``:'' function bearing the label if any substitu-
+             tions have been made since the most recent reading of an input
+             line or execution of a ``t'' function.  If no label is specified,
+             branch to the end of the script.
+
+     [2addr]w _f_i_l_e
+             Append the pattern space to the _f_i_l_e.
+
+     [2addr]x
+             Swap the contents of the pattern and hold spaces.
+
+     [2addr]y/string1/string2/
+             Replace all occurrences of characters in _s_t_r_i_n_g_1 in the pattern
+             space with the corresponding characters from _s_t_r_i_n_g_2. Any charac-
+             ter other than a backslash or newline can be used instead of a
+             slash to delimit the strings.  Within _s_t_r_i_n_g_1 and _s_t_r_i_n_g_2, the
+             delimiter itself can be used as a literal character if it is pre-
+             ceded by a backslash.
+
+     [2addr]!function
+     [2addr]!function-list
+             Apply the function or function-list only to the lines that are
+             _n_o_t selected by the address(es).
+
+     [0addr]:label
+             This function does nothing; it bears a label to which the ``b''
+             and ``t'' commands may branch.
+
+     [1addr]=
+             Write the line number to the standard output followed by a new-
+             line character.
+
+     [0addr]
+             Empty lines are ignored.
+
+     [0addr]#
+             The ``#'' and the remainder of the line are ignored (treated as a
+             comment), with the single exception that if the first two charac-
+             ters in the file are ``#n'', the default output is suppressed.
+             This is the same as specifying the --nn option on the command line.
+
+     The sseedd utility exits 0 on success and >0 if an error occurs.
+
+SSEEEE AALLSSOO
+     awk(1),  ed(1),  grep(1),  regex(3),  re_format(7)
+
+HHIISSTTOORRYY
+     A sseedd command appeared in Version 7 AT&T UNIX.
+
+SSTTAANNDDAARRDDSS
+     The sseedd function is expected to be a superset of the IEEE Std1003.2
+     (``POSIX'') specification.
+
+BSD Experimental                August 24, 1992                              6
+
+
+
+
+
+
+
+
--- a/usr.bin/sed/sed.1
+++ b/usr.bin/sed/sed.1
@ -0,0 +1,513 @@
+.\" Copyright (c) 1992 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" the Institute of Electrical and Electronics Engineers, Inc.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\"    must display the following acknowledgement:
+.\"	This product includes software developed by the University of
+.\"	California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\"    may be used to endorse or promote products derived from this software
+.\"    without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\"	@(#)sed.1	5.2 (Berkeley) 8/24/92
+.\"
+.Dd "August 24, 1992"
+.Dt SED 1
+.Os
+.Sh NAME
+.Nm sed
+.Nd stream editor
+.Sh SYNOPSIS
+.Nm sed
+.Op Fl an
+.Ar command
+.Op Ar file ...
+.Nm sed
+.Op Fl an
+.Op Fl e Ar command
+.Op Fl f Ar command_file
+.Op Ar file ...
+.Sh DESCRIPTION
+The
+.Nm sed
+utility reads the specified files, or the standard input if no files
+are specified, modifying the input as specified by a list of commands.
+The input is then written to the standard output.
+.Pp
+A single command may be specified as the first argument to
+.Nm sed .
+Multiple commands may be specified by using the
+.Fl e
+or
+.Fl f
+options.
+All commands are applied to the input in the order they are specified
+regardless of their origin.
+.Pp
+The following options are available:
+.Bl -tag -width indent
+.It Fl a
+The files listed as parameters for the
+.Dq w
+functions are created (or truncated) before any processing begins,
+by default.
+The
+.Fl a
+option causes
+.Nm sed
+to delay opening each file until a command containing the related
+.Dq w
+function is applied to a line of input.
+.It Fl e Ar command
+Append the editing commands specified by the
+.Ar command
+argument
+to the list of commands.
+.It Fl f Ar command_file
+Append the editing commands found in the file
+.Ar command_file
+to the list of commands.
+The editing commands should each be listed on a separate line.
+.It Fl n
+By default, each line of input is echoed to the standard output after
+all of the commands have been applied to it.
+The
+.Fl n
+option suppresses this behavior.
+.El
+.Pp
+The form of a
+.Nm sed
+command is as follows:
+.sp
+.Dl [address[,address]]function[arguments]
+.sp
+Whitespace may be inserted before the first address and the function
+portions of the command.
+.Pp
+Normally,
+.Nm sed
+cyclically copies a line of input, not including its terminating newline
+character, into a
+.Em "pattern space" ,
+(unless there is something left after a
+.Dq D
+function),
+applies all of the commands with addresses that select that pattern space,
+copies the pattern space to the standard output, appending a newline, and
+deletes the pattern space.
+.Pp
+Some of the functions use a
+.Em "hold space"
+to save all or part of the pattern space for subsequent retrieval.
+.Sh "Sed Addresses"
+An address is not required, but if specified must be a number (that counts
+input lines
+cumulatively across input files), a dollar
+.Po
+.Dq $
+.Pc
+character that addresses the last line of input, or a context address
+(which consists of a regular expression preceded and followed by a
+delimiter).
+.Pp
+A command line with no addresses selects every pattern space.
+.Pp
+A command line with one address selects all of the pattern spaces
+that match the address.
+.Pp
+A command line with two addresses selects the inclusive range from
+the first pattern space that matches the first address through the next
+pattern space that matches the second.
+(If the second address is a number less than or equal to the line number
+first selected, only that line is selected.)
+Starting at the first line following the selected range,
+.Nm sed
+starts looking again for the first address.
+.Pp
+Editing commands can be applied to non-selected pattern spaces by use
+of the exclamation character
+.Po
+.Dq !
+.Pc
+function.
+.Sh "Sed Regular Expressions"
+The
+.Nm sed
+regular expressions are basic regular expressions (BRE's, see
+.Xr regex 3
+for more information).
+In addition,
+.Nm sed
+has the following two additions to BRE's:
+.sp
+.Bl -enum -compact
+.It
+In a context address, any character other than a backslash
+.Po
+.Dq \e
+.Pc
+or newline character may be used to delimit the regular expression.
+Also, putting a backslash character before the delimiting character
+causes the character to be treated literally.
+For example, in the context address \exabc\exdefx, the RE delimiter
+is an
+.Dq x
+and the second
+.Dq x
+stands for itself, so that the regular expression is
+.Dq abcxdef .
+.sp
+.It
+The escape sequence \en matches a newline character embedded in the
+pattern space.
+You can't, however, use a literal newline character in an address or
+in the substitute command.
+.El
+.Pp
+One special feature of
+.Nm sed
+regular expressions is that they can default to the last regular
+expression used.
+If a regular expression is empty, i.e. just the delimiter characters
+are specified, the last regular expression encountered is used instead.
+The last regular expression is defined as the last regular expression
+used as part of an address or substitute command, and at run-time, not
+compile-time.
+For example, the command
+.Dq /abc/s//XXX/
+will substitute
+.Dq XXX
+for the pattern
+.Dq abc .
+.Sh "Sed Functions"
+In the following list of commands, the maximum number of permissible
+addresses for each command is indicated by [0addr], [1addr], or [2addr],
+representing zero, one, or two addresses.
+.Pp
+The argument
+.Em text
+consists of one or more lines.
+To embed a newline in the text, precede it with a backslash.
+Other backslashes in text are deleted and the following character
+taken literally.
+.Pp
+The
+.Dq r
+and
+.Dq w
+functions take an optional file parameter, which should be separated
+from the function letter by white space.
+Each file given as an argument to
+.Nm sed
+is created (or its contents truncated) before any input processing begins.
+.Pp
+The
+.Dq b ,
+.Dq r ,
+.Dq s ,
+.Dq t ,
+.Dq w ,
+.Dq y ,
+.Dq ! ,
+and
+.Dq \&:
+functions all accept additional arguments.
+The following synopses indicate which arguments have to be separated from
+the function letters by white space characters.
+.Pp
+Two of the functions take a function-list.
+This is a list of
+.Nm sed
+functions separated by newlines, as follows:
+.Bd -literal -offset indent
+{ function
+  function
+  ...
+  function
+}
+.Ed
+.Pp
+The
+.Dq {
+can be preceded by white space and can be followed by white space.
+The function can be preceded by white space.
+The terminating
+.Dq }
+must be preceded by a newline and optional white space.
+.sp
+.Bl -tag -width "XXXXXX" -compact
+.It [2addr] function-list
+Execute function-list only when the pattern space is selected.
+.sp
+.It [1addr]a\e
+.It text
+.br
+Write
+.Em text
+to standard output immediately before each attempt to read a line of input,
+whether by executing the
+.Dq N
+function or by beginning a new cycle.
+.sp
+.It [2addr]b[label]
+Branch to the
+.Dq \&:
+function with the specified label.
+If the label is not specified, branch to the end of the script.
+.sp
+.It [2addr]c\e
+.It text
+.br
+Delete the pattern space.
+With 0 or 1 address or at the end of a 2-address range,
+.Em text
+is written to the standard output.
+.sp
+.It [2addr]d
+Delete the pattern space and start the next cycle.
+.sp
+.It [2addr]D
+Delete the initial segment of the pattern space through the first
+newline character and start the next cycle.
+.sp
+.It [2addr]g
+Replace the contents of the pattern space with the contents of the
+hold space.
+.sp
+.It [2addr]G
+Append a newline character followed by the contents of the hold space
+to the pattern space.
+.sp
+.It [2addr]h
+Replace the contents of the hold space with the contents of the
+pattern space.
+.sp
+.It [2addr]H
+Append a newline character followed by the contents of the pattern space
+to the hold space.
+.sp
+.It [1addr]i\e
+.It text
+.br
+Write
+.Em text
+to the standard output.
+.sp
+.It [2addr]l
+(The letter ell.)
+Write the pattern space to the standard output in a visually unambiguous
+form.
+This form is as follows:
+.sp
+.Bl -tag -width "carriage-returnXX" -offset indent -compact
+.It backslash
+\e
+.It alert
+\ea
+.It form-feed
+\ef
+.It newline
+\en
+.It carriage-return
+\er
+.It tab
+\et
+.It vertical tab
+\ev
+.El
+.Pp
+Nonprintable characters are written as three-digit octal numbers (with a
+preceding backslash) for each byte in the character (most significant byte
+first).
+Long lines are folded, with the point of folding indicated by displaying
+a backslash followed by a newline.
+The end of each line is marked with a
+.Dq $ .
+.sp
+.It [2addr]n
+Write the pattern space to the standard output if the default output has
+not been suppressed, and replace the pattern space with the next line of
+input.
+.sp
+.It [2addr]N
+Append the next line of input to the pattern space, using an embedded
+newline character to separate the appended material from the original
+contents.
+Note that the current line number changes.
+.sp
+.It [2addr]p
+Write the pattern space to standard output.
+.sp
+.It [2addr]P
+Write the pattern space, up to the first newline character, to the
+standard output.
+.sp
+.It [1addr]q
+Branch to the end of the script and quit without starting a new cycle.
+.sp
+.It [1addr]r file
+Copy the contents of
+.Em file
+to the standard output immediately before the next attempt to read a
+line of input.
+If
+.Em file
+cannot be read for any reason, it is silently ignored and no error
+condition is set.
+.sp
+.It [2addr]s/regular expression/replacement/flags
+Substitute the replacement string for the first instance of the regular
+expression in the pattern space.
+Any character other than backslash or newline can be used instead of
+a slash to delimit the RE and the replacement.
+Within the RE and the replacement, the RE delimiter itself can be used as
+a literal character if it is preceded by a backslash.
+.Pp
+An ampersand
+.Po
+.Dq &
+.Pc
+appearing in the replacement is replaced by the string matching the RE.
+The special meaning of
+.Dq &
+in this context can be suppressed by preceding it by backslash.
+The string
+.Dq \e# ,
+where
+.Dq #
+is a digit, is replaced by the text matched
+by the corresponding backreference expression (see
+.Xr re_format 7 ).
+.Pp
+A line can be split by substituting a newline character into it.
+To specify a newline character in the replacement string, precede it with
+a backslash.
+.Pp
+The value of
+.Em flags
+in the substitute function is zero or more of the following:
+.Bl -tag -width "XXXXXX" -offset indent
+.It "0 ... 9"
+Make the substitution only for the N'th occurrence of the regular
+expression in the pattern space.
+.It g
+Make the substitution for all non-overlapping matches of the
+regular expression, not just the first one.
+.It p
+Write the pattern space to standard output if a replacement was made.
+If the replacement string is identical to that which it replaces, it
+is still considered to have been a replacement.
+.It w Em file
+Append the pattern space to
+.Em file
+if a replacement was made.
+If the replacement string is identical to that which it replaces, it
+is still considered to have been a replacement.
+.El
+.sp
+.It [2addr]t [label]
+Branch to the
+.Dq \&:
+function bearing the label if any substitutions have been made since the
+most recent reading of an input line or execution of a
+.Dq t
+function.
+If no label is specified, branch to the end of the script.
+.sp
+.It [2addr]w Em file
+Append the pattern space to the
+.Em file .
+.sp
+.It [2addr]x
+Swap the contents of the pattern and hold spaces.
+.sp
+.It [2addr]y/string1/string2/
+Replace all occurrences of characters in
+.Em string1
+in the pattern space with the corresponding characters from
+.Em string2 .
+Any character other than a backslash or newline can be used instead of
+a slash to delimit the strings.
+Within
+.Em string1
+and
+.Em string2 ,
+the delimiter itself can be used as a literal character if it is preceded
+by a backslash.
+.sp
+.It [2addr]!function
+.It [2addr]!function-list
+Apply the function or function-list only to the lines that are
+.Em not
+selected by the address(es).
+.sp
+.It [0addr]:label
+This function does nothing; it bears a label to which the
+.Dq b
+and
+.Dq t
+commands may branch.
+.sp
+.It [1addr]=
+Write the line number to the standard output followed by a newline
+character.
+.sp
+.It [0addr]
+Empty lines are ignored.
+.sp
+.It [0addr]#
+The
+.Dq #
+and the remainder of the line are ignored (treated as a comment), with
+the single exception that if the first two characters in the file are
+.Dq #n ,
+the default output is suppressed.
+This is the same as specifying the
+.Fl n
+option on the command line.
+.El
+.Pp
+The
+.Nm sed
+utility exits 0 on success and >0 if an error occurs.
+.Sh SEE ALSO
+.Xr awk 1 ,
+.Xr ed 1 ,
+.Xr grep 1 ,
+.Xr regex 3 ,
+.Xr re_format 7
+.Sh HISTORY
+A
+.Nm sed
+command appeared in
+.At v7 .
+.Sh STANDARDS
+The
+.Nm sed
+utility is expected to be a superset of the
+.St -p1003.2
+specification.
--- a/usr.bin/sed/sed.test
+++ b/usr.bin/sed/sed.test
@ -0,0 +1,545 @@
+#!/bin/sh -
+#
+# Copyright (c) 1992 Diomidis Spinellis.
+# Copyright (c) 1992 The Regents of the University of California.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+#    must display the following acknowledgement:
+#	This product includes software developed by the University of
+#	California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+#    may be used to endorse or promote products derived from this software
+#    without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+#	@(#)sed.test	5.6 (Berkeley) 8/28/92
+#
+
+# sed Regression Tests
+#
+# The following files are created:
+# lines[1-4], script1, script2
+# Two directories (sed.out and nsed.out) contain the test results
+
+main()
+{
+	# The reference sed, the sed under test, and the directory that
+	# receives the output of each.
+	BASE=/usr/old/bin/sed
+	TEST=../obj/sed
+	BASELOG=sed.out
+	TESTLOG=nsed.out
+	DICT=/usr/share/dict/words
+
+	# The error-handling checks run first and are shown through a pager.
+	test_error | more
+
+	# Input fixtures: lines1 holds l1_1..l1_14, lines2 holds l2_1..l2_9.
+	awk 'END { for (i = 1; i < 15; i++) print "l1_" i}' </dev/null >lines1
+	awk 'END { for (i = 1; i < 10; i++) print "l2_" i}' </dev/null >lines2
+
+	# Keep copies of the current stdout and stderr on fds 4 and 5.
+	exec 4>&1 5>&2
+
+	# Set these flags to get messages about known problems
+	BSD=1 GNU=0 SUN=0
+	tests "$BASE" "$BASELOG"
+
+	BSD=0 GNU=0 SUN=0
+	tests "$TEST" "$TESTLOG"
+
+	# Put stdout and stderr back, then page through the differences
+	# between the two result directories.
+	exec 1>&4 2>&5
+	diff -c "$BASELOG" "$TESTLOG" | more
+}
+
+tests()
+{
+	# $1 is the sed binary to exercise, $2 the directory for its output.
+	SED=$1
+	DIR=$2
+
+	# Always start from an empty output directory.
+	rm -rf $DIR
+	mkdir $DIR
+
+	# mark() increments this before using it, so numbering starts at 101.
+	MARK=100
+
+	test_args
+	test_addr
+	echo Testing commands
+	for group in test_group test_acid test_branch test_pattern \
+	    test_print test_subst
+	do
+		$group
+	done
+}
+
+mark()
+{
+	# Advance the running counter that prefixes every output file name.
+	MARK=`expr $MARK + 1`
+
+	# Point stdout/stderr back at fds 4 and 5, then send stdout to the
+	# file for test $1 inside $DIR.
+	exec 1>&4 2>&5
+	exec >"$DIR/${MARK}_$1"
+
+	banner="Test $1:$MARK"
+	echo "$banner"
+	# The banner is repeated on fd 5 so that sed error messages can be
+	# matched with the test that produced them; comment this line out to
+	# silence it.
+	echo "$banner" >&5
+}
+
+test_args()
+{
+	# Exercises the ways a script can be given (operand, -e, -f and
+	# combinations) and the ways input can arrive (file operand, stdin).
+	# For most cases a note is logged instead of running sed when the
+	# SUN flag is set.
+	mark '1.1'
+	echo Testing argument parsing
+	echo First type
+	if [ $SUN -ne 1 ]; then
+		$SED 's/^/e1_/p' lines1
+	else
+		echo SunOS sed prints only with -n
+	fi
+
+	mark '1.2'
+	$SED -n 's/^/e1_/p' lines1
+
+	mark '1.3'
+	if [ $SUN -ne 1 ]; then
+		$SED 's/^/e1_/p' <lines1
+	else
+		echo SunOS sed prints only with -n
+	fi
+
+	mark '1.4'
+	$SED -n 's/^/e1_/p' <lines1
+	echo Second type
+
+	# An empty -e script; run even when the SUN note is logged.
+	mark '1.4.1'
+	if [ $SUN -eq 1 ]; then
+		echo SunOS sed fails this
+	fi
+	$SED -e '' <lines1
+
+	# Script files used by the -f cases below.
+	echo 's/^/s1_/p' >script1
+	echo 's/^/s2_/p' >script2
+
+	mark '1.5'
+	if [ $SUN -ne 1 ]; then
+		$SED -f script1 lines1
+	else
+		echo SunOS sed prints only with -n
+	fi
+
+	mark '1.6'
+	if [ $SUN -ne 1 ]; then
+		$SED -f script1 <lines1
+	else
+		echo SunOS sed prints only with -n
+	fi
+
+	mark '1.7'
+	if [ $SUN -ne 1 ]; then
+		$SED -e 's/^/e1_/p' lines1
+	else
+		echo SunOS sed prints only with -n
+	fi
+
+	mark '1.8'
+	if [ $SUN -ne 1 ]; then
+		$SED -e 's/^/e1_/p' <lines1
+	else
+		echo SunOS sed prints only with -n
+	fi
+
+	mark '1.9'
+	$SED -n -f script1 lines1
+
+	mark '1.10'
+	$SED -n -f script1 <lines1
+
+	mark '1.11'
+	$SED -n -e 's/^/e1_/p' lines1
+
+	mark '1.12'
+	if [ $SUN -ne 1 ]; then
+		$SED -n -e 's/^/e1_/p' <lines1
+	else
+		echo SunOS sed prints only with -n
+	fi
+
+	# Several scripts on one command line.
+	mark '1.13'
+	if [ $SUN -ne 1 ]; then
+		$SED -e 's/^/e1_/p' -e 's/^/e2_/p' lines1
+	else
+		echo SunOS sed prints only with -n
+	fi
+
+	mark '1.14'
+	if [ $SUN -ne 1 ]; then
+		$SED -f script1 -f script2 lines1
+	else
+		echo SunOS sed prints only with -n
+	fi
+
+	# -e mixed with -f.
+	mark '1.15'
+	if [ $GNU -ne 1 -a $SUN -ne 1 ]; then
+		$SED -e 's/^/e1_/p' -f script1 lines1
+	else
+		echo GNU and SunOS sed fail this following older POSIX draft
+	fi
+
+	# The same input file named twice.
+	mark '1.16'
+	if [ $SUN -ne 1 ]; then
+		$SED -e 's/^/e1_/p' lines1 lines1
+	else
+		echo SunOS sed prints only with -n
+	fi
+
+	# POSIX D11.2:11251
+	mark '1.17'
+	$SED p <lines1 lines1
+
+	# A script file starting with #n, followed by a comment and an
+	# empty line.
+cat >script1 <<EOF
+#n
+# A comment
+
+p
+EOF
+	mark '1.18'
+	$SED -f script1 <lines1 lines1
+}
+
+test_addr()
+{
+	echo Testing address ranges
+
+	# Line-number and last-line ($) addresses.
+	mark '2.1'
+	$SED -n -e '4p' lines1
+	mark '2.2'
+	$SED -n -e '20p' lines1 lines2
+	mark '2.3'
+	$SED -n -e '$p' lines1
+	mark '2.4'
+	$SED -n -e '$p' lines1 lines2
+	mark '2.5'
+	$SED -n -e '$a\
+hello' /dev/null
+	mark '2.6'
+	$SED -n -e '$p' lines1 /dev/null lines2
+
+	# Should not print anything
+	mark '2.7'
+	$SED -n -e '20p' lines1
+	mark '2.8'
+	$SED -n -e '0p' lines1
+
+	# Context (regular expression) addresses.
+	mark '2.9'
+	$SED -n '/l1_7/p' lines1
+	mark '2.10'
+	$SED -n ' /l1_7/ p' lines1
+
+	# Context address written with _ as the delimiter; the notes are
+	# logged but the command runs regardless.
+	mark '2.11'
+	if [ $BSD -eq 1 ]; then
+		echo BSD sed fails this test
+	fi
+	if [ $GNU -eq 1 ]; then
+		echo GNU sed fails this
+	fi
+	$SED -n '\_l1\_7_p' lines1
+
+	# Two-address ranges mixing line numbers, $ and context addresses.
+	mark '2.12'
+	$SED -n '1,4p' lines1
+	mark '2.13'
+	$SED -n '1,$p' lines1 lines2
+	mark '2.14'
+	$SED -n '1,/l2_9/p' lines1 lines2
+	mark '2.15'
+	$SED -n '/4/,$p' lines1 lines2
+	mark '2.16'
+	$SED -n '/4/,20p' lines1 lines2
+	mark '2.17'
+	$SED -n '/4/,/10/p' lines1 lines2
+	mark '2.18'
+	$SED -n '/l2_3/,/l1_8/p' lines1 lines2
+
+	# Ranges whose second address is a smaller line number.
+	mark '2.19'
+	if [ $GNU -eq 1 ]; then
+		echo GNU sed fails this
+	fi
+	$SED -n '12,3p' lines1 lines2
+
+	mark '2.20'
+	if [ $GNU -eq 1 ]; then
+		echo GNU sed fails this
+	fi
+	$SED -n '/l1_7/,3p' lines1 lines2
+}
+
+test_group()
+{
+	echo Brace and other grouping
+
+	# One address range applied to a braced command list.
+	mark '3.1'
+	$SED -e '
+4,12 {
+	s/^/^/
+	s/$/$/
+	s/_/T/
+}' lines1
+
+	# A nested group with its own range and an addressed command inside.
+	mark '3.2'
+	$SED -e '
+4,12 {
+	s/^/^/
+	/6/,/10/ {
+		s/$/$/
+		/8/ s/_/T/
+	}
+}' lines1
+
+	# The same nesting with every address negated.
+	mark '3.3'
+	$SED -e '
+4,12 !{
+	s/^/^/
+	/6/,/10/ !{
+		s/$/$/
+		/8/ !s/_/T/
+	}
+}' lines1
+
+	# A negated range on a single command.
+	mark '3.4'
+	$SED -e '4,12!s/^/^/' lines1
+}
+
+test_acid()
+{
+	echo Testing a c d and i commands
+
+	# i with a line-number address, between two printing substitutions.
+	mark '4.1'
+	$SED -n -e '
+s/^/before_i/p
+20i\
+inserted
+s/^/after_i/p
+' lines1 lines2
+
+	# a with a context address that only matches lines tagged by the
+	# first substitution.
+	mark '4.2'
+	$SED -n -e '
+5,12s/^/5-12/
+s/^/before_a/p
+/5-12/a\
+appended
+s/^/after_a/p
+' lines1 lines2
+
+	# a combined with N; the note is logged but the command still runs.
+	mark '4.3'
+	if [ $GNU -eq 1 ]; then
+		echo GNU sed fails this
+	fi
+	$SED -n -e '
+s/^/^/p
+/l1_/a\
+appended
+8,10N
+s/$/$/p
+' lines1 lines2
+
+	# c with no address, with one address, and with a range.
+	mark '4.4'
+	$SED -n -e '
+c\
+hello
+' lines1
+
+	mark '4.5'
+	$SED -n -e '
+8c\
+hello
+' lines1
+
+	mark '4.6'
+	$SED -n -e '
+3,14c\
+hello
+' lines1
+# SunOS and GNU sed behave differently.   We follow POSIX
+#	mark '4.7' ; $SED -n -e '
+#8,3c\
+#hello
+#' lines1
+
+	# d applied to every line read from stdin.
+	mark '4.8'
+	$SED d <lines1
+}
+
+test_branch()
+{
+	echo Testing labels and branching
+
+	# b to labels, b with a range address, and b with no label.
+	mark '5.1'
+	$SED -n -e '
+b label4
+:label3
+s/^/label3_/p
+b end
+:label4
+2,12b label1
+b label2
+:label1
+s/^/label1_/p
+b
+:label2
+s/^/label2_/p
+b label3
+:end
+' lines1
+
+	# t taken only after a successful substitution; the note is logged
+	# but the command still runs.
+	mark '5.2'
+	if [ $BSD -eq 1 ]; then
+		echo BSD sed fails this test
+	fi
+	$SED -n -e '
+s/l1_/l2_/
+t ok
+b
+:ok
+s/^/tested /p
+' lines1 lines2
+# SunOS sed behaves differently here.  Clarification needed.
+#	mark '5.3' ; $SED -n -e '
+#5,8b inside
+#1,5 {
+#	s/^/^/p
+#	:inside
+#	s/$/$/p
+#}
+#' lines1
+
+	# Check that t clears the substitution done flag
+	mark '5.4'
+	$SED -n -e '
+1,8s/^/^/
+t l1
+:l1
+t l2
+s/$/$/p
+b
+:l2
+s/^/ERROR/
+' lines1
+
+	# Check that reading a line clears the substitution done flag
+	mark '5.5'
+	if [ $BSD -eq 1 ]; then
+		echo BSD sed fails this test
+	fi
+	$SED -n -e '
+t l2
+1,8s/^/^/p
+2,7N
+b
+:l2
+s/^/ERROR/p
+' lines1
+
+	# q on its own, and q on the same line as an i command.
+	mark '5.6'
+	$SED 5q lines1
+
+	mark '5.7'
+	$SED -e '
+5i\
+hello
+5q' lines1
+}
+
+test_pattern()
+{
+	echo Pattern space commands
+
+	# Check that the pattern space is deleted
+	mark '6.1'
+	$SED -n -e '
+c\
+changed
+p
+' lines1
+
+	mark '6.2'
+	$SED -n -e '
+4d
+p
+' lines1
+# SunOS sed refused to print here
+#	mark '6.3' ; $SED -e '
+#N
+#N
+#N
+#D
+#P
+#4p
+#' lines1
+
+	# Hold space commands: h, H, g, G and x.
+	mark '6.4'
+	$SED -e '
+2h
+3H
+4g
+5G
+6x
+6p
+6x
+6p
+' lines1
+
+	# n with and without -n.
+	mark '6.5'
+	$SED -e '4n' lines1
+
+	mark '6.6'
+	$SED -n -e '4n' lines1
+}
+
+test_print()
+{
+	# Exercises the l, =, w and r commands and last-line detection.
+	# Uses the globals SED, DICT, BSD, GNU and SUN; rewrites lines3,
+	# lines4 and script1, and uses tmpdir as scratch space for test 7.7.
+	echo Testing print and file routines
+	# lines3 gets the characters with codes 1 through 255, then newlines.
+	awk 'END {for (i = 1; i < 256; i++) printf("%c", i);print "\n"}' \
+		</dev/null >lines3
+	# GNU and SunOS sed behave differently here
+	mark '7.1'
+	if [ $BSD -eq 1 ] ; then
+		echo 'BSD sed drops core on this one; TEST SKIPPED'
+	else
+		$SED -n l lines3
+	fi
+	mark '7.2' ; $SED -e '/l2_/=' lines1 lines2
+	# Remove any stale w file so only this run's output is shown.
+	rm -f lines4
+	mark '7.3' ; $SED -e '3,12w lines4' lines1
+	echo w results
+	cat lines4
+	# r from a regular file, from a path that is presumably absent
+	# (/dev/dds), and from an empty file.
+	mark '7.4' ; $SED -e '4r lines2' lines1
+	mark '7.5' ; $SED -e '5r /dev/dds' lines1
+	mark '7.6' ; $SED -e '6r /dev/null' lines1
+	mark '7.7'
+	if [ $BSD -eq 1 -o $GNU -eq 1 -o $SUN -eq 1 ] ; then
+		echo BSD, GNU and SunOS cannot pass this one
+	else
+		# Turn the first 200 dictionary words into a script of s
+		# commands, each with its own w file under tmpdir.
+		sed '200q' $DICT | sed 's$.*$s/^/&/w tmpdir/&$' >script1
+		rm -rf tmpdir
+		mkdir tmpdir
+		$SED -f script1 lines1
+		cat tmpdir/*
+		rm -rf tmpdir
+	fi
+	mark '7.8'
+	if [ $BSD -eq 1 ] ; then
+		# The message used to name test 7.7, although it is test 7.8
+		# that is being skipped here.
+		echo BSD sed cannot pass 7.8
+	else
+		# $ must still select the last line when the final input
+		# file is empty.
+		echo line1 > lines3
+		echo "" >> lines3
+		$SED -n -e '$p' lines3 /dev/null
+	fi
+}
+
+test_subst()
+{
+	echo Testing substitution commands
+
+	# Different delimiters for the s command.
+	mark '8.1'
+	$SED -e 's/./X/g' lines1
+	mark '8.2'
+	$SED -e 's,.,X,g' lines1
+# GNU and SunOS sed thinks we are escaping . as wildcard, not as separator
+#	mark '8.3' ; $SED -e 's.\..X.g' lines1
+# POSIX does not say that this should work
+#	mark '8.4' ; $SED -e 's/[/]/Q/' lines1
+	mark '8.4'
+	$SED -e 's/[\/]/Q/' lines1
+	mark '8.5'
+	$SED -e 's_\__X_' lines1
+
+	# &, escaped &, back-references and newlines in the replacement.
+	mark '8.6'
+	$SED -e 's/./(&)/g' lines1
+	mark '8.7'
+	$SED -e 's/./(\&)/g' lines1
+	mark '8.8'
+	$SED -e 's/\(.\)\(.\)\(.\)/x\3x\2x\1/g' lines1
+	mark '8.9'
+	$SED -e 's/_/u0\
+u1\
+u2/g' lines1
+
+	# Numeric flag; the note is logged but the command still runs.
+	mark '8.10'
+	if [ $BSD -eq 1 -o $GNU -eq 1 ]; then
+		echo 'BSD/GNU sed do not understand digit flags on s commands'
+	fi
+	$SED -e 's/./X/4' lines1
+
+	# w flag: clear the file first, then show what was written.
+	rm -f lines4
+	mark '8.11'
+	$SED -e 's/1/X/w lines4' lines1
+	echo s wfile results
+	cat lines4
+
+	mark '8.12'
+	$SED -e 's/[123]/X/g' lines1
+
+	# The y command, including a digit used as the delimiter.
+	mark '8.13'
+	$SED -e 'y/0123456789/9876543210/' lines1
+	mark '8.14'
+	if [ $BSD -eq 1 -o $GNU -eq 1 -o $SUN -eq 1 ]; then
+		echo BSD/GNU/SUN sed fail this test
+	else
+		$SED -e 'y10\123456789198765432\101' lines1
+	fi
+	mark '8.15'
+	$SED -e '1N;2y/\n/X/' lines1
+
+	# An empty regular expression used after a branch.
+	mark '8.16'
+	if [ $BSD -eq 1 ]; then
+		echo 'BSD sed does not handle branch defined REs'
+	else
+		echo 'eeefff' | $SED -e 'p' -e 's/e/X/p' -e ':x' \
+		    -e 's//Y/p' -e '/f/bx'
+	fi
+}
+
+test_error()
+{
+	# Runs $TEST with a series of bad options, unusable files and
+	# malformed scripts.  Each checked invocation is followed by
+	# "&& exit 1", so a sed that exits 0 terminates the shell running
+	# this function with status 1.
+
+	# Save stdin, stdout and stderr on fds 3, 4 and 5.  This used to be
+	# "exec 0>&3", which duplicates the not-yet-open fd 3 onto stdin
+	# instead of saving stdin on fd 3.
+	exec 3<&0 4>&1 5>&2
+	# sed reads from /dev/null, and stderr is merged into stdout so the
+	# error messages and the set -x trace travel with the normal output.
+	exec 0</dev/null
+	exec 2>&1
+	set -x
+	$TEST -x && exit 1
+	$TEST -f && exit 1
+	$TEST -e && exit 1
+	$TEST -f /dev/dds && exit 1
+	$TEST p /dev/dds && exit 1
+	$TEST -f /bin/sh && exit 1
+	# NOTE(review): the '{' case appears twice; possibly one of them was
+	# meant to be '}' -- confirm before changing.
+	$TEST '{' && exit 1
+	$TEST '{' && exit 1
+	$TEST '/hello/' && exit 1
+	$TEST '1,/hello/' && exit 1
+	$TEST -e '-5p' && exit 1
+	$TEST '/jj' && exit 1
+	$TEST 'a hello' && exit 1
+	$TEST 'a \ hello' && exit 1
+	$TEST 'b foo' && exit 1
+	$TEST 'd hello' && exit 1
+	$TEST 's/aa' && exit 1
+	$TEST 's/aa/' && exit 1
+	$TEST 's/a/b' && exit 1
+	$TEST 's/a/b/c/d' && exit 1
+	$TEST 's/a/b/ 1 2' && exit 1
+	$TEST 's/a/b/ 1 g' && exit 1
+	$TEST 's/a/b/w' && exit 1
+	$TEST 'y/aa' && exit 1
+	$TEST 'y/aa/b/' && exit 1
+	$TEST 'y/aa/' && exit 1
+	$TEST 'y/a/b' && exit 1
+	$TEST 'y/a/b/c/d' && exit 1
+	$TEST '!' && exit 1
+	# The exit status of this last invocation is not checked.
+	$TEST supercalifrangolisticexprialidociussupercalifrangolisticexcius
+	set +x
+	# Restore the three descriptors and release the temporary fd 3.
+	exec 0<&3 1>&4 2>&5 3<&-
+}
+
+main