Merge our changes.

This commit is contained in:
christos 2014-06-06 00:13:13 +00:00
parent 1d79f6aae3
commit ef24ec4ecc
9 changed files with 1115 additions and 876 deletions

View File

@ -1,7 +1,8 @@
# $NetBSD: Makefile,v 1.14 2009/04/14 22:15:26 lukem Exp $
# $NetBSD: Makefile,v 1.15 2014/06/06 00:13:13 christos Exp $
# from: @(#)Makefile 8.1 (Berkeley) 6/6/93
.include <bsd.own.mk>
WARNS=6
PROG= sed
SRCS= compile.c main.c misc.c process.c

View File

@ -1,5 +1,6 @@
# $NetBSD: POSIX,v 1.4 1997/01/09 20:21:25 tls Exp $
# from: @(#)POSIX 8.1 (Berkeley) 6/6/93
# $NetBSD: POSIX,v 1.5 2014/06/06 00:13:13 christos Exp $
# @(#)POSIX 8.1 (Berkeley) 6/6/93
# $FreeBSD: head/usr.bin/sed/POSIX 168417 2007-04-06 08:43:30Z yar $
Comments on the IEEE P1003.2 Draft 12
Part 2: Shell and Utilities
@ -118,10 +119,15 @@ All uses of "POSIX" refer to section 4.55, Draft 12 of POSIX 1003.2.
1,3c\
text
Historic implementations, and this implementation, do not output
the text in the above example. The general rule, therefore,
is that a range whose second address is never matched extends to
the end of the input.
Historic implementations did not output the text in the above
example. Therefore it was believed that a range whose second
address was never matched extended to the end of the input.
However, the current practice adopted by this implementation,
as well as by those from GNU and SUN, is as follows: The text
from the 'c' command still isn't output because the second address
isn't actually matched; but the range is reset after all if its
second address is a line number. In the above example, only the
first line of the input will be deleted.
13. Historical implementations allow an output suppressing #n at the
beginning of -e arguments as well as in a script file. POSIX

View File

@ -1,6 +1,7 @@
/* $NetBSD: compile.c,v 1.40 2014/05/05 17:12:11 christos Exp $ */
/* $NetBSD: compile.c,v 1.41 2014/06/06 00:13:13 christos Exp $ */
/*-
* Copyright (c) 1992 Diomidis Spinellis.
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
@ -32,58 +33,25 @@
* SUCH DAMAGE.
*/
/*-
* Copyright (c) 1992 Diomidis Spinellis.
*
* This code is derived from software contributed to Berkeley by
* Diomidis Spinellis of Imperial College, University of London.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef lint
#if 0
static char sccsid[] = "@(#)compile.c 8.2 (Berkeley) 4/28/95";
#else
__RCSID("$NetBSD: compile.c,v 1.40 2014/05/05 17:12:11 christos Exp $");
__RCSID("$NetBSD: compile.c,v 1.41 2014/06/06 00:13:13 christos Exp $");
#ifdef __FBSDID
__FBSDID("$FreeBSD: head/usr.bin/sed/compile.c 259132 2013-12-09 18:57:20Z eadler $");
#endif
#ifndef lint
static const char sccsid[] = "@(#)compile.c 8.1 (Berkeley) 6/6/93";
#endif
#endif /* not lint */
#include <sys/types.h>
#include <sys/stat.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
@ -91,14 +59,11 @@ __RCSID("$NetBSD: compile.c,v 1.40 2014/05/05 17:12:11 christos Exp $");
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include "defs.h"
#include "extern.h"
#ifndef _POSIX2_LINE_MAX
#define _POSIX2_LINE_MAX (2 * BUFSIZ)
#endif
#define LHSZ 128
#define LHMASK (LHSZ - 1)
static struct labhash {
@ -110,12 +75,12 @@ static struct labhash {
static char *compile_addr(char *, struct s_addr *);
static char *compile_ccl(char **, char *);
static char *compile_delimited(char *, char *);
static char *compile_delimited(char *, char *, int);
static char *compile_flags(char *, struct s_subst *);
static char *compile_re(char *, regex_t **);
static regex_t *compile_re(char *, int);
static char *compile_subst(char *, struct s_subst *);
static char *compile_text(void);
static char *compile_tr(char *, char **);
static char *compile_tr(char *, struct s_tr **);
static struct s_command
**compile_stream(struct s_command **);
static char *duptoeol(char *, const char *);
@ -184,39 +149,43 @@ compile(void)
match = xmalloc((maxnsub + 1) * sizeof(regmatch_t));
}
#define EATSPACE() \
while (*p && isascii((unsigned char)*p) && \
isspace((unsigned char)*p)) \
p++ \
#define EATSPACE() do { \
if (p) \
while (*p && isspace((unsigned char)*p)) \
p++; \
} while (0)
static struct s_command **
compile_stream(struct s_command **link)
{
char *p;
static char *lbuf; /* To avoid excessive malloc calls */
static size_t bufsize;
static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */
struct s_command *cmd, *cmd2, *stack;
struct s_format *fp;
char re[_POSIX2_LINE_MAX + 1];
int naddr; /* Number of addresses */
stack = 0;
for (;;) {
if ((p = cu_fgets(&lbuf, &bufsize)) == NULL) {
if ((p = cu_fgets(lbuf, sizeof(lbuf), NULL)) == NULL) {
if (stack != 0)
err(COMPILE, "unexpected EOF (pending }'s)");
errx(1, "%lu: %s: unexpected EOF (pending }'s)",
linenum, fname);
return (link);
}
semicolon: EATSPACE();
if (*p == '#' || *p == '\0')
continue;
else if (*p == ';') {
p++;
goto semicolon;
if (p) {
if (*p == '#' || *p == '\0')
continue;
else if (*p == ';') {
p++;
goto semicolon;
}
}
*link = cmd = xmalloc(sizeof(struct s_command));
link = &cmd->next;
cmd->nonsel = cmd->inrange = 0;
cmd->startline = cmd->nonsel = 0;
/* First parse the addresses */
naddr = 0;
@ -241,16 +210,17 @@ semicolon: EATSPACE();
nonsel: /* Now parse the command */
if (!*p)
err(COMPILE, "command expected");
errx(1, "%lu: %s: command expected", linenum, fname);
cmd->code = *p;
for (fp = cmd_fmts; fp->code; fp++)
if (fp->code == *p)
break;
if (!fp->code)
err(COMPILE, "invalid command code %c", *p);
errx(1, "%lu: %s: invalid command code %c", linenum, fname, *p);
if (naddr > fp->naddr)
err(COMPILE,
"command %c expects up to %d address(es), found %d", *p, fp->naddr, naddr);
errx(1,
"%lu: %s: command %c expects up to %d address(es), found %d",
linenum, fname, *p, fp->naddr, naddr);
switch (fp->args) {
case NONSEL: /* ! */
p++;
@ -273,7 +243,7 @@ nonsel: /* Now parse the command */
*/
cmd->nonsel = 1;
if (stack == 0)
err(COMPILE, "unexpected }");
errx(1, "%lu: %s: unexpected }", linenum, fname);
cmd2 = stack;
stack = cmd2->next;
cmd2->next = cmd;
@ -291,21 +261,22 @@ nonsel: /* Now parse the command */
case '\0':
break;
default:
err(COMPILE,
"extra characters at the end of %c command", cmd->code);
errx(1, "%lu: %s: extra characters at the end of %c command",
linenum, fname, cmd->code);
}
break;
case TEXT: /* a c i */
p++;
EATSPACE();
if (*p != '\\')
err(COMPILE,
"command %c expects \\ followed by text", cmd->code);
errx(1,
"%lu: %s: command %c expects \\ followed by text", linenum, fname, cmd->code);
p++;
EATSPACE();
if (*p)
err(COMPILE,
"extra characters after \\ at the end of %c command", cmd->code);
errx(1,
"%lu: %s: extra characters after \\ at the end of %c command",
linenum, fname, cmd->code);
cmd->t = compile_text();
break;
case COMMENT: /* \0 # */
@ -314,20 +285,20 @@ nonsel: /* Now parse the command */
p++;
EATSPACE();
if (*p == '\0')
err(COMPILE, "filename expected");
errx(1, "%lu: %s: filename expected", linenum, fname);
cmd->t = duptoeol(p, "w command");
if (aflag)
cmd->u.fd = -1;
else if ((cmd->u.fd = open(p,
else if ((cmd->u.fd = open(p,
O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
DEFFILEMODE)) == -1)
err(FATAL, "%s: %s", p, strerror(errno));
err(1, "%s", p);
break;
case RFILE: /* r */
p++;
EATSPACE();
if (*p == '\0')
err(COMPILE, "filename expected");
errx(1, "%lu: %s: filename expected", linenum, fname);
else
cmd->t = duptoeol(p, "read command");
break;
@ -344,21 +315,35 @@ nonsel: /* Now parse the command */
EATSPACE();
cmd->t = duptoeol(p, "label");
if (strlen(p) == 0)
err(COMPILE, "empty label");
errx(1, "%lu: %s: empty label", linenum, fname);
enterlabel(cmd);
break;
case SUBST: /* s */
p++;
if (*p == '\0' || *p == '\\')
err(COMPILE,
"substitute pattern can not be delimited by newline or backslash");
cmd->u.s = xmalloc(sizeof(struct s_subst));
p = compile_re(p, &cmd->u.s->re);
errx(1,
"%lu: %s: substitute pattern can not be delimited by newline or backslash",
linenum, fname);
cmd->u.s = xcalloc(1, sizeof(struct s_subst));
p = compile_delimited(p, re, 0);
if (p == NULL)
err(COMPILE, "unterminated substitute pattern");
errx(1,
"%lu: %s: unterminated substitute pattern", linenum, fname);
/* Compile RE with no case sensitivity temporarily */
if (*re == '\0')
cmd->u.s->re = NULL;
else
cmd->u.s->re = compile_re(re, 0);
--p;
p = compile_subst(p, cmd->u.s);
p = compile_flags(p, cmd->u.s);
/* Recompile RE with case sensitivity from "I" flag if any */
if (*re == '\0')
cmd->u.s->re = NULL;
else
cmd->u.s->re = compile_re(re, cmd->u.s->icase);
EATSPACE();
if (*p == ';') {
p++;
@ -368,7 +353,7 @@ nonsel: /* Now parse the command */
break;
case TR: /* y */
p++;
p = compile_tr(p, (char **)(void *)&cmd->u.y);
p = compile_tr(p, &cmd->u.y);
EATSPACE();
switch (*p) {
case ';':
@ -380,16 +365,17 @@ nonsel: /* Now parse the command */
case '\0':
break;
default:
err(COMPILE,
"extra text at the end of a transform command");
errx(1,
"%lu: %s: extra text at the end of a transform command", linenum, fname);
}
if (*p)
break;
}
}
}
/*
* Get a delimited string. P points to the delimiter of the string; d points
* Get a delimited string. P points to the delimiter of the string; d points
* to a buffer area. Newline and delimiter escapes are processed; other
* escapes are ignored.
*
@ -398,7 +384,7 @@ nonsel: /* Now parse the command */
* with the processed string.
*/
static char *
compile_delimited(char *p, char *d)
compile_delimited(char *p, char *d, int is_tr)
{
char c;
@ -406,13 +392,15 @@ compile_delimited(char *p, char *d)
if (c == '\0')
return (NULL);
else if (c == '\\')
err(COMPILE, "\\ can not be used as a string delimiter");
errx(1, "%lu: %s: \\ can not be used as a string delimiter",
linenum, fname);
else if (c == '\n')
err(COMPILE, "newline can not be used as a string delimiter");
errx(1, "%lu: %s: newline can not be used as a string delimiter",
linenum, fname);
while (*p) {
if (*p == '[') {
if (*p == '[' && *p != c) {
if ((d = compile_ccl(&p, d)) == NULL)
err(COMPILE, "unbalanced brackets ([])");
errx(1, "%lu: %s: unbalanced brackets ([])", linenum, fname);
continue;
} else if (*p == '\\' && p[1] == '[') {
*d++ = *p++;
@ -422,9 +410,12 @@ compile_delimited(char *p, char *d)
*d++ = '\n';
p += 2;
continue;
} else if (*p == '\\' && p[1] == '\\')
*d++ = *p++;
else if (*p == c) {
} else if (*p == '\\' && p[1] == '\\') {
if (is_tr)
p++;
else
*d++ = *p++;
} else if (*p == c) {
*d = '\0';
return (p + 1);
}
@ -452,40 +443,32 @@ compile_ccl(char **sp, char *t)
for (c = *s; (*t = *s) != ']' || c != d; s++, t++)
if ((c = *s) == '\0')
return NULL;
} else if (*s == '\\' && s[1] == 'n')
*t = '\n', s++;
}
return (*s == ']') ? *sp = ++s, ++t : NULL;
}
/*
* Get a regular expression. P points to the delimiter of the regular
* expression; repp points to the address of a regexp pointer. Newline
* and delimiter escapes are processed; other escapes are ignored.
* Returns a pointer to the first character after the final delimiter
* or NULL in the case of a non terminated regular expression. The regexp
* pointer is set to the compiled regular expression.
* Compiles the regular expression in RE and returns a pointer to the compiled
* regular expression.
* Cflags are passed to regcomp.
*/
static char *
compile_re(char *p, regex_t **repp)
static regex_t *
compile_re(char *re, int case_insensitive)
{
int eval;
char *re;
regex_t *rep;
int eval, flags;
re = xmalloc(strlen(p) + 1); /* strlen(re) <= strlen(p) */
p = compile_delimited(p, re);
if (p && strlen(re) == 0) {
*repp = NULL;
free(re);
return (p);
}
*repp = xmalloc(sizeof(regex_t));
if (p && (eval = regcomp(*repp, re, ere)) != 0)
err(COMPILE, "RE error: %s", strregerror(eval, *repp));
if (maxnsub < (*repp)->re_nsub)
maxnsub = (*repp)->re_nsub;
free(re);
return (p);
flags = rflags;
if (case_insensitive)
flags |= REG_ICASE;
rep = xmalloc(sizeof(regex_t));
if ((eval = regcomp(rep, re, flags)) != 0)
errx(1, "%lu: %s: RE error: %s",
linenum, fname, strregerror(eval, rep));
if (maxnsub < rep->re_nsub)
maxnsub = rep->re_nsub;
return (rep);
}
/*
@ -496,11 +479,11 @@ compile_re(char *p, regex_t **repp)
static char *
compile_subst(char *p, struct s_subst *s)
{
static char *lbuf;
static size_t bufsize;
int asize, ref, size, len;
static char lbuf[_POSIX2_LINE_MAX + 1];
size_t asize, size;
u_char ref;
char c, *text, *op, *sp;
int sawesc = 0;
int more = 1, sawesc = 0;
c = *p++; /* Terminator character */
if (c == '\0')
@ -508,16 +491,10 @@ compile_subst(char *p, struct s_subst *s)
s->maxbref = 0;
s->linenum = linenum;
text = NULL;
asize = size = 0;
asize = 2 * _POSIX2_LINE_MAX + 1;
text = xmalloc(asize);
size = 0;
do {
len = ROUNDLEN(strlen(p) + 1);
if (asize - size < len) {
do {
asize += len;
} while (asize - size < len);
text = xrealloc(text, asize);
}
op = sp = text + size;
for (; *p; p++) {
if (*p == '\\' || sawesc) {
@ -544,33 +521,40 @@ compile_subst(char *p, struct s_subst *s)
continue;
} else if (strchr("123456789", *p) != NULL) {
*sp++ = '\\';
ref = *p - '0';
ref = (u_char)(*p - '0');
if (s->re != NULL &&
(size_t)ref > s->re->re_nsub)
err(COMPILE,
"\\%c not defined in the RE", *p);
ref > s->re->re_nsub)
errx(1, "%lu: %s: \\%c not defined in the RE",
linenum, fname, *p);
if (s->maxbref < ref)
s->maxbref = ref;
} else if (*p == '&' || *p == '\\')
*sp++ = '\\';
} else if (*p == c) {
p++;
if (*++p == '\0' && more) {
if (cu_fgets(lbuf, sizeof(lbuf), &more))
p = lbuf;
}
*sp++ = '\0';
size += sp - op;
size += (size_t)(sp - op);
s->new = xrealloc(text, size);
return (p);
} else if (*p == '\n') {
err(COMPILE,
"unescaped newline inside substitute pattern");
errx(1,
"%lu: %s: unescaped newline inside substitute pattern", linenum, fname);
/* NOTREACHED */
}
*sp++ = *p;
}
size += sp - op;
} while ((p = cu_fgets(&lbuf, &bufsize)));
err(COMPILE, "unterminated substitute in regular expression");
size += (size_t)(sp - op);
if (asize - size < _POSIX2_LINE_MAX + 1) {
asize *= 2;
text = xrealloc(text, asize);
}
} while (cu_fgets(p = lbuf, sizeof(lbuf), &more));
errx(1, "%lu: %s: unterminated substitute in regular expression",
linenum, fname);
/* NOTREACHED */
return (NULL);
}
/*
@ -580,19 +564,21 @@ static char *
compile_flags(char *p, struct s_subst *s)
{
int gn; /* True if we have seen g or n */
char wfile[PATH_MAX], *q;
unsigned long nval;
char wfile[_POSIX2_LINE_MAX + 1], *q;
s->n = 1; /* Default */
s->p = 0;
s->wfile = NULL;
s->wfd = -1;
s->icase = 0;
for (gn = 0;;) {
EATSPACE(); /* EXTENSION */
switch (*p) {
case 'g':
if (gn)
err(COMPILE,
"more than one number or 'g' in substitute flags");
errx(1,
"%lu: %s: more than one number or 'g' in substitute flags", linenum, fname);
gn = 1;
s->n = 0;
break;
@ -603,22 +589,30 @@ compile_flags(char *p, struct s_subst *s)
case 'p':
s->p = 1;
break;
case 'i':
case 'I':
s->icase = 1;
break;
case '1': case '2': case '3':
case '4': case '5': case '6':
case '7': case '8': case '9':
if (gn)
err(COMPILE,
"more than one number or 'g' in substitute flags");
errx(1,
"%lu: %s: more than one number or 'g' in substitute flags", linenum, fname);
gn = 1;
/* XXX Check for overflow */
s->n = (int)strtol(p, &p, 10);
errno = 0;
nval = strtoul(p, &p, 10);
if (errno == ERANGE || nval > INT_MAX)
errx(1,
"%lu: %s: overflow in the 'N' substitute flag", linenum, fname);
s->n = (int)nval;
p--;
break;
case 'w':
p++;
#ifdef HISTORIC_PRACTICE
if (*p != ' ') {
err(WARNING, "space missing before w wfile");
warnx("%lu: %s: space missing before w wfile", linenum, fname);
return (p);
}
#endif
@ -631,16 +625,16 @@ compile_flags(char *p, struct s_subst *s)
}
*q = '\0';
if (q == wfile)
err(COMPILE, "no wfile specified");
errx(1, "%lu: %s: no wfile specified", linenum, fname);
s->wfile = strdup(wfile);
if (!aflag && (s->wfd = open(wfile,
O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
DEFFILEMODE)) == -1)
err(FATAL, "%s: %s", wfile, strerror(errno));
err(1, "%s", wfile);
return (p);
default:
err(COMPILE,
"bad flag in substitute command: '%c'", *p);
errx(1, "%lu: %s: bad flag in substitute command: '%c'",
linenum, fname, *p);
break;
}
p++;
@ -651,82 +645,127 @@ compile_flags(char *p, struct s_subst *s)
* Compile a translation set of strings into a lookup table.
*/
static char *
compile_tr(char *p, char **transtab)
compile_tr(char *p, struct s_tr **py)
{
int i;
char *lt, *op, *np;
char *old = NULL, *new = NULL;
struct s_tr *y;
size_t i;
const char *op, *np;
char old[_POSIX2_LINE_MAX + 1];
char new[_POSIX2_LINE_MAX + 1];
size_t oclen, oldlen, nclen, newlen;
mbstate_t mbs1, mbs2;
*py = y = xmalloc(sizeof(*y));
y->multis = NULL;
y->nmultis = 0;
if (*p == '\0' || *p == '\\')
err(COMPILE,
"transform pattern can not be delimited by newline or backslash");
old = xmalloc(strlen(p) + 1);
p = compile_delimited(p, old);
if (p == NULL) {
err(COMPILE, "unterminated transform source string");
goto bad;
}
new = xmalloc(strlen(p) + 1);
p = compile_delimited(p - 1, new);
if (p == NULL) {
err(COMPILE, "unterminated transform target string");
goto bad;
}
errx(1,
"%lu: %s: transform pattern can not be delimited by newline or backslash",
linenum, fname);
p = compile_delimited(p, old, 1);
if (p == NULL)
errx(1, "%lu: %s: unterminated transform source string",
linenum, fname);
p = compile_delimited(p - 1, new, 1);
if (p == NULL)
errx(1, "%lu: %s: unterminated transform target string",
linenum, fname);
EATSPACE();
if (strlen(new) != strlen(old)) {
err(COMPILE, "transform strings are not the same length");
goto bad;
op = old;
oldlen = mbsrtowcs(NULL, &op, 0, NULL);
if (oldlen == (size_t)-1)
err(1, NULL);
np = new;
newlen = mbsrtowcs(NULL, &np, 0, NULL);
if (newlen == (size_t)-1)
err(1, NULL);
if (newlen != oldlen)
errx(1, "%lu: %s: transform strings are not the same length",
linenum, fname);
if (MB_CUR_MAX == 1) {
/*
* The single-byte encoding case is easy: generate a
* lookup table.
*/
for (i = 0; i <= UCHAR_MAX; i++)
y->bytetab[i] = (u_char)i;
for (; *op; op++, np++)
y->bytetab[(u_char)*op] = (u_char)*np;
} else {
/*
* Multi-byte encoding case: generate a lookup table as
* above, but only for single-byte characters. The first
* bytes of multi-byte characters have their lookup table
* entries set to 0, which causes do_tr() to search through
* an auxiliary vector of multi-byte mappings.
*/
memset(&mbs1, 0, sizeof(mbs1));
memset(&mbs2, 0, sizeof(mbs2));
for (i = 0; i <= UCHAR_MAX; i++)
y->bytetab[i] = (u_char)((btowc((int)i) != WEOF) ? i : 0);
while (*op != '\0') {
oclen = mbrlen(op, MB_LEN_MAX, &mbs1);
if (oclen == (size_t)-1 || oclen == (size_t)-2)
errc(1, EILSEQ, NULL);
nclen = mbrlen(np, MB_LEN_MAX, &mbs2);
if (nclen == (size_t)-1 || nclen == (size_t)-2)
errc(1, EILSEQ, NULL);
if (oclen == 1 && nclen == 1)
y->bytetab[(u_char)*op] = (u_char)*np;
else {
y->bytetab[(u_char)*op] = 0;
y->multis = xrealloc(y->multis,
(y->nmultis + 1) * sizeof(*y->multis));
i = y->nmultis++;
y->multis[i].fromlen = oclen;
memcpy(y->multis[i].from, op, oclen);
y->multis[i].tolen = nclen;
memcpy(y->multis[i].to, np, nclen);
}
op += oclen;
np += nclen;
}
}
/* We assume characters are 8 bits */
lt = xmalloc(UCHAR_MAX+1);
for (i = 0; i <= UCHAR_MAX; i++)
lt[i] = (char)i;
for (op = old, np = new; *op; op++, np++)
lt[(u_char)*op] = *np;
*transtab = lt;
free(old);
free(new);
return (p);
bad:
free(old);
free(new);
return (NULL);
}
/*
* Compile the text following an a, c, or i command.
* Compile the text following an a, c, or i command.
*/
static char *
compile_text(void)
{
int asize, size, len;
char *lbuf, *text, *p, *op, *s;
size_t bufsize;
size_t asize, size;
int esc_nl;
char *text, *p, *op, *s;
char lbuf[_POSIX2_LINE_MAX + 1];
lbuf = text = NULL;
asize = size = 0;
while ((p = cu_fgets(&lbuf, &bufsize))) {
len = ROUNDLEN(strlen(p) + 1);
if (asize - size < len) {
do {
asize += len;
} while (asize - size < len);
text = xrealloc(text, asize);
}
asize = 2 * _POSIX2_LINE_MAX + 1;
text = xmalloc(asize);
size = 0;
while (cu_fgets(lbuf, sizeof(lbuf), NULL)) {
op = s = text + size;
for (; *p; p++) {
if (*p == '\\')
p++;
p = lbuf;
EATSPACE();
for (esc_nl = 0; *p != '\0'; p++) {
if (*p == '\\' && p[1] != '\0' && *++p == '\n')
esc_nl = 1;
*s++ = *p;
}
size += s - op;
if (p[-2] != '\\') {
size += (size_t)(s - op);
if (!esc_nl) {
*s = '\0';
break;
}
if (asize - size < _POSIX2_LINE_MAX + 1) {
asize *= 2;
text = xrealloc(text, asize);
}
}
free(lbuf);
return (xrealloc(text, size + 1));
text[size] = '\0';
p = xrealloc(text, size + 1);
return (p);
}
/*
@ -736,30 +775,49 @@ compile_text(void)
static char *
compile_addr(char *p, struct s_addr *a)
{
char *end;
char *end, re[_POSIX2_LINE_MAX + 1];
int icase;
icase = 0;
a->type = 0;
switch (*p) {
case '\\': /* Context address */
++p;
/* FALLTHROUGH */
case '/': /* Context address */
p = compile_re(p, &a->u.r);
p = compile_delimited(p, re, 0);
if (p == NULL)
err(COMPILE, "unterminated regular expression");
errx(1, "%lu: %s: unterminated regular expression", linenum, fname);
/* Check for case insensitive regexp flag */
if (*p == 'I') {
icase = 1;
p++;
}
if (*re == '\0')
a->u.r = NULL;
else
a->u.r = compile_re(re, icase);
a->type = AT_RE;
return (p);
case '$': /* Last line */
a->type = AT_LAST;
return (p + 1);
case '+': /* Relative line number */
a->type = AT_RELLINE;
p++;
/* FALLTHROUGH */
/* Line number */
case '0': case '1': case '2': case '3': case '4':
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
a->type = AT_LINE;
a->u.l = strtol(p, &end, 10);
if (a->type == 0)
a->type = AT_LINE;
a->u.l = strtoul(p, &end, 10);
return (end);
default:
err(COMPILE, "expected context address");
errx(1, "%lu: %s: expected context address", linenum, fname);
return (NULL);
}
}
@ -773,16 +831,17 @@ duptoeol(char *s, const char *ctype)
{
size_t len;
int ws;
char *start;
char *p, *start;
ws = 0;
for (start = s; *s != '\0' && *s != '\n'; ++s)
ws = isspace((unsigned char)*s);
*s = '\0';
if (ws)
err(WARNING, "whitespace after %s", ctype);
len = s - start + 1;
return (memmove(xmalloc(len), start, len));
warnx("%lu: %s: whitespace after %s", linenum, fname, ctype);
len = (size_t)(s - start + 1);
p = xmalloc(len);
return (memmove(p, start, len));
}
/*
@ -810,7 +869,7 @@ fixuplabel(struct s_command *cp, struct s_command *end)
break;
}
if ((cp->u.c = findlabel(cp->t)) == NULL)
err(COMPILE2, "undefined label '%s'", cp->t);
errx(1, "%lu: %s: undefined label '%s'", linenum, fname, cp->t);
free(cp->t);
break;
case '{':
@ -835,7 +894,7 @@ enterlabel(struct s_command *cp)
lhp = &labels[h & LHMASK];
for (lh = *lhp; lh != NULL; lh = lh->lh_next)
if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0)
err(COMPILE2, "duplicate label '%s'", cp->t);
errx(1, "%lu: %s: duplicate label '%s'", linenum, fname, cp->t);
lh = xmalloc(sizeof *lh);
lh->lh_next = *lhp;
lh->lh_hash = h;
@ -866,7 +925,7 @@ findlabel(char *name)
return (NULL);
}
/*
/*
* Warn about any unused labels. As a side effect, release the label hash
* table space.
*/
@ -880,8 +939,8 @@ uselabel(void)
for (lh = labels[i]; lh != NULL; lh = next) {
next = lh->lh_next;
if (!lh->lh_ref)
err(WARNING, "unused label '%s'",
lh->lh_cmd->t);
warnx("%lu: %s: unused label '%s'",
linenum, fname, lh->lh_cmd->t);
free(lh);
}
}

View File

@ -1,6 +1,7 @@
/* $NetBSD: defs.h,v 1.10 2010/02/19 16:35:27 tnn Exp $ */
/* $NetBSD: defs.h,v 1.11 2014/06/06 00:13:13 christos Exp $ */
/*-
* Copyright (c) 1992 Diomidis Spinellis.
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
@ -15,44 +16,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)defs.h 8.1 (Berkeley) 6/6/93
* $NetBSD: defs.h,v 1.10 2010/02/19 16:35:27 tnn Exp $
*/
/*-
* Copyright (c) 1992 Diomidis Spinellis.
*
* This code is derived from software contributed to Berkeley by
* Diomidis Spinellis of Imperial College, University of London.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@ -69,16 +32,17 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)defs.h 8.1 (Berkeley) 6/6/93
* $NetBSD: defs.h,v 1.10 2010/02/19 16:35:27 tnn Exp $
* @(#)defs.h 8.1 (Berkeley) 6/6/93
* $FreeBSD: head/usr.bin/sed/defs.h 192732 2009-05-25 06:45:33Z brian $
*/
/*
* Types of address specifications
*/
enum e_atype {
AT_RE, /* Line that match RE */
AT_RE = 1, /* Line that match RE */
AT_LINE, /* Specific line */
AT_RELLINE, /* Relative line */
AT_LAST /* Last line */
};
@ -99,14 +63,28 @@ struct s_addr {
struct s_subst {
int n; /* Occurrence to subst. */
int p; /* True if p flag */
int icase; /* True if I flag */
char *wfile; /* NULL if no wfile */
int wfd; /* Cached file descriptor */
regex_t *re; /* Regular expression */
int maxbref; /* Largest backreference. */
unsigned int maxbref; /* Largest backreference. */
u_long linenum; /* Line number. */
char *new; /* Replacement text */
};
/*
* Translate command.
*/
/*
 * Compiled form of a 'y' (transliterate) command, built by compile_tr().
 */
struct s_tr {
/*
 * Per-byte lookup table. compile_tr() first fills it with the identity
 * mapping and then stores the replacement byte for every single-byte
 * source/target pair. In a multi-byte locale, entries for bytes that
 * are not valid single-byte characters, and for the first byte of a
 * multi-byte mapping, are set to 0 so the lookup falls back to the
 * multis vector below.
 */
unsigned char bytetab[256];
/*
 * Auxiliary vector of mappings in which the source or the target
 * character is longer than one byte. Grown one entry at a time with
 * xrealloc(); NULL while empty.
 */
struct trmulti {
/* Number of valid bytes in from[], as reported by mbrlen(). */
size_t fromlen;
/* Bytes of the source character. */
char from[MB_LEN_MAX];
/* Number of valid bytes in to[], as reported by mbrlen(). */
size_t tolen;
/* Bytes of the replacement character. */
char to[MB_LEN_MAX];
} *multis;
/* Number of entries currently stored in multis. */
size_t nmultis;
};
/*
* An internally compiled command.
@ -116,16 +94,16 @@ struct s_subst {
struct s_command {
struct s_command *next; /* Pointer to next command */
struct s_addr *a1, *a2; /* Start and end address */
u_long startline; /* Start line number or zero */
char *t; /* Text for : a c i r w */
union {
struct s_command *c; /* Command(s) for b t { */
struct s_subst *s; /* Substitute command */
u_char *y; /* Replace command array */
struct s_tr *y; /* Replace command array */
int fd; /* File descriptor for w */
} u;
char code; /* Command code */
u_int nonsel:1; /* True if ! */
u_int inrange:1; /* True if in range */
};
/*
@ -170,18 +148,3 @@ typedef struct {
char *back; /* Backing memory. */
size_t blen; /* Backing memory length. */
} SPACE;
/*
* Error severity codes:
*/
#define FATAL 0 /* Exit immediately with 1 */
#define ERROR 1 /* Continue, but change exit value */
#define WARNING 2 /* Just print the warning */
#define COMPILE 3 /* Print error, count and finish script */
#define COMPILE2 3 /* Print error, count and finish script */
/*
* Round up to the nearest multiple of _POSIX2_LINE_MAX
*/
#define ROUNDLEN(x) \
(((x) + _POSIX2_LINE_MAX - 1) & ~(_POSIX2_LINE_MAX - 1))

View File

@ -1,6 +1,5 @@
/* $NetBSD: extern.h,v 1.11 2010/02/19 16:35:27 tnn Exp $ */
/*-
* Copyright (c) 1992 Diomidis Spinellis.
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
@ -15,44 +14,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)extern.h 8.1 (Berkeley) 6/6/93
* $NetBSD: extern.h,v 1.11 2010/02/19 16:35:27 tnn Exp $
*/
/*-
* Copyright (c) 1992 Diomidis Spinellis.
*
* This code is derived from software contributed to Berkeley by
* Diomidis Spinellis of Imperial College, University of London.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@ -69,8 +30,8 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)extern.h 8.1 (Berkeley) 6/6/93
* $NetBSD: extern.h,v 1.11 2010/02/19 16:35:27 tnn Exp $
* @(#)extern.h 8.1 (Berkeley) 6/6/93
* $FreeBSD: head/usr.bin/sed/extern.h 170608 2007-06-12 12:05:24Z yar $
*/
extern struct s_command *prog;
@ -78,20 +39,21 @@ extern struct s_appends *appends;
extern regmatch_t *match;
extern size_t maxnsub;
extern u_long linenum;
extern int appendnum;
extern int lastline;
extern size_t appendnum;
extern int aflag, eflag, nflag;
extern int ere;
extern const char *fname;
extern const char *fname, *outfname;
extern FILE *infile, *outfile;
extern int rflags; /* regex flags to use */
void cfclose(struct s_command *, struct s_command *);
void compile(void);
void cspace(SPACE *, const char *, size_t, enum e_spflag);
char *cu_fgets(char **, size_t *);
void err(int, const char *, ...)
__attribute__((__format__(__printf__, 2, 3)));
char *cu_fgets(char *, int, int *);
int mf_fgets(SPACE *, enum e_spflag);
int lastline(void);
void process(void);
void resetstate(void);
char *strregerror(int, regex_t *);
void *xmalloc(size_t);
void *xrealloc(void *, size_t);
void *xcalloc(size_t, size_t);

View File

@ -1,6 +1,8 @@
/* $NetBSD: main.c,v 1.21 2010/02/19 16:35:27 tnn Exp $ */
/* $NetBSD: main.c,v 1.22 2014/06/06 00:13:13 christos Exp $ */
/*-
* Copyright (c) 2013 Johann 'Myrkraverk' Oskarsson.
* Copyright (c) 1992 Diomidis Spinellis.
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
@ -32,67 +34,39 @@
* SUCH DAMAGE.
*/
/*-
* Copyright (c) 1992 Diomidis Spinellis.
*
* This code is derived from software contributed to Berkeley by
* Diomidis Spinellis of Imperial College, University of London.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
__RCSID("$NetBSD: main.c,v 1.22 2014/06/06 00:13:13 christos Exp $");
#ifdef __FBSDID
__FBSDID("$FreeBSD: head/usr.bin/sed/main.c 252231 2013-06-26 04:14:19Z pfg $");
#endif
#ifndef lint
__COPYRIGHT("@(#) Copyright (c) 1992, 1993\
The Regents of the University of California. All rights reserved.");
#endif /* not lint */
The Regents of the University of California. All rights reserved.");
#endif
#ifndef lint
#if 0
static char sccsid[] = "@(#)main.c 8.2 (Berkeley) 1/3/94";
#else
__RCSID("$NetBSD: main.c,v 1.21 2010/02/19 16:35:27 tnn Exp $");
static const char sccsid[] = "@(#)main.c 8.2 (Berkeley) 1/3/94";
#endif
#endif /* not lint */
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
#include <limits.h>
#include <locale.h>
#include <regex.h>
#include <stddef.h>
#define _WITH_GETLINE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -130,53 +104,81 @@ struct s_flist {
*/
static struct s_flist *files, **fl_nextp = &files;
int aflag, eflag, nflag, ere;
FILE *infile; /* Current input file */
FILE *outfile; /* Current output file */
int aflag, eflag, nflag;
int rflags = 0;
static int rval; /* Exit status */
static int ispan; /* Whether inplace editing spans across files */
/*
* Current file and line number; line numbers restart across compilation
* units, but span across input files.
* units, but span across input files. The latter is optional if editing
* in place.
*/
const char *fname; /* File name. */
const char *fname; /* File name. */
const char *outfname; /* Output file name */
static char oldfname[PATH_MAX]; /* Old file name (for in-place editing) */
static char tmpfname[PATH_MAX]; /* Temporary file name (for in-place editing) */
static const char *inplace; /* Inplace edit file extension. */
u_long linenum;
int lastline; /* TRUE on the last line of the last file */
static void add_compunit(enum e_cut, char *);
static void add_file(char *);
int main(int, char **);
static void usage(void);
int
main(int argc, char *argv[])
{
int c, fflag;
char *temp_arg;
setprogname(argv[0]);
(void) setlocale(LC_ALL, "");
setprogname(*argv);
fflag = 0;
while ((c = getopt(argc, argv, "ae:f:nrE")) != -1)
inplace = NULL;
while ((c = getopt(argc, argv, "EI::ae:f:i::lnr")) != -1)
switch (c) {
case 'r': /* Gnu sed compat */
case 'E':
rflags = REG_EXTENDED;
break;
case 'I':
inplace = optarg ? optarg : __UNCONST("");
ispan = 1; /* span across input files */
break;
case 'a':
aflag = 1;
break;
case 'e':
eflag = 1;
add_compunit(CU_STRING, optarg);
temp_arg = xmalloc(strlen(optarg) + 2);
strcpy(temp_arg, optarg);
strcat(temp_arg, "\n");
add_compunit(CU_STRING, temp_arg);
break;
case 'f':
fflag = 1;
add_compunit(CU_FILE, optarg);
break;
case 'i':
inplace = optarg ? optarg : __UNCONST("");
ispan = 0; /* don't span across input files */
break;
case 'l':
if(setlinebuf(stdout) != 0)
warnx("setlinebuf() failed");
break;
case 'n':
nflag = 1;
break;
case 'r':
case 'E':
ere = REG_EXTENDED;
break;
default:
case '?':
(void)fprintf(stderr,
"usage:\t%s [-aEnr] script [file ...]\n\t%s [-aEnr] [-e script] ... [-f script_file] ... [file ...]\n",
getprogname(), getprogname());
exit(1);
usage();
}
argc -= optind;
argv += optind;
@ -198,8 +200,17 @@ main(int argc, char *argv[])
process();
cfclose(prog, NULL);
if (fclose(stdout))
err(FATAL, "stdout: %s", strerror(errno));
exit (0);
err(1, "stdout");
exit(rval);
}
/*
 * usage --
 *	Print the command synopsis on stderr and exit with status 1.
 *
 *	The synopsis mirrors the option string handed to getopt() in main():
 *	-r is accepted as a synonym for -E, and both -I and -i take an
 *	OPTIONAL argument ("I::" / "i::"), which getopt only recognizes when
 *	it is attached to the option letter, hence "-i[extension]" rather
 *	than "-i extension".  Options must precede the script operand.
 */
static void
usage(void)
{
	(void)fprintf(stderr, "%s\n%s\n",
	    "usage: sed [-Ealnr] [-I[extension]] [-i[extension]] script [file ...]",
	    "       sed [-Ealnr] [-I[extension]] [-i[extension]] [-e script] ... [-f script_file] ... [file ...]");
	exit(1);
}
/*
@ -207,36 +218,34 @@ main(int argc, char *argv[])
* together. Empty strings and files are ignored.
*/
char *
cu_fgets(char **outbuf, size_t *outsize)
cu_fgets(char *buf, int n, int *more)
{
static enum {ST_EOF, ST_FILE, ST_STRING} state = ST_EOF;
static FILE *f; /* Current open file */
static char *s; /* Current pointer inside string */
static char string_ident[30];
size_t len;
char *p;
if (*outbuf == NULL)
*outsize = 0;
again:
switch (state) {
case ST_EOF:
if (script == NULL)
if (script == NULL) {
if (more != NULL)
*more = 0;
return (NULL);
}
linenum = 0;
switch (script->type) {
case CU_FILE:
if ((f = fopen(script->s, "r")) == NULL)
err(FATAL,
"%s: %s", script->s, strerror(errno));
err(1, "%s", script->s);
fname = script->s;
state = ST_FILE;
goto again;
case CU_STRING:
if ((snprintf(string_ident,
if (((size_t)snprintf(string_ident,
sizeof(string_ident), "\"%s\"", script->s)) >=
(int)(sizeof(string_ident) - 1))
sizeof(string_ident) - 1)
(void)strcpy(string_ident +
sizeof(string_ident) - 6, " ...\"");
fname = string_ident;
@ -245,18 +254,13 @@ again:
goto again;
}
case ST_FILE:
if ((p = fgetln(f, &len)) != NULL) {
if ((p = fgets(buf, n, f)) != NULL) {
linenum++;
if (len >= *outsize) {
free(*outbuf);
*outsize = ROUNDLEN(len + 1);
*outbuf = xmalloc(*outsize);
}
memcpy(*outbuf, p, len);
(*outbuf)[len] = '\0';
if (linenum == 1 && p[0] == '#' && p[1] == 'n')
if (linenum == 1 && buf[0] == '#' && buf[1] == 'n')
nflag = 1;
return (*outbuf);
if (more != NULL)
*more = !feof(f);
return (p);
}
script = script->next;
(void)fclose(f);
@ -265,15 +269,14 @@ again:
case ST_STRING:
if (linenum == 0 && s[0] == '#' && s[1] == 'n')
nflag = 1;
p = *outbuf;
len = *outsize;
p = buf;
for (;;) {
if (len <= 1) {
*outbuf = xrealloc(*outbuf,
*outsize + _POSIX2_LINE_MAX);
p = *outbuf + *outsize - len;
len += _POSIX2_LINE_MAX;
*outsize += _POSIX2_LINE_MAX;
if (n-- <= 1) {
*p = '\0';
linenum++;
if (more != NULL)
*more = 1;
return (buf);
}
switch (*s) {
case '\0':
@ -285,17 +288,20 @@ again:
script = script->next;
*p = '\0';
linenum++;
return (*outbuf);
if (more != NULL)
*more = 0;
return (buf);
}
case '\n':
*p++ = '\n';
*p = '\0';
s++;
linenum++;
return (*outbuf);
if (more != NULL)
*more = 0;
return (buf);
default:
*p++ = *s++;
len--;
}
}
}
@ -310,69 +316,141 @@ again:
int
mf_fgets(SPACE *sp, enum e_spflag spflag)
{
static FILE *f; /* Current open file */
struct stat sb;
size_t len;
char *p;
static char *p = NULL;
static size_t plen = 0;
int c;
static int firstfile;
if (f == NULL)
/* Advance to first non-empty file */
for (;;) {
if (files == NULL) {
lastline = 1;
return (0);
}
if (files->fname == NULL) {
f = stdin;
fname = "stdin";
} else {
fname = files->fname;
if ((f = fopen(fname, "r")) == NULL)
err(FATAL, "%s: %s",
fname, strerror(errno));
}
if ((c = getc(f)) != EOF) {
(void)ungetc(c, f);
break;
}
(void)fclose(f);
files = files->next;
if (infile == NULL) {
/* stdin? */
if (files->fname == NULL) {
if (inplace != NULL)
errx(1, "-I or -i may not be used with stdin");
infile = stdin;
fname = "stdin";
outfile = stdout;
outfname = "stdout";
}
if (lastline) {
sp->len = 0;
return (0);
firstfile = 1;
}
for (;;) {
if (infile != NULL && (c = getc(infile)) != EOF) {
(void)ungetc(c, infile);
break;
}
/* If we are here then either eof or no files are open yet */
if (infile == stdin) {
sp->len = 0;
return (0);
}
if (infile != NULL) {
fclose(infile);
if (*oldfname != '\0') {
/* if there was a backup file, remove it */
unlink(oldfname);
/*
* Backup the original. Note that hard links
* are not supported on all filesystems.
*/
if ((link(fname, oldfname) != 0) &&
(rename(fname, oldfname) != 0)) {
warn("rename()");
if (*tmpfname)
unlink(tmpfname);
exit(1);
}
*oldfname = '\0';
}
if (*tmpfname != '\0') {
if (outfile != NULL && outfile != stdout)
if (fclose(outfile) != 0) {
warn("fclose()");
unlink(tmpfname);
exit(1);
}
outfile = NULL;
if (rename(tmpfname, fname) != 0) {
/* this should not happen really! */
warn("rename()");
unlink(tmpfname);
exit(1);
}
*tmpfname = '\0';
}
outfname = NULL;
}
if (firstfile == 0)
files = files->next;
else
firstfile = 0;
if (files == NULL) {
sp->len = 0;
return (0);
}
fname = files->fname;
if (inplace != NULL) {
if (lstat(fname, &sb) != 0)
err(1, "%s", fname);
if (!(sb.st_mode & S_IFREG))
errx(1, "%s: %s %s", fname,
"in-place editing only",
"works for regular files");
if (*inplace != '\0') {
strlcpy(oldfname, fname,
sizeof(oldfname));
len = strlcat(oldfname, inplace,
sizeof(oldfname));
if (len > sizeof(oldfname))
errx(1, "%s: name too long", fname);
}
char d_name[PATH_MAX], f_name[PATH_MAX];
(void)strlcpy(d_name, fname, sizeof(d_name));
(void)strlcpy(f_name, fname, sizeof(f_name));
len = (size_t)snprintf(tmpfname, sizeof(tmpfname),
"%s/.!%ld!%s", dirname(d_name), (long)getpid(),
basename(f_name));
if (len >= sizeof(tmpfname))
errx(1, "%s: name too long", fname);
unlink(tmpfname);
if ((outfile = fopen(tmpfname, "w")) == NULL)
err(1, "%s", fname);
fchown(fileno(outfile), sb.st_uid, sb.st_gid);
fchmod(fileno(outfile), sb.st_mode & ALLPERMS);
outfname = tmpfname;
if (!ispan) {
linenum = 0;
resetstate();
}
} else {
outfile = stdout;
outfname = "stdout";
}
if ((infile = fopen(fname, "r")) == NULL) {
warn("%s", fname);
rval = 1;
continue;
}
}
/*
* Use fgetln so that we can handle essentially infinite input data.
* Can't use the pointer into the stdio buffer as the process space
* because the ungetc() can cause it to move.
* We are here only when infile is open and we still have something
* to read from it.
*
* Use getline() so that we can handle essentially infinite input
* data. The p and plen are static so each invocation gives
* getline() the same buffer which is expanded as needed.
*/
p = fgetln(f, &len);
if (ferror(f))
err(FATAL, "%s: %s", fname, strerror(errno ? errno : EIO));
cspace(sp, p, len, spflag);
ssize_t slen = getline(&p, &plen, infile);
if (slen == -1)
err(1, "%s", fname);
if (slen != 0 && p[slen - 1] == '\n')
slen--;
cspace(sp, p, (size_t)slen, spflag);
linenum++;
/* Advance to next non-empty file */
while ((c = getc(f)) == EOF) {
(void)fclose(f);
files = files->next;
if (files == NULL) {
lastline = 1;
return (1);
}
if (files->fname == NULL) {
f = stdin;
fname = "stdin";
} else {
fname = files->fname;
if ((f = fopen(fname, "r")) == NULL)
err(FATAL, "%s: %s", fname, strerror(errno));
}
}
(void)ungetc(c, f);
return (1);
}
@ -406,3 +484,16 @@ add_file(char *s)
fp->fname = s;
fl_nextp = &fp->next;
}
int
lastline(void)
{
int ch;
if (files->next != NULL && (inplace == NULL || ispan))
return (0);
if ((ch = getc(infile)) == EOF)
return (1);
ungetc(ch, infile);
return (0);
}

View File

@ -1,6 +1,7 @@
/* $NetBSD: misc.c,v 1.11 2010/02/19 16:35:27 tnn Exp $ */
/* $NetBSD: misc.c,v 1.12 2014/06/06 00:13:13 christos Exp $ */
/*-
* Copyright (c) 1992 Diomidis Spinellis.
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
@ -15,41 +16,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*-
* Copyright (c) 1992 Diomidis Spinellis.
*
* This code is derived from software contributed to Berkeley by
* Diomidis Spinellis of Imperial College, University of London.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@ -72,19 +38,20 @@
#endif
#include <sys/cdefs.h>
#ifndef lint
#if 0
static char sccsid[] = "@(#)misc.c 8.1 (Berkeley) 6/6/93";
#else
__RCSID("$NetBSD: misc.c,v 1.11 2010/02/19 16:35:27 tnn Exp $");
__RCSID("$NetBSD: misc.c,v 1.12 2014/06/06 00:13:13 christos Exp $");
#ifdef __FBSDID
__FBSDID("$FreeBSD: head/usr.bin/sed/misc.c 200462 2009-12-13 03:14:06Z delphij $");
#endif
#ifndef lint
static const char sccsid[] = "@(#)misc.c 8.1 (Berkeley) 6/6/93";
#endif
#endif /* not lint */
#include <sys/types.h>
#include <errno.h>
#include <err.h>
#include <limits.h>
#include <regex.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -101,8 +68,8 @@ xmalloc(size_t size)
void *p;
if ((p = malloc(size)) == NULL)
err(FATAL, "%s", strerror(errno));
return (p);
err(1, "malloc(%zu)", size);
return p;
}
/*
@ -115,12 +82,24 @@ xrealloc(void *p, size_t size)
return (xmalloc(size));
if ((p = realloc(p, size)) == NULL)
err(FATAL, "%s", strerror(errno));
return (p);
err(1, "realloc(%zu)", size);
return p;
}
/*
* Return a string for a regular expression error passed. This is a overkill,
* calloc with result test
*/
void *
xcalloc(size_t c, size_t n)
{
	/* Zero-filled array of c elements, n bytes each. */
	void *mem = calloc(c, n);

	/* Allocation failure is fatal: report the request and exit. */
	if (mem == NULL)
		err(1, "calloc(%zu, %zu)", c, n);
	return mem;
}
/*
* Return a string for a regular expression error passed. This is overkill,
* because of the silly semantics of regerror (we can never know the size of
* the buffer).
*/
@ -138,27 +117,3 @@ strregerror(int errcode, regex_t *preg)
(void)regerror(errcode, preg, oe, s);
return (oe);
}
/*
 * err --
 *	Print a diagnostic on stderr, prefixed with "sed: ".  For the
 *	WARNING and COMPILE severities the current line number and file
 *	name are printed ahead of the message.  A WARNING returns to the
 *	caller; every other severity terminates the program with status 1.
 */
void
err(int severity, const char *fmt, ...)
{
	va_list args;
	int located;

	/* Only these two severities carry a "line: file: " location. */
	located = (severity == WARNING || severity == COMPILE);

	(void)fprintf(stderr, "sed: ");
	if (located)
		(void)fprintf(stderr, "%lu: %s: ", linenum, fname);

	va_start(args, fmt);
	(void)vfprintf(stderr, fmt, args);
	va_end(args);
	(void)fprintf(stderr, "\n");

	if (severity != WARNING)
		exit(1);
}

View File

@ -1,6 +1,7 @@
/* $NetBSD: process.c,v 1.39 2013/03/17 21:02:54 uwe Exp $ */
/* $NetBSD: process.c,v 1.40 2014/06/06 00:13:13 christos Exp $ */
/*-
* Copyright (c) 1992 Diomidis Spinellis.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
@ -32,53 +33,19 @@
* SUCH DAMAGE.
*/
/*-
* Copyright (c) 1992 Diomidis Spinellis.
*
* This code is derived from software contributed to Berkeley by
* Diomidis Spinellis of Imperial College, University of London.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif
#include <sys/cdefs.h>
#ifndef lint
#if 0
static char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94";
#else
__RCSID("$NetBSD: process.c,v 1.39 2013/03/17 21:02:54 uwe Exp $");
__RCSID("$NetBSD: process.c,v 1.40 2014/06/06 00:13:13 christos Exp $");
#ifdef __FBSDID
__FBSDID("$FreeBSD: head/usr.bin/sed/process.c 192732 2009-05-25 06:45:33Z brian $");
#endif
#ifndef lint
static const char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94";
#endif
#endif /* not lint */
#include <sys/types.h>
#include <sys/stat.h>
@ -86,6 +53,7 @@ __RCSID("$NetBSD: process.c,v 1.39 2013/03/17 21:02:54 uwe Exp $");
#include <sys/uio.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
@ -94,27 +62,30 @@ __RCSID("$NetBSD: process.c,v 1.39 2013/03/17 21:02:54 uwe Exp $");
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
#include "defs.h"
#include "extern.h"
static SPACE HS, PS, SS;
static SPACE HS, PS, SS, YS;
#define pd PS.deleted
#define ps PS.space
#define psl PS.len
#define hs HS.space
#define hsl HS.len
static inline int applies(struct s_command *);
static __inline int applies(struct s_command *);
static void do_tr(struct s_tr *);
static void flush_appends(void);
static void lputs(char *);
static inline int regexec_e(regex_t *, const char *, int, int, size_t);
static void lputs(char *, size_t);
static __inline int regexec_e(regex_t *, const char *, int, int, size_t);
static void regsub(SPACE *, char *, char *);
static int substitute(struct s_command *);
struct s_appends *appends; /* Array of pointers to strings to append. */
static int appendx; /* Index into appends array. */
int appendnum; /* Size of appends array. */
static size_t appendx; /* Index into appends array. */
size_t appendnum; /* Size of appends array. */
static int lastaddr; /* Set by applies if last address of a range. */
static int sdone; /* If any substitutes since last line input. */
@ -123,17 +94,18 @@ static regex_t *defpreg;
size_t maxnsub;
regmatch_t *match;
#define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
#define OUT() do {fwrite(ps, 1, psl, outfile); fputc('\n', outfile);} while (0)
void
process(void)
{
struct s_command *cp;
SPACE tspace;
size_t len, oldpsl;
size_t oldpsl = 0;
char *p;
oldpsl = 0;
p = NULL;
for (linenum = 0; mf_fgets(&PS, REPLACE);) {
pd = 0;
top:
@ -149,12 +121,10 @@ redirect:
cp = cp->u.c;
goto redirect;
case 'a':
if (appendx >= appendnum) {
if (appendx >= appendnum)
appends = xrealloc(appends,
sizeof(struct s_appends) *
(appendnum * 2));
appendnum *= 2;
}
(appendnum *= 2));
appends[appendx].type = AP_STRING;
appends[appendx].s = cp->t;
appends[appendx].len = strlen(cp->t);
@ -166,22 +136,21 @@ redirect:
case 'c':
pd = 1;
psl = 0;
if (cp->a2 == NULL || lastaddr)
(void)printf("%s", cp->t);
if (cp->a2 == NULL || lastaddr || lastline())
(void)fprintf(outfile, "%s", cp->t);
goto new;
case 'd':
pd = 1;
goto new;
case 'D':
if (psl == 0)
pd = 1;
if (pd)
goto new;
if ((p = memchr(ps, '\n', psl - 1)) == NULL) {
if (psl == 0 ||
(p = memchr(ps, '\n', psl - 1)) == NULL) {
pd = 1;
goto new;
} else {
psl -= (p + 1) - ps;
psl -= (size_t)((p + 1) - ps);
memmove(ps, p + 1, psl);
goto top;
}
@ -190,24 +159,25 @@ redirect:
break;
case 'G':
if (hs == NULL)
cspace(&HS, "\n", 1, REPLACE);
cspace(&PS, hs, hsl, 0);
cspace(&PS, "\n", 1, REPLACE);
cspace(&PS, hs, hsl, APPEND);
break;
case 'h':
cspace(&HS, ps, psl, REPLACE);
break;
case 'H':
cspace(&HS, ps, psl, 0);
cspace(&HS, "\n", 1, APPEND);
cspace(&HS, ps, psl, APPEND);
break;
case 'i':
(void)printf("%s", cp->t);
(void)fprintf(outfile, "%s", cp->t);
break;
case 'l':
lputs(ps);
lputs(ps, psl);
break;
case 'n':
if (!nflag && !pd)
OUT(ps)
OUT();
flush_appends();
if (!mf_fgets(&PS, REPLACE))
exit(0);
@ -215,40 +185,36 @@ redirect:
break;
case 'N':
flush_appends();
if (!mf_fgets(&PS, 0)) {
if (!nflag && !pd)
OUT(ps)
cspace(&PS, "\n", 1, APPEND);
if (!mf_fgets(&PS, APPEND))
exit(0);
}
break;
case 'p':
if (pd)
break;
OUT(ps)
OUT();
break;
case 'P':
if (pd)
break;
if ((p = memchr(ps, '\n', psl - 1)) != NULL) {
oldpsl = psl;
psl = (p + 1) - ps;
psl = (size_t)(p - ps);
}
OUT(ps)
OUT();
if (p != NULL)
psl = oldpsl;
break;
case 'q':
if (!nflag && !pd)
OUT(ps)
OUT();
flush_appends();
exit(0);
case 'r':
if (appendx >= appendnum) {
if (appendx >= appendnum)
appends = xrealloc(appends,
sizeof(struct s_appends) *
(appendnum * 2));
appendnum *= 2;
}
(appendnum *= 2));
appends[appendx].type = AP_FILE;
appends[appendx].s = cp->t;
appends[appendx].len = strlen(cp->t);
@ -270,36 +236,40 @@ redirect:
if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
DEFFILEMODE)) == -1)
err(FATAL, "%s: %s",
cp->t, strerror(errno));
if ((size_t)write(cp->u.fd, ps, psl) != psl)
err(FATAL, "%s: %s",
cp->t, strerror(errno));
err(1, "%s", cp->t);
if (write(cp->u.fd, ps, psl) != (ssize_t)psl ||
write(cp->u.fd, "\n", 1) != 1)
err(1, "%s", cp->t);
break;
case 'x':
/*
* If the hold space is null, make it empty
* but not null. Otherwise the pattern space
* will become null after the swap, which is
* an abnormal condition.
*/
if (hs == NULL)
cspace(&HS, "\n", 1, REPLACE);
cspace(&HS, "", 0, REPLACE);
tspace = PS;
PS = HS;
HS = tspace;
break;
case 'y':
if (pd)
if (pd || psl == 0)
break;
for (p = ps, len = psl; --len; ++p)
*p = cp->u.y[(int)*p];
do_tr(cp->u.y);
break;
case ':':
case '}':
break;
case '=':
(void)printf("%lu\n", linenum);
(void)fprintf(outfile, "%lu\n", linenum);
}
cp = cp->next;
} /* for all cp */
new: if (!nflag && !pd)
OUT(ps)
OUT();
flush_appends();
} /* for all lines */
}
@ -308,15 +278,15 @@ new: if (!nflag && !pd)
* TRUE if the address passed matches the current program state
* (lastline, linenumber, ps).
*/
#define MATCH(a) \
(a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
(a)->type == AT_LINE ? linenum == (a)->u.l : lastline
#define MATCH(a) \
((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
(a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
/*
* Return TRUE if the command applies to the current line. Sets the inrange
* flag to process ranges. Interprets the non-select (``!'') flag.
* Return TRUE if the command applies to the current line. Sets the start
* line for process ranges. Interprets the non-select (``!'') flag.
*/
static inline int
static __inline int
applies(struct s_command *cp)
{
int r;
@ -324,33 +294,70 @@ applies(struct s_command *cp)
lastaddr = 0;
if (cp->a1 == NULL && cp->a2 == NULL)
r = 1;
else if (cp->a2) {
if (cp->inrange) {
else if (cp->a2)
if (cp->startline > 0) {
if (MATCH(cp->a2)) {
cp->inrange = 0;
cp->startline = 0;
lastaddr = 1;
}
r = 1;
r = 1;
} else if (linenum - cp->startline <= cp->a2->u.l)
r = 1;
else if ((cp->a2->type == AT_LINE &&
linenum > cp->a2->u.l) ||
(cp->a2->type == AT_RELLINE &&
linenum - cp->startline > cp->a2->u.l)) {
/*
* We missed the 2nd address due to a branch,
* so just close the range and return false.
*/
cp->startline = 0;
r = 0;
} else
r = 1;
} else if (cp->a1 && MATCH(cp->a1)) {
/*
* If the second address is a number less than or
* equal to the line number first selected, only
* one line shall be selected.
* -- POSIX 1003.2
* Likewise if the relative second line address is zero.
*/
if (cp->a2->type == AT_LINE &&
linenum >= cp->a2->u.l)
if ((cp->a2->type == AT_LINE &&
linenum >= cp->a2->u.l) ||
(cp->a2->type == AT_RELLINE && cp->a2->u.l == 0))
lastaddr = 1;
else
cp->inrange = 1;
else {
cp->startline = linenum;
}
r = 1;
} else
r = 0;
} else
else
r = MATCH(cp->a1);
return (cp->nonsel ? ! r : r);
}
/*
 * resetstate --
 *	Return the processor to its start-up state: no address range is
 *	considered open and the hold space is empty.
 */
void
resetstate(void)
{
	struct s_command *cmd = prog;

	/*
	 * Visit every command, stepping into the body of a `{' group
	 * through u.c and otherwise following next, and clear the recorded
	 * start line of each two-address command.
	 */
	while (cmd != NULL) {
		if (cmd->a2 != NULL)
			cmd->startline = 0;
		if (cmd->code == '{')
			cmd = cmd->u.c;
		else
			cmd = cmd->next;
	}

	/* Replace the hold space contents with the empty string. */
	cspace(&HS, "", 0, REPLACE);
}
/*
* substitute --
* Do substitutions in the pattern space. Currently, we build a
@ -362,24 +369,24 @@ substitute(struct s_command *cp)
{
SPACE tspace;
regex_t *re;
size_t re_off, slen;
regoff_t re_off, slen;
int lastempty, n;
char *s;
s = ps;
re = cp->u.s->re;
if (re == NULL) {
if (defpreg != NULL && (size_t)cp->u.s->maxbref > defpreg->re_nsub) {
if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
linenum = cp->u.s->linenum;
err(COMPILE, "\\%d not defined in the RE",
cp->u.s->maxbref);
errx(1, "%lu: %s: \\%u not defined in the RE",
linenum, fname, cp->u.s->maxbref);
}
}
if (!regexec_e(re, s, 0, 0, psl))
return (0);
SS.len = 0; /* Clean substitute space. */
slen = psl;
slen = (regoff_t)psl;
n = cp->u.s->n;
lastempty = 1;
@ -390,7 +397,7 @@ substitute(struct s_command *cp)
/* Locate start of replaced string. */
re_off = match[0].rm_so;
/* Copy leading retained string. */
cspace(&SS, s, re_off, APPEND);
cspace(&SS, s, (size_t)re_off, APPEND);
/* Add in regular expression. */
regsub(&SS, s, cp->u.s->new);
}
@ -401,26 +408,27 @@ substitute(struct s_command *cp)
slen -= match[0].rm_eo;
lastempty = 0;
} else {
if (match[0].rm_so == 0)
cspace(&SS,
s, match[0].rm_so + 1, APPEND);
else
cspace(&SS,
s + match[0].rm_so, 1, APPEND);
if (match[0].rm_so < slen)
cspace(&SS, s + match[0].rm_so, 1,
APPEND);
s += match[0].rm_so + 1;
slen -= match[0].rm_so + 1;
lastempty = 1;
}
} while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
} while (slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, (size_t)slen));
/* Copy trailing retained string. */
if (slen > 0)
cspace(&SS, s, slen, APPEND);
cspace(&SS, s, (size_t)slen, APPEND);
break;
default: /* Nth occurrence */
while (--n) {
if (match[0].rm_eo == match[0].rm_so)
match[0].rm_eo = match[0].rm_so + 1;
s += match[0].rm_eo;
slen -= match[0].rm_eo;
if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
if (slen < 0)
return (0);
if (!regexec_e(re, s, REG_NOTBOL, 0, (size_t)slen))
return (0);
}
/* FALLTHROUGH */
@ -428,13 +436,13 @@ substitute(struct s_command *cp)
/* Locate start of replaced string. */
re_off = match[0].rm_so + (s - ps);
/* Copy leading retained string. */
cspace(&SS, ps, re_off, APPEND);
cspace(&SS, ps, (size_t)re_off, APPEND);
/* Add in regular expression. */
regsub(&SS, s, cp->u.s->new);
/* Copy trailing retained string. */
s += match[0].rm_eo;
slen -= match[0].rm_eo;
cspace(&SS, s, slen, APPEND);
cspace(&SS, s, (size_t)slen, APPEND);
break;
}
@ -449,19 +457,75 @@ substitute(struct s_command *cp)
/* Handle the 'p' flag. */
if (cp->u.s->p)
OUT(ps)
OUT();
/* Handle the 'w' flag. */
if (cp->u.s->wfile && !pd) {
if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
if ((size_t)write(cp->u.s->wfd, ps, psl) != psl)
err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
err(1, "%s", cp->u.s->wfile);
if (write(cp->u.s->wfd, ps, psl) != (ssize_t)psl ||
write(cp->u.s->wfd, "\n", 1) != 1)
err(1, "%s", cp->u.s->wfile);
}
return (1);
}
/*
* do_tr --
* Perform translation ('y' command) in the pattern space.
*/
static void
do_tr(struct s_tr *y)
{
SPACE tmp;
char c, *p;
size_t clen, left;
size_t i;
if (MB_CUR_MAX == 1) {
/*
* Single-byte encoding: perform in-place translation
* of the pattern space.
*/
for (p = ps; p < &ps[psl]; p++)
*p = (char)y->bytetab[(u_char)*p];
} else {
/*
* Multi-byte encoding: perform translation into the
* translation space, then swap the translation and
* pattern spaces.
*/
/* Clean translation space. */
YS.len = 0;
for (p = ps, left = psl; left > 0; p += clen, left -= clen) {
if ((c = (char)y->bytetab[(u_char)*p]) != '\0') {
cspace(&YS, &c, 1, APPEND);
clen = 1;
continue;
}
for (i = 0; i < y->nmultis; i++)
if (left >= y->multis[i].fromlen &&
memcmp(p, y->multis[i].from,
y->multis[i].fromlen) == 0)
break;
if (i < y->nmultis) {
cspace(&YS, y->multis[i].to,
y->multis[i].tolen, APPEND);
clen = y->multis[i].fromlen;
} else {
cspace(&YS, p, 1, APPEND);
clen = 1;
}
}
/* Swap the translation space and the pattern space. */
tmp = PS;
PS = YS;
YS = tmp;
YS.space = YS.back;
}
}
/*
* Flush append requests. Always called before reading a line,
* therefore it also resets the substitution done (sdone) flag.
@ -470,51 +534,58 @@ static void
flush_appends(void)
{
FILE *f;
int count, i;
size_t count, i;
char buf[8 * 1024];
/* Emit each queued append request, in queue order. */
for (i = 0; i < appendx; i++)
for (i = 0; i < appendx; i++)
switch (appends[i].type) {
case AP_STRING:
/* Literal text: write exactly appends[i].len bytes of it. */
fwrite(appends[i].s, sizeof(char), appends[i].len,
stdout);
fwrite(appends[i].s, sizeof(char), appends[i].len,
outfile);
break;
case AP_FILE:
/*
* Read files probably shouldn't be cached. Since
* it's not an error to read a non-existent file,
* it's possible that another program is interacting
* with the sed script through the file system. It
* with the sed script through the filesystem. It
* would be truly bizarre, but possible. It's probably
* not that big a performance win, anyhow.
*/
/* A file that cannot be opened is skipped without any diagnostic. */
if ((f = fopen(appends[i].s, "r")) == NULL)
break;
/* Copy the file through buf in chunks until fread() returns 0. */
while ((count =
fread(buf, sizeof(char), sizeof(buf), f)) > 0)
(void)fwrite(buf, sizeof(char), count, stdout);
while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
(void)fwrite(buf, sizeof(char), count, outfile);
(void)fclose(f);
break;
}
/*
* fwrite() results are not checked individually; the stream's error
* flag is tested once here.  EIO is reported when errno is unset.
*/
if (ferror(stdout))
err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
appendx = sdone = 0;
if (ferror(outfile))
errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
/* Empty the append queue and clear the substitution-done flag. */
appendx = 0;
sdone = 0;
}
static void
lputs(char *s)
lputs(char *s, size_t len)
{
int count;
const char *escapes, *p;
#ifndef HAVE_NBTOOL_CONFIG_H
static const char escapes[] = "\\\a\b\f\r\t\v";
int c;
size_t col, width;
const char *p;
#ifdef TIOCGWINSZ
struct winsize win;
#endif
static int termwidth = -1;
static size_t termwidth = (size_t)-1;
size_t clen, i;
wchar_t wc;
mbstate_t mbs;
if (termwidth == -1) {
if ((p = getenv("COLUMNS")) != NULL)
termwidth = atoi(p);
#ifndef HAVE_NBTOOL_CONFIG_H
if (outfile != stdout)
termwidth = 60;
if (termwidth == (size_t)-1) {
if ((p = getenv("COLUMNS")) && *p != '\0')
termwidth = (size_t)atoi(p);
#ifdef TIOCGWINSZ
else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
win.ws_col > 0)
termwidth = win.ws_col;
@ -522,74 +593,89 @@ lputs(char *s)
else
termwidth = 60;
}
for (count = 0; *s; ++s) {
if (count >= termwidth) {
(void)printf("\\\n");
count = 0;
if (termwidth == 0)
termwidth = 1;
memset(&mbs, 0, sizeof(mbs));
col = 0;
while (len != 0) {
clen = mbrtowc(&wc, s, len, &mbs);
if (clen == 0)
clen = 1;
if (clen == (size_t)-1 || clen == (size_t)-2) {
wc = (unsigned char)*s;
clen = 1;
memset(&mbs, 0, sizeof(mbs));
}
if (isascii((unsigned char)*s) && isprint((unsigned char)*s) &&
*s != '\\') {
(void)putchar(*s);
count++;
} else {
escapes = "\\\a\b\f\n\r\t\v";
(void)putchar('\\');
if ((p = strchr(escapes, *s)) != NULL) {
(void)putchar("\\abfnrtv"[p - escapes]);
count += 2;
} else {
(void)printf("%03o", *(u_char *)s);
count += 4;
if (wc == '\n') {
if (col + 1 >= termwidth)
fprintf(outfile, "\\\n");
fputc('$', outfile);
fputc('\n', outfile);
col = 0;
} else if (iswprint(wc)) {
width = (size_t)wcwidth(wc);
if (col + width >= termwidth) {
fprintf(outfile, "\\\n");
col = 0;
}
fwrite(s, 1, clen, outfile);
col += width;
} else if (wc != L'\0' && (c = wctob(wc)) != EOF &&
(p = strchr(escapes, c)) != NULL) {
if (col + 2 >= termwidth) {
fprintf(outfile, "\\\n");
col = 0;
}
fprintf(outfile, "\\%c", "\\abfrtv"[p - escapes]);
col += 2;
} else {
if (col + 4 * clen >= termwidth) {
fprintf(outfile, "\\\n");
col = 0;
}
for (i = 0; i < clen; i++)
fprintf(outfile, "\\%03o",
(int)(unsigned char)s[i]);
col += 4 * clen;
}
s += clen;
len -= clen;
}
(void)putchar('$');
(void)putchar('\n');
if (ferror(stdout))
err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
if (col + 1 >= termwidth)
fprintf(outfile, "\\\n");
(void)fputc('$', outfile);
(void)fputc('\n', outfile);
if (ferror(outfile))
errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
}
/*
 * regexec_e --
 *	Run a compiled regular expression over the first slen bytes of
 *	string and record the result in the global match[] array.
 *
 *	preg	compiled RE to use; NULL means reuse defpreg, the RE
 *		remembered from the previous call.  A non-NULL preg
 *		becomes the new defpreg.
 *	eflags	flags handed on to regexec().
 *	nomatch	when non-zero, regexec() is asked for no submatch slots.
 *	slen	number of bytes of string to examine.
 *
 *	Returns 1 on a match and 0 on REG_NOMATCH.  Any other regexec()
 *	result, or a NULL preg with no remembered RE, is reported as a
 *	fatal error.
 */
static inline int
regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, size_t slen)
static __inline int
regexec_e(regex_t *preg, const char *string, int eflags, int nomatch,
size_t slen)
{
int eval;
#ifndef REG_STARTEND
char *buf;
#endif
if (preg == NULL) {
if (defpreg == NULL)
err(FATAL, "first RE may not be empty");
errx(1, "first RE may not be empty");
} else
defpreg = preg;
/* Set anchors, discounting trailing newline (if any). */
if (slen > 0 && string[slen - 1] == '\n')
slen--;
#ifndef REG_STARTEND
/*
* Without REG_STARTEND the subject must be NUL-terminated, so match
* against a temporary copy of exactly slen bytes.
*/
if ((buf = malloc(slen + 1)) == NULL)
err(1, NULL);
(void)memcpy(buf, string, slen);
buf[slen] = '\0';
eval = regexec(defpreg, buf,
nomatch ? 0 : maxnsub + 1, match, eflags);
free(buf);
#else
/* Set anchors */
/* With REG_STARTEND, match[0] passes the [0, slen) window to regexec(). */
match[0].rm_so = 0;
match[0].rm_eo = slen;
match[0].rm_eo = (regoff_t)slen;
eval = regexec(defpreg, string,
nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
#endif
switch(eval) {
case 0:
return (1);
case REG_NOMATCH:
return (0);
}
/* Anything other than success or REG_NOMATCH is treated as fatal. */
err(FATAL, "RE error: %s", strregerror(eval, defpreg));
errx(1, "RE error: %s", strregerror(eval, defpreg));
/* NOTREACHED */
return (0);
}
/*
@ -599,14 +685,15 @@ regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, size_t sle
static void
regsub(SPACE *sp, char *string, char *src)
{
int len, no;
size_t len;
int no;
char c, *dst;
#define NEEDSP(reqlen) \
/* XXX What is the +1 for? */ \
if (sp->len + (reqlen) + 1 >= sp->blen) { \
size_t newlen = sp->blen + (reqlen) + 1024; \
sp->space = sp->back = xrealloc(sp->back, newlen); \
sp->blen = newlen; \
sp->blen += (reqlen) + 1024; \
sp->space = sp->back = xrealloc(sp->back, sp->blen); \
dst = sp->space + sp->len; \
}
@ -619,13 +706,13 @@ regsub(SPACE *sp, char *string, char *src)
else
no = -1;
if (no < 0) { /* Ordinary character. */
if (c == '\\' && (*src == '\\' || *src == '&'))
c = *src++;
if (c == '\\' && (*src == '\\' || *src == '&'))
c = *src++;
NEEDSP(1);
*dst++ = c;
*dst++ = c;
++sp->len;
} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
len = match[no].rm_eo - match[no].rm_so;
} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
len = (size_t)(match[no].rm_eo - match[no].rm_so);
NEEDSP(len);
memmove(dst, string + match[no].rm_so, len);
dst += len;
@ -637,9 +724,9 @@ regsub(SPACE *sp, char *string, char *src)
}
/*
* aspace --
* Append the source space to the destination space, allocating new
* space as necessary.
* cspace --
* Concatenate space: append the source space to the destination space,
* allocating new space as necessary.
*/
void
cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
@ -649,9 +736,8 @@ cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
/* Make sure SPACE has enough memory and ramp up quickly. */
tlen = sp->len + len + 1;
if (tlen > sp->blen) {
size_t newlen = tlen + 1024;
sp->space = sp->back = xrealloc(sp->back, newlen);
sp->blen = newlen;
sp->blen = tlen + 1024;
sp->space = sp->back = xrealloc(sp->back, sp->blen);
}
if (spflag == REPLACE)
@ -673,13 +759,12 @@ cfclose(struct s_command *cp, struct s_command *end)
switch(cp->code) {
case 's':
if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
err(FATAL,
"%s: %s", cp->u.s->wfile, strerror(errno));
err(1, "%s", cp->u.s->wfile);
cp->u.s->wfd = -1;
break;
case 'w':
if (cp->u.fd != -1 && close(cp->u.fd))
err(FATAL, "%s: %s", cp->t, strerror(errno));
err(1, "%s", cp->t);
cp->u.fd = -1;
break;
case '{':

View File

@ -1,5 +1,4 @@
.\" $NetBSD: sed.1,v 1.32 2013/05/29 15:05:43 wiz Exp $
.\"
.\" $NetBSD: sed.1,v 1.33 2014/06/06 00:13:13 christos Exp $
.\" Copyright (c) 1992, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
@ -14,7 +13,7 @@
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" 4. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
@ -31,8 +30,9 @@
.\" SUCH DAMAGE.
.\"
.\" @(#)sed.1 8.2 (Berkeley) 12/30/93
.\" $FreeBSD: head/usr.bin/sed/sed.1 259132 2013-12-09 18:57:20Z eadler $
.\"
.Dd May 29, 2013
.Dd December 9, 2013
.Dt SED 1
.Os
.Sh NAME
@ -40,14 +40,16 @@
.Nd stream editor
.Sh SYNOPSIS
.Nm
.Op Fl aEnr
.Op Fl Ealnr
.Ar command
.Op Ar file ...
.Op Ar
.Nm
.Op Fl aEnr
.Op Fl Ealnr
.Op Fl e Ar command
.Op Fl f Ar command_file
.Op Ar file ...
.Op Fl I Op Ar extension
.Op Fl i Op Ar extension
.Op Ar
.Sh DESCRIPTION
The
.Nm
@ -67,6 +69,12 @@ regardless of their origin.
.Pp
The following options are available:
.Bl -tag -width indent
.It Fl E
Interpret regular expressions as extended (modern) regular expressions
rather than basic regular expressions (BREs).
The
.Xr re_format 7
manual page fully describes both formats.
.It Fl a
The files listed as parameters for the
.Dq w
@ -79,19 +87,58 @@ option causes
to delay opening each file until a command containing the related
.Dq w
function is applied to a line of input.
.It Fl E
Enables the use of extended regular expressions instead of the
usual basic regular expression syntax.
.It Fl e Ar command
Append the editing commands specified by the
.Ar command
argument
to the list of commands.
.It Fl f Ar command_file
.It Fl f Op Ar command_file
Append the editing commands found in the file
.Ar command_file
to the list of commands.
The editing commands should each be listed on a separate line.
.It Fl I Op Ar extension
Edit files in-place, saving backups with the specified
.Ar extension .
If no
.Ar extension
is given, no backup will be saved.
It is not recommended to give a zero-length
.Ar extension
when in-place editing files, as you risk corruption or partial content
in situations where disk space is exhausted, etc.
.Pp
Note that in-place editing with
.Fl I
still takes place in a single continuous line address space covering
all files, although each file preserves its individuality instead of
forming one output stream.
The line counter is never reset between files, address ranges can span
file boundaries, and the
.Dq $
address matches only the last line of the last file.
(See
.Sx "Sed Addresses" . )
That can lead to unexpected results in many cases of in-place editing,
where using
.Fl i
is desired.
.It Fl i Ar extension
Edit files in-place similarly to
.Fl I ,
but treat each file independently from other files.
In particular, line numbers in each file start at 1,
the
.Dq $
address matches the last line of the current file,
and address ranges are limited to the current file.
(See
.Sx "Sed Addresses" . )
The net result is as though each file were edited by a separate
.Nm
instance.
.It Fl l
Make output line buffered.
.It Fl n
By default, each line of input is echoed to the standard output after
all of the commands have been applied to it.
@ -99,17 +146,17 @@ The
.Fl n
option suppresses this behavior.
.It Fl r
Identical to
.Fl E ,
present for compatibility with GNU sed.
Same as
.Fl E
for compatibility with GNU sed.
.El
.Pp
The form of a
.Nm
command is as follows:
.sp
.Pp
.Dl [address[,address]]function[arguments]
.sp
.Pp
Whitespace may be inserted before the first address and the function
portions of the command.
.Pp
@ -128,28 +175,57 @@ deletes the pattern space.
Some of the functions use a
.Em "hold space"
to save all or part of the pattern space for subsequent retrieval.
.Sh SED ADDRESSES
An address is not required, but if specified must be a number (that counts
.Sh "Sed Addresses"
An address is not required, but if specified must have one of the
following formats:
.Bl -bullet -offset indent
.It
a number that counts
input lines
cumulatively across input files), a dollar
.Po
.Dq $
.Pc
character that addresses the last line of input, or a context address
(which consists of a regular expression preceded and followed by a
delimiter).
cumulatively across input files (or in each file independently
if a
.Fl i
option is in effect);
.It
a dollar
.Pq Dq $
character that addresses the last line of input (or the last line
of the current file if a
.Fl i
option was specified);
.It
a context address
that consists of a regular expression preceded and followed by a
delimiter. The closing delimiter can also optionally be followed by the
.Dq i
character, to indicate that the regular expression is to be matched
in a case-insensitive way.
.El
.Pp
A command line with no addresses selects every pattern space.
.Pp
A command line with one address selects all of the pattern spaces
that match the address.
.Pp
A command line with two addresses selects the inclusive range from
the first pattern space that matches the first address through the next
pattern space that matches the second.
(If the second address is a number less than or equal to the line number
first selected, only that line is selected.)
Starting at the first line following the selected range,
A command line with two addresses selects an inclusive range.
This
range starts with the first pattern space that matches the first
address.
The end of the range is the next following pattern space
that matches the second address.
If the second address is a number
less than or equal to the line number first selected, only that
line is selected.
The number in the second address may be prefixed with a
.Pq Dq \&+
to specify the number of lines to match after the first pattern.
In the case when the second address is a context
address,
.Nm
does not re-match the second address against the
pattern space that matched the first address.
Starting at the
first line following the selected range,
.Nm
starts looking again for the first address.
.Pp
@ -157,38 +233,45 @@ Editing commands can be applied to non-selected pattern spaces by use
of the exclamation character
.Pq Dq \&!
function.
.Sh SED REGULAR EXPRESSIONS
The
.Nm
regular expressions are basic regular expressions (BRE's, see
.Sh "Sed Regular Expressions"
The regular expressions used in
.Nm ,
by default, are basic regular expressions (BREs, see
.Xr re_format 7
for more information).
for more information), but extended (modern) regular expressions can be used
instead if the
.Fl E
flag is given.
In addition,
.Nm
has the following two additions to BRE's:
.sp
has the following two additions to regular expressions:
.Pp
.Bl -enum -compact
.It
In a context address, any character other than a backslash
.Po
.Dq \e
.Pc
or newline character may be used to delimit the regular expression
by prefixing the first use of that delimiter with a backslash.
.Pq Dq \e
or newline character may be used to delimit the regular expression.
The opening delimiter needs to be preceded by a backslash
unless it is a slash.
For example, the context address
.Li \exabcx
is equivalent to
.Li /abc/ .
Also, putting a backslash character before the delimiting character
causes the character to be treated literally.
For example, in the context address \exabc\exdefx, the RE delimiter
is an
within the regular expression causes the character to be treated literally.
For example, in the context address
.Li \exabc\exdefx ,
the RE delimiter is an
.Dq x
and the second
.Dq x
stands for itself, so that the regular expression is
.Dq abcxdef .
.sp
.Pp
.It
The escape sequence \en matches a newline character embedded in the
pattern space.
You can't, however, use a literal newline character in an address or
You cannot, however, use a literal newline character in an address or
in the substitute command.
.El
.Pp
@ -196,8 +279,8 @@ One special feature of
.Nm
regular expressions is that they can default to the last regular
expression used.
If a regular expression is empty, with nothing between the delimiter
characters, the last regular expression encountered is used instead.
If a regular expression is empty, i.e., just the delimiter characters
are specified, the last regular expression encountered is used instead.
The last regular expression is defined as the last regular expression
used as part of an address or substitute command, and at run-time, not
compile-time.
@ -207,7 +290,7 @@ will substitute
.Dq XXX
for the pattern
.Dq abc .
.Sh SED FUNCTIONS
.Sh "Sed Functions"
In the following list of commands, the maximum number of permissible
addresses for each command is indicated by [0addr], [1addr], or [2addr],
representing zero, one, or two addresses.
@ -261,73 +344,69 @@ can be preceded by white space and can be followed by white space.
The function can be preceded by white space.
The terminating
.Dq }
must be preceded by a newline (and optionally white space).
.sp
must be preceded by a newline, and may also be preceded by white space.
.Pp
.Bl -tag -width "XXXXXX" -compact
.It [2addr] function-list
Execute function-list only when the pattern space is selected.
.sp
.Pp
.It [1addr]a\e
.It text
.br
Write
.Em text
to standard output immediately before each attempt to read a line of input,
whether by executing the
.Dq N
function or by beginning a new cycle.
.sp
.Pp
.It [2addr]b[label]
Branch to the
.Dq \&:
function with the specified label.
If the label is not specified, branch to the end of the script.
.sp
.Pp
.It [2addr]c\e
.It text
.br
Delete the pattern space.
With 0 or 1 address or at the end of a 2-address range,
.Em text
is written to the standard output.
Start the next cycle.
.sp
.Pp
.It [2addr]d
Delete the pattern space and start the next cycle.
.sp
.Pp
.It [2addr]D
Delete the initial segment of the pattern space through the first
newline character and start the next cycle.
.sp
.Pp
.It [2addr]g
Replace the contents of the pattern space with the contents of the
hold space.
.sp
.Pp
.It [2addr]G
Append a newline character followed by the contents of the hold space
to the pattern space.
.sp
.Pp
.It [2addr]h
Replace the contents of the hold space with the contents of the
pattern space.
.sp
.Pp
.It [2addr]H
Append a newline character followed by the contents of the pattern space
to the hold space.
.sp
.Pp
.It [1addr]i\e
.It text
.br
Write
.Em text
to the standard output.
.sp
.Pp
.It [2addr]l
(The letter ell.)
Write the pattern space to the standard output in a visually unambiguous
form.
This form is as follows:
.sp
.Pp
.Bl -tag -width "carriage-returnXX" -offset indent -compact
.It backslash
\e\e
@ -335,8 +414,6 @@ This form is as follows:
\ea
.It form-feed
\ef
.It newline
\en
.It carriage-return
\er
.It tab
@ -352,28 +429,28 @@ Long lines are folded, with the point of folding indicated by displaying
a backslash followed by a newline.
The end of each line is marked with a
.Dq $ .
.sp
.Pp
.It [2addr]n
Write the pattern space to the standard output if the default output has
not been suppressed, and replace the pattern space with the next line of
input. (Does not begin a new cycle.)
.sp
input.
.Pp
.It [2addr]N
Append the next line of input to the pattern space, using an embedded
newline character to separate the appended material from the original
contents.
Note that the current line number changes.
.sp
.Pp
.It [2addr]p
Write the pattern space to standard output.
.sp
.Pp
.It [2addr]P
Write the pattern space, up to the first newline character, to the
standard output.
.sp
.Pp
.It [1addr]q
Branch to the end of the script and quit without starting a new cycle.
.sp
.Pp
.It [1addr]r file
Copy the contents of
.Em file
@ -383,7 +460,7 @@ If
.Em file
cannot be read for any reason, it is silently ignored and no error
condition is set.
.sp
.Pp
.It [2addr]s/regular expression/replacement/flags
Substitute the replacement string for the first instance of the regular
expression in the pattern space.
@ -393,12 +470,10 @@ Within the RE and the replacement, the RE delimiter itself can be used as
a literal character if it is preceded by a backslash.
.Pp
An ampersand
.Po
.Dq \*[Am]
.Pc
.Pq Dq &
appearing in the replacement is replaced by the string matching the RE.
The special meaning of
.Dq \*[Am]
.Dq &
in this context can be suppressed by preceding it by a backslash.
The string
.Dq \e# ,
@ -416,9 +491,10 @@ The value of
.Em flags
in the substitute function is zero or more of the following:
.Bl -tag -width "XXXXXX" -offset indent
.It "0 ... 9"
Make the substitution only for the N'th occurrence of the regular
expression in the pattern space.
.It Ar N
Make the substitution only for the
.Ar N Ns 'th
occurrence of the regular expression in the pattern space.
.It g
Make the substitution for all non-overlapping matches of the
regular expression, not just the first one.
@ -432,8 +508,10 @@ Append the pattern space to
if a replacement was made.
If the replacement string is identical to that which it replaces, it
is still considered to have been a replacement.
.It i or I
Match the regular expression in a case-insensitive way.
.El
.sp
.Pp
.It [2addr]t [label]
Branch to the
.Dq \&:
@ -442,14 +520,14 @@ most recent reading of an input line or execution of a
.Dq t
function.
If no label is specified, branch to the end of the script.
.sp
.Pp
.It [2addr]w Em file
Append the pattern space to the
.Em file .
.sp
.Pp
.It [2addr]x
Swap the contents of the pattern and hold spaces.
.sp
.Pp
.It [2addr]y/string1/string2/
Replace all occurrences of characters in
.Em string1
@ -464,27 +542,27 @@ and
a backslash followed by any character other than a newline is that literal
character, and a backslash followed by an ``n'' is replaced by a newline
character.
.sp
.Pp
.It [2addr]!function
.It [2addr]!function-list
Apply the function or function-list only to the lines that are
.Em not
selected by the address(es).
.sp
.Pp
.It [0addr]:label
This function does nothing; it bears a label to which the
.Dq b
and
.Dq t
commands may branch.
.sp
.Pp
.It [1addr]=
Write the line number to the standard output followed by a newline
character.
.sp
.Pp
.It [0addr]
Empty lines are ignored.
.sp
.Pp
.It [0addr]#
The
.Dq #
@ -496,25 +574,64 @@ This is the same as specifying the
.Fl n
option on the command line.
.El
.Pp
.Sh ENVIRONMENT
The
.Ev COLUMNS , LANG , LC_ALL , LC_CTYPE
and
.Ev LC_COLLATE
environment variables affect the execution of
.Nm
utility exits 0 on success and \*[Gt]0 if an error occurs.
as described in
.Xr environ 7 .
.Sh EXIT STATUS
.Ex -std
.Sh SEE ALSO
.Xr awk 1 ,
.Xr ed 1 ,
.Xr grep 1 ,
.Xr tr 1 ,
.Xr regex 3 ,
.Xr re_format 7
.Sh STANDARDS
The
.Nm
function is expected to be a superset of the
utility is expected to be a superset of the
.St -p1003.2
specification.
.Pp
The
.Fl E , I , a
and
.Fl i
options, the prefixing
.Dq \&+
in the second member of an address range,
as well as the
.Dq I
flag to the address regular expression and substitution command are
non-standard
.Fx
extensions and may not be available on other operating systems.
.Sh HISTORY
A
.Nm
command appeared in
command, written by
.An L. E. McMahon ,
appeared in
.At v7 .
.Sh AUTHORS
.An "Diomidis D. Spinellis" Aq dds@FreeBSD.org
.Sh BUGS
Multibyte characters containing a byte with value 0x5C
.Tn ( ASCII
.Ql \e )
may be incorrectly treated as line continuation characters in arguments to the
.Dq a ,
.Dq c
and
.Dq i
commands.
Multibyte characters cannot be used as delimiters with the
.Dq s
and
.Dq y
commands.