handle conflicts between grep-2-3 and grep-2-4

This commit is contained in:
wiz 2000-02-27 00:43:38 +00:00
parent 63737a9697
commit 9e6b558cde
5 changed files with 936 additions and 714 deletions

View File

@ -1,243 +0,0 @@
--------------------------------------------------------------------
> François Pinard mailto:pinard@iro.umontreal.ca
> I have a collection of DOS-abling patches for Autoconf which came from
> Eli, I think, and which you may fetch from:
>
> http://www.iro.umontreal.ca/contrib/paxutils/dist/PATCHES-AC
>
> if I'm not mistaken. There are a few other mods not related to DOS ports.
--------------------------------------------------------------------
> On Sun, 22 Nov 1998, Tim Rice (tim@trr.metro.net) wrote:
> Here is a patch for Autoconf version 2.12 to address a couple of
> issues on SVR4.2 machines.
>
> It was failing the test for opendir() because opendir() is in libc
> not in libdir. The patch now checks libc first then libdir.
>
> It was failing on gethostbyname(). I added a third test for gethostbyname()
> in libsocket.
>
--------------------------------------------------------------------
1999-01-27
* acgeneral.m4: added support for DJGPP
- "test -f" should be replaced by "test -x" where the script
looks for an executable binary which will be called gcc.exe
etc. on DOS/Windows. A simple test for $COMSPEC being not
defined is suggested as a means to determine which option is
required. (You cannot use "test -x" on all systems because some
variants of Unix shell don't support -x.)
- File patterns used to distinguish between absolute and relative
file names need to be changed as follows:
/*) --> /*|[A-z]:/*)
[/$]*) --> [/$]*|[A-z]:/*)
This is because absolute file names on DOS/Windows may include a
drive letter and a colon before the leading slash. Note that the
lower-case `z' in [A-z] is intentional, since some network
clients allow filesystems to be identified by the six letters
between the uppercase `Z' and lowercase `a', and because the
drive letter can come in either letter-case.
- The line which computes ac_file_inputs by replacing the colon in
foo:foo.in needs its two Sed commands to be swapped, because
once you put in the $ac_given_srcdir part, the colon after the
drive letter in it will be replaced instead of the original
colon.
From Eli Zaretskii.
* acspecific.m4: check opendir in libc before -ldir
gethostbyname/connect check in -lsocket also
From Tim Rice.
diff -ur ../autoconf-2.13.orig/acgeneral.m4 ./acgeneral.m4
--- ../autoconf-2.13.orig/acgeneral.m4 Tue Jan 5 08:27:37 1999
+++ ./acgeneral.m4 Tue Feb 9 22:27:34 1999
@@ -636,6 +636,9 @@
esac
done
+# Support of DJGPP port of bash.
+if test -n "$COMSPEC$ComSpec"; then ac_x=-x; else ac_x=-f; fi
+
# NLS nuisances.
# Only set these to C if already set. These must not be set unconditionally
# because not all systems understand e.g. LANG=C (notably SCO).
@@ -1213,7 +1216,7 @@
dnl AC_MSG_WARN(PROBLEM-DESCRIPTION)
define(AC_MSG_WARN,
-[echo "configure: warning: $1" 1>&2])
+[echo "configure: WARNING: $1" 1>&2])
dnl AC_MSG_ERROR(ERROR-DESCRIPTION)
define(AC_MSG_ERROR,
@@ -1304,7 +1307,7 @@
dnl AC_OBSOLETE(THIS-MACRO-NAME [, SUGGESTION])
define(AC_OBSOLETE,
-[errprint(__file__:__line__: warning: [$1] is obsolete[$2]
+[errprint(__file__:__line__: WARNING: [$1] is obsolete[$2]
)])
@@ -1330,7 +1333,7 @@
ac_dummy="ifelse([$5], , $PATH, [$5])"
for ac_dir in $ac_dummy; do
test -z "$ac_dir" && ac_dir=.
- if test -f $ac_dir/$ac_word; then
+ if test $ac_x $ac_dir/$ac_word; then
ifelse([$6], , , dnl
[ if test "[$ac_dir/$ac_word]" = "$6"; then
ac_prog_rejected=yes
@@ -1384,7 +1387,9 @@
AC_MSG_CHECKING([for $ac_word])
AC_CACHE_VAL(ac_cv_path_$1,
[case "[$]$1" in
- /*)
+changequote(, )dnl
+ /*|[A-z]:/*)
+changequote([, ])dnl
ac_cv_path_$1="[$]$1" # Let the user override the test with a path.
;;
?:/*)
@@ -1398,7 +1403,7 @@
ac_dummy="ifelse([$4], , $PATH, [$4])"
for ac_dir in $ac_dummy; do
test -z "$ac_dir" && ac_dir=.
- if test -f $ac_dir/$ac_word; then
+ if test $ac_x $ac_dir/$ac_word; then
ac_cv_path_$1="$ac_dir/$ac_word"
break
fi
@@ -1802,7 +1807,7 @@
AC_DEFUN(AC_TRY_RUN,
[if test "$cross_compiling" = yes; then
ifelse([$4], ,
- [errprint(__file__:__line__: warning: [AC_TRY_RUN] called without default to allow cross compiling
+ [errprint(__file__:__line__: WARNING: [AC_TRY_RUN] called without default to allow cross compiling
)dnl
AC_MSG_ERROR(can not run test program while cross compiling)],
[$4])
@@ -2267,7 +2272,10 @@
.) srcdir=.
if test -z "$ac_dots"; then top_srcdir=.
else top_srcdir=`echo $ac_dots|sed 's%/$%%'`; fi ;;
- /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;;
+changequote(, )dnl
+ /*|[A-z]:/*)
+changequote([, ])dnl
+ srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;;
*) # Relative path.
srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix"
top_srcdir="$ac_dots$ac_given_srcdir" ;;
@@ -2276,7 +2284,7 @@
ifdef([AC_PROVIDE_AC_PROG_INSTALL],
[ case "$ac_given_INSTALL" in
changequote(, )dnl
- [/$]*) INSTALL="$ac_given_INSTALL" ;;
+ [/$]*|[A-z]:/*) INSTALL="$ac_given_INSTALL" ;;
changequote([, ])dnl
*) INSTALL="$ac_dots$ac_given_INSTALL" ;;
esac
@@ -2291,7 +2299,7 @@
*) ac_comsub= ;;
esac
- ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"`
+ ac_file_inputs=`echo $ac_file_in|sed -e "s%:% $ac_given_srcdir/%g" -e "s%^%$ac_given_srcdir/%"`
sed -e "$ac_comsub
s%@configure_input@%$configure_input%g
s%@srcdir@%$srcdir%g
@@ -2359,7 +2367,7 @@
echo creating $ac_file
rm -f conftest.frag conftest.in conftest.out
- ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"`
+ ac_file_inputs=`echo $ac_file_in|sed -e "s%:% $ac_given_srcdir/%g" -e "s%^%$ac_given_srcdir/%"`
cat $ac_file_inputs > conftest.in
EOF
@@ -2483,7 +2491,7 @@
case "$srcdir" in
changequote(, )dnl
- [/$]*) ac_rel_source="$srcdir/$ac_source" ;;
+ [/$]*|[A-z]:/*) ac_rel_source="$srcdir/$ac_source" ;;
changequote([, ])dnl
*) ac_rel_source="$ac_dots$srcdir/$ac_source" ;;
esac
@@ -2558,7 +2566,9 @@
case "$srcdir" in
.) # No --srcdir option. We are building in place.
ac_sub_srcdir=$srcdir ;;
- /*) # Absolute path.
+changequote(, )dnl
+ /*|[A-z]:/*) # Absolute path.
+changequote([, ])dnl
ac_sub_srcdir=$srcdir/$ac_config_dir ;;
*) # Relative path.
ac_sub_srcdir=$ac_dots$srcdir/$ac_config_dir ;;
@@ -2579,14 +2589,16 @@
# Make the cache file name correct relative to the subdirectory.
case "$cache_file" in
- /*) ac_sub_cache_file=$cache_file ;;
+changequote(, )dnl
+ /*|[A-z]:/*) ac_sub_cache_file=$cache_file ;;
+changequote([, ])dnl
*) # Relative path.
ac_sub_cache_file="$ac_dots$cache_file" ;;
esac
ifdef([AC_PROVIDE_AC_PROG_INSTALL],
[ case "$ac_given_INSTALL" in
changequote(, )dnl
- [/$]*) INSTALL="$ac_given_INSTALL" ;;
+ [/$]*|[A-z]:/*) INSTALL="$ac_given_INSTALL" ;;
changequote([, ])dnl
*) INSTALL="$ac_dots$ac_given_INSTALL" ;;
esac
diff -ur ../autoconf-2.13.orig/acspecific.m4 ./acspecific.m4
--- ../autoconf-2.13.orig/acspecific.m4 Tue Jan 5 08:27:52 1999
+++ ./acspecific.m4 Thu Jan 28 23:01:41 1999
@@ -615,7 +615,7 @@
# Don't use installbsd from OSF since it installs stuff as root
# by default.
for ac_prog in ginstall scoinst install; do
- if test -f $ac_dir/$ac_prog; then
+ if test $ac_x $ac_dir/$ac_prog; then
if test $ac_prog = install &&
grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then
# AIX install. It has an incompatible calling convention.
@@ -765,7 +765,10 @@
[ac_header_dirent=$ac_hdr; break])
# Two versions of opendir et al. are in -ldir and -lx on SCO Xenix.
if test $ac_header_dirent = dirent.h; then
-AC_CHECK_LIB(dir, opendir, LIBS="$LIBS -ldir")
+AC_CHECK_FUNC(opendir)
+if test $ac_cv_func_opendir = no; then
+ AC_CHECK_LIB(dir, opendir, LIBS="$LIBS -ldir")
+fi
else
AC_CHECK_LIB(x, opendir, LIBS="$LIBS -lx")
fi
@@ -2564,6 +2567,9 @@
AC_CHECK_FUNC(gethostbyname)
if test $ac_cv_func_gethostbyname = no; then
AC_CHECK_LIB(nsl, gethostbyname, X_EXTRA_LIBS="$X_EXTRA_LIBS -lnsl")
+ if test $ac_cv_func_gethostbyname = no; then
+ AC_CHECK_LIB(socket, gethostbyname, X_EXTRA_LIBS="$X_EXTRA_LIBS -lsocket -lnsl", , -lnsl)
+ fi
fi
# lieder@skyler.mavd.honeywell.com says without -lsocket,
@@ -2575,7 +2581,7 @@
# We assume that if connect needs -lnsl, so does gethostbyname.
AC_CHECK_FUNC(connect)
if test $ac_cv_func_connect = no; then
- AC_CHECK_LIB(socket, connect, X_EXTRA_LIBS="-lsocket $X_EXTRA_LIBS", ,
+ AC_CHECK_LIB(socket, connect, X_EXTRA_LIBS="-lsocket -lnsl $X_EXTRA_LIBS", ,
$X_EXTRA_LIBS)
fi

View File

@ -1,24 +1,66 @@
.\" grep man page
.if !\n(.g \{\
. if !\w|\*(lq| \{\
. ds lq ``
. if \w'\(lq' .ds lq "\(lq
. \}
. if !\w|\*(rq| \{\
. ds rq ''
. if \w'\(rq' .ds rq "\(rq
. \}
.\}
.de Id
.ds Dt \\$4
..
.Id $Id: grep.1,v 1.2 1999/08/25 01:32:03 hubertf Exp $
.Id $Id: grep.1,v 1.3 2000/02/27 00:43:39 wiz Exp $
.TH GREP 1 \*(Dt "GNU Project"
.SH NAME
grep, egrep, fgrep \- print lines matching a pattern
.SH SYNOPSIS
.B grep
[-[AB] NUM] [-CEFGVabchiLlnqrsvwxyUu] [-e PATTERN | -f FILE]
[-d ACTION] [--directories=ACTION]
[--extended-regexp] [--fixed-strings] [--basic-regexp]
[--regexp=PATTERN] [--file=FILE] [--ignore-case] [--word-regexp]
[--line-regexp] [--line-regexp] [--no-messages] [--revert-match]
[--version] [--help] [--byte-offset] [--line-number]
[--with-filename] [--no-filename] [--quiet] [--silent] [--text]
[--files-without-match] [--files-with-matcces] [--count]
[--before-context=NUM] [--after-context=NUM] [--context]
[--binary] [--unix-byte-offsets] [--recursive]
.I files...
.RB [ \- [ ABC ]
.IR NUM ]
.RB [ \-EFGHLUVZabchilnqrsuvwxyz ]
.RB [ \-e
.I PATTERN
|
.B \-f
.IR FILE ]
.RB [ \-d
.IR ACTION ]
.RB [ \-\^\-directories=\fIACTION\fP ]
.RB [ \-\^\-extended-regexp ]
.RB [ \-\^\-fixed-strings ]
.RB [ \-\^\-basic-regexp ]
.RB [ \-\^\-regexp=\fIPATTERN\fP ]
.RB [ \-\^\-file=\fIFILE\fP ]
.RB [ \-\^\-ignore-case ]
.RB [ \-\^\-word-regexp ]
.RB [ \-\^\-line-regexp ]
.RB [ \-\^\-line-regexp ]
.RB [ \-\^\-no-messages ]
.RB [ \-\^\-invert-match ]
.RB [ \-\^\-version ]
.RB [ \-\^\-help ]
.RB [ \-\^\-byte-offset ]
.RB [ \-\^\-line-number ]
.RB [ \-\^\-with-filename ]
.RB [ \-\^\-no-filename ]
.RB [ \-\^\-quiet ]
.RB [ \-\^\-silent ]
.RB [ \-\^\-text ]
.RB [ \-\^\-files-without-match ]
.RB [ \-\^\-files-with-matches ]
.RB [ \-\^\-count ]
.RB [ \-\^\-before-context=\fINUM\fP ]
.RB [ \-\^\-after-context=\fINUM\fP ]
.RB [ \-\^\-context [ =\fINUM\fP ]]
.RB [ \-\^\-binary ]
.RB [ \-\^\-unix-byte-offsets ]
.RB [ \-\^\-mmap ]
.RB [ \-\^\-null ]
.RB [ \-\^\-recursive ]
.RI [ file .\|.\|.]
.SH DESCRIPTION
.PP
.B Grep
@ -39,80 +81,80 @@ There are three major variants of
controlled by the following options.
.PD 0
.TP
.B \-G, --basic-regexp
.BR \-G ", " \-\^\-basic-regexp
Interpret
.I pattern
as a basic regular expression (see below). This is the default.
.TP
.B \-E, --extended-regexp
.BR \-E ", " \-\^\-extended-regexp
Interpret
.I pattern
as an extended regular expression (see below).
.TP
.B \-F, --fixed-strings
.BR \-F ", " \-\^\-fixed-strings
Interpret
.I pattern
as a list of fixed strings, separated by newlines,
any of which is to be matched.
.LP
.PP
In addition, two variant programs
.B egrep
and
.B fgrep
are available.
.B Egrep
is similar (but not identical) to
.BR "grep\ \-E" ,
and is compatible with the historical Unix
.BR egrep .
is the same as
.BR "grep\ \-E" .
.B Fgrep
is the same as
.BR "grep\ \-F" .
.PD
.LP
.PP
All variants of
.B grep
understand the following options:
.PD 0
.TP
.BI \-A " NUM" ", --after-context=" NUM
.BI \-A " NUM" "\fR,\fP \-\^\-after-context=" NUM
Print
.I NUM
lines of trailing context after matching lines.
.TP
.BI \-B " NUM" ", --before-context=" NUM
.BI \-B " NUM" "\fR,\fP \-\^\-before-context=" NUM
Print
.I NUM
lines of leading context before matching lines.
.TP
.BI \-C ,\ --context"[=NUM]"
.BI \-C " \fR[\fPNUM\fR]\fP" "\fR,\fP \-\^\-context\fR[\fP=" NUM\fR]\fP
Print
.I NUM
lines (default 2) of output context.
.TP
.BI \- NUM \
Same as --context=NUM lines of leading and trailing context. However,
.BI \- NUM
Same as
.BI \-\^\-context= NUM
lines of leading and trailing context. However,
.B grep
will never print any given line more than once.
.TP
.B \-V, --version
.BR \-V ", " \-\^\-version
Print the version number of
.B grep
to standard error. This version number should
be included in all bug reports (see below).
.TP
.B \-b, --byte-offset
.BR \-b ", " \-\^\-byte-offset
Print the byte offset within the input file before
each line of output.
.TP
.B \-c, --count
.BR \-c ", " \-\^\-count
Suppress normal output; instead print a count of
matching lines for each input file.
With the
.B \-v, --revert-match
.BR \-v ", " \-\^\-invert-match
option (see below), count non-matching lines.
.TP
.BI \-d " ACTION" ", --directories=" ACTION
.BI \-d " ACTION" "\fR,\fP \-\^\-directories=" ACTION
If an input file is a directory, use
.I ACTION
to process it. By default,
@ -135,75 +177,78 @@ this is equivalent to the
.B \-r
option.
.TP
.BI \-e " PATTERN" ", --regexp=" PATTERN
.BI \-e " PATTERN" "\fR,\fP \-\^\-regexp=" PATTERN
Use
.I PATTERN
as the pattern; useful to protect patterns beginning with
.BR \- .
May be specified more than once.
.TP
.BI \-f " FILE" ", --file=" FILE
.BI \-f " FILE" "\fR,\fP \-\^\-file=" FILE
Obtain patterns from
.IR FILE ,
one per line.
The empty file contains zero patterns, and therefore matches nothing.
.TP
.B \-h, --no-filename
.BR \-H ", " \-\^\-with-filename
Print the filename for each match.
.TP
.BR \-h ", " \-\^\-no-filename
Suppress the prefixing of filenames on output
when multiple files are searched.
.TP
.B \-i, --ignore-case
.BR \-i ", " \-\^\-ignore-case
Ignore case distinctions in both the
.I pattern
and the input files.
.TP
.B \-L, --files-without-match
.BR \-L ", " \-\^\-files-without-match
Suppress normal output; instead print the name
of each input file from which no output would
normally have been printed. The scanning will stop
normally have been printed. The scanning will stop
on the first match.
.TP
.B \-l, --files-with-matches
.BR \-l ", " \-\^\-files-with-matches
Suppress normal output; instead print
the name of each input file from which output
would normally have been printed. The scanning will
would normally have been printed. The scanning will
stop on the first match.
.TP
.B \-n, --line-number
.BR \-n ", " \-\^\-line-number
Prefix each line of output with the line number
within its input file.
.TP
.B \-q, --quiet, --silent
Quiet; suppress normal output. The scanning will stop
.BR \-q ", " \-\^\-quiet ", " \-\^\-silent
Quiet; suppress normal output. The scanning will stop
on the first match.
Also see the
.B \-s
or
.B --no-messages
.B \-\^\-no-messages
option below.
.TP
.B \-r, --recursive
.BR \-r ", " \-\^\-recursive
Read all files under each directory, recursively;
this is equivalent to the
.B "\-d recurse"
option.
.TP
.B \-s, --no-messages
.BR \-s ", " \-\^\-no-messages
Suppress error messages about nonexistent or unreadable files.
Portability note: unlike GNU
Portability note: unlike \s-1GNU\s0
.BR grep ,
BSD
traditional
.B grep
does not comply with POSIX.2, because BSD
did not conform to \s-1POSIX.2\s0, because traditional
.B grep
lacks a
lacked a
.B \-q
option and its
.B \-s
option behaves like GNU
option behaved like \s-1GNU\s0
.BR grep 's
.B \-q
option.
Shell scripts intended to be portable to BSD
Shell scripts intended to be portable to traditional
.B grep
should avoid both
.B \-q
@ -211,7 +256,7 @@ and
.B \-s
and should redirect output to /dev/null instead.
.TP
.B \-a, --text
.BR \-a ", " \-\^\-text
Do not suppress output lines that contain binary data.
Normally, if the first few bytes of a file indicate that
the file contains binary data,
@ -222,10 +267,10 @@ This option causes
to act as if the file is a text file,
even if it would otherwise be treated as binary.
.TP
.B \-v, --revert-match
.BR \-v ", " \-\^\-invert-match
Invert the sense of matching, to select non-matching lines.
.TP
.B \-w, --word-regexp
.BR \-w ", " \-\^\-word-regexp
Select only those lines containing matches that form whole words.
The test is that the matching substring must either be at the
beginning of the line, or preceded by a non-word constituent
@ -233,14 +278,14 @@ character. Similarly, it must be either at the end of the line
or followed by a non-word constituent character. Word-constituent
characters are letters, digits, and the underscore.
.TP
.B \-x, --line-regexp
.BR \-x ", " \-\^\-line-regexp
Select only those matches that exactly match the whole line.
.TP
.B \-y
Obsolete synonym for
.BR \-i .
.TP
.B \-U, --binary
.BR \-U ", " \-\^\-binary
Treat the file(s) as binary. By default, under MS-DOS and MS-Windows,
.BR grep
guesses the file type by looking at the contents of the first 32KB
@ -256,10 +301,11 @@ work correctly). Specifying
overrules this guesswork, causing all files to be read and passed to the
matching mechanism verbatim; if the file is a text file with CR/LF
pairs at the end of each line, this will cause some regular
expressions to fail. This option is only supported on MS-DOS and
expressions to fail.
This option has no effect on platforms other than MS-DOS and
MS-Windows.
.TP
.B \-u, --unix-byte-offsets
.BR \-u ", " \-\^\-unix-byte-offsets
Report Unix-style byte offsets. This switch causes
.B grep
to report byte offsets as if the file were a Unix-style text file, i.e. with
@ -267,7 +313,41 @@ CR characters stripped off. This will produce results identical to running
.B grep
on a Unix machine. This option has no effect unless
.B \-b
option is also used; it is only supported on MS-DOS and MS-Windows.
option is also used;
it has no effect on platforms other than MS-DOS and MS-Windows.
.TP
.B \-\^\-mmap
If possible, use the
.BR mmap (2)
system call to read input, instead of
the default
.BR read (2)
system call. In some situations,
.B \-\^\-mmap
yields better performance. However,
.B \-\^\-mmap
can cause undefined behavior (including core dumps)
if an input file shrinks while
.B grep
is operating, or if an I/O error occurs.
.TP
.BR \-Z ", " \-\^\-null
Output a zero byte (the \s-1ASCII\s0
.B NUL
character) instead of the character that normally follows a file name.
For example,
.B "grep \-lZ"
outputs a zero byte after each file name instead of the usual newline.
This option makes the output unambiguous, even in the presence of file
names containing unusual characters like newlines. This option can be
used with commands like
.BR "find \-print0" ,
.BR "perl \-0" ,
.BR "sort \-z" ,
and
.B "xargs \-0"
to process arbitrary file names,
even those that contain newline characters.
.PD
.SH "REGULAR EXPRESSIONS"
.PP
@ -277,8 +357,8 @@ expressions, by using various operators to combine smaller expressions.
.PP
.B Grep
understands two different versions of regular expression syntax:
``basic'' and ``extended.'' In
.RB "GNU\ " grep ,
\*(lqbasic\*(rq and \*(lqextended.\*(rq In
.RB "\s-1GNU\s0\ " grep ,
there is no difference in available functionality using either syntax.
In other implementations, basic regular expressions are less powerful.
The following description applies to extended regular expressions;
@ -390,11 +470,6 @@ The preceding item is matched
.I n
or more times.
.TP
.BI {, m }
The preceding item is optional and is matched at most
.I m
times.
.TP
.BI { n , m }
The preceding item is matched at least
.I n
@ -444,12 +519,35 @@ versions
and
.BR \e) .
.PP
In
Traditional
.B egrep
the metacharacter
did not support the
.B {
loses its special meaning; instead use
.BR \e{ .
metacharacter, and some
.B egrep
implementations support
.B \e{
instead, so portable scripts should avoid
.B {
in
.B egrep
patterns and should use
.B [{]
to match a literal
.BR { .
.PP
\s-1GNU\s0
.B egrep
attempts to support traditional usage by assuming that
.B {
is not special if it would be the start of an invalid interval
specification. For example, the shell command
.B "egrep '{1'"
searches for the two-character string
.B {1
instead of reporting a syntax error in the regular expression.
\s-1POSIX.2\s0 allows this behavior as an extension, but portable scripts
should avoid it.
.SH DIAGNOSTICS
.PP
Normally, exit status is 0 if matches were found,
@ -463,7 +561,8 @@ other system errors.
.PP
Email bug reports to
.BR bug-gnu-utils@gnu.org .
Be sure to include the word ``grep'' somewhere in the ``Subject:'' field.
Be sure to include the word \*(lqgrep\*(rq somewhere in the
\*(lqSubject:\*(rq field.
.PP
Large repetition counts in the
.BI { m , n }
@ -475,3 +574,5 @@ and space, and may cause
to run out of memory.
.PP
Backreferences are very slow, and may require exponential time.
.\" Work around problems with some troff -man implementations.
.br

View File

@ -22,19 +22,20 @@
@defcodeindex op
@syncodeindex op fn
@syncodeindex vr fn
@ifinfo
@direntry
* grep: (grep). print lines matching a pattern.
@end direntry
This file documents @sc{grep}, a pattern matching engine.
This file documents @command{grep}, a pattern matching engine.
Published by the Free Software Foundation,
59 Temple Place - Suite 330
Boston, MA 02111-1307, USA
Copyright (C) 1998 Free Software Foundation, Inc.
Copyright 1999 Free Software Foundation, Inc.
Permission is granted to make and distribute verbatim copies of
this manual provided the copyright notice and this permission notice
@ -67,7 +68,7 @@ by the Foundation.
@page
@vskip 0pt plus 1filll
Copyright @copyright{} 1998 Free Software Foundation, Inc.
Copyright @copyright{} 1999 Free Software Foundation, Inc.
@sp 2
Published by the Free Software Foundation, @*
@ -92,43 +93,48 @@ by the Foundation.
@page
@node Top, Introduction, (dir), (dir)
@comment node-name, next, previous, up
@ifnottex
@node Top
@top Grep
@ifinfo
This document was produced for version @value{VERSION} of @sc{GNU} @sc{grep}.
@end ifinfo
@command{grep} searches for lines matching a pattern.
This document was produced for version @value{VERSION} of @sc{gnu}
@command{grep}.
@end ifnottex
@menu
* Introduction:: Introduction.
* Invoking:: Invoking @sc{grep}; description of options.
* Diagnostics:: Exit status returned by @sc{grep}.
* Grep Programs:: @sc{grep} programs.
* Invoking:: Invoking @command{grep}; description of options.
* Diagnostics:: Exit status returned by @command{grep}.
* Grep Programs:: @command{grep} programs.
* Regular Expressions:: Regular Expressions.
* Usage:: Examples.
* Reporting Bugs:: Reporting Bugs.
* Concept Index:: A menu with all the topics in this manual.
* Index:: A menu with all @sc{grep} commands
* Index:: A menu with all @command{grep} commands
and command-line options.
@end menu
@node Introduction, Invoking, Top, Top
@comment node-name, next, previous, up
@node Introduction
@chapter Introduction
@cindex Searching for a pattern.
@sc{grep} searches the input files for lines containing a match to a given
@command{grep} searches the input files
for lines containing a match to a given
pattern list. When it finds a match in a line, it copies the line to standard
output (by default), or does whatever other sort of output you have requested
with options. @sc{grep} expects to do the matching on text.
with options. @command{grep} expects to do the matching on text.
Since newline is also a separator for the list of patterns, there
is no way to match newline characters in a text.
@node Invoking, Diagnostics, Introduction, Top
@comment node-name, next, previous, up
@chapter Invoking @sc{grep}
@node Invoking
@chapter Invoking @command{grep}
@sc{grep} comes with a rich set of options from POSIX.2 and GNU extensions.
@command{grep} comes with a rich set of options from @sc{posix.2} and @sc{gnu}
extensions.
@table @samp
@ -138,7 +144,7 @@ is no way to match newline characters in a text.
@opindex -count
@cindex counting lines
Suppress normal output; instead print a count of matching
lines for each input file. With the @samp{-v}, @samp{--revert-match} option,
lines for each input file. With the @samp{-v}, @samp{--invert-match} option,
count non-matching lines.
@item -e @var{pattern}
@ -146,15 +152,15 @@ count non-matching lines.
@opindex -e
@opindex --regexp=@var{pattern}
@cindex pattern list
Use @var{pattern} as the pattern; useful to protect patterns
Use @var{pattern} as the pattern; useful to protect patterns
beginning with a @samp{-}.
@item -f @var{file}
@item -f @var{file}
@itemx --file=@var{file}
@opindex -f
@opindex --file
@opindex -f
@opindex --file
@cindex pattern from file
Obtain patterns from @var{file}, one per line. The empty
Obtain patterns from @var{file}, one per line. The empty
file contains zero patterns, and therefore matches nothing.
@item -i
@ -162,15 +168,15 @@ file contains zero patterns, and therefore matches nothing.
@opindex -i
@opindex --ignore-case
@cindex case insensitive search
Ignore case distinctions in both the pattern and the input files.
Ignore case distinctions in both the pattern and the input files.
@item -l
@itemx --files-with-matches
@opindex -l
@opindex --files-with-matches
@cindex names of matching files
Suppress normal output; instead print the name of each input
file from which output would normally have been printed.
Suppress normal output; instead print the name of each input
file from which output would normally have been printed.
The scanning of every file will stop on the first match.
@item -n
@ -178,7 +184,7 @@ The scanning of every file will stop on the first match.
@opindex -n
@opindex --line-number
@cindex line numbering
Prefix each line of output with the line number within its input file.
Prefix each line of output with the line number within its input file.
@item -q
@itemx --quiet
@ -187,7 +193,7 @@ Prefix each line of output with the line number within its input file.
@opindex --quiet
@opindex --silent
@cindex quiet, silent
Quiet; suppress normal output. The scanning of every file will stop on
Quiet; suppress normal output. The scanning of every file will stop on
the first match. Also see the @samp{-s} or @samp{--no-messages} option.
@item -s
@ -196,31 +202,32 @@ the first match. Also see the @samp{-s} or @samp{--no-messages} option.
@opindex --no-messages
@cindex suppress error messages
Suppress error messages about nonexistent or unreadable files.
Portability note: unlike GNU @sc{grep}, BSD @sc{grep} does not comply
with POSIX.2, because BSD @sc{grep} lacks a @samp{-q} option and its
@samp{-s} option behaves like GNU @sc{grep}'s @samp{-q} option. Shell
scripts intended to be portable to BSD @sc{grep} should avoid both
Portability note: unlike @sc{gnu} @command{grep}, traditional
@command{grep} did not conform to @sc{posix.2}, because traditional
@command{grep} lacked a @samp{-q} option and its @samp{-s} option behaved
like @sc{gnu} @command{grep}'s @samp{-q} option. Shell scripts intended
to be portable to traditional @command{grep} should avoid both
@samp{-q} and @samp{-s} and should redirect
output to @file{/dev/null} instead.
@item -v
@itemx --revert-match
@itemx --invert-match
@opindex -v
@opindex --revert-match
@cindex revert matching
@opindex --invert-match
@cindex invert matching
@cindex print non-matching lines
Invert the sense of matching, to select non-matching lines.
Invert the sense of matching, to select non-matching lines.
@item -x
@itemx --line-regexp
@opindex -x
@opindex --line-regexp
@cindex match the whole line
Select only those matches that exactly match the whole line.
Select only those matches that exactly match the whole line.
@end table
@section GNU Extensions
@section @sc{gnu} Extensions
@table @samp
@ -240,17 +247,17 @@ Print @var{num} lines of trailing context after matching lines.
@cindex context lines, before match
Print @var{num} lines of leading context before matching lines.
@item -C
@itemx --context@var{[=num]}
@item -C @var{num}
@itemx --context=[@var{num}]
@opindex -C
@opindex --context
@cindex context
Print @var{num} lines (default 2) of output context.
@item -NUM
@item -@var{num}
@opindex -NUM
Same as @samp{--context=@var{num}} lines of leading and trailing
Same as @samp{--context=@var{num}} lines of leading and trailing
context. However, grep will never print any given line more than once.
@ -259,8 +266,8 @@ context. However, grep will never print any given line more than once.
@opindex -V
@opindex --version
@cindex Version, printing
Print the version number of @sc{grep} to the standard output stream.
This version number should be included in all bug reports.
Print the version number of @command{grep} to the standard output stream.
This version number should be included in all bug reports.
@item --help
@opindex --help
@ -274,7 +281,8 @@ and the bug-reporting address, then exit.
@opindex --byte-offset
@cindex byte offset
Print the byte offset within the input file before each line of output.
When @sc{grep} runs on MS-DOS or MS-Windows, the printed byte offsets
When @command{grep} runs on @sc{ms-dos} or MS-Windows, the printed
byte offsets
depend on whether the @samp{-u} (@samp{--unix-byte-offsets}) option is
used; see below.
@ -283,15 +291,22 @@ used; see below.
@opindex -d
@opindex --directories
@cindex directory search
If an input file is a directory, use @var{action} to process it.
By default, @var{action} is @samp{read}, which means that directories are
read just as if they were ordinary files (some operating systems
and filesystems disallow this, and will cause @sc{grep} to print error
If an input file is a directory, use @var{action} to process it.
By default, @var{action} is @samp{read}, which means that directories are
read just as if they were ordinary files (some operating systems
and filesystems disallow this, and will cause @command{grep} to print error
messages for every directory). If @var{action} is @samp{skip},
directories are silently skipped. If @var{action} is @samp{recurse},
@sc{grep} reads all files under each directory, recursively; this is
@command{grep} reads all files under each directory, recursively; this is
equivalent to the @samp{-r} option.
@item -H
@itemx --with-filename
@opindex -H
@opindex --with-filename
@cindex with filename prefix
Print the filename for each match.
@item -h
@itemx --no-filename
@opindex -h
@ -304,9 +319,9 @@ Suppress the prefixing of filenames on output when multiple files are searched.
@opindex -L
@opindex --files-without-match
@cindex files which don't match
Suppress normal output; instead print the name of each input
file from which no output would normally have been printed.
The scanning of every file will stop on the first match.
Suppress normal output; instead print the name of each input
file from which no output would normally have been printed.
The scanning of every file will stop on the first match.
@item -a
@itemx --text
@ -314,14 +329,14 @@ The scanning of every file will stop on the first match.
@opindex --text
@cindex suppress binary data
@cindex binary files
Do not suppress output lines that contain binary data.
Normally, if the first few bytes of a file indicate
Do not suppress output lines that contain binary data.
Normally, if the first few bytes of a file indicate
that the file contains binary data, grep outputs only a
message saying that the file matches the pattern. This
option causes grep to act as if the file is a text
option causes grep to act as if the file is a text
file, even if it would otherwise be treated as binary.
@emph{Warning:} the result might be binary garbage
printed to the terminal, which can have nasty
@emph{Warning:} the result might be binary garbage
printed to the terminal, which can have nasty
side-effects if the terminal driver interprets some of
it as commands.
@ -330,12 +345,12 @@ it as commands.
@opindex -w
@opindex --word-regexp
@cindex matching whole words
Select only those lines containing matches that form
whole words. The test is that the matching substring
must either be at the beginning of the line, or preceded
Select only those lines containing matches that form
whole words. The test is that the matching substring
must either be at the beginning of the line, or preceded
by a non-word constituent character. Similarly,
it must be either at the end of the line or followed by
a non-word constituent character. Word-constituent
a non-word constituent character. Word-constituent
characters are letters, digits, and the underscore.
@item -r
@ -359,18 +374,18 @@ Obsolete synonym for @samp{-i}.
@opindex --binary
@cindex DOS/Windows binary files
@cindex binary files, DOS/Windows
Treat the file(s) as binary. By default, under MS-DOS
and MS-Windows, @sc{grep} guesses the file type by looking
at the contents of the first 32KB read from the file.
If @sc{grep} decides the file is a text file, it strips the
CR characters from the original file contents (to make
regular expressions with @code{^} and @code{$} work correctly).
Treat the file(s) as binary. By default, under @sc{ms-dos}
and MS-Windows, @command{grep} guesses the file type by looking
at the contents of the first 32kB read from the file.
If @command{grep} decides the file is a text file, it strips the
@code{CR} characters from the original file contents (to make
regular expressions with @code{^} and @code{$} work correctly).
Specifying @samp{-U} overrules this guesswork, causing all
files to be read and passed to the matching mechanism
verbatim; if the file is a text file with CR/LF pairs
at the end of each line, this will cause some regular
expressions to fail. This option is only supported on
MS-DOS and MS-Windows.
files to be read and passed to the matching mechanism
verbatim; if the file is a text file with @code{CR/LF} pairs
at the end of each line, this will cause some regular
expressions to fail. This option has no effect on platforms other than
@sc{ms-dos} and MS-Windows.
@item -u
@itemx --unix-byte-offsets
@ -378,38 +393,146 @@ MS-DOS and MS-Windows.
@opindex --unix-byte-offsets
@cindex DOS byte offsets
@cindex byte offsets, on DOS/Windows
Report Unix-style byte offsets. This switch causes
@sc{grep} to report byte offsets as if the file were Unix style
text file, i.e. the byte offsets ignore the CR characters which were
stripped off. This will produce results identical to running @sc{grep} on
a Unix machine. This option has no effect unless @samp{-b}
option is also used; it is only supported on MS-DOS and
Report Unix-style byte offsets. This switch causes
@command{grep} to report byte offsets as if the file were a Unix-style
text file, i.e., the byte offsets ignore the @code{CR} characters which were
stripped. This will produce results identical to running @command{grep} on
a Unix machine. This option has no effect unless @samp{-b}
option is also used; it has no effect on platforms other than @sc{ms-dos} and
MS-Windows.
@item --mmap
@opindex --mmap
@cindex memory mapped input
If possible, use the @code{mmap} system call to read input, instead of
the default @code{read} system call. In some situations, @samp{--mmap}
yields better performance. However, @samp{--mmap} can cause undefined
behavior (including core dumps) if an input file shrinks while
@command{grep} is operating, or if an I/O error occurs.
@item -Z
@itemx --null
@opindex -Z
@opindex --null
@cindex zero-terminated file names
Output a zero byte (the @sc{ascii} @code{NUL} character) instead of the
character that normally follows a file name. For example, @samp{grep
-lZ} outputs a zero byte after each file name instead of the usual
newline. This option makes the output unambiguous, even in the presence
of file names containing unusual characters like newlines. This option
can be used with commands like @samp{find -print0}, @samp{perl -0},
@samp{sort -z}, and @samp{xargs -0} to process arbitrary file names,
even those that contain newline characters.
@item -z
@itemx --null-data
@opindex -z
@opindex --null-data
@cindex zero-terminated lines
Treat the input as a set of lines, each terminated by a zero byte (the
@sc{ascii} @code{NUL} character) instead of a newline. Like the @samp{-Z}
or @samp{--null} option, this option can be used with commands like
@samp{sort -z} to process arbitrary file names.
@end table
Several additional options control which variant of the @sc{grep}
Several additional options control which variant of the @command{grep}
matching engine is used. @xref{Grep Programs}.
@sc{grep} uses the environment variable @var{LANG} to
provide internationalization support, if compiled with this feature.
@section Environment Variables
@node Diagnostics, Grep Programs, Invoking, Top
@comment node-name, next, previous, up
Grep's behavior is affected by the following environment variables.
@cindex environment variables
@table @code
@item GREP_OPTIONS
@vindex GREP_OPTIONS
@cindex default options environment variable
This variable specifies default options to be placed in front of any
explicit options. For example, if @code{GREP_OPTIONS} is @samp{--text
--directories=skip}, @command{grep} behaves as if the two options
@samp{--text} and @samp{--directories=skip} had been specified before
any explicit options. Option specifications are separated by
whitespace. A backslash escapes the next character, so it can be used to
specify an option containing whitespace or a backslash.
@item LC_ALL
@itemx LC_MESSAGES
@itemx LANG
@vindex LC_ALL
@vindex LC_MESSAGES
@vindex LANG
@cindex language of messages
@cindex message language
@cindex national language support
@cindex NLS
@cindex translation of message language
These variables specify the @code{LC_MESSAGES} locale, which determines
the language that @command{grep} uses for messages. The locale is determined
by the first of these variables that is set. American English is used
if none of these environment variables are set, or if the message
catalog is not installed, or if @command{grep} was not compiled with national
language support (@sc{nls}).
@item LC_ALL
@itemx LC_CTYPE
@itemx LANG
@vindex LC_ALL
@vindex LC_CTYPE
@vindex LANG
@cindex character type
@cindex national language support
@cindex NLS
These variables specify the @code{LC_CTYPE} locale, which determines the
type of characters, e.g., which characters are whitespace. The locale is
determined by the first of these variables that is set. The @sc{posix}
locale is used if none of these environment variables are set, or if the
locale catalog is not installed, or if @command{grep} was not compiled with
national language support (@sc{nls}).
@item POSIXLY_CORRECT
@vindex POSIXLY_CORRECT
If set, @command{grep} behaves as @sc{posix.2} requires; otherwise,
@command{grep} behaves more like other @sc{gnu} programs. @sc{posix.2}
requires that options that
follow file names must be treated as file names; by default, such
options are permuted to the front of the operand list and are treated as
options. Also, @sc{posix.2} requires that unrecognized options be
diagnosed as
``illegal'', but since they are not really against the law the default
is to diagnose them as ``invalid''. @code{POSIXLY_CORRECT} also
disables @code{_@var{N}_GNU_nonoption_argv_flags_}, described below.
@item _@var{N}_GNU_nonoption_argv_flags_
@vindex _@var{N}_GNU_nonoption_argv_flags_
(Here @code{@var{N}} is @command{grep}'s numeric process ID.) If the
@var{i}th character of this environment variable's value is @samp{1}, do
not consider the @var{i}th operand of @command{grep} to be an option, even if
it appears to be one. A shell can put this variable in the environment
for each command it runs, specifying which operands are the results of
file name wildcard expansion and therefore should not be treated as
options. This behavior is available only with the @sc{gnu} C library, and
only when @code{POSIXLY_CORRECT} is not set.
@end table
@node Diagnostics
@chapter Diagnostics
Normally, exit status is 0 if matches were found, and 1 if no matches
were found (the @samp{-v} option inverts the sense of the exit status).
Exit status is 2 if there were syntax errors in the pattern,
Exit status is 2 if there were syntax errors in the pattern,
inaccessible input files, or other system errors.
@node Grep Programs, Regular Expressions, Diagnostics, Top
@comment node-name, next, previous, up
@chapter @sc{grep} programs
@node Grep Programs
@chapter @command{grep} programs
@sc{grep} searches the named input files (or standard input if no
@command{grep} searches the named input files (or standard input if no
files are named, or the file name @file{-} is given) for lines containing
a match to the given pattern. By default, @sc{grep} prints the matching lines.
There are three major variants of @sc{grep}, controlled by the following options.
a match to the given pattern. By default, @command{grep} prints the
matching lines. There are three major variants of @command{grep},
controlled by the following options.
@table @samp
@ -418,14 +541,14 @@ There are three major variants of @sc{grep}, controlled by the following options
@opindex -G
@opindex --basic-regexp
@cindex matching basic regular expressions
Interpret pattern as a basic regular expression. This is the default.
Interpret pattern as a basic regular expression. This is the default.
@item -E
@item --extended-regexp
@itemx --extended-regexp
@opindex -E
@opindex --extended-regexp
@cindex matching extended regular expressions
Interpret pattern as an extended regular expression.
Interpret pattern as an extended regular expression.
@item -F
@ -439,38 +562,39 @@ by newlines, any of which is to be matched.
@end table
In addition, two variant programs @sc{egrep} and @sc{fgrep} are available.
@sc{egrep} is similar (but not identical) to @samp{grep -E}, and
is compatible with the historical Unix @sc{egrep}. @sc{fgrep} is the
@sc{egrep} is the same as @samp{grep -E}. @sc{fgrep} is the
same as @samp{grep -F}.
@node Regular Expressions, Reporting Bugs, Grep Programs, Top
@comment node-name, next, previous, up
@node Regular Expressions
@chapter Regular Expressions
@cindex regular expressions
A @dfn{regular expression} is a pattern that describes a set of strings.
A @dfn{regular expression} is a pattern that describes a set of strings.
Regular expressions are constructed analogously to arithmetic expressions,
by using various operators to combine smaller expressions.
@sc{grep} understands two different versions of regular expression
syntax: ``basic'' and ``extended''. In GNU @sc{grep}, there is no
difference in available functionality using either syntax.
In other implementations, basic regular expressions are less powerful.
The following description applies to extended regular expressions;
by using various operators to combine smaller expressions.
@command{grep} understands two different versions of regular expression
syntax: ``basic'' and ``extended''. In @sc{gnu} @command{grep}, there is no
difference in available functionality using either syntax.
In other implementations, basic regular expressions are less powerful.
The following description applies to extended regular expressions;
differences for basic regular expressions are summarized afterwards.
The fundamental building blocks are the regular expressions that match
The fundamental building blocks are the regular expressions that match
a single character. Most characters, including all letters and digits,
are regular expressions that match themselves. Any metacharacter
are regular expressions that match themselves. Any metacharacter
with special meaning may be quoted by preceding it with a backslash.
A list of characters enclosed by @samp{[} and @samp{]} matches any
single character in that list; if the first character of the list is the
caret @samp{^}, then it
matches any character @strong{not} in the list. For example, the regular
expression @samp{[0123456789]} matches any single digit.
A range of @sc{ascii} characters may be specified by giving the first
and last characters, separated by a hyphen. Finally, certain named
classes of characters are predefined. Their names are self explanatory,
and they are :
A range of @sc{ascii} characters may be specified by giving the first
and last characters, separated by a hyphen.
Finally, certain named classes of characters are predefined, as follows.
Their interpretation depends on the @code{LC_CTYPE} locale; the
interpretation below is that of the @sc{posix} locale, which is the default
if no @code{LC_CTYPE} locale is specified.
@cindex classes of characters
@cindex character classes
@ -478,21 +602,26 @@ and they are :
@item [:alnum:]
@opindex alnum
@cindex alphanumeric characters
Any of [:digit:] or [:alpha:]
@cindex alphanumeric characters
Any of @samp{[:digit:]} or @samp{[:alpha:]}
@item [:alpha:]
@opindex alpha
@cindex alphabetic characters
Any local-specific or one of the @sc{ascii} letters:@*
Any letter:@*
@code{a b c d e f g h i j k l m n o p q r s t u v w x y z},@*
@code{A B C D E F G H I J K L M N O P Q R S T U V W X Y Z}.
@item [:blank:]
@opindex blank
@cindex blank characters
Space or tab.
@item [:cntrl:]
@opindex cntrl
@cindex control characters
Any of @code{BEL}, @code{BS}, @code{CR}, @code{FF}, @code{HT},
@code{NL}, or @code{VT}.
Any character with octal codes 000 through 037, or @code{DEL} (octal
code 177).
@item [:digit:]
@opindex digit
@ -503,7 +632,7 @@ Any one of @code{0 1 2 3 4 5 6 7 8 9}.
@item [:graph:]
@opindex graph
@cindex graphic characters
Anything that is not a @samp{[:alphanum:]} or @samp{[:punct:]}.
Any character that is either @samp{[:alnum:]} or @samp{[:punct:]}.
@item [:lower:]
@opindex lower
@ -514,13 +643,12 @@ Any one of @code{a b c d e f g h i j k l m n o p q r s t u v w x y z}.
@opindex print
@cindex printable characters
Any character from the @samp{[:space:]} class, and any character that is
@strong{not} in the @samp{[:isgraph:]} class.
@strong{not} in the @samp{[:graph:]} class.
@item [:punct:]
@opindex punct
@cindex punctuation characters
Any one of @code{!@: " #% & ' ( ) ; < = > ?@: [ \ ] * + , - .@: / : ^ _ @{ | @}}.
Any one of @code{!@: " # $ % & ' ( ) * + , - .@: / : ; < = > ?@: @@ [ \ ] ^ _ ` @{ | @} ~}.
@item [:space:]
@opindex space
@ -541,13 +669,13 @@ Any one of @code{a b c d e f A B C D E F 0 1 2 3 4 5 6 7 8 9}.
@end table
For example, @samp{[[:alnum:]]} means @samp{[0-9A-Za-z]}, except the latter
form is dependent upon the @sc{ascii} character encoding, whereas the
former is portable. (Note that the brackets in these class names are
part of the symbolic names, and must be included in addition to
the brackets delimiting the bracket list). Most metacharacters lose
form is dependent upon the @sc{ascii} character encoding, whereas the
former is portable. (Note that the brackets in these class names are
part of the symbolic names, and must be included in addition to
the brackets delimiting the bracket list.) Most metacharacters lose
their special meaning inside lists. To include a literal @samp{]}, place it
first in the list. Similarly, to include a literal @samp{^}, place it anywhere
but first. Finally, to include a literal @samp{-}, place it last.
but first. Finally, to include a literal @samp{-}, place it last.
The period @samp{.} matches any single character. The symbol @samp{\w}
is a synonym for @samp{[[:alnum:]]} and @samp{\W} is a synonym for
@ -555,12 +683,12 @@ is a synonym for @samp{[[:alnum:]]} and @samp{\W} is a synonym for
The caret @samp{^} and the dollar sign @samp{$} are metacharacters that
respectively match the empty string at the beginning and end
of a line. The symbols @samp{\<} and @samp{\>} respectively match the
of a line. The symbols @samp{\<} and @samp{\>} respectively match the
empty string at the beginning and end of a word. The symbol
@samp{\b} matches the empty string at the edge of a word, and @samp{\B}
matches the empty string provided it's not at the edge of a word.
@samp{\b} matches the empty string at the edge of a word, and @samp{\B}
matches the empty string provided it's not at the edge of a word.
A regular expression may be followed by one of several
A regular expression may be followed by one of several
repetition operators:
@ -595,12 +723,6 @@ The preceding item is matched exactly @var{n} times.
@cindex match sub-expression n or more times
The preceding item is matched @var{n} or more times.
@item @{,@var{m}@}
@opindex @{,m@}
@cindex braces, first argument omitted
@cindex match sub-expression at most m times
The preceding item is optional and is matched at most @var{m} times.
@item @{@var{n},@var{m}@}
@opindex @{n,m@}
@cindex braces, two arguments
@ -609,17 +731,17 @@ The preceding item is matched at least @var{n} times, but not more than
@end table
Two regular expressions may be concatenated; the resulting regular
Two regular expressions may be concatenated; the resulting regular
expression matches any string formed by concatenating two substrings
that respectively match the concatenated subexpressions.
that respectively match the concatenated subexpressions.
Two regular expressions may be joined by the infix operator @samp{|}; the
resulting regular expression matches any string matching either
Two regular expressions may be joined by the infix operator @samp{|}; the
resulting regular expression matches any string matching either
subexpression.
Repetition takes precedence over concatenation, which in turn
Repetition takes precedence over concatenation, which in turn
takes precedence over alternation. A whole subexpression may be
enclosed in parentheses to override these precedence rules.
enclosed in parentheses to override these precedence rules.
The backreference @samp{\@var{n}}, where @var{n} is a single digit, matches the
substring previously matched by the @var{n}th parenthesized subexpression
@ -631,40 +753,201 @@ In basic regular expressions the metacharacters @samp{?}, @samp{+},
instead use the backslashed versions @samp{\?}, @samp{\+}, @samp{\@{},
@samp{\|}, @samp{\(}, and @samp{\)}.
In @sc{egrep} the metacharacter @samp{@{} loses its special meaning;
instead use @samp{\@{}. This not true for @samp{grep -E}.
@cindex interval specifications
Traditional @command{egrep} did not support the @samp{@{} metacharacter,
and some @command{egrep} implementations support @samp{\@{} instead, so
portable scripts should avoid @samp{@{} in @samp{egrep} patterns and
should use @samp{[@{]} to match a literal @samp{@{}.
@sc{gnu} @command{egrep} attempts to support traditional usage by
assuming that @samp{@{} is not special if it would be the start of an
invalid interval specification. For example, the shell command
@samp{egrep '@{1'} searches for the two-character string @samp{@{1}
instead of reporting a syntax error in the regular expression.
@sc{posix.2} allows this behavior as an extension, but portable scripts
should avoid it.
@node Reporting Bugs, Concept Index, Regular Expressions, Top
@comment node-name, next, previous, up
@node Usage
@chapter Usage
@cindex Usage, examples
Here is an example shell command that invokes @sc{gnu} @command{grep}:
@example
grep -i 'hello.*world' menu.h main.c
@end example
@noindent
This lists all lines in the files @file{menu.h} and @file{main.c} that
contain the string @samp{hello} followed by the string @samp{world};
this is because @samp{.*} matches zero or more characters within a line.
@xref{Regular Expressions}. The @samp{-i} option causes @command{grep}
to ignore case, causing it to match the line @samp{Hello, world!}, which
it would not otherwise match. @xref{Invoking}, for more details about
how to invoke @command{grep}.
@cindex Using @command{grep}, Q&A
@cindex FAQ about @command{grep} usage
Here are some common questions and answers about @command{grep} usage.
@enumerate
@item
How can I list just the names of matching files?
@example
grep -l 'main' *.c
@end example
@noindent
lists the names of all C files in the current directory whose contents
mention @samp{main}.
@item
How do I search directories recursively?
@example
grep -r 'hello' /home/gigi
@end example
@noindent
searches for @samp{hello} in all files under the directory
@file{/home/gigi}. For more control of which files are searched, use
@command{find}, @command{grep} and @command{xargs}. For example,
the following command searches only C files:
@smallexample
find /home/gigi -name '*.c' -print | xargs grep 'hello' /dev/null
@end smallexample
@item
What if a pattern has a leading @samp{-}?
@example
grep -e '--cut here--' *
@end example
@noindent
searches for all lines matching @samp{--cut here--}. Without @samp{-e},
@command{grep} would attempt to parse @samp{--cut here--} as a list of
options.
@item
Suppose I want to search for a whole word, not a part of a word?
@example
grep -w 'hello' *
@end example
@noindent
searches only for instances of @samp{hello} that are entire words; it
does not match @samp{Othello}. For more control, use @samp{\<} and
@samp{\>} to match the start and end of words. For example:
@example
grep 'hello\>' *
@end example
@noindent
searches only for words ending in @samp{hello}, so it matches the word
@samp{Othello}.
@item
How do I output context around the matching lines?
@example
grep -C 2 'hello' *
@end example
@noindent
prints two lines of context around each matching line.
@item
How do I force grep to print the name of the file?
Append @file{/dev/null}:
@example
grep 'eli' /etc/passwd /dev/null
@end example
@item
Why do people use strange regular expressions on @command{ps} output?
@example
ps -ef | grep '[c]ron'
@end example
If the pattern had been written without the square brackets, it would
have matched not only the @command{ps} output line for @command{cron},
but also the @command{ps} output line for @command{grep}.
@item
Why does @command{grep} report ``Binary file matches''?
If @command{grep} listed all matching ``lines'' from a binary file, it
would probably generate output that is not useful, and it might even
muck up your display. So @sc{gnu} @command{grep} suppresses output from
files that appear to be binary files. To force @sc{gnu} @command{grep}
to output lines even from files that appear to be binary, use the
@samp{-a} or @samp{--text} option.
@item
Why doesn't @samp{grep -lv} print nonmatching file names?
@samp{grep -lv} lists the names of all files containing one or more
lines that do not match. To list the names of all files that contain no
matching lines, use the @samp{-L} or @samp{--files-without-match}
option.
@item
I can do @sc{or} with @samp{|}, but what about @sc{and}?
@example
grep 'paul' /etc/motd | grep 'franc,ois'
@end example
@noindent
finds all lines that contain both @samp{paul} and @samp{franc,ois}.
@item
How can I search in both standard input and in files?
Use the special file name @samp{-}:
@example
cat /etc/passwd | grep 'alain' - /etc/motd
@end example
@end enumerate
@node Reporting Bugs
@chapter Reporting bugs
@cindex Bugs, reporting
Email bug reports to @email{bug-gnu-utils@@gnu.org}.
Be sure to include the word ``grep'' somewhere in the ``Subject:'' field.
Large repetition counts in the @samp{@{m,n@}} construct may cause
@sc{grep} to use lots of memory. In addition, certain other
obscure regular expressions require exponential time and
Large repetition counts in the @samp{@{m,n@}} construct may cause
@command{grep} to use lots of memory. In addition, certain other
obscure regular expressions require exponential time and
space, and may cause @command{grep} to run out of memory.
Backreferences are very slow, and may require exponential time.
Backreferences are very slow, and may require exponential time.
@page
@node Concept Index , Index, Reporting Bugs, Top
@comment node-name, next, previous, up
@node Concept Index
@unnumbered Concept Index
This is a general index of all issues discussed in this manual, with the
exception of the @sc{grep} commands and command-line options.
exception of the @command{grep} commands and command-line options.
@printindex cp
@page
@node Index, , Concept Index, Top
@node Index
@unnumbered Index
This is an alphabetical list of all @sc{grep} commands and command-line
options.
This is an alphabetical list of all @command{grep} commands, command-line
options, and environment variables.
@printindex fn

View File

@ -1,52 +0,0 @@
/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdio.h>
#if ! defined(HAVE_WCHAR_H) || defined(__CYGWIN__)
typedef unsigned int wint_t;
# undef WEOF
# define WEOF ((wint_t)-1)
#else
#include <wchar.h>
#endif
#ifndef weak_alias
# define __btowc btowc
#endif
/* Convert the single-byte character C to its wide-character equivalent.

   We use UTF-8 encoding for multibyte strings, and therefore a valid
   one-byte multibyte string can only have a value from 0 to 0x7f.

   C is the byte to convert, or EOF.  Return C widened to wint_t when it
   lies in the range 0..0x7f; return WEOF for any negative value
   (including EOF) and for any value above 0x7f.  */
wint_t
__btowc (int c)
{
  /* If WEOF and (wint_t) EOF differ, every call returns WEOF.  When the
     two are equal (e.g. WEOF is (wint_t) -1 and EOF is -1), only the
     range test below decides the result.  */
  if (WEOF != (wint_t) EOF || c < 0 || c > 0x7f)
    return WEOF;

  return (wint_t) c;
}
#ifdef weak_alias
weak_alias (__btowc, btowc)
#endif

View File

@ -55,6 +55,13 @@ static int show_help;
/* If non-zero, print the version on standard output and exit. */
static int show_version;
/* If nonzero, use mmap if possible. */
static int mmap_option;
/* Short options. */
static char const short_options[] =
"0123456789A:B:C::EFGHUVX:abcd:e:f:hiLlnoqrsuvwxyZz";
/* Long options equivalences. */
static struct option long_options[] =
{
@ -75,18 +82,19 @@ static struct option long_options[] =
{"ignore-case", no_argument, NULL, 'i'},
{"line-number", no_argument, NULL, 'n'},
{"line-regexp", no_argument, NULL, 'x'},
{"mmap", no_argument, &mmap_option, 1},
{"no-filename", no_argument, NULL, 'h'},
{"no-messages", no_argument, NULL, 's'},
{"null", no_argument, NULL, 'Z'},
{"null-data", no_argument, NULL, 'z'},
{"quiet", no_argument, NULL, 'q'},
{"recursive", no_argument, NULL, 'r'},
{"regexp", required_argument, NULL, 'e'},
{"revert-match", no_argument, NULL, 'v'},
{"invert-match", no_argument, NULL, 'v'},
{"silent", no_argument, NULL, 'q'},
{"text", no_argument, NULL, 'a'},
#if O_BINARY
{"binary", no_argument, NULL, 'U'},
{"unix-byte-offsets", no_argument, NULL, 'u'},
#endif
{"version", no_argument, NULL, 'V'},
{"with-filename", no_argument, NULL, 'H'},
{"word-regexp", no_argument, NULL, 'w'},
@ -94,10 +102,10 @@ static struct option long_options[] =
};
/* Define flags declared in grep.h. */
char const *matcher;
int match_icase;
int match_words;
int match_lines;
unsigned char eolbyte;
/* For error messages. */
static char *prog;
@ -115,7 +123,10 @@ static enum
static int ck_atoi PARAMS ((char const *, int *));
static void usage PARAMS ((int)) __attribute__((noreturn));
static void error PARAMS ((const char *, int));
static int setmatcher PARAMS ((char const *));
static void setmatcher PARAMS ((char const *));
static int install_matcher PARAMS ((char const *));
static int prepend_args PARAMS ((char const *, char *, char **));
static void prepend_default_options PARAMS ((char const *, int *, char ***));
static char *page_alloc PARAMS ((size_t, char **));
static int reset PARAMS ((int, char const *, struct stats *));
static int fillbuf PARAMS ((size_t, struct stats *));
@ -215,14 +226,15 @@ static char *ubuffer; /* Unaligned base of buffer. */
static char *buffer; /* Base of buffer. */
static size_t bufsalloc; /* Allocated size of buffer save region. */
static size_t bufalloc; /* Total buffer size. */
#define PREFERRED_SAVE_FACTOR 5 /* Preferred value of bufalloc / bufsalloc. */
static int bufdesc; /* File descriptor. */
static char *bufbeg; /* Beginning of user-visible stuff. */
static char *buflim; /* Limit of user-visible stuff. */
static size_t pagesize; /* alignment of memory pages */
static off_t bufoffset; /* Read offset; defined on regular files. */
#if defined(HAVE_MMAP)
static int bufmapped; /* True for ordinary files. */
static off_t bufoffset; /* What read() normally remembers. */
static int bufmapped; /* True if buffer is memory-mapped. */
static off_t initial_bufoffset; /* Initial value of bufoffset. */
#endif
@ -233,32 +245,26 @@ static off_t initial_bufoffset; /* Initial value of bufoffset. */
? (val) \
: (val) + ((alignment) - (size_t) (val) % (alignment)))
/* Return the address of a new page-aligned buffer of size SIZE. Set
*UP to the newly allocated (but possibly unaligned) buffer used to
*build the aligned buffer. To free the buffer, free (*UP). */
/* Return the address of a page-aligned buffer of size SIZE,
reallocating it from *UP. Set *UP to the newly allocated (but
possibly unaligned) buffer used to build the aligned buffer. To
free the buffer, free (*UP). */
static char *
page_alloc (size, up)
size_t size;
char **up;
{
/* HAVE_WORKING_VALLOC means that valloc is properly declared, and
you can free the result of valloc. This symbol is not (yet)
autoconfigured. It can be useful to define HAVE_WORKING_VALLOC
while debugging, since some debugging memory allocators might
catch more bugs if this symbol is enabled. */
#if HAVE_WORKING_VALLOC
*up = valloc (size);
return *up;
#else
size_t asize = size + pagesize - 1;
if (size <= asize)
{
*up = malloc (asize);
if (*up)
return ALIGN_TO (*up, pagesize);
char *p = *up ? realloc (*up, asize) : malloc (asize);
if (p)
{
*up = p;
return ALIGN_TO (p, pagesize);
}
}
return NULL;
#endif
}
/* Reset the buffer for a new file, returning zero if we should skip it.
@ -269,7 +275,9 @@ reset (fd, file, stats)
char const *file;
struct stats *stats;
{
if (pagesize == 0)
if (pagesize)
bufsalloc = ALIGN_TO (bufalloc / PREFERRED_SAVE_FACTOR, pagesize);
else
{
size_t ubufsalloc;
pagesize = getpagesize ();
@ -281,141 +289,195 @@ reset (fd, file, stats)
ubufsalloc = BUFSALLOC;
#endif
bufsalloc = ALIGN_TO (ubufsalloc, pagesize);
bufalloc = 5 * bufsalloc;
bufalloc = PREFERRED_SAVE_FACTOR * bufsalloc;
/* The 1 byte of overflow is a kludge for dfaexec(), which
inserts a sentinel newline at the end of the buffer
being searched. There's gotta be a better way... */
if (bufsalloc < ubufsalloc
|| bufalloc / 5 != bufsalloc || bufalloc + 1 < bufalloc
|| bufalloc / PREFERRED_SAVE_FACTOR != bufsalloc
|| bufalloc + 1 < bufalloc
|| ! (buffer = page_alloc (bufalloc + 1, &ubuffer)))
fatal (_("memory exhausted"), 0);
bufbeg = buffer;
buflim = buffer;
}
buflim = buffer;
bufdesc = fd;
if (
#if defined(HAVE_MMAP)
1
#else
directories != READ_DIRECTORIES
#endif
)
if (fstat (fd, &stats->stat) != 0)
{
error ("fstat", errno);
return 0;
}
if (fstat (fd, &stats->stat) != 0)
{
error ("fstat", errno);
return 0;
}
if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
return 0;
#if defined(HAVE_MMAP)
if (!S_ISREG (stats->stat.st_mode))
bufmapped = 0;
if (S_ISREG (stats->stat.st_mode))
{
if (file)
bufoffset = 0;
else
{
bufoffset = lseek (fd, 0, SEEK_CUR);
if (bufoffset < 0)
{
error ("lseek", errno);
return 0;
}
}
#ifdef HAVE_MMAP
initial_bufoffset = bufoffset;
bufmapped = mmap_option && bufoffset % pagesize == 0;
#endif
}
else
{
bufmapped = 1;
bufoffset = initial_bufoffset = file ? 0 : lseek (fd, 0, 1);
}
#ifdef HAVE_MMAP
bufmapped = 0;
#endif
}
return 1;
}
/* Read new stuff into the buffer, saving the specified
amount of old stuff. When we're done, 'bufbeg' points
to the beginning of the buffer contents, and 'buflim'
points just after the end. Return count of new stuff. */
points just after the end. Return zero if there's an error. */
static int
fillbuf (save, stats)
size_t save;
struct stats *stats;
{
int cc;
#if defined(HAVE_MMAP)
caddr_t maddr;
#endif
size_t fillsize = 0;
int cc = 1;
size_t readsize;
if (save > bufsalloc)
/* Offset from start of unaligned buffer to start of old stuff
that we want to save. */
size_t saved_offset = buflim - ubuffer - save;
if (bufsalloc < save)
{
char *nubuffer;
char *nbuffer;
size_t aligned_save = ALIGN_TO (save, pagesize);
size_t maxalloc = (size_t) -1;
size_t newalloc;
while (save > bufsalloc)
bufsalloc *= 2;
bufalloc = 5 * bufsalloc;
if (bufalloc / 5 != bufsalloc || bufalloc + 1 < bufalloc
|| ! (nbuffer = page_alloc (bufalloc + 1, &nubuffer)))
if (S_ISREG (stats->stat.st_mode))
{
/* Calculate an upper bound on how much memory we should allocate.
We can't use ALIGN_TO here, since off_t might be longer than
size_t. Watch out for arithmetic overflow. */
off_t to_be_read = stats->stat.st_size - bufoffset;
size_t slop = to_be_read % pagesize;
off_t aligned_to_be_read = to_be_read + (slop ? pagesize - slop : 0);
off_t maxalloc_off = aligned_save + aligned_to_be_read;
if (0 <= maxalloc_off && maxalloc_off == (size_t) maxalloc_off)
maxalloc = maxalloc_off;
}
/* Grow bufsalloc until it is at least as great as `save'; but
if there is an overflow, just grow it to the next page boundary. */
while (bufsalloc < save)
if (bufsalloc < bufsalloc * 2)
bufsalloc *= 2;
else
{
bufsalloc = aligned_save;
break;
}
/* Grow the buffer size to be PREFERRED_SAVE_FACTOR times
bufsalloc.... */
newalloc = PREFERRED_SAVE_FACTOR * bufsalloc;
if (maxalloc < newalloc)
{
/* ... except don't grow it more than a pagesize past the
file size, as that might cause unnecessary memory
exhaustion if the file is large. */
newalloc = maxalloc;
bufsalloc = aligned_save;
}
/* Check that the above calculations made progress, which might
not occur if there is arithmetic overflow. If there's no
progress, or if the new buffer size is larger than the old
and buffer reallocation fails, report memory exhaustion. */
if (bufsalloc < save || newalloc < save
|| (newalloc == save && newalloc != maxalloc)
|| (bufalloc < newalloc
&& ! (buffer
= page_alloc ((bufalloc = newalloc) + 1, &ubuffer))))
fatal (_("memory exhausted"), 0);
}
bufbeg = nbuffer + bufsalloc - save;
memcpy (bufbeg, buflim - save, save);
free (ubuffer);
ubuffer = nubuffer;
buffer = nbuffer;
}
else
{
bufbeg = buffer + bufsalloc - save;
memcpy (bufbeg, buflim - save, save);
}
bufbeg = buffer + bufsalloc - save;
memmove (bufbeg, ubuffer + saved_offset, save);
readsize = bufalloc - bufsalloc;
#if defined(HAVE_MMAP)
if (bufmapped && bufoffset % pagesize == 0
&& stats->stat.st_size - bufoffset >= bufalloc - bufsalloc)
if (bufmapped)
{
maddr = buffer + bufsalloc;
maddr = mmap (maddr, bufalloc - bufsalloc, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_FIXED, bufdesc, bufoffset);
if (maddr == (caddr_t) -1)
size_t mmapsize = readsize;
/* Don't mmap past the end of the file; some hosts don't allow this.
Use `read' on the last page. */
if (stats->stat.st_size - bufoffset < mmapsize)
{
/* This used to issue a warning, but on some hosts
(e.g. Solaris 2.5) mmap can fail merely because some
other process has an advisory read lock on the file.
There's no point alarming the user about this misfeature. */
#if 0
fprintf (stderr, _("%s: warning: %s: %s\n"), prog, filename,
strerror (errno));
#endif
goto tryread;
mmapsize = stats->stat.st_size - bufoffset;
mmapsize -= mmapsize % pagesize;
}
#if 0
/* You might thing this (or MADV_WILLNEED) would help,
but it doesn't, at least not on a Sun running 4.1.
In fact, it actually slows us down about 30%! */
madvise (maddr, bufalloc - bufsalloc, MADV_SEQUENTIAL);
#endif
cc = bufalloc - bufsalloc;
bufoffset += cc;
}
else
{
tryread:
/* We come here when we're not going to use mmap() any more.
Note that we need to synchronize the file offset the
first time through. */
if (bufmapped)
if (mmapsize
&& (mmap ((caddr_t) (buffer + bufsalloc), mmapsize,
PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED,
bufdesc, bufoffset)
!= (caddr_t) -1))
{
/* Do not bother to use madvise with MADV_SEQUENTIAL or
MADV_WILLNEED on the mmapped memory. One might think it
would help, but it slows us down about 30% on SunOS 4.1. */
fillsize = mmapsize;
}
else
{
/* Stop using mmap on this file. Synchronize the file
offset. Do not warn about mmap failures. On some hosts
(e.g. Solaris 2.5) mmap can fail merely because some
other process has an advisory read lock on the file.
There's no point alarming the user about this misfeature. */
bufmapped = 0;
if (bufoffset != initial_bufoffset)
lseek (bufdesc, bufoffset, 0);
if (bufoffset != initial_bufoffset
&& lseek (bufdesc, bufoffset, SEEK_SET) < 0)
{
error ("lseek", errno);
cc = 0;
}
}
cc = read (bufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
}
#else
cc = read (bufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
#endif /*HAVE_MMAP*/
if (! fillsize)
{
ssize_t bytesread;
while ((bytesread = read (bufdesc, buffer + bufsalloc, readsize)) < 0
&& errno == EINTR)
continue;
if (bytesread < 0)
cc = 0;
else
fillsize = bytesread;
}
bufoffset += fillsize;
#if O_BINARY
if (cc > 0)
cc = undossify_input (buffer + bufsalloc, cc);
if (fillsize)
fillsize = undossify_input (buffer + bufsalloc, fillsize);
#endif
if (cc > 0)
buflim = buffer + bufsalloc + cc;
else
buflim = buffer + bufsalloc;
buflim = buffer + bufsalloc + fillsize;
return cc;
}
/* Flags controlling the style of output. */
static int always_text; /* Assume the input is always text. */
static int filename_mask; /* If zero, output nulls after filenames. */
static int out_quiet; /* Suppress all normal output. */
static int out_invert; /* Print nonmatching stuff. */
static int out_file; /* Print filenames. */
@ -447,11 +509,9 @@ nlscan (lim)
char *lim;
{
char *beg;
for (beg = lastnl; beg < lim; ++beg)
if (*beg == '\n')
++totalnl;
lastnl = beg;
for (beg = lastnl; (beg = memchr (beg, eolbyte, lim - beg)); beg++)
totalnl++;
lastnl = lim;
}
static void
@ -480,7 +540,7 @@ prline (beg, lim, sep)
int sep;
{
if (out_file)
printf ("%s%c", filename, sep);
printf ("%s%c", filename, sep & filename_mask);
if (out_line)
{
nlscan (beg);
@ -513,7 +573,7 @@ prpending (lim)
while (pending > 0 && lastout < lim)
{
--pending;
if ((nl = memchr (lastout, '\n', lim - lastout)) != 0)
if ((nl = memchr (lastout, eolbyte, lim - lastout)) != 0)
++nl;
else
nl = lim;
@ -531,6 +591,7 @@ prtext (beg, lim, nlinesp)
{
static int used; /* avoid printing "--" before any output */
char *bp, *p, *nl;
char eol = eolbyte;
int i, n;
if (!out_quiet && pending > 0)
@ -547,7 +608,7 @@ prtext (beg, lim, nlinesp)
if (p > bp)
do
--p;
while (p > bp && p[-1] != '\n');
while (p > bp && p[-1] != eol);
/* We only print the "--" separator if our output is
discontiguous from the last output in the file. */
@ -556,7 +617,7 @@ prtext (beg, lim, nlinesp)
while (p < beg)
{
nl = memchr (p, '\n', beg - p);
nl = memchr (p, eol, beg - p);
prline (p, nl + 1, '-');
p = nl + 1;
}
@ -567,7 +628,7 @@ prtext (beg, lim, nlinesp)
/* Caller wants a line count. */
for (n = 0; p < lim; ++n)
{
if ((nl = memchr (p, '\n', lim - p)) != 0)
if ((nl = memchr (p, eol, lim - p)) != 0)
++nl;
else
nl = lim;
@ -581,7 +642,7 @@ prtext (beg, lim, nlinesp)
if (!out_quiet)
prline (beg, lim, ':');
pending = out_after;
pending = out_quiet ? 0 : out_after;
used = 1;
}
@ -596,13 +657,14 @@ grepbuf (beg, lim)
int nlines, n;
register char *p, *b;
char *endp;
char eol = eolbyte;
nlines = 0;
p = beg;
while ((b = (*execute)(p, lim - p, &endp)) != 0)
{
/* Avoid matching the empty line at the end of the buffer. */
if (b == lim && ((b > beg && b[-1] == '\n') || b == beg))
if (b == lim && ((b > beg && b[-1] == eol) || b == beg))
break;
if (!out_invert)
{
@ -639,6 +701,7 @@ grep (fd, file, stats)
int not_text;
size_t residue, save;
char *beg, *lim;
char eol = eolbyte;
if (!reset (fd, file, stats))
return 0;
@ -662,7 +725,7 @@ grep (fd, file, stats)
residue = 0;
save = 0;
if (fillbuf (save, stats) < 0)
if (! fillbuf (save, stats))
{
if (! (is_EISDIR (errno, file) && suppress_errors))
error (filename, errno);
@ -670,7 +733,7 @@ grep (fd, file, stats)
}
not_text = (! (always_text | out_quiet)
&& memchr (bufbeg, '\0', buflim - bufbeg));
&& memchr (bufbeg, eol ? '\0' : '\200', buflim - bufbeg));
done_on_match += not_text;
out_quiet += not_text;
@ -682,7 +745,7 @@ grep (fd, file, stats)
if (buflim - bufbeg == save)
break;
beg = bufbeg + save - residue;
for (lim = buflim; lim > beg && lim[-1] != '\n'; --lim)
for (lim = buflim; lim > beg && lim[-1] != eol; --lim)
;
residue = buflim - lim;
if (beg < lim)
@ -700,7 +763,7 @@ grep (fd, file, stats)
++i;
do
--beg;
while (beg > bufbeg && beg[-1] != '\n');
while (beg > bufbeg && beg[-1] != eol);
}
if (beg != lastout)
lastout = 0;
@ -708,7 +771,7 @@ grep (fd, file, stats)
totalcc += buflim - bufbeg - save;
if (out_line)
nlscan (beg);
if (fillbuf (save, stats) < 0)
if (! fillbuf (save, stats))
{
if (! (is_EISDIR (errno, file) && suppress_errors))
error (filename, errno);
@ -746,7 +809,8 @@ grepfile (file, stats)
}
else
{
desc = open (file, O_RDONLY);
while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR)
continue;
if (desc < 0)
{
@ -805,25 +869,21 @@ grepfile (file, stats)
if (count_matches)
{
if (out_file)
printf ("%s:", filename);
printf ("%s%c", filename, ':' & filename_mask);
printf ("%d\n", count);
}
if (count)
{
status = 0;
if (list_files == 1)
printf ("%s\n", filename);
}
else
{
status = 1;
if (list_files == -1)
printf ("%s\n", filename);
}
status = !count;
if (list_files == 1 - 2 * status)
printf ("%s%c", filename, '\n' & filename_mask);
if (file && close (desc) != 0)
error (file, errno);
if (file)
while (close (desc) != 0)
if (errno != EINTR)
{
error (file, errno);
break;
}
}
return status;
@ -839,8 +899,8 @@ grepdir (dir, stats)
char *name_space;
for (ancestor = stats; (ancestor = ancestor->parent) != 0; )
if (! ((ancestor->stat.st_ino ^ stats->stat.st_ino)
| (ancestor->stat.st_dev ^ stats->stat.st_dev)))
if (ancestor->stat.st_ino == stats->stat.st_ino
&& ancestor->stat.st_dev == stats->stat.st_dev)
{
if (!suppress_errors)
fprintf (stderr, _("%s: warning: %s: %s\n"), prog, dir,
@ -903,23 +963,28 @@ int status;
printf (_("Usage: %s [OPTION]... PATTERN [FILE] ...\n"), prog);
printf (_("\
Search for PATTERN in each FILE or standard input.\n\
Example: %s -i 'hello.*world' menu.h main.c\n\
\n\
Regexp selection and interpretation:\n\
Regexp selection and interpretation:\n"), prog);
printf (_("\
-E, --extended-regexp PATTERN is an extended regular expression\n\
-F, --fixed-regexp PATTERN is a fixed string separated by newlines\n\
-G, --basic-regexp PATTERN is a basic regular expression\n\
-F, --fixed-strings PATTERN is a set of newline-separated strings\n\
-G, --basic-regexp PATTERN is a basic regular expression\n"));
printf (_("\
-e, --regexp=PATTERN use PATTERN as a regular expression\n\
-f, --file=FILE obtain PATTERN from FILE\n\
-i, --ignore-case ignore case distinctions\n\
-w, --word-regexp force PATTERN to match only whole words\n\
-x, --line-regexp force PATTERN to match only whole lines\n"));
-x, --line-regexp force PATTERN to match only whole lines\n\
-z, --null-data a data line ends in 0 byte, not newline\n"));
printf (_("\
\n\
Miscellaneous:\n\
-s, --no-messages suppress error messages\n\
-v, --revert-match select non-matching lines\n\
-v, --invert-match select non-matching lines\n\
-V, --version print version information and exit\n\
--help display this help and exit\n"));
--help display this help and exit\n\
--mmap use memory-mapped input if possible\n"));
printf (_("\
\n\
Output control:\n\
@ -935,31 +1000,42 @@ Output control:\n\
-r, --recursive equivalent to --directories=recurse.\n\
-L, --files-without-match only print FILE names containing no match\n\
-l, --files-with-matches only print FILE names containing matches\n\
-c, --count only print a count of matching lines per FILE\n"));
-c, --count only print a count of matching lines per FILE\n\
-Z, --null print 0 byte after FILE name\n"));
printf (_("\
\n\
Context control:\n\
-B, --before-context=NUM print NUM lines of leading context\n\
-A, --after-context=NUM print NUM lines of trailing context\n\
-C, --context[=NUM] print NUM (default 2) lines of output context\n\
unless overriden by -A or -B\n\
unless overridden by -A or -B\n\
-NUM same as --context=NUM\n\
-U, --binary do not strip CR characters at EOL (MSDOS)\n\
-u, --unix-byte-offsets report offsets as if CRs were not there (MSDOS)\n\
\n\
If no -[GEF], then `egrep' assumes -E, `fgrep' -F, else -G.\n\
With no FILE, or when FILE is -, read standard input. If less than\n\
two FILEs given, assume -h. Exit with 0 if matches, with 1 if none.\n\
Exit with 2 if syntax errors or system errors.\n"));
`egrep' means `grep -E'. `fgrep' means `grep -F'.\n\
With no FILE, or when FILE is -, read standard input. If less than\n\
two FILEs given, assume -h. Exit status is 0 if match, 1 if no match,\n\
and 2 if trouble.\n"));
printf (_("\nReport bugs to <bug-gnu-utils@gnu.org>.\n"));
}
exit (status);
}
/* Record M as the selected matcher.  If a matcher was already selected
   and its name differs from M, report the conflict through fatal ();
   selecting the same name a second time is accepted silently.  */
static void
setmatcher (m)
     char const *m;
{
  if (matcher != NULL)
    {
      int same_name = strcmp (matcher, m) == 0;

      if (!same_name)
        fatal (_("conflicting matchers specified"), 0);
    }

  matcher = m;
}
/* Go through the matchers vector and look for the specified matcher.
If we find it, install it in compile and execute, and return 1. */
static int
setmatcher (name)
install_matcher (name)
char const *name;
{
int i;
@ -1002,6 +1078,65 @@ setmatcher (name)
return 0;
}
/* Split OPTIONS into words separated by whitespace (as classified by
   ISSPACE) and copy each word, NUL-terminated, into consecutive bytes
   of BUF.  When ARGV is not NULL, store a pointer to the Kth copy in
   ARGV[K]; when ARGV is NULL the copies are still written to BUF but
   no pointers are stored.  ARGV[N] is never written, so the caller
   must supply any terminating NULL itself.

   A backslash followed by any character other than NUL stands for that
   character literally, so it can protect whitespace or another
   backslash.  A backslash at the very end of OPTIONS is copied as is.

   BUF needs room for every copy; strlen (OPTIONS) + 1 bytes is always
   enough.  Return N, the number of words found.  */
static int
prepend_args (options, buf, argv)
     char const *options;
     char *buf;
     char **argv;
{
  char const *src = options;
  char *dst = buf;
  int count;

  for (count = 0; ; count++)
    {
      /* Step over the separators in front of the next word.  */
      while (ISSPACE ((unsigned char) *src))
        src++;

      /* Nothing but separators remained: we are done.  */
      if (*src == '\0')
        break;

      if (argv != NULL)
        argv[count] = dst;

      /* Copy one word.  At least one character is always copied,
         because *SRC is known to be neither NUL nor whitespace here.  */
      do
        {
          char c = *src++;

          /* An escape: take the following character verbatim, unless
             the backslash is the last character of the string.  */
          if (c == '\\' && *src != '\0')
            c = *src++;

          *dst++ = c;
        }
      while (*src != '\0' && ! ISSPACE ((unsigned char) *src));

      *dst++ = '\0';
    }

  return count;
}
/* Prepend the whitespace-separated options in OPTIONS to the argument
   vector of a main program with argument count *PARGC and argument
   vector *PARGV.  The options are inserted after (*PARGV)[0], so the
   program name stays first and the original arguments follow the
   inserted ones.

   On success *PARGC and *PARGV are replaced by the new count and a
   newly allocated, NULL-terminated vector; the inserted strings live
   in a second allocation.  Neither allocation is freed here.

   *PARGC and *PARGV are left untouched when OPTIONS is NULL, when
   OPTIONS contains no words at all, or when the original vector has no
   program name to insert after (*PARGC < 1 or (*PARGV)[0] is NULL).  */
static void
prepend_default_options (options, pargc, pargv)
     char const *options;
     int *pargc;
     char ***pargv;
{
  char *buf;
  int prepended;
  int argc;
  char * const *argv;
  char **pp;

  if (!options)
    return;

  argc = *pargc;
  argv = *pargv;

  /* Without a program name the copy loop below would start one slot
     past the terminating NULL and read beyond the end of the vector.  */
  if (argc < 1 || !argv[0])
    return;

  /* First pass: copy the words into BUF and count them.  */
  buf = xmalloc (strlen (options) + 1);
  prepended = prepend_args (options, buf, (char **) NULL);

  /* Only whitespace in OPTIONS: nothing to insert, so release the
     scratch buffer instead of leaking it and keep the caller's vector.  */
  if (prepended == 0)
    {
      free (buf);
      return;
    }

  /* Room for the inserted words, the original ARGC entries and the
     terminating NULL.  */
  pp = (char **) xmalloc ((prepended + argc + 1) * sizeof *pp);
  *pargc = prepended + argc;
  *pargv = pp;

  /* Program name first ...  */
  *pp++ = *argv++;

  /* ... then the options; this second pass rewrites BUF with the same
     contents and records a pointer to each word ...  */
  pp += prepend_args (options, buf, pp);

  /* ... then the remaining original arguments, including the final NULL.  */
  while ((*pp++ = *argv++))
    continue;
}
int
main (argc, argv)
int argc;
@ -1052,7 +1187,8 @@ main (argc, argv)
keys = NULL;
keycc = 0;
with_filenames = 0;
matcher = NULL;
eolbyte = '\n';
filename_mask = ~0;
/* The value -1 means to use DEFAULT_CONTEXT. */
out_after = out_before = -1;
@ -1071,13 +1207,10 @@ main (argc, argv)
textdomain (PACKAGE);
#endif
while ((opt = getopt_long (argc, argv,
#if O_BINARY
"0123456789A:B:C::EFGHVX:abcd:e:f:hiLlnoqrsvwxyUu",
#else
"0123456789A:B:C::EFGHVX:abcd:e:f:hiLlnoqrsvwxy",
#endif
long_options, NULL)) != EOF)
prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv);
while ((opt = getopt_long (argc, argv, short_options, long_options, NULL))
!= -1)
switch (opt)
{
case '0':
@ -1119,39 +1252,33 @@ main (argc, argv)
default_context = 2;
break;
case 'E':
if (matcher && strcmp (matcher, "posix-egrep") != 0)
fatal (_("you may specify only one of -E, -F, or -G"), 0);
matcher = "posix-egrep";
setmatcher ("egrep");
break;
case 'F':
if (matcher && strcmp(matcher, "fgrep") != 0)
fatal(_("you may specify only one of -E, -F, or -G"), 0);;
matcher = "fgrep";
setmatcher ("fgrep");
break;
case 'G':
if (matcher && strcmp (matcher, "grep") != 0)
fatal (_("you may specify only one of -E, -F, or -G"), 0);
matcher = "grep";
setmatcher ("grep");
break;
case 'o': /* BSD 4.4 compatibility */
case 'H':
with_filenames = 1;
break;
#if O_BINARY
case 'U':
#if O_BINARY
dos_use_file_type = DOS_BINARY;
#endif
break;
case 'u':
#if O_BINARY
dos_report_unix_offset = 1;
break;
#endif
break;
case 'V':
show_version = 1;
break;
case 'X':
if (matcher)
fatal (_("matcher already specified"), 0);
matcher = optarg;
setmatcher (optarg);
break;
case 'a':
always_text = 1;
@ -1242,6 +1369,12 @@ main (argc, argv)
case 'x':
match_lines = 1;
break;
case 'Z':
filename_mask = 0;
break;
case 'z':
eolbyte = '\0';
break;
case 0:
/* long options */
break;
@ -1257,7 +1390,7 @@ main (argc, argv)
if (show_version)
{
printf (_("grep (GNU grep) %s\n"), VERSION);
printf (_("%s (GNU grep) %s\n"), matcher, VERSION);
printf ("\n");
printf (_("\
Copyright (C) 1988, 1992-1998, 1999 Free Software Foundation, Inc.\n"));
@ -1293,10 +1426,10 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"))
#ifdef __NetBSD__
matcher = __progname;
#else
matcher = default_matcher;
matcher = "grep";
#endif
if (!setmatcher (matcher) && !setmatcher ("default"))
if (!install_matcher (matcher) && !install_matcher ("default"))
abort ();
(*compile)(keys, keycc);