/* $NetBSD: look.c,v 1.11 2003/08/07 11:14:28 agc Exp $ */ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * David Hitz of Auspex Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #ifndef lint __COPYRIGHT("@(#) Copyright (c) 1991, 1993\n\ The Regents of the University of California. All rights reserved.\n"); #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)look.c 8.2 (Berkeley) 5/4/95"; #endif __RCSID("$NetBSD: look.c,v 1.11 2003/08/07 11:14:28 agc Exp $"); #endif /* not lint */ /* * look -- find lines in a sorted list. * * The man page said that TABs and SPACEs participate in -d comparisons. * In fact, they were ignored. This implements historic practice, not * the manual page. */ #include #include #include #include #include #include #include #include #include #include #include #include #include "pathnames.h" /* * FOLD and DICT convert characters to a normal form for comparison, * according to the user specified flags. * * DICT expects integers because it uses a non-character value to * indicate a character which should not participate in comparisons. */ #define EQUAL 0 #define GREATER 1 #define LESS (-1) #define NO_COMPARE (-2) #define FOLD(c) (isascii(c) && isupper(c) ? tolower(c) : (c)) #define DICT(c) (isascii(c) && isalnum(c) ? (c) : NO_COMPARE) int dflag, fflag; char *binary_search __P((char *, char *, char *)); int compare __P((char *, char *, char *)); char *linear_search __P((char *, char *, char *)); int look __P((char *, char *, char *)); int main __P((int, char **)); void print_from __P((char *, char *, char *)); void usage __P((void)); int main(argc, argv) int argc; char *argv[]; { struct stat sb; int ch, fd, termchar; char *back, *file, *front, *string, *p; string = NULL; file = _PATH_WORDS; termchar = '\0'; while ((ch = getopt(argc, argv, "dft:")) != -1) switch(ch) { case 'd': dflag = 1; break; case 'f': fflag = 1; break; case 't': termchar = *optarg; break; case '?': default: usage(); } argc -= optind; argv += optind; switch (argc) { case 2: /* Don't set -df for user. */ string = *argv++; file = *argv; break; case 1: /* But set -df by default. */ dflag = fflag = 1; string = *argv; break; default: usage(); } if (termchar != '\0' && (p = strchr(string, termchar)) != NULL) *++p = '\0'; if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb)) err(2, "%s", file); if (sb.st_size > SIZE_T_MAX) err(2, "%s: %s", file, strerror(EFBIG)); if ((front = mmap(NULL, (size_t)sb.st_size, PROT_READ, MAP_FILE|MAP_SHARED, fd, (off_t)0)) == NULL) err(2, "%s", file); back = front + sb.st_size; exit(look(string, front, back)); } int look(string, front, back) char *string, *front, *back; { int ch; char *readp, *writep; /* Reformat string string to avoid doing it multiple times later. */ for (readp = writep = string; (ch = *readp++) != 0; ) { if (fflag) ch = FOLD(ch); if (dflag) ch = DICT(ch); if (ch != NO_COMPARE) *(writep++) = ch; } *writep = '\0'; front = binary_search(string, front, back); front = linear_search(string, front, back); if (front) print_from(string, front, back); return (front ? 0 : 1); } /* * Binary search for "string" in memory between "front" and "back". * * This routine is expected to return a pointer to the start of a line at * *or before* the first word matching "string". Relaxing the constraint * this way simplifies the algorithm. * * Invariants: * front points to the beginning of a line at or before the first * matching string. * * back points to the beginning of a line at or after the first * matching line. * * Base of the Invariants. * front = NULL; * back = EOF; * * Advancing the Invariants: * * p = first newline after halfway point from front to back. * * If the string at "p" is not greater than the string to match, * p is the new front. Otherwise it is the new back. * * Termination: * * The definition of the routine allows it return at any point, * since front is always at or before the line to print. * * In fact, it returns when the chosen "p" equals "back". This * implies that there exists a string is least half as long as * (back - front), which in turn implies that a linear search will * be no more expensive than the cost of simply printing a string or two. * * Trying to continue with binary search at this point would be * more trouble than it's worth. */ #define SKIP_PAST_NEWLINE(p, back) \ while (p < back && *p++ != '\n'); char * binary_search(string, front, back) char *string, *front, *back; { char *p; p = front + (back - front) / 2; SKIP_PAST_NEWLINE(p, back); /* * If the file changes underneath us, make sure we don't * infinitely loop. */ while (p < back && back > front) { if (compare(string, p, back) == GREATER) front = p; else back = p; p = front + (back - front) / 2; SKIP_PAST_NEWLINE(p, back); } return (front); } /* * Find the first line that starts with string, linearly searching from front * to back. * * Return NULL for no such line. * * This routine assumes: * * o front points at the first character in a line. * o front is before or at the first line to be printed. */ char * linear_search(string, front, back) char *string, *front, *back; { while (front < back) { switch (compare(string, front, back)) { case EQUAL: /* Found it. */ return (front); break; case LESS: /* No such string. */ return (NULL); break; case GREATER: /* Keep going. */ break; } SKIP_PAST_NEWLINE(front, back); } return (NULL); } /* * Print as many lines as match string, starting at front. */ void print_from(string, front, back) char *string, *front, *back; { for (; front < back && compare(string, front, back) == EQUAL; ++front) { for (; front < back && *front != '\n'; ++front) if (putchar(*front) == EOF) err(2, "stdout"); if (putchar('\n') == EOF) err(2, "stdout"); } } /* * Return LESS, GREATER, or EQUAL depending on how the string1 compares with * string2 (s1 ??? s2). * * o Matches up to len(s1) are EQUAL. * o Matches up to len(s2) are GREATER. * * Compare understands about the -f and -d flags, and treats comparisons * appropriately. * * The string "s1" is null terminated. The string s2 is '\n' terminated (or * "back" terminated). */ int compare(s1, s2, back) char *s1, *s2, *back; { int ch; for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) { ch = *s2; if (fflag) ch = FOLD(ch); if (dflag) ch = DICT(ch); if (ch == NO_COMPARE) { ++s2; /* Ignore character in comparison. */ continue; } if (*s1 != ch) return (*s1 < ch ? LESS : GREATER); } return (*s1 ? GREATER : EQUAL); } void usage() { (void)fprintf(stderr, "usage: look [-df] [-t char] string [file]\n"); exit(2); }