/**
 * @brief grep - mediocre grep
 *
 * Based on the regex search matcher in bim.
 *
 * @copyright
 * This file is part of ToaruOS and is released under the terms
 * of the NCSA / University of Illinois License - see LICENSE.md
 * Copyright (C) 2022 K. Lange
 */
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <getopt.h>
#include <ctype.h>
#include <errno.h>

/* Size of the buffer main() reads each input line into. */
#define LINE_SIZE 4096

/* Command-line flags; main() sets each to 1 when its option letter is seen. */
static int invert = 0;        /* -v */
static int ignorecase = 0;    /* -i */
static int quiet = 0;         /* -q */
static int only_matching = 0; /* -o */
static int counts = 0;        /* -c */

/**
 * One pattern atom: a predicate over a single input character together
 * with the data that predicate needs.
 */
struct MatchQualifier {
	/* Returns non-zero when the character matches; the int is the case
	 * mode (0 = exact, 1 = ignore case). */
	int (*matchFunc)(struct MatchQualifier*,char,int);
	union {
		char matchChar;       /* literal compared by match_char */
		struct {
			char * start; /* first character after the '[' */
			char * end;   /* the closing ']' */
		} matchSquares;       /* bracket expression used by match_squares */
	};
};

/**
 * Lower-case one character for comparison.
 *
 * tolower() is only defined for values representable as unsigned char
 * (or EOF), so the argument is converted first; the result is returned
 * as a char again so ordering comparisons behave the same way as the
 * case-sensitive ones.
 */
static char fold_case(char c) {
	return (char)tolower((unsigned char)c);
}

/**
 * Match a single literal character.
 *
 * @param self Matcher whose matchChar is the literal.
 * @param b    Character from the input line.
 * @param mode 0 for an exact comparison, 1 to ignore case.
 * @return Non-zero on a match; any other mode never matches.
 */
int match_char(struct MatchQualifier * self, char b, int mode) {
	if (mode == 0) {
		return self->matchChar == b;
	} else if (mode == 1) {
		return fold_case(self->matchChar) == fold_case(b);
	}
	return 0;
}

/**
 * Read one member of a bracket expression, decoding the escapes
 * `\\`, `\]` and `\t`, and advance the cursor past it.
 */
static char read_class_char(char ** cursor) {
	char * t = *cursor;
	char ch = *t++;
	if (ch == '\\' && *t && strchr("\\]", *t)) {
		ch = *t++;
	} else if (ch == '\\' && *t == 't') {
		ch = '\t';
		t++;
	}
	*cursor = t;
	return ch;
}

/**
 * Match a character against a bracket expression such as [abc],
 * [a-z] or [^0-9].
 *
 * @param self Matcher whose matchSquares delimits the expression body.
 * @param c    Character from the input line.
 * @param mode Non-zero to ignore case.
 * @return Non-zero on a match. A range with nothing after the '-'
 *         never matches, negated or not.
 */
int match_squares(struct MatchQualifier * self, char c, int mode) {
	char * end = self->matchSquares.end;
	char * t = self->matchSquares.start;
	int good = 1;
	/* A leading '^' negates the set: listed characters now fail. */
	if (*t == '^') { t++; good = 0; }
	while (t != end) {
		char left = read_class_char(&t);

		if (*t == '-') {
			t++;
			if (t == end) return 0;
			char right = read_class_char(&t);
			if (mode) {
				char folded = fold_case(c);
				if (folded >= fold_case(left) && folded <= fold_case(right)) return good;
			} else if (c >= left && c <= right) {
				return good;
			}
		} else {
			if (mode ? (fold_case(c) == fold_case(left)) : (c == left)) return good;
		}
	}
	return !good;
}

/**
 * Matcher for '.': accepts every character.
 */
int match_dot(struct MatchQualifier * self, char c, int mode) {
	(void)self;
	(void)c;
	(void)mode;
	return 1;
}

/**
 * Position of a captured group. regex_matches() fills in start and
 * len; it never touches copy.
 */
struct BackRef {
	int start;
	int len;
	uint32_t * copy;
};

/**
 * A line of input: text holds the characters and actual is how many
 * of them take part in matching.
 */
struct Line {
	int actual;
	char * text;
};

#define MAX_REFS 10

/**
 * Try to match a pattern against a line, anchored at one position.
 *
 * @param line       Line being searched.
 * @param j          Offset in the line where the match must start.
 * @param needle     Pattern text (NUL terminated).
 * @param ignorecase Case mode handed to the matchers (non-zero = ignore case).
 * @param len        If non-NULL, receives the length of the match.
 * @param needleout  NULL for a top-level match. When non-NULL the call is
 *                   matching the inside of a group: it succeeds on reaching
 *                   ')' and stores the pattern position after it, and
 *                   running out of pattern is a failure.
 * @param refindex   When positive and below MAX_REFS, index in refs where
 *                   the next group's position is recorded.
 * @param refs       Back-reference table, may be NULL.
 * @return 1 if the pattern matches at j, 0 otherwise.
 */
int regex_matches(struct Line * line, int j, char * needle, int ignorecase, int *len, char **needleout, int refindex, struct BackRef * refs) {
	int k = j;
	char * match = needle;
	if (*match == '^') {
		if (j != 0) return 0;
		match++;
	}
	while (k < line->actual + 1) {
		if (needleout && *match == ')') {
			*needleout = match + 1;
			if (len) *len = k - j;
			return 1;
		}
		if (*match == '\0') {
			if (needleout) return 0;
			if (len) *len = k - j;
			return 1;
		}
		if (*match == '$') {
			if (k != line->actual) return 0;
			match++;
			continue;
		}

		/* At the end of the line no character can be consumed, but the
		 * atom still has to be parsed: an optional or starred atom is
		 * allowed to match nothing there. */
		int at_end = (k == line->actual);

		struct MatchQualifier matcher = {match_char, .matchChar=*match};
		if (*match == '.') {
			matcher.matchFunc = match_dot;
			match++;
		} else if (*match == '\\' && match[1] != '\0' && strchr("$^/\\.[?]*+()",match[1]) != NULL) {
			/* match[1] is checked first because strchr() also finds the
			 * terminator, which would step past the end of the pattern;
			 * a trailing backslash is treated as a literal instead. */
			matcher.matchChar = match[1];
			match += 2;
		} else if (*match == '\\' && match[1] == 't') {
			matcher.matchChar = '\t';
			match += 2;
		} else if (*match == '[') {
			char * s = match+1;
			char * e = s;
			while (*e && *e != ']') {
				if (*e == '\\' && e[1] == ']') e++;
				e++;
			}
			if (!*e) break; /* fail match on unterminated [] sequence */
			match = e + 1;
			matcher.matchFunc = match_squares;
			matcher.matchSquares.start = s;
			matcher.matchSquares.end = e;
		} else if (*match == '(') {
			match++;
			int group_len;
			char * after_group;
			if (!regex_matches(line, k, match, ignorecase, &group_len, &after_group, 0, NULL)) break;
			match = after_group;
			if (refs && refindex > 0 && refindex < MAX_REFS) {
				refs[refindex].start = k;
				refs[refindex].len = group_len;
				refindex++;
			}
			k += group_len;
			continue;
		} else {
			match++;
		}

		if (*match == '?') {
			/* Optional: try consuming one character, otherwise skip the atom. */
			match++;
			if (!at_end && matcher.matchFunc(&matcher, line->text[k], ignorecase)) {
				int rest_len;
				if (regex_matches(line,k+1,match,ignorecase,&rest_len, needleout, refindex, refs)) {
					if (len) *len = rest_len + k + 1 - j;
					return 1;
				}
			}
			continue;
		} else if (*match == '+' || *match == '*') {
			/* '+' must consume at least one character up front. */
			if (*match == '+') {
				if (at_end || !matcher.matchFunc(&matcher, line->text[k], ignorecase)) break;
				k++;
			}
			match++;
			int greedy = 1;
			if (*match == '?') {
				/* non-greedy */
				match++;
				greedy = 0;
			}

			/* Walk forward over every character the atom accepts. The
			 * non-greedy form tries the rest of the pattern at each step
			 * and takes the first success. */
			int pos = k;
			while (pos < line->actual + 1) {
				int rest_len;
				if (!greedy && regex_matches(line, pos, match, ignorecase, &rest_len, needleout, refindex, refs)) {
					if (len) *len = rest_len + pos - j;
					return 1;
				}
				if (pos < line->actual && !matcher.matchFunc(&matcher, line->text[pos], ignorecase)) break;
				pos++;
			}
			if (!greedy) return 0;
			/* Greedy: back off one character at a time until the rest fits. */
			while (pos >= k) {
				int rest_len;
				if (regex_matches(line, pos, match, ignorecase, &rest_len, needleout, refindex, refs)) {
					if (len) *len = rest_len + pos - j;
					return 1;
				}
				pos--;
			}
			return 0;
		} else {
			if (at_end || !matcher.matchFunc(&matcher, line->text[k], ignorecase)) break;
			k++;
		}
	}
	return 0;
}

int subsearch_matches(struct Line * line, int j, char * needle, int *len) {
|
||
|
return regex_matches(line, j, needle, ignorecase, len, NULL, 0, NULL);
|
||
|
}
|
||
|
|
||
|
/**
 * Print the help text to stderr.
 *
 * @param argv Program arguments; argv[0] is shown as the program name.
 * @return Always 1, so callers can use it directly as the exit status.
 */
int usage(char ** argv) {
	/* Descriptions are wrapped in the escape sequences for italics and
	 * reset; the reset macro also ends the line. */
#define USAGE_ITALIC "\033[3m"
#define USAGE_RESET "\033[0m\n"
	fprintf(stderr, "usage: %s [-ivqoc] PATTERN [FILE...]\n"
		"\n"
		" Supported options:\n"
		" -c " USAGE_ITALIC "Instead of printing matches, print counts of matched lines." USAGE_RESET
		" -i " USAGE_ITALIC "Ignore case in input and pattern." USAGE_RESET
		" -o " USAGE_ITALIC "Print only the matching parts of each line, separating\n"
		" each match with a line feed." USAGE_RESET
		" -q " USAGE_ITALIC "Exit immediately with 0 when a match (or, with -v,\n"
		" non-match) is found, do not print matches." USAGE_RESET
		" -v " USAGE_ITALIC "Invert match - print lines that do not match pattern." USAGE_RESET
		"\n"
		" Supported regex syntax:\n"
		" [abc] " USAGE_ITALIC "Match one of a set of characters." USAGE_RESET
		" [a-z] " USAGE_ITALIC "Match one from a range of characters." USAGE_RESET
		" (abc) " USAGE_ITALIC "Match a group; does nothing here, supported for compatibility\n"
		" with bim and a possible future sed implementation." USAGE_RESET
		" . " USAGE_ITALIC "Match any single character." USAGE_RESET
		" ^ " USAGE_ITALIC "Match the start of the line." USAGE_RESET
		" $ " USAGE_ITALIC "Match the end of the line." USAGE_RESET
		"\n"
		" Modifiers (can be combined with [], ., and single characters):\n"
		" ? " USAGE_ITALIC "Match optionally" USAGE_RESET
		" * " USAGE_ITALIC "Match any number of occurrences" USAGE_RESET
		" + " USAGE_ITALIC "Match at least one occurrence" USAGE_RESET
		" *? +? " USAGE_ITALIC "Non-greedy match variants of * and +" USAGE_RESET
		"\n"
		" Some characters can be escaped in the pattern with \\.\n"
		" The regex engine is not Unicode-aware.\n",
		argv[0]);
#undef USAGE_ITALIC
#undef USAGE_RESET
	return 1;
}

#define LINE_SIZE 4096
|
||
|
|
||
|
int main(int argc, char ** argv) {
|
||
|
int opt;
|
||
|
while ((opt = getopt(argc, argv, "?hivqoc")) != -1) {
|
||
|
switch (opt) {
|
||
|
case 'h':
|
||
|
case '?':
|
||
|
return usage(argv);
|
||
|
case 'i':
|
||
|
ignorecase = 1;
|
||
|
break;
|
||
|
case 'v':
|
||
|
invert = 1;
|
||
|
break;
|
||
|
case 'q':
|
||
|
quiet = 1;
|
||
|
break;
|
||
|
case 'o':
|
||
|
only_matching = 1;
|
||
|
break;
|
||
|
case 'c':
|
||
|
counts = 1;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (optind == argc) return usage(argv);
|
||
|
|
||
|
char * needle = argv[optind];
|
||
|
char buf[LINE_SIZE];
|
||
|
int ret = 1;
|
||
|
int is_tty = isatty(STDOUT_FILENO);
|
||
|
|
||
|
optind++;
|
||
|
|
||
|
|
||
|
int showFilenames = (optind + 1 != argc);
|
||
|
|
||
|
do {
|
||
|
FILE * input = stdin;
|
||
|
int count = 0;
|
||
|
if (optind < argc && strcmp(argv[optind],"-")) {
|
||
|
input = fopen(argv[optind], "r");
|
||
|
if (!input) {
|
||
|
fprintf(stderr, "%s: %s: %s\n", argv[0], argv[optind], strerror(errno));
|
||
|
return 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
const char * filename = input == stdin ? "(standard input)" : argv[optind];
|
||
|
|
||
|
while (fgets(buf, LINE_SIZE, input)) {
|
||
|
int lineLength = strlen(buf);
|
||
|
if (lineLength && buf[lineLength-1] == '\n') {
|
||
|
lineLength--;
|
||
|
}
|
||
|
struct Line line = {
|
||
|
lineLength,
|
||
|
buf
|
||
|
};
|
||
|
|
||
|
if (!invert) {
|
||
|
int lastMatch = 0;
|
||
|
for (int j = 0; j < lineLength;) {
|
||
|
int len;
|
||
|
if (subsearch_matches(&line, j, needle, &len)) {
|
||
|
ret = 0;
|
||
|
if (counts) {
|
||
|
count++;
|
||
|
break;
|
||
|
}
|
||
|
if (quiet) goto _done;
|
||
|
if (only_matching) {
|
||
|
if (showFilenames) fprintf(stdout, "%s:", filename);
|
||
|
fprintf(stdout, "%.*s\n", len, buf + j);
|
||
|
} else {
|
||
|
if (lastMatch == 0 && showFilenames) fprintf(stdout, "%s:", filename);
|
||
|
fprintf(stdout, "%.*s%s%.*s%s",
|
||
|
j - lastMatch,
|
||
|
buf + lastMatch,
|
||
|
is_tty ? "\033[1;31m" : "",
|
||
|
len,
|
||
|
buf + j,
|
||
|
is_tty ? "\033[0m" : "");
|
||
|
}
|
||
|
lastMatch = j + len;
|
||
|
j = lastMatch;
|
||
|
} else {
|
||
|
j++;
|
||
|
}
|
||
|
}
|
||
|
if (counts) continue;
|
||
|
if (!only_matching && lastMatch) {
|
||
|
fprintf(stdout, "%s", buf + lastMatch);
|
||
|
}
|
||
|
} else {
|
||
|
int matched = 0;
|
||
|
for (int j = 0; j < lineLength; ++j) {
|
||
|
if (subsearch_matches(&line, j, needle, NULL)) {
|
||
|
matched = 1;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
if (matched) continue;
|
||
|
ret = 0;
|
||
|
if (counts) {
|
||
|
count++;
|
||
|
continue;
|
||
|
}
|
||
|
if (quiet) goto _done;
|
||
|
if (showFilenames) fprintf(stdout, "%s:", filename);
|
||
|
fprintf(stdout, "%s", buf);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
_done: (void)0;
|
||
|
if (input != stdin) fclose(input);
|
||
|
|
||
|
if (counts) {
|
||
|
if (showFilenames) fprintf(stdout, "%s:", filename);
|
||
|
fprintf(stdout, "%d\n", count);
|
||
|
}
|
||
|
|
||
|
optind++;
|
||
|
} while (optind < argc);
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|