agrep(1): rewrite binary check introduced with recursive search on rev 1.3 to

match the grep(1) implementation more closely. Instead of opening and scanning
the full file every time, check for the \0 byte once the file buffer is filled
for the first time.

The patch fixes stdin handling, adds the binary check for it as well, and works
more efficiently (at the cost of not scanning the full file).

Also, the original implementation forgot to add a break when a binary file is
detected, causing duplicate binary matches in the output.

Because the full file was scanned on each match instead of once per file,
searches may have been considerably slow.

Fixes PR bin/53513

Needs pullups to netbsd-9,-10.

Thanks to mlelstv and dh for their help in reviewing and finalizing the patch.
This commit is contained in:
andvar 2024-05-23 22:07:16 +00:00
parent ec7170c331
commit 45e3b02ce0
1 changed files with 16 additions and 37 deletions

View File

@ -179,6 +179,7 @@ static int next_delim_len; /* Length of delimiter after record. */
static int delim_after = 1;/* If true, print the delimiter after the record. */
static int at_eof;
static int have_matches; /* If true, matches have been found. */
static int is_binary; /* -1 unknown, 0 ascii, 1 binary */
static int invert_match; /* Show only non-matching records. */
static int print_filename; /* Output filename. */
@ -200,6 +201,12 @@ static regaparams_t match_params;
environment variable GREP_COLOR overrides this default value. */
static const char *highlight = "01;31";
/* Reports whether the data currently held in `buf' looks binary.  Returns 1
   when a NUL byte occurs within the first `data_len' bytes of `buf', and 0
   when no NUL byte is found or `buf' has not been allocated. */
static int
isbinaryfile(void)
{
  /* No buffer means there is nothing to inspect. */
  if (buf == NULL)
    return 0;

  if (memchr(buf, '\0', data_len) == NULL)
    return 0;

  return 1;
}
/* Sets `record' to the next complete record from file `fd', and `record_len'
to the length of the record. Returns 1 when there are no more records,
0 otherwise. */
@ -262,6 +269,9 @@ tre_agrep_get_next_record(int fd, const char *filename)
}
data_len += r;
next_record = buf;
if (is_binary < 0)
is_binary = isbinaryfile();
}
/* Find the next record delimiter. */
@ -316,41 +326,6 @@ tre_agrep_get_next_record(int fd, const char *filename)
#include <dirent.h>
/* Returns 1 when `filename' should be treated as binary, 0 otherwise.
   A file counts as binary when it cannot be opened, examined or mapped,
   when it is not a regular file, or when it contains a NUL byte.  An
   empty regular file holds no NUL byte and is therefore not binary. */
static int
isbinaryfile(const char *filename)
{
  struct stat st;
  size_t size;
  char *mapped;
  FILE *fp;
  int isbin = 1;	/* Every failure path below reports "binary". */

  if ((fp = fopen(filename, "r")) == NULL)
    return isbin;

  /* Without valid metadata `st' must not be read; anything that is not
     a regular file keeps the conservative "binary" answer. */
  if (fstat(fileno(fp), &st) != 0 || !S_ISREG(st.st_mode))
    goto out;

  size = (size_t)st.st_size;
  if (size == 0)
    {
      /* mmap() fails for a zero-length mapping, which would wrongly
         classify an empty file as binary. */
      isbin = 0;
      goto out;
    }

  mapped = mmap(NULL, size, PROT_READ, MAP_SHARED, fileno(fp), 0);
  if (mapped == MAP_FAILED)
    goto out;

  /* A single NUL byte anywhere in the file marks it as binary. */
  isbin = memchr(mapped, '\0', size) != NULL;
  munmap(mapped, size);

out:
  fclose(fp);
  return isbin;
}
static int tre_agrep_handle_file(const char */*filename*/);
static int
@ -398,6 +373,8 @@ tre_agrep_handle_file(const char *filename)
int count = 0;
int recnum = 0;
is_binary = -1;
/* Allocate the initial buffer. */
if (buf == NULL)
{
@ -488,10 +465,12 @@ tre_agrep_handle_file(const char *filename)
printf("%s\n", filename);
break;
}
else if (!count_matches && isbinaryfile(filename))
else if (!count_matches && is_binary > 0)
{
if (print_filename)
printf("Binary file %s matches\n", filename);
printf("%s:", filename);
printf("Binary file matches\n");
break;
}
else if (!count_matches)
{