TextSearch: way speeder by using xargs + grep

* Previously, each file was starting a shell to run grep
  command on it. Very suboptimal performance, big overhead.
* Now a thread write each file path to xargs input,
  while another start xargs, let it distribute files on
  grep processes (one per cpu) and collect results asap.
* This bring results way faster than previously.
* Rename Escape Text setting into Regular Expression, as
  name was more after shell workaround than after function.
* While it doesn't use a native text searching, by
  reusing both grep and xargs power, it answer the main
  issue with #9529.
This commit is contained in:
Philippe Houdoin 2017-11-06 16:19:41 +00:00
parent 2d27c2d003
commit 33d4c8a62e
6 changed files with 280 additions and 139 deletions

View File

@ -90,7 +90,7 @@ GrepWindow::GrepWindow(BMessage* message)
fRecurseDirs(NULL),
fSkipDotDirs(NULL),
fCaseSensitive(NULL),
fEscapeText(NULL),
fRegularExpression(NULL),
fTextOnly(NULL),
fInvokePe(NULL),
fHistoryMenu(NULL),
@ -211,8 +211,8 @@ void GrepWindow::MessageReceived(BMessage* message)
_OnCaseSensitive();
break;
case MSG_ESCAPE_TEXT:
_OnEscapeText();
case MSG_REGULAR_EXPRESSION:
_OnRegularExpression();
break;
case MSG_TEXT_ONLY:
@ -452,8 +452,8 @@ GrepWindow::_CreateMenus()
fCaseSensitive = new BMenuItem(
B_TRANSLATE("Case-sensitive"), new BMessage(MSG_CASE_SENSITIVE));
fEscapeText = new BMenuItem(
B_TRANSLATE("Escape search text"), new BMessage(MSG_ESCAPE_TEXT));
fRegularExpression = new BMenuItem(
B_TRANSLATE("Regular expression"), new BMessage(MSG_REGULAR_EXPRESSION));
fTextOnly = new BMenuItem(
B_TRANSLATE("Text files only"), new BMessage(MSG_TEXT_ONLY));
@ -486,7 +486,7 @@ GrepWindow::_CreateMenus()
fPreferencesMenu->AddItem(fRecurseDirs);
fPreferencesMenu->AddItem(fSkipDotDirs);
fPreferencesMenu->AddItem(fCaseSensitive);
fPreferencesMenu->AddItem(fEscapeText);
fPreferencesMenu->AddItem(fRegularExpression);
fPreferencesMenu->AddItem(fTextOnly);
fPreferencesMenu->AddItem(fInvokePe);
@ -607,7 +607,7 @@ GrepWindow::_LoadPrefs()
fRecurseLinks->SetMarked(fModel->fRecurseLinks);
fSkipDotDirs->SetMarked(fModel->fSkipDotDirs);
fCaseSensitive->SetMarked(fModel->fCaseSensitive);
fEscapeText->SetMarked(fModel->fEscapeText);
fRegularExpression->SetMarked(fModel->fRegularExpression);
fTextOnly->SetMarked(fModel->fTextOnly);
fInvokePe->SetMarked(fModel->fInvokePe);
@ -865,12 +865,14 @@ GrepWindow::_OnNodeMonitorEvent(BMessage* message)
if (entry.GetRef(&ref) == B_OK) {
int32 index;
ResultItem* item = fSearchResults->FindItem(ref, &index);
item->SetText(path.String());
// take care of invalidation, the index is currently
// the full list index, but needs to be the visible
// items index for this
index = fSearchResults->IndexOf(item);
fSearchResults->InvalidateItem(index);
if (item) {
item->SetText(path.String());
// take care of invalidation, the index is currently
// the full list index, but needs to be the visible
// items index for this
index = fSearchResults->IndexOf(item);
fSearchResults->InvalidateItem(index);
}
}
}
break;
@ -969,7 +971,6 @@ void
GrepWindow::_OnReportResult(BMessage* message)
{
CALLED();
entry_ref ref;
if (message->FindRef("ref", &ref) != B_OK)
return;
@ -1056,10 +1057,10 @@ GrepWindow::_OnSkipDotDirs()
void
GrepWindow::_OnEscapeText()
GrepWindow::_OnRegularExpression()
{
fModel->fEscapeText = !fModel->fEscapeText;
fEscapeText->SetMarked(fModel->fEscapeText);
fModel->fRegularExpression = !fModel->fRegularExpression;
fRegularExpression->SetMarked(fModel->fRegularExpression);
_ModelChanged();
}

View File

@ -54,7 +54,7 @@ private:
void _OnRecurseLinks();
void _OnRecurseDirs();
void _OnSkipDotDirs();
void _OnEscapeText();
void _OnRegularExpression();
void _OnCaseSensitive();
void _OnTextOnly();
void _OnInvokePe();
@ -103,7 +103,7 @@ private:
BMenuItem* fRecurseDirs;
BMenuItem* fSkipDotDirs;
BMenuItem* fCaseSensitive;
BMenuItem* fEscapeText;
BMenuItem* fRegularExpression;
BMenuItem* fTextOnly;
BMenuItem* fInvokePe;
BMenu* fHistoryMenu;

View File

@ -10,6 +10,11 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/select.h>
#include <errno.h>
#include <OS.h>
#include <image.h>
#include <Catalog.h>
#include <Directory.h>
@ -26,14 +31,11 @@
#define B_TRANSLATION_CONTEXT "Grepper"
const char* kEOFTag = "//EOF";
using std::nothrow;
// TODO: stippi: Check if this is a the best place to maintain a global
// list of files and folders for node monitoring. It should probably monitor
// every file that was grepped, as well as every visited (sub) folder.
// For the moment I don't know the life cycle of the Grepper object.
char*
strdup_to_utf8(uint32 encode, const char* src, int32 length)
{
@ -85,12 +87,13 @@ Grepper::Grepper(const char* pattern, const Model* model,
const BHandler* target, FileIterator* iterator)
: fPattern(NULL),
fTarget(target),
fEscapeText(model->fEscapeText),
fRegularExpression(model->fRegularExpression),
fCaseSensitive(model->fCaseSensitive),
fEncoding(model->fEncoding),
fIterator(iterator),
fThreadId(-1),
fRunnerThreadId(-1),
fXargsInput(-1),
fMustQuit(false)
{
if (fEncoding > 0) {
@ -125,23 +128,23 @@ Grepper::Start()
Cancel();
fMustQuit = false;
fThreadId = spawn_thread(
_SpawnThread, "_GrepperThread", B_NORMAL_PRIORITY, this);
fRunnerThreadId = spawn_thread(
_SpawnRunnerThread, "Grep runner", B_NORMAL_PRIORITY, this);
resume_thread(fThreadId);
resume_thread(fRunnerThreadId);
}
void
Grepper::Cancel()
{
if (fThreadId < 0)
if (fRunnerThreadId < 0)
return;
fMustQuit = true;
int32 exitValue;
wait_for_thread(fThreadId, &exitValue);
fThreadId = -1;
wait_for_thread(fRunnerThreadId, &exitValue);
fRunnerThreadId = -1;
}
@ -149,52 +152,45 @@ Grepper::Cancel()
int32
Grepper::_SpawnThread(void* cookie)
Grepper::_SpawnWriterThread(void* cookie)
{
Grepper* self = static_cast<Grepper*>(cookie);
return self->_GrepperThread();
return self->_WriterThread();
}
int32
Grepper::_GrepperThread()
Grepper::_WriterThread()
{
BMessage message;
char fileName[B_PATH_NAME_LENGTH*2];
int count = 0;
char fileName[B_PATH_NAME_LENGTH];
char tempString[B_PATH_NAME_LENGTH];
char command[B_PATH_NAME_LENGTH + 32];
BPath tempFile;
sprintf(fileName, "/tmp/SearchText%" B_PRId32, fThreadId);
tempFile.SetTo(fileName);
printf("paths_writer started.\n");
while (!fMustQuit && fIterator->GetNextName(fileName)) {
message.MakeEmpty();
message.what = MSG_REPORT_FILE_NAME;
message.AddString("filename", fileName);
fTarget.SendMessage(&message);
message.MakeEmpty();
message.what = MSG_REPORT_RESULT;
message.AddString("filename", fileName);
// fTarget.SendMessage(&message);
BEntry entry(fileName);
entry_ref ref;
entry.GetRef(&ref);
message.AddRef("ref", &ref);
if (!entry.Exists()) {
if (fIterator->NotifyNegatives())
if (fIterator->NotifyNegatives()) {
message.what = MSG_REPORT_RESULT;
message.AddRef("ref", &ref);
fTarget.SendMessage(&message);
}
continue;
}
if (!_EscapeSpecialChars(fileName, B_PATH_NAME_LENGTH)) {
char tempString[B_PATH_NAME_LENGTH + 32];
sprintf(tempString, B_TRANSLATE("%s: Not enough room to escape "
"the filename."), fileName);
message.MakeEmpty();
message.what = MSG_REPORT_ERROR;
message.AddString("error", tempString);
@ -202,47 +198,232 @@ Grepper::_GrepperThread()
continue;
}
sprintf(command, "grep -hn %s %s \"%s\" > \"%s\"",
fCaseSensitive ? "" : "-i", fPattern, fileName, tempFile.Path());
// file exists, send it to xargs
write(fXargsInput, fileName, strlen(fileName));
write(fXargsInput, "\n", 1);
// printf(">>>>>> %s\n", fileName);
int res = system(command);
fTarget.SendMessage(&message);
if (res == 0 || res == 1) {
FILE *results = fopen(tempFile.Path(), "r");
count++;
}
if (results != NULL) {
while (fgets(tempString, B_PATH_NAME_LENGTH, results) != 0) {
if (fEncoding > 0) {
char* tempdup = strdup_to_utf8(fEncoding, tempString,
strlen(tempString));
message.AddString("text", tempdup);
free(tempdup);
} else
message.AddString("text", tempString);
}
write(fXargsInput, kEOFTag, strlen(kEOFTag));
write(fXargsInput, "\n", 1);
close(fXargsInput);
if (message.HasString("text") || fIterator->NotifyNegatives())
fTarget.SendMessage(&message);
printf("paths_writer stopped (%d paths).\n", count);
fclose(results);
continue;
}
}
return 0;
}
sprintf(tempString, B_TRANSLATE("%s: There was a problem running grep."), fileName);
int32
Grepper::_SpawnRunnerThread(void* cookie)
{
Grepper* self = static_cast<Grepper*>(cookie);
return self->_RunnerThread();
}
int32
Grepper::_RunnerThread()
{
BMessage message;
char fileName[B_PATH_NAME_LENGTH];
const char* argv[32];
int argc = 0;
argv[argc++] = "xargs";
// can't use yet the --null mode due to pipe issue
// the xargs stdin input pipe closure is not detected
// by xargs. Instead, we use eof-string mode
// argv[argc++] = "--null";
argv[argc++] = "-E";
argv[argc++] = kEOFTag;
// Enable parallel mode
// Retrieve cpu count for to parallel xargs via -P argument
char cpuCount[8];
system_info sys_info;
get_system_info(&sys_info);
snprintf(cpuCount, sizeof(cpuCount), "%d", sys_info.cpu_count);
argv[argc++] = "-P";
argv[argc++] = cpuCount;
// grep command driven by xargs dispatcher
argv[argc++] = "grep";
argv[argc++] = "-n"; // need matching line(s) number(s)
argv[argc++] = "-H"; // need filename prefix
if (! fCaseSensitive)
argv[argc++] = "-i";
if (! fRegularExpression)
argv[argc++] = "-F"; // no a regexp: force fixed string,
argv[argc++] = fPattern;
argv[argc] = NULL;
// prepare xargs to run with stdin, stdout and stderr pipes
int oldStdIn, oldStdOut, oldStdErr;
oldStdIn = dup(STDIN_FILENO);
oldStdOut = dup(STDOUT_FILENO);
oldStdErr = dup(STDERR_FILENO);
int fds[2];
pipe(fds); dup2(fds[0], STDIN_FILENO); close(fds[0]);
fXargsInput = fds[1]; // write to in, appears on command's stdin
pipe(fds); dup2(fds[1], STDOUT_FILENO); close(fds[1]);
int out = fds[0]; // read from out, taken from command's stdout
pipe(fds);dup2(fds[1], STDERR_FILENO); close(fds[1]);
int err = fds[0]; // read from err, taken from command's stderr
// "load" command
thread_id xargsThread = load_image(argc, argv,
const_cast<const char**>(environ));
// xargsThread is suspended after loading
// restore our previous stdin, stdout and stderr
close(STDIN_FILENO); dup(oldStdIn); close(oldStdIn);
close(STDOUT_FILENO); dup(oldStdOut); close(oldStdOut);
close(STDERR_FILENO); dup(oldStdErr); close(oldStdErr);
if (xargsThread < B_OK) {
message.MakeEmpty();
message.what = MSG_REPORT_ERROR;
message.AddString("error", tempString);
message.AddString("error",
B_TRANSLATE("Failed to start xargs program!"));
fTarget.SendMessage(&message);
return 0;
}
set_thread_priority(xargsThread, B_LOW_PRIORITY);
thread_id writerThread = spawn_thread(_SpawnWriterThread,
"Grep writer", B_LOW_PRIORITY, this);
// let's go!
resume_thread(xargsThread);
resume_thread(writerThread);
// Listen on xargs's stdout and stderr via select()
printf("Running: ");
for (int i = 0; i < argc; i++) {
printf("%s ", argv[i]);
}
printf("\n");
int fdl[2] = { out, err };
int maxfd = 0;
for (int i = 0; i < 2; i++) {
if (maxfd < fdl[i])
maxfd = fdl[i];
}
fd_set readSet;
char line[B_PATH_NAME_LENGTH * 2];
FILE* output = fdopen(out, "r");
FILE* errors = fdopen(err, "r");
char currentFileName[B_PATH_NAME_LENGTH];
currentFileName[0] = '\0';
bool canReadOutput, canReadErrors;
canReadOutput = canReadErrors = true;
while (!fMustQuit && (canReadOutput || canReadErrors)) {
FD_ZERO(&readSet);
if (canReadOutput) {
FD_SET(out, &readSet);
}
if (canReadErrors) {
FD_SET(err, &readSet);
}
int result = select(maxfd + 1, &readSet, NULL, NULL, NULL);
if (result < 0) {
printf("select(): %d (%s)\n", result, strerror(errno));
break;
}
if (canReadOutput && FD_ISSET(out, &readSet)) {
if (fgets(line, sizeof(line), output) != NULL) {
// parse grep output
int lineNumber = -1;
int textPos = -1;
sscanf(line, "%[^\n:]:%d:%n", fileName, &lineNumber, &textPos);
// printf("sscanf(\"%s\") -> %s %d %d\n", line, fileName,
// lineNumber, textPos);
if (textPos > 0) {
if (strcmp(fileName, currentFileName) != 0) {
fTarget.SendMessage(&message);
strncpy(currentFileName, fileName,
sizeof(currentFileName));
message.MakeEmpty();
message.what = MSG_REPORT_RESULT;
message.AddString("filename", fileName);
BEntry entry(fileName);
entry_ref ref;
entry.GetRef(&ref);
message.AddRef("ref", &ref);
}
char* text = &line[strlen(fileName)+1];
printf("[%s] %s", fileName, text);
if (fEncoding > 0) {
char* tempdup = strdup_to_utf8(fEncoding, text,
strlen(text));
message.AddString("text", tempdup);
free(tempdup);
} else {
message.AddString("text", text);
}
message.AddInt32("line", lineNumber);
}
} else {
canReadOutput = false;
}
}
if (canReadErrors && FD_ISSET(err, &readSet)) {
if (fgets(line, sizeof(line), errors) != NULL) {
if (message.HasString("text"))
fTarget.SendMessage(&message);
currentFileName[0] = '\0';
message.MakeEmpty();
message.what = MSG_REPORT_ERROR;
message.AddString("error", line);
fTarget.SendMessage(&message);
} else {
canReadErrors = false;
}
}
}
// send last pending message, if any
if (message.HasString("text"))
fTarget.SendMessage(&message);
printf("Done.\n");
fclose(output);
fclose(errors);
close(out);
close(err);
fMustQuit = true;
int32 exitValue;
wait_for_thread(xargsThread, &exitValue);
wait_for_thread(writerThread, &exitValue);
// We wait with removing the temporary file until after the
// entire search has finished, to prevent a lot of flickering
// if the Tracker window for /tmp/ might be open.
remove(tempFile.Path());
message.MakeEmpty();
message.what = MSG_SEARCH_FINISHED;
fTarget.SendMessage(&message);
@ -257,52 +438,7 @@ Grepper::_SetPattern(const char* src)
if (src == NULL)
return;
if (!fEscapeText) {
fPattern = strdup(src);
return;
}
// We will simply guess the size of the memory buffer
// that we need. This should always be large enough.
fPattern = (char*)malloc((strlen(src) + 1) * 3 * sizeof(char));
if (fPattern == NULL)
return;
const char* srcPtr = src;
char* dstPtr = fPattern;
// Put double quotes around the pattern, so separate
// words are considered to be part of a single string.
*dstPtr++ = '"';
while (*srcPtr != '\0') {
char c = *srcPtr++;
// Put a backslash in front of characters
// that should be escaped.
if ((c == '.') || (c == ',')
|| (c == '[') || (c == ']')
|| (c == '?') || (c == '*')
|| (c == '+') || (c == '-')
|| (c == ':') || (c == '^')
|| (c == '"') || (c == '`')) {
*dstPtr++ = '\\';
} else if ((c == '\\') || (c == '$')) {
// Some characters need to be escaped
// with *three* backslashes in a row.
*dstPtr++ = '\\';
*dstPtr++ = '\\';
*dstPtr++ = '\\';
}
// Note: we do not have to escape the
// { } ( ) < > and | characters.
*dstPtr++ = c;
}
*dstPtr++ = '"';
*dstPtr = '\0';
fPattern = strdup(src);
}
@ -314,7 +450,7 @@ Grepper::_EscapeSpecialChars(char* buffer, ssize_t bufferSize)
uint32 len = strlen(copy);
bool result = true;
for (uint32 count = 0; count < len; ++count) {
if (copy[count] == '"' || copy[count] == '$')
if (copy[count] == ' ')
*buffer++ = '\\';
if (buffer - start == bufferSize - 1) {
result = false;
@ -326,4 +462,3 @@ Grepper::_EscapeSpecialChars(char* buffer, ssize_t bufferSize)
free(copy);
return result;
}

View File

@ -24,11 +24,13 @@ public:
void Cancel();
private:
// Spawns the real grepper thread.
static int32 _SpawnThread(void* cookie);
// Spawns the real grepper threads.
static int32 _SpawnRunnerThread(void* cookie);
static int32 _SpawnWriterThread(void* cookie);
// The thread function that does the actual grepping.
int32 _GrepperThread();
// The threads functions that does the actual grepping.
int32 _RunnerThread();
int32 _WriterThread();
// Remembers, and possibly escapes, the search pattern.
void _SetPattern(const char* source);
@ -37,13 +39,14 @@ private:
// to prevent the shell from misinterpreting them.
bool _EscapeSpecialChars(char* buffer,
ssize_t bufferSize);
private:
// The (escaped) search pattern.
char* fPattern;
// The settings from the model.
BMessenger fTarget;
bool fEscapeText : 1;
bool fRegularExpression : 1;
bool fCaseSensitive : 1;
uint32 fEncoding;
@ -51,7 +54,9 @@ private:
FileIterator* fIterator;
// Our thread's ID.
thread_id fThreadId;
thread_id fRunnerThreadId;
// xargs input pipe
int fXargsInput;
// Whether our thread must quit.
volatile bool fMustQuit;

View File

@ -35,7 +35,7 @@ Model::Model()
fRecurseLinks(false),
fSkipDotDirs(true),
fCaseSensitive(false),
fEscapeText(true),
fRegularExpression(false),
fTextOnly(true),
fInvokePe(false),
@ -91,8 +91,8 @@ Model::LoadPrefs()
sizeof(int32)) > 0)
fCaseSensitive = (value != 0);
if (file.ReadAttr("EscapeText", B_INT32_TYPE, 0, &value, sizeof(int32)) > 0)
fEscapeText = (value != 0);
if (file.ReadAttr("RegularExpression", B_INT32_TYPE, 0, &value, sizeof(int32)) > 0)
fRegularExpression = (value != 0);
if (file.ReadAttr("TextOnly", B_INT32_TYPE, 0, &value, sizeof(int32)) > 0)
fTextOnly = (value != 0);
@ -147,8 +147,8 @@ Model::SavePrefs()
value = fCaseSensitive ? 1 : 0;
file.WriteAttr("CaseSensitive", B_INT32_TYPE, 0, &value, sizeof(int32));
value = fEscapeText ? 1 : 0;
file.WriteAttr("EscapeText", B_INT32_TYPE, 0, &value, sizeof(int32));
value = fRegularExpression ? 1 : 0;
file.WriteAttr("RegularExpression", B_INT32_TYPE, 0, &value, sizeof(int32));
value = fTextOnly ? 1 : 0;
file.WriteAttr("TextOnly", B_INT32_TYPE, 0, &value, sizeof(int32));

View File

@ -23,7 +23,7 @@ enum {
MSG_RECURSE_DIRS,
MSG_SKIP_DOT_DIRS,
MSG_CASE_SENSITIVE,
MSG_ESCAPE_TEXT,
MSG_REGULAR_EXPRESSION,
MSG_TEXT_ONLY,
MSG_INVOKE_PE,
MSG_CHECKBOX_SHOW_LINES,
@ -87,8 +87,8 @@ public:
// Whether the search is case sensitive.
bool fCaseSensitive;
// Whether the search pattern will be escaped.
bool fEscapeText;
// Whether the search pattern is a regular expression.
bool fRegularExpression;
// Whether we look at text files only.
bool fTextOnly;