From 33d4c8a62e32344e9207536e8f72d0f23b0fc0ee Mon Sep 17 00:00:00 2001 From: Philippe Houdoin Date: Mon, 6 Nov 2017 16:19:41 +0000 Subject: [PATCH] TextSearch: way speeder by using xargs + grep * Previously, each file was starting a shell to run grep command on it. Very suboptimal performance, big overhead. * Now a thread write each file path to xargs input, while another start xargs, let it distribute files on grep processes (one per cpu) and collect results asap. * This bring results way faster than previously. * Rename Escape Text setting into Regular Expression, as name was more after shell workaround than after function. * While it doesn't use a native text searching, by reusing both grep and xargs power, it answer the main issue with #9529. --- src/apps/text_search/GrepWindow.cpp | 35 +-- src/apps/text_search/GrepWindow.h | 4 +- src/apps/text_search/Grepper.cpp | 347 +++++++++++++++++++--------- src/apps/text_search/Grepper.h | 17 +- src/apps/text_search/Model.cpp | 10 +- src/apps/text_search/Model.h | 6 +- 6 files changed, 280 insertions(+), 139 deletions(-) diff --git a/src/apps/text_search/GrepWindow.cpp b/src/apps/text_search/GrepWindow.cpp index cab747e199..4b17ce9b82 100644 --- a/src/apps/text_search/GrepWindow.cpp +++ b/src/apps/text_search/GrepWindow.cpp @@ -90,7 +90,7 @@ GrepWindow::GrepWindow(BMessage* message) fRecurseDirs(NULL), fSkipDotDirs(NULL), fCaseSensitive(NULL), - fEscapeText(NULL), + fRegularExpression(NULL), fTextOnly(NULL), fInvokePe(NULL), fHistoryMenu(NULL), @@ -211,8 +211,8 @@ void GrepWindow::MessageReceived(BMessage* message) _OnCaseSensitive(); break; - case MSG_ESCAPE_TEXT: - _OnEscapeText(); + case MSG_REGULAR_EXPRESSION: + _OnRegularExpression(); break; case MSG_TEXT_ONLY: @@ -452,8 +452,8 @@ GrepWindow::_CreateMenus() fCaseSensitive = new BMenuItem( B_TRANSLATE("Case-sensitive"), new BMessage(MSG_CASE_SENSITIVE)); - fEscapeText = new BMenuItem( - B_TRANSLATE("Escape search text"), new BMessage(MSG_ESCAPE_TEXT)); + fRegularExpression = new BMenuItem( + B_TRANSLATE("Regular expression"), new BMessage(MSG_REGULAR_EXPRESSION)); fTextOnly = new BMenuItem( B_TRANSLATE("Text files only"), new BMessage(MSG_TEXT_ONLY)); @@ -486,7 +486,7 @@ GrepWindow::_CreateMenus() fPreferencesMenu->AddItem(fRecurseDirs); fPreferencesMenu->AddItem(fSkipDotDirs); fPreferencesMenu->AddItem(fCaseSensitive); - fPreferencesMenu->AddItem(fEscapeText); + fPreferencesMenu->AddItem(fRegularExpression); fPreferencesMenu->AddItem(fTextOnly); fPreferencesMenu->AddItem(fInvokePe); @@ -607,7 +607,7 @@ GrepWindow::_LoadPrefs() fRecurseLinks->SetMarked(fModel->fRecurseLinks); fSkipDotDirs->SetMarked(fModel->fSkipDotDirs); fCaseSensitive->SetMarked(fModel->fCaseSensitive); - fEscapeText->SetMarked(fModel->fEscapeText); + fRegularExpression->SetMarked(fModel->fRegularExpression); fTextOnly->SetMarked(fModel->fTextOnly); fInvokePe->SetMarked(fModel->fInvokePe); @@ -865,12 +865,14 @@ GrepWindow::_OnNodeMonitorEvent(BMessage* message) if (entry.GetRef(&ref) == B_OK) { int32 index; ResultItem* item = fSearchResults->FindItem(ref, &index); - item->SetText(path.String()); - // take care of invalidation, the index is currently - // the full list index, but needs to be the visible - // items index for this - index = fSearchResults->IndexOf(item); - fSearchResults->InvalidateItem(index); + if (item) { + item->SetText(path.String()); + // take care of invalidation, the index is currently + // the full list index, but needs to be the visible + // items index for this + index = fSearchResults->IndexOf(item); + fSearchResults->InvalidateItem(index); + } } } break; @@ -969,7 +971,6 @@ void GrepWindow::_OnReportResult(BMessage* message) { CALLED(); - entry_ref ref; if (message->FindRef("ref", &ref) != B_OK) return; @@ -1056,10 +1057,10 @@ GrepWindow::_OnSkipDotDirs() void -GrepWindow::_OnEscapeText() +GrepWindow::_OnRegularExpression() { - fModel->fEscapeText = !fModel->fEscapeText; - fEscapeText->SetMarked(fModel->fEscapeText); + fModel->fRegularExpression = !fModel->fRegularExpression; + fRegularExpression->SetMarked(fModel->fRegularExpression); _ModelChanged(); } diff --git a/src/apps/text_search/GrepWindow.h b/src/apps/text_search/GrepWindow.h index e65f9a65e6..651db09351 100644 --- a/src/apps/text_search/GrepWindow.h +++ b/src/apps/text_search/GrepWindow.h @@ -54,7 +54,7 @@ private: void _OnRecurseLinks(); void _OnRecurseDirs(); void _OnSkipDotDirs(); - void _OnEscapeText(); + void _OnRegularExpression(); void _OnCaseSensitive(); void _OnTextOnly(); void _OnInvokePe(); @@ -103,7 +103,7 @@ private: BMenuItem* fRecurseDirs; BMenuItem* fSkipDotDirs; BMenuItem* fCaseSensitive; - BMenuItem* fEscapeText; + BMenuItem* fRegularExpression; BMenuItem* fTextOnly; BMenuItem* fInvokePe; BMenu* fHistoryMenu; diff --git a/src/apps/text_search/Grepper.cpp b/src/apps/text_search/Grepper.cpp index 1a40decb9a..175634ad05 100644 --- a/src/apps/text_search/Grepper.cpp +++ b/src/apps/text_search/Grepper.cpp @@ -10,6 +10,11 @@ #include #include #include +#include +#include + +#include +#include #include #include @@ -26,14 +31,11 @@ #define B_TRANSLATION_CONTEXT "Grepper" +const char* kEOFTag = "//EOF"; + + using std::nothrow; -// TODO: stippi: Check if this is a the best place to maintain a global -// list of files and folders for node monitoring. It should probably monitor -// every file that was grepped, as well as every visited (sub) folder. -// For the moment I don't know the life cycle of the Grepper object. - - char* strdup_to_utf8(uint32 encode, const char* src, int32 length) { @@ -85,12 +87,13 @@ Grepper::Grepper(const char* pattern, const Model* model, const BHandler* target, FileIterator* iterator) : fPattern(NULL), fTarget(target), - fEscapeText(model->fEscapeText), + fRegularExpression(model->fRegularExpression), fCaseSensitive(model->fCaseSensitive), fEncoding(model->fEncoding), fIterator(iterator), - fThreadId(-1), + fRunnerThreadId(-1), + fXargsInput(-1), fMustQuit(false) { if (fEncoding > 0) { @@ -125,23 +128,23 @@ Grepper::Start() Cancel(); fMustQuit = false; - fThreadId = spawn_thread( - _SpawnThread, "_GrepperThread", B_NORMAL_PRIORITY, this); + fRunnerThreadId = spawn_thread( + _SpawnRunnerThread, "Grep runner", B_NORMAL_PRIORITY, this); - resume_thread(fThreadId); + resume_thread(fRunnerThreadId); } void Grepper::Cancel() { - if (fThreadId < 0) + if (fRunnerThreadId < 0) return; fMustQuit = true; int32 exitValue; - wait_for_thread(fThreadId, &exitValue); - fThreadId = -1; + wait_for_thread(fRunnerThreadId, &exitValue); + fRunnerThreadId = -1; } @@ -149,52 +152,45 @@ Grepper::Cancel() int32 -Grepper::_SpawnThread(void* cookie) +Grepper::_SpawnWriterThread(void* cookie) { Grepper* self = static_cast(cookie); - return self->_GrepperThread(); + return self->_WriterThread(); } int32 -Grepper::_GrepperThread() +Grepper::_WriterThread() { BMessage message; + char fileName[B_PATH_NAME_LENGTH*2]; + int count = 0; - char fileName[B_PATH_NAME_LENGTH]; - char tempString[B_PATH_NAME_LENGTH]; - char command[B_PATH_NAME_LENGTH + 32]; - - BPath tempFile; - sprintf(fileName, "/tmp/SearchText%" B_PRId32, fThreadId); - tempFile.SetTo(fileName); + printf("paths_writer started.\n"); while (!fMustQuit && fIterator->GetNextName(fileName)) { message.MakeEmpty(); message.what = MSG_REPORT_FILE_NAME; message.AddString("filename", fileName); - fTarget.SendMessage(&message); - - message.MakeEmpty(); - message.what = MSG_REPORT_RESULT; - message.AddString("filename", fileName); +// fTarget.SendMessage(&message); BEntry entry(fileName); entry_ref ref; entry.GetRef(&ref); - message.AddRef("ref", &ref); - if (!entry.Exists()) { - if (fIterator->NotifyNegatives()) + if (fIterator->NotifyNegatives()) { + message.what = MSG_REPORT_RESULT; + message.AddRef("ref", &ref); fTarget.SendMessage(&message); + } continue; } if (!_EscapeSpecialChars(fileName, B_PATH_NAME_LENGTH)) { + char tempString[B_PATH_NAME_LENGTH + 32]; sprintf(tempString, B_TRANSLATE("%s: Not enough room to escape " "the filename."), fileName); - message.MakeEmpty(); message.what = MSG_REPORT_ERROR; message.AddString("error", tempString); @@ -202,47 +198,232 @@ Grepper::_GrepperThread() continue; } - sprintf(command, "grep -hn %s %s \"%s\" > \"%s\"", - fCaseSensitive ? "" : "-i", fPattern, fileName, tempFile.Path()); + // file exists, send it to xargs + write(fXargsInput, fileName, strlen(fileName)); + write(fXargsInput, "\n", 1); + // printf(">>>>>> %s\n", fileName); - int res = system(command); + fTarget.SendMessage(&message); - if (res == 0 || res == 1) { - FILE *results = fopen(tempFile.Path(), "r"); + count++; + } - if (results != NULL) { - while (fgets(tempString, B_PATH_NAME_LENGTH, results) != 0) { - if (fEncoding > 0) { - char* tempdup = strdup_to_utf8(fEncoding, tempString, - strlen(tempString)); - message.AddString("text", tempdup); - free(tempdup); - } else - message.AddString("text", tempString); - } + write(fXargsInput, kEOFTag, strlen(kEOFTag)); + write(fXargsInput, "\n", 1); + close(fXargsInput); - if (message.HasString("text") || fIterator->NotifyNegatives()) - fTarget.SendMessage(&message); + printf("paths_writer stopped (%d paths).\n", count); - fclose(results); - continue; - } - } + return 0; +} - sprintf(tempString, B_TRANSLATE("%s: There was a problem running grep."), fileName); +int32 +Grepper::_SpawnRunnerThread(void* cookie) +{ + Grepper* self = static_cast(cookie); + return self->_RunnerThread(); +} + + +int32 +Grepper::_RunnerThread() +{ + BMessage message; + char fileName[B_PATH_NAME_LENGTH]; + + const char* argv[32]; + int argc = 0; + argv[argc++] = "xargs"; + + // can't use yet the --null mode due to pipe issue + // the xargs stdin input pipe closure is not detected + // by xargs. Instead, we use eof-string mode + + // argv[argc++] = "--null"; + argv[argc++] = "-E"; + argv[argc++] = kEOFTag; + + // Enable parallel mode + // Retrieve cpu count for to parallel xargs via -P argument + char cpuCount[8]; + system_info sys_info; + get_system_info(&sys_info); + snprintf(cpuCount, sizeof(cpuCount), "%d", sys_info.cpu_count); + argv[argc++] = "-P"; + argv[argc++] = cpuCount; + + // grep command driven by xargs dispatcher + argv[argc++] = "grep"; + argv[argc++] = "-n"; // need matching line(s) number(s) + argv[argc++] = "-H"; // need filename prefix + if (! fCaseSensitive) + argv[argc++] = "-i"; + if (! fRegularExpression) + argv[argc++] = "-F"; // no a regexp: force fixed string, + argv[argc++] = fPattern; + argv[argc] = NULL; + + // prepare xargs to run with stdin, stdout and stderr pipes + + int oldStdIn, oldStdOut, oldStdErr; + oldStdIn = dup(STDIN_FILENO); + oldStdOut = dup(STDOUT_FILENO); + oldStdErr = dup(STDERR_FILENO); + + int fds[2]; + pipe(fds); dup2(fds[0], STDIN_FILENO); close(fds[0]); + fXargsInput = fds[1]; // write to in, appears on command's stdin + + pipe(fds); dup2(fds[1], STDOUT_FILENO); close(fds[1]); + int out = fds[0]; // read from out, taken from command's stdout + + pipe(fds);dup2(fds[1], STDERR_FILENO); close(fds[1]); + int err = fds[0]; // read from err, taken from command's stderr + + // "load" command + thread_id xargsThread = load_image(argc, argv, + const_cast(environ)); + // xargsThread is suspended after loading + + // restore our previous stdin, stdout and stderr + close(STDIN_FILENO); dup(oldStdIn); close(oldStdIn); + close(STDOUT_FILENO); dup(oldStdOut); close(oldStdOut); + close(STDERR_FILENO); dup(oldStdErr); close(oldStdErr); + + if (xargsThread < B_OK) { message.MakeEmpty(); message.what = MSG_REPORT_ERROR; - message.AddString("error", tempString); + message.AddString("error", + B_TRANSLATE("Failed to start xargs program!")); fTarget.SendMessage(&message); + return 0; } + set_thread_priority(xargsThread, B_LOW_PRIORITY); + + thread_id writerThread = spawn_thread(_SpawnWriterThread, + "Grep writer", B_LOW_PRIORITY, this); + // let's go! + resume_thread(xargsThread); + resume_thread(writerThread); + + // Listen on xargs's stdout and stderr via select() + printf("Running: "); + for (int i = 0; i < argc; i++) { + printf("%s ", argv[i]); + } + printf("\n"); + + int fdl[2] = { out, err }; + int maxfd = 0; + for (int i = 0; i < 2; i++) { + if (maxfd < fdl[i]) + maxfd = fdl[i]; + } + + fd_set readSet; + char line[B_PATH_NAME_LENGTH * 2]; + + FILE* output = fdopen(out, "r"); + FILE* errors = fdopen(err, "r"); + + char currentFileName[B_PATH_NAME_LENGTH]; + currentFileName[0] = '\0'; + bool canReadOutput, canReadErrors; + canReadOutput = canReadErrors = true; + while (!fMustQuit && (canReadOutput || canReadErrors)) { + FD_ZERO(&readSet); + if (canReadOutput) { + FD_SET(out, &readSet); + } + if (canReadErrors) { + FD_SET(err, &readSet); + } + + int result = select(maxfd + 1, &readSet, NULL, NULL, NULL); + if (result < 0) { + printf("select(): %d (%s)\n", result, strerror(errno)); + break; + } + + if (canReadOutput && FD_ISSET(out, &readSet)) { + if (fgets(line, sizeof(line), output) != NULL) { + // parse grep output + int lineNumber = -1; + int textPos = -1; + sscanf(line, "%[^\n:]:%d:%n", fileName, &lineNumber, &textPos); + // printf("sscanf(\"%s\") -> %s %d %d\n", line, fileName, + // lineNumber, textPos); + if (textPos > 0) { + if (strcmp(fileName, currentFileName) != 0) { + fTarget.SendMessage(&message); + + strncpy(currentFileName, fileName, + sizeof(currentFileName)); + + message.MakeEmpty(); + message.what = MSG_REPORT_RESULT; + message.AddString("filename", fileName); + + BEntry entry(fileName); + entry_ref ref; + entry.GetRef(&ref); + message.AddRef("ref", &ref); + } + + char* text = &line[strlen(fileName)+1]; + printf("[%s] %s", fileName, text); + if (fEncoding > 0) { + char* tempdup = strdup_to_utf8(fEncoding, text, + strlen(text)); + message.AddString("text", tempdup); + free(tempdup); + } else { + message.AddString("text", text); + } + message.AddInt32("line", lineNumber); + } + } else { + canReadOutput = false; + } + } + if (canReadErrors && FD_ISSET(err, &readSet)) { + if (fgets(line, sizeof(line), errors) != NULL) { + + if (message.HasString("text")) + fTarget.SendMessage(&message); + currentFileName[0] = '\0'; + + message.MakeEmpty(); + message.what = MSG_REPORT_ERROR; + message.AddString("error", line); + fTarget.SendMessage(&message); + } else { + canReadErrors = false; + } + } + } + + // send last pending message, if any + if (message.HasString("text")) + fTarget.SendMessage(&message); + + printf("Done.\n"); + fclose(output); + fclose(errors); + + close(out); + close(err); + + fMustQuit = true; + int32 exitValue; + wait_for_thread(xargsThread, &exitValue); + wait_for_thread(writerThread, &exitValue); // We wait with removing the temporary file until after the // entire search has finished, to prevent a lot of flickering // if the Tracker window for /tmp/ might be open. - remove(tempFile.Path()); - message.MakeEmpty(); message.what = MSG_SEARCH_FINISHED; fTarget.SendMessage(&message); @@ -257,52 +438,7 @@ Grepper::_SetPattern(const char* src) if (src == NULL) return; - if (!fEscapeText) { - fPattern = strdup(src); - return; - } - - // We will simply guess the size of the memory buffer - // that we need. This should always be large enough. - fPattern = (char*)malloc((strlen(src) + 1) * 3 * sizeof(char)); - if (fPattern == NULL) - return; - - const char* srcPtr = src; - char* dstPtr = fPattern; - - // Put double quotes around the pattern, so separate - // words are considered to be part of a single string. - *dstPtr++ = '"'; - - while (*srcPtr != '\0') { - char c = *srcPtr++; - - // Put a backslash in front of characters - // that should be escaped. - if ((c == '.') || (c == ',') - || (c == '[') || (c == ']') - || (c == '?') || (c == '*') - || (c == '+') || (c == '-') - || (c == ':') || (c == '^') - || (c == '"') || (c == '`')) { - *dstPtr++ = '\\'; - } else if ((c == '\\') || (c == '$')) { - // Some characters need to be escaped - // with *three* backslashes in a row. - *dstPtr++ = '\\'; - *dstPtr++ = '\\'; - *dstPtr++ = '\\'; - } - - // Note: we do not have to escape the - // { } ( ) < > and | characters. - - *dstPtr++ = c; - } - - *dstPtr++ = '"'; - *dstPtr = '\0'; + fPattern = strdup(src); } @@ -314,7 +450,7 @@ Grepper::_EscapeSpecialChars(char* buffer, ssize_t bufferSize) uint32 len = strlen(copy); bool result = true; for (uint32 count = 0; count < len; ++count) { - if (copy[count] == '"' || copy[count] == '$') + if (copy[count] == ' ') *buffer++ = '\\'; if (buffer - start == bufferSize - 1) { result = false; @@ -326,4 +462,3 @@ Grepper::_EscapeSpecialChars(char* buffer, ssize_t bufferSize) free(copy); return result; } - diff --git a/src/apps/text_search/Grepper.h b/src/apps/text_search/Grepper.h index 221bff61fd..ad977810ea 100644 --- a/src/apps/text_search/Grepper.h +++ b/src/apps/text_search/Grepper.h @@ -24,11 +24,13 @@ public: void Cancel(); private: - // Spawns the real grepper thread. - static int32 _SpawnThread(void* cookie); + // Spawns the real grepper threads. + static int32 _SpawnRunnerThread(void* cookie); + static int32 _SpawnWriterThread(void* cookie); - // The thread function that does the actual grepping. - int32 _GrepperThread(); + // The threads functions that does the actual grepping. + int32 _RunnerThread(); + int32 _WriterThread(); // Remembers, and possibly escapes, the search pattern. void _SetPattern(const char* source); @@ -37,13 +39,14 @@ private: // to prevent the shell from misinterpreting them. bool _EscapeSpecialChars(char* buffer, ssize_t bufferSize); + private: // The (escaped) search pattern. char* fPattern; // The settings from the model. BMessenger fTarget; - bool fEscapeText : 1; + bool fRegularExpression : 1; bool fCaseSensitive : 1; uint32 fEncoding; @@ -51,7 +54,9 @@ private: FileIterator* fIterator; // Our thread's ID. - thread_id fThreadId; + thread_id fRunnerThreadId; + // xargs input pipe + int fXargsInput; // Whether our thread must quit. volatile bool fMustQuit; diff --git a/src/apps/text_search/Model.cpp b/src/apps/text_search/Model.cpp index c76d65ae41..b2cde14924 100644 --- a/src/apps/text_search/Model.cpp +++ b/src/apps/text_search/Model.cpp @@ -35,7 +35,7 @@ Model::Model() fRecurseLinks(false), fSkipDotDirs(true), fCaseSensitive(false), - fEscapeText(true), + fRegularExpression(false), fTextOnly(true), fInvokePe(false), @@ -91,8 +91,8 @@ Model::LoadPrefs() sizeof(int32)) > 0) fCaseSensitive = (value != 0); - if (file.ReadAttr("EscapeText", B_INT32_TYPE, 0, &value, sizeof(int32)) > 0) - fEscapeText = (value != 0); + if (file.ReadAttr("RegularExpression", B_INT32_TYPE, 0, &value, sizeof(int32)) > 0) + fRegularExpression = (value != 0); if (file.ReadAttr("TextOnly", B_INT32_TYPE, 0, &value, sizeof(int32)) > 0) fTextOnly = (value != 0); @@ -147,8 +147,8 @@ Model::SavePrefs() value = fCaseSensitive ? 1 : 0; file.WriteAttr("CaseSensitive", B_INT32_TYPE, 0, &value, sizeof(int32)); - value = fEscapeText ? 1 : 0; - file.WriteAttr("EscapeText", B_INT32_TYPE, 0, &value, sizeof(int32)); + value = fRegularExpression ? 1 : 0; + file.WriteAttr("RegularExpression", B_INT32_TYPE, 0, &value, sizeof(int32)); value = fTextOnly ? 1 : 0; file.WriteAttr("TextOnly", B_INT32_TYPE, 0, &value, sizeof(int32)); diff --git a/src/apps/text_search/Model.h b/src/apps/text_search/Model.h index 7c465e7588..fb52962a1a 100644 --- a/src/apps/text_search/Model.h +++ b/src/apps/text_search/Model.h @@ -23,7 +23,7 @@ enum { MSG_RECURSE_DIRS, MSG_SKIP_DOT_DIRS, MSG_CASE_SENSITIVE, - MSG_ESCAPE_TEXT, + MSG_REGULAR_EXPRESSION, MSG_TEXT_ONLY, MSG_INVOKE_PE, MSG_CHECKBOX_SHOW_LINES, @@ -87,8 +87,8 @@ public: // Whether the search is case sensitive. bool fCaseSensitive; - // Whether the search pattern will be escaped. - bool fEscapeText; + // Whether the search pattern is a regular expression. + bool fRegularExpression; // Whether we look at text files only. bool fTextOnly;