diff --git a/headers/private/index_server/TextDataBase.h b/headers/private/index_server/TextDataBase.h
new file mode 100644
index 0000000000..6b594e801e
--- /dev/null
+++ b/headers/private/index_server/TextDataBase.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2010, Haiku.
+ * Distributed under the terms of the MIT License.
+ *
+ * Authors:
+ *		Clemens Zeidler
+ */
+#ifndef TEXT_DATA_BASE_H
+#define TEXT_DATA_BASE_H
+
+
+#include <Entry.h>
+
+
+/*!	Write interface of a text database: documents are added and removed by
+	entry_ref, Commit() asks the implementation to apply what is pending.
+*/
+class TextWriteDataBase {
+public:
+	virtual						~TextWriteDataBase() {}
+
+	virtual	status_t			InitCheck() = 0;
+
+	virtual status_t			AddDocument(const entry_ref& ref) = 0;
+	virtual status_t			RemoveDocument(const entry_ref& ref) = 0;
+	virtual status_t			Commit() = 0;
+};
+
+
+#endif // TEXT_DATA_BASE_H
diff --git a/src/add-ons/Jamfile b/src/add-ons/Jamfile
index 54785a0dbf..039f33fad5 100644
--- a/src/add-ons/Jamfile
+++ b/src/add-ons/Jamfile
@@ -3,6 +3,7 @@ SubDir HAIKU_TOP src add-ons ;
 SubInclude HAIKU_TOP src add-ons accelerants ;
 SubInclude HAIKU_TOP src add-ons decorators ;
 SubInclude HAIKU_TOP src add-ons disk_systems ;
+SubInclude HAIKU_TOP src add-ons index_server ;
 SubInclude HAIKU_TOP src add-ons input_server ;
 SubInclude HAIKU_TOP src add-ons kernel ;
 SubInclude HAIKU_TOP src add-ons locale ;
diff --git a/src/add-ons/index_server/FullText/CLuceneDataBase.cpp b/src/add-ons/index_server/FullText/CLuceneDataBase.cpp
new file mode 100644
index 0000000000..a6147d644b
--- /dev/null
+++ b/src/add-ons/index_server/FullText/CLuceneDataBase.cpp
@@ -0,0 +1,364 @@
+/*
+ * Copyright 2010, Haiku.
+ * Distributed under the terms of the MIT License.
+ *
+ * Authors:
+ *		based on previous work of Ankur Sethi
+ *		Clemens Zeidler
+ */
+
+#include "CLuceneDataBase.h"
+
+#include <new>
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <Directory.h>
+#include <Entry.h>
+#include <File.h>
+#include <TranslatorRoster.h>
+
+
+#define DEBUG_CLUCENE_DATABASE
+#ifdef DEBUG_CLUCENE_DATABASE
+#include <stdio.h>
+#	define STRACE(x...)			printf("FT: " x)
+#else
+#	define STRACE(x...)			;
+#endif
+
+
+using namespace lucene::document;
+using namespace lucene::index;
+using namespace lucene::util;
+
+
+// how often a failing CLucene operation is attempted before giving up
+const uint8 kCluceneTries = 10;
+
+
+/*!	Converts the multibyte string \a str into a newly allocated,
+	NUL terminated wide character string.
+	\return The converted string, which has to be freed with delete[], or
+		NULL if \a str is NULL, could not be converted or no memory was left.
+*/
+static wchar_t*
+to_wchar(const char* str)
+{
+	if (str == NULL)
+		return NULL;
+
+	// The conversion never yields more wide characters than the source has
+	// bytes; one more element is needed for the terminator.
+	size_t size = strlen(str) + 1;
+	wchar_t* wStr = new(std::nothrow) wchar_t[size];
+	if (wStr == NULL)
+		return NULL;
+
+	if (mbstowcs(wStr, str, size) == (size_t)-1) {
+		delete[] wStr;
+		return NULL;
+	}
+	wStr[size - 1] = L'\0';
+	return wStr;
+}
+
+
+CLuceneWriteDataBase::CLuceneWriteDataBase(const BPath& databasePath)
+	:
+	fDataBasePath(databasePath),
+	fTempPath(databasePath),
+	fInitStatus(B_NO_INIT),
+	fIndexWriter(NULL)
+{
+	fInitStatus = fDataBasePath.InitCheck();
+	if (fInitStatus != B_OK)
+		return;
+
+	STRACE("CLuceneWriteDataBase fDataBasePath %s\n", fDataBasePath.Path());
+	fInitStatus = create_directory(fDataBasePath.Path(), 0755);
+	if (fInitStatus != B_OK)
+		return;
+
+	// the text extracted from a document is stored here before indexing
+	fInitStatus = fTempPath.Append("temp_file");
+}
+
+
+CLuceneWriteDataBase::~CLuceneWriteDataBase()
+{
+	// fTempPath only names the temporary file after a successful init
+	if (fInitStatus != B_OK)
+		return;
+
+	BEntry tempFile(fTempPath.Path());
+	if (tempFile.Exists())
+		tempFile.Remove();
+}
+
+
+status_t
+CLuceneWriteDataBase::InitCheck()
+{
+	return fInitStatus;
+}
+
+
+/*!	Queues \a ref; the document is only added to the index by Commit(). */
+status_t
+CLuceneWriteDataBase::AddDocument(const entry_ref& ref)
+{
+	try {
+		fAddQueue.push_back(ref);
+	} catch (std::bad_alloc&) {
+		return B_NO_MEMORY;
+	}
+	return B_OK;
+}
+
+
+/*!	Queues \a ref; the document is only removed from the index by
+	Commit().
+*/
+status_t
+CLuceneWriteDataBase::RemoveDocument(const entry_ref& ref)
+{
+	try {
+		fDeleteQueue.push_back(ref);
+	} catch (std::bad_alloc&) {
+		return B_NO_MEMORY;
+	}
+	return B_OK;
+}
+
+
+/*!	Applies all queued removals and additions to the index. Both queues are
+	emptied unless the index writer could not be opened, in which case the
+	additions stay queued.
+	\return B_OK if nothing went wrong while adding documents, B_ERROR
+		otherwise.
+*/
+status_t
+CLuceneWriteDataBase::Commit()
+{
+	if (fAddQueue.empty() && fDeleteQueue.empty())
+		return B_OK;
+	STRACE("Commit\n");
+
+	// Documents to be added are removed first, presumably to keep a file
+	// that is indexed again from showing up twice. The results are not
+	// checked, there is e.g. nothing to remove while no index exists.
+	_RemoveDocuments(fAddQueue);
+	_RemoveDocuments(fDeleteQueue);
+	fDeleteQueue.clear();
+
+	if (fAddQueue.empty())
+		return B_OK;
+
+	fIndexWriter = _OpenIndexWriter();
+	if (fIndexWriter == NULL)
+		return B_ERROR;
+
+	status_t status = B_OK;
+	for (size_t i = 0; i < fAddQueue.size(); i++) {
+		if (_IndexDocument(fAddQueue[i]))
+			continue;
+
+		// One document that cannot be indexed must not discard the rest of
+		// the queue, but without a writer there is no point in going on.
+		status = B_ERROR;
+		if (fIndexWriter == NULL)
+			break;
+	}
+	fAddQueue.clear();
+
+	// _IndexDocument() leaves no writer behind if reopening it failed
+	if (fIndexWriter != NULL) {
+		fIndexWriter->close();
+		delete fIndexWriter;
+		fIndexWriter = NULL;
+	}
+
+	return status;
+}
+
+
+/*!	Opens a writer on the index in fDataBasePath, creating the index if it
+	does not exist yet.
+	\return The writer, owned by the caller, or NULL if every attempt failed.
+*/
+IndexWriter*
+CLuceneWriteDataBase::_OpenIndexWriter()
+{
+	for (int i = 0; i < kCluceneTries; i++) {
+		try {
+			bool createIndex = !IndexReader::indexExists(fDataBasePath.Path());
+			return new IndexWriter(fDataBasePath.Path(), &fStandardAnalyzer,
+				createIndex);
+		} catch (CLuceneError& error) {
+			STRACE("CLuceneError: _OpenIndexWriter %s\n", error.what());
+		}
+	}
+	return NULL;
+}
+
+
+/*!	Opens a reader on the index in fDataBasePath.
+	\return The reader, owned by the caller, or NULL if there is no index or
+		every attempt failed.
+*/
+IndexReader*
+CLuceneWriteDataBase::_OpenIndexReader()
+{
+	BEntry entry(fDataBasePath.Path());
+	if (!entry.Exists())
+		return NULL;
+
+	for (int i = 0; i < kCluceneTries; i++) {
+		try {
+			if (!IndexReader::indexExists(fDataBasePath.Path()))
+				return NULL;
+
+			IndexReader* reader = IndexReader::open(fDataBasePath.Path());
+			if (reader != NULL)
+				return reader;
+		} catch (CLuceneError& error) {
+			STRACE("CLuceneError: _OpenIndexReader %s\n", error.what());
+		}
+	}
+	return NULL;
+}
+
+
+/*!	Removes the documents listed in \a docs from the index.
+	\return false if the index could not be opened or a removal kept
+		failing, true otherwise.
+*/
+bool
+CLuceneWriteDataBase::_RemoveDocuments(std::vector<entry_ref>& docs)
+{
+	if (docs.empty())
+		return true;
+
+	IndexReader* reader = _OpenIndexReader();
+	if (reader == NULL)
+		return false;
+
+	bool status = true;
+	for (size_t i = 0; i < docs.size() && status; i++) {
+		BPath path(&docs[i]);
+		wchar_t* wPath = to_wchar(path.Path());
+		if (wPath == NULL)
+			continue;
+
+		for (int tries = 0; tries < kCluceneTries; tries++) {
+			status = _RemoveDocument(wPath, reader);
+			if (status)
+				break;
+
+			// try again with a fresh reader
+			reader->close();
+			delete reader;
+			reader = _OpenIndexReader();
+			if (reader == NULL)
+				break;
+		}
+		delete[] wPath;
+	}
+
+	// there is no reader left if reopening it failed
+	if (reader != NULL) {
+		reader->close();
+		delete reader;
+	}
+
+	return status;
+}
+
+
+/*!	Deletes the documents whose "path" field is \a wPath using \a reader.
+	\return false if CLucene raised an error, true otherwise.
+*/
+bool
+CLuceneWriteDataBase::_RemoveDocument(wchar_t* wPath, IndexReader* reader)
+{
+	try {
+		Term term(_T("path"), wPath);
+		reader->deleteDocuments(&term);
+	} catch (CLuceneError& error) {
+		STRACE("CLuceneError: deleteDocuments %s\n", error.what());
+		return false;
+	}
+	return true;
+}
+
+
+/*!	Translates the file \a ref to text and adds the result to the index
+	using fIndexWriter. The writer is reopened when adding fails;
+	fIndexWriter is NULL afterwards if that did not succeed.
+	\return Whether the document has been added.
+*/
+bool
+CLuceneWriteDataBase::_IndexDocument(const entry_ref& ref)
+{
+	BPath path(&ref);
+
+	BFile inFile, outFile;
+	inFile.SetTo(path.Path(), B_READ_ONLY);
+	if (inFile.InitCheck() != B_OK) {
+		STRACE("Can't open inFile %s\n", path.Path());
+		return false;
+	}
+	outFile.SetTo(fTempPath.Path(),
+		B_READ_WRITE | B_CREATE_FILE | B_ERASE_FILE);
+	if (outFile.InitCheck() != B_OK) {
+		STRACE("Can't open outFile %s\n", fTempPath.Path());
+		return false;
+	}
+
+	BTranslatorRoster* translatorRoster = BTranslatorRoster::Default();
+	if (translatorRoster->Translate(&inFile, NULL, NULL, &outFile, 'TEXT')
+		!= B_OK)
+		return false;
+
+	inFile.Unset();
+	outFile.Unset();
+
+	// convert the path first, nothing has to be cleaned up if that fails
+	wchar_t* wPath = to_wchar(path.Path());
+	if (wPath == NULL)
+		return false;
+
+	// A CLucene Document deletes the fields added to it and a Field deletes
+	// its reader, which is why all of them are heap objects that must not
+	// be deleted here.
+	Document* document = new Document;
+	FileReader* fileReader = new FileReader(fTempPath.Path(), "UTF-8");
+	document->add(*new Field(_T("contents"), fileReader,
+		Field::STORE_NO | Field::INDEX_TOKENIZED));
+	document->add(*new Field(_T("path"), wPath,
+		Field::STORE_YES | Field::INDEX_UNTOKENIZED));
+
+	bool added = false;
+	for (int i = 0; i < kCluceneTries && !added; i++) {
+		try {
+			fIndexWriter->addDocument(document);
+			STRACE("document added, retries: %i\n", i);
+			added = true;
+		} catch (CLuceneError& error) {
+			STRACE("CLuceneError addDocument %s\n", error.what());
+			// try again with a fresh writer
+			fIndexWriter->close();
+			delete fIndexWriter;
+			fIndexWriter = _OpenIndexWriter();
+			if (fIndexWriter == NULL)
+				break;
+		}
+	}
+
+	// the document stays ours, no matter whether it has been added
+	delete document;
+	delete[] wPath;
+	return added;
+}
diff --git a/src/add-ons/index_server/FullText/CLuceneDataBase.h b/src/add-ons/index_server/FullText/CLuceneDataBase.h
new file mode 100644
index 0000000000..f62272ad13
--- /dev/null
+++ b/src/add-ons/index_server/FullText/CLuceneDataBase.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2010, Haiku.
+ * Distributed under the terms of the MIT License.
+ *
+ * Authors:
+ *		Clemens Zeidler
+ */
+#ifndef CLUCENE_DATA_BASE_H
+#define CLUCENE_DATA_BASE_H
+
+
+#include <vector>
+
+#include <Path.h>
+
+#include "TextDataBase.h"
+
+#include <CLucene.h>
+
+
+/*!	TextWriteDataBase on top of a CLucene index located in the directory
+	passed to the constructor. Added and removed documents are queued and
+	applied to the index by Commit().
+*/
+class CLuceneWriteDataBase : public TextWriteDataBase {
+public:
+								CLuceneWriteDataBase(
+									const BPath& databasePath);
+								~CLuceneWriteDataBase();
+
+			status_t			InitCheck();
+
+			status_t			AddDocument(const entry_ref& ref);
+			status_t			RemoveDocument(const entry_ref& ref);
+			status_t			Commit();
+
+private:
+	// No using directives here, they would leak into every including file.
+	typedef lucene::index::IndexWriter IndexWriter;
+	typedef lucene::index::IndexReader IndexReader;
+	typedef lucene::analysis::standard::StandardAnalyzer StandardAnalyzer;
+
+			IndexWriter*		_OpenIndexWriter();
+			IndexReader*		_OpenIndexReader();
+
+			bool				_RemoveDocuments(
+									std::vector<entry_ref>& docs);
+			bool				_RemoveDocument(wchar_t* doc,
+									IndexReader* reader);
+
+			bool				_IndexDocument(const entry_ref& ref);
+
+			BPath				fDataBasePath;
+			BPath				fTempPath;
+			status_t			fInitStatus;
+
+			std::vector<entry_ref> fAddQueue;
+			std::vector<entry_ref> fDeleteQueue;
+
+			StandardAnalyzer	fStandardAnalyzer;
+
+			IndexWriter*		fIndexWriter;
+};
+
+#endif
diff --git a/src/add-ons/index_server/FullText/FullTextAnalyser.cpp b/src/add-ons/index_server/FullText/FullTextAnalyser.cpp
new file mode 100644
index 0000000000..1938ce0a7f
--- /dev/null
+++ b/src/add-ons/index_server/FullText/FullTextAnalyser.cpp
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2010, Haiku.
+ * Distributed under the terms of the MIT License.
+ *
+ * Authors:
+ *		Clemens Zeidler
+ */
+#include "FullTextAnalyser.h"
+
+#include <new>
+
+#include <Directory.h>
+#include <File.h>
+#include <String.h>
+#include <TranslatorFormats.h>
+#include <TranslatorRoster.h>
+
+#include "CLuceneDataBase.h"
+#include "IndexServerPrivate.h"
+
+
+#define DEBUG_FULLTEXT_ANALYSER
+#ifdef DEBUG_FULLTEXT_ANALYSER
+#include <stdio.h>
+#	define STRACE(x...)			printf("FullTextAnalyser: " x)
+#else
+#	define STRACE(x...)			;
+#endif
+
+
+// number of queued documents that triggers a commit
+static const uint32 kMaxUncommitted = 100;
+// files below this directory are not indexed
+static const char* const kTempDirectory = "/boot/common/cache/tmp";
+
+
+FullTextAnalyser::FullTextAnalyser(BString name, const BVolume& volume)
+	:
+	FileAnalyser(name, volume),
+
+	fWriteDataBase(NULL),
+	fNUncommited(0)
+{
+	BDirectory dir;
+	volume.GetRootDirectory(&dir);
+	fDataBasePath.SetTo(&dir);
+	fDataBasePath.Append(kIndexServerDirectory);
+	status_t status = fDataBasePath.Append(kFullTextDirectory);
+
+	// InitCheck() reports a failed allocation, thus new must not throw
+	if (status == B_OK)
+		fWriteDataBase = new(std::nothrow) CLuceneWriteDataBase(fDataBasePath);
+}
+
+
+FullTextAnalyser::~FullTextAnalyser()
+{
+	delete fWriteDataBase;
+}
+
+
+status_t
+FullTextAnalyser::InitCheck()
+{
+	if (fDataBasePath.InitCheck() != B_OK)
+		return fDataBasePath.InitCheck();
+	if (!fWriteDataBase)
+		return B_NO_MEMORY;
+
+	return fWriteDataBase->InitCheck();
+}
+
+
+/*!	Queues \a ref for indexing if it is an interesting entry that is located
+	neither in the database directory nor in the temporary directory. The
+	database is committed once more than kMaxUncommitted documents are queued.
+*/
+void
+FullTextAnalyser::AnalyseEntry(const entry_ref& ref)
+{
+	if (!_InterestingEntry(ref))
+		return;
+	BPath path(&ref);
+	if (BString(path.Path()).FindFirst(fDataBasePath.Path()) == 0) {
+		STRACE("In database path %s\n", path.Path());
+		return;
+	}
+	if (BString(path.Path()).FindFirst(kTempDirectory) == 0)
+		return;
+
+	//STRACE("FullTextAnalyser AnalyseEntry: %s %s\n", ref.name, path.Path());
+	if (fWriteDataBase->AddDocument(ref) != B_OK)
+		return;
+
+	fNUncommited++;
+	if (fNUncommited > kMaxUncommitted)
+		LastEntry();
+}
+
+
+/*!	Queues the removal of \a ref from the database. */
+void
+FullTextAnalyser::DeleteEntry(const entry_ref& ref)
+{
+	STRACE("FullTextAnalyser DeleteEntry: %s\n", ref.name);
+	fWriteDataBase->RemoveDocument(ref);
+}
+
+
+/*!	Replaces \a oldRef by \a newRef in the database. Nothing is done if
+	\a newRef is not an interesting entry.
+*/
+void
+FullTextAnalyser::MoveEntry(const entry_ref& oldRef, const entry_ref& newRef)
+{
+	if (!_InterestingEntry(newRef))
+		return;
+	STRACE("FullTextAnalyser MoveEntry: %s to %s\n", oldRef.name, newRef.name);
+	fWriteDataBase->RemoveDocument(oldRef);
+	AnalyseEntry(newRef);
+}
+
+
+/*!	Commits everything queued so far and resets the commit counter. */
+void
+FullTextAnalyser::LastEntry()
+{
+	if (fWriteDataBase->Commit() != B_OK)
+		STRACE("Commit failed\n");
+	fNUncommited = 0;
+}
+
+
+/*!	Returns whether \a ref is located outside of the database directory and
+	is identified by the translator roster for the B_TRANSLATOR_TEXT type.
+*/
+bool
+FullTextAnalyser::_InterestingEntry(const entry_ref& ref)
+{
+	if (_IsInIndexDirectory(ref))
+		return false;
+
+	BFile file(&ref, B_READ_ONLY);
+	if (file.InitCheck() != B_OK)
+		return false;
+
+	translator_info translatorInfo;
+	return BTranslatorRoster::Default()->Identify(&file, NULL,
+		&translatorInfo, 0, NULL, B_TRANSLATOR_TEXT) == B_OK;
+}
+
+
+/*!	Returns whether the database directory contains \a ref. */
+bool
+FullTextAnalyser::_IsInIndexDirectory(const entry_ref& ref)
+{
+	BEntry entry(&ref);
+	BDirectory dataBaseDir(fDataBasePath.Path());
+	return dataBaseDir.Contains(&entry);
+}
+
+
+FullTextAddOn::FullTextAddOn(image_id id, const char* name)
+	:
+	IndexServerAddOn(id, name)
+{
+
+}
+
+
+FileAnalyser*
+FullTextAddOn::CreateFileAnalyser(const BVolume& volume)
+{
+	return new (std::nothrow)FullTextAnalyser(Name(), volume);
+}
+
+
+extern "C" IndexServerAddOn* (instantiate_index_server_addon)(image_id id,
+	const char* name)
+{
+	return new (std::nothrow)FullTextAddOn(id, name);
+}
diff --git a/src/add-ons/index_server/FullText/FullTextAnalyser.h b/src/add-ons/index_server/FullText/FullTextAnalyser.h
new file mode 100644
index 0000000000..b97fb575e2
--- /dev/null
+++ b/src/add-ons/index_server/FullText/FullTextAnalyser.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2010, Haiku.
+ * Distributed under the terms of the MIT License.
+ *
+ * Authors:
+ *		Clemens Zeidler
+ */
+#ifndef FULL_TEXT_ANALYSER_H
+#define FULL_TEXT_ANALYSER_H
+
+
+#include "IndexServerAddOn.h"
+
+#include <Path.h>
+
+#include "TextDataBase.h"
+
+
+// Name of the database directory. The pointer itself is constant and thus
+// has internal linkage, so that including this header from several source
+// files does not define the symbol more than once.
+const char* const kFullTextDirectory = "FullTextAnalyser";
+
+
+/*!	FileAnalyser that hands the entries it is told about to a
+	TextWriteDataBase.
+*/
+class FullTextAnalyser : public FileAnalyser {
+public:
+								FullTextAnalyser(BString name,
+									const BVolume& volume);
+								~FullTextAnalyser();
+
+			status_t			InitCheck();
+
+			void				AnalyseEntry(const entry_ref& ref);
+			void				DeleteEntry(const entry_ref& ref);
+			void				MoveEntry(const entry_ref& oldRef,
+									const entry_ref& newRef);
+			void				LastEntry();
+
+private:
+			bool				_InterestingEntry(const entry_ref& ref);
+			bool				_IsInIndexDirectory(const entry_ref& ref);
+
+			TextWriteDataBase*	fWriteDataBase;
+			BPath				fDataBasePath;
+
+			uint32				fNUncommited;
+};
+
+
+/*!	IndexServerAddOn that creates FullTextAnalyser objects. */
+class FullTextAddOn : public IndexServerAddOn {
+public:
+								FullTextAddOn(image_id id, const char* name);
+
+			FileAnalyser*		CreateFileAnalyser(const BVolume& volume);
+};
+
+#endif
diff --git a/src/add-ons/index_server/FullText/Jamfile b/src/add-ons/index_server/FullText/Jamfile
new file mode 100644
index 0000000000..46a910de68
--- /dev/null
+++ b/src/add-ons/index_server/FullText/Jamfile
@@ -0,0 +1,17 @@
+SubDir HAIKU_TOP src add-ons index_server FullText ;
+
+UsePrivateHeaders index_server shared ;
+
+SubDirSysHdrs $(HAIKU_CLUCENE_HEADERS) ;
+
+Addon FullTextAnalyser :
+	CLuceneDataBase.cpp
+	FullTextAnalyser.cpp
+
+	IndexServerAddOn.cpp
+	:
+	be translation $(HAIKU_CLUCENE_LIBS) $(TARGET_LIBSTDC++)
+;
+
+SEARCH on [ FGristFiles IndexServerAddOn.cpp ]
+	+= [ FDirName $(SUBDIR) $(DOTDOT) ] ;
diff --git a/src/add-ons/index_server/IndexServerAddOn.cpp b/src/add-ons/index_server/IndexServerAddOn.cpp
new file mode 100644
index 0000000000..e8968b94de
--- /dev/null
+++ b/src/add-ons/index_server/IndexServerAddOn.cpp
@@ -0,0 +1,256 @@
+/*
+ * Copyright 2010, Haiku.
+ * Distributed under the terms of the MIT License.
+ *
+ * Authors:
+ *		Clemens Zeidler
+ */
+
+#include "IndexServerAddOn.h"
+
+#include <Debug.h>
+#include <Directory.h>
+#include <File.h>
+#include <Path.h>
+
+#include "IndexServerPrivate.h"
+
+
+analyser_settings::analyser_settings()
+	:
+	catchUpEnabled(true),
+
+	syncPosition(0),
+	watchingStart(0),
+	watchingPosition(0)
+{
+
+}
+
+
+const char* kAnalyserStatusFile = "AnalyserStatus";
+
+const char* kCatchUpEnabledAttr = "CatchUpEnabled";
+const char* kSyncPositionAttr = "SyncPosition";
+const char* kWatchingStartAttr = "WatchingStart";
+const char* kWatchingPositionAttr = "WatchingPosition";
+
+
+/*!	Writes the attribute \a name of \a file.
+	\return Whether all \a size bytes of \a data have been written.
+*/
+static bool
+write_attribute(BFile& file, const char* name, type_code type,
+	const void* data, size_t size)
+{
+	return file.WriteAttr(name, type, 0, data, size) == (ssize_t)size;
+}
+
+
+AnalyserSettings::AnalyserSettings(const BString& name, const BVolume& volume)
+	:
+	fName(name),
+	fVolume(volume)
+{
+	ReadSettings();
+}
+
+
+/*!	Loads the settings from the attributes of the analyser's status file.
+	\return false if the status file could not be opened, true otherwise.
+*/
+bool
+AnalyserSettings::ReadSettings()
+{
+	BAutolock _(fSettingsLock);
+
+	BDirectory rootDir;
+	fVolume.GetRootDirectory(&rootDir);
+	BPath path(&rootDir);
+	path.Append(kIndexServerDirectory);
+	path.Append(fName);
+	path.Append(kAnalyserStatusFile);
+
+	BFile file(path.Path(), B_READ_ONLY);
+	if (file.InitCheck() != B_OK)
+		return false;
+
+	// only take over the flag if the attribute has actually been read
+	uint32 value = 0;
+	if (file.ReadAttr(kCatchUpEnabledAttr, B_UINT32_TYPE, 0, &value,
+			sizeof(uint32)) == (ssize_t)sizeof(uint32)) {
+		fAnalyserSettings.catchUpEnabled = value != 0;
+	}
+	file.ReadAttr(kSyncPositionAttr, B_INT64_TYPE, 0,
+		&fAnalyserSettings.syncPosition, sizeof(int64));
+	file.ReadAttr(kWatchingStartAttr, B_INT64_TYPE, 0,
+		&fAnalyserSettings.watchingStart, sizeof(int64));
+	file.ReadAttr(kWatchingPositionAttr, B_INT64_TYPE, 0,
+		&fAnalyserSettings.watchingPosition, sizeof(int64));
+
+	return true;
+}
+
+
+/*!	Stores the settings in the attributes of the analyser's status file,
+	creating the file and its directory if necessary.
+	\return Whether the file could be set up and all settings were written.
+*/
+bool
+AnalyserSettings::WriteSettings()
+{
+	BAutolock _(fSettingsLock);
+
+	BDirectory rootDir;
+	fVolume.GetRootDirectory(&rootDir);
+	BPath path(&rootDir);
+	path.Append(kIndexServerDirectory);
+	path.Append(fName);
+	// the mode has to be octal, a decimal 777 sets unrelated bits
+	if (create_directory(path.Path(), 0755) != B_OK)
+		return false;
+	path.Append(kAnalyserStatusFile);
+
+	BFile file(path.Path(), B_READ_WRITE | B_CREATE_FILE | B_ERASE_FILE);
+	if (file.InitCheck() != B_OK)
+		return false;
+
+	// try to write every attribute, even if an earlier one failed
+	uint32 value = fAnalyserSettings.catchUpEnabled ? 1 : 0;
+	bool written = write_attribute(file, kCatchUpEnabledAttr, B_UINT32_TYPE,
+		&value, sizeof(uint32));
+	written &= write_attribute(file, kSyncPositionAttr, B_INT64_TYPE,
+		&fAnalyserSettings.syncPosition, sizeof(int64));
+	written &= write_attribute(file, kWatchingStartAttr, B_INT64_TYPE,
+		&fAnalyserSettings.watchingStart, sizeof(int64));
+	written &= write_attribute(file, kWatchingPositionAttr, B_INT64_TYPE,
+		&fAnalyserSettings.watchingPosition, sizeof(int64));
+
+	return written;
+}
+
+
+analyser_settings
+AnalyserSettings::RawSettings()
+{
+	BAutolock _(fSettingsLock);
+
+	return fAnalyserSettings;
+}
+
+
+void
+AnalyserSettings::SetCatchUpEnabled(bool enabled)
+{
+	BAutolock _(fSettingsLock);
+
+	fAnalyserSettings.catchUpEnabled = enabled;
+}
+
+
+void
+AnalyserSettings::SetSyncPosition(bigtime_t time)
+{
+	BAutolock _(fSettingsLock);
+
+	fAnalyserSettings.syncPosition = time;
+}
+
+
+void
+AnalyserSettings::SetWatchingStart(bigtime_t time)
+{
+	BAutolock _(fSettingsLock);
+
+	fAnalyserSettings.watchingStart = time;
+}
+
+
+void
+AnalyserSettings::SetWatchingPosition(bigtime_t time)
+{
+	BAutolock _(fSettingsLock);
+
+	fAnalyserSettings.watchingPosition = time;
+}
+
+
+bool
+AnalyserSettings::CatchUpEnabled()
+{
+	BAutolock _(fSettingsLock);
+
+	return fAnalyserSettings.catchUpEnabled;
+}
+
+
+bigtime_t
+AnalyserSettings::SyncPosition()
+{
+	BAutolock _(fSettingsLock);
+
+	return fAnalyserSettings.syncPosition;
+}
+
+
+bigtime_t
+AnalyserSettings::WatchingStart()
+{
+	BAutolock _(fSettingsLock);
+
+	return fAnalyserSettings.watchingStart;
+}
+
+
+bigtime_t
+AnalyserSettings::WatchingPosition()
+{
+	BAutolock _(fSettingsLock);
+
+	return fAnalyserSettings.watchingPosition;
+}
+
+
+FileAnalyser::FileAnalyser(const BString& name, const BVolume& volume)
+	:
+	fVolume(volume),
+	fName(name)
+{
+
+}
+
+
+/*!	Makes \a settings, which must belong to the same analyser name and
+	volume and must not be NULL, the settings of this analyser.
+*/
+void
+FileAnalyser::SetSettings(AnalyserSettings* settings)
+{
+	ASSERT(fName == settings->Name() && fVolume == settings->Volume());
+
+	fAnalyserSettings = settings;
+	ASSERT(fAnalyserSettings.Get());
+	UpdateSettingsCache();
+}
+
+
+AnalyserSettings*
+FileAnalyser::Settings() const
+{
+	return fAnalyserSettings;
+}
+
+
+const analyser_settings&
+FileAnalyser::CachedSettings() const
+{
+	return fCachedSettings;
+}
+
+
+/*!	Refreshes the cached copy returned by CachedSettings(). */
+void
+FileAnalyser::UpdateSettingsCache()
+{
+	fCachedSettings = fAnalyserSettings->RawSettings();
+}
diff --git a/src/add-ons/index_server/Jamfile b/src/add-ons/index_server/Jamfile
new file mode 100644
index 0000000000..071e8b8fa9
--- /dev/null
+++ b/src/add-ons/index_server/Jamfile
@@ -0,0 +1,4 @@
+SubDir HAIKU_TOP src add-ons index_server ;
+
+SubInclude HAIKU_TOP src add-ons index_server AudioTags ;
+SubInclude HAIKU_TOP src add-ons index_server FullText ;