From e709e3665e47090edbf1f56304d017af0e833729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Axel=20D=C3=B6rfler?= Date: Mon, 3 Jan 2005 08:33:19 +0000 Subject: [PATCH] The parser now uses a BBufferIO stream to speed things up. Text::Parse() now converts bigger chunks at once. Default RTF element destination is visible in text now, only known other destinations are marked that way (using a lookup table). Added virtual method IsDefinitionDelimiter() to Element to make definition iterating nicer. ";" now always forms a separated Text object - this fixes the definition handling. git-svn-id: file:///srv/svn/repos/haiku/trunk/current@10565 a95241bf-73f2-0310-859d-f6bbb57e9c96 --- src/add-ons/translators/rtftranslator/RTF.cpp | 131 ++++++++++++++---- src/add-ons/translators/rtftranslator/RTF.h | 11 +- 2 files changed, 109 insertions(+), 33 deletions(-) diff --git a/src/add-ons/translators/rtftranslator/RTF.cpp b/src/add-ons/translators/rtftranslator/RTF.cpp index 96a3098604..f280628971 100644 --- a/src/add-ons/translators/rtftranslator/RTF.cpp +++ b/src/add-ons/translators/rtftranslator/RTF.cpp @@ -14,6 +14,17 @@ #include +static const char *kDestinationControlWords[] = { + "aftncn", "aftnsep", "aftnsepc", "annotation", "atnauthor", "atndate", + "atnicn", "atnid", "atnparent", "atnref", "atntime", "atrfend", + "atrfstart", "author", "background", "bkmkend", "buptim", "colortbl", + "comment", "creatim", "do", "doccomm", "docvar", "fonttbl", "footer", + "footerf", "footerl", "footerr", "footnote", "ftncn", "ftnsep", + "ftnsepc", "header", "headerf", "headerl", "headerr", "info", + "keywords", "operator", "pict", "printim", "private1", "revtim", + "rxe", "stylesheet", "subject", "tc", "title", "txe", "xe", +}; + static char read_char(BDataIO &stream, bool endOfFileAllowed = false) throw (status_t); static int32 parse_integer(char first, BDataIO &stream, char &_last) throw (status_t); @@ -68,6 +79,13 @@ out: } +static int +string_array_compare(const char *key, const char **array) 
+{ + return strcmp(key, array[0]); +} + + static void dump(Element &element, int32 level = 0) { @@ -98,9 +116,9 @@ dump(Element &element, int32 level = 0) // #pragma mark - -Parser::Parser(BDataIO &stream) +Parser::Parser(BPositionIO &stream) : - fStream(stream), + fStream(&stream, 65536, false), fIdentified(false) { } @@ -141,6 +159,10 @@ Parser::Parse(Header &header) while (true) { Element *element = NULL; + // we'll just ignore the end of the stream + if (parent == NULL) + return B_OK; + switch (c) { case '{': openBrackets++; @@ -221,6 +243,13 @@ Element::Parent() const } +bool +Element::IsDefinitionDelimiter() +{ + return false; +} + + void Element::PrintToStream(int32 level) { @@ -233,7 +262,7 @@ Element::PrintToStream(int32 level) Group::Group() : - fDestination(OTHER_DESTINATION) + fDestination(TEXT_DESTINATION) { } @@ -289,8 +318,8 @@ Group::ElementAt(uint32 index) const } -Command * -Group::FindDefinition(const char *name, int32 index) const +Element * +Group::FindDefinitionStart(int32 index, int32 *_startIndex) const { if (index < 0) return NULL; @@ -298,14 +327,34 @@ Group::FindDefinition(const char *name, int32 index) const Element *element; int32 number = 0; for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) { - if (Text *text = dynamic_cast<Text *>(element)) { - // the ';' indicates the next definition - if (!strcmp(text->String(), ";")) - number++; - } else if (Command *command = dynamic_cast<Command *>(element)) { - if (command != NULL - && !strcmp(name, command->Name()) - && number == index) + if (number == index) { + if (_startIndex) + *_startIndex = i; + return element; + } + + if (element->IsDefinitionDelimiter()) + number++; + } + + return NULL; +} + + +Command * +Group::FindDefinition(const char *name, int32 index) const +{ + int32 startIndex; + Element *element = FindDefinitionStart(index, &startIndex); + if (element == NULL) + return NULL; + + for (uint32 i = startIndex; (element = ElementAt(i)) != NULL; i++) { + if (element->IsDefinitionDelimiter()) +
break; + + if (Command *command = dynamic_cast<Command *>(element)) { + if (command != NULL && !strcmp(name, command->Name())) return command; } } @@ -347,25 +396,21 @@ void Group::DetermineDestination() { const char *name = Name(); - if (name == NULL) { - fDestination = TEXT_DESTINATION; + if (name == NULL) return; - } if (!strcmp(name, "*")) { fDestination = COMMENT_DESTINATION; return; } - const char *texts[] = {"rtf", "sect", "par"}; - for (uint32 i = 0; i < sizeof(texts) / sizeof(texts[0]); i++) { - if (!strcmp(name, texts[i])) { - fDestination = TEXT_DESTINATION; - return; - } - } + // binary search for destination control words - fDestination = OTHER_DESTINATION; + if (bsearch(name, kDestinationControlWords, + sizeof(kDestinationControlWords) / sizeof(kDestinationControlWords[0]), + sizeof(kDestinationControlWords[0]), + (int (*)(const void *, const void *))string_array_compare) != NULL) + fDestination = OTHER_DESTINATION; } @@ -461,23 +506,51 @@ Text::~Text() } +bool +Text::IsDefinitionDelimiter() +{ + return fText == ";"; +} + + void Text::Parse(char first, BDataIO &stream, char &last) throw (status_t) { char c = first; if (c == '\0') c = read_char(stream); - - fText = ""; + + if (c == ';') { + // definition delimiter + fText.SetTo(";"); + last = read_char(stream); + return; + } + + const size_t kBufferSteps = 1; + size_t maxSize = kBufferSteps; + char *text = fText.LockBuffer(maxSize); + if (text == NULL) + throw (status_t)B_NO_MEMORY; + + size_t position = 0; while (true) { - if (c == '\\' || c == '}') + if (c == '\\' || c == '}' || c == '{' || c == ';') break; - // ToDo: this is horribly inefficient with BStrings - fText.Append(c, 1); + if (position >= maxSize) { + fText.UnlockBuffer(position); + text = fText.LockBuffer(maxSize += kBufferSteps); + if (text == NULL) + throw (status_t)B_NO_MEMORY; + } + + text[position++] = c; + c = read_char(stream); } + fText.UnlockBuffer(position); // ToDo: add support for different charsets - right now, only ASCII is supported!
// To achieve this, we should just translate everything into UTF-8 here diff --git a/src/add-ons/translators/rtftranslator/RTF.h b/src/add-ons/translators/rtftranslator/RTF.h index e85cf8986d..c27078db5d 100644 --- a/src/add-ons/translators/rtftranslator/RTF.h +++ b/src/add-ons/translators/rtftranslator/RTF.h @@ -11,8 +11,8 @@ #include #include #include +#include -class BDataIO; namespace RTF { @@ -33,14 +33,14 @@ enum group_destination { class Parser { public: - Parser(BDataIO &stream); + Parser(BPositionIO &stream); status_t Identify(); status_t Parse(RTF::Header &header); private: - BDataIO &fStream; - bool fIdentified; + BBufferIO fStream; + bool fIdentified; }; @@ -52,6 +52,7 @@ class Element { void SetParent(Group *parent); Group *Parent() const; + virtual bool IsDefinitionDelimiter(); virtual void Parse(char first, BDataIO &stream, char &last) throw (status_t) = 0; virtual void PrintToStream(int32 level = 0); @@ -69,6 +70,7 @@ class Group : public Element { uint32 CountElements() const; Element *ElementAt(uint32 index) const; + Element *FindDefinitionStart(int32 index, int32 *_startIndex = NULL) const; Command *FindDefinition(const char *name, int32 index = 0) const; Group *FindGroup(const char *name) const; @@ -110,6 +112,7 @@ class Text : public Element { const char *String() const; uint32 Length() const; + virtual bool IsDefinitionDelimiter(); virtual void Parse(char first, BDataIO &stream, char &last) throw (status_t); private: