The parser now uses a BBufferIO stream to speed things up.

Text::Parse() now converts bigger chunks at once.
The default RTF element destination is now visible text; only known
other destinations are marked as such (using a lookup table).
Added virtual method IsDefinitionDelimiter() to Element to make
iterating over definitions nicer.
";" now always forms a separate Text object - this fixes the
definition handling.


git-svn-id: file:///srv/svn/repos/haiku/trunk/current@10565 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Axel Dörfler 2005-01-03 08:33:19 +00:00
parent 64214d4cce
commit e709e3665e
2 changed files with 109 additions and 33 deletions

View File

@ -14,6 +14,17 @@
#include <ctype.h>
// Control words that Group::DetermineDestination() classifies as
// OTHER_DESTINATION. The table is searched with bsearch() using a
// strcmp() based comparison, so the entries must be kept in ascending
// strcmp() order - insert new words at their sorted position.
static const char *kDestinationControlWords[] = {
"aftncn", "aftnsep", "aftnsepc", "annotation", "atnauthor", "atndate",
"atnicn", "atnid", "atnparent", "atnref", "atntime", "atrfend",
"atrfstart", "author", "background", "bkmkend", "buptim", "colortbl",
"comment", "creatim", "do", "doccomm", "docvar", "fonttbl", "footer",
"footerf", "footerl", "footerr", "footnote", "ftncn", "ftnsep",
"ftnsepc", "header", "headerf", "headerl", "headerr", "info",
"keywords", "operator", "pict", "printim", "private1", "revtim",
"rxe", "stylesheet", "subject", "tc", "title", "txe", "xe",
};
static char read_char(BDataIO &stream, bool endOfFileAllowed = false) throw (status_t);
static int32 parse_integer(char first, BDataIO &stream, char &_last) throw (status_t);
@ -68,6 +79,13 @@ out:
}
// Comparison hook for bsearch() on an array of C strings: "key" is the
// string being looked up, "array" points at the array slot currently
// probed. Returns the strcmp() ordering of the key against that slot.
static int
string_array_compare(const char *key, const char **array)
{
	const char *candidate = *array;
	return strcmp(key, candidate);
}
static void
dump(Element &element, int32 level = 0)
{
@ -98,9 +116,9 @@ dump(Element &element, int32 level = 0)
// #pragma mark -
Parser::Parser(BDataIO &stream)
Parser::Parser(BPositionIO &stream)
:
fStream(stream),
fStream(&stream, 65536, false),
fIdentified(false)
{
}
@ -141,6 +159,10 @@ Parser::Parse(Header &header)
while (true) {
Element *element = NULL;
// we'll just ignore the end of the stream
if (parent == NULL)
return B_OK;
switch (c) {
case '{':
openBrackets++;
@ -221,6 +243,13 @@ Element::Parent() const
}
// Base implementation of the virtual definition-delimiter query: a
// generic element never separates definitions, so this always returns
// false. Subclasses override it where appropriate.
bool
Element::IsDefinitionDelimiter()
{
return false;
}
void
Element::PrintToStream(int32 level)
{
@ -233,7 +262,7 @@ Element::PrintToStream(int32 level)
Group::Group()
:
fDestination(OTHER_DESTINATION)
fDestination(TEXT_DESTINATION)
{
}
@ -289,8 +318,8 @@ Group::ElementAt(uint32 index) const
}
Command *
Group::FindDefinition(const char *name, int32 index) const
Element *
Group::FindDefinitionStart(int32 index, int32 *_startIndex) const
{
if (index < 0)
return NULL;
@ -298,14 +327,34 @@ Group::FindDefinition(const char *name, int32 index) const
Element *element;
int32 number = 0;
for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) {
if (Text *text = dynamic_cast<Text *>(element)) {
// the ';' indicates the next definition
if (!strcmp(text->String(), ";"))
number++;
} else if (Command *command = dynamic_cast<Command *>(element)) {
if (command != NULL
&& !strcmp(name, command->Name())
&& number == index)
if (number == index) {
if (_startIndex)
*_startIndex = i;
return element;
}
if (element->IsDefinitionDelimiter())
number++;
}
return NULL;
}
Command *
Group::FindDefinition(const char *name, int32 index) const
{
int32 startIndex;
Element *element = FindDefinitionStart(index, &startIndex);
if (element == NULL)
return NULL;
for (uint32 i = startIndex; (element = ElementAt(i)) != NULL; i++) {
if (element->IsDefinitionDelimiter())
break;
if (Command *command = dynamic_cast<Command *>(element)) {
if (command != NULL && !strcmp(name, command->Name()))
return command;
}
}
@ -347,25 +396,21 @@ void
Group::DetermineDestination()
{
const char *name = Name();
if (name == NULL) {
fDestination = TEXT_DESTINATION;
if (name == NULL)
return;
}
if (!strcmp(name, "*")) {
fDestination = COMMENT_DESTINATION;
return;
}
const char *texts[] = {"rtf", "sect", "par"};
for (uint32 i = 0; i < sizeof(texts) / sizeof(texts[0]); i++) {
if (!strcmp(name, texts[i])) {
fDestination = TEXT_DESTINATION;
return;
}
}
// binary search for destination control words
fDestination = OTHER_DESTINATION;
if (bsearch(name, kDestinationControlWords,
sizeof(kDestinationControlWords) / sizeof(kDestinationControlWords[0]),
sizeof(kDestinationControlWords[0]),
(int (*)(const void *, const void *))string_array_compare) != NULL)
fDestination = OTHER_DESTINATION;
}
@ -461,23 +506,51 @@ Text::~Text()
}
// Reports whether this text element consists of exactly ";", the
// string that marks the end of a definition.
bool
Text::IsDefinitionDelimiter()
{
	return strcmp(fText.String(), ";") == 0;
}
void
Text::Parse(char first, BDataIO &stream, char &last) throw (status_t)
{
char c = first;
if (c == '\0')
c = read_char(stream);
fText = "";
if (c == ';') {
// definition delimiter
fText.SetTo(";");
last = read_char(stream);
return;
}
const size_t kBufferSteps = 1;
size_t maxSize = kBufferSteps;
char *text = fText.LockBuffer(maxSize);
if (text == NULL)
throw (status_t)B_NO_MEMORY;
size_t position = 0;
while (true) {
if (c == '\\' || c == '}')
if (c == '\\' || c == '}' || c == '{' || c == ';')
break;
// ToDo: this is horribly inefficient with BStrings
fText.Append(c, 1);
if (position >= maxSize) {
fText.UnlockBuffer(position);
text = fText.LockBuffer(maxSize += kBufferSteps);
if (text == NULL)
throw (status_t)B_NO_MEMORY;
}
text[position++] = c;
c = read_char(stream);
}
fText.UnlockBuffer(position);
// ToDo: add support for different charsets - right now, only ASCII is supported!
// To achieve this, we should just translate everything into UTF-8 here

View File

@ -11,8 +11,8 @@
#include <List.h>
#include <String.h>
#include <GraphicsDefs.h>
#include <BufferIO.h>
class BDataIO;
namespace RTF {
@ -33,14 +33,14 @@ enum group_destination {
class Parser {
public:
Parser(BDataIO &stream);
Parser(BPositionIO &stream);
status_t Identify();
status_t Parse(RTF::Header &header);
private:
BDataIO &fStream;
bool fIdentified;
BBufferIO fStream;
bool fIdentified;
};
@ -52,6 +52,7 @@ class Element {
void SetParent(Group *parent);
Group *Parent() const;
virtual bool IsDefinitionDelimiter();
virtual void Parse(char first, BDataIO &stream, char &last) throw (status_t) = 0;
virtual void PrintToStream(int32 level = 0);
@ -69,6 +70,7 @@ class Group : public Element {
uint32 CountElements() const;
Element *ElementAt(uint32 index) const;
Element *FindDefinitionStart(int32 index, int32 *_startIndex = NULL) const;
Command *FindDefinition(const char *name, int32 index = 0) const;
Group *FindGroup(const char *name) const;
@ -110,6 +112,7 @@ class Text : public Element {
const char *String() const;
uint32 Length() const;
virtual bool IsDefinitionDelimiter();
virtual void Parse(char first, BDataIO &stream, char &last) throw (status_t);
private: