From 4574a75fc5289b63e0e390a98571445222baf362 Mon Sep 17 00:00:00 2001 From: Tyler Dauwalder Date: Mon, 29 Jul 2002 07:01:56 +0000 Subject: [PATCH] + Got rid of global TokenStream object + Started on Parser class + Moved parsing functions into Parser class + Added Pos() member to Err class to allow for snazzy BMimeType::CheckSnifferRule() error strings. git-svn-id: file:///srv/svn/repos/haiku/trunk/current@531 a95241bf-73f2-0310-859d-f6bbb57e9c96 --- headers/private/storage/sniffer/Parser.h | 61 +++-- src/kits/storage/sniffer/Parser.cpp | 279 +++++++++++++++-------- 2 files changed, 237 insertions(+), 103 deletions(-) diff --git a/headers/private/storage/sniffer/Parser.h b/headers/private/storage/sniffer/Parser.h index db83caa2ee..f7ef3b26d3 100644 --- a/headers/private/storage/sniffer/Parser.h +++ b/headers/private/storage/sniffer/Parser.h @@ -10,14 +10,23 @@ #define _sk_sniffer_parser_h_ #include +#include +#include #include #include +#include class BString; namespace Sniffer { + class Rule; +class Expr; +class Range; +class RPattern; +class Pattern; +typedef std::vector ExprList; //------------------------------------------------------------------------------ // The mighty parsing function ;-) @@ -31,15 +40,17 @@ status_t parse(const char *rule, Rule *result, BString *parseError = NULL); class Err { public: - Err(const char *msg); - Err(const std::string &msg); + Err(const char *msg, const ssize_t pos); + Err(const std::string &msg, const ssize_t pos); Err(const Err &ref); Err& operator=(const Err &ref); const char* Msg() const; + ssize_t Pos() const; private: void SetMsg(const char *msg); char *fMsg; + ssize_t fPos; }; class CharStream { @@ -51,14 +62,14 @@ public: void Unset(); status_t InitCheck() const; bool IsEmpty() const; - size_t Pos() const; + ssize_t Pos() const; char Get(); void Unget(); private: char *fString; - size_t fPos; + ssize_t fPos; ssize_t fLen; status_t fCStatus; @@ -84,21 +95,22 @@ const char* tokenTypeToString(TokenType type); class Token { public: - Token(TokenType type = EmptyToken, const size_t posInStream = -1); + Token(TokenType type = EmptyToken, const ssize_t pos = -1); virtual ~Token(); TokenType Type() const; virtual const char* String() const; virtual int32 Int() const; virtual double Float() const; + ssize_t Pos() const; bool operator==(Token &ref); protected: TokenType fType; - size_t fPosInStream; + ssize_t fPos; }; class StringToken : public Token { public: - StringToken(const char *string, const size_t posInStream); + StringToken(const char *string, const ssize_t pos); virtual ~StringToken(); virtual const char* String() const; protected: @@ -107,7 +119,7 @@ protected: class IntToken : public Token { public: - IntToken(const int32 value, const size_t posInStream); + IntToken(const int32 value, const ssize_t pos); virtual int32 Int() const; virtual double Float() const; protected: @@ -116,7 +128,7 @@ protected: class FloatToken : public Token { public: - FloatToken(const double value, const size_t posInStream); + FloatToken(const double value, const ssize_t pos); virtual double Float() const; protected: double fValue; @@ -137,10 +149,10 @@ public: bool IsEmpty(); private: - void AddToken(TokenType type, size_t posInStream); - void AddString(const char *str, size_t posInStream); - void AddInt(const char *str, size_t posInStream); - void AddFloat(const char *str, size_t posInStream); + void AddToken(TokenType type, ssize_t pos); + void AddString(const char *str, ssize_t pos); + void AddInt(const char *str, ssize_t pos); + void AddFloat(const char *str, ssize_t pos); BList fTokenList; status_t fCStatus; @@ -149,7 +161,28 @@ private: TokenStream& operator=(const TokenStream &ref); }; -} // namespace Sniffer +class Parser { +public: + Parser(); + status_t Parse(const char *rule, Rule *result, BString *parseError = NULL); +private: + std::string ErrorMessage(Err *err, const char *rule); + + // Parsing functions + void ParseRule(Rule *result); + double ParsePriority(); + ExprList* ParseExprList(); + Expr* ParseExpr(); + Range ParseRange(); + Expr* ParsePatternList(); + Expr* ParseRPatternList(); + RPattern* ParseRPattern(); + Pattern* ParsePattern(); + + TokenStream stream; +}; + +} // namespace Sniffer #endif // _sk_sniffer_parser_h_ diff --git a/src/kits/storage/sniffer/Parser.cpp b/src/kits/storage/sniffer/Parser.cpp index 0c7de87889..33f98ee02b 100644 --- a/src/kits/storage/sniffer/Parser.cpp +++ b/src/kits/storage/sniffer/Parser.cpp @@ -23,20 +23,6 @@ using namespace Sniffer; -// Our global token stream object -TokenStream stream; - -// Private parsing functions -double parsePriority(); -/* -BList* parseExprList(); -Expr* parseExpr(); -Range parseRange(); -Expr* parsePatternList(); -Expr* parseRPatternList(); -Pattern* parsePattern(); -*/ - // Miscellaneous helper functions char escapeChar(char ch); char hexToChar(char hi, char low); @@ -51,42 +37,33 @@ bool isDecimalChar(char ch); bool isPunctuation(char ch); status_t -Sniffer::parse(const char *rule, Rule *result, BString *parseError = NULL) { - try { - if (!rule) - throw new Err("Sniffer pattern error: NULL pattern"); - if (!result) - return B_BAD_VALUE; - if (stream.SetTo(rule) != B_OK) - throw new Err("Sniffer parser error: Unable to intialize token stream"); - - double priority; - BList* exprList; - - priority = parsePriority(); - - } catch (Err *err) { - if (parseError && err) - parseError->SetTo((err->Msg() ? err->Msg() : "Sniffer parser rule: Unexpected error with no supplied error message")); - return B_BAD_MIME_SNIFFER_RULE; - } +Sniffer::parse(const char *rule, Rule *result, BString *parseError) { + Parser parser; + return parser.Parse(rule, result, parseError); } //------------------------------------------------------------------------------ -// Sniffer::Err +// Err //------------------------------------------------------------------------------ -Err::Err(const char *msg) +Err::Err(const char *msg, const ssize_t pos) : fMsg(NULL) + , fPos(pos) { SetMsg(msg); } -Err::Err(const std::string &msg) { +Err::Err(const std::string &msg, const ssize_t pos) + : fMsg(NULL) + , fPos(pos) +{ SetMsg(msg.c_str()); } -Err::Err(const Err &ref) { +Err::Err(const Err &ref) + : fMsg(NULL) + , fPos(-1) +{ *this = ref; } @@ -101,6 +78,11 @@ Err::Msg() const { return fMsg; } +ssize_t +Err::Pos() const { + return fPos; +} + void Err::SetMsg(const char *msg) { if (fMsg) { @@ -116,7 +98,6 @@ Err::SetMsg(const char *msg) { } } - //------------------------------------------------------------------------------ // CharStream //------------------------------------------------------------------------------ @@ -168,7 +149,7 @@ CharStream::IsEmpty() const { return fPos >= fLen; } -size_t +ssize_t CharStream::Pos() const { return fPos; } @@ -176,7 +157,7 @@ CharStream::Pos() const { char CharStream::Get() { if (fCStatus != B_OK) - throw new Err("Sniffer parser error: CharStream::Get() called on uninitialized CharStream object"); + throw new Err("Sniffer parser error: CharStream::Get() called on uninitialized CharStream object", -1); if (fPos < fLen) return fString[fPos++]; else { @@ -188,20 +169,20 @@ CharStream::Get() { void CharStream::Unget() { if (fCStatus != B_OK) - throw new Err("Sniffer parser error: CharStream::Unget() called on uninitialized CharStream object"); + throw new Err("Sniffer parser error: CharStream::Unget() called on uninitialized CharStream object", -1); if (fPos > 0) fPos--; else - throw new Err("Sniffer parser error: CharStream::Unget() called at beginning of character stream"); + throw new Err("Sniffer parser error: CharStream::Unget() called at beginning of character stream", -1); } //------------------------------------------------------------------------------ // Token //------------------------------------------------------------------------------ -Token::Token(TokenType type, const size_t posInStream) +Token::Token(TokenType type, const ssize_t pos) : fType(type) - , fPosInStream(posInStream) + , fPos(pos) { // if (type != EmptyToken) // cout << "New Token, fType == " << tokenTypeToString(fType) << endl; @@ -217,17 +198,22 @@ Token::Type() const { const char* Token::String() const { - throw new Err("Sniffer scanner error: Token::String() called on non-string token"); + throw new Err("Sniffer scanner error: Token::String() called on non-string token", fPos); } int32 Token::Int() const { - throw new Err("Sniffer scanner error: Token::Int() called on non-integer token"); + throw new Err("Sniffer scanner error: Token::Int() called on non-integer token", fPos); } double Token::Float() const { - throw new Err("Sniffer scanner error: Token::Float() called on non-float token"); + throw new Err("Sniffer scanner error: Token::Float() called on non-float token", fPos); +} + +ssize_t +Token::Pos() const { + return fPos; } bool @@ -278,8 +264,8 @@ Token::operator==(Token &ref) { // StringToken //------------------------------------------------------------------------------ -StringToken::StringToken(const char *string, const size_t posInStream) - : Token(CharacterString, posInStream) +StringToken::StringToken(const char *string, const ssize_t pos) + : Token(CharacterString, pos) , fString(NULL) { if (string) { @@ -302,8 +288,8 @@ StringToken::String() const { // IntToken //------------------------------------------------------------------------------ -IntToken::IntToken(const int32 value, const size_t posInStream) - : Token(Integer, posInStream) +IntToken::IntToken(const int32 value, const ssize_t pos) + : Token(Integer, pos) , fValue(value) { } @@ -322,8 +308,8 @@ IntToken::Float() const { // FloatToken //------------------------------------------------------------------------------ -FloatToken::FloatToken(const double value, const size_t posInStream) - : Token(FloatingPoint, posInStream) +FloatToken::FloatToken(const double value, const ssize_t pos) + : Token(FloatingPoint, pos) , fValue(value) { } @@ -353,7 +339,7 @@ TokenStream::SetTo(const char *string) { if (string) { CharStream stream(string); if (stream.InitCheck() != B_OK) - throw new Err("Sniffer scanner error: Unable to intialize character stream"); + throw new Err("Sniffer scanner error: Unable to intialize character stream", -1); typedef enum TokenStreamScannerState { tsssStart, @@ -387,7 +373,7 @@ TokenStream::SetTo(const char *string) { char lastLastChar; // For three char lookahead bool keepLooping = true; while (keepLooping) { - size_t pos = stream.Pos(); + ssize_t pos = stream.Pos(); char ch = stream.Get(); switch (state) { case tsssStart: @@ -396,7 +382,7 @@ TokenStream::SetTo(const char *string) { if (stream.IsEmpty()) keepLooping = false; else - throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'"); + throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'", pos); break; case '\t': @@ -460,7 +446,7 @@ TokenStream::SetTo(const char *string) { case '|': AddToken(Divider, pos); break; default: - throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'"); + throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'", pos); } break; @@ -476,7 +462,7 @@ TokenStream::SetTo(const char *string) { break; case 0x3: if (stream.IsEmpty()) - throw new Err(std::string("Sniffer scanner error: unterminated single-quoted string")); + throw new Err(std::string("Sniffer scanner error: unterminated single-quoted string"), pos); else charStr += ch; break; @@ -498,7 +484,7 @@ TokenStream::SetTo(const char *string) { break; case 0x3: if (stream.IsEmpty()) - throw new Err(std::string("Sniffer scanner error: unterminated single-quoted string")); + throw new Err(std::string("Sniffer scanner error: unterminated single-quoted string"), pos); else charStr += ch; break; @@ -537,9 +523,9 @@ TokenStream::SetTo(const char *string) { lastChar = ch; state = tsssOneHex; } else if (ch == 0x3 && stream.IsEmpty()) - throw new Err(std::string("Sniffer scanner error: incomplete hex code")); + throw new Err(std::string("Sniffer scanner error: incomplete hex code"), pos); else - throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'"); + throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'", pos); break; case tsssOneHex: @@ -547,9 +533,9 @@ TokenStream::SetTo(const char *string) { charStr += hexToChar(lastChar, ch); state = tsssTwoHex; } else if (ch == 0x3 && stream.IsEmpty()) - throw Err(std::string("Sniffer scanner error: incomplete hex code (the number of hex digits must be a multiple of two)")); + throw new Err(std::string("Sniffer scanner error: incomplete hex code (the number of hex digits must be a multiple of two)"), pos); else - throw Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'"); + throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'", pos); break; case tsssTwoHex: @@ -564,7 +550,7 @@ TokenStream::SetTo(const char *string) { AddString(charStr.c_str(), pos); keepLooping = false; } else - throw Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'"); + throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'", pos); break; case tsssIntOrFloat: @@ -601,9 +587,9 @@ TokenStream::SetTo(const char *string) { charStr += ch; state = tsssFloat; } else if (ch == 0x3 && stream.IsEmpty()) - throw new Err(std::string("Sniffer scanner error: incomplete floating point number")); + throw new Err(std::string("Sniffer scanner error: incomplete floating point number"), pos); else - throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'"); + throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'", pos); break; case tsssLonelyMinusOrPlusSign: @@ -611,16 +597,16 @@ TokenStream::SetTo(const char *string) { charStr += ch; state = tsssPosNegInt; } else if (ch == 0x3 && stream.IsEmpty()) - throw new Err(std::string("Sniffer scanner error: incomplete signed integer")); + throw new Err(std::string("Sniffer scanner error: incomplete signed integer"), pos); else - throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'"); + throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'", pos); break; case tsssPosNegInt: if (isDecimalChar(ch)) charStr += ch; else if (ch == '.') - throw new Err(std::string("Sniffer scanner error: negative floating point numbers are useless and thus signs (both + and -) are disallowed on floating points")); + throw new Err(std::string("Sniffer scanner error: negative floating point numbers are useless and thus signs (both + and -) are disallowed on floating points"), pos); else { // Terminate the number AddInt(charStr.c_str(), pos); @@ -640,7 +626,7 @@ TokenStream::SetTo(const char *string) { stream.Unget(); // In case it's punctuation, let tsssStart handle it state = tsssStart; } else if (ch == '\'' || ch == '"') { - throw new Err(std::string("Sniffer scanner error: illegal unquoted character '") + ch + "'"); + throw new Err(std::string("Sniffer scanner error: illegal unquoted character '") + ch + "'", pos); } else if (ch == 0x3 && stream.IsEmpty()) { AddString(charStr.c_str(), pos); keepLooping = false; @@ -656,7 +642,7 @@ TokenStream::SetTo(const char *string) { } else { // Check for a true end-of-text marker if (ch == 0x3 && stream.IsEmpty()) - throw new Err(std::string("Sniffer scanner error: unterminated escape sequence")); + throw new Err(std::string("Sniffer scanner error: unterminated escape sequence"), pos); else { charStr += escapeChar(ch); state = escapedState; // Return to the state we were in before the escape @@ -669,7 +655,7 @@ TokenStream::SetTo(const char *string) { lastChar = ch; state = tsssEscapeOneHex; } else - throw new Err(std::string("Sniffer scanner error: incomplete hex code")); + throw new Err(std::string("Sniffer scanner error: incomplete hex code"), pos); break; case tsssEscapeOneOctal: @@ -708,9 +694,9 @@ TokenStream::SetTo(const char *string) { charStr += hexToChar(lastChar, ch); state = escapedState; } else if (ch == 0x3 && stream.IsEmpty()) - throw new Err(std::string("Sniffer scanner error: incomplete escaped hex code (the number of hex digits must be a multiple of two)")); + throw new Err(std::string("Sniffer scanner error: incomplete escaped hex code (the number of hex digits must be a multiple of two)"), pos); else - throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'"); + throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'", pos); break; } @@ -718,7 +704,7 @@ TokenStream::SetTo(const char *string) { if (state == tsssStart) fCStatus = B_OK; else - throw new Err("Sniffer pattern error: unterminated rule"); + throw new Err("Sniffer pattern error: unterminated rule", stream.Pos()); } return fCStatus; @@ -752,30 +738,30 @@ TokenStream::IsEmpty() { } void -TokenStream::AddToken(TokenType type, size_t posInStream) { - Token *token = new Token(type, posInStream); +TokenStream::AddToken(TokenType type, ssize_t pos) { + Token *token = new Token(type, pos); fTokenList.AddItem(token); } void -TokenStream::AddString(const char *str, size_t posInStream) { - Token *token = new StringToken(str, posInStream); +TokenStream::AddString(const char *str, ssize_t pos) { + Token *token = new StringToken(str, pos); fTokenList.AddItem(token); } void -TokenStream::AddInt(const char *str, size_t posInStream) { +TokenStream::AddInt(const char *str, ssize_t pos) { // Convert the string to an int int32 value = atol(str); - Token *token = new IntToken(value, posInStream); + Token *token = new IntToken(value, pos); fTokenList.AddItem(token); } void -TokenStream::AddFloat(const char *str, size_t posInStream) { +TokenStream::AddFloat(const char *str, ssize_t pos) { // Convert the string to a float double value = atof(str); - Token *token = new FloatToken(value, posInStream); + Token *token = new FloatToken(value, pos); fTokenList.AddItem(token); } @@ -826,7 +812,7 @@ hexToChar(char hex) { else if ('A' <= hex && hex <= 'F') return hex-'a'+10; else - throw new Err(std::string("Sniffer parser error: invalid hex digit '") + hex + "' passed to hexToChar()"); + throw new Err(std::string("Sniffer parser error: invalid hex digit '") + hex + "' passed to hexToChar()", -1); } char @@ -844,7 +830,7 @@ octalToChar(char hi, char mid, char low) { if (isOctalChar(hi) && isOctalChar(mid) && isOctalChar(low)) { return ((hi-'0') << 6) | ((mid-'0') << 3) | (low-'0'); } else - throw new Err(std::string("Sniffer parser error: invalid octal digit passed to hexToChar()")); + throw new Err(std::string("Sniffer parser error: invalid octal digit passed to hexToChar()"), -1); } bool @@ -925,10 +911,125 @@ Sniffer::tokenTypeToString(TokenType type) { } //------------------------------------------------------------------------------ -// Parsing functions +// Parser //------------------------------------------------------------------------------ -double -parsePriority() { -// stream.Read(LeftBracket); +Parser::Parser() { } + +status_t +Parser::Parse(const char *rule, Rule *result, BString *parseError) { + try { + if (!rule) + throw new Err("Sniffer pattern error: NULL pattern", -1); + if (!result) + return B_BAD_VALUE; + if (stream.SetTo(rule) != B_OK) + throw new Err("Sniffer parser error: Unable to intialize token stream", -1); + + ParseRule(result); + + return B_OK; + + } catch (Err *err) { +// cout << "Caught error" << endl; + if (parseError) + parseError->SetTo(ErrorMessage(err, rule).c_str()); + delete err; + return B_BAD_MIME_SNIFFER_RULE; + } +} + +std::string +Parser::ErrorMessage(Err *err, const char *rule) { + const char* msg = (err && err->Msg()) + ? err->Msg() + : "Sniffer parser error: Unexpected error with no supplied error message"; + size_t pos = err && (err->Pos() >= 0) ? err->Pos() : 0; + std::string str = std::string(rule) + "\n"; + for (int i = 0; i < pos; i++) + str += " "; + str += "^ "; + str += msg; + return str; +} + +void +Parser::ParseRule(Rule *result) { + // Priority + double priority = ParsePriority(); + // Expression List +// ExprList* list = ParseExprList(); +} + +double +Parser::ParsePriority() { + Token *t = stream.Get(); + Err *err = NULL; + double result; + +// cout << tokenTypeToString(t->Type()) << endl; + if (t->Type() == FloatingPoint || t->Type() == Integer) + result = t->Float(); + else + err = new Err("Sniffer pattern error: match level expected", t->Pos()); + + delete t; + if (err) + throw err; + else + return result; +} + +ExprList* +Parser::ParseExprList() { + // Expr+ +} + +Expr* +Parser::ParseExpr() { + // PatternList + // RangeList + // Range + PatternList +} + +Range +Parser::ParseRange() { + // LeftBracket + // Integer + // [Colon + // Integer] + // RightBracket +} + +Expr* +Parser::ParsePatternList() { + // LeftParen + // Pattern + // [Divider + // Pattern]* + // RightParen +} + +Expr* +Parser::ParseRPatternList() { + // LeftParen + // RPattern + // [Divider + // RPattern]* + // RightParen +} + +RPattern* +Parser::ParseRPattern() { + // Range + // Pattern +} + +Pattern* +Parser::ParsePattern() { + // String + // [Ampersand + // String] +} +