+ Moved internally used classes into header to allow for testing.

+ Added EmptyToken, Integer, and FloatingPoint TokenTypes.
+ Added IntToken and FloatToken classes.
+ Updated scanner, which now handles numbers as well. I believe
it's nearly complete, although I'm almost positive it doesn't
handle octals correctly yet...


git-svn-id: file:///srv/svn/repos/haiku/trunk/current@477 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Tyler Dauwalder 2002-07-27 19:24:05 +00:00
parent 6c6ea7dac5
commit 5da549240a
2 changed files with 457 additions and 123 deletions

View File

@ -10,13 +10,145 @@
#define _sk_sniffer_parser_h_ #define _sk_sniffer_parser_h_
#include <SupportDefs.h> #include <SupportDefs.h>
#include <List.h>
#include <string>
class BString; class BString;
namespace Sniffer { namespace Sniffer {
class Rule; class Rule;
//------------------------------------------------------------------------------
// The mighty parsing function ;-)
//------------------------------------------------------------------------------
status_t parse(const char *rule, Rule *result, BString *parseError = NULL); status_t parse(const char *rule, Rule *result, BString *parseError = NULL);
}
//------------------------------------------------------------------------------
// Classes used internally by the parser
//------------------------------------------------------------------------------
// Exception object thrown by the sniffer scanner/parser code (see the
// `throw Err(...)` sites in the implementation); it carries a text message.
class Err {
public:
// Construct from a C string or a std::string message.
Err(const char *msg);
Err(const std::string &msg);
Err(const Err &ref);
Err& operator=(const Err &ref);
// Accessor for the message text (presumably returns fMsg -- the
// definition is not visible here).
const char* Msg() const;
private:
void SetMsg(const char *msg);
// NOTE(review): fMsg is a raw char* and no destructor is declared in this
// class; confirm that whatever SetMsg() stores is released somewhere.
char *fMsg;
};
// Character-level input stream over a rule string, used by the scanner to
// read one character at a time with single-step push-back.
class CharStream {
public:
CharStream(const char *string = NULL);
~CharStream();
status_t SetTo(const char *string);
void Unset();
status_t InitCheck() const;
// True once the read position has reached or passed the string length
// (implemented as fPos >= fLen).
bool IsEmpty() const;
// Returns the next character. Past the end of the string it returns the
// End-Of-Text character 0x3 and still advances the position so that
// Unget() calls stay consistent. Throws Err if the stream is uninitialized.
char Get();
// Steps the read position back (definition not visible in this view).
void Unget();
private:
char *fString;
// NOTE(review): fPos is unsigned (size_t) while fLen is signed (ssize_t);
// they are compared directly in IsEmpty()/Get().
size_t fPos;
ssize_t fLen;
status_t fCStatus;
// Copy constructor and assignment are declared private; presumably left
// unimplemented to forbid copying -- confirm.
CharStream(const CharStream &ref);
CharStream& operator=(const CharStream &ref);
};
// Kinds of tokens produced by the scanner.
//
// Declared as a plain enum: the former `typedef enum TokenType { ... };`
// named no typedef alias, so the `typedef` keyword was ignored by the
// compiler (with a warning). The enumerators and their values are unchanged.
enum TokenType {
	EmptyToken,			// default type of a Token constructed without arguments
	LeftParen,
	RightParen,
	LeftBracket,
	RightBracket,
	Colon,
	Divider,
	Ampersand,
	CharacterString,	// token carrying string data
	Integer,			// token carrying an int32 value
	FloatingPoint		// token carrying a double value
};
const char* tokenTypeToString(TokenType type);
// Base class for all scanner tokens. A plain Token carries only its type;
// subclasses add string, integer or floating point data.
class Token {
public:
Token(TokenType type = EmptyToken);
virtual ~Token();
TokenType Type() const;
// Data accessors. The base-class versions throw Err; subclasses override
// the ones that apply to the data they carry.
virtual const char* String() const;
virtual int32 Int() const;
virtual double Float() const;
// Compares the types and, for CharacterString/Integer/FloatingPoint
// tokens, the carried data as well.
// NOTE(review): takes a non-const reference and is itself non-const.
bool operator==(Token &ref);
protected:
TokenType fType;
};
// Token of type CharacterString that carries string data.
class StringToken : public Token {
public:
StringToken(const char *string);
virtual ~StringToken();
// Returns the stored string (fString).
virtual const char* String() const;
protected:
char *fString;
};
// Token of type Integer that carries an int32 value.
class IntToken : public Token {
public:
IntToken(const int32 value);
// Returns the stored value.
virtual int32 Int() const;
// Returns the stored value converted to double, so an integer token can
// also be read as a floating point number.
virtual double Float() const;
protected:
int32 fValue;
};
// Token of type FloatingPoint that carries a double value. Int() is not
// overridden, so the base-class Token::Int() (which throws Err) applies.
class FloatToken : public Token {
public:
FloatToken(const double value);
// Returns the stored value.
virtual double Float() const;
protected:
double fValue;
};
// Scanner front end: SetTo() tokenizes a rule string into a list of Token
// objects that the parser then consumes through Get()/Unget().
class TokenStream {
public:
TokenStream(const char *string = NULL);
~TokenStream();
status_t SetTo(const char *string);
void Unset();
status_t InitCheck() const;
// Removes and returns the first token in the list.
Token* Get();
// Puts the given token back at the front of the list.
void Unget(Token *token);
// True when the stream is not properly initialized or holds no tokens.
bool IsEmpty();
private:
// Helpers used while scanning to append a new token to the list.
// AddInt()/AddFloat() convert the text with atol()/atof() respectively.
void AddToken(TokenType type);
void AddString(const char *str);
void AddInt(const char *str);
void AddFloat(const char *str);
BList fTokenList;
status_t fCStatus;
// Copy constructor and assignment are declared private; presumably left
// unimplemented to forbid copying -- confirm.
TokenStream(const TokenStream &ref);
TokenStream& operator=(const TokenStream &ref);
};
} // namespace Sniffer
#endif // _sk_sniffer_parser_h_ #endif // _sk_sniffer_parser_h_

View File

@ -15,114 +15,20 @@
//#include <sniffer/RPatternList.h> //#include <sniffer/RPatternList.h>
#include <sniffer/Rule.h> #include <sniffer/Rule.h>
#include <string>
#include <List.h>
#include <new.h> #include <new.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> // For atol(), atof()
#include <string.h> #include <string.h>
#include <String.h> // BString #include <String.h> // BString
using namespace Sniffer; using namespace Sniffer;
namespace Sniffer {
class Err {
public:
Err(const char *msg);
Err(const std::string &msg);
Err(const Err &ref);
Err& operator=(const Err &ref);
const char* Msg() const;
private:
void SetMsg(const char *msg);
char *fMsg;
};
} // namespace Sniffer
class CharStream {
public:
CharStream(const char *string = NULL);
~CharStream();
status_t SetTo(const char *string);
void Unset();
status_t InitCheck() const;
bool IsEmpty();
char Get();
void Unget();
private:
char *fString;
size_t fPos;
ssize_t fLen;
status_t fCStatus;
CharStream(const CharStream &ref);
CharStream& operator=(const CharStream &ref);
};
typedef enum TokenType {
LeftParen,
RightParen,
LeftBracket,
RightBracket,
Colon,
Divider,
Ampersand,
CharacterString
};
class Token {
public:
Token(TokenType type);
virtual ~Token();
TokenType Type() const;
virtual const char* String() const;
protected:
TokenType fType;
};
class PString : public Token {
public:
PString(const char *string);
virtual ~PString();
virtual const char* String() const;
protected:
char *fString;
};
class TokenStream {
public:
TokenStream(const char *string = NULL);
~TokenStream();
status_t SetTo(const char *string);
void Unset();
status_t InitCheck() const;
Token* Get();
void Unget(Token *token);
private:
void AddToken(TokenType type);
void AddString(const char *str);
BList fTokenList;
status_t fCStatus;
TokenStream(const TokenStream &ref);
TokenStream& operator=(const TokenStream &ref);
};
// Our global token stream object // Our global token stream object
TokenStream stream; TokenStream stream;
// Private parsing functions // Private parsing functions
/* /*
float parsePriority(); double parsePriority();
BList* parseExprList(); BList* parseExprList();
Expr* parseExpr(); Expr* parseExpr();
Range parseRange(); Range parseRange();
@ -141,6 +47,7 @@ char octalToChar(char hi, char mid, char low);
bool isHexChar(char ch); bool isHexChar(char ch);
bool isWhiteSpace(char ch); bool isWhiteSpace(char ch);
bool isOctalChar(char ch); bool isOctalChar(char ch);
bool isDecimalChar(char ch);
status_t status_t
Sniffer::parse(const char *rule, Rule *result, BString *parseError = NULL) { Sniffer::parse(const char *rule, Rule *result, BString *parseError = NULL) {
@ -152,7 +59,7 @@ Sniffer::parse(const char *rule, Rule *result, BString *parseError = NULL) {
if (stream.SetTo(rule) != B_OK) if (stream.SetTo(rule) != B_OK)
throw Err("Sniffer parser error: Unable to intialize token stream"); throw Err("Sniffer parser error: Unable to intialize token stream");
float priority; double priority;
BList* exprList; BList* exprList;
// priority = parsePriority(); // priority = parsePriority();
@ -253,7 +160,7 @@ CharStream::InitCheck() const {
} }
bool bool
CharStream::IsEmpty() { CharStream::IsEmpty() const {
return fPos >= fLen; return fPos >= fLen;
} }
@ -261,10 +168,12 @@ char
CharStream::Get() { CharStream::Get() {
if (fCStatus != B_OK) if (fCStatus != B_OK)
throw Err("Sniffer parser error: CharStream::Get() called on uninitialized CharStream object"); throw Err("Sniffer parser error: CharStream::Get() called on uninitialized CharStream object");
if (!IsEmpty()) if (fPos < fLen)
return fString[fPos++]; return fString[fPos++];
else else {
throw Err("Sniffer pattern error: unterminated rule"); fPos++; // Increment fPos to keep Unget()s consistent
return 0x3; // Return End-Of-Text char
}
} }
void void
@ -284,6 +193,8 @@ CharStream::Unget() {
Token::Token(TokenType type) Token::Token(TokenType type)
: fType(type) : fType(type)
{ {
// if (type != EmptyToken)
// cout << "New Token, fType == " << tokenTypeToString(fType) << endl;
} }
Token::~Token() { Token::~Token() {
@ -299,11 +210,42 @@ Token::String() const {
throw Err("Sniffer scanner error: Token::String() called on non-string token"); throw Err("Sniffer scanner error: Token::String() called on non-string token");
} }
// Base-class fallback: a generic Token carries no integer data, so asking
// for one is reported as a scanner error. Always throws Err.
int32
Token::Int() const
{
	const char *message
		= "Sniffer scanner error: Token::Int() called on non-integer token";
	throw Err(message);
}
// Base-class fallback: a generic Token carries no floating point data, so
// asking for one is reported as a scanner error. Always throws Err.
double
Token::Float() const
{
	const char *message
		= "Sniffer scanner error: Token::Float() called on non-float token";
	throw Err(message);
}
// Equality test: two tokens are equal when their types match and, for the
// data-carrying types, their data matches as well.
bool
Token::operator==(Token &ref)
{
	// Tokens of different types can never be equal
	if (Type() != ref.Type())
		return false;

	// Same type: only tokens that carry data need a closer look
	switch (Type()) {
		case CharacterString:
			return strcmp(String(), ref.String()) == 0;

		case Integer:
			return Int() == ref.Int();

		case FloatingPoint:
			return Float() == ref.Float();

		default:
			// Plain tokens are fully described by their type
			return true;
	}
}
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// PString // StringToken
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
PString::PString(const char *string) StringToken::StringToken(const char *string)
: Token(CharacterString) : Token(CharacterString)
, fString(NULL) , fString(NULL)
{ {
@ -314,15 +256,50 @@ PString::PString(const char *string)
} }
} }
PString::~PString() { StringToken::~StringToken() {
delete fString; delete fString;
} }
const char* const char*
PString::String() const { StringToken::String() const {
return fString; return fString;
} }
//------------------------------------------------------------------------------
// IntToken
//------------------------------------------------------------------------------
// Creates a token of type Integer that holds the given value.
IntToken::IntToken(const int32 value)
	: Token(Integer),
	  fValue(value)
{
}
// Returns the integer value held by this token.
int32
IntToken::Int() const
{
	return fValue;
}
// Returns the integer value held by this token, converted to double.
double
IntToken::Float() const
{
	return static_cast<double>(fValue);
}
//------------------------------------------------------------------------------
// FloatToken
//------------------------------------------------------------------------------
// Creates a token of type FloatingPoint that holds the given value.
FloatToken::FloatToken(const double value)
	: Token(FloatingPoint),
	  fValue(value)
{
}
// Returns the floating point value held by this token.
double
FloatToken::Float() const
{
	return fValue;
}
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// TokenStream // TokenStream
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
@ -355,6 +332,11 @@ TokenStream::SetTo(const char *string) {
tsssZeroX, tsssZeroX,
tsssOneHex, tsssOneHex,
tsssTwoHex, tsssTwoHex,
tsssIntOrFloat,
tsssFloat,
tsssLonelyDecimalPoint,
tsssLonelyMinusSign,
tsssNegativeInt,
tsssOneEscape, tsssOneEscape,
tsssOneOctal, tsssOneOctal,
tsssTwoOctal, tsssTwoOctal,
@ -367,11 +349,19 @@ TokenStream::SetTo(const char *string) {
std::string charStr; // Used to build up character strings std::string charStr; // Used to build up character strings
char lastChar; // For two char lookahead char lastChar; // For two char lookahead
char lastLastChar; // For three char lookahead char lastLastChar; // For three char lookahead
while (!stream.IsEmpty()) { bool keepLooping = true;
while (keepLooping) {
char ch = stream.Get(); char ch = stream.Get();
switch (state) { switch (state) {
case tsssStart: case tsssStart:
switch (ch) { switch (ch) {
case 0x3: // End-Of-Text
if (stream.IsEmpty())
keepLooping = false;
else
throw Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'");
break;
case '\t': case '\t':
case '\n': case '\n':
case ' ': case ' ':
@ -388,9 +378,33 @@ TokenStream::SetTo(const char *string) {
state = tsssOneSingle; state = tsssOneSingle;
break; break;
case '-':
charStr = ch;
state = tsssLonelyMinusSign;
break;
case '.':
charStr = ch;
state = tsssLonelyDecimalPoint;
break;
case '0': case '0':
charStr = ch;
state = tsssOneZero; state = tsssOneZero;
break; break;
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
charStr = ch;
state = tsssIntOrFloat;
break;
case '&': AddToken(Ampersand); break; case '&': AddToken(Ampersand); break;
case '(': AddToken(LeftParen); break; case '(': AddToken(LeftParen); break;
@ -418,7 +432,13 @@ TokenStream::SetTo(const char *string) {
case '\'': case '\'':
AddString(charStr.c_str()); AddString(charStr.c_str());
state = tsssStart; state = tsssStart;
break; break;
case 0x3:
if (stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: unterminated single-quoted string"));
else
charStr += ch;
break;
default: default:
charStr += ch; charStr += ch;
break; break;
@ -426,8 +446,13 @@ TokenStream::SetTo(const char *string) {
break; break;
case tsssSingleEscape: case tsssSingleEscape:
charStr += escapeChar(ch); // Check for a true end-of-text marker
state = tsssOneSingle; if (ch == 0x3 && stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: unterminated escape sequence in single-quoted string"));
else {
charStr += escapeChar(ch);
state = tsssOneSingle;
}
break; break;
case tsssOneDouble: case tsssOneDouble:
@ -439,6 +464,12 @@ TokenStream::SetTo(const char *string) {
AddString(charStr.c_str()); AddString(charStr.c_str());
state = tsssStart; state = tsssStart;
break; break;
case 0x3:
	// A genuine End-Of-Text while inside a double-quoted string means
	// the closing '"' is missing; a literal 0x3 in the middle of the
	// input is simply part of the string. (The message previously said
	// "single-quoted", copied from the single-quote state.)
	if (stream.IsEmpty())
		throw Err(std::string("Sniffer scanner error: unterminated double-quoted string"));
	else
		charStr += ch;
	break;
default: default:
charStr += ch; charStr += ch;
break; break;
@ -446,22 +477,46 @@ TokenStream::SetTo(const char *string) {
break; break;
case tsssDoubleEscape: case tsssDoubleEscape:
charStr += escapeChar(ch); // Check for a true end-of-text marker
state = tsssOneDouble; if (ch == 0x3 && stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: unterminated escape sequence in single-quoted string"));
else {
charStr += escapeChar(ch);
state = tsssOneDouble;
}
break; break;
case tsssOneZero: case tsssOneZero:
if (ch == 'x') if (ch == 'x') {
charStr = ""; // Reinit, since we actually have a hex string
state = tsssZeroX; state = tsssZeroX;
else } else if ('0' <= ch && ch <= '9') {
throw Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'"); charStr += ch;
state = tsssIntOrFloat;
} else if (ch == '.') {
charStr += ch;
state = tsssFloat;
} else if (ch == 0x3 && stream.IsEmpty()) {
// Terminate the number and then the loop
AddInt(charStr.c_str());
keepLooping = false;
} else {
// Terminate the number
AddInt(charStr.c_str());
// Push the last char back on and try again
stream.Unget();
state = tsssStart;
}
break; break;
case tsssZeroX: case tsssZeroX:
if (isHexChar(ch)) { if (isHexChar(ch)) {
lastChar = ch; lastChar = ch;
state = tsssOneHex; state = tsssOneHex;
} else } else if (ch == 0x3 && stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: incomplete hex code"));
else
throw Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'"); throw Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'");
break; break;
@ -469,7 +524,9 @@ TokenStream::SetTo(const char *string) {
if (isHexChar(ch)) { if (isHexChar(ch)) {
charStr += hexToChar(lastChar, ch); charStr += hexToChar(lastChar, ch);
state = tsssTwoHex; state = tsssTwoHex;
} else } else if (ch == 0x3 && stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: incomplete hex code (the number of hex digits must be a multiple of two)"));
else
throw Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'"); throw Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'");
break; break;
@ -480,17 +537,89 @@ TokenStream::SetTo(const char *string) {
} else if (isWhiteSpace(ch)) { } else if (isWhiteSpace(ch)) {
AddString(charStr.c_str()); AddString(charStr.c_str());
state = tsssStart; state = tsssStart;
} else if (ch == 0x3 && stream.IsEmpty()) {
AddString(charStr.c_str());
keepLooping = false;
} else } else
throw Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'"); throw Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'");
break; break;
case tsssIntOrFloat:
if (isDecimalChar(ch))
charStr += ch;
else if (ch == '.') {
charStr += ch;
state = tsssFloat;
} else {
// Terminate the number
AddInt(charStr.c_str());
// Push the last char back on and try again
stream.Unget();
state = tsssStart;
}
break;
case tsssFloat:
if (isDecimalChar(ch))
charStr += ch;
else {
// Terminate the number
AddFloat(charStr.c_str());
// Push the last char back on and try again
stream.Unget();
state = tsssStart;
}
break;
case tsssLonelyDecimalPoint:
if (isDecimalChar(ch)) {
charStr += ch;
state = tsssFloat;
} else if (ch == 0x3 && stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: incomplete floating point number"));
else
throw Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'");
break;
case tsssLonelyMinusSign:
if (isDecimalChar(ch)) {
charStr += ch;
state = tsssNegativeInt;
} else if (ch == 0x3 && stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: incomplete negative integer"));
else
throw Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'");
break;
case tsssNegativeInt:
if (isDecimalChar(ch))
charStr += ch;
else if (ch == '.')
throw Err(std::string("Sniffer scanner error: negative floating point numbers are useless and thus illegal"));
else {
// Terminate the number
AddInt(charStr.c_str());
// Push the last char back on and try again
stream.Unget();
state = tsssStart;
}
break;
case tsssOneEscape: case tsssOneEscape:
if (isOctalChar(ch)) { if (isOctalChar(ch)) {
lastChar = ch; lastChar = ch;
state = tsssOneOctal; state = tsssOneOctal;
} else { } else {
charStr += escapeChar(ch); // Check for a true end-of-text marker
state = tsssUnquoted; if (ch == 0x3 && stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: unterminated escape sequence"));
else {
charStr += escapeChar(ch);
state = tsssUnquoted;
}
} }
break; break;
@ -533,14 +662,22 @@ TokenStream::SetTo(const char *string) {
state = tsssStart; state = tsssStart;
} else if (ch == '\'' || ch == '"' || ch == '&') { } else if (ch == '\'' || ch == '"' || ch == '&') {
throw Err(std::string("Sniffer scanner error: illegal unquoted character '") + ch + "'"); throw Err(std::string("Sniffer scanner error: illegal unquoted character '") + ch + "'");
} else if (ch == 0x3 && stream.IsEmpty()) {
AddString(charStr.c_str());
keepLooping = false;
} else { } else {
charStr += ch; charStr += ch;
} }
break; break;
case tsssUnquotedEscape: case tsssUnquotedEscape:
charStr += escapeChar(ch); // Check for a true end-of-text marker
state = tsssUnquoted; if (ch == 0x3 && stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: unterminated escape sequence in unquoted string"));
else {
charStr += escapeChar(ch);
state = tsssUnquoted;
}
break; break;
} }
@ -567,11 +704,17 @@ TokenStream::InitCheck() const {
Token* Token*
TokenStream::Get() { TokenStream::Get() {
return (Token*)fTokenList.RemoveItem((int32)0);
} }
void void
TokenStream::Unget(Token *token) { TokenStream::Unget(Token *token) {
fTokenList.AddItem(token, 0);
}
bool
TokenStream::IsEmpty() {
return fCStatus != B_OK || fTokenList.IsEmpty();
} }
void void
@ -582,7 +725,23 @@ TokenStream::AddToken(TokenType type) {
void void
TokenStream::AddString(const char *str) { TokenStream::AddString(const char *str) {
Token *token = new PString(str); Token *token = new StringToken(str);
fTokenList.AddItem(token);
}
void
TokenStream::AddInt(const char *str) {
// Convert the string to an int
int32 value = atol(str);
Token *token = new IntToken(value);
fTokenList.AddItem(token);
}
void
TokenStream::AddFloat(const char *str) {
// Convert the string to a float
double value = atof(str);
Token *token = new FloatToken(value);
fTokenList.AddItem(token); fTokenList.AddItem(token);
} }
@ -653,4 +812,47 @@ isOctalChar(char ch) {
return ('0' <= ch && ch <= '7'); return ('0' <= ch && ch <= '7');
} }
// Returns true if ch is one of the decimal digits '0' through '9'.
bool
isDecimalChar(char ch)
{
	return ch >= '0' && ch <= '9';
}
// Returns a human-readable name for the given token type. Any type without
// a case of its own (EmptyToken included) yields "UNKNOWN TOKEN TYPE".
const char*
Sniffer::tokenTypeToString(TokenType type)
{
	switch (type) {
		case LeftParen:			return "LeftParen";
		case RightParen:		return "RightParen";
		case LeftBracket:		return "LeftBracket";
		case RightBracket:		return "RightBracket";
		case Colon:				return "Colon";
		case Divider:			return "Divider";
		case Ampersand:			return "Ampersand";
		case CharacterString:	return "CharacterString";
		case Integer:			return "Integer";
		case FloatingPoint:		return "FloatingPoint";
		default:				return "UNKNOWN TOKEN TYPE";
	}
}