Finished scanner:

+ Octals are now handled properly in both quoted and
unquoted strings
+ Added support for \xXX style hex escapes in both
quoted and unquoted strings
+ Finished up the tests, so I think pretty much everything
should be working
NOTE: signed floating point numbers are no longer supported,
as they have no usefulness to sniffer rules
NOTE: extended format floating point numbers (i.e. 2.4e23) are
no longer supported, as they have no usefulness to sniffer rules.


git-svn-id: file:///srv/svn/repos/haiku/trunk/current@489 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Tyler Dauwalder 2002-07-28 03:21:21 +00:00
parent 88c56012c8
commit dc685e9ad8
2 changed files with 659 additions and 164 deletions

View File

@ -48,25 +48,26 @@ bool isHexChar(char ch);
bool isWhiteSpace(char ch);
bool isOctalChar(char ch);
bool isDecimalChar(char ch);
bool isPunctuation(char ch);
status_t
Sniffer::parse(const char *rule, Rule *result, BString *parseError = NULL) {
try {
if (!rule)
throw Err("Sniffer pattern error: NULL pattern");
throw new Err("Sniffer pattern error: NULL pattern");
if (!result)
return B_BAD_VALUE;
if (stream.SetTo(rule) != B_OK)
throw Err("Sniffer parser error: Unable to intialize token stream");
throw new Err("Sniffer parser error: Unable to intialize token stream");
double priority;
BList* exprList;
// priority = parsePriority();
} catch (Err &err) {
if (parseError)
parseError->SetTo((err.Msg() ? err.Msg() : "Sniffer parser rule: Unexpected error with no supplied error message"));
} catch (Err *err) {
if (parseError && err)
parseError->SetTo((err->Msg() ? err->Msg() : "Sniffer parser rule: Unexpected error with no supplied error message"));
return B_BAD_MIME_SNIFFER_RULE;
}
}
@ -102,7 +103,10 @@ Err::Msg() const {
void
Err::SetMsg(const char *msg) {
delete fMsg;
if (fMsg) {
delete fMsg;
fMsg = NULL;
}
if (msg == NULL)
fMsg = NULL;
else {
@ -167,7 +171,7 @@ CharStream::IsEmpty() const {
char
CharStream::Get() {
if (fCStatus != B_OK)
throw Err("Sniffer parser error: CharStream::Get() called on uninitialized CharStream object");
throw new Err("Sniffer parser error: CharStream::Get() called on uninitialized CharStream object");
if (fPos < fLen)
return fString[fPos++];
else {
@ -179,11 +183,11 @@ CharStream::Get() {
void
CharStream::Unget() {
if (fCStatus != B_OK)
throw Err("Sniffer parser error: CharStream::Unget() called on uninitialized CharStream object");
throw new Err("Sniffer parser error: CharStream::Unget() called on uninitialized CharStream object");
if (fPos > 0)
fPos--;
else
throw Err("Sniffer parser error: CharStream::Unget() called at beginning of character stream");
throw new Err("Sniffer parser error: CharStream::Unget() called at beginning of character stream");
}
//------------------------------------------------------------------------------
@ -207,17 +211,17 @@ Token::Type() const {
const char*
Token::String() const {
throw Err("Sniffer scanner error: Token::String() called on non-string token");
throw new Err("Sniffer scanner error: Token::String() called on non-string token");
}
int32
Token::Int() const {
throw Err("Sniffer scanner error: Token::Int() called on non-integer token");
throw new Err("Sniffer scanner error: Token::Int() called on non-integer token");
}
double
Token::Float() const {
throw Err("Sniffer scanner error: Token::Float() called on non-float token");
throw new Err("Sniffer scanner error: Token::Float() called on non-float token");
}
bool
@ -226,7 +230,30 @@ Token::operator==(Token &ref) {
if (Type() == ref.Type()) {
switch (Type()) {
case CharacterString:
return strcmp(String(), ref.String()) == 0;
// printf(" str1 == '%s'\n", String());
// printf(" str2 == '%s'\n", ref.String());
// printf(" strcmp() == %d\n", strcmp(String(), ref.String()));
{
// strcmp() seems to choke on certain, non-normal ASCII chars
// (i.e. chars outside the usual alphabets, but still valid
// as far as ASCII is concerned), so we'll just compare the
// strings by hand to be safe.
const char *str1 = String();
const char *str2 = ref.String();
int len1 = strlen(str1);
int len2 = strlen(str2);
// printf("len1 == %d\n", len1);
// printf("len2 == %d\n", len2);
if (len1 == len2) {
for (int i = 0; i < len1; i++) {
// printf("i == %d, str1[%d] == %x, str2[%d] == %x\n", i, i, str1[i], i, str2[i]);
if (str1[i] != str2[i])
return false;
}
}
return true;
}
// return strcmp(String(), ref.String()) == 0;
case Integer:
return Int() == ref.Int();
@ -320,31 +347,34 @@ TokenStream::SetTo(const char *string) {
if (string) {
CharStream stream(string);
if (stream.InitCheck() != B_OK)
throw Err("Sniffer scanner error: Unable to intialize character stream");
throw new Err("Sniffer scanner error: Unable to intialize character stream");
typedef enum TokenStreamScannerState {
tsssStart,
tsssOneSingle,
tsssSingleEscape,
tsssOneDouble,
tsssDoubleEscape,
tsssOneZero,
tsssZeroX,
tsssOneHex,
tsssTwoHex,
tsssHexStringEnd,
tsssIntOrFloat,
tsssFloat,
tsssLonelyDecimalPoint,
tsssLonelyMinusSign,
tsssNegativeInt,
tsssOneEscape,
tsssOneOctal,
tsssTwoOctal,
tsssLonelyMinusOrPlusSign,
tsssPosNegInt,
tsssUnquoted,
tsssUnquotedEscape,
tsssEscape,
tsssEscapeX,
tsssEscapeOneOctal,
tsssEscapeTwoOctal,
tsssEscapeOneHex,
tsssEscapeTwoHex
};
TokenStreamScannerState state = tsssStart;
TokenStreamScannerState state = tsssStart;
TokenStreamScannerState escapedState;
// Used to remember which state to return to from an escape sequence
std::string charStr; // Used to build up character strings
char lastChar; // For two char lookahead
@ -359,7 +389,7 @@ TokenStream::SetTo(const char *string) {
if (stream.IsEmpty())
keepLooping = false;
else
throw Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'");
throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'");
break;
case '\t':
@ -378,9 +408,10 @@ TokenStream::SetTo(const char *string) {
state = tsssOneSingle;
break;
case '+':
case '-':
charStr = ch;
state = tsssLonelyMinusSign;
state = tsssLonelyMinusOrPlusSign;
break;
case '.':
@ -413,21 +444,24 @@ TokenStream::SetTo(const char *string) {
case '[': AddToken(LeftBracket); break;
case '\\':
state = tsssOneEscape;
charStr = ""; // Clear our string
state = tsssEscape;
escapedState = tsssUnquoted; // Unquoted strings begin with an escaped character
break;
case ']': AddToken(RightBracket); break;
case '|': AddToken(Divider); break;
default:
throw Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'");
throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'");
}
break;
case tsssOneSingle:
switch (ch) {
case '\\':
state = tsssSingleEscape;
escapedState = state; // Save our state
state = tsssEscape; // Handle the escape sequence
break;
case '\'':
AddString(charStr.c_str());
@ -435,7 +469,7 @@ TokenStream::SetTo(const char *string) {
break;
case 0x3:
if (stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: unterminated single-quoted string"));
throw new Err(std::string("Sniffer scanner error: unterminated single-quoted string"));
else
charStr += ch;
break;
@ -445,20 +479,11 @@ TokenStream::SetTo(const char *string) {
}
break;
case tsssSingleEscape:
// Check for a true end-of-text marker
if (ch == 0x3 && stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: unterminated escape sequence in single-quoted string"));
else {
charStr += escapeChar(ch);
state = tsssOneSingle;
}
break;
case tsssOneDouble:
switch (ch) {
case '\\':
state = tsssDoubleEscape;
escapedState = state; // Save our state
state = tsssEscape; // Handle the escape sequence
break;
case '"':
AddString(charStr.c_str());
@ -466,7 +491,7 @@ TokenStream::SetTo(const char *string) {
break;
case 0x3:
if (stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: unterminated single-quoted string"));
throw new Err(std::string("Sniffer scanner error: unterminated single-quoted string"));
else
charStr += ch;
break;
@ -476,16 +501,6 @@ TokenStream::SetTo(const char *string) {
}
break;
case tsssDoubleEscape:
// Check for a true end-of-text marker
if (ch == 0x3 && stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: unterminated escape sequence in single-quoted string"));
else {
charStr += escapeChar(ch);
state = tsssOneDouble;
}
break;
case tsssOneZero:
if (ch == 'x') {
charStr = ""; // Reinit, since we actually have a hex string
@ -515,9 +530,9 @@ TokenStream::SetTo(const char *string) {
lastChar = ch;
state = tsssOneHex;
} else if (ch == 0x3 && stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: incomplete hex code"));
throw new Err(std::string("Sniffer scanner error: incomplete hex code"));
else
throw Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'");
throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'");
break;
case tsssOneHex:
@ -534,8 +549,9 @@ TokenStream::SetTo(const char *string) {
if (isHexChar(ch)) {
lastChar = ch;
state = tsssOneHex;
} else if (isWhiteSpace(ch)) {
} else if (isWhiteSpace(ch) || isPunctuation(ch)) {
AddString(charStr.c_str());
stream.Unget(); // So punctuation gets handled properly
state = tsssStart;
} else if (ch == 0x3 && stream.IsEmpty()) {
AddString(charStr.c_str());
@ -578,26 +594,26 @@ TokenStream::SetTo(const char *string) {
charStr += ch;
state = tsssFloat;
} else if (ch == 0x3 && stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: incomplete floating point number"));
throw new Err(std::string("Sniffer scanner error: incomplete floating point number"));
else
throw Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'");
throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'");
break;
case tsssLonelyMinusSign:
case tsssLonelyMinusOrPlusSign:
if (isDecimalChar(ch)) {
charStr += ch;
state = tsssNegativeInt;
state = tsssPosNegInt;
} else if (ch == 0x3 && stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: incomplete negative integer"));
throw new Err(std::string("Sniffer scanner error: incomplete signed integer"));
else
throw Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'");
throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'");
break;
case tsssNegativeInt:
case tsssPosNegInt:
if (isDecimalChar(ch))
charStr += ch;
else if (ch == '.')
throw Err(std::string("Sniffer scanner error: negative floating point numbers are useless and thus illegal"));
throw new Err(std::string("Sniffer scanner error: negative floating point numbers are useless and thus signs (both + and -) are disallowed on floating points"));
else {
// Terminate the number
AddInt(charStr.c_str());
@ -608,60 +624,16 @@ TokenStream::SetTo(const char *string) {
}
break;
case tsssOneEscape:
if (isOctalChar(ch)) {
lastChar = ch;
state = tsssOneOctal;
} else {
// Check for a true end-of-text marker
if (ch == 0x3 && stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: unterminated escape sequence"));
else {
charStr += escapeChar(ch);
state = tsssUnquoted;
}
}
break;
case tsssOneOctal:
if (isOctalChar(ch)) {
lastLastChar = lastChar;
lastChar = ch;
state = tsssTwoOctal;
} else {
// First handle the octal
charStr += octalToChar(lastChar);
// Push the new char back on and let the tsssUnquoted
// state handle it.
stream.Unget();
state = tsssUnquoted;
}
break;
case tsssTwoOctal:
if (isOctalChar(ch)) {
charStr += octalToChar(lastLastChar, lastChar, ch);
state = tsssUnquoted;
} else {
// First handle the octal
charStr += octalToChar(lastLastChar, lastChar);
// Push the new char back on and let the tsssUnquoted
// state handle it.
stream.Unget();
state = tsssUnquoted;
}
break;
case tsssUnquoted:
if (ch == '\\')
state = tsssUnquotedEscape;
else if (isWhiteSpace(ch)) {
if (ch == '\\') {
escapedState = state; // Save our state
state = tsssEscape; // Handle the escape sequence
} else if (isWhiteSpace(ch) || isPunctuation(ch)) {
AddString(charStr.c_str());
stream.Unget(); // In case it's punctuation, let tsssStart handle it
state = tsssStart;
} else if (ch == '\'' || ch == '"' || ch == '&') {
throw Err(std::string("Sniffer scanner error: illegal unquoted character '") + ch + "'");
} else if (ch == '\'' || ch == '"') {
throw new Err(std::string("Sniffer scanner error: illegal unquoted character '") + ch + "'");
} else if (ch == 0x3 && stream.IsEmpty()) {
AddString(charStr.c_str());
keepLooping = false;
@ -670,23 +642,78 @@ TokenStream::SetTo(const char *string) {
}
break;
case tsssUnquotedEscape:
// Check for a true end-of-text marker
if (ch == 0x3 && stream.IsEmpty())
throw Err(std::string("Sniffer scanner error: unterminated escape sequence in unquoted string"));
else {
charStr += escapeChar(ch);
state = tsssUnquoted;
}
case tsssEscape:
if (isOctalChar(ch)) {
lastChar = ch;
state = tsssEscapeOneOctal;
} else {
// Check for a true end-of-text marker
if (ch == 0x3 && stream.IsEmpty())
throw new Err(std::string("Sniffer scanner error: unterminated escape sequence"));
else {
charStr += escapeChar(ch);
state = escapedState; // Return to the state we were in before the escape
}
}
break;
case tsssEscapeX:
if (isHexChar(ch)) {
lastChar = ch;
state = tsssEscapeOneHex;
} else
throw new Err(std::string("Sniffer scanner error: incomplete hex code"));
break;
case tsssEscapeOneOctal:
if (isOctalChar(ch)) {
lastLastChar = lastChar;
lastChar = ch;
state = tsssEscapeTwoOctal;
} else {
// First handle the octal
charStr += octalToChar(lastChar);
// Push the new char back on and let the state we
// were in when the escape sequence was hit handle it.
stream.Unget();
state = escapedState;
}
break;
case tsssEscapeTwoOctal:
if (isOctalChar(ch)) {
charStr += octalToChar(lastLastChar, lastChar, ch);
state = escapedState;
} else {
// First handle the octal
charStr += octalToChar(lastLastChar, lastChar);
// Push the new char back on and let the state we
// were in when the escape sequence was hit handle it.
stream.Unget();
state = escapedState;
}
break;
case tsssEscapeOneHex:
if (isHexChar(ch)) {
charStr += hexToChar(lastChar, ch);
state = escapedState;
} else if (ch == 0x3 && stream.IsEmpty())
throw new Err(std::string("Sniffer scanner error: incomplete escaped hex code (the number of hex digits must be a multiple of two)"));
else
throw new Err(std::string("Sniffer scanner error: unexpected character '") + ch + "'");
break;
}
}
if (state == tsssStart)
fCStatus = B_OK;
else
throw Err("Sniffer pattern error: unterminated rule");
throw new Err("Sniffer pattern error: unterminated rule");
}
return fCStatus;
}
@ -751,11 +778,29 @@ TokenStream::AddFloat(const char *str) {
char
escapeChar(char ch) {
// Is there an easier way to do this? I'm not sure. :-)
std::string format = std::string("\\") + ch;
char str[3]; // Two should be enough but I'm paranoid :-)
sprintf(str, format.c_str());
return str[0];
// I've manually handled all the escape sequences I could come
// up with, and for anything else I just return the character
// passed in. Hex escapes are handled elsewhere, so \x just
// returns 'x'. Similarly, octals are handled elsewhere, so \0
// through \9 just return '0' through '9'.
switch (ch) {
case 'a':
return '\a';
case 'b':
return '\b';
case 'f':
return '\f';
case 'n':
return '\n';
case 'r':
return '\r';
case 't':
return '\t';
case 'v':
return '\v';
default:
return ch;
}
}
// Converts 0x|hi|low| to a single char
@ -774,7 +819,7 @@ hexToChar(char hex) {
else if ('A' <= hex && hex <= 'F')
return hex-'a'+10;
else
throw Err(std::string("Sniffer parser error: invalid hex digit '") + hex + "' passed to hexToChar()");
throw new Err(std::string("Sniffer parser error: invalid hex digit '") + hex + "' passed to hexToChar()");
}
char
@ -792,7 +837,7 @@ octalToChar(char hi, char mid, char low) {
if (isOctalChar(hi) && isOctalChar(mid) && isOctalChar(low)) {
return ((hi-'0') << 6) | ((mid-'0') << 3) | (low-'0');
} else
throw Err(std::string("Sniffer parser error: invalid octal digit passed to hexToChar()"));
throw new Err(std::string("Sniffer parser error: invalid octal digit passed to hexToChar()"));
}
bool
@ -817,6 +862,22 @@ isDecimalChar(char ch) {
return ('0' <= ch && ch <= '9');
}
bool
isPunctuation(char ch) {
switch (ch) {
case '&':
case '(':
case ')':
case ':':
case '[':
case ']':
case '|':
return true;
default:
return false;
}
}
const char*
Sniffer::tokenTypeToString(TokenType type) {
switch (type) {

View File

@ -87,38 +87,458 @@ ParserTest::ScannerTest() {
S("EFGH"),
T(RightParen)
}
} /*,
{ "0.5 \n [0:3] \t ('ABCD' \n | 'abcd' | 'EFGH')", NULL },
{ "0.8 [ 0 : 3 ] ('ABCDEFG' | 'abcdefghij')", NULL },
{ "0.8 [0:3] ('ABCDEFG' & 'abcdefg')", NULL },
{ "1.0 ('ABCD') | ('EFGH')", NULL },
{ "1.0 [0:3] ('ABCD') | [2:4] ('EFGH')", NULL },
{ "0.8 [0:3] (\\077Mkl0x34 & 'abcdefgh')", NULL },
{ "0.8 [0:3] (\\077034 & 'abcd')", NULL },
{ "0.8 [0:3] (\\077\\034 & 'ab')", NULL },
{ "0.8 [0:3] (\\77\\034 & 'ab')", NULL },
{ "0.8 [0:3] (\\7 & 'a')", NULL },
{ "0.8 [0:3] (\"\\17\" & 'a')", NULL },
{ "0.8 [0:3] ('\\17' & 'a')", NULL },
{ "0.8 [0:3] (\\g & 'a')", NULL },
{ "0.8 [0:3] (\\g&\\b)", NULL },
{ "0.8 [0:3] (\\g\\&b & 'abc')", NULL },
{ "0.8 [0:3] (0x3457 & 'ab')", NULL },
{ "0.8 [0:3] (0xA4b7 & 'ab')", NULL },
{ "0.8 [0:3] ('ab\"' & 'abc')", NULL },
{ "0.8 [0:3] (\"ab\\\"\" & 'abc')", NULL },
{ "0.8 [0:3] (\"ab\\A\" & 'abc')", NULL },
{ "0.8 [0:3] (\"ab'\" & 'abc')", NULL },
{ "0.8 [0:3] (\"ab\\\\\" & 'abc')", NULL },
{ "0.8 [-5:-3] (\"abc\" & 'abc')", NULL },
{ "0.8 [5:3] (\"abc\" & 'abc')", NULL },
{ "1.2 ('ABCD')", NULL },
{ ".2 ('ABCD')", NULL },
{ "0. ('ABCD')", NULL },
{ "-1 ('ABCD')", NULL },
{ "+1 ('ABCD')", NULL },
{ "1E25 ('ABCD')", NULL },
{ "1e25 ('ABCD')", NULL },*/
},
{ "0.5 \n [0:3] \t ('ABCD' \n | 'abcd' | 'EFGH')", 13,
{ F(0.5),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("ABCD"),
T(Divider),
S("abcd"),
T(Divider),
S("EFGH"),
T(RightParen)
}
},
{ "0.8 [ 0 : 3 ] ('ABCDEFG' | 'abcdefghij')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("ABCDEFG"),
T(Divider),
S("abcdefghij"),
T(RightParen)
}
},
{ "0.8 [0:3] ('ABCDEFG' & 'abcdefg')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("ABCDEFG"),
T(Ampersand),
S("abcdefg"),
T(RightParen)
}
},
{ "1.0 ('ABCD') | ('EFGH')", 8,
{ F(1.0),
T(LeftParen),
S("ABCD"),
T(RightParen),
T(Divider),
T(LeftParen),
S("EFGH"),
T(RightParen)
}
},
{ "1.0 [0:3] ('ABCD') | [2:4] ('EFGH')", 18,
{ F(1.0),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("ABCD"),
T(RightParen),
T(Divider),
T(LeftBracket),
I(2),
T(Colon),
I(4),
T(RightBracket),
T(LeftParen),
S("EFGH"),
T(RightParen)
}
},
{ "0.8 [0:4] (\\077Mkj0x34 & 'abcdefgh')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(4),
T(RightBracket),
T(LeftParen),
S("\077Mkj0x34"),
T(Ampersand),
S("abcdefgh"),
T(RightParen)
}
},
{ "0.8 [0:4] (\\077Mkj\\x34 & 'abcdefgh')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(4),
T(RightBracket),
T(LeftParen),
S("\077Mkj\x34"),
T(Ampersand),
S("abcdefgh"),
T(RightParen)
}
},
{ "0.8 [0:3] (\\077034 & 'abcd')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("\077034"),
T(Ampersand),
S("abcd"),
T(RightParen)
}
},
{ "0.8 [0:3] (\\077\\034 & 'ab')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("\077\034"),
T(Ampersand),
S("ab"),
T(RightParen)
}
},
{ "0.8 [0:3] (\\77\\034 & 'ab')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("\077\034"),
T(Ampersand),
S("ab"),
T(RightParen)
}
},
{ "0.8 [0:3] (\\7 & 'a')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("\007"),
T(Ampersand),
S("a"),
T(RightParen)
}
},
{ "0.8 [0:3] (\"\\17\" & 'a')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("\017"),
T(Ampersand),
S("a"),
T(RightParen)
}
},
{ "0.8 [0:3] ('\\17' & 'a')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("\017"),
T(Ampersand),
S("a"),
T(RightParen)
}
},
{ "0.8 [0:3] (\\g & 'a')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("g"),
T(Ampersand),
S("a"),
T(RightParen)
}
},
{ "0.8 [0:3] (\\g&\\b)", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("g"),
T(Ampersand),
S("\b"),
T(RightParen)
}
},
{ "0.8 [0:3] (\\g\\&b & 'abc')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("g&b"),
T(Ampersand),
S("abc"),
T(RightParen)
}
},
{ "0.8 [0:3] (0x3457 & 'ab')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("\x34\x57"),
T(Ampersand),
S("ab"),
T(RightParen)
}
},
{ "0.8 [0:3] (\\x34\\x57 & 'ab')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("\x34\x57"),
T(Ampersand),
S("ab"),
T(RightParen)
}
},
{ "0.8 [0:3] (0xA4b7 & 'ab')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("\xA4\xb7"),
T(Ampersand),
S("ab"),
T(RightParen)
}
},
{ "0.8 [0:3] (\\xA4\\xb7 & 'ab')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("\xA4\xb7"),
T(Ampersand),
S("ab"),
T(RightParen)
}
},
{ "0.8 [0:3] (\"\\xA4\\xb7\" & 'ab')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("\xA4\xb7"),
T(Ampersand),
S("ab"),
T(RightParen)
}
},
{ "0.8 [0:3] (\'\\xA4\\xb7\' & 'ab')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("\xA4\xb7"),
T(Ampersand),
S("ab"),
T(RightParen)
}
},
{ "0.8 [0:3] ('ab\"' & 'abc')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("ab\""),
T(Ampersand),
S("abc"),
T(RightParen)
}
},
{ "0.8 [0:3] (\"ab\\\"\" & 'abc')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("ab\""),
T(Ampersand),
S("abc"),
T(RightParen)
}
},
{ "0.8 [0:3] (\"ab\\A\" & 'abc')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("abA"),
T(Ampersand),
S("abc"),
T(RightParen)
}
},
{ "0.8 [0:3] (\"ab'\" & 'abc')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("ab'"),
T(Ampersand),
S("abc"),
T(RightParen)
}
},
{ "0.8 [0:3] (\"ab\\\\\" & 'abc')", 11,
{ F(0.8),
T(LeftBracket),
I(0),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("ab\\"),
T(Ampersand),
S("abc"),
T(RightParen)
}
},
{ "0.8 [-5:-3] (\"abc\" & 'abc')", 11,
{ F(0.8),
T(LeftBracket),
I(-5),
T(Colon),
I(-3),
T(RightBracket),
T(LeftParen),
S("abc"),
T(Ampersand),
S("abc"),
T(RightParen)
}
},
{ "0.8 [5:3] (\"abc\" & 'abc')", 11,
{ F(0.8),
T(LeftBracket),
I(5),
T(Colon),
I(3),
T(RightBracket),
T(LeftParen),
S("abc"),
T(Ampersand),
S("abc"),
T(RightParen)
}
},
{ "1.2 ('ABCD')", 4,
{ F(1.2),
T(LeftParen),
S("ABCD"),
T(RightParen)
}
},
{ ".2 ('ABCD')", 4,
{ F(0.2),
T(LeftParen),
S("ABCD"),
T(RightParen)
}
},
{ "0. ('ABCD')", 4,
{ F(0.0),
T(LeftParen),
S("ABCD"),
T(RightParen)
}
},
{ "-1 ('ABCD')", 4,
{ I(-1),
T(LeftParen),
S("ABCD"),
T(RightParen)
}
},
{ "+1 ('ABCD')", 4,
{ I(1),
T(LeftParen),
S("ABCD"),
T(RightParen)
}
}
// e notation is no longer supported, due to the fact that it's
// not useful in the context of a priority...
// { "1E25 ('ABCD')", NULL },
// { "1e25 ('ABCD')", NULL }
};
// Undefine our nasty macros
@ -129,13 +549,27 @@ ParserTest::ScannerTest() {
const int testCaseCount = sizeof(testCases) / sizeof(test_case);
for (int i = 0; i < testCaseCount; i++) {
// cout << testCases[i].rule << endl;
NextSubTest();
TokenStream stream(testCases[i].rule);
TokenStream stream;
try {
stream.SetTo(testCases[i].rule);
} catch (Err *e) {
CppUnit::Exception *err = new CppUnit::Exception(e->Msg());
delete e;
throw *err;
}
CHK(stream.InitCheck() == B_OK);
for (int j = 0; j < testCases[i].tokenCount; j++) {
Token *token = stream.Get();
CHK(token);
// cout << tokenTypeToString(token->Type()) << endl;
/*
if (token->Type() == CharacterString)
cout << " token1 == " << token->String() << endl;
if (testCases[i].tokens[j]->Type() == CharacterString)
cout << " token2 == " << (testCases[i].tokens[j])->String() << endl;
*/
CHK(*token == *(testCases[i].tokens[j]));
/*
switch (token->Type()) {
@ -153,8 +587,8 @@ ParserTest::ScannerTest() {
delete testCases[i].tokens[j];
}
CHK(stream.IsEmpty());
// cout << endl;
}
//
#else // !TEST_R5
Outputf("(no tests actually performed for R5 version)\n");
#endif // !TEST_R5