+ Added some documentation
+ Fixed a bug with 0eXXX floats being handled improperly + Updated a few error messages to give proper character stream positions. git-svn-id: file:///srv/svn/repos/haiku/trunk/current@650 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
parent
0bc621d53b
commit
60ee71d302
@ -19,6 +19,7 @@
|
||||
|
||||
class BString;
|
||||
|
||||
//! MIME Sniffer related classes
|
||||
namespace Sniffer {
|
||||
|
||||
class Rule;
|
||||
@ -36,6 +37,10 @@ status_t parse(const char *rule, Rule *result, BString *parseError = NULL);
|
||||
// Classes used internally by the parser
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
//! Manages a stream of characters
|
||||
/*! CharStream is used by the scanner portion of the parser, which is implemented
|
||||
in TokenStream::SetTo().
|
||||
*/
|
||||
class CharStream {
|
||||
public:
|
||||
CharStream(const char *string = NULL);
|
||||
@ -61,6 +66,7 @@ private:
|
||||
CharStream& operator=(const CharStream &ref);
|
||||
};
|
||||
|
||||
//! Types of tokens
|
||||
typedef enum TokenType {
|
||||
EmptyToken,
|
||||
LeftParen,
|
||||
@ -75,8 +81,17 @@ typedef enum TokenType {
|
||||
FloatingPoint
|
||||
};
|
||||
|
||||
/*! \brief Returns a NULL-terminated string containing the
|
||||
name of the given token type
|
||||
*/
|
||||
const char* tokenTypeToString(TokenType type);
|
||||
|
||||
//! Base token class returned by TokenStream
|
||||
/*! Each token represents a single chunk of relevant information
|
||||
in a given rule. For example, the floating point number "1.2e-35",
|
||||
originally represented as a 7-character string, is added to the
|
||||
token stream as a single FloatToken object.
|
||||
*/
|
||||
class Token {
|
||||
public:
|
||||
Token(TokenType type = EmptyToken, const ssize_t pos = -1);
|
||||
@ -92,6 +107,11 @@ protected:
|
||||
ssize_t fPos;
|
||||
};
|
||||
|
||||
//! String token class
|
||||
/*! Single-quoted strings, double-quoted strings, unquoted strings, and
|
||||
hex literals are all converted to StringToken objects by the scanner
|
||||
and from then on treated uniformly.
|
||||
*/
|
||||
class StringToken : public Token {
|
||||
public:
|
||||
StringToken(const char *string, const ssize_t pos);
|
||||
@ -101,6 +121,11 @@ protected:
|
||||
char *fString;
|
||||
};
|
||||
|
||||
//! Integer token class
|
||||
/*! Signed or unsigned integer literals are converted to IntToken objects,
|
||||
which may then be treated as either ints or floats (since a priority
|
||||
of "1" would be valid, but scanned as an int instead of a float).
|
||||
*/
|
||||
class IntToken : public Token {
|
||||
public:
|
||||
IntToken(const int32 value, const ssize_t pos);
|
||||
@ -110,6 +135,10 @@ protected:
|
||||
int32 fValue;
|
||||
};
|
||||
|
||||
//! Floating point token class
|
||||
/*! Signed or unsigned, extended or non-extended notation floating point
|
||||
numbers are converted to FloatToken objects by the scanner.
|
||||
*/
|
||||
class FloatToken : public Token {
|
||||
public:
|
||||
FloatToken(const double value, const ssize_t pos);
|
||||
@ -118,6 +147,14 @@ protected:
|
||||
double fValue;
|
||||
};
|
||||
|
||||
//! Manages a stream of Token objects
|
||||
/*! Provides Get() and Unget() operations, some handy shortcut operations (Read()
|
||||
and CondRead()), and handles memory management with respect to all the
|
||||
Token objects in the stream (i.e. never delete a Token object returned by Get()).
|
||||
|
||||
Also, the scanner portion of the parser is implemented in the TokenStream's
|
||||
SetTo() function.
|
||||
*/
|
||||
class TokenStream {
|
||||
public:
|
||||
TokenStream(const char *string = NULL);
|
||||
@ -154,6 +191,7 @@ private:
|
||||
TokenStream& operator=(const TokenStream &ref);
|
||||
};
|
||||
|
||||
//! Handles parsing a sniffer rule, yielding either a parsed rule or a descriptive error message.
|
||||
class Parser {
|
||||
public:
|
||||
Parser();
|
||||
|
@ -37,6 +37,24 @@ bool isOctalChar(char ch);
|
||||
bool isDecimalChar(char ch);
|
||||
bool isPunctuation(char ch);
|
||||
|
||||
//! Parses the given rule.
|
||||
/*! The resulting parsed Sniffer::Rule structure is stored in \c rule, which
|
||||
must be pre-allocated. If parsing fails, a descriptive error message (meant
|
||||
to be viewed in a monospaced font) is placed in the pre-allocated \c BString
|
||||
pointed to by \c parseError (which may be \c NULL if you don't care about
|
||||
the error message).
|
||||
|
||||
\param rule Pointer to a NULL-terminated string containing the sniffer
|
||||
rule to be parsed
|
||||
\param result Pointer to a pre-allocated \c Sniffer::Rule object into which the result
|
||||
of parsing is placed upon success.
|
||||
\param parseError Pointer to a pre-allocated \c BString object into which
|
||||
a descriptive error message is stored upon failure.
|
||||
|
||||
\return
|
||||
- B_OK: Success
|
||||
- B_BAD_MIME_SNIFFER_RULE: Failure
|
||||
*/
|
||||
status_t
|
||||
Sniffer::parse(const char *rule, Rule *result, BString *parseError) {
|
||||
Parser parser;
|
||||
@ -442,7 +460,7 @@ int q = 0;
|
||||
break;
|
||||
case 0x3:
|
||||
if (stream.IsEmpty())
|
||||
throw new Err(std::string("Sniffer pattern error: unterminated single-quoted string"), pos);
|
||||
throw new Err(std::string("Sniffer pattern error: unterminated double-quoted string"), pos);
|
||||
else
|
||||
charStr += ch;
|
||||
break;
|
||||
@ -462,6 +480,9 @@ int q = 0;
|
||||
} else if (ch == '.') {
|
||||
charStr += ch;
|
||||
state = tsssFloat;
|
||||
} else if (ch == 'e' || ch == 'E') {
|
||||
charStr += ch;
|
||||
state = tsssLonelyFloatExtension;
|
||||
} else {
|
||||
// Terminate the number
|
||||
AddInt(charStr.c_str(), startPos);
|
||||
@ -482,7 +503,13 @@ int q = 0;
|
||||
|
||||
case tsssOneHex:
|
||||
if (isHexChar(ch)) {
|
||||
charStr += hexToChar(lastChar, ch);
|
||||
try {
|
||||
charStr += hexToChar(lastChar, ch);
|
||||
} catch (Err *err) {
|
||||
if (err)
|
||||
err->SetPos(pos);
|
||||
throw err;
|
||||
}
|
||||
state = tsssTwoHex;
|
||||
} else
|
||||
throw new Err(std::string("Sniffer pattern error: bad hex literal"), pos); // Same as R5
|
||||
@ -608,6 +635,8 @@ int q = 0;
|
||||
if (isOctalChar(ch)) {
|
||||
lastChar = ch;
|
||||
state = tsssEscapeOneOctal;
|
||||
} else if (ch == 'x') {
|
||||
state = tsssEscapeX;
|
||||
} else {
|
||||
// Check for a true end-of-text marker
|
||||
if (ch == 0x3 && stream.IsEmpty())
|
||||
@ -624,7 +653,7 @@ int q = 0;
|
||||
lastChar = ch;
|
||||
state = tsssEscapeOneHex;
|
||||
} else
|
||||
throw new Err(std::string("Sniffer pattern error: incomplete hex code"), pos);
|
||||
throw new Err(std::string("Sniffer pattern error: incomplete escaped hex code"), pos);
|
||||
break;
|
||||
|
||||
case tsssEscapeOneOctal:
|
||||
@ -634,7 +663,13 @@ int q = 0;
|
||||
state = tsssEscapeTwoOctal;
|
||||
} else {
|
||||
// First handle the octal
|
||||
charStr += octalToChar(lastChar);
|
||||
try {
|
||||
charStr += octalToChar(lastChar);
|
||||
} catch (Err *err) {
|
||||
if (err)
|
||||
err->SetPos(startPos);
|
||||
throw err;
|
||||
}
|
||||
|
||||
// Push the new char back on and let the state we
|
||||
// were in when the escape sequence was hit handle it.
|
||||
@ -645,11 +680,23 @@ int q = 0;
|
||||
|
||||
case tsssEscapeTwoOctal:
|
||||
if (isOctalChar(ch)) {
|
||||
charStr += octalToChar(lastLastChar, lastChar, ch);
|
||||
try {
|
||||
charStr += octalToChar(lastLastChar, lastChar, ch);
|
||||
} catch (Err *err) {
|
||||
if (err)
|
||||
err->SetPos(startPos);
|
||||
throw err;
|
||||
}
|
||||
state = escapedState;
|
||||
} else {
|
||||
// First handle the octal
|
||||
charStr += octalToChar(lastLastChar, lastChar);
|
||||
try {
|
||||
charStr += octalToChar(lastLastChar, lastChar);
|
||||
} catch (Err *err) {
|
||||
if (err)
|
||||
err->SetPos(startPos);
|
||||
throw err;
|
||||
}
|
||||
|
||||
// Push the new char back on and let the state we
|
||||
// were in when the escape sequence was hit handle it.
|
||||
@ -660,7 +707,13 @@ int q = 0;
|
||||
|
||||
case tsssEscapeOneHex:
|
||||
if (isHexChar(ch)) {
|
||||
charStr += hexToChar(lastChar, ch);
|
||||
try {
|
||||
charStr += hexToChar(lastChar, ch);
|
||||
} catch (Err *err) {
|
||||
if (err)
|
||||
err->SetPos(pos);
|
||||
throw err;
|
||||
}
|
||||
state = escapedState;
|
||||
} else
|
||||
throw new Err(std::string("Sniffer pattern error: incomplete escaped hex code"), pos);
|
||||
@ -694,6 +747,10 @@ TokenStream::InitCheck() const {
|
||||
return fCStatus;
|
||||
}
|
||||
|
||||
//! Returns a pointer to the next token in the stream.
|
||||
/*! The TokenStream object retains ownership of the Token object returned by Get().
|
||||
If Get() is called at the end of the stream, a pointer to a Sniffer::Err object is thrown.
|
||||
*/
|
||||
const Token*
|
||||
TokenStream::Get() {
|
||||
if (fCStatus != B_OK)
|
||||
@ -707,6 +764,9 @@ TokenStream::Get() {
|
||||
}
|
||||
}
|
||||
|
||||
//! Places token returned by the most recent call to Get() back on the head of the stream.
|
||||
/*! If Unget() is called at the beginning of the stream, a pointer to a Sniffer::Err object is thrown.
|
||||
*/
|
||||
void
|
||||
TokenStream::Unget() {
|
||||
if (fCStatus != B_OK)
|
||||
@ -717,6 +777,10 @@ TokenStream::Unget() {
|
||||
throw new Err("Sniffer parser error: TokenStream::Unget() called at beginning of token stream", -1);
|
||||
}
|
||||
|
||||
|
||||
/*! \brief Reads the next token in the stream and verifies it is of the given type,
|
||||
throwing a pointer to a Sniffer::Err object if it is not.
|
||||
*/
|
||||
void
|
||||
TokenStream::Read(TokenType type) {
|
||||
const Token *t = Get();
|
||||
@ -726,6 +790,11 @@ TokenStream::Read(TokenType type) {
|
||||
}
|
||||
}
|
||||
|
||||
//! Conditionally reads the next token in the stream.
|
||||
/*! CondRead() peeks at the next token in the stream. If it is of the given type, the
|
||||
token is removed from the stream and \c true is returned. If it is not of the
|
||||
given type, false is returned and the token remains at the head of the stream.
|
||||
*/
|
||||
bool
|
||||
TokenStream::CondRead(TokenType type) {
|
||||
const Token *t = Get();
|
||||
@ -843,6 +912,7 @@ octalToChar(char hi, char low) {
|
||||
char
|
||||
octalToChar(char hi, char mid, char low) {
|
||||
if (isOctalChar(hi) && isOctalChar(mid) && isOctalChar(low)) {
|
||||
// Check for octals >= decimal 256
|
||||
if ((hi-'0') <= 3)
|
||||
return ((hi-'0') << 6) | ((mid-'0') << 3) | (low-'0');
|
||||
else
|
||||
@ -970,7 +1040,7 @@ Parser::ErrorMessage(Err *err, const char *rule) {
|
||||
? err->Msg()
|
||||
: "Sniffer parser error: Unexpected error with no supplied error message";
|
||||
size_t pos = err && (err->Pos() >= 0) ? err->Pos() : 0;
|
||||
std::string str = std::string(rule) + "\n";
|
||||
std::string str = std::string(rule ? rule : "") + "\n";
|
||||
for (int i = 0; i < pos; i++)
|
||||
str += " ";
|
||||
str += "^ ";
|
||||
@ -999,7 +1069,7 @@ Parser::ParsePriority() {
|
||||
if (0.0 <= result && result <= 1.0)
|
||||
return result;
|
||||
else {
|
||||
cout << "(priority == " << result << ")" << endl;
|
||||
// cout << "(priority == " << result << ")" << endl;
|
||||
throw new Err("Sniffer pattern error: invalid priority", t->Pos());
|
||||
}
|
||||
} else
|
||||
|
Loading…
Reference in New Issue
Block a user