+ Added some documentation

+ Fixed a bug with 0eXXX floats being handled improperly
+ Updated a few error messages to give proper character
  stream positions.


git-svn-id: file:///srv/svn/repos/haiku/trunk/current@650 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Tyler Dauwalder 2002-08-08 07:23:24 +00:00
parent 0bc621d53b
commit 60ee71d302
2 changed files with 117 additions and 9 deletions

View File

@ -19,6 +19,7 @@
class BString;
//! MIME Sniffer related classes
namespace Sniffer {
class Rule;
@ -36,6 +37,10 @@ status_t parse(const char *rule, Rule *result, BString *parseError = NULL);
// Classes used internally by the parser
//------------------------------------------------------------------------------
//! Manages a stream of characters
/*! CharStream is used by the scanner portion of the parser, which is implemented
in TokenStream::SetTo().
*/
class CharStream {
public:
CharStream(const char *string = NULL);
@ -61,6 +66,7 @@ private:
CharStream& operator=(const CharStream &ref);
};
//! Types of tokens
typedef enum TokenType {
EmptyToken,
LeftParen,
@ -75,8 +81,17 @@ typedef enum TokenType {
FloatingPoint
};
/*! \brief Returns a NULL-terminated string contating the
name of the given token type
*/
const char* tokenTypeToString(TokenType type);
//! Base token class returned by TokenStream
/*! Each token represents a single chunk of relevant information
in a given rule. For example, the floating point number "1.2e-35",
originally represented as a 7-character string, is added to the
token stream as a single FloatToken object.
*/
class Token {
public:
Token(TokenType type = EmptyToken, const ssize_t pos = -1);
@ -92,6 +107,11 @@ protected:
ssize_t fPos;
};
//! String token class
/*! Single-quoted strings, double-quoted strings, unquoted strings, and
hex literals are all converted to StringToken objects by the scanner
and from then on treated uniformly.
*/
class StringToken : public Token {
public:
StringToken(const char *string, const ssize_t pos);
@ -101,6 +121,11 @@ protected:
char *fString;
};
//! Integer token class
/*! Signed or unsigned integer literals are coverted to IntToken objects,
which may then be treated as either ints or floats (since a priority
of "1" would be valid, but scanned as an int instead of a float).
*/
class IntToken : public Token {
public:
IntToken(const int32 value, const ssize_t pos);
@ -110,6 +135,10 @@ protected:
int32 fValue;
};
//! Floating point token class
/*! Signed or unsigned, extended or non-extended notation floating point
numbers are converted to FloatToken objects by the scanner.
*/
class FloatToken : public Token {
public:
FloatToken(const double value, const ssize_t pos);
@ -118,6 +147,14 @@ protected:
double fValue;
};
//! Manages a stream of Token objects
/*! Provides Get() and Unget() operations, some handy shortcut operations (Read()
and CondRead()), and handles memory management with respect to all the
Token objects in the stream (i.e. never delete a Token object returned by Get()).
Also, the scanner portion of the parser is implemented in the TokenStream's
SetTo() function.
*/
class TokenStream {
public:
TokenStream(const char *string = NULL);
@ -154,6 +191,7 @@ private:
TokenStream& operator=(const TokenStream &ref);
};
//! Handles parsing a sniffer rule, yielding either a parsed rule or a descriptive error message.
class Parser {
public:
Parser();

View File

@ -37,6 +37,24 @@ bool isOctalChar(char ch);
bool isDecimalChar(char ch);
bool isPunctuation(char ch);
//! Parses the given rule.
/*! The resulting parsed Sniffer::Rule structure is stored in \c rule, which
must be pre-allocated. If parsing fails, a descriptive error message (meant
to be viewed in a monospaced font) is placed in the pre-allocated \c BString
pointed to by \c parseError (which may be \c NULL if you don't care about
the error message).
\param rule Pointer to a NULL-terminated string containing the sniffer
rule to be parsed
\param result Pointer to a pre-allocated \c Sniffer::Rule object into which the result
of parsing is placed upon success.
\param parseError Point to pre-allocated \c BString object into which
a descriptive error message is stored upon failure.
\return
- B_OK: Success
- B_BAD_MIME_SNIFFER_RULE: Failure
*/
status_t
Sniffer::parse(const char *rule, Rule *result, BString *parseError) {
Parser parser;
@ -442,7 +460,7 @@ int q = 0;
break;
case 0x3:
if (stream.IsEmpty())
throw new Err(std::string("Sniffer pattern error: unterminated single-quoted string"), pos);
throw new Err(std::string("Sniffer pattern error: unterminated double-quoted string"), pos);
else
charStr += ch;
break;
@ -462,6 +480,9 @@ int q = 0;
} else if (ch == '.') {
charStr += ch;
state = tsssFloat;
} else if (ch == 'e' || ch == 'E') {
charStr += ch;
state = tsssLonelyFloatExtension;
} else {
// Terminate the number
AddInt(charStr.c_str(), startPos);
@ -482,7 +503,13 @@ int q = 0;
case tsssOneHex:
if (isHexChar(ch)) {
charStr += hexToChar(lastChar, ch);
try {
charStr += hexToChar(lastChar, ch);
} catch (Err *err) {
if (err)
err->SetPos(pos);
throw err;
}
state = tsssTwoHex;
} else
throw new Err(std::string("Sniffer pattern error: bad hex literal"), pos); // Same as R5
@ -608,6 +635,8 @@ int q = 0;
if (isOctalChar(ch)) {
lastChar = ch;
state = tsssEscapeOneOctal;
} else if (ch == 'x') {
state = tsssEscapeX;
} else {
// Check for a true end-of-text marker
if (ch == 0x3 && stream.IsEmpty())
@ -624,7 +653,7 @@ int q = 0;
lastChar = ch;
state = tsssEscapeOneHex;
} else
throw new Err(std::string("Sniffer pattern error: incomplete hex code"), pos);
throw new Err(std::string("Sniffer pattern error: incomplete escaped hex code"), pos);
break;
case tsssEscapeOneOctal:
@ -634,7 +663,13 @@ int q = 0;
state = tsssEscapeTwoOctal;
} else {
// First handle the octal
charStr += octalToChar(lastChar);
try {
charStr += octalToChar(lastChar);
} catch (Err *err) {
if (err)
err->SetPos(startPos);
throw err;
}
// Push the new char back on and let the state we
// were in when the escape sequence was hit handle it.
@ -645,11 +680,23 @@ int q = 0;
case tsssEscapeTwoOctal:
if (isOctalChar(ch)) {
charStr += octalToChar(lastLastChar, lastChar, ch);
try {
charStr += octalToChar(lastLastChar, lastChar, ch);
} catch (Err *err) {
if (err)
err->SetPos(startPos);
throw err;
}
state = escapedState;
} else {
// First handle the octal
charStr += octalToChar(lastLastChar, lastChar);
try {
charStr += octalToChar(lastLastChar, lastChar);
} catch (Err *err) {
if (err)
err->SetPos(startPos);
throw err;
}
// Push the new char back on and let the state we
// were in when the escape sequence was hit handle it.
@ -660,7 +707,13 @@ int q = 0;
case tsssEscapeOneHex:
if (isHexChar(ch)) {
charStr += hexToChar(lastChar, ch);
try {
charStr += hexToChar(lastChar, ch);
} catch (Err *err) {
if (err)
err->SetPos(pos);
throw err;
}
state = escapedState;
} else
throw new Err(std::string("Sniffer pattern error: incomplete escaped hex code"), pos);
@ -694,6 +747,10 @@ TokenStream::InitCheck() const {
return fCStatus;
}
//! Returns a pointer to the next token in the stream.
/*! The TokenStream object retains owner ship of the Token object returned by Get().
If Get() is called at the end of the stream, a pointer to a Sniffer::Err object is thrown.
*/
const Token*
TokenStream::Get() {
if (fCStatus != B_OK)
@ -707,6 +764,9 @@ TokenStream::Get() {
}
}
//! Places token returned by the most recent call to Get() back on the head of the stream.
/*! If Unget() is called at the beginning of the stream, a pointer to a Sniffer::Err object is thrown.
*/
void
TokenStream::Unget() {
if (fCStatus != B_OK)
@ -717,6 +777,10 @@ TokenStream::Unget() {
throw new Err("Sniffer parser error: TokenStream::Unget() called at beginning of token stream", -1);
}
/*! \brief Reads the next token in the stream and verifies it is of the given type,
throwing a pointer to a Sniffer::Err object if it is not.
*/
void
TokenStream::Read(TokenType type) {
const Token *t = Get();
@ -726,6 +790,11 @@ TokenStream::Read(TokenType type) {
}
}
//! Conditionally reads the next token in the stream.
/*! CondRead() peeks at the next token in the stream. If it is of the given type, the
token is removed from the stream and \c true is returned. If it is not of the
given type, false is returned and the token remains at the head of the stream.
*/
bool
TokenStream::CondRead(TokenType type) {
const Token *t = Get();
@ -843,6 +912,7 @@ octalToChar(char hi, char low) {
char
octalToChar(char hi, char mid, char low) {
if (isOctalChar(hi) && isOctalChar(mid) && isOctalChar(low)) {
// Check for octals >= decimal 256
if ((hi-'0') <= 3)
return ((hi-'0') << 6) | ((mid-'0') << 3) | (low-'0');
else
@ -970,7 +1040,7 @@ Parser::ErrorMessage(Err *err, const char *rule) {
? err->Msg()
: "Sniffer parser error: Unexpected error with no supplied error message";
size_t pos = err && (err->Pos() >= 0) ? err->Pos() : 0;
std::string str = std::string(rule) + "\n";
std::string str = std::string(rule ? rule : "") + "\n";
for (int i = 0; i < pos; i++)
str += " ";
str += "^ ";
@ -999,7 +1069,7 @@ Parser::ParsePriority() {
if (0.0 <= result && result <= 1.0)
return result;
else {
cout << "(priority == " << result << ")" << endl;
// cout << "(priority == " << result << ")" << endl;
throw new Err("Sniffer pattern error: invalid priority", t->Pos());
}
} else