Debugger: Improvements to C-style expression tokenizer.

- Add recognition of several additional types of tokens that, while
  not necessary for single-line expressions, will be needed for
  parsing/highlighting source files. Also rename tokens for open/close
  paren to reflect their content more accurately, and adjust callers
  accordingly.
This commit is contained in:
Rene Gollent 2014-11-28 18:14:34 -05:00
parent 4671959310
commit 15758369f2
3 changed files with 112 additions and 26 deletions

View File

@ -69,11 +69,11 @@ static BString TokenTypeToString(int32 type)
token = "**";
break;
case TOKEN_OPENING_BRACKET:
case TOKEN_OPENING_PAREN:
token = "(";
break;
case TOKEN_CLOSING_BRACKET:
case TOKEN_CLOSING_PAREN:
token = ")";
break;
@ -1734,11 +1734,11 @@ CLanguageExpressionEvaluator::_ParseAtom()
else {
fTokenizer->RewindToken();
_EatToken(TOKEN_OPENING_BRACKET);
_EatToken(TOKEN_OPENING_PAREN);
value = _ParseSum();
_EatToken(TOKEN_CLOSING_BRACKET);
_EatToken(TOKEN_CLOSING_PAREN);
}
return value;
@ -1765,8 +1765,8 @@ CLanguageExpressionEvaluator::_EatToken(int32 type)
case TOKEN_STAR:
case TOKEN_MODULO:
case TOKEN_POWER:
case TOKEN_OPENING_BRACKET:
case TOKEN_CLOSING_BRACKET:
case TOKEN_OPENING_PAREN:
case TOKEN_CLOSING_PAREN:
case TOKEN_LOGICAL_AND:
case TOKEN_BITWISE_AND:
case TOKEN_LOGICAL_OR:

View File

@ -108,7 +108,7 @@ Tokenizer::NextToken()
TOKEN_END_OF_LINE);
}
bool decimal = *fCurrentChar == '.' || *fCurrentChar == ',';
bool decimal = *fCurrentChar == '.';
if (decimal || isdigit(*fCurrentChar)) {
if (*fCurrentChar == '0' && fCurrentChar[1] == 'x')
@ -124,14 +124,14 @@ Tokenizer::NextToken()
fCurrentChar++;
}
// optional post comma part
// (required if there are no digits before the comma)
if (*fCurrentChar == '.' || *fCurrentChar == ',') {
// optional post decimal part
// (required if there are no digits before the decimal)
if (*fCurrentChar == '.') {
decimal = true;
temp << '.';
fCurrentChar++;
// optional post comma digits
// optional post decimal digits
while (isdigit(*fCurrentChar)) {
temp << *fCurrentChar;
fCurrentChar++;
@ -162,15 +162,27 @@ Tokenizer::NextToken()
fCurrentToken.value.SetTo(value);
else
fCurrentToken.value.SetTo((int64)strtoll(temp.String(), NULL, 10));
} else if (isalpha(*fCurrentChar)) {
} else if (isalpha(*fCurrentChar) || *fCurrentChar == '_') {
const char* begin = fCurrentChar;
while (*fCurrentChar != 0 && (isalpha(*fCurrentChar)
|| isdigit(*fCurrentChar))) {
|| isdigit(*fCurrentChar) || *fCurrentChar == '_')) {
fCurrentChar++;
}
int32 length = fCurrentChar - begin;
fCurrentToken = Token(begin, length, _CurrentPos() - length,
TOKEN_IDENTIFIER);
} else if (*fCurrentChar == '"' || *fCurrentChar == '\'') {
const char* begin = fCurrentChar++;
while (*fCurrentChar != 0) {
if (*fCurrentChar == '\\') {
if (*(fCurrentChar++) != 0)
fCurrentChar++;
} else if (*(fCurrentChar++) == *begin)
break;
}
int32 length = fCurrentChar - begin;
fCurrentToken = Token(begin, length, _CurrentPos() - length,
TOKEN_STRING_LITERAL);
} else {
if (!_ParseOperator()) {
int32 type = TOKEN_NONE;
@ -180,15 +192,48 @@ Tokenizer::NextToken()
break;
case '(':
type = TOKEN_OPENING_BRACKET;
type = TOKEN_OPENING_PAREN;
break;
case ')':
type = TOKEN_CLOSING_BRACKET;
type = TOKEN_CLOSING_PAREN;
break;
case '[':
type = TOKEN_OPENING_SQUARE_BRACKET;
break;
case ']':
type = TOKEN_CLOSING_SQUARE_BRACKET;
break;
case '{':
type = TOKEN_OPENING_CURLY_BRACE;
break;
case '}':
type = TOKEN_CLOSING_CURLY_BRACE;
break;
case '\\':
type = TOKEN_BACKSLASH;
break;
case ':':
type = TOKEN_SLASH;
type = TOKEN_COLON;
break;
case ';':
type = TOKEN_SEMICOLON;
break;
case ',':
type = TOKEN_COMMA;
break;
case '.':
type = TOKEN_PERIOD;
break;
case '#':
type = TOKEN_POUND;
break;
default:
@ -227,18 +272,37 @@ Tokenizer::_ParseOperator()
break;
case '*':
if (_Peek() == '*') {
type = TOKEN_POWER;
length = 2;
} else {
type = TOKEN_STAR;
length = 1;
switch (_Peek()) {
case '*':
type = TOKEN_POWER;
length = 2;
break;
case '/':
type = TOKEN_END_COMMENT_BLOCK;
length = 2;
break;
default:
type = TOKEN_STAR;
length = 1;
break;
}
break;
case '/':
type = TOKEN_SLASH;
length = 1;
switch (_Peek()) {
case '*':
type = TOKEN_BEGIN_COMMENT_BLOCK;
length = 2;
break;
case '/':
type = TOKEN_INLINE_COMMENT;
length = 2;
break;
default:
type = TOKEN_SLASH;
length = 1;
break;
}
break;
case '%':
@ -285,6 +349,9 @@ Tokenizer::_ParseOperator()
if (_Peek() == '=') {
type = TOKEN_EQ;
length = 2;
} else {
type = TOKEN_ASSIGN;
length = 1;
}
break;

View File

@ -35,9 +35,16 @@ enum {
TOKEN_POWER,
TOKEN_OPENING_BRACKET,
TOKEN_CLOSING_BRACKET,
TOKEN_OPENING_PAREN,
TOKEN_CLOSING_PAREN,
TOKEN_OPENING_SQUARE_BRACKET,
TOKEN_CLOSING_SQUARE_BRACKET,
TOKEN_OPENING_CURLY_BRACE,
TOKEN_CLOSING_CURLY_BRACE,
TOKEN_ASSIGN,
TOKEN_LOGICAL_AND,
TOKEN_LOGICAL_OR,
TOKEN_LOGICAL_NOT,
@ -52,6 +59,18 @@ enum {
TOKEN_LT,
TOKEN_LE,
TOKEN_BACKSLASH,
TOKEN_COLON,
TOKEN_SEMICOLON,
TOKEN_COMMA,
TOKEN_PERIOD,
TOKEN_POUND,
TOKEN_STRING_LITERAL,
TOKEN_BEGIN_COMMENT_BLOCK,
TOKEN_END_COMMENT_BLOCK,
TOKEN_INLINE_COMMENT,
TOKEN_MEMBER_PTR
};