Debugger: Implement C++ syntax highlighter.

- Implement a basic highlighting engine for C/C++. Not yet used/tested,
  as SourceView will first need some adjustments in order to acquire
  and make use of the highlighter modules in the first place.
This commit is contained in:
Rene Gollent 2014-11-26 22:35:32 -05:00
parent 15758369f2
commit 921517ecd3
3 changed files with 376 additions and 6 deletions

View File

@ -6,14 +6,171 @@
#include "CLanguageFamilySyntaxHighlightInfo.h"
#include <AutoDeleter.h>
#include "CLanguageTokenizer.h"
#include "LineDataSource.h"
using namespace CLanguage;
// Table of identifiers that are highlighted as language keywords.
//
// IsLanguageKeyword() runs a binary search over this table, so the entries
// must stay sorted in ascending byte-wise order (which places the uppercase
// "NULL" ahead of all lowercase entries, and "const_cast" ahead of
// "constexpr"). Every entry needs its own trailing comma: adjacent string
// literals are silently concatenated by the compiler, which would merge two
// keywords into one bogus entry.
static const char* kLanguageKeywords[] = {
	"NULL",
	"asm",
	"auto",
	"bool",
	"break",
	"case",
	"catch",
	"char",
	"class",
	"const",
	"const_cast",
	"constexpr",
	"continue",
	"default",
	"delete",
	"do",
	"double",
	"dynamic_cast",
	"enum",
	"explicit",
	"extern",
	"false",
	"float",
	"for",
	"goto",
	"inline",
	"int",
	"long",
	"mutable",
	"namespace",
	"new",
	"operator",
	"private",
	"protected",
	"public",
	"register",
	"reinterpret_cast",
	"return",
	"short",
	"signed",
	"sizeof",
	"static",
	"static_cast",
	"struct",
	"switch",
	"template",
	"this",
	"throw",
	"true",
	"try",
	"typedef",
	"typeid",
	"typename",
	"union",
	"unsigned",
	"using",
	"virtual",
	"void",
	"volatile",
	"while"
};
// Returns true if the token's text exactly matches an entry of
// kLanguageKeywords. The table is searched by bisection, so it has to be
// sorted consistently with the ordering token.string.Compare() reports.
static bool IsLanguageKeyword(const Token& token)
{
	int first = 0;
	int last = static_cast<int>(sizeof(kLanguageKeywords) / sizeof(char*)) - 1;

	while (first < last) {
		// Round the midpoint up so that "first = probe" always makes
		// progress when only two candidates remain.
		const int probe = (first + last + 1) / 2;
		const int order = token.string.Compare(kLanguageKeywords[probe]);

		if (order == 0)
			return true;

		if (order > 0)
			first = probe;
		else
			last = probe - 1;
	}

	// A single candidate is left; it either matches or nothing does.
	return token.string.Compare(kLanguageKeywords[first]) == 0;
}
// #pragma mark - CLanguageFamilySyntaxHighlightInfo::SyntaxPair
// A (column, highlight type) record. The parser adds one whenever the
// highlight type changes, using the position of the token at which the
// new type begins.
struct CLanguageFamilySyntaxHighlightInfo::SyntaxPair {
	int32					column;
	syntax_highlight_type	type;

	SyntaxPair(int32 startColumn, syntax_highlight_type highlightType)
		: column(startColumn), type(highlightType)
	{
	}
};
// #pragma mark - CLanguageFamilySyntaxHighlightInfo::LineInfo
// Collects the SyntaxPair records produced for one source line.
// NOTE(review): the list is constructed with (5, true); the second argument
// is presumably BObjectList's "owning" flag, so stored pairs are deleted
// together with the list - confirm against ObjectList.h.
class CLanguageFamilySyntaxHighlightInfo::LineInfo {
public:
	LineInfo(int32 line)
		: fLine(line), fPairs(5, true)
	{
	}

	// Number of pairs recorded for this line.
	inline int32 CountPairs() const { return fPairs.CountItems(); }

	// Pair at the given index, as returned by the list's ItemAt().
	SyntaxPair* PairAt(int32 index) const { return fPairs.ItemAt(index); }

	// Appends a new pair. Returns false if either the pair or its list
	// slot could not be allocated; nothing is leaked in that case.
	bool AddPair(int32 column, syntax_highlight_type type)
	{
		SyntaxPair* entry = new(std::nothrow) SyntaxPair(column, type);
		if (entry == NULL)
			return false;

		if (fPairs.AddItem(entry))
			return true;

		// The list did not take the entry, so it is still ours to free.
		delete entry;
		return false;
	}

private:
	typedef BObjectList<SyntaxPair> SyntaxPairList;

private:
	int32			fLine;
	SyntaxPairList	fPairs;
};
// #pragma mark - CLanguageFamilySyntaxHighlightInfo;
// Creates highlight information for the given line source.
//
// source:    the text to highlight; a reference is acquired here and released
//            again by the destructor. Must not be NULL, since it is
//            dereferenced unconditionally.
// tokenizer: used to split each line into tokens; the destructor deletes it,
//            so ownership passes to this object.
//
// No parsing happens here; the per-line list starts out empty.
CLanguageFamilySyntaxHighlightInfo::CLanguageFamilySyntaxHighlightInfo(
	LineDataSource* source, Tokenizer* tokenizer)
	:
	SyntaxHighlightInfo(),
	fHighlightSource(source),
	fTokenizer(tokenizer),
	fLineInfos(10, true)
{
	fHighlightSource->AcquireReference();
}
@ -22,6 +179,7 @@ CLanguageFamilySyntaxHighlightInfo::CLanguageFamilySyntaxHighlightInfo(
// Releases the reference held on the line source and destroys the
// tokenizer, which this object deletes itself.
CLanguageFamilySyntaxHighlightInfo::~CLanguageFamilySyntaxHighlightInfo()
{
// balances the AcquireReference() call made in the constructor
fHighlightSource->ReleaseReference();
delete fTokenizer;
}
@ -29,6 +187,182 @@ int32
CLanguageFamilySyntaxHighlightInfo::GetLineHighlightRanges(int32 line,
	int32* _columns, syntax_highlight_type* _types, int32 maxCount)
{
	// Copies the highlight pairs recorded for the given line into the
	// caller's parallel arrays.
	//
	// line:     zero-based index into the line source.
	// _columns: receives the column of each pair.
	// _types:   receives the highlight type of each pair.
	// maxCount: capacity of both arrays; never more than this many entries
	//           are written, and nothing is written if it is <= 0.
	//
	// Returns the number of entries written. 0 is returned for an out of
	// range line, when parsing fails, or when the line has no pairs.
	if (line < 0 || line >= fHighlightSource->CountLines())
		return 0;

	// lazily parse the source's highlight information the first time
	// it's actually requested. Subsequently it's cached for quick retrieval.
	if (fLineInfos.CountItems() == 0) {
		if (_ParseLines() != B_OK)
			return 0;
	}

	LineInfo* info = fLineInfos.ItemAt(line);
	if (info == NULL)
		return 0;

	// Bound the loop by the buffer capacity itself, so that all maxCount
	// slots are usable and a non-positive capacity writes nothing.
	int32 count = 0;
	for (; count < maxCount && count < info->CountPairs(); count++) {
		SyntaxPair* pair = info->PairAt(count);
		if (pair == NULL)
			break;

		_columns[count] = pair->column;
		_types[count] = pair->type;
	}

	return count;
}
// Tokenizes every line of the source in order and appends one LineInfo per
// line to fLineInfos. The highlight type left over at the end of a line is
// handed on to the next one, so that block comments can span lines.
// Returns B_OK on success, the error reported by _ParseLine(), or
// B_NO_MEMORY if a LineInfo could not be stored. Lines stored before a
// failure remain in the list.
status_t
CLanguageFamilySyntaxHighlightInfo::_ParseLines()
{
	syntax_highlight_type carriedType = SYNTAX_HIGHLIGHT_NONE;

	int32 index = 0;
	while (index < fHighlightSource->CountLines()) {
		const char* text = fHighlightSource->LineAt(index);
		fTokenizer->SetTo(text);

		LineInfo* parsed = NULL;
		status_t result = _ParseLine(index, carriedType, parsed);
		if (result != B_OK)
			return result;

		if (!fLineInfos.AddItem(parsed)) {
			// the list did not take the object, so free it here
			delete parsed;
			return B_NO_MEMORY;
		}

		index++;
	}

	return B_OK;
}
// Builds the highlight pairs for the line the tokenizer is currently set to.
//
// line:      index of the line; stored in the resulting LineInfo.
// _lastType: in: highlight type in effect at the end of the previous line
//            (SYNTAX_HIGHLIGHT_COMMENT means the line starts inside a block
//            comment). out: SYNTAX_HIGHLIGHT_COMMENT if this line ends inside
//            a block comment, SYNTAX_HIGHLIGHT_NONE otherwise.
// _info:     out: the newly allocated LineInfo on success (the caller takes
//            ownership), NULL on failure.
//
// Returns B_OK, or B_NO_MEMORY if an allocation failed. Exceptions thrown by
// the tokenizer are swallowed; the pairs collected up to that point are kept.
status_t
CLanguageFamilySyntaxHighlightInfo::_ParseLine(int32 line,
	syntax_highlight_type& _lastType, LineInfo*& _info)
{
	// Never hand a pointer to an already deleted object back on failure.
	_info = NULL;

	bool inCommentBlock = (_lastType == SYNTAX_HIGHLIGHT_COMMENT);
	bool inPreprocessor = false;

	LineInfo* info = new(std::nothrow) LineInfo(line);
	if (info == NULL)
		return B_NO_MEMORY;
	ObjectDeleter<LineInfo> infoDeleter(info);

	// A block comment carried over from the previous line covers this line
	// from its first column on.
	if (inCommentBlock) {
		if (!info->AddPair(0, SYNTAX_HIGHLIGHT_COMMENT))
			return B_NO_MEMORY;
	}

	try {
		for (;;) {
			const Token& token = fTokenizer->NextToken();
			if (token.type == TOKEN_END_OF_LINE)
				break;

			if (inCommentBlock) {
				// everything up to the closing marker belongs to the
				// comment pair that has already been recorded
				if (token.type == TOKEN_END_COMMENT_BLOCK)
					inCommentBlock = false;
				continue;
			} else if (inPreprocessor) {
				// NOTE(review): this consumes one further token; whether
				// `token` then refers to the newly read token depends on
				// how Tokenizer::NextToken() returns its result - confirm.
				fTokenizer->NextToken();
				inPreprocessor = false;
			} else if (token.type == TOKEN_INLINE_COMMENT) {
				// the remainder of the line is a comment
				if (!info->AddPair(token.position, SYNTAX_HIGHLIGHT_COMMENT))
					return B_NO_MEMORY;
				break;
			}

			syntax_highlight_type current = _MapTokenToSyntaxType(token);

			// Track the parser state before deciding whether a new pair is
			// needed: a block comment opened while the previous type is
			// still "comment" (e.g. "*/ /*") adds no pair, but must still
			// switch the parser into comment mode.
			if (token.type == TOKEN_BEGIN_COMMENT_BLOCK)
				inCommentBlock = true;
			else if (token.type == TOKEN_POUND)
				inPreprocessor = true;

			// only record positions at which the highlight type changes
			if (_lastType == current)
				continue;

			_lastType = current;
			if (!info->AddPair(token.position, current))
				return B_NO_MEMORY;
		}
	} catch (...) {
		// if a parse exception was thrown, simply ignore it.
		// in such a case, we can't guarantee correct highlight
		// information anyhow, so simply return whatever we started
		// with.
	}

	_lastType = inCommentBlock
		? SYNTAX_HIGHLIGHT_COMMENT : SYNTAX_HIGHLIGHT_NONE;

	infoDeleter.Detach();
	_info = info;
	return B_OK;
}
// Translates a tokenizer token into the highlight type used to render it.
// Identifiers map to SYNTAX_HIGHLIGHT_KEYWORD only if they are listed in
// the keyword table; every token type that is not handled explicitly maps
// to SYNTAX_HIGHLIGHT_NONE.
syntax_highlight_type
CLanguageFamilySyntaxHighlightInfo::_MapTokenToSyntaxType(const Token& token)
{
	syntax_highlight_type mapped = SYNTAX_HIGHLIGHT_NONE;

	switch (token.type) {
		case TOKEN_BEGIN_COMMENT_BLOCK:
		case TOKEN_END_COMMENT_BLOCK:
		case TOKEN_INLINE_COMMENT:
			mapped = SYNTAX_HIGHLIGHT_COMMENT;
			break;

		case TOKEN_POUND:
			mapped = SYNTAX_HIGHLIGHT_PREPROCESSOR_KEYWORD;
			break;

		case TOKEN_STRING_LITERAL:
			mapped = SYNTAX_HIGHLIGHT_STRING_LITERAL;
			break;

		case TOKEN_CONSTANT:
			mapped = SYNTAX_HIGHLIGHT_NUMERIC_LITERAL;
			break;

		case TOKEN_IDENTIFIER:
			// TODO: recognize types
			if (IsLanguageKeyword(token))
				mapped = SYNTAX_HIGHLIGHT_KEYWORD;
			break;

		// arithmetic
		case TOKEN_PLUS:
		case TOKEN_MINUS:
		case TOKEN_STAR:
		case TOKEN_SLASH:
		case TOKEN_MODULO:
		case TOKEN_POWER:
		// grouping
		case TOKEN_OPENING_PAREN:
		case TOKEN_CLOSING_PAREN:
		case TOKEN_OPENING_SQUARE_BRACKET:
		case TOKEN_CLOSING_SQUARE_BRACKET:
		case TOKEN_OPENING_CURLY_BRACE:
		case TOKEN_CLOSING_CURLY_BRACE:
		// logical and bitwise
		case TOKEN_LOGICAL_AND:
		case TOKEN_LOGICAL_OR:
		case TOKEN_LOGICAL_NOT:
		case TOKEN_BITWISE_AND:
		case TOKEN_BITWISE_OR:
		case TOKEN_BITWISE_NOT:
		case TOKEN_BITWISE_XOR:
		// comparison
		case TOKEN_EQ:
		case TOKEN_NE:
		case TOKEN_GT:
		case TOKEN_GE:
		case TOKEN_LT:
		case TOKEN_LE:
		// remaining punctuation
		case TOKEN_MEMBER_PTR:
		case TOKEN_COLON:
		case TOKEN_SEMICOLON:
		case TOKEN_BACKSLASH:
			mapped = SYNTAX_HIGHLIGHT_OPERATOR;
			break;

		case TOKEN_END_OF_LINE:
		default:
			break;
	}

	return mapped;
}

View File

@ -8,11 +8,20 @@
#include "SyntaxHighlighter.h"
#include <ObjectList.h>
// Forward declarations of the tokenizer types referenced below; this header
// only uses them through a pointer (Tokenizer) and a reference (Token).
namespace CLanguage {
class Token;
class Tokenizer;
}
// SyntaxHighlightInfo implementation for C-family languages. It keeps a
// line source and a tokenizer, and stores one LineInfo per parsed line.
class CLanguageFamilySyntaxHighlightInfo : public SyntaxHighlightInfo {
public:
// NOTE(review): two parameter lists follow, which look like the before and
// after sides of the diff; the implementation and ParseText() in this
// change use the two-argument (source, tokenizer) form - confirm.
CLanguageFamilySyntaxHighlightInfo(
LineDataSource* source);
LineDataSource* source,
CLanguage::Tokenizer* tokenizer);
virtual ~CLanguageFamilySyntaxHighlightInfo();
// Fills _columns/_types with the highlight pairs of the given line and
// returns how many were written.
virtual int32 GetLineHighlightRanges(int32 line,
@ -20,8 +29,22 @@ public:
syntax_highlight_type* _types,
int32 maxCount);
private:
// per-line container of SyntaxPair records
class LineInfo;
typedef BObjectList<LineInfo> LineInfoList;
// (column, highlight type) record
struct SyntaxPair;
private:
// parses all lines of the source into fLineInfos
status_t _ParseLines();
// parses a single line; _lastType carries the highlight state from one
// line to the next, _info receives the resulting LineInfo
status_t _ParseLine(int32 line,
syntax_highlight_type& _lastType,
LineInfo*& _info);
// maps a token to the highlight type used for it
syntax_highlight_type _MapTokenToSyntaxType(
const CLanguage::Token& token);
private:
// referenced line source (acquired in the constructor)
LineDataSource* fHighlightSource;
// deleted by the destructor
CLanguage::Tokenizer* fTokenizer;
// filled by _ParseLines(), one entry per source line
LineInfoList fLineInfos;
};

View File

@ -8,7 +8,13 @@
#include <new>
#include <AutoDeleter.h>
#include "CLanguageFamilySyntaxHighlightInfo.h"
#include "CLanguageTokenizer.h"
using CLanguage::Tokenizer;
CLanguageFamilySyntaxHighlighter::CLanguageFamilySyntaxHighlighter()
@ -27,9 +33,16 @@ status_t
CLanguageFamilySyntaxHighlighter::ParseText(LineDataSource* source,
	SyntaxHighlightInfo*& _info)
{
	// Creates the highlight info object for the given source.
	//
	// source: the text to be highlighted; passed on to the info object.
	// _info:  out: the newly created info object. It is only assigned once
	//         the tokenizer exists, and is NULL if creating the info object
	//         itself failed.
	//
	// Returns B_OK on success, B_NO_MEMORY if either allocation failed.
	Tokenizer* tokenizer = new(std::nothrow) Tokenizer();
	if (tokenizer == NULL)
		return B_NO_MEMORY;

	// Guards the tokenizer until the info object has taken it over.
	ObjectDeleter<Tokenizer> deleter(tokenizer);

	_info = new(std::nothrow) CLanguageFamilySyntaxHighlightInfo(source,
		tokenizer);
	if (_info == NULL)
		return B_NO_MEMORY;

	// the info object deletes the tokenizer in its destructor from here on
	deleter.Detach();
	return B_OK;
}