From cec029dbee435e7f608b9f6a4a3d8e98f2c5a398 Mon Sep 17 00:00:00 2001 From: Greg Ercolano Date: Wed, 16 Sep 2020 09:33:24 -0700 Subject: [PATCH] Rewrite CodeEditor syntax highlighting for issue #135 --- fluid/CMakeLists.txt | 1 + fluid/CodeEditor.cxx | 245 +++++++++++++------------------------------ fluid/CodeEditor.h | 24 +++-- fluid/Makefile | 1 + fluid/StyleParse.cxx | 222 +++++++++++++++++++++++++++++++++++++++ fluid/StyleParse.h | 59 +++++++++++ 6 files changed, 371 insertions(+), 181 deletions(-) create mode 100644 fluid/StyleParse.cxx create mode 100644 fluid/StyleParse.h diff --git a/fluid/CMakeLists.txt b/fluid/CMakeLists.txt index cbe04353a..d429affe2 100644 --- a/fluid/CMakeLists.txt +++ b/fluid/CMakeLists.txt @@ -18,6 +18,7 @@ if (NOT ANDROID) set (CPPFILES CodeEditor.cxx + StyleParse.cxx Fl_Function_Type.cxx Fl_Group_Type.cxx Fl_Menu_Type.cxx diff --git a/fluid/CodeEditor.cxx b/fluid/CodeEditor.cxx index 4dd1cef96..e6e8d762a 100644 --- a/fluid/CodeEditor.cxx +++ b/fluid/CodeEditor.cxx @@ -1,7 +1,8 @@ // // Code editor widget for the Fast Light Tool Kit (FLTK). +// Syntax highlighting rewritten by erco@seriss.com 09/15/20. // -// Copyright 1998-2016 by Bill Spitzak and others. +// Copyright 1998-2020 by Bill Spitzak and others. // // This library is free software. Distribution and use rights are outlined in // the file "COPYING" which should have been included with this file. If this @@ -24,7 +25,6 @@ #include #include "CodeEditor.h" - Fl_Text_Display::Style_Table_Entry CodeEditor:: styletable[] = { // Style table { FL_FOREGROUND_COLOR, FL_COURIER, 11 }, // A - Plain @@ -35,6 +35,7 @@ Fl_Text_Display::Style_Table_Entry CodeEditor:: { FL_DARK_RED, FL_COURIER_BOLD, 11 }, // F - Types { FL_BLUE, FL_COURIER_BOLD, 11 } // G - Keywords }; + const char * const CodeEditor:: code_keywords[] = { // Sorted list of C/C++ keywords... "and", @@ -72,6 +73,7 @@ const char * const CodeEditor:: "xor", "xor_eq" }; + const char * const CodeEditor:: code_types[] = { // Sorted list of C/C++ types... "auto", @@ -130,132 +132,68 @@ extern "C" { } } -// 'style_parse()' - Parse text and produce style data. -void CodeEditor::style_parse(const char *text, char *style, int length) { - char current; - int col; - int last; - char buf[255], - *bufptr; - const char *temp; +// See if 'find' is a C/C++ keyword. +// Refer to bsearch(3) for return value. +// +void* CodeEditor::search_keywords(char *find) { + return bsearch(&find, code_keywords, + sizeof(code_keywords) / sizeof(code_keywords[0]), + sizeof(code_keywords[0]), compare_keywords); +} +// See if 'find' is a C/C++ type. +// Refer to bsearch(3) for return value. +// +void* CodeEditor::search_types(char *find) { + return bsearch(&find, code_types, + sizeof(code_types) / sizeof(code_types[0]), + sizeof(code_types[0]), compare_keywords); +} + +// 'style_parse()' - Parse text and produce style data. +void CodeEditor::style_parse(const char *in_tbuff, // text buffer to parse + char *in_sbuff, // style buffer we modify + int in_len, // byte length to parse + char in_style) { // starting style letter // Style letters: // - // A - Plain - // B - Line comments - // C - Block comments - // D - Strings - // E - Directives - // F - Types - // G - Keywords + // 'A' - Plain + // 'B' - Line comments // .. + // 'C' - Block comments /*..*/ + // 'D' - Strings "xxx" + // 'E' - Directives #define, #include.. + // 'F' - Types void, char.. + // 'G' - Keywords if, while.. - for (current = *style, col = 0, last = 0; length > 0; length --, text ++) { - if (current == 'B' || current == 'F' || current == 'G') current = 'A'; - if (current == 'A') { - // Check for directives, comments, strings, and keywords... - if (col == 0 && *text == '#') { - // Set style to directive - current = 'E'; - } else if (strncmp(text, "//", 2) == 0) { - current = 'B'; - for (; length > 0 && *text != '\n'; length --, text ++) *style++ = 'B'; + StyleParse sp; + sp.tbuff = in_tbuff; + sp.sbuff = in_sbuff; + sp.len = in_len; + sp.style = in_style; + sp.lwhite = 1; // 1:while parsing over leading white and first char past, 0:past white + sp.col = 0; + sp.last = 0; - if (length == 0) break; - } else if (strncmp(text, "/*", 2) == 0) { - current = 'C'; - } else if (strncmp(text, "\\\"", 2) == 0) { - // Quoted quote... - *style++ = current; - *style++ = current; - text ++; - length --; - col += 2; - continue; - } else if (*text == '\"') { - current = 'D'; - } else if (!last && (islower(*text) || *text == '_')) { - // Might be a keyword... - for (temp = text, bufptr = buf; - (islower(*temp) || *temp == '_') && bufptr < (buf + sizeof(buf) - 1); - *bufptr++ = *temp++) { - // nothing - } - - if (!islower(*temp) && *temp != '_') { - *bufptr = '\0'; - - bufptr = buf; - - if (bsearch(&bufptr, code_types, - sizeof(code_types) / sizeof(code_types[0]), - sizeof(code_types[0]), compare_keywords)) { - while (text < temp) { - *style++ = 'F'; - text ++; - length --; - col ++; - } - - text --; - length ++; - last = 1; - continue; - } else if (bsearch(&bufptr, code_keywords, - sizeof(code_keywords) / sizeof(code_keywords[0]), - sizeof(code_keywords[0]), compare_keywords)) { - while (text < temp) { - *style++ = 'G'; - text ++; - length --; - col ++; - } - - text --; - length ++; - last = 1; - continue; - } - } - } - } else if (current == 'C' && strncmp(text, "*/", 2) == 0) { - // Close a C comment... - *style++ = current; - *style++ = current; - text ++; - length --; - current = 'A'; - col += 2; - continue; - } else if (current == 'D') { - // Continuing in string... - if (strncmp(text, "\\\"", 2) == 0) { - // Quoted end quote... - *style++ = current; - *style++ = current; - text ++; - length --; - col += 2; - continue; - } else if (*text == '\"') { - // End quote... - *style++ = current; - col ++; - current = 'A'; - continue; - } - } - - // Copy style info... - if (current == 'A' && (*text == '{' || *text == '}')) *style++ = 'G'; - else *style++ = current; - col ++; - - last = isalnum(*text) || *text == '_' || *text == '.'; - - if (*text == '\n') { - // Reset column and possibly reset the style - col = 0; - if (current == 'B' || current == 'E') current = 'A'; + // Loop through the code, updating style buffer + char c; + while ( sp.len > 0 ) { + c = sp.tbuff[0]; // current char + if ( sp.style == 'C' ) { // Started in middle of comment block? + if ( !sp.parse_block_comment() ) break; + } else if ( strncmp(sp.tbuff, "/*", 2)==0 ) { // C style comment block? + if ( !sp.parse_block_comment() ) break; + } else if ( c == '\\' ) { // Backslash escape char? + if ( !sp.parse_escape() ) break; + } else if ( strncmp(sp.tbuff, "//", 2)==0 ) { // Line comment? + if ( !sp.parse_line_comment() ) break; + } else if ( c == '"' ) { // Start of quoted string? + if ( !sp.parse_quoted_string() ) break; + } else if ( c == '#' && sp.lwhite ) { // Start of '#' directive? + if ( !sp.parse_directive() ) break; + } else if ( !sp.last && (islower(c) || c == '_') ) { // Possible C/C++ keyword? + if ( !sp.parse_keyword() ) break; + } else { // All other chars? + if ( !sp.parse_all_else() ) break; } } } @@ -267,12 +205,9 @@ void CodeEditor::style_unfinished_cb(int, void*) { } void CodeEditor::style_update(int pos, int nInserted, int nDeleted, int /*nRestyled*/, const char * /*deletedText*/, void *cbArg) { - CodeEditor *editor = (CodeEditor *)cbArg; - int start, // Start of text - end; // End of text - char last, // Last style on line - *style, // Style data - *text; // Text data + CodeEditor *editor = (CodeEditor*)cbArg; + char *style, // Style data + *text; // Text data // If this is just a selection change, just unselect the style buffer... @@ -299,48 +234,18 @@ void CodeEditor::style_update(int pos, int nInserted, int nDeleted, // callbacks... editor->mStyleBuffer->select(pos, pos + nInserted - nDeleted); - // Re-parse the changed region; we do this by parsing from the - // beginning of the line of the changed region to the end of - // the line of the changed region... Then we check the last - // style character and keep updating if we have a multi-line - // comment character... - start = editor->mBuffer->line_start(pos); - // the following code checks the style of the last character of the previous - // line. If it is a block comment, the previous line is interpreted as well. - int altStart = editor->mBuffer->prev_char(start); - if (altStart>0) { - altStart = editor->mBuffer->prev_char(altStart); - if (altStart>=0 && editor->mStyleBuffer->byte_at(start-2)=='C') - start = editor->mBuffer->line_start(altStart); - } - end = editor->mBuffer->line_end(pos + nInserted); - text = editor->mBuffer->text_range(start, end); - style = editor->mStyleBuffer->text_range(start, end); - if (start==end) - last = 0; - else - last = style[end - start - 1]; + // Reparse whole buffer, don't get cute. Maybe optimize range later + int len = editor->buffer()->length(); + text = editor->mBuffer->text_range(0, len); + style = editor->mStyleBuffer->text_range(0, len); - style_parse(text, style, end - start); + //DEBUG printf("BEFORE:\n"); show_buffer(editor); printf("-- END BEFORE\n"); + style_parse(text, style, editor->mBuffer->length(), 'A'); + //DEBUG printf("AFTER:\n"); show_buffer(editor); printf("-- END AFTER\n"); - editor->mStyleBuffer->replace(start, end, style); - editor->redisplay_range(start, end); - - if (start==end || last != style[end - start - 1]) { - // The last character on the line changed styles, so reparse the - // remainder of the buffer... - free(text); - free(style); - - end = editor->mBuffer->length(); - text = editor->mBuffer->text_range(start, end); - style = editor->mStyleBuffer->text_range(start, end); - - style_parse(text, style, end - start); - - editor->mStyleBuffer->replace(start, end, style); - editor->redisplay_range(start, end); - } + editor->mStyleBuffer->replace(0, len, style); + editor->redisplay_range(0, len); + editor->redraw(); free(text); free(style); @@ -394,7 +299,7 @@ CodeEditor::CodeEditor(int X, int Y, int W, int H, const char *L) : sizeof(styletable) / sizeof(styletable[0]), 'A', style_unfinished_cb, this); - style_parse(text, style, mBuffer->length()); + style_parse(text, style, mBuffer->length(), 'A'); mStyleBuffer->text(style); delete[] style; diff --git a/fluid/CodeEditor.h b/fluid/CodeEditor.h index e865ac63e..617f61413 100644 --- a/fluid/CodeEditor.h +++ b/fluid/CodeEditor.h @@ -15,29 +15,30 @@ // #ifndef CodeEditor_h -# define CodeEditor_h +#define CodeEditor_h // // Include necessary headers... // -# include -# include -# include -# include -# include -# include -# include - +#include +#include +#include +#include +#include +#include +#include +#include "StyleParse.h" class CodeEditor : public Fl_Text_Editor { static Fl_Text_Display::Style_Table_Entry styletable[]; static const char * const code_keywords[]; static const char * const code_types[]; - + static void* search_types(char *find); + static void* search_keywords(char *find); // 'style_parse()' - Parse text and produce style data. - static void style_parse(const char *text, char *style, int length); + static void style_parse(const char *tbuff, char *sbuff, int len, char style); // 'style_unfinished_cb()' - Update unfinished styles. static void style_unfinished_cb(int, void*); @@ -58,6 +59,7 @@ class CodeEditor : public Fl_Text_Editor { // attempt to make the fluid code editor widget honour textsize setting void textsize(Fl_Fontsize s); + friend class StyleParse; }; class CodeViewer : public CodeEditor { diff --git a/fluid/Makefile b/fluid/Makefile index 2823e5f4d..a4656be46 100644 --- a/fluid/Makefile +++ b/fluid/Makefile @@ -18,6 +18,7 @@ include ../makeinclude CPPFILES = \ CodeEditor.cxx \ + StyleParse.cxx \ Fl_Function_Type.cxx \ Fl_Group_Type.cxx \ Fl_Menu_Type.cxx \ diff --git a/fluid/StyleParse.cxx b/fluid/StyleParse.cxx new file mode 100644 index 000000000..cbe0ba408 --- /dev/null +++ b/fluid/StyleParse.cxx @@ -0,0 +1,222 @@ +// +// Syntax highlighting style parser class - erco 09/16/2020 +// +// Copyright 1998-2020 by Bill Spitzak and others. +// +// This library is free software. Distribution and use rights are outlined in +// the file "COPYING" which should have been included with this file. If this +// file is missing or damaged, see the license at: +// +// https://www.fltk.org/COPYING.php +// +// Please see the following page on how to report bugs and issues: +// +// https://www.fltk.org/bugs.php +// + +#include +#include +#include +#include "StyleParse.h" +#include "CodeEditor.h" + +// Handle style parsing over a character +// Handles updating col counter when \n encountered. +// Applies the current style, advances to next text + style char. +// Returns 0 if hit end of buffer, 1 otherwise. +// +int StyleParse::parse_over_char(int handle_crlf) { + char c = *tbuff; + + // End of line? + if ( handle_crlf ) { + if ( c == '\n' ) { + lwhite = 1; // restart leading white flag + } else { + // End of leading white? (used by #directive) + if ( !strchr(" \t", c) ) lwhite = 0; + } + } + + // Adjust and advance + // If handling crlfs, zero col on crlf. If not handling, let col continue to count past crlf + // e.g. for multiline #define's that have lines ending in backslashes. + // + col = (c=='\n') ? (handle_crlf ? 0 : col) : col+1; // column counter + tbuff++; // advance text ptr + *sbuff++ = style; // apply style & advance its ptr + if ( --len <= 0 ) return 0; // keep track of length + return 1; +} + +// Parse over white space using current style +// Returns 0 if hit end of buffer, 1 otherwise. +// +int StyleParse::parse_over_white() { + while ( len > 0 && strchr(" \t", *tbuff)) + { if ( !parse_over_char() ) return 0; } + return 1; +} + +// Parse over non-white alphabetic text +// Returns 0 if hit end of buffer, 1 otherwise. +// +int StyleParse::parse_over_alpha() { + while ( len > 0 && isalpha(*tbuff) ) + { if ( !parse_over_char() ) return 0; } + return 1; +} + +// Parse to end of line in specified style. +// Returns 0 if hit end of buffer, 1 otherwise. +// +int StyleParse::parse_to_eol(char s) { + char save = style; + style = s; + while ( *tbuff != '\n' ) + { if ( !parse_over_char() ) return 0; } + style = save; + return 1; +} + +// Parse a block comment until end of comment or buffer. +// Returns 0 if hit end of buffer, 1 otherwise. +// +int StyleParse::parse_block_comment() { + char save = style; + style = 'C'; // block comment style + while ( len > 0 ) { + if ( strncmp(tbuff, "*/", 2) == 0 ) { + if ( !parse_over_char() ) return 0; // handle '*' + if ( !parse_over_char() ) return 0; // handle '/' + break; + } + if ( !parse_over_char() ) return 0; // handle comment text + } + style = save; // revert style + return 1; +} + +// Copy keyword from tbuff -> keyword[] buffer +void StyleParse::buffer_keyword() { + char *key = keyword; + char *kend = key + sizeof(keyword) - 1; // end of buffer + for ( const char *s=tbuff; + (islower(*s) || *s=='_') && (key < kend); + *key++ = *s++ ) { } + *key = 0; // terminate +} + +// Parse over specified 'key'word in specified style 's'. +// Returns 0 if hit end of buffer, 1 otherwise. +// +int StyleParse::parse_over_key(const char *key, char s) { + char save = style; + style = s; + // Parse over the keyword while applying style to sbuff + while ( *key++ ) + { if ( !parse_over_char() ) return 0; } + last = 1; + style = save; + return 1; +} + +// Parse over angle brackets <..> in specified style. +// Returns 0 if hit end of buffer, 1 otherwise. +// +int StyleParse::parse_over_angles(char s) { + if ( *tbuff != '<' ) return 1; // not <..>, early exit + char save = style; + style = s; + // Parse over angle brackets in specified style + while ( len > 0 && *tbuff != '>' ) + { if ( !parse_over_char() ) return 0; } // parse over '<' and angle content + if ( !parse_over_char() ) return 0; // parse over trailing '>' + style = save; + return 1; +} + +// Parse line for possible keyword +// spi.keyword[] will contain parsed word. +// Returns 0 if hit end of buffer, 1 otherwise. +// +int StyleParse::parse_keyword() { + // Parse into 'keyword' buffer + buffer_keyword(); + char *key = keyword; + // C/C++ type? (void, char..) + if ( CodeEditor::search_types(key) ) + return parse_over_key(key, 'F'); // 'type' style + // C/C++ Keyword? (switch, return..) + else if ( CodeEditor::search_keywords(key) ) + return parse_over_key(key, 'G'); // 'keyword' style + // Not a type or keyword? Parse over it + return parse_over_key(key, style); +} + +// Style parse a quoted string. +// Returns 0 if hit end of buffer, 1 otherwise. +// +int StyleParse::parse_quoted_string() { + style = 'D'; // start string style + if ( !parse_over_char() ) return 0; // parse over opening quote + + // Parse until closing quote reached + char c; + while ( len > 0 ) { + c = tbuff[0]; + if ( c == '"' ) { // Closing quote? Parse and done + if ( !parse_over_char() ) return 0; // close quote + break; + } else if ( c == '\\' ) { // Escape sequence? Parse over, continue + if ( !parse_over_char() ) return 0; // escape + if ( !parse_over_char() ) return 0; // char being escaped + continue; + } + // Keep parsing until end of buffer or closing quote.. + if ( !parse_over_char() ) return 0; + } + style = 'A'; // revert normal style + return 1; +} + +// Style parse a directive (#include, #define..) +// Returns 0 if hit end of buffer, 1 otherwise. +// +int StyleParse::parse_directive() { + style = 'E'; // start directive style + if ( !parse_over_char() ) return 0; // Parse over '#' + if ( !parse_over_white() ) return 0; // Parse over any whitespace after '#' + if ( !parse_over_alpha() ) return 0; // Parse over the directive + style = 'A'; // revert normal style + if ( !parse_over_white() ) return 0; // Parse over white after directive + if ( !parse_over_angles('D')) return 0; // #include <..> (if any) + return 1; +} + +// Style parse a line comment to end of line. +// Returns 0 if hit end of buffer, 1 otherwise. +// +int StyleParse::parse_line_comment() { + return parse_to_eol('B'); +} + +// Parse a backslash escape character sequence. +// Purposefully don't 'handle' \n, since an escaped \n should be +// a continuation of a line, such as in a multiline #directive. +// Returns 0 if hit end of buffer, 1 otherwise. +// +int StyleParse::parse_escape() { + const char no_crlf = 0; + if ( !parse_over_char(no_crlf) ) return 0; // backslash + if ( !parse_over_char(no_crlf) ) return 0; // char escaped + return 1; +} + +// Parse all other non-specific characters +// Returns 0 if hit end of buffer, 1 otherwise. +// +int StyleParse::parse_all_else() { + last = isalnum(*tbuff) || *tbuff == '_' || *tbuff == '.'; + return parse_over_char(); +} diff --git a/fluid/StyleParse.h b/fluid/StyleParse.h new file mode 100644 index 000000000..e243a7e47 --- /dev/null +++ b/fluid/StyleParse.h @@ -0,0 +1,59 @@ +// +// Syntax highlighting style parser class - erco 09/16/2020 +// +// Copyright 1998-2020 by Bill Spitzak and others. +// +// This library is free software. Distribution and use rights are outlined in +// the file "COPYING" which should have been included with this file. If this +// file is missing or damaged, see the license at: +// +// https://www.fltk.org/COPYING.php +// +// Please see the following page on how to report bugs and issues: +// +// https://www.fltk.org/bugs.php +// + +#ifndef StyleParse_h +#define StyleParse_h + +// Class to manage style parsing, friend of CodeEditor +class StyleParse { +public: + const char *tbuff; // text buffer + char *sbuff; // style buffer + int len; // running length + char style; // current style + char lwhite; // leading white space (1=white, 0=past white) + int col; // line's column counter + char keyword[40]; // keyword parsing buffer + char last; // flag for keyword parsing + + StyleParse() { + tbuff = 0; + sbuff = 0; + len = 0; + style = 0; + lwhite = 1; + col = 0; + last = 0; + } + + // Methods to aid in parsing + int parse_over_char(int handle_crlf=1); + int parse_over_white(); + int parse_over_alpha(); + int parse_to_eol(char s); + int parse_block_comment(); // "/* text.. */" + void buffer_keyword(); + int parse_over_key(const char *key, char s); + int parse_over_angles(char s); + int parse_keyword(); // "switch" + int parse_quoted_string(); // "hello" + int parse_directive(); // "#define" + int parse_line_comment(); // "// text.." + int parse_escape(); // "\'" + int parse_all_else(); // all other code +}; + +#endif //StyleParse_h