rudimentary character literal, specifically for bim syntax [sorry if you like single-quote strings in python]

2020-12-29 21:29:15 +09:00 · 2020-12-29 21:29:15 +09:00 · 267c7b5650
commit 267c7b5650
parent 430f20566e
2 changed files with 48 additions and 7 deletions
--- a/compiler.c
+++ b/compiler.c
@@ -965,14 +965,33 @@ static void string(int canAssign) {
 	FREE_ARRAY(char,stringBytes,stringCapacity);
 }

-/* TODO
 static void codepoint(int canAssign) {
-  // Convert utf8 bytes to single codepoint; error on multiple codepoints.
-  // Emit as constant Integer value? Or as separate Codepoint value?
-  // The latter could add to strings as utf8 bytes, but compare to
-  // Integers as the numerical value...
+	const char * c = parser.previous.start + 1; /* Begin one byte into the previous token (presumably just past the opening quote -- confirm against the scanner). */
+	size_t width = 0; /* Number of characters decoded so far. */
+	int codepoint = 0; /* Value that gets emitted; remains 0 if the loop body never runs. */
+	while (c < parser.previous.start + parser.previous.length - 1) { /* Stop before the token's final byte. */
+		if (width >= 1) { /* A second character was found; each raw byte counts as one, so multi-byte sequences end up here too. */
+			error("Wide character literals are not currently supported.");
+			return;
+		}
+		if (*c == '\\') { /* Backslash escape: the following byte selects the value. */
+			switch (c[1]) {
+				case 'n': codepoint = '\n'; break;
+				case 'r': codepoint = '\r'; break;
+				case 't': codepoint = '\t'; break;
+				case '[': codepoint = '\033'; break; /* Backslash + '[' yields octal 033 (ESC). */
+				default: codepoint = c[1]; break; /* Any other escaped byte stands for itself. */
+			}
+			width++;
+			c += 2; /* Consume the backslash and the byte after it. */
+		} else {
+			codepoint = *c; /* NOTE(review): *c is a plain char, so bytes >= 0x80 may come out negative where char is signed -- confirm intent. */
+			width++;
+			c++;
+		}
+	}
+	emitConstant(INTEGER_VAL(codepoint)); /* The literal is emitted as an integer constant. */
 }
-*/

 static size_t addUpvalue(Compiler * compiler, ssize_t index, int isLocal) {
 	size_t upvalueCount = compiler->function->upvalueCount;
@@ -1083,7 +1102,7 @@ ParseRule rules[] = {
 	RULE(TOKEN_IDENTIFIER,    variable, NULL,   PREC_NONE),
 	RULE(TOKEN_STRING,        string,   NULL,   PREC_NONE),
 	RULE(TOKEN_NUMBER,        number,   NULL,   PREC_NONE),
-	RULE(TOKEN_CODEPOINT,     NULL,     NULL,   PREC_NONE), /* TODO */
+	RULE(TOKEN_CODEPOINT,     codepoint,NULL,   PREC_NONE), /* TODO */
 	RULE(TOKEN_AND,           NULL,     and_,   PREC_AND),
 	RULE(TOKEN_CLASS,         NULL,     NULL,   PREC_NONE),
 	RULE(TOKEN_ELSE,          NULL,     NULL,   PREC_NONE),
--- a/rline.c
+++ b/rline.c
@@ -515,6 +515,24 @@ void paint_simple_string(struct syntax_state * state) {
 	}
 }

+void paint_single_string(struct syntax_state * state) { /* Highlights a single-quoted literal: quotes and ordinary characters get FLAG_NUMERAL, backslash pairs get FLAG_ESCAPE. */
+	/* Assumes you came in from a check of charat() == '\'' */
+	paint(1, FLAG_NUMERAL); /* The opening quote itself. */
+	while (charat() != -1) { /* -1 presumably signals that no characters remain -- TODO confirm against charat(). */
+		if (charat() == '\\' && nextchar() == '\'') { /* Escaped quote: painted as an escape so it does not close the literal. NOTE(review): paints the same as the generic backslash branch below. */
+			paint(2, FLAG_ESCAPE);
+		} else if (charat() == '\'') { /* Unescaped quote: paint it and finish. */
+			paint(1, FLAG_NUMERAL);
+			return;
+		} else if (charat() == '\\') { /* Any other backslash: paint it together with the following character. */
+			paint(2, FLAG_ESCAPE);
+		} else { /* Ordinary character inside the literal. */
+			paint(1, FLAG_NUMERAL);
+		}
+	}
+}
+
+
 char * syn_krk_keywords[] = {
 	"and","class","def","else","export","for","if","in","import",
 	"let","not","or","print","return","while","try","except","raise",
@@ -564,6 +582,9 @@ int syn_krk_calculate(struct syntax_state * state) {
 			} else if (charat() == '"') {
 				paint_simple_string(state);
 				return 0;
+			} else if (charat() == '\'') {
+				paint_single_string(state);
+				return 0;
 			} else if (find_keywords(state, syn_krk_keywords, FLAG_KEYWORD, c_keyword_qualifier)) {
 				return 0;
 			} else if (lastchar() != '.' && find_keywords(state, syn_krk_types, FLAG_TYPE, c_keyword_qualifier)) {
@@ -1618,6 +1639,7 @@ static int read_line(void) {
 * Read a line of text with interactive editing.
 */
 int rline(char * buffer, int buf_size) {
+	setlocale(LC_ALL, "");
 	get_initial_termios();
 	set_unbuffered();
 	get_size();