Add octal string escapes
This commit is contained in:
parent
23839cde76
commit
5b30c76e92
@ -1,3 +1,32 @@
|
|||||||
|
/**
|
||||||
|
* @file compiler.c
|
||||||
|
* @brief Single-pass bytecode compiler.
|
||||||
|
*
|
||||||
|
* Kuroko's compiler is still very reminiscent of its CLox roots and uses
|
||||||
|
* the same parsing strategy, so if you have read the third chapter of
|
||||||
|
* Bob's "Crafting Interpreters", this should be fairly easy to
|
||||||
|
* understand. One important thing that Kuroko's compiler does differently
|
||||||
|
* is implement rewinding, which allows for conservative reparsing and
|
||||||
|
* recompilation of subexpressions that have already been parsed. This is
|
||||||
|
* used to compile ternaries, multiple assignments, and the expression value
|
||||||
|
* in generator and comprehension expressions.
|
||||||
|
*
|
||||||
|
* Kuroko has several levels of parse precedence, including three different
|
||||||
|
* levels indicative of assignments. Most expressions start from the TERNARY
|
||||||
|
* or COMMA level, but top-level expression statements and assignment values
|
||||||
|
* start at the highest level of ASSIGNMENT, which allows for multiple
|
||||||
|
* assignment targets. Expressions parsed from the MUST_ASSIGN level are
|
||||||
|
* assignment targets in a multiple assignment. Expressions parsed from
|
||||||
|
* the CAN_ASSIGN level are single assignment targets.
|
||||||
|
*
|
||||||
|
* String compilation manages escape sequence processing, so string tokens
|
||||||
|
* received from the scanner are not directly converted to string constants.
|
||||||
|
* F-strings are compiled as expressions generating a regular string.
|
||||||
|
*
|
||||||
|
* Kuroko's bytecode supports variable operand sizes using paired "short" and
|
||||||
|
* "long" opcodes. To ease the output of these opcodes, the EMIT_CONSTANT_OP
|
||||||
|
* macro will generate the appropriate opcode given an operand.
|
||||||
|
*/
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
@ -12,33 +41,6 @@
|
|||||||
#include "debug.h"
|
#include "debug.h"
|
||||||
#include "vm.h"
|
#include "vm.h"
|
||||||
|
|
||||||
/**
|
|
||||||
* There's nothing really especially different here compared to the Lox
|
|
||||||
* compiler from Crafting Interpreters. A handful of additional pieces
|
|
||||||
* of functionality are added, and some work is done to make blocks use
|
|
||||||
* indentation instead of braces, but the basic layout and operation
|
|
||||||
* of the compiler are the same top-down Pratt parser.
|
|
||||||
*
|
|
||||||
* The parser error handling has been improved over the Lox compiler with
|
|
||||||
* the addition of column offsets and a printed copy of the original source
|
|
||||||
* line and the offending token.
|
|
||||||
*
|
|
||||||
* String parsing also includes escape sequence support, so you can print
|
|
||||||
* quotation marks properly, as well as escape sequences for terminals.
|
|
||||||
*
|
|
||||||
* One notable part of the compiler is the handling of list comprehensions.
|
|
||||||
* In order to support Python-style syntax, the parser has been set up to
|
|
||||||
* support rolling back to a previous state, so that when the compiler sees
|
|
||||||
* an expression with references to a variable that has yet to be defined it
|
|
||||||
* will first output the expression as if that variable was a global, then it
|
|
||||||
* will see the 'in', rewind, parse the rest of the list comprehension, and
|
|
||||||
* then output the expression as a loop body, with the correct local references.
|
|
||||||
*
|
|
||||||
* if/else and try/except blocks also have to similarly handle rollback cases
|
|
||||||
* as they can not peek forward to see if a statement after an indentation
|
|
||||||
* block is an else/except.
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
KrkToken current;
|
KrkToken current;
|
||||||
KrkToken previous;
|
KrkToken previous;
|
||||||
@ -51,8 +53,8 @@ typedef enum {
|
|||||||
PREC_NONE,
|
PREC_NONE,
|
||||||
PREC_ASSIGNMENT, /* = */
|
PREC_ASSIGNMENT, /* = */
|
||||||
PREC_COMMA, /* , */
|
PREC_COMMA, /* , */
|
||||||
PREC_MUST_ASSIGN,/* special */
|
PREC_MUST_ASSIGN,/* Multiple assignment target */
|
||||||
PREC_CAN_ASSIGN, /* inside parens */
|
PREC_CAN_ASSIGN, /* Single assignment target, inside grouping */
|
||||||
PREC_TERNARY, /* TrueBranch if Condition else FalseBranch */
|
PREC_TERNARY, /* TrueBranch if Condition else FalseBranch */
|
||||||
PREC_OR, /* or */
|
PREC_OR, /* or */
|
||||||
PREC_AND, /* and */
|
PREC_AND, /* and */
|
||||||
@ -1431,8 +1433,6 @@ static void emitLoop(int loopStart) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void withStatement() {
|
static void withStatement() {
|
||||||
/* TODO: Multiple items, I'm feeling lazy. */
|
|
||||||
|
|
||||||
/* We only need this for block() */
|
/* We only need this for block() */
|
||||||
size_t blockWidth = (parser.previous.type == TOKEN_INDENTATION) ? parser.previous.length : 0;
|
size_t blockWidth = (parser.previous.type == TOKEN_INDENTATION) ? parser.previous.length : 0;
|
||||||
KrkToken myPrevious = parser.previous;
|
KrkToken myPrevious = parser.previous;
|
||||||
@ -1449,7 +1449,7 @@ static void withStatement() {
|
|||||||
declareVariable();
|
declareVariable();
|
||||||
defineVariable(ind);
|
defineVariable(ind);
|
||||||
} else {
|
} else {
|
||||||
/* Otherwise we want an unnamed local; TODO: Wait, can't we do this for iterable counts? */
|
/* Otherwise we want an unnamed local */
|
||||||
addLocal(syntheticToken(""));
|
addLocal(syntheticToken(""));
|
||||||
markInitialized();
|
markInitialized();
|
||||||
}
|
}
|
||||||
@ -2136,11 +2136,27 @@ static void string(int type) {
|
|||||||
} break;
|
} break;
|
||||||
case '\n': break;
|
case '\n': break;
|
||||||
default:
|
default:
|
||||||
/* TODO octal */
|
if (c[1] >= '0' && c[1] <= '7') {
|
||||||
|
int out = c[1] - '0';
|
||||||
|
if (c + 2 != end && (c[2] >= '0' && c[2] <= '7')) {
|
||||||
|
out <<= 3;
|
||||||
|
out += c[2] - '0';
|
||||||
|
c++;
|
||||||
|
if (c + 1 != end && (c[2] >= '0' && c[2] <= '7')) {
|
||||||
|
out <<= 3;
|
||||||
|
out += c[2] - '0';
|
||||||
|
c++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
unsigned char bytes[5] = {0};
|
||||||
|
size_t len = krk_codepointToBytes(out, bytes);
|
||||||
|
for (size_t i = 0; i < len; i++) PUSH_CHAR(bytes[i]);
|
||||||
|
} else {
|
||||||
PUSH_CHAR(c[0]);
|
PUSH_CHAR(c[0]);
|
||||||
c++;
|
c++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
c += 2;
|
c += 2;
|
||||||
} else if (isFormat && *c == '{') {
|
} else if (isFormat && *c == '{') {
|
||||||
if (!atLeastOne || stringLength) { /* Make sure there's a string for coersion reasons */
|
if (!atLeastOne || stringLength) { /* Make sure there's a string for coersion reasons */
|
||||||
|
@ -540,6 +540,14 @@ void paint_krk_string(struct syntax_state * state, int type) {
|
|||||||
paintNHex(state, 4);
|
paintNHex(state, 4);
|
||||||
} else if (nextchar() == 'U') {
|
} else if (nextchar() == 'U') {
|
||||||
paintNHex(state, 8);
|
paintNHex(state, 8);
|
||||||
|
} else if (nextchar() >= '0' && nextchar() <= '7') {
|
||||||
|
paint(2, FLAG_ESCAPE);
|
||||||
|
if (charat() >= '0' && charat() <= '7') {
|
||||||
|
paint(1, FLAG_ESCAPE);
|
||||||
|
if (charat() >= '0' && charat() <= '7') {
|
||||||
|
paint(1, FLAG_ESCAPE);
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
paint(2, FLAG_ESCAPE);
|
paint(2, FLAG_ESCAPE);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user