Use StringBuilders in compiler string parsing

This commit is contained in:
K. Lange 2022-08-14 14:12:18 +09:00
parent 0b696fc361
commit 016e9e4143

View File

@@ -2620,38 +2620,34 @@ static int isHex(int c) {
return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
} }
static void string(struct GlobalState * state, int exprType) { static int _pushHex(struct GlobalState * state, int isBytes, struct StringBuilder * sb, const char *c, const char *end, size_t n, char type) {
/* We'll just build with a flexible array like everything else. */ char tmpbuf[10] = {0};
size_t stringCapacity = 0; for (size_t i = 0; i < n; ++i) {
size_t stringLength = 0; if (c + i + 2 == end || !isHex(c[i+2])) {
char * stringBytes = 0; error("truncated \\%c escape", type);
#define PUSH_CHAR(c) do { if (stringCapacity < stringLength + 1) { \ return 1;
size_t old = stringCapacity; stringCapacity = GROW_CAPACITY(old); \ }
stringBytes = GROW_ARRAY(char, stringBytes, old, stringCapacity); \ tmpbuf[i] = c[i+2];
} stringBytes[stringLength++] = c; } while (0) }
unsigned long value = strtoul(tmpbuf, NULL, 16);
if (value >= 0x110000) {
error("invalid codepoint in \\%c escape", type);
return 1;
}
if (isBytes) {
krk_pushStringBuilder(sb, value);
} else {
unsigned char bytes[5] = {0};
size_t len = krk_codepointToBytes(value, bytes);
krk_pushStringBuilderStr(sb, (char*)bytes, len);
}
return 0;
}
#define PUSH_HEX(n, type) do { \ static void string(struct GlobalState * state, int exprType) {
char tmpbuf[10] = {0}; \ struct StringBuilder sb = {0};
for (size_t i = 0; i < n; ++i) { \ #define PUSH_CHAR(c) krk_pushStringBuilder(&sb, c)
if (c + i + 2 == end || !isHex(c[i+2])) { \ #define PUSH_HEX(n, type) _pushHex(state, isBytes, &sb, c, end, n, type)
error("truncated \\%c escape", type); \
goto _cleanupError; \
} \
tmpbuf[i] = c[i+2]; \
} \
unsigned long value = strtoul(tmpbuf, NULL, 16); \
if (value >= 0x110000) { \
error("invalid codepoint in \\%c escape", type); \
goto _cleanupError; \
} \
if (isBytes) { \
PUSH_CHAR(value); \
break; \
} \
unsigned char bytes[5] = {0}; \
size_t len = krk_codepointToBytes(value, bytes); \
for (size_t i = 0; i < len; i++) PUSH_CHAR(bytes[i]); \
} while (0)
int isBytes = (state->parser.previous.type == TOKEN_PREFIX_B); int isBytes = (state->parser.previous.type == TOKEN_PREFIX_B);
int isFormat = (state->parser.previous.type == TOKEN_PREFIX_F); int isFormat = (state->parser.previous.type == TOKEN_PREFIX_F);
@@ -2757,13 +2753,11 @@ static void string(struct GlobalState * state, int exprType) {
c += 2; c += 2;
continue; continue;
} }
if (stringLength) { /* Make sure there's a string for coersion reasons */ if (sb.length) { /* Make sure there's a string for coersion reasons */
emitConstant(OBJECT_VAL(krk_copyString(stringBytes,stringLength))); emitConstant(krk_finishStringBuilder(&sb));
formatElements++; formatElements++;
stringLength = 0;
} }
const char * start = c+1; const char * start = c+1;
stringLength = 0;
KrkScanner beforeExpression = krk_tellScanner(&state->scanner); KrkScanner beforeExpression = krk_tellScanner(&state->scanner);
Parser parserBefore = state->parser; Parser parserBefore = state->parser;
KrkScanner inner = (KrkScanner){.start=c+1, .cur=c+1, .linePtr=lineBefore, .line=lineNo, .startOfLine = 0, .hasUnget = 0}; KrkScanner inner = (KrkScanner){.start=c+1, .cur=c+1, .linePtr=lineBefore, .line=lineNo, .startOfLine = 0, .hasUnget = 0};
@@ -2851,22 +2845,18 @@ _nextStr:
goto _cleanupError; goto _cleanupError;
} }
if (isBytes) { if (isBytes) {
stringBytes = krk_reallocate(stringBytes, stringCapacity, stringLength); emitConstant(krk_finishStringBuilderBytes(&sb));
KrkBytes * bytes = krk_newBytes(0,NULL);
bytes->bytes = (uint8_t*)stringBytes;
bytes->length = stringLength;
emitConstant(OBJECT_VAL(bytes));
return; return;
} }
if (stringLength || !formatElements) { if (sb.length || !formatElements) {
emitConstant(OBJECT_VAL(krk_copyString(stringBytes,stringLength))); emitConstant(krk_finishStringBuilder(&sb));
formatElements++; formatElements++;
} }
if (formatElements != 1) { if (formatElements != 1) {
EMIT_OPERAND_OP(OP_MAKE_STRING, formatElements); EMIT_OPERAND_OP(OP_MAKE_STRING, formatElements);
} }
_cleanupError: _cleanupError:
FREE_ARRAY(char,stringBytes,stringCapacity); krk_discardStringBuilder(&sb);
#undef PUSH_CHAR #undef PUSH_CHAR
} }