From 391a4d79db1902e066f6246060b25c72d7de40ab Mon Sep 17 00:00:00 2001 From: "K. Lange" Date: Sun, 10 Jul 2022 17:44:06 +0900 Subject: [PATCH] Fixup concatenating unalike string tokens in compiler --- src/compiler.c | 59 +++++++++++--------- src/kuroko/chunk.h | 2 + src/opcodes.h | 1 + src/vm.c | 25 +++++++++ test/testConcatenatedStringTokens.krk | 3 + test/testConcatenatedStringTokens.krk.expect | 1 + 6 files changed, 66 insertions(+), 25 deletions(-) create mode 100644 test/testConcatenatedStringTokens.krk create mode 100644 test/testConcatenatedStringTokens.krk.expect diff --git a/src/compiler.c b/src/compiler.c index 346e949..bd796e2 100644 --- a/src/compiler.c +++ b/src/compiler.c @@ -2580,14 +2580,14 @@ static void string(int exprType) { for (size_t i = 0; i < n; ++i) { \ if (c + i + 2 == end || !isHex(c[i+2])) { \ error("truncated \\%c escape", type); \ - FREE_ARRAY(char,stringBytes,stringCapacity); \ - return; \ + goto _cleanupError; \ } \ tmpbuf[i] = c[i+2]; \ } \ unsigned long value = strtoul(tmpbuf, NULL, 16); \ if (value >= 0x110000) { \ error("invalid codepoint in \\%c escape", type); \ + goto _cleanupError; \ } \ if (isBytes) { \ PUSH_CHAR(value); \ @@ -2602,7 +2602,6 @@ static void string(int exprType) { int isFormat = (parser.previous.type == TOKEN_PREFIX_F); int isRaw = (parser.previous.type == TOKEN_PREFIX_R); - int atLeastOne = 0; const char * lineBefore = krk_tellScanner().linePtr; size_t lineNo = krk_tellScanner().line; @@ -2611,15 +2610,16 @@ static void string(int exprType) { return; } - if (isRaw) { - emitConstant(OBJECT_VAL(krk_copyString( - parser.previous.start + (parser.previous.type == TOKEN_BIG_STRING ? 3 : 1), - parser.previous.length - (parser.previous.type == TOKEN_BIG_STRING ? 6 : 2)))); - return; - } + int formatElements = 0; /* This should capture everything but the quotes. */ do { + if (isRaw) { + for (size_t i = 0; i < parser.previous.length - (parser.previous.type == TOKEN_BIG_STRING ? 6 : 2); ++i) { + PUSH_CHAR(parser.previous.start[(parser.previous.type == TOKEN_BIG_STRING ? 3 : 1) + i]); + } + goto _nextStr; + } int type = parser.previous.type == TOKEN_BIG_STRING ? 3 : 1; const char * c = parser.previous.start + type; const char * end = parser.previous.start + parser.previous.length - type; @@ -2691,8 +2691,7 @@ static void string(int exprType) { } else if (isFormat && *c == '}') { if (c[1] != '}') { error("single '}' not allowed in f-string"); - FREE_ARRAY(char,stringBytes,stringCapacity); - return; + goto _cleanupError; } PUSH_CHAR('}'); c += 2; @@ -2703,10 +2702,10 @@ static void string(int exprType) { c += 2; continue; } - if (!atLeastOne || stringLength) { /* Make sure there's a string for coersion reasons */ + if (stringLength) { /* Make sure there's a string for coersion reasons */ emitConstant(OBJECT_VAL(krk_copyString(stringBytes,stringLength))); - if (atLeastOne) emitByte(OP_ADD); - atLeastOne = 1; + formatElements++; + stringLength = 0; } const char * start = c+1; stringLength = 0; @@ -2716,10 +2715,7 @@ static void string(int exprType) { krk_rewindScanner(inner); advance(); parsePrecedence(PREC_COMMA); /* allow unparen'd tuples, but not assignments, as expressions in f-strings */ - if (parser.hadError) { - FREE_ARRAY(char,stringBytes,stringCapacity); - return; - } + if (parser.hadError) goto _cleanupError; inner = krk_tellScanner(); /* To figure out how far to advance c */ krk_rewindScanner(beforeExpression); /* To get us back to where we were with a string token */ parser = parserBefore; @@ -2764,8 +2760,8 @@ static void string(int exprType) { error("Expected closing '}' after expression in f-string"); goto _cleanupError; } - if (atLeastOne) emitByte(OP_ADD); - atLeastOne = 1; + + formatElements++; c++; } else { if (*(unsigned char*)c > 127 && isBytes) { @@ -2776,6 +2772,18 @@ static void string(int exprType) { c++; } } + +_nextStr: + (void)0; + isRaw = 0; + isFormat = 0; + if (!isBytes) { + if (match(TOKEN_PREFIX_F)) { + isFormat = 1; + } else if (match(TOKEN_PREFIX_R)) { + isRaw = 1; + } + } } while ((!isBytes || match(TOKEN_PREFIX_B)) && (match(TOKEN_STRING) || match(TOKEN_BIG_STRING))); if (isBytes && (match(TOKEN_STRING) || match(TOKEN_BIG_STRING))) { error("Can not mix bytes and string literals"); @@ -2789,15 +2797,16 @@ static void string(int exprType) { emitConstant(OBJECT_VAL(bytes)); return; } - if (!isFormat || stringLength || !atLeastOne) { + if (stringLength) { emitConstant(OBJECT_VAL(krk_copyString(stringBytes,stringLength))); - if (atLeastOne) emitByte(OP_ADD); + formatElements++; + } + if (formatElements != 1) { + EMIT_OPERAND_OP(OP_MAKE_STRING, formatElements); } - FREE_ARRAY(char,stringBytes,stringCapacity); -#undef PUSH_CHAR - return; _cleanupError: FREE_ARRAY(char,stringBytes,stringCapacity); +#undef PUSH_CHAR } static size_t addUpvalue(Compiler * compiler, ssize_t index, int isLocal) { diff --git a/src/kuroko/chunk.h b/src/kuroko/chunk.h index 95ae8a1..f4e1534 100644 --- a/src/kuroko/chunk.h +++ b/src/kuroko/chunk.h @@ -127,6 +127,7 @@ typedef enum { OP_CLOSE_MANY, OP_POP_MANY, OP_FORMAT_VALUE, + OP_MAKE_STRING, /* Two opcode instructions */ OP_JUMP_IF_FALSE_OR_POP, @@ -179,6 +180,7 @@ typedef enum { OP_CLOSE_MANY_LONG, OP_POP_MANY_LONG, OP_FORMAT_VALUE_LONG, /* should be unused */ + OP_MAKE_STRING_LONG, } KrkOpCode; /** diff --git a/src/opcodes.h b/src/opcodes.h index 416d055..92360cb 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -98,6 +98,7 @@ OPERAND(OP_CALL_METHOD, (void)0) OPERAND(OP_CLOSE_MANY, (void)0) OPERAND(OP_POP_MANY, (void)0) OPERAND(OP_FORMAT_VALUE, (void)0) +OPERAND(OP_MAKE_STRING, (void)0) JUMP(OP_JUMP_IF_FALSE_OR_POP,+) JUMP(OP_JUMP_IF_TRUE_OR_POP,+) JUMP(OP_JUMP,+) diff --git a/src/vm.c b/src/vm.c index c255a5e..f8b12ec 100644 --- a/src/vm.c +++ b/src/vm.c @@ -3505,6 +3505,31 @@ _finishReturn: (void)0; if (doFormatString(OPERAND)) goto _finishException; break; } + + case OP_MAKE_STRING_LONG: + THREE_BYTE_OPERAND; + case OP_MAKE_STRING: { + ONE_BYTE_OPERAND; + + struct StringBuilder sb = {0}; + + for (ssize_t i = 0; i < OPERAND; ++i) { + KrkValue s = krk_currentThread.stackTop[-OPERAND+i]; + if (unlikely(!IS_STRING(s))) { + discardStringBuilder(&sb); + krk_runtimeError(vm.exceptions->valueError, "'%s' is not a string", krk_typeName(s)); + goto _finishException; + } + pushStringBuilderStr(&sb, (char*)AS_STRING(s)->chars, AS_STRING(s)->length); + } + + for (ssize_t i = 0; i < OPERAND; ++i) { + krk_pop(); + } + + krk_push(finishStringBuilder(&sb)); + break; + } } if (unlikely(krk_currentThread.flags & KRK_THREAD_HAS_EXCEPTION)) { _finishException: diff --git a/test/testConcatenatedStringTokens.krk b/test/testConcatenatedStringTokens.krk new file mode 100644 index 0000000..5dede92 --- /dev/null +++ b/test/testConcatenatedStringTokens.krk @@ -0,0 +1,3 @@ +let foo = 42 + +print(f'{foo}' '{bar}' r'{\baz}') diff --git a/test/testConcatenatedStringTokens.krk.expect b/test/testConcatenatedStringTokens.krk.expect new file mode 100644 index 0000000..92b2fb8 --- /dev/null +++ b/test/testConcatenatedStringTokens.krk.expect @@ -0,0 +1 @@ +42{bar}{\baz}