Fix concatenation of differing kinds of string tokens in the compiler

This commit is contained in:
K. Lange 2022-07-10 17:44:06 +09:00
parent e5f4208f6a
commit 391a4d79db
6 changed files with 66 additions and 25 deletions

View File

@ -2580,14 +2580,14 @@ static void string(int exprType) {
for (size_t i = 0; i < n; ++i) { \ for (size_t i = 0; i < n; ++i) { \
if (c + i + 2 == end || !isHex(c[i+2])) { \ if (c + i + 2 == end || !isHex(c[i+2])) { \
error("truncated \\%c escape", type); \ error("truncated \\%c escape", type); \
FREE_ARRAY(char,stringBytes,stringCapacity); \ goto _cleanupError; \
return; \
} \ } \
tmpbuf[i] = c[i+2]; \ tmpbuf[i] = c[i+2]; \
} \ } \
unsigned long value = strtoul(tmpbuf, NULL, 16); \ unsigned long value = strtoul(tmpbuf, NULL, 16); \
if (value >= 0x110000) { \ if (value >= 0x110000) { \
error("invalid codepoint in \\%c escape", type); \ error("invalid codepoint in \\%c escape", type); \
goto _cleanupError; \
} \ } \
if (isBytes) { \ if (isBytes) { \
PUSH_CHAR(value); \ PUSH_CHAR(value); \
@ -2602,7 +2602,6 @@ static void string(int exprType) {
int isFormat = (parser.previous.type == TOKEN_PREFIX_F); int isFormat = (parser.previous.type == TOKEN_PREFIX_F);
int isRaw = (parser.previous.type == TOKEN_PREFIX_R); int isRaw = (parser.previous.type == TOKEN_PREFIX_R);
int atLeastOne = 0;
const char * lineBefore = krk_tellScanner().linePtr; const char * lineBefore = krk_tellScanner().linePtr;
size_t lineNo = krk_tellScanner().line; size_t lineNo = krk_tellScanner().line;
@ -2611,15 +2610,16 @@ static void string(int exprType) {
return; return;
} }
if (isRaw) { int formatElements = 0;
emitConstant(OBJECT_VAL(krk_copyString(
parser.previous.start + (parser.previous.type == TOKEN_BIG_STRING ? 3 : 1),
parser.previous.length - (parser.previous.type == TOKEN_BIG_STRING ? 6 : 2))));
return;
}
/* This should capture everything but the quotes. */ /* This should capture everything but the quotes. */
do { do {
if (isRaw) {
for (size_t i = 0; i < parser.previous.length - (parser.previous.type == TOKEN_BIG_STRING ? 6 : 2); ++i) {
PUSH_CHAR(parser.previous.start[(parser.previous.type == TOKEN_BIG_STRING ? 3 : 1) + i]);
}
goto _nextStr;
}
int type = parser.previous.type == TOKEN_BIG_STRING ? 3 : 1; int type = parser.previous.type == TOKEN_BIG_STRING ? 3 : 1;
const char * c = parser.previous.start + type; const char * c = parser.previous.start + type;
const char * end = parser.previous.start + parser.previous.length - type; const char * end = parser.previous.start + parser.previous.length - type;
@ -2691,8 +2691,7 @@ static void string(int exprType) {
} else if (isFormat && *c == '}') { } else if (isFormat && *c == '}') {
if (c[1] != '}') { if (c[1] != '}') {
error("single '}' not allowed in f-string"); error("single '}' not allowed in f-string");
FREE_ARRAY(char,stringBytes,stringCapacity); goto _cleanupError;
return;
} }
PUSH_CHAR('}'); PUSH_CHAR('}');
c += 2; c += 2;
@ -2703,10 +2702,10 @@ static void string(int exprType) {
c += 2; c += 2;
continue; continue;
} }
if (!atLeastOne || stringLength) { /* Make sure there's a string for coersion reasons */ if (stringLength) { /* Make sure there's a string for coersion reasons */
emitConstant(OBJECT_VAL(krk_copyString(stringBytes,stringLength))); emitConstant(OBJECT_VAL(krk_copyString(stringBytes,stringLength)));
if (atLeastOne) emitByte(OP_ADD); formatElements++;
atLeastOne = 1; stringLength = 0;
} }
const char * start = c+1; const char * start = c+1;
stringLength = 0; stringLength = 0;
@ -2716,10 +2715,7 @@ static void string(int exprType) {
krk_rewindScanner(inner); krk_rewindScanner(inner);
advance(); advance();
parsePrecedence(PREC_COMMA); /* allow unparen'd tuples, but not assignments, as expressions in f-strings */ parsePrecedence(PREC_COMMA); /* allow unparen'd tuples, but not assignments, as expressions in f-strings */
if (parser.hadError) { if (parser.hadError) goto _cleanupError;
FREE_ARRAY(char,stringBytes,stringCapacity);
return;
}
inner = krk_tellScanner(); /* To figure out how far to advance c */ inner = krk_tellScanner(); /* To figure out how far to advance c */
krk_rewindScanner(beforeExpression); /* To get us back to where we were with a string token */ krk_rewindScanner(beforeExpression); /* To get us back to where we were with a string token */
parser = parserBefore; parser = parserBefore;
@ -2764,8 +2760,8 @@ static void string(int exprType) {
error("Expected closing '}' after expression in f-string"); error("Expected closing '}' after expression in f-string");
goto _cleanupError; goto _cleanupError;
} }
if (atLeastOne) emitByte(OP_ADD);
atLeastOne = 1; formatElements++;
c++; c++;
} else { } else {
if (*(unsigned char*)c > 127 && isBytes) { if (*(unsigned char*)c > 127 && isBytes) {
@ -2776,6 +2772,18 @@ static void string(int exprType) {
c++; c++;
} }
} }
_nextStr:
(void)0;
isRaw = 0;
isFormat = 0;
if (!isBytes) {
if (match(TOKEN_PREFIX_F)) {
isFormat = 1;
} else if (match(TOKEN_PREFIX_R)) {
isRaw = 1;
}
}
} while ((!isBytes || match(TOKEN_PREFIX_B)) && (match(TOKEN_STRING) || match(TOKEN_BIG_STRING))); } while ((!isBytes || match(TOKEN_PREFIX_B)) && (match(TOKEN_STRING) || match(TOKEN_BIG_STRING)));
if (isBytes && (match(TOKEN_STRING) || match(TOKEN_BIG_STRING))) { if (isBytes && (match(TOKEN_STRING) || match(TOKEN_BIG_STRING))) {
error("Can not mix bytes and string literals"); error("Can not mix bytes and string literals");
@ -2789,15 +2797,16 @@ static void string(int exprType) {
emitConstant(OBJECT_VAL(bytes)); emitConstant(OBJECT_VAL(bytes));
return; return;
} }
if (!isFormat || stringLength || !atLeastOne) { if (stringLength) {
emitConstant(OBJECT_VAL(krk_copyString(stringBytes,stringLength))); emitConstant(OBJECT_VAL(krk_copyString(stringBytes,stringLength)));
if (atLeastOne) emitByte(OP_ADD); formatElements++;
}
if (formatElements != 1) {
EMIT_OPERAND_OP(OP_MAKE_STRING, formatElements);
} }
FREE_ARRAY(char,stringBytes,stringCapacity);
#undef PUSH_CHAR
return;
_cleanupError: _cleanupError:
FREE_ARRAY(char,stringBytes,stringCapacity); FREE_ARRAY(char,stringBytes,stringCapacity);
#undef PUSH_CHAR
} }
static size_t addUpvalue(Compiler * compiler, ssize_t index, int isLocal) { static size_t addUpvalue(Compiler * compiler, ssize_t index, int isLocal) {

View File

@ -127,6 +127,7 @@ typedef enum {
OP_CLOSE_MANY, OP_CLOSE_MANY,
OP_POP_MANY, OP_POP_MANY,
OP_FORMAT_VALUE, OP_FORMAT_VALUE,
OP_MAKE_STRING,
/* Two opcode instructions */ /* Two opcode instructions */
OP_JUMP_IF_FALSE_OR_POP, OP_JUMP_IF_FALSE_OR_POP,
@ -179,6 +180,7 @@ typedef enum {
OP_CLOSE_MANY_LONG, OP_CLOSE_MANY_LONG,
OP_POP_MANY_LONG, OP_POP_MANY_LONG,
OP_FORMAT_VALUE_LONG, /* should be unused */ OP_FORMAT_VALUE_LONG, /* should be unused */
OP_MAKE_STRING_LONG,
} KrkOpCode; } KrkOpCode;
/** /**

View File

@ -98,6 +98,7 @@ OPERAND(OP_CALL_METHOD, (void)0)
OPERAND(OP_CLOSE_MANY, (void)0) OPERAND(OP_CLOSE_MANY, (void)0)
OPERAND(OP_POP_MANY, (void)0) OPERAND(OP_POP_MANY, (void)0)
OPERAND(OP_FORMAT_VALUE, (void)0) OPERAND(OP_FORMAT_VALUE, (void)0)
OPERAND(OP_MAKE_STRING, (void)0)
JUMP(OP_JUMP_IF_FALSE_OR_POP,+) JUMP(OP_JUMP_IF_FALSE_OR_POP,+)
JUMP(OP_JUMP_IF_TRUE_OR_POP,+) JUMP(OP_JUMP_IF_TRUE_OR_POP,+)
JUMP(OP_JUMP,+) JUMP(OP_JUMP,+)

View File

@ -3505,6 +3505,31 @@ _finishReturn: (void)0;
if (doFormatString(OPERAND)) goto _finishException; if (doFormatString(OPERAND)) goto _finishException;
break; break;
} }
/* Long form: runs THREE_BYTE_OPERAND, then (no break) falls through into the
 * OP_MAKE_STRING case below. NOTE(review): how the two operand macros combine
 * into the final OPERAND value is defined outside this view - confirm there. */
case OP_MAKE_STRING_LONG:
THREE_BYTE_OPERAND;
/* Build one string from the top OPERAND stack entries: every entry must be a
 * string; their bytes are appended in stack order and the entries are
 * replaced by the single resulting value. */
case OP_MAKE_STRING: {
ONE_BYTE_OPERAND;
struct StringBuilder sb = {0};
/* First pass: read the entries in place, from stackTop[-OPERAND] up to
 * stackTop[-1], without popping them yet. */
for (ssize_t i = 0; i < OPERAND; ++i) {
KrkValue s = krk_currentThread.stackTop[-OPERAND+i];
if (unlikely(!IS_STRING(s))) {
/* Non-string entry: discard the builder, report a valueError and leave
 * through the exception path. The entries are not popped here. */
discardStringBuilder(&sb);
krk_runtimeError(vm.exceptions->valueError, "'%s' is not a string", krk_typeName(s));
goto _finishException;
}
pushStringBuilderStr(&sb, (char*)AS_STRING(s)->chars, AS_STRING(s)->length);
}
/* Second pass: pop the OPERAND entries that were just read... */
for (ssize_t i = 0; i < OPERAND; ++i) {
krk_pop();
}
/* ...and push the finished builder's value in their place. */
krk_push(finishStringBuilder(&sb));
break;
}
} }
if (unlikely(krk_currentThread.flags & KRK_THREAD_HAS_EXCEPTION)) { if (unlikely(krk_currentThread.flags & KRK_THREAD_HAS_EXCEPTION)) {
_finishException: _finishException:

View File

@ -0,0 +1,3 @@
# Adjacent f-string, plain string and raw string literals form one printed
# string. Only the f-string segment is interpolated; '{bar}' and r'{\baz}'
# are expected to come out verbatim.
let foo = 42
print(f'{foo}' '{bar}' r'{\baz}')

View File

@ -0,0 +1 @@
42{bar}{\baz}