Fix up string escapes and make sure we handle NUL bytes when printing

This commit is contained in:
K. Lange 2021-01-11 11:41:26 +09:00
parent faba2e9e3f
commit 213c496372
4 changed files with 65 additions and 9 deletions

View File

@ -1490,6 +1490,10 @@ static void unary(int canAssign) {
}
}
/* Report whether c is an ASCII hexadecimal digit (0-9, a-f or A-F).
 * Yields 1 for a hex digit and 0 for anything else. */
static int isHex(int c) {
	if (c >= '0' && c <= '9') return 1; /* decimal digit */
	if (c >= 'a' && c <= 'f') return 1; /* lowercase hex letter */
	if (c >= 'A' && c <= 'F') return 1; /* uppercase hex letter */
	return 0;
}
static void string(int type) {
/* We'll just build with a flexible array like everything else. */
size_t stringCapacity = 0;
@ -1504,13 +1508,22 @@ static void string(int type) {
do {
int type = parser.previous.type == TOKEN_BIG_STRING ? 3 : 1;
const char * c = parser.previous.start + type;
while (c < parser.previous.start + parser.previous.length - type) {
const char * end = parser.previous.start + parser.previous.length - type;
while (c < end) {
if (*c == '\\') {
switch (c[1]) {
case 'n': PUSH_CHAR('\n'); break;
case 'r': PUSH_CHAR('\r'); break;
case 't': PUSH_CHAR('\t'); break;
case '[': PUSH_CHAR('\033'); break;
case 'x':
if (c+2 == end || c+3 == end || !isHex(c[2]) || !isHex(c[3])) {
error("invalid \\x escape");
return;
}
PUSH_CHAR(strtoul((char[]){c[2],c[3],'\0'}, NULL, 16));
c += 2;
break;
case '\n': break;
default: PUSH_CHAR(c[1]); break;
}

15
modules/string.krk Normal file
View File

@ -0,0 +1,15 @@
'''
Helpful string constants.
'''
# The 26 lowercase and the 26 uppercase ASCII letters, each in alphabetical order.
let ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
let ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
# Lowercase letters followed by uppercase letters.
let ascii_letters = ascii_lowercase + ascii_uppercase
# Decimal digits in ascending order.
let digits = '0123456789'
# Decimal digits followed by the hex letters a-f in lowercase, then uppercase.
let hexdigits = digits + 'abcdefABCDEF'
# Octal digits in ascending order.
let octdigits = '01234567'
# ASCII punctuation; the single quote and the backslash are backslash-escaped in the literal.
let punctuation = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
# Tab, newline, vertical tab (\x0b), form feed (\x0c), carriage return, and space.
let whitespace = '\t\n\x0b\x0c\r '
# Concatenation of the groups above: digits, letters, punctuation, then whitespace.
let printable = digits + ascii_letters + punctuation + whitespace

View File

@ -511,7 +511,14 @@ void paint_krk_string(struct syntax_state * state, int type) {
paint(1, FLAG_STRING);
return;
} else if (charat() == '\\') {
paint(2, FLAG_ESCAPE);
if (nextchar() == 'x') {
paint(2, FLAG_ESCAPE);
/* Why is my FLAG_ERROR not valid in rline? */
paint(1, isxdigit(charat()) ? FLAG_ESCAPE : FLAG_DIFFMINUS);
paint(1, isxdigit(charat()) ? FLAG_ESCAPE : FLAG_DIFFMINUS);
} else {
paint(2, FLAG_ESCAPE);
}
} else {
paint(1, FLAG_STRING);
}

35
vm.c
View File

@ -1413,8 +1413,8 @@ static KrkValue _char_to_int(int argc, KrkValue argv[]) {
static KrkValue _print(int argc, KrkValue argv[], int hasKw) {
KrkValue sepVal, endVal;
char * sep = " ";
char * end = "\n";
char * sep = " "; size_t sepLen = 1;
char * end = "\n"; size_t endLen = 1;
if (hasKw) {
argc--;
KrkValue _dict_internal = OBJECT_VAL(AS_INSTANCE(argv[argc])->_internal);
@ -1424,6 +1424,7 @@ static KrkValue _print(int argc, KrkValue argv[], int hasKw) {
return NONE_VAL();
}
sep = AS_CSTRING(sepVal);
sepLen = AS_STRING(sepVal)->length;
}
if (krk_tableGet(AS_DICT(_dict_internal), OBJECT_VAL(S("end")), &endVal)) {
if (!IS_STRING(endVal)) {
@ -1431,16 +1432,23 @@ static KrkValue _print(int argc, KrkValue argv[], int hasKw) {
return NONE_VAL();
}
end = AS_CSTRING(endVal);
endLen = AS_STRING(endVal)->length;
}
}
for (int i = 0; i < argc; ++i) {
KrkValue printable = argv[i];
if (IS_STRING(printable)) { /* krk_printValue runs repr */
fprintf(stdout, "%s", AS_CSTRING(printable));
/* Make sure we handle nil bits correctly. */
for (size_t j = 0; j < AS_STRING(printable)->length; ++j) {
fputc(AS_CSTRING(printable)[j], stdout);
}
} else {
krk_printValue(stdout, printable);
}
fprintf(stdout, "%s", (i == argc - 1) ? end : sep);
char * thingToPrint = (i == argc - 1) ? end : sep;
for (size_t j = 0; j < ((i == argc - 1) ? endLen : sepLen); ++j) {
fputc(thingToPrint[j], stdout);
}
}
return NONE_VAL();
}
@ -2203,10 +2211,11 @@ static KrkValue _module_repr(int argc, KrkValue argv[]) {
* should escape characters like quotes.
*/
static KrkValue _repr_str(int argc, KrkValue argv[]) {
char * str = malloc(3 + AS_STRING(argv[0])->length * 2);
char * str = malloc(3 + AS_STRING(argv[0])->length * 4); /* x 4 because a string of all < 32s would be a lot of \xXX */
char * tmp = str;
*(tmp++) = '\'';
for (char * c = AS_CSTRING(argv[0]); *c; ++c) {
char * end = AS_CSTRING(argv[0]) + AS_STRING(argv[0])->length;
for (char * c = AS_CSTRING(argv[0]); c < end; ++c) {
switch (*c) {
/* XXX: Other non-printables should probably be escaped as well. */
case '\n': *(tmp++) = '\\'; *(tmp++) = 'n'; break;
@ -2215,7 +2224,19 @@ static KrkValue _repr_str(int argc, KrkValue argv[]) {
case '\'': *(tmp++) = '\\'; *(tmp++) = '\''; break;
case '\\': *(tmp++) = '\\'; *(tmp++) = '\\'; break;
case 27: *(tmp++) = '\\'; *(tmp++) = '['; break;
default: *(tmp++) = *c; break;
default: {
if ((unsigned char)*c < ' ') {
*(tmp++) = '\\';
*(tmp++) = 'x';
char hex[3];
sprintf(hex,"%02x", (unsigned char)*c);
*(tmp++) = hex[0];
*(tmp++) = hex[1];
} else {
*(tmp++) = *c;
}
break;
}
}
}
*(tmp++) = '\'';