Add UTF-8 string literal

This commit is contained in:
Rui Ueyama 2020-09-27 19:47:36 +09:00
parent 2dac3afece
commit 57b21fe902
2 changed files with 17 additions and 5 deletions

View File

@ -34,6 +34,11 @@ int main() {
ASSERT(0, strcmp(STR(U'a'), "U'a'"));
ASSERT(4, sizeof(u8"abc"));
ASSERT(0, strcmp(u8"abc", "abc"));
ASSERT(0, strcmp(STR(u8"a"), "u8\"a\""));
printf("OK\n");
return 0;
}

View File

@ -228,12 +228,12 @@ static char *string_literal_end(char *p) {
return p;
}
static Token *read_string_literal(char *start) {
char *end = string_literal_end(start + 1);
char *buf = calloc(1, end - start);
static Token *read_string_literal(char *start, char *quote) {
char *end = string_literal_end(quote + 1);
char *buf = calloc(1, end - quote);
int len = 0;
for (char *p = start + 1; p < end;) {
for (char *p = quote + 1; p < end;) {
if (*p == '\\')
buf[len++] = read_escaped_char(&p, p + 1);
else
@ -467,7 +467,14 @@ Token *tokenize(File *file) {
// String literal
if (*p == '"') {
cur = cur->next = read_string_literal(p);
cur = cur->next = read_string_literal(p, p);
p += cur->len;
continue;
}
// UTF-8 string literal
if (startswith(p, "u8\"")) {
cur = cur->next = read_string_literal(p, p + 2);
p += cur->len;
continue;
}