From cae061af2b65ad0962fb4b6fe3b55abe2f3a5bf8 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Sun, 5 Jul 2020 20:49:58 +0900 Subject: [PATCH] Add wide string literal --- test/unicode.c | 13 +++++++++++++ tokenize.c | 7 +++++++ 2 files changed, 20 insertions(+) diff --git a/test/unicode.c b/test/unicode.c index 1b36547..235a6ba 100644 --- a/test/unicode.c +++ b/test/unicode.c @@ -64,6 +64,19 @@ int main() { ASSERT(0, strcmp(STR(U"a"), "U\"a\"")); + ASSERT(4, sizeof(L"")); + ASSERT(20, sizeof(L"\xffzzz")); + ASSERT(0, memcmp(L"", "\0\0\0\0", 4)); + ASSERT(0, memcmp(L"abc", "a\0\0\0b\0\0\0c\0\0\0\0\0\0\0", 16)); + ASSERT(0, memcmp(L"日本語", "\345e\0\0,g\0\0\236\212\0\0\0\0\0\0", 16)); + ASSERT(0, memcmp(L"🍣", "c\363\001\0\0\0\0\0", 8)); + ASSERT(u'β', L"βb"[0]); + ASSERT(u'b', L"βb"[1]); + ASSERT(0, L"βb"[2]); + ASSERT(-1, L"\xffffffff"[0] >> 31); + + ASSERT(0, strcmp(STR(L"a"), "L\"a\"")); + printf("OK\n"); return 0; } diff --git a/tokenize.c b/tokenize.c index e5c7f38..8d6bc9d 100644 --- a/tokenize.c +++ b/tokenize.c @@ -544,6 +544,13 @@ Token *tokenize(File *file) { continue; } + // Wide string literal + if (startswith(p, "L\"")) { + cur = cur->next = read_utf32_string_literal(p, p + 1, ty_int); + p += cur->len; + continue; + } + // UTF-32 string literal if (startswith(p, "U\"")) { cur = cur->next = read_utf32_string_literal(p, p + 1, ty_uint);