mirror of
https://github.com/rui314/chibicc
synced 2025-02-19 23:23:57 +03:00
Skip UTF-8 BOM markers
This commit is contained in:
parent
238277714d
commit
2b2fa25507
@ -109,4 +109,8 @@ $chibicc -c -O -Wall -g -std=c11 -ffreestanding -fno-builtin \
|
||||
-m64 -mno-red-zone -w -o /dev/null $tmp/empty.c
|
||||
check 'ignored options'
|
||||
|
||||
# BOM marker
|
||||
printf '\xef\xbb\xbfxyz\n' | $chibicc -E -o- - | grep -q '^xyz'
|
||||
check 'BOM marker'
|
||||
|
||||
echo OK
|
||||
|
@ -772,6 +772,13 @@ Token *tokenize_file(char *path) {
|
||||
if (!p)
|
||||
return NULL;
|
||||
|
||||
// UTF-8 texts may start with a 3-byte "BOM" marker sequence.
|
||||
// If it exists, just skip it because its bytes are useless.
|
||||
// (It is actually not recommended to add BOM markers to UTF-8
|
||||
// texts, but it's not uncommon particularly on Windows.)
|
||||
if (!memcmp(p, "\xef\xbb\xbf", 3))
|
||||
p += 3;
|
||||
|
||||
canonicalize_newline(p);
|
||||
remove_backslash_newline(p);
|
||||
convert_universal_chars(p);
|
||||
|
Loading…
x
Reference in New Issue
Block a user