mirror of https://github.com/rui314/chibicc
Add \a, \b, \t, \n \v, \f, \r and \e
This commit is contained in:
parent
35a0bcd366
commit
ad7749f2fa
18
test.sh
18
test.sh
|
@ -180,4 +180,22 @@ assert 99 'int main() { return "abc"[2]; }'
|
|||
assert 0 'int main() { return "abc"[3]; }'
|
||||
assert 4 'int main() { return sizeof("abc"); }'
|
||||
|
||||
assert 7 'int main() { return "\a"[0]; }'
|
||||
assert 8 'int main() { return "\b"[0]; }'
|
||||
assert 9 'int main() { return "\t"[0]; }'
|
||||
assert 10 'int main() { return "\n"[0]; }'
|
||||
assert 11 'int main() { return "\v"[0]; }'
|
||||
assert 12 'int main() { return "\f"[0]; }'
|
||||
assert 13 'int main() { return "\r"[0]; }'
|
||||
assert 27 'int main() { return "\e"[0]; }'
|
||||
|
||||
assert 106 'int main() { return "\j"[0]; }'
|
||||
assert 107 'int main() { return "\k"[0]; }'
|
||||
assert 108 'int main() { return "\l"[0]; }'
|
||||
|
||||
assert 7 'int main() { return "\ax\ny"[0]; }'
|
||||
assert 120 'int main() { return "\ax\ny"[1]; }'
|
||||
assert 10 'int main() { return "\ax\ny"[2]; }'
|
||||
assert 121 'int main() { return "\ax\ny"[3]; }'
|
||||
|
||||
echo OK
|
||||
|
|
58
tokenize.c
58
tokenize.c
|
@ -99,15 +99,61 @@ static bool is_keyword(Token *tok) {
|
|||
return false;
|
||||
}
|
||||
|
||||
static Token *read_string_literal(char *start) {
|
||||
char *p = start + 1;
|
||||
for (; *p != '"'; p++)
|
||||
static int read_escaped_char(char *p) {
|
||||
// Escape sequences are defined using themselves here. E.g.
|
||||
// '\n' is implemented using '\n'. This tautological definition
|
||||
// works because the compiler that compiles our compiler knows
|
||||
// what '\n' actually is. In other words, we "inherit" the ASCII
|
||||
// code of '\n' from the compiler that compiles our compiler,
|
||||
// so we don't have to teach the actual code here.
|
||||
//
|
||||
// This fact has huge implications not only for the correctness
|
||||
// of the compiler but also for the security of the generated code.
|
||||
// For more info, read "Reflections on Trusting Trust" by Ken Thompson.
|
||||
// https://github.com/rui314/chibicc/wiki/thompson1984.pdf
|
||||
switch (*p) {
|
||||
case 'a': return '\a';
|
||||
case 'b': return '\b';
|
||||
case 't': return '\t';
|
||||
case 'n': return '\n';
|
||||
case 'v': return '\v';
|
||||
case 'f': return '\f';
|
||||
case 'r': return '\r';
|
||||
// [GNU] \e for the ASCII escape character is a GNU C extension.
|
||||
case 'e': return 27;
|
||||
default: return *p;
|
||||
}
|
||||
}
|
||||
|
||||
// Find a closing double-quote.
|
||||
static char *string_literal_end(char *p) {
|
||||
char *start = p;
|
||||
for (; *p != '"'; p++) {
|
||||
if (*p == '\n' || *p == '\0')
|
||||
error_at(start, "unclosed string literal");
|
||||
if (*p == '\\')
|
||||
p++;
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
Token *tok = new_token(TK_STR, start, p + 1);
|
||||
tok->ty = array_of(ty_char, p - start);
|
||||
tok->str = strndup(start + 1, p - start - 1);
|
||||
static Token *read_string_literal(char *start) {
|
||||
char *end = string_literal_end(start + 1);
|
||||
char *buf = calloc(1, end - start);
|
||||
int len = 0;
|
||||
|
||||
for (char *p = start + 1; p < end;) {
|
||||
if (*p == '\\') {
|
||||
buf[len++] = read_escaped_char(p + 1);
|
||||
p += 2;
|
||||
} else {
|
||||
buf[len++] = *p++;
|
||||
}
|
||||
}
|
||||
|
||||
Token *tok = new_token(TK_STR, start, end + 1);
|
||||
tok->ty = array_of(ty_char, len + 1);
|
||||
tok->str = buf;
|
||||
return tok;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue