Add \a, \b, \t, \n \v, \f, \r and \e

This commit is contained in:
Rui Ueyama 2019-08-06 22:36:19 +09:00
parent 35a0bcd366
commit ad7749f2fa
2 changed files with 70 additions and 6 deletions

18
test.sh
View File

@ -180,4 +180,22 @@ assert 99 'int main() { return "abc"[2]; }'
assert 0 'int main() { return "abc"[3]; }'
assert 4 'int main() { return sizeof("abc"); }'
assert 7 'int main() { return "\a"[0]; }'
assert 8 'int main() { return "\b"[0]; }'
assert 9 'int main() { return "\t"[0]; }'
assert 10 'int main() { return "\n"[0]; }'
assert 11 'int main() { return "\v"[0]; }'
assert 12 'int main() { return "\f"[0]; }'
assert 13 'int main() { return "\r"[0]; }'
assert 27 'int main() { return "\e"[0]; }'
assert 106 'int main() { return "\j"[0]; }'
assert 107 'int main() { return "\k"[0]; }'
assert 108 'int main() { return "\l"[0]; }'
assert 7 'int main() { return "\ax\ny"[0]; }'
assert 120 'int main() { return "\ax\ny"[1]; }'
assert 10 'int main() { return "\ax\ny"[2]; }'
assert 121 'int main() { return "\ax\ny"[3]; }'
echo OK

View File

@ -99,15 +99,61 @@ static bool is_keyword(Token *tok) {
return false;
}
static Token *read_string_literal(char *start) {
char *p = start + 1;
for (; *p != '"'; p++)
static int read_escaped_char(char *p) {
// Escape sequences are defined using themselves here. E.g.
// '\n' is implemented using '\n'. This tautological definition
// works because the compiler that compiles our compiler knows
// what '\n' actually is. In other words, we "inherit" the ASCII
// code of '\n' from the compiler that compiles our compiler,
// so we don't have to teach the actual code here.
//
// This fact has huge implications not only for the correctness
// of the compiler but also for the security of the generated code.
// For more info, read "Reflections on Trusting Trust" by Ken Thompson.
// https://github.com/rui314/chibicc/wiki/thompson1984.pdf
switch (*p) {
case 'a': return '\a';
case 'b': return '\b';
case 't': return '\t';
case 'n': return '\n';
case 'v': return '\v';
case 'f': return '\f';
case 'r': return '\r';
// [GNU] \e for the ASCII escape character is a GNU C extension.
case 'e': return 27;
default: return *p;
}
}
// Find a closing double-quote.
static char *string_literal_end(char *p) {
char *start = p;
for (; *p != '"'; p++) {
if (*p == '\n' || *p == '\0')
error_at(start, "unclosed string literal");
if (*p == '\\')
p++;
}
return p;
}
Token *tok = new_token(TK_STR, start, p + 1);
tok->ty = array_of(ty_char, p - start);
tok->str = strndup(start + 1, p - start - 1);
static Token *read_string_literal(char *start) {
char *end = string_literal_end(start + 1);
char *buf = calloc(1, end - start);
int len = 0;
for (char *p = start + 1; p < end;) {
if (*p == '\\') {
buf[len++] = read_escaped_char(p + 1);
p += 2;
} else {
buf[len++] = *p++;
}
}
Token *tok = new_token(TK_STR, start, end + 1);
tok->ty = array_of(ty_char, len + 1);
tok->str = buf;
return tok;
}