mirror of https://github.com/rui314/chibicc
339 lines
7.2 KiB
C
339 lines
7.2 KiB
C
#include "chibicc.h"
|
|
|
|
// Name of the input file. verror_at() prints it as the "<file>:<line>: "
// prefix of every diagnostic. Where it is assigned is not visible in this
// file section.
char *filename;
|
|
// Start of the source text. tokenize() scans from this pointer, and
// verror_at() uses it as the lower bound when locating a line and when
// counting line numbers.
char *user_input;
|
|
// Current token of the token stream. peek() inspects it; consume(),
// consume_ident() and the expect*() helpers advance it to token->next.
Token *token;
|
|
|
|
// Reports an error and exits.
//
// Formats `fmt` and its variadic arguments printf-style to stderr, appends a
// newline, and terminates the process with exit status 1. Never returns.
void error(char *fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  vfprintf(stderr, fmt, ap);
  va_end(ap); // every va_start is paired with a va_end
  fprintf(stderr, "\n");
  exit(1);
}
|
|
|
|
// Reports an error message in the following format and exit.
|
|
//
|
|
// foo.c:10: x = y + 1;
|
|
// ^ <error message here>
|
|
void verror_at(char *loc, char *fmt, va_list ap) {
|
|
// Find a line containing `loc`.
|
|
char *line = loc;
|
|
while (user_input < line && line[-1] != '\n')
|
|
line--;
|
|
|
|
char *end = loc;
|
|
while (*end != '\n')
|
|
end++;
|
|
|
|
// Get a line number.
|
|
int line_num = 1;
|
|
for (char *p = user_input; p < line; p++)
|
|
if (*p == '\n')
|
|
line_num++;
|
|
|
|
// Print out the line.
|
|
int indent = fprintf(stderr, "%s:%d: ", filename, line_num);
|
|
fprintf(stderr, "%.*s\n", (int)(end - line), line);
|
|
|
|
// Show the error message.
|
|
int pos = loc - line + indent;
|
|
fprintf(stderr, "%*s", pos, ""); // print pos spaces.
|
|
fprintf(stderr, "^ ");
|
|
vfprintf(stderr, fmt, ap);
|
|
fprintf(stderr, "\n");
|
|
}
|
|
|
|
// Reports an error at source location `loc` and exits.
//
// `loc` points into the source text; the offending line and a caret are
// printed by verror_at(), followed by the printf-style message built from
// `fmt` and the variadic arguments. Terminates the process with exit status 1.
void error_at(char *loc, char *fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  verror_at(loc, fmt, ap);
  va_end(ap); // every va_start is paired with a va_end
  exit(1);
}
|
|
|
|
// Reports an error location and exit.
|
|
void error_tok(Token *tok, char *fmt, ...) {
|
|
va_list ap;
|
|
va_start(ap, fmt);
|
|
if (tok) {
|
|
verror_at(tok->str, fmt, ap);
|
|
} else {
|
|
vfprintf(stderr, fmt, ap);
|
|
fprintf(stderr, "\n");
|
|
}
|
|
exit(1);
|
|
}
|
|
|
|
// Reports a warning at the location of token `tok` and returns.
//
// Prints exactly what error_tok() prints (the source line with a caret when
// `tok` is non-NULL, otherwise just the formatted message) but does not
// terminate the process.
void warn_tok(Token *tok, char *fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  if (tok) {
    verror_at(tok->str, fmt, ap);
  } else {
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
  }
  // This function returns normally, so va_end is mandatory: returning
  // without it after va_start is undefined behavior.
  va_end(ap);
}
|
|
|
|
// Returns a newly allocated, NUL-terminated string holding the first `len`
// bytes of `p` (fewer if `p` contains a NUL earlier; the remainder is then
// zero-filled by strncpy).
//
// The result comes from malloc. If the allocation fails, a message is
// printed to stderr and the process exits with status 1, so the return
// value is never NULL.
//
// NOTE(review): this shares its name with POSIX strndup but takes an `int`
// length; confirm the libc prototype is not visible where this is compiled.
char *strndup(char *p, int len) {
  char *buf = malloc(len + 1);
  if (!buf) {
    // Callers use the result unconditionally, so fail loudly here instead
    // of writing through a NULL pointer below.
    fprintf(stderr, "out of memory\n");
    exit(1);
  }
  strncpy(buf, p, len);
  buf[len] = '\0'; // strncpy does not terminate when `p` has >= len bytes
  return buf;
}
|
|
|
|
// Returns true if the current token matches a given string.
|
|
Token *peek(char *s) {
|
|
if (token->kind != TK_RESERVED || strlen(s) != token->len ||
|
|
memcmp(token->str, s, token->len))
|
|
return NULL;
|
|
return token;
|
|
}
|
|
|
|
// Consumes the current token if it matches a given string.
|
|
Token *consume(char *s) {
|
|
if (!peek(s))
|
|
return NULL;
|
|
Token *t = token;
|
|
token = token->next;
|
|
return t;
|
|
}
|
|
|
|
// Consumes the current token if it is an identifier.
|
|
Token *consume_ident() {
|
|
if (token->kind != TK_IDENT)
|
|
return NULL;
|
|
Token *t = token;
|
|
token = token->next;
|
|
return t;
|
|
}
|
|
|
|
// Ensure that the current token is a given string
|
|
void expect(char *s) {
|
|
if (!peek(s))
|
|
error_tok(token, "expected \"%s\"", s);
|
|
token = token->next;
|
|
}
|
|
|
|
// Ensure that the current token is TK_NUM.
|
|
long expect_number() {
|
|
if (token->kind != TK_NUM)
|
|
error_tok(token, "expected a number");
|
|
long val = token->val;
|
|
token = token->next;
|
|
return val;
|
|
}
|
|
|
|
// Ensure that the current token is TK_IDENT.
|
|
char *expect_ident() {
|
|
if (token->kind != TK_IDENT)
|
|
error_tok(token, "expected an identifier");
|
|
char *s = strndup(token->str, token->len);
|
|
token = token->next;
|
|
return s;
|
|
}
|
|
|
|
bool at_eof() {
|
|
return token->kind == TK_EOF;
|
|
}
|
|
|
|
// Create a new token and add it as the next token of `cur`.
|
|
Token *new_token(TokenKind kind, Token *cur, char *str, int len) {
|
|
Token *tok = calloc(1, sizeof(Token));
|
|
tok->kind = kind;
|
|
tok->str = str;
|
|
tok->len = len;
|
|
cur->next = tok;
|
|
return tok;
|
|
}
|
|
|
|
// Returns true if the NUL-terminated string `p` begins with the
// NUL-terminated string `q`. An empty `q` always matches.
//
// strncmp stops at the first NUL in either string, so `p` is never read
// beyond its terminator even when it is shorter than `q`.
bool startswith(char *p, char *q) {
  return strncmp(p, q, strlen(q)) == 0;
}
|
|
|
|
// Returns true if `c` may start an identifier: an ASCII letter or '_'.
bool is_alpha(char c) {
  if (c == '_')
    return true;
  if ('a' <= c && c <= 'z')
    return true;
  return 'A' <= c && c <= 'Z';
}
|
|
|
|
// Returns true if `c` may appear inside an identifier: an ASCII digit or
// anything is_alpha() accepts.
bool is_alnum(char c) {
  if ('0' <= c && c <= '9')
    return true;
  return is_alpha(c);
}
|
|
|
|
// Returns the keyword or multi-letter punctuator that `p` begins with, or
// NULL if there is none. The returned pointer refers to an entry of a static
// table, not to `p`.
char *starts_with_reserved(char *p) {
  static char *keywords[] = {
      "return", "if",     "else",   "while",    "for",    "int",
      "char",   "sizeof", "struct", "typedef",  "short",  "long",
      "void",   "_Bool",  "enum",   "static",   "break",  "continue",
      "goto",   "switch", "case",   "default"};

  // Three-letter operators are listed first so that they win over their
  // two-letter prefixes (e.g. "<<=" before "<<").
  static char *punctuators[] = {"<<=", ">>=", "==", "!=", "<=", ">=",
                                "->",  "++",  "--", "<<", ">>", "+=",
                                "-=",  "*=",  "/=", "&&", "||"};

  int keyword_count = sizeof(keywords) / sizeof(keywords[0]);
  int punctuator_count = sizeof(punctuators) / sizeof(punctuators[0]);

  // A keyword only counts when it is not merely the prefix of a longer
  // identifier, i.e. the character after it is not alphanumeric.
  for (int k = 0; k < keyword_count; k++) {
    char *word = keywords[k];
    if (!startswith(p, word))
      continue;
    if (is_alnum(p[strlen(word)]))
      continue;
    return word;
  }

  for (int k = 0; k < punctuator_count; k++) {
    char *op = punctuators[k];
    if (startswith(p, op))
      return op;
  }

  return NULL;
}
|
|
|
|
// Maps the character that follows a backslash in a literal to the character
// the escape sequence denotes. `e` yields 27 and `0` yields 0; characters
// with no entry in the table are returned unchanged.
char get_escape_char(char c) {
  static const struct {
    char letter;
    char value;
  } escapes[] = {
      {'a', '\a'}, {'b', '\b'}, {'t', '\t'}, {'n', '\n'}, {'v', '\v'},
      {'f', '\f'}, {'r', '\r'}, {'e', 27},   {'0', 0},
  };
  int count = sizeof(escapes) / sizeof(escapes[0]);

  for (int k = 0; k < count; k++)
    if (escapes[k].letter == c)
      return escapes[k].value;

  return c;
}
|
|
|
|
// Reads the string literal that starts at `start` (which points at the
// opening double quote) and appends a TK_STR token for it after `cur`.
//
// Escape sequences are decoded with get_escape_char(). The token's
// `contents` is a malloc'ed, NUL-terminated copy of the decoded bytes and
// `cont_len` is their count including the terminating NUL. The token's `len`
// spans the source text from the opening to the closing quote inclusive.
//
// Errors are reported with error_at(), which exits: a literal that is not
// closed before the end of input, a decoded literal larger than the internal
// buffer, or allocation failure.
Token *read_string_literal(Token *cur, char *start) {
  char *p = start + 1;
  char buf[1024];
  int len = 0;

  for (;;) {
    if (*p == '\0')
      error_at(start, "unclosed string literal");
    if (*p == '"')
      break;

    // Check capacity only when another byte is about to be stored, so a
    // literal that exactly fills the buffer is still accepted.
    if (len == (int)sizeof(buf))
      error_at(start, "string literal too large");

    if (*p == '\\') {
      p++;
      // A backslash directly before the end of input must not be consumed
      // as an escape; doing so would step `p` past the terminating NUL.
      if (*p == '\0')
        error_at(start, "unclosed string literal");
      buf[len++] = get_escape_char(*p++);
    } else {
      buf[len++] = *p++;
    }
  }

  // `p` is on the closing quote; +1 includes it in the token text.
  Token *tok = new_token(TK_STR, cur, start, p - start + 1);
  tok->contents = malloc(len + 1);
  if (!tok->contents)
    error_at(start, "out of memory");
  memcpy(tok->contents, buf, len);
  tok->contents[len] = '\0';
  tok->cont_len = len + 1;
  return tok;
}
|
|
|
|
// Reads the character literal that starts at `start` (which points at the
// opening single quote) and appends a TK_NUM token for it after `cur`.
//
// The token's `val` is the character's value, with escape sequences decoded
// by get_escape_char(); its `len` spans the source text from the opening to
// the closing quote inclusive.
//
// Errors are reported with error_at(), which exits: end of input inside the
// literal, or anything other than a closing quote after the first character.
Token *read_char_literal(Token *cur, char *start) {
  char *p = start + 1;
  if (*p == '\0')
    error_at(start, "unclosed char literal");

  char c;
  if (*p == '\\') {
    p++;
    // A backslash directly before the end of input must not be consumed as
    // an escape; doing so would step `p` past the terminating NUL.
    if (*p == '\0')
      error_at(start, "unclosed char literal");
    c = get_escape_char(*p++);
  } else {
    c = *p++;
  }

  if (*p != '\'')
    error_at(start, "char literal too long");
  p++;

  Token *tok = new_token(TK_NUM, cur, start, p - start);
  tok->val = c;
  return tok;
}
|
|
|
|
// Tokenize `user_input` and returns new tokens.
|
|
Token *tokenize() {
|
|
char *p = user_input;
|
|
Token head;
|
|
head.next = NULL;
|
|
Token *cur = &head;
|
|
|
|
while (*p) {
|
|
// Skip whitespace characters.
|
|
if (isspace(*p)) {
|
|
p++;
|
|
continue;
|
|
}
|
|
|
|
// Skip line comments.
|
|
if (startswith(p, "//")) {
|
|
p += 2;
|
|
while (*p != '\n')
|
|
p++;
|
|
continue;
|
|
}
|
|
|
|
// Skip block comments.
|
|
if (startswith(p, "/*")) {
|
|
char *q = strstr(p + 2, "*/");
|
|
if (!q)
|
|
error_at(p, "unclosed block comment");
|
|
p = q + 2;
|
|
continue;
|
|
}
|
|
|
|
// Keyword or multi-letter punctuator
|
|
char *kw = starts_with_reserved(p);
|
|
if (kw) {
|
|
int len = strlen(kw);
|
|
cur = new_token(TK_RESERVED, cur, p, len);
|
|
p += len;
|
|
continue;
|
|
}
|
|
|
|
// Single-letter punctuator
|
|
if (strchr("+-*/()<>;={},&[].,!~|^:?", *p)) {
|
|
cur = new_token(TK_RESERVED, cur, p++, 1);
|
|
continue;
|
|
}
|
|
|
|
// Identifier
|
|
if (is_alpha(*p)) {
|
|
char *q = p++;
|
|
while (is_alnum(*p))
|
|
p++;
|
|
cur = new_token(TK_IDENT, cur, q, p - q);
|
|
continue;
|
|
}
|
|
|
|
// String literal
|
|
if (*p == '"') {
|
|
cur = read_string_literal(cur, p);
|
|
p += cur->len;
|
|
continue;
|
|
}
|
|
|
|
// Character literal
|
|
if (*p == '\'') {
|
|
cur = read_char_literal(cur, p);
|
|
p += cur->len;
|
|
continue;
|
|
}
|
|
|
|
// Integer literal
|
|
if (isdigit(*p)) {
|
|
cur = new_token(TK_NUM, cur, p, 0);
|
|
char *q = p;
|
|
cur->val = strtol(p, &p, 10);
|
|
cur->len = p - q;
|
|
continue;
|
|
}
|
|
|
|
error_at(p, "invalid token");
|
|
}
|
|
|
|
new_token(TK_EOF, cur, p, 0);
|
|
return head.next;
|
|
}
|