2020-08-18 03:41:59 +03:00
|
|
|
#include "chibicc.h"
|
|
|
|
|
2020-03-29 07:29:48 +03:00
|
|
|
typedef struct Macro Macro;
|
|
|
|
struct Macro {
|
|
|
|
Macro *next;
|
|
|
|
char *name;
|
|
|
|
Token *body;
|
2020-03-29 12:23:33 +03:00
|
|
|
bool deleted;
|
2020-03-29 07:29:48 +03:00
|
|
|
};
|
|
|
|
|
2020-08-20 13:36:36 +03:00
|
|
|
// `#if` can be nested, so we use a stack to manage nested `#if`s.
|
|
|
|
typedef struct CondIncl CondIncl;
|
|
|
|
struct CondIncl {
|
|
|
|
CondIncl *next;
|
2020-03-29 02:51:06 +03:00
|
|
|
enum { IN_THEN, IN_ELIF, IN_ELSE } ctx;
|
2020-08-20 13:36:36 +03:00
|
|
|
Token *tok;
|
2020-03-30 03:57:07 +03:00
|
|
|
bool included;
|
2020-08-20 13:36:36 +03:00
|
|
|
};
|
|
|
|
|
2020-03-29 07:29:48 +03:00
|
|
|
static Macro *macros;
|
2020-08-20 13:36:36 +03:00
|
|
|
static CondIncl *cond_incl;
|
|
|
|
|
2020-08-20 13:36:49 +03:00
|
|
|
static Token *preprocess2(Token *tok);
|
|
|
|
|
2020-09-03 13:24:23 +03:00
|
|
|
// Formats the arguments printf-style and returns the result as a newly
// allocated, NUL-terminated string. The caller owns the returned buffer
// and may release it with free(). Terminates the process if the text
// cannot be formatted or the buffer cannot be allocated.
static char *format(char *fmt, ...) {
  va_list ap;
  va_start(ap, fmt);

  // The argument list can only be walked once, so measure the output
  // length on a copy and keep `ap` for the real formatting pass.
  va_list measure;
  va_copy(measure, ap);
  int len = vsnprintf(NULL, 0, fmt, measure);
  va_end(measure);

  char *buf = (len < 0) ? NULL : malloc((size_t)len + 1);
  if (!buf) {
    va_end(ap);
    fprintf(stderr, "format: cannot build string\n");
    exit(1);
  }

  vsnprintf(buf, (size_t)len + 1, fmt, ap);
  va_end(ap);
  return buf;
}
|
2020-08-20 13:36:49 +03:00
|
|
|
|
2020-03-30 03:30:06 +03:00
|
|
|
// Returns true if `tok` is a `#` that is the first token on its line,
// i.e. the token that introduces a preprocessor directive.
static bool is_hash(Token *tok) {
  if (!tok->at_bol)
    return false;
  return equal(tok, "#");
}
|
|
|
|
|
2020-04-21 04:46:26 +03:00
|
|
|
// Some preprocessor directives such as #include allow extraneous
|
|
|
|
// tokens before newline. This function skips such tokens.
|
|
|
|
static Token *skip_line(Token *tok) {
|
|
|
|
if (tok->at_bol)
|
|
|
|
return tok;
|
|
|
|
warn_tok(tok, "extra token");
|
|
|
|
while (tok->at_bol)
|
|
|
|
tok = tok->next;
|
|
|
|
return tok;
|
|
|
|
}
|
|
|
|
|
2020-09-03 13:24:23 +03:00
|
|
|
// Returns a newly allocated copy of `tok` whose `next` link is cleared,
// so the copy is not attached to any token list.
static Token *copy_token(Token *tok) {
  Token *dup = calloc(1, sizeof *dup);
  *dup = *tok;
  dup->next = NULL;
  return dup;
}
|
|
|
|
|
2020-08-20 13:36:36 +03:00
|
|
|
// Creates a zero-length TK_EOF token. All other fields are copied from
// `tok`.
static Token *new_eof(Token *tok) {
  Token *eof = copy_token(tok);
  eof->len = 0;
  eof->kind = TK_EOF;
  return eof;
}
|
|
|
|
|
2020-09-03 13:24:23 +03:00
|
|
|
// Append tok2 to the end of tok1.
|
|
|
|
static Token *append(Token *tok1, Token *tok2) {
|
2020-03-29 07:29:48 +03:00
|
|
|
if (tok1->kind == TK_EOF)
|
2020-09-03 13:24:23 +03:00
|
|
|
return tok2;
|
|
|
|
|
|
|
|
Token head = {};
|
|
|
|
Token *cur = &head;
|
|
|
|
|
2020-03-29 07:29:48 +03:00
|
|
|
for (; tok1->kind != TK_EOF; tok1 = tok1->next)
|
2020-09-03 13:24:23 +03:00
|
|
|
cur = cur->next = copy_token(tok1);
|
|
|
|
cur->next = tok2;
|
|
|
|
return head.next;
|
|
|
|
}
|
|
|
|
|
2020-03-30 03:57:07 +03:00
|
|
|
// Skips the rest of a nested `#if` group. `tok` is the token that
// follows the `if` keyword. Returns the token after the matching
// `endif` keyword, or the TK_EOF token if the group is never closed.
// Inner `#if` groups are skipped recursively.
static Token *skip_cond_incl2(Token *tok) {
  for (;;) {
    if (tok->kind == TK_EOF)
      return tok;

    if (!is_hash(tok)) {
      tok = tok->next;
      continue;
    }

    Token *directive = tok->next;
    if (equal(directive, "if")) {
      tok = skip_cond_incl2(directive->next);
      continue;
    }
    if (equal(directive, "endif"))
      return directive->next;

    tok = tok->next;
  }
}
|
|
|
|
|
2020-03-29 02:51:06 +03:00
|
|
|
// Skip until next `#else`, `#elif` or `#endif`.
|
2020-03-30 03:54:47 +03:00
|
|
|
// Nested `#if` and `#endif` are skipped.
|
2020-08-20 13:36:36 +03:00
|
|
|
static Token *skip_cond_incl(Token *tok) {
|
|
|
|
while (tok->kind != TK_EOF) {
|
2020-03-30 03:54:47 +03:00
|
|
|
if (is_hash(tok) && equal(tok->next, "if")) {
|
2020-03-30 03:57:07 +03:00
|
|
|
tok = skip_cond_incl2(tok->next->next);
|
2020-03-30 03:54:47 +03:00
|
|
|
continue;
|
|
|
|
}
|
2020-03-30 03:57:07 +03:00
|
|
|
|
|
|
|
if (is_hash(tok) &&
|
2020-03-29 02:51:06 +03:00
|
|
|
(equal(tok->next, "elif") || equal(tok->next, "else") ||
|
|
|
|
equal(tok->next, "endif")))
|
2020-03-30 03:54:47 +03:00
|
|
|
break;
|
2020-08-20 13:36:36 +03:00
|
|
|
tok = tok->next;
|
|
|
|
}
|
|
|
|
return tok;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copy all tokens until the next newline, terminate them with
|
|
|
|
// an EOF token and then returns them. This function is used to
|
|
|
|
// create a new list of tokens for `#if` arguments.
|
|
|
|
static Token *copy_line(Token **rest, Token *tok) {
|
|
|
|
Token head = {};
|
|
|
|
Token *cur = &head;
|
|
|
|
|
|
|
|
for (; !tok->at_bol; tok = tok->next)
|
|
|
|
cur = cur->next = copy_token(tok);
|
|
|
|
|
|
|
|
cur->next = new_eof(tok);
|
|
|
|
*rest = tok;
|
|
|
|
return head.next;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read and evaluate a constant expression.
|
|
|
|
static long eval_const_expr(Token **rest, Token *tok) {
|
|
|
|
Token *start = tok;
|
|
|
|
Token *expr = copy_line(rest, tok->next);
|
2020-08-20 13:36:49 +03:00
|
|
|
expr = preprocess2(expr);
|
2020-08-20 13:36:36 +03:00
|
|
|
|
|
|
|
if (expr->kind == TK_EOF)
|
|
|
|
error_tok(start, "no expression");
|
|
|
|
|
|
|
|
Token *rest2;
|
|
|
|
long val = const_expr(&rest2, expr);
|
|
|
|
if (rest2->kind != TK_EOF)
|
|
|
|
error_tok(rest2, "extra token");
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
2020-03-30 03:57:07 +03:00
|
|
|
// Pushes a new entry for an `#if` onto the conditional-inclusion stack
// and returns it. `tok` is recorded as the entry's token and `included`
// as its initial "clause selected" state; the entry starts in IN_THEN.
static CondIncl *push_cond_incl(Token *tok, bool included) {
  CondIncl *entry = calloc(1, sizeof *entry);
  entry->tok = tok;
  entry->included = included;
  entry->ctx = IN_THEN;

  entry->next = cond_incl;
  cond_incl = entry;
  return entry;
}
|
|
|
|
|
2020-03-29 07:29:48 +03:00
|
|
|
// Looks up the macro named by identifier token `tok`. Returns NULL if
// `tok` is not an identifier, if no entry matches, or if the first
// matching entry is marked deleted.
static Macro *find_macro(Token *tok) {
  if (tok->kind != TK_IDENT)
    return NULL;

  Macro *m = macros;
  while (m) {
    bool same_name =
        strlen(m->name) == tok->len && !strncmp(m->name, tok->loc, tok->len);
    if (same_name)
      return m->deleted ? NULL : m;
    m = m->next;
  }
  return NULL;
}
|
|
|
|
|
|
|
|
// Creates a macro entry with the given name and body, pushes it at the
// front of the macro list and returns it. `name` and `body` are stored
// as given, not copied.
static Macro *add_macro(char *name, Token *body) {
  Macro *entry = calloc(1, sizeof *entry);
  entry->name = name;
  entry->body = body;

  entry->next = macros;
  macros = entry;
  return entry;
}
|
|
|
|
|
|
|
|
// If tok is a macro, expand it and return true.
|
|
|
|
// Otherwise, do nothing and return false.
|
|
|
|
static bool expand_macro(Token **rest, Token *tok) {
|
|
|
|
Macro *m = find_macro(tok);
|
|
|
|
if (!m)
|
|
|
|
return false;
|
|
|
|
*rest = append(m->body, tok->next);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-03-30 03:30:06 +03:00
|
|
|
// Visit all tokens in `tok` while evaluating preprocessing
|
|
|
|
// macros and directives.
|
|
|
|
static Token *preprocess2(Token *tok) {
|
|
|
|
Token head = {};
|
|
|
|
Token *cur = &head;
|
|
|
|
|
|
|
|
while (tok->kind != TK_EOF) {
|
2020-03-29 07:29:48 +03:00
|
|
|
// If it is a macro, expand it.
|
|
|
|
if (expand_macro(&tok, tok))
|
|
|
|
continue;
|
|
|
|
|
2020-03-30 03:30:06 +03:00
|
|
|
// Pass through if it is not a "#".
|
|
|
|
if (!is_hash(tok)) {
|
|
|
|
cur = cur->next = tok;
|
|
|
|
tok = tok->next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-08-20 13:36:36 +03:00
|
|
|
Token *start = tok;
|
2020-03-30 03:30:06 +03:00
|
|
|
tok = tok->next;
|
|
|
|
|
2020-09-03 13:24:23 +03:00
|
|
|
if (equal(tok, "include")) {
|
|
|
|
tok = tok->next;
|
|
|
|
|
|
|
|
if (tok->kind != TK_STR)
|
|
|
|
error_tok(tok, "expected a filename");
|
|
|
|
|
2020-08-20 15:37:02 +03:00
|
|
|
char *path;
|
|
|
|
if (tok->str[0] == '/')
|
|
|
|
path = tok->str;
|
|
|
|
else
|
|
|
|
path = format("%s/%s", dirname(strdup(tok->file->name)), tok->str);
|
|
|
|
|
2020-09-03 13:24:23 +03:00
|
|
|
Token *tok2 = tokenize_file(path);
|
|
|
|
if (!tok2)
|
|
|
|
error_tok(tok, "%s", strerror(errno));
|
2020-04-21 04:46:26 +03:00
|
|
|
tok = skip_line(tok->next);
|
|
|
|
tok = append(tok2, tok);
|
2020-09-03 13:24:23 +03:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-03-29 07:29:48 +03:00
|
|
|
if (equal(tok, "define")) {
|
|
|
|
tok = tok->next;
|
|
|
|
if (tok->kind != TK_IDENT)
|
|
|
|
error_tok(tok, "macro name must be an identifier");
|
|
|
|
char *name = strndup(tok->loc, tok->len);
|
|
|
|
add_macro(name, copy_line(&tok, tok->next));
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-03-29 12:23:33 +03:00
|
|
|
if (equal(tok, "undef")) {
|
|
|
|
tok = tok->next;
|
|
|
|
if (tok->kind != TK_IDENT)
|
|
|
|
error_tok(tok, "macro name must be an identifier");
|
|
|
|
char *name = strndup(tok->loc, tok->len);
|
|
|
|
tok = skip_line(tok->next);
|
|
|
|
|
|
|
|
Macro *m = add_macro(name, NULL);
|
|
|
|
m->deleted = true;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-08-20 13:36:36 +03:00
|
|
|
if (equal(tok, "if")) {
|
|
|
|
long val = eval_const_expr(&tok, tok);
|
2020-03-30 03:57:07 +03:00
|
|
|
push_cond_incl(start, val);
|
2020-08-20 13:36:36 +03:00
|
|
|
if (!val)
|
|
|
|
tok = skip_cond_incl(tok);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-03-29 02:51:06 +03:00
|
|
|
if (equal(tok, "elif")) {
|
|
|
|
if (!cond_incl || cond_incl->ctx == IN_ELSE)
|
|
|
|
error_tok(start, "stray #elif");
|
|
|
|
cond_incl->ctx = IN_ELIF;
|
|
|
|
|
|
|
|
if (!cond_incl->included && eval_const_expr(&tok, tok))
|
|
|
|
cond_incl->included = true;
|
|
|
|
else
|
|
|
|
tok = skip_cond_incl(tok);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-03-30 03:57:07 +03:00
|
|
|
if (equal(tok, "else")) {
|
|
|
|
if (!cond_incl || cond_incl->ctx == IN_ELSE)
|
|
|
|
error_tok(start, "stray #else");
|
|
|
|
cond_incl->ctx = IN_ELSE;
|
|
|
|
tok = skip_line(tok->next);
|
|
|
|
|
|
|
|
if (cond_incl->included)
|
|
|
|
tok = skip_cond_incl(tok);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-08-20 13:36:36 +03:00
|
|
|
if (equal(tok, "endif")) {
|
|
|
|
if (!cond_incl)
|
|
|
|
error_tok(start, "stray #endif");
|
|
|
|
cond_incl = cond_incl->next;
|
|
|
|
tok = skip_line(tok->next);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-03-30 03:30:06 +03:00
|
|
|
// `#`-only line is legal. It's called a null directive.
|
|
|
|
if (tok->at_bol)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
error_tok(tok, "invalid preprocessor directive");
|
|
|
|
}
|
|
|
|
|
|
|
|
cur->next = tok;
|
|
|
|
return head.next;
|
|
|
|
}
|
|
|
|
|
2020-08-18 03:41:59 +03:00
|
|
|
// Entry point function of the preprocessor.
|
|
|
|
Token *preprocess(Token *tok) {
|
2020-03-30 03:30:06 +03:00
|
|
|
tok = preprocess2(tok);
|
2020-08-20 13:36:36 +03:00
|
|
|
if (cond_incl)
|
|
|
|
error_tok(cond_incl->tok, "unterminated conditional directive");
|
2020-08-18 03:41:59 +03:00
|
|
|
convert_keywords(tok);
|
|
|
|
return tok;
|
|
|
|
}
|