2020-03-29 09:47:50 +03:00
|
|
|
// This file implements the C preprocessor.
//
// The preprocessor takes a list of tokens as an input and returns a
// new list of tokens as an output.
//
// The preprocessing language is designed in such a way that macro
// expansion is guaranteed to stop even if there is a recursive macro.
// Informally speaking, a macro is applied only once for each token.
// That is, if a macro token T appears in a result of direct or
// indirect macro expansion of T, T won't be expanded any further.
// For example, if T is defined as U, and U is defined as T, then
// token T is expanded to U and then to T and the macro expansion
// stops at that point.
//
// To achieve the above behavior, we attach to each token a set of
// macro names from which the token is expanded. The set is called
// "hideset". Hideset is initially empty, and every time we expand a
// macro, the macro name is added to the resulting tokens' hidesets.
//
// The above macro expansion algorithm is explained in this document
// written by Dave Prosser, which is used as a basis for the
// standard's wording:
// https://github.com/rui314/chibicc/wiki/cpp.algo.pdf
|
|
|
|
|
2020-08-18 03:41:59 +03:00
|
|
|
#include "chibicc.h"
|
|
|
|
|
2020-03-30 04:37:44 +03:00
|
|
|
// One formal parameter of a function-like macro. Parameters are kept
// in a singly linked list in declaration order.
typedef struct MacroParam {
  struct MacroParam *next; // Following parameter, or NULL for the last one
  char *name;              // Parameter name
} MacroParam;
|
|
|
|
|
|
|
|
typedef struct MacroArg MacroArg;
|
|
|
|
struct MacroArg {
|
|
|
|
MacroArg *next;
|
|
|
|
char *name;
|
|
|
|
Token *tok;
|
|
|
|
};
|
|
|
|
|
2020-03-29 07:29:48 +03:00
|
|
|
typedef struct Macro Macro;
|
|
|
|
struct Macro {
|
|
|
|
Macro *next;
|
|
|
|
char *name;
|
2020-08-18 04:45:03 +03:00
|
|
|
bool is_objlike; // Object-like or function-like
|
2020-03-30 04:37:44 +03:00
|
|
|
MacroParam *params;
|
2020-03-29 07:29:48 +03:00
|
|
|
Token *body;
|
2020-03-29 12:23:33 +03:00
|
|
|
bool deleted;
|
2020-03-29 07:29:48 +03:00
|
|
|
};
|
|
|
|
|
2020-08-20 13:36:36 +03:00
|
|
|
// `#if` can be nested, so we use a stack to manage nested `#if`s.
|
|
|
|
typedef struct CondIncl CondIncl;
|
|
|
|
struct CondIncl {
|
|
|
|
CondIncl *next;
|
2020-03-29 02:51:06 +03:00
|
|
|
enum { IN_THEN, IN_ELIF, IN_ELSE } ctx;
|
2020-08-20 13:36:36 +03:00
|
|
|
Token *tok;
|
2020-03-30 03:57:07 +03:00
|
|
|
bool included;
|
2020-08-20 13:36:36 +03:00
|
|
|
};
|
|
|
|
|
2020-03-29 09:47:50 +03:00
|
|
|
// A hideset is a linked list of macro names that must not be expanded
// again for the token the list is attached to.
typedef struct Hideset {
  struct Hideset *next; // Rest of the set
  char *name;           // Macro name
} Hideset;
|
|
|
|
|
2020-03-29 07:29:48 +03:00
|
|
|
static Macro *macros;
|
2020-08-20 13:36:36 +03:00
|
|
|
static CondIncl *cond_incl;
|
|
|
|
|
2020-08-20 13:36:49 +03:00
|
|
|
static Token *preprocess2(Token *tok);
|
2020-08-31 10:50:58 +03:00
|
|
|
static Macro *find_macro(Token *tok);
|
2020-08-20 13:36:49 +03:00
|
|
|
|
2020-03-30 03:30:06 +03:00
|
|
|
// Returns true if `tok` is a "#" that is the first token of its line,
// i.e. the token that introduces a preprocessor directive.
static bool is_hash(Token *tok) {
  if (!tok->at_bol)
    return false;
  return equal(tok, "#");
}
|
|
|
|
|
2020-04-21 04:46:26 +03:00
|
|
|
// Some preprocessor directives such as #include allow extraneous
|
|
|
|
// tokens before newline. This function skips such tokens.
|
|
|
|
static Token *skip_line(Token *tok) {
|
|
|
|
if (tok->at_bol)
|
|
|
|
return tok;
|
|
|
|
warn_tok(tok, "extra token");
|
|
|
|
while (tok->at_bol)
|
|
|
|
tok = tok->next;
|
|
|
|
return tok;
|
|
|
|
}
|
|
|
|
|
2020-09-03 13:24:23 +03:00
|
|
|
// Returns a newly allocated shallow copy of `tok` that is detached
// from the original list (its `next` pointer is NULL).
static Token *copy_token(Token *tok) {
  Token *dup = calloc(1, sizeof(Token));
  *dup = *tok;
  dup->next = NULL;
  return dup;
}
|
|
|
|
|
2020-08-20 13:36:36 +03:00
|
|
|
// Creates a zero-length TK_EOF token. All other fields are copied
// from `tok`, which serves as a template.
static Token *new_eof(Token *tok) {
  Token *eof = copy_token(tok);
  eof->len = 0;
  eof->kind = TK_EOF;
  return eof;
}
|
|
|
|
|
2020-03-29 09:47:50 +03:00
|
|
|
static Hideset *new_hideset(char *name) {
|
|
|
|
Hideset *hs = calloc(1, sizeof(Hideset));
|
|
|
|
hs->name = name;
|
|
|
|
return hs;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns a list that contains the names of `hs1` followed by `hs2`.
// The nodes of `hs1` are duplicated while `hs2` is shared as the tail
// of the result. No duplicate elimination is performed.
static Hideset *hideset_union(Hideset *hs1, Hideset *hs2) {
  Hideset *result = NULL;
  Hideset **tail = &result;

  for (Hideset *p = hs1; p; p = p->next) {
    *tail = new_hideset(p->name);
    tail = &(*tail)->next;
  }

  *tail = hs2;
  return result;
}
|
|
|
|
|
|
|
|
// Returns true if `hs` has an entry whose name is exactly the `len`
// bytes starting at `s` (which need not be NUL-terminated).
static bool hideset_contains(Hideset *hs, char *s, int len) {
  for (Hideset *p = hs; p; p = p->next) {
    if (strlen(p->name) != len)
      continue;
    if (strncmp(p->name, s, len) == 0)
      return true;
  }
  return false;
}
|
|
|
|
|
2020-08-31 10:48:29 +03:00
|
|
|
// Returns a freshly allocated list of the names of `hs1` that also
// occur in `hs2`, in the order in which they appear in `hs1`.
static Hideset *hideset_intersection(Hideset *hs1, Hideset *hs2) {
  Hideset *result = NULL;
  Hideset **tail = &result;

  for (Hideset *p = hs1; p; p = p->next) {
    if (!hideset_contains(hs2, p->name, strlen(p->name)))
      continue;
    *tail = new_hideset(p->name);
    tail = &(*tail)->next;
  }
  return result;
}
|
|
|
|
|
2020-03-29 09:47:50 +03:00
|
|
|
// Returns a copy of the token list `tok` (walked until a NULL `next`)
// in which every token's hideset has been extended with `hs`. The
// input list itself is left untouched.
static Token *add_hideset(Token *tok, Hideset *hs) {
  Token *result = NULL;
  Token **tail = &result;

  for (Token *src = tok; src; src = src->next) {
    Token *dup = copy_token(src);
    dup->hideset = hideset_union(dup->hideset, hs);
    *tail = dup;
    tail = &dup->next;
  }
  return result;
}
|
|
|
|
|
2020-09-03 13:24:23 +03:00
|
|
|
// Append tok2 to the end of tok1.
|
|
|
|
static Token *append(Token *tok1, Token *tok2) {
|
2020-03-29 07:29:48 +03:00
|
|
|
if (tok1->kind == TK_EOF)
|
2020-09-03 13:24:23 +03:00
|
|
|
return tok2;
|
|
|
|
|
|
|
|
Token head = {};
|
|
|
|
Token *cur = &head;
|
|
|
|
|
2020-03-29 07:29:48 +03:00
|
|
|
for (; tok1->kind != TK_EOF; tok1 = tok1->next)
|
2020-09-03 13:24:23 +03:00
|
|
|
cur = cur->next = copy_token(tok1);
|
|
|
|
cur->next = tok2;
|
|
|
|
return head.next;
|
|
|
|
}
|
|
|
|
|
2020-03-30 03:57:07 +03:00
|
|
|
// Skips a whole nested conditional. `tok` points just past the
// directive name of a nested `#if`/`#ifdef`/`#ifndef`; the function
// returns the token following the `endif` keyword that closes it, or
// the TK_EOF token if no matching `#endif` is found.
static Token *skip_cond_incl2(Token *tok) {
  while (tok->kind != TK_EOF) {
    if (!is_hash(tok)) {
      tok = tok->next;
      continue;
    }

    Token *directive = tok->next;

    // A deeper conditional: skip it recursively.
    if (equal(directive, "if") || equal(directive, "ifdef") ||
        equal(directive, "ifndef")) {
      tok = skip_cond_incl2(directive->next);
      continue;
    }

    if (equal(directive, "endif"))
      return directive->next;

    tok = tok->next;
  }
  return tok;
}
|
|
|
|
|
2020-03-29 02:51:06 +03:00
|
|
|
// Skip until next `#else`, `#elif` or `#endif`.
|
2020-03-30 03:54:47 +03:00
|
|
|
// Nested `#if` and `#endif` are skipped.
|
2020-08-20 13:36:36 +03:00
|
|
|
static Token *skip_cond_incl(Token *tok) {
|
|
|
|
while (tok->kind != TK_EOF) {
|
2020-03-29 11:18:31 +03:00
|
|
|
if (is_hash(tok) &&
|
|
|
|
(equal(tok->next, "if") || equal(tok->next, "ifdef") ||
|
|
|
|
equal(tok->next, "ifndef"))) {
|
2020-03-30 03:57:07 +03:00
|
|
|
tok = skip_cond_incl2(tok->next->next);
|
2020-03-30 03:54:47 +03:00
|
|
|
continue;
|
|
|
|
}
|
2020-03-30 03:57:07 +03:00
|
|
|
|
|
|
|
if (is_hash(tok) &&
|
2020-03-29 02:51:06 +03:00
|
|
|
(equal(tok->next, "elif") || equal(tok->next, "else") ||
|
|
|
|
equal(tok->next, "endif")))
|
2020-03-30 03:54:47 +03:00
|
|
|
break;
|
2020-08-20 13:36:36 +03:00
|
|
|
tok = tok->next;
|
|
|
|
}
|
|
|
|
return tok;
|
|
|
|
}
|
|
|
|
|
2020-08-29 19:04:23 +03:00
|
|
|
// Double-quote a given string and returns it.
//
// Every backslash and double quote in `str` is escaped with a
// backslash. The result is a newly allocated NUL-terminated string.
static char *quote_string(char *str) {
  // Two enclosing quotes and the terminating NUL, plus one byte per
  // input character and one more for each character to be escaped.
  int bufsize = 3;
  for (char *s = str; *s; s++)
    bufsize += (*s == '\\' || *s == '"') ? 2 : 1;

  char *buf = calloc(1, bufsize);
  char *out = buf;

  *out++ = '"';
  for (char *s = str; *s; s++) {
    if (*s == '\\' || *s == '"')
      *out++ = '\\';
    *out++ = *s;
  }
  *out++ = '"';
  *out = '\0';
  return buf;
}
|
|
|
|
|
|
|
|
// Builds a string-literal token whose contents are `str`. The text is
// quoted and then run through the tokenizer; the file name and file
// number of `tmpl` are reused for the synthetic input.
static Token *new_str_token(char *str, Token *tmpl) {
  char *quoted = quote_string(str);
  return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, quoted));
}
|
|
|
|
|
2020-08-20 13:36:36 +03:00
|
|
|
// Copy all tokens until the next newline, terminate them with
|
|
|
|
// an EOF token and then returns them. This function is used to
|
|
|
|
// create a new list of tokens for `#if` arguments.
|
|
|
|
static Token *copy_line(Token **rest, Token *tok) {
|
|
|
|
Token head = {};
|
|
|
|
Token *cur = &head;
|
|
|
|
|
|
|
|
for (; !tok->at_bol; tok = tok->next)
|
|
|
|
cur = cur->next = copy_token(tok);
|
|
|
|
|
|
|
|
cur->next = new_eof(tok);
|
|
|
|
*rest = tok;
|
|
|
|
return head.next;
|
|
|
|
}
|
|
|
|
|
2020-08-31 10:50:58 +03:00
|
|
|
// Builds a token list for the decimal number `val` by formatting it
// and tokenizing the text. The file name and file number of `tmpl`
// are reused for the synthetic input.
static Token *new_num_token(int val, Token *tmpl) {
  char *text = format("%d\n", val);
  return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, text));
}
|
|
|
|
|
|
|
|
// Reads the rest of the current line as a `#if`/`#elif` expression and
// resolves every `defined` operator in it. `*rest` is set to the
// first token of the following line.
static Token *read_const_expr(Token **rest, Token *tok) {
  tok = copy_line(rest, tok);

  Token head = {};
  Token *cur = &head;

  while (tok->kind != TK_EOF) {
    // Anything but "defined" is passed through unchanged.
    if (!equal(tok, "defined")) {
      cur = cur->next = tok;
      tok = tok->next;
      continue;
    }

    // "defined(foo)" or "defined foo" becomes "1" if macro "foo"
    // is defined. Otherwise "0".
    Token *start = tok;
    bool has_paren = consume(&tok, tok->next, "(");

    if (tok->kind != TK_IDENT)
      error_tok(start, "macro name must be an identifier");

    Macro *m = find_macro(tok);
    tok = tok->next;

    if (has_paren)
      tok = skip(tok, ")");

    cur = cur->next = new_num_token(m ? 1 : 0, start);
  }

  // Terminate the result with the EOF token of the copied line.
  cur->next = tok;
  return head.next;
}
|
|
|
|
|
2020-08-20 13:36:36 +03:00
|
|
|
// Read and evaluate a constant expression.
|
|
|
|
static long eval_const_expr(Token **rest, Token *tok) {
|
|
|
|
Token *start = tok;
|
2020-08-31 10:50:58 +03:00
|
|
|
Token *expr = read_const_expr(rest, tok->next);
|
2020-08-20 13:36:49 +03:00
|
|
|
expr = preprocess2(expr);
|
2020-08-20 13:36:36 +03:00
|
|
|
|
|
|
|
if (expr->kind == TK_EOF)
|
|
|
|
error_tok(start, "no expression");
|
|
|
|
|
2020-03-31 16:16:56 +03:00
|
|
|
// [https://www.sigbus.info/n1570#6.10.1p4] The standard requires
|
|
|
|
// we replace remaining non-macro identifiers with "0" before
|
|
|
|
// evaluating a constant expression. For example, `#if foo` is
|
|
|
|
// equivalent to `#if 0` if foo is not defined.
|
|
|
|
for (Token *t = expr; t->kind != TK_EOF; t = t->next) {
|
|
|
|
if (t->kind == TK_IDENT) {
|
|
|
|
Token *next = t->next;
|
|
|
|
*t = *new_num_token(0, t);
|
|
|
|
t->next = next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-20 13:36:36 +03:00
|
|
|
Token *rest2;
|
|
|
|
long val = const_expr(&rest2, expr);
|
|
|
|
if (rest2->kind != TK_EOF)
|
|
|
|
error_tok(rest2, "extra token");
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
2020-03-30 03:57:07 +03:00
|
|
|
// Pushes a new record onto the conditional-inclusion stack. The new
// record starts in the IN_THEN state; `included` tells whether the
// first branch was selected.
static CondIncl *push_cond_incl(Token *tok, bool included) {
  CondIncl *entry = calloc(1, sizeof(CondIncl));
  entry->ctx = IN_THEN;
  entry->tok = tok;
  entry->included = included;

  // Link in front of the current top of the stack.
  entry->next = cond_incl;
  cond_incl = entry;
  return entry;
}
|
|
|
|
|
2020-03-29 07:29:48 +03:00
|
|
|
// Looks up the macro named by `tok`. Returns NULL if `tok` is not an
// identifier, if no entry with that name exists, or if the first
// matching entry is marked as deleted.
static Macro *find_macro(Token *tok) {
  if (tok->kind != TK_IDENT)
    return NULL;

  for (Macro *m = macros; m; m = m->next) {
    if (strlen(m->name) != tok->len)
      continue;
    if (strncmp(m->name, tok->loc, tok->len) != 0)
      continue;

    // The first match decides the outcome.
    if (m->deleted)
      return NULL;
    return m;
  }
  return NULL;
}
|
|
|
|
|
2020-08-18 04:45:03 +03:00
|
|
|
// Registers a macro definition by pushing a new entry at the front of
// the global macro list, and returns that entry.
static Macro *add_macro(char *name, bool is_objlike, Token *body) {
  Macro *entry = calloc(1, sizeof(Macro));
  entry->name = name;
  entry->is_objlike = is_objlike;
  entry->body = body;

  entry->next = macros;
  macros = entry;
  return entry;
}
|
|
|
|
|
2020-03-30 04:37:44 +03:00
|
|
|
// Parses the comma-separated parameter list of a function-like macro
// definition. `tok` points just after the opening parenthesis; `*rest`
// is set to the token following the closing parenthesis.
static MacroParam *read_macro_params(Token **rest, Token *tok) {
  MacroParam *params = NULL;
  MacroParam **tail = &params;

  while (!equal(tok, ")")) {
    // Every parameter but the first is preceded by a comma.
    if (params)
      tok = skip(tok, ",");

    if (tok->kind != TK_IDENT)
      error_tok(tok, "expected an identifier");

    MacroParam *param = calloc(1, sizeof(MacroParam));
    param->name = strndup(tok->loc, tok->len);
    *tail = param;
    tail = &param->next;

    tok = tok->next;
  }

  *rest = tok->next;
  return params;
}
|
|
|
|
|
2020-08-18 04:45:03 +03:00
|
|
|
// Handles the part of a `#define` line that follows the directive
// name: reads the macro name, an optional parameter list and the
// body, and registers the macro. `*rest` is set to the first token of
// the following line.
static void read_macro_definition(Token **rest, Token *tok) {
  if (tok->kind != TK_IDENT)
    error_tok(tok, "macro name must be an identifier");

  char *name = strndup(tok->loc, tok->len);
  tok = tok->next;

  // A "(" only starts a parameter list when it follows the macro name
  // without intervening whitespace.
  bool is_funclike = !tok->has_space && equal(tok, "(");

  if (!is_funclike) {
    // Object-like macro
    add_macro(name, true, copy_line(rest, tok));
    return;
  }

  // Function-like macro
  MacroParam *params = read_macro_params(&tok, tok->next);
  Macro *m = add_macro(name, false, copy_line(rest, tok));
  m->params = params;
}
|
|
|
|
|
2020-03-30 04:37:44 +03:00
|
|
|
// Reads one macro argument. Tokens are copied until a "," or ")" is
// found outside of any nested parentheses; that delimiter is not
// consumed and is stored in `*rest`. The copied tokens are terminated
// with an EOF token.
static MacroArg *read_macro_arg_one(Token **rest, Token *tok) {
  Token *first = NULL;
  Token **tail = &first;
  int depth = 0; // Nesting depth of parentheses inside the argument

  for (;;) {
    if (depth <= 0 && (equal(tok, ",") || equal(tok, ")")))
      break;

    if (tok->kind == TK_EOF)
      error_tok(tok, "premature end of input");

    if (equal(tok, "("))
      depth++;
    else if (equal(tok, ")"))
      depth--;

    *tail = copy_token(tok);
    tail = &(*tail)->next;
    tok = tok->next;
  }

  *tail = new_eof(tok);

  MacroArg *arg = calloc(1, sizeof(MacroArg));
  arg->tok = first;
  *rest = tok;
  return arg;
}
|
|
|
|
|
|
|
|
// Reads the arguments of a function-like macro invocation, one per
// entry of `params`. `tok` is the macro name token, which is followed
// by "(". On return `*rest` points at the token that is expected to be
// the closing parenthesis.
static MacroArg *read_macro_args(Token **rest, Token *tok, MacroParam *params) {
  Token *start = tok;

  // Step over the macro name and the opening parenthesis.
  tok = tok->next->next;

  MacroArg *args = NULL;
  MacroArg **tail = &args;

  MacroParam *pp = params;
  for (; pp; pp = pp->next) {
    // Every argument but the first is preceded by a comma.
    if (args)
      tok = skip(tok, ",");

    MacroArg *arg = read_macro_arg_one(&tok, tok);
    arg->name = pp->name;
    *tail = arg;
    tail = &arg->next;
  }

  // NOTE(review): the loop above only ends once pp is NULL, so this
  // check can never fire as written. It is kept to preserve the
  // original code.
  if (pp)
    error_tok(start, "too many arguments");

  skip(tok, ")");
  *rest = tok;
  return args;
}
|
|
|
|
|
|
|
|
// Returns the argument whose parameter name matches the text of
// `tok`, or NULL if there is none.
static MacroArg *find_arg(MacroArg *args, Token *tok) {
  for (MacroArg *arg = args; arg; arg = arg->next) {
    if (tok->len != strlen(arg->name))
      continue;
    if (strncmp(tok->loc, arg->name, tok->len) == 0)
      return arg;
  }
  return NULL;
}
|
|
|
|
|
2020-08-29 19:04:23 +03:00
|
|
|
// Concatenates all tokens in `tok` and returns a new string.
|
|
|
|
static char *join_tokens(Token *tok) {
|
|
|
|
// Compute the length of the resulting token.
|
|
|
|
int len = 1;
|
|
|
|
for (Token *t = tok; t && t->kind != TK_EOF; t = t->next) {
|
|
|
|
if (t != tok && t->has_space)
|
|
|
|
len++;
|
|
|
|
len += t->len;
|
|
|
|
}
|
|
|
|
|
|
|
|
char *buf = calloc(1, len);
|
|
|
|
|
|
|
|
// Copy token texts.
|
|
|
|
int pos = 0;
|
|
|
|
for (Token *t = tok; t && t->kind != TK_EOF; t = t->next) {
|
|
|
|
if (t != tok && t->has_space)
|
|
|
|
buf[pos++] = ' ';
|
|
|
|
strncpy(buf + pos, t->loc, t->len);
|
|
|
|
pos += t->len;
|
|
|
|
}
|
|
|
|
buf[pos] = '\0';
|
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Concatenates all tokens in `arg` and returns a new string token.
|
|
|
|
// This function is used for the stringizing operator (#).
|
|
|
|
static Token *stringize(Token *hash, Token *arg) {
|
|
|
|
// Create a new string token. We need to set some value to its
|
|
|
|
// source location for error reporting function, so we use a macro
|
|
|
|
// name token as a template.
|
|
|
|
char *s = join_tokens(arg);
|
|
|
|
return new_str_token(s, hash);
|
|
|
|
}
|
|
|
|
|
2020-08-29 19:05:28 +03:00
|
|
|
// Concatenate two tokens to create a new token.
|
|
|
|
static Token *paste(Token *lhs, Token *rhs) {
|
|
|
|
// Paste the two tokens.
|
|
|
|
char *buf = format("%.*s%.*s", lhs->len, lhs->loc, rhs->len, rhs->loc);
|
|
|
|
|
|
|
|
// Tokenize the resulting string.
|
|
|
|
Token *tok = tokenize(new_file(lhs->file->name, lhs->file->file_no, buf));
|
|
|
|
if (tok->next->kind != TK_EOF)
|
|
|
|
error_tok(lhs, "pasting forms '%s', an invalid token", buf);
|
|
|
|
return tok;
|
|
|
|
}
|
|
|
|
|
2020-03-30 04:37:44 +03:00
|
|
|
// Replace func-like macro parameters with given arguments.
|
|
|
|
static Token *subst(Token *tok, MacroArg *args) {
|
|
|
|
Token head = {};
|
|
|
|
Token *cur = &head;
|
|
|
|
|
|
|
|
while (tok->kind != TK_EOF) {
|
2020-08-29 19:04:23 +03:00
|
|
|
// "#" followed by a parameter is replaced with stringized actuals.
|
|
|
|
if (equal(tok, "#")) {
|
|
|
|
MacroArg *arg = find_arg(args, tok->next);
|
|
|
|
if (!arg)
|
|
|
|
error_tok(tok->next, "'#' is not followed by a macro parameter");
|
|
|
|
cur = cur->next = stringize(tok, arg->tok);
|
|
|
|
tok = tok->next->next;
|
|
|
|
continue;
|
|
|
|
}
|
2020-03-30 04:37:44 +03:00
|
|
|
|
2020-08-29 19:05:28 +03:00
|
|
|
if (equal(tok, "##")) {
|
|
|
|
if (cur == &head)
|
|
|
|
error_tok(tok, "'##' cannot appear at start of macro expansion");
|
|
|
|
|
|
|
|
if (tok->next->kind == TK_EOF)
|
|
|
|
error_tok(tok, "'##' cannot appear at end of macro expansion");
|
|
|
|
|
|
|
|
MacroArg *arg = find_arg(args, tok->next);
|
|
|
|
if (arg) {
|
|
|
|
if (arg->tok->kind != TK_EOF) {
|
|
|
|
*cur = *paste(cur, arg->tok);
|
|
|
|
for (Token *t = arg->tok->next; t->kind != TK_EOF; t = t->next)
|
|
|
|
cur = cur->next = copy_token(t);
|
|
|
|
}
|
|
|
|
tok = tok->next->next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
*cur = *paste(cur, tok->next);
|
|
|
|
tok = tok->next->next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
MacroArg *arg = find_arg(args, tok);
|
|
|
|
|
|
|
|
if (arg && equal(tok->next, "##")) {
|
|
|
|
Token *rhs = tok->next->next;
|
|
|
|
|
|
|
|
if (arg->tok->kind == TK_EOF) {
|
|
|
|
MacroArg *arg2 = find_arg(args, rhs);
|
|
|
|
if (arg2) {
|
|
|
|
for (Token *t = arg2->tok; t->kind != TK_EOF; t = t->next)
|
|
|
|
cur = cur->next = copy_token(t);
|
|
|
|
} else {
|
|
|
|
cur = cur->next = copy_token(rhs);
|
|
|
|
}
|
|
|
|
tok = rhs->next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (Token *t = arg->tok; t->kind != TK_EOF; t = t->next)
|
|
|
|
cur = cur->next = copy_token(t);
|
|
|
|
tok = tok->next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-03-30 04:37:44 +03:00
|
|
|
// Handle a macro token. Macro arguments are completely macro-expanded
|
|
|
|
// before they are substituted into a macro body.
|
|
|
|
if (arg) {
|
|
|
|
Token *t = preprocess2(arg->tok);
|
2020-08-29 20:33:01 +03:00
|
|
|
t->at_bol = tok->at_bol;
|
|
|
|
t->has_space = tok->has_space;
|
2020-03-30 04:37:44 +03:00
|
|
|
for (; t->kind != TK_EOF; t = t->next)
|
|
|
|
cur = cur->next = copy_token(t);
|
|
|
|
tok = tok->next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle a non-macro token.
|
|
|
|
cur = cur->next = copy_token(tok);
|
|
|
|
tok = tok->next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
cur->next = tok;
|
|
|
|
return head.next;
|
|
|
|
}
|
|
|
|
|
2020-03-29 07:29:48 +03:00
|
|
|
// If tok is a macro, expand it and return true.
|
|
|
|
// Otherwise, do nothing and return false.
|
|
|
|
static bool expand_macro(Token **rest, Token *tok) {
|
2020-03-29 09:47:50 +03:00
|
|
|
if (hideset_contains(tok->hideset, tok->loc, tok->len))
|
|
|
|
return false;
|
|
|
|
|
2020-03-29 07:29:48 +03:00
|
|
|
Macro *m = find_macro(tok);
|
|
|
|
if (!m)
|
|
|
|
return false;
|
2020-03-29 09:47:50 +03:00
|
|
|
|
2020-08-18 04:45:03 +03:00
|
|
|
// Object-like macro application
|
|
|
|
if (m->is_objlike) {
|
|
|
|
Hideset *hs = hideset_union(tok->hideset, new_hideset(m->name));
|
|
|
|
Token *body = add_hideset(m->body, hs);
|
|
|
|
*rest = append(body, tok->next);
|
2020-08-29 20:33:01 +03:00
|
|
|
(*rest)->at_bol = tok->at_bol;
|
|
|
|
(*rest)->has_space = tok->has_space;
|
2020-08-18 04:45:03 +03:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If a funclike macro token is not followed by an argument list,
|
|
|
|
// treat it as a normal identifier.
|
|
|
|
if (!equal(tok->next, "("))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Function-like macro application
|
2020-08-31 10:48:29 +03:00
|
|
|
Token *macro_token = tok;
|
2020-03-30 04:37:44 +03:00
|
|
|
MacroArg *args = read_macro_args(&tok, tok, m->params);
|
2020-08-31 10:48:29 +03:00
|
|
|
Token *rparen = tok;
|
|
|
|
|
|
|
|
// Tokens that consist a func-like macro invocation may have different
|
|
|
|
// hidesets, and if that's the case, it's not clear what the hideset
|
|
|
|
// for the new tokens should be. We take the interesection of the
|
|
|
|
// macro token and the closing parenthesis and use it as a new hideset
|
|
|
|
// as explained in the Dave Prossor's algorithm.
|
|
|
|
Hideset *hs = hideset_intersection(macro_token->hideset, rparen->hideset);
|
|
|
|
hs = hideset_union(hs, new_hideset(m->name));
|
|
|
|
|
|
|
|
Token *body = subst(m->body, args);
|
|
|
|
body = add_hideset(body, hs);
|
|
|
|
*rest = append(body, tok->next);
|
2020-08-29 20:33:01 +03:00
|
|
|
(*rest)->at_bol = macro_token->at_bol;
|
|
|
|
(*rest)->has_space = macro_token->has_space;
|
2020-03-29 07:29:48 +03:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-03-30 03:30:06 +03:00
|
|
|
// Visit all tokens in `tok` while evaluating preprocessing
|
|
|
|
// macros and directives.
|
|
|
|
static Token *preprocess2(Token *tok) {
|
|
|
|
Token head = {};
|
|
|
|
Token *cur = &head;
|
|
|
|
|
|
|
|
while (tok->kind != TK_EOF) {
|
2020-03-29 07:29:48 +03:00
|
|
|
// If it is a macro, expand it.
|
|
|
|
if (expand_macro(&tok, tok))
|
|
|
|
continue;
|
|
|
|
|
2020-03-30 03:30:06 +03:00
|
|
|
// Pass through if it is not a "#".
|
|
|
|
if (!is_hash(tok)) {
|
|
|
|
cur = cur->next = tok;
|
|
|
|
tok = tok->next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-08-20 13:36:36 +03:00
|
|
|
Token *start = tok;
|
2020-03-30 03:30:06 +03:00
|
|
|
tok = tok->next;
|
|
|
|
|
2020-09-03 13:24:23 +03:00
|
|
|
if (equal(tok, "include")) {
|
|
|
|
tok = tok->next;
|
|
|
|
|
|
|
|
if (tok->kind != TK_STR)
|
|
|
|
error_tok(tok, "expected a filename");
|
|
|
|
|
2020-08-20 15:37:02 +03:00
|
|
|
char *path;
|
|
|
|
if (tok->str[0] == '/')
|
|
|
|
path = tok->str;
|
|
|
|
else
|
|
|
|
path = format("%s/%s", dirname(strdup(tok->file->name)), tok->str);
|
|
|
|
|
2020-09-03 13:24:23 +03:00
|
|
|
Token *tok2 = tokenize_file(path);
|
|
|
|
if (!tok2)
|
|
|
|
error_tok(tok, "%s", strerror(errno));
|
2020-04-21 04:46:26 +03:00
|
|
|
tok = skip_line(tok->next);
|
|
|
|
tok = append(tok2, tok);
|
2020-09-03 13:24:23 +03:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-03-29 07:29:48 +03:00
|
|
|
if (equal(tok, "define")) {
|
2020-08-18 04:45:03 +03:00
|
|
|
read_macro_definition(&tok, tok->next);
|
2020-03-29 07:29:48 +03:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-03-29 12:23:33 +03:00
|
|
|
if (equal(tok, "undef")) {
|
|
|
|
tok = tok->next;
|
|
|
|
if (tok->kind != TK_IDENT)
|
|
|
|
error_tok(tok, "macro name must be an identifier");
|
|
|
|
char *name = strndup(tok->loc, tok->len);
|
|
|
|
tok = skip_line(tok->next);
|
|
|
|
|
2020-08-18 04:45:03 +03:00
|
|
|
Macro *m = add_macro(name, true, NULL);
|
2020-03-29 12:23:33 +03:00
|
|
|
m->deleted = true;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-08-20 13:36:36 +03:00
|
|
|
if (equal(tok, "if")) {
|
|
|
|
long val = eval_const_expr(&tok, tok);
|
2020-03-30 03:57:07 +03:00
|
|
|
push_cond_incl(start, val);
|
2020-08-20 13:36:36 +03:00
|
|
|
if (!val)
|
|
|
|
tok = skip_cond_incl(tok);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-03-29 11:18:31 +03:00
|
|
|
if (equal(tok, "ifdef")) {
|
|
|
|
bool defined = find_macro(tok->next);
|
|
|
|
push_cond_incl(tok, defined);
|
|
|
|
tok = skip_line(tok->next->next);
|
|
|
|
if (!defined)
|
|
|
|
tok = skip_cond_incl(tok);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (equal(tok, "ifndef")) {
|
|
|
|
bool defined = find_macro(tok->next);
|
|
|
|
push_cond_incl(tok, !defined);
|
|
|
|
tok = skip_line(tok->next->next);
|
|
|
|
if (defined)
|
|
|
|
tok = skip_cond_incl(tok);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-03-29 02:51:06 +03:00
|
|
|
if (equal(tok, "elif")) {
|
|
|
|
if (!cond_incl || cond_incl->ctx == IN_ELSE)
|
|
|
|
error_tok(start, "stray #elif");
|
|
|
|
cond_incl->ctx = IN_ELIF;
|
|
|
|
|
|
|
|
if (!cond_incl->included && eval_const_expr(&tok, tok))
|
|
|
|
cond_incl->included = true;
|
|
|
|
else
|
|
|
|
tok = skip_cond_incl(tok);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-03-30 03:57:07 +03:00
|
|
|
if (equal(tok, "else")) {
|
|
|
|
if (!cond_incl || cond_incl->ctx == IN_ELSE)
|
|
|
|
error_tok(start, "stray #else");
|
|
|
|
cond_incl->ctx = IN_ELSE;
|
|
|
|
tok = skip_line(tok->next);
|
|
|
|
|
|
|
|
if (cond_incl->included)
|
|
|
|
tok = skip_cond_incl(tok);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-08-20 13:36:36 +03:00
|
|
|
if (equal(tok, "endif")) {
|
|
|
|
if (!cond_incl)
|
|
|
|
error_tok(start, "stray #endif");
|
|
|
|
cond_incl = cond_incl->next;
|
|
|
|
tok = skip_line(tok->next);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-03-30 03:30:06 +03:00
|
|
|
// `#`-only line is legal. It's called a null directive.
|
|
|
|
if (tok->at_bol)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
error_tok(tok, "invalid preprocessor directive");
|
|
|
|
}
|
|
|
|
|
|
|
|
cur->next = tok;
|
|
|
|
return head.next;
|
|
|
|
}
|
|
|
|
|
2020-08-18 03:41:59 +03:00
|
|
|
// Entry point function of the preprocessor.
|
|
|
|
Token *preprocess(Token *tok) {
|
2020-03-30 03:30:06 +03:00
|
|
|
tok = preprocess2(tok);
|
2020-08-20 13:36:36 +03:00
|
|
|
if (cond_incl)
|
|
|
|
error_tok(cond_incl->tok, "unterminated conditional directive");
|
2020-08-18 03:41:59 +03:00
|
|
|
convert_keywords(tok);
|
|
|
|
return tok;
|
|
|
|
}
|