From d85fc4ffcfb8875aa191481e5c153a1e07066f8e Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Sun, 30 Aug 2020 18:57:20 +0900 Subject: [PATCH] Add #include <...> --- chibicc.h | 2 ++ main.c | 2 +- preprocess.c | 81 ++++++++++++++++++++++++++++++++++++++----------- self.py | 1 + test/include3.h | 1 + test/include4.h | 1 + test/macro.c | 16 ++++++++++ 7 files changed, 85 insertions(+), 19 deletions(-) create mode 100644 test/include3.h create mode 100644 test/include4.h diff --git a/chibicc.h b/chibicc.h index f476971..9cd1ed2 100644 --- a/chibicc.h +++ b/chibicc.h @@ -346,4 +346,6 @@ int align_to(int n, int align); // main.c // +bool file_exists(char *path); + extern char *base_file; diff --git a/main.c b/main.c index 086a13e..4c520c5 100644 --- a/main.c +++ b/main.c @@ -224,7 +224,7 @@ static char *find_file(char *pattern) { } // Returns true if a given file exists. -static bool file_exists(char *path) { +bool file_exists(char *path) { struct stat st; return !stat(path, &st); } diff --git a/preprocess.c b/preprocess.c index 63bd73b..ca06ed9 100644 --- a/preprocess.c +++ b/preprocess.c @@ -421,10 +421,10 @@ static MacroArg *find_arg(MacroArg *args, Token *tok) { } // Concatenates all tokens in `tok` and returns a new string. -static char *join_tokens(Token *tok) { +static char *join_tokens(Token *tok, Token *end) { // Compute the length of the resulting token. int len = 1; - for (Token *t = tok; t && t->kind != TK_EOF; t = t->next) { + for (Token *t = tok; t != end && t->kind != TK_EOF; t = t->next) { if (t != tok && t->has_space) len++; len += t->len; @@ -434,7 +434,7 @@ static char *join_tokens(Token *tok) { // Copy token texts. int pos = 0; - for (Token *t = tok; t && t->kind != TK_EOF; t = t->next) { + for (Token *t = tok; t != end && t->kind != TK_EOF; t = t->next) { if (t != tok && t->has_space) buf[pos++] = ' '; strncpy(buf + pos, t->loc, t->len); @@ -450,7 +450,7 @@ static Token *stringize(Token *hash, Token *arg) { // Create a new string token. We need to set some value to its // source location for error reporting function, so we use a macro // name token as a template. - char *s = join_tokens(arg); + char *s = join_tokens(arg, NULL); return new_str_token(s, hash); } @@ -596,6 +596,54 @@ static bool expand_macro(Token **rest, Token *tok) { return true; } +// Read an #include argument. +static char *read_include_filename(Token **rest, Token *tok, bool *is_dquote) { + // Pattern 1: #include "foo.h" + if (tok->kind == TK_STR) { + // A double-quoted filename for #include is a special kind of + // token, and we don't want to interpret any escape sequences in it. + // For example, "\f" in "C:\foo" is not a formfeed character but + // just two non-control characters, backslash and f. + // So we don't want to use token->str. + *is_dquote = true; + *rest = skip_line(tok->next); + return strndup(tok->loc + 1, tok->len - 2); + } + + // Pattern 2: #include + if (equal(tok, "<")) { + // Reconstruct a filename from a sequence of tokens between + // "<" and ">". + Token *start = tok; + + // Find closing ">". + for (; !equal(tok, ">"); tok = tok->next) + if (tok->at_bol || tok->kind == TK_EOF) + error_tok(tok, "expected '>'"); + + *is_dquote = false; + *rest = skip_line(tok->next); + return join_tokens(start->next, tok); + } + + // Pattern 3: #include FOO + // In this case FOO must be macro-expanded to either + // a single string token or a sequence of "<" ... ">". + if (tok->kind == TK_IDENT) { + Token *tok2 = preprocess2(copy_line(rest, tok)); + return read_include_filename(&tok2, tok2, is_dquote); + } + + error_tok(tok, "expected a filename"); +} + +static Token *include_file(Token *tok, char *path, Token *filename_tok) { + Token *tok2 = tokenize_file(path); + if (!tok2) + error_tok(filename_tok, "%s: cannot open file: %s", path, strerror(errno)); + return append(tok2, tok); +} + // Visit all tokens in `tok` while evaluating preprocessing // macros and directives. static Token *preprocess2(Token *tok) { @@ -618,22 +666,19 @@ static Token *preprocess2(Token *tok) { tok = tok->next; if (equal(tok, "include")) { - tok = tok->next; + bool is_dquote; + char *filename = read_include_filename(&tok, tok->next, &is_dquote); - if (tok->kind != TK_STR) - error_tok(tok, "expected a filename"); + if (filename[0] != '/') { + char *path = format("%s/%s", dirname(strdup(start->file->name)), filename); + if (file_exists(path)) { + tok = include_file(tok, path, start->next->next); + continue; + } + } - char *path; - if (tok->str[0] == '/') - path = tok->str; - else - path = format("%s/%s", dirname(strdup(tok->file->name)), tok->str); - - Token *tok2 = tokenize_file(path); - if (!tok2) - error_tok(tok, "%s", strerror(errno)); - tok = skip_line(tok->next); - tok = append(tok2, tok); + // TODO: Search a file from the include paths. + tok = include_file(tok, filename, start->next->next); continue; } diff --git a/self.py b/self.py index fd6623e..32e0580 100755 --- a/self.py +++ b/self.py @@ -94,6 +94,7 @@ int atexit(void (*)(void)); FILE *open_memstream(char **ptr, size_t *sizeloc); char *dirname(char *path); char *strncpy(char *dest, char *src, long n); +int stat(char *pathname, struct stat *statbuf); """) for path in sys.argv[1:]: diff --git a/test/include3.h b/test/include3.h new file mode 100644 index 0000000..820d4a6 --- /dev/null +++ b/test/include3.h @@ -0,0 +1 @@ +#define foo 3 diff --git a/test/include4.h b/test/include4.h new file mode 100644 index 0000000..1fea1a2 --- /dev/null +++ b/test/include4.h @@ -0,0 +1 @@ +#define foo 4 diff --git a/test/macro.c b/test/macro.c index 1360954..9bf2085 100644 --- a/test/macro.c +++ b/test/macro.c @@ -306,6 +306,22 @@ int main() { #define M14(x) M13(x. M12) ASSERT(0, strcmp(M14(bar), "bar. foo")); +#include "include3.h" + ASSERT(3, foo); + +#include "include4.h" + ASSERT(4, foo); + +#define M13 "include3.h" +#include M13 + ASSERT(3, foo); + +#define M13 < include4.h +#include M13 > + ASSERT(4, foo); + +#undef foo + printf("OK\n"); return 0; }