diff --git a/chibicc.h b/chibicc.h index 3d59afb..4c8a222 100644 --- a/chibicc.h +++ b/chibicc.h @@ -1,3 +1,4 @@ +#define _POSIX_C_SOURCE 200809L #include #include #include @@ -6,10 +7,13 @@ #include #include +typedef struct Node Node; + // // tokenize.c // +// Token typedef enum { TK_IDENT, // Identifiers TK_PUNCT, // Punctuators @@ -38,6 +42,23 @@ Token *tokenize(char *input); // parse.c // +// Local variable +typedef struct Obj Obj; +struct Obj { + Obj *next; + char *name; // Variable name + int offset; // Offset from RBP +}; + +// Function +typedef struct Function Function; +struct Function { + Node *body; + Obj *locals; + int stack_size; +}; + +// AST node typedef enum { ND_ADD, // + ND_SUB, // - @@ -55,20 +76,19 @@ typedef enum { } NodeKind; // AST node type -typedef struct Node Node; struct Node { NodeKind kind; // Node kind Node *next; // Next node Node *lhs; // Left-hand side Node *rhs; // Right-hand side - char name; // Used if kind == ND_VAR + Obj *var; // Used if kind == ND_VAR int val; // Used if kind == ND_NUM }; -Node *parse(Token *tok); +Function *parse(Token *tok); // // codegen.c // -void codegen(Node *node); +void codegen(Function *prog); diff --git a/codegen.c b/codegen.c index adc4924..8afaa47 100644 --- a/codegen.c +++ b/codegen.c @@ -12,12 +12,17 @@ static void pop(char *arg) { depth--; } +// Round up `n` to the nearest multiple of `align`. For instance, +// align_to(5, 8) returns 8 and align_to(11, 8) returns 16. +static int align_to(int n, int align) { + return (n + align - 1) / align * align; +} + // Compute the absolute address of a given node. // It's an error if a given node does not reside in memory. static void gen_addr(Node *node) { if (node->kind == ND_VAR) { - int offset = (node->name - 'a' + 1) * 8; - printf(" lea %d(%%rbp), %%rax\n", -offset); + printf(" lea %d(%%rbp), %%rax\n", node->var->offset); return; } @@ -97,16 +102,28 @@ static void gen_stmt(Node *node) { error("invalid statement"); } -void codegen(Node *node) { +// Assign offsets to local variables. +static void assign_lvar_offsets(Function *prog) { + int offset = 0; + for (Obj *var = prog->locals; var; var = var->next) { + offset += 8; + var->offset = -offset; + } + prog->stack_size = align_to(offset, 16); +} + +void codegen(Function *prog) { + assign_lvar_offsets(prog); + printf(" .globl main\n"); printf("main:\n"); // Prologue printf(" push %%rbp\n"); printf(" mov %%rsp, %%rbp\n"); - printf(" sub $208, %%rsp\n"); + printf(" sub $%d, %%rsp\n", prog->stack_size); - for (Node *n = node; n; n = n->next) { + for (Node *n = prog->body; n; n = n->next) { gen_stmt(n); assert(depth == 0); } diff --git a/main.c b/main.c index adb525c..a855d90 100644 --- a/main.c +++ b/main.c @@ -5,7 +5,10 @@ int main(int argc, char **argv) { error("%s: invalid number of arguments", argv[0]); Token *tok = tokenize(argv[1]); - Node *node = parse(tok); - codegen(node); + Function *prog = parse(tok); + + // Traverse the AST to emit assembly. + codegen(prog); + return 0; } diff --git a/parse.c b/parse.c index f9b8e58..49c5646 100644 --- a/parse.c +++ b/parse.c @@ -1,5 +1,9 @@ #include "chibicc.h" +// All local variable instances created during parsing are +// accumulated to this list. +Obj *locals; + static Node *expr(Token **rest, Token *tok); static Node *expr_stmt(Token **rest, Token *tok); static Node *assign(Token **rest, Token *tok); @@ -10,6 +14,14 @@ static Node *mul(Token **rest, Token *tok); static Node *unary(Token **rest, Token *tok); static Node *primary(Token **rest, Token *tok); +// Find a local variable by name. +static Obj *find_var(Token *tok) { + for (Obj *var = locals; var; var = var->next) + if (strlen(var->name) == tok->len && !strncmp(tok->loc, var->name, tok->len)) + return var; + return NULL; +} + static Node *new_node(NodeKind kind) { Node *node = calloc(1, sizeof(Node)); node->kind = kind; @@ -35,12 +47,20 @@ static Node *new_num(int val) { return node; } -static Node *new_var_node(char name) { +static Node *new_var_node(Obj *var) { Node *node = new_node(ND_VAR); - node->name = name; + node->var = var; return node; } +static Obj *new_lvar(char *name) { + Obj *var = calloc(1, sizeof(Obj)); + var->name = name; + var->next = locals; + locals = var; + return var; +} + // stmt = expr-stmt static Node *stmt(Token **rest, Token *tok) { return expr_stmt(rest, tok); @@ -178,9 +198,11 @@ static Node *primary(Token **rest, Token *tok) { } if (tok->kind == TK_IDENT) { - Node *node = new_var_node(*tok->loc); + Obj *var = find_var(tok); + if (!var) + var = new_lvar(strndup(tok->loc, tok->len)); *rest = tok->next; - return node; + return new_var_node(var); } if (tok->kind == TK_NUM) { @@ -193,10 +215,15 @@ static Node *primary(Token **rest, Token *tok) { } // program = stmt* -Node *parse(Token *tok) { +Function *parse(Token *tok) { Node head = {}; Node *cur = &head; + while (tok->kind != TK_EOF) cur = cur->next = stmt(&tok, tok); - return head.next; + + Function *prog = calloc(1, sizeof(Function)); + prog->body = head.next; + prog->locals = locals; + return prog; } diff --git a/test.sh b/test.sh index fb6888a..e813368 100755 --- a/test.sh +++ b/test.sh @@ -46,10 +46,13 @@ assert 1 '1>=0;' assert 1 '1>=1;' assert 0 '1>=2;' -assert 3 '1; 2; 3;' +assert 3 'a=3; a;' +assert 8 'a=3; z=5; a+z;' assert 3 'a=3; a;' assert 8 'a=3; z=5; a+z;' assert 6 'a=b=3; a+b;' +assert 3 'foo=3; foo;' +assert 8 'foo123=3; bar=5; foo123+bar;' echo OK diff --git a/tokenize.c b/tokenize.c index dfecaf6..e89d569 100644 --- a/tokenize.c +++ b/tokenize.c @@ -60,6 +60,16 @@ static bool startswith(char *p, char *q) { return strncmp(p, q, strlen(q)) == 0; } +// Returns true if c is valid as the first character of an identifier. +static bool is_ident1(char c) { + return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_'; +} + +// Returns true if c is valid as a non-first character of an identifier. +static bool is_ident2(char c) { + return is_ident1(c) || ('0' <= c && c <= '9'); +} + // Read a punctuator token from p and returns its length. static int read_punct(char *p) { if (startswith(p, "==") || startswith(p, "!=") || @@ -92,9 +102,12 @@ Token *tokenize(char *p) { } // Identifier - if ('a' <= *p && *p <= 'z') { - cur = cur->next = new_token(TK_IDENT, p, p + 1); - p++; + if (is_ident1(*p)) { + char *start = p; + do { + p++; + } while (is_ident2(*p)); + cur = cur->next = new_token(TK_IDENT, start, p); continue; }