Support multi-letter local variables

This commit is contained in:
Rui Ueyama 2020-10-07 20:12:19 +09:00
parent 1f9f3adf32
commit 482c26b536
6 changed files with 104 additions and 21 deletions

View File

@ -1,3 +1,4 @@
#define _POSIX_C_SOURCE 200809L
#include <assert.h>
#include <ctype.h>
#include <stdarg.h>
@ -6,10 +7,13 @@
#include <stdlib.h>
#include <string.h>
typedef struct Node Node;
//
// tokenize.c
//
// Token
typedef enum {
TK_IDENT, // Identifiers
TK_PUNCT, // Punctuators
@ -38,6 +42,23 @@ Token *tokenize(char *input);
// parse.c
//
// Local variable
typedef struct Obj Obj;
struct Obj {
Obj *next;
char *name; // Variable name
int offset; // Offset from RBP
};
// Function
typedef struct Function Function;
struct Function {
Node *body;
Obj *locals;
int stack_size;
};
// AST node
typedef enum {
ND_ADD, // +
ND_SUB, // -
@ -55,20 +76,19 @@ typedef enum {
} NodeKind;
// AST node type
typedef struct Node Node;
struct Node {
NodeKind kind; // Node kind
Node *next; // Next node
Node *lhs; // Left-hand side
Node *rhs; // Right-hand side
char name; // Used if kind == ND_VAR
Obj *var; // Used if kind == ND_VAR
int val; // Used if kind == ND_NUM
};
Node *parse(Token *tok);
Function *parse(Token *tok);
//
// codegen.c
//
void codegen(Node *node);
void codegen(Function *prog);

View File

@ -12,12 +12,17 @@ static void pop(char *arg) {
depth--;
}
// Round up `n` to the nearest multiple of `align`. For instance,
// align_to(5, 8) returns 8 and align_to(11, 8) returns 16.
static int align_to(int n, int align) {
return (n + align - 1) / align * align;
}
// Compute the absolute address of a given node.
// It's an error if a given node does not reside in memory.
static void gen_addr(Node *node) {
if (node->kind == ND_VAR) {
int offset = (node->name - 'a' + 1) * 8;
printf(" lea %d(%%rbp), %%rax\n", -offset);
printf(" lea %d(%%rbp), %%rax\n", node->var->offset);
return;
}
@ -97,16 +102,28 @@ static void gen_stmt(Node *node) {
error("invalid statement");
}
void codegen(Node *node) {
// Assign offsets to local variables.
static void assign_lvar_offsets(Function *prog) {
int offset = 0;
for (Obj *var = prog->locals; var; var = var->next) {
offset += 8;
var->offset = -offset;
}
prog->stack_size = align_to(offset, 16);
}
void codegen(Function *prog) {
assign_lvar_offsets(prog);
printf(" .globl main\n");
printf("main:\n");
// Prologue
printf(" push %%rbp\n");
printf(" mov %%rsp, %%rbp\n");
printf(" sub $208, %%rsp\n");
printf(" sub $%d, %%rsp\n", prog->stack_size);
for (Node *n = node; n; n = n->next) {
for (Node *n = prog->body; n; n = n->next) {
gen_stmt(n);
assert(depth == 0);
}

7
main.c
View File

@ -5,7 +5,10 @@ int main(int argc, char **argv) {
error("%s: invalid number of arguments", argv[0]);
Token *tok = tokenize(argv[1]);
Node *node = parse(tok);
codegen(node);
Function *prog = parse(tok);
// Traverse the AST to emit assembly.
codegen(prog);
return 0;
}

39
parse.c
View File

@ -1,5 +1,9 @@
#include "chibicc.h"
// All local variable instances created during parsing are
// accumulated to this list.
Obj *locals;
static Node *expr(Token **rest, Token *tok);
static Node *expr_stmt(Token **rest, Token *tok);
static Node *assign(Token **rest, Token *tok);
@ -10,6 +14,14 @@ static Node *mul(Token **rest, Token *tok);
static Node *unary(Token **rest, Token *tok);
static Node *primary(Token **rest, Token *tok);
// Find a local variable by name.
static Obj *find_var(Token *tok) {
for (Obj *var = locals; var; var = var->next)
if (strlen(var->name) == tok->len && !strncmp(tok->loc, var->name, tok->len))
return var;
return NULL;
}
static Node *new_node(NodeKind kind) {
Node *node = calloc(1, sizeof(Node));
node->kind = kind;
@ -35,12 +47,20 @@ static Node *new_num(int val) {
return node;
}
static Node *new_var_node(char name) {
static Node *new_var_node(Obj *var) {
Node *node = new_node(ND_VAR);
node->name = name;
node->var = var;
return node;
}
static Obj *new_lvar(char *name) {
Obj *var = calloc(1, sizeof(Obj));
var->name = name;
var->next = locals;
locals = var;
return var;
}
// stmt = expr-stmt
static Node *stmt(Token **rest, Token *tok) {
return expr_stmt(rest, tok);
@ -178,9 +198,11 @@ static Node *primary(Token **rest, Token *tok) {
}
if (tok->kind == TK_IDENT) {
Node *node = new_var_node(*tok->loc);
Obj *var = find_var(tok);
if (!var)
var = new_lvar(strndup(tok->loc, tok->len));
*rest = tok->next;
return node;
return new_var_node(var);
}
if (tok->kind == TK_NUM) {
@ -193,10 +215,15 @@ static Node *primary(Token **rest, Token *tok) {
}
// program = stmt*
Node *parse(Token *tok) {
Function *parse(Token *tok) {
Node head = {};
Node *cur = &head;
while (tok->kind != TK_EOF)
cur = cur->next = stmt(&tok, tok);
return head.next;
Function *prog = calloc(1, sizeof(Function));
prog->body = head.next;
prog->locals = locals;
return prog;
}

View File

@ -46,10 +46,13 @@ assert 1 '1>=0;'
assert 1 '1>=1;'
assert 0 '1>=2;'
assert 3 '1; 2; 3;'
assert 3 'a=3; a;'
assert 8 'a=3; z=5; a+z;'
assert 3 'a=3; a;'
assert 8 'a=3; z=5; a+z;'
assert 6 'a=b=3; a+b;'
assert 3 'foo=3; foo;'
assert 8 'foo123=3; bar=5; foo123+bar;'
echo OK

View File

@ -60,6 +60,16 @@ static bool startswith(char *p, char *q) {
return strncmp(p, q, strlen(q)) == 0;
}
// Returns true if c is valid as the first character of an identifier.
static bool is_ident1(char c) {
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_';
}
// Returns true if c is valid as a non-first character of an identifier.
static bool is_ident2(char c) {
return is_ident1(c) || ('0' <= c && c <= '9');
}
// Read a punctuator token from p and returns its length.
static int read_punct(char *p) {
if (startswith(p, "==") || startswith(p, "!=") ||
@ -92,9 +102,12 @@ Token *tokenize(char *p) {
}
// Identifier
if ('a' <= *p && *p <= 'z') {
cur = cur->next = new_token(TK_IDENT, p, p + 1);
if (is_ident1(*p)) {
char *start = p;
do {
p++;
} while (is_ident2(*p));
cur = cur->next = new_token(TK_IDENT, start, p);
continue;
}