mirror of
https://github.com/rui314/chibicc
synced 2024-11-22 06:11:18 +03:00
Support multi-letter local variables
This commit is contained in:
parent
1f9f3adf32
commit
482c26b536
28
chibicc.h
28
chibicc.h
@ -1,3 +1,4 @@
|
||||
#define _POSIX_C_SOURCE 200809L
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <stdarg.h>
|
||||
@ -6,10 +7,13 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
typedef struct Node Node;
|
||||
|
||||
//
|
||||
// tokenize.c
|
||||
//
|
||||
|
||||
// Token
|
||||
typedef enum {
|
||||
TK_IDENT, // Identifiers
|
||||
TK_PUNCT, // Punctuators
|
||||
@ -38,6 +42,23 @@ Token *tokenize(char *input);
|
||||
// parse.c
|
||||
//
|
||||
|
||||
// Local variable
|
||||
typedef struct Obj Obj;
|
||||
struct Obj {
|
||||
Obj *next;
|
||||
char *name; // Variable name
|
||||
int offset; // Offset from RBP
|
||||
};
|
||||
|
||||
// Function
|
||||
typedef struct Function Function;
|
||||
struct Function {
|
||||
Node *body;
|
||||
Obj *locals;
|
||||
int stack_size;
|
||||
};
|
||||
|
||||
// AST node
|
||||
typedef enum {
|
||||
ND_ADD, // +
|
||||
ND_SUB, // -
|
||||
@ -55,20 +76,19 @@ typedef enum {
|
||||
} NodeKind;
|
||||
|
||||
// AST node type
|
||||
typedef struct Node Node;
|
||||
struct Node {
|
||||
NodeKind kind; // Node kind
|
||||
Node *next; // Next node
|
||||
Node *lhs; // Left-hand side
|
||||
Node *rhs; // Right-hand side
|
||||
char name; // Used if kind == ND_VAR
|
||||
Obj *var; // Used if kind == ND_VAR
|
||||
int val; // Used if kind == ND_NUM
|
||||
};
|
||||
|
||||
Node *parse(Token *tok);
|
||||
Function *parse(Token *tok);
|
||||
|
||||
//
|
||||
// codegen.c
|
||||
//
|
||||
|
||||
void codegen(Node *node);
|
||||
void codegen(Function *prog);
|
||||
|
27
codegen.c
27
codegen.c
@ -12,12 +12,17 @@ static void pop(char *arg) {
|
||||
depth--;
|
||||
}
|
||||
|
||||
// Round up `n` to the nearest multiple of `align`. For instance,
|
||||
// align_to(5, 8) returns 8 and align_to(11, 8) returns 16.
|
||||
static int align_to(int n, int align) {
|
||||
return (n + align - 1) / align * align;
|
||||
}
|
||||
|
||||
// Compute the absolute address of a given node.
|
||||
// It's an error if a given node does not reside in memory.
|
||||
static void gen_addr(Node *node) {
|
||||
if (node->kind == ND_VAR) {
|
||||
int offset = (node->name - 'a' + 1) * 8;
|
||||
printf(" lea %d(%%rbp), %%rax\n", -offset);
|
||||
printf(" lea %d(%%rbp), %%rax\n", node->var->offset);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -97,16 +102,28 @@ static void gen_stmt(Node *node) {
|
||||
error("invalid statement");
|
||||
}
|
||||
|
||||
void codegen(Node *node) {
|
||||
// Assign offsets to local variables.
|
||||
static void assign_lvar_offsets(Function *prog) {
|
||||
int offset = 0;
|
||||
for (Obj *var = prog->locals; var; var = var->next) {
|
||||
offset += 8;
|
||||
var->offset = -offset;
|
||||
}
|
||||
prog->stack_size = align_to(offset, 16);
|
||||
}
|
||||
|
||||
void codegen(Function *prog) {
|
||||
assign_lvar_offsets(prog);
|
||||
|
||||
printf(" .globl main\n");
|
||||
printf("main:\n");
|
||||
|
||||
// Prologue
|
||||
printf(" push %%rbp\n");
|
||||
printf(" mov %%rsp, %%rbp\n");
|
||||
printf(" sub $208, %%rsp\n");
|
||||
printf(" sub $%d, %%rsp\n", prog->stack_size);
|
||||
|
||||
for (Node *n = node; n; n = n->next) {
|
||||
for (Node *n = prog->body; n; n = n->next) {
|
||||
gen_stmt(n);
|
||||
assert(depth == 0);
|
||||
}
|
||||
|
7
main.c
7
main.c
@ -5,7 +5,10 @@ int main(int argc, char **argv) {
|
||||
error("%s: invalid number of arguments", argv[0]);
|
||||
|
||||
Token *tok = tokenize(argv[1]);
|
||||
Node *node = parse(tok);
|
||||
codegen(node);
|
||||
Function *prog = parse(tok);
|
||||
|
||||
// Traverse the AST to emit assembly.
|
||||
codegen(prog);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
39
parse.c
39
parse.c
@ -1,5 +1,9 @@
|
||||
#include "chibicc.h"
|
||||
|
||||
// All local variable instances created during parsing are
|
||||
// accumulated to this list.
|
||||
Obj *locals;
|
||||
|
||||
static Node *expr(Token **rest, Token *tok);
|
||||
static Node *expr_stmt(Token **rest, Token *tok);
|
||||
static Node *assign(Token **rest, Token *tok);
|
||||
@ -10,6 +14,14 @@ static Node *mul(Token **rest, Token *tok);
|
||||
static Node *unary(Token **rest, Token *tok);
|
||||
static Node *primary(Token **rest, Token *tok);
|
||||
|
||||
// Find a local variable by name.
|
||||
static Obj *find_var(Token *tok) {
|
||||
for (Obj *var = locals; var; var = var->next)
|
||||
if (strlen(var->name) == tok->len && !strncmp(tok->loc, var->name, tok->len))
|
||||
return var;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static Node *new_node(NodeKind kind) {
|
||||
Node *node = calloc(1, sizeof(Node));
|
||||
node->kind = kind;
|
||||
@ -35,12 +47,20 @@ static Node *new_num(int val) {
|
||||
return node;
|
||||
}
|
||||
|
||||
static Node *new_var_node(char name) {
|
||||
static Node *new_var_node(Obj *var) {
|
||||
Node *node = new_node(ND_VAR);
|
||||
node->name = name;
|
||||
node->var = var;
|
||||
return node;
|
||||
}
|
||||
|
||||
static Obj *new_lvar(char *name) {
|
||||
Obj *var = calloc(1, sizeof(Obj));
|
||||
var->name = name;
|
||||
var->next = locals;
|
||||
locals = var;
|
||||
return var;
|
||||
}
|
||||
|
||||
// stmt = expr-stmt
|
||||
static Node *stmt(Token **rest, Token *tok) {
|
||||
return expr_stmt(rest, tok);
|
||||
@ -178,9 +198,11 @@ static Node *primary(Token **rest, Token *tok) {
|
||||
}
|
||||
|
||||
if (tok->kind == TK_IDENT) {
|
||||
Node *node = new_var_node(*tok->loc);
|
||||
Obj *var = find_var(tok);
|
||||
if (!var)
|
||||
var = new_lvar(strndup(tok->loc, tok->len));
|
||||
*rest = tok->next;
|
||||
return node;
|
||||
return new_var_node(var);
|
||||
}
|
||||
|
||||
if (tok->kind == TK_NUM) {
|
||||
@ -193,10 +215,15 @@ static Node *primary(Token **rest, Token *tok) {
|
||||
}
|
||||
|
||||
// program = stmt*
|
||||
Node *parse(Token *tok) {
|
||||
Function *parse(Token *tok) {
|
||||
Node head = {};
|
||||
Node *cur = &head;
|
||||
|
||||
while (tok->kind != TK_EOF)
|
||||
cur = cur->next = stmt(&tok, tok);
|
||||
return head.next;
|
||||
|
||||
Function *prog = calloc(1, sizeof(Function));
|
||||
prog->body = head.next;
|
||||
prog->locals = locals;
|
||||
return prog;
|
||||
}
|
||||
|
5
test.sh
5
test.sh
@ -46,10 +46,13 @@ assert 1 '1>=0;'
|
||||
assert 1 '1>=1;'
|
||||
assert 0 '1>=2;'
|
||||
|
||||
assert 3 '1; 2; 3;'
|
||||
assert 3 'a=3; a;'
|
||||
assert 8 'a=3; z=5; a+z;'
|
||||
|
||||
assert 3 'a=3; a;'
|
||||
assert 8 'a=3; z=5; a+z;'
|
||||
assert 6 'a=b=3; a+b;'
|
||||
assert 3 'foo=3; foo;'
|
||||
assert 8 'foo123=3; bar=5; foo123+bar;'
|
||||
|
||||
echo OK
|
||||
|
19
tokenize.c
19
tokenize.c
@ -60,6 +60,16 @@ static bool startswith(char *p, char *q) {
|
||||
return strncmp(p, q, strlen(q)) == 0;
|
||||
}
|
||||
|
||||
// Returns true if c is valid as the first character of an identifier.
|
||||
static bool is_ident1(char c) {
|
||||
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_';
|
||||
}
|
||||
|
||||
// Returns true if c is valid as a non-first character of an identifier.
|
||||
static bool is_ident2(char c) {
|
||||
return is_ident1(c) || ('0' <= c && c <= '9');
|
||||
}
|
||||
|
||||
// Read a punctuator token from p and returns its length.
|
||||
static int read_punct(char *p) {
|
||||
if (startswith(p, "==") || startswith(p, "!=") ||
|
||||
@ -92,9 +102,12 @@ Token *tokenize(char *p) {
|
||||
}
|
||||
|
||||
// Identifier
|
||||
if ('a' <= *p && *p <= 'z') {
|
||||
cur = cur->next = new_token(TK_IDENT, p, p + 1);
|
||||
p++;
|
||||
if (is_ident1(*p)) {
|
||||
char *start = p;
|
||||
do {
|
||||
p++;
|
||||
} while (is_ident2(*p));
|
||||
cur = cur->next = new_token(TK_IDENT, start, p);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user