mirror of
https://github.com/rui314/chibicc
synced 2024-11-25 07:40:48 +03:00
Support multi-letter local variables
This commit is contained in:
parent
1f9f3adf32
commit
482c26b536
28
chibicc.h
28
chibicc.h
@ -1,3 +1,4 @@
|
|||||||
|
#define _POSIX_C_SOURCE 200809L
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
@ -6,10 +7,13 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
typedef struct Node Node;
|
||||||
|
|
||||||
//
|
//
|
||||||
// tokenize.c
|
// tokenize.c
|
||||||
//
|
//
|
||||||
|
|
||||||
|
// Token
|
||||||
typedef enum {
|
typedef enum {
|
||||||
TK_IDENT, // Identifiers
|
TK_IDENT, // Identifiers
|
||||||
TK_PUNCT, // Punctuators
|
TK_PUNCT, // Punctuators
|
||||||
@ -38,6 +42,23 @@ Token *tokenize(char *input);
|
|||||||
// parse.c
|
// parse.c
|
||||||
//
|
//
|
||||||
|
|
||||||
|
// Local variable
|
||||||
|
typedef struct Obj Obj;
|
||||||
|
struct Obj {
|
||||||
|
Obj *next;
|
||||||
|
char *name; // Variable name
|
||||||
|
int offset; // Offset from RBP
|
||||||
|
};
|
||||||
|
|
||||||
|
// Function
|
||||||
|
typedef struct Function Function;
|
||||||
|
struct Function {
|
||||||
|
Node *body;
|
||||||
|
Obj *locals;
|
||||||
|
int stack_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
// AST node
|
||||||
typedef enum {
|
typedef enum {
|
||||||
ND_ADD, // +
|
ND_ADD, // +
|
||||||
ND_SUB, // -
|
ND_SUB, // -
|
||||||
@ -55,20 +76,19 @@ typedef enum {
|
|||||||
} NodeKind;
|
} NodeKind;
|
||||||
|
|
||||||
// AST node type
|
// AST node type
|
||||||
typedef struct Node Node;
|
|
||||||
struct Node {
|
struct Node {
|
||||||
NodeKind kind; // Node kind
|
NodeKind kind; // Node kind
|
||||||
Node *next; // Next node
|
Node *next; // Next node
|
||||||
Node *lhs; // Left-hand side
|
Node *lhs; // Left-hand side
|
||||||
Node *rhs; // Right-hand side
|
Node *rhs; // Right-hand side
|
||||||
char name; // Used if kind == ND_VAR
|
Obj *var; // Used if kind == ND_VAR
|
||||||
int val; // Used if kind == ND_NUM
|
int val; // Used if kind == ND_NUM
|
||||||
};
|
};
|
||||||
|
|
||||||
Node *parse(Token *tok);
|
Function *parse(Token *tok);
|
||||||
|
|
||||||
//
|
//
|
||||||
// codegen.c
|
// codegen.c
|
||||||
//
|
//
|
||||||
|
|
||||||
void codegen(Node *node);
|
void codegen(Function *prog);
|
||||||
|
27
codegen.c
27
codegen.c
@ -12,12 +12,17 @@ static void pop(char *arg) {
|
|||||||
depth--;
|
depth--;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Round `n` up to the smallest multiple of `align` that is >= `n`. For instance,
|
||||||
|
// align_to(5, 8) returns 8 and align_to(11, 8) returns 16.
|
||||||
|
static int align_to(int n, int align) {
|
||||||
|
return (n + align - 1) / align * align;
|
||||||
|
}
|
||||||
|
|
||||||
// Compute the absolute address of a given node.
|
// Compute the absolute address of a given node.
|
||||||
// It's an error if a given node does not reside in memory.
|
// It's an error if a given node does not reside in memory.
|
||||||
static void gen_addr(Node *node) {
|
static void gen_addr(Node *node) {
|
||||||
if (node->kind == ND_VAR) {
|
if (node->kind == ND_VAR) {
|
||||||
int offset = (node->name - 'a' + 1) * 8;
|
printf(" lea %d(%%rbp), %%rax\n", node->var->offset);
|
||||||
printf(" lea %d(%%rbp), %%rax\n", -offset);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -97,16 +102,28 @@ static void gen_stmt(Node *node) {
|
|||||||
error("invalid statement");
|
error("invalid statement");
|
||||||
}
|
}
|
||||||
|
|
||||||
void codegen(Node *node) {
|
// Assign offsets to local variables.
|
||||||
|
static void assign_lvar_offsets(Function *prog) {
|
||||||
|
int offset = 0;
|
||||||
|
for (Obj *var = prog->locals; var; var = var->next) {
|
||||||
|
offset += 8;
|
||||||
|
var->offset = -offset;
|
||||||
|
}
|
||||||
|
prog->stack_size = align_to(offset, 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
void codegen(Function *prog) {
|
||||||
|
assign_lvar_offsets(prog);
|
||||||
|
|
||||||
printf(" .globl main\n");
|
printf(" .globl main\n");
|
||||||
printf("main:\n");
|
printf("main:\n");
|
||||||
|
|
||||||
// Prologue
|
// Prologue
|
||||||
printf(" push %%rbp\n");
|
printf(" push %%rbp\n");
|
||||||
printf(" mov %%rsp, %%rbp\n");
|
printf(" mov %%rsp, %%rbp\n");
|
||||||
printf(" sub $208, %%rsp\n");
|
printf(" sub $%d, %%rsp\n", prog->stack_size);
|
||||||
|
|
||||||
for (Node *n = node; n; n = n->next) {
|
for (Node *n = prog->body; n; n = n->next) {
|
||||||
gen_stmt(n);
|
gen_stmt(n);
|
||||||
assert(depth == 0);
|
assert(depth == 0);
|
||||||
}
|
}
|
||||||
|
7
main.c
7
main.c
@ -5,7 +5,10 @@ int main(int argc, char **argv) {
|
|||||||
error("%s: invalid number of arguments", argv[0]);
|
error("%s: invalid number of arguments", argv[0]);
|
||||||
|
|
||||||
Token *tok = tokenize(argv[1]);
|
Token *tok = tokenize(argv[1]);
|
||||||
Node *node = parse(tok);
|
Function *prog = parse(tok);
|
||||||
codegen(node);
|
|
||||||
|
// Traverse the AST to emit assembly.
|
||||||
|
codegen(prog);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
39
parse.c
39
parse.c
@ -1,5 +1,9 @@
|
|||||||
#include "chibicc.h"
|
#include "chibicc.h"
|
||||||
|
|
||||||
|
// All local variable instances created during parsing are
|
||||||
|
// accumulated in this list.
|
||||||
|
Obj *locals;
|
||||||
|
|
||||||
static Node *expr(Token **rest, Token *tok);
|
static Node *expr(Token **rest, Token *tok);
|
||||||
static Node *expr_stmt(Token **rest, Token *tok);
|
static Node *expr_stmt(Token **rest, Token *tok);
|
||||||
static Node *assign(Token **rest, Token *tok);
|
static Node *assign(Token **rest, Token *tok);
|
||||||
@ -10,6 +14,14 @@ static Node *mul(Token **rest, Token *tok);
|
|||||||
static Node *unary(Token **rest, Token *tok);
|
static Node *unary(Token **rest, Token *tok);
|
||||||
static Node *primary(Token **rest, Token *tok);
|
static Node *primary(Token **rest, Token *tok);
|
||||||
|
|
||||||
|
// Find a local variable by name.
|
||||||
|
static Obj *find_var(Token *tok) {
|
||||||
|
for (Obj *var = locals; var; var = var->next)
|
||||||
|
if (strlen(var->name) == tok->len && !strncmp(tok->loc, var->name, tok->len))
|
||||||
|
return var;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
static Node *new_node(NodeKind kind) {
|
static Node *new_node(NodeKind kind) {
|
||||||
Node *node = calloc(1, sizeof(Node));
|
Node *node = calloc(1, sizeof(Node));
|
||||||
node->kind = kind;
|
node->kind = kind;
|
||||||
@ -35,12 +47,20 @@ static Node *new_num(int val) {
|
|||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
static Node *new_var_node(char name) {
|
static Node *new_var_node(Obj *var) {
|
||||||
Node *node = new_node(ND_VAR);
|
Node *node = new_node(ND_VAR);
|
||||||
node->name = name;
|
node->var = var;
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static Obj *new_lvar(char *name) {
|
||||||
|
Obj *var = calloc(1, sizeof(Obj));
|
||||||
|
var->name = name;
|
||||||
|
var->next = locals;
|
||||||
|
locals = var;
|
||||||
|
return var;
|
||||||
|
}
|
||||||
|
|
||||||
// stmt = expr-stmt
|
// stmt = expr-stmt
|
||||||
static Node *stmt(Token **rest, Token *tok) {
|
static Node *stmt(Token **rest, Token *tok) {
|
||||||
return expr_stmt(rest, tok);
|
return expr_stmt(rest, tok);
|
||||||
@ -178,9 +198,11 @@ static Node *primary(Token **rest, Token *tok) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (tok->kind == TK_IDENT) {
|
if (tok->kind == TK_IDENT) {
|
||||||
Node *node = new_var_node(*tok->loc);
|
Obj *var = find_var(tok);
|
||||||
|
if (!var)
|
||||||
|
var = new_lvar(strndup(tok->loc, tok->len));
|
||||||
*rest = tok->next;
|
*rest = tok->next;
|
||||||
return node;
|
return new_var_node(var);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tok->kind == TK_NUM) {
|
if (tok->kind == TK_NUM) {
|
||||||
@ -193,10 +215,15 @@ static Node *primary(Token **rest, Token *tok) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// program = stmt*
|
// program = stmt*
|
||||||
Node *parse(Token *tok) {
|
Function *parse(Token *tok) {
|
||||||
Node head = {};
|
Node head = {};
|
||||||
Node *cur = &head;
|
Node *cur = &head;
|
||||||
|
|
||||||
while (tok->kind != TK_EOF)
|
while (tok->kind != TK_EOF)
|
||||||
cur = cur->next = stmt(&tok, tok);
|
cur = cur->next = stmt(&tok, tok);
|
||||||
return head.next;
|
|
||||||
|
Function *prog = calloc(1, sizeof(Function));
|
||||||
|
prog->body = head.next;
|
||||||
|
prog->locals = locals;
|
||||||
|
return prog;
|
||||||
}
|
}
|
||||||
|
5
test.sh
5
test.sh
@ -46,10 +46,13 @@ assert 1 '1>=0;'
|
|||||||
assert 1 '1>=1;'
|
assert 1 '1>=1;'
|
||||||
assert 0 '1>=2;'
|
assert 0 '1>=2;'
|
||||||
|
|
||||||
assert 3 '1; 2; 3;'
|
assert 3 'a=3; a;'
|
||||||
|
assert 8 'a=3; z=5; a+z;'
|
||||||
|
|
||||||
assert 3 'a=3; a;'
|
assert 3 'a=3; a;'
|
||||||
assert 8 'a=3; z=5; a+z;'
|
assert 8 'a=3; z=5; a+z;'
|
||||||
assert 6 'a=b=3; a+b;'
|
assert 6 'a=b=3; a+b;'
|
||||||
|
assert 3 'foo=3; foo;'
|
||||||
|
assert 8 'foo123=3; bar=5; foo123+bar;'
|
||||||
|
|
||||||
echo OK
|
echo OK
|
||||||
|
17
tokenize.c
17
tokenize.c
@ -60,6 +60,16 @@ static bool startswith(char *p, char *q) {
|
|||||||
return strncmp(p, q, strlen(q)) == 0;
|
return strncmp(p, q, strlen(q)) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns true if c is valid as the first character of an identifier.
|
||||||
|
static bool is_ident1(char c) {
|
||||||
|
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true if c is valid as a non-first character of an identifier.
|
||||||
|
static bool is_ident2(char c) {
|
||||||
|
return is_ident1(c) || ('0' <= c && c <= '9');
|
||||||
|
}
|
||||||
|
|
||||||
// Reads a punctuator token from p and returns its length.
|
// Reads a punctuator token from p and returns its length.
|
||||||
static int read_punct(char *p) {
|
static int read_punct(char *p) {
|
||||||
if (startswith(p, "==") || startswith(p, "!=") ||
|
if (startswith(p, "==") || startswith(p, "!=") ||
|
||||||
@ -92,9 +102,12 @@ Token *tokenize(char *p) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Identifier
|
// Identifier
|
||||||
if ('a' <= *p && *p <= 'z') {
|
if (is_ident1(*p)) {
|
||||||
cur = cur->next = new_token(TK_IDENT, p, p + 1);
|
char *start = p;
|
||||||
|
do {
|
||||||
p++;
|
p++;
|
||||||
|
} while (is_ident2(*p));
|
||||||
|
cur = cur->next = new_token(TK_IDENT, start, p);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user