From a8702a3011ba3be4c811c9b1cd0c22e3751d0598 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Thu, 3 Sep 2020 15:43:36 +0900 Subject: [PATCH] Add keyword "int" and make variable definition mandatory --- chibicc.h | 8 +++++++ parse.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++---- test.sh | 44 ++++++++++++++++++++--------------- tokenize.c | 11 ++++++++- type.c | 11 +++++---- 5 files changed, 114 insertions(+), 28 deletions(-) diff --git a/chibicc.h b/chibicc.h index 701578f..32ae700 100644 --- a/chibicc.h +++ b/chibicc.h @@ -37,6 +37,7 @@ void error_at(char *loc, char *fmt, ...); void error_tok(Token *tok, char *fmt, ...); bool equal(Token *tok, char *op); Token *skip(Token *tok, char *op); +bool consume(Token **rest, Token *tok, char *str); Token *tokenize(char *input); // @@ -48,6 +49,7 @@ typedef struct Var Var; struct Var { Var *next; char *name; // Variable name + Type *ty; // Type int offset; // Offset from RBP }; @@ -118,12 +120,18 @@ typedef enum { struct Type { TypeKind kind; + + // Pointer Type *base; + + // Declaration + Token *name; }; extern Type *ty_int; bool is_integer(Type *ty); +Type *pointer_to(Type *base); void add_type(Node *node); // diff --git a/parse.c b/parse.c index 53f5064..f9f258c 100644 --- a/parse.c +++ b/parse.c @@ -22,6 +22,7 @@ // accumulated to this list. Var *locals; +static Node *declaration(Token **rest, Token *tok); static Node *compound_stmt(Token **rest, Token *tok); static Node *stmt(Token **rest, Token *tok); static Node *expr_stmt(Token **rest, Token *tok); @@ -74,14 +75,70 @@ static Node *new_var_node(Var *var, Token *tok) { return node; } -static Var *new_lvar(char *name) { +static Var *new_lvar(char *name, Type *ty) { Var *var = calloc(1, sizeof(Var)); var->name = name; + var->ty = ty; var->next = locals; locals = var; return var; } +static char *get_ident(Token *tok) { + if (tok->kind != TK_IDENT) + error_tok(tok, "expected an identifier"); + return strndup(tok->loc, tok->len); +} + +// typespec = "int" +static Type *typespec(Token **rest, Token *tok) { + *rest = skip(tok, "int"); + return ty_int; +} + +// declarator = "*"* ident +static Type *declarator(Token **rest, Token *tok, Type *ty) { + while (consume(&tok, tok, "*")) + ty = pointer_to(ty); + + if (tok->kind != TK_IDENT) + error_tok(tok, "expected a variable name"); + + ty->name = tok; + *rest = tok->next; + return ty; +} + +// declaration = typespec (declarator ("=" expr)? ("," declarator ("=" expr)?)*)? ";" +static Node *declaration(Token **rest, Token *tok) { + Type *basety = typespec(&tok, tok); + + Node head = {}; + Node *cur = &head; + int i = 0; + + while (!equal(tok, ";")) { + if (i++ > 0) + tok = skip(tok, ","); + + Type *ty = declarator(&tok, tok, basety); + Var *var = new_lvar(get_ident(ty->name), ty); + + if (!equal(tok, "=")) + continue; + + Node *lhs = new_var_node(var, ty->name); + Node *rhs = assign(&tok, tok->next); + Node *node = new_binary(ND_ASSIGN, lhs, rhs, tok); + cur = cur->next = new_unary(ND_EXPR_STMT, node, tok); + } + + Node *node = new_node(ND_BLOCK, tok); + node->body = head.next; + *rest = tok->next; + return node; +} + // stmt = "return" expr ";" // | "if" "(" expr ")" stmt ("else" stmt)? // | "for" "(" expr-stmt expr? ";" expr? ")" stmt @@ -141,14 +198,17 @@ static Node *stmt(Token **rest, Token *tok) { return expr_stmt(rest, tok); } -// compound-stmt = stmt* "}" +// compound-stmt = (declaration | stmt)* "}" static Node *compound_stmt(Token **rest, Token *tok) { Node *node = new_node(ND_BLOCK, tok); Node head = {}; Node *cur = &head; while (!equal(tok, "}")) { - cur = cur->next = stmt(&tok, tok); + if (equal(tok, "int")) + cur = cur->next = declaration(&tok, tok); + else + cur = cur->next = stmt(&tok, tok); add_type(cur); } @@ -369,7 +429,7 @@ static Node *primary(Token **rest, Token *tok) { if (tok->kind == TK_IDENT) { Var *var = find_var(tok); if (!var) - var = new_lvar(strndup(tok->loc, tok->len)); + error_tok(tok, "undefined variable"); *rest = tok->next; return new_var_node(var, tok); } diff --git a/test.sh b/test.sh index 831c095..b961076 100755 --- a/test.sh +++ b/test.sh @@ -46,14 +46,15 @@ assert 1 '{ return 1>=0; }' assert 1 '{ return 1>=1; }' assert 0 '{ return 1>=2; }' -assert 3 '{ a=3; return a; }' -assert 8 '{ a=3; z=5; return a+z; }' +assert 3 '{ int a; a=3; return a; }' +assert 3 '{ int a=3; return a; }' +assert 8 '{ int a=3; int z=5; return a+z; }' -assert 3 '{ a=3; return a; }' -assert 8 '{ a=3; z=5; return a+z; }' -assert 6 '{ a=b=3; return a+b; }' -assert 3 '{ foo=3; return foo; }' -assert 8 '{ foo123=3; bar=5; return foo123+bar; }' +assert 3 '{ int a=3; return a; }' +assert 8 '{ int a=3; int z=5; return a+z; }' +assert 6 '{ int a; int b; a=b=3; return a+b; }' +assert 3 '{ int foo=3; return foo; }' +assert 8 '{ int foo123=3; int bar=5; return foo123+bar; }' assert 1 '{ return 1; 2; 3; }' assert 2 '{ 1; return 2; 3; }' @@ -69,18 +70,25 @@ assert 2 '{ if (2-1) return 2; return 3; }' assert 4 '{ if (0) { 1; 2; return 3; } else { return 4; } }' assert 3 '{ if (1) { 1; 2; return 3; } else { return 4; } }' -assert 55 '{ i=0; j=0; for (i=0; i<=10; i=i+1) j=i+j; return j; }' -assert 3 '{ for (;;) {return 3;} return 5; }' +assert 55 '{ int i=0; int j=0; for (i=0; i<=10; i=i+1) j=i+j; return j; }' +assert 3 '{ for (;;) return 3; return 5; }' -assert 10 '{ i=0; while(i<10) { i=i+1; } return i; }' +assert 10 '{ int i=0; while(i<10) i=i+1; return i; }' -assert 3 '{ x=3; return *&x; }' -assert 3 '{ x=3; y=&x; z=&y; return **z; }' -assert 5 '{ x=3; y=5; return *(&x+1); }' -assert 3 '{ x=3; y=5; return *(&y-1); }' -assert 5 '{ x=3; y=&x; *y=5; return x; }' -assert 7 '{ x=3; y=5; *(&x+1)=7; return y; }' -assert 7 '{ x=3; y=5; *(&y-2+1)=7; return x; }' -assert 5 '{ x=3; return (&x+2)-&x+3; }' +assert 3 '{ {1; {2;} return 3;} }' + +assert 10 '{ int i=0; while(i<10) i=i+1; return i; }' +assert 55 '{ int i=0; int j=0; while(i<=10) {j=i+j; i=i+1;} return j; }' + +assert 3 '{ int x=3; return *&x; }' +assert 3 '{ int x=3; int *y=&x; int **z=&y; return **z; }' +assert 5 '{ int x=3; int y=5; return *(&x+1); }' +assert 3 '{ int x=3; int y=5; return *(&y-1); }' +assert 5 '{ int x=3; int *y=&x; *y=5; return x; }' +assert 7 '{ int x=3; int y=5; *(&x+1)=7; return y; }' +assert 7 '{ int x=3; int y=5; *(&y-2+1)=7; return x; }' +assert 5 '{ int x=3; return (&x+2)-&x+3; }' +assert 8 '{ int x, y; x=3; y=5; return x+y; }' +assert 8 '{ int x=3, y=5; return x+y; }' echo OK diff --git a/tokenize.c b/tokenize.c index 8be6ba7..f91c213 100644 --- a/tokenize.c +++ b/tokenize.c @@ -48,6 +48,15 @@ Token *skip(Token *tok, char *op) { return tok->next; } +bool consume(Token **rest, Token *tok, char *str) { + if (equal(tok, str)) { + *rest = tok->next; + return true; + } + *rest = tok; + return false; +} + // Create a new token and add it as the next token of `cur`. static Token *new_token(TokenKind kind, Token *cur, char *str, int len) { Token *tok = calloc(1, sizeof(Token)); @@ -73,7 +82,7 @@ static bool is_ident2(char c) { } static bool is_keyword(Token *tok) { - static char *kw[] = {"return", "if", "else", "for", "while"}; + static char *kw[] = {"return", "if", "else", "for", "while", "int"}; for (int i = 0; i < sizeof(kw) / sizeof(*kw); i++) if (equal(tok, kw[i])) diff --git a/type.c b/type.c index 0f5c5dc..7a751a7 100644 --- a/type.c +++ b/type.c @@ -40,18 +40,19 @@ void add_type(Node *node) { case ND_NE: case ND_LT: case ND_LE: - case ND_VAR: case ND_NUM: node->ty = ty_int; return; + case ND_VAR: + node->ty = node->var->ty; + return; case ND_ADDR: node->ty = pointer_to(node->lhs->ty); return; case ND_DEREF: - if (node->lhs->ty->kind == TY_PTR) - node->ty = node->lhs->ty->base; - else - node->ty = ty_int; + if (node->lhs->ty->kind != TY_PTR) + error_tok(node->tok, "invalid pointer dereference"); + node->ty = node->lhs->ty->base; return; } }