Split main.c into multiple small files

2024-11-24 23:29:36 +03:00 · 2020-10-07 20:11:16 +09:00 · 2020-10-07 20:11:16 +09:00 · 725badfb49
commit 725badfb49
parent 25b4b85b88
6 changed files with 425 additions and 409 deletions
--- a/8
+++ b/8
@ -1,7 +1,11 @@
 CFLAGS=-std=c11 -g -fno-common
+SRCS=$(wildcard *.c)
+OBJS=$(SRCS:.c=.o)

-chibicc: main.o
-	$(CC) -o chibicc main.o $(LDFLAGS)
+chibicc: $(OBJS)
+	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
+
+$(OBJS): chibicc.h

 test: chibicc
 	./test.sh
--- a/chibicc.h
+++ b/chibicc.h
@ -0,0 +1,68 @@
+#include <assert.h>
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+//
+// tokenize.c
+//
+
+// Kinds of tokens produced by tokenize().
+typedef enum {
+  TK_PUNCT, // Punctuators (the tokenizer does not recognize keywords yet)
+  TK_NUM,   // Numeric literals
+  TK_EOF,   // End-of-file markers
+} TokenKind;
+
+// Token type.
+// Tokens form a singly linked list through `next`; tokenize() ends the
+// list with a TK_EOF token.
+typedef struct Token Token;
+struct Token {
+  TokenKind kind; // Token kind
+  Token *next;    // Next token
+  int val;        // If kind is TK_NUM, its value
+  char *loc;      // Token location (points into the tokenized input string)
+  int len;        // Token length in chars, starting at `loc`
+};
+
+// Prints a printf-style message to stderr and exits with status 1.
+void error(char *fmt, ...);
+// Like error(), but first echoes the input and marks `loc` with a caret.
+void error_at(char *loc, char *fmt, ...);
+// Like error_at(), using tok->loc as the location.
+void error_tok(Token *tok, char *fmt, ...);
+// Returns true if the text of `tok` is exactly `op`. Does not advance.
+bool equal(Token *tok, char *op);
+// Returns tok->next if `tok` is `op`; otherwise reports an error and exits.
+Token *skip(Token *tok, char *op);
+// Splits `input` into a token list that ends with a TK_EOF token.
+Token *tokenize(char *input);
+
+//
+// parse.c
+//
+
+// Kinds of AST nodes.
+// There are no "greater than" kinds: the parser represents `a > b` and
+// `a >= b` as ND_LT / ND_LE with the operands swapped.
+typedef enum {
+  ND_ADD, // +
+  ND_SUB, // -
+  ND_MUL, // *
+  ND_DIV, // /
+  ND_NEG, // unary -
+  ND_EQ,  // ==
+  ND_NE,  // !=
+  ND_LT,  // <
+  ND_LE,  // <=
+  ND_NUM, // Integer
+} NodeKind;
+
+// AST node type.
+// Binary operators use both `lhs` and `rhs`; the unary ND_NEG keeps its
+// operand in `lhs`; ND_NUM uses only `val`.
+typedef struct Node Node;
+struct Node {
+  NodeKind kind; // Node kind
+  Node *lhs;     // Left-hand side
+  Node *rhs;     // Right-hand side
+  int val;       // Used if kind == ND_NUM
+};
+
+// Parses the whole token list as one expression. A token other than
+// TK_EOF left over after the expression is reported as an error.
+Node *parse(Token *tok);
+
+//
+// codegen.c
+//
+
+// Prints assembly for `node` to stdout as a global `main` function whose
+// result is left in %rax.
+void codegen(Node *node);
--- a/codegen.c
+++ b/codegen.c
@ -0,0 +1,75 @@
+#include "chibicc.h"
+
+// Number of values the generated code currently holds on the machine
+// stack. codegen() asserts that it is back to zero once it is done.
+static int depth;
+
+// Emit an instruction that pushes %rax onto the stack.
+static void push(void) {
+  depth += 1;
+  puts("  push %rax");
+}
+
+// Emit an instruction that pops the top of the stack into the register
+// named by `arg` (e.g. "%rdi").
+static void pop(char *arg) {
+  depth -= 1;
+  printf("  pop %s\n", arg);
+}
+
+// Emit code that evaluates `node` and leaves the result in %rax.
+static void gen_expr(Node *node) {
+  // Numbers and unary minus need no stack traffic.
+  if (node->kind == ND_NUM) {
+    printf("  mov $%d, %%rax\n", node->val);
+    return;
+  }
+
+  if (node->kind == ND_NEG) {
+    gen_expr(node->lhs);
+    printf("  neg %%rax\n");
+    return;
+  }
+
+  // Everything else is treated as a binary operator: evaluate the right
+  // operand first and park it on the stack, so that afterwards the left
+  // operand is in %rax and the right operand is in %rdi.
+  gen_expr(node->rhs);
+  push();
+  gen_expr(node->lhs);
+  pop("%rdi");
+
+  // Comparisons share one instruction sequence and differ only in the
+  // set-on-condition mnemonic, which is selected here.
+  char *setcc = NULL;
+
+  switch (node->kind) {
+  case ND_ADD:
+    printf("  add %%rdi, %%rax\n");
+    return;
+  case ND_SUB:
+    printf("  sub %%rdi, %%rax\n");
+    return;
+  case ND_MUL:
+    printf("  imul %%rdi, %%rax\n");
+    return;
+  case ND_DIV:
+    // cqo sign-extends %rax into %rdx:%rax, the dividend idiv expects.
+    printf("  cqo\n");
+    printf("  idiv %%rdi\n");
+    return;
+  case ND_EQ:
+    setcc = "sete";
+    break;
+  case ND_NE:
+    setcc = "setne";
+    break;
+  case ND_LT:
+    setcc = "setl";
+    break;
+  case ND_LE:
+    setcc = "setle";
+    break;
+  default:
+    error("invalid expression");
+    return;
+  }
+
+  printf("  cmp %%rdi, %%rax\n");
+  printf("  %s %%al\n", setcc);
+  printf("  movzb %%al, %%rax\n");
+}
+
+// Print a complete assembly program to stdout: a global `main` that
+// evaluates `node` and returns with the value in %rax.
+void codegen(Node *node) {
+  printf("  .globl main\nmain:\n");
+
+  gen_expr(node);
+  printf("  ret\n");
+
+  // Every push emitted by gen_expr must have been matched by a pop.
+  assert(depth == 0);
+}
--- a/main.c
+++ b/main.c
@ -1,414 +1,11 @@
-#include <assert.h>
-#include <ctype.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-//
-// Tokenizer
-//
-
-typedef enum {
-  TK_PUNCT, // Punctuators
-  TK_NUM,   // Numeric literals
-  TK_EOF,   // End-of-file markers
-} TokenKind;
-
-// Token type
-typedef struct Token Token;
-struct Token {
-  TokenKind kind; // Token kind
-  Token *next;    // Next token
-  int val;        // If kind is TK_NUM, its value
-  char *loc;      // Token location
-  int len;        // Token length
-};
-
-// Input string
-static char *current_input;
-
-// Reports an error and exit.
-static void error(char *fmt, ...) {
-  va_list ap;
-  va_start(ap, fmt);
-  vfprintf(stderr, fmt, ap);
-  fprintf(stderr, "\n");
-  exit(1);
-}
-
-// Reports an error location and exit.
-static void verror_at(char *loc, char *fmt, va_list ap) {
-  int pos = loc - current_input;
-  fprintf(stderr, "%s\n", current_input);
-  fprintf(stderr, "%*s", pos, ""); // print pos spaces.
-  fprintf(stderr, "^ ");
-  vfprintf(stderr, fmt, ap);
-  fprintf(stderr, "\n");
-  exit(1);
-}
-
-static void error_at(char *loc, char *fmt, ...) {
-  va_list ap;
-  va_start(ap, fmt);
-  verror_at(loc, fmt, ap);
-}
-
-static void error_tok(Token *tok, char *fmt, ...) {
-  va_list ap;
-  va_start(ap, fmt);
-  verror_at(tok->loc, fmt, ap);
-}
-
-// Consumes the current token if it matches `s`.
-static bool equal(Token *tok, char *op) {
-  return memcmp(tok->loc, op, tok->len) == 0 && op[tok->len] == '\0';
-}
-
-// Ensure that the current token is `s`.
-static Token *skip(Token *tok, char *s) {
-  if (!equal(tok, s))
-    error_tok(tok, "expected '%s'", s);
-  return tok->next;
-}
-
-// Ensure that the current token is TK_NUM.
-static int get_number(Token *tok) {
-  if (tok->kind != TK_NUM)
-    error_tok(tok, "expected a number");
-  return tok->val;
-}
-
-// Create a new token.
-static Token *new_token(TokenKind kind, char *start, char *end) {
-  Token *tok = calloc(1, sizeof(Token));
-  tok->kind = kind;
-  tok->loc = start;
-  tok->len = end - start;
-  return tok;
-}
-
-static bool startswith(char *p, char *q) {
-  return strncmp(p, q, strlen(q)) == 0;
-}
-
-// Read a punctuator token from p and returns its length.
-static int read_punct(char *p) {
-  if (startswith(p, "==") || startswith(p, "!=") ||
-      startswith(p, "<=") || startswith(p, ">="))
-    return 2;
-
-  return ispunct(*p) ? 1 : 0;
-}
-
-// Tokenize `current_input` and returns new tokens.
-static Token *tokenize(void) {
-  char *p = current_input;
-  Token head = {};
-  Token *cur = &head;
-
-  while (*p) {
-    // Skip whitespace characters.
-    if (isspace(*p)) {
-      p++;
-      continue;
-    }
-
-    // Numeric literal
-    if (isdigit(*p)) {
-      cur = cur->next = new_token(TK_NUM, p, p);
-      char *q = p;
-      cur->val = strtoul(p, &p, 10);
-      cur->len = p - q;
-      continue;
-    }
-
-    // Punctuators
-    int punct_len = read_punct(p);
-    if (punct_len) {
-      cur = cur->next = new_token(TK_PUNCT, p, p + punct_len);
-      p += cur->len;
-      continue;
-    }
-
-    error_at(p, "invalid token");
-  }
-
-  cur = cur->next = new_token(TK_EOF, p, p);
-  return head.next;
-}
-
-//
-// Parser
-//
-
-typedef enum {
-  ND_ADD, // +
-  ND_SUB, // -
-  ND_MUL, // *
-  ND_DIV, // /
-  ND_NEG, // unary -
-  ND_EQ,  // ==
-  ND_NE,  // !=
-  ND_LT,  // <
-  ND_LE,  // <=
-  ND_NUM, // Integer
-} NodeKind;
-
-// AST node type
-typedef struct Node Node;
-struct Node {
-  NodeKind kind; // Node kind
-  Node *lhs;     // Left-hand side
-  Node *rhs;     // Right-hand side
-  int val;       // Used if kind == ND_NUM
-};
-
-static Node *new_node(NodeKind kind) {
-  Node *node = calloc(1, sizeof(Node));
-  node->kind = kind;
-  return node;
-}
-
-static Node *new_binary(NodeKind kind, Node *lhs, Node *rhs) {
-  Node *node = new_node(kind);
-  node->lhs = lhs;
-  node->rhs = rhs;
-  return node;
-}
-
-static Node *new_unary(NodeKind kind, Node *expr) {
-  Node *node = new_node(kind);
-  node->lhs = expr;
-  return node;
-}
-
-static Node *new_num(int val) {
-  Node *node = new_node(ND_NUM);
-  node->val = val;
-  return node;
-}
-
-static Node *expr(Token **rest, Token *tok);
-static Node *equality(Token **rest, Token *tok);
-static Node *relational(Token **rest, Token *tok);
-static Node *add(Token **rest, Token *tok);
-static Node *mul(Token **rest, Token *tok);
-static Node *unary(Token **rest, Token *tok);
-static Node *primary(Token **rest, Token *tok);
-
-// expr = equality
-static Node *expr(Token **rest, Token *tok) {
-  return equality(rest, tok);
-}
-
-// equality = relational ("==" relational | "!=" relational)*
-static Node *equality(Token **rest, Token *tok) {
-  Node *node = relational(&tok, tok);
-
-  for (;;) {
-    if (equal(tok, "==")) {
-      node = new_binary(ND_EQ, node, relational(&tok, tok->next));
-      continue;
-    }
-
-    if (equal(tok, "!=")) {
-      node = new_binary(ND_NE, node, relational(&tok, tok->next));
-      continue;
-    }
-
-    *rest = tok;
-    return node;
-  }
-}
-
-// relational = add ("<" add | "<=" add | ">" add | ">=" add)*
-static Node *relational(Token **rest, Token *tok) {
-  Node *node = add(&tok, tok);
-
-  for (;;) {
-    if (equal(tok, "<")) {
-      node = new_binary(ND_LT, node, add(&tok, tok->next));
-      continue;
-    }
-
-    if (equal(tok, "<=")) {
-      node = new_binary(ND_LE, node, add(&tok, tok->next));
-      continue;
-    }
-
-    if (equal(tok, ">")) {
-      node = new_binary(ND_LT, add(&tok, tok->next), node);
-      continue;
-    }
-
-    if (equal(tok, ">=")) {
-      node = new_binary(ND_LE, add(&tok, tok->next), node);
-      continue;
-    }
-
-    *rest = tok;
-    return node;
-  }
-}
-
-// add = mul ("+" mul | "-" mul)*
-static Node *add(Token **rest, Token *tok) {
-  Node *node = mul(&tok, tok);
-
-  for (;;) {
-    if (equal(tok, "+")) {
-      node = new_binary(ND_ADD, node, mul(&tok, tok->next));
-      continue;
-    }
-
-    if (equal(tok, "-")) {
-      node = new_binary(ND_SUB, node, mul(&tok, tok->next));
-      continue;
-    }
-
-    *rest = tok;
-    return node;
-  }
-}
-
-// mul = unary ("*" unary | "/" unary)*
-static Node *mul(Token **rest, Token *tok) {
-  Node *node = unary(&tok, tok);
-
-  for (;;) {
-    if (equal(tok, "*")) {
-      node = new_binary(ND_MUL, node, unary(&tok, tok->next));
-      continue;
-    }
-
-    if (equal(tok, "/")) {
-      node = new_binary(ND_DIV, node, unary(&tok, tok->next));
-      continue;
-    }
-
-    *rest = tok;
-    return node;
-  }
-}
-
-// unary = ("+" | "-") unary
-//       | primary
-static Node *unary(Token **rest, Token *tok) {
-  if (equal(tok, "+"))
-    return unary(rest, tok->next);
-
-  if (equal(tok, "-"))
-    return new_unary(ND_NEG, unary(rest, tok->next));
-
-  return primary(rest, tok);
-}
-
-// primary = "(" expr ")" | num
-static Node *primary(Token **rest, Token *tok) {
-  if (equal(tok, "(")) {
-    Node *node = expr(&tok, tok->next);
-    *rest = skip(tok, ")");
-    return node;
-  }
-
-  if (tok->kind == TK_NUM) {
-    Node *node = new_num(tok->val);
-    *rest = tok->next;
-    return node;
-  }
-
-  error_tok(tok, "expected an expression");
-}
-
-//
-// Code generator
-//
-
-static int depth;
-
-static void push(void) {
-  printf("  push %%rax\n");
-  depth++;
-}
-
-static void pop(char *arg) {
-  printf("  pop %s\n", arg);
-  depth--;
-}
-
-static void gen_expr(Node *node) {
-  switch (node->kind) {
-  case ND_NUM:
-    printf("  mov $%d, %%rax\n", node->val);
-    return;
-  case ND_NEG:
-    gen_expr(node->lhs);
-    printf("  neg %%rax\n");
-    return;
-  }
-
-  gen_expr(node->rhs);
-  push();
-  gen_expr(node->lhs);
-  pop("%rdi");
-
-  switch (node->kind) {
-  case ND_ADD:
-    printf("  add %%rdi, %%rax\n");
-    return;
-  case ND_SUB:
-    printf("  sub %%rdi, %%rax\n");
-    return;
-  case ND_MUL:
-    printf("  imul %%rdi, %%rax\n");
-    return;
-  case ND_DIV:
-    printf("  cqo\n");
-    printf("  idiv %%rdi\n");
-    return;
-  case ND_EQ:
-  case ND_NE:
-  case ND_LT:
-  case ND_LE:
-    printf("  cmp %%rdi, %%rax\n");
-
-    if (node->kind == ND_EQ)
-      printf("  sete %%al\n");
-    else if (node->kind == ND_NE)
-      printf("  setne %%al\n");
-    else if (node->kind == ND_LT)
-      printf("  setl %%al\n");
-    else if (node->kind == ND_LE)
-      printf("  setle %%al\n");
-
-    printf("  movzb %%al, %%rax\n");
-    return;
-  }
-
-  error("invalid expression");
-}
+#include "chibicc.h"

 int main(int argc, char **argv) {
  if (argc != 2)
    error("%s: invalid number of arguments", argv[0]);

-  // Tokenize and parse.
-  current_input = argv[1];
-  Token *tok = tokenize();
-  Node *node = expr(&tok, tok);
-
-  if (tok->kind != TK_EOF)
-    error_tok(tok, "extra token");
-
-  printf("  .globl main\n");
-  printf("main:\n");
-
-  // Traverse the AST to emit assembly.
-  gen_expr(node);
-  printf("  ret\n");
-
-  assert(depth == 0);
+  // Run the stages in order: tokenize argv[1], parse the tokens into an
+  // AST, then print assembly for the AST to stdout.
+  Token *tok = tokenize(argv[1]);
+  Node *node = parse(tok);
+  codegen(node);
  return 0;
 }
--- a/parse.c
+++ b/parse.c
@ -0,0 +1,165 @@
+#include "chibicc.h"
+
+// Forward declarations for the grammar functions, which call each other
+// recursively (primary() calls back into expr()). Each one parses starting
+// at `tok` and stores the first token it did not consume in `*rest`.
+static Node *expr(Token **rest, Token *tok);
+static Node *equality(Token **rest, Token *tok);
+static Node *relational(Token **rest, Token *tok);
+static Node *add(Token **rest, Token *tok);
+static Node *mul(Token **rest, Token *tok);
+static Node *unary(Token **rest, Token *tok);
+static Node *primary(Token **rest, Token *tok);
+
+// Allocate a zero-initialized AST node of the given kind.
+// Reports "out of memory" through error() and exits if allocation fails,
+// instead of dereferencing a null pointer.
+static Node *new_node(NodeKind kind) {
+  Node *node = calloc(1, sizeof *node);
+  if (!node)
+    error("out of memory");
+  node->kind = kind;
+  return node;
+}
+
+// Build a node of `kind` with `lhs` and `rhs` as its two operands.
+static Node *new_binary(NodeKind kind, Node *lhs, Node *rhs) {
+  Node *bin = new_node(kind);
+  bin->rhs = rhs;
+  bin->lhs = lhs;
+  return bin;
+}
+
+// Build a node of `kind` whose single operand is stored in `lhs`.
+static Node *new_unary(NodeKind kind, Node *expr) {
+  Node *un = new_node(kind);
+  un->lhs = expr;
+  return un;
+}
+
+// Build an ND_NUM node holding the integer `val`.
+static Node *new_num(int val) {
+  Node *num = new_node(ND_NUM);
+  num->val = val;
+  return num;
+}
+
+// expr = equality
+//
+// Top of the expression grammar; it simply delegates to equality().
+static Node *expr(Token **rest, Token *tok) {
+  Node *node = equality(rest, tok);
+  return node;
+}
+
+// equality = relational ("==" relational | "!=" relational)*
+//
+// Builds a left-associative chain of ND_EQ / ND_NE nodes.
+static Node *equality(Token **rest, Token *tok) {
+  Node *lhs = relational(&tok, tok);
+
+  while (true) {
+    NodeKind kind;
+    if (equal(tok, "=="))
+      kind = ND_EQ;
+    else if (equal(tok, "!="))
+      kind = ND_NE;
+    else
+      break;
+
+    // Skip the operator token and parse its right operand.
+    Node *rhs = relational(&tok, tok->next);
+    lhs = new_binary(kind, lhs, rhs);
+  }
+
+  *rest = tok;
+  return lhs;
+}
+
+// relational = add ("<" add | "<=" add | ">" add | ">=" add)*
+//
+// ">" and ">=" have no node kinds of their own: `a > b` is built as
+// `b < a` and `a >= b` as `b <= a`.
+static Node *relational(Token **rest, Token *tok) {
+  Node *lhs = add(&tok, tok);
+
+  while (true) {
+    NodeKind kind;
+    bool swapped;
+
+    if (equal(tok, "<")) {
+      kind = ND_LT;
+      swapped = false;
+    } else if (equal(tok, "<=")) {
+      kind = ND_LE;
+      swapped = false;
+    } else if (equal(tok, ">")) {
+      kind = ND_LT;
+      swapped = true;
+    } else if (equal(tok, ">=")) {
+      kind = ND_LE;
+      swapped = true;
+    } else {
+      break;
+    }
+
+    // Skip the operator token and parse its right operand.
+    Node *rhs = add(&tok, tok->next);
+    if (swapped)
+      lhs = new_binary(kind, rhs, lhs);
+    else
+      lhs = new_binary(kind, lhs, rhs);
+  }
+
+  *rest = tok;
+  return lhs;
+}
+
+// add = mul ("+" mul | "-" mul)*
+//
+// Builds a left-associative chain of ND_ADD / ND_SUB nodes.
+static Node *add(Token **rest, Token *tok) {
+  Node *lhs = mul(&tok, tok);
+
+  while (true) {
+    NodeKind kind;
+    if (equal(tok, "+"))
+      kind = ND_ADD;
+    else if (equal(tok, "-"))
+      kind = ND_SUB;
+    else
+      break;
+
+    // Skip the operator token and parse its right operand.
+    Node *rhs = mul(&tok, tok->next);
+    lhs = new_binary(kind, lhs, rhs);
+  }
+
+  *rest = tok;
+  return lhs;
+}
+
+// mul = unary ("*" unary | "/" unary)*
+//
+// Builds a left-associative chain of ND_MUL / ND_DIV nodes.
+static Node *mul(Token **rest, Token *tok) {
+  Node *lhs = unary(&tok, tok);
+
+  while (true) {
+    NodeKind kind;
+    if (equal(tok, "*"))
+      kind = ND_MUL;
+    else if (equal(tok, "/"))
+      kind = ND_DIV;
+    else
+      break;
+
+    // Skip the operator token and parse its right operand.
+    Node *rhs = unary(&tok, tok->next);
+    lhs = new_binary(kind, lhs, rhs);
+  }
+
+  *rest = tok;
+  return lhs;
+}
+
+// unary = ("+" | "-") unary
+//       | primary
+//
+// A leading "+" produces no node; a leading "-" wraps its operand in ND_NEG.
+static Node *unary(Token **rest, Token *tok) {
+  if (equal(tok, "-")) {
+    Node *operand = unary(rest, tok->next);
+    return new_unary(ND_NEG, operand);
+  }
+
+  if (equal(tok, "+"))
+    return unary(rest, tok->next);
+
+  return primary(rest, tok);
+}
+
+// primary = "(" expr ")" | num
+//
+// Stores the first token after the parsed primary in `*rest`. If `tok`
+// starts neither a parenthesized expression nor a number, reports
+// "expected an expression" at `tok` and exits.
+static Node *primary(Token **rest, Token *tok) {
+  if (equal(tok, "(")) {
+    Node *node = expr(&tok, tok->next);
+    *rest = skip(tok, ")");
+    return node;
+  }
+
+  if (tok->kind == TK_NUM) {
+    Node *node = new_num(tok->val);
+    *rest = tok->next;
+    return node;
+  }
+
+  error_tok(tok, "expected an expression");
+  // Not reached, because error_tok() exits. The explicit return keeps every
+  // path of this non-void function returning a value.
+  return NULL;
+}
+
+// Parse the token list starting at `tok` as a single expression and
+// return its AST. Anything other than TK_EOF after the expression is
+// reported as "extra token".
+Node *parse(Token *tok) {
+  Token *rest = tok;
+  Node *root = expr(&rest, tok);
+
+  if (rest->kind != TK_EOF)
+    error_tok(rest, "extra token");
+  return root;
+}
--- a/tokenize.c
+++ b/tokenize.c
@ -0,0 +1,107 @@
+#include "chibicc.h"
+
+// Input string. Set by tokenize() and read by verror_at(), which echoes it
+// and computes the caret position relative to it.
+static char *current_input;
+
+// Print a printf-style message and a trailing newline to stderr, then
+// exit with status 1. Does not return.
+void error(char *fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  vfprintf(stderr, fmt, args);
+  fputc('\n', stderr);
+  exit(1);
+}
+
+// Echo the input string, then print a caret under the character at `loc`
+// followed by the formatted message, and exit with status 1.
+static void verror_at(char *loc, char *fmt, va_list ap) {
+  int column = loc - current_input;
+
+  fprintf(stderr, "%s\n", current_input);
+  // "%*s" with an empty string prints `column` spaces before the caret.
+  fprintf(stderr, "%*s^ ", column, "");
+  vfprintf(stderr, fmt, ap);
+  fputc('\n', stderr);
+  exit(1);
+}
+
+// Report a printf-style error pointing at `loc`, which must point into the
+// string being tokenized, and exit.
+void error_at(char *loc, char *fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  verror_at(loc, fmt, args);
+}
+
+// Report a printf-style error pointing at the start of `tok`, and exit.
+void error_tok(Token *tok, char *fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  verror_at(tok->loc, fmt, args);
+}
+
+// Returns true if the text of `tok` is exactly the string `op`.
+// This only compares; it does not consume or advance past the token.
+bool equal(Token *tok, char *op) {
+  // Compare lengths first so that memcmp never reads past the end of `op`
+  // when the token is longer than `op` (e.g. a long number against "+").
+  size_t len = (size_t)tok->len;
+  return strlen(op) == len && memcmp(tok->loc, op, len) == 0;
+}
+
+// Require that `tok` is `op` and return the token after it. Otherwise
+// report "expected '<op>'" at `tok`, which exits.
+Token *skip(Token *tok, char *op) {
+  if (equal(tok, op))
+    return tok->next;
+
+  error_tok(tok, "expected '%s'", op);
+  return tok->next;
+}
+
+// Create a new zero-initialized token of `kind` covering [start, end).
+// Reports "out of memory" through error() and exits if allocation fails,
+// instead of dereferencing a null pointer.
+static Token *new_token(TokenKind kind, char *start, char *end) {
+  Token *tok = calloc(1, sizeof *tok);
+  if (!tok)
+    error("out of memory");
+  tok->kind = kind;
+  tok->loc = start;
+  tok->len = end - start;
+  return tok;
+}
+
+// Return true if the string `p` begins with the string `q`.
+static bool startswith(char *p, char *q) {
+  size_t prefix_len = strlen(q);
+  return strncmp(p, q, prefix_len) == 0;
+}
+
+// Read a punctuator token from p and return its length, or 0 if p does
+// not start with a punctuator.
+static int read_punct(char *p) {
+  // Try the two-character operators before the single-character fallback
+  // so that e.g. "<=" is not split into "<" and "=".
+  if (startswith(p, "==") || startswith(p, "!=") ||
+      startswith(p, "<=") || startswith(p, ">="))
+    return 2;
+
+  // <ctype.h> functions are undefined for negative arguments other than
+  // EOF, and a plain char may be negative for non-ASCII bytes.
+  return ispunct((unsigned char)*p) ? 1 : 0;
+}
+
+// Tokenize the string `p` and return the resulting list of tokens.
+// `p` is also stored in `current_input` so that diagnostics can echo it.
+// The returned list always ends with a TK_EOF token. Input that is not
+// whitespace, a decimal number or a punctuator is reported with
+// error_at(), which exits.
+Token *tokenize(char *p) {
+  current_input = p;
+  Token head = {0}; // dummy list head; an empty `{}` is not valid C11
+  Token *cur = &head;
+
+  while (*p) {
+    // <ctype.h> functions are undefined for negative arguments other than
+    // EOF, and a plain char may be negative for non-ASCII bytes, so
+    // classify the character as unsigned char.
+    unsigned char c = (unsigned char)*p;
+
+    // Skip whitespace characters.
+    if (isspace(c)) {
+      p++;
+      continue;
+    }
+
+    // Numeric literal
+    if (isdigit(c)) {
+      cur = cur->next = new_token(TK_NUM, p, p);
+      char *q = p;
+      // strtoul advances p past the digits; the length is filled in after.
+      cur->val = strtoul(p, &p, 10);
+      cur->len = p - q;
+      continue;
+    }
+
+    // Punctuators
+    int punct_len = read_punct(p);
+    if (punct_len) {
+      cur = cur->next = new_token(TK_PUNCT, p, p + punct_len);
+      p += cur->len;
+      continue;
+    }
+
+    error_at(p, "invalid token");
+  }
+
+  cur = cur->next = new_token(TK_EOF, p, p);
+  return head.next;
+}