2020-10-07 14:12:19 +03:00
|
|
|
// Feature-test macro: must be defined before the first system header is
// included so that the POSIX.1-2008 declarations are made visible.
#define _POSIX_C_SOURCE 200809L
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
|
2020-10-07 14:11:16 +03:00
|
|
|
|
2020-09-04 07:39:06 +03:00
|
|
|
// Forward declarations, so the structures below can hold pointers to
// one another before their full definitions appear.
typedef struct Type Type;
typedef struct Node Node;
typedef struct Member Member;

//
// strings.c
//

// Variadic string builder driven by `fmt`.
// NOTE(review): presumably printf-style and returning a newly allocated
// string -- confirm format semantics and ownership in strings.c.
char *format(char *fmt, ...);

//
// tokenize.c
//

// Token
//
// Lexical category of a token; this is the type of the `kind` member of
// struct Token.
typedef enum {
  TK_IDENT,   // Identifiers
  TK_PUNCT,   // Punctuators
  TK_KEYWORD, // Keywords
  TK_STR,     // String literals
  TK_NUM,     // Numeric literals
  TK_EOF,     // End-of-file markers
} TokenKind;
|
|
|
|
|
|
|
|
// Token type
|
|
|
|
typedef struct Token Token;
|
|
|
|
struct Token {
|
|
|
|
TokenKind kind; // Token kind
|
|
|
|
Token *next; // Next token
|
2020-08-27 15:51:00 +03:00
|
|
|
int64_t val; // If kind is TK_NUM, its value
|
2020-10-07 14:11:16 +03:00
|
|
|
char *loc; // Token location
|
|
|
|
int len; // Token length
|
2020-10-07 06:49:08 +03:00
|
|
|
Type *ty; // Used if TK_STR
|
|
|
|
char *str; // String literal contents including terminating '\0'
|
2020-04-20 16:23:11 +03:00
|
|
|
|
|
|
|
int line_no; // Line number
|
2020-10-07 14:11:16 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
void error(char *fmt, ...);
|
|
|
|
void error_at(char *loc, char *fmt, ...);
|
|
|
|
void error_tok(Token *tok, char *fmt, ...);
|
|
|
|
bool equal(Token *tok, char *op);
|
|
|
|
Token *skip(Token *tok, char *op);
|
2020-09-03 09:43:36 +03:00
|
|
|
bool consume(Token **rest, Token *tok, char *str);
|
2019-08-07 02:30:06 +03:00
|
|
|
Token *tokenize_file(char *filename);
|
2020-10-07 14:11:16 +03:00
|
|
|
|
2020-09-06 02:09:09 +03:00
|
|
|
// Reports an internal error through error(), tagged with the file and
// line at which the macro was expanded. Meant for code paths that should
// never execute.
#define unreachable() \
  error("internal error at %s:%d", __FILE__, __LINE__)

//
// parse.c
//

// Variable or function
|
2020-10-07 14:12:19 +03:00
|
|
|
typedef struct Obj Obj;
|
|
|
|
struct Obj {
|
|
|
|
Obj *next;
|
2020-09-04 11:58:53 +03:00
|
|
|
char *name; // Variable name
|
|
|
|
Type *ty; // Type
|
|
|
|
bool is_local; // local or global/function
|
2020-10-07 14:12:19 +03:00
|
|
|
|
2020-09-04 11:58:53 +03:00
|
|
|
// Local variable
|
|
|
|
int offset;
|
2020-09-04 07:39:48 +03:00
|
|
|
|
2020-09-04 11:58:53 +03:00
|
|
|
// Global variable or function
|
|
|
|
bool is_function;
|
2020-09-04 05:45:29 +03:00
|
|
|
bool is_definition;
|
2020-09-04 11:25:15 +03:00
|
|
|
bool is_static;
|
2020-09-04 11:58:53 +03:00
|
|
|
|
2020-10-07 06:49:08 +03:00
|
|
|
// Global variable
|
|
|
|
char *init_data;
|
|
|
|
|
2020-09-04 11:58:53 +03:00
|
|
|
// Function
|
|
|
|
Obj *params;
|
2020-10-07 14:12:19 +03:00
|
|
|
Node *body;
|
|
|
|
Obj *locals;
|
|
|
|
int stack_size;
|
|
|
|
};
|
|
|
|
|
|
|
|
// AST node
//
// Kind tag for every AST node; this is the type of the `kind` member of
// struct Node.
typedef enum {
  ND_ADD,       // +
  ND_SUB,       // -
  ND_MUL,       // *
  ND_DIV,       // /
  ND_NEG,       // unary -
  ND_MOD,       // %
  ND_BITAND,    // &
  ND_BITOR,     // |
  ND_BITXOR,    // ^
  ND_EQ,        // ==
  ND_NE,        // !=
  ND_LT,        // <
  ND_LE,        // <=
  ND_ASSIGN,    // =
  ND_COMMA,     // ,
  ND_MEMBER,    // . (struct member access)
  ND_ADDR,      // unary &
  ND_DEREF,     // unary *
  ND_NOT,       // !
  ND_BITNOT,    // ~
  ND_LOGAND,    // &&
  ND_LOGOR,     // ||
  ND_RETURN,    // "return"
  ND_IF,        // "if"
  ND_FOR,       // "for" or "while"
  ND_BLOCK,     // { ... }
  ND_FUNCALL,   // Function call
  ND_EXPR_STMT, // Expression statement
  ND_STMT_EXPR, // Statement expression
  ND_VAR,       // Variable
  ND_NUM,       // Integer
  ND_CAST,      // Type cast
} NodeKind;
|
|
|
|
|
|
|
|
// AST node type
|
|
|
|
struct Node {
|
|
|
|
NodeKind kind; // Node kind
|
2020-09-26 02:50:44 +03:00
|
|
|
Node *next; // Next node
|
2020-09-04 07:39:06 +03:00
|
|
|
Type *ty; // Type, e.g. int or pointer to int
|
2020-09-26 05:23:04 +03:00
|
|
|
Token *tok; // Representative token
|
2020-10-07 06:47:09 +03:00
|
|
|
|
2020-10-07 14:11:16 +03:00
|
|
|
Node *lhs; // Left-hand side
|
|
|
|
Node *rhs; // Right-hand side
|
2020-09-04 07:38:41 +03:00
|
|
|
|
2019-08-04 11:35:53 +03:00
|
|
|
// "if" or "for" statement
|
2020-10-07 06:47:09 +03:00
|
|
|
Node *cond;
|
|
|
|
Node *then;
|
|
|
|
Node *els;
|
2019-08-04 11:35:53 +03:00
|
|
|
Node *init;
|
|
|
|
Node *inc;
|
2020-10-07 06:47:09 +03:00
|
|
|
|
2019-08-07 02:05:18 +03:00
|
|
|
// Block or statement expression
|
2020-09-04 07:38:41 +03:00
|
|
|
Node *body;
|
|
|
|
|
2019-08-08 16:43:58 +03:00
|
|
|
// Struct member access
|
|
|
|
Member *member;
|
|
|
|
|
2019-08-04 12:25:20 +03:00
|
|
|
// Function call
|
|
|
|
char *funcname;
|
2020-03-22 12:43:16 +03:00
|
|
|
Type *func_ty;
|
2019-08-04 13:03:46 +03:00
|
|
|
Node *args;
|
2019-08-04 12:25:20 +03:00
|
|
|
|
2020-10-07 14:12:19 +03:00
|
|
|
Obj *var; // Used if kind == ND_VAR
|
2020-08-27 15:51:00 +03:00
|
|
|
int64_t val; // Used if kind == ND_NUM
|
2020-10-07 14:11:16 +03:00
|
|
|
};
|
|
|
|
|
2020-09-26 04:24:45 +03:00
|
|
|
Node *new_cast(Node *expr, Type *ty);
|
2020-09-04 11:58:53 +03:00
|
|
|
Obj *parse(Token *tok);

//
// type.c
//

// Kind tag for struct Type; this is the type of its `kind` member.
typedef enum {
  TY_VOID,
  TY_BOOL,
  TY_CHAR,
  TY_SHORT,
  TY_INT,
  TY_LONG,
  TY_ENUM,
  TY_PTR,
  TY_FUNC,
  TY_ARRAY,
  TY_STRUCT,
  TY_UNION,
} TypeKind;
|
|
|
|
|
|
|
|
struct Type {
|
|
|
|
TypeKind kind;
|
2019-08-13 15:03:20 +03:00
|
|
|
int size; // sizeof() value
|
|
|
|
int align; // alignment
|
2020-09-26 04:15:32 +03:00
|
|
|
|
|
|
|
// Pointer-to or array-of type. We intentionally use the same member
|
|
|
|
// to represent pointer/array duality in C.
|
|
|
|
//
|
|
|
|
// In many contexts in which a pointer is expected, we examine this
|
|
|
|
// member instead of "kind" member to determine whether a type is a
|
|
|
|
// pointer or not. That means in many contexts "array of T" is
|
|
|
|
// naturally handled as if it were "pointer to T", as required by
|
|
|
|
// the C spec.
|
2020-09-04 07:39:06 +03:00
|
|
|
Type *base;
|
2020-09-03 09:43:36 +03:00
|
|
|
|
|
|
|
// Declaration
|
|
|
|
Token *name;
|
2020-09-04 13:01:33 +03:00
|
|
|
|
2020-09-26 04:15:32 +03:00
|
|
|
// Array
|
|
|
|
int array_len;
|
|
|
|
|
2019-08-08 16:43:58 +03:00
|
|
|
// Struct
|
|
|
|
Member *members;
|
|
|
|
|
2020-09-04 13:01:33 +03:00
|
|
|
// Function type
|
|
|
|
Type *return_ty;
|
2020-09-04 07:39:48 +03:00
|
|
|
Type *params;
|
|
|
|
Type *next;
|
2020-09-04 07:39:06 +03:00
|
|
|
};
|
|
|
|
|
2019-08-08 16:43:58 +03:00
|
|
|
// Struct member
|
|
|
|
struct Member {
|
|
|
|
Member *next;
|
|
|
|
Type *ty;
|
2019-08-13 15:03:20 +03:00
|
|
|
Token *tok; // for error message
|
2019-08-08 16:43:58 +03:00
|
|
|
Token *name;
|
|
|
|
int offset;
|
|
|
|
};
|
|
|
|
|
2019-08-11 04:18:55 +03:00
|
|
|
// Shared Type objects for the built-in types. Only declared here; the
// definitions live outside this header (under the type.c section).
extern Type *ty_void;
extern Type *ty_bool;

extern Type *ty_char;
extern Type *ty_short;
extern Type *ty_int;
extern Type *ty_long;
|
2020-09-04 07:39:06 +03:00
|
|
|
|
|
|
|
bool is_integer(Type *ty);
|
2020-09-04 07:39:48 +03:00
|
|
|
Type *copy_type(Type *ty);
|
2020-09-03 09:43:36 +03:00
|
|
|
Type *pointer_to(Type *base);
|
2020-09-04 13:01:33 +03:00
|
|
|
Type *func_type(Type *return_ty);
|
2020-09-26 04:15:32 +03:00
|
|
|
Type *array_of(Type *base, int size);
|
2019-08-11 13:59:27 +03:00
|
|
|
Type *enum_type(void);
|
2020-09-04 07:39:06 +03:00
|
|
|
void add_type(Node *node);

//
// codegen.c
//

// Code generator entry point: takes the program `prog` and the output
// stream `out`.
// NOTE(review): presumably writes assembly for `prog` to `out` -- confirm
// in codegen.c.
void codegen(Obj *prog, FILE *out);

// NOTE(review): presumably rounds `n` up to the nearest multiple of
// `align` -- confirm in codegen.c.
int align_to(int n, int align);
|