#define _POSIX_C_SOURCE 200809L
// NOTE(review): the header names of the ten #include directives were
// missing. <stdbool.h>, <stdint.h> and <stdio.h> are required by the
// declarations in this file (bool, int64_t, FILE); the other seven are
// an assumption about what the implementation files rely on -- confirm.
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

// Function-like macros: each argument may be evaluated twice, so do not
// pass expressions with side effects (e.g. MAX(i++, j)).
#define MAX(x, y) ((x) < (y) ? (y) : (x))
#define MIN(x, y) ((x) < (y) ? (x) : (y))

// Forward declarations so the structs below can point at each other
// before their full definitions appear.
typedef struct Type Type;
typedef struct Node Node;
typedef struct Member Member;
typedef struct Relocation Relocation;

//
// strings.c
//

// Takes a printf-style format string plus variadic arguments and
// returns a char pointer.
// NOTE(review): ownership of the returned string is not visible from
// this header -- confirm against strings.c.
char *format(char *fmt, ...);

//
// tokenize.c
//

// Token
typedef enum {
  TK_IDENT,   // Identifiers
  TK_PUNCT,   // Punctuators
  TK_KEYWORD, // Keywords
  TK_STR,     // String literals
  TK_NUM,     // Numeric literals
  TK_EOF,     // End-of-file markers
} TokenKind;

// Token type
typedef struct Token Token;
struct Token {
  TokenKind kind; // Token kind
  Token *next;    // Next token
  int64_t val;    // If kind is TK_NUM, its value
  char *loc;      // Token location
  int len;        // Token length
  Type *ty;       // Used if TK_STR
  char *str;      // String literal contents including terminating '\0'

  int line_no;    // Line number
};

// Error reporting helpers; all take a printf-style format string.
void error(char *fmt, ...);
void error_at(char *loc, char *fmt, ...);
void error_tok(Token *tok, char *fmt, ...);

// Token-stream helpers.
bool equal(Token *tok, char *op);
Token *skip(Token *tok, char *op);
bool consume(Token **rest, Token *tok, char *str);
Token *tokenize_file(char *filename);

// Reports an internal error tagged with the source file and line of the
// expansion site.
#define unreachable() \
  error("internal error at %s:%d", __FILE__, __LINE__)

//
// parse.c
//

// Variable or function
typedef struct Obj Obj;
struct Obj {
  Obj *next;
  char *name;    // Variable name
  Type *ty;      // Type
  bool is_local; // local or global/function
  int align;     // alignment

  // Local variable
  int offset;

  // Global variable or function
  bool is_function;
  bool is_definition;
  bool is_static;

  // Global variable
  char *init_data;
  Relocation *rel;

  // Function
  Obj *params;
  Node *body;
  Obj *locals;
  int stack_size;
};

// Global variable can be initialized either by a constant expression
// or a pointer to another global variable. This struct represents the
// latter.
//
// The typedef repeats the forward declaration near the top of the file;
// a repeated identical typedef is permitted since C11.
typedef struct Relocation Relocation;
struct Relocation {
  Relocation *next;
  int offset;
  char *label;
  long addend;
};

// AST node
typedef enum {
  ND_NULL_EXPR, // Do nothing
  ND_ADD,       // +
  ND_SUB,       // -
  ND_MUL,       // *
  ND_DIV,       // /
  ND_NEG,       // unary -
  ND_MOD,       // %
  ND_BITAND,    // &
  ND_BITOR,     // |
  ND_BITXOR,    // ^
  ND_SHL,       // <<
  ND_SHR,       // >>
  ND_EQ,        // ==
  ND_NE,        // !=
  ND_LT,        // <
  ND_LE,        // <=
  ND_ASSIGN,    // =
  ND_COND,      // ?:
  ND_COMMA,     // ,
  ND_MEMBER,    // . (struct member access)
  ND_ADDR,      // unary &
  ND_DEREF,     // unary *
  ND_NOT,       // !
  ND_BITNOT,    // ~
  ND_LOGAND,    // &&
  ND_LOGOR,     // ||
  ND_RETURN,    // "return"
  ND_IF,        // "if"
  ND_FOR,       // "for" or "while"
  ND_SWITCH,    // "switch"
  ND_CASE,      // "case"
  ND_BLOCK,     // { ... }
  ND_GOTO,      // "goto"
  ND_LABEL,     // Labeled statement
  ND_FUNCALL,   // Function call
  ND_EXPR_STMT, // Expression statement
  ND_STMT_EXPR, // Statement expression
  ND_VAR,       // Variable
  ND_NUM,       // Integer
  ND_CAST,      // Type cast
  ND_MEMZERO,   // Zero-clear a stack variable
} NodeKind;

// AST node type
struct Node {
  NodeKind kind; // Node kind
  Node *next;    // Next node
  Type *ty;      // Type, e.g. int or pointer to int
  Token *tok;    // Representative token

  Node *lhs;     // Left-hand side
  Node *rhs;     // Right-hand side

  // "if" or "for" statement
  Node *cond;
  Node *then;
  Node *els;
  Node *init;
  Node *inc;

  // "break" and "continue" labels
  char *brk_label;
  char *cont_label;

  // Block or statement expression
  Node *body;

  // Struct member access
  Member *member;

  // Function call
  char *funcname;
  Type *func_ty;
  Node *args;

  // Goto or labeled statement
  char *label;
  char *unique_label;
  Node *goto_next;

  // Switch-cases
  Node *case_next;
  Node *default_case;

  // Variable
  Obj *var;

  // Numeric literal
  int64_t val;
};

Node *new_cast(Node *expr, Type *ty);
Obj *parse(Token *tok);

//
// type.c
//

typedef enum {
  TY_VOID,
  TY_BOOL,
  TY_CHAR,
  TY_SHORT,
  TY_INT,
  TY_LONG,
  TY_ENUM,
  TY_PTR,
  TY_FUNC,
  TY_ARRAY,
  TY_STRUCT,
  TY_UNION,
} TypeKind;

struct Type {
  TypeKind kind;
  int size;  // sizeof() value
  int align; // alignment

  // Pointer-to or array-of type. We intentionally use the same member
  // to represent pointer/array duality in C.
  //
  // In many contexts in which a pointer is expected, we examine this
  // member instead of "kind" member to determine whether a type is a
  // pointer or not. That means in many contexts "array of T" is
  // naturally handled as if it were "pointer to T", as required by
  // the C spec.
  Type *base;

  // Declaration
  Token *name;

  // Array
  int array_len;

  // Struct
  Member *members;
  bool is_flexible;

  // Function type
  Type *return_ty;
  Type *params;
  Type *next;
};

// Struct member
struct Member {
  Member *next;
  Type *ty;
  Token *tok; // for error message
  Token *name;
  int idx;
  int align;
  int offset;
};

// Shared Type objects for the built-in types; defined outside this file.
extern Type *ty_void;
extern Type *ty_bool;

extern Type *ty_char;
extern Type *ty_short;
extern Type *ty_int;
extern Type *ty_long;

bool is_integer(Type *ty);
Type *copy_type(Type *ty);
Type *pointer_to(Type *base);
Type *func_type(Type *return_ty);
Type *array_of(Type *base, int size);
Type *enum_type(void);
Type *struct_type(void);
void add_type(Node *node);

//
// codegen.c
//

void codegen(Obj *prog, FILE *out);
int align_to(int n, int align);