2020-10-07 14:12:19 +03:00
|
|
|
#define _POSIX_C_SOURCE 200809L
|
2020-10-07 14:11:16 +03:00
|
|
|
#include <assert.h>
|
|
|
|
#include <ctype.h>
|
2019-08-07 02:30:06 +03:00
|
|
|
#include <errno.h>
|
2020-09-19 14:36:26 +03:00
|
|
|
#include <glob.h>
|
2020-10-08 08:34:23 +03:00
|
|
|
#include <libgen.h>
|
2020-10-07 14:11:16 +03:00
|
|
|
#include <stdarg.h>
|
|
|
|
#include <stdbool.h>
|
2020-08-27 15:51:00 +03:00
|
|
|
#include <stdint.h>
|
2020-10-07 14:11:16 +03:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2019-08-13 12:39:56 +03:00
|
|
|
#include <strings.h>
|
2020-09-19 14:36:26 +03:00
|
|
|
#include <sys/stat.h>
|
2020-08-15 16:30:28 +03:00
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/wait.h>
|
|
|
|
#include <unistd.h>
|
2020-10-07 14:11:16 +03:00
|
|
|
|
2019-08-18 10:34:39 +03:00
|
|
|
// Yields the larger of x and y (x when neither compares greater).
// Caution: each argument appears twice in the expansion, so an argument
// with side effects (e.g. i++) may be evaluated more than once.
#define MAX(x, y) ((y) > (x) ? (y) : (x))
|
|
|
|
// Yields the smaller of x and y (y when x does not compare less).
// Caution: each argument appears twice in the expansion, so an argument
// with side effects (e.g. i++) may be evaluated more than once.
#define MIN(x, y) ((y) > (x) ? (x) : (y))
|
|
|
|
|
2020-09-04 07:39:06 +03:00
|
|
|
// Forward declarations, so these struct types can be referenced by
// pointer before (or without) their full definitions being visible.
typedef struct Type Type;
typedef struct Node Node;
typedef struct Member Member;
typedef struct Relocation Relocation;
typedef struct Hideset Hideset;
|
2020-10-07 14:12:19 +03:00
|
|
|
|
2020-10-08 08:30:04 +03:00
|
|
|
//
|
|
|
|
// strings.c
|
|
|
|
//
|
|
|
|
|
2020-10-08 08:34:23 +03:00
|
|
|
// An array of strings, filled through strarray_push().
// NOTE(review): presumably `len` entries of `data` are in use out of
// `capacity` allocated slots -- confirm against strings.c.
typedef struct {
  char **data;  // Element storage
  int capacity; // NOTE(review): presumably the number of allocated slots
  int len;      // NOTE(review): presumably the number of slots in use
} StringArray;
|
|
|
|
|
|
|
|
// NOTE(review): presumably appends `s` to `arr`; growth policy and
// ownership of `s` are not visible in this header -- confirm in strings.c.
void strarray_push(StringArray *arr, char *s);

// Takes a format string plus variadic arguments and returns a string.
// NOTE(review): presumably printf-style formatting into newly allocated
// memory -- confirm in strings.c.
char *format(char *fmt, ...);
|
|
|
|
|
2020-10-07 14:11:16 +03:00
|
|
|
//
|
|
|
|
// tokenize.c
|
|
|
|
//
|
|
|
|
|
2020-10-07 14:12:19 +03:00
|
|
|
// Token kinds. Enumerators take consecutive values starting at 0.
typedef enum {
  TK_IDENT,   // Identifiers
  TK_PUNCT,   // Punctuators
  TK_KEYWORD, // Keywords
  TK_STR,     // String literals
  TK_NUM,     // Numeric literals
  TK_PP_NUM,  // Preprocessing numbers
  TK_EOF,     // End-of-file markers
} TokenKind;
|
|
|
|
|
2020-09-03 13:24:23 +03:00
|
|
|
// Record describing one input file. The fields mirror the parameters
// of new_file(name, file_no, contents).
typedef struct {
  char *name;     // File name
  int file_no;    // File number (NOTE(review): numbering scheme is set by callers)
  char *contents; // File contents
} File;
|
|
|
|
|
2020-10-07 14:11:16 +03:00
|
|
|
// Token type
|
|
|
|
typedef struct Token Token;
|
|
|
|
struct Token {
|
2020-03-29 09:47:50 +03:00
|
|
|
TokenKind kind; // Token kind
|
|
|
|
Token *next; // Next token
|
|
|
|
int64_t val; // If kind is TK_NUM, its value
|
|
|
|
double fval; // If kind is TK_NUM, its value
|
|
|
|
char *loc; // Token location
|
|
|
|
int len; // Token length
|
|
|
|
Type *ty; // Used if TK_NUM or TK_STR
|
|
|
|
char *str; // String literal contents including terminating '\0'
|
2020-04-20 16:23:11 +03:00
|
|
|
|
2020-03-29 09:47:50 +03:00
|
|
|
File *file; // Source location
|
|
|
|
int line_no; // Line number
|
|
|
|
bool at_bol; // True if this token is at beginning of line
|
2020-08-18 04:45:03 +03:00
|
|
|
bool has_space; // True if this token follows a space character
|
2020-03-29 09:47:50 +03:00
|
|
|
Hideset *hideset; // For macro expansion
|
2020-08-30 12:57:54 +03:00
|
|
|
Token *origin; // If this is expanded from a macro, the original token
|
2020-10-07 14:11:16 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
void error(char *fmt, ...);
|
|
|
|
void error_at(char *loc, char *fmt, ...);
|
|
|
|
void error_tok(Token *tok, char *fmt, ...);
|
2020-04-21 04:46:26 +03:00
|
|
|
void warn_tok(Token *tok, char *fmt, ...);
|
2020-10-07 14:11:16 +03:00
|
|
|
bool equal(Token *tok, char *op);
|
|
|
|
Token *skip(Token *tok, char *op);
|
2020-09-03 09:43:36 +03:00
|
|
|
bool consume(Token **rest, Token *tok, char *str);
|
2020-09-27 13:45:12 +03:00
|
|
|
void convert_pp_tokens(Token *tok);
|
2020-09-03 13:24:23 +03:00
|
|
|
File **get_input_files(void);
|
2020-08-29 19:04:23 +03:00
|
|
|
File *new_file(char *name, int file_no, char *contents);
|
|
|
|
Token *tokenize(File *file);
|
2019-08-07 02:30:06 +03:00
|
|
|
Token *tokenize_file(char *filename);
|
2020-10-07 14:11:16 +03:00
|
|
|
|
2020-09-06 02:09:09 +03:00
|
|
|
// Reports an internal error through error(), tagged with the file and
// line of the place where the macro is expanded. Intended for code
// paths that should never execute.
#define unreachable() \
  error("internal error at %s:%d", __FILE__, __LINE__)
|
|
|
|
|
2020-08-18 03:41:59 +03:00
|
|
|
//
|
|
|
|
// preprocess.c
|
|
|
|
//
|
|
|
|
|
2020-08-27 19:34:10 +03:00
|
|
|
void init_macros(void);
|
|
|
|
void define_macro(char *name, char *buf);
|
2020-08-18 05:09:32 +03:00
|
|
|
void undef_macro(char *name);
|
2020-08-18 03:41:59 +03:00
|
|
|
Token *preprocess(Token *tok);
|
|
|
|
|
2020-10-07 14:11:16 +03:00
|
|
|
//
|
|
|
|
// parse.c
|
|
|
|
//
|
|
|
|
|
2020-09-04 11:58:53 +03:00
|
|
|
// Variable or function
|
2020-10-07 14:12:19 +03:00
|
|
|
typedef struct Obj Obj;
|
|
|
|
struct Obj {
|
|
|
|
Obj *next;
|
2020-09-04 11:58:53 +03:00
|
|
|
char *name; // Variable name
|
|
|
|
Type *ty; // Type
|
2020-09-04 11:28:20 +03:00
|
|
|
Token *tok; // representative token
|
2020-09-04 11:58:53 +03:00
|
|
|
bool is_local; // local or global/function
|
2020-09-04 05:20:55 +03:00
|
|
|
int align; // alignment
|
2020-10-07 14:12:19 +03:00
|
|
|
|
2020-09-04 11:58:53 +03:00
|
|
|
// Local variable
|
|
|
|
int offset;
|
2020-09-04 07:39:48 +03:00
|
|
|
|
2020-09-04 11:58:53 +03:00
|
|
|
// Global variable or function
|
|
|
|
bool is_function;
|
2020-09-04 05:45:29 +03:00
|
|
|
bool is_definition;
|
2020-09-04 11:25:15 +03:00
|
|
|
bool is_static;
|
2020-09-04 11:58:53 +03:00
|
|
|
|
2020-10-07 06:49:08 +03:00
|
|
|
// Global variable
|
|
|
|
char *init_data;
|
2020-07-20 17:53:12 +03:00
|
|
|
Relocation *rel;
|
2020-10-07 06:49:08 +03:00
|
|
|
|
2020-09-04 11:58:53 +03:00
|
|
|
// Function
|
|
|
|
Obj *params;
|
2020-10-07 14:12:19 +03:00
|
|
|
Node *body;
|
|
|
|
Obj *locals;
|
2019-08-25 05:48:44 +03:00
|
|
|
Obj *va_area;
|
2020-10-07 14:12:19 +03:00
|
|
|
int stack_size;
|
|
|
|
};
|
|
|
|
|
2020-07-20 17:53:12 +03:00
|
|
|
// Global variable can be initialized either by a constant expression
// or a pointer to another global variable. This struct represents the
// latter. Relocations are chained through `next`.
typedef struct Relocation Relocation;
struct Relocation {
  Relocation *next;
  int offset;   // NOTE(review): presumably the byte offset within the initialized data
  char *label;  // NOTE(review): presumably the label of the referenced global
  long addend;  // NOTE(review): presumably a constant added to the label's address
};
|
|
|
|
|
2020-10-07 14:12:19 +03:00
|
|
|
// AST node kinds. Enumerators take consecutive values starting at 0.
typedef enum {
  ND_NULL_EXPR, // Do nothing
  ND_ADD,       // +
  ND_SUB,       // -
  ND_MUL,       // *
  ND_DIV,       // /
  ND_NEG,       // unary -
  ND_MOD,       // %
  ND_BITAND,    // &
  ND_BITOR,     // |
  ND_BITXOR,    // ^
  ND_SHL,       // <<
  ND_SHR,       // >>
  ND_EQ,        // ==
  ND_NE,        // !=
  ND_LT,        // <
  ND_LE,        // <=
  ND_ASSIGN,    // =
  ND_COND,      // ?:
  ND_COMMA,     // ,
  ND_MEMBER,    // . (struct member access)
  ND_ADDR,      // unary &
  ND_DEREF,     // unary *
  ND_NOT,       // !
  ND_BITNOT,    // ~
  ND_LOGAND,    // &&
  ND_LOGOR,     // ||
  ND_RETURN,    // "return"
  ND_IF,        // "if"
  ND_FOR,       // "for" or "while"
  ND_DO,        // "do"
  ND_SWITCH,    // "switch"
  ND_CASE,      // "case"
  ND_BLOCK,     // { ... }
  ND_GOTO,      // "goto"
  ND_LABEL,     // Labeled statement
  ND_FUNCALL,   // Function call
  ND_EXPR_STMT, // Expression statement
  ND_STMT_EXPR, // Statement expression
  ND_VAR,       // Variable
  ND_NUM,       // Integer
  ND_CAST,      // Type cast
  ND_MEMZERO,   // Zero-clear a stack variable
} NodeKind;
|
|
|
|
|
|
|
|
// AST node type
|
|
|
|
struct Node {
|
|
|
|
NodeKind kind; // Node kind
|
2020-09-26 02:50:44 +03:00
|
|
|
Node *next; // Next node
|
2020-09-04 07:39:06 +03:00
|
|
|
Type *ty; // Type, e.g. int or pointer to int
|
2020-09-26 05:23:04 +03:00
|
|
|
Token *tok; // Representative token
|
2020-10-07 06:47:09 +03:00
|
|
|
|
2020-10-07 14:11:16 +03:00
|
|
|
Node *lhs; // Left-hand side
|
|
|
|
Node *rhs; // Right-hand side
|
2020-09-04 07:38:41 +03:00
|
|
|
|
2019-08-04 11:35:53 +03:00
|
|
|
// "if" or "for" statement
|
2020-10-07 06:47:09 +03:00
|
|
|
Node *cond;
|
|
|
|
Node *then;
|
|
|
|
Node *els;
|
2019-08-04 11:35:53 +03:00
|
|
|
Node *init;
|
|
|
|
Node *inc;
|
2020-10-07 06:47:09 +03:00
|
|
|
|
2020-08-27 15:59:19 +03:00
|
|
|
// "break" and "continue" labels
|
2019-08-15 07:48:41 +03:00
|
|
|
char *brk_label;
|
2020-08-27 15:59:19 +03:00
|
|
|
char *cont_label;
|
2019-08-15 07:48:41 +03:00
|
|
|
|
2019-08-07 02:05:18 +03:00
|
|
|
// Block or statement expression
|
2020-09-04 07:38:41 +03:00
|
|
|
Node *body;
|
|
|
|
|
2019-08-08 16:43:58 +03:00
|
|
|
// Struct member access
|
|
|
|
Member *member;
|
|
|
|
|
2019-08-04 12:25:20 +03:00
|
|
|
// Function call
|
2020-03-22 12:43:16 +03:00
|
|
|
Type *func_ty;
|
2019-08-04 13:03:46 +03:00
|
|
|
Node *args;
|
2020-08-27 16:59:31 +03:00
|
|
|
bool pass_by_stack;
|
2020-08-27 17:16:53 +03:00
|
|
|
Obj *ret_buffer;
|
2019-08-04 12:25:20 +03:00
|
|
|
|
2020-09-04 06:27:21 +03:00
|
|
|
// Goto or labeled statement
|
|
|
|
char *label;
|
|
|
|
char *unique_label;
|
|
|
|
Node *goto_next;
|
|
|
|
|
2019-08-15 10:43:24 +03:00
|
|
|
// Switch-cases
|
|
|
|
Node *case_next;
|
|
|
|
Node *default_case;
|
|
|
|
|
|
|
|
// Variable
|
|
|
|
Obj *var;
|
|
|
|
|
|
|
|
// Numeric literal
|
|
|
|
int64_t val;
|
2020-09-27 13:43:03 +03:00
|
|
|
double fval;
|
2020-10-07 14:11:16 +03:00
|
|
|
};
|
|
|
|
|
2020-09-26 04:24:45 +03:00
|
|
|
Node *new_cast(Node *expr, Type *ty);
|
2020-08-20 13:36:36 +03:00
|
|
|
int64_t const_expr(Token **rest, Token *tok);
|
2020-09-04 11:58:53 +03:00
|
|
|
Obj *parse(Token *tok);
|
2020-10-07 14:11:16 +03:00
|
|
|
|
2020-09-04 07:39:06 +03:00
|
|
|
//
|
|
|
|
// type.c
|
|
|
|
//
|
|
|
|
|
|
|
|
// Kinds of types. Enumerators take consecutive values starting at 0.
typedef enum {
  TY_VOID,
  TY_BOOL,
  TY_CHAR,
  TY_SHORT,
  TY_INT,
  TY_LONG,
  TY_FLOAT,
  TY_DOUBLE,
  TY_ENUM,
  TY_PTR,
  TY_FUNC,
  TY_ARRAY,
  TY_STRUCT,
  TY_UNION,
} TypeKind;
|
|
|
|
|
|
|
|
struct Type {
|
|
|
|
TypeKind kind;
|
2019-08-13 15:03:20 +03:00
|
|
|
int size; // sizeof() value
|
|
|
|
int align; // alignment
|
2020-08-28 17:29:49 +03:00
|
|
|
bool is_unsigned; // unsigned or signed
|
2020-09-26 04:15:32 +03:00
|
|
|
|
|
|
|
// Pointer-to or array-of type. We intentionally use the same member
|
|
|
|
// to represent pointer/array duality in C.
|
|
|
|
//
|
|
|
|
// In many contexts in which a pointer is expected, we examine this
|
|
|
|
// member instead of "kind" member to determine whether a type is a
|
|
|
|
// pointer or not. That means in many contexts "array of T" is
|
|
|
|
// naturally handled as if it were "pointer to T", as required by
|
|
|
|
// the C spec.
|
2020-09-04 07:39:06 +03:00
|
|
|
Type *base;
|
2020-09-03 09:43:36 +03:00
|
|
|
|
|
|
|
// Declaration
|
|
|
|
Token *name;
|
2020-09-04 11:28:20 +03:00
|
|
|
Token *name_pos;
|
2020-09-04 13:01:33 +03:00
|
|
|
|
2020-09-26 04:15:32 +03:00
|
|
|
// Array
|
|
|
|
int array_len;
|
|
|
|
|
2019-08-08 16:43:58 +03:00
|
|
|
// Struct
|
|
|
|
Member *members;
|
2020-09-20 10:16:00 +03:00
|
|
|
bool is_flexible;
|
2019-08-08 16:43:58 +03:00
|
|
|
|
2020-09-04 13:01:33 +03:00
|
|
|
// Function type
|
|
|
|
Type *return_ty;
|
2020-09-04 07:39:48 +03:00
|
|
|
Type *params;
|
2020-10-07 14:24:13 +03:00
|
|
|
bool is_variadic;
|
2020-09-04 07:39:48 +03:00
|
|
|
Type *next;
|
2020-09-04 07:39:06 +03:00
|
|
|
};
|
|
|
|
|
2019-08-08 16:43:58 +03:00
|
|
|
// Struct member
|
|
|
|
struct Member {
|
|
|
|
Member *next;
|
|
|
|
Type *ty;
|
2019-08-13 15:03:20 +03:00
|
|
|
Token *tok; // for error message
|
2019-08-08 16:43:58 +03:00
|
|
|
Token *name;
|
2019-08-18 11:56:36 +03:00
|
|
|
int idx;
|
2020-09-04 05:20:55 +03:00
|
|
|
int align;
|
2019-08-08 16:43:58 +03:00
|
|
|
int offset;
|
2020-08-27 17:36:12 +03:00
|
|
|
|
|
|
|
// Bitfield
|
|
|
|
bool is_bitfield;
|
|
|
|
int bit_offset;
|
|
|
|
int bit_width;
|
2019-08-08 16:43:58 +03:00
|
|
|
};
|
|
|
|
|
2019-08-11 04:18:55 +03:00
|
|
|
// Shared Type objects for the built-in types. These are declarations
// only; the objects are defined in another translation unit.
extern Type *ty_void;
extern Type *ty_bool;

extern Type *ty_char;
extern Type *ty_short;
extern Type *ty_int;
extern Type *ty_long;

extern Type *ty_uchar;
extern Type *ty_ushort;
extern Type *ty_uint;
extern Type *ty_ulong;

extern Type *ty_float;
extern Type *ty_double;
|
|
|
|
|
2020-09-04 07:39:06 +03:00
|
|
|
bool is_integer(Type *ty);
|
2020-09-27 13:43:03 +03:00
|
|
|
bool is_flonum(Type *ty);
|
2020-09-22 12:29:17 +03:00
|
|
|
bool is_numeric(Type *ty);
|
2020-09-04 07:39:48 +03:00
|
|
|
Type *copy_type(Type *ty);
|
2020-09-03 09:43:36 +03:00
|
|
|
Type *pointer_to(Type *base);
|
2020-09-04 13:01:33 +03:00
|
|
|
Type *func_type(Type *return_ty);
|
2020-09-26 04:15:32 +03:00
|
|
|
Type *array_of(Type *base, int size);
|
2019-08-11 13:59:27 +03:00
|
|
|
Type *enum_type(void);
|
2020-10-07 05:10:28 +03:00
|
|
|
Type *struct_type(void);
|
2020-09-04 07:39:06 +03:00
|
|
|
void add_type(Node *node);
|
|
|
|
|
2020-10-07 14:11:16 +03:00
|
|
|
//
|
|
|
|
// codegen.c
|
|
|
|
//
|
|
|
|
|
2020-05-08 14:44:25 +03:00
|
|
|
// Code generator entry point: takes the program's Obj list and the
// output stream to write to.
void codegen(Obj *prog, FILE *out);

// NOTE(review): presumably rounds `n` up to the nearest multiple of
// `align` -- confirm in codegen.c.
int align_to(int n, int align);
|
2020-09-03 13:24:23 +03:00
|
|
|
|
|
|
|
//
|
|
|
|
// main.c
|
|
|
|
//
|
|
|
|
|
2020-08-30 12:57:20 +03:00
|
|
|
bool file_exists(char *path);
|
|
|
|
|
2020-09-25 17:18:32 +03:00
|
|
|
extern StringArray include_paths;
|
2020-09-03 13:24:23 +03:00
|
|
|
extern char *base_file;
|