chibicc/chibicc.h

458 lines
9.6 KiB
C
Raw Normal View History

2020-10-07 14:12:19 +03:00
#define _POSIX_C_SOURCE 200809L
2020-10-07 14:11:16 +03:00
#include <assert.h>
#include <ctype.h>
#include <errno.h>
2020-09-19 14:36:26 +03:00
#include <glob.h>
2020-10-08 08:34:23 +03:00
#include <libgen.h>
2020-10-07 14:11:16 +03:00
#include <stdarg.h>
#include <stdbool.h>
2020-08-27 15:51:00 +03:00
#include <stdint.h>
2020-10-07 14:11:16 +03:00
#include <stdio.h>
#include <stdlib.h>
#include <stdnoreturn.h>
2020-10-07 14:11:16 +03:00
#include <string.h>
#include <strings.h>
2020-09-19 14:36:26 +03:00
#include <sys/stat.h>
2020-08-15 16:30:28 +03:00
#include <sys/types.h>
#include <sys/wait.h>
2020-05-17 07:34:36 +03:00
#include <time.h>
2020-08-15 16:30:28 +03:00
#include <unistd.h>
2020-10-07 14:11:16 +03:00
2019-08-18 10:34:39 +03:00
// Larger / smaller of two values. These are function-like macros, so
// the selected argument is evaluated twice: do not pass expressions
// with side effects (e.g. MAX(i++, j)).
#define MAX(a, b) ((a) < (b) ? (b) : (a))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
// On compilers that do not define __GNUC__, make __attribute__((...))
// expand to nothing so that annotated declarations still parse.
#if !defined(__GNUC__)
#define __attribute__(x)
#endif
2020-09-04 07:39:06 +03:00
// Forward declarations so these struct types can be referenced by
// pointer ahead of (or without) a full definition in this header.
typedef struct Type Type;
typedef struct Node Node;
typedef struct Member Member;
typedef struct Relocation Relocation;
typedef struct Hideset Hideset;
2020-10-07 14:12:19 +03:00
2020-10-08 08:30:04 +03:00
//
// strings.c
//
2020-10-08 08:34:23 +03:00
// Array of C strings with separate capacity and length counters.
// NOTE(review): growth policy and ownership of the stored strings are
// defined in strings.c, not visible here.
typedef struct {
  char **data;   // Element storage
  int capacity;  // Presumably the number of slots allocated in data
  int len;       // Presumably the number of slots currently in use
} StringArray;

// Presumably appends s to arr -- confirm against strings.c.
void strarray_push(StringArray *arr, char *s);

// Takes a printf-style format string (argument 1, checked through the
// format attribute) and returns a char *.
// NOTE(review): the returned buffer is presumably heap-allocated.
char *format(char *fmt, ...)
    __attribute__((format(printf, 1, 2)));
2020-10-08 08:30:04 +03:00
2020-10-07 14:11:16 +03:00
//
// tokenize.c
//
2020-10-07 14:12:19 +03:00
// Token kind
typedef enum {
  TK_IDENT,   // Identifiers
  TK_PUNCT,   // Punctuators
  TK_KEYWORD, // Keywords
  TK_STR,     // String literals
  TK_NUM,     // Numeric literals
  TK_PP_NUM,  // Preprocessing numbers
  TK_EOF,     // End-of-file markers
} TokenKind;
2020-09-03 13:24:23 +03:00
// An input file.
typedef struct {
  char *name;     // File name
  int file_no;    // Number identifying this file
  char *contents; // File contents

  // For #line directive
  char *display_name;
  int line_delta;
} File;
2020-10-07 14:11:16 +03:00
// Token type
typedef struct Token Token;
struct Token {
TokenKind kind; // Token kind
Token *next; // Next token
int64_t val; // If kind is TK_NUM, its value
2020-08-29 04:37:53 +03:00
long double fval; // If kind is TK_NUM, its value
char *loc; // Token location
int len; // Token length
Type *ty; // Used if TK_NUM or TK_STR
char *str; // String literal contents including terminating '\0'
File *file; // Source location
2020-07-22 13:45:03 +03:00
char *filename; // Filename
int line_no; // Line number
2020-07-22 13:45:03 +03:00
int line_delta; // Line number
bool at_bol; // True if this token is at beginning of line
2020-08-18 04:45:03 +03:00
bool has_space; // True if this token follows a space character
Hideset *hideset; // For macro expansion
2020-08-30 12:57:54 +03:00
Token *origin; // If this is expanded from a macro, the original token
2020-10-07 14:11:16 +03:00
};
noreturn void error(char *fmt, ...) __attribute__((format(printf, 1, 2)));
noreturn void error_at(char *loc, char *fmt, ...) __attribute__((format(printf, 2, 3)));
noreturn void error_tok(Token *tok, char *fmt, ...) __attribute__((format(printf, 2, 3)));
void warn_tok(Token *tok, char *fmt, ...) __attribute__((format(printf, 2, 3)));
2020-10-07 14:11:16 +03:00
bool equal(Token *tok, char *op);
Token *skip(Token *tok, char *op);
bool consume(Token **rest, Token *tok, char *str);
2020-09-27 13:45:12 +03:00
void convert_pp_tokens(Token *tok);
2020-09-03 13:24:23 +03:00
File **get_input_files(void);
2020-08-29 19:04:23 +03:00
File *new_file(char *name, int file_no, char *contents);
Token *tokenize_string_literal(Token *tok, Type *basety);
2020-08-29 19:04:23 +03:00
Token *tokenize(File *file);
Token *tokenize_file(char *filename);
2020-10-07 14:11:16 +03:00
2020-09-06 02:09:09 +03:00
// Reports an internal error tagged with the source file and line of the
// place where the macro is used, by calling error().
#define unreachable() error("internal error at %s:%d", __FILE__, __LINE__)
2020-08-18 03:41:59 +03:00
//
// preprocess.c
//
2020-09-19 14:53:13 +03:00
char *search_include_paths(char *filename);
2020-08-27 19:34:10 +03:00
void init_macros(void);
void define_macro(char *name, char *buf);
2020-08-18 05:09:32 +03:00
void undef_macro(char *name);
2020-08-18 03:41:59 +03:00
Token *preprocess(Token *tok);
2020-10-07 14:11:16 +03:00
//
// parse.c
//
// Variable or function
2020-10-07 14:12:19 +03:00
typedef struct Obj Obj;
struct Obj {
Obj *next;
char *name; // Variable name
Type *ty; // Type
Token *tok; // representative token
bool is_local; // local or global/function
2020-09-04 05:20:55 +03:00
int align; // alignment
2020-10-07 14:12:19 +03:00
// Local variable
int offset;
// Global variable or function
bool is_function;
2020-09-04 05:45:29 +03:00
bool is_definition;
2020-09-04 11:25:15 +03:00
bool is_static;
2020-10-07 06:49:08 +03:00
// Global variable
2020-09-04 11:29:15 +03:00
bool is_tentative;
2020-09-07 04:02:10 +03:00
bool is_tls;
2020-10-07 06:49:08 +03:00
char *init_data;
Relocation *rel;
2020-10-07 06:49:08 +03:00
// Function
bool is_inline;
Obj *params;
2020-10-07 14:12:19 +03:00
Node *body;
Obj *locals;
Obj *va_area;
2020-09-04 05:59:38 +03:00
Obj *alloca_bottom;
2020-10-07 14:12:19 +03:00
int stack_size;
// Static inline function
bool is_live;
bool is_root;
StringArray refs;
2020-10-07 14:12:19 +03:00
};
// A global variable is initialized either by a constant expression or
// by a pointer to another global variable. A Relocation describes the
// second case. Entries are chained through "next".
typedef struct Relocation {
  struct Relocation *next;
  int offset;   // NOTE(review): presumably a byte offset into the
                // initializer data -- confirm in parse.c/codegen.c
  char **label; // Indirect reference to the target's label
  long addend;  // Constant added to the target
} Relocation;
2020-10-07 14:12:19 +03:00
// AST node kind
typedef enum {
  ND_NULL_EXPR, // Do nothing
  ND_ADD,       // +
  ND_SUB,       // -
  ND_MUL,       // *
  ND_DIV,       // /
  ND_NEG,       // unary -
  ND_MOD,       // %
  ND_BITAND,    // &
  ND_BITOR,     // |
  ND_BITXOR,    // ^
  ND_SHL,       // <<
  ND_SHR,       // >>
  ND_EQ,        // ==
  ND_NE,        // !=
  ND_LT,        // <
  ND_LE,        // <=
  ND_ASSIGN,    // =
  ND_COND,      // ?:
  ND_COMMA,     // ,
  ND_MEMBER,    // . (struct member access)
  ND_ADDR,      // unary &
  ND_DEREF,     // unary *
  ND_NOT,       // !
  ND_BITNOT,    // ~
  ND_LOGAND,    // &&
  ND_LOGOR,     // ||
  ND_RETURN,    // "return"
  ND_IF,        // "if"
  ND_FOR,       // "for" or "while"
  ND_DO,        // "do"
  ND_SWITCH,    // "switch"
  ND_CASE,      // "case"
  ND_BLOCK,     // { ... }
  ND_GOTO,      // "goto"
  ND_GOTO_EXPR, // "goto" labels-as-values
  ND_LABEL,     // Labeled statement
  ND_LABEL_VAL, // [GNU] Labels-as-values
  ND_FUNCALL,   // Function call
  ND_EXPR_STMT, // Expression statement
  ND_STMT_EXPR, // Statement expression
  ND_VAR,       // Variable
  ND_VLA_PTR,   // VLA designator
  ND_NUM,       // Numeric literal (see Node.val and Node.fval)
  ND_CAST,      // Type cast
  ND_MEMZERO,   // Zero-clear a stack variable
  ND_ASM,       // "asm"
  ND_CAS,       // Atomic compare-and-swap
  ND_EXCH,      // Atomic exchange
} NodeKind;
// AST node type
struct Node {
NodeKind kind; // Node kind
Node *next; // Next node
2020-09-04 07:39:06 +03:00
Type *ty; // Type, e.g. int or pointer to int
Token *tok; // Representative token
2020-10-07 06:47:09 +03:00
2020-10-07 14:11:16 +03:00
Node *lhs; // Left-hand side
Node *rhs; // Right-hand side
2020-09-04 07:38:41 +03:00
2019-08-04 11:35:53 +03:00
// "if" or "for" statement
2020-10-07 06:47:09 +03:00
Node *cond;
Node *then;
Node *els;
2019-08-04 11:35:53 +03:00
Node *init;
Node *inc;
2020-10-07 06:47:09 +03:00
2020-08-27 15:59:19 +03:00
// "break" and "continue" labels
2019-08-15 07:48:41 +03:00
char *brk_label;
2020-08-27 15:59:19 +03:00
char *cont_label;
2019-08-15 07:48:41 +03:00
// Block or statement expression
2020-09-04 07:38:41 +03:00
Node *body;
2019-08-08 16:43:58 +03:00
// Struct member access
Member *member;
2019-08-04 12:25:20 +03:00
// Function call
Type *func_ty;
Node *args;
2020-08-27 16:59:31 +03:00
bool pass_by_stack;
Obj *ret_buffer;
2019-08-04 12:25:20 +03:00
2020-09-04 06:26:27 +03:00
// Goto or labeled statement, or labels-as-values
2020-09-04 06:27:21 +03:00
char *label;
char *unique_label;
Node *goto_next;
2020-08-30 06:51:05 +03:00
// Switch
2019-08-15 10:43:24 +03:00
Node *case_next;
Node *default_case;
2020-08-30 06:51:05 +03:00
// Case
long begin;
long end;
2020-08-27 17:50:52 +03:00
// "asm" string literal
char *asm_str;
2020-09-15 05:30:56 +03:00
// Atomic compare-and-swap
Node *cas_addr;
Node *cas_old;
Node *cas_new;
// Atomic op= operators
Obj *atomic_addr;
Node *atomic_expr;
2019-08-15 10:43:24 +03:00
// Variable
Obj *var;
// Numeric literal
int64_t val;
2020-08-29 04:37:53 +03:00
long double fval;
2020-10-07 14:11:16 +03:00
};
2020-09-26 04:24:45 +03:00
// Entry points exported by parse.c.
// NOTE(review): semantics are implemented in parse.c and are not
// visible from this header.
Node *new_cast(Node *expr, Type *ty);
int64_t const_expr(Token **rest, Token *tok);
Obj *parse(Token *tok);
2020-10-07 14:11:16 +03:00
2020-09-04 07:39:06 +03:00
//
// type.c
//
// Kind tag stored in Type.kind.
typedef enum {
  TY_VOID,
  TY_BOOL,
  TY_CHAR,
  TY_SHORT,
  TY_INT,
  TY_LONG,
  TY_FLOAT,
  TY_DOUBLE,
  TY_LDOUBLE,
  TY_ENUM,
  TY_PTR,
  TY_FUNC,
  TY_ARRAY,
  TY_VLA, // variable-length array
  TY_STRUCT,
  TY_UNION,
} TypeKind;
struct Type {
TypeKind kind;
int size; // sizeof() value
int align; // alignment
2020-08-28 17:29:49 +03:00
bool is_unsigned; // unsigned or signed
bool is_atomic; // true if _Atomic
2020-09-06 15:06:56 +03:00
Type *origin; // for type compatibility check
2020-09-26 04:15:32 +03:00
// Pointer-to or array-of type. We intentionally use the same member
// to represent pointer/array duality in C.
//
// In many contexts in which a pointer is expected, we examine this
// member instead of "kind" member to determine whether a type is a
// pointer or not. That means in many contexts "array of T" is
// naturally handled as if it were "pointer to T", as required by
// the C spec.
2020-09-04 07:39:06 +03:00
Type *base;
// Declaration
Token *name;
Token *name_pos;
2020-09-04 13:01:33 +03:00
2020-09-26 04:15:32 +03:00
// Array
int array_len;
2020-09-04 08:44:12 +03:00
// Variable-length array
Node *vla_len; // # of elements
Obj *vla_size; // sizeof() value
2019-08-08 16:43:58 +03:00
// Struct
Member *members;
bool is_flexible;
2020-10-07 05:14:15 +03:00
bool is_packed;
2019-08-08 16:43:58 +03:00
2020-09-04 13:01:33 +03:00
// Function type
Type *return_ty;
Type *params;
2020-10-07 14:24:13 +03:00
bool is_variadic;
Type *next;
2020-09-04 07:39:06 +03:00
};
2019-08-08 16:43:58 +03:00
// Struct member. Members are chained through the "next" pointer.
struct Member {
  Member *next;
  Type *ty;
  Token *tok; // for error message
  Token *name;
  int idx;
  int align;
  int offset;

  // Bitfield
  bool is_bitfield;
  int bit_offset;
  int bit_width;
};
2019-08-11 04:18:55 +03:00
// Shared Type objects for the built-in types. These are declarations
// only; the definitions live in a .c file (presumably type.c, going by
// the section this appears under).
extern Type *ty_void;
extern Type *ty_bool;

extern Type *ty_char;
extern Type *ty_short;
extern Type *ty_int;
extern Type *ty_long;

extern Type *ty_uchar;
extern Type *ty_ushort;
extern Type *ty_uint;
extern Type *ty_ulong;

extern Type *ty_float;
extern Type *ty_double;
extern Type *ty_ldouble;
2020-09-27 13:43:03 +03:00
2020-09-04 07:39:06 +03:00
bool is_integer(Type *ty);
2020-09-27 13:43:03 +03:00
bool is_flonum(Type *ty);
2020-09-22 12:29:17 +03:00
bool is_numeric(Type *ty);
2020-09-06 15:06:56 +03:00
bool is_compatible(Type *t1, Type *t2);
Type *copy_type(Type *ty);
Type *pointer_to(Type *base);
2020-09-04 13:01:33 +03:00
Type *func_type(Type *return_ty);
2020-09-26 04:15:32 +03:00
Type *array_of(Type *base, int size);
2020-09-04 08:44:12 +03:00
Type *vla_of(Type *base, Node *expr);
2019-08-11 13:59:27 +03:00
Type *enum_type(void);
Type *struct_type(void);
2020-09-04 07:39:06 +03:00
void add_type(Node *node);
2020-10-07 14:11:16 +03:00
//
// codegen.c
//
2020-05-08 14:44:25 +03:00
// Entry points exported by codegen.c.
// codegen takes the program as an Obj list plus the output stream.
void codegen(Obj *prog, FILE *out);
// NOTE(review): presumably rounds n up to a multiple of align --
// confirm against codegen.c.
int align_to(int n, int align);
2020-09-03 13:24:23 +03:00
2020-08-18 05:56:24 +03:00
//
// unicode.c
//
// NOTE(review): the descriptions below are inferred from names and
// signatures only; confirm each against unicode.c.

// Presumably encodes code point c as UTF-8 into buf and returns the
// number of bytes written.
int encode_utf8(char *buf, uint32_t c);
// Presumably decodes one UTF-8 sequence starting at p, returns the code
// point, and stores the position after it in *new_pos.
uint32_t decode_utf8(char **new_pos, char *p);
// Presumably: may c start an identifier (is_ident1) / continue an
// identifier (is_ident2)?
bool is_ident1(uint32_t c);
bool is_ident2(uint32_t c);
// Presumably the number of display columns taken by the len bytes at p.
int display_width(char *p, int len);
2020-08-18 05:56:24 +03:00
2020-09-01 19:30:29 +03:00
//
// hashmap.c
//
// One slot of a HashMap: a key given as pointer plus length, and the
// value associated with it.
typedef struct {
  char *key;
  int keylen;
  void *val;
} HashEntry;

// Hash map over an array of HashEntry slots.
// NOTE(review): collision handling and resize policy live in
// hashmap.c, which is not visible from this header.
typedef struct {
  HashEntry *buckets; // Slot array
  int capacity;       // Presumably the number of slots in buckets
  int used;           // Presumably the number of slots in use
} HashMap;
// Lookup, insertion and deletion on a HashMap. Each operation comes in
// two forms: one taking just a key pointer, and a "2" variant that also
// takes an explicit key length.
// NOTE(review): presumably the short forms treat key as NUL-terminated;
// confirm against hashmap.c.
void *hashmap_get(HashMap *map, char *key);
void *hashmap_get2(HashMap *map, char *key, int keylen);
void hashmap_put(HashMap *map, char *key, void *val);
void hashmap_put2(HashMap *map, char *key, int keylen, void *val);
void hashmap_delete(HashMap *map, char *key);
void hashmap_delete2(HashMap *map, char *key, int keylen);
// Presumably a self-test for the hash map implementation.
void hashmap_test(void);
2020-09-03 13:24:23 +03:00
//
// main.c
//
2020-08-30 12:57:20 +03:00
bool file_exists(char *path);
2020-09-25 17:18:32 +03:00
extern StringArray include_paths;
2020-09-08 12:50:40 +03:00
extern bool opt_fpic;
2020-08-19 11:24:16 +03:00
extern bool opt_fcommon;
2020-09-03 13:24:23 +03:00
extern char *base_file;