diff --git a/tcc.c b/tcc.c index 9f5bd0b..2807c02 100644 --- a/tcc.c +++ b/tcc.c @@ -79,12 +79,14 @@ #define TOK_HASH_SIZE 2048 /* must be a power of two */ #define TOK_ALLOC_INCR 512 /* must be a power of two */ -#define SYM_HASH_SIZE 1031 /* token symbol management */ typedef struct TokenSym { struct TokenSym *hash_next; struct Sym *sym_define; /* direct pointer to define */ + struct Sym *sym_label; /* direct pointer to label */ + struct Sym *sym_struct; /* direct pointer to structure */ + struct Sym *sym_identifier; /* direct pointer to identifier */ int tok; /* token number */ int len; char str[1]; @@ -136,14 +138,9 @@ typedef struct Sym { CType type; /* associated type */ struct Sym *next; /* next related symbol */ struct Sym *prev; /* prev symbol in stack */ - struct Sym *hash_next; /* next symbol in hash table */ + struct Sym *prev_tok; /* previous symbol for this token */ } Sym; -typedef struct SymStack { - struct Sym *top; - struct Sym *hash[SYM_HASH_SIZE]; -} SymStack; - /* section definition */ /* XXX: use directly ELF structure for parameters ? */ /* special flag to indicate that the section should not be linked to @@ -293,8 +290,9 @@ TokenSym **table_ident; TokenSym *hash_ident[TOK_HASH_SIZE]; char token_buf[STRING_MAX_SIZE + 1]; char *funcname; -SymStack global_stack, local_stack, label_stack; +Sym *global_stack, *local_stack; Sym *define_stack; +Sym *label_stack; SValue vstack[VSTACK_SIZE], *vtop; int *macro_ptr, *macro_ptr_allocated; @@ -577,8 +575,8 @@ void gen_op(int op); void force_charshort_cast(int t); static void gen_cast(CType *type); void vstore(void); -Sym *sym_find(int v); -Sym *sym_push(int v, CType *type, int r, int c); +static Sym *sym_find(int v); +static Sym *sym_push(int v, CType *type, int r, int c); /* type handling */ int type_size(CType *type, int *a); @@ -1147,6 +1145,9 @@ TokenSym *tok_alloc(const char *str, int len) table_ident[i] = ts; ts->tok = tok_ident++; ts->sym_define = NULL; + ts->sym_label = NULL; + ts->sym_struct = NULL; + ts->sym_identifier = NULL; ts->len = len; ts->hash_next = NULL; memcpy(ts->str, str, len + 1); @@ -1342,7 +1343,7 @@ char *get_tok_str(int v, CValue *cv) } /* push, without hashing */ -Sym *sym_push2(Sym **ps, int v, int t, int c) +static Sym *sym_push2(Sym **ps, int v, int t, int c) { Sym *s; s = tcc_malloc(sizeof(Sym)); @@ -1358,7 +1359,7 @@ Sym *sym_push2(Sym **ps, int v, int t, int c) /* find a symbol and return its associated structure. 's' is the top of the symbol stack */ -Sym *sym_find2(Sym *s, int v) +static Sym *sym_find2(Sym *s, int v) { while (s) { if (s->v == v) @@ -1368,75 +1369,95 @@ Sym *sym_find2(Sym *s, int v) return NULL; } -#define HASH_SYM(v) ((unsigned)(v) % SYM_HASH_SIZE) - -/* find a symbol and return its associated structure. 'st' is the - symbol stack */ -Sym *sym_find1(SymStack *st, int v) +/* structure lookup */ +static Sym *struct_find(int v) { - Sym *s; - - s = st->hash[HASH_SYM(v)]; - while (s) { - if (s->v == v) - return s; - s = s->hash_next; - } - return NULL; + v -= TOK_IDENT; + if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT)) + return NULL; + return table_ident[v]->sym_struct; } -Sym *sym_push1(SymStack *st, int v, int t, int c) +/* find an identifier */ +static inline Sym *sym_find(int v) +{ + v -= TOK_IDENT; + if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT)) + return NULL; + return table_ident[v]->sym_identifier; +} + +/* push a given symbol on the symbol stack */ +static Sym *sym_push(int v, CType *type, int r, int c) { Sym *s, **ps; - s = sym_push2(&st->top, v, t, c); - /* add in hash table */ - if (v) { - ps = &st->hash[HASH_SYM(v)]; - s->hash_next = *ps; + TokenSym *ts; + + if (local_stack) + ps = &local_stack; + else + ps = &global_stack; + s = sym_push2(ps, v, type->t, c); + s->type.ref = type->ref; + s->r = r; + /* don't record fields or anonymous symbols */ + /* XXX: simplify */ + if (!(v & SYM_FIELD) && (v & ~SYM_STRUCT) < SYM_FIRST_ANOM) { + /* record symbol in token array */ + ts = table_ident[(v & ~SYM_STRUCT) - TOK_IDENT]; + if (v & SYM_STRUCT) + ps = &ts->sym_struct; + else + ps = &ts->sym_identifier; + s->prev_tok = *ps; *ps = s; } return s; } -/* find a symbol in the right symbol space */ -Sym *sym_find(int v) +/* push a global identifier */ +static Sym *global_identifier_push(int v, int t, int c) { - Sym *s; - s = sym_find1(&local_stack, v); - if (!s) - s = sym_find1(&global_stack, v); - return s; -} - -/* push a given symbol on the symbol stack */ -Sym *sym_push(int v, CType *type, int r, int c) -{ - Sym *s; - if (local_stack.top) - s = sym_push1(&local_stack, v, type->t, c); - else - s = sym_push1(&global_stack, v, type->t, c); - s->type.ref = type->ref; - s->r = r; + Sym *s, **ps; + s = sym_push2(&global_stack, v, t, c); + /* don't record anonymous symbol */ + if (v < SYM_FIRST_ANOM) { + ps = &table_ident[v - TOK_IDENT]->sym_identifier; + /* modify the top most local identifier, so that + sym_identifier will point to 's' when popped */ + while (*ps != NULL) + ps = &(*ps)->prev_tok; + s->prev_tok = NULL; + *ps = s; + } return s; } /* pop symbols until top reaches 'b' */ -void sym_pop(SymStack *st, Sym *b) +static void sym_pop(Sym **ptop, Sym *b) { - Sym *s, *ss; + Sym *s, *ss, **ps; + TokenSym *ts; + int v; - s = st->top; + s = *ptop; while(s != b) { ss = s->prev; - /* free hash table entry, except if symbol was freed (only - used for #undef symbols) */ - if (s->v) - st->hash[HASH_SYM(s->v)] = s->hash_next; + v = s->v; + /* remove symbol in token array */ + /* XXX: simplify */ + if (!(v & SYM_FIELD) && (v & ~SYM_STRUCT) < SYM_FIRST_ANOM) { + ts = table_ident[(v & ~SYM_STRUCT) - TOK_IDENT]; + if (v & SYM_STRUCT) + ps = &ts->sym_struct; + else + ps = &ts->sym_identifier; + *ps = s->prev_tok; + } tcc_free(s); s = ss; } - st->top = b; + *ptop = b; } /* I/O layer */ @@ -1845,6 +1866,24 @@ static void free_defines(Sym *b) define_stack = b; } +/* label lookup */ +static Sym *label_find(int v) +{ + v -= TOK_IDENT; + if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT)) + return NULL; + return table_ident[v]->sym_label; +} + +static Sym *label_push(int v, int flags) +{ + Sym *s; + s = sym_push2(&label_stack, v, 0, 0); + s->r = flags; + table_ident[v - TOK_IDENT]->sym_label = s; + return s; +} + /* eval an expression for #if/#elif */ int expr_preprocess(void) { @@ -1996,7 +2035,7 @@ enum IncludeState { void preprocess(void) { TCCState *s1 = tcc_state; - int size, i, c, n; + int size, i, c, n, line_num; enum IncludeState state; char buf[1024], *q, *p; char buf1[1024]; @@ -2012,16 +2051,19 @@ void preprocess(void) cinp(); next_nomacro(); redo: - if (tok == TOK_DEFINE) { + switch(tok) { + case TOK_DEFINE: next_nomacro(); parse_define(); - } else if (tok == TOK_UNDEF) { + break; + case TOK_UNDEF: next_nomacro(); s = define_find(tok); /* undefine symbol by putting an invalid name */ if (s) define_undef(s); - } else if (tok == TOK_INCLUDE) { + break; + case TOK_INCLUDE: skip_spaces(); if (ch == '<') { c = '>'; @@ -2139,13 +2181,14 @@ void preprocess(void) state = INCLUDE_STATE_SEEK_IFNDEF; goto redo1; } - } else if (tok == TOK_IFNDEF) { + break; + case TOK_IFNDEF: c = 1; goto do_ifdef; - } else if (tok == TOK_IF) { + case TOK_IF: c = expr_preprocess(); goto do_if; - } else if (tok == TOK_IFDEF) { + case TOK_IFDEF: c = 0; do_ifdef: next_nomacro(); @@ -2163,14 +2206,14 @@ void preprocess(void) error("memory full"); *s1->ifdef_stack_ptr++ = c; goto test_skip; - } else if (tok == TOK_ELSE) { + case TOK_ELSE: if (s1->ifdef_stack_ptr == s1->ifdef_stack) error("#else without matching #if"); if (s1->ifdef_stack_ptr[-1] & 2) error("#else after #else"); c = (s1->ifdef_stack_ptr[-1] ^= 3); goto test_skip; - } else if (tok == TOK_ELIF) { + case TOK_ELIF: if (s1->ifdef_stack_ptr == s1->ifdef_stack) error("#elif without matching #if"); c = s1->ifdef_stack_ptr[-1]; @@ -2188,7 +2231,8 @@ void preprocess(void) state = INCLUDE_STATE_NONE; goto redo; } - } else if (tok == TOK_ENDIF) { + break; + case TOK_ENDIF: if (s1->ifdef_stack_ptr <= file->ifdef_stack_ptr) error("#endif without matching #if"); if (file->ifndef_macro && @@ -2209,8 +2253,8 @@ void preprocess(void) } } s1->ifdef_stack_ptr--; - } else if (tok == TOK_LINE) { - int line_num; + break; + case TOK_LINE: next(); if (tok != TOK_CINT) error("#line"); @@ -2224,8 +2268,31 @@ void preprocess(void) } /* NOTE: we do it there to avoid problems with linefeed */ file->line_num = line_num; - } else if (tok == TOK_ERROR) { - error("#error"); + break; + case TOK_ERROR: + case TOK_WARNING: + c = tok; + skip_spaces(); + q = buf; + while (ch != '\n' && ch != CH_EOF) { + if ((q - buf) < sizeof(buf) - 1) + *q++ = ch; + minp(); + } + *q = '\0'; + if (c == TOK_ERROR) + error("#error %s", buf); + else + warning("#warning %s", buf); + break; + default: + if (tok == TOK_LINEFEED || tok == '!' || tok == TOK_CINT) { + /* '!' is ignored to allow C scripts. numbers are ignored + to emulate cpp behaviour */ + } else { + error("invalid preprocessing directive #%s", get_tok_str(tok, &tokc)); + } + break; } /* ignore other preprocess commands or #! for C scripts */ while (tok != TOK_LINEFEED && tok != TOK_EOF) @@ -2604,7 +2671,7 @@ void parse_number(const char *p) /* return next token without macro substitution */ static inline void next_nomacro1(void) { - int b; + int b, t; char *q; TokenSym *ts; @@ -2628,20 +2695,44 @@ static inline void next_nomacro1(void) break; cinp(); } - if (isid(ch)) { + switch(ch) { + + case '#': + tok = ch; + cinp(); +#if 0 + if (start_of_line) { + preprocess(); + goto redo_no_start; + } else +#endif + { + if (ch == '#') { + cinp(); + tok = TOK_TWOSHARPS; + } + } + break; + + case 'a': case 'b': case 'c': case 'd': + case 'e': case 'f': case 'g': case 'h': + case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': + case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case 'A': case 'B': case 'C': case 'D': + case 'E': case 'F': case 'G': case 'H': + case 'I': case 'J': case 'K': + case 'M': case 'N': case 'O': case 'P': + case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '_': q = token_buf; *q++ = ch; cinp(); - if (q[-1] == 'L') { - if (ch == '\'') { - tok = TOK_LCHAR; - goto char_const; - } - if (ch == '\"') { - tok = TOK_LSTR; - goto str_const; - } - } + parse_ident: while (isid(ch) || isnum(ch)) { if (q >= token_buf + STRING_MAX_SIZE) error("ident too long"); @@ -2651,8 +2742,25 @@ static inline void next_nomacro1(void) *q = '\0'; ts = tok_alloc(token_buf, q - token_buf); tok = ts->tok; - } else if (isnum(ch)) { - int t; + break; + case 'L': + cinp(); + if (ch == '\'') { + tok = TOK_LCHAR; + goto char_const; + } + if (ch == '\"') { + tok = TOK_LSTR; + goto str_const; + } + q = token_buf; + *q++ = 'L'; + goto parse_ident; + + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + case '8': case '9': + cstr_reset(&tokcstr); /* after the first digit, accept digits, alpha, '.' or sign if prefixed by 'eEpP' */ @@ -2670,7 +2778,8 @@ static inline void next_nomacro1(void) cstr_ccat(&tokcstr, '\0'); tokc.cstr = &tokcstr; tok = TOK_PPNUM; - } else if (ch == '.') { + break; + case '.': /* special dot handling because it can also start a number */ cinp(); if (isnum(ch)) { @@ -2687,7 +2796,8 @@ static inline void next_nomacro1(void) } else { tok = '.'; } - } else if (ch == '\'') { + break; + case '\'': tok = TOK_CCHAR; char_const: minp(); @@ -2699,7 +2809,8 @@ static inline void next_nomacro1(void) if (ch != '\'') expect("\'"); minp(); - } else if (ch == '\"') { + break; + case '\"': tok = TOK_STR; str_const: minp(); @@ -2719,35 +2830,177 @@ static inline void next_nomacro1(void) cstr_wccat(&tokcstr, '\0'); tokc.cstr = &tokcstr; minp(); - } else { - q = tok_two_chars; - /* two chars */ + break; + + case '<': + cinp(); + if (ch == '=') { + cinp(); + tok = TOK_LE; + } else if (ch == '<') { + cinp(); + if (ch == '=') { + cinp(); + tok = TOK_A_SHL; + } else { + tok = TOK_SHL; + } + } else { + tok = TOK_LT; + } + break; + + case '>': + cinp(); + if (ch == '=') { + cinp(); + tok = TOK_GE; + } else if (ch == '>') { + cinp(); + if (ch == '=') { + cinp(); + tok = TOK_A_SAR; + } else { + tok = TOK_SAR; + } + } else { + tok = TOK_GT; + } + break; + + case '!': tok = ch; cinp(); - while (*q) { - if (*q == tok && q[1] == ch) { - cinp(); - tok = q[2] & 0xff; - /* three chars tests */ - if (tok == TOK_SHL || tok == TOK_SAR) { - if (ch == '=') { - tok = tok | 0x80; - cinp(); - } - } else if (tok == TOK_DOTS) { - if (ch != '.') - error("parse error"); - cinp(); - } - return; - } - q = q + 3; + if (ch == '=') { + cinp(); + tok = TOK_NE; } - /* single char substitutions */ - if (tok == '<') - tok = TOK_LT; - else if (tok == '>') - tok = TOK_GT; + break; + + case '=': + tok = ch; + cinp(); + if (ch == '=') { + cinp(); + tok = TOK_EQ; + } + break; + + case '&': + tok = ch; + cinp(); + if (ch == '&') { + cinp(); + tok = TOK_LAND; + } else if (ch == '=') { + cinp(); + tok = TOK_A_AND; + } + break; + + case '|': + tok = ch; + cinp(); + if (ch == '|') { + cinp(); + tok = TOK_LOR; + } else if (ch == '=') { + cinp(); + tok = TOK_A_OR; + } + break; + + case '+': + tok = ch; + cinp(); + if (ch == '+') { + cinp(); + tok = TOK_INC; + } else if (ch == '=') { + cinp(); + tok = TOK_A_ADD; + } + break; + + case '-': + tok = ch; + cinp(); + if (ch == '-') { + cinp(); + tok = TOK_DEC; + } else if (ch == '=') { + cinp(); + tok = TOK_A_SUB; + } else if (ch == '>') { + cinp(); + tok = TOK_ARROW; + } + break; + + case '*': + tok = ch; + cinp(); + if (ch == '=') { + cinp(); + tok = TOK_A_MUL; + } + break; + + case '%': + tok = ch; + cinp(); + if (ch == '=') { + cinp(); + tok = TOK_A_MOD; + } + break; + + case '^': + tok = ch; + cinp(); + if (ch == '=') { + cinp(); + tok = TOK_A_XOR; + } + break; + + /* comments or operator */ + case '/': + tok = ch; + cinp(); + if (ch == '=') { + cinp(); + tok = TOK_A_DIV; + } +#if 0 + else if (ch == '/' || ch == '*') { + parse_comments(); + goto redo_no_start; + } +#endif + break; + + /* simple tokens */ + case '(': + case ')': + case '[': + case ']': + case '{': + case '}': + case ',': + case ';': + case ':': + case '?': + case '~': + tok = ch; + cinp(); + break; + case CH_EOF: + tok = TOK_EOF; + break; + default: + error("unrecognized character \\x%02x", ch); + break; } } @@ -3224,7 +3477,7 @@ static Sym *get_sym_ref(CType *type, Section *sec, Sym *sym; v = anon_sym++; - sym = sym_push1(&global_stack, v, type->t | VT_STATIC, 0); + sym = global_identifier_push(v, type->t | VT_STATIC, 0); sym->type.ref = type->ref; sym->r = VT_CONST | VT_SYM; put_extern_sym(sym, sec, offset, size); @@ -3249,8 +3502,7 @@ static Sym *external_global_sym(int v, CType *type, int r) s = sym_find(v); if (!s) { /* push forward reference */ - s = sym_push1(&global_stack, - v, type->t | VT_EXTERN, 0); + s = global_identifier_push(v, type->t | VT_EXTERN, 0); s->type.ref = type->ref; s->r = r | VT_CONST | VT_SYM; } @@ -4562,7 +4814,7 @@ static inline CType *pointed_type(CType *type) static void mk_pointer(CType *type) { Sym *s; - s = sym_push(0, type, 0, -1); + s = sym_push(SYM_FIELD, type, 0, -1); type->t = VT_PTR | (type->t & ~VT_TYPE); type->ref = s; } @@ -4972,7 +5224,7 @@ static void struct_decl(CType *type, int u) next(); /* struct already defined ? return it */ /* XXX: check consistency */ - s = sym_find(v | SYM_STRUCT); + s = struct_find(v); if (s) { if (s->type.t != a) error("invalid type"); @@ -5302,7 +5554,7 @@ static void post_type(CType *type, AttributeDef *ad) type->t &= ~(VT_TYPEDEF | VT_STATIC | VT_EXTERN); post_type(type, ad); /* we push a anonymous symbol which will contain the function prototype */ - s = sym_push(0, type, ad->func_call, l); + s = sym_push(SYM_FIELD, type, ad->func_call, l); s->next = first; type->t = t1 | VT_FUNC; type->ref = s; @@ -5323,7 +5575,7 @@ static void post_type(CType *type, AttributeDef *ad) /* we push a anonymous symbol which will contain the array element type */ - s = sym_push(0, type, 0, n); + s = sym_push(SYM_FIELD, type, 0, n); type->t = t1 | VT_ARRAY | VT_PTR; type->ref = s; } @@ -5610,10 +5862,9 @@ static void unary(void) /* allow to take the address of a label */ if (tok < TOK_UIDENT) expect("label identifier"); - s = sym_find1(&label_stack, tok); + s = label_find(tok); if (!s) { - s = sym_push1(&label_stack, tok, 0, 0); - s->r = LABEL_FORWARD; + s = label_push(tok, LABEL_FORWARD); } if (!s->type.t) { s->type.t = VT_VOID; @@ -6148,7 +6399,7 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym, int case_re } else if (tok == '{') { next(); /* declarations */ - s = local_stack.top; + s = local_stack; while (tok != '}') { decl(VT_LOCAL); if (tok != '}') @@ -6317,11 +6568,10 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym, int case_re expect("pointer"); ggoto(); } else if (tok >= TOK_UIDENT) { - s = sym_find1(&label_stack, tok); + s = label_find(tok); /* put forward definition if needed */ if (!s) { - s = sym_push1(&label_stack, tok, 0, 0); - s->r = LABEL_FORWARD; + s = label_push(tok, LABEL_FORWARD); } /* label already defined */ if (s->r & LABEL_FORWARD) @@ -6337,13 +6587,13 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym, int case_re b = is_label(); if (b) { /* label case */ - s = sym_find1(&label_stack, b); + s = label_find(b); if (s) { if (!(s->r & LABEL_FORWARD)) error("multiple defined label"); gsym((long)s->next); } else { - s = sym_push1(&label_stack, b, 0, 0); + s = label_push(b, 0); } s->next = (void *)ind; s->r = 0; @@ -7110,7 +7360,7 @@ static void decl(int l) sym->type = type; } else { /* put function symbol */ - sym = sym_push1(&global_stack, v, type.t, 0); + sym = global_identifier_push(v, type.t, 0); sym->type.ref = type.ref; } /* NOTE: we patch the symbol size later */ @@ -7121,7 +7371,7 @@ static void decl(int l) if (do_debug) put_func_debug(sym); /* push a dummy symbol to enable local sym storage */ - sym_push1(&local_stack, 0, 0, 0); + sym_push2(&local_stack, SYM_FIELD, 0, 0); gfunc_prolog(&type); loc = 0; rsym = 0; @@ -7136,8 +7386,9 @@ static void decl(int l) /* look if any labels are undefined. Define symbols if '&&label' was used. */ { - Sym *s; - for(s = label_stack.top; s != NULL; s = s->prev) { + Sym *s, *s1; + for(s = label_stack; s != NULL; s = s1) { + s1 = s->prev; if (s->r & LABEL_FORWARD) { error("label '%s' used but not defined", get_tok_str(s->v, NULL)); @@ -7147,9 +7398,12 @@ static void decl(int l) 1 is put. */ put_extern_sym(s, cur_text_section, (long)s->next, 1); } + /* remove label */ + table_ident[s->v - TOK_IDENT]->sym_label = NULL; + tcc_free(s); } + label_stack = NULL; } - sym_pop(&label_stack, NULL); /* reset label stack */ sym_pop(&local_stack, NULL); /* reset local stack */ /* end of function */ /* patch symbol size */ @@ -7253,7 +7507,7 @@ static int tcc_compile(TCCState *s1) mk_pointer(&char_pointer_type); func_old_type.t = VT_FUNC; - func_old_type.ref = sym_push(0, &int_type, FUNC_CDECL, FUNC_OLD); + func_old_type.ref = sym_push(SYM_FIELD, &int_type, FUNC_CDECL, FUNC_OLD); #if 0 /* define 'void *alloca(unsigned int)' builtin function */ @@ -7262,7 +7516,7 @@ static int tcc_compile(TCCState *s1) p = anon_sym++; sym = sym_push(p, mk_pointer(VT_VOID), FUNC_CDECL, FUNC_NEW); - s1 = sym_push(0, VT_UNSIGNED | VT_INT, 0, 0); + s1 = sym_push(SYM_FIELD, VT_UNSIGNED | VT_INT, 0, 0); s1->next = NULL; sym->next = s1; sym_push(TOK_alloca, VT_FUNC | (p << VT_STRUCT_SHIFT), VT_CONST, 0);