diff --git a/Makefile b/Makefile index 0f3002b..4f18567 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ endif endif else # not GCC ifeq (-$(findstring clang,$(CC))-,-clang-) -# make clang accept gnuisms in libcrt.c +# make clang accept gnuisms in libtcc1.c CFLAGS+=-fheinous-gnu-extensions endif endif @@ -101,11 +101,11 @@ $(ARM_EABI_CROSS)_LINK = arm-eabi-tcc$(EXESUF) CORE_FILES = tcc.c libtcc.c tccpp.c tccgen.c tccelf.c tccasm.c tccrun.c CORE_FILES += tcc.h config.h libtcc.h tcctok.h -I386_FILES = $(CORE_FILES) i386-gen.c asmx86.c i386-asm.h asmx86-tok.h -WIN32_FILES = $(CORE_FILES) i386-gen.c asmx86.c i386-asm.h asmx86-tok.h tccpe.c -WIN64_FILES = $(CORE_FILES) x86_64-gen.c asmx86.c x86_64-asm.h tccpe.c +I386_FILES = $(CORE_FILES) i386-gen.c i386-asm.c i386-asm.h i386-tok.h +WIN32_FILES = $(CORE_FILES) i386-gen.c i386-asm.c i386-asm.h i386-tok.h tccpe.c +WIN64_FILES = $(CORE_FILES) x86_64-gen.c i386-asm.c x86_64-asm.h tccpe.c WINCE_FILES = $(CORE_FILES) arm-gen.c tccpe.c -X86_64_FILES = $(CORE_FILES) x86_64-gen.c asmx86.c x86_64-asm.h +X86_64_FILES = $(CORE_FILES) x86_64-gen.c i386-asm.c x86_64-asm.h ARM_FILES = $(CORE_FILES) arm-gen.c C67_FILES = $(CORE_FILES) c67-gen.c tcccoff.c @@ -113,29 +113,29 @@ ifdef CONFIG_WIN64 PROGS+=tiny_impdef$(EXESUF) tiny_libmaker$(EXESUF) NATIVE_FILES=$(WIN64_FILES) PROGS_CROSS=$(WIN32_CROSS) $(I386_CROSS) $(X64_CROSS) $(ARM_CROSS) $(C67_CROSS) -LIBTCC1_CROSS=lib/i386-win32/libcrt.a -LIBCRT=libcrt.a +LIBTCC1_CROSS=lib/i386-win32/libtcc1.a +LIBTCC1=libtcc1.a else ifdef CONFIG_WIN32 PROGS+=tiny_impdef$(EXESUF) tiny_libmaker$(EXESUF) NATIVE_FILES=$(WIN32_FILES) PROGS_CROSS=$(WIN64_CROSS) $(I386_CROSS) $(X64_CROSS) $(ARM_CROSS) $(C67_CROSS) -LIBTCC1_CROSS=lib/x86_64-win32/libcrt.a -LIBCRT=libcrt.a +LIBTCC1_CROSS=lib/x86_64-win32/libtcc1.a +LIBTCC1=libtcc1.a else ifeq ($(ARCH),i386) NATIVE_FILES=$(I386_FILES) PROGS_CROSS=$(X64_CROSS) $(WIN32_CROSS) $(WIN64_CROSS) $(ARM_CROSS) $(C67_CROSS) -LIBTCC1_CROSS=lib/i386-win32/libcrt.a lib/x86_64-win32/libcrt.a -LIBCRT=libcrt.a +LIBTCC1_CROSS=lib/i386-win32/libtcc1.a lib/x86_64-win32/libtcc1.a +LIBTCC1=libtcc1.a else ifeq ($(ARCH),x86-64) NATIVE_FILES=$(X86_64_FILES) PROGS_CROSS=$(I386_CROSS) $(WIN32_CROSS) $(WIN64_CROSS) $(ARM_CROSS) $(C67_CROSS) -LIBTCC1_CROSS=lib/i386-win32/libcrt.a lib/x86_64-win32/libcrt.a lib/i386/libcrt.a -LIBCRT=libcrt.a +LIBTCC1_CROSS=lib/i386-win32/libtcc1.a lib/x86_64-win32/libtcc1.a lib/i386/libtcc1.a +LIBTCC1=libtcc1.a else ifeq ($(ARCH),arm) NATIVE_FILES=$(ARM_FILES) PROGS_CROSS=$(I386_CROSS) $(X64_CROSS) $(WIN32_CROSS) $(WIN64_CROSS) $(C67_CROSS) -LIBCRT=libcrt.a -LIBTCC1_CROSS=lib/i386-win32/libcrt.a lib/x86_64-win32/libcrt.a lib/i386/libcrt.a +LIBTCC1=libtcc1.a +LIBTCC1_CROSS=lib/i386-win32/libtcc1.a lib/x86_64-win32/libtcc1.a lib/i386/libtcc1.a endif PROGS_CROSS_LINK=$(foreach PROG_CROSS,$(PROGS_CROSS),$($(PROG_CROSS)_LINK)) @@ -143,7 +143,7 @@ ifeq ($(TARGETOS),Darwin) PROGS+=tiny_libmaker$(EXESUF) endif -TCCLIBS = $(LIBCRT) $(LIBTCC) $(LIBTCC_EXTRA) +TCCLIBS = $(LIBTCC1) $(LIBTCC) $(LIBTCC_EXTRA) TCCDOCS = tcc.1 tcc-doc.html tcc-doc.info ifdef CONFIG_CROSS @@ -225,9 +225,9 @@ tiny_libmaker$(EXESUF): win32/tools/tiny_libmaker.c $(CC) -o $@ $< $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) # TinyCC runtime libraries -libcrt.a : FORCE +libtcc1.a : FORCE $(MAKE) -C lib native -lib/%/libcrt.a : FORCE $(PROGS_CROSS) +lib/%/libtcc1.a : FORCE $(PROGS_CROSS) $(MAKE) -C lib cross TARGET=$* FORCE: @@ -258,8 +258,8 @@ endif -$(INSTALL) -m644 tcc-doc.info "$(infodir)" mkdir -p "$(tccdir)" mkdir -p "$(tccdir)/include" -ifneq ($(LIBCRT),) - $(INSTALL) -m644 $(LIBCRT) "$(tccdir)" +ifneq ($(LIBTCC1),) + $(INSTALL) -m644 $(LIBTCC1) "$(tccdir)" endif $(INSTALL) -m644 $(addprefix $(top_srcdir)/include/,$(TCC_INCLUDES)) $(top_srcdir)/tcclib.h "$(tccdir)/include" mkdir -p "$(libdir)" @@ -277,19 +277,19 @@ ifdef CONFIG_CROSS mkdir -p "$(tccdir)/win32/lib/64" ifneq ($(ARCH),i386) mkdir -p "$(tccdir)/i386" - $(INSTALL) -m644 lib/i386/libcrt.a "$(tccdir)/i386" + $(INSTALL) -m644 lib/i386/libtcc1.a "$(tccdir)/i386" cp -r "$(tccdir)/include" "$(tccdir)/i386" endif $(INSTALL) -m644 $(top_srcdir)/win32/lib/*.def "$(tccdir)/win32/lib" - $(INSTALL) -m644 lib/i386-win32/libcrt.a "$(tccdir)/win32/lib/32" - $(INSTALL) -m644 lib/x86_64-win32/libcrt.a "$(tccdir)/win32/lib/64" + $(INSTALL) -m644 lib/i386-win32/libtcc1.a "$(tccdir)/win32/lib/32" + $(INSTALL) -m644 lib/x86_64-win32/libtcc1.a "$(tccdir)/win32/lib/64" cp -r $(top_srcdir)/win32/include/. "$(tccdir)/win32/include" cp -r "$(tccdir)/include" "$(tccdir)/win32" endif uninstall: rm -fv $(foreach P,$(PROGS),"$(bindir)/$P") - rm -fv $(foreach P,$(LIBCRT),"$(tccdir)/$P") + rm -fv $(foreach P,$(LIBTCC1),"$(tccdir)/$P") rm -fv $(foreach P,$(TCC_INCLUDES),"$(tccdir)/include/$P") rm -fv "$(tccdir)/include/tcclib.h" rm -fv "$(docdir)/tcc-doc.html" "$(mandir)/man1/tcc.1" "$(infodir)/tcc-doc.info" @@ -310,7 +310,7 @@ install: $(PROGS) $(TCCLIBS) $(TCCDOCS) mkdir -p "$(tccdir)/doc" mkdir -p "$(tccdir)/libtcc" $(INSTALLBIN) -m755 $(PROGS) "$(tccdir)" - $(INSTALL) -m644 $(LIBCRT) $(top_srcdir)/win32/lib/*.def "$(tccdir)/lib" + $(INSTALL) -m644 $(LIBTCC1) $(top_srcdir)/win32/lib/*.def "$(tccdir)/lib" cp -r $(top_srcdir)/win32/include/. "$(tccdir)/include" cp -r $(top_srcdir)/win32/examples/. "$(tccdir)/examples" $(INSTALL) -m644 $(addprefix $(top_srcdir)/include/,$(TCC_INCLUDES)) $(top_srcdir)/tcclib.h "$(tccdir)/include" @@ -320,8 +320,8 @@ install: $(PROGS) $(TCCLIBS) $(TCCDOCS) ifdef CONFIG_CROSS mkdir -p "$(tccdir)/lib/32" mkdir -p "$(tccdir)/lib/64" - -$(INSTALL) -m644 lib/i386-win32/libcrt.a "$(tccdir)/lib/32" - -$(INSTALL) -m644 lib/x86_64-win32/libcrt.a "$(tccdir)/lib/64" + -$(INSTALL) -m644 lib/i386-win32/libtcc1.a "$(tccdir)/lib/32" + -$(INSTALL) -m644 lib/x86_64-win32/libtcc1.a "$(tccdir)/lib/64" endif uninstall: @@ -340,7 +340,7 @@ tcc-doc.info: tcc-doc.texi -makeinfo $< # in tests subdir -export LIBCRT +export LIBTCC1 %est: $(MAKE) -C tests $@ 'PROGS_CROSS=$(PROGS_CROSS)' @@ -348,7 +348,7 @@ export LIBCRT clean: rm -vf $(PROGS) tcc_p$(EXESUF) tcc.pod *~ *.o *.a *.so* *.out *.exe libtcc_test$(EXESUF) $(MAKE) -C tests $@ -ifneq ($(LIBCRT),) +ifneq ($(LIBTCC1),) $(MAKE) -C lib $@ endif diff --git a/arm-gen.c b/arm-gen.c index 567c868..680a490 100644 --- a/arm-gen.c +++ b/arm-gen.c @@ -61,7 +61,7 @@ #define RC_IRET RC_R0 /* function return: integer register */ #define RC_LRET RC_R1 /* function return: second integer register */ #define RC_FRET RC_F0 /* function return: float register */ -#define RC_MASK (RC_INT|RC_FLOAT) + /* pretty names for the registers */ enum { TREG_R0 = 0, @@ -540,14 +540,6 @@ void load(int r, SValue *sv) v = fr & VT_VALMASK; if (fr & VT_LVAL) { uint32_t base = 0xB; // fp - if(fr & VT_TMP){ - int size, align; - if((ft & VT_BTYPE) == VT_FUNC) - size = PTR_SIZE; - else - size = type_size(&sv->type, &align); - loc_stack(size, 0); - } if(v == VT_LLOCAL) { v1.type.t = VT_PTR; v1.r = VT_LOCAL | VT_LVAL; @@ -1417,60 +1409,37 @@ void gjmp_addr(int a) /* generate a test. set 'inv' to invert test. Stack entry is popped */ int gtst(int inv, int t) { - int v, r; - uint32_t op; - v = vtop->r & VT_VALMASK; - r=ind; - if (v == VT_CMP) { - op=mapcc(inv?negcc(vtop->c.i):vtop->c.i); - op|=encbranch(r,t,1); - o(op); - t=r; - } else if (v == VT_JMP || v == VT_JMPI) { - if ((v & 1) == inv) { - if(!vtop->c.i) - vtop->c.i=t; - else { - uint32_t *x; - int p,lp; - if(t) { - p = vtop->c.i; - do { - p = decbranch(lp=p); - } while(p); - x = (uint32_t *)(cur_text_section->data + lp); - *x &= 0xff000000; - *x |= encbranch(lp,t,1); - } - t = vtop->c.i; - } - } else { - t = gjmp(t); - gsym(vtop->c.i); + int v, r; + uint32_t op; + v = vtop->r & VT_VALMASK; + r=ind; + if (v == VT_CMP) { + op=mapcc(inv?negcc(vtop->c.i):vtop->c.i); + op|=encbranch(r,t,1); + o(op); + t=r; + } else { /* VT_JMP || VT_JMPI */ + if ((v & 1) == inv) { + if(!vtop->c.i) + vtop->c.i=t; + else { + uint32_t *x; + int p,lp; + if(t) { + p = vtop->c.i; + do { + p = decbranch(lp=p); + } while(p); + x = (uint32_t *)(cur_text_section->data + lp); + *x &= 0xff000000; + *x |= encbranch(lp,t,1); + } + t = vtop->c.i; + } + } else { + t = gjmp(t); + gsym(vtop->c.i); } - } else { - if (is_float(vtop->type.t)) { - r=gv(RC_FLOAT); -#ifdef TCC_ARM_VFP - o(0xEEB50A40|(vfpr(r)<<12)|T2CPR(vtop->type.t)); /* fcmpzX */ - o(0xEEF1FA10); /* fmstat */ -#else - o(0xEE90F118|(fpr(r)<<16)); -#endif - vtop->r = VT_CMP; - vtop->c.i = TOK_NE; - return gtst(inv, t); - } else if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) { - /* constant jmp optimization */ - if ((vtop->c.i != 0) != inv) - t = gjmp(t); - } else { - v = gv(RC_INT); - o(0xE3300000|(intr(v)<<16)); - vtop->r = VT_CMP; - vtop->c.i = TOK_NE; - return gtst(inv, t); - } } vtop--; return t; diff --git a/c67-gen.c b/c67-gen.c index 07ae259..a26dfaa 100644 --- a/c67-gen.c +++ b/c67-gen.c @@ -58,7 +58,7 @@ #define RC_IRET RC_C67_A4 /* function return: integer register */ #define RC_LRET RC_C67_A5 /* function return: second integer register */ #define RC_FRET RC_C67_A4 /* function return: float register */ -#define RC_MASK (RC_INT|RC_FLOAT) + /* pretty names for the registers */ enum { TREG_EAX = 0, // really A2 @@ -1571,21 +1571,12 @@ void load(int r, SValue * sv) v = fr & VT_VALMASK; if (fr & VT_LVAL) { - if(fr & VT_TMP){ - int size, align; - if((ft & VT_BTYPE) == VT_FUNC) - size = PTR_SIZE; - else - size = type_size(&sv->type, &align); - loc_stack(size, 0); - } if (v == VT_LLOCAL) { v1.type.t = VT_INT; v1.r = VT_LOCAL | VT_LVAL; v1.c.ul = fc; load(r, &v1); fr = r; - fc = 0; } else if ((ft & VT_BTYPE) == VT_LDOUBLE) { tcc_error("long double not supported"); } else if ((ft & VT_TYPE) == VT_BYTE) { @@ -2111,7 +2102,7 @@ int gtst(int inv, int t) C67_NOP(5); t = ind1; //return where we need to patch - } else if (v == VT_JMP || v == VT_JMPI) { + } else { /* VT_JMP || VT_JMPI */ /* && or || optimization */ if ((v & 1) == inv) { /* insert vtop->c jump list in t */ @@ -2137,37 +2128,6 @@ int gtst(int inv, int t) t = gjmp(t); gsym(vtop->c.i); } - } else { - if (is_float(vtop->type.t)) { - vpushi(0); - gen_op(TOK_NE); - } - if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) { - /* constant jmp optimization */ - if ((vtop->c.i != 0) != inv) - t = gjmp(t); - } else { - // I think we need to get the value on the stack - // into a register, test it, and generate a branch - // return the address of the branch, so it can be - // later patched - - v = gv(RC_INT); // get value into a reg - ind1 = ind; - C67_MVKL(C67_A0, t); //r=reg to load, constant - C67_MVKH(C67_A0, t); //r=reg to load, constant - - if (v != TREG_EAX && // check if not already in a conditional test reg - v != TREG_EDX && v != TREG_ST0 && v != C67_B2) { - C67_MV(v, C67_B2); - v = C67_B2; - } - - C67_IREG_B_REG(inv, v, C67_A0); // [!R] B.S2x A0 - C67_NOP(5); - t = ind1; //return where we need to patch - ind1 = ind; - } } vtop--; return t; diff --git a/asmx86.c b/i386-asm.c similarity index 98% rename from asmx86.c rename to i386-asm.c index 9010c03..a524658 100644 --- a/asmx86.c +++ b/i386-asm.c @@ -239,36 +239,6 @@ static const uint16_t op0_codes[] = { #endif }; -#ifdef PRINTF_ASM_CODE -void printf_asm_opcode(){ - const ASMInstr *pa; - int freq[4]; - int op_vals[500]; - int nb_op_vals, i, j; - nb_op_vals = 0; - memset(freq, 0, sizeof(freq)); - for(pa = asm_instrs; pa->sym != 0; pa++) { - freq[pa->nb_ops]++; - for(i=0;inb_ops;i++) { - for(j=0;jop_type[i] == op_vals[j]) - goto found; - } - op_vals[nb_op_vals++] = pa->op_type[i]; - found: ; - } - } - for(i=0;i> 8); g(b); return; - } else if (opcode <= TOK_ASM_alllast) { - tcc_error("bad operand with opcode '%s'", get_tok_str(opcode, NULL)); + } else if (opcode <= TOK_ASM_alllast) { + tcc_error("bad operand with opcode '%s'", + get_tok_str(opcode, NULL)); } else { tcc_error("unknown opcode '%s'", get_tok_str(opcode, NULL)); @@ -1098,7 +1069,7 @@ ST_FUNC void asm_compute_constraints(ASMOperand *operands, uint8_t regs_allocated[NB_ASM_REGS]; /* init fields */ - for(i=0; iinput_index = -1; op->ref_index = -1; @@ -1108,7 +1079,7 @@ ST_FUNC void asm_compute_constraints(ASMOperand *operands, } /* compute constraint priority and evaluate references to output constraints if input constraints */ - for(i=0; iconstraint; str = skip_constraint_modifiers(str); @@ -1528,4 +1499,4 @@ ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str) tcc_error("invalid clobber register '%s'", str); } clobber_regs[reg] = 1; -} \ No newline at end of file +} diff --git a/i386-gen.c b/i386-gen.c index cfa9cc6..ece054b 100644 --- a/i386-gen.c +++ b/i386-gen.c @@ -21,7 +21,7 @@ #ifdef TARGET_DEFS_ONLY /* number of available registers */ -#define NB_REGS 8 +#define NB_REGS 4 #define NB_ASM_REGS 8 /* a register can belong to several classes. The classes must be @@ -33,24 +33,17 @@ #define RC_ST0 0x0008 #define RC_ECX 0x0010 #define RC_EDX 0x0020 -#define RC_EBX 0x0040 -#define RC_ESI 0x0080 -#define RC_EDI 0x0100 -#define RC_INT2 0x0200 #define RC_IRET RC_EAX /* function return: integer register */ #define RC_LRET RC_EDX /* function return: second integer register */ #define RC_FRET RC_ST0 /* function return: float register */ -#define RC_MASK (RC_INT|RC_INT2|RC_FLOAT) + /* pretty names for the registers */ enum { TREG_EAX = 0, TREG_ECX, TREG_EDX, - TREG_EBX, - TREG_ESP, TREG_ST0, - TREG_ESI, - TREG_EDI, + TREG_ESP = 4 }; /* return registers for function */ @@ -97,14 +90,10 @@ enum { #include "tcc.h" ST_DATA const int reg_classes[NB_REGS] = { - /* eax */ RC_INT | RC_EAX | RC_INT2, - /* ecx */ RC_INT | RC_ECX | RC_INT2, + /* eax */ RC_INT | RC_EAX, + /* ecx */ RC_INT | RC_ECX, /* edx */ RC_INT | RC_EDX, - RC_INT|RC_INT2|RC_EBX, - 0, /* st0 */ RC_FLOAT | RC_ST0, - RC_ESI|RC_INT2, - RC_EDI|RC_INT2, }; static unsigned long func_sub_sp_offset; @@ -237,14 +226,6 @@ ST_FUNC void load(int r, SValue *sv) v = fr & VT_VALMASK; if (fr & VT_LVAL) { - if(fr & VT_TMP){ - int size, align; - if((ft & VT_BTYPE) == VT_FUNC) - size = PTR_SIZE; - else - size = type_size(&sv->type, &align); - loc_stack(size, 0); - } if (v == VT_LLOCAL) { v1.type.t = VT_INT; v1.r = VT_LOCAL | VT_LVAL; @@ -253,7 +234,6 @@ ST_FUNC void load(int r, SValue *sv) if (!(reg_classes[fr] & RC_INT)) fr = get_reg(RC_INT); load(fr, &v1); - fc = 0; } if ((ft & VT_BTYPE) == VT_FLOAT) { o(0xd9); /* flds */ @@ -697,7 +677,7 @@ ST_FUNC int gtst(int inv, int t) /* fast case : can jump directly since flags are set */ g(0x0f); t = psym((vtop->c.i - 16) ^ inv, t); - } else if (v == VT_JMP || v == VT_JMPI) { + } else { /* VT_JMP || VT_JMPI */ /* && or || optimization */ if ((v & 1) == inv) { /* insert vtop->c jump list in t */ @@ -710,23 +690,6 @@ ST_FUNC int gtst(int inv, int t) t = gjmp(t); gsym(vtop->c.i); } - } else { - if (is_float(vtop->type.t) || - (vtop->type.t & VT_BTYPE) == VT_LLONG) { - vpushi(0); - gen_op(TOK_NE); - } - if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) { - /* constant jmp optimization */ - if ((vtop->c.i != 0) != inv) - t = gjmp(t); - } else { - v = gv(RC_INT); - o(0x85); - o(0xc0 + v * 9); - g(0x0f); - t = psym(0x85 ^ inv, t); - } } vtop--; return t; @@ -735,48 +698,40 @@ ST_FUNC int gtst(int inv, int t) /* generate an integer binary operation */ ST_FUNC void gen_opi(int op) { - int r, fr, opc, fc, c; - int cc, uu, tt2; - - fr = vtop[0].r; - fc = vtop->c.ul; - cc = (fr & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST; - tt2 = (fr & (VT_LVAL | VT_LVAL_TYPE)) == VT_LVAL; + int r, fr, opc, c; switch(op) { case '+': case TOK_ADDC1: /* add with carry generation */ opc = 0; gen_op8: - vswap(); - r = gv(RC_INT); - vswap(); - if (cc) { + if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) { /* constant case */ + vswap(); + r = gv(RC_INT); + vswap(); c = vtop->c.i; if (c == (char)c) { /* generate inc and dec for smaller code */ - if (c == 1 && opc == 0) { + if (c==1 && opc==0) { o (0x40 | r); // inc - } else if (c == 1 && opc == 5) { + } else if (c==1 && opc==5) { o (0x48 | r); // dec } else { o(0x83); - o(0xc0 + r + opc*8); + o(0xc0 | (opc << 3) | r); g(c); } } else { o(0x81); - oad(0xc0 + r+ opc*8, c); + oad(0xc0 | (opc << 3) | r, c); } } else { - if(!tt2) - fr = gv(RC_INT); - o(0x03 + opc*8); - if(fr >= VT_CONST) - gen_modrm(r, fr, vtop->sym, fc); - else - o(0xc0 + fr + r*8); + gv2(RC_INT, RC_INT); + r = vtop[-1].r; + fr = vtop[0].r; + o((opc << 3) | 0x01); + o(0xc0 + r + fr * 8); } vtop--; if (op >= TOK_ULT && op <= TOK_GT) { @@ -804,28 +759,12 @@ ST_FUNC void gen_opi(int op) opc = 1; goto gen_op8; case '*': - opc = 5; - vswap(); - r = gv(RC_INT); - vswap(); - if(!tt2) - fr = gv(RC_INT); - if(r == TREG_EAX){ - if(fr != TREG_EDX) - save_reg(TREG_EDX); - o(0xf7); - if(fr >= VT_CONST) - gen_modrm(opc, fr, vtop->sym, fc); - else - o(0xc0 + fr + opc*8); - }else{ - o(0xaf0f); /* imul fr, r */ - if(fr >= VT_CONST) - gen_modrm(r, fr, vtop->sym, fc); - else - o(0xc0 + fr + r*8); - } + gv2(RC_INT, RC_INT); + r = vtop[-1].r; + fr = vtop[0].r; vtop--; + o(0xaf0f); /* imul fr, r */ + o(0xc0 + fr + r * 8); break; case TOK_SHL: opc = 4; @@ -836,71 +775,56 @@ ST_FUNC void gen_opi(int op) case TOK_SAR: opc = 7; gen_shift: - if (cc) { + opc = 0xc0 | (opc << 3); + if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) { /* constant case */ vswap(); r = gv(RC_INT); vswap(); - c = vtop->c.i; - if(c == 1){ - o(0xd1); - o(0xc0 + r + opc*8); - }else{ - o(0xc1); /* shl/shr/sar $xxx, r */ - o(0xc0 + r + opc*8); - g(c & 0x1f); - } + c = vtop->c.i & 0x1f; + o(0xc1); /* shl/shr/sar $xxx, r */ + o(opc | r); + g(c); } else { /* we generate the shift in ecx */ gv2(RC_INT, RC_ECX); r = vtop[-1].r; o(0xd3); /* shl/shr/sar %cl, r */ - o(0xc0 + r + opc*8); + o(opc | r); } vtop--; break; - case TOK_UMOD: - opc = 4; - uu = 1; - goto divmod; - case TOK_UDIV: - case TOK_UMULL: - opc = 6; - uu = 1; - goto divmod; case '/': - case '%': + case TOK_UDIV: case TOK_PDIV: - opc = 7; - uu = 0; - divmod: + case '%': + case TOK_UMOD: + case TOK_UMULL: /* first operand must be in eax */ /* XXX: need better constraint for second operand */ - if(!tt2){ - gv2(RC_EAX, RC_INT2); - fr = vtop[0].r; - }else{ - vswap(); - gv(RC_EAX); - vswap(); - } - save_reg(TREG_EDX); - if (op == TOK_UMULL) { + gv2(RC_EAX, RC_ECX); + r = vtop[-1].r; + fr = vtop[0].r; + vtop--; + save_reg(TREG_EDX); + if (op == TOK_UMULL) { o(0xf7); /* mul fr */ - vtop->r2 = TREG_EDX; - }else{ - o(uu ? 0xd231 : 0x99); /* xor %edx,%edx : cdq RDX:RAX <- sign-extend of RAX. */ - o(0xf7); /* div fr, %eax */ - } - if(fr >= VT_CONST) - gen_modrm(opc, fr, vtop->sym, fc); - else - o(0xc0 + fr + opc*8); - if (op == '%' || op == TOK_UMOD) - r = TREG_EDX; - else + o(0xe0 + fr); + vtop->r2 = TREG_EDX; r = TREG_EAX; - vtop--; + } else { + if (op == TOK_UDIV || op == TOK_UMOD) { + o(0xf7d231); /* xor %edx, %edx, div fr, %eax */ + o(0xf0 + fr); + } else { + o(0xf799); /* cltd, idiv fr, %eax */ + o(0xf8 + fr); + } + if (op == '%' || op == TOK_UMOD) + r = TREG_EDX; + else + r = TREG_EAX; + } vtop->r = r; break; default: diff --git a/asmx86-tok.h b/i386-tok.h similarity index 100% rename from asmx86-tok.h rename to i386-tok.h diff --git a/il-gen.c b/il-gen.c index 9dafbbf..9e1ec64 100644 --- a/il-gen.c +++ b/il-gen.c @@ -516,7 +516,7 @@ int gtst(int inv, int t) break; } t = out_opj(c, t); - } else if (v == VT_JMP || v == VT_JMPI) { + } else { /* VT_JMP || VT_JMPI */ /* && or || optimization */ if ((v & 1) == inv) { /* insert vtop->c jump list in t */ @@ -529,19 +529,6 @@ int gtst(int inv, int t) t = gjmp(t); gsym(vtop->c.i); } - } else { - if (is_float(vtop->t)) { - vpushi(0); - gen_op(TOK_NE); - } - if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_FORWARD)) == VT_CONST) { - /* constant jmp optimization */ - if ((vtop->c.i != 0) != inv) - t = gjmp(t); - } else { - v = gv(RC_INT); - t = out_opj(IL_OP_BRTRUE - inv, t); - } } vtop--; return t; diff --git a/lib/Makefile b/lib/Makefile index 4bf2c7e..e9e12f1 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,5 +1,5 @@ # -# Tiny C Compiler Makefile for libcrt.a +# Tiny C Compiler Makefile for libtcc1.a # TOP = .. @@ -38,15 +38,15 @@ endif DIR = $(TARGET) -native : ../libcrt.a -cross : $(DIR)/libcrt.a +native : ../libtcc1.a +cross : $(DIR)/libtcc1.a native : TCC = $(TOP)/tcc$(EXESUF) cross : TCC = $(TOP)/$(TARGET)-tcc$(EXESUF) -I386_O = libcrt.o alloca86.o alloca86-bt.o $(BCHECK_O) -X86_64_O = libcrt.o alloca86_64.o -ARM_O = libcrt.o armeabi.o alloca-arm.o +I386_O = libtcc1.o alloca86.o alloca86-bt.o $(BCHECK_O) +X86_64_O = libtcc1.o alloca86_64.o +ARM_O = libtcc1.o armeabi.o alloca-arm.o WIN32_O = $(I386_O) crt1.o wincrt1.o dllcrt1.o dllmain.o chkstk.o WIN64_O = $(X86_64_O) crt1.o wincrt1.o dllcrt1.o dllmain.o chkstk.o @@ -83,7 +83,7 @@ ifeq "$(TARGET)" "arm" TGT = -DTCC_TARGET_ARM XCC ?= $(TCC) -B$(TOP) else - $(error libcrt.a not supported on target '$(TARGET)') + $(error libtcc1.a not supported on target '$(TARGET)') endif endif endif @@ -102,7 +102,7 @@ ifdef XAR AR = $(XAR) endif -$(DIR)/libcrt.a ../libcrt.a : $(OBJ) $(XAR) +$(DIR)/libtcc1.a ../libtcc1.a : $(OBJ) $(XAR) $(AR) rcs $@ $(OBJ) $(DIR)/%.o : %.c $(XCC) -c $< -o $@ $(XFLAGS) diff --git a/lib/libcrt.c b/lib/libtcc1.c similarity index 96% rename from lib/libcrt.c rename to lib/libtcc1.c index 642927a..284965e 100644 --- a/lib/libcrt.c +++ b/lib/libtcc1.c @@ -533,24 +533,23 @@ unsigned long long __fixunssfdi (float a1) register union float_long fl1; register int exp; register unsigned long l; - int s; + fl1.f = a1; if (fl1.l == 0) - return 0; + return (0); exp = EXP (fl1.l) - EXCESS - 24; l = MANT(fl1.l); - s = SIGN(fl1.l)? -1: 1; - if (exp >= 64) + if (exp >= 41) return (unsigned long long)-1; else if (exp >= 0) - return ((unsigned long long)l << exp)*s; + return (unsigned long long)l << exp; else if (exp >= -23) - return (l >> -exp)*s; + return l >> -exp; else - return 0; + return 0; } unsigned long long __fixunsdfdi (double a1) @@ -558,7 +557,7 @@ unsigned long long __fixunsdfdi (double a1) register union double_long dl1; register int exp; register unsigned long long l; - int s; + dl1.d = a1; if (dl1.ll == 0) @@ -567,15 +566,15 @@ unsigned long long __fixunsdfdi (double a1) exp = EXPD (dl1) - EXCESSD - 53; l = MANTD_LL(dl1); - s = SIGND(dl1)? -1: 1; - if (exp >= 64) + + if (exp >= 12) return (unsigned long long)-1; else if (exp >= 0) - return (l << exp)*s; + return l << exp; else if (exp >= -52) - return (l >> -exp)*s; + return l >> -exp; else - return 0; + return 0; } unsigned long long __fixunsxfdi (long double a1) @@ -583,24 +582,22 @@ unsigned long long __fixunsxfdi (long double a1) register union ldouble_long dl1; register int exp; register unsigned long long l; - int s; + dl1.ld = a1; if (dl1.l.lower == 0 && dl1.l.upper == 0) return (0); exp = EXPLD (dl1) - EXCESSLD - 64; - s = SIGNLD(dl1)? -1: 1; + l = dl1.l.lower; - if (exp >= 64) + if (exp > 0) return (unsigned long long)-1; - else if (exp >= 0) - return ((unsigned long long)l << exp)*s; - else if (exp >= -64) - return (l >> -exp)*s; + else if (exp >= -63) + return l >> -exp; else - return 0; + return 0; } long long __fixsfdi (float a1) @@ -640,7 +637,7 @@ extern void abort(void); #endif enum __va_arg_type { - __va_gen_reg, __va_float_reg, __va_ld_reg, __va_stack + __va_gen_reg, __va_float_reg, __va_stack }; //This should be in sync with the declaration on our include/stdarg.h @@ -691,11 +688,10 @@ void *__va_arg(__va_list_struct *ap, size = 8; goto use_overflow_area; - case __va_ld_reg: - ap->overflow_arg_area = (char*)((intptr_t)(ap->overflow_arg_area + align - 1) & -(intptr_t)align); case __va_stack: use_overflow_area: ap->overflow_arg_area += size; + ap->overflow_arg_area = (char*)((intptr_t)(ap->overflow_arg_area + align - 1) & -(intptr_t)align); return ap->overflow_arg_area - size; default: diff --git a/libtcc.c b/libtcc.c index 28b6407..deda7e6 100644 --- a/libtcc.c +++ b/libtcc.c @@ -52,10 +52,10 @@ ST_DATA struct TCCState *tcc_state; #include "x86_64-gen.c" #endif #ifdef CONFIG_TCC_ASM -#if defined TCC_TARGET_I386 || defined TCC_TARGET_X86_64 -#include "asmx86.c" -#endif #include "tccasm.c" +#if defined TCC_TARGET_I386 || defined TCC_TARGET_X86_64 +#include "i386-asm.c" +#endif #endif #ifdef TCC_TARGET_COFF #include "tcccoff.c" @@ -868,7 +868,6 @@ LIBTCCAPI void tcc_undefine_symbol(TCCState *s1, const char *sym) static void tcc_cleanup(void) { int i, n; - CSym *def; if (NULL == tcc_state) return; tcc_state = NULL; @@ -878,11 +877,8 @@ static void tcc_cleanup(void) /* free tokens */ n = tok_ident - TOK_IDENT; - for(i = 0; i < n; i++){ - def = &table_ident[i]->sym_define; - tcc_free(def->data); + for(i = 0; i < n; i++) tcc_free(table_ident[i]); - } tcc_free(table_ident); /* free sym_pools */ diff --git a/tcc.h b/tcc.h index 1cead42..c93cedf 100644 --- a/tcc.h +++ b/tcc.h @@ -39,7 +39,6 @@ #include #include #include -#include #ifdef CONFIG_TCCASSERT #include @@ -148,7 +147,6 @@ /* #define MEM_DEBUG */ /* assembler debug */ /* #define ASM_DEBUG */ -/* #define PRINTF_ASM_CODE */ /* target selection */ /* #define TCC_TARGET_I386 *//* i386 code generator */ @@ -276,7 +274,7 @@ # define DEFAULT_ELFINTERP(s) default_elfinterp(s) #endif -/* library to use with CONFIG_USE_LIBGCC instead of libcrt.a */ +/* library to use with CONFIG_USE_LIBGCC instead of libtcc1.a */ #define TCC_LIBGCC USE_MUADIR(CONFIG_SYSROOT "/" CONFIG_LDDIR) "/libgcc_s.so.1" /* -------------------------------------------- */ @@ -305,22 +303,15 @@ #define VSTACK_SIZE 256 #define STRING_MAX_SIZE 1024 #define PACK_STACK_SIZE 8 -#define MACRO_STACK_SIZE 4 #define TOK_HASH_SIZE 8192 /* must be a power of two */ #define TOK_ALLOC_INCR 512 /* must be a power of two */ #define TOK_MAX_SIZE 4 /* token max size in int unit when stored in string */ -typedef struct CSym { - int off; - int size;/* size in *sym */ - struct Sym **data; /* if non NULL, data has been malloced */ -} CSym; - /* token symbol management */ typedef struct TokenSym { struct TokenSym *hash_next; - struct CSym sym_define; /* direct pointer to define */ + struct Sym *sym_define; /* direct pointer to define */ struct Sym *sym_label; /* direct pointer to label */ struct Sym *sym_struct; /* direct pointer to structure */ struct Sym *sym_identifier; /* direct pointer to identifier */ @@ -366,8 +357,8 @@ typedef union CValue { /* value on stack */ typedef struct SValue { CType type; /* type */ - unsigned int r; /* register + flags */ - unsigned int r2; /* second register, used for 'long long' + unsigned short r; /* register + flags */ + unsigned short r2; /* second register, used for 'long long' type. If not used, set to VT_CONST */ CValue c; /* constant, if VT_CONST */ struct Sym *sym; /* symbol, if (VT_SYM | VT_CONST) */ @@ -747,21 +738,19 @@ struct TCCState { #define VT_CMP 0x0033 /* the value is stored in processor flags (in vc) */ #define VT_JMP 0x0034 /* value is the consequence of jmp true (even) */ #define VT_JMPI 0x0035 /* value is the consequence of jmp false (odd) */ -#define TREG_MEM 0x0040 /* x86_64-gen.c add for tcc.h: The current value can be */ -#define VT_REF 0x0080 /* value is pointer to structure rather than address */ +#define VT_REF 0x0040 /* value is pointer to structure rather than address */ #define VT_LVAL 0x0100 /* var is an lvalue */ #define VT_SYM 0x0200 /* a symbol value is added */ #define VT_MUSTCAST 0x0400 /* value must be casted to be correct (used for char/short stored in integer registers) */ #define VT_MUSTBOUND 0x0800 /* bound checking must be done before dereferencing value */ +#define VT_BOUNDED 0x8000 /* value is bounded. The address of the + bounding function call point is in vc */ #define VT_LVAL_BYTE 0x1000 /* lvalue is a byte */ #define VT_LVAL_SHORT 0x2000 /* lvalue is a short */ #define VT_LVAL_UNSIGNED 0x4000 /* lvalue is unsigned */ #define VT_LVAL_TYPE (VT_LVAL_BYTE | VT_LVAL_SHORT | VT_LVAL_UNSIGNED) -#define VT_BOUNDED 0x8000 /* value is bounded. The address of the - bounding function call point is in vc */ -#define VT_TMP 0x10000 /* luck or tmp stack */ /* types */ #define VT_BTYPE 0x000f /* mask for basic type */ @@ -789,7 +778,6 @@ struct TCCState { #define VT_VOLATILE 0x1000 /* volatile modifier */ #define VT_DEFSIGN 0x2000 /* signed type */ #define VT_VLA 0x00020000 /* VLA type (also has VT_PTR and VT_ARRAY) */ -#define VT_VLS 0x00080000 /* VLA type (also has VT_PTR and VT_STRUCT) */ /* storage */ #define VT_EXTERN 0x00000080 /* extern definition */ @@ -800,14 +788,14 @@ struct TCCState { #define VT_EXPORT 0x00008000 /* win32: data exported from dll */ #define VT_WEAK 0x00010000 /* weak symbol */ #define VT_TLS 0x00040000 /* thread-local storage */ -#define VT_VIS_SHIFT 20 /* shift for symbol visibility, overlapping +#define VT_VIS_SHIFT 19 /* shift for symbol visibility, overlapping bitfield values, because bitfields never have linkage and hence never have visibility. */ #define VT_VIS_SIZE 2 /* We have four visibilities. */ #define VT_VIS_MASK (((1 << VT_VIS_SIZE)-1) << VT_VIS_SHIFT) -#define VT_STRUCT_SHIFT 20 /* shift for bitfield shift values (max: 32 - 2*6) */ +#define VT_STRUCT_SHIFT 19 /* shift for bitfield shift values (max: 32 - 2*6) */ /* type mask (except storage) */ @@ -1136,8 +1124,7 @@ ST_DATA TokenSym **table_ident; token. line feed is also returned at eof */ #define PARSE_FLAG_ASM_COMMENTS 0x0008 /* '#' can be used for line comment */ -#define PARSE_FLAG_SPACES 0x0010 /* next() returns space tokens (for -E) */ -#define PARSE_FLAG_PACK 0x0020 /* #pragma pack */ +#define PARSE_FLAG_SPACES 0x0010 /* next() returns space tokens (for -E) */ ST_FUNC TokenSym *tok_alloc(const char *str, int len); ST_FUNC char *get_tok_str(int v, CValue *cv); @@ -1195,7 +1182,7 @@ ST_DATA Sym *define_stack; ST_DATA CType char_pointer_type, func_old_type, int_type, size_type; ST_DATA SValue __vstack[1+/*to make bcheck happy*/ VSTACK_SIZE], *vtop; #define vstack (__vstack + 1) -ST_DATA int rsym, anon_sym, ind, loc, ex_rc; +ST_DATA int rsym, anon_sym, ind, loc; ST_DATA int const_wanted; /* true if constant wanted */ ST_DATA int nocode_wanted; /* true if no code generation wanted for an expression */ @@ -1205,14 +1192,12 @@ ST_DATA int func_var; /* true if current function is variadic */ ST_DATA int func_vc; ST_DATA int last_line_num, last_ind, func_ind; /* debug last line number and pc */ ST_DATA char *funcname; -ST_DATA int pop_stack; ST_INLN int is_float(int t); ST_FUNC int ieee_finite(double d); ST_FUNC void test_lvalue(void); ST_FUNC void swap(int *p, int *q); ST_FUNC void vpushi(int v); -ST_FUNC void vpushs(addr_t v); ST_FUNC Sym *external_global_sym(int v, CType *type, int r); ST_FUNC void vset(CType *type, int r, int v); ST_FUNC void vswap(void); @@ -1246,9 +1231,6 @@ ST_FUNC void gexpr(void); ST_FUNC int expr_const(void); ST_FUNC void gen_inline_functions(void); ST_FUNC void decl(int l); -ST_FUNC void vdup(void); -ST_FUNC void gaddrof(void); -ST_FUNC int loc_stack(int size, int is_sub); #if defined CONFIG_TCC_BCHECK || defined TCC_TARGET_C67 ST_FUNC Sym *get_sym_ref(CType *type, Section *sec, unsigned long offset, unsigned long size); #endif diff --git a/tccasm.c b/tccasm.c index eec4208..38efe1c 100644 --- a/tccasm.c +++ b/tccasm.c @@ -20,6 +20,7 @@ #include "tcc.h" #ifdef CONFIG_TCC_ASM + ST_FUNC int asm_get_local_label_name(TCCState *s1, unsigned int n) { char buf[64]; @@ -482,7 +483,7 @@ static void asm_parse_directive(TCCState *s1) case TOK_ASM_globl: case TOK_ASM_global: case TOK_ASM_weak: - case TOK_ASM_hidden: + case TOK_ASM_hidden: tok1 = tok; do { Sym *sym; @@ -493,12 +494,12 @@ static void asm_parse_directive(TCCState *s1) sym = label_push(&s1->asm_labels, tok, 0); sym->type.t = VT_VOID; } - if (tok1 != TOK_ASM_hidden) + if (tok1 != TOK_ASM_hidden) sym->type.t &= ~VT_STATIC; if (tok1 == TOK_ASM_weak) sym->type.t |= VT_WEAK; - else if (tok1 == TOK_ASM_hidden) - sym->type.t |= STV_HIDDEN << VT_VIS_SHIFT; + else if (tok1 == TOK_ASM_hidden) + sym->type.t |= STV_HIDDEN << VT_VIS_SHIFT; next(); } while (tok == ','); break; @@ -696,15 +697,42 @@ static void asm_parse_directive(TCCState *s1) } } + /* assemble a file */ static int tcc_assemble_internal(TCCState *s1, int do_preprocess) { int opcode; -#ifdef PRINTF_ASM_CODE - ST_FUNC void printf_asm_opcode(); +#if 0 /* print stats about opcodes */ - printf_asm_opcode(); + { + const ASMInstr *pa; + int freq[4]; + int op_vals[500]; + int nb_op_vals, i, j; + + nb_op_vals = 0; + memset(freq, 0, sizeof(freq)); + for(pa = asm_instrs; pa->sym != 0; pa++) { + freq[pa->nb_ops]++; + for(i=0;inb_ops;i++) { + for(j=0;jop_type[i] == op_vals[j]) + goto found; + } + op_vals[nb_op_vals++] = pa->op_type[i]; + found: ; + } + } + for(i=0;ifilename); + put_elf_sym(symtab_section, 0, 0, + ELFW(ST_INFO)(STB_LOCAL, STT_FILE), 0, + SHN_ABS, file->filename); ret = tcc_assemble_internal(s1, do_preprocess); @@ -1090,4 +1119,4 @@ ST_FUNC void asm_global_instr(void) cstr_free(&astr); } -#endif /* CONFIG_TCC_ASM */ \ No newline at end of file +#endif /* CONFIG_TCC_ASM */ diff --git a/tccelf.c b/tccelf.c index 9faf27f..f0ed22b 100644 --- a/tccelf.c +++ b/tccelf.c @@ -1443,16 +1443,16 @@ ST_FUNC void tcc_add_runtime(TCCState *s1) #ifdef CONFIG_USE_LIBGCC if (!s1->static_link) { tcc_add_file(s1, TCC_LIBGCC); - tcc_add_support(s1, "libcrt.a"); + tcc_add_support(s1, "libtcc1.a"); } else - tcc_add_support(s1, "libcrt.a"); + tcc_add_support(s1, "libtcc1.a"); #else - tcc_add_support(s1, "libcrt.a"); + tcc_add_support(s1, "libtcc1.a"); #endif } /* tcc_add_bcheck tries to relocate a call to __bound_init in _init so - libcrt.a must be loaded before for __bound_init to be defined and + libtcc1.a must be loaded before for __bound_init to be defined and crtn.o must be loaded after to not finalize _init too early. */ tcc_add_bcheck(s1); @@ -1596,7 +1596,7 @@ ST_FUNC void fill_got_entry(TCCState *s1, ElfW_Rel *rel) put32(s1->got->data + offset, sym->st_value & 0xffffffff); } -/* Perform relocation to GOT or PLT entries */ +/* Perform relocation to GOT or PLTĀ entries */ ST_FUNC void fill_got(TCCState *s1) { Section *s; @@ -2469,7 +2469,7 @@ static int elf_output_file(TCCState *s1, const char *filename) goto the_end; } - /* Perform relocation to GOT or PLT entries */ + /* Perform relocation to GOT or PLTĀ entries */ if (file_type == TCC_OUTPUT_EXE && s1->static_link) fill_got(s1); diff --git a/tccgen.c b/tccgen.c index c2481b0..1a89d4a 100644 --- a/tccgen.c +++ b/tccgen.c @@ -28,7 +28,7 @@ rsym: return symbol anon_sym: anonymous symbol index */ -ST_DATA int rsym, anon_sym, ind, loc, ex_rc; +ST_DATA int rsym, anon_sym, ind, loc; ST_DATA Section *text_section, *data_section, *bss_section; /* predefined sections */ ST_DATA Section *cur_text_section; /* current section where function code is generated */ @@ -70,7 +70,6 @@ ST_DATA int func_var; /* true if current function is variadic (used by return in ST_DATA int func_vc; ST_DATA int last_line_num, last_ind, func_ind; /* debug last line number and pc */ ST_DATA char *funcname; -ST_DATA int pop_stack; ST_DATA CType char_pointer_type, func_old_type, int_type, size_type; @@ -93,7 +92,7 @@ static int is_compatible_parameter_types(CType *type1, CType *type2); static void expr_type(CType *type); ST_FUNC void vpush64(int ty, unsigned long long v); ST_FUNC void vpush(CType *type); -ST_FUNC int gtst(int inv, int t); +ST_FUNC int gvtst(int inv, int t); ST_FUNC int is_btype_size(int bt); ST_INLN int is_float(int t) @@ -370,7 +369,7 @@ ST_FUNC void vpushi(int v) } /* push a pointer sized constant */ -ST_FUNC void vpushs(addr_t v) +static void vpushs(addr_t v) { CValue cval; cval.ptr_offset = v; @@ -520,46 +519,11 @@ ST_FUNC void vpushv(SValue *v) *vtop = *v; } -ST_FUNC void vdup(void) +static void vdup(void) { vpushv(vtop); } -static int align_size(int size) -{ -#ifdef TCC_TARGET_X86_64 - if(size > 4) - return 8; - else -#endif - if(size > 2) - return 4; - else if(size > 1) - return 2; - else - return 1; -} - -int loc_stack(int size, int is_sub){ - int l, align; - align = align_size(size); - size = (size + align - 1) & - align; - if(is_sub){ - pop_stack -= size; - if(pop_stack >= 0) - l = loc + pop_stack; - else{ - loc += pop_stack; - l = loc &= -align; - pop_stack = 0; - } - }else{ - pop_stack += size; - l = loc + pop_stack; - } - return l; -} - /* save r to the memory stack, and mark it as being free */ ST_FUNC void save_reg(int r) { @@ -568,76 +532,57 @@ ST_FUNC void save_reg(int r) CType *type; /* modify all stack values */ - l = saved = 0; - for(p = vstack; p <= vtop; p++) { -#ifdef TCC_TARGET_X86_64 - if ((p->r & VT_VALMASK) == r || - ((((p->type.t & VT_BTYPE) == VT_QLONG) || ((p->type.t & VT_BTYPE) == VT_QFLOAT)) && - ((p->r2 & VT_VALMASK) == r))) -#else - if ((p->r & VT_VALMASK) == r || ((p->type.t & VT_BTYPE) == VT_LLONG && (p->r2 & VT_VALMASK) == r)) -#endif - { + saved = 0; + l = 0; + for(p=vstack;p<=vtop;p++) { + if ((p->r & VT_VALMASK) == r || + ((p->type.t & VT_BTYPE) == VT_LLONG && (p->r2 & VT_VALMASK) == r)) { /* must save value on stack if not already done */ if (!saved) { /* NOTE: must reload 'r' because r might be equal to r2 */ r = p->r & VT_VALMASK; /* store register in the stack */ type = &p->type; - if((type->t & VT_BTYPE) == VT_STRUCT){ - int ret_align; - SValue ret; - gfunc_sret(type, func_var, &ret.type, &ret_align); - type = &ret.type; - } - if ((p->r & VT_LVAL) || ((type->t & VT_BTYPE) == VT_FUNC)) + if ((p->r & VT_LVAL) || + (!is_float(type->t) && (type->t & VT_BTYPE) != VT_LLONG)) #ifdef TCC_TARGET_X86_64 type = &char_pointer_type; #else type = &int_type; #endif - size = type_size(type, &align); - l = loc_stack(size, 1); + size = type_size(type, &align); + loc = (loc - size) & -align; + sv.type.t = type->t; sv.r = VT_LOCAL | VT_LVAL; - sv.c.ul = l; -#ifdef TCC_TARGET_X86_64 - if (((type->t & VT_BTYPE) == VT_QLONG) || ((type->t & VT_BTYPE) == VT_QFLOAT)) -#else - if ((type->t & VT_BTYPE) == VT_LLONG) -#endif - { -#ifdef TCC_TARGET_X86_64 - int load_size = 8, load_type = ((type->t & VT_BTYPE) == VT_QLONG) ? VT_LLONG : VT_DOUBLE; -#else - int load_size = 4, load_type = VT_INT; -#endif - sv.type.t = load_type; - store(r, &sv); - sv.c.ul += load_size; - store(p->r2, &sv); - }else{ - sv.type.t = type->t; - store(r, &sv); - } + sv.c.ul = loc; + store(r, &sv); #if defined(TCC_TARGET_I386) || defined(TCC_TARGET_X86_64) /* x86 specific: need to pop fp register ST0 if saved */ if (r == TREG_ST0) { o(0xd8dd); /* fstp %st(0) */ } #endif - saved = 1; +#ifndef TCC_TARGET_X86_64 + /* special long long case */ + if ((type->t & VT_BTYPE) == VT_LLONG) { + sv.c.ul += 4; + store(p->r2, &sv); + } +#endif + l = loc; + saved = 1; } - /* mark that stack entry as being saved on the stack */ - if (p->r & VT_LVAL) { - /* also clear the bounded flag because the - relocation address of the function was stored in - p->c.ul */ - p->r = (p->r & ~(VT_VALMASK | VT_BOUNDED)) | VT_LLOCAL; - } else { - p->r = lvalue_type(p->type.t) | VT_LOCAL | VT_TMP; - } - p->r2 = VT_CONST; - p->c.ul = l; + /* mark that stack entry as being saved on the stack */ + if (p->r & VT_LVAL) { + /* also clear the bounded flag because the + relocation address of the function was stored in + p->c.ul */ + p->r = (p->r & ~(VT_VALMASK | VT_BOUNDED)) | VT_LLOCAL; + } else { + p->r = lvalue_type(p->type.t) | VT_LOCAL; + } + p->r2 = VT_CONST; + p->c.ul = l; } } } @@ -667,57 +612,42 @@ ST_FUNC int get_reg_ex(int rc, int rc2) } #endif -static int for_reg(int rc) -{ - int r; - SValue *p; - if(rc){ - for(r = 0; r < NB_REGS; r++) { - if (reg_classes[r] & rc) { - for(p = vstack; p <= vtop; p++) { - if ((p->r & VT_VALMASK) == r || (p->r2 & VT_VALMASK) == r) - goto notfound; - } - goto found; - } - notfound:; - } - } - r = -1; -found: - return r; -} - /* find a free register of class 'rc'. If none, save one register */ -int get_reg(int rc) +ST_FUNC int get_reg(int rc) { - int r; + int r; SValue *p; /* find a free register */ - r = for_reg(rc); - if (r != -1) - return r; + for(r=0;rr & VT_VALMASK) == r || + (p->r2 & VT_VALMASK) == r) + goto notfound; + } + return r; + } + notfound: ; + } + /* no register left : free the first one on the stack (VERY IMPORTANT to start from the bottom to ensure that we don't spill registers used in gen_opi()) */ - for(p = vstack; p <= vtop; p++) { + for(p=vstack;p<=vtop;p++) { /* look at second register (if long long) */ - if(p->r & VT_TMP) - continue; r = p->r2 & VT_VALMASK; if (r < VT_CONST && (reg_classes[r] & rc)) goto save_found; r = p->r & VT_VALMASK; if (r < VT_CONST && (reg_classes[r] & rc)) { -save_found: - save_reg(r); + save_found: + save_reg(r); return r; } } /* Should never comes here */ - assert(0); - return -1; + return -1; } /* save registers up to (vtop - n) stack entry */ @@ -751,14 +681,16 @@ static void move_reg(int r, int s, int t) } /* get address of vtop (vtop MUST BE an lvalue) */ -ST_FUNC void gaddrof(void) +static void gaddrof(void) { if (vtop->r & VT_REF) gv(RC_INT); vtop->r &= ~VT_LVAL; /* tricky: if saved lvalue, then we can go back to lvalue */ if ((vtop->r & VT_VALMASK) == VT_LLOCAL) - vtop->r = (vtop->r & ~(VT_VALMASK | VT_LVAL_TYPE)) | VT_LOCAL | VT_LVAL| VT_TMP; + vtop->r = (vtop->r & ~(VT_VALMASK | VT_LVAL_TYPE)) | VT_LOCAL | VT_LVAL; + + } #ifdef CONFIG_TCC_BCHECK @@ -794,28 +726,25 @@ static void gbound(void) register value (such as structures). */ ST_FUNC int gv(int rc) { - int r, bit_pos, bit_size, size, align, i, ft, sbt; + int r, bit_pos, bit_size, size, align, i; int rc2; - ft = vtop->type.t; - sbt = ft & VT_BTYPE; /* NOTE: get_reg can modify vstack[] */ - if (ft & VT_BITFIELD) { + if (vtop->type.t & VT_BITFIELD) { CType type; - int bits; - bit_pos = (ft >> VT_STRUCT_SHIFT) & 0x3f; - bit_size = (ft >> (VT_STRUCT_SHIFT + 6)) & 0x3f; + int bits = 32; + bit_pos = (vtop->type.t >> VT_STRUCT_SHIFT) & 0x3f; + bit_size = (vtop->type.t >> (VT_STRUCT_SHIFT + 6)) & 0x3f; /* remove bit field info to avoid loops */ - ft = vtop->type.t &= ~(VT_BITFIELD | (-1 << VT_STRUCT_SHIFT)); + vtop->type.t &= ~(VT_BITFIELD | (-1 << VT_STRUCT_SHIFT)); /* cast to int to propagate signedness in following ops */ - if (sbt == VT_LLONG) { + if ((vtop->type.t & VT_BTYPE) == VT_LLONG) { type.t = VT_LLONG; bits = 64; - } else{ + } else type.t = VT_INT; - bits = 32; - } - if((ft & VT_UNSIGNED) || sbt == VT_BOOL) + if((vtop->type.t & VT_UNSIGNED) || + (vtop->type.t & VT_BTYPE) == VT_BOOL) type.t |= VT_UNSIGNED; gen_cast(&type); /* generate shifts */ @@ -872,39 +801,41 @@ ST_FUNC int gv(int rc) gbound(); #endif - r = vtop->r & VT_VALMASK; - if(rc & ~RC_MASK) - rc2 = ex_rc; - else - rc2 = (rc & RC_FLOAT) ? RC_FLOAT : RC_INT; + r = vtop->r & VT_VALMASK; + rc2 = (rc & RC_FLOAT) ? RC_FLOAT : RC_INT; + if (rc == RC_IRET) + rc2 = RC_LRET; +#ifdef TCC_TARGET_X86_64 + else if (rc == RC_FRET) + rc2 = RC_QRET; +#endif /* need to reload if: - constant - lvalue (need to dereference pointer) - already a register, but not in the right class */ - if (r >= VT_CONST || (vtop->r & VT_LVAL) || !(reg_classes[r] & rc) + if (r >= VT_CONST + || (vtop->r & VT_LVAL) + || !(reg_classes[r] & rc) #ifdef TCC_TARGET_X86_64 - || (sbt == VT_QLONG && !(reg_classes[vtop->r2] & rc2)) - || (sbt == VT_QFLOAT && !(reg_classes[vtop->r2] & rc2)) + || ((vtop->type.t & VT_BTYPE) == VT_QLONG && !(reg_classes[vtop->r2] & rc2)) + || ((vtop->type.t & VT_BTYPE) == VT_QFLOAT && !(reg_classes[vtop->r2] & rc2)) #else - || (sbt == VT_LLONG && !(reg_classes[vtop->r2] & rc2)) + || ((vtop->type.t & VT_BTYPE) == VT_LLONG && !(reg_classes[vtop->r2] & rc2)) #endif - || vtop->c.i) + ) { r = get_reg(rc); #ifdef TCC_TARGET_X86_64 - if ((sbt == VT_QLONG) || (sbt == VT_QFLOAT)) + if (((vtop->type.t & VT_BTYPE) == VT_QLONG) || ((vtop->type.t & VT_BTYPE) == VT_QFLOAT)) { + int addr_type = VT_LLONG, load_size = 8, load_type = ((vtop->type.t & VT_BTYPE) == VT_QLONG) ? VT_LLONG : VT_DOUBLE; #else - if (sbt == VT_LLONG) -#endif - { -#ifdef TCC_TARGET_X86_64 - int load_size = 8, load_type = (sbt == VT_QLONG) ? VT_LLONG : VT_DOUBLE; -#else - int load_size = 4, load_type = VT_INT; + if ((vtop->type.t & VT_BTYPE) == VT_LLONG) { + int addr_type = VT_INT, load_size = 4, load_type = VT_INT; unsigned long long ll; #endif - int r2; + int r2, original_type; + original_type = vtop->type.t; /* two register type load : expand to two words temporarily */ #ifndef TCC_TARGET_X86_64 @@ -917,19 +848,20 @@ ST_FUNC int gv(int rc) vpushi(ll >> 32); /* second word */ } else #endif - /* XXX: test to VT_CONST incorrect ? */ - if (r >= VT_CONST || (vtop->r & VT_LVAL)) { + if (r >= VT_CONST || /* XXX: test to VT_CONST incorrect ? */ + (vtop->r & VT_LVAL)) { /* We do not want to modifier the long long pointer here, so the safest (and less efficient) is to save all the other registers - in the regs. use VT_TMP XXX: totally inefficient. */ + in the stack. XXX: totally inefficient. */ + save_regs(1); /* load from memory */ vtop->type.t = load_type; load(r, vtop); vdup(); - vtop[-1].r = r | VT_TMP; /* lock register value */ + vtop[-1].r = r; /* save register value */ /* increment pointer to get second word */ - vtop->type = char_pointer_type; + vtop->type.t = addr_type; gaddrof(); vpushi(load_size); gen_op('+'); @@ -939,23 +871,23 @@ ST_FUNC int gv(int rc) /* move registers */ load(r, vtop); vdup(); - vtop[-1].r = r | VT_TMP; /* lock register value */ + vtop[-1].r = r; /* save register value */ vtop->r = vtop[-1].r2; } /* Allocate second register. Here we rely on the fact that get_reg() tries first to free r2 of an SValue. */ r2 = get_reg(rc2); load(r2, vtop); - vtop--; + vpop(); /* write second register */ vtop->r2 = r2; - vtop->r &= ~VT_TMP; - vtop->type.t = ft; - } else if ((vtop->r & VT_LVAL) && !is_float(ft)) { - int t; + vtop->type.t = original_type; + } else if ((vtop->r & VT_LVAL) && !is_float(vtop->type.t)) { + int t1, t; /* lvalue of scalar type : need to use lvalue type because of possible cast */ - t = ft; + t = vtop->type.t; + t1 = t; /* compute memory access type */ if (vtop->r & VT_REF) #ifdef TCC_TARGET_X86_64 @@ -972,14 +904,13 @@ ST_FUNC int gv(int rc) vtop->type.t = t; load(r, vtop); /* restore wanted type */ - vtop->type.t = ft; + vtop->type.t = t1; } else { /* one register type load */ load(r, vtop); } - vtop->r = r; - vtop->c.ptr_offset = 0; } + vtop->r = r; #ifdef TCC_TARGET_C67 /* uses register pairs for doubles */ if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) @@ -992,10 +923,13 @@ ST_FUNC int gv(int rc) /* generate vtop[-1] and vtop[0] in resp. classes rc1 and rc2 */ ST_FUNC void gv2(int rc1, int rc2) { + int v; + /* generate more generic register first. But VT_JMP or VT_CMP values must be generated first in all cases to avoid possible reload errors */ - if (rc1 <= rc2) { + v = vtop[0].r & VT_VALMASK; + if (v != VT_CMP && (v & ~1) != VT_JMP && rc1 <= rc2) { vswap(); gv(rc1); vswap(); @@ -1025,7 +959,6 @@ static int rc_fret(int t) if (t == VT_LDOUBLE) { return RC_ST0; } - ex_rc = RC_QRET; #endif return RC_FRET; } @@ -1084,7 +1017,6 @@ ST_FUNC void lexpand_nr(void) } #endif -#ifndef TCC_TARGET_X86_64 /* build a long long from two ints */ static void lbuild(int t) { @@ -1093,7 +1025,6 @@ static void lbuild(int t) vtop[-1].type.t = t; vpop(); } -#endif /* rotate n first stack elements to the bottom I1 ... In -> I2 ... In I1 [top is right] @@ -1134,9 +1065,8 @@ ST_FUNC void vrott(int n) /* pop stack value */ ST_FUNC void vpop(void) { - int v, fr; - fr = vtop->r; - v = fr & VT_VALMASK; + int v; + v = vtop->r & VT_VALMASK; #if defined(TCC_TARGET_I386) || defined(TCC_TARGET_X86_64) /* for x86, we need to pop the FP stack */ if (v == TREG_ST0 && !nocode_wanted) { @@ -1147,17 +1077,6 @@ ST_FUNC void vpop(void) /* need to put correct jump if && or || without test */ gsym(vtop->c.ul); } - if(fr & VT_TMP){ - int size, align; - SValue ret; - if((vtop->type.t & VT_BTYPE) == VT_FUNC) - size = 8; - else{ - gfunc_sret(&vtop->type, func_var, &ret.type, &align); - size = type_size(&ret.type, &align); - } - loc_stack(size, 0); - } vtop--; } @@ -1167,8 +1086,8 @@ static void gv_dup(void) { int rc, t, r, r1; SValue sv; + t = vtop->type.t; -#ifndef TCC_TARGET_X86_64 if ((t & VT_BTYPE) == VT_LLONG) { lexpand(); gv_dup(); @@ -1178,14 +1097,15 @@ static void gv_dup(void) vrotb(4); /* stack: H L L1 H1 */ lbuild(t); - vrott(3); + vrotb(3); + vrotb(3); vswap(); lbuild(t); vswap(); - } else -#endif - { + } else { /* duplicate value */ + rc = RC_INT; + sv.type.t = VT_INT; if (is_float(t)) { rc = RC_FLOAT; #ifdef TCC_TARGET_X86_64 @@ -1193,9 +1113,8 @@ static void gv_dup(void) rc = RC_ST0; } #endif - }else - rc = RC_INT; - sv.type.t = t; + sv.type.t = t; + } r = gv(rc); r1 = get_reg(rc); sv.r = r; @@ -1207,6 +1126,27 @@ static void gv_dup(void) vtop->r = r1; } } + +/* Generate value test + * + * Generate a test for any value (jump, comparison and integers) */ +ST_FUNC int gvtst(int inv, int t) +{ + int v = vtop->r & VT_VALMASK; + if (v != VT_CMP && v != VT_JMP && v != VT_JMPI) { + vpushi(0); + gen_op(TOK_NE); + } + if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) { + /* constant jmp optimization */ + if ((vtop->c.i != 0) != inv) + t = gjmp(t); + vtop--; + return t; + } + return gtst(inv, t); +} + #ifndef TCC_TARGET_X86_64 /* generate CPU independent (unsigned) long long operations */ static void gen_opl(int op) @@ -1405,13 +1345,13 @@ static void gen_opl(int op) b = 0; gen_op(op1); if (op1 != TOK_NE) { - a = gtst(1, 0); + a = gvtst(1, 0); } if (op != TOK_EQ) { /* generate non equal test */ /* XXX: NOT PORTABLE yet */ if (a == 0) { - b = gtst(0, 0); + b = gvtst(0, 0); } else { #if defined(TCC_TARGET_I386) b = psym(0x850f, 0); @@ -1436,7 +1376,7 @@ static void gen_opl(int op) else if (op1 == TOK_GE) op1 = TOK_UGE; gen_op(op1); - a = gtst(1, a); + a = gvtst(1, a); gsym(b); vseti(VT_JMPI, a); break; @@ -2161,30 +2101,31 @@ ST_FUNC int type_size(CType *type, int *a) { Sym *s; int bt; - size_t size; bt = type->t & VT_BTYPE; if (bt == VT_STRUCT) { - assert(!(type->t & VT_VLS)); /* struct/union */ s = type->ref; *a = s->r; - size = s->c; + return s->c; } else if (bt == VT_PTR) { if (type->t & VT_ARRAY) { int ts; + s = type->ref; ts = type_size(&s->type, a); + if (ts < 0 && s->c < 0) ts = -ts; - size = (size_t)ts * s->c; + + return ts * s->c; } else { *a = PTR_SIZE; - size = PTR_SIZE; + return PTR_SIZE; } } else if (bt == VT_LDOUBLE) { *a = LDOUBLE_ALIGN; - size = LDOUBLE_SIZE; + return LDOUBLE_SIZE; } else if (bt == VT_DOUBLE || bt == VT_LLONG) { #ifdef TCC_TARGET_I386 #ifdef TCC_TARGET_PE @@ -2201,23 +2142,21 @@ ST_FUNC int type_size(CType *type, int *a) #else *a = 8; #endif - size = 8; + return 8; } else if (bt == VT_INT || bt == VT_ENUM || bt == VT_FLOAT) { *a = 4; - size = 4; + return 4; } else if (bt == VT_SHORT) { *a = 2; - size = 2; - } else if (bt == VT_QLONG || bt == VT_QFLOAT) { + return 2; + } else if (bt == VT_QLONG || bt == VT_QFLOAT) { *a = 8; - size = 16; + return 16; } else { /* char, void, function, _Bool */ *a = 1; - size = 1; + return 1; } - assert(size == (int)size); - return (int)size; } /* push type size as known at runtime time on top of value stack. Put @@ -2233,7 +2172,7 @@ ST_FUNC void vla_runtime_type_size(CType *type, int *a) static void vla_sp_save(void) { if (!(vla_flags & VLA_SP_LOC_SET)) { - *vla_sp_loc = loc_stack(PTR_SIZE, 1); + *vla_sp_loc = (loc -= PTR_SIZE); vla_flags |= VLA_SP_LOC_SET; } if (!(vla_flags & VLA_SP_SAVED)) { @@ -2502,8 +2441,6 @@ static void gen_assign_cast(CType *dt) if (sbt == VT_PTR || sbt == VT_FUNC) { tcc_warning("assignment makes integer from pointer without a cast"); } - if (sbt == VT_STRUCT) - goto error; /* XXX: more tests */ break; case VT_STRUCT: @@ -2523,78 +2460,17 @@ static void gen_assign_cast(CType *dt) gen_cast(dt); } -static void vstore_im(){ - int rc, ft, sbt, dbt, t, r; - ft = vtop[-1].type.t; - sbt = vtop->type.t & VT_BTYPE; - dbt = ft & VT_BTYPE; - if (is_float(ft)) { - rc = RC_FLOAT; -#ifdef TCC_TARGET_X86_64 - if (dbt == VT_LDOUBLE) { - rc = RC_ST0; - } -#endif - }else - rc = RC_INT; - r = gv(rc); /* generate value */ - /* if lvalue was saved on stack, must read it */ - if ((vtop[-1].r & VT_VALMASK) == VT_LLOCAL) { - SValue sv; - t = get_reg(RC_INT); -#ifdef TCC_TARGET_X86_64 - sv.type.t = VT_PTR; -#else - sv.type.t = VT_INT; -#endif - sv.r = VT_LOCAL | VT_LVAL | VT_TMP; - sv.c.ul = vtop[-1].c.ul; - load(t, &sv); - vtop[-1].r = t | VT_LVAL; - vtop[-1].c.ul = 0; - } - /* two word case handling : store second register at word + 4 */ -#ifdef TCC_TARGET_X86_64 - if ((dbt == VT_QLONG) || (dbt == VT_QFLOAT)) -#else - if (dbt == VT_LLONG) -#endif - { -#ifdef TCC_TARGET_X86_64 - int load_size = 8, load_type = (sbt == VT_QLONG) ? VT_LLONG : VT_DOUBLE; -#else - int load_size = 4, load_type = VT_INT; -#endif - vtop[-1].type.t = load_type; - store(r, vtop - 1); - vswap(); - /* convert to int to increment easily */ - vtop->type = char_pointer_type; - gaddrof(); - vpushi(load_size); - gen_op('+'); - vtop->r |= VT_LVAL; - vswap(); - vtop[-1].type.t = load_type; - /* XXX: it works because r2 is spilled last ! */ - store(vtop->r2, vtop - 1); - vtop->type.t = ft; - vtop[-1].type.t = ft; - } else { - store(r, vtop - 1); - } -} - /* store vtop in lvalue pushed on stack */ ST_FUNC void vstore(void) { - int sbt, dbt, ft, size, align, bit_size, bit_pos, delayed_cast; + int sbt, dbt, ft, r, t, size, align, bit_size, bit_pos, rc, delayed_cast; ft = vtop[-1].type.t; sbt = vtop->type.t & VT_BTYPE; dbt = ft & VT_BTYPE; if ((((sbt == VT_INT || sbt == VT_SHORT) && dbt == VT_BYTE) || - (sbt == VT_INT && dbt == VT_SHORT)) && !(vtop->type.t & VT_BITFIELD)) { + (sbt == VT_INT && dbt == VT_SHORT)) + && !(vtop->type.t & VT_BITFIELD)) { /* optimize char/short casts */ delayed_cast = VT_MUSTCAST; vtop->type.t = ft & (VT_TYPE & ~(VT_BITFIELD | (-1 << VT_STRUCT_SHIFT))); @@ -2612,42 +2488,31 @@ ST_FUNC void vstore(void) /* structure assignment : generate memcpy */ /* XXX: optimize if small size */ if (!nocode_wanted) { - SValue ret; - int ret_nregs, ret_align; - ret_nregs = gfunc_sret(&vtop->type, func_var, &ret.type, &ret_align); - if(ret_nregs){ - vswap(); - vpushv(vtop - 1); - vtop[0].type = ret.type; - vtop[-1].type = ret.type; - vstore_im(); - vtop -=2; - }else{ - size = type_size(&vtop->type, &align); - /* destination */ - vswap(); - vtop->type.t = VT_PTR; - gaddrof(); + size = type_size(&vtop->type, &align); - /* address of memcpy() */ + /* destination */ + vswap(); + vtop->type.t = VT_PTR; + gaddrof(); + + /* address of memcpy() */ #ifdef TCC_ARM_EABI - if(!(align & 7)) - vpush_global_sym(&func_old_type, TOK_memcpy8); - else if(!(align & 3)) - vpush_global_sym(&func_old_type, TOK_memcpy4); - else + if(!(align & 7)) + vpush_global_sym(&func_old_type, TOK_memcpy8); + else if(!(align & 3)) + vpush_global_sym(&func_old_type, TOK_memcpy4); + else #endif - vpush_global_sym(&func_old_type, TOK_memcpy); + vpush_global_sym(&func_old_type, TOK_memcpy); - vswap(); - /* source */ - vpushv(vtop - 2); - vtop->type.t = VT_PTR; - gaddrof(); - /* type size */ - vpushi(size); - gfunc_call(3); - } + vswap(); + /* source */ + vpushv(vtop - 2); + vtop->type.t = VT_PTR; + gaddrof(); + /* type size */ + vpushi(size); + gfunc_call(3); } else { vswap(); vpop(); @@ -2659,8 +2524,13 @@ ST_FUNC void vstore(void) bit_size = (ft >> (VT_STRUCT_SHIFT + 6)) & 0x3f; /* remove bit field info to avoid loops */ vtop[-1].type.t = ft & ~(VT_BITFIELD | (-1 << VT_STRUCT_SHIFT)); + /* duplicate source into other register */ - if(dbt == VT_BOOL) { + gv_dup(); + vswap(); + vrott(3); + + if((ft & VT_BTYPE) == VT_BOOL) { gen_cast(&vtop[-1].type); vtop[-1].type.t = (vtop[-1].type.t & ~VT_BTYPE) | (VT_BYTE | VT_UNSIGNED); } @@ -2670,8 +2540,8 @@ ST_FUNC void vstore(void) vtop[-1] = vtop[-2]; /* mask and shift source */ - if(dbt != VT_BOOL) { - if(dbt == VT_LLONG) { + if((ft & VT_BTYPE) != VT_BOOL) { + if((ft & VT_BTYPE) == VT_LLONG) { vpushll((1ULL << bit_size) - 1ULL); } else { vpushi((1 << bit_size) - 1); @@ -2682,7 +2552,7 @@ ST_FUNC void vstore(void) gen_op(TOK_SHL); /* load destination, mask and or with source */ vswap(); - if(dbt == VT_LLONG) { + if((ft & VT_BTYPE) == VT_LLONG) { vpushll(~(((1ULL << bit_size) - 1ULL) << bit_pos)); } else { vpushi(~(((1 << bit_size) - 1) << bit_pos)); @@ -2691,6 +2561,10 @@ ST_FUNC void vstore(void) gen_op('|'); /* store result */ vstore(); + + /* pop off shifted source from "duplicate source..." above */ + vpop(); + } else { #ifdef CONFIG_TCC_BCHECK /* bound check case */ @@ -2701,7 +2575,56 @@ ST_FUNC void vstore(void) } #endif if (!nocode_wanted) { - vstore_im(); + rc = RC_INT; + if (is_float(ft)) { + rc = RC_FLOAT; +#ifdef TCC_TARGET_X86_64 + if ((ft & VT_BTYPE) == VT_LDOUBLE) { + rc = RC_ST0; + } else if ((ft & VT_BTYPE) == VT_QFLOAT) { + rc = RC_FRET; + } +#endif + } + r = gv(rc); /* generate value */ + /* if lvalue was saved on stack, must read it */ + if ((vtop[-1].r & VT_VALMASK) == VT_LLOCAL) { + SValue sv; + t = get_reg(RC_INT); +#ifdef TCC_TARGET_X86_64 + sv.type.t = VT_PTR; +#else + sv.type.t = VT_INT; +#endif + sv.r = VT_LOCAL | VT_LVAL; + sv.c.ul = vtop[-1].c.ul; + load(t, &sv); + vtop[-1].r = t | VT_LVAL; + } + /* two word case handling : store second register at word + 4 (or +8 for x86-64) */ +#ifdef TCC_TARGET_X86_64 + if (((ft & VT_BTYPE) == VT_QLONG) || ((ft & VT_BTYPE) == VT_QFLOAT)) { + int addr_type = VT_LLONG, load_size = 8, load_type = ((vtop->type.t & VT_BTYPE) == VT_QLONG) ? VT_LLONG : VT_DOUBLE; +#else + if ((ft & VT_BTYPE) == VT_LLONG) { + int addr_type = VT_INT, load_size = 4, load_type = VT_INT; +#endif + vtop[-1].type.t = load_type; + store(r, vtop - 1); + vswap(); + /* convert to int to increment easily */ + vtop->type.t = addr_type; + gaddrof(); + vpushi(load_size); + gen_op('+'); + vtop->r |= VT_LVAL; + vswap(); + vtop[-1].type.t = load_type; + /* XXX: it works because r2 is spilled last ! */ + store(vtop->r2, vtop - 1); + } else { + store(r, vtop - 1); + } } vswap(); vtop--; /* NOT vpop() because on x86 it would flush the fp stack */ @@ -3364,7 +3287,7 @@ static void asm_label_instr(CString *astr) static void post_type(CType *type, AttributeDef *ad) { - int n, l, t1, arg_size, size, align; + int n, l, t1, arg_size, align; Sym **plast, *s, *first; AttributeDef ad1; CType pt; @@ -3468,12 +3391,13 @@ static void post_type(CType *type, AttributeDef *ad) t1 |= type->t & VT_VLA; if (t1 & VT_VLA) { - size = type_size(&int_type, &align); - n = loc_stack(size, 1); + loc -= type_size(&int_type, &align); + loc &= -align; + n = loc; vla_runtime_type_size(type, &align); gen_op('*'); - vset(&int_type, VT_LOCAL|VT_LVAL, n); + vset(&int_type, VT_LOCAL|VT_LVAL, loc); vswap(); vstore(); } @@ -3835,7 +3759,7 @@ ST_FUNC void unary(void) vtop->c.i = vtop->c.i ^ 1; else { save_regs(1); - vseti(VT_JMP, gtst(1, 0)); + vseti(VT_JMP, gvtst(1, 0)); } break; case '~': @@ -4069,94 +3993,47 @@ ST_FUNC void unary(void) /* post operations */ while (1) { - SValue ret; - int ret_nregs, ret_align; if (tok == TOK_INC || tok == TOK_DEC) { inc(1, tok); next(); } else if (tok == '.' || tok == TOK_ARROW) { - int qualifiers, add, is_lval; + int qualifiers; /* field */ - qualifiers = vtop->type.t & (VT_CONSTANT | VT_VOLATILE); - add = 0; - if (tok == TOK_ARROW) - indir(); - - type = vtop->type; - is_lval = (vtop->r & (VT_VALMASK | VT_LVAL)) >= VT_CONST; - if(is_lval){ - test_lvalue(); - gaddrof(); - vtop->type = char_pointer_type; /* change type to 'char *' */ - }else - gfunc_sret(&vtop->type, func_var, &ret.type, &ret_align); - do{ - next(); - /* expect pointer on structure */ - if ((type.t & VT_BTYPE) != VT_STRUCT) - expect("struct or union"); - s = type.ref; - /* find field */ - tok |= SYM_FIELD; - while ((s = s->next) != NULL) { - if (s->v == tok) - break; - } - if (!s) - tcc_error("field not found: %s", get_tok_str(tok & ~SYM_FIELD, NULL)); - /* add bit */ - add += s->c; - /* change type to field type, and set to lvalue */ - type = s->type; - next(); - }while(tok == '.'); - - type.t |= qualifiers; - if (is_lval){ - p_lval: - vpushi(add); - gen_op('+'); - /* an array is never an lvalue */ - if (!(type.t & VT_ARRAY)) { - vtop->r |= lvalue_type(type.t); - #ifdef CONFIG_TCC_BCHECK - /* if bound checking, the referenced pointer must be checked */ - if (tcc_state->do_bounds_check) - vtop->r |= VT_MUSTBOUND; - #endif - } - }else{ - gfunc_sret(&vtop->type, func_var, &ret.type, &ret_align); - if(is_float(ret.type.t) || (type.t & VT_ARRAY)){ -#ifdef TCC_TARGET_X86_64 - if((ret.type.t & VT_BTYPE) != VT_LDOUBLE) + if (tok == TOK_ARROW) + indir(); + qualifiers = vtop->type.t & (VT_CONSTANT | VT_VOLATILE); + test_lvalue(); + gaddrof(); + next(); + /* expect pointer on structure */ + if ((vtop->type.t & VT_BTYPE) != VT_STRUCT) + expect("struct or union"); + s = vtop->type.ref; + /* find field */ + tok |= SYM_FIELD; + while ((s = s->next) != NULL) { + if (s->v == tok) + break; + } + if (!s) + tcc_error("field not found: %s", get_tok_str(tok & ~SYM_FIELD, NULL)); + /* add field offset to pointer */ + vtop->type = char_pointer_type; /* change type to 'char *' */ + vpushi(s->c); + gen_op('+'); + /* change type to field type, and set to lvalue */ + vtop->type = s->type; + vtop->type.t |= qualifiers; + /* an array is never an lvalue */ + if (!(vtop->type.t & VT_ARRAY)) { + vtop->r |= lvalue_type(vtop->type.t); +#ifdef CONFIG_TCC_BCHECK + /* if bound checking, the referenced pointer must be checked */ + if (tcc_state->do_bounds_check) + vtop->r |= VT_MUSTBOUND; #endif - { - save_reg(vtop->r); - vtop->r &= ~VT_TMP; - gaddrof(); - vtop->type = char_pointer_type; /* change type to 'char *' */ - goto p_lval; - } - }else{ -#ifdef TCC_TARGET_X86_64 - int load_size = 8; -#else - int load_size = 4; -#endif - if(add & load_size){ - add -= load_size; - vtop->r = vtop->r2; - vtop->r2 = VT_CONST; - } - if(add){ - vtop->type.t = VT_LLONG; - vpushi(add*8); - gen_op(TOK_SAR); - } - } - } - vtop->type = type; + } + next(); } else if (tok == '[') { next(); gexpr(); @@ -4164,8 +4041,9 @@ ST_FUNC void unary(void) indir(); skip(']'); } else if (tok == '(') { + SValue ret; Sym *sa; - int nb_args, variadic, addr; + int nb_args, ret_nregs, ret_align, variadic; /* function call */ if ((vtop->type.t & VT_BTYPE) != VT_FUNC) { @@ -4183,6 +4061,7 @@ ST_FUNC void unary(void) } /* get return type */ s = vtop->type.ref; + next(); sa = s->next; /* first parameter */ nb_args = 0; ret.r2 = VT_CONST; @@ -4194,12 +4073,12 @@ ST_FUNC void unary(void) if (!ret_nregs) { /* get some space for the returned structure */ size = type_size(&s->type, &align); - addr = loc_stack(size, 1); + loc = (loc - size) & -align; ret.type = s->type; ret.r = VT_LOCAL | VT_LVAL; /* pass it as 'int' to avoid structure arg passing problems */ - vseti(VT_LOCAL, addr); + vseti(VT_LOCAL, loc); ret.c = vtop->c; nb_args++; } @@ -4227,7 +4106,6 @@ ST_FUNC void unary(void) } ret.c.i = 0; } - next(); if (tok != ')') { for(;;) { expr_eq(); @@ -4256,8 +4134,25 @@ ST_FUNC void unary(void) } /* handle packed struct return */ - if (((s->type.t & VT_BTYPE) == VT_STRUCT) && ret_nregs) - vtop->type = s->type; + if (((s->type.t & VT_BTYPE) == VT_STRUCT) && ret_nregs) { + int addr, offset; + + size = type_size(&s->type, &align); + loc = (loc - size) & -align; + addr = loc; + offset = 0; + for (;;) { + vset(&ret.type, VT_LOCAL | VT_LVAL, addr + offset); + vswap(); + vstore(); + vtop--; + if (--ret_nregs == 0) + break; + /* XXX: compatible with arm only: ret_align == register_size */ + offset += ret_align; + } + vset(&s->type, VT_LOCAL | VT_LVAL, addr); + } } else { break; } @@ -4392,7 +4287,7 @@ static void expr_land(void) t = 0; save_regs(1); for(;;) { - t = gtst(1, t); + t = gvtst(1, t); if (tok != TOK_LAND) { vseti(VT_JMPI, t); break; @@ -4412,7 +4307,7 @@ static void expr_lor(void) t = 0; save_regs(1); for(;;) { - t = gtst(0, t); + t = gvtst(0, t); if (tok != TOK_LOR) { vseti(VT_JMP, t); break; @@ -4426,8 +4321,8 @@ static void expr_lor(void) /* XXX: better constant handling */ static void expr_cond(void) { - int tt, u, r, rc, t1, t2, bt1, bt2, ret_nregs, ret_align; - SValue sv, ret; + int tt, u, r1, r2, rc, t1, t2, bt1, bt2; + SValue sv; CType type, type1, type2; if (const_wanted) { @@ -4469,13 +4364,14 @@ static void expr_cond(void) } else rc = RC_INT; - save_regs(1); + gv(rc); + save_regs(1); } if (tok == ':' && gnu_ext) { gv_dup(); - tt = gtst(1, 0); + tt = gvtst(1, 0); } else { - tt = gtst(1, 0); + tt = gvtst(1, 0); gexpr(); } type1 = vtop->type; @@ -4508,14 +4404,16 @@ static void expr_cond(void) (t2 & (VT_BTYPE | VT_UNSIGNED)) == (VT_LLONG | VT_UNSIGNED)) type.t |= VT_UNSIGNED; } else if (bt1 == VT_PTR || bt2 == VT_PTR) { - /* If one is a null ptr constant the result type is the other. */ - if (is_null_pointer (vtop)) - type = type1; - else if (is_null_pointer (&sv)) - type = type2; - /* XXX: test pointer compatibility, C99 has more elaborate rules here. */ - else - type = type1; + /* If one is a null ptr constant the result type + is the other. */ + if (is_null_pointer (vtop)) + type = type1; + else if (is_null_pointer (&sv)) + type = type2; + /* XXX: test pointer compatibility, C99 has more elaborate + rules here. */ + else + type = type1; } else if (bt1 == VT_FUNC || bt2 == VT_FUNC) { /* XXX: test function pointer compatibility */ type = bt1 == VT_FUNC ? type1 : type2; @@ -4533,35 +4431,26 @@ static void expr_cond(void) (t2 & (VT_BTYPE | VT_UNSIGNED)) == (VT_INT | VT_UNSIGNED)) type.t |= VT_UNSIGNED; } - + /* now we convert second operand */ gen_cast(&type); - ret_nregs = 0; - if (VT_STRUCT == (type.t & VT_BTYPE)){ - ret_nregs = gfunc_sret(&type, func_var, &ret.type, &ret_align); - if(ret_nregs) - vtop->type = ret.type; - else - gaddrof(); - } - - if (is_float(vtop->type.t)) { + if (VT_STRUCT == (vtop->type.t & VT_BTYPE)) + gaddrof(); + rc = RC_INT; + if (is_float(type.t)) { rc = RC_FLOAT; #ifdef TCC_TARGET_X86_64 if ((type.t & VT_BTYPE) == VT_LDOUBLE) { rc = RC_ST0; } #endif - } else - rc = RC_INT; - r = gv(rc); - rc = reg_classes[r] & ~RC_MASK; -#ifdef TCC_TARGET_X86_64 - if (((vtop->type.t & VT_BTYPE) == VT_QLONG) || ((vtop->type.t & VT_BTYPE) == VT_QFLOAT)) -#else - if ((vtop->type.t & VT_BTYPE) == VT_LLONG) -#endif - ex_rc = reg_classes[vtop->r2] & ~RC_MASK; + } else if ((type.t & VT_BTYPE) == VT_LLONG) { + /* for long longs, we use fixed registers to avoid having + to handle a complicated move */ + rc = RC_IRET; + } + + r2 = gv(rc); /* this is horrible, but we must also convert first operand */ tt = gjmp(0); @@ -4569,21 +4458,12 @@ static void expr_cond(void) /* put again first value and cast it */ *vtop = sv; gen_cast(&type); - if (VT_STRUCT == (type.t & VT_BTYPE)){ - if(ret_nregs) - vtop->type = ret.type; - else - gaddrof(); - } - gv(rc); + if (VT_STRUCT == (vtop->type.t & VT_BTYPE)) + gaddrof(); + r1 = gv(rc); + move_reg(r2, r1, type.t); + vtop->r = r2; gsym(tt); - - if (VT_STRUCT == (type.t & VT_BTYPE)){ - if(ret_nregs) - vtop->type = type; - else - vtop->r |= VT_LVAL; - } } } } @@ -4737,7 +4617,7 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym, skip('('); gexpr(); skip(')'); - a = gtst(1, 0); + a = gvtst(1, 0); block(bsym, csym, case_sym, def_sym, case_reg, 0); c = tok; if (c == TOK_ELSE) { @@ -4754,7 +4634,7 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym, skip('('); gexpr(); skip(')'); - a = gtst(1, 0); + a = gvtst(1, 0); b = 0; block(&a, &b, case_sym, def_sym, case_reg, 0); gjmp_addr(d); @@ -4771,7 +4651,7 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym, frame_bottom->next = scope_stack_bottom; scope_stack_bottom = frame_bottom; llabel = local_label_stack; - + /* save VLA state */ block_vla_sp_loc = *(saved_vla_sp_loc = vla_sp_loc); if (saved_vla_sp_loc != &vla_sp_root_loc) @@ -4823,7 +4703,7 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym, /* pop locally defined symbols */ scope_stack_bottom = scope_stack_bottom->next; sym_pop(&local_stack, s); - + /* Pop VLA frames and restore stack pointer if required */ if (saved_vla_sp_loc != &vla_sp_root_loc) *saved_vla_sp_loc = block_vla_sp_loc; @@ -4856,23 +4736,32 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym, vstore(); } else { /* returning structure packed into registers */ - int rc; + int r, size, addr, align; + size = type_size(&func_vt,&align); + if ((vtop->r != (VT_LOCAL | VT_LVAL) || (vtop->c.i & (ret_align-1))) + && (align & (ret_align-1))) { + loc = (loc - size) & -align; + addr = loc; + type = func_vt; + vset(&type, VT_LOCAL | VT_LVAL, addr); + vswap(); + vstore(); + vset(&ret_type, VT_LOCAL | VT_LVAL, addr); + } vtop->type = ret_type; if (is_float(ret_type.t)) - rc = rc_fret(ret_type.t); - else{ - rc = RC_IRET; - ex_rc = RC_LRET; - } + r = rc_fret(ret_type.t); + else + r = RC_IRET; for (;;) { - gv(rc); + gv(r); if (--ret_nregs == 0) break; /* We assume that when a structure is returned in multiple registers, their classes are consecutive values of the suite s(n) = 2^n */ - rc <<= 1; + r <<= 1; /* XXX: compatible with arm only: ret_align == register_size */ vtop->c.i += ret_align; vtop->r = VT_LOCAL | VT_LVAL; @@ -4924,7 +4813,7 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym, b = 0; if (tok != ';') { gexpr(); - a = gtst(1, 0); + a = gvtst(1, 0); } skip(';'); if (tok != ')') { @@ -4953,7 +4842,7 @@ static void block(int *bsym, int *csym, int *case_sym, int *def_sym, skip('('); gsym(b); gexpr(); - c = gtst(0, 0); + c = gvtst(0, 0); gsym_addr(c, d); skip(')'); gsym(a); @@ -5698,10 +5587,11 @@ static void decl_initializer_alloc(CType *type, AttributeDef *ad, int r, sec = NULL; #ifdef CONFIG_TCC_BCHECK if (tcc_state->do_bounds_check && (type->t & VT_ARRAY)) { - loc_stack(1, 1); + loc--; } #endif - addr = loc_stack(size, 1); + loc = (loc - size) & -align; + addr = loc; #ifdef CONFIG_TCC_BCHECK /* handles bounds */ /* XXX: currently, since we do only one pass, we cannot track @@ -5709,7 +5599,7 @@ static void decl_initializer_alloc(CType *type, AttributeDef *ad, int r, if (tcc_state->do_bounds_check && (type->t & VT_ARRAY)) { unsigned long *bounds_ptr; /* add padding between regions */ - loc_stack(1, 1); + loc--; /* then add local bound info */ bounds_ptr = section_ptr_add(lbounds_section, 2 * sizeof(unsigned long)); bounds_ptr[0] = addr; diff --git a/tccpe.c b/tccpe.c index f7ef99e..b972d75 100644 --- a/tccpe.c +++ b/tccpe.c @@ -1773,7 +1773,7 @@ static void pe_add_runtime(TCCState *s1, struct pe_info *pe) if (0 == s1->nostdlib) { static const char *libs[] = { - "libcrt.a", "msvcrt", "kernel32", "", "user32", "gdi32", NULL + "libtcc1.a", "msvcrt", "kernel32", "", "user32", "gdi32", NULL }; const char **pp, *p; for (pp = libs; 0 != (p = *pp); ++pp) { diff --git a/tccpp.c b/tccpp.c index 538f671..732c5ea 100644 --- a/tccpp.c +++ b/tccpp.c @@ -233,10 +233,7 @@ static TokenSym *tok_alloc_new(TokenSym **pts, const char *str, int len) ts = tcc_malloc(sizeof(TokenSym) + len); table_ident[i] = ts; ts->tok = tok_ident++; - ts->sym_define.data = tcc_malloc(sizeof(Sym**)); - ts->sym_define.off = 0; - ts->sym_define.data[0] = NULL; - ts->sym_define.size = 1; + ts->sym_define = NULL; ts->sym_label = NULL; ts->sym_struct = NULL; ts->sym_identifier = NULL; @@ -1055,62 +1052,52 @@ static int macro_is_equal(const int *a, const int *b) ST_INLN void define_push(int v, int macro_type, int *str, Sym *first_arg) { Sym *s; - CSym *def; + s = define_find(v); if (s && !macro_is_equal(s->d, str)) tcc_warning("%s redefined", get_tok_str(v, NULL)); + s = sym_push2(&define_stack, v, macro_type, 0); s->d = str; s->next = first_arg; - def = &table_ident[v - TOK_IDENT]->sym_define; - def->data[def->off] = s; + table_ident[v - TOK_IDENT]->sym_define = s; } /* undefined a define symbol. Its name is just set to zero */ ST_FUNC void define_undef(Sym *s) { int v; - CSym *def; - v = s->v - TOK_IDENT; - if ((unsigned)v < (unsigned)(tok_ident - TOK_IDENT)){ - def = &table_ident[v]->sym_define; - def->data[def->off] = NULL; - } + v = s->v; + if (v >= TOK_IDENT && v < tok_ident) + table_ident[v - TOK_IDENT]->sym_define = NULL; + s->v = 0; } ST_INLN Sym *define_find(int v) { - CSym *def; v -= TOK_IDENT; if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT)) return NULL; - def = &table_ident[v]->sym_define; - return def->data[def->off]; + return table_ident[v]->sym_define; } /* free define stack until top reaches 'b' */ ST_FUNC void free_defines(Sym *b) { - Sym *top, *tmp; + Sym *top, *top1; int v; - CSym *def; top = define_stack; while (top != b) { - tmp = top->prev; + top1 = top->prev; /* do not free args or predefined defines */ if (top->d) tok_str_free(top->d); - v = top->v - TOK_IDENT; - if ((unsigned)v < (unsigned)(tok_ident - TOK_IDENT)){ - def = &table_ident[v]->sym_define; - if(def->off) - def->off = 0; - if(def->data[0]) - def->data[0] = NULL; - } + v = top->v; + if (v >= TOK_IDENT && v < tok_ident) + table_ident[v - TOK_IDENT]->sym_define = NULL; sym_free(top); - top = tmp; + top = top1; } define_stack = b; } @@ -1351,18 +1338,66 @@ static inline void add_cached_include(TCCState *s1, const char *filename, int if s1->cached_includes_hash[h] = s1->nb_cached_includes; } +static void pragma_parse(TCCState *s1) +{ + int val; + + next(); + if (tok == TOK_pack) { + /* + This may be: + #pragma pack(1) // set + #pragma pack() // reset to default + #pragma pack(push,1) // push & set + #pragma pack(pop) // restore previous + */ + next(); + skip('('); + if (tok == TOK_ASM_pop) { + next(); + if (s1->pack_stack_ptr <= s1->pack_stack) { + stk_error: + tcc_error("out of pack stack"); + } + s1->pack_stack_ptr--; + } else { + val = 0; + if (tok != ')') { + if (tok == TOK_ASM_push) { + next(); + if (s1->pack_stack_ptr >= s1->pack_stack + PACK_STACK_SIZE - 1) + goto stk_error; + s1->pack_stack_ptr++; + skip(','); + } + if (tok != TOK_CINT) { + pack_error: + tcc_error("invalid pack pragma"); + } + val = tokc.i; + if (val < 1 || val > 16 || (val & (val - 1)) != 0) + goto pack_error; + next(); + } + *s1->pack_stack_ptr = val; + skip(')'); + } + } +} + /* is_bof is true if first non space token at beginning of file */ ST_FUNC void preprocess(int is_bof) { TCCState *s1 = tcc_state; int i, c, n, saved_parse_flags; - uint8_t buf[1024], *p; + char buf[1024], *q; Sym *s; saved_parse_flags = parse_flags; - parse_flags = PARSE_FLAG_PREPROCESS | PARSE_FLAG_TOK_NUM | PARSE_FLAG_LINEFEED; + parse_flags = PARSE_FLAG_PREPROCESS | PARSE_FLAG_TOK_NUM | + PARSE_FLAG_LINEFEED; next_nomacro(); -redo: + redo: switch(tok) { case TOK_DEFINE: next_nomacro(); @@ -1385,21 +1420,19 @@ redo: goto read_name; } else if (ch == '\"') { c = ch; -read_name: + read_name: inp(); - p = buf; + q = buf; while (ch != c && ch != '\n' && ch != CH_EOF) { - if ((p - buf) < sizeof(buf) - 1) - *p++ = ch; + if ((q - buf) < sizeof(buf) - 1) + *q++ = ch; if (ch == '\\') { if (handle_stray_noerror() == 0) - --p; + --q; } else inp(); } - if (ch != c) - goto include_syntax; - *p = '\0'; + *q = '\0'; minp(); #if 0 /* eat all spaces and comments after include */ @@ -1437,8 +1470,6 @@ read_name: c = '>'; } } - if(!buf[0]) - tcc_error(" empty filename in #include"); if (s1->include_stack_ptr >= s1->include_stack + INCLUDE_STACK_SIZE) tcc_error("#include recursion too deep"); @@ -1505,7 +1536,8 @@ include_trynext: printf("%s: including %s\n", file->prev->filename, file->filename); #endif /* update target deps */ - dynarray_add((void ***)&s1->target_deps, &s1->nb_target_deps, tcc_strdup(buf1)); + dynarray_add((void ***)&s1->target_deps, &s1->nb_target_deps, + tcc_strdup(buf1)); /* push current file in stack */ ++s1->include_stack_ptr; /* add include file debug info */ @@ -1538,7 +1570,7 @@ include_done: file->ifndef_macro = tok; } } - c = !!define_find(tok) ^ c; + c = (define_find(tok) != 0) ^ c; do_if: if (s1->ifdef_stack_ptr >= s1->ifdef_stack + IFDEF_STACK_SIZE) tcc_error("memory full (ifdef)"); @@ -1562,12 +1594,12 @@ include_done: goto skip; c = expr_preprocess(); s1->ifdef_stack_ptr[-1] = c; -test_else: + test_else: if (s1->ifdef_stack_ptr == file->ifdef_stack_ptr + 1) file->ifndef_macro = 0; -test_skip: + test_skip: if (!(c & 1)) { -skip: + skip: preprocess_skip(); is_bof = 0; goto redo; @@ -1585,11 +1617,11 @@ skip: /* need to set to zero to avoid false matches if another #ifndef at middle of file */ file->ifndef_macro = 0; + while (tok != TOK_LINEFEED) + next_nomacro(); tok_flags |= TOK_FLAG_ENDIF; + goto the_end; } - next_nomacro(); - if (tok != TOK_LINEFEED) - tcc_warning("Ignoring: %s", get_tok_str(tok, &tokc)); break; case TOK_LINE: next(); @@ -1600,7 +1632,8 @@ skip: if (tok != TOK_LINEFEED) { if (tok != TOK_STR) tcc_error("#line"); - pstrcpy(file->filename, sizeof(file->filename), (char *)tokc.cstr->data); + pstrcpy(file->filename, sizeof(file->filename), + (char *)tokc.cstr->data); } break; case TOK_ERROR: @@ -1608,161 +1641,24 @@ skip: c = tok; ch = file->buf_ptr[0]; skip_spaces(); - p = buf; + q = buf; while (ch != '\n' && ch != CH_EOF) { - if ((p - buf) < sizeof(buf) - 1) - *p++ = ch; + if ((q - buf) < sizeof(buf) - 1) + *q++ = ch; if (ch == '\\') { if (handle_stray_noerror() == 0) - --p; + --q; } else inp(); } - *p = '\0'; + *q = '\0'; if (c == TOK_ERROR) tcc_error("#error %s", buf); else tcc_warning("#warning %s", buf); break; case TOK_PRAGMA: - next(); - if (tok == TOK_pack && parse_flags & PARSE_FLAG_PACK) { - /* - This may be: - #pragma pack(1) // set - #pragma pack() // reset to default - #pragma pack(push,1) // push & set - #pragma pack(pop) // restore previous - */ - next(); - skip('('); - if (tok == TOK_ASM_pop) { - next(); - if (s1->pack_stack_ptr <= s1->pack_stack) { -stk_error: - tcc_error("out of pack stack"); - } - s1->pack_stack_ptr--; - } else { - int val = 0; - if (tok != ')') { - if (tok == TOK_ASM_push) { - next(); - s1->pack_stack_ptr++; - if (s1->pack_stack_ptr >= s1->pack_stack + PACK_STACK_SIZE) - goto stk_error; - skip(','); - } - if (tok != TOK_CINT) { -pack_error: - tcc_error("invalid pack pragma"); - } - val = tokc.i; - if (val < 1 || val > 16) - goto pack_error; - if (val < 1 || val > 16) - tcc_error("Value must be greater than 1 is less than or equal to 16"); - if ((val & (val - 1)) != 0) - tcc_error("Value must be a power of 2 curtain"); - next(); - } - *s1->pack_stack_ptr = val; - skip(')'); - } - }else if (tok == TOK_PUSH_MACRO || tok == TOK_POP_MACRO) { - TokenSym *ts; - CSym *def; - uint8_t *p1; - int len, t; - t = tok; - ch = file->buf_ptr[0]; - skip_spaces(); - if (ch != '(') - goto macro_xxx_syntax; - /* XXX: incorrect if comments : use next_nomacro with a special mode */ - inp(); - skip_spaces(); - if (ch == '\"'){ - inp(); - p = buf; - while (ch != '\"' && ch != '\n' && ch != CH_EOF) { - if ((p - buf) < sizeof(buf) - 1) - *p++ = ch; - if (ch == CH_EOB) { - --p; - handle_stray(); - }else - inp(); - } - if(ch != '\"') - goto macro_xxx_syntax; - *p = '\0'; - minp(); - next(); - }else{ - /* computed #pragma macro_xxx for #define xxx */ - next(); - buf[0] = '\0'; - while (tok != ')') { - if (tok != TOK_STR) { - macro_xxx_syntax: - tcc_error("'macro_xxx' expects (\"NAME\")"); - } - pstrcat(buf, sizeof(buf), (char *)tokc.cstr->data); - next(); - } - } - skip (')'); - if(!buf[0]) - tcc_error(" empty string in #pragma"); - /* find TokenSym */ - p = buf; - while (is_space(*p)) - p++; - p1 = p; - for(;;){ - if (!isidnum_table[p[0] - CH_EOF]) - break; - ++p; - } - len = p - p1; - while (is_space(*p)) - p++; - if(!p) //'\0' - tcc_error("unrecognized string: %s", buf); - ts = tok_alloc(p1, len); - if(ts){ - def = &ts->sym_define; - if(t == TOK_PUSH_MACRO){ - void *tmp = def->data[def->off]; - if(tmp){ - def->off++; - if(def->off >= def->size){ - int size = def->size; - size *= 2; - if (size >= MACRO_STACK_SIZE) - tcc_error("stack full"); - def->data = tcc_realloc(def->data, size*sizeof(Sym**)); - def->size = size; - } - def->data[def->off] = tmp; - } - }else{ - if(def->off){ - --def->off; - }else{ - tcc_warning("stack empty"); - } - } - } - }else{ - fputs("#pragma ", s1->ppfp); - while (tok != TOK_LINEFEED){ - fputs(get_tok_str(tok, &tokc), s1->ppfp); - next(); - } - goto the_end; - } + pragma_parse(s1); break; default: if (tok == TOK_LINEFEED || tok == '!' || tok == TOK_PPNUM) { @@ -1782,7 +1678,7 @@ pack_error: /* ignore other preprocess commands or #! for C scripts */ while (tok != TOK_LINEFEED) next_nomacro(); -the_end: + the_end: parse_flags = saved_parse_flags; } @@ -3134,13 +3030,12 @@ ST_FUNC int tcc_preprocess(TCCState *s1) ch = file->buf_ptr[0]; tok_flags = TOK_FLAG_BOL | TOK_FLAG_BOF; parse_flags = PARSE_FLAG_ASM_COMMENTS | PARSE_FLAG_PREPROCESS | - PARSE_FLAG_LINEFEED | PARSE_FLAG_SPACES; + PARSE_FLAG_LINEFEED | PARSE_FLAG_SPACES; token_seen = 0; line_ref = 0; file_ref = NULL; iptr = s1->include_stack_ptr; - tok = TOK_LINEFEED; /* print line */ - goto print_line; + for (;;) { next(); if (tok == TOK_EOF) { @@ -3148,11 +3043,11 @@ ST_FUNC int tcc_preprocess(TCCState *s1) } else if (file != file_ref) { goto print_line; } else if (tok == TOK_LINEFEED) { - if (token_seen) + if (!token_seen) continue; ++line_ref; - token_seen = 1; - } else if (token_seen) { + token_seen = 0; + } else if (!token_seen) { d = file->line_num - line_ref; if (file != file_ref || d < 0 || d >= 8) { print_line: @@ -3160,7 +3055,8 @@ print_line: s = iptr_new > iptr ? " 1" : iptr_new < iptr ? " 2" : iptr_new > s1->include_stack ? " 3" - : ""; + : "" + ; iptr = iptr_new; fprintf(s1->ppfp, "# %d \"%s\"%s\n", file->line_num, file->filename, s); } else { @@ -3168,8 +3064,8 @@ print_line: fputs("\n", s1->ppfp), --d; } line_ref = (file_ref = file)->line_num; - token_seen = tok == TOK_LINEFEED; - if (token_seen) + token_seen = tok != TOK_LINEFEED; + if (!token_seen) continue; } fputs(get_tok_str(tok, &tokc), s1->ppfp); diff --git a/tcctok.h b/tcctok.h index c2aa040..735ccdd 100644 --- a/tcctok.h +++ b/tcctok.h @@ -138,8 +138,6 @@ /* pragma */ DEF(TOK_pack, "pack") - DEF(TOK_PUSH_MACRO, "push_macro") - DEF(TOK_POP_MACRO, "pop_macro") #if !defined(TCC_TARGET_I386) && !defined(TCC_TARGET_X86_64) /* already defined for assembler */ DEF(TOK_ASM_push, "push") @@ -287,5 +285,5 @@ #endif #if defined TCC_TARGET_I386 || defined TCC_TARGET_X86_64 -#include "asmx86-tok.h" +#include "i386-tok.h" #endif diff --git a/tests/Makefile b/tests/Makefile index 4c728eb..e3824ba 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -66,11 +66,11 @@ RUN_TCC = $(NATIVE_DEFINES) -DONE_SOURCE -run $(top_srcdir)/tcc.c $(TCCFLAGS) DISAS = objdump -d # libtcc test -ifdef LIBCRT - LIBCRT:=$(TOP)/$(LIBCRT) +ifdef LIBTCC1 + LIBTCC1:=$(TOP)/$(LIBTCC1) endif -all test : clean $(TESTS) +all test : $(TESTS) hello-exe: ../examples/ex1.c @echo ------------ $@ ------------ @@ -89,7 +89,7 @@ hello-run: ../examples/ex1.c @echo ------------ $@ ------------ $(TCC) -run $< -libtest: libtcc_test$(EXESUF) $(LIBCRT) +libtest: libtcc_test$(EXESUF) $(LIBTCC1) @echo ------------ $@ ------------ ./libtcc_test$(EXESUF) lib_path=.. @@ -101,7 +101,7 @@ moretests: $(MAKE) -C tests2 w32-prep: - cp ../libcrt.a ../lib + cp ../libtcc1.a ../lib # test.ref - generate using cc test.ref: tcctest.c @@ -210,14 +210,10 @@ abitest-cc$(EXESUF): abitest.c $(top_builddir)/$(LIBTCC) abitest-tcc$(EXESUF): abitest.c libtcc.c $(TCC) -o $@ $^ $(CPPFLAGS) $(CFLAGS) $(NATIVE_DEFINES) -DONE_SOURCE $(LIBS) $(LDFLAGS) -I$(top_srcdir) -abitest-tcc1$(EXESUF): abitest.c $(top_builddir)/$(LIBTCC) - $(CC) -o $@ $^ $(CPPFLAGS) $(CFLAGS) $(NATIVE_DEFINES) $(LIBS) $(LINK_LIBTCC) $(LDFLAGS) -I$(top_srcdir) - -abitest: abitest-cc$(EXESUF) abitest-tcc$(EXESUF) abitest-tcc1$(EXESUF) +abitest: abitest-cc$(EXESUF) abitest-tcc$(EXESUF) @echo ------------ $@ ------------ ./abitest-cc$(EXESUF) lib_path=.. include="$(top_srcdir)/include" ./abitest-tcc$(EXESUF) lib_path=.. include="$(top_srcdir)/include" - ./abitest-tcc1$(EXESUF) lib_path=.. include="$(top_srcdir)/include" vla_test$(EXESUF): vla_test.c $(TCC) -o $@ $^ $(CPPFLAGS) $(CFLAGS) @@ -244,6 +240,6 @@ cache: tcc_g clean: $(MAKE) -C tests2 $@ rm -vf *~ *.o *.a *.bin *.i *.ref *.out *.out? *.out?b *.cc \ - *-cc *-tcc *.exe *-tcc1\ + *-cc *-tcc *.exe \ hello libtcc_test vla_test tcctest[1234] ex? tcc_g tcclib.h \ - ../lib/libcrt.a + ../lib/libtcc1.a diff --git a/tests/abitest.c b/tests/abitest.c index d840d85..3ad707a 100644 --- a/tests/abitest.c +++ b/tests/abitest.c @@ -468,7 +468,7 @@ int main(int argc, char **argv) { const char *testname = NULL; int retval = EXIT_SUCCESS; - /* if tcclib.h and libcrt.a are not installed, where can we find them */ + /* if tcclib.h and libtcc1.a are not installed, where can we find them */ for (i = 1; i < argc; ++i) { if (!memcmp(argv[i], "lib_path=",9)) tccdir = argv[i] + 9; diff --git a/tests/libtcc_test.c b/tests/libtcc_test.c index 4449afa..bead0ff 100644 --- a/tests/libtcc_test.c +++ b/tests/libtcc_test.c @@ -43,7 +43,7 @@ int main(int argc, char **argv) exit(1); } - /* if tcclib.h and libcrt.a are not installed, where can we find them */ + /* if tcclib.h and libtcc1.a are not installed, where can we find them */ if (argc == 2 && !memcmp(argv[1], "lib_path=",9)) tcc_set_lib_path(s, argv[1]+9); diff --git a/tests/tcctest.c b/tests/tcctest.c index 5fac82e..cc8ffd8 100644 --- a/tests/tcctest.c +++ b/tests/tcctest.c @@ -235,7 +235,7 @@ void intdiv_test(void) void macro_test(void) { - printf("macro:\n"); + printf("macro:\n"); pf("N=%d\n", N); printf("aaa=%d\n", AAA); @@ -379,23 +379,6 @@ comment /* And again when the name and parenthes are separated by a comment. */ TEST2 /* the comment */ (); - /* macro_push and macro_pop test */ - #define MACRO_TEST "macro_test1\n" - #pragma push_macro("MACRO_TEST") - #undef MACRO_TEST - #define MACRO_TEST "macro_test2\n" - printf(MACRO_TEST); - #pragma pop_macro("MACRO_TEST") - printf(MACRO_TEST); -/* gcc does not support - #define MACRO_TEST_MACRO "MACRO_TEST" - #pragma push_macro(MACRO_TEST_MACRO) - #undef MACRO_TEST - #define MACRO_TEST "macro_test3\n" - printf(MACRO_TEST); - #pragma pop_macro(MACRO_TEST_MACRO) - printf(MACRO_TEST); -*/ } @@ -1697,6 +1680,7 @@ void prefix ## fcast(type a)\ printf("ftof: %f %f %Lf\n", fa, da, la);\ ia = (int)a;\ llia = (long long)a;\ + a = (a >= 0) ? a : -a;\ ua = (unsigned int)a;\ llua = (unsigned long long)a;\ printf("ftoi: %d %u %lld %llu\n", ia, ua, llia, llua);\ @@ -1726,18 +1710,6 @@ void prefix ## call(void)\ printf("strto%s: %f\n", #prefix, (double)strto ## prefix("1.2", NULL));\ }\ \ -void prefix ## calc(type x, type y)\ -{\ - x=x*x;y=y*y;\ - printf("%d, %d\n", (int)x, (int)y);\ - x=x-y;y=y-x;\ - printf("%d, %d\n", (int)x, (int)y);\ - x=x/y;y=y/x;\ - printf("%d, %d\n", (int)x, (int)y);\ - x=x+x;y=y+y;\ - printf("%d, %d\n", (int)x, (int)y);\ -}\ -\ void prefix ## signed_zeros(void) \ {\ type x = 0.0, y = -0.0, n, p;\ @@ -1760,7 +1732,7 @@ void prefix ## signed_zeros(void) \ 1.0 / x != 1.0 / p);\ else\ printf ("x != +y; this is wrong!\n");\ - p = -y;\ + p = -y;\ if (x == p)\ printf ("Test 1.0 / x != 1.0 / -y returns %d (should be 0).\n",\ 1.0 / x != 1.0 / p);\ @@ -1776,8 +1748,7 @@ void prefix ## test(void)\ prefix ## fcast(234.6);\ prefix ## fcast(-2334.6);\ prefix ## call();\ - prefix ## calc(1, 1.0000000000000001);\ - prefix ## signed_zeros();\ + prefix ## signed_zeros();\ } FTEST(f, float, float, "%f") @@ -2184,15 +2155,14 @@ void whitespace_test(void) { char *str; - -#if 1 + #if 1 pri\ -ntf("whitspace:\n"); +ntf("whitspace:\n"); #endif pf("N=%d\n", 2); #ifdef CORRECT_CR_HANDLING - pri\ + pri\ ntf("aaa=%d\n", 3); #endif @@ -2204,12 +2174,11 @@ ntf("min=%d\n", 4); printf("len1=%d\n", strlen(" ")); #ifdef CORRECT_CR_HANDLING - str = " + str = " "; printf("len1=%d str[0]=%d\n", strlen(str), str[0]); #endif - printf("len1=%d\n", strlen(" -a + printf("len1=%d\n", strlen(" a ")); #endif /* ACCEPT_CR_IN_STRINGS */ } @@ -2603,6 +2572,7 @@ int constant_p_var; void builtin_test(void) { +#if GCC_MAJOR >= 3 COMPAT_TYPE(int, int); COMPAT_TYPE(int, unsigned int); COMPAT_TYPE(int, char); @@ -2612,9 +2582,9 @@ void builtin_test(void) COMPAT_TYPE(int *, void *); COMPAT_TYPE(int *, const int *); COMPAT_TYPE(char *, unsigned char *); - COMPAT_TYPE(char, unsigned char); /* space is needed because tcc preprocessor introduces a space between each token */ - COMPAT_TYPE(char **, void *); + COMPAT_TYPE(char * *, void *); +#endif printf("res = %d\n", __builtin_constant_p(1)); printf("res = %d\n", __builtin_constant_p(1 + 2)); printf("res = %d\n", __builtin_constant_p(&constant_p_var)); diff --git a/win32/tools/tiny_libmaker.c b/win32/tools/tiny_libmaker.c index 29a692a..62d2a2e 100644 --- a/win32/tools/tiny_libmaker.c +++ b/win32/tools/tiny_libmaker.c @@ -1,5 +1,5 @@ /* - * This program is for making libcrt.a without ar + * This program is for making libtcc1.a without ar * tiny_libmaker - tiny elf lib maker * usage: tiny_libmaker [lib] files... * Copyright (c) 2007 Timppa diff --git a/x86_64-gen.c b/x86_64-gen.c index 2b52bb1..ae65328 100644 --- a/x86_64-gen.c +++ b/x86_64-gen.c @@ -29,38 +29,28 @@ /* a register can belong to several classes. The classes must be sorted from more general to more precise (see gv2() code which does assumptions on it). */ -#define RC_INT 0x0001 /* generic integer register */ -#define RC_FLOAT 0x0002 /* generic float register */ -#define RC_RAX 0x0004 -#define RC_RCX 0x0008 -#define RC_RDX 0x0010 -#define RC_ST0 0x0020 /* only for long double */ -#define RC_R8 0x0040 -#define RC_R9 0x0080 -#define RC_XMM0 0x0100 -#define RC_XMM1 0x0200 -#define RC_XMM2 0x0400 -#define RC_XMM3 0x0800 -#define RC_XMM4 0x1000 -#define RC_XMM5 0x2000 -#define RC_XMM6 0x4000 -#define RC_XMM7 0x8000 -#define RC_RSI 0x10000 -#define RC_RDI 0x20000 -#define RC_INT1 0x40000 /* function_pointer */ -#define RC_INT2 0x80000 -#define RC_RBX 0x100000 -#define RC_R10 0x200000 -#define RC_R11 0x400000 -#define RC_R12 0x800000 -#define RC_R13 0x1000000 -#define RC_R14 0x2000000 -#define RC_R15 0x4000000 -#define RC_IRET RC_RAX /* function return: integer register */ -#define RC_LRET RC_RDX /* function return: second integer register */ -#define RC_FRET RC_XMM0 /* function return: float register */ -#define RC_QRET RC_XMM1 /* function return: second float register */ -#define RC_MASK (RC_INT|RC_INT1|RC_INT2|RC_FLOAT) +#define RC_INT 0x0001 /* generic integer register */ +#define RC_FLOAT 0x0002 /* generic float register */ +#define RC_RAX 0x0004 +#define RC_RCX 0x0008 +#define RC_RDX 0x0010 +#define RC_ST0 0x0080 /* only for long double */ +#define RC_R8 0x0100 +#define RC_R9 0x0200 +#define RC_R10 0x0400 +#define RC_R11 0x0800 +#define RC_XMM0 0x1000 +#define RC_XMM1 0x2000 +#define RC_XMM2 0x4000 +#define RC_XMM3 0x8000 +#define RC_XMM4 0x10000 +#define RC_XMM5 0x20000 +#define RC_XMM6 0x40000 +#define RC_XMM7 0x80000 +#define RC_IRET RC_RAX /* function return: integer register */ +#define RC_LRET RC_RDX /* function return: second integer register */ +#define RC_FRET RC_XMM0 /* function return: float register */ +#define RC_QRET RC_XMM1 /* function return: second float register */ /* pretty names for the registers */ enum { @@ -68,7 +58,6 @@ enum { TREG_RCX = 1, TREG_RDX = 2, TREG_RSP = 4, - TREG_ST0 = 5, TREG_RSI = 6, TREG_RDI = 7, @@ -86,11 +75,13 @@ enum { TREG_XMM6 = 22, TREG_XMM7 = 23, + TREG_ST0 = 24, + + TREG_MEM = 0x20, }; #define REX_BASE(reg) (((reg) >> 3) & 1) #define REG_VALUE(reg) ((reg) & 7) -#define FLAG_GOT 0X01 /* return registers for function */ #define REG_IRET TREG_RAX /* single word int return register */ @@ -131,30 +122,34 @@ enum { #include ST_DATA const int reg_classes[NB_REGS] = { - /* eax */ RC_INT|RC_RAX|RC_INT2, - /* ecx */ RC_INT|RC_RCX|RC_INT2, - /* edx */ RC_INT|RC_RDX, - RC_INT|RC_INT1|RC_INT2|RC_RBX, + /* eax */ RC_INT | RC_RAX, + /* ecx */ RC_INT | RC_RCX, + /* edx */ RC_INT | RC_RDX, 0, - /* st0 */ RC_ST0, - RC_RSI|RC_INT2, - RC_RDI|RC_INT2, - RC_INT|RC_R8|RC_INT2, - RC_INT|RC_R9|RC_INT2, - RC_INT|RC_INT1|RC_INT2|RC_R10, - RC_INT|RC_INT1|RC_INT2|RC_R11, - RC_INT|RC_INT1|RC_INT2|RC_R12, - RC_INT|RC_INT1|RC_INT2|RC_R13, - RC_INT|RC_INT1|RC_INT2|RC_R14, - RC_INT|RC_INT1|RC_INT2|RC_R15, - /* xmm0 */ RC_FLOAT | RC_XMM0, - RC_FLOAT|RC_XMM1, - RC_FLOAT|RC_XMM2, - RC_FLOAT|RC_XMM3, - RC_FLOAT|RC_XMM4, - RC_FLOAT|RC_XMM5, - RC_FLOAT|RC_XMM6, - RC_FLOAT|RC_XMM7, + 0, + 0, + 0, + 0, + RC_R8, + RC_R9, + RC_R10, + RC_R11, + 0, + 0, + 0, + 0, + /* xmm0 */ RC_FLOAT | RC_XMM0, + /* xmm1 */ RC_FLOAT | RC_XMM1, + /* xmm2 */ RC_FLOAT | RC_XMM2, + /* xmm3 */ RC_FLOAT | RC_XMM3, + /* xmm4 */ RC_FLOAT | RC_XMM4, + /* xmm5 */ RC_FLOAT | RC_XMM5, + /* xmm6 an xmm7 are included so gv() can be used on them, + but they are not tagged with RC_FLOAT because they are + callee saved on Windows */ + RC_XMM6, + RC_XMM7, + /* st0 */ RC_ST0 }; static unsigned long func_sub_sp_offset; @@ -309,19 +304,18 @@ static void gen_gotpcrel(int r, Sym *sym, int c) } } -static void gen_modrm_impl(int op_reg, int fr, Sym *sym, int c, int flag) +static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got) { - int r = fr & VT_VALMASK; op_reg = REG_VALUE(op_reg) << 3; - if (r == VT_CONST) { + if ((r & VT_VALMASK) == VT_CONST) { /* constant memory reference */ o(0x05 | op_reg); - if (flag & FLAG_GOT) { - gen_gotpcrel(fr, sym, c); + if (is_got) { + gen_gotpcrel(r, sym, c); } else { - gen_addrpc32(fr, sym, c); + gen_addrpc32(r, sym, c); } - } else if (r == VT_LOCAL) { + } else if ((r & VT_VALMASK) == VT_LOCAL) { /* currently, we use only ebp as base */ if (c == (char)c) { /* short reference */ @@ -330,23 +324,15 @@ static void gen_modrm_impl(int op_reg, int fr, Sym *sym, int c, int flag) } else { oad(0x85 | op_reg, c); } - } else if (c) { - if (c == (char)c) { - /* short reference */ - g(0x40 | op_reg | REG_VALUE(fr)); - if(r == TREG_RSP) - g(0x24); - g(c); + } else if ((r & VT_VALMASK) >= TREG_MEM) { + if (c) { + g(0x80 | op_reg | REG_VALUE(r)); + gen_le32(c); } else { - g(0x80 | op_reg | REG_VALUE(fr)); - if(r == TREG_RSP) - g(0x24); - gen_le32(c); + g(0x00 | op_reg | REG_VALUE(r)); } } else { - g(0x00 | op_reg | REG_VALUE(fr)); - if(r == TREG_RSP) - g(0x24); + g(0x00 | op_reg | REG_VALUE(r)); } } @@ -361,18 +347,17 @@ static void gen_modrm(int op_reg, int r, Sym *sym, int c) opcode bits */ static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c) { - int flag; - if((op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC)) - flag = FLAG_GOT; + int is_got; + is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC); orex(1, r, op_reg, opcode); - gen_modrm_impl(op_reg, r, sym, c, flag); + gen_modrm_impl(op_reg, r, sym, c, is_got); } /* load 'r' from value 'sv' */ void load(int r, SValue *sv) { - int v, t, ft, fc, fr, ll; + int v, t, ft, fc, fr; SValue v1; #ifdef TCC_TARGET_PE @@ -383,21 +368,19 @@ void load(int r, SValue *sv) fr = sv->r; ft = sv->type.t & ~VT_DEFSIGN; fc = sv->c.ul; - ll = is64_type(ft); #ifndef TCC_TARGET_PE /* we use indirect access via got */ if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) && (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) { /* use the result register as a temporal register */ - int tr; + int tr = r | TREG_MEM; if (is_float(ft)) { /* we cannot use float registers as a temporal register */ tr = get_reg(RC_INT) | TREG_MEM; - }else{ - tr = r | TREG_MEM; - } + } gen_modrm64(0x8b, tr, fr, sv->sym, 0); + /* load from the temporal register */ fr = tr | VT_LVAL; } @@ -405,14 +388,7 @@ void load(int r, SValue *sv) v = fr & VT_VALMASK; if (fr & VT_LVAL) { - if(fr & VT_TMP){ - int size, align; - if((ft & VT_BTYPE) == VT_FUNC) - size = PTR_SIZE; - else - size = type_size(&sv->type, &align); - loc_stack(size, 0); - } + int b, ll; if (v == VT_LLOCAL) { v1.type.t = VT_PTR; v1.r = VT_LOCAL | VT_LVAL; @@ -421,13 +397,14 @@ void load(int r, SValue *sv) if (!(reg_classes[fr] & RC_INT)) fr = get_reg(RC_INT); load(fr, &v1); - fc = 0; } - int b; + ll = 0; if ((ft & VT_BTYPE) == VT_FLOAT) { - b = 0x100ff3; /* movss */ + b = 0x6e0f66; + r = REG_VALUE(r); /* movd */ } else if ((ft & VT_BTYPE) == VT_DOUBLE) { - b = 0x100ff2; /* movds */ + b = 0x7e0ff3; /* movq */ + r = REG_VALUE(r); } else if ((ft & VT_BTYPE) == VT_LDOUBLE) { b = 0xdb, r = 5; /* fldt */ } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) { @@ -439,13 +416,18 @@ void load(int r, SValue *sv) } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) { b = 0xb70f; /* movzwl */ } else { - assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG) + assert(((ft & VT_BTYPE) == VT_INT) || ((ft & VT_BTYPE) == VT_LLONG) || ((ft & VT_BTYPE) == VT_PTR) || ((ft & VT_BTYPE) == VT_ENUM) || ((ft & VT_BTYPE) == VT_FUNC)); + ll = is64_type(ft); b = 0x8b; } - orex(ll, fr, r, b); - gen_modrm(r, fr, sv->sym, fc); + if (ll) { + gen_modrm64(b, r, fr, sv->sym, fc); + } else { + orex(ll, fr, r, b); + gen_modrm(r, fr, sv->sym, fc); + } } else { if (v == VT_CONST) { if (fr & VT_SYM) { @@ -464,33 +446,33 @@ void load(int r, SValue *sv) gen_gotpcrel(r, sv->sym, fc); } #endif + } else if (is64_type(ft)) { + orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */ + gen_le64(sv->c.ull); } else { - orex(ll,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */ - if (ll) - gen_le64(sv->c.ull); - else - gen_le32(fc); - } + orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */ + gen_le32(fc); + } } else if (v == VT_LOCAL) { orex(1,0,r,0x8d); /* lea xxx(%ebp), r */ gen_modrm(r, VT_LOCAL, sv->sym, fc); } else if (v == VT_CMP) { - orex(0, r, 0, 0xb8 + REG_VALUE(r)); - if ((fc & ~0x100) == TOK_NE){ - gen_le32(1);/* mov $0, r */ - }else{ - gen_le32(0);/* mov $1, r */ - } - if (fc & 0x100){ - fc &= ~0x100; - /* This was a float compare. If the parity bit is - set the result was unordered, meaning false for everything - except TOK_NE, and true for TOK_NE. */ - o(0x037a + (REX_BASE(r) << 8));/* jp 3*/ - } - orex(0,r,0, 0x0f); /* setxx %br */ - o(fc); - o(0xc0 + REG_VALUE(r)); + orex(0,r,0,0); + if ((fc & ~0x100) != TOK_NE) + oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */ + else + oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */ + if (fc & 0x100) + { + /* This was a float compare. If the parity bit is + set the result was unordered, meaning false for everything + except TOK_NE, and true for TOK_NE. */ + fc &= ~0x100; + o(0x037a + (REX_BASE(r) << 8)); + } + orex(0,r,0, 0x0f); /* setxx %br */ + o(fc); + o(0xc0 + REG_VALUE(r)); } else if (v == VT_JMP || v == VT_JMPI) { t = v & 1; orex(0,r,0,0); @@ -500,89 +482,117 @@ void load(int r, SValue *sv) orex(0,r,0,0); oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */ } else if (v != r) { - if (reg_classes[r] & RC_FLOAT) { - if(v == TREG_ST0){ - /* gen_cvt_ftof(VT_DOUBLE); */ - o(0xf0245cdd); /* fstpl -0x10(%rsp) */ - /* movsd -0x10(%rsp),%xmm0 */ - o(0x100ff2); - o(0xf02444 + REG_VALUE(r)*8); - }else if(reg_classes[v] & RC_FLOAT){ - o(0x7e0ff3); - o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8); - }else - assert(0); + if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) { + if (v == TREG_ST0) { + /* gen_cvt_ftof(VT_DOUBLE); */ + o(0xf0245cdd); /* fstpl -0x10(%rsp) */ + /* movsd -0x10(%rsp),%xmmN */ + o(0x100ff2); + o(0x44 + REG_VALUE(r)*8); /* %xmmN */ + o(0xf024); + } else { + assert((v >= TREG_XMM0) && (v <= TREG_XMM7)); + if ((ft & VT_BTYPE) == VT_FLOAT) { + o(0x100ff3); + } else { + assert((ft & VT_BTYPE) == VT_DOUBLE); + o(0x100ff2); + } + o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8); + } } else if (r == TREG_ST0) { - assert(reg_classes[v] & RC_FLOAT); + assert((v >= TREG_XMM0) && (v <= TREG_XMM7)); /* gen_cvt_ftof(VT_LDOUBLE); */ - /* movsd %xmm0,-0x10(%rsp) */ - o(0x110ff2); - o(0xf02444 + REG_VALUE(v)*8); + /* movsd %xmmN,-0x10(%rsp) */ + o(0x110ff2); + o(0x44 + REG_VALUE(r)*8); /* %xmmN */ + o(0xf024); o(0xf02444dd); /* fldl -0x10(%rsp) */ } else { - if(fc){ - orex(1,fr,r,0x8d); /* lea xxx(%ebp), r */ - gen_modrm(r, fr, sv->sym, fc); - }else{ - orex(ll,v,r, 0x8b); - o(0xc0 + REG_VALUE(v) + REG_VALUE(r) * 8); /* mov v, r */ - } + orex(1,r,v, 0x89); + o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */ } } } } /* store register 'r' in lvalue 'v' */ -void store(int r, SValue *sv) +void store(int r, SValue *v) { - int fr, bt, ft, fc, ll, v; + int fr, bt, ft, fc; + int op64 = 0; + /* store the REX prefix in this variable when PIC is enabled */ + int pic = 0; #ifdef TCC_TARGET_PE SValue v2; - sv = pe_getimport(sv, &v2); + v = pe_getimport(v, &v2); #endif - ft = sv->type.t & ~VT_DEFSIGN; - fc = sv->c.ul; - fr = sv->r; - bt = ft & VT_BTYPE; - ll = is64_type(ft); - v = fr & VT_VALMASK; -//#ifndef TCC_TARGET_PE + ft = v->type.t; + fc = v->c.ul; + fr = v->r & VT_VALMASK; + bt = ft & VT_BTYPE; + +#ifndef TCC_TARGET_PE /* we need to access the variable via got */ - // if (fr == VT_CONST && (v->r & VT_SYM)) { + if (fr == VT_CONST && (v->r & VT_SYM)) { /* mov xx(%rip), %r11 */ - // o(0x1d8b4c); - // gen_gotpcrel(TREG_R11, v->sym, v->c.ul); - //pic = is64_type(bt) ? 0x49 : 0x41; - // } -//#endif + o(0x1d8b4c); + gen_gotpcrel(TREG_R11, v->sym, v->c.ul); + pic = is64_type(bt) ? 0x49 : 0x41; + } +#endif /* XXX: incorrect if float reg to reg */ if (bt == VT_FLOAT) { - orex(0, fr, r, 0x110ff3); /* movss */ + o(0x66); + o(pic); + o(0x7e0f); /* movd */ + r = REG_VALUE(r); } else if (bt == VT_DOUBLE) { - orex(0, fr, r, 0x110ff2);/* movds */ + o(0x66); + o(pic); + o(0xd60f); /* movq */ + r = REG_VALUE(r); } else if (bt == VT_LDOUBLE) { o(0xc0d9); /* fld %st(0) */ - orex(0, fr, r, 0xdb);/* fstpt */ + o(pic); + o(0xdb); /* fstpt */ r = 7; } else { if (bt == VT_SHORT) o(0x66); - if (bt == VT_BYTE || bt == VT_BOOL) - orex(ll, fr, r, 0x88); - else{ - orex(ll, fr, r, 0x89); - } + o(pic); + if (bt == VT_BYTE || bt == VT_BOOL) + orex(0, 0, r, 0x88); + else if (is64_type(bt)) + op64 = 0x89; + else + orex(0, 0, r, 0x89); + } + if (pic) { + /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */ + if (op64) + o(op64); + o(3 + (r << 3)); + } else if (op64) { + if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) { + gen_modrm64(op64, r, v->r, v->sym, fc); + } else if (fr != r) { + /* XXX: don't we really come here? */ + abort(); + o(0xc0 + fr + r * 8); /* mov r, fr */ + } + } else { + if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) { + gen_modrm(r, v->r, v->sym, fc); + } else if (fr != r) { + /* XXX: don't we really come here? */ + abort(); + o(0xc0 + fr + r * 8); /* mov r, fr */ + } } - if (v == VT_CONST || v == VT_LOCAL || (fr & VT_LVAL)) { - gen_modrm(r, fr, sv->sym, fc); - } else if (v != r) { - /* XXX: don't we really come here? */ - abort(); - o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8); /* mov r, fr */ - } } /* 'is_jmp' is '1' if it is a jump */ @@ -603,76 +613,14 @@ static void gcall_or_jmp(int is_jmp) oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */ } else { /* otherwise, indirect call */ - r = get_reg(RC_INT1); + r = TREG_R11; load(r, vtop); - orex(0, r, 0, 0xff); /* REX call/jmp *r */ + o(0x41); /* REX */ + o(0xff); /* call/jmp *r */ o(0xd0 + REG_VALUE(r) + (is_jmp << 4)); } } -static int func_scratch; -static int r_loc; - -int reloc_add(int inds) -{ - return psym(0, inds); -} - -void reloc_use(int t, int data) -{ - int *ptr; - while (t) { - ptr = (int *)(cur_text_section->data + t); - t = *ptr; /* next value */ - *ptr = data; - } -} - -void struct_copy(SValue *d, SValue *s, SValue *c) -{ - if(!c->c.i) - return; - save_reg(TREG_RCX); - load(TREG_RCX, c); - load(TREG_RDI, d); - load(TREG_RSI, s); - o(0xa4f3);// rep movsb -} - -void gen_putz(SValue *d, int size) -{ - if(!size) - return; - save_reg(TREG_RAX); - o(0xb0); - g(0x00); - save_reg(TREG_RCX); - o(0xb8 + REG_VALUE(TREG_RCX)); /* mov $xx, r */ - gen_le32(size); - load(TREG_RDI, d); - o(0xaaf3);//rep stos -} - -/* Generate function call. The function address is pushed first, then - all the parameters in call order. This functions pops all the - parameters and the function address. */ -void gen_offs_sp(int b, int r, int off) -{ - if(r & 0x100) - o(b); - else - orex(1, 0, r, b); - if(!off){ - o(0x2404 | (REG_VALUE(r) << 3)); - }else if (off == (char)off) { - o(0x2444 | (REG_VALUE(r) << 3)); - g(off); - } else { - o(0x2484 | (REG_VALUE(r) << 3)); - gen_le32(off); - } -} - #ifdef TCC_TARGET_PE #define REGN 4 @@ -690,6 +638,24 @@ static int arg_prepare_reg(int idx) { return arg_regs[idx]; } +static int func_scratch; + +/* Generate function call. The function address is pushed first, then + all the parameters in call order. This functions pops all the + parameters and the function address. */ + +void gen_offs_sp(int b, int r, int d) +{ + orex(1,0,r & 0x100 ? 0 : r, b); + if (d == (char)d) { + o(0x2444 | (REG_VALUE(r) << 3)); + g(d); + } else { + o(0x2484 | (REG_VALUE(r) << 3)); + gen_le32(d); + } +} + /* Return the number of registers needed to return the struct, or 0 if returning via struct pointer. */ ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align) @@ -853,8 +819,9 @@ void gfunc_prolog(CType *func_type) Sym *sym; CType *type; - func_ret_sub = func_scratch = r_loc = 0; - pop_stack = loc = 0; + func_ret_sub = 0; + func_scratch = 0; + loc = 0; addr = PTR_SIZE * 2; ind += FUNC_PROLOG_SIZE; @@ -928,7 +895,7 @@ void gfunc_epilog(void) ind = func_sub_sp_offset - FUNC_PROLOG_SIZE; /* align local size to word & save local variables */ v = (func_scratch + -loc + 15) & -16; - reloc_use(r_loc, func_scratch); + if (v >= 4096) { Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0); oad(0xb8, v); /* mov stacksize, %eax */ @@ -948,6 +915,16 @@ void gfunc_epilog(void) #else +static void gadd_sp(int val) +{ + if (val == (char)val) { + o(0xc48348); + g(val); + } else { + oad(0xc48148, val); /* add $xxx, %rsp */ + } +} + typedef enum X86_64_Mode { x86_64_mode_none, x86_64_mode_memory, @@ -986,14 +963,12 @@ static X86_64_Mode classify_x86_64_inner(CType *ty) case VT_BYTE: case VT_SHORT: case VT_LLONG: - case VT_QLONG: case VT_BOOL: case VT_PTR: case VT_FUNC: case VT_ENUM: return x86_64_mode_integer; case VT_FLOAT: - case VT_QFLOAT: case VT_DOUBLE: return x86_64_mode_sse; case VT_LDOUBLE: return x86_64_mode_x87; @@ -1004,7 +979,7 @@ static X86_64_Mode classify_x86_64_inner(CType *ty) // Detect union if (f->next && (f->c == f->next->c)) return x86_64_mode_memory; - + mode = x86_64_mode_none; for (f = f->next; f; f = f->next) mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type)); @@ -1022,7 +997,7 @@ static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *p if (ty->t & (VT_BITFIELD|VT_ARRAY)) { *psize = 8; - *palign = 8; + *palign = 8; *reg_count = 1; ret_t = ty->t; mode = x86_64_mode_integer; @@ -1033,7 +1008,6 @@ static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *p if (size > 16) { mode = x86_64_mode_memory; - ret_t = ty->t; } else { mode = classify_x86_64_inner(ty); switch (mode) { @@ -1042,22 +1016,16 @@ static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *p *reg_count = 2; ret_t = VT_QLONG; } else { - *reg_count = 1; - if(size > 4) - ret_t = VT_LLONG; - else if(size > 2){ - ret_t = VT_INT; - }else if(size > 1) - ret_t = VT_SHORT; - else - ret_t = VT_BYTE; - } - ret_t |= (ty->t & VT_UNSIGNED); + *reg_count = 1; + ret_t = (size > 4) ? VT_LLONG : VT_INT; + } break; + case x86_64_mode_x87: *reg_count = 1; ret_t = VT_LDOUBLE; break; + case x86_64_mode_sse: if (size > 8) { *reg_count = 2; @@ -1067,15 +1035,13 @@ static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *p ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT; } break; - default: - ret_t = ty->t; - break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none*/ + default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none*/ } } } if (ret) { - ret->ref = ty->ref; + ret->ref = NULL; ret->t = ret_t; } @@ -1086,13 +1052,12 @@ ST_FUNC int classify_x86_64_va_arg(CType *ty) { /* This definition must be synced with stdarg.h */ enum __va_arg_type { - __va_gen_reg, __va_float_reg, __va_ld_reg, __va_stack - }; + __va_gen_reg, __va_float_reg, __va_stack + }; int size, align, reg_count; X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, ®_count); switch (mode) { default: return __va_stack; - case x86_64_mode_x87: return __va_ld_reg; case x86_64_mode_integer: return __va_gen_reg; case x86_64_mode_sse: return __va_float_reg; } @@ -1112,21 +1077,26 @@ static const uint8_t arg_regs[REGN] = { TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9 }; +static int arg_prepare_reg(int idx) { + if (idx == 2 || idx == 3) + /* idx=2: r10, idx=3: r11 */ + return idx + 8; + else + return arg_regs[idx]; +} + /* Generate function call. The function address is pushed first, then all the parameters in call order. This functions pops all the parameters and the function address. */ void gfunc_call(int nb_args) { - X86_64_Mode mode; - int size, align, args_size, s, e, i, reg_count; + X86_64_Mode mode; + CType type; + int size, align, r, args_size, stack_adjust, run_start, run_end, i, reg_count; int nb_reg_args = 0; int nb_sse_args = 0; - int gen_reg, sse_reg; - CType type; + int sse_reg, gen_reg; - /* fetch cpu flag before the following sub will change the value */ - if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP) - gv(RC_INT); /* calculate the number of integer/float register arguments */ for(i = 0; i < nb_args; i++) { mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, ®_count); @@ -1136,197 +1106,260 @@ void gfunc_call(int nb_args) nb_reg_args += reg_count; } + /* arguments are collected in runs. Each run is a collection of 8-byte aligned arguments + and ended by a 16-byte aligned argument. This is because, from the point of view of + the callee, argument alignment is computed from the bottom up. */ + /* for struct arguments, we need to call memcpy and the function + call breaks register passing arguments we are preparing. + So, we process arguments which will be passed by stack first. */ + gen_reg = nb_reg_args; + sse_reg = nb_sse_args; + run_start = 0; args_size = 0; - gen_reg = nb_reg_args; - sse_reg = nb_sse_args; - /* for struct arguments, we need to call memcpy and the function - call breaks register passing arguments we are preparing. - So, we process arguments which will be passed by stack first. */ - for(i = 0; i < nb_args; i++) { - mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, ®_count); - switch (mode) { - case x86_64_mode_x87: - if((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) - goto stack_arg1; - else - args_size = (args_size + 15) & ~15; - case x86_64_mode_memory: - stack_arg1: - args_size += size; - break; - case x86_64_mode_sse: - sse_reg -= reg_count; - if (sse_reg + reg_count > 8) - goto stack_arg1; - break; - case x86_64_mode_integer: - gen_reg -= reg_count; - if (gen_reg + reg_count > REGN) - goto stack_arg1; - break; - default: break; /* nothing to be done for x86_64_mode_none */ - } - } - - args_size = (args_size + 15) & ~15; - if (func_scratch < args_size) - func_scratch = args_size; - - gen_reg = nb_reg_args; - sse_reg = nb_sse_args; - for(s = e = 0; s < nb_args; s = e){ - int run_gen, run_sse, st_size; - run_gen = gen_reg; - run_sse = sse_reg; - st_size = 0; - for(i = s; i < nb_args; i++) { - mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, ®_count); - switch (mode) { - case x86_64_mode_x87: - if((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT){ - goto stack_arg2; - }else{ - ++i; - goto doing; - } - case x86_64_mode_memory: - stack_arg2: - st_size += size; - break; - case x86_64_mode_sse: - sse_reg -= reg_count; - if (sse_reg + reg_count > 8) - goto stack_arg2; - break; - case x86_64_mode_integer: - gen_reg -= reg_count; - if (gen_reg + reg_count > REGN) - goto stack_arg2; - break; - default: break; /* nothing to be done for x86_64_mode_none */ - } - } -doing: - e = i; - st_size = -st_size & 15;// 16 - (size & 15) - if(st_size) - args_size -= st_size; - - gen_reg = run_gen; - sse_reg = run_sse; - for(i = s; i < e; i++) { - SValue tmp; - /* Swap argument to top, it will possibly be changed here, - and might use more temps. All arguments must remain on the - stack, so that get_reg can correctly evict some of them onto - stack. We could use also use a vrott(nb_args) at the end - of this loop, but this seems faster. */ - if(i != 0){ - tmp = vtop[0]; - vtop[0] = vtop[-i]; - vtop[-i] = tmp; - } - - mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, ®_count); - switch (mode) { - case x86_64_mode_x87: - /* Must ensure TREG_ST0 only */ - if((vtop->type.t & VT_BTYPE) == VT_STRUCT){ - vdup(); - vtop[-1].r = VT_CONST; - vtop->type = type; - gv(RC_ST0); - args_size -= size; - gen_offs_sp(0xdb, 0x107, args_size); - vtop--;//Release TREG_ST0 - }else{ - gv(RC_ST0); - args_size -= size; - gen_offs_sp(0xdb, 0x107, args_size); - vtop->r = VT_CONST;//Release TREG_ST0 - } + while (run_start != nb_args) { + int run_gen_reg = gen_reg, run_sse_reg = sse_reg; + + run_end = nb_args; + stack_adjust = 0; + for(i = run_start; (i < nb_args) && (run_end == nb_args); i++) { + mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, ®_count); + switch (mode) { + case x86_64_mode_memory: + case x86_64_mode_x87: + stack_arg: + if (align == 16) + run_end = i; + else + stack_adjust += size; break; - case x86_64_mode_memory: - args_size -= size; - vset(&char_pointer_type, TREG_RSP, args_size);/* generate memcpy RSP */ - vpushv(&vtop[-1]); - vtop->type = char_pointer_type; - gaddrof(); - vpushs(size); - struct_copy(&vtop[-2], &vtop[-1], &vtop[0]); - vtop -= 3; - break; - case x86_64_mode_sse: - sse_reg -= reg_count; - if (sse_reg + reg_count > 8){ - args_size -= size; - goto gen_code; - } - break; - case x86_64_mode_integer: - gen_reg -= reg_count; - if (gen_reg + reg_count > REGN){ - args_size -= size; - gen_code: - vset(&type, TREG_RSP | VT_LVAL, args_size); - vpushv(&vtop[-1]); - vtop->type = type; - vstore(); - vtop--; - } - break; - default: break; /* nothing to be done for x86_64_mode_none */ - } - if(i != 0){ - tmp = vtop[0]; - vtop[0] = vtop[-i]; - vtop[-i] = tmp; - } - } - run_gen = gen_reg; - run_sse = sse_reg; - } + + case x86_64_mode_sse: + sse_reg -= reg_count; + if (sse_reg + reg_count > 8) goto stack_arg; + break; + + case x86_64_mode_integer: + gen_reg -= reg_count; + if (gen_reg + reg_count > REGN) goto stack_arg; + break; + default: break; /* nothing to be done for x86_64_mode_none */ + } + } + + gen_reg = run_gen_reg; + sse_reg = run_sse_reg; + + /* adjust stack to align SSE boundary */ + if (stack_adjust &= 15) { + /* fetch cpu flag before the following sub will change the value */ + if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP) + gv(RC_INT); - gen_reg = nb_reg_args; - sse_reg = nb_sse_args; + stack_adjust = 16 - stack_adjust; + o(0x48); + oad(0xec81, stack_adjust); /* sub $xxx, %rsp */ + args_size += stack_adjust; + } + + for(i = run_start; i < run_end;) { + /* Swap argument to top, it will possibly be changed here, + and might use more temps. At the end of the loop we keep + in on the stack and swap it back to its original position + if it is a register. */ + SValue tmp = vtop[0]; + vtop[0] = vtop[-i]; + vtop[-i] = tmp; + + mode = classify_x86_64_arg(&vtop->type, NULL, &size, &align, ®_count); + + int arg_stored = 1; + switch (vtop->type.t & VT_BTYPE) { + case VT_STRUCT: + if (mode == x86_64_mode_sse) { + if (sse_reg > 8) + sse_reg -= reg_count; + else + arg_stored = 0; + } else if (mode == x86_64_mode_integer) { + if (gen_reg > REGN) + gen_reg -= reg_count; + else + arg_stored = 0; + } + + if (arg_stored) { + /* allocate the necessary size on stack */ + o(0x48); + oad(0xec81, size); /* sub $xxx, %rsp */ + /* generate structure store */ + r = get_reg(RC_INT); + orex(1, r, 0, 0x89); /* mov %rsp, r */ + o(0xe0 + REG_VALUE(r)); + vset(&vtop->type, r | VT_LVAL, 0); + vswap(); + vstore(); + args_size += size; + } + break; + + case VT_LDOUBLE: + assert(0); + break; + + case VT_FLOAT: + case VT_DOUBLE: + assert(mode == x86_64_mode_sse); + if (sse_reg > 8) { + --sse_reg; + r = gv(RC_FLOAT); + o(0x50); /* push $rax */ + /* movq %xmmN, (%rsp) */ + o(0xd60f66); + o(0x04 + REG_VALUE(r)*8); + o(0x24); + args_size += size; + } else { + arg_stored = 0; + } + break; + + default: + assert(mode == x86_64_mode_integer); + /* simple type */ + /* XXX: implicit cast ? */ + if (gen_reg > REGN) { + --gen_reg; + r = gv(RC_INT); + orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */ + args_size += size; + } else { + arg_stored = 0; + } + break; + } + + /* And swap the argument back to it's original position. */ + tmp = vtop[0]; + vtop[0] = vtop[-i]; + vtop[-i] = tmp; + + if (arg_stored) { + vrotb(i+1); + assert((vtop->type.t == tmp.type.t) && (vtop->r == tmp.r)); + vpop(); + --nb_args; + --run_end; + } else { + ++i; + } + } + + /* handle 16 byte aligned arguments at end of run */ + run_start = i = run_end; + while (i < nb_args) { + /* Rotate argument to top since it will always be popped */ + mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, ®_count); + if (align != 16) + break; + + vrotb(i+1); + + if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { + gv(RC_ST0); + oad(0xec8148, size); /* sub $xxx, %rsp */ + o(0x7cdb); /* fstpt 0(%rsp) */ + g(0x24); + g(0x00); + args_size += size; + } else { + //assert(mode == x86_64_mode_memory); + + /* allocate the necessary size on stack */ + o(0x48); + oad(0xec81, size); /* sub $xxx, %rsp */ + /* generate structure store */ + r = get_reg(RC_INT); + orex(1, r, 0, 0x89); /* mov %rsp, r */ + o(0xe0 + REG_VALUE(r)); + vset(&vtop->type, r | VT_LVAL, 0); + vswap(); + vstore(); + args_size += size; + } + + vpop(); + --nb_args; + } + } + + /* XXX This should be superfluous. */ + save_regs(0); /* save used temporary registers */ + + /* then, we prepare register passing arguments. + Note that we cannot set RDX and RCX in this loop because gv() + may break these temporary registers. Let's use R10 and R11 + instead of them */ + assert(gen_reg <= REGN); + assert(sse_reg <= 8); for(i = 0; i < nb_args; i++) { - int d; - mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, ®_count); + mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, ®_count); /* Alter stack entry type so that gv() knows how to treat it */ vtop->type = type; - /* Alter stack entry type so that gv() knows how to treat it */ if (mode == x86_64_mode_sse) { - sse_reg -= reg_count; - if (sse_reg + reg_count <= 8) { - if (reg_count == 2) { - ex_rc = RC_XMM0 << (sse_reg + 1); - gv(RC_XMM0 << sse_reg); - }else{ - assert(reg_count == 1); - /* Load directly to register */ - gv(RC_XMM0 << sse_reg); - } - } + if (reg_count == 2) { + sse_reg -= 2; + gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */ + if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */ + /* movaps %xmm0, %xmmN */ + o(0x280f); + o(0xc0 + (sse_reg << 3)); + /* movaps %xmm1, %xmmN */ + o(0x280f); + o(0xc1 + ((sse_reg+1) << 3)); + } + } else { + assert(reg_count == 1); + --sse_reg; + /* Load directly to register */ + gv(RC_XMM0 << sse_reg); + } } else if (mode == x86_64_mode_integer) { - gen_reg -= reg_count; - if (gen_reg + reg_count <= REGN) { - if (reg_count == 2) { - d = arg_regs[gen_reg+1]; - ex_rc = reg_classes[d] & ~RC_MASK; - d = arg_regs[gen_reg]; - gv(reg_classes[d] & ~RC_MASK); - }else{ - assert(reg_count == 1); - d = arg_regs[gen_reg]; - gv(reg_classes[d] & ~RC_MASK); - } - } + /* simple type */ + /* XXX: implicit cast ? */ + gen_reg -= reg_count; + r = gv(RC_INT); + int d = arg_prepare_reg(gen_reg); + orex(1,d,r,0x89); /* mov */ + o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d)); + if (reg_count == 2) { + d = arg_prepare_reg(gen_reg+1); + orex(1,d,vtop->r2,0x89); /* mov */ + o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d)); + } } - vpop(); + vtop--; } - save_regs(0); + assert(gen_reg == 0); + assert(sse_reg == 0); + + /* We shouldn't have many operands on the stack anymore, but the + call address itself is still there, and it might be in %eax + (or edx/ecx) currently, which the below writes would clobber. + So evict all remaining operands here. */ + save_regs(0); + + /* Copy R10 and R11 into RDX and RCX, respectively */ + if (nb_reg_args > 2) { + o(0xd2894c); /* mov %r10, %rdx */ + if (nb_reg_args > 3) { + o(0xd9894c); /* mov %r11, %rcx */ + } + } + oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */ gcall_or_jmp(0); + if (args_size) + gadd_sp(args_size); vtop--; } @@ -1349,8 +1382,7 @@ void gfunc_prolog(CType *func_type) sym = func_type->ref; addr = PTR_SIZE * 2; - pop_stack = loc = 0; - func_scratch = r_loc = 0; + loc = 0; ind += FUNC_PROLOG_SIZE; func_sub_sp_offset = ind; func_ret_sub = 0; @@ -1361,6 +1393,7 @@ void gfunc_prolog(CType *func_type) /* frame pointer and return address */ seen_stack_size = PTR_SIZE * 2; /* count the number of seen parameters */ + sym = func_type->ref; while ((sym = sym->next) != NULL) { type = &sym->type; mode = classify_x86_64_arg(type, NULL, &size, &align, ®_count); @@ -1371,7 +1404,7 @@ void gfunc_prolog(CType *func_type) break; case x86_64_mode_integer: - if (seen_reg_num + reg_count <= REGN) { + if (seen_reg_num + reg_count <= 8) { seen_reg_num += reg_count; } else { seen_reg_num = 8; @@ -1401,19 +1434,19 @@ void gfunc_prolog(CType *func_type) o(0xf845c7); gen_le32(seen_stack_size); - o(0xc084);/* test %al,%al */ - o(0x74);/* je */ - g(4*(8 - seen_sse_num) + 3); - /* save all register passing arguments */ for (i = 0; i < 8; i++) { loc -= 16; - o(0x290f);/* movaps %xmm1-7,-XXX(%rbp) */ + o(0xd60f66); /* movq */ gen_modrm(7 - i, VT_LOCAL, NULL, loc); + /* movq $0, loc+8(%rbp) */ + o(0x85c748); + gen_le32(loc + 8); + gen_le32(0); + } + for (i = 0; i < REGN; i++) { + push_arg_reg(REGN-1-i); } - for (i = 0; i < (REGN - seen_reg_num); i++) { - push_arg_reg(REGN-1 - i); - } } sym = func_type->ref; @@ -1497,8 +1530,7 @@ void gfunc_epilog(void) g(func_ret_sub >> 8); } /* align local size to word & save local variables */ - v = (func_scratch -loc + 15) & -16; - reloc_use(r_loc, func_scratch); + v = (-loc + 15) & -16; saved_ind = ind; ind = func_sub_sp_offset - FUNC_PROLOG_SIZE; o(0xe5894855); /* push %rbp, mov %rsp, %rbp */ @@ -1556,7 +1588,7 @@ int gtst(int inv, int t) } g(0x0f); t = psym((vtop->c.i - 16) ^ inv, t); - } else if (v == VT_JMP || v == VT_JMPI) { + } else { /* VT_JMP || VT_JMPI */ /* && or || optimization */ if ((v & 1) == inv) { /* insert vtop->c jump list in t */ @@ -1569,23 +1601,6 @@ int gtst(int inv, int t) t = gjmp(t); gsym(vtop->c.i); } - } else { - if (is_float(vtop->type.t) || - (vtop->type.t & VT_BTYPE) == VT_LLONG) { - vpushi(0); - gen_op(TOK_NE); - } - if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) { - /* constant jmp optimization */ - if ((vtop->c.i != 0) != inv) - t = gjmp(t); - } else { - v = gv(RC_INT); - orex(0,v,v,0x85); - o(0xc0 + REG_VALUE(v) * 9); - g(0x0f); - t = psym(0x85 ^ inv, t); - } } vtop--; return t; @@ -1594,42 +1609,39 @@ int gtst(int inv, int t) /* generate an integer binary operation */ void gen_opi(int op) { - int r, fr, opc, fc, c, ll, uu, cc, tt2; + int r, fr, opc, c; + int ll, uu, cc; - fr = vtop[0].r; - fc = vtop->c.ul; ll = is64_type(vtop[-1].type.t); - cc = (fr & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST; - tt2 = (fr & (VT_LVAL | VT_LVAL_TYPE)) == VT_LVAL; + uu = (vtop[-1].type.t & VT_UNSIGNED) != 0; + cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST; switch(op) { case '+': case TOK_ADDC1: /* add with carry generation */ opc = 0; gen_op8: - vswap(); - r = gv(RC_INT); - vswap(); if (cc && (!ll || (int)vtop->c.ll == vtop->c.ll)) { /* constant case */ + vswap(); + r = gv(RC_INT); + vswap(); c = vtop->c.i; if (c == (char)c) { /* XXX: generate inc and dec for smaller code ? */ - orex(ll, r, 0, 0x83); - o(0xc0 + REG_VALUE(r) + opc*8); - g(c); + orex(ll, r, 0, 0x83); + o(0xc0 | (opc << 3) | REG_VALUE(r)); + g(c); } else { orex(ll, r, 0, 0x81); - oad(0xc0 + REG_VALUE(r) + opc*8, c); + oad(0xc0 | (opc << 3) | REG_VALUE(r), c); } } else { - if(!tt2) - fr = gv(RC_INT); - orex(ll, fr, r, 0x03 + opc*8); - if(fr >= VT_CONST) - gen_modrm(r, fr, vtop->sym, fc); - else - o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8); + gv2(RC_INT, RC_INT); + r = vtop[-1].r; + fr = vtop[0].r; + orex(ll, r, fr, (opc << 3) | 0x01); + o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8); } vtop--; if (op >= TOK_ULT && op <= TOK_GT) { @@ -1657,27 +1669,11 @@ void gen_opi(int op) opc = 1; goto gen_op8; case '*': - opc = 5; - vswap(); - r = gv(RC_INT); - vswap(); - if(!tt2) - fr = gv(RC_INT); - if(r == TREG_RAX){ - if(fr != TREG_RDX) - save_reg(TREG_RDX); - orex(ll, fr, r, 0xf7); - if(fr >= VT_CONST) - gen_modrm(opc, fr, vtop->sym, fc); - else - o(0xc0 + REG_VALUE(fr) + opc*8); - }else{ - orex(ll, fr, r, 0xaf0f); /* imul fr, r */ - if(fr >= VT_CONST) - gen_modrm(r, fr, vtop->sym, fc); - else - o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8); - } + gv2(RC_INT, RC_INT); + r = vtop[-1].r; + fr = vtop[0].r; + orex(ll, fr, r, 0xaf0f); /* imul fr, r */ + o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8); vtop--; break; case TOK_SHL: @@ -1689,62 +1685,47 @@ void gen_opi(int op) case TOK_SAR: opc = 7; gen_shift: + opc = 0xc0 | (opc << 3); if (cc) { /* constant case */ vswap(); r = gv(RC_INT); vswap(); - c = vtop->c.i; - if(c == 1){ - orex(ll, r, 0, 0xd1); - o(0xc0 + REG_VALUE(r) + opc*8); - }else{ - orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */ - o(0xc0 + REG_VALUE(r) + opc*8); - g(c & (ll ? 0x3f : 0x1f)); - } + orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */ + o(opc | REG_VALUE(r)); + g(vtop->c.i & (ll ? 63 : 31)); } else { /* we generate the shift in ecx */ gv2(RC_INT, RC_RCX); r = vtop[-1].r; orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */ - o(0xc0 + REG_VALUE(r) + opc*8); + o(opc | REG_VALUE(r)); } vtop--; break; case TOK_UDIV: case TOK_UMOD: - opc = 6; uu = 1; goto divmod; case '/': case '%': case TOK_PDIV: - opc = 7; uu = 0; divmod: /* first operand must be in eax */ /* XXX: need better constraint for second operand */ - if(!tt2){ - gv2(RC_RAX, RC_INT2); - fr = vtop[0].r; - }else{ - vswap(); - gv(RC_RAX); - vswap(); - } - save_reg(TREG_RDX); - orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cdq RDX:RAX <- sign-extend of RAX. */ - orex(ll, fr, 0, 0xf7); /* div fr, %eax */ - if(fr >= VT_CONST) - gen_modrm(opc, fr, vtop->sym, fc); - else - o(0xc0 + REG_VALUE(fr) + opc*8); + gv2(RC_RAX, RC_RCX); + r = vtop[-1].r; + fr = vtop[0].r; + vtop--; + save_reg(TREG_RDX); + orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */ + orex(ll, fr, 0, 0xf7); /* div fr, %eax */ + o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr)); if (op == '%' || op == TOK_UMOD) r = TREG_RDX; else r = TREG_RAX; - vtop--; vtop->r = r; break; default: @@ -1763,8 +1744,9 @@ void gen_opl(int op) /* XXX: need to use ST1 too */ void gen_opf(int op) { - int a, ft, fc, swapped, fr, r; - int float_type = (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT; + int a, ft, fc, swapped, r; + int float_type = + (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT; /* convert constants to memory references */ if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) { @@ -1775,23 +1757,21 @@ void gen_opf(int op) if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) gv(float_type); - swapped = 0; - fc = vtop->c.ul; - ft = vtop->type.t; - - if ((ft & VT_BTYPE) == VT_LDOUBLE) { - /* swap the stack if needed so that t1 is the register and t2 is - the memory reference */ - /* must put at least one value in the floating point register */ - if ((vtop[-1].r & VT_LVAL) && (vtop[0].r & VT_LVAL)) { - vswap(); - gv(float_type); - vswap(); - } - if (vtop[-1].r & VT_LVAL) { - vswap(); - swapped = 1; - } + /* must put at least one value in the floating point register */ + if ((vtop[-1].r & VT_LVAL) && + (vtop[0].r & VT_LVAL)) { + vswap(); + gv(float_type); + vswap(); + } + swapped = 0; + /* swap the stack if needed so that t1 is the register and t2 is + the memory reference */ + if (vtop[-1].r & VT_LVAL) { + vswap(); + swapped = 1; + } + if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { if (op >= TOK_ULT && op <= TOK_GT) { /* load on stack second operand */ load(TREG_ST0, vtop); @@ -1802,10 +1782,10 @@ void gen_opf(int op) swapped = 0; if (swapped) o(0xc9d9); /* fxch %st(1) */ - if (op == TOK_EQ || op == TOK_NE) - o(0xe9da); /* fucompp */ - else - o(0xd9de); /* fcompp */ + if (op == TOK_EQ || op == TOK_NE) + o(0xe9da); /* fucompp */ + else + o(0xd9de); /* fcompp */ o(0xe0df); /* fnstsw %ax */ if (op == TOK_EQ) { o(0x45e480); /* and $0x45, %ah */ @@ -1828,6 +1808,7 @@ void gen_opf(int op) /* no memory reference possible for long double operations */ load(TREG_ST0, vtop); swapped = !swapped; + switch(op) { default: case '+': @@ -1847,45 +1828,63 @@ void gen_opf(int op) a++; break; } + ft = vtop->type.t; + fc = vtop->c.ul; o(0xde); /* fxxxp %st, %st(1) */ o(0xc1 + (a << 3)); vtop--; } } else { - vswap(); - gv(float_type); - vswap(); - fr = vtop->r; - r = vtop[-1].r; if (op >= TOK_ULT && op <= TOK_GT) { - switch(op){ - case TOK_LE: - op = TOK_ULE; /* setae */ - break; - case TOK_LT: - op = TOK_ULT; - break; - case TOK_GE: - op = TOK_UGE; - break; - case TOK_GT: - op = TOK_UGT; /* seta */ - break; - } - assert(!(vtop[-1].r & VT_LVAL)); - if ((ft & VT_BTYPE) == VT_DOUBLE) - o(0x66); - o(0x2e0f); /* ucomisd */ - if(fr >= VT_CONST) - gen_modrm(r, fr, vtop->sym, fc); - else - o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8); + /* if saved lvalue, then we must reload it */ + r = vtop->r; + fc = vtop->c.ul; + if ((r & VT_VALMASK) == VT_LLOCAL) { + SValue v1; + r = get_reg(RC_INT); + v1.type.t = VT_PTR; + v1.r = VT_LOCAL | VT_LVAL; + v1.c.ul = fc; + load(r, &v1); + fc = 0; + } + + if (op == TOK_EQ || op == TOK_NE) { + swapped = 0; + } else { + if (op == TOK_LE || op == TOK_LT) + swapped = !swapped; + if (op == TOK_LE || op == TOK_GE) { + op = 0x93; /* setae */ + } else { + op = 0x97; /* seta */ + } + } + + if (swapped) { + gv(RC_FLOAT); + vswap(); + } + assert(!(vtop[-1].r & VT_LVAL)); + + if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) + o(0x66); + if (op == TOK_EQ || op == TOK_NE) + o(0x2e0f); /* ucomisd */ + else + o(0x2f0f); /* comisd */ + + if (vtop->r & VT_LVAL) { + gen_modrm(vtop[-1].r, r, vtop->sym, fc); + } else { + o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8); + } + vtop--; vtop->r = VT_CMP; vtop->c.i = op | 0x100; } else { - assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE); - /* no memory reference possible for long double operations */ + assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE); switch(op) { default: case '+': @@ -1901,20 +1900,44 @@ void gen_opf(int op) a = 6; break; } - assert((ft & VT_BTYPE) != VT_LDOUBLE); - assert(!(vtop[-1].r & VT_LVAL)); - if ((ft & VT_BTYPE) == VT_DOUBLE) { - o(0xf2); - } else { - o(0xf3); - } - o(0x0f); - o(0x58 + a); - if(fr >= VT_CONST) - gen_modrm(r, fr, vtop->sym, fc); - else - o(0xc0 + REG_VALUE(fr) + REG_VALUE(r)*8); - vtop--; + ft = vtop->type.t; + fc = vtop->c.ul; + assert((ft & VT_BTYPE) != VT_LDOUBLE); + + r = vtop->r; + /* if saved lvalue, then we must reload it */ + if ((vtop->r & VT_VALMASK) == VT_LLOCAL) { + SValue v1; + r = get_reg(RC_INT); + v1.type.t = VT_PTR; + v1.r = VT_LOCAL | VT_LVAL; + v1.c.ul = fc; + load(r, &v1); + fc = 0; + } + + assert(!(vtop[-1].r & VT_LVAL)); + if (swapped) { + assert(vtop->r & VT_LVAL); + gv(RC_FLOAT); + vswap(); + } + + if ((ft & VT_BTYPE) == VT_DOUBLE) { + o(0xf2); + } else { + o(0xf3); + } + o(0x0f); + o(0x58 + a); + + if (vtop->r & VT_LVAL) { + gen_modrm(vtop[-1].r, r, vtop->sym, fc); + } else { + o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8); + } + + vtop--; } } } @@ -1923,96 +1946,103 @@ void gen_opf(int op) and 'long long' cases. */ void gen_cvt_itof(int t) { - int ft, bt, tbt, r; - - ft = vtop->type.t; - bt = ft & VT_BTYPE; - tbt = t & VT_BTYPE; - r = gv(RC_INT); - - if (tbt == VT_LDOUBLE) { + if ((t & VT_BTYPE) == VT_LDOUBLE) { save_reg(TREG_ST0); - if ((ft & VT_BTYPE) == VT_LLONG) { + gv(RC_INT); + if ((vtop->type.t & VT_BTYPE) == VT_LLONG) { /* signed long long to float/double/long double (unsigned case is handled generically) */ - o(0x50 + REG_VALUE(r)); /* push r */ + o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ o(0x242cdf); /* fildll (%rsp) */ o(0x08c48348); /* add $8, %rsp */ - } else if ((ft & (VT_BTYPE | VT_UNSIGNED)) == (VT_INT | VT_UNSIGNED)) { + } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) == + (VT_INT | VT_UNSIGNED)) { /* unsigned int to float/double/long double */ o(0x6a); /* push $0 */ g(0x00); - o(0x50 + REG_VALUE(r)); /* push r */ + o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ o(0x242cdf); /* fildll (%rsp) */ o(0x10c48348); /* add $16, %rsp */ } else { /* int to float/double/long double */ - o(0x50 + REG_VALUE(r)); /* push r */ + o(0x50 + (vtop->r & VT_VALMASK)); /* push r */ o(0x2404db); /* fildl (%rsp) */ o(0x08c48348); /* add $8, %rsp */ } vtop->r = TREG_ST0; } else { - int r_xmm; - r_xmm = get_reg(RC_FLOAT); - o(0xf2 + (tbt == VT_FLOAT)); - if ((ft & (VT_BTYPE | VT_UNSIGNED)) == (VT_INT | VT_UNSIGNED) || bt == VT_LLONG) { + int r = get_reg(RC_FLOAT); + gv(RC_INT); + o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0)); + if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) == + (VT_INT | VT_UNSIGNED) || + (vtop->type.t & VT_BTYPE) == VT_LLONG) { o(0x48); /* REX */ } o(0x2a0f); - o(0xc0 + REG_VALUE(r) + REG_VALUE(r_xmm)*8); /* cvtsi2sd or cvtsi2ss */ - vtop->r = r_xmm; + o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */ + vtop->r = r; } } /* convert from one floating point type to another */ void gen_cvt_ftof(int t) { - int ft, bt, tbt, r; + int ft, bt, tbt; ft = vtop->type.t; bt = ft & VT_BTYPE; tbt = t & VT_BTYPE; - - if(bt == VT_LDOUBLE) - r = get_reg(RC_FLOAT); - else - r = gv(RC_FLOAT); - if (bt == VT_FLOAT) { + + if (bt == VT_FLOAT) { + gv(RC_FLOAT); if (tbt == VT_DOUBLE) { + o(0x140f); /* unpcklps */ + o(0xc0 + REG_VALUE(vtop->r)*9); o(0x5a0f); /* cvtps2pd */ - o(0xc0 + REG_VALUE(r) + REG_VALUE(r) * 8); + o(0xc0 + REG_VALUE(vtop->r)*9); } else if (tbt == VT_LDOUBLE) { - /* movss %xmm0-7,-0x10(%rsp) */ + save_reg(RC_ST0); + /* movss %xmm0,-0x10(%rsp) */ o(0x110ff3); - o(0xf02444 + REG_VALUE(r)*8); + o(0x44 + REG_VALUE(vtop->r)*8); + o(0xf024); o(0xf02444d9); /* flds -0x10(%rsp) */ vtop->r = TREG_ST0; } } else if (bt == VT_DOUBLE) { + gv(RC_FLOAT); if (tbt == VT_FLOAT) { + o(0x140f66); /* unpcklpd */ + o(0xc0 + REG_VALUE(vtop->r)*9); o(0x5a0f66); /* cvtpd2ps */ - o(0xc0 + REG_VALUE(r) + REG_VALUE(r) * 8); + o(0xc0 + REG_VALUE(vtop->r)*9); } else if (tbt == VT_LDOUBLE) { - /* movsd %xmm0-7,-0x10(%rsp) */ + save_reg(RC_ST0); + /* movsd %xmm0,-0x10(%rsp) */ o(0x110ff2); - o(0xf02444 + REG_VALUE(r)*8); + o(0x44 + REG_VALUE(vtop->r)*8); + o(0xf024); o(0xf02444dd); /* fldl -0x10(%rsp) */ vtop->r = TREG_ST0; } } else { + int r; gv(RC_ST0); + r = get_reg(RC_FLOAT); if (tbt == VT_DOUBLE) { o(0xf0245cdd); /* fstpl -0x10(%rsp) */ - /* movsd -0x10(%rsp),%xmm0-7 */ + /* movsd -0x10(%rsp),%xmm0 */ o(0x100ff2); - o(0xf02444 + REG_VALUE(r)*8); + o(0x44 + REG_VALUE(r)*8); + o(0xf024); vtop->r = r; } else if (tbt == VT_FLOAT) { o(0xf0245cd9); /* fstps -0x10(%rsp) */ - /* movss -0x10(%rsp),%xmm0-7 */ + /* movss -0x10(%rsp),%xmm0 */ o(0x100ff3); - o(0xf02444 + REG_VALUE(r)*8); + o(0x44 + REG_VALUE(r)*8); + o(0xf024); vtop->r = r; } } @@ -2021,20 +2051,20 @@ void gen_cvt_ftof(int t) /* convert fp to int 't' type */ void gen_cvt_ftoi(int t) { - int ft, bt, ll, r, r_xmm; - + int ft, bt, size, r; ft = vtop->type.t; bt = ft & VT_BTYPE; - if (bt == VT_LDOUBLE) { gen_cvt_ftof(VT_DOUBLE); bt = VT_DOUBLE; } - r_xmm = gv(RC_FLOAT); - if ((t & VT_BTYPE) == VT_INT) - ll = 0; + + gv(RC_FLOAT); + if (t != VT_INT) + size = 8; else - ll = 1; + size = 4; + r = get_reg(RC_INT); if (bt == VT_FLOAT) { o(0xf3); @@ -2043,8 +2073,8 @@ void gen_cvt_ftoi(int t) } else { assert(0); } - orex(ll, r, r_xmm, 0x2c0f); /* cvttss2si or cvttsd2si */ - o(0xc0 + REG_VALUE(r_xmm) + (REG_VALUE(r) << 3)); + orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */ + o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8); vtop->r = r; } @@ -2068,19 +2098,27 @@ ST_FUNC void gen_vla_sp_restore(int addr) { /* Subtract from the stack pointer, and push the resulting value onto the stack */ ST_FUNC void gen_vla_alloc(CType *type, int align) { +#ifdef TCC_TARGET_PE + /* alloca does more than just adjust %rsp on Windows */ + vpush_global_sym(&func_old_type, TOK_alloca); + vswap(); /* Move alloca ref past allocation size */ + gfunc_call(1); + vset(type, REG_IRET, 0); +#else int r; r = gv(RC_INT); /* allocation size */ /* sub r,%rsp */ o(0x2b48); o(0xe0 | REG_VALUE(r)); - /* and ~15, %rsp */ + /* We align to 16 bytes rather than align */ + /* and ~15, %rsp */ o(0xf0e48348); /* mov %rsp, r */ - orex(1, 0, r, 0x8d); - o(0x2484 | (REG_VALUE(r)*8)); - r_loc = reloc_add(r_loc); + o(0x8948); + o(0xe0 | REG_VALUE(r)); vpop(); vset(type, r, 0); +#endif }