tinycc

mirror of https://github.com/frida/tinycc synced 2024-12-01 20:07:03 +03:00

Author	SHA1	Message	Date
Kirill Smelkov	63193d1794	Optimize vswap() vswap() is called often enough and shows in profile and it was easy to hand optimize swapping vtop[-1] and vtop[0] - instead of large (28 bytes on i386) tmp variable and two memory to memory copies, let's swap areas by longs through registers with streamlined assembly. For $ ./tcc -B. -bench -DONE_SOURCE -DCONFIG_MULTIARCHDIR=\"i386-linux-gnu\" -c tcc.c before: # Overhead Command Shared Object Symbol # ........ ........... ................... .............................................. # 15.19% tcc tcc [.] next_nomacro1 5.19% tcc libc-2.13.so [.] _int_malloc 4.57% tcc tcc [.] next 3.36% tcc tcc [.] tok_str_add2 3.03% tcc tcc [.] macro_subst_tok 2.93% tcc tcc [.] macro_subst 2.53% tcc tcc [.] next_nomacro_spc 2.49% tcc tcc [.] vswap 2.36% tcc libc-2.13.so [.] _int_free │ ST_FUNC void vswap(void) │ { 1,96 │ push %edi 2,65 │ push %esi 1,08 │ sub $0x20,%esp │ SValue tmp; │ │ /* cannot let cpu flags if other instruction are generated. Also │ avoid leaving VT_JMP anywhere except on the top of the stack │ because it would complicate the code generator. / │ if (vtop >= vstack) { 0,98 │ mov 0x8078cac,%eax │ cmp $0x8078d3c,%eax 1,18 │ ┌──jb 24 │ │ int v = vtop->r & VT_VALMASK; 1,08 │ │ mov 0x8(%eax),%edx 0,78 │ │ and $0x3f,%edx │ │ if (v == VT_CMP \|\| (v & ~1) == VT_JMP) 0,78 │ │ cmp $0x33,%edx 0,69 │ │↓ je 54 0,59 │ │ and $0xfffffffe,%edx 0,49 │ │ cmp $0x34,%edx 0,29 │ │↓ je 54 │ │ gv(RC_INT); │ │ } │ │ tmp = vtop[0]; 1,08 │24:└─→lea 0x4(%esp),%edi 0,39 │ mov $0x7,%ecx │ mov %eax,%esi 14,41 │ rep movsl %ds:(%esi),%es:(%edi) │ vtop[0] = vtop[-1]; 9,51 │ lea -0x1c(%eax),%esi 1,96 │ mov $0x7,%cl │ mov %eax,%edi 17,06 │ rep movsl %ds:(%esi),%es:(%edi) │ vtop[-1] = tmp; 10,20 │ mov 0x8078cac,%edi 2,35 │ sub $0x1c,%edi 0,78 │ lea 0x4(%esp),%esi │ mov $0x7,%cl 15,20 │ rep movsl %ds:(%esi),%es:(%edi) │ } 9,90 │ add $0x20,%esp 2,25 │ pop %esi 1,67 │ pop %edi 0,69 │ ret after: # Overhead Command Shared Object Symbol # ........ 
........... ................... .............................................. # 15.27% tcc tcc [.] next_nomacro1 5.08% tcc libc-2.13.so [.] _int_malloc 4.57% tcc tcc [.] next 3.17% tcc tcc [.] tok_str_add2 3.12% tcc tcc [.] macro_subst 2.99% tcc tcc [.] macro_subst_tok 2.43% tcc tcc [.] next_nomacro_spc 2.32% tcc libc-2.13.so [.] _int_free . . . 0.71% tcc tcc [.] vswap │ ST_FUNC void vswap(void) │ { 7,22 │ push %eax │ / cannot let cpu flags if other instruction are generated. Also │ avoid leaving VT_JMP anywhere except on the top of the stack │ because it would complicate the code generator. / │ if (vtop >= vstack) { 11,34 │ mov 0x8078cac,%eax 2,75 │ cmp $0x8078d3c,%eax 0,34 │ ┌──jb 20 │ │ int v = vtop->r & VT_VALMASK; 0,34 │ │ mov 0x8(%eax),%edx 8,93 │ │ and $0x3f,%edx │ │ if (v == VT_CMP \|\| (v & ~1) == VT_JMP) 2,06 │ │ cmp $0x33,%edx 2,41 │ │↓ je 74 2,41 │ │ and $0xfffffffe,%edx 0,34 │ │ cmp $0x34,%edx 2,41 │ │↓ je 74 │ │ vtopl[-1VSIZEL + i] = tmpl; \ │ │ } do {} while (0) │ │ │ │ VSWAPL(15); VSWAPL(14); VSWAPL(13); VSWAPL(12); │ │ VSWAPL(11); VSWAPL(10); VSWAPL( 9); VSWAPL( 8); │ │ VSWAPL( 7); VSWAPL( 6); VSWAPL( 5); VSWAPL( 4); 2,06 │20:└─→mov 0x18(%eax),%edx 1,37 │ mov -0x4(%eax),%ecx 2,06 │ mov %ecx,0x18(%eax) 1,37 │ mov %edx,-0x4(%eax) 2,06 │ mov 0x14(%eax),%edx 2,06 │ mov -0x8(%eax),%ecx 2,41 │ mov %ecx,0x14(%eax) 3,09 │ mov %edx,-0x8(%eax) 3,09 │ mov 0x10(%eax),%edx 1,72 │ mov -0xc(%eax),%ecx 2,75 │ mov %ecx,0x10(%eax) 1,72 │ mov %edx,-0xc(%eax) │ VSWAPL( 3); VSWAPL( 2); VSWAPL( 1); VSWAPL( 0); 2,41 │ mov 0xc(%eax),%edx 2,41 │ mov -0x10(%eax),%ecx 2,41 │ mov %ecx,0xc(%eax) 0,69 │ mov %edx,-0x10(%eax) 1,72 │ mov 0x8(%eax),%edx 0,69 │ mov -0x14(%eax),%ecx 1,03 │ mov %ecx,0x8(%eax) 1,37 │ mov %edx,-0x14(%eax) 1,37 │ mov 0x4(%eax),%edx 0,69 │ mov -0x18(%eax),%ecx 3,09 │ mov %ecx,0x4(%eax) 2,06 │ mov %edx,-0x18(%eax) 1,37 │ mov (%eax),%edx 2,41 │ mov -0x1c(%eax),%ecx 1,37 │ mov %ecx,(%eax) 4,12 │ mov %edx,-0x1c(%eax) │ } │ │ # undef VSWAPL │ # undef VSIZEL │ 
} 1,03 │ pop %eax 3,44 │ ret Overall speedup: # best of 5 runs before: 8268 idents, 47203 lines, 1526763 bytes, 0.148 s, 319217 lines/s, 10.3 MB/s after: 8273 idents, 47231 lines, 1527685 bytes, 0.146 s, 324092 lines/s, 10.5 MB/s Static ASSERT macro taken from CCAN's[1] build_assert[2] which is in the public domain. [1] http://ccodearchive.net/ [2] http://git.ozlabs.org/?p=ccan;a=blob;f=ccan/build_assert/build_assert.h;h=24e59c44cd930173178ac9b6e101b0af64a879e9;hb=HEAD	2012-12-21 20:46:26 +04:00
Kirill Smelkov	8eb92e6052	Optimize cstr_reset() to only reset string to empty, not call free() and later malloc() A CString could be reset to empty just setting its .size to 0. If memory was already allocated, that would be remembered in .data_allocated and .size_allocated and on consequent string manipulations that memory will be used without immediate need to call malloc(). For $ ./tcc -B. -bench -DONE_SOURCE -DCONFIG_MULTIARCHDIR=\"i386-linux-gnu\" -c tcc.c after the patch malloc/free are called less often: (tcc is run in loop; perf record -a sleep 10 && perf report) before: # Overhead Command Shared Object Symbol # ........ ........... .................. .......................................... # 13.89% tcc tcc [.] next_nomacro1 4.73% tcc libc-2.13.so [.] _int_malloc 4.39% tcc tcc [.] next 2.94% tcc tcc [.] tok_str_add2 2.78% tcc tcc [.] macro_subst_tok 2.75% tcc libc-2.13.so [.] free 2.74% tcc tcc [.] macro_subst 2.63% tcc libc-2.13.so [.] _int_free 2.28% tcc tcc [.] vswap 2.24% tcc tcc [.] next_nomacro_spc 2.06% tcc libc-2.13.so [.] realloc 2.00% tcc libc-2.13.so [.] malloc 1.99% tcc tcc [.] unary 1.85% tcc libc-2.13.so [.] __i686.get_pc_thunk.bx 1.76% kworker/0:1 [kernel.kallsyms] [k] delay_tsc 1.70% tcc tcc [.] next_nomacro 1.62% tcc tcc [.] preprocess 1.41% tcc libc-2.13.so [.] __memcmp_ssse3 1.38% tcc [kernel.kallsyms] [k] memset 1.10% tcc tcc [.] g 1.06% tcc tcc [.] parse_btype 1.05% tcc tcc [.] sym_push2 1.04% tcc libc-2.13.so [.] _int_realloc 1.00% tcc libc-2.13.so [.] malloc_consolidate after: # Overhead Command Shared Object Symbol # ........ ........... .................. .............................................. # 15.26% tcc tcc [.] next_nomacro1 5.07% tcc libc-2.13.so [.] _int_malloc 4.62% tcc tcc [.] next 3.22% tcc tcc [.] tok_str_add2 3.03% tcc tcc [.] macro_subst_tok 3.02% tcc tcc [.] macro_subst 2.59% tcc tcc [.] next_nomacro_spc 2.44% tcc tcc [.] vswap 2.39% tcc libc-2.13.so [.] _int_free 2.28% tcc libc-2.13.so [.] 
free 2.22% tcc tcc [.] unary 2.07% tcc libc-2.13.so [.] realloc 1.97% tcc libc-2.13.so [.] malloc 1.70% tcc tcc [.] preprocess 1.69% tcc libc-2.13.so [.] __i686.get_pc_thunk.bx 1.68% tcc tcc [.] next_nomacro 1.59% tcc [kernel.kallsyms] [k] memset 1.55% tcc libc-2.13.so [.] __memcmp_ssse3 1.22% tcc tcc [.] parse_comment 1.11% tcc tcc [.] g 1.11% tcc tcc [.] sym_push2 1.10% tcc tcc [.] parse_btype 1.10% tcc libc-2.13.so [.] _int_realloc 1.06% tcc tcc [.] vsetc 0.98% tcc libc-2.13.so [.] malloc_consolidate and this gains small speedup for tcc: # best of 5 runs before: 8268 idents, 47191 lines, 1526670 bytes, 0.153 s, 307997 lines/s, 10.0 MB/s after: 8268 idents, 47203 lines, 1526763 bytes, 0.148 s, 319217 lines/s, 10.3 MB/s	2012-12-21 20:46:26 +04:00
Akim Demaille	e79281f58e	build: fix out-of-tree install Makefile (install): Fix installation of headers. Do not try to install twice libtcc.h, once should be enough.	2012-12-21 14:23:28 +01:00
Akim Demaille	7667a8887a	build: fix out-of-tree build * Makefile (TCC-VERSION): Use top_srcdir.	2012-12-21 14:17:23 +01:00
Akim Demaille	8adfb4a419	build: simplify the makefiles * Makefile: use "else if" to improve readability.	2012-12-21 14:17:16 +01:00
Akim Demaille	017bbbfee1	configure: support absolute out-of-tree builds configure: handle the case of absolute paths. Reported by grishka.	2012-12-21 13:57:22 +01:00
Akim Demaille	d7264e0218	configure: style changes * configure: use more here-documents.	2012-12-21 13:49:15 +01:00
Akim Demaille	ba49862de6	configure: prefer here-documents * configure: use here-documents to improve readability and reduce the clutter.	2012-12-21 13:47:00 +01:00
Akim Demaille	9c9ca2032b	configure: style changes * configure (case $targetos): Improve readability. (case $cpu): New, to improve readability compared to if + test.	2012-12-21 13:45:22 +01:00
grischka	5ebc6a964d	Makefile: revamp "tar" target - Creates release tarball from current git branch - Includes tcc-doc.html - converts important windows files to CRLF (requirement for the cmd.exe batch processor, convenience for reading the txt in notepad)	2012-12-20 21:29:57 +01:00
grischka	b174399340	win32: build-tcc.bat: get rid of hardcoded VERSION string Also: - put libtcc.def into libtcc dir - remove ar references - remove libtcc_test from build	2012-12-20 21:20:54 +01:00
Akim Demaille	3f09b90d21	build: fix VPATH builds * configure (fn_dirname): New. Use it to ensure the creation of proper symlinks to Makefiles. (config.mak): Define top_builddir and top_srcdir. (CPPFLAGS): Be sure to find the headers. * Makefile, lib/Makefile, tests/Makefile, tests2/Makefile: Adjust to set VPATH properly. Fix confusion between top_builddir and top_srcdir.	2012-12-18 10:06:20 +01:00
Roy	d815896d4c	bcheck: there is no unistd.h in win32.	2012-12-10 09:51:49 +08:00
Kirill Smelkov	a55ecf6d2c	Repair bounds-checking more, this time `tcc -b -run tcc.c -run tcc.c -run tcctest.c` works Hello up there. On the list Grischka made a point that we can't recommend using -b as long as tcc -b tcc.c doesn't produce anything useful. Now it does, so please don't treat -b mode as second class citizen anymore. Thanks, Kirill * bcheck2: tests: Add tests for compile/run tcc.c with `tcc -b` then compile tcc.c again, then run tcctest.c lib/bcheck: Fix code typo in __bound_delete_region() lib/bcheck: Don't assume heap goes right after bss Make tcc work after self-compiling with bounds-check enabled	2012-12-09 19:51:20 +04:00
Kirill Smelkov	031ff872be	tests: Add tests for compile/run tcc.c with `tcc -b` then compile tcc.c again, then run tcctest.c Just like with test[123] add their test[123]b variants. After the previous 3 patches all tests pass here on Debian GNU/Linux on i386 with gcc-4.7 with or without memory randomization turned on.	2012-12-09 19:43:40 +04:00
Kirill Smelkov	dbeb4faf21	lib/bcheck: Fix code typo in __bound_delete_region() We were calling get_page() with t2 index which is not correct, since get_page() operate on t1 indices. The bug is here from day-1, from `60f781c4` (first version of bounds checker) and show as a crash in __bound_delete_region() at program exit: $ ./tcc -B. -DTCC_TARGET_I386 -DCONFIG_MULTIARCHDIR=\"i386-linux-gnu\" -b -run -DONE_SOURCE \ ./tcc.c -B. -DTCC_TARGET_I386 -DCONFIG_MULTIARCHDIR=\"i386-linux-gnu\" -run -DONE_SOURCE \ ./tcc.c -B. -run tests/tcctest.c (lot's of correct output from tcctest) Runtime error: dereferencing invalid pointer at 0xa7c21cc4 __bound_delete_region() by (nil) ??? Segmentation fault The fix is simple - last page should be get through t1_end, like it is done in __bound_new_region(). After this patch, tcc is being able to compile itself with -b, then compile itself again and run tcctest with correct output. Tests follow.	2012-12-09 19:33:47 +04:00
Kirill Smelkov	efd9d92b7c	lib/bcheck: Don't assume heap goes right after bss At startup __bound_init() wants to mark malloc zone as invalid memory, so that any access to memory on heap, not allocated through malloc be invalid. Other pages are initialized as empty regions, access to which is not treated as invalid by bounds-checking. The problem is code incorrectly assumed that heap goes right after bss, and that is not correct for two cases: 1) if we are running from `tcc -b -run`, program text data and bss will be already in malloced memory, possibly in mmaped region insead of heap, and marking memory as invalid from _end will not cover heap and probably wrongly mark correct regions. 2) if address space randomization is turned on, again heap does not start from _end, and we'll mark as invalid something else instead of malloc area. For example with the following diagnostic patch ... diff --git a/tcc.c b/tcc.c index 5dd5725..31c46e8 100644 --- a/tcc.c +++ b/tcc.c @@ -479,6 +479,8 @@ static int parse_args(TCCState s, int argc, char argv) return optind; } +extern int _etext, _edata, _end; + int main(int argc, char argv) { int i; @@ -487,6 +489,18 @@ int main(int argc, char argv) int64_t start_time = 0; const char default_file = NULL; + void brk; + + brk = sbrk(0); + + fprintf(stderr, "\n>>> TCC\n\n"); + fprintf(stderr, "etext:\t%10p\n", &_etext); + fprintf(stderr, "edata:\t%10p\n", &_edata); + fprintf(stderr, "end:\t%10p\n", &_end); + fprintf(stderr, "brk:\t%10p\n", brk); + fprintf(stderr, "stack:\t%10p\n", &brk); + + fprintf(stderr, "&errno: %p\n", &errno); s = tcc_new(); output_type = TCC_OUTPUT_EXE; diff --git a/tccrun.c b/tccrun.c index 531f46a..25ed30a 100644 --- a/tccrun.c +++ b/tccrun.c @@ -91,6 +91,8 @@ LIBTCCAPI int tcc_run(TCCState s1, int argc, char *argv) int (prog_main)(int, char *); int ret; + fprintf(stderr, "\n\ntcc_run() ...\n\n"); + if (tcc_relocate(s1, TCC_RELOCATE_AUTO) < 0) return -1; diff --git a/lib/bcheck.c b/lib/bcheck.c index ea5b233..8b26a5f 
100644 --- a/lib/bcheck.c +++ b/lib/bcheck.c @@ -296,6 +326,8 @@ static void mark_invalid(unsigned long addr, unsigned long size) start = addr; end = addr + size; + fprintf(stderr, "mark_invalid %10p - %10p\n", (void )addr, (void )end); + t2_start = (start + BOUND_T3_SIZE - 1) >> BOUND_T3_BITS; if (end != 0) t2_end = end >> BOUND_T3_BITS; ... Look how memory is laid out for `tcc -b -run ...`: $ ./tcc -B. -b -DTCC_TARGET_I386 -DCONFIG_MULTIARCHDIR=\"i386-linux-gnu\" -run \ -DONE_SOURCE ./tcc.c -B. -c x.c >>> TCC etext: 0x8065477 edata: 0x8070220 end: 0x807a95c brk: 0x807b000 stack: 0xaffff0f0 &errno: 0xa7e25688 tcc_run() ... mark_invalid 0xfff80000 - (nil) mark_invalid 0xa7c31d98 - 0xafc31d98 >>> TCC etext: 0xa7c22767 edata: 0xa7c2759c end: 0xa7c31d98 brk: 0x8211000 stack: 0xafffeff0 &errno: 0xa7e25688 Runtime error: dereferencing invalid pointer ./tccpp.c:1953: at 0xa7beebdf parse_number() (included from ./libtcc.c, ./tcc.c) ./tccpp.c:3003: by 0xa7bf0708 next() (included from ./libtcc.c, ./tcc.c) ./tccgen.c:4465: by 0xa7bfe348 block() (included from ./libtcc.c, ./tcc.c) ./tccgen.c:4440: by 0xa7bfe212 block() (included from ./libtcc.c, ./tcc.c) ./tccgen.c:5529: by 0xa7c01929 gen_function() (included from ./libtcc.c, ./tcc.c) ./tccgen.c:5767: by 0xa7c02602 decl0() (included from ./libtcc.c, ./tcc.c) The second mark_invalid goes right after in-memory-compiled program's _end, and oops, that's not where malloc zone is (starts from brk), and oops again, mark_invalid covers e.g. errno. Then compiled tcc is crasshing by bcheck on errno access: 1776 static void parse_number(const char p) 1777 { 1778 int b, t, shift, frac_bits, s, exp_val, ch; ... 
1951 *q = '\0'; 1952 t = toup(ch); 1953 errno = 0; The solution here is to use sbrk(0) as approximation for the program break start instead of &_end: - if we are a separately compiled program, __bound_init() runs early, and sbrk(0) should be equal or very near to start_brk (in case other constructors malloc something), or - if we are running from under `tcc -b -run`, sbrk(0) will return start of heap portion which is under this program control, and not mark as invalid earlier allocated memory. With this patch `tcc -b -run tcc.c ...` succeeds compiling above small-test program (diagnostic patch is still applied too): $ ./tcc -B. -b -DTCC_TARGET_I386 -DCONFIG_MULTIARCHDIR=\"i386-linux-gnu\" -run \ -DONE_SOURCE ./tcc.c -B. -c x.c >>> TCC etext: 0x8065477 edata: 0x8070220 end: 0x807a95c brk: 0x807b000 stack: 0xaffff0f0 &errno: 0xa7e25688 tcc_run() ... mark_invalid 0xfff80000 - (nil) mark_invalid 0x8211000 - 0x10211000 >>> TCC etext: 0xa7c22777 edata: 0xa7c275ac end: 0xa7c31da8 brk: 0x8211000 stack: 0xafffeff0 &errno: 0xa7e25688 (completes ok) but running `tcc -b -run tcc.c -run tests/tcctest.c` sigsegv's - that's the plot for the next patch.	2012-12-09 19:05:36 +04:00
Kirill Smelkov	43a11a7ed1	Make tcc work after self-compiling with bounds-check enabled For vstack Fabrice used the trick to initialize vtop to &vstack[-1], so that on first push, vtop becomes &vstack[0] and a value is also stored there - everything works. Except that when tcc is compiled with bounds-checking enabled, vstack - 1 returns INVALID_POINTER and oops... Let's workaround it with artificial 1 vstack slot which will not be used, but only serve as an indicator that pointing to &vstack[-1] is ok. Now, tcc, after being self-compiled with -b works: $ ./tcc -B. -o tccb -DONE_SOURCE -DCONFIG_MULTIARCHDIR=\"i386-linux-gnu\" tcc.c -ldl $ cd tests $ ../tcc -B.. -run tcctest.c >1 $ ../tccb -B.. -run tcctest.c >2 $ diff -u 1 2 and note, tcc's compilation speed is not affected: $ ./tcc -B. -bench -DONE_SOURCE -DCONFIG_MULTIARCHDIR=\"i386-linux-gnu\" -c tcc.c before: 8270 idents, 47221 lines, 1527730 bytes, 0.152 s, 309800 lines/s, 10.0 MB/s after: 8271 idents, 47221 lines, 1527733 bytes, 0.152 s, 310107 lines/s, 10.0 MB/s But note, that `tcc -b -run tcc` is still broken - for example it crashes on $ cat x.c double get100 () { return 100.0; } $ ./tcc -B. -b -DTCC_TARGET_I386 -DCONFIG_MULTIARCHDIR=\"i386-linux-gnu\" -run \ -DONE_SOURCE ./tcc.c -B. -c x.c Runtime error: dereferencing invalid pointer ./tccpp.c:1953: at 0xa7beebdf parse_number() (included from ./libtcc.c, ./tcc.c) ./tccpp.c:3003: by 0xa7bf0708 next() (included from ./libtcc.c, ./tcc.c) ./tccgen.c:4465: by 0xa7bfe348 block() (included from ./libtcc.c, ./tcc.c) ./tccgen.c:4440: by 0xa7bfe212 block() (included from ./libtcc.c, ./tcc.c) ./tccgen.c:5529: by 0xa7c01929 gen_function() (included from ./libtcc.c, ./tcc.c) ./tccgen.c:5767: by 0xa7c02602 decl0() (included from ./libtcc.c, ./tcc.c) that's because lib/bcheck.c runtime needs more fixes -- see next patches.	2012-12-09 18:06:09 +04:00
Thomas Preud'homme	c4a18f47a2	Detect ARM CPU version in configure Instead of guessing the ARM CPU version to compile for from tcc.h, we now detect it in configure and output the value in config.h	2012-12-04 11:17:51 +01:00
Thomas Preud'homme	05b02a5581	arm-gen.c: Invalid operator test always false Invalid operator test is always false in gen_opf for arm (found with cppcheck). This patch fixes the issue.	2012-11-28 22:26:39 +01:00
Thomas Preud'homme	8d90205fd9	Fix OABI calling convention OABI calling convention was broken since the addition of the hardfloat calling convention in commit `7f6095bfec`. This commit fixes the breakage.	2012-11-28 22:26:39 +01:00
Kirill Smelkov	168aed4984	tests: btest should only run on targets supporting bcheck After `40a54c43` (Repair bounds-checking runtime), and in particular `5d648485` (Now btest pass!) `make test` was broken on ARCH != i386, because I've changed btest to unconditionally run on all arches. But bounds-checking itself is only supported on i386 and oops... Fix it. Reported-by: Thomas Preud'homme <robotux@celest.fr>	2012-11-24 12:54:03 +04:00
Kirill Smelkov	4744269494	Update .gitignore The following files were not ignored (produced by build on i386 with --enable-cross): arm-eabi-tcc arm-fpa-ld-tcc arm-fpa-tcc arm-vfp-tcc c67-tcc i386-win32-tcc lib/i386-win32/ lib/x86_64-win32/ x86_64-tcc x86_64-win32-tcc	2012-11-22 10:40:02 +04:00
Thomas Preud'homme	6eec931038	Only reference vfpr when available A line in gfunc_call in arm-gen.c is referencing vfpr unconditionally. Yet, this function is only available when TCC_ARM_VFP is set. While this code is only triggered when TCC_ARM_VFP is set, it fails at compile time. This commit fixes the problem.	2012-11-21 12:21:51 +01:00
Thomas Preud'homme	15a315f4a5	Define TCC_ARM_EABI if using hardfloat ABI TCC_ARM_EABI should be defined when compiling with hardfloat calling convention. This commit reworks the Makefile to distinguish between calling convention and multiarch and define TCC_ARM_EABI when hardfloat calling convention is used. The result is to first guess the calling convention and then add the multiarch triplet if necessary.	2012-11-20 11:36:13 +01:00
Thomas Preud'homme	e2212738d4	Generate PLT thumb stub only when necessary Generate PLT thumb stub for an ARM PLT entry only when at least one Thumb instruction branches to that entry. This is a rewrite of the previous patch.	2012-11-17 10:01:11 +01:00
Kirill Smelkov	ab24aaeca3	i386: We can change 'lea 0(%ebp),r' to 'mov %ebp,r' Because that mov is 1 byte shorter, look: int *func() { return __builtin_frame_address(0); } before patch: 00000000 <func>: 0: 55 push %ebp 1: 89 e5 mov %esp,%ebp 3: 81 ec 00 00 00 00 sub $0x0,%esp 9: 8d 45 00 lea 0x0(%ebp),%eax // <- here c: e9 00 00 00 00 jmp 11 <func+0x11> 11: c9 leave 12: c3 ret after patch: 00000000 <func>: 0: 55 push %ebp 1: 89 e5 mov %esp,%ebp 3: 81 ec 00 00 00 00 sub $0x0,%esp 9: 89 e8 mov %ebp,%eax // <- here b: e9 00 00 00 00 jmp 10 <func+0x10> 10: c9 leave 11: c3 ret	2012-11-16 10:22:45 +04:00
Kirill Smelkov	b2a02961b4	Add support for __builtin_frame_address(level) Continuing `d6072d37` (Add __builtin_frame_address(0)) implement __builtin_frame_address for levels greater than zero, in order for tinycc to be able to compile its own lib/bcheck.c after `cffb7af9` (lib/bcheck: Prevent __bound_local_new / __bound_local_delete from being miscompiled). I'm new to the internals, and used the most simple way to do it. Generated code is not very good for levels >= 2, compare gcc tcc level=0 mov %ebp,%eax lea 0x0(%ebp),%eax level=1 mov 0x0(%ebp),%eax mov 0x0(%ebp),%eax level=2 mov 0x0(%ebp),%eax mov 0x0(%ebp),%eax mov (%eax),%eax mov %eax,-0x10(%ebp) mov -0x10(%ebp),%eax mov (%eax),%eax level=3 mov 0x0(%ebp),%eax mov 0x0(%ebp),%eax mov (%eax),%eax mov (%eax),%ecx mov (%eax),%eax mov (%ecx),%eax But this is still an improvement and for bcheck we need level=1 for which the code is good. For the tests I had to force gcc use -O0 to not inline the functions. And -fno-omit-frame-pointer just in case. If someone knows how to improve the generated code - help is appreciated. Thanks, Kirill Cc: Michael Matz <matz@suse.de> Cc: Shinichiro Hamaji <shinichiro.hamaji@gmail.com>	2012-11-16 10:22:14 +04:00
Milutin Jovanović	e79c3533ec	-Wno-unused-result now added only on gcc >= 4.4 This option does not exist in gcc 4.3 and earlier, and it breaks the build on systems with older compilers. The makefile has been enhanced to test for the version and adds it only if a newer compiler is detected.	2012-11-14 17:45:15 -05:00
Kirill Smelkov	40a54c4399	Repair bounds-checking runtime On this weekend a thought came to me again that tinycc could be used for scripting. Only its bounds-checking mode turned out to be broken, which is a pity because bounds-checking is sometimes handy especially for quickly written scripts. Since tinycc is one of those small beautiful things, seldomly happening in our times, I couldn't resist trying to fix it. Thanks, Kirill * bcheck: Now btest pass! lib/bcheck: Prevent __bound_local_new / __bound_local_delete from being miscompiled lib/bcheck: Prevent libc_malloc/libc_free etc from being miscompiled	2012-11-14 10:50:34 +04:00
Kirill Smelkov	5d648485bd	Now btest pass! Thanks to two previous commits now btest tests pass, at least on Linux. Signed-off-by: Kirill Smelkov <kirr@navytux.spb.ru>	2012-11-13 22:23:01 +04:00
Kirill Smelkov	cffb7af9f9	lib/bcheck: Prevent __bound_local_new / __bound_local_delete from being miscompiled On i386 and gcc-4.7 I found that __bound_local_new was miscompiled - look: #ifdef __i386__ /* return the frame pointer of the caller / #define GET_CALLER_FP(fp)\ {\ unsigned long fp1;\ __asm__ __volatile__ ("movl %%ebp,%0" :"=g" (fp1));\ fp = fp1[0];\ } #endif /* called when entering a function to add all the local regions / void FASTCALL __bound_local_new(void p1) { unsigned long addr, size, fp, p = p1; GET_CALLER_FP(fp); for(;;) { addr = p[0]; if (addr == 0) break; addr += fp; size = p[1]; p += 2; __bound_new_region((void )addr, size); } } __bound_local_new: .LFB40: .cfi_startproc pushl %esi .cfi_def_cfa_offset 8 .cfi_offset 6, -8 pushl %ebx .cfi_def_cfa_offset 12 .cfi_offset 3, -12 subl $8, %esp // NOTE prologue does not touch %ebp .cfi_def_cfa_offset 20 #APP # 235 "lib/bcheck.c" 1 movl %ebp,%edx // %ebp -> fp1 # 0 "" 2 #NO_APP movl (%edx), %esi // fp1[0] -> fp movl (%eax), %edx movl %eax, %ebx testl %edx, %edx je .L167 .p2align 2,,3 .L173: movl 4(%ebx), %eax addl $8, %ebx movl %eax, 4(%esp) addl %esi, %edx movl %edx, (%esp) call __bound_new_region movl (%ebx), %edx testl %edx, %edx jne .L173 .L167: addl $8, %esp .cfi_def_cfa_offset 12 popl %ebx .cfi_restore 3 .cfi_def_cfa_offset 8 popl %esi .cfi_restore 6 .cfi_def_cfa_offset 4 ret here GET_CALLER_FP() assumed that its using function setups it's stack frame, i.e. first save, then set %ebp to stack frame start, and then it has to do perform two lookups: 1) to get current stack frame through %ebp, and 2) get caller stack frame through (%ebp). And here is the problem: gcc decided not to setup %ebp for __bound_local_new and in such case GET_CALLER_FP actually becomes GET_CALLER_CALLER_FP and oops, wrong regions are registered in bcheck tables... The solution is to stop using hand written assembly and rely on gcc's __builtin_frame_address(1) to get callers frame stack(). 
I think for the builtin gcc should generate correct code, independent of whether it decides or not to omit frame pointer in using function - it knows it. () judging by gcc history, __builtin_frame_address was there almost from the beginning - at least it is present in 1992 as seen from the following commit: http://gcc.gnu.org/git/?p=gcc.git;a=commit;h=be07f7bdbac76d87d3006c89855491504d5d6202 so we can rely on it being supported by all versions of gcc. In my environment the assembly of __bound_local_new changes as follows: diff --git a/bcheck0.s b/bcheck1.s index 4c02a5f..ef68918 100644 --- a/bcheck0.s +++ b/bcheck1.s @@ -1409,20 +1409,17 @@ __bound_init: __bound_local_new: .LFB40: .cfi_startproc - pushl %esi + pushl %ebp // NOTE prologue saves %ebp ... .cfi_def_cfa_offset 8 - .cfi_offset 6, -8 + .cfi_offset 5, -8 + movl %esp, %ebp // ... and reset it to local stack frame + .cfi_def_cfa_register 5 + pushl %esi pushl %ebx - .cfi_def_cfa_offset 12 - .cfi_offset 3, -12 subl $8, %esp - .cfi_def_cfa_offset 20 -#APP -# 235 "lib/bcheck.c" 1 - movl %ebp,%edx -# 0 "" 2 -#NO_APP - movl (%edx), %esi + .cfi_offset 6, -12 + .cfi_offset 3, -16 + movl 0(%ebp), %esi // stkframe -> stkframe.parent -> fp movl (%eax), %edx movl %eax, %ebx testl %edx, %edx @@ -1440,13 +1437,13 @@ __bound_local_new: jne .L173 .L167: addl $8, %esp - .cfi_def_cfa_offset 12 popl %ebx .cfi_restore 3 - .cfi_def_cfa_offset 8 popl %esi .cfi_restore 6 - .cfi_def_cfa_offset 4 + popl %ebp + .cfi_restore 5 + .cfi_def_cfa 4, 4 ret .cfi_endproc i.e. now it compiles correctly. Though I do not have x86_64 to test, my guess is that __builtin_frame_address(1) should work there too. If not - please revert only x86_64 part of the patch. Thanks. Cc: Michael Matz <matz@suse.de>	2012-11-13 22:17:58 +04:00
Kirill Smelkov	646b51833f	lib/bcheck: Prevent libc_malloc/libc_free etc from being miscompiled On i386 and gcc-4.7 I found that libc_malloc was miscompiled - look: static void libc_malloc(size_t size) { void ptr; restore_malloc_hooks(); // __malloc_hook = saved_malloc_hook ptr = malloc(size); install_malloc_hooks(); // saved_malloc_hook = __malloc_hook, __malloc_hook = __bound_malloc return ptr; } .type libc_malloc, @function libc_malloc: .LFB56: .cfi_startproc pushl %edx .cfi_def_cfa_offset 8 movl %eax, (%esp) call malloc movl $__bound_malloc, __malloc_hook movl $__bound_free, __free_hook movl $__bound_realloc, __realloc_hook movl $__bound_memalign, __memalign_hook popl %ecx .cfi_def_cfa_offset 4 ret Here gcc inlined both restore_malloc_hooks() and install_malloc_hooks() and decided that saved_malloc_hook -> __malloc_hook -> saved_malloc_hook stores are not needed and could be ommitted. Only it did not know __molloc_hook affects malloc()... So add compiler barrier to both install and restore hooks functions and be done with it - the code is now ok: diff --git a/bcheck0.s b/bcheck1.s index 5f50293..4c02a5f 100644 --- a/bcheck0.s +++ b/bcheck1.s @@ -42,8 +42,24 @@ libc_malloc: .cfi_startproc pushl %edx .cfi_def_cfa_offset 8 + movl saved_malloc_hook, %edx + movl %edx, __malloc_hook + movl saved_free_hook, %edx + movl %edx, __free_hook + movl saved_realloc_hook, %edx + movl %edx, __realloc_hook + movl saved_memalign_hook, %edx + movl %edx, __memalign_hook movl %eax, (%esp) call malloc + movl __malloc_hook, %edx + movl %edx, saved_malloc_hook + movl __free_hook, %edx + movl %edx, saved_free_hook + movl __realloc_hook, %edx + movl %edx, saved_realloc_hook + movl __memalign_hook, %edx + movl %edx, saved_memalign_hook movl $__bound_malloc, __malloc_hook movl $__bound_free, __free_hook movl $__bound_realloc, __realloc_hook For barrier I use __asm__ __volatile__ ("": : : "memory") which is used as compiler barrier by Linux kernel, and mentioned in gcc docs and in wikipedia 
[1]. Without this patch any program compiled with tcc -b crashes in startup because of infinite recursion in libc_malloc. [1] http://en.wikipedia.org/wiki/Memory_ordering#Compiler_memory_barrier	2012-11-13 22:17:51 +04:00
Thomas Preud'homme	1af3bca4ea	Revert "Generate PLT thumb stub only when necessary" Revert commit `891dfcdf3f` since it assumes all architectures supported by tcc have GOT offsets aligned on 2. A rework of this commit is being done since without it all PLT entries grow by 4 bytes.	2012-11-12 23:14:21 +01:00
Thomas Preud'homme	3c986eeae3	Add armv6l to ARM supported processors Add armv6l to the list of supported ARM architecture (as returned by uname -m) in ./configure	2012-11-11 20:01:01 +01:00
Thomas Preud'homme	14c99236da	Call to veneers in ARM mode Since commit `c6630ef92a`, Call to a veneer when the final symbol to be reached is thumb is made through a blx instruction. This is a mistake since veneers are ARM instructions and should thus be called with a simple bl. This commit prevents the bl -> blx conversion when a veneer is used.	2012-11-09 10:59:06 +01:00
Thomas Preud'homme	061b5799cc	Allow source fortification Source fortification now works correctly : it compiles without warning except unused result and the resulting tcc is working fine. Hence let's stop disabling source fortification and hide unused result instead.	2012-11-07 21:15:07 +01:00
Thomas Preud'homme	891dfcdf3f	Generate PLT thumb stub only when necessary Generate PLT thumb stub for an ARM PLT entry only when at least one Thumb instruction branches to that entry. Warning: To save space, this commit reuses the bit 0 of entries of got_offsets array. The GOT offset is thus saved in a 31 bit value. Make sure to divide by 2 (right shift by 1) an offset before storing it there and conversely to multiply the value by 2 (left shift by 1) before using it.	2012-11-07 20:51:33 +01:00
Thomas Preud'homme	e07802e39d	Support R_ARM_THM_JUMP24 relocation to plt Add thumb stubs switching from thumb mode to arm mode to all PLT entries so that R_ARM_THM_JUMP24 relocations to PLT entries can be satisfied.	2012-11-07 20:48:14 +01:00
Thomas Preud'homme	b0f08ace94	Create a clean target for tests2/Makefile The absence of a clean target in tests2/Makefile makes the clean target in the main Makefile fail to complete. This commit creates such a target which removes the only file created when tests pass successfully.	2012-11-07 14:56:37 +01:00
Thomas Preud'homme	a7f010ee8a	Honour *FLAGS everywhere Add CPPFLAGS, CFLAGS and LDFLAGS everywhere it's missing.	2012-11-06 15:20:53 +01:00
Hitoshi Mitake	5eb64357b1	forbid invalid comparison of struct Current tcc permits comparison of structs and comparison between struct and other typed values.	2012-11-05 22:34:43 +09:00
Roy Tam	943574aba5	pe: fix tcc not linking to user32 and gdi32	2012-11-02 16:59:21 +08:00
Thomas Preud'homme	034dce4f04	Enable arm hardfloat calling convention Use arm hardfloat calling convention when the system is using it (detected by searching for hardfloat multiarch directory).	2012-10-28 19:55:23 +01:00
Thomas Preud'homme	fad68c9163	Add support for R_ARM_THM_{JUMP24,CALL} relocs Add support for relocations R_ARM_THM_JUMP24 and R_ARM_THM_CALL. These are encountered with gcc when compiling for armv6 or greater with -mthumb flag and a call (conditional or not) is done.	2012-10-28 19:55:12 +01:00
Thomas Preud'homme	508df168f4	Fix commit `85f6fad3a6` Don't reset nocode_wanted with saved_nocode_wanted if it hasn't been modified (and hence saved_nocode_wanted is uninitialized).	2012-10-25 20:14:55 +02:00
Thomas Preud'homme	cf95ac399c	Error out in case of variable name clash Error out when two local variables with the same name are defined in the same scope. This fixes bug #15597 in savannah's BTS.	2012-10-25 19:40:50 +02:00
Thomas Preud'homme	85f6fad3a6	Forbid VLA as static variables Currently, VLAs are not forbidden for static variables. This leads to problems even for fixed-size arrays when the size expression uses the ternary operator (cond ? then-value : else-value) because it is parsed as a general expression which leads to code generated in this case. This commit solves the problem by forbidding VLA for static variables. Although not required for the fix, avoiding code generation when the expression is constant would be a nice addition though.	2012-10-25 18:07:13 +02:00
Thomas Preud'homme	9966fd4eae	Only use blx if available Introduce ARM version for the target architecture in order to determine if blx instruction can be used or not. Availability of blx instruction allows for more scenarii supported in R_ARM_CALL relocation. It should also be useful when introducing support for the R_ARM_THM_CALL relocation.	2012-10-16 00:31:56 +02:00
Thomas Preud'homme	c6630ef92a	Fix R_ARM_CALL when target function is Thumb With R_ARM_CALL, if target function is to be entered in Thumb mode, the relocation is supposed to transform bl in blx. This is not the case actually so this commit is there to fix it.	2012-10-10 00:21:26 +02:00

1 2 3 4 5 ...

1184 Commits