From 853a498f2c234b41b8b2e7dacb371e9cec947b43 Mon Sep 17 00:00:00 2001 From: herman ten brugge Date: Tue, 8 Sep 2020 14:31:58 +0200 Subject: [PATCH] Fix boundschecking for signal/sigaction/fork The BOUNDS_CHECKING_ON/BOUNDS_CHECKING_OFF is not working for signal/sigaction/fork. The reason is that the code stops bound checking for the whole application. This result in wrong handling of __bound_local_new/__bound_local_delete and malloc/calloc/realloc/free. Consider the following code: void tst(int n) { int i, arr[n]; for (i = 0; i < n; i++) arr[i] = 0; } void *some_thread(void *dummy) { while (running) { tst(10); tst(20); } } void signal_handler(int sig) { ... } When the signal handler is called the some_thread code can be interrupted when is just registered the arr[10] data. When the signal handler is leaved the arr[10] is still registered and did not see the call to deregister arr[10] and then register arr[20]. The code resumes when tst(20) is running. This results in a bound checking error when i >= 10. To solve the above problem I changed the bound checking code to use tls (thread local storage) for the no_checking variable. This also makes it now possible to redirect signal/sigaction/fork code through the bound checking library and disable checking when a signal is running and to correct the bounds_sem for the fork child process. The BOUNDS_CHECKING_ON/BOUNDS_CHECKING_OFF is not needed any more for signal/sigaction/fork. In fact I could remove them from all my applications. The use of the tls function code slows down the code by about 10%. So if the slowdown due to bound checking was 5. It is now 5.5 times slower. For x86_64/i386 I also allowed to use __thread variable in bcheck.c when compiled with gcc with: make x86_64-libtcc1-usegcc=yes make i386-libtcc1-usegcc=yes This makes code run faster due to use of gcc and __thread variable. With the __thread variable there is no 10% slowdown. For other targets this does not work because stabs is not supported. Changes: lib/bcheck.c: - Add TRY_SEM - Add HAVE_SIGNAL/HAVE_SIGACTION/HAVE_FORK/HAVE_TLS_FUNC/HAVE_TLS_VAR - HAVE_SIGNAL: redirect signal() call if set. - HAVE_SIGACTION: redirect sigaction() call if set. - HAVE_FORK: redirect fork() call if set. - HAVE_TLS_FUNC: If target has tls function calls. - HAVE_TLS_VAR: If target has __thread tls support. - Replace all no_checking refecrences to NO_CHECKING_SET/NO_CHECKING_GET macros tcc-doc.texi: - Remove examples for signal/sigaction/fork code. - Add some explanation for signal/sigaction/fork code. - Add documentaion for __bounds_checking(). tccelf.c: - Add support for SHF_TLS tests/tests2/114_bound_signal.c: - Remove BOUNDS_CHECKING_ON/BOUNDS_CHECKING_OFF - Add code to trigger failure when tls is not working. x86_64-link.c: - Add support for R_X86_64_TLSGD/R_X86_64_TLSLD/R_X86_64_DTPOFF32/R_X86_64_TPOFF32 i386-link.c: - Add support for R_386_TLS_GD/R_386_TLS_LDM/R_386_TLS_LDO_32/R_386_TLS_LE --- i386-link.c | 75 ++++++ lib/bcheck.c | 406 +++++++++++++++++++++++++++----- tcc-doc.texi | 43 ++-- tccelf.c | 24 +- tests/tests2/114_bound_signal.c | 31 +-- x86_64-link.c | 72 ++++++ 6 files changed, 529 insertions(+), 122 deletions(-) diff --git a/i386-link.c b/i386-link.c index e4929b4..0428569 100644 --- a/i386-link.c +++ b/i386-link.c @@ -37,6 +37,10 @@ int code_reloc (int reloc_type) case R_386_GOT32X: case R_386_GLOB_DAT: case R_386_COPY: + case R_386_TLS_GD: + case R_386_TLS_LDM: + case R_386_TLS_LDO_32: + case R_386_TLS_LE: return 0; case R_386_PC16: @@ -78,6 +82,10 @@ int gotplt_entry_type (int reloc_type) case R_386_GOT32: case R_386_GOT32X: case R_386_PLT32: + case R_386_TLS_GD: + case R_386_TLS_LDM: + case R_386_TLS_LDO_32: + case R_386_TLS_LE: return ALWAYS_GOTPLT_ENTRY; } return -1; @@ -228,6 +236,73 @@ void relocate(TCCState *s1, ElfW_Rel *rel, int type, unsigned char *ptr, addr_t (to remove noise of default case). Is this true? */ return; + case R_386_TLS_GD: + { + static const unsigned char expect[] = { + /* lea 0(,%ebx,1),%eax */ + 0x8d, 0x04, 0x1d, 0x00, 0x00, 0x00, 0x00, + /* call __tls_get_addr@PLT */ + 0xe8, 0xfc, 0xff, 0xff, 0xff }; + static const unsigned char replace[] = { + /* mov %gs:0,%eax */ + 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, + /* sub 0,%eax */ + 0x81, 0xe8, 0x00, 0x00, 0x00, 0x00 }; + + if (memcmp (ptr-3, expect, sizeof(expect)) == 0) { + ElfW(Sym) *sym; + Section *sec; + int32_t x; + + memcpy(ptr-3, replace, sizeof(replace)); + rel[1].r_info = ELFW(R_INFO)(0, R_386_NONE); + sym = &((ElfW(Sym) *)symtab_section->data)[sym_index]; + sec = s1->sections[sym->st_shndx]; + x = sym->st_value - sec->sh_addr - sec->data_offset; + add32le(ptr + 5, -x); + } + else + tcc_error("unexpected R_386_TLS_GD pattern"); + } + return; + case R_386_TLS_LDM: + { + static const unsigned char expect[] = { + /* lea 0(%ebx),%eax */ + 0x8d, 0x83, 0x00, 0x00, 0x00, 0x00, + /* call __tls_get_addr@PLT */ + 0xe8, 0xfc, 0xff, 0xff, 0xff }; + static const unsigned char replace[] = { + /* mov %gs:0,%eax */ + 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, + /* nop */ + 0x90, + /* lea 0(%esi,%eiz,1),%esi */ + 0x8d, 0x74, 0x26, 0x00 }; + + if (memcmp (ptr-2, expect, sizeof(expect)) == 0) { + memcpy(ptr-2, replace, sizeof(replace)); + rel[1].r_info = ELFW(R_INFO)(0, R_386_NONE); + } + else + tcc_error("unexpected R_386_TLS_LDM pattern"); + } + return; + case R_386_TLS_LDO_32: + case R_386_TLS_LE: + { + ElfW(Sym) *sym; + Section *sec; + int32_t x; + + sym = &((ElfW(Sym) *)symtab_section->data)[sym_index]; + sec = s1->sections[sym->st_shndx]; + x = val - sec->sh_addr - sec->data_offset; + add32le(ptr, x); + } + return; + case R_386_NONE: + return; default: fprintf(stderr,"FIXME: handle reloc type %d at %x [%p] to %x\n", type, (unsigned)addr, ptr, (unsigned)val); diff --git a/lib/bcheck.c b/lib/bcheck.c index b5742e3..54ef5a7 100644 --- a/lib/bcheck.c +++ b/lib/bcheck.c @@ -69,25 +69,38 @@ #define EXIT_SEM() #define WAIT_SEM() #define POST_SEM() +#define TRY_SEM() #define HAVE_MEMALIGN (0) #define MALLOC_REDIR (0) #define HAVE_PTHREAD_CREATE (0) #define HAVE_CTYPE (0) #define HAVE_ERRNO (0) +#define HAVE_SIGNAL (0) +#define HAVE_SIGACTION (0) +#define HAVE_FORK (0) +#define HAVE_TLS_FUNC (0) +#define HAVE_TLS_VAR (0) #elif defined(_WIN32) #include +#include static CRITICAL_SECTION bounds_sem; #define INIT_SEM() InitializeCriticalSection(&bounds_sem) #define EXIT_SEM() DeleteCriticalSection(&bounds_sem) #define WAIT_SEM() EnterCriticalSection(&bounds_sem) #define POST_SEM() LeaveCriticalSection(&bounds_sem) +#define TRY_SEM() TryEnterCriticalSection(&bounds_sem) #define HAVE_MEMALIGN (0) #define MALLOC_REDIR (0) #define HAVE_PTHREAD_CREATE (0) #define HAVE_CTYPE (0) #define HAVE_ERRNO (0) +#define HAVE_SIGNAL (1) +#define HAVE_SIGACTION (0) +#define HAVE_FORK (0) +#define HAVE_TLS_FUNC (1) +#define HAVE_TLS_VAR (0) #else @@ -97,6 +110,7 @@ static CRITICAL_SECTION bounds_sem; #include #include #include +#include #ifdef __APPLE__ #include static dispatch_semaphore_t bounds_sem; @@ -104,6 +118,7 @@ static dispatch_semaphore_t bounds_sem; #define EXIT_SEM() dispatch_release(*(dispatch_object_t*)&bounds_sem) #define WAIT_SEM() if (use_sem) dispatch_semaphore_wait(bounds_sem, DISPATCH_TIME_FOREVER) #define POST_SEM() if (use_sem) dispatch_semaphore_signal(bounds_sem) +#define TRY_SEM() if (use_sem) dispatch_semaphore_wait(bounds_sem, DISPATCH_TIME_NOW) #elif 0 #include static sem_t bounds_sem; @@ -112,12 +127,15 @@ static sem_t bounds_sem; #define WAIT_SEM() if (use_sem) while (sem_wait (&bounds_sem) < 0 \ && errno == EINTR) #define POST_SEM() if (use_sem) sem_post (&bounds_sem) +#define TRY_SEM() if (use_sem) while (sem_trywait (&bounds_sem) < 0 \ + && errno == EINTR) #elif 0 static pthread_mutex_t bounds_mtx; #define INIT_SEM() pthread_mutex_init (&bounds_mtx, NULL) #define EXIT_SEM() pthread_mutex_destroy (&bounds_mtx) #define WAIT_SEM() if (use_sem) pthread_mutex_lock (&bounds_mtx) #define POST_SEM() if (use_sem) pthread_mutex_unlock (&bounds_mtx) +#define TRY_SEM() if (use_sem) pthread_mutex_trylock (&bounds_mtx) #else static pthread_spinlock_t bounds_spin; /* about 25% faster then semaphore. */ @@ -125,25 +143,52 @@ static pthread_spinlock_t bounds_spin; #define EXIT_SEM() pthread_spin_destroy (&bounds_spin) #define WAIT_SEM() if (use_sem) pthread_spin_lock (&bounds_spin) #define POST_SEM() if (use_sem) pthread_spin_unlock (&bounds_spin) +#define TRY_SEM() if (use_sem) pthread_spin_trylock (&bounds_spin) #endif #define HAVE_MEMALIGN (1) #define MALLOC_REDIR (1) #define HAVE_PTHREAD_CREATE (1) #define HAVE_CTYPE (1) #define HAVE_ERRNO (1) +#define HAVE_SIGNAL (1) +#define HAVE_SIGACTION (1) +#define HAVE_FORK (1) +#if !defined(__APPLE__) && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +#define HAVE_TLS_FUNC (0) +#define HAVE_TLS_VAR (1) +#else +#define HAVE_TLS_FUNC (1) +#define HAVE_TLS_VAR (0) +#endif +#endif +#if MALLOC_REDIR static void *(*malloc_redir) (size_t); static void *(*calloc_redir) (size_t, size_t); static void (*free_redir) (void *); static void *(*realloc_redir) (void *, size_t); -static void *(*memalign_redir) (size_t, size_t); -static int (*pthread_create_redir) (pthread_t *thread, - const pthread_attr_t *attr, - void *(*start_routine)(void *), void *arg); static unsigned int pool_index; static unsigned char __attribute__((aligned(16))) initial_pool[256]; static unsigned char use_sem; - +#endif +#if HAVE_MEMALIGN +static void *(*memalign_redir) (size_t, size_t); +#endif +#if HAVE_PTHREAD_CREATE +static int (*pthread_create_redir) (pthread_t *thread, + const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg); +#endif +#if HAVE_SIGNAL +typedef void (*bound_sig)(int); +static bound_sig (*signal_redir) (int signum, bound_sig handler); +#endif +#if HAVE_SIGACTION +static int (*sigaction_redir) (int signum, const struct sigaction *act, + struct sigaction *oldact); +#endif +#if HAVE_FORK +static int (*fork_redir) (void); #endif #define TCC_TYPE_NONE (0) @@ -285,7 +330,51 @@ static unsigned char print_heap; static unsigned char print_statistic; static unsigned char no_strdup; static int never_fatal; -static int no_checking = 1; +#if HAVE_TLS_FUNC +#if defined(_WIN32) +static int no_checking = 0; +static DWORD no_checking_key; +#define NO_CHECKING_CHECK() if (!p) { \ + p = (int *) LocalAlloc(LPTR, sizeof(int)); \ + if (!p) bound_alloc_error("tls malloc"); \ + *p = 0; \ + TlsSetValue(no_checking_key, p); \ + } +#define NO_CHECKING_GET() ({ int *p = TlsGetValue(no_checking_key); \ + NO_CHECKING_CHECK(); \ + *p; \ + }) +#define NO_CHECKING_SET(v) { int *p = TlsGetValue(no_checking_key); \ + NO_CHECKING_CHECK(); \ + *p = v; \ + } +#else +static int no_checking = 0; +static pthread_key_t no_checking_key; +#define NO_CHECKING_CHECK() if (!p) { \ + p = (int *) BOUND_MALLOC(sizeof(int)); \ + if (!p) bound_alloc_error("tls malloc"); \ + *p = 0; \ + pthread_setspecific(no_checking_key, p); \ + } +#define NO_CHECKING_GET() ({ int *p = pthread_getspecific(no_checking_key); \ + NO_CHECKING_CHECK(); \ + *p; \ + }) +#define NO_CHECKING_SET(v) { int *p = pthread_getspecific(no_checking_key); \ + NO_CHECKING_CHECK(); \ + *p = v; \ + } +#endif +#elif HAVE_TLS_VAR +static __thread int no_checking = 0; +#define NO_CHECKING_GET() no_checking +#define NO_CHECKING_SET(v) no_checking = v +#else +static int no_checking = 0; +#define NO_CHECKING_GET() no_checking +#define NO_CHECKING_SET(v) no_checking = v +#endif static char exec[100]; #if BOUND_STATISTIC @@ -335,41 +424,6 @@ static unsigned long long bound_splay_delete; #define INCR_COUNT_SPLAY(x) #endif -/* currently only i386/x86_64 supported. Change for other platforms */ -static void fetch_and_add(int* variable, int value) -{ -#if defined __i386__ || defined __x86_64__ - __asm__ volatile("lock; addl %0, %1" - : "+r" (value), "+m" (*variable) // input+output - : // No input-only - : "memory" - ); -#elif defined __arm__ - extern void fetch_and_add_arm(int* variable, int value); - fetch_and_add_arm(variable, value); -#elif defined __aarch64__ - extern void fetch_and_add_arm64(int* variable, int value); - fetch_and_add_arm64(variable, value); -#elif defined __riscv - extern void fetch_and_add_riscv64(int* variable, int value); - fetch_and_add_riscv64(variable, value); -#else - *variable += value; -#endif -} - -/* enable/disable checking. This can be used in signal handlers. */ -void __bounds_checking (int no_check) -{ - fetch_and_add (&no_checking, no_check); -} - -/* enable/disable checking. This can be used in signal handlers. */ -void __bound_never_fatal (int neverfatal) -{ - fetch_and_add (&never_fatal, neverfatal); -} - int tcc_backtrace(const char *fmt, ...); /* print a bound error message */ @@ -395,13 +449,51 @@ static void bound_not_found_warning(const char *file, const char *function, dprintf(stderr, "%s%s, %s(): Not found %p\n", exec, file, function, ptr); } +static void fetch_and_add(int* variable, int value) +{ +#if defined __i386__ || defined __x86_64__ + __asm__ volatile("lock; addl %0, %1" + : "+r" (value), "+m" (*variable) // input+output + : // No input-only + : "memory" + ); +#elif defined __arm__ + extern void fetch_and_add_arm(int* variable, int value); + fetch_and_add_arm(variable, value); +#elif defined __aarch64__ + extern void fetch_and_add_arm64(int* variable, int value); + fetch_and_add_arm64(variable, value); +#elif defined __riscv + extern void fetch_and_add_riscv64(int* variable, int value); + fetch_and_add_riscv64(variable, value); +#else + *variable += value; +#endif +} + +/* enable/disable checking. This can be used in signal handlers. */ +void __bounds_checking (int no_check) +{ +#if HAVE_TLS_FUNC || HAVE_TLS_VAR + NO_CHECKING_SET(NO_CHECKING_GET() + no_check); +#else + fetch_and_add (&no_checking, no_check); +#endif +} + +/* enable/disable checking. This can be used in signal handlers. */ +void __bound_never_fatal (int neverfatal) +{ + fetch_and_add (&never_fatal, neverfatal); +} + /* return '(p + offset)' for pointer arithmetic (a pointer can reach the end of a region in this case */ void * __bound_ptr_add(void *p, size_t offset) { size_t addr = (size_t)p; - if (no_checking) + if (NO_CHECKING_GET()) return p + offset; dprintf(stderr, "%s, %s(): %p 0x%lx\n", @@ -449,7 +541,7 @@ void * __bound_ptr_indir ## dsize (void *p, size_t offset) \ { \ size_t addr = (size_t)p; \ \ - if (no_checking) \ + if (NO_CHECKING_GET()) \ return p + offset; \ \ dprintf(stderr, "%s, %s(): %p 0x%lx\n", \ @@ -515,7 +607,7 @@ void FASTCALL __bound_local_new(void *p1) { size_t addr, fp, *p = p1; - if (no_checking) + if (NO_CHECKING_GET()) return; GET_CALLER_FP(fp); dprintf(stderr, "%s, %s(): p1=%p fp=%p\n", @@ -545,7 +637,7 @@ void FASTCALL __bound_local_delete(void *p1) { size_t addr, fp, *p = p1; - if (no_checking) + if (NO_CHECKING_GET()) return; GET_CALLER_FP(fp); dprintf(stderr, "%s, %s(): p1=%p fp=%p\n", @@ -624,7 +716,7 @@ void __bound_new_region(void *p, size_t size) alloca_list_type *cur; alloca_list_type *new; - if (no_checking) + if (NO_CHECKING_GET()) return; dprintf(stderr, "%s, %s(): %p, 0x%lx\n", @@ -667,7 +759,7 @@ void __bound_setjmp(jmp_buf env) jmp_list_type *jl; void *e = (void *) env; - if (no_checking == 0) { + if (NO_CHECKING_GET() == 0) { dprintf(stderr, "%s, %s(): %p\n", __FILE__, __FUNCTION__, e); WAIT_SEM (); INCR_COUNT(bound_setjmp_count); @@ -703,7 +795,7 @@ static void __bound_long_jump(jmp_buf env, int val, int sig, const char *func) void *e; BOUND_TID_TYPE tid; - if (no_checking == 0) { + if (NO_CHECKING_GET() == 0) { e = (void *)env; tid = BOUND_GET_TID; dprintf(stderr, "%s, %s(): %p\n", __FILE__, func, e); @@ -804,6 +896,17 @@ void __bound_init(size_t *p, int mode) } inited = 1; +#if HAVE_TLS_FUNC +#if defined(_WIN32) + no_checking_key = TlsAlloc(); + TlsSetValue(no_checking_key, &no_checking); +#else + pthread_key_create(&no_checking_key, NULL); + pthread_setspecific(no_checking_key, &no_checking); +#endif +#endif + NO_CHECKING_SET(1); + print_warn_ptr_add = getenv ("TCC_BOUNDS_WARN_POINTER_ADD") != NULL; print_calls = getenv ("TCC_BOUNDS_PRINT_CALLS") != NULL; print_heap = getenv ("TCC_BOUNDS_PRINT_HEAP") != NULL; @@ -845,6 +948,29 @@ void __bound_init(size_t *p, int mode) *(void **) (&pthread_create_redir) = dlsym (addr, "pthread_create"); dprintf(stderr, "%s, %s(): pthread_create_redir %p\n", __FILE__, __FUNCTION__, pthread_create_redir); + if (pthread_create_redir == NULL) + bound_alloc_error ("Cannot redirect pthread_create"); +#endif +#if HAVE_SIGNAL + *(void **) (&signal_redir) = dlsym (addr, "signal"); + dprintf(stderr, "%s, %s(): signal_redir %p\n", + __FILE__, __FUNCTION__, signal_redir); + if (signal_redir == NULL) + bound_alloc_error ("Cannot redirect signal"); +#endif +#if HAVE_SIGACTION + *(void **) (&sigaction_redir) = dlsym (addr, "sigaction"); + dprintf(stderr, "%s, %s(): sigaction_redir %p\n", + __FILE__, __FUNCTION__, sigaction_redir); + if (sigaction_redir == NULL) + bound_alloc_error ("Cannot sigaction signal"); +#endif +#if HAVE_FORK + *(void **) (&fork_redir) = dlsym (addr, "fork"); + dprintf(stderr, "%s, %s(): fork_redir %p\n", + __FILE__, __FUNCTION__, fork_redir); + if (fork_redir == NULL) + bound_alloc_error ("Cannot sigaction fork"); #endif } #endif @@ -936,7 +1062,7 @@ add_bounds: no_bounds: POST_SEM (); - no_checking = 0; + NO_CHECKING_SET(0); dprintf(stderr, "%s, %s(): end\n\n", __FILE__, __FUNCTION__); } @@ -961,8 +1087,9 @@ __bound_main_arg(int argc, char **argv, char **envp) dprintf(stderr, "%s, %s(): arg %p 0x%lx\n", __FILE__, __FUNCTION__, argv[i], (unsigned long)(strlen (argv[i]) + 1)); - dprintf(stderr, "%s, %s(): argv %p 0x%lx\n", - __FILE__, __FUNCTION__, argv, (argc + 1) * sizeof(char *)); + dprintf(stderr, "%s, %s(): argv %p %d\n", + __FILE__, __FUNCTION__, argv, + (int)((argc + 1) * sizeof(char *))); } #endif } @@ -986,8 +1113,9 @@ __bound_main_arg(int argc, char **argv, char **envp) *p, (unsigned long)(strlen (*p) + 1)); ++p; } - dprintf(stderr, "%s, %s(): environ %p 0x%lx\n", - __FILE__, __FUNCTION__, envp, (++p - envp) * sizeof(char *)); + dprintf(stderr, "%s, %s(): environ %p %d\n", + __FILE__, __FUNCTION__, envp, + (int)((++p - envp) * sizeof(char *))); } #endif } @@ -1010,9 +1138,9 @@ void __attribute__((destructor)) __bound_exit(void) } #endif - no_checking = 1; + NO_CHECKING_SET(1); - WAIT_SEM (); + TRY_SEM (); while (alloca_list) { alloca_list_type *next = alloca_list->next; @@ -1048,6 +1176,13 @@ void __attribute__((destructor)) __bound_exit(void) #endif POST_SEM (); EXIT_SEM (); +#if HAVE_TLS_FUNC +#if defined(_WIN32) + TlsFree(no_checking_key); +#else + pthread_key_delete(no_checking_key); +#endif +#endif inited = 0; if (print_statistic) { #if BOUND_STATISTIC @@ -1095,12 +1230,155 @@ void __attribute__((destructor)) __bound_exit(void) } #if HAVE_PTHREAD_CREATE +#if HAVE_TLS_FUNC +typedef struct { + void *(*start_routine) (void *); + void *arg; +} bound_thread_create_type; + +static void *bound_thread_create(void *bdata) +{ + bound_thread_create_type *data = (bound_thread_create_type *) bdata; + void *retval; + int *p = (int *) BOUND_MALLOC(sizeof(int)); + + if (!p) bound_alloc_error("bound_thread_create malloc"); + *p = 0; + pthread_setspecific(no_checking_key, p); + retval = data->start_routine(data->arg); + pthread_setspecific(no_checking_key, NULL); + BOUND_FREE (p); + BOUND_FREE (data); + return retval; +} +#endif + int pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg) { use_sem = 1; dprintf (stderr, "%s, %s()\n", __FILE__, __FUNCTION__); +#if HAVE_TLS_FUNC + { + bound_thread_create_type *data; + + data = (bound_thread_create_type *) BOUND_MALLOC(sizeof(bound_thread_create_type)); + data->start_routine = start_routine; + data->arg = arg; + return pthread_create_redir(thread, attr, bound_thread_create, data); + } +#else return pthread_create_redir(thread, attr, start_routine, arg); +#endif +} +#endif + +#if HAVE_SIGNAL || HAVE_SIGACTION +typedef union { +#if HAVE_SIGNAL + bound_sig signal_handler; +#endif +#if HAVE_SIGACTION + void (*sig_handler)(int); + void (*sig_sigaction)(int, siginfo_t *, void *); +#endif +} bound_sig_type; + +static unsigned char bound_sig_used[NSIG]; +static bound_sig_type bound_sig_data[NSIG]; +#endif + +#if HAVE_SIGNAL +static void signal_handler(int sig) +{ + __bounds_checking(1); + bound_sig_data[sig].signal_handler(sig); + __bounds_checking(-1); +} + +bound_sig signal(int signum, bound_sig handler) +{ + bound_sig retval; + + dprintf (stderr, "%s, %s() %d %p\n", __FILE__, __FUNCTION__, + signum, handler); + retval = signal_redir(signum, handler ? signal_handler : handler); + if (retval != SIG_ERR) { + if (bound_sig_used[signum]) + retval = bound_sig_data[signum].signal_handler; + if (handler) { + bound_sig_used[signum] = 1; + bound_sig_data[signum].signal_handler = handler; + } + } + return retval; +} +#endif + +#if HAVE_SIGACTION +static void sig_handler(int sig) +{ + __bounds_checking(1); + bound_sig_data[sig].sig_handler(sig); + __bounds_checking(-1); +} + +static void sig_sigaction(int sig, siginfo_t *info, void *ucontext) +{ + __bounds_checking(1); + bound_sig_data[sig].sig_sigaction(sig, info, ucontext); + __bounds_checking(-1); +} + +int sigaction(int signum, const struct sigaction *act, struct sigaction *oldact) +{ + int retval; + struct sigaction nact, oact; + + dprintf (stderr, "%s, %s() %d %p %p\n", __FILE__, __FUNCTION__, + signum, act, oldact); + if (act) { + nact = *act; + if (nact.sa_flags & SA_SIGINFO) + nact.sa_sigaction = sig_sigaction; + else + nact.sa_handler = sig_handler; + retval = sigaction_redir(signum, &nact, &oact); + } + else + retval = sigaction_redir(signum, act, &oact); + if (retval >= 0) { + if (bound_sig_used[signum]) { + if (oact.sa_flags & SA_SIGINFO) + oact.sa_sigaction = bound_sig_data[signum].sig_sigaction; + else + oact.sa_handler = bound_sig_data[signum].sig_handler; + } + if (oldact) { + *oldact = oact; + } + if (act) { + bound_sig_used[signum] = 1; + if (act->sa_flags & SA_SIGINFO) + bound_sig_data[signum].sig_sigaction = act->sa_sigaction; + else + bound_sig_data[signum].sig_handler = act->sa_handler; + } + } + return retval; +} +#endif + +#if HAVE_FORK +pid_t fork(void) +{ + pid_t retval = (*fork_redir)(); + + if (retval == 0) { + TRY_SEM(); + POST_SEM(); + } + return retval; } #endif @@ -1134,7 +1412,7 @@ void *__bound_malloc(size_t size, const void *caller) dprintf(stderr, "%s, %s(): %p, 0x%lx\n", __FILE__, __FUNCTION__, ptr, (unsigned long)size); - if (no_checking == 0) { + if (NO_CHECKING_GET() == 0) { WAIT_SEM (); INCR_COUNT(bound_malloc_count); @@ -1173,7 +1451,7 @@ void *__bound_memalign(size_t size, size_t align, const void *caller) dprintf(stderr, "%s, %s(): %p, 0x%lx\n", __FILE__, __FUNCTION__, ptr, (unsigned long)size); - if (no_checking == 0) { + if (NO_CHECKING_GET() == 0) { WAIT_SEM (); INCR_COUNT(bound_memalign_count); @@ -1206,7 +1484,7 @@ void __bound_free(void *ptr, const void *caller) dprintf(stderr, "%s, %s(): %p\n", __FILE__, __FUNCTION__, ptr); - if (no_checking == 0) { + if (NO_CHECKING_GET() == 0) { WAIT_SEM (); INCR_COUNT(bound_free_count); tree = splay (addr, tree); @@ -1251,7 +1529,7 @@ void *__bound_realloc(void *ptr, size_t size, const void *caller) dprintf(stderr, "%s, %s(): %p, 0x%lx\n", __FILE__, __FUNCTION__, new_ptr, (unsigned long)size); - if (no_checking == 0) { + if (NO_CHECKING_GET() == 0) { WAIT_SEM (); INCR_COUNT(bound_realloc_count); @@ -1298,7 +1576,7 @@ void *__bound_calloc(size_t nmemb, size_t size) if (ptr) { memset (ptr, 0, size); - if (no_checking == 0) { + if (NO_CHECKING_GET() == 0) { WAIT_SEM (); INCR_COUNT(bound_calloc_count); tree = splay_insert ((size_t) ptr, size ? size : size + 1, tree); @@ -1319,7 +1597,7 @@ void *__bound_mmap (void *start, size_t size, int prot, dprintf(stderr, "%s, %s(): %p, 0x%lx\n", __FILE__, __FUNCTION__, start, (unsigned long)size); result = mmap (start, size, prot, flags, fd, offset); - if (result && no_checking == 0) { + if (result && NO_CHECKING_GET() == 0) { WAIT_SEM (); INCR_COUNT(bound_mmap_count); tree = splay_insert((size_t)result, size, tree); @@ -1334,7 +1612,7 @@ int __bound_munmap (void *start, size_t size) dprintf(stderr, "%s, %s(): %p, 0x%lx\n", __FILE__, __FUNCTION__, start, (unsigned long)size); - if (start && no_checking == 0) { + if (start && NO_CHECKING_GET() == 0) { WAIT_SEM (); INCR_COUNT(bound_munmap_count); tree = splay_delete ((size_t) start, tree); @@ -1363,7 +1641,7 @@ static int check_overlap (const void *p1, size_t n1, const void *p1e = (const void *) ((const char *) p1 + n1); const void *p2e = (const void *) ((const char *) p2 + n2); - if (no_checking == 0 && n1 != 0 && n2 !=0 && + if (NO_CHECKING_GET() == 0 && n1 != 0 && n2 !=0 && ((p1 <= p2 && p1e > p2) || /* p1----p2====p1e----p2e */ (p2 <= p1 && p2e > p1))) { /* p2----p1====p2e----p1e */ bound_error("overlapping regions %p(0x%lx), %p(0x%lx) in %s", @@ -1613,7 +1891,7 @@ char *__bound_strdup(const char *s) dprintf(stderr, "%s, %s(): %p, 0x%lx\n", __FILE__, __FUNCTION__, new, (unsigned long)(p -s)); if (new) { - if (no_checking == 0 && no_strdup == 0) { + if (NO_CHECKING_GET() == 0 && no_strdup == 0) { WAIT_SEM (); tree = splay_insert((size_t)new, p - s, tree); if (tree && tree->start == (size_t) new) diff --git a/tcc-doc.texi b/tcc-doc.texi index d8cf070..1c66c01 100644 --- a/tcc-doc.texi +++ b/tcc-doc.texi @@ -949,13 +949,24 @@ executable should always be compiled with the @option{-b}. @emph{fully compatible} with unchecked code. When a pointer comes from unchecked code, it is assumed to be valid. Even very obscure C code with casts should work correctly. -@item Signal handlers are not compatible with bounds checking. The fork() -function call in a multi threaded application is also a problem. -The code below can be used to solve this. +@item Signal handlers are not compatible with bounds checking. The +bounds checking code disables checking in signal/sigaction handlers. +The fork() function call in a multi threaded application is also a problem. +The bound checking code fixes this for the child process. +@item The reason that signals and fork have problems is that we use locking +inside the bounds checking code. +Inside a signal handler we can not use locks. Also in a multi threaded +application after a fork the child process can have the lock set +by another thread. +@item The BOUNDS_CHECKING_OFF and BOUNDS_CHECKING_ON can also be used to +disable bounds checking for some code. +@item The __bounds_checking call adds a value to a thread local value. +The value starts at 0. If the value is not 0 the code is not checked +for bounds checking errors. @end itemize @example -#ifdef __BOUNDS_CHECKING_ON +#if defined(__TINYC__) && __BOUNDS_CHECKING_ON extern void __bounds_checking (int x); # define BOUNDS_CHECKING_OFF __bounds_checking(1) # define BOUNDS_CHECKING_ON __bounds_checking(-1) @@ -963,30 +974,6 @@ extern void __bounds_checking (int x); # define BOUNDS_CHECKING_OFF # define BOUNDS_CHECKING_ON #endif - -void signal_handler(int sig, void *info, void *ucontext) -@{ - BOUNDS_CHECKING_OFF; - ... signal handler code without generated bounds checking code. - BOUNDS_CHECKING_ON; -@} - -void run(const char *cmd) -@{ - switch (fork()) @{ - case 0: - BOUNDS_CHECKING_OFF; - ... - exec... - exit(1); - case -1: - ... - break; - default: - ... - break; - @} -@} @end example For more information about the ideas behind this method, see diff --git a/tccelf.c b/tccelf.c index ff83eb4..452f4eb 100644 --- a/tccelf.c +++ b/tccelf.c @@ -1876,13 +1876,13 @@ static int layout_sections(TCCState *s1, ElfW(Phdr) *phdr, int phnum, dyninf->bss_addr = dyninf->bss_size = 0; #endif - for(j = 0; j < 2; j++) { - ph->p_type = PT_LOAD; + for(j = 0; j < (phnum == 6 ? 3 : 2); j++) { + ph->p_type = j == 2 ? PT_TLS : PT_LOAD; if (j == 0) ph->p_flags = PF_R | PF_X; else ph->p_flags = PF_R | PF_W; - ph->p_align = s_align; + ph->p_align = j == 2 ? 4 : s_align; /* Decide the layout of sections loaded in memory. This must be done before program headers are filled since they contain @@ -1894,13 +1894,17 @@ static int layout_sections(TCCState *s1, ElfW(Phdr) *phdr, int phnum, s = s1->sections[i]; /* compute if section should be included */ if (j == 0) { - if ((s->sh_flags & (SHF_ALLOC | SHF_WRITE)) != + if ((s->sh_flags & (SHF_ALLOC | SHF_WRITE | SHF_TLS)) != SHF_ALLOC) continue; - } else { - if ((s->sh_flags & (SHF_ALLOC | SHF_WRITE)) != + } else if (j == 1) { + if ((s->sh_flags & (SHF_ALLOC | SHF_WRITE | SHF_TLS)) != (SHF_ALLOC | SHF_WRITE)) continue; + } else { + if ((s->sh_flags & (SHF_ALLOC | SHF_WRITE | SHF_TLS)) != + (SHF_ALLOC | SHF_WRITE | SHF_TLS)) + continue; } if (s == interp) { if (k != 0) @@ -2511,8 +2515,12 @@ static int elf_output_file(TCCState *s1, const char *filename) phnum = 3; else if (s1->static_link) phnum = 2; - else - phnum = 5; + else { + int i; + for (i = 1; i < s1->nb_sections && + !(s1->sections[i]->sh_flags & SHF_TLS); i++); + phnum = i < s1->nb_sections ? 6 : 5; + } /* allocate program segment headers */ phdr = tcc_mallocz(phnum * sizeof(ElfW(Phdr))); diff --git a/tests/tests2/114_bound_signal.c b/tests/tests2/114_bound_signal.c index 6af377d..f167a21 100644 --- a/tests/tests2/114_bound_signal.c +++ b/tests/tests2/114_bound_signal.c @@ -8,39 +8,27 @@ #include #include -/* See tcc-doc.info */ -#if defined(__TINYC__) && __BOUNDS_CHECKING_ON -#undef __attribute__ -extern void __bounds_checking (int no_check); -#define BOUNDS_CHECKING_OFF __bounds_checking(1) -#define BOUNDS_CHECKING_ON __bounds_checking(-1) -#else -#define BOUNDS_CHECKING_OFF -#define BOUNDS_CHECKING_ON -#endif - static volatile int run = 1; -static int dummy[10]; static sem_t sem; static void -add (void) +add (int n) { int i; + int arr[n]; - BOUNDS_CHECKING_OFF; - for (i = 0; i < (sizeof(dummy)/sizeof(dummy[0])); i++) { - dummy[i]++; + for (i = 0; i < n; i++) { + arr[i]++; } - memset (&dummy[0], 0, sizeof(dummy)); - BOUNDS_CHECKING_ON; + memset (&arr[0], 0, n * sizeof(int)); } static void * high_load (void *unused) { while (run) { - add(); + add(10); + add(20); } return NULL; } @@ -57,10 +45,9 @@ do_signal (void *unused) static void signal_handler(int sig) { - BOUNDS_CHECKING_OFF; - add(); + add(10); + add(20); sem_post (&sem); - BOUNDS_CHECKING_ON; } int diff --git a/x86_64-link.c b/x86_64-link.c index ec67d74..507ecb7 100644 --- a/x86_64-link.c +++ b/x86_64-link.c @@ -43,6 +43,10 @@ int code_reloc (int reloc_type) case R_X86_64_COPY: case R_X86_64_RELATIVE: case R_X86_64_GOTOFF64: + case R_X86_64_TLSGD: + case R_X86_64_TLSLD: + case R_X86_64_DTPOFF32: + case R_X86_64_TPOFF32: return 0; case R_X86_64_PC32: @@ -87,6 +91,10 @@ int gotplt_entry_type (int reloc_type) case R_X86_64_GOTOFF64: case R_X86_64_GOTPCREL: case R_X86_64_GOTPCRELX: + case R_X86_64_TLSGD: + case R_X86_64_TLSLD: + case R_X86_64_DTPOFF32: + case R_X86_64_TPOFF32: case R_X86_64_REX_GOTPCRELX: case R_X86_64_PLT32: case R_X86_64_PLTOFF64: @@ -281,6 +289,70 @@ void relocate(TCCState *s1, ElfW_Rel *rel, int type, unsigned char *ptr, addr_t case R_X86_64_GOTOFF64: add64le(ptr, val - s1->got->sh_addr); break; + case R_X86_64_TLSGD: + { + static const unsigned char expect[] = { + /* .byte 0x66; lea 0(%rip),%rdi */ + 0x66, 0x48, 0x8d, 0x3d, 0x00, 0x00, 0x00, 0x00, + /* .word 0x6666; rex64; call __tls_get_addr@PLT */ + 0x66, 0x66, 0x48, 0xe8, 0x00, 0x00, 0x00, 0x00 }; + static const unsigned char replace[] = { + /* mov %fs:0,%rax */ + 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, + /* lea -4(%rax),%rax */ + 0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00 }; + + if (memcmp (ptr-4, expect, sizeof(expect)) == 0) { + ElfW(Sym) *sym; + Section *sec; + int32_t x; + + memcpy(ptr-4, replace, sizeof(replace)); + rel[1].r_info = ELFW(R_INFO)(0, R_X86_64_NONE); + sym = &((ElfW(Sym) *)symtab_section->data)[sym_index]; + sec = s1->sections[sym->st_shndx]; + x = sym->st_value - sec->sh_addr - sec->data_offset; + add32le(ptr + 8, x); + } + else + tcc_error("unexpected R_X86_64_TLSGD pattern"); + } + break; + case R_X86_64_TLSLD: + { + static const unsigned char expect[] = { + /* lea 0(%rip),%rdi */ + 0x48, 0x8d, 0x3d, 0x00, 0x00, 0x00, 0x00, + /* call __tls_get_addr@PLT */ + 0xe8, 0x00, 0x00, 0x00, 0x00 }; + static const unsigned char replace[] = { + /* data16 data16 data16 mov %fs:0,%rax */ + 0x66, 0x66, 0x66, 0x64, 0x48, 0x8b, 0x04, 0x25, + 0x00, 0x00, 0x00, 0x00 }; + + if (memcmp (ptr-3, expect, sizeof(expect)) == 0) { + memcpy(ptr-3, replace, sizeof(replace)); + rel[1].r_info = ELFW(R_INFO)(0, R_X86_64_NONE); + } + else + tcc_error("unexpected R_X86_64_TLSLD pattern"); + } + break; + case R_X86_64_DTPOFF32: + case R_X86_64_TPOFF32: + { + ElfW(Sym) *sym; + Section *sec; + int32_t x; + + sym = &((ElfW(Sym) *)symtab_section->data)[sym_index]; + sec = s1->sections[sym->st_shndx]; + x = val - sec->sh_addr - sec->data_offset; + add32le(ptr, x); + } + break; + case R_X86_64_NONE: + break; case R_X86_64_RELATIVE: #ifdef TCC_TARGET_PE add32le(ptr, val - s1->pe_imagebase);