tinycc/i386-link.c
herman ten brugge 853a498f2c Fix boundschecking for signal/sigaction/fork
The BOUNDS_CHECKING_ON/BOUNDS_CHECKING_OFF is not working for
signal/sigaction/fork. The reason is that the code stops bound checking
for the whole application. This result in wrong handling of
__bound_local_new/__bound_local_delete and malloc/calloc/realloc/free.
Consider the following code:

void tst(int n) {
  int i, arr[n];
  for (i = 0; i < n; i++) arr[i] = 0;
}

void *some_thread(void *dummy) {
  while (running) { tst(10); tst(20); }
}

void signal_handler(int sig) { ... }

When the signal handler is called the some_thread code can be interrupted when
is just registered the arr[10] data. When the signal handler is leaved the
arr[10] is still registered and did not see the call to deregister arr[10] and
then register arr[20]. The code resumes when tst(20) is running. This results
in a bound checking error when i >= 10.

To solve the above problem I changed the bound checking code to use
tls (thread local storage) for the no_checking variable.
This also makes it now possible to redirect signal/sigaction/fork code
through the bound checking library and disable checking when a signal is
running and to correct the bounds_sem for the fork child process.
The BOUNDS_CHECKING_ON/BOUNDS_CHECKING_OFF is not needed any more for
signal/sigaction/fork. In fact I could remove them from all my applications.

The use of the tls function code slows down the code by about 10%.
So if the slowdown due to bound checking was 5. It is now 5.5 times slower.

For x86_64/i386 I also allowed to use __thread variable in bcheck.c when
compiled with gcc with:
make x86_64-libtcc1-usegcc=yes
make i386-libtcc1-usegcc=yes
This makes code run faster due to use of gcc and __thread variable.
With the __thread variable there is no 10% slowdown.
For other targets this does not work because stabs is not supported.

Changes:

lib/bcheck.c:
- Add TRY_SEM
- Add HAVE_SIGNAL/HAVE_SIGACTION/HAVE_FORK/HAVE_TLS_FUNC/HAVE_TLS_VAR
  - HAVE_SIGNAL: redirect signal() call if set.
  - HAVE_SIGACTION: redirect sigaction() call if set.
  - HAVE_FORK: redirect fork() call if set.
  - HAVE_TLS_FUNC: If target has tls function calls.
  - HAVE_TLS_VAR: If target has __thread tls support.
- Replace all no_checking refecrences to NO_CHECKING_SET/NO_CHECKING_GET macros

tcc-doc.texi:
- Remove examples for signal/sigaction/fork code.
- Add some explanation for signal/sigaction/fork code.
- Add documentaion for __bounds_checking().

tccelf.c:
- Add support for SHF_TLS

tests/tests2/114_bound_signal.c:
- Remove BOUNDS_CHECKING_ON/BOUNDS_CHECKING_OFF
- Add code to trigger failure when tls is not working.

x86_64-link.c:
- Add support for R_X86_64_TLSGD/R_X86_64_TLSLD/R_X86_64_DTPOFF32/R_X86_64_TPOFF32

i386-link.c:
- Add support for R_386_TLS_GD/R_386_TLS_LDM/R_386_TLS_LDO_32/R_386_TLS_LE
2020-09-08 14:31:58 +02:00

314 lines
9.4 KiB
C

#ifdef TARGET_DEFS_ONLY
#define EM_TCC_TARGET EM_386
/* relocation type for 32 bit data relocation */
#define R_DATA_32 R_386_32
#define R_DATA_PTR R_386_32
#define R_JMP_SLOT R_386_JMP_SLOT
#define R_GLOB_DAT R_386_GLOB_DAT
#define R_COPY R_386_COPY
#define R_RELATIVE R_386_RELATIVE
#define R_NUM R_386_NUM
#define ELF_START_ADDR 0x08048000
#define ELF_PAGE_SIZE 0x1000
#define PCRELATIVE_DLLPLT 0
#define RELOCATE_DLLPLT 0
#else /* !TARGET_DEFS_ONLY */
#include "tcc.h"
#ifndef ELF_OBJ_ONLY
/* Returns 1 for a code relocation, 0 for a data relocation. For unknown
relocations, returns -1. */
int code_reloc (int reloc_type)
{
switch (reloc_type) {
case R_386_RELATIVE:
case R_386_16:
case R_386_32:
case R_386_GOTPC:
case R_386_GOTOFF:
case R_386_GOT32:
case R_386_GOT32X:
case R_386_GLOB_DAT:
case R_386_COPY:
case R_386_TLS_GD:
case R_386_TLS_LDM:
case R_386_TLS_LDO_32:
case R_386_TLS_LE:
return 0;
case R_386_PC16:
case R_386_PC32:
case R_386_PLT32:
case R_386_JMP_SLOT:
return 1;
}
return -1;
}
/* Returns an enumerator to describe whether and when the relocation needs a
GOT and/or PLT entry to be created. See tcc.h for a description of the
different values. */
int gotplt_entry_type (int reloc_type)
{
switch (reloc_type) {
case R_386_RELATIVE:
case R_386_16:
case R_386_GLOB_DAT:
case R_386_JMP_SLOT:
case R_386_COPY:
return NO_GOTPLT_ENTRY;
case R_386_32:
/* This relocations shouldn't normally need GOT or PLT
slots if it weren't for simplicity in the code generator.
See our caller for comments. */
return AUTO_GOTPLT_ENTRY;
case R_386_PC16:
case R_386_PC32:
return AUTO_GOTPLT_ENTRY;
case R_386_GOTPC:
case R_386_GOTOFF:
return BUILD_GOT_ONLY;
case R_386_GOT32:
case R_386_GOT32X:
case R_386_PLT32:
case R_386_TLS_GD:
case R_386_TLS_LDM:
case R_386_TLS_LDO_32:
case R_386_TLS_LE:
return ALWAYS_GOTPLT_ENTRY;
}
return -1;
}
ST_FUNC unsigned create_plt_entry(TCCState *s1, unsigned got_offset, struct sym_attr *attr)
{
Section *plt = s1->plt;
uint8_t *p;
int modrm;
unsigned plt_offset, relofs;
/* on i386 if we build a DLL, we add a %ebx offset */
if (s1->output_type == TCC_OUTPUT_DLL)
modrm = 0xa3;
else
modrm = 0x25;
/* empty PLT: create PLT0 entry that pushes the library identifier
(GOT + PTR_SIZE) and jumps to ld.so resolution routine
(GOT + 2 * PTR_SIZE) */
if (plt->data_offset == 0) {
p = section_ptr_add(plt, 16);
p[0] = 0xff; /* pushl got + PTR_SIZE */
p[1] = modrm + 0x10;
write32le(p + 2, PTR_SIZE);
p[6] = 0xff; /* jmp *(got + PTR_SIZE * 2) */
p[7] = modrm;
write32le(p + 8, PTR_SIZE * 2);
}
plt_offset = plt->data_offset;
/* The PLT slot refers to the relocation entry it needs via offset.
The reloc entry is created below, so its offset is the current
data_offset */
relofs = s1->got->reloc ? s1->got->reloc->data_offset : 0;
/* Jump to GOT entry where ld.so initially put the address of ip + 4 */
p = section_ptr_add(plt, 16);
p[0] = 0xff; /* jmp *(got + x) */
p[1] = modrm;
write32le(p + 2, got_offset);
p[6] = 0x68; /* push $xxx */
write32le(p + 7, relofs);
p[11] = 0xe9; /* jmp plt_start */
write32le(p + 12, -(plt->data_offset));
return plt_offset;
}
/* relocate the PLT: compute addresses and offsets in the PLT now that final
address for PLT and GOT are known (see fill_program_header) */
ST_FUNC void relocate_plt(TCCState *s1)
{
uint8_t *p, *p_end;
if (!s1->plt)
return;
p = s1->plt->data;
p_end = p + s1->plt->data_offset;
if (p < p_end) {
add32le(p + 2, s1->got->sh_addr);
add32le(p + 8, s1->got->sh_addr);
p += 16;
while (p < p_end) {
add32le(p + 2, s1->got->sh_addr);
p += 16;
}
}
}
#endif
void relocate(TCCState *s1, ElfW_Rel *rel, int type, unsigned char *ptr, addr_t addr, addr_t val)
{
int sym_index, esym_index;
sym_index = ELFW(R_SYM)(rel->r_info);
switch (type) {
case R_386_32:
if (s1->output_type == TCC_OUTPUT_DLL) {
esym_index = get_sym_attr(s1, sym_index, 0)->dyn_index;
qrel->r_offset = rel->r_offset;
if (esym_index) {
qrel->r_info = ELFW(R_INFO)(esym_index, R_386_32);
qrel++;
return;
} else {
qrel->r_info = ELFW(R_INFO)(0, R_386_RELATIVE);
qrel++;
}
}
add32le(ptr, val);
return;
case R_386_PC32:
if (s1->output_type == TCC_OUTPUT_DLL) {
/* DLL relocation */
esym_index = get_sym_attr(s1, sym_index, 0)->dyn_index;
if (esym_index) {
qrel->r_offset = rel->r_offset;
qrel->r_info = ELFW(R_INFO)(esym_index, R_386_PC32);
qrel++;
return;
}
}
add32le(ptr, val - addr);
return;
case R_386_PLT32:
add32le(ptr, val - addr);
return;
case R_386_GLOB_DAT:
case R_386_JMP_SLOT:
write32le(ptr, val);
return;
case R_386_GOTPC:
add32le(ptr, s1->got->sh_addr - addr);
return;
case R_386_GOTOFF:
add32le(ptr, val - s1->got->sh_addr);
return;
case R_386_GOT32:
case R_386_GOT32X:
/* we load the got offset */
add32le(ptr, get_sym_attr(s1, sym_index, 0)->got_offset);
return;
case R_386_16:
if (s1->output_format != TCC_OUTPUT_FORMAT_BINARY) {
output_file:
tcc_error("can only produce 16-bit binary files");
}
write16le(ptr, read16le(ptr) + val);
return;
case R_386_PC16:
if (s1->output_format != TCC_OUTPUT_FORMAT_BINARY)
goto output_file;
write16le(ptr, read16le(ptr) + val - addr);
return;
case R_386_RELATIVE:
#ifdef TCC_TARGET_PE
add32le(ptr, val - s1->pe_imagebase);
#endif
/* do nothing */
return;
case R_386_COPY:
/* This relocation must copy initialized data from the library
to the program .bss segment. Currently made like for ARM
(to remove noise of default case). Is this true?
*/
return;
case R_386_TLS_GD:
{
static const unsigned char expect[] = {
/* lea 0(,%ebx,1),%eax */
0x8d, 0x04, 0x1d, 0x00, 0x00, 0x00, 0x00,
/* call __tls_get_addr@PLT */
0xe8, 0xfc, 0xff, 0xff, 0xff };
static const unsigned char replace[] = {
/* mov %gs:0,%eax */
0x65, 0xa1, 0x00, 0x00, 0x00, 0x00,
/* sub 0,%eax */
0x81, 0xe8, 0x00, 0x00, 0x00, 0x00 };
if (memcmp (ptr-3, expect, sizeof(expect)) == 0) {
ElfW(Sym) *sym;
Section *sec;
int32_t x;
memcpy(ptr-3, replace, sizeof(replace));
rel[1].r_info = ELFW(R_INFO)(0, R_386_NONE);
sym = &((ElfW(Sym) *)symtab_section->data)[sym_index];
sec = s1->sections[sym->st_shndx];
x = sym->st_value - sec->sh_addr - sec->data_offset;
add32le(ptr + 5, -x);
}
else
tcc_error("unexpected R_386_TLS_GD pattern");
}
return;
case R_386_TLS_LDM:
{
static const unsigned char expect[] = {
/* lea 0(%ebx),%eax */
0x8d, 0x83, 0x00, 0x00, 0x00, 0x00,
/* call __tls_get_addr@PLT */
0xe8, 0xfc, 0xff, 0xff, 0xff };
static const unsigned char replace[] = {
/* mov %gs:0,%eax */
0x65, 0xa1, 0x00, 0x00, 0x00, 0x00,
/* nop */
0x90,
/* lea 0(%esi,%eiz,1),%esi */
0x8d, 0x74, 0x26, 0x00 };
if (memcmp (ptr-2, expect, sizeof(expect)) == 0) {
memcpy(ptr-2, replace, sizeof(replace));
rel[1].r_info = ELFW(R_INFO)(0, R_386_NONE);
}
else
tcc_error("unexpected R_386_TLS_LDM pattern");
}
return;
case R_386_TLS_LDO_32:
case R_386_TLS_LE:
{
ElfW(Sym) *sym;
Section *sec;
int32_t x;
sym = &((ElfW(Sym) *)symtab_section->data)[sym_index];
sec = s1->sections[sym->st_shndx];
x = val - sec->sh_addr - sec->data_offset;
add32le(ptr, x);
}
return;
case R_386_NONE:
return;
default:
fprintf(stderr,"FIXME: handle reloc type %d at %x [%p] to %x\n",
type, (unsigned)addr, ptr, (unsigned)val);
return;
}
}
#endif /* !TARGET_DEFS_ONLY */