jump optimizations

This unifies VT_CMP with VT_JMP(i) by mostly using VT_CMP
with both a positive and a negative jump target list.

That way we can delay emitting the non-inverted or inverted jump
until we can see which one is more suitable (in most cases).

example:
    if (a && b || c && d)
        e = 0;

before this patch:
   a:	8b 45 fc             	mov    0xfffffffc(%ebp),%eax
   d:	83 f8 00             	cmp    $0x0,%eax
  10:	0f 84 11 00 00 00    	je     27 <main+0x27>
  16:	8b 45 f8             	mov    0xfffffff8(%ebp),%eax
  19:	83 f8 00             	cmp    $0x0,%eax
  1c:	0f 84 05 00 00 00    	je     27 <main+0x27>
  22:	e9 22 00 00 00       	jmp    49 <main+0x49>
  27:	8b 45 f4             	mov    0xfffffff4(%ebp),%eax
  2a:	83 f8 00             	cmp    $0x0,%eax
  2d:	0f 84 11 00 00 00    	je     44 <main+0x44>
  33:	8b 45 f0             	mov    0xfffffff0(%ebp),%eax
  36:	83 f8 00             	cmp    $0x0,%eax
  39:	0f 84 05 00 00 00    	je     44 <main+0x44>
  3f:	e9 05 00 00 00       	jmp    49 <main+0x49>
  44:	e9 08 00 00 00       	jmp    51 <main+0x51>
  49:	b8 00 00 00 00       	mov    $0x0,%eax
  4e:	89 45 ec             	mov    %eax,0xffffffec(%ebp)
  51:   ...

with this patch:
   a:	8b 45 fc             	mov    0xfffffffc(%ebp),%eax
   d:	83 f8 00             	cmp    $0x0,%eax
  10:	0f 84 0c 00 00 00    	je     22 <main+0x22>
  16:	8b 45 f8             	mov    0xfffffff8(%ebp),%eax
  19:	83 f8 00             	cmp    $0x0,%eax
  1c:	0f 85 18 00 00 00    	jne    3a <main+0x3a>
  22:	8b 45 f4             	mov    0xfffffff4(%ebp),%eax
  25:	83 f8 00             	cmp    $0x0,%eax
  28:	0f 84 14 00 00 00    	je     42 <main+0x42>
  2e:	8b 45 f0             	mov    0xfffffff0(%ebp),%eax
  31:	83 f8 00             	cmp    $0x0,%eax
  34:	0f 84 08 00 00 00    	je     42 <main+0x42>
  3a:	b8 00 00 00 00       	mov    $0x0,%eax
  3f:	89 45 ec             	mov    %eax,0xffffffec(%ebp)
  42:   ...
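In other words (a sketch of the mechanism, not code from the patch): a comparison now leaves VT_CMP on the value stack together with two pending jump lists, jtrue and jfalse. The lists cost no extra memory because they are threaded through the not-yet-patched 32-bit offset slots of the emitted jumps, which is what the per-target gjmp_append() functions below walk. A standalone toy model of that list handling (buffer layout, sizes and main() are invented for illustration):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static unsigned char code[256];   /* stand-in for cur_text_section->data */
static int ind = 4;               /* next emit position; 0 marks a list end */

static uint32_t rd32(int at) { uint32_t v; memcpy(&v, code + at, 4); return v; }
static void wr32(int at, uint32_t v) { memcpy(code + at, &v, 4); }

/* emit a jump whose operand slot links to the previous list head t */
static int gjmp(int t) { int r = ind; wr32(r, t); ind += 4; return r; }

/* splice list n in front of list/target t (cf. gjmp_append in the patch) */
static int gjmp_append(int n, int t)
{
    if (n) {
        int n1 = n, n2;
        while ((n2 = rd32(n1)))   /* walk to the tail of list n */
            n1 = n2;
        wr32(n1, t);              /* tail now links to t */
        t = n;
    }
    return t;
}

/* resolve every jump on list t to address a (cf. gsym_addr) */
static void gsym_addr(int t, int a)
{
    while (t) { int next = rd32(t); wr32(t, (uint32_t)a); t = next; }
}

int main(void)
{
    int jfalse = 0;                         /* empty 'condition false' list */
    jfalse = gjmp_append(gjmp(0), jfalse);  /* jump emitted for 'a == 0'    */
    jfalse = gjmp_append(gjmp(0), jfalse);  /* second one added by '&&'     */
    gsym_addr(jfalse, 0x42);                /* both now branch to 0x42      */
    printf("%x %x\n", rd32(4), rd32(8));    /* prints: 42 42 */
    return 0;
}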
commit 8227db3a23 (parent 1b57560502)
Author: grischka, 2019-06-22 11:45:35 +02:00
9 changed files with 346 additions and 283 deletions

Makefile

@@ -125,7 +125,7 @@ DEF-i386-win32 = -DTCC_TARGET_PE -DTCC_TARGET_I386
DEF-x86_64-win32= -DTCC_TARGET_PE -DTCC_TARGET_X86_64
DEF-x86_64-osx = -DTCC_TARGET_MACHO -DTCC_TARGET_X86_64
DEF-arm-wince = -DTCC_TARGET_PE -DTCC_TARGET_ARM -DTCC_ARM_EABI -DTCC_ARM_VFP -DTCC_ARM_HARDFLOAT
DEF-arm64 = -DTCC_TARGET_ARM64
DEF-arm64 = -DTCC_TARGET_ARM64 -Wno-format
DEF-c67 = -DTCC_TARGET_C67 -w # disable warnings
DEF-arm-fpa = -DTCC_TARGET_ARM
DEF-arm-fpa-ld = -DTCC_TARGET_ARM -DLDOUBLE_SIZE=12

arm-gen.c

@@ -1419,7 +1419,7 @@ ST_FUNC void gen_fill_nops(int bytes)
}
/* generate a jump to a label */
int gjmp(int t)
ST_FUNC int gjmp(int t)
{
int r;
if (nocode_wanted)
@@ -1430,51 +1430,37 @@ int gjmp(int t)
}
/* generate a jump to a fixed address */
void gjmp_addr(int a)
ST_FUNC void gjmp_addr(int a)
{
gjmp(a);
}
/* generate a test. set 'inv' to invert test. Stack entry is popped */
int gtst(int inv, int t)
ST_FUNC int gjmp_cond(int op, int t)
{
int v, r;
uint32_t op;
v = vtop->r & VT_VALMASK;
int r;
if (nocode_wanted)
return t;
r=ind;
op=mapcc(op);
op|=encbranch(r,t,1);
o(op);
return r;
}
if (nocode_wanted) {
;
} else if (v == VT_CMP) {
op=mapcc(inv?negcc(vtop->c.i):vtop->c.i);
op|=encbranch(r,t,1);
o(op);
t=r;
} else if (v == VT_JMP || v == VT_JMPI) {
if ((v & 1) == inv) {
if(!vtop->c.i)
vtop->c.i=t;
else {
uint32_t *x;
int p,lp;
if(t) {
p = vtop->c.i;
do {
p = decbranch(lp=p);
} while(p);
x = (uint32_t *)(cur_text_section->data + lp);
*x &= 0xff000000;
*x |= encbranch(lp,t,1);
}
t = vtop->c.i;
}
} else {
t = gjmp(t);
gsym(vtop->c.i);
}
ST_FUNC int gjmp_append(int n, int t)
{
uint32_t *x;
int p,lp;
if(n) {
p = n;
do {
p = decbranch(lp=p);
} while(p);
x = (uint32_t *)(cur_text_section->data + lp);
*x &= 0xff000000;
*x |= encbranch(lp,t,1);
t = n;
}
vtop--;
return t;
}
@@ -1611,10 +1597,8 @@ void gen_opi(int op)
o(opc|(r<<12)|fr);
done:
vtop--;
if (op >= TOK_ULT && op <= TOK_GT) {
vtop->r = VT_CMP;
vtop->c.i = op;
}
if (op >= TOK_ULT && op <= TOK_GT)
vset_VT_CMP(op);
break;
case 2:
opc=0xE1A00000|(opc<<5);
@@ -1730,9 +1714,7 @@ void gen_opf(int op)
case TOK_UGE: op=TOK_GE; break;
case TOK_UGT: op=TOK_GT; break;
}
vtop->r = VT_CMP;
vtop->c.i = op;
vset_VT_CMP(op);
return;
}
r=gv(RC_FLOAT);
@@ -1934,8 +1916,9 @@ void gen_opf(int op)
} else {
r2=fpr(gv(RC_FLOAT));
}
vtop[-1].r = VT_CMP;
vtop[-1].c.i = op;
--vtop;
vset_VT_CMP(op);
++vtop;
} else {
tcc_error("unknown fp op %x!",op);
return;

arm64-gen.c

@@ -91,6 +91,9 @@ static uint32_t fltr(int r)
return r - TREG_F(0);
}
#define dprintf(x) ((void)(tcc_state->verbose == 2 && printf x))
//#define dprintf(x)
// Add an instruction to text section:
ST_FUNC void o(unsigned int c)
{
@@ -100,6 +103,7 @@ ST_FUNC void o(unsigned int c)
if (ind1 > cur_text_section->data_allocated)
section_realloc(cur_text_section, ind1);
write32le(cur_text_section->data + ind, c);
dprintf(("o %04x : %08x\n", ind, c)); //gr
ind = ind1;
}
@@ -232,6 +236,7 @@ ST_FUNC void gsym_addr(int t_, int a_)
tcc_error("branch out of range");
write32le(ptr, (a - t == 4 ? 0xd503201f : // nop
0x14000000 | ((a - t) >> 2 & 0x3ffffff))); // b
dprintf((". gsym TARG=%04x ADDR=%04x\n", t, a)); //gr
t = next;
}
}
@@ -440,6 +445,8 @@ static void arm64_sym(int r, Sym *sym, unsigned long addend)
}
}
static void arm64_load_cmp(int r, SValue *sv);
ST_FUNC void load(int r, SValue *sv)
{
int svtt = sv->type.t;
@@ -531,6 +538,11 @@ ST_FUNC void load(int r, SValue *sv)
return;
}
if (svr == VT_CMP) {
arm64_load_cmp(r, sv);
return;
}
printf("load(%x, (%x, %x, %llx))\n", r, svtt, sv->r, (long long)svcul);
assert(0);
}
@@ -1284,6 +1296,7 @@ ST_FUNC void gen_fill_nops(int bytes)
ST_FUNC int gjmp(int t)
{
int r = ind;
dprintf((". gjmp T=%04x\n", t)); //gr
if (nocode_wanted)
return t;
o(t);
@@ -1295,11 +1308,57 @@ ST_FUNC void gjmp_addr(int a)
{
assert(a - ind + 0x8000000 < 0x10000000);
o(0x14000000 | ((a - ind) >> 2 & 0x3ffffff));
dprintf((". gjmp_addr T=%04x\n", a)); //gr
}
ST_FUNC int gtst(int inv, int t)
ST_FUNC int gjmp_append(int n, int t)
{
void *p;
/* insert vtop->c jump list in t */
if (n) {
uint32_t n1 = n, n2;
while ((n2 = read32le(p = cur_text_section->data + n1)))
n1 = n2;
write32le(p, t);
t = n;
}
return t;
}
void arm64_vset_VT_CMP(int op)
{
if (op >= TOK_ULT && op <= TOK_GT) {
vtop->cmp_r = vtop->r;
vset_VT_CMP(0x80);
dprintf((". set VT_CMP OP(%s) R=%x\n", get_tok_str(op, 0), vtop->cmp_r));
}
}
static void arm64_gen_opil(int op, uint32_t l);
static void arm64_load_cmp(int r, SValue *sv)
{
sv->r = sv->cmp_r;
dprintf((". load VT_CMP OP(%x), R=%x/%x\n", (int)sv->c.i, sv->r, r));
if (sv->c.i & 1) {
vpushi(1);
arm64_gen_opil('^', 0);
}
if (r != sv->r) {
load(r, sv);
sv->r = r;
}
dprintf((". load VT_CMP done\n")); //gr
}
ST_FUNC int gjmp_cond(int op, int t)
{
int bt = vtop->type.t & VT_BTYPE;
int inv = op & 1;
vtop->r = vtop->cmp_r;
dprintf((". gjmp_cond OP(%x) R=%x T=%04x\n", op, vtop->r, t)); //gr
if (bt == VT_LDOUBLE) {
uint32_t a, b, f = fltr(gv(RC_FLOAT));
a = get_reg(RC_INT);
@@ -1324,7 +1383,6 @@ ST_FUNC int gtst(int inv, int t)
uint32_t a = intr(gv(RC_INT));
o(0x34000040 | a | !!inv << 24 | ll << 31); // cbz/cbnz wA,.+8
}
--vtop;
return gjmp(t);
}
@@ -1553,11 +1611,13 @@ static void arm64_gen_opil(int op, uint32_t l)
ST_FUNC void gen_opi(int op)
{
arm64_gen_opil(op, 0);
arm64_vset_VT_CMP(op);
}
ST_FUNC void gen_opl(int op)
{
arm64_gen_opil(op, 1);
arm64_vset_VT_CMP(op);
}
ST_FUNC void gen_opf(int op)
@@ -1657,6 +1717,7 @@ ST_FUNC void gen_opf(int op)
default:
assert(0);
}
arm64_vset_VT_CMP(op);
}
// Generate sign extension from 32 to 64 bits:

c67-gen.c

@@ -2072,15 +2072,13 @@ void gjmp_addr(int a)
}
/* generate a test. set 'inv' to invert test. Stack entry is popped */
int gtst(int inv, int t)
ST_FUNC int gjmp_cond(int op, int t)
{
int ind1, n;
int v, *p;
int ind1;
int inv = op & 1;
if (nocode_wanted)
return t;
v = vtop->r & VT_VALMASK;
if (nocode_wanted) {
;
} else if (v == VT_CMP) {
/* fast case : can jump directly since flags are set */
// C67 uses B2 sort of as flags register
ind1 = ind;
@@ -2098,16 +2096,18 @@ int gtst(int inv, int t)
C67_NOP(5);
t = ind1; //return where we need to patch
} else if (v == VT_JMP || v == VT_JMPI) {
/* && or || optimization */
if ((v & 1) == inv) {
return t;
}
ST_FUNC int gjmp_append(int n0, int t)
{
if (n0) {
int n = n0, *p;
/* insert vtop->c jump list in t */
// I guess the idea is to traverse to the
// null at the end of the list and store t
// there
n = vtop->c.i;
while (n != 0) {
p = (int *) (cur_text_section->data + n);
@@ -2117,14 +2117,8 @@ int gtst(int inv, int t)
}
*p |= (t & 0xffff) << 7;
*(p + 1) |= ((t >> 16) & 0xffff) << 7;
t = vtop->c.i;
} else {
t = gjmp(t);
gsym(vtop->c.i);
}
t = n0;
}
vtop--;
return t;
}
@@ -2200,10 +2194,8 @@ void gen_opi(int op)
ALWAYS_ASSERT(FALSE);
vtop--;
if (op >= TOK_ULT && op <= TOK_GT) {
vtop->r = VT_CMP;
vtop->c.i = op;
}
if (op >= TOK_ULT && op <= TOK_GT)
vset_VT_CMP(0x80);
break;
case '-':
case TOK_SUBC1: /* sub with carry generation */
@@ -2359,7 +2351,7 @@ void gen_opf(int op)
} else {
ALWAYS_ASSERT(FALSE);
}
vtop->r = VT_CMP; // tell TCC that result is in "flags" actually B2
vset_VT_CMP(0x80);
} else {
if (op == '+') {
if ((ft & VT_BTYPE) == VT_DOUBLE) {

i386-gen.c

@@ -261,10 +261,10 @@ ST_FUNC void load(int r, SValue *sv)
o(0xe8 + r); /* mov %ebp, r */
}
} else if (v == VT_CMP) {
oad(0xb8 + r, 0); /* mov $0, r */
o(0x0f); /* setxx %br */
o(fc);
o(0xc0 + r);
o(0xc0b60f + r * 0x90000); /* movzbl %al, %eax */
} else if (v == VT_JMP || v == VT_JMPI) {
t = v & 1;
oad(0xb8 + r, t); /* mov $1, r */
@@ -692,63 +692,39 @@ ST_FUNC void gjmp_addr(int a)
}
}
ST_FUNC void gtst_addr(int inv, int a)
#if 0
/* generate a jump to a fixed address */
ST_FUNC void gjmp_cond_addr(int a, int op)
{
int v = vtop->r & VT_VALMASK;
if (v == VT_CMP) {
inv ^= (vtop--)->c.i;
a -= ind + 2;
if (a == (char)a) {
g(inv - 32);
g(a);
} else {
g(0x0f);
oad(inv - 16, a - 4);
}
} else if ((v & ~1) == VT_JMP) {
if ((v & 1) != inv) {
gjmp_addr(a);
gsym(vtop->c.i);
} else {
gsym(vtop->c.i);
o(0x05eb);
gjmp_addr(a);
}
vtop--;
}
int r = a - ind - 2;
if (r == (char)r)
g(op - 32), g(r);
else
g(0x0f), gjmp2(op - 16, r - 4);
}
#endif
/* generate a test. set 'inv' to invert test. Stack entry is popped */
ST_FUNC int gtst(int inv, int t)
ST_FUNC int gjmp_append(int n, int t)
{
int v = vtop->r & VT_VALMASK;
if (nocode_wanted) {
;
} else if (v == VT_CMP) {
/* fast case : can jump directly since flags are set */
g(0x0f);
t = gjmp2((vtop->c.i - 16) ^ inv, t);
} else if (v == VT_JMP || v == VT_JMPI) {
/* && or || optimization */
if ((v & 1) == inv) {
/* insert vtop->c jump list in t */
uint32_t n1, n = vtop->c.i;
if (n) {
while ((n1 = read32le(cur_text_section->data + n)))
n = n1;
write32le(cur_text_section->data + n, t);
t = vtop->c.i;
}
} else {
t = gjmp(t);
gsym(vtop->c.i);
}
void *p;
/* insert vtop->c jump list in t */
if (n) {
uint32_t n1 = n, n2;
while ((n2 = read32le(p = cur_text_section->data + n1)))
n1 = n2;
write32le(p, t);
t = n;
}
vtop--;
return t;
}
/* generate an integer binary operation */
ST_FUNC int gjmp_cond(int op, int t)
{
g(0x0f);
t = gjmp2(op - 16, t);
return t;
}
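A note on the arithmetic in gjmp_cond() above and the disabled gjmp_cond_addr() (background on TCC's token encoding, not something this patch adds): the comparison tokens are chosen to equal the x86 'setcc' opcode byte, e.g. TOK_ULT == 0x92 == setb, so op - 16 gives the near-jcc opcode that follows the 0x0f prefix, op - 32 gives the short-jcc opcode, and flipping the low bit, as tccgen's gjmp_cond(op ^ inv, t) call does, inverts the condition. A quick standalone check:

#include <stdio.h>

int main(void)
{
    unsigned op = 0x92;                            /* TOK_ULT / 'setb' */
    printf("short jcc:    %02x\n", op - 32);       /* 72    = jb rel8  */
    printf("near jcc:  0f %02x\n", op - 16);       /* 0f 82 = jb rel32 */
    printf("inverted:  0f %02x\n", (op - 16) ^ 1); /* 0f 83 = jae      */
    return 0;
}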
ST_FUNC void gen_opi(int op)
{
int r, fr, opc, c;
@@ -766,10 +742,9 @@ ST_FUNC void gen_opi(int op)
c = vtop->c.i;
if (c == (char)c) {
/* generate inc and dec for smaller code */
if (c==1 && opc==0 && op != TOK_ADDC1) {
o (0x40 | r); // inc
} else if (c==1 && opc==5 && op != TOK_SUBC1) {
o (0x48 | r); // dec
if ((c == 1 || c == -1) && (op == '+' || op == '-')) {
opc = (c == 1) ^ (op == '+');
o (0x40 | (opc << 3) | r); // inc,dec
} else {
o(0x83);
o(0xc0 | (opc << 3) | r);
@@ -787,10 +762,8 @@ ST_FUNC void gen_opi(int op)
o(0xc0 + r + fr * 8);
}
vtop--;
if (op >= TOK_ULT && op <= TOK_GT) {
vtop->r = VT_CMP;
vtop->c.i = op;
}
if (op >= TOK_ULT && op <= TOK_GT)
vset_VT_CMP(op);
break;
case '-':
case TOK_SUBC1: /* sub with carry generation */
@@ -948,8 +921,7 @@ ST_FUNC void gen_opf(int op)
op = TOK_EQ;
}
vtop--;
vtop->r = VT_CMP;
vtop->c.i = op;
vset_VT_CMP(op);
} else {
/* no memory reference possible for long double operations */
if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {

tcc.h

@@ -430,9 +430,15 @@ typedef struct SValue {
unsigned short r; /* register + flags */
unsigned short r2; /* second register, used for 'long long'
type. If not used, set to VT_CONST */
CValue c; /* constant, if VT_CONST */
struct Sym *sym; /* symbol, if (VT_SYM | VT_CONST), or if
result of unary() for an identifier. */
union {
struct { int jtrue, jfalse; }; /* forward jmps */
CValue c; /* constant, if VT_CONST */
};
union {
struct { unsigned short cmp_op, cmp_r; }; /* VT_CMP operation */
struct Sym *sym; /* symbol, if (VT_SYM | VT_CONST), or if */
}; /* result of unary() for an identifier. */
} SValue;
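What the two anonymous unions buy: while a value has r == VT_CMP, its constant slot is reused for the pending jump lists and its symbol slot for the comparison op/register, so SValue does not grow. A minimal standalone model of the overlay (the long long stands in for TCC's CValue; field names follow the patch, everything else is illustrative):

#include <stdio.h>

typedef struct {
    unsigned short r;
    union { struct { int jtrue, jfalse; }; long long c; };
    union { struct { unsigned short cmp_op, cmp_r; }; void *sym; };
} MiniSValue;

int main(void)
{
    MiniSValue v = {0};
    v.jtrue = 8;                 /* writing the jump lists ...  */
    v.jfalse = 16;
    printf("c = %llx\n", v.c);   /* ... reuses the bytes of 'c' */
    return 0;
}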
/* symbol attributes */
@@ -1322,6 +1328,7 @@ ST_FUNC ElfSym *elfsym(Sym *);
ST_FUNC void update_storage(Sym *sym);
ST_FUNC Sym *external_global_sym(int v, CType *type);
ST_FUNC void vset(CType *type, int r, int v);
ST_FUNC void vset_VT_CMP(int op);
ST_FUNC void vswap(void);
ST_FUNC void vpush_global_sym(CType *type, int v);
ST_FUNC void vrote(SValue *e, int n);
@@ -1490,12 +1497,8 @@ ST_FUNC void gfunc_epilog(void);
ST_FUNC void gen_fill_nops(int);
ST_FUNC int gjmp(int t);
ST_FUNC void gjmp_addr(int a);
ST_FUNC int gtst(int inv, int t);
#if defined TCC_TARGET_I386 || defined TCC_TARGET_X86_64
ST_FUNC void gtst_addr(int inv, int a);
#else
#define gtst_addr(inv, a) gsym_addr(gtst(inv, 0), a)
#endif
ST_FUNC int gjmp_cond(int op, int t);
ST_FUNC int gjmp_append(int n, int t);
ST_FUNC void gen_opi(int op);
ST_FUNC void gen_opf(int op);
ST_FUNC void gen_cvt_ftoi(int t);

tccgen.c

@@ -681,13 +681,8 @@ ST_FUNC void sym_pop(Sym **ptop, Sym *b, int keep)
}
/* ------------------------------------------------------------------------- */
static void vsetc(CType *type, int r, CValue *vc)
static void vcheck_cmp(void)
{
int v;
if (vtop >= vstack + (VSTACK_SIZE - 1))
tcc_error("memory full (vstack)");
/* cannot let cpu flags if other instruction are generated. Also
avoid leaving VT_JMP anywhere except on the top of the stack
because it would complicate the code generator.
@@ -698,15 +693,17 @@ static void vsetc(CType *type, int r, CValue *vc)
as their value might still be used for real. All values
we push under nocode_wanted will eventually be popped
again, so that the VT_CMP/VT_JMP value will be in vtop
when code is unsuppressed again.
when code is unsuppressed again. */
Same logic below in vswap(); */
if (vtop >= vstack && !nocode_wanted) {
v = vtop->r & VT_VALMASK;
if (v == VT_CMP || (v & ~1) == VT_JMP)
gv(RC_INT);
}
if (vtop->r == VT_CMP && !nocode_wanted)
gv(RC_INT);
}
static void vsetc(CType *type, int r, CValue *vc)
{
if (vtop >= vstack + (VSTACK_SIZE - 1))
tcc_error("memory full (vstack)");
vcheck_cmp();
vtop++;
vtop->type = *type;
vtop->r = r;
@@ -718,12 +715,8 @@ static void vsetc(CType *type, int r, CValue *vc)
ST_FUNC void vswap(void)
{
SValue tmp;
/* cannot vswap cpu flags. See comment at vsetc() above */
if (vtop >= vstack && !nocode_wanted) {
int v = vtop->r & VT_VALMASK;
if (v == VT_CMP || (v & ~1) == VT_JMP)
gv(RC_INT);
}
vcheck_cmp();
tmp = vtop[0];
vtop[0] = vtop[-1];
vtop[-1] = tmp;
@@ -740,9 +733,10 @@ ST_FUNC void vpop(void)
o(0xd8dd); /* fstp %st(0) */
} else
#endif
if (v == VT_JMP || v == VT_JMPI) {
if (v == VT_CMP) {
/* need to put correct jump if && or || without test */
gsym(vtop->c.i);
gsym(vtop->jtrue);
gsym(vtop->jfalse);
}
vtop--;
}
@@ -823,6 +817,7 @@ ST_FUNC void vrotb(int n)
int i;
SValue tmp;
vcheck_cmp();
tmp = vtop[-n + 1];
for(i=-n+1;i!=0;i++)
vtop[i] = vtop[i+1];
@@ -837,6 +832,7 @@ ST_FUNC void vrote(SValue *e, int n)
int i;
SValue tmp;
vcheck_cmp();
tmp = *e;
for(i = 0;i < n - 1; i++)
e[-i] = e[-i - 1];
@@ -851,6 +847,75 @@ ST_FUNC void vrott(int n)
vrote(vtop, n);
}
/* ------------------------------------------------------------------------- */
/* vtop->r = VT_CMP means CPU-flags have been set from comparison or test. */
/* called from generators to set the result from relational ops */
ST_FUNC void vset_VT_CMP(int op)
{
vtop->r = VT_CMP;
vtop->cmp_op = op;
vtop->jfalse = 0;
vtop->jtrue = 0;
}
/* called once before asking generators to load VT_CMP to a register */
static void vset_VT_JMP(void)
{
int op = vtop->cmp_op;
if (vtop->jtrue || vtop->jfalse) {
/* we need to jump to 'mov $0,%R' or 'mov $1,%R' */
int inv = op & (op < 2); /* small optimization */
vseti(VT_JMP+inv, gvtst(inv, 0));
} else {
/* otherwise convert flags (rsp. 0/1) to register */
vtop->c.i = op;
if (op < 2) /* doesn't seem to happen */
vtop->r = VT_CONST;
}
}
/* Set CPU Flags, doesn't yet jump */
static void gvtst_set(int inv, int t)
{
int *p;
if (vtop->r != VT_CMP) {
vpushi(0);
gen_op(TOK_NE);
if (vtop->r != VT_CMP) /* must be VT_CONST then */
vset_VT_CMP(vtop->c.i != 0);
}
p = inv ? &vtop->jfalse : &vtop->jtrue;
*p = gjmp_append(*p, t);
}
/* Generate value test
*
* Generate a test for any value (jump, comparison and integers) */
static int gvtst(int inv, int t)
{
int op, u, x;
gvtst_set(inv, t);
t = vtop->jtrue, u = vtop->jfalse;
if (inv)
x = u, u = t, t = x;
op = vtop->cmp_op;
/* jump to the wanted target */
if (op > 1)
t = gjmp_cond(op ^ inv, t);
else if (op != inv)
t = gjmp(t);
/* resolve complementary jumps to here */
gsym(u);
vtop--;
return t;
}
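Restating gvtst()'s final dispatch as a standalone sketch (emit() stands in for the real gjmp()/gjmp_cond(); the token value is TCC's): cmp_op values above 1 are real comparison tokens and produce exactly one conditional jump, while 0 and 1 encode a truth value already known at compile time, so the "jump" degenerates to an unconditional jmp or to no code at all.

#include <stdio.h>

static void emit(const char *s) { printf("%s\n", s); }

static void gvtst_model(int cmp_op, int inv)
{
    if (cmp_op > 1)                /* real comparison: one jcc */
        emit(inv ? "jcc (inverted) -> t" : "jcc -> t");
    else if (cmp_op != inv)        /* constant truth value says: taken */
        emit("jmp -> t");
    else
        emit("(fall through, no code)");
}

int main(void)
{
    gvtst_model(0x94, 0);   /* TOK_EQ (the 'sete' cc byte) */
    gvtst_model(1, 0);      /* constant true  */
    gvtst_model(0, 0);      /* constant false */
    return 0;
}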
/* ------------------------------------------------------------------------- */
/* push a symbol value of TYPE */
static inline void vpushsym(CType *type, Sym *sym)
{
@@ -1591,6 +1656,8 @@ ST_FUNC int gv(int rc)
/* restore wanted type */
vtop->type.t = t1;
} else {
if (vtop->r == VT_CMP)
vset_VT_JMP();
/* one register type load */
load(r, vtop);
}
@@ -1608,13 +1675,10 @@ ST_FUNC int gv(int rc)
/* generate vtop[-1] and vtop[0] in resp. classes rc1 and rc2 */
ST_FUNC void gv2(int rc1, int rc2)
{
int v;
/* generate more generic register first. But VT_JMP or VT_CMP
values must be generated first in all cases to avoid possible
reload errors */
v = vtop[0].r & VT_VALMASK;
if (v != VT_CMP && (v & ~1) != VT_JMP && rc1 <= rc2) {
if (vtop->r != VT_CMP && rc1 <= rc2) {
vswap();
gv(rc1);
vswap();
@@ -1749,26 +1813,6 @@ static void gv_dup(void)
}
}
/* Generate value test
*
* Generate a test for any value (jump, comparison and integers) */
ST_FUNC int gvtst(int inv, int t)
{
int v = vtop->r & VT_VALMASK;
if (v != VT_CMP && v != VT_JMP && v != VT_JMPI) {
vpushi(0);
gen_op(TOK_NE);
}
if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
/* constant jmp optimization */
if ((vtop->c.i != 0) != inv)
t = gjmp(t);
vtop--;
return t;
}
return gtst(inv, t);
}
#if PTR_SIZE == 4
/* generate CPU independent (unsigned) long long operations */
static void gen_opl(int op)
@@ -1974,8 +2018,8 @@ static void gen_opl(int op)
a = gvtst(1, 0);
if (op != TOK_EQ) {
/* generate non equal test */
vpushi(TOK_NE);
vtop->r = VT_CMP;
vpushi(0);
vset_VT_CMP(TOK_NE);
b = gvtst(0, 0);
}
}
@@ -1990,9 +2034,12 @@ static void gen_opl(int op)
else if (op1 == TOK_GE)
op1 = TOK_UGE;
gen_op(op1);
a = gvtst(1, a);
gsym(b);
vseti(VT_JMPI, a);
#if 0//def TCC_TARGET_I386
if (op == TOK_NE) { gsym(b); break; }
if (op == TOK_EQ) { gsym(a); break; }
#endif
gvtst_set(1, a);
gvtst_set(0, b);
break;
}
}
@@ -5001,11 +5048,12 @@ ST_FUNC void unary(void)
if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {
gen_cast_s(VT_BOOL);
vtop->c.i = !vtop->c.i;
} else if ((vtop->r & VT_VALMASK) == VT_CMP)
vtop->c.i ^= 1;
else {
save_regs(1);
vseti(VT_JMP, gvtst(1, 0));
} else if (vtop->r == VT_CMP) {
vtop->cmp_op ^= 1;
n = vtop->jfalse, vtop->jfalse = vtop->jtrue, vtop->jtrue = n;
} else {
vpushi(0);
gen_op(TOK_EQ);
}
break;
case '~':
@@ -5035,7 +5083,9 @@ ST_FUNC void unary(void)
next();
in_sizeof++;
expr_type(&type, unary); /* Perform a in_sizeof = 0; */
s = vtop[1].sym; /* hack: accessing previous vtop */
s = NULL;
if (vtop[1].r & VT_SYM)
s = vtop[1].sym; /* hack: accessing previous vtop */
size = type_size(&type, &align);
if (s && s->a.aligned)
align = 1 << (s->a.aligned - 1);
@@ -5653,7 +5703,7 @@ static void expr_landor(void(*e_fn)(void), int e_op, int i)
gsym(t);
nocode_wanted -= f;
} else {
vseti(VT_JMP + i, gvtst(i, t));
gvtst_set(i, t);
}
break;
}
@@ -5696,6 +5746,16 @@ static int condition_3way(void)
return c;
}
static int is_cond_bool(SValue *sv)
{
if ((sv->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST
&& (sv->type.t & VT_BTYPE) == VT_INT)
return (unsigned)sv->c.i < 2;
if (sv->r == VT_CMP)
return 1;
return 0;
}
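For illustration, the kind of conditional expression the expr_cond changes below target (hypothetical user code, not from the patch): when both arms are bool-like in the sense of is_cond_bool() above, the result can remain a pair of pending jump lists instead of being materialized to 0/1 and tested again.

/* Both arms satisfy is_cond_bool(): the first leaves a VT_CMP value,
   the second is the integer constant 0. */
int both_bool(int a, int b, int c, int d)
{
    return (a < b) ? (c < d) : 0;
}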
static void expr_cond(void)
{
int tt, u, r1, r2, rc, t1, t2, bt1, bt2, islv, c, g;
@@ -5741,6 +5801,12 @@ static void expr_cond(void)
if (!g)
gexpr();
if (c < 0 && vtop->r == VT_CMP) {
t1 = gvtst(0, 0);
vpushi(0);
gvtst_set(0, t1);
}
if ((vtop->type.t & VT_BTYPE) == VT_FUNC)
mk_pointer(&vtop->type);
type1 = vtop->type;
@@ -5761,6 +5827,22 @@ static void expr_cond(void)
skip(':');
expr_cond();
if (c < 0 && is_cond_bool(vtop) && is_cond_bool(&sv)) {
if (sv.r == VT_CMP) {
t1 = sv.jtrue;
t2 = u;
} else {
t1 = gvtst(0, 0);
t2 = gjmp(0);
gsym(u);
vpushv(&sv);
}
gvtst_set(0, t1);
gvtst_set(1, t2);
nocode_wanted = ncw_prev;
// tcc_warning("two conditions expr_cond");
return;
}
if ((vtop->type.t & VT_BTYPE) == VT_FUNC)
mk_pointer(&vtop->type);
@@ -6059,12 +6141,16 @@ static int case_cmp(const void *pa, const void *pb)
return a < b ? -1 : a > b;
}
static void gtst_addr(int t, int a)
{
gsym_addr(gvtst(0, t), a);
}
static void gcase(struct case_t **base, int len, int *bsym)
{
struct case_t *p;
int e;
int ll = (vtop->type.t & VT_BTYPE) == VT_LLONG;
gv(RC_INT);
while (len > 4) {
/* binary search */
p = base[len/2];
@@ -6074,7 +6160,7 @@ static void gcase(struct case_t **base, int len, int *bsym)
else
vpushi(p->v2);
gen_op(TOK_LE);
e = gtst(1, 0);
e = gvtst(1, 0);
vdup();
if (ll)
vpushll(p->v1);
@@ -6084,10 +6170,6 @@ static void gcase(struct case_t **base, int len, int *bsym)
gtst_addr(0, p->sym); /* v1 <= x <= v2 */
/* x < v1 */
gcase(base, len/2, bsym);
if (cur_switch->def_sym)
gjmp_addr(cur_switch->def_sym);
else
*bsym = gjmp(*bsym);
/* x > v2 */
gsym(e);
e = len/2 + 1;
@@ -6106,7 +6188,7 @@ static void gcase(struct case_t **base, int len, int *bsym)
gtst_addr(0, p->sym);
} else {
gen_op(TOK_LE);
e = gtst(1, 0);
e = gvtst(1, 0);
vdup();
if (ll)
vpushll(p->v1);
@@ -6117,6 +6199,7 @@ static void gcase(struct case_t **base, int len, int *bsym)
gsym(e);
}
}
*bsym = gjmp(*bsym);
}
/* call 'func' for each __attribute__((cleanup(func))) */
@@ -6367,15 +6450,17 @@ static void block(int *bsym, Sym *bcl, int *csym, Sym *ccl, int is_expr)
gexpr();
skip(')');
switchval = *vtop--;
a = 0;
b = gjmp(0); /* jump to first case */
sw.p = NULL; sw.n = 0; sw.def_sym = 0;
saved = cur_switch;
cur_switch = &sw;
a = 0;
b = gjmp(0); /* jump to first case */
block(&a, current_cleanups, csym, ccl, 0);
a = gjmp(a); /* add implicit break */
/* case lookup */
gsym(b);
qsort(sw.p, sw.n, sizeof(void*), case_cmp);
for (b = 1; b < sw.n; b++)
if (sw.p[b - 1]->v2 >= sw.p[b]->v1)
@@ -6385,14 +6470,17 @@ static void block(int *bsym, Sym *bcl, int *csym, Sym *ccl, int is_expr)
if ((switchval.type.t & VT_BTYPE) == VT_LLONG)
switchval.type.t &= ~VT_UNSIGNED;
vpushv(&switchval);
gcase(sw.p, sw.n, &a);
gv(RC_INT);
d = 0, gcase(sw.p, sw.n, &d);
vpop();
if (sw.def_sym)
gjmp_addr(sw.def_sym);
dynarray_reset(&sw.p, &sw.n);
cur_switch = saved;
gsym_addr(d, sw.def_sym);
else
gsym(d);
/* break label */
gsym(a);
dynarray_reset(&sw.p, &sw.n);
cur_switch = saved;
} else if (t == TOK_CASE) {
struct case_t *cr = tcc_malloc(sizeof(struct case_t));

tests/tcctest.c

@@ -3777,6 +3777,7 @@ void math_cmp_test(void)
double one = 1.0;
double two = 2.0;
int comp = 0;
int v;
#define bug(a,b,op,iop,part) printf("Test broken: %s %s %s %s %d\n", #a, #b, #op, #iop, part)
/* This asserts that "a op b" is _not_ true, but "a iop b" is true.
@@ -3798,7 +3799,8 @@ void math_cmp_test(void)
if ((a iop b) || comp) \
; \
else \
bug (a,b,op,iop,5);
bug (a,b,op,iop,5); \
if (v = !(a op b), !v) bug(a,b,op,iop,7);
/* Equality tests. */
FCMP(nan, nan, ==, !=, 0);

x86_64-gen.c

@@ -471,22 +471,22 @@ void load(int r, SValue *sv)
orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
gen_modrm(r, VT_LOCAL, sv->sym, fc);
} else if (v == VT_CMP) {
orex(0,r,0,0);
if ((fc & ~0x100) != TOK_NE)
oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
else
oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
if (fc & 0x100)
{
v = vtop->cmp_r;
fc &= ~0x100;
/* This was a float compare. If the parity bit is
set the result was unordered, meaning false for everything
except TOK_NE, and true for TOK_NE. */
fc &= ~0x100;
o(0x037a + (REX_BASE(r) << 8));
}
orex(0, r, 0, 0xb0 + REG_VALUE(r)); /* mov $0/1,%al */
g(v ^ fc ^ (v == TOK_NE));
o(0x037a + (REX_BASE(r) << 8));
}
orex(0,r,0, 0x0f); /* setxx %br */
o(fc);
o(0xc0 + REG_VALUE(r));
orex(0,r,0, 0x0f);
o(0xc0b6 + REG_VALUE(r) * 0x900); /* movzbl %al, %eax */
} else if (v == VT_JMP || v == VT_JMPI) {
t = v & 1;
orex(0,r,0,0);
@@ -1666,42 +1666,23 @@ void gjmp_addr(int a)
}
}
ST_FUNC void gtst_addr(int inv, int a)
ST_FUNC int gjmp_append(int n, int t)
{
int v = vtop->r & VT_VALMASK;
if (v == VT_CMP) {
inv ^= (vtop--)->c.i;
a -= ind + 2;
if (a == (char)a) {
g(inv - 32);
g(a);
} else {
g(0x0f);
oad(inv - 16, a - 4);
}
} else if ((v & ~1) == VT_JMP) {
if ((v & 1) != inv) {
gjmp_addr(a);
gsym(vtop->c.i);
} else {
gsym(vtop->c.i);
o(0x05eb);
gjmp_addr(a);
}
vtop--;
void *p;
/* insert vtop->c jump list in t */
if (n) {
uint32_t n1 = n, n2;
while ((n2 = read32le(p = cur_text_section->data + n1)))
n1 = n2;
write32le(p, t);
t = n;
}
return t;
}
/* generate a test. set 'inv' to invert test. Stack entry is popped */
ST_FUNC int gtst(int inv, int t)
ST_FUNC int gjmp_cond(int op, int t)
{
int v = vtop->r & VT_VALMASK;
if (nocode_wanted) {
;
} else if (v == VT_CMP) {
/* fast case : can jump directly since flags are set */
if (vtop->c.i & 0x100)
if (op & 0x100)
{
/* This was a float compare. If the parity flag is set
the result was unordered. For anything except != this
@@ -1710,9 +1691,10 @@ ST_FUNC int gtst(int inv, int t)
Take care about inverting the test. We need to jump
to our target if the result was unordered and test wasn't NE,
otherwise if unordered we don't want to jump. */
vtop->c.i &= ~0x100;
if (inv == (vtop->c.i == TOK_NE))
o(0x067a); /* jp +6 */
int v = vtop->cmp_r;
op &= ~0x100;
if (op ^ v ^ (v != TOK_NE))
o(0x067a); /* jp +6 */
else
{
g(0x0f);
@@ -1720,25 +1702,8 @@ ST_FUNC int gtst(int inv, int t)
}
}
g(0x0f);
t = gjmp2((vtop->c.i - 16) ^ inv, t);
} else if (v == VT_JMP || v == VT_JMPI) {
/* && or || optimization */
if ((v & 1) == inv) {
/* insert vtop->c jump list in t */
uint32_t n1, n = vtop->c.i;
if (n) {
while ((n1 = read32le(cur_text_section->data + n)))
n = n1;
write32le(cur_text_section->data + n, t);
t = vtop->c.i;
}
} else {
t = gjmp(t);
gsym(vtop->c.i);
}
}
vtop--;
return t;
t = gjmp2(op - 16, t);
return t;
}
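Background on the 0x100 flag and the 'jp' instruction handled here (standard x86/IEEE behaviour, not something this patch introduces): the x86 float-compare instructions report an unordered result, i.e. a NaN operand, through the parity flag, and C requires unordered comparisons to be false for every operator except !=. A standalone illustration of that required behaviour:

#include <math.h>
#include <stdio.h>

int main(void)
{
    double n = NAN;
    /* unordered: everything is false, except != which is true */
    printf("%d %d %d %d\n", n == n, n < 1.0, n >= 1.0, n != n); /* 0 0 0 1 */
    return 0;
}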
/* generate an integer binary operation */
@@ -1779,10 +1744,8 @@ void gen_opi(int op)
o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
}
vtop--;
if (op >= TOK_ULT && op <= TOK_GT) {
vtop->r = VT_CMP;
vtop->c.i = op;
}
if (op >= TOK_ULT && op <= TOK_GT)
vset_VT_CMP(op);
break;
case '-':
case TOK_SUBC1: /* sub with carry generation */
@@ -1937,8 +1900,7 @@ void gen_opf(int op)
op = TOK_EQ;
}
vtop--;
vtop->r = VT_CMP;
vtop->c.i = op;
vset_VT_CMP(op);
} else {
/* no memory reference possible for long double operations */
load(TREG_ST0, vtop);
@@ -2016,8 +1978,8 @@ void gen_opf(int op)
}
vtop--;
vtop->r = VT_CMP;
vtop->c.i = op | 0x100;
vset_VT_CMP(op | 0x100);
vtop->cmp_r = op;
} else {
assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
switch(op) {