tcg-ppc: Use the return address as a base pointer
This can significantly reduce code size for generation of (some) 64-bit constants, with the side effect that we know for a fact that exit_tb can use the register to good effect. Tested-by: Tom Musta <tommusta@gmail.com> Signed-off-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
parent
224f9fd419
commit
a84ac4cbbb
@ -41,6 +41,30 @@
|
|||||||
# define TCG_REG_TMP1 TCG_REG_R12
|
# define TCG_REG_TMP1 TCG_REG_R12
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* For the 64-bit target, we don't like the 5 insn sequence needed to build
|
||||||
|
full 64-bit addresses. Better to have a base register to which we can
|
||||||
|
apply a 32-bit displacement.
|
||||||
|
|
||||||
|
There are generally three items of interest:
|
||||||
|
(1) helper functions in the main executable,
|
||||||
|
(2) TranslationBlock data structures,
|
||||||
|
(3) the return address in the epilogue.
|
||||||
|
|
||||||
|
For user-only, we USE_STATIC_CODE_GEN_BUFFER, so the code_gen_buffer
|
||||||
|
will be inside the main executable, and thus near enough to make a
|
||||||
|
pointer to the epilogue be within 2GB of all helper functions.
|
||||||
|
|
||||||
|
For softmmu, we'll let the kernel choose the address of code_gen_buffer,
|
||||||
|
and odds are it'll be somewhere close to the main malloc arena, and so
|
||||||
|
a pointer to the epilogue will be within 2GB of the TranslationBlocks.
|
||||||
|
|
||||||
|
For --enable-pie, everything will be kinda near everything else,
|
||||||
|
somewhere in high memory.
|
||||||
|
|
||||||
|
Thus we choose to keep the return address in a call-saved register. */
|
||||||
|
#define TCG_REG_RA TCG_REG_R31
|
||||||
|
#define USE_REG_RA (TCG_TARGET_REG_BITS == 64)
|
||||||
|
|
||||||
/* Shorthand for size of a pointer. Avoid promotion to unsigned. */
|
/* Shorthand for size of a pointer. Avoid promotion to unsigned. */
|
||||||
#define SZP ((int)sizeof(void *))
|
#define SZP ((int)sizeof(void *))
|
||||||
|
|
||||||
@ -467,6 +491,8 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
|
|||||||
#define TW XO31( 4)
|
#define TW XO31( 4)
|
||||||
#define TRAP (TW | TO(31))
|
#define TRAP (TW | TO(31))
|
||||||
|
|
||||||
|
#define NOP ORI /* ori 0,0,0 */
|
||||||
|
|
||||||
#define RT(r) ((r)<<21)
|
#define RT(r) ((r)<<21)
|
||||||
#define RS(r) ((r)<<21)
|
#define RS(r) ((r)<<21)
|
||||||
#define RA(r) ((r)<<16)
|
#define RA(r) ((r)<<16)
|
||||||
@ -531,6 +557,9 @@ static const uint32_t tcg_to_isel[] = {
|
|||||||
[TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
|
[TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
|
||||||
|
TCGReg base, tcg_target_long offset);
|
||||||
|
|
||||||
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
|
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
|
||||||
{
|
{
|
||||||
tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
|
tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
|
||||||
@ -601,7 +630,17 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
|
|||||||
tcg_out32(s, ADDI | TAI(ret, 0, arg));
|
tcg_out32(s, ADDI | TAI(ret, 0, arg));
|
||||||
tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
|
tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
|
||||||
} else {
|
} else {
|
||||||
int32_t high = arg >> 31 >> 1;
|
int32_t high;
|
||||||
|
|
||||||
|
if (USE_REG_RA) {
|
||||||
|
intptr_t diff = arg - (intptr_t)tb_ret_addr;
|
||||||
|
if (diff == (int32_t)diff) {
|
||||||
|
tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_RA, diff);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
high = arg >> 31 >> 1;
|
||||||
tcg_out_movi32(s, ret, high);
|
tcg_out_movi32(s, ret, high);
|
||||||
if (high) {
|
if (high) {
|
||||||
tcg_out_shli64(s, ret, ret, 32);
|
tcg_out_shli64(s, ret, ret, 32);
|
||||||
@ -1714,18 +1753,16 @@ static void tcg_target_qemu_prologue(TCGContext *s)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
#ifdef _CALL_AIX
|
||||||
|
void **desc = (void **)s->code_ptr;
|
||||||
|
desc[0] = desc + 2; /* entry point */
|
||||||
|
desc[1] = 0; /* environment pointer */
|
||||||
|
s->code_ptr = (void *)(desc + 2); /* skip over descriptor */
|
||||||
|
#endif
|
||||||
|
|
||||||
tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
|
tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
|
||||||
CPU_TEMP_BUF_SIZE);
|
CPU_TEMP_BUF_SIZE);
|
||||||
|
|
||||||
#ifdef _CALL_AIX
|
|
||||||
{
|
|
||||||
void **desc = (void **)s->code_ptr;
|
|
||||||
desc[0] = desc + 2; /* entry point */
|
|
||||||
desc[1] = 0; /* environment pointer */
|
|
||||||
s->code_ptr = (void *)(desc + 2); /* skip over descriptor */
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Prologue */
|
/* Prologue */
|
||||||
tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
|
tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
|
||||||
tcg_out32(s, (SZR == 8 ? STDU : STWU)
|
tcg_out32(s, (SZR == 8 ? STDU : STWU)
|
||||||
@ -1746,10 +1783,36 @@ static void tcg_target_qemu_prologue(TCGContext *s)
|
|||||||
|
|
||||||
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
|
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
|
||||||
tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
|
tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
|
||||||
tcg_out32(s, BCCTR | BO_ALWAYS);
|
|
||||||
|
if (USE_REG_RA) {
|
||||||
|
#ifdef _CALL_AIX
|
||||||
|
/* Make the caller load the value as the TOC into R2. */
|
||||||
|
tb_ret_addr = s->code_ptr + 2;
|
||||||
|
desc[1] = tb_ret_addr;
|
||||||
|
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2);
|
||||||
|
tcg_out32(s, BCCTR | BO_ALWAYS);
|
||||||
|
#elif defined(_CALL_ELF) && _CALL_ELF == 2
|
||||||
|
/* Compute from the incoming R12 value. */
|
||||||
|
tb_ret_addr = s->code_ptr + 2;
|
||||||
|
tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12,
|
||||||
|
tcg_ptr_byte_diff(tb_ret_addr, s->code_buf)));
|
||||||
|
tcg_out32(s, BCCTR | BO_ALWAYS);
|
||||||
|
#else
|
||||||
|
/* Reserve max 5 insns for the constant load. */
|
||||||
|
tb_ret_addr = s->code_ptr + 6;
|
||||||
|
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr);
|
||||||
|
tcg_out32(s, BCCTR | BO_ALWAYS);
|
||||||
|
while (s->code_ptr < tb_ret_addr) {
|
||||||
|
tcg_out32(s, NOP);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
tcg_out32(s, BCCTR | BO_ALWAYS);
|
||||||
|
tb_ret_addr = s->code_ptr;
|
||||||
|
}
|
||||||
|
|
||||||
/* Epilogue */
|
/* Epilogue */
|
||||||
tb_ret_addr = s->code_ptr;
|
assert(tb_ret_addr == s->code_ptr);
|
||||||
|
|
||||||
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
|
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
|
||||||
for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
|
for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
|
||||||
@ -1769,6 +1832,21 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
|
|||||||
|
|
||||||
switch (opc) {
|
switch (opc) {
|
||||||
case INDEX_op_exit_tb:
|
case INDEX_op_exit_tb:
|
||||||
|
if (USE_REG_RA) {
|
||||||
|
ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr);
|
||||||
|
|
||||||
|
/* Use a direct branch if we can, otherwise use the value in RA.
|
||||||
|
Note that the direct branch is always forward. If it's in
|
||||||
|
range now, it'll still be in range after the movi. Don't
|
||||||
|
bother about the 20 bytes where the test here fails but it
|
||||||
|
would succeed below. */
|
||||||
|
if (!in_range_b(disp)) {
|
||||||
|
tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR);
|
||||||
|
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
|
||||||
|
tcg_out32(s, BCCTR | BO_ALWAYS);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
|
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
|
||||||
tcg_out_b(s, 0, tb_ret_addr);
|
tcg_out_b(s, 0, tb_ret_addr);
|
||||||
break;
|
break;
|
||||||
@ -2479,6 +2557,9 @@ static void tcg_target_init(TCGContext *s)
|
|||||||
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
|
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
|
||||||
#endif
|
#endif
|
||||||
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
|
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
|
||||||
|
if (USE_REG_RA) {
|
||||||
|
tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return addr */
|
||||||
|
}
|
||||||
|
|
||||||
tcg_add_target_add_op_defs(ppc_op_defs);
|
tcg_add_target_add_op_defs(ppc_op_defs);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user