tcg/tci: Implement add2, sub2

We already had the 32-bit versions for a 32-bit host; expand this
to 64-bit hosts as well.  The 64-bit opcodes are new.

Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2021-02-02 17:40:12 -08:00
parent f6db0d8dc6
commit 08096b1a64
3 changed files with 38 additions and 25 deletions

View File

@ -193,7 +193,6 @@ static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1,
*c5 = extract32(insn, 28, 4);
}
#if TCG_TARGET_REG_BITS == 32
static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1,
TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGReg *r5)
{
@ -204,7 +203,6 @@ static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1,
*r4 = extract32(insn, 24, 4);
*r5 = extract32(insn, 28, 4);
}
#endif
static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition)
{
@ -355,17 +353,14 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
for (;;) {
uint32_t insn;
TCGOpcode opc;
TCGReg r0, r1, r2, r3, r4;
TCGReg r0, r1, r2, r3, r4, r5;
tcg_target_ulong t1;
TCGCond condition;
target_ulong taddr;
uint8_t pos, len;
uint32_t tmp32;
uint64_t tmp64;
#if TCG_TARGET_REG_BITS == 32
TCGReg r5;
uint64_t T1, T2;
#endif
TCGMemOpIdx oi;
int32_t ofs;
void *ptr;
@ -656,20 +651,22 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tb_ptr = ptr;
}
break;
#if TCG_TARGET_REG_BITS == 32
#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_add2_i32
case INDEX_op_add2_i32:
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
T1 = tci_uint64(regs[r3], regs[r2]);
T2 = tci_uint64(regs[r5], regs[r4]);
tci_write_reg64(regs, r1, r0, T1 + T2);
break;
#endif
#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_sub2_i32
case INDEX_op_sub2_i32:
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
T1 = tci_uint64(regs[r3], regs[r2]);
T2 = tci_uint64(regs[r5], regs[r4]);
tci_write_reg64(regs, r1, r0, T1 - T2);
break;
#endif /* TCG_TARGET_REG_BITS == 32 */
#endif
#if TCG_TARGET_HAS_mulu2_i32
case INDEX_op_mulu2_i32:
tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
@ -799,6 +796,24 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
muls64(&regs[r0], &regs[r1], regs[r2], regs[r3]);
break;
#endif
#if TCG_TARGET_HAS_add2_i64
case INDEX_op_add2_i64:
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
T1 = regs[r2] + regs[r4];
T2 = regs[r3] + regs[r5] + (T1 < regs[r2]);
regs[r0] = T1;
regs[r1] = T2;
break;
#endif
#if TCG_TARGET_HAS_add2_i64
case INDEX_op_sub2_i64:
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
T1 = regs[r2] - regs[r4];
T2 = regs[r3] - regs[r5] - (regs[r2] < regs[r4]);
regs[r0] = T1;
regs[r1] = T2;
break;
#endif
/* Shift/rotate operations (64 bit). */
@ -1115,10 +1130,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
const char *op_name;
uint32_t insn;
TCGOpcode op;
TCGReg r0, r1, r2, r3, r4;
#if TCG_TARGET_REG_BITS == 32
TCGReg r5;
#endif
TCGReg r0, r1, r2, r3, r4, r5;
tcg_target_ulong i1;
int32_t s2;
TCGCond c;
@ -1316,15 +1328,15 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
str_r(r2), str_r(r3));
break;
#if TCG_TARGET_REG_BITS == 32
case INDEX_op_add2_i32:
case INDEX_op_add2_i64:
case INDEX_op_sub2_i32:
case INDEX_op_sub2_i64:
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s",
op_name, str_r(r0), str_r(r1), str_r(r2),
str_r(r3), str_r(r4), str_r(r5));
break;
#endif
case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_st_i64:

View File

@ -134,11 +134,13 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_brcond_i64:
return C_O0_I2(r, r);
#if TCG_TARGET_REG_BITS == 32
/* TODO: Support R, R, R, R, RI, RI? Will it be faster? */
case INDEX_op_add2_i32:
case INDEX_op_add2_i64:
case INDEX_op_sub2_i32:
case INDEX_op_sub2_i64:
return C_O2_I4(r, r, r, r, r, r);
#if TCG_TARGET_REG_BITS == 32
case INDEX_op_brcond2_i32:
return C_O0_I4(r, r, r, r);
#endif
@ -467,7 +469,6 @@ static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op,
tcg_out32(s, insn);
}
#if TCG_TARGET_REG_BITS == 32
static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2,
TCGReg r3, TCGReg r4, TCGReg r5)
@ -483,7 +484,6 @@ static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op,
insn = deposit32(insn, 28, 4, r5);
tcg_out32(s, insn);
}
#endif
static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val,
TCGReg base, intptr_t offset)
@ -717,12 +717,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_op_rr(s, opc, args[0], args[1]);
break;
#if TCG_TARGET_REG_BITS == 32
case INDEX_op_add2_i32:
case INDEX_op_sub2_i32:
CASE_32_64(add2)
CASE_32_64(sub2)
tcg_out_op_rrrrrr(s, opc, args[0], args[1], args[2],
args[3], args[4], args[5]);
break;
#if TCG_TARGET_REG_BITS == 32
case INDEX_op_brcond2_i32:
tcg_out_op_rrrrrc(s, INDEX_op_setcond2_i32, TCG_REG_TMP,
args[0], args[1], args[2], args[3], args[4]);

View File

@ -122,11 +122,11 @@
#define TCG_TARGET_HAS_rot_i64 1
#define TCG_TARGET_HAS_movcond_i64 1
#define TCG_TARGET_HAS_muls2_i64 1
#define TCG_TARGET_HAS_add2_i32 0
#define TCG_TARGET_HAS_sub2_i32 0
#define TCG_TARGET_HAS_add2_i32 1
#define TCG_TARGET_HAS_sub2_i32 1
#define TCG_TARGET_HAS_mulu2_i32 1
#define TCG_TARGET_HAS_add2_i64 0
#define TCG_TARGET_HAS_sub2_i64 0
#define TCG_TARGET_HAS_add2_i64 1
#define TCG_TARGET_HAS_sub2_i64 1
#define TCG_TARGET_HAS_mulu2_i64 1
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0