tcg/tci: Implement mulu2, muls2

We already had mulu2_i32 for a 32-bit host; expand this to 64-bit
hosts as well.  The muls2_i32 and the 64-bit opcodes are new.

Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2021-02-02 17:21:27 -08:00
parent 5255f48c1c
commit f6db0d8dc6
3 changed files with 43 additions and 16 deletions

View File

@@ -41,7 +41,7 @@ __thread uintptr_t tci_tb_ptr;
static void tci_write_reg64(tcg_target_ulong *regs, uint32_t high_index, static void tci_write_reg64(tcg_target_ulong *regs, uint32_t high_index,
uint32_t low_index, uint64_t value) uint32_t low_index, uint64_t value)
{ {
regs[low_index] = value; regs[low_index] = (uint32_t)value;
regs[high_index] = value >> 32; regs[high_index] = value >> 32;
} }
@@ -173,7 +173,6 @@ static void tci_args_rrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1,
*r4 = extract32(insn, 24, 4); *r4 = extract32(insn, 24, 4);
} }
#if TCG_TARGET_REG_BITS == 32
static void tci_args_rrrr(uint32_t insn, static void tci_args_rrrr(uint32_t insn,
TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGReg *r3) TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGReg *r3)
{ {
@@ -182,7 +181,6 @@ static void tci_args_rrrr(uint32_t insn,
*r2 = extract32(insn, 16, 4); *r2 = extract32(insn, 16, 4);
*r3 = extract32(insn, 20, 4); *r3 = extract32(insn, 20, 4);
} }
#endif
static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1, static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1,
TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGCond *c5) TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGCond *c5)
@@ -671,11 +669,21 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
T2 = tci_uint64(regs[r5], regs[r4]); T2 = tci_uint64(regs[r5], regs[r4]);
tci_write_reg64(regs, r1, r0, T1 - T2); tci_write_reg64(regs, r1, r0, T1 - T2);
break; break;
#endif /* TCG_TARGET_REG_BITS == 32 */
#if TCG_TARGET_HAS_mulu2_i32
case INDEX_op_mulu2_i32: case INDEX_op_mulu2_i32:
tci_args_rrrr(insn, &r0, &r1, &r2, &r3); tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
tci_write_reg64(regs, r1, r0, (uint64_t)regs[r2] * regs[r3]); tmp64 = (uint64_t)(uint32_t)regs[r2] * (uint32_t)regs[r3];
tci_write_reg64(regs, r1, r0, tmp64);
break; break;
#endif /* TCG_TARGET_REG_BITS == 32 */ #endif
#if TCG_TARGET_HAS_muls2_i32
case INDEX_op_muls2_i32:
tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
tmp64 = (int64_t)(int32_t)regs[r2] * (int32_t)regs[r3];
tci_write_reg64(regs, r1, r0, tmp64);
break;
#endif
#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64 #if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
CASE_32_64(ext8s) CASE_32_64(ext8s)
tci_args_rr(insn, &r0, &r1); tci_args_rr(insn, &r0, &r1);
@@ -779,6 +787,18 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
regs[r0] = ctpop64(regs[r1]); regs[r0] = ctpop64(regs[r1]);
break; break;
#endif #endif
#if TCG_TARGET_HAS_mulu2_i64
case INDEX_op_mulu2_i64:
tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
mulu64(&regs[r0], &regs[r1], regs[r2], regs[r3]);
break;
#endif
#if TCG_TARGET_HAS_muls2_i64
case INDEX_op_muls2_i64:
tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
muls64(&regs[r0], &regs[r1], regs[r2], regs[r3]);
break;
#endif
/* Shift/rotate operations (64 bit). */ /* Shift/rotate operations (64 bit). */
@@ -1286,14 +1306,17 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
str_r(r3), str_r(r4), str_c(c)); str_r(r3), str_r(r4), str_c(c));
break; break;
#if TCG_TARGET_REG_BITS == 32
case INDEX_op_mulu2_i32: case INDEX_op_mulu2_i32:
case INDEX_op_mulu2_i64:
case INDEX_op_muls2_i32:
case INDEX_op_muls2_i64:
tci_args_rrrr(insn, &r0, &r1, &r2, &r3); tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s",
op_name, str_r(r0), str_r(r1), op_name, str_r(r0), str_r(r1),
str_r(r2), str_r(r3)); str_r(r2), str_r(r3));
break; break;
#if TCG_TARGET_REG_BITS == 32
case INDEX_op_add2_i32: case INDEX_op_add2_i32:
case INDEX_op_sub2_i32: case INDEX_op_sub2_i32:
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);

View File

@@ -141,10 +141,14 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
return C_O2_I4(r, r, r, r, r, r); return C_O2_I4(r, r, r, r, r, r);
case INDEX_op_brcond2_i32: case INDEX_op_brcond2_i32:
return C_O0_I4(r, r, r, r); return C_O0_I4(r, r, r, r);
case INDEX_op_mulu2_i32:
return C_O2_I2(r, r, r, r);
#endif #endif
case INDEX_op_mulu2_i32:
case INDEX_op_mulu2_i64:
case INDEX_op_muls2_i32:
case INDEX_op_muls2_i64:
return C_O2_I2(r, r, r, r);
case INDEX_op_movcond_i32: case INDEX_op_movcond_i32:
case INDEX_op_movcond_i64: case INDEX_op_movcond_i64:
case INDEX_op_setcond2_i32: case INDEX_op_setcond2_i32:
@@ -434,7 +438,6 @@ static void tcg_out_op_rrrrr(TCGContext *s, TCGOpcode op, TCGReg r0,
tcg_out32(s, insn); tcg_out32(s, insn);
} }
#if TCG_TARGET_REG_BITS == 32
static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op, static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r3) TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r3)
{ {
@@ -447,7 +450,6 @@ static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op,
insn = deposit32(insn, 20, 4, r3); insn = deposit32(insn, 20, 4, r3);
tcg_out32(s, insn); tcg_out32(s, insn);
} }
#endif
static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op, static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r0, TCGReg r1, TCGReg r2,
@@ -726,10 +728,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
args[0], args[1], args[2], args[3], args[4]); args[0], args[1], args[2], args[3], args[4]);
tcg_out_op_rl(s, INDEX_op_brcond_i32, TCG_REG_TMP, arg_label(args[5])); tcg_out_op_rl(s, INDEX_op_brcond_i32, TCG_REG_TMP, arg_label(args[5]));
break; break;
case INDEX_op_mulu2_i32: #endif
CASE_32_64(mulu2)
CASE_32_64(muls2)
tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]); tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]);
break; break;
#endif
case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_st_i32: case INDEX_op_qemu_st_i32:

View File

@@ -84,7 +84,7 @@
#define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_orc_i32 1
#define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_rot_i32 1
#define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_movcond_i32 1
#define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muls2_i32 1
#define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_TARGET_HAS_goto_ptr 1 #define TCG_TARGET_HAS_goto_ptr 1
@@ -121,13 +121,13 @@
#define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_orc_i64 1
#define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_rot_i64 1
#define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_movcond_i64 1
#define TCG_TARGET_HAS_muls2_i64 0 #define TCG_TARGET_HAS_muls2_i64 1
#define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_add2_i32 0
#define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0
#define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_mulu2_i32 1
#define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_add2_i64 0
#define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0
#define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 1
#define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0 #define TCG_TARGET_HAS_mulsh_i64 0
#else #else