tcg/s390x: Use register pair allocation for div and mulu2

Previously the even/odd register pair was hard-coded to R2 and R3; let the register allocator choose the pair instead.

Reviewed-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2022-10-10 08:41:15 -07:00
parent aaa90fede5
commit 4143f78dad
3 changed files with 35 additions and 20 deletions

View File

@ -29,8 +29,8 @@ C_O1_I2(v, v, v)
C_O1_I3(v, v, v, v)
C_O1_I4(r, r, ri, r, 0)
C_O1_I4(r, r, ri, rI, 0)
C_O2_I2(b, a, 0, r)
C_O2_I3(b, a, 0, 1, r)
C_O2_I2(o, m, 0, r)
C_O2_I3(o, m, 0, 1, r)
C_O2_I4(r, r, 0, 1, rA, r)
C_O2_I4(r, r, 0, 1, ri, r)
C_O2_I4(r, r, 0, 1, r, r)

View File

@ -11,13 +11,7 @@
REGS('r', ALL_GENERAL_REGS)
REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
REGS('v', ALL_VECTOR_REGS)
/*
* A (single) even/odd pair for division.
* TODO: Add something to the register allocator to allow
* this kind of regno+1 pairing to be done more generally.
*/
REGS('a', 1u << TCG_REG_R2)
REGS('b', 1u << TCG_REG_R3)
REGS('o', 0xaaaa) /* odd numbered general regs */
/*
* Define constraint letters for constants:

View File

@ -2264,10 +2264,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_div2_i32:
tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]);
tcg_debug_assert(args[0] == args[2]);
tcg_debug_assert(args[1] == args[3]);
tcg_debug_assert((args[1] & 1) == 0);
tcg_debug_assert(args[0] == args[1] + 1);
tcg_out_insn(s, RR, DR, args[1], args[4]);
break;
case INDEX_op_divu2_i32:
tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]);
tcg_debug_assert(args[0] == args[2]);
tcg_debug_assert(args[1] == args[3]);
tcg_debug_assert((args[1] & 1) == 0);
tcg_debug_assert(args[0] == args[1] + 1);
tcg_out_insn(s, RRE, DLR, args[1], args[4]);
break;
case INDEX_op_shl_i32:
@ -2521,17 +2529,30 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_div2_i64:
/* ??? We get an unnecessary sign-extension of the dividend
into R3 with this definition, but as we do in fact always
produce both quotient and remainder using INDEX_op_div_i64
instead requires jumping through even more hoops. */
tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]);
/*
* ??? We get an unnecessary sign-extension of the dividend
* into op0 with this definition, but since we do in fact always
* produce both quotient and remainder, using INDEX_op_div_i64
* instead would require jumping through even more hoops.
*/
tcg_debug_assert(args[0] == args[2]);
tcg_debug_assert(args[1] == args[3]);
tcg_debug_assert((args[1] & 1) == 0);
tcg_debug_assert(args[0] == args[1] + 1);
tcg_out_insn(s, RRE, DSGR, args[1], args[4]);
break;
case INDEX_op_divu2_i64:
tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]);
tcg_debug_assert(args[0] == args[2]);
tcg_debug_assert(args[1] == args[3]);
tcg_debug_assert((args[1] & 1) == 0);
tcg_debug_assert(args[0] == args[1] + 1);
tcg_out_insn(s, RRE, DLGR, args[1], args[4]);
break;
case INDEX_op_mulu2_i64:
tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
tcg_debug_assert(args[0] == args[2]);
tcg_debug_assert((args[1] & 1) == 0);
tcg_debug_assert(args[0] == args[1] + 1);
tcg_out_insn(s, RRE, MLGR, args[1], args[3]);
break;
case INDEX_op_shl_i64:
@ -3226,10 +3247,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_div2_i64:
case INDEX_op_divu2_i32:
case INDEX_op_divu2_i64:
return C_O2_I3(b, a, 0, 1, r);
return C_O2_I3(o, m, 0, 1, r);
case INDEX_op_mulu2_i64:
return C_O2_I2(b, a, 0, r);
return C_O2_I2(o, m, 0, r);
case INDEX_op_add2_i32:
case INDEX_op_sub2_i32: