tcg: Optimize mulu2
Like add2, do operand ordering, constant folding, and dead operand elimination. The latter applies to about 15% of all mulu2 operations during an x86_64 BIOS boot.
Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
This commit is contained in:
parent
1305c451e6
commit
1414968a6a
@ -543,6 +543,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
|
||||
swap_commutative(args[0], &args[2], &args[4]);
|
||||
swap_commutative(args[1], &args[3], &args[5]);
|
||||
break;
|
||||
case INDEX_op_mulu2_i32:
|
||||
swap_commutative(args[0], &args[2], &args[3]);
|
||||
break;
|
||||
case INDEX_op_brcond2_i32:
|
||||
if (swap_commutative2(&args[0], &args[2])) {
|
||||
args[4] = tcg_swap_cond(args[4]);
|
||||
@ -831,6 +834,29 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
|
||||
}
|
||||
goto do_default;
|
||||
|
||||
case INDEX_op_mulu2_i32:
|
||||
if (temps[args[2]].state == TCG_TEMP_CONST
|
||||
&& temps[args[3]].state == TCG_TEMP_CONST) {
|
||||
uint32_t a = temps[args[2]].val;
|
||||
uint32_t b = temps[args[3]].val;
|
||||
uint64_t r = (uint64_t)a * b;
|
||||
TCGArg rl, rh;
|
||||
|
||||
/* We emit the extra nop when we emit the mulu2. */
|
||||
assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
|
||||
|
||||
rl = args[0];
|
||||
rh = args[1];
|
||||
gen_opc_buf[op_index] = INDEX_op_movi_i32;
|
||||
gen_opc_buf[++op_index] = INDEX_op_movi_i32;
|
||||
tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
|
||||
tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
|
||||
gen_args += 4;
|
||||
args += 4;
|
||||
break;
|
||||
}
|
||||
goto do_default;
|
||||
|
||||
case INDEX_op_brcond2_i32:
|
||||
tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
|
||||
if (tmp != 2) {
|
||||
|
@ -1027,6 +1027,8 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
|
||||
|
||||
tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
|
||||
TCGV_LOW(arg1), TCGV_LOW(arg2));
|
||||
/* Allow the optimizer room to replace mulu2 with two moves. */
|
||||
tcg_gen_op0(INDEX_op_nop);
|
||||
|
||||
tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
|
||||
tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
|
||||
|
19
tcg/tcg.c
19
tcg/tcg.c
@ -1338,6 +1338,25 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||
}
|
||||
goto do_not_remove;
|
||||
|
||||
case INDEX_op_mulu2_i32:
|
||||
args -= 4;
|
||||
nb_iargs = 2;
|
||||
nb_oargs = 2;
|
||||
/* Likewise, test for the high part of the operation dead. */
|
||||
if (dead_temps[args[1]]) {
|
||||
if (dead_temps[args[0]]) {
|
||||
goto do_remove;
|
||||
}
|
||||
gen_opc_buf[op_index] = op = INDEX_op_mul_i32;
|
||||
args[1] = args[2];
|
||||
args[2] = args[3];
|
||||
assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
|
||||
tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 1);
|
||||
/* Fall through and mark the single-word operation live. */
|
||||
nb_oargs = 1;
|
||||
}
|
||||
goto do_not_remove;
|
||||
|
||||
default:
|
||||
/* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
|
||||
args -= def->nb_args;
|
||||
|
Loading…
Reference in New Issue
Block a user