target/i386: optimize TEST+Jxx sequences

Mostly used for TEST+JG and TEST+JLE, but it is easy to cover
also JBE/JA and JL/JGE; shaves about 0.5% TCG ops.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Paolo Bonzini 2024-06-20 11:31:33 +02:00
parent ae14b33de8
commit 37df7c4d57

View File

@@ -1069,6 +1069,28 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
} }
break; break;
case CC_OP_LOGICB ... CC_OP_LOGICQ:
/* Mostly used for test+jump */
size = s->cc_op - CC_OP_LOGICB;
switch (jcc_op) {
case JCC_BE:
/* CF = 0, becomes jz/jnz */
jcc_op = JCC_Z;
goto slow_jcc;
case JCC_L:
/* OF = 0, becomes js/jns */
jcc_op = JCC_S;
goto slow_jcc;
case JCC_LE:
/* SF or ZF, becomes signed <= 0 */
tcg_gen_ext_tl(cpu_cc_dst, cpu_cc_dst, size | MO_SIGN);
cc = (CCPrepare) { .cond = TCG_COND_LE, .reg = cpu_cc_dst };
break;
default:
goto slow_jcc;
}
break;
default: default:
slow_jcc: slow_jcc:
/* This actually generates good code for JC, JZ and JS. */ /* This actually generates good code for JC, JZ and JS. */