tcg/ppc: Split out tcg_out_bswap16

With the use of a suitable temporary, we can use the same
algorithm when src overlaps dst.  The result is the same
number of instructions either way.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2021-06-13 16:16:25 +00:00
parent 05dd01fa5a
commit 783d3ecdda

View File

@ -789,6 +789,24 @@ static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
}
/*
 * Emit the insn sequence that swaps the two low bytes of src and
 * leaves the result in dst (see the byte diagrams below).
 */
static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src)
{
    TCGReg scratch;

    /*
     * The first insn overwrites its destination while src is still
     * needed as an input to the second.  When dst aliases src, build
     * the value in R0 instead so that src survives both steps.
     */
    if (dst == src) {
        scratch = TCG_REG_R0;
    } else {
        scratch = dst;
    }

    /*
     * Notation used below:
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Starting value:                              src = xxxxabcd
     */

    /* scratch = rol32(src, 24) & 0x000000ff            = 0000000c */
    tcg_out_rlw(s, RLWINM, scratch, src, 24, 24, 31);

    /* scratch = dep(scratch, rol32(src, 8), 0x0000ff00) = 000000dc */
    tcg_out_rlw(s, RLWIMI, scratch, src, 8, 16, 23);

    /* Hand the assembled value over to the requested destination. */
    tcg_out_mov(s, TCG_TYPE_REG, dst, scratch);
}
/* Emit a move into ret of arg, if it can be done in one insn. */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
@ -2779,21 +2797,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_bswap16_i32:
case INDEX_op_bswap16_i64:
a0 = args[0], a1 = args[1];
/* a1 = abcd */
if (a0 != a1) {
/* a0 = (a1 r<< 24) & 0xff # 000c */
tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
/* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */
tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23);
} else {
/* r0 = (a1 r<< 8) & 0xff00 # 00d0 */
tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23);
/* a0 = (a1 r<< 24) & 0xff # 000c */
tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
/* a0 = a0 | r0 # 00dc */
tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0));
}
tcg_out_bswap16(s, args[0], args[1]);
break;
case INDEX_op_bswap32_i32: