target/arm: Improve REV32

For the sf version, we are performing two 32-bit bswaps
in either half of the register.  This is equivalent to
performing one 64-bit bswap followed by a rotate.

For the non-sf version, we can remove TCG_BSWAP_IZ
and the preceding zero-extension.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2021-06-13 16:17:03 -07:00
parent b53357acb4
commit 2b0a39e51e

View File

@ -5430,22 +5430,13 @@ static void handle_rev32(DisasContext *s, unsigned int sf,
unsigned int rn, unsigned int rd)
{
TCGv_i64 tcg_rd = cpu_reg(s, rd);
TCGv_i64 tcg_rn = cpu_reg(s, rn);
if (sf) {
TCGv_i64 tcg_tmp = tcg_temp_new_i64();
TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
/* bswap32_i64 requires zero high word */
tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
tcg_gen_bswap32_i64(tcg_rd, tcg_tmp, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
tcg_temp_free_i64(tcg_tmp);
tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
} else {
tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
tcg_gen_bswap32_i64(tcg_rd, tcg_rd, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
}
}