From 5476ef1d40e77b1b556b59a1788a7b1142a0368e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Colombo?= Date: Wed, 2 Mar 2022 06:51:37 +0100 Subject: [PATCH] target/ppc: Implement vmsumcud instruction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on [1] by Lijun Pan , which was never merged into master. [1]: https://lists.gnu.org/archive/html/qemu-ppc/2020-07/msg00419.html Reviewed-by: Richard Henderson Signed-off-by: Víctor Colombo Signed-off-by: Matheus Ferst Message-Id: <20220225210936.1749575-6-matheus.ferst@eldorado.org.br> Signed-off-by: Cédric Le Goater --- target/ppc/insn32.decode | 4 +++ target/ppc/translate/vmx-impl.c.inc | 53 +++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index d817e44c71..e85a75db2f 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -468,6 +468,10 @@ VMULHSD 000100 ..... ..... ..... 01111001001 @VX VMULHUD 000100 ..... ..... ..... 01011001001 @VX VMULLD 000100 ..... ..... ..... 00111001001 @VX +## Vector Multiply-Sum Instructions + +VMSUMCUD 000100 ..... ..... ..... ..... 010111 @VA + # VSX Load/Store Instructions LXV 111101 ..... ..... ............ . 001 @DQ_TSX diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 97a075efd1..4f528dc820 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -2081,6 +2081,59 @@ static bool trans_VPEXTD(DisasContext *ctx, arg_VX *a) return true; } +static bool trans_VMSUMCUD(DisasContext *ctx, arg_VA *a) +{ + TCGv_i64 tmp0, tmp1, prod1h, prod1l, prod0h, prod0l, zero; + + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VECTOR(ctx); + + tmp0 = tcg_temp_new_i64(); + tmp1 = tcg_temp_new_i64(); + prod1h = tcg_temp_new_i64(); + prod1l = tcg_temp_new_i64(); + prod0h = tcg_temp_new_i64(); + prod0l = tcg_temp_new_i64(); + zero = tcg_constant_i64(0); + + /* prod1 = vsr[vra+32].dw[1] * vsr[vrb+32].dw[1] */ + get_avr64(tmp0, a->vra, false); + get_avr64(tmp1, a->vrb, false); + tcg_gen_mulu2_i64(prod1l, prod1h, tmp0, tmp1); + + /* prod0 = vsr[vra+32].dw[0] * vsr[vrb+32].dw[0] */ + get_avr64(tmp0, a->vra, true); + get_avr64(tmp1, a->vrb, true); + tcg_gen_mulu2_i64(prod0l, prod0h, tmp0, tmp1); + + /* Sum lower 64-bits elements */ + get_avr64(tmp1, a->rc, false); + tcg_gen_add2_i64(tmp1, tmp0, tmp1, zero, prod1l, zero); + tcg_gen_add2_i64(tmp1, tmp0, tmp1, tmp0, prod0l, zero); + + /* + * Discard lower 64-bits, leaving the carry into bit 64. + * Then sum the higher 64-bit elements. + */ + get_avr64(tmp1, a->rc, true); + tcg_gen_add2_i64(tmp1, tmp0, tmp0, zero, tmp1, zero); + tcg_gen_add2_i64(tmp1, tmp0, tmp1, tmp0, prod1h, zero); + tcg_gen_add2_i64(tmp1, tmp0, tmp1, tmp0, prod0h, zero); + + /* Discard 64 more bits to complete the CHOP128(temp >> 128) */ + set_avr64(a->vrt, tmp0, false); + set_avr64(a->vrt, zero, true); + + tcg_temp_free_i64(tmp0); + tcg_temp_free_i64(tmp1); + tcg_temp_free_i64(prod1h); + tcg_temp_free_i64(prod1l); + tcg_temp_free_i64(prod0h); + tcg_temp_free_i64(prod0l); + + return true; +} + static bool do_vx_helper(DisasContext *ctx, arg_VX *a, void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr)) {