From a66669c942077d53414ae0894e9fba3ddc18f58d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 2 Mar 2022 19:05:05 -0500 Subject: [PATCH 1/4] tcg/s390x: Fix tcg_out_dupi_vec vs VGM The immediate operands to VGM were in the wrong order, producing an inverse mask. Signed-off-by: Richard Henderson --- tcg/s390x/tcg-target.c.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 6e65828c09..508f1bccc7 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2715,7 +2715,7 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, msb = clz32(val); lsb = 31 - ctz32(val); } - tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_32); + tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32); return; } } else { @@ -2729,7 +2729,7 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, msb = clz64(val); lsb = 63 - ctz64(val); } - tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_64); + tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64); return; } } From 6e5f9fb7993aa3de5295ae2dbada39258ed333ac Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 2 Mar 2022 19:09:35 -0500 Subject: [PATCH 2/4] tcg/s390x: Fix INDEX_op_bitsel_vec vs VSEL The operands are output in the wrong order: the tcg selector argument is first, whereas the s390x selector argument is last. Tested-by: Thomas Huth Resolves: https://gitlab.com/qemu-project/qemu/-/issues/898 Fixes: 9bca986df88 ("tcg/s390x: Implement TCG_TARGET_HAS_bitsel_vec") Signed-off-by: Richard Henderson --- tcg/s390x/tcg-target.c.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 508f1bccc7..3b185b3c96 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2868,7 +2868,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_bitsel_vec: - tcg_out_insn(s, VRRe, VSEL, a0, a1, a2, args[3]); + tcg_out_insn(s, VRRe, VSEL, a0, a2, args[3], a1); break; case INDEX_op_cmp_vec: From 6e591a8569b24309f0b602042cce630524c03c0e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 2 Mar 2022 19:26:50 -0500 Subject: [PATCH 3/4] tcg/s390x: Fix tcg_out_dup_vec vs general registers We copied the data from the general register input to the vector register output, but have not yet replicated it. We intended to fall through into the vector-vector case, but failed to redirect the input register. This is caught by an assertion failure in tcg_out_insn_VRIc, which diagnosed the incorrect register class. Signed-off-by: Richard Henderson --- tcg/s390x/tcg-target.c.inc | 1 + 1 file changed, 1 insertion(+) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 3b185b3c96..33becd7694 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2675,6 +2675,7 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, if (vece == MO_64) { return true; } + src = dst; } /* From 76cff100beeae8d3676bb658cccd45ef5ced8aa9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 10 Mar 2022 23:38:47 -0800 Subject: [PATCH 4/4] tcg/arm: Don't emit UNPREDICTABLE LDRD with Rm == Rt or Rt+1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LDRD (register) instruction is UNPREDICTABLE if the Rm register is the same as either Rt or Rt+1 (the two registers being loaded to). We weren't making sure we avoided this, with the result that on some host CPUs like the Cortex-A7 we would get a SIGILL because the CPU chooses to UNDEF for this particular UNPREDICTABLE case. Since we've already checked that datalo is aligned, we can simplify the test vs the Rm operand by aligning it before comparison. Check for the two orderings before falling back to two ldr instructions. We don't bother to do anything similar for tcg_out_ldrd_rwb(), because it is only used in tcg_out_tlb_read() with a fixed set of registers which don't overlap. There is no equivalent UNPREDICTABLE case for STRD. Reviewed-by: Alex Bennée Resolves: https://gitlab.com/qemu-project/qemu/-/issues/896 Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index e1ea69669c..4bc0420f4d 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1689,8 +1689,21 @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, /* LDRD requires alignment; double-check that. */ if (get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { - tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend); - } else if (scratch_addend) { + /* + * Rm (the second address op) must not overlap Rt or Rt + 1. + * Since datalo is aligned, we can simplify the test via alignment. + * Flip the two address arguments if that works. + */ + if ((addend & ~1) != datalo) { + tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend); + break; + } + if ((addrlo & ~1) != datalo) { + tcg_out_ldrd_r(s, COND_AL, datalo, addend, addrlo); + break; + } + } + if (scratch_addend) { tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo); tcg_out_ld32_12(s, COND_AL, datahi, addend, 4); } else {