diff --git a/include/tcg/tcg-op-gvec-common.h b/include/tcg/tcg-op-gvec-common.h index 4db8a58c14..65553f5f97 100644 --- a/include/tcg/tcg-op-gvec-common.h +++ b/include/tcg/tcg-op-gvec-common.h @@ -183,6 +183,8 @@ typedef struct { bool prefer_i64; /* Load dest as a 3rd source operand. */ bool load_dest; + /* Write aofs as a 2nd dest operand. */ + bool write_aofs; } GVecGen3i; typedef struct { diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index bb88943f79..0308732d9b 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -785,7 +785,8 @@ static void expand_3_i32(uint32_t dofs, uint32_t aofs, } static void expand_3i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t oprsz, int32_t c, bool load_dest, + uint32_t oprsz, int32_t c, + bool load_dest, bool write_aofs, void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32, int32_t)) { TCGv_i32 t0 = tcg_temp_new_i32(); @@ -801,6 +802,9 @@ static void expand_3i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, } fni(t2, t0, t1, c); tcg_gen_st_i32(t2, tcg_env, dofs + i); + if (write_aofs) { + tcg_gen_st_i32(t0, tcg_env, aofs + i); + } } tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); @@ -944,7 +948,8 @@ static void expand_3_i64(uint32_t dofs, uint32_t aofs, } static void expand_3i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t oprsz, int64_t c, bool load_dest, + uint32_t oprsz, int64_t c, + bool load_dest, bool write_aofs, void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64, int64_t)) { TCGv_i64 t0 = tcg_temp_new_i64(); @@ -960,6 +965,9 @@ static void expand_3i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, } fni(t2, t0, t1, c); tcg_gen_st_i64(t2, tcg_env, dofs + i); + if (write_aofs) { + tcg_gen_st_i64(t0, tcg_env, aofs + i); + } } tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); @@ -1102,7 +1110,8 @@ static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs, */ static void expand_3i_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t tysz, - TCGType type, int64_t c, bool load_dest, + TCGType type, int64_t c, + bool load_dest, bool write_aofs, void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, int64_t)) { @@ -1118,6 +1127,9 @@ static void expand_3i_vec(unsigned vece, uint32_t dofs, uint32_t aofs, } fni(vece, t2, t0, t1, c); tcg_gen_st_vec(t2, tcg_env, dofs + i); + if (write_aofs) { + tcg_gen_st_vec(t0, tcg_env, aofs + i); + } } } @@ -1471,7 +1483,7 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs, */ some = QEMU_ALIGN_DOWN(oprsz, 32); expand_3i_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256, - c, g->load_dest, g->fniv); + c, g->load_dest, g->write_aofs, g->fniv); if (some == oprsz) { break; } @@ -1483,18 +1495,20 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs, /* fallthru */ case TCG_TYPE_V128: expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128, - c, g->load_dest, g->fniv); + c, g->load_dest, g->write_aofs, g->fniv); break; case TCG_TYPE_V64: expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64, - c, g->load_dest, g->fniv); + c, g->load_dest, g->write_aofs, g->fniv); break; case 0: if (g->fni8 && check_size_impl(oprsz, 8)) { - expand_3i_i64(dofs, aofs, bofs, oprsz, c, g->load_dest, g->fni8); + expand_3i_i64(dofs, aofs, bofs, oprsz, c, + g->load_dest, g->write_aofs, g->fni8); } else if (g->fni4 && check_size_impl(oprsz, 4)) { - expand_3i_i32(dofs, aofs, bofs, oprsz, c, g->load_dest, g->fni4); + expand_3i_i32(dofs, aofs, bofs, oprsz, c, + g->load_dest, g->write_aofs, g->fni4); } else { assert(g->fno != NULL); tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, c, g->fno);