tcg: Add write_aofs to GVecGen3i
tcg/i386: Simplify immediate 8-bit logical vector shifts tcg/i386: Optimize setcond of TST{EQ,NE} with 0xffffffff tcg/optimize: Optimize setcond with zmask accel/tcg: Introduce CF_BP_PAGE target/sh4: Update DisasContextBase.insn_start gitlab: Drop --static from s390x linux-user build gitlab: Streamline ubuntu-22.04-s390x -----BEGIN PGP SIGNATURE----- iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmY6OoAdHHJpY2hhcmQu aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV8FEwf7Bhs9bV2Kp4LxUzGq +dSHHc/WuCyIILLDQ4kZyXvILuI59wYhrWBUUTzBnAZ/tEf0oMG2y57F/lIcxz9w VvsFicMOhtjQ8iBEfl/rkkaYs9BLcxqMTAA3PxNBE6l3bzjcHSTkhey4MoPGRibn CkwaLzb2ebNjfgzC1IsNf/tyiMXl0tBQM7JVV4EztaOGEmqw8X0/PyVZDiC3WUNC tf9yqiNIlgGkn7rj3sT/rNdi4xlzQybgrb1MCFT6z5cqsW2bwqivRpxHi4yulHKI VhYA3kud+TX2ASukpibsSkA+9SbcH/qwOugPhPIu+KANsFUcVKL6Anzv6Ysl9kZ0 +Wnbow== =FJCW -----END PGP SIGNATURE----- Merge tag 'pull-tcg-20240507' of https://gitlab.com/rth7680/qemu into staging tcg: Add write_aofs to GVecGen3i tcg/i386: Simplify immediate 8-bit logical vector shifts tcg/i386: Optimize setcond of TST{EQ,NE} with 0xffffffff tcg/optimize: Optimize setcond with zmask accel/tcg: Introduce CF_BP_PAGE target/sh4: Update DisasContextBase.insn_start gitlab: Drop --static from s390x linux-user build gitlab: Streamline ubuntu-22.04-s390x # -----BEGIN PGP SIGNATURE----- # # iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmY6OoAdHHJpY2hhcmQu # aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV8FEwf7Bhs9bV2Kp4LxUzGq # +dSHHc/WuCyIILLDQ4kZyXvILuI59wYhrWBUUTzBnAZ/tEf0oMG2y57F/lIcxz9w # VvsFicMOhtjQ8iBEfl/rkkaYs9BLcxqMTAA3PxNBE6l3bzjcHSTkhey4MoPGRibn # CkwaLzb2ebNjfgzC1IsNf/tyiMXl0tBQM7JVV4EztaOGEmqw8X0/PyVZDiC3WUNC # tf9yqiNIlgGkn7rj3sT/rNdi4xlzQybgrb1MCFT6z5cqsW2bwqivRpxHi4yulHKI # VhYA3kud+TX2ASukpibsSkA+9SbcH/qwOugPhPIu+KANsFUcVKL6Anzv6Ysl9kZ0 # +Wnbow== # =FJCW # -----END PGP SIGNATURE----- # gpg: Signature made Tue 07 May 2024 07:28:16 AM PDT # gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F # gpg: issuer "richard.henderson@linaro.org" # 
gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate] * tag 'pull-tcg-20240507' of https://gitlab.com/rth7680/qemu: gitlab: Streamline ubuntu-22.04-s390x gitlab: Drop --static from s390x linux-user build gitlab: Drop --disable-libssh from ubuntu-22.04-s390x.yml target/sh4: Update DisasContextBase.insn_start accel/tcg: Introduce CF_BP_PAGE tcg/optimize: Optimize setcond with zmask tcg/i386: Optimize setcond of TST{EQ,NE} with 0xffffffff tcg/i386: Simplify immediate 8-bit logical vector shifts tcg: Add write_aofs to GVecGen3i Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
commit
571882c668
@ -2,7 +2,7 @@
|
||||
# setup by the scripts/ci/setup/build-environment.yml task
|
||||
# "Install basic packages to build QEMU on Ubuntu 22.04"
|
||||
|
||||
ubuntu-22.04-s390x-all-linux-static:
|
||||
ubuntu-22.04-s390x-all-linux:
|
||||
extends: .custom_runner_template
|
||||
needs: []
|
||||
stage: build
|
||||
@ -15,13 +15,13 @@ ubuntu-22.04-s390x-all-linux-static:
|
||||
script:
|
||||
- mkdir build
|
||||
- cd build
|
||||
- ../configure --enable-debug --static --disable-system
|
||||
- ../configure --enable-debug --disable-system --disable-tools --disable-docs
|
||||
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
|
||||
- make --output-sync -j`nproc`
|
||||
- make --output-sync check-tcg
|
||||
- make --output-sync -j`nproc` check
|
||||
|
||||
ubuntu-22.04-s390x-all:
|
||||
ubuntu-22.04-s390x-all-system:
|
||||
extends: .custom_runner_template
|
||||
needs: []
|
||||
stage: build
|
||||
@ -35,7 +35,7 @@ ubuntu-22.04-s390x-all:
|
||||
script:
|
||||
- mkdir build
|
||||
- cd build
|
||||
- ../configure --disable-libssh
|
||||
- ../configure --disable-user
|
||||
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
|
||||
- make --output-sync -j`nproc`
|
||||
- make --output-sync -j`nproc` check
|
||||
@ -57,7 +57,7 @@ ubuntu-22.04-s390x-alldbg:
|
||||
script:
|
||||
- mkdir build
|
||||
- cd build
|
||||
- ../configure --enable-debug --disable-libssh
|
||||
- ../configure --enable-debug
|
||||
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
|
||||
- make clean
|
||||
- make --output-sync -j`nproc`
|
||||
@ -80,7 +80,7 @@ ubuntu-22.04-s390x-clang:
|
||||
script:
|
||||
- mkdir build
|
||||
- cd build
|
||||
- ../configure --disable-libssh --cc=clang --cxx=clang++ --enable-sanitizers
|
||||
- ../configure --cc=clang --cxx=clang++ --enable-sanitizers
|
||||
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
|
||||
- make --output-sync -j`nproc`
|
||||
- make --output-sync -j`nproc` check
|
||||
@ -101,7 +101,7 @@ ubuntu-22.04-s390x-tci:
|
||||
script:
|
||||
- mkdir build
|
||||
- cd build
|
||||
- ../configure --disable-libssh --enable-tcg-interpreter
|
||||
- ../configure --enable-tcg-interpreter
|
||||
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
|
||||
- make --output-sync -j`nproc`
|
||||
|
||||
@ -122,7 +122,7 @@ ubuntu-22.04-s390x-notcg:
|
||||
script:
|
||||
- mkdir build
|
||||
- cd build
|
||||
- ../configure --disable-libssh --disable-tcg
|
||||
- ../configure --disable-tcg
|
||||
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
|
||||
- make --output-sync -j`nproc`
|
||||
- make --output-sync -j`nproc` check
|
||||
|
@ -381,7 +381,7 @@ static bool check_for_breakpoints_slow(CPUState *cpu, vaddr pc,
|
||||
* breakpoints are removed.
|
||||
*/
|
||||
if (match_page) {
|
||||
*cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | 1;
|
||||
*cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | CF_BP_PAGE | 1;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -77,6 +77,7 @@ struct TranslationBlock {
|
||||
#define CF_PARALLEL 0x00008000 /* Generate code for a parallel context */
|
||||
#define CF_NOIRQ 0x00010000 /* Generate an uninterruptible TB */
|
||||
#define CF_PCREL 0x00020000 /* Opcodes in TB are PC-relative */
|
||||
#define CF_BP_PAGE 0x00040000 /* Breakpoint present in code page */
|
||||
#define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */
|
||||
#define CF_CLUSTER_SHIFT 24
|
||||
|
||||
|
@ -183,6 +183,8 @@ typedef struct {
|
||||
bool prefer_i64;
|
||||
/* Load dest as a 3rd source operand. */
|
||||
bool load_dest;
|
||||
/* Write aofs as a 2nd dest operand. */
|
||||
bool write_aofs;
|
||||
} GVecGen3i;
|
||||
|
||||
typedef struct {
|
||||
|
@ -2189,6 +2189,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
|
||||
*/
|
||||
for (i = 1; i < max_insns; ++i) {
|
||||
tcg_gen_insn_start(pc + i * 2, ctx->envflags);
|
||||
ctx->base.insn_start = tcg_last_op();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -1658,6 +1658,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
|
||||
TCGArg dest, TCGArg arg1, TCGArg arg2,
|
||||
int const_arg2, bool neg)
|
||||
{
|
||||
int cmp_rexw = rexw;
|
||||
bool inv = false;
|
||||
bool cleared;
|
||||
int jcc;
|
||||
@ -1674,6 +1675,18 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
|
||||
}
|
||||
break;
|
||||
|
||||
case TCG_COND_TSTNE:
|
||||
inv = true;
|
||||
/* fall through */
|
||||
case TCG_COND_TSTEQ:
|
||||
/* If arg2 is -1, convert to LTU/GEU vs 1. */
|
||||
if (const_arg2 && arg2 == 0xffffffffu) {
|
||||
arg2 = 1;
|
||||
cmp_rexw = 0;
|
||||
goto do_ltu;
|
||||
}
|
||||
break;
|
||||
|
||||
case TCG_COND_LEU:
|
||||
inv = true;
|
||||
/* fall through */
|
||||
@ -1697,7 +1710,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
|
||||
* We can then use NEG or INC to produce the desired result.
|
||||
* This is always smaller than the SETCC expansion.
|
||||
*/
|
||||
tcg_out_cmp(s, TCG_COND_LTU, arg1, arg2, const_arg2, rexw);
|
||||
tcg_out_cmp(s, TCG_COND_LTU, arg1, arg2, const_arg2, cmp_rexw);
|
||||
|
||||
/* X - X - C = -C = (C ? -1 : 0) */
|
||||
tgen_arithr(s, ARITH_SBB + (neg ? rexw : 0), dest, dest);
|
||||
@ -1744,7 +1757,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
|
||||
cleared = true;
|
||||
}
|
||||
|
||||
jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, rexw);
|
||||
jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, cmp_rexw);
|
||||
tcg_out_modrm(s, OPC_SETCC | jcc, 0, dest);
|
||||
|
||||
if (!cleared) {
|
||||
@ -3769,49 +3782,20 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
|
||||
}
|
||||
}
|
||||
|
||||
static void expand_vec_shi(TCGType type, unsigned vece, TCGOpcode opc,
|
||||
static void expand_vec_shi(TCGType type, unsigned vece, bool right,
|
||||
TCGv_vec v0, TCGv_vec v1, TCGArg imm)
|
||||
{
|
||||
TCGv_vec t1, t2;
|
||||
uint8_t mask;
|
||||
|
||||
tcg_debug_assert(vece == MO_8);
|
||||
|
||||
t1 = tcg_temp_new_vec(type);
|
||||
t2 = tcg_temp_new_vec(type);
|
||||
|
||||
/*
|
||||
* Unpack to W, shift, and repack. Tricky bits:
|
||||
* (1) Use punpck*bw x,x to produce DDCCBBAA,
|
||||
* i.e. duplicate in other half of the 16-bit lane.
|
||||
* (2) For right-shift, add 8 so that the high half of the lane
|
||||
* becomes zero. For left-shift, and left-rotate, we must
|
||||
* shift up and down again.
|
||||
* (3) Step 2 leaves high half zero such that PACKUSWB
|
||||
* (pack with unsigned saturation) does not modify
|
||||
* the quantity.
|
||||
*/
|
||||
vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
|
||||
tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
|
||||
vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
|
||||
tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
|
||||
|
||||
if (opc != INDEX_op_rotli_vec) {
|
||||
imm += 8;
|
||||
}
|
||||
if (opc == INDEX_op_shri_vec) {
|
||||
tcg_gen_shri_vec(MO_16, t1, t1, imm);
|
||||
tcg_gen_shri_vec(MO_16, t2, t2, imm);
|
||||
if (right) {
|
||||
mask = 0xff >> imm;
|
||||
tcg_gen_shri_vec(MO_16, v0, v1, imm);
|
||||
} else {
|
||||
tcg_gen_shli_vec(MO_16, t1, t1, imm);
|
||||
tcg_gen_shli_vec(MO_16, t2, t2, imm);
|
||||
tcg_gen_shri_vec(MO_16, t1, t1, 8);
|
||||
tcg_gen_shri_vec(MO_16, t2, t2, 8);
|
||||
mask = 0xff << imm;
|
||||
tcg_gen_shli_vec(MO_16, v0, v1, imm);
|
||||
}
|
||||
|
||||
vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8,
|
||||
tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t2));
|
||||
tcg_temp_free_vec(t1);
|
||||
tcg_temp_free_vec(t2);
|
||||
tcg_gen_and_vec(MO_8, v0, v0, tcg_constant_vec(type, MO_8, mask));
|
||||
}
|
||||
|
||||
static void expand_vec_sari(TCGType type, unsigned vece,
|
||||
@ -3821,7 +3805,7 @@ static void expand_vec_sari(TCGType type, unsigned vece,
|
||||
|
||||
switch (vece) {
|
||||
case MO_8:
|
||||
/* Unpack to W, shift, and repack, as in expand_vec_shi. */
|
||||
/* Unpack to 16-bit, shift, and repack. */
|
||||
t1 = tcg_temp_new_vec(type);
|
||||
t2 = tcg_temp_new_vec(type);
|
||||
vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
|
||||
@ -3874,12 +3858,7 @@ static void expand_vec_rotli(TCGType type, unsigned vece,
|
||||
{
|
||||
TCGv_vec t;
|
||||
|
||||
if (vece == MO_8) {
|
||||
expand_vec_shi(type, vece, INDEX_op_rotli_vec, v0, v1, imm);
|
||||
return;
|
||||
}
|
||||
|
||||
if (have_avx512vbmi2) {
|
||||
if (vece != MO_8 && have_avx512vbmi2) {
|
||||
vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece,
|
||||
tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm);
|
||||
return;
|
||||
@ -4155,10 +4134,11 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
|
||||
|
||||
switch (opc) {
|
||||
case INDEX_op_shli_vec:
|
||||
case INDEX_op_shri_vec:
|
||||
expand_vec_shi(type, vece, opc, v0, v1, a2);
|
||||
expand_vec_shi(type, vece, false, v0, v1, a2);
|
||||
break;
|
||||
case INDEX_op_shri_vec:
|
||||
expand_vec_shi(type, vece, true, v0, v1, a2);
|
||||
break;
|
||||
|
||||
case INDEX_op_sari_vec:
|
||||
expand_vec_sari(type, vece, v0, v1, a2);
|
||||
break;
|
||||
|
110
tcg/optimize.c
110
tcg/optimize.c
@ -2099,6 +2099,108 @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Try to simplify a setcond-style op "dest = (A cond B)" when B (args[2])
 * is a constant, using the z_mask recorded for A (args[1]).
 *
 * @ctx: optimizer context; only ctx->type is read directly here.
 * @op:  the op being folded: args[0] = dest, args[1] = A, args[2] = B,
 *       args[3] = condition.
 * @neg: false when called from fold_setcond, true when called from
 *       fold_negsetcond; selects a 0/-1 result instead of 0/1.
 *
 * Returns the result of tcg_opt_gen_movi()/tcg_opt_gen_mov() when the op is
 * replaced by a constant or a plain move.  Returns false in every other
 * case -- including the case where op->opc has been rewritten in place to
 * neg/add/xor.
 *
 * NOTE(review): a_zmask is used below as an upper bound on the value of A
 * (presumably z_mask is the mask of bits that may be non-zero) -- confirm
 * against the definition of z_mask, which is not visible here.
 */
static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
|
||||
{
|
||||
uint64_t a_zmask, b_val;
|
||||
TCGCond cond;
|
||||
|
||||
/* Nothing can be deduced unless the second comparison operand is constant. */
if (!arg_is_const(op->args[2])) {
|
||||
return false;
|
||||
}
|
||||
|
||||
a_zmask = arg_info(op->args[1])->z_mask;
|
||||
b_val = arg_info(op->args[2])->val;
|
||||
cond = op->args[3];
|
||||
|
||||
/* For a 32-bit op, compare only the low 32 bits of both quantities. */
if (ctx->type == TCG_TYPE_I32) {
|
||||
a_zmask = (uint32_t)a_zmask;
|
||||
b_val = (uint32_t)b_val;
|
||||
}
|
||||
|
||||
/*
|
||||
* A with only low bits set vs B with high bits set means that A < B.
|
||||
*/
|
||||
if (a_zmask < b_val) {
|
||||
/* In this branch "inv" holds the known outcome of the comparison. */
bool inv = false;
|
||||
|
||||
switch (cond) {
|
||||
/* With A < B (unsigned): NE, LEU and LTU are always true ... */
case TCG_COND_NE:
|
||||
case TCG_COND_LEU:
|
||||
case TCG_COND_LTU:
|
||||
inv = true;
|
||||
/* fall through */
|
||||
/* ... while GTU, GEU and EQ are always false. */
case TCG_COND_GTU:
|
||||
case TCG_COND_GEU:
|
||||
case TCG_COND_EQ:
|
||||
/* Emit the constant result: 0/1, or 0/-1 when neg is set. */
return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* A with only lsb set is already boolean.
|
||||
*/
|
||||
if (a_zmask <= 1) {
|
||||
/* convert: the comparison reduces to A or !A; inv: it reduces to !A. */
bool convert = false;
|
||||
bool inv = false;
|
||||
|
||||
switch (cond) {
|
||||
/* Against 0: (A == 0) is !A, (A != 0) is A. */
case TCG_COND_EQ:
|
||||
inv = true;
|
||||
/* fall through */
|
||||
case TCG_COND_NE:
|
||||
convert = (b_val == 0);
|
||||
break;
|
||||
/* Against 1: (A < 1) and ((A & 1) == 0) are !A; (A >= 1) and ((A & 1) != 0) are A. */
case TCG_COND_LTU:
|
||||
case TCG_COND_TSTEQ:
|
||||
inv = true;
|
||||
/* fall through */
|
||||
case TCG_COND_GEU:
|
||||
case TCG_COND_TSTNE:
|
||||
convert = (b_val == 1);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (convert) {
|
||||
TCGOpcode add_opc, xor_opc, neg_opc;
|
||||
|
||||
/* Result is A itself: replace the op with a move. */
if (!inv && !neg) {
|
||||
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
|
||||
}
|
||||
|
||||
/* Pick the replacement opcodes matching the operation width. */
switch (ctx->type) {
|
||||
case TCG_TYPE_I32:
|
||||
add_opc = INDEX_op_add_i32;
|
||||
neg_opc = INDEX_op_neg_i32;
|
||||
xor_opc = INDEX_op_xor_i32;
|
||||
break;
|
||||
case TCG_TYPE_I64:
|
||||
add_opc = INDEX_op_add_i64;
|
||||
neg_opc = INDEX_op_neg_i64;
|
||||
xor_opc = INDEX_op_xor_i64;
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
/*
 * Rewrite the op in place (A is 0 or 1):
 *   !inv (neg must be set here): dest = -A
 *   inv && neg:                  dest = A + (-1), i.e. -(!A)
 *   inv && !neg:                 dest = A ^ 1,    i.e. !A
 */
if (!inv) {
|
||||
op->opc = neg_opc;
|
||||
} else if (neg) {
|
||||
op->opc = add_opc;
|
||||
op->args[2] = arg_new_constant(ctx, -1);
|
||||
} else {
|
||||
op->opc = xor_opc;
|
||||
op->args[2] = arg_new_constant(ctx, 1);
|
||||
}
|
||||
/*
 * NOTE(review): returning false makes the callers (fold_setcond /
 * fold_negsetcond) go on to call fold_setcond_tst_pow2() on this
 * already-rewritten op, whose args[3] still holds the old condition
 * and whose args[2] may be a constant.  Confirm that helper cannot
 * match and transform the op a second time (e.g. for TSTEQ/TSTNE
 * with B == 1).
 */
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
|
||||
{
|
||||
TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc;
|
||||
@ -2200,6 +2302,10 @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
|
||||
if (i >= 0) {
|
||||
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
|
||||
}
|
||||
|
||||
if (fold_setcond_zmask(ctx, op, false)) {
|
||||
return true;
|
||||
}
|
||||
fold_setcond_tst_pow2(ctx, op, false);
|
||||
|
||||
ctx->z_mask = 1;
|
||||
@ -2214,6 +2320,10 @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
|
||||
if (i >= 0) {
|
||||
return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
|
||||
}
|
||||
|
||||
if (fold_setcond_zmask(ctx, op, true)) {
|
||||
return true;
|
||||
}
|
||||
fold_setcond_tst_pow2(ctx, op, true);
|
||||
|
||||
/* Value is {0,-1} so all bits are repetitions of the sign. */
|
||||
|
@ -785,7 +785,8 @@ static void expand_3_i32(uint32_t dofs, uint32_t aofs,
|
||||
}
|
||||
|
||||
static void expand_3i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
uint32_t oprsz, int32_t c, bool load_dest,
|
||||
uint32_t oprsz, int32_t c,
|
||||
bool load_dest, bool write_aofs,
|
||||
void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32, int32_t))
|
||||
{
|
||||
TCGv_i32 t0 = tcg_temp_new_i32();
|
||||
@ -801,6 +802,9 @@ static void expand_3i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
}
|
||||
fni(t2, t0, t1, c);
|
||||
tcg_gen_st_i32(t2, tcg_env, dofs + i);
|
||||
if (write_aofs) {
|
||||
tcg_gen_st_i32(t0, tcg_env, aofs + i);
|
||||
}
|
||||
}
|
||||
tcg_temp_free_i32(t0);
|
||||
tcg_temp_free_i32(t1);
|
||||
@ -944,7 +948,8 @@ static void expand_3_i64(uint32_t dofs, uint32_t aofs,
|
||||
}
|
||||
|
||||
static void expand_3i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
uint32_t oprsz, int64_t c, bool load_dest,
|
||||
uint32_t oprsz, int64_t c,
|
||||
bool load_dest, bool write_aofs,
|
||||
void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64, int64_t))
|
||||
{
|
||||
TCGv_i64 t0 = tcg_temp_new_i64();
|
||||
@ -960,6 +965,9 @@ static void expand_3i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
}
|
||||
fni(t2, t0, t1, c);
|
||||
tcg_gen_st_i64(t2, tcg_env, dofs + i);
|
||||
if (write_aofs) {
|
||||
tcg_gen_st_i64(t0, tcg_env, aofs + i);
|
||||
}
|
||||
}
|
||||
tcg_temp_free_i64(t0);
|
||||
tcg_temp_free_i64(t1);
|
||||
@ -1102,7 +1110,8 @@ static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
*/
|
||||
static void expand_3i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t tysz,
|
||||
TCGType type, int64_t c, bool load_dest,
|
||||
TCGType type, int64_t c,
|
||||
bool load_dest, bool write_aofs,
|
||||
void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec,
|
||||
int64_t))
|
||||
{
|
||||
@ -1118,6 +1127,9 @@ static void expand_3i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
}
|
||||
fni(vece, t2, t0, t1, c);
|
||||
tcg_gen_st_vec(t2, tcg_env, dofs + i);
|
||||
if (write_aofs) {
|
||||
tcg_gen_st_vec(t0, tcg_env, aofs + i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1471,7 +1483,7 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
*/
|
||||
some = QEMU_ALIGN_DOWN(oprsz, 32);
|
||||
expand_3i_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256,
|
||||
c, g->load_dest, g->fniv);
|
||||
c, g->load_dest, g->write_aofs, g->fniv);
|
||||
if (some == oprsz) {
|
||||
break;
|
||||
}
|
||||
@ -1483,18 +1495,20 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
/* fallthru */
|
||||
case TCG_TYPE_V128:
|
||||
expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128,
|
||||
c, g->load_dest, g->fniv);
|
||||
c, g->load_dest, g->write_aofs, g->fniv);
|
||||
break;
|
||||
case TCG_TYPE_V64:
|
||||
expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64,
|
||||
c, g->load_dest, g->fniv);
|
||||
c, g->load_dest, g->write_aofs, g->fniv);
|
||||
break;
|
||||
|
||||
case 0:
|
||||
if (g->fni8 && check_size_impl(oprsz, 8)) {
|
||||
expand_3i_i64(dofs, aofs, bofs, oprsz, c, g->load_dest, g->fni8);
|
||||
expand_3i_i64(dofs, aofs, bofs, oprsz, c,
|
||||
g->load_dest, g->write_aofs, g->fni8);
|
||||
} else if (g->fni4 && check_size_impl(oprsz, 4)) {
|
||||
expand_3i_i32(dofs, aofs, bofs, oprsz, c, g->load_dest, g->fni4);
|
||||
expand_3i_i32(dofs, aofs, bofs, oprsz, c,
|
||||
g->load_dest, g->write_aofs, g->fni4);
|
||||
} else {
|
||||
assert(g->fno != NULL);
|
||||
tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, c, g->fno);
|
||||
|
Loading…
Reference in New Issue
Block a user