Improve code generation for vector duplication.

Add vector expansions for shifts by non-constant scalar.
 Add vector expansions for shifts by vector.
 Add integer and vector expansions for absolute value.
 Several patches in preparation for Altivec.
 Bug fix for tcg/aarch64 vs min/max.
 -----BEGIN PGP SIGNATURE-----
 
 iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAlzaBIodHHJpY2hhcmQu
 aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV+iAQf/Tdg7LmGSlNGPEEcA
 5fRXRu5ZzMPXWvfzAIxxTZGQzwipWPjLdEhJVNktIdHvPH3cpd8Ev6KTX4BUuSyp
 zjnepIiY2XXOkAuyUkNneRfKkzLTTFaV+v5A5EVhgxxECLm2HQDgmj0bECvfOCtA
 DSCVEoXUze4uLk0iHTw/+dHckcD0Q1G6oJLvfYSZ7aLT6+TTb4piLqAQRBP8zUIM
 KtntSP2+aEbr5j+lfw72YRptnclzLcwifY+TotwcEkMg5x9THgh2G0Oni2CvLGIb
 0dm6SmuEENFvvVpzErcMQaZ9yh9x3yHW7zdIxoMZpFtzThtHewR2pFVnkkIkPDNR
 Zh8xMQ==
 =HoB3
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20190513' into staging

Improve code generation for vector duplication.
Add vector expansions for shifts by non-constant scalar.
Add vector expansions for shifts by vector.
Add integer and vector expansions for absolute value.
Several patches in preparation for Altivec.
Bug fix for tcg/aarch64 vs min/max.

# gpg: Signature made Tue 14 May 2019 00:58:02 BST
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth/tags/pull-tcg-20190513: (31 commits)
  tcg/aarch64: Do not advertise minmax for MO_64
  target/xtensa: Use tcg_gen_abs_i32
  target/tricore: Use tcg_gen_abs_tl
  target/s390x: Use tcg_gen_abs_i64
  target/ppc: Use tcg_gen_abs_tl
  target/ppc: Use tcg_gen_abs_i32
  target/cris: Use tcg_gen_abs_tl
  target/arm: Use tcg_gen_abs_i64 and tcg_gen_gvec_abs
  tcg/aarch64: Support vector absolute value
  tcg/i386: Support vector absolute value
  tcg: Add support for vector absolute value
  tcg: Add support for integer absolute value
  tcg/i386: Support vector scalar shift opcodes
  tcg: Add gvec expanders for vector shift by scalar
  tcg/aarch64: Support vector variable shift opcodes
  tcg/i386: Support vector variable shift opcodes
  tcg: Add gvec expanders for variable shift
  tcg: Add INDEX_op_dupm_vec
  tcg/aarch64: Implement tcg_out_dupm_vec
  tcg/i386: Implement tcg_out_dupm_vec
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2019-05-14 10:08:47 +01:00
commit e329ad2ab7
36 changed files with 2020 additions and 473 deletions

View File

@ -398,6 +398,54 @@ void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
clear_high(d, oprsz, desc);
}
void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(int8_t)) {
int8_t aa = *(int8_t *)(a + i);
*(int8_t *)(d + i) = aa < 0 ? -aa : aa;
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(int16_t)) {
int16_t aa = *(int16_t *)(a + i);
*(int16_t *)(d + i) = aa < 0 ? -aa : aa;
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(int32_t)) {
int32_t aa = *(int32_t *)(a + i);
*(int32_t *)(d + i) = aa < 0 ? -aa : aa;
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(int64_t)) {
int64_t aa = *(int64_t *)(a + i);
*(int64_t *)(d + i) = aa < 0 ? -aa : aa;
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
@ -725,6 +773,150 @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
clear_high(d, oprsz, desc);
}
/*
 * Out-of-line helper: element-wise left shift over 8-bit lanes,
 * d[i] = a[i] << (b[i] & 7).  The count is masked to the lane width.
 */
void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
{
    const intptr_t nbytes = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < nbytes; ofs += sizeof(uint8_t)) {
        uint8_t count = *(uint8_t *)(b + ofs) & 7;
        uint8_t value = *(uint8_t *)(a + ofs);

        *(uint8_t *)(d + ofs) = value << count;
    }
    /* clear_high() is defined elsewhere; presumably handles bytes past nbytes. */
    clear_high(d, nbytes, desc);
}
/*
 * Out-of-line helper: element-wise left shift over 16-bit lanes,
 * d[i] = a[i] << (b[i] & 15).  The count is masked to the lane width.
 */
void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
{
    const intptr_t nbytes = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < nbytes; ofs += sizeof(uint16_t)) {
        uint8_t count = *(uint16_t *)(b + ofs) & 15;
        uint16_t value = *(uint16_t *)(a + ofs);

        *(uint16_t *)(d + ofs) = value << count;
    }
    /* clear_high() is defined elsewhere; presumably handles bytes past nbytes. */
    clear_high(d, nbytes, desc);
}
/*
 * Out-of-line helper: element-wise left shift over 32-bit lanes,
 * d[i] = a[i] << (b[i] & 31).  The count is masked to the lane width.
 */
void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
{
    const intptr_t nbytes = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < nbytes; ofs += sizeof(uint32_t)) {
        uint8_t count = *(uint32_t *)(b + ofs) & 31;
        uint32_t value = *(uint32_t *)(a + ofs);

        *(uint32_t *)(d + ofs) = value << count;
    }
    /* clear_high() is defined elsewhere; presumably handles bytes past nbytes. */
    clear_high(d, nbytes, desc);
}
/*
 * Out-of-line helper: element-wise left shift over 64-bit lanes,
 * d[i] = a[i] << (b[i] & 63).  The count is masked to the lane width.
 */
void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
{
    const intptr_t nbytes = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < nbytes; ofs += sizeof(uint64_t)) {
        uint8_t count = *(uint64_t *)(b + ofs) & 63;
        uint64_t value = *(uint64_t *)(a + ofs);

        *(uint64_t *)(d + ofs) = value << count;
    }
    /* clear_high() is defined elsewhere; presumably handles bytes past nbytes. */
    clear_high(d, nbytes, desc);
}
/*
 * Out-of-line helper: element-wise logical (unsigned) right shift over
 * 8-bit lanes, d[i] = a[i] >> (b[i] & 7).
 */
void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
{
    const intptr_t nbytes = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < nbytes; ofs += sizeof(uint8_t)) {
        uint8_t count = *(uint8_t *)(b + ofs) & 7;
        uint8_t value = *(uint8_t *)(a + ofs);

        *(uint8_t *)(d + ofs) = value >> count;
    }
    /* clear_high() is defined elsewhere; presumably handles bytes past nbytes. */
    clear_high(d, nbytes, desc);
}
/*
 * Out-of-line helper: element-wise logical (unsigned) right shift over
 * 16-bit lanes, d[i] = a[i] >> (b[i] & 15).
 */
void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
{
    const intptr_t nbytes = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < nbytes; ofs += sizeof(uint16_t)) {
        uint8_t count = *(uint16_t *)(b + ofs) & 15;
        uint16_t value = *(uint16_t *)(a + ofs);

        *(uint16_t *)(d + ofs) = value >> count;
    }
    /* clear_high() is defined elsewhere; presumably handles bytes past nbytes. */
    clear_high(d, nbytes, desc);
}
/*
 * Out-of-line helper: element-wise logical (unsigned) right shift over
 * 32-bit lanes, d[i] = a[i] >> (b[i] & 31).
 */
void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
{
    const intptr_t nbytes = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < nbytes; ofs += sizeof(uint32_t)) {
        uint8_t count = *(uint32_t *)(b + ofs) & 31;
        uint32_t value = *(uint32_t *)(a + ofs);

        *(uint32_t *)(d + ofs) = value >> count;
    }
    /* clear_high() is defined elsewhere; presumably handles bytes past nbytes. */
    clear_high(d, nbytes, desc);
}
/*
 * Out-of-line helper: element-wise logical (unsigned) right shift over
 * 64-bit lanes, d[i] = a[i] >> (b[i] & 63).
 */
void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
{
    const intptr_t nbytes = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < nbytes; ofs += sizeof(uint64_t)) {
        uint8_t count = *(uint64_t *)(b + ofs) & 63;
        uint64_t value = *(uint64_t *)(a + ofs);

        *(uint64_t *)(d + ofs) = value >> count;
    }
    /* clear_high() is defined elsewhere; presumably handles bytes past nbytes. */
    clear_high(d, nbytes, desc);
}
/*
 * Out-of-line helper: element-wise arithmetic (signed) right shift over
 * 8-bit lanes, d[i] = (int8_t)a[i] >> (b[i] & 7).
 *
 * d, a, b: destination, value and shift-count vectors, accessed bytewise.
 * desc:    descriptor from which simd_oprsz() extracts the operation size.
 */
void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    /*
     * Step by one scalar element: the body loads and stores a single
     * int8_t per iteration, exactly like gvec_sar16v and gvec_shr8v.
     * Stepping by sizeof(vec8) would skip elements whenever vec8 is
     * wider than one byte (e.g. a host vector type).
     */
    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
        uint8_t sh = *(uint8_t *)(b + i) & 7;
        *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
    }
    /* clear_high() is defined elsewhere; presumably handles bytes past oprsz. */
    clear_high(d, oprsz, desc);
}
/*
 * Out-of-line helper: element-wise arithmetic (signed) right shift over
 * 16-bit lanes, d[i] = (int16_t)a[i] >> (b[i] & 15).
 */
void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
{
    const intptr_t nbytes = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < nbytes; ofs += sizeof(int16_t)) {
        uint8_t count = *(uint16_t *)(b + ofs) & 15;
        int16_t value = *(int16_t *)(a + ofs);

        *(int16_t *)(d + ofs) = value >> count;
    }
    /* clear_high() is defined elsewhere; presumably handles bytes past nbytes. */
    clear_high(d, nbytes, desc);
}
/*
 * Out-of-line helper: element-wise arithmetic (signed) right shift over
 * 32-bit lanes, d[i] = (int32_t)a[i] >> (b[i] & 31).
 *
 * d, a, b: destination, value and shift-count vectors.
 * desc:    descriptor from which simd_oprsz() extracts the operation size.
 */
void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    /*
     * Step by one scalar element: the body loads and stores a single
     * int32_t per iteration, exactly like gvec_sar16v and gvec_shr32v.
     * Stepping by sizeof(vec32) would skip elements whenever vec32 is
     * wider than four bytes (e.g. a host vector type).
     */
    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
        uint8_t sh = *(uint32_t *)(b + i) & 31;
        *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
    }
    /* clear_high() is defined elsewhere; presumably handles bytes past oprsz. */
    clear_high(d, oprsz, desc);
}
/*
 * Out-of-line helper: element-wise arithmetic (signed) right shift over
 * 64-bit lanes, d[i] = (int64_t)a[i] >> (b[i] & 63).
 *
 * d, a, b: destination, value and shift-count vectors.
 * desc:    descriptor from which simd_oprsz() extracts the operation size.
 */
void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    /*
     * Step by one scalar element: the body loads and stores a single
     * int64_t per iteration, exactly like gvec_sar16v and gvec_shr64v.
     * Stepping by sizeof(vec64) would skip elements whenever vec64 is
     * wider than eight bytes (e.g. a host vector type).
     */
    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
        uint8_t sh = *(uint64_t *)(b + i) & 63;
        *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
    }
    /* clear_high() is defined elsewhere; presumably handles bytes past oprsz. */
    clear_high(d, oprsz, desc);
}
/* If vectors are enabled, the compiler fills in -1 for true.
Otherwise, we must take care of this by hand. */
#ifdef CONFIG_VECTOR16

View File

@ -225,6 +225,11 @@ DEF_HELPER_FLAGS_3(gvec_neg16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_neg32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_neg64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_abs8, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_abs16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_abs32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_abs64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_not, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_and, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_or, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@ -254,6 +259,21 @@ DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shl64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shr8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shr16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shr32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shr64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

View File

@ -352,8 +352,6 @@ DEF_HELPER_2(neon_ceq_u8, i32, i32, i32)
DEF_HELPER_2(neon_ceq_u16, i32, i32, i32)
DEF_HELPER_2(neon_ceq_u32, i32, i32, i32)
DEF_HELPER_1(neon_abs_s8, i32, i32)
DEF_HELPER_1(neon_abs_s16, i32, i32)
DEF_HELPER_1(neon_clz_u8, i32, i32)
DEF_HELPER_1(neon_clz_u16, i32, i32)
DEF_HELPER_1(neon_cls_s8, i32, i32)

View File

@ -1228,11 +1228,6 @@ NEON_VOP(ceq_u16, neon_u16, 2)
NEON_VOP(ceq_u32, neon_u32, 1)
#undef NEON_FN
#define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src
NEON_VOP1(abs_s8, neon_s8, 4)
NEON_VOP1(abs_s16, neon_s16, 2)
#undef NEON_FN
/* Count Leading Sign/Zero Bits. */
static inline int do_clz8(uint8_t x)
{

View File

@ -9468,11 +9468,7 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u,
if (u) {
tcg_gen_neg_i64(tcg_rd, tcg_rn);
} else {
TCGv_i64 tcg_zero = tcg_const_i64(0);
tcg_gen_neg_i64(tcg_rd, tcg_rn);
tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
tcg_rn, tcg_rd);
tcg_temp_free_i64(tcg_zero);
tcg_gen_abs_i64(tcg_rd, tcg_rn);
}
break;
case 0x2f: /* FABS */
@ -12366,11 +12362,12 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
}
break;
case 0xb:
if (u) { /* NEG */
if (u) { /* ABS, NEG */
gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
return;
} else {
gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
}
break;
return;
}
if (size == 3) {
@ -12438,17 +12435,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
}
break;
case 0xb: /* ABS, NEG */
if (u) {
tcg_gen_neg_i32(tcg_res, tcg_op);
} else {
TCGv_i32 tcg_zero = tcg_const_i32(0);
tcg_gen_neg_i32(tcg_res, tcg_op);
tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
tcg_zero, tcg_op, tcg_res);
tcg_temp_free_i32(tcg_zero);
}
break;
case 0x2f: /* FABS */
gen_helper_vfp_abss(tcg_res, tcg_op);
break;
@ -12561,23 +12547,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
tcg_temp_free_i32(tcg_zero);
break;
}
case 0xb: /* ABS, NEG */
if (u) {
TCGv_i32 tcg_zero = tcg_const_i32(0);
if (size) {
gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
} else {
gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
}
tcg_temp_free_i32(tcg_zero);
} else {
if (size) {
gen_helper_neon_abs_s16(tcg_res, tcg_op);
} else {
gen_helper_neon_abs_s8(tcg_res, tcg_op);
}
}
break;
case 0x4: /* CLS, CLZ */
if (u) {
if (size == 0) {

View File

@ -3302,29 +3302,30 @@ static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
static const GVecGen2s op[4] = {
{ .fni8 = tcg_gen_vec_sub8_i64,
.fniv = tcg_gen_sub_vec,
.fno = gen_helper_sve_subri_b,
.opc = INDEX_op_sub_vec,
.opt_opc = vecop_list,
.vece = MO_8,
.scalar_first = true },
{ .fni8 = tcg_gen_vec_sub16_i64,
.fniv = tcg_gen_sub_vec,
.fno = gen_helper_sve_subri_h,
.opc = INDEX_op_sub_vec,
.opt_opc = vecop_list,
.vece = MO_16,
.scalar_first = true },
{ .fni4 = tcg_gen_sub_i32,
.fniv = tcg_gen_sub_vec,
.fno = gen_helper_sve_subri_s,
.opc = INDEX_op_sub_vec,
.opt_opc = vecop_list,
.vece = MO_32,
.scalar_first = true },
{ .fni8 = tcg_gen_sub_i64,
.fniv = tcg_gen_sub_vec,
.fno = gen_helper_sve_subri_d,
.opc = INDEX_op_sub_vec,
.opt_opc = vecop_list,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
.vece = MO_64,
.scalar_first = true }

View File

@ -604,16 +604,6 @@ static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
tcg_temp_free_i32(tmp1);
}
static void tcg_gen_abs_i32(TCGv_i32 dest, TCGv_i32 src)
{
TCGv_i32 c0 = tcg_const_i32(0);
TCGv_i32 tmp = tcg_temp_new_i32();
tcg_gen_neg_i32(tmp, src);
tcg_gen_movcond_i32(TCG_COND_GT, dest, src, c0, src, tmp);
tcg_temp_free_i32(c0);
tcg_temp_free_i32(tmp);
}
static void shifter_out_im(TCGv_i32 var, int shift)
{
if (shift == 0) {
@ -5861,27 +5851,31 @@ static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
tcg_gen_add_vec(vece, d, d, a);
}
static const TCGOpcode vecop_list_ssra[] = {
INDEX_op_sari_vec, INDEX_op_add_vec, 0
};
const GVecGen2i ssra_op[4] = {
{ .fni8 = gen_ssra8_i64,
.fniv = gen_ssra_vec,
.load_dest = true,
.opc = INDEX_op_sari_vec,
.opt_opc = vecop_list_ssra,
.vece = MO_8 },
{ .fni8 = gen_ssra16_i64,
.fniv = gen_ssra_vec,
.load_dest = true,
.opc = INDEX_op_sari_vec,
.opt_opc = vecop_list_ssra,
.vece = MO_16 },
{ .fni4 = gen_ssra32_i32,
.fniv = gen_ssra_vec,
.load_dest = true,
.opc = INDEX_op_sari_vec,
.opt_opc = vecop_list_ssra,
.vece = MO_32 },
{ .fni8 = gen_ssra64_i64,
.fniv = gen_ssra_vec,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
.opt_opc = vecop_list_ssra,
.load_dest = true,
.opc = INDEX_op_sari_vec,
.vece = MO_64 },
};
@ -5915,27 +5909,31 @@ static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
tcg_gen_add_vec(vece, d, d, a);
}
static const TCGOpcode vecop_list_usra[] = {
INDEX_op_shri_vec, INDEX_op_add_vec, 0
};
const GVecGen2i usra_op[4] = {
{ .fni8 = gen_usra8_i64,
.fniv = gen_usra_vec,
.load_dest = true,
.opc = INDEX_op_shri_vec,
.opt_opc = vecop_list_usra,
.vece = MO_8, },
{ .fni8 = gen_usra16_i64,
.fniv = gen_usra_vec,
.load_dest = true,
.opc = INDEX_op_shri_vec,
.opt_opc = vecop_list_usra,
.vece = MO_16, },
{ .fni4 = gen_usra32_i32,
.fniv = gen_usra_vec,
.load_dest = true,
.opc = INDEX_op_shri_vec,
.opt_opc = vecop_list_usra,
.vece = MO_32, },
{ .fni8 = gen_usra64_i64,
.fniv = gen_usra_vec,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
.load_dest = true,
.opc = INDEX_op_shri_vec,
.opt_opc = vecop_list_usra,
.vece = MO_64, },
};
@ -5993,27 +5991,29 @@ static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
}
}
static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, 0 };
const GVecGen2i sri_op[4] = {
{ .fni8 = gen_shr8_ins_i64,
.fniv = gen_shr_ins_vec,
.load_dest = true,
.opc = INDEX_op_shri_vec,
.opt_opc = vecop_list_sri,
.vece = MO_8 },
{ .fni8 = gen_shr16_ins_i64,
.fniv = gen_shr_ins_vec,
.load_dest = true,
.opc = INDEX_op_shri_vec,
.opt_opc = vecop_list_sri,
.vece = MO_16 },
{ .fni4 = gen_shr32_ins_i32,
.fniv = gen_shr_ins_vec,
.load_dest = true,
.opc = INDEX_op_shri_vec,
.opt_opc = vecop_list_sri,
.vece = MO_32 },
{ .fni8 = gen_shr64_ins_i64,
.fniv = gen_shr_ins_vec,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
.load_dest = true,
.opc = INDEX_op_shri_vec,
.opt_opc = vecop_list_sri,
.vece = MO_64 },
};
@ -6069,27 +6069,29 @@ static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
}
}
static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, 0 };
const GVecGen2i sli_op[4] = {
{ .fni8 = gen_shl8_ins_i64,
.fniv = gen_shl_ins_vec,
.load_dest = true,
.opc = INDEX_op_shli_vec,
.opt_opc = vecop_list_sli,
.vece = MO_8 },
{ .fni8 = gen_shl16_ins_i64,
.fniv = gen_shl_ins_vec,
.load_dest = true,
.opc = INDEX_op_shli_vec,
.opt_opc = vecop_list_sli,
.vece = MO_16 },
{ .fni4 = gen_shl32_ins_i32,
.fniv = gen_shl_ins_vec,
.load_dest = true,
.opc = INDEX_op_shli_vec,
.opt_opc = vecop_list_sli,
.vece = MO_32 },
{ .fni8 = gen_shl64_ins_i64,
.fniv = gen_shl_ins_vec,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
.load_dest = true,
.opc = INDEX_op_shli_vec,
.opt_opc = vecop_list_sli,
.vece = MO_64 },
};
@ -6156,51 +6158,60 @@ static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
* these tables are shared with AArch64 which does support them.
*/
static const TCGOpcode vecop_list_mla[] = {
INDEX_op_mul_vec, INDEX_op_add_vec, 0
};
static const TCGOpcode vecop_list_mls[] = {
INDEX_op_mul_vec, INDEX_op_sub_vec, 0
};
const GVecGen3 mla_op[4] = {
{ .fni4 = gen_mla8_i32,
.fniv = gen_mla_vec,
.opc = INDEX_op_mul_vec,
.load_dest = true,
.opt_opc = vecop_list_mla,
.vece = MO_8 },
{ .fni4 = gen_mla16_i32,
.fniv = gen_mla_vec,
.opc = INDEX_op_mul_vec,
.load_dest = true,
.opt_opc = vecop_list_mla,
.vece = MO_16 },
{ .fni4 = gen_mla32_i32,
.fniv = gen_mla_vec,
.opc = INDEX_op_mul_vec,
.load_dest = true,
.opt_opc = vecop_list_mla,
.vece = MO_32 },
{ .fni8 = gen_mla64_i64,
.fniv = gen_mla_vec,
.opc = INDEX_op_mul_vec,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
.load_dest = true,
.opt_opc = vecop_list_mla,
.vece = MO_64 },
};
const GVecGen3 mls_op[4] = {
{ .fni4 = gen_mls8_i32,
.fniv = gen_mls_vec,
.opc = INDEX_op_mul_vec,
.load_dest = true,
.opt_opc = vecop_list_mls,
.vece = MO_8 },
{ .fni4 = gen_mls16_i32,
.fniv = gen_mls_vec,
.opc = INDEX_op_mul_vec,
.load_dest = true,
.opt_opc = vecop_list_mls,
.vece = MO_16 },
{ .fni4 = gen_mls32_i32,
.fniv = gen_mls_vec,
.opc = INDEX_op_mul_vec,
.load_dest = true,
.opt_opc = vecop_list_mls,
.vece = MO_32 },
{ .fni8 = gen_mls64_i64,
.fniv = gen_mls_vec,
.opc = INDEX_op_mul_vec,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
.load_dest = true,
.opt_opc = vecop_list_mls,
.vece = MO_64 },
};
@ -6226,19 +6237,25 @@ static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
}
static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 };
const GVecGen3 cmtst_op[4] = {
{ .fni4 = gen_helper_neon_tst_u8,
.fniv = gen_cmtst_vec,
.opt_opc = vecop_list_cmtst,
.vece = MO_8 },
{ .fni4 = gen_helper_neon_tst_u16,
.fniv = gen_cmtst_vec,
.opt_opc = vecop_list_cmtst,
.vece = MO_16 },
{ .fni4 = gen_cmtst_i32,
.fniv = gen_cmtst_vec,
.opt_opc = vecop_list_cmtst,
.vece = MO_32 },
{ .fni8 = gen_cmtst_i64,
.fniv = gen_cmtst_vec,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
.opt_opc = vecop_list_cmtst,
.vece = MO_64 },
};
@ -6253,26 +6270,30 @@ static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
tcg_temp_free_vec(x);
}
static const TCGOpcode vecop_list_uqadd[] = {
INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
};
const GVecGen4 uqadd_op[4] = {
{ .fniv = gen_uqadd_vec,
.fno = gen_helper_gvec_uqadd_b,
.opc = INDEX_op_usadd_vec,
.write_aofs = true,
.opt_opc = vecop_list_uqadd,
.vece = MO_8 },
{ .fniv = gen_uqadd_vec,
.fno = gen_helper_gvec_uqadd_h,
.opc = INDEX_op_usadd_vec,
.write_aofs = true,
.opt_opc = vecop_list_uqadd,
.vece = MO_16 },
{ .fniv = gen_uqadd_vec,
.fno = gen_helper_gvec_uqadd_s,
.opc = INDEX_op_usadd_vec,
.write_aofs = true,
.opt_opc = vecop_list_uqadd,
.vece = MO_32 },
{ .fniv = gen_uqadd_vec,
.fno = gen_helper_gvec_uqadd_d,
.opc = INDEX_op_usadd_vec,
.write_aofs = true,
.opt_opc = vecop_list_uqadd,
.vece = MO_64 },
};
@ -6287,25 +6308,29 @@ static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
tcg_temp_free_vec(x);
}
static const TCGOpcode vecop_list_sqadd[] = {
INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
};
const GVecGen4 sqadd_op[4] = {
{ .fniv = gen_sqadd_vec,
.fno = gen_helper_gvec_sqadd_b,
.opc = INDEX_op_ssadd_vec,
.opt_opc = vecop_list_sqadd,
.write_aofs = true,
.vece = MO_8 },
{ .fniv = gen_sqadd_vec,
.fno = gen_helper_gvec_sqadd_h,
.opc = INDEX_op_ssadd_vec,
.opt_opc = vecop_list_sqadd,
.write_aofs = true,
.vece = MO_16 },
{ .fniv = gen_sqadd_vec,
.fno = gen_helper_gvec_sqadd_s,
.opc = INDEX_op_ssadd_vec,
.opt_opc = vecop_list_sqadd,
.write_aofs = true,
.vece = MO_32 },
{ .fniv = gen_sqadd_vec,
.fno = gen_helper_gvec_sqadd_d,
.opc = INDEX_op_ssadd_vec,
.opt_opc = vecop_list_sqadd,
.write_aofs = true,
.vece = MO_64 },
};
@ -6321,25 +6346,29 @@ static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
tcg_temp_free_vec(x);
}
static const TCGOpcode vecop_list_uqsub[] = {
INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
};
const GVecGen4 uqsub_op[4] = {
{ .fniv = gen_uqsub_vec,
.fno = gen_helper_gvec_uqsub_b,
.opc = INDEX_op_ussub_vec,
.opt_opc = vecop_list_uqsub,
.write_aofs = true,
.vece = MO_8 },
{ .fniv = gen_uqsub_vec,
.fno = gen_helper_gvec_uqsub_h,
.opc = INDEX_op_ussub_vec,
.opt_opc = vecop_list_uqsub,
.write_aofs = true,
.vece = MO_16 },
{ .fniv = gen_uqsub_vec,
.fno = gen_helper_gvec_uqsub_s,
.opc = INDEX_op_ussub_vec,
.opt_opc = vecop_list_uqsub,
.write_aofs = true,
.vece = MO_32 },
{ .fniv = gen_uqsub_vec,
.fno = gen_helper_gvec_uqsub_d,
.opc = INDEX_op_ussub_vec,
.opt_opc = vecop_list_uqsub,
.write_aofs = true,
.vece = MO_64 },
};
@ -6355,25 +6384,29 @@ static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
tcg_temp_free_vec(x);
}
static const TCGOpcode vecop_list_sqsub[] = {
INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
};
const GVecGen4 sqsub_op[4] = {
{ .fniv = gen_sqsub_vec,
.fno = gen_helper_gvec_sqsub_b,
.opc = INDEX_op_sssub_vec,
.opt_opc = vecop_list_sqsub,
.write_aofs = true,
.vece = MO_8 },
{ .fniv = gen_sqsub_vec,
.fno = gen_helper_gvec_sqsub_h,
.opc = INDEX_op_sssub_vec,
.opt_opc = vecop_list_sqsub,
.write_aofs = true,
.vece = MO_16 },
{ .fniv = gen_sqsub_vec,
.fno = gen_helper_gvec_sqsub_s,
.opc = INDEX_op_sssub_vec,
.opt_opc = vecop_list_sqsub,
.write_aofs = true,
.vece = MO_32 },
{ .fniv = gen_sqsub_vec,
.fno = gen_helper_gvec_sqsub_d,
.opc = INDEX_op_sssub_vec,
.opt_opc = vecop_list_sqsub,
.write_aofs = true,
.vece = MO_64 },
};
@ -8087,6 +8120,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
case NEON_2RM_VNEG:
tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
break;
case NEON_2RM_VABS:
tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
break;
default:
elementwise:
@ -8192,14 +8228,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
}
tcg_temp_free_i32(tmp2);
break;
case NEON_2RM_VABS:
switch(size) {
case 0: gen_helper_neon_abs_s8(tmp, tmp); break;
case 1: gen_helper_neon_abs_s16(tmp, tmp); break;
case 2: tcg_gen_abs_i32(tmp, tmp); break;
default: abort();
}
break;
case NEON_2RM_VCGT0_F:
{
TCGv_ptr fpstatus = get_fpstatus_ptr(1);

View File

@ -1686,18 +1686,11 @@ static int dec_cmp_r(CPUCRISState *env, DisasContext *dc)
static int dec_abs_r(CPUCRISState *env, DisasContext *dc)
{
TCGv t0;
LOG_DIS("abs $r%u, $r%u\n",
dc->op1, dc->op2);
cris_cc_mask(dc, CC_MASK_NZ);
t0 = tcg_temp_new();
tcg_gen_sari_tl(t0, cpu_R[dc->op1], 31);
tcg_gen_xor_tl(cpu_R[dc->op2], cpu_R[dc->op1], t0);
tcg_gen_sub_tl(cpu_R[dc->op2], cpu_R[dc->op2], t0);
tcg_temp_free(t0);
tcg_gen_abs_tl(cpu_R[dc->op2], cpu_R[dc->op1]);
cris_alu(dc, CC_OP_MOVE,
cpu_R[dc->op2], cpu_R[dc->op2], cpu_R[dc->op2], 4);
return 2;

View File

@ -5075,40 +5075,26 @@ static void gen_ecowx(DisasContext *ctx)
/* abs - abs. */
static void gen_abs(DisasContext *ctx)
{
TCGLabel *l1 = gen_new_label();
TCGLabel *l2 = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_GE, cpu_gpr[rA(ctx->opcode)], 0, l1);
tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
tcg_gen_br(l2);
gen_set_label(l1);
tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
gen_set_label(l2);
TCGv d = cpu_gpr[rD(ctx->opcode)];
TCGv a = cpu_gpr[rA(ctx->opcode)];
tcg_gen_abs_tl(d, a);
if (unlikely(Rc(ctx->opcode) != 0)) {
gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
gen_set_Rc0(ctx, d);
}
}
/* abso - abso. */
static void gen_abso(DisasContext *ctx)
{
TCGLabel *l1 = gen_new_label();
TCGLabel *l2 = gen_new_label();
TCGLabel *l3 = gen_new_label();
/* Start with XER OV disabled, the most likely case */
tcg_gen_movi_tl(cpu_ov, 0);
tcg_gen_brcondi_tl(TCG_COND_GE, cpu_gpr[rA(ctx->opcode)], 0, l2);
tcg_gen_brcondi_tl(TCG_COND_NE, cpu_gpr[rA(ctx->opcode)], 0x80000000, l1);
tcg_gen_movi_tl(cpu_ov, 1);
tcg_gen_movi_tl(cpu_so, 1);
tcg_gen_br(l2);
gen_set_label(l1);
tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
tcg_gen_br(l3);
gen_set_label(l2);
tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
gen_set_label(l3);
TCGv d = cpu_gpr[rD(ctx->opcode)];
TCGv a = cpu_gpr[rA(ctx->opcode)];
tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_ov, a, 0x80000000);
tcg_gen_abs_tl(d, a);
tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
if (unlikely(Rc(ctx->opcode) != 0)) {
gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
gen_set_Rc0(ctx, d);
}
}
@ -5344,34 +5330,28 @@ static void gen_mulo(DisasContext *ctx)
/* nabs - nabs. */
static void gen_nabs(DisasContext *ctx)
{
TCGLabel *l1 = gen_new_label();
TCGLabel *l2 = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_GT, cpu_gpr[rA(ctx->opcode)], 0, l1);
tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
tcg_gen_br(l2);
gen_set_label(l1);
tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
gen_set_label(l2);
TCGv d = cpu_gpr[rD(ctx->opcode)];
TCGv a = cpu_gpr[rA(ctx->opcode)];
tcg_gen_abs_tl(d, a);
tcg_gen_neg_tl(d, d);
if (unlikely(Rc(ctx->opcode) != 0)) {
gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
gen_set_Rc0(ctx, d);
}
}
/* nabso - nabso. */
static void gen_nabso(DisasContext *ctx)
{
TCGLabel *l1 = gen_new_label();
TCGLabel *l2 = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_GT, cpu_gpr[rA(ctx->opcode)], 0, l1);
tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
tcg_gen_br(l2);
gen_set_label(l1);
tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
gen_set_label(l2);
TCGv d = cpu_gpr[rD(ctx->opcode)];
TCGv a = cpu_gpr[rA(ctx->opcode)];
tcg_gen_abs_tl(d, a);
tcg_gen_neg_tl(d, d);
/* nabs never overflows */
tcg_gen_movi_tl(cpu_ov, 0);
if (unlikely(Rc(ctx->opcode) != 0)) {
gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
gen_set_Rc0(ctx, d);
}
}

View File

@ -126,19 +126,7 @@ static inline void gen_##name(DisasContext *ctx) \
tcg_temp_free_i32(t0); \
}
static inline void gen_op_evabs(TCGv_i32 ret, TCGv_i32 arg1)
{
TCGLabel *l1 = gen_new_label();
TCGLabel *l2 = gen_new_label();
tcg_gen_brcondi_i32(TCG_COND_GE, arg1, 0, l1);
tcg_gen_neg_i32(ret, arg1);
tcg_gen_br(l2);
gen_set_label(l1);
tcg_gen_mov_i32(ret, arg1);
gen_set_label(l2);
}
GEN_SPEOP_ARITH1(evabs, gen_op_evabs);
GEN_SPEOP_ARITH1(evabs, tcg_gen_abs_i32);
GEN_SPEOP_ARITH1(evneg, tcg_gen_neg_i32);
GEN_SPEOP_ARITH1(evextsb, tcg_gen_ext8s_i32);
GEN_SPEOP_ARITH1(evextsh, tcg_gen_ext16s_i32);

View File

@ -566,10 +566,15 @@ static void glue(glue(gen_, NAME), _vec)(unsigned vece, TCGv_vec t, \
} \
static void glue(gen_, NAME)(DisasContext *ctx) \
{ \
static const TCGOpcode vecop_list[] = { \
glue(glue(INDEX_op_, NORM), _vec), \
glue(glue(INDEX_op_, SAT), _vec), \
INDEX_op_cmp_vec, 0 \
}; \
static const GVecGen4 g = { \
.fniv = glue(glue(gen_, NAME), _vec), \
.fno = glue(gen_helper_, NAME), \
.opc = glue(glue(INDEX_op_, SAT), _vec), \
.opt_opc = vecop_list, \
.write_aofs = true, \
.vece = VECE, \
}; \

View File

@ -1407,13 +1407,7 @@ static DisasJumpType help_branch(DisasContext *s, DisasCompare *c,
static DisasJumpType op_abs(DisasContext *s, DisasOps *o)
{
TCGv_i64 z, n;
z = tcg_const_i64(0);
n = tcg_temp_new_i64();
tcg_gen_neg_i64(n, o->in2);
tcg_gen_movcond_i64(TCG_COND_LT, o->out, o->in2, z, n, o->in2);
tcg_temp_free_i64(n);
tcg_temp_free_i64(z);
tcg_gen_abs_i64(o->out, o->in2);
return DISAS_NEXT;
}

View File

@ -2415,11 +2415,7 @@ gen_msubadr32s_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode)
static inline void gen_abs(TCGv ret, TCGv r1)
{
TCGv temp = tcg_temp_new();
TCGv t0 = tcg_const_i32(0);
tcg_gen_neg_tl(temp, r1);
tcg_gen_movcond_tl(TCG_COND_GE, ret, r1, t0, r1, temp);
tcg_gen_abs_tl(ret, r1);
/* overflow can only happen, if r1 = 0x80000000 */
tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_PSW_V, r1, 0x80000000);
tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31);
@ -2430,9 +2426,6 @@ static inline void gen_abs(TCGv ret, TCGv r1)
tcg_gen_xor_tl(cpu_PSW_AV, ret, cpu_PSW_AV);
/* calc SAV bit */
tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV);
tcg_temp_free(temp);
tcg_temp_free(t0);
}
static inline void gen_absdif(TCGv ret, TCGv r1, TCGv r2)
@ -6617,13 +6610,8 @@ static void decode_rr_divide(CPUTriCoreState *env, DisasContext *ctx)
tcg_gen_movi_tl(cpu_PSW_AV, 0);
if (!tricore_feature(env, TRICORE_FEATURE_131)) {
/* overflow = (abs(D[r3+1]) >= abs(D[r2])) */
tcg_gen_neg_tl(temp, temp3);
/* use cpu_PSW_AV to compare against 0 */
tcg_gen_movcond_tl(TCG_COND_LT, temp, temp3, cpu_PSW_AV,
temp, temp3);
tcg_gen_neg_tl(temp2, cpu_gpr_d[r2]);
tcg_gen_movcond_tl(TCG_COND_LT, temp2, cpu_gpr_d[r2], cpu_PSW_AV,
temp2, cpu_gpr_d[r2]);
tcg_gen_abs_tl(temp, temp3);
tcg_gen_abs_tl(temp2, cpu_gpr_d[r2]);
tcg_gen_setcond_tl(TCG_COND_GE, cpu_PSW_V, temp, temp2);
} else {
/* overflow = (D[b] == 0) */
@ -6655,13 +6643,8 @@ static void decode_rr_divide(CPUTriCoreState *env, DisasContext *ctx)
tcg_gen_movi_tl(cpu_PSW_AV, 0);
if (!tricore_feature(env, TRICORE_FEATURE_131)) {
/* overflow = (abs(D[r3+1]) >= abs(D[r2])) */
tcg_gen_neg_tl(temp, temp3);
/* use cpu_PSW_AV to compare against 0 */
tcg_gen_movcond_tl(TCG_COND_LT, temp, temp3, cpu_PSW_AV,
temp, temp3);
tcg_gen_neg_tl(temp2, cpu_gpr_d[r2]);
tcg_gen_movcond_tl(TCG_COND_LT, temp2, cpu_gpr_d[r2], cpu_PSW_AV,
temp2, cpu_gpr_d[r2]);
tcg_gen_abs_tl(temp, temp3);
tcg_gen_abs_tl(temp2, cpu_gpr_d[r2]);
tcg_gen_setcond_tl(TCG_COND_GE, cpu_PSW_V, temp, temp2);
} else {
/* overflow = (D[b] == 0) */

View File

@ -1709,14 +1709,7 @@ void restore_state_to_opc(CPUXtensaState *env, TranslationBlock *tb,
static void translate_abs(DisasContext *dc, const OpcodeArg arg[],
const uint32_t par[])
{
TCGv_i32 zero = tcg_const_i32(0);
TCGv_i32 neg = tcg_temp_new_i32();
tcg_gen_neg_i32(neg, arg[1].in);
tcg_gen_movcond_i32(TCG_COND_GE, arg[0].out,
arg[1].in, zero, arg[1].in, neg);
tcg_temp_free(neg);
tcg_temp_free(zero);
tcg_gen_abs_i32(arg[0].out, arg[1].in);
}
static void translate_add(DisasContext *dc, const OpcodeArg arg[],

View File

@ -561,6 +561,10 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32.
Similarly, v0 = -v1.
* abs_vec v0, v1
Similarly, v0 = v1 < 0 ? -v1 : v1, in elements across the vector.
* smin_vec:
* umin_vec:

View File

@ -132,9 +132,10 @@ typedef enum {
#define TCG_TARGET_HAS_orc_vec 1
#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec 1
#define TCG_TARGET_HAS_abs_vec 1
#define TCG_TARGET_HAS_shi_vec 1
#define TCG_TARGET_HAS_shs_vec 0
#define TCG_TARGET_HAS_shv_vec 0
#define TCG_TARGET_HAS_shv_vec 1
#define TCG_TARGET_HAS_cmp_vec 1
#define TCG_TARGET_HAS_mul_vec 1
#define TCG_TARGET_HAS_sat_vec 1

View File

@ -381,6 +381,9 @@ typedef enum {
I3207_BLR = 0xd63f0000,
I3207_RET = 0xd65f0000,
/* AdvSIMD load/store single structure. */
I3303_LD1R = 0x0d40c000,
/* Load literal for loading the address at pc-relative offset */
I3305_LDR = 0x58000000,
I3305_LDR_v64 = 0x5c000000,
@ -533,12 +536,14 @@ typedef enum {
I3616_CMEQ = 0x2e208c00,
I3616_SMAX = 0x0e206400,
I3616_SMIN = 0x0e206c00,
I3616_SSHL = 0x0e204400,
I3616_SQADD = 0x0e200c00,
I3616_SQSUB = 0x0e202c00,
I3616_UMAX = 0x2e206400,
I3616_UMIN = 0x2e206c00,
I3616_UQADD = 0x2e200c00,
I3616_UQSUB = 0x2e202c00,
I3616_USHL = 0x2e204400,
/* AdvSIMD two-reg misc. */
I3617_CMGT0 = 0x0e208800,
@ -547,6 +552,7 @@ typedef enum {
I3617_CMGE0 = 0x2e208800,
I3617_CMLE0 = 0x2e20a800,
I3617_NOT = 0x2e205800,
I3617_ABS = 0x0e20b800,
I3617_NEG = 0x2e20b800,
/* System instructions. */
@ -566,7 +572,14 @@ static inline uint32_t tcg_in32(TCGContext *s)
#define tcg_out_insn(S, FMT, OP, ...) \
glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
/*
 * Emit one instruction of encoding group 3303 (listed in the opcode enum
 * under "AdvSIMD load/store single structure").
 *
 * The instruction word is INSN with RT in the low five bits, RN shifted
 * to bit 5, SIZE shifted to bit 10 and Q shifted to bit 30.
 */
static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    uint32_t word = insn;

    word |= rt & 0x1f;
    word |= rn << 5;
    word |= size << 10;
    word |= q << 30;

    tcg_out32(s, word);
}
static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
int imm19, TCGReg rt)
{
tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}
@ -799,7 +812,7 @@ static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
}
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
TCGReg rd, uint64_t v64)
TCGReg rd, tcg_target_long v64)
{
int op, cmode, imm8;
@ -814,6 +827,43 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
}
}
/*
 * Emit a DUP (encoding group 3605) that fills vector register RD from
 * register RS, using elements of size VECE.  Always reports success.
 */
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    /* Distance of the vector type from TCG_TYPE_V64, used as the Q argument. */
    const int q = type - TCG_TYPE_V64;
    /* A single set bit whose position is chosen by the element size. */
    const int elt_bit = 1 << vece;

    tcg_out_insn(s, 3605, DUP, q, rd, rs, elt_bit, 0);
    return true;
}
/*
 * Emit the dupm operation: an LD1R that loads into vector register R from
 * the address BASE + OFFSET, with element size VECE.
 *
 * The LD1R emitter is handed only an address register, so a non-zero OFFSET
 * is first folded into an address built in TCG_REG_TMP.  When OFFSET is zero
 * no address arithmetic is emitted and BASE is used directly.
 *
 * Always returns true.
 */
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg r, TCGReg base, intptr_t offset)
{
TCGReg temp = TCG_REG_TMP;
/* Magnitude does not fit in 24 bits: load OFFSET with movi, then add BASE. */
if (offset < -0xffffff || offset > 0xffffff) {
tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
base = temp;
} else {
/* Otherwise use at most two immediate adds (or subtracts for negatives). */
AArch64Insn add_insn = I3401_ADDI;
if (offset < 0) {
add_insn = I3401_SUBI;
offset = -offset;
}
/* Bits 12..23 of the magnitude, if any are set. */
if (offset & 0xfff000) {
tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
base = temp;
}
/* Low 12 bits; BASE is already TEMP here if the step above ran. */
if (offset & 0xfff) {
tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
base = temp;
}
}
/* Q is set only for TCG_TYPE_V128; VECE is passed as the size field. */
tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
return true;
}
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
tcg_target_long value)
{
@ -938,10 +988,10 @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
if (ret == arg) {
return;
return true;
}
switch (type) {
case TCG_TYPE_I32:
@ -970,6 +1020,7 @@ static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
default:
g_assert_not_reached();
}
return true;
}
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
@ -2099,10 +2150,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
case INDEX_op_mov_i64:
case INDEX_op_mov_vec:
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
case INDEX_op_movi_i64:
case INDEX_op_dupi_vec:
case INDEX_op_call: /* Always emitted via tcg_out_call. */
default:
g_assert_not_reached();
@ -2145,6 +2194,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_st_vec:
tcg_out_st(s, type, a0, a1, a2);
break;
case INDEX_op_dupm_vec:
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
break;
case INDEX_op_add_vec:
tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
break;
@ -2157,6 +2209,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_neg_vec:
tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
break;
case INDEX_op_abs_vec:
tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
break;
case INDEX_op_and_vec:
tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
break;
@ -2199,9 +2254,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_not_vec:
tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
break;
case INDEX_op_dup_vec:
tcg_out_insn(s, 3605, DUP, is_q, a0, a1, 1 << vece, 0);
break;
case INDEX_op_shli_vec:
tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
break;
@ -2211,6 +2263,12 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_sari_vec:
tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
break;
case INDEX_op_shlv_vec:
tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
break;
case INDEX_op_aa64_sshl_vec:
tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
break;
case INDEX_op_cmp_vec:
{
TCGCond cond = args[3];
@ -2245,6 +2303,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
}
}
break;
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
default:
g_assert_not_reached();
}
@ -2261,6 +2323,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_andc_vec:
case INDEX_op_orc_vec:
case INDEX_op_neg_vec:
case INDEX_op_abs_vec:
case INDEX_op_not_vec:
case INDEX_op_cmp_vec:
case INDEX_op_shli_vec:
@ -2270,12 +2333,16 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_sssub_vec:
case INDEX_op_usadd_vec:
case INDEX_op_ussub_vec:
case INDEX_op_shlv_vec:
return 1;
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
return -1;
case INDEX_op_mul_vec:
case INDEX_op_smax_vec:
case INDEX_op_smin_vec:
case INDEX_op_umax_vec:
case INDEX_op_umin_vec:
return 1;
case INDEX_op_mul_vec:
return vece < MO_64;
default:
@ -2286,6 +2353,32 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
/*
 * Expand a vector opcode that this backend cannot emit directly.
 *
 * Only INDEX_op_shrv_vec and INDEX_op_sarv_vec are handled; any other
 * opcode hits g_assert_not_reached().  The variadic arguments are read as
 * two further TCGArg operands following A0 (destination): the value to
 * shift and the per-element shift counts.
 */
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list ap;
    TCGv_vec dst, src, count, neg_count;
    TCGOpcode shl_opc;

    va_start(ap, a0);
    dst = temp_tcgv_vec(arg_temp(a0));
    src = temp_tcgv_vec(arg_temp(va_arg(ap, TCGArg)));
    count = temp_tcgv_vec(arg_temp(va_arg(ap, TCGArg)));

    switch (opc) {
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        /*
         * Right shifts are negative left shifts for AArch64: negate the
         * counts and issue the matching left-shift opcode (unsigned form
         * for shrv, the target-specific signed form for sarv).
         */
        if (opc == INDEX_op_shrv_vec) {
            shl_opc = INDEX_op_shlv_vec;
        } else {
            shl_opc = INDEX_op_aa64_sshl_vec;
        }

        neg_count = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, neg_count, count);
        vec_gen_3(shl_opc, type, vece, tcgv_vec_arg(dst),
                  tcgv_vec_arg(src), tcgv_vec_arg(neg_count));
        tcg_temp_free_vec(neg_count);
        break;

    default:
        g_assert_not_reached();
    }

    va_end(ap);
}
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
@ -2467,15 +2560,21 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_smin_vec:
case INDEX_op_umax_vec:
case INDEX_op_umin_vec:
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
case INDEX_op_aa64_sshl_vec:
return &w_w_w;
case INDEX_op_not_vec:
case INDEX_op_neg_vec:
case INDEX_op_abs_vec:
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
case INDEX_op_sari_vec:
return &w_w;
case INDEX_op_ld_vec:
case INDEX_op_st_vec:
case INDEX_op_dupm_vec:
return &w_r;
case INDEX_op_dup_vec:
return &w_wr;

View File

@ -1,3 +1,5 @@
/* Target-specific opcodes for host vector expansion. These will be
emitted by tcg_expand_vec_op. For those familiar with GCC internals,
consider these to be UNSPEC with names. */
DEF(aa64_sshl_vec, 1, 2, 0, IMPLVEC)

View File

@ -2264,10 +2264,11 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
return false;
}
static inline void tcg_out_mov(TCGContext *s, TCGType type,
static inline bool tcg_out_mov(TCGContext *s, TCGType type,
TCGReg ret, TCGReg arg)
{
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
tcg_out_mov_reg(s, COND_AL, ret, arg);
return true;
}
static inline void tcg_out_movi(TCGContext *s, TCGType type,

View File

@ -182,9 +182,10 @@ extern bool have_avx2;
#define TCG_TARGET_HAS_orc_vec 0
#define TCG_TARGET_HAS_not_vec 0
#define TCG_TARGET_HAS_neg_vec 0
#define TCG_TARGET_HAS_abs_vec 1
#define TCG_TARGET_HAS_shi_vec 1
#define TCG_TARGET_HAS_shs_vec 0
#define TCG_TARGET_HAS_shv_vec 0
#define TCG_TARGET_HAS_shs_vec 1
#define TCG_TARGET_HAS_shv_vec have_avx2
#define TCG_TARGET_HAS_cmp_vec 1
#define TCG_TARGET_HAS_mul_vec 1
#define TCG_TARGET_HAS_sat_vec 1

View File

@ -358,7 +358,6 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
#define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
#define OPC_MOVD_VyEy (0x6e | P_EXT | P_DATA16)
#define OPC_MOVD_EyVy (0x7e | P_EXT | P_DATA16)
#define OPC_MOVDDUP (0x12 | P_EXT | P_SIMDF2)
#define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16)
#define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16)
#define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3)
@ -370,6 +369,9 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
#define OPC_MOVSLQ (0x63 | P_REXW)
#define OPC_MOVZBL (0xb6 | P_EXT)
#define OPC_MOVZWL (0xb7 | P_EXT)
#define OPC_PABSB (0x1c | P_EXT38 | P_DATA16)
#define OPC_PABSW (0x1d | P_EXT38 | P_DATA16)
#define OPC_PABSD (0x1e | P_EXT38 | P_DATA16)
#define OPC_PACKSSDW (0x6b | P_EXT | P_DATA16)
#define OPC_PACKSSWB (0x63 | P_EXT | P_DATA16)
#define OPC_PACKUSDW (0x2b | P_EXT38 | P_DATA16)
@ -421,6 +423,14 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
#define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */
#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /2 /6 /4 */
#define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */
#define OPC_PSLLW (0xf1 | P_EXT | P_DATA16)
#define OPC_PSLLD (0xf2 | P_EXT | P_DATA16)
#define OPC_PSLLQ (0xf3 | P_EXT | P_DATA16)
#define OPC_PSRAW (0xe1 | P_EXT | P_DATA16)
#define OPC_PSRAD (0xe2 | P_EXT | P_DATA16)
#define OPC_PSRLW (0xd1 | P_EXT | P_DATA16)
#define OPC_PSRLD (0xd2 | P_EXT | P_DATA16)
#define OPC_PSRLQ (0xd3 | P_EXT | P_DATA16)
#define OPC_PSUBB (0xf8 | P_EXT | P_DATA16)
#define OPC_PSUBW (0xf9 | P_EXT | P_DATA16)
#define OPC_PSUBD (0xfa | P_EXT | P_DATA16)
@ -458,12 +468,21 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
#define OPC_UD2 (0x0b | P_EXT)
#define OPC_VPBLENDD (0x02 | P_EXT3A | P_DATA16)
#define OPC_VPBLENDVB (0x4c | P_EXT3A | P_DATA16)
#define OPC_VPINSRB (0x20 | P_EXT3A | P_DATA16)
#define OPC_VPINSRW (0xc4 | P_EXT | P_DATA16)
#define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16)
#define OPC_VBROADCASTSD (0x19 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTB (0x78 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_REXW)
#define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
#define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16)
#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_REXW)
#define OPC_VPSRAVD (0x46 | P_EXT38 | P_DATA16)
#define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16)
#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_REXW)
#define OPC_VZEROUPPER (0x77 | P_EXT)
#define OPC_XCHG_ax_r32 (0x90)
@ -809,12 +828,12 @@ static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
int rexw = 0;
if (arg == ret) {
return;
return true;
}
switch (type) {
case TCG_TYPE_I64:
@ -852,18 +871,20 @@ static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
default:
g_assert_not_reached();
}
return true;
}
static void tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
static const int avx2_dup_insn[4] = {
OPC_VPBROADCASTB, OPC_VPBROADCASTW,
OPC_VPBROADCASTD, OPC_VPBROADCASTQ,
};
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg r, TCGReg a)
{
if (have_avx2) {
static const int dup_insn[4] = {
OPC_VPBROADCASTB, OPC_VPBROADCASTW,
OPC_VPBROADCASTD, OPC_VPBROADCASTQ,
};
int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
tcg_out_vex_modrm(s, dup_insn[vece] + vex_l, r, 0, a);
tcg_out_vex_modrm(s, avx2_dup_insn[vece] + vex_l, r, 0, a);
} else {
switch (vece) {
case MO_8:
@ -887,6 +908,39 @@ static void tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
g_assert_not_reached();
}
}
return true;
}
/*
 * Emit the dupm operation for x86: fill vector register R from the memory
 * operand at BASE + OFFSET, with element size VECE.
 *
 * With AVX2 a single broadcast-from-memory instruction is chosen from
 * avx2_dup_insn[] by VECE.  Without AVX2, 64- and 32-bit elements use
 * VBROADCASTSD / VBROADCASTSS; 16- and 8-bit elements are inserted into R
 * with VPINSRW / VPINSRB and then broadcast register-to-register through
 * tcg_out_dup_vec().
 *
 * Always returns true.
 */
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg r, TCGReg base, intptr_t offset)
{
if (have_avx2) {
/* VEX.L is added only for 256-bit vectors. */
int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
tcg_out_vex_modrm_offset(s, avx2_dup_insn[vece] + vex_l,
r, 0, base, offset);
} else {
switch (vece) {
case MO_64:
/*
 * NOTE(review): VBROADCASTSD appears to be defined only with a
 * 256-bit destination, and no VEX.L is set here -- confirm this
 * encoding is valid for the vector types reachable on non-AVX2 hosts.
 */
tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSD, r, 0, base, offset);
break;
case MO_32:
tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSS, r, 0, base, offset);
break;
case MO_16:
/* R is both destination and register source of the insert. */
tcg_out_vex_modrm_offset(s, OPC_VPINSRW, r, r, base, offset);
tcg_out8(s, 0); /* imm8 */
tcg_out_dup_vec(s, type, vece, r, r);
break;
case MO_8:
/* Same two-step sequence as MO_16, using the byte insert. */
tcg_out_vex_modrm_offset(s, OPC_VPINSRB, r, r, base, offset);
tcg_out8(s, 0); /* imm8 */
tcg_out_dup_vec(s, type, vece, r, r);
break;
default:
g_assert_not_reached();
}
}
return true;
}
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
@ -909,16 +963,16 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
} else if (have_avx2) {
tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTQ + vex_l, ret);
} else {
tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret);
tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD, ret);
}
new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
} else if (have_avx2) {
tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
} else {
tcg_out_vex_modrm_pool(s, OPC_MOVD_VyEy, ret);
if (have_avx2) {
tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD + vex_l, ret);
} else {
tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
}
new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
tcg_out_dup_vec(s, type, MO_32, ret, ret);
}
}
@ -2601,10 +2655,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
case INDEX_op_mov_i64:
case INDEX_op_mov_vec:
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
case INDEX_op_movi_i64:
case INDEX_op_dupi_vec:
case INDEX_op_call: /* Always emitted via tcg_out_call. */
default:
tcg_abort();
@ -2671,6 +2723,31 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
static int const umax_insn[4] = {
OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_UD2
};
static int const shlv_insn[4] = {
/* TODO: AVX512 adds support for MO_16. */
OPC_UD2, OPC_UD2, OPC_VPSLLVD, OPC_VPSLLVQ
};
static int const shrv_insn[4] = {
/* TODO: AVX512 adds support for MO_16. */
OPC_UD2, OPC_UD2, OPC_VPSRLVD, OPC_VPSRLVQ
};
static int const sarv_insn[4] = {
/* TODO: AVX512 adds support for MO_16, MO_64. */
OPC_UD2, OPC_UD2, OPC_VPSRAVD, OPC_UD2
};
static int const shls_insn[4] = {
OPC_UD2, OPC_PSLLW, OPC_PSLLD, OPC_PSLLQ
};
static int const shrs_insn[4] = {
OPC_UD2, OPC_PSRLW, OPC_PSRLD, OPC_PSRLQ
};
static int const sars_insn[4] = {
OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_UD2
};
static int const abs_insn[4] = {
/* TODO: AVX512 adds support for MO_64. */
OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_UD2
};
TCGType type = vecl + TCG_TYPE_V64;
int insn, sub;
@ -2723,6 +2800,24 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_umax_vec:
insn = umax_insn[vece];
goto gen_simd;
case INDEX_op_shlv_vec:
insn = shlv_insn[vece];
goto gen_simd;
case INDEX_op_shrv_vec:
insn = shrv_insn[vece];
goto gen_simd;
case INDEX_op_sarv_vec:
insn = sarv_insn[vece];
goto gen_simd;
case INDEX_op_shls_vec:
insn = shls_insn[vece];
goto gen_simd;
case INDEX_op_shrs_vec:
insn = shrs_insn[vece];
goto gen_simd;
case INDEX_op_sars_vec:
insn = sars_insn[vece];
goto gen_simd;
case INDEX_op_x86_punpckl_vec:
insn = punpckl_insn[vece];
goto gen_simd;
@ -2741,6 +2836,11 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
insn = OPC_PUNPCKLDQ;
goto gen_simd;
#endif
case INDEX_op_abs_vec:
insn = abs_insn[vece];
a2 = a1;
a1 = 0;
goto gen_simd;
gen_simd:
tcg_debug_assert(insn != OPC_UD2);
if (type == TCG_TYPE_V256) {
@ -2793,8 +2893,8 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_st_vec:
tcg_out_st(s, type, a0, a1, a2);
break;
case INDEX_op_dup_vec:
tcg_out_dup_vec(s, type, vece, a0, a1);
case INDEX_op_dupm_vec:
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
break;
case INDEX_op_x86_shufps_vec:
@ -2837,6 +2937,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
tcg_out8(s, a2);
break;
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
default:
g_assert_not_reached();
}
@ -3079,6 +3182,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_ld_vec:
case INDEX_op_st_vec:
case INDEX_op_dupm_vec:
return &x_r;
case INDEX_op_add_vec:
@ -3096,6 +3200,12 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_umin_vec:
case INDEX_op_smax_vec:
case INDEX_op_umax_vec:
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
case INDEX_op_shls_vec:
case INDEX_op_shrs_vec:
case INDEX_op_sars_vec:
case INDEX_op_cmp_vec:
case INDEX_op_x86_shufps_vec:
case INDEX_op_x86_blend_vec:
@ -3108,6 +3218,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_dup2_vec:
#endif
return &x_x_x;
case INDEX_op_abs_vec:
case INDEX_op_dup_vec:
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
@ -3153,6 +3264,18 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
}
return 1;
case INDEX_op_shls_vec:
case INDEX_op_shrs_vec:
return vece >= MO_16;
case INDEX_op_sars_vec:
return vece >= MO_16 && vece <= MO_32;
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
return have_avx2 && vece >= MO_32;
case INDEX_op_sarv_vec:
return have_avx2 && vece == MO_32;
case INDEX_op_mul_vec:
if (vece == MO_8) {
/* We can expand the operation for MO_8. */
@ -3173,6 +3296,8 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_umin_vec:
case INDEX_op_umax_vec:
return vece <= MO_32 ? 1 : -1;
case INDEX_op_abs_vec:
return vece <= MO_32;
default:
return 0;

View File

@ -558,13 +558,14 @@ static inline void tcg_out_dsra(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa)
tcg_out_opc_sa64(s, OPC_DSRA, OPC_DSRA32, rd, rt, sa);
}
static inline void tcg_out_mov(TCGContext *s, TCGType type,
static inline bool tcg_out_mov(TCGContext *s, TCGType type,
TCGReg ret, TCGReg arg)
{
/* Simple reg-reg move, optimising out the 'do nothing' case */
if (ret != arg) {
tcg_out_opc_reg(s, OPC_OR, ret, arg, TCG_REG_ZERO);
}
return true;
}
static void tcg_out_movi(TCGContext *s, TCGType type,

View File

@ -734,9 +734,13 @@ void tcg_optimize(TCGContext *s)
} else if (opc == INDEX_op_sub_i64) {
neg_op = INDEX_op_neg_i64;
have_neg = TCG_TARGET_HAS_neg_i64;
} else {
} else if (TCG_TARGET_HAS_neg_vec) {
TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
unsigned vece = TCGOP_VECE(op);
neg_op = INDEX_op_neg_vec;
have_neg = TCG_TARGET_HAS_neg_vec;
have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
} else {
break;
}
if (!have_neg) {
break;

View File

@ -559,12 +559,13 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
TCGReg base, tcg_target_long offset);
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
if (ret != arg) {
tcg_out32(s, OR | SAB(arg, ret, arg));
}
return true;
}
static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,

View File

@ -515,10 +515,10 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
* TCG intrinsics
*/
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
if (ret == arg) {
return;
return true;
}
switch (type) {
case TCG_TYPE_I32:
@ -528,6 +528,7 @@ static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
default:
g_assert_not_reached();
}
return true;
}
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,

View File

@ -548,7 +548,7 @@ static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
}
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
if (src != dst) {
if (type == TCG_TYPE_I32) {
@ -557,6 +557,7 @@ static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
tcg_out_insn(s, RRE, LGR, dst, src);
}
}
return true;
}
static const S390Opcode lli_insns[4] = {

View File

@ -407,12 +407,13 @@ static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1,
| (val2const ? INSN_IMM13(val2) : INSN_RS2(val2)));
}
static inline void tcg_out_mov(TCGContext *s, TCGType type,
static inline bool tcg_out_mov(TCGContext *s, TCGType type,
TCGReg ret, TCGReg arg)
{
if (ret != arg) {
tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
}
return true;
}
static inline void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg)

File diff suppressed because it is too large Load Diff

View File

@ -91,8 +91,8 @@ typedef struct {
void (*fniv)(unsigned, TCGv_vec, TCGv_vec);
/* Expand out-of-line helper w/descriptor. */
gen_helper_gvec_2 *fno;
/* The opcode, if any, to which this corresponds. */
TCGOpcode opc;
/* The optional opcodes, if any, utilized by .fniv. */
const TCGOpcode *opt_opc;
/* The data argument to the out-of-line helper. */
int32_t data;
/* The vector element size, if applicable. */
@ -112,8 +112,8 @@ typedef struct {
gen_helper_gvec_2 *fno;
/* Expand out-of-line helper w/descriptor, data as argument. */
gen_helper_gvec_2i *fnoi;
/* The opcode, if any, to which this corresponds. */
TCGOpcode opc;
/* The optional opcodes, if any, utilized by .fniv. */
const TCGOpcode *opt_opc;
/* The vector element size, if applicable. */
uint8_t vece;
/* Prefer i64 to v64. */
@ -131,8 +131,8 @@ typedef struct {
void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec);
/* Expand out-of-line helper w/descriptor. */
gen_helper_gvec_2i *fno;
/* The opcode, if any, to which this corresponds. */
TCGOpcode opc;
/* The optional opcodes, if any, utilized by .fniv. */
const TCGOpcode *opt_opc;
/* The data argument to the out-of-line helper. */
uint32_t data;
/* The vector element size, if applicable. */
@ -152,8 +152,8 @@ typedef struct {
void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec);
/* Expand out-of-line helper w/descriptor. */
gen_helper_gvec_3 *fno;
/* The opcode, if any, to which this corresponds. */
TCGOpcode opc;
/* The optional opcodes, if any, utilized by .fniv. */
const TCGOpcode *opt_opc;
/* The data argument to the out-of-line helper. */
int32_t data;
/* The vector element size, if applicable. */
@ -164,6 +164,27 @@ typedef struct {
bool load_dest;
} GVecGen3;
/*
 * Descriptor for a three-operand vector expansion that also takes an
 * immediate; this is the type accepted by tcg_gen_gvec_3i().  Each inline
 * callback receives the immediate as its final argument.
 */
typedef struct {
/*
 * Expand inline as a 64-bit or 32-bit integer. Only one of these will be
 * non-NULL.
 */
void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64, int64_t);
void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32, int32_t);
/* Expand inline with a host vector type. */
void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, int64_t);
/* Expand out-of-line helper w/descriptor, data in descriptor. */
gen_helper_gvec_3 *fno;
/* The optional opcodes, if any, utilized by .fniv. */
const TCGOpcode *opt_opc;
/* The vector element size, if applicable. */
uint8_t vece;
/* Prefer i64 to v64. */
bool prefer_i64;
/* Load dest as a 3rd source operand. */
bool load_dest;
} GVecGen3i;
typedef struct {
/* Expand inline as a 64-bit or 32-bit integer.
Only one of these will be non-NULL. */
@ -173,8 +194,8 @@ typedef struct {
void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec);
/* Expand out-of-line helper w/descriptor. */
gen_helper_gvec_4 *fno;
/* The opcode, if any, to which this corresponds. */
TCGOpcode opc;
/* The optional opcodes, if any, utilized by .fniv. */
const TCGOpcode *opt_opc;
/* The data argument to the out-of-line helper. */
int32_t data;
/* The vector element size, if applicable. */
@ -193,6 +214,9 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
uint32_t maxsz, TCGv_i64 c, const GVecGen2s *);
void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
uint32_t oprsz, uint32_t maxsz, const GVecGen3 *);
void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs,
uint32_t oprsz, uint32_t maxsz, int64_t c,
const GVecGen3i *);
void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
uint32_t oprsz, uint32_t maxsz, const GVecGen4 *);
@ -204,6 +228,8 @@ void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_neg(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_abs(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
@ -294,6 +320,24 @@ void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
int64_t shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_shls(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_shrs(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
/*
* Perform vector shift by vector element, modulo the element size.
* E.g. D[i] = A[i] << (B[i] % (8 << vece)).
*/
void tcg_gen_gvec_shlv(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_shrv(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
uint32_t aofs, uint32_t bofs,
uint32_t oprsz, uint32_t maxsz);

View File

@ -34,6 +34,98 @@ extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
#define TCGV_HIGH TCGV_HIGH_link_error
#endif
/*
* Vector optional opcode tracking.
* Except for the basic logical operations (and, or, xor), and
* data movement (mov, ld, st, dupi), many vector opcodes are
* optional and may not be supported on the host. Thank Intel
* for the irregularity in their instruction set.
*
* The gvec expanders allow custom vector operations to be composed,
* generally via the .fniv callback in the GVecGen* structures. At
* the same time, in deciding whether to use this hook we need to
* know if the host supports the required operations. This is
* presented as an array of opcodes, terminated by 0. Each opcode
* is assumed to be expanded with the given VECE.
*
* For debugging, we want to validate this array. Therefore, when
* tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
* will validate that their opcode is present in the list.
*/
#ifdef CONFIG_DEBUG_TCG
/*
 * Debug check: when tcg_ctx->vecop_list is set, OP must appear in that
 * zero-terminated list, otherwise g_assert_not_reached() fires.  With no
 * list installed the check is skipped.
 */
void tcg_assert_listed_vecop(TCGOpcode op)
{
    const TCGOpcode *entry = tcg_ctx->vecop_list;

    if (entry == NULL) {
        return;
    }

    while (*entry) {
        if (*entry == op) {
            return;
        }
        entry++;
    }

    g_assert_not_reached();
}
#endif
/*
 * Report whether every opcode in the zero-terminated LIST can be produced
 * for vectors of TYPE with element size VECE, either directly by the
 * backend or through one of the generic fallbacks handled below.
 *
 * A NULL list means no optional opcodes are needed and yields true.
 */
bool tcg_can_emit_vecop_list(const TCGOpcode *list,
                             TCGType type, unsigned vece)
{
    const TCGOpcode *cur;

    if (list == NULL) {
        return true;
    }

    for (cur = list; *cur; ++cur) {
        TCGOpcode opc = *cur;
        bool fallback = false;

#ifdef CONFIG_DEBUG_TCG
        switch (opc) {
        case INDEX_op_and_vec:
        case INDEX_op_or_vec:
        case INDEX_op_xor_vec:
        case INDEX_op_mov_vec:
        case INDEX_op_dup_vec:
        case INDEX_op_dupi_vec:
        case INDEX_op_dup2_vec:
        case INDEX_op_ld_vec:
        case INDEX_op_st_vec:
            /* These opcodes are mandatory and should not be listed. */
            g_assert_not_reached();
        default:
            break;
        }
#endif

        /* Any non-zero answer (direct or via backend expansion) suffices. */
        if (tcg_can_emit_vec_op(opc, type, vece)) {
            continue;
        }

        /*
         * The list reflects what front ends invoke, so the generic
         * expansions used by the tcg_gen_*_vec routines are mirrored here.
         */
        if (opc == INDEX_op_neg_vec) {
            /* neg can be built from sub. */
            fallback = tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece);
        } else if (opc == INDEX_op_abs_vec) {
            /* abs needs sub plus one of smax, sari or cmp. */
            fallback = tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
                && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece));
        }

        if (!fallback) {
            return false;
        }
    }

    return true;
}
void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
{
TCGOp *op = tcg_emit_op(opc);
@ -194,6 +286,17 @@ void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
}
/*
 * Emit a dupm_vec op with destination R, base pointer B and offset OFS,
 * using element size VECE.  The vector type is taken from the base type
 * of R's temporary.
 */
void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
                         tcg_target_long ofs)
{
    TCGArg dst = tcgv_vec_arg(r);
    TCGArg base = tcgv_ptr_arg(b);

    vec_gen_3(INDEX_op_dupm_vec, arg_temp(dst)->base_type, vece,
              dst, base, ofs);
}
static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
{
TCGArg ri = tcgv_vec_arg(r);
@ -226,16 +329,6 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
}
void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
vec_gen_op3(INDEX_op_add_vec, vece, r, a, b);
}
void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
vec_gen_op3(INDEX_op_sub_vec, vece, r, a, b);
}
void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
@ -296,11 +389,33 @@ void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
tcg_gen_not_vec(0, r, r);
}
/*
 * Try to generate the two-operand vector opcode OPC as R = OPC(A).
 *
 * Returns false, emitting nothing, when tcg_can_emit_vec_op() reports 0
 * for this type and element size; the caller must then supply its own
 * expansion.  Returns true after emitting the op directly (positive
 * answer) or through tcg_expand_vec_op() (negative answer).
 */
static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
{
    TCGTemp *dst_t = tcgv_vec_temp(r);
    TCGTemp *src_t = tcgv_vec_temp(a);
    TCGType type = dst_t->base_type;
    TCGArg dst = temp_arg(dst_t);
    TCGArg src = temp_arg(src_t);
    int can;

    tcg_debug_assert(src_t->base_type >= type);
    tcg_assert_listed_vecop(opc);

    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can == 0) {
        return false;
    }

    if (can > 0) {
        vec_gen_2(opc, type, vece, dst, src);
    } else {
        /* Install a NULL opcode list while the backend expands the op. */
        const TCGOpcode *saved = tcg_swap_vecop_list(NULL);

        tcg_expand_vec_op(opc, type, vece, dst, src);
        tcg_swap_vecop_list(saved);
    }
    return true;
}
void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
if (TCG_TARGET_HAS_not_vec) {
vec_gen_op2(INDEX_op_not_vec, 0, r, a);
} else {
if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) {
TCGv_vec t = tcg_const_ones_vec_matching(r);
tcg_gen_xor_vec(0, r, a, t);
tcg_temp_free_vec(t);
@ -309,13 +424,48 @@ void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
if (TCG_TARGET_HAS_neg_vec) {
vec_gen_op2(INDEX_op_neg_vec, vece, r, a);
} else {
const TCGOpcode *hold_list;
tcg_assert_listed_vecop(INDEX_op_neg_vec);
hold_list = tcg_swap_vecop_list(NULL);
if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
TCGv_vec t = tcg_const_zeros_vec_matching(r);
tcg_gen_sub_vec(vece, r, t, a);
tcg_temp_free_vec(t);
}
tcg_swap_vecop_list(hold_list);
}
/*
 * Generate R = abs(A) on elements of size VECE.
 *
 * The abs_vec opcode is used when do_op2() can produce it.  Otherwise one
 * of three generic sequences is emitted, tried in this order:
 *   1. smax available:  r = smax(a, -a)
 *   2. sari available:  t = a >> (element bits - 1);  r = (a ^ t) - t
 *   3. otherwise:       t = (a < 0);                  r = (a ^ t) - t
 * All three rely on sub_vec, which is asserted below.
 */
void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
const TCGOpcode *hold_list;
tcg_assert_listed_vecop(INDEX_op_abs_vec);
/*
 * Swap in a NULL opcode list for the duration of the expansion and restore
 * the previous one at the end -- presumably so the helper generators used
 * below are not checked against the caller's list.
 */
hold_list = tcg_swap_vecop_list(NULL);
if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
TCGType type = tcgv_vec_temp(r)->base_type;
TCGv_vec t = tcg_temp_new_vec(type);
tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
/* abs(a) as the signed maximum of a and -a. */
tcg_gen_neg_vec(vece, t, a);
tcg_gen_smax_vec(vece, r, a, t);
} else {
/* Build a per-element mask T for negative elements of A. */
if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
/* Shift count is the element width in bits minus one. */
tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
} else {
/* Compare A against a zero vector held in T. */
do_dupi_vec(t, MO_REG, 0);
tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a, t);
}
/* r = (a ^ t) - t; assumes T is all-ones where A is negative -- TODO confirm for cmp_vec. */
tcg_gen_xor_vec(vece, r, a, t);
tcg_gen_sub_vec(vece, r, r, t);
}
tcg_temp_free_vec(t);
}
tcg_swap_vecop_list(hold_list);
}
static void do_shifti(TCGOpcode opc, unsigned vece,
@ -330,6 +480,7 @@ static void do_shifti(TCGOpcode opc, unsigned vece,
tcg_debug_assert(at->base_type == type);
tcg_debug_assert(i >= 0 && i < (8 << vece));
tcg_assert_listed_vecop(opc);
if (i == 0) {
tcg_gen_mov_vec(r, a);
@ -343,8 +494,10 @@ static void do_shifti(TCGOpcode opc, unsigned vece,
/* We leave the choice of expansion via scalar or vector shift
to the target. Often, but not always, dupi can feed a vector
shift easier than a scalar. */
const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
tcg_debug_assert(can < 0);
tcg_expand_vec_op(opc, type, vece, ri, ai, i);
tcg_swap_vecop_list(hold_list);
}
}
@ -377,12 +530,15 @@ void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
tcg_debug_assert(at->base_type >= type);
tcg_debug_assert(bt->base_type >= type);
tcg_assert_listed_vecop(INDEX_op_cmp_vec);
can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
if (can > 0) {
vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
} else {
const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
tcg_debug_assert(can < 0);
tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
tcg_swap_vecop_list(hold_list);
}
}
@ -400,15 +556,28 @@ static void do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
tcg_debug_assert(at->base_type >= type);
tcg_debug_assert(bt->base_type >= type);
tcg_assert_listed_vecop(opc);
can = tcg_can_emit_vec_op(opc, type, vece);
if (can > 0) {
vec_gen_3(opc, type, vece, ri, ai, bi);
} else {
const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
tcg_debug_assert(can < 0);
tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
tcg_swap_vecop_list(hold_list);
}
}
/*
 * Emit a vector addition of a and b into r.
 * vece selects the element size (elements are 8 << vece bits wide).
 * All the work is delegated to do_op3() with INDEX_op_add_vec.
 */
void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
do_op3(vece, r, a, b, INDEX_op_add_vec);
}
/*
 * Emit a vector subtraction, r = a - b.
 * vece selects the element size (elements are 8 << vece bits wide).
 * All the work is delegated to do_op3() with INDEX_op_sub_vec.
 */
void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
do_op3(vece, r, a, b, INDEX_op_sub_vec);
}
void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
do_op3(vece, r, a, b, INDEX_op_mul_vec);
@ -453,3 +622,72 @@ void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
do_op3(vece, r, a, b, INDEX_op_umax_vec);
}
/*
 * Emit a vector shift-left of a by the vector b into r (shift by
 * vector: the counts come from a vector operand rather than a scalar).
 * vece selects the element size (elements are 8 << vece bits wide).
 * Delegates to do_op3() with INDEX_op_shlv_vec.
 */
void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
do_op3(vece, r, a, b, INDEX_op_shlv_vec);
}
/*
 * Emit a vector shift-right of a by the vector b into r (shift by
 * vector; presumably the logical, zero-filling variant, as opposed to
 * the "sar" form -- confirm against the opcode documentation).
 * vece selects the element size (elements are 8 << vece bits wide).
 * Delegates to do_op3() with INDEX_op_shrv_vec.
 */
void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
do_op3(vece, r, a, b, INDEX_op_shrv_vec);
}
/*
 * Emit a vector shift-right of a by the vector b into r (shift by
 * vector; presumably the arithmetic, sign-filling variant -- confirm
 * against the opcode documentation).
 * vece selects the element size (elements are 8 << vece bits wide).
 * Delegates to do_op3() with INDEX_op_sarv_vec.
 */
void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
do_op3(vece, r, a, b, INDEX_op_sarv_vec);
}
/*
 * Shared expansion for vector shifts whose count is a run-time scalar.
 *
 *   vece   element size selector (elements are 8 << vece bits wide)
 *   r, a   destination and source vectors
 *   s      32-bit scalar holding the shift count
 *   opc_s  the shift-by-scalar opcode to try first
 *   opc_v  the matching shift-by-vector opcode used as a fallback
 *
 * tcg_can_emit_vec_op() decides the strategy for opc_s:
 *   > 0  emit opc_s directly;
 *   < 0  let tcg_expand_vec_op() expand it;
 *   == 0 not available: broadcast s into a temporary vector and issue
 *        opc_v through do_op3() instead.
 *
 * The caller's vecop list is checked for opc_s once, then swapped out
 * (set to NULL) while the expansion is emitted and restored on exit.
 */
static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_i32 s, TCGOpcode opc_s, TCGOpcode opc_v)
{
    TCGTemp *dst = tcgv_vec_temp(r);
    TCGTemp *src = tcgv_vec_temp(a);
    TCGTemp *cnt = tcgv_i32_temp(s);
    TCGArg dst_arg = temp_arg(dst);
    TCGArg src_arg = temp_arg(src);
    TCGArg cnt_arg = temp_arg(cnt);
    TCGType type = dst->base_type;
    const TCGOpcode *saved_list;
    int can;

    tcg_debug_assert(src->base_type >= type);
    tcg_assert_listed_vecop(opc_s);
    saved_list = tcg_swap_vecop_list(NULL);

    can = tcg_can_emit_vec_op(opc_s, type, vece);
    if (can == 0) {
        /* No scalar-count form at all: build a vector of counts. */
        TCGv_vec count_vec = tcg_temp_new_vec(type);

        if (vece != MO_64) {
            tcg_gen_dup_i32_vec(vece, count_vec, s);
        } else {
            /* 64-bit elements need the count zero-extended first. */
            TCGv_i64 wide = tcg_temp_new_i64();

            tcg_gen_extu_i32_i64(wide, s);
            tcg_gen_dup_i64_vec(MO_64, count_vec, wide);
            tcg_temp_free_i64(wide);
        }
        do_op3(vece, r, a, count_vec, opc_v);
        tcg_temp_free_vec(count_vec);
    } else if (can > 0) {
        vec_gen_3(opc_s, type, vece, dst_arg, src_arg, cnt_arg);
    } else {
        tcg_expand_vec_op(opc_s, type, vece, dst_arg, src_arg, cnt_arg);
    }

    tcg_swap_vecop_list(saved_list);
}
/*
 * Emit a vector shift-left of a into r, with the shift count taken from
 * the 32-bit scalar b (shift by non-constant scalar).
 * Delegates to do_shifts(), which tries INDEX_op_shls_vec first and
 * falls back to INDEX_op_shlv_vec with the count broadcast to a vector.
 */
void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
do_shifts(vece, r, a, b, INDEX_op_shls_vec, INDEX_op_shlv_vec);
}
/*
 * Emit a vector shift-right of a into r, with the shift count taken
 * from the 32-bit scalar b (presumably the logical variant, as opposed
 * to the "sar" form -- confirm against the opcode documentation).
 * Delegates to do_shifts(), which tries INDEX_op_shrs_vec first and
 * falls back to INDEX_op_shrv_vec with the count broadcast to a vector.
 */
void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
do_shifts(vece, r, a, b, INDEX_op_shrs_vec, INDEX_op_shrv_vec);
}
/*
 * Emit a vector shift-right of a into r, with the shift count taken
 * from the 32-bit scalar b (presumably the arithmetic, sign-filling
 * variant -- confirm against the opcode documentation).
 * Delegates to do_shifts(), which tries INDEX_op_sars_vec first and
 * falls back to INDEX_op_sarv_vec with the count broadcast to a vector.
 */
void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
do_shifts(vece, r, a, b, INDEX_op_sars_vec, INDEX_op_sarv_vec);
}

View File

@ -1091,6 +1091,16 @@ void tcg_gen_umax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, b, a);
}
/*
 * Emit ret = |a| for a 32-bit value using a branch-free sequence:
 *
 *     sign = a >> 31        (arithmetic shift: 0 or -1)
 *     ret  = (a ^ sign) - sign
 *
 * With sign == 0 the value passes through unchanged; with sign == -1
 * the xor inverts every bit and the subtraction adds one, i.e. a two's
 * complement negation.  The most negative input maps to itself.
 */
void tcg_gen_abs_i32(TCGv_i32 ret, TCGv_i32 a)
{
    TCGv_i32 sign = tcg_temp_new_i32();

    tcg_gen_sari_i32(sign, a, 31);
    tcg_gen_xor_i32(ret, a, sign);
    tcg_gen_sub_i32(ret, ret, sign);

    tcg_temp_free_i32(sign);
}
/* 64-bit ops */
#if TCG_TARGET_REG_BITS == 32
@ -2548,6 +2558,16 @@ void tcg_gen_umax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, b, a);
}
/*
 * Emit ret = |a| for a 64-bit value using a branch-free sequence:
 *
 *     sign = a >> 63        (arithmetic shift: 0 or -1)
 *     ret  = (a ^ sign) - sign
 *
 * With sign == 0 the value passes through unchanged; with sign == -1
 * the xor inverts every bit and the subtraction adds one, i.e. a two's
 * complement negation.  The most negative input maps to itself.
 */
void tcg_gen_abs_i64(TCGv_i64 ret, TCGv_i64 a)
{
    TCGv_i64 sign = tcg_temp_new_i64();

    tcg_gen_sari_i64(sign, a, 63);
    tcg_gen_xor_i64(ret, a, sign);
    tcg_gen_sub_i64(ret, ret, sign);

    tcg_temp_free_i64(sign);
}
/* Size changing operations. */
void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)

View File

@ -335,6 +335,7 @@ void tcg_gen_smin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
void tcg_gen_smax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
void tcg_gen_umin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
void tcg_gen_umax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
void tcg_gen_abs_i32(TCGv_i32, TCGv_i32);
static inline void tcg_gen_discard_i32(TCGv_i32 arg)
{
@ -534,6 +535,7 @@ void tcg_gen_smin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
void tcg_gen_smax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
void tcg_gen_umin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
void tcg_gen_umax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
void tcg_gen_abs_i64(TCGv_i64, TCGv_i64);
#if TCG_TARGET_REG_BITS == 64
static inline void tcg_gen_discard_i64(TCGv_i64 arg)
@ -954,6 +956,7 @@ void tcg_gen_atomic_umax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32);
void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec, TCGv_i64);
void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec, TCGv_ptr, tcg_target_long);
void tcg_gen_dup8i_vec(TCGv_vec, uint32_t);
void tcg_gen_dup16i_vec(TCGv_vec, uint32_t);
void tcg_gen_dup32i_vec(TCGv_vec, uint32_t);
@ -972,6 +975,7 @@ void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a);
void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a);
void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a);
void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
@ -985,6 +989,14 @@ void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, TCGv_vec r,
TCGv_vec a, TCGv_vec b);
@ -1010,6 +1022,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
#define tcg_gen_addi_tl tcg_gen_addi_i64
#define tcg_gen_sub_tl tcg_gen_sub_i64
#define tcg_gen_neg_tl tcg_gen_neg_i64
#define tcg_gen_abs_tl tcg_gen_abs_i64
#define tcg_gen_subfi_tl tcg_gen_subfi_i64
#define tcg_gen_subi_tl tcg_gen_subi_i64
#define tcg_gen_and_tl tcg_gen_and_i64
@ -1122,6 +1135,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
#define tcg_gen_addi_tl tcg_gen_addi_i32
#define tcg_gen_sub_tl tcg_gen_sub_i32
#define tcg_gen_neg_tl tcg_gen_neg_i32
#define tcg_gen_abs_tl tcg_gen_abs_i32
#define tcg_gen_subfi_tl tcg_gen_subfi_i32
#define tcg_gen_subi_tl tcg_gen_subi_i32
#define tcg_gen_and_tl tcg_gen_and_i32

View File

@ -219,11 +219,13 @@ DEF(dup2_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_REG_BITS == 32))
DEF(ld_vec, 1, 1, 1, IMPLVEC)
DEF(st_vec, 0, 2, 1, IMPLVEC)
DEF(dupm_vec, 1, 1, 1, IMPLVEC)
DEF(add_vec, 1, 2, 0, IMPLVEC)
DEF(sub_vec, 1, 2, 0, IMPLVEC)
DEF(mul_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_mul_vec))
DEF(neg_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec))
DEF(abs_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_abs_vec))
DEF(ssadd_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec))
DEF(usadd_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec))
DEF(sssub_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec))

271
tcg/tcg.c
View File

@ -103,16 +103,37 @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
intptr_t arg2);
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
const int *const_args);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
TCGReg dst, tcg_target_long arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
unsigned vece, const TCGArg *args,
const int *const_args);
#else
/*
 * Placeholder used when TCG_TARGET_MAYBE_vec is 0 (the #else arm of the
 * surrounding conditional): it keeps callers compiling but must never
 * actually be reached, so it aborts unconditionally.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg src)
{
g_assert_not_reached();
}
/*
 * Placeholder used when TCG_TARGET_MAYBE_vec is 0 (the #else arm of the
 * surrounding conditional): it keeps callers compiling but must never
 * actually be reached, so it aborts unconditionally.
 */
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg base, intptr_t offset)
{
g_assert_not_reached();
}
/*
 * Placeholder used when TCG_TARGET_MAYBE_vec is 0 (the #else arm of the
 * surrounding conditional): it keeps callers compiling but must never
 * actually be reached, so it aborts unconditionally.
 */
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
TCGReg dst, tcg_target_long arg)
{
g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
unsigned vece, const TCGArg *args,
const int *const_args)
@ -1579,6 +1600,7 @@ bool tcg_op_supported(TCGOpcode op)
case INDEX_op_mov_vec:
case INDEX_op_dup_vec:
case INDEX_op_dupi_vec:
case INDEX_op_dupm_vec:
case INDEX_op_ld_vec:
case INDEX_op_st_vec:
case INDEX_op_add_vec:
@ -1594,6 +1616,8 @@ bool tcg_op_supported(TCGOpcode op)
return have_vec && TCG_TARGET_HAS_not_vec;
case INDEX_op_neg_vec:
return have_vec && TCG_TARGET_HAS_neg_vec;
case INDEX_op_abs_vec:
return have_vec && TCG_TARGET_HAS_abs_vec;
case INDEX_op_andc_vec:
return have_vec && TCG_TARGET_HAS_andc_vec;
case INDEX_op_orc_vec:
@ -3270,15 +3294,15 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
save_globals(s, allocated_regs);
}
/*
* Specialized code generation for INDEX_op_movi_*.
*/
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
tcg_target_ulong val, TCGLifeData arg_life,
TCGRegSet preferred_regs)
{
if (ots->fixed_reg) {
/* For fixed registers, we do not do any constant propagation. */
tcg_out_movi(s, ots->type, ots->reg, val);
return;
}
/* ENV should not be modified. */
tcg_debug_assert(!ots->fixed_reg);
/* The movi is not explicitly generated here. */
if (ots->val_type == TEMP_VAL_REG) {
@ -3302,6 +3326,9 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
}
/*
* Specialized code generation for INDEX_op_mov_*.
*/
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
{
const TCGLifeData arg_life = op->life;
@ -3314,6 +3341,9 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
ots = arg_temp(op->args[0]);
ts = arg_temp(op->args[1]);
/* ENV should not be modified. */
tcg_debug_assert(!ots->fixed_reg);
/* Note that otype != itype for no-op truncation. */
otype = ots->type;
itype = ts->type;
@ -3338,7 +3368,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
}
tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
if (IS_DEAD_ARG(0)) {
/* mov to a non-saved dead register makes no sense (even with
liveness analysis disabled). */
tcg_debug_assert(NEED_SYNC_ARG(0));
@ -3351,7 +3381,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
}
temp_dead(s, ots);
} else {
if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
/* the mov can be suppressed */
if (ots->val_type == TEMP_VAL_REG) {
s->reg_to_temp[ots->reg] = NULL;
@ -3367,7 +3397,22 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
allocated_regs, preferred_regs,
ots->indirect_base);
}
tcg_out_mov(s, otype, ots->reg, ts->reg);
if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
/*
* Cross register class move not supported.
* Store the source register into the destination slot
* and leave the destination temp as TEMP_VAL_MEM.
*/
assert(!ots->fixed_reg);
if (!ts->mem_allocated) {
temp_allocate_frame(s, ots);
}
tcg_out_st(s, ts->type, ts->reg,
ots->mem_base->reg, ots->mem_offset);
ots->mem_coherent = 1;
temp_free_or_dead(s, ots, -1);
return;
}
}
ots->val_type = TEMP_VAL_REG;
ots->mem_coherent = 0;
@ -3378,6 +3423,118 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
}
}
/*
 * Specialized code generation for INDEX_op_dup_vec.
 *
 * op->args[0] is the vector output temp and op->args[1] the input temp.
 * The strategies are tried in this order:
 *   - constant input: hand over to tcg_reg_alloc_do_movi();
 *   - input in a register accepted by the dup input constraint:
 *     tcg_out_dup_vec() straight from that register;
 *   - input in a register whose memory slot is stale: tcg_out_mov()
 *     into the output register, then dup the output register in place;
 *   - input available in memory: tcg_out_dupm_vec() from the slot, or
 *     failing that tcg_out_ld() into the output register followed by an
 *     in-place dup.
 * Afterwards the liveness flags of the op are applied to both temps.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
/* Referenced by name by the IS_DEAD_ARG / NEED_SYNC_ARG macros
   (presumably -- their definitions are not visible here). */
const TCGLifeData arg_life = op->life;
TCGRegSet dup_out_regs, dup_in_regs;
TCGTemp *its, *ots;
TCGType itype, vtype;
intptr_t endian_fixup;
unsigned vece;
bool ok;
ots = arg_temp(op->args[0]);
its = arg_temp(op->args[1]);
/* ENV should not be modified. */
tcg_debug_assert(!ots->fixed_reg);
itype = its->type;
vece = TCGOP_VECE(op);
vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
if (its->val_type == TEMP_VAL_CONST) {
/* Propagate constant via movi -> dupi. */
tcg_target_ulong val = its->val;
if (IS_DEAD_ARG(1)) {
temp_dead(s, its);
}
tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
return;
}
/* Register constraints of the dup_vec opcode: output, then input. */
dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
/* Allocate the output register now. */
if (ots->val_type != TEMP_VAL_REG) {
TCGRegSet allocated_regs = s->reserved_regs;
if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
/* Make sure to not spill the input register. */
tcg_regset_set_reg(allocated_regs, its->reg);
}
ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
op->output_pref[0], ots->indirect_base);
ots->val_type = TEMP_VAL_REG;
ots->mem_coherent = 0;
s->reg_to_temp[ots->reg] = ots;
}
switch (its->val_type) {
case TEMP_VAL_REG:
/*
 * The dup constraints must be broad, covering all possible VECE.
 * However, tcg_out_dup_vec() gets to see the VECE and we allow it
 * to fail, indicating that extra moves are required for that case.
 */
if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
goto done;
}
/* Try again from memory or a vector input register. */
}
if (!its->mem_coherent) {
/*
 * The input register is not synced, and so an extra store
 * would be required to use memory. Attempt an integer-vector
 * register move first. We do not have a TCGRegSet for this.
 */
if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
/* The value now sits in ots->reg; finish with the in-place dup. */
break;
}
/* Sync the temp back to its slot and load from there. */
temp_sync(s, its, s->reserved_regs, 0, 0);
}
/* fall through */
case TEMP_VAL_MEM:
/*
 * On a big-endian host the element to replicate is addressed at the
 * end of the 4- or 8-byte slot, so the offset is advanced by the slot
 * size minus the element size (1 << vece bytes).
 */
#ifdef HOST_WORDS_BIGENDIAN
endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
endian_fixup -= 1 << vece;
#else
endian_fixup = 0;
#endif
if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
its->mem_offset + endian_fixup)) {
goto done;
}
/* No dup-from-memory: load the whole scalar (no fixup) and dup below. */
tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
break;
default:
g_assert_not_reached();
}
/* We now have a vector input register, so dup must succeed. */
ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
tcg_debug_assert(ok);
done:
/* Apply the liveness information recorded for this op. */
if (IS_DEAD_ARG(1)) {
temp_dead(s, its);
}
if (NEED_SYNC_ARG(0)) {
temp_sync(s, ots, s->reserved_regs, 0, 0);
}
if (IS_DEAD_ARG(0)) {
temp_dead(s, ots);
}
}
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
const TCGLifeData arg_life = op->life;
@ -3467,7 +3624,15 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
i_allocated_regs, 0);
reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
o_preferred_regs, ts->indirect_base);
tcg_out_mov(s, ts->type, reg, ts->reg);
if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
/*
* Cross register class move not supported. Sync the
* temp back to its slot and load from there.
*/
temp_sync(s, ts, i_allocated_regs, 0, 0);
tcg_out_ld(s, ts->type, reg,
ts->mem_base->reg, ts->mem_offset);
}
}
new_args[i] = reg;
const_args[i] = 0;
@ -3504,6 +3669,10 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
arg = op->args[i];
arg_ct = &def->args_ct[i];
ts = arg_temp(arg);
/* ENV should not be modified. */
tcg_debug_assert(!ts->fixed_reg);
if ((arg_ct->ct & TCG_CT_ALIAS)
&& !const_args[arg_ct->alias_index]) {
reg = new_args[arg_ct->alias_index];
@ -3512,29 +3681,21 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
i_allocated_regs | o_allocated_regs,
op->output_pref[k], ts->indirect_base);
} else {
/* if fixed register, we try to use it */
reg = ts->reg;
if (ts->fixed_reg &&
tcg_regset_test_reg(arg_ct->u.regs, reg)) {
goto oarg_end;
}
reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
op->output_pref[k], ts->indirect_base);
}
tcg_regset_set_reg(o_allocated_regs, reg);
/* if a fixed register is used, then a move will be done afterwards */
if (!ts->fixed_reg) {
if (ts->val_type == TEMP_VAL_REG) {
s->reg_to_temp[ts->reg] = NULL;
}
ts->val_type = TEMP_VAL_REG;
ts->reg = reg;
/* temp value is modified, so the value kept in memory is
potentially not the same */
ts->mem_coherent = 0;
s->reg_to_temp[reg] = ts;
if (ts->val_type == TEMP_VAL_REG) {
s->reg_to_temp[ts->reg] = NULL;
}
oarg_end:
ts->val_type = TEMP_VAL_REG;
ts->reg = reg;
/*
* Temp value is modified, so the value kept in memory is
* potentially not the same.
*/
ts->mem_coherent = 0;
s->reg_to_temp[reg] = ts;
new_args[i] = reg;
}
}
@ -3550,10 +3711,10 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
/* move the outputs in the correct register if needed */
for(i = 0; i < nb_oargs; i++) {
ts = arg_temp(op->args[i]);
reg = new_args[i];
if (ts->fixed_reg && ts->reg != reg) {
tcg_out_mov(s, ts->type, ts->reg, reg);
}
/* ENV should not be modified. */
tcg_debug_assert(!ts->fixed_reg);
if (NEED_SYNC_ARG(i)) {
temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
} else if (IS_DEAD_ARG(i)) {
@ -3630,7 +3791,15 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
if (ts->val_type == TEMP_VAL_REG) {
if (ts->reg != reg) {
tcg_reg_free(s, reg, allocated_regs);
tcg_out_mov(s, ts->type, reg, ts->reg);
if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
/*
* Cross register class move not supported. Sync the
* temp back to its slot and load from there.
*/
temp_sync(s, ts, allocated_regs, 0, 0);
tcg_out_ld(s, ts->type, reg,
ts->mem_base->reg, ts->mem_offset);
}
}
} else {
TCGRegSet arg_set = 0;
@ -3674,26 +3843,23 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
for(i = 0; i < nb_oargs; i++) {
arg = op->args[i];
ts = arg_temp(arg);
/* ENV should not be modified. */
tcg_debug_assert(!ts->fixed_reg);
reg = tcg_target_call_oarg_regs[i];
tcg_debug_assert(s->reg_to_temp[reg] == NULL);
if (ts->fixed_reg) {
if (ts->reg != reg) {
tcg_out_mov(s, ts->type, ts->reg, reg);
}
} else {
if (ts->val_type == TEMP_VAL_REG) {
s->reg_to_temp[ts->reg] = NULL;
}
ts->val_type = TEMP_VAL_REG;
ts->reg = reg;
ts->mem_coherent = 0;
s->reg_to_temp[reg] = ts;
if (NEED_SYNC_ARG(i)) {
temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
} else if (IS_DEAD_ARG(i)) {
temp_dead(s, ts);
}
if (ts->val_type == TEMP_VAL_REG) {
s->reg_to_temp[ts->reg] = NULL;
}
ts->val_type = TEMP_VAL_REG;
ts->reg = reg;
ts->mem_coherent = 0;
s->reg_to_temp[reg] = ts;
if (NEED_SYNC_ARG(i)) {
temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
} else if (IS_DEAD_ARG(i)) {
temp_dead(s, ts);
}
}
}
@ -3943,6 +4109,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
case INDEX_op_dupi_vec:
tcg_reg_alloc_movi(s, op);
break;
case INDEX_op_dup_vec:
tcg_reg_alloc_dup(s, op);
break;
case INDEX_op_insn_start:
if (num_insns >= 0) {
size_t off = tcg_current_code_size(s);

View File

@ -176,6 +176,7 @@ typedef uint64_t TCGRegSet;
&& !defined(TCG_TARGET_HAS_v128) \
&& !defined(TCG_TARGET_HAS_v256)
#define TCG_TARGET_MAYBE_vec 0
#define TCG_TARGET_HAS_abs_vec 0
#define TCG_TARGET_HAS_neg_vec 0
#define TCG_TARGET_HAS_not_vec 0
#define TCG_TARGET_HAS_andc_vec 0
@ -692,6 +693,7 @@ struct TCGContext {
#ifdef CONFIG_DEBUG_TCG
int temps_in_use;
int goto_tb_issue_mask;
const TCGOpcode *vecop_list;
#endif
/* Code generation. Note that we specifically do not use tcg_insn_unit
@ -1492,4 +1494,23 @@ void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
TCGMemOpIdx oi, uintptr_t retaddr);
#ifdef CONFIG_DEBUG_TCG
void tcg_assert_listed_vecop(TCGOpcode);
#else
/* Without CONFIG_DEBUG_TCG the vecop-list check compiles to nothing. */
static inline void tcg_assert_listed_vecop(TCGOpcode op) { }
#endif
/*
 * Install n as the current vecop list of tcg_ctx and return the list
 * that was installed before, so that the caller can restore it later.
 *
 * The list only exists in CONFIG_DEBUG_TCG builds; otherwise nothing is
 * stored and NULL is returned.
 */
static inline const TCGOpcode *tcg_swap_vecop_list(const TCGOpcode *n)
{
    const TCGOpcode *previous = NULL;

#ifdef CONFIG_DEBUG_TCG
    previous = tcg_ctx->vecop_list;
    tcg_ctx->vecop_list = n;
#endif
    return previous;
}
bool tcg_can_emit_vecop_list(const TCGOpcode *, TCGType, unsigned);
#endif /* TCG_H */

View File

@ -509,7 +509,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
old_code_ptr[1] = s->code_ptr - old_code_ptr;
}
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
uint8_t *old_code_ptr = s->code_ptr;
tcg_debug_assert(ret != arg);
@ -521,6 +521,7 @@ static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
tcg_out_r(s, ret);
tcg_out_r(s, arg);
old_code_ptr[1] = s->code_ptr - old_code_ptr;
return true;
}
static void tcg_out_movi(TCGContext *s, TCGType type,