tcg/ppc: Update vector support for v2.07 Altivec
These new instructions are conditional only on MSR.VEC and are thus part of the Altivec instruction set, and not VSX. This includes lots of double-word arithmetic and a few extra logical operations.

Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
47c906ae6f
commit
64ff1c6d21
@ -61,6 +61,7 @@ typedef enum {
|
|||||||
typedef enum {
|
typedef enum {
|
||||||
tcg_isa_base,
|
tcg_isa_base,
|
||||||
tcg_isa_2_06,
|
tcg_isa_2_06,
|
||||||
|
tcg_isa_2_07,
|
||||||
tcg_isa_3_00,
|
tcg_isa_3_00,
|
||||||
} TCGPowerISA;
|
} TCGPowerISA;
|
||||||
|
|
||||||
@ -69,6 +70,7 @@ extern bool have_altivec;
|
|||||||
extern bool have_vsx;
|
extern bool have_vsx;
|
||||||
|
|
||||||
#define have_isa_2_06 (have_isa >= tcg_isa_2_06)
|
#define have_isa_2_06 (have_isa >= tcg_isa_2_06)
|
||||||
|
#define have_isa_2_07 (have_isa >= tcg_isa_2_07)
|
||||||
#define have_isa_3_00 (have_isa >= tcg_isa_3_00)
|
#define have_isa_3_00 (have_isa >= tcg_isa_3_00)
|
||||||
|
|
||||||
/* optional instructions automatically implemented */
|
/* optional instructions automatically implemented */
|
||||||
@ -155,7 +157,7 @@ extern bool have_vsx;
|
|||||||
#define TCG_TARGET_HAS_v256 0
|
#define TCG_TARGET_HAS_v256 0
|
||||||
|
|
||||||
#define TCG_TARGET_HAS_andc_vec 1
|
#define TCG_TARGET_HAS_andc_vec 1
|
||||||
#define TCG_TARGET_HAS_orc_vec 0
|
#define TCG_TARGET_HAS_orc_vec have_isa_2_07
|
||||||
#define TCG_TARGET_HAS_not_vec 1
|
#define TCG_TARGET_HAS_not_vec 1
|
||||||
#define TCG_TARGET_HAS_neg_vec 0
|
#define TCG_TARGET_HAS_neg_vec 0
|
||||||
#define TCG_TARGET_HAS_abs_vec 0
|
#define TCG_TARGET_HAS_abs_vec 0
|
||||||
|
@ -484,6 +484,7 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
|
|||||||
#define VADDSWS VX4(896)
|
#define VADDSWS VX4(896)
|
||||||
#define VADDUWS VX4(640)
|
#define VADDUWS VX4(640)
|
||||||
#define VADDUWM VX4(128)
|
#define VADDUWM VX4(128)
|
||||||
|
#define VADDUDM VX4(192) /* v2.07 */
|
||||||
|
|
||||||
#define VSUBSBS VX4(1792)
|
#define VSUBSBS VX4(1792)
|
||||||
#define VSUBUBS VX4(1536)
|
#define VSUBUBS VX4(1536)
|
||||||
@ -494,47 +495,62 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
|
|||||||
#define VSUBSWS VX4(1920)
|
#define VSUBSWS VX4(1920)
|
||||||
#define VSUBUWS VX4(1664)
|
#define VSUBUWS VX4(1664)
|
||||||
#define VSUBUWM VX4(1152)
|
#define VSUBUWM VX4(1152)
|
||||||
|
#define VSUBUDM VX4(1216) /* v2.07 */
|
||||||
|
|
||||||
#define VMAXSB VX4(258)
|
#define VMAXSB VX4(258)
|
||||||
#define VMAXSH VX4(322)
|
#define VMAXSH VX4(322)
|
||||||
#define VMAXSW VX4(386)
|
#define VMAXSW VX4(386)
|
||||||
|
#define VMAXSD VX4(450) /* v2.07 */
|
||||||
#define VMAXUB VX4(2)
|
#define VMAXUB VX4(2)
|
||||||
#define VMAXUH VX4(66)
|
#define VMAXUH VX4(66)
|
||||||
#define VMAXUW VX4(130)
|
#define VMAXUW VX4(130)
|
||||||
|
#define VMAXUD VX4(194) /* v2.07 */
|
||||||
#define VMINSB VX4(770)
|
#define VMINSB VX4(770)
|
||||||
#define VMINSH VX4(834)
|
#define VMINSH VX4(834)
|
||||||
#define VMINSW VX4(898)
|
#define VMINSW VX4(898)
|
||||||
|
#define VMINSD VX4(962) /* v2.07 */
|
||||||
#define VMINUB VX4(514)
|
#define VMINUB VX4(514)
|
||||||
#define VMINUH VX4(578)
|
#define VMINUH VX4(578)
|
||||||
#define VMINUW VX4(642)
|
#define VMINUW VX4(642)
|
||||||
|
#define VMINUD VX4(706) /* v2.07 */
|
||||||
|
|
||||||
#define VCMPEQUB VX4(6)
|
#define VCMPEQUB VX4(6)
|
||||||
#define VCMPEQUH VX4(70)
|
#define VCMPEQUH VX4(70)
|
||||||
#define VCMPEQUW VX4(134)
|
#define VCMPEQUW VX4(134)
|
||||||
|
#define VCMPEQUD VX4(199) /* v2.07 */
|
||||||
#define VCMPGTSB VX4(774)
|
#define VCMPGTSB VX4(774)
|
||||||
#define VCMPGTSH VX4(838)
|
#define VCMPGTSH VX4(838)
|
||||||
#define VCMPGTSW VX4(902)
|
#define VCMPGTSW VX4(902)
|
||||||
|
#define VCMPGTSD VX4(967) /* v2.07 */
|
||||||
#define VCMPGTUB VX4(518)
|
#define VCMPGTUB VX4(518)
|
||||||
#define VCMPGTUH VX4(582)
|
#define VCMPGTUH VX4(582)
|
||||||
#define VCMPGTUW VX4(646)
|
#define VCMPGTUW VX4(646)
|
||||||
|
#define VCMPGTUD VX4(711) /* v2.07 */
|
||||||
|
|
||||||
#define VSLB VX4(260)
|
#define VSLB VX4(260)
|
||||||
#define VSLH VX4(324)
|
#define VSLH VX4(324)
|
||||||
#define VSLW VX4(388)
|
#define VSLW VX4(388)
|
||||||
|
#define VSLD VX4(1476) /* v2.07 */
|
||||||
#define VSRB VX4(516)
|
#define VSRB VX4(516)
|
||||||
#define VSRH VX4(580)
|
#define VSRH VX4(580)
|
||||||
#define VSRW VX4(644)
|
#define VSRW VX4(644)
|
||||||
|
#define VSRD VX4(1732) /* v2.07 */
|
||||||
#define VSRAB VX4(772)
|
#define VSRAB VX4(772)
|
||||||
#define VSRAH VX4(836)
|
#define VSRAH VX4(836)
|
||||||
#define VSRAW VX4(900)
|
#define VSRAW VX4(900)
|
||||||
|
#define VSRAD VX4(964) /* v2.07 */
|
||||||
#define VRLB VX4(4)
|
#define VRLB VX4(4)
|
||||||
#define VRLH VX4(68)
|
#define VRLH VX4(68)
|
||||||
#define VRLW VX4(132)
|
#define VRLW VX4(132)
|
||||||
|
#define VRLD VX4(196) /* v2.07 */
|
||||||
|
|
||||||
#define VMULEUB VX4(520)
|
#define VMULEUB VX4(520)
|
||||||
#define VMULEUH VX4(584)
|
#define VMULEUH VX4(584)
|
||||||
|
#define VMULEUW VX4(648) /* v2.07 */
|
||||||
#define VMULOUB VX4(8)
|
#define VMULOUB VX4(8)
|
||||||
#define VMULOUH VX4(72)
|
#define VMULOUH VX4(72)
|
||||||
|
#define VMULOUW VX4(136) /* v2.07 */
|
||||||
|
#define VMULUWM VX4(137) /* v2.07 */
|
||||||
#define VMSUMUHM VX4(38)
|
#define VMSUMUHM VX4(38)
|
||||||
|
|
||||||
#define VMRGHB VX4(12)
|
#define VMRGHB VX4(12)
|
||||||
@ -552,6 +568,9 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
|
|||||||
#define VNOR VX4(1284)
|
#define VNOR VX4(1284)
|
||||||
#define VOR VX4(1156)
|
#define VOR VX4(1156)
|
||||||
#define VXOR VX4(1220)
|
#define VXOR VX4(1220)
|
||||||
|
#define VEQV VX4(1668) /* v2.07 */
|
||||||
|
#define VNAND VX4(1412) /* v2.07 */
|
||||||
|
#define VORC VX4(1348) /* v2.07 */
|
||||||
|
|
||||||
#define VSPLTB VX4(524)
|
#define VSPLTB VX4(524)
|
||||||
#define VSPLTH VX4(588)
|
#define VSPLTH VX4(588)
|
||||||
@ -2904,26 +2923,37 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
|
|||||||
case INDEX_op_andc_vec:
|
case INDEX_op_andc_vec:
|
||||||
case INDEX_op_not_vec:
|
case INDEX_op_not_vec:
|
||||||
return 1;
|
return 1;
|
||||||
|
case INDEX_op_orc_vec:
|
||||||
|
return have_isa_2_07;
|
||||||
case INDEX_op_add_vec:
|
case INDEX_op_add_vec:
|
||||||
case INDEX_op_sub_vec:
|
case INDEX_op_sub_vec:
|
||||||
case INDEX_op_smax_vec:
|
case INDEX_op_smax_vec:
|
||||||
case INDEX_op_smin_vec:
|
case INDEX_op_smin_vec:
|
||||||
case INDEX_op_umax_vec:
|
case INDEX_op_umax_vec:
|
||||||
case INDEX_op_umin_vec:
|
case INDEX_op_umin_vec:
|
||||||
|
case INDEX_op_shlv_vec:
|
||||||
|
case INDEX_op_shrv_vec:
|
||||||
|
case INDEX_op_sarv_vec:
|
||||||
|
return vece <= MO_32 || have_isa_2_07;
|
||||||
case INDEX_op_ssadd_vec:
|
case INDEX_op_ssadd_vec:
|
||||||
case INDEX_op_sssub_vec:
|
case INDEX_op_sssub_vec:
|
||||||
case INDEX_op_usadd_vec:
|
case INDEX_op_usadd_vec:
|
||||||
case INDEX_op_ussub_vec:
|
case INDEX_op_ussub_vec:
|
||||||
case INDEX_op_shlv_vec:
|
|
||||||
case INDEX_op_shrv_vec:
|
|
||||||
case INDEX_op_sarv_vec:
|
|
||||||
return vece <= MO_32;
|
return vece <= MO_32;
|
||||||
case INDEX_op_cmp_vec:
|
case INDEX_op_cmp_vec:
|
||||||
case INDEX_op_mul_vec:
|
|
||||||
case INDEX_op_shli_vec:
|
case INDEX_op_shli_vec:
|
||||||
case INDEX_op_shri_vec:
|
case INDEX_op_shri_vec:
|
||||||
case INDEX_op_sari_vec:
|
case INDEX_op_sari_vec:
|
||||||
return vece <= MO_32 ? -1 : 0;
|
return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
|
||||||
|
case INDEX_op_mul_vec:
|
||||||
|
switch (vece) {
|
||||||
|
case MO_8:
|
||||||
|
case MO_16:
|
||||||
|
return -1;
|
||||||
|
case MO_32:
|
||||||
|
return have_isa_2_07 ? 1 : -1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
case INDEX_op_bitsel_vec:
|
case INDEX_op_bitsel_vec:
|
||||||
return have_vsx;
|
return have_vsx;
|
||||||
default:
|
default:
|
||||||
@ -3027,28 +3057,28 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
|
|||||||
const TCGArg *args, const int *const_args)
|
const TCGArg *args, const int *const_args)
|
||||||
{
|
{
|
||||||
static const uint32_t
|
static const uint32_t
|
||||||
add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 },
|
add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
|
||||||
sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 },
|
sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
|
||||||
eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
|
eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
|
||||||
gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
|
gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
|
||||||
gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
|
gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
|
||||||
ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
|
ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
|
||||||
usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
|
usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
|
||||||
sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
|
sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
|
||||||
ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
|
ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
|
||||||
umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
|
umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
|
||||||
smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
|
smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
|
||||||
umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
|
umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
|
||||||
smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
|
smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
|
||||||
shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
|
shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
|
||||||
shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
|
shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
|
||||||
sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 },
|
sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
|
||||||
mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
|
mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
|
||||||
mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
|
mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
|
||||||
muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 },
|
muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
|
||||||
mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 },
|
mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
|
||||||
pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
|
pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
|
||||||
rotl_op[4] = { VRLB, VRLH, VRLW, 0 };
|
rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
|
||||||
|
|
||||||
TCGType type = vecl + TCG_TYPE_V64;
|
TCGType type = vecl + TCG_TYPE_V64;
|
||||||
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
|
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
|
||||||
@ -3071,6 +3101,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
|
|||||||
case INDEX_op_sub_vec:
|
case INDEX_op_sub_vec:
|
||||||
insn = sub_op[vece];
|
insn = sub_op[vece];
|
||||||
break;
|
break;
|
||||||
|
case INDEX_op_mul_vec:
|
||||||
|
tcg_debug_assert(vece == MO_32 && have_isa_2_07);
|
||||||
|
insn = VMULUWM;
|
||||||
|
break;
|
||||||
case INDEX_op_ssadd_vec:
|
case INDEX_op_ssadd_vec:
|
||||||
insn = ssadd_op[vece];
|
insn = ssadd_op[vece];
|
||||||
break;
|
break;
|
||||||
@ -3120,6 +3154,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
|
|||||||
insn = VNOR;
|
insn = VNOR;
|
||||||
a2 = a1;
|
a2 = a1;
|
||||||
break;
|
break;
|
||||||
|
case INDEX_op_orc_vec:
|
||||||
|
insn = VORC;
|
||||||
|
break;
|
||||||
|
|
||||||
case INDEX_op_cmp_vec:
|
case INDEX_op_cmp_vec:
|
||||||
switch (args[3]) {
|
switch (args[3]) {
|
||||||
@ -3200,7 +3237,7 @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
|
|||||||
{
|
{
|
||||||
bool need_swap = false, need_inv = false;
|
bool need_swap = false, need_inv = false;
|
||||||
|
|
||||||
tcg_debug_assert(vece <= MO_32);
|
tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
|
||||||
|
|
||||||
switch (cond) {
|
switch (cond) {
|
||||||
case TCG_COND_EQ:
|
case TCG_COND_EQ:
|
||||||
@ -3264,6 +3301,7 @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case MO_32:
|
case MO_32:
|
||||||
|
tcg_debug_assert(!have_isa_2_07);
|
||||||
t3 = tcg_temp_new_vec(type);
|
t3 = tcg_temp_new_vec(type);
|
||||||
t4 = tcg_temp_new_vec(type);
|
t4 = tcg_temp_new_vec(type);
|
||||||
tcg_gen_dupi_vec(MO_8, t4, -16);
|
tcg_gen_dupi_vec(MO_8, t4, -16);
|
||||||
@ -3554,6 +3592,11 @@ static void tcg_target_init(TCGContext *s)
|
|||||||
if (hwcap & PPC_FEATURE_ARCH_2_06) {
|
if (hwcap & PPC_FEATURE_ARCH_2_06) {
|
||||||
have_isa = tcg_isa_2_06;
|
have_isa = tcg_isa_2_06;
|
||||||
}
|
}
|
||||||
|
#ifdef PPC_FEATURE2_ARCH_2_07
|
||||||
|
if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
|
||||||
|
have_isa = tcg_isa_2_07;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#ifdef PPC_FEATURE2_ARCH_3_00
|
#ifdef PPC_FEATURE2_ARCH_3_00
|
||||||
if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
|
if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
|
||||||
have_isa = tcg_isa_3_00;
|
have_isa = tcg_isa_3_00;
|
||||||
|
Loading…
Reference in New Issue
Block a user