tcg/s390x: Implement minimal vector operations

Implementing add, sub, and, or, xor as the minimal set.
This allows us to actually enable vectors in query_s390_facilities.

Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2020-09-14 18:08:00 -07:00
parent 79cada8693
commit a429ee2978

View File

@ -271,6 +271,14 @@ typedef enum S390Opcode {
VRIc_VREP = 0xe74d,
VRRa_VLR = 0xe756,
VRRc_VA = 0xe7f3,
VRRc_VCEQ = 0xe7f8, /* we leave the m5 cs field 0 */
VRRc_VCH = 0xe7fb, /* " */
VRRc_VCHL = 0xe7f9, /* " */
VRRc_VN = 0xe768,
VRRc_VO = 0xe76a,
VRRc_VS = 0xe7f7,
VRRc_VX = 0xe76d,
VRRf_VLVGP = 0xe762,
VRSb_VLVG = 0xe722,
@ -607,6 +615,17 @@ static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
}
/*
 * Emit a VRR-c format instruction as three 16-bit halfwords.
 *
 * @s:   code generation context the halfwords are appended to
 * @op:  16-bit opcode; the high byte goes into the first halfword and
 *       the low byte into the last halfword
 * @v1:  first vector register operand
 * @v2:  second vector register operand
 * @v3:  third vector register operand
 * @m4:  4-bit modifier placed in bits 12..15 of the last halfword
 *       (callers in this file pass the element size or 0)
 *
 * Only the low four bits of each register number are placed in the
 * register fields; RXB() is passed all three registers and its result
 * is OR-ed into the last halfword.
 */
static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg v2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v2));
    tcg_debug_assert(is_vector_reg(v3));

    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
    /*
     * Mask v3 explicitly, like v1 and v2 above, instead of relying on
     * the 16-bit emitter to drop the register-number bits that would
     * otherwise be shifted past bit 15.
     */
    tcg_out16(s, (v3 & 0xf) << 12);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
}
static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
TCGReg v1, TCGReg r2, TCGReg r3)
{
@ -2636,18 +2655,145 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
unsigned vecl, unsigned vece,
const TCGArg *args, const int *const_args)
{
/*
 * Emit host code for one vector opcode.
 *
 * opc selects the operation, vecl is added to TCG_TYPE_V64 to form the
 * vector type, vece is forwarded as the last operand of the add, sub and
 * compare instructions, and args[0..2] are the operands (args[3] is read
 * as a TCGCond for cmp_vec).  const_args is not read in the visible body.
 */
/*
 * NOTE(review): the next line looks like the pre-change stub body from the
 * old side of this diff (the +/- markers are not present in this view);
 * confirm it is not part of the resulting function.
 */
g_assert_not_reached();
TCGType type = vecl + TCG_TYPE_V64;
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
switch (opc) {
/* Loads, stores and dup-from-memory are delegated to existing helpers. */
case INDEX_op_ld_vec:
tcg_out_ld(s, type, a0, a1, a2);
break;
case INDEX_op_st_vec:
tcg_out_st(s, type, a0, a1, a2);
break;
case INDEX_op_dupm_vec:
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
break;
/*
 * Arithmetic: vece is passed as the final operand (presumably the m4
 * element-size field of tcg_out_insn_VRRc -- the tcg_out_insn macro is
 * not visible here).
 */
case INDEX_op_add_vec:
tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
break;
case INDEX_op_sub_vec:
tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
break;
/* Bitwise ops pass 0 rather than vece as the final operand. */
case INDEX_op_and_vec:
tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
break;
case INDEX_op_or_vec:
tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
break;
case INDEX_op_xor_vec:
tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
break;
case INDEX_op_cmp_vec:
/*
 * Only EQ, GT and GTU are emitted directly; any other condition
 * reaching this point trips the assertion below.
 */
switch ((TCGCond)args[3]) {
case TCG_COND_EQ:
tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
break;
case TCG_COND_GT:
tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
break;
case TCG_COND_GTU:
tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
break;
default:
g_assert_not_reached();
}
break;
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
default:
g_assert_not_reached();
}
}
/*
 * Report whether a vector opcode is supported.
 *
 * Returns 1 for add, and, or, sub and xor, -1 for cmp_vec, and 0 for every
 * other opcode.  The -1 presumably tells the caller that the op needs
 * tcg_expand_vec_op() (which handles cmp_vec in this file) -- confirm
 * against the generic TCG code.  type and vece are not consulted.
 */
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
/*
 * NOTE(review): the next line looks like the pre-change stub body from the
 * old side of this diff (the +/- markers are not present in this view);
 * confirm it is not part of the resulting function.
 */
return 0;
switch (opc) {
case INDEX_op_add_vec:
case INDEX_op_and_vec:
case INDEX_op_or_vec:
case INDEX_op_sub_vec:
case INDEX_op_xor_vec:
return 1;
case INDEX_op_cmp_vec:
return -1;
default:
return 0;
}
}
/*
 * Generate a cmp_vec op for COND, rewriting the condition first so that
 * only the EQ, GT or GTU forms need to be produced (assuming the usual
 * behaviour of tcg_invert_cond() and tcg_swap_cond()).
 *
 * NE, LE and LEU are handled by inverting the condition; LT and LTU by
 * exchanging the two source operands; GE and GEU by doing both.
 *
 * Returns true when the condition was inverted, in which case the caller
 * still has to complement the result written to v0.
 */
static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
{
    TCGv_vec lhs = v1;
    TCGv_vec rhs = v2;
    bool invert = false;
    bool swap = false;

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_GT:
    case TCG_COND_GTU:
        /* Usable as-is. */
        break;

    case TCG_COND_LT:
    case TCG_COND_LTU:
        swap = true;
        break;

    case TCG_COND_NE:
    case TCG_COND_LE:
    case TCG_COND_LEU:
        invert = true;
        break;

    case TCG_COND_GE:
    case TCG_COND_GEU:
        invert = true;
        swap = true;
        break;

    default:
        g_assert_not_reached();
    }

    /* Invert first, then swap, so the condition is transformed in order. */
    if (invert) {
        cond = tcg_invert_cond(cond);
    }
    if (swap) {
        lhs = v2;
        rhs = v1;
        cond = tcg_swap_cond(cond);
    }

    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
              tcgv_vec_arg(lhs), tcgv_vec_arg(rhs), cond);

    return invert;
}
/*
 * Generate a full vector comparison: emit the compare through
 * expand_vec_cmp_noinv() and, when that helper reports that it used the
 * inverted condition, complement v0 in place with tcg_gen_not_vec().
 */
static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
{
    if (!expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
        /* Result already has the requested sense. */
        return;
    }
    tcg_gen_not_vec(vece, v0, v0);
}
/*
 * Expand a vector opcode that is not emitted directly.
 *
 * a0 and the first two variadic arguments are read as TCGArg values and
 * converted to vector temporaries (v0, v1, v2).  Only cmp_vec is handled:
 * a further variadic TCGArg is read and passed as the condition to
 * expand_vec_cmp().  Any other opcode trips the assertion.
 */
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
/*
 * NOTE(review): the next line looks like the pre-change stub body from the
 * old side of this diff (the +/- markers are not present in this view);
 * confirm it is not part of the resulting function.
 */
g_assert_not_reached();
va_list va;
TCGv_vec v0, v1, v2;
va_start(va, a0);
v0 = temp_tcgv_vec(arg_temp(a0));
v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
switch (opc) {
case INDEX_op_cmp_vec:
/* The fourth operand is fetched as a TCGArg and passed as the TCGCond. */
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
break;
default:
g_assert_not_reached();
}
va_end(va);
}
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
@ -2839,7 +2985,7 @@ static void query_s390_facilities(void)
* There is nothing else we currently care about in the 3rd word, so
* disable VECTOR with one store.
*/
if (1 || !(hwcap & HWCAP_S390_VXRS)) {
if (!(hwcap & HWCAP_S390_VXRS)) {
s390_facilities[2] = 0;
}
}