tcg/riscv: Implement vector mov/dup{m/i}

Signed-off-by: TANG Tiancheng <tangtiancheng.ttc@alibaba-inc.com>
Reviewed-by: Liu Zhiwei <zhiwei_liu@linux.alibaba.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-ID: <20241007025700.47259-5-zhiwei_liu@linux.alibaba.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
TANG Tiancheng 2024-10-07 10:56:52 +08:00 committed by Richard Henderson
parent f63e7089b4
commit d4be6ee111

View File

@ -309,6 +309,12 @@ typedef enum {
OPC_VS2R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
OPC_VS4R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
OPC_VS8R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),
OPC_VMV_V_V = 0x5e000057 | V_OPIVV,
OPC_VMV_V_I = 0x5e000057 | V_OPIVI,
OPC_VMV_V_X = 0x5e000057 | V_OPIVX,
OPC_VMVNR_V = 0x9e000057 | V_OPIVI,
} RISCVInsn;
/*
@ -401,6 +407,16 @@ static int32_t encode_uj(RISCVInsn opc, TCGReg rd, uint32_t imm)
return opc | (rd & 0x1f) << 7 | encode_ujimm20(imm);
}
/*
 * Type-OPIVI: build the 32-bit instruction word for a vector op that
 * takes a 5-bit immediate.
 *
 * Each operand is masked to 5 bits and placed in its field:
 *   rd  -> bits [11:7]
 *   imm -> bits [19:15]
 *   vs2 -> bits [24:20]
 *   vm  -> bit  25
 * The fields are OR-ed into @opc, which supplies all remaining bits.
 */
static int32_t encode_vi(RISCVInsn opc, TCGReg rd, int32_t imm,
                         TCGReg vs2, bool vm)
{
    uint32_t word = opc;

    word |= (rd & 0x1f) << 7;
    word |= (imm & 0x1f) << 15;     /* only the low 5 bits of imm survive */
    word |= (vs2 & 0x1f) << 20;
    word |= vm << 25;

    return word;
}
/* Type-OPIVV/OPMVV/OPIVX/OPMVX, Vector load and store */
static int32_t encode_v(RISCVInsn opc, TCGReg d, TCGReg s1,
@ -546,6 +562,24 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
* RISC-V vector instruction emitters
*/
/*
* Vector registers use the same 5 lower bits as GPR registers,
* and vm=0 (vm = false) means vector masking ENABLED.
* With RVV 1.0, vs2 is the first operand, while rs1/imm is the
* second operand.
*/
/*
 * Emit one vector instruction whose second operand is a scalar register.
 *
 * @vd is the destination, @vs2 the first (vector) operand and @rs1 the
 * second (scalar) operand.  Note the argument order handed to encode_v():
 * @rs1 comes before @vs2.  The instruction is always emitted with vm set,
 * i.e. without masking.
 */
static void tcg_out_opc_vx(TCGContext *s, RISCVInsn opc,
                           TCGReg vd, TCGReg vs2, TCGReg rs1)
{
    const bool vm = true;   /* vm=1: masking not enabled */
    int32_t insn = encode_v(opc, vd, rs1, vs2, vm);

    tcg_out32(s, insn);
}
/*
 * Emit one vector instruction whose second operand is an immediate.
 *
 * @vd is the destination, @vs2 the first (vector) operand and @imm the
 * immediate; encode_vi() keeps only the low 5 bits of @imm.  The
 * instruction is always emitted with vm set, i.e. without masking.
 */
static void tcg_out_opc_vi(TCGContext *s, RISCVInsn opc,
                           TCGReg vd, TCGReg vs2, int32_t imm)
{
    const bool vm = true;   /* vm=1: masking not enabled */
    int32_t insn = encode_vi(opc, vd, imm, vs2, vm);

    tcg_out32(s, insn);
}
typedef struct VsetCache {
uint32_t movi_insn;
uint32_t vset_insn;
@ -574,6 +608,13 @@ static MemOp set_vtype_len(TCGContext *s, TCGType type)
return s->riscv_cur_vsew;
}
/*
 * Make sure the vector configuration tracked in @s matches both @type and
 * the element width @vsew.  set_vtype() is called only when at least one
 * of the two differs from the cached s->riscv_cur_type / s->riscv_cur_vsew;
 * otherwise nothing is done.
 */
static void set_vtype_len_sew(TCGContext *s, TCGType type, MemOp vsew)
{
    if (type == s->riscv_cur_type && vsew == s->riscv_cur_vsew) {
        /* Already configured as requested. */
        return;
    }
    set_vtype(s, type, vsew);
}
/*
* TCG intrinsics
*/
@ -588,6 +629,15 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
case TCG_TYPE_I64:
tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
break;
case TCG_TYPE_V64:
case TCG_TYPE_V128:
case TCG_TYPE_V256:
{
int lmul = type - riscv_lg2_vlenb;
int nf = 1 << MAX(lmul, 0);
tcg_out_opc_vi(s, OPC_VMVNR_V, ret, arg, nf - 1);
}
break;
default:
g_assert_not_reached();
}
@ -951,18 +1001,35 @@ static void tcg_out_addsub2(TCGContext *s,
/*
 * Duplicate the scalar register @src into the vector register @dst.
 *
 * @type selects the vector type and @vece the element width; both are
 * applied through set_vtype_len_sew() before the instruction is emitted.
 *
 * Always returns true.  The earlier stub's unconditional "return false"
 * has been dropped: it preceded the implementation and made the code
 * below unreachable.
 */
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src)
{
    set_vtype_len_sew(s, type, vece);
    /* OPC_VMV_V_X takes only the scalar source; the vs2 slot is passed as 0. */
    tcg_out_opc_vx(s, OPC_VMV_V_X, dst, 0, src);
    return true;
}
/*
 * Duplicate a value read from memory at @base + @offset into the vector
 * register @dst.
 *
 * The value is first loaded into TCG_REG_TMP0 as a TCG_TYPE_REG quantity
 * and then handed to tcg_out_dup_vec(), whose result is returned.
 *
 * The earlier stub's unconditional "return false" has been dropped: it
 * preceded the implementation and made the code below unreachable.
 */
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset)
{
    /* Clobbers TCG_REG_TMP0 with the loaded scalar. */
    tcg_out_ld(s, TCG_TYPE_REG, TCG_REG_TMP0, base, offset);
    return tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
}
/*
 * Fill the vector register @dst with the constant @arg.
 *
 * @arg arrives replicated by @vece; the highest element is extracted with
 * an arithmetic right shift, so it ends up sign-extended.  If that element
 * fits in [-16, 15] it is emitted directly as the immediate of
 * OPC_VMV_V_I.  Otherwise it is materialised in TCG_REG_TMP0 and
 * duplicated through tcg_out_dup_vec().
 */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg)
{
    /*
     * Shift count is 64 minus the element width in bits, modulo 64
     * (56, 48, 32, 0 for vece 0..3).  Written as -(8 << vece) rather than
     * -8 << vece so that no negative value is left-shifted; the resulting
     * count is the same.
     */
    arg >>= -(8 << vece) & 63;

    if (arg >= -16 && arg < 16) {
        if (arg == 0 || arg == -1) {
            /*
             * No specific element width is requested for 0 and -1 --
             * presumably because all-zeros / all-ones look the same at
             * every width.
             */
            set_vtype_len(s, type);
        } else {
            set_vtype_len_sew(s, type, vece);
        }
        /* Immediate form: the vs2 slot is passed as 0. */
        tcg_out_opc_vi(s, OPC_VMV_V_I, dst, 0, arg);
        return;
    }

    /* Too wide for the immediate: go through a scalar temporary. */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, arg);
    tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
}
static const struct {
@ -2104,6 +2171,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
a2 = args[2];
switch (opc) {
case INDEX_op_dupm_vec:
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
break;
case INDEX_op_ld_vec:
tcg_out_ld(s, type, a0, a1, a2);
break;
@ -2272,6 +2342,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_st_vec:
return C_O0_I2(v, r);
case INDEX_op_dup_vec:
case INDEX_op_dupm_vec:
case INDEX_op_ld_vec:
return C_O1_I1(v, r);
default: