qemu/target/riscv/insn_trans/trans_rvm.c.inc
commit de799beba7
Author: Weiwei Li <liweiwei@iscas.ac.cn>
Date:   2022-06-10 09:31:42 +10:00

target/riscv: add support for zmmul extension v0.1

Add support for the zmmul extension v0.1. This extension includes all
multiplication operations from the M extension but not the divide ops.

Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
Reviewed-by: Víctor Colombo <victor.colombo@eldorado.org.br>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <20220531030732.3850-1-liweiwei@iscas.ac.cn>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
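
In practice this means a CPU can be configured with Zmmul but without M: the
multiply translators in this file then accept their instructions, while the
divide and remainder translators keep requiring RVM, so those instructions
fall through the decoder as illegal.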


/*
 * RISC-V translation routines for the RV64M Standard Extension.
 *
 * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
 * Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
 *                    Bastian Koppelmann, kbastian@mail.uni-paderborn.de
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */
#define REQUIRE_M_OR_ZMMUL(ctx) do {                      \
    if (!ctx->cfg_ptr->ext_zmmul && !has_ext(ctx, RVM)) { \
        return false;                                     \
    }                                                     \
} while (0)
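
/*
 * Compute into {r3:r2} the high 128 bits of the unsigned 256-bit product
 * {ah:al} * {bh:bl}, using the schoolbook decomposition
 *   a * b = al*bl + 2^64*(al*bh + ah*bl) + 2^128*(ah*bh)
 * Only the carries out of the low 128 bits are propagated upward.
 */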
static void gen_mulhu_i128(TCGv r2, TCGv r3, TCGv al, TCGv ah, TCGv bl, TCGv bh)
{
    TCGv tmpl = tcg_temp_new();
    TCGv tmph = tcg_temp_new();
    TCGv r0 = tcg_temp_new();
    TCGv r1 = tcg_temp_new();
    TCGv zero = tcg_constant_tl(0);

    tcg_gen_mulu2_tl(r0, r1, al, bl);
    tcg_gen_mulu2_tl(tmpl, tmph, al, bh);
    tcg_gen_add2_tl(r1, r2, r1, zero, tmpl, tmph);
    tcg_gen_mulu2_tl(tmpl, tmph, ah, bl);
    tcg_gen_add2_tl(r1, tmph, r1, r2, tmpl, tmph);
    /* Overflow detection into r3 */
    tcg_gen_setcond_tl(TCG_COND_LTU, r3, tmph, r2);

    tcg_gen_mov_tl(r2, tmph);
    tcg_gen_mulu2_tl(tmpl, tmph, ah, bh);
    tcg_gen_add2_tl(r2, r3, r2, r3, tmpl, tmph);

    tcg_temp_free(tmpl);
    tcg_temp_free(tmph);
}

static void gen_mul_i128(TCGv rl, TCGv rh,
                         TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    TCGv tmpl = tcg_temp_new();
    TCGv tmph = tcg_temp_new();
    TCGv tmpx = tcg_temp_new();
    TCGv zero = tcg_constant_tl(0);

    tcg_gen_mulu2_tl(rl, rh, rs1l, rs2l);
    tcg_gen_mulu2_tl(tmpl, tmph, rs1l, rs2h);
    tcg_gen_add2_tl(rh, tmpx, rh, zero, tmpl, tmph);
    tcg_gen_mulu2_tl(tmpl, tmph, rs1h, rs2l);
    tcg_gen_add2_tl(rh, tmph, rh, tmpx, tmpl, tmph);

    tcg_temp_free(tmpl);
    tcg_temp_free(tmph);
    tcg_temp_free(tmpx);
}

static bool trans_mul(DisasContext *ctx, arg_mul *a)
{
    REQUIRE_M_OR_ZMMUL(ctx);
    return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, gen_mul_i128);
}
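
/*
 * Signed high part derived from the unsigned one:
 *   mulh(a, b) = mulhu(a, b) - (a < 0 ? b : 0) - (b < 0 ? a : 0)
 * evaluated modulo 2^128; the arithmetic shifts below build the sign
 * masks that select the operands to subtract.
 */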
static void gen_mulh_i128(TCGv rl, TCGv rh,
                          TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    TCGv t0l = tcg_temp_new();
    TCGv t0h = tcg_temp_new();
    TCGv t1l = tcg_temp_new();
    TCGv t1h = tcg_temp_new();

    gen_mulhu_i128(rl, rh, rs1l, rs1h, rs2l, rs2h);

    tcg_gen_sari_tl(t0h, rs1h, 63);
    tcg_gen_and_tl(t0l, t0h, rs2l);
    tcg_gen_and_tl(t0h, t0h, rs2h);
    tcg_gen_sari_tl(t1h, rs2h, 63);
    tcg_gen_and_tl(t1l, t1h, rs1l);
    tcg_gen_and_tl(t1h, t1h, rs1h);
    tcg_gen_sub2_tl(t0l, t0h, rl, rh, t0l, t0h);
    tcg_gen_sub2_tl(rl, rh, t0l, t0h, t1l, t1h);

    tcg_temp_free(t0l);
    tcg_temp_free(t0h);
    tcg_temp_free(t1l);
    tcg_temp_free(t1h);
}

static void gen_mulh(TCGv ret, TCGv s1, TCGv s2)
{
    TCGv discard = tcg_temp_new();

    tcg_gen_muls2_tl(discard, ret, s1, s2);
    tcg_temp_free(discard);
}

static void gen_mulh_w(TCGv ret, TCGv s1, TCGv s2)
{
    tcg_gen_mul_tl(ret, s1, s2);
    tcg_gen_sari_tl(ret, ret, 32);
}

static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
{
    REQUIRE_M_OR_ZMMUL(ctx);
    return gen_arith_per_ol(ctx, a, EXT_SIGN, gen_mulh, gen_mulh_w,
                            gen_mulh_i128);
}
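
/*
 * Signed rs1 times unsigned rs2:
 *   mulhsu(a, b) = mulhu(a, b) - (a < 0 ? b : 0)   (modulo 2^128)
 */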
static void gen_mulhsu_i128(TCGv rl, TCGv rh,
                            TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    TCGv t0l = tcg_temp_new();
    TCGv t0h = tcg_temp_new();

    gen_mulhu_i128(rl, rh, rs1l, rs1h, rs2l, rs2h);

    tcg_gen_sari_tl(t0h, rs1h, 63);
    tcg_gen_and_tl(t0l, t0h, rs2l);
    tcg_gen_and_tl(t0h, t0h, rs2h);
    tcg_gen_sub2_tl(rl, rh, rl, rh, t0l, t0h);

    tcg_temp_free(t0l);
    tcg_temp_free(t0h);
}

static void gen_mulhsu(TCGv ret, TCGv arg1, TCGv arg2)
{
    TCGv rl = tcg_temp_new();
    TCGv rh = tcg_temp_new();

    tcg_gen_mulu2_tl(rl, rh, arg1, arg2);
    /* fix up for one negative */
    tcg_gen_sari_tl(rl, arg1, TARGET_LONG_BITS - 1);
    tcg_gen_and_tl(rl, rl, arg2);
    tcg_gen_sub_tl(ret, rh, rl);

    tcg_temp_free(rl);
    tcg_temp_free(rh);
}

static void gen_mulhsu_w(TCGv ret, TCGv arg1, TCGv arg2)
{
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();

    tcg_gen_ext32s_tl(t1, arg1);
    tcg_gen_ext32u_tl(t2, arg2);
    tcg_gen_mul_tl(ret, t1, t2);
    tcg_temp_free(t1);
    tcg_temp_free(t2);
    tcg_gen_sari_tl(ret, ret, 32);
}

static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
{
    REQUIRE_M_OR_ZMMUL(ctx);
    return gen_arith_per_ol(ctx, a, EXT_NONE, gen_mulhsu, gen_mulhsu_w,
                            gen_mulhsu_i128);
}

static void gen_mulhu(TCGv ret, TCGv s1, TCGv s2)
{
    TCGv discard = tcg_temp_new();

    tcg_gen_mulu2_tl(discard, ret, s1, s2);
    tcg_temp_free(discard);
}

static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
{
    REQUIRE_M_OR_ZMMUL(ctx);
    /* gen_mulh_w works for either sign as input. */
    return gen_arith_per_ol(ctx, a, EXT_ZERO, gen_mulhu, gen_mulh_w,
                            gen_mulhu_i128);
}
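
/*
 * Reference model (illustrative only, not part of the translator): the
 * identities used by the 128-bit mulh/mulhsu helpers and by gen_mulhsu
 * above can be cross-checked against plain C on a 64-bit host, assuming
 * the GCC/Clang __int128 extension.  Kept under #if 0 so it is never
 * compiled into QEMU.
 */
#if 0
#include <stdint.h>

static uint64_t ref_mulhu(uint64_t a, uint64_t b)
{
    /* High 64 bits of the unsigned 128-bit product. */
    return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

static uint64_t ref_mulh(int64_t a, int64_t b)
{
    /* Equals ref_mulhu(a, b) - (a < 0 ? b : 0) - (b < 0 ? a : 0), mod 2^64. */
    return (uint64_t)(((__int128)a * b) >> 64);
}

static uint64_t ref_mulhsu(int64_t a, uint64_t b)
{
    /* Equals ref_mulhu(a, b) - (a < 0 ? b : 0), mod 2^64. */
    return (uint64_t)(((__int128)a * (__int128)b) >> 64);
}
#endif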
static void gen_div_i128(TCGv rdl, TCGv rdh,
                         TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_divs_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}
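
/*
 * Scalar signed division.  RISC-V requires, without trapping:
 *   x / 0    = -1
 *   MIN / -1 = MIN   (signed overflow)
 * The movcond sequences below substitute safe operands so the host
 * division cannot trap while still producing those results.
 */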
static void gen_div(TCGv ret, TCGv source1, TCGv source2)
{
    TCGv temp1, temp2, zero, one, mone, min;

    temp1 = tcg_temp_new();
    temp2 = tcg_temp_new();
    zero = tcg_constant_tl(0);
    one = tcg_constant_tl(1);
    mone = tcg_constant_tl(-1);
    min = tcg_constant_tl(1ull << (TARGET_LONG_BITS - 1));

    /*
     * If overflow, set temp2 to 1, else source2.
     * This produces the required result of min.
     */
    tcg_gen_setcond_tl(TCG_COND_EQ, temp1, source1, min);
    tcg_gen_setcond_tl(TCG_COND_EQ, temp2, source2, mone);
    tcg_gen_and_tl(temp1, temp1, temp2);
    tcg_gen_movcond_tl(TCG_COND_NE, temp2, temp1, zero, one, source2);

    /*
     * If div by zero, set temp1 to -1 and temp2 to 1 to
     * produce the required result of -1.
     */
    tcg_gen_movcond_tl(TCG_COND_EQ, temp1, source2, zero, mone, source1);
    tcg_gen_movcond_tl(TCG_COND_EQ, temp2, source2, zero, one, temp2);

    tcg_gen_div_tl(ret, temp1, temp2);

    tcg_temp_free(temp1);
    tcg_temp_free(temp2);
}

static bool trans_div(DisasContext *ctx, arg_div *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith(ctx, a, EXT_SIGN, gen_div, gen_div_i128);
}

static void gen_divu_i128(TCGv rdl, TCGv rdh,
                          TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_divu_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}

static void gen_divu(TCGv ret, TCGv source1, TCGv source2)
{
    TCGv temp1, temp2, zero, one, max;

    temp1 = tcg_temp_new();
    temp2 = tcg_temp_new();
    zero = tcg_constant_tl(0);
    one = tcg_constant_tl(1);
    max = tcg_constant_tl(~0);

    /*
     * If div by zero, set temp1 to max and temp2 to 1 to
     * produce the required result of max.
     */
    tcg_gen_movcond_tl(TCG_COND_EQ, temp1, source2, zero, max, source1);
    tcg_gen_movcond_tl(TCG_COND_EQ, temp2, source2, zero, one, source2);
    tcg_gen_divu_tl(ret, temp1, temp2);

    tcg_temp_free(temp1);
    tcg_temp_free(temp2);
}

static bool trans_divu(DisasContext *ctx, arg_divu *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith(ctx, a, EXT_ZERO, gen_divu, gen_divu_i128);
}

static void gen_rem_i128(TCGv rdl, TCGv rdh,
                         TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_rems_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}

static void gen_rem(TCGv ret, TCGv source1, TCGv source2)
{
    TCGv temp1, temp2, zero, one, mone, min;

    temp1 = tcg_temp_new();
    temp2 = tcg_temp_new();
    zero = tcg_constant_tl(0);
    one = tcg_constant_tl(1);
    mone = tcg_constant_tl(-1);
    min = tcg_constant_tl(1ull << (TARGET_LONG_BITS - 1));

    /*
     * If overflow, set temp1 to 0, else source1.
     * This avoids a possible host trap, and produces the required result of 0.
     */
    tcg_gen_setcond_tl(TCG_COND_EQ, temp1, source1, min);
    tcg_gen_setcond_tl(TCG_COND_EQ, temp2, source2, mone);
    tcg_gen_and_tl(temp1, temp1, temp2);
    tcg_gen_movcond_tl(TCG_COND_NE, temp1, temp1, zero, zero, source1);

    /*
     * If div by zero, set temp2 to 1, else source2.
     * This avoids a possible host trap, but produces an incorrect result.
     */
    tcg_gen_movcond_tl(TCG_COND_EQ, temp2, source2, zero, one, source2);

    tcg_gen_rem_tl(temp1, temp1, temp2);

    /* If div by zero, the required result is the original dividend. */
    tcg_gen_movcond_tl(TCG_COND_EQ, ret, source2, zero, source1, temp1);

    tcg_temp_free(temp1);
    tcg_temp_free(temp2);
}

static bool trans_rem(DisasContext *ctx, arg_rem *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith(ctx, a, EXT_SIGN, gen_rem, gen_rem_i128);
}

static void gen_remu_i128(TCGv rdl, TCGv rdh,
                          TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_remu_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}

static void gen_remu(TCGv ret, TCGv source1, TCGv source2)
{
    TCGv temp, zero, one;

    temp = tcg_temp_new();
    zero = tcg_constant_tl(0);
    one = tcg_constant_tl(1);

    /*
     * If div by zero, set temp to 1, else source2.
     * This avoids a possible host trap, but produces an incorrect result.
     */
    tcg_gen_movcond_tl(TCG_COND_EQ, temp, source2, zero, one, source2);

    tcg_gen_remu_tl(temp, source1, temp);

    /* If div by zero, the required result is the original dividend. */
    tcg_gen_movcond_tl(TCG_COND_EQ, ret, source2, zero, source1, temp);

    tcg_temp_free(temp);
}

static bool trans_remu(DisasContext *ctx, arg_remu *a)
{
    REQUIRE_EXT(ctx, RVM);
    return gen_arith(ctx, a, EXT_ZERO, gen_remu, gen_remu_i128);
}
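
/*
 * Word (*W) forms, RV64/RV128 only: the operation width is narrowed to
 * 32 bits for this instruction by setting ctx->ol to MXL_RV32.
 */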
static bool trans_mulw(DisasContext *ctx, arg_mulw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_M_OR_ZMMUL(ctx);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, NULL);
}

static bool trans_divw(DisasContext *ctx, arg_divw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_SIGN, gen_div, NULL);
}

static bool trans_divuw(DisasContext *ctx, arg_divuw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_ZERO, gen_divu, NULL);
}

static bool trans_remw(DisasContext *ctx, arg_remw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_SIGN, gen_rem, NULL);
}

static bool trans_remuw(DisasContext *ctx, arg_remuw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV32;
    return gen_arith(ctx, a, EXT_ZERO, gen_remu, NULL);
}
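
/*
 * Doubleword (*D) forms, RV128 only: operate on 64-bit values by
 * narrowing ctx->ol to MXL_RV64.
 */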
static bool trans_muld(DisasContext *ctx, arg_muld *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_M_OR_ZMMUL(ctx);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_SIGN, tcg_gen_mul_tl, NULL);
}

static bool trans_divd(DisasContext *ctx, arg_divd *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_SIGN, gen_div, NULL);
}

static bool trans_divud(DisasContext *ctx, arg_divud *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_ZERO, gen_divu, NULL);
}

static bool trans_remd(DisasContext *ctx, arg_remd *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_SIGN, gen_rem, NULL);
}

static bool trans_remud(DisasContext *ctx, arg_remud *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);
    ctx->ol = MXL_RV64;
    return gen_arith(ctx, a, EXT_ZERO, gen_remu, NULL);
}