de799beba7
Add support for the zmmul extension v0.1. This extension includes all multiplication operations from the M extension but not the divide ops. Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn> Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn> Reviewed-by: Víctor Colombo <victor.colombo@eldorado.org.br> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-Id: <20220531030732.3850-1-liweiwei@iscas.ac.cn> Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
434 lines
12 KiB
C++
434 lines
12 KiB
C++
/*
|
|
* RISC-V translation routines for the RV64M Standard Extension.
|
|
*
|
|
* Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
|
|
* Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
|
|
* Bastian Koppelmann, kbastian@mail.uni-paderborn.de
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
* under the terms and conditions of the GNU General Public License,
|
|
* version 2 or later, as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
* more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along with
|
|
* this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
/*
 * Bail out of the enclosing translator (by returning false) unless
 * ctx->cfg_ptr->ext_zmmul is set or has_ext(ctx, RVM) holds.
 *
 * The argument is parenthesized so that any pointer-valued expression
 * can be passed safely, not only a plain identifier.
 */
#define REQUIRE_M_OR_ZMMUL(ctx) do {                            \
    if (!(ctx)->cfg_ptr->ext_zmmul && !has_ext((ctx), RVM)) {   \
        return false;                                           \
    }                                                           \
} while (0)
|
|
|
|
/*
 * Emit TCG ops computing the upper two words (r3:r2) of the four-word
 * unsigned product (ah:al) * (bh:bl).
 *
 * The two lowest result words are accumulated in the scratch temporaries
 * r0/r1 only so that their carries propagate; they are not returned.
 *
 * NOTE(review): r2 and r3 are written before ah, bl and bh are read for
 * the last time, so the result is only meaningful when the outputs do not
 * alias those inputs -- confirm that callers guarantee this.
 */
static void gen_mulhu_i128(TCGv r2, TCGv r3, TCGv al, TCGv ah, TCGv bl, TCGv bh)
{
    TCGv tmpl = tcg_temp_new();
    TCGv tmph = tcg_temp_new();
    TCGv r0 = tcg_temp_new();
    TCGv r1 = tcg_temp_new();
    TCGv zero = tcg_constant_tl(0);

    /* r1:r0 = al * bl */
    tcg_gen_mulu2_tl(r0, r1, al, bl);

    /* r2:r1 = r1 + al * bh */
    tcg_gen_mulu2_tl(tmpl, tmph, al, bh);
    tcg_gen_add2_tl(r1, r2, r1, zero, tmpl, tmph);

    /* tmph:r1 = r2:r1 + ah * bl */
    tcg_gen_mulu2_tl(tmpl, tmph, ah, bl);
    tcg_gen_add2_tl(r1, tmph, r1, r2, tmpl, tmph);
    /* Overflow detection into r3 */
    tcg_gen_setcond_tl(TCG_COND_LTU, r3, tmph, r2);

    tcg_gen_mov_tl(r2, tmph);

    /* r3:r2 += ah * bh */
    tcg_gen_mulu2_tl(tmpl, tmph, ah, bh);
    tcg_gen_add2_tl(r2, r3, r2, r3, tmpl, tmph);

    tcg_temp_free(tmpl);
    tcg_temp_free(tmph);
    /* r0 and r1 were previously leaked; release them like the others. */
    tcg_temp_free(r0);
    tcg_temp_free(r1);
}
|
|
|
|
/*
 * Emit TCG ops for the low two words (rh:rl) of the product
 * (rs1h:rs1l) * (rs2h:rs2l).  Only the low words of the two cross
 * products contribute to rh; rs1h * rs2h is never formed.
 */
static void gen_mul_i128(TCGv rl, TCGv rh,
                         TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    TCGv cross_lo = tcg_temp_new();
    TCGv cross_hi = tcg_temp_new();
    TCGv carry = tcg_temp_new();
    TCGv zero = tcg_constant_tl(0);

    /* rh:rl = rs1l * rs2l */
    tcg_gen_mulu2_tl(rl, rh, rs1l, rs2l);

    /* Fold in rs1l * rs2h; whatever spills past rh lands in carry. */
    tcg_gen_mulu2_tl(cross_lo, cross_hi, rs1l, rs2h);
    tcg_gen_add2_tl(rh, carry, rh, zero, cross_lo, cross_hi);

    /* Fold in rs1h * rs2l; the upper output word is scratch only. */
    tcg_gen_mulu2_tl(cross_lo, cross_hi, rs1h, rs2l);
    tcg_gen_add2_tl(rh, cross_hi, rh, carry, cross_lo, cross_hi);

    tcg_temp_free(cross_lo);
    tcg_temp_free(cross_hi);
    tcg_temp_free(carry);
}
|
|
|
|
/*
 * Translate MUL.  Rejected unless M or Zmmul is available; otherwise
 * emitted through gen_arith() with tcg_gen_mul_tl and gen_mul_i128 as
 * the generators.
 */
static bool trans_mul(DisasContext *ctx, arg_mul *a)
{
    REQUIRE_M_OR_ZMMUL(ctx);

    return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, gen_mul_i128);
}
|
|
|
|
/*
 * Emit TCG ops for the signed high product of (rs1h:rs1l) and
 * (rs2h:rs2l) into (rh:rl).
 *
 * The unsigned high product is computed first, then corrected by
 * subtracting rs2 when rs1 is negative and rs1 when rs2 is negative.
 */
static void gen_mulh_i128(TCGv rl, TCGv rh,
                          TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    TCGv fix1_lo = tcg_temp_new();
    TCGv fix1_hi = tcg_temp_new();
    TCGv fix2_lo = tcg_temp_new();
    TCGv fix2_hi = tcg_temp_new();

    gen_mulhu_i128(rl, rh, rs1l, rs1h, rs2l, rs2h);

    /* fix1 = (rs1 < 0) ? rs2 : 0, built from the sign mask of rs1h */
    tcg_gen_sari_tl(fix1_hi, rs1h, 63);
    tcg_gen_and_tl(fix1_lo, fix1_hi, rs2l);
    tcg_gen_and_tl(fix1_hi, fix1_hi, rs2h);

    /* fix2 = (rs2 < 0) ? rs1 : 0, built from the sign mask of rs2h */
    tcg_gen_sari_tl(fix2_hi, rs2h, 63);
    tcg_gen_and_tl(fix2_lo, fix2_hi, rs1l);
    tcg_gen_and_tl(fix2_hi, fix2_hi, rs1h);

    /* result = unsigned_high - fix1 - fix2 */
    tcg_gen_sub2_tl(fix1_lo, fix1_hi, rl, rh, fix1_lo, fix1_hi);
    tcg_gen_sub2_tl(rl, rh, fix1_lo, fix1_hi, fix2_lo, fix2_hi);

    tcg_temp_free(fix1_lo);
    tcg_temp_free(fix1_hi);
    tcg_temp_free(fix2_lo);
    tcg_temp_free(fix2_hi);
}
|
|
|
|
/*
 * Emit the high word of the signed double-word product s1 * s2 into ret.
 * The low word is produced into a scratch temporary and dropped.
 */
static void gen_mulh(TCGv ret, TCGv s1, TCGv s2)
{
    TCGv low_unused = tcg_temp_new();

    tcg_gen_muls2_tl(low_unused, ret, s1, s2);

    tcg_temp_free(low_unused);
}
|
|
|
|
/*
 * Emit ret = (s1 * s2) >> 32 using an arithmetic shift.
 * NOTE(review): presumably the caller has already extended the 32-bit
 * operands as required -- confirm against gen_arith_per_ol().
 */
static void gen_mulh_w(TCGv ret, TCGv s1, TCGv s2)
{
    tcg_gen_mul_tl(ret, s1, s2);
    tcg_gen_sari_tl(ret, ret, 32);
}
|
|
|
|
/*
 * Translate MULH.  Rejected unless M or Zmmul is available; operands are
 * requested with EXT_SIGN and the generator is picked per operation
 * length by gen_arith_per_ol().
 */
static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
{
    REQUIRE_M_OR_ZMMUL(ctx);

    return gen_arith_per_ol(ctx, a, EXT_SIGN,
                            gen_mulh, gen_mulh_w, gen_mulh_i128);
}
|
|
|
|
/*
 * Emit TCG ops for the high product of signed (rs1h:rs1l) and unsigned
 * (rs2h:rs2l) into (rh:rl).
 *
 * Starts from the unsigned high product and subtracts rs2 when rs1 is
 * negative.
 */
static void gen_mulhsu_i128(TCGv rl, TCGv rh,
                            TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    TCGv fix_lo = tcg_temp_new();
    TCGv fix_hi = tcg_temp_new();

    gen_mulhu_i128(rl, rh, rs1l, rs1h, rs2l, rs2h);

    /* fix = (rs1 < 0) ? rs2 : 0, built from the sign mask of rs1h */
    tcg_gen_sari_tl(fix_hi, rs1h, 63);
    tcg_gen_and_tl(fix_lo, fix_hi, rs2l);
    tcg_gen_and_tl(fix_hi, fix_hi, rs2h);

    tcg_gen_sub2_tl(rl, rh, rl, rh, fix_lo, fix_hi);

    tcg_temp_free(fix_lo);
    tcg_temp_free(fix_hi);
}
|
|
|
|
/*
 * Emit the high word of signed arg1 times unsigned arg2 into ret.
 *
 * The unsigned high word is computed first; arg2 is then subtracted from
 * it when arg1 is negative.
 */
static void gen_mulhsu(TCGv ret, TCGv arg1, TCGv arg2)
{
    TCGv fix = tcg_temp_new();
    TCGv high = tcg_temp_new();

    /* The low word lands in fix and is overwritten just below. */
    tcg_gen_mulu2_tl(fix, high, arg1, arg2);

    /* fix = (arg1 < 0) ? arg2 : 0 */
    tcg_gen_sari_tl(fix, arg1, TARGET_LONG_BITS - 1);
    tcg_gen_and_tl(fix, fix, arg2);

    tcg_gen_sub_tl(ret, high, fix);

    tcg_temp_free(fix);
    tcg_temp_free(high);
}
|
|
|
|
/*
 * 32-bit variant of gen_mulhsu: sign-extend the low 32 bits of arg1,
 * zero-extend the low 32 bits of arg2, multiply, and arithmetically
 * shift the product right by 32 into ret.
 */
static void gen_mulhsu_w(TCGv ret, TCGv arg1, TCGv arg2)
{
    TCGv lhs_signed = tcg_temp_new();
    TCGv rhs_unsigned = tcg_temp_new();

    tcg_gen_ext32s_tl(lhs_signed, arg1);
    tcg_gen_ext32u_tl(rhs_unsigned, arg2);

    tcg_gen_mul_tl(ret, lhs_signed, rhs_unsigned);
    tcg_gen_sari_tl(ret, ret, 32);

    tcg_temp_free(lhs_signed);
    tcg_temp_free(rhs_unsigned);
}
|
|
|
|
/*
 * Translate MULHSU.  Rejected unless M or Zmmul is available; operands
 * are requested with EXT_NONE and the generator is picked per operation
 * length by gen_arith_per_ol().
 */
static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
{
    REQUIRE_M_OR_ZMMUL(ctx);

    return gen_arith_per_ol(ctx, a, EXT_NONE,
                            gen_mulhsu, gen_mulhsu_w, gen_mulhsu_i128);
}
|
|
|
|
/*
 * Emit the high word of the unsigned double-word product s1 * s2 into
 * ret.  The low word is produced into a scratch temporary and dropped.
 */
static void gen_mulhu(TCGv ret, TCGv s1, TCGv s2)
{
    TCGv low_unused = tcg_temp_new();

    tcg_gen_mulu2_tl(low_unused, ret, s1, s2);

    tcg_temp_free(low_unused);
}
|
|
|
|
/*
 * Translate MULHU.  Rejected unless M or Zmmul is available; operands
 * are requested with EXT_ZERO and the generator is picked per operation
 * length by gen_arith_per_ol().
 */
static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
{
    REQUIRE_M_OR_ZMMUL(ctx);

    /* gen_mulh_w is shared with MULH: it works for either input sign. */
    return gen_arith_per_ol(ctx, a, EXT_ZERO,
                            gen_mulhu, gen_mulh_w, gen_mulhu_i128);
}
|
|
|
|
/*
 * 128-bit signed division through the divs_i128 helper.  The helper's
 * return value goes to rdl; rdh is then loaded from the retxh field of
 * CPURISCVState.
 */
static void gen_div_i128(TCGv rdl, TCGv rdh,
                         TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_divs_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);

    /* NOTE(review): presumably the helper leaves the high word in retxh. */
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}
|
|
|
|
/*
 * Emit a signed division ret = source1 / source2 whose operands are
 * patched up so that the overflow case (min / -1) yields min and
 * division by zero yields -1.
 */
static void gen_div(TCGv ret, TCGv source1, TCGv source2)
{
    /* 'num' first serves as the overflow flag, then as the dividend. */
    TCGv num = tcg_temp_new();
    TCGv den = tcg_temp_new();
    TCGv zero = tcg_constant_tl(0);
    TCGv one = tcg_constant_tl(1);
    TCGv mone = tcg_constant_tl(-1);
    TCGv min = tcg_constant_tl(1ull << (TARGET_LONG_BITS - 1));

    /*
     * Overflow (source1 == min && source2 == -1): divide by 1 instead of
     * source2, which gives the required result of min.
     */
    tcg_gen_setcond_tl(TCG_COND_EQ, num, source1, min);
    tcg_gen_setcond_tl(TCG_COND_EQ, den, source2, mone);
    tcg_gen_and_tl(num, num, den);
    tcg_gen_movcond_tl(TCG_COND_NE, den, num, zero, one, source2);

    /*
     * Division by zero: use -1 as the dividend and 1 as the divisor,
     * which gives the required result of -1.
     */
    tcg_gen_movcond_tl(TCG_COND_EQ, num, source2, zero, mone, source1);
    tcg_gen_movcond_tl(TCG_COND_EQ, den, source2, zero, one, den);

    tcg_gen_div_tl(ret, num, den);

    tcg_temp_free(num);
    tcg_temp_free(den);
}
|
|
|
|
/*
 * Translate DIV.  Requires the M extension (REQUIRE_EXT with RVM);
 * operands are requested with EXT_SIGN.
 */
static bool trans_div(DisasContext *ctx, arg_div *a)
{
    REQUIRE_EXT(ctx, RVM);

    return gen_arith(ctx, a, EXT_SIGN, gen_div, gen_div_i128);
}
|
|
|
|
/*
 * 128-bit unsigned division through the divu_i128 helper.  The helper's
 * return value goes to rdl; rdh is then loaded from the retxh field of
 * CPURISCVState.
 */
static void gen_divu_i128(TCGv rdl, TCGv rdh,
                          TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_divu_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);

    /* NOTE(review): presumably the helper leaves the high word in retxh. */
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}
|
|
|
|
/*
 * Emit an unsigned division ret = source1 / source2 whose operands are
 * patched up so that division by zero yields the all-ones value.
 */
static void gen_divu(TCGv ret, TCGv source1, TCGv source2)
{
    TCGv num = tcg_temp_new();
    TCGv den = tcg_temp_new();
    TCGv zero = tcg_constant_tl(0);
    TCGv one = tcg_constant_tl(1);
    TCGv max = tcg_constant_tl(~0);

    /*
     * Division by zero: use max as the dividend and 1 as the divisor,
     * which gives the required result of max.
     */
    tcg_gen_movcond_tl(TCG_COND_EQ, num, source2, zero, max, source1);
    tcg_gen_movcond_tl(TCG_COND_EQ, den, source2, zero, one, source2);

    tcg_gen_divu_tl(ret, num, den);

    tcg_temp_free(num);
    tcg_temp_free(den);
}
|
|
|
|
/*
 * Translate DIVU.  Requires the M extension (REQUIRE_EXT with RVM);
 * operands are requested with EXT_ZERO.
 */
static bool trans_divu(DisasContext *ctx, arg_divu *a)
{
    REQUIRE_EXT(ctx, RVM);

    return gen_arith(ctx, a, EXT_ZERO, gen_divu, gen_divu_i128);
}
|
|
|
|
/*
 * 128-bit signed remainder through the rems_i128 helper.  The helper's
 * return value goes to rdl; rdh is then loaded from the retxh field of
 * CPURISCVState.
 */
static void gen_rem_i128(TCGv rdl, TCGv rdh,
                         TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_rems_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);

    /* NOTE(review): presumably the helper leaves the high word in retxh. */
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}
|
|
|
|
/*
 * Emit a signed remainder ret = source1 % source2.  The overflow case
 * (min % -1) yields 0 and division by zero yields the original dividend.
 */
static void gen_rem(TCGv ret, TCGv source1, TCGv source2)
{
    /* 'num' first serves as the overflow flag, then as the dividend. */
    TCGv num = tcg_temp_new();
    TCGv den = tcg_temp_new();
    TCGv zero = tcg_constant_tl(0);
    TCGv one = tcg_constant_tl(1);
    TCGv mone = tcg_constant_tl(-1);
    TCGv min = tcg_constant_tl(1ull << (TARGET_LONG_BITS - 1));

    /*
     * Overflow (source1 == min && source2 == -1): use 0 as the dividend.
     * This avoids a possible host trap and gives the required result of 0.
     */
    tcg_gen_setcond_tl(TCG_COND_EQ, num, source1, min);
    tcg_gen_setcond_tl(TCG_COND_EQ, den, source2, mone);
    tcg_gen_and_tl(num, num, den);
    tcg_gen_movcond_tl(TCG_COND_NE, num, num, zero, zero, source1);

    /*
     * Division by zero: use 1 as the divisor.  This avoids a possible
     * host trap; the incorrect result it produces is replaced below.
     */
    tcg_gen_movcond_tl(TCG_COND_EQ, den, source2, zero, one, source2);

    tcg_gen_rem_tl(num, num, den);

    /* For a zero divisor the required result is the original dividend. */
    tcg_gen_movcond_tl(TCG_COND_EQ, ret, source2, zero, source1, num);

    tcg_temp_free(num);
    tcg_temp_free(den);
}
|
|
|
|
/*
 * Translate REM.  Requires the M extension (REQUIRE_EXT with RVM);
 * operands are requested with EXT_SIGN.
 */
static bool trans_rem(DisasContext *ctx, arg_rem *a)
{
    REQUIRE_EXT(ctx, RVM);

    return gen_arith(ctx, a, EXT_SIGN, gen_rem, gen_rem_i128);
}
|
|
|
|
/*
 * 128-bit unsigned remainder through the remu_i128 helper.  The helper's
 * return value goes to rdl; rdh is then loaded from the retxh field of
 * CPURISCVState.
 */
static void gen_remu_i128(TCGv rdl, TCGv rdh,
                          TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
{
    gen_helper_remu_i128(rdl, cpu_env, rs1l, rs1h, rs2l, rs2h);

    /* NOTE(review): presumably the helper leaves the high word in retxh. */
    tcg_gen_ld_tl(rdh, cpu_env, offsetof(CPURISCVState, retxh));
}
|
|
|
|
/*
 * Emit an unsigned remainder ret = source1 % source2.  Division by zero
 * yields the original dividend.
 */
static void gen_remu(TCGv ret, TCGv source1, TCGv source2)
{
    /* 'scratch' holds the safe divisor first, then the raw remainder. */
    TCGv scratch = tcg_temp_new();
    TCGv zero = tcg_constant_tl(0);
    TCGv one = tcg_constant_tl(1);

    /*
     * Division by zero: use 1 as the divisor.  This avoids a possible
     * host trap; the incorrect result it produces is replaced below.
     */
    tcg_gen_movcond_tl(TCG_COND_EQ, scratch, source2, zero, one, source2);

    tcg_gen_remu_tl(scratch, source1, scratch);

    /* For a zero divisor the required result is the original dividend. */
    tcg_gen_movcond_tl(TCG_COND_EQ, ret, source2, zero, source1, scratch);

    tcg_temp_free(scratch);
}
|
|
|
|
/*
 * Translate REMU.  Requires the M extension (REQUIRE_EXT with RVM);
 * operands are requested with EXT_ZERO.
 */
static bool trans_remu(DisasContext *ctx, arg_remu *a)
{
    REQUIRE_EXT(ctx, RVM);

    return gen_arith(ctx, a, EXT_ZERO, gen_remu, gen_remu_i128);
}
|
|
|
|
/*
 * Translate MULW.  Passes the REQUIRE_64_OR_128BIT check, needs M or
 * Zmmul, and runs with the operation length forced to MXL_RV32.  No
 * 128-bit generator is supplied.
 */
static bool trans_mulw(DisasContext *ctx, arg_mulw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_M_OR_ZMMUL(ctx);

    ctx->ol = MXL_RV32;

    return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, NULL);
}
|
|
|
|
/*
 * Translate DIVW.  Passes the REQUIRE_64_OR_128BIT check, needs the M
 * extension, and runs with the operation length forced to MXL_RV32 on
 * EXT_SIGN operands.  No 128-bit generator is supplied.
 */
static bool trans_divw(DisasContext *ctx, arg_divw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);

    ctx->ol = MXL_RV32;

    return gen_arith(ctx, a, EXT_SIGN, gen_div, NULL);
}
|
|
|
|
/*
 * Translate DIVUW.  Passes the REQUIRE_64_OR_128BIT check, needs the M
 * extension, and runs with the operation length forced to MXL_RV32 on
 * EXT_ZERO operands.  No 128-bit generator is supplied.
 */
static bool trans_divuw(DisasContext *ctx, arg_divuw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);

    ctx->ol = MXL_RV32;

    return gen_arith(ctx, a, EXT_ZERO, gen_divu, NULL);
}
|
|
|
|
/*
 * Translate REMW.  Passes the REQUIRE_64_OR_128BIT check, needs the M
 * extension, and runs with the operation length forced to MXL_RV32 on
 * EXT_SIGN operands.  No 128-bit generator is supplied.
 */
static bool trans_remw(DisasContext *ctx, arg_remw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);

    ctx->ol = MXL_RV32;

    return gen_arith(ctx, a, EXT_SIGN, gen_rem, NULL);
}
|
|
|
|
/*
 * Translate REMUW.  Passes the REQUIRE_64_OR_128BIT check, needs the M
 * extension, and runs with the operation length forced to MXL_RV32 on
 * EXT_ZERO operands.  No 128-bit generator is supplied.
 */
static bool trans_remuw(DisasContext *ctx, arg_remuw *a)
{
    REQUIRE_64_OR_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);

    ctx->ol = MXL_RV32;

    return gen_arith(ctx, a, EXT_ZERO, gen_remu, NULL);
}
|
|
|
|
/*
 * Translate MULD.  Passes the REQUIRE_128BIT check, needs M or Zmmul,
 * and runs with the operation length forced to MXL_RV64 on EXT_SIGN
 * operands.  No 128-bit generator is supplied.
 */
static bool trans_muld(DisasContext *ctx, arg_muld *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_M_OR_ZMMUL(ctx);

    ctx->ol = MXL_RV64;

    return gen_arith(ctx, a, EXT_SIGN, tcg_gen_mul_tl, NULL);
}
|
|
|
|
/*
 * Translate DIVD.  Passes the REQUIRE_128BIT check, needs the M
 * extension, and runs with the operation length forced to MXL_RV64 on
 * EXT_SIGN operands.  No 128-bit generator is supplied.
 */
static bool trans_divd(DisasContext *ctx, arg_divd *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);

    ctx->ol = MXL_RV64;

    return gen_arith(ctx, a, EXT_SIGN, gen_div, NULL);
}
|
|
|
|
/*
 * Translate DIVUD.  Passes the REQUIRE_128BIT check, needs the M
 * extension, and runs with the operation length forced to MXL_RV64 on
 * EXT_ZERO operands.  No 128-bit generator is supplied.
 */
static bool trans_divud(DisasContext *ctx, arg_divud *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);

    ctx->ol = MXL_RV64;

    return gen_arith(ctx, a, EXT_ZERO, gen_divu, NULL);
}
|
|
|
|
/*
 * Translate REMD.  Passes the REQUIRE_128BIT check, needs the M
 * extension, and runs with the operation length forced to MXL_RV64 on
 * EXT_SIGN operands.  No 128-bit generator is supplied.
 */
static bool trans_remd(DisasContext *ctx, arg_remd *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);

    ctx->ol = MXL_RV64;

    return gen_arith(ctx, a, EXT_SIGN, gen_rem, NULL);
}
|
|
|
|
/*
 * Translate REMUD.  Passes the REQUIRE_128BIT check, needs the M
 * extension, and runs with the operation length forced to MXL_RV64 on
 * EXT_ZERO operands.  No 128-bit generator is supplied.
 */
static bool trans_remud(DisasContext *ctx, arg_remud *a)
{
    REQUIRE_128BIT(ctx);
    REQUIRE_EXT(ctx, RVM);

    ctx->ol = MXL_RV64;

    return gen_arith(ctx, a, EXT_ZERO, gen_remu, NULL);
}
|