target/arm: Implement VFP fp16 VRINT*

Implement the fp16 version of the VFP VRINT* insns.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200828183354.27913-19-peter.maydell@linaro.org
This commit is contained in:
Peter Maydell 2020-08-28 19:33:27 +01:00
parent 11e78fecdf
commit 0a6f4b4cb3
5 changed files with 122 additions and 8 deletions

View File

@ -242,8 +242,10 @@ DEF_HELPER_3(shr_cc, i32, env, i32, i32)
DEF_HELPER_3(sar_cc, i32, env, i32, i32) DEF_HELPER_3(sar_cc, i32, env, i32, i32)
DEF_HELPER_3(ror_cc, i32, env, i32, i32) DEF_HELPER_3(ror_cc, i32, env, i32, i32)
DEF_HELPER_FLAGS_2(rinth_exact, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, ptr) DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_FLAGS_2(rinth, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr) DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr)

View File

@ -341,7 +341,7 @@ static const uint8_t fp_decode_rm[] = {
static bool trans_VRINT(DisasContext *s, arg_VRINT *a) static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
{ {
uint32_t rd, rm; uint32_t rd, rm;
bool dp = a->dp; int sz = a->sz;
TCGv_ptr fpst; TCGv_ptr fpst;
TCGv_i32 tcg_rmode; TCGv_i32 tcg_rmode;
int rounding = fp_decode_rm[a->rm]; int rounding = fp_decode_rm[a->rm];
@ -350,12 +350,16 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
return false; return false;
} }
if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) { if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
return false;
}
if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
return false; return false;
} }
/* UNDEF accesses to D16-D31 if they don't exist */ /* UNDEF accesses to D16-D31 if they don't exist */
if (dp && !dc_isar_feature(aa32_simd_r32, s) && if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
((a->vm | a->vd) & 0x10)) { ((a->vm | a->vd) & 0x10)) {
return false; return false;
} }
@ -367,12 +371,16 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
return true; return true;
} }
fpst = fpstatus_ptr(FPST_FPCR); if (sz == 1) {
fpst = fpstatus_ptr(FPST_FPCR_F16);
} else {
fpst = fpstatus_ptr(FPST_FPCR);
}
tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding)); tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
if (dp) { if (sz == 3) {
TCGv_i64 tcg_op; TCGv_i64 tcg_op;
TCGv_i64 tcg_res; TCGv_i64 tcg_res;
tcg_op = tcg_temp_new_i64(); tcg_op = tcg_temp_new_i64();
@ -388,7 +396,11 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
tcg_op = tcg_temp_new_i32(); tcg_op = tcg_temp_new_i32();
tcg_res = tcg_temp_new_i32(); tcg_res = tcg_temp_new_i32();
neon_load_reg32(tcg_op, rm); neon_load_reg32(tcg_op, rm);
gen_helper_rints(tcg_res, tcg_op, fpst); if (sz == 1) {
gen_helper_rinth(tcg_res, tcg_op, fpst);
} else {
gen_helper_rints(tcg_res, tcg_op, fpst);
}
neon_store_reg32(tcg_res, rd); neon_store_reg32(tcg_res, rd);
tcg_temp_free_i32(tcg_op); tcg_temp_free_i32(tcg_op);
tcg_temp_free_i32(tcg_res); tcg_temp_free_i32(tcg_res);
@ -2638,6 +2650,29 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
return true; return true;
} }
/*
 * VRINTR, half-precision form: round the value read from register a->vm
 * to an integral value with the rinth helper and write it to register
 * a->vd. The helper is given the FPST_FPCR_F16 float status; no rounding
 * mode is installed here, so the mode already held in that status applies.
 *
 * Takes the same argument struct as the single-precision form
 * (arg_VRINTR_sp).
 *
 * Returns false if the aa32_fp16_arith feature is not present, true
 * otherwise.
 */
static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
{
TCGv_ptr fpst;
TCGv_i32 tmp;
if (!dc_isar_feature(aa32_fp16_arith, s)) {
return false;
}
/*
 * NOTE(review): presumably vfp_access_check() has already emitted the
 * exception when it fails, which is why the insn is reported as
 * handled -- confirm against its definition.
 */
if (!vfp_access_check(s)) {
return true;
}
/*
 * A full 32-bit register is loaded and stored; presumably the fp16
 * operand/result occupies its low half -- TODO confirm.
 */
tmp = tcg_temp_new_i32();
neon_load_reg32(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR_F16);
/* tmp is both the source and the destination of the helper call. */
gen_helper_rinth(tmp, tmp, fpst);
neon_store_reg32(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tmp);
return true;
}
static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a) static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
{ {
TCGv_ptr fpst; TCGv_ptr fpst;
@ -2693,6 +2728,34 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
return true; return true;
} }
/*
 * VRINTZ, half-precision form: round the value read from register a->vm
 * to an integral value using round-to-zero, via the rinth helper and the
 * FPST_FPCR_F16 float status, and write the result to register a->vd.
 *
 * Takes the same argument struct as the single-precision form
 * (arg_VRINTZ_sp).
 *
 * Returns false if the aa32_fp16_arith feature is not present, true
 * otherwise.
 */
static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
{
TCGv_ptr fpst;
TCGv_i32 tmp;
TCGv_i32 tcg_rmode;
if (!dc_isar_feature(aa32_fp16_arith, s)) {
return false;
}
/*
 * NOTE(review): presumably vfp_access_check() has already emitted the
 * exception when it fails, which is why the insn is reported as
 * handled -- confirm against its definition.
 */
if (!vfp_access_check(s)) {
return true;
}
tmp = tcg_temp_new_i32();
neon_load_reg32(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR_F16);
tcg_rmode = tcg_const_i32(float_round_to_zero);
/*
 * set_rmode is called with tcg_rmode as both input and output, once
 * before and once after the rounding helper. Presumably the helper
 * hands back the previous mode, so the second call restores it --
 * confirm against the set_rmode helper.
 */
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
gen_helper_rinth(tmp, tmp, fpst);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
neon_store_reg32(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tcg_rmode);
tcg_temp_free_i32(tmp);
return true;
}
static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a) static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
{ {
TCGv_ptr fpst; TCGv_ptr fpst;
@ -2758,6 +2821,29 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
return true; return true;
} }
/*
 * VRINTX, half-precision form: round the value read from register a->vm
 * to an integral value and write it to register a->vd. Unlike the
 * VRINTR/VRINTZ forms this calls the rinth_exact helper rather than
 * rinth, with the FPST_FPCR_F16 float status.
 *
 * Takes the same argument struct as the single-precision form
 * (arg_VRINTX_sp).
 *
 * Returns false if the aa32_fp16_arith feature is not present, true
 * otherwise.
 */
static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
{
TCGv_ptr fpst;
TCGv_i32 tmp;
if (!dc_isar_feature(aa32_fp16_arith, s)) {
return false;
}
/*
 * NOTE(review): presumably vfp_access_check() has already emitted the
 * exception when it fails, which is why the insn is reported as
 * handled -- confirm against its definition.
 */
if (!vfp_access_check(s)) {
return true;
}
tmp = tcg_temp_new_i32();
neon_load_reg32(tmp, a->vm);
fpst = fpstatus_ptr(FPST_FPCR_F16);
/* tmp is both the source and the destination of the helper call. */
gen_helper_rinth_exact(tmp, tmp, fpst);
neon_store_reg32(tmp, a->vd);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tmp);
return true;
}
static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a) static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
{ {
TCGv_ptr fpst; TCGv_ptr fpst;

View File

@ -60,10 +60,12 @@ VMINNM_sp 1111 1110 1.00 .... .... 1010 .1.0 .... @vfp_dnm_s
VMAXNM_dp 1111 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d VMAXNM_dp 1111 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d
VMINNM_dp 1111 1110 1.00 .... .... 1011 .1.0 .... @vfp_dnm_d VMINNM_dp 1111 1110 1.00 .... .... 1011 .1.0 .... @vfp_dnm_d
VRINT 1111 1110 1.11 10 rm:2 .... 1001 01.0 .... \
vm=%vm_sp vd=%vd_sp sz=1
VRINT 1111 1110 1.11 10 rm:2 .... 1010 01.0 .... \ VRINT 1111 1110 1.11 10 rm:2 .... 1010 01.0 .... \
vm=%vm_sp vd=%vd_sp dp=0 vm=%vm_sp vd=%vd_sp sz=2
VRINT 1111 1110 1.11 10 rm:2 .... 1011 01.0 .... \ VRINT 1111 1110 1.11 10 rm:2 .... 1011 01.0 .... \
vm=%vm_dp vd=%vd_dp dp=1 vm=%vm_dp vd=%vd_dp sz=3
# VCVT float to int with specified rounding mode; Vd is always single-precision # VCVT float to int with specified rounding mode; Vd is always single-precision
VCVT 1111 1110 1.11 11 rm:2 .... 1001 op:1 1.0 .... \ VCVT 1111 1110 1.11 11 rm:2 .... 1001 op:1 1.0 .... \

View File

@ -195,12 +195,15 @@ VCVT_f16_f32 ---- 1110 1.11 0011 .... 1010 t:1 1.0 .... \
VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \ VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \
vd=%vd_sp vm=%vm_dp vd=%vd_sp vm=%vm_dp
VRINTR_hp ---- 1110 1.11 0110 .... 1001 01.0 .... @vfp_dm_ss
VRINTR_sp ---- 1110 1.11 0110 .... 1010 01.0 .... @vfp_dm_ss VRINTR_sp ---- 1110 1.11 0110 .... 1010 01.0 .... @vfp_dm_ss
VRINTR_dp ---- 1110 1.11 0110 .... 1011 01.0 .... @vfp_dm_dd VRINTR_dp ---- 1110 1.11 0110 .... 1011 01.0 .... @vfp_dm_dd
VRINTZ_hp ---- 1110 1.11 0110 .... 1001 11.0 .... @vfp_dm_ss
VRINTZ_sp ---- 1110 1.11 0110 .... 1010 11.0 .... @vfp_dm_ss VRINTZ_sp ---- 1110 1.11 0110 .... 1010 11.0 .... @vfp_dm_ss
VRINTZ_dp ---- 1110 1.11 0110 .... 1011 11.0 .... @vfp_dm_dd VRINTZ_dp ---- 1110 1.11 0110 .... 1011 11.0 .... @vfp_dm_dd
VRINTX_hp ---- 1110 1.11 0111 .... 1001 01.0 .... @vfp_dm_ss
VRINTX_sp ---- 1110 1.11 0111 .... 1010 01.0 .... @vfp_dm_ss VRINTX_sp ---- 1110 1.11 0111 .... 1010 01.0 .... @vfp_dm_ss
VRINTX_dp ---- 1110 1.11 0111 .... 1011 01.0 .... @vfp_dm_dd VRINTX_dp ---- 1110 1.11 0111 .... 1011 01.0 .... @vfp_dm_dd

View File

@ -1019,6 +1019,11 @@ float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
} }
/* ARMv8 round to integral */ /* ARMv8 round to integral */
/*
 * Half-precision round-to-integral helper that leaves the exception
 * flags exactly as float16_round_to_int() sets them.
 *
 * x:         operand, passed straight to float16_round_to_int()
 * fp_status: float status used for the rounding
 *
 * Returns the value produced by float16_round_to_int().
 */
dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, void *fp_status)
{
    float16 rounded = float16_round_to_int(x, fp_status);

    return rounded;
}
float32 HELPER(rints_exact)(float32 x, void *fp_status) float32 HELPER(rints_exact)(float32 x, void *fp_status)
{ {
return float32_round_to_int(x, fp_status); return float32_round_to_int(x, fp_status);
@ -1029,6 +1034,22 @@ float64 HELPER(rintd_exact)(float64 x, void *fp_status)
return float64_round_to_int(x, fp_status); return float64_round_to_int(x, fp_status);
} }
/*
 * Half-precision round-to-integral helper that does not let the
 * rounding itself raise the inexact flag.
 *
 * x:         operand, passed straight to float16_round_to_int()
 * fp_status: float status used for the rounding; its exception flags
 *            may be rewritten as described below
 *
 * Returns the value produced by float16_round_to_int().
 */
dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, void *fp_status)
{
    /* Snapshot the flags before rounding so we can tell who set inexact. */
    const int flags_before = get_float_exception_flags(fp_status);
    float16 rounded = float16_round_to_int(x, fp_status);

    if (flags_before & float_flag_inexact) {
        /* Inexact was already pending before the call: leave it set. */
        return rounded;
    }

    /* Suppress any inexact exception the conversion produced. */
    set_float_exception_flags(get_float_exception_flags(fp_status)
                              & ~float_flag_inexact, fp_status);
    return rounded;
}
float32 HELPER(rints)(float32 x, void *fp_status) float32 HELPER(rints)(float32 x, void *fp_status)
{ {
int old_flags = get_float_exception_flags(fp_status), new_flags; int old_flags = get_float_exception_flags(fp_status), new_flags;