target-arm: A64: Implement SIMD FP compare and set insns

This adds all forms of the SIMD floating point and set instructions:

  FCM(GT|GE|EQ|LE|LT)

Most of the heavy lifting is done by either the existing neon helpers or
some new helpers for the 64bit double cases. Most of the code paths are
common although the 2misc versions are a little special as they compare
against zero.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
[PMM: fixed some minor bugs, added the 2-misc-scalar encoding]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
Alex Bennée 2014-02-20 10:35:49 +00:00 committed by Peter Maydell
parent b033cd3d00
commit 8908f4d185
3 changed files with 207 additions and 12 deletions

View File

@ -179,3 +179,22 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
}
return result;
}
/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{
float_status *fpst = fpstp;
return -float64_eq_quiet(a, b, fpst);
}
uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
{
float_status *fpst = fpstp;
return -float64_le(b, a, fpst);
}
uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
{
float_status *fpst = fpstp;
return -float64_lt(b, a, fpst);
}

View File

@ -29,3 +29,6 @@ DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
DEF_HELPER_FLAGS_3(neon_cge_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)

View File

@ -77,6 +77,8 @@ typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
/* initialize TCG globals. */
void a64_translate_init(void)
@ -6049,6 +6051,9 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
case 0x1a: /* FADD */
gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0x1c: /* FCMEQ */
gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0x1e: /* FMAX */
gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
break;
@ -6064,6 +6069,9 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
case 0x5b: /* FMUL */
gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0x5c: /* FCMGE */
gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0x5f: /* FDIV */
gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
break;
@ -6071,6 +6079,9 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
gen_helper_vfp_absd(tcg_res, tcg_res);
break;
case 0x7c: /* FCMGT */
gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
break;
default:
g_assert_not_reached();
}
@ -6093,6 +6104,9 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
case 0x1a: /* FADD */
gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0x1c: /* FCMEQ */
gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0x1e: /* FMAX */
gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
break;
@ -6111,6 +6125,9 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
case 0x5b: /* FMUL */
gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0x5c: /* FCMGE */
gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0x5f: /* FDIV */
gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
break;
@ -6118,6 +6135,9 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
gen_helper_vfp_abss(tcg_res, tcg_res);
break;
case 0x7c: /* FCMGT */
gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
break;
default:
g_assert_not_reached();
}
@ -6168,15 +6188,15 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
switch (fpopcode) {
case 0x1b: /* FMULX */
case 0x1c: /* FCMEQ */
case 0x1f: /* FRECPS */
case 0x3f: /* FRSQRTS */
case 0x5c: /* FCMGE */
case 0x5d: /* FACGE */
case 0x7c: /* FCMGT */
case 0x7d: /* FACGT */
unsupported_encoding(s, insn);
return;
case 0x1c: /* FCMEQ */
case 0x5c: /* FCMGE */
case 0x7c: /* FCMGT */
case 0x7a: /* FABD */
break;
default:
@ -6361,6 +6381,115 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u,
}
}
static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
bool is_scalar, bool is_u, bool is_q,
int size, int rn, int rd)
{
bool is_double = (size == 3);
TCGv_ptr fpst = get_fpstatus_ptr();
if (is_double) {
TCGv_i64 tcg_op = tcg_temp_new_i64();
TCGv_i64 tcg_zero = tcg_const_i64(0);
TCGv_i64 tcg_res = tcg_temp_new_i64();
NeonGenTwoDoubleOPFn *genfn;
bool swap = false;
int pass;
switch (opcode) {
case 0x2e: /* FCMLT (zero) */
swap = true;
/* fallthrough */
case 0x2c: /* FCMGT (zero) */
genfn = gen_helper_neon_cgt_f64;
break;
case 0x2d: /* FCMEQ (zero) */
genfn = gen_helper_neon_ceq_f64;
break;
case 0x6d: /* FCMLE (zero) */
swap = true;
/* fall through */
case 0x6c: /* FCMGE (zero) */
genfn = gen_helper_neon_cge_f64;
break;
default:
g_assert_not_reached();
}
for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
read_vec_element(s, tcg_op, rn, pass, MO_64);
if (swap) {
genfn(tcg_res, tcg_zero, tcg_op, fpst);
} else {
genfn(tcg_res, tcg_op, tcg_zero, fpst);
}
write_vec_element(s, tcg_res, rd, pass, MO_64);
}
if (is_scalar) {
clear_vec_high(s, rd);
}
tcg_temp_free_i64(tcg_res);
tcg_temp_free_i64(tcg_zero);
tcg_temp_free_i64(tcg_op);
} else {
TCGv_i32 tcg_op = tcg_temp_new_i32();
TCGv_i32 tcg_zero = tcg_const_i32(0);
TCGv_i32 tcg_res = tcg_temp_new_i32();
NeonGenTwoSingleOPFn *genfn;
bool swap = false;
int pass, maxpasses;
switch (opcode) {
case 0x2e: /* FCMLT (zero) */
swap = true;
/* fall through */
case 0x2c: /* FCMGT (zero) */
genfn = gen_helper_neon_cgt_f32;
break;
case 0x2d: /* FCMEQ (zero) */
genfn = gen_helper_neon_ceq_f32;
break;
case 0x6d: /* FCMLE (zero) */
swap = true;
/* fall through */
case 0x6c: /* FCMGE (zero) */
genfn = gen_helper_neon_cge_f32;
break;
default:
g_assert_not_reached();
}
if (is_scalar) {
maxpasses = 1;
} else {
maxpasses = is_q ? 4 : 2;
}
for (pass = 0; pass < maxpasses; pass++) {
read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
if (swap) {
genfn(tcg_res, tcg_zero, tcg_op, fpst);
} else {
genfn(tcg_res, tcg_op, tcg_zero, fpst);
}
if (is_scalar) {
write_fp_sreg(s, rd, tcg_res);
} else {
write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
}
}
tcg_temp_free_i32(tcg_res);
tcg_temp_free_i32(tcg_zero);
tcg_temp_free_i32(tcg_op);
if (!is_q && !is_scalar) {
clear_vec_high(s, rd);
}
}
tcg_temp_free_ptr(fpst);
}
/* C3.6.12 AdvSIMD scalar two reg misc
* 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
* +-----+---+-----------+------+-----------+--------+-----+------+------+
@ -6390,9 +6519,47 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
return;
}
break;
case 0xc ... 0xf:
case 0x16 ... 0x1d:
case 0x1f:
/* Floating point: U, size[1] and opcode indicate operation;
* size[0] indicates single or double precision.
*/
opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
size = extract32(size, 0, 1) ? 3 : 2;
switch (opcode) {
case 0x2c: /* FCMGT (zero) */
case 0x2d: /* FCMEQ (zero) */
case 0x2e: /* FCMLT (zero) */
case 0x6c: /* FCMGE (zero) */
case 0x6d: /* FCMLE (zero) */
handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
return;
case 0x1a: /* FCVTNS */
case 0x1b: /* FCVTMS */
case 0x1c: /* FCVTAS */
case 0x1d: /* SCVTF */
case 0x3a: /* FCVTPS */
case 0x3b: /* FCVTZS */
case 0x3d: /* FRECPE */
case 0x3f: /* FRECPX */
case 0x56: /* FCVTXN, FCVTXN2 */
case 0x5a: /* FCVTNU */
case 0x5b: /* FCVTMU */
case 0x5c: /* FCVTAU */
case 0x5d: /* UCVTF */
case 0x7a: /* FCVTPU */
case 0x7b: /* FCVTZU */
case 0x7d: /* FRSQRTE */
unsupported_encoding(s, insn);
return;
default:
unallocated_encoding(s);
return;
}
break;
default:
/* Other categories of encoding in this class:
* + floating point (single and double)
* + SUQADD/USQADD/SQABS/SQNEG : size 8, 16, 32 or 64
* + SQXTN/SQXTN2/SQXTUN/SQXTUN2/UQXTN/UQXTN2:
* narrowing saturate ops: size 64/32/16 -> 32/16/8
@ -7101,12 +7268,9 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
unsupported_encoding(s, insn);
return;
case 0x1b: /* FMULX */
case 0x1c: /* FCMEQ */
case 0x1f: /* FRECPS */
case 0x3f: /* FRSQRTS */
case 0x5c: /* FCMGE */
case 0x5d: /* FACGE */
case 0x7c: /* FCMGT */
case 0x7d: /* FACGT */
case 0x19: /* FMLA */
case 0x39: /* FMLS */
@ -7114,13 +7278,16 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
return;
case 0x18: /* FMAXNM */
case 0x1a: /* FADD */
case 0x1c: /* FCMEQ */
case 0x1e: /* FMAX */
case 0x38: /* FMINNM */
case 0x3a: /* FSUB */
case 0x3e: /* FMIN */
case 0x5b: /* FMUL */
case 0x5c: /* FCMGE */
case 0x5f: /* FDIV */
case 0x7a: /* FABD */
case 0x7c: /* FCMGT */
handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
return;
default:
@ -7700,6 +7867,17 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
return;
}
break;
case 0x2c: /* FCMGT (zero) */
case 0x2d: /* FCMEQ (zero) */
case 0x2e: /* FCMLT (zero) */
case 0x6c: /* FCMGE (zero) */
case 0x6d: /* FCMLE (zero) */
if (size == 3 && !is_q) {
unallocated_encoding(s);
return;
}
handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
return;
case 0x16: /* FCVTN, FCVTN2 */
case 0x17: /* FCVTL, FCVTL2 */
case 0x18: /* FRINTN */
@ -7708,9 +7886,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
case 0x1b: /* FCVTMS */
case 0x1c: /* FCVTAS */
case 0x1d: /* SCVTF */
case 0x2c: /* FCMGT (zero) */
case 0x2d: /* FCMEQ (zero) */
case 0x2e: /* FCMLT (zero) */
case 0x38: /* FRINTP */
case 0x39: /* FRINTZ */
case 0x3a: /* FCVTPS */
@ -7724,8 +7899,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
case 0x5b: /* FCVTMU */
case 0x5c: /* FCVTAU */
case 0x5d: /* UCVTF */
case 0x6c: /* FCMGE (zero) */
case 0x6d: /* FCMLE (zero) */
case 0x79: /* FRINTI */
case 0x7a: /* FCVTPU */
case 0x7b: /* FCVTZU */