target-ppc: VSX Stage 4: Add Scalar SP Fused Multiply-Adds
This patch adds the Single Precision VSX Scalar Fused Multiply-Add instructions: xsmaddasp, xsmaddmsp, xsmsubasp, xsmsubmsp, xsnmaddasp, xsnmaddmsp, xsnmsubasp, xsnmsubmsp. The existing VSX_MADD() macro is modified to support rounding of the intermediate double precision result to single precision. Signed-off-by: Tom Musta <tommusta@gmail.com> Reviewed-by: Richard Henderson <rth@twiddle.net> Signed-off-by: Alexander Graf <agraf@suse.de>
This commit is contained in:
parent
968e76bcab
commit
f53f81e08b
@ -2192,7 +2192,7 @@ VSX_TSQRT(xvtsqrtsp, 4, float32, f32, -126, 23)
|
||||
* afrm - A form (1=A, 0=M)
|
||||
* sfprf - set FPRF
|
||||
*/
|
||||
#define VSX_MADD(op, nels, tp, fld, maddflgs, afrm, sfprf) \
|
||||
#define VSX_MADD(op, nels, tp, fld, maddflgs, afrm, sfprf, r2sp) \
|
||||
void helper_##op(CPUPPCState *env, uint32_t opcode) \
|
||||
{ \
|
||||
ppc_vsr_t xt_in, xa, xb, xt_out; \
|
||||
@ -2218,8 +2218,18 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \
|
||||
for (i = 0; i < nels; i++) { \
|
||||
float_status tstat = env->fp_status; \
|
||||
set_float_exception_flags(0, &tstat); \
|
||||
xt_out.fld[i] = tp##_muladd(xa.fld[i], b->fld[i], c->fld[i], \
|
||||
maddflgs, &tstat); \
|
||||
if (r2sp && (tstat.float_rounding_mode == float_round_nearest_even)) {\
|
||||
/* Avoid double rounding errors by rounding the intermediate */ \
|
||||
/* result to odd. */ \
|
||||
set_float_rounding_mode(float_round_to_zero, &tstat); \
|
||||
xt_out.fld[i] = tp##_muladd(xa.fld[i], b->fld[i], c->fld[i], \
|
||||
maddflgs, &tstat); \
|
||||
xt_out.fld[i] |= (get_float_exception_flags(&tstat) & \
|
||||
float_flag_inexact) != 0; \
|
||||
} else { \
|
||||
xt_out.fld[i] = tp##_muladd(xa.fld[i], b->fld[i], c->fld[i], \
|
||||
maddflgs, &tstat); \
|
||||
} \
|
||||
env->fp_status.float_exception_flags |= tstat.float_exception_flags; \
|
||||
\
|
||||
if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \
|
||||
@ -2242,6 +2252,11 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \
|
||||
fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, sfprf); \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
if (r2sp) { \
|
||||
xt_out.fld[i] = helper_frsp(env, xt_out.fld[i]); \
|
||||
} \
|
||||
\
|
||||
if (sfprf) { \
|
||||
helper_compute_fprf(env, xt_out.fld[i], sfprf); \
|
||||
} \
|
||||
@ -2255,32 +2270,41 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \
|
||||
#define NMADD_FLGS float_muladd_negate_result
|
||||
#define NMSUB_FLGS (float_muladd_negate_c | float_muladd_negate_result)
|
||||
|
||||
VSX_MADD(xsmaddadp, 1, float64, f64, MADD_FLGS, 1, 1)
|
||||
VSX_MADD(xsmaddmdp, 1, float64, f64, MADD_FLGS, 0, 1)
|
||||
VSX_MADD(xsmsubadp, 1, float64, f64, MSUB_FLGS, 1, 1)
|
||||
VSX_MADD(xsmsubmdp, 1, float64, f64, MSUB_FLGS, 0, 1)
|
||||
VSX_MADD(xsnmaddadp, 1, float64, f64, NMADD_FLGS, 1, 1)
|
||||
VSX_MADD(xsnmaddmdp, 1, float64, f64, NMADD_FLGS, 0, 1)
|
||||
VSX_MADD(xsnmsubadp, 1, float64, f64, NMSUB_FLGS, 1, 1)
|
||||
VSX_MADD(xsnmsubmdp, 1, float64, f64, NMSUB_FLGS, 0, 1)
|
||||
VSX_MADD(xsmaddadp, 1, float64, f64, MADD_FLGS, 1, 1, 0)
|
||||
VSX_MADD(xsmaddmdp, 1, float64, f64, MADD_FLGS, 0, 1, 0)
|
||||
VSX_MADD(xsmsubadp, 1, float64, f64, MSUB_FLGS, 1, 1, 0)
|
||||
VSX_MADD(xsmsubmdp, 1, float64, f64, MSUB_FLGS, 0, 1, 0)
|
||||
VSX_MADD(xsnmaddadp, 1, float64, f64, NMADD_FLGS, 1, 1, 0)
|
||||
VSX_MADD(xsnmaddmdp, 1, float64, f64, NMADD_FLGS, 0, 1, 0)
|
||||
VSX_MADD(xsnmsubadp, 1, float64, f64, NMSUB_FLGS, 1, 1, 0)
|
||||
VSX_MADD(xsnmsubmdp, 1, float64, f64, NMSUB_FLGS, 0, 1, 0)
|
||||
|
||||
VSX_MADD(xvmaddadp, 2, float64, f64, MADD_FLGS, 1, 0)
|
||||
VSX_MADD(xvmaddmdp, 2, float64, f64, MADD_FLGS, 0, 0)
|
||||
VSX_MADD(xvmsubadp, 2, float64, f64, MSUB_FLGS, 1, 0)
|
||||
VSX_MADD(xvmsubmdp, 2, float64, f64, MSUB_FLGS, 0, 0)
|
||||
VSX_MADD(xvnmaddadp, 2, float64, f64, NMADD_FLGS, 1, 0)
|
||||
VSX_MADD(xvnmaddmdp, 2, float64, f64, NMADD_FLGS, 0, 0)
|
||||
VSX_MADD(xvnmsubadp, 2, float64, f64, NMSUB_FLGS, 1, 0)
|
||||
VSX_MADD(xvnmsubmdp, 2, float64, f64, NMSUB_FLGS, 0, 0)
|
||||
VSX_MADD(xsmaddasp, 1, float64, f64, MADD_FLGS, 1, 1, 1)
|
||||
VSX_MADD(xsmaddmsp, 1, float64, f64, MADD_FLGS, 0, 1, 1)
|
||||
VSX_MADD(xsmsubasp, 1, float64, f64, MSUB_FLGS, 1, 1, 1)
|
||||
VSX_MADD(xsmsubmsp, 1, float64, f64, MSUB_FLGS, 0, 1, 1)
|
||||
VSX_MADD(xsnmaddasp, 1, float64, f64, NMADD_FLGS, 1, 1, 1)
|
||||
VSX_MADD(xsnmaddmsp, 1, float64, f64, NMADD_FLGS, 0, 1, 1)
|
||||
VSX_MADD(xsnmsubasp, 1, float64, f64, NMSUB_FLGS, 1, 1, 1)
|
||||
VSX_MADD(xsnmsubmsp, 1, float64, f64, NMSUB_FLGS, 0, 1, 1)
|
||||
|
||||
VSX_MADD(xvmaddasp, 4, float32, f32, MADD_FLGS, 1, 0)
|
||||
VSX_MADD(xvmaddmsp, 4, float32, f32, MADD_FLGS, 0, 0)
|
||||
VSX_MADD(xvmsubasp, 4, float32, f32, MSUB_FLGS, 1, 0)
|
||||
VSX_MADD(xvmsubmsp, 4, float32, f32, MSUB_FLGS, 0, 0)
|
||||
VSX_MADD(xvnmaddasp, 4, float32, f32, NMADD_FLGS, 1, 0)
|
||||
VSX_MADD(xvnmaddmsp, 4, float32, f32, NMADD_FLGS, 0, 0)
|
||||
VSX_MADD(xvnmsubasp, 4, float32, f32, NMSUB_FLGS, 1, 0)
|
||||
VSX_MADD(xvnmsubmsp, 4, float32, f32, NMSUB_FLGS, 0, 0)
|
||||
VSX_MADD(xvmaddadp, 2, float64, f64, MADD_FLGS, 1, 0, 0)
|
||||
VSX_MADD(xvmaddmdp, 2, float64, f64, MADD_FLGS, 0, 0, 0)
|
||||
VSX_MADD(xvmsubadp, 2, float64, f64, MSUB_FLGS, 1, 0, 0)
|
||||
VSX_MADD(xvmsubmdp, 2, float64, f64, MSUB_FLGS, 0, 0, 0)
|
||||
VSX_MADD(xvnmaddadp, 2, float64, f64, NMADD_FLGS, 1, 0, 0)
|
||||
VSX_MADD(xvnmaddmdp, 2, float64, f64, NMADD_FLGS, 0, 0, 0)
|
||||
VSX_MADD(xvnmsubadp, 2, float64, f64, NMSUB_FLGS, 1, 0, 0)
|
||||
VSX_MADD(xvnmsubmdp, 2, float64, f64, NMSUB_FLGS, 0, 0, 0)
|
||||
|
||||
VSX_MADD(xvmaddasp, 4, float32, f32, MADD_FLGS, 1, 0, 0)
|
||||
VSX_MADD(xvmaddmsp, 4, float32, f32, MADD_FLGS, 0, 0, 0)
|
||||
VSX_MADD(xvmsubasp, 4, float32, f32, MSUB_FLGS, 1, 0, 0)
|
||||
VSX_MADD(xvmsubmsp, 4, float32, f32, MSUB_FLGS, 0, 0, 0)
|
||||
VSX_MADD(xvnmaddasp, 4, float32, f32, NMADD_FLGS, 1, 0, 0)
|
||||
VSX_MADD(xvnmaddmsp, 4, float32, f32, NMADD_FLGS, 0, 0, 0)
|
||||
VSX_MADD(xvnmsubasp, 4, float32, f32, NMSUB_FLGS, 1, 0, 0)
|
||||
VSX_MADD(xvnmsubmsp, 4, float32, f32, NMSUB_FLGS, 0, 0, 0)
|
||||
|
||||
#define VSX_SCALAR_CMP(op, ordered) \
|
||||
void helper_##op(CPUPPCState *env, uint32_t opcode) \
|
||||
|
@ -293,6 +293,14 @@ DEF_HELPER_2(xsdivsp, void, env, i32)
|
||||
DEF_HELPER_2(xsresp, void, env, i32)
|
||||
DEF_HELPER_2(xssqrtsp, void, env, i32)
|
||||
DEF_HELPER_2(xsrsqrtesp, void, env, i32)
|
||||
DEF_HELPER_2(xsmaddasp, void, env, i32)
|
||||
DEF_HELPER_2(xsmaddmsp, void, env, i32)
|
||||
DEF_HELPER_2(xsmsubasp, void, env, i32)
|
||||
DEF_HELPER_2(xsmsubmsp, void, env, i32)
|
||||
DEF_HELPER_2(xsnmaddasp, void, env, i32)
|
||||
DEF_HELPER_2(xsnmaddmsp, void, env, i32)
|
||||
DEF_HELPER_2(xsnmsubasp, void, env, i32)
|
||||
DEF_HELPER_2(xsnmsubmsp, void, env, i32)
|
||||
|
||||
DEF_HELPER_2(xvadddp, void, env, i32)
|
||||
DEF_HELPER_2(xvsubdp, void, env, i32)
|
||||
|
@ -7365,6 +7365,14 @@ GEN_VSX_HELPER_2(xsdivsp, 0x00, 0x03, 0, PPC2_VSX207)
|
||||
GEN_VSX_HELPER_2(xsresp, 0x14, 0x01, 0, PPC2_VSX207)
|
||||
GEN_VSX_HELPER_2(xssqrtsp, 0x16, 0x00, 0, PPC2_VSX207)
|
||||
GEN_VSX_HELPER_2(xsrsqrtesp, 0x14, 0x00, 0, PPC2_VSX207)
|
||||
GEN_VSX_HELPER_2(xsmaddasp, 0x04, 0x00, 0, PPC2_VSX207)
|
||||
GEN_VSX_HELPER_2(xsmaddmsp, 0x04, 0x01, 0, PPC2_VSX207)
|
||||
GEN_VSX_HELPER_2(xsmsubasp, 0x04, 0x02, 0, PPC2_VSX207)
|
||||
GEN_VSX_HELPER_2(xsmsubmsp, 0x04, 0x03, 0, PPC2_VSX207)
|
||||
GEN_VSX_HELPER_2(xsnmaddasp, 0x04, 0x10, 0, PPC2_VSX207)
|
||||
GEN_VSX_HELPER_2(xsnmaddmsp, 0x04, 0x11, 0, PPC2_VSX207)
|
||||
GEN_VSX_HELPER_2(xsnmsubasp, 0x04, 0x12, 0, PPC2_VSX207)
|
||||
GEN_VSX_HELPER_2(xsnmsubmsp, 0x04, 0x13, 0, PPC2_VSX207)
|
||||
|
||||
GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
|
||||
GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
|
||||
@ -10179,6 +10187,14 @@ GEN_XX3FORM(xsdivsp, 0x00, 0x03, PPC2_VSX207),
|
||||
GEN_XX2FORM(xsresp, 0x14, 0x01, PPC2_VSX207),
|
||||
GEN_XX2FORM(xssqrtsp, 0x16, 0x00, PPC2_VSX207),
|
||||
GEN_XX2FORM(xsrsqrtesp, 0x14, 0x00, PPC2_VSX207),
|
||||
GEN_XX3FORM(xsmaddasp, 0x04, 0x00, PPC2_VSX207),
|
||||
GEN_XX3FORM(xsmaddmsp, 0x04, 0x01, PPC2_VSX207),
|
||||
GEN_XX3FORM(xsmsubasp, 0x04, 0x02, PPC2_VSX207),
|
||||
GEN_XX3FORM(xsmsubmsp, 0x04, 0x03, PPC2_VSX207),
|
||||
GEN_XX3FORM(xsnmaddasp, 0x04, 0x10, PPC2_VSX207),
|
||||
GEN_XX3FORM(xsnmaddmsp, 0x04, 0x11, PPC2_VSX207),
|
||||
GEN_XX3FORM(xsnmsubasp, 0x04, 0x12, PPC2_VSX207),
|
||||
GEN_XX3FORM(xsnmsubmsp, 0x04, 0x13, PPC2_VSX207),
|
||||
|
||||
GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
|
||||
GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
|
||||
|
Loading…
Reference in New Issue
Block a user