Added the skeleton of the implementation for the last missing VSCALEF* AVX-512 instructions.

The softfloat implementation is still missing (only corner cases are supported).
Extend softfloat floatNN_class methods to distinguish between SNaN and QNaN.
This commit is contained in:
Stanislav Shwartsman 2014-03-09 21:42:11 +00:00
parent aad090d03c
commit 02e19de346
14 changed files with 235 additions and 99 deletions

View File

@ -69,6 +69,7 @@ EVEX_OP_PACKED_SINGLE(VMULPS_MASK_VpsHpsWpsR, xmm_mulps_mask)
EVEX_OP_PACKED_SINGLE(VDIVPS_MASK_VpsHpsWpsR, xmm_divps_mask)
EVEX_OP_PACKED_SINGLE(VMAXPS_MASK_VpsHpsWpsR, xmm_maxps_mask)
EVEX_OP_PACKED_SINGLE(VMINPS_MASK_VpsHpsWpsR, xmm_minps_mask)
EVEX_OP_PACKED_SINGLE(VSCALEFPS_MASK_VpsHpsWpsR, xmm_scalefps_mask)
#define EVEX_OP_PACKED_DOUBLE(HANDLER, func) \
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
@ -105,6 +106,7 @@ EVEX_OP_PACKED_DOUBLE(VMULPD_MASK_VpdHpdWpdR, xmm_mulpd_mask)
EVEX_OP_PACKED_DOUBLE(VDIVPD_MASK_VpdHpdWpdR, xmm_divpd_mask)
EVEX_OP_PACKED_DOUBLE(VMAXPD_MASK_VpdHpdWpdR, xmm_maxpd_mask)
EVEX_OP_PACKED_DOUBLE(VMINPD_MASK_VpdHpdWpdR, xmm_minpd_mask)
EVEX_OP_PACKED_DOUBLE(VSCALEFPD_MASK_VpdHpdWpdR, xmm_scalefpd_mask)
#define EVEX_OP_SCALAR_SINGLE(HANDLER, func) \
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
@ -137,6 +139,7 @@ EVEX_OP_SCALAR_SINGLE(VMULSS_MASK_VssHpsWssR, float32_mul)
EVEX_OP_SCALAR_SINGLE(VDIVSS_MASK_VssHpsWssR, float32_div)
EVEX_OP_SCALAR_SINGLE(VMINSS_MASK_VssHpsWssR, float32_min)
EVEX_OP_SCALAR_SINGLE(VMAXSS_MASK_VssHpsWssR, float32_max)
EVEX_OP_SCALAR_SINGLE(VSCALEFSS_MASK_VssHpsWssR, float32_scalef)
#define EVEX_OP_SCALAR_DOUBLE(HANDLER, func) \
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
@ -169,6 +172,7 @@ EVEX_OP_SCALAR_DOUBLE(VMULSD_MASK_VsdHpdWsdR, float64_mul)
EVEX_OP_SCALAR_DOUBLE(VDIVSD_MASK_VsdHpdWsdR, float64_div)
EVEX_OP_SCALAR_DOUBLE(VMINSD_MASK_VsdHpdWsdR, float64_min)
EVEX_OP_SCALAR_DOUBLE(VMAXSD_MASK_VsdHpdWsdR, float64_max)
// VSCALEFSD works on double-precision scalars and calls float64_scalef, so it
// has to be generated by the double-precision handler macro (like the other
// *SD handlers in this group), not by the single-precision one.
EVEX_OP_SCALAR_DOUBLE(VSCALEFSD_MASK_VsdHpdWsdR, float64_scalef)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTPS_MASK_VpsWpsR(bxInstruction_c *i)
{
@ -416,14 +420,13 @@ float32 float32_fixupimm(float32 dst, float32 op1, Bit32u op2, unsigned imm8, fl
ie_fault_mask = 0x80;
break;
case float_NaN:
if (float32_is_signaling_nan(tmp_op1)) {
token = BX_FIXUPIMM_SNAN_TOKEN;
ie_fault_mask = 0x10;
}
else {
token = BX_FIXUPIMM_QNAN_TOKEN;
}
case float_SNaN:
token = BX_FIXUPIMM_SNAN_TOKEN;
ie_fault_mask = 0x10;
break;
case float_QNaN:
token = BX_FIXUPIMM_QNAN_TOKEN;
break;
case float_denormal:
@ -512,14 +515,13 @@ float64 float64_fixupimm(float64 dst, float64 op1, Bit32u op2, unsigned imm8, fl
ie_fault_mask = 0x80;
break;
case float_NaN:
if (float64_is_signaling_nan(tmp_op1)) {
token = BX_FIXUPIMM_SNAN_TOKEN;
ie_fault_mask = 0x10;
}
else {
token = BX_FIXUPIMM_QNAN_TOKEN;
}
case float_SNaN:
token = BX_FIXUPIMM_SNAN_TOKEN;
ie_fault_mask = 0x10;
break;
case float_QNaN:
token = BX_FIXUPIMM_QNAN_TOKEN;
break;
case float_denormal:
@ -1112,34 +1114,4 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRNDSCALESD_MASK_VsdHpdWsdIbR(bxIn
BX_NEXT_INSTR(i);
}
// scale
// Placeholder handler VSCALEFPS_MASK_VpsWpsR: no emulation is performed.
// It reports a panic that names the opcode (short form) as not implemented,
// then invokes BX_NEXT_INSTR(i) (presumably to move on to the next instruction).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPS_MASK_VpsWpsR(bxInstruction_c *i)
{
BX_PANIC(("%s: AVX-512 instruction still not implemented", i->getIaOpcodeNameShort()));
BX_NEXT_INSTR(i);
}
// Placeholder handler VSCALEFSS_MASK_VssHpsWssR: no emulation is performed.
// It reports a panic that names the opcode (short form) as not implemented,
// then invokes BX_NEXT_INSTR(i) (presumably to move on to the next instruction).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFSS_MASK_VssHpsWssR(bxInstruction_c *i)
{
BX_PANIC(("%s: AVX-512 instruction still not implemented", i->getIaOpcodeNameShort()));
BX_NEXT_INSTR(i);
}
// Placeholder handler VSCALEFPD_MASK_VpdWpdR: no emulation is performed.
// It reports a panic that names the opcode (short form) as not implemented,
// then invokes BX_NEXT_INSTR(i) (presumably to move on to the next instruction).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPD_MASK_VpdWpdR(bxInstruction_c *i)
{
BX_PANIC(("%s: AVX-512 instruction still not implemented", i->getIaOpcodeNameShort()));
BX_NEXT_INSTR(i);
}
// Placeholder handler VSCALEFSD_MASK_VsdHpdWsdR: no emulation is performed.
// It reports a panic that names the opcode (short form) as not implemented,
// then invokes BX_NEXT_INSTR(i) (presumably to move on to the next instruction).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFSD_MASK_VsdHpdWsdR(bxInstruction_c *i)
{
BX_PANIC(("%s: AVX-512 instruction still not implemented", i->getIaOpcodeNameShort()));
BX_NEXT_INSTR(i);
}
#endif

View File

@ -8281,7 +8281,8 @@ float32 approximate_rcp14(float32 op, const float_status_t &status)
case float_positive_inf:
return packFloat32(sign, 0, 0);
case float_NaN:
case float_SNaN:
case float_QNaN:
return convert_to_QNaN(op);
// the rcp14 handle denormals properly
@ -8332,7 +8333,8 @@ float64 approximate_rcp14(float64 op, const float_status_t &status)
case float_positive_inf:
return packFloat64(sign, 0, 0);
case float_NaN:
case float_SNaN:
case float_QNaN:
return convert_to_QNaN(op);
// the rcp14 handle denormals properly

View File

@ -8257,7 +8257,8 @@ float32 approximate_rsqrt14(float32 op, bx_bool daz)
case float_negative_inf:
return float32_default_nan;
case float_NaN:
case float_SNaN:
case float_QNaN:
return convert_to_QNaN(op);
case float_denormal:
@ -8307,7 +8308,8 @@ float64 approximate_rsqrt14(float64 op, bx_bool daz)
case float_negative_inf:
return float64_default_nan;
case float_NaN:
case float_SNaN:
case float_QNaN:
return convert_to_QNaN(op);
case float_denormal:

View File

@ -3261,8 +3261,8 @@ public: // for now...
BX_SMF BX_INSF_TYPE VRNDSCALESS_MASK_VssHpsWssIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VRNDSCALESD_MASK_VsdHpdWsdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VSCALEFPS_MASK_VpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VSCALEFPD_MASK_VpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VSCALEFPS_MASK_VpsHpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VSCALEFPD_MASK_VpdHpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VSCALEFSS_MASK_VssHpsWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VSCALEFSD_MASK_VsdHpdWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

View File

@ -1250,8 +1250,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 2A */ { 0, BX_IA_ERROR }, // #UD
/* 2B k0 */ { 0, BX_IA_ERROR },
/* 2B */ { 0, BX_IA_ERROR },
/* 2C k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsWps_Kmask },
/* 2C */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsWps_Kmask },
/* 2C k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsHpsWps_Kmask },
/* 2C */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsHpsWps_Kmask },
/* 2D k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFSS_VssHpsWss_Kmask },
/* 2D */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFSS_VssHpsWss_Kmask },
/* 2E k0 */ { 0, BX_IA_ERROR },

View File

@ -536,7 +536,7 @@ void BX_CPU_C::print_state_FPU(void)
"32", "RES", "64", "80"
};
static const char* fp_class[] = {
"ZERO", "xNAN", "-INF", "+INF", "DENORMAL", "NORMAL"
"ZERO", "SNAN", "QNAN", "-INF", "+INF", "DENORMAL", "NORMAL"
};
Bit32u reg;
@ -601,7 +601,7 @@ void BX_CPU_C::print_state_FPU(void)
i==tos?"=>":" ", i, (i-tos)&7,
"v0se"[tag],
fp.exp & 0xffff, GET32H(fp.fraction), GET32L(fp.fraction),
f, (f_class == float_NaN) ? (floatx80_is_signaling_nan(fp) ? "SNAN" : "QNAN") : fp_class[f_class]);
f, fp_class[f_class]);
}
}

View File

@ -529,7 +529,8 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::FXAM(bxInstruction_c *i)
setcc(FPU_SW_C3|FPU_SW_C1);
break;
case float_NaN:
case float_SNaN:
case float_QNaN:
// unsupported handled as NaNs
if (floatx80_is_unsupported(reg)) {
setcc(FPU_SW_C1);

View File

@ -708,6 +708,66 @@ float32 float32_getmant(float32 a, float_status_t &status, int sign_ctrl, int in
return packFloat32(~sign_ctrl & aSign, aExp, aSig);
}
/*----------------------------------------------------------------------------
| Scales the single-precision floating-point value `a' by two raised to the
| single-precision floating-point value `b' converted to an integral value.
| Results that do not fit in single precision are meant to produce the usual
| overflow response (positive scale) or underflow response (negative scale).
|
| Only the special operand combinations are resolved at the moment:
|   - `b' is a NaN ................... NaN propagation
|   - `a' is a signaling NaN ......... NaN propagation
|   - `a' is a quiet NaN ............. +infinity if `b' is +infinity,
|                                      +0 if `b' is -infinity,
|                                      NaN propagation otherwise
|   - `a' is infinity ................ invalid + default NaN if `b' is
|                                      -infinity, `a' otherwise
|   - `a' is zero (or DAZ denormal) .. invalid + default NaN if `b' is
|                                      +infinity, `a' otherwise
|   - `b' is infinity ................ infinity (b > 0) or zero (b < 0)
|                                      carrying the sign of `a'
| Every other input currently yields +0 (see the fixme at the end).
*----------------------------------------------------------------------------*/
float32 float32_scalef(float32 a, float32 b, float_status_t &status)
{
    const int signA = extractFloat32Sign(a);
    const int signB = extractFloat32Sign(b);
    const Bit16s expA = extractFloat32Exp(a);
    const Bit16s expB = extractFloat32Exp(b);
    Bit32u fracA = extractFloat32Frac(a);
    Bit32u fracB = extractFloat32Frac(b);

    /* with denormals-are-zeros, a denormal operand is examined as a zero */
    if (get_denormals_are_zeros(status)) {
        if (expA == 0) fracA = 0;
        if (expB == 0) fracB = 0;
    }

    /* a NaN scale operand always goes through NaN propagation */
    if (expB == 0xFF && fracB != 0)
        return propagateFloat32NaN(a, b, status);

    /* b is not a NaN past this point, so a maximal exponent means infinity */
    const bool bIsInf = (expB == 0xFF);

    if (expA == 0xFF) {
        if (fracA == 0) {
            /* infinity scaled by 2^(-infinity) is an invalid operation */
            if (bIsInf && signB) {
                float_raise(status, float_flag_invalid);
                return float32_default_nan;
            }
            return a;
        }

        /* quiet bit is the most significant fraction bit */
        const bool aIsQuiet = (fracA & 0x00400000) != 0;
        if (aIsQuiet && bIsInf) {
            if (signB) return 0;
            return float32_positive_inf;
        }
        return propagateFloat32NaN(a, b, status);
    }

    if (expA == 0 && fracA == 0) {
        /* zero scaled by 2^(+infinity) is an invalid operation */
        if (bIsInf && !signB) {
            float_raise(status, float_flag_invalid);
            return float32_default_nan;
        }
        return a;
    }

    if (bIsInf) {
        /* finite non-zero `a': collapse to zero or blow up to infinity */
        const Bit16s resultExp = signB ? 0 : 0xFF;
        return packFloat32(signA, resultExp, 0);
    }

    return 0; // fixme
}
/*----------------------------------------------------------------------------
| Returns the result of adding the absolute values of the single-precision
| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
@ -1136,7 +1196,7 @@ float32 float32_sqrt(float32 a, float_status_t &status)
}
/*----------------------------------------------------------------------------
| Determine single-precision floating-point number class
| Determine single-precision floating-point number class.
*----------------------------------------------------------------------------*/
float_class_t float32_class(float32 a)
@ -1149,7 +1209,7 @@ float_class_t float32_class(float32 a)
if (aSig == 0)
return (aSign) ? float_negative_inf : float_positive_inf;
return float_NaN;
return (aSig & 0x00400000) ? float_QNaN : float_SNaN;
}
if(aExp == 0) {
@ -1178,13 +1238,13 @@ int float32_compare(float32 a, float32 b, float_status_t &status)
float_class_t aClass = float32_class(a);
float_class_t bClass = float32_class(b);
if (aClass == float_NaN || bClass == float_NaN) {
if (aClass == float_SNaN || aClass == float_QNaN || bClass == float_SNaN || bClass == float_QNaN)
{
float_raise(status, float_flag_invalid);
return float_relation_unordered;
}
if (aClass == float_denormal || bClass == float_denormal)
{
if (aClass == float_denormal || bClass == float_denormal) {
float_raise(status, float_flag_denormal);
}
@ -1218,17 +1278,16 @@ int float32_compare_quiet(float32 a, float32 b, float_status_t &status)
float_class_t aClass = float32_class(a);
float_class_t bClass = float32_class(b);
if (aClass == float_NaN || bClass == float_NaN)
{
if (float32_is_signaling_nan(a) || float32_is_signaling_nan(b))
{
float_raise(status, float_flag_invalid);
}
if (aClass == float_SNaN || bClass == float_SNaN) {
float_raise(status, float_flag_invalid);
return float_relation_unordered;
}
if (aClass == float_denormal || bClass == float_denormal)
{
if (aClass == float_QNaN || bClass == float_QNaN) {
return float_relation_unordered;
}
if (aClass == float_denormal || bClass == float_denormal) {
float_raise(status, float_flag_denormal);
}
@ -1815,6 +1874,66 @@ float64 float64_getmant(float64 a, float_status_t &status, int sign_ctrl, int in
return packFloat64(~sign_ctrl & aSign, aExp, aSig);
}
/*----------------------------------------------------------------------------
| Scales the double-precision floating-point value `a' by two raised to the
| double-precision floating-point value `b' converted to an integral value.
| Results that do not fit in double precision are meant to produce the usual
| overflow response (positive scale) or underflow response (negative scale).
|
| Only the special operand combinations are resolved at the moment:
|   - `b' is a NaN ................... NaN propagation
|   - `a' is a signaling NaN ......... NaN propagation
|   - `a' is a quiet NaN ............. +infinity if `b' is +infinity,
|                                      +0 if `b' is -infinity,
|                                      NaN propagation otherwise
|   - `a' is infinity ................ invalid + default NaN if `b' is
|                                      -infinity, `a' otherwise
|   - `a' is zero (or DAZ denormal) .. invalid + default NaN if `b' is
|                                      +infinity, `a' otherwise
|   - `b' is infinity ................ infinity (b > 0) or zero (b < 0)
|                                      carrying the sign of `a'
| Every other input currently yields +0 (see the fixme at the end).
*----------------------------------------------------------------------------*/
float64 float64_scalef(float64 a, float64 b, float_status_t &status)
{
    const int signA = extractFloat64Sign(a);
    const int signB = extractFloat64Sign(b);
    const Bit16s expA = extractFloat64Exp(a);
    const Bit16s expB = extractFloat64Exp(b);
    Bit64u fracA = extractFloat64Frac(a);
    Bit64u fracB = extractFloat64Frac(b);

    /* with denormals-are-zeros, a denormal operand is examined as a zero */
    if (get_denormals_are_zeros(status)) {
        if (expA == 0) fracA = 0;
        if (expB == 0) fracB = 0;
    }

    /* a NaN scale operand always goes through NaN propagation */
    if (expB == 0x7FF && fracB != 0)
        return propagateFloat64NaN(a, b, status);

    /* b is not a NaN past this point, so a maximal exponent means infinity */
    const bool bIsInf = (expB == 0x7FF);

    if (expA == 0x7FF) {
        if (fracA == 0) {
            /* infinity scaled by 2^(-infinity) is an invalid operation */
            if (bIsInf && signB) {
                float_raise(status, float_flag_invalid);
                return float64_default_nan;
            }
            return a;
        }

        /* quiet bit is the most significant fraction bit */
        const bool aIsQuiet = (fracA & BX_CONST64(0x0008000000000000)) != 0;
        if (aIsQuiet && bIsInf) {
            if (signB) return 0;
            return float64_positive_inf;
        }
        return propagateFloat64NaN(a, b, status);
    }

    if (expA == 0 && fracA == 0) {
        /* zero scaled by 2^(+infinity) is an invalid operation */
        if (bIsInf && !signB) {
            float_raise(status, float_flag_invalid);
            return float64_default_nan;
        }
        return a;
    }

    if (bIsInf) {
        /* finite non-zero `a': collapse to zero or blow up to infinity */
        const Bit16s resultExp = signB ? 0 : 0x7FF;
        return packFloat64(signA, resultExp, 0);
    }

    return 0; // fixme
}
/*----------------------------------------------------------------------------
| Returns the result of adding the absolute values of the double-precision
| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
@ -2258,7 +2377,7 @@ float_class_t float64_class(float64 a)
if (aSig == 0)
return (aSign) ? float_negative_inf : float_positive_inf;
return float_NaN;
return (aSig & BX_CONST64(0x0008000000000000)) ? float_QNaN : float_SNaN;
}
if(aExp == 0) {
@ -2288,13 +2407,13 @@ int float64_compare(float64 a, float64 b, float_status_t &status)
float_class_t aClass = float64_class(a);
float_class_t bClass = float64_class(b);
if (aClass == float_NaN || bClass == float_NaN) {
if (aClass == float_SNaN || aClass == float_QNaN || bClass == float_SNaN || bClass == float_QNaN)
{
float_raise(status, float_flag_invalid);
return float_relation_unordered;
}
if (aClass == float_denormal || bClass == float_denormal)
{
if (aClass == float_denormal || bClass == float_denormal) {
float_raise(status, float_flag_denormal);
}
@ -2328,17 +2447,16 @@ int float64_compare_quiet(float64 a, float64 b, float_status_t &status)
float_class_t aClass = float64_class(a);
float_class_t bClass = float64_class(b);
if (aClass == float_NaN || bClass == float_NaN)
{
if (float64_is_signaling_nan(a) || float64_is_signaling_nan(b))
{
float_raise(status, float_flag_invalid);
}
if (aClass == float_SNaN || bClass == float_SNaN) {
float_raise(status, float_flag_invalid);
return float_relation_unordered;
}
if (aClass == float_denormal || bClass == float_denormal)
{
if (aClass == float_QNaN || bClass == float_QNaN) {
return float_relation_unordered;
}
if (aClass == float_denormal || bClass == float_denormal) {
float_raise(status, float_flag_denormal);
}

View File

@ -54,7 +54,8 @@ typedef Bit64u float64;
*----------------------------------------------------------------------------*/
typedef enum {
float_zero,
float_NaN,
float_SNaN,
float_QNaN,
float_negative_inf,
float_positive_inf,
float_denormal,
@ -264,6 +265,7 @@ float32 float32_div(float32, float32, float_status_t &status);
float32 float32_sqrt(float32, float_status_t &status);
float32 float32_frc(float32, float_status_t &status);
float32 float32_muladd(float32, float32, float32, int flags, float_status_t &status);
float32 float32_scalef(float32, float32, float_status_t &status);
BX_CPP_INLINE float32 float32_round_to_int(float32 a, float_status_t &status)
{
@ -329,6 +331,7 @@ float64 float64_div(float64, float64, float_status_t &status);
float64 float64_sqrt(float64, float_status_t &status);
float64 float64_frc(float64, float_status_t &status);
float64 float64_muladd(float64, float64, float64, int flags, float_status_t &status);
float64 float64_scalef(float64, float64, float_status_t &status);
BX_CPP_INLINE float64 float64_round_to_int(float64 a, float_status_t &status)
{

View File

@ -54,7 +54,7 @@ float_class_t float16_class(float16 a)
if (aSig == 0)
return (aSign) ? float_negative_inf : float_positive_inf;
return float_NaN;
return (aSig & 0x200) ? float_QNaN : float_SNaN;
}
if(aExp == 0) {

View File

@ -224,7 +224,7 @@ float_class_t floatx80_class(floatx80 a)
/* valid numbers have the MS bit set */
if (!(aSig & BX_CONST64(0x8000000000000000)))
return float_NaN; /* report unsupported as NaNs */
return float_SNaN; /* report unsupported as SNaNs */
if(aExp == 0x7fff) {
int aSign = extractFloatx80Sign(a);
@ -232,7 +232,7 @@ float_class_t floatx80_class(floatx80 a)
if (((Bit64u) (aSig<< 1)) == 0)
return (aSign) ? float_negative_inf : float_positive_inf;
return float_NaN;
/* The explicit integer bit (bit 63) has already been verified to be set by
   the unsupported-encoding check above, so including it in the mask would
   classify every NaN as quiet. Only bit 62, the quiet bit of the fraction,
   separates QNaN from SNaN. */
return (aSig & BX_CONST64(0x4000000000000000)) ? float_QNaN : float_SNaN;
}
return float_normalized;
@ -251,14 +251,13 @@ int floatx80_compare(floatx80 a, floatx80 b, float_status_t &status)
float_class_t aClass = floatx80_class(a);
float_class_t bClass = floatx80_class(b);
if (aClass == float_NaN || bClass == float_NaN)
if (aClass == float_SNaN || aClass == float_QNaN || bClass == float_SNaN || bClass == float_QNaN)
{
float_raise(status, float_flag_invalid);
return float_relation_unordered;
}
if (aClass == float_denormal || bClass == float_denormal)
{
if (aClass == float_denormal || bClass == float_denormal) {
float_raise(status, float_flag_denormal);
}
@ -310,19 +309,20 @@ int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status_t &status)
float_class_t aClass = floatx80_class(a);
float_class_t bClass = floatx80_class(b);
if (aClass == float_NaN || bClass == float_NaN)
if (aClass == float_SNaN || bClass == float_SNaN)
{
if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
float_raise(status, float_flag_invalid);
if (floatx80_is_signaling_nan(a) || floatx80_is_signaling_nan(b))
float_raise(status, float_flag_invalid);
float_raise(status, float_flag_invalid);
return float_relation_unordered;
}
if (aClass == float_denormal || bClass == float_denormal)
{
if (aClass == float_QNaN || bClass == float_QNaN) {
return float_relation_unordered;
}
if (aClass == float_denormal || bClass == float_denormal) {
float_raise(status, float_flag_denormal);
}

View File

@ -2952,8 +2952,8 @@ bx_define_opcode(BX_IA_V512_VGETMANTPD_VpdWpdIb_Kmask, &BX_CPU_C::LOAD_BROADCAST
bx_define_opcode(BX_IA_V512_VGETMANTSS_VssHpsWssIb_Kmask, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VGETMANTSS_MASK_VssHpsWssIbR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_Ib, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VGETMANTSD_VsdHpdWsdIb_Kmask, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VGETMANTSD_MASK_VsdHpdWsdIbR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_mVsd, OP_Ib, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VSCALEFPS_VpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VSCALEFPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_mVps, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VSCALEFPD_VpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VSCALEFPD_MASK_VpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_mVpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VSCALEFPS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VSCALEFPS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_mVps, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VSCALEFPD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VSCALEFPD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_mVpd, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VSCALEFSS_VssHpsWss_Kmask, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VSCALEFSS_MASK_VssHpsWssR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VSCALEFSD_VsdHpdWsd_Kmask, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VSCALEFSD_MASK_VsdHpdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_mVsd, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)

View File

@ -694,4 +694,40 @@ BX_CPP_INLINE void xmm_getexppd_mask(BxPackedXmmRegister *op, float_status_t &st
}
}
// scalef
// Applies float32_scalef to each of the four 32-bit elements: element k of
// op1 is overwritten with float32_scalef(op1[k], op2[k], status). op2 is only read.
BX_CPP_INLINE void xmm_scalefps(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, float_status_t &status)
{
  unsigned lane = 0;
  while (lane < 4) {
    op1->xmm32u(lane) = float32_scalef(op1->xmm32u(lane), op2->xmm32u(lane), status);
    ++lane;
  }
}
// Masked variant over the four 32-bit elements: bit k of mask selects whether
// element k of op1 receives float32_scalef(op1[k], op2[k], status) or is
// cleared to zero. float32_scalef is not called for unselected elements.
BX_CPP_INLINE void xmm_scalefps_mask(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, float_status_t &status, Bit32u mask)
{
  for (unsigned lane = 0; lane < 4; ++lane) {
    if (((mask >> lane) & 0x1) == 0) {
      op1->xmm32u(lane) = 0;
      continue;
    }
    op1->xmm32u(lane) = float32_scalef(op1->xmm32u(lane), op2->xmm32u(lane), status);
  }
}
// Applies float64_scalef to each of the two 64-bit elements: element k of
// op1 is overwritten with float64_scalef(op1[k], op2[k], status). op2 is only read.
BX_CPP_INLINE void xmm_scalefpd(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, float_status_t &status)
{
  unsigned lane = 0;
  while (lane < 2) {
    op1->xmm64u(lane) = float64_scalef(op1->xmm64u(lane), op2->xmm64u(lane), status);
    ++lane;
  }
}
// Masked variant over the two 64-bit elements: bit k of mask selects whether
// element k of op1 receives float64_scalef(op1[k], op2[k], status) or is
// cleared to zero. float64_scalef is not called for unselected elements.
BX_CPP_INLINE void xmm_scalefpd_mask(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, float_status_t &status, Bit32u mask)
{
  for (unsigned lane = 0; lane < 2; ++lane) {
    if (((mask >> lane) & 0x1) == 0) {
      op1->xmm64u(lane) = 0;
      continue;
    }
    op1->xmm64u(lane) = float64_scalef(op1->xmm64u(lane), op2->xmm64u(lane), status);
  }
}
#endif

View File

@ -304,7 +304,8 @@ float32 approximate_rcp(float32 op)
case float_positive_inf:
return packFloat32(sign, 0, 0);
case float_NaN:
case float_SNaN:
case float_QNaN:
return convert_to_QNaN(op);
case float_normalized:
@ -651,7 +652,8 @@ float32 approximate_rsqrt(float32 op)
case float_negative_inf:
return float32_default_nan;
case float_NaN:
case float_SNaN:
case float_QNaN:
return convert_to_QNaN(op);
case float_normalized: