Significanly improve accuracy of FYL2X and FYL2XP1

Optimize FMUL instructions
This commit is contained in:
Stanislav Shwartsman 2004-07-31 20:19:38 +00:00
parent 789ed4da04
commit a7eef1d526
3 changed files with 97 additions and 11 deletions

View File

@ -217,8 +217,7 @@ invalid:
float128 x = normalizeRoundAndPackFloat128(0, aExp+0x3FEF, aSig, 0, status);
x = poly_l2(x, status);
x = float128_add(x, floatx80_to_float128(int32_to_floatx80(ExpDiff), status), status);
floatx80 r = float128_to_floatx80(x, status);
return floatx80_mul(r, b, status);
return floatx80_mul(b, x, status);
}
// =================================================
@ -348,6 +347,5 @@ invalid:
float128 x = normalizeRoundAndPackFloat128(aSign, aExp-0x10, aSig, 0, status);
x = poly_l2p1(x, status);
floatx80 r = float128_to_floatx80(x, status);
return floatx80_mul(r, b, status);
return floatx80_mul(b, x, status);
}

View File

@ -2569,6 +2569,7 @@ floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status_t &status)
// handle unsupported extended double-precision floating encodings
if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
{
invalid:
float_raise(status, float_flag_invalid);
return floatx80_default_nan;
}
@ -2587,18 +2588,18 @@ floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status_t &status)
{
return propagateFloatx80NaN(a, b, status);
}
if ((bExp | bSig) == 0) goto invalid;
if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
if (bExp == 0) {
if (bSig == 0) goto invalid;
float_raise(status, float_flag_denormal);
}
return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
}
if (bExp == 0x7FFF) {
if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
if ((aExp | aSig) == 0) {
invalid:
float_raise(status, float_flag_invalid);
return floatx80_default_nan;
if (aExp == 0) {
if (aSig == 0) goto invalid;
float_raise(status, float_flag_denormal);
}
if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
}
if (aExp == 0) {
@ -2848,6 +2849,88 @@ floatx80 float128_to_floatx80(float128 a, float_status_t &status)
return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
}
/*----------------------------------------------------------------------------
| Returns the result of multiplying the extended double-precision floating-
| point value `a' and quadruple-precision floating point value `b'. The
| operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
floatx80 floatx80_mul(floatx80 a, float128 b, float_status_t &status)
{
Bit32s aExp, bExp, zExp;
Bit64u aSig, bSig0, bSig1, zSig0, zSig1, zSig2;
int aSign, bSign, zSign;
// handle unsupported extended double-precision floating encodings
if (floatx80_is_unsupported(a))
{
invalid:
float_raise(status, float_flag_invalid);
return floatx80_default_nan;
}
aSig = extractFloatx80Frac(a);
aExp = extractFloatx80Exp(a);
aSign = extractFloatx80Sign(a);
bSig0 = extractFloat128Frac0(b);
bSig1 = extractFloat128Frac1(b);
bExp = extractFloat128Exp(b);
bSign = extractFloat128Sign(b);
zSign = aSign ^ bSign;
if (aExp == 0x7FFF) {
if ((Bit64u) (aSig<<1)
|| ((bExp == 0x7FFF) && (bSig0 | bSig1)))
{
floatx80 r = commonNaNToFloatx80(float128ToCommonNaN(b, status));
return propagateFloatx80NaN(a, r, status);
}
if (bExp == 0) {
if ((bSig0 | bSig1) == 0) goto invalid;
float_raise(status, float_flag_denormal);
}
return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
}
if (bExp == 0x7FFF) {
if (bSig0 | bSig1) {
floatx80 r = commonNaNToFloatx80(float128ToCommonNaN(b, status));
return propagateFloatx80NaN(a, r, status);
}
if (aExp == 0) {
if (aSig == 0) goto invalid;
float_raise(status, float_flag_denormal);
}
return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
}
if (aExp == 0) {
if (aSig == 0) {
if ((bExp == 0) && (bSig0 | bSig1)) float_raise(status, float_flag_denormal);
return packFloatx80(zSign, 0, 0);
}
float_raise(status, float_flag_denormal);
normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
}
if (bExp == 0) {
if ((bSig0 | bSig1) == 0) return packFloatx80(zSign, 0, 0);
float_raise(status, float_flag_denormal);
normalizeFloat128Subnormal(bSig0, bSig1, &bExp, &bSig0, &bSig1);
}
else bSig0 |= BX_CONST64(0x0001000000000000);
zExp = aExp + bExp - 0x3FFE;
shortShift128Left(bSig0, bSig1, 15, &bSig0, &bSig1);
mul128By64To192(bSig0, bSig1, aSig, &zSig0, &zSig1, &zSig2);
if (0 < (Bit64s) zSig0) {
shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
--zExp;
}
return
roundAndPackFloatx80(get_float_rounding_precision(status),
zSign, zExp, zSig0, zSig1, status);
}
/*----------------------------------------------------------------------------
| Returns the result of adding the absolute values of the quadruple-precision
| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated

View File

@ -320,6 +320,11 @@ struct float128 {
float128 floatx80_to_float128(floatx80 a, float_status_t &status);
floatx80 float128_to_floatx80(float128 a, float_status_t &status);
/*----------------------------------------------------------------------------
| Software IEC/IEEE extended double-precision operations.
*----------------------------------------------------------------------------*/
floatx80 floatx80_mul(floatx80 a, float128 b, float_status_t &status);
/*----------------------------------------------------------------------------
| Software IEC/IEEE quadruple-precision operations.
*----------------------------------------------------------------------------*/