Fast table-based implementation of reciprocal (RCPSS/RCPPS)
This implementation is much clearer than the old one. RSQRTSS/RSQRTPS are coming soon.
This commit is contained in:
parent
e7e0b40bd1
commit
52d75d7aed
@ -29,7 +29,6 @@
|
||||
static void prepare_softfloat_status_word
|
||||
(softfloat_status_word_t &status, int rounding_mode)
|
||||
{
|
||||
status.float_precision = 32;
|
||||
status.float_detect_tininess = float_tininess_before_rounding;
|
||||
status.float_exception_flags = 0; // clear exceptions before execution
|
||||
status.float_nan_handling_mode = float_first_operand_nan;
|
||||
|
@ -68,6 +68,7 @@ OBJS = \
|
||||
sse.o \
|
||||
sse_move.o \
|
||||
sse_pfp.o \
|
||||
sse_rcp.o \
|
||||
soft_int.o \
|
||||
io_pro.o \
|
||||
$(APIC_OBJS) \
|
||||
@ -939,6 +940,19 @@ sse_pfp.o: sse_pfp.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h ../bx_debug/de
|
||||
../iodev/serial.h ../iodev/unmapped.h ../iodev/eth.h ../iodev/ne2k.h \
|
||||
../iodev/guest2host.h ../iodev/slowdown_timer.h ../iodev/extfpuirq.h \
|
||||
../instrument/stubs/instrument.h softfloat.h
|
||||
# sse_rcp.o is compiled from sse_rcp.@CPP_SUFFIX@, so that file (not
# sse_pfp.@CPP_SUFFIX@) must head the prerequisite list.
sse_rcp.o: sse_rcp.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h ../bx_debug/debug.h \
  ../bxversion.h ../gui/siminterface.h ../state_file.h ../cpu/cpu.h \
  ../cpu/lazy_flags.h ../cpu/i387.h ../cpu/xmm.h ../memory/memory.h \
  ../pc_system.h ../plugin.h ../extplugin.h ../gui/gui.h \
  ../gui/textconfig.h ../gui/keymap.h ../iodev/iodev.h ../iodev/pci.h \
  ../iodev/pci2isa.h ../iodev/pcivga.h ../iodev/vga.h ../iodev/ioapic.h \
  ../iodev/biosdev.h ../iodev/cmos.h ../iodev/dma.h ../iodev/floppy.h \
  ../iodev/harddrv.h ../iodev/cdrom.h ../iodev/vmware3.h \
  ../iodev/keyboard.h ../iodev/parallel.h ../iodev/pic.h ../iodev/pit.h \
  ../iodev/pit_wrap.h ../iodev/pit82c54.h ../iodev/virt_timer.h \
  ../iodev/serial.h ../iodev/unmapped.h ../iodev/eth.h ../iodev/ne2k.h \
  ../iodev/guest2host.h ../iodev/slowdown_timer.h ../iodev/extfpuirq.h \
  ../instrument/stubs/instrument.h softfloat.h softfloat-specialize.h
|
||||
stack16.o: stack16.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h ../bx_debug/debug.h \
|
||||
../bxversion.h ../gui/siminterface.h ../state_file.h ../cpu/cpu.h \
|
||||
../cpu/lazy_flags.h ../cpu/i387.h ../cpu/xmm.h ../memory/memory.h \
|
||||
|
@ -75,15 +75,6 @@ BX_CPP_INLINE int get_flush_underflow_to_zero(float_status_t &status)
|
||||
return status.flush_underflow_to_zero;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns current floating point precision.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE int get_float_precision(float_status_t &status)
|
||||
{
|
||||
return status.float_precision;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Internal canonical NaN format.
|
||||
*----------------------------------------------------------------------------*/
|
||||
@ -97,10 +88,59 @@ typedef struct {
|
||||
| The pattern for a default generated single-precision NaN.
|
||||
*----------------------------------------------------------------------------*/
|
||||
#define float32_default_nan 0xFFC00000
|
||||
/* Default NaN pattern used in another version:
#define float32_default_nan 0x7FFFFFFF
*/
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the fraction bits of the single-precision floating-point value `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE Bit32u extractFloat32Frac(float32 a)
|
||||
{
|
||||
return a & 0x007FFFFF;
|
||||
}
|
||||
|
||||
#define float32_fraction extractFloat32Frac
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the exponent bits of the single-precision floating-point value `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE Bit16s extractFloat32Exp(float32 a)
|
||||
{
|
||||
return (a>>23) & 0xFF;
|
||||
}
|
||||
|
||||
#define float32_exp extractFloat32Exp
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the sign bit of the single-precision floating-point value `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE flag extractFloat32Sign(float32 a)
|
||||
{
|
||||
return a>>31;
|
||||
}
|
||||
|
||||
#define float32_sign extractFloat32Sign
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
|
||||
| single-precision floating-point value, returning the result. After being
|
||||
| shifted into the proper positions, the three fields are simply added
|
||||
| together to form the result. This means that any integer portion of `zSig'
|
||||
| will be added into the exponent. Since a properly normalized significand
|
||||
| will have an integer portion equal to 1, the `zExp' input should be 1 less
|
||||
| than the desired result exponent whenever `zSig' is a complete, normalized
|
||||
| significand.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE float32 packFloat32(flag zSign, Bit16s zExp, Bit32u zSig)
|
||||
{
|
||||
return (((Bit32u) zSign)<<31) + (((Bit32u) zExp)<<23) + zSig;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns 1 if the single-precision floating-point value `a' is a NaN;
|
||||
| otherwise returns 0.
|
||||
@ -189,10 +229,59 @@ static float32 propagateFloat32NaN(float32 a, float32 b, float_status_t &status)
|
||||
| The pattern for a default generated double-precision NaN.
|
||||
*----------------------------------------------------------------------------*/
|
||||
#define float64_default_nan BX_CONST64(0xFFF8000000000000)
|
||||
/* Default NaN pattern used in another version:
#define float64_default_nan BX_CONST64(0x7FFFFFFFFFFFFFFF)
*/
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the fraction bits of the double-precision floating-point value `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE Bit64u extractFloat64Frac(float64 a)
|
||||
{
|
||||
return a & BX_CONST64(0x000FFFFFFFFFFFFF);
|
||||
}
|
||||
|
||||
#define float64_fraction extractFloat64Frac
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the exponent bits of the double-precision floating-point value `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE Bit16s extractFloat64Exp(float64 a)
|
||||
{
|
||||
return (a>>52) & 0x7FF;
|
||||
}
|
||||
|
||||
#define float64_exp extractFloat64Exp
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the sign bit of the double-precision floating-point value `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE flag extractFloat64Sign(float64 a)
|
||||
{
|
||||
return a>>63;
|
||||
}
|
||||
|
||||
#define float64_sign extractFloat64Sign
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
|
||||
| double-precision floating-point value, returning the result. After being
|
||||
| shifted into the proper positions, the three fields are simply added
|
||||
| together to form the result. This means that any integer portion of `zSig'
|
||||
| will be added into the exponent. Since a properly normalized significand
|
||||
| will have an integer portion equal to 1, the `zExp' input should be 1 less
|
||||
| than the desired result exponent whenever `zSig' is a complete, normalized
|
||||
| significand.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE float64 packFloat64(flag zSign, Bit16s zExp, Bit64u zSig)
|
||||
{
|
||||
return (((Bit64u) zSign)<<63) + (((Bit64u) zExp)<<52) + zSig;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns 1 if the double-precision floating-point value `a' is a NaN;
|
||||
| otherwise returns 0.
|
||||
|
@ -150,33 +150,6 @@ static Bit64s roundAndPackInt64(flag zSign, Bit64u absZ0, Bit64u absZ1, float_st
|
||||
return z;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the fraction bits of the single-precision floating-point value `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE Bit32u extractFloat32Frac(float32 a)
|
||||
{
|
||||
return a & 0x007FFFFF;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the exponent bits of the single-precision floating-point value `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE Bit16s extractFloat32Exp(float32 a)
|
||||
{
|
||||
return (a>>23) & 0xFF;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the sign bit of the single-precision floating-point value `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE flag extractFloat32Sign(float32 a)
|
||||
{
|
||||
return a>>31;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Determine single-precision floating-point number class
|
||||
*----------------------------------------------------------------------------*/
|
||||
@ -219,22 +192,6 @@ static void
|
||||
*zExpPtr = 1 - shiftCount;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
|
||||
| single-precision floating-point value, returning the result. After being
|
||||
| shifted into the proper positions, the three fields are simply added
|
||||
| together to form the result. This means that any integer portion of `zSig'
|
||||
| will be added into the exponent. Since a properly normalized significand
|
||||
| will have an integer portion equal to 1, the `zExp' input should be 1 less
|
||||
| than the desired result exponent whenever `zSig' is a complete, normalized
|
||||
| significand.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE float32 packFloat32(flag zSign, Bit16s zExp, Bit32u zSig)
|
||||
{
|
||||
return (((Bit32u) zSign)<<31) + (((Bit32u) zExp)<<23) + zSig;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
||||
| and significand `zSig', and returns the proper single-precision floating-
|
||||
@ -264,14 +221,8 @@ static float32 roundAndPackFloat32(flag zSign, Bit16s zExp, Bit32u zSig, float_s
|
||||
|
||||
roundingMode = get_float_rounding_mode(status);
|
||||
int roundNearestEven = (roundingMode == float_round_nearest_even);
|
||||
|
||||
if(get_float_precision(status) == 12) {
|
||||
roundIncrement = 0x20000;
|
||||
roundMask = 0x3FFFF;
|
||||
} else {
|
||||
roundIncrement = 0x40;
|
||||
roundMask = 0x7F;
|
||||
}
|
||||
roundIncrement = 0x40;
|
||||
roundMask = 0x7F;
|
||||
|
||||
if (! roundNearestEven) {
|
||||
if (roundingMode == float_round_to_zero) {
|
||||
@ -336,33 +287,6 @@ static float32
|
||||
return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount, status);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the fraction bits of the double-precision floating-point value `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE Bit64u extractFloat64Frac(float64 a)
|
||||
{
|
||||
return a & BX_CONST64(0x000FFFFFFFFFFFFF);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the exponent bits of the double-precision floating-point value `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE Bit16s extractFloat64Exp(float64 a)
|
||||
{
|
||||
return (a>>52) & 0x7FF;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the sign bit of the double-precision floating-point value `a'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE flag extractFloat64Sign(float64 a)
|
||||
{
|
||||
return a>>63;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Determine double-precision floating-point number class
|
||||
*----------------------------------------------------------------------------*/
|
||||
@ -405,22 +329,6 @@ static void
|
||||
*zExpPtr = 1 - shiftCount;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
|
||||
| double-precision floating-point value, returning the result. After being
|
||||
| shifted into the proper positions, the three fields are simply added
|
||||
| together to form the result. This means that any integer portion of `zSig'
|
||||
| will be added into the exponent. Since a properly normalized significand
|
||||
| will have an integer portion equal to 1, the `zExp' input should be 1 less
|
||||
| than the desired result exponent whenever `zSig' is a complete, normalized
|
||||
| significand.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE float64 packFloat64(flag zSign, Bit16s zExp, Bit64u zSig)
|
||||
{
|
||||
return (((Bit64u) zSign)<<63) + (((Bit64u) zExp)<<52) + zSig;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
||||
| and significand `zSig', and returns the proper double-precision floating-
|
||||
|
@ -34,6 +34,9 @@ these four paragraphs for those parts of this code that are retained.
|
||||
|
||||
#include <config.h> /* generated by configure script from config.h.in */
|
||||
|
||||
#ifndef SOFTFLOAT_H
|
||||
#define SOFTFLOAT_H
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Software IEC/IEEE floating-point types.
|
||||
*----------------------------------------------------------------------------*/
|
||||
@ -106,7 +109,6 @@ enum {
|
||||
*----------------------------------------------------------------------------*/
|
||||
struct float_status_t
|
||||
{
|
||||
int float_precision; /* 12/32/64/80 bit precision */
|
||||
int float_detect_tininess;
|
||||
int float_rounding_mode;
|
||||
int float_exception_flags;
|
||||
@ -201,3 +203,5 @@ int float64_compare_quiet(float64, float64, float_status_t &status);
|
||||
|
||||
float_class_t float64_class(float64);
|
||||
int float64_is_signaling_nan(float64);
|
||||
|
||||
#endif
|
||||
|
@ -41,9 +41,8 @@ void BX_CPU_C::check_exceptionsSSE(int exceptions_flags)
|
||||
}
|
||||
}
|
||||
|
||||
static void mxcsr_to_softfloat_status_word(softfloat_status_word_t &status, bx_mxcsr_t mxcsr, unsigned precision = 32)
|
||||
static void mxcsr_to_softfloat_status_word(softfloat_status_word_t &status, bx_mxcsr_t mxcsr)
|
||||
{
|
||||
status.float_precision = precision; // affects only float32 and float80 operations
|
||||
status.float_detect_tininess = float_tininess_before_rounding;
|
||||
status.float_exception_flags = 0; // clear exceptions before execution
|
||||
status.float_nan_handling_mode = float_first_operand_nan;
|
||||
@ -53,54 +52,6 @@ static void mxcsr_to_softfloat_status_word(softfloat_status_word_t &status, bx_m
|
||||
(mxcsr.get_flush_masked_underflow() && mxcsr.get_UM()) ? 1 : 0;
|
||||
}
|
||||
|
||||
// Returns `op' with bit 22 (mask 0x00400000) forced to one; every other bit
// is passed through unchanged.
BX_CPP_INLINE Float32 convert_to_QNaN(Float32 op)
{
  const Float32 quiet_bit = 0x00400000;
  return op | quiet_bit;
}
|
||||
|
||||
// approximate reciprocal of scalar single precision FP
|
||||
static Float32 approximate_reciprocal(Float32 op)
|
||||
{
|
||||
softfloat_status_word_t status_word;
|
||||
float_class_t op_class = float32_class(op);
|
||||
|
||||
static const Float32 one = 0x3F800000;
|
||||
Float32 result;
|
||||
|
||||
if (op_class == float_NaN)
|
||||
{
|
||||
return convert_to_QNaN(op);
|
||||
}
|
||||
else {
|
||||
if (op_class == float_denormal)
|
||||
{
|
||||
op &= ((Bit32u)(1) << 31);
|
||||
}
|
||||
else if(op_class == float_normalized)
|
||||
{
|
||||
/*
|
||||
* for Katmai, a one will be placed in the 12th bit after decimal
|
||||
* point, and the lower bits will be cleared.
|
||||
*/
|
||||
op &= 0xFFFFF000;
|
||||
op |= 0x00000800;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate (1/1.yyyyyyyyyyy1), the result is always rounded to the
|
||||
* 12th bit after the decimal point by round-to-nearest, regardless
|
||||
* of the current rounding mode.
|
||||
*/
|
||||
|
||||
mxcsr_to_softfloat_status_word(status_word,
|
||||
bx_mxcsr_t(MXCSR_FLUSH_MASKED_UNDERFLOW | MXCSR_UM), 12);
|
||||
|
||||
result = float32_div(one, op, status_word);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// handle DAZ
|
||||
static float32 handleDAZ(float32 op)
|
||||
{
|
||||
@ -1460,92 +1411,6 @@ void BX_CPU_C::SQRTSS_VssWss(bxInstruction_c *i)
|
||||
#endif
|
||||
}
|
||||
|
||||
// RSQRTPS handler: not implemented yet. With SSE support compiled in it
// prepares SSE state and then panics; without it, it logs a hint and
// signals an undefined opcode.
void BX_CPU_C::RSQRTPS_VpsWps(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE < 1
  BX_INFO(("RSQRTPS_VpsWps: required SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#else
  BX_CPU_THIS_PTR prepareSSE();

  BX_PANIC(("RSQRTPS_VpsWps: SSE instruction still not implemented"));
#endif
}
|
||||
|
||||
// RSQRTSS handler: not implemented yet. With SSE support compiled in it
// prepares SSE state and then panics; without it, it logs a hint and
// signals an undefined opcode.
void BX_CPU_C::RSQRTSS_VssWss(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE < 1
  BX_INFO(("RSQRTSS_VssWss: required SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#else
  BX_CPU_THIS_PTR prepareSSE();

  BX_PANIC(("RSQRTSS_VssWss: SSE instruction still not implemented"));
#endif
}
|
||||
|
||||
/*
|
||||
* Opcode: 0F 53
|
||||
* Approximate reciprocals of packed single precision FP values from XMM2/MEM.
|
||||
* Possible floating point exceptions: -
|
||||
*/
|
||||
void BX_CPU_C::RCPPS_VpsWps(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_SSE >= 1
|
||||
BX_CPU_THIS_PTR prepareSSE();
|
||||
BxPackedXmmRegister op;
|
||||
|
||||
/* op is a register or memory reference */
|
||||
if (i->modC0()) {
|
||||
op = BX_READ_XMM_REG(i->rm());
|
||||
}
|
||||
else {
|
||||
/* pointer, segment address pair */
|
||||
readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
|
||||
}
|
||||
|
||||
op.xmm32u(0) = approximate_reciprocal(op.xmm32u(0));
|
||||
op.xmm32u(1) = approximate_reciprocal(op.xmm32u(1));
|
||||
op.xmm32u(2) = approximate_reciprocal(op.xmm32u(2));
|
||||
op.xmm32u(3) = approximate_reciprocal(op.xmm32u(3));
|
||||
|
||||
BX_WRITE_XMM_REG(i->nnn(), op);
|
||||
|
||||
#else
|
||||
BX_INFO(("RCPPS_VpsWps: required SSE, use --enable-sse option"));
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Opcode: F3 0F 53
|
||||
* Approximate reciprocal of scalar single precision FP value from XMM2/MEM32.
|
||||
* Possible floating point exceptions: -
|
||||
*/
|
||||
void BX_CPU_C::RCPSS_VssWss(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_SSE >= 1
|
||||
BX_CPU_THIS_PTR prepareSSE();
|
||||
Float32 op;
|
||||
|
||||
/* op is a register or memory reference */
|
||||
if (i->modC0()) {
|
||||
op = BX_READ_XMM_REG_LO_DWORD(i->rm());
|
||||
}
|
||||
else {
|
||||
/* pointer, segment address pair */
|
||||
read_virtual_dword(i->seg(), RMAddr(i), &op);
|
||||
}
|
||||
|
||||
Float32 result = approximate_reciprocal(op);
|
||||
BX_WRITE_XMM_REG_LO_DWORD(i->nnn(), result);
|
||||
|
||||
#else
|
||||
BX_INFO(("RCPSS_VssWss: required SSE, use --enable-sse option"));
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Opcode: 0F 58
|
||||
* Add packed single precision FP numbers from XMM2/MEM to XMM1.
|
||||
|
424
bochs/cpu/sse_rcp.cc
Executable file
424
bochs/cpu/sse_rcp.cc
Executable file
@ -0,0 +1,424 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2002 Stanislav Shwartsman
|
||||
// Written by Stanislav Shwartsman <gate@fidonet.org.il>
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 2 of the License, or (at your option) any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
// Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License along with this library; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
//
|
||||
|
||||
#define NEED_CPU_REG_SHORTCUTS 1
|
||||
#include "bochs.h"
|
||||
#define LOG_THIS BX_CPU_THIS_PTR
|
||||
|
||||
#include "softfloat.h"
|
||||
#include "softfloat-specialize.h"
|
||||
|
||||
|
||||
#if BX_SUPPORT_SSE
|
||||
|
||||
// Returns `op' with bit 22 (mask 0x00400000) forced to one; every other bit
// is passed through unchanged.
BX_CPP_INLINE Float32 convert_to_QNaN(Float32 op)
{
  const Float32 quiet_bit = 0x00400000;
  return op | quiet_bit;
}
|
||||
|
||||
static Bit16u rcp_table[2048] = {
|
||||
0x7ff0, 0x7fd0, 0x7fb0, 0x7f90, 0x7f70, 0x7f50, 0x7f30, 0x7f10,
|
||||
0x7ef0, 0x7ed0, 0x7eb0, 0x7e90, 0x7e70, 0x7e50, 0x7e30, 0x7e10,
|
||||
0x7df8, 0x7dd8, 0x7db8, 0x7d98, 0x7d78, 0x7d58, 0x7d38, 0x7d18,
|
||||
0x7cf8, 0x7cd8, 0x7cb8, 0x7c98, 0x7c80, 0x7c60, 0x7c40, 0x7c20,
|
||||
0x7c00, 0x7be0, 0x7bc0, 0x7ba0, 0x7b88, 0x7b68, 0x7b48, 0x7b28,
|
||||
0x7b08, 0x7ae8, 0x7ac8, 0x7ab0, 0x7a90, 0x7a70, 0x7a50, 0x7a30,
|
||||
0x7a10, 0x79f8, 0x79d8, 0x79b8, 0x7998, 0x7978, 0x7960, 0x7940,
|
||||
0x7920, 0x7900, 0x78e0, 0x78c8, 0x78a8, 0x7888, 0x7868, 0x7850,
|
||||
0x7830, 0x7810, 0x77f0, 0x77d8, 0x77b8, 0x7798, 0x7778, 0x7760,
|
||||
0x7740, 0x7720, 0x7700, 0x76e8, 0x76c8, 0x76a8, 0x7690, 0x7670,
|
||||
0x7650, 0x7630, 0x7618, 0x75f8, 0x75d8, 0x75c0, 0x75a0, 0x7580,
|
||||
0x7568, 0x7548, 0x7528, 0x7510, 0x74f0, 0x74d0, 0x74b8, 0x7498,
|
||||
0x7478, 0x7460, 0x7440, 0x7420, 0x7408, 0x73e8, 0x73d0, 0x73b0,
|
||||
0x7390, 0x7378, 0x7358, 0x7338, 0x7320, 0x7300, 0x72e8, 0x72c8,
|
||||
0x72a8, 0x7290, 0x7270, 0x7258, 0x7238, 0x7220, 0x7200, 0x71e0,
|
||||
0x71c8, 0x71a8, 0x7190, 0x7170, 0x7158, 0x7138, 0x7118, 0x7100,
|
||||
0x70e0, 0x70c8, 0x70a8, 0x7090, 0x7070, 0x7058, 0x7038, 0x7020,
|
||||
0x7000, 0x6fe8, 0x6fc8, 0x6fb0, 0x6f90, 0x6f78, 0x6f58, 0x6f40,
|
||||
0x6f20, 0x6f08, 0x6ee8, 0x6ed0, 0x6eb0, 0x6e98, 0x6e78, 0x6e60,
|
||||
0x6e40, 0x6e28, 0x6e08, 0x6df0, 0x6dd0, 0x6db8, 0x6da0, 0x6d80,
|
||||
0x6d68, 0x6d48, 0x6d30, 0x6d10, 0x6cf8, 0x6cd8, 0x6cc0, 0x6ca8,
|
||||
0x6c88, 0x6c70, 0x6c50, 0x6c38, 0x6c20, 0x6c00, 0x6be8, 0x6bc8,
|
||||
0x6bb0, 0x6b98, 0x6b78, 0x6b60, 0x6b40, 0x6b28, 0x6b10, 0x6af0,
|
||||
0x6ad8, 0x6ac0, 0x6aa0, 0x6a88, 0x6a70, 0x6a50, 0x6a38, 0x6a20,
|
||||
0x6a00, 0x69e8, 0x69c8, 0x69b0, 0x6998, 0x6978, 0x6960, 0x6948,
|
||||
0x6930, 0x6910, 0x68f8, 0x68e0, 0x68c0, 0x68a8, 0x6890, 0x6870,
|
||||
0x6858, 0x6840, 0x6820, 0x6808, 0x67f0, 0x67d8, 0x67b8, 0x67a0,
|
||||
0x6788, 0x6770, 0x6750, 0x6738, 0x6720, 0x6700, 0x66e8, 0x66d0,
|
||||
0x66b8, 0x6698, 0x6680, 0x6668, 0x6650, 0x6638, 0x6618, 0x6600,
|
||||
0x65e8, 0x65d0, 0x65b0, 0x6598, 0x6580, 0x6568, 0x6550, 0x6530,
|
||||
0x6518, 0x6500, 0x64e8, 0x64c8, 0x64b0, 0x6498, 0x6480, 0x6468,
|
||||
0x6450, 0x6430, 0x6418, 0x6400, 0x63e8, 0x63d0, 0x63b8, 0x6398,
|
||||
0x6380, 0x6368, 0x6350, 0x6338, 0x6320, 0x6300, 0x62e8, 0x62d0,
|
||||
0x62b8, 0x62a0, 0x6288, 0x6270, 0x6250, 0x6238, 0x6220, 0x6208,
|
||||
0x61f0, 0x61d8, 0x61c0, 0x61a8, 0x6190, 0x6170, 0x6158, 0x6140,
|
||||
0x6128, 0x6110, 0x60f8, 0x60e0, 0x60c8, 0x60b0, 0x6098, 0x6080,
|
||||
0x6060, 0x6048, 0x6030, 0x6018, 0x6000, 0x5fe8, 0x5fd0, 0x5fb8,
|
||||
0x5fa0, 0x5f88, 0x5f70, 0x5f58, 0x5f40, 0x5f28, 0x5f10, 0x5ef8,
|
||||
0x5ee0, 0x5ec8, 0x5eb0, 0x5e98, 0x5e80, 0x5e68, 0x5e50, 0x5e30,
|
||||
0x5e18, 0x5e00, 0x5de8, 0x5dd0, 0x5db8, 0x5da0, 0x5d88, 0x5d70,
|
||||
0x5d58, 0x5d40, 0x5d30, 0x5d18, 0x5d00, 0x5ce8, 0x5cd0, 0x5cb8,
|
||||
0x5ca0, 0x5c88, 0x5c70, 0x5c58, 0x5c40, 0x5c28, 0x5c10, 0x5bf8,
|
||||
0x5be0, 0x5bc8, 0x5bb0, 0x5b98, 0x5b80, 0x5b68, 0x5b50, 0x5b38,
|
||||
0x5b20, 0x5b08, 0x5af8, 0x5ae0, 0x5ac8, 0x5ab0, 0x5a98, 0x5a80,
|
||||
0x5a68, 0x5a50, 0x5a38, 0x5a20, 0x5a08, 0x59f8, 0x59e0, 0x59c8,
|
||||
0x59b0, 0x5998, 0x5980, 0x5968, 0x5950, 0x5938, 0x5928, 0x5910,
|
||||
0x58f8, 0x58e0, 0x58c8, 0x58b0, 0x5898, 0x5880, 0x5870, 0x5858,
|
||||
0x5840, 0x5828, 0x5810, 0x57f8, 0x57e0, 0x57d0, 0x57b8, 0x57a0,
|
||||
0x5788, 0x5770, 0x5758, 0x5748, 0x5730, 0x5718, 0x5700, 0x56e8,
|
||||
0x56d0, 0x56c0, 0x56a8, 0x5690, 0x5678, 0x5660, 0x5650, 0x5638,
|
||||
0x5620, 0x5608, 0x55f0, 0x55e0, 0x55c8, 0x55b0, 0x5598, 0x5588,
|
||||
0x5570, 0x5558, 0x5540, 0x5528, 0x5518, 0x5500, 0x54e8, 0x54d0,
|
||||
0x54c0, 0x54a8, 0x5490, 0x5478, 0x5468, 0x5450, 0x5438, 0x5420,
|
||||
0x5410, 0x53f8, 0x53e0, 0x53c8, 0x53b8, 0x53a0, 0x5388, 0x5370,
|
||||
0x5360, 0x5348, 0x5330, 0x5318, 0x5308, 0x52f0, 0x52d8, 0x52c8,
|
||||
0x52b0, 0x5298, 0x5280, 0x5270, 0x5258, 0x5240, 0x5230, 0x5218,
|
||||
0x5200, 0x51f0, 0x51d8, 0x51c0, 0x51b0, 0x5198, 0x5180, 0x5170,
|
||||
0x5158, 0x5140, 0x5128, 0x5118, 0x5100, 0x50e8, 0x50d8, 0x50c0,
|
||||
0x50a8, 0x5098, 0x5080, 0x5070, 0x5058, 0x5040, 0x5030, 0x5018,
|
||||
0x5000, 0x4ff0, 0x4fd8, 0x4fc0, 0x4fb0, 0x4f98, 0x4f80, 0x4f70,
|
||||
0x4f58, 0x4f48, 0x4f30, 0x4f18, 0x4f08, 0x4ef0, 0x4ee0, 0x4ec8,
|
||||
0x4eb0, 0x4ea0, 0x4e88, 0x4e78, 0x4e60, 0x4e48, 0x4e38, 0x4e20,
|
||||
0x4e10, 0x4df8, 0x4de0, 0x4dd0, 0x4db8, 0x4da8, 0x4d90, 0x4d78,
|
||||
0x4d68, 0x4d50, 0x4d40, 0x4d28, 0x4d18, 0x4d00, 0x4ce8, 0x4cd8,
|
||||
0x4cc0, 0x4cb0, 0x4c98, 0x4c88, 0x4c70, 0x4c60, 0x4c48, 0x4c30,
|
||||
0x4c20, 0x4c08, 0x4bf8, 0x4be0, 0x4bd0, 0x4bb8, 0x4ba8, 0x4b90,
|
||||
0x4b80, 0x4b68, 0x4b58, 0x4b40, 0x4b30, 0x4b18, 0x4b08, 0x4af0,
|
||||
0x4ad8, 0x4ac8, 0x4ab0, 0x4aa0, 0x4a88, 0x4a78, 0x4a60, 0x4a50,
|
||||
0x4a38, 0x4a28, 0x4a10, 0x4a00, 0x49e8, 0x49d8, 0x49c8, 0x49b0,
|
||||
0x49a0, 0x4988, 0x4978, 0x4960, 0x4950, 0x4938, 0x4928, 0x4910,
|
||||
0x4900, 0x48e8, 0x48d8, 0x48c0, 0x48b0, 0x4898, 0x4888, 0x4878,
|
||||
0x4860, 0x4850, 0x4838, 0x4828, 0x4810, 0x4800, 0x47e8, 0x47d8,
|
||||
0x47c8, 0x47b0, 0x47a0, 0x4788, 0x4778, 0x4760, 0x4750, 0x4740,
|
||||
0x4728, 0x4718, 0x4700, 0x46f0, 0x46d8, 0x46c8, 0x46b8, 0x46a0,
|
||||
0x4690, 0x4678, 0x4668, 0x4658, 0x4640, 0x4630, 0x4618, 0x4608,
|
||||
0x45f8, 0x45e0, 0x45d0, 0x45b8, 0x45a8, 0x4598, 0x4580, 0x4570,
|
||||
0x4560, 0x4548, 0x4538, 0x4520, 0x4510, 0x4500, 0x44e8, 0x44d8,
|
||||
0x44c8, 0x44b0, 0x44a0, 0x4488, 0x4478, 0x4468, 0x4450, 0x4440,
|
||||
0x4430, 0x4418, 0x4408, 0x43f8, 0x43e0, 0x43d0, 0x43c0, 0x43a8,
|
||||
0x4398, 0x4388, 0x4370, 0x4360, 0x4350, 0x4338, 0x4328, 0x4318,
|
||||
0x4300, 0x42f0, 0x42e0, 0x42c8, 0x42b8, 0x42a8, 0x4290, 0x4280,
|
||||
0x4270, 0x4260, 0x4248, 0x4238, 0x4228, 0x4210, 0x4200, 0x41f0,
|
||||
0x41d8, 0x41c8, 0x41b8, 0x41a8, 0x4190, 0x4180, 0x4170, 0x4158,
|
||||
0x4148, 0x4138, 0x4128, 0x4110, 0x4100, 0x40f0, 0x40d8, 0x40c8,
|
||||
0x40b8, 0x40a8, 0x4090, 0x4080, 0x4070, 0x4060, 0x4048, 0x4038,
|
||||
0x4028, 0x4018, 0x4000, 0x3ff0, 0x3fe0, 0x3fd0, 0x3fb8, 0x3fa8,
|
||||
0x3f98, 0x3f88, 0x3f70, 0x3f60, 0x3f50, 0x3f40, 0x3f28, 0x3f18,
|
||||
0x3f08, 0x3ef8, 0x3ee8, 0x3ed0, 0x3ec0, 0x3eb0, 0x3ea0, 0x3e88,
|
||||
0x3e78, 0x3e68, 0x3e58, 0x3e48, 0x3e30, 0x3e20, 0x3e10, 0x3e00,
|
||||
0x3df0, 0x3dd8, 0x3dc8, 0x3db8, 0x3da8, 0x3d98, 0x3d80, 0x3d70,
|
||||
0x3d60, 0x3d50, 0x3d40, 0x3d28, 0x3d18, 0x3d08, 0x3cf8, 0x3ce8,
|
||||
0x3cd8, 0x3cc0, 0x3cb0, 0x3ca0, 0x3c90, 0x3c80, 0x3c70, 0x3c58,
|
||||
0x3c48, 0x3c38, 0x3c28, 0x3c18, 0x3c08, 0x3bf0, 0x3be0, 0x3bd0,
|
||||
0x3bc0, 0x3bb0, 0x3ba0, 0x3b90, 0x3b78, 0x3b68, 0x3b58, 0x3b48,
|
||||
0x3b38, 0x3b28, 0x3b18, 0x3b00, 0x3af0, 0x3ae0, 0x3ad0, 0x3ac0,
|
||||
0x3ab0, 0x3aa0, 0x3a88, 0x3a78, 0x3a68, 0x3a58, 0x3a48, 0x3a38,
|
||||
0x3a28, 0x3a18, 0x3a08, 0x39f0, 0x39e0, 0x39d0, 0x39c0, 0x39b0,
|
||||
0x39a0, 0x3990, 0x3980, 0x3970, 0x3958, 0x3948, 0x3938, 0x3928,
|
||||
0x3918, 0x3908, 0x38f8, 0x38e8, 0x38d8, 0x38c8, 0x38b8, 0x38a8,
|
||||
0x3890, 0x3880, 0x3870, 0x3860, 0x3850, 0x3840, 0x3830, 0x3820,
|
||||
0x3810, 0x3800, 0x37f0, 0x37e0, 0x37d0, 0x37c0, 0x37a8, 0x3798,
|
||||
0x3788, 0x3778, 0x3768, 0x3758, 0x3748, 0x3738, 0x3728, 0x3718,
|
||||
0x3708, 0x36f8, 0x36e8, 0x36d8, 0x36c8, 0x36b8, 0x36a8, 0x3698,
|
||||
0x3688, 0x3678, 0x3668, 0x3658, 0x3648, 0x3630, 0x3620, 0x3610,
|
||||
0x3600, 0x35f0, 0x35e0, 0x35d0, 0x35c0, 0x35b0, 0x35a0, 0x3590,
|
||||
0x3580, 0x3570, 0x3560, 0x3550, 0x3540, 0x3530, 0x3520, 0x3510,
|
||||
0x3500, 0x34f0, 0x34e0, 0x34d0, 0x34c0, 0x34b0, 0x34a0, 0x3490,
|
||||
0x3480, 0x3470, 0x3460, 0x3450, 0x3440, 0x3430, 0x3420, 0x3410,
|
||||
0x3400, 0x33f0, 0x33e0, 0x33d0, 0x33c8, 0x33b8, 0x33a8, 0x3398,
|
||||
0x3388, 0x3378, 0x3368, 0x3358, 0x3348, 0x3338, 0x3328, 0x3318,
|
||||
0x3308, 0x32f8, 0x32e8, 0x32d8, 0x32c8, 0x32b8, 0x32a8, 0x3298,
|
||||
0x3288, 0x3278, 0x3268, 0x3260, 0x3250, 0x3240, 0x3230, 0x3220,
|
||||
0x3210, 0x3200, 0x31f0, 0x31e0, 0x31d0, 0x31c0, 0x31b0, 0x31a0,
|
||||
0x3190, 0x3180, 0x3178, 0x3168, 0x3158, 0x3148, 0x3138, 0x3128,
|
||||
0x3118, 0x3108, 0x30f8, 0x30e8, 0x30d8, 0x30c8, 0x30c0, 0x30b0,
|
||||
0x30a0, 0x3090, 0x3080, 0x3070, 0x3060, 0x3050, 0x3040, 0x3030,
|
||||
0x3028, 0x3018, 0x3008, 0x2ff8, 0x2fe8, 0x2fd8, 0x2fc8, 0x2fb8,
|
||||
0x2fa8, 0x2fa0, 0x2f90, 0x2f80, 0x2f70, 0x2f60, 0x2f50, 0x2f40,
|
||||
0x2f30, 0x2f28, 0x2f18, 0x2f08, 0x2ef8, 0x2ee8, 0x2ed8, 0x2ec8,
|
||||
0x2eb8, 0x2eb0, 0x2ea0, 0x2e90, 0x2e80, 0x2e70, 0x2e60, 0x2e50,
|
||||
0x2e48, 0x2e38, 0x2e28, 0x2e18, 0x2e08, 0x2df8, 0x2df0, 0x2de0,
|
||||
0x2dd0, 0x2dc0, 0x2db0, 0x2da0, 0x2d90, 0x2d88, 0x2d78, 0x2d68,
|
||||
0x2d58, 0x2d48, 0x2d38, 0x2d30, 0x2d20, 0x2d10, 0x2d00, 0x2cf0,
|
||||
0x2ce0, 0x2cd8, 0x2cc8, 0x2cb8, 0x2ca8, 0x2c98, 0x2c90, 0x2c80,
|
||||
0x2c70, 0x2c60, 0x2c50, 0x2c40, 0x2c38, 0x2c28, 0x2c18, 0x2c08,
|
||||
0x2bf8, 0x2bf0, 0x2be0, 0x2bd0, 0x2bc0, 0x2bb0, 0x2ba8, 0x2b98,
|
||||
0x2b88, 0x2b78, 0x2b68, 0x2b60, 0x2b50, 0x2b40, 0x2b30, 0x2b20,
|
||||
0x2b18, 0x2b08, 0x2af8, 0x2ae8, 0x2ae0, 0x2ad0, 0x2ac0, 0x2ab0,
|
||||
0x2aa0, 0x2a98, 0x2a88, 0x2a78, 0x2a68, 0x2a60, 0x2a50, 0x2a40,
|
||||
0x2a30, 0x2a20, 0x2a18, 0x2a08, 0x29f8, 0x29e8, 0x29e0, 0x29d0,
|
||||
0x29c0, 0x29b0, 0x29a8, 0x2998, 0x2988, 0x2978, 0x2970, 0x2960,
|
||||
0x2950, 0x2940, 0x2938, 0x2928, 0x2918, 0x2908, 0x2900, 0x28f0,
|
||||
0x28e0, 0x28d0, 0x28c8, 0x28b8, 0x28a8, 0x2898, 0x2890, 0x2880,
|
||||
0x2870, 0x2868, 0x2858, 0x2848, 0x2838, 0x2830, 0x2820, 0x2810,
|
||||
0x2800, 0x27f8, 0x27e8, 0x27d8, 0x27d0, 0x27c0, 0x27b0, 0x27a0,
|
||||
0x2798, 0x2788, 0x2778, 0x2770, 0x2760, 0x2750, 0x2740, 0x2738,
|
||||
0x2728, 0x2718, 0x2710, 0x2700, 0x26f0, 0x26e8, 0x26d8, 0x26c8,
|
||||
0x26b8, 0x26b0, 0x26a0, 0x2690, 0x2688, 0x2678, 0x2668, 0x2660,
|
||||
0x2650, 0x2640, 0x2638, 0x2628, 0x2618, 0x2608, 0x2600, 0x25f0,
|
||||
0x25e0, 0x25d8, 0x25c8, 0x25b8, 0x25b0, 0x25a0, 0x2590, 0x2588,
|
||||
0x2578, 0x2568, 0x2560, 0x2550, 0x2540, 0x2538, 0x2528, 0x2518,
|
||||
0x2510, 0x2500, 0x24f0, 0x24e8, 0x24d8, 0x24c8, 0x24c0, 0x24b0,
|
||||
0x24a0, 0x2498, 0x2488, 0x2478, 0x2470, 0x2460, 0x2450, 0x2448,
|
||||
0x2438, 0x2430, 0x2420, 0x2410, 0x2408, 0x23f8, 0x23e8, 0x23e0,
|
||||
0x23d0, 0x23c0, 0x23b8, 0x23a8, 0x23a0, 0x2390, 0x2380, 0x2378,
|
||||
0x2368, 0x2358, 0x2350, 0x2340, 0x2330, 0x2328, 0x2318, 0x2310,
|
||||
0x2300, 0x22f0, 0x22e8, 0x22d8, 0x22d0, 0x22c0, 0x22b0, 0x22a8,
|
||||
0x2298, 0x2288, 0x2280, 0x2270, 0x2268, 0x2258, 0x2248, 0x2240,
|
||||
0x2230, 0x2228, 0x2218, 0x2208, 0x2200, 0x21f0, 0x21e8, 0x21d8,
|
||||
0x21c8, 0x21c0, 0x21b0, 0x21a8, 0x2198, 0x2188, 0x2180, 0x2170,
|
||||
0x2168, 0x2158, 0x2148, 0x2140, 0x2130, 0x2128, 0x2118, 0x2108,
|
||||
0x2100, 0x20f0, 0x20e8, 0x20d8, 0x20d0, 0x20c0, 0x20b0, 0x20a8,
|
||||
0x2098, 0x2090, 0x2080, 0x2078, 0x2068, 0x2058, 0x2050, 0x2040,
|
||||
0x2038, 0x2028, 0x2020, 0x2010, 0x2000, 0x1ff8, 0x1fe8, 0x1fe0,
|
||||
0x1fd0, 0x1fc8, 0x1fb8, 0x1fb0, 0x1fa0, 0x1f90, 0x1f88, 0x1f78,
|
||||
0x1f70, 0x1f60, 0x1f58, 0x1f48, 0x1f40, 0x1f30, 0x1f20, 0x1f18,
|
||||
0x1f08, 0x1f00, 0x1ef0, 0x1ee8, 0x1ed8, 0x1ed0, 0x1ec0, 0x1eb8,
|
||||
0x1ea8, 0x1ea0, 0x1e90, 0x1e80, 0x1e78, 0x1e68, 0x1e60, 0x1e50,
|
||||
0x1e48, 0x1e38, 0x1e30, 0x1e20, 0x1e18, 0x1e08, 0x1e00, 0x1df0,
|
||||
0x1de8, 0x1dd8, 0x1dd0, 0x1dc0, 0x1db8, 0x1da8, 0x1da0, 0x1d90,
|
||||
0x1d80, 0x1d78, 0x1d68, 0x1d60, 0x1d50, 0x1d48, 0x1d38, 0x1d30,
|
||||
0x1d20, 0x1d18, 0x1d08, 0x1d00, 0x1cf0, 0x1ce8, 0x1cd8, 0x1cd0,
|
||||
0x1cc0, 0x1cb8, 0x1ca8, 0x1ca0, 0x1c90, 0x1c88, 0x1c78, 0x1c70,
|
||||
0x1c60, 0x1c58, 0x1c48, 0x1c40, 0x1c30, 0x1c28, 0x1c18, 0x1c10,
|
||||
0x1c00, 0x1bf8, 0x1bf0, 0x1be0, 0x1bd8, 0x1bc8, 0x1bc0, 0x1bb0,
|
||||
0x1ba8, 0x1b98, 0x1b90, 0x1b80, 0x1b78, 0x1b68, 0x1b60, 0x1b50,
|
||||
0x1b48, 0x1b38, 0x1b30, 0x1b20, 0x1b18, 0x1b08, 0x1b00, 0x1af8,
|
||||
0x1ae8, 0x1ae0, 0x1ad0, 0x1ac8, 0x1ab8, 0x1ab0, 0x1aa0, 0x1a98,
|
||||
0x1a88, 0x1a80, 0x1a70, 0x1a68, 0x1a60, 0x1a50, 0x1a48, 0x1a38,
|
||||
0x1a30, 0x1a20, 0x1a18, 0x1a08, 0x1a00, 0x19f8, 0x19e8, 0x19e0,
|
||||
0x19d0, 0x19c8, 0x19b8, 0x19b0, 0x19a0, 0x1998, 0x1990, 0x1980,
|
||||
0x1978, 0x1968, 0x1960, 0x1950, 0x1948, 0x1938, 0x1930, 0x1928,
|
||||
0x1918, 0x1910, 0x1900, 0x18f8, 0x18e8, 0x18e0, 0x18d8, 0x18c8,
|
||||
0x18c0, 0x18b0, 0x18a8, 0x1898, 0x1890, 0x1888, 0x1878, 0x1870,
|
||||
0x1860, 0x1858, 0x1850, 0x1840, 0x1838, 0x1828, 0x1820, 0x1810,
|
||||
0x1808, 0x1800, 0x17f0, 0x17e8, 0x17d8, 0x17d0, 0x17c8, 0x17b8,
|
||||
0x17b0, 0x17a0, 0x1798, 0x1790, 0x1780, 0x1778, 0x1768, 0x1760,
|
||||
0x1758, 0x1748, 0x1740, 0x1730, 0x1728, 0x1720, 0x1710, 0x1708,
|
||||
0x16f8, 0x16f0, 0x16e8, 0x16d8, 0x16d0, 0x16c8, 0x16b8, 0x16b0,
|
||||
0x16a0, 0x1698, 0x1690, 0x1680, 0x1678, 0x1668, 0x1660, 0x1658,
|
||||
0x1648, 0x1640, 0x1638, 0x1628, 0x1620, 0x1610, 0x1608, 0x1600,
|
||||
0x15f0, 0x15e8, 0x15e0, 0x15d0, 0x15c8, 0x15b8, 0x15b0, 0x15a8,
|
||||
0x1598, 0x1590, 0x1588, 0x1578, 0x1570, 0x1568, 0x1558, 0x1550,
|
||||
0x1540, 0x1538, 0x1530, 0x1520, 0x1518, 0x1510, 0x1500, 0x14f8,
|
||||
0x14f0, 0x14e0, 0x14d8, 0x14d0, 0x14c0, 0x14b8, 0x14a8, 0x14a0,
|
||||
0x1498, 0x1488, 0x1480, 0x1478, 0x1468, 0x1460, 0x1458, 0x1448,
|
||||
0x1440, 0x1438, 0x1428, 0x1420, 0x1418, 0x1408, 0x1400, 0x13f8,
|
||||
0x13e8, 0x13e0, 0x13d8, 0x13c8, 0x13c0, 0x13b8, 0x13a8, 0x13a0,
|
||||
0x1398, 0x1388, 0x1380, 0x1378, 0x1368, 0x1360, 0x1358, 0x1348,
|
||||
0x1340, 0x1338, 0x1328, 0x1320, 0x1318, 0x1308, 0x1300, 0x12f8,
|
||||
0x12e8, 0x12e0, 0x12d8, 0x12d0, 0x12c0, 0x12b8, 0x12b0, 0x12a0,
|
||||
0x1298, 0x1290, 0x1280, 0x1278, 0x1270, 0x1260, 0x1258, 0x1250,
|
||||
0x1240, 0x1238, 0x1230, 0x1228, 0x1218, 0x1210, 0x1208, 0x11f8,
|
||||
0x11f0, 0x11e8, 0x11d8, 0x11d0, 0x11c8, 0x11c0, 0x11b0, 0x11a8,
|
||||
0x11a0, 0x1190, 0x1188, 0x1180, 0x1178, 0x1168, 0x1160, 0x1158,
|
||||
0x1148, 0x1140, 0x1138, 0x1128, 0x1120, 0x1118, 0x1110, 0x1100,
|
||||
0x10f8, 0x10f0, 0x10e8, 0x10d8, 0x10d0, 0x10c8, 0x10b8, 0x10b0,
|
||||
0x10a8, 0x10a0, 0x1090, 0x1088, 0x1080, 0x1070, 0x1068, 0x1060,
|
||||
0x1058, 0x1048, 0x1040, 0x1038, 0x1030, 0x1020, 0x1018, 0x1010,
|
||||
0x1000, 0x0ff8, 0x0ff0, 0x0fe8, 0x0fd8, 0x0fd0, 0x0fc8, 0x0fc0,
|
||||
0x0fb0, 0x0fa8, 0x0fa0, 0x0f98, 0x0f88, 0x0f80, 0x0f78, 0x0f70,
|
||||
0x0f60, 0x0f58, 0x0f50, 0x0f48, 0x0f38, 0x0f30, 0x0f28, 0x0f20,
|
||||
0x0f10, 0x0f08, 0x0f00, 0x0ef8, 0x0ee8, 0x0ee0, 0x0ed8, 0x0ed0,
|
||||
0x0ec0, 0x0eb8, 0x0eb0, 0x0ea8, 0x0e98, 0x0e90, 0x0e88, 0x0e80,
|
||||
0x0e70, 0x0e68, 0x0e60, 0x0e58, 0x0e48, 0x0e40, 0x0e38, 0x0e30,
|
||||
0x0e28, 0x0e18, 0x0e10, 0x0e08, 0x0e00, 0x0df0, 0x0de8, 0x0de0,
|
||||
0x0dd8, 0x0dc8, 0x0dc0, 0x0db8, 0x0db0, 0x0da8, 0x0d98, 0x0d90,
|
||||
0x0d88, 0x0d80, 0x0d70, 0x0d68, 0x0d60, 0x0d58, 0x0d50, 0x0d40,
|
||||
0x0d38, 0x0d30, 0x0d28, 0x0d18, 0x0d10, 0x0d08, 0x0d00, 0x0cf8,
|
||||
0x0ce8, 0x0ce0, 0x0cd8, 0x0cd0, 0x0cc8, 0x0cb8, 0x0cb0, 0x0ca8,
|
||||
0x0ca0, 0x0c98, 0x0c88, 0x0c80, 0x0c78, 0x0c70, 0x0c60, 0x0c58,
|
||||
0x0c50, 0x0c48, 0x0c40, 0x0c30, 0x0c28, 0x0c20, 0x0c18, 0x0c10,
|
||||
0x0c00, 0x0bf8, 0x0bf0, 0x0be8, 0x0be0, 0x0bd8, 0x0bc8, 0x0bc0,
|
||||
0x0bb8, 0x0bb0, 0x0ba8, 0x0b98, 0x0b90, 0x0b88, 0x0b80, 0x0b78,
|
||||
0x0b68, 0x0b60, 0x0b58, 0x0b50, 0x0b48, 0x0b40, 0x0b30, 0x0b28,
|
||||
0x0b20, 0x0b18, 0x0b10, 0x0b00, 0x0af8, 0x0af0, 0x0ae8, 0x0ae0,
|
||||
0x0ad8, 0x0ac8, 0x0ac0, 0x0ab8, 0x0ab0, 0x0aa8, 0x0a98, 0x0a90,
|
||||
0x0a88, 0x0a80, 0x0a78, 0x0a70, 0x0a60, 0x0a58, 0x0a50, 0x0a48,
|
||||
0x0a40, 0x0a38, 0x0a28, 0x0a20, 0x0a18, 0x0a10, 0x0a08, 0x0a00,
|
||||
0x09f0, 0x09e8, 0x09e0, 0x09d8, 0x09d0, 0x09c8, 0x09c0, 0x09b0,
|
||||
0x09a8, 0x09a0, 0x0998, 0x0990, 0x0988, 0x0978, 0x0970, 0x0968,
|
||||
0x0960, 0x0958, 0x0950, 0x0948, 0x0938, 0x0930, 0x0928, 0x0920,
|
||||
0x0918, 0x0910, 0x0900, 0x08f8, 0x08f0, 0x08e8, 0x08e0, 0x08d8,
|
||||
0x08d0, 0x08c0, 0x08b8, 0x08b0, 0x08a8, 0x08a0, 0x0898, 0x0890,
|
||||
0x0880, 0x0878, 0x0870, 0x0868, 0x0860, 0x0858, 0x0850, 0x0848,
|
||||
0x0838, 0x0830, 0x0828, 0x0820, 0x0818, 0x0810, 0x0808, 0x0800,
|
||||
0x07f0, 0x07e8, 0x07e0, 0x07d8, 0x07d0, 0x07c8, 0x07c0, 0x07b0,
|
||||
0x07a8, 0x07a0, 0x0798, 0x0790, 0x0788, 0x0780, 0x0778, 0x0770,
|
||||
0x0760, 0x0758, 0x0750, 0x0748, 0x0740, 0x0738, 0x0730, 0x0728,
|
||||
0x0718, 0x0710, 0x0708, 0x0700, 0x06f8, 0x06f0, 0x06e8, 0x06e0,
|
||||
0x06d8, 0x06c8, 0x06c0, 0x06b8, 0x06b0, 0x06a8, 0x06a0, 0x0698,
|
||||
0x0690, 0x0688, 0x0680, 0x0670, 0x0668, 0x0660, 0x0658, 0x0650,
|
||||
0x0648, 0x0640, 0x0638, 0x0630, 0x0620, 0x0618, 0x0610, 0x0608,
|
||||
0x0600, 0x05f8, 0x05f0, 0x05e8, 0x05e0, 0x05d8, 0x05d0, 0x05c0,
|
||||
0x05b8, 0x05b0, 0x05a8, 0x05a0, 0x0598, 0x0590, 0x0588, 0x0580,
|
||||
0x0578, 0x0570, 0x0560, 0x0558, 0x0550, 0x0548, 0x0540, 0x0538,
|
||||
0x0530, 0x0528, 0x0520, 0x0518, 0x0510, 0x0508, 0x04f8, 0x04f0,
|
||||
0x04e8, 0x04e0, 0x04d8, 0x04d0, 0x04c8, 0x04c0, 0x04b8, 0x04b0,
|
||||
0x04a8, 0x04a0, 0x0498, 0x0488, 0x0480, 0x0478, 0x0470, 0x0468,
|
||||
0x0460, 0x0458, 0x0450, 0x0448, 0x0440, 0x0438, 0x0430, 0x0428,
|
||||
0x0420, 0x0418, 0x0408, 0x0400, 0x03f8, 0x03f0, 0x03e8, 0x03e0,
|
||||
0x03d8, 0x03d0, 0x03c8, 0x03c0, 0x03b8, 0x03b0, 0x03a8, 0x03a0,
|
||||
0x0398, 0x0390, 0x0388, 0x0378, 0x0370, 0x0368, 0x0360, 0x0358,
|
||||
0x0350, 0x0348, 0x0340, 0x0338, 0x0330, 0x0328, 0x0320, 0x0318,
|
||||
0x0310, 0x0308, 0x0300, 0x02f8, 0x02f0, 0x02e8, 0x02d8, 0x02d0,
|
||||
0x02c8, 0x02c0, 0x02b8, 0x02b0, 0x02a8, 0x02a0, 0x0298, 0x0290,
|
||||
0x0288, 0x0280, 0x0278, 0x0270, 0x0268, 0x0260, 0x0258, 0x0250,
|
||||
0x0248, 0x0240, 0x0238, 0x0230, 0x0228, 0x0220, 0x0218, 0x0210,
|
||||
0x0200, 0x01f8, 0x01f0, 0x01e8, 0x01e0, 0x01d8, 0x01d0, 0x01c8,
|
||||
0x01c0, 0x01b8, 0x01b0, 0x01a8, 0x01a0, 0x0198, 0x0190, 0x0188,
|
||||
0x0180, 0x0178, 0x0170, 0x0168, 0x0160, 0x0158, 0x0150, 0x0148,
|
||||
0x0140, 0x0138, 0x0130, 0x0128, 0x0120, 0x0118, 0x0110, 0x0108,
|
||||
0x0100, 0x00f8, 0x00f0, 0x00e8, 0x00e0, 0x00d8, 0x00d0, 0x00c8,
|
||||
0x00c0, 0x00b8, 0x00b0, 0x00a8, 0x00a0, 0x0098, 0x0090, 0x0088,
|
||||
0x0080, 0x0078, 0x0070, 0x0068, 0x0060, 0x0058, 0x0050, 0x0048,
|
||||
0x0040, 0x0038, 0x0030, 0x0028, 0x0020, 0x0018, 0x0010, 0x0008
|
||||
};
|
||||
|
||||
// Bit pattern of single precision positive infinity
// (sign bit 0, exponent field 0xFF, fraction 0).
// NOTE(review): not referenced by the code that follows in this file -
// confirm whether it is still needed.
static const Float32 inf = 0x7f800000;
|
||||
|
||||
// approximate reciprocal of scalar single precision FP
|
||||
static Float32 approximate_reciprocal(Float32 op)
|
||||
{
|
||||
float_class_t op_class = float32_class(op);
|
||||
|
||||
int sign = float32_sign(op);
|
||||
|
||||
switch(op_class)
|
||||
{
|
||||
case float_negative_zero:
|
||||
case float_positive_zero:
|
||||
case float_denormal:
|
||||
return packFloat32(sign, 0xFF, 0);
|
||||
|
||||
case float_negative_inf:
|
||||
case float_positive_inf:
|
||||
return packFloat32(sign, 0x00, 0);
|
||||
|
||||
case float_NaN:
|
||||
return convert_to_QNaN(op);
|
||||
}
|
||||
|
||||
Bit32u fraction = float32_fraction(op);
|
||||
Bit16s exp = float32_exp(op);
|
||||
|
||||
/*
|
||||
* Calculate (1/1.yyyyyyyyyyy1), the result is always rounded to the
|
||||
* 12th bit after the decimal point by round-to-nearest, regardless
|
||||
* of the current rounding mode.
|
||||
*
|
||||
* Using precalculated 2048-entry table.
|
||||
*/
|
||||
|
||||
exp = 253 - exp;
|
||||
/* check for underflow */
|
||||
if (exp <= 0)
|
||||
return packFloat32(sign, 0x00, 0);
|
||||
|
||||
return packFloat32(sign, exp, (Bit32u)(rcp_table[fraction >> 12]) << 8);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Opcode: 0F 53
|
||||
* Approximate reciprocals of packed single precision FP values from XMM2/MEM.
|
||||
* Possible floating point exceptions: -
|
||||
*/
|
||||
void BX_CPU_C::RCPPS_VpsWps(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_SSE >= 1
|
||||
BX_CPU_THIS_PTR prepareSSE();
|
||||
BxPackedXmmRegister op;
|
||||
|
||||
/* op is a register or memory reference */
|
||||
if (i->modC0()) {
|
||||
op = BX_READ_XMM_REG(i->rm());
|
||||
}
|
||||
else {
|
||||
/* pointer, segment address pair */
|
||||
readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
|
||||
}
|
||||
|
||||
op.xmm32u(0) = approximate_reciprocal(op.xmm32u(0));
|
||||
op.xmm32u(1) = approximate_reciprocal(op.xmm32u(1));
|
||||
op.xmm32u(2) = approximate_reciprocal(op.xmm32u(2));
|
||||
op.xmm32u(3) = approximate_reciprocal(op.xmm32u(3));
|
||||
|
||||
BX_WRITE_XMM_REG(i->nnn(), op);
|
||||
|
||||
#else
|
||||
BX_INFO(("RCPPS_VpsWps: required SSE, use --enable-sse option"));
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Opcode: F3 0F 53
|
||||
* Approximate reciprocal of scalar single precision FP value from XMM2/MEM32.
|
||||
* Possible floating point exceptions: -
|
||||
*/
|
||||
void BX_CPU_C::RCPSS_VssWss(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_SSE >= 1
|
||||
BX_CPU_THIS_PTR prepareSSE();
|
||||
Float32 op;
|
||||
|
||||
/* op is a register or memory reference */
|
||||
if (i->modC0()) {
|
||||
op = BX_READ_XMM_REG_LO_DWORD(i->rm());
|
||||
}
|
||||
else {
|
||||
/* pointer, segment address pair */
|
||||
read_virtual_dword(i->seg(), RMAddr(i), &op);
|
||||
}
|
||||
|
||||
Float32 result = approximate_reciprocal(op);
|
||||
BX_WRITE_XMM_REG_LO_DWORD(i->nnn(), result);
|
||||
|
||||
#else
|
||||
BX_INFO(("RCPSS_VssWss: required SSE, use --enable-sse option"));
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * RSQRTPS handler (packed single precision operands).
 * Placeholder only: with SSE support compiled in it calls prepareSSE()
 * and then panics because the instruction is not implemented; without
 * SSE support it logs a message and calls UndefinedOpcode().
 */
void BX_CPU_C::RSQRTPS_VpsWps(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  BX_PANIC(("RSQRTPS_VpsWps: SSE instruction still not implemented"));
#else
  BX_INFO(("RSQRTPS_VpsWps: required SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
|
||||
|
||||
/*
 * RSQRTSS handler (scalar single precision operand).
 * Placeholder only: with SSE support compiled in it calls prepareSSE()
 * and then panics because the instruction is not implemented; without
 * SSE support it logs a message and calls UndefinedOpcode().
 */
void BX_CPU_C::RSQRTSS_VssWss(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

  BX_PANIC(("RSQRTSS_VssWss: SSE instruction still not implemented"));
#else
  BX_INFO(("RSQRTSS_VssWss: required SSE, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
|
Loading…
Reference in New Issue
Block a user