implemented AVX float16 convert instructions
This commit is contained in:
parent
d7f19bcfd4
commit
8399dee24c
@ -179,6 +179,10 @@ cpu: count=1, ips=50000000, reset_on_triple_fault=1, ignore_bad_msrs=1, msrs="ms
|
||||
# Select AVX instruction set support.
|
||||
# This option exists only if Bochs compiled with --enable-avx option.
|
||||
#
|
||||
# AVX_F16C:
|
||||
# Select AVX float16 convert instructions support.
|
||||
# This option exists only if Bochs compiled with --enable-avx option.
|
||||
#
|
||||
# 1G_PAGES:
|
||||
# Enable 1G page size support in long mode.
|
||||
# This option exists only if Bochs compiled with x86-64 support.
|
||||
|
@ -10,6 +10,8 @@ Bochs repository moved to the SVN version control !
|
||||
- Added support for AVX instruction set emulation, to enable configure with
|
||||
--enable-avx option.
|
||||
When compiled in, AVX still could be disabled using .bochsrc CPUID option.
|
||||
- Added emulation of AVX float16 convert instructions, the feature can be
|
||||
enabled using .bochsrc CPUID option.
|
||||
- Updated/Fixed instrumentation callbacks.
|
||||
- Bugfixes for CPU emulation correctness and stability.
|
||||
|
||||
|
@ -40,6 +40,7 @@ cpuid
|
||||
xsave
|
||||
xsaveopt
|
||||
avx
|
||||
avx_f16c
|
||||
apic
|
||||
1g_pages
|
||||
pcid
|
||||
|
@ -419,6 +419,10 @@ void bx_init_options()
|
||||
"avx", "Support for AVX instruction set",
|
||||
"Support for AVX instruction set",
|
||||
0);
|
||||
new bx_param_bool_c(cpuid_param,
|
||||
"avx_f16c", "Support for AVX F16 convert instructions",
|
||||
"Support for AVX F16 convert instructions",
|
||||
0);
|
||||
#endif
|
||||
#if BX_SUPPORT_X86_64
|
||||
new bx_param_bool_c(cpuid_param,
|
||||
@ -2714,6 +2718,10 @@ static int parse_line_formatted(const char *context, int num_params, char *param
|
||||
if (parse_param_bool(params[i], 4, BXPN_CPUID_AVX) < 0) {
|
||||
PARSE_ERR(("%s: cpuid directive malformed.", context));
|
||||
}
|
||||
} else if (!strncmp(params[i], "avx_f16c=", 9)) {
|
||||
if (parse_param_bool(params[i], 9, BXPN_CPUID_AVX_F16CVT) < 0) {
|
||||
PARSE_ERR(("%s: cpuid directive malformed.", context));
|
||||
}
|
||||
#endif
|
||||
#if BX_SUPPORT_X86_64
|
||||
} else if (!strncmp(params[i], "1g_pages=", 9)) {
|
||||
@ -3951,7 +3959,9 @@ int bx_write_configuration(const char *rc, int overwrite)
|
||||
SIM->get_param_bool(BXPN_CPUID_MOVBE)->get(),
|
||||
SIM->get_param_bool(BXPN_CPUID_SMEP)->get());
|
||||
#if BX_SUPPORT_AVX
|
||||
fprintf(fp, ", avx=%d", SIM->get_param_bool(BXPN_CPUID_AVX)->get());
|
||||
fprintf(fp, ", avx=%d, avx_f16c=%d",
|
||||
SIM->get_param_bool(BXPN_CPUID_AVX)->get(),
|
||||
SIM->get_param_bool(BXPN_CPUID_AVX_F16CVT)->get());
|
||||
#endif
|
||||
#if BX_SUPPORT_X86_64
|
||||
fprintf(fp, ", 1g_pages=%d, pcid=%d, fsgsbase=%d",
|
||||
|
@ -1643,4 +1643,68 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::VDPPS_VpsWpsIbR(bxInstruction_c *i)
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Opcode: VEX.66.0F.3A.13 (VEX.W=0) */
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPH2PS_VpsWpsIbR(bxInstruction_c *i)
|
||||
{
|
||||
BxPackedAvxRegister result;
|
||||
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
|
||||
unsigned len = i->getVL();
|
||||
|
||||
float_status_t status_word;
|
||||
mxcsr_to_softfloat_status_word(status_word, MXCSR);
|
||||
|
||||
for (unsigned n=0; n < (4*len); n++) {
|
||||
|
||||
if (MXCSR.get_DAZ())
|
||||
op.xmm16u(n) = float16_denormal_to_zero(op.xmm16u(n));
|
||||
|
||||
result.avx32u(n) = float16_to_float32(op.xmm16u(n), status_word);
|
||||
}
|
||||
|
||||
check_exceptionsSSE(status_word.float_exception_flags);
|
||||
|
||||
BX_WRITE_AVX_REGZ(i->nnn(), result, len);
|
||||
}
|
||||
|
||||
/* Opcode: VEX.66.0F.3A.1D (VEX.W=0) */
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PH_WpsVpsIb(bxInstruction_c *i)
|
||||
{
|
||||
BxPackedAvxRegister op = BX_READ_AVX_REG(i->nnn());
|
||||
BxPackedXmmRegister result;
|
||||
|
||||
result.xmm64u(1) = 0; /* clear upper part of the result for case of VL128 */
|
||||
|
||||
float_status_t status_word;
|
||||
mxcsr_to_softfloat_status_word(status_word, MXCSR);
|
||||
unsigned len = i->getVL();
|
||||
|
||||
Bit8u control = i->Ib();
|
||||
|
||||
// override MXCSR rounding mode with control coming from imm8
|
||||
if ((control & 0x4) == 0)
|
||||
status_word.float_rounding_mode = control & 0x3;
|
||||
|
||||
for (unsigned n=0; n < (4*len); n++) {
|
||||
|
||||
if (MXCSR.get_DAZ())
|
||||
op.avx32u(n) = float32_denormal_to_zero(op.avx32u(n));
|
||||
|
||||
result.xmm16u(n) = float32_to_float16(op.avx32u(n), status_word);
|
||||
}
|
||||
|
||||
check_exceptionsSSE(status_word.float_exception_flags);
|
||||
|
||||
if (i->modC0()) {
|
||||
BX_WRITE_XMM_REG_CLEAR_HIGH(i->rm(), result);
|
||||
}
|
||||
else {
|
||||
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
|
||||
|
||||
if (len == BX_VL256)
|
||||
write_virtual_dqword(i->seg(), eaddr, &result);
|
||||
else
|
||||
write_virtual_qword(i->seg(), eaddr, result.xmm64u(0));
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -2500,6 +2500,9 @@ public: // for now...
|
||||
BX_SMF void VMASKMOVPD_VpdMpd(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void VMASKMOVPS_MpsVps(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void VMASKMOVPD_MpdVpd(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
||||
BX_SMF void VCVTPH2PS_VpsWpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void VCVTPS2PH_WpsVpsIb(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
#endif
|
||||
|
||||
BX_SMF void CMPXCHG_XBTS(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
@ -98,7 +98,7 @@ Bit32u BX_CPU_C::get_extended_cpuid_features(void)
|
||||
// [26:26] XSAVE extensions support
|
||||
// [27:27] OSXSAVE support
|
||||
// [28:28] AVX extensions support
|
||||
// [29:29] F16C - Float16 conversion support
|
||||
// [29:29] AVX F16C - Float16 conversion support
|
||||
// [30:30] RDRAND instruction
|
||||
// [31:31] reserved
|
||||
|
||||
@ -154,6 +154,9 @@ Bit32u BX_CPU_C::get_extended_cpuid_features(void)
|
||||
#if BX_SUPPORT_AVX
|
||||
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_CPU_AVX))
|
||||
features |= BX_CPUID_EXT_AVX;
|
||||
|
||||
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_CPU_AVX_F16C))
|
||||
features |= BX_CPUID_EXT_AVX_F16C;
|
||||
#endif
|
||||
|
||||
return features;
|
||||
@ -1138,6 +1141,16 @@ void BX_CPU_C::init_isa_features_bitmask(void)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static bx_bool avx_f16c_enabled = SIM->get_param_bool(BXPN_CPUID_AVX_F16CVT)->get();
|
||||
if (avx_f16c_enabled) {
|
||||
if (! avx_enabled) {
|
||||
BX_PANIC(("PANIC: Float16 convert emulation requires AVX support !"));
|
||||
return;
|
||||
}
|
||||
|
||||
features_bitmask |= BX_CPU_AVX_F16C;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_VMX
|
||||
|
@ -132,7 +132,7 @@ struct cpuid_function_t {
|
||||
// [26:26] XSAVE extensions support
|
||||
// [27:27] OSXSAVE support
|
||||
// [28:28] AVX extensions support
|
||||
// [29:29] F16C - Float16 conversion support
|
||||
// [29:29] AVX F16C - Float16 conversion support
|
||||
// [30:30] RDRAND instruction
|
||||
// [31:31] reserved
|
||||
|
||||
@ -165,7 +165,7 @@ struct cpuid_function_t {
|
||||
#define BX_CPUID_EXT_XSAVE (1 << 26)
|
||||
#define BX_CPUID_EXT_OSXSAVE (1 << 27)
|
||||
#define BX_CPUID_EXT_AVX (1 << 28)
|
||||
#define BX_CPUID_EXT_F16C (1 << 29)
|
||||
#define BX_CPUID_EXT_AVX_F16C (1 << 29)
|
||||
#define BX_CPUID_EXT_RDRAND (1 << 30)
|
||||
#define BX_CPUID_EXT_RESERVED31 (1 << 31)
|
||||
|
||||
|
@ -936,7 +936,7 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3] = {
|
||||
/* 10 */ { 0, BX_IA_ERROR },
|
||||
/* 11 */ { 0, BX_IA_ERROR },
|
||||
/* 12 */ { 0, BX_IA_ERROR },
|
||||
/* 13 */ { 0, BX_IA_ERROR },
|
||||
/* 13 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib, BX_IA_VCVTPH2PS_VpsWpsIb },
|
||||
/* 14 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_VPEXTRB_EbdVdqIb },
|
||||
/* 15 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_VPEXTRW_EwdVdqIb },
|
||||
/* 16 */ { BxSplitVexW | BxImmediate_Ib, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a16 },
|
||||
@ -946,7 +946,7 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3] = {
|
||||
/* 1A */ { 0, BX_IA_ERROR },
|
||||
/* 1B */ { 0, BX_IA_ERROR },
|
||||
/* 1C */ { 0, BX_IA_ERROR },
|
||||
/* 1D */ { 0, BX_IA_ERROR },
|
||||
/* 1D */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib, BX_IA_VCVTPS2PH_WpsVpsIb },
|
||||
/* 1E */ { 0, BX_IA_ERROR },
|
||||
/* 1F */ { 0, BX_IA_ERROR },
|
||||
/* 20 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_VPINSRB_VdqEbIb },
|
||||
|
@ -1645,5 +1645,8 @@ bx_define_opcode(BX_IA_VPEXTRQ_EqVdqIb, &BX_CPU_C::PEXTRD_EdVdqIbM, &BX_CPU_C::P
|
||||
bx_define_opcode(BX_IA_VPINSRQ_VdqEqIb, &BX_CPU_C::PINSRD_VdqEdIbM, &BX_CPU_C::PINSRD_VdqEdIbR, BX_CPU_AVX | BX_CPU_X86_64, BX_PREPARE_AVX | BX_VEX_L128)
|
||||
bx_define_opcode(BX_IA_VMOVQ_VdqEq, &BX_CPU_C::MOVQ_VqWqM, &BX_CPU_C::MOVQ_VdqEqR, BX_CPU_AVX | BX_CPU_X86_64, BX_PREPARE_AVX | BX_VEX_L128)
|
||||
bx_define_opcode(BX_IA_VMOVQ_EqVq, &BX_CPU_C::MOVLPS_MqVps, &BX_CPU_C::MOVQ_EqVqR, BX_CPU_AVX | BX_CPU_X86_64, BX_PREPARE_AVX | BX_VEX_L128)
|
||||
|
||||
bx_define_opcode(BX_IA_VCVTPH2PS_VpsWpsIb, &BX_CPU_C::LOAD_VectorQ, &BX_CPU_C::VCVTPH2PS_VpsWpsIbR, BX_CPU_AVX_F16C, BX_PREPARE_AVX | BX_VEX_NO_VVV | BX_VEX_L128 | BX_VEX_L256)
|
||||
bx_define_opcode(BX_IA_VCVTPS2PH_WpsVpsIb, &BX_CPU_C::VCVTPS2PH_WpsVpsIb, &BX_CPU_C::VCVTPS2PH_WpsVpsIb, BX_CPU_AVX_F16C, BX_PREPARE_AVX | BX_VEX_NO_VVV | BX_VEX_L128 | BX_VEX_L256)
|
||||
#endif
|
||||
// AVX
|
||||
|
@ -3097,6 +3097,11 @@ This option exists only if Bochs compiled with BX_CPU_LEVEL >= 6.
|
||||
Select AVX instruction set support.
|
||||
This option exists only if Bochs compiled with --enable-avx option.
|
||||
</para>
|
||||
<para><command>avx_f16c</command></para>
|
||||
<para>
|
||||
Select AVX float16 convert instructions support.
|
||||
This option exists only if Bochs compiled with --enable-avx option.
|
||||
</para>
|
||||
<para><command>1g_pages</command></para>
|
||||
<para>
|
||||
Enable 1G page size support in long mode.
|
||||
|
@ -234,6 +234,11 @@ avx:
|
||||
Select AVX instruction set support.
|
||||
This option exists only if Bochs compiled with --enable-avx option.
|
||||
|
||||
avx_f16c:
|
||||
|
||||
Select AVX float16 convert instructions support.
|
||||
This option exists only if Bochs compiled with --enable-avx option.
|
||||
|
||||
1g_pages:
|
||||
|
||||
Enable 1G page size support in long mode.
|
||||
|
@ -44,7 +44,7 @@ BX_INCDIRS = -I.. -I$(srcdir)/.. -I../@INSTRUMENT_DIR@ -I$(srcdir)/../@INSTRUMEN
|
||||
OBJS = ferr.o fpu.o fpu_arith.o fpu_compare.o fpu_const.o \
|
||||
fpu_load_store.o fpu_misc.o fpu_trans.o fpu_tags.o \
|
||||
fprem.o fsincos.o f2xm1.o fyl2x.o fpatan.o \
|
||||
softfloat.o softfloatx80.o softfloat-specialize.o \
|
||||
softfloat.o softfloatx80.o softfloat16.o softfloat-specialize.o \
|
||||
softfloat-round-pack.o poly.o
|
||||
|
||||
all: libfpu.a
|
||||
@ -176,3 +176,5 @@ softfloat-specialize.o: softfloat-specialize.@CPP_SUFFIX@ softfloat.h ../config.
|
||||
softfloat-specialize.h softfloat-macros.h
|
||||
softfloatx80.o: softfloatx80.@CPP_SUFFIX@ softfloatx80.h softfloat.h ../config.h \
|
||||
softfloat-specialize.h softfloat-round-pack.h softfloat-macros.h
|
||||
softfloat16.o: softfloat16.@CPP_SUFFIX@ softfloat.h ../config.h \
|
||||
softfloat-specialize.h softfloat-round-pack.h softfloat-macros.h
|
||||
|
@ -35,13 +35,37 @@ these four paragraphs for those parts of this code that are retained.
|
||||
#ifndef _SOFTFLOAT_MACROS_H_
|
||||
#define _SOFTFLOAT_MACROS_H_
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Shifts `a' right by the number of bits given in `count'. If any nonzero
|
||||
| bits are shifted off, they are ``jammed'' into the least significant bit of
|
||||
| the result by setting the least significant bit to 1. The value of `count'
|
||||
| can be arbitrarily large; in particular, if `count' is greater than 16, the
|
||||
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE Bit16u shift16RightJamming(Bit16u a, int count)
{
    Bit16u z;

    if (count == 0) {
        z = a;
    }
    else if (count < 16) {
        // Look only at the `count' low-order bits that the right shift
        // discards. Detecting them with `a << ((-count) & 15)' does not work
        // for a 16-bit operand: `a' is promoted to int before shifting, so
        // bits moved above bit 15 are not dropped and every nonzero `a'
        // would wrongly set the sticky bit.
        Bit16u shiftedOut = a & ((1u << count) - 1);
        z = (a>>count) | (shiftedOut != 0);
    }
    else {
        // everything is shifted out: only the sticky bit can remain
        z = (a != 0);
    }

    return z;
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Shifts `a' right by the number of bits given in `count'. If any nonzero
|
||||
| bits are shifted off, they are ``jammed'' into the least significant bit of
|
||||
| the result by setting the least significant bit to 1. The value of `count'
|
||||
| can be arbitrarily large; in particular, if `count' is greater than 32, the
|
||||
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
|
||||
| The result is stored in the location pointed to by `zPtr'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE Bit32u shift32RightJamming(Bit32u a, int count)
|
||||
@ -67,7 +91,6 @@ BX_CPP_INLINE Bit32u shift32RightJamming(Bit32u a, int count)
|
||||
| the result by setting the least significant bit to 1. The value of `count'
|
||||
| can be arbitrarily large; in particular, if `count' is greater than 64, the
|
||||
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
|
||||
| The result is stored in the location pointed to by `zPtr'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE Bit64u shift64RightJamming(Bit64u a, int count)
|
||||
@ -104,9 +127,7 @@ BX_CPP_INLINE Bit64u shift64RightJamming(Bit64u a, int count)
|
||||
| described above, and is returned at the location pointed to by `z1Ptr'.)
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE void
|
||||
shift64ExtraRightJamming(
|
||||
Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
|
||||
BX_CPP_INLINE void shift64ExtraRightJamming(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
|
||||
{
|
||||
Bit64u z0, z1;
|
||||
int negCount = (-count) & 63;
|
||||
@ -139,8 +160,7 @@ BX_CPP_INLINE void
|
||||
| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE void
|
||||
add128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr)
|
||||
BX_CPP_INLINE void add128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr)
|
||||
{
|
||||
Bit64u z1 = a1 + b1;
|
||||
*z1Ptr = z1;
|
||||
@ -261,31 +281,52 @@ static Bit32u estimateSqrt32(Bit16s aExp, Bit32u a)
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Lookup table: entry [v] is the number of leading zero bits of the 8-bit
   value v (8 for v == 0). Each row below covers sixteen consecutive values. */
static const int countLeadingZeros8[] = {
    /* 0x00 */ 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    /* 0x10 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x20 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0x30 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
|
||||
|
||||
#ifdef FLOAT16
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the number of leading 0 bits before the most-significant 1 bit of
|
||||
| `a'. If `a' is zero, 16 is returned.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE int countLeadingZeros16(Bit16u a)
{
    // High byte is zero: it contributes 8 leading zeros and the remaining
    // count comes from the low byte.
    if (a < 0x100) {
        return 8 + countLeadingZeros8[a];
    }

    // Otherwise the high byte alone determines the count.
    return countLeadingZeros8[a>>8];
}
|
||||
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the number of leading 0 bits before the most-significant 1 bit of
|
||||
| `a'. If `a' is zero, 32 is returned.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static int countLeadingZeros32(Bit32u a)
|
||||
BX_CPP_INLINE int countLeadingZeros32(Bit32u a)
|
||||
{
|
||||
static const int countLeadingZerosHigh[] = {
|
||||
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
int shiftCount = 0;
|
||||
if (a < 0x10000) {
|
||||
shiftCount += 16;
|
||||
@ -295,7 +336,7 @@ static int countLeadingZeros32(Bit32u a)
|
||||
shiftCount += 8;
|
||||
a <<= 8;
|
||||
}
|
||||
shiftCount += countLeadingZerosHigh[ a>>24 ];
|
||||
shiftCount += countLeadingZeros8[a>>24];
|
||||
return shiftCount;
|
||||
}
|
||||
|
||||
@ -307,13 +348,13 @@ static int countLeadingZeros32(Bit32u a)
|
||||
BX_CPP_INLINE int countLeadingZeros64(Bit64u a)
|
||||
{
|
||||
int shiftCount = 0;
|
||||
if (a < ((Bit64u) 1)<<32) {
|
||||
if (a < BX_CONST64(0x100000000)) {
|
||||
shiftCount += 32;
|
||||
}
|
||||
else {
|
||||
a >>= 32;
|
||||
}
|
||||
shiftCount += countLeadingZeros32((int)(a));
|
||||
shiftCount += countLeadingZeros32((Bit32u)(a));
|
||||
return shiftCount;
|
||||
}
|
||||
|
||||
@ -327,8 +368,7 @@ BX_CPP_INLINE int countLeadingZeros64(Bit64u a)
|
||||
| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE void
|
||||
shift128Right(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
|
||||
BX_CPP_INLINE void shift128Right(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
|
||||
{
|
||||
Bit64u z0, z1;
|
||||
int negCount = (-count) & 63;
|
||||
@ -360,9 +400,7 @@ BX_CPP_INLINE void
|
||||
| the locations pointed to by `z0Ptr' and `z1Ptr'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE void
|
||||
shift128RightJamming(
|
||||
Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
|
||||
BX_CPP_INLINE void shift128RightJamming(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
|
||||
{
|
||||
Bit64u z0, z1;
|
||||
int negCount = (-count) & 63;
|
||||
@ -398,9 +436,7 @@ BX_CPP_INLINE void
|
||||
| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE void
|
||||
shortShift128Left(
|
||||
Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
|
||||
BX_CPP_INLINE void shortShift128Left(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
|
||||
{
|
||||
*z1Ptr = a1<<count;
|
||||
*z0Ptr = (count == 0) ? a0 : (a0<<count) | (a1>>((-count) & 63));
|
||||
|
@ -148,6 +148,102 @@ Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, float_status_t &
|
||||
return z;
|
||||
}
|
||||
|
||||
#ifdef FLOAT16
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Normalizes the subnormal half-precision floating-point value represented
|
||||
| by the denormalized significand `aSig'. The normalized exponent and
|
||||
| significand are stored at the locations pointed to by `zExpPtr' and
|
||||
| `zSigPtr', respectively.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
void normalizeFloat16Subnormal(Bit16u aSig, Bit16s *zExpPtr, Bit16u *zSigPtr)
{
    // A significand whose top set bit is bit 10 has exactly 5 leading zeros
    // in a 16-bit word; shift the subnormal significand up to that position.
    const int leadingZeros = countLeadingZeros16(aSig);
    const int normalizeShift = leadingZeros - 5;

    *zSigPtr = aSig << normalizeShift;
    // the exponent is lowered by one for every position shifted
    *zExpPtr = 1 - normalizeShift;
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
||||
| and significand `zSig', and returns the proper half-precision floating-
|
||||
| point value corresponding to the abstract input. Ordinarily, the abstract
|
||||
| value is simply rounded and packed into the half-precision format, with
|
||||
| the inexact exception raised if the abstract input cannot be represented
|
||||
| exactly. However, if the abstract value is too large, the overflow and
|
||||
| inexact exceptions are raised and an infinity or maximal finite value is
|
||||
| returned. If the abstract value is too small, the input value is rounded to
|
||||
| a subnormal number, and the underflow and inexact exceptions are raised if
|
||||
| the abstract input cannot be represented exactly as a subnormal single-
|
||||
| precision floating-point number.
|
||||
| The input significand `zSig' has its binary point between bits 14
|
||||
| and 13, which is 4 bits to the left of the usual location. This shifted
|
||||
| significand must be normalized or smaller. If `zSig' is not normalized,
|
||||
| `zExp' must be 0; in that case, the result returned is a subnormal number,
|
||||
| and it must not require rounding. In the usual case that `zSig' is
|
||||
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
|
||||
| The handling of underflow and overflow follows the IEC/IEEE Standard for
|
||||
| Binary Floating-Point Arithmetic.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
float16 roundAndPackFloat16(int zSign, Bit16s zExp, Bit16u zSig, float_status_t &status)
|
||||
{
|
||||
Bit16s roundIncrement, roundBits, roundMask;
|
||||
|
||||
int roundingMode = get_float_rounding_mode(status);
|
||||
int roundNearestEven = (roundingMode == float_round_nearest_even);
|
||||
roundIncrement = 8;
|
||||
roundMask = 0xF;
|
||||
|
||||
if (! roundNearestEven) {
|
||||
if (roundingMode == float_round_to_zero) roundIncrement = 0;
|
||||
else {
|
||||
roundIncrement = roundMask;
|
||||
if (zSign) {
|
||||
if (roundingMode == float_round_up) roundIncrement = 0;
|
||||
}
|
||||
else {
|
||||
if (roundingMode == float_round_down) roundIncrement = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
roundBits = zSig & roundMask;
|
||||
if (0x1D <= (Bit16u) zExp) {
|
||||
if ((0x1D < zExp)
|
||||
|| ((zExp == 0x1D)
|
||||
&& ((Bit16s) (zSig + roundIncrement) < 0)))
|
||||
{
|
||||
float_raise(status, float_flag_overflow);
|
||||
if (roundBits || float_exception_masked(status, float_flag_overflow)) {
|
||||
float_raise(status, float_flag_inexact);
|
||||
}
|
||||
return packFloat16(zSign, 0x1F, 0) - (roundIncrement == 0);
|
||||
}
|
||||
if (zExp < 0) {
|
||||
int isTiny = (zExp < -1) || (zSig + roundIncrement < 0x8000);
|
||||
zSig = shift16RightJamming(zSig, -zExp);
|
||||
zExp = 0;
|
||||
roundBits = zSig & roundMask;
|
||||
if (isTiny) {
|
||||
if(get_flush_underflow_to_zero(status)) {
|
||||
float_raise(status, float_flag_underflow | float_flag_inexact);
|
||||
return packFloat16(zSign, 0, 0);
|
||||
}
|
||||
if (roundBits || !float_exception_masked(status, float_flag_underflow)) {
|
||||
float_raise(status, float_flag_underflow);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (roundBits) float_raise(status, float_flag_inexact);
|
||||
Bit16u zSigRound = ((zSig + roundIncrement) & ~roundMask) >> 4;
|
||||
zSigRound &= ~(((roundBits ^ 0x10) == 0) & roundNearestEven);
|
||||
if (zSigRound == 0) zExp = 0;
|
||||
return packFloat16(zSign, zExp, zSigRound);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Normalizes the subnormal single-precision floating-point value represented
|
||||
| by the denormalized significand `aSig'. The normalized exponent and
|
||||
|
@ -63,6 +63,43 @@ Bit32s roundAndPackInt32(int zSign, Bit64u absZ, float_status_t &status);
|
||||
|
||||
Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, float_status_t &status);
|
||||
|
||||
#ifdef FLOAT16
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Normalizes the subnormal half-precision floating-point value represented
|
||||
| by the denormalized significand `aSig'. The normalized exponent and
|
||||
| significand are stored at the locations pointed to by `zExpPtr' and
|
||||
| `zSigPtr', respectively.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
void normalizeFloat16Subnormal(Bit16u aSig, Bit16s *zExpPtr, Bit16u *zSigPtr);
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
||||
| and significand `zSig', and returns the proper half-precision floating-
|
||||
| point value corresponding to the abstract input. Ordinarily, the abstract
|
||||
| value is simply rounded and packed into the half-precision format, with
|
||||
| the inexact exception raised if the abstract input cannot be represented
|
||||
| exactly. However, if the abstract value is too large, the overflow and
|
||||
| inexact exceptions are raised and an infinity or maximal finite value is
|
||||
| returned. If the abstract value is too small, the input value is rounded to
|
||||
| a subnormal number, and the underflow and inexact exceptions are raised if
|
||||
| the abstract input cannot be represented exactly as a subnormal half-
| precision floating-point number.
|
||||
| The input significand `zSig' has its binary point between bits 14
|
||||
| and 13, which is 4 bits to the left of the usual location. This shifted
|
||||
| significand must be normalized or smaller. If `zSig' is not normalized,
|
||||
| `zExp' must be 0; in that case, the result returned is a subnormal number,
|
||||
| and it must not require rounding. In the usual case that `zSig' is
|
||||
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
|
||||
| The handling of underflow and overflow follows the IEC/IEEE Standard for
|
||||
| Binary Floating-Point Arithmetic.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
float16 roundAndPackFloat16(int zSign, Bit16s zExp, Bit16u zSig, float_status_t &status);
|
||||
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Normalizes the subnormal single-precision floating-point value represented
|
||||
| by the denormalized significand `aSig'. The normalized exponent and
|
||||
|
@ -50,6 +50,128 @@ typedef struct {
|
||||
Bit64u hi, lo;
|
||||
} commonNaNT;
|
||||
|
||||
#ifdef FLOAT16
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| The pattern for a default generated half-precision NaN.
|
||||
*----------------------------------------------------------------------------*/
|
||||
#define float16_default_nan 0xFE00
|
||||
|
||||
#define float16_fraction extractFloat16Frac
|
||||
#define float16_exp extractFloat16Exp
|
||||
#define float16_sign extractFloat16Sign
|
||||
|
||||
/*----------------------------------------------------------------------------
| Returns the fraction field (the low 10 bits) of the half-precision
| floating-point value `a'.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE Bit16u extractFloat16Frac(float16 a)
{
    const Bit16u fractionMask = 0x03FF;
    return (Bit16u) (a & fractionMask);
}
|
||||
|
||||
/*----------------------------------------------------------------------------
| Returns the 5-bit exponent field (bits 14..10) of the half-precision
| floating-point value `a'.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE Bit16s extractFloat16Exp(float16 a)
{
    const int withoutFraction = a >> 10;   /* drop the 10 fraction bits */
    return (Bit16s) (withoutFraction & 0x1F);
}
|
||||
|
||||
/*----------------------------------------------------------------------------
| Returns the sign bit (bit 15) of the half-precision floating-point value
| `a': 1 when it is set, 0 otherwise.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE int extractFloat16Sign(float16 a)
{
    return (a & 0x8000) ? 1 : 0;
}
|
||||
|
||||
/*----------------------------------------------------------------------------
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
| half-precision floating-point value, returning the result.  After being
| shifted into the proper positions, the three fields are simply added
| together to form the result.  This means that any integer portion of `zSig'
| will be added into the exponent.  Since a properly normalized significand
| will have an integer portion equal to 1, the `zExp' input should be 1 less
| than the desired result exponent whenever `zSig' is a complete, normalized
| significand.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE float16 packFloat16(int zSign, int zExp, Bit16u zSig)
{
    const Bit16u signField = (Bit16u) (((Bit16u) zSign) << 15);
    const Bit16u expField  = (Bit16u) (((Bit16u) zExp) << 10);

    /* fields are added, not OR-ed, so a significand carry bumps the exponent */
    return (float16) (signField + expField + zSig);
}
|
||||
|
||||
/*----------------------------------------------------------------------------
| Returns 1 if the half-precision floating-point value `a' is a NaN;
| otherwise returns 0.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE int float16_is_nan(float16 a)
{
    /* Shift the sign bit out; what is left exceeds 0xF800 only when the
       exponent field is all ones and the fraction is nonzero. */
    const Bit16u withoutSign = (Bit16u) (a << 1);
    return withoutSign > 0xF800;
}
|
||||
|
||||
/*----------------------------------------------------------------------------
| Returns 1 if the half-precision floating-point value `a' is a signaling
| NaN; otherwise returns 0.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE int float16_is_signaling_nan(float16 a)
{
    /* bits 14..9 must read 111110b: exponent all ones, top fraction bit clear */
    if (((a >> 9) & 0x3F) != 0x3E) {
        return 0;
    }

    /* a zero remaining fraction would be an infinity, not a NaN */
    return (a & 0x1FF) != 0;
}
|
||||
|
||||
/*----------------------------------------------------------------------------
| Returns 1 if the half-precision floating-point value `a' is denormal
| (zero exponent field with a nonzero fraction); otherwise returns 0.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE int float16_is_denormal(float16 a)
{
    const int exponentField = (a >> 10) & 0x1F;
    const int fractionField = a & 0x3FF;

    if (exponentField != 0) {
        return 0;
    }
    return fractionField != 0;
}
|
||||
|
||||
/*----------------------------------------------------------------------------
| Replaces a denormal half-precision value by a zero of the same sign; any
| other value is returned unchanged.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE float16 float16_denormal_to_zero(float16 a)
{
    const int exponentField = (a >> 10) & 0x1F;
    const int fractionField = a & 0x3FF;
    const int isDenormal = (exponentField == 0) && (fractionField != 0);

    /* keep only the sign bit of a denormal */
    return isDenormal ? (float16) (a & 0x8000) : a;
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the result of converting the half-precision floating-point NaN
|
||||
| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
|
||||
| exception is raised.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE commonNaNT float16ToCommonNaN(float16 a, float_status_t &status)
|
||||
{
|
||||
commonNaNT z;
|
||||
if (float16_is_signaling_nan(a)) float_raise(status, float_flag_invalid);
|
||||
z.sign = a>>15;
|
||||
z.lo = 0;
|
||||
z.hi = ((Bit64u) a)<<54;
|
||||
return z;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the result of converting the canonical NaN `a' to the half-
|
||||
| precision floating-point format.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
BX_CPP_INLINE float16 commonNaNToFloat16(commonNaNT a)
|
||||
{
|
||||
return (((Bit16u) a.sign)<<15) | 0x7E00 | (Bit16u)(a.hi>>54);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| The pattern for a default generated single-precision NaN.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
@ -37,11 +37,15 @@ these four paragraphs for those parts of this code that are retained.
|
||||
#ifndef _SOFTFLOAT_H_
|
||||
#define _SOFTFLOAT_H_
|
||||
|
||||
#define FLOAT16
|
||||
#define FLOATX80
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Software IEC/IEEE floating-point types.
|
||||
*----------------------------------------------------------------------------*/
|
||||
#ifdef FLOAT16
|
||||
typedef Bit16u float16;
|
||||
#endif
|
||||
typedef Bit32u float32;
|
||||
typedef Bit64u float64;
|
||||
|
||||
@ -220,6 +224,7 @@ int float32_compare_quiet(float32, float32, float_status_t &status);
|
||||
float_class_t float32_class(float32);
|
||||
int float32_is_signaling_nan(float32);
|
||||
int float32_is_nan(float32);
|
||||
int float32_is_denormal(float32);
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Software IEC/IEEE double-precision conversion routines.
|
||||
@ -246,6 +251,17 @@ int float64_compare_quiet(float64, float64, float_status_t &status);
|
||||
float_class_t float64_class(float64);
|
||||
int float64_is_signaling_nan(float64);
|
||||
int float64_is_nan(float64);
|
||||
int float64_is_denormal(float64);
|
||||
|
||||
#ifdef FLOAT16
|
||||
float32 float16_to_float32(float16, float_status_t &status);
|
||||
float16 float32_to_float16(float32, float_status_t &status);
|
||||
|
||||
float_class_t float16_class(float16);
|
||||
int float16_is_signaling_nan(float16);
|
||||
int float16_is_nan(float16);
|
||||
int float16_is_denormal(float16);
|
||||
#endif
|
||||
|
||||
#ifdef FLOATX80
|
||||
|
||||
|
@ -54,6 +54,7 @@
|
||||
#define BXPN_CPUID_XSAVE "cpuid.xsave"
|
||||
#define BXPN_CPUID_XSAVEOPT "cpuid.xsaveopt"
|
||||
#define BXPN_CPUID_AVX "cpuid.avx"
|
||||
#define BXPN_CPUID_AVX_F16CVT "cpuid.avx_f16c"
|
||||
#define BXPN_CPUID_APIC "cpuid.apic"
|
||||
#define BXPN_CPUID_MWAIT "cpuid.mwait"
|
||||
#define BXPN_CPUID_MWAIT_IS_NOP "cpuid.mwait_is_nop"
|
||||
|
Loading…
Reference in New Issue
Block a user