implemented AVX float16 convert instructions

This commit is contained in:
Stanislav Shwartsman 2011-06-11 13:12:32 +00:00
parent d7f19bcfd4
commit 8399dee24c
19 changed files with 464 additions and 44 deletions

View File

@ -179,6 +179,10 @@ cpu: count=1, ips=50000000, reset_on_triple_fault=1, ignore_bad_msrs=1, msrs="ms
# Select AVX instruction set support.
# This option exists only if Bochs is compiled with the --enable-avx option.
#
# AVX_F16C:
# Select AVX float16 convert instructions support.
# This option exists only if Bochs is compiled with the --enable-avx option.
#
# 1G_PAGES:
# Enable 1G page size support in long mode.
# This option exists only if Bochs is compiled with x86-64 support.

View File

@ -10,6 +10,8 @@ Bochs repository moved to the SVN version control !
- Added support for AVX instruction set emulation, to enable configure with
--enable-avx option.
When compiled in, AVX still could be disabled using .bochsrc CPUID option.
- Added emulation of AVX float16 convert instructions, the feature can be
enabled using .bochsrc CPUID option.
- Updated/Fixed instrumentation callbacks.
- Bugfixes for CPU emulation correctness and stability.

View File

@ -40,6 +40,7 @@ cpuid
xsave
xsaveopt
avx
avx_f16c
apic
1g_pages
pcid

View File

@ -419,6 +419,10 @@ void bx_init_options()
"avx", "Support for AVX instruction set",
"Support for AVX instruction set",
0);
new bx_param_bool_c(cpuid_param,
"avx_f16c", "Support for AVX F16 convert instructions",
"Support for AVX F16 convert instructions",
0);
#endif
#if BX_SUPPORT_X86_64
new bx_param_bool_c(cpuid_param,
@ -2714,6 +2718,10 @@ static int parse_line_formatted(const char *context, int num_params, char *param
if (parse_param_bool(params[i], 4, BXPN_CPUID_AVX) < 0) {
PARSE_ERR(("%s: cpuid directive malformed.", context));
}
} else if (!strncmp(params[i], "avx_f16c=", 9)) {
if (parse_param_bool(params[i], 9, BXPN_CPUID_AVX_F16CVT) < 0) {
PARSE_ERR(("%s: cpuid directive malformed.", context));
}
#endif
#if BX_SUPPORT_X86_64
} else if (!strncmp(params[i], "1g_pages=", 9)) {
@ -3951,7 +3959,9 @@ int bx_write_configuration(const char *rc, int overwrite)
SIM->get_param_bool(BXPN_CPUID_MOVBE)->get(),
SIM->get_param_bool(BXPN_CPUID_SMEP)->get());
#if BX_SUPPORT_AVX
fprintf(fp, ", avx=%d", SIM->get_param_bool(BXPN_CPUID_AVX)->get());
fprintf(fp, ", avx=%d, avx_f16c=%d",
SIM->get_param_bool(BXPN_CPUID_AVX)->get(),
SIM->get_param_bool(BXPN_CPUID_AVX_F16CVT)->get());
#endif
#if BX_SUPPORT_X86_64
fprintf(fp, ", 1g_pages=%d, pcid=%d, fsgsbase=%d",

View File

@ -1643,4 +1643,68 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::VDPPS_VpsWpsIbR(bxInstruction_c *i)
#endif
}
/* Opcode: VEX.66.0F.3A.13 (VEX.W=0) */
/*
 * VCVTPH2PS, register source form: converts the low 4*VL half-precision
 * elements of XMM register rm() to single precision and writes them to AVX
 * register nnn() with BX_WRITE_AVX_REGZ.
 *
 * NOTE(review): the Intel SDM lists VCVTPH2PS as VEX.66.0F38.W0 13 /r with
 * no immediate byte - confirm the opcode map / immediate used by the decoder.
 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPH2PS_VpsWpsIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister result;
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
  unsigned len = i->getVL();

  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  // MXCSR.DAZ is intentionally NOT applied to the source here: per the Intel
  // SDM, VCVTPH2PS ignores DAZ and converts a half-precision denormal to the
  // corresponding normal single-precision value instead of flushing it to 0.
  // NOTE(review): the SDM also says no denormal exception is reported; if
  // float16_to_float32 raises the denormal flag it still reaches
  // check_exceptionsSSE below - confirm and suppress it there if needed.
  for (unsigned n=0; n < (4*len); n++) {
    result.avx32u(n) = float16_to_float32(op.xmm16u(n), status_word);
  }

  check_exceptionsSSE(status_word.float_exception_flags);

  BX_WRITE_AVX_REGZ(i->nnn(), result, len);
}
/* Opcode: VEX.66.0F.3A.1D (VEX.W=0) */
/*
 * VCVTPS2PH: converts 4*VL single-precision elements of AVX register nnn()
 * to half precision. The packed result goes either to XMM register rm()
 * (upper bits cleared) or to memory: 128 bits for VL256, 64 bits otherwise.
 *
 * imm8 bit 2 selects the rounding source: when clear, imm8[1:0] replaces the
 * rounding mode taken from MXCSR; when set, the MXCSR rounding mode is kept.
 *
 * NOTE(review): the Intel SDM states that MXCSR.FTZ is ignored by this
 * instruction; nothing here overrides the flush-to-zero setting copied from
 * MXCSR - confirm against float32_to_float16.
 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PH_WpsVpsIb(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->nnn());

  BxPackedXmmRegister packed;
  // only the low qword is produced for VL128, so pre-clear the high qword
  packed.xmm64u(1) = 0;

  float_status_t status;
  mxcsr_to_softfloat_status_word(status, MXCSR);

  const unsigned vl = i->getVL();
  const Bit8u imm8 = i->Ib();

  const bool keep_mxcsr_rounding = (imm8 & 0x4) != 0;
  if (! keep_mxcsr_rounding)
    status.float_rounding_mode = imm8 & 0x3;

  const unsigned count = 4*vl;
  for (unsigned idx = 0; idx != count; ++idx) {
    float32 single = src.avx32u(idx);
    if (MXCSR.get_DAZ())
      single = float32_denormal_to_zero(single);

    packed.xmm16u(idx) = float32_to_float16(single, status);
  }

  check_exceptionsSSE(status.float_exception_flags);

  // register destination
  if (i->modC0()) {
    BX_WRITE_XMM_REG_CLEAR_HIGH(i->rm(), packed);
    return;
  }

  // memory destination
  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));

  if (vl == BX_VL256) {
    write_virtual_dqword(i->seg(), eaddr, &packed);
  }
  else {
    write_virtual_qword(i->seg(), eaddr, packed.xmm64u(0));
  }
}
#endif

View File

@ -2500,6 +2500,9 @@ public: // for now...
BX_SMF void VMASKMOVPD_VpdMpd(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VMASKMOVPS_MpsVps(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VMASKMOVPD_MpdVpd(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VCVTPH2PS_VpsWpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VCVTPS2PH_WpsVpsIb(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#endif
BX_SMF void CMPXCHG_XBTS(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

View File

@ -98,7 +98,7 @@ Bit32u BX_CPU_C::get_extended_cpuid_features(void)
// [26:26] XSAVE extensions support
// [27:27] OSXSAVE support
// [28:28] AVX extensions support
// [29:29] F16C - Float16 conversion support
// [29:29] AVX F16C - Float16 conversion support
// [30:30] RDRAND instruction
// [31:31] reserved
@ -154,6 +154,9 @@ Bit32u BX_CPU_C::get_extended_cpuid_features(void)
#if BX_SUPPORT_AVX
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_CPU_AVX))
features |= BX_CPUID_EXT_AVX;
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_CPU_AVX_F16C))
features |= BX_CPUID_EXT_AVX_F16C;
#endif
return features;
@ -1138,6 +1141,16 @@ void BX_CPU_C::init_isa_features_bitmask(void)
return;
}
}
static bx_bool avx_f16c_enabled = SIM->get_param_bool(BXPN_CPUID_AVX_F16CVT)->get();
if (avx_f16c_enabled) {
if (! avx_enabled) {
BX_PANIC(("PANIC: Float16 convert emulation requires AVX support !"));
return;
}
features_bitmask |= BX_CPU_AVX_F16C;
}
#endif
#if BX_SUPPORT_VMX

View File

@ -132,7 +132,7 @@ struct cpuid_function_t {
// [26:26] XSAVE extensions support
// [27:27] OSXSAVE support
// [28:28] AVX extensions support
// [29:29] F16C - Float16 conversion support
// [29:29] AVX F16C - Float16 conversion support
// [30:30] RDRAND instruction
// [31:31] reserved
@ -165,7 +165,7 @@ struct cpuid_function_t {
#define BX_CPUID_EXT_XSAVE (1 << 26)
#define BX_CPUID_EXT_OSXSAVE (1 << 27)
#define BX_CPUID_EXT_AVX (1 << 28)
#define BX_CPUID_EXT_F16C (1 << 29)
#define BX_CPUID_EXT_AVX_F16C (1 << 29)
#define BX_CPUID_EXT_RDRAND (1 << 30)
#define BX_CPUID_EXT_RESERVED31 (1 << 31)

View File

@ -936,7 +936,7 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3] = {
/* 10 */ { 0, BX_IA_ERROR },
/* 11 */ { 0, BX_IA_ERROR },
/* 12 */ { 0, BX_IA_ERROR },
/* 13 */ { 0, BX_IA_ERROR },
/* 13 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib, BX_IA_VCVTPH2PS_VpsWpsIb },
/* 14 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_VPEXTRB_EbdVdqIb },
/* 15 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_VPEXTRW_EwdVdqIb },
/* 16 */ { BxSplitVexW | BxImmediate_Ib, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a16 },
@ -946,7 +946,7 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3] = {
/* 1A */ { 0, BX_IA_ERROR },
/* 1B */ { 0, BX_IA_ERROR },
/* 1C */ { 0, BX_IA_ERROR },
/* 1D */ { 0, BX_IA_ERROR },
/* 1D */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib, BX_IA_VCVTPS2PH_WpsVpsIb },
/* 1E */ { 0, BX_IA_ERROR },
/* 1F */ { 0, BX_IA_ERROR },
/* 20 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_VPINSRB_VdqEbIb },

View File

@ -1645,5 +1645,8 @@ bx_define_opcode(BX_IA_VPEXTRQ_EqVdqIb, &BX_CPU_C::PEXTRD_EdVdqIbM, &BX_CPU_C::P
bx_define_opcode(BX_IA_VPINSRQ_VdqEqIb, &BX_CPU_C::PINSRD_VdqEdIbM, &BX_CPU_C::PINSRD_VdqEdIbR, BX_CPU_AVX | BX_CPU_X86_64, BX_PREPARE_AVX | BX_VEX_L128)
bx_define_opcode(BX_IA_VMOVQ_VdqEq, &BX_CPU_C::MOVQ_VqWqM, &BX_CPU_C::MOVQ_VdqEqR, BX_CPU_AVX | BX_CPU_X86_64, BX_PREPARE_AVX | BX_VEX_L128)
bx_define_opcode(BX_IA_VMOVQ_EqVq, &BX_CPU_C::MOVLPS_MqVps, &BX_CPU_C::MOVQ_EqVqR, BX_CPU_AVX | BX_CPU_X86_64, BX_PREPARE_AVX | BX_VEX_L128)
bx_define_opcode(BX_IA_VCVTPH2PS_VpsWpsIb, &BX_CPU_C::LOAD_VectorQ, &BX_CPU_C::VCVTPH2PS_VpsWpsIbR, BX_CPU_AVX_F16C, BX_PREPARE_AVX | BX_VEX_NO_VVV | BX_VEX_L128 | BX_VEX_L256)
bx_define_opcode(BX_IA_VCVTPS2PH_WpsVpsIb, &BX_CPU_C::VCVTPS2PH_WpsVpsIb, &BX_CPU_C::VCVTPS2PH_WpsVpsIb, BX_CPU_AVX_F16C, BX_PREPARE_AVX | BX_VEX_NO_VVV | BX_VEX_L128 | BX_VEX_L256)
#endif
// AVX

View File

@ -3097,6 +3097,11 @@ This option exists only if Bochs compiled with BX_CPU_LEVEL >= 6.
Select AVX instruction set support.
This option exists only if Bochs is compiled with the --enable-avx option.
</para>
<para><command>avx_f16c</command></para>
<para>
Select AVX float16 convert instructions support.
This option exists only if Bochs is compiled with the --enable-avx option.
</para>
<para><command>1g_pages</command></para>
<para>
Enable 1G page size support in long mode.

View File

@ -234,6 +234,11 @@ avx:
Select AVX instruction set support.
This option exists only if Bochs is compiled with the --enable-avx option.
avx_f16c:
Select AVX float16 convert instructions support.
This option exists only if Bochs is compiled with the --enable-avx option.
1g_pages:
Enable 1G page size support in long mode.

View File

@ -44,7 +44,7 @@ BX_INCDIRS = -I.. -I$(srcdir)/.. -I../@INSTRUMENT_DIR@ -I$(srcdir)/../@INSTRUMEN
OBJS = ferr.o fpu.o fpu_arith.o fpu_compare.o fpu_const.o \
fpu_load_store.o fpu_misc.o fpu_trans.o fpu_tags.o \
fprem.o fsincos.o f2xm1.o fyl2x.o fpatan.o \
softfloat.o softfloatx80.o softfloat-specialize.o \
softfloat.o softfloatx80.o softfloat16.o softfloat-specialize.o \
softfloat-round-pack.o poly.o
all: libfpu.a
@ -176,3 +176,5 @@ softfloat-specialize.o: softfloat-specialize.@CPP_SUFFIX@ softfloat.h ../config.
softfloat-specialize.h softfloat-macros.h
softfloatx80.o: softfloatx80.@CPP_SUFFIX@ softfloatx80.h softfloat.h ../config.h \
softfloat-specialize.h softfloat-round-pack.h softfloat-macros.h
softfloat16.o: softfloat16.@CPP_SUFFIX@ softfloat.h ../config.h \
softfloat-specialize.h softfloat-round-pack.h softfloat-macros.h

View File

@ -35,13 +35,37 @@ these four paragraphs for those parts of this code that are retained.
#ifndef _SOFTFLOAT_MACROS_H_
#define _SOFTFLOAT_MACROS_H_
/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'.  If any nonzero
| bits are shifted off, they are ``jammed'' into the least significant bit of
| the result by setting the least significant bit to 1.  The value of `count'
| can be arbitrarily large; in particular, if `count' is greater than 16, the
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
| `count' must not be negative.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE Bit16u shift16RightJamming(Bit16u a, int count)
{
  Bit16u z;

  if (count == 0) {
    z = a;
  }
  else if (count < 16) {
    // `a' is promoted to int before the left shift, so the shifted value
    // keeps bits above bit 15.  Truncate it back to 16 bits so that only the
    // `count' bits actually shifted off decide the sticky bit; without the
    // truncation every nonzero `a' would set it.
    Bit16u shiftedOff = (Bit16u)(a<<((-count) & 15));
    z = (a>>count) | (shiftedOff != 0);
  }
  else {
    z = (a != 0);
  }

  return z;
}
/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'. If any nonzero
| bits are shifted off, they are ``jammed'' into the least significant bit of
| the result by setting the least significant bit to 1. The value of `count'
| can be arbitrarily large; in particular, if `count' is greater than 32, the
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
| The result is stored in the location pointed to by `zPtr'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE Bit32u shift32RightJamming(Bit32u a, int count)
@ -67,7 +91,6 @@ BX_CPP_INLINE Bit32u shift32RightJamming(Bit32u a, int count)
| the result by setting the least significant bit to 1. The value of `count'
| can be arbitrarily large; in particular, if `count' is greater than 64, the
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
| The result is stored in the location pointed to by `zPtr'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE Bit64u shift64RightJamming(Bit64u a, int count)
@ -104,9 +127,7 @@ BX_CPP_INLINE Bit64u shift64RightJamming(Bit64u a, int count)
| described above, and is returned at the location pointed to by `z1Ptr'.)
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void
shift64ExtraRightJamming(
Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
BX_CPP_INLINE void shift64ExtraRightJamming(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
{
Bit64u z0, z1;
int negCount = (-count) & 63;
@ -139,8 +160,7 @@ BX_CPP_INLINE void
| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void
add128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr)
BX_CPP_INLINE void add128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr)
{
Bit64u z1 = a1 + b1;
*z1Ptr = z1;
@ -261,31 +281,52 @@ static Bit32u estimateSqrt32(Bit16s aExp, Bit32u a)
}
#endif
static const int countLeadingZeros8[] = {
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
#ifdef FLOAT16
/*----------------------------------------------------------------------------
| Counts the 0 bits above the most-significant 1 bit of the 16-bit value `a'
| using the byte-wide countLeadingZeros8 table.  Returns 16 when `a' is zero.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE int countLeadingZeros16(Bit16u a)
{
  const int highByte = a >> 8;

  // empty high byte: 8 leading zeros plus whatever the low byte contributes
  if (highByte == 0)
    return 8 + countLeadingZeros8[a];

  return countLeadingZeros8[highByte];
}
#endif
/*----------------------------------------------------------------------------
| Returns the number of leading 0 bits before the most-significant 1 bit of
| `a'. If `a' is zero, 32 is returned.
*----------------------------------------------------------------------------*/
static int countLeadingZeros32(Bit32u a)
BX_CPP_INLINE int countLeadingZeros32(Bit32u a)
{
static const int countLeadingZerosHigh[] = {
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
int shiftCount = 0;
if (a < 0x10000) {
shiftCount += 16;
@ -295,7 +336,7 @@ static int countLeadingZeros32(Bit32u a)
shiftCount += 8;
a <<= 8;
}
shiftCount += countLeadingZerosHigh[ a>>24 ];
shiftCount += countLeadingZeros8[a>>24];
return shiftCount;
}
@ -307,13 +348,13 @@ static int countLeadingZeros32(Bit32u a)
BX_CPP_INLINE int countLeadingZeros64(Bit64u a)
{
int shiftCount = 0;
if (a < ((Bit64u) 1)<<32) {
if (a < BX_CONST64(0x100000000)) {
shiftCount += 32;
}
else {
a >>= 32;
}
shiftCount += countLeadingZeros32((int)(a));
shiftCount += countLeadingZeros32((Bit32u)(a));
return shiftCount;
}
@ -327,8 +368,7 @@ BX_CPP_INLINE int countLeadingZeros64(Bit64u a)
| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void
shift128Right(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
BX_CPP_INLINE void shift128Right(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
{
Bit64u z0, z1;
int negCount = (-count) & 63;
@ -360,9 +400,7 @@ BX_CPP_INLINE void
| the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void
shift128RightJamming(
Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
BX_CPP_INLINE void shift128RightJamming(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
{
Bit64u z0, z1;
int negCount = (-count) & 63;
@ -398,9 +436,7 @@ BX_CPP_INLINE void
| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/
BX_CPP_INLINE void
shortShift128Left(
Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
BX_CPP_INLINE void shortShift128Left(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
{
*z1Ptr = a1<<count;
*z0Ptr = (count == 0) ? a0 : (a0<<count) | (a1>>((-count) & 63));

View File

@ -148,6 +148,102 @@ Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, float_status_t &
return z;
}
#ifdef FLOAT16
/*----------------------------------------------------------------------------
| Normalizes the significand `aSig' of a subnormal half-precision value.
| The significand is shifted left until its leading 1 bit reaches bit 10;
| the shifted significand is stored through `zSigPtr' and the matching
| exponent (1 minus the shift distance) through `zExpPtr'.
*----------------------------------------------------------------------------*/

void normalizeFloat16Subnormal(Bit16u aSig, Bit16s *zExpPtr, Bit16u *zSigPtr)
{
  const int leadingZeros = countLeadingZeros16(aSig);
  // 5 zero bits remain above bit 10 once the value is normalized
  const int distance = leadingZeros - 5;

  *zSigPtr = aSig << distance;
  *zExpPtr = 1 - distance;
}
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper half-precision floating-
| point value corresponding to the abstract input.  Ordinarily, the abstract
| value is simply rounded and packed into the half-precision format, with
| the inexact exception raised if the abstract input cannot be represented
| exactly.  However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned.  If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal half-
| precision floating-point number.
|     The input significand `zSig' has its binary point between bits 14
| and 13, which is 4 bits to the left of the usual location.  This shifted
| significand must be normalized or smaller.  If `zSig' is not normalized,
| `zExp' must be 0; in that case, the result returned is a subnormal number,
| and it must not require rounding.  In the usual case that `zSig' is
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
| The handling of underflow and overflow follows the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

float16 roundAndPackFloat16(int zSign, Bit16s zExp, Bit16u zSig, float_status_t &status)
{
  Bit16s roundIncrement, roundBits, roundMask;

  int roundingMode = get_float_rounding_mode(status);
  int roundNearestEven = (roundingMode == float_round_nearest_even);

  // The low 4 bits of `zSig' are guard bits that get rounded away:
  // 0xF selects all of them and 0x8 is exactly one half of a result ulp.
  const Bit16s roundHalfway = 8;
  roundIncrement = roundHalfway;
  roundMask = 0xF;

  if (! roundNearestEven) {
    if (roundingMode == float_round_to_zero) roundIncrement = 0;
    else {
      // directed rounding: bump by a full mask when rounding away from zero
      // for this sign, truncate otherwise
      roundIncrement = roundMask;
      if (zSign) {
        if (roundingMode == float_round_up) roundIncrement = 0;
      }
      else {
        if (roundingMode == float_round_down) roundIncrement = 0;
      }
    }
  }
  roundBits = zSig & roundMask;

  // The unsigned comparison catches both too-large and negative exponents.
  if (0x1D <= (Bit16u) zExp) {
    if ((0x1D < zExp)
        || ((zExp == 0x1D)
            && ((Bit16s) (zSig + roundIncrement) < 0)))
    {
      float_raise(status, float_flag_overflow);
      if (roundBits || float_exception_masked(status, float_flag_overflow)) {
        float_raise(status, float_flag_inexact);
      }
      // infinity, or the largest finite value (one below infinity) when the
      // rounding mode truncates for this sign
      return packFloat16(zSign, 0x1F, 0) - (roundIncrement == 0);
    }
    if (zExp < 0) {
      int isTiny = (zExp < -1) || (zSig + roundIncrement < 0x8000);
      zSig = shift16RightJamming(zSig, -zExp);
      zExp = 0;
      // inexactness is judged on the guard bits AFTER denormalization
      roundBits = zSig & roundMask;
      if (isTiny) {
        if(get_flush_underflow_to_zero(status)) {
          float_raise(status, float_flag_underflow | float_flag_inexact);
          return packFloat16(zSign, 0, 0);
        }
        if (roundBits || !float_exception_masked(status, float_flag_underflow)) {
          float_raise(status, float_flag_underflow);
        }
      }
    }
  }

  if (roundBits) float_raise(status, float_flag_inexact);

  Bit16u zSigRound = ((zSig + roundIncrement) & ~roundMask) >> 4;
  // Round-to-nearest-even tie: when the guard bits are exactly one half ulp
  // the increment above rounded up, so clear the LSB to land on the even
  // neighbour.  The tie pattern must equal the halfway value (0x8); a wider
  // constant can never match the 4-bit `roundBits'.
  zSigRound &= ~(((roundBits ^ roundHalfway) == 0) & roundNearestEven);
  if (zSigRound == 0) zExp = 0;

  return packFloat16(zSign, zExp, zSigRound);
}
#endif
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision floating-point value represented
| by the denormalized significand `aSig'. The normalized exponent and

View File

@ -63,6 +63,43 @@ Bit32s roundAndPackInt32(int zSign, Bit64u absZ, float_status_t &status);
Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, float_status_t &status);
#ifdef FLOAT16
/*----------------------------------------------------------------------------
| Normalizes the subnormal half-precision floating-point value represented
| by the denormalized significand `aSig'. The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/
void normalizeFloat16Subnormal(Bit16u aSig, Bit16s *zExpPtr, Bit16u *zSigPtr);
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper half-precision floating-
| point value corresponding to the abstract input. Ordinarily, the abstract
| value is simply rounded and packed into the half-precision format, with
| the inexact exception raised if the abstract input cannot be represented
| exactly. However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned. If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal half-
| precision floating-point number.
| The input significand `zSig' has its binary point between bits 14
| and 13, which is 4 bits to the left of the usual location. This shifted
| significand must be normalized or smaller. If `zSig' is not normalized,
| `zExp' must be 0; in that case, the result returned is a subnormal number,
| and it must not require rounding. In the usual case that `zSig' is
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
| The handling of underflow and overflow follows the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float16 roundAndPackFloat16(int zSign, Bit16s zExp, Bit16u zSig, float_status_t &status);
#endif
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision floating-point value represented
| by the denormalized significand `aSig'. The normalized exponent and

View File

@ -50,6 +50,128 @@ typedef struct {
Bit64u hi, lo;
} commonNaNT;
#ifdef FLOAT16
/*----------------------------------------------------------------------------
| The pattern for a default generated half-precision NaN.
*----------------------------------------------------------------------------*/
#define float16_default_nan 0xFE00
#define float16_fraction extractFloat16Frac
#define float16_exp extractFloat16Exp
#define float16_sign extractFloat16Sign
/*----------------------------------------------------------------------------
| Extracts the fraction field (the low 10 bits) of the half-precision
| floating-point value `a'.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE Bit16u extractFloat16Frac(float16 a)
{
  const int fractionMask = 0x3FF;
  return a & fractionMask;
}
/*----------------------------------------------------------------------------
| Extracts the 5-bit exponent field (bits 14..10) of the half-precision
| floating-point value `a'.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE Bit16s extractFloat16Exp(float16 a)
{
  const int aboveFraction = a >> 10;
  return aboveFraction & 0x1F;
}
/*----------------------------------------------------------------------------
| Extracts the sign bit (bit 15) of the half-precision floating-point value
| `a': 1 when the bit is set, 0 otherwise.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE int extractFloat16Sign(float16 a)
{
  return (a & 0x8000) ? 1 : 0;
}
/*----------------------------------------------------------------------------
| Builds a half-precision floating-point value from the sign `zSign',
| exponent `zExp' and significand `zSig'.  Sign and exponent are moved to
| bits 15 and 14..10 and the three fields are then ADDED, not or-ed, so any
| integer portion of `zSig' carries into the exponent.  Because a normalized
| significand has an integer portion equal to 1, `zExp' should be 1 less than
| the desired result exponent whenever `zSig' is a complete, normalized
| significand.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE float16 packFloat16(int zSign, int zExp, Bit16u zSig)
{
  const int signField = ((Bit16u) zSign) << 15;
  const int exponentField = ((Bit16u) zExp) << 10;

  return signField + exponentField + zSig;
}
/*----------------------------------------------------------------------------
| Tests whether the half-precision floating-point value `a' is a NaN.
| Returns 1 for a NaN and 0 for anything else.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE int float16_is_nan(float16 a)
{
  // drop the sign bit; an infinity then reads 0xF800 and every NaN is larger
  const Bit16u withoutSign = (Bit16u) (a << 1);
  return withoutSign > 0xF800;
}
/*----------------------------------------------------------------------------
| Tests whether the half-precision floating-point value `a' is a signaling
| NaN.  Returns 1 for a signaling NaN and 0 for anything else.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE int float16_is_signaling_nan(float16 a)
{
  // bits 14..9 must read 111110: exponent all ones, top fraction bit clear
  const int expAndTopFractionBit = (a >> 9) & 0x3F;
  if (expAndTopFractionBit != 0x3E) return 0;

  // the remaining fraction bits must not all be zero (that is an infinity)
  return (a & 0x1FF) != 0;
}
/*----------------------------------------------------------------------------
| Tests whether the half-precision floating-point value `a' is denormal,
| i.e. has a zero exponent field and a nonzero fraction.  Returns 1 if so,
| 0 otherwise.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE int float16_is_denormal(float16 a)
{
  if (extractFloat16Exp(a) != 0) return 0;

  return extractFloat16Frac(a) != 0;
}
/*----------------------------------------------------------------------------
| Replaces a denormal half-precision value by a zero of the same sign;
| every other value is returned unchanged.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE float16 float16_denormal_to_zero(float16 a)
{
  // keep only the sign bit when flushing
  return float16_is_denormal(a) ? (float16) (a & 0x8000) : a;
}
/*----------------------------------------------------------------------------
| Converts the half-precision floating-point NaN `a' to the canonical NaN
| format.  The invalid exception is raised when `a' is a signaling NaN.
*----------------------------------------------------------------------------*/

BX_CPP_INLINE commonNaNT float16ToCommonNaN(float16 a, float_status_t &status)
{
  if (float16_is_signaling_nan(a)) {
    float_raise(status, float_flag_invalid);
  }

  commonNaNT result;
  result.sign = a >> 15;
  // left-align the fraction: its top bit (bit 9) lands in bit 63 of `hi',
  // sign and exponent bits are shifted out
  result.hi = ((Bit64u) a) << 54;
  result.lo = 0;

  return result;
}
/*----------------------------------------------------------------------------
| Converts the canonical NaN `a' to the half-precision floating-point
| format.  The exponent field and the top fraction bit of the result are
| always set (0x7E00).
*----------------------------------------------------------------------------*/

BX_CPP_INLINE float16 commonNaNToFloat16(commonNaNT a)
{
  const int signField = ((Bit16u) a.sign) << 15;
  // the top 10 bits of `hi' become the fraction field
  const Bit16u fractionField = (Bit16u) (a.hi >> 54);

  return signField | 0x7E00 | fractionField;
}
#endif
/*----------------------------------------------------------------------------
| The pattern for a default generated single-precision NaN.
*----------------------------------------------------------------------------*/

View File

@ -37,11 +37,15 @@ these four paragraphs for those parts of this code that are retained.
#ifndef _SOFTFLOAT_H_
#define _SOFTFLOAT_H_
#define FLOAT16
#define FLOATX80
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point types.
*----------------------------------------------------------------------------*/
#ifdef FLOAT16
typedef Bit16u float16;
#endif
typedef Bit32u float32;
typedef Bit64u float64;
@ -220,6 +224,7 @@ int float32_compare_quiet(float32, float32, float_status_t &status);
float_class_t float32_class(float32);
int float32_is_signaling_nan(float32);
int float32_is_nan(float32);
int float32_is_denormal(float32);
/*----------------------------------------------------------------------------
| Software IEC/IEEE double-precision conversion routines.
@ -246,6 +251,17 @@ int float64_compare_quiet(float64, float64, float_status_t &status);
float_class_t float64_class(float64);
int float64_is_signaling_nan(float64);
int float64_is_nan(float64);
int float64_is_denormal(float64);
#ifdef FLOAT16
float32 float16_to_float32(float16, float_status_t &status);
float16 float32_to_float16(float32, float_status_t &status);
float_class_t float16_class(float16);
int float16_is_signaling_nan(float16);
int float16_is_nan(float16);
int float16_is_denormal(float16);
#endif
#ifdef FLOATX80

View File

@ -54,6 +54,7 @@
#define BXPN_CPUID_XSAVE "cpuid.xsave"
#define BXPN_CPUID_XSAVEOPT "cpuid.xsaveopt"
#define BXPN_CPUID_AVX "cpuid.avx"
#define BXPN_CPUID_AVX_F16CVT "cpuid.avx_f16c"
#define BXPN_CPUID_APIC "cpuid.apic"
#define BXPN_CPUID_MWAIT "cpuid.mwait"
#define BXPN_CPUID_MWAIT_IS_NOP "cpuid.mwait_is_nop"