fixed bug in floatx80_class function

mmx code optimizations
This commit is contained in:
Stanislav Shwartsman 2004-03-03 21:09:08 +00:00
parent f552a1c861
commit 1706beda30
4 changed files with 68 additions and 106 deletions

View File

@ -100,89 +100,74 @@ extern "C"
#if BX_SUPPORT_MMX
typedef union {
Bit8u u8;
Bit8s s8;
} MMX_BYTE;
// Storage for one packed MMX register. Every member aliases the same bytes,
// exposing the register as signed or unsigned lanes: 8 bytes, 4 words,
// 2 dwords or a single qword (lane widths as implied by the BitNN type names).
// The raw arrays are indexed in host memory order; code should go through the
// mmx* accessor macros, which mirror the lane index when BX_BIG_ENDIAN is
// defined.
typedef union bx_packed_mmx_reg_t {
Bit8s _sbyte[8];   // signed byte lanes
Bit16s _s16[4];    // signed word lanes
Bit32s _s32[2];    // signed dword lanes
Bit64s _s64;       // whole register, signed
Bit8u _ubyte[8];   // unsigned byte lanes
Bit16u _u16[4];    // unsigned word lanes
Bit32u _u32[2];    // unsigned dword lanes
Bit64u _u64;       // whole register, unsigned
} BxPackedMmxRegister;
typedef union {
Bit16u u16;
Bit16s s16;
struct {
#ifdef BX_BIG_ENDIAN
MMX_BYTE hi;
MMX_BYTE lo;
// Lane accessors for BxPackedMmxRegister on a big-endian host: the lane index
// is mirrored (N-1 - i) so that a given index selects the same logical lane
// as the unmirrored little-endian accessors.
// mmx64s is object-like, exactly like mmx64u: the whole-register view takes
// no lane index and is used as `reg.mmx64s` (see MMXSQ). A function-like
// definition would never expand there and would leave a non-existent member.
#define mmx64s _s64
#define mmx32s(i) _s32[1 - (i)]
#define mmx16s(i) _s16[3 - (i)]
#define mmxsbyte(i) _sbyte[7 - (i)]
#define mmxubyte(i) _ubyte[7 - (i)]
#define mmx16u(i) _u16[3 - (i)]
#define mmx32u(i) _u32[1 - (i)]
#define mmx64u _u64
#else
MMX_BYTE lo;
MMX_BYTE hi;
// Lane accessors for BxPackedMmxRegister on a little-endian host: the lane
// index maps straight onto the array index.
// mmx64s is object-like, exactly like mmx64u: the whole-register view takes
// no lane index and is used as `reg.mmx64s` (see MMXSQ). A function-like
// definition would never expand there and would leave a non-existent member.
#define mmx64s _s64
#define mmx32s(i) _s32[(i)]
#define mmx16s(i) _s16[(i)]
#define mmxsbyte(i) _sbyte[(i)]
#define mmxubyte(i) _ubyte[(i)]
#define mmx16u(i) _u16[(i)]
#define mmx32u(i) _u32[(i)]
#define mmx64u _u64
#endif
} bytes;
} MMX_WORD;
typedef union {
Bit32u u32;
Bit32s s32;
struct {
#ifdef BX_BIG_ENDIAN
MMX_WORD hi;
MMX_WORD lo;
#else
MMX_WORD lo;
MMX_WORD hi;
#endif
} words;
} MMX_DWORD;
/* for compatibility with already written code */
#define MMXSB0(reg) (reg.mmxsbyte(0))
#define MMXSB1(reg) (reg.mmxsbyte(1))
#define MMXSB2(reg) (reg.mmxsbyte(2))
#define MMXSB3(reg) (reg.mmxsbyte(3))
#define MMXSB4(reg) (reg.mmxsbyte(4))
#define MMXSB5(reg) (reg.mmxsbyte(5))
#define MMXSB6(reg) (reg.mmxsbyte(6))
#define MMXSB7(reg) (reg.mmxsbyte(7))
typedef union {
Bit64u u64;
Bit64s s64;
struct {
#ifdef BX_BIG_ENDIAN
MMX_DWORD hi;
MMX_DWORD lo;
#else
MMX_DWORD lo;
MMX_DWORD hi;
#endif
} dwords;
} MMX_QWORD, BxPackedMmxRegister;
#define MMXSW0(reg) (reg.mmx16s(0))
#define MMXSW1(reg) (reg.mmx16s(1))
#define MMXSW2(reg) (reg.mmx16s(2))
#define MMXSW3(reg) (reg.mmx16s(3))
#define MMXSB0(reg) (reg.dwords.lo.words.lo.bytes.lo.s8)
#define MMXSB1(reg) (reg.dwords.lo.words.lo.bytes.hi.s8)
#define MMXSB2(reg) (reg.dwords.lo.words.hi.bytes.lo.s8)
#define MMXSB3(reg) (reg.dwords.lo.words.hi.bytes.hi.s8)
#define MMXSB4(reg) (reg.dwords.hi.words.lo.bytes.lo.s8)
#define MMXSB5(reg) (reg.dwords.hi.words.lo.bytes.hi.s8)
#define MMXSB6(reg) (reg.dwords.hi.words.hi.bytes.lo.s8)
#define MMXSB7(reg) (reg.dwords.hi.words.hi.bytes.hi.s8)
#define MMXSD0(reg) (reg.mmx32s(0))
#define MMXSD1(reg) (reg.mmx32s(1))
#define MMXUB0(reg) (reg.dwords.lo.words.lo.bytes.lo.u8)
#define MMXUB1(reg) (reg.dwords.lo.words.lo.bytes.hi.u8)
#define MMXUB2(reg) (reg.dwords.lo.words.hi.bytes.lo.u8)
#define MMXUB3(reg) (reg.dwords.lo.words.hi.bytes.hi.u8)
#define MMXUB4(reg) (reg.dwords.hi.words.lo.bytes.lo.u8)
#define MMXUB5(reg) (reg.dwords.hi.words.lo.bytes.hi.u8)
#define MMXUB6(reg) (reg.dwords.hi.words.hi.bytes.lo.u8)
#define MMXUB7(reg) (reg.dwords.hi.words.hi.bytes.hi.u8)
#define MMXSW0(reg) (reg.dwords.lo.words.lo.s16)
#define MMXSW1(reg) (reg.dwords.lo.words.hi.s16)
#define MMXSW2(reg) (reg.dwords.hi.words.lo.s16)
#define MMXSW3(reg) (reg.dwords.hi.words.hi.s16)
#define MMXUW0(reg) (reg.dwords.lo.words.lo.u16)
#define MMXUW1(reg) (reg.dwords.lo.words.hi.u16)
#define MMXUW2(reg) (reg.dwords.hi.words.lo.u16)
#define MMXUW3(reg) (reg.dwords.hi.words.hi.u16)
#define MMXSQ(reg) (reg.mmx64s)
#define MMXUQ(reg) (reg.mmx64u)
#define MMXSD0(reg) (reg.dwords.lo.s32)
#define MMXSD1(reg) (reg.dwords.hi.s32)
#define MMXUD0(reg) (reg.mmx32u(0))
#define MMXUD1(reg) (reg.mmx32u(1))
#define MMXUD0(reg) (reg.dwords.lo.u32)
#define MMXUD1(reg) (reg.dwords.hi.u32)
#define MMXUW0(reg) (reg.mmx16u(0))
#define MMXUW1(reg) (reg.mmx16u(1))
#define MMXUW2(reg) (reg.mmx16u(2))
#define MMXUW3(reg) (reg.mmx16u(3))
#define MMXSQ(reg) (reg.s64)
#define MMXUQ(reg) (reg.u64)
#define MMXUB0(reg) (reg.mmxubyte(0))
#define MMXUB1(reg) (reg.mmxubyte(1))
#define MMXUB2(reg) (reg.mmxubyte(2))
#define MMXUB3(reg) (reg.mmxubyte(3))
#define MMXUB4(reg) (reg.mmxubyte(4))
#define MMXUB5(reg) (reg.mmxubyte(5))
#define MMXUB6(reg) (reg.mmxubyte(6))
#define MMXUB7(reg) (reg.mmxubyte(7))
// Endian Host byte order Guest (x86) byte order
// ======================================================

View File

@ -111,13 +111,6 @@ void BX_CPU_C::prepareFPU2MMX(void)
#endif
#if BX_SUPPORT_3DNOW || BX_SUPPORT_SSE >= 1
// Extracts one 16-bit word from a packed MMX value.
// Only the low two bits of `index` are used, so any index selects one of the
// four words; word 0 is the least significant 16 bits of the 64-bit value.
BX_CPP_INLINE Bit16u SelectMmxWord(BxPackedMmxRegister mmx, unsigned index)
{
const unsigned lane = index & 0x3;
const unsigned shift = lane * 16;
return (MMXUQ(mmx) >> shift) & 0xffff;
}
#endif
/* 0F 60 */
void BX_CPU_C::PUNPCKLBW_PqQd(bxInstruction_c *i)
{
@ -577,10 +570,10 @@ void BX_CPU_C::PSHUFW_PqQqIb(bxInstruction_c *i)
read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op);
}
MMXUW0(result) = SelectMmxWord(op, order);
MMXUW1(result) = SelectMmxWord(op, order >> 2);
MMXUW2(result) = SelectMmxWord(op, order >> 4);
MMXUW3(result) = SelectMmxWord(op, order >> 6);
MMXUW0(result) = op.mmx16u((order) & 0x3);
MMXUW1(result) = op.mmx16u((order>>2) & 0x3);
MMXUW2(result) = op.mmx16u((order>>4) & 0x3);
MMXUW3(result) = op.mmx16u((order>>6) & 0x3);
/* now write result back to destination */
BX_WRITE_MMX_REG(i->nnn(), result);
@ -770,22 +763,7 @@ void BX_CPU_C::PINSRW_PqEdIb(bxInstruction_c *i)
read_virtual_word(i->seg(), RMAddr(i), &op2);
}
Bit8u count = i->Ib() & 0x3;
switch(count) {
case 0:
MMXUW0(op1) = op2;
break;
case 1:
MMXUW1(op1) = op2;
break;
case 2:
MMXUW2(op1) = op2;
break;
case 3:
MMXUW3(op1) = op2;
break;
}
// Store the source word into the lane chosen by the low two bits of the
// immediate. Use the MMX lane accessor (mmx16u), the same one PSHUFW and
// PEXTRW use on MMX operands. xmm16u is not one of the MMX accessors;
// NOTE(review): it is presumably the SSE-register accessor, which would
// index an 8-lane layout on big-endian hosts; confirm against its definition.
op1.mmx16u(i->Ib() & 0x3) = op2;
/* now write result back to destination */
BX_WRITE_MMX_REG(i->nnn(), op1);
@ -802,8 +780,7 @@ void BX_CPU_C::PEXTRW_PqEdIb(bxInstruction_c *i)
BX_CPU_THIS_PTR prepareMMX();
BxPackedMmxRegister op = BX_READ_MMX_REG(i->rm());
Bit8u count = i->Ib() & 0x3;
Bit32u result = (Bit32u) SelectMmxWord(op, count);
Bit32u result = (Bit32u) op.mmx16u(i->Ib() & 0x3);
BX_WRITE_32BIT_REG(i->nnn(), result);
#else

View File

@ -2526,7 +2526,7 @@ float_class_t floatx80_class(floatx80 a)
int aSign = extractFloatx80Sign(a);
if(aExp == 0x7fff) {
if (aSig == 0)
if (((Bit64u) (aSig<<1)) == 0)
return (aSign) ? float_negative_inf : float_positive_inf;
return float_NaN;

View File

@ -215,15 +215,15 @@ int float64_is_signaling_nan(float64);
| Software IEC/IEEE floating-point types.
*----------------------------------------------------------------------------*/
#ifdef BIG_ENDIAN
typedef struct {
struct floatx80 { // do not allow 16-byte extension of the structure
Bit16u exp;
Bit64u fraction;
} floatx80;
} GCC_ATTRIBUTE((aligned(1), packed));
#else
typedef struct {
struct floatx80 { // do not allow 16-byte extension of the structure
Bit64u fraction;
Bit16u exp;
} floatx80;
} GCC_ATTRIBUTE((aligned(1), packed));
#endif
/*----------------------------------------------------------------------------