From 1706beda30694d79f4ae69bcf2a409077d325a48 Mon Sep 17 00:00:00 2001
From: Stanislav Shwartsman
Date: Wed, 3 Mar 2004 21:09:08 +0000
Subject: [PATCH] fixed bug in floatx80_class function mmx code optimizations

i387.h:
  Replace the nested MMX_BYTE/MMX_WORD/MMX_DWORD/MMX_QWORD unions with a
  single flat union (bx_packed_mmx_reg_t) of 8/16/32/64-bit arrays.
  Elements are reached through the mmxNN(i) accessor macros; on
  BX_BIG_ENDIAN hosts the index is mirrored so that the same index names
  the same element of the 64-bit value as on little-endian hosts.
  The MMXxx(reg) macros are kept and re-expressed on top of the accessors.

mmx.cc:
  Drop the SelectMmxWord() helper. PSHUFW, PINSRW and PEXTRW now index
  the 16-bit element directly through mmx16u().

softfloat.cc:
  floatx80_class(): when the exponent is 0x7fff, shift bit 63 out of the
  significand before comparing it with zero, so that only bits 62..0
  decide between infinity and NaN.

softfloat.h:
  floatx80 becomes a named struct carrying
  GCC_ATTRIBUTE((aligned(1), packed)) so that it is not extended to
  16 bytes.

Review fixes folded into this revision of the patch:
  * MMXSQ(reg) expanded to "reg.mmx64s" although mmx64s(i) is a
    function-like macro, which is not expanded without an argument list;
    it now uses mmx64s(0).
  * PINSRW used the accessor name xmm16u on an MMX register; it now uses
    mmx16u like PSHUFW and PEXTRW.
  * Spelling of "compatibility" in the i387.h comment.
  The "index" lines below still carry the blob ids of the original commit.
---
 bochs/cpu/i387.h       | 129 ++++++++++++++++++-----------------------
 bochs/cpu/mmx.cc       |  35 ++---------
 bochs/cpu/softfloat.cc |   2 +-
 bochs/cpu/softfloat.h  |   8 +--
 4 files changed, 68 insertions(+), 106 deletions(-)

diff --git a/bochs/cpu/i387.h b/bochs/cpu/i387.h
index 3a9c9c273..13ef34551 100644
--- a/bochs/cpu/i387.h
+++ b/bochs/cpu/i387.h
@@ -100,89 +100,74 @@ extern "C"
 
 #if BX_SUPPORT_MMX
 
-typedef union {
-  Bit8u u8;
-  Bit8s s8;
-} MMX_BYTE;
+typedef union bx_packed_mmx_reg_t {
+   Bit8s   _sbyte[8];
+   Bit16s  _s16[4];
+   Bit32s  _s32[2];
+   Bit64s  _s64;
+   Bit8u   _ubyte[8];
+   Bit16u  _u16[4];
+   Bit32u  _u32[2];
+   Bit64u  _u64;
+} BxPackedMmxRegister;
 
-typedef union {
-  Bit16u u16;
-  Bit16s s16;
-  struct {
 #ifdef BX_BIG_ENDIAN
-    MMX_BYTE hi;
-    MMX_BYTE lo;
+#define mmx64s(i)   _s64
+#define mmx32s(i)   _s32[1 - (i)]
+#define mmx16s(i)   _s16[3 - (i)]
+#define mmxsbyte(i) _sbyte[7 - (i)]
+#define mmxubyte(i) _ubyte[7 - (i)]
+#define mmx16u(i)   _u16[3 - (i)]
+#define mmx32u(i)   _u32[1 - (i)]
+#define mmx64u      _u64
 #else
-    MMX_BYTE lo;
-    MMX_BYTE hi;
+#define mmx64s(i)   _s64
+#define mmx32s(i)   _s32[(i)]
+#define mmx16s(i)   _s16[(i)]
+#define mmxsbyte(i) _sbyte[(i)]
+#define mmxubyte(i) _ubyte[(i)]
+#define mmx16u(i)   _u16[(i)]
+#define mmx32u(i)   _u32[(i)]
+#define mmx64u      _u64
 #endif
-  } bytes;
-} MMX_WORD;
 
-typedef union {
-  Bit32u u32;
-  Bit32s s32;
-  struct {
-#ifdef BX_BIG_ENDIAN
-    MMX_WORD hi;
-    MMX_WORD lo;
-#else
-    MMX_WORD lo;
-    MMX_WORD hi;
-#endif
-  } words;
-} MMX_DWORD;
+/* for compatibility with already written code */
+#define MMXSB0(reg) (reg.mmxsbyte(0))
+#define MMXSB1(reg) (reg.mmxsbyte(1))
+#define MMXSB2(reg) (reg.mmxsbyte(2))
+#define MMXSB3(reg) (reg.mmxsbyte(3))
+#define MMXSB4(reg) (reg.mmxsbyte(4))
+#define MMXSB5(reg) (reg.mmxsbyte(5))
+#define MMXSB6(reg) (reg.mmxsbyte(6))
+#define MMXSB7(reg) (reg.mmxsbyte(7))
 
-typedef union {
-  Bit64u u64;
-  Bit64s s64;
-  struct {
-#ifdef BX_BIG_ENDIAN
-    MMX_DWORD hi;
-    MMX_DWORD lo;
-#else
-    MMX_DWORD lo;
-    MMX_DWORD hi;
-#endif
-  } dwords;
-} MMX_QWORD, BxPackedMmxRegister;
+#define MMXSW0(reg) (reg.mmx16s(0))
+#define MMXSW1(reg) (reg.mmx16s(1))
+#define MMXSW2(reg) (reg.mmx16s(2))
+#define MMXSW3(reg) (reg.mmx16s(3))
 
-#define MMXSB0(reg) (reg.dwords.lo.words.lo.bytes.lo.s8)
-#define MMXSB1(reg) (reg.dwords.lo.words.lo.bytes.hi.s8)
-#define MMXSB2(reg) (reg.dwords.lo.words.hi.bytes.lo.s8)
-#define MMXSB3(reg) (reg.dwords.lo.words.hi.bytes.hi.s8)
-#define MMXSB4(reg) (reg.dwords.hi.words.lo.bytes.lo.s8)
-#define MMXSB5(reg) (reg.dwords.hi.words.lo.bytes.hi.s8)
-#define MMXSB6(reg) (reg.dwords.hi.words.hi.bytes.lo.s8)
-#define MMXSB7(reg) (reg.dwords.hi.words.hi.bytes.hi.s8)
+#define MMXSD0(reg) (reg.mmx32s(0))
+#define MMXSD1(reg) (reg.mmx32s(1))
 
-#define MMXUB0(reg) (reg.dwords.lo.words.lo.bytes.lo.u8)
-#define MMXUB1(reg) (reg.dwords.lo.words.lo.bytes.hi.u8)
-#define MMXUB2(reg) (reg.dwords.lo.words.hi.bytes.lo.u8)
-#define MMXUB3(reg) (reg.dwords.lo.words.hi.bytes.hi.u8)
-#define MMXUB4(reg) (reg.dwords.hi.words.lo.bytes.lo.u8)
-#define MMXUB5(reg) (reg.dwords.hi.words.lo.bytes.hi.u8)
-#define MMXUB6(reg) (reg.dwords.hi.words.hi.bytes.lo.u8)
-#define MMXUB7(reg) (reg.dwords.hi.words.hi.bytes.hi.u8)
-
-#define MMXSW0(reg) (reg.dwords.lo.words.lo.s16)
-#define MMXSW1(reg) (reg.dwords.lo.words.hi.s16)
-#define MMXSW2(reg) (reg.dwords.hi.words.lo.s16)
-#define MMXSW3(reg) (reg.dwords.hi.words.hi.s16)
-
-#define MMXUW0(reg) (reg.dwords.lo.words.lo.u16)
-#define MMXUW1(reg) (reg.dwords.lo.words.hi.u16)
-#define MMXUW2(reg) (reg.dwords.hi.words.lo.u16)
-#define MMXUW3(reg) (reg.dwords.hi.words.hi.u16)
+#define MMXSQ(reg)  (reg.mmx64s(0))
+#define MMXUQ(reg)  (reg.mmx64u)
 
-#define MMXSD0(reg) (reg.dwords.lo.s32)
-#define MMXSD1(reg) (reg.dwords.hi.s32)
+#define MMXUD0(reg) (reg.mmx32u(0))
+#define MMXUD1(reg) (reg.mmx32u(1))
 
-#define MMXUD0(reg) (reg.dwords.lo.u32)
-#define MMXUD1(reg) (reg.dwords.hi.u32)
+#define MMXUW0(reg) (reg.mmx16u(0))
+#define MMXUW1(reg) (reg.mmx16u(1))
+#define MMXUW2(reg) (reg.mmx16u(2))
+#define MMXUW3(reg) (reg.mmx16u(3))
 
-#define MMXSQ(reg) (reg.s64)
-#define MMXUQ(reg) (reg.u64)
+#define MMXUB0(reg) (reg.mmxubyte(0))
+#define MMXUB1(reg) (reg.mmxubyte(1))
+#define MMXUB2(reg) (reg.mmxubyte(2))
+#define MMXUB3(reg) (reg.mmxubyte(3))
+#define MMXUB4(reg) (reg.mmxubyte(4))
+#define MMXUB5(reg) (reg.mmxubyte(5))
+#define MMXUB6(reg) (reg.mmxubyte(6))
+#define MMXUB7(reg) (reg.mmxubyte(7))
 
 // Endian  Host byte order         Guest (x86) byte order
 // ======================================================
diff --git a/bochs/cpu/mmx.cc b/bochs/cpu/mmx.cc
index 133c8a1ee..74c763b3d 100644
--- a/bochs/cpu/mmx.cc
+++ b/bochs/cpu/mmx.cc
@@ -111,13 +111,6 @@ void BX_CPU_C::prepareFPU2MMX(void)
 
 #endif
 
-#if BX_SUPPORT_3DNOW || BX_SUPPORT_SSE >= 1
-BX_CPP_INLINE Bit16u SelectMmxWord(BxPackedMmxRegister mmx, unsigned index)
-{
-  return (MMXUQ(mmx) >> ((index & 0x3) * 16)) & 0xffff;
-}
-#endif
-
 /* 0F 60 */
 void BX_CPU_C::PUNPCKLBW_PqQd(bxInstruction_c *i)
 {
@@ -577,10 +570,10 @@ void BX_CPU_C::PSHUFW_PqQqIb(bxInstruction_c *i)
     read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op);
   }
 
-  MMXUW0(result) = SelectMmxWord(op, order);
-  MMXUW1(result) = SelectMmxWord(op, order >> 2);
-  MMXUW2(result) = SelectMmxWord(op, order >> 4);
-  MMXUW3(result) = SelectMmxWord(op, order >> 6);
+  MMXUW0(result) = op.mmx16u((order) & 0x3);
+  MMXUW1(result) = op.mmx16u((order>>2) & 0x3);
+  MMXUW2(result) = op.mmx16u((order>>4) & 0x3);
+  MMXUW3(result) = op.mmx16u((order>>6) & 0x3);
 
   /* now write result back to destination */
   BX_WRITE_MMX_REG(i->nnn(), result);
@@ -770,22 +763,7 @@ void BX_CPU_C::PINSRW_PqEdIb(bxInstruction_c *i)
     read_virtual_word(i->seg(), RMAddr(i), &op2);
   }
 
-  Bit8u count = i->Ib() & 0x3;
-
-  switch(count) {
-    case 0:
-      MMXUW0(op1) = op2;
-      break;
-    case 1:
-      MMXUW1(op1) = op2;
-      break;
-    case 2:
-      MMXUW2(op1) = op2;
-      break;
-    case 3:
-      MMXUW3(op1) = op2;
-      break;
-  }
+  op1.mmx16u(i->Ib() & 0x3) = op2;
 
   /* now write result back to destination */
   BX_WRITE_MMX_REG(i->nnn(), op1);
@@ -802,8 +780,7 @@ void BX_CPU_C::PEXTRW_PqEdIb(bxInstruction_c *i)
   BX_CPU_THIS_PTR prepareMMX();
 
   BxPackedMmxRegister op = BX_READ_MMX_REG(i->rm());
-  Bit8u count = i->Ib() & 0x3;
-  Bit32u result = (Bit32u) SelectMmxWord(op, count);
+  Bit32u result = (Bit32u) op.mmx16u(i->Ib() & 0x3);
 
   BX_WRITE_32BIT_REG(i->nnn(), result);
 #else
diff --git a/bochs/cpu/softfloat.cc b/bochs/cpu/softfloat.cc
index d57562b50..0c09feecd 100755
--- a/bochs/cpu/softfloat.cc
+++ b/bochs/cpu/softfloat.cc
@@ -2526,7 +2526,7 @@ float_class_t floatx80_class(floatx80 a)
    int aSign = extractFloatx80Sign(a);
 
    if(aExp == 0x7fff) {
-       if (aSig == 0)
+       if (((Bit64u) (aSig<<1)) == 0)
            return (aSign) ? float_negative_inf : float_positive_inf;
 
        return float_NaN;
diff --git a/bochs/cpu/softfloat.h b/bochs/cpu/softfloat.h
index e6e880df5..222559ea1 100755
--- a/bochs/cpu/softfloat.h
+++ b/bochs/cpu/softfloat.h
@@ -215,15 +215,15 @@ int float64_is_signaling_nan(float64);
 | Software IEC/IEEE floating-point types.
 *----------------------------------------------------------------------------*/
 #ifdef BIG_ENDIAN
-typedef struct {
+struct floatx80 {   // do not allow 16-byte extension of the structure
     Bit16u exp;
     Bit64u fraction;
-} floatx80;
+} GCC_ATTRIBUTE((aligned(1), packed));
 #else
-typedef struct {
+struct floatx80 {   // do not allow 16-byte extension of the structure
     Bit64u fraction;
     Bit16u exp;
-} floatx80;
+} GCC_ATTRIBUTE((aligned(1), packed));
 #endif
 
 /*----------------------------------------------------------------------------