From 03eac6401324cf89df25f330cf7473dc59d8bd07 Mon Sep 17 00:00:00 2001 From: Stanislav Shwartsman Date: Thu, 6 Apr 2006 18:30:05 +0000 Subject: [PATCH] Added decoding of new SSE4 instructions (recently published in Intel docs) At least CPUID detects them correctly The code is never tested (still) ! (but should work fine) --- bochs/cpu/cpu.h | 47 +++- bochs/cpu/cpuid.cc | 9 +- bochs/cpu/fetchdecode.cc | 49 +++- bochs/cpu/fetchdecode.h | 219 ++++++++++++++- bochs/cpu/fetchdecode64.cc | 55 +++- bochs/cpu/mmx.cc | 493 ++++++++++++++++++++++++++++++++- bochs/cpu/smm.cc | 6 +- bochs/cpu/sse.cc | 554 ++++++++++++++++++++++++++++++++++++- 8 files changed, 1409 insertions(+), 23 deletions(-) diff --git a/bochs/cpu/cpu.h b/bochs/cpu/cpu.h index d0590310b..e58437e51 100644 --- a/bochs/cpu/cpu.h +++ b/bochs/cpu/cpu.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: cpu.h,v 1.276 2006-04-05 17:31:30 sshwarts Exp $ +// $Id: cpu.h,v 1.277 2006-04-06 18:30:02 sshwarts Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (C) 2001 MandrakeSoft S.A. @@ -1743,7 +1743,7 @@ public: // for now... BX_SMF void FISTP_QWORD_INTEGER(bxInstruction_c *); BX_SMF void FBSTP_PACKED_BCD(bxInstruction_c *); - BX_SMF void FISTTP16(bxInstruction_c *); + BX_SMF void FISTTP16(bxInstruction_c *); // SSE3 BX_SMF void FISTTP32(bxInstruction_c *); BX_SMF void FISTTP64(bxInstruction_c *); @@ -2202,7 +2202,7 @@ public: // for now... #define MOVNTDQ_MdqVdq /* 66 0f e7 */ MOVNTPD_MdqVpd /* 66 0f 2b */ #endif // #ifdef StandAloneDecoder - /* PNI */ + /* SSE3 */ BX_SMF void MOVDDUP_VpdWq(bxInstruction_c *i); BX_SMF void MOVSLDUP_VpsWps(bxInstruction_c *i); BX_SMF void MOVSHDUP_VpsWps(bxInstruction_c *i); @@ -2213,7 +2213,43 @@ public: // for now... 
BX_SMF void ADDSUBPD_VpdWpd(bxInstruction_c *i); BX_SMF void ADDSUBPS_VpsWps(bxInstruction_c *i); BX_SMF void LDDQU_VdqMdq(bxInstruction_c *i); - /* PNI */ + /* SSE3 */ + +#if BX_SUPPORT_SSE >= 4 + BX_SMF void PSHUFB_PqQq(bxInstruction_c *i); + BX_SMF void PHADDW_PqQq(bxInstruction_c *i); + BX_SMF void PHADDD_PqQq(bxInstruction_c *i); + BX_SMF void PHADDSW_PqQq(bxInstruction_c *i); + BX_SMF void PMADDUBSW_PqQq(bxInstruction_c *i); + BX_SMF void PHSUBSW_PqQq(bxInstruction_c *i); + BX_SMF void PHSUBW_PqQq(bxInstruction_c *i); + BX_SMF void PHSUBD_PqQq(bxInstruction_c *i); + BX_SMF void PSIGNB_PqQq(bxInstruction_c *i); + BX_SMF void PSIGNW_PqQq(bxInstruction_c *i); + BX_SMF void PSIGND_PqQq(bxInstruction_c *i); + BX_SMF void PMULHRSW_PqQq(bxInstruction_c *i); + BX_SMF void PABSB_PqQq(bxInstruction_c *i); + BX_SMF void PABSW_PqQq(bxInstruction_c *i); + BX_SMF void PABSD_PqQq(bxInstruction_c *i); + BX_SMF void PALIGNR_PqQqIb(bxInstruction_c *i); + + BX_SMF void PSHUFB_VdqWdq(bxInstruction_c *i); + BX_SMF void PHADDW_VdqWdq(bxInstruction_c *i); + BX_SMF void PHADDD_VdqWdq(bxInstruction_c *i); + BX_SMF void PHADDSW_VdqWdq(bxInstruction_c *i); + BX_SMF void PMADDUBSW_VdqWdq(bxInstruction_c *i); + BX_SMF void PHSUBSW_VdqWdq(bxInstruction_c *i); + BX_SMF void PHSUBW_VdqWdq(bxInstruction_c *i); + BX_SMF void PHSUBD_VdqWdq(bxInstruction_c *i); + BX_SMF void PSIGNB_VdqWdq(bxInstruction_c *i); + BX_SMF void PSIGNW_VdqWdq(bxInstruction_c *i); + BX_SMF void PSIGND_VdqWdq(bxInstruction_c *i); + BX_SMF void PMULHRSW_VdqWdq(bxInstruction_c *i); + BX_SMF void PABSB_VdqWdq(bxInstruction_c *i); + BX_SMF void PABSW_VdqWdq(bxInstruction_c *i); + BX_SMF void PABSD_VdqWdq(bxInstruction_c *i); + BX_SMF void PALIGNR_VdqWdqIb(bxInstruction_c *i); +#endif BX_SMF void CMPXCHG_XBTS(bxInstruction_c *); BX_SMF void CMPXCHG_IBTS(bxInstruction_c *); @@ -3291,10 +3327,13 @@ IMPLEMENT_EFLAG_ACCESSOR (TF, 8) #define BxSplitMod11b 0x0030 // Group encoding: 011 #define BxFPGroup 0x0040 // Group encoding: 
100 #define BxRMGroup 0x0050 // Group encoding: 101 +#define Bx3ByteOpIndex 0x0060 // Group encoding: 110 +#define Bx3ByteOpTable 0x0070 // Group encoding: 111 #define BxPrefix 0x0080 // bit 7 #define BxAnother 0x0100 // bit 8 #define BxLockable 0x0200 // bit 9 +#define Bx3ByteOpcode 0x0400 // bit 10 #define BxRepeatable 0x0800 // bit 11 (pass through to metaInfo field) #define BxRepeatableZF 0x1000 // bit 12 (pass through to metaInfo field) diff --git a/bochs/cpu/cpuid.cc b/bochs/cpu/cpuid.cc index 03e1713b1..9259a0102 100755 --- a/bochs/cpu/cpuid.cc +++ b/bochs/cpu/cpuid.cc @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: cpuid.cc,v 1.35 2006-04-05 17:31:30 sshwarts Exp $ +// $Id: cpuid.cc,v 1.36 2006-04-06 18:30:03 sshwarts Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (C) 2001 MandrakeSoft S.A. @@ -122,11 +122,14 @@ Bit32u BX_CPU_C::get_extended_cpuid_features() Bit32u features = 0; #if BX_SUPPORT_SSE >= 3 - features |= 0x01; // report SSE3 (PNI) + features |= 0x1; // report SSE3 +#endif +#if BX_SUPPORT_SSE >= 4 + features |= (1<<9); // report SSE4 #endif #if BX_SUPPORT_X86_64 - features |= (1<<13); // support CMPXCHG16B + features |= (1<<13); // support CMPXCHG16B #endif return features; diff --git a/bochs/cpu/fetchdecode.cc b/bochs/cpu/fetchdecode.cc index 54af529ec..5a2fbe780 100644 --- a/bochs/cpu/fetchdecode.cc +++ b/bochs/cpu/fetchdecode.cc @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: fetchdecode.cc,v 1.91 2006-04-05 20:52:37 sshwarts Exp $ +// $Id: fetchdecode.cc,v 1.92 2006-04-06 18:30:03 sshwarts Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (C) 2001 MandrakeSoft S.A. 
@@ -750,9 +750,17 @@ static BxOpcodeInfo_t BxOpcodeInfo[512*2] = { /* 0F 35 */ { 0, &BX_CPU_C::SYSEXIT }, /* 0F 36 */ { 0, &BX_CPU_C::BxError }, /* 0F 37 */ { 0, &BX_CPU_C::BxError }, +#if BX_SUPPORT_SSE >= 4 + /* 0F 38 */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA4 }, // 3-byte escape +#else /* 0F 38 */ { 0, &BX_CPU_C::BxError }, +#endif /* 0F 39 */ { 0, &BX_CPU_C::BxError }, +#if BX_SUPPORT_SSE >= 4 + /* 0F 3A */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA5 }, // 3-byte escape +#else /* 0F 3A */ { 0, &BX_CPU_C::BxError }, +#endif /* 0F 3B */ { 0, &BX_CPU_C::BxError }, /* 0F 3C */ { 0, &BX_CPU_C::BxError }, /* 0F 3D */ { 0, &BX_CPU_C::BxError }, @@ -1300,9 +1308,17 @@ static BxOpcodeInfo_t BxOpcodeInfo[512*2] = { /* 0F 35 */ { 0, &BX_CPU_C::SYSEXIT }, /* 0F 36 */ { 0, &BX_CPU_C::BxError }, /* 0F 37 */ { 0, &BX_CPU_C::BxError }, +#if BX_SUPPORT_SSE >= 4 + /* 0F 38 */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA4 }, // 3-byte escape (group bits select the table walk) +#else /* 0F 38 */ { 0, &BX_CPU_C::BxError }, +#endif /* 0F 39 */ { 0, &BX_CPU_C::BxError }, +#if BX_SUPPORT_SSE >= 4 + /* 0F 3A */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA5 }, // 3-byte escape (group bits select the table walk) +#else /* 0F 3A */ { 0, &BX_CPU_C::BxError }, +#endif /* 0F 3B */ { 0, &BX_CPU_C::BxError }, /* 0F 3C */ { 0, &BX_CPU_C::BxError }, /* 0F 3D */ { 0, &BX_CPU_C::BxError }, @@ -1510,8 +1526,11 @@ BX_CPU_C::fetchDecode(Bit8u *iptr, bxInstruction_c *instruction, unsigned remain bx_bool is_32, lock=0; unsigned b1, b2, ilen=0, attr, os_32; unsigned imm_mode, offset; - unsigned rm, mod=0, nnn=0; + unsigned rm = 0, mod=0, nnn=0; unsigned sse_prefix; +#if BX_SUPPORT_SSE >= 4 + unsigned b3 = 0; +#endif #define SSE_PREFIX_NONE 0 #define SSE_PREFIX_66 1 @@ -1628,6 +1647,18 @@ fetch_b1: attr = BxOpcodeInfo[b1+offset].Attr; instruction->setRepAttr(attr & (BxRepeatable | BxRepeatableZF)); +#if BX_SUPPORT_SSE >= 4 + // handle 3-byte escape + if (attr & Bx3ByteOpcode) { + if (ilen < 
remain) { + ilen++; + b3 = *iptr++; + } + else + return(0); + } +#endif + if (attr & BxAnother) { // opcode requires modrm byte if (ilen < remain) { @@ -1795,6 +1826,14 @@ modrm_done: case BxRMGroup: OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[rm]); break; +#if BX_SUPPORT_SSE >= 4 + case Bx3ByteOpTable: + OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[b3 >> 4]); + break; + case Bx3ByteOpIndex: + OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[b3 & 15]); + break; +#endif case BxPrefixSSE: { /* For SSE opcodes, look into another 4 entries table @@ -1973,7 +2012,7 @@ modrm_done: } #if BX_SUPPORT_3DNOW - if(b1 == 0x10f) { // 3DNow! instruction set + if(b1 == 0x10f) { instruction->execute = Bx3DNowOpcodeInfo[instruction->modRMForm.Ib].ExecutePtr; } #endif @@ -1983,12 +2022,10 @@ modrm_done: return(1); } - void -BX_CPU_C::BxError(bxInstruction_c *i) +void BX_CPU_C::BxError(bxInstruction_c *i) { BX_INFO(("BxError: instruction with opcode=0x%x", i->b1())); BX_INFO(("mod was %x, nnn was %u, rm was %u", i->mod(), i->nnn(), i->rm())); - BX_INFO(("WARNING: Encountered an unknown instruction (signalling illegal instruction)")); BX_CPU_THIS_PTR UndefinedOpcode(i); diff --git a/bochs/cpu/fetchdecode.h b/bochs/cpu/fetchdecode.h index 337ef166a..c4cc0ccbf 100755 --- a/bochs/cpu/fetchdecode.h +++ b/bochs/cpu/fetchdecode.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: fetchdecode.h,v 1.23 2006-04-05 20:52:39 sshwarts Exp $ +// $Id: fetchdecode.h,v 1.24 2006-04-06 18:30:04 sshwarts Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (c) 2003 Stanislav Shwartsman @@ -1846,4 +1846,221 @@ static BxOpcodeInfo_t BxOpcodeGroupSSE_G1407[4] = { /* F3 */ { 0, &BX_CPU_C::BxError } }; +#if BX_SUPPORT_SSE >= 4 + +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3800[4] = { + /* -- */ { 0, &BX_CPU_C::PSHUFB_PqQq }, + /* 66 */ { 0, &BX_CPU_C::PSHUFB_VdqWdq }, + /* F2 */ { 0, &BX_CPU_C::BxError }, + /* F3 */ { 0, 
&BX_CPU_C::BxError } +}; + +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3801[4] = { + /* -- */ { 0, &BX_CPU_C::PHADDW_PqQq }, + /* 66 */ { 0, &BX_CPU_C::PHADDW_VdqWdq }, + /* F2 */ { 0, &BX_CPU_C::BxError }, + /* F3 */ { 0, &BX_CPU_C::BxError } +}; + +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3802[4] = { + /* -- */ { 0, &BX_CPU_C::PHADDD_PqQq }, + /* 66 */ { 0, &BX_CPU_C::PHADDD_VdqWdq }, + /* F2 */ { 0, &BX_CPU_C::BxError }, + /* F3 */ { 0, &BX_CPU_C::BxError } +}; + +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3803[4] = { + /* -- */ { 0, &BX_CPU_C::PHADDSW_PqQq }, + /* 66 */ { 0, &BX_CPU_C::PHADDSW_VdqWdq }, + /* F2 */ { 0, &BX_CPU_C::BxError }, + /* F3 */ { 0, &BX_CPU_C::BxError } +}; + +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3804[4] = { + /* -- */ { 0, &BX_CPU_C::PMADDUBSW_PqQq }, + /* 66 */ { 0, &BX_CPU_C::PMADDUBSW_VdqWdq }, + /* F2 */ { 0, &BX_CPU_C::BxError }, + /* F3 */ { 0, &BX_CPU_C::BxError } +}; + +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3805[4] = { + /* -- */ { 0, &BX_CPU_C::PHSUBW_PqQq }, + /* 66 */ { 0, &BX_CPU_C::PHSUBW_VdqWdq }, + /* F2 */ { 0, &BX_CPU_C::BxError }, + /* F3 */ { 0, &BX_CPU_C::BxError } +}; + +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3806[4] = { + /* -- */ { 0, &BX_CPU_C::PHSUBD_PqQq }, + /* 66 */ { 0, &BX_CPU_C::PHSUBD_VdqWdq }, + /* F2 */ { 0, &BX_CPU_C::BxError }, + /* F3 */ { 0, &BX_CPU_C::BxError } +}; + +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3807[4] = { + /* -- */ { 0, &BX_CPU_C::PHSUBSW_PqQq }, + /* 66 */ { 0, &BX_CPU_C::PHSUBSW_VdqWdq }, + /* F2 */ { 0, &BX_CPU_C::BxError }, + /* F3 */ { 0, &BX_CPU_C::BxError } +}; + +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3808[4] = { + /* -- */ { 0, &BX_CPU_C::PSIGNB_PqQq }, + /* 66 */ { 0, &BX_CPU_C::PSIGNB_VdqWdq }, + /* F2 */ { 0, &BX_CPU_C::BxError }, + /* F3 */ { 0, &BX_CPU_C::BxError } +}; + +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3809[4] = { + /* -- */ { 0, &BX_CPU_C::PSIGNW_PqQq }, + /* 66 */ { 0, &BX_CPU_C::PSIGNW_VdqWdq }, + /* F2 */ { 0, &BX_CPU_C::BxError }, + /* F3 */ { 0, 
&BX_CPU_C::BxError } +}; + +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f380a[4] = { + /* -- */ { 0, &BX_CPU_C::PSIGND_PqQq }, + /* 66 */ { 0, &BX_CPU_C::PSIGND_VdqWdq }, + /* F2 */ { 0, &BX_CPU_C::BxError }, + /* F3 */ { 0, &BX_CPU_C::BxError } +}; + +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f380b[4] = { + /* -- */ { 0, &BX_CPU_C::PMULHRSW_PqQq }, + /* 66 */ { 0, &BX_CPU_C::PMULHRSW_VdqWdq }, + /* F2 */ { 0, &BX_CPU_C::BxError }, + /* F3 */ { 0, &BX_CPU_C::BxError } +}; + +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f381c[4] = { + /* -- */ { 0, &BX_CPU_C::PABSB_PqQq }, + /* 66 */ { 0, &BX_CPU_C::PABSB_VdqWdq }, + /* F2 */ { 0, &BX_CPU_C::BxError }, + /* F3 */ { 0, &BX_CPU_C::BxError } +}; + +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f381d[4] = { + /* -- */ { 0, &BX_CPU_C::PABSW_PqQq }, + /* 66 */ { 0, &BX_CPU_C::PABSW_VdqWdq }, + /* F2 */ { 0, &BX_CPU_C::BxError }, + /* F3 */ { 0, &BX_CPU_C::BxError } +}; + +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f381e[4] = { + /* -- */ { 0, &BX_CPU_C::PABSD_PqQq }, + /* 66 */ { 0, &BX_CPU_C::PABSD_VdqWdq }, + /* F2 */ { 0, &BX_CPU_C::BxError }, + /* F3 */ { 0, &BX_CPU_C::BxError } +}; + +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3a0f[4] = { + /* -- */ { 0, &BX_CPU_C::PALIGNR_PqQqIb }, + /* 66 */ { 0, &BX_CPU_C::PALIGNR_VdqWdqIb }, + /* F2 */ { 0, &BX_CPU_C::BxError }, + /* F3 */ { 0, &BX_CPU_C::BxError } +}; + +/* ************************************************************************ */ +/* 3-byte opcode table (Table A-4, 0F 38) */ + +static BxOpcodeInfo_t BxOpcode3ByteOp0f380x[16] = { + /* 00 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3800 }, + /* 01 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3801 }, + /* 02 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3802 }, + /* 03 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3803 }, + /* 04 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3804 }, + /* 05 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3805 }, + /* 06 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3806 }, + /* 07 */ { BxPrefixSSE, NULL, 
BxOpcodeGroupSSE_0f3807 }, + /* 08 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3808 }, + /* 09 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3809 }, + /* 0A */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f380a }, + /* 0B */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f380b }, + /* 0C */ { 0, &BX_CPU_C::BxError }, + /* 0D */ { 0, &BX_CPU_C::BxError }, + /* 0E */ { 0, &BX_CPU_C::BxError }, + /* 0F */ { 0, &BX_CPU_C::BxError } +}; + +static BxOpcodeInfo_t BxOpcode3ByteOp0f381x[16] = { + /* 00 */ { 0, &BX_CPU_C::BxError }, + /* 01 */ { 0, &BX_CPU_C::BxError }, + /* 02 */ { 0, &BX_CPU_C::BxError }, + /* 03 */ { 0, &BX_CPU_C::BxError }, + /* 04 */ { 0, &BX_CPU_C::BxError }, + /* 05 */ { 0, &BX_CPU_C::BxError }, + /* 06 */ { 0, &BX_CPU_C::BxError }, + /* 07 */ { 0, &BX_CPU_C::BxError }, + /* 08 */ { 0, &BX_CPU_C::BxError }, + /* 09 */ { 0, &BX_CPU_C::BxError }, + /* 0A */ { 0, &BX_CPU_C::BxError }, + /* 0B */ { 0, &BX_CPU_C::BxError }, + /* 0C */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f381c }, + /* 0D */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f381d }, + /* 0E */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f381e }, + /* 0F */ { 0, &BX_CPU_C::BxError } +}; + +static BxOpcodeInfo_t BxOpcode3ByteTableA4[16] = { + /* 00 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f380x }, + /* 01 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f381x }, + /* 02 */ { 0, &BX_CPU_C::BxError }, + /* 03 */ { 0, &BX_CPU_C::BxError }, + /* 04 */ { 0, &BX_CPU_C::BxError }, + /* 05 */ { 0, &BX_CPU_C::BxError }, + /* 06 */ { 0, &BX_CPU_C::BxError }, + /* 07 */ { 0, &BX_CPU_C::BxError }, + /* 08 */ { 0, &BX_CPU_C::BxError }, + /* 09 */ { 0, &BX_CPU_C::BxError }, + /* 0A */ { 0, &BX_CPU_C::BxError }, + /* 0B */ { 0, &BX_CPU_C::BxError }, + /* 0C */ { 0, &BX_CPU_C::BxError }, + /* 0D */ { 0, &BX_CPU_C::BxError }, + /* 0E */ { 0, &BX_CPU_C::BxError }, + /* 0F */ { 0, &BX_CPU_C::BxError } +}; + +/* ************************************************************************ */ +/* 3-byte opcode table (Table A-5, 0F 3A) */ + +static 
BxOpcodeInfo_t BxOpcode3ByteOp0f3a0x[16] = { + /* 00 */ { 0, &BX_CPU_C::BxError }, + /* 01 */ { 0, &BX_CPU_C::BxError }, + /* 02 */ { 0, &BX_CPU_C::BxError }, + /* 03 */ { 0, &BX_CPU_C::BxError }, + /* 04 */ { 0, &BX_CPU_C::BxError }, + /* 05 */ { 0, &BX_CPU_C::BxError }, + /* 06 */ { 0, &BX_CPU_C::BxError }, + /* 07 */ { 0, &BX_CPU_C::BxError }, + /* 08 */ { 0, &BX_CPU_C::BxError }, + /* 09 */ { 0, &BX_CPU_C::BxError }, + /* 0A */ { 0, &BX_CPU_C::BxError }, + /* 0B */ { 0, &BX_CPU_C::BxError }, + /* 0C */ { 0, &BX_CPU_C::BxError }, + /* 0D */ { 0, &BX_CPU_C::BxError }, + /* 0E */ { 0, &BX_CPU_C::BxError }, + /* 0F */ { BxImmediate_Ib | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3a0f } +}; + +static BxOpcodeInfo_t BxOpcode3ByteTableA5[16] = { + /* 00 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f3a0x }, + /* 01 */ { 0, &BX_CPU_C::BxError }, + /* 02 */ { 0, &BX_CPU_C::BxError }, + /* 03 */ { 0, &BX_CPU_C::BxError }, + /* 04 */ { 0, &BX_CPU_C::BxError }, + /* 05 */ { 0, &BX_CPU_C::BxError }, + /* 06 */ { 0, &BX_CPU_C::BxError }, + /* 07 */ { 0, &BX_CPU_C::BxError }, + /* 08 */ { 0, &BX_CPU_C::BxError }, + /* 09 */ { 0, &BX_CPU_C::BxError }, + /* 0A */ { 0, &BX_CPU_C::BxError }, + /* 0B */ { 0, &BX_CPU_C::BxError }, + /* 0C */ { 0, &BX_CPU_C::BxError }, + /* 0D */ { 0, &BX_CPU_C::BxError }, + /* 0E */ { 0, &BX_CPU_C::BxError }, + /* 0F */ { 0, &BX_CPU_C::BxError } +}; + +#endif /* BX_SUPPORT_SSE >= 4 */ + #endif diff --git a/bochs/cpu/fetchdecode64.cc b/bochs/cpu/fetchdecode64.cc index 865b3fe3f..c3d463a2e 100644 --- a/bochs/cpu/fetchdecode64.cc +++ b/bochs/cpu/fetchdecode64.cc @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: fetchdecode64.cc,v 1.91 2006-04-05 20:52:40 sshwarts Exp $ +// $Id: fetchdecode64.cc,v 1.92 2006-04-06 18:30:04 sshwarts Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (C) 2001 MandrakeSoft S.A. 
@@ -873,9 +873,17 @@ static BxOpcodeInfo_t BxOpcodeInfo64[512*3] = { /* 0F 35 */ { 0, &BX_CPU_C::BxError }, // SYSENTER/SYSEXIT not recognized in long mode /* 0F 36 */ { 0, &BX_CPU_C::BxError }, /* 0F 37 */ { 0, &BX_CPU_C::BxError }, +#if BX_SUPPORT_SSE >= 4 + /* 0F 38 */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA4 }, // 3-byte escape +#else /* 0F 38 */ { 0, &BX_CPU_C::BxError }, +#endif /* 0F 39 */ { 0, &BX_CPU_C::BxError }, +#if BX_SUPPORT_SSE >= 4 + /* 0F 3A */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA5 }, // 3-byte escape +#else /* 0F 3A */ { 0, &BX_CPU_C::BxError }, +#endif /* 0F 3B */ { 0, &BX_CPU_C::BxError }, /* 0F 3C */ { 0, &BX_CPU_C::BxError }, /* 0F 3D */ { 0, &BX_CPU_C::BxError }, @@ -1394,9 +1402,17 @@ static BxOpcodeInfo_t BxOpcodeInfo64[512*3] = { /* 0F 35 */ { 0, &BX_CPU_C::BxError }, // SYSENTER/SYSEXIT not recognized in long mode /* 0F 36 */ { 0, &BX_CPU_C::BxError }, /* 0F 37 */ { 0, &BX_CPU_C::BxError }, +#if BX_SUPPORT_SSE >= 4 + /* 0F 38 */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA4 }, // 3-byte escape +#else /* 0F 38 */ { 0, &BX_CPU_C::BxError }, +#endif /* 0F 39 */ { 0, &BX_CPU_C::BxError }, +#if BX_SUPPORT_SSE >= 4 + /* 0F 3A */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA5 }, // 3-byte escape +#else /* 0F 3A */ { 0, &BX_CPU_C::BxError }, +#endif /* 0F 3B */ { 0, &BX_CPU_C::BxError }, /* 0F 3C */ { 0, &BX_CPU_C::BxError }, /* 0F 3D */ { 0, &BX_CPU_C::BxError }, @@ -1915,9 +1931,17 @@ static BxOpcodeInfo_t BxOpcodeInfo64[512*3] = { /* 0F 35 */ { 0, &BX_CPU_C::BxError }, // SYSENTER/SYSEXIT not recognized in long mode /* 0F 36 */ { 0, &BX_CPU_C::BxError }, /* 0F 37 */ { 0, &BX_CPU_C::BxError }, +#if BX_SUPPORT_SSE >= 4 + /* 0F 38 */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA4 }, // 3-byte escape +#else /* 0F 38 */ { 0, &BX_CPU_C::BxError }, +#endif /* 0F 39 */ { 0, &BX_CPU_C::BxError }, +#if 
BX_SUPPORT_SSE >= 4 + /* 0F 3A */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA5 }, // 3-byte escape +#else /* 0F 3A */ { 0, &BX_CPU_C::BxError }, +#endif /* 0F 3B */ { 0, &BX_CPU_C::BxError }, /* 0F 3C */ { 0, &BX_CPU_C::BxError }, /* 0F 3D */ { 0, &BX_CPU_C::BxError }, @@ -2125,13 +2149,16 @@ BX_CPU_C::fetchDecode64(Bit8u *iptr, bxInstruction_c *instruction, unsigned rema unsigned b1, b2, ilen=0, attr, lock=0; unsigned imm_mode, offset, rex_r,rex_x,rex_b; - unsigned rm, mod = 0, nnn = 0; + unsigned rm = 0, mod = 0, nnn = 0; unsigned sse_prefix; #define SSE_PREFIX_NONE 0 #define SSE_PREFIX_66 1 #define SSE_PREFIX_F2 2 #define SSE_PREFIX_F3 4 /* only one SSE prefix could be used */ static int sse_prefix_index[8] = { 0, 1, 2, -1, 3, -1, -1, -1 }; +#if BX_SUPPORT_SSE >= 4 + unsigned b3 = 0; +#endif offset = 512*1; rex_r = 0; @@ -2259,6 +2286,18 @@ fetch_b1: attr = BxOpcodeInfo64[b1+offset].Attr; instruction->setRepAttr(attr & (BxRepeatable | BxRepeatableZF)); +#if BX_SUPPORT_SSE >= 4 + // handle 3-byte escape + if (attr & Bx3ByteOpcode) { + if (ilen < remain) { + ilen++; + b3 = *iptr++; + } + else + return(0); + } +#endif + if (attr & BxAnother) { // opcode requires modrm byte if (ilen < remain) { @@ -2277,7 +2316,7 @@ fetch_b1: instruction->modRMForm.modRMData |= (nnn<<8); // MOVs with CRx and DRx always use register ops and ignore the mod field. - if ( (b1 & ~3) == 0x120 ) + if ((b1 & ~3) == 0x120) mod = 0xc0; if (mod == 0xc0) { // mod == 11b @@ -2482,6 +2521,14 @@ modrm_done: case BxRMGroup: OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[rm]); break; +#if BX_SUPPORT_SSE >= 4 + case Bx3ByteOpTable: + OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[b3 >> 4]); + break; + case Bx3ByteOpIndex: + OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[b3 & 15]); + break; +#endif case BxPrefixSSE: { /* For SSE opcodes, look into another 4 entries table @@ -2665,7 +2712,7 @@ modrm_done: } #if BX_SUPPORT_3DNOW - if(b1 == 0x10f) { // 3DNow! 
instruction set + if(b1 == 0x10f) { instruction->execute = Bx3DNowOpcodeInfo[instruction->modRMForm.Ib].ExecutePtr; } #endif diff --git a/bochs/cpu/mmx.cc b/bochs/cpu/mmx.cc index 7e08e0b49..be8601ed3 100644 --- a/bochs/cpu/mmx.cc +++ b/bochs/cpu/mmx.cc @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: mmx.cc,v 1.52 2006-03-06 22:03:00 sshwarts Exp $ +// $Id: mmx.cc,v 1.53 2006-04-06 18:30:05 sshwarts Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (c) 2002 Stanislav Shwartsman @@ -115,6 +115,497 @@ void BX_CPU_C::prepareFPU2MMX(void) #endif +#if BX_SUPPORT_SSE >= 4 + +/* 0F 38 00 */ +void BX_CPU_C::PSHUFB_PqQq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareMMX(); + + BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_MMX_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2); + } + + for(unsigned j=0; j<8; j++) + { + unsigned mask = op2.mmxubyte(j); + if (mask & 0x80) + result.mmxubyte(j) = 0; + else + result.mmxubyte(j) = op1.mmxubyte(mask & 0x7); /* MMX operand is 8 bytes: only index bits 2:0 select the source byte */ + } + + BX_WRITE_MMX_REG(i->nnn(), result); +#else + BX_INFO(("PSHUFB_PqQq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 0F 38 01 */ +void BX_CPU_C::PHADDW_PqQq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareMMX(); + + BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_MMX_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2); + } + + MMXUW0(result) = MMXUW0(op1) + MMXUW1(op1); + MMXUW1(result) = MMXUW2(op1) + MMXUW3(op1); + MMXUW2(result) = MMXUW0(op2) + MMXUW1(op2); + MMXUW3(result) = MMXUW2(op2) + 
MMXUW3(op2); + + BX_WRITE_MMX_REG(i->nnn(), result); +#else + BX_INFO(("PHADDW_PqQq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 0F 38 02 */ +void BX_CPU_C::PHADDD_PqQq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareMMX(); + + BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_MMX_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2); + } + + MMXUD0(result) = MMXUD0(op1) + MMXUD1(op1); + MMXUD1(result) = MMXUD0(op2) + MMXUD1(op2); + + BX_WRITE_MMX_REG(i->nnn(), result); +#else + BX_INFO(("PHADDD_PqQq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 0F 38 03 */ +void BX_CPU_C::PHADDSW_PqQq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareMMX(); + + BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_MMX_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2); + } + + MMXSW0(result) = SaturateDwordSToWordS(Bit32s(MMXSW0(op1)) + Bit32s(MMXSW1(op1))); + MMXSW1(result) = SaturateDwordSToWordS(Bit32s(MMXSW2(op1)) + Bit32s(MMXSW3(op1))); + MMXSW2(result) = SaturateDwordSToWordS(Bit32s(MMXSW0(op2)) + Bit32s(MMXSW1(op2))); + MMXSW3(result) = SaturateDwordSToWordS(Bit32s(MMXSW2(op2)) + Bit32s(MMXSW3(op2))); + + /* now write result back to destination */ + BX_WRITE_MMX_REG(i->nnn(), result); +#else + BX_INFO(("PHADDSW_PqQq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 0F 38 04 */ +void BX_CPU_C::PMADDUBSW_PqQq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareMMX(); + + BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result; + + /* op2 is a register or memory 
reference */ + if (i->modC0()) { + op2 = BX_READ_MMX_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2); + } + + for(unsigned j=0; j<4; j++) + { + Bit32s temp = Bit32s(op1.mmxubyte(j*2+0))*Bit32s(op2.mmxsbyte(j*2+0)) + + Bit32s(op1.mmxubyte(j*2+1))*Bit32s(op2.mmxsbyte(j*2+1)); + + result.mmx16s(j) = SaturateDwordSToWordS(temp); + } + + /* now write result back to destination */ + BX_WRITE_MMX_REG(i->nnn(), result); +#else + BX_INFO(("PMADDUBSW_PqQq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 0F 38 07 */ +void BX_CPU_C::PHSUBSW_PqQq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareMMX(); + + BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_MMX_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2); + } + + MMXSW0(result) = SaturateDwordSToWordS(Bit32s(MMXSW0(op1)) - Bit32s(MMXSW1(op1))); + MMXSW1(result) = SaturateDwordSToWordS(Bit32s(MMXSW2(op1)) - Bit32s(MMXSW3(op1))); + MMXSW2(result) = SaturateDwordSToWordS(Bit32s(MMXSW0(op2)) - Bit32s(MMXSW1(op2))); + MMXSW3(result) = SaturateDwordSToWordS(Bit32s(MMXSW2(op2)) - Bit32s(MMXSW3(op2))); + + /* now write result back to destination */ + BX_WRITE_MMX_REG(i->nnn(), result); +#else + BX_INFO(("PHSUBSW_PqQq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 0F 38 05 */ +void BX_CPU_C::PHSUBW_PqQq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareMMX(); + + BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_MMX_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2); + } + + MMXUW0(result) = MMXUW0(op1) 
- MMXUW1(op1); + MMXUW1(result) = MMXUW2(op1) - MMXUW3(op1); + MMXUW2(result) = MMXUW0(op2) - MMXUW1(op2); + MMXUW3(result) = MMXUW2(op2) - MMXUW3(op2); + + BX_WRITE_MMX_REG(i->nnn(), result); +#else + BX_INFO(("PHSUBW_PqQq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 0F 38 06 */ +void BX_CPU_C::PHSUBD_PqQq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareMMX(); + + BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_MMX_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2); + } + + MMXUD0(result) = MMXUD0(op1) - MMXUD1(op1); + MMXUD1(result) = MMXUD0(op2) - MMXUD1(op2); + + BX_WRITE_MMX_REG(i->nnn(), result); +#else + BX_INFO(("PHSUBD_PqQq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 0F 38 08 */ +void BX_CPU_C::PSIGNB_PqQq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareMMX(); + + BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_MMX_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2); + } + + for(unsigned j=0; j<8; j++) { + int sign = (op2.mmxsbyte(j) > 0) - (op2.mmxsbyte(j) < 0); + op1.mmxsbyte(j) *= sign; + } + + BX_WRITE_MMX_REG(i->nnn(), op1); +#else + BX_INFO(("PSIGNB_PqQq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 0F 38 09 */ +void BX_CPU_C::PSIGNW_PqQq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareMMX(); + + BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_MMX_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + 
read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2); + } + + for(unsigned j=0; j<4; j++) { + int sign = (op2.mmx16s(j) > 0) - (op2.mmx16s(j) < 0); + op1.mmx16s(j) *= sign; + } + + BX_WRITE_MMX_REG(i->nnn(), op1); +#else + BX_INFO(("PSIGNW_PqQq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 0F 38 0A */ +void BX_CPU_C::PSIGND_PqQq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareMMX(); + + BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_MMX_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2); + } + + int sign; + + sign = (MMXSD0(op2) > 0) - (MMXSD0(op2) < 0); + MMXSD0(op1) *= sign; + sign = (MMXSD1(op2) > 0) - (MMXSD1(op2) < 0); + MMXSD1(op1) *= sign; + + BX_WRITE_MMX_REG(i->nnn(), op1); +#else + BX_INFO(("PSIGND_PqQq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 0F 38 0B */ +void BX_CPU_C::PMULHRSW_PqQq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareMMX(); + + BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_MMX_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2); + } + + for(unsigned j=0; j<4; j++) { + Bit32s temp = Bit32s(op1.mmx16s(j)) * Bit32s(op2.mmx16s(j)); + result.mmx16u(j) = ((temp >> 14) + 1) >> 1; + } + + MMXUW0(result) = (((MMXSW0(op1) * MMXSW0(op2)) >> 14) + 1) >> 1; + MMXUW1(result) = (((MMXSW1(op1) * MMXSW1(op2)) >> 14) + 1) >> 1; + MMXUW2(result) = (((MMXSW2(op1) * MMXSW2(op2)) >> 14) + 1) >> 1; + MMXUW3(result) = (((MMXSW3(op1) * MMXSW3(op2)) >> 14) + 1) >> 1; + + /* now write result back to destination */ + BX_WRITE_MMX_REG(i->nnn(), result); +#else + BX_INFO(("PMULHRSW_PqQq: required SSE4, 
use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 0F 38 1C */ +void BX_CPU_C::PABSB_PqQq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareMMX(); + + BxPackedMmxRegister op; + + if (i->modC0()) { + op = BX_READ_MMX_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op); + } + + if (MMXSB0(op) < 0) MMXUB0(op) = -MMXSB0(op); + if (MMXSB1(op) < 0) MMXUB1(op) = -MMXSB1(op); + if (MMXSB2(op) < 0) MMXUB2(op) = -MMXSB2(op); + if (MMXSB3(op) < 0) MMXUB3(op) = -MMXSB3(op); + if (MMXSB4(op) < 0) MMXUB4(op) = -MMXSB4(op); + if (MMXSB5(op) < 0) MMXUB5(op) = -MMXSB5(op); + if (MMXSB6(op) < 0) MMXUB6(op) = -MMXSB6(op); + if (MMXSB7(op) < 0) MMXUB7(op) = -MMXSB7(op); + + /* now write result back to destination */ + BX_WRITE_MMX_REG(i->nnn(), op); +#else + BX_INFO(("PABSB_PqQq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 0F 38 1D */ +void BX_CPU_C::PABSW_PqQq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareMMX(); + + BxPackedMmxRegister op; + + if (i->modC0()) { + op = BX_READ_MMX_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op); + } + + if (MMXSW0(op) < 0) MMXUW0(op) = -MMXSW0(op); + if (MMXSW1(op) < 0) MMXUW1(op) = -MMXSW1(op); + if (MMXSW2(op) < 0) MMXUW2(op) = -MMXSW2(op); + if (MMXSW3(op) < 0) MMXUW3(op) = -MMXSW3(op); + + /* now write result back to destination */ + BX_WRITE_MMX_REG(i->nnn(), op); +#else + BX_INFO(("PABSW_PqQq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 0F 38 1E */ +void BX_CPU_C::PABSD_PqQq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareMMX(); + + BxPackedMmxRegister op; + + if (i->modC0()) { + op = BX_READ_MMX_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op); + } + + if 
(MMXSD0(op) < 0) MMXUD0(op) = -MMXSD0(op); + if (MMXSD1(op) < 0) MMXUD1(op) = -MMXSD1(op); + + /* now write result back to destination */ + BX_WRITE_MMX_REG(i->nnn(), op); +#else + BX_INFO(("PABSD_PqQq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 0F 3A 0F - PALIGNR: shift the 128-bit concatenation op1:op2 right by Ib bytes and keep the low 64 bits */ +void BX_CPU_C::PALIGNR_PqQqIb(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareMMX(); + + BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_MMX_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2); + } + + unsigned shift = i->Ib() * 8; /* shift in bits; must not be Bit8u: Ib*8 can reach 2040 and would wrap (Ib=32 -> 0) */ + + if(shift == 0) + MMXUQ(result) = MMXUQ(op2); + else if(shift < 64) + MMXUQ(result) = (MMXUQ(op2) >> shift) | (MMXUQ(op1) << (64-shift)); + else if(shift < 128) + MMXUQ(result) = MMXUQ(op1) >> (shift-64); + else + MMXUQ(result) = 0; + + /* now write result back to destination */ + BX_WRITE_MMX_REG(i->nnn(), result); +#else + BX_INFO(("PALIGNR_PqQqIb: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +#endif /* BX_SUPPORT_SSE >= 4 */ + /* 0F 60 */ void BX_CPU_C::PUNPCKLBW_PqQd(bxInstruction_c *i) { diff --git a/bochs/cpu/smm.cc b/bochs/cpu/smm.cc index d362b229d..ebb837b0b 100755 --- a/bochs/cpu/smm.cc +++ b/bochs/cpu/smm.cc @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: smm.cc,v 1.14 2006-04-06 16:47:29 sshwarts Exp $ +// $Id: smm.cc,v 1.15 2006-04-06 18:30:05 sshwarts Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (c) 2006 Stanislav Shwartsman @@ -60,9 +60,9 @@ // 4. GDTR, IDTR // fields: base, limit // 5. LDTR, TR -// fields: base, limit, something more ? +// fields: base, limit, anything else ? // 6. Debug Registers DR0-DR7, only DR6 and DR7 are saved -// 7. 
Control Regsiters: CR0, CR1 is always 0, CR2 is NOT saved, CR3, CR4, EFER +// 7. Control Registers: CR0, CR1 is always 0, CR2 is NOT saved, CR3, CR4, EFER // 8. SMBASE // 9. MSR/FPU/XMM/APIC are NOT saved accoring to Intel docs // diff --git a/bochs/cpu/sse.cc b/bochs/cpu/sse.cc index 5fef1abd5..ccf74cbd4 100644 --- a/bochs/cpu/sse.cc +++ b/bochs/cpu/sse.cc @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: sse.cc,v 1.39 2006-04-05 17:31:32 sshwarts Exp $ +// $Id: sse.cc,v 1.40 2006-04-06 18:30:05 sshwarts Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (c) 2003 Stanislav Shwartsman @@ -29,6 +29,558 @@ /* SSE Integer Operations (128bit MMX extensions) */ /* ********************************************** */ +#if BX_SUPPORT_SSE >= 4 + +/* 66 0F 38 00 */ +void BX_CPU_C::PSHUFB_VdqWdq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2); + } + + for(unsigned j=0; j<16; j++) + { + unsigned mask = op2.xmmubyte(j); + if (mask & 0x80) + result.xmmubyte(j) = 0; + else + result.xmmubyte(j) = op1.xmmubyte(mask & 0xf); + } + + BX_WRITE_XMM_REG(i->nnn(), result); +#else + BX_INFO(("PSHUFB_VdqWdq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 66 0F 38 01 */ +void BX_CPU_C::PHADDW_VdqWdq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) 
&op2); + } + + result.xmm16u(0) = op1.xmm16u(0) + op1.xmm16u(1); + result.xmm16u(1) = op1.xmm16u(2) + op1.xmm16u(3); + result.xmm16u(2) = op1.xmm16u(4) + op1.xmm16u(5); + result.xmm16u(3) = op1.xmm16u(6) + op1.xmm16u(7); + + result.xmm16u(4) = op2.xmm16u(0) + op2.xmm16u(1); + result.xmm16u(5) = op2.xmm16u(2) + op2.xmm16u(3); + result.xmm16u(6) = op2.xmm16u(4) + op2.xmm16u(5); + result.xmm16u(7) = op2.xmm16u(6) + op2.xmm16u(7); + + BX_WRITE_XMM_REG(i->nnn(), result); +#else + BX_INFO(("PHADDW_VdqWdq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 66 0F 38 02 */ +void BX_CPU_C::PHADDD_VdqWdq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2); + } + + result.xmm32u(0) = op1.xmm32u(0) + op1.xmm32u(1); + result.xmm32u(1) = op1.xmm32u(2) + op1.xmm32u(3); + result.xmm32u(2) = op2.xmm32u(0) + op2.xmm32u(1); + result.xmm32u(3) = op2.xmm32u(2) + op2.xmm32u(3); + + BX_WRITE_XMM_REG(i->nnn(), result); +#else + BX_INFO(("PHADDD_VdqWdq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 66 0F 38 03 */ +void BX_CPU_C::PHADDSW_VdqWdq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2); + } + + result.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(0)) + Bit32s(op1.xmm16s(1))); + result.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(2)) + Bit32s(op1.xmm16s(3))); + result.xmm16s(2) = 
SaturateDwordSToWordS(Bit32s(op1.xmm16s(4)) + Bit32s(op1.xmm16s(5))); + result.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(6)) + Bit32s(op1.xmm16s(7))); + + result.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(0)) + Bit32s(op2.xmm16s(1))); + result.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(2)) + Bit32s(op2.xmm16s(3))); + result.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(4)) + Bit32s(op2.xmm16s(5))); + result.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(6)) + Bit32s(op2.xmm16s(7))); + + /* now write result back to destination */ + BX_WRITE_XMM_REG(i->nnn(), result); +#else + BX_INFO(("PHADDSW_VdqWdq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 66 0F 38 04 - multiply unsigned bytes of op1 by signed bytes of op2, add each adjacent pair of products with signed word saturation */ +void BX_CPU_C::PMADDUBSW_VdqWdq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2); + } + + for(unsigned j=0; j<8; j++) + { + Bit32s temp = Bit32s(op1.xmmubyte(j*2+0))*Bit32s(op2.xmmsbyte(j*2+0)) + + Bit32s(op1.xmmubyte(j*2+1))*Bit32s(op2.xmmsbyte(j*2+1)); + + result.xmm16s(j) = SaturateDwordSToWordS(temp); + } + + /* now write result back to destination */ + BX_WRITE_XMM_REG(i->nnn(), result); +#else + BX_INFO(("PMADDUBSW_VdqWdq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 66 0F 38 07 - horizontal subtract of adjacent signed words with saturation; op1 pairs fill the low four words, op2 pairs the high four */ +void BX_CPU_C::PHSUBSW_VdqWdq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2); + } + + 
result.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(0)) - Bit32s(op1.xmm16s(1))); + result.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(2)) - Bit32s(op1.xmm16s(3))); + result.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(4)) - Bit32s(op1.xmm16s(5))); + result.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(6)) - Bit32s(op1.xmm16s(7))); + + result.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(0)) - Bit32s(op2.xmm16s(1))); + result.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(2)) - Bit32s(op2.xmm16s(3))); + result.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(4)) - Bit32s(op2.xmm16s(5))); + result.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(6)) - Bit32s(op2.xmm16s(7))); + + /* now write result back to destination */ + BX_WRITE_XMM_REG(i->nnn(), result); +#else + BX_INFO(("PHSUBSW_VdqWdq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 66 0F 38 05 - horizontal subtract of adjacent words, wrapping (no saturation) */ +void BX_CPU_C::PHSUBW_VdqWdq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2); + } + + result.xmm16u(0) = op1.xmm16u(0) - op1.xmm16u(1); + result.xmm16u(1) = op1.xmm16u(2) - op1.xmm16u(3); + result.xmm16u(2) = op1.xmm16u(4) - op1.xmm16u(5); + result.xmm16u(3) = op1.xmm16u(6) - op1.xmm16u(7); + + result.xmm16u(4) = op2.xmm16u(0) - op2.xmm16u(1); + result.xmm16u(5) = op2.xmm16u(2) - op2.xmm16u(3); + result.xmm16u(6) = op2.xmm16u(4) - op2.xmm16u(5); + result.xmm16u(7) = op2.xmm16u(6) - op2.xmm16u(7); + + BX_WRITE_XMM_REG(i->nnn(), result); +#else + BX_INFO(("PHSUBW_VdqWdq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 66 0F 38 06 */ +void BX_CPU_C::PHSUBD_VdqWdq(bxInstruction_c *i) +{ +#if 
BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2); + } + + result.xmm32u(0) = op1.xmm32u(0) - op1.xmm32u(1); + result.xmm32u(1) = op1.xmm32u(2) - op1.xmm32u(3); + result.xmm32u(2) = op2.xmm32u(0) - op2.xmm32u(1); + result.xmm32u(3) = op2.xmm32u(2) - op2.xmm32u(3); + + BX_WRITE_XMM_REG(i->nnn(), result); +#else + BX_INFO(("PHSUBD_VdqWdq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 66 0F 38 08 */ +void BX_CPU_C::PSIGNB_VdqWdq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2); + } + + for(unsigned j=0; j<16; j++) { + int sign = (op2.xmmsbyte(j) > 0) - (op2.xmmsbyte(j) < 0); + op1.xmmsbyte(j) *= sign; + } + + BX_WRITE_XMM_REG(i->nnn(), op1); +#else + BX_INFO(("PSIGNB_VdqWdq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 66 0F 38 09 */ +void BX_CPU_C::PSIGNW_VdqWdq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2); + } + + for(unsigned j=0; j<8; j++) { + int sign = (op2.xmm16s(j) > 0) - (op2.xmm16s(j) < 0); + op1.xmm16s(j) *= sign; + } + + BX_WRITE_XMM_REG(i->nnn(), op1); +#else + BX_INFO(("PSIGNW_VdqWdq: 
required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 66 0F 38 0A */ +void BX_CPU_C::PSIGND_VdqWdq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2); + } + + for(unsigned j=0; j<4; j++) { + int sign = (op2.xmm32s(j) > 0) - (op2.xmm32s(j) < 0); + op1.xmm32s(j) *= sign; + } + + BX_WRITE_XMM_REG(i->nnn(), op1); +#else + BX_INFO(("PSIGND_VdqWdq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 66 0F 38 0B */ +void BX_CPU_C::PMULHRSW_VdqWdq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2); + } + + result.xmm16u(0) = (((op1.xmm16s(0) * op2.xmm16s(0)) >> 14) + 1) >> 1; + result.xmm16u(1) = (((op1.xmm16s(1) * op2.xmm16s(1)) >> 14) + 1) >> 1; + result.xmm16u(2) = (((op1.xmm16s(2) * op2.xmm16s(2)) >> 14) + 1) >> 1; + result.xmm16u(3) = (((op1.xmm16s(3) * op2.xmm16s(3)) >> 14) + 1) >> 1; + result.xmm16u(4) = (((op1.xmm16s(4) * op2.xmm16s(4)) >> 14) + 1) >> 1; + result.xmm16u(5) = (((op1.xmm16s(5) * op2.xmm16s(5)) >> 14) + 1) >> 1; + result.xmm16u(6) = (((op1.xmm16s(6) * op2.xmm16s(6)) >> 14) + 1) >> 1; + result.xmm16u(7) = (((op1.xmm16s(7) * op2.xmm16s(7)) >> 14) + 1) >> 1; + + /* now write result back to destination */ + BX_WRITE_XMM_REG(i->nnn(), result); +#else + BX_INFO(("PMULHRSW_VdqWdq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 66 0F 38 1C */ +void 
BX_CPU_C::PABSB_VdqWdq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op; + + if (i->modC0()) { + op = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op); + } + + if(op.xmmsbyte(0x0) < 0) op.xmmubyte(0x0) = -op.xmmsbyte(0x0); + if(op.xmmsbyte(0x1) < 0) op.xmmubyte(0x1) = -op.xmmsbyte(0x1); + if(op.xmmsbyte(0x2) < 0) op.xmmubyte(0x2) = -op.xmmsbyte(0x2); + if(op.xmmsbyte(0x3) < 0) op.xmmubyte(0x3) = -op.xmmsbyte(0x3); + if(op.xmmsbyte(0x4) < 0) op.xmmubyte(0x4) = -op.xmmsbyte(0x4); + if(op.xmmsbyte(0x5) < 0) op.xmmubyte(0x5) = -op.xmmsbyte(0x5); + if(op.xmmsbyte(0x6) < 0) op.xmmubyte(0x6) = -op.xmmsbyte(0x6); + if(op.xmmsbyte(0x7) < 0) op.xmmubyte(0x7) = -op.xmmsbyte(0x7); + if(op.xmmsbyte(0x8) < 0) op.xmmubyte(0x8) = -op.xmmsbyte(0x8); + if(op.xmmsbyte(0x9) < 0) op.xmmubyte(0x9) = -op.xmmsbyte(0x9); + if(op.xmmsbyte(0xa) < 0) op.xmmubyte(0xa) = -op.xmmsbyte(0xa); + if(op.xmmsbyte(0xb) < 0) op.xmmubyte(0xb) = -op.xmmsbyte(0xb); + if(op.xmmsbyte(0xc) < 0) op.xmmubyte(0xc) = -op.xmmsbyte(0xc); + if(op.xmmsbyte(0xd) < 0) op.xmmubyte(0xd) = -op.xmmsbyte(0xd); + if(op.xmmsbyte(0xe) < 0) op.xmmubyte(0xe) = -op.xmmsbyte(0xe); + if(op.xmmsbyte(0xf) < 0) op.xmmubyte(0xf) = -op.xmmsbyte(0xf); + + /* now write result back to destination */ + BX_WRITE_XMM_REG(i->nnn(), op); +#else + BX_INFO(("PABSB_VdqWdq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 66 0F 38 1D */ +void BX_CPU_C::PABSW_VdqWdq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op; + + if (i->modC0()) { + op = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op); + } + + if(op.xmm16s(0) < 0) op.xmm16u(0) = -op.xmm16s(0); + if(op.xmm16s(1) < 0) op.xmm16u(1) = -op.xmm16s(1); + if(op.xmm16s(2) < 0) 
op.xmm16u(2) = -op.xmm16s(2); + if(op.xmm16s(3) < 0) op.xmm16u(3) = -op.xmm16s(3); + if(op.xmm16s(4) < 0) op.xmm16u(4) = -op.xmm16s(4); + if(op.xmm16s(5) < 0) op.xmm16u(5) = -op.xmm16s(5); + if(op.xmm16s(6) < 0) op.xmm16u(6) = -op.xmm16s(6); + if(op.xmm16s(7) < 0) op.xmm16u(7) = -op.xmm16s(7); + + /* now write result back to destination */ + BX_WRITE_XMM_REG(i->nnn(), op); +#else + BX_INFO(("PABSW_VdqWdq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 66 0F 38 1E */ +void BX_CPU_C::PABSD_VdqWdq(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op; + + if (i->modC0()) { + op = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op); + } + + if(op.xmm32s(0) < 0) op.xmm32u(0) = -op.xmm32s(0); + if(op.xmm32s(1) < 0) op.xmm32u(1) = -op.xmm32s(1); + if(op.xmm32s(2) < 0) op.xmm32u(2) = -op.xmm32s(2); + if(op.xmm32s(3) < 0) op.xmm32u(3) = -op.xmm32s(3); + + /* now write result back to destination */ + BX_WRITE_XMM_REG(i->nnn(), op); +#else + BX_INFO(("PABSD_VdqWdq: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +/* 66 0F 3A 0F - PALIGNR: shift the 256-bit concatenation op1:op2 right by Ib bytes and keep the low 128 bits */ +void BX_CPU_C::PALIGNR_VdqWdqIb(bxInstruction_c *i) +{ +#if BX_SUPPORT_SSE >= 4 + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2); + } + + unsigned shift = i->Ib() * 8; /* shift in bits; must not be Bit8u: Ib*8 can reach 2040 and would wrap, so Ib >= 32 would never yield zero */ + + if(shift == 0) { + result.xmm64u(0) = op2.xmm64u(0); + result.xmm64u(1) = op2.xmm64u(1); + } + else if(shift < 64) { + result.xmm64u(0) = (op2.xmm64u(0) >> shift) | (op2.xmm64u(1) << (64-shift)); + result.xmm64u(1) = (op2.xmm64u(1) >> shift) | (op1.xmm64u(0) << (64-shift)); + } + else if(shift == 64) { + 
result.xmm64u(0) = op2.xmm64u(1); + result.xmm64u(1) = op1.xmm64u(0); + } + else if(shift < 128) { + shift -= 64; + result.xmm64u(0) = (op2.xmm64u(1) >> shift) | (op1.xmm64u(0) << (64-shift)); + result.xmm64u(1) = (op1.xmm64u(0) >> shift) | (op1.xmm64u(1) << (64-shift)); + } + else if(shift == 128) { + result.xmm64u(0) = op1.xmm64u(0); + result.xmm64u(1) = op1.xmm64u(1); + } + else if(shift < 192) { + shift -= 128; + result.xmm64u(0) = (op1.xmm64u(0) >> shift) | (op1.xmm64u(1) << (64-shift)); + result.xmm64u(1) = (op1.xmm64u(1) >> shift); + } + else if(shift < 256) { + result.xmm64u(0) = op1.xmm64u(1) >> (shift - 192); + result.xmm64u(1) = 0; + } + else { + result.xmm64u(0) = 0; + result.xmm64u(1) = 0; + } + + /* now write result back to destination */ + BX_WRITE_XMM_REG(i->nnn(), result); +#else + BX_INFO(("PALIGNR_VdqWdqIb: required SSE4, use --enable-sse option")); + UndefinedOpcode(i); +#endif +} + +#endif /* BX_SUPPORT_SSE >= 4 */ + /* 66 0F 63 */ void BX_CPU_C::PACKSSWB_VdqWq(bxInstruction_c *i) {