Added decoding of new SSE4 instructions (recently published in Intel docs)

At least CPUID detects them correctly
The code has not been tested yet! (but it should work fine)
This commit is contained in:
Stanislav Shwartsman 2006-04-06 18:30:05 +00:00
parent c8cc6bcd14
commit 03eac64013
8 changed files with 1409 additions and 23 deletions

View File

@ -1,5 +1,5 @@
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// $Id: cpu.h,v 1.276 2006-04-05 17:31:30 sshwarts Exp $ // $Id: cpu.h,v 1.277 2006-04-06 18:30:02 sshwarts Exp $
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2001 MandrakeSoft S.A. // Copyright (C) 2001 MandrakeSoft S.A.
@ -1743,7 +1743,7 @@ public: // for now...
BX_SMF void FISTP_QWORD_INTEGER(bxInstruction_c *); BX_SMF void FISTP_QWORD_INTEGER(bxInstruction_c *);
BX_SMF void FBSTP_PACKED_BCD(bxInstruction_c *); BX_SMF void FBSTP_PACKED_BCD(bxInstruction_c *);
BX_SMF void FISTTP16(bxInstruction_c *); BX_SMF void FISTTP16(bxInstruction_c *); // SSE3
BX_SMF void FISTTP32(bxInstruction_c *); BX_SMF void FISTTP32(bxInstruction_c *);
BX_SMF void FISTTP64(bxInstruction_c *); BX_SMF void FISTTP64(bxInstruction_c *);
@ -2202,7 +2202,7 @@ public: // for now...
#define MOVNTDQ_MdqVdq /* 66 0f e7 */ MOVNTPD_MdqVpd /* 66 0f 2b */ #define MOVNTDQ_MdqVdq /* 66 0f e7 */ MOVNTPD_MdqVpd /* 66 0f 2b */
#endif // #ifdef StandAloneDecoder #endif // #ifdef StandAloneDecoder
/* PNI */ /* SSE3 */
BX_SMF void MOVDDUP_VpdWq(bxInstruction_c *i); BX_SMF void MOVDDUP_VpdWq(bxInstruction_c *i);
BX_SMF void MOVSLDUP_VpsWps(bxInstruction_c *i); BX_SMF void MOVSLDUP_VpsWps(bxInstruction_c *i);
BX_SMF void MOVSHDUP_VpsWps(bxInstruction_c *i); BX_SMF void MOVSHDUP_VpsWps(bxInstruction_c *i);
@ -2213,7 +2213,43 @@ public: // for now...
BX_SMF void ADDSUBPD_VpdWpd(bxInstruction_c *i); BX_SMF void ADDSUBPD_VpdWpd(bxInstruction_c *i);
BX_SMF void ADDSUBPS_VpsWps(bxInstruction_c *i); BX_SMF void ADDSUBPS_VpsWps(bxInstruction_c *i);
BX_SMF void LDDQU_VdqMdq(bxInstruction_c *i); BX_SMF void LDDQU_VdqMdq(bxInstruction_c *i);
/* PNI */ /* SSE3 */
#if BX_SUPPORT_SSE >= 4
BX_SMF void PSHUFB_PqQq(bxInstruction_c *i);
BX_SMF void PHADDW_PqQq(bxInstruction_c *i);
BX_SMF void PHADDD_PqQq(bxInstruction_c *i);
BX_SMF void PHADDSW_PqQq(bxInstruction_c *i);
BX_SMF void PMADDUBSW_PqQq(bxInstruction_c *i);
BX_SMF void PHSUBSW_PqQq(bxInstruction_c *i);
BX_SMF void PHSUBW_PqQq(bxInstruction_c *i);
BX_SMF void PHSUBD_PqQq(bxInstruction_c *i);
BX_SMF void PSIGNB_PqQq(bxInstruction_c *i);
BX_SMF void PSIGNW_PqQq(bxInstruction_c *i);
BX_SMF void PSIGND_PqQq(bxInstruction_c *i);
BX_SMF void PMULHRSW_PqQq(bxInstruction_c *i);
BX_SMF void PABSB_PqQq(bxInstruction_c *i);
BX_SMF void PABSW_PqQq(bxInstruction_c *i);
BX_SMF void PABSD_PqQq(bxInstruction_c *i);
BX_SMF void PALIGNR_PqQqIb(bxInstruction_c *i);
BX_SMF void PSHUFB_VdqWdq(bxInstruction_c *i);
BX_SMF void PHADDW_VdqWdq(bxInstruction_c *i);
BX_SMF void PHADDD_VdqWdq(bxInstruction_c *i);
BX_SMF void PHADDSW_VdqWdq(bxInstruction_c *i);
BX_SMF void PMADDUBSW_VdqWdq(bxInstruction_c *i);
BX_SMF void PHSUBSW_VdqWdq(bxInstruction_c *i);
BX_SMF void PHSUBW_VdqWdq(bxInstruction_c *i);
BX_SMF void PHSUBD_VdqWdq(bxInstruction_c *i);
BX_SMF void PSIGNB_VdqWdq(bxInstruction_c *i);
BX_SMF void PSIGNW_VdqWdq(bxInstruction_c *i);
BX_SMF void PSIGND_VdqWdq(bxInstruction_c *i);
BX_SMF void PMULHRSW_VdqWdq(bxInstruction_c *i);
BX_SMF void PABSB_VdqWdq(bxInstruction_c *i);
BX_SMF void PABSW_VdqWdq(bxInstruction_c *i);
BX_SMF void PABSD_VdqWdq(bxInstruction_c *i);
BX_SMF void PALIGNR_VdqWdqIb(bxInstruction_c *i);
#endif
BX_SMF void CMPXCHG_XBTS(bxInstruction_c *); BX_SMF void CMPXCHG_XBTS(bxInstruction_c *);
BX_SMF void CMPXCHG_IBTS(bxInstruction_c *); BX_SMF void CMPXCHG_IBTS(bxInstruction_c *);
@ -3291,10 +3327,13 @@ IMPLEMENT_EFLAG_ACCESSOR (TF, 8)
#define BxSplitMod11b 0x0030 // Group encoding: 011 #define BxSplitMod11b 0x0030 // Group encoding: 011
#define BxFPGroup 0x0040 // Group encoding: 100 #define BxFPGroup 0x0040 // Group encoding: 100
#define BxRMGroup 0x0050 // Group encoding: 101 #define BxRMGroup 0x0050 // Group encoding: 101
#define Bx3ByteOpIndex 0x0060 // Group encoding: 110
#define Bx3ByteOpTable 0x0070 // Group encoding: 111
#define BxPrefix 0x0080 // bit 7 #define BxPrefix 0x0080 // bit 7
#define BxAnother 0x0100 // bit 8 #define BxAnother 0x0100 // bit 8
#define BxLockable 0x0200 // bit 9 #define BxLockable 0x0200 // bit 9
#define Bx3ByteOpcode 0x0400 // bit 10
#define BxRepeatable 0x0800 // bit 11 (pass through to metaInfo field) #define BxRepeatable 0x0800 // bit 11 (pass through to metaInfo field)
#define BxRepeatableZF 0x1000 // bit 12 (pass through to metaInfo field) #define BxRepeatableZF 0x1000 // bit 12 (pass through to metaInfo field)

View File

@ -1,5 +1,5 @@
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// $Id: cpuid.cc,v 1.35 2006-04-05 17:31:30 sshwarts Exp $ // $Id: cpuid.cc,v 1.36 2006-04-06 18:30:03 sshwarts Exp $
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2001 MandrakeSoft S.A. // Copyright (C) 2001 MandrakeSoft S.A.
@ -122,7 +122,10 @@ Bit32u BX_CPU_C::get_extended_cpuid_features()
Bit32u features = 0; Bit32u features = 0;
#if BX_SUPPORT_SSE >= 3 #if BX_SUPPORT_SSE >= 3
features |= 0x01; // report SSE3 (PNI) features |= 0x1; // report SSE3
#endif
#if BX_SUPPORT_SSE >= 4
features |= (1<<9); // report SSE4
#endif #endif
#if BX_SUPPORT_X86_64 #if BX_SUPPORT_X86_64

View File

@ -1,5 +1,5 @@
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// $Id: fetchdecode.cc,v 1.91 2006-04-05 20:52:37 sshwarts Exp $ // $Id: fetchdecode.cc,v 1.92 2006-04-06 18:30:03 sshwarts Exp $
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2001 MandrakeSoft S.A. // Copyright (C) 2001 MandrakeSoft S.A.
@ -750,9 +750,17 @@ static BxOpcodeInfo_t BxOpcodeInfo[512*2] = {
/* 0F 35 */ { 0, &BX_CPU_C::SYSEXIT }, /* 0F 35 */ { 0, &BX_CPU_C::SYSEXIT },
/* 0F 36 */ { 0, &BX_CPU_C::BxError }, /* 0F 36 */ { 0, &BX_CPU_C::BxError },
/* 0F 37 */ { 0, &BX_CPU_C::BxError }, /* 0F 37 */ { 0, &BX_CPU_C::BxError },
#if BX_SUPPORT_SSE >= 4
/* 0F 38 */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA4 }, // 3-byte escape
#else
/* 0F 38 */ { 0, &BX_CPU_C::BxError }, /* 0F 38 */ { 0, &BX_CPU_C::BxError },
#endif
/* 0F 39 */ { 0, &BX_CPU_C::BxError }, /* 0F 39 */ { 0, &BX_CPU_C::BxError },
#if BX_SUPPORT_SSE >= 4
/* 0F 3A */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA5 }, // 3-byte escape
#else
/* 0F 3A */ { 0, &BX_CPU_C::BxError }, /* 0F 3A */ { 0, &BX_CPU_C::BxError },
#endif
/* 0F 3B */ { 0, &BX_CPU_C::BxError }, /* 0F 3B */ { 0, &BX_CPU_C::BxError },
/* 0F 3C */ { 0, &BX_CPU_C::BxError }, /* 0F 3C */ { 0, &BX_CPU_C::BxError },
/* 0F 3D */ { 0, &BX_CPU_C::BxError }, /* 0F 3D */ { 0, &BX_CPU_C::BxError },
@ -1300,9 +1308,17 @@ static BxOpcodeInfo_t BxOpcodeInfo[512*2] = {
/* 0F 35 */ { 0, &BX_CPU_C::SYSEXIT }, /* 0F 35 */ { 0, &BX_CPU_C::SYSEXIT },
/* 0F 36 */ { 0, &BX_CPU_C::BxError }, /* 0F 36 */ { 0, &BX_CPU_C::BxError },
/* 0F 37 */ { 0, &BX_CPU_C::BxError }, /* 0F 37 */ { 0, &BX_CPU_C::BxError },
#if BX_SUPPORT_SSE >= 4
/* 0F 38 */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA4 }, // 3-byte escape; Bx3ByteOpTable makes the decoder index the sub-table by (b3 >> 4), as in the first half of this table
#else
/* 0F 38 */ { 0, &BX_CPU_C::BxError }, /* 0F 38 */ { 0, &BX_CPU_C::BxError },
#endif
/* 0F 39 */ { 0, &BX_CPU_C::BxError }, /* 0F 39 */ { 0, &BX_CPU_C::BxError },
#if BX_SUPPORT_SSE >= 4
/* 0F 3A */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA5 }, // 3-byte escape; Bx3ByteOpTable makes the decoder index the sub-table by (b3 >> 4), as in the first half of this table
#else
/* 0F 3A */ { 0, &BX_CPU_C::BxError }, /* 0F 3A */ { 0, &BX_CPU_C::BxError },
#endif
/* 0F 3B */ { 0, &BX_CPU_C::BxError }, /* 0F 3B */ { 0, &BX_CPU_C::BxError },
/* 0F 3C */ { 0, &BX_CPU_C::BxError }, /* 0F 3C */ { 0, &BX_CPU_C::BxError },
/* 0F 3D */ { 0, &BX_CPU_C::BxError }, /* 0F 3D */ { 0, &BX_CPU_C::BxError },
@ -1510,8 +1526,11 @@ BX_CPU_C::fetchDecode(Bit8u *iptr, bxInstruction_c *instruction, unsigned remain
bx_bool is_32, lock=0; bx_bool is_32, lock=0;
unsigned b1, b2, ilen=0, attr, os_32; unsigned b1, b2, ilen=0, attr, os_32;
unsigned imm_mode, offset; unsigned imm_mode, offset;
unsigned rm, mod=0, nnn=0; unsigned rm = 0, mod=0, nnn=0;
unsigned sse_prefix; unsigned sse_prefix;
#if BX_SUPPORT_SSE >= 4
unsigned b3 = 0;
#endif
#define SSE_PREFIX_NONE 0 #define SSE_PREFIX_NONE 0
#define SSE_PREFIX_66 1 #define SSE_PREFIX_66 1
@ -1628,6 +1647,18 @@ fetch_b1:
attr = BxOpcodeInfo[b1+offset].Attr; attr = BxOpcodeInfo[b1+offset].Attr;
instruction->setRepAttr(attr & (BxRepeatable | BxRepeatableZF)); instruction->setRepAttr(attr & (BxRepeatable | BxRepeatableZF));
#if BX_SUPPORT_SSE >= 4
// handle 3-byte escape
if (attr & Bx3ByteOpcode) {
if (ilen < remain) {
ilen++;
b3 = *iptr++;
}
else
return(0);
}
#endif
if (attr & BxAnother) { if (attr & BxAnother) {
// opcode requires modrm byte // opcode requires modrm byte
if (ilen < remain) { if (ilen < remain) {
@ -1795,6 +1826,14 @@ modrm_done:
case BxRMGroup: case BxRMGroup:
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[rm]); OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[rm]);
break; break;
#if BX_SUPPORT_SSE >= 4
case Bx3ByteOpTable:
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[b3 >> 4]);
break;
case Bx3ByteOpIndex:
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[b3 & 15]);
break;
#endif
case BxPrefixSSE: case BxPrefixSSE:
{ {
/* For SSE opcodes, look into another 4 entries table /* For SSE opcodes, look into another 4 entries table
@ -1973,7 +2012,7 @@ modrm_done:
} }
#if BX_SUPPORT_3DNOW #if BX_SUPPORT_3DNOW
if(b1 == 0x10f) { // 3DNow! instruction set if(b1 == 0x10f) {
instruction->execute = Bx3DNowOpcodeInfo[instruction->modRMForm.Ib].ExecutePtr; instruction->execute = Bx3DNowOpcodeInfo[instruction->modRMForm.Ib].ExecutePtr;
} }
#endif #endif
@ -1983,12 +2022,10 @@ modrm_done:
return(1); return(1);
} }
void void BX_CPU_C::BxError(bxInstruction_c *i)
BX_CPU_C::BxError(bxInstruction_c *i)
{ {
BX_INFO(("BxError: instruction with opcode=0x%x", i->b1())); BX_INFO(("BxError: instruction with opcode=0x%x", i->b1()));
BX_INFO(("mod was %x, nnn was %u, rm was %u", i->mod(), i->nnn(), i->rm())); BX_INFO(("mod was %x, nnn was %u, rm was %u", i->mod(), i->nnn(), i->rm()));
BX_INFO(("WARNING: Encountered an unknown instruction (signalling illegal instruction)")); BX_INFO(("WARNING: Encountered an unknown instruction (signalling illegal instruction)"));
BX_CPU_THIS_PTR UndefinedOpcode(i); BX_CPU_THIS_PTR UndefinedOpcode(i);

View File

@ -1,5 +1,5 @@
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// $Id: fetchdecode.h,v 1.23 2006-04-05 20:52:39 sshwarts Exp $ // $Id: fetchdecode.h,v 1.24 2006-04-06 18:30:04 sshwarts Exp $
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// //
// Copyright (c) 2003 Stanislav Shwartsman // Copyright (c) 2003 Stanislav Shwartsman
@ -1846,4 +1846,221 @@ static BxOpcodeInfo_t BxOpcodeGroupSSE_G1407[4] = {
/* F3 */ { 0, &BX_CPU_C::BxError } /* F3 */ { 0, &BX_CPU_C::BxError }
}; };
#if BX_SUPPORT_SSE >= 4
/* 0F 38 00 (PSHUFB): four entries, one per SSE prefix (none, 66, F2, F3).
 * The F2 and F3 encodings are routed to BxError. */
static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3800[4] = {
/* -- */ { 0, &BX_CPU_C::PSHUFB_PqQq },
/* 66 */ { 0, &BX_CPU_C::PSHUFB_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* 0F 38 01 (PHADDW): four entries, one per SSE prefix (none, 66, F2, F3).
 * The F2 and F3 encodings are routed to BxError. */
static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3801[4] = {
/* -- */ { 0, &BX_CPU_C::PHADDW_PqQq },
/* 66 */ { 0, &BX_CPU_C::PHADDW_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* 0F 38 02 (PHADDD): four entries, one per SSE prefix (none, 66, F2, F3).
 * The F2 and F3 encodings are routed to BxError. */
static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3802[4] = {
/* -- */ { 0, &BX_CPU_C::PHADDD_PqQq },
/* 66 */ { 0, &BX_CPU_C::PHADDD_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* 0F 38 03 (PHADDSW): four entries, one per SSE prefix (none, 66, F2, F3).
 * The F2 and F3 encodings are routed to BxError. */
static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3803[4] = {
/* -- */ { 0, &BX_CPU_C::PHADDSW_PqQq },
/* 66 */ { 0, &BX_CPU_C::PHADDSW_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* 0F 38 04 (PMADDUBSW): four entries, one per SSE prefix (none, 66, F2, F3).
 * The F2 and F3 encodings are routed to BxError. */
static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3804[4] = {
/* -- */ { 0, &BX_CPU_C::PMADDUBSW_PqQq },
/* 66 */ { 0, &BX_CPU_C::PMADDUBSW_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* 0F 38 05 (PHSUBW): four entries, one per SSE prefix (none, 66, F2, F3).
 * The F2 and F3 encodings are routed to BxError. */
static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3805[4] = {
/* -- */ { 0, &BX_CPU_C::PHSUBW_PqQq },
/* 66 */ { 0, &BX_CPU_C::PHSUBW_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* 0F 38 06 (PHSUBD): four entries, one per SSE prefix (none, 66, F2, F3).
 * The F2 and F3 encodings are routed to BxError. */
static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3806[4] = {
/* -- */ { 0, &BX_CPU_C::PHSUBD_PqQq },
/* 66 */ { 0, &BX_CPU_C::PHSUBD_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* 0F 38 07 (PHSUBSW): four entries, one per SSE prefix (none, 66, F2, F3).
 * The F2 and F3 encodings are routed to BxError. */
static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3807[4] = {
/* -- */ { 0, &BX_CPU_C::PHSUBSW_PqQq },
/* 66 */ { 0, &BX_CPU_C::PHSUBSW_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* 0F 38 08 (PSIGNB): four entries, one per SSE prefix (none, 66, F2, F3).
 * The F2 and F3 encodings are routed to BxError. */
static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3808[4] = {
/* -- */ { 0, &BX_CPU_C::PSIGNB_PqQq },
/* 66 */ { 0, &BX_CPU_C::PSIGNB_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* 0F 38 09 (PSIGNW): four entries, one per SSE prefix (none, 66, F2, F3).
 * The F2 and F3 encodings are routed to BxError. */
static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3809[4] = {
/* -- */ { 0, &BX_CPU_C::PSIGNW_PqQq },
/* 66 */ { 0, &BX_CPU_C::PSIGNW_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* 0F 38 0A (PSIGND): four entries, one per SSE prefix (none, 66, F2, F3).
 * The F2 and F3 encodings are routed to BxError. */
static BxOpcodeInfo_t BxOpcodeGroupSSE_0f380a[4] = {
/* -- */ { 0, &BX_CPU_C::PSIGND_PqQq },
/* 66 */ { 0, &BX_CPU_C::PSIGND_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* 0F 38 0B (PMULHRSW): four entries, one per SSE prefix (none, 66, F2, F3).
 * The F2 and F3 encodings are routed to BxError. */
static BxOpcodeInfo_t BxOpcodeGroupSSE_0f380b[4] = {
/* -- */ { 0, &BX_CPU_C::PMULHRSW_PqQq },
/* 66 */ { 0, &BX_CPU_C::PMULHRSW_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* 0F 38 1C (PABSB): four entries, one per SSE prefix (none, 66, F2, F3).
 * The F2 and F3 encodings are routed to BxError. */
static BxOpcodeInfo_t BxOpcodeGroupSSE_0f381c[4] = {
/* -- */ { 0, &BX_CPU_C::PABSB_PqQq },
/* 66 */ { 0, &BX_CPU_C::PABSB_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* 0F 38 1D (PABSW): four entries, one per SSE prefix (none, 66, F2, F3).
 * The F2 and F3 encodings are routed to BxError. */
static BxOpcodeInfo_t BxOpcodeGroupSSE_0f381d[4] = {
/* -- */ { 0, &BX_CPU_C::PABSW_PqQq },
/* 66 */ { 0, &BX_CPU_C::PABSW_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* 0F 38 1E (PABSD): four entries, one per SSE prefix (none, 66, F2, F3).
 * The F2 and F3 encodings are routed to BxError. */
static BxOpcodeInfo_t BxOpcodeGroupSSE_0f381e[4] = {
/* -- */ { 0, &BX_CPU_C::PABSD_PqQq },
/* 66 */ { 0, &BX_CPU_C::PABSD_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* 0F 3A 0F (PALIGNR): four entries, one per SSE prefix (none, 66, F2, F3).
 * The F2 and F3 encodings are routed to BxError. */
static BxOpcodeInfo_t BxOpcodeGroupSSE_0f3a0f[4] = {
/* -- */ { 0, &BX_CPU_C::PALIGNR_PqQqIb },
/* 66 */ { 0, &BX_CPU_C::PALIGNR_VdqWdqIb },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* ************************************************************************ */
/* 3-byte opcode table (Table A-4, 0F 38) */
/* Opcodes 0F 38 00 .. 0F 38 0F. Reached through a Bx3ByteOpIndex entry, so
 * the row is chosen by the low nibble of the third opcode byte (b3 & 15).
 * Populated rows defer to a per-SSE-prefix group table via BxPrefixSSE. */
static BxOpcodeInfo_t BxOpcode3ByteOp0f380x[16] = {
/* 00 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3800 },
/* 01 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3801 },
/* 02 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3802 },
/* 03 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3803 },
/* 04 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3804 },
/* 05 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3805 },
/* 06 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3806 },
/* 07 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3807 },
/* 08 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3808 },
/* 09 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3809 },
/* 0A */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f380a },
/* 0B */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f380b },
/* 0C */ { 0, &BX_CPU_C::BxError },
/* 0D */ { 0, &BX_CPU_C::BxError },
/* 0E */ { 0, &BX_CPU_C::BxError },
/* 0F */ { 0, &BX_CPU_C::BxError }
};
/* Opcodes 0F 38 10 .. 0F 38 1F. Reached through a Bx3ByteOpIndex entry, so
 * the row is chosen by the low nibble of the third opcode byte (b3 & 15);
 * the row labels below are that low nibble (row 0C is opcode 0F 38 1C). */
static BxOpcodeInfo_t BxOpcode3ByteOp0f381x[16] = {
/* 00 */ { 0, &BX_CPU_C::BxError },
/* 01 */ { 0, &BX_CPU_C::BxError },
/* 02 */ { 0, &BX_CPU_C::BxError },
/* 03 */ { 0, &BX_CPU_C::BxError },
/* 04 */ { 0, &BX_CPU_C::BxError },
/* 05 */ { 0, &BX_CPU_C::BxError },
/* 06 */ { 0, &BX_CPU_C::BxError },
/* 07 */ { 0, &BX_CPU_C::BxError },
/* 08 */ { 0, &BX_CPU_C::BxError },
/* 09 */ { 0, &BX_CPU_C::BxError },
/* 0A */ { 0, &BX_CPU_C::BxError },
/* 0B */ { 0, &BX_CPU_C::BxError },
/* 0C */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f381c },
/* 0D */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f381d },
/* 0E */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f381e },
/* 0F */ { 0, &BX_CPU_C::BxError }
};
/* Top-level table for the 0F 38 escape. It is attached to a Bx3ByteOpTable
 * entry, so the row is chosen by the high nibble of the third opcode byte
 * (b3 >> 4). Populated rows chain to a 16-entry table indexed by the low
 * nibble (Bx3ByteOpIndex). */
static BxOpcodeInfo_t BxOpcode3ByteTableA4[16] = {
/* 00 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f380x },
/* 01 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f381x },
/* 02 */ { 0, &BX_CPU_C::BxError },
/* 03 */ { 0, &BX_CPU_C::BxError },
/* 04 */ { 0, &BX_CPU_C::BxError },
/* 05 */ { 0, &BX_CPU_C::BxError },
/* 06 */ { 0, &BX_CPU_C::BxError },
/* 07 */ { 0, &BX_CPU_C::BxError },
/* 08 */ { 0, &BX_CPU_C::BxError },
/* 09 */ { 0, &BX_CPU_C::BxError },
/* 0A */ { 0, &BX_CPU_C::BxError },
/* 0B */ { 0, &BX_CPU_C::BxError },
/* 0C */ { 0, &BX_CPU_C::BxError },
/* 0D */ { 0, &BX_CPU_C::BxError },
/* 0E */ { 0, &BX_CPU_C::BxError },
/* 0F */ { 0, &BX_CPU_C::BxError }
};
/* ************************************************************************ */
/* 3-byte opcode table (Table A-5, 0F 3A) */
/* Opcodes 0F 3A 00 .. 0F 3A 0F. Reached through a Bx3ByteOpIndex entry, so
 * the row is chosen by the low nibble of the third opcode byte (b3 & 15).
 * Only 0F 3A 0F (PALIGNR) is populated; it is also flagged BxImmediate_Ib. */
static BxOpcodeInfo_t BxOpcode3ByteOp0f3a0x[16] = {
/* 00 */ { 0, &BX_CPU_C::BxError },
/* 01 */ { 0, &BX_CPU_C::BxError },
/* 02 */ { 0, &BX_CPU_C::BxError },
/* 03 */ { 0, &BX_CPU_C::BxError },
/* 04 */ { 0, &BX_CPU_C::BxError },
/* 05 */ { 0, &BX_CPU_C::BxError },
/* 06 */ { 0, &BX_CPU_C::BxError },
/* 07 */ { 0, &BX_CPU_C::BxError },
/* 08 */ { 0, &BX_CPU_C::BxError },
/* 09 */ { 0, &BX_CPU_C::BxError },
/* 0A */ { 0, &BX_CPU_C::BxError },
/* 0B */ { 0, &BX_CPU_C::BxError },
/* 0C */ { 0, &BX_CPU_C::BxError },
/* 0D */ { 0, &BX_CPU_C::BxError },
/* 0E */ { 0, &BX_CPU_C::BxError },
/* 0F */ { BxImmediate_Ib | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3a0f }
};
/* Top-level table for the 0F 3A escape. It is attached to a Bx3ByteOpTable
 * entry, so the row is chosen by the high nibble of the third opcode byte
 * (b3 >> 4). Only row 0 is populated; it chains to the low-nibble table. */
static BxOpcodeInfo_t BxOpcode3ByteTableA5[16] = {
/* 00 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f3a0x },
/* 01 */ { 0, &BX_CPU_C::BxError },
/* 02 */ { 0, &BX_CPU_C::BxError },
/* 03 */ { 0, &BX_CPU_C::BxError },
/* 04 */ { 0, &BX_CPU_C::BxError },
/* 05 */ { 0, &BX_CPU_C::BxError },
/* 06 */ { 0, &BX_CPU_C::BxError },
/* 07 */ { 0, &BX_CPU_C::BxError },
/* 08 */ { 0, &BX_CPU_C::BxError },
/* 09 */ { 0, &BX_CPU_C::BxError },
/* 0A */ { 0, &BX_CPU_C::BxError },
/* 0B */ { 0, &BX_CPU_C::BxError },
/* 0C */ { 0, &BX_CPU_C::BxError },
/* 0D */ { 0, &BX_CPU_C::BxError },
/* 0E */ { 0, &BX_CPU_C::BxError },
/* 0F */ { 0, &BX_CPU_C::BxError }
};
#endif /* BX_SUPPORT_SSE >= 4 */
#endif #endif

View File

@ -1,5 +1,5 @@
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// $Id: fetchdecode64.cc,v 1.91 2006-04-05 20:52:40 sshwarts Exp $ // $Id: fetchdecode64.cc,v 1.92 2006-04-06 18:30:04 sshwarts Exp $
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2001 MandrakeSoft S.A. // Copyright (C) 2001 MandrakeSoft S.A.
@ -873,9 +873,17 @@ static BxOpcodeInfo_t BxOpcodeInfo64[512*3] = {
/* 0F 35 */ { 0, &BX_CPU_C::BxError }, // SYSENTER/SYSEXIT not recognized in long mode /* 0F 35 */ { 0, &BX_CPU_C::BxError }, // SYSENTER/SYSEXIT not recognized in long mode
/* 0F 36 */ { 0, &BX_CPU_C::BxError }, /* 0F 36 */ { 0, &BX_CPU_C::BxError },
/* 0F 37 */ { 0, &BX_CPU_C::BxError }, /* 0F 37 */ { 0, &BX_CPU_C::BxError },
#if BX_SUPPORT_SSE >= 4
/* 0F 38 */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA4 }, // 3-byte escape
#else
/* 0F 38 */ { 0, &BX_CPU_C::BxError }, /* 0F 38 */ { 0, &BX_CPU_C::BxError },
#endif
/* 0F 39 */ { 0, &BX_CPU_C::BxError }, /* 0F 39 */ { 0, &BX_CPU_C::BxError },
#if BX_SUPPORT_SSE >= 4
/* 0F 3A */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA5 }, // 3-byte escape
#else
/* 0F 3A */ { 0, &BX_CPU_C::BxError }, /* 0F 3A */ { 0, &BX_CPU_C::BxError },
#endif
/* 0F 3B */ { 0, &BX_CPU_C::BxError }, /* 0F 3B */ { 0, &BX_CPU_C::BxError },
/* 0F 3C */ { 0, &BX_CPU_C::BxError }, /* 0F 3C */ { 0, &BX_CPU_C::BxError },
/* 0F 3D */ { 0, &BX_CPU_C::BxError }, /* 0F 3D */ { 0, &BX_CPU_C::BxError },
@ -1394,9 +1402,17 @@ static BxOpcodeInfo_t BxOpcodeInfo64[512*3] = {
/* 0F 35 */ { 0, &BX_CPU_C::BxError }, // SYSENTER/SYSEXIT not recognized in long mode /* 0F 35 */ { 0, &BX_CPU_C::BxError }, // SYSENTER/SYSEXIT not recognized in long mode
/* 0F 36 */ { 0, &BX_CPU_C::BxError }, /* 0F 36 */ { 0, &BX_CPU_C::BxError },
/* 0F 37 */ { 0, &BX_CPU_C::BxError }, /* 0F 37 */ { 0, &BX_CPU_C::BxError },
#if BX_SUPPORT_SSE >= 4
/* 0F 38 */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA4 }, // 3-byte escape
#else
/* 0F 38 */ { 0, &BX_CPU_C::BxError }, /* 0F 38 */ { 0, &BX_CPU_C::BxError },
#endif
/* 0F 39 */ { 0, &BX_CPU_C::BxError }, /* 0F 39 */ { 0, &BX_CPU_C::BxError },
#if BX_SUPPORT_SSE >= 4
/* 0F 3A */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA5 }, // 3-byte escape
#else
/* 0F 3A */ { 0, &BX_CPU_C::BxError }, /* 0F 3A */ { 0, &BX_CPU_C::BxError },
#endif
/* 0F 3B */ { 0, &BX_CPU_C::BxError }, /* 0F 3B */ { 0, &BX_CPU_C::BxError },
/* 0F 3C */ { 0, &BX_CPU_C::BxError }, /* 0F 3C */ { 0, &BX_CPU_C::BxError },
/* 0F 3D */ { 0, &BX_CPU_C::BxError }, /* 0F 3D */ { 0, &BX_CPU_C::BxError },
@ -1915,9 +1931,17 @@ static BxOpcodeInfo_t BxOpcodeInfo64[512*3] = {
/* 0F 35 */ { 0, &BX_CPU_C::BxError }, // SYSENTER/SYSEXIT not recognized in long mode /* 0F 35 */ { 0, &BX_CPU_C::BxError }, // SYSENTER/SYSEXIT not recognized in long mode
/* 0F 36 */ { 0, &BX_CPU_C::BxError }, /* 0F 36 */ { 0, &BX_CPU_C::BxError },
/* 0F 37 */ { 0, &BX_CPU_C::BxError }, /* 0F 37 */ { 0, &BX_CPU_C::BxError },
#if BX_SUPPORT_SSE >= 4
/* 0F 38 */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA4 }, // 3-byte escape
#else
/* 0F 38 */ { 0, &BX_CPU_C::BxError }, /* 0F 38 */ { 0, &BX_CPU_C::BxError },
#endif
/* 0F 39 */ { 0, &BX_CPU_C::BxError }, /* 0F 39 */ { 0, &BX_CPU_C::BxError },
#if BX_SUPPORT_SSE >= 4
/* 0F 3A */ { BxAnother | Bx3ByteOpcode | Bx3ByteOpTable, NULL, BxOpcode3ByteTableA5 }, // 3-byte escape
#else
/* 0F 3A */ { 0, &BX_CPU_C::BxError }, /* 0F 3A */ { 0, &BX_CPU_C::BxError },
#endif
/* 0F 3B */ { 0, &BX_CPU_C::BxError }, /* 0F 3B */ { 0, &BX_CPU_C::BxError },
/* 0F 3C */ { 0, &BX_CPU_C::BxError }, /* 0F 3C */ { 0, &BX_CPU_C::BxError },
/* 0F 3D */ { 0, &BX_CPU_C::BxError }, /* 0F 3D */ { 0, &BX_CPU_C::BxError },
@ -2125,13 +2149,16 @@ BX_CPU_C::fetchDecode64(Bit8u *iptr, bxInstruction_c *instruction, unsigned rema
unsigned b1, b2, ilen=0, attr, lock=0; unsigned b1, b2, ilen=0, attr, lock=0;
unsigned imm_mode, offset, rex_r,rex_x,rex_b; unsigned imm_mode, offset, rex_r,rex_x,rex_b;
unsigned rm, mod = 0, nnn = 0; unsigned rm = 0, mod = 0, nnn = 0;
unsigned sse_prefix; unsigned sse_prefix;
#define SSE_PREFIX_NONE 0 #define SSE_PREFIX_NONE 0
#define SSE_PREFIX_66 1 #define SSE_PREFIX_66 1
#define SSE_PREFIX_F2 2 #define SSE_PREFIX_F2 2
#define SSE_PREFIX_F3 4 /* only one SSE prefix could be used */ #define SSE_PREFIX_F3 4 /* only one SSE prefix could be used */
static int sse_prefix_index[8] = { 0, 1, 2, -1, 3, -1, -1, -1 }; static int sse_prefix_index[8] = { 0, 1, 2, -1, 3, -1, -1, -1 };
#if BX_SUPPORT_SSE >= 4
unsigned b3 = 0;
#endif
offset = 512*1; offset = 512*1;
rex_r = 0; rex_r = 0;
@ -2259,6 +2286,18 @@ fetch_b1:
attr = BxOpcodeInfo64[b1+offset].Attr; attr = BxOpcodeInfo64[b1+offset].Attr;
instruction->setRepAttr(attr & (BxRepeatable | BxRepeatableZF)); instruction->setRepAttr(attr & (BxRepeatable | BxRepeatableZF));
#if BX_SUPPORT_SSE >= 4
// handle 3-byte escape
if (attr & Bx3ByteOpcode) {
if (ilen < remain) {
ilen++;
b3 = *iptr++;
}
else
return(0);
}
#endif
if (attr & BxAnother) { if (attr & BxAnother) {
// opcode requires modrm byte // opcode requires modrm byte
if (ilen < remain) { if (ilen < remain) {
@ -2277,7 +2316,7 @@ fetch_b1:
instruction->modRMForm.modRMData |= (nnn<<8); instruction->modRMForm.modRMData |= (nnn<<8);
// MOVs with CRx and DRx always use register ops and ignore the mod field. // MOVs with CRx and DRx always use register ops and ignore the mod field.
if ( (b1 & ~3) == 0x120 ) if ((b1 & ~3) == 0x120)
mod = 0xc0; mod = 0xc0;
if (mod == 0xc0) { // mod == 11b if (mod == 0xc0) { // mod == 11b
@ -2482,6 +2521,14 @@ modrm_done:
case BxRMGroup: case BxRMGroup:
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[rm]); OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[rm]);
break; break;
#if BX_SUPPORT_SSE >= 4
case Bx3ByteOpTable:
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[b3 >> 4]);
break;
case Bx3ByteOpIndex:
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[b3 & 15]);
break;
#endif
case BxPrefixSSE: case BxPrefixSSE:
{ {
/* For SSE opcodes, look into another 4 entries table /* For SSE opcodes, look into another 4 entries table
@ -2665,7 +2712,7 @@ modrm_done:
} }
#if BX_SUPPORT_3DNOW #if BX_SUPPORT_3DNOW
if(b1 == 0x10f) { // 3DNow! instruction set if(b1 == 0x10f) {
instruction->execute = Bx3DNowOpcodeInfo[instruction->modRMForm.Ib].ExecutePtr; instruction->execute = Bx3DNowOpcodeInfo[instruction->modRMForm.Ib].ExecutePtr;
} }
#endif #endif

View File

@ -1,5 +1,5 @@
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// $Id: mmx.cc,v 1.52 2006-03-06 22:03:00 sshwarts Exp $ // $Id: mmx.cc,v 1.53 2006-04-06 18:30:05 sshwarts Exp $
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// //
// Copyright (c) 2002 Stanislav Shwartsman // Copyright (c) 2002 Stanislav Shwartsman
@ -115,6 +115,497 @@ void BX_CPU_C::prepareFPU2MMX(void)
#endif #endif
#if BX_SUPPORT_SSE >= 4
/* 0F 38 00 */
/* PSHUFB, MMX form: byte shuffle of the destination register controlled by
 * the source operand. For each of the 8 result bytes, the matching control
 * byte in op2 either zeroes the lane (bit 7 set) or selects one byte of the
 * original destination. The destination is nnn(); the source is the rm()
 * register when modC0() is set, otherwise a qword read from memory. */
void BX_CPU_C::PSHUFB_PqQq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareMMX();

  BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_MMX_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2);
  }

  for(unsigned j=0; j<8; j++)
  {
    unsigned mask = op2.mmxubyte(j);
    if (mask & 0x80)
      result.mmxubyte(j) = 0;
    else
      /* the 64-bit operand holds only 8 bytes, so just the low 3 bits of
         the control byte form the index; masking with 0xf would read
         past the end of op1 for indices 8..15 */
      result.mmxubyte(j) = op1.mmxubyte(mask & 0x7);
  }

  BX_WRITE_MMX_REG(i->nnn(), result);
#else
  BX_INFO(("PSHUFB_PqQq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F 38 01 */
/* PHADDW, MMX form: adds adjacent 16-bit word pairs. The two low result
 * words come from the destination register, the two high result words from
 * the source operand; the result is written back to the destination. */
void BX_CPU_C::PHADDW_PqQq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareMMX();

  BxPackedMmxRegister dst = BX_READ_MMX_REG(i->nnn());
  BxPackedMmxRegister src, sums;

  /* source: qword from memory unless the register form is encoded */
  if (! i->modC0()) {
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &src);
  }
  else {
    src = BX_READ_MMX_REG(i->rm());
  }

  /* upper half of the result is built from the source pairs */
  MMXUW3(sums) = MMXUW2(src) + MMXUW3(src);
  MMXUW2(sums) = MMXUW0(src) + MMXUW1(src);
  /* lower half is built from the destination pairs */
  MMXUW1(sums) = MMXUW2(dst) + MMXUW3(dst);
  MMXUW0(sums) = MMXUW0(dst) + MMXUW1(dst);

  BX_WRITE_MMX_REG(i->nnn(), sums);
#else
  BX_INFO(("PHADDW_PqQq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F 38 02 */
/* PHADDD, MMX form: the low result dword is the sum of the two destination
 * dwords, the high result dword is the sum of the two source dwords. */
void BX_CPU_C::PHADDD_PqQq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareMMX();

  BxPackedMmxRegister dst = BX_READ_MMX_REG(i->nnn());
  BxPackedMmxRegister src, sums;

  /* source: qword from memory unless the register form is encoded */
  if (! i->modC0()) {
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &src);
  }
  else {
    src = BX_READ_MMX_REG(i->rm());
  }

  MMXUD1(sums) = MMXUD0(src) + MMXUD1(src);
  MMXUD0(sums) = MMXUD0(dst) + MMXUD1(dst);

  BX_WRITE_MMX_REG(i->nnn(), sums);
#else
  BX_INFO(("PHADDD_PqQq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F 38 03 */
/* PHADDSW, MMX form: adds adjacent signed 16-bit word pairs in 32-bit
 * precision and saturates each sum to a signed word. Low two result words
 * come from the destination, high two from the source. */
void BX_CPU_C::PHADDSW_PqQq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareMMX();

  BxPackedMmxRegister dst = BX_READ_MMX_REG(i->nnn());
  BxPackedMmxRegister src, sums;

  /* source: qword from memory unless the register form is encoded */
  if (! i->modC0()) {
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &src);
  }
  else {
    src = BX_READ_MMX_REG(i->rm());
  }

  /* widen first so the pair sum cannot wrap before saturation */
  Bit32s dst_lo = Bit32s(MMXSW0(dst)) + Bit32s(MMXSW1(dst));
  Bit32s dst_hi = Bit32s(MMXSW2(dst)) + Bit32s(MMXSW3(dst));
  Bit32s src_lo = Bit32s(MMXSW0(src)) + Bit32s(MMXSW1(src));
  Bit32s src_hi = Bit32s(MMXSW2(src)) + Bit32s(MMXSW3(src));

  MMXSW0(sums) = SaturateDwordSToWordS(dst_lo);
  MMXSW1(sums) = SaturateDwordSToWordS(dst_hi);
  MMXSW2(sums) = SaturateDwordSToWordS(src_lo);
  MMXSW3(sums) = SaturateDwordSToWordS(src_hi);

  /* commit the packed result to the destination register */
  BX_WRITE_MMX_REG(i->nnn(), sums);
#else
  BX_INFO(("PHADDSW_PqQq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F 38 04 */
/* PMADDUBSW, MMX form: for each of the 4 result words, multiplies two
 * adjacent unsigned destination bytes by the matching signed source bytes,
 * adds the two products and saturates the sum to a signed word. */
void BX_CPU_C::PMADDUBSW_PqQq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareMMX();

  BxPackedMmxRegister dst = BX_READ_MMX_REG(i->nnn());
  BxPackedMmxRegister src, prod;

  /* source: qword from memory unless the register form is encoded */
  if (! i->modC0()) {
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &src);
  }
  else {
    src = BX_READ_MMX_REG(i->rm());
  }

  for (unsigned w = 0; w < 4; w++) {
    unsigned lo = w*2, hi = w*2 + 1;   /* byte pair feeding word w */
    Bit32s even = Bit32s(dst.mmxubyte(lo)) * Bit32s(src.mmxsbyte(lo));
    Bit32s odd  = Bit32s(dst.mmxubyte(hi)) * Bit32s(src.mmxsbyte(hi));
    prod.mmx16s(w) = SaturateDwordSToWordS(even + odd);
  }

  /* commit the packed result to the destination register */
  BX_WRITE_MMX_REG(i->nnn(), prod);
#else
  BX_INFO(("PMADDUBSW_PqQq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F 38 07 */
/* PHSUBSW, MMX form: within each adjacent signed word pair, subtracts the
 * higher word from the lower one in 32-bit precision and saturates the
 * difference to a signed word. Low two result words come from the
 * destination, high two from the source. */
void BX_CPU_C::PHSUBSW_PqQq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareMMX();

  BxPackedMmxRegister dst = BX_READ_MMX_REG(i->nnn());
  BxPackedMmxRegister src, diffs;

  /* source: qword from memory unless the register form is encoded */
  if (! i->modC0()) {
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &src);
  }
  else {
    src = BX_READ_MMX_REG(i->rm());
  }

  /* widen first so the pair difference cannot wrap before saturation */
  Bit32s dst_lo = Bit32s(MMXSW0(dst)) - Bit32s(MMXSW1(dst));
  Bit32s dst_hi = Bit32s(MMXSW2(dst)) - Bit32s(MMXSW3(dst));
  Bit32s src_lo = Bit32s(MMXSW0(src)) - Bit32s(MMXSW1(src));
  Bit32s src_hi = Bit32s(MMXSW2(src)) - Bit32s(MMXSW3(src));

  MMXSW0(diffs) = SaturateDwordSToWordS(dst_lo);
  MMXSW1(diffs) = SaturateDwordSToWordS(dst_hi);
  MMXSW2(diffs) = SaturateDwordSToWordS(src_lo);
  MMXSW3(diffs) = SaturateDwordSToWordS(src_hi);

  /* commit the packed result to the destination register */
  BX_WRITE_MMX_REG(i->nnn(), diffs);
#else
  BX_INFO(("PHSUBSW_PqQq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F 38 05 */
/* PHSUBW, MMX form: within each adjacent 16-bit word pair, subtracts the
 * higher word from the lower one (no saturation). Low two result words
 * come from the destination, high two from the source. */
void BX_CPU_C::PHSUBW_PqQq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareMMX();

  BxPackedMmxRegister dst = BX_READ_MMX_REG(i->nnn());
  BxPackedMmxRegister src, diffs;

  /* source: qword from memory unless the register form is encoded */
  if (! i->modC0()) {
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &src);
  }
  else {
    src = BX_READ_MMX_REG(i->rm());
  }

  /* upper half of the result is built from the source pairs */
  MMXUW3(diffs) = MMXUW2(src) - MMXUW3(src);
  MMXUW2(diffs) = MMXUW0(src) - MMXUW1(src);
  /* lower half is built from the destination pairs */
  MMXUW1(diffs) = MMXUW2(dst) - MMXUW3(dst);
  MMXUW0(diffs) = MMXUW0(dst) - MMXUW1(dst);

  BX_WRITE_MMX_REG(i->nnn(), diffs);
#else
  BX_INFO(("PHSUBW_PqQq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F 38 06 */
/* PHSUBD, MMX form: the low result dword is destination dword 0 minus
 * destination dword 1; the high result dword is source dword 0 minus
 * source dword 1. */
void BX_CPU_C::PHSUBD_PqQq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareMMX();

  BxPackedMmxRegister dst = BX_READ_MMX_REG(i->nnn());
  BxPackedMmxRegister src, diffs;

  /* source: qword from memory unless the register form is encoded */
  if (! i->modC0()) {
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &src);
  }
  else {
    src = BX_READ_MMX_REG(i->rm());
  }

  MMXUD1(diffs) = MMXUD0(src) - MMXUD1(src);
  MMXUD0(diffs) = MMXUD0(dst) - MMXUD1(dst);

  BX_WRITE_MMX_REG(i->nnn(), diffs);
#else
  BX_INFO(("PHSUBD_PqQq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F 38 08 */
/*
 * PSIGNB (MMX form): apply the sign of each source byte to the matching
 * destination byte. A negative source byte negates the destination byte,
 * a zero source byte clears it, and a positive source byte leaves it
 * unchanged. The source is MMX register rm or a 64-bit memory operand.
 */
void BX_CPU_C::PSIGNB_PqQq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareMMX();

  BxPackedMmxRegister dst = BX_READ_MMX_REG(i->nnn());
  BxPackedMmxRegister src;

  if (! i->modC0()) {
    /* source operand is fetched from memory (segment, offset) */
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &src);
  }
  else {
    /* source operand is another MMX register */
    src = BX_READ_MMX_REG(i->rm());
  }

  for (unsigned n = 0; n < 8; n++) {
    int control = src.mmxsbyte(n);
    if (control < 0) {
      /* negate in int, then narrow back to the signed byte lane */
      dst.mmxsbyte(n) = -dst.mmxsbyte(n);
    }
    else if (control == 0) {
      dst.mmxsbyte(n) = 0;
    }
    /* control > 0: lane is kept as it is */
  }

  BX_WRITE_MMX_REG(i->nnn(), dst);
#else
  BX_INFO(("PSIGNB_PqQq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F 38 09 */
/*
 * PSIGNW (MMX form): apply the sign of each source word to the matching
 * destination word. A negative source word negates the destination word,
 * a zero source word clears it, and a positive source word leaves it
 * unchanged. The source is MMX register rm or a 64-bit memory operand.
 */
void BX_CPU_C::PSIGNW_PqQq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareMMX();

  BxPackedMmxRegister dst = BX_READ_MMX_REG(i->nnn());
  BxPackedMmxRegister src;

  if (! i->modC0()) {
    /* source operand is fetched from memory (segment, offset) */
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &src);
  }
  else {
    /* source operand is another MMX register */
    src = BX_READ_MMX_REG(i->rm());
  }

  for (unsigned n = 0; n < 4; n++) {
    int control = src.mmx16s(n);
    if (control < 0) {
      /* negate in int, then narrow back to the signed word lane */
      dst.mmx16s(n) = -dst.mmx16s(n);
    }
    else if (control == 0) {
      dst.mmx16s(n) = 0;
    }
    /* control > 0: lane is kept as it is */
  }

  BX_WRITE_MMX_REG(i->nnn(), dst);
#else
  BX_INFO(("PSIGNW_PqQq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F 38 0A */
/*
 * PSIGND (MMX form): apply the sign of each source dword to the matching
 * destination dword. A negative source dword negates the destination
 * dword, a zero source dword clears it, and a positive source dword leaves
 * it unchanged. The source is MMX register rm or a 64-bit memory operand.
 *
 * The negation is carried out on the unsigned view of the lane: negating
 * 0x80000000 as a signed 32-bit value overflows (undefined behaviour in
 * C++), whereas the unsigned form wraps and yields 0x80000000 again.
 */
void BX_CPU_C::PSIGND_PqQq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareMMX();

  BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_MMX_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2);
  }

  /* low dword */
  if (MMXSD0(op2) < 0)
    MMXUD0(op1) = 0 - MMXUD0(op1);   /* two's complement negate, no overflow */
  else if (MMXSD0(op2) == 0)
    MMXUD0(op1) = 0;

  /* high dword */
  if (MMXSD1(op2) < 0)
    MMXUD1(op1) = 0 - MMXUD1(op1);   /* two's complement negate, no overflow */
  else if (MMXSD1(op2) == 0)
    MMXUD1(op1) = 0;

  BX_WRITE_MMX_REG(i->nnn(), op1);
#else
  BX_INFO(("PSIGND_PqQq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F 38 0B */
/*
 * PMULHRSW (MMX form): multiply packed signed words, round and scale.
 * Each pair of signed 16-bit lanes is multiplied into a 32-bit product;
 * the product is shifted right by 14, incremented by one (rounding) and
 * shifted right once more, and the low 16 bits of that value are stored.
 * The source is MMX register rm or a 64-bit memory operand.
 *
 * The computation is done exactly once per lane; an earlier revision
 * evaluated every lane twice (a loop followed by an unrolled copy that
 * overwrote the loop's results with the same values).
 */
void BX_CPU_C::PMULHRSW_PqQq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareMMX();

  BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_MMX_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2);
  }

  for(unsigned j=0; j<4; j++) {
    /* 16x16 signed multiply always fits in 32 bits */
    Bit32s temp = Bit32s(op1.mmx16s(j)) * Bit32s(op2.mmx16s(j));
    /* keep bits [16:1] of ((temp >> 14) + 1) */
    result.mmx16u(j) = ((temp >> 14) + 1) >> 1;
  }

  /* now write result back to destination */
  BX_WRITE_MMX_REG(i->nnn(), result);
#else
  BX_INFO(("PMULHRSW_PqQq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F 38 1C */
/*
 * PABSB (MMX form): absolute value of packed signed bytes.
 * The source (MMX register rm or a 64-bit memory operand) is loaded, every
 * negative byte lane is replaced by its negation stored as an unsigned
 * byte, and the result is written to MMX register nnn.
 */
void BX_CPU_C::PABSB_PqQq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareMMX();

  BxPackedMmxRegister val;

  if (! i->modC0()) {
    /* source operand is fetched from memory (segment, offset) */
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &val);
  }
  else {
    /* source operand is another MMX register */
    val = BX_READ_MMX_REG(i->rm());
  }

  for (unsigned n = 0; n < 8; n++) {
    if (val.mmxsbyte(n) < 0)
      val.mmxubyte(n) = -val.mmxsbyte(n);
  }

  /* commit the absolute values to the destination register */
  BX_WRITE_MMX_REG(i->nnn(), val);
#else
  BX_INFO(("PABSB_PqQq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F 38 1D */
/*
 * PABSW (MMX form): absolute value of packed signed words.
 * The source (MMX register rm or a 64-bit memory operand) is loaded, every
 * negative word lane is replaced by its negation stored as an unsigned
 * word, and the result is written to MMX register nnn.
 */
void BX_CPU_C::PABSW_PqQq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareMMX();

  BxPackedMmxRegister val;

  if (! i->modC0()) {
    /* source operand is fetched from memory (segment, offset) */
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &val);
  }
  else {
    /* source operand is another MMX register */
    val = BX_READ_MMX_REG(i->rm());
  }

  for (unsigned n = 0; n < 4; n++) {
    if (val.mmx16s(n) < 0)
      val.mmx16u(n) = -val.mmx16s(n);
  }

  /* commit the absolute values to the destination register */
  BX_WRITE_MMX_REG(i->nnn(), val);
#else
  BX_INFO(("PABSW_PqQq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F 38 1E */
/*
 * PABSD (MMX form): absolute value of packed signed dwords.
 * The source (MMX register rm or a 64-bit memory operand) is loaded, every
 * negative dword lane is negated, and the result is written to MMX
 * register nnn.
 *
 * The negation uses the unsigned view of the lane: "-x" on a signed
 * 32-bit lane holding 0x80000000 overflows (undefined behaviour in C++),
 * while the unsigned subtraction wraps and leaves 0x80000000 in place.
 */
void BX_CPU_C::PABSD_PqQq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareMMX();

  BxPackedMmxRegister op;

  if (i->modC0()) {
    op = BX_READ_MMX_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op);
  }

  if (MMXSD0(op) < 0) MMXUD0(op) = 0 - MMXUD0(op);
  if (MMXSD1(op) < 0) MMXUD1(op) = 0 - MMXUD1(op);

  /* now write result back to destination */
  BX_WRITE_MMX_REG(i->nnn(), op);
#else
  BX_INFO(("PABSD_PqQq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 0F 3A 0F */
/*
 * PALIGNR (MMX form): byte-wise right shift of the 128-bit concatenation
 * destination:source by imm8 bytes; the low 64 bits of the shifted value
 * are written to the destination register (nnn). The source is MMX
 * register rm or a 64-bit memory operand.
 *
 * The bit count is kept in an 'unsigned' variable. imm8 * 8 can be as
 * large as 2040, so holding it in an 8-bit variable would wrap (e.g.
 * imm8 == 32 would become a shift of 0) and return operand data instead
 * of zero for large immediates.
 */
void BX_CPU_C::PALIGNR_PqQqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareMMX();

  BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_MMX_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2);
  }

  /* shift amount in bits, not truncated to 8 bits */
  unsigned shift = i->Ib() * 8;

  if(shift == 0)
    MMXUQ(result) = MMXUQ(op2);
  else if(shift < 64)
    /* low part comes from the source, high part from the destination */
    MMXUQ(result) = (MMXUQ(op2) >> shift) | (MMXUQ(op1) << (64-shift));
  else if(shift < 128)
    /* the source is shifted out completely */
    MMXUQ(result) = MMXUQ(op1) >> (shift-64);
  else
    /* everything is shifted out */
    MMXUQ(result) = 0;

  /* now write result back to destination */
  BX_WRITE_MMX_REG(i->nnn(), result);
#else
  BX_INFO(("PALIGNR_PqQqIb: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
#endif /* BX_SUPPORT_SSE >= 4 */
/* 0F 60 */ /* 0F 60 */
void BX_CPU_C::PUNPCKLBW_PqQd(bxInstruction_c *i) void BX_CPU_C::PUNPCKLBW_PqQd(bxInstruction_c *i)
{ {

View File

@ -1,5 +1,5 @@
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// $Id: smm.cc,v 1.14 2006-04-06 16:47:29 sshwarts Exp $ // $Id: smm.cc,v 1.15 2006-04-06 18:30:05 sshwarts Exp $
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// //
// Copyright (c) 2006 Stanislav Shwartsman // Copyright (c) 2006 Stanislav Shwartsman
@ -60,9 +60,9 @@
// 4. GDTR, IDTR // 4. GDTR, IDTR
// fields: base, limit // fields: base, limit
// 5. LDTR, TR // 5. LDTR, TR
// fields: base, limit, something more ? // fields: base, limit, anything else ?
// 6. Debug Registers DR0-DR7, only DR6 and DR7 are saved // 6. Debug Registers DR0-DR7, only DR6 and DR7 are saved
// 7. Control Regsiters: CR0, CR1 is always 0, CR2 is NOT saved, CR3, CR4, EFER // 7. Control Registers: CR0, CR1 is always 0, CR2 is NOT saved, CR3, CR4, EFER
// 8. SMBASE // 8. SMBASE
// 9. MSR/FPU/XMM/APIC are NOT saved according to Intel docs // 9. MSR/FPU/XMM/APIC are NOT saved according to Intel docs
// //

View File

@ -1,5 +1,5 @@
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// $Id: sse.cc,v 1.39 2006-04-05 17:31:32 sshwarts Exp $ // $Id: sse.cc,v 1.40 2006-04-06 18:30:05 sshwarts Exp $
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// //
// Copyright (c) 2003 Stanislav Shwartsman // Copyright (c) 2003 Stanislav Shwartsman
@ -29,6 +29,558 @@
/* SSE Integer Operations (128bit MMX extensions) */ /* SSE Integer Operations (128bit MMX extensions) */
/* ********************************************** */ /* ********************************************** */
#if BX_SUPPORT_SSE >= 4
/* 66 0F 38 00 */
/*
 * PSHUFB (XMM form): shuffle the bytes of the destination register (nnn)
 * under control of the source operand (XMM register rm or a 128-bit
 * memory operand read through the aligned accessor).
 * For each of the 16 result bytes the matching control byte is inspected:
 * if its bit 7 is set the result byte is zero, otherwise its low four bits
 * select the destination byte that is copied.
 */
void BX_CPU_C::PSHUFB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->nnn());
  BxPackedXmmRegister ctrl, shuffled;

  if (! i->modC0()) {
    /* control operand is fetched from memory (segment, offset) */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &ctrl);
  }
  else {
    /* control operand is another XMM register */
    ctrl = BX_READ_XMM_REG(i->rm());
  }

  for (unsigned n = 0; n < 16; n++) {
    unsigned selector = ctrl.xmmubyte(n);
    if ((selector & 0x80) == 0) {
      unsigned index = selector & 0xf;
      shuffled.xmmubyte(n) = dst.xmmubyte(index);
    }
    else {
      shuffled.xmmubyte(n) = 0;
    }
  }

  BX_WRITE_XMM_REG(i->nnn(), shuffled);
#else
  BX_INFO(("PSHUFB_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 01 */
/*
 * PHADDW (XMM form): horizontal add of packed 16-bit words.
 * Adjacent word pairs of the destination register (nnn) are summed into
 * result words 0-3, adjacent word pairs of the source operand (XMM
 * register rm or a 128-bit memory operand) into result words 4-7.
 * Sums are truncated to 16 bits (no saturation).
 */
void BX_CPU_C::PHADDW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->nnn());
  BxPackedXmmRegister src, sum;

  if (! i->modC0()) {
    /* source operand is fetched from memory (segment, offset) */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &src);
  }
  else {
    /* source operand is another XMM register */
    src = BX_READ_XMM_REG(i->rm());
  }

  for (unsigned n = 0; n < 4; n++) {
    unsigned even = 2*n, odd = even + 1, upper = n + 4;
    sum.xmm16u(n)     = dst.xmm16u(even) + dst.xmm16u(odd);
    sum.xmm16u(upper) = src.xmm16u(even) + src.xmm16u(odd);
  }

  BX_WRITE_XMM_REG(i->nnn(), sum);
#else
  BX_INFO(("PHADDW_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 02 */
/*
 * PHADDD (XMM form): horizontal add of packed 32-bit dwords.
 * Adjacent dword pairs of the destination register (nnn) are summed into
 * result dwords 0-1, adjacent dword pairs of the source operand (XMM
 * register rm or a 128-bit memory operand) into result dwords 2-3.
 * Sums wrap modulo 2^32.
 */
void BX_CPU_C::PHADDD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->nnn());
  BxPackedXmmRegister src, sum;

  if (! i->modC0()) {
    /* source operand is fetched from memory (segment, offset) */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &src);
  }
  else {
    /* source operand is another XMM register */
    src = BX_READ_XMM_REG(i->rm());
  }

  for (unsigned n = 0; n < 2; n++) {
    unsigned even = 2*n, odd = even + 1, upper = n + 2;
    sum.xmm32u(n)     = dst.xmm32u(even) + dst.xmm32u(odd);
    sum.xmm32u(upper) = src.xmm32u(even) + src.xmm32u(odd);
  }

  BX_WRITE_XMM_REG(i->nnn(), sum);
#else
  BX_INFO(("PHADDD_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 03 */
/*
 * PHADDSW (XMM form): horizontal add of packed signed words with
 * saturation. Adjacent signed word pairs are summed in 32-bit precision
 * and passed through SaturateDwordSToWordS. Destination (nnn) pairs fill
 * result words 0-3; source pairs (XMM register rm or a 128-bit memory
 * operand) fill result words 4-7.
 */
void BX_CPU_C::PHADDSW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->nnn());
  BxPackedXmmRegister src, sum;

  if (! i->modC0()) {
    /* source operand is fetched from memory (segment, offset) */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &src);
  }
  else {
    /* source operand is another XMM register */
    src = BX_READ_XMM_REG(i->rm());
  }

  for (unsigned n = 0; n < 4; n++) {
    unsigned even = 2*n, odd = even + 1, upper = n + 4;
    Bit32s dst_sum = Bit32s(dst.xmm16s(even)) + Bit32s(dst.xmm16s(odd));
    Bit32s src_sum = Bit32s(src.xmm16s(even)) + Bit32s(src.xmm16s(odd));
    sum.xmm16s(n)     = SaturateDwordSToWordS(dst_sum);
    sum.xmm16s(upper) = SaturateDwordSToWordS(src_sum);
  }

  /* commit the saturated sums to the destination register */
  BX_WRITE_XMM_REG(i->nnn(), sum);
#else
  BX_INFO(("PHADDSW_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 04 */
/*
 * PMADDUBSW (XMM form): multiply unsigned destination bytes by the
 * corresponding signed source bytes, add each pair of adjacent products
 * in 32-bit precision and store the sum, passed through
 * SaturateDwordSToWordS, as a signed word. The source is XMM register rm
 * or a 128-bit memory operand.
 */
void BX_CPU_C::PMADDUBSW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->nnn());
  BxPackedXmmRegister src, acc;

  if (! i->modC0()) {
    /* source operand is fetched from memory (segment, offset) */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &src);
  }
  else {
    /* source operand is another XMM register */
    src = BX_READ_XMM_REG(i->rm());
  }

  for (unsigned n = 0; n < 8; n++) {
    unsigned even = n*2+0, odd = n*2+1;
    /* destination bytes are read unsigned, source bytes signed */
    Bit32s even_product = Bit32s(dst.xmmubyte(even)) * Bit32s(src.xmmsbyte(even));
    Bit32s odd_product  = Bit32s(dst.xmmubyte(odd))  * Bit32s(src.xmmsbyte(odd));
    acc.xmm16s(n) = SaturateDwordSToWordS(even_product + odd_product);
  }

  /* commit the saturated sums to the destination register */
  BX_WRITE_XMM_REG(i->nnn(), acc);
#else
  BX_INFO(("PMADDUBSW_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 07 */
/*
 * PHSUBSW (XMM form): horizontal subtract of packed signed words with
 * saturation. For every adjacent word pair the even-indexed word minus
 * the following odd-indexed word is computed in 32-bit precision and
 * passed through SaturateDwordSToWordS. Destination (nnn) pairs fill
 * result words 0-3; source pairs (XMM register rm or a 128-bit memory
 * operand) fill result words 4-7.
 */
void BX_CPU_C::PHSUBSW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->nnn());
  BxPackedXmmRegister src, diff;

  if (! i->modC0()) {
    /* source operand is fetched from memory (segment, offset) */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &src);
  }
  else {
    /* source operand is another XMM register */
    src = BX_READ_XMM_REG(i->rm());
  }

  for (unsigned n = 0; n < 4; n++) {
    unsigned even = 2*n, odd = even + 1, upper = n + 4;
    Bit32s dst_diff = Bit32s(dst.xmm16s(even)) - Bit32s(dst.xmm16s(odd));
    Bit32s src_diff = Bit32s(src.xmm16s(even)) - Bit32s(src.xmm16s(odd));
    diff.xmm16s(n)     = SaturateDwordSToWordS(dst_diff);
    diff.xmm16s(upper) = SaturateDwordSToWordS(src_diff);
  }

  /* commit the saturated differences to the destination register */
  BX_WRITE_XMM_REG(i->nnn(), diff);
#else
  BX_INFO(("PHSUBSW_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 05 */
/*
 * PHSUBW (XMM form): horizontal subtract of packed 16-bit words.
 * For every adjacent word pair the even-indexed word minus the following
 * odd-indexed word is stored. Destination (nnn) pairs fill result words
 * 0-3; source pairs (XMM register rm or a 128-bit memory operand) fill
 * result words 4-7. Differences are truncated to 16 bits (no saturation).
 */
void BX_CPU_C::PHSUBW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->nnn());
  BxPackedXmmRegister src, diff;

  if (! i->modC0()) {
    /* source operand is fetched from memory (segment, offset) */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &src);
  }
  else {
    /* source operand is another XMM register */
    src = BX_READ_XMM_REG(i->rm());
  }

  for (unsigned n = 0; n < 4; n++) {
    unsigned even = 2*n, odd = even + 1, upper = n + 4;
    diff.xmm16u(n)     = dst.xmm16u(even) - dst.xmm16u(odd);
    diff.xmm16u(upper) = src.xmm16u(even) - src.xmm16u(odd);
  }

  BX_WRITE_XMM_REG(i->nnn(), diff);
#else
  BX_INFO(("PHSUBW_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 06 */
/*
 * PHSUBD (XMM form): horizontal subtract of packed 32-bit dwords.
 * For every adjacent dword pair the even-indexed dword minus the
 * following odd-indexed dword is stored. Destination (nnn) pairs fill
 * result dwords 0-1; source pairs (XMM register rm or a 128-bit memory
 * operand) fill result dwords 2-3. Differences wrap modulo 2^32.
 */
void BX_CPU_C::PHSUBD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->nnn());
  BxPackedXmmRegister src, diff;

  if (! i->modC0()) {
    /* source operand is fetched from memory (segment, offset) */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &src);
  }
  else {
    /* source operand is another XMM register */
    src = BX_READ_XMM_REG(i->rm());
  }

  for (unsigned n = 0; n < 2; n++) {
    unsigned even = 2*n, odd = even + 1, upper = n + 2;
    diff.xmm32u(n)     = dst.xmm32u(even) - dst.xmm32u(odd);
    diff.xmm32u(upper) = src.xmm32u(even) - src.xmm32u(odd);
  }

  BX_WRITE_XMM_REG(i->nnn(), diff);
#else
  BX_INFO(("PHSUBD_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 08 */
/*
 * PSIGNB (XMM form): apply the sign of each source byte to the matching
 * destination byte. A negative source byte negates the destination byte,
 * a zero source byte clears it, and a positive source byte leaves it
 * unchanged. The source is XMM register rm or a 128-bit memory operand.
 */
void BX_CPU_C::PSIGNB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->nnn());
  BxPackedXmmRegister src;

  if (! i->modC0()) {
    /* source operand is fetched from memory (segment, offset) */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &src);
  }
  else {
    /* source operand is another XMM register */
    src = BX_READ_XMM_REG(i->rm());
  }

  for (unsigned n = 0; n < 16; n++) {
    int control = src.xmmsbyte(n);
    if (control < 0) {
      /* negate in int, then narrow back to the signed byte lane */
      dst.xmmsbyte(n) = -dst.xmmsbyte(n);
    }
    else if (control == 0) {
      dst.xmmsbyte(n) = 0;
    }
    /* control > 0: lane is kept as it is */
  }

  BX_WRITE_XMM_REG(i->nnn(), dst);
#else
  BX_INFO(("PSIGNB_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 09 */
/*
 * PSIGNW (XMM form): apply the sign of each source word to the matching
 * destination word. A negative source word negates the destination word,
 * a zero source word clears it, and a positive source word leaves it
 * unchanged. The source is XMM register rm or a 128-bit memory operand.
 */
void BX_CPU_C::PSIGNW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->nnn());
  BxPackedXmmRegister src;

  if (! i->modC0()) {
    /* source operand is fetched from memory (segment, offset) */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &src);
  }
  else {
    /* source operand is another XMM register */
    src = BX_READ_XMM_REG(i->rm());
  }

  for (unsigned n = 0; n < 8; n++) {
    int control = src.xmm16s(n);
    if (control < 0) {
      /* negate in int, then narrow back to the signed word lane */
      dst.xmm16s(n) = -dst.xmm16s(n);
    }
    else if (control == 0) {
      dst.xmm16s(n) = 0;
    }
    /* control > 0: lane is kept as it is */
  }

  BX_WRITE_XMM_REG(i->nnn(), dst);
#else
  BX_INFO(("PSIGNW_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 0A */
/*
 * PSIGND (XMM form): apply the sign of each source dword to the matching
 * destination dword. A negative source dword negates the destination
 * dword, a zero source dword clears it, and a positive source dword leaves
 * it unchanged. The source is XMM register rm or a 128-bit memory operand.
 *
 * The negation is carried out on the unsigned view of the lane: negating
 * 0x80000000 as a signed 32-bit value overflows (undefined behaviour in
 * C++), whereas the unsigned form wraps and yields 0x80000000 again.
 */
void BX_CPU_C::PSIGND_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  for(unsigned j=0; j<4; j++) {
    if (op2.xmm32s(j) < 0)
      op1.xmm32u(j) = 0 - op1.xmm32u(j);   /* two's complement negate, no overflow */
    else if (op2.xmm32s(j) == 0)
      op1.xmm32u(j) = 0;
    /* positive control dword: lane is kept as it is */
  }

  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PSIGND_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 0B */
/*
 * PMULHRSW (XMM form): multiply packed signed words, round and scale.
 * Each pair of signed 16-bit lanes is multiplied into a 32-bit product;
 * the product is shifted right by 14, incremented by one (rounding) and
 * shifted right once more, and the low 16 bits of that value are stored.
 * The source is XMM register rm or a 128-bit memory operand.
 */
void BX_CPU_C::PMULHRSW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->nnn());
  BxPackedXmmRegister src, rounded;

  if (! i->modC0()) {
    /* source operand is fetched from memory (segment, offset) */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &src);
  }
  else {
    /* source operand is another XMM register */
    src = BX_READ_XMM_REG(i->rm());
  }

  for (unsigned n = 0; n < 8; n++) {
    /* 16x16 signed multiply, evaluated in int precision */
    Bit32s product = dst.xmm16s(n) * src.xmm16s(n);
    Bit32s scaled  = (product >> 14) + 1;
    rounded.xmm16u(n) = scaled >> 1;
  }

  /* commit the rounded products to the destination register */
  BX_WRITE_XMM_REG(i->nnn(), rounded);
#else
  BX_INFO(("PMULHRSW_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 1C */
/*
 * PABSB (XMM form): absolute value of packed signed bytes.
 * The source (XMM register rm or a 128-bit memory operand) is loaded,
 * every negative byte lane is replaced by its negation stored as an
 * unsigned byte, and the result is written to XMM register nnn.
 */
void BX_CPU_C::PABSB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister val;

  if (! i->modC0()) {
    /* source operand is fetched from memory (segment, offset) */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &val);
  }
  else {
    /* source operand is another XMM register */
    val = BX_READ_XMM_REG(i->rm());
  }

  for (unsigned n = 0; n < 16; n++) {
    if (val.xmmsbyte(n) < 0)
      val.xmmubyte(n) = -val.xmmsbyte(n);
  }

  /* commit the absolute values to the destination register */
  BX_WRITE_XMM_REG(i->nnn(), val);
#else
  BX_INFO(("PABSB_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 1D */
/*
 * PABSW (XMM form): absolute value of packed signed words.
 * The source (XMM register rm or a 128-bit memory operand) is loaded,
 * every negative word lane is replaced by its negation stored as an
 * unsigned word, and the result is written to XMM register nnn.
 */
void BX_CPU_C::PABSW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister val;

  if (! i->modC0()) {
    /* source operand is fetched from memory (segment, offset) */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &val);
  }
  else {
    /* source operand is another XMM register */
    val = BX_READ_XMM_REG(i->rm());
  }

  for (unsigned n = 0; n < 8; n++) {
    if (val.xmm16s(n) < 0)
      val.xmm16u(n) = -val.xmm16s(n);
  }

  /* commit the absolute values to the destination register */
  BX_WRITE_XMM_REG(i->nnn(), val);
#else
  BX_INFO(("PABSW_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 1E */
/*
 * PABSD (XMM form): absolute value of packed signed dwords.
 * The source (XMM register rm or a 128-bit memory operand) is loaded,
 * every negative dword lane is negated, and the result is written to XMM
 * register nnn.
 *
 * The negation uses the unsigned view of the lane: "-x" on a signed
 * 32-bit lane holding 0x80000000 overflows (undefined behaviour in C++),
 * while the unsigned subtraction wraps and leaves 0x80000000 in place.
 */
void BX_CPU_C::PABSD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op;

  if (i->modC0()) {
    op = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

  for(unsigned j=0; j<4; j++) {
    if(op.xmm32s(j) < 0) op.xmm32u(j) = 0 - op.xmm32u(j);
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op);
#else
  BX_INFO(("PABSD_VdqWdq: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 3A 0F */
/*
 * PALIGNR (XMM form): byte-wise right shift of the 256-bit concatenation
 * destination:source by imm8 bytes; the low 128 bits of the shifted value
 * are written to the destination register (nnn). The source is XMM
 * register rm or a 128-bit memory operand.
 *
 * The bit count is kept in an 'unsigned' variable. imm8 * 8 can be as
 * large as 2040, so holding it in an 8-bit variable would wrap (e.g.
 * imm8 == 32 would become a shift of 0), which made the final
 * "everything shifted out" branch unreachable and returned operand data
 * instead of zero for large immediates.
 */
void BX_CPU_C::PALIGNR_VdqWdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 4
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  /* shift amount in bits, not truncated to 8 bits */
  unsigned shift = i->Ib() * 8;

  /*
   * The 256-bit value is viewed as four qwords, lowest first:
   *   op2.q0, op2.q1, op1.q0, op1.q1
   * Whole-qword shifts (64, 128) get their own branches so that no
   * 64-bit value is ever shifted by 64.
   */
  if(shift == 0) {
    result.xmm64u(0) = op2.xmm64u(0);
    result.xmm64u(1) = op2.xmm64u(1);
  }
  else if(shift < 64) {
    result.xmm64u(0) = (op2.xmm64u(0) >> shift) | (op2.xmm64u(1) << (64-shift));
    result.xmm64u(1) = (op2.xmm64u(1) >> shift) | (op1.xmm64u(0) << (64-shift));
  }
  else if(shift == 64) {
    result.xmm64u(0) = op2.xmm64u(1);
    result.xmm64u(1) = op1.xmm64u(0);
  }
  else if(shift < 128) {
    shift -= 64;
    result.xmm64u(0) = (op2.xmm64u(1) >> shift) | (op1.xmm64u(0) << (64-shift));
    result.xmm64u(1) = (op1.xmm64u(0) >> shift) | (op1.xmm64u(1) << (64-shift));
  }
  else if(shift == 128) {
    result.xmm64u(0) = op1.xmm64u(0);
    result.xmm64u(1) = op1.xmm64u(1);
  }
  else if(shift < 192) {
    shift -= 128;
    result.xmm64u(0) = (op1.xmm64u(0) >> shift) | (op1.xmm64u(1) << (64-shift));
    result.xmm64u(1) = (op1.xmm64u(1) >> shift);
  }
  else if(shift < 256) {
    result.xmm64u(0) = op1.xmm64u(1) >> (shift - 192);
    result.xmm64u(1) = 0;
  }
  else {
    /* imm8 >= 32: both operands are shifted out completely */
    result.xmm64u(0) = 0;
    result.xmm64u(1) = 0;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), result);
#else
  BX_INFO(("PALIGNR_VdqWdqIb: required SSE4, use --enable-sse option"));
  UndefinedOpcode(i);
#endif
}
#endif /* BX_SUPPORT_SSE >= 4 */
/* 66 0F 63 */ /* 66 0F 63 */
void BX_CPU_C::PACKSSWB_VdqWq(bxInstruction_c *i) void BX_CPU_C::PACKSSWB_VdqWq(bxInstruction_c *i)
{ {