split more SSE ops, optimizations in MMX code

This commit is contained in:
Stanislav Shwartsman 2010-12-25 17:04:36 +00:00
parent d0ee1c1b80
commit 1bd512e98d
10 changed files with 438 additions and 662 deletions

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////////////////////////////////////
// $Id: bit.cc,v 1.70 2010-12-06 21:45:56 sshwarts Exp $
// $Id: bit.cc,v 1.71 2010-12-25 17:04:35 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2001-2009 The Bochs Project
@ -258,17 +258,9 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::BSWAP_RRX(bxInstruction_c *i)
}
#endif
void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVBE_GwEw(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVBE_GwEwR(bxInstruction_c *i)
{
Bit16u val16, b0, b1;
if (i->modC0()) {
val16 = BX_READ_16BIT_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
val16 = read_virtual_word(i->seg(), eaddr);
}
Bit16u val16 = BX_READ_16BIT_REG(i->rm()), b0, b1;
b0 = val16 & 0xff; val16 >>= 8;
b1 = val16;
@ -294,17 +286,9 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVBE_EwGw(bxInstruction_c *i)
}
}
void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVBE_GdEd(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVBE_GdEdR(bxInstruction_c *i)
{
Bit32u val32;
if (i->modC0()) {
val32 = BX_READ_32BIT_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
val32 = read_virtual_dword(i->seg(), eaddr);
}
Bit32u val32 = BX_READ_32BIT_REG(i->rm());
BX_WRITE_32BIT_REGZ(i->nnn(), bx_bswap32(val32));
}
@ -326,17 +310,9 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVBE_EdGd(bxInstruction_c *i)
#if BX_SUPPORT_X86_64
void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVBE_GqEq(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVBE_GqEqR(bxInstruction_c *i)
{
Bit64u val64;
if (i->modC0()) {
val64 = BX_READ_64BIT_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
val64 = read_virtual_qword(i->seg(), eaddr);
}
Bit64u val64 = BX_READ_64BIT_REG(i->rm());
BX_WRITE_64BIT_REG(i->nnn(), bx_bswap64(val64));
}

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////////////////////////////////////
// $Id: cpu.h,v 1.696 2010-12-25 07:59:15 sshwarts Exp $
// $Id: cpu.h,v 1.697 2010-12-25 17:04:35 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2001-2011 The Bochs Project
@ -2257,10 +2257,10 @@ public: // for now...
BX_SMF void PBLENDVB_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void BLENDVPS_VpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void BLENDVPD_VpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PTEST_VdqWdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMULDQ_VdqWdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PCMPEQQ_VdqWdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PACKUSDW_VdqWdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PTEST_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMULDQ_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PCMPEQQ_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PACKUSDW_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMOVSXBW_VdqWq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMOVSXBD_VdqWd(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMOVSXBQ_VdqWw(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
@ -2273,16 +2273,16 @@ public: // for now...
BX_SMF void PMOVZXWD_VdqWq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMOVZXWQ_VdqWd(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMOVZXDQ_VdqWq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMINSB_VdqWdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMINSD_VdqWdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMINUW_VdqWdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMINUD_VdqWdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMAXSB_VdqWdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMAXSD_VdqWdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMAXUW_VdqWdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMAXUD_VdqWdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMULLD_VdqWdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PHMINPOSUW_VdqWdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMINSB_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMINSD_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMINUW_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMINUD_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMAXSB_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMAXSD_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMAXUW_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMAXUD_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PMULLD_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PHMINPOSUW_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void ROUNDPS_VpsWpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void ROUNDPD_VpdWpdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void ROUNDSS_VssWssIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
@ -2290,26 +2290,31 @@ public: // for now...
BX_SMF void BLENDPS_VpsWpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void BLENDPD_VpdWpdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PBLENDW_VdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRB_EbdVdqIb(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRW_EwdVdqIb(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRD_EdVdqIb(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void EXTRACTPS_EdVpsIb(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRB_EbdVdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRB_EbdVdqIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRW_EwdVdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRW_EwdVdqIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRD_EdVdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRD_EdVdqIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void EXTRACTPS_EdVpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void EXTRACTPS_EdVpsIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PINSRB_VdqEbIb(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void INSERTPS_VpsWssIb(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PINSRD_VdqEdIb(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void DPPS_VpsWpsIb(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void DPPD_VpdWpdIb(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PINSRD_VdqEdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PINSRD_VdqEdIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void DPPS_VpsWpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void DPPD_VpdWpdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void MPSADBW_VdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
/* SSE4.1 */
/* SSE4.2 */
BX_SMF void CRC32_GdEb(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void CRC32_GdEbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void CRC32_GdEw(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void CRC32_GdEd(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#if BX_SUPPORT_X86_64
BX_SMF void CRC32_GdEq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#endif
BX_SMF void PCMPGTQ_VdqWdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PCMPGTQ_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PCMPESTRM_VdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PCMPESTRI_VdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PCMPISTRM_VdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
@ -2317,14 +2322,15 @@ public: // for now...
/* SSE4.2 */
/* MOVBE Intel Atom(R) instruction */
BX_SMF void MOVBE_GwEw(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void MOVBE_GdEd(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void MOVBE_GwEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void MOVBE_GdEdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void MOVBE_EwGw(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void MOVBE_EdGd(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#if BX_SUPPORT_X86_64
BX_SMF void MOVBE_GqEq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void MOVBE_GqEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void MOVBE_EqGq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#endif
/* MOVBE Intel Atom(R) instruction */
/* XSAVE/XRSTOR extensions */
BX_SMF void XSAVE(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////////////////////////////////////
// $Id: crc32.cc,v 1.6 2010-11-23 14:59:35 sshwarts Exp $
// $Id: crc32.cc,v 1.7 2010-12-25 17:04:36 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2008-2009 Stanislav Shwartsman
@ -70,18 +70,9 @@ static Bit32u mod2_64bit(Bit64u divisor, Bit64u dividend)
return (Bit32u) remainder;
}
void BX_CPP_AttrRegparmN(1) BX_CPU_C::CRC32_GdEb(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::CRC32_GdEbR(bxInstruction_c *i)
{
Bit8u op1;
if (i->modC0()) {
op1 = BX_READ_8BIT_REGx(i->rm(),i->extend8bitL());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
op1 = read_virtual_byte(i->seg(), eaddr);
}
Bit8u op1 = BX_READ_8BIT_REGx(i->rm(), i->extend8bitL());
Bit32u op2 = BX_READ_32BIT_REG(i->nnn());
op2 = BitReflect32(op2);

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////////////////////////////////////
// $Id: fetchdecode_sse.h,v 1.16 2010-12-25 07:59:15 sshwarts Exp $
// $Id: fetchdecode_sse.h,v 1.17 2010-12-25 17:04:36 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2005-2011 Stanislav Shwartsman
@ -980,17 +980,31 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f381e[3] = {
/* F2 */ { 0, BX_IA_ERROR }
};
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f38f0[3] = {
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f38f0R[3] = {
/* 66 */ { 0, BX_IA_ERROR },
/* F3 */ { 0, BX_IA_ERROR },
/* F2 */ { 0, BX_IA_CRC32_GdEb }
/* F2 */ { 0, BX_IA_CRC32_GdEbR }
};
static const BxOpcodeInfo_t BxOpcodeGroupOsize0f38f0[BX_SUPPORT_X86_64 + 2] = {
/* 16 */ { BxPrefixSSE, BX_IA_MOVBE_GwEw, BxOpcodeGroupSSE_0f38f0 },
/* 32 */ { BxPrefixSSE, BX_IA_MOVBE_GdEd, BxOpcodeGroupSSE_0f38f0 },
// Three-entry group whose rows are labelled by prefix byte (66 / F3 / F2).
// Only the F2 row names an opcode (BX_IA_CRC32_GdEbM); the 66 and F3 rows
// are BX_IA_ERROR. This table is the third field of every row of
// BxOpcodeGroupOsize0f38f0M, which the "F0 /m" slot of the 0F 38 opcode
// table points at.
// NOTE(review): the "M" suffix appears to denote the memory-operand form,
// paired with the "R" (register) variant of the same table - confirm.
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f38f0M[3] = {
/* 66 */ { 0, BX_IA_ERROR },
/* F3 */ { 0, BX_IA_ERROR },
/* F2 */ { 0, BX_IA_CRC32_GdEbM }
};
static const BxOpcodeInfo_t BxOpcodeGroupOsize0f38f0R[BX_SUPPORT_X86_64 + 2] = {
/* 16 */ { BxPrefixSSE, BX_IA_MOVBE_GwEwR, BxOpcodeGroupSSE_0f38f0R },
/* 32 */ { BxPrefixSSE, BX_IA_MOVBE_GdEdR, BxOpcodeGroupSSE_0f38f0R },
#if BX_SUPPORT_X86_64
/* 64 */ { BxPrefixSSE, BX_IA_MOVBE_GqEq, BxOpcodeGroupSSE_0f38f0 },
/* 64 */ { BxPrefixSSE, BX_IA_MOVBE_GqEqR, BxOpcodeGroupSSE_0f38f0R },
#endif
};
// Operand-size group for the "F0 /m" slot of the 0F 38 opcode table.
// Rows are labelled by operand size: 16, 32, and - only when
// BX_SUPPORT_X86_64 is enabled - 64, which is why the array length is
// BX_SUPPORT_X86_64 + 2 (assumes the macro is 0 or 1 - TODO confirm).
// Each row names the MOVBE memory-form opcode for that size and carries
// BxOpcodeGroupSSE_0f38f0M as its nested table under the BxPrefixSSE
// attribute.
// NOTE(review): presumably the nested table is consulted when an SSE
// prefix is present on the instruction - verify against the decoder.
static const BxOpcodeInfo_t BxOpcodeGroupOsize0f38f0M[BX_SUPPORT_X86_64 + 2] = {
/* 16 */ { BxPrefixSSE, BX_IA_MOVBE_GwEwM, BxOpcodeGroupSSE_0f38f0M },
/* 32 */ { BxPrefixSSE, BX_IA_MOVBE_GdEdM, BxOpcodeGroupSSE_0f38f0M },
#if BX_SUPPORT_X86_64
/* 64 */ { BxPrefixSSE, BX_IA_MOVBE_GqEqM, BxOpcodeGroupSSE_0f38f0M },
#endif
};
@ -1078,8 +1092,8 @@ static const BxOpcodeInfo_t BxOpcode3ByteTable0f38[256*2] = {
/* 15 /m */ { BxPrefixSSE66, BX_IA_BLENDVPD_VpdWpdM },
/* 16 /r */ { 0, BX_IA_ERROR },
/* 16 /m */ { 0, BX_IA_ERROR },
/* 17 /r */ { BxPrefixSSE66, BX_IA_PTEST_VdqWdq },
/* 17 /m */ { BxPrefixSSE66, BX_IA_PTEST_VdqWdq },
/* 17 /r */ { BxPrefixSSE66, BX_IA_PTEST_VdqWdqR },
/* 17 /m */ { BxPrefixSSE66, BX_IA_PTEST_VdqWdqM },
/* 18 /r */ { 0, BX_IA_ERROR },
/* 18 /m */ { 0, BX_IA_ERROR },
/* 19 /r */ { 0, BX_IA_ERROR },
@ -1112,14 +1126,14 @@ static const BxOpcodeInfo_t BxOpcode3ByteTable0f38[256*2] = {
/* 26 /m */ { 0, BX_IA_ERROR },
/* 27 /r */ { 0, BX_IA_ERROR },
/* 27 /m */ { 0, BX_IA_ERROR },
/* 28 /r */ { BxPrefixSSE66, BX_IA_PMULDQ_VdqWdq },
/* 28 /m */ { BxPrefixSSE66, BX_IA_PMULDQ_VdqWdq },
/* 29 /r */ { BxPrefixSSE66, BX_IA_PCMPEQQ_VdqWdq },
/* 29 /m */ { BxPrefixSSE66, BX_IA_PCMPEQQ_VdqWdq },
/* 28 /r */ { BxPrefixSSE66, BX_IA_PMULDQ_VdqWdqR },
/* 28 /m */ { BxPrefixSSE66, BX_IA_PMULDQ_VdqWdqM },
/* 29 /r */ { BxPrefixSSE66, BX_IA_PCMPEQQ_VdqWdqR },
/* 29 /m */ { BxPrefixSSE66, BX_IA_PCMPEQQ_VdqWdqM },
/* 2A /r */ { 0, BX_IA_ERROR },
/* 2A /m */ { BxPrefixSSE66, BX_IA_MOVNTDQA_VdqMdq },
/* 2B /r */ { BxPrefixSSE66, BX_IA_PACKUSDW_VdqWdq },
/* 2B /m */ { BxPrefixSSE66, BX_IA_PACKUSDW_VdqWdq },
/* 2B /r */ { BxPrefixSSE66, BX_IA_PACKUSDW_VdqWdqR },
/* 2B /m */ { BxPrefixSSE66, BX_IA_PACKUSDW_VdqWdqM },
/* 2C /r */ { 0, BX_IA_ERROR },
/* 2C /m */ { 0, BX_IA_ERROR },
/* 2D /r */ { 0, BX_IA_ERROR },
@ -1142,28 +1156,28 @@ static const BxOpcodeInfo_t BxOpcode3ByteTable0f38[256*2] = {
/* 35 /m */ { BxPrefixSSE66, BX_IA_PMOVZXDQ_VdqWq },
/* 36 /r */ { 0, BX_IA_ERROR },
/* 36 /m */ { 0, BX_IA_ERROR },
/* 37 /r */ { BxPrefixSSE66, BX_IA_PCMPGTQ_VdqWdq },
/* 37 /m */ { BxPrefixSSE66, BX_IA_PCMPGTQ_VdqWdq },
/* 38 /r */ { BxPrefixSSE66, BX_IA_PMINSB_VdqWdq },
/* 38 /m */ { BxPrefixSSE66, BX_IA_PMINSB_VdqWdq },
/* 39 /r */ { BxPrefixSSE66, BX_IA_PMINSD_VdqWdq },
/* 39 /m */ { BxPrefixSSE66, BX_IA_PMINSD_VdqWdq },
/* 3A /r */ { BxPrefixSSE66, BX_IA_PMINUW_VdqWdq },
/* 3A /m */ { BxPrefixSSE66, BX_IA_PMINUW_VdqWdq },
/* 3B /r */ { BxPrefixSSE66, BX_IA_PMINUD_VdqWdq },
/* 3B /m */ { BxPrefixSSE66, BX_IA_PMINUD_VdqWdq },
/* 3C /r */ { BxPrefixSSE66, BX_IA_PMAXSB_VdqWdq },
/* 3C /m */ { BxPrefixSSE66, BX_IA_PMAXSB_VdqWdq },
/* 3D /r */ { BxPrefixSSE66, BX_IA_PMAXSD_VdqWdq },
/* 3D /m */ { BxPrefixSSE66, BX_IA_PMAXSD_VdqWdq },
/* 3E /r */ { BxPrefixSSE66, BX_IA_PMAXUW_VdqWdq },
/* 3E /m */ { BxPrefixSSE66, BX_IA_PMAXUW_VdqWdq },
/* 3F /r */ { BxPrefixSSE66, BX_IA_PMAXUD_VdqWdq },
/* 3F /m */ { BxPrefixSSE66, BX_IA_PMAXUD_VdqWdq },
/* 40 /r */ { BxPrefixSSE66, BX_IA_PMULLD_VdqWdq },
/* 40 /m */ { BxPrefixSSE66, BX_IA_PMULLD_VdqWdq },
/* 41 /r */ { BxPrefixSSE66, BX_IA_PHMINPOSUW_VdqWdq },
/* 41 /m */ { BxPrefixSSE66, BX_IA_PHMINPOSUW_VdqWdq },
/* 37 /r */ { BxPrefixSSE66, BX_IA_PCMPGTQ_VdqWdqR },
/* 37 /m */ { BxPrefixSSE66, BX_IA_PCMPGTQ_VdqWdqM },
/* 38 /r */ { BxPrefixSSE66, BX_IA_PMINSB_VdqWdqR },
/* 38 /m */ { BxPrefixSSE66, BX_IA_PMINSB_VdqWdqM },
/* 39 /r */ { BxPrefixSSE66, BX_IA_PMINSD_VdqWdqR },
/* 39 /m */ { BxPrefixSSE66, BX_IA_PMINSD_VdqWdqM },
/* 3A /r */ { BxPrefixSSE66, BX_IA_PMINUW_VdqWdqR },
/* 3A /m */ { BxPrefixSSE66, BX_IA_PMINUW_VdqWdqM },
/* 3B /r */ { BxPrefixSSE66, BX_IA_PMINUD_VdqWdqR },
/* 3B /m */ { BxPrefixSSE66, BX_IA_PMINUD_VdqWdqM },
/* 3C /r */ { BxPrefixSSE66, BX_IA_PMAXSB_VdqWdqR },
/* 3C /m */ { BxPrefixSSE66, BX_IA_PMAXSB_VdqWdqM },
/* 3D /r */ { BxPrefixSSE66, BX_IA_PMAXSD_VdqWdqR },
/* 3D /m */ { BxPrefixSSE66, BX_IA_PMAXSD_VdqWdqM },
/* 3E /r */ { BxPrefixSSE66, BX_IA_PMAXUW_VdqWdqR },
/* 3E /m */ { BxPrefixSSE66, BX_IA_PMAXUW_VdqWdqM },
/* 3F /r */ { BxPrefixSSE66, BX_IA_PMAXUD_VdqWdqR },
/* 3F /m */ { BxPrefixSSE66, BX_IA_PMAXUD_VdqWdqM },
/* 40 /r */ { BxPrefixSSE66, BX_IA_PMULLD_VdqWdqR },
/* 40 /m */ { BxPrefixSSE66, BX_IA_PMULLD_VdqWdqM },
/* 41 /r */ { BxPrefixSSE66, BX_IA_PHMINPOSUW_VdqWdqR },
/* 41 /m */ { BxPrefixSSE66, BX_IA_PHMINPOSUW_VdqWdqM },
/* 42 /r */ { 0, BX_IA_ERROR },
/* 42 /m */ { 0, BX_IA_ERROR },
/* 43 /r */ { 0, BX_IA_ERROR },
@ -1512,8 +1526,8 @@ static const BxOpcodeInfo_t BxOpcode3ByteTable0f38[256*2] = {
/* EE /m */ { 0, BX_IA_ERROR },
/* EF /r */ { 0, BX_IA_ERROR },
/* EF /m */ { 0, BX_IA_ERROR },
/* F0 /r */ { BxOSizeGrp, BX_IA_ERROR, BxOpcodeGroupOsize0f38f0 },
/* F0 /m */ { BxOSizeGrp, BX_IA_ERROR, BxOpcodeGroupOsize0f38f0 },
/* F0 /r */ { BxOSizeGrp, BX_IA_ERROR, BxOpcodeGroupOsize0f38f0R },
/* F0 /m */ { BxOSizeGrp, BX_IA_ERROR, BxOpcodeGroupOsize0f38f0M },
/* F1 /r */ { BxOSizeGrp, BX_IA_ERROR, BxOpcodeGroupOsize0f38f1 },
/* F1 /m */ { BxOSizeGrp, BX_IA_ERROR, BxOpcodeGroupOsize0f38f1 },
/* F2 /r */ { 0, BX_IA_ERROR },
@ -1590,14 +1604,14 @@ static const BxOpcodeInfo_t BxOpcode3ByteTable0f3a[256*2] = {
/* 12 /m */ { 0, BX_IA_ERROR },
/* 13 /r */ { 0, BX_IA_ERROR },
/* 13 /m */ { 0, BX_IA_ERROR },
/* 14 /r */ { BxPrefixSSE66, BX_IA_PEXTRB_EbdVdqIb },
/* 14 /m */ { BxPrefixSSE66, BX_IA_PEXTRB_EbdVdqIb },
/* 15 /r */ { BxPrefixSSE66, BX_IA_PEXTRW_EwdVdqIb },
/* 15 /m */ { BxPrefixSSE66, BX_IA_PEXTRW_EwdVdqIb },
/* 16 /r */ { BxPrefixSSE66, BX_IA_PEXTRD_EdVdqIb },
/* 16 /m */ { BxPrefixSSE66, BX_IA_PEXTRD_EdVdqIb },
/* 17 /r */ { BxPrefixSSE66, BX_IA_EXTRACTPS_EdVpsIb },
/* 17 /m */ { BxPrefixSSE66, BX_IA_EXTRACTPS_EdVpsIb },
/* 14 /r */ { BxPrefixSSE66, BX_IA_PEXTRB_EbdVdqIbR },
/* 14 /m */ { BxPrefixSSE66, BX_IA_PEXTRB_EbdVdqIbM },
/* 15 /r */ { BxPrefixSSE66, BX_IA_PEXTRW_EwdVdqIbR },
/* 15 /m */ { BxPrefixSSE66, BX_IA_PEXTRW_EwdVdqIbM },
/* 16 /r */ { BxPrefixSSE66, BX_IA_PEXTRD_EdVdqIbR },
/* 16 /m */ { BxPrefixSSE66, BX_IA_PEXTRD_EdVdqIbM },
/* 17 /r */ { BxPrefixSSE66, BX_IA_EXTRACTPS_EdVpsIbR },
/* 17 /m */ { BxPrefixSSE66, BX_IA_EXTRACTPS_EdVpsIbM },
/* 18 /r */ { 0, BX_IA_ERROR },
/* 18 /m */ { 0, BX_IA_ERROR },
/* 19 /r */ { 0, BX_IA_ERROR },
@ -1618,8 +1632,8 @@ static const BxOpcodeInfo_t BxOpcode3ByteTable0f3a[256*2] = {
/* 20 /m */ { BxPrefixSSE66, BX_IA_PINSRB_VdqEbIb },
/* 21 /r */ { BxPrefixSSE66, BX_IA_INSERTPS_VpsWssIb },
/* 21 /m */ { BxPrefixSSE66, BX_IA_INSERTPS_VpsWssIb },
/* 22 /r */ { BxPrefixSSE66, BX_IA_PINSRD_VdqEdIb },
/* 22 /m */ { BxPrefixSSE66, BX_IA_PINSRD_VdqEdIb },
/* 22 /r */ { BxPrefixSSE66, BX_IA_PINSRD_VdqEdIbR },
/* 22 /m */ { BxPrefixSSE66, BX_IA_PINSRD_VdqEdIbM },
/* 23 /r */ { 0, BX_IA_ERROR },
/* 23 /m */ { 0, BX_IA_ERROR },
/* 24 /r */ { 0, BX_IA_ERROR },
@ -1678,10 +1692,10 @@ static const BxOpcodeInfo_t BxOpcode3ByteTable0f3a[256*2] = {
/* 3E /m */ { 0, BX_IA_ERROR },
/* 3F /r */ { 0, BX_IA_ERROR },
/* 3F /m */ { 0, BX_IA_ERROR },
/* 40 /r */ { BxPrefixSSE66, BX_IA_DPPS_VpsWpsIb },
/* 40 /m */ { BxPrefixSSE66, BX_IA_DPPS_VpsWpsIb },
/* 41 /r */ { BxPrefixSSE66, BX_IA_DPPD_VpdWpdIb },
/* 41 /m */ { BxPrefixSSE66, BX_IA_DPPD_VpdWpdIb },
/* 40 /r */ { BxPrefixSSE66, BX_IA_DPPS_VpsWpsIbR },
/* 40 /m */ { BxPrefixSSE66, BX_IA_DPPS_VpsWpsIbM },
/* 41 /r */ { BxPrefixSSE66, BX_IA_DPPD_VpdWpdIbR },
/* 41 /m */ { BxPrefixSSE66, BX_IA_DPPD_VpdWpdIbM },
/* 42 /r */ { BxPrefixSSE66, BX_IA_MPSADBW_VdqWdqIbR },
/* 42 /m */ { BxPrefixSSE66, BX_IA_MPSADBW_VdqWdqIbM },
/* 43 /r */ { 0, BX_IA_ERROR },

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////////////////////////////////////
// $Id: ia_opcodes.h,v 1.55 2010-12-25 07:59:15 sshwarts Exp $
// $Id: ia_opcodes.h,v 1.56 2010-12-25 17:04:36 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2008-2011 Stanislav Shwartsman
@ -1315,10 +1315,14 @@ bx_define_opcode(BX_IA_BLENDVPS_VpsWpsR, &BX_CPU_C::BLENDVPS_VpsWpsR, NULL, BX_C
bx_define_opcode(BX_IA_BLENDVPS_VpsWpsM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::BLENDVPS_VpsWpsR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_BLENDVPD_VpdWpdR, &BX_CPU_C::BLENDVPD_VpdWpdR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_BLENDVPD_VpdWpdM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::BLENDVPD_VpdWpdR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PTEST_VdqWdq, &BX_CPU_C::PTEST_VdqWdq, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMULDQ_VdqWdq, &BX_CPU_C::PMULDQ_VdqWdq, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PCMPEQQ_VdqWdq, &BX_CPU_C::PCMPEQQ_VdqWdq, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PACKUSDW_VdqWdq, &BX_CPU_C::PACKUSDW_VdqWdq, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PTEST_VdqWdqR, &BX_CPU_C::PTEST_VdqWdqR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PTEST_VdqWdqM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::PTEST_VdqWdqR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMULDQ_VdqWdqR, &BX_CPU_C::PMULDQ_VdqWdqR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMULDQ_VdqWdqM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::PMULDQ_VdqWdqR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PCMPEQQ_VdqWdqR, &BX_CPU_C::PCMPEQQ_VdqWdqR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PCMPEQQ_VdqWdqM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::PCMPEQQ_VdqWdqR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PACKUSDW_VdqWdqR, &BX_CPU_C::PACKUSDW_VdqWdqR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PACKUSDW_VdqWdqM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::PACKUSDW_VdqWdqR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMOVSXBW_VdqWq, &BX_CPU_C::PMOVSXBW_VdqWq, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMOVSXBD_VdqWd, &BX_CPU_C::PMOVSXBD_VdqWd, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMOVSXBQ_VdqWw, &BX_CPU_C::PMOVSXBQ_VdqWw, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
@ -1331,16 +1335,26 @@ bx_define_opcode(BX_IA_PMOVZXBQ_VdqWw, &BX_CPU_C::PMOVZXBQ_VdqWw, NULL, BX_CPU_S
bx_define_opcode(BX_IA_PMOVZXWD_VdqWq, &BX_CPU_C::PMOVZXWD_VdqWq, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMOVZXWQ_VdqWd, &BX_CPU_C::PMOVZXWQ_VdqWd, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMOVZXDQ_VdqWq, &BX_CPU_C::PMOVZXDQ_VdqWq, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMINSB_VdqWdq, &BX_CPU_C::PMINSB_VdqWdq, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMINSD_VdqWdq, &BX_CPU_C::PMINSD_VdqWdq, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMINUW_VdqWdq, &BX_CPU_C::PMINUW_VdqWdq, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMINUD_VdqWdq, &BX_CPU_C::PMINUD_VdqWdq, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMAXSB_VdqWdq, &BX_CPU_C::PMAXSB_VdqWdq, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMAXSD_VdqWdq, &BX_CPU_C::PMAXSD_VdqWdq, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMAXUW_VdqWdq, &BX_CPU_C::PMAXUW_VdqWdq, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMAXUD_VdqWdq, &BX_CPU_C::PMAXUD_VdqWdq, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMULLD_VdqWdq, &BX_CPU_C::PMULLD_VdqWdq, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PHMINPOSUW_VdqWdq, &BX_CPU_C::PHMINPOSUW_VdqWdq, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMINSB_VdqWdqR, &BX_CPU_C::PMINSB_VdqWdqR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMINSB_VdqWdqM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::PMINSB_VdqWdqR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMINSD_VdqWdqR, &BX_CPU_C::PMINSD_VdqWdqR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMINSD_VdqWdqM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::PMINSD_VdqWdqR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMINUW_VdqWdqR, &BX_CPU_C::PMINUW_VdqWdqR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMINUW_VdqWdqM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::PMINUW_VdqWdqR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMINUD_VdqWdqR, &BX_CPU_C::PMINUD_VdqWdqR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMINUD_VdqWdqM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::PMINUD_VdqWdqR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMAXSB_VdqWdqR, &BX_CPU_C::PMAXSB_VdqWdqR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMAXSB_VdqWdqM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::PMAXSB_VdqWdqR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMAXSD_VdqWdqR, &BX_CPU_C::PMAXSD_VdqWdqR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMAXSD_VdqWdqM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::PMAXSD_VdqWdqR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMAXUW_VdqWdqR, &BX_CPU_C::PMAXUW_VdqWdqR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMAXUW_VdqWdqM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::PMAXUW_VdqWdqR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMAXUD_VdqWdqR, &BX_CPU_C::PMAXUD_VdqWdqR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMAXUD_VdqWdqM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::PMAXUD_VdqWdqR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMULLD_VdqWdqR, &BX_CPU_C::PMULLD_VdqWdqR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PMULLD_VdqWdqM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::PMULLD_VdqWdqR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PHMINPOSUW_VdqWdqR, &BX_CPU_C::PHMINPOSUW_VdqWdqR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PHMINPOSUW_VdqWdqM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::PHMINPOSUW_VdqWdqR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_ROUNDPS_VpsWpsIbR, &BX_CPU_C::ROUNDPS_VpsWpsIbR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_ROUNDPS_VpsWpsIbM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::ROUNDPS_VpsWpsIbR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_ROUNDPD_VpdWpdIbR, &BX_CPU_C::ROUNDPD_VpdWpdIbR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
@ -1355,28 +1369,37 @@ bx_define_opcode(BX_IA_BLENDPD_VpdWpdIbR, &BX_CPU_C::BLENDPD_VpdWpdIbR, NULL, BX
bx_define_opcode(BX_IA_BLENDPD_VpdWpdIbM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::BLENDPD_VpdWpdIbR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PBLENDW_VdqWdqIbR, &BX_CPU_C::PBLENDW_VdqWdqIbR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PBLENDW_VdqWdqIbM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::PBLENDW_VdqWdqIbR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PEXTRB_EbdVdqIb, &BX_CPU_C::PEXTRB_EbdVdqIb, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PEXTRW_EwdVdqIb, &BX_CPU_C::PEXTRW_EwdVdqIb, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PEXTRD_EdVdqIb, &BX_CPU_C::PEXTRD_EdVdqIb, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_EXTRACTPS_EdVpsIb, &BX_CPU_C::EXTRACTPS_EdVpsIb, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PEXTRB_EbdVdqIbR, &BX_CPU_C::PEXTRB_EbdVdqIbR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PEXTRB_EbdVdqIbM, &BX_CPU_C::PEXTRB_EbdVdqIbM, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PEXTRW_EwdVdqIbR, &BX_CPU_C::PEXTRW_EwdVdqIbR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PEXTRW_EwdVdqIbM, &BX_CPU_C::PEXTRW_EwdVdqIbM, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PEXTRD_EdVdqIbR, &BX_CPU_C::PEXTRD_EdVdqIbR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PEXTRD_EdVdqIbM, &BX_CPU_C::PEXTRD_EdVdqIbM, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_EXTRACTPS_EdVpsIbR, &BX_CPU_C::EXTRACTPS_EdVpsIbR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_EXTRACTPS_EdVpsIbM, &BX_CPU_C::EXTRACTPS_EdVpsIbM, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PINSRB_VdqEbIb, &BX_CPU_C::PINSRB_VdqEbIb, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_INSERTPS_VpsWssIb, &BX_CPU_C::INSERTPS_VpsWssIb, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PINSRD_VdqEdIb, &BX_CPU_C::PINSRD_VdqEdIb, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_DPPS_VpsWpsIb, &BX_CPU_C::DPPS_VpsWpsIb, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_DPPD_VpdWpdIb, &BX_CPU_C::DPPD_VpdWpdIb, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PINSRD_VdqEdIbR, &BX_CPU_C::PINSRD_VdqEdIbR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PINSRD_VdqEdIbM, &BX_CPU_C::PINSRD_VdqEdIbM, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_DPPS_VpsWpsIbR, &BX_CPU_C::DPPS_VpsWpsIbR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_DPPS_VpsWpsIbM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::DPPS_VpsWpsIbR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_DPPD_VpdWpdIbR, &BX_CPU_C::DPPD_VpdWpdIbR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_DPPD_VpdWpdIbM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::DPPD_VpdWpdIbR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_MPSADBW_VdqWdqIbR, &BX_CPU_C::MPSADBW_VdqWdqIbR, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_MPSADBW_VdqWdqIbM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::MPSADBW_VdqWdqIbR, BX_CPU_SSE4_1, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_MOVNTDQA_VdqMdq, &BX_CPU_C::MOVAPS_VpsWpsM, NULL, BX_CPU_SSE4_1, BX_PREPARE_SSE)
// SSE4.1
// SSE4.2
bx_define_opcode(BX_IA_CRC32_GdEb, &BX_CPU_C::CRC32_GdEb, NULL, BX_CPU_SSE4_2, 0)
bx_define_opcode(BX_IA_CRC32_GdEbR, &BX_CPU_C::CRC32_GdEbR, NULL, BX_CPU_SSE4_2, 0)
bx_define_opcode(BX_IA_CRC32_GdEbM, &BX_CPU_C::LOAD_Eb, &BX_CPU_C::CRC32_GdEbR, BX_CPU_SSE4_2, 0)
bx_define_opcode(BX_IA_CRC32_GdEw, &BX_CPU_C::CRC32_GdEw, NULL, BX_CPU_SSE4_2, 0)
bx_define_opcode(BX_IA_CRC32_GdEd, &BX_CPU_C::CRC32_GdEd, NULL, BX_CPU_SSE4_2, 0)
#if BX_SUPPORT_X86_64
bx_define_opcode(BX_IA_CRC32_GdEq, &BX_CPU_C::CRC32_GdEq, NULL, BX_CPU_SSE4_2 | BX_CPU_X86_64, 0)
#endif
bx_define_opcode(BX_IA_PCMPGTQ_VdqWdq, &BX_CPU_C::PCMPGTQ_VdqWdq, NULL, BX_CPU_SSE4_2, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PCMPGTQ_VdqWdqR, &BX_CPU_C::PCMPGTQ_VdqWdqR, NULL, BX_CPU_SSE4_2, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PCMPGTQ_VdqWdqM, &BX_CPU_C::LOAD_Vdq, &BX_CPU_C::PCMPGTQ_VdqWdqR, BX_CPU_SSE4_2, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PCMPESTRM_VdqWdqIbR, &BX_CPU_C::PCMPESTRM_VdqWdqIbR, NULL, BX_CPU_SSE4_2, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PCMPESTRM_VdqWdqIbM, &BX_CPU_C::LOADU_Vdq, &BX_CPU_C::PCMPESTRM_VdqWdqIbR, BX_CPU_SSE4_2, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PCMPESTRI_VdqWdqIbR, &BX_CPU_C::PCMPESTRI_VdqWdqIbR, NULL, BX_CPU_SSE4_2, BX_PREPARE_SSE)
@ -1388,12 +1411,17 @@ bx_define_opcode(BX_IA_PCMPISTRI_VdqWdqIbM, &BX_CPU_C::LOADU_Vdq, &BX_CPU_C::PCM
// SSE4.2
// MOVBE instruction
bx_define_opcode(BX_IA_MOVBE_GwEw, &BX_CPU_C::MOVBE_GwEw, NULL, BX_CPU_MOVBE, 0)
bx_define_opcode(BX_IA_MOVBE_GdEd, &BX_CPU_C::MOVBE_GdEd, NULL, BX_CPU_MOVBE, 0)
bx_define_opcode(BX_IA_MOVBE_GwEwR, &BX_CPU_C::MOVBE_GwEwR, NULL, BX_CPU_MOVBE, 0)
bx_define_opcode(BX_IA_MOVBE_GwEwM, &BX_CPU_C::LOAD_Ew, &BX_CPU_C::MOVBE_GwEwR, BX_CPU_MOVBE, 0)
bx_define_opcode(BX_IA_MOVBE_GdEdR, &BX_CPU_C::MOVBE_GdEdR, NULL, BX_CPU_MOVBE, 0)
bx_define_opcode(BX_IA_MOVBE_GdEdM, &BX_CPU_C::LOAD_Ed, &BX_CPU_C::MOVBE_GdEdR, BX_CPU_MOVBE, 0)
#if BX_SUPPORT_X86_64
bx_define_opcode(BX_IA_MOVBE_GqEqR, &BX_CPU_C::MOVBE_GqEqR, NULL, BX_CPU_MOVBE | BX_CPU_X86_64, 0)
bx_define_opcode(BX_IA_MOVBE_GqEqM, &BX_CPU_C::LOAD_Eq, &BX_CPU_C::MOVBE_GqEqR, BX_CPU_MOVBE | BX_CPU_X86_64, 0)
#endif
bx_define_opcode(BX_IA_MOVBE_EwGw, &BX_CPU_C::MOVBE_EwGw, NULL, BX_CPU_MOVBE, 0)
bx_define_opcode(BX_IA_MOVBE_EdGd, &BX_CPU_C::MOVBE_EdGd, NULL, BX_CPU_MOVBE, 0)
#if BX_SUPPORT_X86_64
bx_define_opcode(BX_IA_MOVBE_GqEq, &BX_CPU_C::MOVBE_GqEq, NULL, BX_CPU_MOVBE | BX_CPU_X86_64, 0)
bx_define_opcode(BX_IA_MOVBE_EqGq, &BX_CPU_C::MOVBE_EqGq, NULL, BX_CPU_MOVBE | BX_CPU_X86_64, 0)
#endif
// MOVBE instruction

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////////////////////////////////////
// $Id: mmx.cc,v 1.96 2010-09-26 20:20:27 sshwarts Exp $
// $Id: mmx.cc,v 1.97 2010-12-25 17:04:36 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2002-2009 Stanislav Shwartsman
@ -26,6 +26,11 @@
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
// Make code more tidy with a few macros.
// When x86-64 support is compiled out (BX_SUPPORT_X86_64 == 0), RDI is
// defined as an alias for EDI.
// NOTE(review): presumably this lets code in this file name RDI in both
// build configurations without its own #if - confirm against the users.
#if BX_SUPPORT_X86_64==0
#define RDI EDI
#endif
#if BX_CPU_LEVEL >= 5
void BX_CPU_C::print_state_MMX(void)
@ -112,7 +117,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHADDD_PqQq(bxInstruction_c *i)
#if BX_CPU_LEVEL >= 5
BX_CPU_THIS_PTR prepareMMX();
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
@ -126,10 +131,10 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHADDD_PqQq(bxInstruction_c *i)
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
MMXUD0(result) = MMXUD0(op1) + MMXUD1(op1);
MMXUD1(result) = MMXUD0(op2) + MMXUD1(op2);
MMXUD0(op1) = MMXUD0(op1) + MMXUD1(op1);
MMXUD1(op1) = MMXUD0(op2) + MMXUD1(op2);
BX_WRITE_MMX_REG(i->nnn(), result);
BX_WRITE_MMX_REG(i->nnn(), op1);
#endif
}
@ -377,7 +382,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULHRSW_PqQq(bxInstruction_c *i)
#if BX_CPU_LEVEL >= 5
BX_CPU_THIS_PTR prepareMMX();
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
@ -391,18 +396,13 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULHRSW_PqQq(bxInstruction_c *i)
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
for(unsigned j=0; j<4; j++) {
Bit32s temp = Bit32s(op1.mmx16s(j)) * Bit32s(op2.mmx16s(j));
result.mmx16u(j) = ((temp >> 14) + 1) >> 1;
}
MMXUW0(result) = (((MMXSW0(op1) * MMXSW0(op2)) >> 14) + 1) >> 1;
MMXUW1(result) = (((MMXSW1(op1) * MMXSW1(op2)) >> 14) + 1) >> 1;
MMXUW2(result) = (((MMXSW2(op1) * MMXSW2(op2)) >> 14) + 1) >> 1;
MMXUW3(result) = (((MMXSW3(op1) * MMXSW3(op2)) >> 14) + 1) >> 1;
MMXUW0(op1) = (((MMXSW0(op1) * MMXSW0(op2)) >> 14) + 1) >> 1;
MMXUW1(op1) = (((MMXSW1(op1) * MMXSW1(op2)) >> 14) + 1) >> 1;
MMXUW2(op1) = (((MMXSW2(op1) * MMXSW2(op2)) >> 14) + 1) >> 1;
MMXUW3(op1) = (((MMXSW3(op1) * MMXSW3(op2)) >> 14) + 1) >> 1;
/* now write result back to destination */
BX_WRITE_MMX_REG(i->nnn(), result);
BX_WRITE_MMX_REG(i->nnn(), op1);
#endif
}
@ -501,7 +501,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PALIGNR_PqQqIb(bxInstruction_c *i)
#if BX_CPU_LEVEL >= 5
BX_CPU_THIS_PTR prepareMMX();
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
@ -518,16 +518,16 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PALIGNR_PqQqIb(bxInstruction_c *i)
unsigned shift = i->Ib() * 8;
if(shift == 0)
MMXUQ(result) = MMXUQ(op2);
MMXUQ(op1) = MMXUQ(op2);
else if(shift < 64)
MMXUQ(result) = (MMXUQ(op2) >> shift) | (MMXUQ(op1) << (64-shift));
MMXUQ(op1) = (MMXUQ(op2) >> shift) | (MMXUQ(op1) << (64-shift));
else if(shift < 128)
MMXUQ(result) = MMXUQ(op1) >> (shift-64);
MMXUQ(op1) = MMXUQ(op1) >> (shift-64);
else
MMXUQ(result) = 0;
MMXUQ(op1) = 0;
/* now write result back to destination */
BX_WRITE_MMX_REG(i->nnn(), result);
BX_WRITE_MMX_REG(i->nnn(), op1);
#endif
}
@ -912,11 +912,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVD_PqEdR(bxInstruction_c *i)
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
BxPackedMmxRegister op;
MMXUD0(op) = BX_READ_32BIT_REG(i->rm());
MMXUD1(op) = 0;
/* now write result back to destination */
MMXUQ(op) = (Bit64u) BX_READ_32BIT_REG(i->rm());
BX_WRITE_MMX_REG(i->nnn(), op);
#endif
}
@ -929,8 +925,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVD_PqEdM(bxInstruction_c *i)
BxPackedMmxRegister op;
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
MMXUD0(op) = read_virtual_dword(i->seg(), eaddr);
MMXUD1(op) = 0;
MMXUQ(op) = (Bit64u) read_virtual_dword(i->seg(), eaddr);
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
@ -947,7 +942,6 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVQ_PqEqR(bxInstruction_c *i)
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
BxPackedMmxRegister op;
MMXUQ(op) = BX_READ_64BIT_REG(i->rm());
/* now write result back to destination */
@ -1709,7 +1703,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAW_PqQq(bxInstruction_c *i)
#if BX_CPU_LEVEL >= 5
BX_CPU_THIS_PTR prepareMMX();
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
@ -1726,27 +1720,22 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAW_PqQq(bxInstruction_c *i)
if(!MMXUQ(op2)) return;
if(MMXUQ(op2) > 15) {
MMXUW0(result) = (MMXUW0(op1) & 0x8000) ? 0xffff : 0;
MMXUW1(result) = (MMXUW1(op1) & 0x8000) ? 0xffff : 0;
MMXUW2(result) = (MMXUW2(op1) & 0x8000) ? 0xffff : 0;
MMXUW3(result) = (MMXUW3(op1) & 0x8000) ? 0xffff : 0;
MMXUW0(op1) = (MMXUW0(op1) & 0x8000) ? 0xffff : 0;
MMXUW1(op1) = (MMXUW1(op1) & 0x8000) ? 0xffff : 0;
MMXUW2(op1) = (MMXUW2(op1) & 0x8000) ? 0xffff : 0;
MMXUW3(op1) = (MMXUW3(op1) & 0x8000) ? 0xffff : 0;
}
else {
Bit8u shift = MMXUB0(op2);
MMXUW0(result) = MMXUW0(op1) >> shift;
MMXUW1(result) = MMXUW1(op1) >> shift;
MMXUW2(result) = MMXUW2(op1) >> shift;
MMXUW3(result) = MMXUW3(op1) >> shift;
if(MMXUW0(op1) & 0x8000) MMXUW0(result) |= (0xffff << (16 - shift));
if(MMXUW1(op1) & 0x8000) MMXUW1(result) |= (0xffff << (16 - shift));
if(MMXUW2(op1) & 0x8000) MMXUW2(result) |= (0xffff << (16 - shift));
if(MMXUW3(op1) & 0x8000) MMXUW3(result) |= (0xffff << (16 - shift));
MMXUW0(op1) = (Bit16u)(MMXSW0(op1) >> shift);
MMXUW1(op1) = (Bit16u)(MMXSW1(op1) >> shift);
MMXUW2(op1) = (Bit16u)(MMXSW2(op1) >> shift);
MMXUW3(op1) = (Bit16u)(MMXSW3(op1) >> shift);
}
/* now write result back to destination */
BX_WRITE_MMX_REG(i->nnn(), result);
BX_WRITE_MMX_REG(i->nnn(), op1);
#endif
}
@ -1756,7 +1745,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAD_PqQq(bxInstruction_c *i)
#if BX_CPU_LEVEL >= 5
BX_CPU_THIS_PTR prepareMMX();
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
@ -1773,24 +1762,18 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAD_PqQq(bxInstruction_c *i)
if(!MMXUQ(op2)) return;
if(MMXUQ(op2) > 31) {
MMXUD0(result) = (MMXUD0(op1) & 0x80000000) ? 0xffffffff : 0;
MMXUD1(result) = (MMXUD1(op1) & 0x80000000) ? 0xffffffff : 0;
MMXUD0(op1) = (MMXUD0(op1) & 0x80000000) ? 0xffffffff : 0;
MMXUD1(op1) = (MMXUD1(op1) & 0x80000000) ? 0xffffffff : 0;
}
else {
Bit8u shift = MMXUB0(op2);
MMXUD0(result) = MMXUD0(op1) >> shift;
MMXUD1(result) = MMXUD1(op1) >> shift;
if(MMXUD0(op1) & 0x80000000)
MMXUD0(result) |= (0xffffffff << (32 - shift));
if(MMXUD1(op1) & 0x80000000)
MMXUD1(result) |= (0xffffffff << (32 - shift));
MMXUD0(op1) = (Bit32u)(MMXSD0(op1) >> shift);
MMXUD1(op1) = (Bit32u)(MMXSD1(op1) >> shift);
}
/* now write result back to destination */
BX_WRITE_MMX_REG(i->nnn(), result);
BX_WRITE_MMX_REG(i->nnn(), op1);
#endif
}
@ -1915,7 +1898,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBSB_PqQq(bxInstruction_c *i)
#if BX_CPU_LEVEL >= 5
BX_CPU_THIS_PTR prepareMMX();
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
@ -1929,17 +1912,17 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBSB_PqQq(bxInstruction_c *i)
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
MMXSB0(result) = SaturateWordSToByteS(Bit16s(MMXSB0(op1)) - Bit16s(MMXSB0(op2)));
MMXSB1(result) = SaturateWordSToByteS(Bit16s(MMXSB1(op1)) - Bit16s(MMXSB1(op2)));
MMXSB2(result) = SaturateWordSToByteS(Bit16s(MMXSB2(op1)) - Bit16s(MMXSB2(op2)));
MMXSB3(result) = SaturateWordSToByteS(Bit16s(MMXSB3(op1)) - Bit16s(MMXSB3(op2)));
MMXSB4(result) = SaturateWordSToByteS(Bit16s(MMXSB4(op1)) - Bit16s(MMXSB4(op2)));
MMXSB5(result) = SaturateWordSToByteS(Bit16s(MMXSB5(op1)) - Bit16s(MMXSB5(op2)));
MMXSB6(result) = SaturateWordSToByteS(Bit16s(MMXSB6(op1)) - Bit16s(MMXSB6(op2)));
MMXSB7(result) = SaturateWordSToByteS(Bit16s(MMXSB7(op1)) - Bit16s(MMXSB7(op2)));
MMXSB0(op1) = SaturateWordSToByteS(Bit16s(MMXSB0(op1)) - Bit16s(MMXSB0(op2)));
MMXSB1(op1) = SaturateWordSToByteS(Bit16s(MMXSB1(op1)) - Bit16s(MMXSB1(op2)));
MMXSB2(op1) = SaturateWordSToByteS(Bit16s(MMXSB2(op1)) - Bit16s(MMXSB2(op2)));
MMXSB3(op1) = SaturateWordSToByteS(Bit16s(MMXSB3(op1)) - Bit16s(MMXSB3(op2)));
MMXSB4(op1) = SaturateWordSToByteS(Bit16s(MMXSB4(op1)) - Bit16s(MMXSB4(op2)));
MMXSB5(op1) = SaturateWordSToByteS(Bit16s(MMXSB5(op1)) - Bit16s(MMXSB5(op2)));
MMXSB6(op1) = SaturateWordSToByteS(Bit16s(MMXSB6(op1)) - Bit16s(MMXSB6(op2)));
MMXSB7(op1) = SaturateWordSToByteS(Bit16s(MMXSB7(op1)) - Bit16s(MMXSB7(op2)));
/* now write result back to destination */
BX_WRITE_MMX_REG(i->nnn(), result);
BX_WRITE_MMX_REG(i->nnn(), op1);
#endif
}
@ -1949,7 +1932,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBSW_PqQq(bxInstruction_c *i)
#if BX_CPU_LEVEL >= 5
BX_CPU_THIS_PTR prepareMMX();
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
@ -1963,13 +1946,13 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBSW_PqQq(bxInstruction_c *i)
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
MMXSW0(result) = SaturateDwordSToWordS(Bit32s(MMXSW0(op1)) - Bit32s(MMXSW0(op2)));
MMXSW1(result) = SaturateDwordSToWordS(Bit32s(MMXSW1(op1)) - Bit32s(MMXSW1(op2)));
MMXSW2(result) = SaturateDwordSToWordS(Bit32s(MMXSW2(op1)) - Bit32s(MMXSW2(op2)));
MMXSW3(result) = SaturateDwordSToWordS(Bit32s(MMXSW3(op1)) - Bit32s(MMXSW3(op2)));
MMXSW0(op1) = SaturateDwordSToWordS(Bit32s(MMXSW0(op1)) - Bit32s(MMXSW0(op2)));
MMXSW1(op1) = SaturateDwordSToWordS(Bit32s(MMXSW1(op1)) - Bit32s(MMXSW1(op2)));
MMXSW2(op1) = SaturateDwordSToWordS(Bit32s(MMXSW2(op1)) - Bit32s(MMXSW2(op2)));
MMXSW3(op1) = SaturateDwordSToWordS(Bit32s(MMXSW3(op1)) - Bit32s(MMXSW3(op2)));
/* now write result back to destination */
BX_WRITE_MMX_REG(i->nnn(), result);
BX_WRITE_MMX_REG(i->nnn(), op1);
#endif
}
@ -2036,7 +2019,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDSB_PqQq(bxInstruction_c *i)
#if BX_CPU_LEVEL >= 5
BX_CPU_THIS_PTR prepareMMX();
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
@ -2050,17 +2033,17 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDSB_PqQq(bxInstruction_c *i)
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
MMXSB0(result) = SaturateWordSToByteS(Bit16s(MMXSB0(op1)) + Bit16s(MMXSB0(op2)));
MMXSB1(result) = SaturateWordSToByteS(Bit16s(MMXSB1(op1)) + Bit16s(MMXSB1(op2)));
MMXSB2(result) = SaturateWordSToByteS(Bit16s(MMXSB2(op1)) + Bit16s(MMXSB2(op2)));
MMXSB3(result) = SaturateWordSToByteS(Bit16s(MMXSB3(op1)) + Bit16s(MMXSB3(op2)));
MMXSB4(result) = SaturateWordSToByteS(Bit16s(MMXSB4(op1)) + Bit16s(MMXSB4(op2)));
MMXSB5(result) = SaturateWordSToByteS(Bit16s(MMXSB5(op1)) + Bit16s(MMXSB5(op2)));
MMXSB6(result) = SaturateWordSToByteS(Bit16s(MMXSB6(op1)) + Bit16s(MMXSB6(op2)));
MMXSB7(result) = SaturateWordSToByteS(Bit16s(MMXSB7(op1)) + Bit16s(MMXSB7(op2)));
MMXSB0(op1) = SaturateWordSToByteS(Bit16s(MMXSB0(op1)) + Bit16s(MMXSB0(op2)));
MMXSB1(op1) = SaturateWordSToByteS(Bit16s(MMXSB1(op1)) + Bit16s(MMXSB1(op2)));
MMXSB2(op1) = SaturateWordSToByteS(Bit16s(MMXSB2(op1)) + Bit16s(MMXSB2(op2)));
MMXSB3(op1) = SaturateWordSToByteS(Bit16s(MMXSB3(op1)) + Bit16s(MMXSB3(op2)));
MMXSB4(op1) = SaturateWordSToByteS(Bit16s(MMXSB4(op1)) + Bit16s(MMXSB4(op2)));
MMXSB5(op1) = SaturateWordSToByteS(Bit16s(MMXSB5(op1)) + Bit16s(MMXSB5(op2)));
MMXSB6(op1) = SaturateWordSToByteS(Bit16s(MMXSB6(op1)) + Bit16s(MMXSB6(op2)));
MMXSB7(op1) = SaturateWordSToByteS(Bit16s(MMXSB7(op1)) + Bit16s(MMXSB7(op2)));
/* now write result back to destination */
BX_WRITE_MMX_REG(i->nnn(), result);
BX_WRITE_MMX_REG(i->nnn(), op1);
#endif
}
@ -2070,7 +2053,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDSW_PqQq(bxInstruction_c *i)
#if BX_CPU_LEVEL >= 5
BX_CPU_THIS_PTR prepareMMX();
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
@ -2084,13 +2067,13 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDSW_PqQq(bxInstruction_c *i)
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
MMXSW0(result) = SaturateDwordSToWordS(Bit32s(MMXSW0(op1)) + Bit32s(MMXSW0(op2)));
MMXSW1(result) = SaturateDwordSToWordS(Bit32s(MMXSW1(op1)) + Bit32s(MMXSW1(op2)));
MMXSW2(result) = SaturateDwordSToWordS(Bit32s(MMXSW2(op1)) + Bit32s(MMXSW2(op2)));
MMXSW3(result) = SaturateDwordSToWordS(Bit32s(MMXSW3(op1)) + Bit32s(MMXSW3(op2)));
MMXSW0(op1) = SaturateDwordSToWordS(Bit32s(MMXSW0(op1)) + Bit32s(MMXSW0(op2)));
MMXSW1(op1) = SaturateDwordSToWordS(Bit32s(MMXSW1(op1)) + Bit32s(MMXSW1(op2)));
MMXSW2(op1) = SaturateDwordSToWordS(Bit32s(MMXSW2(op1)) + Bit32s(MMXSW2(op2)));
MMXSW3(op1) = SaturateDwordSToWordS(Bit32s(MMXSW3(op1)) + Bit32s(MMXSW3(op2)));
/* now write result back to destination */
BX_WRITE_MMX_REG(i->nnn(), result);
BX_WRITE_MMX_REG(i->nnn(), op1);
#endif
}
@ -2259,7 +2242,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULUDQ_PqQq(bxInstruction_c *i)
#if BX_CPU_LEVEL >= 5
BX_CPU_THIS_PTR prepareMMX();
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
@ -2273,10 +2256,10 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULUDQ_PqQq(bxInstruction_c *i)
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
MMXUQ(result) = Bit64u(MMXUD0(op1)) * Bit64u(MMXUD0(op2));
MMXUQ(op1) = Bit64u(MMXUD0(op1)) * Bit64u(MMXUD0(op2));
/* now write result back to destination */
BX_WRITE_MMX_REG(i->nnn(), result);
BX_WRITE_MMX_REG(i->nnn(), op1);
#endif
}
@ -2363,23 +2346,10 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::MASKMOVQ_PqPRq(bxInstruction_c *i)
BX_CPU_THIS_PTR prepareMMX();
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
bx_address rdi;
bx_address rdi = RDI & i->asize_mask();
BxPackedMmxRegister op = BX_READ_MMX_REG(i->nnn()), tmp,
mask = BX_READ_MMX_REG(i->rm());
#if BX_SUPPORT_X86_64
if (i->as64L()) { /* 64 bit address mode */
rdi = RDI;
}
else
#endif
if (i->as32L()) {
rdi = EDI;
}
else { /* 16 bit address mode */
rdi = DI;
}
/* do read-modify-write for efficiency */
MMXUQ(tmp) = read_RMW_virtual_qword(i->seg(), rdi);
@ -2640,31 +2610,26 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAW_PqIb(bxInstruction_c *i)
BX_CPU_THIS_PTR prepareMMX();
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
BxPackedMmxRegister op = BX_READ_MMX_REG(i->rm()), result;
BxPackedMmxRegister op = BX_READ_MMX_REG(i->rm());
Bit8u shift = i->Ib();
if(shift == 0) return;
if(shift > 15) {
MMXUW0(result) = (MMXUW0(op) & 0x8000) ? 0xffff : 0;
MMXUW1(result) = (MMXUW1(op) & 0x8000) ? 0xffff : 0;
MMXUW2(result) = (MMXUW2(op) & 0x8000) ? 0xffff : 0;
MMXUW3(result) = (MMXUW3(op) & 0x8000) ? 0xffff : 0;
MMXUW0(op) = (MMXUW0(op) & 0x8000) ? 0xffff : 0;
MMXUW1(op) = (MMXUW1(op) & 0x8000) ? 0xffff : 0;
MMXUW2(op) = (MMXUW2(op) & 0x8000) ? 0xffff : 0;
MMXUW3(op) = (MMXUW3(op) & 0x8000) ? 0xffff : 0;
}
else {
MMXUW0(result) = MMXUW0(op) >> shift;
MMXUW1(result) = MMXUW1(op) >> shift;
MMXUW2(result) = MMXUW2(op) >> shift;
MMXUW3(result) = MMXUW3(op) >> shift;
if(MMXUW0(op) & 0x8000) MMXUW0(result) |= (0xffff << (16 - shift));
if(MMXUW1(op) & 0x8000) MMXUW1(result) |= (0xffff << (16 - shift));
if(MMXUW2(op) & 0x8000) MMXUW2(result) |= (0xffff << (16 - shift));
if(MMXUW3(op) & 0x8000) MMXUW3(result) |= (0xffff << (16 - shift));
MMXUW0(op) = (Bit16u)(MMXSW0(op) >> shift);
MMXUW1(op) = (Bit16u)(MMXSW1(op) >> shift);
MMXUW2(op) = (Bit16u)(MMXSW2(op) >> shift);
MMXUW3(op) = (Bit16u)(MMXSW3(op) >> shift);
}
/* now write result back to destination */
BX_WRITE_MMX_REG(i->rm(), result);
BX_WRITE_MMX_REG(i->rm(), op);
#endif
}
@ -2721,28 +2686,22 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAD_PqIb(bxInstruction_c *i)
BX_CPU_THIS_PTR prepareMMX();
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
BxPackedMmxRegister op = BX_READ_MMX_REG(i->rm()), result;
BxPackedMmxRegister op = BX_READ_MMX_REG(i->rm());
Bit8u shift = i->Ib();
if(shift == 0) return;
if(shift > 31) {
MMXUD0(result) = (MMXUD0(op) & 0x80000000) ? 0xffffffff : 0;
MMXUD1(result) = (MMXUD1(op) & 0x80000000) ? 0xffffffff : 0;
MMXUD0(op) = (MMXUD0(op) & 0x80000000) ? 0xffffffff : 0;
MMXUD1(op) = (MMXUD1(op) & 0x80000000) ? 0xffffffff : 0;
}
else {
MMXUD0(result) = MMXUD0(op) >> shift;
MMXUD1(result) = MMXUD1(op) >> shift;
if(MMXUD0(op) & 0x80000000)
MMXUD0(result) |= (0xffffffff << (32 - shift));
if(MMXUD1(op) & 0x80000000)
MMXUD1(result) |= (0xffffffff << (32 - shift));
MMXUD0(op) = (Bit32u)(MMXSD0(op) >> shift);
MMXUD1(op) = (Bit32u)(MMXSD1(op) >> shift);
}
/* now write result back to destination */
BX_WRITE_MMX_REG(i->rm(), result);
BX_WRITE_MMX_REG(i->rm(), op);
#endif
}

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////////////////////////////////////
// $Id: proc_ctrl.cc,v 1.340 2010-12-22 21:16:02 sshwarts Exp $
// $Id: proc_ctrl.cc,v 1.341 2010-12-25 17:04:36 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2001-2010 The Bochs Project
@ -464,20 +464,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::MONITOR(bxInstruction_c *i)
bx_segment_reg_t *seg = &BX_CPU_THIS_PTR sregs[i->seg()];
bx_address offset;
#if BX_SUPPORT_X86_64
if (i->as64L()) {
offset = RAX;
}
else
#endif
if (i->as32L()) {
offset = EAX;
}
else {
offset = AX;
}
bx_address offset = RAX & i->asize_mask();
// set MONITOR
bx_address laddr = BX_CPU_THIS_PTR get_laddr(i->seg(), offset);

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////////////////////////////////////
// $Id: sse.cc,v 1.75 2010-12-25 07:59:15 sshwarts Exp $
// $Id: sse.cc,v 1.76 2010-12-25 17:04:36 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2003-2010 Stanislav Shwartsman
@ -504,22 +504,12 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::BLENDVPD_VpdWpdR(bxInstruction_c *i)
}
/* 66 0F 38 17 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PTEST_VdqWdq(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PTEST_VdqWdqR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
unsigned result = 0;
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_XMM_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
}
if ((op2.xmm64u(0) & op1.xmm64u(0)) == 0 &&
(op2.xmm64u(1) & op1.xmm64u(1)) == 0) result |= EFlagsZFMask;
@ -531,20 +521,11 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PTEST_VdqWdq(bxInstruction_c *i)
}
/* 66 0F 38 28 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULDQ_VdqWdq(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULDQ_VdqWdqR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_XMM_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
}
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm()), result;
result.xmm64s(0) = Bit64s(op1.xmm32s(0)) * Bit64s(op2.xmm32s(0));
result.xmm64s(1) = Bit64s(op1.xmm32s(2)) * Bit64s(op2.xmm32s(2));
@ -555,20 +536,10 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULDQ_VdqWdq(bxInstruction_c *i)
}
/* 66 0F 38 29 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPEQQ_VdqWdq(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPEQQ_VdqWdqR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_XMM_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
}
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
op1.xmm64u(0) = (op1.xmm64u(0) == op2.xmm64u(0)) ?
BX_CONST64(0xffffffffffffffff) : 0;
@ -576,26 +547,16 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPEQQ_VdqWdq(bxInstruction_c *i)
op1.xmm64u(1) = (op1.xmm64u(1) == op2.xmm64u(1)) ?
BX_CONST64(0xffffffffffffffff) : 0;
/* now write result back to destination */
BX_WRITE_XMM_REG(i->nnn(), op1);
#endif
}
/* 66 0F 38 2B */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKUSDW_VdqWdq(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKUSDW_VdqWdqR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_XMM_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
}
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm()), result;
result.xmm16u(0) = SaturateDwordSToWordU(op1.xmm32s(0));
result.xmm16u(1) = SaturateDwordSToWordU(op1.xmm32s(1));
@ -612,20 +573,10 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKUSDW_VdqWdq(bxInstruction_c *i)
}
/* 66 0F 38 37 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPGTQ_VdqWdq(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPGTQ_VdqWdqR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_XMM_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
}
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
op1.xmm64u(0) = (op1.xmm64u(0) > op2.xmm64u(0)) ?
BX_CONST64(0xffffffffffffffff) : 0;
@ -639,20 +590,10 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPGTQ_VdqWdq(bxInstruction_c *i)
}
/* 66 0F 38 38 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINSB_VdqWdq(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINSB_VdqWdqR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_XMM_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
}
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
for(unsigned j=0; j<16; j++) {
if(op2.xmmsbyte(j) < op1.xmmsbyte(j)) op1.xmmubyte(j) = op2.xmmubyte(j);
@ -664,20 +605,10 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINSB_VdqWdq(bxInstruction_c *i)
}
/* 66 0F 38 39 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINSD_VdqWdq(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINSD_VdqWdqR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_XMM_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
}
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
if(op2.xmm32s(0) < op1.xmm32s(0)) op1.xmm32u(0) = op2.xmm32u(0);
if(op2.xmm32s(1) < op1.xmm32s(1)) op1.xmm32u(1) = op2.xmm32u(1);
@ -690,20 +621,10 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINSD_VdqWdq(bxInstruction_c *i)
}
/* 66 0F 38 3A */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINUW_VdqWdq(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINUW_VdqWdqR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_XMM_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
}
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
if(op2.xmm16u(0) < op1.xmm16u(0)) op1.xmm16u(0) = op2.xmm16u(0);
if(op2.xmm16u(1) < op1.xmm16u(1)) op1.xmm16u(1) = op2.xmm16u(1);
@ -720,20 +641,10 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINUW_VdqWdq(bxInstruction_c *i)
}
/* 66 0F 38 3B */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINUD_VdqWdq(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINUD_VdqWdqR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_XMM_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
}
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
if(op2.xmm32u(0) < op1.xmm32u(0)) op1.xmm32u(0) = op2.xmm32u(0);
if(op2.xmm32u(1) < op1.xmm32u(1)) op1.xmm32u(1) = op2.xmm32u(1);
@ -746,20 +657,10 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMINUD_VdqWdq(bxInstruction_c *i)
}
/* 66 0F 38 3C */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXSB_VdqWdq(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXSB_VdqWdqR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_XMM_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
}
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
for(unsigned j=0; j<16; j++) {
if(op2.xmmsbyte(j) > op1.xmmsbyte(j)) op1.xmmubyte(j) = op2.xmmubyte(j);
@ -771,20 +672,10 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXSB_VdqWdq(bxInstruction_c *i)
}
/* 66 0F 38 3D */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXSD_VdqWdq(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXSD_VdqWdqR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_XMM_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
}
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
if(op2.xmm32s(0) > op1.xmm32s(0)) op1.xmm32u(0) = op2.xmm32u(0);
if(op2.xmm32s(1) > op1.xmm32s(1)) op1.xmm32u(1) = op2.xmm32u(1);
@ -797,20 +688,10 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXSD_VdqWdq(bxInstruction_c *i)
}
/* 66 0F 38 3E */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXUW_VdqWdq(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXUW_VdqWdqR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_XMM_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
}
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
if(op2.xmm16u(0) > op1.xmm16u(0)) op1.xmm16u(0) = op2.xmm16u(0);
if(op2.xmm16u(1) > op1.xmm16u(1)) op1.xmm16u(1) = op2.xmm16u(1);
@ -827,20 +708,10 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXUW_VdqWdq(bxInstruction_c *i)
}
/* 66 0F 38 3F */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXUD_VdqWdq(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXUD_VdqWdqR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_XMM_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
}
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
if(op2.xmm32u(0) > op1.xmm32u(0)) op1.xmm32u(0) = op2.xmm32u(0);
if(op2.xmm32u(1) > op1.xmm32u(1)) op1.xmm32u(1) = op2.xmm32u(1);
@ -853,20 +724,10 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMAXUD_VdqWdq(bxInstruction_c *i)
}
/* 66 0F 38 40 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULLD_VdqWdq(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULLD_VdqWdqR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_XMM_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
}
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
Bit64s product1 = Bit64s(op1.xmm32s(0)) * Bit64s(op2.xmm32s(0));
Bit64s product2 = Bit64s(op1.xmm32s(1)) * Bit64s(op2.xmm32s(1));
@ -884,20 +745,10 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULLD_VdqWdq(bxInstruction_c *i)
}
/* 66 0F 38 41 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHMINPOSUW_VdqWdq(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHMINPOSUW_VdqWdqR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op, result;
/* op2 is a register or memory reference */
if (i->modC0()) {
op = BX_READ_XMM_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op);
}
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm()), result;
unsigned min = 0;
@ -969,45 +820,51 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PBLENDW_VdqWdqIbR(bxInstruction_c *i)
}
/* 66 0F 3A 14 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRB_EbdVdqIb(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRB_EbdVdqIbR(bxInstruction_c *i)
{
/* Register-destination form: copies one byte of an XMM register into a
 * 32-bit general register. The body is empty when BX_CPU_LEVEL < 6. */
#if BX_CPU_LEVEL >= 6
/* the source XMM register is the one selected by nnn */
BxPackedXmmRegister op = BX_READ_XMM_REG(i->nnn());
/* only the low 4 bits of the immediate are used as the byte index (0..15) */
Bit8u result = op.xmmubyte(i->Ib() & 0xF);
/* the byte is widened to 32 bits before the store into the rm register;
 * NOTE(review): presumably the REGZ macro also clears the upper half of a
 * 64-bit register - confirm against the macro definition */
BX_WRITE_32BIT_REGZ(i->rm(), (Bit32u) result);
#endif
}
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRB_EbdVdqIbM(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op = BX_READ_XMM_REG(i->nnn());
Bit8u result = op.xmmubyte(i->Ib() & 0xF);
/* result is a register or memory reference */
if (i->modC0()) {
BX_WRITE_32BIT_REGZ(i->rm(), result);
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
write_virtual_byte(i->seg(), eaddr, result);
}
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
write_virtual_byte(i->seg(), eaddr, result);
#endif
}
/* 66 0F 3A 15 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_EwdVdqIb(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_EwdVdqIbR(bxInstruction_c *i)
{
/* Register-destination form: copies one 16-bit word of an XMM register
 * into a 32-bit general register. The body is empty when BX_CPU_LEVEL < 6. */
#if BX_CPU_LEVEL >= 6
/* the source XMM register is the one selected by nnn */
BxPackedXmmRegister op = BX_READ_XMM_REG(i->nnn());
/* only the low 3 bits of the immediate are used as the word index (0..7) */
Bit16u result = op.xmm16u(i->Ib() & 7);
/* the word is widened to 32 bits before the store into the rm register;
 * NOTE(review): presumably the REGZ macro also clears the upper half of a
 * 64-bit register - confirm against the macro definition */
BX_WRITE_32BIT_REGZ(i->rm(), (Bit32u) result);
#endif
}
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_EwdVdqIbM(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op = BX_READ_XMM_REG(i->nnn());
Bit16u result = op.xmm16u(i->Ib() & 7);
/* result is a register or memory reference */
if (i->modC0()) {
BX_WRITE_32BIT_REGZ(i->rm(), result);
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
write_virtual_word(i->seg(), eaddr, result);
}
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
write_virtual_word(i->seg(), eaddr, result);
#endif
}
/* 66 0F 3A 16 */
// PEXTRD/PEXTRQ (66 0F 3A 16), register-destination variant: copies one dword
// (or one qword when os64L() is set) of the XMM register selected by nnn() into
// the general register selected by rm().
// NOTE(review): this text is a rendered diff. A hunk header interrupts the body,
// both the old and the "R" signature are present, and the modC0() if/else runs
// (including the one after the closing brace) look like removed pre-split
// lines — confirm against the real file.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRD_EdVdqIb(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRD_EdVdqIbR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op = BX_READ_XMM_REG(i->nnn());
@ -1016,51 +873,59 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRD_EdVdqIb(bxInstruction_c *i)
if (i->os64L()) /* 64 bit operand size mode */
{
// Ib is masked to 1 bit: selects the low or high qword.
Bit64u result = op.xmm64u(i->Ib() & 1);
/* result is a register or memory reference */
if (i->modC0()) {
BX_WRITE_64BIT_REG(i->rm(), result);
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
write_virtual_qword_64(i->seg(), eaddr, result);
}
BX_WRITE_64BIT_REG(i->rm(), result);
}
else
#endif
{
// Ib is masked to 2 bits: selects one of the four dwords.
Bit32u result = op.xmm32u(i->Ib() & 3);
BX_WRITE_32BIT_REGZ(i->rm(), result);
}
#endif
}
/* result is a register or memory reference */
if (i->modC0()) {
BX_WRITE_32BIT_REGZ(i->rm(), result);
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
write_virtual_dword(i->seg(), eaddr, result);
}
/* 66 0F 3A 16 */
// PEXTRD/PEXTRQ, memory-destination variant: stores one dword (or one qword
// when os64L() is set) of the XMM register selected by nnn() to memory.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRD_EdVdqIbM(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op = BX_READ_XMM_REG(i->nnn());
// The effective address is resolved once, before the operand-size branch,
// and shared by both store paths.
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
#if BX_SUPPORT_X86_64
if (i->os64L()) /* 64 bit operand size mode */
{
// Ib is masked to 1 bit: selects the low or high qword.
Bit64u result = op.xmm64u(i->Ib() & 1);
write_virtual_qword_64(i->seg(), eaddr, result);
}
else
#endif
{
// Ib is masked to 2 bits: selects one of the four dwords.
Bit32u result = op.xmm32u(i->Ib() & 3);
write_virtual_dword(i->seg(), eaddr, result);
}
#endif
}
/* 66 0F 3A 17 */
// EXTRACTPS (66 0F 3A 17), register-destination variant: copies one 32-bit
// element of the XMM register selected by nnn() into the general register
// selected by rm().
// NOTE(review): this text is a rendered diff, so the un-suffixed signature and
// the "R" signature appear back to back; presumably only the "R" one exists
// after the commit — confirm against the real file.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::EXTRACTPS_EdVpsIb(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::EXTRACTPS_EdVpsIbR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op = BX_READ_XMM_REG(i->nnn());
// Ib is masked to 2 bits: selects one of the four dwords.
Bit32u result = op.xmm32u(i->Ib() & 3);
BX_WRITE_32BIT_REGZ(i->rm(), result);
#endif
}
// EXTRACTPS, memory-destination variant: stores one 32-bit element of the XMM
// register selected by nnn() at the address produced by ResolveModrm.
// NOTE(review): this text is a rendered diff. The modC0() if/else below looks
// like the removed pre-split body; the three statements after it repeat the
// memory path and are presumably the replacement — confirm against the real file.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::EXTRACTPS_EdVpsIbM(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op = BX_READ_XMM_REG(i->nnn());
// Ib is masked to 2 bits: selects one of the four dwords.
Bit32u result = op.xmm32u(i->Ib() & 3);
/* result is a register or memory reference */
if (i->modC0()) {
BX_WRITE_32BIT_REGZ(i->rm(), result);
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
write_virtual_dword(i->seg(), eaddr, result);
}
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
write_virtual_dword(i->seg(), eaddr, result);
#endif
}
@ -1120,7 +985,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::INSERTPS_VpsWssIb(bxInstruction_c *i)
}
/* 66 0F 3A 22 */
// PINSRD/PINSRQ (66 0F 3A 22), register-source variant: overwrites one dword
// (or one qword when os64L() is set) of the XMM register selected by nnn() with
// the general register selected by rm(), then writes the XMM register back.
// NOTE(review): this text is a rendered diff. A hunk header interrupts the body,
// both the old and the "R" signature are present, and the `Bit64u op2;`
// declaration plus the modC0() if/else that assigns op2 look like removed
// pre-split lines — confirm against the real file.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PINSRD_VdqEdIb(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PINSRD_VdqEdIbR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
@ -1128,35 +993,36 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PINSRD_VdqEdIb(bxInstruction_c *i)
#if BX_SUPPORT_X86_64
if (i->os64L()) /* 64 bit operand size mode */
{
Bit64u op2;
// Ib is masked to 1 bit: selects the low or high qword to replace.
op1.xmm64u(i->Ib() & 1) = BX_READ_64BIT_REG(i->rm());
}
else
#endif
{
// Ib is masked to 2 bits: selects which of the four dwords to replace.
op1.xmm32u(i->Ib() & 3) = BX_READ_32BIT_REG(i->rm());
}
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_64BIT_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
op2 = read_virtual_qword_64(i->seg(), eaddr);
}
/* now write result back to destination */
BX_WRITE_XMM_REG(i->nnn(), op1);
#endif
}
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PINSRD_VdqEdIbM(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
#if BX_SUPPORT_X86_64
if (i->os64L()) /* 64 bit operand size mode */
{
Bit64u op2 = read_virtual_qword_64(i->seg(), eaddr);
op1.xmm64u(i->Ib() & 1) = op2;
}
else
#endif
{
Bit32u op2;
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_32BIT_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
op2 = read_virtual_dword(i->seg(), eaddr);
}
Bit32u op2 = read_virtual_dword(i->seg(), eaddr);
op1.xmm32u(i->Ib() & 3) = op2;
}
@ -1818,7 +1684,6 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_GdUdqIb(bxInstruction_c *i)
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
Bit8u count = i->Ib() & 0x7;
Bit32u result = (Bit32u) op.xmm16u(count);
BX_WRITE_32BIT_REGZ(i->nnn(), result);
#endif
}
@ -3137,34 +3002,34 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRLW_UdqIb(bxInstruction_c *i)
// PSRAW with an immediate count: right-shifts each of the eight 16-bit words
// of the XMM register selected by rm(), using the signed (xmm16s) view of
// each word, and writes the register back.
// NOTE(review): this text is a rendered diff. Every statement appears twice,
// once targeting a separate `result` register and once updating `op` in place;
// the `result` lines are presumably the removed version — confirm against the
// real file.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAW_UdqIb(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm()), result;
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
Bit8u shift = i->Ib();
// A zero count returns early, before any register write.
if(shift == 0) return;
// Counts above 15 fill each word with copies of its sign bit (bit 15).
if(shift > 15) {
result.xmm16u(0) = (op.xmm16u(0) & 0x8000) ? 0xffff : 0;
result.xmm16u(1) = (op.xmm16u(1) & 0x8000) ? 0xffff : 0;
result.xmm16u(2) = (op.xmm16u(2) & 0x8000) ? 0xffff : 0;
result.xmm16u(3) = (op.xmm16u(3) & 0x8000) ? 0xffff : 0;
result.xmm16u(4) = (op.xmm16u(4) & 0x8000) ? 0xffff : 0;
result.xmm16u(5) = (op.xmm16u(5) & 0x8000) ? 0xffff : 0;
result.xmm16u(6) = (op.xmm16u(6) & 0x8000) ? 0xffff : 0;
result.xmm16u(7) = (op.xmm16u(7) & 0x8000) ? 0xffff : 0;
op.xmm16u(0) = (op.xmm16u(0) & 0x8000) ? 0xffff : 0;
op.xmm16u(1) = (op.xmm16u(1) & 0x8000) ? 0xffff : 0;
op.xmm16u(2) = (op.xmm16u(2) & 0x8000) ? 0xffff : 0;
op.xmm16u(3) = (op.xmm16u(3) & 0x8000) ? 0xffff : 0;
op.xmm16u(4) = (op.xmm16u(4) & 0x8000) ? 0xffff : 0;
op.xmm16u(5) = (op.xmm16u(5) & 0x8000) ? 0xffff : 0;
op.xmm16u(6) = (op.xmm16u(6) & 0x8000) ? 0xffff : 0;
op.xmm16u(7) = (op.xmm16u(7) & 0x8000) ? 0xffff : 0;
}
else {
// Counts 1..15: shift the signed view of each word, store it back as unsigned.
result.xmm16u(0) = (Bit16u)(op.xmm16s(0) >> shift);
result.xmm16u(1) = (Bit16u)(op.xmm16s(1) >> shift);
result.xmm16u(2) = (Bit16u)(op.xmm16s(2) >> shift);
result.xmm16u(3) = (Bit16u)(op.xmm16s(3) >> shift);
result.xmm16u(4) = (Bit16u)(op.xmm16s(4) >> shift);
result.xmm16u(5) = (Bit16u)(op.xmm16s(5) >> shift);
result.xmm16u(6) = (Bit16u)(op.xmm16s(6) >> shift);
result.xmm16u(7) = (Bit16u)(op.xmm16s(7) >> shift);
op.xmm16u(0) = (Bit16u)(op.xmm16s(0) >> shift);
op.xmm16u(1) = (Bit16u)(op.xmm16s(1) >> shift);
op.xmm16u(2) = (Bit16u)(op.xmm16s(2) >> shift);
op.xmm16u(3) = (Bit16u)(op.xmm16s(3) >> shift);
op.xmm16u(4) = (Bit16u)(op.xmm16s(4) >> shift);
op.xmm16u(5) = (Bit16u)(op.xmm16s(5) >> shift);
op.xmm16u(6) = (Bit16u)(op.xmm16s(6) >> shift);
op.xmm16u(7) = (Bit16u)(op.xmm16s(7) >> shift);
}
/* now write result back to destination */
BX_WRITE_XMM_REG(i->rm(), result);
BX_WRITE_XMM_REG(i->rm(), op);
#endif
}
@ -3222,26 +3087,26 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRLD_UdqIb(bxInstruction_c *i)
// PSRAD with an immediate count: right-shifts each of the four 32-bit dwords
// of the XMM register selected by rm(), using the signed (xmm32s) view of
// each dword, and writes the register back.
// NOTE(review): this text is a rendered diff. Every statement appears twice,
// once targeting a separate `result` register and once updating `op` in place;
// the `result` lines are presumably the removed version — confirm against the
// real file.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAD_UdqIb(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm()), result;
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
Bit8u shift = i->Ib();
// A zero count returns early, before any register write.
if(shift == 0) return;
// Counts above 31 fill each dword with copies of its sign bit (bit 31).
if(shift > 31) {
result.xmm32u(0) = (op.xmm32u(0) & 0x80000000) ? 0xffffffff : 0;
result.xmm32u(1) = (op.xmm32u(1) & 0x80000000) ? 0xffffffff : 0;
result.xmm32u(2) = (op.xmm32u(2) & 0x80000000) ? 0xffffffff : 0;
result.xmm32u(3) = (op.xmm32u(3) & 0x80000000) ? 0xffffffff : 0;
op.xmm32u(0) = (op.xmm32u(0) & 0x80000000) ? 0xffffffff : 0;
op.xmm32u(1) = (op.xmm32u(1) & 0x80000000) ? 0xffffffff : 0;
op.xmm32u(2) = (op.xmm32u(2) & 0x80000000) ? 0xffffffff : 0;
op.xmm32u(3) = (op.xmm32u(3) & 0x80000000) ? 0xffffffff : 0;
}
else {
// Counts 1..31: shift the signed view of each dword, store it back as unsigned.
result.xmm32u(0) = (Bit32u)(op.xmm32s(0) >> shift);
result.xmm32u(1) = (Bit32u)(op.xmm32s(1) >> shift);
result.xmm32u(2) = (Bit32u)(op.xmm32s(2) >> shift);
result.xmm32u(3) = (Bit32u)(op.xmm32s(3) >> shift);
op.xmm32u(0) = (Bit32u)(op.xmm32s(0) >> shift);
op.xmm32u(1) = (Bit32u)(op.xmm32s(1) >> shift);
op.xmm32u(2) = (Bit32u)(op.xmm32s(2) >> shift);
op.xmm32u(3) = (Bit32u)(op.xmm32s(3) >> shift);
}
/* now write result back to destination */
BX_WRITE_XMM_REG(i->rm(), result);
BX_WRITE_XMM_REG(i->rm(), op);
#endif
}

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////////////////////////////////////
// $Id: sse_move.cc,v 1.126 2010-12-25 07:59:15 sshwarts Exp $
// $Id: sse_move.cc,v 1.127 2010-12-25 17:04:36 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2003-2010 Stanislav Shwartsman
@ -26,6 +26,11 @@
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
// Make code more tidy with a few macros.
#if BX_SUPPORT_X86_64==0
#define RDI EDI
#endif
#if BX_CPU_LEVEL >= 6
void BX_CPU_C::print_state_SSE(void)
@ -620,23 +625,10 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVHPS_MqVps(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::MASKMOVDQU_VdqUdq(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
bx_address rdi;
bx_address rdi = RDI & i->asize_mask();
BxPackedXmmRegister op = BX_READ_XMM_REG(i->nnn()),
mask = BX_READ_XMM_REG(i->rm()), temp;
#if BX_SUPPORT_X86_64
if (i->as64L()) { /* 64 bit address mode */
rdi = RDI;
}
else
#endif
if (i->as32L()) {
rdi = EDI;
}
else { /* 16 bit address mode */
rdi = DI;
}
/* implement as read-modify-write for efficiency */
read_virtual_dqword(i->seg(), rdi, (Bit8u *) &temp);

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////////////////////////////////////
// $Id: sse_pfp.cc,v 1.70 2010-12-25 07:59:15 sshwarts Exp $
// $Id: sse_pfp.cc,v 1.71 2010-12-25 17:04:36 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2003-2010 Stanislav Shwartsman
@ -2047,13 +2047,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPPS_VpsWpsIbR(bxInstruction_c *i)
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
int ib = i->Ib();
/* mask used bits, ignore reserved */
if (ib > 7) {
BX_ERROR(("CMPPS_VpsWpsIb: unrecognized predicate %u", i->Ib()));
}
ib &= 7;
int ib = i->Ib() & 7;
if (MXCSR.get_DAZ()) {
op1.xmm32u(0) = float32_denormal_to_zero(op1.xmm32u(0));
@ -2109,13 +2103,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPPD_VpdWpdIbR(bxInstruction_c *i)
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
int ib = i->Ib();
/* mask used bits, ignore reserved */
if (ib > 7) {
BX_ERROR(("CMPPD_VpdWpdIb: unrecognized predicate %u", i->Ib()));
}
ib &= 7;
int ib = i->Ib() & 7;
if (MXCSR.get_DAZ())
{
@ -2155,17 +2143,11 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPPD_VpdWpdIbR(bxInstruction_c *i)
// CMPSD, register form: compares the low qwords of the XMM registers selected
// by nnn() and rm() under the predicate in Ib, and writes an all-ones or
// all-zero 64-bit mask to the low qword of the nnn() register.
// NOTE(review): this text is a rendered diff. A hunk header elides part of the
// DAZ handling, and statements appear in pairs: the `result` declaration and
// assignments plus the BX_ERROR range check are presumably the removed
// version, the lines reusing `op1` and masking Ib inline the replacement —
// confirm against the real file.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPSD_VsdWsdIbR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->nnn()), op2 = BX_READ_XMM_REG_LO_QWORD(i->rm()), result = 0;
float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->nnn()), op2 = BX_READ_XMM_REG_LO_QWORD(i->rm());
float_status_t status_word;
mxcsr_to_softfloat_status_word(status_word, MXCSR);
int ib = i->Ib();
/* mask used bits, ignore reserved */
if (ib > 7) {
BX_ERROR(("CMPSD_VsdWsdIb: unrecognized predicate %u", i->Ib()));
}
ib &= 7;
// Only the low 3 bits of the immediate select the predicate.
int ib = i->Ib() & 7;
if (MXCSR.get_DAZ())
{
@ -2175,20 +2157,20 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPSD_VsdWsdIbR(bxInstruction_c *i)
// Predicates 0..3 index compare64[] directly; a true compare yields all ones.
if(ib < 4) {
if(compare64[ib](op1, op2, status_word)) {
result = BX_CONST64(0xFFFFFFFFFFFFFFFF);
op1 = BX_CONST64(0xFFFFFFFFFFFFFFFF);
} else {
result = 0;
op1 = 0;
}
} else {
// Predicates 4..7 reuse compare64[ib-4] with the outcome inverted.
if(compare64[ib-4](op1, op2, status_word)) {
result = 0;
op1 = 0;
} else {
result = BX_CONST64(0xFFFFFFFFFFFFFFFF);
op1 = BX_CONST64(0xFFFFFFFFFFFFFFFF);
}
}
// Exception flags are checked before the destination write.
check_exceptionsSSE(status_word.float_exception_flags);
BX_WRITE_XMM_REG_LO_QWORD(i->nnn(), result);
BX_WRITE_XMM_REG_LO_QWORD(i->nnn(), op1);
#endif
}
@ -2200,17 +2182,11 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPSD_VsdWsdIbR(bxInstruction_c *i)
// CMPSS, register form: compares the low dwords of the XMM registers selected
// by nnn() and rm() under the predicate in Ib, and writes an all-ones or
// all-zero 32-bit mask to the low dword of the nnn() register.
// NOTE(review): this text is a rendered diff. A hunk header elides part of the
// DAZ handling, and statements appear in pairs: the `result` declaration and
// assignments plus the BX_ERROR range check are presumably the removed
// version, the lines reusing `op1` and masking Ib inline the replacement —
// confirm against the real file.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPSS_VssWssIbR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->nnn()), op2 = BX_READ_XMM_REG_LO_DWORD(i->rm()), result = 0;
float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->nnn()), op2 = BX_READ_XMM_REG_LO_DWORD(i->rm());
float_status_t status_word;
mxcsr_to_softfloat_status_word(status_word, MXCSR);
int ib = i->Ib();
/* mask used bits, ignore reserved */
if (ib > 7) {
BX_ERROR(("CMPSS_VssWssIb: unrecognized predicate %u", i->Ib()));
}
ib &= 7;
// Only the low 3 bits of the immediate select the predicate.
int ib = i->Ib() & 7;
if (MXCSR.get_DAZ())
{
@ -2220,20 +2196,20 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPSS_VssWssIbR(bxInstruction_c *i)
// Predicates 0..3 index compare32[] directly; a true compare yields all ones.
if(ib < 4) {
if(compare32[ib](op1, op2, status_word)) {
result = 0xFFFFFFFF;
op1 = 0xFFFFFFFF;
} else {
result = 0;
op1 = 0;
}
} else {
// Predicates 4..7 reuse compare32[ib-4] with the outcome inverted.
if(compare32[ib-4](op1, op2, status_word)) {
result = 0;
op1 = 0;
} else {
result = 0xFFFFFFFF;
op1 = 0xFFFFFFFF;
}
}
// Exception flags are checked before the destination write.
check_exceptionsSSE(status_word.float_exception_flags);
BX_WRITE_XMM_REG_LO_DWORD(i->nnn(), result);
BX_WRITE_XMM_REG_LO_DWORD(i->nnn(), op1);
#endif
}
@ -2426,22 +2402,13 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::ROUNDSD_VsdWsdIbR(bxInstruction_c *i)
* packed SP floating-point values from xmm2, add and selectively
* store the packed SP floating-point values or zero values to xmm1
*/
void BX_CPP_AttrRegparmN(1) BX_CPU_C::DPPS_VpsWpsIb(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::DPPS_VpsWpsIbR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, tmp;
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm()), tmp;
Bit8u mask = i->Ib();
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_XMM_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
}
float_status_t status_word;
mxcsr_to_softfloat_status_word(status_word, MXCSR);
@ -2489,22 +2456,13 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::DPPS_VpsWpsIb(bxInstruction_c *i)
* packed DP floating-point values from xmm2, add and selectively
* store the packed DP floating-point values or zero values to xmm1
*/
void BX_CPP_AttrRegparmN(1) BX_CPU_C::DPPD_VpdWpdIb(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::DPPD_VpdWpdIbR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, tmp;
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm()), tmp;
Bit8u mask = i->Ib();
/* op2 is a register or memory reference */
if (i->modC0()) {
op2 = BX_READ_XMM_REG(i->rm());
}
else {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
/* pointer, segment address pair */
readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2);
}
float_status_t status_word;
mxcsr_to_softfloat_status_word(status_word, MXCSR);