Added SSE4_2 instructions emulation

This commit is contained in:
Stanislav Shwartsman 2007-10-01 19:59:37 +00:00
parent de72d9141f
commit dbb91069f4
9 changed files with 973 additions and 132 deletions

View File

@ -68,6 +68,7 @@ OBJS = \
sse_move.o \
sse_pfp.o \
sse_rcp.o \
sse_string.o \
soft_int.o \
io_pro.o \
$(APIC_OBJS) \
@ -735,6 +736,15 @@ sse_rcp.o: sse_rcp.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
crregs.h descriptor.h icache.h apic.h ../cpu/i387.h ../fpu/softfloat.h \
../config.h ../fpu/tag_w.h ../fpu/status_w.h ../fpu/control_w.h \
../cpu/xmm.h ../fpu/softfloat-specialize.h ../fpu/softfloat.h
sse_string.o: sse_string.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
../bx_debug/debug.h ../config.h ../osdep.h ../bxversion.h \
../gui/siminterface.h ../memory/memory.h ../pc_system.h ../plugin.h \
../extplugin.h ../gui/gui.h ../gui/textconfig.h ../config.h \
../gui/keymap.h ../instrument/stubs/instrument.h cpu.h \
../cpu/lazy_flags.h ../cpu/hostasm.h ../disasm/disasm.h ../config.h \
crregs.h descriptor.h icache.h apic.h ../cpu/i387.h ../fpu/softfloat.h \
../config.h ../fpu/tag_w.h ../fpu/status_w.h ../fpu/control_w.h \
../cpu/xmm.h ../fpu/softfloat-specialize.h ../fpu/softfloat.h
stack16.o: stack16.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
../bx_debug/debug.h ../config.h ../osdep.h ../bxversion.h \
../gui/siminterface.h ../memory/memory.h ../pc_system.h ../plugin.h \

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////////////////////////////////////
// $Id: bit.cc,v 1.32 2007-09-19 19:38:08 sshwarts Exp $
// $Id: bit.cc,v 1.33 2007-10-01 19:59:35 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2001 MandrakeSoft S.A.
@ -1276,9 +1276,10 @@ void BX_CPU_C::BTR_EqIb(bxInstruction_c *i)
}
#endif
/* 0F B8 */
void BX_CPU_C::POPCNT_GwEw(bxInstruction_c *i)
{
#if BX_SUPPORT_POPCNT
#if BX_SUPPORT_POPCNT || (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
Bit16u op1_16, op2_16;
/* op2_16 is a register or memory reference */
@ -1296,8 +1297,8 @@ void BX_CPU_C::POPCNT_GwEw(bxInstruction_c *i)
op2_16 >>= 1;
}
Bit32u flags32 = op1_16 ? 0 : EFlagsZFMask;
setEFlagsOSZAPC(flags32);
Bit32u flags = op1_16 ? 0 : EFlagsZFMask;
setEFlagsOSZAPC(flags);
/* now write result back to destination */
BX_WRITE_16BIT_REG(i->nnn(), op1_16);
@ -1307,9 +1308,10 @@ void BX_CPU_C::POPCNT_GwEw(bxInstruction_c *i)
#endif
}
/* 0F B8 */
void BX_CPU_C::POPCNT_GdEd(bxInstruction_c *i)
{
#if BX_SUPPORT_POPCNT
#if BX_SUPPORT_POPCNT || (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
Bit32u op1_32, op2_32;
/* op2_32 is a register or memory reference */
@ -1327,8 +1329,8 @@ void BX_CPU_C::POPCNT_GdEd(bxInstruction_c *i)
op2_32 >>= 1;
}
Bit32u flags32 = op1_32 ? 0 : EFlagsZFMask;
setEFlagsOSZAPC(flags32);
Bit32u flags = op1_32 ? 0 : EFlagsZFMask;
setEFlagsOSZAPC(flags);
/* now write result back to destination */
BX_WRITE_32BIT_REGZ(i->nnn(), op1_32);
@ -1339,9 +1341,10 @@ void BX_CPU_C::POPCNT_GdEd(bxInstruction_c *i)
}
#if BX_SUPPORT_X86_64
/* 0F B8 */
void BX_CPU_C::POPCNT_GqEq(bxInstruction_c *i)
{
#if BX_SUPPORT_POPCNT
#if BX_SUPPORT_POPCNT || (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
Bit64u op1_64, op2_64;
/* op2_64 is a register or memory reference */
@ -1359,8 +1362,8 @@ void BX_CPU_C::POPCNT_GqEq(bxInstruction_c *i)
op2_64 >>= 1;
}
Bit32u flags32 = op1_64 ? 0 : EFlagsZFMask;
setEFlagsOSZAPC(flags32);
Bit32u flags = op1_64 ? 0 : EFlagsZFMask;
setEFlagsOSZAPC(flags);
/* now write result back to destination */
BX_WRITE_64BIT_REG(i->nnn(), op1_64);
@ -1369,6 +1372,158 @@ void BX_CPU_C::POPCNT_GqEq(bxInstruction_c *i)
UndefinedOpcode(i);
#endif
}
#endif
#endif // BX_SUPPORT_X86_64
#endif /* BX_CPU_LEVEL >= 3 */
// 3-byte opcodes
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
#define CRC32_POLYNOMIAL BX_CONST64(0x11edc6f41)
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
// primitives for CRC32 usage
// Mirror the bit order of a byte: bit 0 swaps with bit 7, bit 1 with bit 6,
// and so on. Returns the mirrored byte.
static Bit8u BitReflect8(Bit8u val8)
{
  Bit8u mirrored = 0;

  for (unsigned bit = 0; bit < 8; bit++) {
    // a set bit at position 'bit' lands at position (7 - bit)
    if ((val8 >> bit) & 1)
      mirrored |= (Bit8u)(0x80 >> bit);
  }

  return mirrored;
}
// Mirror the bit order of a 16-bit value: each byte is bit-reversed with
// BitReflect8 and the two bytes trade places.
BX_CPP_INLINE Bit16u BitReflect16(Bit16u val16)
{
  Bit16u new_hi = BitReflect8(val16 & 0xff); // mirrored low byte becomes the high byte
  Bit16u new_lo = BitReflect8(val16 >> 8);   // mirrored high byte becomes the low byte

  return (new_hi << 8) | new_lo;
}
// Mirror the bit order of a 32-bit value: each 16-bit half is bit-reversed
// with BitReflect16 and the two halves trade places.
BX_CPP_INLINE Bit32u BitReflect32(Bit32u val32)
{
  Bit32u new_hi = BitReflect16(val32 & 0xffff); // mirrored low word becomes the high word
  Bit32u new_lo = BitReflect16(val32 >> 16);    // mirrored high word becomes the low word

  return (new_hi << 16) | new_lo;
}
// Carry-less (XOR based) long division of 'dividend' by 'divisor'; returns the
// remainder truncated to 32 bits.
//
// The upper 32 bits of the dividend seed the running remainder, then the
// lower 32 bits are shifted in one at a time, most significant first.
// NOTE(review): assumes the divisor has bit 32 set (as CRC32_POLYNOMIAL does)
// so that each XOR clears bit 32 again - confirm for any other caller.
static Bit32u mod2_64bit(Bit64u divisor, Bit64u dividend)
{
  Bit64u rem = dividend >> 32;
  int bit = 32;

  while (bit-- > 0) {
    // bring down the next bit of the dividend
    rem <<= 1;
    rem |= (dividend >> bit) & 1;

    // bit 32 set: subtract (XOR) the divisor
    if ((rem >> 32) & 1)
      rem ^= divisor;
  }

  return (Bit32u) rem;
}
#endif // (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
// CRC32 with a byte source operand: folds one byte (register or memory) into
// the CRC accumulator held in the 32-bit destination register.
// Both values are bit-reflected before the polynomial division and the result
// is reflected back before it is stored.
void BX_CPU_C::CRC32_GdEb(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
  Bit8u src;

  /* source is a register or memory reference */
  if (! i->modC0()) {
    read_virtual_byte(i->seg(), RMAddr(i), &src);
  }
  else {
    src = BX_READ_8BIT_REGx(i->rm(),i->extend8bitL());
  }

  Bit32u crc = BX_READ_32BIT_REG(i->nnn());
  crc = BitReflect32(crc);

  /* reflected source shifted up by 32, reflected accumulator shifted up by 8 */
  Bit64u dividend = (((Bit64u) BitReflect8(src)) << 32) ^ (((Bit64u) crc) << 8);
  crc = mod2_64bit(CRC32_POLYNOMIAL, dividend);

  /* now write result back to destination */
  BX_WRITE_32BIT_REGZ(i->nnn(), BitReflect32(crc));
#else
  BX_INFO(("CRC32_GdEb: required SSE4_2 support, required SSE4.2, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}
// CRC32 with a word/dword/qword source operand (chosen by operand size):
// folds the source into the CRC accumulator held in the 32-bit destination
// register. A 64-bit source is processed as two 32-bit steps, low dword
// first. Values are bit-reflected before the polynomial division and the
// result is reflected back before it is stored.
void BX_CPU_C::CRC32_GdEv(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
  Bit32u crc = BX_READ_32BIT_REG(i->nnn());
  crc = BitReflect32(crc);

#if BX_SUPPORT_X86_64
  if (i->os64L()) /* 64 bit operand size */
  {
    Bit64u src;

    if (! i->modC0()) {
      read_virtual_qword(i->seg(), RMAddr(i), &src);
    }
    else {
      src = BX_READ_64BIT_REG(i->rm());
    }

    /* low dword is folded in first, then the high dword */
    Bit32u half[2];
    half[0] = src & 0xffffffff;
    half[1] = src >> 32;

    for (unsigned n = 0; n < 2; n++) {
      Bit64u dividend = (((Bit64u) BitReflect32(half[n])) << 32) ^ (((Bit64u) crc) << 32);
      crc = mod2_64bit(CRC32_POLYNOMIAL, dividend);
    }
  }
  else
#endif
  if (i->os32L()) /* 32 bit operand size */
  {
    Bit32u src;

    if (! i->modC0()) {
      read_virtual_dword(i->seg(), RMAddr(i), &src);
    }
    else {
      src = BX_READ_32BIT_REG(i->rm());
    }

    Bit64u dividend = (((Bit64u) BitReflect32(src)) << 32) ^ (((Bit64u) crc) << 32);
    crc = mod2_64bit(CRC32_POLYNOMIAL, dividend);
  }
  else /* 16 bit operand size */
  {
    Bit16u src;

    if (! i->modC0()) {
      read_virtual_word(i->seg(), RMAddr(i), &src);
    }
    else {
      src = BX_READ_16BIT_REG(i->rm());
    }

    /* the accumulator is only shifted up by the source width (16) here */
    Bit64u dividend = (((Bit64u) BitReflect16(src)) << 32) ^ (((Bit64u) crc) << 16);
    crc = mod2_64bit(CRC32_POLYNOMIAL, dividend);
  }

  /* now write result back to destination */
  BX_WRITE_32BIT_REGZ(i->nnn(), BitReflect32(crc));
#else
  BX_INFO(("CRC32_GdEv: required SSE4_2 support, required SSE4.2, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}
#endif // (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
#endif // (BX_CPU_LEVEL >= 3)

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////////////////////////////////////
// $Id: cpu.h,v 1.328 2007-09-28 19:51:44 sshwarts Exp $
// $Id: cpu.h,v 1.329 2007-10-01 19:59:36 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2001 MandrakeSoft S.A.
@ -1971,7 +1971,7 @@ public: // for now...
BX_SMF void MAXPS_VpsWps(bxInstruction_c *i);
BX_SMF void MAXSS_VssWss(bxInstruction_c *i);
BX_SMF void PSHUFW_PqQqIb(bxInstruction_c *i);
BX_SMF void PSHUFLW_VqWqIb(bxInstruction_c *i);
BX_SMF void PSHUFLW_VdqWdqIb(bxInstruction_c *i);
BX_SMF void CMPPS_VpsWpsIb(bxInstruction_c *i);
BX_SMF void CMPSS_VssWssIb(bxInstruction_c *i);
BX_SMF void PINSRW_PqEwIb(bxInstruction_c *i);
@ -2023,23 +2023,23 @@ public: // for now...
BX_SMF void DIVSD_VsdWsd(bxInstruction_c *i);
BX_SMF void MAXPD_VpdWpd(bxInstruction_c *i);
BX_SMF void MAXSD_VsdWsd(bxInstruction_c *i);
BX_SMF void PUNPCKLBW_VdqWq(bxInstruction_c *i);
BX_SMF void PUNPCKLWD_VdqWq(bxInstruction_c *i);
BX_SMF void UNPCKLPS_VpsWq(bxInstruction_c *i);
BX_SMF void PACKSSWB_VdqWq(bxInstruction_c *i);
BX_SMF void PCMPGTB_VdqWq(bxInstruction_c *i);
BX_SMF void PCMPGTW_VdqWq(bxInstruction_c *i);
BX_SMF void PUNPCKLBW_VdqWdq(bxInstruction_c *i);
BX_SMF void PUNPCKLWD_VdqWdq(bxInstruction_c *i);
BX_SMF void UNPCKLPS_VpsWdq(bxInstruction_c *i);
BX_SMF void PACKSSWB_VdqWdq(bxInstruction_c *i);
BX_SMF void PCMPGTB_VdqWdq(bxInstruction_c *i);
BX_SMF void PCMPGTW_VdqWdq(bxInstruction_c *i);
BX_SMF void PCMPGTD_VdqWdq(bxInstruction_c *i);
BX_SMF void PACKUSWB_VdqWdq(bxInstruction_c *i);
BX_SMF void PUNPCKHBW_VdqWq(bxInstruction_c *i);
BX_SMF void PUNPCKHWD_VdqWq(bxInstruction_c *i);
BX_SMF void UNPCKHPS_VpsWq(bxInstruction_c *i);
BX_SMF void PUNPCKHBW_VdqWdq(bxInstruction_c *i);
BX_SMF void PUNPCKHWD_VdqWdq(bxInstruction_c *i);
BX_SMF void UNPCKHPS_VpsWdq(bxInstruction_c *i);
BX_SMF void PACKSSDW_VdqWdq(bxInstruction_c *i);
BX_SMF void PUNPCKLQDQ_VdqWq(bxInstruction_c *i);
BX_SMF void PUNPCKHQDQ_VdqWq(bxInstruction_c *i);
BX_SMF void PUNPCKLQDQ_VdqWdq(bxInstruction_c *i);
BX_SMF void PUNPCKHQDQ_VdqWdq(bxInstruction_c *i);
BX_SMF void MOVD_VdqEd(bxInstruction_c *i);
BX_SMF void PSHUFD_VdqWdqIb(bxInstruction_c *i);
BX_SMF void PSHUFHW_VqWqIb(bxInstruction_c *i);
BX_SMF void PSHUFHW_VdqWdqIb(bxInstruction_c *i);
BX_SMF void PCMPEQB_VdqWdq(bxInstruction_c *i);
BX_SMF void PCMPEQW_VdqWdq(bxInstruction_c *i);
BX_SMF void PCMPEQD_VdqWdq(bxInstruction_c *i);
@ -2127,6 +2127,7 @@ public: // for now...
BX_SMF void LDDQU_VdqMdq(bxInstruction_c *i);
/* SSE3 */
// 3-byte opcodes
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
/* SSE3E */
BX_SMF void PSHUFB_PqQq(bxInstruction_c *i);
@ -2163,9 +2164,7 @@ public: // for now...
BX_SMF void PABSD_VdqWdq(bxInstruction_c *i);
BX_SMF void PALIGNR_VdqWdqIb(bxInstruction_c *i);
/* SSE3E */
#endif
#if BX_SUPPORT_SSE >= 4
/* SSE4.1 */
BX_SMF void PBLENDVB_VdqWdq(bxInstruction_c *i);
BX_SMF void BLENDVPS_VpsWps(bxInstruction_c *i);
@ -2214,6 +2213,16 @@ public: // for now...
BX_SMF void DPPD_VpdWpdIb(bxInstruction_c *i);
BX_SMF void MPSADBW_VdqWdqIb(bxInstruction_c *i);
/* SSE4.1 */
/* SSE4.2 */
BX_SMF void CRC32_GdEb(bxInstruction_c *i);
BX_SMF void CRC32_GdEv(bxInstruction_c *i);
BX_SMF void PCMPGTQ_VdqWdq(bxInstruction_c *i);
BX_SMF void PCMPESTRM_VdqWdqIb(bxInstruction_c *i);
BX_SMF void PCMPESTRI_VdqWdqIb(bxInstruction_c *i);
BX_SMF void PCMPISTRM_VdqWdqIb(bxInstruction_c *i);
BX_SMF void PCMPISTRI_VdqWdqIb(bxInstruction_c *i);
/* SSE4.2 */
#endif
/*** Duplicate SSE instructions ***/
@ -2229,8 +2238,8 @@ public: // for now...
BX_SMF void MOVDQU_WdqVdq(bxInstruction_c *);
BX_SMF void MOVDQA_VdqWdq(bxInstruction_c *);
BX_SMF void MOVDQA_WdqVdq(bxInstruction_c *);
BX_SMF void PUNPCKHDQ_VdqWq(bxInstruction_c *);
BX_SMF void PUNPCKLDQ_VdqWq(bxInstruction_c *);
BX_SMF void PUNPCKHDQ_VdqWdq(bxInstruction_c *);
BX_SMF void PUNPCKLDQ_VdqWdq(bxInstruction_c *);
BX_SMF void ANDPD_VpdWpd(bxInstruction_c *);
BX_SMF void ANDNPD_VpdWpd(bxInstruction_c *);
BX_SMF void ORPD_VpdWpd(bxInstruction_c *);
@ -2239,8 +2248,8 @@ public: // for now...
BX_SMF void PANDN_VdqWdq(bxInstruction_c *);
BX_SMF void POR_VdqWdq(bxInstruction_c *);
BX_SMF void PXOR_VdqWdq(bxInstruction_c *);
BX_SMF void UNPCKHPD_VpdWq(bxInstruction_c *);
BX_SMF void UNPCKLPD_VpdWq(bxInstruction_c *);
BX_SMF void UNPCKHPD_VpdWdq(bxInstruction_c *);
BX_SMF void UNPCKLPD_VpdWdq(bxInstruction_c *);
BX_SMF void MOVLPD_VsdMq(bxInstruction_c *);
BX_SMF void MOVLPD_MqVsd(bxInstruction_c *);
BX_SMF void MOVHPD_VsdMq(bxInstruction_c *);
@ -2262,40 +2271,40 @@ public: // for now...
#define SSE4_ALIAS(i) BxError
#endif
#define MOVUPD_VpdWpd /* 66 0f 10 */ SSE2_ALIAS(MOVUPS_VpsWps) /* 0f 10 */
#define MOVUPD_WpdVpd /* 66 0f 11 */ SSE2_ALIAS(MOVUPS_WpsVps) /* 0f 11 */
#define MOVAPD_VpdWpd /* 66 0f 28 */ SSE2_ALIAS(MOVAPS_VpsWps) /* 0f 28 */
#define MOVAPD_WpdVpd /* 66 0f 29 */ SSE2_ALIAS(MOVAPS_WpsVps) /* 0f 29 */
#define MOVDQU_VdqWdq /* f3 0f 6f */ SSE2_ALIAS(MOVUPS_VpsWps) /* 0f 10 */
#define MOVDQU_WdqVdq /* f3 0f 7f */ SSE2_ALIAS(MOVUPS_WpsVps) /* 0f 11 */
#define MOVDQA_VdqWdq /* 66 0f 6f */ SSE2_ALIAS(MOVAPS_VpsWps) /* 0f 28 */
#define MOVDQA_WdqVdq /* 66 0f 7f */ SSE2_ALIAS(MOVAPS_WpsVps) /* 0f 29 */
#define MOVUPD_VpdWpd /* 66 0f 10 */ SSE2_ALIAS(MOVUPS_VpsWps) /* 0f 10 */
#define MOVUPD_WpdVpd /* 66 0f 11 */ SSE2_ALIAS(MOVUPS_WpsVps) /* 0f 11 */
#define MOVAPD_VpdWpd /* 66 0f 28 */ SSE2_ALIAS(MOVAPS_VpsWps) /* 0f 28 */
#define MOVAPD_WpdVpd /* 66 0f 29 */ SSE2_ALIAS(MOVAPS_WpsVps) /* 0f 29 */
#define MOVDQU_VdqWdq /* f3 0f 6f */ SSE2_ALIAS(MOVUPS_VpsWps) /* 0f 10 */
#define MOVDQU_WdqVdq /* f3 0f 7f */ SSE2_ALIAS(MOVUPS_WpsVps) /* 0f 11 */
#define MOVDQA_VdqWdq /* 66 0f 6f */ SSE2_ALIAS(MOVAPS_VpsWps) /* 0f 28 */
#define MOVDQA_WdqVdq /* 66 0f 7f */ SSE2_ALIAS(MOVAPS_WpsVps) /* 0f 29 */
#define PUNPCKLDQ_VdqWq /* 66 0f 62 */ SSE2_ALIAS(UNPCKLPS_VpsWq) /* 0f 14 */
#define PUNPCKHDQ_VdqWq /* 66 0f 6a */ SSE2_ALIAS(UNPCKHPS_VpsWq) /* 0f 15 */
#define PUNPCKLDQ_VdqWdq /* 66 0f 62 */ SSE2_ALIAS(UNPCKLPS_VpsWdq) /* 0f 14 */
#define PUNPCKHDQ_VdqWdq /* 66 0f 6a */ SSE2_ALIAS(UNPCKHPS_VpsWdq) /* 0f 15 */
#define PAND_VdqWdq /* 66 0f db */ SSE2_ALIAS(ANDPS_VpsWps) /* 0f 54 */
#define PANDN_VdqWdq /* 66 0f df */ SSE2_ALIAS(ANDNPS_VpsWps) /* 0f 55 */
#define POR_VdqWdq /* 66 0f eb */ SSE2_ALIAS(ORPS_VpsWps) /* 0f 56 */
#define PXOR_VdqWdq /* 66 0f ef */ SSE2_ALIAS(XORPS_VpsWps) /* 0f 57 */
#define PAND_VdqWdq /* 66 0f db */ SSE2_ALIAS(ANDPS_VpsWps) /* 0f 54 */
#define PANDN_VdqWdq /* 66 0f df */ SSE2_ALIAS(ANDNPS_VpsWps) /* 0f 55 */
#define POR_VdqWdq /* 66 0f eb */ SSE2_ALIAS(ORPS_VpsWps) /* 0f 56 */
#define PXOR_VdqWdq /* 66 0f ef */ SSE2_ALIAS(XORPS_VpsWps) /* 0f 57 */
#define ANDPD_VpdWpd /* 66 0f 54 */ SSE2_ALIAS(ANDPS_VpsWps) /* 0f 54 */
#define ANDNPD_VpdWpd /* 66 0f 55 */ SSE2_ALIAS(ANDNPS_VpsWps) /* 0f 55 */
#define ORPD_VpdWpd /* 66 0f 56 */ SSE2_ALIAS(ORPS_VpsWps) /* 0f 56 */
#define XORPD_VpdWpd /* 66 0f 57 */ SSE2_ALIAS(XORPS_VpsWps) /* 0f 57 */
#define ANDPD_VpdWpd /* 66 0f 54 */ SSE2_ALIAS(ANDPS_VpsWps) /* 0f 54 */
#define ANDNPD_VpdWpd /* 66 0f 55 */ SSE2_ALIAS(ANDNPS_VpsWps) /* 0f 55 */
#define ORPD_VpdWpd /* 66 0f 56 */ SSE2_ALIAS(ORPS_VpsWps) /* 0f 56 */
#define XORPD_VpdWpd /* 66 0f 57 */ SSE2_ALIAS(XORPS_VpsWps) /* 0f 57 */
#define MOVLPD_VsdMq /* 66 0f 12 */ SSE2_ALIAS(MOVLPS_VpsMq) /* 0f 12 */
#define MOVLPD_MqVsd /* 66 0f 13 */ SSE2_ALIAS(MOVLPS_MqVps) /* 0f 13 */
#define MOVHPD_VsdMq /* 66 0f 16 */ SSE2_ALIAS(MOVHPS_VpsMq) /* 0f 16 */
#define MOVHPD_MqVsd /* 66 0f 17 */ SSE2_ALIAS(MOVHPS_MqVps) /* 0f 17 */
#define MOVLPD_VsdMq /* 66 0f 12 */ SSE2_ALIAS(MOVLPS_VpsMq) /* 0f 12 */
#define MOVLPD_MqVsd /* 66 0f 13 */ SSE2_ALIAS(MOVLPS_MqVps) /* 0f 13 */
#define MOVHPD_VsdMq /* 66 0f 16 */ SSE2_ALIAS(MOVHPS_VpsMq) /* 0f 16 */
#define MOVHPD_MqVsd /* 66 0f 17 */ SSE2_ALIAS(MOVHPS_MqVps) /* 0f 17 */
#define MOVNTPD_MpdVpd /* 66 0f 2b */ SSE2_ALIAS(MOVNTPS_MpsVps) /* 0f 2b */
#define MOVNTDQ_MdqVdq /* 66 0f e7 */ SSE2_ALIAS(MOVNTPS_MpsVps) /* 0f 2b */
#define MOVNTPD_MpdVpd /* 66 0f 2b */ SSE2_ALIAS(MOVNTPS_MpsVps) /* 0f 2b */
#define MOVNTDQ_MdqVdq /* 66 0f e7 */ SSE2_ALIAS(MOVNTPS_MpsVps) /* 0f 2b */
#define UNPCKLPD_VpdWq /* 66 0f 14 */ PUNPCKLQDQ_VdqWq /* 66 0f 6c */
#define UNPCKHPD_VpdWq /* 66 0f 15 */ PUNPCKHQDQ_VdqWq /* 66 0f 6d */
#define UNPCKLPD_VpdWdq /* 66 0f 14 */ PUNPCKLQDQ_VdqWdq /* 66 0f 6c */
#define UNPCKHPD_VpdWdq /* 66 0f 15 */ PUNPCKHQDQ_VdqWdq /* 66 0f 6d */
#define MOVNTDQA_VdqMdq /* 66 0f 38 2a */ SSE4_ALIAS(LDDQU_VdqMdq) /* f2 0f f0 */
#define MOVNTDQA_VdqMdq /* 66 0f 38 2a */ SSE4_ALIAS(LDDQU_VdqMdq) /* f2 0f f0 */
#endif // #ifdef STAND_ALONE_DECODER

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////////////////////////////////////
// $Id: cpuid.cc,v 1.48 2007-09-27 16:11:32 sshwarts Exp $
// $Id: cpuid.cc,v 1.49 2007-10-01 19:59:36 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2001 MandrakeSoft S.A.
@ -160,7 +160,7 @@ Bit32u BX_CPU_C::get_extended_cpuid_features()
features |= (1<<19); // support SSE4
#endif
#if BX_SUPPORT_POPCNT
#if BX_SUPPORT_POPCNT || (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
features |= (1<<23); // support POPCNT instruction
#endif

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////////////////////////////////////
// $Id: fetchdecode.h,v 1.31 2007-09-19 19:38:09 sshwarts Exp $
// $Id: fetchdecode.h,v 1.32 2007-10-01 19:59:36 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2005 Stanislav Shwartsman
@ -1049,15 +1049,15 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f13[4] = {
};
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f14[4] = {
/* -- */ { 0, &BX_CPU_C::UNPCKLPS_VpsWq },
/* 66 */ { 0, &BX_CPU_C::UNPCKLPD_VpdWq },
/* -- */ { 0, &BX_CPU_C::UNPCKLPS_VpsWdq },
/* 66 */ { 0, &BX_CPU_C::UNPCKLPD_VpdWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f15[4] = {
/* -- */ { 0, &BX_CPU_C::UNPCKHPS_VpsWq },
/* 66 */ { 0, &BX_CPU_C::UNPCKHPD_VpdWq },
/* -- */ { 0, &BX_CPU_C::UNPCKHPS_VpsWdq },
/* 66 */ { 0, &BX_CPU_C::UNPCKHPD_VpdWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
@ -1246,42 +1246,42 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f5f[4] = {
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f60[4] = {
/* -- */ { 0, &BX_CPU_C::PUNPCKLBW_PqQd },
/* 66 */ { 0, &BX_CPU_C::PUNPCKLBW_VdqWq },
/* 66 */ { 0, &BX_CPU_C::PUNPCKLBW_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f61[4] = {
/* -- */ { 0, &BX_CPU_C::PUNPCKLWD_PqQd },
/* 66 */ { 0, &BX_CPU_C::PUNPCKLWD_VdqWq },
/* 66 */ { 0, &BX_CPU_C::PUNPCKLWD_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f62[4] = {
/* -- */ { 0, &BX_CPU_C::PUNPCKLDQ_PqQd },
/* 66 */ { 0, &BX_CPU_C::PUNPCKLDQ_VdqWq },
/* 66 */ { 0, &BX_CPU_C::PUNPCKLDQ_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f63[4] = {
/* -- */ { 0, &BX_CPU_C::PACKSSWB_PqQq },
/* 66 */ { 0, &BX_CPU_C::PACKSSWB_VdqWq },
/* 66 */ { 0, &BX_CPU_C::PACKSSWB_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f64[4] = {
/* -- */ { 0, &BX_CPU_C::PCMPGTB_PqQq },
/* 66 */ { 0, &BX_CPU_C::PCMPGTB_VdqWq },
/* -- */ { 0, &BX_CPU_C::PCMPGTB_PqQq },
/* 66 */ { 0, &BX_CPU_C::PCMPGTB_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f65[4] = {
/* -- */ { 0, &BX_CPU_C::PCMPGTW_PqQq },
/* 66 */ { 0, &BX_CPU_C::PCMPGTW_VdqWq },
/* -- */ { 0, &BX_CPU_C::PCMPGTW_PqQq },
/* 66 */ { 0, &BX_CPU_C::PCMPGTW_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
@ -1302,21 +1302,21 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f67[4] = {
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f68[4] = {
/* -- */ { 0, &BX_CPU_C::PUNPCKHBW_PqQq },
/* 66 */ { 0, &BX_CPU_C::PUNPCKHBW_VdqWq },
/* 66 */ { 0, &BX_CPU_C::PUNPCKHBW_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f69[4] = {
/* -- */ { 0, &BX_CPU_C::PUNPCKHWD_PqQq },
/* 66 */ { 0, &BX_CPU_C::PUNPCKHWD_VdqWq },
/* 66 */ { 0, &BX_CPU_C::PUNPCKHWD_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f6a[4] = {
/* -- */ { 0, &BX_CPU_C::PUNPCKHDQ_PqQq },
/* 66 */ { 0, &BX_CPU_C::PUNPCKHDQ_VdqWq },
/* 66 */ { 0, &BX_CPU_C::PUNPCKHDQ_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
@ -1330,14 +1330,14 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f6b[4] = {
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f6c[4] = {
/* -- */ { 0, &BX_CPU_C::BxError },
/* 66 */ { 0, &BX_CPU_C::PUNPCKLQDQ_VdqWq },
/* 66 */ { 0, &BX_CPU_C::PUNPCKLQDQ_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f6d[4] = {
/* -- */ { 0, &BX_CPU_C::BxError },
/* 66 */ { 0, &BX_CPU_C::PUNPCKHQDQ_VdqWq },
/* 66 */ { 0, &BX_CPU_C::PUNPCKHQDQ_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
@ -1357,10 +1357,10 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f6f[4] = {
};
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f70[4] = {
/* -- */ { 0, &BX_CPU_C::PSHUFW_PqQqIb },
/* 66 */ { 0, &BX_CPU_C::PSHUFD_VdqWdqIb },
/* F2 */ { 0, &BX_CPU_C::PSHUFHW_VqWqIb },
/* F3 */ { 0, &BX_CPU_C::PSHUFLW_VqWqIb }
/* -- */ { 0, &BX_CPU_C::PSHUFW_PqQqIb },
/* 66 */ { 0, &BX_CPU_C::PSHUFD_VdqWdqIb },
/* F2 */ { 0, &BX_CPU_C::PSHUFHW_VdqWdqIb },
/* F3 */ { 0, &BX_CPU_C::PSHUFLW_VdqWdqIb }
};
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f74[4] = {
@ -2116,6 +2116,13 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3835[4] = {
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* Opcode 0F 38 37: handler per SSE prefix (none, 66, F2, F3). */
/* Only the 66-prefixed form is defined (PCMPGTQ); the rest decode to BxError. */
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3837[4] = {
/* -- */ { 0, &BX_CPU_C::BxError },
/* 66 */ { 0, &BX_CPU_C::PCMPGTQ_VdqWdq },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3838[4] = {
/* -- */ { 0, &BX_CPU_C::BxError },
/* 66 */ { 0, &BX_CPU_C::PMINSB_VdqWdq },
@ -2186,6 +2193,20 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3841[4] = {
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* Opcode 0F 38 F0: handler per SSE prefix (none, 66, F2, F3). */
/* Only the F2-prefixed form is defined (CRC32 with byte source). */
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f38f0[4] = {
/* -- */ { 0, &BX_CPU_C::BxError },
/* 66 */ { 0, &BX_CPU_C::BxError },
/* F2 */ { 0, &BX_CPU_C::CRC32_GdEb },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* Opcode 0F 38 F1: handler per SSE prefix (none, 66, F2, F3). */
/* Only the F2-prefixed form is defined (CRC32 with word/dword/qword source). */
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f38f1[4] = {
/* -- */ { 0, &BX_CPU_C::BxError },
/* 66 */ { 0, &BX_CPU_C::BxError },
/* F2 */ { 0, &BX_CPU_C::CRC32_GdEv },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3a08[4] = {
/* -- */ { 0, &BX_CPU_C::BxError },
/* 66 */ { 0, &BX_CPU_C::ROUNDPS_VpsWpsIb },
@ -2312,6 +2333,34 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3a42[4] = {
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* Opcode 0F 3A 60: handler per SSE prefix (none, 66, F2, F3). */
/* Only the 66-prefixed form is defined (PCMPESTRM). */
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3a60[4] = {
/* -- */ { 0, &BX_CPU_C::BxError },
/* 66 */ { 0, &BX_CPU_C::PCMPESTRM_VdqWdqIb },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* Opcode 0F 3A 61: handler per SSE prefix (none, 66, F2, F3). */
/* Only the 66-prefixed form is defined (PCMPESTRI). */
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3a61[4] = {
/* -- */ { 0, &BX_CPU_C::BxError },
/* 66 */ { 0, &BX_CPU_C::PCMPESTRI_VdqWdqIb },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* Opcode 0F 3A 62: handler per SSE prefix (none, 66, F2, F3). */
/* Only the 66-prefixed form is defined (PCMPISTRM). */
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3a62[4] = {
/* -- */ { 0, &BX_CPU_C::BxError },
/* 66 */ { 0, &BX_CPU_C::PCMPISTRM_VdqWdqIb },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* Opcode 0F 3A 63: handler per SSE prefix (none, 66, F2, F3). */
/* Only the 66-prefixed form is defined (PCMPISTRI). */
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3a63[4] = {
/* -- */ { 0, &BX_CPU_C::BxError },
/* 66 */ { 0, &BX_CPU_C::PCMPISTRI_VdqWdqIb },
/* F2 */ { 0, &BX_CPU_C::BxError },
/* F3 */ { 0, &BX_CPU_C::BxError }
};
/* ************************************************************************ */
/* 3-byte opcode table (Table A-4, 0F 38) */
@ -2380,7 +2429,7 @@ static const BxOpcodeInfo_t BxOpcode3ByteOp0f383x[16] = {
/* 04 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3834 },
/* 05 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3835 },
/* 06 */ { 0, &BX_CPU_C::BxError },
/* 07 */ { 0, &BX_CPU_C::BxError },
/* 07 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3837 },
/* 08 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3838 },
/* 09 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3839 },
/* 0A */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f383a },
@ -2410,6 +2459,25 @@ static const BxOpcodeInfo_t BxOpcode3ByteOp0f384x[16] = {
/* 0F */ { 0, &BX_CPU_C::BxError }
};
/* Sub-table for three-byte opcodes 0F 38 F0 .. 0F 38 FF. */
/* Entries 00 and 01 dispatch on the SSE prefix to the CRC32 group tables; */
/* every other entry decodes to BxError. */
/* NOTE(review): presumably indexed by the low nibble of the third opcode */
/* byte - confirm against the Bx3ByteOpIndex handling in the decoder. */
static const BxOpcodeInfo_t BxOpcode3ByteOp0f38fx[16] = {
/* 00 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f38f0 },
/* 01 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f38f1 },
/* 02 */ { 0, &BX_CPU_C::BxError },
/* 03 */ { 0, &BX_CPU_C::BxError },
/* 04 */ { 0, &BX_CPU_C::BxError },
/* 05 */ { 0, &BX_CPU_C::BxError },
/* 06 */ { 0, &BX_CPU_C::BxError },
/* 07 */ { 0, &BX_CPU_C::BxError },
/* 08 */ { 0, &BX_CPU_C::BxError },
/* 09 */ { 0, &BX_CPU_C::BxError },
/* 0A */ { 0, &BX_CPU_C::BxError },
/* 0B */ { 0, &BX_CPU_C::BxError },
/* 0C */ { 0, &BX_CPU_C::BxError },
/* 0D */ { 0, &BX_CPU_C::BxError },
/* 0E */ { 0, &BX_CPU_C::BxError },
/* 0F */ { 0, &BX_CPU_C::BxError }
};
static const BxOpcodeInfo_t BxOpcode3ByteTableA4[16] = {
/* 00 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f380x },
/* 01 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f381x },
@ -2426,7 +2494,7 @@ static const BxOpcodeInfo_t BxOpcode3ByteTableA4[16] = {
/* 0C */ { 0, &BX_CPU_C::BxError },
/* 0D */ { 0, &BX_CPU_C::BxError },
/* 0E */ { 0, &BX_CPU_C::BxError },
/* 0F */ { 0, &BX_CPU_C::BxError }
/* 0F */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f38fx }
};
/* ************************************************************************ */
@ -2508,6 +2576,25 @@ static const BxOpcodeInfo_t BxOpcode3ByteOp0f3a4x[16] = {
/* 0F */ { 0, &BX_CPU_C::BxError }
};
/* Sub-table for three-byte opcodes 0F 3A 60 .. 0F 3A 6F. */
/* Entries 00..03 dispatch on the SSE prefix to the PCMPESTRM/PCMPESTRI/ */
/* PCMPISTRM/PCMPISTRI group tables; every other entry decodes to BxError. */
/* NOTE(review): BxImmediate_Ib presumably tells the decoder to fetch the */
/* trailing 8-bit immediate these instructions take - confirm in the decoder. */
static const BxOpcodeInfo_t BxOpcode3ByteOp0f3a6x[16] = {
/* 00 */ { BxImmediate_Ib | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3a60 },
/* 01 */ { BxImmediate_Ib | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3a61 },
/* 02 */ { BxImmediate_Ib | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3a62 },
/* 03 */ { BxImmediate_Ib | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3a63 },
/* 04 */ { 0, &BX_CPU_C::BxError },
/* 05 */ { 0, &BX_CPU_C::BxError },
/* 06 */ { 0, &BX_CPU_C::BxError },
/* 07 */ { 0, &BX_CPU_C::BxError },
/* 08 */ { 0, &BX_CPU_C::BxError },
/* 09 */ { 0, &BX_CPU_C::BxError },
/* 0A */ { 0, &BX_CPU_C::BxError },
/* 0B */ { 0, &BX_CPU_C::BxError },
/* 0C */ { 0, &BX_CPU_C::BxError },
/* 0D */ { 0, &BX_CPU_C::BxError },
/* 0E */ { 0, &BX_CPU_C::BxError },
/* 0F */ { 0, &BX_CPU_C::BxError }
};
static const BxOpcodeInfo_t BxOpcode3ByteTableA5[16] = {
/* 00 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f3a0x },
/* 01 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f3a1x },
@ -2515,7 +2602,7 @@ static const BxOpcodeInfo_t BxOpcode3ByteTableA5[16] = {
/* 03 */ { 0, &BX_CPU_C::BxError },
/* 04 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f3a4x },
/* 05 */ { 0, &BX_CPU_C::BxError },
/* 06 */ { 0, &BX_CPU_C::BxError },
/* 06 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f3a6x },
/* 07 */ { 0, &BX_CPU_C::BxError },
/* 08 */ { 0, &BX_CPU_C::BxError },
/* 09 */ { 0, &BX_CPU_C::BxError },

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////////////////////////////////////
// $Id: protect_ctrl.cc,v 1.60 2007-09-30 18:47:41 sshwarts Exp $
// $Id: protect_ctrl.cc,v 1.61 2007-10-01 19:59:36 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2001 MandrakeSoft S.A.
@ -77,7 +77,6 @@ void BX_CPU_C::LAR_GvEw(bxInstruction_c *i)
if (real_mode() || v8086_mode()) {
BX_ERROR(("LAR: not recognized in real or virtual-8086 mode"));
UndefinedOpcode(i);
return;
}
if (i->modC0()) {

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////////////////////////////////////
// $Id: sse.cc,v 1.48 2007-09-20 22:55:03 sshwarts Exp $
// $Id: sse.cc,v 1.49 2007-10-01 19:59:36 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2003 Stanislav Shwartsman
@ -29,6 +29,7 @@
/* SSE Integer Operations (128bit MMX extensions) */
/* ********************************************** */
// for 3-byte opcodes
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
/* 66 0F 38 00 */
@ -731,6 +732,37 @@ void BX_CPU_C::PACKUSDW_VdqWdq(bxInstruction_c *i)
#endif
}
/* 66 0F 38 37 */
// PCMPGTQ: compare the two packed quadwords of the destination XMM register
// (op1) against the source XMM register / 128-bit memory operand (op2).
// Each destination quadword becomes all ones when op1 > op2 and zero
// otherwise.
//
// The comparison is SIGNED, so the operands are read through the signed
// quadword accessor; an unsigned compare gives the wrong mask whenever the
// sign bits of the two operands differ. The mask itself is stored through
// the unsigned accessor.
void BX_CPU_C::PCMPGTQ_VdqWdq(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  for (unsigned j = 0; j < 2; j++) {
    op1.xmm64u(j) = (op1.xmm64s(j) > op2.xmm64s(j)) ?
        BX_CONST64(0xffffffffffffffff) : 0;
  }

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PCMPGTQ_VdqWdq: required SSE4.2, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 38 38 */
void BX_CPU_C::PMINSB_VdqWdq(bxInstruction_c *i)
{
@ -1405,7 +1437,7 @@ void BX_CPU_C::MPSADBW_VdqWdqIb(bxInstruction_c *i)
#endif // (BX_SUPPORT_SSE >= 4 || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
/* 66 0F 60 */
void BX_CPU_C::PUNPCKLBW_VdqWq(bxInstruction_c *i)
void BX_CPU_C::PUNPCKLBW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
BX_CPU_THIS_PTR prepareSSE();
@ -1441,13 +1473,13 @@ void BX_CPU_C::PUNPCKLBW_VdqWq(bxInstruction_c *i)
/* now write result back to destination */
BX_WRITE_XMM_REG(i->nnn(), result);
#else
BX_INFO(("PUNPCKLBW_VdqWq: required SSE2, use --enable-sse option"));
BX_INFO(("PUNPCKLBW_VdqWdq: required SSE2, use --enable-sse option"));
UndefinedOpcode(i);
#endif
}
/* 66 0F 61 */
void BX_CPU_C::PUNPCKLWD_VdqWq(bxInstruction_c *i)
void BX_CPU_C::PUNPCKLWD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
BX_CPU_THIS_PTR prepareSSE();
@ -1475,14 +1507,14 @@ void BX_CPU_C::PUNPCKLWD_VdqWq(bxInstruction_c *i)
/* now write result back to destination */
BX_WRITE_XMM_REG(i->nnn(), result);
#else
BX_INFO(("PUNPCKLWD_VdqWq: required SSE2, use --enable-sse option"));
BX_INFO(("PUNPCKLWD_VdqWdq: required SSE2, use --enable-sse option"));
UndefinedOpcode(i);
#endif
}
/* UNPCKLPS: 0F 14 */
/* PUNPCKLDQ: 66 0F 62 */
void BX_CPU_C::UNPCKLPS_VpsWq(bxInstruction_c *i)
void BX_CPU_C::UNPCKLPS_VpsWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
BX_CPU_THIS_PTR prepareSSE();
@ -1506,13 +1538,13 @@ void BX_CPU_C::UNPCKLPS_VpsWq(bxInstruction_c *i)
/* now write result back to destination */
BX_WRITE_XMM_REG(i->nnn(), result);
#else
BX_INFO(("UNPCKLPS_VpsWq: required SSE, use --enable-sse option"));
BX_INFO(("UNPCKLPS_VpsWdq: required SSE, use --enable-sse option"));
UndefinedOpcode(i);
#endif
}
/* 66 0F 63 */
void BX_CPU_C::PACKSSWB_VdqWq(bxInstruction_c *i)
void BX_CPU_C::PACKSSWB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
BX_CPU_THIS_PTR prepareSSE();
@ -1549,13 +1581,13 @@ void BX_CPU_C::PACKSSWB_VdqWq(bxInstruction_c *i)
/* now write result back to destination */
BX_WRITE_XMM_REG(i->nnn(), result);
#else
BX_INFO(("PACKSSWB_VdqWq: required SSE2, use --enable-sse option"));
BX_INFO(("PACKSSWB_VdqWdq: required SSE2, use --enable-sse option"));
UndefinedOpcode(i);
#endif
}
/* 66 0F 64 */
void BX_CPU_C::PCMPGTB_VdqWq(bxInstruction_c *i)
void BX_CPU_C::PCMPGTB_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
BX_CPU_THIS_PTR prepareSSE();
@ -1572,19 +1604,19 @@ void BX_CPU_C::PCMPGTB_VdqWq(bxInstruction_c *i)
}
for(unsigned j=0; j<16; j++) {
op1.xmmsbyte(j) = (op1.xmmsbyte(j) > op2.xmmsbyte(j)) ? 0xff : 0;
op1.xmmubyte(j) = (op1.xmmsbyte(j) > op2.xmmsbyte(j)) ? 0xff : 0;
}
/* now write result back to destination */
BX_WRITE_XMM_REG(i->nnn(), op1);
#else
BX_INFO(("PCMPGTB_VdqWq: required SSE2, use --enable-sse option"));
BX_INFO(("PCMPGTB_VdqWdq: required SSE2, use --enable-sse option"));
UndefinedOpcode(i);
#endif
}
/* 66 0F 65 */
void BX_CPU_C::PCMPGTW_VdqWq(bxInstruction_c *i)
void BX_CPU_C::PCMPGTW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
BX_CPU_THIS_PTR prepareSSE();
@ -1600,19 +1632,19 @@ void BX_CPU_C::PCMPGTW_VdqWq(bxInstruction_c *i)
readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
}
op1.xmm16s(0) = (op1.xmm16s(0) > op2.xmm16s(0)) ? 0xffff : 0;
op1.xmm16s(1) = (op1.xmm16s(1) > op2.xmm16s(1)) ? 0xffff : 0;
op1.xmm16s(2) = (op1.xmm16s(2) > op2.xmm16s(2)) ? 0xffff : 0;
op1.xmm16s(3) = (op1.xmm16s(3) > op2.xmm16s(3)) ? 0xffff : 0;
op1.xmm16s(4) = (op1.xmm16s(4) > op2.xmm16s(4)) ? 0xffff : 0;
op1.xmm16s(5) = (op1.xmm16s(5) > op2.xmm16s(5)) ? 0xffff : 0;
op1.xmm16s(6) = (op1.xmm16s(6) > op2.xmm16s(6)) ? 0xffff : 0;
op1.xmm16s(7) = (op1.xmm16s(7) > op2.xmm16s(7)) ? 0xffff : 0;
op1.xmm16u(0) = (op1.xmm16s(0) > op2.xmm16s(0)) ? 0xffff : 0;
op1.xmm16u(1) = (op1.xmm16s(1) > op2.xmm16s(1)) ? 0xffff : 0;
op1.xmm16u(2) = (op1.xmm16s(2) > op2.xmm16s(2)) ? 0xffff : 0;
op1.xmm16u(3) = (op1.xmm16s(3) > op2.xmm16s(3)) ? 0xffff : 0;
op1.xmm16u(4) = (op1.xmm16s(4) > op2.xmm16s(4)) ? 0xffff : 0;
op1.xmm16u(5) = (op1.xmm16s(5) > op2.xmm16s(5)) ? 0xffff : 0;
op1.xmm16u(6) = (op1.xmm16s(6) > op2.xmm16s(6)) ? 0xffff : 0;
op1.xmm16u(7) = (op1.xmm16s(7) > op2.xmm16s(7)) ? 0xffff : 0;
/* now write result back to destination */
BX_WRITE_XMM_REG(i->nnn(), op1);
#else
BX_INFO(("PCMPGTW_VdqWq: required SSE2, use --enable-sse option"));
BX_INFO(("PCMPGTW_VdqWdq: required SSE2, use --enable-sse option"));
UndefinedOpcode(i);
#endif
}
@ -1634,10 +1666,10 @@ void BX_CPU_C::PCMPGTD_VdqWdq(bxInstruction_c *i)
readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
}
op1.xmm32s(0) = (op1.xmm32s(0) > op2.xmm32s(0)) ? 0xffffffff : 0;
op1.xmm32s(1) = (op1.xmm32s(1) > op2.xmm32s(1)) ? 0xffffffff : 0;
op1.xmm32s(2) = (op1.xmm32s(2) > op2.xmm32s(2)) ? 0xffffffff : 0;
op1.xmm32s(3) = (op1.xmm32s(3) > op2.xmm32s(3)) ? 0xffffffff : 0;
op1.xmm32u(0) = (op1.xmm32s(0) > op2.xmm32s(0)) ? 0xffffffff : 0;
op1.xmm32u(1) = (op1.xmm32s(1) > op2.xmm32s(1)) ? 0xffffffff : 0;
op1.xmm32u(2) = (op1.xmm32s(2) > op2.xmm32s(2)) ? 0xffffffff : 0;
op1.xmm32u(3) = (op1.xmm32s(3) > op2.xmm32s(3)) ? 0xffffffff : 0;
/* now write result back to destination */
BX_WRITE_XMM_REG(i->nnn(), op1);
@ -1691,7 +1723,7 @@ void BX_CPU_C::PACKUSWB_VdqWdq(bxInstruction_c *i)
}
/* 66 0F 68 */
void BX_CPU_C::PUNPCKHBW_VdqWq(bxInstruction_c *i)
void BX_CPU_C::PUNPCKHBW_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
BX_CPU_THIS_PTR prepareSSE();
@ -1727,13 +1759,13 @@ void BX_CPU_C::PUNPCKHBW_VdqWq(bxInstruction_c *i)
/* now write result back to destination */
BX_WRITE_XMM_REG(i->nnn(), result);
#else
BX_INFO(("PUNPCKHBW_VdqWq: required SSE2, use --enable-sse option"));
BX_INFO(("PUNPCKHBW_VdqWdq: required SSE2, use --enable-sse option"));
UndefinedOpcode(i);
#endif
}
/* 66 0F 69 */
void BX_CPU_C::PUNPCKHWD_VdqWq(bxInstruction_c *i)
void BX_CPU_C::PUNPCKHWD_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
BX_CPU_THIS_PTR prepareSSE();
@ -1761,14 +1793,14 @@ void BX_CPU_C::PUNPCKHWD_VdqWq(bxInstruction_c *i)
/* now write result back to destination */
BX_WRITE_XMM_REG(i->nnn(), result);
#else
BX_INFO(("PUNPCKHWD_VdqWq: required SSE2, use --enable-sse option"));
BX_INFO(("PUNPCKHWD_VdqWdq: required SSE2, use --enable-sse option"));
UndefinedOpcode(i);
#endif
}
/* UNPCKHPS: 0F 15 */
/* PUNPCKHDQ: 66 0F 6A */
void BX_CPU_C::UNPCKHPS_VpsWq(bxInstruction_c *i)
void BX_CPU_C::UNPCKHPS_VpsWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 1
BX_CPU_THIS_PTR prepareSSE();
@ -1792,7 +1824,7 @@ void BX_CPU_C::UNPCKHPS_VpsWq(bxInstruction_c *i)
/* now write result back to destination */
BX_WRITE_XMM_REG(i->nnn(), result);
#else
BX_INFO(("UNPCKHPS_VpsWq: required SSE, use --enable-sse option"));
BX_INFO(("UNPCKHPS_VpsWdq: required SSE, use --enable-sse option"));
UndefinedOpcode(i);
#endif
}
@ -1833,7 +1865,7 @@ void BX_CPU_C::PACKSSDW_VdqWdq(bxInstruction_c *i)
}
/* UNPCKLPD: 66 0F 14 */
/* PUNPCKLQDQ: 66 0F 6C */
void BX_CPU_C::PUNPCKLQDQ_VdqWq(bxInstruction_c *i)
void BX_CPU_C::PUNPCKLQDQ_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
BX_CPU_THIS_PTR prepareSSE();
@ -1854,14 +1886,14 @@ void BX_CPU_C::PUNPCKLQDQ_VdqWq(bxInstruction_c *i)
/* now write result back to destination */
BX_WRITE_XMM_REG(i->nnn(), op1);
#else
BX_INFO(("PUNPCKLQDQ_VdqWq: required SSE2, use --enable-sse option"));
BX_INFO(("PUNPCKLQDQ_VdqWdq: required SSE2, use --enable-sse option"));
UndefinedOpcode(i);
#endif
}
/* UNPCKHPD: 66 0F 15 */
/* PUNPCKHQDQ: 66 0F 6D */
void BX_CPU_C::PUNPCKHQDQ_VdqWq(bxInstruction_c *i)
void BX_CPU_C::PUNPCKHQDQ_VdqWdq(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
BX_CPU_THIS_PTR prepareSSE();
@ -1883,7 +1915,7 @@ void BX_CPU_C::PUNPCKHQDQ_VdqWq(bxInstruction_c *i)
/* now write result back to destination */
BX_WRITE_XMM_REG(i->nnn(), result);
#else
BX_INFO(("PUNPCKHQDQ_VdqWq: required SSE2, use --enable-sse option"));
BX_INFO(("PUNPCKHQDQ_VdqWdq: required SSE2, use --enable-sse option"));
UndefinedOpcode(i);
#endif
}
@ -1903,7 +1935,7 @@ void BX_CPU_C::PSHUFD_VdqWdqIb(bxInstruction_c *i)
}
else {
/* pointer, segment address pair */
read_virtual_dqword(i->seg(), RMAddr(i), (Bit8u *) &op);
readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
}
result.xmm32u(0) = op.xmm32u((order >> 0) & 0x3);
@ -1920,7 +1952,7 @@ void BX_CPU_C::PSHUFD_VdqWdqIb(bxInstruction_c *i)
}
/* F2 0F 70 */
void BX_CPU_C::PSHUFHW_VqWqIb(bxInstruction_c *i)
void BX_CPU_C::PSHUFHW_VdqWdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
BX_CPU_THIS_PTR prepareSSE();
@ -1934,7 +1966,7 @@ void BX_CPU_C::PSHUFHW_VqWqIb(bxInstruction_c *i)
}
else {
/* pointer, segment address pair */
read_virtual_dqword(i->seg(), RMAddr(i), (Bit8u *) &op);
readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
}
result.xmm64u(0) = op.xmm64u(0);
@ -1946,13 +1978,13 @@ void BX_CPU_C::PSHUFHW_VqWqIb(bxInstruction_c *i)
/* now write result back to destination */
BX_WRITE_XMM_REG(i->nnn(), result);
#else
BX_INFO(("PSHUFHW_VqWqIb: required SSE2, use --enable-sse option"));
BX_INFO(("PSHUFHW_VdqWdqIb: required SSE2, use --enable-sse option"));
UndefinedOpcode(i);
#endif
}
/* F3 0F 70 */
void BX_CPU_C::PSHUFLW_VqWqIb(bxInstruction_c *i)
void BX_CPU_C::PSHUFLW_VdqWdqIb(bxInstruction_c *i)
{
#if BX_SUPPORT_SSE >= 2
BX_CPU_THIS_PTR prepareSSE();
@ -1966,7 +1998,7 @@ void BX_CPU_C::PSHUFLW_VqWqIb(bxInstruction_c *i)
}
else {
/* pointer, segment address pair */
read_virtual_dqword(i->seg(), RMAddr(i), (Bit8u *) &op);
readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
}
result.xmm16u(0) = op.xmm16u((order >> 0) & 0x3);
@ -1978,7 +2010,7 @@ void BX_CPU_C::PSHUFLW_VqWqIb(bxInstruction_c *i)
/* now write result back to destination */
BX_WRITE_XMM_REG(i->nnn(), result);
#else
BX_INFO(("PSHUFLW_VqWqIb: required SSE, use --enable-sse option"));
BX_INFO(("PSHUFLW_VdqWdqIb: required SSE, use --enable-sse option"));
UndefinedOpcode(i);
#endif
}

View File

@ -1,5 +1,5 @@
/////////////////////////////////////////////////////////////////////////
// $Id: sse_pfp.cc,v 1.34 2007-09-27 16:23:29 sshwarts Exp $
// $Id: sse_pfp.cc,v 1.35 2007-10-01 19:59:36 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2003 Stanislav Shwartsman
@ -2972,6 +2972,7 @@ void BX_CPU_C::ADDSUBPS_VpsWps(bxInstruction_c *i)
#endif
}
// for 3-byte opcodes
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
/* 66 0F 3A 08 */

548
bochs/cpu/sse_string.cc Executable file
View File

@ -0,0 +1,548 @@
/////////////////////////////////////////////////////////////////////////
// $Id: sse_string.cc,v 1.1 2007-10-01 19:59:37 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2007 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
// Make code more tidy with a few macros.
#if BX_SUPPORT_X86_64==0
#define RCX ECX
#endif
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
// Compare all pairs of Ai, Bj according to imm8 control
// Fill BoolRes with the result of comparing every element of op1 against
// every element of op2.
//
// Indexing is BoolRes[op2 element index][op1 element index].
//
//   imm[1:0] selects the element format:
//       0 - unsigned bytes, 1 - unsigned words,
//       2 - signed bytes,   3 - signed words
//   imm[3:2] selects the aggregation operation; only value 1 ('ranges')
//       changes the comparison performed here, every other value uses a
//       plain equality test.
//
// For the word formats only the upper-left 8x8 part of BoolRes is written.
static void compare_strings(bx_bool BoolRes[16][16], BxPackedXmmRegister op1, BxPackedXmmRegister op2, Bit8u imm)
{
  // 'ranges' treats op1 as pairs of (lower bound, upper bound): even
  // elements are tested with <=, odd elements with >=
  const bx_bool ranges = (((imm >> 2) & 3) == 1);
  unsigned a, b;   // a walks op1, b walks op2

  switch (imm & 3) {
    case 0: /* unsigned bytes compare */
      for (a = 0; a < 16; a++) {
        for (b = 0; b < 16; b++) {
          if (! ranges)
            BoolRes[b][a] = (op1.xmmubyte(a) == op2.xmmubyte(b));
          else if ((a % 2) == 0)
            BoolRes[b][a] = (op1.xmmubyte(a) <= op2.xmmubyte(b));
          else
            BoolRes[b][a] = (op1.xmmubyte(a) >= op2.xmmubyte(b));
        }
      }
      break;

    case 1: /* unsigned words compare */
      for (a = 0; a < 8; a++) {
        for (b = 0; b < 8; b++) {
          if (! ranges)
            BoolRes[b][a] = (op1.xmm16u(a) == op2.xmm16u(b));
          else if ((a % 2) == 0)
            BoolRes[b][a] = (op1.xmm16u(a) <= op2.xmm16u(b));
          else
            BoolRes[b][a] = (op1.xmm16u(a) >= op2.xmm16u(b));
        }
      }
      break;

    case 2: /* signed bytes compare */
      for (a = 0; a < 16; a++) {
        for (b = 0; b < 16; b++) {
          if (! ranges)
            BoolRes[b][a] = (op1.xmmsbyte(a) == op2.xmmsbyte(b));
          else if ((a % 2) == 0)
            BoolRes[b][a] = (op1.xmmsbyte(a) <= op2.xmmsbyte(b));
          else
            BoolRes[b][a] = (op1.xmmsbyte(a) >= op2.xmmsbyte(b));
        }
      }
      break;

    case 3: /* signed words compare */
      for (a = 0; a < 8; a++) {
        for (b = 0; b < 8; b++) {
          if (! ranges)
            BoolRes[b][a] = (op1.xmm16s(a) == op2.xmm16s(b));
          else if ((a % 2) == 0)
            BoolRes[b][a] = (op1.xmm16s(a) <= op2.xmm16s(b));
          else
            BoolRes[b][a] = (op1.xmm16s(a) >= op2.xmm16s(b));
        }
      }
      break;
  }
}
// Convert an explicit 32-bit string length into an element count.
//
// The count is the absolute value of reg32, saturated to the number of
// elements selected by imm[0] (8 when set, 16 when clear).
static unsigned find_eos32(Bit32s reg32, Bit8u imm)
{
  const int limit = (imm & 0x1) ? 8 : 16;

  // out-of-range lengths (of either sign) saturate before abs() is applied
  if (reg32 > limit || reg32 < -limit)
    return limit;

  return abs(reg32);
}
#if BX_SUPPORT_X86_64
// Convert an explicit 64-bit string length into an element count.
//
// The count is the absolute value of reg64, saturated to the number of
// elements selected by imm[0] (8 when set, 16 when clear).
static unsigned find_eos64(Bit64s reg64, Bit8u imm)
{
  const unsigned max_len = (imm & 0x1) ? 8 : 16;
  const Bit64s bound = (Bit64s) max_len;

  // out-of-range lengths (of either sign) saturate to the element count
  if (reg64 > bound || reg64 < -bound)
    return max_len;

  // here -bound <= reg64 <= bound, so the magnitude fits in 'unsigned'
  return (unsigned) ((reg64 < 0) ? -reg64 : reg64);
}
#endif
// Locate the implicit end of string inside an XMM value.
//
// Returns the index of the first zero element, or the element count
// (8 words when imm[0] is set, 16 bytes otherwise) when no element is zero.
static unsigned find_eos(BxPackedXmmRegister op, Bit8u imm)
{
  unsigned pos;

  if (imm & 0x1) { // 8 elements
    for (pos = 0; pos < 8 && op.xmm16u(pos) != 0; pos++) {
      /* keep scanning words */
    }
  }
  else { // 16 elements
    for (pos = 0; pos < 16 && op.xmmubyte(pos) != 0; pos++) {
      /* keep scanning bytes */
    }
  }

  return pos;
}
// Apply the "invalid element" override to a single comparison result.
//
//   val     - raw comparison result
//   i_valid - non-zero when the first element of the pair is valid
//   j_valid - non-zero when the second element of the pair is valid
//   imm     - immediate control byte; imm[3:2] is the aggregation operation
//
// Returns val unchanged when both elements are valid, otherwise the value
// forced by the aggregation operation.
static bx_bool override_if_data_invalid(bx_bool val, bx_bool i_valid, bx_bool j_valid, Bit8u imm)
{
  // nothing to override when both elements are valid
  if (i_valid && j_valid)
    return val;

  // from here on at least one of the two elements is invalid
  switch ((imm >> 2) & 3) {
    case 2: // 'equal each': true only when both elements are invalid
      return (! i_valid && ! j_valid) ? 1 : 0;

    case 3: // 'equal ordered': true whenever element i is invalid
      return (! i_valid) ? 1 : 0;

    default: // 'equal any' and 'ranges': always false
      return 0;
  }
}
// Reduce the comparison matrix to one result bit per element and apply the
// polarity selected by imm[5:4].
//
//   BoolRes - comparison matrix, indexed [second index j][first index i]
//   len1    - number of valid elements along the i index
//   len2    - number of valid elements along the j index
//   imm     - immediate control byte:
//               imm[0]   - 8 elements when set, 16 otherwise
//               imm[3:2] - aggregation operation
//               imm[5:4] - polarity
//
// Every matrix entry is passed through override_if_data_invalid() before
// it is used. Bit j of the returned value is the result for element j.
static Bit16u aggregate(bx_bool BoolRes[16][16], unsigned len1, unsigned len2, Bit8u imm)
{
  const unsigned num_elements = (imm & 0x1) ? 8 : 16;
  const unsigned mode = (imm >> 2) & 3;
  const unsigned polarity = (imm >> 4) & 3;
  unsigned src, dst, row;
  Bit16u result = 0;

  for (dst = 0; dst < num_elements; dst++) {
    bx_bool hit = 0;

    switch (mode) {
      case 0: // 'equal any': some i matches element dst
        for (src = 0; src < num_elements && ! hit; src++) {
          if (override_if_data_invalid(BoolRes[dst][src], (src < len1), (dst < len2), imm))
            hit = 1;
        }
        break;

      case 1: // 'ranges': some (even, odd) pair of i both match element dst
        for (src = 0; src < num_elements && ! hit; src += 2) {
          if (override_if_data_invalid(BoolRes[dst][src], (src < len1), (dst < len2), imm) &&
              override_if_data_invalid(BoolRes[dst][src+1], (src+1 < len1), (dst < len2), imm))
            hit = 1;
        }
        break;

      case 2: // 'equal each': only the diagonal entry is considered
        hit = override_if_data_invalid(BoolRes[dst][dst], (dst < len1), (dst < len2), imm) ? 1 : 0;
        break;

      case 3: // 'equal ordered': every entry along the diagonal starting at row dst must match
        hit = 1;
        for (src = 0, row = dst; hit && (src < num_elements-dst) && (row < num_elements); src++, row++) {
          if (! override_if_data_invalid(BoolRes[row][src], (src < len1), (row < len2), imm))
            hit = 0;
        }
        break;
    }

    if (hit)
      result |= (1<<dst);
  }

  // polarity 0 and 2 leave the result untouched
  if (polarity == 1) {
    // invert every result bit
    result ^= (num_elements == 8) ? 0xFF : 0xFFFF;
  }
  else if (polarity == 3) {
    // invert only the bits that lie below len2
    for (dst = 0; dst < num_elements; dst++) {
      if (dst < len2) result ^= (1<<dst); // flip the bit
    }
  }

  return result;
}
#endif // (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
// for 3-byte opcodes
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
/* 66 0F 3A 60 */
// PCMPESTRM: packed compare of explicit-length strings, returning a mask.
//
// The first string is the XMM register selected by nnn, the second is the
// XMM register or memory operand selected by rm. The string lengths come
// from EAX/EDX (RAX/RDX when os64L() is set). The resulting mask is always
// written to XMM0, and CF/ZF/SF/OF are loaded from the comparison outcome.
void BX_CPU_C::PCMPESTRM_VdqWdqIb(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
  Bit8u imm8 = i->Ib();

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    // the memory operand of PCMPxSTRx has no 16-byte alignment
    // requirement, so the non-aligned accessor must be used
    read_virtual_dqword(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  // compare all pairs of Ai, Bj
  bx_bool BoolRes[16][16];
  compare_strings(BoolRes, op1, op2, imm8);

  unsigned len1, len2, num_elements = (imm8 & 0x1) ? 8 : 16;

#if BX_SUPPORT_X86_64
  if (i->os64L()) {
    len1 = find_eos64(RAX, imm8);
    len2 = find_eos64(RDX, imm8);
  }
  else
#endif
  {
    len1 = find_eos32(EAX, imm8);
    len2 = find_eos32(EDX, imm8);
  }

  Bit16u result2 = aggregate(BoolRes, len1, len2, imm8);

  // As defined by imm8[6], result2 is then either stored to the least
  // significant bits of XMM0 (zero extended to 128 bits) or expanded
  // into a byte/word-mask and then stored to XMM0
  if (imm8 & 0x40) {
    if (num_elements == 8) {
      for (int index = 0; index < 8; index++)
        result.xmm16u(index) = (result2 & (1<<index)) ? 0xffff : 0;
    }
    else { // num_elements = 16
      for (int index = 0; index < 16; index++)
        result.xmmubyte(index) = (result2 & (1<<index)) ? 0xff : 0;
    }
  }
  else {
    result.xmm64u(1) = 0;
    result.xmm64u(0) = (Bit64u) result2;
  }

  // CF - result2 is non-zero, SF - first string shorter than the vector,
  // ZF - second string shorter than the vector, OF - bit 0 of result2
  Bit32u flags = 0;
  if (result2 != 0) flags |= EFlagsCFMask;
  if (len1 < num_elements) flags |= EFlagsSFMask;
  if (len2 < num_elements) flags |= EFlagsZFMask;
  if (result2 & 0x1)
    flags |= EFlagsOFMask;
  setEFlagsOSZAPC(flags);

  /* the mask destination is the implicit XMM0 register, not the */
  /* register encoded in the ModRM reg field */
  BX_WRITE_XMM_REG(0, result);
#else
  BX_INFO(("PCMPESTRM_VdqWdqIb: required SSE4.2, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 3A 61 */
// PCMPESTRI: packed compare of explicit-length strings, returning an index.
//
// The first string is the XMM register selected by nnn, the second is the
// XMM register or memory operand selected by rm. The string lengths come
// from EAX/EDX (RAX/RDX when os64L() is set). The index of the first or
// last set result bit is written to RCX/ECX, and CF/ZF/SF/OF are loaded
// from the comparison outcome.
void BX_CPU_C::PCMPESTRI_VdqWdqIb(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
  Bit8u imm8 = i->Ib();

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    // the memory operand of PCMPxSTRx has no 16-byte alignment
    // requirement, so the non-aligned accessor must be used
    read_virtual_dqword(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  // compare all pairs of Ai, Bj
  bx_bool BoolRes[16][16];
  compare_strings(BoolRes, op1, op2, imm8);

  unsigned len1, len2, num_elements = (imm8 & 0x1) ? 8 : 16;
  int index;

#if BX_SUPPORT_X86_64
  if (i->os64L()) {
    len1 = find_eos64(RAX, imm8);
    len2 = find_eos64(RDX, imm8);
  }
  else
#endif
  {
    len1 = find_eos32(EAX, imm8);
    len2 = find_eos32(EDX, imm8);
  }

  Bit16u result2 = aggregate(BoolRes, len1, len2, imm8);

  // The index of the first (or last, according to imm8[6]) set bit of result2
  // is returned to ECX. If no bits are set in IntRes2, ECX is set to 16 (8)
  if (imm8 & 0x40) {
    // The index returned to ECX is of the MSB in result2
    for (index=num_elements-1; index>=0; index--)
      if (result2 & (1<<index)) break;
    if (index < 0) index = num_elements;
  }
  else {
    // The index returned to ECX is of the LSB in result2
    for (index=0; index<(int)num_elements; index++)
      if (result2 & (1<<index)) break;
  }
  RCX = index;

  // CF - result2 is non-zero, SF - first string shorter than the vector,
  // ZF - second string shorter than the vector, OF - bit 0 of result2
  Bit32u flags = 0;
  if (result2 != 0) flags |= EFlagsCFMask;
  if (len1 < num_elements) flags |= EFlagsSFMask;
  if (len2 < num_elements) flags |= EFlagsZFMask;
  if (result2 & 0x1)
    flags |= EFlagsOFMask;
  setEFlagsOSZAPC(flags);
#else
  BX_INFO(("PCMPESTRI_VdqWdqIb: required SSE4.2, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 3A 62 */
// PCMPISTRM: packed compare of implicit-length strings, returning a mask.
//
// The first string is the XMM register selected by nnn, the second is the
// XMM register or memory operand selected by rm. Each string ends at its
// first zero element. The resulting mask is always written to XMM0, and
// CF/ZF/SF/OF are loaded from the comparison outcome.
void BX_CPU_C::PCMPISTRM_VdqWdqIb(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
  Bit8u imm8 = i->Ib();

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    // the memory operand of PCMPxSTRx has no 16-byte alignment
    // requirement, so the non-aligned accessor must be used
    read_virtual_dqword(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  // compare all pairs of Ai, Bj
  bx_bool BoolRes[16][16];
  compare_strings(BoolRes, op1, op2, imm8);

  unsigned num_elements = (imm8 & 0x1) ? 8 : 16;
  unsigned len1 = find_eos(op1, imm8);
  unsigned len2 = find_eos(op2, imm8);

  Bit16u result2 = aggregate(BoolRes, len1, len2, imm8);

  // As defined by imm8[6], result2 is then either stored to the least
  // significant bits of XMM0 (zero extended to 128 bits) or expanded
  // into a byte/word-mask and then stored to XMM0
  if (imm8 & 0x40) {
    if (num_elements == 8) {
      for (int index = 0; index < 8; index++)
        result.xmm16u(index) = (result2 & (1<<index)) ? 0xffff : 0;
    }
    else { // num_elements = 16
      for (int index = 0; index < 16; index++)
        result.xmmubyte(index) = (result2 & (1<<index)) ? 0xff : 0;
    }
  }
  else {
    result.xmm64u(1) = 0;
    result.xmm64u(0) = (Bit64u) result2;
  }

  // CF - result2 is non-zero, SF - first string shorter than the vector,
  // ZF - second string shorter than the vector, OF - bit 0 of result2
  Bit32u flags = 0;
  if (result2 != 0) flags |= EFlagsCFMask;
  if (len1 < num_elements) flags |= EFlagsSFMask;
  if (len2 < num_elements) flags |= EFlagsZFMask;
  if (result2 & 0x1)
    flags |= EFlagsOFMask;
  setEFlagsOSZAPC(flags);

  /* the mask destination is the implicit XMM0 register, not the */
  /* register encoded in the ModRM reg field */
  BX_WRITE_XMM_REG(0, result);
#else
  BX_INFO(("PCMPISTRM_VdqWdqIb: required SSE4.2, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}
/* 66 0F 3A 63 */
// PCMPISTRI: packed compare of implicit-length strings, returning an index.
//
// The first string is the XMM register selected by nnn, the second is the
// XMM register or memory operand selected by rm. Each string ends at its
// first zero element. The index of the first or last set result bit is
// written to RCX/ECX, and CF/ZF/SF/OF are loaded from the comparison
// outcome.
void BX_CPU_C::PCMPISTRI_VdqWdqIb(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
  Bit8u imm8 = i->Ib();

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    // the memory operand of PCMPxSTRx has no 16-byte alignment
    // requirement, so the non-aligned accessor must be used
    read_virtual_dqword(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  // compare all pairs of Ai, Bj
  bx_bool BoolRes[16][16];
  compare_strings(BoolRes, op1, op2, imm8);

  unsigned num_elements = (imm8 & 0x1) ? 8 : 16;
  int index;

  unsigned len1 = find_eos(op1, imm8);
  unsigned len2 = find_eos(op2, imm8);

  Bit16u result2 = aggregate(BoolRes, len1, len2, imm8);

  // The index of the first (or last, according to imm8[6]) set bit of result2
  // is returned to ECX. If no bits are set in IntRes2, ECX is set to 16 (8)
  if (imm8 & 0x40) {
    // The index returned to ECX is of the MSB in result2
    for (index=num_elements-1; index>=0; index--)
      if (result2 & (1<<index)) break;
    if (index < 0) index = num_elements;
  }
  else {
    // The index returned to ECX is of the LSB in result2
    for (index=0; index<(int)num_elements; index++)
      if (result2 & (1<<index)) break;
  }
  RCX = index;

  // CF - result2 is non-zero, SF - first string shorter than the vector,
  // ZF - second string shorter than the vector, OF - bit 0 of result2
  Bit32u flags = 0;
  if (result2 != 0) flags |= EFlagsCFMask;
  if (len1 < num_elements) flags |= EFlagsSFMask;
  if (len2 < num_elements) flags |= EFlagsZFMask;
  if (result2 & 0x1)
    flags |= EFlagsOFMask;
  setEFlagsOSZAPC(flags);
#else
  BX_INFO(("PCMPISTRI_VdqWdqIb: required SSE4.2, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}
#endif // (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)