Added SSE4_2 instructions emulation
This commit is contained in:
parent
de72d9141f
commit
dbb91069f4
@ -68,6 +68,7 @@ OBJS = \
|
||||
sse_move.o \
|
||||
sse_pfp.o \
|
||||
sse_rcp.o \
|
||||
sse_string.o \
|
||||
soft_int.o \
|
||||
io_pro.o \
|
||||
$(APIC_OBJS) \
|
||||
@ -735,6 +736,15 @@ sse_rcp.o: sse_rcp.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
|
||||
crregs.h descriptor.h icache.h apic.h ../cpu/i387.h ../fpu/softfloat.h \
|
||||
../config.h ../fpu/tag_w.h ../fpu/status_w.h ../fpu/control_w.h \
|
||||
../cpu/xmm.h ../fpu/softfloat-specialize.h ../fpu/softfloat.h
|
||||
sse_string.o: sse_string.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
|
||||
../bx_debug/debug.h ../config.h ../osdep.h ../bxversion.h \
|
||||
../gui/siminterface.h ../memory/memory.h ../pc_system.h ../plugin.h \
|
||||
../extplugin.h ../gui/gui.h ../gui/textconfig.h ../config.h \
|
||||
../gui/keymap.h ../instrument/stubs/instrument.h cpu.h \
|
||||
../cpu/lazy_flags.h ../cpu/hostasm.h ../disasm/disasm.h ../config.h \
|
||||
crregs.h descriptor.h icache.h apic.h ../cpu/i387.h ../fpu/softfloat.h \
|
||||
../config.h ../fpu/tag_w.h ../fpu/status_w.h ../fpu/control_w.h \
|
||||
../cpu/xmm.h ../fpu/softfloat-specialize.h ../fpu/softfloat.h
|
||||
stack16.o: stack16.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
|
||||
../bx_debug/debug.h ../config.h ../osdep.h ../bxversion.h \
|
||||
../gui/siminterface.h ../memory/memory.h ../pc_system.h ../plugin.h \
|
||||
|
179
bochs/cpu/bit.cc
179
bochs/cpu/bit.cc
@ -1,5 +1,5 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id: bit.cc,v 1.32 2007-09-19 19:38:08 sshwarts Exp $
|
||||
// $Id: bit.cc,v 1.33 2007-10-01 19:59:35 sshwarts Exp $
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (C) 2001 MandrakeSoft S.A.
|
||||
@ -1276,9 +1276,10 @@ void BX_CPU_C::BTR_EqIb(bxInstruction_c *i)
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 0F B8 */
|
||||
void BX_CPU_C::POPCNT_GwEw(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_POPCNT
|
||||
#if BX_SUPPORT_POPCNT || (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
|
||||
Bit16u op1_16, op2_16;
|
||||
|
||||
/* op2_16 is a register or memory reference */
|
||||
@ -1296,8 +1297,8 @@ void BX_CPU_C::POPCNT_GwEw(bxInstruction_c *i)
|
||||
op2_16 >>= 1;
|
||||
}
|
||||
|
||||
Bit32u flags32 = op1_16 ? 0 : EFlagsZFMask;
|
||||
setEFlagsOSZAPC(flags32);
|
||||
Bit32u flags = op1_16 ? 0 : EFlagsZFMask;
|
||||
setEFlagsOSZAPC(flags);
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_16BIT_REG(i->nnn(), op1_16);
|
||||
@ -1307,9 +1308,10 @@ void BX_CPU_C::POPCNT_GwEw(bxInstruction_c *i)
|
||||
#endif
|
||||
}
|
||||
|
||||
/* 0F B8 */
|
||||
void BX_CPU_C::POPCNT_GdEd(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_POPCNT
|
||||
#if BX_SUPPORT_POPCNT || (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
|
||||
Bit32u op1_32, op2_32;
|
||||
|
||||
/* op2_16 is a register or memory reference */
|
||||
@ -1327,8 +1329,8 @@ void BX_CPU_C::POPCNT_GdEd(bxInstruction_c *i)
|
||||
op2_32 >>= 1;
|
||||
}
|
||||
|
||||
Bit32u flags32 = op1_32 ? 0 : EFlagsZFMask;
|
||||
setEFlagsOSZAPC(flags32);
|
||||
Bit32u flags = op1_32 ? 0 : EFlagsZFMask;
|
||||
setEFlagsOSZAPC(flags);
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_32BIT_REGZ(i->nnn(), op1_32);
|
||||
@ -1339,9 +1341,10 @@ void BX_CPU_C::POPCNT_GdEd(bxInstruction_c *i)
|
||||
}
|
||||
|
||||
#if BX_SUPPORT_X86_64
|
||||
/* 0F B8 */
|
||||
void BX_CPU_C::POPCNT_GqEq(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_POPCNT
|
||||
#if BX_SUPPORT_POPCNT || (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
|
||||
Bit64u op1_64, op2_64;
|
||||
|
||||
/* op2_16 is a register or memory reference */
|
||||
@ -1359,8 +1362,8 @@ void BX_CPU_C::POPCNT_GqEq(bxInstruction_c *i)
|
||||
op2_64 >>= 1;
|
||||
}
|
||||
|
||||
Bit32u flags32 = op1_64 ? 0 : EFlagsZFMask;
|
||||
setEFlagsOSZAPC(flags32);
|
||||
Bit32u flags = op1_64 ? 0 : EFlagsZFMask;
|
||||
setEFlagsOSZAPC(flags);
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_64BIT_REG(i->nnn(), op1_64);
|
||||
@ -1369,6 +1372,158 @@ void BX_CPU_C::POPCNT_GqEq(bxInstruction_c *i)
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#endif // BX_SUPPORT_X86_64
|
||||
|
||||
#endif /* BX_CPU_LEVEL >= 3 */
|
||||
// 3-byte opcodes
|
||||
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
|
||||
|
||||
#define CRC32_POLYNOMIAL BX_CONST64(0x11edc6f41)
|
||||
|
||||
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
|
||||
// primitives for CRC32 usage
|
||||
// Reverse the bit order of a byte: bit 0 swaps with bit 7, bit 1 with
// bit 6, and so on.  Used by the CRC32 handlers below.
static Bit8u BitReflect8(Bit8u val8)
{
  Bit8u reflected = 0;

  for (unsigned bit = 0; bit < 8; bit++) {
    // source bit 'bit' lands at the mirrored position (7 - bit)
    if (val8 & (1 << bit))
      reflected |= (Bit8u)(0x80 >> bit);
  }

  return reflected;
}
|
||||
|
||||
// Reverse the bit order of a 16-bit value by reflecting each byte and
// swapping the two bytes.
BX_CPP_INLINE Bit16u BitReflect16(Bit16u val16)
{
  Bit16u upper = BitReflect8(val16 & 0xff); // reflected low byte becomes the high byte
  Bit16u lower = BitReflect8(val16 >> 8);   // reflected high byte becomes the low byte

  return (upper << 8) | lower;
}
|
||||
|
||||
// Reverse the bit order of a 32-bit value by reflecting each 16-bit half
// and swapping the two halves.
BX_CPP_INLINE Bit32u BitReflect32(Bit32u val32)
{
  Bit32u upper = BitReflect16(val32 & 0xffff); // reflected low word becomes the high word
  Bit32u lower = BitReflect16(val32 >> 16);    // reflected high word becomes the low word

  return (upper << 16) | lower;
}
|
||||
|
||||
// Polynomial (modulo-2, carry-less) remainder of 'dividend' by 'divisor'.
// The upper 32 bits of the dividend seed the remainder; the lower 32 bits
// are then shifted in one at a time, most significant first.  Whenever
// bit 32 of the running remainder becomes set, the divisor is XORed in.
// The low 32 bits of the final remainder are returned.
static Bit32u mod2_64bit(Bit64u divisor, Bit64u dividend)
{
  Bit64u rem = dividend >> 32;
  int bitpos = 32;

  while (bitpos-- > 0) {
    // bring down the next bit of the dividend
    rem <<= 1;
    rem |= (dividend >> bitpos) & 1;

    // bit 32 set: subtract (XOR) the divisor
    if (((rem >> 32) & 1) != 0)
      rem ^= divisor;
  }

  return (Bit32u) rem;
}
|
||||
#endif // (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
|
||||
|
||||
// CRC32 Gd, Eb: accumulate one source byte (register or memory) into the
// CRC held in the 32-bit destination register, using CRC32_POLYNOMIAL.
// Both the accumulator and the data byte are bit-reflected before the
// modulo-2 division and the result is reflected back before write-back.
void BX_CPU_C::CRC32_GdEb(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
  Bit8u src;

  /* source byte is a register or memory reference */
  if (! i->modC0()) {
    read_virtual_byte(i->seg(), RMAddr(i), &src);
  }
  else {
    src = BX_READ_8BIT_REGx(i->rm(),i->extend8bitL());
  }

  Bit32u crc = BitReflect32(BX_READ_32BIT_REG(i->nnn()));

  // data byte goes to bits 39..32, accumulator is shifted left by the data width (8)
  Bit64u dividend = (((Bit64u) BitReflect8(src)) << 32) ^ (((Bit64u) crc) << 8);
  crc = mod2_64bit(CRC32_POLYNOMIAL, dividend);

  /* now write result back to destination */
  BX_WRITE_32BIT_REGZ(i->nnn(), BitReflect32(crc));
#else
  BX_INFO(("CRC32_GdEb: required SSE4_2 support, required SSE4.2, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}
|
||||
|
||||
// CRC32 Gd, Ev: accumulate a 16-, 32- or (on x86-64 builds) 64-bit source
// operand into the CRC held in the 32-bit destination register, using
// CRC32_POLYNOMIAL.  The accumulator and the data are bit-reflected before
// the modulo-2 division and the result is reflected back before write-back.
void BX_CPU_C::CRC32_GdEv(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)

  // running CRC taken from the destination register, in reflected bit order
  Bit32u crc = BitReflect32(BX_READ_32BIT_REG(i->nnn()));

#if BX_SUPPORT_X86_64
  if (i->os64L()) /* 64 bit operand size */
  {
    Bit64u src;

    if (! i->modC0()) {
      read_virtual_qword(i->seg(), RMAddr(i), &src);
    }
    else {
      src = BX_READ_64BIT_REG(i->rm());
    }

    // the qword is folded in as two dwords: low half first, then high half
    for (unsigned half = 0; half < 2; half++) {
      Bit32u dword = (Bit32u)(src >> (32 * half));
      Bit64u dividend = (((Bit64u) BitReflect32(dword)) << 32) ^ (((Bit64u) crc) << 32);
      crc = mod2_64bit(CRC32_POLYNOMIAL, dividend);
    }
  }
  else
#endif
  if (i->os32L()) /* 32 bit operand size */
  {
    Bit32u src;

    if (! i->modC0()) {
      read_virtual_dword(i->seg(), RMAddr(i), &src);
    }
    else {
      src = BX_READ_32BIT_REG(i->rm());
    }

    Bit64u dividend = (((Bit64u) BitReflect32(src)) << 32) ^ (((Bit64u) crc) << 32);
    crc = mod2_64bit(CRC32_POLYNOMIAL, dividend);
  }
  else /* 16 bit operand size */
  {
    Bit16u src;

    if (! i->modC0()) {
      read_virtual_word(i->seg(), RMAddr(i), &src);
    }
    else {
      src = BX_READ_16BIT_REG(i->rm());
    }

    // accumulator is shifted left by the data width (16)
    Bit64u dividend = (((Bit64u) BitReflect16(src)) << 32) ^ (((Bit64u) crc) << 16);
    crc = mod2_64bit(CRC32_POLYNOMIAL, dividend);
  }

  /* now write result back to destination */
  BX_WRITE_32BIT_REGZ(i->nnn(), BitReflect32(crc));

#else
  BX_INFO(("CRC32_GdEv: required SSE4_2 support, required SSE4.2, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}
|
||||
|
||||
#endif // (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
|
||||
|
||||
#endif // (BX_CPU_LEVEL >= 3)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id: cpu.h,v 1.328 2007-09-28 19:51:44 sshwarts Exp $
|
||||
// $Id: cpu.h,v 1.329 2007-10-01 19:59:36 sshwarts Exp $
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (C) 2001 MandrakeSoft S.A.
|
||||
@ -1971,7 +1971,7 @@ public: // for now...
|
||||
BX_SMF void MAXPS_VpsWps(bxInstruction_c *i);
|
||||
BX_SMF void MAXSS_VssWss(bxInstruction_c *i);
|
||||
BX_SMF void PSHUFW_PqQqIb(bxInstruction_c *i);
|
||||
BX_SMF void PSHUFLW_VqWqIb(bxInstruction_c *i);
|
||||
BX_SMF void PSHUFLW_VdqWdqIb(bxInstruction_c *i);
|
||||
BX_SMF void CMPPS_VpsWpsIb(bxInstruction_c *i);
|
||||
BX_SMF void CMPSS_VssWssIb(bxInstruction_c *i);
|
||||
BX_SMF void PINSRW_PqEwIb(bxInstruction_c *i);
|
||||
@ -2023,23 +2023,23 @@ public: // for now...
|
||||
BX_SMF void DIVSD_VsdWsd(bxInstruction_c *i);
|
||||
BX_SMF void MAXPD_VpdWpd(bxInstruction_c *i);
|
||||
BX_SMF void MAXSD_VsdWsd(bxInstruction_c *i);
|
||||
BX_SMF void PUNPCKLBW_VdqWq(bxInstruction_c *i);
|
||||
BX_SMF void PUNPCKLWD_VdqWq(bxInstruction_c *i);
|
||||
BX_SMF void UNPCKLPS_VpsWq(bxInstruction_c *i);
|
||||
BX_SMF void PACKSSWB_VdqWq(bxInstruction_c *i);
|
||||
BX_SMF void PCMPGTB_VdqWq(bxInstruction_c *i);
|
||||
BX_SMF void PCMPGTW_VdqWq(bxInstruction_c *i);
|
||||
BX_SMF void PUNPCKLBW_VdqWdq(bxInstruction_c *i);
|
||||
BX_SMF void PUNPCKLWD_VdqWdq(bxInstruction_c *i);
|
||||
BX_SMF void UNPCKLPS_VpsWdq(bxInstruction_c *i);
|
||||
BX_SMF void PACKSSWB_VdqWdq(bxInstruction_c *i);
|
||||
BX_SMF void PCMPGTB_VdqWdq(bxInstruction_c *i);
|
||||
BX_SMF void PCMPGTW_VdqWdq(bxInstruction_c *i);
|
||||
BX_SMF void PCMPGTD_VdqWdq(bxInstruction_c *i);
|
||||
BX_SMF void PACKUSWB_VdqWdq(bxInstruction_c *i);
|
||||
BX_SMF void PUNPCKHBW_VdqWq(bxInstruction_c *i);
|
||||
BX_SMF void PUNPCKHWD_VdqWq(bxInstruction_c *i);
|
||||
BX_SMF void UNPCKHPS_VpsWq(bxInstruction_c *i);
|
||||
BX_SMF void PUNPCKHBW_VdqWdq(bxInstruction_c *i);
|
||||
BX_SMF void PUNPCKHWD_VdqWdq(bxInstruction_c *i);
|
||||
BX_SMF void UNPCKHPS_VpsWdq(bxInstruction_c *i);
|
||||
BX_SMF void PACKSSDW_VdqWdq(bxInstruction_c *i);
|
||||
BX_SMF void PUNPCKLQDQ_VdqWq(bxInstruction_c *i);
|
||||
BX_SMF void PUNPCKHQDQ_VdqWq(bxInstruction_c *i);
|
||||
BX_SMF void PUNPCKLQDQ_VdqWdq(bxInstruction_c *i);
|
||||
BX_SMF void PUNPCKHQDQ_VdqWdq(bxInstruction_c *i);
|
||||
BX_SMF void MOVD_VdqEd(bxInstruction_c *i);
|
||||
BX_SMF void PSHUFD_VdqWdqIb(bxInstruction_c *i);
|
||||
BX_SMF void PSHUFHW_VqWqIb(bxInstruction_c *i);
|
||||
BX_SMF void PSHUFHW_VdqWdqIb(bxInstruction_c *i);
|
||||
BX_SMF void PCMPEQB_VdqWdq(bxInstruction_c *i);
|
||||
BX_SMF void PCMPEQW_VdqWdq(bxInstruction_c *i);
|
||||
BX_SMF void PCMPEQD_VdqWdq(bxInstruction_c *i);
|
||||
@ -2127,6 +2127,7 @@ public: // for now...
|
||||
BX_SMF void LDDQU_VdqMdq(bxInstruction_c *i);
|
||||
/* SSE3 */
|
||||
|
||||
// 3-byte opcodes
|
||||
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
|
||||
/* SSE3E */
|
||||
BX_SMF void PSHUFB_PqQq(bxInstruction_c *i);
|
||||
@ -2163,9 +2164,7 @@ public: // for now...
|
||||
BX_SMF void PABSD_VdqWdq(bxInstruction_c *i);
|
||||
BX_SMF void PALIGNR_VdqWdqIb(bxInstruction_c *i);
|
||||
/* SSE3E */
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_SSE >= 4
|
||||
/* SSE4.1 */
|
||||
BX_SMF void PBLENDVB_VdqWdq(bxInstruction_c *i);
|
||||
BX_SMF void BLENDVPS_VpsWps(bxInstruction_c *i);
|
||||
@ -2214,6 +2213,16 @@ public: // for now...
|
||||
BX_SMF void DPPD_VpdWpdIb(bxInstruction_c *i);
|
||||
BX_SMF void MPSADBW_VdqWdqIb(bxInstruction_c *i);
|
||||
/* SSE4.1 */
|
||||
|
||||
/* SSE4.2 */
|
||||
BX_SMF void CRC32_GdEb(bxInstruction_c *i);
|
||||
BX_SMF void CRC32_GdEv(bxInstruction_c *i);
|
||||
BX_SMF void PCMPGTQ_VdqWdq(bxInstruction_c *i);
|
||||
BX_SMF void PCMPESTRM_VdqWdqIb(bxInstruction_c *i);
|
||||
BX_SMF void PCMPESTRI_VdqWdqIb(bxInstruction_c *i);
|
||||
BX_SMF void PCMPISTRM_VdqWdqIb(bxInstruction_c *i);
|
||||
BX_SMF void PCMPISTRI_VdqWdqIb(bxInstruction_c *i);
|
||||
/* SSE4.2 */
|
||||
#endif
|
||||
|
||||
/*** Duplicate SSE instructions ***/
|
||||
@ -2229,8 +2238,8 @@ public: // for now...
|
||||
BX_SMF void MOVDQU_WdqVdq(bxInstruction_c *);
|
||||
BX_SMF void MOVDQA_VdqWdq(bxInstruction_c *);
|
||||
BX_SMF void MOVDQA_WdqVdq(bxInstruction_c *);
|
||||
BX_SMF void PUNPCKHDQ_VdqWq(bxInstruction_c *);
|
||||
BX_SMF void PUNPCKLDQ_VdqWq(bxInstruction_c *);
|
||||
BX_SMF void PUNPCKHDQ_VdqWdq(bxInstruction_c *);
|
||||
BX_SMF void PUNPCKLDQ_VdqWdq(bxInstruction_c *);
|
||||
BX_SMF void ANDPD_VpdWpd(bxInstruction_c *);
|
||||
BX_SMF void ANDNPD_VpdWpd(bxInstruction_c *);
|
||||
BX_SMF void ORPD_VpdWpd(bxInstruction_c *);
|
||||
@ -2239,8 +2248,8 @@ public: // for now...
|
||||
BX_SMF void PANDN_VdqWdq(bxInstruction_c *);
|
||||
BX_SMF void POR_VdqWdq(bxInstruction_c *);
|
||||
BX_SMF void PXOR_VdqWdq(bxInstruction_c *);
|
||||
BX_SMF void UNPCKHPD_VpdWq(bxInstruction_c *);
|
||||
BX_SMF void UNPCKLPD_VpdWq(bxInstruction_c *);
|
||||
BX_SMF void UNPCKHPD_VpdWdq(bxInstruction_c *);
|
||||
BX_SMF void UNPCKLPD_VpdWdq(bxInstruction_c *);
|
||||
BX_SMF void MOVLPD_VsdMq(bxInstruction_c *);
|
||||
BX_SMF void MOVLPD_MqVsd(bxInstruction_c *);
|
||||
BX_SMF void MOVHPD_VsdMq(bxInstruction_c *);
|
||||
@ -2271,8 +2280,8 @@ public: // for now...
|
||||
#define MOVDQA_VdqWdq /* 66 0f 6f */ SSE2_ALIAS(MOVAPS_VpsWps) /* 0f 28 */
|
||||
#define MOVDQA_WdqVdq /* 66 0f 7f */ SSE2_ALIAS(MOVAPS_WpsVps) /* 0f 29 */
|
||||
|
||||
#define PUNPCKLDQ_VdqWq /* 66 0f 62 */ SSE2_ALIAS(UNPCKLPS_VpsWq) /* 0f 14 */
|
||||
#define PUNPCKHDQ_VdqWq /* 66 0f 6a */ SSE2_ALIAS(UNPCKHPS_VpsWq) /* 0f 15 */
|
||||
#define PUNPCKLDQ_VdqWdq /* 66 0f 62 */ SSE2_ALIAS(UNPCKLPS_VpsWdq) /* 0f 14 */
|
||||
#define PUNPCKHDQ_VdqWdq /* 66 0f 6a */ SSE2_ALIAS(UNPCKHPS_VpsWdq) /* 0f 15 */
|
||||
|
||||
#define PAND_VdqWdq /* 66 0f db */ SSE2_ALIAS(ANDPS_VpsWps) /* 0f 54 */
|
||||
#define PANDN_VdqWdq /* 66 0f df */ SSE2_ALIAS(ANDNPS_VpsWps) /* 0f 55 */
|
||||
@ -2292,8 +2301,8 @@ public: // for now...
|
||||
#define MOVNTPD_MpdVpd /* 66 0f 2b */ SSE2_ALIAS(MOVNTPS_MpsVps) /* 0f 2b */
|
||||
#define MOVNTDQ_MdqVdq /* 66 0f e7 */ SSE2_ALIAS(MOVNTPS_MpsVps) /* 0f 2b */
|
||||
|
||||
#define UNPCKLPD_VpdWq /* 66 0f 14 */ PUNPCKLQDQ_VdqWq /* 66 0f 6c */
|
||||
#define UNPCKHPD_VpdWq /* 66 0f 15 */ PUNPCKHQDQ_VdqWq /* 66 0f 6d */
|
||||
#define UNPCKLPD_VpdWdq /* 66 0f 14 */ PUNPCKLQDQ_VdqWdq /* 66 0f 6c */
|
||||
#define UNPCKHPD_VpdWdq /* 66 0f 15 */ PUNPCKHQDQ_VdqWdq /* 66 0f 6d */
|
||||
|
||||
#define MOVNTDQA_VdqMdq /* 66 0f 38 2a */ SSE4_ALIAS(LDDQU_VdqMdq) /* f2 0f f0 */
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id: cpuid.cc,v 1.48 2007-09-27 16:11:32 sshwarts Exp $
|
||||
// $Id: cpuid.cc,v 1.49 2007-10-01 19:59:36 sshwarts Exp $
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (C) 2001 MandrakeSoft S.A.
|
||||
@ -160,7 +160,7 @@ Bit32u BX_CPU_C::get_extended_cpuid_features()
|
||||
features |= (1<<19); // support SSE4
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_POPCNT
|
||||
#if BX_SUPPORT_POPCNT || (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
|
||||
features |= (1<<23); // support POPCNT instruction
|
||||
#endif
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id: fetchdecode.h,v 1.31 2007-09-19 19:38:09 sshwarts Exp $
|
||||
// $Id: fetchdecode.h,v 1.32 2007-10-01 19:59:36 sshwarts Exp $
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2005 Stanislav Shwartsman
|
||||
@ -1049,15 +1049,15 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f13[4] = {
|
||||
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f14[4] = {
|
||||
/* -- */ { 0, &BX_CPU_C::UNPCKLPS_VpsWq },
|
||||
/* 66 */ { 0, &BX_CPU_C::UNPCKLPD_VpdWq },
|
||||
/* -- */ { 0, &BX_CPU_C::UNPCKLPS_VpsWdq },
|
||||
/* 66 */ { 0, &BX_CPU_C::UNPCKLPD_VpdWdq },
|
||||
/* F2 */ { 0, &BX_CPU_C::BxError },
|
||||
/* F3 */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f15[4] = {
|
||||
/* -- */ { 0, &BX_CPU_C::UNPCKHPS_VpsWq },
|
||||
/* 66 */ { 0, &BX_CPU_C::UNPCKHPD_VpdWq },
|
||||
/* -- */ { 0, &BX_CPU_C::UNPCKHPS_VpsWdq },
|
||||
/* 66 */ { 0, &BX_CPU_C::UNPCKHPD_VpdWdq },
|
||||
/* F2 */ { 0, &BX_CPU_C::BxError },
|
||||
/* F3 */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
@ -1246,42 +1246,42 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f5f[4] = {
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f60[4] = {
|
||||
/* -- */ { 0, &BX_CPU_C::PUNPCKLBW_PqQd },
|
||||
/* 66 */ { 0, &BX_CPU_C::PUNPCKLBW_VdqWq },
|
||||
/* 66 */ { 0, &BX_CPU_C::PUNPCKLBW_VdqWdq },
|
||||
/* F2 */ { 0, &BX_CPU_C::BxError },
|
||||
/* F3 */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f61[4] = {
|
||||
/* -- */ { 0, &BX_CPU_C::PUNPCKLWD_PqQd },
|
||||
/* 66 */ { 0, &BX_CPU_C::PUNPCKLWD_VdqWq },
|
||||
/* 66 */ { 0, &BX_CPU_C::PUNPCKLWD_VdqWdq },
|
||||
/* F2 */ { 0, &BX_CPU_C::BxError },
|
||||
/* F3 */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f62[4] = {
|
||||
/* -- */ { 0, &BX_CPU_C::PUNPCKLDQ_PqQd },
|
||||
/* 66 */ { 0, &BX_CPU_C::PUNPCKLDQ_VdqWq },
|
||||
/* 66 */ { 0, &BX_CPU_C::PUNPCKLDQ_VdqWdq },
|
||||
/* F2 */ { 0, &BX_CPU_C::BxError },
|
||||
/* F3 */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f63[4] = {
|
||||
/* -- */ { 0, &BX_CPU_C::PACKSSWB_PqQq },
|
||||
/* 66 */ { 0, &BX_CPU_C::PACKSSWB_VdqWq },
|
||||
/* 66 */ { 0, &BX_CPU_C::PACKSSWB_VdqWdq },
|
||||
/* F2 */ { 0, &BX_CPU_C::BxError },
|
||||
/* F3 */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f64[4] = {
|
||||
/* -- */ { 0, &BX_CPU_C::PCMPGTB_PqQq },
|
||||
/* 66 */ { 0, &BX_CPU_C::PCMPGTB_VdqWq },
|
||||
/* 66 */ { 0, &BX_CPU_C::PCMPGTB_VdqWdq },
|
||||
/* F2 */ { 0, &BX_CPU_C::BxError },
|
||||
/* F3 */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f65[4] = {
|
||||
/* -- */ { 0, &BX_CPU_C::PCMPGTW_PqQq },
|
||||
/* 66 */ { 0, &BX_CPU_C::PCMPGTW_VdqWq },
|
||||
/* 66 */ { 0, &BX_CPU_C::PCMPGTW_VdqWdq },
|
||||
/* F2 */ { 0, &BX_CPU_C::BxError },
|
||||
/* F3 */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
@ -1302,21 +1302,21 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f67[4] = {
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f68[4] = {
|
||||
/* -- */ { 0, &BX_CPU_C::PUNPCKHBW_PqQq },
|
||||
/* 66 */ { 0, &BX_CPU_C::PUNPCKHBW_VdqWq },
|
||||
/* 66 */ { 0, &BX_CPU_C::PUNPCKHBW_VdqWdq },
|
||||
/* F2 */ { 0, &BX_CPU_C::BxError },
|
||||
/* F3 */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f69[4] = {
|
||||
/* -- */ { 0, &BX_CPU_C::PUNPCKHWD_PqQq },
|
||||
/* 66 */ { 0, &BX_CPU_C::PUNPCKHWD_VdqWq },
|
||||
/* 66 */ { 0, &BX_CPU_C::PUNPCKHWD_VdqWdq },
|
||||
/* F2 */ { 0, &BX_CPU_C::BxError },
|
||||
/* F3 */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f6a[4] = {
|
||||
/* -- */ { 0, &BX_CPU_C::PUNPCKHDQ_PqQq },
|
||||
/* 66 */ { 0, &BX_CPU_C::PUNPCKHDQ_VdqWq },
|
||||
/* 66 */ { 0, &BX_CPU_C::PUNPCKHDQ_VdqWdq },
|
||||
/* F2 */ { 0, &BX_CPU_C::BxError },
|
||||
/* F3 */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
@ -1330,14 +1330,14 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f6b[4] = {
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f6c[4] = {
|
||||
/* -- */ { 0, &BX_CPU_C::BxError },
|
||||
/* 66 */ { 0, &BX_CPU_C::PUNPCKLQDQ_VdqWq },
|
||||
/* 66 */ { 0, &BX_CPU_C::PUNPCKLQDQ_VdqWdq },
|
||||
/* F2 */ { 0, &BX_CPU_C::BxError },
|
||||
/* F3 */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f6d[4] = {
|
||||
/* -- */ { 0, &BX_CPU_C::BxError },
|
||||
/* 66 */ { 0, &BX_CPU_C::PUNPCKHQDQ_VdqWq },
|
||||
/* 66 */ { 0, &BX_CPU_C::PUNPCKHQDQ_VdqWdq },
|
||||
/* F2 */ { 0, &BX_CPU_C::BxError },
|
||||
/* F3 */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
@ -1359,8 +1359,8 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f6f[4] = {
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f70[4] = {
|
||||
/* -- */ { 0, &BX_CPU_C::PSHUFW_PqQqIb },
|
||||
/* 66 */ { 0, &BX_CPU_C::PSHUFD_VdqWdqIb },
|
||||
/* F2 */ { 0, &BX_CPU_C::PSHUFHW_VqWqIb },
|
||||
/* F3 */ { 0, &BX_CPU_C::PSHUFLW_VqWqIb }
|
||||
/* F2 */ { 0, &BX_CPU_C::PSHUFHW_VdqWdqIb },
|
||||
/* F3 */ { 0, &BX_CPU_C::PSHUFLW_VdqWdqIb }
|
||||
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f74[4] = {
|
||||
@ -2116,6 +2116,13 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3835[4] = {
|
||||
/* F3 */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
|
||||
/* 0F 38 37: one slot per SSE prefix; only the 66 slot has a handler (PCMPGTQ) */
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3837[4] = {
  /* -- */ { 0, &BX_CPU_C::BxError        },
  /* 66 */ { 0, &BX_CPU_C::PCMPGTQ_VdqWdq },
  /* F2 */ { 0, &BX_CPU_C::BxError        },
  /* F3 */ { 0, &BX_CPU_C::BxError        }
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3838[4] = {
|
||||
/* -- */ { 0, &BX_CPU_C::BxError },
|
||||
/* 66 */ { 0, &BX_CPU_C::PMINSB_VdqWdq },
|
||||
@ -2186,6 +2193,20 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3841[4] = {
|
||||
/* F3 */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
|
||||
/* 0F 38 F0: one slot per SSE prefix; only the F2 slot has a handler (CRC32 Gd,Eb) */
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f38f0[4] = {
  /* -- */ { 0, &BX_CPU_C::BxError    },
  /* 66 */ { 0, &BX_CPU_C::BxError    },
  /* F2 */ { 0, &BX_CPU_C::CRC32_GdEb },
  /* F3 */ { 0, &BX_CPU_C::BxError    }
};

/* 0F 38 F1: one slot per SSE prefix; only the F2 slot has a handler (CRC32 Gd,Ev) */
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f38f1[4] = {
  /* -- */ { 0, &BX_CPU_C::BxError    },
  /* 66 */ { 0, &BX_CPU_C::BxError    },
  /* F2 */ { 0, &BX_CPU_C::CRC32_GdEv },
  /* F3 */ { 0, &BX_CPU_C::BxError    }
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3a08[4] = {
|
||||
/* -- */ { 0, &BX_CPU_C::BxError },
|
||||
/* 66 */ { 0, &BX_CPU_C::ROUNDPS_VpsWpsIb },
|
||||
@ -2312,6 +2333,34 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3a42[4] = {
|
||||
/* F3 */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
|
||||
/* 0F 3A 60..63: string-compare opcodes; in each table only the 66 slot has a handler */

static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3a60[4] = {
  /* -- */ { 0, &BX_CPU_C::BxError            },
  /* 66 */ { 0, &BX_CPU_C::PCMPESTRM_VdqWdqIb },
  /* F2 */ { 0, &BX_CPU_C::BxError            },
  /* F3 */ { 0, &BX_CPU_C::BxError            }
};

static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3a61[4] = {
  /* -- */ { 0, &BX_CPU_C::BxError            },
  /* 66 */ { 0, &BX_CPU_C::PCMPESTRI_VdqWdqIb },
  /* F2 */ { 0, &BX_CPU_C::BxError            },
  /* F3 */ { 0, &BX_CPU_C::BxError            }
};

static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3a62[4] = {
  /* -- */ { 0, &BX_CPU_C::BxError            },
  /* 66 */ { 0, &BX_CPU_C::PCMPISTRM_VdqWdqIb },
  /* F2 */ { 0, &BX_CPU_C::BxError            },
  /* F3 */ { 0, &BX_CPU_C::BxError            }
};

static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f3a63[4] = {
  /* -- */ { 0, &BX_CPU_C::BxError            },
  /* 66 */ { 0, &BX_CPU_C::PCMPISTRI_VdqWdqIb },
  /* F2 */ { 0, &BX_CPU_C::BxError            },
  /* F3 */ { 0, &BX_CPU_C::BxError            }
};
|
||||
|
||||
/* ************************************************************************ */
|
||||
/* 3-byte opcode table (Table A-4, 0F 38) */
|
||||
|
||||
@ -2380,7 +2429,7 @@ static const BxOpcodeInfo_t BxOpcode3ByteOp0f383x[16] = {
|
||||
/* 04 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3834 },
|
||||
/* 05 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3835 },
|
||||
/* 06 */ { 0, &BX_CPU_C::BxError },
|
||||
/* 07 */ { 0, &BX_CPU_C::BxError },
|
||||
/* 07 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3837 },
|
||||
/* 08 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3838 },
|
||||
/* 09 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3839 },
|
||||
/* 0A */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f383a },
|
||||
@ -2410,6 +2459,25 @@ static const BxOpcodeInfo_t BxOpcode3ByteOp0f384x[16] = {
|
||||
/* 0F */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
|
||||
/* 0F 38 Fx: sixteen slots; only slots 00 and 01 dispatch further, */
/* through the per-prefix group tables, and the rest are BxError   */
static const BxOpcodeInfo_t BxOpcode3ByteOp0f38fx[16] = {
  /* 00 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f38f0 },
  /* 01 */ { BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f38f1 },
  /* 02 */ { 0, &BX_CPU_C::BxError },
  /* 03 */ { 0, &BX_CPU_C::BxError },
  /* 04 */ { 0, &BX_CPU_C::BxError },
  /* 05 */ { 0, &BX_CPU_C::BxError },
  /* 06 */ { 0, &BX_CPU_C::BxError },
  /* 07 */ { 0, &BX_CPU_C::BxError },
  /* 08 */ { 0, &BX_CPU_C::BxError },
  /* 09 */ { 0, &BX_CPU_C::BxError },
  /* 0A */ { 0, &BX_CPU_C::BxError },
  /* 0B */ { 0, &BX_CPU_C::BxError },
  /* 0C */ { 0, &BX_CPU_C::BxError },
  /* 0D */ { 0, &BX_CPU_C::BxError },
  /* 0E */ { 0, &BX_CPU_C::BxError },
  /* 0F */ { 0, &BX_CPU_C::BxError }
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcode3ByteTableA4[16] = {
|
||||
/* 00 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f380x },
|
||||
/* 01 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f381x },
|
||||
@ -2426,7 +2494,7 @@ static const BxOpcodeInfo_t BxOpcode3ByteTableA4[16] = {
|
||||
/* 0C */ { 0, &BX_CPU_C::BxError },
|
||||
/* 0D */ { 0, &BX_CPU_C::BxError },
|
||||
/* 0E */ { 0, &BX_CPU_C::BxError },
|
||||
/* 0F */ { 0, &BX_CPU_C::BxError }
|
||||
/* 0F */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f38fx }
|
||||
};
|
||||
|
||||
/* ************************************************************************ */
|
||||
@ -2508,6 +2576,25 @@ static const BxOpcodeInfo_t BxOpcode3ByteOp0f3a4x[16] = {
|
||||
/* 0F */ { 0, &BX_CPU_C::BxError }
|
||||
};
|
||||
|
||||
/* 0F 3A 6x: sixteen slots; slots 00..03 carry the BxImmediate_Ib flag and */
/* dispatch through the per-prefix group tables, and the rest are BxError  */
static const BxOpcodeInfo_t BxOpcode3ByteOp0f3a6x[16] = {
  /* 00 */ { BxImmediate_Ib | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3a60 },
  /* 01 */ { BxImmediate_Ib | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3a61 },
  /* 02 */ { BxImmediate_Ib | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3a62 },
  /* 03 */ { BxImmediate_Ib | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f3a63 },
  /* 04 */ { 0, &BX_CPU_C::BxError },
  /* 05 */ { 0, &BX_CPU_C::BxError },
  /* 06 */ { 0, &BX_CPU_C::BxError },
  /* 07 */ { 0, &BX_CPU_C::BxError },
  /* 08 */ { 0, &BX_CPU_C::BxError },
  /* 09 */ { 0, &BX_CPU_C::BxError },
  /* 0A */ { 0, &BX_CPU_C::BxError },
  /* 0B */ { 0, &BX_CPU_C::BxError },
  /* 0C */ { 0, &BX_CPU_C::BxError },
  /* 0D */ { 0, &BX_CPU_C::BxError },
  /* 0E */ { 0, &BX_CPU_C::BxError },
  /* 0F */ { 0, &BX_CPU_C::BxError }
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcode3ByteTableA5[16] = {
|
||||
/* 00 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f3a0x },
|
||||
/* 01 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f3a1x },
|
||||
@ -2515,7 +2602,7 @@ static const BxOpcodeInfo_t BxOpcode3ByteTableA5[16] = {
|
||||
/* 03 */ { 0, &BX_CPU_C::BxError },
|
||||
/* 04 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f3a4x },
|
||||
/* 05 */ { 0, &BX_CPU_C::BxError },
|
||||
/* 06 */ { 0, &BX_CPU_C::BxError },
|
||||
/* 06 */ { Bx3ByteOpIndex, NULL, BxOpcode3ByteOp0f3a6x },
|
||||
/* 07 */ { 0, &BX_CPU_C::BxError },
|
||||
/* 08 */ { 0, &BX_CPU_C::BxError },
|
||||
/* 09 */ { 0, &BX_CPU_C::BxError },
|
||||
|
@ -1,5 +1,5 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id: protect_ctrl.cc,v 1.60 2007-09-30 18:47:41 sshwarts Exp $
|
||||
// $Id: protect_ctrl.cc,v 1.61 2007-10-01 19:59:36 sshwarts Exp $
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (C) 2001 MandrakeSoft S.A.
|
||||
@ -77,7 +77,6 @@ void BX_CPU_C::LAR_GvEw(bxInstruction_c *i)
|
||||
if (real_mode() || v8086_mode()) {
|
||||
BX_ERROR(("LAR: not recognized in real or virtual-8086 mode"));
|
||||
UndefinedOpcode(i);
|
||||
return;
|
||||
}
|
||||
|
||||
if (i->modC0()) {
|
||||
|
118
bochs/cpu/sse.cc
118
bochs/cpu/sse.cc
@ -1,5 +1,5 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id: sse.cc,v 1.48 2007-09-20 22:55:03 sshwarts Exp $
|
||||
// $Id: sse.cc,v 1.49 2007-10-01 19:59:36 sshwarts Exp $
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2003 Stanislav Shwartsman
|
||||
@ -29,6 +29,7 @@
|
||||
/* SSE Integer Operations (128bit MMX extensions) */
|
||||
/* ********************************************** */
|
||||
|
||||
// for 3-byte opcodes
|
||||
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
|
||||
|
||||
/* 66 0F 38 00 */
|
||||
@ -731,6 +732,37 @@ void BX_CPU_C::PACKUSDW_VdqWdq(bxInstruction_c *i)
|
||||
#endif
|
||||
}
|
||||
|
||||
/* 66 0F 38 37 */
|
||||
// PCMPGTQ Vdq, Wdq: compare the two packed quadwords of the destination
// XMM register against those of the source (XMM register or 16-byte
// aligned memory) for greater-than.  Each destination quadword becomes
// all ones when dest > src, otherwise zero.
//
// The comparison is SIGNED, as the instruction is defined on packed signed
// quadword integers.  Comparing the lanes as unsigned values gives the
// wrong mask whenever exactly one operand has its sign bit set, e.g. it
// would report -1 > 1.
void BX_CPU_C::PCMPGTQ_VdqWdq(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  // reinterpret each lane as a signed 64-bit integer before comparing
  op1.xmm64u(0) = ((Bit64s) op1.xmm64u(0) > (Bit64s) op2.xmm64u(0)) ?
         BX_CONST64(0xffffffffffffffff) : 0;

  op1.xmm64u(1) = ((Bit64s) op1.xmm64u(1) > (Bit64s) op2.xmm64u(1)) ?
         BX_CONST64(0xffffffffffffffff) : 0;

  /* now write result back to destination */
  BX_WRITE_XMM_REG(i->nnn(), op1);
#else
  BX_INFO(("PCMPGTQ_VdqWdq: required SSE4.2, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}
|
||||
|
||||
/* 66 0F 38 38 */
|
||||
void BX_CPU_C::PMINSB_VdqWdq(bxInstruction_c *i)
|
||||
{
|
||||
@ -1405,7 +1437,7 @@ void BX_CPU_C::MPSADBW_VdqWdqIb(bxInstruction_c *i)
|
||||
#endif // (BX_SUPPORT_SSE >= 4 || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
|
||||
|
||||
/* 66 0F 60 */
|
||||
void BX_CPU_C::PUNPCKLBW_VdqWq(bxInstruction_c *i)
|
||||
void BX_CPU_C::PUNPCKLBW_VdqWdq(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_SSE >= 2
|
||||
BX_CPU_THIS_PTR prepareSSE();
|
||||
@ -1441,13 +1473,13 @@ void BX_CPU_C::PUNPCKLBW_VdqWq(bxInstruction_c *i)
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
#else
|
||||
BX_INFO(("PUNPCKLBW_VdqWq: required SSE2, use --enable-sse option"));
|
||||
BX_INFO(("PUNPCKLBW_VdqWdq: required SSE2, use --enable-sse option"));
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* 66 0F 61 */
|
||||
void BX_CPU_C::PUNPCKLWD_VdqWq(bxInstruction_c *i)
|
||||
void BX_CPU_C::PUNPCKLWD_VdqWdq(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_SSE >= 2
|
||||
BX_CPU_THIS_PTR prepareSSE();
|
||||
@ -1475,14 +1507,14 @@ void BX_CPU_C::PUNPCKLWD_VdqWq(bxInstruction_c *i)
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
#else
|
||||
BX_INFO(("PUNPCKLWD_VdqWq: required SSE2, use --enable-sse option"));
|
||||
BX_INFO(("PUNPCKLWD_VdqWdq: required SSE2, use --enable-sse option"));
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* UNPCKLPS: 0F 14 */
|
||||
/* PUNPCKLDQ: 66 0F 62 */
|
||||
void BX_CPU_C::UNPCKLPS_VpsWq(bxInstruction_c *i)
|
||||
void BX_CPU_C::UNPCKLPS_VpsWdq(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_SSE >= 1
|
||||
BX_CPU_THIS_PTR prepareSSE();
|
||||
@ -1506,13 +1538,13 @@ void BX_CPU_C::UNPCKLPS_VpsWq(bxInstruction_c *i)
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
#else
|
||||
BX_INFO(("UNPCKLPS_VpsWq: required SSE, use --enable-sse option"));
|
||||
BX_INFO(("UNPCKLPS_VpsWdq: required SSE, use --enable-sse option"));
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* 66 0F 63 */
|
||||
void BX_CPU_C::PACKSSWB_VdqWq(bxInstruction_c *i)
|
||||
void BX_CPU_C::PACKSSWB_VdqWdq(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_SSE >= 2
|
||||
BX_CPU_THIS_PTR prepareSSE();
|
||||
@ -1549,13 +1581,13 @@ void BX_CPU_C::PACKSSWB_VdqWq(bxInstruction_c *i)
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
#else
|
||||
BX_INFO(("PACKSSWB_VdqWq: required SSE2, use --enable-sse option"));
|
||||
BX_INFO(("PACKSSWB_VdqWdq: required SSE2, use --enable-sse option"));
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* 66 0F 64 */
|
||||
void BX_CPU_C::PCMPGTB_VdqWq(bxInstruction_c *i)
|
||||
void BX_CPU_C::PCMPGTB_VdqWdq(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_SSE >= 2
|
||||
BX_CPU_THIS_PTR prepareSSE();
|
||||
@ -1572,19 +1604,19 @@ void BX_CPU_C::PCMPGTB_VdqWq(bxInstruction_c *i)
|
||||
}
|
||||
|
||||
for(unsigned j=0; j<16; j++) {
|
||||
op1.xmmsbyte(j) = (op1.xmmsbyte(j) > op2.xmmsbyte(j)) ? 0xff : 0;
|
||||
op1.xmmubyte(j) = (op1.xmmsbyte(j) > op2.xmmsbyte(j)) ? 0xff : 0;
|
||||
}
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), op1);
|
||||
#else
|
||||
BX_INFO(("PCMPGTB_VdqWq: required SSE2, use --enable-sse option"));
|
||||
BX_INFO(("PCMPGTB_VdqWdq: required SSE2, use --enable-sse option"));
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* 66 0F 65 */
|
||||
void BX_CPU_C::PCMPGTW_VdqWq(bxInstruction_c *i)
|
||||
void BX_CPU_C::PCMPGTW_VdqWdq(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_SSE >= 2
|
||||
BX_CPU_THIS_PTR prepareSSE();
|
||||
@ -1600,19 +1632,19 @@ void BX_CPU_C::PCMPGTW_VdqWq(bxInstruction_c *i)
|
||||
readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
|
||||
}
|
||||
|
||||
op1.xmm16s(0) = (op1.xmm16s(0) > op2.xmm16s(0)) ? 0xffff : 0;
|
||||
op1.xmm16s(1) = (op1.xmm16s(1) > op2.xmm16s(1)) ? 0xffff : 0;
|
||||
op1.xmm16s(2) = (op1.xmm16s(2) > op2.xmm16s(2)) ? 0xffff : 0;
|
||||
op1.xmm16s(3) = (op1.xmm16s(3) > op2.xmm16s(3)) ? 0xffff : 0;
|
||||
op1.xmm16s(4) = (op1.xmm16s(4) > op2.xmm16s(4)) ? 0xffff : 0;
|
||||
op1.xmm16s(5) = (op1.xmm16s(5) > op2.xmm16s(5)) ? 0xffff : 0;
|
||||
op1.xmm16s(6) = (op1.xmm16s(6) > op2.xmm16s(6)) ? 0xffff : 0;
|
||||
op1.xmm16s(7) = (op1.xmm16s(7) > op2.xmm16s(7)) ? 0xffff : 0;
|
||||
op1.xmm16u(0) = (op1.xmm16s(0) > op2.xmm16s(0)) ? 0xffff : 0;
|
||||
op1.xmm16u(1) = (op1.xmm16s(1) > op2.xmm16s(1)) ? 0xffff : 0;
|
||||
op1.xmm16u(2) = (op1.xmm16s(2) > op2.xmm16s(2)) ? 0xffff : 0;
|
||||
op1.xmm16u(3) = (op1.xmm16s(3) > op2.xmm16s(3)) ? 0xffff : 0;
|
||||
op1.xmm16u(4) = (op1.xmm16s(4) > op2.xmm16s(4)) ? 0xffff : 0;
|
||||
op1.xmm16u(5) = (op1.xmm16s(5) > op2.xmm16s(5)) ? 0xffff : 0;
|
||||
op1.xmm16u(6) = (op1.xmm16s(6) > op2.xmm16s(6)) ? 0xffff : 0;
|
||||
op1.xmm16u(7) = (op1.xmm16s(7) > op2.xmm16s(7)) ? 0xffff : 0;
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), op1);
|
||||
#else
|
||||
BX_INFO(("PCMPGTW_VdqWq: required SSE2, use --enable-sse option"));
|
||||
BX_INFO(("PCMPGTW_VdqWdq: required SSE2, use --enable-sse option"));
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
@ -1634,10 +1666,10 @@ void BX_CPU_C::PCMPGTD_VdqWdq(bxInstruction_c *i)
|
||||
readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
|
||||
}
|
||||
|
||||
op1.xmm32s(0) = (op1.xmm32s(0) > op2.xmm32s(0)) ? 0xffffffff : 0;
|
||||
op1.xmm32s(1) = (op1.xmm32s(1) > op2.xmm32s(1)) ? 0xffffffff : 0;
|
||||
op1.xmm32s(2) = (op1.xmm32s(2) > op2.xmm32s(2)) ? 0xffffffff : 0;
|
||||
op1.xmm32s(3) = (op1.xmm32s(3) > op2.xmm32s(3)) ? 0xffffffff : 0;
|
||||
op1.xmm32u(0) = (op1.xmm32s(0) > op2.xmm32s(0)) ? 0xffffffff : 0;
|
||||
op1.xmm32u(1) = (op1.xmm32s(1) > op2.xmm32s(1)) ? 0xffffffff : 0;
|
||||
op1.xmm32u(2) = (op1.xmm32s(2) > op2.xmm32s(2)) ? 0xffffffff : 0;
|
||||
op1.xmm32u(3) = (op1.xmm32s(3) > op2.xmm32s(3)) ? 0xffffffff : 0;
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), op1);
|
||||
@ -1691,7 +1723,7 @@ void BX_CPU_C::PACKUSWB_VdqWdq(bxInstruction_c *i)
|
||||
}
|
||||
|
||||
/* 66 0F 68 */
|
||||
void BX_CPU_C::PUNPCKHBW_VdqWq(bxInstruction_c *i)
|
||||
void BX_CPU_C::PUNPCKHBW_VdqWdq(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_SSE >= 2
|
||||
BX_CPU_THIS_PTR prepareSSE();
|
||||
@ -1727,13 +1759,13 @@ void BX_CPU_C::PUNPCKHBW_VdqWq(bxInstruction_c *i)
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
#else
|
||||
BX_INFO(("PUNPCKHBW_VdqWq: required SSE2, use --enable-sse option"));
|
||||
BX_INFO(("PUNPCKHBW_VdqWdq: required SSE2, use --enable-sse option"));
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* 66 0F 69 */
|
||||
void BX_CPU_C::PUNPCKHWD_VdqWq(bxInstruction_c *i)
|
||||
void BX_CPU_C::PUNPCKHWD_VdqWdq(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_SSE >= 2
|
||||
BX_CPU_THIS_PTR prepareSSE();
|
||||
@ -1761,14 +1793,14 @@ void BX_CPU_C::PUNPCKHWD_VdqWq(bxInstruction_c *i)
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
#else
|
||||
BX_INFO(("PUNPCKHWD_VdqWq: required SSE2, use --enable-sse option"));
|
||||
BX_INFO(("PUNPCKHWD_VdqWdq: required SSE2, use --enable-sse option"));
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* UNPCKHPS: 0F 15 */
|
||||
/* PUNPCKHDQ: 66 0F 6A */
|
||||
void BX_CPU_C::UNPCKHPS_VpsWq(bxInstruction_c *i)
|
||||
void BX_CPU_C::UNPCKHPS_VpsWdq(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_SSE >= 1
|
||||
BX_CPU_THIS_PTR prepareSSE();
|
||||
@ -1792,7 +1824,7 @@ void BX_CPU_C::UNPCKHPS_VpsWq(bxInstruction_c *i)
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
#else
|
||||
BX_INFO(("UNPCKHPS_VpsWq: required SSE, use --enable-sse option"));
|
||||
BX_INFO(("UNPCKHPS_VpsWdq: required SSE, use --enable-sse option"));
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
@ -1833,7 +1865,7 @@ void BX_CPU_C::PACKSSDW_VdqWdq(bxInstruction_c *i)
|
||||
}
|
||||
/* UNPCKLPD: 66 0F 14 */
|
||||
/* PUNPCKLQDQ: 66 0F 6C */
|
||||
void BX_CPU_C::PUNPCKLQDQ_VdqWq(bxInstruction_c *i)
|
||||
void BX_CPU_C::PUNPCKLQDQ_VdqWdq(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_SSE >= 2
|
||||
BX_CPU_THIS_PTR prepareSSE();
|
||||
@ -1854,14 +1886,14 @@ void BX_CPU_C::PUNPCKLQDQ_VdqWq(bxInstruction_c *i)
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), op1);
|
||||
#else
|
||||
BX_INFO(("PUNPCKLQDQ_VdqWq: required SSE2, use --enable-sse option"));
|
||||
BX_INFO(("PUNPCKLQDQ_VdqWdq: required SSE2, use --enable-sse option"));
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* UNPCKHPD: 66 0F 15 */
|
||||
/* PUNPCKHQDQ: 66 0F 6D */
|
||||
void BX_CPU_C::PUNPCKHQDQ_VdqWq(bxInstruction_c *i)
|
||||
void BX_CPU_C::PUNPCKHQDQ_VdqWdq(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_SSE >= 2
|
||||
BX_CPU_THIS_PTR prepareSSE();
|
||||
@ -1883,7 +1915,7 @@ void BX_CPU_C::PUNPCKHQDQ_VdqWq(bxInstruction_c *i)
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
#else
|
||||
BX_INFO(("PUNPCKHQDQ_VdqWq: required SSE2, use --enable-sse option"));
|
||||
BX_INFO(("PUNPCKHQDQ_VdqWdq: required SSE2, use --enable-sse option"));
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
@ -1903,7 +1935,7 @@ void BX_CPU_C::PSHUFD_VdqWdqIb(bxInstruction_c *i)
|
||||
}
|
||||
else {
|
||||
/* pointer, segment address pair */
|
||||
read_virtual_dqword(i->seg(), RMAddr(i), (Bit8u *) &op);
|
||||
readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
|
||||
}
|
||||
|
||||
result.xmm32u(0) = op.xmm32u((order >> 0) & 0x3);
|
||||
@ -1920,7 +1952,7 @@ void BX_CPU_C::PSHUFD_VdqWdqIb(bxInstruction_c *i)
|
||||
}
|
||||
|
||||
/* F2 0F 70 */
|
||||
void BX_CPU_C::PSHUFHW_VqWqIb(bxInstruction_c *i)
|
||||
void BX_CPU_C::PSHUFHW_VdqWdqIb(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_SSE >= 2
|
||||
BX_CPU_THIS_PTR prepareSSE();
|
||||
@ -1934,7 +1966,7 @@ void BX_CPU_C::PSHUFHW_VqWqIb(bxInstruction_c *i)
|
||||
}
|
||||
else {
|
||||
/* pointer, segment address pair */
|
||||
read_virtual_dqword(i->seg(), RMAddr(i), (Bit8u *) &op);
|
||||
readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
|
||||
}
|
||||
|
||||
result.xmm64u(0) = op.xmm64u(0);
|
||||
@ -1946,13 +1978,13 @@ void BX_CPU_C::PSHUFHW_VqWqIb(bxInstruction_c *i)
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
#else
|
||||
BX_INFO(("PSHUFHW_VqWqIb: required SSE2, use --enable-sse option"));
|
||||
BX_INFO(("PSHUFHW_VdqWdqIb: required SSE2, use --enable-sse option"));
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* F3 0F 70 */
|
||||
void BX_CPU_C::PSHUFLW_VqWqIb(bxInstruction_c *i)
|
||||
void BX_CPU_C::PSHUFLW_VdqWdqIb(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_SUPPORT_SSE >= 2
|
||||
BX_CPU_THIS_PTR prepareSSE();
|
||||
@ -1966,7 +1998,7 @@ void BX_CPU_C::PSHUFLW_VqWqIb(bxInstruction_c *i)
|
||||
}
|
||||
else {
|
||||
/* pointer, segment address pair */
|
||||
read_virtual_dqword(i->seg(), RMAddr(i), (Bit8u *) &op);
|
||||
readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
|
||||
}
|
||||
|
||||
result.xmm16u(0) = op.xmm16u((order >> 0) & 0x3);
|
||||
@ -1978,7 +2010,7 @@ void BX_CPU_C::PSHUFLW_VqWqIb(bxInstruction_c *i)
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
#else
|
||||
BX_INFO(("PSHUFLW_VqWqIb: required SSE, use --enable-sse option"));
|
||||
BX_INFO(("PSHUFLW_VdqWdqIb: required SSE, use --enable-sse option"));
|
||||
UndefinedOpcode(i);
|
||||
#endif
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id: sse_pfp.cc,v 1.34 2007-09-27 16:23:29 sshwarts Exp $
|
||||
// $Id: sse_pfp.cc,v 1.35 2007-10-01 19:59:36 sshwarts Exp $
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2003 Stanislav Shwartsman
|
||||
@ -2972,6 +2972,7 @@ void BX_CPU_C::ADDSUBPS_VpsWps(bxInstruction_c *i)
|
||||
#endif
|
||||
}
|
||||
|
||||
// for 3-byte opcodes
|
||||
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
|
||||
|
||||
/* 66 0F 3A 08 */
|
||||
|
548
bochs/cpu/sse_string.cc
Executable file
548
bochs/cpu/sse_string.cc
Executable file
@ -0,0 +1,548 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id: sse_string.cc,v 1.1 2007-10-01 19:59:37 sshwarts Exp $
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2007 Stanislav Shwartsman
|
||||
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 2 of the License, or (at your option) any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
// Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License along with this library; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
//
|
||||
|
||||
|
||||
#define NEED_CPU_REG_SHORTCUTS 1
|
||||
#include "bochs.h"
|
||||
#include "cpu.h"
|
||||
#define LOG_THIS BX_CPU_THIS_PTR
|
||||
|
||||
// Make code more tidy with a few macros.
|
||||
#if BX_SUPPORT_X86_64==0
|
||||
#define RCX ECX
|
||||
#endif
|
||||
|
||||
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
|
||||
|
||||
// Compare all pairs of Ai, Bj according to imm8 control
|
||||
static void compare_strings(bx_bool BoolRes[16][16], BxPackedXmmRegister op1, BxPackedXmmRegister op2, Bit8u imm)
|
||||
{
|
||||
unsigned i, j;
|
||||
unsigned aggregation_operation = (imm >> 2) & 3;
|
||||
|
||||
// All possible comparisons are performed, the individual boolean
|
||||
// results of those comparisons are referred by
|
||||
// BoolRes[op2 element index, op1 element index]
|
||||
|
||||
switch (imm & 3) {
|
||||
case 0: /* unsigned bytes compare */
|
||||
for (i=0;i<16;i++) {
|
||||
for (j=0;j<16;j++) {
|
||||
switch (aggregation_operation) {
|
||||
case 0: /* 'equal' comparison */
|
||||
case 2:
|
||||
case 3:
|
||||
BoolRes[j][i] = (op1.xmmubyte(i) == op2.xmmubyte(j));
|
||||
break;
|
||||
case 1: /* 'ranges' comparison */
|
||||
if ((i % 2) == 0)
|
||||
BoolRes[j][i] = (op1.xmmubyte(i) <= op2.xmmubyte(j));
|
||||
else
|
||||
BoolRes[j][i] = (op1.xmmubyte(i) >= op2.xmmubyte(j));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 1: /* unsigned words compare */
|
||||
for (i=0;i<8;i++) {
|
||||
for (j=0;j<8;j++) {
|
||||
switch (aggregation_operation) {
|
||||
case 0: /* 'equal' comparison */
|
||||
case 2:
|
||||
case 3:
|
||||
BoolRes[j][i] = (op1.xmm16u(i) == op2.xmm16u(j));
|
||||
break;
|
||||
case 1: /* 'ranges' comparison */
|
||||
if ((i % 2) == 0)
|
||||
BoolRes[j][i] = (op1.xmm16u(i) <= op2.xmm16u(j));
|
||||
else
|
||||
BoolRes[j][i] = (op1.xmm16u(i) >= op2.xmm16u(j));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 2: /* signed bytes compare */
|
||||
for (i=0;i<16;i++) {
|
||||
for (j=0;j<16;j++) {
|
||||
switch (aggregation_operation) {
|
||||
case 0: /* 'equal' comparison */
|
||||
case 2:
|
||||
case 3:
|
||||
BoolRes[j][i] = (op1.xmmsbyte(i) == op2.xmmsbyte(j));
|
||||
break;
|
||||
case 1: /* 'ranges' comparison */
|
||||
if ((i % 2) == 0)
|
||||
BoolRes[j][i] = (op1.xmmsbyte(i) <= op2.xmmsbyte(j));
|
||||
else
|
||||
BoolRes[j][i] = (op1.xmmsbyte(i) >= op2.xmmsbyte(j));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 3: /* signed words compare */
|
||||
for (i=0;i<8;i++) {
|
||||
for (j=0;j<8;j++) {
|
||||
switch (aggregation_operation) {
|
||||
case 0: /* 'equal' comparison */
|
||||
case 2:
|
||||
case 3:
|
||||
BoolRes[j][i] = (op1.xmm16s(i) == op2.xmm16s(j));
|
||||
break;
|
||||
case 1: /* 'ranges' comparison */
|
||||
if ((i % 2) == 0)
|
||||
BoolRes[j][i] = (op1.xmm16s(i) <= op2.xmm16s(j));
|
||||
else
|
||||
BoolRes[j][i] = (op1.xmm16s(i) >= op2.xmm16s(j));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Turn an explicit 32-bit string length into an element count: the
// magnitude of reg32, saturated at the number of elements selected by
// imm bit 0 (8 when set, 16 when clear).
static unsigned find_eos32(Bit32s reg32, Bit8u imm)
{
  const Bit32s limit = (imm & 0x1) ? 8 : 16;

  // anything outside [-limit, +limit] saturates to the element count
  if (reg32 > limit || reg32 < -limit)
    return limit;

  return abs(reg32);
}
|
||||
|
||||
#if BX_SUPPORT_X86_64
|
||||
// 64-bit flavour of the explicit length conversion: the magnitude of
// reg64, saturated at the number of elements selected by imm bit 0
// (8 when set, 16 when clear).
static unsigned find_eos64(Bit64s reg64, Bit8u imm)
{
  const int limit = (imm & 0x1) ? 8 : 16;

  // anything outside [-limit, +limit] saturates to the element count
  if (reg64 > limit || reg64 < -limit)
    return limit;

  return abs(reg64);
}
|
||||
#endif
|
||||
|
||||
// Implicit string length: index of the first zero element of op, scanning
// words when imm bit 0 is set and bytes otherwise. Returns the full
// element count (8 or 16) when no zero element is present.
static unsigned find_eos(BxPackedXmmRegister op, Bit8u imm)
{
  unsigned pos = 0;

  if (imm & 0x1) { // 8 word elements
    while (pos < 8 && op.xmm16u(pos) != 0)
      pos++;
  }
  else { // 16 byte elements
    while (pos < 16 && op.xmmubyte(pos) != 0)
      pos++;
  }

  return pos;
}
|
||||
|
||||
// Replace a raw comparison result when one of the compared elements lies
// beyond the end of its string. i_valid / j_valid tell whether the op1 /
// op2 element is inside its string; the aggregation operation in
// imm[3:2] selects the forced value:
//
//   equal any, ranges : any invalid element      -> 0
//   equal each        : both invalid             -> 1, exactly one -> 0
//   equal ordered     : op1 element invalid      -> 1,
//                       only op2 element invalid -> 0
//
// When both elements are valid, val is returned unchanged.
static bx_bool override_if_data_invalid(bx_bool val, bx_bool i_valid, bx_bool j_valid, Bit8u imm)
{
  // nothing to override when both elements are inside their strings
  if (i_valid && j_valid)
    return val;

  // from here on at least one of the two elements is invalid
  switch ((imm >> 2) & 3) {
    case 2: // 'equal each'
      return (! i_valid && ! j_valid) ? 1 : 0;

    case 3: // 'equal ordered'
      return (! i_valid) ? 1 : 0;

    default: // 'equal any' and 'ranges'
      return 0;
  }
}
|
||||
|
||||
// Reduce the pairwise comparison matrix to one result bit per op2 element
// and then apply the polarity selected by imm[5:4].
//
//   BoolRes    - matrix indexed [op2 element][op1 element]
//   len1, len2 - number of valid elements in op1 / op2
//   imm        - control byte: bit 0 selects 8 (set) or 16 (clear)
//                elements, bits 3:2 the aggregation operation, bits 5:4
//                the polarity
//
// Returns the bit mask; bit b corresponds to op2 element b.
static Bit16u aggregate(bx_bool BoolRes[16][16], unsigned len1, unsigned len2, Bit8u imm)
{
  const unsigned mode     = (imm >> 2) & 3;
  const unsigned count    = (imm & 0x1) ? 8 : 16;
  const unsigned polarity = (imm >> 4) & 3;
  unsigned a, b;

  Bit16u mask = 0;

  for (b = 0; b < count; b++) {
    bx_bool hit = 0;

    switch (mode) {
      case 0: // 'equal any': op2 element matches at least one op1 element
        for (a = 0; a < count && ! hit; a++) {
          if (override_if_data_invalid(BoolRes[b][a], (a < len1), (b < len2), imm))
            hit = 1;
        }
        break;

      case 1: // 'ranges': op2 element lies inside at least one op1 pair
        for (a = 0; a < count && ! hit; a += 2) {
          if (override_if_data_invalid(BoolRes[b][a], (a < len1), (b < len2), imm) &&
              override_if_data_invalid(BoolRes[b][a+1], (a+1 < len1), (b < len2), imm))
            hit = 1;
        }
        break;

      case 2: // 'equal each': only the diagonal is considered
        if (override_if_data_invalid(BoolRes[b][b], (b < len1), (b < len2), imm))
          hit = 1;
        break;

      case 3: // 'equal ordered': op1 must match op2 starting at offset b
        hit = 1;
        for (a = 0; (a + b < count) && hit; a++) {
          if (! override_if_data_invalid(BoolRes[a+b][a], (a < len1), (a+b < len2), imm))
            hit = 0;
        }
        break;
    }

    if (hit)
      mask |= (1<<b);
  }

  if (polarity == 1) {
    // negate every result bit
    mask ^= (count == 8) ? 0xFF : 0xFFFF;
  }
  else if (polarity == 3) {
    // negate only the bits that belong to valid op2 elements
    for (b = 0; b < count && b < len2; b++)
      mask ^= (1<<b);
  }
  // polarity 0 and 2 leave the result untouched

  return mask;
}
|
||||
|
||||
#endif // (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
|
||||
|
||||
// for 3-byte opcodes
|
||||
#if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
|
||||
|
||||
/* 66 0F 3A 60 */
|
||||
// PCMPESTRM: packed compare of two explicit-length strings, returning a
// mask. The string lengths come from RAX/RDX when os64L() is set and from
// EAX/EDX otherwise; the mask is delivered in XMM0 and CF/ZF/SF/OF are
// updated from the intermediate result and the string lengths.
void BX_CPU_C::PCMPESTRM_VdqWdqIb(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
  Bit8u imm8 = i->Ib();

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    // NOTE(review): Intel documents the memory operand of the SSE4.2
    // string instructions as not requiring 16-byte alignment; confirm
    // whether this aligned read should be an unaligned one.
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  // compare all pairs of Ai, Bj
  bx_bool BoolRes[16][16];
  compare_strings(BoolRes, op1, op2, imm8);
  unsigned len1, len2, num_elements = (imm8 & 0x1) ? 8 : 16;

#if BX_SUPPORT_X86_64
  if (i->os64L()) {
    len1 = find_eos64(RAX, imm8);
    len2 = find_eos64(RDX, imm8);
  }
  else
#endif
  {
    len1 = find_eos32(EAX, imm8);
    len2 = find_eos32(EDX, imm8);
  }
  Bit16u result2 = aggregate(BoolRes, len1, len2, imm8);

  // As defined by imm8[6], result2 is then either stored to the least
  // significant bits of XMM0 (zero extended to 128 bits) or expanded
  // into a byte/word-mask and then stored to XMM0
  if (imm8 & 0x40) {
    if (num_elements == 8) {
      for (int index = 0; index < 8; index++)
        result.xmm16u(index) = (result2 & (1<<index)) ? 0xffff : 0;
    }
    else { // num_elements = 16
      for (int index = 0; index < 16; index++)
        result.xmmubyte(index) = (result2 & (1<<index)) ? 0xff : 0;
    }
  }
  else {
    result.xmm64u(1) = 0;
    result.xmm64u(0) = (Bit64u) result2;
  }

  // CF - some result bit is set, SF/ZF - op1/op2 string is shorter than
  // the vector, OF - bit 0 of the result
  Bit32u flags = 0;
  if (result2 != 0) flags |= EFlagsCFMask;
  if (len1 < num_elements) flags |= EFlagsSFMask;
  if (len2 < num_elements) flags |= EFlagsZFMask;
  if (result2 & 0x1)
    flags |= EFlagsOFMask;
  setEFlagsOSZAPC(flags);

  /* the mask is an implicit output in XMM0; the register encoded in the
     ModRM reg field is a source operand only and must stay untouched */
  BX_WRITE_XMM_REG(0, result);
#else
  BX_INFO(("PCMPESTRM_VdqWdqIb: required SSE4.2, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}
|
||||
|
||||
/* 66 0F 3A 61 */
|
||||
// PCMPESTRI: packed compare of two explicit-length strings, returning an
// index. The string lengths come from RAX/RDX when os64L() is set and from
// EAX/EDX otherwise; the index of the selected result bit is written to
// RCX and CF/ZF/SF/OF are updated from the intermediate result and the
// string lengths.
void BX_CPU_C::PCMPESTRI_VdqWdqIb(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
  Bit8u imm8 = i->Ib();

  /* second operand comes from a register or from memory */
  if (! i->modC0()) {
    // NOTE(review): Intel documents the memory operand of the SSE4.2
    // string instructions as not requiring 16-byte alignment; confirm
    // whether this aligned read should be an unaligned one.
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }
  else {
    op2 = BX_READ_XMM_REG(i->rm());
  }

  // pairwise comparison matrix of all Ai, Bj
  bx_bool BoolRes[16][16];
  compare_strings(BoolRes, op1, op2, imm8);

  const unsigned num_elements = (imm8 & 0x1) ? 8 : 16;
  unsigned len1, len2;

#if BX_SUPPORT_X86_64
  if (i->os64L()) {
    len1 = find_eos64(RAX, imm8);
    len2 = find_eos64(RDX, imm8);
  }
  else
#endif
  {
    len1 = find_eos32(EAX, imm8);
    len2 = find_eos32(EDX, imm8);
  }

  Bit16u result2 = aggregate(BoolRes, len1, len2, imm8);

  // Pick the most (imm8[6] set) or least (imm8[6] clear) significant set
  // bit of result2; when no bit is set the element count (16 or 8) is
  // reported instead
  int index = (int) num_elements;
  if (imm8 & 0x40) {
    for (int bit = num_elements-1; bit >= 0; bit--) {
      if (result2 & (1<<bit)) {
        index = bit;
        break;
      }
    }
  }
  else {
    for (int bit = 0; bit < (int) num_elements; bit++) {
      if (result2 & (1<<bit)) {
        index = bit;
        break;
      }
    }
  }
  RCX = index;

  // CF - some result bit is set, SF/ZF - op1/op2 string is shorter than
  // the vector, OF - bit 0 of the result
  Bit32u flags = 0;
  if (result2 & 0x1) flags |= EFlagsOFMask;
  if (len2 < num_elements) flags |= EFlagsZFMask;
  if (len1 < num_elements) flags |= EFlagsSFMask;
  if (result2 != 0) flags |= EFlagsCFMask;
  setEFlagsOSZAPC(flags);

#else
  BX_INFO(("PCMPESTRI_VdqWdqIb: required SSE4.2, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}
|
||||
|
||||
/* 66 0F 3A 62 */
|
||||
// PCMPISTRM: packed compare of two implicit-length strings, returning a
// mask. Each string ends at its first zero element (see find_eos); the
// mask is delivered in XMM0 and CF/ZF/SF/OF are updated from the
// intermediate result and the string lengths.
void BX_CPU_C::PCMPISTRM_VdqWdqIb(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
  Bit8u imm8 = i->Ib();

  /* op2 is a register or memory reference */
  if (i->modC0()) {
    op2 = BX_READ_XMM_REG(i->rm());
  }
  else {
    /* pointer, segment address pair */
    // NOTE(review): Intel documents the memory operand of the SSE4.2
    // string instructions as not requiring 16-byte alignment; confirm
    // whether this aligned read should be an unaligned one.
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

  // compare all pairs of Ai, Bj
  bx_bool BoolRes[16][16];
  compare_strings(BoolRes, op1, op2, imm8);

  unsigned num_elements = (imm8 & 0x1) ? 8 : 16;
  unsigned len1 = find_eos(op1, imm8);
  unsigned len2 = find_eos(op2, imm8);
  Bit16u result2 = aggregate(BoolRes, len1, len2, imm8);

  // As defined by imm8[6], result2 is then either stored to the least
  // significant bits of XMM0 (zero extended to 128 bits) or expanded
  // into a byte/word-mask and then stored to XMM0
  if (imm8 & 0x40) {
    if (num_elements == 8) {
      for (int index = 0; index < 8; index++)
        result.xmm16u(index) = (result2 & (1<<index)) ? 0xffff : 0;
    }
    else { // num_elements = 16
      for (int index = 0; index < 16; index++)
        result.xmmubyte(index) = (result2 & (1<<index)) ? 0xff : 0;
    }
  }
  else {
    result.xmm64u(1) = 0;
    result.xmm64u(0) = (Bit64u) result2;
  }

  // CF - some result bit is set, SF/ZF - op1/op2 string is shorter than
  // the vector, OF - bit 0 of the result
  Bit32u flags = 0;
  if (result2 != 0) flags |= EFlagsCFMask;
  if (len1 < num_elements) flags |= EFlagsSFMask;
  if (len2 < num_elements) flags |= EFlagsZFMask;
  if (result2 & 0x1)
    flags |= EFlagsOFMask;
  setEFlagsOSZAPC(flags);

  /* the mask is an implicit output in XMM0; the register encoded in the
     ModRM reg field is a source operand only and must stay untouched */
  BX_WRITE_XMM_REG(0, result);
#else
  BX_INFO(("PCMPISTRM_VdqWdqIb: required SSE4.2, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}
|
||||
|
||||
/* 66 0F 3A 63 */
|
||||
// PCMPISTRI: packed compare of two implicit-length strings, returning an
// index. Each string ends at its first zero element (see find_eos); the
// index of the selected result bit is written to RCX and CF/ZF/SF/OF are
// updated from the intermediate result and the string lengths.
void BX_CPU_C::PCMPISTRI_VdqWdqIb(bxInstruction_c *i)
{
#if (BX_SUPPORT_SSE >= 5) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
  BX_CPU_THIS_PTR prepareSSE();

  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2;
  Bit8u imm8 = i->Ib();

  /* second operand comes from a register or from memory */
  if (! i->modC0()) {
    // NOTE(review): Intel documents the memory operand of the SSE4.2
    // string instructions as not requiring 16-byte alignment; confirm
    // whether this aligned read should be an unaligned one.
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }
  else {
    op2 = BX_READ_XMM_REG(i->rm());
  }

  // pairwise comparison matrix of all Ai, Bj
  bx_bool BoolRes[16][16];
  compare_strings(BoolRes, op1, op2, imm8);

  const unsigned num_elements = (imm8 & 0x1) ? 8 : 16;
  const unsigned len1 = find_eos(op1, imm8);
  const unsigned len2 = find_eos(op2, imm8);
  Bit16u result2 = aggregate(BoolRes, len1, len2, imm8);

  // Pick the most (imm8[6] set) or least (imm8[6] clear) significant set
  // bit of result2; when no bit is set the element count (16 or 8) is
  // reported instead
  int index = (int) num_elements;
  if (imm8 & 0x40) {
    for (int bit = num_elements-1; bit >= 0; bit--) {
      if (result2 & (1<<bit)) {
        index = bit;
        break;
      }
    }
  }
  else {
    for (int bit = 0; bit < (int) num_elements; bit++) {
      if (result2 & (1<<bit)) {
        index = bit;
        break;
      }
    }
  }
  RCX = index;

  // CF - some result bit is set, SF/ZF - op1/op2 string is shorter than
  // the vector, OF - bit 0 of the result
  Bit32u flags = 0;
  if (result2 & 0x1) flags |= EFlagsOFMask;
  if (len2 < num_elements) flags |= EFlagsZFMask;
  if (len1 < num_elements) flags |= EFlagsSFMask;
  if (result2 != 0) flags |= EFlagsCFMask;
  setEFlagsOSZAPC(flags);

#else
  BX_INFO(("PCMPISTRI_VdqWdqIb: required SSE4.2, use --enable-sse and --enable-sse-extension options"));
  UndefinedOpcode(i);
#endif
}
|
||||
|
||||
#endif // (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
|
Loading…
Reference in New Issue
Block a user