Fixes and small optimizations for AVX and XOP decoding

This commit is contained in:
Stanislav Shwartsman 2013-09-05 18:29:50 +00:00
parent f36364bc65
commit 69f947cef2
8 changed files with 107 additions and 81 deletions

View File

@ -2114,6 +2114,7 @@ public: // for now...
BX_SMF BX_INSF_TYPE LOAD_Wb(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE LOAD_Wb(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#if BX_SUPPORT_AVX #if BX_SUPPORT_AVX
BX_SMF BX_INSF_TYPE LOAD_Vector(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE LOAD_Vector(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE LOAD_Half_Vector(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#endif #endif
#if BX_SUPPORT_FPU == 0 // if FPU is disabled #if BX_SUPPORT_FPU == 0 // if FPU is disabled
@ -3605,6 +3606,9 @@ public: // for now...
BX_SMF BX_INSF_TYPE CVTTSS2SI_GqWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE CVTTSS2SI_GqWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE CVTSD2SI_GqWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE CVTSD2SI_GqWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE CVTSS2SI_GqWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE CVTSS2SI_GqWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE PEXTRQ_EqVdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE PEXTRQ_EqVdqIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#endif // #if BX_SUPPORT_X86_64 #endif // #if BX_SUPPORT_X86_64
BX_SMF BX_INSF_TYPE RDTSCP(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE RDTSCP(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
@ -5070,11 +5074,10 @@ enum {
#define BxGroupN 0x0060 // Group encoding: 0110 #define BxGroupN 0x0060 // Group encoding: 0110
#define BxSplitGroupN 0x0070 // Group encoding: 0111 #define BxSplitGroupN 0x0070 // Group encoding: 0111
#define BxFPEscape 0x0080 // Group encoding: 1000 #define BxFPEscape 0x0080 // Group encoding: 1000
#define Bx3ByteOp 0x0090 // Group encoding: 1001 #define BxOSizeGrp 0x0090 // Group encoding: 1001
#define BxOSizeGrp 0x00A0 // Group encoding: 1010 #define BxSplitVexW 0x00A0 // Group encoding: 1010
#define BxSplitVexW 0x00B0 // Group encoding: 1011 #define BxSplitVexW64 0x00B0 // Group encoding: 1011 - VexW ignored in 32-bit mode
#define BxSplitVexW64 0x00C0 // Group encoding: 1100 - VexW ignored in 32-bit mode #define BxSplitMod11B 0x00C0 // Group encoding: 1100
#define BxSplitMod11B 0x00D0 // Group encoding: 1101
// The BxImmediate2 mask specifies kind of second immediate data // The BxImmediate2 mask specifies kind of second immediate data
// required by instruction. // required by instruction.

View File

@ -513,13 +513,13 @@ static const BxOpcodeInfo_t BxOpcodeInfo32[512*2] = {
/* 0F 36 /w */ { 0, BX_IA_ERROR }, /* 0F 36 /w */ { 0, BX_IA_ERROR },
/* 0F 37 /w */ { 0, BX_IA_GETSEC }, /* 0F 37 /w */ { 0, BX_IA_GETSEC },
#if BX_CPU_LEVEL >= 6 #if BX_CPU_LEVEL >= 6
/* 0F 38 /w */ { Bx3ByteOp, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape /* 0F 38 /w */ { 0, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape
#else #else
/* 0F 38 /w */ { 0, BX_IA_ERROR }, /* 0F 38 /w */ { 0, BX_IA_ERROR },
#endif #endif
/* 0F 39 /w */ { 0, BX_IA_ERROR }, /* 0F 39 /w */ { 0, BX_IA_ERROR },
#if BX_CPU_LEVEL >= 6 #if BX_CPU_LEVEL >= 6
/* 0F 3A /w */ { Bx3ByteOp | BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape /* 0F 3A /w */ { BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape
#else #else
/* 0F 3A /w */ { 0, BX_IA_ERROR }, /* 0F 3A /w */ { 0, BX_IA_ERROR },
#endif #endif
@ -1058,13 +1058,13 @@ static const BxOpcodeInfo_t BxOpcodeInfo32[512*2] = {
/* 0F 36 /d */ { 0, BX_IA_ERROR }, /* 0F 36 /d */ { 0, BX_IA_ERROR },
/* 0F 37 /d */ { 0, BX_IA_GETSEC }, /* 0F 37 /d */ { 0, BX_IA_GETSEC },
#if BX_CPU_LEVEL >= 6 #if BX_CPU_LEVEL >= 6
/* 0F 38 /d */ { Bx3ByteOp, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape /* 0F 38 /d */ { 0, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape
#else #else
/* 0F 38 /d */ { 0, BX_IA_ERROR }, /* 0F 38 /d */ { 0, BX_IA_ERROR },
#endif #endif
/* 0F 39 /d */ { 0, BX_IA_ERROR }, /* 0F 39 /d */ { 0, BX_IA_ERROR },
#if BX_CPU_LEVEL >= 6 #if BX_CPU_LEVEL >= 6
/* 0F 3A /d */ { Bx3ByteOp | BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape /* 0F 3A /d */ { BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape
#else #else
/* 0F 3A /d */ { 0, BX_IA_ERROR }, /* 0F 3A /d */ { 0, BX_IA_ERROR },
#endif #endif
@ -1366,7 +1366,7 @@ fetch_b1:
bx_bool has_modrm = 0; bx_bool has_modrm = 0;
#if BX_SUPPORT_AVX #if BX_SUPPORT_AVX
if ((b1 & ~0x01) == 0xc4 && (*iptr & 0xc0) == 0xc0) { if ((b1 & ~0x1) == 0xc4 && (*iptr & 0xc0) == 0xc0) {
// VEX 0xC4 and VEX 0xC5 // VEX 0xC4 and VEX 0xC5
had_vex_xop = 1; had_vex_xop = 1;
if (sse_prefix || ! protected_mode()) if (sse_prefix || ! protected_mode())
@ -1398,18 +1398,19 @@ fetch_b1:
i->setVL(BX_VL128 + vex_l); i->setVL(BX_VL128 + vex_l);
sse_prefix = vex & 0x3; sse_prefix = vex & 0x3;
unsigned opcode_byte = 0;
if (remain != 0) { if (remain != 0) {
remain--; remain--;
b1 = *iptr++; // fetch new b1 opcode_byte = *iptr++;
} }
else else
return(-1); return(-1);
b1 += 256 * vex_opcext; opcode_byte += 256 * vex_opcext;
if (b1 < 256 || b1 >= 1024) goto decode_done; if (opcode_byte < 256 || opcode_byte >= 1024) goto decode_done;
has_modrm = (b1 != 0x177); // if not VZEROUPPER/VZEROALL opcode has_modrm = (opcode_byte != 0x177); // if not VZEROUPPER/VZEROALL opcode
OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256)*2 + vex_l]; OpcodeInfoPtr = &BxOpcodeTableAVX[(opcode_byte-256)*2 + vex_l];
} }
else if (b1 == 0x8f && (*iptr & 0xc8) == 0xc8) { else if (b1 == 0x8f && (*iptr & 0xc8) == 0xc8) {
// 3 byte XOP prefix // 3 byte XOP prefix
@ -1438,11 +1439,11 @@ fetch_b1:
sse_prefix = vex & 0x3; sse_prefix = vex & 0x3;
if (sse_prefix) goto decode_done; if (sse_prefix) goto decode_done;
b1 = *iptr++; // fetch new b1 unsigned opcode_byte = *iptr++;
has_modrm = 1; has_modrm = 1;
b1 += 256 * xop_opcext; opcode_byte += 256 * xop_opcext;
OpcodeInfoPtr = &BxOpcodeTableXOP[b1*2 + vex_l]; OpcodeInfoPtr = &BxOpcodeTableXOP[opcode_byte*2 + vex_l];
} }
else else
#endif #endif
@ -1453,12 +1454,12 @@ fetch_b1:
if (has_modrm) { if (has_modrm) {
#if BX_CPU_LEVEL >= 6 #if BX_CPU_LEVEL >= 6
unsigned b3 = 0;
// handle 3-byte escape // handle 3-byte escape
if ((attr & BxGroupX) == Bx3ByteOp) { if (b1 == 0x138 || b1 == 0x13a) {
if (remain != 0) { if (remain != 0) {
remain--; remain--;
b3 = *iptr++; unsigned b3 = *iptr++;
OpcodeInfoPtr = &OpcodeInfoPtr->AnotherArray[b3];
} }
else else
return(-1); return(-1);
@ -1478,9 +1479,6 @@ fetch_b1:
nnn = (b2 >> 3) & 0x7; nnn = (b2 >> 3) & 0x7;
rm = b2 & 0x7; rm = b2 & 0x7;
#if BX_SUPPORT_AVX
if (! had_vex_xop)
#endif
if (b1 >= 0xd8 && b1 <= 0xdf) if (b1 >= 0xd8 && b1 <= 0xdf)
i->setFoo((b2 | (b1 << 8)) & 0x7ff); /* for x87 */ i->setFoo((b2 | (b1 << 8)) & 0x7ff); /* for x87 */
@ -1624,7 +1622,7 @@ fetch_b1:
modrm_done: modrm_done:
attr = OpcodeInfoPtr->Attr; attr |= OpcodeInfoPtr->Attr;
if (attr & BxAliasSSE) { if (attr & BxAliasSSE) {
// SSE alias always comes alone // SSE alias always comes alone
@ -1673,11 +1671,6 @@ modrm_done:
case BxSplitMod11B: case BxSplitMod11B:
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[mod_mem]); OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[mod_mem]);
break; break;
#endif
#if BX_CPU_LEVEL >= 6
case Bx3ByteOp:
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[b3]);
break;
#endif #endif
case BxOSizeGrp: case BxOSizeGrp:
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[os_32]); OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[os_32]);

View File

@ -452,9 +452,9 @@ static const BxOpcodeInfo_t BxOpcodeInfo64[512*3] = {
/* 0F 35 /w */ { BxTraceEnd, BX_IA_SYSEXIT }, /* 0F 35 /w */ { BxTraceEnd, BX_IA_SYSEXIT },
/* 0F 36 /w */ { 0, BX_IA_ERROR }, /* 0F 36 /w */ { 0, BX_IA_ERROR },
/* 0F 37 /w */ { 0, BX_IA_GETSEC }, /* 0F 37 /w */ { 0, BX_IA_GETSEC },
/* 0F 38 /w */ { Bx3ByteOp, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape /* 0F 38 /w */ { 0, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape
/* 0F 39 /w */ { 0, BX_IA_ERROR }, /* 0F 39 /w */ { 0, BX_IA_ERROR },
/* 0F 3A /w */ { Bx3ByteOp | BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape /* 0F 3A /w */ { BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape
/* 0F 3B /w */ { 0, BX_IA_ERROR }, /* 0F 3B /w */ { 0, BX_IA_ERROR },
/* 0F 3C /w */ { 0, BX_IA_ERROR }, /* 0F 3C /w */ { 0, BX_IA_ERROR },
/* 0F 3D /w */ { 0, BX_IA_ERROR }, /* 0F 3D /w */ { 0, BX_IA_ERROR },
@ -967,9 +967,9 @@ static const BxOpcodeInfo_t BxOpcodeInfo64[512*3] = {
/* 0F 35 /d */ { BxTraceEnd, BX_IA_SYSEXIT }, /* 0F 35 /d */ { BxTraceEnd, BX_IA_SYSEXIT },
/* 0F 36 /d */ { 0, BX_IA_ERROR }, /* 0F 36 /d */ { 0, BX_IA_ERROR },
/* 0F 37 /d */ { 0, BX_IA_GETSEC }, /* 0F 37 /d */ { 0, BX_IA_GETSEC },
/* 0F 38 /d */ { Bx3ByteOp, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape /* 0F 38 /d */ { 0, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape
/* 0F 39 /d */ { 0, BX_IA_ERROR }, /* 0F 39 /d */ { 0, BX_IA_ERROR },
/* 0F 3A /d */ { Bx3ByteOp | BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape /* 0F 3A /d */ { BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape
/* 0F 3B /d */ { 0, BX_IA_ERROR }, /* 0F 3B /d */ { 0, BX_IA_ERROR },
/* 0F 3C /d */ { 0, BX_IA_ERROR }, /* 0F 3C /d */ { 0, BX_IA_ERROR },
/* 0F 3D /d */ { 0, BX_IA_ERROR }, /* 0F 3D /d */ { 0, BX_IA_ERROR },
@ -1482,9 +1482,9 @@ static const BxOpcodeInfo_t BxOpcodeInfo64[512*3] = {
/* 0F 35 /q */ { BxTraceEnd, BX_IA_SYSEXIT }, /* 0F 35 /q */ { BxTraceEnd, BX_IA_SYSEXIT },
/* 0F 36 /q */ { 0, BX_IA_ERROR }, /* 0F 36 /q */ { 0, BX_IA_ERROR },
/* 0F 37 /q */ { 0, BX_IA_GETSEC }, /* 0F 37 /q */ { 0, BX_IA_GETSEC },
/* 0F 38 /q */ { Bx3ByteOp, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape /* 0F 38 /q */ { 0, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape
/* 0F 39 /q */ { 0, BX_IA_ERROR }, /* 0F 39 /q */ { 0, BX_IA_ERROR },
/* 0F 3A /q */ { Bx3ByteOp | BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape /* 0F 3A /q */ { BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape
/* 0F 3B /q */ { 0, BX_IA_ERROR }, /* 0F 3B /q */ { 0, BX_IA_ERROR },
/* 0F 3C /q */ { 0, BX_IA_ERROR }, /* 0F 3C /q */ { 0, BX_IA_ERROR },
/* 0F 3D /q */ { 0, BX_IA_ERROR }, /* 0F 3D /q */ { 0, BX_IA_ERROR },
@ -1823,7 +1823,7 @@ fetch_b1:
bx_bool has_modrm = 0; bx_bool has_modrm = 0;
#if BX_SUPPORT_AVX #if BX_SUPPORT_AVX
if ((b1 & ~0x01) == 0xc4) { if ((b1 & ~0x1) == 0xc4) {
// VEX // VEX
had_vex_xop = 1; had_vex_xop = 1;
if (sse_prefix | rex_prefix) if (sse_prefix | rex_prefix)
@ -1863,19 +1863,20 @@ fetch_b1:
i->setVL(BX_VL128 + vex_l); i->setVL(BX_VL128 + vex_l);
sse_prefix = vex & 0x3; sse_prefix = vex & 0x3;
unsigned opcode_byte = 0;
if (remain != 0) { if (remain != 0) {
remain--; remain--;
b1 = *iptr++; // fetch new b1 opcode_byte = *iptr++; // fetch new b1
} }
else else
return(-1); return(-1);
b1 += 256 * vex_opcext; opcode_byte += 256 * vex_opcext;
if (b1 < 256 || b1 >= 1024) if (opcode_byte < 256 || opcode_byte >= 1024)
goto decode_done; goto decode_done;
has_modrm = (b1 != 0x177); // if not VZEROUPPER/VZEROALL opcode has_modrm = (opcode_byte != 0x177); // if not VZEROUPPER/VZEROALL opcode
OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256)*2 + vex_l]; OpcodeInfoPtr = &BxOpcodeTableAVX[(opcode_byte-256)*2 + vex_l];
} }
else if (b1 == 0x8f && (*iptr & 0x08) == 0x08) { else if (b1 == 0x8f && (*iptr & 0x08) == 0x08) {
// 3 byte XOP prefix // 3 byte XOP prefix
@ -1913,11 +1914,11 @@ fetch_b1:
sse_prefix = vex & 0x3; sse_prefix = vex & 0x3;
if (sse_prefix) goto decode_done; if (sse_prefix) goto decode_done;
b1 = *iptr++; // fetch new b1 unsigned opcode_byte = *iptr++;
has_modrm = 1; has_modrm = 1;
b1 += 256 * xop_opcext; opcode_byte += 256 * xop_opcext;
OpcodeInfoPtr = &BxOpcodeTableXOP[b1*2 + vex_l]; OpcodeInfoPtr = &BxOpcodeTableXOP[opcode_byte*2 + vex_l];
} }
else else
#endif #endif
@ -1927,12 +1928,12 @@ fetch_b1:
if (has_modrm) { if (has_modrm) {
unsigned b3 = 0;
// handle 3-byte escape // handle 3-byte escape
if ((attr & BxGroupX) == Bx3ByteOp) { if (b1 == 0x138 || b1 == 0x13a) {
if (remain != 0) { if (remain != 0) {
remain--; remain--;
b3 = *iptr++; unsigned b3 = *iptr++;
OpcodeInfoPtr = &OpcodeInfoPtr->AnotherArray[b3];
} }
else else
return(-1); return(-1);
@ -1951,9 +1952,6 @@ fetch_b1:
nnn = ((b2 >> 3) & 0x7) | rex_r; nnn = ((b2 >> 3) & 0x7) | rex_r;
rm = (b2 & 0x7) | rex_b; rm = (b2 & 0x7) | rex_b;
#if BX_SUPPORT_AVX
if (! had_vex_xop)
#endif
if (b1 >= 0xd8 && b1 <= 0xdf) if (b1 >= 0xd8 && b1 <= 0xdf)
i->setFoo((b2 | (b1 << 8)) & 0x7ff); /* for x87 */ i->setFoo((b2 | (b1 << 8)) & 0x7ff); /* for x87 */
@ -2055,7 +2053,7 @@ get_32bit_displ:
modrm_done: modrm_done:
attr = OpcodeInfoPtr->Attr; attr |= OpcodeInfoPtr->Attr;
if (attr & BxAliasSSE) { if (attr & BxAliasSSE) {
// SSE alias always comes alone // SSE alias always comes alone
@ -2102,9 +2100,6 @@ modrm_done:
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[mod_mem]); OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[mod_mem]);
break; break;
#endif #endif
case Bx3ByteOp:
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[b3]);
break;
case BxOSizeGrp: case BxOSizeGrp:
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[offset >> 9]); OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[offset >> 9]);
break; break;

View File

@ -424,15 +424,9 @@ static const BxOpcodeInfo_t BxOpcodeGroupAVX_0fd0[3] = {
/* F2 */ { 0, BX_IA_VADDSUBPS_VpsHpsWps } /* F2 */ { 0, BX_IA_VADDSUBPS_VpsHpsWps }
}; };
static const BxOpcodeInfo_t BxOpcodeGroupAVX128_0fe6[3] = { static const BxOpcodeInfo_t BxOpcodeGroupAVX_0fe6[3] = {
/* 66 */ { 0, BX_IA_VCVTTPD2DQ_VqWpd }, /* 66 */ { 0, BX_IA_VCVTTPD2DQ_VqWpd },
/* F3 */ { 0, BX_IA_V128_VCVTDQ2PD_VpdWq }, /* F3 */ { 0, BX_IA_VCVTDQ2PD_VpdWq },
/* F2 */ { 0, BX_IA_VCVTPD2DQ_VqWpd }
};
static const BxOpcodeInfo_t BxOpcodeGroupAVX256_0fe6[3] = {
/* 66 */ { 0, BX_IA_VCVTTPD2DQ_VqWpd },
/* F3 */ { 0, BX_IA_V256_VCVTDQ2PD_VpdWq },
/* F2 */ { 0, BX_IA_VCVTPD2DQ_VqWpd } /* F2 */ { 0, BX_IA_VCVTPD2DQ_VqWpd }
}; };
@ -713,8 +707,8 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 58 /1 */ { BxAliasSSE, BX_IA_VADDPS_VpsHpsWps }, /* 58 /1 */ { BxAliasSSE, BX_IA_VADDPS_VpsHpsWps },
/* 59 /0 */ { BxAliasSSE, BX_IA_VMULPS_VpsHpsWps }, /* 59 /0 */ { BxAliasSSE, BX_IA_VMULPS_VpsHpsWps },
/* 59 /1 */ { BxAliasSSE, BX_IA_VMULPS_VpsHpsWps }, /* 59 /1 */ { BxAliasSSE, BX_IA_VMULPS_VpsHpsWps },
/* 5A /0 */ { BxPrefixSSE, BX_IA_V128_VCVTPS2PD_VpdWps, BxOpcodeGroupAVX_0f5a }, /* 5A /0 */ { BxPrefixSSE, BX_IA_VCVTPS2PD_VpdWps, BxOpcodeGroupAVX_0f5a },
/* 5A /1 */ { BxPrefixSSE, BX_IA_V256_VCVTPS2PD_VpdWps, BxOpcodeGroupAVX_0f5a }, /* 5A /1 */ { BxPrefixSSE, BX_IA_VCVTPS2PD_VpdWps, BxOpcodeGroupAVX_0f5a },
/* 5B /0 */ { BxPrefixSSE, BX_IA_VCVTDQ2PS_VpsWdq, BxOpcodeGroupAVX_0f5b }, /* 5B /0 */ { BxPrefixSSE, BX_IA_VCVTDQ2PS_VpsWdq, BxOpcodeGroupAVX_0f5b },
/* 5B /1 */ { BxPrefixSSE, BX_IA_VCVTDQ2PS_VpsWdq, BxOpcodeGroupAVX_0f5b }, /* 5B /1 */ { BxPrefixSSE, BX_IA_VCVTDQ2PS_VpsWdq, BxOpcodeGroupAVX_0f5b },
/* 5C /0 */ { BxAliasSSE, BX_IA_VSUBPS_VpsHpsWps }, /* 5C /0 */ { BxAliasSSE, BX_IA_VSUBPS_VpsHpsWps },
@ -993,8 +987,8 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* E4 /1 */ { BxPrefixSSE66, BX_IA_V256_VPMULHUW_VdqHdqWdq }, /* E4 /1 */ { BxPrefixSSE66, BX_IA_V256_VPMULHUW_VdqHdqWdq },
/* E5 /0 */ { BxPrefixSSE66, BX_IA_V128_VPMULHW_VdqHdqWdq }, /* E5 /0 */ { BxPrefixSSE66, BX_IA_V128_VPMULHW_VdqHdqWdq },
/* E5 /1 */ { BxPrefixSSE66, BX_IA_V256_VPMULHW_VdqHdqWdq }, /* E5 /1 */ { BxPrefixSSE66, BX_IA_V256_VPMULHW_VdqHdqWdq },
/* E6 /0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupAVX128_0fe6 }, /* E6 /0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupAVX_0fe6 },
/* E6 /1 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupAVX256_0fe6 }, /* E6 /1 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupAVX_0fe6 },
/* E7 /0 */ { BxPrefixSSE66, BX_IA_V128_VMOVNTDQ_MdqVdq }, /* E7 /0 */ { BxPrefixSSE66, BX_IA_V128_VMOVNTDQ_MdqVdq },
/* E7 /1 */ { BxPrefixSSE66, BX_IA_V256_VMOVNTDQ_MdqVdq }, /* E7 /1 */ { BxPrefixSSE66, BX_IA_V256_VMOVNTDQ_MdqVdq },
/* E8 /0 */ { BxPrefixSSE66, BX_IA_V128_VPSUBSB_VdqHdqWdq }, /* E8 /0 */ { BxPrefixSSE66, BX_IA_V128_VPSUBSB_VdqHdqWdq },
@ -1085,8 +1079,8 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 11 /1 */ { 0, BX_IA_ERROR }, /* 11 /1 */ { 0, BX_IA_ERROR },
/* 12 /0 */ { 0, BX_IA_ERROR }, /* 12 /0 */ { 0, BX_IA_ERROR },
/* 12 /1 */ { 0, BX_IA_ERROR }, /* 12 /1 */ { 0, BX_IA_ERROR },
/* 13 /0 */ { BxPrefixSSE66 | BxVexW0, BX_IA_V128_VCVTPH2PS_VpsWps }, /* 13 /0 */ { BxPrefixSSE66 | BxVexW0, BX_IA_VCVTPH2PS_VpsWps },
/* 13 /1 */ { BxPrefixSSE66 | BxVexW0, BX_IA_V256_VCVTPH2PS_VpsWps }, /* 13 /1 */ { BxPrefixSSE66 | BxVexW0, BX_IA_VCVTPH2PS_VpsWps },
/* 14 /0 */ { 0, BX_IA_ERROR }, /* 14 /0 */ { 0, BX_IA_ERROR },
/* 14 /1 */ { 0, BX_IA_ERROR }, /* 14 /1 */ { 0, BX_IA_ERROR },
/* 15 /0 */ { 0, BX_IA_ERROR }, /* 15 /0 */ { 0, BX_IA_ERROR },

View File

@ -1844,13 +1844,11 @@ bx_define_opcode(BX_IA_V128_VPEXTRW_GdUdqIb, &BX_CPU_C::BxError, &BX_CPU_C::PEXT
bx_define_opcode(BX_IA_V128_VPEXTRB_EbdVdqIb, &BX_CPU_C::PEXTRB_EbdVdqIbM, &BX_CPU_C::PEXTRB_EbdVdqIbR, BX_ISA_AVX, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_V128_VPEXTRB_EbdVdqIb, &BX_CPU_C::PEXTRB_EbdVdqIbM, &BX_CPU_C::PEXTRB_EbdVdqIbR, BX_ISA_AVX, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V128_VPEXTRW_EwdVdqIb, &BX_CPU_C::PEXTRW_EwdVdqIbM, &BX_CPU_C::PEXTRW_EwdVdqIbR, BX_ISA_AVX, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_V128_VPEXTRW_EwdVdqIb, &BX_CPU_C::PEXTRW_EwdVdqIbM, &BX_CPU_C::PEXTRW_EwdVdqIbR, BX_ISA_AVX, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V128_VPEXTRD_EdVdqIb, &BX_CPU_C::PEXTRD_EdVdqIbM, &BX_CPU_C::PEXTRD_EdVdqIbR, BX_ISA_AVX, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_V128_VPEXTRD_EdVdqIb, &BX_CPU_C::PEXTRD_EdVdqIbM, &BX_CPU_C::PEXTRD_EdVdqIbR, BX_ISA_AVX, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V128_VPEXTRQ_EqVdqIb, &BX_CPU_C::PEXTRD_EdVdqIbM, &BX_CPU_C::PEXTRD_EdVdqIbR, BX_ISA_AVX, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_V128_VPEXTRQ_EqVdqIb, &BX_CPU_C::PEXTRQ_EqVdqIbM, &BX_CPU_C::PEXTRQ_EqVdqIbR, BX_ISA_AVX, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V128_VCVTPS2PD_VpdWps, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTPS2PD_VpdWpsR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTPS2PD_VpdWps, &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VCVTPS2PD_VpdWpsR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V256_VCVTPS2PD_VpdWps, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VCVTPS2PD_VpdWpsR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VCVTTPD2DQ_VqWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTTPD2DQ_VqWpdR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTTPD2DQ_VqWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTTPD2DQ_VqWpdR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VCVTPD2DQ_VqWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTPD2DQ_VqWpdR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTPD2DQ_VqWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTPD2DQ_VqWpdR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V128_VCVTDQ2PD_VpdWq, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTDQ2PD_VpdWqR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTDQ2PD_VpdWq, &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VCVTDQ2PD_VpdWqR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V256_VCVTDQ2PD_VpdWq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VCVTDQ2PD_VpdWqR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VCVTPD2PS_VpsWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTPD2PS_VpsWpdR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTPD2PS_VpsWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTPD2PS_VpsWpdR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VCVTSD2SS_VssWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTSD2SS_VssWsdR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VVV, BX_SRC_VEC_RM, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTSD2SS_VssWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTSD2SS_VssWsdR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VVV, BX_SRC_VEC_RM, BX_SRC_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VCVTSS2SD_VsdWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VCVTSS2SD_VsdWssR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VVV, BX_SRC_VEC_RM, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTSS2SD_VsdWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VCVTSS2SD_VsdWssR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VVV, BX_SRC_VEC_RM, BX_SRC_NONE, BX_PREPARE_AVX)
@ -1874,8 +1872,7 @@ bx_define_opcode(BX_IA_VMOVQ_VqWq, &BX_CPU_C::MOVQ_VqWqM, &BX_CPU_C::MOVQ_VqWqR,
bx_define_opcode(BX_IA_V128_VMOVQ_VdqEq, &BX_CPU_C::MOVQ_VqWqM, &BX_CPU_C::MOVQ_VdqEqR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_V128_VMOVQ_VdqEq, &BX_CPU_C::MOVQ_VqWqM, &BX_CPU_C::MOVQ_VdqEqR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V128_VMOVQ_EqVq, &BX_CPU_C::MOVSD_WsdVsdM, &BX_CPU_C::MOVQ_EqVqR, BX_ISA_AVX, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_V128_VMOVQ_EqVq, &BX_CPU_C::MOVSD_WsdVsdM, &BX_CPU_C::MOVQ_EqVqR, BX_ISA_AVX, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V128_VCVTPH2PS_VpsWps, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTPH2PS_VpsWpsR, BX_ISA_AVX_F16C, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTPH2PS_VpsWps, &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VCVTPH2PS_VpsWpsR, BX_ISA_AVX_F16C, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V256_VCVTPH2PS_VpsWps, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VCVTPH2PS_VpsWpsR, BX_ISA_AVX_F16C, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VCVTPS2PH_WpsVpsIb, &BX_CPU_C::VCVTPS2PH_WpsVpsIb, &BX_CPU_C::VCVTPS2PH_WpsVpsIb, BX_ISA_AVX_F16C, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTPS2PH_WpsVpsIb, &BX_CPU_C::VCVTPS2PH_WpsVpsIb, &BX_CPU_C::VCVTPS2PH_WpsVpsIb, BX_ISA_AVX_F16C, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX)
// AVX // AVX

View File

@ -130,6 +130,8 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_Vector(bxInstruction_c *i)
{ {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
BX_ASSERT(i->getVL() != BX_VL512);
if (i->getVL() == BX_VL256) if (i->getVL() == BX_VL256)
read_virtual_ymmword(i->seg(), eaddr, &BX_READ_YMM_REG(BX_VECTOR_TMP_REGISTER)); read_virtual_ymmword(i->seg(), eaddr, &BX_READ_YMM_REG(BX_VECTOR_TMP_REGISTER));
else else
@ -138,4 +140,21 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_Vector(bxInstruction_c *i)
return BX_CPU_CALL_METHOD(i->execute2(), (i)); return BX_CPU_CALL_METHOD(i->execute2(), (i));
} }
// Memory-operand load stage that fetches half of the current vector length
// into the temporary vector register and then hands off to the instruction's
// second-stage handler (execute2):
//   - VL == 256 : 128 bits are read into the XMM view of the temp register
//   - otherwise : 64 bits are read and stored via BX_WRITE_XMM_REG_LO_QWORD
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_Half_Vector(bxInstruction_c *i)
{
  bx_address mem_addr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));

  // this loader is not expected to be reached with a 512-bit vector length
  BX_ASSERT(i->getVL() != BX_VL512);

  if (i->getVL() != BX_VL256) {
    // narrow case: a single qword is half of the vector
    Bit64u half_qword = read_virtual_qword(i->seg(), mem_addr);
    BX_WRITE_XMM_REG_LO_QWORD(BX_VECTOR_TMP_REGISTER, half_qword);
  }
  else {
    // 256-bit case: a full xmmword is half of the vector
    read_virtual_xmmword(i->seg(), mem_addr, &BX_READ_XMM_REG(BX_VECTOR_TMP_REGISTER));
  }

  return BX_CPU_CALL_METHOD(i->execute2(), (i));
}
#endif #endif

View File

@ -429,7 +429,7 @@ void BX_CPU_C::handleAvxModeChange(void)
BX_CPU_THIS_PTR avx_ok = 1; BX_CPU_THIS_PTR avx_ok = 1;
#if BX_SUPPORT_EVEX #if BX_SUPPORT_EVEX
if (~BX_CPU_THIS_PTR xcr0.val32 & (BX_XCR0_OPMASK_MASK | BX_XCR0_ZMM_HI256_MASK | BX_XCR0_HI_ZMM_MASK) != 0) if ((~BX_CPU_THIS_PTR xcr0.val32 & (BX_XCR0_OPMASK_MASK | BX_XCR0_ZMM_HI256_MASK | BX_XCR0_HI_ZMM_MASK)) != 0)
BX_CPU_THIS_PTR evex_ok = 0; BX_CPU_THIS_PTR evex_ok = 0;
else else
BX_CPU_THIS_PTR evex_ok = 1; BX_CPU_THIS_PTR evex_ok = 1;

View File

@ -322,6 +322,18 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRD_EdVdqIbR(bxInstruction_c *i
BX_NEXT_INSTR(i); BX_NEXT_INSTR(i);
} }
#if BX_SUPPORT_X86_64
// PEXTRQ, register destination form: copies one quadword of the source XMM
// register into a 64-bit general purpose register. Bit 0 of the immediate
// byte chooses which of the two quadwords is extracted.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRQ_EqVdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister src_xmm = BX_READ_XMM_REG(i->src());

  // only the lowest immediate bit participates in the element selection
  unsigned qword_index = i->Ib() & 1;
  Bit64u extracted = src_xmm.xmm64u(qword_index);

  BX_WRITE_64BIT_REG(i->dst(), extracted);

  BX_NEXT_INSTR(i);
}
#endif
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRD_EdVdqIbM(bxInstruction_c *i) BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRD_EdVdqIbM(bxInstruction_c *i)
{ {
BxPackedXmmRegister op = BX_READ_XMM_REG(i->src()); BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
@ -344,6 +356,19 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRD_EdVdqIbM(bxInstruction_c *i
BX_NEXT_INSTR(i); BX_NEXT_INSTR(i);
} }
#if BX_SUPPORT_X86_64
// PEXTRQ, memory destination form: stores one quadword of the source XMM
// register to the resolved ModRM address. Bit 0 of the immediate byte
// chooses which of the two quadwords is extracted.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRQ_EqVdqIbM(bxInstruction_c *i)
{
  // the effective address is resolved first, before the register is read
  bx_address store_addr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));

  BxPackedXmmRegister src_xmm = BX_READ_XMM_REG(i->src());

  // only the lowest immediate bit participates in the element selection
  unsigned qword_index = i->Ib() & 1;
  Bit64u extracted = src_xmm.xmm64u(qword_index);

  write_virtual_qword_64(i->seg(), store_addr, extracted);

  BX_NEXT_INSTR(i);
}
}
#endif
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::EXTRACTPS_EdVpsIbR(bxInstruction_c *i) BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::EXTRACTPS_EdVpsIbR(bxInstruction_c *i)
{ {
BxPackedXmmRegister op = BX_READ_XMM_REG(i->src()); BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());