From 69f947cef2a6de28ec6dac003da804ebf699e00b Mon Sep 17 00:00:00 2001 From: Stanislav Shwartsman Date: Thu, 5 Sep 2013 18:29:50 +0000 Subject: [PATCH] fixes and small optimizations for avx and xop decoding --- bochs/cpu/cpu.h | 13 ++++++---- bochs/cpu/fetchdecode.cc | 47 +++++++++++++++-------------------- bochs/cpu/fetchdecode64.cc | 49 +++++++++++++++++-------------------- bochs/cpu/fetchdecode_avx.h | 22 ++++++----------- bochs/cpu/ia_opcodes.h | 11 +++------ bochs/cpu/load.cc | 19 ++++++++++++++ bochs/cpu/proc_ctrl.cc | 2 +- bochs/cpu/sse.cc | 25 +++++++++++++++++++ 8 files changed, 107 insertions(+), 81 deletions(-) diff --git a/bochs/cpu/cpu.h b/bochs/cpu/cpu.h index 2abf87341..d88291381 100644 --- a/bochs/cpu/cpu.h +++ b/bochs/cpu/cpu.h @@ -2114,6 +2114,7 @@ public: // for now... BX_SMF BX_INSF_TYPE LOAD_Wb(bxInstruction_c *) BX_CPP_AttrRegparmN(1); #if BX_SUPPORT_AVX BX_SMF BX_INSF_TYPE LOAD_Vector(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE LOAD_Half_Vector(bxInstruction_c *) BX_CPP_AttrRegparmN(1); #endif #if BX_SUPPORT_FPU == 0 // if FPU is disabled @@ -3605,6 +3606,9 @@ public: // for now... 
BX_SMF BX_INSF_TYPE CVTTSS2SI_GqWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE CVTSD2SI_GqWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE CVTSS2SI_GqWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + + BX_SMF BX_INSF_TYPE PEXTRQ_EqVdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE PEXTRQ_EqVdqIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1); #endif // #if BX_SUPPORT_X86_64 BX_SMF BX_INSF_TYPE RDTSCP(bxInstruction_c *) BX_CPP_AttrRegparmN(1); @@ -5070,11 +5074,10 @@ enum { #define BxGroupN 0x0060 // Group encoding: 0110 #define BxSplitGroupN 0x0070 // Group encoding: 0111 #define BxFPEscape 0x0080 // Group encoding: 1000 -#define Bx3ByteOp 0x0090 // Group encoding: 1001 -#define BxOSizeGrp 0x00A0 // Group encoding: 1010 -#define BxSplitVexW 0x00B0 // Group encoding: 1011 -#define BxSplitVexW64 0x00C0 // Group encoding: 1100 - VexW ignored in 32-bit mode -#define BxSplitMod11B 0x00D0 // Group encoding: 1101 +#define BxOSizeGrp 0x0090 // Group encoding: 1001 +#define BxSplitVexW 0x00A0 // Group encoding: 1010 +#define BxSplitVexW64 0x00B0 // Group encoding: 1011 - VexW ignored in 32-bit mode +#define BxSplitMod11B 0x00C0 // Group encoding: 1100 // The BxImmediate2 mask specifies kind of second immediate data // required by instruction. 
diff --git a/bochs/cpu/fetchdecode.cc b/bochs/cpu/fetchdecode.cc index 2ea824898..3471b163d 100644 --- a/bochs/cpu/fetchdecode.cc +++ b/bochs/cpu/fetchdecode.cc @@ -513,13 +513,13 @@ static const BxOpcodeInfo_t BxOpcodeInfo32[512*2] = { /* 0F 36 /w */ { 0, BX_IA_ERROR }, /* 0F 37 /w */ { 0, BX_IA_GETSEC }, #if BX_CPU_LEVEL >= 6 - /* 0F 38 /w */ { Bx3ByteOp, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape + /* 0F 38 /w */ { 0, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape #else /* 0F 38 /w */ { 0, BX_IA_ERROR }, #endif /* 0F 39 /w */ { 0, BX_IA_ERROR }, #if BX_CPU_LEVEL >= 6 - /* 0F 3A /w */ { Bx3ByteOp | BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape + /* 0F 3A /w */ { BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape #else /* 0F 3A /w */ { 0, BX_IA_ERROR }, #endif @@ -1058,13 +1058,13 @@ static const BxOpcodeInfo_t BxOpcodeInfo32[512*2] = { /* 0F 36 /d */ { 0, BX_IA_ERROR }, /* 0F 37 /d */ { 0, BX_IA_GETSEC }, #if BX_CPU_LEVEL >= 6 - /* 0F 38 /d */ { Bx3ByteOp, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape + /* 0F 38 /d */ { 0, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape #else /* 0F 38 /d */ { 0, BX_IA_ERROR }, #endif /* 0F 39 /d */ { 0, BX_IA_ERROR }, #if BX_CPU_LEVEL >= 6 - /* 0F 3A /d */ { Bx3ByteOp | BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape + /* 0F 3A /d */ { BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape #else /* 0F 3A /d */ { 0, BX_IA_ERROR }, #endif @@ -1366,7 +1366,7 @@ fetch_b1: bx_bool has_modrm = 0; #if BX_SUPPORT_AVX - if ((b1 & ~0x01) == 0xc4 && (*iptr & 0xc0) == 0xc0) { + if ((b1 & ~0x1) == 0xc4 && (*iptr & 0xc0) == 0xc0) { // VEX 0xC4 and VEX 0xC5 had_vex_xop = 1; if (sse_prefix || ! 
protected_mode()) @@ -1398,18 +1398,19 @@ fetch_b1: i->setVL(BX_VL128 + vex_l); sse_prefix = vex & 0x3; + unsigned opcode_byte = 0; if (remain != 0) { remain--; - b1 = *iptr++; // fetch new b1 + opcode_byte = *iptr++; } else return(-1); - b1 += 256 * vex_opcext; - if (b1 < 256 || b1 >= 1024) goto decode_done; - has_modrm = (b1 != 0x177); // if not VZEROUPPER/VZEROALL opcode + opcode_byte += 256 * vex_opcext; + if (opcode_byte < 256 || opcode_byte >= 1024) goto decode_done; + has_modrm = (opcode_byte != 0x177); // if not VZEROUPPER/VZEROALL opcode - OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256)*2 + vex_l]; + OpcodeInfoPtr = &BxOpcodeTableAVX[(opcode_byte-256)*2 + vex_l]; } else if (b1 == 0x8f && (*iptr & 0xc8) == 0xc8) { // 3 byte XOP prefix @@ -1438,11 +1439,11 @@ fetch_b1: sse_prefix = vex & 0x3; if (sse_prefix) goto decode_done; - b1 = *iptr++; // fetch new b1 + unsigned opcode_byte = *iptr++; has_modrm = 1; - b1 += 256 * xop_opcext; + opcode_byte += 256 * xop_opcext; - OpcodeInfoPtr = &BxOpcodeTableXOP[b1*2 + vex_l]; + OpcodeInfoPtr = &BxOpcodeTableXOP[opcode_byte*2 + vex_l]; } else #endif @@ -1453,12 +1454,12 @@ fetch_b1: if (has_modrm) { #if BX_CPU_LEVEL >= 6 - unsigned b3 = 0; // handle 3-byte escape - if ((attr & BxGroupX) == Bx3ByteOp) { + if (b1 == 0x138 || b1 == 0x13a) { if (remain != 0) { remain--; - b3 = *iptr++; + unsigned b3 = *iptr++; + OpcodeInfoPtr = &OpcodeInfoPtr->AnotherArray[b3]; } else return(-1); @@ -1478,11 +1479,8 @@ fetch_b1: nnn = (b2 >> 3) & 0x7; rm = b2 & 0x7; -#if BX_SUPPORT_AVX - if (! had_vex_xop) -#endif - if (b1 >= 0xd8 && b1 <= 0xdf) - i->setFoo((b2 | (b1 << 8)) & 0x7ff); /* for x87 */ + if (b1 >= 0xd8 && b1 <= 0xdf) + i->setFoo((b2 | (b1 << 8)) & 0x7ff); /* for x87 */ // MOVs with CRx and DRx always use register ops and ignore the mod field. 
if ((b1 & ~3) == 0x120) @@ -1624,7 +1622,7 @@ fetch_b1: modrm_done: - attr = OpcodeInfoPtr->Attr; + attr |= OpcodeInfoPtr->Attr; if (attr & BxAliasSSE) { // SSE alias always comes alone @@ -1673,11 +1671,6 @@ modrm_done: case BxSplitMod11B: OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[mod_mem]); break; -#endif -#if BX_CPU_LEVEL >= 6 - case Bx3ByteOp: - OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[b3]); - break; #endif case BxOSizeGrp: OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[os_32]); diff --git a/bochs/cpu/fetchdecode64.cc b/bochs/cpu/fetchdecode64.cc index 5d63b449b..bc1548484 100644 --- a/bochs/cpu/fetchdecode64.cc +++ b/bochs/cpu/fetchdecode64.cc @@ -452,9 +452,9 @@ static const BxOpcodeInfo_t BxOpcodeInfo64[512*3] = { /* 0F 35 /w */ { BxTraceEnd, BX_IA_SYSEXIT }, /* 0F 36 /w */ { 0, BX_IA_ERROR }, /* 0F 37 /w */ { 0, BX_IA_GETSEC }, - /* 0F 38 /w */ { Bx3ByteOp, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape + /* 0F 38 /w */ { 0, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape /* 0F 39 /w */ { 0, BX_IA_ERROR }, - /* 0F 3A /w */ { Bx3ByteOp | BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape + /* 0F 3A /w */ { BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape /* 0F 3B /w */ { 0, BX_IA_ERROR }, /* 0F 3C /w */ { 0, BX_IA_ERROR }, /* 0F 3D /w */ { 0, BX_IA_ERROR }, @@ -967,9 +967,9 @@ static const BxOpcodeInfo_t BxOpcodeInfo64[512*3] = { /* 0F 35 /d */ { BxTraceEnd, BX_IA_SYSEXIT }, /* 0F 36 /d */ { 0, BX_IA_ERROR }, /* 0F 37 /d */ { 0, BX_IA_GETSEC }, - /* 0F 38 /d */ { Bx3ByteOp, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape + /* 0F 38 /d */ { 0, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape /* 0F 39 /d */ { 0, BX_IA_ERROR }, - /* 0F 3A /d */ { Bx3ByteOp | BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape + /* 0F 3A /d */ { BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape /* 0F 3B /d */ { 0, BX_IA_ERROR }, /* 0F 3C /d */ { 0, 
BX_IA_ERROR }, /* 0F 3D /d */ { 0, BX_IA_ERROR }, @@ -1482,9 +1482,9 @@ static const BxOpcodeInfo_t BxOpcodeInfo64[512*3] = { /* 0F 35 /q */ { BxTraceEnd, BX_IA_SYSEXIT }, /* 0F 36 /q */ { 0, BX_IA_ERROR }, /* 0F 37 /q */ { 0, BX_IA_GETSEC }, - /* 0F 38 /q */ { Bx3ByteOp, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape + /* 0F 38 /q */ { 0, BX_IA_ERROR, BxOpcode3ByteTable0f38 }, // 3-byte escape /* 0F 39 /q */ { 0, BX_IA_ERROR }, - /* 0F 3A /q */ { Bx3ByteOp | BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape + /* 0F 3A /q */ { BxImmediate_Ib, BX_IA_ERROR, BxOpcode3ByteTable0f3a }, // 3-byte escape /* 0F 3B /q */ { 0, BX_IA_ERROR }, /* 0F 3C /q */ { 0, BX_IA_ERROR }, /* 0F 3D /q */ { 0, BX_IA_ERROR }, @@ -1823,7 +1823,7 @@ fetch_b1: bx_bool has_modrm = 0; #if BX_SUPPORT_AVX - if ((b1 & ~0x01) == 0xc4) { + if ((b1 & ~0x1) == 0xc4) { // VEX had_vex_xop = 1; if (sse_prefix | rex_prefix) @@ -1863,19 +1863,20 @@ fetch_b1: i->setVL(BX_VL128 + vex_l); sse_prefix = vex & 0x3; + unsigned opcode_byte = 0; if (remain != 0) { remain--; - b1 = *iptr++; // fetch new b1 + opcode_byte = *iptr++; // fetch the opcode byte that follows the VEX prefix (b1 is left unchanged) } else return(-1); - b1 += 256 * vex_opcext; - if (b1 < 256 || b1 >= 1024) + opcode_byte += 256 * vex_opcext; + if (opcode_byte < 256 || opcode_byte >= 1024) goto decode_done; - has_modrm = (b1 != 0x177); // if not VZEROUPPER/VZEROALL opcode + has_modrm = (opcode_byte != 0x177); // if not VZEROUPPER/VZEROALL opcode - OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256)*2 + vex_l]; + OpcodeInfoPtr = &BxOpcodeTableAVX[(opcode_byte-256)*2 + vex_l]; } else if (b1 == 0x8f && (*iptr & 0x08) == 0x08) { // 3 byte XOP prefix @@ -1913,11 +1914,11 @@ fetch_b1: sse_prefix = vex & 0x3; if (sse_prefix) goto decode_done; - b1 = *iptr++; // fetch new b1 + unsigned opcode_byte = *iptr++; has_modrm = 1; - b1 += 256 * xop_opcext; + opcode_byte += 256 * xop_opcext; - OpcodeInfoPtr = &BxOpcodeTableXOP[b1*2 + vex_l]; + OpcodeInfoPtr = &BxOpcodeTableXOP[opcode_byte*2 + vex_l];
} else #endif @@ -1927,12 +1928,12 @@ fetch_b1: if (has_modrm) { - unsigned b3 = 0; // handle 3-byte escape - if ((attr & BxGroupX) == Bx3ByteOp) { + if (b1 == 0x138 || b1 == 0x13a) { if (remain != 0) { remain--; - b3 = *iptr++; + unsigned b3 = *iptr++; + OpcodeInfoPtr = &OpcodeInfoPtr->AnotherArray[b3]; } else return(-1); @@ -1951,11 +1952,8 @@ fetch_b1: nnn = ((b2 >> 3) & 0x7) | rex_r; rm = (b2 & 0x7) | rex_b; -#if BX_SUPPORT_AVX - if (! had_vex_xop) -#endif - if (b1 >= 0xd8 && b1 <= 0xdf) - i->setFoo((b2 | (b1 << 8)) & 0x7ff); /* for x87 */ + if (b1 >= 0xd8 && b1 <= 0xdf) + i->setFoo((b2 | (b1 << 8)) & 0x7ff); /* for x87 */ // MOVs with CRx and DRx always use register ops and ignore the mod field. if ((b1 & ~3) == 0x120) @@ -2055,7 +2053,7 @@ get_32bit_displ: modrm_done: - attr = OpcodeInfoPtr->Attr; + attr |= OpcodeInfoPtr->Attr; if (attr & BxAliasSSE) { // SSE alias always comes alone @@ -2102,9 +2100,6 @@ modrm_done: OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[mod_mem]); break; #endif - case Bx3ByteOp: - OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[b3]); - break; case BxOSizeGrp: OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[offset >> 9]); break; diff --git a/bochs/cpu/fetchdecode_avx.h b/bochs/cpu/fetchdecode_avx.h index 6b0b634cd..4284bbf14 100644 --- a/bochs/cpu/fetchdecode_avx.h +++ b/bochs/cpu/fetchdecode_avx.h @@ -424,15 +424,9 @@ static const BxOpcodeInfo_t BxOpcodeGroupAVX_0fd0[3] = { /* F2 */ { 0, BX_IA_VADDSUBPS_VpsHpsWps } }; -static const BxOpcodeInfo_t BxOpcodeGroupAVX128_0fe6[3] = { +static const BxOpcodeInfo_t BxOpcodeGroupAVX_0fe6[3] = { /* 66 */ { 0, BX_IA_VCVTTPD2DQ_VqWpd }, - /* F3 */ { 0, BX_IA_V128_VCVTDQ2PD_VpdWq }, - /* F2 */ { 0, BX_IA_VCVTPD2DQ_VqWpd } -}; - -static const BxOpcodeInfo_t BxOpcodeGroupAVX256_0fe6[3] = { - /* 66 */ { 0, BX_IA_VCVTTPD2DQ_VqWpd }, - /* F3 */ { 0, BX_IA_V256_VCVTDQ2PD_VpdWq }, + /* F3 */ { 0, BX_IA_VCVTDQ2PD_VpdWq }, /* F2 */ { 0, BX_IA_VCVTPD2DQ_VqWpd } }; @@ -713,8 +707,8 @@ static const BxOpcodeInfo_t 
BxOpcodeTableAVX[256*3*2] = { /* 58 /1 */ { BxAliasSSE, BX_IA_VADDPS_VpsHpsWps }, /* 59 /0 */ { BxAliasSSE, BX_IA_VMULPS_VpsHpsWps }, /* 59 /1 */ { BxAliasSSE, BX_IA_VMULPS_VpsHpsWps }, - /* 5A /0 */ { BxPrefixSSE, BX_IA_V128_VCVTPS2PD_VpdWps, BxOpcodeGroupAVX_0f5a }, - /* 5A /1 */ { BxPrefixSSE, BX_IA_V256_VCVTPS2PD_VpdWps, BxOpcodeGroupAVX_0f5a }, + /* 5A /0 */ { BxPrefixSSE, BX_IA_VCVTPS2PD_VpdWps, BxOpcodeGroupAVX_0f5a }, + /* 5A /1 */ { BxPrefixSSE, BX_IA_VCVTPS2PD_VpdWps, BxOpcodeGroupAVX_0f5a }, /* 5B /0 */ { BxPrefixSSE, BX_IA_VCVTDQ2PS_VpsWdq, BxOpcodeGroupAVX_0f5b }, /* 5B /1 */ { BxPrefixSSE, BX_IA_VCVTDQ2PS_VpsWdq, BxOpcodeGroupAVX_0f5b }, /* 5C /0 */ { BxAliasSSE, BX_IA_VSUBPS_VpsHpsWps }, @@ -993,8 +987,8 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = { /* E4 /1 */ { BxPrefixSSE66, BX_IA_V256_VPMULHUW_VdqHdqWdq }, /* E5 /0 */ { BxPrefixSSE66, BX_IA_V128_VPMULHW_VdqHdqWdq }, /* E5 /1 */ { BxPrefixSSE66, BX_IA_V256_VPMULHW_VdqHdqWdq }, - /* E6 /0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupAVX128_0fe6 }, - /* E6 /1 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupAVX256_0fe6 }, + /* E6 /0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupAVX_0fe6 }, + /* E6 /1 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupAVX_0fe6 }, /* E7 /0 */ { BxPrefixSSE66, BX_IA_V128_VMOVNTDQ_MdqVdq }, /* E7 /1 */ { BxPrefixSSE66, BX_IA_V256_VMOVNTDQ_MdqVdq }, /* E8 /0 */ { BxPrefixSSE66, BX_IA_V128_VPSUBSB_VdqHdqWdq }, @@ -1085,8 +1079,8 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = { /* 11 /1 */ { 0, BX_IA_ERROR }, /* 12 /0 */ { 0, BX_IA_ERROR }, /* 12 /1 */ { 0, BX_IA_ERROR }, - /* 13 /0 */ { BxPrefixSSE66 | BxVexW0, BX_IA_V128_VCVTPH2PS_VpsWps }, - /* 13 /1 */ { BxPrefixSSE66 | BxVexW0, BX_IA_V256_VCVTPH2PS_VpsWps }, + /* 13 /0 */ { BxPrefixSSE66 | BxVexW0, BX_IA_VCVTPH2PS_VpsWps }, + /* 13 /1 */ { BxPrefixSSE66 | BxVexW0, BX_IA_VCVTPH2PS_VpsWps }, /* 14 /0 */ { 0, BX_IA_ERROR }, /* 14 /1 */ { 0, BX_IA_ERROR }, /* 15 /0 */ { 0, BX_IA_ERROR }, diff --git 
a/bochs/cpu/ia_opcodes.h b/bochs/cpu/ia_opcodes.h index 1401966d0..e167eb22a 100644 --- a/bochs/cpu/ia_opcodes.h +++ b/bochs/cpu/ia_opcodes.h @@ -1844,13 +1844,11 @@ bx_define_opcode(BX_IA_V128_VPEXTRW_GdUdqIb, &BX_CPU_C::BxError, &BX_CPU_C::PEXT bx_define_opcode(BX_IA_V128_VPEXTRB_EbdVdqIb, &BX_CPU_C::PEXTRB_EbdVdqIbM, &BX_CPU_C::PEXTRB_EbdVdqIbR, BX_ISA_AVX, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_V128_VPEXTRW_EwdVdqIb, &BX_CPU_C::PEXTRW_EwdVdqIbM, &BX_CPU_C::PEXTRW_EwdVdqIbR, BX_ISA_AVX, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_V128_VPEXTRD_EdVdqIb, &BX_CPU_C::PEXTRD_EdVdqIbM, &BX_CPU_C::PEXTRD_EdVdqIbR, BX_ISA_AVX, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) -bx_define_opcode(BX_IA_V128_VPEXTRQ_EqVdqIb, &BX_CPU_C::PEXTRD_EdVdqIbM, &BX_CPU_C::PEXTRD_EdVdqIbR, BX_ISA_AVX, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) -bx_define_opcode(BX_IA_V128_VCVTPS2PD_VpdWps, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTPS2PD_VpdWpsR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) -bx_define_opcode(BX_IA_V256_VCVTPS2PD_VpdWps, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VCVTPS2PD_VpdWpsR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) +bx_define_opcode(BX_IA_V128_VPEXTRQ_EqVdqIb, &BX_CPU_C::PEXTRQ_EqVdqIbM, &BX_CPU_C::PEXTRQ_EqVdqIbR, BX_ISA_AVX, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) +bx_define_opcode(BX_IA_VCVTPS2PD_VpdWps, &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VCVTPS2PD_VpdWpsR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTTPD2DQ_VqWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTTPD2DQ_VqWpdR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTPD2DQ_VqWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTPD2DQ_VqWpdR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, 
BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) -bx_define_opcode(BX_IA_V128_VCVTDQ2PD_VpdWq, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTDQ2PD_VpdWqR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) -bx_define_opcode(BX_IA_V256_VCVTDQ2PD_VpdWq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VCVTDQ2PD_VpdWqR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) +bx_define_opcode(BX_IA_VCVTDQ2PD_VpdWq, &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VCVTDQ2PD_VpdWqR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTPD2PS_VpsWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTPD2PS_VpsWpdR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTSD2SS_VssWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTSD2SS_VssWsdR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VVV, BX_SRC_VEC_RM, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTSS2SD_VsdWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VCVTSS2SD_VsdWssR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_VVV, BX_SRC_VEC_RM, BX_SRC_NONE, BX_PREPARE_AVX) @@ -1874,8 +1872,7 @@ bx_define_opcode(BX_IA_VMOVQ_VqWq, &BX_CPU_C::MOVQ_VqWqM, &BX_CPU_C::MOVQ_VqWqR, bx_define_opcode(BX_IA_V128_VMOVQ_VdqEq, &BX_CPU_C::MOVQ_VqWqM, &BX_CPU_C::MOVQ_VdqEqR, BX_ISA_AVX, BX_SRC_NNN, BX_SRC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_V128_VMOVQ_EqVq, &BX_CPU_C::MOVSD_WsdVsdM, &BX_CPU_C::MOVQ_EqVqR, BX_ISA_AVX, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) -bx_define_opcode(BX_IA_V128_VCVTPH2PS_VpsWps, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTPH2PS_VpsWpsR, BX_ISA_AVX_F16C, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) -bx_define_opcode(BX_IA_V256_VCVTPH2PS_VpsWps, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VCVTPH2PS_VpsWpsR, BX_ISA_AVX_F16C, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) +bx_define_opcode(BX_IA_VCVTPH2PS_VpsWps, &BX_CPU_C::LOAD_Half_Vector, 
&BX_CPU_C::VCVTPH2PS_VpsWpsR, BX_ISA_AVX_F16C, BX_SRC_NNN, BX_SRC_VEC_RM, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTPS2PH_WpsVpsIb, &BX_CPU_C::VCVTPS2PH_WpsVpsIb, &BX_CPU_C::VCVTPS2PH_WpsVpsIb, BX_ISA_AVX_F16C, BX_SRC_RM, BX_SRC_NNN, BX_SRC_NONE, BX_SRC_NONE, BX_PREPARE_AVX) // AVX diff --git a/bochs/cpu/load.cc b/bochs/cpu/load.cc index 0f615bf8f..397566dcb 100644 --- a/bochs/cpu/load.cc +++ b/bochs/cpu/load.cc @@ -130,6 +130,8 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_Vector(bxInstruction_c *i) { bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); + BX_ASSERT(i->getVL() != BX_VL512); + if (i->getVL() == BX_VL256) read_virtual_ymmword(i->seg(), eaddr, &BX_READ_YMM_REG(BX_VECTOR_TMP_REGISTER)); else @@ -138,4 +140,21 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_Vector(bxInstruction_c *i) return BX_CPU_CALL_METHOD(i->execute2(), (i)); } +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_Half_Vector(bxInstruction_c *i) /* Memory-form loader: resolves the ModRM address, reads the operand into BX_VECTOR_TMP_REGISTER, then dispatches to i->execute2(). */ +{ + bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); + + BX_ASSERT(i->getVL() != BX_VL512); /* only the BX_VL256 case and the fallback case are handled below */ + + if (i->getVL() == BX_VL256) { /* BX_VL256: read one xmmword from memory into the temporary register */ + read_virtual_xmmword(i->seg(), eaddr, &BX_READ_XMM_REG(BX_VECTOR_TMP_REGISTER)); + } + else { /* otherwise: read one qword and store it in the low qword of the temporary register */ + Bit64u val_64 = read_virtual_qword(i->seg(), eaddr); + BX_WRITE_XMM_REG_LO_QWORD(BX_VECTOR_TMP_REGISTER, val_64); + } + + return BX_CPU_CALL_METHOD(i->execute2(), (i)); +} + #endif diff --git a/bochs/cpu/proc_ctrl.cc b/bochs/cpu/proc_ctrl.cc index 15f3f51b6..b6787ed9e 100644 --- a/bochs/cpu/proc_ctrl.cc +++ b/bochs/cpu/proc_ctrl.cc @@ -429,7 +429,7 @@ void BX_CPU_C::handleAvxModeChange(void) BX_CPU_THIS_PTR avx_ok = 1; #if BX_SUPPORT_EVEX - if (~BX_CPU_THIS_PTR xcr0.val32 & (BX_XCR0_OPMASK_MASK | BX_XCR0_ZMM_HI256_MASK | BX_XCR0_HI_ZMM_MASK) != 0) + if ((~BX_CPU_THIS_PTR xcr0.val32 & (BX_XCR0_OPMASK_MASK | BX_XCR0_ZMM_HI256_MASK | BX_XCR0_HI_ZMM_MASK)) != 0) /* outer parentheses are required: != binds tighter than &, so without them the mask was compared with 0 before the AND */ BX_CPU_THIS_PTR evex_ok = 0; else BX_CPU_THIS_PTR evex_ok = 1; diff --git a/bochs/cpu/sse.cc
b/bochs/cpu/sse.cc index b83a3900c..7ab83a597 100644 --- a/bochs/cpu/sse.cc +++ b/bochs/cpu/sse.cc @@ -322,6 +322,18 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRD_EdVdqIbR(bxInstruction_c *i BX_NEXT_INSTR(i); } +#if BX_SUPPORT_X86_64 +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRQ_EqVdqIbR(bxInstruction_c *i) /* Register form: writes one qword of the source XMM register to the 64-bit destination register. */ +{ + BxPackedXmmRegister op = BX_READ_XMM_REG(i->src()); + + Bit64u result = op.xmm64u(i->Ib() & 1); /* bit 0 of the immediate selects which of the two qwords is extracted */ + BX_WRITE_64BIT_REG(i->dst(), result); + + BX_NEXT_INSTR(i); +} +#endif + BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRD_EdVdqIbM(bxInstruction_c *i) { BxPackedXmmRegister op = BX_READ_XMM_REG(i->src()); @@ -344,6 +356,19 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRD_EdVdqIbM(bxInstruction_c *i BX_NEXT_INSTR(i); } +#if BX_SUPPORT_X86_64 +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRQ_EqVdqIbM(bxInstruction_c *i) /* Memory form: writes one qword of the source XMM register to the resolved ModRM address. */ +{ + bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); + + BxPackedXmmRegister op = BX_READ_XMM_REG(i->src()); + Bit64u result = op.xmm64u(i->Ib() & 1); /* bit 0 of the immediate selects which of the two qwords is extracted */ + write_virtual_qword_64(i->seg(), eaddr, result); + + BX_NEXT_INSTR(i); +} +#endif + BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::EXTRACTPS_EdVpsIbR(bxInstruction_c *i) { BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());