From 59c65151f548112906ce583c79bc4ffbd5417d02 Mon Sep 17 00:00:00 2001 From: Stanislav Shwartsman Date: Thu, 29 Aug 2013 19:43:15 +0000 Subject: [PATCH] various fixes --- bochs/cpu/cpu.h | 13 ++++ bochs/cpu/cpudb/corei7_haswell_4770.cc | 2 +- bochs/cpu/cpudb/corei7_ivy_bridge_3770K.cc | 2 +- bochs/cpu/cpudb/corei7_sandy_bridge_2600K.cc | 2 +- bochs/cpu/cpudb/zambezi.cc | 2 +- bochs/cpu/crregs.h | 41 ++++++---- bochs/cpu/fetchdecode.cc | 79 +++++++------------- bochs/cpu/fetchdecode64.cc | 78 +++++++------------ bochs/cpu/fetchdecode_avx.h | 18 ++--- bochs/cpu/generic_cpuid.cc | 2 +- bochs/cpu/init.cc | 5 +- bochs/cpu/proc_ctrl.cc | 38 +++++++++- bochs/cpu/xsave.cc | 8 +- 13 files changed, 144 insertions(+), 146 deletions(-) diff --git a/bochs/cpu/cpu.h b/bochs/cpu/cpu.h index 889832ced..2abf87341 100644 --- a/bochs/cpu/cpu.h +++ b/bochs/cpu/cpu.h @@ -1208,6 +1208,9 @@ public: // for now... #if BX_SUPPORT_AVX unsigned avx_ok; #endif +#if BX_SUPPORT_EVEX + unsigned evex_ok; +#endif #endif // for exceptions @@ -3624,9 +3627,15 @@ public: // for now... 
#if BX_SUPPORT_HANDLERS_CHAINING_SPEEDUPS BX_SMF BX_INSF_TYPE BxEndTrace(bxInstruction_c *) BX_CPP_AttrRegparmN(1); #endif + #if BX_CPU_LEVEL >= 6 BX_SMF BX_INSF_TYPE BxNoSSE(bxInstruction_c *) BX_CPP_AttrRegparmN(1); +#if BX_SUPPORT_AVX BX_SMF BX_INSF_TYPE BxNoAVX(bxInstruction_c *) BX_CPP_AttrRegparmN(1); +#endif +#if BX_SUPPORT_EVEX + BX_SMF BX_INSF_TYPE BxNoEVEX(bxInstruction_c *) BX_CPP_AttrRegparmN(1); +#endif #endif BX_SMF bx_address BxResolve16BaseIndex(bxInstruction_c *) BX_CPP_AttrRegparmN(1); @@ -4614,6 +4623,7 @@ BX_CPP_INLINE void BX_CPU_C::prepareXSAVE(void) // bit 1 - long64 mode (CS.L) // bit 2 - SSE_OK // bit 3 - AVX_OK +// bit 4 - EVEX_OK // // updateFetchModeMask - has to be called everytime // CS.L / CS.D_B / CR0.PE, CR0.TS or CR0.EM / CR4.OSFXSR / CR4.OSXSAVE changes @@ -4622,6 +4632,9 @@ BX_CPP_INLINE void BX_CPU_C::updateFetchModeMask(void) { BX_CPU_THIS_PTR fetchModeMask = #if BX_CPU_LEVEL >= 6 +#if BX_SUPPORT_EVEX + (BX_CPU_THIS_PTR evex_ok << 4) | +#endif #if BX_SUPPORT_AVX (BX_CPU_THIS_PTR avx_ok << 3) | #endif diff --git a/bochs/cpu/cpudb/corei7_haswell_4770.cc b/bochs/cpu/cpudb/corei7_haswell_4770.cc index cc7b9e181..e8c444041 100644 --- a/bochs/cpu/cpudb/corei7_haswell_4770.cc +++ b/bochs/cpu/cpudb/corei7_haswell_4770.cc @@ -686,7 +686,7 @@ void corei7_haswell_4770_t::get_std_cpuid_xsave_leaf(Bit32u subfunction, cpuid_f case 2: // AVX leaf leaf->eax = 256; - leaf->ebx = 576; + leaf->ebx = XSAVE_YMM_STATE_OFFSET; leaf->ecx = 0; leaf->edx = 0; return; diff --git a/bochs/cpu/cpudb/corei7_ivy_bridge_3770K.cc b/bochs/cpu/cpudb/corei7_ivy_bridge_3770K.cc index d7d1e50fc..718d7984c 100644 --- a/bochs/cpu/cpudb/corei7_ivy_bridge_3770K.cc +++ b/bochs/cpu/cpudb/corei7_ivy_bridge_3770K.cc @@ -679,7 +679,7 @@ void corei7_ivy_bridge_3770k_t::get_std_cpuid_xsave_leaf(Bit32u subfunction, cpu case 2: // AVX leaf leaf->eax = 256; - leaf->ebx = 576; + leaf->ebx = XSAVE_YMM_STATE_OFFSET; leaf->ecx = 0; leaf->edx = 0; return; diff --git 
a/bochs/cpu/cpudb/corei7_sandy_bridge_2600K.cc b/bochs/cpu/cpudb/corei7_sandy_bridge_2600K.cc index b086f5300..0b10b0f0e 100644 --- a/bochs/cpu/cpudb/corei7_sandy_bridge_2600K.cc +++ b/bochs/cpu/cpudb/corei7_sandy_bridge_2600K.cc @@ -630,7 +630,7 @@ void corei7_sandy_bridge_2600k_t::get_std_cpuid_xsave_leaf(Bit32u subfunction, c case 2: // AVX leaf leaf->eax = 256; - leaf->ebx = 576; + leaf->ebx = XSAVE_YMM_STATE_OFFSET; leaf->ecx = 0; leaf->edx = 0; return; diff --git a/bochs/cpu/cpudb/zambezi.cc b/bochs/cpu/cpudb/zambezi.cc index a9ef8b8b4..21fdb4234 100644 --- a/bochs/cpu/cpudb/zambezi.cc +++ b/bochs/cpu/cpudb/zambezi.cc @@ -421,7 +421,7 @@ void zambezi_t::get_std_cpuid_xsave_leaf(Bit32u subfunction, cpuid_function_t *l case 2: // AVX leaf leaf->eax = 256; - leaf->ebx = 576; + leaf->ebx = XSAVE_YMM_STATE_OFFSET; leaf->ecx = 0; leaf->edx = 0; return; diff --git a/bochs/cpu/crregs.h b/bochs/cpu/crregs.h index 178b3dae2..3bcba3883 100644 --- a/bochs/cpu/crregs.h +++ b/bochs/cpu/crregs.h @@ -231,25 +231,34 @@ struct bx_efer_t { #if BX_CPU_LEVEL >= 6 +#define XSAVE_SSE_STATE_OFFSET (160) +#define XSAVE_YMM_STATE_OFFSET (576) +#define XSAVE_OPMASK_STATE_OFFSET (1088) +#define XSAVE_ZMM_HI256_STATE_OFFSET (1152) +#define XSAVE_HI_ZMM_STATE_OFFSET (1664) + struct xcr0_t { Bit32u val32; // 32bit value of register -#define BX_XCR0_FPU_BIT 0 -#define BX_XCR0_FPU_MASK (1<Attr; bx_bool has_modrm = 0; @@ -1368,10 +1369,10 @@ fetch_b1: if ((b1 & ~0x01) == 0xc4 && (*iptr & 0xc0) == 0xc0) { // VEX 0xC4 and VEX 0xC5 had_vex = 1; - if (sse_prefix) had_vex = -1; - if (! protected_mode()) had_vex = -1; - unsigned vex, vex_opcext = 1; + if (sse_prefix || ! 
protected_mode()) + goto decode_done; + unsigned vex, vex_opcext = 1; if (remain != 0) { remain--; vex = *iptr++; @@ -1405,16 +1406,18 @@ fetch_b1: return(-1); b1 += 256 * vex_opcext; - if (b1 < 256 || b1 >= 1024) had_vex = -1; - else has_modrm = (b1 != 0x177); // if not VZEROUPPER/VZEROALL opcode + if (b1 < 256 || b1 >= 1024) goto decode_done; + has_modrm = (b1 != 0x177); // if not VZEROUPPER/VZEROALL opcode + + OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256) + 768*vex_l]; } else if (b1 == 0x8f && (*iptr & 0xc8) == 0xc8) { // 3 byte XOP prefix had_xop = 1; - if (sse_prefix) had_xop = -1; - if (! protected_mode()) had_xop = -1; - unsigned vex; + if (sse_prefix || ! protected_mode()) + goto decode_done; + unsigned vex; if (remain > 2) { remain -= 3; vex = *iptr++; // fetch XOP2 @@ -1424,7 +1427,7 @@ fetch_b1: unsigned xop_opcext = (vex & 0x1f) - 8; if (xop_opcext >= 3) - had_xop = -1; + goto decode_done; vex = *iptr++; // fetch XOP3 @@ -1433,11 +1436,13 @@ fetch_b1: vex_l = (vex >> 2) & 0x1; i->setVL(BX_VL128 + vex_l); sse_prefix = vex & 0x3; - if (sse_prefix) had_xop = -1; + if (sse_prefix) goto decode_done; b1 = *iptr++; // fetch new b1 has_modrm = 1; b1 += 256 * xop_opcext; + + OpcodeInfoPtr = &BxOpcodeTableXOP[b1 + 768*vex_l]; } else #endif @@ -1619,24 +1624,6 @@ fetch_b1: modrm_done: - // Resolve ExecutePtr and additional opcode Attr - const BxOpcodeInfo_t *OpcodeInfoPtr = &(BxOpcodeInfo32[index]); - -#if BX_SUPPORT_AVX - if (had_vex != 0) { - if (had_vex < 0) - OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR - else - OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256) + 768*vex_l]; - } - else if (had_xop != 0) { - if (had_xop < 0) - OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR - else - OpcodeInfoPtr = &BxOpcodeTableXOP[b1 + 768*vex_l]; - } -#endif - attr = OpcodeInfoPtr->Attr; if (attr & BxAliasSSE) { @@ -1646,7 +1633,7 @@ modrm_done: #if BX_SUPPORT_AVX else if (attr & BxAliasVexW) { // VexW alias could come with BxPrefixSSE - BX_ASSERT(had_vex != 0 || 
had_xop != 0); + BX_ASSERT(had_vex | had_xop); alias = vex_w; } #endif @@ -1663,10 +1650,7 @@ modrm_done: if (group < BxPrefixSSE) { /* For opcodes with only one allowed SSE prefix */ - if (sse_prefix != (group >> 4)) { - OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR - alias = 0; - } + if (sse_prefix != (group >> 4)) goto decode_done; break; } @@ -1679,11 +1663,11 @@ modrm_done: break; #if BX_SUPPORT_AVX case BxSplitVexW64: // VexW is ignored in 32-bit mode - BX_ASSERT(had_vex != 0 || had_xop != 0); + BX_ASSERT(had_vex | had_xop); OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[0]); break; case BxSplitVexW: // VexW is a real opcode extension - BX_ASSERT(had_vex != 0 || had_xop != 0); + BX_ASSERT(had_vex | had_xop); OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[vex_w]); break; case BxSplitMod11B: @@ -1728,19 +1712,6 @@ modrm_done: // the if() above after fetching the 2nd byte, so this path is // taken in all cases if a modrm byte is NOT required. - const BxOpcodeInfo_t *OpcodeInfoPtr = &(BxOpcodeInfo32[index]); - -#if BX_SUPPORT_AVX - if (had_vex != 0) { - if (had_vex < 0) - OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR - else - OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256) + 768*vex_l]; - } - // XOP always has modrm byte - BX_ASSERT(had_xop == 0); -#endif - unsigned group = attr & BxGroupX; if (group == BxPrefixSSE && sse_prefix) { OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[sse_prefix-1]); @@ -1764,12 +1735,12 @@ modrm_done: { BX_INFO(("LOCK prefix unallowed (op1=0x%x, modrm=0x%02x)", b1, b2)); // replace execution function with undefined-opcode - ia_opcode = BX_IA_ERROR; + goto decode_done; } } } - unsigned imm_mode = attr & BxImmediate; + imm_mode = attr & BxImmediate; if (imm_mode) { // make sure iptr was advanced after Ib(), Iw() and Id() switch (imm_mode) { @@ -1920,7 +1891,7 @@ modrm_done: i->setIaOpcode(ia_opcode); #if BX_SUPPORT_AVX - if (had_vex > 0 || had_xop > 0) { + if (had_vex | had_xop) { if (! 
use_vvv && vvv != 0) { ia_opcode = BX_IA_ERROR; } @@ -1936,6 +1907,8 @@ modrm_done: } #endif +decode_done: + if (mod_mem) { i->execute1 = BxOpcodesTable[ia_opcode].execute1; i->handlers.execute2 = BxOpcodesTable[ia_opcode].execute2; diff --git a/bochs/cpu/fetchdecode64.cc b/bochs/cpu/fetchdecode64.cc index a1c73f3c0..44fec7840 100644 --- a/bochs/cpu/fetchdecode64.cc +++ b/bochs/cpu/fetchdecode64.cc @@ -1690,7 +1690,7 @@ BX_CPU_C::fetchDecode64(const Bit8u *iptr, bxInstruction_c *i, unsigned remainin if (remainingInPage > 15) remainingInPage = 15; unsigned remain = remainingInPage; // remain must be at least 1 - unsigned b1, b2 = 0, ia_opcode = 0, alias = 0; + unsigned b1, b2 = 0, ia_opcode = BX_IA_ERROR, alias = 0, imm_mode = 0; unsigned offset = 512, rex_r = 0, rex_x = 0, rex_b = 0; unsigned rm = 0, mod = 0, nnn = 0, mod_mem = 0; unsigned seg = BX_SEG_REG_DS, seg_override = BX_SEG_REG_NULL; @@ -1817,7 +1817,8 @@ fetch_b1: unsigned index = b1+offset; - unsigned attr = BxOpcodeInfo64[index].Attr; + const BxOpcodeInfo_t *OpcodeInfoPtr = &(BxOpcodeInfo64[index]); + unsigned attr = OpcodeInfoPtr->Attr; bx_bool has_modrm = 0; @@ -1825,10 +1826,10 @@ fetch_b1: if ((b1 & ~0x01) == 0xc4) { // VEX had_vex = 1; - if (sse_prefix | rex_prefix) had_vex = -1; - if (! protected_mode()) had_vex = -1; - unsigned vex, vex_opcext = 1; + if (sse_prefix | rex_prefix) + goto decode_done; + unsigned vex, vex_opcext = 1; if (remain != 0) { remain--; vex = *iptr++; @@ -1870,16 +1871,19 @@ fetch_b1: return(-1); b1 += 256 * vex_opcext; - if (b1 < 256 || b1 >= 1024) had_vex = -1; - else has_modrm = (b1 != 0x177); // if not VZEROUPPER/VZEROALL opcode + if (b1 < 256 || b1 >= 1024) + goto decode_done; + has_modrm = (b1 != 0x177); // if not VZEROUPPER/VZEROALL opcode + + OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256) + 768*vex_l]; } else if (b1 == 0x8f && (*iptr & 0x08) == 0x08) { // 3 byte XOP prefix had_xop = 1; - if (sse_prefix | rex_prefix) had_xop = -1; - if (! 
protected_mode()) had_xop = -1; - unsigned vex; + if (sse_prefix | rex_prefix) + goto decode_done; + unsigned vex; if (remain > 2) { remain -= 3; vex = *iptr++; // fetch XOP2 @@ -1893,7 +1897,7 @@ fetch_b1: unsigned xop_opcext = (vex & 0x1f) - 8; if (xop_opcext >= 3) - had_xop = -1; + goto decode_done; vex = *iptr++; // fetch XOP3 @@ -1907,11 +1911,13 @@ fetch_b1: vex_l = (vex >> 2) & 0x1; i->setVL(BX_VL128 + vex_l); sse_prefix = vex & 0x3; - if (sse_prefix) had_xop = -1; + if (sse_prefix) goto decode_done; b1 = *iptr++; // fetch new b1 has_modrm = 1; b1 += 256 * xop_opcext; + + OpcodeInfoPtr = &BxOpcodeTableXOP[b1 + 768*vex_l]; } else #endif @@ -2049,24 +2055,6 @@ get_32bit_displ: modrm_done: - // Resolve ExecutePtr and additional opcode Attr - const BxOpcodeInfo_t *OpcodeInfoPtr = &(BxOpcodeInfo64[index]); - -#if BX_SUPPORT_AVX - if (had_vex != 0) { - if (had_vex < 0) - OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR - else - OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256) + 768*vex_l]; - } - else if (had_xop != 0) { - if (had_xop < 0) - OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR - else - OpcodeInfoPtr = &BxOpcodeTableXOP[b1 + 768*vex_l]; - } -#endif - attr = OpcodeInfoPtr->Attr; if (attr & BxAliasSSE) { @@ -2076,7 +2064,7 @@ modrm_done: #if BX_SUPPORT_AVX else if (attr & BxAliasVexW) { // VexW alias could come with BxPrefixSSE - BX_ASSERT(had_vex != 0 || had_xop != 0); + BX_ASSERT(had_vex | had_xop); alias = vex_w; } #endif @@ -2093,10 +2081,7 @@ modrm_done: if (group < BxPrefixSSE) { /* For opcodes with only one allowed SSE prefix */ - if (sse_prefix != (group >> 4)) { - OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR - alias = 0; - } + if (sse_prefix != (group >> 4)) goto decode_done; break; } @@ -2110,7 +2095,7 @@ modrm_done: #if BX_SUPPORT_AVX case BxSplitVexW: case BxSplitVexW64: - BX_ASSERT(had_vex != 0 || had_xop != 0); + BX_ASSERT(had_vex | had_xop); OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[vex_w]); break; case 
BxSplitMod11B: @@ -2153,19 +2138,6 @@ modrm_done: // the if() above after fetching the 2nd byte, so this path is // taken in all cases if a modrm byte is NOT required. - const BxOpcodeInfo_t *OpcodeInfoPtr = &(BxOpcodeInfo64[index]); - -#if BX_SUPPORT_AVX - if (had_vex != 0) { - if (had_vex < 0) - OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR - else - OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256) + 768*vex_l]; - } - // XOP always has modrm byte - BX_ASSERT(had_xop == 0); -#endif - if (b1 == 0x90 && sse_prefix == SSE_PREFIX_F3) { // attention: need to handle VEX separately, XOP never reach here ia_opcode = BX_IA_PAUSE; @@ -2191,12 +2163,12 @@ modrm_done: else { BX_INFO(("LOCK prefix unallowed (op1=0x%x, modrm=0x%02x)", b1, b2)); // replace execution function with undefined-opcode - ia_opcode = BX_IA_ERROR; + goto decode_done; } } } - unsigned imm_mode = attr & BxImmediate; + imm_mode = attr & BxImmediate; if (imm_mode) { // make sure iptr was advanced after Ib(), Iw() and Id() switch (imm_mode) { @@ -2354,7 +2326,7 @@ modrm_done: i->setIaOpcode(ia_opcode); #if BX_SUPPORT_AVX - if (had_vex > 0 || had_xop > 0) { + if (had_vex | had_xop) { if (! 
use_vvv && vvv != 0) { ia_opcode = BX_IA_ERROR; } @@ -2370,6 +2342,8 @@ modrm_done: } #endif +decode_done: + if (mod_mem) { i->execute1 = BxOpcodesTable[ia_opcode].execute1; i->handlers.execute2 = BxOpcodesTable[ia_opcode].execute2; diff --git a/bochs/cpu/fetchdecode_avx.h b/bochs/cpu/fetchdecode_avx.h index 79f4f6c5e..c7fc2904c 100644 --- a/bochs/cpu/fetchdecode_avx.h +++ b/bochs/cpu/fetchdecode_avx.h @@ -264,8 +264,8 @@ static const BxOpcodeInfo_t BxOpcodeGroupAVX256_0f29[3] = { static const BxOpcodeInfo_t BxOpcodeGroupAVX_0f2a[3] = { /* 66 */ { 0, BX_IA_ERROR }, - /* F3 */ { BxSplitVexW, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f30f2a }, - /* F2 */ { BxSplitVexW, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f20f2a } + /* F3 */ { BxSplitVexW64, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f30f2a }, + /* F2 */ { BxSplitVexW64, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f20f2a } }; static const BxOpcodeInfo_t BxOpcodeGroupAVX128_0f2bM[3] = { @@ -282,14 +282,14 @@ static const BxOpcodeInfo_t BxOpcodeGroupAVX256_0f2bM[3] = { static const BxOpcodeInfo_t BxOpcodeGroupAVX_0f2c[3] = { /* 66 */ { 0, BX_IA_ERROR }, - /* F3 */ { BxSplitVexW, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f30f2c }, - /* F2 */ { BxSplitVexW, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f20f2c } + /* F3 */ { BxSplitVexW64, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f30f2c }, + /* F2 */ { BxSplitVexW64, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f20f2c } }; static const BxOpcodeInfo_t BxOpcodeGroupAVX_0f2d[3] = { /* 66 */ { 0, BX_IA_ERROR }, - /* F3 */ { BxSplitVexW, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f30f2d }, - /* F2 */ { BxSplitVexW, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f20f2d } + /* F3 */ { BxSplitVexW64, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f30f2d }, + /* F2 */ { BxSplitVexW64, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f20f2d } }; static const BxOpcodeInfo_t BxOpcodeGroupAVX_0f2e[3] = { @@ -395,7 +395,7 @@ static const BxOpcodeInfo_t BxOpcodeGroupAVX_0f7d[3] = { }; static const BxOpcodeInfo_t BxOpcodeGroupAVX_0f7e[3] = { - /* 66 */ { BxSplitVexW, BX_IA_ERROR, 
BxOpcodeInfoAVX_VexW_660f7e }, + /* 66 */ { BxSplitVexW64, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_660f7e }, /* F3 */ { 0, BX_IA_VMOVQ_VqWq }, /* F2 */ { 0, BX_IA_ERROR } }; @@ -643,7 +643,7 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = { /* 6B /0 */ { BxPrefixSSE66, BX_IA_V128_VPACKSSDW_VdqHdqWdq }, /* 6C /0 */ { BxPrefixSSE66, BX_IA_V128_VPUNPCKLQDQ_VdqHdqWdq }, /* 6D /0 */ { BxPrefixSSE66, BX_IA_V128_VPUNPCKHQDQ_VdqHdqWdq }, - /* 6E /0 */ { BxSplitVexW, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f6e }, + /* 6E /0 */ { BxSplitVexW64, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f6e }, /* 6F /0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupAVX128_0f6f }, /* 70 /0 */ { BxPrefixSSE | BxImmediate_Ib, BX_IA_ERROR, BxOpcodeGroupAVX128_0f70 }, /* 71 /0 */ { BxGroup12, BX_IA_ERROR, BxOpcodeInfoAVX128G12R }, @@ -1071,7 +1071,7 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = { /* 13 /0 */ { 0, BX_IA_ERROR }, /* 14 /0 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V128_VPEXTRB_EbdVdqIb }, /* 15 /0 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V128_VPEXTRW_EwdVdqIb }, - /* 16 /0 */ { BxSplitVexW | BxImmediate_Ib, BX_IA_ERROR, BxOpcodeInfoAVX128_VexW_0f3a16 }, + /* 16 /0 */ { BxSplitVexW64 | BxImmediate_Ib, BX_IA_ERROR, BxOpcodeInfoAVX128_VexW_0f3a16 }, /* 17 /0 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V128_VEXTRACTPS_EdVpsIb }, /* 18 /0 */ { 0, BX_IA_ERROR }, /* 19 /0 */ { 0, BX_IA_ERROR }, diff --git a/bochs/cpu/generic_cpuid.cc b/bochs/cpu/generic_cpuid.cc index ada675a48..7892b210b 100644 --- a/bochs/cpu/generic_cpuid.cc +++ b/bochs/cpu/generic_cpuid.cc @@ -454,7 +454,7 @@ void bx_generic_cpuid_t::get_std_cpuid_xsave_leaf(Bit32u subfunction, cpuid_func case 2: // AVX leaf if (cpu->xcr0_suppmask & BX_XCR0_YMM_MASK) { leaf->eax = 256; - leaf->ebx = 576; + leaf->ebx = XSAVE_YMM_STATE_OFFSET; leaf->ecx = 0; leaf->edx = 0; break; diff --git a/bochs/cpu/init.cc b/bochs/cpu/init.cc index 33e2cdada..3c8f246bf 100644 --- a/bochs/cpu/init.cc +++ b/bochs/cpu/init.cc @@ -860,7 +860,10 @@ 
void BX_CPU_C::reset(unsigned source) #endif #if BX_SUUPORT_EVEX - for (n=0; n<8; n++) BX_WRITE_OPMASK(index, 0); + BX_CPU_THIS_PTR evex_ok = 0; + + for (n=0; n<8; n++) + BX_WRITE_OPMASK(n, 0); #endif // Reset XMM state - unchanged on #INIT diff --git a/bochs/cpu/proc_ctrl.cc b/bochs/cpu/proc_ctrl.cc index d9a16e3f9..15f3f51b6 100644 --- a/bochs/cpu/proc_ctrl.cc +++ b/bochs/cpu/proc_ctrl.cc @@ -422,12 +422,26 @@ void BX_CPU_C::handleAvxModeChange(void) } else { if (! protected_mode() || ! BX_CPU_THIS_PTR cr4.get_OSXSAVE() || - (~BX_CPU_THIS_PTR xcr0.val32 & (BX_XCR0_SSE_MASK | BX_XCR0_YMM_MASK)) != 0) - BX_CPU_THIS_PTR avx_ok = 0; - else + (~BX_CPU_THIS_PTR xcr0.val32 & (BX_XCR0_SSE_MASK | BX_XCR0_YMM_MASK)) != 0) { + BX_CPU_THIS_PTR avx_ok = 0; + } + else { BX_CPU_THIS_PTR avx_ok = 1; + +#if BX_SUPPORT_EVEX + if ((~BX_CPU_THIS_PTR xcr0.val32 & (BX_XCR0_OPMASK_MASK | BX_XCR0_ZMM_HI256_MASK | BX_XCR0_HI_ZMM_MASK)) != 0) + BX_CPU_THIS_PTR evex_ok = 0; + else + BX_CPU_THIS_PTR evex_ok = 1; +#endif + } } +#if BX_SUPPORT_EVEX + if (! BX_CPU_THIS_PTR avx_ok) + BX_CPU_THIS_PTR evex_ok = 0; +#endif + updateFetchModeMask(); /* AVX_OK changed */ } @@ -448,6 +462,24 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BxNoAVX(bxInstruction_c *i) } #endif +#if BX_SUPPORT_EVEX +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BxNoEVEX(bxInstruction_c *i) +{ + if (! protected_mode() || ! 
BX_CPU_THIS_PTR cr4.get_OSXSAVE()) + exception(BX_UD_EXCEPTION, 0); + + if (~BX_CPU_THIS_PTR xcr0.val32 & (BX_XCR0_SSE_MASK | BX_XCR0_YMM_MASK | BX_XCR0_OPMASK_MASK | BX_XCR0_ZMM_HI256_MASK | BX_XCR0_HI_ZMM_MASK)) + exception(BX_UD_EXCEPTION, 0); + + if(BX_CPU_THIS_PTR cr0.get_TS()) + exception(BX_NM_EXCEPTION, 0); + + BX_ASSERT(0); + + BX_NEXT_TRACE(i); // keep compiler happy +} +#endif + #endif void BX_CPU_C::handleCpuContextChange(void) diff --git a/bochs/cpu/xsave.cc b/bochs/cpu/xsave.cc index c70c12b35..98142d31c 100644 --- a/bochs/cpu/xsave.cc +++ b/bochs/cpu/xsave.cc @@ -26,12 +26,6 @@ #include "cpu.h" #define LOG_THIS BX_CPU_THIS_PTR -#define XSAVE_SSE_STATE_OFFSET 160 -#define XSAVE_YMM_STATE_OFFSET 576 -#define XSAVE_OPMASK_STATE_OFFSET 1088 -#define XSAVE_ZMM_HI256_STATE_OFFSET 1152 -#define XSAVE_HI_ZMM_STATE_OFFSET 1664 - /* 0F AE /4 */ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::XSAVE(bxInstruction_c *i) { @@ -426,7 +420,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::XRSTOR(bxInstruction_c *i) ///////////////////////////////////////////////////////////////////////////// if ((features_load_enable_mask & BX_XCR0_ZMM_HI256_MASK) != 0) { - if (header1 & BX_XCR0_ZMM_HI256_BIT) { + if (header1 & BX_XCR0_ZMM_HI256_MASK) { // load upper part of ZMM registers from XSAVE area for(index=0; index < 16; index++) { read_virtual_ymmword(i->seg(), (eaddr+index*32+XSAVE_ZMM_HI256_STATE_OFFSET) & asize_mask,