various fixes

This commit is contained in:
Stanislav Shwartsman 2013-08-29 19:43:15 +00:00
parent 7e2ab5ca81
commit 59c65151f5
13 changed files with 144 additions and 146 deletions

View File

@ -1208,6 +1208,9 @@ public: // for now...
#if BX_SUPPORT_AVX
unsigned avx_ok;
#endif
#if BX_SUPPORT_EVEX
unsigned evex_ok;
#endif
#endif
// for exceptions
@ -3624,9 +3627,15 @@ public: // for now...
#if BX_SUPPORT_HANDLERS_CHAINING_SPEEDUPS
BX_SMF BX_INSF_TYPE BxEndTrace(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#endif
#if BX_CPU_LEVEL >= 6
BX_SMF BX_INSF_TYPE BxNoSSE(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#if BX_SUPPORT_AVX
BX_SMF BX_INSF_TYPE BxNoAVX(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#endif
#if BX_SUPPORT_EVEX
BX_SMF BX_INSF_TYPE BxNoEVEX(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#endif
#endif
BX_SMF bx_address BxResolve16BaseIndex(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
@ -4614,6 +4623,7 @@ BX_CPP_INLINE void BX_CPU_C::prepareXSAVE(void)
// bit 1 - long64 mode (CS.L)
// bit 2 - SSE_OK
// bit 3 - AVX_OK
// bit 4 - EVEX_OK
//
// updateFetchModeMask - has to be called every time
// CS.L / CS.D_B / CR0.PE, CR0.TS or CR0.EM / CR4.OSFXSR / CR4.OSXSAVE changes
@ -4622,6 +4632,9 @@ BX_CPP_INLINE void BX_CPU_C::updateFetchModeMask(void)
{
BX_CPU_THIS_PTR fetchModeMask =
#if BX_CPU_LEVEL >= 6
#if BX_SUPPORT_EVEX
(BX_CPU_THIS_PTR evex_ok << 4) |
#endif
#if BX_SUPPORT_AVX
(BX_CPU_THIS_PTR avx_ok << 3) |
#endif

View File

@ -686,7 +686,7 @@ void corei7_haswell_4770_t::get_std_cpuid_xsave_leaf(Bit32u subfunction, cpuid_f
case 2: // AVX leaf
leaf->eax = 256;
leaf->ebx = 576;
leaf->ebx = XSAVE_YMM_STATE_OFFSET;
leaf->ecx = 0;
leaf->edx = 0;
return;

View File

@ -679,7 +679,7 @@ void corei7_ivy_bridge_3770k_t::get_std_cpuid_xsave_leaf(Bit32u subfunction, cpu
case 2: // AVX leaf
leaf->eax = 256;
leaf->ebx = 576;
leaf->ebx = XSAVE_YMM_STATE_OFFSET;
leaf->ecx = 0;
leaf->edx = 0;
return;

View File

@ -630,7 +630,7 @@ void corei7_sandy_bridge_2600k_t::get_std_cpuid_xsave_leaf(Bit32u subfunction, c
case 2: // AVX leaf
leaf->eax = 256;
leaf->ebx = 576;
leaf->ebx = XSAVE_YMM_STATE_OFFSET;
leaf->ecx = 0;
leaf->edx = 0;
return;

View File

@ -421,7 +421,7 @@ void zambezi_t::get_std_cpuid_xsave_leaf(Bit32u subfunction, cpuid_function_t *l
case 2: // AVX leaf
leaf->eax = 256;
leaf->ebx = 576;
leaf->ebx = XSAVE_YMM_STATE_OFFSET;
leaf->ecx = 0;
leaf->edx = 0;
return;

View File

@ -231,25 +231,34 @@ struct bx_efer_t {
#if BX_CPU_LEVEL >= 6
#define XSAVE_SSE_STATE_OFFSET (160)
#define XSAVE_YMM_STATE_OFFSET (576)
#define XSAVE_OPMASK_STATE_OFFSET (1088)
#define XSAVE_ZMM_HI256_STATE_OFFSET (1152)
#define XSAVE_HI_ZMM_STATE_OFFSET (1664)
struct xcr0_t {
Bit32u val32; // 32bit value of register
#define BX_XCR0_FPU_BIT 0
#define BX_XCR0_FPU_MASK (1<<BX_XCR0_FPU_BIT)
#define BX_XCR0_SSE_BIT 1
#define BX_XCR0_SSE_MASK (1<<BX_XCR0_SSE_BIT)
#define BX_XCR0_YMM_BIT 2
#define BX_XCR0_YMM_MASK (1<<BX_XCR0_YMM_BIT)
#define BX_XCR0_BNDREGS_BIT 3
#define BX_XCR0_BNDREGS_MASK (1<<BX_XCR0_BNDREGS_BIT)
#define BX_XCR0_BNDCFG_BIT 4
#define BX_XCR0_BNDCFG_MASK (1<<BX_XCR0_BNDCFG_BIT)
#define BX_XCR0_OPMASK_BIT 5
#define BX_XCR0_OPMASK_MASK (1<<BX_XCR0_OPMASK_BIT)
#define BX_XCR0_ZMM_HI256_BIT 6
#define BX_XCR0_ZMM_HI256_MASK (1<<BX_XCR0_ZMM_HI256_BIT)
#define BX_XCR0_HI_ZMM_BIT 7
#define BX_XCR0_HI_ZMM_MASK (1<<BX_XCR0_HI_ZMM_BIT)
enum {
BX_XCR0_FPU_BIT = 0,
BX_XCR0_SSE_BIT = 1,
BX_XCR0_YMM_BIT = 2,
BX_XCR0_BNDREGS_BIT = 3,
BX_XCR0_BNDCFG_BIT = 4,
BX_XCR0_OPMASK_BIT = 5,
BX_XCR0_ZMM_HI256_BIT = 6,
BX_XCR0_HI_ZMM_BIT = 7
};
#define BX_XCR0_FPU_MASK (1 << xcr0_t::BX_XCR0_FPU_BIT)
#define BX_XCR0_SSE_MASK (1 << xcr0_t::BX_XCR0_SSE_BIT)
#define BX_XCR0_YMM_MASK (1 << xcr0_t::BX_XCR0_YMM_BIT)
#define BX_XCR0_BNDREGS_MASK (1 << xcr0_t::BX_XCR0_BNDREGS_BIT)
#define BX_XCR0_BNDCFG_MASK (1 << xcr0_t::BX_XCR0_BNDCFG_BIT)
#define BX_XCR0_OPMASK_MASK (1 << xcr0_t::BX_XCR0_OPMASK_BIT)
#define BX_XCR0_ZMM_HI256_MASK (1 << xcr0_t::BX_XCR0_ZMM_HI256_BIT)
#define BX_XCR0_HI_ZMM_MASK (1 << xcr0_t::BX_XCR0_HI_ZMM_BIT)
IMPLEMENT_CRREG_ACCESSORS(FPU, BX_XCR0_FPU_BIT);
IMPLEMENT_CRREG_ACCESSORS(SSE, BX_XCR0_SSE_BIT);

View File

@ -1274,7 +1274,7 @@ BX_CPU_C::fetchDecode32(const Bit8u *iptr, bxInstruction_c *i, unsigned remainin
unsigned remain = remainingInPage; // remain must be at least 1
bx_bool is_32, lock=0;
unsigned b1, b2 = 0, os_32, ia_opcode = 0, alias = 0;
unsigned b1, b2 = 0, os_32, ia_opcode = BX_IA_ERROR, alias = 0, imm_mode = 0;
unsigned rm = 0, mod=0, nnn=0, mod_mem = 0;
unsigned seg = BX_SEG_REG_DS, seg_override = BX_SEG_REG_NULL;
@ -1360,7 +1360,8 @@ fetch_b1:
unsigned index = b1 + (os_32 << 9); // *512
unsigned attr = BxOpcodeInfo32[index].Attr;
const BxOpcodeInfo_t *OpcodeInfoPtr = &(BxOpcodeInfo32[index]);
unsigned attr = OpcodeInfoPtr->Attr;
bx_bool has_modrm = 0;
@ -1368,10 +1369,10 @@ fetch_b1:
if ((b1 & ~0x01) == 0xc4 && (*iptr & 0xc0) == 0xc0) {
// VEX 0xC4 and VEX 0xC5
had_vex = 1;
if (sse_prefix) had_vex = -1;
if (! protected_mode()) had_vex = -1;
unsigned vex, vex_opcext = 1;
if (sse_prefix || ! protected_mode())
goto decode_done;
unsigned vex, vex_opcext = 1;
if (remain != 0) {
remain--;
vex = *iptr++;
@ -1405,16 +1406,18 @@ fetch_b1:
return(-1);
b1 += 256 * vex_opcext;
if (b1 < 256 || b1 >= 1024) had_vex = -1;
else has_modrm = (b1 != 0x177); // if not VZEROUPPER/VZEROALL opcode
if (b1 < 256 || b1 >= 1024) goto decode_done;
has_modrm = (b1 != 0x177); // if not VZEROUPPER/VZEROALL opcode
OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256) + 768*vex_l];
}
else if (b1 == 0x8f && (*iptr & 0xc8) == 0xc8) {
// 3 byte XOP prefix
had_xop = 1;
if (sse_prefix) had_xop = -1;
if (! protected_mode()) had_xop = -1;
unsigned vex;
if (sse_prefix || ! protected_mode())
goto decode_done;
unsigned vex;
if (remain > 2) {
remain -= 3;
vex = *iptr++; // fetch XOP2
@ -1424,7 +1427,7 @@ fetch_b1:
unsigned xop_opcext = (vex & 0x1f) - 8;
if (xop_opcext >= 3)
had_xop = -1;
goto decode_done;
vex = *iptr++; // fetch XOP3
@ -1433,11 +1436,13 @@ fetch_b1:
vex_l = (vex >> 2) & 0x1;
i->setVL(BX_VL128 + vex_l);
sse_prefix = vex & 0x3;
if (sse_prefix) had_xop = -1;
if (sse_prefix) goto decode_done;
b1 = *iptr++; // fetch new b1
has_modrm = 1;
b1 += 256 * xop_opcext;
OpcodeInfoPtr = &BxOpcodeTableXOP[b1 + 768*vex_l];
}
else
#endif
@ -1619,24 +1624,6 @@ fetch_b1:
modrm_done:
// Resolve ExecutePtr and additional opcode Attr
const BxOpcodeInfo_t *OpcodeInfoPtr = &(BxOpcodeInfo32[index]);
#if BX_SUPPORT_AVX
if (had_vex != 0) {
if (had_vex < 0)
OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR
else
OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256) + 768*vex_l];
}
else if (had_xop != 0) {
if (had_xop < 0)
OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR
else
OpcodeInfoPtr = &BxOpcodeTableXOP[b1 + 768*vex_l];
}
#endif
attr = OpcodeInfoPtr->Attr;
if (attr & BxAliasSSE) {
@ -1646,7 +1633,7 @@ modrm_done:
#if BX_SUPPORT_AVX
else if (attr & BxAliasVexW) {
// VexW alias could come with BxPrefixSSE
BX_ASSERT(had_vex != 0 || had_xop != 0);
BX_ASSERT(had_vex | had_xop);
alias = vex_w;
}
#endif
@ -1663,10 +1650,7 @@ modrm_done:
if (group < BxPrefixSSE) {
/* For opcodes with only one allowed SSE prefix */
if (sse_prefix != (group >> 4)) {
OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR
alias = 0;
}
if (sse_prefix != (group >> 4)) goto decode_done;
break;
}
@ -1679,11 +1663,11 @@ modrm_done:
break;
#if BX_SUPPORT_AVX
case BxSplitVexW64: // VexW is ignored in 32-bit mode
BX_ASSERT(had_vex != 0 || had_xop != 0);
BX_ASSERT(had_vex | had_xop);
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[0]);
break;
case BxSplitVexW: // VexW is a real opcode extension
BX_ASSERT(had_vex != 0 || had_xop != 0);
BX_ASSERT(had_vex | had_xop);
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[vex_w]);
break;
case BxSplitMod11B:
@ -1728,19 +1712,6 @@ modrm_done:
// the if() above after fetching the 2nd byte, so this path is
// taken in all cases if a modrm byte is NOT required.
const BxOpcodeInfo_t *OpcodeInfoPtr = &(BxOpcodeInfo32[index]);
#if BX_SUPPORT_AVX
if (had_vex != 0) {
if (had_vex < 0)
OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR
else
OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256) + 768*vex_l];
}
// XOP always has modrm byte
BX_ASSERT(had_xop == 0);
#endif
unsigned group = attr & BxGroupX;
if (group == BxPrefixSSE && sse_prefix) {
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[sse_prefix-1]);
@ -1764,12 +1735,12 @@ modrm_done:
{
BX_INFO(("LOCK prefix unallowed (op1=0x%x, modrm=0x%02x)", b1, b2));
// replace execution function with undefined-opcode
ia_opcode = BX_IA_ERROR;
goto decode_done;
}
}
}
unsigned imm_mode = attr & BxImmediate;
imm_mode = attr & BxImmediate;
if (imm_mode) {
// make sure iptr was advanced after Ib(), Iw() and Id()
switch (imm_mode) {
@ -1920,7 +1891,7 @@ modrm_done:
i->setIaOpcode(ia_opcode);
#if BX_SUPPORT_AVX
if (had_vex > 0 || had_xop > 0) {
if (had_vex | had_xop) {
if (! use_vvv && vvv != 0) {
ia_opcode = BX_IA_ERROR;
}
@ -1936,6 +1907,8 @@ modrm_done:
}
#endif
decode_done:
if (mod_mem) {
i->execute1 = BxOpcodesTable[ia_opcode].execute1;
i->handlers.execute2 = BxOpcodesTable[ia_opcode].execute2;

View File

@ -1690,7 +1690,7 @@ BX_CPU_C::fetchDecode64(const Bit8u *iptr, bxInstruction_c *i, unsigned remainin
if (remainingInPage > 15) remainingInPage = 15;
unsigned remain = remainingInPage; // remain must be at least 1
unsigned b1, b2 = 0, ia_opcode = 0, alias = 0;
unsigned b1, b2 = 0, ia_opcode = BX_IA_ERROR, alias = 0, imm_mode = 0;
unsigned offset = 512, rex_r = 0, rex_x = 0, rex_b = 0;
unsigned rm = 0, mod = 0, nnn = 0, mod_mem = 0;
unsigned seg = BX_SEG_REG_DS, seg_override = BX_SEG_REG_NULL;
@ -1817,7 +1817,8 @@ fetch_b1:
unsigned index = b1+offset;
unsigned attr = BxOpcodeInfo64[index].Attr;
const BxOpcodeInfo_t *OpcodeInfoPtr = &(BxOpcodeInfo64[index]);
unsigned attr = OpcodeInfoPtr->Attr;
bx_bool has_modrm = 0;
@ -1825,10 +1826,10 @@ fetch_b1:
if ((b1 & ~0x01) == 0xc4) {
// VEX
had_vex = 1;
if (sse_prefix | rex_prefix) had_vex = -1;
if (! protected_mode()) had_vex = -1;
unsigned vex, vex_opcext = 1;
if (sse_prefix | rex_prefix)
goto decode_done;
unsigned vex, vex_opcext = 1;
if (remain != 0) {
remain--;
vex = *iptr++;
@ -1870,16 +1871,19 @@ fetch_b1:
return(-1);
b1 += 256 * vex_opcext;
if (b1 < 256 || b1 >= 1024) had_vex = -1;
else has_modrm = (b1 != 0x177); // if not VZEROUPPER/VZEROALL opcode
if (b1 < 256 || b1 >= 1024)
goto decode_done;
has_modrm = (b1 != 0x177); // if not VZEROUPPER/VZEROALL opcode
OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256) + 768*vex_l];
}
else if (b1 == 0x8f && (*iptr & 0x08) == 0x08) {
// 3 byte XOP prefix
had_xop = 1;
if (sse_prefix | rex_prefix) had_xop = -1;
if (! protected_mode()) had_xop = -1;
unsigned vex;
if (sse_prefix | rex_prefix)
goto decode_done;
unsigned vex;
if (remain > 2) {
remain -= 3;
vex = *iptr++; // fetch XOP2
@ -1893,7 +1897,7 @@ fetch_b1:
unsigned xop_opcext = (vex & 0x1f) - 8;
if (xop_opcext >= 3)
had_xop = -1;
goto decode_done;
vex = *iptr++; // fetch XOP3
@ -1907,11 +1911,13 @@ fetch_b1:
vex_l = (vex >> 2) & 0x1;
i->setVL(BX_VL128 + vex_l);
sse_prefix = vex & 0x3;
if (sse_prefix) had_xop = -1;
if (sse_prefix) goto decode_done;
b1 = *iptr++; // fetch new b1
has_modrm = 1;
b1 += 256 * xop_opcext;
OpcodeInfoPtr = &BxOpcodeTableXOP[b1 + 768*vex_l];
}
else
#endif
@ -2049,24 +2055,6 @@ get_32bit_displ:
modrm_done:
// Resolve ExecutePtr and additional opcode Attr
const BxOpcodeInfo_t *OpcodeInfoPtr = &(BxOpcodeInfo64[index]);
#if BX_SUPPORT_AVX
if (had_vex != 0) {
if (had_vex < 0)
OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR
else
OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256) + 768*vex_l];
}
else if (had_xop != 0) {
if (had_xop < 0)
OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR
else
OpcodeInfoPtr = &BxOpcodeTableXOP[b1 + 768*vex_l];
}
#endif
attr = OpcodeInfoPtr->Attr;
if (attr & BxAliasSSE) {
@ -2076,7 +2064,7 @@ modrm_done:
#if BX_SUPPORT_AVX
else if (attr & BxAliasVexW) {
// VexW alias could come with BxPrefixSSE
BX_ASSERT(had_vex != 0 || had_xop != 0);
BX_ASSERT(had_vex | had_xop);
alias = vex_w;
}
#endif
@ -2093,10 +2081,7 @@ modrm_done:
if (group < BxPrefixSSE) {
/* For opcodes with only one allowed SSE prefix */
if (sse_prefix != (group >> 4)) {
OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR
alias = 0;
}
if (sse_prefix != (group >> 4)) goto decode_done;
break;
}
@ -2110,7 +2095,7 @@ modrm_done:
#if BX_SUPPORT_AVX
case BxSplitVexW:
case BxSplitVexW64:
BX_ASSERT(had_vex != 0 || had_xop != 0);
BX_ASSERT(had_vex | had_xop);
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[vex_w]);
break;
case BxSplitMod11B:
@ -2153,19 +2138,6 @@ modrm_done:
// the if() above after fetching the 2nd byte, so this path is
// taken in all cases if a modrm byte is NOT required.
const BxOpcodeInfo_t *OpcodeInfoPtr = &(BxOpcodeInfo64[index]);
#if BX_SUPPORT_AVX
if (had_vex != 0) {
if (had_vex < 0)
OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR
else
OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256) + 768*vex_l];
}
// XOP always has modrm byte
BX_ASSERT(had_xop == 0);
#endif
if (b1 == 0x90 && sse_prefix == SSE_PREFIX_F3) {
// attention: need to handle VEX separately, XOP never reach here
ia_opcode = BX_IA_PAUSE;
@ -2191,12 +2163,12 @@ modrm_done:
else {
BX_INFO(("LOCK prefix unallowed (op1=0x%x, modrm=0x%02x)", b1, b2));
// replace execution function with undefined-opcode
ia_opcode = BX_IA_ERROR;
goto decode_done;
}
}
}
unsigned imm_mode = attr & BxImmediate;
imm_mode = attr & BxImmediate;
if (imm_mode) {
// make sure iptr was advanced after Ib(), Iw() and Id()
switch (imm_mode) {
@ -2354,7 +2326,7 @@ modrm_done:
i->setIaOpcode(ia_opcode);
#if BX_SUPPORT_AVX
if (had_vex > 0 || had_xop > 0) {
if (had_vex | had_xop) {
if (! use_vvv && vvv != 0) {
ia_opcode = BX_IA_ERROR;
}
@ -2370,6 +2342,8 @@ modrm_done:
}
#endif
decode_done:
if (mod_mem) {
i->execute1 = BxOpcodesTable[ia_opcode].execute1;
i->handlers.execute2 = BxOpcodesTable[ia_opcode].execute2;

View File

@ -264,8 +264,8 @@ static const BxOpcodeInfo_t BxOpcodeGroupAVX256_0f29[3] = {
static const BxOpcodeInfo_t BxOpcodeGroupAVX_0f2a[3] = {
/* 66 */ { 0, BX_IA_ERROR },
/* F3 */ { BxSplitVexW, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f30f2a },
/* F2 */ { BxSplitVexW, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f20f2a }
/* F3 */ { BxSplitVexW64, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f30f2a },
/* F2 */ { BxSplitVexW64, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f20f2a }
};
static const BxOpcodeInfo_t BxOpcodeGroupAVX128_0f2bM[3] = {
@ -282,14 +282,14 @@ static const BxOpcodeInfo_t BxOpcodeGroupAVX256_0f2bM[3] = {
static const BxOpcodeInfo_t BxOpcodeGroupAVX_0f2c[3] = {
/* 66 */ { 0, BX_IA_ERROR },
/* F3 */ { BxSplitVexW, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f30f2c },
/* F2 */ { BxSplitVexW, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f20f2c }
/* F3 */ { BxSplitVexW64, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f30f2c },
/* F2 */ { BxSplitVexW64, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f20f2c }
};
static const BxOpcodeInfo_t BxOpcodeGroupAVX_0f2d[3] = {
/* 66 */ { 0, BX_IA_ERROR },
/* F3 */ { BxSplitVexW, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f30f2d },
/* F2 */ { BxSplitVexW, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f20f2d }
/* F3 */ { BxSplitVexW64, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f30f2d },
/* F2 */ { BxSplitVexW64, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_f20f2d }
};
static const BxOpcodeInfo_t BxOpcodeGroupAVX_0f2e[3] = {
@ -395,7 +395,7 @@ static const BxOpcodeInfo_t BxOpcodeGroupAVX_0f7d[3] = {
};
static const BxOpcodeInfo_t BxOpcodeGroupAVX_0f7e[3] = {
/* 66 */ { BxSplitVexW, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_660f7e },
/* 66 */ { BxSplitVexW64, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_660f7e },
/* F3 */ { 0, BX_IA_VMOVQ_VqWq },
/* F2 */ { 0, BX_IA_ERROR }
};
@ -643,7 +643,7 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 6B /0 */ { BxPrefixSSE66, BX_IA_V128_VPACKSSDW_VdqHdqWdq },
/* 6C /0 */ { BxPrefixSSE66, BX_IA_V128_VPUNPCKLQDQ_VdqHdqWdq },
/* 6D /0 */ { BxPrefixSSE66, BX_IA_V128_VPUNPCKHQDQ_VdqHdqWdq },
/* 6E /0 */ { BxSplitVexW, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f6e },
/* 6E /0 */ { BxSplitVexW64, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f6e },
/* 6F /0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupAVX128_0f6f },
/* 70 /0 */ { BxPrefixSSE | BxImmediate_Ib, BX_IA_ERROR, BxOpcodeGroupAVX128_0f70 },
/* 71 /0 */ { BxGroup12, BX_IA_ERROR, BxOpcodeInfoAVX128G12R },
@ -1071,7 +1071,7 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 13 /0 */ { 0, BX_IA_ERROR },
/* 14 /0 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V128_VPEXTRB_EbdVdqIb },
/* 15 /0 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V128_VPEXTRW_EwdVdqIb },
/* 16 /0 */ { BxSplitVexW | BxImmediate_Ib, BX_IA_ERROR, BxOpcodeInfoAVX128_VexW_0f3a16 },
/* 16 /0 */ { BxSplitVexW64 | BxImmediate_Ib, BX_IA_ERROR, BxOpcodeInfoAVX128_VexW_0f3a16 },
/* 17 /0 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V128_VEXTRACTPS_EdVpsIb },
/* 18 /0 */ { 0, BX_IA_ERROR },
/* 19 /0 */ { 0, BX_IA_ERROR },

View File

@ -454,7 +454,7 @@ void bx_generic_cpuid_t::get_std_cpuid_xsave_leaf(Bit32u subfunction, cpuid_func
case 2: // AVX leaf
if (cpu->xcr0_suppmask & BX_XCR0_YMM_MASK) {
leaf->eax = 256;
leaf->ebx = 576;
leaf->ebx = XSAVE_YMM_STATE_OFFSET;
leaf->ecx = 0;
leaf->edx = 0;
break;

View File

@ -860,7 +860,10 @@ void BX_CPU_C::reset(unsigned source)
#endif
#if BX_SUPPORT_EVEX
// Zero opmask registers 0..7; the loop counter n is the register index.
for (n=0; n<8; n++) BX_WRITE_OPMASK(n, 0);
// EVEX starts out disabled after reset.
BX_CPU_THIS_PTR evex_ok = 0;
for (n=0; n<8; n++)
BX_WRITE_OPMASK(n, 0);
#endif
// Reset XMM state - unchanged on #INIT

View File

@ -422,12 +422,26 @@ void BX_CPU_C::handleAvxModeChange(void)
}
else {
if (! protected_mode() || ! BX_CPU_THIS_PTR cr4.get_OSXSAVE() ||
(~BX_CPU_THIS_PTR xcr0.val32 & (BX_XCR0_SSE_MASK | BX_XCR0_YMM_MASK)) != 0)
BX_CPU_THIS_PTR avx_ok = 0;
else
(~BX_CPU_THIS_PTR xcr0.val32 & (BX_XCR0_SSE_MASK | BX_XCR0_YMM_MASK)) != 0) {
BX_CPU_THIS_PTR avx_ok = 0;
}
else {
BX_CPU_THIS_PTR avx_ok = 1;
#if BX_SUPPORT_EVEX
// EVEX is usable only when XCR0 also enables OPMASK, ZMM_HI256 and HI_ZMM.
// The masked value must be parenthesized before the comparison: '!=' binds
// tighter than '&', so without the parentheses only bit 0 would be tested.
if ((~BX_CPU_THIS_PTR xcr0.val32 & (BX_XCR0_OPMASK_MASK | BX_XCR0_ZMM_HI256_MASK | BX_XCR0_HI_ZMM_MASK)) != 0)
BX_CPU_THIS_PTR evex_ok = 0;
else
BX_CPU_THIS_PTR evex_ok = 1;
#endif
}
}
#if BX_SUPPORT_EVEX
if (! BX_CPU_THIS_PTR avx_ok)
BX_CPU_THIS_PTR evex_ok = 0;
#endif
updateFetchModeMask(); /* AVX_OK changed */
}
@ -448,6 +462,24 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BxNoAVX(bxInstruction_c *i)
}
#endif
#if BX_SUPPORT_EVEX
// Raises the exception for the first failing EVEX precondition, checked in
// this order:
//   #UD - not in protected mode, or CR4.OSXSAVE is clear
//   #UD - XCR0 lacks any of SSE / YMM / OPMASK / ZMM_HI256 / HI_ZMM
//   #NM - CR0.TS is set
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BxNoEVEX(bxInstruction_c *i)
{
  // XCR0 state components checked before falling through to the CR0.TS test
  const unsigned evex_xcr0_bits =
      BX_XCR0_SSE_MASK | BX_XCR0_YMM_MASK | BX_XCR0_OPMASK_MASK |
      BX_XCR0_ZMM_HI256_MASK | BX_XCR0_HI_ZMM_MASK;

  const bool xsave_usable = protected_mode() && BX_CPU_THIS_PTR cr4.get_OSXSAVE();
  if (! xsave_usable)
    exception(BX_UD_EXCEPTION, 0);

  const unsigned missing_bits = ~BX_CPU_THIS_PTR xcr0.val32 & evex_xcr0_bits;
  if (missing_bits != 0)
    exception(BX_UD_EXCEPTION, 0);

  if (BX_CPU_THIS_PTR cr0.get_TS())
    exception(BX_NM_EXCEPTION, 0);

  // NOTE(review): presumably unreachable - one of the checks above is
  // expected to have raised an exception; confirm against the callers.
  BX_ASSERT(0);

  BX_NEXT_TRACE(i); // keep compiler happy
}
#endif
#endif
void BX_CPU_C::handleCpuContextChange(void)

View File

@ -26,12 +26,6 @@
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
#define XSAVE_SSE_STATE_OFFSET 160
#define XSAVE_YMM_STATE_OFFSET 576
#define XSAVE_OPMASK_STATE_OFFSET 1088
#define XSAVE_ZMM_HI256_STATE_OFFSET 1152
#define XSAVE_HI_ZMM_STATE_OFFSET 1664
/* 0F AE /4 */
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::XSAVE(bxInstruction_c *i)
{
@ -426,7 +420,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::XRSTOR(bxInstruction_c *i)
/////////////////////////////////////////////////////////////////////////////
if ((features_load_enable_mask & BX_XCR0_ZMM_HI256_MASK) != 0)
{
if (header1 & BX_XCR0_ZMM_HI256_BIT) {
if (header1 & BX_XCR0_ZMM_HI256_MASK) {
// load upper part of ZMM registers from XSAVE area
for(index=0; index < 16; index++) {
read_virtual_ymmword(i->seg(), (eaddr+index*32+XSAVE_ZMM_HI256_STATE_OFFSET) & asize_mask,