Bochs/bochs/cpu/xsave.cc
Stanislav Shwartsman 36ba25847f Implemented last missed AVX-512 unsigned convert instructions
The only missing AVX-512 opcodes now are:

512.66.0F38.W0 13 VCVTPH2PS
512.66.0F3A.W0 1D VCVTPS2PH

512.66.0F38.WIG 21 VPMOVSXBD
512.66.0F38.WIG 22 VPMOVSXBQ
512.66.0F38.WIG 23 VPMOVSXWD
512.66.0F38.WIG 24 VPMOVSXWQ
512.66.0F38.W0  25 VPMOVSXDQ

512.66.0F38.WIG 31 VPMOVZXBD
512.66.0F38.WIG 32 VPMOVZXBQ
512.66.0F38.WIG 33 VPMOVZXWD
512.66.0F38.WIG 34 VPMOVZXWQ
512.66.0F38.W0  35 VPMOVZXDQ

512.66.0F38.W0 2C VSCALEFPS
512.66.0F38.W1 2C VSCALEFPD
NDS.LIG.66.0F38.W0 2D VSCALEFSS
NDS.LIG.66.0F38.W1 2D VSCALEFSD

512.66.0F38.W0 4C VRCP14PS
512.66.0F38.W1 4C VRCP14PD
NDS.LIG.66.0F38.W0 4D VRCP14SS
NDS.LIG.66.0F38.W1 4D VRCP14SD
512.66.0F38.W0 4E VRSQRT14PS
512.66.0F38.W1 4E VRSQRT14PD
NDS.LIG.66.0F38.W0 4F VRSQRT14SS
NDS.LIG.66.0F38.W1 4F VRSQRT14SD

NDS.512.66.0F3A.W0 03 VALIGND
NDS.512.66.0F3A.W1 03 VALIGNQ

512.66.0F3A.W0 08 VRNDSCALEPS
512.66.0F3A.W1 09 VRNDSCALEPD
NDS.LIG.66.0F3A.W0 0A VRNDSCALESS
NDS.LIG.66.0F3A.W1 0B VRNDSCALESD

512.66.0F3A.W0 19 VEXTRACTF32x4
512.66.0F3A.W1 1B VEXTRACTF64x4
512.66.0F3A.W0 39 VEXTRACTI32x4
512.66.0F3A.W1 3B VEXTRACTI64x4

512.66.0F3A.W0 26 VGETMANTPS
512.66.0F3A.W1 26 VGETMANTPD
NDS.LIG.66.0F3A.W0 27 VGETMANTSS
NDS.LIG.66.0F3A.W1 27 VGETMANTSD
2014-01-28 12:57:38 +00:00

/////////////////////////////////////////////////////////////////////////
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2008-2013 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
//
/////////////////////////////////////////////////////////////////////////
#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
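// Layout of the 64-byte aligned XSAVE area used by XSAVE/XRSTOR below
// (standard, non-compacted format):
//   bytes   0..511 : legacy FXSAVE image (x87 state, MXCSR, XMM registers)
//   bytes 512..519 : XSTATE_BV - bitmap of components that hold valid data
//   bytes 520..575 : XCOMP_BV and reserved space (XRSTOR checks that the
//                    first two of these quadwords are zero)
//   bytes 576..    : extended components (YMM_Hi128, opmask, ZMM_Hi256,
//                    Hi16_ZMM) at the XSAVE_*_STATE_OFFSET constants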
/* 0F AE /4 */
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::XSAVE(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
unsigned index;
BxPackedXmmRegister xmm;
BX_CPU_THIS_PTR prepareXSAVE();
BX_DEBUG(("XSAVE: save processor state XCR0=0x%08x", BX_CPU_THIS_PTR xcr0.get32()));
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
bx_address laddr = get_laddr(i->seg(), eaddr);
if (laddr & 0x3f) {
BX_ERROR(("XSAVE: access not aligned to 64-byte"));
exception(BX_GP_EXCEPTION, 0);
}
bx_address asize_mask = i->asize_mask();
//
// We will go feature-by-feature and not run over all XCR0 bits
//
Bit64u header1 = read_virtual_qword(i->seg(), (eaddr + 512) & asize_mask);
Bit32u features_save_enable_mask = BX_CPU_THIS_PTR xcr0.get32() & EAX;
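// The requested-feature bitmap is the mask supplied in EDX:EAX ANDed with
// XCR0; only the low 32 bits (EAX) matter here because every component
// implemented by this model lives in the low half of XCR0.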
/////////////////////////////////////////////////////////////////////////////
if ((features_save_enable_mask & BX_XCR0_FPU_MASK) != 0)
{
xmm.xmm16u(0) = BX_CPU_THIS_PTR the_i387.get_control_word();
xmm.xmm16u(1) = BX_CPU_THIS_PTR the_i387.get_status_word();
xmm.xmm16u(2) = pack_FPU_TW(BX_CPU_THIS_PTR the_i387.get_tag_word());
/* x87 FPU Opcode (16 bits) */
/* The lower 11 bits contain the FPU opcode, upper 5 bits are reserved */
xmm.xmm16u(3) = BX_CPU_THIS_PTR the_i387.foo;
/*
* x87 FPU IP Offset (32/64 bits)
* The contents of this field differ depending on the current
* addressing mode (16/32/64 bit) when the FXSAVE instruction was executed:
* + 64-bit mode - 64-bit IP offset
* + 32-bit mode - 32-bit IP offset
* + 16-bit mode - low 16 bits are IP offset; high 16 bits are reserved.
* x87 CS FPU IP Selector
* + 16 bit, in 16/32 bit mode only
*/
#if BX_SUPPORT_X86_64
if (i->os64L()) {
xmm.xmm64u(1) = (BX_CPU_THIS_PTR the_i387.fip);
}
else
#endif
{
xmm.xmm32u(2) = (Bit32u)(BX_CPU_THIS_PTR the_i387.fip);
xmm.xmm32u(3) = x87_get_FCS();
}
write_virtual_xmmword(i->seg(), eaddr, (Bit8u *) &xmm);
/*
* x87 FPU Instruction Operand (Data) Pointer Offset (32/64 bits)
* The contents of this field differ depending on the current
* addressing mode (16/32/64 bit) when the FXSAVE instruction was executed:
* + 64-bit mode - 64-bit offset
* + 32-bit mode - 32-bit offset
* + 16-bit mode - low 16 bits are offset; high 16 bits are reserved.
* x87 DS FPU Instruction Operand (Data) Pointer Selector
* + 16 bit, in 16/32 bit mode only
*/
#if BX_SUPPORT_X86_64
if (i->os64L()) {
write_virtual_qword(i->seg(), (eaddr + 16) & asize_mask, BX_CPU_THIS_PTR the_i387.fdp);
}
else
#endif
{
write_virtual_dword(i->seg(), (eaddr + 16) & asize_mask, (Bit32u) BX_CPU_THIS_PTR the_i387.fdp);
write_virtual_dword(i->seg(), (eaddr + 20) & asize_mask, x87_get_FDS());
}
/* do not touch MXCSR state */
/* store i387 register file */
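/* each register occupies a 16-byte slot: the 80-bit value (64-bit fraction
   plus 16-bit sign/exponent) followed by six bytes of zero padding */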
for(index=0; index < 8; index++)
{
const floatx80 &fp = BX_READ_FPU_REG(index);
xmm.xmm64u(0) = fp.fraction;
xmm.xmm64u(1) = 0;
xmm.xmm16u(4) = fp.exp;
write_virtual_xmmword(i->seg(), (eaddr+index*16+32) & asize_mask, (Bit8u *) &xmm);
}
header1 |= BX_XCR0_FPU_MASK;
}
/////////////////////////////////////////////////////////////////////////////
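// MXCSR and MXCSR_MASK live at bytes 24..31 of the legacy area and are
// stored whenever either SSE or AVX state is requested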
if ((features_save_enable_mask & (BX_XCR0_SSE_MASK | BX_XCR0_YMM_MASK)) != 0)
{
// store MXCSR
write_virtual_dword(i->seg(), (eaddr + 24) & asize_mask, BX_MXCSR_REGISTER);
write_virtual_dword(i->seg(), (eaddr + 28) & asize_mask, MXCSR_MASK);
}
/////////////////////////////////////////////////////////////////////////////
if ((features_save_enable_mask & BX_XCR0_SSE_MASK) != 0)
{
/* store XMM register file */
for(index=0; index < 16; index++)
{
// save XMM8-XMM15 only in 64-bit mode
if (index < 8 || long64_mode()) {
write_virtual_xmmword(i->seg(),
(eaddr+index*16+XSAVE_SSE_STATE_OFFSET) & asize_mask, (Bit8u *)(&BX_READ_XMM_REG(index)));
}
}
header1 |= BX_XCR0_SSE_MASK;
}
#if BX_SUPPORT_AVX
/////////////////////////////////////////////////////////////////////////////
if ((features_save_enable_mask & BX_XCR0_YMM_MASK) != 0)
{
/* store AVX state */
for(index=0; index < 16; index++)
{
// save YMM8-YMM15 only in 64-bit mode
if (index < 8 || long64_mode()) {
write_virtual_xmmword(i->seg(),
(eaddr+index*16+XSAVE_YMM_STATE_OFFSET) & asize_mask, (Bit8u *)(&BX_READ_AVX_REG_LANE(index, 1)));
}
}
header1 |= BX_XCR0_YMM_MASK;
}
#endif
#if BX_SUPPORT_EVEX
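// AVX-512 adds three further components: the eight 64-bit opmask registers,
// the upper 256 bits of ZMM0..ZMM15 (ZMM_Hi256) and the full ZMM16..ZMM31
// registers (Hi16_ZMM); each is stored in its own region of the XSAVE area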
/////////////////////////////////////////////////////////////////////////////
if ((features_save_enable_mask & BX_XCR0_OPMASK_MASK) != 0)
{
// save OPMASK state to XSAVE area
for(index=0; index < 8; index++) {
write_virtual_qword(i->seg(), (eaddr+index*8+XSAVE_OPMASK_STATE_OFFSET) & asize_mask, BX_READ_OPMASK(index));
}
header1 |= BX_XCR0_OPMASK_MASK;
}
/////////////////////////////////////////////////////////////////////////////
if ((features_save_enable_mask & BX_XCR0_ZMM_HI256_MASK) != 0)
{
// save upper part of ZMM registers to XSAVE area
for(index=0; index < 16; index++) {
write_virtual_ymmword(i->seg(), (eaddr+index*32+XSAVE_ZMM_HI256_STATE_OFFSET) & asize_mask,
(Bit8u *)(&BX_READ_ZMM_REG_HI(index)));
}
header1 |= BX_XCR0_ZMM_HI256_MASK;
}
/////////////////////////////////////////////////////////////////////////////
if ((features_save_enable_mask & BX_XCR0_HI_ZMM_MASK) != 0)
{
// save Hi16_ZMM state (ZMM16..ZMM31) to XSAVE area
for(index=0; index < 16; index++) {
write_virtual_zmmword(i->seg(), (eaddr+index*64+XSAVE_HI_ZMM_STATE_OFFSET) & asize_mask,
(Bit8u *)(&BX_READ_AVX_REG(index+16)));
}
header1 |= BX_XCR0_HI_ZMM_MASK;
}
#endif
// always update header to 'dirty' state
write_virtual_qword(i->seg(), (eaddr + 512) & asize_mask, header1);
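// XSTATE_BV bits of components that were not requested keep the value just
// read from memory; every component written above has its bit forced to 1
// (this model does not implement the init-state optimization)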
#endif
BX_NEXT_INSTR(i);
}
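// A minimal usage sketch (illustrative only, not part of Bochs): system
// software typically wraps XSAVE/XRSTOR around a context switch, using a
// 64-byte aligned, zero-initialized save area per thread.  The helper names
// and the all-ones feature mask below are hypothetical:
//
//   static inline void save_ext_state(void *area)       // 64-byte aligned
//   {
//     __asm__ __volatile__("xsave (%0)"
//                          : : "r"(area), "a"(~0u), "d"(~0u) : "memory");
//   }
//   static inline void load_ext_state(const void *area)
//   {
//     __asm__ __volatile__("xrstor (%0)"
//                          : : "r"(area), "a"(~0u), "d"(~0u) : "memory");
//   }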
/* 0F AE /5 */
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::XRSTOR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
unsigned index;
BxPackedXmmRegister xmm;
BX_CPU_THIS_PTR prepareXSAVE();
BX_DEBUG(("XRSTOR: restore processor state XCR0=0x%08x", BX_CPU_THIS_PTR xcr0.get32()));
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
bx_address laddr = get_laddr(i->seg(), eaddr);
if (laddr & 0x3f) {
BX_ERROR(("XRSTOR: access not aligned to 64-byte"));
exception(BX_GP_EXCEPTION, 0);
}
bx_address asize_mask = i->asize_mask();
Bit64u header1 = read_virtual_qword(i->seg(), (eaddr + 512) & asize_mask);
Bit64u header2 = read_virtual_qword(i->seg(), (eaddr + 520) & asize_mask);
Bit64u header3 = read_virtual_qword(i->seg(), (eaddr + 528) & asize_mask);
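// header1 is XSTATE_BV, header2 is XCOMP_BV and header3 is the first
// reserved quadword of the XSAVE header; standard-format XRSTOR requires
// XSTATE_BV to be a subset of XCR0 and the other two fields to be zero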
if ((~BX_CPU_THIS_PTR xcr0.get32() & header1) != 0) {
BX_ERROR(("XRSTOR: Broken header state"));
exception(BX_GP_EXCEPTION, 0);
}
if (header2 != 0 || header3 != 0) {
BX_ERROR(("XRSTOR: Reserved header state is not '0"));
exception(BX_GP_EXCEPTION, 0);
}
//
// We will go feature-by-feature and not run over all XCR0 bits
//
Bit32u features_load_enable_mask = BX_CPU_THIS_PTR xcr0.get32() & EAX;
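// For every requested component: if its XSTATE_BV bit is set, the state is
// loaded from the XSAVE area; otherwise the component is returned to its
// initial (reset) configuration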
/////////////////////////////////////////////////////////////////////////////
if ((features_load_enable_mask & BX_XCR0_FPU_MASK) != 0)
{
if (header1 & BX_XCR0_FPU_MASK) {
// load FPU state from XSAVE area
read_virtual_xmmword(i->seg(), eaddr, (Bit8u *) &xmm);
BX_CPU_THIS_PTR the_i387.cwd = xmm.xmm16u(0);
BX_CPU_THIS_PTR the_i387.swd = xmm.xmm16u(1);
BX_CPU_THIS_PTR the_i387.tos = (xmm.xmm16u(1) >> 11) & 0x07;
/* always set bit 6 as '1' */
BX_CPU_THIS_PTR the_i387.cwd =
(BX_CPU_THIS_PTR the_i387.cwd & ~FPU_CW_Reserved_Bits) | 0x0040;
/* Restore x87 FPU Opcode */
/* The lower 11 bits contain the FPU opcode, upper 5 bits are reserved */
BX_CPU_THIS_PTR the_i387.foo = xmm.xmm16u(3) & 0x7FF;
/* Restore x87 FPU IP */
#if BX_SUPPORT_X86_64
if (i->os64L()) {
BX_CPU_THIS_PTR the_i387.fip = xmm.xmm64u(1);
BX_CPU_THIS_PTR the_i387.fcs = 0;
}
else
#endif
{
BX_CPU_THIS_PTR the_i387.fip = xmm.xmm32u(2);
BX_CPU_THIS_PTR the_i387.fcs = xmm.xmm16u(6);
}
Bit32u tag_byte = xmm.xmmubyte(4);
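/* FXSAVE/XSAVE store only an abridged tag: one valid/empty bit per x87
   register; the full tag word is reconstructed below, the same way
   FXRSTOR does it */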
/* Restore x87 FPU DP */
read_virtual_xmmword(i->seg(), (eaddr + 16) & asize_mask, (Bit8u *) &xmm);
#if BX_SUPPORT_X86_64
if (i->os64L()) {
BX_CPU_THIS_PTR the_i387.fdp = xmm.xmm64u(0);
BX_CPU_THIS_PTR the_i387.fds = 0;
}
else
#endif
{
BX_CPU_THIS_PTR the_i387.fdp = xmm.xmm32u(0);
BX_CPU_THIS_PTR the_i387.fds = xmm.xmm16u(2);
}
/* load i387 register file */
for(index=0; index < 8; index++)
{
floatx80 reg;
reg.fraction = read_virtual_qword(i->seg(), (eaddr+index*16+32) & asize_mask);
reg.exp = read_virtual_word (i->seg(), (eaddr+index*16+40) & asize_mask);
// update tag only if it is not empty
BX_WRITE_FPU_REGISTER_AND_TAG(reg,
IS_TAG_EMPTY(index) ? FPU_Tag_Empty : FPU_tagof(reg), index);
}
/* Restore floating point tag word - see description of the FXRSTOR instruction */
BX_CPU_THIS_PTR the_i387.twd = unpack_FPU_TW(tag_byte);
/* check for unmasked exceptions */
if (FPU_PARTIAL_STATUS & ~FPU_CONTROL_WORD & FPU_CW_Exceptions_Mask) {
/* set the B and ES bits in the status-word */
FPU_PARTIAL_STATUS |= FPU_SW_Summary | FPU_SW_Backward;
}
else {
/* clear the B and ES bits in the status-word */
FPU_PARTIAL_STATUS &= ~(FPU_SW_Summary | FPU_SW_Backward);
}
}
else {
// initialize FPU with reset values
BX_CPU_THIS_PTR the_i387.init();
for (index=0;index<8;index++) {
static floatx80 reg = { 0, 0 };
BX_FPU_REG(index) = reg;
}
}
}
/////////////////////////////////////////////////////////////////////////////
if ((features_load_enable_mask & (BX_XCR0_SSE_MASK | BX_XCR0_YMM_MASK)) != 0)
{
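// MXCSR is reloaded whenever SSE or AVX state is requested; attempting to
// set bits outside MXCSR_MASK raises #GP, as with LDMXCSR and FXRSTOR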
Bit32u new_mxcsr = read_virtual_dword(i->seg(), (eaddr + 24) & asize_mask);
if(new_mxcsr & ~MXCSR_MASK)
exception(BX_GP_EXCEPTION, 0);
BX_MXCSR_REGISTER = new_mxcsr;
}
/////////////////////////////////////////////////////////////////////////////
if ((features_load_enable_mask & BX_XCR0_SSE_MASK) != 0)
{
if (header1 & BX_XCR0_SSE_MASK) {
// load SSE state from XSAVE area
for(index=0; index < 16; index++)
{
// restore XMM8-XMM15 only in 64-bit mode
if (index < 8 || long64_mode()) {
read_virtual_xmmword(i->seg(),
(eaddr+index*16+XSAVE_SSE_STATE_OFFSET) & asize_mask, (Bit8u *)(&BX_READ_XMM_REG(index)));
}
}
}
else {
// initialize SSE with reset values
for(index=0; index < 16; index++) {
// set XMM8-XMM15 only in 64-bit mode
if (index < 8 || long64_mode())
BX_CLEAR_XMM_REG(index);
}
}
}
#if BX_SUPPORT_AVX
/////////////////////////////////////////////////////////////////////////////
if ((features_load_enable_mask & BX_XCR0_YMM_MASK) != 0)
{
if (header1 & BX_XCR0_YMM_MASK) {
// load AVX state from XSAVE area
for(index=0; index < 16; index++)
{
// restore YMM8-YMM15 only in 64-bit mode
if (index < 8 || long64_mode()) {
read_virtual_xmmword(i->seg(),
(eaddr+index*16+XSAVE_YMM_STATE_OFFSET) & asize_mask, (Bit8u *)(&BX_READ_AVX_REG_LANE(index, 1)));
}
}
}
else {
// initialize upper part of AVX registers with reset values
for(index=0; index < 16; index++) {
// set YMM8-YMM15 only in 64-bit mode
if (index < 8 || long64_mode()) BX_CLEAR_AVX_HIGH128(index);
}
}
}
#endif
#if BX_SUPPORT_EVEX
/////////////////////////////////////////////////////////////////////////////
if ((features_load_enable_mask & BX_XCR0_OPMASK_MASK) != 0)
{
if (header1 & BX_XCR0_OPMASK_MASK) {
// load opmask registers from XSAVE area
for(index=0; index < 8; index++) {
Bit64u opmask = read_virtual_qword(i->seg(), (eaddr+index*8+XSAVE_OPMASK_STATE_OFFSET) & asize_mask);
BX_WRITE_OPMASK(index, opmask);
}
}
else {
// initialize opmask registers with reset values
for(index=0; index < 8; index++) {
BX_WRITE_OPMASK(index, 0);
}
}
}
/////////////////////////////////////////////////////////////////////////////
if ((features_load_enable_mask & BX_XCR0_ZMM_HI256_MASK) != 0)
{
if (header1 & BX_XCR0_ZMM_HI256_MASK) {
// load upper part of ZMM registers from XSAVE area
for(index=0; index < 16; index++) {
read_virtual_ymmword(i->seg(), (eaddr+index*32+XSAVE_ZMM_HI256_STATE_OFFSET) & asize_mask,
(Bit8u *)(&BX_READ_ZMM_REG_HI(index)));
}
}
else {
// initialize upper part of ZMM registers with reset values
for(index=0; index < 16; index++) {
BX_CLEAR_AVX_HIGH256(index);
}
}
}
/////////////////////////////////////////////////////////////////////////////
if ((features_load_enable_mask & BX_XCR0_HI_ZMM_MASK) != 0)
{
if (header1 & BX_XCR0_HI_ZMM_MASK) {
// load ZMM state from XSAVE area
for(index=0; index < 16; index++) {
read_virtual_zmmword(i->seg(), (eaddr+index*64+XSAVE_HI_ZMM_STATE_OFFSET) & asize_mask,
(Bit8u *)(&BX_READ_AVX_REG(index+16)));
}
}
else {
// initialize upper part of ZMM registers with reset values
for(index=0; index < 16; index++) {
BX_CLEAR_AVX_REG(index+16);
}
}
}
#endif
#endif
BX_NEXT_INSTR(i);
}
/* 0F 01 D0 */
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::XGETBV(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
if(! BX_CPU_THIS_PTR cr4.get_OSXSAVE()) {
BX_ERROR(("XGETBV: OSXSAVE feature is not enabled in CR4!"));
exception(BX_UD_EXCEPTION, 0);
}
// For now, handle only the XCR0 register; it should take a few years
// before any extension requires another XCR register
if (ECX != 0) {
BX_ERROR(("XGETBV: Invalid XCR register %d", ECX));
exception(BX_GP_EXCEPTION, 0);
}
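// XCR0 is returned in EDX:EAX; EDX is always zero here because only the
// low 32 bits of XCR0 are implemented in this model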
RDX = 0;
RAX = BX_CPU_THIS_PTR xcr0.get32();
#endif
BX_NEXT_INSTR(i);
}
/* 0F 01 D1 */
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::XSETBV(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
if(! BX_CPU_THIS_PTR cr4.get_OSXSAVE()) {
BX_ERROR(("XSETBV: OSXSAVE feature is not enabled in CR4!"));
exception(BX_UD_EXCEPTION, 0);
}
#if BX_SUPPORT_VMX
if (BX_CPU_THIS_PTR in_vmx_guest) {
VMexit(VMX_VMEXIT_XSETBV, 0);
}
#endif
#if BX_SUPPORT_SVM
if (BX_CPU_THIS_PTR in_svm_guest) {
if (SVM_INTERCEPT(SVM_INTERCEPT1_XSETBV)) Svm_Vmexit(SVM_VMEXIT_XSETBV);
}
#endif
// CPL is always 3 in vm8086 mode
if (/* v8086_mode() || */ CPL != 0) {
BX_ERROR(("XSETBV: The current priveledge level is not 0"));
exception(BX_GP_EXCEPTION, 0);
}
// For now, handle only the XCR0 register; it should take a few years
// before any extension requires another XCR register
if (ECX != 0) {
BX_ERROR(("XSETBV: Invalid XCR register %d", ECX));
exception(BX_GP_EXCEPTION, 0);
}
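// A new XCR0 value is accepted only if it keeps the x87 bit set, touches no
// unsupported bits, enables AVX only together with SSE and (when AVX-512 is
// compiled in) enables the three AVX-512 components only together with the
// full FPU/SSE/AVX stack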
if (EDX != 0 || (EAX & ~BX_CPU_THIS_PTR xcr0_suppmask) != 0 || (EAX & BX_XCR0_FPU_MASK) == 0) {
BX_ERROR(("XSETBV: Attempt to change reserved bits"));
exception(BX_GP_EXCEPTION, 0);
}
#if BX_SUPPORT_AVX
if ((EAX & (BX_XCR0_YMM_MASK | BX_XCR0_SSE_MASK)) == BX_XCR0_YMM_MASK) {
BX_ERROR(("XSETBV: Attempt to enable AVX without SSE"));
exception(BX_GP_EXCEPTION, 0);
}
#endif
#if BX_SUPPORT_EVEX
if (EAX & (BX_XCR0_OPMASK_MASK | BX_XCR0_ZMM_HI256_MASK | BX_XCR0_HI_ZMM_MASK)) {
Bit32u avx512_state_mask = (BX_XCR0_FPU_MASK | BX_XCR0_SSE_MASK | BX_XCR0_YMM_MASK | BX_XCR0_OPMASK_MASK | BX_XCR0_ZMM_HI256_MASK | BX_XCR0_HI_ZMM_MASK);
if ((EAX & avx512_state_mask) != avx512_state_mask) {
BX_ERROR(("XSETBV: Illegal attempt to enable AVX-512 state"));
exception(BX_GP_EXCEPTION, 0);
}
}
#endif
BX_CPU_THIS_PTR xcr0.set32(EAX);
#if BX_SUPPORT_AVX
handleAvxModeChange();
#endif
#endif // BX_CPU_LEVEL >= 6
BX_NEXT_TRACE(i);
}