From ba5289053823c18c1cdd1289d6f428dcf976c0dd Mon Sep 17 00:00:00 2001 From: Stanislav Shwartsman Date: Sat, 18 Jan 2014 20:10:05 +0000 Subject: [PATCH] implemented few more AVX-512 floating point convert instructions --- bochs/cpu/avx512_pfp.cc | 88 +++++++++++++++++++++++++++++++++--- bochs/cpu/avx_pfp.cc | 53 +++++++++++++--------- bochs/cpu/cpu.h | 5 ++ bochs/cpu/fetchdecode.cc | 2 +- bochs/cpu/fetchdecode64.cc | 2 +- bochs/cpu/fetchdecode_evex.h | 20 ++++++-- bochs/cpu/gather.cc | 8 ++-- bochs/cpu/ia_opcodes.h | 16 +++++-- bochs/cpu/xop.cc | 6 +-- 9 files changed, 156 insertions(+), 44 deletions(-) diff --git a/bochs/cpu/avx512_pfp.cc b/bochs/cpu/avx512_pfp.cc index d1b51bd7d..b28928e91 100644 --- a/bochs/cpu/avx512_pfp.cc +++ b/bochs/cpu/avx512_pfp.cc @@ -518,20 +518,66 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PD_MASK_VpdWdqR(bxInstruct BX_NEXT_INSTR(i); } -#if 0 +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSD2SS_MASK_VssWsdR(bxInstruction_c *i) +{ + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1()); + + if (BX_SCALAR_ELEMENT_MASK(i->opmask())) { + float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2()); + + float_status_t status; + mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); + op1.xmm32u(0) = float64_to_float32(op2, status); + check_exceptionsSSE(get_exception_flags(status)); + } + else { + if (i->isZeroMasking()) + op1.xmm32u(0) = 0; + else + op1.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->dst()); + } + + BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1); + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSS2SD_MASK_VsdWssR(bxInstruction_c *i) +{ + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1()); + + if (BX_SCALAR_ELEMENT_MASK(i->opmask())) { + float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2()); + + float_status_t status; + mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); + op1.xmm64u(0) = float32_to_float64(op2, status); + check_exceptionsSSE(get_exception_flags(status)); + } + else { + if (i->isZeroMasking()) + op1.xmm64u(0) = 0; + else + op1.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst()); + } + + BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1); + BX_NEXT_INSTR(i); +} BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PD_MASK_VpdWpsR(bxInstruction_c *i) { BxPackedAvxRegister result; BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()); - unsigned mask = BX_READ_8BIT_OPMASK(i->opmask()); + unsigned opmask = BX_READ_8BIT_OPMASK(i->opmask()); unsigned len = i->getVL(); float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); softfloat_status_word_rc_override(status, i); - for (unsigned n=0, tmp_mask = mask; n < QWORD_ELEMENTS(len); n++, tmp_mask >>= 1) { + for (unsigned n=0, tmp_mask = opmask; n < QWORD_ELEMENTS(len); n++, tmp_mask >>= 1) { if (tmp_mask & 0x1) result.vmm64u(n) = float32_to_float64(op.ymm32u(n), status); else @@ -541,18 +587,46 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PD_MASK_VpdWpsR(bxInstructi check_exceptionsSSE(get_exception_flags(status)); if (! i->isZeroMasking()) { - for (unsigned n=0; n < len; n++, mask >>= 2) - xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), mask); + for (unsigned n=0; n < len; n++, opmask >>= 2) + xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &result.vmm128(n), opmask); BX_CLEAR_AVX_REGZ(i->dst(), len); } else { - BX_WRITE_AVX_REGZ(i->dst(), op, len); + BX_WRITE_AVX_REGZ(i->dst(), result, len); } BX_NEXT_INSTR(i); } -#endif +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2PS_MASK_VpsWpdR(bxInstruction_c *i) +{ + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result = BX_READ_AVX_REG(i->dst()); + unsigned opmask = BX_READ_8BIT_OPMASK(i->opmask()); + unsigned len = i->getVL(); + + float_status_t status; + mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); + + for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, opmask >>= 1) { + if (opmask & 0x1) + result.vmm32u(n) = float64_to_float32(op.vmm64u(n), status); + else if (i->isZeroMasking()) + result.vmm32u(n) = 0; + } + + check_exceptionsSSE(get_exception_flags(status)); + + if (len == BX_VL128) { + result.vmm64u(1) = 0; + BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result.vmm128(0)); + } + else { + BX_WRITE_AVX_REGZ(i->dst(), result, len-1); // write half vector + } + + BX_NEXT_INSTR(i); +} // fixup diff --git a/bochs/cpu/avx_pfp.cc b/bochs/cpu/avx_pfp.cc index 73badbac8..7505ff83a 100644 --- a/bochs/cpu/avx_pfp.cc +++ b/bochs/cpu/avx_pfp.cc @@ -2,7 +2,7 @@ // $Id$ ///////////////////////////////////////////////////////////////////////// // -// Copyright (c) 2011-2013 Stanislav Shwartsman +// Copyright (c) 2011-2014 Stanislav Shwartsman // Written by Stanislav Shwartsman [sshwarts at sourceforge net] // // This library is free software; you can redistribute it and/or @@ -270,7 +270,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRSQRTPS_VpsWpsR(bxInstruction_c * BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()); unsigned len = i->getVL(); - for (unsigned n=0; n < (4*len); n++) + for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) op.ymm32u(n) = approximate_rsqrt(op.ymm32u(n)); BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len); @@ -297,7 +297,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRCPPS_VpsWpsR(bxInstruction_c *i) BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()); unsigned len = i->getVL(); - for (unsigned n=0; n < (4*len); n++) + for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) op.vmm32u(n) = approximate_rcp(op.vmm32u(n)); BX_WRITE_AVX_REGZ(i->dst(), op, len); @@ -483,8 +483,9 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PD_VpdWpsR(bxInstruction_c float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); - for (unsigned n=0; n < (2*len); n++) { + for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { result.ymm64u(n) = float32_to_float64(op.xmm32u(n), status); } @@ -500,14 +501,15 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2PS_VpsWpdR(bxInstruction_c { BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()); BxPackedXmmRegister result; + unsigned len = i->getVL(); result.xmm64u(1) = 0; /* clear upper part of the result for case of VL128 */ float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); - unsigned len = i->getVL(); + softfloat_status_word_rc_override(status, i); - for (unsigned n=0; n < (2*len); n++) { + for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { result.xmm32u(n) = float64_to_float32(op.ymm64u(n), status); } @@ -525,6 +527,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSS2SD_VsdWssR(bxInstruction_c float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); op1.xmm64u(0) = float32_to_float64(op2, status); check_exceptionsSSE(get_exception_flags(status)); @@ -541,6 +544,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSD2SS_VssWsdR(bxInstruction_c float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); op1.xmm32u(0) = float64_to_float32(op2, status); check_exceptionsSSE(get_exception_flags(status)); @@ -557,8 +561,9 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTDQ2PS_VpsWdqR(bxInstruction_c float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); - for (unsigned n=0; n < (4*len); n++) { + for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { op.ymm32u(n) = int32_to_float32(op.ymm32u(n), status); } @@ -577,8 +582,9 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2DQ_VdqWpsR(bxInstruction_c float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); - for (unsigned n=0; n < (4*len); n++) { + for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { op.ymm32u(n) = float32_to_int32(op.ymm32u(n), status); } @@ -597,8 +603,9 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPS2DQ_VdqWpsR(bxInstruction_c float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); - for (unsigned n=0; n < (4*len); n++) { + for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { op.ymm32u(n) = float32_to_int32_round_to_zero(op.ymm32u(n), status); } @@ -1019,7 +1026,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPPS_VpsHpsWpsIbR(bxInstruction_ mxcsr_to_softfloat_status_word(status, MXCSR); int ib = i->Ib() & 0x1F; - for (unsigned n=0; n < (4*len); n++) { + for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { op1.ymm32u(n) = avx_compare32[ib](op1.ymm32u(n), op2.ymm32u(n), status) ? 0xFFFFFFFF : 0; } @@ -1039,7 +1046,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPPD_VpdHpdWpdIbR(bxInstruction_ mxcsr_to_softfloat_status_word(status, MXCSR); int ib = i->Ib() & 0x1F; - for (unsigned n=0; n < (2*len); n++) { + for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { op1.ymm64u(n) = avx_compare64[ib](op1.ymm64u(n), op2.ymm64u(n), status) ? BX_CONST64(0xFFFFFFFFFFFFFFFF) : 0; } @@ -1141,14 +1148,15 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPD2DQ_VqWpdR(bxInstruction_c { BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()); BxPackedXmmRegister result; + unsigned len = i->getVL(); result.xmm64u(1) = 0; /* clear upper part of the result for case of VL128 */ float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); - unsigned len = i->getVL(); + softfloat_status_word_rc_override(status, i); - for (unsigned n=0; n < (2*len); n++) { + for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { result.xmm32u(n) = float64_to_int32_round_to_zero(op.ymm64u(n), status); } @@ -1163,14 +1171,15 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2DQ_VqWpdR(bxInstruction_c * { BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()); BxPackedXmmRegister result; + unsigned len = i->getVL(); result.xmm64u(1) = 0; /* clear upper part of the result for case of VL128 */ float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); - unsigned len = i->getVL(); + softfloat_status_word_rc_override(status, i); - for (unsigned n=0; n < (2*len); n++) { + for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { result.xmm32u(n) = float64_to_int32(op.ymm64u(n), status); } @@ -1187,7 +1196,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTDQ2PD_VpdWqR(bxInstruction_c * BxPackedXmmRegister op = BX_READ_XMM_REG(i->src()); unsigned len = i->getVL(); - for (unsigned n=0; n < (2*len); n++) { + for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { result.ymm64u(n) = int32_to_float64(op.xmm32u(n)); } @@ -1204,7 +1213,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VTESTPS_VpsWpsR(bxInstruction_c *i unsigned result = EFlagsZFMask | EFlagsCFMask; - for (unsigned n=0; n < (2*len); n++) { + for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { if ((op2.ymm64u(n) & op1.ymm64u(n) & BX_CONST64(0x8000000080000000)) != 0) result &= ~EFlagsZFMask; @@ -1225,7 +1234,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VTESTPD_VpdWpdR(bxInstruction_c *i unsigned result = EFlagsZFMask | EFlagsCFMask; - for (unsigned n=0; n < (2*len); n++) { + for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { if ((op2.ymm64u(n) & op1.ymm64u(n) & BX_CONST64(0x8000000000000000)) != 0) result &= ~EFlagsZFMask; @@ -1255,7 +1264,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VROUNDPS_VpsWpsIbR(bxInstruction_c if (control & 0x8) status.float_suppress_exception |= float_flag_inexact; - for(unsigned n=0; n < (4*len); n++) { + for(unsigned n=0; n < DWORD_ELEMENTS(len); n++) { op.ymm32u(n) = float32_round_to_int(op.ymm32u(n), status); } @@ -1283,7 +1292,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VROUNDPD_VpdWpdIbR(bxInstruction_c if (control & 0x8) status.float_suppress_exception |= float_flag_inexact; - for(unsigned n=0; n < (2*len); n++) { + for(unsigned n=0; n < QWORD_ELEMENTS(len); n++) { op.ymm64u(n) = float64_round_to_int(op.ymm64u(n), status); } @@ -1401,7 +1410,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPH2PS_VpsWpsR(bxInstruction_c // no denormal exception is reported on MXCSR status.float_suppress_exception = float_flag_denormal; - for (unsigned n=0; n < (4*len); n++) { + for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { result.ymm32u(n) = float16_to_float32(op.xmm16u(n), status); } @@ -1431,7 +1440,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PH_WpsVpsIb(bxInstruction_c if ((control & 0x4) == 0) status.float_rounding_mode = control & 0x3; - for (unsigned n=0; n < (4*len); n++) { + for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { result.xmm16u(n) = float32_to_float16(op.ymm32u(n), status); } diff --git a/bochs/cpu/cpu.h b/bochs/cpu/cpu.h index 040d3fb4a..a6fb762ae 100644 --- a/bochs/cpu/cpu.h +++ b/bochs/cpu/cpu.h @@ -3240,6 +3240,11 @@ public: // for now... BX_SMF BX_INSF_TYPE VCVTUDQ2PD_VpdWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VCVTUDQ2PD_MASK_VpdWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VCVTPD2PS_MASK_VpsWpdR(bxInstruction_c *i) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VCVTPS2PD_MASK_VpdWpsR(bxInstruction_c *i) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VCVTSS2SD_MASK_VsdWssR(bxInstruction_c *i) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VCVTSD2SS_MASK_VssWsdR(bxInstruction_c *i) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPADDD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VPSUBD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VPANDD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); diff --git a/bochs/cpu/fetchdecode.cc b/bochs/cpu/fetchdecode.cc index d300fd4bb..0474ec979 100644 --- a/bochs/cpu/fetchdecode.cc +++ b/bochs/cpu/fetchdecode.cc @@ -2,7 +2,7 @@ // $Id$ ///////////////////////////////////////////////////////////////////////// // -// Copyright (C) 2001-2013 The Bochs Project +// Copyright (C) 2001-2014 The Bochs Project // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public diff --git a/bochs/cpu/fetchdecode64.cc b/bochs/cpu/fetchdecode64.cc index 257ab4065..a50082bc7 100644 --- a/bochs/cpu/fetchdecode64.cc +++ b/bochs/cpu/fetchdecode64.cc @@ -2,7 +2,7 @@ // $Id$ ///////////////////////////////////////////////////////////////////////// // -// Copyright (C) 2001-2013 The Bochs Project +// Copyright (C) 2001-2014 The Bochs Project // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public diff --git a/bochs/cpu/fetchdecode_evex.h b/bochs/cpu/fetchdecode_evex.h index 02efd3838..be86455a8 100644 --- a/bochs/cpu/fetchdecode_evex.h +++ b/bochs/cpu/fetchdecode_evex.h @@ -2,7 +2,7 @@ // $Id$ ///////////////////////////////////////////////////////////////////////// // -// Copyright (c) 2013 Stanislav Shwartsman +// Copyright (c) 2013-2014 Stanislav Shwartsman // Written by Stanislav Shwartsman [sshwarts at sourceforge net] // // This library is free software; you can redistribute it and/or @@ -247,6 +247,20 @@ static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f59_Mask[4] = { /* F2 */ { BxVexW1, BX_IA_V512_VMULSD_VsdHpdWsd_Kmask } }; +static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f5a[4] = { + /* -- */ { BxVexW0, BX_IA_V512_VCVTPS2PD_VpdWps }, + /* 66 */ { BxVexW1, BX_IA_V512_VCVTPD2PS_VpsWpd }, + /* F3 */ { BxVexW0, BX_IA_V512_VCVTSS2SD_VsdWss }, + /* F2 */ { BxVexW1, BX_IA_V512_VCVTSD2SS_VssWsd } +}; + +static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f5a_Mask[4] = { + /* -- */ { BxVexW0, BX_IA_V512_VCVTPS2PD_VpdWps_Kmask }, + /* 66 */ { BxVexW1, BX_IA_V512_VCVTPD2PS_VpsWpd_Kmask }, + /* F3 */ { BxVexW0, BX_IA_V512_VCVTSS2SD_VsdWss_Kmask }, + /* F2 */ { BxVexW1, BX_IA_V512_VCVTSD2SS_VssWsd_Kmask } +}; + static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f5c[4] = { /* -- */ { BxVexW0, BX_IA_V512_VSUBPS_VpsHpsWps }, /* 66 */ { BxVexW1, BX_IA_V512_VSUBPD_VpdHpdWpd }, @@ -636,8 +650,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = { /* 58 */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f58_Mask }, /* 59 k0 */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f59 }, /* 59 */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f59_Mask }, - /* 5A k0 */ { 0, BX_IA_ERROR }, - /* 5A */ { 0, BX_IA_ERROR }, + /* 5A k0 */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f5a }, + /* 5A */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f5a_Mask }, /* 5B k0 */ { 0, BX_IA_ERROR }, /* 5B */ { 0, BX_IA_ERROR }, /* 5C k0 */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f5c }, diff --git a/bochs/cpu/gather.cc b/bochs/cpu/gather.cc index 6756af411..2bad05939 100644 --- a/bochs/cpu/gather.cc +++ b/bochs/cpu/gather.cc @@ -271,7 +271,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VGATHERDPS_MASK_VpsVSib(bxInstruct // 256 bit => 8 // 512 bit => 16 - unsigned n, len = i->getVL(), num_elements = 4 * len; + unsigned n, len = i->getVL(), num_elements = DWORD_ELEMENTS(len); #if BX_SUPPORT_ALIGNMENT_CHECK unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask; @@ -312,7 +312,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VGATHERQPS_MASK_VpsVSib(bxInstruct BxPackedAvxRegister *dest = &BX_AVX_REG(i->dst()); Bit64u opmask = BX_READ_OPMASK(i->opmask()), mask; - unsigned n, len = i->getVL(), num_elements = 2 * len; + unsigned n, len = i->getVL(), num_elements = QWORD_ELEMENTS(len); #if BX_SUPPORT_ALIGNMENT_CHECK unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask; @@ -357,7 +357,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VGATHERDPD_MASK_VpdVSib(bxInstruct BxPackedAvxRegister *dest = &BX_AVX_REG(i->dst()); Bit64u opmask = BX_READ_OPMASK(i->opmask()), mask; - unsigned n, len = i->getVL(), num_elements = 2 * len; + unsigned n, len = i->getVL(), num_elements = QWORD_ELEMENTS(len); #if BX_SUPPORT_ALIGNMENT_CHECK unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask; @@ -398,7 +398,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VGATHERQPD_MASK_VpdVSib(bxInstruct BxPackedAvxRegister *dest = &BX_AVX_REG(i->dst()); Bit64u opmask = BX_READ_OPMASK(i->opmask()), mask; - unsigned n, len = i->getVL(), num_elements = 2 * len; + unsigned n, len = i->getVL(), num_elements = QWORD_ELEMENTS(len); #if BX_SUPPORT_ALIGNMENT_CHECK unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask; diff --git a/bochs/cpu/ia_opcodes.h b/bochs/cpu/ia_opcodes.h index 63c26f067..28d0e7dd8 100644 --- a/bochs/cpu/ia_opcodes.h +++ b/bochs/cpu/ia_opcodes.h @@ -2,7 +2,7 @@ // $Id$ ///////////////////////////////////////////////////////////////////////// // -// Copyright (c) 2008-2013 Stanislav Shwartsman +// Copyright (c) 2008-2014 Stanislav Shwartsman // Written by Stanislav Shwartsman [sshwarts at sourceforge net] // // This library is free software; you can redistribute it and/or @@ -2060,8 +2060,8 @@ bx_define_opcode(BX_IA_VCVTTPD2DQ_VqWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVT bx_define_opcode(BX_IA_VCVTPD2DQ_VqWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTPD2DQ_VqWpdR, BX_ISA_AVX, OP_Vq, OP_Wpd, OP_NONE, OP_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTDQ2PD_VpdWq, &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VCVTDQ2PD_VpdWqR, BX_ISA_AVX, OP_Vpd, OP_Wq, OP_NONE, OP_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTPD2PS_VpsWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTPD2PS_VpsWpdR, BX_ISA_AVX, OP_Vps, OP_Wpd, OP_NONE, OP_NONE, BX_PREPARE_AVX) -bx_define_opcode(BX_IA_VCVTSD2SS_VssWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTSD2SS_VssWsdR, BX_ISA_AVX, OP_Vss, OP_Hpd, OP_Wsd, OP_NONE, BX_PREPARE_AVX) -bx_define_opcode(BX_IA_VCVTSS2SD_VsdWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VCVTSS2SD_VsdWssR, BX_ISA_AVX, OP_Vsd, OP_Hps, OP_Wss, OP_NONE, BX_PREPARE_AVX) +bx_define_opcode(BX_IA_VCVTSD2SS_VssWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTSD2SS_VssWsdR, BX_ISA_AVX, OP_Vss, OP_Hps, OP_Wsd, OP_NONE, BX_PREPARE_AVX) +bx_define_opcode(BX_IA_VCVTSS2SD_VsdWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VCVTSS2SD_VsdWssR, BX_ISA_AVX, OP_Vsd, OP_Hpd, OP_Wss, OP_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTDQ2PS_VpsWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTDQ2PS_VpsWdqR, BX_ISA_AVX, OP_Vps, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTPS2DQ_VdqWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTPS2DQ_VdqWpsR, BX_ISA_AVX, OP_Vdq, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_AVX) bx_define_opcode(BX_IA_VCVTTPS2DQ_VdqWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTTPS2DQ_VdqWpsR, BX_ISA_AVX, OP_Vdq, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_AVX) @@ -2569,6 +2569,16 @@ bx_define_opcode(BX_IA_V512_VCVTUDQ2PS_VpsWdq, &BX_CPU_C::LOAD_BROADCAST_VectorD bx_define_opcode(BX_IA_V512_VCVTUDQ2PD_VpdWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PD_MASK_VpdWdqR, BX_ISA_AVX512, OP_Vpd, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE) bx_define_opcode(BX_IA_V512_VCVTUDQ2PS_VpsWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PS_MASK_VpsWdqR, BX_ISA_AVX512, OP_Vps, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX) +bx_define_opcode(BX_IA_V512_VCVTSS2SD_VsdWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VCVTSS2SD_VsdWssR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_Wss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VCVTSD2SS_VssWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTSD2SS_VssWsdR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_Wsd, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VCVTPS2PD_VpdWps, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTPS2PD_VpdWpsR, BX_ISA_AVX512, OP_Vpd, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX) +bx_define_opcode(BX_IA_V512_VCVTPD2PS_VpsWpd, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VCVTPD2PS_VpsWpdR, BX_ISA_AVX512, OP_Vps, OP_Wpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX) + +bx_define_opcode(BX_IA_V512_VCVTSS2SD_VsdWss_Kmask, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VCVTSS2SD_MASK_VsdWssR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_Wss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VCVTSD2SS_VssWsd_Kmask, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTSD2SS_MASK_VssWsdR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_Wsd, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VCVTPS2PD_VpdWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VCVTPS2PD_MASK_VpdWpsR, BX_ISA_AVX512, OP_Vpd, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX) +bx_define_opcode(BX_IA_V512_VCVTPD2PS_VpsWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VCVTPD2PS_MASK_VpsWpdR, BX_ISA_AVX512, OP_Vps, OP_Wpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX) + bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps, &BX_CPU_C::VMOVAPS_VpsWpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps_Kmask, &BX_CPU_C::VMOVAPS_MASK_VpsWpsM, &BX_CPU_C::VMOVAPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VMOVAPS_WpsVps, &BX_CPU_C::VMOVAPS_WpsVpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Wps, OP_Vps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) diff --git a/bochs/cpu/xop.cc b/bochs/cpu/xop.cc index cb9fb0c30..7923226a8 100644 --- a/bochs/cpu/xop.cc +++ b/bochs/cpu/xop.cc @@ -2,7 +2,7 @@ // $Id$ ///////////////////////////////////////////////////////////////////////// // -// Copyright (c) 2011-2013 Stanislav Shwartsman +// Copyright (c) 2011-2014 Stanislav Shwartsman // Written by Stanislav Shwartsman [sshwarts at sourceforge net] // // This library is free software; you can redistribute it and/or @@ -614,7 +614,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZPS_VpsWpsR(bxInstruction_c *i float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); - for (unsigned n=0; n < (4*len); n++) { + for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { op.ymm32u(n) = float32_frc(op.ymm32u(n), status); } @@ -632,7 +632,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZPD_VpdWpdR(bxInstruction_c *i float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); - for (unsigned n=0; n < (2*len); n++) { + for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { op.ymm64u(n) = float64_frc(op.ymm64u(n), status); }