implemented AVX-512 VNNI instructions
This commit is contained in:
parent
4179e6f939
commit
0d190eec8e
@ -1,4 +1,4 @@
|
||||
# Copyright (C) 2015 The Bochs Project
|
||||
# Copyright (C) 2017 The Bochs Project
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
@ -59,6 +59,7 @@ AVX_OBJS = \
|
||||
avx512_mask32.o \
|
||||
avx512_mask64.o \
|
||||
avx512_conflict.o \
|
||||
avx512_vnni.o \
|
||||
gather.o \
|
||||
tbm32.o \
|
||||
tbm64.o \
|
||||
@ -242,6 +243,15 @@ avx_pfp.o: avx_pfp.@CPP_SUFFIX@ ../../bochs.h ../../config.h ../../osdep.h \
|
||||
../fpu/control_w.h ../crregs.h ../descriptor.h ../decoder/instr.h \
|
||||
../decoder/ia_opcodes.h ../lazy_flags.h ../tlb.h ../icache.h ../apic.h \
|
||||
../xmm.h ../vmx.h ../cpuid.h ../stack.h ../access.h
|
||||
avx512_vnni.o: avx512_vnni.@CPP_SUFFIX@ ../../bochs.h ../../config.h \
|
||||
../../osdep.h ../../bx_debug/debug.h ../../config.h ../../osdep.h \
|
||||
../../gui/siminterface.h ../../cpudb.h ../../gui/paramtree.h \
|
||||
../../memory/memory-bochs.h ../../pc_system.h ../../gui/gui.h \
|
||||
../../instrument/stubs/instrument.h ../cpu.h ../decoder/decoder.h \
|
||||
../i387.h ../fpu/softfloat.h ../fpu/tag_w.h ../fpu/status_w.h \
|
||||
../fpu/control_w.h ../crregs.h ../descriptor.h ../decoder/instr.h \
|
||||
../decoder/ia_opcodes.h ../lazy_flags.h ../tlb.h ../icache.h ../apic.h \
|
||||
../xmm.h ../vmx.h ../cpuid.h ../stack.h ../access.h
|
||||
gather.o: gather.@CPP_SUFFIX@ ../../bochs.h ../../config.h ../../osdep.h \
|
||||
../../bx_debug/debug.h ../../config.h ../../osdep.h \
|
||||
../../gui/siminterface.h ../../cpudb.h ../../gui/paramtree.h \
|
||||
|
51
bochs/cpu/avx/avx512_vnni.cc
Normal file
51
bochs/cpu/avx/avx512_vnni.cc
Normal file
@ -0,0 +1,51 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2017 Stanislav Shwartsman
|
||||
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 2 of the License, or (at your option) any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
// Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License along with this library; if not, write to the Free Software
|
||||
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define NEED_CPU_REG_SHORTCUTS 1
|
||||
#include "bochs.h"
|
||||
#include "cpu.h"
|
||||
#define LOG_THIS BX_CPU_THIS_PTR
|
||||
|
||||
#if BX_SUPPORT_EVEX
|
||||
|
||||
#include "simd_int.h"
|
||||
|
||||
#define AVX512_3OP_DWORD_EL(HANDLER, func) \
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C :: HANDLER(bxInstruction_c *i) \
|
||||
{ \
|
||||
BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); \
|
||||
unsigned len = i->getVL(); \
|
||||
\
|
||||
for (unsigned n=0; n < len; n++) \
|
||||
(func) (&dst.vmm128(n), &op1.vmm128(n), &op2.vmm128(n)); \
|
||||
\
|
||||
avx512_write_regd_masked(i, &dst, len, BX_READ_16BIT_OPMASK(i->opmask())); \
|
||||
BX_NEXT_INSTR(i); \
|
||||
}
|
||||
|
||||
AVX512_3OP_DWORD_EL(VPDPBUSD_MASK_VdqWdqR, xmm_pdpbusd)
|
||||
AVX512_3OP_DWORD_EL(VPDPBUSDS_MASK_VdqWdqR, xmm_pdpbusds)
|
||||
AVX512_3OP_DWORD_EL(VPDPWSSD_MASK_VdqWdqR, xmm_pdpwssd)
|
||||
AVX512_3OP_DWORD_EL(VPDPWSSDS_MASK_VdqWdqR, xmm_pdpwssds)
|
||||
|
||||
#endif
|
@ -3881,6 +3881,11 @@ public: // for now...
|
||||
|
||||
BX_SMF BX_INSF_TYPE VPMULTISHIFTQB_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPMULTISHIFTQB_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
||||
BX_SMF BX_INSF_TYPE VPDPBUSD_MASK_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPDPBUSDS_MASK_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPDPWSSD_MASK_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPDPWSSDS_MASK_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
#endif
|
||||
|
||||
BX_SMF BX_INSF_TYPE LZCNT_GwEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
@ -2,7 +2,7 @@
|
||||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (C) 2016 The Bochs Project
|
||||
// Copyright (C) 2016-2017 The Bochs Project
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
@ -99,6 +99,7 @@ enum {
|
||||
BX_ISA_AVX512_VBMI, /* AVX-512 Vector Bit Manipulation Instructions */
|
||||
BX_ISA_AVX512_IFMA52, /* AVX-512 IFMA52 Instructions */
|
||||
BX_ISA_AVX512_VPOPCNTDQ, /* AVX-512 VPOPCNTD/VPOPCNTQ Instructions */
|
||||
BX_ISA_AVX512_VNNI, /* AVX-512 VNNI Instructions */
|
||||
BX_ISA_XAPIC, /* XAPIC support */
|
||||
BX_ISA_X2APIC, /* X2APIC support */
|
||||
BX_ISA_XAPIC_EXT, /* XAPIC Extensions support */
|
||||
|
@ -2,7 +2,7 @@
|
||||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2013-2016 Stanislav Shwartsman
|
||||
// Copyright (c) 2013-2017 Stanislav Shwartsman
|
||||
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
@ -1506,14 +1506,14 @@ static const BxExtOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
|
||||
/* 4E */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VRSQRT14PS_VpsWps_Kmask },
|
||||
/* 4F k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VRSQRT14SS_VssHpsWss_Kmask },
|
||||
/* 4F */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VRSQRT14SS_VssHpsWss_Kmask },
|
||||
/* 50 k0 */ { 0, BX_IA_ERROR },
|
||||
/* 50 */ { 0, BX_IA_ERROR },
|
||||
/* 51 k0 */ { 0, BX_IA_ERROR },
|
||||
/* 51 */ { 0, BX_IA_ERROR },
|
||||
/* 52 k0 */ { 0, BX_IA_ERROR },
|
||||
/* 52 */ { 0, BX_IA_ERROR },
|
||||
/* 53 k0 */ { 0, BX_IA_ERROR },
|
||||
/* 53 */ { 0, BX_IA_ERROR },
|
||||
/* 50 k0 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPDPBUSD_VdqWdq_Kmask },
|
||||
/* 50 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPDPBUSD_VdqWdq_Kmask },
|
||||
/* 51 k0 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPDPBUSDS_VdqWdq_Kmask },
|
||||
/* 51 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPDPBUSDS_VdqWdq_Kmask },
|
||||
/* 52 k0 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPDPWSSD_VdqWdq_Kmask },
|
||||
/* 52 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPDPWSSD_VdqWdq_Kmask },
|
||||
/* 53 k0 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPDPWSSDS_VdqWdq_Kmask },
|
||||
/* 53 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPDPWSSDS_VdqWdq_Kmask },
|
||||
/* 54 k0 */ { 0, BX_IA_ERROR },
|
||||
/* 54 */ { 0, BX_IA_ERROR },
|
||||
/* 55 k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VPOPCNTD_VdqWdq_Kmask },
|
||||
|
@ -3662,4 +3662,9 @@ bx_define_opcode(BX_IA_V512_VPEXTRQ_EqVdqIb, &BX_CPU_C::PEXTRQ_EqVdqIbM, &BX_CPU
|
||||
bx_define_opcode(BX_IA_V512_VPBROADCASTMB2Q_VdqKEb, &BX_CPU_C::BxError, &BX_CPU_C::VPBROADCASTMB2Q_VdqKEbR, BX_ISA_AVX512, OP_Vdq, OP_KEb, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VPBROADCASTMW2D_VdqKEw, &BX_CPU_C::BxError, &BX_CPU_C::VPBROADCASTMW2D_VdqKEwR, BX_ISA_AVX512, OP_Vdq, OP_KEw, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
|
||||
// AVX-512 VNNI dot-product instructions.
// The register-form handlers are named ..._MASK_VdqWdqR and read both src1() and
// src2(), so two source operands are declared here: a vector register source
// followed by the register/memory source that may use dword broadcast.
// NOTE(review): other masked opcodes may use a MASK-aware broadcast loader;
// confirm LOAD_BROADCAST_VectorD is the intended memory-form handler.
bx_define_opcode(BX_IA_V512_VPDPBUSD_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPDPBUSD_MASK_VdqWdqR, BX_ISA_AVX512_VNNI, OP_Vdq, OP_Hdq, OP_mVdq32, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPDPBUSDS_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPDPBUSDS_MASK_VdqWdqR, BX_ISA_AVX512_VNNI, OP_Vdq, OP_Hdq, OP_mVdq32, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPDPWSSD_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPDPWSSD_MASK_VdqWdqR, BX_ISA_AVX512_VNNI, OP_Vdq, OP_Hdq, OP_mVdq32, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPDPWSSDS_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPDPWSSDS_MASK_VdqWdqR, BX_ISA_AVX512_VNNI, OP_Vdq, OP_Hdq, OP_mVdq32, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
|
||||
|
||||
#endif // BX_SUPPORT_EVEX
|
||||
|
@ -2,7 +2,7 @@
|
||||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2011-2015 Stanislav Shwartsman
|
||||
// Copyright (c) 2011-2017 Stanislav Shwartsman
|
||||
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
@ -1413,15 +1413,43 @@ Bit32u bx_generic_cpuid_t::get_ext4_cpuid_features(void) const
|
||||
{
|
||||
Bit32u features = 0;
|
||||
|
||||
// [0:0] PREFETCHWT1 instruction support
|
||||
// [1:1] AVX512 VBMI instructions support
|
||||
// [2:2] reserved
|
||||
// [3:3] PKU: Protection keys for user-mode pages.
|
||||
// [4:4] OSPKE: OS has set CR4.PKE to enable protection keys
|
||||
// [31:5] reserved
|
||||
// [0:0] PREFETCHWT1 instruction support
|
||||
// [1:1] AVX512 VBMI instructions support
|
||||
// [2:2] UMIP: Supports user-mode instruction prevention
|
||||
// [3:3] PKU: Protection keys for user-mode pages.
|
||||
// [4:4] OSPKE: OS has set CR4.PKE to enable protection keys
|
||||
// [5:5] reserved
|
||||
// [6:6] AVX512 VBMI2 instructions support
|
||||
// [7:7] reserved
|
||||
// [8:8] GFNI instructions support
|
||||
// [9:9] VAES instructions support
|
||||
// [10:10] VPCLMULQDQ instruction support
|
||||
// [11:11] AVX512 VNNI instructions support
|
||||
// [12:12] AVX512 BITALG instructions support
|
||||
// [13:13] reserved
|
||||
// [14:14] AVX512 VPOPCNTDQ: AVX512 VPOPCNTD/VPOPCNTQ instructions
|
||||
// [15:15] reserved
|
||||
// [16:16] LA57: LA57 and 5-level paging
|
||||
// [21:17] reserved
|
||||
// [22:22] RDPID: Read Processor ID support
|
||||
// [29:23] reserved
|
||||
// [30:30] SGX_LC: SGX Launch Configuration
|
||||
// [31:31] reserved
|
||||
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_AVX512_VBMI))
|
||||
features |= BX_CPUID_EXT4_AVX512_VBMI;
|
||||
|
||||
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_UMIP))
|
||||
features |= BX_CPUID_EXT4_UMIP;
|
||||
|
||||
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_AVX512_VNNI))
|
||||
features |= BX_CPUID_EXT4_AVX512_VNNI;
|
||||
|
||||
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_AVX512_VPOPCNTDQ))
|
||||
features |= BX_CPUID_EXT4_AVX512_VPOPCNTDQ;
|
||||
|
||||
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_RDPID))
|
||||
features |= BX_CPUID_EXT4_RDPID;
|
||||
|
||||
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_PKU)) {
|
||||
features |= BX_CPUID_EXT4_PKU;
|
||||
if (cpu->cr4.get_PKE())
|
||||
|
@ -1746,4 +1746,56 @@ BX_CPP_INLINE void xmm_pshlq(BxPackedXmmRegister *op1, const BxPackedXmmRegister
|
||||
}
|
||||
}
|
||||
|
||||
// VNNI
|
||||
|
||||
// VPDPBUSD helper for one 128-bit lane (no saturation).
//
// For each of the four dwords: multiplies the four unsigned bytes of op1 by the
// corresponding signed bytes of op2, sums the four products and adds that sum
// to the matching signed dword of dst.
//
// dst - accumulator lane, updated in place
// op1 - lane whose bytes are read as unsigned
// op2 - lane whose bytes are read as signed
//
// The final accumulation is performed in unsigned arithmetic so that a result
// outside the signed 32-bit range wraps around instead of invoking
// signed-overflow undefined behaviour.
BX_CPP_INLINE void xmm_pdpbusd(BxPackedXmmRegister *dst, BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
  for(unsigned n=0; n<4; n++)
  {
    Bit32s sum = 0;

    // With 8-bit operands each product lies in [-32640, 32385], so the sum of
    // four products always fits in a signed dword.
    for(unsigned b=0; b<4; b++)
      sum += (Bit32s) op1->xmmubyte(n*4+b) * (Bit32s) op2->xmmsbyte(n*4+b);

    dst->xmm32s(n) = (Bit32s) ((Bit32u) dst->xmm32s(n) + (Bit32u) sum);
  }
}
|
||||
|
||||
// VPDPBUSDS helper for one 128-bit lane (signed saturation).
//
// For each of the four dwords: multiplies the four unsigned bytes of op1 by the
// corresponding signed bytes of op2, adds the four products to the signed dword
// of dst using a 64-bit intermediate, and stores the value returned by
// SaturateQwordSToDwordS() back into dst.
BX_CPP_INLINE void xmm_pdpbusds(BxPackedXmmRegister *dst, BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
  for (unsigned dw = 0; dw < 4; dw++) {
    const unsigned base = dw * 4;
    Bit32s dot = 0;

    for (unsigned k = 0; k < 4; k++) {
      Bit32s product = (Bit32u) op1->xmmubyte(base + k) * (Bit32s) op2->xmmsbyte(base + k);
      dot += product;
    }

    // widen before adding so the accumulate itself cannot overflow
    Bit64s wide = (Bit64s) dst->xmm32s(dw) + dot;
    dst->xmm32s(dw) = SaturateQwordSToDwordS(wide);
  }
}
|
||||
|
||||
// VPDPWSSD helper for one 128-bit lane (no saturation).
//
// For each of the four dwords: multiplies the two signed words of op1 by the
// corresponding signed words of op2 and adds both products to the matching
// signed dword of dst.
//
// dst - accumulator lane, updated in place
// op1, op2 - lanes read as signed 16-bit words
//
// The additions are performed in unsigned arithmetic: the sum of the two
// products alone reaches 2^31 when all four words are -32768, and the
// accumulate can exceed the signed range as well. The result has to wrap, not
// overflow a signed integer.
BX_CPP_INLINE void xmm_pdpwssd(BxPackedXmmRegister *dst, BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
  for(unsigned n=0; n<4; n++)
  {
    // a single 16x16-bit signed product always fits in a signed dword
    Bit32s p1_dword = (Bit32s) op1->xmm16s(n*2)   * (Bit32s) op2->xmm16s(n*2);
    Bit32s p2_dword = (Bit32s) op1->xmm16s(n*2+1) * (Bit32s) op2->xmm16s(n*2+1);

    dst->xmm32s(n) = (Bit32s) ((Bit32u) dst->xmm32s(n) + (Bit32u) p1_dword + (Bit32u) p2_dword);
  }
}
|
||||
|
||||
// VPDPWSSDS helper for one 128-bit lane (signed saturation).
//
// For each of the four dwords: multiplies the two signed words of op1 by the
// corresponding signed words of op2, adds both products to the signed dword of
// dst using a 64-bit intermediate, and stores the value returned by
// SaturateQwordSToDwordS() back into dst.
//
// dst - accumulator lane, updated in place
// op1, op2 - lanes read as signed 16-bit words
//
// Every term is widened to 64 bits before the addition. Adding the two products
// in 32 bits first would overflow when all four words are -32768
// (2^30 + 2^30), producing a wrong value ahead of the saturation step.
BX_CPP_INLINE void xmm_pdpwssds(BxPackedXmmRegister *dst, BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
  for(unsigned n=0; n<4; n++)
  {
    // a single 16x16-bit signed product always fits in a signed dword
    Bit32s p1_dword = (Bit32s) op1->xmm16s(n*2)   * (Bit32s) op2->xmm16s(n*2);
    Bit32s p2_dword = (Bit32s) op1->xmm16s(n*2+1) * (Bit32s) op2->xmm16s(n*2+1);

    Bit64s result = (Bit64s) dst->xmm32s(n) + (Bit64s) p1_dword + (Bit64s) p2_dword;
    dst->xmm32s(n) = SaturateQwordSToDwordS(result);
  }
}
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user