implemented AVX-512 VNNI instructions

This commit is contained in:
Stanislav Shwartsman 2017-10-15 19:17:07 +00:00
parent 4179e6f939
commit 0d190eec8e
8 changed files with 170 additions and 18 deletions

View File

@ -1,4 +1,4 @@
# Copyright (C) 2015 The Bochs Project
# Copyright (C) 2017 The Bochs Project
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@ -59,6 +59,7 @@ AVX_OBJS = \
avx512_mask32.o \
avx512_mask64.o \
avx512_conflict.o \
avx512_vnni.o \
gather.o \
tbm32.o \
tbm64.o \
@ -242,6 +243,15 @@ avx_pfp.o: avx_pfp.@CPP_SUFFIX@ ../../bochs.h ../../config.h ../../osdep.h \
../fpu/control_w.h ../crregs.h ../descriptor.h ../decoder/instr.h \
../decoder/ia_opcodes.h ../lazy_flags.h ../tlb.h ../icache.h ../apic.h \
../xmm.h ../vmx.h ../cpuid.h ../stack.h ../access.h
avx512_vnni.o: avx512_vnni.@CPP_SUFFIX@ ../../bochs.h ../../config.h \
../../osdep.h ../../bx_debug/debug.h ../../config.h ../../osdep.h \
../../gui/siminterface.h ../../cpudb.h ../../gui/paramtree.h \
../../memory/memory-bochs.h ../../pc_system.h ../../gui/gui.h \
../../instrument/stubs/instrument.h ../cpu.h ../decoder/decoder.h \
../i387.h ../fpu/softfloat.h ../fpu/tag_w.h ../fpu/status_w.h \
../fpu/control_w.h ../crregs.h ../descriptor.h ../decoder/instr.h \
../decoder/ia_opcodes.h ../lazy_flags.h ../tlb.h ../icache.h ../apic.h \
../xmm.h ../vmx.h ../cpuid.h ../stack.h ../access.h
gather.o: gather.@CPP_SUFFIX@ ../../bochs.h ../../config.h ../../osdep.h \
../../bx_debug/debug.h ../../config.h ../../osdep.h \
../../gui/siminterface.h ../../cpudb.h ../../gui/paramtree.h \

View File

@ -0,0 +1,51 @@
/////////////////////////////////////////////////////////////////////////
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2017 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
//
/////////////////////////////////////////////////////////////////////////
#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
#if BX_SUPPORT_EVEX
#include "simd_int.h"
// Generates the BX_CPU_C handler HANDLER for a three-operand, dword-element
// EVEX instruction. The destination register is read as well as written: it
// is handed to 'func' together with both sources, one 128-bit lane at a time,
// for i->getVL() lanes. The combined result is then stored through
// avx512_write_regd_masked() using the instruction's 16-bit opmask.
#define AVX512_3OP_DWORD_EL(HANDLER, func) \
  BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C :: HANDLER(bxInstruction_c *i) \
  { \
    BxPackedAvxRegister acc = BX_READ_AVX_REG(i->dst()); \
    BxPackedAvxRegister lhs = BX_READ_AVX_REG(i->src1()); \
    BxPackedAvxRegister rhs = BX_READ_AVX_REG(i->src2()); \
    const unsigned lanes = i->getVL(); \
    \
    for (unsigned lane = 0; lane < lanes; ++lane) { \
      (func) (&acc.vmm128(lane), &lhs.vmm128(lane), &rhs.vmm128(lane)); \
    } \
    \
    avx512_write_regd_masked(i, &acc, lanes, BX_READ_16BIT_OPMASK(i->opmask())); \
    BX_NEXT_INSTR(i); \
  }
// Instantiate the four VNNI handlers. Each one pairs a BX_CPU_C handler name
// with the per-128-bit-lane helper that performs the arithmetic.

// unsigned byte x signed byte products, summed in groups of four into each dword
AVX512_3OP_DWORD_EL(VPDPBUSD_MASK_VdqWdqR, xmm_pdpbusd)
// same as above, accumulating with signed saturation
AVX512_3OP_DWORD_EL(VPDPBUSDS_MASK_VdqWdqR, xmm_pdpbusds)
// signed word x signed word products, summed in pairs into each dword
AVX512_3OP_DWORD_EL(VPDPWSSD_MASK_VdqWdqR, xmm_pdpwssd)
// same as above, accumulating with signed saturation
AVX512_3OP_DWORD_EL(VPDPWSSDS_MASK_VdqWdqR, xmm_pdpwssds)
#endif

View File

@ -3881,6 +3881,11 @@ public: // for now...
BX_SMF BX_INSF_TYPE VPMULTISHIFTQB_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMULTISHIFTQB_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
// AVX-512 VNNI handlers
BX_SMF BX_INSF_TYPE VPDPBUSD_MASK_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPDPBUSDS_MASK_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPDPWSSD_MASK_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPDPWSSDS_MASK_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#endif
BX_SMF BX_INSF_TYPE LZCNT_GwEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

View File

@ -2,7 +2,7 @@
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2016 The Bochs Project
// Copyright (C) 2016-2017 The Bochs Project
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@ -99,6 +99,7 @@ enum {
BX_ISA_AVX512_VBMI, /* AVX-512 Vector Bit Manipulation Instructions */
BX_ISA_AVX512_IFMA52, /* AVX-512 IFMA52 Instructions */
BX_ISA_AVX512_VPOPCNTDQ, /* AVX-512 VPOPCNTD/VPOPCNTQ Instructions */
BX_ISA_AVX512_VNNI, /* AVX-512 VNNI Instructions */
BX_ISA_XAPIC, /* XAPIC support */
BX_ISA_X2APIC, /* X2APIC support */
BX_ISA_XAPIC_EXT, /* XAPIC Extensions support */

View File

@ -2,7 +2,7 @@
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2013-2016 Stanislav Shwartsman
// Copyright (c) 2013-2017 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
@ -1506,14 +1506,14 @@ static const BxExtOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 4E */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VRSQRT14PS_VpsWps_Kmask },
/* 4F k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VRSQRT14SS_VssHpsWss_Kmask },
/* 4F */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VRSQRT14SS_VssHpsWss_Kmask },
/* 50 k0 */ { 0, BX_IA_ERROR },
/* 50 */ { 0, BX_IA_ERROR },
/* 51 k0 */ { 0, BX_IA_ERROR },
/* 51 */ { 0, BX_IA_ERROR },
/* 52 k0 */ { 0, BX_IA_ERROR },
/* 52 */ { 0, BX_IA_ERROR },
/* 53 k0 */ { 0, BX_IA_ERROR },
/* 53 */ { 0, BX_IA_ERROR },
/* 50 k0 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPDPBUSD_VdqWdq_Kmask },
/* 50 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPDPBUSD_VdqWdq_Kmask },
/* 51 k0 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPDPBUSDS_VdqWdq_Kmask },
/* 51 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPDPBUSDS_VdqWdq_Kmask },
/* 52 k0 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPDPWSSD_VdqWdq_Kmask },
/* 52 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPDPWSSD_VdqWdq_Kmask },
/* 53 k0 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPDPWSSDS_VdqWdq_Kmask },
/* 53 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPDPWSSDS_VdqWdq_Kmask },
/* 54 k0 */ { 0, BX_IA_ERROR },
/* 54 */ { 0, BX_IA_ERROR },
/* 55 k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VPOPCNTD_VdqWdq_Kmask },

View File

@ -3662,4 +3662,9 @@ bx_define_opcode(BX_IA_V512_VPEXTRQ_EqVdqIb, &BX_CPU_C::PEXTRQ_EqVdqIbM, &BX_CPU
bx_define_opcode(BX_IA_V512_VPBROADCASTMB2Q_VdqKEb, &BX_CPU_C::BxError, &BX_CPU_C::VPBROADCASTMB2Q_VdqKEbR, BX_ISA_AVX512, OP_Vdq, OP_KEb, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPBROADCASTMW2D_VdqKEw, &BX_CPU_C::BxError, &BX_CPU_C::VPBROADCASTMW2D_VdqKEwR, BX_ISA_AVX512, OP_Vdq, OP_KEw, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
// AVX-512 VNNI. The register-form handlers are declared and defined with the
// 'R' suffix (..._MASK_VdqWdqR), so that is the name referenced here. The
// handlers read dst, src1 and src2, so all three operands are listed:
// destination/accumulator (Vdq), first source from EVEX.vvvv (Hdq) and second
// source from ModRM.rm or memory with 32-bit broadcast (mVdq32).
bx_define_opcode(BX_IA_V512_VPDPBUSD_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPDPBUSD_MASK_VdqWdqR, BX_ISA_AVX512_VNNI, OP_Vdq, OP_Hdq, OP_mVdq32, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPDPBUSDS_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPDPBUSDS_MASK_VdqWdqR, BX_ISA_AVX512_VNNI, OP_Vdq, OP_Hdq, OP_mVdq32, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPDPWSSD_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPDPWSSD_MASK_VdqWdqR, BX_ISA_AVX512_VNNI, OP_Vdq, OP_Hdq, OP_mVdq32, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPDPWSSDS_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPDPWSSDS_MASK_VdqWdqR, BX_ISA_AVX512_VNNI, OP_Vdq, OP_Hdq, OP_mVdq32, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
#endif // BX_SUPPORT_EVEX

View File

@ -2,7 +2,7 @@
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2011-2015 Stanislav Shwartsman
// Copyright (c) 2011-2017 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
@ -1413,15 +1413,43 @@ Bit32u bx_generic_cpuid_t::get_ext4_cpuid_features(void) const
{
Bit32u features = 0;
// [0:0] PREFETCHWT1 instruction support
// [1:1] AVX512 VBMI instructions support
// [2:2] reserved
// [3:3] PKU: Protection keys for user-mode pages.
// [4:4] OSPKE: OS has set CR4.PKE to enable protection keys
// [31:5] reserved
// [0:0] PREFETCHWT1 instruction support
// [1:1] AVX512 VBMI instructions support
// [2:2] UMIP: Supports user-mode instruction prevention
// [3:3] PKU: Protection keys for user-mode pages.
// [4:4] OSPKE: OS has set CR4.PKE to enable protection keys
// [5:5] reserved
// [6:6] AVX512 VBMI2 instructions support
// [7:7] reserved
// [8:8] GFNI instructions support
// [9:9] VAES instructions support
// [10:10] VPCLMULQDQ instruction support
// [11:11] AVX512 VNNI instructions support
// [12:12] AVX512 BITALG instructions support
// [13:13] reserved
// [14:14] AVX512 VPOPCNTDQ: AVX512 VPOPCNTD/VPOPCNTQ instructions
// [15:15] reserved
// [16:16] LA57: LA57 and 5-level paging
// [21:17] reserved
// [22:22] RDPID: Read Processor ID support
// [29:23] reserved
// [30:30] SGX_LC: SGX Launch Configuration
// [31:31] reserved
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_AVX512_VBMI))
features |= BX_CPUID_EXT4_AVX512_VBMI;
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_UMIP))
features |= BX_CPUID_EXT4_UMIP;
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_AVX512_VNNI))
features |= BX_CPUID_EXT4_AVX512_VNNI;
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_AVX512_VPOPCNTDQ))
features |= BX_CPUID_EXT4_AVX512_VPOPCNTDQ;
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_RDPID))
features |= BX_CPUID_EXT4_RDPID;
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_PKU)) {
features |= BX_CPUID_EXT4_PKU;
if (cpu->cr4.get_PKE())

View File

@ -1746,4 +1746,56 @@ BX_CPP_INLINE void xmm_pshlq(BxPackedXmmRegister *op1, const BxPackedXmmRegister
}
}
// VNNI
// VPDPBUSD on one 128-bit lane.
//
// For each of the four dwords: multiply the four unsigned bytes of op1 by the
// corresponding four signed bytes of op2, add the four products together and
// accumulate the sum into the signed dword of dst. The accumulation wraps
// modulo 2^32 (no saturation).
//
//   dst - accumulator, read and updated in place
//   op1 - source whose bytes are treated as unsigned
//   op2 - source whose bytes are treated as signed
BX_CPP_INLINE void xmm_pdpbusd(BxPackedXmmRegister *dst, BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
  for(unsigned n=0; n<4; n++)
  {
    // Widen both factors to signed 32-bit before multiplying so the product
    // is computed as a signed value instead of going through unsigned
    // arithmetic and an implementation-defined conversion back.
    Bit32s p1word = (Bit32s) op1->xmmubyte(n*4)   * (Bit32s) op2->xmmsbyte(n*4);
    Bit32s p2word = (Bit32s) op1->xmmubyte(n*4+1) * (Bit32s) op2->xmmsbyte(n*4+1);
    Bit32s p3word = (Bit32s) op1->xmmubyte(n*4+2) * (Bit32s) op2->xmmsbyte(n*4+2);
    Bit32s p4word = (Bit32s) op1->xmmubyte(n*4+3) * (Bit32s) op2->xmmsbyte(n*4+3);

    // Each product lies in [-32640, 32385], so this sum cannot overflow.
    Bit32s sum = p1word + p2word + p3word + p4word;

    // Accumulate in unsigned arithmetic: the result must wrap, and signed
    // overflow would be undefined behaviour.
    dst->xmm32s(n) = (Bit32s) ((Bit32u) dst->xmm32s(n) + (Bit32u) sum);
  }
}
// VPDPBUSDS on one 128-bit lane.
//
// For each of the four dwords: multiply the four unsigned bytes of op1 by the
// corresponding four signed bytes of op2, add the four products together and
// accumulate the sum into the signed dword of dst. The result is narrowed back
// to a dword with SaturateQwordSToDwordS().
//
//   dst - accumulator, read and updated in place
//   op1 - source whose bytes are treated as unsigned
//   op2 - source whose bytes are treated as signed
BX_CPP_INLINE void xmm_pdpbusds(BxPackedXmmRegister *dst, BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
  for(unsigned n=0; n<4; n++)
  {
    // Widen both factors to signed 32-bit before multiplying so the product
    // is computed as a signed value instead of going through unsigned
    // arithmetic and an implementation-defined conversion back.
    Bit32s p1word = (Bit32s) op1->xmmubyte(n*4)   * (Bit32s) op2->xmmsbyte(n*4);
    Bit32s p2word = (Bit32s) op1->xmmubyte(n*4+1) * (Bit32s) op2->xmmsbyte(n*4+1);
    Bit32s p3word = (Bit32s) op1->xmmubyte(n*4+2) * (Bit32s) op2->xmmsbyte(n*4+2);
    Bit32s p4word = (Bit32s) op1->xmmubyte(n*4+3) * (Bit32s) op2->xmmsbyte(n*4+3);

    // Each product lies in [-32640, 32385], so the 32-bit sum of four cannot
    // overflow; the accumulation itself is done in 64 bits.
    Bit64s result = (Bit64s) dst->xmm32s(n) + (p1word + p2word + p3word + p4word);
    dst->xmm32s(n) = SaturateQwordSToDwordS(result);
  }
}
// VPDPWSSD on one 128-bit lane.
//
// For each of the four dwords: multiply the two signed words of op1 by the
// corresponding two signed words of op2, add the two products together and
// accumulate the sum into the signed dword of dst. The accumulation wraps
// modulo 2^32 (no saturation).
//
//   dst - accumulator, read and updated in place
//   op1 - first source, words treated as signed
//   op2 - second source, words treated as signed
BX_CPP_INLINE void xmm_pdpwssd(BxPackedXmmRegister *dst, BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
  for(unsigned n=0; n<4; n++)
  {
    Bit32s p1_dword = (Bit32s) op1->xmm16s(n*2)   * (Bit32s) op2->xmm16s(n*2);
    Bit32s p2_dword = (Bit32s) op1->xmm16s(n*2+1) * (Bit32s) op2->xmm16s(n*2+1);

    // Each product can be as large as 2^30 (0x8000 * 0x8000), so p1 + p2 may
    // reach 2^31 and overflow a signed dword. Add everything in unsigned
    // arithmetic to get the required wraparound without undefined behaviour.
    Bit32u sum = (Bit32u) p1_dword + (Bit32u) p2_dword;
    dst->xmm32s(n) = (Bit32s) ((Bit32u) dst->xmm32s(n) + sum);
  }
}
// VPDPWSSDS on one 128-bit lane.
//
// For each of the four dwords: multiply the two signed words of op1 by the
// corresponding two signed words of op2, add the two products together and
// accumulate the sum into the signed dword of dst. The result is narrowed back
// to a dword with SaturateQwordSToDwordS().
//
//   dst - accumulator, read and updated in place
//   op1 - first source, words treated as signed
//   op2 - second source, words treated as signed
BX_CPP_INLINE void xmm_pdpwssds(BxPackedXmmRegister *dst, BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
  for(unsigned n=0; n<4; n++)
  {
    Bit32s p1_dword = (Bit32s) op1->xmm16s(n*2)   * (Bit32s) op2->xmm16s(n*2);
    Bit32s p2_dword = (Bit32s) op1->xmm16s(n*2+1) * (Bit32s) op2->xmm16s(n*2+1);

    // Widen every term before adding: p1 + p2 can reach 2^31 (both products
    // equal to 0x8000 * 0x8000), which does not fit in a signed dword. Summing
    // in 32 bits first would overflow and saturate to the wrong bound.
    Bit64s result = (Bit64s) dst->xmm32s(n) + (Bit64s) p1_dword + (Bit64s) p2_dword;
    dst->xmm32s(n) = SaturateQwordSToDwordS(result);
  }
}
#endif