SHA512 instructions implemented (#88)
Co-authored-by: Stanislav Shwartsman <sshwarts@users.sourceforge.net>
This commit is contained in:
parent
3234e9b88e
commit
4a309478f9
@ -6,7 +6,7 @@ We welcome every new contributor !
|
||||
- Bugfixes for CPU emulation correctness (MONITOR/MWAIT, VMX, SHA fixes)
|
||||
! Implemented VMX MBE (Mode Based Execution Control) emulation required for Windows 11 guest
|
||||
! Implemented new published Intel instruction sets:
|
||||
- AVX IFMA52, VNNI-INT8, VNNI-INT16, CMPCCXADD, SM3, SM4, WRMSRNS
|
||||
- AVX IFMA52, VNNI-INT8, VNNI-INT16, CMPCCXADD, SM3, SM4, SHA512, WRMSRNS
|
||||
- Improved 64-bit guest support in Bochs internal debugger, added new internal debugger commands
|
||||
- Fixed Voodoo device compilation for big endian architectures
|
||||
- Fixed memory handling in volatile BIOS write support
|
||||
|
@ -64,6 +64,7 @@ OBJS = \
|
||||
aes.o \
|
||||
gf2.o \
|
||||
sha.o \
|
||||
sha512.o \
|
||||
sm3.o \
|
||||
sm4.o \
|
||||
svm.o \
|
||||
@ -627,6 +628,12 @@ sha.o: sha.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h ../bx_debug/debug.h \
|
||||
decoder/decoder.h i387.h fpu/softfloat.h fpu/tag_w.h fpu/status_w.h \
|
||||
fpu/control_w.h crregs.h descriptor.h decoder/instr.h lazy_flags.h tlb.h \
|
||||
icache.h apic.h xmm.h vmx.h svm.h cpuid.h stack.h access.h
|
||||
# sha512.@CPP_SUFFIX@ includes scalar_arith.h directly, so it is listed as a
# prerequisite; otherwise edits to that header would not rebuild sha512.o.
sha512.o: sha512.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h ../bx_debug/debug.h \
 ../config.h ../osdep.h ../cpu/decoder/decoder.h ../gui/paramtree.h \
 ../logio.h ../instrument/stubs/instrument.h cpu.h \
 decoder/decoder.h i387.h fpu/softfloat.h fpu/tag_w.h fpu/status_w.h \
 fpu/control_w.h crregs.h descriptor.h decoder/instr.h lazy_flags.h tlb.h \
 icache.h apic.h xmm.h vmx.h svm.h cpuid.h stack.h access.h scalar_arith.h
|
||||
sm3.o: sm3.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h ../bx_debug/debug.h \
|
||||
../config.h ../osdep.h ../cpu/decoder/decoder.h ../gui/paramtree.h \
|
||||
../logio.h ../instrument/stubs/instrument.h cpu.h \
|
||||
|
@ -2581,17 +2581,6 @@ public: // for now...
|
||||
BX_SMF void GF2P8AFFINEQB_VdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void GF2P8MULB_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
/* GFNI instructions */
|
||||
|
||||
/* SM3 instructions */
|
||||
BX_SMF void VSM3MSG1_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void VSM3MSG2_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void VSM3RNDS2_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
/* SM3 instructions */
|
||||
|
||||
/* SM4 instructions */
|
||||
BX_SMF void VSM4KEY4_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void VSM4RNDS4_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
/* SM4 instructions */
|
||||
#endif
|
||||
|
||||
/* VMX instructions */
|
||||
@ -3131,6 +3120,20 @@ public: // for now...
|
||||
BX_SMF void VGF2P8AFFINEQB_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void VGF2P8MULB_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
||||
/* SHA512 instructions: VEX encoded */
|
||||
BX_SMF void VSHA512MSG1_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void VSHA512MSG2_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void VSHA512RNDS2_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
||||
/* SM3 instructions: VEX encoded */
|
||||
BX_SMF void VSM3MSG1_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void VSM3MSG2_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void VSM3RNDS2_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
||||
/* SM4 instructions: VEX encoded */
|
||||
BX_SMF void VSM4KEY4_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void VSM4RNDS4_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
||||
/* AVX encoded VNNI instructions */
|
||||
BX_SMF void VPDPBUSD_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void VPDPBUSDS_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
@ -97,6 +97,7 @@ static const char *cpu_feature_name[] =
|
||||
"smap", // BX_ISA_SMAP
|
||||
"rdseed", // BX_ISA_RDSEED
|
||||
"sha", // BX_ISA_SHA
|
||||
"sha512", // BX_ISA_SHA512
|
||||
"gfni", // BX_ISA_GFNI
|
||||
"sm3", // BX_ISA_SM3
|
||||
"sm4", // BX_ISA_SM4
|
||||
|
@ -92,6 +92,7 @@ enum x86_feature_name {
|
||||
BX_ISA_SMAP, /* SMAP support */
|
||||
BX_ISA_RDSEED, /* RDSEED instruction */
|
||||
BX_ISA_SHA, /* SHA instruction */
|
||||
BX_ISA_SHA512, /* SHA512 instruction */
|
||||
BX_ISA_GFNI, /* GFNI instruction */
|
||||
BX_ISA_SM3, /* SM3 instruction */
|
||||
BX_ISA_SM4, /* SM4 instruction */
|
||||
|
@ -1170,6 +1170,10 @@ static const Bit64u BxOpcodeGroup_VEX_0F38BF[] = {
|
||||
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W1, BX_IA_VFNMSUB231SD_VpdHsdWsd)
|
||||
};
|
||||
|
||||
// VEX 0F38 map, opcode CB: a single last_opcode entry for
// BX_IA_VSHA512RNDS2_VdqHdqWdq, tagged with the F2-prefix, VL256, VEX.W0 and
// MODC0 attributes.
// NOTE(review): MODC0 presumably limits decoding to the register form
// (ModRM.mod == 11b) - confirm against the ATTR_* definitions.
static const Bit64u BxOpcodeGroup_VEX_0F38CB[] = { last_opcode(ATTR_SSE_PREFIX_F2 | ATTR_VL256 | ATTR_VEX_W0 | ATTR_MODC0, BX_IA_VSHA512RNDS2_VdqHdqWdq) };
|
||||
// VEX 0F38 map, opcode CC: a single last_opcode entry for
// BX_IA_VSHA512MSG1_VdqWdq, tagged with the F2-prefix, VL256, VEX.W0 and
// MODC0 attributes.
// NOTE(review): MODC0 presumably limits decoding to the register form
// (ModRM.mod == 11b) - confirm against the ATTR_* definitions.
static const Bit64u BxOpcodeGroup_VEX_0F38CC[] = { last_opcode(ATTR_SSE_PREFIX_F2 | ATTR_VL256 | ATTR_VEX_W0 | ATTR_MODC0, BX_IA_VSHA512MSG1_VdqWdq) };
|
||||
// VEX 0F38 map, opcode CD: a single last_opcode entry for
// BX_IA_VSHA512MSG2_VdqWdq, tagged with the F2-prefix, VL256, VEX.W0 and
// MODC0 attributes.
// NOTE(review): MODC0 presumably limits decoding to the register form
// (ModRM.mod == 11b) - confirm against the ATTR_* definitions.
static const Bit64u BxOpcodeGroup_VEX_0F38CD[] = { last_opcode(ATTR_SSE_PREFIX_F2 | ATTR_VL256 | ATTR_VEX_W0 | ATTR_MODC0, BX_IA_VSHA512MSG2_VdqWdq) };
|
||||
|
||||
// VEX 0F38 map, opcode CF: a single last_opcode entry for
// BX_IA_VGF2P8MULB_VdqHdqWdq, tagged with the 66-prefix and VEX.W0 attributes.
static const Bit64u BxOpcodeGroup_VEX_0F38CF[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0, BX_IA_VGF2P8MULB_VdqHdqWdq) };
|
||||
|
||||
static const Bit64u BxOpcodeGroup_VEX_0F38D2[] = {
|
||||
@ -1997,9 +2001,9 @@ static const Bit64u *BxOpcodeTableVEX[256*3] = {
|
||||
/* C8 */ ( BxOpcodeGroup_ERR ),
|
||||
/* C9 */ ( BxOpcodeGroup_ERR ),
|
||||
/* CA */ ( BxOpcodeGroup_ERR ),
|
||||
/* CB */ ( BxOpcodeGroup_ERR ),
|
||||
/* CC */ ( BxOpcodeGroup_ERR ),
|
||||
/* CD */ ( BxOpcodeGroup_ERR ),
|
||||
/* CB */ ( BxOpcodeGroup_VEX_0F38CB ),
|
||||
/* CC */ ( BxOpcodeGroup_VEX_0F38CC ),
|
||||
/* CD */ ( BxOpcodeGroup_VEX_0F38CD ),
|
||||
/* CE */ ( BxOpcodeGroup_ERR ),
|
||||
/* CF */ ( BxOpcodeGroup_VEX_0F38CF ),
|
||||
/* D0 */ ( BxOpcodeGroup_ERR ),
|
||||
|
@ -2241,6 +2241,11 @@ bx_define_opcode(BX_IA_VSM3RNDS2_VdqHdqWdqIb, "vsm3rnds2", "vsm3rnds2", &BX_CPU_
|
||||
bx_define_opcode(BX_IA_VSM4KEY4_VdqHdqWdq, "vsm4key4", "vsm4key4", &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VSM4KEY4_VdqHdqWdqR, BX_ISA_SM4, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_VSM4RNDS4_VdqHdqWdq, "vsm4rnds4", "vsm4rnds4", &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VSM4RNDS4_VdqHdqWdqR, BX_ISA_SM4, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_AVX)
|
||||
|
||||
// SHA512 instructions
|
||||
bx_define_opcode(BX_IA_VSHA512MSG1_VdqWdq, "vsha512msg1", "vsha512msg1", &BX_CPU_C::BxError, &BX_CPU_C::VSHA512MSG1_VdqWdqR, BX_ISA_SHA512, OP_Vdq, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_VSHA512MSG2_VdqWdq, "vsha512msg2", "vsha512msg2", &BX_CPU_C::BxError, &BX_CPU_C::VSHA512MSG2_VdqWdqR, BX_ISA_SHA512, OP_Vdq, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_VSHA512RNDS2_VdqHdqWdq, "vsha512rnds2", "vsha512rnds2", &BX_CPU_C::BxError, &BX_CPU_C::VSHA512RNDS2_VdqHdqWdqR, BX_ISA_SHA512, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_AVX)
|
||||
|
||||
bx_define_opcode(BX_IA_V128_VMOVD_VdqEd, "vmovd", "vmovd", &BX_CPU_C::MOVSS_VssWssM, &BX_CPU_C::MOVD_VdqEdR, BX_ISA_AVX, OP_Vdq, OP_Ed, OP_NONE, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_V128_VMOVQ_VdqEq, "vmovq", "vmovq", &BX_CPU_C::MOVSD_VsdWsdM, &BX_CPU_C::MOVQ_VdqEqR, BX_ISA_AVX, OP_Vdq, OP_Eq, OP_NONE, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_V128_VMOVD_EdVd, "vmovd", "vmovd", &BX_CPU_C::MOVSS_WssVssM, &BX_CPU_C::MOVD_EdVdR, BX_ISA_AVX, OP_Ed, OP_Vd, OP_NONE, OP_NONE, BX_PREPARE_AVX)
|
||||
|
@ -2,7 +2,7 @@
|
||||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2013-2019 Stanislav Shwartsman
|
||||
// Copyright (c) 2013-2023 Stanislav Shwartsman
|
||||
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
|
132
bochs/cpu/sha512.cc
Normal file
132
bochs/cpu/sha512.cc
Normal file
@ -0,0 +1,132 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2023 Stanislav Shwartsman
|
||||
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 2 of the License, or (at your option) any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
// Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License along with this library; if not, write to the Free Software
|
||||
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define NEED_CPU_REG_SHORTCUTS 1
|
||||
#include "bochs.h"
|
||||
#include "cpu.h"
|
||||
#define LOG_THIS BX_CPU_THIS_PTR
|
||||
|
||||
#if BX_SUPPORT_AVX
|
||||
|
||||
#include "scalar_arith.h"
|
||||
|
||||
// SHA-512 message-schedule function sigma0:
//   ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7)
BX_CPP_INLINE Bit64u s0(Bit64u val64)
{
  const Bit64u rot1 = ror64(val64, 1);
  const Bit64u rot8 = ror64(val64, 8);
  const Bit64u shr7 = val64 >> 7;

  return rot1 ^ rot8 ^ shr7;
}
|
||||
|
||||
// SHA-512 message-schedule function sigma1:
//   ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6)
BX_CPP_INLINE Bit64u s1(Bit64u val64)
{
  const Bit64u rot19 = ror64(val64, 19);
  const Bit64u rot61 = ror64(val64, 61);
  const Bit64u shr6  = val64 >> 6;

  return rot19 ^ rot61 ^ shr6;
}
|
||||
|
||||
// SHA-512 round function Sigma0 (capital sigma), applied to working variable A:
//   ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39)
BX_CPP_INLINE Bit64u cap_sigma0(Bit64u val64)
{
  Bit64u result = ror64(val64, 28);
  result ^= ror64(val64, 34);
  result ^= ror64(val64, 39);
  return result;
}
|
||||
|
||||
// SHA-512 round function Sigma1 (capital sigma), applied to working variable E:
//   ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41)
BX_CPP_INLINE Bit64u cap_sigma1(Bit64u val64)
{
  Bit64u result = ror64(val64, 14);
  result ^= ror64(val64, 18);
  result ^= ror64(val64, 41);
  return result;
}
|
||||
|
||||
// Bitwise majority: each result bit is set when at least two of the
// corresponding bits of a, b and c are set.
BX_CPP_INLINE Bit64u sha_maj(Bit64u a, Bit64u b, Bit64u c)
{
  const Bit64u ab = a & b;
  const Bit64u ac = a & c;
  const Bit64u bc = b & c;

  return ab ^ ac ^ bc;
}
|
||||
|
||||
// Bitwise choose: for every bit position, take the bit from f where e is 1
// and the bit from g where e is 0.
BX_CPP_INLINE Bit64u sha_ch(Bit64u e, Bit64u f, Bit64u g)
{
  const Bit64u from_f = e & f;
  const Bit64u from_g = g & ~e;

  return from_f ^ from_g;
}
|
||||
|
||||
// See https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf for more information on the SHA512 standard
|
||||
|
||||
// VSHA512MSG1, register form.
// Each destination qword n gets s0() of qword n+1 added to it; the top qword
// takes its s0() input from the low qword of the source XMM register.
// The result is written back with BX_WRITE_YMM_REGZ.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSHA512MSG1_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister w = BX_READ_YMM_REG(i->dst());
  Bit64u w4 = BX_READ_XMM_REG_LO_QWORD(i->src());

  // Ascending order matters: qword n must consume qword n+1 before that
  // qword is itself updated in the next iteration.
  for (unsigned n = 0; n < 3; n++) {
    w.ymm64u(n) += s0(w.ymm64u(n + 1));
  }

  w.ymm64u(3) += s0(w4);

  BX_WRITE_YMM_REGZ(i->dst(), w);

  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// VSHA512MSG2, register form.
// The two low destination qwords are advanced with s1() of the two high
// source qwords; the two high destination qwords are then advanced with s1()
// of the freshly updated low destination qwords.
// The result is written back with BX_WRITE_YMM_REGZ.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSHA512MSG2_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister acc = BX_READ_YMM_REG(i->dst());
  BxPackedYmmRegister prev = BX_READ_YMM_REG(i->src());

  for (unsigned n = 0; n < 2; n++) {
    acc.ymm64u(n) += s1(prev.ymm64u(n + 2));
    // Uses the value of qword n computed on the line above.
    acc.ymm64u(n + 2) += s1(acc.ymm64u(n));
  }

  BX_WRITE_YMM_REGZ(i->dst(), acc);

  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// VSHA512RNDS2, register form: performs two SHA-512 rounds.
// Working variables are loaded as
//   A,B <- src1 qwords 3,2    C,D <- dst qwords 3,2
//   E,F <- src1 qwords 1,0    G,H <- dst qwords 1,0
// and the two qwords of src2 are added in as the per-round W+K terms.
// After both rounds A, B, E, F are stored to dst qwords 3, 2, 1, 0 and the
// result is written back with BX_WRITE_YMM_REGZ.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSHA512RNDS2_VdqHdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst()), op1 = BX_READ_YMM_REG(i->src1());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src2());

  Bit64u a = op1.ymm64u(3);
  Bit64u b = op1.ymm64u(2);
  Bit64u c = dst.ymm64u(3);
  Bit64u d = dst.ymm64u(2);

  Bit64u e = op1.ymm64u(1);
  Bit64u f = op1.ymm64u(0);
  Bit64u g = dst.ymm64u(1);
  Bit64u h = dst.ymm64u(0);

  for (unsigned round = 0; round < 2; round++) {
    // Term shared by the new A and the new E of this round.
    Bit64u t1 = sha_ch(e, f, g) + cap_sigma1(e) + op2.xmm64u(round) /* WK[round] */ + h;
    Bit64u t2 = sha_maj(a, b, c) + cap_sigma0(a);

    // Rotate the working variables; every right-hand side below still holds
    // the value from the start of the round when it is read.
    h = g;
    g = f;
    f = e;
    e = t1 + d;
    d = c;
    c = b;
    b = a;
    a = t1 + t2;
  }

  dst.ymm64u(3) = a;
  dst.ymm64u(2) = b;
  dst.ymm64u(1) = e;
  dst.ymm64u(0) = f;

  BX_WRITE_YMM_REGZ(i->dst(), dst);

  BX_NEXT_INSTR(i);
}
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user