fixed accidentally broken XMM versions of AES instructions

This commit is contained in:
Stanislav Shwartsman 2017-10-19 20:25:05 +00:00
parent 15ba88c195
commit ba1e5bbffa
3 changed files with 96 additions and 15 deletions

View File

@ -302,6 +302,21 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::AESIMC_VdqWdqR(bxInstruction_c *i)
}
/* 66 0F 38 DC */
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::AESENC_VdqHdqWdqR(bxInstruction_c *i)
{
  // Legacy XMM form: the destination register is also the first source operand.
  BxPackedXmmRegister state = BX_READ_XMM_REG(i->dst());
  BxPackedXmmRegister round_key = BX_READ_XMM_REG(i->src());

  // Transform the first operand: ShiftRows, then SubBytes, then MixColumns.
  AES_ShiftRows(state);
  AES_SubstituteBytes(state);
  AES_MixColumns(state);

  // Fold the second operand into the transformed value with a 128-bit XOR.
  xmm_xorps(&state, &round_key);

  // Write the result back to the destination, passing the instruction's vector length.
  BX_WRITE_XMM_REGZ(i->dst(), state, i->getVL());

  BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VAESENC_VdqHdqWdqR(bxInstruction_c *i)
{
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
@ -321,6 +336,20 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VAESENC_VdqHdqWdqR(bxInstruction_c
}
/* 66 0F 38 DD */
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::AESENCLAST_VdqHdqWdqR(bxInstruction_c *i)
{
  // Legacy XMM form: the destination register is also the first source operand.
  BxPackedXmmRegister state = BX_READ_XMM_REG(i->dst());
  BxPackedXmmRegister round_key = BX_READ_XMM_REG(i->src());

  // Transform the first operand: ShiftRows, then SubBytes (no MixColumns step here).
  AES_ShiftRows(state);
  AES_SubstituteBytes(state);

  // Fold the second operand into the transformed value with a 128-bit XOR.
  xmm_xorps(&state, &round_key);

  // Write the result back to the destination, passing the instruction's vector length.
  BX_WRITE_XMM_REGZ(i->dst(), state, i->getVL());

  BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VAESENCLAST_VdqHdqWdqR(bxInstruction_c *i)
{
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
@ -333,12 +362,27 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VAESENCLAST_VdqHdqWdqR(bxInstructi
xmm_xorps(&op1.vmm128(n), &op2.vmm128(n));
}
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, i->getVL());
BX_NEXT_INSTR(i);
}
/* 66 0F 38 DE */
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::AESDEC_VdqHdqWdqR(bxInstruction_c *i)
{
  // Legacy XMM form: the destination register is also the first source operand.
  BxPackedXmmRegister state = BX_READ_XMM_REG(i->dst());
  BxPackedXmmRegister round_key = BX_READ_XMM_REG(i->src());

  // Transform the first operand: InvShiftRows, then InvSubBytes, then InvMixColumns.
  AES_InverseShiftRows(state);
  AES_InverseSubstituteBytes(state);
  AES_InverseMixColumns(state);

  // Fold the second operand into the transformed value with a 128-bit XOR.
  xmm_xorps(&state, &round_key);

  // Write the result back to the destination, passing the instruction's vector length.
  BX_WRITE_XMM_REGZ(i->dst(), state, i->getVL());

  BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VAESDEC_VdqHdqWdqR(bxInstruction_c *i)
{
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
@ -352,12 +396,26 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VAESDEC_VdqHdqWdqR(bxInstruction_c
xmm_xorps(&op1.vmm128(n), &op2.vmm128(n));
}
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, i->getVL());
BX_NEXT_INSTR(i);
}
/* 66 0F 38 DF */
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::AESDECLAST_VdqHdqWdqR(bxInstruction_c *i)
{
  // Legacy XMM form: the destination register is also the first source operand.
  BxPackedXmmRegister state = BX_READ_XMM_REG(i->dst());
  BxPackedXmmRegister round_key = BX_READ_XMM_REG(i->src());

  // Transform the first operand: InvShiftRows, then InvSubBytes (no InvMixColumns step here).
  AES_InverseShiftRows(state);
  AES_InverseSubstituteBytes(state);

  // Fold the second operand into the transformed value with a 128-bit XOR.
  xmm_xorps(&state, &round_key);

  // Write the result back to the destination, passing the instruction's vector length.
  BX_WRITE_XMM_REGZ(i->dst(), state, i->getVL());

  BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VAESDECLAST_VdqHdqWdqR(bxInstruction_c *i)
{
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
@ -370,7 +428,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VAESDECLAST_VdqHdqWdqR(bxInstructi
xmm_xorps(&op1.vmm128(n), &op2.vmm128(n));
}
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, i->getVL());
BX_NEXT_INSTR(i);
}
@ -412,6 +470,23 @@ BX_CPP_INLINE void xmm_pclmulqdq(BxPackedXmmRegister *r, Bit64u a, Bit64u b)
}
/* 66 0F 3A 44 */
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PCLMULQDQ_VdqHdqWdqIbR(bxInstruction_c *i)
{
  Bit8u selector = i->Ib();

  // Legacy XMM form: the destination register is also the first source operand.
  BxPackedXmmRegister lhs = BX_READ_XMM_REG(i->dst());
  BxPackedXmmRegister rhs = BX_READ_XMM_REG(i->src());

  // Carry-less multiplication, product = A CLMUL B, where
  //   A is the quadword of the first operand chosen by imm8 bit 0
  //   B is the quadword of the second operand chosen by imm8 bit 4
  Bit64u multiplicand = lhs.xmm64u(selector & 1);
  Bit64u multiplier = rhs.xmm64u((selector >> 4) & 1);

  BxPackedXmmRegister product;
  xmm_pclmulqdq(&product, multiplicand, multiplier);

  // Write the result back to the destination, passing the instruction's vector length.
  BX_WRITE_XMM_REGZ(i->dst(), product, i->getVL());

  BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCLMULQDQ_VdqHdqWdqIbR(bxInstruction_c *i)
{
BxPackedAvxRegister r;
@ -427,7 +502,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCLMULQDQ_VdqHdqWdqIbR(bxInstruct
xmm_pclmulqdq(&r.vmm128(n), op1.xmm64u(imm8 & 1), op2.xmm64u((imm8 >> 4) & 1));
}
BX_WRITE_AVX_REGZ(i->dst(), r, len);
BX_WRITE_AVX_REGZ(i->dst(), r, i->getVL());
BX_NEXT_INSTR(i);
}

View File

@ -2634,12 +2634,11 @@ public: // for now...
/* AES instructions */
BX_SMF BX_INSF_TYPE AESIMC_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE AESKEYGENASSIST_VdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
// extended to VEX/EVEX
BX_SMF BX_INSF_TYPE VAESENC_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VAESENCLAST_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VAESDEC_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VAESDECLAST_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPCLMULQDQ_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE AESENC_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE AESENCLAST_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE AESDEC_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE AESDECLAST_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE PCLMULQDQ_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
/* AES instructions */
/* SHA instructions */
@ -3113,6 +3112,13 @@ public: // for now...
/* TBM (AMD) */
#endif
// VAES: VEX extended AES instructions
BX_SMF BX_INSF_TYPE VAESENC_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VAESENCLAST_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VAESDEC_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VAESDECLAST_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPCLMULQDQ_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#if BX_SUPPORT_AVX
// AVX512 OPMASK instructions (VEX encoded)
BX_SMF BX_INSF_TYPE KADDB_KGbKHbKEbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

View File

@ -1307,11 +1307,11 @@ bx_define_opcode(BX_IA_XRSTORS, &BX_CPU_C::XRSTOR, &BX_CPU_C::BxError, BX_ISA_XS
// AES instructions
bx_define_opcode(BX_IA_AESIMC_VdqWdq, &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::AESIMC_VdqWdqR, BX_ISA_AES_PCLMULQDQ, OP_Vdq, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_AESKEYGENASSIST_VdqWdqIb, &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::AESKEYGENASSIST_VdqWdqIbR, BX_ISA_AES_PCLMULQDQ, OP_Vdq, OP_Wdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_AESENC_VdqWdq, &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::VAESENC_VdqHdqWdqR, BX_ISA_AES_PCLMULQDQ, OP_Vdq, OP_Vdq, OP_Wdq, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_AESENCLAST_VdqWdq, &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::VAESENCLAST_VdqHdqWdqR, BX_ISA_AES_PCLMULQDQ, OP_Vdq, OP_Vdq, OP_Wdq, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_AESDEC_VdqWdq, &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::VAESDEC_VdqHdqWdqR, BX_ISA_AES_PCLMULQDQ, OP_Vdq, OP_Vdq, OP_Wdq, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_AESDECLAST_VdqWdq, &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::VAESDECLAST_VdqHdqWdqR, BX_ISA_AES_PCLMULQDQ, OP_Vdq, OP_Vdq, OP_Wdq, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PCLMULQDQ_VdqWdqIb, &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::VPCLMULQDQ_VdqHdqWdqIbR, BX_ISA_AES_PCLMULQDQ, OP_Vdq, OP_Vdq, OP_Wdq, OP_Ib, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_AESENC_VdqWdq, &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::AESENC_VdqHdqWdqR, BX_ISA_AES_PCLMULQDQ, OP_Vdq, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_AESENCLAST_VdqWdq, &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::AESENCLAST_VdqHdqWdqR, BX_ISA_AES_PCLMULQDQ, OP_Vdq, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_AESDEC_VdqWdq, &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::AESDEC_VdqHdqWdqR, BX_ISA_AES_PCLMULQDQ, OP_Vdq, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_AESDECLAST_VdqWdq, &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::AESDECLAST_VdqHdqWdqR, BX_ISA_AES_PCLMULQDQ, OP_Vdq, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PCLMULQDQ_VdqWdqIb, &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::PCLMULQDQ_VdqHdqWdqIbR, BX_ISA_AES_PCLMULQDQ, OP_Vdq, OP_Wdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
// SHA instructions
bx_define_opcode(BX_IA_SHA1NEXTE_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::SHA1NEXTE_VdqWdqR, BX_ISA_SHA, OP_Vdq, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_SSE)