///////////////////////////////////////////////////////////////////////// // $Id$ ///////////////////////////////////////////////////////////////////////// // // Copyright (c) 2013 Stanislav Shwartsman // Written by Stanislav Shwartsman [sshwarts at sourceforge net] // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA B 02110-1301 USA // ///////////////////////////////////////////////////////////////////////// #define NEED_CPU_REG_SHORTCUTS 1 #include "bochs.h" #include "cpu.h" #define LOG_THIS BX_CPU_THIS_PTR #if BX_CPU_LEVEL >= 6 // // sha_f0(): A bit oriented logical operation that derives a new dword from three SHA1 state variables (dword). // This function is used in SHA1 round 1 to 20 processing: // // f0(B,C,D) := (B AND C) XOR ((NOT(B) AND D) // BX_CPP_INLINE Bit32u sha_f0(Bit32u B, Bit32u C, Bit32u D) { return (B & C) ^ (~B & D); } // // sha_f1(): A bit oriented logical operation that derives a new dword from three SHA1 state variables (dword). // This function is used in SHA1 round 21 to 40 processing: // // f1(B,C,D) := B XOR C XOR D // BX_CPP_INLINE Bit32u sha_f1(Bit32u B, Bit32u C, Bit32u D) { return (B ^ C ^ D); } // // sha_f2(): A bit oriented logical operation that derives a new dword from three SHA1 state variables (dword). // This function is used in SHA1 round 41 to 60 processing: // // f2(B,C,D) := (B AND C) XOR (B AND D) XOR (C AND D) // BX_CPP_INLINE Bit32u sha_f2(Bit32u B, Bit32u C, Bit32u D) { return (B & C) ^ (B & D) ^ (C & D); } // // sha_f3(): A bit oriented logical operation that derives a new dword from three SHA1 state variables (dword). // This function is used in SHA1 round 61 to 80 processing: // // f3(B,C,D) := B XOR C XOR D // // Yes, it is the same function as sha_f1() // BX_CPP_INLINE Bit32u sha_f(Bit32u B, Bit32u C, Bit32u D, unsigned index) { if (index == 0) return sha_f0(B,C,D); if (index == 2) return sha_f2(B,C,D); // sha_f3() and sha_f1() are the same return sha_f1(B,C,D); } // // sha_ch(): A bit oriented logical operation that derives a new dword from three SHA256 state variables (dword). // // Ch(E,F,G) := (E AND F) XOR ((NOT E) AND G) // // Yes, it is the same as sha_f0() // #define sha_ch(E,F,G) sha_f0((E), (F), (G)) // // sha_maj(): A bit oriented logical operation that derives a new dword from three SHA256 state variables (dword). // // Maj(A,B,C) := (A AND B) XOR (A AND C) XOR (B AND C) // // Yes, it is the same as sha_f2() // #define sha_maj(A,B,C) sha_f2((A), (B), (C)) BX_CPP_INLINE Bit32u rotate_r(Bit32u val_32, unsigned count) { return (val_32 >> count) | (val_32 << (32-count)); } BX_CPP_INLINE Bit32u rotate_l(Bit32u val_32, unsigned count) { return (val_32 << count) | (val_32 >> (32-count)); } // A bit oriented logical and rotational transformation performed on a dword for SHA256 BX_CPP_INLINE Bit32u sha256_transformation(Bit32u val_32, unsigned rotate1, unsigned rotate2, unsigned shr) { return rotate_r(val_32, rotate1) ^ rotate_r(val_32, rotate2) ^ (val_32 >> shr); } /* 0F 38 C8 */ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SHA1NEXTE_VdqWdqR(bxInstruction_c *i) { BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src()); op2.xmm32u(3) += rotate_l(op1.xmm32u(3), 30); BX_WRITE_XMM_REG(i->dst(), op2); BX_NEXT_INSTR(i); } /* 0F 38 C9 */ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SHA1MSG1_VdqWdqR(bxInstruction_c *i) { BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src()); op1.xmm32u(3) ^= op1.xmm32u(1); op1.xmm32u(2) ^= op1.xmm32u(0); op1.xmm32u(1) ^= op2.xmm32u(3); op1.xmm32u(0) ^= op2.xmm32u(2); BX_WRITE_XMM_REG(i->dst(), op1); BX_NEXT_INSTR(i); } /* 0F 38 CA */ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SHA1MSG2_VdqWdqR(bxInstruction_c *i) { BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src()); op1.xmm32u(3) = rotate_l(op1.xmm32u(3) ^ op2.xmm32u(2), 1); op1.xmm32u(2) = rotate_l(op1.xmm32u(2) ^ op2.xmm32u(1), 1); op1.xmm32u(1) = rotate_l(op1.xmm32u(1) ^ op2.xmm32u(0), 1); op1.xmm32u(0) = rotate_l(op1.xmm32u(0) ^ op1.xmm32u(3), 1); BX_WRITE_XMM_REG(i->dst(), op1); BX_NEXT_INSTR(i); } /* 0F 38 CB */ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SHA256RNDS2_VdqWdqR(bxInstruction_c *i) { BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src()), wk = BX_READ_XMM_REG(0); Bit32u A[3], B[3], C[3], D[3], E[3], F[3], G[3], H[3]; A[0] = op2.xmm32u(3); B[0] = op2.xmm32u(2); E[0] = op2.xmm32u(1); F[0] = op2.xmm32u(0); C[0] = op1.xmm32u(3); D[0] = op1.xmm32u(2); G[0] = op1.xmm32u(1); H[0] = op1.xmm32u(0); for (unsigned n=0; n < 2; n++) { Bit32u tmp = sha_ch (E[n], F[n], G[n]) + sha256_transformation(E[n], 6, 11, 25) + wk.xmm32u(n) + H[n]; A[n+1] = tmp + sha_maj(A[n], B[n], C[n]) + sha256_transformation(A[n], 2, 13, 22); B[n+1] = A[n]; C[n+1] = B[n]; D[n+1] = C[n]; E[n+1] = tmp + D[n]; F[n+1] = E[n]; G[n+1] = F[n]; H[n+1] = G[n]; } op1.xmm32u(0) = A[2]; op1.xmm32u(1) = B[2]; op1.xmm32u(2) = E[2]; op1.xmm32u(3) = F[2]; BX_WRITE_XMM_REG(i->dst(), op1); BX_NEXT_INSTR(i); } /* 0F 38 CC */ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SHA256MSG1_VdqWdqR(bxInstruction_c *i) { BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()); Bit32u op2 = BX_READ_XMM_REG_LO_DWORD(i->src()); op1.xmm32u(0) += sha256_transformation(op1.xmm32u(1), 7, 18, 3); op1.xmm32u(1) += sha256_transformation(op1.xmm32u(2), 7, 18, 3); op1.xmm32u(2) += sha256_transformation(op1.xmm32u(3), 7, 18, 3); op1.xmm32u(3) += sha256_transformation(op2, 7, 18, 3); BX_WRITE_XMM_REG(i->dst(), op1); BX_NEXT_INSTR(i); } /* 0F 38 CD */ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SHA256MSG2_VdqWdqR(bxInstruction_c *i) { BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src()); op1.xmm32u(0) += sha256_transformation(op2.xmm32u(2), 17, 19, 10); op1.xmm32u(1) += sha256_transformation(op2.xmm32u(3), 17, 19, 10); op1.xmm32u(2) += sha256_transformation(op1.xmm32u(0), 17, 19, 10); op1.xmm32u(3) += sha256_transformation(op1.xmm32u(1), 17, 19, 10); BX_WRITE_XMM_REG(i->dst(), op1); BX_NEXT_INSTR(i); } /* 0F 3A CC */ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SHA1RNDS4_VdqWdqIbR(bxInstruction_c *i) { // SHA1 Constants dependent on immediate i static const Bit32u sha_Ki[4] = { 0x5A827999, 0x6ED9EBA1, 0X8F1BBCDC, 0xCA62C1D6 }; BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src()); unsigned imm = i->Ib() & 0x3; Bit32u K = sha_Ki[imm]; Bit32u W[4] = { op2.xmm32u(3), op2.xmm32u(2), op2.xmm32u(1), op2.xmm32u(0) }; Bit32u A[5], B[5], C[5], D[5], E[5]; A[0] = op1.xmm32u(3); B[0] = op1.xmm32u(2); C[0] = op1.xmm32u(1); D[0] = op1.xmm32u(0); E[0] = 0; for (unsigned n=0; n < 4; n++) { A[n+1] = sha_f(B[n], C[n], D[n], imm) + rotate_l(A[n], 5) + W[n] + E[n] + K; B[n+1] = A[n]; C[n+1] = rotate_l(B[n], 30); D[n+1] = C[n]; E[n+1] = D[n]; } op1.xmm32u(0) = A[4]; op1.xmm32u(1) = B[4]; op1.xmm32u(2) = C[4]; op1.xmm32u(3) = D[4]; BX_WRITE_XMM_REG(i->dst(), op1); BX_NEXT_INSTR(i); } #endif