///////////////////////////////////////////////////////////////////////// // $Id: sse_string.cc,v 1.13 2009-10-14 20:45:29 sshwarts Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (c) 2007-2009 Stanislav Shwartsman // Written by Stanislav Shwartsman [sshwarts at sourceforge net] // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////// #define NEED_CPU_REG_SHORTCUTS 1 #include "bochs.h" #include "cpu.h" #define LOG_THIS BX_CPU_THIS_PTR // Make code more tidy with a few macros. 
#if BX_SUPPORT_X86_64==0
#define RCX ECX
#endif

#if (BX_SUPPORT_SSE > 4) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)

// Build the table of pairwise element comparisons between op1 and op2.
//
//   BoolRes - output table, addressed as BoolRes[op2 index][op1 index]
//   op1/op2 - the two packed operands
//   imm     - control byte:
//               imm[1:0] element format: 0 = unsigned bytes, 1 = unsigned
//                        words, 2 = signed bytes, 3 = signed words
//               imm[3:2] aggregation: 1 = 'ranges', any other value uses a
//                        plain equality test
//
// Byte formats fill all 16x16 entries; word formats fill only the first
// 8x8 entries and leave the rest of the table untouched.
static void compare_strings(Bit8u BoolRes[16][16], BxPackedXmmRegister op1, BxPackedXmmRegister op2, Bit8u imm)
{
  // Decided once up front: only aggregation mode 1 uses ordered compares.
  const bool ranges = (((imm >> 2) & 3) == 1);
  unsigned n1, n2;   // n1 walks op1 elements, n2 walks op2 elements

  // In 'ranges' mode the op1 elements form (lower, upper) bound pairs:
  // an even-indexed op1 element is tested with '<=' against the op2
  // element, an odd-indexed one with '>='.
  switch (imm & 3) {
    case 0: /* unsigned bytes */
      for (n2 = 0; n2 < 16; n2++) {
        for (n1 = 0; n1 < 16; n1++) {
          if (! ranges)
            BoolRes[n2][n1] = (op1.xmmubyte(n1) == op2.xmmubyte(n2));
          else if (n1 & 1)
            BoolRes[n2][n1] = (op1.xmmubyte(n1) >= op2.xmmubyte(n2));
          else
            BoolRes[n2][n1] = (op1.xmmubyte(n1) <= op2.xmmubyte(n2));
        }
      }
      break;

    case 1: /* unsigned words */
      for (n2 = 0; n2 < 8; n2++) {
        for (n1 = 0; n1 < 8; n1++) {
          if (! ranges)
            BoolRes[n2][n1] = (op1.xmm16u(n1) == op2.xmm16u(n2));
          else if (n1 & 1)
            BoolRes[n2][n1] = (op1.xmm16u(n1) >= op2.xmm16u(n2));
          else
            BoolRes[n2][n1] = (op1.xmm16u(n1) <= op2.xmm16u(n2));
        }
      }
      break;

    case 2: /* signed bytes */
      for (n2 = 0; n2 < 16; n2++) {
        for (n1 = 0; n1 < 16; n1++) {
          if (! ranges)
            BoolRes[n2][n1] = (op1.xmmsbyte(n1) == op2.xmmsbyte(n2));
          else if (n1 & 1)
            BoolRes[n2][n1] = (op1.xmmsbyte(n1) >= op2.xmmsbyte(n2));
          else
            BoolRes[n2][n1] = (op1.xmmsbyte(n1) <= op2.xmmsbyte(n2));
        }
      }
      break;

    case 3: /* signed words */
      for (n2 = 0; n2 < 8; n2++) {
        for (n1 = 0; n1 < 8; n1++) {
          if (! ranges)
            BoolRes[n2][n1] = (op1.xmm16s(n1) == op2.xmm16s(n2));
          else if (n1 & 1)
            BoolRes[n2][n1] = (op1.xmm16s(n1) >= op2.xmm16s(n2));
          else
            BoolRes[n2][n1] = (op1.xmm16s(n1) <= op2.xmm16s(n2));
        }
      }
      break;
  }
}

// Turn an explicit 32-bit length into an element count: the magnitude of
// reg32, saturated at 8 when imm[0] is set (word elements) or at 16 when
// it is clear (byte elements).
static unsigned find_eos32(Bit32s reg32, Bit8u imm)
{
  const Bit32s max_len = (imm & 0x1) ? 8 : 16;

  // Saturate first, so the negation below only ever sees small values.
  if (reg32 > max_len || reg32 < -max_len)
    return (unsigned) max_len;

  return (unsigned) ((reg32 < 0) ? -reg32 : reg32);
}

#if BX_SUPPORT_X86_64
// 64-bit flavour of find_eos32: magnitude of reg64, saturated at 8 or 16
// elements according to imm[0].
static unsigned find_eos64(Bit64s reg64, Bit8u imm)
{
  const Bit64s max_len = (imm & 0x1) ? 8 : 16;

  // Saturate first, so the negation below only ever sees small values.
  if (reg64 > max_len || reg64 < -max_len)
    return (unsigned) max_len;

  return (unsigned) ((reg64 < 0) ? -reg64 : reg64);
}
#endif

// Implicit length: index of the first zero element of op, scanning 8 words
// when imm[0] is set and 16 bytes otherwise. Returns the element count
// (8 or 16) when op holds no zero element.
static unsigned find_eos(BxPackedXmmRegister op, Bit8u imm)
{
  unsigned pos = 0;

  if (imm & 0x1) {
    while (pos < 8 && op.xmm16u(pos) != 0) pos++;
  }
  else {
    while (pos < 16 && op.xmmubyte(pos) != 0) pos++;
  }

  return pos;
}

// Replace a comparison result when one or both of the compared elements
// are flagged invalid.
//
//   val     - the raw comparison result
//   i_valid - non-zero when element i is valid
//   j_valid - non-zero when element j is valid
//   imm     - control byte; imm[3:2] selects the aggregation mode
//
// Forced value by mode when at least one element is invalid:
//   0 'equal any', 1 'ranges' : always 0
//   2 'equal each'            : 1 if both are invalid, otherwise 0
//   3 'equal ordered'         : 1 if i is invalid, otherwise 0
// When both elements are valid, val is returned unchanged in every mode.
static bx_bool override_if_data_invalid(bx_bool val, bx_bool i_valid, bx_bool j_valid, Bit8u imm)
{
  if (i_valid && j_valid)
    return val;

  // From here on at least one of the two elements is invalid.
  switch ((imm >> 2) & 3) {
    case 2:  // 'equal each'
      return (! i_valid && ! j_valid) ? 1 : 0;

    case 3:  // 'equal ordered'
      return i_valid ? 0 : 1;

    default: // 'equal any' and 'ranges'
      return 0;
  }
}

static Bit16u aggregate(Bit8u BoolRes[16][16], unsigned len1, unsigned len2, Bit8u imm)
{
  unsigned aggregation_operation = (imm >> 2) & 3;
  unsigned num_elements = (imm & 0x1) ?
8 : 16; unsigned polarity = (imm >> 4) & 3; unsigned i,j,k; Bit16u result = 0; switch(aggregation_operation) { case 0: // 'equal any' for(j=0; j 4) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0) // for 3-byte opcodes #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0) /* 66 0F 3A 60 */ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPESTRM_VdqWdqIb(bxInstruction_c *i) { #if (BX_SUPPORT_SSE > 4) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0) BX_CPU_THIS_PTR prepareSSE(); BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result; Bit8u imm8 = i->Ib(); /* op2 is a register or memory reference */ if (i->modC0()) { op2 = BX_READ_XMM_REG(i->rm()); } else { bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); /* pointer, segment address pair */ readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2); } // compare all pairs of Ai, Bj Bit8u BoolRes[16][16]; compare_strings(BoolRes, op1, op2, imm8); unsigned len1, len2, num_elements = (imm8 & 0x1) ? 
8 : 16; #if BX_SUPPORT_X86_64 if (i->os64L()) { len1 = find_eos64(RAX, imm8); len2 = find_eos64(RDX, imm8); } else #endif { len1 = find_eos32(EAX, imm8); len2 = find_eos32(EDX, imm8); } Bit16u result2 = aggregate(BoolRes, len1, len2, imm8); // As defined by imm8[6], result2 is then either stored to the least // significant bits of XMM0 (zero extended to 128 bits) or expanded // into a byte/word-mask and then stored to XMM0 if (imm8 & 0x40) { if (num_elements == 8) { for (int index = 0; index < 8; index++) result.xmm16u(index) = (result2 & (1< 4) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0) BX_CPU_THIS_PTR prepareSSE(); BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2; Bit8u imm8 = i->Ib(); /* op2 is a register or memory reference */ if (i->modC0()) { op2 = BX_READ_XMM_REG(i->rm()); } else { bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); /* pointer, segment address pair */ readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2); } // compare all pairs of Ai, Bj Bit8u BoolRes[16][16]; compare_strings(BoolRes, op1, op2, imm8); unsigned len1, len2, num_elements = (imm8 & 0x1) ? 8 : 16; int index; #if BX_SUPPORT_X86_64 if (i->os64L()) { len1 = find_eos64(RAX, imm8); len2 = find_eos64(RDX, imm8); } else #endif { len1 = find_eos32(EAX, imm8); len2 = find_eos32(EDX, imm8); } Bit16u result2 = aggregate(BoolRes, len1, len2, imm8); // The index of the first (or last, according to imm8[6]) set bit of result2 // is returned to ECX. 
If no bits are set in IntRes2, ECX is set to 16 (8) if (imm8 & 0x40) { // The index returned to ECX is of the MSB in result2 for (index=num_elements-1; index>=0; index--) if (result2 & (1< 4) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0) BX_CPU_THIS_PTR prepareSSE(); BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result; Bit8u imm8 = i->Ib(); /* op2 is a register or memory reference */ if (i->modC0()) { op2 = BX_READ_XMM_REG(i->rm()); } else { bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); /* pointer, segment address pair */ readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2); } // compare all pairs of Ai, Bj Bit8u BoolRes[16][16]; compare_strings(BoolRes, op1, op2, imm8); unsigned num_elements = (imm8 & 0x1) ? 8 : 16; unsigned len1 = find_eos(op1, imm8); unsigned len2 = find_eos(op2, imm8); Bit16u result2 = aggregate(BoolRes, len1, len2, imm8); // As defined by imm8[6], result2 is then either stored to the least // significant bits of XMM0 (zero extended to 128 bits) or expanded // into a byte/word-mask and then stored to XMM0 if (imm8 & 0x40) { if (num_elements == 8) { for (int index = 0; index < 8; index++) result.xmm16u(index) = (result2 & (1< 4) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0) BX_CPU_THIS_PTR prepareSSE(); BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2; Bit8u imm8 = i->Ib(); /* op2 is a register or memory reference */ if (i->modC0()) { op2 = BX_READ_XMM_REG(i->rm()); } else { bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); /* pointer, segment address pair */ readVirtualDQwordAligned(i->seg(), eaddr, (Bit8u *) &op2); } // compare all pairs of Ai, Bj Bit8u BoolRes[16][16]; compare_strings(BoolRes, op1, op2, imm8); unsigned num_elements = (imm8 & 0x1) ? 
8 : 16; int index; unsigned len1 = find_eos(op1, imm8); unsigned len2 = find_eos(op2, imm8); Bit16u result2 = aggregate(BoolRes, len1, len2, imm8); // The index of the first (or last, according to imm8[6]) set bit of result2 // is returned to ECX. If no bits are set in IntRes2, ECX is set to 16 (8) if (imm8 & 0x40) { // The index returned to ECX is of the MSB in result2 for (index=num_elements-1; index>=0; index--) if (result2 & (1<= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)