optimize sse and mmx code
This commit is contained in:
parent
72af6740ac
commit
8c5c078b13
@ -1,5 +1,5 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id: cpu.h,v 1.708 2011-01-15 15:17:28 sshwarts Exp $
|
||||
// $Id: cpu.h,v 1.709 2011-01-16 20:42:28 sshwarts Exp $
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (C) 2001-2011 The Bochs Project
|
||||
@ -2031,7 +2031,6 @@ public: // for now...
|
||||
BX_SMF void MOVHPS_MqVps(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void MOVAPS_VpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void MOVAPS_VpsWpsM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void MOVAPS_WpsVpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void MOVAPS_WpsVpsM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void CVTPI2PS_VpsQqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void CVTPI2PS_VpsQqM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
@ -2150,7 +2149,6 @@ public: // for now...
|
||||
BX_SMF void PADDQ_PqQq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void PADDQ_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void PMULLW_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void MOVQ_WqVqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void MOVDQ2Q_PqVRq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void MOVQ2DQ_VdqQq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void PMOVMSKB_GdUdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id: fetchdecode.cc,v 1.292 2011-01-09 20:18:02 sshwarts Exp $
|
||||
// $Id: fetchdecode.cc,v 1.293 2011-01-16 20:42:28 sshwarts Exp $
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (C) 2001-2011 The Bochs Project
|
||||
@ -756,7 +756,7 @@ static const BxOpcodeInfo_t BxOpcodeInfo32[512*2*2] = {
|
||||
/* 0F 0F /wm */ { BxImmediate_Ib, BX_IA_ERROR }, // 3DNow! Opcode Table
|
||||
/* 0F 10 /wr */ { BxPrefixSSE, BX_IA_MOVUPS_VpsWpsR, BxOpcodeGroupSSE_0f10 },
|
||||
/* 0F 10 /wm */ { BxPrefixSSE, BX_IA_MOVUPS_VpsWpsM, BxOpcodeGroupSSE_0f10 },
|
||||
/* 0F 11 /wr */ { BxPrefixSSE, BX_IA_MOVUPS_WpsVpsR, BxOpcodeGroupSSE_0f11 },
|
||||
/* 0F 11 /wr */ { BxPrefixSSE | BxArithDstRM, BX_IA_MOVUPS_VpsWpsR, BxOpcodeGroupSSE_0f11 },
|
||||
/* 0F 11 /wm */ { BxPrefixSSE, BX_IA_MOVUPS_WpsVpsM, BxOpcodeGroupSSE_0f11 },
|
||||
/* 0F 12 /wr */ { BxPrefixSSE, BX_IA_MOVHLPS_VpsWpsR, BxOpcodeGroupSSE_0f12 },
|
||||
/* 0F 12 /wm */ { BxPrefixSSE, BX_IA_MOVLPS_VpsMq, BxOpcodeGroupSSE_0f12 },
|
||||
@ -823,7 +823,7 @@ static const BxOpcodeInfo_t BxOpcodeInfo32[512*2*2] = {
|
||||
/* 0F 27 /wm */ { 0, BX_IA_ERROR },
|
||||
/* 0F 28 /wr */ { BxPrefixSSE, BX_IA_MOVAPS_VpsWpsR, BxOpcodeGroupSSE_0f28 },
|
||||
/* 0F 28 /wm */ { BxPrefixSSE, BX_IA_MOVAPS_VpsWpsM, BxOpcodeGroupSSE_0f28 },
|
||||
/* 0F 29 /wr */ { BxPrefixSSE, BX_IA_MOVAPS_WpsVpsR, BxOpcodeGroupSSE_0f29 },
|
||||
/* 0F 29 /wr */ { BxPrefixSSE | BxArithDstRM, BX_IA_MOVAPS_VpsWpsR, BxOpcodeGroupSSE_0f29 },
|
||||
/* 0F 29 /wm */ { BxPrefixSSE, BX_IA_MOVAPS_WpsVpsM, BxOpcodeGroupSSE_0f29 },
|
||||
/* 0F 2A /wr */ { BxPrefixSSE, BX_IA_CVTPI2PS_VpsQqR, BxOpcodeGroupSSE_0f2a },
|
||||
/* 0F 2A /wm */ { BxPrefixSSE, BX_IA_CVTPI2PS_VpsQqM, BxOpcodeGroupSSE_0f2a },
|
||||
@ -1831,7 +1831,7 @@ static const BxOpcodeInfo_t BxOpcodeInfo32[512*2*2] = {
|
||||
/* 0F 0F /dm */ { BxImmediate_Ib, BX_IA_ERROR }, // 3DNow! Opcode Table
|
||||
/* 0F 10 /dr */ { BxPrefixSSE, BX_IA_MOVUPS_VpsWpsR, BxOpcodeGroupSSE_0f10 },
|
||||
/* 0F 10 /dm */ { BxPrefixSSE, BX_IA_MOVUPS_VpsWpsM, BxOpcodeGroupSSE_0f10 },
|
||||
/* 0F 11 /dr */ { BxPrefixSSE, BX_IA_MOVUPS_WpsVpsR, BxOpcodeGroupSSE_0f11 },
|
||||
/* 0F 11 /dr */ { BxPrefixSSE | BxArithDstRM, BX_IA_MOVUPS_VpsWpsR, BxOpcodeGroupSSE_0f11 },
|
||||
/* 0F 11 /dm */ { BxPrefixSSE, BX_IA_MOVUPS_WpsVpsM, BxOpcodeGroupSSE_0f11 },
|
||||
/* 0F 12 /dr */ { BxPrefixSSE, BX_IA_MOVHLPS_VpsWpsR, BxOpcodeGroupSSE_0f12 },
|
||||
/* 0F 12 /dm */ { BxPrefixSSE, BX_IA_MOVLPS_VpsMq, BxOpcodeGroupSSE_0f12 },
|
||||
@ -1898,7 +1898,7 @@ static const BxOpcodeInfo_t BxOpcodeInfo32[512*2*2] = {
|
||||
/* 0F 27 /dm */ { 0, BX_IA_ERROR },
|
||||
/* 0F 28 /dr */ { BxPrefixSSE, BX_IA_MOVAPS_VpsWpsR, BxOpcodeGroupSSE_0f28 },
|
||||
/* 0F 28 /dm */ { BxPrefixSSE, BX_IA_MOVAPS_VpsWpsM, BxOpcodeGroupSSE_0f28 },
|
||||
/* 0F 29 /dr */ { BxPrefixSSE, BX_IA_MOVAPS_WpsVpsR, BxOpcodeGroupSSE_0f29 },
|
||||
/* 0F 29 /dr */ { BxPrefixSSE | BxArithDstRM, BX_IA_MOVAPS_VpsWpsR, BxOpcodeGroupSSE_0f29 },
|
||||
/* 0F 29 /dm */ { BxPrefixSSE, BX_IA_MOVAPS_WpsVpsM, BxOpcodeGroupSSE_0f29 },
|
||||
/* 0F 2A /dr */ { BxPrefixSSE, BX_IA_CVTPI2PS_VpsQqR, BxOpcodeGroupSSE_0f2a },
|
||||
/* 0F 2A /dm */ { BxPrefixSSE, BX_IA_CVTPI2PS_VpsQqM, BxOpcodeGroupSSE_0f2a },
|
||||
|
@ -1,5 +1,5 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id: fetchdecode64.cc,v 1.287 2011-01-09 20:18:02 sshwarts Exp $
|
||||
// $Id: fetchdecode64.cc,v 1.288 2011-01-16 20:42:28 sshwarts Exp $
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (C) 2001-2011 The Bochs Project
|
||||
@ -703,7 +703,7 @@ static const BxOpcodeInfo_t BxOpcodeInfo64[512*3*2] = {
|
||||
/* 0F 0F /wm */ { BxImmediate_Ib, BX_IA_ERROR }, // 3DNow! Opcode Table
|
||||
/* 0F 10 /wr */ { BxPrefixSSE, BX_IA_MOVUPS_VpsWpsR, BxOpcodeGroupSSE_0f10 },
|
||||
/* 0F 10 /wm */ { BxPrefixSSE, BX_IA_MOVUPS_VpsWpsM, BxOpcodeGroupSSE_0f10 },
|
||||
/* 0F 11 /wr */ { BxPrefixSSE, BX_IA_MOVUPS_WpsVpsR, BxOpcodeGroupSSE_0f11 },
|
||||
/* 0F 11 /wr */ { BxPrefixSSE | BxArithDstRM, BX_IA_MOVUPS_VpsWpsR, BxOpcodeGroupSSE_0f11 },
|
||||
/* 0F 11 /wm */ { BxPrefixSSE, BX_IA_MOVUPS_WpsVpsM, BxOpcodeGroupSSE_0f11 },
|
||||
/* 0F 12 /wr */ { BxPrefixSSE, BX_IA_MOVHLPS_VpsWpsR, BxOpcodeGroupSSE_0f12 },
|
||||
/* 0F 12 /wm */ { BxPrefixSSE, BX_IA_MOVLPS_VpsMq, BxOpcodeGroupSSE_0f12 },
|
||||
@ -751,7 +751,7 @@ static const BxOpcodeInfo_t BxOpcodeInfo64[512*3*2] = {
|
||||
/* 0F 27 /wm */ { 0, BX_IA_ERROR },
|
||||
/* 0F 28 /wr */ { BxPrefixSSE, BX_IA_MOVAPS_VpsWpsR, BxOpcodeGroupSSE_0f28 },
|
||||
/* 0F 28 /wm */ { BxPrefixSSE, BX_IA_MOVAPS_VpsWpsM, BxOpcodeGroupSSE_0f28 },
|
||||
/* 0F 29 /wr */ { BxPrefixSSE, BX_IA_MOVAPS_WpsVpsR, BxOpcodeGroupSSE_0f29 },
|
||||
/* 0F 29 /wr */ { BxPrefixSSE | BxArithDstRM, BX_IA_MOVAPS_VpsWpsR, BxOpcodeGroupSSE_0f29 },
|
||||
/* 0F 29 /wm */ { BxPrefixSSE, BX_IA_MOVAPS_WpsVpsM, BxOpcodeGroupSSE_0f29 },
|
||||
/* 0F 2A /wr */ { BxPrefixSSE, BX_IA_CVTPI2PS_VpsQqR, BxOpcodeGroupSSE_0f2a },
|
||||
/* 0F 2A /wm */ { BxPrefixSSE, BX_IA_CVTPI2PS_VpsQqM, BxOpcodeGroupSSE_0f2a },
|
||||
@ -1730,7 +1730,7 @@ static const BxOpcodeInfo_t BxOpcodeInfo64[512*3*2] = {
|
||||
/* 0F 0F /dm */ { BxImmediate_Ib, BX_IA_ERROR }, // 3DNow! Opcode Table
|
||||
/* 0F 10 /dr */ { BxPrefixSSE, BX_IA_MOVUPS_VpsWpsR, BxOpcodeGroupSSE_0f10 },
|
||||
/* 0F 10 /dm */ { BxPrefixSSE, BX_IA_MOVUPS_VpsWpsM, BxOpcodeGroupSSE_0f10 },
|
||||
/* 0F 11 /dr */ { BxPrefixSSE, BX_IA_MOVUPS_WpsVpsR, BxOpcodeGroupSSE_0f11 },
|
||||
/* 0F 11 /dr */ { BxPrefixSSE | BxArithDstRM, BX_IA_MOVUPS_VpsWpsR, BxOpcodeGroupSSE_0f11 },
|
||||
/* 0F 11 /dm */ { BxPrefixSSE, BX_IA_MOVUPS_WpsVpsM, BxOpcodeGroupSSE_0f11 },
|
||||
/* 0F 12 /dr */ { BxPrefixSSE, BX_IA_MOVHLPS_VpsWpsR, BxOpcodeGroupSSE_0f12 },
|
||||
/* 0F 12 /dm */ { BxPrefixSSE, BX_IA_MOVLPS_VpsMq, BxOpcodeGroupSSE_0f12 },
|
||||
@ -1778,7 +1778,7 @@ static const BxOpcodeInfo_t BxOpcodeInfo64[512*3*2] = {
|
||||
/* 0F 27 /dm */ { 0, BX_IA_ERROR },
|
||||
/* 0F 28 /dr */ { BxPrefixSSE, BX_IA_MOVAPS_VpsWpsR, BxOpcodeGroupSSE_0f28 },
|
||||
/* 0F 28 /dm */ { BxPrefixSSE, BX_IA_MOVAPS_VpsWpsM, BxOpcodeGroupSSE_0f28 },
|
||||
/* 0F 29 /dr */ { BxPrefixSSE, BX_IA_MOVAPS_WpsVpsR, BxOpcodeGroupSSE_0f29 },
|
||||
/* 0F 29 /dr */ { BxPrefixSSE | BxArithDstRM, BX_IA_MOVAPS_VpsWpsR, BxOpcodeGroupSSE_0f29 },
|
||||
/* 0F 29 /dm */ { BxPrefixSSE, BX_IA_MOVAPS_WpsVpsM, BxOpcodeGroupSSE_0f29 },
|
||||
/* 0F 2A /dr */ { BxPrefixSSE, BX_IA_CVTPI2PS_VpsQqR, BxOpcodeGroupSSE_0f2a },
|
||||
/* 0F 2A /dm */ { BxPrefixSSE, BX_IA_CVTPI2PS_VpsQqM, BxOpcodeGroupSSE_0f2a },
|
||||
@ -2757,7 +2757,7 @@ static const BxOpcodeInfo_t BxOpcodeInfo64[512*3*2] = {
|
||||
/* 0F 0F /qm */ { BxImmediate_Ib, BX_IA_ERROR }, // 3DNow! Opcode Table
|
||||
/* 0F 10 /qr */ { BxPrefixSSE, BX_IA_MOVUPS_VpsWpsR, BxOpcodeGroupSSE_0f10 },
|
||||
/* 0F 10 /qm */ { BxPrefixSSE, BX_IA_MOVUPS_VpsWpsM, BxOpcodeGroupSSE_0f10 },
|
||||
/* 0F 11 /qr */ { BxPrefixSSE, BX_IA_MOVUPS_WpsVpsR, BxOpcodeGroupSSE_0f11 },
|
||||
/* 0F 11 /qr */ { BxPrefixSSE | BxArithDstRM, BX_IA_MOVUPS_VpsWpsR, BxOpcodeGroupSSE_0f11 },
|
||||
/* 0F 11 /qm */ { BxPrefixSSE, BX_IA_MOVUPS_WpsVpsM, BxOpcodeGroupSSE_0f11 },
|
||||
/* 0F 12 /qr */ { BxPrefixSSE, BX_IA_MOVHLPS_VpsWpsR, BxOpcodeGroupSSE_0f12 },
|
||||
/* 0F 12 /qm */ { BxPrefixSSE, BX_IA_MOVLPS_VpsMq, BxOpcodeGroupSSE_0f12 },
|
||||
@ -2805,7 +2805,7 @@ static const BxOpcodeInfo_t BxOpcodeInfo64[512*3*2] = {
|
||||
/* 0F 27 /qm */ { 0, BX_IA_ERROR },
|
||||
/* 0F 28 /qr */ { BxPrefixSSE, BX_IA_MOVAPS_VpsWpsR, BxOpcodeGroupSSE_0f28 },
|
||||
/* 0F 28 /qm */ { BxPrefixSSE, BX_IA_MOVAPS_VpsWpsM, BxOpcodeGroupSSE_0f28 },
|
||||
/* 0F 29 /qr */ { BxPrefixSSE, BX_IA_MOVAPS_WpsVpsR, BxOpcodeGroupSSE_0f29 },
|
||||
/* 0F 29 /qr */ { BxPrefixSSE | BxArithDstRM, BX_IA_MOVAPS_VpsWpsR, BxOpcodeGroupSSE_0f29 },
|
||||
/* 0F 29 /qm */ { BxPrefixSSE, BX_IA_MOVAPS_WpsVpsM, BxOpcodeGroupSSE_0f29 },
|
||||
/* 0F 2A /qr */ { BxPrefixSSE, BX_IA_CVTPI2PS_VpsQqR, BxOpcodeGroupSSE_0f2a },
|
||||
/* 0F 2A /qm */ { BxPrefixSSE, BX_IA_CVTPI2PS_VpsQqM, BxOpcodeGroupSSE_0f2a },
|
||||
|
@ -1,5 +1,5 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id: fetchdecode_sse.h,v 1.26 2011-01-15 21:46:41 sshwarts Exp $
|
||||
// $Id: fetchdecode_sse.h,v 1.27 2011-01-16 20:42:28 sshwarts Exp $
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2005-2011 Stanislav Shwartsman
|
||||
@ -129,7 +129,7 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f28[6] = {
|
||||
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f29[6] = {
|
||||
/* 66 /r */ { 0, BX_IA_MOVAPD_WpdVpdR },
|
||||
/* 66 /r */ { BxArithDstRM, BX_IA_MOVAPD_VpdWpdR },
|
||||
/* 66 /m */ { 0, BX_IA_MOVAPD_WpdVpdM },
|
||||
/* F3 /r */ { 0, BX_IA_ERROR },
|
||||
/* F3 /m */ { 0, BX_IA_ERROR },
|
||||
@ -547,9 +547,9 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f7eQ[6] = {
|
||||
#endif
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0f7f[6] = {
|
||||
/* 66 /r */ { 0, BX_IA_MOVDQA_WdqVdqR },
|
||||
/* 66 /r */ { BxArithDstRM, BX_IA_MOVDQA_VdqWdqR },
|
||||
/* 66 /m */ { 0, BX_IA_MOVDQA_WdqVdqM },
|
||||
/* F3 /r */ { 0, BX_IA_MOVDQU_WdqVdqR },
|
||||
/* F3 /r */ { BxArithDstRM, BX_IA_MOVDQU_VdqWdqR },
|
||||
/* F3 /m */ { 0, BX_IA_MOVDQU_WdqVdqM },
|
||||
/* F2 /r */ { 0, BX_IA_ERROR },
|
||||
/* F2 /m */ { 0, BX_IA_ERROR }
|
||||
@ -646,7 +646,7 @@ static const BxOpcodeInfo_t BxOpcodeGroupSSE_0fd5[6] = {
|
||||
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupSSE_0fd6[6] = {
|
||||
/* 66 /r */ { 0, BX_IA_MOVQ_WqVqR },
|
||||
/* 66 /r */ { BxArithDstRM, BX_IA_MOVQ_VqWqR },
|
||||
/* 66 /m */ { 0, BX_IA_MOVQ_WqVqM },
|
||||
/* F3 /r */ { 0, BX_IA_MOVQ2DQ_VdqQq },
|
||||
/* F3 /m */ { 0, BX_IA_ERROR },
|
||||
|
@ -1,5 +1,5 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id: ia_opcodes.h,v 1.67 2011-01-15 15:17:28 sshwarts Exp $
|
||||
// $Id: ia_opcodes.h,v 1.68 2011-01-16 20:42:28 sshwarts Exp $
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2008-2011 Stanislav Shwartsman
|
||||
@ -986,7 +986,6 @@ bx_define_opcode(BX_IA_ANDNPS_VpsWpsR, &BX_CPU_C::ANDNPS_VpsWpsR, NULL, BX_CPU_S
|
||||
bx_define_opcode(BX_IA_ANDNPS_VpsWpsM, &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::ANDNPS_VpsWpsR, BX_CPU_SSE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVUPS_VpsWpsR, &BX_CPU_C::MOVAPS_VpsWpsR, NULL, BX_CPU_SSE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVUPS_VpsWpsM, &BX_CPU_C::MOVUPS_VpsWpsM, NULL, BX_CPU_SSE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVUPS_WpsVpsR, &BX_CPU_C::MOVAPS_WpsVpsR, NULL, BX_CPU_SSE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVUPS_WpsVpsM, &BX_CPU_C::MOVUPS_WpsVpsM, NULL, BX_CPU_SSE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVSS_VssWssR, &BX_CPU_C::MOVSS_VssWssR, NULL, BX_CPU_SSE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVSS_VssWssM, &BX_CPU_C::MOVSS_VssWssM, NULL, BX_CPU_SSE, BX_PREPARE_SSE)
|
||||
@ -999,7 +998,6 @@ bx_define_opcode(BX_IA_MOVHPS_VpsMq, &BX_CPU_C::MOVHPS_VpsMq, NULL, BX_CPU_SSE,
|
||||
bx_define_opcode(BX_IA_MOVHPS_MqVps, &BX_CPU_C::MOVHPS_MqVps, NULL, BX_CPU_SSE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVAPS_VpsWpsR, &BX_CPU_C::MOVAPS_VpsWpsR, NULL, BX_CPU_SSE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVAPS_VpsWpsM, &BX_CPU_C::MOVAPS_VpsWpsM, NULL, BX_CPU_SSE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVAPS_WpsVpsR, &BX_CPU_C::MOVAPS_WpsVpsR, NULL, BX_CPU_SSE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVAPS_WpsVpsM, &BX_CPU_C::MOVAPS_WpsVpsM, NULL, BX_CPU_SSE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_CVTPI2PS_VpsQqR, &BX_CPU_C::CVTPI2PS_VpsQqR, NULL, BX_CPU_SSE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_CVTPI2PS_VpsQqM, &BX_CPU_C::CVTPI2PS_VpsQqM, NULL, BX_CPU_SSE, BX_PREPARE_SSE)
|
||||
@ -1148,15 +1146,12 @@ bx_define_opcode(BX_IA_PUNPCKLDQ_VdqWdqR, &BX_CPU_C::UNPCKLPS_VpsWdqR, NULL, BX_
|
||||
bx_define_opcode(BX_IA_PUNPCKLDQ_VdqWdqM, &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::UNPCKLPS_VpsWdqR, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVAPD_VpdWpdR, &BX_CPU_C::MOVAPS_VpsWpsR, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVAPD_VpdWpdM, &BX_CPU_C::MOVAPS_VpsWpsM, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVAPD_WpdVpdR, &BX_CPU_C::MOVAPS_WpsVpsR, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVAPD_WpdVpdM, &BX_CPU_C::MOVAPS_WpsVpsM, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVDQA_VdqWdqR, &BX_CPU_C::MOVAPS_VpsWpsR, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVDQA_VdqWdqM, &BX_CPU_C::MOVAPS_VpsWpsM, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVDQA_WdqVdqR, &BX_CPU_C::MOVAPS_WpsVpsR, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVDQA_WdqVdqM, &BX_CPU_C::MOVAPS_WpsVpsM, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVDQU_VdqWdqR, &BX_CPU_C::MOVAPS_VpsWpsR, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVDQU_VdqWdqM, &BX_CPU_C::MOVUPS_VpsWpsM, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVDQU_WdqVdqR, &BX_CPU_C::MOVAPS_WpsVpsR, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVDQU_WdqVdqM, &BX_CPU_C::MOVUPS_WpsVpsM, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVHPD_MqVsd, &BX_CPU_C::MOVHPS_MqVps, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVHPD_VsdMq, &BX_CPU_C::MOVHPS_VpsMq, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
@ -1166,7 +1161,6 @@ bx_define_opcode(BX_IA_MOVNTDQ_MdqVdq, &BX_CPU_C::MOVAPS_WpsVpsM, NULL, BX_CPU_S
|
||||
bx_define_opcode(BX_IA_MOVNTPD_MpdVpd, &BX_CPU_C::MOVAPS_WpsVpsM, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVUPD_VpdWpdR, &BX_CPU_C::MOVAPS_VpsWpsR, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVUPD_VpdWpdM, &BX_CPU_C::MOVUPS_VpsWpsM, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVUPD_WpdVpdR, &BX_CPU_C::MOVAPS_WpsVpsR, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVUPD_WpdVpdM, &BX_CPU_C::MOVUPS_WpsVpsM, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_ANDNPD_VpdWpdR, &BX_CPU_C::ANDNPS_VpsWpsR, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_ANDNPD_VpdWpdM, &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::ANDNPS_VpsWpsR, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
@ -1249,7 +1243,6 @@ bx_define_opcode(BX_IA_PADDQ_VdqWdqR, &BX_CPU_C::PADDQ_VdqWdqR, NULL, BX_CPU_SSE
|
||||
bx_define_opcode(BX_IA_PADDQ_VdqWdqM, &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::PADDQ_VdqWdqR, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_PMULLW_VdqWdqR, &BX_CPU_C::PMULLW_VdqWdqR, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_PMULLW_VdqWdqM, &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::PMULLW_VdqWdqR, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVQ_WqVqR, &BX_CPU_C::MOVQ_WqVqR, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVQ_WqVqM, &BX_CPU_C::MOVLPS_MqVps, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVDQ2Q_PqVRq, &BX_CPU_C::MOVDQ2Q_PqVRq, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_MOVQ2DQ_VdqQq, &BX_CPU_C::MOVQ2DQ_VdqQq, NULL, BX_CPU_SSE2, BX_PREPARE_SSE)
|
||||
|
154
bochs/cpu/mmx.cc
154
bochs/cpu/mmx.cc
@ -1,5 +1,5 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id: mmx.cc,v 1.99 2011-01-08 19:50:22 sshwarts Exp $
|
||||
// $Id: mmx.cc,v 1.100 2011-01-16 20:42:28 sshwarts Exp $
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2002-2010 Stanislav Shwartsman
|
||||
@ -82,7 +82,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHADDW_PqQq(bxInstruction_c *i)
|
||||
#if BX_CPU_LEVEL >= 5
|
||||
BX_CPU_THIS_PTR prepareMMX();
|
||||
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
|
||||
|
||||
/* op2 is a register or memory reference */
|
||||
if (i->modC0()) {
|
||||
@ -96,12 +96,12 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHADDW_PqQq(bxInstruction_c *i)
|
||||
|
||||
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
|
||||
|
||||
MMXUW0(result) = MMXUW0(op1) + MMXUW1(op1);
|
||||
MMXUW1(result) = MMXUW2(op1) + MMXUW3(op1);
|
||||
MMXUW2(result) = MMXUW0(op2) + MMXUW1(op2);
|
||||
MMXUW3(result) = MMXUW2(op2) + MMXUW3(op2);
|
||||
MMXUW0(op1) = MMXUW0(op1) + MMXUW1(op1);
|
||||
MMXUW1(op1) = MMXUW2(op1) + MMXUW3(op1);
|
||||
MMXUW2(op1) = MMXUW0(op2) + MMXUW1(op2);
|
||||
MMXUW3(op1) = MMXUW2(op2) + MMXUW3(op2);
|
||||
|
||||
BX_WRITE_MMX_REG(i->nnn(), result);
|
||||
BX_WRITE_MMX_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -138,7 +138,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHADDSW_PqQq(bxInstruction_c *i)
|
||||
#if BX_CPU_LEVEL >= 5
|
||||
BX_CPU_THIS_PTR prepareMMX();
|
||||
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
|
||||
|
||||
/* op2 is a register or memory reference */
|
||||
if (i->modC0()) {
|
||||
@ -152,13 +152,13 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHADDSW_PqQq(bxInstruction_c *i)
|
||||
|
||||
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
|
||||
|
||||
MMXSW0(result) = SaturateDwordSToWordS(Bit32s(MMXSW0(op1)) + Bit32s(MMXSW1(op1)));
|
||||
MMXSW1(result) = SaturateDwordSToWordS(Bit32s(MMXSW2(op1)) + Bit32s(MMXSW3(op1)));
|
||||
MMXSW2(result) = SaturateDwordSToWordS(Bit32s(MMXSW0(op2)) + Bit32s(MMXSW1(op2)));
|
||||
MMXSW3(result) = SaturateDwordSToWordS(Bit32s(MMXSW2(op2)) + Bit32s(MMXSW3(op2)));
|
||||
MMXSW0(op1) = SaturateDwordSToWordS(Bit32s(MMXSW0(op1)) + Bit32s(MMXSW1(op1)));
|
||||
MMXSW1(op1) = SaturateDwordSToWordS(Bit32s(MMXSW2(op1)) + Bit32s(MMXSW3(op1)));
|
||||
MMXSW2(op1) = SaturateDwordSToWordS(Bit32s(MMXSW0(op2)) + Bit32s(MMXSW1(op2)));
|
||||
MMXSW3(op1) = SaturateDwordSToWordS(Bit32s(MMXSW2(op2)) + Bit32s(MMXSW3(op2)));
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_MMX_REG(i->nnn(), result);
|
||||
BX_WRITE_MMX_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -184,7 +184,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMADDUBSW_PqQq(bxInstruction_c *i)
|
||||
|
||||
for(unsigned j=0; j<4; j++)
|
||||
{
|
||||
Bit32s temp = Bit32s(op1.mmxubyte(j*2+0))*Bit32s(op2.mmxsbyte(j*2+0)) +
|
||||
Bit32s temp = Bit32s(op1.mmxubyte(j*2+0))*Bit32s(op2.mmxsbyte(j*2)) +
|
||||
Bit32s(op1.mmxubyte(j*2+1))*Bit32s(op2.mmxsbyte(j*2+1));
|
||||
|
||||
result.mmx16s(j) = SaturateDwordSToWordS(temp);
|
||||
@ -201,7 +201,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHSUBSW_PqQq(bxInstruction_c *i)
|
||||
#if BX_CPU_LEVEL >= 5
|
||||
BX_CPU_THIS_PTR prepareMMX();
|
||||
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
|
||||
|
||||
/* op2 is a register or memory reference */
|
||||
if (i->modC0()) {
|
||||
@ -215,13 +215,13 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHSUBSW_PqQq(bxInstruction_c *i)
|
||||
|
||||
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
|
||||
|
||||
MMXSW0(result) = SaturateDwordSToWordS(Bit32s(MMXSW0(op1)) - Bit32s(MMXSW1(op1)));
|
||||
MMXSW1(result) = SaturateDwordSToWordS(Bit32s(MMXSW2(op1)) - Bit32s(MMXSW3(op1)));
|
||||
MMXSW2(result) = SaturateDwordSToWordS(Bit32s(MMXSW0(op2)) - Bit32s(MMXSW1(op2)));
|
||||
MMXSW3(result) = SaturateDwordSToWordS(Bit32s(MMXSW2(op2)) - Bit32s(MMXSW3(op2)));
|
||||
MMXSW0(op1) = SaturateDwordSToWordS(Bit32s(MMXSW0(op1)) - Bit32s(MMXSW1(op1)));
|
||||
MMXSW1(op1) = SaturateDwordSToWordS(Bit32s(MMXSW2(op1)) - Bit32s(MMXSW3(op1)));
|
||||
MMXSW2(op1) = SaturateDwordSToWordS(Bit32s(MMXSW0(op2)) - Bit32s(MMXSW1(op2)));
|
||||
MMXSW3(op1) = SaturateDwordSToWordS(Bit32s(MMXSW2(op2)) - Bit32s(MMXSW3(op2)));
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_MMX_REG(i->nnn(), result);
|
||||
BX_WRITE_MMX_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -231,7 +231,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHSUBW_PqQq(bxInstruction_c *i)
|
||||
#if BX_CPU_LEVEL >= 5
|
||||
BX_CPU_THIS_PTR prepareMMX();
|
||||
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
|
||||
|
||||
/* op2 is a register or memory reference */
|
||||
if (i->modC0()) {
|
||||
@ -245,12 +245,12 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHSUBW_PqQq(bxInstruction_c *i)
|
||||
|
||||
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
|
||||
|
||||
MMXUW0(result) = MMXUW0(op1) - MMXUW1(op1);
|
||||
MMXUW1(result) = MMXUW2(op1) - MMXUW3(op1);
|
||||
MMXUW2(result) = MMXUW0(op2) - MMXUW1(op2);
|
||||
MMXUW3(result) = MMXUW2(op2) - MMXUW3(op2);
|
||||
MMXUW0(op1) = MMXUW0(op1) - MMXUW1(op1);
|
||||
MMXUW1(op1) = MMXUW2(op1) - MMXUW3(op1);
|
||||
MMXUW2(op1) = MMXUW0(op2) - MMXUW1(op2);
|
||||
MMXUW3(op1) = MMXUW2(op2) - MMXUW3(op2);
|
||||
|
||||
BX_WRITE_MMX_REG(i->nnn(), result);
|
||||
BX_WRITE_MMX_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -260,7 +260,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHSUBD_PqQq(bxInstruction_c *i)
|
||||
#if BX_CPU_LEVEL >= 5
|
||||
BX_CPU_THIS_PTR prepareMMX();
|
||||
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
|
||||
|
||||
/* op2 is a register or memory reference */
|
||||
if (i->modC0()) {
|
||||
@ -274,10 +274,10 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHSUBD_PqQq(bxInstruction_c *i)
|
||||
|
||||
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
|
||||
|
||||
MMXUD0(result) = MMXUD0(op1) - MMXUD1(op1);
|
||||
MMXUD1(result) = MMXUD0(op2) - MMXUD1(op2);
|
||||
MMXUD0(op1) = MMXUD0(op1) - MMXUD1(op1);
|
||||
MMXUD1(op1) = MMXUD0(op2) - MMXUD1(op2);
|
||||
|
||||
BX_WRITE_MMX_REG(i->nnn(), result);
|
||||
BX_WRITE_MMX_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -622,7 +622,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKSSWB_PqQq(bxInstruction_c *i)
|
||||
#if BX_CPU_LEVEL >= 5
|
||||
BX_CPU_THIS_PTR prepareMMX();
|
||||
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
|
||||
|
||||
/* op2 is a register or memory reference */
|
||||
if (i->modC0()) {
|
||||
@ -636,17 +636,18 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKSSWB_PqQq(bxInstruction_c *i)
|
||||
|
||||
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
|
||||
|
||||
MMXSB0(result) = SaturateWordSToByteS(MMXSW0(op1));
|
||||
MMXSB1(result) = SaturateWordSToByteS(MMXSW1(op1));
|
||||
MMXSB2(result) = SaturateWordSToByteS(MMXSW2(op1));
|
||||
MMXSB3(result) = SaturateWordSToByteS(MMXSW3(op1));
|
||||
MMXSB4(result) = SaturateWordSToByteS(MMXSW0(op2));
|
||||
MMXSB5(result) = SaturateWordSToByteS(MMXSW1(op2));
|
||||
MMXSB6(result) = SaturateWordSToByteS(MMXSW2(op2));
|
||||
MMXSB7(result) = SaturateWordSToByteS(MMXSW3(op2));
|
||||
MMXSB0(op1) = SaturateWordSToByteS(MMXSW0(op1));
|
||||
MMXSB1(op1) = SaturateWordSToByteS(MMXSW1(op1));
|
||||
MMXSB2(op1) = SaturateWordSToByteS(MMXSW2(op1));
|
||||
MMXSB3(op1) = SaturateWordSToByteS(MMXSW3(op1));
|
||||
|
||||
MMXSB4(op1) = SaturateWordSToByteS(MMXSW0(op2));
|
||||
MMXSB5(op1) = SaturateWordSToByteS(MMXSW1(op2));
|
||||
MMXSB6(op1) = SaturateWordSToByteS(MMXSW2(op2));
|
||||
MMXSB7(op1) = SaturateWordSToByteS(MMXSW3(op2));
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_MMX_REG(i->nnn(), result);
|
||||
BX_WRITE_MMX_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -748,7 +749,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKUSWB_PqQq(bxInstruction_c *i)
|
||||
#if BX_CPU_LEVEL >= 5
|
||||
BX_CPU_THIS_PTR prepareMMX();
|
||||
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
|
||||
|
||||
/* op2 is a register or memory reference */
|
||||
if (i->modC0()) {
|
||||
@ -762,17 +763,17 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKUSWB_PqQq(bxInstruction_c *i)
|
||||
|
||||
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
|
||||
|
||||
MMXUB0(result) = SaturateWordSToByteU(MMXSW0(op1));
|
||||
MMXUB1(result) = SaturateWordSToByteU(MMXSW1(op1));
|
||||
MMXUB2(result) = SaturateWordSToByteU(MMXSW2(op1));
|
||||
MMXUB3(result) = SaturateWordSToByteU(MMXSW3(op1));
|
||||
MMXUB4(result) = SaturateWordSToByteU(MMXSW0(op2));
|
||||
MMXUB5(result) = SaturateWordSToByteU(MMXSW1(op2));
|
||||
MMXUB6(result) = SaturateWordSToByteU(MMXSW2(op2));
|
||||
MMXUB7(result) = SaturateWordSToByteU(MMXSW3(op2));
|
||||
MMXUB0(op1) = SaturateWordSToByteU(MMXSW0(op1));
|
||||
MMXUB1(op1) = SaturateWordSToByteU(MMXSW1(op1));
|
||||
MMXUB2(op1) = SaturateWordSToByteU(MMXSW2(op1));
|
||||
MMXUB3(op1) = SaturateWordSToByteU(MMXSW3(op1));
|
||||
MMXUB4(op1) = SaturateWordSToByteU(MMXSW0(op2));
|
||||
MMXUB5(op1) = SaturateWordSToByteU(MMXSW1(op2));
|
||||
MMXUB6(op1) = SaturateWordSToByteU(MMXSW2(op2));
|
||||
MMXUB7(op1) = SaturateWordSToByteU(MMXSW3(op2));
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_MMX_REG(i->nnn(), result);
|
||||
BX_WRITE_MMX_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -846,7 +847,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PUNPCKHDQ_PqQq(bxInstruction_c *i)
|
||||
#if BX_CPU_LEVEL >= 5
|
||||
BX_CPU_THIS_PTR prepareMMX();
|
||||
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
|
||||
|
||||
/* op2 is a register or memory reference */
|
||||
if (i->modC0()) {
|
||||
@ -860,11 +861,11 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PUNPCKHDQ_PqQq(bxInstruction_c *i)
|
||||
|
||||
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
|
||||
|
||||
MMXUD1(result) = MMXUD1(op2);
|
||||
MMXUD0(result) = MMXUD1(op1);
|
||||
MMXUD0(op1) = MMXUD1(op1);
|
||||
MMXUD1(op1) = MMXUD1(op2);
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_MMX_REG(i->nnn(), result);
|
||||
BX_WRITE_MMX_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -874,7 +875,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKSSDW_PqQq(bxInstruction_c *i)
|
||||
#if BX_CPU_LEVEL >= 5
|
||||
BX_CPU_THIS_PTR prepareMMX();
|
||||
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
|
||||
|
||||
/* op2 is a register or memory reference */
|
||||
if (i->modC0()) {
|
||||
@ -888,13 +889,13 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKSSDW_PqQq(bxInstruction_c *i)
|
||||
|
||||
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
|
||||
|
||||
MMXSW0(result) = SaturateDwordSToWordS(MMXSD0(op1));
|
||||
MMXSW1(result) = SaturateDwordSToWordS(MMXSD1(op1));
|
||||
MMXSW2(result) = SaturateDwordSToWordS(MMXSD0(op2));
|
||||
MMXSW3(result) = SaturateDwordSToWordS(MMXSD1(op2));
|
||||
MMXSW0(op1) = SaturateDwordSToWordS(MMXSD0(op1));
|
||||
MMXSW1(op1) = SaturateDwordSToWordS(MMXSD1(op1));
|
||||
MMXSW2(op1) = SaturateDwordSToWordS(MMXSD0(op2));
|
||||
MMXSW3(op1) = SaturateDwordSToWordS(MMXSD1(op2));
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_MMX_REG(i->nnn(), result);
|
||||
BX_WRITE_MMX_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1538,7 +1539,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDUSB_PqQq(bxInstruction_c *i)
|
||||
#if BX_CPU_LEVEL >= 5
|
||||
BX_CPU_THIS_PTR prepareMMX();
|
||||
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
|
||||
|
||||
/* op2 is a register or memory reference */
|
||||
if (i->modC0()) {
|
||||
@ -1552,17 +1553,18 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDUSB_PqQq(bxInstruction_c *i)
|
||||
|
||||
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
|
||||
|
||||
MMXUB0(result) = SaturateWordSToByteU(Bit16s(MMXUB0(op1)) + Bit16s(MMXUB0(op2)));
|
||||
MMXUB1(result) = SaturateWordSToByteU(Bit16s(MMXUB1(op1)) + Bit16s(MMXUB1(op2)));
|
||||
MMXUB2(result) = SaturateWordSToByteU(Bit16s(MMXUB2(op1)) + Bit16s(MMXUB2(op2)));
|
||||
MMXUB3(result) = SaturateWordSToByteU(Bit16s(MMXUB3(op1)) + Bit16s(MMXUB3(op2)));
|
||||
MMXUB4(result) = SaturateWordSToByteU(Bit16s(MMXUB4(op1)) + Bit16s(MMXUB4(op2)));
|
||||
MMXUB5(result) = SaturateWordSToByteU(Bit16s(MMXUB5(op1)) + Bit16s(MMXUB5(op2)));
|
||||
MMXUB6(result) = SaturateWordSToByteU(Bit16s(MMXUB6(op1)) + Bit16s(MMXUB6(op2)));
|
||||
MMXUB7(result) = SaturateWordSToByteU(Bit16s(MMXUB7(op1)) + Bit16s(MMXUB7(op2)));
|
||||
MMXUB0(op1) = SaturateWordSToByteU(Bit16s(MMXUB0(op1)) + Bit16s(MMXUB0(op2)));
|
||||
MMXUB1(op1) = SaturateWordSToByteU(Bit16s(MMXUB1(op1)) + Bit16s(MMXUB1(op2)));
|
||||
MMXUB2(op1) = SaturateWordSToByteU(Bit16s(MMXUB2(op1)) + Bit16s(MMXUB2(op2)));
|
||||
MMXUB3(op1) = SaturateWordSToByteU(Bit16s(MMXUB3(op1)) + Bit16s(MMXUB3(op2)));
|
||||
|
||||
MMXUB4(op1) = SaturateWordSToByteU(Bit16s(MMXUB4(op1)) + Bit16s(MMXUB4(op2)));
|
||||
MMXUB5(op1) = SaturateWordSToByteU(Bit16s(MMXUB5(op1)) + Bit16s(MMXUB5(op2)));
|
||||
MMXUB6(op1) = SaturateWordSToByteU(Bit16s(MMXUB6(op1)) + Bit16s(MMXUB6(op2)));
|
||||
MMXUB7(op1) = SaturateWordSToByteU(Bit16s(MMXUB7(op1)) + Bit16s(MMXUB7(op2)));
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_MMX_REG(i->nnn(), result);
|
||||
BX_WRITE_MMX_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1572,7 +1574,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDUSW_PqQq(bxInstruction_c *i)
|
||||
#if BX_CPU_LEVEL >= 5
|
||||
BX_CPU_THIS_PTR prepareMMX();
|
||||
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2, result;
|
||||
BxPackedMmxRegister op1 = BX_READ_MMX_REG(i->nnn()), op2;
|
||||
|
||||
/* op2 is a register or memory reference */
|
||||
if (i->modC0()) {
|
||||
@ -1586,13 +1588,13 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PADDUSW_PqQq(bxInstruction_c *i)
|
||||
|
||||
BX_CPU_THIS_PTR prepareFPU2MMX(); /* FPU2MMX transition */
|
||||
|
||||
MMXUW0(result) = SaturateDwordSToWordU(Bit32s(MMXUW0(op1)) + Bit32s(MMXUW0(op2)));
|
||||
MMXUW1(result) = SaturateDwordSToWordU(Bit32s(MMXUW1(op1)) + Bit32s(MMXUW1(op2)));
|
||||
MMXUW2(result) = SaturateDwordSToWordU(Bit32s(MMXUW2(op1)) + Bit32s(MMXUW2(op2)));
|
||||
MMXUW3(result) = SaturateDwordSToWordU(Bit32s(MMXUW3(op1)) + Bit32s(MMXUW3(op2)));
|
||||
MMXUW0(op1) = SaturateDwordSToWordU(Bit32s(MMXUW0(op1)) + Bit32s(MMXUW0(op2)));
|
||||
MMXUW1(op1) = SaturateDwordSToWordU(Bit32s(MMXUW1(op1)) + Bit32s(MMXUW1(op2)));
|
||||
MMXUW2(op1) = SaturateDwordSToWordU(Bit32s(MMXUW2(op1)) + Bit32s(MMXUW2(op2)));
|
||||
MMXUW3(op1) = SaturateDwordSToWordU(Bit32s(MMXUW3(op1)) + Bit32s(MMXUW3(op2)));
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_MMX_REG(i->nnn(), result);
|
||||
BX_WRITE_MMX_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
294
bochs/cpu/sse.cc
294
bochs/cpu/sse.cc
@ -1,5 +1,5 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id: sse.cc,v 1.80 2011-01-09 20:18:02 sshwarts Exp $
|
||||
// $Id: sse.cc,v 1.81 2011-01-16 20:42:28 sshwarts Exp $
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2003-2011 Stanislav Shwartsman
|
||||
@ -54,20 +54,19 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSHUFB_VdqWdqR(bxInstruction_c *i)
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHADDW_VdqWdqR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
|
||||
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm()), result;
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
|
||||
|
||||
result.xmm16u(0) = op1.xmm16u(0) + op1.xmm16u(1);
|
||||
result.xmm16u(1) = op1.xmm16u(2) + op1.xmm16u(3);
|
||||
result.xmm16u(2) = op1.xmm16u(4) + op1.xmm16u(5);
|
||||
result.xmm16u(3) = op1.xmm16u(6) + op1.xmm16u(7);
|
||||
op1.xmm16u(0) = op1.xmm16u(0) + op1.xmm16u(1);
|
||||
op1.xmm16u(1) = op1.xmm16u(2) + op1.xmm16u(3);
|
||||
op1.xmm16u(2) = op1.xmm16u(4) + op1.xmm16u(5);
|
||||
op1.xmm16u(3) = op1.xmm16u(6) + op1.xmm16u(7);
|
||||
|
||||
result.xmm16u(4) = op2.xmm16u(0) + op2.xmm16u(1);
|
||||
result.xmm16u(5) = op2.xmm16u(2) + op2.xmm16u(3);
|
||||
result.xmm16u(6) = op2.xmm16u(4) + op2.xmm16u(5);
|
||||
result.xmm16u(7) = op2.xmm16u(6) + op2.xmm16u(7);
|
||||
op1.xmm16u(4) = op2.xmm16u(0) + op2.xmm16u(1);
|
||||
op1.xmm16u(5) = op2.xmm16u(2) + op2.xmm16u(3);
|
||||
op1.xmm16u(6) = op2.xmm16u(4) + op2.xmm16u(5);
|
||||
op1.xmm16u(7) = op2.xmm16u(6) + op2.xmm16u(7);
|
||||
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
BX_WRITE_XMM_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -75,15 +74,14 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHADDW_VdqWdqR(bxInstruction_c *i)
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHADDD_VdqWdqR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
|
||||
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm()), result;
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
|
||||
|
||||
result.xmm32u(0) = op1.xmm32u(0) + op1.xmm32u(1);
|
||||
result.xmm32u(1) = op1.xmm32u(2) + op1.xmm32u(3);
|
||||
result.xmm32u(2) = op2.xmm32u(0) + op2.xmm32u(1);
|
||||
result.xmm32u(3) = op2.xmm32u(2) + op2.xmm32u(3);
|
||||
op1.xmm32u(0) = op1.xmm32u(0) + op1.xmm32u(1);
|
||||
op1.xmm32u(1) = op1.xmm32u(2) + op1.xmm32u(3);
|
||||
op1.xmm32u(2) = op2.xmm32u(0) + op2.xmm32u(1);
|
||||
op1.xmm32u(3) = op2.xmm32u(2) + op2.xmm32u(3);
|
||||
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
BX_WRITE_XMM_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -91,21 +89,20 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHADDD_VdqWdqR(bxInstruction_c *i)
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHADDSW_VdqWdqR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
|
||||
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm()), result;
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
|
||||
|
||||
result.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(0)) + Bit32s(op1.xmm16s(1)));
|
||||
result.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(2)) + Bit32s(op1.xmm16s(3)));
|
||||
result.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(4)) + Bit32s(op1.xmm16s(5)));
|
||||
result.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(6)) + Bit32s(op1.xmm16s(7)));
|
||||
op1.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(0)) + Bit32s(op1.xmm16s(1)));
|
||||
op1.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(2)) + Bit32s(op1.xmm16s(3)));
|
||||
op1.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(4)) + Bit32s(op1.xmm16s(5)));
|
||||
op1.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(6)) + Bit32s(op1.xmm16s(7)));
|
||||
|
||||
result.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(0)) + Bit32s(op2.xmm16s(1)));
|
||||
result.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(2)) + Bit32s(op2.xmm16s(3)));
|
||||
result.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(4)) + Bit32s(op2.xmm16s(5)));
|
||||
result.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(6)) + Bit32s(op2.xmm16s(7)));
|
||||
op1.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(0)) + Bit32s(op2.xmm16s(1)));
|
||||
op1.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(2)) + Bit32s(op2.xmm16s(3)));
|
||||
op1.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(4)) + Bit32s(op2.xmm16s(5)));
|
||||
op1.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(6)) + Bit32s(op2.xmm16s(7)));
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
BX_WRITE_XMM_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -118,7 +115,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMADDUBSW_VdqWdqR(bxInstruction_c *i)
|
||||
|
||||
for(unsigned j=0; j<8; j++)
|
||||
{
|
||||
Bit32s temp = Bit32s(op1.xmmubyte(j*2+0))*Bit32s(op2.xmmsbyte(j*2+0)) +
|
||||
Bit32s temp = Bit32s(op1.xmmubyte(j*2+0))*Bit32s(op2.xmmsbyte(j*2)) +
|
||||
Bit32s(op1.xmmubyte(j*2+1))*Bit32s(op2.xmmsbyte(j*2+1));
|
||||
|
||||
result.xmm16s(j) = SaturateDwordSToWordS(temp);
|
||||
@ -133,21 +130,20 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMADDUBSW_VdqWdqR(bxInstruction_c *i)
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHSUBSW_VdqWdqR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
|
||||
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm()), result;
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
|
||||
|
||||
result.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(0)) - Bit32s(op1.xmm16s(1)));
|
||||
result.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(2)) - Bit32s(op1.xmm16s(3)));
|
||||
result.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(4)) - Bit32s(op1.xmm16s(5)));
|
||||
result.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(6)) - Bit32s(op1.xmm16s(7)));
|
||||
op1.xmm16s(0) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(0)) - Bit32s(op1.xmm16s(1)));
|
||||
op1.xmm16s(1) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(2)) - Bit32s(op1.xmm16s(3)));
|
||||
op1.xmm16s(2) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(4)) - Bit32s(op1.xmm16s(5)));
|
||||
op1.xmm16s(3) = SaturateDwordSToWordS(Bit32s(op1.xmm16s(6)) - Bit32s(op1.xmm16s(7)));
|
||||
|
||||
result.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(0)) - Bit32s(op2.xmm16s(1)));
|
||||
result.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(2)) - Bit32s(op2.xmm16s(3)));
|
||||
result.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(4)) - Bit32s(op2.xmm16s(5)));
|
||||
result.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(6)) - Bit32s(op2.xmm16s(7)));
|
||||
op1.xmm16s(4) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(0)) - Bit32s(op2.xmm16s(1)));
|
||||
op1.xmm16s(5) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(2)) - Bit32s(op2.xmm16s(3)));
|
||||
op1.xmm16s(6) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(4)) - Bit32s(op2.xmm16s(5)));
|
||||
op1.xmm16s(7) = SaturateDwordSToWordS(Bit32s(op2.xmm16s(6)) - Bit32s(op2.xmm16s(7)));
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
BX_WRITE_XMM_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -155,20 +151,19 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHSUBSW_VdqWdqR(bxInstruction_c *i)
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHSUBW_VdqWdqR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
|
||||
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm()), result;
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
|
||||
|
||||
result.xmm16u(0) = op1.xmm16u(0) - op1.xmm16u(1);
|
||||
result.xmm16u(1) = op1.xmm16u(2) - op1.xmm16u(3);
|
||||
result.xmm16u(2) = op1.xmm16u(4) - op1.xmm16u(5);
|
||||
result.xmm16u(3) = op1.xmm16u(6) - op1.xmm16u(7);
|
||||
op1.xmm16u(0) = op1.xmm16u(0) - op1.xmm16u(1);
|
||||
op1.xmm16u(1) = op1.xmm16u(2) - op1.xmm16u(3);
|
||||
op1.xmm16u(2) = op1.xmm16u(4) - op1.xmm16u(5);
|
||||
op1.xmm16u(3) = op1.xmm16u(6) - op1.xmm16u(7);
|
||||
|
||||
result.xmm16u(4) = op2.xmm16u(0) - op2.xmm16u(1);
|
||||
result.xmm16u(5) = op2.xmm16u(2) - op2.xmm16u(3);
|
||||
result.xmm16u(6) = op2.xmm16u(4) - op2.xmm16u(5);
|
||||
result.xmm16u(7) = op2.xmm16u(6) - op2.xmm16u(7);
|
||||
op1.xmm16u(4) = op2.xmm16u(0) - op2.xmm16u(1);
|
||||
op1.xmm16u(5) = op2.xmm16u(2) - op2.xmm16u(3);
|
||||
op1.xmm16u(6) = op2.xmm16u(4) - op2.xmm16u(5);
|
||||
op1.xmm16u(7) = op2.xmm16u(6) - op2.xmm16u(7);
|
||||
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
BX_WRITE_XMM_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -176,15 +171,14 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHSUBW_VdqWdqR(bxInstruction_c *i)
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PHSUBD_VdqWdqR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
|
||||
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm()), result;
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
|
||||
|
||||
result.xmm32u(0) = op1.xmm32u(0) - op1.xmm32u(1);
|
||||
result.xmm32u(1) = op1.xmm32u(2) - op1.xmm32u(3);
|
||||
result.xmm32u(2) = op2.xmm32u(0) - op2.xmm32u(1);
|
||||
result.xmm32u(3) = op2.xmm32u(2) - op2.xmm32u(3);
|
||||
op1.xmm32u(0) = op1.xmm32u(0) - op1.xmm32u(1);
|
||||
op1.xmm32u(1) = op1.xmm32u(2) - op1.xmm32u(3);
|
||||
op1.xmm32u(2) = op2.xmm32u(0) - op2.xmm32u(1);
|
||||
op1.xmm32u(3) = op2.xmm32u(2) - op2.xmm32u(3);
|
||||
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
BX_WRITE_XMM_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -416,20 +410,20 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPEQQ_VdqWdqR(bxInstruction_c *i)
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKUSDW_VdqWdqR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
|
||||
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm()), result;
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
|
||||
|
||||
result.xmm16u(0) = SaturateDwordSToWordU(op1.xmm32s(0));
|
||||
result.xmm16u(1) = SaturateDwordSToWordU(op1.xmm32s(1));
|
||||
result.xmm16u(2) = SaturateDwordSToWordU(op1.xmm32s(2));
|
||||
result.xmm16u(3) = SaturateDwordSToWordU(op1.xmm32s(3));
|
||||
result.xmm16u(4) = SaturateDwordSToWordU(op2.xmm32s(0));
|
||||
result.xmm16u(5) = SaturateDwordSToWordU(op2.xmm32s(1));
|
||||
result.xmm16u(6) = SaturateDwordSToWordU(op2.xmm32s(2));
|
||||
result.xmm16u(7) = SaturateDwordSToWordU(op2.xmm32s(3));
|
||||
op1.xmm16u(0) = SaturateDwordSToWordU(op1.xmm32s(0));
|
||||
op1.xmm16u(1) = SaturateDwordSToWordU(op1.xmm32s(1));
|
||||
op1.xmm16u(2) = SaturateDwordSToWordU(op1.xmm32s(2));
|
||||
op1.xmm16u(3) = SaturateDwordSToWordU(op1.xmm32s(3));
|
||||
|
||||
op1.xmm16u(4) = SaturateDwordSToWordU(op2.xmm32s(0));
|
||||
op1.xmm16u(5) = SaturateDwordSToWordU(op2.xmm32s(1));
|
||||
op1.xmm16u(6) = SaturateDwordSToWordU(op2.xmm32s(2));
|
||||
op1.xmm16u(7) = SaturateDwordSToWordU(op2.xmm32s(3));
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
BX_WRITE_XMM_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -992,29 +986,28 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::UNPCKLPS_VpsWdqR(bxInstruction_c *i)
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKSSWB_VdqWdqR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
|
||||
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm()), result;
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
|
||||
|
||||
result.xmmsbyte(0x0) = SaturateWordSToByteS(op1.xmm16s(0));
|
||||
result.xmmsbyte(0x1) = SaturateWordSToByteS(op1.xmm16s(1));
|
||||
result.xmmsbyte(0x2) = SaturateWordSToByteS(op1.xmm16s(2));
|
||||
result.xmmsbyte(0x3) = SaturateWordSToByteS(op1.xmm16s(3));
|
||||
result.xmmsbyte(0x4) = SaturateWordSToByteS(op1.xmm16s(4));
|
||||
result.xmmsbyte(0x5) = SaturateWordSToByteS(op1.xmm16s(5));
|
||||
result.xmmsbyte(0x6) = SaturateWordSToByteS(op1.xmm16s(6));
|
||||
result.xmmsbyte(0x7) = SaturateWordSToByteS(op1.xmm16s(7));
|
||||
op1.xmmsbyte(0x0) = SaturateWordSToByteS(op1.xmm16s(0));
|
||||
op1.xmmsbyte(0x1) = SaturateWordSToByteS(op1.xmm16s(1));
|
||||
op1.xmmsbyte(0x2) = SaturateWordSToByteS(op1.xmm16s(2));
|
||||
op1.xmmsbyte(0x3) = SaturateWordSToByteS(op1.xmm16s(3));
|
||||
op1.xmmsbyte(0x4) = SaturateWordSToByteS(op1.xmm16s(4));
|
||||
op1.xmmsbyte(0x5) = SaturateWordSToByteS(op1.xmm16s(5));
|
||||
op1.xmmsbyte(0x6) = SaturateWordSToByteS(op1.xmm16s(6));
|
||||
op1.xmmsbyte(0x7) = SaturateWordSToByteS(op1.xmm16s(7));
|
||||
|
||||
result.xmmsbyte(0x8) = SaturateWordSToByteS(op2.xmm16s(0));
|
||||
result.xmmsbyte(0x9) = SaturateWordSToByteS(op2.xmm16s(1));
|
||||
result.xmmsbyte(0xA) = SaturateWordSToByteS(op2.xmm16s(2));
|
||||
result.xmmsbyte(0xB) = SaturateWordSToByteS(op2.xmm16s(3));
|
||||
result.xmmsbyte(0xC) = SaturateWordSToByteS(op2.xmm16s(4));
|
||||
result.xmmsbyte(0xD) = SaturateWordSToByteS(op2.xmm16s(5));
|
||||
result.xmmsbyte(0xE) = SaturateWordSToByteS(op2.xmm16s(6));
|
||||
result.xmmsbyte(0xF) = SaturateWordSToByteS(op2.xmm16s(7));
|
||||
op1.xmmsbyte(0x8) = SaturateWordSToByteS(op2.xmm16s(0));
|
||||
op1.xmmsbyte(0x9) = SaturateWordSToByteS(op2.xmm16s(1));
|
||||
op1.xmmsbyte(0xA) = SaturateWordSToByteS(op2.xmm16s(2));
|
||||
op1.xmmsbyte(0xB) = SaturateWordSToByteS(op2.xmm16s(3));
|
||||
op1.xmmsbyte(0xC) = SaturateWordSToByteS(op2.xmm16s(4));
|
||||
op1.xmmsbyte(0xD) = SaturateWordSToByteS(op2.xmm16s(5));
|
||||
op1.xmmsbyte(0xE) = SaturateWordSToByteS(op2.xmm16s(6));
|
||||
op1.xmmsbyte(0xF) = SaturateWordSToByteS(op2.xmm16s(7));
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
BX_WRITE_XMM_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1073,29 +1066,28 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPGTD_VdqWdqR(bxInstruction_c *i)
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PACKUSWB_VdqWdqR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
|
||||
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm()), result;
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
|
||||
|
||||
result.xmmubyte(0x0) = SaturateWordSToByteU(op1.xmm16s(0));
|
||||
result.xmmubyte(0x1) = SaturateWordSToByteU(op1.xmm16s(1));
|
||||
result.xmmubyte(0x2) = SaturateWordSToByteU(op1.xmm16s(2));
|
||||
result.xmmubyte(0x3) = SaturateWordSToByteU(op1.xmm16s(3));
|
||||
result.xmmubyte(0x4) = SaturateWordSToByteU(op1.xmm16s(4));
|
||||
result.xmmubyte(0x5) = SaturateWordSToByteU(op1.xmm16s(5));
|
||||
result.xmmubyte(0x6) = SaturateWordSToByteU(op1.xmm16s(6));
|
||||
result.xmmubyte(0x7) = SaturateWordSToByteU(op1.xmm16s(7));
|
||||
op1.xmmubyte(0x0) = SaturateWordSToByteU(op1.xmm16s(0));
|
||||
op1.xmmubyte(0x1) = SaturateWordSToByteU(op1.xmm16s(1));
|
||||
op1.xmmubyte(0x2) = SaturateWordSToByteU(op1.xmm16s(2));
|
||||
op1.xmmubyte(0x3) = SaturateWordSToByteU(op1.xmm16s(3));
|
||||
op1.xmmubyte(0x4) = SaturateWordSToByteU(op1.xmm16s(4));
|
||||
op1.xmmubyte(0x5) = SaturateWordSToByteU(op1.xmm16s(5));
|
||||
op1.xmmubyte(0x6) = SaturateWordSToByteU(op1.xmm16s(6));
|
||||
op1.xmmubyte(0x7) = SaturateWordSToByteU(op1.xmm16s(7));
|
||||
|
||||
result.xmmubyte(0x8) = SaturateWordSToByteU(op2.xmm16s(0));
|
||||
result.xmmubyte(0x9) = SaturateWordSToByteU(op2.xmm16s(1));
|
||||
result.xmmubyte(0xA) = SaturateWordSToByteU(op2.xmm16s(2));
|
||||
result.xmmubyte(0xB) = SaturateWordSToByteU(op2.xmm16s(3));
|
||||
result.xmmubyte(0xC) = SaturateWordSToByteU(op2.xmm16s(4));
|
||||
result.xmmubyte(0xD) = SaturateWordSToByteU(op2.xmm16s(5));
|
||||
result.xmmubyte(0xE) = SaturateWordSToByteU(op2.xmm16s(6));
|
||||
result.xmmubyte(0xF) = SaturateWordSToByteU(op2.xmm16s(7));
|
||||
op1.xmmubyte(0x8) = SaturateWordSToByteU(op2.xmm16s(0));
|
||||
op1.xmmubyte(0x9) = SaturateWordSToByteU(op2.xmm16s(1));
|
||||
op1.xmmubyte(0xA) = SaturateWordSToByteU(op2.xmm16s(2));
|
||||
op1.xmmubyte(0xB) = SaturateWordSToByteU(op2.xmm16s(3));
|
||||
op1.xmmubyte(0xC) = SaturateWordSToByteU(op2.xmm16s(4));
|
||||
op1.xmmubyte(0xD) = SaturateWordSToByteU(op2.xmm16s(5));
|
||||
op1.xmmubyte(0xE) = SaturateWordSToByteU(op2.xmm16s(6));
|
||||
op1.xmmubyte(0xF) = SaturateWordSToByteU(op2.xmm16s(7));
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
BX_WRITE_XMM_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1508,21 +1500,21 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMULLW_VdqWdqR(bxInstruction_c *i)
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBUSB_VdqWdqR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
|
||||
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm()), result;
|
||||
|
||||
result.xmm64u(0) = result.xmm64u(1) = 0;
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
|
||||
|
||||
for(unsigned j=0; j<16; j++)
|
||||
{
|
||||
if(op1.xmmubyte(j) > op2.xmmubyte(j))
|
||||
{
|
||||
result.xmmubyte(j) = op1.xmmubyte(j) - op2.xmmubyte(j);
|
||||
op1.xmmubyte(j) -= op2.xmmubyte(j);
|
||||
}
|
||||
else {
|
||||
op1.xmmubyte(j) = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
BX_WRITE_XMM_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1530,21 +1522,21 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBUSB_VdqWdqR(bxInstruction_c *i)
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSUBUSW_VdqWdqR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
|
||||
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm()), result;
|
||||
|
||||
result.xmm64u(0) = result.xmm64u(1) = 0;
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
|
||||
|
||||
for(unsigned j=0; j<8; j++)
|
||||
{
|
||||
if(op1.xmm16u(j) > op2.xmm16u(j))
|
||||
{
|
||||
result.xmm16u(j) = op1.xmm16u(j) - op2.xmm16u(j);
|
||||
op1.xmm16u(j) -= op2.xmm16u(j);
|
||||
}
|
||||
else {
|
||||
op1.xmm16u(j) = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
BX_WRITE_XMM_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1664,38 +1656,37 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PAVGB_VdqWdqR(bxInstruction_c *i)
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAW_VdqWdqR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
|
||||
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm()), result;
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
|
||||
|
||||
if(op2.xmm64u(0) == 0) return;
|
||||
|
||||
if(op2.xmm64u(0) > 15) /* looking only to low 64 bits */
|
||||
{
|
||||
result.xmm16u(0) = (op1.xmm16u(0) & 0x8000) ? 0xffff : 0;
|
||||
result.xmm16u(1) = (op1.xmm16u(1) & 0x8000) ? 0xffff : 0;
|
||||
result.xmm16u(2) = (op1.xmm16u(2) & 0x8000) ? 0xffff : 0;
|
||||
result.xmm16u(3) = (op1.xmm16u(3) & 0x8000) ? 0xffff : 0;
|
||||
result.xmm16u(4) = (op1.xmm16u(4) & 0x8000) ? 0xffff : 0;
|
||||
result.xmm16u(5) = (op1.xmm16u(5) & 0x8000) ? 0xffff : 0;
|
||||
result.xmm16u(6) = (op1.xmm16u(6) & 0x8000) ? 0xffff : 0;
|
||||
result.xmm16u(7) = (op1.xmm16u(7) & 0x8000) ? 0xffff : 0;
|
||||
op1.xmm16u(0) = (op1.xmm16u(0) & 0x8000) ? 0xffff : 0;
|
||||
op1.xmm16u(1) = (op1.xmm16u(1) & 0x8000) ? 0xffff : 0;
|
||||
op1.xmm16u(2) = (op1.xmm16u(2) & 0x8000) ? 0xffff : 0;
|
||||
op1.xmm16u(3) = (op1.xmm16u(3) & 0x8000) ? 0xffff : 0;
|
||||
op1.xmm16u(4) = (op1.xmm16u(4) & 0x8000) ? 0xffff : 0;
|
||||
op1.xmm16u(5) = (op1.xmm16u(5) & 0x8000) ? 0xffff : 0;
|
||||
op1.xmm16u(6) = (op1.xmm16u(6) & 0x8000) ? 0xffff : 0;
|
||||
op1.xmm16u(7) = (op1.xmm16u(7) & 0x8000) ? 0xffff : 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
Bit8u shift = op2.xmmubyte(0);
|
||||
|
||||
result.xmm16u(0) = (Bit16u)(op1.xmm16s(0) >> shift);
|
||||
result.xmm16u(1) = (Bit16u)(op1.xmm16s(1) >> shift);
|
||||
result.xmm16u(2) = (Bit16u)(op1.xmm16s(2) >> shift);
|
||||
result.xmm16u(3) = (Bit16u)(op1.xmm16s(3) >> shift);
|
||||
result.xmm16u(4) = (Bit16u)(op1.xmm16s(4) >> shift);
|
||||
result.xmm16u(5) = (Bit16u)(op1.xmm16s(5) >> shift);
|
||||
result.xmm16u(6) = (Bit16u)(op1.xmm16s(6) >> shift);
|
||||
result.xmm16u(7) = (Bit16u)(op1.xmm16s(7) >> shift);
|
||||
op1.xmm16u(0) = (Bit16u)(op1.xmm16s(0) >> shift);
|
||||
op1.xmm16u(1) = (Bit16u)(op1.xmm16s(1) >> shift);
|
||||
op1.xmm16u(2) = (Bit16u)(op1.xmm16s(2) >> shift);
|
||||
op1.xmm16u(3) = (Bit16u)(op1.xmm16s(3) >> shift);
|
||||
op1.xmm16u(4) = (Bit16u)(op1.xmm16s(4) >> shift);
|
||||
op1.xmm16u(5) = (Bit16u)(op1.xmm16s(5) >> shift);
|
||||
op1.xmm16u(6) = (Bit16u)(op1.xmm16s(6) >> shift);
|
||||
op1.xmm16u(7) = (Bit16u)(op1.xmm16s(7) >> shift);
|
||||
}
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
BX_WRITE_XMM_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1703,30 +1694,29 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAW_VdqWdqR(bxInstruction_c *i)
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PSRAD_VdqWdqR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn());
|
||||
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm()), result;
|
||||
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2 = BX_READ_XMM_REG(i->rm());
|
||||
|
||||
if(op2.xmm64u(0) == 0) return;
|
||||
|
||||
if(op2.xmm64u(0) > 31) /* looking only to low 64 bits */
|
||||
{
|
||||
result.xmm32u(0) = (op1.xmm32u(0) & 0x80000000) ? 0xffffffff : 0;
|
||||
result.xmm32u(1) = (op1.xmm32u(1) & 0x80000000) ? 0xffffffff : 0;
|
||||
result.xmm32u(2) = (op1.xmm32u(2) & 0x80000000) ? 0xffffffff : 0;
|
||||
result.xmm32u(3) = (op1.xmm32u(3) & 0x80000000) ? 0xffffffff : 0;
|
||||
op1.xmm32u(0) = (op1.xmm32u(0) & 0x80000000) ? 0xffffffff : 0;
|
||||
op1.xmm32u(1) = (op1.xmm32u(1) & 0x80000000) ? 0xffffffff : 0;
|
||||
op1.xmm32u(2) = (op1.xmm32u(2) & 0x80000000) ? 0xffffffff : 0;
|
||||
op1.xmm32u(3) = (op1.xmm32u(3) & 0x80000000) ? 0xffffffff : 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
Bit8u shift = op2.xmmubyte(0);
|
||||
|
||||
result.xmm32u(0) = (Bit32u)(op1.xmm32s(0) >> shift);
|
||||
result.xmm32u(1) = (Bit32u)(op1.xmm32s(1) >> shift);
|
||||
result.xmm32u(2) = (Bit32u)(op1.xmm32s(2) >> shift);
|
||||
result.xmm32u(3) = (Bit32u)(op1.xmm32s(3) >> shift);
|
||||
op1.xmm32u(0) = (Bit32u)(op1.xmm32s(0) >> shift);
|
||||
op1.xmm32u(1) = (Bit32u)(op1.xmm32s(1) >> shift);
|
||||
op1.xmm32u(2) = (Bit32u)(op1.xmm32s(2) >> shift);
|
||||
op1.xmm32u(3) = (Bit32u)(op1.xmm32s(3) >> shift);
|
||||
}
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
BX_WRITE_XMM_REG(i->nnn(), op1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -2059,7 +2049,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMADDWD_VdqWdqR(bxInstruction_c *i)
|
||||
}
|
||||
else {
|
||||
result.xmm32u(j) =
|
||||
Bit32s(op1.xmm16s(2*j+0)) * Bit32s(op2.xmm16s(2*j+0)) +
|
||||
Bit32s(op1.xmm16s(2*j+0)) * Bit32s(op2.xmm16s(2*j)) +
|
||||
Bit32s(op1.xmm16s(2*j+1)) * Bit32s(op2.xmm16s(2*j+1));
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id: sse_move.cc,v 1.130 2011-01-15 15:17:28 sshwarts Exp $
|
||||
// $Id: sse_move.cc,v 1.131 2011-01-16 20:42:28 sshwarts Exp $
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2003-2011 Stanislav Shwartsman
|
||||
@ -437,16 +437,6 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVAPS_VpsWpsM(bxInstruction_c *i)
|
||||
#endif
|
||||
}
|
||||
|
||||
/* MOVAPS: 0F 29 */
|
||||
/* MOVAPD: 66 0F 29 */
|
||||
/* MOVDQA: 66 0F 7F */
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVAPS_WpsVpsR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
BX_WRITE_XMM_REG(i->rm(), BX_READ_XMM_REG(i->nnn()));
|
||||
#endif
|
||||
}
|
||||
|
||||
/* MOVAPS: 0F 29 */
|
||||
/* MOVNTPS: 0F 2B */
|
||||
/* MOVNTPD: 66 0F 2B */
|
||||
@ -748,17 +738,6 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVQ_VqWqM(bxInstruction_c *i)
|
||||
#endif
|
||||
}
|
||||
|
||||
/* 66 0F D6 */
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVQ_WqVqR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
BxPackedXmmRegister op;
|
||||
op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->nnn());
|
||||
op.xmm64u(1) = 0; /* zero-extension to 128 bit */
|
||||
BX_WRITE_XMM_REG(i->rm(), op);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* F2 0F D6 */
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVDQ2Q_PqVRq(bxInstruction_c *i)
|
||||
{
|
||||
@ -854,16 +833,19 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMOVSXBW_VdqWqR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
BxPackedXmmRegister result;
|
||||
Bit64u val64 = BX_READ_XMM_REG_LO_QWORD(i->rm());
|
||||
BxPackedMmxRegister op;
|
||||
|
||||
result.xmm16u(0) = (Bit8s) (val64 & 0xFF);
|
||||
result.xmm16u(1) = (Bit8s) ((val64 >> 8) & 0xFF);
|
||||
result.xmm16u(2) = (Bit8s) ((val64 >> 16) & 0xFF);
|
||||
result.xmm16u(3) = (Bit8s) ((val64 >> 24) & 0xFF);
|
||||
result.xmm16u(4) = (Bit8s) ((val64 >> 32) & 0xFF);
|
||||
result.xmm16u(5) = (Bit8s) ((val64 >> 40) & 0xFF);
|
||||
result.xmm16u(6) = (Bit8s) ((val64 >> 48) & 0xFF);
|
||||
result.xmm16u(7) = (Bit8s) (val64 >> 56);
|
||||
// use MMX register as 64-bit value with convenient accessors
|
||||
MMXUQ(op) = BX_READ_XMM_REG_LO_QWORD(i->rm());
|
||||
|
||||
result.xmm16u(0) = MMXSB0(op);
|
||||
result.xmm16u(1) = MMXSB1(op);
|
||||
result.xmm16u(2) = MMXSB2(op);
|
||||
result.xmm16u(3) = MMXSB3(op);
|
||||
result.xmm16u(4) = MMXSB4(op);
|
||||
result.xmm16u(5) = MMXSB5(op);
|
||||
result.xmm16u(6) = MMXSB6(op);
|
||||
result.xmm16u(7) = MMXSB7(op);
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
@ -954,16 +936,19 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PMOVZXBW_VdqWqR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
BxPackedXmmRegister result;
|
||||
Bit64u val64 = BX_READ_XMM_REG_LO_QWORD(i->rm());
|
||||
BxPackedMmxRegister op;
|
||||
|
||||
result.xmm16u(0) = val64 & 0xFF;
|
||||
result.xmm16u(1) = (val64 >> 8) & 0xFF;
|
||||
result.xmm16u(2) = (val64 >> 16) & 0xFF;
|
||||
result.xmm16u(3) = (val64 >> 24) & 0xFF;
|
||||
result.xmm16u(4) = (val64 >> 32) & 0xFF;
|
||||
result.xmm16u(5) = (val64 >> 40) & 0xFF;
|
||||
result.xmm16u(6) = (val64 >> 48) & 0xFF;
|
||||
result.xmm16u(7) = val64 >> 56;
|
||||
// use MMX register as 64-bit value with convenient accessors
|
||||
MMXUQ(op) = BX_READ_XMM_REG_LO_QWORD(i->rm());
|
||||
|
||||
result.xmm16u(0) = MMXUB0(op);
|
||||
result.xmm16u(1) = MMXUB1(op);
|
||||
result.xmm16u(2) = MMXUB2(op);
|
||||
result.xmm16u(3) = MMXUB3(op);
|
||||
result.xmm16u(4) = MMXUB4(op);
|
||||
result.xmm16u(5) = MMXUB5(op);
|
||||
result.xmm16u(6) = MMXUB6(op);
|
||||
result.xmm16u(7) = MMXUB7(op);
|
||||
|
||||
/* now write result back to destination */
|
||||
BX_WRITE_XMM_REG(i->nnn(), result);
|
||||
|
Loading…
Reference in New Issue
Block a user