rollback part of commit with xmm register access interface changes - doesn't work for big endian hosts

This commit is contained in:
Stanislav Shwartsman 2013-09-16 19:10:42 +00:00
parent ec4990a380
commit 1cebe5f83d
5 changed files with 90 additions and 98 deletions

View File

@ -289,7 +289,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBROADCASTB_VdqWb(bxInstruction_c
unsigned len = i->getVL();
BxPackedYmmRegister op;
simd_pbroadcastb(op.ymm_ubyteptr(), BX_READ_XMM_REG_LO_BYTE(i->src()), len*16);
Bit8u val_8 = BX_READ_XMM_REG_LO_BYTE(i->src());
for (unsigned n=0; n < len; n++)
sse_pbroadcastb(&op.ymm128(n), val_8);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);
@ -301,7 +304,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBROADCASTW_VdqWw(bxInstruction_c
unsigned len = i->getVL();
BxPackedYmmRegister op;
simd_pbroadcastw(op.ymm_u16ptr(), BX_READ_XMM_REG_LO_WORD(i->src()), len*8);
Bit16u val_16 = BX_READ_XMM_REG_LO_WORD(i->src());
for (unsigned n=0; n < len; n++)
sse_pbroadcastw(&op.ymm128(n), val_16);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);
@ -313,7 +319,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBROADCASTD_VdqWd(bxInstruction_c
unsigned len = i->getVL();
BxPackedYmmRegister op;
simd_pbroadcastd(op.ymm_u32ptr(), BX_READ_XMM_REG_LO_DWORD(i->src()), len*4);
Bit32u val_32 = BX_READ_XMM_REG_LO_DWORD(i->src());
for (unsigned n=0; n < len; n++)
sse_pbroadcastd(&op.ymm128(n), val_32);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);
@ -325,7 +334,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBROADCASTQ_VdqWq(bxInstruction_c
unsigned len = i->getVL();
BxPackedYmmRegister op;
simd_pbroadcastq(op.ymm_u64ptr(), BX_READ_XMM_REG_LO_QWORD(i->src()), len*2);
Bit64u val_64 = BX_READ_XMM_REG_LO_QWORD(i->src());
for (unsigned n=0; n < len; n++)
sse_pbroadcastq(&op.ymm128(n), val_64);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);

View File

@ -176,7 +176,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_VectorD(bxInstructi
if (i->getBroadcast()) {
Bit32u val_32 = read_virtual_dword(i->seg(), eaddr);
simd_pbroadcastd(BX_READ_AVX_REG(BX_VECTOR_TMP_REGISTER).vmm_u32ptr(), val_32, vl * 4);
simd_pbroadcastd(&BX_AVX_REG(BX_VECTOR_TMP_REGISTER), val_32, vl * 4);
}
else {
if (vl == BX_VL512)
@ -203,7 +203,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_VectorQ(bxInstructi
if (i->getBroadcast()) {
Bit64u val_64 = read_virtual_qword(i->seg(), eaddr);
simd_pbroadcastq(BX_READ_AVX_REG(BX_VECTOR_TMP_REGISTER).vmm_u64ptr(), val_64, vl * 2);
simd_pbroadcastq(&BX_AVX_REG(BX_VECTOR_TMP_REGISTER), val_64, vl * 2);
}
else {
if (vl == BX_VL512)

View File

@ -819,34 +819,66 @@ BX_CPP_INLINE void sse_pmaddwd(BxPackedXmmRegister *op1, const BxPackedXmmRegist
// broadcast
BX_CPP_INLINE void simd_pbroadcastb(Bit8u *dst, Bit8u val_8, unsigned len)
BX_CPP_INLINE void sse_pbroadcastb(BxPackedXmmRegister *op, Bit8u val_8)
{
for(unsigned n=0; n < len; n++) {
dst[n] = val_8;
for(unsigned n=0; n<16; n++) {
op->xmmubyte(n) = val_8;
}
}
BX_CPP_INLINE void simd_pbroadcastw(Bit16u *dst, Bit16u val_16, unsigned len)
BX_CPP_INLINE void sse_pbroadcastw(BxPackedXmmRegister *op, Bit16u val_16)
{
for(unsigned n=0; n < len; n++) {
dst[n] = val_16;
for(unsigned n=0; n<8; n++) {
op->xmm16u(n) = val_16;
}
}
BX_CPP_INLINE void simd_pbroadcastd(Bit32u *dst, Bit32u val_32, unsigned len)
BX_CPP_INLINE void sse_pbroadcastd(BxPackedXmmRegister *op, Bit32u val_32)
{
for(unsigned n=0; n < len; n++) {
dst[n] = val_32;
for(unsigned n=0; n<4; n++) {
op->xmm32u(n) = val_32;
}
}
BX_CPP_INLINE void simd_pbroadcastq(Bit64u *dst, Bit64u val_64, unsigned len)
BX_CPP_INLINE void sse_pbroadcastq(BxPackedXmmRegister *op, Bit64u val_64)
{
for(unsigned n=0; n < len; n++) {
dst[n] = val_64;
for(unsigned n=0; n<2; n++) {
op->xmm64u(n) = val_64;
}
}
#if BX_SUPPORT_EVEX
// Broadcast a byte: writes val_8 into elements vmmubyte(0) .. vmmubyte(len-1)
// of *op. Elements at index >= len are left untouched.
BX_CPP_INLINE void simd_pbroadcastb(BxPackedZmmRegister *op, Bit8u val_8, unsigned len)
{
  unsigned elem = 0;
  while (elem < len) {
    op->vmmubyte(elem) = val_8;
    ++elem;
  }
}
// Broadcast a word: writes val_16 into elements vmm16u(0) .. vmm16u(len-1)
// of *op. Elements at index >= len are left untouched.
BX_CPP_INLINE void simd_pbroadcastw(BxPackedZmmRegister *op, Bit16u val_16, unsigned len)
{
  unsigned elem = 0;
  while (elem < len) {
    op->vmm16u(elem) = val_16;
    ++elem;
  }
}
// Broadcast a dword: writes val_32 into elements vmm32u(0) .. vmm32u(len-1)
// of *op. Elements at index >= len are left untouched.
BX_CPP_INLINE void simd_pbroadcastd(BxPackedZmmRegister *op, Bit32u val_32, unsigned len)
{
  unsigned elem = 0;
  while (elem < len) {
    op->vmm32u(elem) = val_32;
    ++elem;
  }
}
// Broadcast a qword: writes val_64 into elements vmm64u(0) .. vmm64u(len-1)
// of *op. Elements at index >= len are left untouched.
BX_CPP_INLINE void simd_pbroadcastq(BxPackedZmmRegister *op, Bit64u val_64, unsigned len)
{
  unsigned elem = 0;
  while (elem < len) {
    op->vmm64u(elem) = val_64;
    ++elem;
  }
}
#endif
// sum of absolute differences (SAD)
BX_CPP_INLINE void sse_psadbw(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)

View File

@ -549,7 +549,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVLPS_VpsMq(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVDDUP_VpdWqR(bxInstruction_c *i)
{
#if BX_CPU_LEVEL >= 6
simd_pbroadcastq(BX_XMM_REG(i->dst()).xmm_u64ptr(), BX_READ_XMM_REG_LO_QWORD(i->src()), 2);
sse_pbroadcastq(&BX_XMM_REG(i->dst()), BX_READ_XMM_REG_LO_QWORD(i->src()));
#endif
BX_NEXT_INSTR(i);

View File

@ -35,16 +35,6 @@ typedef union bx_xmm_reg_t {
Bit16u xmm_u16[8];
Bit32u xmm_u32[4];
Bit64u xmm_u64[2];
Bit8s* xmm_sbyteptr() { return &xmm_sbyte[0]; }
Bit16s* xmm_s16ptr() { return &xmm_s16[0]; }
Bit32s* xmm_s32ptr() { return &xmm_s32[0]; }
Bit64s* xmm_s64ptr() { return &xmm_s64[0]; }
Bit8u* xmm_ubyteptr() { return &xmm_ubyte[0]; }
Bit16u* xmm_u16ptr() { return &xmm_u16[0]; }
Bit32u* xmm_u32ptr() { return &xmm_u32[0]; }
Bit64u* xmm_u64ptr() { return &xmm_u64[0]; }
} BxPackedXmmRegister;
#ifdef BX_BIG_ENDIAN
@ -110,15 +100,6 @@ typedef union bx_ymm_reg_t {
Bit32u ymm_u32[8];
Bit64u ymm_u64[4];
BxPackedXmmRegister ymm_v128[2];
Bit8s* ymm_sbyteptr() { return &ymm_sbyte[0]; }
Bit16s* ymm_s16ptr() { return &ymm_s16[0]; }
Bit32s* ymm_s32ptr() { return &ymm_s32[0]; }
Bit64s* ymm_s64ptr() { return &ymm_s64[0]; }
Bit8u* ymm_ubyteptr() { return &ymm_ubyte[0]; }
Bit16u* ymm_u16ptr() { return &ymm_u16[0]; }
Bit32u* ymm_u32ptr() { return &ymm_u32[0]; }
Bit64u* ymm_u64ptr() { return &ymm_u64[0]; }
} BxPackedYmmRegister;
#ifdef BX_BIG_ENDIAN
@ -158,15 +139,6 @@ typedef union bx_zmm_reg_t {
Bit64u zmm_u64[8];
BxPackedXmmRegister zmm_v128[4];
BxPackedYmmRegister zmm_v256[2];
Bit8s* zmm_sbyteptr() { return &zmm_sbyte[0]; }
Bit16s* zmm_s16ptr() { return &zmm_s16[0]; }
Bit32s* zmm_s32ptr() { return &zmm_s32[0]; }
Bit64s* zmm_s64ptr() { return &zmm_s64[0]; }
Bit8u* zmm_ubyteptr() { return &zmm_ubyte[0]; }
Bit16u* zmm_u16ptr() { return &zmm_u16[0]; }
Bit32u* zmm_u32ptr() { return &zmm_u32[0]; }
Bit64u* zmm_u64ptr() { return &zmm_u64[0]; }
} BxPackedZmmRegister;
#ifdef BX_BIG_ENDIAN
@ -196,60 +168,36 @@ typedef union bx_zmm_reg_t {
#endif
// Generic "vmm" element accessors: each vmmXX(i) alias forwards to the
// accessor family selected by the build configuration:
//   BX_SUPPORT_EVEX -> zmmXX(i)
//   BX_SUPPORT_AVX  -> ymmXX(i)
//   otherwise       -> xmmXX(i)
// Fix: vmm32s(i) used to forward to the 64-bit signed accessor (*64s) in every
// branch; it now forwards to the matching 32-bit signed accessor (*32s).
// NOTE(review): assumes zmm32s/ymm32s/xmm32s are defined next to the other
// zmm*/ymm*/xmm* accessors - confirm against the full header.
#if BX_SUPPORT_EVEX
# define vmm64s(i) zmm64s(i)
# define vmm32s(i) zmm32s(i)
# define vmm16s(i) zmm16s(i)
# define vmmsbyte(i) zmmsbyte(i)
# define vmmubyte(i) zmmubyte(i)
# define vmm16u(i) zmm16u(i)
# define vmm32u(i) zmm32u(i)
# define vmm64u(i) zmm64u(i)
# define vmm128(i) zmm128(i)
# define vmm256(i) zmm256(i)
# define vmm_ubyteptr() zmm_ubyteptr()
# define vmm_sbyteptr() zmm_sbyteptr()
# define vmm_u16ptr() zmm_u16ptr()
# define vmm_s16ptr() zmm_s16ptr()
# define vmm_u32ptr() zmm_u32ptr()
# define vmm_s32ptr() zmm_s32ptr()
# define vmm_u64ptr() zmm_u64ptr()
# define vmm_s64ptr() zmm_s64ptr()
# define vmm64s(i) zmm64s(i)
# define vmm32s(i) zmm32s(i)
# define vmm16s(i) zmm16s(i)
# define vmmsbyte(i) zmmsbyte(i)
# define vmmubyte(i) zmmubyte(i)
# define vmm16u(i) zmm16u(i)
# define vmm32u(i) zmm32u(i)
# define vmm64u(i) zmm64u(i)
# define vmm128(i) zmm128(i)
# define vmm256(i) zmm256(i)
#else
# if BX_SUPPORT_AVX
# define vmm64s(i) ymm64s(i)
# define vmm32s(i) ymm32s(i)
# define vmm16s(i) ymm16s(i)
# define vmmsbyte(i) ymmsbyte(i)
# define vmmubyte(i) ymmubyte(i)
# define vmm16u(i) ymm16u(i)
# define vmm32u(i) ymm32u(i)
# define vmm64u(i) ymm64u(i)
# define vmm128(i) ymm128(i)
# define vmm_ubyteptr() ymm_ubyteptr()
# define vmm_sbyteptr() ymm_sbyteptr()
# define vmm_u16ptr() ymm_u16ptr()
# define vmm_s16ptr() ymm_s16ptr()
# define vmm_u32ptr() ymm_u32ptr()
# define vmm_s32ptr() ymm_s32ptr()
# define vmm_u64ptr() ymm_u64ptr()
# define vmm_s64ptr() ymm_s64ptr()
# define vmm64s(i) ymm64s(i)
# define vmm32s(i) ymm32s(i)
# define vmm16s(i) ymm16s(i)
# define vmmsbyte(i) ymmsbyte(i)
# define vmmubyte(i) ymmubyte(i)
# define vmm16u(i) ymm16u(i)
# define vmm32u(i) ymm32u(i)
# define vmm64u(i) ymm64u(i)
# define vmm128(i) ymm128(i)
# else
# define vmm64s(i) xmm64s(i)
# define vmm32s(i) xmm32s(i)
# define vmm16s(i) xmm16s(i)
# define vmmsbyte(i) xmmsbyte(i)
# define vmmubyte(i) xmmubyte(i)
# define vmm16u(i) xmm16u(i)
# define vmm32u(i) xmm32u(i)
# define vmm64u(i) xmm64u(i)
# define vmm_ubyteptr() xmm_ubyteptr()
# define vmm_sbyteptr() xmm_sbyteptr()
# define vmm_u16ptr() xmm_u16ptr()
# define vmm_s16ptr() xmm_s16ptr()
# define vmm_u32ptr() xmm_u32ptr()
# define vmm_s32ptr() xmm_s32ptr()
# define vmm_u64ptr() xmm_u64ptr()
# define vmm_s64ptr() xmm_s64ptr()
# define vmm64s(i) xmm64s(i)
# define vmm32s(i) xmm32s(i)
# define vmm16s(i) xmm16s(i)
# define vmmsbyte(i) xmmsbyte(i)
# define vmmubyte(i) xmmubyte(i)
# define vmm16u(i) xmm16u(i)
# define vmm32u(i) xmm32u(i)
# define vmm64u(i) xmm64u(i)
# endif
#endif