Rollback part of commit with xmm register access interface changes - doesn't work for big endian hosts
This commit is contained in:
parent
ec4990a380
commit
1cebe5f83d
@ -289,7 +289,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBROADCASTB_VdqWb(bxInstruction_c
|
||||
unsigned len = i->getVL();
|
||||
BxPackedYmmRegister op;
|
||||
|
||||
simd_pbroadcastb(op.ymm_ubyteptr(), BX_READ_XMM_REG_LO_BYTE(i->src()), len*16);
|
||||
Bit8u val_8 = BX_READ_XMM_REG_LO_BYTE(i->src());
|
||||
|
||||
for (unsigned n=0; n < len; n++)
|
||||
sse_pbroadcastb(&op.ymm128(n), val_8);
|
||||
|
||||
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);
|
||||
|
||||
@ -301,7 +304,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBROADCASTW_VdqWw(bxInstruction_c
|
||||
unsigned len = i->getVL();
|
||||
BxPackedYmmRegister op;
|
||||
|
||||
simd_pbroadcastw(op.ymm_u16ptr(), BX_READ_XMM_REG_LO_WORD(i->src()), len*8);
|
||||
Bit16u val_16 = BX_READ_XMM_REG_LO_WORD(i->src());
|
||||
|
||||
for (unsigned n=0; n < len; n++)
|
||||
sse_pbroadcastw(&op.ymm128(n), val_16);
|
||||
|
||||
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);
|
||||
|
||||
@ -313,7 +319,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBROADCASTD_VdqWd(bxInstruction_c
|
||||
unsigned len = i->getVL();
|
||||
BxPackedYmmRegister op;
|
||||
|
||||
simd_pbroadcastd(op.ymm_u32ptr(), BX_READ_XMM_REG_LO_DWORD(i->src()), len*4);
|
||||
Bit32u val_32 = BX_READ_XMM_REG_LO_DWORD(i->src());
|
||||
|
||||
for (unsigned n=0; n < len; n++)
|
||||
sse_pbroadcastd(&op.ymm128(n), val_32);
|
||||
|
||||
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);
|
||||
|
||||
@ -325,7 +334,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBROADCASTQ_VdqWq(bxInstruction_c
|
||||
unsigned len = i->getVL();
|
||||
BxPackedYmmRegister op;
|
||||
|
||||
simd_pbroadcastq(op.ymm_u64ptr(), BX_READ_XMM_REG_LO_QWORD(i->src()), len*2);
|
||||
Bit64u val_64 = BX_READ_XMM_REG_LO_QWORD(i->src());
|
||||
|
||||
for (unsigned n=0; n < len; n++)
|
||||
sse_pbroadcastq(&op.ymm128(n), val_64);
|
||||
|
||||
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);
|
||||
|
||||
|
@ -176,7 +176,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_VectorD(bxInstructi
|
||||
|
||||
if (i->getBroadcast()) {
|
||||
Bit32u val_32 = read_virtual_dword(i->seg(), eaddr);
|
||||
simd_pbroadcastd(BX_READ_AVX_REG(BX_VECTOR_TMP_REGISTER).vmm_u32ptr(), val_32, vl * 4);
|
||||
simd_pbroadcastd(&BX_AVX_REG(BX_VECTOR_TMP_REGISTER), val_32, vl * 4);
|
||||
}
|
||||
else {
|
||||
if (vl == BX_VL512)
|
||||
@ -203,7 +203,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_VectorQ(bxInstructi
|
||||
|
||||
if (i->getBroadcast()) {
|
||||
Bit64u val_64 = read_virtual_qword(i->seg(), eaddr);
|
||||
simd_pbroadcastq(BX_READ_AVX_REG(BX_VECTOR_TMP_REGISTER).vmm_u64ptr(), val_64, vl * 2);
|
||||
simd_pbroadcastq(&BX_AVX_REG(BX_VECTOR_TMP_REGISTER), val_64, vl * 2);
|
||||
}
|
||||
else {
|
||||
if (vl == BX_VL512)
|
||||
|
@ -819,34 +819,66 @@ BX_CPP_INLINE void sse_pmaddwd(BxPackedXmmRegister *op1, const BxPackedXmmRegist
|
||||
|
||||
// broadcast
|
||||
|
||||
BX_CPP_INLINE void simd_pbroadcastb(Bit8u *dst, Bit8u val_8, unsigned len)
|
||||
BX_CPP_INLINE void sse_pbroadcastb(BxPackedXmmRegister *op, Bit8u val_8)
|
||||
{
|
||||
for(unsigned n=0; n < len; n++) {
|
||||
dst[n] = val_8;
|
||||
for(unsigned n=0; n<16; n++) {
|
||||
op->xmmubyte(n) = val_8;
|
||||
}
|
||||
}
|
||||
|
||||
BX_CPP_INLINE void simd_pbroadcastw(Bit16u *dst, Bit16u val_16, unsigned len)
|
||||
BX_CPP_INLINE void sse_pbroadcastw(BxPackedXmmRegister *op, Bit16u val_16)
|
||||
{
|
||||
for(unsigned n=0; n < len; n++) {
|
||||
dst[n] = val_16;
|
||||
for(unsigned n=0; n<8; n++) {
|
||||
op->xmm16u(n) = val_16;
|
||||
}
|
||||
}
|
||||
|
||||
BX_CPP_INLINE void simd_pbroadcastd(Bit32u *dst, Bit32u val_32, unsigned len)
|
||||
BX_CPP_INLINE void sse_pbroadcastd(BxPackedXmmRegister *op, Bit32u val_32)
|
||||
{
|
||||
for(unsigned n=0; n < len; n++) {
|
||||
dst[n] = val_32;
|
||||
for(unsigned n=0; n<4; n++) {
|
||||
op->xmm32u(n) = val_32;
|
||||
}
|
||||
}
|
||||
|
||||
BX_CPP_INLINE void simd_pbroadcastq(Bit64u *dst, Bit64u val_64, unsigned len)
|
||||
BX_CPP_INLINE void sse_pbroadcastq(BxPackedXmmRegister *op, Bit64u val_64)
|
||||
{
|
||||
for(unsigned n=0; n < len; n++) {
|
||||
dst[n] = val_64;
|
||||
for(unsigned n=0; n<2; n++) {
|
||||
op->xmm64u(n) = val_64;
|
||||
}
|
||||
}
|
||||
|
||||
#if BX_SUPPORT_EVEX
|
||||
|
||||
BX_CPP_INLINE void simd_pbroadcastb(BxPackedZmmRegister *op, Bit8u val_8, unsigned len)
|
||||
{
|
||||
for(unsigned n=0; n < len; n++) {
|
||||
op->vmmubyte(n) = val_8;
|
||||
}
|
||||
}
|
||||
|
||||
BX_CPP_INLINE void simd_pbroadcastw(BxPackedZmmRegister *op, Bit16u val_16, unsigned len)
|
||||
{
|
||||
for(unsigned n=0; n < len; n++) {
|
||||
op->vmm16u(n) = val_16;
|
||||
}
|
||||
}
|
||||
|
||||
BX_CPP_INLINE void simd_pbroadcastd(BxPackedZmmRegister *op, Bit32u val_32, unsigned len)
|
||||
{
|
||||
for(unsigned n=0; n < len; n++) {
|
||||
op->vmm32u(n) = val_32;
|
||||
}
|
||||
}
|
||||
|
||||
BX_CPP_INLINE void simd_pbroadcastq(BxPackedZmmRegister *op, Bit64u val_64, unsigned len)
|
||||
{
|
||||
for(unsigned n=0; n < len; n++) {
|
||||
op->vmm64u(n) = val_64;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// sum of absolute differences (SAD)
|
||||
|
||||
BX_CPP_INLINE void sse_psadbw(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
|
||||
|
@ -549,7 +549,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVLPS_VpsMq(bxInstruction_c *i)
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVDDUP_VpdWqR(bxInstruction_c *i)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 6
|
||||
simd_pbroadcastq(BX_XMM_REG(i->dst()).xmm_u64ptr(), BX_READ_XMM_REG_LO_QWORD(i->src()), 2);
|
||||
sse_pbroadcastq(&BX_XMM_REG(i->dst()), BX_READ_XMM_REG_LO_QWORD(i->src()));
|
||||
#endif
|
||||
|
||||
BX_NEXT_INSTR(i);
|
||||
|
106
bochs/cpu/xmm.h
106
bochs/cpu/xmm.h
@ -35,16 +35,6 @@ typedef union bx_xmm_reg_t {
|
||||
Bit16u xmm_u16[8];
|
||||
Bit32u xmm_u32[4];
|
||||
Bit64u xmm_u64[2];
|
||||
|
||||
Bit8s* xmm_sbyteptr() { return &xmm_sbyte[0]; }
|
||||
Bit16s* xmm_s16ptr() { return &xmm_s16[0]; }
|
||||
Bit32s* xmm_s32ptr() { return &xmm_s32[0]; }
|
||||
Bit64s* xmm_s64ptr() { return &xmm_s64[0]; }
|
||||
Bit8u* xmm_ubyteptr() { return &xmm_ubyte[0]; }
|
||||
Bit16u* xmm_u16ptr() { return &xmm_u16[0]; }
|
||||
Bit32u* xmm_u32ptr() { return &xmm_u32[0]; }
|
||||
Bit64u* xmm_u64ptr() { return &xmm_u64[0]; }
|
||||
|
||||
} BxPackedXmmRegister;
|
||||
|
||||
#ifdef BX_BIG_ENDIAN
|
||||
@ -110,15 +100,6 @@ typedef union bx_ymm_reg_t {
|
||||
Bit32u ymm_u32[8];
|
||||
Bit64u ymm_u64[4];
|
||||
BxPackedXmmRegister ymm_v128[2];
|
||||
|
||||
Bit8s* ymm_sbyteptr() { return &ymm_sbyte[0]; }
|
||||
Bit16s* ymm_s16ptr() { return &ymm_s16[0]; }
|
||||
Bit32s* ymm_s32ptr() { return &ymm_s32[0]; }
|
||||
Bit64s* ymm_s64ptr() { return &ymm_s64[0]; }
|
||||
Bit8u* ymm_ubyteptr() { return &ymm_ubyte[0]; }
|
||||
Bit16u* ymm_u16ptr() { return &ymm_u16[0]; }
|
||||
Bit32u* ymm_u32ptr() { return &ymm_u32[0]; }
|
||||
Bit64u* ymm_u64ptr() { return &ymm_u64[0]; }
|
||||
} BxPackedYmmRegister;
|
||||
|
||||
#ifdef BX_BIG_ENDIAN
|
||||
@ -158,15 +139,6 @@ typedef union bx_zmm_reg_t {
|
||||
Bit64u zmm_u64[8];
|
||||
BxPackedXmmRegister zmm_v128[4];
|
||||
BxPackedYmmRegister zmm_v256[2];
|
||||
|
||||
Bit8s* zmm_sbyteptr() { return &zmm_sbyte[0]; }
|
||||
Bit16s* zmm_s16ptr() { return &zmm_s16[0]; }
|
||||
Bit32s* zmm_s32ptr() { return &zmm_s32[0]; }
|
||||
Bit64s* zmm_s64ptr() { return &zmm_s64[0]; }
|
||||
Bit8u* zmm_ubyteptr() { return &zmm_ubyte[0]; }
|
||||
Bit16u* zmm_u16ptr() { return &zmm_u16[0]; }
|
||||
Bit32u* zmm_u32ptr() { return &zmm_u32[0]; }
|
||||
Bit64u* zmm_u64ptr() { return &zmm_u64[0]; }
|
||||
} BxPackedZmmRegister;
|
||||
|
||||
#ifdef BX_BIG_ENDIAN
|
||||
@ -196,60 +168,36 @@ typedef union bx_zmm_reg_t {
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_EVEX
|
||||
# define vmm64s(i) zmm64s(i)
|
||||
# define vmm32s(i) zmm64s(i)
|
||||
# define vmm16s(i) zmm16s(i)
|
||||
# define vmmsbyte(i) zmmsbyte(i)
|
||||
# define vmmubyte(i) zmmubyte(i)
|
||||
# define vmm16u(i) zmm16u(i)
|
||||
# define vmm32u(i) zmm32u(i)
|
||||
# define vmm64u(i) zmm64u(i)
|
||||
# define vmm128(i) zmm128(i)
|
||||
# define vmm256(i) zmm256(i)
|
||||
# define vmm_ubyteptr() zmm_ubyteptr()
|
||||
# define vmm_sbyteptr() zmm_sbyteptr()
|
||||
# define vmm_u16ptr() zmm_u16ptr()
|
||||
# define vmm_s16ptr() zmm_s16ptr()
|
||||
# define vmm_u32ptr() zmm_u32ptr()
|
||||
# define vmm_s32ptr() zmm_s32ptr()
|
||||
# define vmm_u64ptr() zmm_u64ptr()
|
||||
# define vmm_s64ptr() zmm_s64ptr()
|
||||
# define vmm64s(i) zmm64s(i)
|
||||
# define vmm32s(i) zmm64s(i)
|
||||
# define vmm16s(i) zmm16s(i)
|
||||
# define vmmsbyte(i) zmmsbyte(i)
|
||||
# define vmmubyte(i) zmmubyte(i)
|
||||
# define vmm16u(i) zmm16u(i)
|
||||
# define vmm32u(i) zmm32u(i)
|
||||
# define vmm64u(i) zmm64u(i)
|
||||
# define vmm128(i) zmm128(i)
|
||||
# define vmm256(i) zmm256(i)
|
||||
#else
|
||||
# if BX_SUPPORT_AVX
|
||||
# define vmm64s(i) ymm64s(i)
|
||||
# define vmm32s(i) ymm64s(i)
|
||||
# define vmm16s(i) ymm16s(i)
|
||||
# define vmmsbyte(i) ymmsbyte(i)
|
||||
# define vmmubyte(i) ymmubyte(i)
|
||||
# define vmm16u(i) ymm16u(i)
|
||||
# define vmm32u(i) ymm32u(i)
|
||||
# define vmm64u(i) ymm64u(i)
|
||||
# define vmm128(i) ymm128(i)
|
||||
# define vmm_ubyteptr() ymm_ubyteptr()
|
||||
# define vmm_sbyteptr() ymm_sbyteptr()
|
||||
# define vmm_u16ptr() ymm_u16ptr()
|
||||
# define vmm_s16ptr() ymm_s16ptr()
|
||||
# define vmm_u32ptr() ymm_u32ptr()
|
||||
# define vmm_s32ptr() ymm_s32ptr()
|
||||
# define vmm_u64ptr() ymm_u64ptr()
|
||||
# define vmm_s64ptr() ymm_s64ptr()
|
||||
# define vmm64s(i) ymm64s(i)
|
||||
# define vmm32s(i) ymm64s(i)
|
||||
# define vmm16s(i) ymm16s(i)
|
||||
# define vmmsbyte(i) ymmsbyte(i)
|
||||
# define vmmubyte(i) ymmubyte(i)
|
||||
# define vmm16u(i) ymm16u(i)
|
||||
# define vmm32u(i) ymm32u(i)
|
||||
# define vmm64u(i) ymm64u(i)
|
||||
# define vmm128(i) ymm128(i)
|
||||
# else
|
||||
# define vmm64s(i) xmm64s(i)
|
||||
# define vmm32s(i) xmm64s(i)
|
||||
# define vmm16s(i) xmm16s(i)
|
||||
# define vmmsbyte(i) xmmsbyte(i)
|
||||
# define vmmubyte(i) xmmubyte(i)
|
||||
# define vmm16u(i) xmm16u(i)
|
||||
# define vmm32u(i) xmm32u(i)
|
||||
# define vmm64u(i) xmm64u(i)
|
||||
# define vmm_ubyteptr() xmm_ubyteptr()
|
||||
# define vmm_sbyteptr() xmm_sbyteptr()
|
||||
# define vmm_u16ptr() xmm_u16ptr()
|
||||
# define vmm_s16ptr() xmm_s16ptr()
|
||||
# define vmm_u32ptr() xmm_u32ptr()
|
||||
# define vmm_s32ptr() xmm_s32ptr()
|
||||
# define vmm_u64ptr() xmm_u64ptr()
|
||||
# define vmm_s64ptr() xmm_s64ptr()
|
||||
# define vmm64s(i) xmm64s(i)
|
||||
# define vmm32s(i) xmm64s(i)
|
||||
# define vmm16s(i) xmm16s(i)
|
||||
# define vmmsbyte(i) xmmsbyte(i)
|
||||
# define vmmubyte(i) xmmubyte(i)
|
||||
# define vmm16u(i) xmm16u(i)
|
||||
# define vmm32u(i) xmm32u(i)
|
||||
# define vmm64u(i) xmm64u(i)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user