Next step: new methods for VEXPANDPS_MASK_VpsWpsR and VEXPANDPD_MASK_VpdWpdR with a mask and a memory operand will be introduced, with fault-suppression support. The legacy load methods are not suitable, because the loads here actually read POPCNT(mask) elements from the memory source.
This commit is contained in:
parent
1a420ddf8b
commit
96b887a9bb
@ -1859,120 +1859,96 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::VBLENDMPD_MASK_VpdHpdWpdR(bxInstruction_c
|
||||
// Masked byte expand, register source form.
// Consecutive low bytes of the source register are scattered into the
// destination byte positions whose opmask bit is set. Positions whose mask
// bit is clear keep the value `result` was preloaded with: zero under
// zero-masking, the current destination contents otherwise.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPEXPANDB_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;

  // Preload the result so that unselected elements need no per-element
  // handling and no separate blend pass afterwards.
  if (i->isZeroMasking())
    result.clear();
  else
    result = BX_READ_AVX_REG(i->dst());

  Bit64u opmask = BX_READ_OPMASK(i->opmask());
  unsigned len = i->getVL(), k = 0; // k indexes the next unread source byte

  // One mask bit per byte element; len*16 byte elements are processed.
  for (unsigned n = 0; n < len*16; n++, opmask >>= 1) {
    if (! opmask) break; // no selected elements remain

    if (opmask & 0x1) {
      result.vmmubyte(n) = op.vmmubyte(k);
      k++;
    }
  }

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// Masked word expand, register source form.
// Consecutive low 16-bit elements of the source register are scattered into
// the destination word positions whose opmask bit is set. Positions whose
// mask bit is clear keep the value `result` was preloaded with: zero under
// zero-masking, the current destination contents otherwise.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPEXPANDW_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;

  // Preload the result so that unselected elements need no per-element
  // handling and no separate blend pass afterwards.
  if (i->isZeroMasking())
    result.clear();
  else
    result = BX_READ_AVX_REG(i->dst());

  Bit32u opmask = BX_READ_32BIT_OPMASK(i->opmask());
  unsigned len = i->getVL(), k = 0; // k indexes the next unread source word

  // One mask bit per word element; len*8 word elements are processed.
  for (unsigned n = 0; n < len*8; n++, opmask >>= 1) {
    if (! opmask) break; // no selected elements remain

    if (opmask & 0x1) {
      result.vmm16u(n) = op.vmm16u(k);
      k++;
    }
  }

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// Masked single-precision expand, register source form.
// Consecutive low 32-bit elements of the source register are scattered into
// the destination dword positions whose opmask bit is set. Positions whose
// mask bit is clear keep the value `result` was preloaded with: zero under
// zero-masking, the current destination contents otherwise.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VEXPANDPS_MASK_VpsWpsR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;

  // Preload the result so that unselected elements need no per-element
  // handling and no separate blend pass afterwards.
  if (i->isZeroMasking())
    result.clear();
  else
    result = BX_READ_AVX_REG(i->dst());

  Bit32u opmask = BX_READ_16BIT_OPMASK(i->opmask());
  unsigned len = i->getVL(), k = 0; // k indexes the next unread source dword

  // One mask bit per dword element; len*4 dword elements are processed.
  for (unsigned n = 0; n < len*4; n++, opmask >>= 1) {
    if (! opmask) break; // no selected elements remain

    if (opmask & 0x1) {
      result.vmm32u(n) = op.vmm32u(k);
      k++;
    }
  }

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// Masked double-precision expand, register source form.
// Consecutive low 64-bit elements of the source register are scattered into
// the destination qword positions whose opmask bit is set. Positions whose
// mask bit is clear keep the value `result` was preloaded with: zero under
// zero-masking, the current destination contents otherwise.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VEXPANDPD_MASK_VpdWpdR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;

  // Preload the result so that unselected elements need no per-element
  // handling and no separate blend pass afterwards.
  if (i->isZeroMasking())
    result.clear();
  else
    result = BX_READ_AVX_REG(i->dst());

  Bit32u opmask = BX_READ_8BIT_OPMASK(i->opmask());
  unsigned len = i->getVL(), k = 0; // k indexes the next unread source qword

  // One mask bit per qword element; len*2 qword elements are processed.
  for (unsigned n = 0; n < len*2; n++, opmask >>= 1) {
    if (! opmask) break; // no selected elements remain

    if (opmask & 0x1) {
      result.vmm64u(n) = op.vmm64u(k);
      k++;
    }
  }

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
@ -1984,11 +1960,12 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMPRESSB_MASK_WdqVdq(bxInstruction_c *i
|
||||
unsigned len = i->getVL(), n = 0, k = 0;
|
||||
|
||||
for (; n < len*16; n++, opmask >>= 1) {
|
||||
if (! opmask) break;
|
||||
|
||||
if (opmask & 0x1) {
|
||||
result.vmmubyte(k) = op.vmmubyte(n);
|
||||
k++;
|
||||
}
|
||||
if (! opmask) break;
|
||||
}
|
||||
|
||||
Bit64u writemask = (BX_CONST64(1) << k) - 1;
|
||||
@ -2012,11 +1989,12 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMPRESSW_MASK_WdqVdq(bxInstruction_c *i
|
||||
unsigned len = i->getVL(), n = 0, k = 0;
|
||||
|
||||
for (; n < len*8; n++, opmask >>= 1) {
|
||||
if (! opmask) break;
|
||||
|
||||
if (opmask & 0x1) {
|
||||
result.vmm16u(k) = op.vmm16u(n);
|
||||
k++;
|
||||
}
|
||||
if (! opmask) break;
|
||||
}
|
||||
|
||||
Bit32u writemask = (1 << k) - 1;
|
||||
@ -2040,11 +2018,12 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCOMPRESSPS_MASK_WpsVps(bxInstruction_c *i
|
||||
unsigned len = i->getVL(), n = 0, k = 0;
|
||||
|
||||
for (; n < len*4; n++, opmask >>= 1) {
|
||||
if (! opmask) break;
|
||||
|
||||
if (opmask & 0x1) {
|
||||
result.vmm32u(k) = op.vmm32u(n);
|
||||
k++;
|
||||
}
|
||||
if (! opmask) break;
|
||||
}
|
||||
|
||||
Bit32u writemask = (1 << k) - 1;
|
||||
@ -2068,11 +2047,12 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCOMPRESSPD_MASK_WpdVpd(bxInstruction_c *i
|
||||
unsigned len = i->getVL(), n = 0, k = 0;
|
||||
|
||||
for (; n < len*2; n++, opmask >>= 1) {
|
||||
if (! opmask) break;
|
||||
|
||||
if (opmask & 0x1) {
|
||||
result.vmm64u(k) = op.vmm64u(n);
|
||||
k++;
|
||||
}
|
||||
if (! opmask) break;
|
||||
}
|
||||
|
||||
Bit32u writemask = (1 << k) - 1;
|
||||
|
@ -395,54 +395,44 @@ AVX512_CVT32_TO_64(VCVTTPS2UQQ_MASK_VdqWpsR, float32_to_uint64_round_to_zero)
|
||||
// Masked conversion of unsigned 32-bit integers to double precision,
// register source form.
// Each selected 64-bit destination element n receives
// uint32_to_float64() of source dword n. Elements whose opmask bit is clear
// keep the value `result` was preloaded with: zero under zero-masking, the
// current destination contents otherwise.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PD_MASK_VpdWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  unsigned len = i->getVL();

  // Preload the result so that unselected elements need no else-branch in
  // the loop and no separate blend pass afterwards.
  BxPackedAvxRegister result;
  if (i->isZeroMasking())
    result.clear();
  else
    result = BX_READ_AVX_REG(i->dst());

  Bit32u opmask = BX_READ_8BIT_OPMASK(i->opmask());

  // `mask` walks a single bit across the opmask, one bit per qword element.
  for (unsigned n=0, mask = 0x1; n < QWORD_ELEMENTS(len); n++, mask <<= 1) {
    if (opmask & mask)
      result.vmm64u(n) = uint32_to_float64(op.ymm32u(n));
  }

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// Masked conversion of signed 32-bit integers to double precision,
// register source form.
// Each selected 64-bit destination element n receives
// int32_to_float64() of source dword n. Elements whose opmask bit is clear
// keep the value `result` was preloaded with: zero under zero-masking, the
// current destination contents otherwise.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTDQ2PD_MASK_VpdWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  unsigned len = i->getVL();

  // Preload the result so that unselected elements need no else-branch in
  // the loop and no separate blend pass afterwards.
  BxPackedAvxRegister result;
  if (i->isZeroMasking())
    result.clear();
  else
    result = BX_READ_AVX_REG(i->dst());

  Bit32u opmask = BX_READ_8BIT_OPMASK(i->opmask());

  // `mask` walks a single bit across the opmask, one bit per qword element.
  for (unsigned n=0, mask = 0x1; n < QWORD_ELEMENTS(len); n++, mask <<= 1) {
    if (opmask & mask)
      result.vmm64u(n) = int32_to_float64(op.ymm32s(n));
  }

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
@ -800,10 +790,15 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PD_VpdWdqR(bxInstruction_c *i)
|
||||
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPH2PS_MASK_VpsWpsR(bxInstruction_c *i)
|
||||
{
|
||||
BxPackedAvxRegister result;
|
||||
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
|
||||
unsigned len = i->getVL();
|
||||
|
||||
BxPackedAvxRegister result;
|
||||
if (i->isZeroMasking())
|
||||
result.clear();
|
||||
else
|
||||
result = BX_READ_AVX_REG(i->dst());
|
||||
|
||||
float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
|
||||
status.denormals_are_zeros = 0; // ignore MXCSR.DAZ
|
||||
// no denormal exception is reported on MXCSR
|
||||
@ -814,21 +809,11 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPH2PS_MASK_VpsWpsR(bxInstruction_c *i)
|
||||
for (unsigned n=0, mask = 0x1; n < DWORD_ELEMENTS(len); n++, mask <<= 1) {
|
||||
if (opmask & mask)
|
||||
result.vmm32u(n) = float16_to_float32(op.ymm16u(n), status);
|
||||
else
|
||||
result.vmm32u(n) = 0;
|
||||
}
|
||||
|
||||
check_exceptionsSSE(get_exception_flags(status));
|
||||
|
||||
if (! i->isZeroMasking()) {
|
||||
for (unsigned n=0; n < len; n++, opmask >>= 4)
|
||||
xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &result.vmm128(n), opmask);
|
||||
BX_CLEAR_AVX_REGZ(i->dst(), len);
|
||||
}
|
||||
else {
|
||||
BX_WRITE_AVX_REGZ(i->dst(), result, len);
|
||||
}
|
||||
|
||||
BX_WRITE_AVX_REGZ(i->dst(), result, len);
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user