implemented (experimental, still untested) AVX512 VBMI2 extensions
This commit is contained in:
parent 0afbb6cd3d
commit 77a62a4dcd
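The hunks below add the VBMI2 concatenate-and-shift handlers (VPSHL*/VPSHR*), the byte/word compress and expand handlers, the EVEX decode-table entries, and the CPUID feature bit. As a reading aid, here is a minimal standalone sketch of the per-element operation the word-sized handlers compute, written with plain scalar types rather than the emulator's BxPackedAvxRegister; the helper names shld16/shrd16 and the sample values are illustrative only and are not part of the commit.

#include <cstdint>
#include <cstdio>

// Scalar model of one 16-bit element of the concatenate-and-shift operation
// (immediate form). The vector handlers apply the same formula to every word
// element, and analogous formulas to dword/qword elements with the count
// masked to 31/63.
static uint16_t shld16(uint16_t src1, uint16_t src2, unsigned imm)
{
  unsigned count = imm & 15;      // count is taken modulo the element width
  if (count == 0) return src1;    // a zero count leaves src1 unchanged
  // high half of the 32-bit concatenation (src1:src2) shifted left by count
  return (uint16_t)((src1 << count) | (src2 >> (16 - count)));
}

static uint16_t shrd16(uint16_t src1, uint16_t src2, unsigned imm)
{
  unsigned count = imm & 15;
  if (count == 0) return src1;
  // low half of the 32-bit concatenation (src2:src1) shifted right by count
  return (uint16_t)((src1 >> count) | (src2 << (16 - count)));
}

int main()
{
  std::printf("%04x\n", (unsigned) shld16(0x1234, 0xABCD, 4));  // prints 234a
  std::printf("%04x\n", (unsigned) shrd16(0x1234, 0xABCD, 4));  // prints d123
}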
@ -2,7 +2,7 @@
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2013-2015 Stanislav Shwartsman
// Copyright (c) 2013-2017 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
@ -638,6 +638,249 @@ AVX512_PSHIFT_IMM_QWORD_EL(VPSLLQ_MASK_UdqIb, xmm_psllq);
AVX512_PSHIFT_IMM_QWORD_EL(VPRORQ_MASK_UdqIb, xmm_prorq);
AVX512_PSHIFT_IMM_QWORD_EL(VPROLQ_MASK_UdqIb, xmm_prolq);

// concatenate and shift

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHLW_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
  unsigned len = i->getVL();
  unsigned count = i->Ib() & 15;

  if (count) {
    for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
      op1.vmm16u(n) = (op1.vmm16u(n) << count) | (op2.vmm16u(n) >> (16 - count));
    }
  }

  if (i->opmask())
    avx512_write_regw_masked(i, &op1, len, BX_READ_32BIT_OPMASK(i->opmask()));
  else
    BX_WRITE_AVX_REGZ(i->dst(), op1, len);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHLWV_VdqHdqWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
  unsigned len = i->getVL();

  for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
    unsigned count = op2.vmm16u(n) & 15;
    if (count) {
      dst.vmm16u(n) = (dst.vmm16u(n) << count) | (op1.vmm16u(n) >> (16 - count));
    }
  }

  if (i->opmask())
    avx512_write_regw_masked(i, &dst, len, BX_READ_32BIT_OPMASK(i->opmask()));
  else
    BX_WRITE_AVX_REGZ(i->dst(), dst, len);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHLD_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
  unsigned len = i->getVL();
  unsigned count = i->Ib() & 31;

  if (count) {
    for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
      op1.vmm32u(n) = (op1.vmm32u(n) << count) | (op2.vmm32u(n) >> (32 - count));
    }
  }

  if (i->opmask())
    avx512_write_regd_masked(i, &op1, len, BX_READ_16BIT_OPMASK(i->opmask()));
  else
    BX_WRITE_AVX_REGZ(i->dst(), op1, len);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHLDV_VdqHdqWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    unsigned count = op2.vmm32u(n) & 31; // per-element count comes from the matching dword of src2
    if (count) {
      dst.vmm32u(n) = (dst.vmm32u(n) << count) | (op1.vmm32u(n) >> (32 - count));
    }
  }

  if (i->opmask())
    avx512_write_regd_masked(i, &dst, len, BX_READ_16BIT_OPMASK(i->opmask()));
  else
    BX_WRITE_AVX_REGZ(i->dst(), dst, len);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHLQ_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
  unsigned len = i->getVL();
  unsigned count = i->Ib() & 63;

  if (count) {
    for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
      op1.vmm64u(n) = (op1.vmm64u(n) << count) | (op2.vmm64u(n) >> (64 - count));
    }
  }

  if (i->opmask())
    avx512_write_regq_masked(i, &op1, len, BX_READ_8BIT_OPMASK(i->opmask()));
  else
    BX_WRITE_AVX_REGZ(i->dst(), op1, len);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHLQV_VdqHdqWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    unsigned count = op2.vmm64u(n) & 63; // per-element count comes from the matching qword of src2
    if (count) {
      dst.vmm64u(n) = (dst.vmm64u(n) << count) | (op1.vmm64u(n) >> (64 - count));
    }
  }

  if (i->opmask())
    avx512_write_regq_masked(i, &dst, len, BX_READ_8BIT_OPMASK(i->opmask()));
  else
    BX_WRITE_AVX_REGZ(i->dst(), dst, len);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHRW_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
  unsigned len = i->getVL();
  unsigned count = i->Ib() & 15;

  if (count) {
    for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
      op1.vmm16u(n) = (op1.vmm16u(n) >> count) | (op2.vmm16u(n) << (16 - count));
    }
  }

  if (i->opmask())
    avx512_write_regw_masked(i, &op1, len, BX_READ_32BIT_OPMASK(i->opmask()));
  else
    BX_WRITE_AVX_REGZ(i->dst(), op1, len);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHRWV_VdqHdqWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
  unsigned len = i->getVL();

  for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
    unsigned count = op2.vmm16u(n) & 15;
    if (count) {
      dst.vmm16u(n) = (dst.vmm16u(n) >> count) | (op1.vmm16u(n) << (16 - count));
    }
  }

  if (i->opmask())
    avx512_write_regw_masked(i, &dst, len, BX_READ_32BIT_OPMASK(i->opmask()));
  else
    BX_WRITE_AVX_REGZ(i->dst(), dst, len);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHRD_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
  unsigned len = i->getVL();
  unsigned count = i->Ib() & 31;

  if (count) {
    for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
      op1.vmm32u(n) = (op1.vmm32u(n) >> count) | (op2.vmm32u(n) << (32 - count));
    }
  }

  if (i->opmask())
    avx512_write_regd_masked(i, &op1, len, BX_READ_16BIT_OPMASK(i->opmask()));
  else
    BX_WRITE_AVX_REGZ(i->dst(), op1, len);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHRDV_VdqHdqWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    unsigned count = op2.vmm32u(n) & 31; // per-element count comes from the matching dword of src2
    if (count) {
      dst.vmm32u(n) = (dst.vmm32u(n) >> count) | (op1.vmm32u(n) << (32 - count));
    }
  }

  if (i->opmask())
    avx512_write_regd_masked(i, &dst, len, BX_READ_16BIT_OPMASK(i->opmask()));
  else
    BX_WRITE_AVX_REGZ(i->dst(), dst, len);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHRQ_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
  unsigned len = i->getVL();
  unsigned count = i->Ib() & 63;

  if (count) {
    for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
      op1.vmm64u(n) = (op1.vmm64u(n) >> count) | (op2.vmm64u(n) << (64 - count));
    }
  }

  if (i->opmask())
    avx512_write_regq_masked(i, &op1, len, BX_READ_8BIT_OPMASK(i->opmask()));
  else
    BX_WRITE_AVX_REGZ(i->dst(), op1, len);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHRQV_VdqHdqWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    unsigned count = op2.vmm64u(n) & 63; // per-element count comes from the matching qword of src2
    if (count) {
      dst.vmm64u(n) = (dst.vmm64u(n) >> count) | (op1.vmm64u(n) << (64 - count));
    }
  }

  if (i->opmask())
    avx512_write_regq_masked(i, &dst, len, BX_READ_8BIT_OPMASK(i->opmask()));
  else
    BX_WRITE_AVX_REGZ(i->dst(), dst, len);

  BX_NEXT_INSTR(i);
}


// absolute value

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPABSB_MASK_VdqWdqR(bxInstruction_c *i)
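In the variable-count handlers above, each element takes its shift amount from the matching element of src2 (masked to the element width) and is concatenated with the matching element of src1, with the destination register supplying the other half of the pair. A scalar sketch of the dword left-shift case; the name shldv32 is illustrative and not part of the Bochs sources.

#include <cstdint>
#include <cstdio>

// Scalar model of one 32-bit element of the variable-count form: the
// destination element is concatenated with the matching src1 element and
// shifted left by the count held in the matching src2 element.
static uint32_t shldv32(uint32_t dst, uint32_t src1, uint32_t src2)
{
  unsigned count = src2 & 31;     // per-element count, modulo the element width
  if (count == 0) return dst;
  return (dst << count) | (src1 >> (32 - count));
}

int main()
{
  // shift the 64-bit concatenation (dst:src1) left by 8 and keep the high dword
  std::printf("%08x\n", (unsigned) shldv32(0x11223344u, 0xAABBCCDDu, 8));  // prints 223344aa
}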
@ -1945,6 +2188,66 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VBLENDMPD_MASK_VpdHpdWpdR(bxInstru

// compress, expand

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPEXPANDB_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;

  Bit64u opmask = BX_READ_OPMASK(i->opmask()), mask = opmask;
  unsigned len = i->getVL(), n = 0, k = 0;

  for (; n < len*16; n++, mask >>= 1) {
    if (mask & 0x1) {
      result.vmmubyte(n) = op.vmmubyte(k);
      k++;
    }
    else {
      result.vmmubyte(n) = 0;
    }
  }

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), result, len);
  }
  else {
    for (unsigned n=0; n < len; n++, opmask >>= 16)
      xmm_pblendb(&BX_READ_AVX_REG_LANE(i->dst(), n), &result.vmm128(n), opmask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPEXPANDW_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;

  Bit32u opmask = BX_READ_32BIT_OPMASK(i->opmask()), mask = opmask;
  unsigned len = i->getVL(), n = 0, k = 0;

  for (; n < len*8; n++, mask >>= 1) {
    if (mask & 0x1) {
      result.vmm16u(n) = op.vmm16u(k);
      k++;
    }
    else {
      result.vmm16u(n) = 0;
    }
  }

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), result, len);
  }
  else {
    for (unsigned n=0; n < len; n++, opmask >>= 8)
      xmm_pblendw(&BX_READ_AVX_REG_LANE(i->dst(), n), &result.vmm128(n), opmask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VEXPANDPS_MASK_VpsWpsR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
@ -2005,6 +2308,62 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VEXPANDPD_MASK_VpdWpdR(bxInstructi
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMPRESSB_MASK_WdqVdq(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;

  Bit64u opmask = BX_READ_OPMASK(i->opmask());
  unsigned len = i->getVL(), n = 0, k = 0;

  for (; n < len*16; n++, opmask >>= 1) {
    if (opmask & 0x1) {
      result.vmmubyte(k) = op.vmmubyte(n);
      k++;
    }
    if (! opmask) break;
  }

  Bit64u writemask = (BX_CONST64(1) << k) - 1;

  if (i->modC0()) {
    avx512_write_regb_masked(i, &result, len, writemask);
  }
  else {
    bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
    avx_masked_store8(i, eaddr, &result, writemask);
  }

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMPRESSW_MASK_WdqVdq(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;

  Bit32u opmask = BX_READ_32BIT_OPMASK(i->opmask());
  unsigned len = i->getVL(), n = 0, k = 0;

  for (; n < len*8; n++, opmask >>= 1) {
    if (opmask & 0x1) {
      result.vmm16u(k) = op.vmm16u(n);
      k++;
    }
    if (! opmask) break;
  }

  Bit32u writemask = (1 << k) - 1;

  if (i->modC0()) {
    avx512_write_regw_masked(i, &result, len, writemask);
  }
  else {
    bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
    avx_masked_store16(i, eaddr, &result, writemask);
  }

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCOMPRESSPS_MASK_WpsVps(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
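The expand handlers above scatter packed source elements to the destination positions whose opmask bit is set, while the compress handlers gather the selected elements contiguously and count how many were written. A small standalone sketch of the same pattern on an 8-byte array with an 8-bit mask; it models only the zero-masking expand path (the real handlers also merge via xmm_pblendb/xmm_pblendw), and the function names are illustrative, not emulator APIs.

#include <cstdint>
#include <cstdio>

// Place the next packed source byte at every destination slot whose mask bit
// is set; clear the remaining slots (zero-masking behaviour).
static void expand_bytes(const uint8_t *src, uint8_t *dst, uint8_t mask)
{
  unsigned k = 0;
  for (unsigned n = 0; n < 8; n++, mask >>= 1)
    dst[n] = (mask & 1) ? src[k++] : 0;
}

// Gather the selected source bytes contiguously at the front of dst and
// return how many bytes were written.
static unsigned compress_bytes(const uint8_t *src, uint8_t *dst, uint8_t mask)
{
  unsigned k = 0;
  for (unsigned n = 0; n < 8; n++, mask >>= 1)
    if (mask & 1) dst[k++] = src[n];
  return k;
}

int main()
{
  uint8_t src[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }, out[8], packed[8] = { 0 };
  expand_bytes(src, out, 0xA5);                     // mask bits 0,2,5,7 set
  for (unsigned i = 0; i < 8; i++) std::printf("%d ", out[i]);
  std::printf("\n");                                // prints: 1 0 2 0 0 3 0 4
  unsigned k = compress_bytes(out, packed, 0xA5);
  std::printf("%u bytes packed\n", k);              // prints: 4 bytes packed
}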
@ -3661,6 +3661,11 @@ public: // for now...
  BX_SMF BX_INSF_TYPE VEXPANDPS_MASK_VpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VEXPANDPD_MASK_VpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

  BX_SMF BX_INSF_TYPE VPCOMPRESSB_MASK_WdqVdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VPCOMPRESSW_MASK_WdqVdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VPEXPANDB_MASK_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VPEXPANDW_MASK_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

  BX_SMF BX_INSF_TYPE VPMOVQB_WdqVdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VPMOVDB_WdqVdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VPMOVWB_WdqVdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
@ -3776,6 +3781,20 @@ public: // for now...
  BX_SMF BX_INSF_TYPE VPDPBUSDS_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VPDPWSSD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VPDPWSSDS_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

  BX_SMF BX_INSF_TYPE VPSHLW_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VPSHLWV_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VPSHLD_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VPSHLDV_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VPSHLQ_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VPSHLQV_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

  BX_SMF BX_INSF_TYPE VPSHRW_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VPSHRWV_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VPSHRD_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VPSHRDV_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VPSHRQ_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF BX_INSF_TYPE VPSHRQV_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#endif

  BX_SMF BX_INSF_TYPE LZCNT_GwEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
@ -97,7 +97,8 @@ enum {
  BX_ISA_AVX512_DQ, /* AVX-512DQ instruction */
  BX_ISA_AVX512_BW, /* AVX-512 Byte/Word instruction */
  BX_ISA_AVX512_VL, /* AVX-512 Vector Length extensions */
  BX_ISA_AVX512_VBMI, /* AVX-512 Vector Bit Manipulation Instructions */
  BX_ISA_AVX512_VBMI, /* AVX-512 VBMI : Vector Bit Manipulation Instructions */
  BX_ISA_AVX512_VBMI2, /* AVX-512 VBMI2 : Vector Bit Manipulation Instructions */
  BX_ISA_AVX512_IFMA52, /* AVX-512 IFMA52 Instructions */
  BX_ISA_AVX512_VPOPCNTDQ, /* AVX-512 VPOPCNTD/VPOPCNTQ Instructions */
  BX_ISA_AVX512_VNNI, /* AVX-512 VNNI Instructions */
@ -2084,10 +2084,10 @@ static const BxExtOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
  /* 6E */ { 0, BX_IA_ERROR },
  /* 6F k0 */ { 0, BX_IA_ERROR },
  /* 6F */ { 0, BX_IA_ERROR },
  /* 70 k0 */ { 0, BX_IA_ERROR },
  /* 70 */ { 0, BX_IA_ERROR },
  /* 71 k0 */ { 0, BX_IA_ERROR },
  /* 71 */ { 0, BX_IA_ERROR },
  /* 70 k0 */ { BxVexW1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VPSHLW_VdqHdqWdqIb_Kmask },
  /* 70 */ { BxVexW1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VPSHLW_VdqHdqWdqIb_Kmask },
  /* 71 k0 */ { BxAliasVexW | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VPSHLD_VdqHdqWdqIb_Kmask },
  /* 71 */ { BxAliasVexW | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VPSHLD_VdqHdqWdqIb_Kmask },
  /* 72 k0 */ { 0, BX_IA_ERROR },
  /* 72 */ { 0, BX_IA_ERROR },
  /* 73 k0 */ { 0, BX_IA_ERROR },
@ -3584,8 +3584,8 @@ bx_define_opcode(BX_IA_V512_VPERMT2PD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST
bx_define_opcode(BX_IA_V512_VPERMI2PS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMI2PS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_mVps, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPERMI2PD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMI2PD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_mVpd, OP_NONE, BX_PREPARE_EVEX_NO_SAE)

bx_define_opcode(BX_IA_V512_VPERMD_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMPS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPERMQ_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMPD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPERMD_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMPS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_mVdq32, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPERMQ_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMPD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_mVdq64, OP_NONE, BX_PREPARE_EVEX_NO_SAE)

bx_define_opcode(BX_IA_V512_VPERMPS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMPS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_mVps, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPERMPD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMPD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_mVpd, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
@ -3593,8 +3593,8 @@ bx_define_opcode(BX_IA_V512_VPERMPD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_V
bx_define_opcode(BX_IA_V512_VPCONFLICTD_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPCONFLICTD_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq32, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPCONFLICTQ_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPCONFLICTQ_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq64, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)

bx_define_opcode(BX_IA_V512_VPLZCNTD_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPLZCNTD_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq32, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPLZCNTQ_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPLZCNTQ_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq64, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPLZCNTD_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPLZCNTD_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPLZCNTQ_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPLZCNTQ_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)

bx_define_opcode(BX_IA_V512_VPMOVM2B_VdqKEq, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVM2B_VdqKEqR, BX_ISA_AVX512_BW, OP_Vdq, OP_KEq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVM2W_VdqKEd, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVM2W_VdqKEdR, BX_ISA_AVX512_BW, OP_Vdq, OP_KEd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
@ -3608,8 +3608,18 @@ bx_define_opcode(BX_IA_V512_VPMOVQ2M_KGbWdq, &BX_CPU_C::BxError, &BX_CPU_C::VPMO

bx_define_opcode(BX_IA_V512_VPOPCNTB_VdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPOPCNTB_MASK_VdqWdqR, BX_ISA_AVX512_BITALG, OP_Vdq, OP_mVdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPOPCNTW_VdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPOPCNTW_MASK_VdqWdqR, BX_ISA_AVX512_BITALG, OP_Vdq, OP_mVdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPOPCNTD_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPOPCNTD_MASK_VdqWdqR, BX_ISA_AVX512_VPOPCNTDQ, OP_Vdq, OP_mVdq32, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPOPCNTQ_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPOPCNTQ_MASK_VdqWdqR, BX_ISA_AVX512_VPOPCNTDQ, OP_Vdq, OP_mVdq64, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPOPCNTD_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPOPCNTD_MASK_VdqWdqR, BX_ISA_AVX512_VPOPCNTDQ, OP_Vdq, OP_mVdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPOPCNTQ_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPOPCNTQ_MASK_VdqWdqR, BX_ISA_AVX512_VPOPCNTDQ, OP_Vdq, OP_mVdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)

bx_define_opcode(BX_IA_V512_VPSHRD_VdqHdqWdqIb_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPSHRD_VdqHdqWdqIbR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_Ib, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPSHRQ_VdqHdqWdqIb_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPSHRQ_VdqHdqWdqIbR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_Ib, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPSHRDV_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPSHRDV_VdqHdqWdqR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPSHRQV_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPSHRQV_VdqHdqWdqR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)

bx_define_opcode(BX_IA_V512_VPSHLD_VdqHdqWdqIb_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPSHLD_VdqHdqWdqIbR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_Ib, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPSHLQ_VdqHdqWdqIb_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPSHLQ_VdqHdqWdqIbR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_Ib, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPSHLDV_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPSHLDV_VdqHdqWdqR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPSHLQV_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPSHLQV_VdqHdqWdqR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
// VexW alias

// VexW64 aliased
@ -3683,4 +3693,7 @@ bx_define_opcode(BX_IA_V512_VAESDEC_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C
bx_define_opcode(BX_IA_V512_VAESDECLAST_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VAESDECLAST_VdqHdqWdqR, BX_ISA_VAES_VPCLMULQDQ, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPCLMULQDQ_VdqHdqWdqIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPCLMULQDQ_VdqHdqWdqIbR, BX_ISA_VAES_VPCLMULQDQ, OP_Vdq, OP_Hdq, OP_Wdq, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)

bx_define_opcode(BX_IA_V512_VPSHLW_VdqHdqWdqIb_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPSHLW_VdqHdqWdqIbR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPSHLWV_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPSHLWV_VdqHdqWdqR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)

#endif // BX_SUPPORT_EVEX
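The bx_define_opcode() entries above bind each new instruction form to a load method, an execute handler, the ISA extension that must be enabled, and an operand template. The sketch below mirrors only the general shape of such a table-driven binding; the struct, field names, and handler are hypothetical stand-ins, deliberately much simpler than the real Bochs decoder tables.

#include <cstdio>

// Hypothetical, simplified stand-in for a table-driven opcode binding: each
// entry ties an instruction name to the feature it requires and to an
// execute callback.
struct OpcodeEntry {
  const char *name;
  const char *required_feature;
  void (*execute)();
};

static void execute_vpshld_dword() { std::printf("execute VPSHLD (dword form)\n"); }

static const OpcodeEntry opcode_table[] = {
  { "VPSHLD", "AVX512_VBMI2", execute_vpshld_dword },
};

int main()
{
  for (const OpcodeEntry &e : opcode_table) {
    std::printf("%s requires %s\n", e.name, e.required_feature);
    e.execute();
  }
}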
@ -1441,6 +1441,9 @@ Bit32u bx_generic_cpuid_t::get_ext4_cpuid_features(void) const
  if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_UMIP))
    features |= BX_CPUID_EXT4_UMIP;

  if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_AVX512_VBMI2))
    features |= BX_CPUID_EXT4_AVX512_VBMI2;

  if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_VAES_VPCLMULQDQ))
    features |= BX_CPUID_EXT4_VAES | BX_CPUID_EXT4_VPCLMULQDQ;
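With BX_CPUID_EXT4_AVX512_VBMI2 set here, guests see the architectural feature flag for VBMI2, which is reported in CPUID.(EAX=07H,ECX=0):ECX bit 6. A minimal guest-side probe, assuming an x86 build with a GCC or Clang toolchain that provides __get_cpuid_count in <cpuid.h>:

#include <cpuid.h>
#include <cstdio>

int main()
{
  unsigned eax, ebx, ecx, edx;
  // leaf 7, subleaf 0: structured extended feature flags
  if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) && (ecx & (1u << 6)))
    std::printf("AVX512_VBMI2 reported\n");
  else
    std::printf("AVX512_VBMI2 not reported\n");
  return 0;
}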