From 77a62a4dcd2b8a3051aa3fe964393564fcba4287 Mon Sep 17 00:00:00 2001 From: Stanislav Shwartsman Date: Fri, 20 Oct 2017 18:38:15 +0000 Subject: [PATCH] implemented (experimental, still untested) AVX512 VBMI2 extensions --- bochs/cpu/avx/avx512.cc | 361 ++++++++++++++++++++++++++- bochs/cpu/cpu.h | 19 ++ bochs/cpu/decoder/decoder.h | 3 +- bochs/cpu/decoder/fetchdecode_evex.h | 8 +- bochs/cpu/decoder/ia_opcodes.def | 25 +- bochs/cpu/generic_cpuid.cc | 3 + 6 files changed, 407 insertions(+), 12 deletions(-) diff --git a/bochs/cpu/avx/avx512.cc b/bochs/cpu/avx/avx512.cc index f5e04e96b..806e65def 100644 --- a/bochs/cpu/avx/avx512.cc +++ b/bochs/cpu/avx/avx512.cc @@ -2,7 +2,7 @@ // $Id$ ///////////////////////////////////////////////////////////////////////// // -// Copyright (c) 2013-2015 Stanislav Shwartsman +// Copyright (c) 2013-2017 Stanislav Shwartsman // Written by Stanislav Shwartsman [sshwarts at sourceforge net] // // This library is free software; you can redistribute it and/or @@ -638,6 +638,249 @@ AVX512_PSHIFT_IMM_QWORD_EL(VPSLLQ_MASK_UdqIb, xmm_psllq); AVX512_PSHIFT_IMM_QWORD_EL(VPRORQ_MASK_UdqIb, xmm_prorq); AVX512_PSHIFT_IMM_QWORD_EL(VPROLQ_MASK_UdqIb, xmm_prolq); +// concatenate and shift + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHLW_VdqHdqWdqIbR(bxInstruction_c *i) +{ + BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); + unsigned len = i->getVL(); + unsigned count = i->Ib() & 15; + + if (count) { + for (unsigned n=0; n < WORD_ELEMENTS(len); n++) { + op1.vmm16u(n) = (op1.vmm16u(n) << count) | (op2.vmm16u(n) >> (16 - count)); + } + } + + if (i->opmask()) + avx512_write_regw_masked(i, &op1, len, BX_READ_32BIT_OPMASK(i->opmask())); + else + BX_WRITE_AVX_REGZ(i->dst(), op1, len); + + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHLWV_VdqHdqWdqR(bxInstruction_c *i) +{ + BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), op1 = BX_READ_AVX_REG(i->src1()), op2 = 
BX_READ_AVX_REG(i->src2()); + unsigned len = i->getVL(); + + for (unsigned n=0; n < WORD_ELEMENTS(len); n++) { + unsigned count = op2.vmm16u(n) & 15; + if (count) { + dst.vmm16u(n) = (dst.vmm16u(n) << count) | (op1.vmm16u(n) >> (16 - count)); + } + } + + if (i->opmask()) + avx512_write_regw_masked(i, &dst, len, BX_READ_32BIT_OPMASK(i->opmask())); + else + BX_WRITE_AVX_REGZ(i->dst(), dst, len); + + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHLD_VdqHdqWdqIbR(bxInstruction_c *i) +{ + BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); + unsigned len = i->getVL(); + unsigned count = i->Ib() & 31; + + if (count) { + for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { + op1.vmm32u(n) = (op1.vmm32u(n) << count) | (op2.vmm32u(n) >> (32 - count)); + } + } + + if (i->opmask()) + avx512_write_regd_masked(i, &op1, len, BX_READ_16BIT_OPMASK(i->opmask())); + else + BX_WRITE_AVX_REGZ(i->dst(), op1, len); + + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHLDV_VdqHdqWdqR(bxInstruction_c *i) +{ + BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); + unsigned len = i->getVL(); + + for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { + unsigned count = op2.vmm32u(n) & 31; // count for dword element n is the low 5 bits of the matching dword of src2 + if (count) { + dst.vmm32u(n) = (dst.vmm32u(n) << count) | (op1.vmm32u(n) >> (32 - count)); + } + } + + if (i->opmask()) + avx512_write_regd_masked(i, &dst, len, BX_READ_16BIT_OPMASK(i->opmask())); + else + BX_WRITE_AVX_REGZ(i->dst(), dst, len); + + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHLQ_VdqHdqWdqIbR(bxInstruction_c *i) +{ + BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); + unsigned len = i->getVL(); + unsigned count = i->Ib() & 63; + + if (count) { + for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { + op1.vmm64u(n) = (op1.vmm64u(n) << count) | (op2.vmm64u(n) >> (64 - count)); + } +
} + + if (i->opmask()) + avx512_write_regq_masked(i, &op1, len, BX_READ_8BIT_OPMASK(i->opmask())); + else + BX_WRITE_AVX_REGZ(i->dst(), op1, len); + + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHLQV_VdqHdqWdqR(bxInstruction_c *i) +{ + BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); + unsigned len = i->getVL(); + + for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { + unsigned count = op2.vmm64u(n) & 63; // count for qword element n is the low 6 bits of the matching qword of src2 + if (count) { + dst.vmm64u(n) = (dst.vmm64u(n) << count) | (op1.vmm64u(n) >> (64 - count)); + } + } + + if (i->opmask()) + avx512_write_regq_masked(i, &dst, len, BX_READ_8BIT_OPMASK(i->opmask())); + else + BX_WRITE_AVX_REGZ(i->dst(), dst, len); + + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHRW_VdqHdqWdqIbR(bxInstruction_c *i) +{ + BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); + unsigned len = i->getVL(); + unsigned count = i->Ib() & 15; + + if (count) { + for (unsigned n=0; n < WORD_ELEMENTS(len); n++) { + op1.vmm16u(n) = (op1.vmm16u(n) >> count) | (op2.vmm16u(n) << (16 - count)); + } + } + + if (i->opmask()) + avx512_write_regw_masked(i, &op1, len, BX_READ_32BIT_OPMASK(i->opmask())); + else + BX_WRITE_AVX_REGZ(i->dst(), op1, len); + + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHRWV_VdqHdqWdqR(bxInstruction_c *i) +{ + BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); + unsigned len = i->getVL(); + + for (unsigned n=0; n < WORD_ELEMENTS(len); n++) { + unsigned count = op2.vmm16u(n) & 15; + if (count) { + dst.vmm16u(n) = (dst.vmm16u(n) >> count) | (op1.vmm16u(n) << (16 - count)); + } + } + + if (i->opmask()) + avx512_write_regw_masked(i, &dst, len, BX_READ_32BIT_OPMASK(i->opmask())); + else + BX_WRITE_AVX_REGZ(i->dst(), dst, len); + + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1)
BX_CPU_C::VPSHRD_VdqHdqWdqIbR(bxInstruction_c *i) +{ + BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); + unsigned len = i->getVL(); + unsigned count = i->Ib() & 31; + + if (count) { + for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { + op1.vmm32u(n) = (op1.vmm32u(n) >> count) | (op2.vmm32u(n) << (32 - count)); + } + } + + if (i->opmask()) + avx512_write_regd_masked(i, &op1, len, BX_READ_16BIT_OPMASK(i->opmask())); + else + BX_WRITE_AVX_REGZ(i->dst(), op1, len); + + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHRDV_VdqHdqWdqR(bxInstruction_c *i) +{ + BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); + unsigned len = i->getVL(); + + for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { + unsigned count = op2.vmm32u(n) & 31; // count for dword element n is the low 5 bits of the matching dword of src2 + if (count) { + dst.vmm32u(n) = (dst.vmm32u(n) >> count) | (op1.vmm32u(n) << (32 - count)); + } + } + + if (i->opmask()) + avx512_write_regd_masked(i, &dst, len, BX_READ_16BIT_OPMASK(i->opmask())); + else + BX_WRITE_AVX_REGZ(i->dst(), dst, len); + + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHRQ_VdqHdqWdqIbR(bxInstruction_c *i) +{ + BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); + unsigned len = i->getVL(); + unsigned count = i->Ib() & 63; + + if (count) { + for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { + op1.vmm64u(n) = (op1.vmm64u(n) >> count) | (op2.vmm64u(n) << (64 - count)); + } + } + + if (i->opmask()) + avx512_write_regq_masked(i, &op1, len, BX_READ_8BIT_OPMASK(i->opmask())); + else + BX_WRITE_AVX_REGZ(i->dst(), op1, len); + + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHRQV_VdqHdqWdqR(bxInstruction_c *i) +{ + BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); + unsigned len = i->getVL(); + + for (unsigned n=0; n <
QWORD_ELEMENTS(len); n++) { + unsigned count = op2.vmm64u(n) & 63; // count for qword element n is the low 6 bits of the matching qword of src2 + if (count) { + dst.vmm64u(n) = (dst.vmm64u(n) >> count) | (op1.vmm64u(n) << (64 - count)); + } + } + + if (i->opmask()) + avx512_write_regq_masked(i, &dst, len, BX_READ_8BIT_OPMASK(i->opmask())); + else + BX_WRITE_AVX_REGZ(i->dst(), dst, len); + + BX_NEXT_INSTR(i); +} + + // absolute value BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPABSB_MASK_VdqWdqR(bxInstruction_c *i) @@ -1945,6 +2188,66 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VBLENDMPD_MASK_VpdHpdWpdR(bxInstru // compress, expand +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPEXPANDB_MASK_VdqWdqR(bxInstruction_c *i) +{ + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result; + + Bit64u opmask = BX_READ_OPMASK(i->opmask()), mask = opmask; + unsigned len = i->getVL(), n = 0, k = 0; + + for (; n < len*16; n++, mask >>= 1) { + if (mask & 0x1) { + result.vmmubyte(n) = op.vmmubyte(k); + k++; + } + else { + result.vmmubyte(n) = 0; + } + } + + if (i->isZeroMasking()) { + BX_WRITE_AVX_REGZ(i->dst(), result, len); + } + else { + for (unsigned n=0; n < len; n++, opmask >>= 16) + xmm_pblendb(&BX_READ_AVX_REG_LANE(i->dst(), n), &result.vmm128(n), opmask); + + BX_CLEAR_AVX_REGZ(i->dst(), len); + } + + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPEXPANDW_MASK_VdqWdqR(bxInstruction_c *i) +{ + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result; + + Bit32u opmask = BX_READ_32BIT_OPMASK(i->opmask()), mask = opmask; + unsigned len = i->getVL(), n = 0, k = 0; + + for (; n < len*8; n++, mask >>= 1) { + if (mask & 0x1) { + result.vmm16u(n) = op.vmm16u(k); + k++; + } + else { + result.vmm16u(n) = 0; + } + } + + if (i->isZeroMasking()) { + BX_WRITE_AVX_REGZ(i->dst(), result, len); + } + else { + for (unsigned n=0; n < len; n++, opmask >>= 8) + xmm_pblendw(&BX_READ_AVX_REG_LANE(i->dst(), n), &result.vmm128(n), opmask); + + BX_CLEAR_AVX_REGZ(i->dst(), len); + } + + BX_NEXT_INSTR(i); +} + BX_INSF_TYPE
BX_CPP_AttrRegparmN(1) BX_CPU_C::VEXPANDPS_MASK_VpsWpsR(bxInstruction_c *i) { BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result; @@ -2005,6 +2308,62 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VEXPANDPD_MASK_VpdWpdR(bxInstructi BX_NEXT_INSTR(i); } +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMPRESSB_MASK_WdqVdq(bxInstruction_c *i) +{ + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result; + + Bit64u opmask = BX_READ_OPMASK(i->opmask()); + unsigned len = i->getVL(), n = 0, k = 0; + + for (; n < len*16; n++, opmask >>= 1) { + if (opmask & 0x1) { + result.vmmubyte(k) = op.vmmubyte(n); + k++; + } + if (! opmask) break; + } + + Bit64u writemask = (k < 64) ? ((BX_CONST64(1) << k) - 1) : BX_CONST64(0xffffffffffffffff); // k reaches 64 when every byte is selected; shifting a 64-bit value by 64 is undefined + + if (i->modC0()) { + avx512_write_regb_masked(i, &result, len, writemask); + } + else { + bx_address eaddr = BX_CPU_RESOLVE_ADDR(i); + avx_masked_store8(i, eaddr, &result, writemask); + } + + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMPRESSW_MASK_WdqVdq(bxInstruction_c *i) +{ + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result; + + Bit32u opmask = BX_READ_32BIT_OPMASK(i->opmask()); + unsigned len = i->getVL(), n = 0, k = 0; + + for (; n < len*8; n++, opmask >>= 1) { + if (opmask & 0x1) { + result.vmm16u(k) = op.vmm16u(n); + k++; + } + if (! opmask) break; + } + + Bit32u writemask = (k < 32) ? ((1u << k) - 1) : 0xffffffff; // k reaches 32 when every word is selected; shifting a 32-bit value by 32 is undefined + + if (i->modC0()) { + avx512_write_regw_masked(i, &result, len, writemask); + } + else { + bx_address eaddr = BX_CPU_RESOLVE_ADDR(i); + avx_masked_store16(i, eaddr, &result, writemask); + } + + BX_NEXT_INSTR(i); +} + BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCOMPRESSPS_MASK_WpsVps(bxInstruction_c *i) { BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result; diff --git a/bochs/cpu/cpu.h b/bochs/cpu/cpu.h index 19b4e81a2..073a42bd0 100644 --- a/bochs/cpu/cpu.h +++ b/bochs/cpu/cpu.h @@ -3661,6 +3661,11 @@ public: // for now...
BX_SMF BX_INSF_TYPE VEXPANDPS_MASK_VpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VEXPANDPD_MASK_VpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPCOMPRESSB_MASK_WdqVdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPCOMPRESSW_MASK_WdqVdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPEXPANDB_MASK_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPEXPANDW_MASK_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPMOVQB_WdqVdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VPMOVDB_WdqVdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VPMOVWB_WdqVdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); @@ -3776,6 +3781,20 @@ public: // for now... BX_SMF BX_INSF_TYPE VPDPBUSDS_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VPDPWSSD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VPDPWSSDS_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + + BX_SMF BX_INSF_TYPE VPSHLW_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPSHLWV_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPSHLD_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPSHLDV_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPSHLQ_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPSHLQV_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + + BX_SMF BX_INSF_TYPE VPSHRW_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPSHRWV_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPSHRD_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPSHRDV_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPSHRQ_VdqHdqWdqIbR(bxInstruction_c *) 
BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPSHRQV_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); #endif BX_SMF BX_INSF_TYPE LZCNT_GwEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); diff --git a/bochs/cpu/decoder/decoder.h b/bochs/cpu/decoder/decoder.h index 5d71ad271..c3c058972 100644 --- a/bochs/cpu/decoder/decoder.h +++ b/bochs/cpu/decoder/decoder.h @@ -97,7 +97,8 @@ enum { BX_ISA_AVX512_DQ, /* AVX-512DQ instruction */ BX_ISA_AVX512_BW, /* AVX-512 Byte/Word instruction */ BX_ISA_AVX512_VL, /* AVX-512 Vector Length extensions */ - BX_ISA_AVX512_VBMI, /* AVX-512 Vector Bit Manipulation Instructions */ + BX_ISA_AVX512_VBMI, /* AVX-512 VBMI : Vector Bit Manipulation Instructions */ + BX_ISA_AVX512_VBMI2, /* AVX-512 VBMI2 : Vector Bit Manipulation Instructions */ BX_ISA_AVX512_IFMA52, /* AVX-512 IFMA52 Instructions */ BX_ISA_AVX512_VPOPCNTDQ, /* AVX-512 VPOPCNTD/VPOPCNTQ Instructions */ BX_ISA_AVX512_VNNI, /* AVX-512 VNNI Instructions */ diff --git a/bochs/cpu/decoder/fetchdecode_evex.h b/bochs/cpu/decoder/fetchdecode_evex.h index 10dbcf624..bd73ddd3b 100644 --- a/bochs/cpu/decoder/fetchdecode_evex.h +++ b/bochs/cpu/decoder/fetchdecode_evex.h @@ -2084,10 +2084,10 @@ static const BxExtOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = { /* 6E */ { 0, BX_IA_ERROR }, /* 6F k0 */ { 0, BX_IA_ERROR }, /* 6F */ { 0, BX_IA_ERROR }, - /* 70 k0 */ { 0, BX_IA_ERROR }, - /* 70 */ { 0, BX_IA_ERROR }, - /* 71 k0 */ { 0, BX_IA_ERROR }, - /* 71 */ { 0, BX_IA_ERROR }, + /* 70 k0 */ { BxVexW1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VPSHLW_VdqHdqWdqIb_Kmask }, + /* 70 */ { BxVexW1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VPSHLW_VdqHdqWdqIb_Kmask }, + /* 71 k0 */ { BxAliasVexW | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VPSHLD_VdqHdqWdqIb_Kmask }, + /* 71 */ { BxAliasVexW | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VPSHLD_VdqHdqWdqIb_Kmask }, /* 72 k0 */ { 0, BX_IA_ERROR }, /* 72 */ { 0, BX_IA_ERROR }, /* 73 k0 */ { 0, BX_IA_ERROR }, diff --git
a/bochs/cpu/decoder/ia_opcodes.def b/bochs/cpu/decoder/ia_opcodes.def index 18f8f550d..c3dec5504 100644 --- a/bochs/cpu/decoder/ia_opcodes.def +++ b/bochs/cpu/decoder/ia_opcodes.def @@ -3584,8 +3584,8 @@ bx_define_opcode(BX_IA_V512_VPERMT2PD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST bx_define_opcode(BX_IA_V512_VPERMI2PS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMI2PS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_mVps, OP_NONE, BX_PREPARE_EVEX_NO_SAE) bx_define_opcode(BX_IA_V512_VPERMI2PD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMI2PD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_mVpd, OP_NONE, BX_PREPARE_EVEX_NO_SAE) -bx_define_opcode(BX_IA_V512_VPERMD_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMPS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE) -bx_define_opcode(BX_IA_V512_VPERMQ_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMPD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE) +bx_define_opcode(BX_IA_V512_VPERMD_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMPS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_mVdq32, OP_NONE, BX_PREPARE_EVEX_NO_SAE) +bx_define_opcode(BX_IA_V512_VPERMQ_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMPD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_mVdq64, OP_NONE, BX_PREPARE_EVEX_NO_SAE) bx_define_opcode(BX_IA_V512_VPERMPS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMPS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_mVps, OP_NONE, BX_PREPARE_EVEX_NO_SAE) bx_define_opcode(BX_IA_V512_VPERMPD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMPD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_mVpd, OP_NONE, BX_PREPARE_EVEX_NO_SAE) @@ -3593,8 +3593,8 @@ bx_define_opcode(BX_IA_V512_VPERMPD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_V 
bx_define_opcode(BX_IA_V512_VPCONFLICTD_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPCONFLICTD_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq32, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE) bx_define_opcode(BX_IA_V512_VPCONFLICTQ_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPCONFLICTQ_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq64, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE) -bx_define_opcode(BX_IA_V512_VPLZCNTD_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPLZCNTD_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq32, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE) -bx_define_opcode(BX_IA_V512_VPLZCNTQ_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPLZCNTQ_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq64, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE) +bx_define_opcode(BX_IA_V512_VPLZCNTD_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPLZCNTD_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE) +bx_define_opcode(BX_IA_V512_VPLZCNTQ_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPLZCNTQ_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE) bx_define_opcode(BX_IA_V512_VPMOVM2B_VdqKEq, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVM2B_VdqKEqR, BX_ISA_AVX512_BW, OP_Vdq, OP_KEq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPMOVM2W_VdqKEd, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVM2W_VdqKEdR, BX_ISA_AVX512_BW, OP_Vdq, OP_KEd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) @@ -3608,8 +3608,18 @@ bx_define_opcode(BX_IA_V512_VPMOVQ2M_KGbWdq, &BX_CPU_C::BxError, &BX_CPU_C::VPMO bx_define_opcode(BX_IA_V512_VPOPCNTB_VdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPOPCNTB_MASK_VdqWdqR, BX_ISA_AVX512_BITALG, OP_Vdq, OP_mVdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPOPCNTW_VdqWdq_Kmask, 
&BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPOPCNTW_MASK_VdqWdqR, BX_ISA_AVX512_BITALG, OP_Vdq, OP_mVdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) -bx_define_opcode(BX_IA_V512_VPOPCNTD_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPOPCNTD_MASK_VdqWdqR, BX_ISA_AVX512_VPOPCNTDQ, OP_Vdq, OP_mVdq32, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE) -bx_define_opcode(BX_IA_V512_VPOPCNTQ_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPOPCNTQ_MASK_VdqWdqR, BX_ISA_AVX512_VPOPCNTDQ, OP_Vdq, OP_mVdq64, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE) +bx_define_opcode(BX_IA_V512_VPOPCNTD_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPOPCNTD_MASK_VdqWdqR, BX_ISA_AVX512_VPOPCNTDQ, OP_Vdq, OP_mVdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE) +bx_define_opcode(BX_IA_V512_VPOPCNTQ_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPOPCNTQ_MASK_VdqWdqR, BX_ISA_AVX512_VPOPCNTDQ, OP_Vdq, OP_mVdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE) + +bx_define_opcode(BX_IA_V512_VPSHRD_VdqHdqWdqIb_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPSHRD_VdqHdqWdqIbR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_Ib, BX_PREPARE_EVEX_NO_SAE) +bx_define_opcode(BX_IA_V512_VPSHRQ_VdqHdqWdqIb_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPSHRQ_VdqHdqWdqIbR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_Ib, BX_PREPARE_EVEX_NO_SAE) +bx_define_opcode(BX_IA_V512_VPSHRDV_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPSHRDV_VdqHdqWdqR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE) +bx_define_opcode(BX_IA_V512_VPSHRQV_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPSHRQV_VdqHdqWdqR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE) + +bx_define_opcode(BX_IA_V512_VPSHLD_VdqHdqWdqIb_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPSHLD_VdqHdqWdqIbR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_Ib, BX_PREPARE_EVEX_NO_SAE) 
+bx_define_opcode(BX_IA_V512_VPSHLQ_VdqHdqWdqIb_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPSHLQ_VdqHdqWdqIbR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_Ib, BX_PREPARE_EVEX_NO_SAE) +bx_define_opcode(BX_IA_V512_VPSHLDV_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPSHLDV_VdqHdqWdqR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE) +bx_define_opcode(BX_IA_V512_VPSHLQV_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPSHLQV_VdqHdqWdqR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE) // VexW alias // VexW64 aliased @@ -3683,4 +3693,7 @@ bx_define_opcode(BX_IA_V512_VAESDEC_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C bx_define_opcode(BX_IA_V512_VAESDECLAST_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VAESDECLAST_VdqHdqWdqR, BX_ISA_VAES_VPCLMULQDQ, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPCLMULQDQ_VdqHdqWdqIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPCLMULQDQ_VdqHdqWdqIbR, BX_ISA_VAES_VPCLMULQDQ, OP_Vdq, OP_Hdq, OP_Wdq, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VPSHLW_VdqHdqWdqIb_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPSHLW_VdqHdqWdqIbR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VPSHLWV_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPSHLWV_VdqHdqWdqR, BX_ISA_AVX512_VBMI2, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) + #endif // BX_SUPPORT_EVEX diff --git a/bochs/cpu/generic_cpuid.cc b/bochs/cpu/generic_cpuid.cc index f1e75f70f..56010b6dc 100644 --- a/bochs/cpu/generic_cpuid.cc +++ b/bochs/cpu/generic_cpuid.cc @@ -1441,6 +1441,9 @@ Bit32u bx_generic_cpuid_t::get_ext4_cpuid_features(void) const if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_UMIP)) features |= BX_CPUID_EXT4_UMIP; + if 
(BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_AVX512_VBMI2)) + features |= BX_CPUID_EXT4_AVX512_VBMI2; + if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_VAES_VPCLMULQDQ)) features |= BX_CPUID_EXT4_VAES | BX_CPUID_EXT4_VPCLMULQDQ;