added FMA4 AMD instructions support, fixed mem access length for Intel scalar FMA instructions

This commit is contained in:
Stanislav Shwartsman 2011-10-07 14:09:35 +00:00
parent aad57310c2
commit 2580d8c46d
14 changed files with 464 additions and 76 deletions

View File

@ -218,6 +218,10 @@ cpu: cpuid_limit_winnt=0
# Select BMI1/BMI2 instructions support.
# This option exists only if Bochs is compiled with the --enable-avx option.
#
# FMA4:
# Select AMD four-operand FMA (FMA4) instructions support.
# This option exists only if Bochs is compiled with the --enable-avx option.
#
# X86-64:
# Enable x86-64 and long mode support.
# This option exists only if Bochs is compiled with x86-64 support.

View File

@ -27,7 +27,7 @@ Detailed change log :
of supported configurations. When this option is used, Bochs CPU emulation
engine is automatically configured to emulate a specific real hardware CPU,
including exact CPUID matching reference hardware. Check .bochsrc example
or read docs for list of supported configurations or more details.
or check user manual for list of supported configurations and more details.
* It is also possible to choose the CPU to emulate from Bochs command line
using command line interface to .bochsrc: "cpu::model <cpu_name>"
* Query for supported CPU models using command line option: -help cpu.
@ -59,6 +59,8 @@ Detailed change log :
instructions support can be enabled using .bochsrc CPUID option.
- Added support for AMD SSE4A emulation; the instructions can be enabled
using the .bochsrc CPUID option.
- Added support for AMD FMA4 emulation; the instructions can be enabled
using the .bochsrc CPUID option.
- Implemented VMX preemption timer VMEXIT control (patch by Jianan Hao)
- Implemented Pause-Loop Exiting Secondary VMEXIT control.
- Added INVPCID instruction emulation support.

View File

@ -45,6 +45,7 @@ cpuid
avx_f16c
avx_fma
bmi
fma4
apic
x86_64
1g_pages

View File

@ -290,7 +290,7 @@ void bx_init_options()
// cpuid subtree
#if BX_CPU_LEVEL >= 4
bx_list_c *cpuid_param = new bx_list_c(root_param, "cpuid", "CPUID Options", 25);
bx_list_c *cpuid_param = new bx_list_c(root_param, "cpuid", "CPUID Options", 26);
new bx_param_string_c(cpuid_param,
"vendor_string",
@ -401,6 +401,10 @@ void bx_init_options()
"Support for Bit Manipulation Instructions (BMI)",
0, 2,
0);
new bx_param_bool_c(cpuid_param,
"fma4", "Support for AMD four operand FMA instructions",
"Support for AMD FMA4 instructions",
0);
#endif
#if BX_SUPPORT_X86_64
new bx_param_bool_c(cpuid_param,
@ -2715,6 +2719,10 @@ static int parse_line_formatted(const char *context, int num_params, char *param
}
} else if (!strncmp(params[i], "bmi=", 4)) {
SIM->get_param_num(BXPN_CPUID_BMI)->set(atol(&params[i][4]));
} else if (!strncmp(params[i], "fma4=", 5)) {
if (parse_param_bool(params[i], 5, BXPN_CPUID_FMA4) < 0) {
PARSE_ERR(("%s: cpuid directive malformed.", context));
}
#endif
#if BX_SUPPORT_X86_64
} else if (!strncmp(params[i], "x86_64=", 7)) {
@ -4009,11 +4017,12 @@ int bx_write_configuration(const char *rc, int overwrite)
SIM->get_param_bool(BXPN_CPUID_MOVBE)->get(),
SIM->get_param_bool(BXPN_CPUID_SMEP)->get());
#if BX_SUPPORT_AVX
fprintf(fp, ", avx=%d, avx_f16c=%d, avx_fma=%d, bmi=%d",
fprintf(fp, ", avx=%d, avx_f16c=%d, avx_fma=%d, bmi=%d, fma4=%d",
SIM->get_param_num(BXPN_CPUID_AVX)->get(),
SIM->get_param_bool(BXPN_CPUID_AVX_F16CVT)->get(),
SIM->get_param_bool(BXPN_CPUID_AVX_FMA)->get(),
SIM->get_param_num(BXPN_CPUID_BMI)->get());
SIM->get_param_num(BXPN_CPUID_BMI)->get(),
SIM->get_param_bool(BXPN_CPUID_FMA4)->get());
#endif
#if BX_SUPPORT_X86_64
fprintf(fp, ", x86_64=%d, 1g_pages=%d, pcid=%d, fsgsbase=%d",

View File

@ -32,6 +32,10 @@ extern void mxcsr_to_softfloat_status_word(float_status_t &status, bx_mxcsr_t mx
#include "simd_pfp.h"
//////////////////////////
// AVX FMA Instructions //
//////////////////////////
// FMADDPD
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMADD132PD_VpdHpdWpdR(bxInstruction_c *i)
{
@ -392,7 +396,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMSUBADD132PD_VpdHpdWpdR(bxInstru
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
fmasubaddpd(&op1.avx128(n), &op3.avx128(n), &op2.avx128(n), status);
fmsubaddpd(&op1.avx128(n), &op3.avx128(n), &op2.avx128(n), status);
check_exceptionsSSE(status.float_exception_flags);
@ -412,7 +416,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMSUBADD213PD_VpdHpdWpdR(bxInstru
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
fmasubaddpd(&op2.avx128(n), &op1.avx128(n), &op3.avx128(n), status);
fmsubaddpd(&op2.avx128(n), &op1.avx128(n), &op3.avx128(n), status);
check_exceptionsSSE(status.float_exception_flags);
@ -432,7 +436,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMSUBADD231PD_VpdHpdWpdR(bxInstru
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
fmasubaddpd(&op2.avx128(n), &op3.avx128(n), &op1.avx128(n), status);
fmsubaddpd(&op2.avx128(n), &op3.avx128(n), &op1.avx128(n), status);
check_exceptionsSSE(status.float_exception_flags);
@ -453,7 +457,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMSUBADD132PS_VpsHpsWpsR(bxInstru
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
fmasubaddps(&op1.avx128(n), &op3.avx128(n), &op2.avx128(n), status);
fmsubaddps(&op1.avx128(n), &op3.avx128(n), &op2.avx128(n), status);
check_exceptionsSSE(status.float_exception_flags);
@ -473,7 +477,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMSUBADD213PS_VpsHpsWpsR(bxInstru
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
fmasubaddps(&op2.avx128(n), &op1.avx128(n), &op3.avx128(n), status);
fmsubaddps(&op2.avx128(n), &op1.avx128(n), &op3.avx128(n), status);
check_exceptionsSSE(status.float_exception_flags);
@ -493,7 +497,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMSUBADD231PS_VpsHpsWpsR(bxInstru
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
fmasubaddps(&op2.avx128(n), &op3.avx128(n), &op1.avx128(n), status);
fmsubaddps(&op2.avx128(n), &op3.avx128(n), &op1.avx128(n), status);
check_exceptionsSSE(status.float_exception_flags);
@ -1180,4 +1184,120 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFNMSUB231SS_VpsHssWssR(bxInstruct
BX_NEXT_INSTR(i);
}
/////////////////////////////
// FMA4 (AMD) Instructions //
/////////////////////////////
// Expands to the register-form handler of one packed (vector) FMA4 instruction.
//   HANDLER    - name of the BX_CPU_C member function being defined
//   func       - helper invoked once per 128-bit lane as func(&a, &b, &c, status);
//                whatever it leaves in its first operand becomes the result
//   src2, src3 - expressions giving the register numbers of sources two and three
// Source one is always the register named by i->vvv(). SIMD exceptions are
// checked before the result is written to the register named by i->nnn().
#define FMA4_OP_VECTOR(HANDLER, func, src2, src3)                               \
  BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i)   \
  {                                                                             \
    BxPackedAvxRegister acc  = BX_READ_AVX_REG(i->vvv());                       \
    BxPackedAvxRegister srcB = BX_READ_AVX_REG(src2);                           \
    BxPackedAvxRegister srcC = BX_READ_AVX_REG(src3);                           \
    unsigned lanes = i->getVL();                                                \
                                                                                \
    float_status_t fp_status;                                                   \
    mxcsr_to_softfloat_status_word(fp_status, MXCSR);                           \
                                                                                \
    /* one helper call per 128-bit lane, result accumulates in acc */           \
    unsigned lane = 0;                                                          \
    while (lane < lanes) {                                                      \
      (func)(&acc.avx128(lane), &srcB.avx128(lane), &srcC.avx128(lane),         \
             fp_status);                                                        \
      lane++;                                                                   \
    }                                                                           \
                                                                                \
    check_exceptionsSSE(fp_status.float_exception_flags);                       \
                                                                                \
    BX_WRITE_AVX_REGZ(i->nnn(), acc, lanes);                                    \
                                                                                \
    BX_NEXT_INSTR(i);                                                           \
  }

// In every pair below the ...WxxVIb handler takes source two from i->rm() and
// source three from i->Ib(); the ...VIbWxx handler swaps the two.

// fused multiply with alternating add/subtract
FMA4_OP_VECTOR(VFMADDSUBPS_VpsHpsWpsVIbR, fmaddsubps, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFMADDSUBPS_VpsHpsVIbWpsR, fmaddsubps, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFMADDSUBPD_VpdHpdWpdVIbR, fmaddsubpd, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFMADDSUBPD_VpdHpdVIbWpdR, fmaddsubpd, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFMSUBADDPS_VpsHpsWpsVIbR, fmsubaddps, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFMSUBADDPS_VpsHpsVIbWpsR, fmsubaddps, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFMSUBADDPD_VpdHpdWpdVIbR, fmsubaddpd, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFMSUBADDPD_VpdHpdVIbWpdR, fmsubaddpd, i->Ib(), i->rm())

// fused multiply-add / multiply-subtract
FMA4_OP_VECTOR(VFMADDPS_VpsHpsWpsVIbR, fmaddps, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFMADDPS_VpsHpsVIbWpsR, fmaddps, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFMADDPD_VpdHpdWpdVIbR, fmaddpd, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFMADDPD_VpdHpdVIbWpdR, fmaddpd, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFMSUBPS_VpsHpsWpsVIbR, fmsubps, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFMSUBPS_VpsHpsVIbWpsR, fmsubps, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFMSUBPD_VpdHpdWpdVIbR, fmsubpd, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFMSUBPD_VpdHpdVIbWpdR, fmsubpd, i->Ib(), i->rm())

// negated fused multiply-add / multiply-subtract
FMA4_OP_VECTOR(VFNMADDPS_VpsHpsWpsVIbR, fnmaddps, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFNMADDPS_VpsHpsVIbWpsR, fnmaddps, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFNMADDPD_VpdHpdWpdVIbR, fnmaddpd, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFNMADDPD_VpdHpdVIbWpdR, fnmaddpd, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFNMSUBPS_VpsHpsWpsVIbR, fnmsubps, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFNMSUBPS_VpsHpsVIbWpsR, fnmsubps, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFNMSUBPD_VpdHpdWpdVIbR, fnmsubpd, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFNMSUBPD_VpdHpdVIbWpdR, fnmsubpd, i->Ib(), i->rm())
// Expands to the register-form handler of one scalar single-precision FMA4
// instruction.
//   HANDLER    - name of the BX_CPU_C member function being defined
//   func       - helper called as func(a, b, c, status); its return value is
//                stored in the low dword of the result
//   src2, src3 - expressions giving the register numbers of sources two and three
// Only the low dword of each source register is read; source one is the
// register named by i->vvv(). Every other bit of the 128-bit value written to
// i->nnn() is zero.
#define FMA4_SINGLE_SCALAR(HANDLER, func, src2, src3)                           \
  BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i)   \
  {                                                                             \
    float32 a = BX_READ_XMM_REG_LO_DWORD(i->vvv());                             \
    float32 b = BX_READ_XMM_REG_LO_DWORD(src2);                                 \
    float32 c = BX_READ_XMM_REG_LO_DWORD(src3);                                 \
                                                                                \
    float_status_t fp_status;                                                   \
    mxcsr_to_softfloat_status_word(fp_status, MXCSR);                           \
                                                                                \
    /* start from an all-zero register, then fill in the low dword */           \
    BxPackedXmmRegister result;                                                 \
    result.xmm64u(1) = 0;                                                       \
    result.xmm64u(0) = 0;                                                       \
    result.xmm32u(0) = (func)(a, b, c, fp_status);                              \
                                                                                \
    check_exceptionsSSE(fp_status.float_exception_flags);                       \
                                                                                \
    BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), result);                              \
                                                                                \
    BX_NEXT_INSTR(i);                                                           \
  }

// ...WssVIb handlers: source two from i->rm(), source three from i->Ib();
// ...VIbWss handlers: the two are swapped.
FMA4_SINGLE_SCALAR(VFMADDSS_VssHssWssVIbR, float32_fmadd, i->rm(), i->Ib())
FMA4_SINGLE_SCALAR(VFMADDSS_VssHssVIbWssR, float32_fmadd, i->Ib(), i->rm())
FMA4_SINGLE_SCALAR(VFMSUBSS_VssHssWssVIbR, float32_fmsub, i->rm(), i->Ib())
FMA4_SINGLE_SCALAR(VFMSUBSS_VssHssVIbWssR, float32_fmsub, i->Ib(), i->rm())
FMA4_SINGLE_SCALAR(VFNMADDSS_VssHssWssVIbR, float32_fnmadd, i->rm(), i->Ib())
FMA4_SINGLE_SCALAR(VFNMADDSS_VssHssVIbWssR, float32_fnmadd, i->Ib(), i->rm())
FMA4_SINGLE_SCALAR(VFNMSUBSS_VssHssWssVIbR, float32_fnmsub, i->rm(), i->Ib())
FMA4_SINGLE_SCALAR(VFNMSUBSS_VssHssVIbWssR, float32_fnmsub, i->Ib(), i->rm())
// Expands to the register-form handler of one scalar double-precision FMA4
// instruction.
//   HANDLER    - name of the BX_CPU_C member function being defined
//   func       - helper called as func(a, b, c, status); its return value is
//                stored in the low qword of the result
//   src2, src3 - expressions giving the register numbers of sources two and three
// Only the low qword of each source register is read; source one is the
// register named by i->vvv(). The upper qword of the 128-bit value written to
// i->nnn() is zero.
#define FMA4_DOUBLE_SCALAR(HANDLER, func, src2, src3)                           \
  BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i)   \
  {                                                                             \
    float64 a = BX_READ_XMM_REG_LO_QWORD(i->vvv());                             \
    float64 b = BX_READ_XMM_REG_LO_QWORD(src2);                                 \
    float64 c = BX_READ_XMM_REG_LO_QWORD(src3);                                 \
                                                                                \
    float_status_t fp_status;                                                   \
    mxcsr_to_softfloat_status_word(fp_status, MXCSR);                           \
                                                                                \
    BxPackedXmmRegister result;                                                 \
    result.xmm64u(1) = 0;                                                       \
    result.xmm64u(0) = (func)(a, b, c, fp_status);                              \
                                                                                \
    check_exceptionsSSE(fp_status.float_exception_flags);                       \
                                                                                \
    BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), result);                              \
                                                                                \
    BX_NEXT_INSTR(i);                                                           \
  }

// ...WsdVIb handlers: source two from i->rm(), source three from i->Ib();
// ...VIbWsd handlers: the two are swapped.
FMA4_DOUBLE_SCALAR(VFMADDSD_VsdHsdWsdVIbR, float64_fmadd, i->rm(), i->Ib())
FMA4_DOUBLE_SCALAR(VFMADDSD_VsdHsdVIbWsdR, float64_fmadd, i->Ib(), i->rm())
FMA4_DOUBLE_SCALAR(VFMSUBSD_VsdHsdWsdVIbR, float64_fmsub, i->rm(), i->Ib())
FMA4_DOUBLE_SCALAR(VFMSUBSD_VsdHsdVIbWsdR, float64_fmsub, i->Ib(), i->rm())
FMA4_DOUBLE_SCALAR(VFNMADDSD_VsdHsdWsdVIbR, float64_fnmadd, i->rm(), i->Ib())
FMA4_DOUBLE_SCALAR(VFNMADDSD_VsdHsdVIbWsdR, float64_fnmadd, i->Ib(), i->rm())
FMA4_DOUBLE_SCALAR(VFNMSUBSD_VsdHsdWsdVIbR, float64_fnmsub, i->rm(), i->Ib())
FMA4_DOUBLE_SCALAR(VFNMSUBSD_VsdHsdVIbWsdR, float64_fnmsub, i->Ib(), i->rm())
#endif

View File

@ -2896,6 +2896,49 @@ public: // for now...
BX_SMF BX_INSF_TYPE PEXT_GqEqBqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE PDEP_GqEqBqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
/* BMI */
/* FMA4 (AMD) */
BX_SMF BX_INSF_TYPE VFMADDSUBPS_VpsHpsWpsVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDSUBPS_VpsHpsVIbWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDSUBPD_VpdHpdWpdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDSUBPD_VpdHpdVIbWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBADDPS_VpsHpsWpsVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBADDPS_VpsHpsVIbWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBADDPD_VpdHpdWpdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBADDPD_VpdHpdVIbWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDPS_VpsHpsWpsVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDPS_VpsHpsVIbWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDPD_VpdHpdWpdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDPD_VpdHpdVIbWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDSS_VssHssWssVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDSS_VssHssVIbWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDSD_VsdHsdWsdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDSD_VsdHsdVIbWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBPS_VpsHpsWpsVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBPS_VpsHpsVIbWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBPD_VpdHpdWpdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBPD_VpdHpdVIbWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBSS_VssHssWssVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBSS_VssHssVIbWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBSD_VsdHsdWsdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBSD_VsdHsdVIbWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMADDPS_VpsHpsWpsVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMADDPS_VpsHpsVIbWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMADDPD_VpdHpdWpdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMADDPD_VpdHpdVIbWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMADDSS_VssHssWssVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMADDSS_VssHssVIbWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMADDSD_VsdHsdWsdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMADDSD_VsdHsdVIbWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMSUBPS_VpsHpsWpsVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMSUBPS_VpsHpsVIbWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMSUBPD_VpdHpdWpdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMSUBPD_VpdHpdVIbWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMSUBSS_VssHssWssVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMSUBSS_VssHssVIbWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMSUBSD_VsdHsdWsdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMSUBSD_VsdHsdVIbWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
/* FMA4 (AMD) */
#endif
BX_SMF BX_INSF_TYPE LZCNT_GwEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

View File

@ -358,6 +358,108 @@ static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3af0[2] = {
};
// BMI
// FMA4
// Two-entry sub-tables for the AMD FMA4 opcodes. Each table is named after the
// opcode it serves (the 0f3aXX suffix) and is referenced from the
// BxOpcodeTableAVX entries flagged BxSplitVexW | BxImmediate_Ib4.
// Entry 0 holds the ...WxxVIb operand order and entry 1 the ...VIbWxx order;
// presumably the entry is picked by VEX.W (0 / 1) -- TODO confirm in the decoder.
// Every entry carries the BxPrefixSSE66 attribute.

// 5C: VFMADDSUBPS
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a5c[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMADDSUBPS_VpsHpsWpsVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMADDSUBPS_VpsHpsVIbWps }
};
// 5D: VFMADDSUBPD
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a5d[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMADDSUBPD_VpdHpdWpdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMADDSUBPD_VpdHpdVIbWpd }
};
// 5E: VFMSUBADDPS
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a5e[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMSUBADDPS_VpsHpsWpsVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMSUBADDPS_VpsHpsVIbWps }
};
// 5F: VFMSUBADDPD
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a5f[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMSUBADDPD_VpdHpdWpdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMSUBADDPD_VpdHpdVIbWpd }
};
// 68: VFMADDPS
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a68[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMADDPS_VpsHpsWpsVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMADDPS_VpsHpsVIbWps }
};
// 69: VFMADDPD
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a69[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMADDPD_VpdHpdWpdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMADDPD_VpdHpdVIbWpd }
};
// 6A: VFMADDSS
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a6a[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMADDSS_VssHssWssVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMADDSS_VssHssVIbWss }
};
// 6B: VFMADDSD
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a6b[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMADDSD_VsdHsdWsdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMADDSD_VsdHsdVIbWsd }
};
// 6C: VFMSUBPS
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a6c[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMSUBPS_VpsHpsWpsVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMSUBPS_VpsHpsVIbWps }
};
// 6D: VFMSUBPD
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a6d[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMSUBPD_VpdHpdWpdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMSUBPD_VpdHpdVIbWpd }
};
// 6E: VFMSUBSS
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a6e[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMSUBSS_VssHssWssVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMSUBSS_VssHssVIbWss }
};
// 6F: VFMSUBSD
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a6f[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMSUBSD_VsdHsdWsdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMSUBSD_VsdHsdVIbWsd }
};
// 78: VFNMADDPS
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a78[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFNMADDPS_VpsHpsWpsVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFNMADDPS_VpsHpsVIbWps }
};
// 79: VFNMADDPD
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a79[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFNMADDPD_VpdHpdWpdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFNMADDPD_VpdHpdVIbWpd }
};
// 7A: VFNMADDSS
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a7a[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFNMADDSS_VssHssWssVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFNMADDSS_VssHssVIbWss }
};
// 7B: VFNMADDSD
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a7b[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFNMADDSD_VsdHsdWsdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFNMADDSD_VsdHsdVIbWsd }
};
// 7C: VFNMSUBPS
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a7c[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFNMSUBPS_VpsHpsWpsVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFNMSUBPS_VpsHpsVIbWps }
};
// 7D: VFNMSUBPD
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a7d[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFNMSUBPD_VpdHpdWpdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFNMSUBPD_VpdHpdVIbWpd }
};
// 7E: VFNMSUBSS
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a7e[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFNMSUBSS_VssHssWssVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFNMSUBSS_VssHssVIbWss }
};
// 7F: VFNMSUBSD
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a7f[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFNMSUBSD_VsdHsdWsdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFNMSUBSD_VsdHsdVIbWsd }
};
// FMA4
/* ************************************************************************ */
/* ********** */
@ -1345,10 +1447,10 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 59 /0 */ { 0, BX_IA_ERROR },
/* 5A /0 */ { 0, BX_IA_ERROR },
/* 5B /0 */ { 0, BX_IA_ERROR },
/* 5C /0 */ { 0, BX_IA_ERROR },
/* 5D /0 */ { 0, BX_IA_ERROR },
/* 5E /0 */ { 0, BX_IA_ERROR },
/* 5F /0 */ { 0, BX_IA_ERROR },
/* 5C /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a5c },
/* 5D /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a5d },
/* 5E /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a5e },
/* 5F /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a5f },
/* 60 /0 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V128_VPCMPESTRM_VdqWdqIb },
/* 61 /0 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V128_VPCMPESTRI_VdqWdqIb },
/* 62 /0 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V128_VPCMPISTRM_VdqWdqIb },
@ -1357,14 +1459,14 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 65 /0 */ { 0, BX_IA_ERROR },
/* 66 /0 */ { 0, BX_IA_ERROR },
/* 67 /0 */ { 0, BX_IA_ERROR },
/* 68 /0 */ { 0, BX_IA_ERROR },
/* 69 /0 */ { 0, BX_IA_ERROR },
/* 6A /0 */ { 0, BX_IA_ERROR },
/* 6B /0 */ { 0, BX_IA_ERROR },
/* 6C /0 */ { 0, BX_IA_ERROR },
/* 6D /0 */ { 0, BX_IA_ERROR },
/* 6E /0 */ { 0, BX_IA_ERROR },
/* 6F /0 */ { 0, BX_IA_ERROR },
/* 68 /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a68 },
/* 69 /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a69 },
/* 6A /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6a },
/* 6B /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6b },
/* 6C /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6c },
/* 6D /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6d },
/* 6E /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6e },
/* 6F /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6f },
/* 70 /0 */ { 0, BX_IA_ERROR },
/* 71 /0 */ { 0, BX_IA_ERROR },
/* 72 /0 */ { 0, BX_IA_ERROR },
@ -1373,14 +1475,14 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 75 /0 */ { 0, BX_IA_ERROR },
/* 76 /0 */ { 0, BX_IA_ERROR },
/* 77 /0 */ { 0, BX_IA_ERROR },
/* 78 /0 */ { 0, BX_IA_ERROR },
/* 79 /0 */ { 0, BX_IA_ERROR },
/* 7A /0 */ { 0, BX_IA_ERROR },
/* 7B /0 */ { 0, BX_IA_ERROR },
/* 7C /0 */ { 0, BX_IA_ERROR },
/* 7D /0 */ { 0, BX_IA_ERROR },
/* 7E /0 */ { 0, BX_IA_ERROR },
/* 7F /0 */ { 0, BX_IA_ERROR },
/* 78 /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a78 },
/* 79 /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a79 },
/* 7A /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7a },
/* 7B /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7b },
/* 7C /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7c },
/* 7D /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7d },
/* 7E /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7e },
/* 7F /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7f },
/* 80 /0 */ { 0, BX_IA_ERROR },
/* 81 /0 */ { 0, BX_IA_ERROR },
/* 82 /0 */ { 0, BX_IA_ERROR },
@ -2119,10 +2221,10 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 59 /1 */ { 0, BX_IA_ERROR },
/* 5A /1 */ { 0, BX_IA_ERROR },
/* 5B /1 */ { 0, BX_IA_ERROR },
/* 5C /1 */ { 0, BX_IA_ERROR },
/* 5D /1 */ { 0, BX_IA_ERROR },
/* 5E /1 */ { 0, BX_IA_ERROR },
/* 5F /1 */ { 0, BX_IA_ERROR },
/* 5C /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a5c },
/* 5D /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a5d },
/* 5E /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a5e },
/* 5F /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a5f },
/* 60 /1 */ { 0, BX_IA_ERROR },
/* 61 /1 */ { 0, BX_IA_ERROR },
/* 62 /1 */ { 0, BX_IA_ERROR },
@ -2131,14 +2233,14 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 65 /1 */ { 0, BX_IA_ERROR },
/* 66 /1 */ { 0, BX_IA_ERROR },
/* 67 /1 */ { 0, BX_IA_ERROR },
/* 68 /1 */ { 0, BX_IA_ERROR },
/* 69 /1 */ { 0, BX_IA_ERROR },
/* 6A /1 */ { 0, BX_IA_ERROR },
/* 6B /1 */ { 0, BX_IA_ERROR },
/* 6C /1 */ { 0, BX_IA_ERROR },
/* 6D /1 */ { 0, BX_IA_ERROR },
/* 6E /1 */ { 0, BX_IA_ERROR },
/* 6F /1 */ { 0, BX_IA_ERROR },
/* 68 /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a68 },
/* 69 /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a69 },
/* 6A /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6a },
/* 6B /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6b },
/* 6C /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6c },
/* 6D /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6d },
/* 6E /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6e },
/* 6F /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6f },
/* 70 /1 */ { 0, BX_IA_ERROR },
/* 71 /1 */ { 0, BX_IA_ERROR },
/* 72 /1 */ { 0, BX_IA_ERROR },
@ -2147,14 +2249,14 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 75 /1 */ { 0, BX_IA_ERROR },
/* 76 /1 */ { 0, BX_IA_ERROR },
/* 77 /1 */ { 0, BX_IA_ERROR },
/* 78 /1 */ { 0, BX_IA_ERROR },
/* 79 /1 */ { 0, BX_IA_ERROR },
/* 7A /1 */ { 0, BX_IA_ERROR },
/* 7B /1 */ { 0, BX_IA_ERROR },
/* 7C /1 */ { 0, BX_IA_ERROR },
/* 7D /1 */ { 0, BX_IA_ERROR },
/* 7E /1 */ { 0, BX_IA_ERROR },
/* 7F /1 */ { 0, BX_IA_ERROR },
/* 78 /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a78 },
/* 79 /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a79 },
/* 7A /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7a },
/* 7B /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7b },
/* 7C /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7c },
/* 7D /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7d },
/* 7E /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7e },
/* 7F /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7f },
/* 80 /1 */ { 0, BX_IA_ERROR },
/* 81 /1 */ { 0, BX_IA_ERROR },
/* 82 /1 */ { 0, BX_IA_ERROR },

View File

@ -818,6 +818,16 @@ void bx_generic_cpuid_t::init_isa_extensions_bitmask(void)
if (bmi_enabled >= 2)
features_bitmask |= BX_ISA_BMI2;
}
static bx_bool fma4_enabled = SIM->get_param_bool(BXPN_CPUID_FMA4)->get();
if (fma4_enabled) {
if (! avx_enabled) {
BX_PANIC(("PANIC: FMA4 emulation requires AVX support !"));
return;
}
features_bitmask |= BX_ISA_FMA4;
}
#endif // BX_SUPPORT_AVX
#endif // BX_SUPPORT_X86_64
@ -1297,6 +1307,9 @@ Bit32u bx_generic_cpuid_t::get_ext2_cpuid_features(void) const
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_SSE4A))
features |= BX_CPUID_EXT2_SSE4A;
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_FMA4))
features |= BX_CPUID_EXT2_FMA4;
return features;
}

View File

@ -1844,12 +1844,12 @@ bx_define_opcode(BX_IA_VFMADD231PD_VpdHpdWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C:
bx_define_opcode(BX_IA_VFMADD132PS_VpsHpsWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADD132PS_VpsHpsWpsR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADD213PS_VpsHpsWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADD213PS_VpsHpsWpsR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADD231PS_VpsHpsWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADD231PS_VpsHpsWpsR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADD132SD_VpdHsdWsd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADD132SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADD213SD_VpdHsdWsd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADD213SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADD231SD_VpdHsdWsd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADD231SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADD132SS_VpsHssWss, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADD132SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADD213SS_VpsHssWss, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADD213SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADD231SS_VpsHssWss, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADD231SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADD132SD_VpdHsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFMADD132SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADD213SD_VpdHsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFMADD213SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADD231SD_VpdHsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFMADD231SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADD132SS_VpsHssWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFMADD132SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADD213SS_VpsHssWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFMADD213SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADD231SS_VpsHssWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFMADD231SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDSUB132PD_VpdHpdWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDSUB132PD_VpdHpdWpdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDSUB213PD_VpdHpdWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDSUB213PD_VpdHpdWpdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDSUB231PD_VpdHpdWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDSUB231PD_VpdHpdWpdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
@ -1868,36 +1868,36 @@ bx_define_opcode(BX_IA_VFMSUB231PD_VpdHpdWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C:
bx_define_opcode(BX_IA_VFMSUB132PS_VpsHpsWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUB132PS_VpsHpsWpsR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUB213PS_VpsHpsWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUB213PS_VpsHpsWpsR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUB231PS_VpsHpsWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUB231PS_VpsHpsWpsR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUB132SD_VpdHsdWsd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUB132SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUB213SD_VpdHsdWsd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUB213SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUB231SD_VpdHsdWsd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUB231SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUB132SS_VpsHssWss, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUB132SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUB213SS_VpsHssWss, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUB213SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUB231SS_VpsHssWss, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUB231SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUB132SD_VpdHsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFMSUB132SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUB213SD_VpdHsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFMSUB213SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUB231SD_VpdHsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFMSUB231SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUB132SS_VpsHssWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFMSUB132SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUB213SS_VpsHssWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFMSUB213SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUB231SS_VpsHssWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFMSUB231SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD132PD_VpdHpdWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADD132PD_VpdHpdWpdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD213PD_VpdHpdWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADD213PD_VpdHpdWpdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD231PD_VpdHpdWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADD231PD_VpdHpdWpdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD132PS_VpsHpsWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADD132PS_VpsHpsWpsR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD213PS_VpsHpsWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADD213PS_VpsHpsWpsR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD231PS_VpsHpsWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADD231PS_VpsHpsWpsR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD132SD_VpdHsdWsd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADD132SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD213SD_VpdHsdWsd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADD213SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD231SD_VpdHsdWsd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADD231SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD132SS_VpsHssWss, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADD132SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD213SS_VpsHssWss, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADD213SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD231SS_VpsHssWss, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADD231SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD132SD_VpdHsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFNMADD132SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD213SD_VpdHsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFNMADD213SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD231SD_VpdHsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFNMADD231SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD132SS_VpsHssWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFNMADD132SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD213SS_VpsHssWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFNMADD213SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADD231SS_VpsHssWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFNMADD231SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB132PD_VpdHpdWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUB132PD_VpdHpdWpdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB213PD_VpdHpdWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUB213PD_VpdHpdWpdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB231PD_VpdHpdWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUB231PD_VpdHpdWpdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB132PS_VpsHpsWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUB132PS_VpsHpsWpsR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB213PS_VpsHpsWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUB213PS_VpsHpsWpsR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB231PS_VpsHpsWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUB231PS_VpsHpsWpsR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB132SD_VpdHsdWsd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUB132SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB213SD_VpdHsdWsd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUB213SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB231SD_VpdHsdWsd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUB231SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB132SS_VpsHssWss, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUB132SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB213SS_VpsHssWss, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUB213SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB231SS_VpsHssWss, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUB231SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB132SD_VpdHsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFNMSUB132SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB213SD_VpdHsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFNMSUB213SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB231SD_VpdHsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFNMSUB231SD_VpdHsdWsdR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB132SS_VpsHssWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFNMSUB132SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB213SS_VpsHssWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFNMSUB213SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUB231SS_VpsHssWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFNMSUB231SS_VpsHssWssR, BX_ISA_AVX_FMA, BX_PREPARE_AVX)
// AVX2 FMA
// BMI1
@ -1933,6 +1933,49 @@ bx_define_opcode(BX_IA_BZHI_GqEqBq, &BX_CPU_C::LOAD_Eq, &BX_CPU_C::BZHI_GqEqBqR,
bx_define_opcode(BX_IA_PEXT_GqEqBq, &BX_CPU_C::LOAD_Eq, &BX_CPU_C::PEXT_GqEqBqR, BX_ISA_BMI2, 0)
bx_define_opcode(BX_IA_PDEP_GqEqBq, &BX_CPU_C::LOAD_Eq, &BX_CPU_C::PDEP_GqEqBqR, BX_ISA_BMI2, 0)
// BMI2
// FMA4 (AMD)
// Opcode table rows for the AMD four-operand FMA (FMA4) instructions, all
// tagged BX_ISA_FMA4 and BX_PREPARE_AVX. Every mnemonic is registered twice,
// once per operand ordering (...WxxVIb and ...VIbWxx). Packed PS/PD rows use
// LOAD_Vector; scalar SS/SD rows use LOAD_Wss/LOAD_Wsd, presumably so the
// memory form only accesses a scalar-sized operand -- TODO confirm against
// the LOAD_* handlers.
bx_define_opcode(BX_IA_VFMADDSUBPS_VpsHpsWpsVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDSUBPS_VpsHpsWpsVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDSUBPS_VpsHpsVIbWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDSUBPS_VpsHpsVIbWpsR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDSUBPD_VpdHpdWpdVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDSUBPD_VpdHpdWpdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDSUBPD_VpdHpdVIbWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDSUBPD_VpdHpdVIbWpdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBADDPS_VpsHpsWpsVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUBADDPS_VpsHpsWpsVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBADDPS_VpsHpsVIbWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUBADDPS_VpsHpsVIbWpsR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBADDPD_VpdHpdWpdVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUBADDPD_VpdHpdWpdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBADDPD_VpdHpdVIbWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUBADDPD_VpdHpdVIbWpdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDPS_VpsHpsWpsVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDPS_VpsHpsWpsVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDPS_VpsHpsVIbWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDPS_VpsHpsVIbWpsR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDPD_VpdHpdWpdVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDPD_VpdHpdWpdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDPD_VpdHpdVIbWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDPD_VpdHpdVIbWpdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDSS_VssHssWssVIb, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFMADDSS_VssHssWssVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDSS_VssHssVIbWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFMADDSS_VssHssVIbWssR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDSD_VsdHsdWsdVIb, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFMADDSD_VsdHsdWsdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDSD_VsdHsdVIbWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFMADDSD_VsdHsdVIbWsdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBPS_VpsHpsWpsVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUBPS_VpsHpsWpsVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBPS_VpsHpsVIbWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUBPS_VpsHpsVIbWpsR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBPD_VpdHpdWpdVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUBPD_VpdHpdWpdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBPD_VpdHpdVIbWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUBPD_VpdHpdVIbWpdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBSS_VssHssWssVIb, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFMSUBSS_VssHssWssVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBSS_VssHssVIbWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFMSUBSS_VssHssVIbWssR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBSD_VsdHsdWsdVIb, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFMSUBSD_VsdHsdWsdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBSD_VsdHsdVIbWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFMSUBSD_VsdHsdVIbWsdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADDPS_VpsHpsWpsVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADDPS_VpsHpsWpsVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADDPS_VpsHpsVIbWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADDPS_VpsHpsVIbWpsR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADDPD_VpdHpdWpdVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADDPD_VpdHpdWpdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADDPD_VpdHpdVIbWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADDPD_VpdHpdVIbWpdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADDSS_VssHssWssVIb, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFNMADDSS_VssHssWssVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADDSS_VssHssVIbWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFNMADDSS_VssHssVIbWssR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADDSD_VsdHsdWsdVIb, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFNMADDSD_VsdHsdWsdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADDSD_VsdHsdVIbWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFNMADDSD_VsdHsdVIbWsdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUBPS_VpsHpsWpsVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUBPS_VpsHpsWpsVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUBPS_VpsHpsVIbWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUBPS_VpsHpsVIbWpsR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUBPD_VpdHpdWpdVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUBPD_VpdHpdWpdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUBPD_VpdHpdVIbWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUBPD_VpdHpdVIbWpdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUBSS_VssHssWssVIb, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFNMSUBSS_VssHssWssVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUBSS_VssHssVIbWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFNMSUBSS_VssHssVIbWssR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUBSD_VsdHsdWsdVIb, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFNMSUBSD_VsdHsdWsdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUBSD_VsdHsdVIbWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFNMSUBSD_VsdHsdVIbWsdR, BX_ISA_FMA4, BX_PREPARE_AVX)
// FMA4 (AMD)
#endif /* BX_SUPPORT_AVX */
bx_define_opcode(BX_IA_TZCNT_GwEw, &BX_CPU_C::LOAD_Ew, &BX_CPU_C::TZCNT_GwEwR, BX_ISA_BMI1, 0)

View File

@ -200,7 +200,7 @@ BX_CPP_INLINE void fmaddsubpd(BxPackedXmmRegister *op1, const BxPackedXmmRegiste
op1->xmm64u(1) = float64_muladd(op1->xmm64u(1), op2->xmm64u(1), op3->xmm64u(1), 0, status);
}
BX_CPP_INLINE void fmasubaddps(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, const BxPackedXmmRegister *op3, float_status_t &status)
BX_CPP_INLINE void fmsubaddps(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, const BxPackedXmmRegister *op3, float_status_t &status)
{
op1->xmm32u(0) = float32_muladd(op1->xmm32u(0), op2->xmm32u(0), op3->xmm32u(0), 0, status);
op1->xmm32u(1) = float32_muladd(op1->xmm32u(1), op2->xmm32u(1), op3->xmm32u(1), float_muladd_negate_c, status);
@ -208,7 +208,7 @@ BX_CPP_INLINE void fmasubaddps(BxPackedXmmRegister *op1, const BxPackedXmmRegist
op1->xmm32u(3) = float32_muladd(op1->xmm32u(3), op2->xmm32u(3), op3->xmm32u(3), float_muladd_negate_c, status);
}
BX_CPP_INLINE void fmasubaddpd(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, const BxPackedXmmRegister *op3, float_status_t &status)
BX_CPP_INLINE void fmsubaddpd(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, const BxPackedXmmRegister *op3, float_status_t &status)
{
op1->xmm64u(0) = float64_muladd(op1->xmm64u(0), op2->xmm64u(0), op3->xmm64u(0), 0, status);
op1->xmm64u(1) = float64_muladd(op1->xmm64u(1), op2->xmm64u(1), op3->xmm64u(1), float_muladd_negate_c, status);

View File

@ -3081,6 +3081,11 @@ This option exists only if Bochs compiled with --enable-avx option.
Select BMI1/BMI2 instructions support.
This option exists only if Bochs compiled with --enable-avx option.
</para>
<para><command>fma4</command></para>
<para>
Select support for AMD four-operand FMA (FMA4) instructions.
This option exists only if Bochs is compiled with the --enable-avx option.
</para>
<para><command>x86_64</command></para>
<para>
Enable x86-64 and long mode support.

View File

@ -258,6 +258,11 @@ bmi:
Select BMI1/BMI2 instructions support.
This option exists only if Bochs compiled with --enable-avx option.
fma4:
Select support for AMD four-operand FMA (FMA4) instructions.
This option exists only if Bochs is compiled with the --enable-avx option.
x86_64:
Enable x86-64 and long mode support.

View File

@ -242,6 +242,26 @@ float32 float32_div(float32, float32, float_status_t &status);
float32 float32_sqrt(float32, float_status_t &status);
float32 float32_muladd(float32, float32, float32, int flags, float_status_t &status);
// Single-precision fused multiply-add helper: forwards a, b, c and status to
// float32_muladd() with no modifier flags set (flags == 0).
BX_CPP_INLINE float32 float32_fmadd(float32 a, float32 b, float32 c, float_status_t &status)
{
  const int muladd_flags = 0;
  return float32_muladd(a, b, c, muladd_flags, status);
}
// Single-precision fused multiply-subtract helper: forwards to
// float32_muladd() with float_muladd_negate_c
// (presumably yielding a*b - c -- confirm against float32_muladd).
BX_CPP_INLINE float32 float32_fmsub(float32 a, float32 b, float32 c, float_status_t &status)
{
  const int muladd_flags = float_muladd_negate_c;
  return float32_muladd(a, b, c, muladd_flags, status);
}
// Single-precision negated multiply-add helper: forwards to
// float32_muladd() with float_muladd_negate_product
// (presumably yielding -(a*b) + c -- confirm against float32_muladd).
BX_CPP_INLINE float32 float32_fnmadd(float32 a, float32 b, float32 c, float_status_t &status)
{
  const int muladd_flags = float_muladd_negate_product;
  return float32_muladd(a, b, c, muladd_flags, status);
}
// Single-precision negated multiply-subtract helper: forwards to
// float32_muladd() with float_muladd_negate_result
// (presumably yielding -(a*b) - c -- confirm against float32_muladd).
BX_CPP_INLINE float32 float32_fnmsub(float32 a, float32 b, float32 c, float_status_t &status)
{
  const int muladd_flags = float_muladd_negate_result;
  return float32_muladd(a, b, c, muladd_flags, status);
}
int float32_compare(float32, float32, float_status_t &status);
int float32_compare_quiet(float32, float32, float_status_t &status);
@ -273,6 +293,26 @@ float64 float64_div(float64, float64, float_status_t &status);
float64 float64_sqrt(float64, float_status_t &status);
float64 float64_muladd(float64, float64, float64, int flags, float_status_t &status);
// Double-precision fused multiply-add helper: forwards a, b, c and status to
// float64_muladd() with no modifier flags set (flags == 0).
BX_CPP_INLINE float64 float64_fmadd(float64 a, float64 b, float64 c, float_status_t &status)
{
  const int muladd_flags = 0;
  return float64_muladd(a, b, c, muladd_flags, status);
}
// Double-precision fused multiply-subtract helper: forwards to
// float64_muladd() with float_muladd_negate_c
// (presumably yielding a*b - c -- confirm against float64_muladd).
BX_CPP_INLINE float64 float64_fmsub(float64 a, float64 b, float64 c, float_status_t &status)
{
  const int muladd_flags = float_muladd_negate_c;
  return float64_muladd(a, b, c, muladd_flags, status);
}
// Double-precision negated multiply-add helper: forwards to
// float64_muladd() with float_muladd_negate_product
// (presumably yielding -(a*b) + c -- confirm against float64_muladd).
BX_CPP_INLINE float64 float64_fnmadd(float64 a, float64 b, float64 c, float_status_t &status)
{
  const int muladd_flags = float_muladd_negate_product;
  return float64_muladd(a, b, c, muladd_flags, status);
}
// Double-precision negated multiply-subtract helper: forwards to
// float64_muladd() with float_muladd_negate_result
// (presumably yielding -(a*b) - c -- confirm against float64_muladd).
BX_CPP_INLINE float64 float64_fnmsub(float64 a, float64 b, float64 c, float_status_t &status)
{
  const int muladd_flags = float_muladd_negate_result;
  return float64_muladd(a, b, c, muladd_flags, status);
}
int float64_compare(float64, float64, float_status_t &status);
int float64_compare_quiet(float64, float64, float_status_t &status);

View File

@ -59,6 +59,7 @@
#define BXPN_CPUID_AVX_F16CVT "cpuid.avx_f16c"
#define BXPN_CPUID_AVX_FMA "cpuid.avx_fma"
#define BXPN_CPUID_BMI "cpuid.bmi"
#define BXPN_CPUID_FMA4 "cpuid.fma4"
#define BXPN_CPUID_APIC "cpuid.apic"
#define BXPN_CPUID_MWAIT "cpuid.mwait"
#define BXPN_CPUID_MWAIT_IS_NOP "cpuid.mwait_is_nop"