Implemented AMD's Bulldozer XOP and TBM extensions.

XOP: a few instructions are still missing, coming soon:

  BX_PANIC(("VPERMILPS_VpsHpsWpsVIbR: not implemented yet"));
  BX_PANIC(("VPERMILPD_VpdHpdWpdVIbR: not implemented yet"));
  BX_PANIC(("VPMADCSSWD_VdqHdqWdqVIbR: not implemented yet"));
  BX_PANIC(("VPMADCSWD_VdqHdqWdqVIbR: not implemented yet"));
  BX_PANIC(("VFRCZPS_VpsWpsR: not implemented yet"));
  BX_PANIC(("VFRCZPD_VpdWpdR: not implemented yet"));
  BX_PANIC(("VFRCZSS_VssWssR: not implemented yet"));
  BX_PANIC(("VFRCZSD_VsdWsdR: not implemented yet"));
Stanislav Shwartsman 2011-10-19 20:54:04 +00:00
parent aa03bf7675
commit 5cc04b9955
37 changed files with 4054 additions and 467 deletions


@ -219,10 +219,18 @@ cpu: cpuid_limit_winnt=0
# Select BMI1/BMI2 instructions support.
# This option exists only if Bochs compiled with --enable-avx option.
#
# XOP:
# Select AMD XOP instructions support.
# This option exists only if Bochs compiled with --enable-avx option.
#
# FMA4:
# Select AMD four operand FMA instructions support.
# This option exists only if Bochs compiled with --enable-avx option.
#
# TBM:
# Select AMD Trailing Bit Manipulation (TBM) instructions support.
# This option exists only if Bochs compiled with --enable-avx option.
#
# X86-64:
# Enable x86-64 and long mode support.
# This option exists only if Bochs compiled with x86-64 support.
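
A minimal cpuid directive turning the new options on might look like the line below (illustrative values only; the directive name and option spellings are assumed from the option parsing shown further down, and all three options require a build configured with --enable-avx; note that tbm additionally requires xop, and xop requires avx):

  cpuid: avx=1, xop=1, fma4=1, tbm=1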


@ -8,9 +8,9 @@ Brief summary :
- 10% (ST) to 50% (SMP) CPU emulation speedup !
- Implemented support for new x86 ISA extensions, Bochs is aligned with
latest published Intel Architecture Manual (rev 039, AVX rev 011):
- XSAVEOPT, AVX/AVX2/FMA/F16C, BMI1/BMI2, SMEP, SSE4A (AMD), INVPCID
- XSAVEOPT, AVX/AVX2/FMA/F16C, BMI1/BMI2, SMEP, INVPCID
- VMX: VMX Preemption Timer and Pause Loop Exiting VMEXIT controls
- TODO: XOP (AMD), SVM (AMD)
- Implemented support for AMD SSE4A/XOP/FMA4/TBM instruction sets
- Networking: introduced new networking module 'slirp'
- Harddrive: fixed buffer overflow causing Bochs crash in LBA48 mode
- VGA: Added PCI ROM support to cirrus and pcivga and moved ROM loading
@ -55,12 +55,10 @@ Detailed change log :
- Added support for AVX2 FMA instructions emulation. The implementation
was ported (with few bugfixes) from QEMU patch by Peter Maydell.
The FMA instructions support can be enabled using .bochsrc CPUID option.
- Added support for Bit Manipulation Instructions (BMI) emulation. The BMI
instructions support can be enabled using .bochsrc CPUID option.
- Added support for AMD SSE4A emulation, the instructions can be enabled
using .bochsrc CPUID option.
- Added support for AMD FMA4 emulation, the instructions can be enabled
using .bochsrc CPUID option.
- Added support for Bit Manipulation Instructions (BMI1/BMI2) emulation.
The BMI instructions support can be enabled using .bochsrc CPUID option.
- Added support for AMD SSE4A/XOP/FMA4/TBM extensions emulation, the
instructions can be enabled using .bochsrc CPUID option.
- Implemented VMX preemption timer VMEXIT control (patch by Jianan Hao)
- Implemented Pause-Loop Exiting Secondary VMEXIT control.
- Added INVPCID instruction emulation support.


@ -45,6 +45,8 @@ cpuid
avx_f16c
avx_fma
bmi
xop
tbm
fma4
apic
x86_64


@ -290,7 +290,7 @@ void bx_init_options()
// cpuid subtree
#if BX_CPU_LEVEL >= 4
bx_list_c *cpuid_param = new bx_list_c(root_param, "cpuid", "CPUID Options", 26);
bx_list_c *cpuid_param = new bx_list_c(root_param, "cpuid", "CPUID Options", 28);
new bx_param_string_c(cpuid_param,
"vendor_string",
@ -401,10 +401,18 @@ void bx_init_options()
"Support for Bit Manipulation Instructions (BMI)",
0, 2,
0);
new bx_param_bool_c(cpuid_param,
"xop", "Support for AMD XOP instructions",
"Support for AMD XOP instructions",
0);
new bx_param_bool_c(cpuid_param,
"fma4", "Support for AMD four operand FMA instructions",
"Support for AMD FMA4 instructions",
0);
new bx_param_bool_c(cpuid_param,
"tbm", "Support for AMD TBM instructions",
"Support for AMD Trailing Bit Manipulation (TBM) instructions",
0);
#endif
#if BX_SUPPORT_X86_64
new bx_param_bool_c(cpuid_param,
@ -2719,6 +2727,14 @@ static int parse_line_formatted(const char *context, int num_params, char *param
}
} else if (!strncmp(params[i], "bmi=", 4)) {
SIM->get_param_num(BXPN_CPUID_BMI)->set(atol(&params[i][4]));
} else if (!strncmp(params[i], "xop=", 4)) {
if (parse_param_bool(params[i], 4, BXPN_CPUID_XOP) < 0) {
PARSE_ERR(("%s: cpuid directive malformed.", context));
}
} else if (!strncmp(params[i], "tbm=", 4)) {
if (parse_param_bool(params[i], 4, BXPN_CPUID_TBM) < 0) {
PARSE_ERR(("%s: cpuid directive malformed.", context));
}
} else if (!strncmp(params[i], "fma4=", 5)) {
if (parse_param_bool(params[i], 5, BXPN_CPUID_FMA4) < 0) {
PARSE_ERR(("%s: cpuid directive malformed.", context));
@ -4017,11 +4033,13 @@ int bx_write_configuration(const char *rc, int overwrite)
SIM->get_param_bool(BXPN_CPUID_MOVBE)->get(),
SIM->get_param_bool(BXPN_CPUID_SMEP)->get());
#if BX_SUPPORT_AVX
fprintf(fp, ", avx=%d, avx_f16c=%d, avx_fma=%d, bmi=%d, fma4=%d",
fprintf(fp, ", avx=%d, avx_f16c=%d, avx_fma=%d, bmi=%d, xop=%d, tbm=%d, fma4=%d",
SIM->get_param_num(BXPN_CPUID_AVX)->get(),
SIM->get_param_bool(BXPN_CPUID_AVX_F16CVT)->get(),
SIM->get_param_bool(BXPN_CPUID_AVX_FMA)->get(),
SIM->get_param_num(BXPN_CPUID_BMI)->get(),
SIM->get_param_bool(BXPN_CPUID_XOP)->get(),
SIM->get_param_bool(BXPN_CPUID_TBM)->get(),
SIM->get_param_bool(BXPN_CPUID_FMA4)->get());
#endif
#if BX_SUPPORT_X86_64


@ -128,7 +128,10 @@ OBJS64 = \
avx2.o \
gather.o \
bmi32.o \
bmi64.o
bmi64.o \
tbm32.o \
tbm64.o \
xop.o
BX_INCLUDES = ../bochs.h ../config.h
@ -243,7 +246,7 @@ avx2.o: avx2.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
../gui/gui.h ../instrument/stubs/instrument.h cpu.h cpuid.h crregs.h \
descriptor.h instr.h ia_opcodes.h lazy_flags.h icache.h apic.h i387.h \
../fpu/softfloat.h ../fpu/tag_w.h ../fpu/status_w.h ../fpu/control_w.h \
xmm.h vmx.h stack.h simd_int.h
xmm.h vmx.h stack.h simd_int.h simd_compare.h
gather.o: gather.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
../bx_debug/debug.h ../config.h ../osdep.h ../gui/siminterface.h \
../cpudb.h ../gui/paramtree.h ../memory/memory.h ../pc_system.h \
@ -251,6 +254,13 @@ gather.o: gather.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
descriptor.h instr.h ia_opcodes.h lazy_flags.h icache.h apic.h i387.h \
../fpu/softfloat.h ../fpu/tag_w.h ../fpu/status_w.h ../fpu/control_w.h \
xmm.h vmx.h stack.h
xop.o: xop.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
../bx_debug/debug.h ../config.h ../osdep.h ../gui/siminterface.h \
../cpudb.h ../gui/paramtree.h ../memory/memory.h ../pc_system.h \
../gui/gui.h ../instrument/stubs/instrument.h cpu.h cpuid.h crregs.h \
descriptor.h instr.h ia_opcodes.h lazy_flags.h icache.h apic.h i387.h \
../fpu/softfloat.h ../fpu/tag_w.h ../fpu/status_w.h ../fpu/control_w.h \
xmm.h vmx.h stack.h simd_int.h simd_compare.h
avx_pfp.o: avx_pfp.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
../bx_debug/debug.h ../config.h ../osdep.h ../gui/siminterface.h \
../cpudb.h ../gui/paramtree.h ../memory/memory.h ../pc_system.h \
@ -316,6 +326,20 @@ bmi64.o: bmi64.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h ../bx_debug/debug.
instr.h ia_opcodes.h lazy_flags.h icache.h apic.h i387.h \
../fpu/softfloat.h ../fpu/tag_w.h ../fpu/status_w.h ../fpu/control_w.h \
xmm.h vmx.h stack.h
tbm32.o: tbm32.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h ../bx_debug/debug.h \
../config.h ../osdep.h ../gui/siminterface.h ../cpudb.h \
../gui/paramtree.h ../memory/memory.h ../pc_system.h ../gui/gui.h \
../instrument/stubs/instrument.h cpu.h cpuid.h crregs.h descriptor.h \
instr.h ia_opcodes.h lazy_flags.h icache.h apic.h i387.h \
../fpu/softfloat.h ../fpu/tag_w.h ../fpu/status_w.h ../fpu/control_w.h \
xmm.h vmx.h stack.h
tbm64.o: tbm64.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h ../bx_debug/debug.h \
../config.h ../osdep.h ../gui/siminterface.h ../cpudb.h \
../gui/paramtree.h ../memory/memory.h ../pc_system.h ../gui/gui.h \
../instrument/stubs/instrument.h cpu.h cpuid.h crregs.h descriptor.h \
instr.h ia_opcodes.h lazy_flags.h icache.h apic.h i387.h \
../fpu/softfloat.h ../fpu/tag_w.h ../fpu/status_w.h ../fpu/control_w.h \
xmm.h vmx.h stack.h
call_far.o: call_far.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
../bx_debug/debug.h ../config.h ../osdep.h ../gui/siminterface.h \
../cpudb.h ../gui/paramtree.h ../memory/memory.h ../pc_system.h \
@ -423,7 +447,7 @@ fetchdecode.o: fetchdecode.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
descriptor.h instr.h ia_opcodes.h lazy_flags.h icache.h apic.h i387.h \
../fpu/softfloat.h ../fpu/tag_w.h ../fpu/status_w.h ../fpu/control_w.h \
xmm.h vmx.h stack.h fetchdecode.h fetchdecode_x87.h fetchdecode_sse.h \
fetchdecode_avx.h
fetchdecode_avx.h fetchdecode_xop.h
fetchdecode64.o: fetchdecode64.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
../bx_debug/debug.h ../config.h ../osdep.h ../gui/siminterface.h \
../cpudb.h ../gui/paramtree.h ../memory/memory.h ../pc_system.h \
@ -431,7 +455,7 @@ fetchdecode64.o: fetchdecode64.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
descriptor.h instr.h ia_opcodes.h lazy_flags.h icache.h apic.h i387.h \
../fpu/softfloat.h ../fpu/tag_w.h ../fpu/status_w.h ../fpu/control_w.h \
xmm.h vmx.h stack.h fetchdecode.h fetchdecode_x87.h fetchdecode_sse.h \
fetchdecode_avx.h
fetchdecode_avx.h fetchdecode_xop.h
flag_ctrl.o: flag_ctrl.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
../bx_debug/debug.h ../config.h ../osdep.h ../gui/siminterface.h \
../cpudb.h ../gui/paramtree.h ../memory/memory.h ../pc_system.h \
@ -670,7 +694,7 @@ sse.o: sse.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h ../bx_debug/debug.h \
../instrument/stubs/instrument.h cpu.h cpuid.h crregs.h descriptor.h \
instr.h ia_opcodes.h lazy_flags.h icache.h apic.h i387.h \
../fpu/softfloat.h ../fpu/tag_w.h ../fpu/status_w.h ../fpu/control_w.h \
xmm.h vmx.h stack.h simd_int.h
xmm.h vmx.h stack.h simd_int.h simd_compare.h
sse_move.o: sse_move.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
../bx_debug/debug.h ../config.h ../osdep.h ../gui/siminterface.h \
../cpudb.h ../gui/paramtree.h ../memory/memory.h ../pc_system.h \


@ -26,7 +26,7 @@
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
#if BX_SUPPORT_AVX && BX_CPU_LEVEL >= 6
#if BX_SUPPORT_AVX
#include "simd_int.h"
@ -696,4 +696,4 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMASKMOVPD_MpdHpdVpd(bxInstruction
BX_NEXT_INSTR(i);
}
#endif // BX_SUPPORT_AVX && BX_CPU_LEVEL >= 6
#endif // BX_SUPPORT_AVX


@ -26,9 +26,10 @@
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
#if BX_SUPPORT_AVX && BX_CPU_LEVEL >= 6
#if BX_SUPPORT_AVX
#include "simd_int.h"
#include "simd_compare.h"
#define AVX_2OP(HANDLER, func) \
/* AVX instruction with two src operands */ \


@ -26,7 +26,7 @@
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
#if BX_SUPPORT_AVX && BX_CPU_LEVEL >= 6
#if BX_SUPPORT_AVX
extern void mxcsr_to_softfloat_status_word(float_status_t &status, bx_mxcsr_t mxcsr);
@ -1188,12 +1188,18 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFNMSUB231SS_VpsHssWssR(bxInstruct
// FMA4 (AMD) Instructions //
/////////////////////////////
#define FMA4_OP_VECTOR(HANDLER, func, src2, src3) \
#define FMA4_OP_VECTOR(HANDLER, func) \
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
{ \
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->vvv()); \
BxPackedAvxRegister op2 = BX_READ_AVX_REG(src2); \
BxPackedAvxRegister op3 = BX_READ_AVX_REG(src3); \
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->vvv()), op2, op3; \
if (i->getVexW()) { \
op2 = BX_READ_AVX_REG(i->rm()); \
op3 = BX_READ_AVX_REG(i->Ib()); \
} \
else { \
op2 = BX_READ_AVX_REG(i->Ib()); \
op3 = BX_READ_AVX_REG(i->rm()); \
} \
unsigned len = i->getVL(); \
\
float_status_t status; \
@ -1209,42 +1215,36 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFNMSUB231SS_VpsHssWssR(bxInstruct
BX_NEXT_INSTR(i); \
}
FMA4_OP_VECTOR(VFMADDSUBPS_VpsHpsWpsVIbR, fmaddsubps, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFMADDSUBPS_VpsHpsVIbWpsR, fmaddsubps, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFMADDSUBPD_VpdHpdWpdVIbR, fmaddsubpd, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFMADDSUBPD_VpdHpdVIbWpdR, fmaddsubpd, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFMADDSUBPS_VpsHpsWpsVIbR, fmaddsubps)
FMA4_OP_VECTOR(VFMADDSUBPD_VpdHpdWpdVIbR, fmaddsubpd)
FMA4_OP_VECTOR(VFMSUBADDPS_VpsHpsWpsVIbR, fmsubaddps, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFMSUBADDPS_VpsHpsVIbWpsR, fmsubaddps, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFMSUBADDPD_VpdHpdWpdVIbR, fmsubaddpd, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFMSUBADDPD_VpdHpdVIbWpdR, fmsubaddpd, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFMSUBADDPS_VpsHpsWpsVIbR, fmsubaddps)
FMA4_OP_VECTOR(VFMSUBADDPD_VpdHpdWpdVIbR, fmsubaddpd)
FMA4_OP_VECTOR(VFMADDPS_VpsHpsWpsVIbR, fmaddps, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFMADDPS_VpsHpsVIbWpsR, fmaddps, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFMADDPD_VpdHpdWpdVIbR, fmaddpd, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFMADDPD_VpdHpdVIbWpdR, fmaddpd, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFMADDPS_VpsHpsWpsVIbR, fmaddps)
FMA4_OP_VECTOR(VFMADDPD_VpdHpdWpdVIbR, fmaddpd)
FMA4_OP_VECTOR(VFMSUBPS_VpsHpsWpsVIbR, fmsubps, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFMSUBPS_VpsHpsVIbWpsR, fmsubps, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFMSUBPD_VpdHpdWpdVIbR, fmsubpd, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFMSUBPD_VpdHpdVIbWpdR, fmsubpd, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFMSUBPS_VpsHpsWpsVIbR, fmsubps)
FMA4_OP_VECTOR(VFMSUBPD_VpdHpdWpdVIbR, fmsubpd)
FMA4_OP_VECTOR(VFNMADDPS_VpsHpsWpsVIbR, fnmaddps, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFNMADDPS_VpsHpsVIbWpsR, fnmaddps, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFNMADDPD_VpdHpdWpdVIbR, fnmaddpd, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFNMADDPD_VpdHpdVIbWpdR, fnmaddpd, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFNMADDPS_VpsHpsWpsVIbR, fnmaddps)
FMA4_OP_VECTOR(VFNMADDPD_VpdHpdWpdVIbR, fnmaddpd)
FMA4_OP_VECTOR(VFNMSUBPS_VpsHpsWpsVIbR, fnmsubps, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFNMSUBPS_VpsHpsVIbWpsR, fnmsubps, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFNMSUBPD_VpdHpdWpdVIbR, fnmsubpd, i->rm(), i->Ib())
FMA4_OP_VECTOR(VFNMSUBPD_VpdHpdVIbWpdR, fnmsubpd, i->Ib(), i->rm())
FMA4_OP_VECTOR(VFNMSUBPS_VpsHpsWpsVIbR, fnmsubps)
FMA4_OP_VECTOR(VFNMSUBPD_VpdHpdWpdVIbR, fnmsubpd)
#define FMA4_SINGLE_SCALAR(HANDLER, func, src2, src3) \
#define FMA4_SINGLE_SCALAR(HANDLER, func) \
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
{ \
float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->vvv()); \
float32 op2 = BX_READ_XMM_REG_LO_DWORD(src2); \
float32 op3 = BX_READ_XMM_REG_LO_DWORD(src3); \
float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->vvv()), op2, op3; \
if (i->getVexW()) { \
op2 = BX_READ_XMM_REG_LO_DWORD(i->rm()); \
op3 = BX_READ_XMM_REG_LO_DWORD(i->Ib()); \
} \
else { \
op2 = BX_READ_XMM_REG_LO_DWORD(i->Ib()); \
op3 = BX_READ_XMM_REG_LO_DWORD(i->rm()); \
} \
\
BxPackedXmmRegister dest; \
dest.xmm64u(0) = dest.xmm64u(1) = 0; \
@ -1259,22 +1259,24 @@ FMA4_OP_VECTOR(VFNMSUBPD_VpdHpdVIbWpdR, fnmsubpd, i->Ib(), i->rm())
BX_NEXT_INSTR(i); \
}
FMA4_SINGLE_SCALAR(VFMADDSS_VssHssWssVIbR, float32_fmadd, i->rm(), i->Ib())
FMA4_SINGLE_SCALAR(VFMADDSS_VssHssVIbWssR, float32_fmadd, i->Ib(), i->rm())
FMA4_SINGLE_SCALAR(VFMSUBSS_VssHssWssVIbR, float32_fmsub, i->rm(), i->Ib())
FMA4_SINGLE_SCALAR(VFMSUBSS_VssHssVIbWssR, float32_fmsub, i->Ib(), i->rm())
FMA4_SINGLE_SCALAR(VFMADDSS_VssHssWssVIbR, float32_fmadd)
FMA4_SINGLE_SCALAR(VFMSUBSS_VssHssWssVIbR, float32_fmsub)
FMA4_SINGLE_SCALAR(VFNMADDSS_VssHssWssVIbR, float32_fnmadd, i->rm(), i->Ib())
FMA4_SINGLE_SCALAR(VFNMADDSS_VssHssVIbWssR, float32_fnmadd, i->Ib(), i->rm())
FMA4_SINGLE_SCALAR(VFNMSUBSS_VssHssWssVIbR, float32_fnmsub, i->rm(), i->Ib())
FMA4_SINGLE_SCALAR(VFNMSUBSS_VssHssVIbWssR, float32_fnmsub, i->Ib(), i->rm())
FMA4_SINGLE_SCALAR(VFNMADDSS_VssHssWssVIbR, float32_fnmadd)
FMA4_SINGLE_SCALAR(VFNMSUBSS_VssHssWssVIbR, float32_fnmsub)
#define FMA4_DOUBLE_SCALAR(HANDLER, func, src2, src3) \
#define FMA4_DOUBLE_SCALAR(HANDLER, func) \
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
{ \
float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->vvv()); \
float64 op2 = BX_READ_XMM_REG_LO_QWORD(src2); \
float64 op3 = BX_READ_XMM_REG_LO_QWORD(src3); \
float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->vvv()), op2, op3; \
if (i->getVexW()) { \
op2 = BX_READ_XMM_REG_LO_QWORD(i->rm()); \
op3 = BX_READ_XMM_REG_LO_QWORD(i->Ib()); \
} \
else { \
op2 = BX_READ_XMM_REG_LO_QWORD(i->Ib()); \
op3 = BX_READ_XMM_REG_LO_QWORD(i->rm()); \
} \
BxPackedXmmRegister dest; \
\
float_status_t status; \
@ -1290,14 +1292,10 @@ FMA4_SINGLE_SCALAR(VFNMSUBSS_VssHssVIbWssR, float32_fnmsub, i->Ib(), i->rm())
BX_NEXT_INSTR(i); \
}
FMA4_DOUBLE_SCALAR(VFMADDSD_VsdHsdWsdVIbR, float64_fmadd, i->rm(), i->Ib())
FMA4_DOUBLE_SCALAR(VFMADDSD_VsdHsdVIbWsdR, float64_fmadd, i->Ib(), i->rm())
FMA4_DOUBLE_SCALAR(VFMSUBSD_VsdHsdWsdVIbR, float64_fmsub, i->rm(), i->Ib())
FMA4_DOUBLE_SCALAR(VFMSUBSD_VsdHsdVIbWsdR, float64_fmsub, i->Ib(), i->rm())
FMA4_DOUBLE_SCALAR(VFMADDSD_VsdHsdWsdVIbR, float64_fmadd)
FMA4_DOUBLE_SCALAR(VFMSUBSD_VsdHsdWsdVIbR, float64_fmsub)
FMA4_DOUBLE_SCALAR(VFNMADDSD_VsdHsdWsdVIbR, float64_fnmadd, i->rm(), i->Ib())
FMA4_DOUBLE_SCALAR(VFNMADDSD_VsdHsdVIbWsdR, float64_fnmadd, i->Ib(), i->rm())
FMA4_DOUBLE_SCALAR(VFNMSUBSD_VsdHsdWsdVIbR, float64_fnmsub, i->rm(), i->Ib())
FMA4_DOUBLE_SCALAR(VFNMSUBSD_VsdHsdVIbWsdR, float64_fnmsub, i->Ib(), i->rm())
FMA4_DOUBLE_SCALAR(VFNMADDSD_VsdHsdWsdVIbR, float64_fnmadd)
FMA4_DOUBLE_SCALAR(VFNMSUBSD_VsdHsdWsdVIbR, float64_fnmsub)
#endif


@ -26,7 +26,7 @@
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
#if BX_SUPPORT_AVX && BX_CPU_LEVEL >= 6
#if BX_SUPPORT_AVX
extern void mxcsr_to_softfloat_status_word(float_status_t &status, bx_mxcsr_t mxcsr);
@ -1392,4 +1392,4 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PH_WpsVpsIb(bxInstruction_c
BX_NEXT_INSTR(i);
}
#endif // BX_SUPPORT_AVX && BX_CPU_LEVEL >= 6
#endif // BX_SUPPORT_AVX


@ -28,11 +28,6 @@
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::ANDN_GdBdEdR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("ANDN_GdBdEd: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit32u op1_32 = BX_READ_32BIT_REG(i->rm());
Bit32u op2_32 = BX_READ_32BIT_REG(i->vvv());
@ -47,11 +42,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::ANDN_GdBdEdR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::MULX_GdBdEdR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("MULX_GdBdEdR: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit32u op1_32 = EDX;
Bit32u op2_32 = BX_READ_32BIT_REG(i->rm());
Bit64u product_64 = ((Bit64u) op1_32) * ((Bit64u) op2_32);
@ -64,11 +54,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::MULX_GdBdEdR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLSI_BdEdR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("BLSI_BdEd: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit32u op1_32 = BX_READ_32BIT_REG(i->rm());
bx_bool tmpCF = (op1_32 == 0);
@ -84,11 +69,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLSI_BdEdR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLSMSK_BdEdR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("BLSMSK_BdEd: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit32u op1_32 = BX_READ_32BIT_REG(i->rm());
bx_bool tmpCF = (op1_32 == 0);
@ -102,14 +82,8 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLSMSK_BdEdR(bxInstruction_c *i)
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLSR_BdEdR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("BLSR_BdEd: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit32u op1_32 = BX_READ_32BIT_REG(i->rm());
bx_bool tmpCF = (op1_32 == 0);
@ -125,11 +99,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLSR_BdEdR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::RORX_GdEdIbR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("RORX_GdEdIb: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit32u op1_32 = BX_READ_32BIT_REG(i->rm());
unsigned count = i->Ib() & 0x1f;
@ -144,11 +113,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::RORX_GdEdIbR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SHRX_GdEdBdR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("SHRX_GdEdBdR: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit32u op1_32 = BX_READ_32BIT_REG(i->rm());
unsigned count = BX_READ_32BIT_REG(i->vvv()) & 0x1f;
@ -162,11 +126,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SHRX_GdEdBdR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SARX_GdEdBdR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("SARX_GdEdBdR: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit32u op1_32 = BX_READ_32BIT_REG(i->rm());
unsigned count = BX_READ_32BIT_REG(i->vvv()) & 0x1f;
@ -182,11 +141,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SARX_GdEdBdR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SHLX_GdEdBdR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("SHLX_GdEdBdR: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit32u op1_32 = BX_READ_32BIT_REG(i->rm());
unsigned count = BX_READ_32BIT_REG(i->vvv()) & 0x1f;
@ -200,11 +154,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SHLX_GdEdBdR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BEXTR_GdEdBdR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("BEXTR_GdEdBdR: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit16u control = BX_READ_16BIT_REG(i->vvv());
unsigned start = control & 0xff;
unsigned len = control >> 8;
@ -229,11 +178,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BEXTR_GdEdBdR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BZHI_GdEdBdR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("BZHI_GdEdBdR: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
unsigned control = BX_READ_16BIT_REG(i->vvv()) & 0xff;
bx_bool tmpCF = 0;
Bit32u op1_32 = BX_READ_32BIT_REG(i->rm());
@ -257,11 +201,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BZHI_GdEdBdR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXT_GdEdBdR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("PEXT_GdEdBdR: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit32u op1_32 = BX_READ_32BIT_REG(i->vvv());
Bit32u op2_32 = BX_READ_32BIT_REG(i->rm()), result_32 = 0;
@ -283,11 +222,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXT_GdEdBdR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PDEP_GdEdBdR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("PDEP_GdEdBdR: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit32u op1_32 = BX_READ_32BIT_REG(i->vvv());
Bit32u op2_32 = BX_READ_32BIT_REG(i->rm()), result_32 = 0;


@ -28,11 +28,6 @@
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::ANDN_GqBqEqR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("ANDN_GqBqEq: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit64u op1_64 = BX_READ_64BIT_REG(i->rm());
Bit64u op2_64 = BX_READ_64BIT_REG(i->vvv());
@ -49,11 +44,6 @@ extern void long_mul(Bit128u *product, Bit64u op1, Bit64u op2);
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::MULX_GqBqEqR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("MULX_GqBqEqR: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit64u op1_64 = RDX;
Bit64u op2_64 = BX_READ_64BIT_REG(i->rm());
@ -73,11 +63,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::MULX_GqBqEqR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLSI_BqEqR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("BLSI_BqEq: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit64u op1_64 = BX_READ_64BIT_REG(i->rm());
bx_bool tmpCF = (op1_64 == 0);
@ -93,11 +78,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLSI_BqEqR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLSMSK_BqEqR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("BLSMSK_BqEq: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit64u op1_64 = BX_READ_64BIT_REG(i->rm());
bx_bool tmpCF = (op1_64 == 0);
@ -111,14 +91,8 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLSMSK_BqEqR(bxInstruction_c *i)
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLSR_BqEqR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("BLSR_BqEq: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit64u op1_64 = BX_READ_64BIT_REG(i->rm());
bx_bool tmpCF = (op1_64 == 0);
@ -134,11 +108,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLSR_BqEqR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::RORX_GqEqIbR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("RORX_GqEqIb: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit64u op1_64 = BX_READ_64BIT_REG(i->rm());
unsigned count = i->Ib() & 0x3f;
@ -153,11 +122,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::RORX_GqEqIbR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SHRX_GqEqBqR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("SHRX_GqEqBqR: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit64u op1_64 = BX_READ_64BIT_REG(i->rm());
unsigned count = BX_READ_32BIT_REG(i->vvv()) & 0x3f;
@ -171,11 +135,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SHRX_GqEqBqR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SARX_GqEqBqR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("SARX_GqEqBqR: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit64u op1_64 = BX_READ_64BIT_REG(i->rm());
unsigned count = BX_READ_32BIT_REG(i->vvv()) & 0x3f;
@ -191,11 +150,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SARX_GqEqBqR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SHLX_GqEqBqR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("SHLX_GqEqBqR: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit64u op1_64 = BX_READ_64BIT_REG(i->rm());
unsigned count = BX_READ_32BIT_REG(i->vvv()) & 0x3f;
@ -209,11 +163,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::SHLX_GqEqBqR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BEXTR_GqEqBqR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("BEXTR_GqEqBqR: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit16u control = BX_READ_16BIT_REG(i->vvv());
unsigned start = control & 0xff;
unsigned len = control >> 8;
@ -238,11 +187,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BEXTR_GqEqBqR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BZHI_GqEqBqR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("BZHI_GqEqBqR: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
unsigned control = BX_READ_16BIT_REG(i->vvv()) & 0xff;
bx_bool tmpCF = 0;
Bit64u op1_64 = BX_READ_64BIT_REG(i->rm());
@ -265,11 +209,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BZHI_GqEqBqR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXT_GqEqBqR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("PEXT_GqEqBqR: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit64u op1_64 = BX_READ_64BIT_REG(i->vvv());
Bit64u op2_64 = BX_READ_64BIT_REG(i->rm()), result_64 = 0;
@ -291,11 +230,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXT_GqEqBqR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::PDEP_GqEqBqR(bxInstruction_c *i)
{
if (!protected_mode()) {
BX_DEBUG(("PDEP_GqEqBqR: not recognized in real or virtual-8086 mode"));
exception(BX_UD_EXCEPTION, 0);
}
Bit64u op1_64 = BX_READ_64BIT_REG(i->vvv());
Bit64u op2_64 = BX_READ_64BIT_REG(i->rm()), result_64 = 0;


@ -2559,7 +2559,7 @@ public: // for now...
BX_SMF BX_INSF_TYPE INVPCID(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#endif
#if BX_SUPPORT_AVX && BX_CPU_LEVEL >= 6
#if BX_SUPPORT_AVX
/* AVX */
BX_SMF BX_INSF_TYPE VZEROUPPER(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VZEROALL(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
@ -2899,46 +2899,110 @@ public: // for now...
/* FMA4 (AMD) */
BX_SMF BX_INSF_TYPE VFMADDSUBPS_VpsHpsWpsVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDSUBPS_VpsHpsVIbWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDSUBPD_VpdHpdWpdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDSUBPD_VpdHpdVIbWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBADDPS_VpsHpsWpsVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBADDPS_VpsHpsVIbWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBADDPD_VpdHpdWpdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBADDPD_VpdHpdVIbWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDPS_VpsHpsWpsVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDPS_VpsHpsVIbWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDPD_VpdHpdWpdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDPD_VpdHpdVIbWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDSS_VssHssWssVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDSS_VssHssVIbWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDSD_VsdHsdWsdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMADDSD_VsdHsdVIbWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBPS_VpsHpsWpsVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBPS_VpsHpsVIbWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBPD_VpdHpdWpdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBPD_VpdHpdVIbWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBPD_VpdHpdWpdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBSS_VssHssWssVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBSS_VssHssVIbWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBSD_VsdHsdWsdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFMSUBSD_VsdHsdVIbWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMADDPS_VpsHpsWpsVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMADDPS_VpsHpsVIbWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMADDPD_VpdHpdWpdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMADDPD_VpdHpdVIbWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMADDSS_VssHssWssVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMADDSS_VssHssVIbWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMADDSD_VsdHsdWsdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMADDSD_VsdHsdVIbWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMSUBPS_VpsHpsWpsVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMSUBPS_VpsHpsVIbWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMSUBPD_VpdHpdWpdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMSUBPD_VpdHpdVIbWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMSUBSS_VssHssWssVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMSUBSS_VssHssVIbWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMSUBSD_VsdHsdWsdVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFNMSUBSD_VsdHsdVIbWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
/* FMA4 (AMD) */
/* XOP (AMD) */
BX_SMF BX_INSF_TYPE VPCMOV_VdqHdqWdqVIb(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPPERM_VdqHdqWdqVIb(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPSHAB_VdqWdqHdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPSHAW_VdqWdqHdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPSHAD_VdqWdqHdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPSHAQ_VdqWdqHdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPROTB_VdqWdqHdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPROTW_VdqWdqHdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPROTD_VdqWdqHdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPROTQ_VdqWdqHdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPSHLB_VdqWdqHdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPSHLW_VdqWdqHdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPSHLD_VdqWdqHdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPSHLQ_VdqWdqHdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMACSSWW_VdqHdqWdqVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMACSSWD_VdqHdqWdqVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMACSSDQL_VdqHdqWdqVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMACSSDD_VdqHdqWdqVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMACSSDQH_VdqHdqWdqVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMACSWW_VdqHdqWdqVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMACSWD_VdqHdqWdqVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMACSDQL_VdqHdqWdqVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMACSDD_VdqHdqWdqVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMACSDQH_VdqHdqWdqVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMADCSSWD_VdqHdqWdqVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMADCSWD_VdqHdqWdqVIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPROTB_VdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPROTW_VdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPROTD_VdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPROTQ_VdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPCOMB_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPCOMW_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPCOMD_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPCOMQ_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPCOMUB_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPCOMUW_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPCOMUD_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPCOMUQ_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFRCZPS_VpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFRCZPD_VpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFRCZSS_VssWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VFRCZSD_VsdWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPHADDBW_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPHADDBD_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPHADDBQ_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPHADDWD_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPHADDWQ_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPHADDDQ_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPHADDUBW_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPHADDUBD_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPHADDUBQ_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPHADDUWD_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPHADDUWQ_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPHADDUDQ_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPHSUBBW_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPHSUBWD_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPHSUBDQ_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
/* XOP (AMD) */
/* TBM (AMD) */
BX_SMF BX_INSF_TYPE BEXTR_GdEdIdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE BLCFILL_BdEdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE BLCI_BdEdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE BLCIC_BdEdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE BLCMSK_BdEdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE BLCS_BdEdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE BLSFILL_BdEdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE BLSIC_BdEdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE T1MSKC_BdEdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE TZMSK_BdEdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE BEXTR_GqEqIdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE BLCFILL_BqEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE BLCI_BqEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE BLCIC_BqEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE BLCMSK_BqEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE BLCS_BqEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE BLSFILL_BqEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE BLSIC_BqEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE T1MSKC_BqEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE TZMSK_BqEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
/* TBM (AMD) */
#endif
BX_SMF BX_INSF_TYPE LZCNT_GwEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
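
As background for the VPCOM* declarations above: each takes a 3-bit immediate that selects the comparison predicate. A standalone sketch of the per-element logic for VPCOMB, using AMD's documented predicate encoding (assumed here, not taken from this commit; Bit8s/Bit8u and bx_bool are Bochs types):

  static Bit8u vpcomb_element(Bit8s a, Bit8s b, unsigned imm3)
  {
    bx_bool r;
    switch (imm3 & 7) {
      case 0:  r = (a <  b); break; // LT
      case 1:  r = (a <= b); break; // LE
      case 2:  r = (a >  b); break; // GT
      case 3:  r = (a >= b); break; // GE
      case 4:  r = (a == b); break; // EQ
      case 5:  r = (a != b); break; // NEQ
      case 6:  r = 0;        break; // FALSE
      default: r = 1;        break; // TRUE
    }
    return r ? 0xff : 0x00;         // all-ones or all-zeroes per byte element
  }
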


@ -105,6 +105,7 @@ typedef bx_cpuid_t* (*bx_create_cpuid_method)(BX_CPU_C *cpu);
#define BX_ISA_BMI2 (BX_CONST64(1) << 33) /* BMI2 instruction */
#define BX_ISA_FMA4 (BX_CONST64(1) << 34) /* FMA4 instruction (AMD) */
#define BX_ISA_XOP (BX_CONST64(1) << 35) /* XOP instruction (AMD) */
#define BX_ISA_TBM (BX_CONST64(1) << 36) /* TBM instruction (AMD) */
// cpuid non-ISA features
#define BX_CPU_DEBUG_EXTENSIONS (1 << 0) /* Debug Extensions support */


@ -1286,7 +1286,7 @@ BX_CPU_C::fetchDecode32(const Bit8u *iptr, bxInstruction_c *i, unsigned remainin
int vvv = -1;
#if BX_SUPPORT_AVX
int had_vex = 0;
int had_vex = 0, had_xop = 0;
bx_bool vex_w = 0, vex_l = 0;
#endif
@ -1359,9 +1359,6 @@ fetch_b1:
}
i->setB1(b1);
#if BX_SUPPORT_FPU
i->setVL(BX_NO_VL);
#endif
i->modRMForm.Id = 0;
unsigned index = b1 + (os_32 << 9); // *512
@ -1372,8 +1369,10 @@ fetch_b1:
#if BX_SUPPORT_AVX
if ((attr & BxGroupX) == BxPrefixVEX && (*iptr & 0xc0) == 0xc0) {
// VEX
had_vex = 1;
if (sse_prefix) had_vex = -1;
if (! protected_mode()) had_vex = -1;
unsigned vex, vex_opcext = 1;
if (remain != 0) {
@ -1394,6 +1393,7 @@ fetch_b1:
return(-1);
vex_w = (vex >> 7) & 0x1;
i->setVexW(vex_w);
}
vvv = 15 - ((vex >> 3) & 0xf);
@ -1417,6 +1417,48 @@ fetch_b1:
has_modrm = BxOpcodeHasModrm32[b1];
}
}
else if (b1 == 0x8f && (*iptr & 0xc8) == 0xc8) {
// 3 byte XOP prefix
had_xop = 1;
if (! protected_mode()) had_vex = -1;
unsigned vex;
if (remain != 0) {
remain--;
vex = *iptr++; // fetch XOP2
}
else
return(-1);
unsigned xop_opcext = (vex & 0x1f) - 8;
if (xop_opcext >= 3)
had_xop = -1;
if (remain != 0) {
remain--;
vex = *iptr++; // fetch XOP3
}
else
return(-1);
vex_w = (vex >> 7) & 0x1;
i->setVexW(vex_w);
vvv = 15 - ((vex >> 3) & 0xf);
vex_l = (vex >> 2) & 0x1;
i->setVL(BX_VL128 + vex_l);
sse_prefix = vex & 0x3;
if (sse_prefix) had_xop = -1;
if (remain != 0) {
remain--;
b1 = *iptr++; // fetch new b1
}
else
return(-1);
has_modrm = 1;
b1 += 256 * xop_opcext;
}
else
#endif
{
@ -1613,6 +1655,12 @@ modrm_done:
else
OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256) + 768*vex_l];
}
else if (had_xop != 0) {
if (had_xop < 0)
OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR
else
OpcodeInfoPtr = &BxOpcodeTableXOP[b1 + 768*vex_l];
}
#endif
attr = OpcodeInfoPtr->Attr;
@ -1644,11 +1692,11 @@ modrm_done:
break;
#if BX_SUPPORT_AVX
case BxSplitVexW64: // VexW is ignored in 32-bit mode
BX_ASSERT(had_vex != 0);
BX_ASSERT(had_vex != 0 || had_xop != 0);
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[0]);
break;
case BxSplitVexW: // VexW is a real opcode extension
BX_ASSERT(had_vex != 0);
BX_ASSERT(had_vex != 0 || had_xop != 0);
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[vex_w]);
break;
case BxSplitMod11B:
@ -1705,6 +1753,13 @@ modrm_done:
else
OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256) + 768*vex_l];
}
else if (had_xop != 0) {
i->setVvv(vvv);
if (had_xop < 0)
OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR
else
OpcodeInfoPtr = &BxOpcodeTableXOP[b1 + 768*vex_l];
}
#endif
unsigned group = attr & BxGroupX;
@ -1808,6 +1863,7 @@ modrm_done:
case BxImmediate_Ib4:
if (remain != 0) {
i->modRMForm.Ib = (*iptr >> 4) & 7;
i->modRMForm.Ib2 = *iptr; // for VPERMIL2PS/VPERMIL2PD (XOP)
remain--;
}
else {
@ -1865,7 +1921,7 @@ modrm_done:
#endif
#if BX_SUPPORT_AVX
if (had_vex > 0) {
if (had_vex > 0 || had_xop > 0) {
if ((attr & BxVexW0) != 0 && vex_w) {
ia_opcode = BX_IA_ERROR;
}


@ -76,6 +76,7 @@ struct bxIAOpcodeTable {
#include "fetchdecode_x87.h"
#include "fetchdecode_sse.h"
#include "fetchdecode_avx.h"
#include "fetchdecode_xop.h"
/* ************************************************************************ */
/* Opcode Groups */


@ -1705,7 +1705,7 @@ BX_CPU_C::fetchDecode64(const Bit8u *iptr, bxInstruction_c *i, unsigned remainin
int vvv = -1;
#if BX_SUPPORT_AVX
int had_vex = 0;
int had_vex = 0, had_xop = 0;
bx_bool vex_w = 0, vex_l = 0;
#endif
@ -1816,7 +1816,6 @@ fetch_b1:
}
i->setB1(b1);
i->setVL(BX_NO_VL);
i->modRMForm.Id = 0;
unsigned index = b1+offset;
@ -1827,8 +1826,10 @@ fetch_b1:
#if BX_SUPPORT_AVX
if ((attr & BxGroupX) == BxPrefixVEX) {
// VEX
had_vex = 1;
if (sse_prefix || rex_prefix) had_vex = -1;
if (sse_prefix | rex_prefix) had_vex = -1;
if (! protected_mode()) had_vex = -1;
unsigned vex, vex_opcext = 1;
if (remain != 0) {
@ -1854,6 +1855,7 @@ fetch_b1:
if (vex & 0x80) {
vex_w = 1;
i->assertVexW();
i->assertOs64();
i->assertOs32();
}
@ -1880,6 +1882,58 @@ fetch_b1:
has_modrm = BxOpcodeHasModrm64[b1];
}
}
else if (b1 == 0x8f && (*iptr & 0x08) == 0x08) {
// 3 byte XOP prefix
had_xop = 1;
if (sse_prefix | rex_prefix) had_xop = -1;
if (! protected_mode()) had_vex = -1;
unsigned vex;
if (remain != 0) {
remain--;
vex = *iptr++; // fetch XOP2
}
else
return(-1);
rex_r = ((vex >> 4) & 0x8) ^ 0x8;
rex_x = ((vex >> 3) & 0x8) ^ 0x8;
rex_b = ((vex >> 2) & 0x8) ^ 0x8;
unsigned xop_opcext = (vex & 0x1f) - 8;
if (xop_opcext >= 3)
had_xop = -1;
if (remain != 0) {
remain--;
vex = *iptr++; // fetch XOP3
}
else
return(-1);
if (vex & 0x80) {
vex_w = 1;
i->assertVexW();
i->assertOs64();
i->assertOs32();
}
vvv = 15 - ((vex >> 3) & 0xf);
vex_l = (vex >> 2) & 0x1;
i->setVL(BX_VL128 + vex_l);
sse_prefix = vex & 0x3;
if (sse_prefix) had_xop = -1;
if (remain != 0) {
remain--;
b1 = *iptr++; // fetch new b1
}
else
return(-1);
has_modrm = 1;
b1 += 256 * xop_opcext;
}
else
#endif
{
@ -2032,6 +2086,12 @@ modrm_done:
else
OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256) + 768*vex_l];
}
else if (had_xop != 0) {
if (had_xop < 0)
OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR
else
OpcodeInfoPtr = &BxOpcodeTableXOP[b1 + 768*vex_l];
}
#endif
attr = OpcodeInfoPtr->Attr;
@ -2064,7 +2124,7 @@ modrm_done:
#if BX_SUPPORT_AVX
case BxSplitVexW:
case BxSplitVexW64:
BX_ASSERT(had_vex != 0);
BX_ASSERT(had_vex != 0 || had_xop != 0);
OpcodeInfoPtr = &(OpcodeInfoPtr->AnotherArray[vex_w]);
break;
case BxSplitMod11B:
@ -2117,9 +2177,16 @@ modrm_done:
else
OpcodeInfoPtr = &BxOpcodeTableAVX[(b1-256) + 768*vex_l];
}
else if (had_xop != 0) {
i->setVvv(vvv);
if (had_xop < 0)
OpcodeInfoPtr = &BxOpcodeGroupSSE_ERR[0]; // BX_IA_ERROR
else
OpcodeInfoPtr = &BxOpcodeTableXOP[b1 + 768*vex_l];
}
#endif
if (b1 == 0x90 && sse_prefix == SSE_PREFIX_F3) {
if (b1 == 0x90 && sse_prefix == SSE_PREFIX_F3 && (had_vex | had_xop) == 0) {
ia_opcode = BX_IA_PAUSE;
}
else {
@ -2237,7 +2304,8 @@ modrm_done:
#if BX_SUPPORT_AVX
case BxImmediate_Ib4:
if (remain != 0) {
i->modRMForm.Ib = *iptr >> 4;
i->modRMForm.Ib = *iptr >> 4;
i->modRMForm.Ib2 = *iptr; // for VPERMIL2PS/VPERMIL2PD (XOP)
remain--;
}
else {
@ -2282,7 +2350,7 @@ modrm_done:
Bit32u op_flags = BxOpcodesTable[ia_opcode].flags;
#if BX_SUPPORT_AVX
if (had_vex > 0) {
if (had_vex > 0 || had_xop > 0) {
if ((attr & BxVexW0) != 0 && vex_w) {
ia_opcode = BX_IA_ERROR;
}
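
For readers unfamiliar with the encoding: the 3-byte XOP prefix decoded in the two fetchdecode hunks above has the same field layout as the 3-byte VEX prefix, except that the escape byte is 8Fh and valid map_select values start at 8 (hence the subtraction of 8 and the >= 3 range check). A standalone sketch of the field extraction, mirroring those hunks (the helper and struct are hypothetical, for illustration only):

  // Hypothetical helper mirroring the XOP2/XOP3 byte decoding above;
  // xop2 and xop3 are the two bytes that follow the 8Fh escape byte.
  struct XopFields {
    bx_bool  rex_r, rex_x, rex_b; // R/X/B bits, stored inverted in the prefix
    unsigned map;                 // 8, 9 or 0Ah for valid XOP encodings
    bx_bool  vex_w;
    unsigned vvv;                 // second source register, stored inverted
    bx_bool  vex_l;               // 0 = 128-bit, 1 = 256-bit
    unsigned pp;                  // implied SSE prefix, must be 0 for XOP
  };

  static XopFields decode_xop_prefix(Bit8u xop2, Bit8u xop3)
  {
    XopFields f;
    f.rex_r = ((xop2 >> 7) & 1) ^ 1;
    f.rex_x = ((xop2 >> 6) & 1) ^ 1;
    f.rex_b = ((xop2 >> 5) & 1) ^ 1;
    f.map   =   xop2 & 0x1f;
    f.vex_w = (xop3 >> 7) & 1;
    f.vvv   = 15 - ((xop3 >> 3) & 0xf);
    f.vex_l = (xop3 >> 2) & 1;
    f.pp    =   xop3 & 0x3;
    return f;
  }
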


@ -358,108 +358,6 @@ static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3af0[2] = {
};
// BMI
// FMA4
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a5c[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMADDSUBPS_VpsHpsWpsVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMADDSUBPS_VpsHpsVIbWps }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a5d[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMADDSUBPD_VpdHpdWpdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMADDSUBPD_VpdHpdVIbWpd }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a5e[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMSUBADDPS_VpsHpsWpsVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMSUBADDPS_VpsHpsVIbWps }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a5f[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMSUBADDPD_VpdHpdWpdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMSUBADDPD_VpdHpdVIbWpd }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a68[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMADDPS_VpsHpsWpsVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMADDPS_VpsHpsVIbWps }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a69[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMADDPD_VpdHpdWpdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMADDPD_VpdHpdVIbWpd }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a6a[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMADDSS_VssHssWssVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMADDSS_VssHssVIbWss }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a6b[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMADDSD_VsdHsdWsdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMADDSD_VsdHsdVIbWsd }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a6c[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMSUBPS_VpsHpsWpsVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMSUBPS_VpsHpsVIbWps }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a6d[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMSUBPD_VpdHpdWpdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMSUBPD_VpdHpdVIbWpd }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a6e[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMSUBSS_VssHssWssVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMSUBSS_VssHssVIbWss }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a6f[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFMSUBSD_VsdHsdWsdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFMSUBSD_VsdHsdVIbWsd }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a78[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFNMADDPS_VpsHpsWpsVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFNMADDPS_VpsHpsVIbWps }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a79[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFNMADDPD_VpdHpdWpdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFNMADDPD_VpdHpdVIbWpd }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a7a[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFNMADDSS_VssHssWssVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFNMADDSS_VssHssVIbWss }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a7b[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFNMADDSD_VsdHsdWsdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFNMADDSD_VsdHsdVIbWsd }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a7c[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFNMSUBPS_VpsHpsWpsVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFNMSUBPS_VpsHpsVIbWps }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a7d[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFNMSUBPD_VpdHpdWpdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFNMSUBPD_VpdHpdVIbWpd }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a7e[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFNMSUBSS_VssHssWssVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFNMSUBSS_VssHssVIbWss }
};
static const BxOpcodeInfo_t BxOpcodeInfoAVX_VexW_0f3a7f[2] = {
/* 0 */ { BxPrefixSSE66, BX_IA_VFNMSUBSD_VsdHsdWsdVIb },
/* 1 */ { BxPrefixSSE66, BX_IA_VFNMSUBSD_VsdHsdVIbWsd }
};
// FMA4
/* ************************************************************************ */
/* ********** */
@ -1447,10 +1345,10 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 59 /0 */ { 0, BX_IA_ERROR },
/* 5A /0 */ { 0, BX_IA_ERROR },
/* 5B /0 */ { 0, BX_IA_ERROR },
/* 5C /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a5c },
/* 5D /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a5d },
/* 5E /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a5e },
/* 5F /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a5f },
/* 5C /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMADDSUBPS_VpsHpsWpsVIb },
/* 5D /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMADDSUBPD_VpdHpdWpdVIb },
/* 5E /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMSUBADDPS_VpsHpsWpsVIb },
/* 5F /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMSUBADDPD_VpdHpdWpdVIb },
/* 60 /0 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V128_VPCMPESTRM_VdqWdqIb },
/* 61 /0 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V128_VPCMPESTRI_VdqWdqIb },
/* 62 /0 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V128_VPCMPISTRM_VdqWdqIb },
@ -1459,14 +1357,14 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 65 /0 */ { 0, BX_IA_ERROR },
/* 66 /0 */ { 0, BX_IA_ERROR },
/* 67 /0 */ { 0, BX_IA_ERROR },
/* 68 /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a68 },
/* 69 /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a69 },
/* 6A /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6a },
/* 6B /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6b },
/* 6C /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6c },
/* 6D /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6d },
/* 6E /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6e },
/* 6F /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6f },
/* 68 /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMADDPS_VpsHpsWpsVIb },
/* 69 /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMADDPD_VpdHpdWpdVIb },
/* 6A /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMADDSS_VssHssWssVIb },
/* 6B /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMADDSD_VsdHsdWsdVIb },
/* 6C /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMSUBPS_VpsHpsWpsVIb },
/* 6D /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMSUBPD_VpdHpdWpdVIb },
/* 6E /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMSUBSS_VssHssWssVIb },
/* 6F /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMSUBSD_VsdHsdWsdVIb },
/* 70 /0 */ { 0, BX_IA_ERROR },
/* 71 /0 */ { 0, BX_IA_ERROR },
/* 72 /0 */ { 0, BX_IA_ERROR },
@ -1475,14 +1373,14 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 75 /0 */ { 0, BX_IA_ERROR },
/* 76 /0 */ { 0, BX_IA_ERROR },
/* 77 /0 */ { 0, BX_IA_ERROR },
/* 78 /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a78 },
/* 79 /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a79 },
/* 7A /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7a },
/* 7B /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7b },
/* 7C /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7c },
/* 7D /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7d },
/* 7E /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7e },
/* 7F /0 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7f },
/* 78 /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFNMADDPS_VpsHpsWpsVIb },
/* 79 /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFNMADDPD_VpdHpdWpdVIb },
/* 7A /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFNMADDSS_VssHssWssVIb },
/* 7B /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFNMADDSD_VsdHsdWsdVIb },
/* 7C /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFNMSUBPS_VpsHpsWpsVIb },
/* 7D /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFNMSUBPD_VpdHpdWpdVIb },
/* 7E /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFNMSUBSS_VssHssWssVIb },
/* 7F /0 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFNMSUBSD_VsdHsdWsdVIb },
/* 80 /0 */ { 0, BX_IA_ERROR },
/* 81 /0 */ { 0, BX_IA_ERROR },
/* 82 /0 */ { 0, BX_IA_ERROR },
@ -2221,10 +2119,10 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 59 /1 */ { 0, BX_IA_ERROR },
/* 5A /1 */ { 0, BX_IA_ERROR },
/* 5B /1 */ { 0, BX_IA_ERROR },
/* 5C /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a5c },
/* 5D /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a5d },
/* 5E /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a5e },
/* 5F /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a5f },
/* 5C /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMADDSUBPS_VpsHpsWpsVIb },
/* 5D /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMADDSUBPD_VpdHpdWpdVIb },
/* 5E /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMSUBADDPS_VpsHpsWpsVIb },
/* 5F /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMSUBADDPD_VpdHpdWpdVIb },
/* 60 /1 */ { 0, BX_IA_ERROR },
/* 61 /1 */ { 0, BX_IA_ERROR },
/* 62 /1 */ { 0, BX_IA_ERROR },
@ -2233,14 +2131,14 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 65 /1 */ { 0, BX_IA_ERROR },
/* 66 /1 */ { 0, BX_IA_ERROR },
/* 67 /1 */ { 0, BX_IA_ERROR },
/* 68 /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a68 },
/* 69 /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a69 },
/* 6A /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6a },
/* 6B /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6b },
/* 6C /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6c },
/* 6D /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6d },
/* 6E /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6e },
/* 6F /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a6f },
/* 68 /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMADDPS_VpsHpsWpsVIb },
/* 69 /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMADDPD_VpdHpdWpdVIb },
/* 6A /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMADDSS_VssHssWssVIb },
/* 6B /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMADDSD_VsdHsdWsdVIb },
/* 6C /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMSUBPS_VpsHpsWpsVIb },
/* 6D /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMSUBPD_VpdHpdWpdVIb },
/* 6E /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMSUBSS_VssHssWssVIb },
/* 6F /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFMSUBSD_VsdHsdWsdVIb },
/* 70 /1 */ { 0, BX_IA_ERROR },
/* 71 /1 */ { 0, BX_IA_ERROR },
/* 72 /1 */ { 0, BX_IA_ERROR },
@ -2249,14 +2147,14 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 75 /1 */ { 0, BX_IA_ERROR },
/* 76 /1 */ { 0, BX_IA_ERROR },
/* 77 /1 */ { 0, BX_IA_ERROR },
/* 78 /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a78 },
/* 79 /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a79 },
/* 7A /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7a },
/* 7B /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7b },
/* 7C /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7c },
/* 7D /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7d },
/* 7E /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7e },
/* 7F /1 */ { BxSplitVexW | BxImmediate_Ib4, BX_IA_ERROR, BxOpcodeInfoAVX_VexW_0f3a7f },
/* 78 /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFNMADDPS_VpsHpsWpsVIb },
/* 79 /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFNMADDPD_VpdHpdWpdVIb },
/* 7A /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFNMADDSS_VssHssWssVIb },
/* 7B /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFNMADDSD_VsdHsdWsdVIb },
/* 7C /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFNMSUBPS_VpsHpsWpsVIb },
/* 7D /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFNMSUBPD_VpdHpdWpdVIb },
/* 7E /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFNMSUBSS_VssHssWssVIb },
/* 7F /1 */ { BxPrefixSSE66 | BxImmediate_Ib4, BX_IA_VFNMSUBSD_VsdHsdWsdVIb },
/* 80 /1 */ { 0, BX_IA_ERROR },
/* 81 /1 */ { 0, BX_IA_ERROR },
/* 82 /1 */ { 0, BX_IA_ERROR },

bochs/cpu/fetchdecode_xop.h (executable file, 1644 lines)

File diff suppressed because it is too large.

View File

@ -26,7 +26,7 @@
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
#if BX_SUPPORT_AVX && BX_CPU_LEVEL >= 6
#if BX_SUPPORT_AVX
bx_address BX_CPP_AttrRegparmN(2) BX_CPU_C::BxResolveGatherD(bxInstruction_c *i, unsigned element)
{

View File

@ -828,6 +828,26 @@ void bx_generic_cpuid_t::init_isa_extensions_bitmask(void)
features_bitmask |= BX_ISA_FMA4;
}
static bx_bool xop_enabled = SIM->get_param_bool(BXPN_CPUID_XOP)->get();
if (xop_enabled) {
if (! avx_enabled) {
BX_PANIC(("PANIC: XOP emulation requires AVX support !"));
return;
}
features_bitmask |= BX_ISA_XOP;
}
static bx_bool tbm_enabled = SIM->get_param_bool(BXPN_CPUID_TBM)->get();
if (tbm_enabled) {
if (! avx_enabled || ! xop_enabled) {
BX_PANIC(("PANIC: TBM emulation requires AVX and XOP support !"));
return;
}
features_bitmask |= BX_ISA_TBM;
}
#endif // BX_SUPPORT_AVX
#endif // BX_SUPPORT_X86_64
@ -1297,6 +1317,7 @@ Bit32u bx_generic_cpuid_t::get_ext2_cpuid_features(void) const
if (BX_CPUID_SUPPORT_CPU_EXTENSION(BX_CPU_LONG_MODE))
features |= BX_CPUID_EXT2_LAHF_SAHF | BX_CPUID_EXT2_PREFETCHW;
#endif
#if BX_SUPPORT_MISALIGNED_SSE
features |= BX_CPUID_EXT2_MISALIGNED_SSE;
#endif
@ -1307,9 +1328,15 @@ Bit32u bx_generic_cpuid_t::get_ext2_cpuid_features(void) const
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_SSE4A))
features |= BX_CPUID_EXT2_SSE4A;
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_XOP))
features |= BX_CPUID_EXT2_XOP;
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_FMA4))
features |= BX_CPUID_EXT2_FMA4;
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_TBM))
features |= BX_CPUID_EXT2_TBM;
return features;
}

View File

@ -1936,46 +1936,111 @@ bx_define_opcode(BX_IA_PDEP_GqEqBq, &BX_CPU_C::LOAD_Eq, &BX_CPU_C::PDEP_GqEqBqR,
// FMA4 (AMD)
bx_define_opcode(BX_IA_VFMADDSUBPS_VpsHpsWpsVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDSUBPS_VpsHpsWpsVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDSUBPS_VpsHpsVIbWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDSUBPS_VpsHpsVIbWpsR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDSUBPD_VpdHpdWpdVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDSUBPD_VpdHpdWpdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDSUBPD_VpdHpdVIbWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDSUBPD_VpdHpdVIbWpdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBADDPS_VpsHpsWpsVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUBADDPS_VpsHpsWpsVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBADDPS_VpsHpsVIbWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUBADDPS_VpsHpsVIbWpsR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBADDPD_VpdHpdWpdVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUBADDPD_VpdHpdWpdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBADDPD_VpdHpdVIbWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUBADDPD_VpdHpdVIbWpdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDPS_VpsHpsWpsVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDPS_VpsHpsWpsVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDPS_VpsHpsVIbWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDPS_VpsHpsVIbWpsR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDPD_VpdHpdWpdVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDPD_VpdHpdWpdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDPD_VpdHpdVIbWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMADDPD_VpdHpdVIbWpdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDSS_VssHssWssVIb, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFMADDSS_VssHssWssVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDSS_VssHssVIbWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFMADDSS_VssHssVIbWssR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDSD_VsdHsdWsdVIb, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFMADDSD_VsdHsdWsdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMADDSD_VsdHsdVIbWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFMADDSD_VsdHsdVIbWsdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBPS_VpsHpsWpsVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUBPS_VpsHpsWpsVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBPS_VpsHpsVIbWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUBPS_VpsHpsVIbWpsR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBPD_VpdHpdWpdVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUBPD_VpdHpdWpdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBPD_VpdHpdVIbWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFMSUBPD_VpdHpdVIbWpdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBSS_VssHssWssVIb, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFMSUBSS_VssHssWssVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBSS_VssHssVIbWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFMSUBSS_VssHssVIbWssR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBSD_VsdHsdWsdVIb, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFMSUBSD_VsdHsdWsdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFMSUBSD_VsdHsdVIbWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFMSUBSD_VsdHsdVIbWsdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADDPS_VpsHpsWpsVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADDPS_VpsHpsWpsVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADDPS_VpsHpsVIbWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADDPS_VpsHpsVIbWpsR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADDPD_VpdHpdWpdVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADDPD_VpdHpdWpdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADDPD_VpdHpdVIbWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMADDPD_VpdHpdVIbWpdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADDSS_VssHssWssVIb, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFNMADDSS_VssHssWssVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADDSS_VssHssVIbWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFNMADDSS_VssHssVIbWssR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADDSD_VsdHsdWsdVIb, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFNMADDSD_VsdHsdWsdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMADDSD_VsdHsdVIbWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFNMADDSD_VsdHsdVIbWsdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUBPS_VpsHpsWpsVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUBPS_VpsHpsWpsVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUBPS_VpsHpsVIbWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUBPS_VpsHpsVIbWpsR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUBPD_VpdHpdWpdVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUBPD_VpdHpdWpdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUBPD_VpdHpdVIbWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFNMSUBPD_VpdHpdVIbWpdR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUBSS_VssHssWssVIb, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFNMSUBSS_VssHssWssVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUBSS_VssHssVIbWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFNMSUBSS_VssHssVIbWssR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUBSD_VsdHsdWsdVIb, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFNMSUBSD_VsdHsdWsdVIbR, BX_ISA_FMA4, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFNMSUBSD_VsdHsdVIbWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFNMSUBSD_VsdHsdVIbWsdR, BX_ISA_FMA4, BX_PREPARE_AVX)
// FMA4 (AMD)
// XOP (AMD)
bx_define_opcode(BX_IA_VPCMOV_VdqHdqWdqVIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPCMOV_VdqHdqWdqVIb, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPPERM_VdqHdqWdqVIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPPERM_VdqHdqWdqVIb, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPSHAB_VdqWdqHdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPSHAB_VdqWdqHdq, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPSHAW_VdqWdqHdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPSHAW_VdqWdqHdq, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPSHAD_VdqWdqHdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPSHAD_VdqWdqHdq, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPSHAQ_VdqWdqHdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPSHAQ_VdqWdqHdq, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPROTB_VdqWdqHdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPROTB_VdqWdqHdq, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPROTW_VdqWdqHdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPROTW_VdqWdqHdq, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPROTD_VdqWdqHdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPROTD_VdqWdqHdq, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPROTQ_VdqWdqHdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPROTQ_VdqWdqHdq, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPSHLB_VdqWdqHdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPSHLB_VdqWdqHdq, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPSHLW_VdqWdqHdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPSHLW_VdqWdqHdq, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPSHLD_VdqWdqHdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPSHLD_VdqWdqHdq, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPSHLQ_VdqWdqHdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPSHLQ_VdqWdqHdq, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPMACSSWW_VdqHdqWdqVIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPMACSSWW_VdqHdqWdqVIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPMACSSWD_VdqHdqWdqVIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPMACSSWD_VdqHdqWdqVIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPMACSSDQL_VdqHdqWdqVIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPMACSSDQL_VdqHdqWdqVIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPMACSSDD_VdqHdqWdqVIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPMACSSDD_VdqHdqWdqVIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPMACSSDQH_VdqHdqWdqVIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPMACSSDQH_VdqHdqWdqVIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPMACSWW_VdqHdqWdqVIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPMACSWW_VdqHdqWdqVIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPMACSWD_VdqHdqWdqVIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPMACSWD_VdqHdqWdqVIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPMACSDQL_VdqHdqWdqVIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPMACSDQL_VdqHdqWdqVIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPMACSDD_VdqHdqWdqVIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPMACSDD_VdqHdqWdqVIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPMACSDQH_VdqHdqWdqVIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPMACSDQH_VdqHdqWdqVIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPMADCSSWD_VdqHdqWdqVIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPMADCSSWD_VdqHdqWdqVIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPMADCSWD_VdqHdqWdqVIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPMADCSWD_VdqHdqWdqVIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPROTB_VdqWdqIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPROTB_VdqWdqIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPROTW_VdqWdqIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPROTW_VdqWdqIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPROTD_VdqWdqIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPROTD_VdqWdqIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPROTQ_VdqWdqIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPROTQ_VdqWdqIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPCOMB_VdqHdqWdqIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPCOMB_VdqHdqWdqIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPCOMW_VdqHdqWdqIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPCOMW_VdqHdqWdqIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPCOMD_VdqHdqWdqIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPCOMD_VdqHdqWdqIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPCOMQ_VdqHdqWdqIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPCOMQ_VdqHdqWdqIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPCOMUB_VdqHdqWdqIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPCOMUB_VdqHdqWdqIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPCOMUW_VdqHdqWdqIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPCOMUW_VdqHdqWdqIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPCOMUD_VdqHdqWdqIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPCOMUD_VdqHdqWdqIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPCOMUQ_VdqHdqWdqIb, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPCOMUQ_VdqHdqWdqIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VFRCZPS_VpsWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFRCZPS_VpsWpsR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VFRCZPD_VpdWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VFRCZPD_VpdWpdR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VFRCZSS_VssWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VFRCZSS_VssWssR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VFRCZSD_VsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VFRCZSD_VsdWsdR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPHADDBW_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHADDBW_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPHADDBD_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHADDBD_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPHADDBQ_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHADDBQ_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPHADDWD_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHADDWD_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPHADDWQ_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHADDWQ_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPHADDDQ_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHADDDQ_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPHADDUBW_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHADDUBW_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPHADDUBD_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHADDUBD_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPHADDUBQ_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHADDUBQ_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPHADDUWD_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHADDUWD_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPHADDUWQ_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHADDUWQ_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPHADDUDQ_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHADDUDQ_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPHSUBBW_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHSUBBW_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPHSUBWD_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHSUBWD_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPHSUBDQ_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHSUBDQ_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
// XOP (AMD)
// TBM (AMD)
bx_define_opcode(BX_IA_BEXTR_GdEdId, &BX_CPU_C::LOAD_Ed, &BX_CPU_C::BEXTR_GdEdIdR, BX_ISA_TBM, BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_BLCFILL_BdEd, &BX_CPU_C::LOAD_Ed, &BX_CPU_C::BLCFILL_BdEdR, BX_ISA_TBM, 0)
bx_define_opcode(BX_IA_BLCI_BdEd, &BX_CPU_C::LOAD_Ed, &BX_CPU_C::BLCI_BdEdR, BX_ISA_TBM, 0)
bx_define_opcode(BX_IA_BLCIC_BdEd, &BX_CPU_C::LOAD_Ed, &BX_CPU_C::BLCIC_BdEdR, BX_ISA_TBM, 0)
bx_define_opcode(BX_IA_BLCMSK_BdEd, &BX_CPU_C::LOAD_Ed, &BX_CPU_C::BLCMSK_BdEdR, BX_ISA_TBM, 0)
bx_define_opcode(BX_IA_BLCS_BdEd, &BX_CPU_C::LOAD_Ed, &BX_CPU_C::BLCS_BdEdR, BX_ISA_TBM, 0)
bx_define_opcode(BX_IA_BLSFILL_BdEd, &BX_CPU_C::LOAD_Ed, &BX_CPU_C::BLSFILL_BdEdR, BX_ISA_TBM, 0)
bx_define_opcode(BX_IA_BLSIC_BdEd, &BX_CPU_C::LOAD_Ed, &BX_CPU_C::BLSIC_BdEdR, BX_ISA_TBM, 0)
bx_define_opcode(BX_IA_T1MSKC_BdEd, &BX_CPU_C::LOAD_Ed, &BX_CPU_C::T1MSKC_BdEdR, BX_ISA_TBM, 0)
bx_define_opcode(BX_IA_TZMSK_BdEd, &BX_CPU_C::LOAD_Ed, &BX_CPU_C::TZMSK_BdEdR, BX_ISA_TBM, 0)
bx_define_opcode(BX_IA_BEXTR_GqEqId, &BX_CPU_C::LOAD_Eq, &BX_CPU_C::BEXTR_GqEqIdR, BX_ISA_TBM, BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_BLCFILL_BqEq, &BX_CPU_C::LOAD_Eq, &BX_CPU_C::BLCFILL_BqEqR, BX_ISA_TBM, 0)
bx_define_opcode(BX_IA_BLCI_BqEq, &BX_CPU_C::LOAD_Eq, &BX_CPU_C::BLCI_BqEqR, BX_ISA_TBM, 0)
bx_define_opcode(BX_IA_BLCIC_BqEq, &BX_CPU_C::LOAD_Eq, &BX_CPU_C::BLCIC_BqEqR, BX_ISA_TBM, 0)
bx_define_opcode(BX_IA_BLCMSK_BqEq, &BX_CPU_C::LOAD_Eq, &BX_CPU_C::BLCMSK_BqEqR, BX_ISA_TBM, 0)
bx_define_opcode(BX_IA_BLCS_BqEq, &BX_CPU_C::LOAD_Eq, &BX_CPU_C::BLCS_BqEqR, BX_ISA_TBM, 0)
bx_define_opcode(BX_IA_BLSFILL_BqEq, &BX_CPU_C::LOAD_Eq, &BX_CPU_C::BLSFILL_BqEqR, BX_ISA_TBM, 0)
bx_define_opcode(BX_IA_BLSIC_BqEq, &BX_CPU_C::LOAD_Eq, &BX_CPU_C::BLSIC_BqEqR, BX_ISA_TBM, 0)
bx_define_opcode(BX_IA_T1MSKC_BqEq, &BX_CPU_C::LOAD_Eq, &BX_CPU_C::T1MSKC_BqEqR, BX_ISA_TBM, 0)
bx_define_opcode(BX_IA_TZMSK_BqEq, &BX_CPU_C::LOAD_Eq, &BX_CPU_C::TZMSK_BqEqR, BX_ISA_TBM, 0)
// TBM (AMD)
#endif /* BX_SUPPORT_AVX */
bx_define_opcode(BX_IA_TZCNT_GwEw, &BX_CPU_C::LOAD_Ew, &BX_CPU_C::TZCNT_GwEwR, BX_ISA_BMI1, 0)

View File

@ -102,7 +102,8 @@ public:
BxResolvePtr_tR ResolveModrm;
struct {
// 15..13 AVX vl (0=no VL, 1=128 bit, 2=256 bit)
// 15..14 VEX Vector Length (0=no VL, 1=128 bit, 2=256 bit)
// 13..13 VEX.W
// 12..12 lock
// 11...0 opcode
Bit16u ia_opcode;
@ -238,6 +239,7 @@ public:
BX_CPP_INLINE void init(unsigned os32, unsigned as32, unsigned os64, unsigned as64)
{
metaInfo.metaInfo1 = (os32<<2) | (os64<<3) | (as32<<0) | (as64<<1);
metaInfo.ia_opcode = 0; // clear VEX.W and VEX.VL
}
BX_CPP_INLINE unsigned os32L(void) const {
@ -325,15 +327,27 @@ public:
BX_CPP_INLINE unsigned getVL(void) const {
#if BX_SUPPORT_AVX
return metaInfo.ia_opcode >> 13;
return metaInfo.ia_opcode >> 14;
#else
return 0;
#endif
}
BX_CPP_INLINE void setVL(unsigned value) {
metaInfo.ia_opcode = (metaInfo.ia_opcode & 0x1fff) | (value << 13);
metaInfo.ia_opcode = (metaInfo.ia_opcode & 0x3fff) | (value << 14);
}
#if BX_SUPPORT_AVX
BX_CPP_INLINE unsigned getVexW(void) const {
return metaInfo.ia_opcode & (1<<13);
}
BX_CPP_INLINE void setVexW(unsigned bit) {
metaInfo.ia_opcode = (metaInfo.ia_opcode & 0xdfff) | (bit << 13);
}
BX_CPP_INLINE void assertVexW(void) {
metaInfo.ia_opcode |= (1 << 13);
}
#endif
BX_CPP_INLINE void setVvv(unsigned vvv) {
metaData[BX_INSTR_METADATA_VVV] = vvv;
}

View File

@ -26,7 +26,7 @@
#if BX_SUPPORT_X86_64
static unsigned partial_add(Bit32u *sum,Bit32u b)
static unsigned partial_add(Bit32u *sum, Bit32u b)
{
Bit32u t = *sum;
*sum += b;

bochs/cpu/simd_compare.h (new executable file, 343 lines)
View File

@ -0,0 +1,343 @@
/////////////////////////////////////////////////////////////////////////
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2011 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
//
/////////////////////////////////////////////////////////////////////////
#ifndef BX_SIMD_INT_COMPARE_FUNCTIONS_H
#define BX_SIMD_INT_COMPARE_FUNCTIONS_H
// compare less than (signed)
BX_CPP_INLINE void sse_pcmpltb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<16; n++) {
op1->xmmubyte(n) = (op1->xmmsbyte(n) < op2->xmmsbyte(n)) ? 0xff : 0;
}
}
BX_CPP_INLINE void sse_pcmpltw(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<8; n++) {
op1->xmm16u(n) = (op1->xmm16s(n) < op2->xmm16s(n)) ? 0xffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpltd(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<4; n++) {
op1->xmm32u(n) = (op1->xmm32s(n) < op2->xmm32s(n)) ? 0xffffffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpltq(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<2; n++) {
op1->xmm64u(n) = (op1->xmm64s(n) < op2->xmm64s(n)) ? BX_CONST64(0xffffffffffffffff) : 0;
}
}
// compare less than (unsigned)
BX_CPP_INLINE void sse_pcmpltub(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<16; n++) {
op1->xmmubyte(n) = (op1->xmmubyte(n) < op2->xmmubyte(n)) ? 0xff : 0;
}
}
BX_CPP_INLINE void sse_pcmpltuw(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<8; n++) {
op1->xmm16u(n) = (op1->xmm16u(n) < op2->xmm16u(n)) ? 0xffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpltud(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<4; n++) {
op1->xmm32u(n) = (op1->xmm32u(n) < op2->xmm32u(n)) ? 0xffffffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpltuq(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<2; n++) {
op1->xmm64u(n) = (op1->xmm64u(n) < op2->xmm64u(n)) ? BX_CONST64(0xffffffffffffffff) : 0;
}
}
// compare less than or equal (signed)
BX_CPP_INLINE void sse_pcmpleb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<16; n++) {
op1->xmmubyte(n) = (op1->xmmsbyte(n) <= op2->xmmsbyte(n)) ? 0xff : 0;
}
}
BX_CPP_INLINE void sse_pcmplew(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<8; n++) {
op1->xmm16u(n) = (op1->xmm16s(n) <= op2->xmm16s(n)) ? 0xffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpled(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<4; n++) {
op1->xmm32u(n) = (op1->xmm32s(n) <= op2->xmm32s(n)) ? 0xffffffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpleq(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<2; n++) {
op1->xmm64u(n) = (op1->xmm64s(n) <= op2->xmm64s(n)) ? BX_CONST64(0xffffffffffffffff) : 0;
}
}
// compare less than or equal (unsigned)
BX_CPP_INLINE void sse_pcmpleub(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<16; n++) {
op1->xmmubyte(n) = (op1->xmmubyte(n) <= op2->xmmubyte(n)) ? 0xff : 0;
}
}
BX_CPP_INLINE void sse_pcmpleuw(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<8; n++) {
op1->xmm16u(n) = (op1->xmm16u(n) <= op2->xmm16u(n)) ? 0xffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpleud(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<4; n++) {
op1->xmm32u(n) = (op1->xmm32u(n) <= op2->xmm32u(n)) ? 0xffffffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpleuq(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<2; n++) {
op1->xmm64u(n) = (op1->xmm64u(n) <= op2->xmm64u(n)) ? BX_CONST64(0xffffffffffffffff) : 0;
}
}
// compare greater than (signed)
BX_CPP_INLINE void sse_pcmpgtb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<16; n++) {
op1->xmmubyte(n) = (op1->xmmsbyte(n) > op2->xmmsbyte(n)) ? 0xff : 0;
}
}
BX_CPP_INLINE void sse_pcmpgtw(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<8; n++) {
op1->xmm16u(n) = (op1->xmm16s(n) > op2->xmm16s(n)) ? 0xffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpgtd(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<4; n++) {
op1->xmm32u(n) = (op1->xmm32s(n) > op2->xmm32s(n)) ? 0xffffffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpgtq(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<2; n++) {
op1->xmm64u(n) = (op1->xmm64s(n) > op2->xmm64s(n)) ? BX_CONST64(0xffffffffffffffff) : 0;
}
}
// compare greater than (unsigned)
BX_CPP_INLINE void sse_pcmpgtub(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<16; n++) {
op1->xmmubyte(n) = (op1->xmmubyte(n) > op2->xmmubyte(n)) ? 0xff : 0;
}
}
BX_CPP_INLINE void sse_pcmpgtuw(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<8; n++) {
op1->xmm16u(n) = (op1->xmm16u(n) > op2->xmm16u(n)) ? 0xffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpgtud(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<4; n++) {
op1->xmm32u(n) = (op1->xmm32u(n) > op2->xmm32u(n)) ? 0xffffffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpgtuq(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<2; n++) {
op1->xmm64u(n) = (op1->xmm64u(n) > op2->xmm64u(n)) ? BX_CONST64(0xffffffffffffffff) : 0;
}
}
// compare greater than or equal (signed)
BX_CPP_INLINE void sse_pcmpgeb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<16; n++) {
op1->xmmubyte(n) = (op1->xmmsbyte(n) >= op2->xmmsbyte(n)) ? 0xff : 0;
}
}
BX_CPP_INLINE void sse_pcmpgew(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<8; n++) {
op1->xmm16u(n) = (op1->xmm16s(n) >= op2->xmm16s(n)) ? 0xffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpged(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<4; n++) {
op1->xmm32u(n) = (op1->xmm32s(n) >= op2->xmm32s(n)) ? 0xffffffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpgeq(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<2; n++) {
op1->xmm64u(n) = (op1->xmm64s(n) >= op2->xmm64s(n)) ? BX_CONST64(0xffffffffffffffff) : 0;
}
}
// compare greater than or equal (unsigned)
BX_CPP_INLINE void sse_pcmpgeub(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<16; n++) {
op1->xmmubyte(n) = (op1->xmmubyte(n) >= op2->xmmubyte(n)) ? 0xff : 0;
}
}
BX_CPP_INLINE void sse_pcmpgeuw(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<8; n++) {
op1->xmm16u(n) = (op1->xmm16u(n) >= op2->xmm16u(n)) ? 0xffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpgeud(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<4; n++) {
op1->xmm32u(n) = (op1->xmm32u(n) >= op2->xmm32u(n)) ? 0xffffffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpgeuq(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<2; n++) {
op1->xmm64u(n) = (op1->xmm64u(n) >= op2->xmm64u(n)) ? BX_CONST64(0xffffffffffffffff) : 0;
}
}
// compare equal
BX_CPP_INLINE void sse_pcmpeqb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<16; n++) {
op1->xmmubyte(n) = (op1->xmmubyte(n) == op2->xmmubyte(n)) ? 0xff : 0;
}
}
BX_CPP_INLINE void sse_pcmpeqw(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<8; n++) {
op1->xmm16u(n) = (op1->xmm16u(n) == op2->xmm16u(n)) ? 0xffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpeqd(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<4; n++) {
op1->xmm32u(n) = (op1->xmm32u(n) == op2->xmm32u(n)) ? 0xffffffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpeqq(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<2; n++) {
op1->xmm64u(n) = (op1->xmm64u(n) == op2->xmm64u(n)) ? BX_CONST64(0xffffffffffffffff) : 0;
}
}
// compare not equal
BX_CPP_INLINE void sse_pcmpneb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<16; n++) {
op1->xmmubyte(n) = (op1->xmmubyte(n) != op2->xmmubyte(n)) ? 0xff : 0;
}
}
BX_CPP_INLINE void sse_pcmpnew(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<8; n++) {
op1->xmm16u(n) = (op1->xmm16u(n) != op2->xmm16u(n)) ? 0xffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpned(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<4; n++) {
op1->xmm32u(n) = (op1->xmm32u(n) != op2->xmm32u(n)) ? 0xffffffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpneq(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<2; n++) {
op1->xmm64u(n) = (op1->xmm64u(n) != op2->xmm64u(n)) ? BX_CONST64(0xffffffffffffffff) : 0;
}
}
// compare true/false
BX_CPP_INLINE void sse_pcmptrue(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<2; n++) {
op1->xmm64u(n) = BX_CONST64(0xffffffffffffffff);
}
}
BX_CPP_INLINE void sse_pcmpfalse(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<2; n++) {
op1->xmm64u(n) = 0;
}
}
#endif

View File

@ -47,64 +47,6 @@ BX_CPP_INLINE void sse_pabsd(BxPackedXmmRegister *op)
}
}
// compare
BX_CPP_INLINE void sse_pcmpeqb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<16; n++) {
op1->xmmubyte(n) = (op1->xmmubyte(n) == op2->xmmubyte(n)) ? 0xff : 0;
}
}
BX_CPP_INLINE void sse_pcmpeqw(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<8; n++) {
op1->xmm16u(n) = (op1->xmm16u(n) == op2->xmm16u(n)) ? 0xffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpeqd(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<4; n++) {
op1->xmm32u(n) = (op1->xmm32u(n) == op2->xmm32u(n)) ? 0xffffffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpeqq(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<2; n++) {
op1->xmm64u(n) = (op1->xmm64u(n) == op2->xmm64u(n)) ? BX_CONST64(0xffffffffffffffff) : 0;
}
}
BX_CPP_INLINE void sse_pcmpgtb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<16; n++) {
op1->xmmubyte(n) = (op1->xmmsbyte(n) > op2->xmmsbyte(n)) ? 0xff : 0;
}
}
BX_CPP_INLINE void sse_pcmpgtw(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<8; n++) {
op1->xmm16u(n) = (op1->xmm16s(n) > op2->xmm16s(n)) ? 0xffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpgtd(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<4; n++) {
op1->xmm32u(n) = (op1->xmm32s(n) > op2->xmm32s(n)) ? 0xffffffff : 0;
}
}
BX_CPP_INLINE void sse_pcmpgtq(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0; n<2; n++) {
op1->xmm64u(n) = (op1->xmm64s(n) > op2->xmm64s(n)) ? BX_CONST64(0xffffffffffffffff) : 0;
}
}
// min/max
BX_CPP_INLINE void sse_pminsb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
@ -933,6 +875,15 @@ BX_CPP_INLINE void sse_mpsadbw(BxPackedXmmRegister *r, const BxPackedXmmRegister
}
}
// bitwise select
BX_CPP_INLINE void sse_pselect(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, const BxPackedXmmRegister *op3)
{
for(unsigned n=0;n < 2;n++) {
op1->xmm64u(n) = (op3->xmm64u(n) & op1->xmm64u(n)) | (~op3->xmm64u(n) & op2->xmm64u(n));
}
}
// shift
BX_CPP_INLINE void sse_psravd(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
@ -1181,4 +1132,270 @@ BX_CPP_INLINE void sse_palignr(BxPackedXmmRegister *op2, const BxPackedXmmRegist
}
}
// rotate (right)
BX_CPP_INLINE void sse_prorb(BxPackedXmmRegister *op, int shift)
{
shift &= 0x7;
for(unsigned n=0;n<16;n++) {
op->xmmubyte(n) = (op->xmmubyte(n) >> shift) | (op->xmmubyte(n) << (8 - shift));
}
}
BX_CPP_INLINE void sse_prorw(BxPackedXmmRegister *op, int shift)
{
shift &= 0xf;
for(unsigned n=0;n<8;n++) {
op->xmm16u(n) = (op->xmm16u(n) >> shift) | (op->xmm16u(n) << (16 - shift));
}
}
BX_CPP_INLINE void sse_prord(BxPackedXmmRegister *op, int shift)
{
shift &= 0x1f;
for(unsigned n=0;n<4;n++) {
op->xmm32u(n) = (op->xmm32u(n) >> shift) | (op->xmm32u(n) << (32 - shift));
}
}
BX_CPP_INLINE void sse_prorq(BxPackedXmmRegister *op, int shift)
{
shift &= 0x3f;
for(unsigned n=0;n<2;n++) {
op->xmm64u(n) = (op->xmm64u(n) >> shift) | (op->xmm64u(n) << (64 - shift));
}
}
// rotate (left)
BX_CPP_INLINE void sse_prolb(BxPackedXmmRegister *op, int shift)
{
shift &= 0x7;
for(unsigned n=0;n<16;n++) {
op->xmmubyte(n) = (op->xmmubyte(n) << shift) | (op->xmmubyte(n) >> (8 - shift));
}
}
BX_CPP_INLINE void sse_prolw(BxPackedXmmRegister *op, int shift)
{
shift &= 0xf;
for(unsigned n=0;n<8;n++) {
op->xmm16u(n) = (op->xmm16u(n) << shift) | (op->xmm16u(n) >> (16 - shift));
}
}
BX_CPP_INLINE void sse_prold(BxPackedXmmRegister *op, int shift)
{
shift &= 0x1f;
for(unsigned n=0;n<4;n++) {
op->xmm32u(n) = (op->xmm32u(n) << shift) | (op->xmm32u(n) >> (32 - shift));
}
}
BX_CPP_INLINE void sse_prolq(BxPackedXmmRegister *op, int shift)
{
shift &= 0x3f;
for(unsigned n=0;n<2;n++) {
op->xmm64u(n) = (op->xmm64u(n) << shift) | (op->xmm64u(n) >> (64 - shift));
}
}
// variable shift/rotate (XOP)
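// the count for each element is the low byte of the corresponding element of the count operand,
// interpreted as signed: a positive count shifts/rotates left, a negative count shifts/rotates right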
BX_CPP_INLINE void sse_protb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0;n < 16;n++) {
int shift = op2->xmmsbyte(n);
if (shift > 0) {
// rotate left
shift &= 0x7;
op1->xmmubyte(n) = (op1->xmmubyte(n) << shift) | (op1->xmmubyte(n) >> (8 - shift));
}
else if (shift < 0) {
// rotate right
shift = -shift & 0x7;
op1->xmmubyte(n) = (op1->xmmubyte(n) >> shift) | (op1->xmmubyte(n) << (8 - shift));
}
}
}
BX_CPP_INLINE void sse_protw(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0;n < 8;n++) {
int shift = op2->xmmsbyte(n*2);
if (shift > 0) {
// rotate left
shift &= 0xf;
op1->xmm16u(n) = (op1->xmm16u(n) << shift) | (op1->xmm16u(n) >> (16 - shift));
}
else if (shift < 0) {
// rotate right
shift = -shift & 0xf;
op1->xmm16u(n) = (op1->xmm16u(n) >> shift) | (op1->xmm16u(n) << (16 - shift));
}
}
}
BX_CPP_INLINE void sse_protd(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0;n < 4;n++) {
int shift = op2->xmmsbyte(n*4);
if (shift > 0) {
// rotate left
shift &= 0x1f;
op1->xmm32u(n) = (op1->xmm32u(n) << shift) | (op1->xmm32u(n) >> (32 - shift));
}
else if (shift < 0) {
// rotate right
shift = -shift & 0x1f;
op1->xmm32u(n) = (op1->xmm32u(n) >> shift) | (op1->xmm32u(n) << (32 - shift));
}
}
}
BX_CPP_INLINE void sse_protq(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0;n < 2;n++) {
int shift = op2->xmmsbyte(n*8);
if (shift > 0) {
// rotate left
shift &= 0x3f;
op1->xmm64u(n) = (op1->xmm64u(n) << shift) | (op1->xmm64u(n) >> (64 - shift));
}
else if (shift < 0) {
// rotate right
shift = -shift & 0x3f;
op1->xmm64u(n) = (op1->xmm64u(n) >> shift) | (op1->xmm64u(n) << (64 - shift));
}
}
}
BX_CPP_INLINE void sse_pshab(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0;n < 16;n++) {
int shift = op2->xmmsbyte(n);
if (shift > 0) {
// shift left
op1->xmmsbyte(n) <<= (shift & 0x7);
}
else if (shift < 0) {
// shift right
op1->xmmsbyte(n) >>= (-shift & 0x7);
}
}
}
BX_CPP_INLINE void sse_pshaw(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0;n < 8;n++) {
int shift = op2->xmmsbyte(n*2);
if (shift > 0) {
// shift left
op1->xmm16s(n) <<= (shift & 0xf);
}
else if (shift < 0) {
// shift right
op1->xmm16s(n) >>= (-shift & 0xf);
}
}
}
BX_CPP_INLINE void sse_pshad(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0;n < 4;n++) {
int shift = op2->xmmsbyte(n*4);
if (shift > 0) {
// shift left
op1->xmm32s(n) <<= (shift & 0x1f);
}
else if (shift < 0) {
// shift right
op1->xmm32s(n) >>= (-shift & 0x1f);
}
}
}
BX_CPP_INLINE void sse_pshaq(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0;n < 2;n++) {
int shift = op2->xmmsbyte(n*8);
if (shift > 0) {
// shift left
op1->xmm64s(n) <<= (shift & 0x3f);
}
else if (shift < 0) {
// shift right
op1->xmm64s(n) >>= (-shift & 0x3f);
}
}
}
BX_CPP_INLINE void sse_pshlb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0;n < 16;n++) {
int shift = op2->xmmsbyte(n);
if (shift > 0) {
// shift left
op1->xmmubyte(n) <<= (shift & 0x7);
}
else if (shift < 0) {
// shift right
op1->xmmubyte(n) >>= (-shift & 0x7);
}
}
}
BX_CPP_INLINE void sse_pshlw(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0;n < 8;n++) {
int shift = op2->xmmsbyte(n*2); // signed count byte, consistent with the other VPSHL helpers
if (shift > 0) {
// shift left
op1->xmm16u(n) <<= (shift & 0xf);
}
else if (shift < 0) {
// shift right
op1->xmm16u(n) >>= (-shift & 0xf);
}
}
}
BX_CPP_INLINE void sse_pshld(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0;n < 4;n++) {
int shift = op2->xmmsbyte(n*4);
if (shift > 0) {
// shift left
op1->xmm32u(n) <<= (shift & 0x1f);
}
else if (shift < 0) {
// shift right
op1->xmm32u(n) >>= (-shift & 0x1f);
}
}
}
BX_CPP_INLINE void sse_pshlq(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
{
for(unsigned n=0;n < 2;n++) {
int shift = op2->xmmsbyte(n*8);
if (shift > 0) {
// shift left
op1->xmm64u(n) <<= (shift & 0x3f);
}
else if (shift < 0) {
// shift right
op1->xmm64u(n) >>= (-shift & 0x3f);
}
}
}
#endif

View File

@ -33,6 +33,7 @@
#if BX_CPU_LEVEL >= 6
#include "simd_int.h"
#include "simd_compare.h"
#define SSE_2OP(HANDLER, func) \
/* SSE instruction with two src operands */ \

bochs/cpu/tbm32.cc (new executable file, 179 lines)
View File

@ -0,0 +1,179 @@
/////////////////////////////////////////////////////////////////////////
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2011 The Bochs Project
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
/////////////////////////////////////////////////////////////////////////
#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
#if BX_SUPPORT_AVX
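// BEXTR (immediate form): extract a bit field from the source;
// immediate bits 7..0 give the starting bit position, bits 15..8 the field length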
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BEXTR_GdEdIdR(bxInstruction_c *i)
{
Bit16u control = (Bit16u) i->Id();
unsigned start = control & 0xff;
unsigned len = control >> 8;
Bit32u op1_32 = 0;
if (start < 32 && len > 0) {
op1_32 = BX_READ_32BIT_REG(i->rm());
op1_32 >>= start;
if (len < 32) {
Bit32u extract_mask = (1 << len) - 1;
op1_32 &= extract_mask;
}
}
SET_FLAGS_OSZAPC_LOGIC_32(op1_32);
BX_WRITE_32BIT_REGZ(i->nnn(), op1_32);
BX_NEXT_INSTR(i);
}
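// BLCFILL: clear all bits below the lowest clear bit of the source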
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLCFILL_BdEdR(bxInstruction_c *i)
{
Bit32u op_32 = BX_READ_32BIT_REG(i->rm());
Bit32u result_32 = (op_32 + 1) & op_32;
SET_FLAGS_OSZAPC_LOGIC_32(result_32);
set_CF((op_32 + 1) == 0);
BX_WRITE_32BIT_REGZ(i->vvv(), result_32);
BX_NEXT_INSTR(i);
}
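// BLCI: set all bits except the lowest clear bit of the source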
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLCI_BdEdR(bxInstruction_c *i)
{
Bit32u op_32 = BX_READ_32BIT_REG(i->rm());
Bit32u result_32 = ~(op_32 + 1) | op_32;
SET_FLAGS_OSZAPC_LOGIC_32(result_32);
set_CF((op_32 + 1) == 0);
BX_WRITE_32BIT_REGZ(i->vvv(), result_32);
BX_NEXT_INSTR(i);
}
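// BLCIC: isolate the lowest clear bit of the source (only that bit set in the result)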
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLCIC_BdEdR(bxInstruction_c *i)
{
Bit32u op_32 = BX_READ_32BIT_REG(i->rm());
Bit32u result_32 = (op_32 + 1) & ~op_32;
SET_FLAGS_OSZAPC_LOGIC_32(result_32);
set_CF((op_32 + 1) == 0);
BX_WRITE_32BIT_REGZ(i->vvv(), result_32);
BX_NEXT_INSTR(i);
}
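// BLCMSK: mask from bit 0 up to and including the lowest clear bit of the source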
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLCMSK_BdEdR(bxInstruction_c *i)
{
Bit32u op_32 = BX_READ_32BIT_REG(i->rm());
Bit32u result_32 = (op_32 + 1) ^ op_32;
SET_FLAGS_OSZAPC_LOGIC_32(result_32);
set_CF((op_32 + 1) == 0);
BX_WRITE_32BIT_REGZ(i->vvv(), result_32);
BX_NEXT_INSTR(i);
}
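// BLCS: set the lowest clear bit of the source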
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLCS_BdEdR(bxInstruction_c *i)
{
Bit32u op_32 = BX_READ_32BIT_REG(i->rm());
Bit32u result_32 = (op_32 + 1) | op_32;
SET_FLAGS_OSZAPC_LOGIC_32(result_32);
set_CF((op_32 + 1) == 0);
BX_WRITE_32BIT_REGZ(i->vvv(), result_32);
BX_NEXT_INSTR(i);
}
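// BLSFILL: set all bits below the lowest set bit of the source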
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLSFILL_BdEdR(bxInstruction_c *i)
{
Bit32u op_32 = BX_READ_32BIT_REG(i->rm());
Bit32u result_32 = (op_32 - 1) | op_32;
SET_FLAGS_OSZAPC_LOGIC_32(result_32);
set_CF(op_32 == 0);
BX_WRITE_32BIT_REGZ(i->vvv(), result_32);
BX_NEXT_INSTR(i);
}
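// BLSIC: clear the lowest set bit of the source and set all other bits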
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLSIC_BdEdR(bxInstruction_c *i)
{
Bit32u op_32 = BX_READ_32BIT_REG(i->rm());
Bit32u result_32 = (op_32 - 1) | ~op_32;
SET_FLAGS_OSZAPC_LOGIC_32(result_32);
set_CF(op_32 == 0);
BX_WRITE_32BIT_REGZ(i->vvv(), result_32);
BX_NEXT_INSTR(i);
}
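// T1MSKC: set all bits except the trailing ones of the source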
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::T1MSKC_BdEdR(bxInstruction_c *i)
{
Bit32u op_32 = BX_READ_32BIT_REG(i->rm());
Bit32u result_32 = (op_32 + 1) | ~op_32;
SET_FLAGS_OSZAPC_LOGIC_32(result_32);
set_CF((op_32 + 1) == 0);
BX_WRITE_32BIT_REGZ(i->vvv(), result_32);
BX_NEXT_INSTR(i);
}
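// TZMSK: mask covering the trailing zero bits of the source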
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::TZMSK_BdEdR(bxInstruction_c *i)
{
Bit32u op_32 = BX_READ_32BIT_REG(i->rm());
Bit32u result_32 = (op_32 - 1) & ~op_32;
SET_FLAGS_OSZAPC_LOGIC_32(result_32);
set_CF(op_32 == 0);
BX_WRITE_32BIT_REGZ(i->vvv(), result_32);
BX_NEXT_INSTR(i);
}
#endif

bochs/cpu/tbm64.cc (new executable file, 179 lines)
View File

@ -0,0 +1,179 @@
/////////////////////////////////////////////////////////////////////////
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2011 The Bochs Project
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
/////////////////////////////////////////////////////////////////////////
#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
#if BX_SUPPORT_X86_64 && BX_SUPPORT_AVX
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BEXTR_GqEqIdR(bxInstruction_c *i)
{
Bit16u control = (Bit16u) i->Id();
unsigned start = control & 0xff;
unsigned len = control >> 8;
Bit64u op1_64 = 0;
if (start < 64 && len > 0) {
op1_64 = BX_READ_64BIT_REG(i->rm());
op1_64 >>= start;
if (len < 64) {
Bit64u extract_mask = (BX_CONST64(1) << len) - 1;
op1_64 &= extract_mask;
}
}
SET_FLAGS_OSZAPC_LOGIC_64(op1_64);
BX_WRITE_64BIT_REG(i->nnn(), op1_64);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLCFILL_BqEqR(bxInstruction_c *i)
{
Bit64u op_64 = BX_READ_64BIT_REG(i->rm());
Bit64u result_64 = (op_64 + 1) & op_64;
SET_FLAGS_OSZAPC_LOGIC_64(result_64);
set_CF((op_64 + 1) == 0);
BX_WRITE_64BIT_REG(i->vvv(), result_64);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLCI_BqEqR(bxInstruction_c *i)
{
Bit64u op_64 = BX_READ_64BIT_REG(i->rm());
Bit64u result_64 = ~(op_64 + 1) | op_64;
SET_FLAGS_OSZAPC_LOGIC_64(result_64);
set_CF((op_64 + 1) == 0);
BX_WRITE_64BIT_REG(i->vvv(), result_64);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLCIC_BqEqR(bxInstruction_c *i)
{
Bit64u op_64 = BX_READ_64BIT_REG(i->rm());
Bit64u result_64 = (op_64 + 1) & ~op_64;
SET_FLAGS_OSZAPC_LOGIC_64(result_64);
set_CF((op_64 + 1) == 0);
BX_WRITE_64BIT_REG(i->vvv(), result_64);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLCMSK_BqEqR(bxInstruction_c *i)
{
Bit64u op_64 = BX_READ_64BIT_REG(i->rm());
Bit64u result_64 = (op_64 + 1) ^ op_64;
SET_FLAGS_OSZAPC_LOGIC_64(result_64);
set_CF((op_64 + 1) == 0);
BX_WRITE_64BIT_REG(i->vvv(), result_64);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLCS_BqEqR(bxInstruction_c *i)
{
Bit64u op_64 = BX_READ_64BIT_REG(i->rm());
Bit64u result_64 = (op_64 + 1) | op_64;
SET_FLAGS_OSZAPC_LOGIC_64(result_64);
set_CF((op_64 + 1) == 0);
BX_WRITE_64BIT_REG(i->vvv(), result_64);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLSFILL_BqEqR(bxInstruction_c *i)
{
Bit64u op_64 = BX_READ_64BIT_REG(i->rm());
Bit64u result_64 = (op_64 - 1) | op_64;
SET_FLAGS_OSZAPC_LOGIC_64(result_64);
set_CF(op_64 == 0);
BX_WRITE_64BIT_REG(i->vvv(), result_64);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::BLSIC_BqEqR(bxInstruction_c *i)
{
Bit64u op_64 = BX_READ_64BIT_REG(i->rm());
Bit64u result_64 = (op_64 - 1) | ~op_64;
SET_FLAGS_OSZAPC_LOGIC_64(result_64);
set_CF(op_64 == 0);
BX_WRITE_64BIT_REG(i->vvv(), result_64);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::T1MSKC_BqEqR(bxInstruction_c *i)
{
Bit64u op_64 = BX_READ_64BIT_REG(i->rm());
Bit64u result_64 = (op_64 + 1) | ~op_64;
SET_FLAGS_OSZAPC_LOGIC_64(result_64);
set_CF((op_64 + 1) == 0);
BX_WRITE_64BIT_REG(i->vvv(), result_64);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::TZMSK_BqEqR(bxInstruction_c *i)
{
Bit64u op_64 = BX_READ_64BIT_REG(i->rm());
Bit64u result_64 = (op_64 - 1) & ~op_64;
SET_FLAGS_OSZAPC_LOGIC_64(result_64);
set_CF(op_64 == 0);
BX_WRITE_64BIT_REG(i->vvv(), result_64);
BX_NEXT_INSTR(i);
}
#endif

View File

@ -47,6 +47,6 @@ TODO (known issues in CPU model):
- Dual-monitor treatment of SMIs and SMM not implemented yet
- VMENTER to not-active state not supported yet
[!] SMX, SVM (AMD), XOP (AMD), TBM (AMD)
[!] SMX, SVM (AMD)
[!] TODO: Convert CPUDB to plugins and search for them in runtime

View File

@ -351,6 +351,20 @@ BX_CPP_INLINE Bit16s BX_CPP_AttrRegparmN(1) SaturateDwordSToWordS(Bit32s value)
return (Bit16s) value;
}
/*
* SaturateQwordSToDwordS converts a signed 64-bit value to a signed
* 32-bit value. If the signed 64-bit value is less than -2147483648, it
* is represented by the saturated value -2147483648 (0x80000000). If it
* is greater than 2147483647, it is represented by the saturated value
* 2147483647 (0x7FFFFFFF).
*/
BX_CPP_INLINE Bit32s BX_CPP_AttrRegparmN(1) SaturateQwordSToDwordS(Bit64s value)
{
if(value < BX_CONST64(-2147483648)) return BX_CONST64(-2147483648);
if(value > 2147483647) return 2147483647;
return (Bit32s) value;
}
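// e.g. SaturateQwordSToDwordS(BX_CONST64(0x100000000)) returns 2147483647 (0x7FFFFFFF)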
/*
* SaturateWordSToByteU converts a signed 16-bit value to an unsigned
* 8-bit value. If the signed 16-bit value is less than zero it is
@ -377,4 +391,18 @@ BX_CPP_INLINE Bit16u BX_CPP_AttrRegparmN(1) SaturateDwordSToWordU(Bit32s value)
return (Bit16u) value;
}
/*
* SaturateQwordSToDwordU converts a signed 64-bit value to an unsigned
* 32-bit value. If the signed 64-bit value is less than zero, it is
* represented by the saturated value zero (0x00000000). If it is greater
* than 4294967295, it is represented by the saturated value 4294967295
* (0xFFFFFFFF).
*/
BX_CPP_INLINE Bit32u BX_CPP_AttrRegparmN(1) SaturateQwordSToDwordU(Bit64s value)
{
if(value < 0) return 0;
if(value > BX_CONST64(4294967295)) return BX_CONST64(4294967295);
return (Bit32u) value;
}
#endif

bochs/cpu/xop.cc (new executable file, 862 lines)
View File

@ -0,0 +1,862 @@
/////////////////////////////////////////////////////////////////////////
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2011 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
//
/////////////////////////////////////////////////////////////////////////
#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
#if BX_SUPPORT_AVX
#include "simd_int.h"
#include "simd_compare.h"
typedef void (*simd_compare_method)(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2);
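// The low 3 bits of the VPCOM* immediate index these predicate tables:
// 0=LT, 1=LE, 2=GT, 3=GE, 4=EQ, 5=NE, 6=FALSE, 7=TRUE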
// comparison predicate for PCOMB
static simd_compare_method compare8[8] = {
sse_pcmpltb,
sse_pcmpleb,
sse_pcmpgtb,
sse_pcmpgeb,
sse_pcmpeqb,
sse_pcmpneb,
sse_pcmpfalse,
sse_pcmptrue
};
// comparison predicate for PCOMUB
static simd_compare_method compare8u[8] = {
sse_pcmpltub,
sse_pcmpleub,
sse_pcmpgtub,
sse_pcmpgeub,
sse_pcmpeqb,
sse_pcmpneb,
sse_pcmpfalse,
sse_pcmptrue
};
// comparison predicate for PCOMW
static simd_compare_method compare16[8] = {
sse_pcmpltw,
sse_pcmplew,
sse_pcmpgtw,
sse_pcmpgew,
sse_pcmpeqw,
sse_pcmpnew,
sse_pcmpfalse,
sse_pcmptrue
};
// comparison predicate for PCOMUW
static simd_compare_method compare16u[8] = {
sse_pcmpltuw,
sse_pcmpleuw,
sse_pcmpgtuw,
sse_pcmpgeuw,
sse_pcmpeqw,
sse_pcmpnew,
sse_pcmpfalse,
sse_pcmptrue
};
// comparison predicate for PCOMD
static simd_compare_method compare32[8] = {
sse_pcmpltd,
sse_pcmpled,
sse_pcmpgtd,
sse_pcmpged,
sse_pcmpeqd,
sse_pcmpned,
sse_pcmpfalse,
sse_pcmptrue
};
// comparison predicate for PCOMUD
static simd_compare_method compare32u[8] = {
sse_pcmpltud,
sse_pcmpleud,
sse_pcmpgtud,
sse_pcmpgeud,
sse_pcmpeqd,
sse_pcmpned,
sse_pcmpfalse,
sse_pcmptrue
};
// comparison predicate for PCOMQ
static simd_compare_method compare64[8] = {
sse_pcmpltq,
sse_pcmpleq,
sse_pcmpgtq,
sse_pcmpgeq,
sse_pcmpeqq,
sse_pcmpneq,
sse_pcmpfalse,
sse_pcmptrue
};
// comparison predicate for PCOMUQ
static simd_compare_method compare64u[8] = {
sse_pcmpltuq,
sse_pcmpleuq,
sse_pcmpgtuq,
sse_pcmpgeuq,
sse_pcmpeqq,
sse_pcmpneq,
sse_pcmpfalse,
sse_pcmptrue
};
typedef Bit8u (*vpperm_operation)(Bit8u byte);
BX_CPP_INLINE Bit8u vpperm_bit_reverse(Bit8u v8)
{
return (v8 >> 7) |
((v8 >> 5) & 0x02) |
((v8 >> 3) & 0x04) |
((v8 >> 1) & 0x08) |
((v8 << 1) & 0x10) |
((v8 << 3) & 0x20) |
((v8 << 5) & 0x40) |
(v8 << 7);
}
BX_CPP_INLINE Bit8u vpperm_noop(Bit8u v8) { return v8; }
BX_CPP_INLINE Bit8u vpperm_invert(Bit8u v8) { return ~v8; }
BX_CPP_INLINE Bit8u vpperm_invert_bit_reverse(Bit8u v8) { return vpperm_bit_reverse(~v8); }
BX_CPP_INLINE Bit8u vpperm_zeros(Bit8u v8) { return 0; }
BX_CPP_INLINE Bit8u vpperm_ones(Bit8u v8) { return 0xff; }
BX_CPP_INLINE Bit8u vpperm_replicate_msb(Bit8u v8) { return (((Bit8s) v8) >> 7); }
BX_CPP_INLINE Bit8u vpperm_invert_replicate_msb(Bit8u v8) { return vpperm_replicate_msb(~v8); }
// logical operation for VPPERM
static vpperm_operation vpperm_op[8] = {
vpperm_noop,
vpperm_invert,
vpperm_bit_reverse,
vpperm_invert_bit_reverse,
vpperm_zeros,
vpperm_ones,
vpperm_replicate_msb,
vpperm_invert_replicate_msb
};
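// VPCMOV: bitwise conditional move; each result bit is taken from the first source (vvv) where the
// selector bit is 1 and from the other source where it is 0. VEX.W selects whether the selector
// comes from the rm operand or from the register encoded in the immediate byte.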
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCMOV_VdqHdqWdqVIb(bxInstruction_c *i)
{
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->vvv()), op2, op3;
if (i->getVexW()) {
op2 = BX_READ_AVX_REG(i->Ib());
op3 = BX_READ_AVX_REG(i->rm());
}
else {
op2 = BX_READ_AVX_REG(i->rm());
op3 = BX_READ_AVX_REG(i->Ib());
}
unsigned len = i->getVL();
for (unsigned n=0; n < len; n++) {
sse_pselect(&op1.avx128(n), &op2.avx128(n), &op3.avx128(n));
}
BX_WRITE_AVX_REGZ(i->nnn(), op1, len);
BX_NEXT_INSTR(i);
}
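// VPPERM: each destination byte is selected from the 32 bytes of the two data sources by the low
// 5 bits of the corresponding selector byte, then post-processed (invert, bit-reverse, zero/ones
// fill, sign replicate) according to the selector's top 3 bits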
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPPERM_VdqHdqWdqVIb(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv()), op2, op3, dest;
if (i->getVexW()) {
op2 = BX_READ_XMM_REG(i->Ib());
op3 = BX_READ_XMM_REG(i->rm());
}
else {
op2 = BX_READ_XMM_REG(i->rm());
op3 = BX_READ_XMM_REG(i->Ib());
}
for (unsigned n=0;n<16;n++) {
unsigned control = op3.xmmubyte(n);
if (control & 0x10)
dest.xmmubyte(n) = op1.xmmubyte(control & 0xf);
else
dest.xmmubyte(n) = op2.xmmubyte(control & 0xf);
dest.xmmubyte(n) = vpperm_op[control >> 5](dest.xmmubyte(n));
}
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), dest);
BX_NEXT_INSTR(i);
}
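// Common template for the XOP per-element shift/rotate instructions; VEX.W selects which source
// register supplies the data elements and which supplies the per-element signed counts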
#define XOP_SHIFT_ROTATE(HANDLER, func) \
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
{ \
BxPackedXmmRegister op1, op2; \
if (i->getVexW()) { \
op1 = BX_READ_XMM_REG(i->rm()); \
op2 = BX_READ_XMM_REG(i->vvv()); \
} \
else { \
op1 = BX_READ_XMM_REG(i->vvv()); \
op2 = BX_READ_XMM_REG(i->rm()); \
} \
\
(func)(&op1, &op2); \
\
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1); \
\
BX_NEXT_INSTR(i); \
}
XOP_SHIFT_ROTATE(VPSHAB_VdqWdqHdq, sse_pshab);
XOP_SHIFT_ROTATE(VPSHAW_VdqWdqHdq, sse_pshaw);
XOP_SHIFT_ROTATE(VPSHAD_VdqWdqHdq, sse_pshad);
XOP_SHIFT_ROTATE(VPSHAQ_VdqWdqHdq, sse_pshaq);
XOP_SHIFT_ROTATE(VPSHLB_VdqWdqHdq, sse_pshlb);
XOP_SHIFT_ROTATE(VPSHLW_VdqWdqHdq, sse_pshlw);
XOP_SHIFT_ROTATE(VPSHLD_VdqWdqHdq, sse_pshld);
XOP_SHIFT_ROTATE(VPSHLQ_VdqWdqHdq, sse_pshlq);
XOP_SHIFT_ROTATE(VPROTB_VdqWdqHdq, sse_protb);
XOP_SHIFT_ROTATE(VPROTW_VdqWdqHdq, sse_protw);
XOP_SHIFT_ROTATE(VPROTD_VdqWdqHdq, sse_protd);
XOP_SHIFT_ROTATE(VPROTQ_VdqWdqHdq, sse_protq);
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSSWW_VdqHdqWdqVIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());
for(unsigned n=0;n<8;n++) {
op1.xmm16s(n) = SaturateDwordSToWordS(((Bit32s) op1.xmm16s(n) * (Bit32s) op2.xmm16s(n)) + (Bit32s) op3.xmm16s(n));
}
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSSWD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());
op1.xmm32s(0) = SaturateQwordSToDwordS(((Bit32s) op1.xmm16s(1) * (Bit32s) op2.xmm16s(1)) + (Bit64s) op3.xmm32s(0));
op1.xmm32s(1) = SaturateQwordSToDwordS(((Bit32s) op1.xmm16s(3) * (Bit32s) op2.xmm16s(3)) + (Bit64s) op3.xmm32s(1));
op1.xmm32s(2) = SaturateQwordSToDwordS(((Bit32s) op1.xmm16s(5) * (Bit32s) op2.xmm16s(5)) + (Bit64s) op3.xmm32s(2));
op1.xmm32s(3) = SaturateQwordSToDwordS(((Bit32s) op1.xmm16s(7) * (Bit32s) op2.xmm16s(7)) + (Bit64s) op3.xmm32s(3));
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
#define GET_ADD_OVERFLOW(op1, op2, result, mask) \
((((op1) ^ (result)) & ((op2) ^ (result))) & (mask))
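// signed addition overflows iff both operands have the same sign and the result's sign differs,
// i.e. (op1 ^ result) and (op2 ^ result) both have their sign bits set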
BX_CPP_INLINE Bit64s add_saturate64(Bit64s a, Bit64s b)
{
Bit64s r = a + b;
Bit64u overflow = GET_ADD_OVERFLOW(a, b, r, BX_CONST64(0x8000000000000000));
if (! overflow) return r;
// signed overflow detected, saturate
if (a > 0) overflow--;
return (Bit64s) overflow;
}
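// e.g. add_saturate64(BX_CONST64(0x7fffffffffffffff), 1) saturates to BX_CONST64(0x7fffffffffffffff)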
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSSDQL_VdqHdqWdqVIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());
Bit64s product1 = (Bit64s) op1.xmm32s(0) * (Bit64s) op2.xmm32s(0);
Bit64s product2 = (Bit64s) op1.xmm32s(2) * (Bit64s) op2.xmm32s(2);
op1.xmm64s(0) = add_saturate64(product1, op3.xmm64s(0));
op1.xmm64s(1) = add_saturate64(product2, op3.xmm64s(1));
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSSDD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());
for(unsigned n=0;n<4;n++) {
op1.xmm32s(n) = SaturateQwordSToDwordS(((Bit64s) op1.xmm32s(n) * (Bit64s) op2.xmm32s(n)) + (Bit64s) op3.xmm32s(n));
}
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSSDQH_VdqHdqWdqVIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());
Bit64s product1 = (Bit64s) op1.xmm32s(1) * (Bit64s) op2.xmm32s(1);
Bit64s product2 = (Bit64s) op1.xmm32s(3) * (Bit64s) op2.xmm32s(3);
op1.xmm64s(0) = add_saturate64(product1, op3.xmm64s(0));
op1.xmm64s(1) = add_saturate64(product2, op3.xmm64s(1));
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSWW_VdqHdqWdqVIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());
for(unsigned n=0;n<8;n++) {
op1.xmm16s(n) = ((Bit32s) op1.xmm16s(n) * (Bit32s) op2.xmm16s(n)) + (Bit32s) op3.xmm16s(n);
}
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
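// VPMACSWD: like VPMACSSWD but without saturation, only the low 32 bits of
// the sum are kept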
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSWD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());
op1.xmm32s(0) = ((Bit32s) op1.xmm16s(1) * (Bit32s) op2.xmm16s(1)) + (Bit64s) op3.xmm32s(0);
op1.xmm32s(1) = ((Bit32s) op1.xmm16s(3) * (Bit32s) op2.xmm16s(3)) + (Bit64s) op3.xmm32s(1);
op1.xmm32s(2) = ((Bit32s) op1.xmm16s(5) * (Bit32s) op2.xmm16s(5)) + (Bit64s) op3.xmm32s(2);
op1.xmm32s(3) = ((Bit32s) op1.xmm16s(7) * (Bit32s) op2.xmm16s(7)) + (Bit64s) op3.xmm32s(3);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSDQL_VdqHdqWdqVIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());
Bit64s product1 = (Bit64s) op1.xmm32s(0) * (Bit64s) op2.xmm32s(0);
Bit64s product2 = (Bit64s) op1.xmm32s(2) * (Bit64s) op2.xmm32s(2);
op1.xmm64s(0) = product1 + op3.xmm64s(0);
op1.xmm64s(1) = product2 + op3.xmm64s(1);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSDD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());
for(unsigned n=0;n<4;n++) {
op1.xmm32s(n) = ((Bit64s) op1.xmm32s(n) * (Bit64s) op2.xmm32s(n)) + (Bit64s) op3.xmm32s(n);
}
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSDQH_VdqHdqWdqVIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());
Bit64s product1 = (Bit64s) op1.xmm32s(1) * (Bit64s) op2.xmm32s(1);
Bit64s product2 = (Bit64s) op1.xmm32s(3) * (Bit64s) op2.xmm32s(3);
op1.xmm64s(0) = product1 + op3.xmm64s(0);
op1.xmm64s(1) = product2 + op3.xmm64s(1);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMADCSSWD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
BX_PANIC(("VPMADCSSWD_VdqHdqWdqVIbR: not implemented yet"));
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMADCSWD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
BX_PANIC(("VPMADCSWD_VdqHdqWdqVIbR: not implemented yet"));
BX_NEXT_INSTR(i);
}
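// VPROT* with an immediate: the 8-bit rotate count is interpreted as a signed
// byte, a positive value rotates every element left and a negative value
// rotates it right by the absolute amount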
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPROTB_VdqWdqIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
int count = (Bit8s) i->Ib();
if (count > 0) {
// rotate left
sse_prolb(&op, count);
}
else if (count < 0) {
// rotate right
sse_prorb(&op, -count);
}
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPROTW_VdqWdqIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
int count = (Bit8s) i->Ib();
if (count > 0) {
// rotate left
sse_prolw(&op, count);
}
else if (count < 0) {
// rotate right
sse_prorw(&op, -count);
}
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPROTD_VdqWdqIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
int count = (Bit8s) i->Ib();
if (count > 0) {
// rotate left
sse_prold(&op, count);
}
else if (count < 0) {
// rotate right
sse_prord(&op, -count);
}
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPROTQ_VdqWdqIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
int count = (Bit8s) i->Ib();
if (count > 0) {
// rotate left
sse_prolq(&op, count);
}
else if (count < 0) {
// rotate right
sse_prorq(&op, -count);
}
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
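// VPCOM*: the low 3 bits of the immediate select the comparison predicate
// (0=LT, 1=LE, 2=GT, 3=GE, 4=EQ, 5=NEQ, 6=FALSE, 7=TRUE per the AMD XOP
// reference); the compare8/16/32/64[u] dispatch tables defined earlier in
// this file are assumed to hold one handler per predicate in that order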
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMB_VdqHdqWdqIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv()), op2 = BX_READ_XMM_REG(i->rm());
compare8[i->Ib() & 7](&op1, &op2);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMW_VdqHdqWdqIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv()), op2 = BX_READ_XMM_REG(i->rm());
compare16[i->Ib() & 7](&op1, &op2);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMD_VdqHdqWdqIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv()), op2 = BX_READ_XMM_REG(i->rm());
compare32[i->Ib() & 7](&op1, &op2);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMQ_VdqHdqWdqIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv()), op2 = BX_READ_XMM_REG(i->rm());
compare64[i->Ib() & 7](&op1, &op2);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMUB_VdqHdqWdqIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv()), op2 = BX_READ_XMM_REG(i->rm());
compare8u[i->Ib() & 7](&op1, &op2);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMUW_VdqHdqWdqIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv()), op2 = BX_READ_XMM_REG(i->rm());
compare16u[i->Ib() & 7](&op1, &op2);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMUD_VdqHdqWdqIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv()), op2 = BX_READ_XMM_REG(i->rm());
compare32u[i->Ib() & 7](&op1, &op2);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMUQ_VdqHdqWdqIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv()), op2 = BX_READ_XMM_REG(i->rm());
compare64u[i->Ib() & 7](&op1, &op2);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZPS_VpsWpsR(bxInstruction_c *i)
{
BX_PANIC(("VFRCZPS_VpsWpsR: not implemented yet"));
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZPD_VpdWpdR(bxInstruction_c *i)
{
BX_PANIC(("VFRCZPD_VpdWpdR: not implemented yet"));
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZSS_VssWssR(bxInstruction_c *i)
{
BX_PANIC(("VFRCZSS_VssWssR: not implemented yet"));
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZSD_VsdWsdR(bxInstruction_c *i)
{
BX_PANIC(("VFRCZSD_VsdWsdR: not implemented yet"));
BX_NEXT_INSTR(i);
}
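// XOP horizontal add/subtract group: each destination element is formed from
// adjacent narrower elements of the single source operand (signed, unsigned
// and subtracting variants below)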
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDBW_VdqWdqR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
op.xmm16s(0) = (Bit16s) op.xmmsbyte(0x0) + (Bit16s) op.xmmsbyte(0x1);
op.xmm16s(1) = (Bit16s) op.xmmsbyte(0x2) + (Bit16s) op.xmmsbyte(0x3);
op.xmm16s(2) = (Bit16s) op.xmmsbyte(0x4) + (Bit16s) op.xmmsbyte(0x5);
op.xmm16s(3) = (Bit16s) op.xmmsbyte(0x6) + (Bit16s) op.xmmsbyte(0x7);
op.xmm16s(4) = (Bit16s) op.xmmsbyte(0x8) + (Bit16s) op.xmmsbyte(0x9);
op.xmm16s(5) = (Bit16s) op.xmmsbyte(0xA) + (Bit16s) op.xmmsbyte(0xB);
op.xmm16s(6) = (Bit16s) op.xmmsbyte(0xC) + (Bit16s) op.xmmsbyte(0xD);
op.xmm16s(7) = (Bit16s) op.xmmsbyte(0xE) + (Bit16s) op.xmmsbyte(0xF);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDBD_VdqWdqR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
op.xmm32s(0) = (Bit32s) op.xmmsbyte(0x0) + (Bit32s) op.xmmsbyte(0x1) +
(Bit32s) op.xmmsbyte(0x2) + (Bit32s) op.xmmsbyte(0x3);
op.xmm32s(1) = (Bit32s) op.xmmsbyte(0x4) + (Bit32s) op.xmmsbyte(0x5) +
(Bit32s) op.xmmsbyte(0x6) + (Bit32s) op.xmmsbyte(0x7);
op.xmm32s(2) = (Bit32s) op.xmmsbyte(0x8) + (Bit32s) op.xmmsbyte(0x9) +
(Bit32s) op.xmmsbyte(0xA) + (Bit32s) op.xmmsbyte(0xB);
op.xmm32s(3) = (Bit32s) op.xmmsbyte(0xC) + (Bit32s) op.xmmsbyte(0xD) +
(Bit32s) op.xmmsbyte(0xE) + (Bit32s) op.xmmsbyte(0xF);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDBQ_VdqWdqR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
op.xmm64s(0) = (Bit32s) op.xmmsbyte(0x0) + (Bit32s) op.xmmsbyte(0x1) +
(Bit32s) op.xmmsbyte(0x2) + (Bit32s) op.xmmsbyte(0x3) +
(Bit32s) op.xmmsbyte(0x4) + (Bit32s) op.xmmsbyte(0x5) +
(Bit32s) op.xmmsbyte(0x6) + (Bit32s) op.xmmsbyte(0x7);
op.xmm64s(1) = (Bit32s) op.xmmsbyte(0x8) + (Bit32s) op.xmmsbyte(0x9) +
(Bit32s) op.xmmsbyte(0xA) + (Bit32s) op.xmmsbyte(0xB) +
(Bit32s) op.xmmsbyte(0xC) + (Bit32s) op.xmmsbyte(0xD) +
(Bit32s) op.xmmsbyte(0xE) + (Bit32s) op.xmmsbyte(0xF);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDWD_VdqWdqR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
op.xmm32s(0) = (Bit32s) op.xmm16s(0) + (Bit32s) op.xmm16s(1);
op.xmm32s(1) = (Bit32s) op.xmm16s(2) + (Bit32s) op.xmm16s(3);
op.xmm32s(2) = (Bit32s) op.xmm16s(4) + (Bit32s) op.xmm16s(5);
op.xmm32s(3) = (Bit32s) op.xmm16s(6) + (Bit32s) op.xmm16s(7);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDWQ_VdqWdqR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
op.xmm64s(0) = (Bit32s) op.xmm16s(0) + (Bit32s) op.xmm16s(1) +
(Bit32s) op.xmm16s(2) + (Bit32s) op.xmm16s(3);
op.xmm64s(1) = (Bit32s) op.xmm16s(4) + (Bit32s) op.xmm16s(5) +
(Bit32s) op.xmm16s(6) + (Bit32s) op.xmm16s(7);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDDQ_VdqWdqR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
op.xmm64s(0) = (Bit64s) op.xmm32s(0) + (Bit64s) op.xmm32s(1);
op.xmm64s(1) = (Bit64s) op.xmm32s(2) + (Bit64s) op.xmm32s(3);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUBW_VdqWdqR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
op.xmm16u(0) = (Bit16u) op.xmmubyte(0x0) + (Bit16u) op.xmmubyte(0x1);
op.xmm16u(1) = (Bit16u) op.xmmubyte(0x2) + (Bit16u) op.xmmubyte(0x3);
op.xmm16u(2) = (Bit16u) op.xmmubyte(0x4) + (Bit16u) op.xmmubyte(0x5);
op.xmm16u(3) = (Bit16u) op.xmmubyte(0x6) + (Bit16u) op.xmmubyte(0x7);
op.xmm16u(4) = (Bit16u) op.xmmubyte(0x8) + (Bit16u) op.xmmubyte(0x9);
op.xmm16u(5) = (Bit16u) op.xmmubyte(0xA) + (Bit16u) op.xmmubyte(0xB);
op.xmm16u(6) = (Bit16u) op.xmmubyte(0xC) + (Bit16u) op.xmmubyte(0xD);
op.xmm16u(7) = (Bit16u) op.xmmubyte(0xE) + (Bit16u) op.xmmubyte(0xF);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUBD_VdqWdqR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
op.xmm32u(0) = (Bit32u) op.xmmubyte(0x0) + (Bit32u) op.xmmubyte(0x1) +
(Bit32u) op.xmmubyte(0x2) + (Bit32u) op.xmmubyte(0x3);
op.xmm32u(1) = (Bit32u) op.xmmubyte(0x4) + (Bit32u) op.xmmubyte(0x5) +
(Bit32u) op.xmmubyte(0x6) + (Bit32u) op.xmmubyte(0x7);
op.xmm32u(2) = (Bit32u) op.xmmubyte(0x8) + (Bit32u) op.xmmubyte(0x9) +
(Bit32u) op.xmmubyte(0xA) + (Bit32u) op.xmmubyte(0xB);
op.xmm32u(3) = (Bit32u) op.xmmubyte(0xC) + (Bit32u) op.xmmubyte(0xD) +
(Bit32u) op.xmmubyte(0xE) + (Bit32u) op.xmmubyte(0xF);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUBQ_VdqWdqR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
op.xmm64u(0) = (Bit32u) op.xmmubyte(0x0) + (Bit32u) op.xmmubyte(0x1) +
(Bit32u) op.xmmubyte(0x2) + (Bit32u) op.xmmubyte(0x3) +
(Bit32u) op.xmmubyte(0x4) + (Bit32u) op.xmmubyte(0x5) +
(Bit32u) op.xmmubyte(0x6) + (Bit32u) op.xmmubyte(0x7);
op.xmm64u(1) = (Bit32u) op.xmmubyte(0x8) + (Bit32u) op.xmmubyte(0x9) +
(Bit32u) op.xmmubyte(0xA) + (Bit32u) op.xmmubyte(0xB) +
(Bit32u) op.xmmubyte(0xC) + (Bit32u) op.xmmubyte(0xD) +
(Bit32u) op.xmmubyte(0xE) + (Bit32u) op.xmmubyte(0xF);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUWD_VdqWdqR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
op.xmm32u(0) = (Bit32u) op.xmm16u(0) + (Bit32u) op.xmm16u(1);
op.xmm32u(1) = (Bit32u) op.xmm16u(2) + (Bit32u) op.xmm16u(3);
op.xmm32u(2) = (Bit32u) op.xmm16u(4) + (Bit32u) op.xmm16u(5);
op.xmm32u(3) = (Bit32u) op.xmm16u(6) + (Bit32u) op.xmm16u(7);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUWQ_VdqWdqR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
op.xmm64u(0) = (Bit32u) op.xmm16u(0) + (Bit32u) op.xmm16u(1) +
(Bit32u) op.xmm16u(2) + (Bit32u) op.xmm16u(3);
op.xmm64u(1) = (Bit32u) op.xmm16u(4) + (Bit32u) op.xmm16u(5) +
(Bit32u) op.xmm16u(6) + (Bit32u) op.xmm16u(7);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUDQ_VdqWdqR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
op.xmm64u(0) = (Bit64u) op.xmm32u(0) + (Bit64u) op.xmm32u(1);
op.xmm64u(1) = (Bit64u) op.xmm32u(2) + (Bit64u) op.xmm32u(3);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHSUBBW_VdqWdqR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
op.xmm16s(0) = (Bit16s) op.xmmsbyte(0x0) - (Bit16s) op.xmmsbyte(0x1);
op.xmm16s(1) = (Bit16s) op.xmmsbyte(0x2) - (Bit16s) op.xmmsbyte(0x3);
op.xmm16s(2) = (Bit16s) op.xmmsbyte(0x4) - (Bit16s) op.xmmsbyte(0x5);
op.xmm16s(3) = (Bit16s) op.xmmsbyte(0x6) - (Bit16s) op.xmmsbyte(0x7);
op.xmm16s(4) = (Bit16s) op.xmmsbyte(0x8) - (Bit16s) op.xmmsbyte(0x9);
op.xmm16s(5) = (Bit16s) op.xmmsbyte(0xA) - (Bit16s) op.xmmsbyte(0xB);
op.xmm16s(6) = (Bit16s) op.xmmsbyte(0xC) - (Bit16s) op.xmmsbyte(0xD);
op.xmm16s(7) = (Bit16s) op.xmmsbyte(0xE) - (Bit16s) op.xmmsbyte(0xF);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHSUBWD_VdqWdqR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
op.xmm32s(0) = (Bit32s) op.xmm16s(0) - (Bit32s) op.xmm16s(1);
op.xmm32s(1) = (Bit32s) op.xmm16s(2) - (Bit32s) op.xmm16s(3);
op.xmm32s(2) = (Bit32s) op.xmm16s(4) - (Bit32s) op.xmm16s(5);
op.xmm32s(3) = (Bit32s) op.xmm16s(6) - (Bit32s) op.xmm16s(7);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHSUBDQ_VdqWdqR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());
op.xmm64s(0) = (Bit64s) op.xmm32s(0) - (Bit64s) op.xmm32s(1);
op.xmm64s(1) = (Bit64s) op.xmm32s(2) - (Bit64s) op.xmm32s(3);
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);
BX_NEXT_INSTR(i);
}
#endif

View File

@ -75,6 +75,7 @@
#define IA_BMI2 (BX_CONST64(1) << 33) /* BMI2 instruction */
#define IA_FMA4 (BX_CONST64(1) << 34) /* FMA4 instruction (AMD) */
#define IA_XOP (BX_CONST64(1) << 35) /* XOP instruction (AMD) */
#define IA_TBM (BX_CONST64(1) << 36) /* TBM instruction (AMD) */
/* general purpose bit register */
enum {

View File

@ -3086,6 +3086,16 @@ This option exists only if Bochs compiled with --enable-avx option.
Select AMD four operand FMA instructions support.
This option exists only if Bochs compiled with --enable-avx option.
</para>
<para><command>xop</command></para>
<para>
Select AMD XOP instructions support.
This option exists only if Bochs compiled with --enable-avx option.
</para>
<para><command>tbm</command></para>
<para>
Select AMD TBM instructions support.
This option exists only if Bochs compiled with --enable-avx option.
</para>
<para><command>x86_64</command></para>
<para>
Enable x86-64 and long mode support.

View File

@ -263,6 +263,16 @@ fma4:
Select AMD four operand FMA instructions support.
This option exists only if Bochs compiled with --enable-avx option.
xop:
Select AMD XOP instructions support.
This option exists only if Bochs compiled with --enable-avx option.
tbm:
Select AMD TBM instructions support.
This option exists only if Bochs compiled with --enable-avx option.
x86_64:
Enable x86-64 and long mode support.
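For example, the new AMD extensions could be enabled together with a cpuid
line such as the following (a sketch: the option names are assumed to match
the cpuid parameters added by this commit, and they still require a build
configured with --enable-avx):
cpuid: xop=1, tbm=1, fma4=1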

View File

@ -59,6 +59,8 @@
#define BXPN_CPUID_AVX_F16CVT "cpuid.avx_f16c"
#define BXPN_CPUID_AVX_FMA "cpuid.avx_fma"
#define BXPN_CPUID_BMI "cpuid.bmi"
#define BXPN_CPUID_XOP "cpuid.xop"
#define BXPN_CPUID_TBM "cpuid.tbm"
#define BXPN_CPUID_FMA4 "cpuid.fma4"
#define BXPN_CPUID_APIC "cpuid.apic"
#define BXPN_CPUID_MWAIT "cpuid.mwait"