mirror of https://github.com/bochs-emu/Bochs
AMX support (#212)
This commit is contained in:
parent
cfa7276cb9
commit
3a02e85599
|
@ -33,6 +33,7 @@ case $which_config in
|
|||
--enable-svm \
|
||||
--enable-avx \
|
||||
--enable-evex \
|
||||
--enable-amx \
|
||||
--enable-cet \
|
||||
--enable-pci \
|
||||
--enable-clgd54xx \
|
||||
|
|
|
@ -10,8 +10,8 @@ Brief summary :
|
|||
! Implemented Linear Address Separation (LASS) extension
|
||||
! Implemented 57-bit Linear Address and 5-Level Paging support
|
||||
! Implemented User-Level Interrupt (UINTR) extension
|
||||
! Implemented recently published Intel instruction sets:
|
||||
- MOVDIRI/MOVDIR64B, AVX512 BF16, AVX IFMA52, AVX-VNNI/VNNI-INT8/VNNI-INT16, AVX-NE-CONVERT, CMPCCXADD, SM3/SM4, SHA512, WRMSRNS, MSRLIST, WAITPKG, SERIALIZE
|
||||
! Implemented Intel instruction sets:
|
||||
- MOVDIRI/MOVDIR64B, AMX, AVX512 BF16, AVX IFMA52, AVX-VNNI/VNNI-INT8/VNNI-INT16, AVX-NE-CONVERT, CMPCCXADD, SM3/SM4, SHA512, WRMSRNS, MSRLIST, WAITPKG, SERIALIZE
|
||||
! CPUID: Added Xeon Sapphire Rapids CPU definition
|
||||
- Improved 64-bit guest support in Bochs internal debugger, added new internal debugger commands
|
||||
- Bochs debugger enhanced with new commands (setpmem, loadmem, deref, ...)
|
||||
|
@ -40,11 +40,11 @@ Detailed change log :
|
|||
- Implemented Linear Address Separation (LASS) extension
|
||||
- Implemented 57-bit Linear Address and 5-Level Paging support
|
||||
- Implemented User-Level Interrupt (UINTR) extension
|
||||
- Implemented recently published Intel instruction sets:
|
||||
- MOVDIRI/MOVDIR64B, AVX512 BF16, AVX IFMA52, AVX-VNNI/VNNI-INT8/VNNI-INT16, AVX-NE-CONVERT, CMPCCXADD, SM3/SM4, SHA512, WRMSRNS, MSRLIST, WAITPKG, SERIALIZE
|
||||
- Implemented Intel instruction sets:
|
||||
- MOVDIRI/MOVDIR64B, AMX, AVX512 BF16, AVX IFMA52, AVX-VNNI/VNNI-INT8/VNNI-INT16, AVX-NE-CONVERT, CMPCCXADD, SM3/SM4, SHA512, WRMSRNS, MSRLIST, WAITPKG, SERIALIZE
|
||||
- CPUID: Added Xeon Sapphire Rapids CPU definition
|
||||
- Features PKS, WAITPKG, UINTR, AVX-VNNI, AVX512_BF16, MOVDIRI/MOVDIR64, LA57, SERIALIZE and more
|
||||
Not yet supported but will be added in future: AVX512_FP16, AMX, VMX Extensions (HLAT, IPI Virtualization)
|
||||
Not yet supported but will be added in future: AVX512_FP16, VMX Extensions (HLAT, IPI Virtualization)
|
||||
|
||||
- Bochs Debugger and Instrumentation
|
||||
- Updated Bochs instrumentation examples for new disassembler introduced in Bochs 2.7 release.
|
||||
|
|
|
@ -215,6 +215,7 @@
|
|||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\cpu\avx\amx.cc" />
|
||||
<ClCompile Include="..\cpu\avx\avx.cc" />
|
||||
<ClCompile Include="..\cpu\avx\avx2.cc" />
|
||||
<ClCompile Include="..\cpu\avx\avx512.cc" />
|
||||
|
|
|
@ -208,6 +208,7 @@
|
|||
</Bscmake>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\cpu\avx\amx.cc" />
|
||||
<ClCompile Include="..\cpu\avx\avx.cc" />
|
||||
<ClCompile Include="..\cpu\avx\avx2.cc" />
|
||||
<ClCompile Include="..\cpu\avx\avx512.cc" />
|
||||
|
|
|
@ -873,6 +873,65 @@ void bx_dbg_print_avx_state(unsigned vlen)
|
|||
#endif
|
||||
}
|
||||
|
||||
void bx_dbg_print_amx_state(void)
|
||||
{
|
||||
#if BX_SUPPORT_AMX
|
||||
if (BX_CPU(dbg_cpu)->is_cpu_extension_supported(BX_ISA_AMX)) {
|
||||
char param_name[20];
|
||||
unsigned palette_id = SIM->get_param_num("AMX.palette", dbg_cpu_list)->get();
|
||||
unsigned start_row = SIM->get_param_num("AMX.start_row", dbg_cpu_list)->get();
|
||||
dbg_printf("TILECFG palette=%d, start_row=%d\n", palette_id, start_row);
|
||||
for(unsigned i=0;i<8;i++) {
|
||||
sprintf(param_name, "AMX.tile%d_rows", i);
|
||||
unsigned rows = SIM->get_param_num(param_name, dbg_cpu_list)->get64();
|
||||
sprintf(param_name, "AMX.tile%d_colsb", i);
|
||||
unsigned cols = SIM->get_param_num(param_name, dbg_cpu_list)->get64();
|
||||
dbg_printf("TILECFG[%d]: %2d x %2d\n", i, rows, cols);
|
||||
}
|
||||
dbg_printf("use \"tile <tile_number>\" command to print tile content\n");
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
dbg_printf("The CPU doesn't support AMX state !\n");
|
||||
}
|
||||
}
|
||||
|
||||
void bx_dbg_print_amx_tile_command(int tile)
|
||||
{
|
||||
#if BX_SUPPORT_AMX
|
||||
if (BX_CPU(dbg_cpu)->is_cpu_extension_supported(BX_ISA_AMX)) {
|
||||
if (tile < 8) {
|
||||
char param_name[30];
|
||||
sprintf(param_name, "AMX.tile%d_rows", tile);
|
||||
unsigned rows = SIM->get_param_num(param_name, dbg_cpu_list)->get64();
|
||||
sprintf(param_name, "AMX.tile%d_colsb", tile);
|
||||
unsigned cols = SIM->get_param_num(param_name, dbg_cpu_list)->get64();
|
||||
dbg_printf("TILE[%d]: %2d x %2d\n", tile, rows, cols);
|
||||
for (int row=0;row<16;row++) {
|
||||
dbg_printf("row[%02d]: ", row);
|
||||
for (int j=BX_VL512-1;j >= 0; j--) {
|
||||
sprintf(param_name, "AMX.tile%d_row%d_%d", tile, row, j*2+1);
|
||||
Bit64u hi = SIM->get_param_num(param_name, dbg_cpu_list)->get64();
|
||||
sprintf(param_name, "AMX.tile%d_row%d_%d", tile, row, j*2);
|
||||
Bit64u lo = SIM->get_param_num(param_name, dbg_cpu_list)->get64();
|
||||
dbg_printf("%08x_%08x_%08x_%08x", GET32H(hi), GET32L(hi), GET32H(lo), GET32L(lo));
|
||||
if (j!=0) dbg_printf("_");
|
||||
}
|
||||
dbg_printf("\n");
|
||||
}
|
||||
}
|
||||
else {
|
||||
dbg_printf("TILE[%d]: invalid or not configured\n", tile);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
dbg_printf("The CPU doesn't support AMX state !\n");
|
||||
}
|
||||
}
|
||||
|
||||
void bx_dbg_print_mmx_state(void)
|
||||
{
|
||||
#if BX_CPU_LEVEL >= 5
|
||||
|
@ -1160,11 +1219,9 @@ void bx_dbg_info_registers_command(int which_regs_mask)
|
|||
bx_dbg_info_flags();
|
||||
}
|
||||
|
||||
#if BX_SUPPORT_FPU
|
||||
if (which_regs_mask & BX_INFO_FPU_REGS) {
|
||||
bx_dbg_print_fpu_state();
|
||||
}
|
||||
#endif
|
||||
|
||||
if (which_regs_mask & BX_INFO_MMX_REGS) {
|
||||
bx_dbg_print_mmx_state();
|
||||
|
@ -1189,6 +1246,10 @@ void bx_dbg_info_registers_command(int which_regs_mask)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (which_regs_mask & BX_INFO_AMX_REGS) {
|
||||
bx_dbg_print_amx_state();
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
|
|
|
@ -151,7 +151,9 @@ void bx_dbg_quit_command(void);
|
|||
#define BX_INFO_SSE_REGS 0x08
|
||||
#define BX_INFO_YMM_REGS 0x10
|
||||
#define BX_INFO_ZMM_REGS 0x20
|
||||
#define BX_INFO_AMX_REGS 0x40
|
||||
void bx_dbg_info_registers_command(int);
|
||||
void bx_dbg_print_amx_tile_command(int tile);
|
||||
void bx_dbg_info_ivt_command(unsigned from, unsigned to);
|
||||
void bx_dbg_info_idt_command(unsigned from, unsigned to);
|
||||
void bx_dbg_info_gdt_command(unsigned from, unsigned to);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -91,6 +91,8 @@ sse|xmm { bxlval.sval = strdup(bxtext); return(BX_TOKEN_XMM); }
|
|||
ymm { bxlval.sval = strdup(bxtext); return(BX_TOKEN_YMM); }
|
||||
zmm { bxlval.sval = strdup(bxtext); return(BX_TOKEN_ZMM); }
|
||||
avx { bxlval.sval = strdup(bxtext); return(BX_TOKEN_AVX); }
|
||||
amx { bxlval.sval = strdup(bxtext); return(BX_TOKEN_AMX); }
|
||||
tile { bxlval.sval = strdup(bxtext); return(BX_TOKEN_TILE); }
|
||||
mmx { bxlval.sval = strdup(bxtext); return(BX_TOKEN_MMX); }
|
||||
cpu { bxlval.sval = strdup(bxtext); return(BX_TOKEN_CPU); }
|
||||
idt { bxlval.sval = strdup(bxtext); return(BX_TOKEN_IDT); }
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -93,90 +93,92 @@ extern int bxdebug;
|
|||
BX_TOKEN_YMM = 294, /* BX_TOKEN_YMM */
|
||||
BX_TOKEN_ZMM = 295, /* BX_TOKEN_ZMM */
|
||||
BX_TOKEN_AVX = 296, /* BX_TOKEN_AVX */
|
||||
BX_TOKEN_IDT = 297, /* BX_TOKEN_IDT */
|
||||
BX_TOKEN_IVT = 298, /* BX_TOKEN_IVT */
|
||||
BX_TOKEN_GDT = 299, /* BX_TOKEN_GDT */
|
||||
BX_TOKEN_LDT = 300, /* BX_TOKEN_LDT */
|
||||
BX_TOKEN_TSS = 301, /* BX_TOKEN_TSS */
|
||||
BX_TOKEN_TAB = 302, /* BX_TOKEN_TAB */
|
||||
BX_TOKEN_ALL = 303, /* BX_TOKEN_ALL */
|
||||
BX_TOKEN_LINUX = 304, /* BX_TOKEN_LINUX */
|
||||
BX_TOKEN_DEBUG_REGS = 305, /* BX_TOKEN_DEBUG_REGS */
|
||||
BX_TOKEN_CONTROL_REGS = 306, /* BX_TOKEN_CONTROL_REGS */
|
||||
BX_TOKEN_SEGMENT_REGS = 307, /* BX_TOKEN_SEGMENT_REGS */
|
||||
BX_TOKEN_EXAMINE = 308, /* BX_TOKEN_EXAMINE */
|
||||
BX_TOKEN_XFORMAT = 309, /* BX_TOKEN_XFORMAT */
|
||||
BX_TOKEN_DISFORMAT = 310, /* BX_TOKEN_DISFORMAT */
|
||||
BX_TOKEN_RESTORE = 311, /* BX_TOKEN_RESTORE */
|
||||
BX_TOKEN_WRITEMEM = 312, /* BX_TOKEN_WRITEMEM */
|
||||
BX_TOKEN_LOADMEM = 313, /* BX_TOKEN_LOADMEM */
|
||||
BX_TOKEN_SETPMEM = 314, /* BX_TOKEN_SETPMEM */
|
||||
BX_TOKEN_DEREF = 315, /* BX_TOKEN_DEREF */
|
||||
BX_TOKEN_SYMBOLNAME = 316, /* BX_TOKEN_SYMBOLNAME */
|
||||
BX_TOKEN_QUERY = 317, /* BX_TOKEN_QUERY */
|
||||
BX_TOKEN_PENDING = 318, /* BX_TOKEN_PENDING */
|
||||
BX_TOKEN_TAKE = 319, /* BX_TOKEN_TAKE */
|
||||
BX_TOKEN_DMA = 320, /* BX_TOKEN_DMA */
|
||||
BX_TOKEN_IRQ = 321, /* BX_TOKEN_IRQ */
|
||||
BX_TOKEN_SMI = 322, /* BX_TOKEN_SMI */
|
||||
BX_TOKEN_NMI = 323, /* BX_TOKEN_NMI */
|
||||
BX_TOKEN_TLB = 324, /* BX_TOKEN_TLB */
|
||||
BX_TOKEN_DISASM = 325, /* BX_TOKEN_DISASM */
|
||||
BX_TOKEN_INSTRUMENT = 326, /* BX_TOKEN_INSTRUMENT */
|
||||
BX_TOKEN_STRING = 327, /* BX_TOKEN_STRING */
|
||||
BX_TOKEN_STOP = 328, /* BX_TOKEN_STOP */
|
||||
BX_TOKEN_DOIT = 329, /* BX_TOKEN_DOIT */
|
||||
BX_TOKEN_CRC = 330, /* BX_TOKEN_CRC */
|
||||
BX_TOKEN_TRACE = 331, /* BX_TOKEN_TRACE */
|
||||
BX_TOKEN_TRACEREG = 332, /* BX_TOKEN_TRACEREG */
|
||||
BX_TOKEN_TRACEMEM = 333, /* BX_TOKEN_TRACEMEM */
|
||||
BX_TOKEN_SWITCH_MODE = 334, /* BX_TOKEN_SWITCH_MODE */
|
||||
BX_TOKEN_SIZE = 335, /* BX_TOKEN_SIZE */
|
||||
BX_TOKEN_PTIME = 336, /* BX_TOKEN_PTIME */
|
||||
BX_TOKEN_TIMEBP_ABSOLUTE = 337, /* BX_TOKEN_TIMEBP_ABSOLUTE */
|
||||
BX_TOKEN_TIMEBP = 338, /* BX_TOKEN_TIMEBP */
|
||||
BX_TOKEN_MODEBP = 339, /* BX_TOKEN_MODEBP */
|
||||
BX_TOKEN_VMEXITBP = 340, /* BX_TOKEN_VMEXITBP */
|
||||
BX_TOKEN_PRINT_STACK = 341, /* BX_TOKEN_PRINT_STACK */
|
||||
BX_TOKEN_BT = 342, /* BX_TOKEN_BT */
|
||||
BX_TOKEN_WATCH = 343, /* BX_TOKEN_WATCH */
|
||||
BX_TOKEN_UNWATCH = 344, /* BX_TOKEN_UNWATCH */
|
||||
BX_TOKEN_READ = 345, /* BX_TOKEN_READ */
|
||||
BX_TOKEN_WRITE = 346, /* BX_TOKEN_WRITE */
|
||||
BX_TOKEN_SHOW = 347, /* BX_TOKEN_SHOW */
|
||||
BX_TOKEN_LOAD_SYMBOLS = 348, /* BX_TOKEN_LOAD_SYMBOLS */
|
||||
BX_TOKEN_SET_MAGIC_BREAK_POINTS = 349, /* BX_TOKEN_SET_MAGIC_BREAK_POINTS */
|
||||
BX_TOKEN_CLEAR_MAGIC_BREAK_POINTS = 350, /* BX_TOKEN_CLEAR_MAGIC_BREAK_POINTS */
|
||||
BX_TOKEN_SYMBOLS = 351, /* BX_TOKEN_SYMBOLS */
|
||||
BX_TOKEN_LIST_SYMBOLS = 352, /* BX_TOKEN_LIST_SYMBOLS */
|
||||
BX_TOKEN_GLOBAL = 353, /* BX_TOKEN_GLOBAL */
|
||||
BX_TOKEN_WHERE = 354, /* BX_TOKEN_WHERE */
|
||||
BX_TOKEN_PRINT_STRING = 355, /* BX_TOKEN_PRINT_STRING */
|
||||
BX_TOKEN_NUMERIC = 356, /* BX_TOKEN_NUMERIC */
|
||||
BX_TOKEN_PAGE = 357, /* BX_TOKEN_PAGE */
|
||||
BX_TOKEN_HELP = 358, /* BX_TOKEN_HELP */
|
||||
BX_TOKEN_XML = 359, /* BX_TOKEN_XML */
|
||||
BX_TOKEN_CALC = 360, /* BX_TOKEN_CALC */
|
||||
BX_TOKEN_ADDLYT = 361, /* BX_TOKEN_ADDLYT */
|
||||
BX_TOKEN_REMLYT = 362, /* BX_TOKEN_REMLYT */
|
||||
BX_TOKEN_LYT = 363, /* BX_TOKEN_LYT */
|
||||
BX_TOKEN_SOURCE = 364, /* BX_TOKEN_SOURCE */
|
||||
BX_TOKEN_DEVICE = 365, /* BX_TOKEN_DEVICE */
|
||||
BX_TOKEN_GENERIC = 366, /* BX_TOKEN_GENERIC */
|
||||
BX_TOKEN_DEREF_CHR = 367, /* BX_TOKEN_DEREF_CHR */
|
||||
BX_TOKEN_RSHIFT = 368, /* BX_TOKEN_RSHIFT */
|
||||
BX_TOKEN_LSHIFT = 369, /* BX_TOKEN_LSHIFT */
|
||||
BX_TOKEN_EQ = 370, /* BX_TOKEN_EQ */
|
||||
BX_TOKEN_NE = 371, /* BX_TOKEN_NE */
|
||||
BX_TOKEN_LE = 372, /* BX_TOKEN_LE */
|
||||
BX_TOKEN_GE = 373, /* BX_TOKEN_GE */
|
||||
BX_TOKEN_REG_IP = 374, /* BX_TOKEN_REG_IP */
|
||||
BX_TOKEN_REG_EIP = 375, /* BX_TOKEN_REG_EIP */
|
||||
BX_TOKEN_REG_RIP = 376, /* BX_TOKEN_REG_RIP */
|
||||
BX_TOKEN_REG_SSP = 377, /* BX_TOKEN_REG_SSP */
|
||||
NOT = 378, /* NOT */
|
||||
NEG = 379, /* NEG */
|
||||
INDIRECT = 380 /* INDIRECT */
|
||||
BX_TOKEN_AMX = 297, /* BX_TOKEN_AMX */
|
||||
BX_TOKEN_TILE = 298, /* BX_TOKEN_TILE */
|
||||
BX_TOKEN_IDT = 299, /* BX_TOKEN_IDT */
|
||||
BX_TOKEN_IVT = 300, /* BX_TOKEN_IVT */
|
||||
BX_TOKEN_GDT = 301, /* BX_TOKEN_GDT */
|
||||
BX_TOKEN_LDT = 302, /* BX_TOKEN_LDT */
|
||||
BX_TOKEN_TSS = 303, /* BX_TOKEN_TSS */
|
||||
BX_TOKEN_TAB = 304, /* BX_TOKEN_TAB */
|
||||
BX_TOKEN_ALL = 305, /* BX_TOKEN_ALL */
|
||||
BX_TOKEN_LINUX = 306, /* BX_TOKEN_LINUX */
|
||||
BX_TOKEN_DEBUG_REGS = 307, /* BX_TOKEN_DEBUG_REGS */
|
||||
BX_TOKEN_CONTROL_REGS = 308, /* BX_TOKEN_CONTROL_REGS */
|
||||
BX_TOKEN_SEGMENT_REGS = 309, /* BX_TOKEN_SEGMENT_REGS */
|
||||
BX_TOKEN_EXAMINE = 310, /* BX_TOKEN_EXAMINE */
|
||||
BX_TOKEN_XFORMAT = 311, /* BX_TOKEN_XFORMAT */
|
||||
BX_TOKEN_DISFORMAT = 312, /* BX_TOKEN_DISFORMAT */
|
||||
BX_TOKEN_RESTORE = 313, /* BX_TOKEN_RESTORE */
|
||||
BX_TOKEN_WRITEMEM = 314, /* BX_TOKEN_WRITEMEM */
|
||||
BX_TOKEN_LOADMEM = 315, /* BX_TOKEN_LOADMEM */
|
||||
BX_TOKEN_SETPMEM = 316, /* BX_TOKEN_SETPMEM */
|
||||
BX_TOKEN_DEREF = 317, /* BX_TOKEN_DEREF */
|
||||
BX_TOKEN_SYMBOLNAME = 318, /* BX_TOKEN_SYMBOLNAME */
|
||||
BX_TOKEN_QUERY = 319, /* BX_TOKEN_QUERY */
|
||||
BX_TOKEN_PENDING = 320, /* BX_TOKEN_PENDING */
|
||||
BX_TOKEN_TAKE = 321, /* BX_TOKEN_TAKE */
|
||||
BX_TOKEN_DMA = 322, /* BX_TOKEN_DMA */
|
||||
BX_TOKEN_IRQ = 323, /* BX_TOKEN_IRQ */
|
||||
BX_TOKEN_SMI = 324, /* BX_TOKEN_SMI */
|
||||
BX_TOKEN_NMI = 325, /* BX_TOKEN_NMI */
|
||||
BX_TOKEN_TLB = 326, /* BX_TOKEN_TLB */
|
||||
BX_TOKEN_DISASM = 327, /* BX_TOKEN_DISASM */
|
||||
BX_TOKEN_INSTRUMENT = 328, /* BX_TOKEN_INSTRUMENT */
|
||||
BX_TOKEN_STRING = 329, /* BX_TOKEN_STRING */
|
||||
BX_TOKEN_STOP = 330, /* BX_TOKEN_STOP */
|
||||
BX_TOKEN_DOIT = 331, /* BX_TOKEN_DOIT */
|
||||
BX_TOKEN_CRC = 332, /* BX_TOKEN_CRC */
|
||||
BX_TOKEN_TRACE = 333, /* BX_TOKEN_TRACE */
|
||||
BX_TOKEN_TRACEREG = 334, /* BX_TOKEN_TRACEREG */
|
||||
BX_TOKEN_TRACEMEM = 335, /* BX_TOKEN_TRACEMEM */
|
||||
BX_TOKEN_SWITCH_MODE = 336, /* BX_TOKEN_SWITCH_MODE */
|
||||
BX_TOKEN_SIZE = 337, /* BX_TOKEN_SIZE */
|
||||
BX_TOKEN_PTIME = 338, /* BX_TOKEN_PTIME */
|
||||
BX_TOKEN_TIMEBP_ABSOLUTE = 339, /* BX_TOKEN_TIMEBP_ABSOLUTE */
|
||||
BX_TOKEN_TIMEBP = 340, /* BX_TOKEN_TIMEBP */
|
||||
BX_TOKEN_MODEBP = 341, /* BX_TOKEN_MODEBP */
|
||||
BX_TOKEN_VMEXITBP = 342, /* BX_TOKEN_VMEXITBP */
|
||||
BX_TOKEN_PRINT_STACK = 343, /* BX_TOKEN_PRINT_STACK */
|
||||
BX_TOKEN_BT = 344, /* BX_TOKEN_BT */
|
||||
BX_TOKEN_WATCH = 345, /* BX_TOKEN_WATCH */
|
||||
BX_TOKEN_UNWATCH = 346, /* BX_TOKEN_UNWATCH */
|
||||
BX_TOKEN_READ = 347, /* BX_TOKEN_READ */
|
||||
BX_TOKEN_WRITE = 348, /* BX_TOKEN_WRITE */
|
||||
BX_TOKEN_SHOW = 349, /* BX_TOKEN_SHOW */
|
||||
BX_TOKEN_LOAD_SYMBOLS = 350, /* BX_TOKEN_LOAD_SYMBOLS */
|
||||
BX_TOKEN_SET_MAGIC_BREAK_POINTS = 351, /* BX_TOKEN_SET_MAGIC_BREAK_POINTS */
|
||||
BX_TOKEN_CLEAR_MAGIC_BREAK_POINTS = 352, /* BX_TOKEN_CLEAR_MAGIC_BREAK_POINTS */
|
||||
BX_TOKEN_SYMBOLS = 353, /* BX_TOKEN_SYMBOLS */
|
||||
BX_TOKEN_LIST_SYMBOLS = 354, /* BX_TOKEN_LIST_SYMBOLS */
|
||||
BX_TOKEN_GLOBAL = 355, /* BX_TOKEN_GLOBAL */
|
||||
BX_TOKEN_WHERE = 356, /* BX_TOKEN_WHERE */
|
||||
BX_TOKEN_PRINT_STRING = 357, /* BX_TOKEN_PRINT_STRING */
|
||||
BX_TOKEN_NUMERIC = 358, /* BX_TOKEN_NUMERIC */
|
||||
BX_TOKEN_PAGE = 359, /* BX_TOKEN_PAGE */
|
||||
BX_TOKEN_HELP = 360, /* BX_TOKEN_HELP */
|
||||
BX_TOKEN_XML = 361, /* BX_TOKEN_XML */
|
||||
BX_TOKEN_CALC = 362, /* BX_TOKEN_CALC */
|
||||
BX_TOKEN_ADDLYT = 363, /* BX_TOKEN_ADDLYT */
|
||||
BX_TOKEN_REMLYT = 364, /* BX_TOKEN_REMLYT */
|
||||
BX_TOKEN_LYT = 365, /* BX_TOKEN_LYT */
|
||||
BX_TOKEN_SOURCE = 366, /* BX_TOKEN_SOURCE */
|
||||
BX_TOKEN_DEVICE = 367, /* BX_TOKEN_DEVICE */
|
||||
BX_TOKEN_GENERIC = 368, /* BX_TOKEN_GENERIC */
|
||||
BX_TOKEN_DEREF_CHR = 369, /* BX_TOKEN_DEREF_CHR */
|
||||
BX_TOKEN_RSHIFT = 370, /* BX_TOKEN_RSHIFT */
|
||||
BX_TOKEN_LSHIFT = 371, /* BX_TOKEN_LSHIFT */
|
||||
BX_TOKEN_EQ = 372, /* BX_TOKEN_EQ */
|
||||
BX_TOKEN_NE = 373, /* BX_TOKEN_NE */
|
||||
BX_TOKEN_LE = 374, /* BX_TOKEN_LE */
|
||||
BX_TOKEN_GE = 375, /* BX_TOKEN_GE */
|
||||
BX_TOKEN_REG_IP = 376, /* BX_TOKEN_REG_IP */
|
||||
BX_TOKEN_REG_EIP = 377, /* BX_TOKEN_REG_EIP */
|
||||
BX_TOKEN_REG_RIP = 378, /* BX_TOKEN_REG_RIP */
|
||||
BX_TOKEN_REG_SSP = 379, /* BX_TOKEN_REG_SSP */
|
||||
NOT = 380, /* NOT */
|
||||
NEG = 381, /* NEG */
|
||||
INDIRECT = 382 /* INDIRECT */
|
||||
};
|
||||
typedef enum yytokentype yytoken_kind_t;
|
||||
#endif
|
||||
|
@ -224,90 +226,92 @@ extern int bxdebug;
|
|||
#define BX_TOKEN_YMM 294
|
||||
#define BX_TOKEN_ZMM 295
|
||||
#define BX_TOKEN_AVX 296
|
||||
#define BX_TOKEN_IDT 297
|
||||
#define BX_TOKEN_IVT 298
|
||||
#define BX_TOKEN_GDT 299
|
||||
#define BX_TOKEN_LDT 300
|
||||
#define BX_TOKEN_TSS 301
|
||||
#define BX_TOKEN_TAB 302
|
||||
#define BX_TOKEN_ALL 303
|
||||
#define BX_TOKEN_LINUX 304
|
||||
#define BX_TOKEN_DEBUG_REGS 305
|
||||
#define BX_TOKEN_CONTROL_REGS 306
|
||||
#define BX_TOKEN_SEGMENT_REGS 307
|
||||
#define BX_TOKEN_EXAMINE 308
|
||||
#define BX_TOKEN_XFORMAT 309
|
||||
#define BX_TOKEN_DISFORMAT 310
|
||||
#define BX_TOKEN_RESTORE 311
|
||||
#define BX_TOKEN_WRITEMEM 312
|
||||
#define BX_TOKEN_LOADMEM 313
|
||||
#define BX_TOKEN_SETPMEM 314
|
||||
#define BX_TOKEN_DEREF 315
|
||||
#define BX_TOKEN_SYMBOLNAME 316
|
||||
#define BX_TOKEN_QUERY 317
|
||||
#define BX_TOKEN_PENDING 318
|
||||
#define BX_TOKEN_TAKE 319
|
||||
#define BX_TOKEN_DMA 320
|
||||
#define BX_TOKEN_IRQ 321
|
||||
#define BX_TOKEN_SMI 322
|
||||
#define BX_TOKEN_NMI 323
|
||||
#define BX_TOKEN_TLB 324
|
||||
#define BX_TOKEN_DISASM 325
|
||||
#define BX_TOKEN_INSTRUMENT 326
|
||||
#define BX_TOKEN_STRING 327
|
||||
#define BX_TOKEN_STOP 328
|
||||
#define BX_TOKEN_DOIT 329
|
||||
#define BX_TOKEN_CRC 330
|
||||
#define BX_TOKEN_TRACE 331
|
||||
#define BX_TOKEN_TRACEREG 332
|
||||
#define BX_TOKEN_TRACEMEM 333
|
||||
#define BX_TOKEN_SWITCH_MODE 334
|
||||
#define BX_TOKEN_SIZE 335
|
||||
#define BX_TOKEN_PTIME 336
|
||||
#define BX_TOKEN_TIMEBP_ABSOLUTE 337
|
||||
#define BX_TOKEN_TIMEBP 338
|
||||
#define BX_TOKEN_MODEBP 339
|
||||
#define BX_TOKEN_VMEXITBP 340
|
||||
#define BX_TOKEN_PRINT_STACK 341
|
||||
#define BX_TOKEN_BT 342
|
||||
#define BX_TOKEN_WATCH 343
|
||||
#define BX_TOKEN_UNWATCH 344
|
||||
#define BX_TOKEN_READ 345
|
||||
#define BX_TOKEN_WRITE 346
|
||||
#define BX_TOKEN_SHOW 347
|
||||
#define BX_TOKEN_LOAD_SYMBOLS 348
|
||||
#define BX_TOKEN_SET_MAGIC_BREAK_POINTS 349
|
||||
#define BX_TOKEN_CLEAR_MAGIC_BREAK_POINTS 350
|
||||
#define BX_TOKEN_SYMBOLS 351
|
||||
#define BX_TOKEN_LIST_SYMBOLS 352
|
||||
#define BX_TOKEN_GLOBAL 353
|
||||
#define BX_TOKEN_WHERE 354
|
||||
#define BX_TOKEN_PRINT_STRING 355
|
||||
#define BX_TOKEN_NUMERIC 356
|
||||
#define BX_TOKEN_PAGE 357
|
||||
#define BX_TOKEN_HELP 358
|
||||
#define BX_TOKEN_XML 359
|
||||
#define BX_TOKEN_CALC 360
|
||||
#define BX_TOKEN_ADDLYT 361
|
||||
#define BX_TOKEN_REMLYT 362
|
||||
#define BX_TOKEN_LYT 363
|
||||
#define BX_TOKEN_SOURCE 364
|
||||
#define BX_TOKEN_DEVICE 365
|
||||
#define BX_TOKEN_GENERIC 366
|
||||
#define BX_TOKEN_DEREF_CHR 367
|
||||
#define BX_TOKEN_RSHIFT 368
|
||||
#define BX_TOKEN_LSHIFT 369
|
||||
#define BX_TOKEN_EQ 370
|
||||
#define BX_TOKEN_NE 371
|
||||
#define BX_TOKEN_LE 372
|
||||
#define BX_TOKEN_GE 373
|
||||
#define BX_TOKEN_REG_IP 374
|
||||
#define BX_TOKEN_REG_EIP 375
|
||||
#define BX_TOKEN_REG_RIP 376
|
||||
#define BX_TOKEN_REG_SSP 377
|
||||
#define NOT 378
|
||||
#define NEG 379
|
||||
#define INDIRECT 380
|
||||
#define BX_TOKEN_AMX 297
|
||||
#define BX_TOKEN_TILE 298
|
||||
#define BX_TOKEN_IDT 299
|
||||
#define BX_TOKEN_IVT 300
|
||||
#define BX_TOKEN_GDT 301
|
||||
#define BX_TOKEN_LDT 302
|
||||
#define BX_TOKEN_TSS 303
|
||||
#define BX_TOKEN_TAB 304
|
||||
#define BX_TOKEN_ALL 305
|
||||
#define BX_TOKEN_LINUX 306
|
||||
#define BX_TOKEN_DEBUG_REGS 307
|
||||
#define BX_TOKEN_CONTROL_REGS 308
|
||||
#define BX_TOKEN_SEGMENT_REGS 309
|
||||
#define BX_TOKEN_EXAMINE 310
|
||||
#define BX_TOKEN_XFORMAT 311
|
||||
#define BX_TOKEN_DISFORMAT 312
|
||||
#define BX_TOKEN_RESTORE 313
|
||||
#define BX_TOKEN_WRITEMEM 314
|
||||
#define BX_TOKEN_LOADMEM 315
|
||||
#define BX_TOKEN_SETPMEM 316
|
||||
#define BX_TOKEN_DEREF 317
|
||||
#define BX_TOKEN_SYMBOLNAME 318
|
||||
#define BX_TOKEN_QUERY 319
|
||||
#define BX_TOKEN_PENDING 320
|
||||
#define BX_TOKEN_TAKE 321
|
||||
#define BX_TOKEN_DMA 322
|
||||
#define BX_TOKEN_IRQ 323
|
||||
#define BX_TOKEN_SMI 324
|
||||
#define BX_TOKEN_NMI 325
|
||||
#define BX_TOKEN_TLB 326
|
||||
#define BX_TOKEN_DISASM 327
|
||||
#define BX_TOKEN_INSTRUMENT 328
|
||||
#define BX_TOKEN_STRING 329
|
||||
#define BX_TOKEN_STOP 330
|
||||
#define BX_TOKEN_DOIT 331
|
||||
#define BX_TOKEN_CRC 332
|
||||
#define BX_TOKEN_TRACE 333
|
||||
#define BX_TOKEN_TRACEREG 334
|
||||
#define BX_TOKEN_TRACEMEM 335
|
||||
#define BX_TOKEN_SWITCH_MODE 336
|
||||
#define BX_TOKEN_SIZE 337
|
||||
#define BX_TOKEN_PTIME 338
|
||||
#define BX_TOKEN_TIMEBP_ABSOLUTE 339
|
||||
#define BX_TOKEN_TIMEBP 340
|
||||
#define BX_TOKEN_MODEBP 341
|
||||
#define BX_TOKEN_VMEXITBP 342
|
||||
#define BX_TOKEN_PRINT_STACK 343
|
||||
#define BX_TOKEN_BT 344
|
||||
#define BX_TOKEN_WATCH 345
|
||||
#define BX_TOKEN_UNWATCH 346
|
||||
#define BX_TOKEN_READ 347
|
||||
#define BX_TOKEN_WRITE 348
|
||||
#define BX_TOKEN_SHOW 349
|
||||
#define BX_TOKEN_LOAD_SYMBOLS 350
|
||||
#define BX_TOKEN_SET_MAGIC_BREAK_POINTS 351
|
||||
#define BX_TOKEN_CLEAR_MAGIC_BREAK_POINTS 352
|
||||
#define BX_TOKEN_SYMBOLS 353
|
||||
#define BX_TOKEN_LIST_SYMBOLS 354
|
||||
#define BX_TOKEN_GLOBAL 355
|
||||
#define BX_TOKEN_WHERE 356
|
||||
#define BX_TOKEN_PRINT_STRING 357
|
||||
#define BX_TOKEN_NUMERIC 358
|
||||
#define BX_TOKEN_PAGE 359
|
||||
#define BX_TOKEN_HELP 360
|
||||
#define BX_TOKEN_XML 361
|
||||
#define BX_TOKEN_CALC 362
|
||||
#define BX_TOKEN_ADDLYT 363
|
||||
#define BX_TOKEN_REMLYT 364
|
||||
#define BX_TOKEN_LYT 365
|
||||
#define BX_TOKEN_SOURCE 366
|
||||
#define BX_TOKEN_DEVICE 367
|
||||
#define BX_TOKEN_GENERIC 368
|
||||
#define BX_TOKEN_DEREF_CHR 369
|
||||
#define BX_TOKEN_RSHIFT 370
|
||||
#define BX_TOKEN_LSHIFT 371
|
||||
#define BX_TOKEN_EQ 372
|
||||
#define BX_TOKEN_NE 373
|
||||
#define BX_TOKEN_LE 374
|
||||
#define BX_TOKEN_GE 375
|
||||
#define BX_TOKEN_REG_IP 376
|
||||
#define BX_TOKEN_REG_EIP 377
|
||||
#define BX_TOKEN_REG_RIP 378
|
||||
#define BX_TOKEN_REG_SSP 379
|
||||
#define NOT 380
|
||||
#define NEG 381
|
||||
#define INDIRECT 382
|
||||
|
||||
/* Value type. */
|
||||
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
|
||||
|
@ -319,7 +323,7 @@ union YYSTYPE
|
|||
Bit64u uval;
|
||||
unsigned bval;
|
||||
|
||||
#line 323 "y.tab.h"
|
||||
#line 327 "y.tab.h"
|
||||
|
||||
};
|
||||
typedef union YYSTYPE YYSTYPE;
|
||||
|
|
|
@ -61,6 +61,8 @@ Bit64u eval_value;
|
|||
%token <sval> BX_TOKEN_YMM
|
||||
%token <sval> BX_TOKEN_ZMM
|
||||
%token <sval> BX_TOKEN_AVX
|
||||
%token <sval> BX_TOKEN_AMX
|
||||
%token <sval> BX_TOKEN_TILE
|
||||
%token <sval> BX_TOKEN_IDT
|
||||
%token <sval> BX_TOKEN_IVT
|
||||
%token <sval> BX_TOKEN_GDT
|
||||
|
@ -172,6 +174,8 @@ command:
|
|||
| xmm_regs_command
|
||||
| ymm_regs_command
|
||||
| zmm_regs_command
|
||||
| amx_regs_command
|
||||
| print_tile_command
|
||||
| segment_regs_command
|
||||
| debug_regs_command
|
||||
| control_regs_command
|
||||
|
@ -823,6 +827,22 @@ zmm_regs_command:
|
|||
}
|
||||
;
|
||||
|
||||
/* "amx" debugger command: print the AMX state by calling
 * bx_dbg_info_registers_command() with the BX_INFO_AMX_REGS mask, then
 * release the token text ($1) that the lexer allocated with strdup(). */
amx_regs_command:
|
||||
BX_TOKEN_AMX '\n'
|
||||
{
|
||||
bx_dbg_info_registers_command(BX_INFO_AMX_REGS);
|
||||
free($1);
|
||||
}
|
||||
;
|
||||
|
||||
/* "tile <number>" debugger command: dump one AMX tile through
 * bx_dbg_print_amx_tile_command(), then release the token text ($1) that
 * the lexer allocated with strdup().
 * NOTE(review): $2 is handed to an 'int' parameter; if the numeric token
 * carries a 64-bit value it is narrowed here - confirm that out-of-range
 * input is rejected by the callee. */
print_tile_command:
|
||||
BX_TOKEN_TILE BX_TOKEN_NUMERIC '\n'
|
||||
{
|
||||
bx_dbg_print_amx_tile_command($2);
|
||||
free($1);
|
||||
}
|
||||
;
|
||||
|
||||
segment_regs_command:
|
||||
BX_TOKEN_SEGMENT_REGS '\n'
|
||||
{
|
||||
|
@ -1233,6 +1253,11 @@ help_command:
|
|||
dbg_printf("zmm - print AVX-512 state\n");
|
||||
free($1);free($2);
|
||||
}
|
||||
| BX_TOKEN_HELP BX_TOKEN_AMX '\n'
|
||||
{
|
||||
dbg_printf("amx - print AMX state\n");
|
||||
free($1);free($2);
|
||||
}
|
||||
| BX_TOKEN_HELP BX_TOKEN_SEGMENT_REGS '\n'
|
||||
{
|
||||
dbg_printf("sreg - show segment registers\n");
|
||||
|
|
|
@ -629,6 +629,7 @@ typedef Bit32u bx_phy_address;
|
|||
#define BX_SUPPORT_VMX 0
|
||||
#define BX_SUPPORT_AVX 0
|
||||
#define BX_SUPPORT_EVEX 0
|
||||
#define BX_SUPPORT_AMX 0
|
||||
|
||||
#if BX_SUPPORT_UINTR && BX_SUPPORT_X86_64 == 0
|
||||
#error "UINTR require x86-64 support"
|
||||
|
@ -650,6 +651,10 @@ typedef Bit32u bx_phy_address;
|
|||
#error "EVEX and AVX-512 support require AVX to be compiled in"
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_AMX && BX_SUPPORT_EVEX == 0
|
||||
#error "AMX require EVEX and AVX to be compiled in"
|
||||
#endif
|
||||
|
||||
#define BX_SUPPORT_REPEAT_SPEEDUPS 0
|
||||
#define BX_SUPPORT_HANDLERS_CHAINING_SPEEDUPS 0
|
||||
#define BX_ENABLE_TRACE_LINKING 0
|
||||
|
|
|
@ -1432,6 +1432,29 @@ if test "$support_evex" -gt 0 -a "$support_avx" = 0; then
|
|||
AC_MSG_ERROR([for EVEX and AVX-512 support AVX emulation must be compiled in!])
|
||||
fi
|
||||
|
||||
# AMX support: disabled unless --enable-amx is given.
#   --enable-amx (value "yes") defines BX_SUPPORT_AMX to 1 and sets
#   support_amx=1; value "no", or omitting the option, defines it to 0.
# NOTE(review): any other explicit value matches neither branch below, so
# no check result is printed and BX_SUPPORT_AMX is not defined by this
# stanza - confirm this is acceptable.
# AMX depends on EVEX: configuration is aborted further down when AMX was
# requested while support_evex is 0.
support_amx=0
|
||||
AC_MSG_CHECKING(for AMX extensions support)
|
||||
AC_ARG_ENABLE(amx,
|
||||
AS_HELP_STRING([--enable-amx], [support for AMX extensions (no)]),
|
||||
[if test "$enableval" = yes; then
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(BX_SUPPORT_AMX, 1)
|
||||
support_amx=1
|
||||
elif test "$enableval" = no; then
|
||||
AC_MSG_RESULT(no)
|
||||
AC_DEFINE(BX_SUPPORT_AMX, 0)
|
||||
fi
|
||||
],
|
||||
[
|
||||
AC_MSG_RESULT(no)
|
||||
AC_DEFINE(BX_SUPPORT_AMX, 0)
|
||||
]
|
||||
)
|
||||
|
||||
if test "$support_amx" -gt 0 -a "$support_evex" = 0; then
|
||||
AC_MSG_ERROR([for AMX support AVX and EVEX emulation must be compiled in!])
|
||||
fi
|
||||
|
||||
AC_MSG_CHECKING(for x86 debugger support)
|
||||
AC_ARG_ENABLE(x86-debugger,
|
||||
AS_HELP_STRING([--enable-x86-debugger], [x86 debugger support (no)]),
|
||||
|
|
|
@ -42,6 +42,7 @@ BX_INCDIRS = -I.. -I../.. -I$(srcdir)/.. -I$(srcdir)/../.. -I../../@INSTRUMENT_D
|
|||
|
||||
# Objects which are only used for AVX / EVEX code
|
||||
AVX_OBJS = \
|
||||
amx.o \
|
||||
avx.o \
|
||||
avx_pfp.o \
|
||||
avx_cvt.o \
|
||||
|
@ -94,6 +95,14 @@ dist-clean: clean
|
|||
# dependencies generated by
|
||||
# gcc -MM -I.. -I../.. -I../../instrument/stubs *.cc | sed 's/\.cc/.@CPP_SUFFIX@/g'
|
||||
###########################################
|
||||
amx.o: amx.@CPP_SUFFIX@ ../../bochs.h ../../config.h ../../osdep.h ../../logio.h \
|
||||
../../misc/bswap.h ../cpu.h ../../bx_debug/debug.h ../../config.h \
|
||||
../../osdep.h ../../cpu/decoder/decoder.h ../../cpu/decoder/features.h \
|
||||
../decoder/decoder.h ../../instrument/stubs/instrument.h ../i387.h \
|
||||
../fpu/softfloat.h ../fpu/tag_w.h ../fpu/status_w.h ../fpu/control_w.h \
|
||||
../crregs.h ../descriptor.h ../decoder/instr.h ../lazy_flags.h ../tlb.h \
|
||||
../icache.h ../xmm.h ../vmx.h ../svm.h ../cpuid.h ../stack.h \
|
||||
../access.h ../simd_int.h amx.h
|
||||
avx.o: avx.@CPP_SUFFIX@ ../../bochs.h ../../config.h ../../osdep.h ../../logio.h \
|
||||
../../misc/bswap.h ../cpu.h ../../bx_debug/debug.h ../../config.h \
|
||||
../../osdep.h ../../cpu/decoder/decoder.h ../../cpu/decoder/features.h \
|
||||
|
|
|
@ -0,0 +1,394 @@
|
|||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2024 Stanislav Shwartsman
|
||||
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 2 of the License, or (at your option) any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
// Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License along with this library; if not, write to the Free Software
|
||||
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define NEED_CPU_REG_SHORTCUTS 1
|
||||
#include "bochs.h"
|
||||
#include "cpu.h"
|
||||
#define LOG_THIS BX_CPU_THIS_PTR
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
|
||||
#include "amx.h"
|
||||
|
||||
// Validate a 64-byte tile configuration image and, when it is acceptable,
// install it as the current AMX configuration.
//
// Fields consumed from 'tilecfg':
//   byte 0           - palette id
//   byte 1           - start row
//   16-bit word 8+n  - bytes per row of tile n (n = 0..7)
//   byte 48+n        - number of rows of tile n (n = 0..7)
// For palette 1 the remaining bits of qword 0 and qwords 1, 4, 5 and 7 must
// all be zero.
//
// Returns true when the image was accepted (palette 0 just clears the AMX
// state) and false when it was rejected. Every rejection happens before the
// AMX state is modified.
bool BX_CPP_AttrRegparmN(2) BX_CPU_C::configure_tiles(bxInstruction_c *i, const BxPackedAvxRegister &tilecfg)
{
  const Bit8u palette = tilecfg.vmmubyte(0);
  const Bit8u first_row = tilecfg.vmmubyte(1);

  // Palette 0: reset the AMX state, nothing else to look at.
  if (palette == 0) {
    BX_CPU_THIS_PTR amx->clear();
    return true;
  }

  // Palette 1 is the only other palette accepted.
  if (palette != 1)
    return false;

  const bool reserved_bits_set =
      (tilecfg.vmm64u(0) >> 16) != 0 ||
      tilecfg.vmm64u(1) != 0 ||
      tilecfg.vmm64u(4) != 0 ||
      tilecfg.vmm64u(5) != 0 ||
      tilecfg.vmm64u(7) != 0;

  if (reserved_bits_set) {
    BX_ERROR(("LDTILECFG: reserved bits set for palette_id=%d", palette));
    return false;
  }

  // Collect and check all eight tile descriptions before touching CPU state.
  AMX::TILECFG parsed[8];

  for (unsigned t = 0; t < 8; t++) {
    AMX::TILECFG &cfg = parsed[t];

    cfg.bytes_per_row = tilecfg.vmm16u(8 + t);
    if (cfg.bytes_per_row > 64) {
      BX_ERROR(("LDTILECFG: too many bytes_per_row for tile=%d in palette_id=%d", t, palette));
      return false;
    }

    cfg.rows = tilecfg.vmmubyte(48 + t);
    if (cfg.rows > 16) {
      BX_ERROR(("LDTILECFG: too many rows for tile=%d in palette_id=%d", t, palette));
      return false;
    }

    // A tile must be either fully empty (0 x 0) or have both dimensions set.
    const bool no_rows = (cfg.rows == 0);
    const bool no_bytes = (cfg.bytes_per_row == 0);
    if (no_rows != no_bytes) {
      BX_ERROR(("LDTILECFG: invalid empty tile=%d in palette_id=%d", t, palette));
      return false;
    }
  }

  // Everything checked out: commit the new configuration.
  BX_CPU_THIS_PTR amx->clear();
  BX_CPU_THIS_PTR amx->palette_id = 1;
  BX_CPU_THIS_PTR amx->start_row = first_row;
  for (unsigned t = 0; t < 8; t++)
    BX_CPU_THIS_PTR amx->tilecfg[t] = parsed[t];

  return true;
}
|
||||
|
||||
// LDTILECFG: read a 64-byte configuration image from the memory operand and
// hand it to configure_tiles(). A rejected image raises #GP(0).
void BX_CPP_AttrRegparmN(1) BX_CPU_C::LDTILECFG(bxInstruction_c *i)
{
  const Bit64u offset = BX_CPU_RESOLVE_ADDR_64(i);

  BxPackedAvxRegister image;
  read_linear_zmmword(i->seg(), get_laddr64(i->seg(), offset), &image);

  const bool accepted = configure_tiles(i, image);
  if (!accepted)
    exception(BX_GP_EXCEPTION, 0);

  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// STTILECFG: resolve the memory operand's effective address and pass it to
// xsave_tilecfg_state() (presumably the routine that writes the tile
// configuration image to memory - confirm against its definition).
void BX_CPP_AttrRegparmN(1) BX_CPU_C::STTILECFG(bxInstruction_c *i)
{
  const Bit64u dest_offset = BX_CPU_RESOLVE_ADDR_64(i);
  xsave_tilecfg_state(i, dest_offset);

  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// TILELOADD handler (tile destination, memory source): fill the destination
// tile from memory one row at a time, beginning at amx->start_row.
//
// #UD is raised when
//   - the instruction has no SIB index register,
//   - the destination tile number is out of range or the tile is not valid,
//   - the tile's bytes_per_row is not a multiple of 4,
//   - amx->start_row is not below the tile's row count.
//
// Row r is read from (base + disp32) + r * (index << scale). Rows narrower
// than 64 bytes are loaded under an element mask and the remaining dwords of
// the row are zeroed.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::TILELOADD_TnnnMdq(bxInstruction_c *i)
{
  // The row stride is taken from the SIB index register, so it must exist.
  if (i->sibIndex() == BX_NIL_REGISTER) {
    BX_ERROR(("%s: SIB byte required", i->getIaOpcodeNameShort()));
    exception(BX_UD_EXCEPTION, 0);
  }

  unsigned tile = i->dst();

  if (tile >= BX_TILE_REGISTERS || ! BX_CPU_THIS_PTR amx->tile_valid(tile)) {
    BX_ERROR(("%s: invalid tile %d", i->getIaOpcodeNameShort(), tile));
    exception(BX_UD_EXCEPTION, 0);
  }

  unsigned rows = BX_CPU_THIS_PTR amx->tile_num_rows(tile);
  unsigned bytes_per_row = BX_CPU_THIS_PTR amx->tile_bytes_per_row(tile);

  // Rows are transferred in 32-bit elements.
  if ((bytes_per_row & 0x3) != 0) {
    BX_ERROR(("%s: invalid tile %d bytes_per_row=%d", i->getIaOpcodeNameShort(), tile, bytes_per_row));
    exception(BX_UD_EXCEPTION, 0);
  }

  if (BX_CPU_THIS_PTR amx->start_row >= rows) {
    // Every argument has a matching conversion: the tile number is reported
    // explicitly instead of being printed in place of start_row.
    BX_ERROR(("%s: invalid tile %d (start_row=%d) >= (rows=%d)", i->getIaOpcodeNameShort(), tile, BX_CPU_THIS_PTR amx->start_row, rows));
    exception(BX_UD_EXCEPTION, 0);
  }

  unsigned elements_per_row = bytes_per_row / 4;
  // One mask bit per 32-bit element of a row; a full row uses all 16 bits.
  Bit32u mask = (elements_per_row < 16) ? (BX_CONST64(1) << elements_per_row) - 1 : BX_CONST64(0xFFFF);

  BX_CPU_THIS_PTR amx->set_tile_used(tile);

  BX_CPU_THIS_PTR amx->tile[tile].clear_upper_rows(BX_CPU_THIS_PTR amx->start_row);

  Bit64u start_eaddr = BX_READ_64BIT_REG(i->sibBase()) + (Bit64s) i->displ32s();
  Bit64u stride = BX_READ_64BIT_REG(i->sibIndex()) << i->sibScale();

  for (unsigned row=BX_CPU_THIS_PTR amx->start_row; row < rows; row++) {
    BxPackedAvxRegister *data = &(BX_CPU_THIS_PTR amx->tile[tile].row[row]);

    Bit64u eaddr = start_eaddr + row * stride;
    if (bytes_per_row == 64) {
      read_linear_zmmword(i->seg(), get_laddr64(i->seg(), eaddr), data);
    }
    else {
      avx_masked_load32(i, eaddr, data, mask);

      // Zero the part of the row that lies beyond the configured width.
      for (unsigned n=elements_per_row; n < 16; n++)
        data->vmm32u(n) = 0;
    }

    // Record progress after each row that was read.
    // NOTE(review): presumably lets the instruction resume at this row after
    // a fault on a later row - confirm.
    BX_CPU_THIS_PTR amx->start_row++;
  }

  // NOTE(review): presumably resets start_row once the whole tile is loaded -
  // confirm against AMX::restart().
  BX_CPU_THIS_PTR amx->restart();

  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// TILESTORED sibmem, tmm: store the rows of a configured tile to memory.
// Row r is written to [base + disp + r * (index << scale)]; only the
// configured bytes_per_row bytes of each row are written.
// Raises #UD when the operand has no SIB index register, the tile is not
// valid, bytes_per_row is not a multiple of 4, or amx->start_row is not below
// the tile's configured row count.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::TILESTORED_MdqTnnn(bxInstruction_c *i)
{
  // the row stride is taken from the SIB index register, so it must be present
  if (i->sibIndex() == BX_NIL_REGISTER) {
    BX_ERROR(("%s: SIB byte required", i->getIaOpcodeNameShort()));
    exception(BX_UD_EXCEPTION, 0);
  }

  unsigned tile = i->src();

  if (tile >= BX_TILE_REGISTERS || ! BX_CPU_THIS_PTR amx->tile_valid(tile)) {
    BX_ERROR(("TILESTORED: invalid tile %d", tile));
    exception(BX_UD_EXCEPTION, 0);
  }

  unsigned rows = BX_CPU_THIS_PTR amx->tile_num_rows(tile);
  unsigned bytes_per_row = BX_CPU_THIS_PTR amx->tile_bytes_per_row(tile);

  if ((bytes_per_row & 0x3) != 0) {
    BX_ERROR(("TILESTORED: invalid tile %d bytes_per_row=%d", tile, bytes_per_row));
    exception(BX_UD_EXCEPTION, 0);
  }

  if (BX_CPU_THIS_PTR amx->start_row >= rows) {
    // exactly one argument per conversion: start_row, rows
    BX_ERROR(("TILESTORED: invalid (start_row=%d) >= (rows=%d)", BX_CPU_THIS_PTR amx->start_row, rows));
    exception(BX_UD_EXCEPTION, 0);
  }

  // one mask bit per dword of a row; only consulted for rows shorter than 64 bytes
  unsigned elements_per_row = bytes_per_row / 4;
  Bit32u mask = (elements_per_row < 16) ? (BX_CONST64(1) << elements_per_row) - 1 : BX_CONST64(0xFFFF);

  Bit64u start_eaddr = BX_READ_64BIT_REG(i->sibBase()) + (Bit64s) i->displ32s();
  Bit64u stride = BX_READ_64BIT_REG(i->sibIndex()) << i->sibScale();

  for (unsigned row=BX_CPU_THIS_PTR amx->start_row; row < rows; row++) {
    BxPackedAvxRegister *data = &(BX_CPU_THIS_PTR amx->tile[tile].row[row]);

    Bit64u eaddr = start_eaddr + row * stride;
    if (bytes_per_row == 64)
      write_linear_zmmword(i->seg(), get_laddr64(i->seg(), eaddr), data);  // full-width row
    else
      avx_masked_store32(i, eaddr, data, mask);                            // partial row

    // start_row tracks the next row still to be stored.
    // NOTE(review): presumably this lets a store interrupted by a fault resume
    // at the faulting row - confirm against the restart handling.
    BX_CPU_THIS_PTR amx->start_row++;
  }

  // all rows done: reset start_row to 0
  BX_CPU_THIS_PTR amx->restart();

  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// Validate the three tile operands of a tile matrix-multiply instruction and
// raise #UD on the first violated rule. With
//     A = m x k (tile_src1), B = k x n (tile_src2), C = m x n (tile_dst)
// the checks are, in order: operands pairwise distinct, every operand a valid
// tile, every row length a multiple of 4 bytes, dimensions of A, B and C
// consistent, and n and k not larger than 16.
void BX_CPU_C::check_tiles(bxInstruction_c *i, unsigned tile_dst, unsigned tile_src1, unsigned tile_src2)
{
  // #UD if srcdest == src1 OR src1 == src2 OR srcdest == src2
  const bool all_distinct = (tile_dst != tile_src1) && (tile_dst != tile_src2) && (tile_src1 != tile_src2);
  if (!all_distinct) {
    BX_ERROR(("%s: must use different tiles", i->getIaOpcodeNameShort()));
    exception(BX_UD_EXCEPTION, 0);
  }

  // operands in checking order: [0] = srcdest, [1] = src1, [2] = src2
  const unsigned operand[3] = { tile_dst, tile_src1, tile_src2 };

  // #UD if TILES_CONFIGURED == 0
  // #UD if srcdest/src1/src2 are not valid tiles
  // #UD if srcdest/src1/src2 are >= palette_table[tilecfg.palette_id].max_names
  for (unsigned op = 0; op < 3; op++) {
    const unsigned t = operand[op];
    if (t >= BX_TILE_REGISTERS || ! BX_CPU_THIS_PTR amx->tile_valid(t)) {
      BX_ERROR(("%s: invalid tile %d", i->getIaOpcodeNameShort(), t));
      exception(BX_UD_EXCEPTION, 0);
    }
  }

  unsigned nrows[3];
  unsigned colbytes[3];

  // #UD if srcdest.colbytes mod 4 != 0
  // #UD if src1.colbytes mod 4 != 0
  // #UD if src2.colbytes mod 4 != 0
  for (unsigned op = 0; op < 3; op++) {
    nrows[op] = BX_CPU_THIS_PTR amx->tile_num_rows(operand[op]);
    colbytes[op] = BX_CPU_THIS_PTR amx->tile_bytes_per_row(operand[op]);

    if ((colbytes[op] & 0x3) != 0) {
      BX_ERROR(("%s: invalid tile bytes_per_row=%d", i->getIaOpcodeNameShort(), colbytes[op]));
      exception(BX_UD_EXCEPTION, 0);
    }
  }

  const unsigned dim_n = colbytes[0] / 4;  // dword columns of C
  const unsigned dim_m = nrows[1];         // rows of A
  const unsigned dim_k = nrows[2];         // rows of B

  // #UD if srcdest.colbytes != src2.colbytes (n)
  // #UD if srcdest.rows != src1.rows (m)
  // #UD if src1.colbytes / 4 != src2.rows (k)
  const bool n_matches = (dim_n == colbytes[2] / 4);
  const bool m_matches = (dim_m == nrows[0]);
  const bool k_matches = (dim_k == colbytes[1] / 4);
  if (!(n_matches && m_matches && k_matches)) {
    BX_ERROR(("%s: invalid matmul tile dimenstions", i->getIaOpcodeNameShort()));
    exception(BX_UD_EXCEPTION, 0);
  }

  // #UD if srcdest.colbytes > tmul_maxn
  // #UD if src2.colbytes > tmul_maxn
  // #UD if src1.colbytes/4 > tmul_maxk
  // #UD if src2.rows > tmul_maxk
  if (!(dim_n <= 16 && dim_k <= 16)) {
    BX_ERROR(("%s: unsupported matmul tile dimenstions", i->getIaOpcodeNameShort()));
    exception(BX_UD_EXCEPTION, 0);
  }
}
|
||||
|
||||
#include "cpu/simd_vnni.h"
|
||||
|
||||
#define HANDLE_AMX_INT8_3OP(HANDLER, func) \
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
|
||||
{ \
|
||||
unsigned tile_dst = i->dst(), tile_src1 = i->src1(), tile_src2 = i->src2(); \
|
||||
check_tiles(i, tile_dst, tile_src1, tile_src2); \
|
||||
\
|
||||
/* R C */ \
|
||||
/* A = m x k (tsrc1) */ \
|
||||
/* B = k x n (tsrc2) */ \
|
||||
/* C = m x n (tsrcdest) */ \
|
||||
unsigned max_n = BX_CPU_THIS_PTR amx->tile_bytes_per_row(tile_dst) / 4; \
|
||||
unsigned max_m = BX_CPU_THIS_PTR amx->tile_num_rows(tile_dst); \
|
||||
unsigned max_k = BX_CPU_THIS_PTR amx->tile_num_rows(tile_src2); \
|
||||
\
|
||||
AMX::TILE *tdst = &(BX_CPU_THIS_PTR amx->tile[tile_dst]); \
|
||||
AMX::TILE *tsrc1 = &(BX_CPU_THIS_PTR amx->tile[tile_src1]); \
|
||||
AMX::TILE *tsrc2 = &(BX_CPU_THIS_PTR amx->tile[tile_src2]); \
|
||||
\
|
||||
for (unsigned m=0; m < max_m; m++) { \
|
||||
BxPackedAvxRegister* tmp = &(tdst->row[m]); \
|
||||
for (unsigned k=0; k < max_k; k++) { \
|
||||
for (unsigned n=0; n < max_n; n+=4) { \
|
||||
(func)(&(tmp->vmm128(n/4)), \
|
||||
&(tsrc1->row[m].vmm128(n/4)), &(tsrc2->row[m].vmm128(n/4))); \
|
||||
} \
|
||||
} \
|
||||
tdst->zero_upper_row_data32(m, max_n); \
|
||||
} \
|
||||
\
|
||||
BX_CPU_THIS_PTR amx->set_tile_used(tile_dst); \
|
||||
BX_CPU_THIS_PTR amx->tile[tile_dst].clear_upper_rows(max_m); \
|
||||
BX_CPU_THIS_PTR amx->restart(); \
|
||||
BX_NEXT_INSTR(i); \
|
||||
}
|
||||
|
||||
HANDLE_AMX_INT8_3OP(TDPBSSD_TnnnTrmTreg, xmm_pdpbssd)
|
||||
HANDLE_AMX_INT8_3OP(TDPBSUD_TnnnTrmTreg, xmm_pdpbsud)
|
||||
HANDLE_AMX_INT8_3OP(TDPBUSD_TnnnTrmTreg, xmm_pdpbusd)
|
||||
HANDLE_AMX_INT8_3OP(TDPBUUD_TnnnTrmTreg, xmm_pdpbuud)
|
||||
|
||||
#include "bf16.h"
|
||||
|
||||
extern float_status_t prepare_ne_softfloat_status_helper();
|
||||
|
||||
// TDPBF16PS tmm1, tmm2, tmm3: BF16 dot product accumulated into an FP32 tile.
// With A = m x k (tsrc1), B = k x n (tsrc2) and C = m x n (tsrcdest), every
// dword of A and B is treated as a pair of bfloat16 values. For each C[m][n]
// the products of the even halves and of the odd halves are accumulated
// separately over k with fused multiply-add, the two partial sums are added
// together and the result is added to C[m][n].
// Operand validation (and #UD delivery) is done by check_tiles().
void BX_CPP_AttrRegparmN(1) BX_CPU_C::TDPBF16PS_TnnnTrmTreg(bxInstruction_c *i)
{
  unsigned tile_dst = i->dst(), tile_src1 = i->src1(), tile_src2 = i->src2();
  check_tiles(i, tile_dst, tile_src1, tile_src2);

  //       R   C
  //  A =  m x k (tsrc1)
  //  B =  k x n (tsrc2)
  //  C =  m x n (tsrcdest)
  unsigned max_n = BX_CPU_THIS_PTR amx->tile_bytes_per_row(tile_dst) / 4;
  unsigned max_m = BX_CPU_THIS_PTR amx->tile_num_rows(tile_dst);
  unsigned max_k = BX_CPU_THIS_PTR amx->tile_num_rows(tile_src2);

  AMX::TILE *tdst  = &(BX_CPU_THIS_PTR amx->tile[tile_dst]);
  AMX::TILE *tsrc1 = &(BX_CPU_THIS_PTR amx->tile[tile_src1]);
  AMX::TILE *tsrc2 = &(BX_CPU_THIS_PTR amx->tile[tile_src2]);

  float_status_t status = prepare_ne_softfloat_status_helper();

  for (unsigned m=0; m < max_m; m++) {
    // two partial sums (even/odd bfloat16 half) per destination dword;
    // each of the 32 accumulators starts from zero for every row
    float32 tmp[32];
    for (unsigned n=0; n < 32; n++) tmp[n] = 0;

    for (unsigned k=0; k < max_k; k++) {
      for (unsigned n=0; n < max_n; n++) {
        tmp[2*n]   = float32_fmadd(convert_bfloat16_to_fp32(tsrc1->row[m].vmm16u(2*k)),
                                   convert_bfloat16_to_fp32(tsrc2->row[k].vmm16u(2*n)), tmp[2*n], status);

        tmp[2*n+1] = float32_fmadd(convert_bfloat16_to_fp32(tsrc1->row[m].vmm16u(2*k+1)),
                                   convert_bfloat16_to_fp32(tsrc2->row[k].vmm16u(2*n+1)), tmp[2*n+1], status);
      }
    }

    // fold the two partial sums and accumulate into the destination row
    for (unsigned n=0; n < max_n; n++) {
      float32 tmpf32 = float32_add(tmp[2*n], tmp[2*n+1], status);
      tdst->row[m].vmm32u(n) = float32_add(tdst->row[m].vmm32u(n), tmpf32, status);
    }

    tdst->zero_upper_row_data32(m, max_n);
  }

  BX_CPU_THIS_PTR amx->set_tile_used(tile_dst);
  BX_CPU_THIS_PTR amx->tile[tile_dst].clear_upper_rows(max_m);
  BX_CPU_THIS_PTR amx->restart();

  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// TILEZERO tmm: clear every row of the destination tile and drop its
// "used" mark; #UD when the operand does not name a valid tile.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::TILEZERO_Tnnn(bxInstruction_c *i)
{
  const unsigned tmm = i->dst();

  const bool usable = (tmm < BX_TILE_REGISTERS) && BX_CPU_THIS_PTR amx->tile_valid(tmm);
  if (!usable) {
    BX_ERROR(("TILEZERO: invalid tile %d", tmm));
    exception(BX_UD_EXCEPTION, 0);
  }

  AMX *state = BX_CPU_THIS_PTR amx;
  state->clear_tile_used(tmm);
  state->tile[tmm].clear();
  state->restart();   // start_row back to 0

  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// TILERELEASE: reset the complete AMX state (palette, start row, usage
// tracking, tile configuration and tile data) through AMX::clear().
void BX_CPP_AttrRegparmN(1) BX_CPU_C::TILERELEASE(bxInstruction_c *i)
{
  AMX *state = BX_CPU_THIS_PTR amx;
  state->clear();

  BX_NEXT_INSTR(i);
}
|
||||
|
||||
#endif // BX_SUPPORT_AMX
|
|
@ -0,0 +1,108 @@
|
|||
/////////////////////////////////////////////////////////////////////////
|
||||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2024 Stanislav Shwartsman
|
||||
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 2 of the License, or (at your option) any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
// Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License along with this library; if not, write to the Free Software
|
||||
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA B 02110-1301 USA
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef BX_AMX_EXTENSIONS_H
|
||||
#define BX_AMX_EXTENSIONS_H
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
|
||||
#include "cpu/xmm.h"
|
||||
|
||||
#define BX_TILE_REGISTERS 8
|
||||
|
||||
struct AMX {
|
||||
AMX(): palette_id(0), start_row(0) {}
|
||||
|
||||
unsigned palette_id; // 0 if tiles are not configured
|
||||
unsigned start_row; // used to restart tile operations
|
||||
|
||||
struct TILECFG {
|
||||
unsigned rows, bytes_per_row;
|
||||
TILECFG() { clear(); }
|
||||
void clear() { rows = bytes_per_row = 0; }
|
||||
} tilecfg[BX_TILE_REGISTERS];
|
||||
|
||||
bool tiles_configured() const { return palette_id != 0; }
|
||||
|
||||
void clear_tilecfg() {
|
||||
for (int i=0;i<BX_TILE_REGISTERS;i++) tilecfg[i].clear();
|
||||
}
|
||||
|
||||
bool tile_valid(unsigned tile_num) const { return tilecfg[tile_num].rows != 0; }
|
||||
unsigned tile_num_rows(unsigned tile_num) const { return tilecfg[tile_num].rows; }
|
||||
unsigned tile_bytes_per_row(unsigned tile_num) const { return tilecfg[tile_num].bytes_per_row; }
|
||||
|
||||
bool is_tile_used(unsigned tile_num) const { return tile_use_tracker & (1 << tile_num); }
|
||||
void set_tile_used(unsigned tile_num) { tile_use_tracker |= (1 << tile_num); }
|
||||
void clear_tile_used(unsigned tile_num) { tile_use_tracker &= ~(1 << tile_num); }
|
||||
|
||||
void restart() { start_row = 0; }
|
||||
|
||||
struct TILE {
|
||||
#define BX_TILE_MAX_ROWS (16)
|
||||
bx_zmm_reg_t row[BX_TILE_MAX_ROWS];
|
||||
|
||||
TILE() { clear(); }
|
||||
|
||||
// clear upper part of a row (clears dwords limit..16)
|
||||
void zero_upper_row_data32(unsigned nrow, unsigned limit)
|
||||
{
|
||||
for (unsigned i=limit; i < 16; i++)
|
||||
row[nrow].vmm32u(i) = 0;
|
||||
}
|
||||
|
||||
// clear 0..nrows
|
||||
void clear_rows(unsigned nrows) {
|
||||
for (unsigned i=0; i < nrows; i++)
|
||||
row[i].clear();
|
||||
}
|
||||
|
||||
// clear nrows..MAX_ROWS
|
||||
void clear_upper_rows(unsigned nrows) {
|
||||
for (unsigned i=nrows; i < BX_TILE_MAX_ROWS; i++)
|
||||
row[i].clear();
|
||||
}
|
||||
|
||||
void clear() { clear_rows(BX_TILE_MAX_ROWS); }
|
||||
} tile[BX_TILE_REGISTERS] BX_CPP_AlignN(64);
|
||||
|
||||
unsigned tile_use_tracker;
|
||||
|
||||
void clear_tiles() {
|
||||
for (int i=0;i<BX_TILE_REGISTERS;i++) tile[i].clear();
|
||||
}
|
||||
|
||||
void clear() {
|
||||
palette_id = 0;
|
||||
start_row = 0;
|
||||
tile_use_tracker = 0;
|
||||
|
||||
clear_tiles();
|
||||
clear_tilecfg();
|
||||
}
|
||||
};
|
||||
|
||||
#endif // BX_SUPPORT_AMX
|
||||
|
||||
#endif
|
||||
|
|
@ -384,6 +384,7 @@ class BX_CPU_C;
|
|||
class BX_MEM_C;
|
||||
class bxInstruction_c;
|
||||
class bx_local_apic_c;
|
||||
struct AMX;  // defined with the 'struct' key; keep the forward declaration's class-key identical
|
||||
|
||||
// <TAG-TYPE-EXECUTEPTR-START>
|
||||
#if BX_USE_CPU_SMF
|
||||
|
@ -1056,6 +1057,10 @@ public: // for now...
|
|||
MSR *msrs[BX_MSR_MAX_INDEX];
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
AMX *amx;
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_VMX
|
||||
bool in_vmx;
|
||||
bool in_vmx_guest;
|
||||
|
@ -1199,6 +1204,10 @@ public: // for now...
|
|||
BX_SMF void clear_evex_ok();
|
||||
BX_SMF bool get_evex_ok();
|
||||
|
||||
BX_SMF void set_amx_ok();
|
||||
BX_SMF void clear_amx_ok();
|
||||
BX_SMF bool get_amx_ok();
|
||||
|
||||
// for exceptions
|
||||
static jmp_buf jmp_buf_env;
|
||||
unsigned last_exception_type;
|
||||
|
@ -2477,10 +2486,10 @@ public: // for now...
|
|||
BX_SMF void BLENDPS_VpsWpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void BLENDPD_VpdWpdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void PBLENDW_VdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void PEXTRB_EbdVdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void PEXTRB_EbdVdqIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void PEXTRW_EwdVdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void PEXTRW_EwdVdqIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void PEXTRB_EdVdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void PEXTRB_MbVdqIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void PEXTRW_EdVdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void PEXTRW_MwVdqIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void PEXTRD_EdVdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void PEXTRD_EdVdqIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
#if BX_SUPPORT_X86_64
|
||||
|
@ -3580,6 +3589,20 @@ public: // for now...
|
|||
BX_SMF void VPSHRDVQ_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
BX_SMF void LDTILECFG(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void STTILECFG(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void TILELOADD_TnnnMdq(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void TILESTORED_MdqTnnn(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void TDPBSSD_TnnnTrmTreg(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void TDPBSUD_TnnnTrmTreg(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void TDPBUSD_TnnnTrmTreg(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void TDPBUUD_TnnnTrmTreg(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void TDPBF16PS_TnnnTrmTreg(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void TILEZERO_Tnnn(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void TILERELEASE(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
|
||||
#endif
|
||||
|
||||
BX_SMF void LZCNT_GwEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void LZCNT_GdEdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
#if BX_SUPPORT_X86_64
|
||||
|
@ -4085,6 +4108,9 @@ public: // for now...
|
|||
BX_SMF void BxNoOpMask(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF void BxNoEVEX(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
#endif
|
||||
#if BX_SUPPORT_AMX
|
||||
BX_SMF void BxNoAMX(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
BX_CPP_INLINE BX_SMF Bit32u BxResolve32(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
@ -4481,6 +4507,11 @@ public: // for now...
|
|||
BX_SMF void avx512_write_regq_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned vlen, Bit32u mask);
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
BX_SMF void check_tiles(bxInstruction_c *i, unsigned tile_dst, unsigned tile_src1, unsigned tile_src2);
|
||||
BX_SMF bool configure_tiles(bxInstruction_c *i, const BxPackedAvxRegister &tilecfg) BX_CPP_AttrRegparmN(2);
|
||||
#endif
|
||||
|
||||
#if BX_CPU_LEVEL >= 5
|
||||
BX_SMF bool rdmsr(Bit32u index, Bit64u *val_64) BX_CPP_AttrRegparmN(2);
|
||||
BX_SMF bool handle_unknown_rdmsr(Bit32u index, Bit64u *val_64) BX_CPP_AttrRegparmN(2);
|
||||
|
@ -4821,6 +4852,18 @@ public: // for now...
|
|||
BX_SMF void xrstor_uintr_state(bxInstruction_c *i, bx_address offset);
|
||||
BX_SMF void xrstor_init_uintr_state(void);
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
BX_SMF bool xsave_tilecfg_state_xinuse(void);
|
||||
BX_SMF void xsave_tilecfg_state(bxInstruction_c *i, bx_address offset);
|
||||
BX_SMF void xrstor_tilecfg_state(bxInstruction_c *i, bx_address offset);
|
||||
BX_SMF void xrstor_init_tilecfg_state(void);
|
||||
|
||||
BX_SMF bool xsave_tiledata_state_xinuse(void);
|
||||
BX_SMF void xsave_tiledata_state(bxInstruction_c *i, bx_address offset);
|
||||
BX_SMF void xrstor_tiledata_state(bxInstruction_c *i, bx_address offset);
|
||||
BX_SMF void xrstor_init_tiledata_state(void);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_CET
|
||||
|
@ -5056,6 +5099,7 @@ BX_CPP_INLINE Bit32u BX_CPP_AttrRegparmN(1) BX_CPU_C::BxResolve32(bxInstruction_
|
|||
// bit 4 - AVX_OK
|
||||
// bit 5 - OPMASK_OK
|
||||
// bit 6 - EVEX_OK
|
||||
// bit 7 - AMX_OK
|
||||
//
|
||||
|
||||
enum {
|
||||
|
@ -5065,7 +5109,8 @@ enum {
|
|||
BX_FETCH_MODE_SSE_OK = (1 << 3),
|
||||
BX_FETCH_MODE_AVX_OK = (1 << 4),
|
||||
BX_FETCH_MODE_OPMASK_OK = (1 << 5),
|
||||
BX_FETCH_MODE_EVEX_OK = (1 << 6)
|
||||
BX_FETCH_MODE_EVEX_OK = (1 << 6),
|
||||
BX_FETCH_MODE_AMX_OK = (1 << 7)
|
||||
};
|
||||
|
||||
BX_CPP_INLINE void BX_CPU_C::set_fpu_mmx_ok() { BX_CPU_THIS_PTR cpu_state_use_ok |= BX_FETCH_MODE_FPU_MMX_OK; }
|
||||
|
@ -5088,6 +5133,10 @@ BX_CPP_INLINE void BX_CPU_C::set_evex_ok() { BX_CPU_THIS_PTR cpu_state_use_ok |=
|
|||
BX_CPP_INLINE void BX_CPU_C::clear_evex_ok() { BX_CPU_THIS_PTR cpu_state_use_ok &= ~BX_FETCH_MODE_EVEX_OK; }
|
||||
BX_CPP_INLINE bool BX_CPU_C::get_evex_ok() { return (BX_CPU_THIS_PTR cpu_state_use_ok & BX_FETCH_MODE_EVEX_OK); }
|
||||
|
||||
// Accessors for the AMX_OK flag (BX_FETCH_MODE_AMX_OK, bit 7) of cpu_state_use_ok.
BX_CPP_INLINE void BX_CPU_C::set_amx_ok()
{
  BX_CPU_THIS_PTR cpu_state_use_ok |= BX_FETCH_MODE_AMX_OK;
}

BX_CPP_INLINE void BX_CPU_C::clear_amx_ok()
{
  BX_CPU_THIS_PTR cpu_state_use_ok &= ~BX_FETCH_MODE_AMX_OK;
}

BX_CPP_INLINE bool BX_CPU_C::get_amx_ok()
{
  return (BX_CPU_THIS_PTR cpu_state_use_ok & BX_FETCH_MODE_AMX_OK) != 0;
}
|
||||
|
||||
//
|
||||
// updateFetchModeMask - has to be called everytime
|
||||
// CS.L / CS.D_B / CR0.PE, CR0.TS or CR0.EM / CR4.OSFXSR / CR4.OSXSAVE changes
|
||||
|
|
|
@ -123,6 +123,11 @@ sapphire_rapids_t::sapphire_rapids_t(BX_CPU_C *cpu):
|
|||
enable_cpu_extension(BX_ISA_AVX512_BITALG);
|
||||
enable_cpu_extension(BX_ISA_AVX512_VPOPCNTDQ);
|
||||
enable_cpu_extension(BX_ISA_AVX512_BF16);
|
||||
#endif
|
||||
#if BX_SUPPORT_AMX
|
||||
enable_cpu_extension(BX_ISA_AMX);
|
||||
enable_cpu_extension(BX_ISA_AMX_INT8);
|
||||
enable_cpu_extension(BX_ISA_AMX_BF16);
|
||||
#endif
|
||||
enable_cpu_extension(BX_ISA_CLFLUSHOPT);
|
||||
enable_cpu_extension(BX_ISA_CLWB);
|
||||
|
@ -250,10 +255,19 @@ void sapphire_rapids_t::get_cpuid_leaf(Bit32u function, Bit32u subfunction, cpui
|
|||
case 0x0000001A: // CPUID leaf 0x0000001A - native Model ID Enumeration leaf (for Hybrid)
|
||||
case 0x0000001B: // PCONFIG Information
|
||||
case 0x0000001C: // CPUID leaf 0x0000001C - Last Branch Record (Architectural LBR) leaf
|
||||
#if BX_SUPPORT_AMX
|
||||
case 0x0000001D: // AMX
|
||||
get_std_cpuid_amx_palette_info_leaf(subfunction, leaf);
|
||||
return;
|
||||
case 0x0000001E: // AMX: TMUL Information Main leaf
|
||||
get_std_cpuid_amx_tmul_leaf(subfunction, leaf);
|
||||
return;
|
||||
#else
|
||||
case 0x0000001D: // AMX
|
||||
case 0x0000001E: // AMX: TMUL Information Main leaf
|
||||
get_reserved_leaf(leaf);
|
||||
return;
|
||||
#endif
|
||||
case 0x0000001F: // V2 Extended Topology Enumberation leaf
|
||||
get_reserved_leaf(leaf); // until figured it out
|
||||
return;
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2014-2023 Stanislav Shwartsman
|
||||
// Copyright (c) 2014-2024 Stanislav Shwartsman
|
||||
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
|
@ -235,13 +235,69 @@ void bx_cpuid_t::get_std_cpuid_xsave_leaf(Bit32u subfunction, cpuid_function_t *
|
|||
if (support_mask & (1 << subfunction)) {
|
||||
leaf->eax = xsave_restore[subfunction].len;
|
||||
leaf->ebx = xsave_restore[subfunction].offset;
|
||||
leaf->ecx = (cpu->ia32_xss_suppmask & (1 << subfunction)) != 0; // managed through IA32_XSS register
|
||||
// ECX[0] - set if this component managed through IA32_XSS register
|
||||
// ECX[1] - set to indicate this component must be aligned to 64-byte
|
||||
// ECX[2] - XFD support for this component
|
||||
leaf->ecx = (cpu->ia32_xss_suppmask & (1 << subfunction)) != 0;
|
||||
leaf->edx = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
// CPUID leaf 0x1D (AMX tile palette information).
// Sub-leaf 0 reports the highest supported palette id, sub-leaf 1 describes
// palette #1; every other sub-leaf, and every sub-leaf when AMX is not
// enabled for this CPU model, reads as all zeros.
void bx_cpuid_t::get_std_cpuid_amx_palette_info_leaf(Bit32u subfunction, cpuid_function_t *leaf) const
{
  // start from the all-zero answer and fill in only what a sub-leaf reports
  leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;

  if (!is_cpu_extension_supported(BX_ISA_AMX))
    return;

  switch (subfunction) {
    case 0:
      leaf->eax = 1; // max palette_id
      break;

    case 1: // information about palette #1
      // EAX[15:00] : Palette #1 total tile bytes = 8192
      // EAX[31:16] : Palette #1 bytes per tile = 1024
      leaf->eax = 8192 | (1024<<16);
      // EBX[15:00] : Palette #1 bytes_per_row = 64
      // EBX[31:16] : Palette #1 number of tiles = 8
      leaf->ebx = 64 | (8<<16);
      // ECX[15:00] : Palette #1 max_rows = 16
      // ECX[31:16] : Reserved
      leaf->ecx = 16;
      // EDX[31:00] : Reserved (stays 0)
      break;

    default:
      break;
  }
}
|
||||
|
||||
// CPUID leaf 0x1E (AMX TMUL information).
// Only sub-leaf 0 carries data; any other sub-leaf, and every sub-leaf when
// AMX is not enabled for this CPU model, reads as all zeros.
void bx_cpuid_t::get_std_cpuid_amx_tmul_leaf(Bit32u subfunction, cpuid_function_t *leaf) const
{
  leaf->eax = 0;
  leaf->ebx = 0;
  leaf->ecx = 0;
  leaf->edx = 0;

  if (!is_cpu_extension_supported(BX_ISA_AMX))
    return;

  // an invalid sub-leaf index must return zeros in all four registers
  if (subfunction != 0)
    return;

  // EBX[07:00] = 16 TMUL_MAX_K (rows or columns)
  // EBX[23:08] = 64 TMUL_MAX_N (column bytes)
  // EBX[31:24] reserved
  leaf->ebx = 16 | (64<<8);
}
|
||||
#endif
|
||||
|
||||
void bx_cpuid_t::get_leaf_0(unsigned max_leaf, const char *vendor_string, cpuid_function_t *leaf, unsigned limited_max_leaf) const
|
||||
{
|
||||
// EAX: highest function understood by CPUID
|
||||
|
@ -1023,10 +1079,28 @@ Bit32u bx_cpuid_t::get_std_cpuid_leaf_7_edx(Bit32u extra) const
|
|||
#endif
|
||||
|
||||
// [21:21] reserved
|
||||
|
||||
// [22:22] AMX BF16 support
|
||||
#if BX_SUPPORT_AMX
|
||||
if (is_cpu_extension_supported(BX_ISA_AMX)) {
|
||||
if (is_cpu_extension_supported(BX_ISA_AMX_BF16))
|
||||
edx |= BX_CPUID_STD7_SUBLEAF0_EDX_AMX_BF16;
|
||||
}
|
||||
#endif
|
||||
|
||||
// [23:23] AVX512_FP16 instructions support
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
// [24:24] AMX TILE architecture support
|
||||
if (is_cpu_extension_supported(BX_ISA_AMX)) {
|
||||
edx |= BX_CPUID_STD7_SUBLEAF0_EDX_AMX_TILE;
|
||||
|
||||
// [25:25] AMX INT8 support
|
||||
if (is_cpu_extension_supported(BX_ISA_AMX_INT8))
|
||||
edx |= BX_CPUID_STD7_SUBLEAF0_EDX_AMX_INT8;
|
||||
}
|
||||
#endif
|
||||
|
||||
// * [26:26] IBRS and IBPB: Indirect branch restricted speculation (SCA)
|
||||
// * [27:27] STIBP: Single Thread Indirect Branch Predictors supported (SCA)
|
||||
// * [28:28] L1D_FLUSH supported (SCA)
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2010-2023 Stanislav Shwartsman
|
||||
// Copyright (c) 2010-2024 Stanislav Shwartsman
|
||||
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
|
@ -113,6 +113,11 @@ protected:
|
|||
void get_std_cpuid_xsave_leaf(Bit32u subfunction, cpuid_function_t *leaf) const;
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
void get_std_cpuid_amx_palette_info_leaf(Bit32u subfunction, cpuid_function_t *leaf) const;
|
||||
void get_std_cpuid_amx_tmul_leaf(Bit32u subfunction, cpuid_function_t *leaf) const;
|
||||
#endif
|
||||
|
||||
Bit32u get_std_cpuid_leaf_1_ecx(Bit32u extra = 0) const;
|
||||
Bit32u get_std_cpuid_leaf_1_edx_common(Bit32u extra = 0) const;
|
||||
Bit32u get_std_cpuid_leaf_1_edx(Bit32u extra = 0) const;
|
||||
|
|
|
@ -1823,8 +1823,28 @@ void BX_CPU_C::xsave_xrestor_init(void)
|
|||
|
||||
// XCR0[15]: LBR state (not implemented)
|
||||
// XCR0[16]: HWP state (not implemented)
|
||||
// XCR0[17]: AMX XTILECFG state (not implemented)
|
||||
// XCR0[17]: AMX XTILEDATA state (not implemented)
|
||||
|
||||
// XCR0[17]: AMX XTILECFG state
|
||||
// XCR0[18]: AMX XTILEDATA state
|
||||
#if BX_SUPPORT_AMX
|
||||
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_AMX)) {
|
||||
// XCR0[17]: AMX XTILECFG state
|
||||
xsave_restore[xcr0_t::BX_XCR0_XTILECFG_BIT].len = XSAVE_XTILECFG_STATE_LEN;
|
||||
xsave_restore[xcr0_t::BX_XCR0_XTILECFG_BIT].offset = XSAVE_XTILECFG_STATE_OFFSET;
|
||||
xsave_restore[xcr0_t::BX_XCR0_XTILECFG_BIT].xstate_in_use_method = &BX_CPU_C::xsave_tilecfg_state_xinuse;
|
||||
xsave_restore[xcr0_t::BX_XCR0_XTILECFG_BIT].xsave_method = &BX_CPU_C::xsave_tilecfg_state;
|
||||
xsave_restore[xcr0_t::BX_XCR0_XTILECFG_BIT].xrstor_method = &BX_CPU_C::xrstor_tilecfg_state;
|
||||
xsave_restore[xcr0_t::BX_XCR0_XTILECFG_BIT].xrstor_init_method = &BX_CPU_C::xrstor_init_tilecfg_state;
|
||||
|
||||
// XCR0[18]: AMX XTILEDATA state
|
||||
xsave_restore[xcr0_t::BX_XCR0_XTILEDATA_BIT].len = XSAVE_XTILEDATA_STATE_LEN;
|
||||
xsave_restore[xcr0_t::BX_XCR0_XTILEDATA_BIT].offset = XSAVE_XTILEDATA_STATE_OFFSET;
|
||||
xsave_restore[xcr0_t::BX_XCR0_XTILEDATA_BIT].xstate_in_use_method = &BX_CPU_C::xsave_tiledata_state_xinuse;
|
||||
xsave_restore[xcr0_t::BX_XCR0_XTILEDATA_BIT].xsave_method = &BX_CPU_C::xsave_tiledata_state;
|
||||
xsave_restore[xcr0_t::BX_XCR0_XTILEDATA_BIT].xrstor_method = &BX_CPU_C::xrstor_tiledata_state;
|
||||
xsave_restore[xcr0_t::BX_XCR0_XTILEDATA_BIT].xrstor_init_method = &BX_CPU_C::xrstor_init_tiledata_state;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if BX_CPU_LEVEL >= 5
|
||||
|
@ -1870,6 +1890,10 @@ Bit32u BX_CPU_C::get_xcr0_allow_mask(void)
|
|||
#if BX_SUPPORT_PKEYS
|
||||
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_PKU))
|
||||
allowMask |= BX_XCR0_PKRU_MASK;
|
||||
#endif
|
||||
#if BX_SUPPORT_AMX
|
||||
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_AMX))
|
||||
allowMask |= BX_XCR0_XTILE_BITS_MASK;
|
||||
#endif
|
||||
return allowMask;
|
||||
}
|
||||
|
|
|
@ -274,6 +274,8 @@ const unsigned XSAVE_OPMASK_STATE_OFFSET = 1088;
|
|||
const unsigned XSAVE_ZMM_HI256_STATE_OFFSET = 1152;
|
||||
const unsigned XSAVE_HI_ZMM_STATE_OFFSET = 1664;
|
||||
const unsigned XSAVE_PKRU_STATE_OFFSET = 2688;
|
||||
const unsigned XSAVE_XTILECFG_STATE_OFFSET = 2752;
|
||||
const unsigned XSAVE_XTILEDATA_STATE_OFFSET = 2816;
|
||||
|
||||
struct xcr0_t {
|
||||
Bit32u val32; // 32bit value of register
|
||||
|
@ -296,8 +298,8 @@ struct xcr0_t {
|
|||
BX_XCR0_UINTR_BIT = 14,
|
||||
BX_XCR0_LBR_BIT = 15, // not implemented yet
|
||||
BX_XCR0_HWP_BIT = 16, // not implemented yet
|
||||
BX_XCR0_XTILECFG_BIT = 17, // not implemented yet
|
||||
BX_XCR0_XTILEDATA_BIT = 18, // not implemented yet
|
||||
BX_XCR0_XTILECFG_BIT = 17,
|
||||
BX_XCR0_XTILEDATA_BIT = 18,
|
||||
BX_XCR0_LAST // make sure it is < 32
|
||||
};
|
||||
|
||||
|
@ -321,6 +323,8 @@ struct xcr0_t {
|
|||
#define BX_XCR0_XTILECFG_MASK (1 << xcr0_t::BX_XCR0_XTILECFG_BIT)
|
||||
#define BX_XCR0_XTILEDATA_MASK (1 << xcr0_t::BX_XCR0_XTILEDATA_BIT)
|
||||
|
||||
#define BX_XCR0_XTILE_BITS_MASK (BX_XCR0_XTILECFG_MASK | BX_XCR0_XTILEDATA_MASK)
|
||||
|
||||
IMPLEMENT_CRREG_ACCESSORS(FPU, BX_XCR0_FPU_BIT);
|
||||
IMPLEMENT_CRREG_ACCESSORS(SSE, BX_XCR0_SSE_BIT);
|
||||
IMPLEMENT_CRREG_ACCESSORS(YMM, BX_XCR0_YMM_BIT);
|
||||
|
|
|
@ -293,12 +293,10 @@ char *resolve_memsize(char *disbufptr, const bxInstruction_c *i, unsigned src_in
|
|||
else if (src_index == BX_SRC_RM) {
|
||||
switch(src_type) {
|
||||
case BX_GPR8:
|
||||
case BX_GPR32_MEM8: // 8-bit memory ref but 32-bit GPR
|
||||
disbufptr = dis_sprintf(disbufptr, "byte ptr ");
|
||||
break;
|
||||
|
||||
case BX_GPR16:
|
||||
case BX_GPR32_MEM16: // 16-bit memory ref but 32-bit GPR
|
||||
case BX_SEGREG:
|
||||
disbufptr = dis_sprintf(disbufptr, "word ptr ");
|
||||
break;
|
||||
|
@ -329,6 +327,9 @@ char *resolve_memsize(char *disbufptr, const bxInstruction_c *i, unsigned src_in
|
|||
disbufptr = dis_sprintf(disbufptr, "xmmword ptr ");
|
||||
break;
|
||||
|
||||
case BX_TMM_REG:
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -401,8 +402,6 @@ char *disasm_regref(char *disbufptr, const bxInstruction_c *i, unsigned src_num,
|
|||
break;
|
||||
|
||||
case BX_GPR32:
|
||||
case BX_GPR32_MEM8: // 8-bit memory ref but 32-bit GPR
|
||||
case BX_GPR32_MEM16: // 16-bit memory ref but 32-bit GPR
|
||||
disbufptr = dis_sprintf(disbufptr, "%s", general_32bit_regname[srcreg]);
|
||||
break;
|
||||
|
||||
|
@ -460,6 +459,10 @@ char *disasm_regref(char *disbufptr, const bxInstruction_c *i, unsigned src_num,
|
|||
break;
|
||||
#endif
|
||||
|
||||
case BX_TMM_REG:
|
||||
disbufptr = dis_sprintf(disbufptr, "tmm%d", srcreg);
|
||||
break;
|
||||
|
||||
case BX_SEGREG:
|
||||
disbufptr = dis_sprintf(disbufptr, "%s", segment_name[srcreg]);
|
||||
break;
|
||||
|
|
|
@ -115,6 +115,11 @@ x86_feature(BX_ISA_AVX_VNNI, "avx_vnni") /* AVX e
|
|||
x86_feature(BX_ISA_AVX_VNNI_INT8, "avx_vnni_int8") /* AVX encoded VNNI-INT8 Instructions */
|
||||
x86_feature(BX_ISA_AVX_VNNI_INT16, "avx_vnni_int16") /* AVX encoded VNNI-INT16 Instructions */
|
||||
x86_feature(BX_ISA_AVX_NE_CONVERT, "avx_ne_convert") /* AVX-NE-CONVERT Instructions */
|
||||
#if BX_SUPPORT_AMX
|
||||
x86_feature(BX_ISA_AMX, "amx") /* AMX Instructions */
|
||||
x86_feature(BX_ISA_AMX_INT8, "amx_int8") /* AMX-INT8 Instructions */
|
||||
x86_feature(BX_ISA_AMX_BF16, "amx_bf16") /* AMX-BF16 Instructions */
|
||||
#endif
|
||||
#endif
|
||||
x86_feature(BX_ISA_XAPIC, "xapic") /* XAPIC support */
|
||||
x86_feature(BX_ISA_X2APIC, "x2apic") /* X2APIC support */
|
||||
|
|
|
@ -53,6 +53,7 @@ enum BxDecodeError {
|
|||
BX_EVEX_ILLEGAL_ZERO_MASKING_WITH_KMASK_SRC_OR_DEST,
|
||||
BX_EVEX_ILLEGAL_ZERO_MASKING_VSIB,
|
||||
BX_EVEX_ILLEGAL_ZERO_MASKING_MEMORY_DESTINATION,
|
||||
BX_AMX_ILLEGAL_TILE_REGISTER
|
||||
};
|
||||
|
||||
//
|
||||
|
@ -78,6 +79,7 @@ BX_CPP_INLINE Bit64u FetchQWORD(const Bit8u *iptr)
|
|||
}
|
||||
#endif
|
||||
|
||||
#define BX_PREPARE_AMX (0x400)
|
||||
#define BX_PREPARE_EVEX_NO_BROADCAST (0x200 | BX_PREPARE_EVEX)
|
||||
#define BX_PREPARE_EVEX_NO_SAE (0x100 | BX_PREPARE_EVEX)
|
||||
#define BX_PREPARE_EVEX (0x80)
|
||||
|
@ -95,7 +97,7 @@ struct bxIAOpcodeTable {
|
|||
BxExecutePtr_tR execute2;
|
||||
#endif
|
||||
Bit8u src[4];
|
||||
#if BX_SUPPORT_EVEX
|
||||
#if BX_SUPPORT_EVEX || BX_SUPPORT_AMX
|
||||
Bit16u opflags;
|
||||
#else
|
||||
Bit8u opflags;
|
||||
|
@ -140,20 +142,20 @@ enum {
|
|||
enum {
|
||||
BX_NO_REGISTER = 0,
|
||||
BX_GPR8 = 0x1,
|
||||
BX_GPR32_MEM8 = 0x2, // 8-bit memory reference but 32-bit GPR
|
||||
BX_GPR16 = 0x3,
|
||||
BX_GPR32_MEM16 = 0x4, // 16-bit memory reference but 32-bit GPR
|
||||
BX_GPR32 = 0x5,
|
||||
BX_GPR64 = 0x6,
|
||||
BX_FPU_REG = 0x7,
|
||||
BX_MMX_REG = 0x8,
|
||||
BX_MMX_HALF_REG = 0x9,
|
||||
BX_VMM_REG = 0xA,
|
||||
BX_KMASK_REG = 0xB,
|
||||
BX_KMASK_REG_PAIR = 0xC,
|
||||
BX_SEGREG = 0xD,
|
||||
BX_CREG = 0xE,
|
||||
BX_DREG = 0xF
|
||||
BX_GPR16 = 0x2,
|
||||
BX_GPR32 = 0x3,
|
||||
BX_GPR64 = 0x4,
|
||||
BX_FPU_REG = 0x5,
|
||||
BX_MMX_REG = 0x6,
|
||||
BX_MMX_HALF_REG = 0x7,
|
||||
BX_VMM_REG = 0x8,
|
||||
BX_KMASK_REG = 0x9,
|
||||
BX_KMASK_REG_PAIR = 0xA,
|
||||
BX_TMM_REG = 0xB,
|
||||
BX_SEGREG = 0xC,
|
||||
BX_CREG = 0xD,
|
||||
BX_DREG = 0xE
|
||||
// encoding 0xF is still free
|
||||
};
|
||||
|
||||
// to be used together with BX_SRC_VECTOR_RM
|
||||
|
@ -215,9 +217,7 @@ enum {
|
|||
const Bit8u OP_NONE = BX_SRC_NONE;
|
||||
|
||||
const Bit8u OP_Eb = BX_FORM_SRC(BX_GPR8, BX_SRC_RM);
|
||||
const Bit8u OP_Ebd = BX_FORM_SRC(BX_GPR32_MEM8, BX_SRC_RM);
|
||||
const Bit8u OP_Ew = BX_FORM_SRC(BX_GPR16, BX_SRC_RM);
|
||||
const Bit8u OP_Ewd = BX_FORM_SRC(BX_GPR32_MEM16, BX_SRC_RM);
|
||||
const Bit8u OP_Ed = BX_FORM_SRC(BX_GPR32, BX_SRC_RM);
|
||||
const Bit8u OP_Eq = BX_FORM_SRC(BX_GPR64, BX_SRC_RM);
|
||||
|
||||
|
@ -356,6 +356,10 @@ const Bit8u OP_KHq = BX_FORM_SRC(BX_KMASK_REG, BX_SRC_VVV);
|
|||
|
||||
const Bit8u OP_KGq2 = BX_FORM_SRC(BX_KMASK_REG_PAIR, BX_SRC_NNN);
|
||||
|
||||
const Bit8u OP_Trm = BX_FORM_SRC(BX_TMM_REG, BX_SRC_RM);
|
||||
const Bit8u OP_Tnnn = BX_FORM_SRC(BX_TMM_REG, BX_SRC_NNN);
|
||||
const Bit8u OP_Treg = BX_FORM_SRC(BX_TMM_REG, BX_SRC_VVV);
|
||||
|
||||
const Bit8u OP_ST0 = BX_FORM_SRC(BX_FPU_REG, BX_SRC_EAX);
|
||||
const Bit8u OP_STi = BX_FORM_SRC(BX_FPU_REG, BX_SRC_RM);
|
||||
|
||||
|
|
|
@ -1754,6 +1754,11 @@ BxDecodeError assign_srcs(bxInstruction_c *i, unsigned ia_opcode, bool is_64, un
|
|||
if (i->isZeroMasking())
|
||||
return BX_EVEX_ILLEGAL_ZERO_MASKING_WITH_KMASK_SRC_OR_DEST;
|
||||
}
|
||||
#endif
|
||||
#if BX_SUPPORT_AMX
|
||||
if (type == BX_TMM_REG) {
|
||||
if (nnn >= 8) return BX_AMX_ILLEGAL_TILE_REGISTER;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
case BX_SRC_RM:
|
||||
|
@ -1765,6 +1770,11 @@ BxDecodeError assign_srcs(bxInstruction_c *i, unsigned ia_opcode, bool is_64, un
|
|||
if (i->isZeroMasking())
|
||||
return BX_EVEX_ILLEGAL_ZERO_MASKING_WITH_KMASK_SRC_OR_DEST;
|
||||
}
|
||||
#endif
|
||||
#if BX_SUPPORT_AMX
|
||||
if (type == BX_TMM_REG) {
|
||||
if (rm >= 8) return BX_AMX_ILLEGAL_TILE_REGISTER;
|
||||
}
|
||||
#endif
|
||||
i->setSrcReg(n, rm);
|
||||
}
|
||||
|
@ -1798,6 +1808,11 @@ BxDecodeError assign_srcs(bxInstruction_c *i, unsigned ia_opcode, bool is_64, un
|
|||
if (i->isZeroMasking())
|
||||
return BX_EVEX_ILLEGAL_ZERO_MASKING_WITH_KMASK_SRC_OR_DEST;
|
||||
}
|
||||
#endif
|
||||
#if BX_SUPPORT_AMX
|
||||
if (type == BX_TMM_REG) {
|
||||
if (vvv >= 8) return BX_AMX_ILLEGAL_TILE_REGISTER;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
case BX_SRC_VIB:
|
||||
|
@ -2600,6 +2615,14 @@ int assignHandler(bxInstruction_c *i, Bit32u fetchModeMask)
|
|||
return(1);
|
||||
}
|
||||
}
|
||||
#if BX_SUPPORT_AMX
|
||||
if (! (fetchModeMask & BX_FETCH_MODE_AMX_OK)) {
|
||||
if (op_flags & BX_PREPARE_AMX) {
|
||||
if (i->execute1 != &BX_CPU_C::BxError) i->execute1 = &BX_CPU_C::BxNoAMX;
|
||||
return(1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -994,6 +994,21 @@ static const Bit64u BxOpcodeGroup_VEX_0F3847[] = {
|
|||
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W1, BX_IA_VPSLLVQ_VdqHdqWdq)
|
||||
};
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
// VEX.0F38 opcode 49: AMX tile configuration group.
// All entries require VEX.L=128 and 64-bit mode; they are told apart by the
// SSE prefix and by register vs. memory ModRM form:
//   no prefix, mod=11b, nnn=0, rm=0 -> TILERELEASE
//   no prefix, memory form          -> LDTILECFG
//   66 prefix, memory form          -> STTILECFG
//   F2 prefix, mod=11b, rm=0        -> TILEZERO
static const Bit64u BxOpcodeGroup_VEX_0F3849[] = {
  form_opcode(ATTR_SSE_NO_PREFIX | ATTR_VEX_W0 | ATTR_VL128 | ATTR_NNN0 | ATTR_RRR0 | ATTR_MODC0 | ATTR_IS64, BX_IA_TILERELEASE),
  form_opcode(ATTR_SSE_NO_PREFIX | ATTR_VEX_W0 | ATTR_VL128 | ATTR_MOD_MEM | ATTR_IS64, BX_IA_LDTILECFG),
  // STTILECFG is encoded with VEX.W0 like every other AMX instruction;
  // matching on W1 would make the architectural encoding undecodable.
  form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0 | ATTR_VL128 | ATTR_MOD_MEM | ATTR_IS64, BX_IA_STTILECFG),
  last_opcode(ATTR_SSE_PREFIX_F2 | ATTR_VEX_W0 | ATTR_VL128 | ATTR_RRR0 | ATTR_MODC0 | ATTR_IS64, BX_IA_TILEZERO_Tnnn)
};
|
||||
|
||||
// VEX.0F38 opcode 4B: AMX tile load/store group.
// Every entry is memory-form only, VEX.W0, VEX.L=128 and 64-bit mode only;
// the SSE prefix selects the instruction (66 / F3 / F2).
static const Bit64u BxOpcodeGroup_VEX_0F384B[] = {
  form_opcode(ATTR_VL128 | ATTR_VEX_W0 | ATTR_IS64 | ATTR_MOD_MEM | ATTR_SSE_PREFIX_66,
              BX_IA_TILELOADDT1_TnnnMdq),
  form_opcode(ATTR_VL128 | ATTR_VEX_W0 | ATTR_IS64 | ATTR_MOD_MEM | ATTR_SSE_PREFIX_F3,
              BX_IA_TILESTORED_MdqTnnn),
  last_opcode(ATTR_VL128 | ATTR_VEX_W0 | ATTR_IS64 | ATTR_MOD_MEM | ATTR_SSE_PREFIX_F2,
              BX_IA_TILELOADD_TnnnMdq)
};
|
||||
#endif
|
||||
|
||||
static const Bit64u BxOpcodeGroup_VEX_0F3850[] = {
|
||||
form_opcode(ATTR_SSE_NO_PREFIX | ATTR_VEX_W0, BX_IA_VPDPBUUD_VdqHdqWdq),
|
||||
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0, BX_IA_VPDPBUSD_VdqHdqWdq),
|
||||
|
@ -1014,6 +1029,18 @@ static const Bit64u BxOpcodeGroup_VEX_0F3853[] = { last_opcode(ATTR_SSE_PREFIX_6
|
|||
static const Bit64u BxOpcodeGroup_VEX_0F3858[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0, BX_IA_VPBROADCASTD_VdqWd) };
|
||||
static const Bit64u BxOpcodeGroup_VEX_0F3859[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0, BX_IA_VPBROADCASTQ_VdqWq) };
|
||||
static const Bit64u BxOpcodeGroup_VEX_0F385A[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0 | ATTR_VL256 | ATTR_MOD_MEM, BX_IA_V256_VBROADCASTI128_VdqMdq) };
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
// VEX.0F38 opcode 5C: single entry, TDPBF16PS (F3 prefix, VEX.W0, VEX.L=128,
// register form, 64-bit mode only).
static const Bit64u BxOpcodeGroup_VEX_0F385C[] = {
  last_opcode(ATTR_VL128 | ATTR_VEX_W0 | ATTR_IS64 | ATTR_MODC0 | ATTR_SSE_PREFIX_F3,
              BX_IA_TDPBF16PS_TnnnTrmTreg)
};
|
||||
|
||||
// VEX.0F38 opcode 5E: AMX tile dot-product group (TDPBxxD).
// Every entry is register-form only, VEX.W0, VEX.L=128 and 64-bit mode only;
// the SSE prefix selects the variant:
//   none -> TDPBUUD, 66 -> TDPBUSD, F2 -> TDPBSSD, F3 -> TDPBSUD
static const Bit64u BxOpcodeGroup_VEX_0F385E[] = {
  form_opcode(ATTR_VL128 | ATTR_VEX_W0 | ATTR_IS64 | ATTR_MODC0 | ATTR_SSE_NO_PREFIX,
              BX_IA_TDPBUUD_TnnnTrmTreg),
  form_opcode(ATTR_VL128 | ATTR_VEX_W0 | ATTR_IS64 | ATTR_MODC0 | ATTR_SSE_PREFIX_66,
              BX_IA_TDPBUSD_TnnnTrmTreg),
  form_opcode(ATTR_VL128 | ATTR_VEX_W0 | ATTR_IS64 | ATTR_MODC0 | ATTR_SSE_PREFIX_F2,
              BX_IA_TDPBSSD_TnnnTrmTreg),
  last_opcode(ATTR_VL128 | ATTR_VEX_W0 | ATTR_IS64 | ATTR_MODC0 | ATTR_SSE_PREFIX_F3,
              BX_IA_TDPBSUD_TnnnTrmTreg)
};
|
||||
#endif
|
||||
|
||||
static const Bit64u BxOpcodeGroup_VEX_0F3872[] = { last_opcode(ATTR_SSE_PREFIX_F3 | ATTR_VEX_W0, BX_IA_VCVTNEPS2BF16_Vbf16Wps) };
|
||||
static const Bit64u BxOpcodeGroup_VEX_0F3878[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0, BX_IA_VPBROADCASTB_VdqWb) };
|
||||
static const Bit64u BxOpcodeGroup_VEX_0F3879[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0, BX_IA_VPBROADCASTW_VdqWw) };
|
||||
|
@ -1357,8 +1384,14 @@ static const Bit64u BxOpcodeGroup_VEX_0F3A0F[] = {
|
|||
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VL256, BX_IA_V256_VPALIGNR_VdqHdqWdqIb)
|
||||
};
|
||||
|
||||
static const Bit64u BxOpcodeGroup_VEX_0F3A14[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VL128, BX_IA_V128_VPEXTRB_EbdVdqIb) };
|
||||
static const Bit64u BxOpcodeGroup_VEX_0F3A15[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VL128, BX_IA_V128_VPEXTRW_EwdVdqIb) };
|
||||
static const Bit64u BxOpcodeGroup_VEX_0F3A14[] = {
|
||||
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VL128 | ATTR_MODC0, BX_IA_V128_VPEXTRB_EdVdqIbR),
|
||||
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VL128 | ATTR_MOD_MEM, BX_IA_V128_VPEXTRB_MbVdqIbM)
|
||||
};
|
||||
static const Bit64u BxOpcodeGroup_VEX_0F3A15[] = {
|
||||
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VL128 | ATTR_MODC0, BX_IA_V128_VPEXTRW_EdVdqIbR),
|
||||
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VL128 | ATTR_MOD_MEM, BX_IA_V128_VPEXTRW_MwVdqIbM)
|
||||
};
|
||||
|
||||
static const Bit64u BxOpcodeGroup_VEX_0F3A16[] = {
|
||||
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VL128 | ATTR_VEX_W0, BX_IA_V128_VPEXTRD_EdVdqIb),
|
||||
|
@ -1884,9 +1917,17 @@ static const Bit64u *BxOpcodeTableVEX[256*3] = {
|
|||
/* 46 */ ( BxOpcodeGroup_VEX_0F3846 ),
|
||||
/* 47 */ ( BxOpcodeGroup_VEX_0F3847 ),
|
||||
/* 48 */ ( BxOpcodeGroup_ERR ),
|
||||
#if BX_SUPPORT_AMX
|
||||
/* 49 */ ( BxOpcodeGroup_VEX_0F3849 ),
|
||||
#else
|
||||
/* 49 */ ( BxOpcodeGroup_ERR ),
|
||||
#endif
|
||||
/* 4A */ ( BxOpcodeGroup_ERR ),
|
||||
#if BX_SUPPORT_AMX
|
||||
/* 4B */ ( BxOpcodeGroup_VEX_0F384B ),
|
||||
#else
|
||||
/* 4B */ ( BxOpcodeGroup_ERR ),
|
||||
#endif
|
||||
/* 4C */ ( BxOpcodeGroup_ERR ),
|
||||
/* 4D */ ( BxOpcodeGroup_ERR ),
|
||||
/* 4E */ ( BxOpcodeGroup_ERR ),
|
||||
|
@ -1903,9 +1944,17 @@ static const Bit64u *BxOpcodeTableVEX[256*3] = {
|
|||
/* 59 */ ( BxOpcodeGroup_VEX_0F3859 ),
|
||||
/* 5A */ ( BxOpcodeGroup_VEX_0F385A ),
|
||||
/* 5B */ ( BxOpcodeGroup_ERR ),
|
||||
#if BX_SUPPORT_AMX
|
||||
/* 5C */ ( BxOpcodeGroup_VEX_0F385C ),
|
||||
#else
|
||||
/* 5C */ ( BxOpcodeGroup_ERR ),
|
||||
#endif
|
||||
/* 5D */ ( BxOpcodeGroup_ERR ),
|
||||
#if BX_SUPPORT_AMX
|
||||
/* 5E */ ( BxOpcodeGroup_VEX_0F385E ),
|
||||
#else
|
||||
/* 5E */ ( BxOpcodeGroup_ERR ),
|
||||
#endif
|
||||
/* 5F */ ( BxOpcodeGroup_ERR ),
|
||||
/* 60 */ ( BxOpcodeGroup_ERR ),
|
||||
/* 61 */ ( BxOpcodeGroup_ERR ),
|
||||
|
|
|
@ -1610,8 +1610,14 @@ static const Bit64u BxOpcodeGroup_EVEX_0F3A0F[] = {
|
|||
last_opcode(ATTR_SSE_PREFIX_66, BX_IA_V512_VPALIGNR_VdqHdqWdqIb_Kmask)
|
||||
};
|
||||
|
||||
static const Bit64u BxOpcodeGroup_EVEX_0F3A14[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_L0 | ATTR_MASK_K0, BX_IA_V512_VPEXTRB_EbdVdqIb) };
|
||||
static const Bit64u BxOpcodeGroup_EVEX_0F3A15[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_L0 | ATTR_MASK_K0, BX_IA_V512_VPEXTRW_EwdVdqIb) };
|
||||
static const Bit64u BxOpcodeGroup_EVEX_0F3A14[] = {
|
||||
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_L0 | ATTR_MASK_K0 | ATTR_MODC0, BX_IA_V512_VPEXTRB_EdVdqIbR),
|
||||
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_L0 | ATTR_MASK_K0 | ATTR_MOD_MEM, BX_IA_V512_VPEXTRB_MbVdqIbM)
|
||||
};
|
||||
static const Bit64u BxOpcodeGroup_EVEX_0F3A15[] = {
|
||||
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_L0 | ATTR_MASK_K0 | ATTR_MODC0, BX_IA_V512_VPEXTRW_EdVdqIbR),
|
||||
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_L0 | ATTR_MASK_K0 | ATTR_MOD_MEM, BX_IA_V512_VPEXTRW_MwVdqIbM)
|
||||
};
|
||||
|
||||
static const Bit64u BxOpcodeGroup_EVEX_0F3A16[] = {
|
||||
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_L0 | ATTR_VEX_W0 | ATTR_MASK_K0, BX_IA_V512_VPEXTRD_EdVdqIb),
|
||||
|
|
|
@ -42,8 +42,14 @@ static const Bit64u BxOpcodeTable0F3A0F[] = {
|
|||
last_opcode(ATTR_SSE_PREFIX_66, BX_IA_PALIGNR_VdqWdqIb),
|
||||
};
|
||||
|
||||
static const Bit64u BxOpcodeTable0F3A14[] = { last_opcode(ATTR_SSE_PREFIX_66, BX_IA_PEXTRB_EbdVdqIb) };
|
||||
static const Bit64u BxOpcodeTable0F3A15[] = { last_opcode(ATTR_SSE_PREFIX_66, BX_IA_PEXTRW_EwdVdqIb) };
|
||||
static const Bit64u BxOpcodeTable0F3A14[] = {
|
||||
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_MODC0, BX_IA_PEXTRB_EdVdqIbR),
|
||||
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_MOD_MEM, BX_IA_PEXTRB_MbVdqIbM)
|
||||
};
|
||||
static const Bit64u BxOpcodeTable0F3A15[] = {
|
||||
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_MODC0, BX_IA_PEXTRW_EdVdqIbR),
|
||||
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_MOD_MEM, BX_IA_PEXTRW_MwVdqIbM)
|
||||
};
|
||||
|
||||
// opcode 0F 3A 16
|
||||
static const Bit64u BxOpcodeTable0F3A16[] = {
|
||||
|
|
|
@ -1347,8 +1347,10 @@ bx_define_opcode(BX_IA_ROUNDSD_VsdWsdIb, "roundsd", "roundsd", &BX_CPU_C::LOAD_W
|
|||
bx_define_opcode(BX_IA_BLENDPS_VpsWpsIb, "blendps", "blendps", &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::BLENDPS_VpsWpsIbR, BX_ISA_SSE4_1, OP_Vps, OP_Wps, OP_Ib, OP_NONE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_BLENDPD_VpdWpdIb, "blendpd", "blendpd", &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::BLENDPD_VpdWpdIbR, BX_ISA_SSE4_1, OP_Vpd, OP_Wpd, OP_Ib, OP_NONE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_PBLENDW_VdqWdqIb, "pblendw", "pblendw", &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::PBLENDW_VdqWdqIbR, BX_ISA_SSE4_1, OP_Vdq, OP_Wdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_PEXTRB_EbdVdqIb, "pextrb", "pextrb", &BX_CPU_C::PEXTRB_EbdVdqIbM, &BX_CPU_C::PEXTRB_EbdVdqIbR, BX_ISA_SSE4_1, OP_Ebd, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_PEXTRW_EwdVdqIb, "pextrw", "pextrw", &BX_CPU_C::PEXTRW_EwdVdqIbM, &BX_CPU_C::PEXTRW_EwdVdqIbR, BX_ISA_SSE4_1, OP_Ewd, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_PEXTRB_EdVdqIbR, "pextrb", "pextrb", NULL, &BX_CPU_C::PEXTRB_EdVdqIbR, BX_ISA_SSE4_1, OP_Ed, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_PEXTRB_MbVdqIbM, "pextrb", "pextrb", &BX_CPU_C::PEXTRB_MbVdqIbM, NULL, BX_ISA_SSE4_1, OP_Mb, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_PEXTRW_EdVdqIbR, "pextrw", "pextrw", NULL, &BX_CPU_C::PEXTRW_EdVdqIbR, BX_ISA_SSE4_1, OP_Ed, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_PEXTRW_MwVdqIbM, "pextrw", "pextrw", &BX_CPU_C::PEXTRW_MwVdqIbM, NULL, BX_ISA_SSE4_1, OP_Mw, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
|
||||
bx_define_opcode(BX_IA_PEXTRD_EdVdqIb, "pextrd", "pextrd", &BX_CPU_C::PEXTRD_EdVdqIbM, &BX_CPU_C::PEXTRD_EdVdqIbR, BX_ISA_SSE4_1, OP_Ed, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
|
||||
#if BX_SUPPORT_X86_64
|
||||
bx_define_opcode(BX_IA_PEXTRQ_EqVdqIb, "pextrq", "pextrq", &BX_CPU_C::PEXTRQ_EqVdqIbM, &BX_CPU_C::PEXTRQ_EqVdqIbR, BX_ISA_SSE4_1, OP_Eq, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
|
||||
|
@ -2277,8 +2279,10 @@ bx_define_opcode(BX_IA_V128_VMOVQ_EqVq, "vmovq", "vmovq", &BX_CPU_C::MOVSD_WsdVs
|
|||
bx_define_opcode(BX_IA_V128_VPINSRB_VdqEbIb, "vpinsrb", "vpinsrb", &BX_CPU_C::VPINSRB_VdqHdqEbIbM, &BX_CPU_C::VPINSRB_VdqHdqEbIbR, BX_ISA_AVX, OP_Vdq, OP_Hdq, OP_Ew, OP_Ib, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_V128_VPINSRW_VdqEwIb, "vpinsrw", "vpinsrw", &BX_CPU_C::VPINSRW_VdqHdqEwIbM, &BX_CPU_C::VPINSRW_VdqHdqEwIbR, BX_ISA_AVX, OP_Vdq, OP_Hdq, OP_Ew, OP_Ib, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_V128_VPEXTRW_GdUdqIb, "vpextrw", "vpextrw", &BX_CPU_C::BxError, &BX_CPU_C::PEXTRW_GdUdqIb, BX_ISA_AVX, OP_Gd, OP_Wdq, OP_Ib, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_V128_VPEXTRB_EbdVdqIb, "vpextrb", "vpextrb", &BX_CPU_C::PEXTRB_EbdVdqIbM, &BX_CPU_C::PEXTRB_EbdVdqIbR, BX_ISA_AVX, OP_Ebd, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_V128_VPEXTRW_EwdVdqIb, "vpextrw", "vpextrw", &BX_CPU_C::PEXTRW_EwdVdqIbM, &BX_CPU_C::PEXTRW_EwdVdqIbR, BX_ISA_AVX, OP_Ewd, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_V128_VPEXTRB_EdVdqIbR, "vpextrb", "vpextrb", NULL, &BX_CPU_C::PEXTRB_EdVdqIbR, BX_ISA_AVX, OP_Ed, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_V128_VPEXTRB_MbVdqIbM, "vpextrb", "vpextrb", &BX_CPU_C::PEXTRB_MbVdqIbM, NULL, BX_ISA_AVX, OP_Mb, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_V128_VPEXTRW_EdVdqIbR, "vpextrw", "vpextrw", NULL, &BX_CPU_C::PEXTRW_EdVdqIbR, BX_ISA_AVX, OP_Ed, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_V128_VPEXTRW_MwVdqIbM, "vpextrw", "vpextrw", &BX_CPU_C::PEXTRW_MwVdqIbM, NULL, BX_ISA_AVX, OP_Mw, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_AVX)
|
||||
|
||||
bx_define_opcode(BX_IA_V128_VPINSRD_VdqEdIb, "vpinsrd", "vpinsrd", &BX_CPU_C::VPINSRD_VdqHdqEdIbM, &BX_CPU_C::VPINSRD_VdqHdqEdIbR, BX_ISA_AVX, OP_Vdq, OP_Hdq, OP_Ed, OP_Ib, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_V128_VPINSRQ_VdqEqIb, "vpinsrq", "vpinsrq", &BX_CPU_C::VPINSRQ_VdqHdqEqIbM, &BX_CPU_C::VPINSRQ_VdqHdqEqIbR, BX_ISA_AVX, OP_Vdq, OP_Hdq, OP_Eq, OP_Ib, BX_PREPARE_AVX)
|
||||
|
@ -2756,6 +2760,21 @@ bx_define_opcode(BX_IA_MOVDIR64B_GdMdq, "movdir64b", "movdir64b", &BX_CPU_C::MOV
|
|||
bx_define_opcode(BX_IA_MOVDIR64B_GqMdq, "movdir64b", "movdir64b", &BX_CPU_C::MOVDIR64B, NULL, BX_ISA_MOVDIR64B, OP_Gq, OP_M, OP_NONE, OP_NONE, 0)
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
bx_define_opcode(BX_IA_LDTILECFG, "ldtilecfg", "ldtilecfg", &BX_CPU_C::LDTILECFG, NULL, BX_ISA_AMX, OP_M, OP_NONE, OP_NONE, OP_NONE, BX_PREPARE_AMX)
|
||||
bx_define_opcode(BX_IA_STTILECFG, "sttilecfg", "sttilecfg", &BX_CPU_C::STTILECFG, NULL, BX_ISA_AMX, OP_M, OP_NONE, OP_NONE, OP_NONE, BX_PREPARE_AMX)
|
||||
bx_define_opcode(BX_IA_TILELOADD_TnnnMdq, "tileloadd", "tileloadd", &BX_CPU_C::TILELOADD_TnnnMdq, NULL, BX_ISA_AMX, OP_Tnnn, OP_M, OP_NONE, OP_NONE, BX_PREPARE_AMX)
|
||||
bx_define_opcode(BX_IA_TILELOADDT1_TnnnMdq, "tileloaddt1", "tileloaddt1", &BX_CPU_C::TILELOADD_TnnnMdq, NULL, BX_ISA_AMX, OP_Tnnn, OP_M, OP_NONE, OP_NONE, BX_PREPARE_AMX)
|
||||
bx_define_opcode(BX_IA_TILESTORED_MdqTnnn, "tilestored", "tilestored", &BX_CPU_C::TILESTORED_MdqTnnn, NULL, BX_ISA_AMX, OP_M, OP_Tnnn, OP_NONE, OP_NONE, BX_PREPARE_AMX)
|
||||
bx_define_opcode(BX_IA_TILERELEASE, "tilerelease", "tilerelease", NULL, &BX_CPU_C::TILERELEASE, BX_ISA_AMX, OP_NONE, OP_NONE, OP_NONE, OP_NONE, BX_PREPARE_AMX)
|
||||
bx_define_opcode(BX_IA_TILEZERO_Tnnn, "tilezero", "tilezero", NULL, &BX_CPU_C::TILEZERO_Tnnn, BX_ISA_AMX, OP_Tnnn, OP_NONE, OP_NONE, OP_NONE, BX_PREPARE_AMX)
|
||||
bx_define_opcode(BX_IA_TDPBSSD_TnnnTrmTreg, "tdpbssd", "tdpbssd", NULL, &BX_CPU_C::TDPBSSD_TnnnTrmTreg, BX_ISA_AMX_INT8, OP_Tnnn, OP_Trm, OP_Treg, OP_NONE, BX_PREPARE_AMX)
|
||||
bx_define_opcode(BX_IA_TDPBSUD_TnnnTrmTreg, "tdpbsud", "tdpbsud", NULL, &BX_CPU_C::TDPBSUD_TnnnTrmTreg, BX_ISA_AMX_INT8, OP_Tnnn, OP_Trm, OP_Treg, OP_NONE, BX_PREPARE_AMX)
|
||||
bx_define_opcode(BX_IA_TDPBUSD_TnnnTrmTreg, "tdpbusd", "tdpbusd", NULL, &BX_CPU_C::TDPBUSD_TnnnTrmTreg, BX_ISA_AMX_INT8, OP_Tnnn, OP_Trm, OP_Treg, OP_NONE, BX_PREPARE_AMX)
|
||||
bx_define_opcode(BX_IA_TDPBUUD_TnnnTrmTreg, "tdpbuud", "tdpbuud", NULL, &BX_CPU_C::TDPBUUD_TnnnTrmTreg, BX_ISA_AMX_INT8, OP_Tnnn, OP_Trm, OP_Treg, OP_NONE, BX_PREPARE_AMX)
|
||||
bx_define_opcode(BX_IA_TDPBF16PS_TnnnTrmTreg, "tdpbf16ps", "tdpbf16ps", NULL, &BX_CPU_C::TDPBF16PS_TnnnTrmTreg, BX_ISA_AMX_BF16, OP_Tnnn, OP_Trm, OP_Treg, OP_NONE, BX_PREPARE_AMX)
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_AVX
|
||||
bx_define_opcode(BX_IA_KADDW_KGwKHwKEw, "kaddw", "kaddw", &BX_CPU_C::BxError, &BX_CPU_C::KADDW_KGwKHwKEwR, BX_ISA_AVX512_DQ, OP_KGw, OP_KHw, OP_KEw, OP_NONE, BX_PREPARE_OPMASK)
|
||||
bx_define_opcode(BX_IA_KADDQ_KGqKHqKEq, "kaddq", "kaddq", &BX_CPU_C::BxError, &BX_CPU_C::KADDQ_KGqKHqKEqR, BX_ISA_AVX512_BW, OP_KGq, OP_KHq, OP_KEq, OP_NONE, BX_PREPARE_OPMASK)
|
||||
|
@ -3973,8 +3992,10 @@ bx_define_opcode(BX_IA_V512_VCVTTSD2USI_GqWsd, "vcvttsd2usi", "vcvttsd2usiq", &B
|
|||
bx_define_opcode(BX_IA_V512_VPINSRB_VdqEbIb, "vpinsrb", "vpinsrb", &BX_CPU_C::VPINSRB_VdqHdqEbIbM, &BX_CPU_C::VPINSRB_VdqHdqEbIbR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_Ew, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VPINSRW_VdqEwIb, "vpinsrw", "vpinsrw", &BX_CPU_C::VPINSRW_VdqHdqEwIbM, &BX_CPU_C::VPINSRW_VdqHdqEwIbR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_Ew, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VPEXTRW_GdUdqIb, "vpextrw", "vpextrw", &BX_CPU_C::BxError, &BX_CPU_C::PEXTRW_GdUdqIb, BX_ISA_AVX512_BW, OP_Gd, OP_Wdq, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VPEXTRB_EbdVdqIb, "vpextrb", "vpextrb", &BX_CPU_C::PEXTRB_EbdVdqIbM, &BX_CPU_C::PEXTRB_EbdVdqIbR, BX_ISA_AVX512_BW, OP_Ebd, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VPEXTRW_EwdVdqIb, "vpextrw", "vpextrw", &BX_CPU_C::PEXTRW_EwdVdqIbM, &BX_CPU_C::PEXTRW_EwdVdqIbR, BX_ISA_AVX512_BW, OP_Ewd, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VPEXTRB_EdVdqIbR, "vpextrb", "vpextrb", NULL, &BX_CPU_C::PEXTRB_EdVdqIbR, BX_ISA_AVX512_BW, OP_Ed, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VPEXTRB_MbVdqIbM, "vpextrb", "vpextrb", &BX_CPU_C::PEXTRB_MbVdqIbM, NULL, BX_ISA_AVX512_BW, OP_Mb, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VPEXTRW_EdVdqIbR, "vpextrw", "vpextrw", NULL, &BX_CPU_C::PEXTRW_EdVdqIbR, BX_ISA_AVX512_BW, OP_Ed, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VPEXTRW_MwVdqIbM, "vpextrw", "vpextrw", &BX_CPU_C::PEXTRW_MwVdqIbM, NULL, BX_ISA_AVX512_BW, OP_Mw, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
|
||||
bx_define_opcode(BX_IA_V512_VPINSRD_VdqEdIb, "vpinsrd", "vpinsrd", &BX_CPU_C::VPINSRD_VdqHdqEdIbM, &BX_CPU_C::VPINSRD_VdqHdqEdIbR, BX_ISA_AVX512_DQ, OP_Vdq, OP_Hdq, OP_Ed, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VPINSRQ_VdqEqIb, "vpinsrq", "vpinsrq", &BX_CPU_C::VPINSRQ_VdqHdqEqIbM, &BX_CPU_C::VPINSRQ_VdqHdqEqIbR, BX_ISA_AVX512_DQ, OP_Vdq, OP_Hdq, OP_Eq, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
|
|
|
@ -33,6 +33,10 @@
|
|||
#include "apic.h"
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
#include "avx/amx.h"
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
BX_CPU_C::BX_CPU_C(unsigned id): bx_cpuid(id)
|
||||
|
@ -131,6 +135,13 @@ void BX_CPU_C::initialize(void)
|
|||
xsave_xrestor_init();
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
amx = NULL;
|
||||
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_AMX)) {
|
||||
amx = new AMX;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if BX_CONFIGURE_MSRS
|
||||
for (unsigned n=0; n < BX_MSR_MAX_INDEX; n++) {
|
||||
BX_CPU_THIS_PTR msrs[n] = 0;
|
||||
|
@ -505,6 +516,27 @@ void BX_CPU_C::register_state(void)
|
|||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_AMX)) {
|
||||
bx_list_c *amx_list = new bx_list_c(cpu, "AMX");
|
||||
BXRS_DEC_PARAM_FIELD(amx_list, palette, amx->palette_id);
|
||||
BXRS_DEC_PARAM_FIELD(amx_list, start_row, amx->start_row);
|
||||
BXRS_HEX_PARAM_FIELD(amx_list, tile_use_tracker, amx->tile_use_tracker);
|
||||
for (n=0; n<8; n++) {
|
||||
sprintf(name, "tile%d_rows", n);
|
||||
new bx_shadow_num_c(amx_list, name, &(amx->tilecfg[n].rows), BASE_DEC);
|
||||
sprintf(name, "tile%d_colsb", n);
|
||||
new bx_shadow_num_c(amx_list, name, &(amx->tilecfg[n].bytes_per_row), BASE_DEC);
|
||||
for(unsigned row=0;row < 16;row++) {
|
||||
for(unsigned j=0;j < BX_VLMAX*2;j++) {
|
||||
sprintf(name, "tile%d_row%d_%d", n, row, j);
|
||||
new bx_shadow_num_c(amx_list, name, &(amx->tile[n].row[row].vmm64u(j)), BASE_HEX);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif // BX_CPU_LEVEL >= 6
|
||||
|
||||
#if BX_SUPPORT_MONITOR_MWAIT
|
||||
|
@ -709,6 +741,10 @@ BX_CPU_C::~BX_CPU_C()
|
|||
delete lapic;
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
delete amx;
|
||||
#endif
|
||||
|
||||
#if InstrumentCPU
|
||||
delete stats;
|
||||
#endif
|
||||
|
|
|
@ -524,6 +524,14 @@ void BX_CPU_C::handleAvxModeChange(void)
|
|||
}
|
||||
}
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
if (! long64_mode() || ! BX_CPU_THIS_PTR cr4.get_OSXSAVE() ||
|
||||
(~BX_CPU_THIS_PTR xcr0.val32 & (BX_XCR0_XTILECFG_MASK | BX_XCR0_XTILEDATA_MASK)) != 0)
|
||||
clear_amx_ok();
|
||||
else
|
||||
set_amx_ok();
|
||||
#endif
|
||||
|
||||
updateFetchModeMask(); /* AVX_OK changed */
|
||||
}
|
||||
|
||||
|
@ -578,6 +586,21 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::BxNoEVEX(bxInstruction_c *i)
|
|||
}
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
// Substitute execution handler for AMX instructions when AMX is not usable.
// Raises #UD unless the CPU is in 64-bit mode with CR4.OSXSAVE set and both
// XTILECFG and XTILEDATA are enabled in XCR0.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::BxNoAMX(bxInstruction_c *i)
{
  // 64-bit mode and CR4.OSXSAVE are both mandatory.
  if (! (long64_mode() && BX_CPU_THIS_PTR cr4.get_OSXSAVE()))
    exception(BX_UD_EXCEPTION, 0);

  // Both tile state components have to be enabled in XCR0.
  const Bit32u required_xcr0_bits = BX_XCR0_XTILECFG_MASK | BX_XCR0_XTILEDATA_MASK;
  if ((BX_CPU_THIS_PTR xcr0.val32 & required_xcr0_bits) != required_xcr0_bits)
    exception(BX_UD_EXCEPTION, 0);

  // NOTE(review): presumably unreachable - one of the checks above is
  // expected to have raised #UD whenever this handler is installed.
  BX_ASSERT(0);

  BX_NEXT_TRACE(i); // keep compiler happy
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
void BX_CPU_C::handleCpuContextChange(void)
|
||||
|
|
|
@ -122,7 +122,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PBLENDW_VdqWdqIbR(bxInstruction_c *i)
|
|||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRB_EbdVdqIbR(bxInstruction_c *i)
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRB_EdVdqIbR(bxInstruction_c *i)
|
||||
{
|
||||
BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
|
||||
Bit8u result = op.xmmubyte(i->Ib() & 0xF);
|
||||
|
@ -131,7 +131,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRB_EbdVdqIbR(bxInstruction_c *i)
|
|||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRB_EbdVdqIbM(bxInstruction_c *i)
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRB_MbVdqIbM(bxInstruction_c *i)
|
||||
{
|
||||
BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
|
||||
Bit8u result = op.xmmubyte(i->Ib() & 0xF);
|
||||
|
@ -142,7 +142,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRB_EbdVdqIbM(bxInstruction_c *i)
|
|||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_EwdVdqIbR(bxInstruction_c *i)
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_EdVdqIbR(bxInstruction_c *i)
|
||||
{
|
||||
BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
|
||||
Bit16u result = op.xmm16u(i->Ib() & 7);
|
||||
|
@ -151,7 +151,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_EwdVdqIbR(bxInstruction_c *i)
|
|||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_EwdVdqIbM(bxInstruction_c *i)
|
||||
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_MwVdqIbM(bxInstruction_c *i)
|
||||
{
|
||||
BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
|
||||
Bit16u result = op.xmm16u(i->Ib() & 7);
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2008-2019 Stanislav Shwartsman
|
||||
// Copyright (c) 2008-2024 Stanislav Shwartsman
|
||||
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
|
@ -1043,6 +1043,87 @@ bool BX_CPU_C::xsave_uintr_state_xinuse(void)
|
|||
}
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
|
||||
#include "avx/amx.h"
|
||||
|
||||
// TILECFG state management //
|
||||
// Write the 64-byte TILECFG XSAVE component to memory at 'offset'.
//
// The image is all zeroes when no tiles are configured. Otherwise it follows
// the architectural tile configuration layout:
//   byte  0      palette id
//   byte  1      start_row
//   bytes 16..31 bytes-per-row (colsb) of tiles 0..7, 16 bits each
//   bytes 48..55 row count of tiles 0..7, 8 bits each
void BX_CPU_C::xsave_tilecfg_state(bxInstruction_c *i, bx_address offset)
{
  BxPackedAvxRegister tilecfg;
  tilecfg.clear();

  if (BX_CPU_THIS_PTR amx->tiles_configured()) {
    tilecfg.vmmubyte(0) = BX_CPU_THIS_PTR amx->palette_id;
    tilecfg.vmmubyte(1) = BX_CPU_THIS_PTR amx->start_row;

    for (unsigned n=0; n < 8; n++) {
      // 16-bit slot n starting at byte 16 holds colsb; byte 48+n holds rows.
      // (These two fields were previously stored swapped.)
      tilecfg.vmm16u(8+n) = BX_CPU_THIS_PTR amx->tilecfg[n].bytes_per_row;
      tilecfg.vmmubyte(48+n) = BX_CPU_THIS_PTR amx->tilecfg[n].rows;
    }
  }

  write_virtual_zmmword(i->seg(), offset, &tilecfg);
}
|
||||
|
||||
// Load the 64-byte TILECFG XSAVE component from memory at 'offset' and apply
// it through configure_tiles(); if that reports failure the AMX state is
// cleared instead.
void BX_CPU_C::xrstor_tilecfg_state(bxInstruction_c *i, bx_address offset)
{
  BxPackedAvxRegister saved_cfg;
  read_virtual_zmmword(i->seg(), offset, &saved_cfg);

  if (configure_tiles(i, saved_cfg))
    return;

  // configuration was rejected
  BX_CPU_THIS_PTR amx->clear();
}
|
||||
|
||||
void BX_CPU_C::xrstor_init_tilecfg_state(void)
|
||||
{
|
||||
BX_CPU_THIS_PTR amx->clear();
|
||||
}
|
||||
|
||||
bool BX_CPU_C::xsave_tilecfg_state_xinuse(void)
|
||||
{
|
||||
return BX_CPU_THIS_PTR amx->tiles_configured();
|
||||
}
|
||||
|
||||
// TILEDATA state management //
|
||||
// Write the TILEDATA XSAVE component to memory starting at 'offset'.
//
// Tiles are stored back to back; each tile occupies BX_TILE_MAX_ROWS rows of
// 64 bytes. Every effective address is wrapped with the instruction's
// address-size mask.
void BX_CPU_C::xsave_tiledata_state(bxInstruction_c *i, bx_address offset)
{
  bx_address asize_mask = i->asize_mask();

  for (unsigned tile=0; tile < BX_TILE_REGISTERS; tile++) {
    // Iterate over every row of the tile. The bound must be the per-tile row
    // count (it is also the stride used in the offset below), not the number
    // of tile registers, otherwise only part of each tile is saved.
    for (unsigned row=0; row < BX_TILE_MAX_ROWS; row++) {
      write_virtual_zmmword(i->seg(), (offset+(tile*BX_TILE_MAX_ROWS+row)*64) & asize_mask, &(BX_CPU_THIS_PTR amx->tile[tile].row[row]));
    }
  }
}
|
||||
|
||||
// Load the TILEDATA XSAVE component from memory starting at 'offset'.
//
// Uses the same layout as xsave_tiledata_state(): tiles back to back, each
// BX_TILE_MAX_ROWS rows of 64 bytes, addresses wrapped with the address-size
// mask. Every restored tile is marked as used.
void BX_CPU_C::xrstor_tiledata_state(bxInstruction_c *i, bx_address offset)
{
  bx_address asize_mask = i->asize_mask();

  for (unsigned tile=0; tile < BX_TILE_REGISTERS; tile++) {
    // Bound by the per-tile row count (the stride used in the offset), not by
    // the number of tile registers, so that all rows are restored.
    for (unsigned row=0; row < BX_TILE_MAX_ROWS; row++) {
      read_virtual_zmmword(i->seg(), (offset+(tile*BX_TILE_MAX_ROWS+row)*64) & asize_mask, &(BX_CPU_THIS_PTR amx->tile[tile].row[row]));
    }
    BX_CPU_THIS_PTR amx->set_tile_used(tile);
  }
}
|
||||
|
||||
void BX_CPU_C::xrstor_init_tiledata_state(void)
|
||||
{
|
||||
for (unsigned tile=0; tile < BX_TILE_REGISTERS; tile++) {
|
||||
BX_CPU_THIS_PTR amx->tile[tile].clear();
|
||||
BX_CPU_THIS_PTR amx->clear_tile_used(tile);
|
||||
}
|
||||
}
|
||||
|
||||
bool BX_CPU_C::xsave_tiledata_state_xinuse(void)
|
||||
{
|
||||
return (BX_CPU_THIS_PTR amx->tile_use_tracker == 0); // all tiles are zero
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
Bit32u BX_CPU_C::get_xinuse_vector(Bit32u requested_feature_bitmap)
|
||||
{
|
||||
Bit32u xinuse = 0;
|
||||
|
@ -1166,6 +1247,15 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::XSETBV(bxInstruction_c *i)
|
|||
}
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_AMX
|
||||
if (EAX & BX_XCR0_XTILE_BITS_MASK) {
|
||||
if ((EAX & BX_XCR0_XTILE_BITS_MASK) != BX_XCR0_XTILE_BITS_MASK) {
|
||||
BX_ERROR(("XSETBV: Illegal attempt to enable AMX state"));
|
||||
exception(BX_GP_EXCEPTION, 0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
BX_CPU_THIS_PTR xcr0.set32(EAX);
|
||||
|
||||
#if BX_SUPPORT_AVX
|
||||
|
|
|
@ -7223,6 +7223,7 @@ From here, you may use the following commands:
|
|||
mmx List of all MMX registers and their contents
|
||||
sse|xmm List of all SSE registers and their contents
|
||||
ymm|zmm List of all AVX registers and their contents
|
||||
amx|tile n Show AMX state and TILE register contents
|
||||
sreg Show segment registers and their contents
|
||||
dreg Show debug registers and their contents
|
||||
creg Show control registers and their contents
|
||||
|
|
Loading…
Reference in New Issue