AMX support (#212)

This commit is contained in:
Stanislav Shwartsman 2024-01-10 20:13:25 +02:00 committed by GitHub
parent cfa7276cb9
commit 3a02e85599
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
35 changed files with 3927 additions and 2798 deletions

View File

@ -33,6 +33,7 @@ case $which_config in
--enable-svm \
--enable-avx \
--enable-evex \
--enable-amx \
--enable-cet \
--enable-pci \
--enable-clgd54xx \

View File

@ -10,8 +10,8 @@ Brief summary :
! Implemented Linear Address Separation (LASS) extension
! Implemented 57-bit Linear Address and 5-Level Paging support
! Implemented User-Level Interrupt (UINTR) extension
! Implemented recently published Intel instruction sets:
- MOVDIRI/MOVDIR64B, AVX512 BF16, AVX IFMA52, AVX-VNNI/VNNI-INT8/VNNI-INT16, AVX-NE-CONVERT, CMPCCXADD, SM3/SM4, SHA512, WRMSRNS, MSRLIST, WAITPKG, SERIALIZE
! Implemented Intel instruction sets:
- MOVDIRI/MOVDIR64B, AMX, AVX512 BF16, AVX IFMA52, AVX-VNNI/VNNI-INT8/VNNI-INT16, AVX-NE-CONVERT, CMPCCXADD, SM3/SM4, SHA512, WRMSRNS, MSRLIST, WAITPKG, SERIALIZE
! CPUID: Added Xeon Sapphire Rapids CPU definition
- Improved 64-bit guest support in Bochs internal debugger, added new internal debugger commands
- Bochs debugger enhanced with new commands (setpmem, loadmem, deref, ...)
@ -40,11 +40,11 @@ Detailed change log :
- Implemented Linear Address Separation (LASS) extension
- Implemented 57-bit Linear Address and 5-Level Paging support
- Implemented User-Level Interrupt (UINTR) extension
- Implemented recently published Intel instruction sets:
- MOVDIRI/MOVDIR64B, AVX512 BF16, AVX IFMA52, AVX-VNNI/VNNI-INT8/VNNI-INT16, AVX-NE-CONVERT, CMPCCXADD, SM3/SM4, SHA512, WRMSRNS, MSRLIST, WAITPKG, SERIALIZE
- Implemented Intel instruction sets:
- MOVDIRI/MOVDIR64B, AMX, AVX512 BF16, AVX IFMA52, AVX-VNNI/VNNI-INT8/VNNI-INT16, AVX-NE-CONVERT, CMPCCXADD, SM3/SM4, SHA512, WRMSRNS, MSRLIST, WAITPKG, SERIALIZE
- CPUID: Added Xeon Sapphire Rapids CPU definition
- Features PKS, WAITPKG, UINTR, AVX-VNNI, AVX512_BF16, MOVDIRI/MOVDIR64, LA57, SERIALIZE and more
Not yet supported but will be added in future: AVX512_FP16, AMX, VMX Extensions (HLAT, IPI Virtualization)
Not yet supported but will be added in future: AVX512_FP16, VMX Extensions (HLAT, IPI Virtualization)
- Bochs Debugger and Instrumentation
- Updated Bochs instrumentation examples for new disassembler introduced in Bochs 2.7 release.

View File

@ -215,6 +215,7 @@
</Bscmake>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\cpu\avx\amx.cc" />
<ClCompile Include="..\cpu\avx\avx.cc" />
<ClCompile Include="..\cpu\avx\avx2.cc" />
<ClCompile Include="..\cpu\avx\avx512.cc" />

View File

@ -208,6 +208,7 @@
</Bscmake>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\cpu\avx\amx.cc" />
<ClCompile Include="..\cpu\avx\avx.cc" />
<ClCompile Include="..\cpu\avx\avx2.cc" />
<ClCompile Include="..\cpu\avx\avx512.cc" />

View File

@ -873,6 +873,65 @@ void bx_dbg_print_avx_state(unsigned vlen)
#endif
}
// Debugger helper: print the AMX tile configuration of the currently
// selected debugger CPU (palette, start row and the rows x bytes-per-row
// shape of each of the 8 tiles). Values are read through the "AMX.*"
// simulator parameters. If AMX is compiled out or the CPU model does not
// report BX_ISA_AMX, a single "not supported" line is printed instead.
void bx_dbg_print_amx_state(void)
{
#if BX_SUPPORT_AMX
  if (BX_CPU(dbg_cpu)->is_cpu_extension_supported(BX_ISA_AMX)) {
    char name_buf[20];

    unsigned palette = SIM->get_param_num("AMX.palette", dbg_cpu_list)->get();
    unsigned first_row = SIM->get_param_num("AMX.start_row", dbg_cpu_list)->get();
    dbg_printf("TILECFG palette=%d, start_row=%d\n", palette, first_row);

    unsigned t = 0;
    while (t < 8) {
      // per-tile shape: number of rows, then bytes per row ("colsb")
      sprintf(name_buf, "AMX.tile%d_rows", t);
      unsigned n_rows = SIM->get_param_num(name_buf, dbg_cpu_list)->get64();

      sprintf(name_buf, "AMX.tile%d_colsb", t);
      unsigned n_colsb = SIM->get_param_num(name_buf, dbg_cpu_list)->get64();

      dbg_printf("TILECFG[%d]: %2d x %2d\n", t, n_rows, n_colsb);
      t++;
    }

    dbg_printf("use \"tile <tile_number>\" command to print tile content\n");
    return;
  }
#endif

  // reached when AMX is compiled out or not supported by this CPU
  dbg_printf("The CPU doesn't support AMX state !\n");
}
// Debugger "tile <n>" command: dump the content of one AMX tile register.
//
// tile - tile index as typed by the user; only 0..7 is accepted.
//
// Prints the configured shape of the tile followed by all 16 rows; each row
// is shown as eight 64-bit parameter values, most significant first, read
// from the "AMX.tile<N>_row<R>_<Q>" simulator parameters.
// If AMX is compiled out or the CPU does not report BX_ISA_AMX, a single
// "not supported" line is printed instead.
void bx_dbg_print_amx_tile_command(int tile)
{
#if BX_SUPPORT_AMX
  if (BX_CPU(dbg_cpu)->is_cpu_extension_supported(BX_ISA_AMX)) {
    // 'tile' is signed: a negative value must be rejected as well, otherwise
    // a parameter name that does not exist is built and the result of
    // get_param_num() is dereferenced without a check.
    if (tile >= 0 && tile < 8) {
      char param_name[30];

      sprintf(param_name, "AMX.tile%d_rows", tile);
      unsigned rows = SIM->get_param_num(param_name, dbg_cpu_list)->get64();
      sprintf(param_name, "AMX.tile%d_colsb", tile);
      unsigned cols = SIM->get_param_num(param_name, dbg_cpu_list)->get64();
      dbg_printf("TILE[%d]: %2d x %2d\n", tile, rows, cols);

      for (int row=0;row<16;row++) {
        dbg_printf("row[%02d]: ", row);
        // walk the row from its highest 128-bit chunk down to chunk 0;
        // every chunk is made of two 64-bit parameters (odd = high half)
        for (int j=BX_VL512-1;j >= 0; j--) {
          sprintf(param_name, "AMX.tile%d_row%d_%d", tile, row, j*2+1);
          Bit64u hi = SIM->get_param_num(param_name, dbg_cpu_list)->get64();
          sprintf(param_name, "AMX.tile%d_row%d_%d", tile, row, j*2);
          Bit64u lo = SIM->get_param_num(param_name, dbg_cpu_list)->get64();
          dbg_printf("%08x_%08x_%08x_%08x", GET32H(hi), GET32L(hi), GET32H(lo), GET32L(lo));
          if (j!=0) dbg_printf("_");
        }
        dbg_printf("\n");
      }
    }
    else {
      dbg_printf("TILE[%d]: invalid or not configured\n", tile);
    }
  }
  else
#endif
  {
    dbg_printf("The CPU doesn't support AMX state !\n");
  }
}
void bx_dbg_print_mmx_state(void)
{
#if BX_CPU_LEVEL >= 5
@ -1160,11 +1219,9 @@ void bx_dbg_info_registers_command(int which_regs_mask)
bx_dbg_info_flags();
}
#if BX_SUPPORT_FPU
if (which_regs_mask & BX_INFO_FPU_REGS) {
bx_dbg_print_fpu_state();
}
#endif
if (which_regs_mask & BX_INFO_MMX_REGS) {
bx_dbg_print_mmx_state();
@ -1189,6 +1246,10 @@ void bx_dbg_info_registers_command(int which_regs_mask)
}
}
}
if (which_regs_mask & BX_INFO_AMX_REGS) {
bx_dbg_print_amx_state();
}
}
//

View File

@ -151,7 +151,9 @@ void bx_dbg_quit_command(void);
#define BX_INFO_SSE_REGS 0x08
#define BX_INFO_YMM_REGS 0x10
#define BX_INFO_ZMM_REGS 0x20
#define BX_INFO_AMX_REGS 0x40
void bx_dbg_info_registers_command(int);
void bx_dbg_print_amx_tile_command(int tile);
void bx_dbg_info_ivt_command(unsigned from, unsigned to);
void bx_dbg_info_idt_command(unsigned from, unsigned to);
void bx_dbg_info_gdt_command(unsigned from, unsigned to);

File diff suppressed because it is too large Load Diff

View File

@ -91,6 +91,8 @@ sse|xmm { bxlval.sval = strdup(bxtext); return(BX_TOKEN_XMM); }
ymm { bxlval.sval = strdup(bxtext); return(BX_TOKEN_YMM); }
zmm { bxlval.sval = strdup(bxtext); return(BX_TOKEN_ZMM); }
avx { bxlval.sval = strdup(bxtext); return(BX_TOKEN_AVX); }
amx { bxlval.sval = strdup(bxtext); return(BX_TOKEN_AMX); }
tile { bxlval.sval = strdup(bxtext); return(BX_TOKEN_TILE); }
mmx { bxlval.sval = strdup(bxtext); return(BX_TOKEN_MMX); }
cpu { bxlval.sval = strdup(bxtext); return(BX_TOKEN_CPU); }
idt { bxlval.sval = strdup(bxtext); return(BX_TOKEN_IDT); }

File diff suppressed because it is too large Load Diff

View File

@ -93,90 +93,92 @@ extern int bxdebug;
BX_TOKEN_YMM = 294, /* BX_TOKEN_YMM */
BX_TOKEN_ZMM = 295, /* BX_TOKEN_ZMM */
BX_TOKEN_AVX = 296, /* BX_TOKEN_AVX */
BX_TOKEN_IDT = 297, /* BX_TOKEN_IDT */
BX_TOKEN_IVT = 298, /* BX_TOKEN_IVT */
BX_TOKEN_GDT = 299, /* BX_TOKEN_GDT */
BX_TOKEN_LDT = 300, /* BX_TOKEN_LDT */
BX_TOKEN_TSS = 301, /* BX_TOKEN_TSS */
BX_TOKEN_TAB = 302, /* BX_TOKEN_TAB */
BX_TOKEN_ALL = 303, /* BX_TOKEN_ALL */
BX_TOKEN_LINUX = 304, /* BX_TOKEN_LINUX */
BX_TOKEN_DEBUG_REGS = 305, /* BX_TOKEN_DEBUG_REGS */
BX_TOKEN_CONTROL_REGS = 306, /* BX_TOKEN_CONTROL_REGS */
BX_TOKEN_SEGMENT_REGS = 307, /* BX_TOKEN_SEGMENT_REGS */
BX_TOKEN_EXAMINE = 308, /* BX_TOKEN_EXAMINE */
BX_TOKEN_XFORMAT = 309, /* BX_TOKEN_XFORMAT */
BX_TOKEN_DISFORMAT = 310, /* BX_TOKEN_DISFORMAT */
BX_TOKEN_RESTORE = 311, /* BX_TOKEN_RESTORE */
BX_TOKEN_WRITEMEM = 312, /* BX_TOKEN_WRITEMEM */
BX_TOKEN_LOADMEM = 313, /* BX_TOKEN_LOADMEM */
BX_TOKEN_SETPMEM = 314, /* BX_TOKEN_SETPMEM */
BX_TOKEN_DEREF = 315, /* BX_TOKEN_DEREF */
BX_TOKEN_SYMBOLNAME = 316, /* BX_TOKEN_SYMBOLNAME */
BX_TOKEN_QUERY = 317, /* BX_TOKEN_QUERY */
BX_TOKEN_PENDING = 318, /* BX_TOKEN_PENDING */
BX_TOKEN_TAKE = 319, /* BX_TOKEN_TAKE */
BX_TOKEN_DMA = 320, /* BX_TOKEN_DMA */
BX_TOKEN_IRQ = 321, /* BX_TOKEN_IRQ */
BX_TOKEN_SMI = 322, /* BX_TOKEN_SMI */
BX_TOKEN_NMI = 323, /* BX_TOKEN_NMI */
BX_TOKEN_TLB = 324, /* BX_TOKEN_TLB */
BX_TOKEN_DISASM = 325, /* BX_TOKEN_DISASM */
BX_TOKEN_INSTRUMENT = 326, /* BX_TOKEN_INSTRUMENT */
BX_TOKEN_STRING = 327, /* BX_TOKEN_STRING */
BX_TOKEN_STOP = 328, /* BX_TOKEN_STOP */
BX_TOKEN_DOIT = 329, /* BX_TOKEN_DOIT */
BX_TOKEN_CRC = 330, /* BX_TOKEN_CRC */
BX_TOKEN_TRACE = 331, /* BX_TOKEN_TRACE */
BX_TOKEN_TRACEREG = 332, /* BX_TOKEN_TRACEREG */
BX_TOKEN_TRACEMEM = 333, /* BX_TOKEN_TRACEMEM */
BX_TOKEN_SWITCH_MODE = 334, /* BX_TOKEN_SWITCH_MODE */
BX_TOKEN_SIZE = 335, /* BX_TOKEN_SIZE */
BX_TOKEN_PTIME = 336, /* BX_TOKEN_PTIME */
BX_TOKEN_TIMEBP_ABSOLUTE = 337, /* BX_TOKEN_TIMEBP_ABSOLUTE */
BX_TOKEN_TIMEBP = 338, /* BX_TOKEN_TIMEBP */
BX_TOKEN_MODEBP = 339, /* BX_TOKEN_MODEBP */
BX_TOKEN_VMEXITBP = 340, /* BX_TOKEN_VMEXITBP */
BX_TOKEN_PRINT_STACK = 341, /* BX_TOKEN_PRINT_STACK */
BX_TOKEN_BT = 342, /* BX_TOKEN_BT */
BX_TOKEN_WATCH = 343, /* BX_TOKEN_WATCH */
BX_TOKEN_UNWATCH = 344, /* BX_TOKEN_UNWATCH */
BX_TOKEN_READ = 345, /* BX_TOKEN_READ */
BX_TOKEN_WRITE = 346, /* BX_TOKEN_WRITE */
BX_TOKEN_SHOW = 347, /* BX_TOKEN_SHOW */
BX_TOKEN_LOAD_SYMBOLS = 348, /* BX_TOKEN_LOAD_SYMBOLS */
BX_TOKEN_SET_MAGIC_BREAK_POINTS = 349, /* BX_TOKEN_SET_MAGIC_BREAK_POINTS */
BX_TOKEN_CLEAR_MAGIC_BREAK_POINTS = 350, /* BX_TOKEN_CLEAR_MAGIC_BREAK_POINTS */
BX_TOKEN_SYMBOLS = 351, /* BX_TOKEN_SYMBOLS */
BX_TOKEN_LIST_SYMBOLS = 352, /* BX_TOKEN_LIST_SYMBOLS */
BX_TOKEN_GLOBAL = 353, /* BX_TOKEN_GLOBAL */
BX_TOKEN_WHERE = 354, /* BX_TOKEN_WHERE */
BX_TOKEN_PRINT_STRING = 355, /* BX_TOKEN_PRINT_STRING */
BX_TOKEN_NUMERIC = 356, /* BX_TOKEN_NUMERIC */
BX_TOKEN_PAGE = 357, /* BX_TOKEN_PAGE */
BX_TOKEN_HELP = 358, /* BX_TOKEN_HELP */
BX_TOKEN_XML = 359, /* BX_TOKEN_XML */
BX_TOKEN_CALC = 360, /* BX_TOKEN_CALC */
BX_TOKEN_ADDLYT = 361, /* BX_TOKEN_ADDLYT */
BX_TOKEN_REMLYT = 362, /* BX_TOKEN_REMLYT */
BX_TOKEN_LYT = 363, /* BX_TOKEN_LYT */
BX_TOKEN_SOURCE = 364, /* BX_TOKEN_SOURCE */
BX_TOKEN_DEVICE = 365, /* BX_TOKEN_DEVICE */
BX_TOKEN_GENERIC = 366, /* BX_TOKEN_GENERIC */
BX_TOKEN_DEREF_CHR = 367, /* BX_TOKEN_DEREF_CHR */
BX_TOKEN_RSHIFT = 368, /* BX_TOKEN_RSHIFT */
BX_TOKEN_LSHIFT = 369, /* BX_TOKEN_LSHIFT */
BX_TOKEN_EQ = 370, /* BX_TOKEN_EQ */
BX_TOKEN_NE = 371, /* BX_TOKEN_NE */
BX_TOKEN_LE = 372, /* BX_TOKEN_LE */
BX_TOKEN_GE = 373, /* BX_TOKEN_GE */
BX_TOKEN_REG_IP = 374, /* BX_TOKEN_REG_IP */
BX_TOKEN_REG_EIP = 375, /* BX_TOKEN_REG_EIP */
BX_TOKEN_REG_RIP = 376, /* BX_TOKEN_REG_RIP */
BX_TOKEN_REG_SSP = 377, /* BX_TOKEN_REG_SSP */
NOT = 378, /* NOT */
NEG = 379, /* NEG */
INDIRECT = 380 /* INDIRECT */
BX_TOKEN_AMX = 297, /* BX_TOKEN_AMX */
BX_TOKEN_TILE = 298, /* BX_TOKEN_TILE */
BX_TOKEN_IDT = 299, /* BX_TOKEN_IDT */
BX_TOKEN_IVT = 300, /* BX_TOKEN_IVT */
BX_TOKEN_GDT = 301, /* BX_TOKEN_GDT */
BX_TOKEN_LDT = 302, /* BX_TOKEN_LDT */
BX_TOKEN_TSS = 303, /* BX_TOKEN_TSS */
BX_TOKEN_TAB = 304, /* BX_TOKEN_TAB */
BX_TOKEN_ALL = 305, /* BX_TOKEN_ALL */
BX_TOKEN_LINUX = 306, /* BX_TOKEN_LINUX */
BX_TOKEN_DEBUG_REGS = 307, /* BX_TOKEN_DEBUG_REGS */
BX_TOKEN_CONTROL_REGS = 308, /* BX_TOKEN_CONTROL_REGS */
BX_TOKEN_SEGMENT_REGS = 309, /* BX_TOKEN_SEGMENT_REGS */
BX_TOKEN_EXAMINE = 310, /* BX_TOKEN_EXAMINE */
BX_TOKEN_XFORMAT = 311, /* BX_TOKEN_XFORMAT */
BX_TOKEN_DISFORMAT = 312, /* BX_TOKEN_DISFORMAT */
BX_TOKEN_RESTORE = 313, /* BX_TOKEN_RESTORE */
BX_TOKEN_WRITEMEM = 314, /* BX_TOKEN_WRITEMEM */
BX_TOKEN_LOADMEM = 315, /* BX_TOKEN_LOADMEM */
BX_TOKEN_SETPMEM = 316, /* BX_TOKEN_SETPMEM */
BX_TOKEN_DEREF = 317, /* BX_TOKEN_DEREF */
BX_TOKEN_SYMBOLNAME = 318, /* BX_TOKEN_SYMBOLNAME */
BX_TOKEN_QUERY = 319, /* BX_TOKEN_QUERY */
BX_TOKEN_PENDING = 320, /* BX_TOKEN_PENDING */
BX_TOKEN_TAKE = 321, /* BX_TOKEN_TAKE */
BX_TOKEN_DMA = 322, /* BX_TOKEN_DMA */
BX_TOKEN_IRQ = 323, /* BX_TOKEN_IRQ */
BX_TOKEN_SMI = 324, /* BX_TOKEN_SMI */
BX_TOKEN_NMI = 325, /* BX_TOKEN_NMI */
BX_TOKEN_TLB = 326, /* BX_TOKEN_TLB */
BX_TOKEN_DISASM = 327, /* BX_TOKEN_DISASM */
BX_TOKEN_INSTRUMENT = 328, /* BX_TOKEN_INSTRUMENT */
BX_TOKEN_STRING = 329, /* BX_TOKEN_STRING */
BX_TOKEN_STOP = 330, /* BX_TOKEN_STOP */
BX_TOKEN_DOIT = 331, /* BX_TOKEN_DOIT */
BX_TOKEN_CRC = 332, /* BX_TOKEN_CRC */
BX_TOKEN_TRACE = 333, /* BX_TOKEN_TRACE */
BX_TOKEN_TRACEREG = 334, /* BX_TOKEN_TRACEREG */
BX_TOKEN_TRACEMEM = 335, /* BX_TOKEN_TRACEMEM */
BX_TOKEN_SWITCH_MODE = 336, /* BX_TOKEN_SWITCH_MODE */
BX_TOKEN_SIZE = 337, /* BX_TOKEN_SIZE */
BX_TOKEN_PTIME = 338, /* BX_TOKEN_PTIME */
BX_TOKEN_TIMEBP_ABSOLUTE = 339, /* BX_TOKEN_TIMEBP_ABSOLUTE */
BX_TOKEN_TIMEBP = 340, /* BX_TOKEN_TIMEBP */
BX_TOKEN_MODEBP = 341, /* BX_TOKEN_MODEBP */
BX_TOKEN_VMEXITBP = 342, /* BX_TOKEN_VMEXITBP */
BX_TOKEN_PRINT_STACK = 343, /* BX_TOKEN_PRINT_STACK */
BX_TOKEN_BT = 344, /* BX_TOKEN_BT */
BX_TOKEN_WATCH = 345, /* BX_TOKEN_WATCH */
BX_TOKEN_UNWATCH = 346, /* BX_TOKEN_UNWATCH */
BX_TOKEN_READ = 347, /* BX_TOKEN_READ */
BX_TOKEN_WRITE = 348, /* BX_TOKEN_WRITE */
BX_TOKEN_SHOW = 349, /* BX_TOKEN_SHOW */
BX_TOKEN_LOAD_SYMBOLS = 350, /* BX_TOKEN_LOAD_SYMBOLS */
BX_TOKEN_SET_MAGIC_BREAK_POINTS = 351, /* BX_TOKEN_SET_MAGIC_BREAK_POINTS */
BX_TOKEN_CLEAR_MAGIC_BREAK_POINTS = 352, /* BX_TOKEN_CLEAR_MAGIC_BREAK_POINTS */
BX_TOKEN_SYMBOLS = 353, /* BX_TOKEN_SYMBOLS */
BX_TOKEN_LIST_SYMBOLS = 354, /* BX_TOKEN_LIST_SYMBOLS */
BX_TOKEN_GLOBAL = 355, /* BX_TOKEN_GLOBAL */
BX_TOKEN_WHERE = 356, /* BX_TOKEN_WHERE */
BX_TOKEN_PRINT_STRING = 357, /* BX_TOKEN_PRINT_STRING */
BX_TOKEN_NUMERIC = 358, /* BX_TOKEN_NUMERIC */
BX_TOKEN_PAGE = 359, /* BX_TOKEN_PAGE */
BX_TOKEN_HELP = 360, /* BX_TOKEN_HELP */
BX_TOKEN_XML = 361, /* BX_TOKEN_XML */
BX_TOKEN_CALC = 362, /* BX_TOKEN_CALC */
BX_TOKEN_ADDLYT = 363, /* BX_TOKEN_ADDLYT */
BX_TOKEN_REMLYT = 364, /* BX_TOKEN_REMLYT */
BX_TOKEN_LYT = 365, /* BX_TOKEN_LYT */
BX_TOKEN_SOURCE = 366, /* BX_TOKEN_SOURCE */
BX_TOKEN_DEVICE = 367, /* BX_TOKEN_DEVICE */
BX_TOKEN_GENERIC = 368, /* BX_TOKEN_GENERIC */
BX_TOKEN_DEREF_CHR = 369, /* BX_TOKEN_DEREF_CHR */
BX_TOKEN_RSHIFT = 370, /* BX_TOKEN_RSHIFT */
BX_TOKEN_LSHIFT = 371, /* BX_TOKEN_LSHIFT */
BX_TOKEN_EQ = 372, /* BX_TOKEN_EQ */
BX_TOKEN_NE = 373, /* BX_TOKEN_NE */
BX_TOKEN_LE = 374, /* BX_TOKEN_LE */
BX_TOKEN_GE = 375, /* BX_TOKEN_GE */
BX_TOKEN_REG_IP = 376, /* BX_TOKEN_REG_IP */
BX_TOKEN_REG_EIP = 377, /* BX_TOKEN_REG_EIP */
BX_TOKEN_REG_RIP = 378, /* BX_TOKEN_REG_RIP */
BX_TOKEN_REG_SSP = 379, /* BX_TOKEN_REG_SSP */
NOT = 380, /* NOT */
NEG = 381, /* NEG */
INDIRECT = 382 /* INDIRECT */
};
typedef enum yytokentype yytoken_kind_t;
#endif
@ -224,90 +226,92 @@ extern int bxdebug;
#define BX_TOKEN_YMM 294
#define BX_TOKEN_ZMM 295
#define BX_TOKEN_AVX 296
#define BX_TOKEN_IDT 297
#define BX_TOKEN_IVT 298
#define BX_TOKEN_GDT 299
#define BX_TOKEN_LDT 300
#define BX_TOKEN_TSS 301
#define BX_TOKEN_TAB 302
#define BX_TOKEN_ALL 303
#define BX_TOKEN_LINUX 304
#define BX_TOKEN_DEBUG_REGS 305
#define BX_TOKEN_CONTROL_REGS 306
#define BX_TOKEN_SEGMENT_REGS 307
#define BX_TOKEN_EXAMINE 308
#define BX_TOKEN_XFORMAT 309
#define BX_TOKEN_DISFORMAT 310
#define BX_TOKEN_RESTORE 311
#define BX_TOKEN_WRITEMEM 312
#define BX_TOKEN_LOADMEM 313
#define BX_TOKEN_SETPMEM 314
#define BX_TOKEN_DEREF 315
#define BX_TOKEN_SYMBOLNAME 316
#define BX_TOKEN_QUERY 317
#define BX_TOKEN_PENDING 318
#define BX_TOKEN_TAKE 319
#define BX_TOKEN_DMA 320
#define BX_TOKEN_IRQ 321
#define BX_TOKEN_SMI 322
#define BX_TOKEN_NMI 323
#define BX_TOKEN_TLB 324
#define BX_TOKEN_DISASM 325
#define BX_TOKEN_INSTRUMENT 326
#define BX_TOKEN_STRING 327
#define BX_TOKEN_STOP 328
#define BX_TOKEN_DOIT 329
#define BX_TOKEN_CRC 330
#define BX_TOKEN_TRACE 331
#define BX_TOKEN_TRACEREG 332
#define BX_TOKEN_TRACEMEM 333
#define BX_TOKEN_SWITCH_MODE 334
#define BX_TOKEN_SIZE 335
#define BX_TOKEN_PTIME 336
#define BX_TOKEN_TIMEBP_ABSOLUTE 337
#define BX_TOKEN_TIMEBP 338
#define BX_TOKEN_MODEBP 339
#define BX_TOKEN_VMEXITBP 340
#define BX_TOKEN_PRINT_STACK 341
#define BX_TOKEN_BT 342
#define BX_TOKEN_WATCH 343
#define BX_TOKEN_UNWATCH 344
#define BX_TOKEN_READ 345
#define BX_TOKEN_WRITE 346
#define BX_TOKEN_SHOW 347
#define BX_TOKEN_LOAD_SYMBOLS 348
#define BX_TOKEN_SET_MAGIC_BREAK_POINTS 349
#define BX_TOKEN_CLEAR_MAGIC_BREAK_POINTS 350
#define BX_TOKEN_SYMBOLS 351
#define BX_TOKEN_LIST_SYMBOLS 352
#define BX_TOKEN_GLOBAL 353
#define BX_TOKEN_WHERE 354
#define BX_TOKEN_PRINT_STRING 355
#define BX_TOKEN_NUMERIC 356
#define BX_TOKEN_PAGE 357
#define BX_TOKEN_HELP 358
#define BX_TOKEN_XML 359
#define BX_TOKEN_CALC 360
#define BX_TOKEN_ADDLYT 361
#define BX_TOKEN_REMLYT 362
#define BX_TOKEN_LYT 363
#define BX_TOKEN_SOURCE 364
#define BX_TOKEN_DEVICE 365
#define BX_TOKEN_GENERIC 366
#define BX_TOKEN_DEREF_CHR 367
#define BX_TOKEN_RSHIFT 368
#define BX_TOKEN_LSHIFT 369
#define BX_TOKEN_EQ 370
#define BX_TOKEN_NE 371
#define BX_TOKEN_LE 372
#define BX_TOKEN_GE 373
#define BX_TOKEN_REG_IP 374
#define BX_TOKEN_REG_EIP 375
#define BX_TOKEN_REG_RIP 376
#define BX_TOKEN_REG_SSP 377
#define NOT 378
#define NEG 379
#define INDIRECT 380
#define BX_TOKEN_AMX 297
#define BX_TOKEN_TILE 298
#define BX_TOKEN_IDT 299
#define BX_TOKEN_IVT 300
#define BX_TOKEN_GDT 301
#define BX_TOKEN_LDT 302
#define BX_TOKEN_TSS 303
#define BX_TOKEN_TAB 304
#define BX_TOKEN_ALL 305
#define BX_TOKEN_LINUX 306
#define BX_TOKEN_DEBUG_REGS 307
#define BX_TOKEN_CONTROL_REGS 308
#define BX_TOKEN_SEGMENT_REGS 309
#define BX_TOKEN_EXAMINE 310
#define BX_TOKEN_XFORMAT 311
#define BX_TOKEN_DISFORMAT 312
#define BX_TOKEN_RESTORE 313
#define BX_TOKEN_WRITEMEM 314
#define BX_TOKEN_LOADMEM 315
#define BX_TOKEN_SETPMEM 316
#define BX_TOKEN_DEREF 317
#define BX_TOKEN_SYMBOLNAME 318
#define BX_TOKEN_QUERY 319
#define BX_TOKEN_PENDING 320
#define BX_TOKEN_TAKE 321
#define BX_TOKEN_DMA 322
#define BX_TOKEN_IRQ 323
#define BX_TOKEN_SMI 324
#define BX_TOKEN_NMI 325
#define BX_TOKEN_TLB 326
#define BX_TOKEN_DISASM 327
#define BX_TOKEN_INSTRUMENT 328
#define BX_TOKEN_STRING 329
#define BX_TOKEN_STOP 330
#define BX_TOKEN_DOIT 331
#define BX_TOKEN_CRC 332
#define BX_TOKEN_TRACE 333
#define BX_TOKEN_TRACEREG 334
#define BX_TOKEN_TRACEMEM 335
#define BX_TOKEN_SWITCH_MODE 336
#define BX_TOKEN_SIZE 337
#define BX_TOKEN_PTIME 338
#define BX_TOKEN_TIMEBP_ABSOLUTE 339
#define BX_TOKEN_TIMEBP 340
#define BX_TOKEN_MODEBP 341
#define BX_TOKEN_VMEXITBP 342
#define BX_TOKEN_PRINT_STACK 343
#define BX_TOKEN_BT 344
#define BX_TOKEN_WATCH 345
#define BX_TOKEN_UNWATCH 346
#define BX_TOKEN_READ 347
#define BX_TOKEN_WRITE 348
#define BX_TOKEN_SHOW 349
#define BX_TOKEN_LOAD_SYMBOLS 350
#define BX_TOKEN_SET_MAGIC_BREAK_POINTS 351
#define BX_TOKEN_CLEAR_MAGIC_BREAK_POINTS 352
#define BX_TOKEN_SYMBOLS 353
#define BX_TOKEN_LIST_SYMBOLS 354
#define BX_TOKEN_GLOBAL 355
#define BX_TOKEN_WHERE 356
#define BX_TOKEN_PRINT_STRING 357
#define BX_TOKEN_NUMERIC 358
#define BX_TOKEN_PAGE 359
#define BX_TOKEN_HELP 360
#define BX_TOKEN_XML 361
#define BX_TOKEN_CALC 362
#define BX_TOKEN_ADDLYT 363
#define BX_TOKEN_REMLYT 364
#define BX_TOKEN_LYT 365
#define BX_TOKEN_SOURCE 366
#define BX_TOKEN_DEVICE 367
#define BX_TOKEN_GENERIC 368
#define BX_TOKEN_DEREF_CHR 369
#define BX_TOKEN_RSHIFT 370
#define BX_TOKEN_LSHIFT 371
#define BX_TOKEN_EQ 372
#define BX_TOKEN_NE 373
#define BX_TOKEN_LE 374
#define BX_TOKEN_GE 375
#define BX_TOKEN_REG_IP 376
#define BX_TOKEN_REG_EIP 377
#define BX_TOKEN_REG_RIP 378
#define BX_TOKEN_REG_SSP 379
#define NOT 380
#define NEG 381
#define INDIRECT 382
/* Value type. */
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
@ -319,7 +323,7 @@ union YYSTYPE
Bit64u uval;
unsigned bval;
#line 323 "y.tab.h"
#line 327 "y.tab.h"
};
typedef union YYSTYPE YYSTYPE;

View File

@ -61,6 +61,8 @@ Bit64u eval_value;
%token <sval> BX_TOKEN_YMM
%token <sval> BX_TOKEN_ZMM
%token <sval> BX_TOKEN_AVX
%token <sval> BX_TOKEN_AMX
%token <sval> BX_TOKEN_TILE
%token <sval> BX_TOKEN_IDT
%token <sval> BX_TOKEN_IVT
%token <sval> BX_TOKEN_GDT
@ -172,6 +174,8 @@ command:
| xmm_regs_command
| ymm_regs_command
| zmm_regs_command
| amx_regs_command
| print_tile_command
| segment_regs_command
| debug_regs_command
| control_regs_command
@ -823,6 +827,22 @@ zmm_regs_command:
}
;
/* "amx" debugger command: print the AMX state via the generic register
   info command, selecting only the AMX register group. */
amx_regs_command:
BX_TOKEN_AMX '\n'
{
bx_dbg_info_registers_command(BX_INFO_AMX_REGS);
/* the token text was strdup'ed by the lexer, release it here */
free($1);
}
;
/* "tile <number>" debugger command: dump the content of one AMX tile.
   The numeric operand is handed over as the tile index. */
print_tile_command:
BX_TOKEN_TILE BX_TOKEN_NUMERIC '\n'
{
bx_dbg_print_amx_tile_command($2);
/* the token text was strdup'ed by the lexer, release it here */
free($1);
}
;
segment_regs_command:
BX_TOKEN_SEGMENT_REGS '\n'
{
@ -1233,6 +1253,11 @@ help_command:
dbg_printf("zmm - print AVX-512 state\n");
free($1);free($2);
}
| BX_TOKEN_HELP BX_TOKEN_AMX '\n'
{
dbg_printf("amx - print AMX state\n");
free($1);free($2);
}
| BX_TOKEN_HELP BX_TOKEN_SEGMENT_REGS '\n'
{
dbg_printf("sreg - show segment registers\n");

View File

@ -629,6 +629,7 @@ typedef Bit32u bx_phy_address;
#define BX_SUPPORT_VMX 0
#define BX_SUPPORT_AVX 0
#define BX_SUPPORT_EVEX 0
#define BX_SUPPORT_AMX 0
#if BX_SUPPORT_UINTR && BX_SUPPORT_X86_64 == 0
#error "UINTR require x86-64 support"
@ -650,6 +651,10 @@ typedef Bit32u bx_phy_address;
#error "EVEX and AVX-512 support require AVX to be compiled in"
#endif
#if BX_SUPPORT_AMX && BX_SUPPORT_EVEX == 0
#error "AMX require EVEX and AVX to be compiled in"
#endif
#define BX_SUPPORT_REPEAT_SPEEDUPS 0
#define BX_SUPPORT_HANDLERS_CHAINING_SPEEDUPS 0
#define BX_ENABLE_TRACE_LINKING 0

View File

@ -1432,6 +1432,29 @@ if test "$support_evex" -gt 0 -a "$support_avx" = 0; then
AC_MSG_ERROR([for EVEX and AVX-512 support AVX emulation must be compiled in!])
fi
# --enable-amx: defines BX_SUPPORT_AMX to 1 when requested, 0 otherwise
# (also 0 when the option is not given). Configuration is aborted below
# when AMX is requested while $support_evex is 0.
support_amx=0
AC_MSG_CHECKING(for AMX extensions support)
AC_ARG_ENABLE(amx,
AS_HELP_STRING([--enable-amx], [support for AMX extensions (no)]),
[if test "$enableval" = yes; then
AC_MSG_RESULT(yes)
AC_DEFINE(BX_SUPPORT_AMX, 1)
support_amx=1
elif test "$enableval" = no; then
AC_MSG_RESULT(no)
AC_DEFINE(BX_SUPPORT_AMX, 0)
fi
],
[
AC_MSG_RESULT(no)
AC_DEFINE(BX_SUPPORT_AMX, 0)
]
)
if test "$support_amx" -gt 0 -a "$support_evex" = 0; then
AC_MSG_ERROR([for AMX support AVX and EVEX emulation must be compiled in!])
fi
AC_MSG_CHECKING(for x86 debugger support)
AC_ARG_ENABLE(x86-debugger,
AS_HELP_STRING([--enable-x86-debugger], [x86 debugger support (no)]),

View File

@ -42,6 +42,7 @@ BX_INCDIRS = -I.. -I../.. -I$(srcdir)/.. -I$(srcdir)/../.. -I../../@INSTRUMENT_D
# Objects which are only used for AVX / EVEX code
AVX_OBJS = \
amx.o \
avx.o \
avx_pfp.o \
avx_cvt.o \
@ -94,6 +95,14 @@ dist-clean: clean
# dependencies generated by
# gcc -MM -I.. -I../.. -I../../instrument/stubs *.cc | sed 's/\.cc/.@CPP_SUFFIX@/g'
###########################################
amx.o: amx.@CPP_SUFFIX@ ../../bochs.h ../../config.h ../../osdep.h ../../logio.h \
../../misc/bswap.h ../cpu.h ../../bx_debug/debug.h ../../config.h \
../../osdep.h ../../cpu/decoder/decoder.h ../../cpu/decoder/features.h \
../decoder/decoder.h ../../instrument/stubs/instrument.h ../i387.h \
../fpu/softfloat.h ../fpu/tag_w.h ../fpu/status_w.h ../fpu/control_w.h \
../crregs.h ../descriptor.h ../decoder/instr.h ../lazy_flags.h ../tlb.h \
../icache.h ../xmm.h ../vmx.h ../svm.h ../cpuid.h ../stack.h \
../access.h ../simd_int.h amx.h
avx.o: avx.@CPP_SUFFIX@ ../../bochs.h ../../config.h ../../osdep.h ../../logio.h \
../../misc/bswap.h ../cpu.h ../../bx_debug/debug.h ../../config.h \
../../osdep.h ../../cpu/decoder/decoder.h ../../cpu/decoder/features.h \

394
bochs/cpu/avx/amx.cc Normal file
View File

@ -0,0 +1,394 @@
/////////////////////////////////////////////////////////////////////////
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2024 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
//
/////////////////////////////////////////////////////////////////////////
#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
#if BX_SUPPORT_AMX
#include "amx.h"
// Validate a 64-byte tile configuration image and, if it is acceptable,
// install it into the AMX state.
//
// Image layout as consumed here:
//   byte  0       palette id
//   byte  1       start row
//   bytes 16..31  bytes-per-row of tiles 0..7 (one 16-bit value each)
//   bytes 48..55  row count of tiles 0..7 (one byte each)
//   everything else must be zero
//
// Returns true when the configuration was applied (palette 0 clears the AMX
// state, palette 1 installs the described tiles). Returns false, leaving the
// AMX state untouched, for any other palette or a malformed palette 1 image.
bool BX_CPP_AttrRegparmN(2) BX_CPU_C::configure_tiles(bxInstruction_c *i, const BxPackedAvxRegister &tilecfg)
{
  const Bit8u palette_id = tilecfg.vmmubyte(0);
  const Bit8u start_row = tilecfg.vmmubyte(1);

  // palette 0: wipe the AMX state and report success
  if (palette_id == 0) {
    BX_CPU_THIS_PTR amx->clear();
    return true;
  }

  // palette 1 is the only other palette accepted
  if (palette_id != 1)
    return false;

  // qword 0 above its low 16 bits and qwords 1, 4, 5, 7 must all be zero
  const Bit64u reserved = (tilecfg.vmm64u(0) >> 16) | tilecfg.vmm64u(1) |
                          tilecfg.vmm64u(4) | tilecfg.vmm64u(5) | tilecfg.vmm64u(7);
  if (reserved != 0) {
    BX_ERROR(("LDTILECFG: reserved bits set for palette_id=%d", palette_id));
    return false;
  }

  // collect and validate all tiles first, so that nothing is committed
  // when any of them turns out to be malformed
  AMX::TILECFG staged[8];
  for (unsigned t = 0; t < 8; t++) {
    AMX::TILECFG &cfg = staged[t];

    cfg.bytes_per_row = tilecfg.vmm16u(8+t);
    if (cfg.bytes_per_row > 64) {
      BX_ERROR(("LDTILECFG: too many bytes_per_row for tile=%d in palette_id=%d", t, palette_id));
      return false;
    }

    cfg.rows = tilecfg.vmmubyte(48+t);
    if (cfg.rows > 16) {
      BX_ERROR(("LDTILECFG: too many rows for tile=%d in palette_id=%d", t, palette_id));
      return false;
    }

    // rows and bytes_per_row must be either both zero or both non-zero
    const bool no_rows = (cfg.rows == 0);
    const bool no_bytes = (cfg.bytes_per_row == 0);
    if (no_rows != no_bytes) {
      BX_ERROR(("LDTILECFG: invalid empty tile=%d in palette_id=%d", t, palette_id));
      return false;
    }
  }

  // commit
  BX_CPU_THIS_PTR amx->clear();
  BX_CPU_THIS_PTR amx->palette_id = 1;
  BX_CPU_THIS_PTR amx->start_row = start_row;
  for (unsigned t = 0; t < 8; t++)
    BX_CPU_THIS_PTR amx->tilecfg[t] = staged[t];

  return true;
}
// LDTILECFG: read a 64-byte tile configuration image from the memory operand
// and apply it with configure_tiles(); a rejected image raises
// BX_GP_EXCEPTION with error code 0.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::LDTILECFG(bxInstruction_c *i)
{
  Bit64u cfg_eaddr = BX_CPU_RESOLVE_ADDR_64(i);

  BxPackedAvxRegister image;
  read_linear_zmmword(i->seg(), get_laddr64(i->seg(), cfg_eaddr), &image);

  bool accepted = configure_tiles(i, image);
  if (!accepted)
    exception(BX_GP_EXCEPTION, 0);

  BX_NEXT_INSTR(i);
}
// STTILECFG: store the current tile configuration to the memory operand.
// The work is delegated to xsave_tilecfg_state(), which receives the
// resolved 64-bit effective address of the operand.
// NOTE(review): presumably writes the same 64-byte layout LDTILECFG reads -
// confirm against xsave_tilecfg_state().
void BX_CPP_AttrRegparmN(1) BX_CPU_C::STTILECFG(bxInstruction_c *i)
{
xsave_tilecfg_state(i, BX_CPU_RESOLVE_ADDR_64(i));
BX_NEXT_INSTR(i);
}
// TILELOADD: load the rows of the destination tile from memory.
//
// Row 'r' is read from (base + disp32) + r * (index << scale). Loading
// starts at amx->start_row, which is advanced after every completed row.
// NOTE(review): presumably this lets a faulting load resume where it
// stopped - confirm.
//
// Raises BX_UD_EXCEPTION when:
//  - the instruction has no SIB index register,
//  - the destination tile number is out of range or the tile is not valid,
//  - the tile's bytes_per_row is not a multiple of 4,
//  - start_row is not below the tile's row count.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::TILELOADD_TnnnMdq(bxInstruction_c *i)
{
  if (i->sibIndex() == BX_NIL_REGISTER) {
    BX_ERROR(("%s: SIB byte required", i->getIaOpcodeNameShort()));
    exception(BX_UD_EXCEPTION, 0);
  }

  unsigned tile = i->dst();
  if (tile >= BX_TILE_REGISTERS || ! BX_CPU_THIS_PTR amx->tile_valid(tile)) {
    BX_ERROR(("%s: invalid tile %d", i->getIaOpcodeNameShort(), tile));
    exception(BX_UD_EXCEPTION, 0);
  }

  unsigned rows = BX_CPU_THIS_PTR amx->tile_num_rows(tile);
  unsigned bytes_per_row = BX_CPU_THIS_PTR amx->tile_bytes_per_row(tile);
  if ((bytes_per_row & 0x3) != 0) {
    BX_ERROR(("%s: invalid tile %d bytes_per_row=%d", i->getIaOpcodeNameShort(), tile, bytes_per_row));
    exception(BX_UD_EXCEPTION, 0);
  }

  if (BX_CPU_THIS_PTR amx->start_row >= rows) {
    // argument list matches the three conversions of the format string
    // (a stray 'tile' argument used to shift the printed values)
    BX_ERROR(("%s: invalid (start_row=%d) >= (rows=%d)", i->getIaOpcodeNameShort(), BX_CPU_THIS_PTR amx->start_row, rows));
    exception(BX_UD_EXCEPTION, 0);
  }

  // one mask bit per 32-bit element of a row
  unsigned elements_per_row = bytes_per_row / 4;
  Bit32u mask = (elements_per_row < 16) ? (BX_CONST64(1) << elements_per_row) - 1 : BX_CONST64(0xFFFF);

  BX_CPU_THIS_PTR amx->set_tile_used(tile);
  // NOTE(review): presumably zeroes the rows from start_row upwards - confirm
  BX_CPU_THIS_PTR amx->tile[tile].clear_upper_rows(BX_CPU_THIS_PTR amx->start_row);

  Bit64u start_eaddr = BX_READ_64BIT_REG(i->sibBase()) + (Bit64s) i->displ32s();
  Bit64u stride = BX_READ_64BIT_REG(i->sibIndex()) << i->sibScale();

  for (unsigned row=BX_CPU_THIS_PTR amx->start_row; row < rows; row++) {
    BxPackedAvxRegister *data = &(BX_CPU_THIS_PTR amx->tile[tile].row[row]);
    Bit64u eaddr = start_eaddr + row * stride;

    if (bytes_per_row == 64) {
      // full row: plain 64-byte read
      read_linear_zmmword(i->seg(), get_laddr64(i->seg(), eaddr), data);
    }
    else {
      // partial row: masked dword load, then zero the unused tail elements
      avx_masked_load32(i, eaddr, data, mask);
      for (unsigned n=elements_per_row; n < 16; n++)
        data->vmm32u(n) = 0;
    }

    BX_CPU_THIS_PTR amx->start_row++;
  }

  BX_CPU_THIS_PTR amx->restart();

  BX_NEXT_INSTR(i);
}
// TILESTORED: store the rows of the source tile to memory.
//
// Row 'r' is written to (base + disp32) + r * (index << scale). Storing
// starts at amx->start_row, which is advanced after every completed row.
// NOTE(review): presumably this lets a faulting store resume where it
// stopped - confirm.
//
// Raises BX_UD_EXCEPTION when:
//  - the instruction has no SIB index register,
//  - the source tile number is out of range or the tile is not valid,
//  - the tile's bytes_per_row is not a multiple of 4,
//  - start_row is not below the tile's row count.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::TILESTORED_MdqTnnn(bxInstruction_c *i)
{
  if (i->sibIndex() == BX_NIL_REGISTER) {
    BX_ERROR(("%s: SIB byte required", i->getIaOpcodeNameShort()));
    exception(BX_UD_EXCEPTION, 0);
  }

  unsigned tile = i->src();
  if (tile >= BX_TILE_REGISTERS || ! BX_CPU_THIS_PTR amx->tile_valid(tile)) {
    BX_ERROR(("TILESTORED: invalid tile %d", tile));
    exception(BX_UD_EXCEPTION, 0);
  }

  unsigned rows = BX_CPU_THIS_PTR amx->tile_num_rows(tile);
  unsigned bytes_per_row = BX_CPU_THIS_PTR amx->tile_bytes_per_row(tile);
  if ((bytes_per_row & 0x3) != 0) {
    BX_ERROR(("TILESTORED: invalid tile %d bytes_per_row=%d", tile, bytes_per_row));
    exception(BX_UD_EXCEPTION, 0);
  }

  if (BX_CPU_THIS_PTR amx->start_row >= rows) {
    // argument list matches the two conversions of the format string
    // (a stray 'tile' argument used to shift the printed values)
    BX_ERROR(("TILESTORED: invalid (start_row=%d) >= (rows=%d)", BX_CPU_THIS_PTR amx->start_row, rows));
    exception(BX_UD_EXCEPTION, 0);
  }

  // one mask bit per 32-bit element of a row
  unsigned elements_per_row = bytes_per_row / 4;
  Bit32u mask = (elements_per_row < 16) ? (BX_CONST64(1) << elements_per_row) - 1 : BX_CONST64(0xFFFF);

  Bit64u start_eaddr = BX_READ_64BIT_REG(i->sibBase()) + (Bit64s) i->displ32s();
  Bit64u stride = BX_READ_64BIT_REG(i->sibIndex()) << i->sibScale();

  for (unsigned row=BX_CPU_THIS_PTR amx->start_row; row < rows; row++) {
    BxPackedAvxRegister *data = &(BX_CPU_THIS_PTR amx->tile[tile].row[row]);
    Bit64u eaddr = start_eaddr + row * stride;

    // full rows use a plain 64-byte write, partial rows a masked dword store
    if (bytes_per_row == 64)
      write_linear_zmmword(i->seg(), get_laddr64(i->seg(), eaddr), data);
    else
      avx_masked_store32(i, eaddr, data, mask);

    BX_CPU_THIS_PTR amx->start_row++;
  }

  BX_CPU_THIS_PTR amx->restart();

  BX_NEXT_INSTR(i);
}
// Common operand validation for the three-tile matrix multiply instructions.
// With A = tile_src1 (m x k), B = tile_src2 (k x n) and C = tile_dst (m x n),
// BX_UD_EXCEPTION is raised when:
//  - two of the three operands name the same tile,
//  - an operand is out of range or not a valid tile,
//  - bytes_per_row of an operand is not a multiple of 4,
//  - the shapes of A, B and C do not fit together,
//  - n or k is larger than 16.
void BX_CPU_C::check_tiles(bxInstruction_c *i, unsigned tile_dst, unsigned tile_src1, unsigned tile_src2)
{
  // operands in validation order: [0] = srcdest, [1] = src1, [2] = src2
  const unsigned operand[3] = { tile_dst, tile_src1, tile_src2 };

  // #UD if srcdest == src1 OR srcdest == src2 OR src1 == src2
  if (operand[0] == operand[1] || operand[0] == operand[2] || operand[1] == operand[2]) {
    BX_ERROR(("%s: must use different tiles", i->getIaOpcodeNameShort()));
    exception(BX_UD_EXCEPTION, 0);
  }

  // #UD if TILES_CONFIGURED == 0
  // #UD if srcdest/src1/src2 are not valid tiles
  // #UD if srcdest/src1/src2 are >= palette_table[tilecfg.palette_id].max_names
  for (unsigned t = 0; t < 3; t++) {
    if (operand[t] >= BX_TILE_REGISTERS || ! BX_CPU_THIS_PTR amx->tile_valid(operand[t])) {
      BX_ERROR(("%s: invalid tile %d", i->getIaOpcodeNameShort(), operand[t]));
      exception(BX_UD_EXCEPTION, 0);
    }
  }

  unsigned num_rows[3];
  unsigned colbytes[3];
  for (unsigned t = 0; t < 3; t++) {
    num_rows[t] = BX_CPU_THIS_PTR amx->tile_num_rows(operand[t]);
    colbytes[t] = BX_CPU_THIS_PTR amx->tile_bytes_per_row(operand[t]);
  }

  // #UD if colbytes mod 4 != 0 for srcdest, src1 or src2
  for (unsigned t = 0; t < 3; t++) {
    if ((colbytes[t] & 0x3) != 0) {
      BX_ERROR(("%s: invalid tile bytes_per_row=%d", i->getIaOpcodeNameShort(), colbytes[t]));
      exception(BX_UD_EXCEPTION, 0);
    }
  }

  //            R   C
  // A (src1)    = m x k
  // B (src2)    = k x n
  // C (srcdest) = m x n
  const unsigned dim_n = colbytes[0] / 4;
  const unsigned dim_m = num_rows[1];
  const unsigned dim_k = num_rows[2];

  // #UD if srcdest.colbytes != src2.colbytes (n)
  // #UD if srcdest.rows != src1.rows (m)
  // #UD if src1.colbytes / 4 != src2.rows (k)
  const bool shapes_fit = (dim_n == colbytes[2] / 4) &&
                          (dim_m == num_rows[0]) &&
                          (dim_k == colbytes[1] / 4);
  if (! shapes_fit) {
    BX_ERROR(("%s: invalid matmul tile dimenstions", i->getIaOpcodeNameShort()));
    exception(BX_UD_EXCEPTION, 0);
  }

  // #UD if srcdest.colbytes > tmul_maxn
  // #UD if src2.colbytes > tmul_maxn
  // #UD if src1.colbytes/4 > tmul_maxk
  // #UD if src2.rows > tmul_maxk
  if (dim_n > 16 || dim_k > 16) {
    BX_ERROR(("%s: unsupported matmul tile dimenstions", i->getIaOpcodeNameShort()));
    exception(BX_UD_EXCEPTION, 0);
  }
}
#include "cpu/simd_vnni.h"
/*
 * HANDLE_AMX_INT8_3OP(HANDLER, func)
 *
 * Generates the BX_CPU_C handler HANDLER for an AMX INT8 tile dot-product
 * instruction (TDPBSSD/TDPBSUD/TDPBUSD/TDPBUUD).
 *
 *         R   C
 *    A = m x k (tsrc1)
 *    B = k x n (tsrc2)
 *    C = m x n (tsrcdest)
 *
 * For every destination dword C[m][n] the handler accumulates, over all k,
 * the 4-way byte dot product of dword A[m][k] with dword B[k][n].
 *
 * 'func' is the 128-bit VNNI helper (dst, op1, op2) processing four dwords
 * at once: dst.dword[j] += dot4(op1.dword[j], op2.dword[j]). To reuse it,
 * the single dword A[m][k] is broadcast to all four lanes of op1 and op2 is
 * the group of four consecutive dwords of row k of B.
 *
 * check_tiles() raises #UD for invalid tile configurations before any
 * architectural state is modified.
 */
#define HANDLE_AMX_INT8_3OP(HANDLER, func)                                                  \
  void BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i)                       \
  {                                                                                         \
    unsigned tile_dst = i->dst(), tile_src1 = i->src1(), tile_src2 = i->src2();             \
    check_tiles(i, tile_dst, tile_src1, tile_src2);                                         \
                                                                                            \
    /*      R   C          */                                                               \
    /* A = m x k (tsrc1)   */                                                               \
    /* B = k x n (tsrc2)   */                                                               \
    /* C = m x n (tsrcdest)*/                                                               \
    unsigned max_n = BX_CPU_THIS_PTR amx->tile_bytes_per_row(tile_dst) / 4;                 \
    unsigned max_m = BX_CPU_THIS_PTR amx->tile_num_rows(tile_dst);                          \
    unsigned max_k = BX_CPU_THIS_PTR amx->tile_num_rows(tile_src2);                         \
                                                                                            \
    AMX::TILE *tdst  = &(BX_CPU_THIS_PTR amx->tile[tile_dst]);                              \
    AMX::TILE *tsrc1 = &(BX_CPU_THIS_PTR amx->tile[tile_src1]);                             \
    AMX::TILE *tsrc2 = &(BX_CPU_THIS_PTR amx->tile[tile_src2]);                             \
                                                                                            \
    for (unsigned m=0; m < max_m; m++) {                                                    \
      BxPackedAvxRegister* tmp = &(tdst->row[m]);                                           \
      for (unsigned k=0; k < max_k; k++) {                                                  \
        /* broadcast A[m][k] so that every lane multiplies by the same dword */             \
        BxPackedXmmRegister a_mk;                                                           \
        for (unsigned j=0; j < 4; j++)                                                      \
          a_mk.xmm32u(j) = tsrc1->row[m].vmm32u(k);                                         \
        /* four destination dwords per step; lanes beyond max_n are zeroed below */         \
        for (unsigned n=0; n < max_n; n+=4) {                                               \
          (func)(&(tmp->vmm128(n/4)), &a_mk, &(tsrc2->row[k].vmm128(n/4)));                 \
        }                                                                                   \
      }                                                                                     \
      tdst->zero_upper_row_data32(m, max_n);                                                \
    }                                                                                       \
                                                                                            \
    BX_CPU_THIS_PTR amx->set_tile_used(tile_dst);                                           \
    BX_CPU_THIS_PTR amx->tile[tile_dst].clear_upper_rows(max_m);                            \
    BX_CPU_THIS_PTR amx->restart();                                                         \
    BX_NEXT_INSTR(i);                                                                       \
  }
HANDLE_AMX_INT8_3OP(TDPBSSD_TnnnTrmTreg, xmm_pdpbssd)
HANDLE_AMX_INT8_3OP(TDPBSUD_TnnnTrmTreg, xmm_pdpbsud)
HANDLE_AMX_INT8_3OP(TDPBUSD_TnnnTrmTreg, xmm_pdpbusd)
HANDLE_AMX_INT8_3OP(TDPBUUD_TnnnTrmTreg, xmm_pdpbuud)
#include "bf16.h"
extern float_status_t prepare_ne_softfloat_status_helper();
// TDPBF16PS tmm1, tmm2, tmm3: dot product of BF16 tiles accumulated into a
// packed single precision tile.
//
//         R   C
//    A = m x k (tsrc1)     - rows of BF16 pairs
//    B = k x n (tsrc2)     - rows of BF16 pairs
//    C = m x n (tsrcdest)  - rows of FP32 values
//
// For each destination row, even and odd BF16 elements of every pair are
// accumulated into separate FP32 partial sums (tmp[2*n] and tmp[2*n+1]);
// the two partial sums are then added together and into C[m][n].
// check_tiles() raises #UD for invalid tile configurations.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::TDPBF16PS_TnnnTrmTreg(bxInstruction_c *i)
{
  unsigned tile_dst = i->dst(), tile_src1 = i->src1(), tile_src2 = i->src2();
  check_tiles(i, tile_dst, tile_src1, tile_src2);

  //      R   C
  // A = m x k (tsrc1)
  // B = k x n (tsrc2)
  // C = m x n (tsrcdest)
  unsigned max_n = BX_CPU_THIS_PTR amx->tile_bytes_per_row(tile_dst) / 4;
  unsigned max_m = BX_CPU_THIS_PTR amx->tile_num_rows(tile_dst);
  unsigned max_k = BX_CPU_THIS_PTR amx->tile_num_rows(tile_src2);

  AMX::TILE *tdst  = &(BX_CPU_THIS_PTR amx->tile[tile_dst]);
  AMX::TILE *tsrc1 = &(BX_CPU_THIS_PTR amx->tile[tile_src1]);
  AMX::TILE *tsrc2 = &(BX_CPU_THIS_PTR amx->tile[tile_src2]);

  float_status_t status = prepare_ne_softfloat_status_helper();

  for (unsigned m=0; m < max_m; m++) {
    // per-row partial sums: two FP32 accumulators for every destination dword
    float32 tmp[32];
    // every accumulator must start from zero; index by n (writing tmp[32]
    // would be out of bounds and leave the accumulators uninitialized)
    for (unsigned n=0; n < 32; n++) tmp[n] = 0;

    for (unsigned k=0; k < max_k; k++) {
      for (unsigned n=0; n < max_n; n++) {
        tmp[2*n]   = float32_fmadd(convert_bfloat16_to_fp32(tsrc1->row[m].vmm16u(2*k)),
                                   convert_bfloat16_to_fp32(tsrc2->row[k].vmm16u(2*n)), tmp[2*n], status);
        tmp[2*n+1] = float32_fmadd(convert_bfloat16_to_fp32(tsrc1->row[m].vmm16u(2*k+1)),
                                   convert_bfloat16_to_fp32(tsrc2->row[k].vmm16u(2*n+1)), tmp[2*n+1], status);
      }
    }

    // fold the even/odd partial sums and accumulate into the destination row
    for (unsigned n=0; n < max_n; n++) {
      float32 tmpf32 = float32_add(tmp[2*n], tmp[2*n+1], status);
      tdst->row[m].vmm32u(n) = float32_add(tdst->row[m].vmm32u(n), tmpf32, status);
    }

    // dwords of the row beyond the configured width are always zero
    tdst->zero_upper_row_data32(m, max_n);
  }

  BX_CPU_THIS_PTR amx->set_tile_used(tile_dst);
  BX_CPU_THIS_PTR amx->tile[tile_dst].clear_upper_rows(max_m);
  BX_CPU_THIS_PTR amx->restart();
  BX_NEXT_INSTR(i);
}
// TILEZERO tmm: zero the whole destination tile and drop its "used" mark.
// #UD is raised when the tile number is out of range or the tile has no
// rows configured.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::TILEZERO_Tnnn(bxInstruction_c *i)
{
  const unsigned tmm = i->dst();

  // range check first: tile_valid() indexes the configuration by tile number
  const bool usable = (tmm < BX_TILE_REGISTERS) && BX_CPU_THIS_PTR amx->tile_valid(tmm);
  if (! usable) {
    BX_ERROR(("TILEZERO: invalid tile %d", tmm));
    exception(BX_UD_EXCEPTION, 0);
  }

  BX_CPU_THIS_PTR amx->clear_tile_used(tmm);
  BX_CPU_THIS_PTR amx->tile[tmm].clear();

  // instruction completed: no partially executed tile operation to resume
  BX_CPU_THIS_PTR amx->restart();

  BX_NEXT_INSTR(i);
}
// TILERELEASE: reset the AMX state through AMX::clear() (palette, restart row,
// tile usage tracking, tile data and tile configuration) and continue with the
// next instruction.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::TILERELEASE(bxInstruction_c *i)
{
BX_CPU_THIS_PTR amx->clear();
BX_NEXT_INSTR(i);
}
#endif // BX_SUPPORT_AMX

108
bochs/cpu/avx/amx.h Normal file
View File

@ -0,0 +1,108 @@
/////////////////////////////////////////////////////////////////////////
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2024 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
//
/////////////////////////////////////////////////////////////////////////
#ifndef BX_AMX_EXTENSIONS_H
#define BX_AMX_EXTENSIONS_H
#if BX_SUPPORT_AMX
#include "cpu/xmm.h"
#define BX_TILE_REGISTERS 8
// AMX state of one CPU: the tile configuration (palette and per-tile
// dimensions), the tile data itself, a bitmap tracking which tiles have been
// written, and the row at which an interrupted tile operation has to resume.
struct AMX {
  // tile_use_tracker must be initialized as well: set_tile_used() and
  // is_tile_used() read it and may run before the first clear().
  AMX(): palette_id(0), start_row(0), tile_use_tracker(0) {}

  unsigned palette_id; // 0 if tiles are not configured
  unsigned start_row;  // used to restart tile operations

  // configured dimensions of a single tile
  struct TILECFG {
    unsigned rows, bytes_per_row;

    TILECFG() { clear(); }
    void clear() { rows = bytes_per_row = 0; }
  } tilecfg[BX_TILE_REGISTERS];

  bool tiles_configured() const { return palette_id != 0; }

  void clear_tilecfg() {
    for (int i=0;i<BX_TILE_REGISTERS;i++) tilecfg[i].clear();
  }

  // NOTE: the accessors below do not range check tile_num,
  // the caller has to make sure that tile_num < BX_TILE_REGISTERS

  // a tile is valid (configured) when it has a non-zero number of rows
  bool tile_valid(unsigned tile_num) const { return tilecfg[tile_num].rows != 0; }

  unsigned tile_num_rows(unsigned tile_num) const { return tilecfg[tile_num].rows; }
  unsigned tile_bytes_per_row(unsigned tile_num) const { return tilecfg[tile_num].bytes_per_row; }

  // tile_use_tracker holds one bit per tile
  bool is_tile_used(unsigned tile_num) const { return tile_use_tracker & (1 << tile_num); }
  void set_tile_used(unsigned tile_num) { tile_use_tracker |= (1 << tile_num); }
  void clear_tile_used(unsigned tile_num) { tile_use_tracker &= ~(1 << tile_num); }

  // the tile operation has completed, the next one starts from row 0
  void restart() { start_row = 0; }

  // tile data: BX_TILE_MAX_ROWS rows, one ZMM-sized register per row
  struct TILE {
#define BX_TILE_MAX_ROWS (16)
    bx_zmm_reg_t row[BX_TILE_MAX_ROWS];

    TILE() { clear(); }

    // clear upper part of a row (clears dwords limit..16)
    void zero_upper_row_data32(unsigned nrow, unsigned limit)
    {
      for (unsigned i=limit; i < 16; i++)
        row[nrow].vmm32u(i) = 0;
    }

    // clear 0..nrows
    void clear_rows(unsigned nrows) {
      for (unsigned i=0; i < nrows; i++)
        row[i].clear();
    }

    // clear nrows..MAX_ROWS
    void clear_upper_rows(unsigned nrows) {
      for (unsigned i=nrows; i < BX_TILE_MAX_ROWS; i++)
        row[i].clear();
    }

    void clear() { clear_rows(BX_TILE_MAX_ROWS); }

  } tile[BX_TILE_REGISTERS] BX_CPP_AlignN(64);

  unsigned tile_use_tracker; // bit N is set when tile N was marked as used

  void clear_tiles() {
    for (int i=0;i<BX_TILE_REGISTERS;i++) tile[i].clear();
  }

  // return to the initial state: nothing configured, all tile data zeroed
  void clear() {
    palette_id = 0;
    start_row = 0;
    tile_use_tracker = 0;
    clear_tiles();
    clear_tilecfg();
  }
};
#endif // BX_SUPPORT_AMX
#endif

View File

@ -384,6 +384,7 @@ class BX_CPU_C;
class BX_MEM_C;
class bxInstruction_c;
class bx_local_apic_c;
class AMX;
// <TAG-TYPE-EXECUTEPTR-START>
#if BX_USE_CPU_SMF
@ -1056,6 +1057,10 @@ public: // for now...
MSR *msrs[BX_MSR_MAX_INDEX];
#endif
#if BX_SUPPORT_AMX
AMX *amx;
#endif
#if BX_SUPPORT_VMX
bool in_vmx;
bool in_vmx_guest;
@ -1199,6 +1204,10 @@ public: // for now...
BX_SMF void clear_evex_ok();
BX_SMF bool get_evex_ok();
BX_SMF void set_amx_ok();
BX_SMF void clear_amx_ok();
BX_SMF bool get_amx_ok();
// for exceptions
static jmp_buf jmp_buf_env;
unsigned last_exception_type;
@ -2477,10 +2486,10 @@ public: // for now...
BX_SMF void BLENDPS_VpsWpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void BLENDPD_VpdWpdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PBLENDW_VdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRB_EbdVdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRB_EbdVdqIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRW_EwdVdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRW_EwdVdqIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRB_EdVdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRB_MbVdqIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRW_EdVdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRW_MwVdqIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRD_EdVdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void PEXTRD_EdVdqIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#if BX_SUPPORT_X86_64
@ -3580,6 +3589,20 @@ public: // for now...
BX_SMF void VPSHRDVQ_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#endif
#if BX_SUPPORT_AMX
BX_SMF void LDTILECFG(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
BX_SMF void STTILECFG(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
BX_SMF void TILELOADD_TnnnMdq(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
BX_SMF void TILESTORED_MdqTnnn(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
BX_SMF void TDPBSSD_TnnnTrmTreg(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
BX_SMF void TDPBSUD_TnnnTrmTreg(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
BX_SMF void TDPBUSD_TnnnTrmTreg(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
BX_SMF void TDPBUUD_TnnnTrmTreg(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
BX_SMF void TDPBF16PS_TnnnTrmTreg(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
BX_SMF void TILEZERO_Tnnn(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
BX_SMF void TILERELEASE(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
#endif
BX_SMF void LZCNT_GwEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void LZCNT_GdEdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#if BX_SUPPORT_X86_64
@ -4085,6 +4108,9 @@ public: // for now...
BX_SMF void BxNoOpMask(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void BxNoEVEX(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#endif
#if BX_SUPPORT_AMX
BX_SMF void BxNoAMX(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#endif
#endif
BX_CPP_INLINE BX_SMF Bit32u BxResolve32(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
@ -4481,6 +4507,11 @@ public: // for now...
BX_SMF void avx512_write_regq_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned vlen, Bit32u mask);
#endif
#if BX_SUPPORT_AMX
BX_SMF void check_tiles(bxInstruction_c *i, unsigned tile_dst, unsigned tile_src1, unsigned tile_src2);
BX_SMF bool configure_tiles(bxInstruction_c *i, const BxPackedAvxRegister &tilecfg) BX_CPP_AttrRegparmN(2);
#endif
#if BX_CPU_LEVEL >= 5
BX_SMF bool rdmsr(Bit32u index, Bit64u *val_64) BX_CPP_AttrRegparmN(2);
BX_SMF bool handle_unknown_rdmsr(Bit32u index, Bit64u *val_64) BX_CPP_AttrRegparmN(2);
@ -4821,6 +4852,18 @@ public: // for now...
BX_SMF void xrstor_uintr_state(bxInstruction_c *i, bx_address offset);
BX_SMF void xrstor_init_uintr_state(void);
#endif
#if BX_SUPPORT_AMX
BX_SMF bool xsave_tilecfg_state_xinuse(void);
BX_SMF void xsave_tilecfg_state(bxInstruction_c *i, bx_address offset);
BX_SMF void xrstor_tilecfg_state(bxInstruction_c *i, bx_address offset);
BX_SMF void xrstor_init_tilecfg_state(void);
BX_SMF bool xsave_tiledata_state_xinuse(void);
BX_SMF void xsave_tiledata_state(bxInstruction_c *i, bx_address offset);
BX_SMF void xrstor_tiledata_state(bxInstruction_c *i, bx_address offset);
BX_SMF void xrstor_init_tiledata_state(void);
#endif
#endif
#if BX_SUPPORT_CET
@ -5056,6 +5099,7 @@ BX_CPP_INLINE Bit32u BX_CPP_AttrRegparmN(1) BX_CPU_C::BxResolve32(bxInstruction_
// bit 4 - AVX_OK
// bit 5 - OPMASK_OK
// bit 6 - EVEX_OK
// bit 7 - AMX_OK
//
enum {
@ -5065,7 +5109,8 @@ enum {
BX_FETCH_MODE_SSE_OK = (1 << 3),
BX_FETCH_MODE_AVX_OK = (1 << 4),
BX_FETCH_MODE_OPMASK_OK = (1 << 5),
BX_FETCH_MODE_EVEX_OK = (1 << 6)
BX_FETCH_MODE_EVEX_OK = (1 << 6),
BX_FETCH_MODE_AMX_OK = (1 << 7)
};
BX_CPP_INLINE void BX_CPU_C::set_fpu_mmx_ok() { BX_CPU_THIS_PTR cpu_state_use_ok |= BX_FETCH_MODE_FPU_MMX_OK; }
@ -5088,6 +5133,10 @@ BX_CPP_INLINE void BX_CPU_C::set_evex_ok() { BX_CPU_THIS_PTR cpu_state_use_ok |=
BX_CPP_INLINE void BX_CPU_C::clear_evex_ok() { BX_CPU_THIS_PTR cpu_state_use_ok &= ~BX_FETCH_MODE_EVEX_OK; }
BX_CPP_INLINE bool BX_CPU_C::get_evex_ok() { return (BX_CPU_THIS_PTR cpu_state_use_ok & BX_FETCH_MODE_EVEX_OK); }
// AMX_OK flag accessors: set, clear and test the BX_FETCH_MODE_AMX_OK bit
// in cpu_state_use_ok.
BX_CPP_INLINE void BX_CPU_C::set_amx_ok() { BX_CPU_THIS_PTR cpu_state_use_ok |= BX_FETCH_MODE_AMX_OK; }
BX_CPP_INLINE void BX_CPU_C::clear_amx_ok() { BX_CPU_THIS_PTR cpu_state_use_ok &= ~BX_FETCH_MODE_AMX_OK; }
BX_CPP_INLINE bool BX_CPU_C::get_amx_ok() { return (BX_CPU_THIS_PTR cpu_state_use_ok & BX_FETCH_MODE_AMX_OK); }
//
// updateFetchModeMask - has to be called everytime
// CS.L / CS.D_B / CR0.PE, CR0.TS or CR0.EM / CR4.OSFXSR / CR4.OSXSAVE changes

View File

@ -123,6 +123,11 @@ sapphire_rapids_t::sapphire_rapids_t(BX_CPU_C *cpu):
enable_cpu_extension(BX_ISA_AVX512_BITALG);
enable_cpu_extension(BX_ISA_AVX512_VPOPCNTDQ);
enable_cpu_extension(BX_ISA_AVX512_BF16);
#endif
#if BX_SUPPORT_AMX
enable_cpu_extension(BX_ISA_AMX);
enable_cpu_extension(BX_ISA_AMX_INT8);
enable_cpu_extension(BX_ISA_AMX_BF16);
#endif
enable_cpu_extension(BX_ISA_CLFLUSHOPT);
enable_cpu_extension(BX_ISA_CLWB);
@ -250,10 +255,19 @@ void sapphire_rapids_t::get_cpuid_leaf(Bit32u function, Bit32u subfunction, cpui
case 0x0000001A: // CPUID leaf 0x0000001A - native Model ID Enumeration leaf (for Hybrid)
case 0x0000001B: // PCONFIG Information
case 0x0000001C: // CPUID leaf 0x0000001C - Last Branch Record (Architectural LBR) leaf
#if BX_SUPPORT_AMX
case 0x0000001D: // AMX
get_std_cpuid_amx_palette_info_leaf(subfunction, leaf);
return;
case 0x0000001E: // AMX: TMUL Information Main leaf
get_std_cpuid_amx_tmul_leaf(subfunction, leaf);
return;
#else
case 0x0000001D: // AMX
case 0x0000001E: // AMX: TMUL Information Main leaf
get_reserved_leaf(leaf);
return;
#endif
case 0x0000001F: // V2 Extended Topology Enumberation leaf
get_reserved_leaf(leaf); // until figured it out
return;

View File

@ -2,7 +2,7 @@
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2014-2023 Stanislav Shwartsman
// Copyright (c) 2014-2024 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
@ -235,13 +235,69 @@ void bx_cpuid_t::get_std_cpuid_xsave_leaf(Bit32u subfunction, cpuid_function_t *
if (support_mask & (1 << subfunction)) {
leaf->eax = xsave_restore[subfunction].len;
leaf->ebx = xsave_restore[subfunction].offset;
leaf->ecx = (cpu->ia32_xss_suppmask & (1 << subfunction)) != 0; // managed through IA32_XSS register
// ECX[0] - set if this component managed through IA32_XSS register
// ECX[1] - set to indicate this component must be aligned to 64-byte
// ECX[2] - XFD support for this component
leaf->ecx = (cpu->ia32_xss_suppmask & (1 << subfunction)) != 0;
leaf->edx = 0;
}
}
}
#endif
#if BX_SUPPORT_AMX
// CPUID leaf 0x1D - AMX tile palette information.
//   subfunction 0 : EAX = highest supported palette id
//   subfunction 1 : description of palette #1
// Any other subfunction, or a CPU model without AMX, reports all zeroes.
void bx_cpuid_t::get_std_cpuid_amx_palette_info_leaf(Bit32u subfunction, cpuid_function_t *leaf) const
{
  // everything not explicitly filled in below is reported as zero
  leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;

  if (is_cpu_extension_supported(BX_ISA_AMX)) {
    switch (subfunction) {
      case 0:
        leaf->eax = 1; // max palette_id
        break;

      case 1: // information about palette #1
        // EAX[15:00] : Palette #1 total tile bytes = 8192
        // EAX[31:16] : Palette #1 bytes per tile   = 1024
        leaf->eax = (1024<<16) | 8192;

        // EBX[15:00] : Palette #1 bytes_per_row    = 64
        // EBX[31:16] : Palette #1 number of tiles  = 8
        leaf->ebx = (8<<16) | 64;

        // ECX[15:00] : Palette #1 max_rows         = 16
        // ECX[31:16] : Reserved
        leaf->ecx = 16;

        // EDX[31:00] : Reserved (stays zero)
        break;

      default:
        break;
    }
  }
}
// CPUID leaf 0x1E - AMX TMUL information.
// Only EBX carries data; the same values are reported for every subfunction.
// A CPU model without AMX reports all zeroes.
void bx_cpuid_t::get_std_cpuid_amx_tmul_leaf(Bit32u subfunction, cpuid_function_t *leaf) const
{
  const Bit32u tmul_max_k = 16; // EBX[07:00] TMUL_MAX_K (rows or columns)
  const Bit32u tmul_max_n = 64; // EBX[23:08] TMUL_MAX_N (column bytes)
                                // EBX[31:24] reserved

  const bool amx_present = is_cpu_extension_supported(BX_ISA_AMX);

  leaf->eax = 0;
  leaf->ebx = amx_present ? (tmul_max_k | (tmul_max_n << 8)) : 0;
  leaf->ecx = 0;
  leaf->edx = 0;
}
#endif
void bx_cpuid_t::get_leaf_0(unsigned max_leaf, const char *vendor_string, cpuid_function_t *leaf, unsigned limited_max_leaf) const
{
// EAX: highest function understood by CPUID
@ -1023,10 +1079,28 @@ Bit32u bx_cpuid_t::get_std_cpuid_leaf_7_edx(Bit32u extra) const
#endif
// [21:21] reserved
// [22:22] AMX BF16 support
#if BX_SUPPORT_AMX
if (is_cpu_extension_supported(BX_ISA_AMX)) {
if (is_cpu_extension_supported(BX_ISA_AMX_BF16))
edx |= BX_CPUID_STD7_SUBLEAF0_EDX_AMX_BF16;
}
#endif
// [23:23] AVX512_FP16 instructions support
#if BX_SUPPORT_AMX
// [24:24] AMX TILE architecture support
if (is_cpu_extension_supported(BX_ISA_AMX)) {
edx |= BX_CPUID_STD7_SUBLEAF0_EDX_AMX_TILE;
// [25:25] AMX INT8 support
if (is_cpu_extension_supported(BX_ISA_AMX_INT8))
edx |= BX_CPUID_STD7_SUBLEAF0_EDX_AMX_INT8;
}
#endif
// * [26:26] IBRS and IBPB: Indirect branch restricted speculation (SCA)
// * [27:27] STIBP: Single Thread Indirect Branch Predictors supported (SCA)
// * [28:28] L1D_FLUSH supported (SCA)

View File

@ -2,7 +2,7 @@
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2010-2023 Stanislav Shwartsman
// Copyright (c) 2010-2024 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
@ -113,6 +113,11 @@ protected:
void get_std_cpuid_xsave_leaf(Bit32u subfunction, cpuid_function_t *leaf) const;
#endif
#if BX_SUPPORT_AMX
void get_std_cpuid_amx_palette_info_leaf(Bit32u subfunction, cpuid_function_t *leaf) const;
void get_std_cpuid_amx_tmul_leaf(Bit32u subfunction, cpuid_function_t *leaf) const;
#endif
Bit32u get_std_cpuid_leaf_1_ecx(Bit32u extra = 0) const;
Bit32u get_std_cpuid_leaf_1_edx_common(Bit32u extra = 0) const;
Bit32u get_std_cpuid_leaf_1_edx(Bit32u extra = 0) const;

View File

@ -1823,8 +1823,28 @@ void BX_CPU_C::xsave_xrestor_init(void)
// XCR0[15]: LBR state (not implemented)
// XCR0[16]: HWP state (not implemented)
// XCR0[17]: AMX XTILECFG state (not implemented)
// XCR0[17]: AMX XTILEDATA state (not implemented)
// XCR0[17]: AMX XTILECFG state
// XCR0[18]: AMX XTILEDATA state
#if BX_SUPPORT_AMX
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_AMX)) {
// XCR0[17]: AMX XTILECFG state
xsave_restore[xcr0_t::BX_XCR0_XTILECFG_BIT].len = XSAVE_XTILECFG_STATE_LEN;
xsave_restore[xcr0_t::BX_XCR0_XTILECFG_BIT].offset = XSAVE_XTILECFG_STATE_OFFSET;
xsave_restore[xcr0_t::BX_XCR0_XTILECFG_BIT].xstate_in_use_method = &BX_CPU_C::xsave_tilecfg_state_xinuse;
xsave_restore[xcr0_t::BX_XCR0_XTILECFG_BIT].xsave_method = &BX_CPU_C::xsave_tilecfg_state;
xsave_restore[xcr0_t::BX_XCR0_XTILECFG_BIT].xrstor_method = &BX_CPU_C::xrstor_tilecfg_state;
xsave_restore[xcr0_t::BX_XCR0_XTILECFG_BIT].xrstor_init_method = &BX_CPU_C::xrstor_init_tilecfg_state;
// XCR0[18]: AMX XTILEDATA state
xsave_restore[xcr0_t::BX_XCR0_XTILEDATA_BIT].len = XSAVE_XTILEDATA_STATE_LEN;
xsave_restore[xcr0_t::BX_XCR0_XTILEDATA_BIT].offset = XSAVE_XTILEDATA_STATE_OFFSET;
xsave_restore[xcr0_t::BX_XCR0_XTILEDATA_BIT].xstate_in_use_method = &BX_CPU_C::xsave_tiledata_state_xinuse;
xsave_restore[xcr0_t::BX_XCR0_XTILEDATA_BIT].xsave_method = &BX_CPU_C::xsave_tiledata_state;
xsave_restore[xcr0_t::BX_XCR0_XTILEDATA_BIT].xrstor_method = &BX_CPU_C::xrstor_tiledata_state;
xsave_restore[xcr0_t::BX_XCR0_XTILEDATA_BIT].xrstor_init_method = &BX_CPU_C::xrstor_init_tiledata_state;
}
#endif
}
#if BX_CPU_LEVEL >= 5
@ -1870,6 +1890,10 @@ Bit32u BX_CPU_C::get_xcr0_allow_mask(void)
#if BX_SUPPORT_PKEYS
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_PKU))
allowMask |= BX_XCR0_PKRU_MASK;
#endif
#if BX_SUPPORT_AMX
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_AMX))
allowMask |= BX_XCR0_XTILE_BITS_MASK;
#endif
return allowMask;
}

View File

@ -274,6 +274,8 @@ const unsigned XSAVE_OPMASK_STATE_OFFSET = 1088;
const unsigned XSAVE_ZMM_HI256_STATE_OFFSET = 1152;
const unsigned XSAVE_HI_ZMM_STATE_OFFSET = 1664;
const unsigned XSAVE_PKRU_STATE_OFFSET = 2688;
const unsigned XSAVE_XTILECFG_STATE_OFFSET = 2752;
const unsigned XSAVE_XTILEDATA_STATE_OFFSET = 2816;
struct xcr0_t {
Bit32u val32; // 32bit value of register
@ -296,8 +298,8 @@ struct xcr0_t {
BX_XCR0_UINTR_BIT = 14,
BX_XCR0_LBR_BIT = 15, // not implemented yet
BX_XCR0_HWP_BIT = 16, // not implemented yet
BX_XCR0_XTILECFG_BIT = 17, // not implemented yet
BX_XCR0_XTILEDATA_BIT = 18, // not implemented yet
BX_XCR0_XTILECFG_BIT = 17,
BX_XCR0_XTILEDATA_BIT = 18,
BX_XCR0_LAST // make sure it is < 32
};
@ -321,6 +323,8 @@ struct xcr0_t {
#define BX_XCR0_XTILECFG_MASK (1 << xcr0_t::BX_XCR0_XTILECFG_BIT)
#define BX_XCR0_XTILEDATA_MASK (1 << xcr0_t::BX_XCR0_XTILEDATA_BIT)
#define BX_XCR0_XTILE_BITS_MASK (BX_XCR0_XTILECFG_MASK | BX_XCR0_XTILEDATA_MASK)
IMPLEMENT_CRREG_ACCESSORS(FPU, BX_XCR0_FPU_BIT);
IMPLEMENT_CRREG_ACCESSORS(SSE, BX_XCR0_SSE_BIT);
IMPLEMENT_CRREG_ACCESSORS(YMM, BX_XCR0_YMM_BIT);

View File

@ -293,12 +293,10 @@ char *resolve_memsize(char *disbufptr, const bxInstruction_c *i, unsigned src_in
else if (src_index == BX_SRC_RM) {
switch(src_type) {
case BX_GPR8:
case BX_GPR32_MEM8: // 8-bit memory ref but 32-bit GPR
disbufptr = dis_sprintf(disbufptr, "byte ptr ");
break;
case BX_GPR16:
case BX_GPR32_MEM16: // 16-bit memory ref but 32-bit GPR
case BX_SEGREG:
disbufptr = dis_sprintf(disbufptr, "word ptr ");
break;
@ -329,6 +327,9 @@ char *resolve_memsize(char *disbufptr, const bxInstruction_c *i, unsigned src_in
disbufptr = dis_sprintf(disbufptr, "xmmword ptr ");
break;
case BX_TMM_REG:
break;
default:
break;
}
@ -401,8 +402,6 @@ char *disasm_regref(char *disbufptr, const bxInstruction_c *i, unsigned src_num,
break;
case BX_GPR32:
case BX_GPR32_MEM8: // 8-bit memory ref but 32-bit GPR
case BX_GPR32_MEM16: // 16-bit memory ref but 32-bit GPR
disbufptr = dis_sprintf(disbufptr, "%s", general_32bit_regname[srcreg]);
break;
@ -460,6 +459,10 @@ char *disasm_regref(char *disbufptr, const bxInstruction_c *i, unsigned src_num,
break;
#endif
case BX_TMM_REG:
disbufptr = dis_sprintf(disbufptr, "tmm%d", srcreg);
break;
case BX_SEGREG:
disbufptr = dis_sprintf(disbufptr, "%s", segment_name[srcreg]);
break;

View File

@ -115,6 +115,11 @@ x86_feature(BX_ISA_AVX_VNNI, "avx_vnni") /* AVX e
x86_feature(BX_ISA_AVX_VNNI_INT8, "avx_vnni_int8") /* AVX encoded VNNI-INT8 Instructions */
x86_feature(BX_ISA_AVX_VNNI_INT16, "avx_vnni_int16") /* AVX encoded VNNI-INT16 Instructions */
x86_feature(BX_ISA_AVX_NE_CONVERT, "avx_ne_convert") /* AVX-NE-CONVERT Instructions */
#if BX_SUPPORT_AMX
x86_feature(BX_ISA_AMX, "amx") /* AMX Instructions */
x86_feature(BX_ISA_AMX_INT8, "amx_int8") /* AMX-INT8 Instructions */
x86_feature(BX_ISA_AMX_BF16, "amx_bf16") /* AMX-BF16 Instructions */
#endif
#endif
x86_feature(BX_ISA_XAPIC, "xapic") /* XAPIC support */
x86_feature(BX_ISA_X2APIC, "x2apic") /* X2APIC support */

View File

@ -53,6 +53,7 @@ enum BxDecodeError {
BX_EVEX_ILLEGAL_ZERO_MASKING_WITH_KMASK_SRC_OR_DEST,
BX_EVEX_ILLEGAL_ZERO_MASKING_VSIB,
BX_EVEX_ILLEGAL_ZERO_MASKING_MEMORY_DESTINATION,
BX_AMX_ILLEGAL_TILE_REGISTER
};
//
@ -78,6 +79,7 @@ BX_CPP_INLINE Bit64u FetchQWORD(const Bit8u *iptr)
}
#endif
#define BX_PREPARE_AMX (0x400)
#define BX_PREPARE_EVEX_NO_BROADCAST (0x200 | BX_PREPARE_EVEX)
#define BX_PREPARE_EVEX_NO_SAE (0x100 | BX_PREPARE_EVEX)
#define BX_PREPARE_EVEX (0x80)
@ -95,7 +97,7 @@ struct bxIAOpcodeTable {
BxExecutePtr_tR execute2;
#endif
Bit8u src[4];
#if BX_SUPPORT_EVEX
#if BX_SUPPORT_EVEX || BX_SUPPORT_AMX
Bit16u opflags;
#else
Bit8u opflags;
@ -140,20 +142,20 @@ enum {
enum {
BX_NO_REGISTER = 0,
BX_GPR8 = 0x1,
BX_GPR32_MEM8 = 0x2, // 8-bit memory reference but 32-bit GPR
BX_GPR16 = 0x3,
BX_GPR32_MEM16 = 0x4, // 16-bit memory reference but 32-bit GPR
BX_GPR32 = 0x5,
BX_GPR64 = 0x6,
BX_FPU_REG = 0x7,
BX_MMX_REG = 0x8,
BX_MMX_HALF_REG = 0x9,
BX_VMM_REG = 0xA,
BX_KMASK_REG = 0xB,
BX_KMASK_REG_PAIR = 0xC,
BX_SEGREG = 0xD,
BX_CREG = 0xE,
BX_DREG = 0xF
BX_GPR16 = 0x2,
BX_GPR32 = 0x3,
BX_GPR64 = 0x4,
BX_FPU_REG = 0x5,
BX_MMX_REG = 0x6,
BX_MMX_HALF_REG = 0x7,
BX_VMM_REG = 0x8,
BX_KMASK_REG = 0x9,
BX_KMASK_REG_PAIR = 0xA,
BX_TMM_REG = 0xB,
BX_SEGREG = 0xC,
BX_CREG = 0xD,
BX_DREG = 0xE
// encoding 0xF is still free
};
// to be used together with BX_SRC_VECTOR_RM
@ -215,9 +217,7 @@ enum {
const Bit8u OP_NONE = BX_SRC_NONE;
const Bit8u OP_Eb = BX_FORM_SRC(BX_GPR8, BX_SRC_RM);
const Bit8u OP_Ebd = BX_FORM_SRC(BX_GPR32_MEM8, BX_SRC_RM);
const Bit8u OP_Ew = BX_FORM_SRC(BX_GPR16, BX_SRC_RM);
const Bit8u OP_Ewd = BX_FORM_SRC(BX_GPR32_MEM16, BX_SRC_RM);
const Bit8u OP_Ed = BX_FORM_SRC(BX_GPR32, BX_SRC_RM);
const Bit8u OP_Eq = BX_FORM_SRC(BX_GPR64, BX_SRC_RM);
@ -356,6 +356,10 @@ const Bit8u OP_KHq = BX_FORM_SRC(BX_KMASK_REG, BX_SRC_VVV);
const Bit8u OP_KGq2 = BX_FORM_SRC(BX_KMASK_REG_PAIR, BX_SRC_NNN);
const Bit8u OP_Trm = BX_FORM_SRC(BX_TMM_REG, BX_SRC_RM);
const Bit8u OP_Tnnn = BX_FORM_SRC(BX_TMM_REG, BX_SRC_NNN);
const Bit8u OP_Treg = BX_FORM_SRC(BX_TMM_REG, BX_SRC_VVV);
const Bit8u OP_ST0 = BX_FORM_SRC(BX_FPU_REG, BX_SRC_EAX);
const Bit8u OP_STi = BX_FORM_SRC(BX_FPU_REG, BX_SRC_RM);

View File

@ -1754,6 +1754,11 @@ BxDecodeError assign_srcs(bxInstruction_c *i, unsigned ia_opcode, bool is_64, un
if (i->isZeroMasking())
return BX_EVEX_ILLEGAL_ZERO_MASKING_WITH_KMASK_SRC_OR_DEST;
}
#endif
#if BX_SUPPORT_AMX
if (type == BX_TMM_REG) {
if (nnn >= 8) return BX_AMX_ILLEGAL_TILE_REGISTER;
}
#endif
break;
case BX_SRC_RM:
@ -1765,6 +1770,11 @@ BxDecodeError assign_srcs(bxInstruction_c *i, unsigned ia_opcode, bool is_64, un
if (i->isZeroMasking())
return BX_EVEX_ILLEGAL_ZERO_MASKING_WITH_KMASK_SRC_OR_DEST;
}
#endif
#if BX_SUPPORT_AMX
if (type == BX_TMM_REG) {
if (rm >= 8) return BX_AMX_ILLEGAL_TILE_REGISTER;
}
#endif
i->setSrcReg(n, rm);
}
@ -1798,6 +1808,11 @@ BxDecodeError assign_srcs(bxInstruction_c *i, unsigned ia_opcode, bool is_64, un
if (i->isZeroMasking())
return BX_EVEX_ILLEGAL_ZERO_MASKING_WITH_KMASK_SRC_OR_DEST;
}
#endif
#if BX_SUPPORT_AMX
if (type == BX_TMM_REG) {
if (vvv >= 8) return BX_AMX_ILLEGAL_TILE_REGISTER;
}
#endif
break;
case BX_SRC_VIB:
@ -2600,6 +2615,14 @@ int assignHandler(bxInstruction_c *i, Bit32u fetchModeMask)
return(1);
}
}
#if BX_SUPPORT_AMX
if (! (fetchModeMask & BX_FETCH_MODE_AMX_OK)) {
if (op_flags & BX_PREPARE_AMX) {
if (i->execute1 != &BX_CPU_C::BxError) i->execute1 = &BX_CPU_C::BxNoAMX;
return(1);
}
}
#endif
#endif
#endif
#endif

View File

@ -994,6 +994,21 @@ static const Bit64u BxOpcodeGroup_VEX_0F3847[] = {
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W1, BX_IA_VPSLLVQ_VdqHdqWdq)
};
#if BX_SUPPORT_AMX
static const Bit64u BxOpcodeGroup_VEX_0F3849[] = {
form_opcode(ATTR_SSE_NO_PREFIX | ATTR_VEX_W0 | ATTR_VL128 | ATTR_NNN0 | ATTR_RRR0 | ATTR_MODC0 | ATTR_IS64, BX_IA_TILERELEASE),
form_opcode(ATTR_SSE_NO_PREFIX | ATTR_VEX_W0 | ATTR_VL128 | ATTR_MOD_MEM | ATTR_IS64, BX_IA_LDTILECFG),
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W1 | ATTR_VL128 | ATTR_MOD_MEM | ATTR_IS64, BX_IA_STTILECFG),
last_opcode(ATTR_SSE_PREFIX_F2 | ATTR_VEX_W0 | ATTR_VL128 | ATTR_RRR0 | ATTR_MODC0 | ATTR_IS64, BX_IA_TILEZERO_Tnnn)
};
static const Bit64u BxOpcodeGroup_VEX_0F384B[] = {
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0 | ATTR_VL128 | ATTR_MOD_MEM | ATTR_IS64, BX_IA_TILELOADDT1_TnnnMdq),
form_opcode(ATTR_SSE_PREFIX_F3 | ATTR_VEX_W0 | ATTR_VL128 | ATTR_MOD_MEM | ATTR_IS64, BX_IA_TILESTORED_MdqTnnn),
last_opcode(ATTR_SSE_PREFIX_F2 | ATTR_VEX_W0 | ATTR_VL128 | ATTR_MOD_MEM | ATTR_IS64, BX_IA_TILELOADD_TnnnMdq)
};
#endif
static const Bit64u BxOpcodeGroup_VEX_0F3850[] = {
form_opcode(ATTR_SSE_NO_PREFIX | ATTR_VEX_W0, BX_IA_VPDPBUUD_VdqHdqWdq),
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0, BX_IA_VPDPBUSD_VdqHdqWdq),
@ -1014,6 +1029,18 @@ static const Bit64u BxOpcodeGroup_VEX_0F3853[] = { last_opcode(ATTR_SSE_PREFIX_6
static const Bit64u BxOpcodeGroup_VEX_0F3858[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0, BX_IA_VPBROADCASTD_VdqWd) };
static const Bit64u BxOpcodeGroup_VEX_0F3859[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0, BX_IA_VPBROADCASTQ_VdqWq) };
static const Bit64u BxOpcodeGroup_VEX_0F385A[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0 | ATTR_VL256 | ATTR_MOD_MEM, BX_IA_V256_VBROADCASTI128_VdqMdq) };
#if BX_SUPPORT_AMX
static const Bit64u BxOpcodeGroup_VEX_0F385C[] = { last_opcode(ATTR_SSE_PREFIX_F3 | ATTR_VEX_W0 | ATTR_VL128 | ATTR_MODC0 | ATTR_IS64, BX_IA_TDPBF16PS_TnnnTrmTreg) };
static const Bit64u BxOpcodeGroup_VEX_0F385E[] = {
form_opcode(ATTR_SSE_NO_PREFIX | ATTR_VEX_W0 | ATTR_VL128 | ATTR_MODC0 | ATTR_IS64, BX_IA_TDPBUUD_TnnnTrmTreg),
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0 | ATTR_VL128 | ATTR_MODC0 | ATTR_IS64, BX_IA_TDPBUSD_TnnnTrmTreg),
form_opcode(ATTR_SSE_PREFIX_F2 | ATTR_VEX_W0 | ATTR_VL128 | ATTR_MODC0 | ATTR_IS64, BX_IA_TDPBSSD_TnnnTrmTreg),
last_opcode(ATTR_SSE_PREFIX_F3 | ATTR_VEX_W0 | ATTR_VL128 | ATTR_MODC0 | ATTR_IS64, BX_IA_TDPBSUD_TnnnTrmTreg)
};
#endif
static const Bit64u BxOpcodeGroup_VEX_0F3872[] = { last_opcode(ATTR_SSE_PREFIX_F3 | ATTR_VEX_W0, BX_IA_VCVTNEPS2BF16_Vbf16Wps) };
static const Bit64u BxOpcodeGroup_VEX_0F3878[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0, BX_IA_VPBROADCASTB_VdqWb) };
static const Bit64u BxOpcodeGroup_VEX_0F3879[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0, BX_IA_VPBROADCASTW_VdqWw) };
@ -1357,8 +1384,14 @@ static const Bit64u BxOpcodeGroup_VEX_0F3A0F[] = {
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VL256, BX_IA_V256_VPALIGNR_VdqHdqWdqIb)
};
static const Bit64u BxOpcodeGroup_VEX_0F3A14[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VL128, BX_IA_V128_VPEXTRB_EbdVdqIb) };
static const Bit64u BxOpcodeGroup_VEX_0F3A15[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VL128, BX_IA_V128_VPEXTRW_EwdVdqIb) };
static const Bit64u BxOpcodeGroup_VEX_0F3A14[] = {
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VL128 | ATTR_MODC0, BX_IA_V128_VPEXTRB_EdVdqIbR),
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VL128 | ATTR_MOD_MEM, BX_IA_V128_VPEXTRB_MbVdqIbM)
};
static const Bit64u BxOpcodeGroup_VEX_0F3A15[] = {
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VL128 | ATTR_MODC0, BX_IA_V128_VPEXTRW_EdVdqIbR),
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VL128 | ATTR_MOD_MEM, BX_IA_V128_VPEXTRW_MwVdqIbM)
};
static const Bit64u BxOpcodeGroup_VEX_0F3A16[] = {
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VL128 | ATTR_VEX_W0, BX_IA_V128_VPEXTRD_EdVdqIb),
@ -1884,9 +1917,17 @@ static const Bit64u *BxOpcodeTableVEX[256*3] = {
/* 46 */ ( BxOpcodeGroup_VEX_0F3846 ),
/* 47 */ ( BxOpcodeGroup_VEX_0F3847 ),
/* 48 */ ( BxOpcodeGroup_ERR ),
#if BX_SUPPORT_AMX
/* 49 */ ( BxOpcodeGroup_VEX_0F3849 ),
#else
/* 49 */ ( BxOpcodeGroup_ERR ),
#endif
/* 4A */ ( BxOpcodeGroup_ERR ),
#if BX_SUPPORT_AMX
/* 4B */ ( BxOpcodeGroup_VEX_0F384B ),
#else
/* 4B */ ( BxOpcodeGroup_ERR ),
#endif
/* 4C */ ( BxOpcodeGroup_ERR ),
/* 4D */ ( BxOpcodeGroup_ERR ),
/* 4E */ ( BxOpcodeGroup_ERR ),
@ -1903,9 +1944,17 @@ static const Bit64u *BxOpcodeTableVEX[256*3] = {
/* 59 */ ( BxOpcodeGroup_VEX_0F3859 ),
/* 5A */ ( BxOpcodeGroup_VEX_0F385A ),
/* 5B */ ( BxOpcodeGroup_ERR ),
#if BX_SUPPORT_AMX
/* 5C */ ( BxOpcodeGroup_VEX_0F385C ),
#else
/* 5C */ ( BxOpcodeGroup_ERR ),
#endif
/* 5D */ ( BxOpcodeGroup_ERR ),
#if BX_SUPPORT_AMX
/* 5E */ ( BxOpcodeGroup_VEX_0F385E ),
#else
/* 5E */ ( BxOpcodeGroup_ERR ),
#endif
/* 5F */ ( BxOpcodeGroup_ERR ),
/* 60 */ ( BxOpcodeGroup_ERR ),
/* 61 */ ( BxOpcodeGroup_ERR ),

View File

@ -1610,8 +1610,14 @@ static const Bit64u BxOpcodeGroup_EVEX_0F3A0F[] = {
last_opcode(ATTR_SSE_PREFIX_66, BX_IA_V512_VPALIGNR_VdqHdqWdqIb_Kmask)
};
static const Bit64u BxOpcodeGroup_EVEX_0F3A14[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_L0 | ATTR_MASK_K0, BX_IA_V512_VPEXTRB_EbdVdqIb) };
static const Bit64u BxOpcodeGroup_EVEX_0F3A15[] = { last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_L0 | ATTR_MASK_K0, BX_IA_V512_VPEXTRW_EwdVdqIb) };
static const Bit64u BxOpcodeGroup_EVEX_0F3A14[] = {
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_L0 | ATTR_MASK_K0 | ATTR_MODC0, BX_IA_V512_VPEXTRB_EdVdqIbR),
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_L0 | ATTR_MASK_K0 | ATTR_MOD_MEM, BX_IA_V512_VPEXTRB_MbVdqIbM)
};
// EVEX 0F 3A 15 (VPEXTRW): two entries, both requiring the 66 prefix, L0 and
// mask k0. The ATTR_MODC0 entry selects the ...IbR opcode and the ATTR_MOD_MEM
// entry selects the ...IbM opcode.
// NOTE(review): presumably these are the register- and memory-destination
// encodings respectively - confirm against the ATTR_* definitions.
static const Bit64u BxOpcodeGroup_EVEX_0F3A15[] = {
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_L0 | ATTR_MASK_K0 | ATTR_MODC0, BX_IA_V512_VPEXTRW_EdVdqIbR),
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_L0 | ATTR_MASK_K0 | ATTR_MOD_MEM, BX_IA_V512_VPEXTRW_MwVdqIbM)
};
static const Bit64u BxOpcodeGroup_EVEX_0F3A16[] = {
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_L0 | ATTR_VEX_W0 | ATTR_MASK_K0, BX_IA_V512_VPEXTRD_EdVdqIb),

View File

@ -42,8 +42,14 @@ static const Bit64u BxOpcodeTable0F3A0F[] = {
last_opcode(ATTR_SSE_PREFIX_66, BX_IA_PALIGNR_VdqWdqIb),
};
static const Bit64u BxOpcodeTable0F3A14[] = { last_opcode(ATTR_SSE_PREFIX_66, BX_IA_PEXTRB_EbdVdqIb) };
static const Bit64u BxOpcodeTable0F3A15[] = { last_opcode(ATTR_SSE_PREFIX_66, BX_IA_PEXTRW_EwdVdqIb) };
// opcode 0F 3A 14 (PEXTRB): both entries require the 66 prefix. The ATTR_MODC0
// entry selects the ...IbR opcode and the ATTR_MOD_MEM entry selects the
// ...IbM opcode.
// NOTE(review): presumably register vs. memory destination - confirm against
// the ATTR_* definitions.
static const Bit64u BxOpcodeTable0F3A14[] = {
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_MODC0, BX_IA_PEXTRB_EdVdqIbR),
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_MOD_MEM, BX_IA_PEXTRB_MbVdqIbM)
};
// opcode 0F 3A 15 (PEXTRW): both entries require the 66 prefix. The ATTR_MODC0
// entry selects the ...IbR opcode and the ATTR_MOD_MEM entry selects the
// ...IbM opcode.
// NOTE(review): presumably register vs. memory destination - confirm against
// the ATTR_* definitions.
static const Bit64u BxOpcodeTable0F3A15[] = {
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_MODC0, BX_IA_PEXTRW_EdVdqIbR),
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_MOD_MEM, BX_IA_PEXTRW_MwVdqIbM)
};
// opcode 0F 3A 16
static const Bit64u BxOpcodeTable0F3A16[] = {

View File

@ -1347,8 +1347,10 @@ bx_define_opcode(BX_IA_ROUNDSD_VsdWsdIb, "roundsd", "roundsd", &BX_CPU_C::LOAD_W
bx_define_opcode(BX_IA_BLENDPS_VpsWpsIb, "blendps", "blendps", &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::BLENDPS_VpsWpsIbR, BX_ISA_SSE4_1, OP_Vps, OP_Wps, OP_Ib, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_BLENDPD_VpdWpdIb, "blendpd", "blendpd", &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::BLENDPD_VpdWpdIbR, BX_ISA_SSE4_1, OP_Vpd, OP_Wpd, OP_Ib, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PBLENDW_VdqWdqIb, "pblendw", "pblendw", &BX_CPU_C::LOAD_Wdq, &BX_CPU_C::PBLENDW_VdqWdqIbR, BX_ISA_SSE4_1, OP_Vdq, OP_Wdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PEXTRB_EbdVdqIb, "pextrb", "pextrb", &BX_CPU_C::PEXTRB_EbdVdqIbM, &BX_CPU_C::PEXTRB_EbdVdqIbR, BX_ISA_SSE4_1, OP_Ebd, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PEXTRW_EwdVdqIb, "pextrw", "pextrw", &BX_CPU_C::PEXTRW_EwdVdqIbM, &BX_CPU_C::PEXTRW_EwdVdqIbR, BX_ISA_SSE4_1, OP_Ewd, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PEXTRB_EdVdqIbR, "pextrb", "pextrb", NULL, &BX_CPU_C::PEXTRB_EdVdqIbR, BX_ISA_SSE4_1, OP_Ed, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PEXTRB_MbVdqIbM, "pextrb", "pextrb", &BX_CPU_C::PEXTRB_MbVdqIbM, NULL, BX_ISA_SSE4_1, OP_Mb, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PEXTRW_EdVdqIbR, "pextrw", "pextrw", NULL, &BX_CPU_C::PEXTRW_EdVdqIbR, BX_ISA_SSE4_1, OP_Ed, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PEXTRW_MwVdqIbM, "pextrw", "pextrw", &BX_CPU_C::PEXTRW_MwVdqIbM, NULL, BX_ISA_SSE4_1, OP_Mw, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
bx_define_opcode(BX_IA_PEXTRD_EdVdqIb, "pextrd", "pextrd", &BX_CPU_C::PEXTRD_EdVdqIbM, &BX_CPU_C::PEXTRD_EdVdqIbR, BX_ISA_SSE4_1, OP_Ed, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
#if BX_SUPPORT_X86_64
bx_define_opcode(BX_IA_PEXTRQ_EqVdqIb, "pextrq", "pextrq", &BX_CPU_C::PEXTRQ_EqVdqIbM, &BX_CPU_C::PEXTRQ_EqVdqIbR, BX_ISA_SSE4_1, OP_Eq, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_SSE)
@ -2277,8 +2279,10 @@ bx_define_opcode(BX_IA_V128_VMOVQ_EqVq, "vmovq", "vmovq", &BX_CPU_C::MOVSD_WsdVs
bx_define_opcode(BX_IA_V128_VPINSRB_VdqEbIb, "vpinsrb", "vpinsrb", &BX_CPU_C::VPINSRB_VdqHdqEbIbM, &BX_CPU_C::VPINSRB_VdqHdqEbIbR, BX_ISA_AVX, OP_Vdq, OP_Hdq, OP_Ew, OP_Ib, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V128_VPINSRW_VdqEwIb, "vpinsrw", "vpinsrw", &BX_CPU_C::VPINSRW_VdqHdqEwIbM, &BX_CPU_C::VPINSRW_VdqHdqEwIbR, BX_ISA_AVX, OP_Vdq, OP_Hdq, OP_Ew, OP_Ib, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V128_VPEXTRW_GdUdqIb, "vpextrw", "vpextrw", &BX_CPU_C::BxError, &BX_CPU_C::PEXTRW_GdUdqIb, BX_ISA_AVX, OP_Gd, OP_Wdq, OP_Ib, OP_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V128_VPEXTRB_EbdVdqIb, "vpextrb", "vpextrb", &BX_CPU_C::PEXTRB_EbdVdqIbM, &BX_CPU_C::PEXTRB_EbdVdqIbR, BX_ISA_AVX, OP_Ebd, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V128_VPEXTRW_EwdVdqIb, "vpextrw", "vpextrw", &BX_CPU_C::PEXTRW_EwdVdqIbM, &BX_CPU_C::PEXTRW_EwdVdqIbR, BX_ISA_AVX, OP_Ewd, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V128_VPEXTRB_EdVdqIbR, "vpextrb", "vpextrb", NULL, &BX_CPU_C::PEXTRB_EdVdqIbR, BX_ISA_AVX, OP_Ed, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V128_VPEXTRB_MbVdqIbM, "vpextrb", "vpextrb", &BX_CPU_C::PEXTRB_MbVdqIbM, NULL, BX_ISA_AVX, OP_Mb, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V128_VPEXTRW_EdVdqIbR, "vpextrw", "vpextrw", NULL, &BX_CPU_C::PEXTRW_EdVdqIbR, BX_ISA_AVX, OP_Ed, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V128_VPEXTRW_MwVdqIbM, "vpextrw", "vpextrw", &BX_CPU_C::PEXTRW_MwVdqIbM, NULL, BX_ISA_AVX, OP_Mw, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V128_VPINSRD_VdqEdIb, "vpinsrd", "vpinsrd", &BX_CPU_C::VPINSRD_VdqHdqEdIbM, &BX_CPU_C::VPINSRD_VdqHdqEdIbR, BX_ISA_AVX, OP_Vdq, OP_Hdq, OP_Ed, OP_Ib, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_V128_VPINSRQ_VdqEqIb, "vpinsrq", "vpinsrq", &BX_CPU_C::VPINSRQ_VdqHdqEqIbM, &BX_CPU_C::VPINSRQ_VdqHdqEqIbR, BX_ISA_AVX, OP_Vdq, OP_Hdq, OP_Eq, OP_Ib, BX_PREPARE_AVX)
@ -2756,6 +2760,21 @@ bx_define_opcode(BX_IA_MOVDIR64B_GdMdq, "movdir64b", "movdir64b", &BX_CPU_C::MOV
bx_define_opcode(BX_IA_MOVDIR64B_GqMdq, "movdir64b", "movdir64b", &BX_CPU_C::MOVDIR64B, NULL, BX_ISA_MOVDIR64B, OP_Gq, OP_M, OP_NONE, OP_NONE, 0)
#endif
#if BX_SUPPORT_AMX
// AMX opcode definitions, compiled in only when BX_SUPPORT_AMX is set.
// Every entry carries the BX_PREPARE_AMX flag.
//  - Entries with an OP_M operand (LDTILECFG, STTILECFG, TILELOADD,
//    TILELOADDT1, TILESTORED) supply only the first handler; the second is NULL.
//  - Entries without a memory operand (TILERELEASE, TILEZERO, TDP*) supply only
//    the second handler; the first is NULL.
//  - TILELOADDT1 reuses the TILELOADD_TnnnMdq handler.
//  - ISA gating: the TDPB{SS,SU,US,UU}D entries use BX_ISA_AMX_INT8, TDPBF16PS
//    uses BX_ISA_AMX_BF16, everything else uses BX_ISA_AMX.
bx_define_opcode(BX_IA_LDTILECFG, "ldtilecfg", "ldtilecfg", &BX_CPU_C::LDTILECFG, NULL, BX_ISA_AMX, OP_M, OP_NONE, OP_NONE, OP_NONE, BX_PREPARE_AMX)
bx_define_opcode(BX_IA_STTILECFG, "sttilecfg", "sttilecfg", &BX_CPU_C::STTILECFG, NULL, BX_ISA_AMX, OP_M, OP_NONE, OP_NONE, OP_NONE, BX_PREPARE_AMX)
bx_define_opcode(BX_IA_TILELOADD_TnnnMdq, "tileloadd", "tileloadd", &BX_CPU_C::TILELOADD_TnnnMdq, NULL, BX_ISA_AMX, OP_Tnnn, OP_M, OP_NONE, OP_NONE, BX_PREPARE_AMX)
bx_define_opcode(BX_IA_TILELOADDT1_TnnnMdq, "tileloaddt1", "tileloaddt1", &BX_CPU_C::TILELOADD_TnnnMdq, NULL, BX_ISA_AMX, OP_Tnnn, OP_M, OP_NONE, OP_NONE, BX_PREPARE_AMX)
bx_define_opcode(BX_IA_TILESTORED_MdqTnnn, "tilestored", "tilestored", &BX_CPU_C::TILESTORED_MdqTnnn, NULL, BX_ISA_AMX, OP_M, OP_Tnnn, OP_NONE, OP_NONE, BX_PREPARE_AMX)
bx_define_opcode(BX_IA_TILERELEASE, "tilerelease", "tilerelease", NULL, &BX_CPU_C::TILERELEASE, BX_ISA_AMX, OP_NONE, OP_NONE, OP_NONE, OP_NONE, BX_PREPARE_AMX)
bx_define_opcode(BX_IA_TILEZERO_Tnnn, "tilezero", "tilezero", NULL, &BX_CPU_C::TILEZERO_Tnnn, BX_ISA_AMX, OP_Tnnn, OP_NONE, OP_NONE, OP_NONE, BX_PREPARE_AMX)
bx_define_opcode(BX_IA_TDPBSSD_TnnnTrmTreg, "tdpbssd", "tdpbssd", NULL, &BX_CPU_C::TDPBSSD_TnnnTrmTreg, BX_ISA_AMX_INT8, OP_Tnnn, OP_Trm, OP_Treg, OP_NONE, BX_PREPARE_AMX)
bx_define_opcode(BX_IA_TDPBSUD_TnnnTrmTreg, "tdpbsud", "tdpbsud", NULL, &BX_CPU_C::TDPBSUD_TnnnTrmTreg, BX_ISA_AMX_INT8, OP_Tnnn, OP_Trm, OP_Treg, OP_NONE, BX_PREPARE_AMX)
bx_define_opcode(BX_IA_TDPBUSD_TnnnTrmTreg, "tdpbusd", "tdpbusd", NULL, &BX_CPU_C::TDPBUSD_TnnnTrmTreg, BX_ISA_AMX_INT8, OP_Tnnn, OP_Trm, OP_Treg, OP_NONE, BX_PREPARE_AMX)
bx_define_opcode(BX_IA_TDPBUUD_TnnnTrmTreg, "tdpbuud", "tdpbuud", NULL, &BX_CPU_C::TDPBUUD_TnnnTrmTreg, BX_ISA_AMX_INT8, OP_Tnnn, OP_Trm, OP_Treg, OP_NONE, BX_PREPARE_AMX)
bx_define_opcode(BX_IA_TDPBF16PS_TnnnTrmTreg, "tdpbf16ps", "tdpbf16ps", NULL, &BX_CPU_C::TDPBF16PS_TnnnTrmTreg, BX_ISA_AMX_BF16, OP_Tnnn, OP_Trm, OP_Treg, OP_NONE, BX_PREPARE_AMX)
#endif
#if BX_SUPPORT_AVX
bx_define_opcode(BX_IA_KADDW_KGwKHwKEw, "kaddw", "kaddw", &BX_CPU_C::BxError, &BX_CPU_C::KADDW_KGwKHwKEwR, BX_ISA_AVX512_DQ, OP_KGw, OP_KHw, OP_KEw, OP_NONE, BX_PREPARE_OPMASK)
bx_define_opcode(BX_IA_KADDQ_KGqKHqKEq, "kaddq", "kaddq", &BX_CPU_C::BxError, &BX_CPU_C::KADDQ_KGqKHqKEqR, BX_ISA_AVX512_BW, OP_KGq, OP_KHq, OP_KEq, OP_NONE, BX_PREPARE_OPMASK)
@ -3973,8 +3992,10 @@ bx_define_opcode(BX_IA_V512_VCVTTSD2USI_GqWsd, "vcvttsd2usi", "vcvttsd2usiq", &B
bx_define_opcode(BX_IA_V512_VPINSRB_VdqEbIb, "vpinsrb", "vpinsrb", &BX_CPU_C::VPINSRB_VdqHdqEbIbM, &BX_CPU_C::VPINSRB_VdqHdqEbIbR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_Ew, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPINSRW_VdqEwIb, "vpinsrw", "vpinsrw", &BX_CPU_C::VPINSRW_VdqHdqEwIbM, &BX_CPU_C::VPINSRW_VdqHdqEwIbR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_Ew, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPEXTRW_GdUdqIb, "vpextrw", "vpextrw", &BX_CPU_C::BxError, &BX_CPU_C::PEXTRW_GdUdqIb, BX_ISA_AVX512_BW, OP_Gd, OP_Wdq, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPEXTRB_EbdVdqIb, "vpextrb", "vpextrb", &BX_CPU_C::PEXTRB_EbdVdqIbM, &BX_CPU_C::PEXTRB_EbdVdqIbR, BX_ISA_AVX512_BW, OP_Ebd, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPEXTRW_EwdVdqIb, "vpextrw", "vpextrw", &BX_CPU_C::PEXTRW_EwdVdqIbM, &BX_CPU_C::PEXTRW_EwdVdqIbR, BX_ISA_AVX512_BW, OP_Ewd, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPEXTRB_EdVdqIbR, "vpextrb", "vpextrb", NULL, &BX_CPU_C::PEXTRB_EdVdqIbR, BX_ISA_AVX512_BW, OP_Ed, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPEXTRB_MbVdqIbM, "vpextrb", "vpextrb", &BX_CPU_C::PEXTRB_MbVdqIbM, NULL, BX_ISA_AVX512_BW, OP_Mb, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPEXTRW_EdVdqIbR, "vpextrw", "vpextrw", NULL, &BX_CPU_C::PEXTRW_EdVdqIbR, BX_ISA_AVX512_BW, OP_Ed, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPEXTRW_MwVdqIbM, "vpextrw", "vpextrw", &BX_CPU_C::PEXTRW_MwVdqIbM, NULL, BX_ISA_AVX512_BW, OP_Mw, OP_Vdq, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPINSRD_VdqEdIb, "vpinsrd", "vpinsrd", &BX_CPU_C::VPINSRD_VdqHdqEdIbM, &BX_CPU_C::VPINSRD_VdqHdqEdIbR, BX_ISA_AVX512_DQ, OP_Vdq, OP_Hdq, OP_Ed, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPINSRQ_VdqEqIb, "vpinsrq", "vpinsrq", &BX_CPU_C::VPINSRQ_VdqHdqEqIbM, &BX_CPU_C::VPINSRQ_VdqHdqEqIbR, BX_ISA_AVX512_DQ, OP_Vdq, OP_Hdq, OP_Eq, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)

View File

@ -33,6 +33,10 @@
#include "apic.h"
#endif
#if BX_SUPPORT_AMX
#include "avx/amx.h"
#endif
#include <stdlib.h>
BX_CPU_C::BX_CPU_C(unsigned id): bx_cpuid(id)
@ -131,6 +135,13 @@ void BX_CPU_C::initialize(void)
xsave_xrestor_init();
#endif
#if BX_SUPPORT_AMX
amx = NULL;
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_AMX)) {
amx = new AMX;
}
#endif
#if BX_CONFIGURE_MSRS
for (unsigned n=0; n < BX_MSR_MAX_INDEX; n++) {
BX_CPU_THIS_PTR msrs[n] = 0;
@ -505,6 +516,27 @@ void BX_CPU_C::register_state(void)
}
}
#endif
#if BX_SUPPORT_AMX
if (BX_CPUID_SUPPORT_ISA_EXTENSION(BX_ISA_AMX)) {
bx_list_c *amx_list = new bx_list_c(cpu, "AMX");
BXRS_DEC_PARAM_FIELD(amx_list, palette, amx->palette_id);
BXRS_DEC_PARAM_FIELD(amx_list, start_row, amx->start_row);
BXRS_HEX_PARAM_FIELD(amx_list, tile_use_tracker, amx->tile_use_tracker);
for (n=0; n<8; n++) {
sprintf(name, "tile%d_rows", n);
new bx_shadow_num_c(amx_list, name, &(amx->tilecfg[n].rows), BASE_DEC);
sprintf(name, "tile%d_colsb", n);
new bx_shadow_num_c(amx_list, name, &(amx->tilecfg[n].bytes_per_row), BASE_DEC);
for(unsigned row=0;row < 16;row++) {
for(unsigned j=0;j < BX_VLMAX*2;j++) {
sprintf(name, "tile%d_row%d_%d", n, row, j);
new bx_shadow_num_c(amx_list, name, &(amx->tile[n].row[row].vmm64u(j)), BASE_HEX);
}
}
}
}
#endif
#endif // BX_CPU_LEVEL >= 6
#if BX_SUPPORT_MONITOR_MWAIT
@ -709,6 +741,10 @@ BX_CPU_C::~BX_CPU_C()
delete lapic;
#endif
#if BX_SUPPORT_AMX
delete amx;
#endif
#if InstrumentCPU
delete stats;
#endif

View File

@ -524,6 +524,14 @@ void BX_CPU_C::handleAvxModeChange(void)
}
}
#if BX_SUPPORT_AMX
if (! long64_mode() || ! BX_CPU_THIS_PTR cr4.get_OSXSAVE() ||
(~BX_CPU_THIS_PTR xcr0.val32 & (BX_XCR0_XTILECFG_MASK | BX_XCR0_XTILEDATA_MASK)) != 0)
clear_amx_ok();
else
set_amx_ok();
#endif
updateFetchModeMask(); /* AVX_OK changed */
}
@ -578,6 +586,21 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::BxNoEVEX(bxInstruction_c *i)
}
#endif
#if BX_SUPPORT_AMX
// Raises #UD when the prerequisites for AMX are not met: the CPU must be in
// 64-bit long mode with CR4.OSXSAVE set, and both the XTILECFG and XTILEDATA
// bits must be enabled in XCR0.
// NOTE(review): presumably installed as the handler for AMX opcodes while AMX
// is not usable - confirm against the dispatch code.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::BxNoAMX(bxInstruction_c *i)
{
  const bool xsave_in_long_mode = long64_mode() && BX_CPU_THIS_PTR cr4.get_OSXSAVE();
  if (! xsave_in_long_mode) {
    exception(BX_UD_EXCEPTION, 0);
  }

  const bool tile_state_disabled =
      (~BX_CPU_THIS_PTR xcr0.val32 & (BX_XCR0_XTILECFG_MASK | BX_XCR0_XTILEDATA_MASK)) != 0;
  if (tile_state_disabled) {
    exception(BX_UD_EXCEPTION, 0);
  }

  // Neither check fired: the code treats reaching this point as a bug.
  BX_ASSERT(0);

  BX_NEXT_TRACE(i); // keep compiler happy
}
#endif
#endif
void BX_CPU_C::handleCpuContextChange(void)

View File

@ -122,7 +122,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PBLENDW_VdqWdqIbR(bxInstruction_c *i)
BX_NEXT_INSTR(i);
}
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRB_EbdVdqIbR(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRB_EdVdqIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
Bit8u result = op.xmmubyte(i->Ib() & 0xF);
@ -131,7 +131,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRB_EbdVdqIbR(bxInstruction_c *i)
BX_NEXT_INSTR(i);
}
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRB_EbdVdqIbM(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRB_MbVdqIbM(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
Bit8u result = op.xmmubyte(i->Ib() & 0xF);
@ -142,7 +142,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRB_EbdVdqIbM(bxInstruction_c *i)
BX_NEXT_INSTR(i);
}
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_EwdVdqIbR(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_EdVdqIbR(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
Bit16u result = op.xmm16u(i->Ib() & 7);
@ -151,7 +151,7 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_EwdVdqIbR(bxInstruction_c *i)
BX_NEXT_INSTR(i);
}
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_EwdVdqIbM(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::PEXTRW_MwVdqIbM(bxInstruction_c *i)
{
BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
Bit16u result = op.xmm16u(i->Ib() & 7);

View File

@ -2,7 +2,7 @@
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2008-2019 Stanislav Shwartsman
// Copyright (c) 2008-2024 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
@ -1043,6 +1043,87 @@ bool BX_CPU_C::xsave_uintr_state_xinuse(void)
}
#endif
#if BX_SUPPORT_AMX
#include "avx/amx.h"
// TILECFG state management //
// Writes the 64-byte TILECFG image to memory at 'offset' in segment i->seg().
// The image is all zeroes when no tiles are configured. Otherwise it follows
// the architectural tile configuration layout:
//   byte  0        palette id
//   byte  1        start_row
//   bytes 16+2*n   tile n bytes-per-row (colsb), 16-bit
//   byte  48+n     tile n rows, 8-bit
void BX_CPU_C::xsave_tilecfg_state(bxInstruction_c *i, bx_address offset)
{
  BxPackedAvxRegister tilecfg;
  tilecfg.clear();

  if (BX_CPU_THIS_PTR amx->tiles_configured()) {
    tilecfg.vmmubyte(0) = BX_CPU_THIS_PTR amx->palette_id;
    tilecfg.vmmubyte(1) = BX_CPU_THIS_PTR amx->start_row;
    for (unsigned n=0; n < 8; n++) {
      // colsb is the 16-bit field (word index 8+n == byte 16+2n) and rows the
      // 8-bit field; the two were previously stored in each other's slots.
      tilecfg.vmm16u(8+n) = BX_CPU_THIS_PTR amx->tilecfg[n].bytes_per_row;
      tilecfg.vmmubyte(48+n) = BX_CPU_THIS_PTR amx->tilecfg[n].rows;
    }
  }

  write_virtual_zmmword(i->seg(), offset, &tilecfg);
}
// Reads a 64-byte TILECFG image from memory at 'offset' in segment i->seg()
// and hands it to configure_tiles(). If configure_tiles() reports failure,
// the AMX state is cleared instead.
void BX_CPU_C::xrstor_tilecfg_state(bxInstruction_c *i, bx_address offset)
{
  BxPackedAvxRegister image;
  read_virtual_zmmword(i->seg(), offset, &image);

  const bool accepted = configure_tiles(i, image);
  if (accepted) {
    return;
  }

  BX_CPU_THIS_PTR amx->clear();
}
void BX_CPU_C::xrstor_init_tilecfg_state(void)
{
BX_CPU_THIS_PTR amx->clear();
}
// Reports whether the TILECFG component is in use, i.e. whether tiles are
// currently configured.
bool BX_CPU_C::xsave_tilecfg_state_xinuse(void)
{
  const bool configured = BX_CPU_THIS_PTR amx->tiles_configured();
  return configured;
}
// TILEDATA state management //
// Writes the TILEDATA component to memory starting at 'offset' in segment
// i->seg(). Each tile row is stored as one 64-byte block; row 'row' of tile
// 'tile' lands at offset + (tile*BX_TILE_MAX_ROWS + row)*64, wrapped to the
// instruction's address size.
void BX_CPU_C::xsave_tiledata_state(bxInstruction_c *i, bx_address offset)
{
  bx_address asize_mask = i->asize_mask();

  for (unsigned tile=0; tile < BX_TILE_REGISTERS; tile++) {
    // Iterate over every row of the tile. The bound must be the row count
    // (matching the stride in the address computation), not the tile count.
    for (unsigned row=0; row < BX_TILE_MAX_ROWS; row++) {
      write_virtual_zmmword(i->seg(), (offset+(tile*BX_TILE_MAX_ROWS+row)*64) & asize_mask, &(BX_CPU_THIS_PTR amx->tile[tile].row[row]));
    }
  }
}
// Loads the TILEDATA component from memory starting at 'offset' in segment
// i->seg(). Each tile row is read as one 64-byte block from
// offset + (tile*BX_TILE_MAX_ROWS + row)*64, wrapped to the instruction's
// address size. Every tile is marked as used once its rows are loaded.
void BX_CPU_C::xrstor_tiledata_state(bxInstruction_c *i, bx_address offset)
{
  bx_address asize_mask = i->asize_mask();

  for (unsigned tile=0; tile < BX_TILE_REGISTERS; tile++) {
    // Iterate over every row of the tile. The bound must be the row count
    // (matching the stride in the address computation), not the tile count.
    for (unsigned row=0; row < BX_TILE_MAX_ROWS; row++) {
      read_virtual_zmmword(i->seg(), (offset+(tile*BX_TILE_MAX_ROWS+row)*64) & asize_mask, &(BX_CPU_THIS_PTR amx->tile[tile].row[row]));
    }
    BX_CPU_THIS_PTR amx->set_tile_used(tile);
  }
}
// Puts the TILEDATA component into its initial state: every tile register is
// cleared and its "used" mark is dropped.
void BX_CPU_C::xrstor_init_tiledata_state(void)
{
  unsigned tile = 0;
  while (tile < BX_TILE_REGISTERS) {
    BX_CPU_THIS_PTR amx->tile[tile].clear();
    BX_CPU_THIS_PTR amx->clear_tile_used(tile);
    ++tile;
  }
}
// Reports whether the TILEDATA component is in use. tile_use_tracker gets a
// tile marked by set_tile_used() when tile data is restored and unmarked by
// clear_tile_used() when the tile is reset, so a zero tracker means every
// tile is in its initial state and the component is NOT in use.
bool BX_CPU_C::xsave_tiledata_state_xinuse(void)
{
  return (BX_CPU_THIS_PTR amx->tile_use_tracker != 0); // in use when any tile is marked used
}
#endif
Bit32u BX_CPU_C::get_xinuse_vector(Bit32u requested_feature_bitmap)
{
Bit32u xinuse = 0;
@ -1166,6 +1247,15 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::XSETBV(bxInstruction_c *i)
}
#endif
#if BX_SUPPORT_AMX
if (EAX & BX_XCR0_XTILE_BITS_MASK) {
if ((EAX & BX_XCR0_XTILE_BITS_MASK) != BX_XCR0_XTILE_BITS_MASK) {
BX_ERROR(("XSETBV: Illegal attempt to enable AMX state"));
exception(BX_GP_EXCEPTION, 0);
}
}
#endif
BX_CPU_THIS_PTR xcr0.set32(EAX);
#if BX_SUPPORT_AVX

View File

@ -7223,6 +7223,7 @@ From here, you may use the following commands:
mmx List of all MMX registers and their contents
sse|xmm List of all SSE registers and their contents
ymm|zmm List of all AVX registers and their contents
amx|tile n Show AMX state and TILE register contents
sreg Show segment registers and their contents
dreg Show debug registers and their contents
creg Show control registers and their contents