tests/tcg: i386: add SSE tests

Tests for correct operation of most x86-64 SSE instructions.
It should cover all combinations of overlapping register and memory
operands on a set of random-ish data.

Results are bit-identical to an Intel i5-8500, with the exception of
the RCPSS and RSQRT approximations where the real CPU gives less accurate
results (the Intel spec allows relative errors up to 1.5 * 2^-12)

Signed-off-by: Paul Brook <paul@nowt.org>
Acked-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20220424220204.2493824-42-paul@nowt.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Paul Brook 2022-04-24 23:02:03 +01:00 committed by Paolo Bonzini
parent bf30ad8cef
commit 91117bc546
7 changed files with 5359 additions and 3 deletions

View File

@ -188,4 +188,4 @@ gdb-%: %
run: $(RUN_TESTS) run: $(RUN_TESTS)
clean: clean:
rm -f $(TESTS) *.o rm -f $(TESTS) *.o $(CLEANFILES)

View File

@ -7,8 +7,8 @@ VPATH += $(I386_SRC)
I386_SRCS=$(notdir $(wildcard $(I386_SRC)/*.c)) I386_SRCS=$(notdir $(wildcard $(I386_SRC)/*.c))
ALL_X86_TESTS=$(I386_SRCS:.c=) ALL_X86_TESTS=$(I386_SRCS:.c=)
SKIP_I386_TESTS=test-i386-ssse3 SKIP_I386_TESTS=test-i386-ssse3 test-avx
X86_64_TESTS:=$(filter test-i386-bmi2 test-i386-ssse3, $(ALL_X86_TESTS)) X86_64_TESTS:=$(filter test-i386-bmi2 test-i386-ssse3 test-avx, $(ALL_X86_TESTS))
test-i386-sse-exceptions: CFLAGS += -msse4.1 -mfpmath=sse test-i386-sse-exceptions: CFLAGS += -msse4.1 -mfpmath=sse
run-test-i386-sse-exceptions: QEMU_OPTS += -cpu max run-test-i386-sse-exceptions: QEMU_OPTS += -cpu max
@ -81,3 +81,10 @@ run-sha512-sse: QEMU_OPTS+=-cpu max
run-plugin-sha512-sse-with-%: QEMU_OPTS+=-cpu max run-plugin-sha512-sse-with-%: QEMU_OPTS+=-cpu max
TESTS+=sha512-sse TESTS+=sha512-sse
CLEANFILES += test-avx.h
test-avx.h: test-avx.py x86.csv
$(PYTHON) $(I386_SRC)/test-avx.py $(I386_SRC)/x86.csv $@
test-avx: CFLAGS += -masm=intel -O -I.
test-avx: test-avx.h

View File

@ -15,6 +15,15 @@ The Linux system call vm86() is used to test vm86 emulation.
Various exceptions are raised to test most of the x86 user space Various exceptions are raised to test most of the x86 user space
exception reporting. exception reporting.
test-avx
--------
This program executes most SSE/AVX instructions and generates a text output,
for comparison with the output obtained with a real CPU or another emulator.
test-avx.h is generated from x86.csv by test-avx.py
x86.csv comes from https://github.com/quasilyte/avx512test
linux-test linux-test
---------- ----------

330
tests/tcg/i386/test-avx.c Normal file
View File

@ -0,0 +1,330 @@
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/* Entry point of one generated test: takes no arguments, returns nothing. */
typedef void (*testfn)(void);

/* One 128-bit vector value, stored as two 64-bit halves (q0 first in memory). */
typedef struct {
    uint64_t q0, q1;
} __attribute__((aligned(16))) v2di;

/*
 * Snapshot of the state a test starts from / ends with.
 *
 * The inline assembly in run_test() addresses the leading fields by
 * hard-coded byte offsets (mm at 0x00, xmm at 0x40, r at 0x140, flags at
 * 0x1c0), so the order and size of those fields must not change.
 */
typedef struct {
    uint64_t mm[8];     /* MMX registers mm0..mm7 */
    v2di xmm[16];       /* SSE registers xmm0..xmm15 */
    uint64_t r[16];     /* general purpose register slots */
    uint64_t flags;     /* low byte of the flags register */
    uint32_t ff;        /* float view used when dumping: 0, 32 or 64 */
    uint64_t pad;
    v2di mem[4];        /* scratch memory the tests may read and write */
    v2di mem0[4];       /* pristine copy of mem, restored before each test */
} reg_state;

/* One row of the test table. */
typedef struct {
    int n;              /* test number */
    testfn fn;          /* code to run */
    const char *s;      /* instruction text, printed before running */
    reg_state *init;    /* initial state to load */
} TestDef;

/* Initial states for integer, single-precision and double-precision tests. */
reg_state initI;
reg_state initF32;
reg_state initF64;
/*
 * Print one 128-bit value as two hex quadwords (high half first).
 * When ff is 64 or 32 the same bytes are additionally printed as two
 * doubles or four floats, highest lane first; any other ff prints hex only.
 */
static void dump_xmm(const char *name, int n, const v2di *r, int ff)
{
    printf("%s%d = %016lx %016lx\n",
           name, n, r->q1, r->q0);

    switch (ff) {
    case 64: {
        double lanes[2];

        memcpy(lanes, r, sizeof(lanes));
        printf(" %16g %16g\n",
               lanes[1], lanes[0]);
        break;
    }
    case 32: {
        float lanes[4];

        memcpy(lanes, r, sizeof(lanes));
        printf(" %8g %8g %8g %8g\n",
               lanes[3], lanes[2], lanes[1], lanes[0]);
        break;
    }
    default:
        break;
    }
}
/*
 * Print every XMM register and every pristine memory slot (mem0) of a
 * state in hex only (the float view is not requested here).
 */
static void dump_regs(reg_state *s)
{
    int idx = 0;

    while (idx < 16) {
        dump_xmm("xmm", idx, &s->xmm[idx], 0);
        idx++;
    }

    idx = 0;
    while (idx < 4) {
        dump_xmm("mem", idx, &s->mem0[idx], 0);
        idx++;
    }
}
/*
 * Print every piece of state in b that differs from a.
 *
 * a is the initial state and b the state captured after the test.
 * Memory is handled differently: the scratch area a->mem is compared with
 * the pristine copy a->mem0 (both inside a), and changed slots of a->mem
 * are printed.
 */
static void compare_state(const reg_state *a, const reg_state *b)
{
    int idx;

    for (idx = 0; idx < 8; idx++) {
        if (a->mm[idx] == b->mm[idx]) {
            continue;
        }
        printf("MM%d = %016lx\n", idx, b->mm[idx]);
    }

    for (idx = 0; idx < 16; idx++) {
        if (a->r[idx] == b->r[idx]) {
            continue;
        }
        printf("r%d = %016lx\n", idx, b->r[idx]);
    }

    for (idx = 0; idx < 16; idx++) {
        if (memcmp(&a->xmm[idx], &b->xmm[idx], 16) != 0) {
            dump_xmm("xmm", idx, &b->xmm[idx], a->ff);
        }
    }

    for (idx = 0; idx < 4; idx++) {
        if (memcmp(&a->mem0[idx], &a->mem[idx], 16) != 0) {
            dump_xmm("mem", idx, &a->mem[idx], a->ff);
        }
    }

    if (b->flags != a->flags) {
        printf("FLAGS = %016lx\n", b->flags);
    }
}
/*
 * Assembly fragments (Intel syntax) used by run_test().
 * LOAD* read register r from byte offset o of asm operand %0,
 * STORE* write register r to byte offset o of asm operand %1.
 */
#define LOADMM(r, o) "movq " #r ", " #o "[%0]\n\t"
#define LOADXMM(r, o) "movdqa " #r ", " #o "[%0]\n\t"
#define STOREMM(r, o) "movq " #o "[%1], " #r "\n\t"
#define STOREXMM(r, o) "movdqa " #o "[%1], " #r "\n\t"
/* Apply F(register, offset) to each MMX register; offsets step by 8 from 0x00. */
#define MMREG(F) \
    F(mm0, 0x00) \
    F(mm1, 0x08) \
    F(mm2, 0x10) \
    F(mm3, 0x18) \
    F(mm4, 0x20) \
    F(mm5, 0x28) \
    F(mm6, 0x30) \
    F(mm7, 0x38)
/* Apply F(register, offset) to each XMM register; offsets step by 16 from 0x40. */
#define XMMREG(F) \
    F(xmm0, 0x040) \
    F(xmm1, 0x050) \
    F(xmm2, 0x060) \
    F(xmm3, 0x070) \
    F(xmm4, 0x080) \
    F(xmm5, 0x090) \
    F(xmm6, 0x0a0) \
    F(xmm7, 0x0b0) \
    F(xmm8, 0x0c0) \
    F(xmm9, 0x0d0) \
    F(xmm10, 0x0e0) \
    F(xmm11, 0x0f0) \
    F(xmm12, 0x100) \
    F(xmm13, 0x110) \
    F(xmm14, 0x120) \
    F(xmm15, 0x130)
/*
 * Assembly fragments (Intel syntax) that load/store one general purpose
 * register from/to byte offset o of the structure addressed by rax.
 */
#define LOADREG(r, o) "mov " #r ", " #o "[rax]\n\t"
#define STOREREG(r, o) "mov " #o "[rax], " #r "\n\t"
/*
 * Apply F(register, offset) to the general purpose registers handled
 * generically.  The slots at 0x140 (used for rax by run_test()), 0x170 and
 * 0x178 are deliberately not listed here.
 *
 * The last entry must NOT end in a line continuation: a trailing backslash
 * would splice whatever line follows the macro into its replacement list.
 */
#define REG(F) \
    F(rbx, 0x148) \
    F(rcx, 0x150) \
    F(rdx, 0x158) \
    F(rsi, 0x160) \
    F(rdi, 0x168) \
    F(r8, 0x180) \
    F(r9, 0x188) \
    F(r10, 0x190) \
    F(r11, 0x198) \
    F(r12, 0x1a0) \
    F(r13, 0x1a8) \
    F(r14, 0x1b0) \
    F(r15, 0x1b8)
/*
 * Run a single test and print the state it changed.
 *
 * The initial state t->init is loaded into the MMX, XMM, general purpose
 * registers and the low byte of the flags, t->fn is called, and the
 * resulting registers and flags are captured into a local reg_state which
 * is then diffed against the initial state by compare_state().
 *
 * Asm operands: %0 = init, %1 = &result, %2 = t->fn.
 */
static void run_test(const TestDef *t)
{
    reg_state result;
    reg_state *init = t->init;
    /* Restore the scratch memory from its pristine copy before each run. */
    memcpy(init->mem, init->mem0, sizeof(init->mem));
    printf("%5d %s\n", t->n, t->s);
    asm volatile(
        /* Load the initial MMX and XMM registers from *init (%0). */
        MMREG(LOADMM)
        XMMREG(LOADXMM)
        /* Move the stack pointer down before pushing (presumably to step
         * over the red zone below rsp -- confirm against the ABI). */
        "sub rsp, 128\n\t"
        /* Save the registers that are not in the clobber list. */
        "push rax\n\t"
        "push rbx\n\t"
        "push rcx\n\t"
        "push rdx\n\t"
        /* Stack now ends with: [rsp + 8] = &result, [rsp] = test function. */
        "push %1\n\t"
        "push %2\n\t"
        "mov rax, %0\n\t"
        /* Replace the low 8 bits of the current flags with init->flags. */
        "pushf\n\t"
        "pop rbx\n\t"
        "shr rbx, 8\n\t"
        "shl rbx, 8\n\t"
        "mov rcx, 0x1c0[rax]\n\t"
        "and rcx, 0xff\n\t"
        "or rbx, rcx\n\t"
        "push rbx\n\t"
        "popf\n\t"
        /* Load the general purpose registers; rax goes last because it is
         * the base pointer for all the other loads. */
        REG(LOADREG)
        "mov rax, 0x140[rax]\n\t"
        /* Call the test through the function pointer saved at [rsp]. */
        "call [rsp]\n\t"
        /* Reuse the function pointer slot to stash the test's rax, then
         * point rax at result to store everything else. */
        "mov [rsp], rax\n\t"
        "mov rax, 8[rsp]\n\t"
        REG(STOREREG)
        "mov rbx, [rsp]\n\t"
        "mov 0x140[rax], rbx\n\t"
        /* Zero the two register slots that REG() does not cover. */
        "mov rbx, 0\n\t"
        "mov 0x170[rax], rbx\n\t"
        "mov 0x178[rax], rbx\n\t"
        /* Capture the low 8 bits of the resulting flags. */
        "pushf\n\t"
        "pop rbx\n\t"
        "and rbx, 0xff\n\t"
        "mov 0x1c0[rax], rbx\n\t"
        /* Drop the two pushed operands and restore the saved registers. */
        "add rsp, 16\n\t"
        "pop rdx\n\t"
        "pop rcx\n\t"
        "pop rbx\n\t"
        "pop rax\n\t"
        "add rsp, 128\n\t"
        /* Store the resulting MMX and XMM registers into result (%1). */
        MMREG(STOREMM)
        XMMREG(STOREXMM)
        : : "r"(init), "r"(&result), "r"(t->fn)
        : "memory", "cc",
        "rsi", "rdi",
        "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
        "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
        "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
        "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
        "xmm12", "xmm13", "xmm14", "xmm15"
        );
    compare_state(init, &result);
}
/*
 * The generated header test-avx.h is included twice with different
 * definitions of TEST(n, cmd, type).
 *
 * First pass: every entry becomes a naked function test_<n> whose body is
 * the instruction string cmd followed by "ret".
 */
#define TEST(n, cmd, type) \
static void __attribute__((naked)) test_##n(void) \
{ \
    asm volatile(cmd); \
    asm volatile("ret"); \
}
#include "test-avx.h"
/*
 * Second pass: every entry becomes a table row {number, function,
 * instruction text, initial state init<type>}.  TEST can be defined again
 * here because the generator script ends the header with "#undef TEST".
 * The table is terminated by a row whose fn is NULL.
 */
static const TestDef test_table[] = {
#define TEST(n, cmd, type) {n, test_##n, cmd, &init##type},
#include "test-avx.h"
    {-1, NULL, "", NULL}
};
/* Run every entry of test_table in order, stopping at the NULL-fn sentinel. */
static void run_all(void)
{
    const TestDef *entry = test_table;

    while (entry->fn) {
        run_test(entry);
        entry++;
    }
}
/* Number of elements of a true array (not a pointer); argument is parenthesized. */
#define ARRAY_LEN(x) (sizeof(x) / sizeof((x)[0]))
/*
 * Source values for the initial register contents.  The init_*reg()
 * helpers walk these arrays cyclically.  val_f32 has nine entries and
 * val_f64 eight.
 */
float val_f32[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5, 8.3};
double val_f64[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5};
v2di val_i64[] = {
    {0x3d6b3b6a9e4118f2lu, 0x355ae76d2774d78clu},
    {0xd851c54a56bf1f29lu, 0x4a84d1d50bf4c4fflu},
    {0x5826475e2c5fd799lu, 0xfd32edc01243f5e9lu},
};
/* Filler pattern written by init_all() into xmm0..xmm7 and mem0[0..1]. */
v2di deadbeef = {0xa5a5a5a5deadbeefull, 0xa5a5a5a5deadbeefull};
/* Values placed by init_all() in xmm14 (indexq) and xmm13 (indexd). */
v2di indexq = {0x000000000000001full, 0x000000000000008full};
v2di indexd = {0x00000002000000efull, 0xfffffff500000010ull};
/*
 * Fill *r with the next four values of val_f32.  The position in val_f32
 * is remembered between calls and wraps around at the end of the array.
 */
void init_f32reg(v2di *r)
{
    static int cursor;
    float lanes[4];

    for (int lane = 0; lane < 4; lane++) {
        lanes[lane] = val_f32[cursor];
        cursor++;
        if (cursor == ARRAY_LEN(val_f32)) {
            cursor = 0;
        }
    }
    memcpy(r, lanes, sizeof(*r));
}
/*
 * Fill *r with the next two values of val_f64.  The position in val_f64
 * is remembered between calls and wraps around at the end of the array.
 */
void init_f64reg(v2di *r)
{
    static int cursor;
    double lanes[2];

    for (int lane = 0; lane < 2; lane++) {
        lanes[lane] = val_f64[cursor];
        cursor++;
        if (cursor == ARRAY_LEN(val_f64)) {
            cursor = 0;
        }
    }
    memcpy(r, lanes, sizeof(*r));
}
/*
 * Fill *r with the next entry of val_i64, XORed with a running mask.
 * The position in val_i64 is remembered between calls and wraps around.
 *
 * NOTE(review): the mask starts at zero and is only ever multiplied, so it
 * stays zero and the XOR currently has no effect.
 */
void init_intreg(v2di *r)
{
    static uint64_t mask;
    static int cursor;
    const v2di *src = &val_i64[cursor];

    r->q0 = src->q0 ^ mask;
    r->q1 = src->q1 ^ mask;

    cursor++;
    if (cursor == ARRAY_LEN(val_i64)) {
        cursor = 0;
        mask *= 0x104C11DB7;
    }
}
/*
 * Set up the parts of an initial state shared by all test types:
 * two pointer registers aimed at the scratch memory, the flags value,
 * the filler pattern in xmm0..xmm7 and mem0[0..1], and the two index
 * vectors in xmm13/xmm14.
 */
static void init_all(reg_state *s)
{
    int k;

    s->r[3] = (uint64_t)&s->mem[0]; /* rdx */
    s->r[5] = (uint64_t)&s->mem[2]; /* rdi */
    s->flags = 2;

    k = 0;
    while (k < 8) {
        s->xmm[k] = deadbeef;
        k++;
    }

    s->xmm[13] = indexd;
    s->xmm[14] = indexq;

    k = 0;
    while (k < 2) {
        s->mem0[k] = deadbeef;
        k++;
    }
}
/*
 * Build the three initial states (integer, float32, float64), dump them,
 * then run either the single test whose number is given as argv[1] or,
 * with no argument, every test in the table.
 *
 * Returns 0 on success, 1 if argv[1] is not a valid test number.
 */
int main(int argc, char *argv[])
{
    init_all(&initI);
    init_intreg(&initI.xmm[10]);
    init_intreg(&initI.xmm[11]);
    init_intreg(&initI.xmm[12]);
    init_intreg(&initI.mem0[1]);
    printf("Int:\n");
    dump_regs(&initI);

    init_all(&initF32);
    init_f32reg(&initF32.xmm[10]);
    init_f32reg(&initF32.xmm[11]);
    init_f32reg(&initF32.xmm[12]);
    init_f32reg(&initF32.mem0[1]);
    initF32.ff = 32;
    printf("F32:\n");
    dump_regs(&initF32);

    init_all(&initF64);
    init_f64reg(&initF64.xmm[10]);
    init_f64reg(&initF64.xmm[11]);
    init_f64reg(&initF64.xmm[12]);
    init_f64reg(&initF64.mem0[1]);
    initF64.ff = 64;
    printf("F64:\n");
    dump_regs(&initF64);

    if (argc > 1) {
        /* The last table row is the NULL-fn sentinel, not a real test. */
        const long ntests =
            (long)(sizeof(test_table) / sizeof(test_table[0])) - 1;
        char *end;
        long n = strtol(argv[1], &end, 10);

        /*
         * Reject non-numeric and out-of-range arguments instead of
         * indexing past the table (or calling the sentinel's NULL fn).
         */
        if (end == argv[1] || *end != '\0' || n < 0 || n >= ntests) {
            fprintf(stderr, "%s: invalid test number '%s' (expected 0..%ld)\n",
                    argv[0], argv[1], ntests - 1);
            return 1;
        }
        run_test(&test_table[n]);
    } else {
        run_all();
    }
    return 0;
}

351
tests/tcg/i386/test-avx.py Executable file
View File

@ -0,0 +1,351 @@
#! /usr/bin/env python3
# Generate test-avx.h from x86.csv
import csv
import sys
from fnmatch import fnmatch
# CPUID feature names (column 6 of x86.csv) for which tests are generated.
archs = [
    # TODO: MMX?
    "SSE",
    "SSE2",
    "SSE3",
    "SSSE3",
    "SSE4_1",
    "SSE4_2",
]

# Mnemonics that are never turned into tests.
ignore = {
    "FISTTP",
    "LDMXCSR",
    "VLDMXCSR",
    "STMXCSR",
    "VSTMXCSR",
}

# Mask of the imm8 bits to enumerate for each instruction, keyed by an
# fnmatch pattern on the mnemonic.  A leading lowercase 'v' means "with or
# without a V prefix" (see match()).  Lookup takes the first matching entry,
# so the order of this table is significant.
imask = {
    'vBLENDPD': 0xff,
    'vBLENDPS': 0x0f,
    'CMP[PS][SD]': 0x07,
    'VCMP[PS][SD]': 0x1f,
    'vDPPD': 0x33,
    'vDPPS': 0xff,
    'vEXTRACTPS': 0x03,
    'vINSERTPS': 0xff,
    'MPSADBW': 0x7,
    'VMPSADBW': 0x3f,
    'vPALIGNR': 0x3f,
    'vPBLENDW': 0xff,
    'vPCMP[EI]STR*': 0x0f,
    'vPEXTRB': 0x0f,
    'vPEXTRW': 0x07,
    'vPEXTRD': 0x03,
    'vPEXTRQ': 0x01,
    'vPINSRB': 0x0f,
    'vPINSRW': 0x07,
    'vPINSRD': 0x03,
    'vPINSRQ': 0x01,
    'vPSHUF[DW]': 0xff,
    'vPSHUF[LH]W': 0xff,
    'vPS[LR][AL][WDQ]': 0x3f,
    'vPS[RL]LDQ': 0x1f,
    'vROUND[PS][SD]': 0x7,
    'vSHUFPD': 0x0f,
    'vSHUFPS': 0xff,
    'vAESKEYGENASSIST': 0,
    'VEXTRACT[FI]128': 0x01,
    'VINSERT[FI]128': 0x01,
    'VPBLENDD': 0xff,
    'VPERM2[FI]128': 0x33,
    'VPERMPD': 0xff,
    'VPERMQ': 0xff,
    'VPERMILPS': 0xff,
    'VPERMILPD': 0x0f,
}
def strip_comments(x):
    """Yield the items of x that are neither empty strings nor start with '#'."""
    for line in x:
        if line == '':
            continue
        if line[0] == '#':
            continue
        yield line
def reg_w(w):
    """Return the name of the accumulator register that is w bits wide.

    Raises Exception for any width other than 8, 16, 32 or 64.
    """
    names = {8: 'al', 16: 'ax', 32: 'eax', 64: 'rax'}
    name = names.get(w)
    if name is None:
        raise Exception("bad reg_w %d" % w)
    return name
def mem_w(w):
    """Return the memory operand text for an access of w bits.

    The operand always addresses 16[rdx]; only the size keyword varies.
    Raises Exception (without a message) for an unsupported width.
    """
    size_names = {
        8: "BYTE",
        16: "WORD",
        32: "DWORD",
        64: "QWORD",
        128: "XMMWORD",
        256: "YMMWORD",
    }
    if w not in size_names:
        raise Exception()
    return size_names[w] + " PTR 16[rdx]"
class XMMArg():
    """An xmm/ymm register operand that may alternatively be a memory operand.

    reg is the register-name prefix ('x' or 'y' as passed by ArgGenerator);
    mw is the width in bits of the memory form, or 0 when the operand can
    only be a register.
    """
    isxmm = True

    def __init__(self, reg, mw):
        if mw not in [0, 8, 16, 32, 64, 128, 256]:
            # Report the offending width itself (the name 'w' does not
            # exist in this scope and used to raise NameError instead).
            raise Exception("Bad /m width: %s" % mw)
        self.reg = reg
        self.mw = mw
        self.ismem = mw != 0

    def regstr(self, n):
        """Return the operand text: memory form for n < 0, else register n."""
        if n < 0:
            return mem_w(self.mw)
        else:
            return "%smm%d" % (self.reg, n)
class MMArg():
    """An MMX register operand (memory forms are not generated yet)."""
    isxmm = True
    ismem = False # TODO

    def regstr(self, n):
        """Return the MMX register name for n, reduced to the range 0..7."""
        index = n & 7
        return "mm%d" % index
def match(op, pattern):
    """Test mnemonic op against an fnmatch pattern.

    A pattern starting with a lowercase 'v' matches both the plain mnemonic
    and the same mnemonic with a 'V' prefix; any other pattern is matched
    as-is.
    """
    if pattern[0] != 'v':
        return fnmatch(op, pattern)
    stem = pattern[1:]
    return fnmatch(op, stem) or fnmatch(op, 'V' + stem)
class ArgVSIB():
    """A vector-index memory operand of the form [rsi + <vector reg> * scale].

    reg is the register-name prefix and w the index width in bits (32 or 64).
    """
    isxmm = True
    ismem = False

    def __init__(self, reg, w):
        if w not in [32, 64]:
            raise Exception("Bad vsib width: %s" % w)
        self.w = w
        self.reg = reg

    def regstr(self, n):
        """Return the operand text for encoded value n.

        The register number is n >> 2 and the scale is 2 ** (n & 3).
        """
        scale = 1 << (n & 3)
        index_reg = "%smm%d" % (self.reg, n >> 2)
        return "[rsi + %s * %d]" % (index_reg, scale)
class ArgImm8u():
    """An 8-bit immediate operand.

    The set of immediates to try is defined by a bit mask looked up in the
    imask table from the instruction mnemonic (first matching pattern wins).
    """
    isxmm = False
    ismem = False

    def __init__(self, op):
        for pattern, bits in imask.items():
            if match(op, pattern):
                self.mask = bits
                return
        raise Exception("Unknown immediate")

    def vals(self):
        """Yield, in increasing order, every value whose set bits lie in mask.

        Zero is always yielded first and the mask itself last.
        """
        mask = self.mask
        current = 0
        yield current
        while current != mask:
            # Setting all non-mask bits makes the +1 carry skip over them;
            # masking afterwards leaves the next larger subset of the mask.
            current = ((current | ~mask) + 1) & mask
            yield current
class ArgRM():
    """A general purpose register operand that may alternatively be memory.

    rw is the register width in bits; mw is the width of the memory form,
    or 0 when the operand can only be a register.
    """
    isxmm = False

    def __init__(self, rw, mw):
        # Report the offending value itself; the name 'w' does not exist in
        # this scope and used to raise NameError instead of these messages.
        if rw not in [8, 16, 32, 64]:
            raise Exception("Bad r/w width: %s" % rw)
        if mw not in [0, 8, 16, 32, 64]:
            raise Exception("Bad /m width: %s" % mw)
        self.rw = rw
        self.mw = mw
        self.ismem = mw != 0

    def regstr(self, n):
        """Return the operand text: memory form for n < 0, else the register."""
        if n < 0:
            return mem_w(self.mw)
        else:
            return reg_w(self.rw)
class ArgMem():
    """A memory-only operand of a fixed width w (in bits)."""
    isxmm = False
    ismem = True

    def __init__(self, w):
        allowed = [8, 16, 32, 64, 128, 256]
        if w not in allowed:
            raise Exception("Bad mem width: %s" % w)
        self.w = w

    def regstr(self, n):
        """Return the memory operand text; n is ignored."""
        return mem_w(self.w)
def ArgGenerator(arg, op):
    """Build the operand object for one operand string of an x86.csv row.

    arg is the operand as spelled in the CSV (for example 'xmm1', 'xmm2/m128',
    'r/m32', 'm64', 'imm8', 'vm32x'); op is the instruction mnemonic, needed
    to look up the immediate mask.  Returns None for the implicit '<XMM0>'
    operand.  Raises Exception for operands that are not understood.
    """
    if arg[:3] == 'xmm' or arg[:3] == "ymm":
        if "/" in arg:
            r, m = arg.split('/')
            if m[0] != 'm':
                raise Exception("Expected /m: %s" % arg)
            return XMMArg(arg[0], int(m[1:]))
        else:
            return XMMArg(arg[0], 0)
    elif arg[:2] == 'mm':
        return MMArg()
    elif arg[:4] == 'imm8':
        return ArgImm8u(op)
    elif arg == '<XMM0>':
        return None
    elif arg[0] == 'r':
        if '/m' in arg:
            r, m = arg.split('/')
            if m[0] != 'm':
                raise Exception("Expected /m: %s" % arg)
            mw = int(m[1:])
            # A bare 'r/mNN' means the register is as wide as the memory form.
            if r == 'r':
                rw = mw
            else:
                rw = int(r[1:])
            return ArgRM(rw, mw)
        return ArgRM(int(arg[1:]), 0)
    elif arg[0] == 'm':
        return ArgMem(int(arg[1:]))
    elif arg[:2] == 'vm':
        # 'vm<width><reg letter>': last character is the register prefix.
        return ArgVSIB(arg[-1], int(arg[2:-1]))
    else:
        raise Exception("Unrecognised arg: %s" % arg)
class InsnGenerator:
    """Generates the test instruction strings for one x86.csv row.

    op is the mnemonic and args the list of operand strings.  optype selects
    the initial register state used by the C harness: 'F32' for mnemonics
    ending in PS/SS, 'F64' for PD/SD and 'I' for everything else.
    """
    def __init__(self, op, args):
        self.op = op
        if op[-2:] in ["PS", "PD", "SS", "SD"]:
            if op[-1] == 'S':
                self.optype = 'F32'
            else:
                self.optype = 'F64'
        else:
            self.optype = 'I'

        try:
            self.args = list(ArgGenerator(a, op) for a in args)
            # ArgGenerator returns None for the implicit <XMM0> operand;
            # it is dropped only when it is the last operand.
            if len(self.args) > 0 and self.args[-1] is None:
                self.args = self.args[:-1]
        except Exception as e:
            raise Exception("Bad arg %s: %s" % (op, e))

    def gen(self):
        """Yield one assembly string per operand combination to test.

        The order of the yielded strings is the order of the regset tables
        below; the caller numbers the tests in that order.
        """
        # Register numbers used for source operands and for a destination
        # that is distinct from all sources.
        regs = (10, 11, 12)
        dest = 9

        nreg = len(self.args)
        if nreg == 0:
            yield self.op
            return
        # A trailing immediate is not a register operand; it is expanded
        # separately at the end.
        if isinstance(self.args[-1], ArgImm8u):
            nreg -= 1
            immarg = self.args[-1]
        else:
            immarg = None
        # Index of the (last) operand that has a memory form, or -1.
        memarg = -1
        for n, arg in enumerate(self.args):
            if arg.ismem:
                memarg = n

        # In the tuples below a value of -1 selects the memory form of the
        # operand (see the regstr() methods of the operand classes).
        if (self.op.startswith("VGATHER") or self.op.startswith("VPGATHER")):
            # Index register number in the upper bits, scale selector in the
            # low two bits, as decoded by ArgVSIB.regstr().
            if "GATHERD" in self.op:
                ireg = 13 << 2
            else:
                ireg = 14 << 2
            regset = [
                (dest, ireg | 0, regs[0]),
                (dest, ireg | 1, regs[0]),
                (dest, ireg | 2, regs[0]),
                (dest, ireg | 3, regs[0]),
                ]
            if memarg >= 0:
                raise Exception("vsib with memory: %s" % self.op)
        elif nreg == 1:
            regset = [(regs[0],)]
            if memarg == 0:
                regset += [(-1,)]
        elif nreg == 2:
            regset = [
                (regs[0], regs[1]),
                (regs[0], regs[0]),
                ]
            if memarg == 0:
                regset += [(-1, regs[0])]
            elif memarg == 1:
                regset += [(dest, -1)]
        elif nreg == 3:
            # Distinct operands plus every overlapping combination.
            regset = [
                (dest, regs[0], regs[1]),
                (dest, regs[0], regs[0]),
                (regs[0], regs[0], regs[1]),
                (regs[0], regs[1], regs[0]),
                (regs[0], regs[0], regs[0]),
                ]
            if memarg == 2:
                regset += [
                    (dest, regs[0], -1),
                    (regs[0], regs[0], -1),
                    ]
            elif memarg > 0:
                raise Exception("Memarg %d" % memarg)
        elif nreg == 4:
            regset = [
                (dest, regs[0], regs[1], regs[2]),
                (dest, regs[0], regs[0], regs[1]),
                (dest, regs[0], regs[1], regs[0]),
                (dest, regs[1], regs[0], regs[0]),
                (dest, regs[0], regs[0], regs[0]),
                (regs[0], regs[0], regs[1], regs[2]),
                (regs[0], regs[1], regs[0], regs[2]),
                (regs[0], regs[1], regs[2], regs[0]),
                (regs[0], regs[0], regs[0], regs[1]),
                (regs[0], regs[0], regs[1], regs[0]),
                (regs[0], regs[1], regs[0], regs[0]),
                (regs[0], regs[0], regs[0], regs[0]),
                ]
            if memarg == 2:
                regset += [
                    (dest, regs[0], -1, regs[1]),
                    (dest, regs[0], -1, regs[0]),
                    (regs[0], regs[0], -1, regs[1]),
                    (regs[0], regs[1], -1, regs[0]),
                    (regs[0], regs[0], -1, regs[0]),
                    ]
            elif memarg > 0:
                raise Exception("Memarg4 %d" % memarg)
        else:
            raise Exception("Too many regs: %s(%d)" % (self.op, nreg))

        # Render each combination; with an immediate operand, emit one
        # instruction per immediate value.
        for regv in regset:
            argstr = []
            for i in range(nreg):
                arg = self.args[i]
                argstr.append(arg.regstr(regv[i]))
            if immarg is None:
                yield self.op + ' ' + ','.join(argstr)
            else:
                for immval in immarg.vals():
                    yield self.op + ' ' + ','.join(argstr) + ',' + str(immval)
def split0(s):
    """Split s on commas, returning an empty list for the empty string."""
    return s.split(',') if s != '' else []
def main():
    """Read x86.csv (argv[1]) and write the generated header (argv[2]).

    One TEST(n, "insn", optype) line is written for every instruction string
    produced for rows whose CPUID column is listed in archs and whose
    mnemonic is not in ignore.  The header ends with '#undef TEST' so that
    it can be included more than once with different TEST definitions.
    """
    if len(sys.argv) != 3:
        print("Usage: test-avx.py x86.csv test-avx.h")
        sys.exit(1)

    n = 0
    # Both files are closed even if a row fails to parse.
    with open(sys.argv[1], 'r', newline='') as csvfile, \
         open(sys.argv[2], "w") as outf:
        outf.write("// Generated by test-avx.py. Do not edit.\n")
        for row in csv.reader(strip_comments(csvfile)):
            insn = row[0].replace(',', '').split()
            if insn[0] in ignore:
                continue
            cpuid = row[6]
            if cpuid in archs:
                g = InsnGenerator(insn[0], insn[1:])
                for text in g.gen():
                    outf.write('TEST(%d, "%s", %s)\n' % (n, text, g.optype))
                    n += 1
        outf.write("#undef TEST\n")

if __name__ == "__main__":
    main()

4658
tests/tcg/i386/x86.csv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -16,6 +16,7 @@ TESTS=$(MULTIARCH_TESTS)
endif endif
run-test-i386-ssse3: QEMU_OPTS += -cpu max run-test-i386-ssse3: QEMU_OPTS += -cpu max
run-test-avx: QEMU_OPTS += -cpu max
run-plugin-test-i386-ssse3-%: QEMU_OPTS += -cpu max run-plugin-test-i386-ssse3-%: QEMU_OPTS += -cpu max
test-x86_64: LDFLAGS+=-lm -lc test-x86_64: LDFLAGS+=-lm -lc