qemu/tests/tcg/i386/Makefile.target

114 lines
3.5 KiB
Makefile
Raw Normal View History

# i386 cross-compile notes
#
# Location of the i386 TCG test sources, relative to $(SRC_PATH).
I386_SRC = $(SRC_PATH)/tests/tcg/i386

# Add that directory to make's search path so prerequisites named without a
# directory part (e.g. test-i386.c) are found there.
VPATH += $(I386_SRC)
# Probe the cross compiler for -fno-pie support and cache the answer in
# config-cc.mak. cc-option is defined outside this file; it presumably
# writes "CROSS_CC_HAS_I386_NOPIE=..." to file descriptor 3, which the
# surrounding subshell redirects into the target -- TODO confirm against
# the cc-option definition.
config-cc.mak: Makefile
	$(quiet-@)( \
	    $(call cc-option,-fno-pie, CROSS_CC_HAS_I386_NOPIE)) 3> $@

# The leading '-' keeps make quiet when the file has not been generated yet.
-include config-cc.mak
# Every C file in the i386 source directory is a candidate test program;
# the test name is the file name with its .c suffix removed.
I386_SRCS = $(notdir $(wildcard $(I386_SRC)/*.c))
ALL_X86_TESTS = $(patsubst %.c,%,$(I386_SRCS))

# Tests that are filtered out of the i386 test list.
SKIP_I386_TESTS = test-i386-ssse3 test-avx test-3dnow test-mmx

# The discovered tests that are either test-i386-bmi2 or on the skip list
# above. Expanded immediately, so later additions to SKIP_I386_TESTS do not
# end up in this list.
X86_64_TESTS := $(filter test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
target/i386: fix IEEE SSE floating-point exception raising The SSE instruction implementations all fail to raise the expected IEEE floating-point exceptions because they do nothing to convert the exception state from the softfloat machinery into the exception flags in MXCSR. Fix this by adding such conversions. Unlike for x87, emulated SSE floating-point operations might be optimized using hardware floating point on the host, and so a different approach is taken that is compatible with such optimizations. The required invariant is that all exceptions set in env->sse_status (other than "denormal operand", for which the SSE semantics are different from those in the softfloat code) are ones that are set in the MXCSR; the emulated MXCSR is updated lazily when code reads MXCSR, while when code sets MXCSR, the exceptions in env->sse_status are set accordingly. A few instructions do not raise all the exceptions that would be raised by the softfloat code, and those instructions are made to save and restore the softfloat exception state accordingly. Nothing is done about "denormal operand"; setting that (only for the case when input denormals are *not* flushed to zero, the opposite of the logic in the softfloat code for such an exception) will require custom code for relevant instructions, or else architecture-specific conditionals in the softfloat code for when to set such an exception together with custom code for various SSE conversion and rounding instructions that do not set that exception. Nothing is done about trapping exceptions (for which there is minimal and largely broken support in QEMU's emulation in the x87 case and no support at all in the SSE case). Signed-off-by: Joseph Myers <joseph@codesourcery.com> Message-Id: <alpine.DEB.2.21.2006252358000.3832@digraph.polyomino.org.uk> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2020-06-26 02:58:31 +03:00
# test-i386-sse-exceptions: compile with SSE4.1 enabled and SSE used for
# floating-point math. Both the plain run and every plugin run add
# "-cpu max" to the QEMU options.
test-i386-sse-exceptions: CFLAGS += -msse4.1 -mfpmath=sse
run-test-i386-sse-exceptions: QEMU_OPTS += -cpu max
run-plugin-test-i386-sse-exceptions-%: QEMU_OPTS += -cpu max
target/i386: correct fix for pcmpxstrx substring search This corrects a bug introduced in my previous fix for SSE4.2 pcmpestri / pcmpestrm / pcmpistri / pcmpistrm substring search, commit ae35eea7e4a9f21dd147406dfbcd0c4c6aaf2a60. That commit fixed a bug that showed up in four GCC tests with one libc implementation. The tests in question generate random inputs to the intrinsics and compare results to a C implementation, but they only test 1024 possible random inputs, and when the tests use the cases of those instructions that work with word rather than byte inputs, it's easy to have problematic cases that show up much less frequently than that. Thus, testing with a different libc implementation, and so a different random number generator, showed up a problem with the previous patch. When investigating the previous test failures, I found the description of these instructions in the Intel manuals (starting from computing a 16x16 or 8x8 set of comparison results) confusing and hard to match up with the more optimized implementation in QEMU, and referred to AMD manuals which described the instructions in a different way. Those AMD descriptions are very explicit that the whole of the string being searched for must be found in the other operand, not running off the end of that operand; they say "If the prototype and the SUT are equal in length, the two strings must be identical for the comparison to be TRUE.". However, that statement is incorrect. In my previous commit message, I noted: The operation in this case is a search for a string (argument d to the helper) in another string (argument s to the helper); if a copy of d at a particular position would run off the end of s, the resulting output bit should be 0 whether or not the strings match in the region where they overlap, but the QEMU implementation was wrongly comparing only up to the point where s ends and counting it as a match if an initial segment of d matched a terminal segment of s. 
Here, "run off the end of s" means that some byte of d would overlap some byte outside of s; thus, if d has zero length, it is considered to match everywhere, including after the end of s. The description "some byte of d would overlap some byte outside of s" is accurate only when understood to refer to overlapping some byte *within the 16-byte operand* but at or after the zero terminator; it is valid to run over the end of s if the end of s is the end of the 16-byte operand. So the fix in the previous patch for the case of d being empty was correct, but the other part of that patch was not correct (as it never allowed partial matches even at the end of the 16-byte operand). Nor was the code before the previous patch correct for the case of d nonempty, as it would always have allowed partial matches at the end of s. Fix with a partial revert of my previous change, combined with inserting a check for the special case of s having maximum length to determine where it is necessary to check for matches. In the added test, test 1 is for the case of empty strings, which failed before my 2017 patch, test 2 is for the bug introduced by my 2017 patch and test 3 deals with the case where a match of an initial segment at the end of the string is not valid when the string ends before the end of the 16-byte operand (that is, the case that would be broken by a simple revert of the non-empty-string part of my 2017 patch). Signed-off-by: Joseph Myers <joseph@codesourcery.com> Message-Id: <alpine.DEB.2.21.2006121344290.9881@digraph.polyomino.org.uk> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2020-06-12 16:45:23 +03:00
# test-i386-pcmpistri: compile with SSE4.2 enabled. Both the plain run and
# every plugin run add "-cpu max" to the QEMU options.
test-i386-pcmpistri: CFLAGS += -msse4.2
run-test-i386-pcmpistri: QEMU_OPTS += -cpu max
run-plugin-test-i386-pcmpistri-%: QEMU_OPTS += -cpu max
target/i386: correct fix for pcmpxstrx substring search This corrects a bug introduced in my previous fix for SSE4.2 pcmpestri / pcmpestrm / pcmpistri / pcmpistrm substring search, commit ae35eea7e4a9f21dd147406dfbcd0c4c6aaf2a60. That commit fixed a bug that showed up in four GCC tests with one libc implementation. The tests in question generate random inputs to the intrinsics and compare results to a C implementation, but they only test 1024 possible random inputs, and when the tests use the cases of those instructions that work with word rather than byte inputs, it's easy to have problematic cases that show up much less frequently than that. Thus, testing with a different libc implementation, and so a different random number generator, showed up a problem with the previous patch. When investigating the previous test failures, I found the description of these instructions in the Intel manuals (starting from computing a 16x16 or 8x8 set of comparison results) confusing and hard to match up with the more optimized implementation in QEMU, and referred to AMD manuals which described the instructions in a different way. Those AMD descriptions are very explicit that the whole of the string being searched for must be found in the other operand, not running off the end of that operand; they say "If the prototype and the SUT are equal in length, the two strings must be identical for the comparison to be TRUE.". However, that statement is incorrect. In my previous commit message, I noted: The operation in this case is a search for a string (argument d to the helper) in another string (argument s to the helper); if a copy of d at a particular position would run off the end of s, the resulting output bit should be 0 whether or not the strings match in the region where they overlap, but the QEMU implementation was wrongly comparing only up to the point where s ends and counting it as a match if an initial segment of d matched a terminal segment of s. 
Here, "run off the end of s" means that some byte of d would overlap some byte outside of s; thus, if d has zero length, it is considered to match everywhere, including after the end of s. The description "some byte of d would overlap some byte outside of s" is accurate only when understood to refer to overlapping some byte *within the 16-byte operand* but at or after the zero terminator; it is valid to run over the end of s if the end of s is the end of the 16-byte operand. So the fix in the previous patch for the case of d being empty was correct, but the other part of that patch was not correct (as it never allowed partial matches even at the end of the 16-byte operand). Nor was the code before the previous patch correct for the case of d nonempty, as it would always have allowed partial matches at the end of s. Fix with a partial revert of my previous change, combined with inserting a check for the special case of s having maximum length to determine where it is necessary to check for matches. In the added test, test 1 is for the case of empty strings, which failed before my 2017 patch, test 2 is for the bug introduced by my 2017 patch and test 3 deals with the case where a match of an initial segment at the end of the string is not valid when the string ends before the end of the 16-byte operand (that is, the case that would be broken by a simple revert of the non-empty-string part of my 2017 patch). Signed-off-by: Joseph Myers <joseph@codesourcery.com> Message-Id: <alpine.DEB.2.21.2006121344290.9881@digraph.polyomino.org.uk> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2020-06-12 16:45:23 +03:00
# test-i386-bmi2: note the plain '=' -- CFLAGS is replaced with -O2 for this
# target rather than appended to. Both the plain run and every plugin run
# add "-cpu max" to the QEMU options.
test-i386-bmi2: CFLAGS=-O2
run-test-i386-bmi2: QEMU_OPTS += -cpu max
run-plugin-test-i386-bmi2-%: QEMU_OPTS += -cpu max
#
# hello-i386 is a bare-bones program: it is compiled as freestanding code
# and linked with -nostdlib.
#
hello-i386: CFLAGS += -ffreestanding
hello-i386: LDFLAGS += -nostdlib
# test-i386 includes a couple of additional objects that need to be
# linked together; we also need a no-pie capable compiler due to the
# non-pic calls into 16-bit mode.
#
# CROSS_CC_HAS_I386_NOPIE is the result of the -fno-pie compiler probe.
# When it is empty, the build and run targets become skip stubs instead.
ifneq ($(CROSS_CC_HAS_I386_NOPIE),)
test-i386: CFLAGS += -fno-pie

# The headers are listed as prerequisites but are not passed to the
# compiler; the command line names only the C and assembly sources, taken
# from the directory of the first prerequisite ($(<D)).
test-i386: test-i386.c test-i386-code16.S test-i386-vm86.S test-i386.h test-i386-shift.h test-i386-muldiv.h
	$(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_CFLAGS) -o $@ \
	   $(<D)/test-i386.c $(<D)/test-i386-code16.S $(<D)/test-i386-vm86.S -lm
else
test-i386:
	$(call skip-test, "BUILD of $@", "missing -fno-pie compiler support")

run-test-i386:
	$(call skip-test, "RUN of test-i386", "not built")

# $* is the plugin name matched by the pattern.
run-plugin-test-i386-with-%:
	$(call skip-test, "RUN of test-i386 ($*)", "not built")
endif
# test-i386-fprem is only wired up when SPEED is "slow"; in every other
# configuration it is added to the skip list.
ifeq ($(SPEED), slow)

# Reference output: execute the built test binary directly (not through
# $(QEMU)) and capture its stdout in the .ref file.
test-i386-fprem.ref: test-i386-fprem
	$(call quiet-command, ./$< > $@,"GENREF","generating $@")

run-test-i386-fprem: TIMEOUT=60

# run-test and diff-out are helpers defined outside this file; presumably
# the first runs the binary under QEMU and the second compares its output
# with the reference -- TODO confirm against their definitions.
run-test-i386-fprem: test-i386-fprem test-i386-fprem.ref
	$(call run-test,test-i386-fprem, $(QEMU) $<)
	$(call diff-out,test-i386-fprem, test-i386-fprem.ref)
else
SKIP_I386_TESTS+=test-i386-fprem
endif
# Non-inline runs will trigger the duplicate instruction heuristics in
# libinsn.so, so libinsn plugin runs get their own recipe that passes
# inline=on to the plugin.
#
# $* is the test name matched by the pattern; the -D log file is named
# <test>-with-libinsn.so.pout.
run-plugin-%-with-libinsn.so:
	$(call run-test, $@, $(QEMU) $(QEMU_OPTS) \
		-plugin ../../plugin/libinsn.so$(COMMA)inline=on \
		-d plugin -D $*-with-libinsn.so.pout $*)
# Update TESTS: the discovered i386 tests minus the skip list, preceded by
# the multiarch tests.
I386_TESTS := $(filter-out $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
TESTS = $(MULTIARCH_TESTS) $(I386_TESTS)

# On i386 and x86_64, Linux only supports 4k pages (large pages are a
# different hack).
EXTRA_RUNS += run-test-mmap-4096
# sha512-sse: a second build of sha512.c with SSE4.1 enabled at -O3.
# Note the plain '=' -- CFLAGS is replaced for this target, not appended to.
sha512-sse: CFLAGS=-msse4.1 -O3
sha512-sse: sha512.c
	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)

# Both the plain run and every plugin run add "-cpu max" to the QEMU options.
run-sha512-sse: QEMU_OPTS+=-cpu max
run-plugin-sha512-sse-with-%: QEMU_OPTS+=-cpu max

TESTS+=sha512-sse
# Generated headers for the 3DNow!/MMX/AVX tests; they are registered in
# CLEANFILES. Each one is produced by a Python script from x86.csv. The
# words after $@ are passed through to the script -- presumably the
# instruction-set groups to emit; TODO confirm against test-mmx.py.
CLEANFILES += test-avx.h test-mmx.h test-3dnow.h

test-3dnow.h: test-mmx.py x86.csv
	$(PYTHON) $(I386_SRC)/test-mmx.py $(I386_SRC)/x86.csv $@ 3DNOW

test-mmx.h: test-mmx.py x86.csv
	$(PYTHON) $(I386_SRC)/test-mmx.py $(I386_SRC)/x86.csv $@ MMX SSE SSE2 SSE3 SSSE3

test-avx.h: test-avx.py x86.csv
	$(PYTHON) $(I386_SRC)/test-avx.py $(I386_SRC)/x86.csv $@
# test-3dnow / test-mmx / test-avx each include a generated header of the
# same name, hence -I. and the explicit header prerequisite. They are
# compiled with Intel assembler syntax (-masm=intel) at -O.
#
# Plugin run targets in this file are named run-plugin-<test>-with-<plugin>,
# so the plugin QEMU_OPTS settings use a pattern of that shape; a bare
# "run-plugin-<test>" target would match none of them.
test-3dnow: CFLAGS += -masm=intel -O -I.
run-test-3dnow: QEMU_OPTS += -cpu max
run-plugin-test-3dnow-with-%: QEMU_OPTS += -cpu max
test-3dnow: test-3dnow.h

test-mmx: CFLAGS += -masm=intel -O -I.
run-test-mmx: QEMU_OPTS += -cpu max
run-plugin-test-mmx-with-%: QEMU_OPTS += -cpu max
test-mmx: test-mmx.h

test-avx: CFLAGS += -masm=intel -O -I.
run-test-avx: QEMU_OPTS += -cpu max
run-plugin-test-avx-with-%: QEMU_OPTS += -cpu max
test-avx: test-avx.h