add sse fenv support on i386 through hwcap

the sse and x87 rounding modes should always be the same, and
the visible exception flags are the bitwise OR of the two
fenv states (so it's enough to query the rounding mode or
raise exceptions on one fenv)
This commit is contained in:
Szabolcs Nagy 2013-08-17 02:40:44 +00:00
parent d684149910
commit ebc10fa176
2 changed files with 61 additions and 9 deletions

View File

@ -1,14 +1,26 @@
.hidden __hwcap
.global feclearexcept
.type feclearexcept,@function
feclearexcept:
mov 4(%esp),%ecx
not %ecx
test $0x3f,%ecx
# consider sse fenv as well if the cpu has XMM capability
call 1f
1: addl $__hwcap-1b,(%esp)
pop %edx
testl $0x02000000,(%edx)
jz 1f
stmxcsr 4(%esp)
and %ecx,4(%esp)
ldmxcsr 4(%esp)
1: test $0x3f,%ecx
jnz 2f
1: fnclex
xor %eax,%eax
ret
2: fnstsw %ax
# TODO: only load/store fenv if exceptions arent clear yet
and %ecx,%eax
jz 1b
sub $32,%esp
@ -41,7 +53,18 @@ fesetround:
andb $0xf3,1(%esp)
or %ch,1(%esp)
fldcw (%esp)
pop %ecx
# consider sse fenv as well if the cpu has XMM capability
# (call/pop produces the address of label 1; adding $__hwcap-1b turns it
# into the address of __hwcap without needing an absolute relocation)
call 1f
1: addl $__hwcap-1b,(%esp)
pop %edx
testl $0x02000000,(%edx)
# skip the mxcsr update only when the XMM bit is clear; this has to be a
# conditional jump, an unconditional one leaves the sse rounding mode
# untouched and out of sync with the x87 control word set above
jz 1f
# read mxcsr into the scratch word at (%esp)
stmxcsr (%esp)
# %ch carries the rounding mode in bits 2-3 (x87 control word bits 10-11);
# shifted left by 3 it lands in bits 5-6 of the second byte, i.e. mxcsr
# bits 13-14, which the 0x9f mask clears before the new mode is merged in
shl $3,%ch
andb $0x9f,1(%esp)
or %ch,1(%esp)
ldmxcsr (%esp)
1: pop %ecx
ret
.global fegetround
@ -59,7 +82,18 @@ fegetenv:
# %ecx = pointer passed as the first stack argument; %eax = 0 is the value
# left in %eax on return
mov 4(%esp),%ecx
xor %eax,%eax
# store the x87 environment at the address in %ecx
fnstenv (%ecx)
# NOTE(review): this listing looks like a diff whose +/- markers were lost;
# this ret is presumably the pre-change line replaced by the code below.
# If it were live, everything after it would be unreachable - confirm.
ret
# consider sse fenv as well if the cpu has XMM capability
# (call/pop produces the address of label 1; adding $__hwcap-1b turns it
# into the address of __hwcap)
call 1f
1: addl $__hwcap-1b,(%esp)
pop %edx
testl $0x02000000,(%edx)
jz 1f
# the push only reserves a scratch word on the stack for stmxcsr
push %eax
stmxcsr (%esp)
pop %edx
# keep the low six mxcsr bits and OR them into the dword at offset 4 of the
# saved environment (presumably the x87 status word slot - confirm against
# the fnstenv memory layout)
and $0x3f,%edx
or %edx,4(%ecx)
1: ret
.global fesetenv
.type fesetenv,@function
@ -69,7 +103,8 @@ fesetenv:
inc %ecx
jz 1f
fldenv -1(%ecx)
ret
movl -1(%ecx),%ecx
jmp 2f
1: push %eax
push %eax
push %eax
@ -79,12 +114,32 @@ fesetenv:
pushl $0x37f
fldenv (%esp)
add $28,%esp
ret
# consider sse fenv as well if the cpu has XMM capability
2: call 1f
1: addl $__hwcap-1b,(%esp)
pop %edx
testl $0x02000000,(%edx)
jz 1f
# mxcsr := same rounding mode, cleared exceptions, default mask
and $0xc00,%ecx
shl $3,%ecx
or $0x1f80,%ecx
mov %ecx,4(%esp)
ldmxcsr 4(%esp)
1: ret
.global fetestexcept
.type fetestexcept,@function
# Returns in %eax the bits of the x87 status word, ORed with mxcsr when the
# XMM capability bit is set in __hwcap, masked by the first stack argument.
fetestexcept:
# %ecx = mask argument
mov 4(%esp),%ecx
# x87 status word -> %ax
# NOTE(review): fnstsw writes only the low 16 bits; the upper half of %eax
# keeps whatever the caller left there, so the result relies on the mask
# having no bits set above bit 15 - confirm callers only pass flag bits
fnstsw %ax
# NOTE(review): this listing looks like a diff whose +/- markers were lost;
# this and is presumably the pre-change line superseded by the "1: and"
# below. Keeping it does not change the result, because the final and
# applies the same mask again.
and %ecx,%eax
# consider sse fenv as well if the cpu has XMM capability
# (call/pop produces the address of label 1; adding $__hwcap-1b turns it
# into the address of __hwcap)
call 1f
1: addl $__hwcap-1b,(%esp)
pop %edx
testl $0x02000000,(%edx)
jz 1f
# the argument is already held in %ecx, so its stack slot is reused as the
# scratch word for mxcsr
stmxcsr 4(%esp)
or 4(%esp),%eax
# apply the mask after both sources have been merged
1: and %ecx,%eax
ret

View File

@ -28,9 +28,6 @@ feraiseexcept:
stmxcsr -8(%rsp)
or %edi,-8(%rsp)
ldmxcsr -8(%rsp)
fnstenv -32(%rsp)
or %edi,-28(%rsp)
fldenv -32(%rsp)
xor %eax,%eax
ret