mirror of
https://git.musl-libc.org/git/musl
synced 2025-02-23 21:54:16 +03:00
add sse fenv support on i386 through hwcap
The SSE and x87 rounding modes should always be the same, and the visible exception flags are the bitwise OR of the two fenv states, so it is enough to query the rounding mode or raise exceptions on one fenv.
This commit is contained in:
parent
d684149910
commit
ebc10fa176
@ -1,14 +1,26 @@
|
||||
.hidden __hwcap
|
||||
|
||||
.global feclearexcept
|
||||
.type feclearexcept,@function
|
||||
feclearexcept:
|
||||
mov 4(%esp),%ecx
|
||||
not %ecx
|
||||
test $0x3f,%ecx
|
||||
# consider sse fenv as well if the cpu has XMM capability
|
||||
call 1f
|
||||
1: addl $__hwcap-1b,(%esp)
|
||||
pop %edx
|
||||
testl $0x02000000,(%edx)
|
||||
jz 1f
|
||||
stmxcsr 4(%esp)
|
||||
and %ecx,4(%esp)
|
||||
ldmxcsr 4(%esp)
|
||||
1: test $0x3f,%ecx
|
||||
jnz 2f
|
||||
1: fnclex
|
||||
xor %eax,%eax
|
||||
ret
|
||||
2: fnstsw %ax
|
||||
# TODO: only load/store fenv if exceptions arent clear yet
|
||||
and %ecx,%eax
|
||||
jz 1b
|
||||
sub $32,%esp
|
||||
@ -41,7 +53,18 @@ fesetround:
|
||||
andb $0xf3,1(%esp)
|
||||
or %ch,1(%esp)
|
||||
fldcw (%esp)
|
||||
pop %ecx
|
||||
# consider sse fenv as well if the cpu has XMM capability
|
||||
call 1f
|
||||
1: addl $__hwcap-1b,(%esp)
|
||||
pop %edx
|
||||
testl $0x02000000,(%edx)
|
||||
jz 1f	# XMM bit clear in __hwcap: skip the mxcsr rounding-mode update
|
||||
stmxcsr (%esp)
|
||||
shl $3,%ch
|
||||
andb $0x9f,1(%esp)
|
||||
or %ch,1(%esp)
|
||||
ldmxcsr (%esp)
|
||||
1: pop %ecx
|
||||
ret
|
||||
|
||||
.global fegetround
|
||||
@ -59,7 +82,18 @@ fegetenv:
|
||||
mov 4(%esp),%ecx
|
||||
xor %eax,%eax
|
||||
fnstenv (%ecx)
|
||||
ret
|
||||
# consider sse fenv as well if the cpu has XMM capability
|
||||
call 1f
|
||||
1: addl $__hwcap-1b,(%esp)
|
||||
pop %edx
|
||||
testl $0x02000000,(%edx)
|
||||
jz 1f
|
||||
push %eax
|
||||
stmxcsr (%esp)
|
||||
pop %edx
|
||||
and $0x3f,%edx
|
||||
or %edx,4(%ecx)
|
||||
1: ret
|
||||
|
||||
.global fesetenv
|
||||
.type fesetenv,@function
|
||||
@ -69,7 +103,8 @@ fesetenv:
|
||||
inc %ecx
|
||||
jz 1f
|
||||
fldenv -1(%ecx)
|
||||
ret
|
||||
movl -1(%ecx),%ecx
|
||||
jmp 2f
|
||||
1: push %eax
|
||||
push %eax
|
||||
push %eax
|
||||
@ -79,12 +114,32 @@ fesetenv:
|
||||
pushl $0x37f
|
||||
fldenv (%esp)
|
||||
add $28,%esp
|
||||
ret
|
||||
# consider sse fenv as well if the cpu has XMM capability
|
||||
2: call 1f
|
||||
1: addl $__hwcap-1b,(%esp)
|
||||
pop %edx
|
||||
testl $0x02000000,(%edx)
|
||||
jz 1f
|
||||
# mxcsr := same rounding mode, cleared exceptions, default mask
|
||||
and $0xc00,%ecx
|
||||
shl $3,%ecx
|
||||
or $0x1f80,%ecx
|
||||
mov %ecx,4(%esp)
|
||||
ldmxcsr 4(%esp)
|
||||
1: ret
|
||||
|
||||
.global fetestexcept
|
||||
.type fetestexcept,@function
|
||||
fetestexcept:
|
||||
mov 4(%esp),%ecx
|
||||
fnstsw %ax
|
||||
and %ecx,%eax
|
||||
# consider sse fenv as well if the cpu has XMM capability
|
||||
call 1f
|
||||
1: addl $__hwcap-1b,(%esp)
|
||||
pop %edx
|
||||
testl $0x02000000,(%edx)
|
||||
jz 1f
|
||||
stmxcsr 4(%esp)
|
||||
or 4(%esp),%eax
|
||||
1: and %ecx,%eax
|
||||
ret
|
||||
|
@ -28,9 +28,6 @@ feraiseexcept:
|
||||
stmxcsr -8(%rsp)
|
||||
or %edi,-8(%rsp)
|
||||
ldmxcsr -8(%rsp)
|
||||
fnstenv -32(%rsp)
|
||||
or %edi,-28(%rsp)
|
||||
fldenv -32(%rsp)
|
||||
xor %eax,%eax
|
||||
ret
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user