Improve the exponential and hyperbolic function's performance
10..100 times faster. PR port-m68k/51645 from rin@ (and modified by me)
This commit is contained in:
parent
baefce2250
commit
6e812b739f
@ -1,4 +1,4 @@
|
||||
/* $NetBSD: fpu_exp.c,v 1.8 2013/04/20 04:54:22 isaki Exp $ */
|
||||
/* $NetBSD: fpu_exp.c,v 1.9 2016/12/05 15:31:01 isaki Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1995 Ken Nakata
|
||||
@ -32,7 +32,7 @@
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: fpu_exp.c,v 1.8 2013/04/20 04:54:22 isaki Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: fpu_exp.c,v 1.9 2016/12/05 15:31:01 isaki Exp $");
|
||||
|
||||
#include <machine/ieee.h>
|
||||
|
||||
@ -100,12 +100,16 @@ fpu_etox_taylor(struct fpemu *fe)
|
||||
}
|
||||
|
||||
/*
|
||||
* exp(x)
|
||||
* exp(x) = 2^k * exp(r) with k = round(x / ln2) and r = x - k * ln2
|
||||
*
|
||||
* Algorithm partially taken from libm, where exp(r) is approximated by a
|
||||
* rational function of r. We use the Taylor expansion instead.
|
||||
*/
|
||||
struct fpn *
|
||||
fpu_etox(struct fpemu *fe)
|
||||
{
|
||||
struct fpn *fp;
|
||||
struct fpn x, *fp;
|
||||
int j, k;
|
||||
|
||||
if (ISNAN(&fe->fe_f2))
|
||||
return &fe->fe_f2;
|
||||
@ -115,19 +119,47 @@ fpu_etox(struct fpemu *fe)
|
||||
return &fe->fe_f2;
|
||||
}
|
||||
|
||||
if (fe->fe_f2.fp_sign == 0) {
|
||||
/* exp(x) */
|
||||
fp = fpu_etox_taylor(fe);
|
||||
} else {
|
||||
/* 1/exp(-x) */
|
||||
fe->fe_f2.fp_sign = 0;
|
||||
fp = fpu_etox_taylor(fe);
|
||||
CPYFPN(&x, &fe->fe_f2);
|
||||
|
||||
CPYFPN(&fe->fe_f2, fp);
|
||||
fpu_const(&fe->fe_f1, FPU_CONST_1);
|
||||
fp = fpu_div(fe);
|
||||
/* k = round(x / ln2) */
|
||||
CPYFPN(&fe->fe_f1, &fe->fe_f2);
|
||||
fpu_const(&fe->fe_f2, FPU_CONST_LN_2);
|
||||
fp = fpu_div(fe);
|
||||
CPYFPN(&fe->fe_f2, fp);
|
||||
fp = fpu_int(fe);
|
||||
if (ISZERO(fp)) {
|
||||
/* k = 0 */
|
||||
CPYFPN(&fe->fe_f2, &x);
|
||||
fp = fpu_etox_taylor(fe);
|
||||
return fp;
|
||||
}
|
||||
|
||||
/* extract k as integer format from fpn format */
|
||||
j = FP_LG - fp->fp_exp;
|
||||
if (j < 0) {
|
||||
if (fp->fp_sign)
|
||||
fp->fp_class = FPC_ZERO; /* k < -2^18 */
|
||||
else
|
||||
fp->fp_class = FPC_INF; /* k > 2^18 */
|
||||
return fp;
|
||||
}
|
||||
k = fp->fp_mant[0] >> j;
|
||||
if (fp->fp_sign)
|
||||
k *= -1;
|
||||
|
||||
/* exp(r) = exp(x - k * ln2) */
|
||||
CPYFPN(&fe->fe_f1, fp);
|
||||
fpu_const(&fe->fe_f2, FPU_CONST_LN_2);
|
||||
fp = fpu_mul(fe);
|
||||
fp->fp_sign = !fp->fp_sign;
|
||||
CPYFPN(&fe->fe_f1, fp);
|
||||
CPYFPN(&fe->fe_f2, &x);
|
||||
fp = fpu_add(fe);
|
||||
CPYFPN(&fe->fe_f2, fp);
|
||||
fp = fpu_etox_taylor(fe);
|
||||
|
||||
/* 2^k */
|
||||
fp->fp_exp += k;
|
||||
|
||||
return fp;
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* $NetBSD: fpu_hyperb.c,v 1.16 2013/10/11 03:37:08 isaki Exp $ */
|
||||
/* $NetBSD: fpu_hyperb.c,v 1.17 2016/12/05 15:31:01 isaki Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1995 Ken Nakata
|
||||
@ -57,15 +57,12 @@
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: fpu_hyperb.c,v 1.16 2013/10/11 03:37:08 isaki Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: fpu_hyperb.c,v 1.17 2016/12/05 15:31:01 isaki Exp $");
|
||||
|
||||
#include <machine/ieee.h>
|
||||
|
||||
#include "fpu_emulate.h"
|
||||
|
||||
/* The number of items to terminate the Taylor expansion */
|
||||
#define MAX_ITEMS (2000)
|
||||
|
||||
/*
|
||||
* fpu_hyperb.c: defines the following functions
|
||||
*
|
||||
@ -137,71 +134,14 @@ fpu_atanh(struct fpemu *fe)
|
||||
}
|
||||
|
||||
/*
|
||||
* taylor expansion used by sinh(), cosh().
|
||||
* exp(x) + exp(-x)
|
||||
* cosh(x) = ------------------
|
||||
* 2
|
||||
*/
|
||||
static struct fpn *
|
||||
__fpu_sinhcosh_taylor(struct fpemu *fe, struct fpn *s0, uint32_t f)
|
||||
{
|
||||
struct fpn res;
|
||||
struct fpn x2;
|
||||
struct fpn *s1;
|
||||
struct fpn *r;
|
||||
int sign;
|
||||
uint32_t k;
|
||||
|
||||
/* x2 := x * x */
|
||||
CPYFPN(&fe->fe_f1, &fe->fe_f2);
|
||||
r = fpu_mul(fe);
|
||||
CPYFPN(&x2, r);
|
||||
|
||||
/* res := s0 */
|
||||
CPYFPN(&res, s0);
|
||||
|
||||
sign = 1; /* sign := (-1)^n */
|
||||
|
||||
for (; f < (2 * MAX_ITEMS); ) {
|
||||
/* (f1 :=) s0 * x^2 */
|
||||
CPYFPN(&fe->fe_f1, s0);
|
||||
CPYFPN(&fe->fe_f2, &x2);
|
||||
r = fpu_mul(fe);
|
||||
CPYFPN(&fe->fe_f1, r);
|
||||
|
||||
/*
|
||||
* for sinh(), s1 := s0 * x^2 / (2n+1)2n
|
||||
* for cosh(), s1 := s0 * x^2 / 2n(2n-1)
|
||||
*/
|
||||
k = f * (f + 1);
|
||||
fpu_explode(fe, &fe->fe_f2, FTYPE_LNG, &k);
|
||||
s1 = fpu_div(fe);
|
||||
|
||||
/* break if s1 is enough small */
|
||||
if (ISZERO(s1))
|
||||
break;
|
||||
if (res.fp_exp - s1->fp_exp >= EXT_FRACBITS)
|
||||
break;
|
||||
|
||||
/* s0 := s1 for next loop */
|
||||
CPYFPN(s0, s1);
|
||||
|
||||
/* res += s1 */
|
||||
CPYFPN(&fe->fe_f2, s1);
|
||||
CPYFPN(&fe->fe_f1, &res);
|
||||
r = fpu_add(fe);
|
||||
CPYFPN(&res, r);
|
||||
|
||||
f += 2;
|
||||
sign ^= 1;
|
||||
}
|
||||
|
||||
CPYFPN(&fe->fe_f2, &res);
|
||||
return &fe->fe_f2;
|
||||
}
|
||||
|
||||
struct fpn *
|
||||
fpu_cosh(struct fpemu *fe)
|
||||
{
|
||||
struct fpn s0;
|
||||
struct fpn *r;
|
||||
struct fpn x, *fp;
|
||||
|
||||
if (ISNAN(&fe->fe_f2))
|
||||
return &fe->fe_f2;
|
||||
@ -211,17 +151,37 @@ fpu_cosh(struct fpemu *fe)
|
||||
return &fe->fe_f2;
|
||||
}
|
||||
|
||||
fpu_const(&s0, FPU_CONST_1);
|
||||
r = __fpu_sinhcosh_taylor(fe, &s0, 1);
|
||||
/* if x is +0/-0, return 1 */ /* XXX is this necessary? */
|
||||
if (ISZERO(&fe->fe_f2)) {
|
||||
fpu_const(&fe->fe_f2, FPU_CONST_1);
|
||||
return &fe->fe_f2;
|
||||
}
|
||||
|
||||
return r;
|
||||
fp = fpu_etox(fe);
|
||||
CPYFPN(&x, fp);
|
||||
|
||||
fpu_const(&fe->fe_f1, FPU_CONST_1);
|
||||
CPYFPN(&fe->fe_f2, fp);
|
||||
fp = fpu_div(fe);
|
||||
|
||||
CPYFPN(&fe->fe_f1, fp);
|
||||
CPYFPN(&fe->fe_f2, &x);
|
||||
fp = fpu_add(fe);
|
||||
|
||||
fp->fp_exp--;
|
||||
|
||||
return fp;
|
||||
}
|
||||
|
||||
/*
|
||||
* exp(x) - exp(-x)
|
||||
* sinh(x) = ------------------
|
||||
* 2
|
||||
*/
|
||||
struct fpn *
|
||||
fpu_sinh(struct fpemu *fe)
|
||||
{
|
||||
struct fpn s0;
|
||||
struct fpn *r;
|
||||
struct fpn x, *fp;
|
||||
|
||||
if (ISNAN(&fe->fe_f2))
|
||||
return &fe->fe_f2;
|
||||
@ -232,12 +192,28 @@ fpu_sinh(struct fpemu *fe)
|
||||
if (ISZERO(&fe->fe_f2))
|
||||
return &fe->fe_f2;
|
||||
|
||||
CPYFPN(&s0, &fe->fe_f2);
|
||||
r = __fpu_sinhcosh_taylor(fe, &s0, 2);
|
||||
fp = fpu_etox(fe);
|
||||
CPYFPN(&x, fp);
|
||||
|
||||
return r;
|
||||
fpu_const(&fe->fe_f1, FPU_CONST_1);
|
||||
CPYFPN(&fe->fe_f2, fp);
|
||||
fp = fpu_div(fe);
|
||||
|
||||
fp->fp_sign = 1;
|
||||
CPYFPN(&fe->fe_f1, fp);
|
||||
CPYFPN(&fe->fe_f2, &x);
|
||||
fp = fpu_add(fe);
|
||||
|
||||
fp->fp_exp--;
|
||||
|
||||
return fp;
|
||||
}
|
||||
|
||||
/*
|
||||
* sinh(x)
|
||||
* tanh(x) = ---------
|
||||
* cosh(x)
|
||||
*/
|
||||
struct fpn *
|
||||
fpu_tanh(struct fpemu *fe)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user