Improve the exponential and hyperbolic function's performance

10..100 times faster.
PR port-m68k/51645 from rin@ (and modified by me)
This commit is contained in:
isaki 2016-12-05 15:31:01 +00:00
parent baefce2250
commit 6e812b739f
2 changed files with 97 additions and 89 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: fpu_exp.c,v 1.8 2013/04/20 04:54:22 isaki Exp $ */
/* $NetBSD: fpu_exp.c,v 1.9 2016/12/05 15:31:01 isaki Exp $ */
/*
* Copyright (c) 1995 Ken Nakata
@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: fpu_exp.c,v 1.8 2013/04/20 04:54:22 isaki Exp $");
__KERNEL_RCSID(0, "$NetBSD: fpu_exp.c,v 1.9 2016/12/05 15:31:01 isaki Exp $");
#include <machine/ieee.h>
@ -100,12 +100,16 @@ fpu_etox_taylor(struct fpemu *fe)
}
/*
* exp(x)
* exp(x) = 2^k * exp(r) with k = round(x / ln2) and r = x - k * ln2
*
* Algorithm partially taken from libm, where exp(r) is approximated by a
* rational function of r. We use the Taylor expansion instead.
*/
struct fpn *
fpu_etox(struct fpemu *fe)
{
struct fpn *fp;
struct fpn x, *fp;
int j, k;
if (ISNAN(&fe->fe_f2))
return &fe->fe_f2;
@ -115,19 +119,47 @@ fpu_etox(struct fpemu *fe)
return &fe->fe_f2;
}
if (fe->fe_f2.fp_sign == 0) {
/* exp(x) */
fp = fpu_etox_taylor(fe);
} else {
/* 1/exp(-x) */
fe->fe_f2.fp_sign = 0;
fp = fpu_etox_taylor(fe);
CPYFPN(&x, &fe->fe_f2);
CPYFPN(&fe->fe_f2, fp);
fpu_const(&fe->fe_f1, FPU_CONST_1);
fp = fpu_div(fe);
/* k = round(x / ln2) */
CPYFPN(&fe->fe_f1, &fe->fe_f2);
fpu_const(&fe->fe_f2, FPU_CONST_LN_2);
fp = fpu_div(fe);
CPYFPN(&fe->fe_f2, fp);
fp = fpu_int(fe);
if (ISZERO(fp)) {
/* k = 0 */
CPYFPN(&fe->fe_f2, &x);
fp = fpu_etox_taylor(fe);
return fp;
}
/* extract k as integer format from fpn format */
j = FP_LG - fp->fp_exp;
if (j < 0) {
if (fp->fp_sign)
fp->fp_class = FPC_ZERO; /* k < -2^18 */
else
fp->fp_class = FPC_INF; /* k > 2^18 */
return fp;
}
k = fp->fp_mant[0] >> j;
if (fp->fp_sign)
k *= -1;
/* exp(r) = exp(x - k * ln2) */
CPYFPN(&fe->fe_f1, fp);
fpu_const(&fe->fe_f2, FPU_CONST_LN_2);
fp = fpu_mul(fe);
fp->fp_sign = !fp->fp_sign;
CPYFPN(&fe->fe_f1, fp);
CPYFPN(&fe->fe_f2, &x);
fp = fpu_add(fe);
CPYFPN(&fe->fe_f2, fp);
fp = fpu_etox_taylor(fe);
/* 2^k */
fp->fp_exp += k;
return fp;
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: fpu_hyperb.c,v 1.16 2013/10/11 03:37:08 isaki Exp $ */
/* $NetBSD: fpu_hyperb.c,v 1.17 2016/12/05 15:31:01 isaki Exp $ */
/*
* Copyright (c) 1995 Ken Nakata
@ -57,15 +57,12 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: fpu_hyperb.c,v 1.16 2013/10/11 03:37:08 isaki Exp $");
__KERNEL_RCSID(0, "$NetBSD: fpu_hyperb.c,v 1.17 2016/12/05 15:31:01 isaki Exp $");
#include <machine/ieee.h>
#include "fpu_emulate.h"
/* The number of items to terminate the Taylor expansion */
#define MAX_ITEMS (2000)
/*
* fpu_hyperb.c: defines the following functions
*
@ -137,71 +134,14 @@ fpu_atanh(struct fpemu *fe)
}
/*
* taylor expansion used by sinh(), cosh().
* exp(x) + exp(-x)
* cosh(x) = ------------------
* 2
*/
static struct fpn *
__fpu_sinhcosh_taylor(struct fpemu *fe, struct fpn *s0, uint32_t f)
{
struct fpn res;
struct fpn x2;
struct fpn *s1;
struct fpn *r;
int sign;
uint32_t k;
/* x2 := x * x */
CPYFPN(&fe->fe_f1, &fe->fe_f2);
r = fpu_mul(fe);
CPYFPN(&x2, r);
/* res := s0 */
CPYFPN(&res, s0);
sign = 1; /* sign := (-1)^n */
for (; f < (2 * MAX_ITEMS); ) {
/* (f1 :=) s0 * x^2 */
CPYFPN(&fe->fe_f1, s0);
CPYFPN(&fe->fe_f2, &x2);
r = fpu_mul(fe);
CPYFPN(&fe->fe_f1, r);
/*
* for sinh(), s1 := s0 * x^2 / (2n+1)2n
* for cosh(), s1 := s0 * x^2 / 2n(2n-1)
*/
k = f * (f + 1);
fpu_explode(fe, &fe->fe_f2, FTYPE_LNG, &k);
s1 = fpu_div(fe);
/* break if s1 is enough small */
if (ISZERO(s1))
break;
if (res.fp_exp - s1->fp_exp >= EXT_FRACBITS)
break;
/* s0 := s1 for next loop */
CPYFPN(s0, s1);
/* res += s1 */
CPYFPN(&fe->fe_f2, s1);
CPYFPN(&fe->fe_f1, &res);
r = fpu_add(fe);
CPYFPN(&res, r);
f += 2;
sign ^= 1;
}
CPYFPN(&fe->fe_f2, &res);
return &fe->fe_f2;
}
struct fpn *
fpu_cosh(struct fpemu *fe)
{
struct fpn s0;
struct fpn *r;
struct fpn x, *fp;
if (ISNAN(&fe->fe_f2))
return &fe->fe_f2;
@ -211,17 +151,37 @@ fpu_cosh(struct fpemu *fe)
return &fe->fe_f2;
}
fpu_const(&s0, FPU_CONST_1);
r = __fpu_sinhcosh_taylor(fe, &s0, 1);
/* if x is +0/-0, return 1 */ /* XXX is this necessary? */
if (ISZERO(&fe->fe_f2)) {
fpu_const(&fe->fe_f2, FPU_CONST_1);
return &fe->fe_f2;
}
return r;
fp = fpu_etox(fe);
CPYFPN(&x, fp);
fpu_const(&fe->fe_f1, FPU_CONST_1);
CPYFPN(&fe->fe_f2, fp);
fp = fpu_div(fe);
CPYFPN(&fe->fe_f1, fp);
CPYFPN(&fe->fe_f2, &x);
fp = fpu_add(fe);
fp->fp_exp--;
return fp;
}
/*
* exp(x) - exp(-x)
* sinh(x) = ------------------
* 2
*/
struct fpn *
fpu_sinh(struct fpemu *fe)
{
struct fpn s0;
struct fpn *r;
struct fpn x, *fp;
if (ISNAN(&fe->fe_f2))
return &fe->fe_f2;
@ -232,12 +192,28 @@ fpu_sinh(struct fpemu *fe)
if (ISZERO(&fe->fe_f2))
return &fe->fe_f2;
CPYFPN(&s0, &fe->fe_f2);
r = __fpu_sinhcosh_taylor(fe, &s0, 2);
fp = fpu_etox(fe);
CPYFPN(&x, fp);
return r;
fpu_const(&fe->fe_f1, FPU_CONST_1);
CPYFPN(&fe->fe_f2, fp);
fp = fpu_div(fe);
fp->fp_sign = 1;
CPYFPN(&fe->fe_f1, fp);
CPYFPN(&fe->fe_f2, &x);
fp = fpu_add(fe);
fp->fp_exp--;
return fp;
}
/*
* sinh(x)
* tanh(x) = ---------
* cosh(x)
*/
struct fpn *
fpu_tanh(struct fpemu *fe)
{