added an arch directory for glibc (based on 2.2.5)

the generic files are used by the archs, through Jam's source-search feature, when no specialized version exists


git-svn-id: file:///srv/svn/repos/haiku/trunk/current@11643 a95241bf-73f2-0310-859d-f6bbb57e9c96
Jérôme Duval 2005-03-10 18:56:25 +00:00
parent cd623098da
commit c862c0e4a4
6 changed files with 965 additions and 0 deletions


@@ -0,0 +1,4 @@
SubDir OBOS_TOP src kernel libroot posix glibc arch ;
SubInclude OBOS_TOP src kernel libroot posix glibc arch $(OBOS_ARCH) ;


@@ -0,0 +1,56 @@
/* mpn_cmp -- Compare two low-level natural-number integers.
Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
#include "gmp.h"
#include "gmp-impl.h"
/* Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE.
There are no restrictions on the relative sizes of
the two arguments.
Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2. */
int
#if __STDC__
mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size)
#else
mpn_cmp (op1_ptr, op2_ptr, size)
mp_srcptr op1_ptr;
mp_srcptr op2_ptr;
mp_size_t size;
#endif
{
mp_size_t i;
mp_limb_t op1_word, op2_word;
for (i = size - 1; i >= 0; i--)
{
op1_word = op1_ptr[i];
op2_word = op2_ptr[i];
if (op1_word != op2_word)
goto diff;
}
return 0;
diff:
/* This can *not* be simplified to
op1_word - op2_word
since that expression might give signed overflow. */
return (op1_word > op2_word) ? 1 : -1;
}
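
For illustration, a minimal mpn_cmp calling sketch (hypothetical values; assumes 32-bit limbs and that the in-tree gmp.h is on the include path):

#include <stdio.h>
#include "gmp.h"

int
main (void)
{
  /* Limbs are stored least significant first: with 32-bit limbs,
     {0, 1} is 2^32 and {5, 0} is 5.  Both operands must have the
     same size.  */
  mp_limb_t a[2] = { 0, 1 };
  mp_limb_t b[2] = { 5, 0 };

  /* mpn_cmp scans from the most significant limb down to the first
     difference, so this prints 1 (2^32 > 5).  */
  printf ("%d\n", mpn_cmp (a, b, 2));
  return 0;
}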


@@ -0,0 +1,107 @@
/* Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
#include <ieee754.h>
#include <float.h>
#include <stdlib.h>
/* Convert a `double' in IEEE754 standard double-precision format to a
multi-precision integer representing the significand scaled up by its
number of bits (52 for double) and an integral power of two (MPN frexp). */
mp_size_t
__mpn_extract_double (mp_ptr res_ptr, mp_size_t size,
int *expt, int *is_neg,
double value)
{
union ieee754_double u;
u.d = value;
*is_neg = u.ieee.negative;
*expt = (int) u.ieee.exponent - IEEE754_DOUBLE_BIAS;
#if BITS_PER_MP_LIMB == 32
res_ptr[0] = u.ieee.mantissa1; /* Low-order 32 bits of fraction. */
res_ptr[1] = u.ieee.mantissa0; /* High-order 20 bits. */
#define N 2
#elif BITS_PER_MP_LIMB == 64
/* Hopefully the compiler will combine the two bitfield extracts
and this composition into just the original quadword extract. */
res_ptr[0] = ((unsigned long int) u.ieee.mantissa0 << 32) | u.ieee.mantissa1;
#define N 1
#else
#error "mp_limb size " BITS_PER_MP_LIMB "not accounted for"
#endif
/* The format does not fill the last limb. There are some zeros. */
#define NUM_LEADING_ZEROS (BITS_PER_MP_LIMB \
- (DBL_MANT_DIG - ((N - 1) * BITS_PER_MP_LIMB)))
if (u.ieee.exponent == 0)
{
/* A biased exponent of zero is a special case.
Either it is a zero or it is a denormal number. */
if (res_ptr[0] == 0 && res_ptr[N - 1] == 0) /* Assumes N<=2. */
/* It's zero. */
*expt = 0;
else
{
/* It is a denormal number, meaning it has no implicit leading
one bit, and its exponent is in fact the format minimum. */
int cnt;
if (res_ptr[N - 1] != 0)
{
count_leading_zeros (cnt, res_ptr[N - 1]);
cnt -= NUM_LEADING_ZEROS;
#if N == 2
res_ptr[N - 1] = res_ptr[1] << cnt
| (N - 1)
* (res_ptr[0] >> (BITS_PER_MP_LIMB - cnt));
res_ptr[0] <<= cnt;
#else
res_ptr[N - 1] <<= cnt;
#endif
*expt = DBL_MIN_EXP - 1 - cnt;
}
else
{
count_leading_zeros (cnt, res_ptr[0]);
if (cnt >= NUM_LEADING_ZEROS)
{
res_ptr[N - 1] = res_ptr[0] << (cnt - NUM_LEADING_ZEROS);
res_ptr[0] = 0;
}
else
{
res_ptr[N - 1] = res_ptr[0] >> (NUM_LEADING_ZEROS - cnt);
res_ptr[0] <<= BITS_PER_MP_LIMB - (NUM_LEADING_ZEROS - cnt);
}
*expt = DBL_MIN_EXP - 1
- (BITS_PER_MP_LIMB - NUM_LEADING_ZEROS) - cnt;
}
}
}
else
/* Add the implicit leading one bit for a normalized number. */
res_ptr[N - 1] |= 1L << (DBL_MANT_DIG - 1 - ((N - 1) * BITS_PER_MP_LIMB));
return N;
}
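
A standalone sketch of the same bit extraction, using a plain 64-bit integer instead of union ieee754_double (assumes IEEE 754 doubles; the constants 1023 and 52 correspond to IEEE754_DOUBLE_BIAS and the fraction width):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int
main (void)
{
  double value = -6.5;			/* -1.625 * 2^2 */
  uint64_t bits;
  memcpy (&bits, &value, sizeof bits);

  int is_neg = bits >> 63;
  int expt = (int) ((bits >> 52) & 0x7ff) - 1023;
  uint64_t frac = bits & 0xfffffffffffffULL;

  /* For a normalized number, add the implicit leading one bit, just
     as the code above does for res_ptr[N - 1].  */
  uint64_t sig = frac | (1ULL << 52);

  /* value == (is_neg ? -1 : 1) * sig * 2^(expt - 52)  */
  printf ("neg=%d expt=%d sig=%llu\n", is_neg, expt,
	  (unsigned long long) sig);
  return 0;
}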


@@ -0,0 +1,245 @@
/* mpn_divrem -- Divide natural numbers, producing both remainder and
quotient.
Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
the NSIZE-DSIZE least significant quotient limbs at QP
and the DSIZE long remainder at NP. If QEXTRA_LIMBS is
non-zero, generate that many fraction bits and append them after the
other quotient limbs.
Return the most significant limb of the quotient, this is always 0 or 1.
Preconditions:
0. NSIZE >= DSIZE.
1. The most significant bit of the divisor must be set.
2. QP must either not overlap with the input operands at all, or
QP + DSIZE >= NP must hold true. (This means that it's
possible to put the quotient in the high part of NUM, right after the
remainder in NUM.)
3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero. */
mp_limb_t
#if __STDC__
mpn_divrem (mp_ptr qp, mp_size_t qextra_limbs,
mp_ptr np, mp_size_t nsize,
mp_srcptr dp, mp_size_t dsize)
#else
mpn_divrem (qp, qextra_limbs, np, nsize, dp, dsize)
mp_ptr qp;
mp_size_t qextra_limbs;
mp_ptr np;
mp_size_t nsize;
mp_srcptr dp;
mp_size_t dsize;
#endif
{
mp_limb_t most_significant_q_limb = 0;
switch (dsize)
{
case 0:
/* We are asked to divide by zero, so go ahead and do it! (To make
the compiler not remove this statement, return the value.) */
return 1 / dsize;
case 1:
{
mp_size_t i;
mp_limb_t n1;
mp_limb_t d;
d = dp[0];
n1 = np[nsize - 1];
if (n1 >= d)
{
n1 -= d;
most_significant_q_limb = 1;
}
qp += qextra_limbs;
for (i = nsize - 2; i >= 0; i--)
udiv_qrnnd (qp[i], n1, n1, np[i], d);
qp -= qextra_limbs;
for (i = qextra_limbs - 1; i >= 0; i--)
udiv_qrnnd (qp[i], n1, n1, 0, d);
np[0] = n1;
}
break;
case 2:
{
mp_size_t i;
mp_limb_t n1, n0, n2;
mp_limb_t d1, d0;
np += nsize - 2;
d1 = dp[1];
d0 = dp[0];
n1 = np[1];
n0 = np[0];
if (n1 >= d1 && (n1 > d1 || n0 >= d0))
{
sub_ddmmss (n1, n0, n1, n0, d1, d0);
most_significant_q_limb = 1;
}
for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--)
{
mp_limb_t q;
mp_limb_t r;
if (i >= qextra_limbs)
np--;
else
np[0] = 0;
if (n1 == d1)
{
/* Q should be either 111..111 or 111..110. Need special
treatment of this rare case as normal division would
give overflow. */
q = ~(mp_limb_t) 0;
r = n0 + d1;
if (r < d1) /* Carry in the addition? */
{
add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);
qp[i] = q;
continue;
}
n1 = d0 - (d0 != 0);
n0 = -d0;
}
else
{
udiv_qrnnd (q, r, n1, n0, d1);
umul_ppmm (n1, n0, d0, q);
}
n2 = np[0];
q_test:
if (n1 > r || (n1 == r && n0 > n2))
{
/* The estimated Q was too large. */
q--;
sub_ddmmss (n1, n0, n1, n0, 0, d0);
r += d1;
if (r >= d1) /* If not carry, test Q again. */
goto q_test;
}
qp[i] = q;
sub_ddmmss (n1, n0, r, n2, n1, n0);
}
np[1] = n1;
np[0] = n0;
}
break;
default:
{
mp_size_t i;
mp_limb_t dX, d1, n0;
np += nsize - dsize;
dX = dp[dsize - 1];
d1 = dp[dsize - 2];
n0 = np[dsize - 1];
if (n0 >= dX)
{
if (n0 > dX || mpn_cmp (np, dp, dsize - 1) >= 0)
{
mpn_sub_n (np, np, dp, dsize);
n0 = np[dsize - 1];
most_significant_q_limb = 1;
}
}
for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--)
{
mp_limb_t q;
mp_limb_t n1, n2;
mp_limb_t cy_limb;
if (i >= qextra_limbs)
{
np--;
n2 = np[dsize];
}
else
{
n2 = np[dsize - 1];
MPN_COPY_DECR (np + 1, np, dsize);
np[0] = 0;
}
if (n0 == dX)
/* This might over-estimate q, but it's probably not worth
the extra code here to find out. */
q = ~(mp_limb_t) 0;
else
{
mp_limb_t r;
udiv_qrnnd (q, r, n0, np[dsize - 1], dX);
umul_ppmm (n1, n0, d1, q);
while (n1 > r || (n1 == r && n0 > np[dsize - 2]))
{
q--;
r += dX;
if (r < dX) /* I.e. "carry in previous addition?" */
break;
n1 -= n0 < d1;
n0 -= d1;
}
}
/* Possible optimization: We already have (q * n0) and (1 * n1)
after the calculation of q. Taking advantage of that, we
could make this loop make two iterations less. */
cy_limb = mpn_submul_1 (np, dp, dsize, q);
if (n2 != cy_limb)
{
mpn_add_n (np, np, dp, dsize);
q--;
}
qp[i] = q;
n0 = np[dsize - 1];
}
}
}
return most_significant_q_limb;
}
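
A minimal calling sketch (hypothetical values; assumes 32-bit limbs). The divisor is pre-normalized so that its most significant bit is set, per precondition 1:

#include <stdio.h>
#include "gmp.h"

int
main (void)
{
  /* Divide n = 7*2^32 + 9 by d = 2^31.  nsize - dsize = 1 quotient
     limb is written to qp; the remainder replaces np[0].  */
  mp_limb_t np[2] = { 9, 7 };		/* least significant limb first */
  mp_limb_t dp[1] = { 0x80000000 };	/* high bit set: normalized */
  mp_limb_t qp[1];
  mp_limb_t q_high = mpn_divrem (qp, 0, np, 2, dp, 1);

  /* q_high is 0 or 1; here the quotient is 14 and the remainder 9.  */
  printf ("q = %lu (high %lu), r = %lu\n", (unsigned long) qp[0],
	  (unsigned long) q_high, (unsigned long) np[0]);
  return 0;
}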


@@ -0,0 +1,152 @@
/* mpn_mul -- Multiply two natural numbers.
Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
#include "gmp.h"
#include "gmp-impl.h"
/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
and v (pointed to by VP, with VSIZE limbs), and store the result at
PRODP. USIZE + VSIZE limbs are always stored, but the most
significant limb of the result may be zero even when the input
operands are normalized. Return the most significant limb of the
result.
NOTE: The space pointed to by PRODP is overwritten before U and V
have been fully read, so overlap is an error.
Argument constraints:
1. USIZE >= VSIZE.
2. PRODP != UP and PRODP != VP, i.e. the destination
must be distinct from the multiplier and the multiplicand. */
/* If KARATSUBA_THRESHOLD is not already defined, define it to a
value which is good on most machines. */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 32
#endif
mp_limb_t
#if __STDC__
mpn_mul (mp_ptr prodp,
mp_srcptr up, mp_size_t usize,
mp_srcptr vp, mp_size_t vsize)
#else
mpn_mul (prodp, up, usize, vp, vsize)
mp_ptr prodp;
mp_srcptr up;
mp_size_t usize;
mp_srcptr vp;
mp_size_t vsize;
#endif
{
mp_ptr prod_endp = prodp + usize + vsize - 1;
mp_limb_t cy;
mp_ptr tspace;
TMP_DECL (marker);
if (vsize < KARATSUBA_THRESHOLD)
{
/* Handle simple cases with traditional multiplication.
This is the most critical code of the entire function. All
multiplies rely on this, both small and huge. Small ones arrive
here immediately. Huge ones arrive here as this is the base case
for Karatsuba's recursive algorithm below. */
mp_size_t i;
mp_limb_t cy_limb;
mp_limb_t v_limb;
if (vsize == 0)
return 0;
/* Multiply by the first limb in V separately, as the result can be
stored (not added) to PROD. We also avoid a loop for zeroing. */
v_limb = vp[0];
if (v_limb <= 1)
{
if (v_limb == 1)
MPN_COPY (prodp, up, usize);
else
MPN_ZERO (prodp, usize);
cy_limb = 0;
}
else
cy_limb = mpn_mul_1 (prodp, up, usize, v_limb);
prodp[usize] = cy_limb;
prodp++;
/* For each iteration in the outer loop, multiply one limb from
U with one limb from V, and add it to PROD. */
for (i = 1; i < vsize; i++)
{
v_limb = vp[i];
if (v_limb <= 1)
{
cy_limb = 0;
if (v_limb == 1)
cy_limb = mpn_add_n (prodp, prodp, up, usize);
}
else
cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb);
prodp[usize] = cy_limb;
prodp++;
}
return cy_limb;
}
TMP_MARK (marker);
tspace = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB);
MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace);
prodp += vsize;
up += vsize;
usize -= vsize;
if (usize >= vsize)
{
mp_ptr tp = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB);
do
{
MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace);
cy = mpn_add_n (prodp, prodp, tp, vsize);
mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy);
prodp += vsize;
up += vsize;
usize -= vsize;
}
while (usize >= vsize);
}
/* True: usize < vsize. */
/* Make life simple: Recurse. */
if (usize != 0)
{
mpn_mul (tspace, vp, vsize, up, usize);
cy = mpn_add_n (prodp, prodp, tspace, vsize);
mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy);
}
TMP_FREE (marker);
return *prod_endp;
}
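
A minimal calling sketch (hypothetical values; assumes 32-bit limbs), respecting constraint 1, USIZE >= VSIZE:

#include <stdio.h>
#include "gmp.h"

int
main (void)
{
  /* (2^32 + 2) * 3 = 3*2^32 + 6; all usize + vsize = 3 limbs of the
     product are stored, and the top limb here is zero.  */
  mp_limb_t up[2] = { 2, 1 };
  mp_limb_t vp[1] = { 3 };
  mp_limb_t prodp[3];
  mp_limb_t high = mpn_mul (prodp, up, 2, vp, 1);

  printf ("%lu %lu %lu (high %lu)\n", (unsigned long) prodp[0],
	  (unsigned long) prodp[1], (unsigned long) prodp[2],
	  (unsigned long) high);	/* prints: 6 3 0 (high 0) */
  return 0;
}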


@@ -0,0 +1,401 @@
/* mpn_mul_n -- Multiply two natural numbers of length n.
Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
#include "gmp.h"
#include "gmp-impl.h"
/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are
always stored. Return the most significant limb.
Argument constraints:
1. PRODP != UP and PRODP != VP, i.e. the destination
must be distinct from the multiplier and the multiplicand. */
/* If KARATSUBA_THRESHOLD is not already defined, define it to a
value which is good on most machines. */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 32
#endif
/* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */
#if KARATSUBA_THRESHOLD < 2
#undef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 2
#endif
/* Handle simple cases with traditional multiplication.
This is the most critical code of multiplication. All multiplies rely
on this, both small and huge. Small ones arrive here immediately. Huge
ones arrive here as this is the base case for Karatsuba's recursive
algorithm below. */
void
#if __STDC__
impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
impn_mul_n_basecase (prodp, up, vp, size)
mp_ptr prodp;
mp_srcptr up;
mp_srcptr vp;
mp_size_t size;
#endif
{
mp_size_t i;
mp_limb_t cy_limb;
mp_limb_t v_limb;
/* Multiply by the first limb in V separately, as the result can be
stored (not added) to PROD. We also avoid a loop for zeroing. */
v_limb = vp[0];
if (v_limb <= 1)
{
if (v_limb == 1)
MPN_COPY (prodp, up, size);
else
MPN_ZERO (prodp, size);
cy_limb = 0;
}
else
cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
prodp[size] = cy_limb;
prodp++;
/* For each iteration in the outer loop, multiply one limb from
U with one limb from V, and add it to PROD. */
for (i = 1; i < size; i++)
{
v_limb = vp[i];
if (v_limb <= 1)
{
cy_limb = 0;
if (v_limb == 1)
cy_limb = mpn_add_n (prodp, prodp, up, size);
}
else
cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
prodp[size] = cy_limb;
prodp++;
}
}
void
#if __STDC__
impn_mul_n (mp_ptr prodp,
mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
#else
impn_mul_n (prodp, up, vp, size, tspace)
mp_ptr prodp;
mp_srcptr up;
mp_srcptr vp;
mp_size_t size;
mp_ptr tspace;
#endif
{
if ((size & 1) != 0)
{
/* The size is odd, the code below doesn't handle that.
Multiply the least significant (size - 1) limbs with a recursive
call, and handle the most significant limb of S1 and S2
separately. */
/* A slightly faster way to do this would be to make the Karatsuba
code below behave as if the size were even, and let it check for
odd size in the end. I.e., in essence move this code to the end.
Doing so would save us a recursive call, and potentially make the
stack grow a lot less. */
mp_size_t esize = size - 1; /* even size */
mp_limb_t cy_limb;
MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
prodp[esize + esize] = cy_limb;
cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
prodp[esize + size] = cy_limb;
}
else
{
/* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
Split U in two pieces, U1 and U0, such that
U = U0 + U1*(B**n),
and V in V1 and V0, such that
V = V0 + V1*(B**n).
UV is then computed recursively using the identity
UV = (B^(2n) + B^n)*U1*V1 + B^n*(U1 - U0)*(V0 - V1) + (B^n + 1)*U0*V0,
where B = 2**BITS_PER_MP_LIMB. */
mp_size_t hsize = size >> 1;
mp_limb_t cy;
int negflg;
/*** Product H. ________________ ________________
|_____U1 x V1____||____U0 x V0_____| */
/* Put result in upper part of PROD and pass low part of TSPACE
as new TSPACE. */
MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);
/*** Product M. ________________
|_(U1-U0)(V0-V1)_| */
if (mpn_cmp (up + hsize, up, hsize) >= 0)
{
mpn_sub_n (prodp, up + hsize, up, hsize);
negflg = 0;
}
else
{
mpn_sub_n (prodp, up, up + hsize, hsize);
negflg = 1;
}
if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
{
mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
negflg ^= 1;
}
else
{
mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
/* No change of NEGFLG. */
}
/* Read temporary operands from low part of PROD.
Put result in low part of TSPACE using upper part of TSPACE
as new TSPACE. */
MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);
/*** Add/copy product H. */
MPN_COPY (prodp + hsize, prodp + size, hsize);
cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
/*** Add product M (if NEGFLG M is a negative number). */
if (negflg)
cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
else
cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
/*** Product L. ________________ ________________
|________________||____U0 x V0_____| */
/* Read temporary operands from low part of PROD.
Put result in low part of TSPACE using upper part of TSPACE
as new TSPACE. */
MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);
/*** Add/copy Product L (twice). */
cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
if (cy)
mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
MPN_COPY (prodp, tspace, hsize);
cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
if (cy)
mpn_add_1 (prodp + size, prodp + size, size, 1);
}
}
void
#if __STDC__
impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
#else
impn_sqr_n_basecase (prodp, up, size)
mp_ptr prodp;
mp_srcptr up;
mp_size_t size;
#endif
{
mp_size_t i;
mp_limb_t cy_limb;
mp_limb_t v_limb;
/* Multiply by the first limb in V separately, as the result can be
stored (not added) to PROD. We also avoid a loop for zeroing. */
v_limb = up[0];
if (v_limb <= 1)
{
if (v_limb == 1)
MPN_COPY (prodp, up, size);
else
MPN_ZERO (prodp, size);
cy_limb = 0;
}
else
cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
prodp[size] = cy_limb;
prodp++;
/* For each iteration in the outer loop, multiply one limb from
U with one limb from V, and add it to PROD. */
for (i = 1; i < size; i++)
{
v_limb = up[i];
if (v_limb <= 1)
{
cy_limb = 0;
if (v_limb == 1)
cy_limb = mpn_add_n (prodp, prodp, up, size);
}
else
cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
prodp[size] = cy_limb;
prodp++;
}
}
void
#if __STDC__
impn_sqr_n (mp_ptr prodp,
mp_srcptr up, mp_size_t size, mp_ptr tspace)
#else
impn_sqr_n (prodp, up, size, tspace)
mp_ptr prodp;
mp_srcptr up;
mp_size_t size;
mp_ptr tspace;
#endif
{
if ((size & 1) != 0)
{
/* The size is odd, the code below doesn't handle that.
Multiply the least significant (size - 1) limbs with a recursive
call, and handle the most significant limb of S1 and S2
separately. */
/* A slightly faster way to do this would be to make the Karatsuba
code below behave as if the size were even, and let it check for
odd size in the end. I.e., in essence move this code to the end.
Doing so would save us a recursive call, and potentially make the
stack grow a lot less. */
mp_size_t esize = size - 1; /* even size */
mp_limb_t cy_limb;
MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
prodp[esize + esize] = cy_limb;
cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);
prodp[esize + size] = cy_limb;
}
else
{
mp_size_t hsize = size >> 1;
mp_limb_t cy;
/*** Product H. ________________ ________________
|_____U1 x U1____||____U0 x U0_____| */
/* Put result in upper part of PROD and pass low part of TSPACE
as new TSPACE. */
MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);
/*** Product M. ________________
|_(U1-U0)(U0-U1)_| */
if (mpn_cmp (up + hsize, up, hsize) >= 0)
{
mpn_sub_n (prodp, up + hsize, up, hsize);
}
else
{
mpn_sub_n (prodp, up, up + hsize, hsize);
}
/* Read temporary operands from low part of PROD.
Put result in low part of TSPACE using upper part of TSPACE
as new TSPACE. */
MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);
/*** Add/copy product H. */
MPN_COPY (prodp + hsize, prodp + size, hsize);
cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
/*** Add product M (here M = -(U1-U0)^2, so it is always subtracted). */
cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
/*** Product L. ________________ ________________
|________________||____U0 x U0_____| */
/* Read temporary operands from low part of PROD.
Put result in low part of TSPACE using upper part of TSPACE
as new TSPACE. */
MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
/*** Add/copy Product L (twice). */
cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
if (cy)
mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
MPN_COPY (prodp, tspace, hsize);
cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
if (cy)
mpn_add_1 (prodp + size, prodp + size, size, 1);
}
}
/* This should be made into an inline function in gmp.h. */
void
#if __STDC__
mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
mpn_mul_n (prodp, up, vp, size)
mp_ptr prodp;
mp_srcptr up;
mp_srcptr vp;
mp_size_t size;
#endif
{
TMP_DECL (marker);
TMP_MARK (marker);
if (up == vp)
{
if (size < KARATSUBA_THRESHOLD)
{
impn_sqr_n_basecase (prodp, up, size);
}
else
{
mp_ptr tspace;
tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
impn_sqr_n (prodp, up, size, tspace);
}
}
else
{
if (size < KARATSUBA_THRESHOLD)
{
impn_mul_n_basecase (prodp, up, vp, size);
}
else
{
mp_ptr tspace;
tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
impn_mul_n (prodp, up, vp, size, tspace);
}
}
TMP_FREE (marker);
}
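
The identity behind impn_mul_n can be spot-checked with small numbers; a standalone sketch with a toy base B = 2^8 and n = 1 (so U = U1*B + U0 and V = V1*B + V0):

#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  int64_t B = 256;
  int64_t U1 = 4, U0 = 7, V1 = 5, V0 = 2;
  int64_t U = U1 * B + U0;		/* 1031 */
  int64_t V = V1 * B + V0;		/* 1282 */

  /* UV = (B^2 + B)*U1*V1 + B*(U1 - U0)*(V0 - V1) + (B + 1)*U0*V0;
     the middle term is the one whose sign NEGFLG tracks above.  */
  int64_t rhs = (B * B + B) * U1 * V1
		+ B * (U1 - U0) * (V0 - V1)
		+ (B + 1) * U0 * V0;

  printf ("%lld == %lld\n", (long long) (U * V), (long long) rhs);
  return 0;
}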