added an arch directory for glibc (based on 2.2.5)

the generic files are used by the archs, through Jam's source-search feature, when no specialized version exists


git-svn-id: file:///srv/svn/repos/haiku/trunk/current@11643 a95241bf-73f2-0310-859d-f6bbb57e9c96
Jérôme Duval 2005-03-10 18:56:25 +00:00
parent cd623098da
commit c862c0e4a4
6 changed files with 965 additions and 0 deletions


@@ -0,0 +1,4 @@
SubDir OBOS_TOP src kernel libroot posix glibc arch ;
SubInclude OBOS_TOP src kernel libroot posix glibc arch $(OBOS_ARCH) ;


@@ -0,0 +1,56 @@
/* mpn_cmp -- Compare two low-level natural-number integers.
Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
#include "gmp.h"
#include "gmp-impl.h"
/* Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE.
There are no restrictions on the relative sizes of
the two arguments.
Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2. */
int
#if __STDC__
mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size)
#else
mpn_cmp (op1_ptr, op2_ptr, size)
mp_srcptr op1_ptr;
mp_srcptr op2_ptr;
mp_size_t size;
#endif
{
mp_size_t i;
mp_limb_t op1_word, op2_word;
for (i = size - 1; i >= 0; i--)
{
op1_word = op1_ptr[i];
op2_word = op2_ptr[i];
if (op1_word != op2_word)
goto diff;
}
return 0;
diff:
/* This can *not* be simplified to
op1_word - op2_word
since that expression might give signed overflow. */
return (op1_word > op2_word) ? 1 : -1;
}
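
For illustration, a minimal mpn_cmp calling sketch (hypothetical values; assumes 32-bit limbs and that the in-tree gmp.h is on the include path):

#include <stdio.h>
#include "gmp.h"

int
main (void)
{
  /* Limbs are stored least significant first: with 32-bit limbs,
     {0, 1} is 2^32 and {5, 0} is 5.  Both operands must have the
     same size.  */
  mp_limb_t a[2] = { 0, 1 };
  mp_limb_t b[2] = { 5, 0 };

  /* mpn_cmp scans from the most significant limb down to the first
     difference, so this prints 1 (2^32 > 5).  */
  printf ("%d\n", mpn_cmp (a, b, 2));
  return 0;
}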


@@ -0,0 +1,107 @@
/* Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
#include <ieee754.h>
#include <float.h>
#include <stdlib.h>
/* Convert a `double' in IEEE754 standard double-precision format to a
multi-precision integer representing the significand scaled up by its
number of bits (52 for double) and an integral power of two (MPN frexp). */
mp_size_t
__mpn_extract_double (mp_ptr res_ptr, mp_size_t size,
int *expt, int *is_neg,
double value)
{
union ieee754_double u;
u.d = value;
*is_neg = u.ieee.negative;
*expt = (int) u.ieee.exponent - IEEE754_DOUBLE_BIAS;
#if BITS_PER_MP_LIMB == 32
res_ptr[0] = u.ieee.mantissa1; /* Low-order 32 bits of fraction. */
res_ptr[1] = u.ieee.mantissa0; /* High-order 20 bits. */
#define N 2
#elif BITS_PER_MP_LIMB == 64
/* Hopefully the compiler will combine the two bitfield extracts
and this composition into just the original quadword extract. */
res_ptr[0] = ((unsigned long int) u.ieee.mantissa0 << 32) | u.ieee.mantissa1;
#define N 1
#else
#error "mp_limb size " BITS_PER_MP_LIMB "not accounted for"
#endif
/* The format does not fill the last limb. There are some zeros. */
#define NUM_LEADING_ZEROS (BITS_PER_MP_LIMB \
- (DBL_MANT_DIG - ((N - 1) * BITS_PER_MP_LIMB)))
if (u.ieee.exponent == 0)
{
/* A biased exponent of zero is a special case.
Either it is a zero or it is a denormal number. */
if (res_ptr[0] == 0 && res_ptr[N - 1] == 0) /* Assumes N<=2. */
/* It's zero. */
*expt = 0;
else
{
/* It is a denormal number, meaning it has no implicit leading
one bit, and its exponent is in fact the format minimum. */
int cnt;
if (res_ptr[N - 1] != 0)
{
count_leading_zeros (cnt, res_ptr[N - 1]);
cnt -= NUM_LEADING_ZEROS;
#if N == 2
res_ptr[N - 1] = res_ptr[1] << cnt
| (N - 1)
* (res_ptr[0] >> (BITS_PER_MP_LIMB - cnt));
res_ptr[0] <<= cnt;
#else
res_ptr[N - 1] <<= cnt;
#endif
*expt = DBL_MIN_EXP - 1 - cnt;
}
else
{
count_leading_zeros (cnt, res_ptr[0]);
if (cnt >= NUM_LEADING_ZEROS)
{
res_ptr[N - 1] = res_ptr[0] << (cnt - NUM_LEADING_ZEROS);
res_ptr[0] = 0;
}
else
{
res_ptr[N - 1] = res_ptr[0] >> (NUM_LEADING_ZEROS - cnt);
res_ptr[0] <<= BITS_PER_MP_LIMB - (NUM_LEADING_ZEROS - cnt);
}
*expt = DBL_MIN_EXP - 1
- (BITS_PER_MP_LIMB - NUM_LEADING_ZEROS) - cnt;
}
}
}
else
/* Add the implicit leading one bit for a normalized number. */
res_ptr[N - 1] |= 1L << (DBL_MANT_DIG - 1 - ((N - 1) * BITS_PER_MP_LIMB));
return N;
}
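
A standalone sketch of the same bit extraction, using a plain 64-bit integer instead of union ieee754_double (assumes IEEE 754 doubles; the constants 1023 and 52 correspond to IEEE754_DOUBLE_BIAS and the fraction width):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int
main (void)
{
  double value = -6.5;			/* -1.625 * 2^2 */
  uint64_t bits;
  memcpy (&bits, &value, sizeof bits);

  int is_neg = bits >> 63;
  int expt = (int) ((bits >> 52) & 0x7ff) - 1023;
  uint64_t frac = bits & 0xfffffffffffffULL;

  /* For a normalized number, add the implicit leading one bit, just
     as the code above does for res_ptr[N - 1].  */
  uint64_t sig = frac | (1ULL << 52);

  /* value == (is_neg ? -1 : 1) * sig * 2^(expt - 52)  */
  printf ("neg=%d expt=%d sig=%llu\n", is_neg, expt,
	  (unsigned long long) sig);
  return 0;
}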


@@ -0,0 +1,245 @@
/* mpn_divrem -- Divide natural numbers, producing both remainder and
quotient.
Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
the NSIZE-DSIZE least significant quotient limbs at QP
and the DSIZE long remainder at NP. If QEXTRA_LIMBS is
non-zero, generate that many fraction bits and append them after the
other quotient limbs.
Return the most significant limb of the quotient, this is always 0 or 1.
Preconditions:
0. NSIZE >= DSIZE.
1. The most significant bit of the divisor must be set.
2. QP must either not overlap with the input operands at all, or
QP + DSIZE >= NP must hold true. (This means that it's
possible to put the quotient in the high part of NUM, right after the
remainder in NUM.)
3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero. */
mp_limb_t
#if __STDC__
mpn_divrem (mp_ptr qp, mp_size_t qextra_limbs,
mp_ptr np, mp_size_t nsize,
mp_srcptr dp, mp_size_t dsize)
#else
mpn_divrem (qp, qextra_limbs, np, nsize, dp, dsize)
mp_ptr qp;
mp_size_t qextra_limbs;
mp_ptr np;
mp_size_t nsize;
mp_srcptr dp;
mp_size_t dsize;
#endif
{
mp_limb_t most_significant_q_limb = 0;
switch (dsize)
{
case 0:
/* We are asked to divide by zero, so go ahead and do it! (To make
the compiler not remove this statement, return the value.) */
return 1 / dsize;
case 1:
{
mp_size_t i;
mp_limb_t n1;
mp_limb_t d;
d = dp[0];
n1 = np[nsize - 1];
if (n1 >= d)
{
n1 -= d;
most_significant_q_limb = 1;
}
qp += qextra_limbs;
for (i = nsize - 2; i >= 0; i--)
udiv_qrnnd (qp[i], n1, n1, np[i], d);
qp -= qextra_limbs;
for (i = qextra_limbs - 1; i >= 0; i--)
udiv_qrnnd (qp[i], n1, n1, 0, d);
np[0] = n1;
}
break;
case 2:
{
mp_size_t i;
mp_limb_t n1, n0, n2;
mp_limb_t d1, d0;
np += nsize - 2;
d1 = dp[1];
d0 = dp[0];
n1 = np[1];
n0 = np[0];
if (n1 >= d1 && (n1 > d1 || n0 >= d0))
{
sub_ddmmss (n1, n0, n1, n0, d1, d0);
most_significant_q_limb = 1;
}
for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--)
{
mp_limb_t q;
mp_limb_t r;
if (i >= qextra_limbs)
np--;
else
np[0] = 0;
if (n1 == d1)
{
/* Q should be either 111..111 or 111..110. Need special
treatment of this rare case as normal division would
give overflow. */
q = ~(mp_limb_t) 0;
r = n0 + d1;
if (r < d1) /* Carry in the addition? */
{
add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);
qp[i] = q;
continue;
}
n1 = d0 - (d0 != 0);
n0 = -d0;
}
else
{
udiv_qrnnd (q, r, n1, n0, d1);
umul_ppmm (n1, n0, d0, q);
}
n2 = np[0];
q_test:
if (n1 > r || (n1 == r && n0 > n2))
{
/* The estimated Q was too large. */
q--;
sub_ddmmss (n1, n0, n1, n0, 0, d0);
r += d1;
if (r >= d1) /* If not carry, test Q again. */
goto q_test;
}
qp[i] = q;
sub_ddmmss (n1, n0, r, n2, n1, n0);
}
np[1] = n1;
np[0] = n0;
}
break;
default:
{
mp_size_t i;
mp_limb_t dX, d1, n0;
np += nsize - dsize;
dX = dp[dsize - 1];
d1 = dp[dsize - 2];
n0 = np[dsize - 1];
if (n0 >= dX)
{
if (n0 > dX || mpn_cmp (np, dp, dsize - 1) >= 0)
{
mpn_sub_n (np, np, dp, dsize);
n0 = np[dsize - 1];
most_significant_q_limb = 1;
}
}
for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--)
{
mp_limb_t q;
mp_limb_t n1, n2;
mp_limb_t cy_limb;
if (i >= qextra_limbs)
{
np--;
n2 = np[dsize];
}
else
{
n2 = np[dsize - 1];
MPN_COPY_DECR (np + 1, np, dsize);
np[0] = 0;
}
if (n0 == dX)
/* This might over-estimate q, but it's probably not worth
the extra code here to find out. */
q = ~(mp_limb_t) 0;
else
{
mp_limb_t r;
udiv_qrnnd (q, r, n0, np[dsize - 1], dX);
umul_ppmm (n1, n0, d1, q);
while (n1 > r || (n1 == r && n0 > np[dsize - 2]))
{
q--;
r += dX;
if (r < dX) /* I.e. "carry in previous addition?" */
break;
n1 -= n0 < d1;
n0 -= d1;
}
}
/* Possible optimization: We already have (q * n0) and (1 * n1)
after the calculation of q. Taking advantage of that, we
could make this loop make two iterations less. */
cy_limb = mpn_submul_1 (np, dp, dsize, q);
if (n2 != cy_limb)
{
mpn_add_n (np, np, dp, dsize);
q--;
}
qp[i] = q;
n0 = np[dsize - 1];
}
}
}
return most_significant_q_limb;
}
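
A minimal calling sketch (hypothetical values; assumes 32-bit limbs). The divisor is pre-normalized so that its most significant bit is set, per precondition 1:

#include <stdio.h>
#include "gmp.h"

int
main (void)
{
  /* Divide n = 7*2^32 + 9 by d = 2^31.  nsize - dsize = 1 quotient
     limb is written to qp; the remainder replaces np[0].  */
  mp_limb_t np[2] = { 9, 7 };		/* least significant limb first */
  mp_limb_t dp[1] = { 0x80000000 };	/* high bit set: normalized */
  mp_limb_t qp[1];
  mp_limb_t q_high = mpn_divrem (qp, 0, np, 2, dp, 1);

  /* q_high is 0 or 1; here the quotient is 14 and the remainder 9.  */
  printf ("q = %lu (high %lu), r = %lu\n", (unsigned long) qp[0],
	  (unsigned long) q_high, (unsigned long) np[0]);
  return 0;
}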


@@ -0,0 +1,152 @@
/* mpn_mul -- Multiply two natural numbers.
Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
#include "gmp.h"
#include "gmp-impl.h"
/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
and v (pointed to by VP, with VSIZE limbs), and store the result at
PRODP. USIZE + VSIZE limbs are always stored, but the most
significant limb of the result may be zero even when the input
operands are normalized. Return the most significant limb of the
result.
NOTE: The space pointed to by PRODP is overwritten before U and V
have been fully read, so overlap is an error.
Argument constraints:
1. USIZE >= VSIZE.
2. PRODP != UP and PRODP != VP, i.e. the destination
must be distinct from the multiplier and the multiplicand. */
/* If KARATSUBA_THRESHOLD is not already defined, define it to a
value which is good on most machines. */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 32
#endif
mp_limb_t
#if __STDC__
mpn_mul (mp_ptr prodp,
mp_srcptr up, mp_size_t usize,
mp_srcptr vp, mp_size_t vsize)
#else
mpn_mul (prodp, up, usize, vp, vsize)
mp_ptr prodp;
mp_srcptr up;
mp_size_t usize;
mp_srcptr vp;
mp_size_t vsize;
#endif
{
mp_ptr prod_endp = prodp + usize + vsize - 1;
mp_limb_t cy;
mp_ptr tspace;
TMP_DECL (marker);
if (vsize < KARATSUBA_THRESHOLD)
{
/* Handle simple cases with traditional multiplication.
This is the most critical code of the entire function. All
multiplies rely on this, both small and huge. Small ones arrive
here immediately. Huge ones arrive here as this is the base case
for Karatsuba's recursive algorithm below. */
mp_size_t i;
mp_limb_t cy_limb;
mp_limb_t v_limb;
if (vsize == 0)
return 0;
/* Multiply by the first limb in V separately, as the result can be
stored (not added) to PROD. We also avoid a loop for zeroing. */
v_limb = vp[0];
if (v_limb <= 1)
{
if (v_limb == 1)
MPN_COPY (prodp, up, usize);
else
MPN_ZERO (prodp, usize);
cy_limb = 0;
}
else
cy_limb = mpn_mul_1 (prodp, up, usize, v_limb);
prodp[usize] = cy_limb;
prodp++;
/* For each iteration in the outer loop, multiply one limb from
U with one limb from V, and add it to PROD. */
for (i = 1; i < vsize; i++)
{
v_limb = vp[i];
if (v_limb <= 1)
{
cy_limb = 0;
if (v_limb == 1)
cy_limb = mpn_add_n (prodp, prodp, up, usize);
}
else
cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb);
prodp[usize] = cy_limb;
prodp++;
}
return cy_limb;
}
TMP_MARK (marker);
tspace = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB);
MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace);
prodp += vsize;
up += vsize;
usize -= vsize;
if (usize >= vsize)
{
mp_ptr tp = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB);
do
{
MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace);
cy = mpn_add_n (prodp, prodp, tp, vsize);
mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy);
prodp += vsize;
up += vsize;
usize -= vsize;
}
while (usize >= vsize);
}
/* True: usize < vsize. */
/* Make life simple: Recurse. */
if (usize != 0)
{
mpn_mul (tspace, vp, vsize, up, usize);
cy = mpn_add_n (prodp, prodp, tspace, vsize);
mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy);
}
TMP_FREE (marker);
return *prod_endp;
}
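
A minimal calling sketch (hypothetical values; assumes 32-bit limbs), respecting constraint 1, USIZE >= VSIZE:

#include <stdio.h>
#include "gmp.h"

int
main (void)
{
  /* (2^32 + 2) * 3 = 3*2^32 + 6; all usize + vsize = 3 limbs of the
     product are stored, and the top limb here is zero.  */
  mp_limb_t up[2] = { 2, 1 };
  mp_limb_t vp[1] = { 3 };
  mp_limb_t prodp[3];
  mp_limb_t high = mpn_mul (prodp, up, 2, vp, 1);

  printf ("%lu %lu %lu (high %lu)\n", (unsigned long) prodp[0],
	  (unsigned long) prodp[1], (unsigned long) prodp[2],
	  (unsigned long) high);	/* prints: 6 3 0 (high 0) */
  return 0;
}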


@@ -0,0 +1,401 @@
/* mpn_mul_n -- Multiply two natural numbers of length n.
Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
#include "gmp.h"
#include "gmp-impl.h"
/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are
always stored. Return the most significant limb.
Argument constraints:
1. PRODP != UP and PRODP != VP, i.e. the destination
must be distinct from the multiplier and the multiplicand. */
/* If KARATSUBA_THRESHOLD is not already defined, define it to a
value which is good on most machines. */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 32
#endif
/* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */
#if KARATSUBA_THRESHOLD < 2
#undef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 2
#endif
/* Handle simple cases with traditional multiplication.
This is the most critical code of multiplication. All multiplies rely
on this, both small and huge. Small ones arrive here immediately. Huge
ones arrive here as this is the base case for Karatsuba's recursive
algorithm below. */
void
#if __STDC__
impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
impn_mul_n_basecase (prodp, up, vp, size)
mp_ptr prodp;
mp_srcptr up;
mp_srcptr vp;
mp_size_t size;
#endif
{
mp_size_t i;
mp_limb_t cy_limb;
mp_limb_t v_limb;
/* Multiply by the first limb in V separately, as the result can be
stored (not added) to PROD. We also avoid a loop for zeroing. */
v_limb = vp[0];
if (v_limb <= 1)
{
if (v_limb == 1)
MPN_COPY (prodp, up, size);
else
MPN_ZERO (prodp, size);
cy_limb = 0;
}
else
cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
prodp[size] = cy_limb;
prodp++;
/* For each iteration in the outer loop, multiply one limb from
U with one limb from V, and add it to PROD. */
for (i = 1; i < size; i++)
{
v_limb = vp[i];
if (v_limb <= 1)
{
cy_limb = 0;
if (v_limb == 1)
cy_limb = mpn_add_n (prodp, prodp, up, size);
}
else
cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
prodp[size] = cy_limb;
prodp++;
}
}
void
#if __STDC__
impn_mul_n (mp_ptr prodp,
mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
#else
impn_mul_n (prodp, up, vp, size, tspace)
mp_ptr prodp;
mp_srcptr up;
mp_srcptr vp;
mp_size_t size;
mp_ptr tspace;
#endif
{
if ((size & 1) != 0)
{
/* The size is odd, the code below doesn't handle that.
Multiply the least significant (size - 1) limbs with a recursive
call, and handle the most significant limb of S1 and S2
separately. */
/* A slightly faster way to do this would be to make the Karatsuba
code below behave as if the size were even, and let it check for
odd size in the end. I.e., in essence move this code to the end.
Doing so would save us a recursive call, and potentially make the
stack grow a lot less. */
mp_size_t esize = size - 1; /* even size */
mp_limb_t cy_limb;
MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
prodp[esize + esize] = cy_limb;
cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
prodp[esize + size] = cy_limb;
}
else
{
/* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
Split U in two pieces, U1 and U0, such that
U = U0 + U1*(B**n),
and V in V1 and V0, such that
V = V0 + V1*(B**n).
UV is then computed recursively using the identity
UV = (B^(2n) + B^n)*U1*V1 + B^n*(U1 - U0)*(V0 - V1) + (B^n + 1)*U0*V0,
where B = 2**BITS_PER_MP_LIMB. */
mp_size_t hsize = size >> 1;
mp_limb_t cy;
int negflg;
/*** Product H. ________________ ________________
|_____U1 x V1____||____U0 x V0_____| */
/* Put result in upper part of PROD and pass low part of TSPACE
as new TSPACE. */
MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);
/*** Product M. ________________
|_(U1-U0)(V0-V1)_| */
if (mpn_cmp (up + hsize, up, hsize) >= 0)
{
mpn_sub_n (prodp, up + hsize, up, hsize);
negflg = 0;
}
else
{
mpn_sub_n (prodp, up, up + hsize, hsize);
negflg = 1;
}
if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
{
mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
negflg ^= 1;
}
else
{
mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
/* No change of NEGFLG. */
}
/* Read temporary operands from low part of PROD.
Put result in low part of TSPACE using upper part of TSPACE
as new TSPACE. */
MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);
/*** Add/copy product H. */
MPN_COPY (prodp + hsize, prodp + size, hsize);
cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
/*** Add product M (if NEGFLG M is a negative number). */
if (negflg)
cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
else
cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
/*** Product L. ________________ ________________
|________________||____U0 x V0_____| */
/* Read temporary operands from low part of PROD.
Put result in low part of TSPACE using upper part of TSPACE
as new TSPACE. */
MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);
/*** Add/copy Product L (twice). */
cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
if (cy)
mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
MPN_COPY (prodp, tspace, hsize);
cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
if (cy)
mpn_add_1 (prodp + size, prodp + size, size, 1);
}
}
void
#if __STDC__
impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
#else
impn_sqr_n_basecase (prodp, up, size)
mp_ptr prodp;
mp_srcptr up;
mp_size_t size;
#endif
{
mp_size_t i;
mp_limb_t cy_limb;
mp_limb_t v_limb;
/* Multiply by the first limb in V separately, as the result can be
stored (not added) to PROD. We also avoid a loop for zeroing. */
v_limb = up[0];
if (v_limb <= 1)
{
if (v_limb == 1)
MPN_COPY (prodp, up, size);
else
MPN_ZERO (prodp, size);
cy_limb = 0;
}
else
cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
prodp[size] = cy_limb;
prodp++;
/* For each iteration in the outer loop, multiply one limb from
U with one limb from V, and add it to PROD. */
for (i = 1; i < size; i++)
{
v_limb = up[i];
if (v_limb <= 1)
{
cy_limb = 0;
if (v_limb == 1)
cy_limb = mpn_add_n (prodp, prodp, up, size);
}
else
cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
prodp[size] = cy_limb;
prodp++;
}
}
void
#if __STDC__
impn_sqr_n (mp_ptr prodp,
mp_srcptr up, mp_size_t size, mp_ptr tspace)
#else
impn_sqr_n (prodp, up, size, tspace)
mp_ptr prodp;
mp_srcptr up;
mp_size_t size;
mp_ptr tspace;
#endif
{
if ((size & 1) != 0)
{
/* The size is odd, the code below doesn't handle that.
Multiply the least significant (size - 1) limbs with a recursive
call, and handle the most significant limb of S1 and S2
separately. */
/* A slightly faster way to do this would be to make the Karatsuba
code below behave as if the size were even, and let it check for
odd size in the end. I.e., in essence move this code to the end.
Doing so would save us a recursive call, and potentially make the
stack grow a lot less. */
mp_size_t esize = size - 1; /* even size */
mp_limb_t cy_limb;
MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
prodp[esize + esize] = cy_limb;
cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);
prodp[esize + size] = cy_limb;
}
else
{
mp_size_t hsize = size >> 1;
mp_limb_t cy;
/*** Product H. ________________ ________________
|_____U1 x U1____||____U0 x U0_____| */
/* Put result in upper part of PROD and pass low part of TSPACE
as new TSPACE. */
MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);
/*** Product M. ________________
|_(U1-U0)(U0-U1)_| */
if (mpn_cmp (up + hsize, up, hsize) >= 0)
{
mpn_sub_n (prodp, up + hsize, up, hsize);
}
else
{
mpn_sub_n (prodp, up, up + hsize, hsize);
}
/* Read temporary operands from low part of PROD.
Put result in low part of TSPACE using upper part of TSPACE
as new TSPACE. */
MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);
/*** Add/copy product H. */
MPN_COPY (prodp + hsize, prodp + size, hsize);
cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
/*** Add product M (here M = -(U1-U0)^2, so it is always subtracted). */
cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
/*** Product L. ________________ ________________
|________________||____U0 x U0_____| */
/* Read temporary operands from low part of PROD.
Put result in low part of TSPACE using upper part of TSPACE
as new TSPACE. */
MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
/*** Add/copy Product L (twice). */
cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
if (cy)
mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
MPN_COPY (prodp, tspace, hsize);
cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
if (cy)
mpn_add_1 (prodp + size, prodp + size, size, 1);
}
}
/* This should be made into an inline function in gmp.h. */
void
#if __STDC__
mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
mpn_mul_n (prodp, up, vp, size)
mp_ptr prodp;
mp_srcptr up;
mp_srcptr vp;
mp_size_t size;
#endif
{
TMP_DECL (marker);
TMP_MARK (marker);
if (up == vp)
{
if (size < KARATSUBA_THRESHOLD)
{
impn_sqr_n_basecase (prodp, up, size);
}
else
{
mp_ptr tspace;
tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
impn_sqr_n (prodp, up, size, tspace);
}
}
else
{
if (size < KARATSUBA_THRESHOLD)
{
impn_mul_n_basecase (prodp, up, vp, size);
}
else
{
mp_ptr tspace;
tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
impn_mul_n (prodp, up, vp, size, tspace);
}
}
TMP_FREE (marker);
}
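
The identity behind impn_mul_n can be spot-checked with small numbers; a standalone sketch with a toy base B = 2^8 and n = 1 (so U = U1*B + U0 and V = V1*B + V0):

#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  int64_t B = 256;
  int64_t U1 = 4, U0 = 7, V1 = 5, V0 = 2;
  int64_t U = U1 * B + U0;		/* 1031 */
  int64_t V = V1 * B + V0;		/* 1282 */

  /* UV = (B^2 + B)*U1*V1 + B*(U1 - U0)*(V0 - V1) + (B + 1)*U0*V0;
     the middle term is the one whose sign NEGFLG tracks above.  */
  int64_t rhs = (B * B + B) * U1 * V1
		+ B * (U1 - U0) * (V0 - V1)
		+ (B + 1) * U0 * V0;

  printf ("%lld == %lld\n", (long long) (U * V), (long long) rhs);
  return 0;
}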