dab47db46b
fixes for: - mpn_sbpi1_div_qr_sec and mpn_sbpi1_div_r_sec - mpz_powm_ui - AMD family 11h - mpz_powm_sec and mpn_powm_sec - ASSERT() fixes - gcd, gcdext, and invert function fixes - some PPC division operations
417 lines
11 KiB
C
417 lines
11 KiB
C
/* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
|
|
|
|
Contributed to the GNU project by Marco Bodrato.
|
|
|
|
THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
|
|
IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
|
|
IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
|
|
DISAPPEAR IN A FUTURE GNU MP RELEASE.
|
|
|
|
Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
|
|
|
|
This file is part of the GNU MP Library.
|
|
|
|
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU Lesser General Public License as published by
|
|
the Free Software Foundation; either version 3 of the License, or (at your
|
|
option) any later version.
|
|
|
|
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
|
License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
|
|
|
|
#include "gmp.h"
|
|
#include "gmp-impl.h"
|
|
#include "longlong.h"
|
|
|
|
/* TODO:
|
|
- split this file in smaller parts with functions that can be recycled for different computations.
|
|
*/
|
|
|
|
/**************************************************************/
|
|
/* Section macros: common macros, for mswing/fac/bin (&sieve) */
|
|
/**************************************************************/
|
|
|
|
#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \
|
|
if ((PR) > (MAX_PR)) { \
|
|
(VEC)[(I)++] = (PR); \
|
|
(PR) = 1; \
|
|
}
|
|
|
|
#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
|
|
do { \
|
|
if ((PR) > (MAX_PR)) { \
|
|
(VEC)[(I)++] = (PR); \
|
|
(PR) = (P); \
|
|
} else \
|
|
(PR) *= (P); \
|
|
} while (0)
|
|
|
|
#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \
|
|
__max_i = (end); \
|
|
\
|
|
do { \
|
|
++__i; \
|
|
if (((sieve)[__index] & __mask) == 0) \
|
|
{ \
|
|
(prime) = id_to_n(__i)
|
|
|
|
#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \
|
|
do { \
|
|
mp_limb_t __mask, __index, __max_i, __i; \
|
|
\
|
|
__i = (start)-(off); \
|
|
__index = __i / GMP_LIMB_BITS; \
|
|
__mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
|
|
__i += (off); \
|
|
\
|
|
LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
|
|
|
|
#define LOOP_ON_SIEVE_STOP \
|
|
} \
|
|
__mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
|
|
__index += __mask & 1; \
|
|
} while (__i <= __max_i) \
|
|
|
|
#define LOOP_ON_SIEVE_END \
|
|
LOOP_ON_SIEVE_STOP; \
|
|
} while (0)
|
|
|
|
/*********************************************************/
|
|
/* Section sieve: sieving functions and tools for primes */
|
|
/*********************************************************/
|
|
|
|
#if WANT_ASSERT
|
|
static mp_limb_t
|
|
bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
|
|
#endif
|
|
|
|
/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
|
|
static mp_limb_t
|
|
id_to_n (mp_limb_t id) { return id*3+1+(id&1); }
|
|
|
|
/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
|
|
static mp_limb_t
|
|
n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
|
|
|
|
#if WANT_ASSERT
|
|
static mp_size_t
|
|
primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
|
|
#endif
|
|
|
|
/*********************************************************/
|
|
/* Section mswing: 2-multiswing factorial */
|
|
/*********************************************************/
|
|
|
|
/* Returns an approximation of the sqare root of x. *
|
|
* It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4 */
|
|
static mp_limb_t
|
|
limb_apprsqrt (mp_limb_t x)
|
|
{
|
|
int s;
|
|
|
|
ASSERT (x > 2);
|
|
count_leading_zeros (s, x - 1);
|
|
s = GMP_LIMB_BITS - 1 - s;
|
|
return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));
|
|
}
|
|
|
|
#if 0
|
|
/* A count-then-exponentiate variant for SWING_A_PRIME */
|
|
#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
|
|
do { \
|
|
mp_limb_t __q, __prime; \
|
|
int __exp; \
|
|
__prime = (P); \
|
|
__exp = 0; \
|
|
__q = (N); \
|
|
do { \
|
|
__q /= __prime; \
|
|
__exp += __q & 1; \
|
|
} while (__q >= __prime); \
|
|
if (__exp) { /* Store $prime^{exp}$ */ \
|
|
for (__q = __prime; --__exp; __q *= __prime); \
|
|
FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I); \
|
|
}; \
|
|
} while (0)
|
|
#else
|
|
#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
|
|
do { \
|
|
mp_limb_t __q, __prime; \
|
|
__prime = (P); \
|
|
FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
|
|
__q = (N); \
|
|
do { \
|
|
__q /= __prime; \
|
|
if ((__q & 1) != 0) (PR) *= __prime; \
|
|
} while (__q >= __prime); \
|
|
} while (0)
|
|
#endif
|
|
|
|
#define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
|
|
do { \
|
|
mp_limb_t __prime; \
|
|
__prime = (P); \
|
|
if ((((N) / __prime) & 1) != 0) \
|
|
FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I); \
|
|
} while (0)
|
|
|
|
/* mpz_2multiswing_1 computes the odd part of the 2-multiswing
|
|
factorial of the parameter n. The result x is an odd positive
|
|
integer so that multiswing(n,2) = x 2^a.
|
|
|
|
Uses the algorithm described by Peter Luschny in "Divide, Swing and
|
|
Conquer the Factorial!".
|
|
|
|
The pointer sieve points to primesieve_size(n) limbs containing a
|
|
bit-array where primes are marked as 0.
|
|
Enough (FIXME: explain :-) limbs must be pointed by factors.
|
|
*/
|
|
|
|
static void
|
|
mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
|
|
{
|
|
mp_limb_t prod, max_prod;
|
|
mp_size_t j;
|
|
|
|
ASSERT (n >= 26);
|
|
|
|
j = 0;
|
|
prod = -(n & 1);
|
|
n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
|
|
|
|
prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
|
|
max_prod = GMP_NUMB_MAX / (n-1);
|
|
|
|
/* Handle prime = 3 separately. */
|
|
SWING_A_PRIME (3, n, prod, max_prod, factors, j);
|
|
|
|
/* Swing primes from 5 to n/3 */
|
|
{
|
|
mp_limb_t s;
|
|
|
|
{
|
|
mp_limb_t prime;
|
|
|
|
s = limb_apprsqrt(n);
|
|
ASSERT (s >= 5);
|
|
s = n_to_bit (s);
|
|
LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
|
|
SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
|
|
LOOP_ON_SIEVE_END;
|
|
s++;
|
|
}
|
|
|
|
ASSERT (max_prod <= GMP_NUMB_MAX / 3);
|
|
ASSERT (bit_to_n (s) * bit_to_n (s) > n);
|
|
ASSERT (s <= n_to_bit (n / 3));
|
|
{
|
|
mp_limb_t prime;
|
|
mp_limb_t l_max_prod = max_prod * 3;
|
|
|
|
LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n/3), 0, sieve);
|
|
SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
|
|
LOOP_ON_SIEVE_END;
|
|
}
|
|
}
|
|
|
|
/* Store primes from (n+1)/2 to n */
|
|
{
|
|
mp_limb_t prime;
|
|
LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
|
|
FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
|
|
LOOP_ON_SIEVE_END;
|
|
}
|
|
|
|
if (LIKELY (j != 0))
|
|
{
|
|
factors[j++] = prod;
|
|
mpz_prodlimbs (x, factors, j);
|
|
}
|
|
else
|
|
{
|
|
PTR (x)[0] = prod;
|
|
SIZ (x) = 1;
|
|
}
|
|
}
|
|
|
|
#undef SWING_A_PRIME
|
|
#undef SH_SWING_A_PRIME
|
|
#undef LOOP_ON_SIEVE_END
|
|
#undef LOOP_ON_SIEVE_STOP
|
|
#undef LOOP_ON_SIEVE_BEGIN
|
|
#undef LOOP_ON_SIEVE_CONTINUE
|
|
#undef FACTOR_LIST_APPEND
|
|
|
|
/*********************************************************/
|
|
/* Section oddfac: odd factorial, needed also by binomial*/
|
|
/*********************************************************/
|
|
|
|
#if TUNE_PROGRAM_BUILD
|
|
#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
|
|
#else
|
|
#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1))
|
|
#endif
|
|
|
|
/* mpz_oddfac_1 computes the odd part of the factorial of the
|
|
parameter n. I.e. n! = x 2^a, where x is the returned value: an
|
|
odd positive integer.
|
|
|
|
If flag != 0 a square is skipped in the DSC part, e.g.
|
|
if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
|
|
|
|
If n is too small, flag is ignored, and an ASSERT can be triggered.
|
|
|
|
TODO: FAC_DSC_THRESHOLD is used here with two different roles:
|
|
- to decide when prime factorisation is needed,
|
|
- to stop the recursion, once sieving is done.
|
|
Maybe two thresholds can do a better job.
|
|
*/
|
|
void
|
|
mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
|
|
{
|
|
ASSERT (n <= GMP_NUMB_MAX);
|
|
ASSERT (flag == 0 || (flag == 1 && n > ODD_FACTORIAL_TABLE_LIMIT && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
|
|
|
|
if (n <= ODD_FACTORIAL_TABLE_LIMIT)
|
|
{
|
|
PTR (x)[0] = __gmp_oddfac_table[n];
|
|
SIZ (x) = 1;
|
|
}
|
|
else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
|
|
{
|
|
mp_ptr px;
|
|
|
|
px = MPZ_NEWALLOC (x, 2);
|
|
umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
|
|
SIZ (x) = 2;
|
|
}
|
|
else
|
|
{
|
|
unsigned s;
|
|
mp_ptr factors;
|
|
|
|
s = 0;
|
|
{
|
|
mp_limb_t tn;
|
|
mp_limb_t prod, max_prod, i;
|
|
mp_size_t j;
|
|
TMP_SDECL;
|
|
|
|
#if TUNE_PROGRAM_BUILD
|
|
ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
|
|
ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
|
|
#endif
|
|
|
|
/* Compute the number of recursive steps for the DSC algorithm. */
|
|
for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
|
|
tn >>= 1;
|
|
|
|
j = 0;
|
|
|
|
TMP_SMARK;
|
|
factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
|
|
ASSERT (tn >= FACTORS_PER_LIMB);
|
|
|
|
prod = 1;
|
|
#if TUNE_PROGRAM_BUILD
|
|
max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
|
|
#else
|
|
max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD;
|
|
#endif
|
|
|
|
ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
|
|
do {
|
|
i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2;
|
|
factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
|
|
do {
|
|
FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
|
|
i += 2;
|
|
} while (i <= tn);
|
|
max_prod <<= 1;
|
|
tn >>= 1;
|
|
} while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
|
|
|
|
factors[j++] = prod;
|
|
factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
|
|
factors[j++] = __gmp_oddfac_table[tn >> 1];
|
|
mpz_prodlimbs (x, factors, j);
|
|
|
|
TMP_SFREE;
|
|
}
|
|
|
|
if (s != 0)
|
|
/* Use the algorithm described by Peter Luschny in "Divide,
|
|
Swing and Conquer the Factorial!".
|
|
|
|
Improvement: there are two temporary buffers, factors and
|
|
square, that are never used together; with a good estimate
|
|
of the maximal needed size, they could share a single
|
|
allocation.
|
|
*/
|
|
{
|
|
mpz_t mswing;
|
|
mp_ptr sieve;
|
|
mp_size_t size;
|
|
TMP_DECL;
|
|
|
|
TMP_MARK;
|
|
|
|
flag--;
|
|
size = n / GMP_NUMB_BITS + 4;
|
|
ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
|
|
/* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
|
|
one more can be overwritten by mul, another for the sieve */
|
|
MPZ_TMP_INIT (mswing, size);
|
|
/* Initialize size, so that ASSERT can check it correctly. */
|
|
ASSERT_CODE (SIZ (mswing) = 0);
|
|
|
|
/* Put the sieve on the second half, it will be overwritten by the last mswing. */
|
|
sieve = PTR (mswing) + size / 2 + 1;
|
|
|
|
size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
|
|
|
|
factors = TMP_ALLOC_LIMBS (size);
|
|
do {
|
|
mp_ptr square, px;
|
|
mp_size_t nx, ns;
|
|
mp_limb_t cy;
|
|
TMP_DECL;
|
|
|
|
s--;
|
|
ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
|
|
mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
|
|
|
|
TMP_MARK;
|
|
nx = SIZ (x);
|
|
if (s == flag) {
|
|
size = nx;
|
|
square = TMP_ALLOC_LIMBS (size);
|
|
MPN_COPY (square, PTR (x), nx);
|
|
} else {
|
|
size = nx << 1;
|
|
square = TMP_ALLOC_LIMBS (size);
|
|
mpn_sqr (square, PTR (x), nx);
|
|
size -= (square[size - 1] == 0);
|
|
}
|
|
ns = SIZ (mswing);
|
|
nx = size + ns;
|
|
px = MPZ_NEWALLOC (x, nx);
|
|
ASSERT (ns <= size);
|
|
cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
|
|
|
|
TMP_FREE;
|
|
SIZ(x) = nx - (cy == 0);
|
|
} while (s != 0);
|
|
TMP_FREE;
|
|
}
|
|
}
|
|
}
|
|
|
|
#undef FACTORS_PER_LIMB
|
|
#undef FACTOR_LIST_STORE
|