Faster binary-to-decimal conversions
This commit is contained in:
parent
b162ed889b
commit
9590df3ef0
374
src/obj_long.c
374
src/obj_long.c
@ -1404,7 +1404,6 @@ KRK_Method(long,__rtruediv__) {
|
||||
return OBJECT_VAL(krk_takeStringVetted(rev,size,size,KRK_OBJ_FLAGS_STRING_ASCII,hash)); \
|
||||
}
|
||||
|
||||
PRINTER(repr,10,"")
|
||||
PRINTER(hex,16,"x0")
|
||||
PRINTER(oct,8,"o0")
|
||||
PRINTER(bin,2,"b0")
|
||||
@ -1948,6 +1947,379 @@ KRK_Method(long,_get_digit) {
|
||||
return INTEGER_VAL(_self->digits[index]);
|
||||
}
|
||||
|
||||
/**
 * Huge decimals for fast conversion.
 *
 * This is a lightweight implementation of decimal-based bigints that only
 * supports addition, in-place subtraction, and multiplication (via Karatsuba).
 * With this, we can much more quickly produce decimal conversions of (binary)
 * longs.
 *
 * A huge decimal is an array of @c digit_t values, least significant first,
 * where each element holds DEC_DIGIT_CNT decimal places (a value below
 * DEC_DIGIT_MAX).
 */
typedef uint32_t digit_t;
/* Size in bytes of one huge-decimal digit; parenthesized so the macro is safe
 * to use inside any larger expression. */
#define DEC_DIGIT_SIZE (sizeof(digit_t))
/* Number of decimal places stored in one huge-decimal digit. */
#define DEC_DIGIT_CNT 9
/* Radix of a huge-decimal digit: 10 raised to DEC_DIGIT_CNT. */
#define DEC_DIGIT_MAX 1000000000
|
||||
|
||||
/**
|
||||
* Adds @c a and @c b to create a new results.
|
||||
*/
|
||||
static digit_t * dec_add(const digit_t * a, size_t awidth, const digit_t * b, size_t bwidth, size_t * outwidth) {
|
||||
*outwidth = (awidth > bwidth ? awidth : bwidth) + 1;
|
||||
digit_t * out = calloc(*outwidth, DEC_DIGIT_SIZE);
|
||||
int64_t carry = 0;
|
||||
for (size_t i = 0; i < *outwidth - 1; ++i) {
|
||||
digit_t n = ((i < awidth) ? a[i] : 0) + ((i < bwidth) ? b[i] : 0) + carry;
|
||||
out[i] = n % DEC_DIGIT_MAX;
|
||||
carry = (n >= DEC_DIGIT_MAX);
|
||||
}
|
||||
if (carry) {
|
||||
out[*outwidth-1] = 1;
|
||||
} else {
|
||||
*outwidth -= 1;
|
||||
}
|
||||
|
||||
if (*outwidth == 0) {
|
||||
*outwidth = 1;
|
||||
out[0] = 0;
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Subtracts a smaller @c b from a larger @c a in-place.
|
||||
*/
|
||||
static void dec_isub(digit_t * a, size_t awidth, const digit_t * b, size_t bwidth) {
|
||||
int64_t carry = 0;
|
||||
for (size_t i = 0; i < awidth; ++i) {
|
||||
int64_t a_digit = (int64_t)((i < awidth) ? a[i] : 0) - carry;
|
||||
int64_t b_digit = (int64_t)((i < bwidth) ? b[i] : 0);
|
||||
if (a_digit < b_digit) {
|
||||
a_digit += DEC_DIGIT_MAX;
|
||||
carry = 1;
|
||||
} else {
|
||||
carry = 0;
|
||||
}
|
||||
a[i] = (a_digit - b_digit) % DEC_DIGIT_MAX;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Decimal left shift. Multiply a by 1000000000^amount and return a new result value.
|
||||
*/
|
||||
static digit_t * dec_shift(const digit_t * a, size_t awidth, size_t amount, size_t * outwidth) {
|
||||
if (awidth == 1 && a[0] == 0) {
|
||||
*outwidth = 1;
|
||||
return calloc(1,DEC_DIGIT_SIZE);
|
||||
}
|
||||
*outwidth = awidth + amount;
|
||||
digit_t * out = calloc(*outwidth,DEC_DIGIT_SIZE);
|
||||
|
||||
for (size_t i = 0; i < awidth; ++i) {
|
||||
out[i+amount] = a[i];
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Multiply a by b and return a new result value.
|
||||
*
|
||||
* Uses the Karatsuba algorithm for larger values; degrades to brute-force
|
||||
* chalkboard multiplication for smaller values.
|
||||
*/
|
||||
static digit_t * dec_mul(const digit_t * a, size_t a_width, const digit_t * b, size_t b_width, size_t * outwidth) {
|
||||
/* We want a to be bigger than b */
|
||||
if (a_width < b_width) {
|
||||
const digit_t * t = a;
|
||||
a = b;
|
||||
b = t;
|
||||
size_t tmp = a_width;
|
||||
a_width = b_width;
|
||||
b_width = tmp;
|
||||
}
|
||||
|
||||
*outwidth = a_width + b_width;
|
||||
|
||||
/* Degenerate case where a or b is 0: return 0 */
|
||||
if ((a_width == 1 && a[0] == 0) || (b_width == 1 && b[0] == 0)) {
|
||||
*outwidth = 1;
|
||||
return calloc(1,DEC_DIGIT_SIZE);
|
||||
}
|
||||
|
||||
/* Degenerate case where a is 1, return b */
|
||||
if (a_width == 1 && a[0] == 1) {
|
||||
*outwidth = b_width;
|
||||
digit_t * out = malloc(*outwidth * DEC_DIGIT_SIZE);
|
||||
memcpy(out, b, *outwidth * DEC_DIGIT_SIZE);
|
||||
return out;
|
||||
}
|
||||
|
||||
/* Degenerate case where b is 1, return a */
|
||||
if (b_width == 1 && b[0] == 1) {
|
||||
*outwidth = a_width;
|
||||
digit_t * out = malloc(*outwidth * DEC_DIGIT_SIZE);
|
||||
memcpy(out, a, *outwidth * DEC_DIGIT_SIZE);
|
||||
return out;
|
||||
}
|
||||
|
||||
if (b_width < 50) {
|
||||
/* Fallback brute-force multiplication */
|
||||
digit_t * out = calloc(*outwidth,DEC_DIGIT_SIZE);
|
||||
for (size_t i = 0; i < b_width; ++i) {
|
||||
digit_t bdigit = (i < b_width) ? b[i] : 0;
|
||||
int64_t carry = 0;
|
||||
for (size_t j = 0; j < a_width; ++j) {
|
||||
digit_t adigit = (j < a_width) ? a[j] : 0;
|
||||
uint64_t t = carry + (int64_t)adigit * (int64_t)bdigit + out[i+j];
|
||||
carry = t / DEC_DIGIT_MAX;
|
||||
out[i+j] = t % DEC_DIGIT_MAX;
|
||||
}
|
||||
out[i+a_width] = carry;
|
||||
}
|
||||
while (*outwidth > 1 && out[(*outwidth)-1] == 0) (*outwidth)--;
|
||||
return out;
|
||||
} else {
|
||||
size_t m2 = a_width / 2;
|
||||
|
||||
/* Split a into its high and low halves */
|
||||
const digit_t * low1 = a;
|
||||
size_t low1_width = (m2 <= a_width) ? m2 : a_width;
|
||||
while (low1_width > 1 && low1[low1_width-1] == 0) low1_width--;
|
||||
digit_t a_zero = 0;
|
||||
const digit_t * high1 = (m2 <= a_width) ? (a + m2) : &a_zero;
|
||||
size_t high1_width = (m2 <= a_width) ? (a_width - m2) : 1;
|
||||
|
||||
/* Split b into its high and low halves */
|
||||
const digit_t * low2 = b;
|
||||
size_t low2_width = (m2 <= b_width) ? m2 : b_width;
|
||||
while (low2_width > 1 && low2[low2_width-1] == 0) low2_width--;
|
||||
digit_t b_zero = 0;
|
||||
const digit_t * high2 = (m2 <= b_width) ? (b + m2) : &b_zero;
|
||||
size_t high2_width = (m2 <= b_width) ? (b_width - m2) : 1;
|
||||
|
||||
size_t z0_width, z1_width, z2_width;
|
||||
|
||||
/* z0 = low1 * low2; z2 = high1 * high2 */
|
||||
digit_t * z0 = dec_mul(low1, low1_width, low2, low2_width, &z0_width);
|
||||
digit_t * z2 = dec_mul(high1, high1_width, high2, high2_width, &z2_width);
|
||||
|
||||
/* z1 = (low1 + high1) * (low2 + high2) */
|
||||
size_t sleft_width, sright_width;
|
||||
digit_t * sleft = dec_add(low1, low1_width, high1, high1_width, &sleft_width);
|
||||
digit_t * sright = dec_add(low2, low2_width, high2, high2_width, &sright_width);
|
||||
digit_t * z1 = dec_mul(sleft, sleft_width, sright, sright_width, &z1_width);
|
||||
free(sleft);
|
||||
free(sright);
|
||||
|
||||
/* Store (z1 - z2 - z0) into z1 */
|
||||
dec_isub(z1, z1_width, z2, z2_width);
|
||||
dec_isub(z1, z1_width, z0, z0_width);
|
||||
|
||||
/* Calculate (z1 - z2 - z0) * 10 ^ m2 */
|
||||
size_t m2_shift_width;
|
||||
digit_t * m2_shift = dec_shift(z1, z1_width, m2, &m2_shift_width);
|
||||
free(z1);
|
||||
|
||||
/* Add z0 to that */
|
||||
size_t add_width;
|
||||
digit_t * add = dec_add(m2_shift, m2_shift_width, z0, z0_width, &add_width);
|
||||
free(m2_shift);
|
||||
free(z0);
|
||||
|
||||
/* Then calculate z2 * 10 ^ (m2 * 2) */
|
||||
size_t m2_2_width;
|
||||
digit_t * m2_2 = dec_shift(z2, z2_width, m2 * 2, &m2_2_width);
|
||||
free(z2);
|
||||
|
||||
/* And add everything up */
|
||||
size_t result_width;
|
||||
digit_t * result = dec_add(m2_2, m2_2_width, add, add_width, &result_width);
|
||||
free(m2_2);
|
||||
free(add);
|
||||
|
||||
*outwidth = result_width;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Raise 2 to the wth power, as a huge decimal.
|
||||
*
|
||||
* Creates a decimal representation of 2 raised to the requested power.
|
||||
*
|
||||
* If @p w is very small (eg. 2**w would fit in one decimal digit), then
|
||||
* we can create this value directly. Otherwise, we recursively break
|
||||
* down @p w into smaller values and build huge decimals out of them
|
||||
* through repeated multiplication.
|
||||
*
|
||||
* An older prototype of this used a cache, which did save some time,
|
||||
* but not a whole lot in the long run, and this implementation is
|
||||
* considerably simpler without the cache.
|
||||
*
|
||||
* @param w Power to raise 2 to, as a KrkLong.
|
||||
* @param sizeOut Resulting size of the huge decimal.
|
||||
* @returns A huge decimal representing 2 ** w.
|
||||
*/
|
||||
static digit_t * dec_two_raised(KrkLong * w, size_t * sizeOut) {
|
||||
if (w->width == 0 || (w->width == 1 && w->digits[0] <= 29)) {
|
||||
*sizeOut = 1;
|
||||
digit_t * out = malloc(DEC_DIGIT_SIZE);
|
||||
out[0] = 1 << (w->width == 0 ? 0 : w->digits[0]);
|
||||
return out;
|
||||
} else {
|
||||
/* w2 = w >> 1 */
|
||||
KrkLong w2;
|
||||
KrkLong one;
|
||||
krk_long_init_si(&w2, 0);
|
||||
krk_long_init_si(&one, 1);
|
||||
_krk_long_rshift(&w2, w, &one);
|
||||
|
||||
/* t = Decimal(1 << w2) */
|
||||
size_t tSize;
|
||||
digit_t * t = dec_two_raised(&w2, &tSize);
|
||||
|
||||
if ((w->digits[0] & 1) == 0) {
|
||||
/* Result = t * t */
|
||||
krk_long_clear_many(&one, &w2, NULL);
|
||||
digit_t * result = dec_mul(t, tSize, t, tSize, sizeOut);
|
||||
free(t);
|
||||
return result;
|
||||
} else {
|
||||
/* wmw2 = w - w2 */
|
||||
KrkLong wmw2;
|
||||
krk_long_init_si(&wmw2, 0);
|
||||
krk_long_sub(&wmw2, w, &w2);
|
||||
krk_long_clear_many(&one, &w2, NULL);
|
||||
|
||||
/* right = 1 << wmw2 */
|
||||
size_t rightSize;
|
||||
digit_t * right = dec_two_raised(&wmw2, &rightSize);
|
||||
krk_long_clear(&wmw2);
|
||||
|
||||
/* result = t * right */
|
||||
digit_t * result = dec_mul(t, tSize, right, rightSize, sizeOut);
|
||||
free(t);
|
||||
free(right);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Convert a KrkLong to a series of huge decimal digits.
|
||||
*
|
||||
* Takes a KrkLong @p n of bitwidth @p w and converts it into an array of
|
||||
* @c digit_t huge decimal digits, storing the size in @p sizeOut.
|
||||
*
|
||||
* @param n KrkLong to convert.
|
||||
* @param w Bitwidth of @p n as a KrkLong.
|
||||
* @param sizeOut Resulting size of the huge decimal.
|
||||
* @returns Huge decimal of equivalent value.
|
||||
*/
|
||||
static digit_t * long_to_dec_inner(KrkLong * n, KrkLong * w, size_t * sizeOut) {
|
||||
if (n->width == 0) {
|
||||
*sizeOut = 1;
|
||||
return calloc(1,DEC_DIGIT_SIZE);
|
||||
}
|
||||
if (w->width == 1 && w->digits[0] <= 29) {
|
||||
*sizeOut = 1;
|
||||
digit_t * out = malloc(DEC_DIGIT_SIZE);
|
||||
out[0] = n->digits[0];
|
||||
return out;
|
||||
}
|
||||
|
||||
size_t aSize, bSize, cSize;
|
||||
digit_t * a, * b, * c;
|
||||
KrkLong w2, hi, lo, tmp;
|
||||
krk_long_init_many(&w2, &hi, &lo, &tmp, NULL);
|
||||
KrkLong one;
|
||||
krk_long_init_si(&one, 1);
|
||||
/* w2 = w >> 1 */
|
||||
_krk_long_rshift(&w2, w, &one);
|
||||
/* hi = n >> w2 */
|
||||
_krk_long_rshift(&hi, n, &w2);
|
||||
/* tmp = hi >> w2 */
|
||||
_krk_long_lshift(&tmp, &hi, &w2);
|
||||
/* lo = n - (hi >> w2) */
|
||||
krk_long_sub(&lo, n, &tmp);
|
||||
krk_long_clear_many(&one, &tmp, NULL);
|
||||
/* tmp = w - w2 */
|
||||
krk_long_sub(&tmp, w, &w2);
|
||||
/* a = Dec(hi) */
|
||||
a = long_to_dec_inner(&hi, &tmp, &aSize);
|
||||
krk_long_clear_many(&hi, &tmp, NULL);
|
||||
/* b = Dec(1 << w2) */
|
||||
b = dec_two_raised(&w2, &bSize);
|
||||
/* c = a * b */
|
||||
c = dec_mul(a, aSize, b, bSize, &cSize);
|
||||
free(a);
|
||||
free(b);
|
||||
/* a = Dec(lo) */
|
||||
a = long_to_dec_inner(&lo, &w2, &aSize);
|
||||
krk_long_clear_many(&lo,&w2,NULL);
|
||||
/* result = a + c */
|
||||
digit_t * result = dec_add(a, aSize, c, cSize, sizeOut);
|
||||
free(a);
|
||||
free(c);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Decimal string representation of a long.
 *
 * Values whose digit width is in the range (-10 .. 10) go through
 * krk_long_to_str(); everything else is converted to a huge decimal with
 * long_to_dec_inner() and printed nine decimal places per huge-decimal
 * digit, most significant digit first, with a leading '-' for negative
 * values.
 */
KRK_Method(long,__repr__) {
	/* For rather small values (10 was chosen arbitrarily), use the older approach */
	if (self->value->width >= -10 && self->value->width < 10) {
		size_t size;
		uint32_t hash;
		char * rev = krk_long_to_str(self->value, 10, "", &size, &hash);
		return OBJECT_VAL(krk_takeStringVetted(rev,size,size,KRK_OBJ_FLAGS_STRING_ASCII,hash));
	}

	/* We can only do this on positive values, but we can re-use the digits
	 * of the current number while processing, since longs are generally
	 * not mutable by any other operations. The struct copy shares the digit
	 * storage of the original, so it must not be cleared. */
	KrkLong magnitude = *self->value;
	int inv = (krk_long_sign(&magnitude) == -1);
	krk_long_set_sign(&magnitude, 1);

	/* Calculate bit width for halving */
	size_t bits = _bits_in(&magnitude);
	KrkLong w;
	krk_long_init_ui(&w, bits);

	/* Convert to big decimal digits */
	size_t size;
	digit_t * digits = long_to_dec_inner(&magnitude, &w, &size);

	/* magnitude borrows its digits, but w is our own and must be released. */
	krk_long_clear(&w);

	/* Ignore any all-zero digits at the top so that the leading-zero count
	 * below always looks at the first digit that actually contributes. */
	while (size > 1 && digits[size-1] == 0) size--;

	/* Count leading zero places in the top digit. The units place is never
	 * counted, so at least one character is always produced. */
	int leading = 0;
	for (size_t div = DEC_DIGIT_MAX/10; div > 1; div /= 10) {
		if ((digits[size-1] / div) % 10) break;
		leading += 1;
	}

	/* Allocate space for output: nine places per digit, less the skipped
	 * zeros, plus the optional sign and the terminating nul. */
	char * out = malloc(size * DEC_DIGIT_CNT + 1 - leading + inv);
	char * writer = out;

	/* Write negative sign if original value was negative. */
	if (inv) *(writer++) = '-';

	/* Collect digits, most significant first */
	for (size_t i = 0; i < size; ++i) {
		digit_t current = digits[size-i-1];
		for (size_t j = 0, div = DEC_DIGIT_MAX/10; j < DEC_DIGIT_CNT; j++, div/=10) {
			if (leading) { leading--; continue; }
			*(writer++) = ((current / div) % 10) + '0';
		}
	}
	*writer = '\0';

	free(digits);
	return OBJECT_VAL(krk_takeString(out, writer - out));
}
|
||||
|
||||
#ifndef KRK_NO_FLOAT
|
||||
KrkValue krk_int_from_float(double val) {
|
||||
union { double asDbl; uint64_t asInt; } u = {val};
|
||||
|
Loading…
Reference in New Issue
Block a user