bitmath: Finish up optimizations
This patch adds support for other compilers and systems, including MSVC, the Intel C compiler, etc. Signed-off-by: Erik de Castro Lopo <erikd@mega-nerd.com>
This commit is contained in:
parent
605f920816
commit
387b72731d
@ -36,54 +36,6 @@
|
|||||||
#include "private/bitmath.h"
|
#include "private/bitmath.h"
|
||||||
#include "FLAC/assert.h"
|
#include "FLAC/assert.h"
|
||||||
|
|
||||||
/* An example of what FLAC__bitmath_ilog2() computes:
|
|
||||||
*
|
|
||||||
* ilog2( 0) = assertion failure
|
|
||||||
* ilog2( 1) = 0
|
|
||||||
* ilog2( 2) = 1
|
|
||||||
* ilog2( 3) = 1
|
|
||||||
* ilog2( 4) = 2
|
|
||||||
* ilog2( 5) = 2
|
|
||||||
* ilog2( 6) = 2
|
|
||||||
* ilog2( 7) = 2
|
|
||||||
* ilog2( 8) = 3
|
|
||||||
* ilog2( 9) = 3
|
|
||||||
* ilog2(10) = 3
|
|
||||||
* ilog2(11) = 3
|
|
||||||
* ilog2(12) = 3
|
|
||||||
* ilog2(13) = 3
|
|
||||||
* ilog2(14) = 3
|
|
||||||
* ilog2(15) = 3
|
|
||||||
* ilog2(16) = 4
|
|
||||||
* ilog2(17) = 4
|
|
||||||
* ilog2(18) = 4
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __GNUC__
|
|
||||||
|
|
||||||
/* For GNUC, use static inline version in include/private/bitmath.h. */
|
|
||||||
|
|
||||||
/*
 * Portable integer log2: returns the number of times v can be shifted
 * right by one before it becomes zero, minus the final shift, i.e. the
 * bit index of the highest set bit. An input of 0 yields 0.
 */
unsigned FLAC__bitmath_ilog2(FLAC__uint32 v)
{
	unsigned log2_floor;

	if (v == 0)
		return 0;

	/* each successful shift means there was a set bit one position higher */
	for (log2_floor = 0; (v >>= 1) != 0; log2_floor++)
		;

	return log2_floor;
}
|
|
||||||
|
|
||||||
/*
 * 64-bit counterpart of the portable integer log2: returns the bit index
 * of the highest set bit of v. An input of 0 yields 0.
 */
unsigned FLAC__bitmath_ilog2_wide(FLAC__uint64 v)
{
	unsigned log2_floor;

	if (v == 0)
		return 0;

	/* each successful shift means there was a set bit one position higher */
	for (log2_floor = 0; (v >>= 1) != 0; log2_floor++)
		;

	return log2_floor;
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* An example of what FLAC__bitmath_silog2() computes:
|
/* An example of what FLAC__bitmath_silog2() computes:
|
||||||
*
|
*
|
||||||
* silog2(-10) = 5
|
* silog2(-10) = 5
|
||||||
|
@ -43,7 +43,7 @@
|
|||||||
#include "share/endswap.h"
|
#include "share/endswap.h"
|
||||||
|
|
||||||
/* Things should be fastest when this matches the machine word size */
|
/* Things should be fastest when this matches the machine word size */
|
||||||
/* WATCHOUT: if you change this you must also change the following #defines down to COUNT_ZERO_MSBS below to match */
|
/* WATCHOUT: if you change this you must also change the following #defines down to FLAC__clz_uint32 below to match */
|
||||||
/* WATCHOUT: there are a few places where the code will not work unless uint32_t is >= 32 bits wide */
|
/* WATCHOUT: there are a few places where the code will not work unless uint32_t is >= 32 bits wide */
|
||||||
/* also, some sections currently only have fast versions for 4 or 8 bytes per word */
|
/* also, some sections currently only have fast versions for 4 or 8 bytes per word */
|
||||||
#define FLAC__BYTES_PER_WORD 4 /* sizeof uint32_t */
|
#define FLAC__BYTES_PER_WORD 4 /* sizeof uint32_t */
|
||||||
@ -56,27 +56,6 @@
|
|||||||
#define SWAP_BE_WORD_TO_HOST(x) ENDSWAP_32(x)
|
#define SWAP_BE_WORD_TO_HOST(x) ENDSWAP_32(x)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__GNUC__)
/*
 * Counts the zero bits above the most significant set bit of a 32-bit word.
 * GCC documents __builtin_clz() as undefined for 0 ("If x is 0, the result
 * is undefined"), so that input is answered directly with 32.
 */
static inline uint32_t
COUNT_ZERO_MSBS (uint32_t word)
{
	return word != 0 ? (uint32_t)__builtin_clz (word) : 32;
}
#else
/*
 * Table-driven fallback: counts the # of zero MSBs in a word by looking up
 * the highest non-zero byte in byte_to_unary_table and adding the number of
 * all-zero bits above that byte. Note that the argument is evaluated more
 * than once.
 */
#define COUNT_ZERO_MSBS(word) ( \
	(word) > 0xffffff ? byte_to_unary_table[(word) >> 24] : \
	(word) == 0 ? 32 : \
	(word) > 0xffff ? byte_to_unary_table[(word) >> 16] + 8 : \
	(word) > 0xff ? byte_to_unary_table[(word) >> 8] + 16 : \
	byte_to_unary_table[(word)] + 24 \
)
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This should be at least twice as large as the largest number of words
|
* This should be at least twice as large as the largest number of words
|
||||||
* required to represent any 'number' (in any encoding) you are going to
|
* required to represent any 'number' (in any encoding) you are going to
|
||||||
@ -93,25 +72,6 @@ COUNT_ZERO_MSBS (uint32_t word)
|
|||||||
*/
|
*/
|
||||||
static const unsigned FLAC__BITREADER_DEFAULT_CAPACITY = 65536u / FLAC__BITS_PER_WORD; /* in words */
|
static const unsigned FLAC__BITREADER_DEFAULT_CAPACITY = 65536u / FLAC__BITS_PER_WORD; /* in words */
|
||||||
|
|
||||||
/*
 * Lookup table indexed by a byte value: entry i holds the number of zero
 * bits above the most significant set bit of i (entry 0 holds 8).
 */
static const unsigned char byte_to_unary_table[] = {
	/* 0x00 */
	8,
	/* 0x01 */
	7,
	/* 0x02 - 0x03 */
	6, 6,
	/* 0x04 - 0x07 */
	5, 5, 5, 5,
	/* 0x08 - 0x0f */
	4, 4, 4, 4, 4, 4, 4, 4,
	/* 0x10 - 0x1f */
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	/* 0x20 - 0x3f */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0x40 - 0x7f */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80 - 0xff */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
|
|
||||||
|
|
||||||
/* adjust for compilers that can't understand using LLU suffix for uint64_t literals */
|
/* adjust for compilers that can't understand using LLU suffix for uint64_t literals */
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#define FLAC__U64L(x) x
|
#define FLAC__U64L(x) x
|
||||||
@ -679,7 +639,7 @@ FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br, unsigned *va
|
|||||||
while(br->consumed_words < br->words) { /* if we've not consumed up to a partial tail word... */
|
while(br->consumed_words < br->words) { /* if we've not consumed up to a partial tail word... */
|
||||||
uint32_t b = br->buffer[br->consumed_words] << br->consumed_bits;
|
uint32_t b = br->buffer[br->consumed_words] << br->consumed_bits;
|
||||||
if(b) {
|
if(b) {
|
||||||
i = COUNT_ZERO_MSBS(b);
|
i = FLAC__clz_uint32(b);
|
||||||
*val += i;
|
*val += i;
|
||||||
i++;
|
i++;
|
||||||
br->consumed_bits += i;
|
br->consumed_bits += i;
|
||||||
@ -709,7 +669,7 @@ FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br, unsigned *va
|
|||||||
const unsigned end = br->bytes * 8;
|
const unsigned end = br->bytes * 8;
|
||||||
uint32_t b = (br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES << (FLAC__BITS_PER_WORD-end))) << br->consumed_bits;
|
uint32_t b = (br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES << (FLAC__BITS_PER_WORD-end))) << br->consumed_bits;
|
||||||
if(b) {
|
if(b) {
|
||||||
i = COUNT_ZERO_MSBS(b);
|
i = FLAC__clz_uint32(b);
|
||||||
*val += i;
|
*val += i;
|
||||||
i++;
|
i++;
|
||||||
br->consumed_bits += i;
|
br->consumed_bits += i;
|
||||||
@ -800,7 +760,7 @@ FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[
|
|||||||
mov i, eax
|
mov i, eax
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
i = COUNT_ZERO_MSBS(b);
|
i = FLAC__clz_uint32(b);
|
||||||
#endif
|
#endif
|
||||||
uval += i;
|
uval += i;
|
||||||
bits = parameter;
|
bits = parameter;
|
||||||
@ -832,7 +792,7 @@ FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[
|
|||||||
const unsigned end = br->bytes * 8;
|
const unsigned end = br->bytes * 8;
|
||||||
uint32_t b = (br->buffer[cwords] & (FLAC__WORD_ALL_ONES << (FLAC__BITS_PER_WORD-end))) << cbits;
|
uint32_t b = (br->buffer[cwords] & (FLAC__WORD_ALL_ONES << (FLAC__BITS_PER_WORD-end))) << cbits;
|
||||||
if(b) {
|
if(b) {
|
||||||
i = COUNT_ZERO_MSBS(b);
|
i = FLAC__clz_uint32(b);
|
||||||
uval += i;
|
uval += i;
|
||||||
bits = parameter;
|
bits = parameter;
|
||||||
i++;
|
i++;
|
||||||
@ -984,7 +944,7 @@ break2:
|
|||||||
: "r"(b)
|
: "r"(b)
|
||||||
);
|
);
|
||||||
#else
|
#else
|
||||||
i = COUNT_ZERO_MSBS(b);
|
i = FLAC__clz_uint32(b);
|
||||||
#endif
|
#endif
|
||||||
uval += i;
|
uval += i;
|
||||||
cbits += i;
|
cbits += i;
|
||||||
@ -1015,7 +975,7 @@ break2:
|
|||||||
const unsigned end = br->bytes * 8;
|
const unsigned end = br->bytes * 8;
|
||||||
uint32_t b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end)) << cbits;
|
uint32_t b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end)) << cbits;
|
||||||
if(b) {
|
if(b) {
|
||||||
i = COUNT_ZERO_MSBS(b);
|
i = FLAC__clz_uint32(b);
|
||||||
uval += i;
|
uval += i;
|
||||||
cbits += i;
|
cbits += i;
|
||||||
cbits++; /* skip over stop bit */
|
cbits++; /* skip over stop bit */
|
||||||
|
@ -34,28 +34,126 @@
|
|||||||
|
|
||||||
#include "FLAC/ordinals.h"
|
#include "FLAC/ordinals.h"
|
||||||
|
|
||||||
|
/* for CHAR_BIT */
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
#if defined(__GNUC__)
|
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
|
||||||
|
#include <intrin.h> /* for _BitScanReverse* */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Software count-leading-zeros for a 32-bit word, used by FLAC__clz_uint32()
 * only when no compiler intrinsic is selected (so it is not referenced for
 * MSVC, GCC or Intel compilers).
 *
 * Returns the number of zero bits above the most significant set bit of
 * `word`, or 32 when `word` is 0.
 *
 * Declared `static inline`: a plain `inline` definition in a header provides
 * no external definition under C99/C11 rules, so any call the compiler chose
 * not to inline would fail to link.
 */
static inline unsigned int FLAC__clz_soft_uint32(unsigned int word)
{
	/* entry i = number of leading zero bits in the 8-bit value i */
	static const unsigned char byte_to_unary_table[] = {
		8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	};

	/* find the highest non-zero byte, then add the all-zero bits above it */
	if (word > 0xffffff)
		return byte_to_unary_table[word >> 24];
	if (word == 0)
		return 32;
	if (word > 0xffff)
		return byte_to_unary_table[word >> 16] + 8;
	if (word > 0xff)
		return byte_to_unary_table[word >> 8] + 16;
	return byte_to_unary_table[word] + 24;
}
|
||||||
|
|
||||||
|
/*
 * Count the leading zero bits of a 32-bit value using the best primitive
 * the compiler offers, falling back to FLAC__clz_soft_uint32().
 *
 * v must be non-zero: callers never pass 0, and the intrinsic-based paths
 * do not define a result for it.
 */
static inline unsigned int FLAC__clz_uint32(FLAC__uint32 v)
{
	/* Never used with input 0 */
#if defined(__INTEL_COMPILER)
	/* bit-scan-reverse yields the index of the highest set bit; ^ 31 turns it into a leading-zero count */
	return _bit_scan_reverse(v) ^ 31U;
#elif defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
	/* This will translate either to (bsr ^ 31U), clz, ctlz, cntlz, lzcnt depending on
	 * the -march= setting, or to a software routine on exotic machines. */
	return __builtin_clz(v);
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
	{
		/* _BitScanReverse stores the bit index through an `unsigned long *` */
		unsigned long idx;
		_BitScanReverse(&idx, v);
		return (unsigned int)(idx ^ 31U);
	}
#else
	return FLAC__clz_soft_uint32(v);
#endif
}
|
||||||
|
|
||||||
|
/* An example of what FLAC__bitmath_ilog2() computes:
|
||||||
|
*
|
||||||
|
* ilog2( 0) = undefined
|
||||||
|
* ilog2( 1) = 0
|
||||||
|
* ilog2( 2) = 1
|
||||||
|
* ilog2( 3) = 1
|
||||||
|
* ilog2( 4) = 2
|
||||||
|
* ilog2( 5) = 2
|
||||||
|
* ilog2( 6) = 2
|
||||||
|
* ilog2( 7) = 2
|
||||||
|
* ilog2( 8) = 3
|
||||||
|
* ilog2( 9) = 3
|
||||||
|
* ilog2(10) = 3
|
||||||
|
* ilog2(11) = 3
|
||||||
|
* ilog2(12) = 3
|
||||||
|
* ilog2(13) = 3
|
||||||
|
* ilog2(14) = 3
|
||||||
|
* ilog2(15) = 3
|
||||||
|
* ilog2(16) = 4
|
||||||
|
* ilog2(17) = 4
|
||||||
|
* ilog2(18) = 4
|
||||||
|
*/
|
||||||
|
|
||||||
/*
 * Integer log2 of a 32-bit value: the bit index of its highest set bit,
 * derived from the leading-zero count returned by FLAC__clz_uint32().
 * The result for v == 0 is undefined.
 */
static inline unsigned FLAC__bitmath_ilog2(FLAC__uint32 v)
{
	/* index of the most significant bit position in a FLAC__uint32 */
	const unsigned top_bit = sizeof(FLAC__uint32) * CHAR_BIT - 1;

	return top_bit - FLAC__clz_uint32(v);
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef FLAC__INTEGER_ONLY_LIBRARY /*Unused otherwise */
|
||||||
|
|
||||||
/*
 * Integer log2 of a 64-bit value: the bit index of its highest set bit.
 * Returns 0 when v is 0.
 *
 * Every branch below returns the same quantity (ilog2_wide(1) == 0,
 * ilog2_wide(2) == 1, ...); only the primitive used to find the highest
 * set bit differs.
 */
static inline unsigned FLAC__bitmath_ilog2_wide(FLAC__uint64 v)
{
	if (v == 0)
		return 0;
#if defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
	return sizeof(FLAC__uint64) * CHAR_BIT - 1 - __builtin_clzll(v);
/* Sorry, only supported in win64/Itanium.. */
#elif (defined(_MSC_VER) && (_MSC_VER >= 1400)) && (defined(_M_IA64) || defined(_WIN64))
	{
		/* _BitScanReverse64 stores the index of the highest set bit, which
		 * already is the log2; it must not be XORed with 63 (that would be
		 * the leading-zero count instead). */
		unsigned long idx;
		_BitScanReverse64(&idx, v);
		return (unsigned)idx;
	}
#else
	{
		/* Other compilers use the fastest portable way, that is,
		   de Bruijn sequences (http://supertech.csail.mit.edu/papers/debruijn.pdf)
		   (C) Timothy B. Terriberry (tterribe@xiph.org) 2001-2009 LGPL (v2 or later).
		*/
		static const unsigned char DEBRUIJN_IDX64[64]={
			0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40,
			5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57,
			63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,
			62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58
		};
		/* smear the highest set bit into every lower position ... */
		v|= v>>1;
		v|= v>>2;
		v|= v>>4;
		v|= v>>8;
		v|= v>>16;
		v|= v>>32;
		/* ... then isolate it as a single power of two */
		v= (v>>1)+1;
		/* the table maps that power of two straight to its exponent; no
		 * extra "+ (v > 0)" term, which would give ilog2 + 1 */
		return DEBRUIJN_IDX64[v*0x218A392CD3D5DBF>>58&0x3F];
	}
#endif
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
unsigned FLAC__bitmath_silog2(int v);
|
unsigned FLAC__bitmath_silog2(int v);
|
||||||
|
Loading…
Reference in New Issue
Block a user