From 6cc1cbad2c40a4406fd911445b86b3df2291dd79 Mon Sep 17 00:00:00 2001 From: Erik de Castro Lopo Date: Mon, 4 Jan 2016 10:30:54 +1100 Subject: [PATCH] libFLAC: Support 64bit brword/bwword This patch allows FLAC__BYTES_PER_WORD to be set to 8, but is disabled by default. Patch-from: lvqcl --- include/share/endswap.h | 6 +- src/libFLAC/bitreader.c | 96 +++++++++++++++++---------- src/libFLAC/bitwriter.c | 51 +++++++++----- src/libFLAC/include/private/bitmath.h | 33 ++++++++- src/test_libFLAC/bitwriter.c | 91 ++++++++++++++++++++++++- 5 files changed, 222 insertions(+), 55 deletions(-) diff --git a/include/share/endswap.h b/include/share/endswap.h index 4fde4c15..b189f3ec 100644 --- a/include/share/endswap.h +++ b/include/share/endswap.h @@ -43,11 +43,13 @@ static inline unsigned short __builtin_bswap16(unsigned short a) #define ENDSWAP_16(x) (__builtin_bswap16 (x)) #define ENDSWAP_32(x) (__builtin_bswap32 (x)) +#define ENDSWAP_64(x) (__builtin_bswap64 (x)) #elif defined _MSC_VER /* Windows. Apparently in <stdlib.h>. */ #define ENDSWAP_16(x) (_byteswap_ushort (x)) #define ENDSWAP_32(x) (_byteswap_ulong (x)) +#define ENDSWAP_64(x) (_byteswap_uint64 (x)) #elif defined HAVE_BYTESWAP_H /* Linux */ @@ -55,16 +57,18 @@ static inline unsigned short __builtin_bswap16(unsigned short a) #define ENDSWAP_16(x) (bswap_16 (x)) #define ENDSWAP_32(x) (bswap_32 (x)) +#define ENDSWAP_64(x) (bswap_64 (x)) #else #define ENDSWAP_16(x) ((((x) >> 8) & 0xFF) | (((x) & 0xFF) << 8)) #define ENDSWAP_32(x) ((((x) >> 24) & 0xFF) | (((x) >> 8) & 0xFF00) | (((x) & 0xFF00) << 8) | (((x) & 0xFF) << 24)) +#define ENDSWAP_64(x) ((ENDSWAP_32(((x) >> 32) & 0xFFFFFFFF)) | (ENDSWAP_32((x) & 0xFFFFFFFF) << 32)) #endif -/* Host to little-endian byte swapping. 
*/ +/* Host to little-endian byte swapping (for MD5 calculation) */ #if CPU_IS_BIG_ENDIAN #define H2LE_16(x) ENDSWAP_16 (x) diff --git a/src/libFLAC/bitreader.c b/src/libFLAC/bitreader.c index 67d0f371..3559bf12 100644 --- a/src/libFLAC/bitreader.c +++ b/src/libFLAC/bitreader.c @@ -45,17 +45,42 @@ #include "share/endswap.h" /* Things should be fastest when this matches the machine word size */ -/* WATCHOUT: if you change this you must also change the following #defines down to FLAC__clz_uint32 below to match */ -/* WATCHOUT: there are a few places where the code will not work unless uint32_t is >= 32 bits wide */ +/* WATCHOUT: if you change this you must also change the following #defines down to COUNT_ZERO_MSBS2 below to match */ +/* WATCHOUT: there are a few places where the code will not work unless brword is >= 32 bits wide */ /* also, some sections currently only have fast versions for 4 or 8 bytes per word */ -#define FLAC__BYTES_PER_WORD 4 /* sizeof uint32_t */ -#define FLAC__BITS_PER_WORD (8 * FLAC__BYTES_PER_WORD) + +#if 1 + +typedef FLAC__uint32 brword; +#define FLAC__BYTES_PER_WORD 4 /* sizeof brword */ +#define FLAC__BITS_PER_WORD 32 #define FLAC__WORD_ALL_ONES ((FLAC__uint32)0xffffffff) -/* SWAP_BE_WORD_TO_HOST swaps bytes in a uint32_t (which is always big-endian) if necessary to match host byte order */ +/* SWAP_BE_WORD_TO_HOST swaps bytes in a brword (which is always big-endian) if necessary to match host byte order */ #if WORDS_BIGENDIAN #define SWAP_BE_WORD_TO_HOST(x) (x) #else #define SWAP_BE_WORD_TO_HOST(x) ENDSWAP_32(x) +#endif +/* counts the # of zero MSBs in a word */ +#define COUNT_ZERO_MSBS(word) FLAC__clz_uint32(word) +#define COUNT_ZERO_MSBS2(word) FLAC__clz2_uint32(word) + +#else + +typedef FLAC__uint64 brword; +#define FLAC__BYTES_PER_WORD 8 /* sizeof brword */ +#define FLAC__BITS_PER_WORD 64 +#define FLAC__WORD_ALL_ONES ((FLAC__uint64)FLAC__U64L(0xffffffffffffffff)) +/* SWAP_BE_WORD_TO_HOST swaps bytes in a brword (which is always 
big-endian) if necessary to match host byte order */ +#if WORDS_BIGENDIAN +#define SWAP_BE_WORD_TO_HOST(x) (x) +#else +#define SWAP_BE_WORD_TO_HOST(x) ENDSWAP_64(x) +#endif +/* counts the # of zero MSBs in a word */ +#define COUNT_ZERO_MSBS(word) FLAC__clz_uint64(word) +#define COUNT_ZERO_MSBS2(word) FLAC__clz2_uint64(word) + #endif /* @@ -77,7 +102,7 @@ static const unsigned FLAC__BITREADER_DEFAULT_CAPACITY = 65536u / FLAC__BITS_PER struct FLAC__BitReader { /* any partially-consumed word at the head will stay right-justified as bits are consumed from the left */ /* any incomplete word at the tail will be left-justified, and bytes from the read callback are added on the right */ - uint32_t *buffer; + brword *buffer; unsigned capacity; /* in words */ unsigned words; /* # of completed words in buffer */ unsigned bytes; /* # of bytes in incomplete word at buffer[words] */ @@ -89,7 +114,7 @@ struct FLAC__BitReader { void *client_data; }; -static inline void crc16_update_word_(FLAC__BitReader *br, uint32_t word) +static inline void crc16_update_word_(FLAC__BitReader *br, brword word) { register unsigned crc = br->read_crc16; #if FLAC__BYTES_PER_WORD == 4 @@ -142,7 +167,7 @@ static FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br) return false; /* no space left, buffer is too small; see note for FLAC__BITREADER_DEFAULT_CAPACITY */ target = ((FLAC__byte*)(br->buffer+br->words)) + br->bytes; - /* before reading, if the existing reader looks like this (say uint32_t is 32 bits wide) + /* before reading, if the existing reader looks like this (say brword is 32 bits wide) * bitstream : 11 22 33 44 55 br->words=1 br->bytes=1 (partial tail word is left-justified) * buffer[BE]: 11 22 33 44 55 ?? ?? ?? (shown layed out as bytes sequentially in memory) * buffer[LE]: 44 33 22 11 ?? ?? ?? 55 (?? 
being don't-care) @@ -175,7 +200,7 @@ static FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br) */ #if WORDS_BIGENDIAN #else - end = (br->words*FLAC__BYTES_PER_WORD + br->bytes + bytes + (FLAC__BYTES_PER_WORD-1)) / FLAC__BYTES_PER_WORD; + end = (br->words*FLAC__BYTES_PER_WORD + br->bytes + (unsigned)bytes + (FLAC__BYTES_PER_WORD-1)) / FLAC__BYTES_PER_WORD; for(start = br->words; start < end; start++) br->buffer[start] = SWAP_BE_WORD_TO_HOST(br->buffer[start]); #endif @@ -186,7 +211,7 @@ static FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br) * buffer[LE]: 44 33 22 11 88 77 66 55 CC BB AA 99 ?? FF EE DD * finally we'll update the reader values: */ - end = br->words*FLAC__BYTES_PER_WORD + br->bytes + bytes; + end = br->words*FLAC__BYTES_PER_WORD + br->bytes + (unsigned)bytes; br->words = end / FLAC__BYTES_PER_WORD; br->bytes = end % FLAC__BYTES_PER_WORD; @@ -236,7 +261,7 @@ FLAC__bool FLAC__bitreader_init(FLAC__BitReader *br, FLAC__BitReaderReadCallback br->words = br->bytes = 0; br->consumed_words = br->consumed_bits = 0; br->capacity = FLAC__BITREADER_DEFAULT_CAPACITY; - br->buffer = malloc(sizeof(uint32_t) * br->capacity); + br->buffer = malloc(sizeof(brword) * br->capacity); if(br->buffer == 0) return false; br->read_callback = rcb; @@ -281,7 +306,7 @@ void FLAC__bitreader_dump(const FLAC__BitReader *br, FILE *out) if(i < br->consumed_words || (i == br->consumed_words && j < br->consumed_bits)) fprintf(out, "."); else - fprintf(out, "%01u", br->buffer[i] & (1 << (FLAC__BITS_PER_WORD-j-1)) ? 1:0); + fprintf(out, "%01u", br->buffer[i] & ((brword)1 << (FLAC__BITS_PER_WORD-j-1)) ? 1:0); fprintf(out, "\n"); } if(br->bytes > 0) { @@ -290,7 +315,7 @@ void FLAC__bitreader_dump(const FLAC__BitReader *br, FILE *out) if(i < br->consumed_words || (i == br->consumed_words && j < br->consumed_bits)) fprintf(out, "."); else - fprintf(out, "%01u", br->buffer[i] & (1 << (br->bytes*8-j-1)) ? 
1:0); + fprintf(out, "%01u", br->buffer[i] & ((brword)1 << (br->bytes*8-j-1)) ? 1:0); fprintf(out, "\n"); } } @@ -315,7 +340,7 @@ FLAC__uint16 FLAC__bitreader_get_read_crc16(FLAC__BitReader *br) /* CRC any tail bytes in a partially-consumed word */ if(br->consumed_bits) { - const uint32_t tail = br->buffer[br->consumed_words]; + const brword tail = br->buffer[br->consumed_words]; for( ; br->crc16_align < br->consumed_bits; br->crc16_align += 8) br->read_crc16 = FLAC__CRC16_UPDATE((unsigned)((tail >> (FLAC__BITS_PER_WORD-8-br->crc16_align)) & 0xff), br->read_crc16); } @@ -363,33 +388,34 @@ FLAC__bool FLAC__bitreader_read_raw_uint32(FLAC__BitReader *br, FLAC__uint32 *va if(br->consumed_bits) { /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */ const unsigned n = FLAC__BITS_PER_WORD - br->consumed_bits; - const uint32_t word = br->buffer[br->consumed_words]; + const brword word = br->buffer[br->consumed_words]; if(bits < n) { - *val = (word & (FLAC__WORD_ALL_ONES >> br->consumed_bits)) >> (n-bits); + *val = (FLAC__uint32)((word & (FLAC__WORD_ALL_ONES >> br->consumed_bits)) >> (n-bits)); /* The result has <= 32 non-zero bits */ br->consumed_bits += bits; return true; } - *val = word & (FLAC__WORD_ALL_ONES >> br->consumed_bits); + /* (FLAC__BITS_PER_WORD - br->consumed_bits <= bits) ==> (FLAC__WORD_ALL_ONES >> br->consumed_bits) has no more than 'bits' non-zero bits */ + *val = (FLAC__uint32)(word & (FLAC__WORD_ALL_ONES >> br->consumed_bits)); bits -= n; crc16_update_word_(br, word); br->consumed_words++; br->consumed_bits = 0; if(bits) { /* if there are still bits left to read, there have to be less than 32 so they will all be in the next word */ *val <<= bits; - *val |= (br->buffer[br->consumed_words] >> (FLAC__BITS_PER_WORD-bits)); + *val |= (FLAC__uint32)(br->buffer[br->consumed_words] >> (FLAC__BITS_PER_WORD-bits)); br->consumed_bits = bits; } return true; } - else { - const uint32_t word = 
br->buffer[br->consumed_words]; + else { /* br->consumed_bits == 0 */ + const brword word = br->buffer[br->consumed_words]; if(bits < FLAC__BITS_PER_WORD) { - *val = word >> (FLAC__BITS_PER_WORD-bits); + *val = (FLAC__uint32)(word >> (FLAC__BITS_PER_WORD-bits)); br->consumed_bits = bits; return true; } - /* at this point 'bits' must be == FLAC__BITS_PER_WORD; because of previous assertions, it can't be larger */ - *val = word; + /* at this point bits == FLAC__BITS_PER_WORD == 32; because of previous assertions, it can't be larger */ + *val = (FLAC__uint32)word; crc16_update_word_(br, word); br->consumed_words++; return true; @@ -404,12 +430,12 @@ FLAC__bool FLAC__bitreader_read_raw_uint32(FLAC__BitReader *br, FLAC__uint32 *va if(br->consumed_bits) { /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */ FLAC__ASSERT(br->consumed_bits + bits <= br->bytes*8); - *val = (br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES >> br->consumed_bits)) >> (FLAC__BITS_PER_WORD-br->consumed_bits-bits); + *val = (FLAC__uint32)((br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES >> br->consumed_bits)) >> (FLAC__BITS_PER_WORD-br->consumed_bits-bits)); br->consumed_bits += bits; return true; } else { - *val = br->buffer[br->consumed_words] >> (FLAC__BITS_PER_WORD-bits); + *val = (FLAC__uint32)(br->buffer[br->consumed_words] >> (FLAC__BITS_PER_WORD-bits)); br->consumed_bits += bits; return true; } @@ -565,7 +591,7 @@ FLAC__bool FLAC__bitreader_read_byte_block_aligned_no_crc(FLAC__BitReader *br, F /* step 2: read whole words in chunks */ while(nvals >= FLAC__BYTES_PER_WORD) { if(br->consumed_words < br->words) { - const uint32_t word = br->buffer[br->consumed_words++]; + const brword word = br->buffer[br->consumed_words++]; #if FLAC__BYTES_PER_WORD == 4 val[0] = (FLAC__byte)(word >> 24); val[1] = (FLAC__byte)(word >> 16); @@ -630,9 +656,9 @@ FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br, unsigned *va *val = 0; 
while(1) { while(br->consumed_words < br->words) { /* if we've not consumed up to a partial tail word... */ - uint32_t b = br->buffer[br->consumed_words] << br->consumed_bits; + brword b = br->buffer[br->consumed_words] << br->consumed_bits; if(b) { - i = FLAC__clz_uint32(b); + i = COUNT_ZERO_MSBS(b); *val += i; i++; br->consumed_bits += i; @@ -660,9 +686,9 @@ FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br, unsigned *va */ if(br->bytes*8 > br->consumed_bits) { const unsigned end = br->bytes * 8; - uint32_t b = (br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES << (FLAC__BITS_PER_WORD-end))) << br->consumed_bits; + brword b = (br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES << (FLAC__BITS_PER_WORD-end))) << br->consumed_bits; if(b) { - i = FLAC__clz_uint32(b); + i = COUNT_ZERO_MSBS(b); *val += i; i++; br->consumed_bits += i; @@ -717,7 +743,7 @@ FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[ * bitreader functions that use them, and before returning */ unsigned cwords, words, lsbs, msbs, x, y; unsigned ucbits; /* keep track of the number of unconsumed bits in word */ - uint32_t b; + brword b; int *val, *end; FLAC__ASSERT(0 != br); @@ -758,7 +784,7 @@ FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[ while(val < end) { /* read the unary MSBs and end bit */ - x = y = FLAC__clz2_uint32(b); + x = y = COUNT_ZERO_MSBS2(b); if(x == FLAC__BITS_PER_WORD) { x = ucbits; do { @@ -767,7 +793,7 @@ FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[ if (cwords >= words) goto incomplete_msbs; b = br->buffer[cwords]; - y = FLAC__clz2_uint32(b); + y = COUNT_ZERO_MSBS2(b); x += y; } while(y == FLAC__BITS_PER_WORD); } @@ -777,7 +803,7 @@ FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[ msbs = x; /* read the binary LSBs */ - x = b >> (FLAC__BITS_PER_WORD - parameter); + x = (FLAC__uint32)(b >> (FLAC__BITS_PER_WORD - parameter)); /* 
parameter < 32, so we can cast to 32-bit unsigned */ if(parameter <= ucbits) { ucbits -= parameter; b <<= parameter; @@ -788,7 +814,7 @@ FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[ goto incomplete_lsbs; b = br->buffer[cwords]; ucbits += FLAC__BITS_PER_WORD - parameter; - x |= b >> ucbits; + x |= (FLAC__uint32)(b >> ucbits); b <<= FLAC__BITS_PER_WORD - ucbits; } lsbs = x; diff --git a/src/libFLAC/bitwriter.c b/src/libFLAC/bitwriter.c index 5582efc8..742fba36 100644 --- a/src/libFLAC/bitwriter.c +++ b/src/libFLAC/bitwriter.c @@ -46,33 +46,52 @@ /* Things should be fastest when this matches the machine word size */ /* WATCHOUT: if you change this you must also change the following #defines down to SWAP_BE_WORD_TO_HOST below to match */ -/* WATCHOUT: there are a few places where the code will not work unless uint32_t is >= 32 bits wide */ -#define FLAC__BYTES_PER_WORD 4 +/* WATCHOUT: there are a few places where the code will not work unless bwword is >= 32 bits wide */ + +#if 1 + +typedef FLAC__uint32 bwword; +#define FLAC__BYTES_PER_WORD 4 /* sizeof bwword */ #define FLAC__BITS_PER_WORD 32 #define FLAC__WORD_ALL_ONES ((FLAC__uint32)0xffffffff) -/* SWAP_BE_WORD_TO_HOST swaps bytes in a uint32_t (which is always big-endian) if necessary to match host byte order */ +/* SWAP_BE_WORD_TO_HOST swaps bytes in a bwword (which is always big-endian) if necessary to match host byte order */ #if WORDS_BIGENDIAN #define SWAP_BE_WORD_TO_HOST(x) (x) #else #define SWAP_BE_WORD_TO_HOST(x) ENDSWAP_32(x) #endif +#else + +typedef FLAC__uint64 bwword; +#define FLAC__BYTES_PER_WORD 8 /* sizeof bwword */ +#define FLAC__BITS_PER_WORD 64 +#define FLAC__WORD_ALL_ONES ((FLAC__uint64)FLAC__U64L(0xffffffffffffffff)) +/* SWAP_BE_WORD_TO_HOST swaps bytes in a bwword (which is always big-endian) if necessary to match host byte order */ +#if WORDS_BIGENDIAN +#define SWAP_BE_WORD_TO_HOST(x) (x) +#else +#define SWAP_BE_WORD_TO_HOST(x) ENDSWAP_64(x) +#endif + +#endif 
+ /* * The default capacity here doesn't matter too much. The buffer always grows * to hold whatever is written to it. Usually the encoder will stop adding at * a frame or metadata block, then write that out and clear the buffer for the * next one. */ -static const unsigned FLAC__BITWRITER_DEFAULT_CAPACITY = 32768u / sizeof(uint32_t); /* size in words */ +static const unsigned FLAC__BITWRITER_DEFAULT_CAPACITY = 32768u / sizeof(bwword); /* size in words */ /* When growing, increment 4K at a time */ -static const unsigned FLAC__BITWRITER_DEFAULT_INCREMENT = 4096u / sizeof(uint32_t); /* size in words */ +static const unsigned FLAC__BITWRITER_DEFAULT_INCREMENT = 4096u / sizeof(bwword); /* size in words */ #define FLAC__WORDS_TO_BITS(words) ((words) * FLAC__BITS_PER_WORD) #define FLAC__TOTAL_BITS(bw) (FLAC__WORDS_TO_BITS((bw)->words) + (bw)->bits) struct FLAC__BitWriter { - uint32_t *buffer; - uint32_t accum; /* accumulator; bits are right-justified; when full, accum is appended to buffer */ + bwword *buffer; + bwword accum; /* accumulator; bits are right-justified; when full, accum is appended to buffer */ unsigned capacity; /* capacity of buffer in words */ unsigned words; /* # of complete words in buffer */ unsigned bits; /* # of used bits in accum */ @@ -85,7 +104,7 @@ static FLAC__bool bitwriter_grow_(FLAC__BitWriter *bw, unsigned bits_to_add) { unsigned new_capacity; - uint32_t *new_buffer; + bwword *new_buffer; FLAC__ASSERT(0 != bw); FLAC__ASSERT(0 != bw->buffer); @@ -107,7 +126,7 @@ FLAC__bool bitwriter_grow_(FLAC__BitWriter *bw, unsigned bits_to_add) FLAC__ASSERT(new_capacity > bw->capacity); FLAC__ASSERT(new_capacity >= bw->words + ((bw->bits + bits_to_add + FLAC__BITS_PER_WORD - 1) / FLAC__BITS_PER_WORD)); - new_buffer = safe_realloc_mul_2op_(bw->buffer, sizeof(uint32_t), /*times*/new_capacity); + new_buffer = safe_realloc_mul_2op_(bw->buffer, sizeof(bwword), /*times*/new_capacity); if(new_buffer == 0) return false; bw->buffer = new_buffer; @@ -149,7 +168,7 
@@ FLAC__bool FLAC__bitwriter_init(FLAC__BitWriter *bw) bw->words = bw->bits = 0; bw->capacity = FLAC__BITWRITER_DEFAULT_CAPACITY; - bw->buffer = malloc(sizeof(uint32_t) * bw->capacity); + bw->buffer = malloc(sizeof(bwword) * bw->capacity); if(bw->buffer == 0) return false; @@ -184,13 +203,13 @@ void FLAC__bitwriter_dump(const FLAC__BitWriter *bw, FILE *out) for(i = 0; i < bw->words; i++) { fprintf(out, "%08X: ", i); for(j = 0; j < FLAC__BITS_PER_WORD; j++) - fprintf(out, "%01u", bw->buffer[i] & (1u << (FLAC__BITS_PER_WORD-j-1)) ? 1:0); + fprintf(out, "%01u", bw->buffer[i] & ((bwword)1 << (FLAC__BITS_PER_WORD-j-1)) ? 1:0); fprintf(out, "\n"); } if(bw->bits > 0) { fprintf(out, "%08X: ", i); for(j = 0; j < bw->bits; j++) - fprintf(out, "%01u", bw->accum & (1 << (bw->bits-j-1)) ? 1:0); + fprintf(out, "%01u", bw->accum & ((bwword)1 << (bw->bits-j-1)) ? 1:0); fprintf(out, "\n"); } } @@ -337,7 +356,7 @@ inline FLAC__bool FLAC__bitwriter_write_raw_uint32(FLAC__BitWriter *bw, FLAC__ui bw->accum = val; } else { /* at this point bits == FLAC__BITS_PER_WORD == 32 and bw->bits == 0 */ - bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(val); + bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST((bwword)val); } return true; @@ -533,8 +552,8 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL msbits = uval >> parameter; total_bits = lsbits + msbits; - if(bw->bits && bw->bits + total_bits < FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current uint32_t */ - /* ^^^ if bw->bits is 0 then we may have filled the buffer and have no free uint32_t to work in */ + if(bw->bits && bw->bits + total_bits < FLAC__BITS_PER_WORD) { /* i.e. 
if the whole thing fits in the current bwword */ + /* ^^^ if bw->bits is 0 then we may have filled the buffer and have no free bwword to work in */ bw->bits += total_bits; uval |= mask1; /* set stop bit */ uval &= mask2; /* mask off unused top bits */ @@ -544,7 +563,7 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL else { /* slightly pessimistic size check but faster than "<= bw->words + (bw->bits+msbits+lsbits+FLAC__BITS_PER_WORD-1)/FLAC__BITS_PER_WORD" */ /* OPT: pessimism may cause flurry of false calls to grow_ which eat up all savings before it */ - if(bw->capacity <= bw->words + bw->bits + msbits + 1 /* lsbits always fit in 1 uint32_t */ && !bitwriter_grow_(bw, total_bits)) + if(bw->capacity <= bw->words + bw->bits + msbits + 1 /* lsbits always fit in 1 bwword */ && !bitwriter_grow_(bw, total_bits)) return false; if(msbits) { diff --git a/src/libFLAC/include/private/bitmath.h b/src/libFLAC/include/private/bitmath.h index 220b8dcc..bdd154a9 100644 --- a/src/libFLAC/include/private/bitmath.h +++ b/src/libFLAC/include/private/bitmath.h @@ -91,7 +91,31 @@ static inline unsigned int FLAC__clz_uint32(FLAC__uint32 v) #endif } -/* This one works with input 0 */ +/* Used when 64-bit bsr/clz is unavailable; can use 32-bit bsr/clz when possible */ +static inline unsigned int FLAC__clz_soft_uint64(FLAC__uint64 word) +{ + return (FLAC__uint32)(word>>32) ? 
FLAC__clz_uint32((FLAC__uint32)(word>>32)) : + FLAC__clz_uint32((FLAC__uint32)word) + 32; +} + +static inline unsigned int FLAC__clz_uint64(FLAC__uint64 v) +{ + /* Never used with input 0 */ + FLAC__ASSERT(v > 0); +#if defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) + return __builtin_clzll(v); +#elif (defined(__INTEL_COMPILER) || defined(_MSC_VER)) && (defined(_M_IA64) || defined(_M_X64)) + { + unsigned long idx; + _BitScanReverse64(&idx, v); + return idx ^ 63U; + } +#else + return FLAC__clz_soft_uint64(v); +#endif +} + +/* These two functions work with input 0 */ static inline unsigned int FLAC__clz2_uint32(FLAC__uint32 v) { if (!v) @@ -99,6 +123,13 @@ static inline unsigned int FLAC__clz2_uint32(FLAC__uint32 v) return FLAC__clz_uint32(v); } +static inline unsigned int FLAC__clz2_uint64(FLAC__uint64 v) +{ + if (!v) + return 64; + return FLAC__clz_uint64(v); +} + /* An example of what FLAC__bitmath_ilog2() computes: * * ilog2( 0) = assertion failure diff --git a/src/test_libFLAC/bitwriter.c b/src/test_libFLAC/bitwriter.c index a652b301..d8f744c9 100644 --- a/src/test_libFLAC/bitwriter.c +++ b/src/test_libFLAC/bitwriter.c @@ -33,8 +33,21 @@ * the definition here to get at the internals. 
Make sure this is kept up * to date with what is in ../libFLAC/bitwriter.c */ +#if 1 + typedef FLAC__uint32 bwword; +#define FLAC__BYTES_PER_WORD 4 #define FLAC__BITS_PER_WORD 32 +#define PRI_BWWORD "08x" + +#else + +typedef FLAC__uint64 bwword; +#define FLAC__BYTES_PER_WORD 8 +#define FLAC__BITS_PER_WORD 64 +#define PRI_BWWORD "016" PRIx64 + +#endif struct FLAC__BitWriter { bwword *buffer; @@ -53,10 +66,20 @@ FLAC__bool test_bitwriter(void) FLAC__BitWriter *bw; FLAC__bool ok; unsigned i, j; +#if FLAC__BYTES_PER_WORD == 4 #if WORDS_BIGENDIAN static bwword test_pattern1[5] = { 0xaaf0aabe, 0xaaaaaaa8, 0x300aaaaa, 0xaaadeadb, 0x00eeface }; #else static bwword test_pattern1[5] = { 0xbeaaf0aa, 0xa8aaaaaa, 0xaaaa0a30, 0xdbeaadaa, 0x00eeface }; +#endif +#elif FLAC__BYTES_PER_WORD == 8 +#if WORDS_BIGENDIAN + static bwword test_pattern1[3] = { FLAC__U64L(0xaaf0aabeaaaaaaa8), FLAC__U64L(0x300aaaaaaaadeadb), FLAC__U64L(0x0000000000eeface) }; +#else + static bwword test_pattern1[3] = { FLAC__U64L(0xa8aaaaaabeaaf0aa), FLAC__U64L(0xdbeaadaaaaaa0a30), FLAC__U64L(0x0000000000eeface) }; +#endif +#else +#error FLAC__BYTES_PER_WORD is neither 4 nor 8 -- not implemented #endif unsigned words, bits; /* what we think bw->words and bw->bits should be */ @@ -196,7 +219,7 @@ FLAC__bool test_bitwriter(void) return false; } if((bw->accum & 0x00ffffff) != test_pattern1[words]) { - printf("FAILED pattern match (bw->accum=%08X != %08X)\n", bw->accum&0x00ffffff, test_pattern1[words]); + printf("FAILED pattern match (bw->accum=%" PRI_BWWORD " != %" PRI_BWWORD ")\n", bw->accum&0x00ffffff, test_pattern1[words]); FLAC__bitwriter_dump(bw, stdout); return false; } @@ -229,7 +252,7 @@ FLAC__bool test_bitwriter(void) return false; } if((bw->accum & 0x3fffffff) != test_pattern1[words]) { - printf("FAILED pattern match (bw->accum=%08X != %08X)\n", bw->accum&0x3fffffff, test_pattern1[words]); + printf("FAILED pattern match (bw->accum=%" PRI_BWWORD " != %" PRI_BWWORD ")\n", bw->accum&0x3fffffff, 
test_pattern1[words]); FLAC__bitwriter_dump(bw, stdout); return false; } @@ -299,10 +322,14 @@ FLAC__bool test_bitwriter(void) printf("testing utf8_uint32(0x00010000)... "); FLAC__bitwriter_clear(bw); FLAC__bitwriter_write_utf8_uint32(bw, 0x00010000); +#if FLAC__BYTES_PER_WORD == 4 #if WORDS_BIGENDIAN ok = TOTAL_BITS(bw) == 32 && bw->buffer[0] == 0xF0908080; #else ok = TOTAL_BITS(bw) == 32 && bw->buffer[0] == 0x808090F0; +#endif +#elif FLAC__BYTES_PER_WORD == 8 + ok = TOTAL_BITS(bw) == 32 && (bw->accum & 0xffffffff) == 0xF0908080; #endif printf("%s\n", ok?"OK":"FAILED"); if(!ok) { @@ -313,10 +340,14 @@ FLAC__bool test_bitwriter(void) printf("testing utf8_uint32(0x001FFFFF)... "); FLAC__bitwriter_clear(bw); FLAC__bitwriter_write_utf8_uint32(bw, 0x001FFFFF); +#if FLAC__BYTES_PER_WORD == 4 #if WORDS_BIGENDIAN ok = TOTAL_BITS(bw) == 32 && bw->buffer[0] == 0xF7BFBFBF; #else ok = TOTAL_BITS(bw) == 32 && bw->buffer[0] == 0xBFBFBFF7; +#endif +#elif FLAC__BYTES_PER_WORD == 8 + ok = TOTAL_BITS(bw) == 32 && (bw->accum & 0xffffffff) == 0xF7BFBFBF; #endif printf("%s\n", ok?"OK":"FAILED"); if(!ok) { @@ -327,10 +358,14 @@ FLAC__bool test_bitwriter(void) printf("testing utf8_uint32(0x00200000)... "); FLAC__bitwriter_clear(bw); FLAC__bitwriter_write_utf8_uint32(bw, 0x00200000); +#if FLAC__BYTES_PER_WORD == 4 #if WORDS_BIGENDIAN ok = TOTAL_BITS(bw) == 40 && bw->buffer[0] == 0xF8888080 && (bw->accum & 0xff) == 0x80; #else ok = TOTAL_BITS(bw) == 40 && bw->buffer[0] == 0x808088F8 && (bw->accum & 0xff) == 0x80; +#endif +#elif FLAC__BYTES_PER_WORD == 8 + ok = TOTAL_BITS(bw) == 40 && (bw->accum & FLAC__U64L(0xffffffffff)) == FLAC__U64L(0xF888808080); #endif printf("%s\n", ok?"OK":"FAILED"); if(!ok) { @@ -341,10 +376,14 @@ FLAC__bool test_bitwriter(void) printf("testing utf8_uint32(0x03FFFFFF)... 
"); FLAC__bitwriter_clear(bw); FLAC__bitwriter_write_utf8_uint32(bw, 0x03FFFFFF); +#if FLAC__BYTES_PER_WORD == 4 #if WORDS_BIGENDIAN ok = TOTAL_BITS(bw) == 40 && bw->buffer[0] == 0xFBBFBFBF && (bw->accum & 0xff) == 0xBF; #else ok = TOTAL_BITS(bw) == 40 && bw->buffer[0] == 0xBFBFBFFB && (bw->accum & 0xff) == 0xBF; +#endif +#elif FLAC__BYTES_PER_WORD == 8 + ok = TOTAL_BITS(bw) == 40 && (bw->accum & FLAC__U64L(0xffffffffff)) == FLAC__U64L(0xFBBFBFBFBF); #endif printf("%s\n", ok?"OK":"FAILED"); if(!ok) { @@ -355,10 +394,14 @@ FLAC__bool test_bitwriter(void) printf("testing utf8_uint32(0x04000000)... "); FLAC__bitwriter_clear(bw); FLAC__bitwriter_write_utf8_uint32(bw, 0x04000000); +#if FLAC__BYTES_PER_WORD == 4 #if WORDS_BIGENDIAN ok = TOTAL_BITS(bw) == 48 && bw->buffer[0] == 0xFC848080 && (bw->accum & 0xffff) == 0x8080; #else ok = TOTAL_BITS(bw) == 48 && bw->buffer[0] == 0x808084FC && (bw->accum & 0xffff) == 0x8080; +#endif +#elif FLAC__BYTES_PER_WORD == 8 + ok = TOTAL_BITS(bw) == 48 && (bw->accum & FLAC__U64L(0xffffffffffff)) == FLAC__U64L(0xFC8480808080); #endif printf("%s\n", ok?"OK":"FAILED"); if(!ok) { @@ -369,10 +412,14 @@ FLAC__bool test_bitwriter(void) printf("testing utf8_uint32(0x7FFFFFFF)... "); FLAC__bitwriter_clear(bw); FLAC__bitwriter_write_utf8_uint32(bw, 0x7FFFFFFF); +#if FLAC__BYTES_PER_WORD == 4 #if WORDS_BIGENDIAN ok = TOTAL_BITS(bw) == 48 && bw->buffer[0] == 0xFDBFBFBF && (bw->accum & 0xffff) == 0xBFBF; #else ok = TOTAL_BITS(bw) == 48 && bw->buffer[0] == 0xBFBFBFFD && (bw->accum & 0xffff) == 0xBFBF; +#endif +#elif FLAC__BYTES_PER_WORD == 8 + ok = TOTAL_BITS(bw) == 48 && (bw->accum & FLAC__U64L(0xffffffffffff)) == FLAC__U64L(0xFDBFBFBFBFBF); #endif printf("%s\n", ok?"OK":"FAILED"); if(!ok) { @@ -443,10 +490,14 @@ FLAC__bool test_bitwriter(void) printf("testing utf8_uint64(0x0000000000010000)... 
"); FLAC__bitwriter_clear(bw); FLAC__bitwriter_write_utf8_uint64(bw, FLAC__U64L(0x0000000000010000)); +#if FLAC__BYTES_PER_WORD == 4 #if WORDS_BIGENDIAN ok = TOTAL_BITS(bw) == 32 && bw->buffer[0] == 0xF0908080; #else ok = TOTAL_BITS(bw) == 32 && bw->buffer[0] == 0x808090F0; +#endif +#elif FLAC__BYTES_PER_WORD == 8 + ok = TOTAL_BITS(bw) == 32 && (bw->accum & 0xffffffff) == 0xF0908080; #endif printf("%s\n", ok?"OK":"FAILED"); if(!ok) { @@ -457,10 +508,14 @@ FLAC__bool test_bitwriter(void) printf("testing utf8_uint64(0x00000000001FFFFF)... "); FLAC__bitwriter_clear(bw); FLAC__bitwriter_write_utf8_uint64(bw, FLAC__U64L(0x00000000001FFFFF)); +#if FLAC__BYTES_PER_WORD == 4 #if WORDS_BIGENDIAN ok = TOTAL_BITS(bw) == 32 && bw->buffer[0] == 0xF7BFBFBF; #else ok = TOTAL_BITS(bw) == 32 && bw->buffer[0] == 0xBFBFBFF7; +#endif +#elif FLAC__BYTES_PER_WORD == 8 + ok = TOTAL_BITS(bw) == 32 && (bw->accum & 0xffffffff) == 0xF7BFBFBF; #endif printf("%s\n", ok?"OK":"FAILED"); if(!ok) { @@ -471,10 +526,14 @@ FLAC__bool test_bitwriter(void) printf("testing utf8_uint64(0x0000000000200000)... "); FLAC__bitwriter_clear(bw); FLAC__bitwriter_write_utf8_uint64(bw, FLAC__U64L(0x0000000000200000)); +#if FLAC__BYTES_PER_WORD == 4 #if WORDS_BIGENDIAN ok = TOTAL_BITS(bw) == 40 && bw->buffer[0] == 0xF8888080 && (bw->accum & 0xff) == 0x80; #else ok = TOTAL_BITS(bw) == 40 && bw->buffer[0] == 0x808088F8 && (bw->accum & 0xff) == 0x80; +#endif +#elif FLAC__BYTES_PER_WORD == 8 + ok = TOTAL_BITS(bw) == 40 && (bw->accum & FLAC__U64L(0xffffffffff)) == FLAC__U64L(0xF888808080); #endif printf("%s\n", ok?"OK":"FAILED"); if(!ok) { @@ -485,10 +544,14 @@ FLAC__bool test_bitwriter(void) printf("testing utf8_uint64(0x0000000003FFFFFF)... 
"); FLAC__bitwriter_clear(bw); FLAC__bitwriter_write_utf8_uint64(bw, FLAC__U64L(0x0000000003FFFFFF)); +#if FLAC__BYTES_PER_WORD == 4 #if WORDS_BIGENDIAN ok = TOTAL_BITS(bw) == 40 && bw->buffer[0] == 0xFBBFBFBF && (bw->accum & 0xff) == 0xBF; #else ok = TOTAL_BITS(bw) == 40 && bw->buffer[0] == 0xBFBFBFFB && (bw->accum & 0xff) == 0xBF; +#endif +#elif FLAC__BYTES_PER_WORD == 8 + ok = TOTAL_BITS(bw) == 40 && (bw->accum & FLAC__U64L(0xffffffffff)) == FLAC__U64L(0xFBBFBFBFBF); #endif printf("%s\n", ok?"OK":"FAILED"); if(!ok) { @@ -499,10 +562,14 @@ FLAC__bool test_bitwriter(void) printf("testing utf8_uint64(0x0000000004000000)... "); FLAC__bitwriter_clear(bw); FLAC__bitwriter_write_utf8_uint64(bw, FLAC__U64L(0x0000000004000000)); +#if FLAC__BYTES_PER_WORD == 4 #if WORDS_BIGENDIAN ok = TOTAL_BITS(bw) == 48 && bw->buffer[0] == 0xFC848080 && (bw->accum & 0xffff) == 0x8080; #else ok = TOTAL_BITS(bw) == 48 && bw->buffer[0] == 0x808084FC && (bw->accum & 0xffff) == 0x8080; +#endif +#elif FLAC__BYTES_PER_WORD == 8 + ok = TOTAL_BITS(bw) == 48 && (bw->accum & FLAC__U64L(0xffffffffffff)) == FLAC__U64L(0xFC8480808080); #endif printf("%s\n", ok?"OK":"FAILED"); if(!ok) { @@ -513,10 +580,14 @@ FLAC__bool test_bitwriter(void) printf("testing utf8_uint64(0x000000007FFFFFFF)... "); FLAC__bitwriter_clear(bw); FLAC__bitwriter_write_utf8_uint64(bw, FLAC__U64L(0x000000007FFFFFFF)); +#if FLAC__BYTES_PER_WORD == 4 #if WORDS_BIGENDIAN ok = TOTAL_BITS(bw) == 48 && bw->buffer[0] == 0xFDBFBFBF && (bw->accum & 0xffff) == 0xBFBF; #else ok = TOTAL_BITS(bw) == 48 && bw->buffer[0] == 0xBFBFBFFD && (bw->accum & 0xffff) == 0xBFBF; +#endif +#elif FLAC__BYTES_PER_WORD == 8 + ok = TOTAL_BITS(bw) == 48 && (bw->accum & FLAC__U64L(0xffffffffffff)) == FLAC__U64L(0xFDBFBFBFBFBF); #endif printf("%s\n", ok?"OK":"FAILED"); if(!ok) { @@ -527,10 +598,14 @@ FLAC__bool test_bitwriter(void) printf("testing utf8_uint64(0x0000000080000000)... 
"); FLAC__bitwriter_clear(bw); FLAC__bitwriter_write_utf8_uint64(bw, FLAC__U64L(0x0000000080000000)); +#if FLAC__BYTES_PER_WORD == 4 #if WORDS_BIGENDIAN ok = TOTAL_BITS(bw) == 56 && bw->buffer[0] == 0xFE828080 && (bw->accum & 0xffffff) == 0x808080; #else ok = TOTAL_BITS(bw) == 56 && bw->buffer[0] == 0x808082FE && (bw->accum & 0xffffff) == 0x808080; +#endif +#elif FLAC__BYTES_PER_WORD == 8 + ok = TOTAL_BITS(bw) == 56 && (bw->accum & FLAC__U64L(0xffffffffffffff)) == FLAC__U64L(0xFE828080808080); #endif printf("%s\n", ok?"OK":"FAILED"); if(!ok) { @@ -541,10 +616,14 @@ FLAC__bool test_bitwriter(void) printf("testing utf8_uint64(0x0000000FFFFFFFFF)... "); FLAC__bitwriter_clear(bw); FLAC__bitwriter_write_utf8_uint64(bw, FLAC__U64L(0x0000000FFFFFFFFF)); +#if FLAC__BYTES_PER_WORD == 4 #if WORDS_BIGENDIAN ok = TOTAL_BITS(bw) == 56 && bw->buffer[0] == 0xFEBFBFBF && (bw->accum & 0xffffff) == 0xBFBFBF; #else ok = TOTAL_BITS(bw) == 56 && bw->buffer[0] == 0xBFBFBFFE && (bw->accum & 0xffffff) == 0xBFBFBF; +#endif +#elif FLAC__BYTES_PER_WORD == 8 + ok = TOTAL_BITS(bw) == 56 && (bw->accum & FLAC__U64L(0xffffffffffffff)) == FLAC__U64L(0xFEBFBFBFBFBFBF); #endif printf("%s\n", ok?"OK":"FAILED"); if(!ok) { @@ -558,10 +637,18 @@ FLAC__bool test_bitwriter(void) j = bw->capacity; for(i = 0; i < j; i++) FLAC__bitwriter_write_raw_uint32(bw, 0xaaaaaaaa, 32); +#if FLAC__BYTES_PER_WORD == 4 #if WORDS_BIGENDIAN ok = TOTAL_BITS(bw) == i*32+4 && bw->buffer[0] == 0x5aaaaaaa && (bw->accum & 0xf) == 0xa; #else ok = TOTAL_BITS(bw) == i*32+4 && bw->buffer[0] == 0xaaaaaa5a && (bw->accum & 0xf) == 0xa; +#endif +#elif FLAC__BYTES_PER_WORD == 8 +#if WORDS_BIGENDIAN + ok = TOTAL_BITS(bw) == i*32+4 && bw->buffer[0] == FLAC__U64L(0x5aaaaaaaaaaaaaaa) && (bw->accum & 0xf) == 0xa; +#else + ok = TOTAL_BITS(bw) == i*32+4 && bw->buffer[0] == FLAC__U64L(0xaaaaaaaaaaaaaa5a) && (bw->accum & 0xf) == 0xa; +#endif #endif printf("%s\n", ok?"OK":"FAILED"); if(!ok) {