diff --git a/src/libFLAC/bitwriter.c b/src/libFLAC/bitwriter.c index 5a6468ad..0a56f878 100644 --- a/src/libFLAC/bitwriter.c +++ b/src/libFLAC/bitwriter.c @@ -57,6 +57,7 @@ typedef FLAC__uint64 FLAC__bwtemp; #define FLAC__BYTES_PER_WORD 4 /* sizeof bwword */ #define FLAC__BITS_PER_WORD 32 #define FLAC__TEMP_BITS 64 +#define FLAC__HALF_TEMP_BITS 32 /* SWAP_BE_WORD_TO_HOST swaps bytes in a bwword (which is always big-endian) if necessary to match host byte order */ #if WORDS_BIGENDIAN #define SWAP_BE_WORD_TO_HOST(x) (x) @@ -67,8 +68,11 @@ typedef FLAC__uint64 FLAC__bwtemp; #else typedef FLAC__uint64 bwword; +typedef FLAC__uint64 FLAC__bwtemp; #define FLAC__BYTES_PER_WORD 8 /* sizeof bwword */ #define FLAC__BITS_PER_WORD 64 +#define FLAC__TEMP_BITS 64 +#define FLAC__HALF_TEMP_BITS 32 /* SWAP_BE_WORD_TO_HOST swaps bytes in a bwword (which is always big-endian) if necessary to match host byte order */ #if WORDS_BIGENDIAN #define SWAP_BE_WORD_TO_HOST(x) (x) @@ -537,26 +541,31 @@ FLAC__bool FLAC__bitwriter_write_rice_signed(FLAC__BitWriter *bw, FLAC__int32 va #if (ENABLE_64_BIT_WORDS == 0) -#define EMPTY_WIDE_ACCUM_TO_BW { \ - bw->buffer[bw->words++] = 0; \ - bitpointer += FLAC__BITS_PER_WORD; \ +#define WIDE_ACCUM_TO_BW { \ + bw->accum = wide_accum >> FLAC__HALF_TEMP_BITS; \ + bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); \ + wide_accum <<= FLAC__HALF_TEMP_BITS; \ + bitpointer += FLAC__HALF_TEMP_BITS; \ } -#if WORDS_BIGENDIAN -#define WIDE_ACCUM_TO_BW { \ - bw->accum = *((FLAC__int32 *)&wide_accum); \ - bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); \ - wide_accum <<= FLAC__BITS_PER_WORD; \ - bitpointer += FLAC__BITS_PER_WORD; \ -} #else + #define WIDE_ACCUM_TO_BW { \ - bw->accum = *(((FLAC__int32 *)&wide_accum)+1); \ - bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); \ - wide_accum <<= FLAC__BITS_PER_WORD; \ - bitpointer += FLAC__BITS_PER_WORD; \ + FLAC__ASSERT(bw->bits % FLAC__HALF_TEMP_BITS == 0); \ + if(bw->bits == 0) { \ + bw->accum = wide_accum >> FLAC__HALF_TEMP_BITS; \ + wide_accum <<= FLAC__HALF_TEMP_BITS; \ + bw->bits = FLAC__HALF_TEMP_BITS; \ + } \ + else { \ + bw->accum <<= FLAC__HALF_TEMP_BITS; \ + bw->accum += wide_accum >> FLAC__HALF_TEMP_BITS; \ + bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); \ + wide_accum <<= FLAC__HALF_TEMP_BITS; \ + bw->bits = 0; \ + } \ + bitpointer += FLAC__HALF_TEMP_BITS; \ } -#endif #endif @@ -567,32 +576,40 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL FLAC__uint32 uval; const uint32_t lsbits = 1 + parameter; uint32_t msbits, total_bits; -#if (ENABLE_64_BIT_WORDS == 0) FLAC__bwtemp wide_accum = 0; FLAC__uint32 bitpointer = FLAC__TEMP_BITS; -#else - uint32_t left; -#endif FLAC__ASSERT(0 != bw); FLAC__ASSERT(0 != bw->buffer); FLAC__ASSERT(parameter < 31); /* WATCHOUT: code does not work with <32bit words; we can make things much faster with this assertion */ FLAC__ASSERT(FLAC__BITS_PER_WORD >= 32); - #if (ENABLE_64_BIT_WORDS == 0) if(bw->bits > 0) { bitpointer -= bw->bits; wide_accum = (FLAC__bwtemp)(bw->accum) << bitpointer; + bw->bits = 0; } +#else + if(bw->bits > 0 && bw->bits < FLAC__HALF_TEMP_BITS) { + bitpointer -= bw->bits; + wide_accum = bw->accum << bitpointer; + bw->bits = 0; + } + else if(bw->bits > FLAC__HALF_TEMP_BITS) { + bitpointer -= (bw->bits - FLAC__HALF_TEMP_BITS); + wide_accum = bw->accum << bitpointer; + bw->accum >>= (bw->bits - FLAC__HALF_TEMP_BITS); + bw->bits = FLAC__HALF_TEMP_BITS; + } +#endif { - FLAC__uint32 capacity_needed = (bw->words + 2 * nvals) * FLAC__BITS_PER_WORD + bw->bits; - /* Reserve two words per symbol, add space only when very large symbols are encountered */ + /* Reserve one FLAC__TEMP_BITS per symbol, so checks for space are only necessary when very large symbols are encountered */ + FLAC__uint32 capacity_needed = bw->words * FLAC__BITS_PER_WORD + nvals * FLAC__TEMP_BITS + bw->bits; if(bw->capacity * FLAC__BITS_PER_WORD <= capacity_needed && !bitwriter_grow_(bw, capacity_needed - bw->capacity * FLAC__BITS_PER_WORD)) return false; } -#endif while(nvals) { /* fold signed to uint32_t; actual formula is: negative(v)? -2v-1 : 2v */ @@ -607,12 +624,11 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL uval &= mask2; /* mask off unused top bits */ -#if (ENABLE_64_BIT_WORDS == 0) if(total_bits <= bitpointer) { /* There is room enough to store the symbol whole at once */ wide_accum |= (FLAC__bwtemp)(uval) << (bitpointer - total_bits); bitpointer -= total_bits; - if(bitpointer <= FLAC__BITS_PER_WORD) { + if(bitpointer <= FLAC__HALF_TEMP_BITS) { /* A word is finished, copy the upper 32 bits of the wide_accum */ WIDE_ACCUM_TO_BW } @@ -620,40 +636,40 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL else { /* The symbol needs to be split. This code isn't used often */ /* First check for space in the bitwriter */ - if(total_bits > (2 * FLAC__BITS_PER_WORD)) { - FLAC__uint32 oversize_in_bits = total_bits - 2 * FLAC__BITS_PER_WORD; - FLAC__uint32 capacity_needed = (bw->words + 2 * nvals) * FLAC__BITS_PER_WORD + bw->bits + oversize_in_bits; + if(total_bits > FLAC__TEMP_BITS) { + FLAC__uint32 oversize_in_bits = total_bits - FLAC__TEMP_BITS; + FLAC__uint32 capacity_needed = bw->words * FLAC__BITS_PER_WORD + bw->bits + nvals * FLAC__TEMP_BITS + oversize_in_bits; if(bw->capacity * FLAC__BITS_PER_WORD <= capacity_needed && !bitwriter_grow_(bw, capacity_needed * FLAC__BITS_PER_WORD - bw->capacity)) return false; } if(msbits > bitpointer) { - /* Conveniently empty wide_accum */ - msbits -= bitpointer - FLAC__BITS_PER_WORD; - bitpointer = FLAC__BITS_PER_WORD; + /* We have a lot of 0 bits to write, first align with bitwriter word */ + msbits -= bitpointer - FLAC__HALF_TEMP_BITS; + bitpointer = FLAC__HALF_TEMP_BITS; WIDE_ACCUM_TO_BW while(msbits > bitpointer) { /* As the accumulator is already zero, we only need to * assign zeroes to the bitbuffer */ - EMPTY_WIDE_ACCUM_TO_BW - bitpointer -= FLAC__BITS_PER_WORD; - msbits -= FLAC__BITS_PER_WORD; + WIDE_ACCUM_TO_BW + bitpointer -= FLAC__HALF_TEMP_BITS; + msbits -= FLAC__HALF_TEMP_BITS; } /* The remaining bits are zero, and the accumulator already is zero, * so just subtract the number of bits from bitpointer. When storing, * we can also just store 0 */ bitpointer -= msbits; - if(bitpointer <= FLAC__BITS_PER_WORD) - EMPTY_WIDE_ACCUM_TO_BW + if(bitpointer <= FLAC__HALF_TEMP_BITS) + WIDE_ACCUM_TO_BW } else { bitpointer -= msbits; - if(bitpointer <= FLAC__BITS_PER_WORD) + if(bitpointer <= FLAC__HALF_TEMP_BITS) WIDE_ACCUM_TO_BW } /* The lsbs + stop bit always fit 32 bit, so this code mirrors the code above */ wide_accum |= (FLAC__bwtemp)(uval) << (bitpointer - lsbits); bitpointer -= lsbits; - if(bitpointer <= FLAC__BITS_PER_WORD) { + if(bitpointer <= FLAC__HALF_TEMP_BITS) { /* A word is finished, copy the upper 32 bits of the wide_accum */ WIDE_ACCUM_TO_BW } @@ -661,85 +677,30 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL vals++; nvals--; } - /* Now fixup remainer of wide_accum */ + /* Now fixup remainder of wide_accum */ +#if (ENABLE_64_BIT_WORDS == 0) if(bitpointer < FLAC__TEMP_BITS) { bw->accum = wide_accum >> bitpointer; bw->bits = FLAC__TEMP_BITS - bitpointer; } - else { - bw->accum = 0; - bw->bits = 0; - } #else - - if(bw->bits && bw->bits + total_bits < FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current bwword */ - /* ^^^ if bw->bits is 0 then we may have filled the buffer and have no free bwword to work in */ - bw->bits += total_bits; - uval |= mask1; /* set stop bit */ - uval &= mask2; /* mask off unused top bits */ - bw->accum <<= total_bits; - bw->accum |= uval; + if(bitpointer < FLAC__TEMP_BITS) { + if(bw->bits == 0) { + bw->accum = wide_accum >> bitpointer; + bw->bits = FLAC__TEMP_BITS - bitpointer; + } + else if (bw->bits == FLAC__HALF_TEMP_BITS) { + bw->accum <<= FLAC__TEMP_BITS - bitpointer; + bw->accum |= (wide_accum >> bitpointer); + bw->bits = FLAC__HALF_TEMP_BITS + FLAC__TEMP_BITS - bitpointer; } else { - /* slightly pessimistic size check but faster than "<= bw->words + (bw->bits+msbits+lsbits+FLAC__BITS_PER_WORD-1)/FLAC__BITS_PER_WORD" */ - /* OPT: pessimism may cause flurry of false calls to grow_ which eat up all savings before it */ - if(bw->capacity <= bw->words + bw->bits + msbits + 1 /* lsbits always fit in 1 bwword */ && !bitwriter_grow_(bw, total_bits)) - return false; - - if(msbits) { - /* first part gets to word alignment */ - if(bw->bits) { - left = FLAC__BITS_PER_WORD - bw->bits; - if(msbits < left) { - bw->accum <<= msbits; - bw->bits += msbits; - goto break1; - } - else { - bw->accum <<= left; - msbits -= left; - bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); - bw->bits = 0; - } - } - /* do whole words */ - while(msbits >= FLAC__BITS_PER_WORD) { - bw->buffer[bw->words++] = 0; - msbits -= FLAC__BITS_PER_WORD; - } - /* do any leftovers */ - if(msbits > 0) { - bw->accum = 0; - bw->bits = msbits; - } - } -break1: - uval |= mask1; /* set stop bit */ - uval &= mask2; /* mask off unused top bits */ - - left = FLAC__BITS_PER_WORD - bw->bits; - if(lsbits < left) { - bw->accum <<= lsbits; - bw->accum |= uval; - bw->bits += lsbits; - } - else { - /* if bw->bits == 0, left==FLAC__BITS_PER_WORD which will always - * be > lsbits (because of previous assertions) so it would have - * triggered the (lsbitsbits); - FLAC__ASSERT(left < FLAC__BITS_PER_WORD); - bw->accum <<= left; - bw->accum |= uval >> (bw->bits = lsbits - left); - bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); - bw->accum = uval; /* unused top bits can contain garbage */ - } + FLAC__ASSERT(0); } - vals++; - nvals--; } #endif + + return true; }