Change FLAC__bitwriter_write_rice_signed_block for 64-bit words
This commit is contained in:
parent
b698ed45f3
commit
9f3894f0d5
@ -57,6 +57,7 @@ typedef FLAC__uint64 FLAC__bwtemp;
|
||||
#define FLAC__BYTES_PER_WORD 4 /* sizeof bwword */
|
||||
#define FLAC__BITS_PER_WORD 32
|
||||
#define FLAC__TEMP_BITS 64
|
||||
#define FLAC__HALF_TEMP_BITS 32
|
||||
/* SWAP_BE_WORD_TO_HOST swaps bytes in a bwword (which is always big-endian) if necessary to match host byte order */
|
||||
#if WORDS_BIGENDIAN
|
||||
#define SWAP_BE_WORD_TO_HOST(x) (x)
|
||||
@ -67,8 +68,11 @@ typedef FLAC__uint64 FLAC__bwtemp;
|
||||
#else
|
||||
|
||||
typedef FLAC__uint64 bwword;
|
||||
typedef FLAC__uint64 FLAC__bwtemp;
|
||||
#define FLAC__BYTES_PER_WORD 8 /* sizeof bwword */
|
||||
#define FLAC__BITS_PER_WORD 64
|
||||
#define FLAC__TEMP_BITS 64
|
||||
#define FLAC__HALF_TEMP_BITS 32
|
||||
/* SWAP_BE_WORD_TO_HOST swaps bytes in a bwword (which is always big-endian) if necessary to match host byte order */
|
||||
#if WORDS_BIGENDIAN
|
||||
#define SWAP_BE_WORD_TO_HOST(x) (x)
|
||||
@ -537,26 +541,31 @@ FLAC__bool FLAC__bitwriter_write_rice_signed(FLAC__BitWriter *bw, FLAC__int32 va
|
||||
|
||||
#if (ENABLE_64_BIT_WORDS == 0)
|
||||
|
||||
#define EMPTY_WIDE_ACCUM_TO_BW { \
|
||||
bw->buffer[bw->words++] = 0; \
|
||||
bitpointer += FLAC__BITS_PER_WORD; \
|
||||
#define WIDE_ACCUM_TO_BW { \
|
||||
bw->accum = wide_accum >> FLAC__HALF_TEMP_BITS; \
|
||||
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); \
|
||||
wide_accum <<= FLAC__HALF_TEMP_BITS; \
|
||||
bitpointer += FLAC__HALF_TEMP_BITS; \
|
||||
}
|
||||
|
||||
#if WORDS_BIGENDIAN
|
||||
#define WIDE_ACCUM_TO_BW { \
|
||||
bw->accum = *((FLAC__int32 *)&wide_accum); \
|
||||
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); \
|
||||
wide_accum <<= FLAC__BITS_PER_WORD; \
|
||||
bitpointer += FLAC__BITS_PER_WORD; \
|
||||
}
|
||||
#else
|
||||
|
||||
#define WIDE_ACCUM_TO_BW { \
|
||||
bw->accum = *(((FLAC__int32 *)&wide_accum)+1); \
|
||||
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); \
|
||||
wide_accum <<= FLAC__BITS_PER_WORD; \
|
||||
bitpointer += FLAC__BITS_PER_WORD; \
|
||||
FLAC__ASSERT(bw->bits % FLAC__HALF_TEMP_BITS == 0); \
|
||||
if(bw->bits == 0) { \
|
||||
bw->accum = wide_accum >> FLAC__HALF_TEMP_BITS; \
|
||||
wide_accum <<= FLAC__HALF_TEMP_BITS; \
|
||||
bw->bits = FLAC__HALF_TEMP_BITS; \
|
||||
} \
|
||||
else { \
|
||||
bw->accum <<= FLAC__HALF_TEMP_BITS; \
|
||||
bw->accum += wide_accum >> FLAC__HALF_TEMP_BITS; \
|
||||
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); \
|
||||
wide_accum <<= FLAC__HALF_TEMP_BITS; \
|
||||
bw->bits = 0; \
|
||||
} \
|
||||
bitpointer += FLAC__HALF_TEMP_BITS; \
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@ -567,32 +576,40 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL
|
||||
FLAC__uint32 uval;
|
||||
const uint32_t lsbits = 1 + parameter;
|
||||
uint32_t msbits, total_bits;
|
||||
#if (ENABLE_64_BIT_WORDS == 0)
|
||||
FLAC__bwtemp wide_accum = 0;
|
||||
FLAC__uint32 bitpointer = FLAC__TEMP_BITS;
|
||||
#else
|
||||
uint32_t left;
|
||||
#endif
|
||||
|
||||
FLAC__ASSERT(0 != bw);
|
||||
FLAC__ASSERT(0 != bw->buffer);
|
||||
FLAC__ASSERT(parameter < 31);
|
||||
/* WATCHOUT: code does not work with <32bit words; we can make things much faster with this assertion */
|
||||
FLAC__ASSERT(FLAC__BITS_PER_WORD >= 32);
|
||||
|
||||
#if (ENABLE_64_BIT_WORDS == 0)
|
||||
if(bw->bits > 0) {
|
||||
bitpointer -= bw->bits;
|
||||
wide_accum = (FLAC__bwtemp)(bw->accum) << bitpointer;
|
||||
bw->bits = 0;
|
||||
}
|
||||
#else
|
||||
if(bw->bits > 0 && bw->bits < FLAC__HALF_TEMP_BITS) {
|
||||
bitpointer -= bw->bits;
|
||||
wide_accum = bw->accum << bitpointer;
|
||||
bw->bits = 0;
|
||||
}
|
||||
else if(bw->bits > FLAC__HALF_TEMP_BITS) {
|
||||
bitpointer -= (bw->bits - FLAC__HALF_TEMP_BITS);
|
||||
wide_accum = bw->accum << bitpointer;
|
||||
bw->accum >>= (bw->bits - FLAC__HALF_TEMP_BITS);
|
||||
bw->bits = FLAC__HALF_TEMP_BITS;
|
||||
}
|
||||
#endif
|
||||
|
||||
{
|
||||
FLAC__uint32 capacity_needed = (bw->words + 2 * nvals) * FLAC__BITS_PER_WORD + bw->bits;
|
||||
/* Reserve two words per symbol, add space only when very large symbols are encountered */
|
||||
/* Reserve one FLAC__TEMP_BITS per symbol, so checks for space are only necessary when very large symbols are encountered */
|
||||
FLAC__uint32 capacity_needed = bw->words * FLAC__BITS_PER_WORD + nvals * FLAC__TEMP_BITS + bw->bits;
|
||||
if(bw->capacity * FLAC__BITS_PER_WORD <= capacity_needed && !bitwriter_grow_(bw, capacity_needed - bw->capacity * FLAC__BITS_PER_WORD))
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
while(nvals) {
|
||||
/* fold signed to uint32_t; actual formula is: negative(v)? -2v-1 : 2v */
|
||||
@ -607,12 +624,11 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL
|
||||
uval &= mask2; /* mask off unused top bits */
|
||||
|
||||
|
||||
#if (ENABLE_64_BIT_WORDS == 0)
|
||||
if(total_bits <= bitpointer) {
|
||||
/* There is room enough to store the symbol whole at once */
|
||||
wide_accum |= (FLAC__bwtemp)(uval) << (bitpointer - total_bits);
|
||||
bitpointer -= total_bits;
|
||||
if(bitpointer <= FLAC__BITS_PER_WORD) {
|
||||
if(bitpointer <= FLAC__HALF_TEMP_BITS) {
|
||||
/* A word is finished, copy the upper 32 bits of the wide_accum */
|
||||
WIDE_ACCUM_TO_BW
|
||||
}
|
||||
@ -620,40 +636,40 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL
|
||||
else {
|
||||
/* The symbol needs to be split. This code isn't used often */
|
||||
/* First check for space in the bitwriter */
|
||||
if(total_bits > (2 * FLAC__BITS_PER_WORD)) {
|
||||
FLAC__uint32 oversize_in_bits = total_bits - 2 * FLAC__BITS_PER_WORD;
|
||||
FLAC__uint32 capacity_needed = (bw->words + 2 * nvals) * FLAC__BITS_PER_WORD + bw->bits + oversize_in_bits;
|
||||
if(total_bits > FLAC__TEMP_BITS) {
|
||||
FLAC__uint32 oversize_in_bits = total_bits - FLAC__TEMP_BITS;
|
||||
FLAC__uint32 capacity_needed = bw->words * FLAC__BITS_PER_WORD + bw->bits + nvals * FLAC__TEMP_BITS + oversize_in_bits;
|
||||
if(bw->capacity * FLAC__BITS_PER_WORD <= capacity_needed && !bitwriter_grow_(bw, capacity_needed * FLAC__BITS_PER_WORD - bw->capacity))
|
||||
return false;
|
||||
}
|
||||
if(msbits > bitpointer) {
|
||||
/* Conveniently empty wide_accum */
|
||||
msbits -= bitpointer - FLAC__BITS_PER_WORD;
|
||||
bitpointer = FLAC__BITS_PER_WORD;
|
||||
/* We have a lot of 0 bits to write, first align with bitwriter word */
|
||||
msbits -= bitpointer - FLAC__HALF_TEMP_BITS;
|
||||
bitpointer = FLAC__HALF_TEMP_BITS;
|
||||
WIDE_ACCUM_TO_BW
|
||||
while(msbits > bitpointer) {
|
||||
/* As the accumulator is already zero, we only need to
|
||||
* assign zeroes to the bitbuffer */
|
||||
EMPTY_WIDE_ACCUM_TO_BW
|
||||
bitpointer -= FLAC__BITS_PER_WORD;
|
||||
msbits -= FLAC__BITS_PER_WORD;
|
||||
WIDE_ACCUM_TO_BW
|
||||
bitpointer -= FLAC__HALF_TEMP_BITS;
|
||||
msbits -= FLAC__HALF_TEMP_BITS;
|
||||
}
|
||||
/* The remaining bits are zero, and the accumulator already is zero,
|
||||
* so just subtract the number of bits from bitpointer. When storing,
|
||||
* we can also just store 0 */
|
||||
bitpointer -= msbits;
|
||||
if(bitpointer <= FLAC__BITS_PER_WORD)
|
||||
EMPTY_WIDE_ACCUM_TO_BW
|
||||
if(bitpointer <= FLAC__HALF_TEMP_BITS)
|
||||
WIDE_ACCUM_TO_BW
|
||||
}
|
||||
else {
|
||||
bitpointer -= msbits;
|
||||
if(bitpointer <= FLAC__BITS_PER_WORD)
|
||||
if(bitpointer <= FLAC__HALF_TEMP_BITS)
|
||||
WIDE_ACCUM_TO_BW
|
||||
}
|
||||
/* The lsbs + stop bit always fit 32 bit, so this code mirrors the code above */
|
||||
wide_accum |= (FLAC__bwtemp)(uval) << (bitpointer - lsbits);
|
||||
bitpointer -= lsbits;
|
||||
if(bitpointer <= FLAC__BITS_PER_WORD) {
|
||||
if(bitpointer <= FLAC__HALF_TEMP_BITS) {
|
||||
/* A word is finished, copy the upper 32 bits of the wide_accum */
|
||||
WIDE_ACCUM_TO_BW
|
||||
}
|
||||
@ -661,85 +677,30 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL
|
||||
vals++;
|
||||
nvals--;
|
||||
}
|
||||
/* Now fixup remainer of wide_accum */
|
||||
/* Now fixup remainder of wide_accum */
|
||||
#if (ENABLE_64_BIT_WORDS == 0)
|
||||
if(bitpointer < FLAC__TEMP_BITS) {
|
||||
bw->accum = wide_accum >> bitpointer;
|
||||
bw->bits = FLAC__TEMP_BITS - bitpointer;
|
||||
}
|
||||
else {
|
||||
bw->accum = 0;
|
||||
bw->bits = 0;
|
||||
}
|
||||
#else
|
||||
|
||||
if(bw->bits && bw->bits + total_bits < FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current bwword */
|
||||
/* ^^^ if bw->bits is 0 then we may have filled the buffer and have no free bwword to work in */
|
||||
bw->bits += total_bits;
|
||||
uval |= mask1; /* set stop bit */
|
||||
uval &= mask2; /* mask off unused top bits */
|
||||
bw->accum <<= total_bits;
|
||||
bw->accum |= uval;
|
||||
if(bitpointer < FLAC__TEMP_BITS) {
|
||||
if(bw->bits == 0) {
|
||||
bw->accum = wide_accum >> bitpointer;
|
||||
bw->bits = FLAC__TEMP_BITS - bitpointer;
|
||||
}
|
||||
else if (bw->bits == FLAC__HALF_TEMP_BITS) {
|
||||
bw->accum <<= FLAC__TEMP_BITS - bitpointer;
|
||||
bw->accum |= (wide_accum >> bitpointer);
|
||||
bw->bits = FLAC__HALF_TEMP_BITS + FLAC__TEMP_BITS - bitpointer;
|
||||
}
|
||||
else {
|
||||
/* slightly pessimistic size check but faster than "<= bw->words + (bw->bits+msbits+lsbits+FLAC__BITS_PER_WORD-1)/FLAC__BITS_PER_WORD" */
|
||||
/* OPT: pessimism may cause flurry of false calls to grow_ which eat up all savings before it */
|
||||
if(bw->capacity <= bw->words + bw->bits + msbits + 1 /* lsbits always fit in 1 bwword */ && !bitwriter_grow_(bw, total_bits))
|
||||
return false;
|
||||
|
||||
if(msbits) {
|
||||
/* first part gets to word alignment */
|
||||
if(bw->bits) {
|
||||
left = FLAC__BITS_PER_WORD - bw->bits;
|
||||
if(msbits < left) {
|
||||
bw->accum <<= msbits;
|
||||
bw->bits += msbits;
|
||||
goto break1;
|
||||
}
|
||||
else {
|
||||
bw->accum <<= left;
|
||||
msbits -= left;
|
||||
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
|
||||
bw->bits = 0;
|
||||
}
|
||||
}
|
||||
/* do whole words */
|
||||
while(msbits >= FLAC__BITS_PER_WORD) {
|
||||
bw->buffer[bw->words++] = 0;
|
||||
msbits -= FLAC__BITS_PER_WORD;
|
||||
}
|
||||
/* do any leftovers */
|
||||
if(msbits > 0) {
|
||||
bw->accum = 0;
|
||||
bw->bits = msbits;
|
||||
}
|
||||
}
|
||||
break1:
|
||||
uval |= mask1; /* set stop bit */
|
||||
uval &= mask2; /* mask off unused top bits */
|
||||
|
||||
left = FLAC__BITS_PER_WORD - bw->bits;
|
||||
if(lsbits < left) {
|
||||
bw->accum <<= lsbits;
|
||||
bw->accum |= uval;
|
||||
bw->bits += lsbits;
|
||||
}
|
||||
else {
|
||||
/* if bw->bits == 0, left==FLAC__BITS_PER_WORD which will always
|
||||
* be > lsbits (because of previous assertions) so it would have
|
||||
* triggered the (lsbits<left) case above.
|
||||
*/
|
||||
FLAC__ASSERT(bw->bits);
|
||||
FLAC__ASSERT(left < FLAC__BITS_PER_WORD);
|
||||
bw->accum <<= left;
|
||||
bw->accum |= uval >> (bw->bits = lsbits - left);
|
||||
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
|
||||
bw->accum = uval; /* unused top bits can contain garbage */
|
||||
}
|
||||
FLAC__ASSERT(0);
|
||||
}
|
||||
vals++;
|
||||
nvals--;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user