Change FLAC__bitwriter_write_rice_signed_block for 64-bit words

This commit is contained in:
Martijn van Beurden 2023-02-18 22:21:38 +01:00
parent b698ed45f3
commit 9f3894f0d5

View File

@ -57,6 +57,7 @@ typedef FLAC__uint64 FLAC__bwtemp;
#define FLAC__BYTES_PER_WORD 4 /* sizeof bwword */
#define FLAC__BITS_PER_WORD 32
#define FLAC__TEMP_BITS 64
#define FLAC__HALF_TEMP_BITS 32
/* SWAP_BE_WORD_TO_HOST swaps bytes in a bwword (which is always big-endian) if necessary to match host byte order */
#if WORDS_BIGENDIAN
#define SWAP_BE_WORD_TO_HOST(x) (x)
@ -67,8 +68,11 @@ typedef FLAC__uint64 FLAC__bwtemp;
#else
typedef FLAC__uint64 bwword;
typedef FLAC__uint64 FLAC__bwtemp;
#define FLAC__BYTES_PER_WORD 8 /* sizeof bwword */
#define FLAC__BITS_PER_WORD 64
#define FLAC__TEMP_BITS 64
#define FLAC__HALF_TEMP_BITS 32
/* SWAP_BE_WORD_TO_HOST swaps bytes in a bwword (which is always big-endian) if necessary to match host byte order */
#if WORDS_BIGENDIAN
#define SWAP_BE_WORD_TO_HOST(x) (x)
@ -537,26 +541,31 @@ FLAC__bool FLAC__bitwriter_write_rice_signed(FLAC__BitWriter *bw, FLAC__int32 va
#if (ENABLE_64_BIT_WORDS == 0)
#define EMPTY_WIDE_ACCUM_TO_BW { \
bw->buffer[bw->words++] = 0; \
bitpointer += FLAC__BITS_PER_WORD; \
#define WIDE_ACCUM_TO_BW { \
bw->accum = wide_accum >> FLAC__HALF_TEMP_BITS; \
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); \
wide_accum <<= FLAC__HALF_TEMP_BITS; \
bitpointer += FLAC__HALF_TEMP_BITS; \
}
#if WORDS_BIGENDIAN
#define WIDE_ACCUM_TO_BW { \
bw->accum = *((FLAC__int32 *)&wide_accum); \
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); \
wide_accum <<= FLAC__BITS_PER_WORD; \
bitpointer += FLAC__BITS_PER_WORD; \
}
#else
#define WIDE_ACCUM_TO_BW { \
bw->accum = *(((FLAC__int32 *)&wide_accum)+1); \
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); \
wide_accum <<= FLAC__BITS_PER_WORD; \
bitpointer += FLAC__BITS_PER_WORD; \
FLAC__ASSERT(bw->bits % FLAC__HALF_TEMP_BITS == 0); \
if(bw->bits == 0) { \
bw->accum = wide_accum >> FLAC__HALF_TEMP_BITS; \
wide_accum <<= FLAC__HALF_TEMP_BITS; \
bw->bits = FLAC__HALF_TEMP_BITS; \
} \
else { \
bw->accum <<= FLAC__HALF_TEMP_BITS; \
bw->accum += wide_accum >> FLAC__HALF_TEMP_BITS; \
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); \
wide_accum <<= FLAC__HALF_TEMP_BITS; \
bw->bits = 0; \
} \
bitpointer += FLAC__HALF_TEMP_BITS; \
}
#endif
#endif
@ -567,32 +576,40 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL
FLAC__uint32 uval;
const uint32_t lsbits = 1 + parameter;
uint32_t msbits, total_bits;
#if (ENABLE_64_BIT_WORDS == 0)
FLAC__bwtemp wide_accum = 0;
FLAC__uint32 bitpointer = FLAC__TEMP_BITS;
#else
uint32_t left;
#endif
FLAC__ASSERT(0 != bw);
FLAC__ASSERT(0 != bw->buffer);
FLAC__ASSERT(parameter < 31);
/* WATCHOUT: code does not work with <32bit words; we can make things much faster with this assertion */
FLAC__ASSERT(FLAC__BITS_PER_WORD >= 32);
#if (ENABLE_64_BIT_WORDS == 0)
if(bw->bits > 0) {
bitpointer -= bw->bits;
wide_accum = (FLAC__bwtemp)(bw->accum) << bitpointer;
bw->bits = 0;
}
#else
if(bw->bits > 0 && bw->bits < FLAC__HALF_TEMP_BITS) {
bitpointer -= bw->bits;
wide_accum = bw->accum << bitpointer;
bw->bits = 0;
}
else if(bw->bits > FLAC__HALF_TEMP_BITS) {
bitpointer -= (bw->bits - FLAC__HALF_TEMP_BITS);
wide_accum = bw->accum << bitpointer;
bw->accum >>= (bw->bits - FLAC__HALF_TEMP_BITS);
bw->bits = FLAC__HALF_TEMP_BITS;
}
#endif
{
FLAC__uint32 capacity_needed = (bw->words + 2 * nvals) * FLAC__BITS_PER_WORD + bw->bits;
/* Reserve two words per symbol, add space only when very large symbols are encountered */
/* Reserve FLAC__TEMP_BITS bits per symbol up front, so that checks for space are only necessary when a very large symbol is encountered */
FLAC__uint32 capacity_needed = bw->words * FLAC__BITS_PER_WORD + nvals * FLAC__TEMP_BITS + bw->bits;
if(bw->capacity * FLAC__BITS_PER_WORD <= capacity_needed && !bitwriter_grow_(bw, capacity_needed - bw->capacity * FLAC__BITS_PER_WORD))
return false;
}
#endif
while(nvals) {
/* fold signed to uint32_t; actual formula is: negative(v)? -2v-1 : 2v */
@ -607,12 +624,11 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL
uval &= mask2; /* mask off unused top bits */
#if (ENABLE_64_BIT_WORDS == 0)
if(total_bits <= bitpointer) {
/* There is room enough to store the symbol whole at once */
wide_accum |= (FLAC__bwtemp)(uval) << (bitpointer - total_bits);
bitpointer -= total_bits;
if(bitpointer <= FLAC__BITS_PER_WORD) {
if(bitpointer <= FLAC__HALF_TEMP_BITS) {
/* A word is finished, copy the upper 32 bits of the wide_accum */
WIDE_ACCUM_TO_BW
}
@ -620,40 +636,40 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL
else {
/* The symbol needs to be split. This code isn't used often */
/* First check for space in the bitwriter */
if(total_bits > (2 * FLAC__BITS_PER_WORD)) {
FLAC__uint32 oversize_in_bits = total_bits - 2 * FLAC__BITS_PER_WORD;
FLAC__uint32 capacity_needed = (bw->words + 2 * nvals) * FLAC__BITS_PER_WORD + bw->bits + oversize_in_bits;
/* This symbol is wider than FLAC__TEMP_BITS, the amount reserved per symbol
 * before the loop, so check whether the extra bits still fit in the buffer. */
if(total_bits > FLAC__TEMP_BITS) {
/* Number of bits by which this symbol exceeds the per-symbol reservation */
FLAC__uint32 oversize_in_bits = total_bits - FLAC__TEMP_BITS;
/* Bit count: words already written, bits pending in the accumulator, one
 * FLAC__TEMP_BITS for each of the nvals symbols left, plus the excess */
FLAC__uint32 capacity_needed = bw->words * FLAC__BITS_PER_WORD + bw->bits + nvals * FLAC__TEMP_BITS + oversize_in_bits;
/* NOTE(review): the comparison scales bw->capacity by FLAC__BITS_PER_WORD and
 * compares it with capacity_needed, but the amount handed to bitwriter_grow_
 * scales capacity_needed instead and subtracts bw->capacity unscaled. The
 * earlier reservation in this function passes
 * capacity_needed - bw->capacity * FLAC__BITS_PER_WORD. The units look mixed
 * up here; confirm against the definition of bitwriter_grow_ which argument
 * it expects. */
if(bw->capacity * FLAC__BITS_PER_WORD <= capacity_needed && !bitwriter_grow_(bw, capacity_needed * FLAC__BITS_PER_WORD - bw->capacity))
return false;
}
if(msbits > bitpointer) {
/* Conveniently empty wide_accum */
msbits -= bitpointer - FLAC__BITS_PER_WORD;
bitpointer = FLAC__BITS_PER_WORD;
/* We have a lot of 0 bits to write, first align with bitwriter word */
msbits -= bitpointer - FLAC__HALF_TEMP_BITS;
bitpointer = FLAC__HALF_TEMP_BITS;
WIDE_ACCUM_TO_BW
while(msbits > bitpointer) {
/* As the accumulator is already zero, we only need to
* assign zeroes to the bitbuffer */
EMPTY_WIDE_ACCUM_TO_BW
bitpointer -= FLAC__BITS_PER_WORD;
msbits -= FLAC__BITS_PER_WORD;
WIDE_ACCUM_TO_BW
bitpointer -= FLAC__HALF_TEMP_BITS;
msbits -= FLAC__HALF_TEMP_BITS;
}
/* The remaining bits are zero, and the accumulator already is zero,
* so just subtract the number of bits from bitpointer. When storing,
* we can also just store 0 */
bitpointer -= msbits;
if(bitpointer <= FLAC__BITS_PER_WORD)
EMPTY_WIDE_ACCUM_TO_BW
if(bitpointer <= FLAC__HALF_TEMP_BITS)
WIDE_ACCUM_TO_BW
}
else {
bitpointer -= msbits;
if(bitpointer <= FLAC__BITS_PER_WORD)
if(bitpointer <= FLAC__HALF_TEMP_BITS)
WIDE_ACCUM_TO_BW
}
/* The lsbs + stop bit always fit in 32 bits, so this code mirrors the code above */
wide_accum |= (FLAC__bwtemp)(uval) << (bitpointer - lsbits);
bitpointer -= lsbits;
if(bitpointer <= FLAC__BITS_PER_WORD) {
if(bitpointer <= FLAC__HALF_TEMP_BITS) {
/* A word is finished, copy the upper 32 bits of the wide_accum */
WIDE_ACCUM_TO_BW
}
@ -661,85 +677,30 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL
vals++;
nvals--;
}
/* Now fixup remainer of wide_accum */
/* Now fixup remainder of wide_accum */
#if (ENABLE_64_BIT_WORDS == 0)
if(bitpointer < FLAC__TEMP_BITS) {
bw->accum = wide_accum >> bitpointer;
bw->bits = FLAC__TEMP_BITS - bitpointer;
}
else {
bw->accum = 0;
bw->bits = 0;
}
#else
if(bw->bits && bw->bits + total_bits < FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current bwword */
/* ^^^ if bw->bits is 0 then we may have filled the buffer and have no free bwword to work in */
bw->bits += total_bits;
uval |= mask1; /* set stop bit */
uval &= mask2; /* mask off unused top bits */
bw->accum <<= total_bits;
bw->accum |= uval;
if(bitpointer < FLAC__TEMP_BITS) {
if(bw->bits == 0) {
bw->accum = wide_accum >> bitpointer;
bw->bits = FLAC__TEMP_BITS - bitpointer;
}
else if (bw->bits == FLAC__HALF_TEMP_BITS) {
bw->accum <<= FLAC__TEMP_BITS - bitpointer;
bw->accum |= (wide_accum >> bitpointer);
bw->bits = FLAC__HALF_TEMP_BITS + FLAC__TEMP_BITS - bitpointer;
}
else {
/* slightly pessimistic size check but faster than "<= bw->words + (bw->bits+msbits+lsbits+FLAC__BITS_PER_WORD-1)/FLAC__BITS_PER_WORD" */
/* OPT: pessimism may cause flurry of false calls to grow_ which eat up all savings before it */
if(bw->capacity <= bw->words + bw->bits + msbits + 1 /* lsbits always fit in 1 bwword */ && !bitwriter_grow_(bw, total_bits))
return false;
if(msbits) {
/* first part gets to word alignment */
if(bw->bits) {
left = FLAC__BITS_PER_WORD - bw->bits;
if(msbits < left) {
bw->accum <<= msbits;
bw->bits += msbits;
goto break1;
}
else {
bw->accum <<= left;
msbits -= left;
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
bw->bits = 0;
}
}
/* do whole words */
while(msbits >= FLAC__BITS_PER_WORD) {
bw->buffer[bw->words++] = 0;
msbits -= FLAC__BITS_PER_WORD;
}
/* do any leftovers */
if(msbits > 0) {
bw->accum = 0;
bw->bits = msbits;
}
}
break1:
uval |= mask1; /* set stop bit */
uval &= mask2; /* mask off unused top bits */
left = FLAC__BITS_PER_WORD - bw->bits;
if(lsbits < left) {
bw->accum <<= lsbits;
bw->accum |= uval;
bw->bits += lsbits;
}
else {
/* if bw->bits == 0, left==FLAC__BITS_PER_WORD which will always
* be > lsbits (because of previous assertions) so it would have
* triggered the (lsbits<left) case above.
*/
FLAC__ASSERT(bw->bits);
FLAC__ASSERT(left < FLAC__BITS_PER_WORD);
bw->accum <<= left;
bw->accum |= uval >> (bw->bits = lsbits - left);
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
bw->accum = uval; /* unused top bits can contain garbage */
}
FLAC__ASSERT(0);
}
vals++;
nvals--;
}
#endif
return true;
}