optimize special case in FLAC__bitwriter_write_rice_signed_block()

This commit is contained in:
Josh Coalson 2007-04-04 00:57:03 +00:00
parent 4cc675bf19
commit eb6ef7d0fb
1 changed files with 82 additions and 49 deletions

View File

@ -556,60 +556,93 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL
msbits = uval >> parameter;
/* slightly pessimistic size check but faster than "<= bw->words + (bw->bits+msbits+lsbits+FLAC__BITS_PER_WORD-1)/FLAC__BITS_PER_WORD" */
/* OPT: pessimism may cause flurry of false calls to grow_ which eat up all savings before it */
if(bw->capacity <= bw->words + bw->bits + msbits + 1/*lsbits always fit in 1 bwword*/ && !bitwriter_grow_(bw, msbits+lsbits))
return false;
if(msbits) {
/* first part gets to word alignment */
if(bw->bits) {
left = FLAC__BITS_PER_WORD - bw->bits;
if(msbits < left) {
bw->accum <<= msbits;
bw->bits += msbits;
goto break1;
}
else {
bw->accum <<= left;
msbits -= left;
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
bw->bits = 0;
}
}
/* do whole words */
while(msbits >= FLAC__BITS_PER_WORD) {
bw->buffer[bw->words++] = 0;
msbits -= FLAC__BITS_PER_WORD;
}
/* do any leftovers */
if(msbits > 0) {
bw->accum = 0;
bw->bits = msbits;
}
}
break1:
uval |= mask1; /* set stop bit */
uval &= mask2; /* mask off unused top bits */
left = FLAC__BITS_PER_WORD - bw->bits;
if(lsbits < left) {
#if 1 /* OPT: can remove this special case if it doesn't make up for the extra compare */
if(bw->bits + msbits + lsbits <= FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current bwword */
bw->bits = bw->bits + msbits + lsbits;
uval |= mask1; /* set stop bit */
uval &= mask2; /* mask off unused top bits */
/* NOT: bw->accum <<= msbits + lsbits because msbits+lsbits could be 32, then the shift would be a NOP */
bw->accum <<= msbits;
bw->accum <<= lsbits;
bw->accum |= uval;
bw->bits += lsbits;
if(bw->bits == FLAC__BITS_PER_WORD) {
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
bw->bits = 0;
/* burying the capacity check down here means we have to grow the buffer a little if there are more vals to do */
if(bw->capacity <= bw->words && nvals > 1 && !bitwriter_grow_(bw, 1)) {
FLAC__ASSERT(bw->capacity == bw->words);
return false;
}
}
}
else {
/* if bw->bits == 0, left==FLAC__BITS_PER_WORD which will always
* be > lsbits (because of previous assertions) so it would have
* triggered the (lsbits<left) case above.
*/
FLAC__ASSERT(bw->bits);
FLAC__ASSERT(left < FLAC__BITS_PER_WORD);
bw->accum <<= left;
bw->accum |= uval >> (bw->bits = lsbits - left);
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
bw->accum = uval;
#elif 0 /*@@@@@@ OPT: try this version with MSVC6 to see if better, not much difference for gcc-4 */
if(bw->bits + msbits + lsbits < FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current bwword */
bw->bits = bw->bits + msbits + lsbits;
uval |= mask1; /* set stop bit */
uval &= mask2; /* mask off unused top bits */
bw->accum <<= msbits + lsbits;
bw->accum |= uval;
}
else {
#endif
/* slightly pessimistic size check but faster than "<= bw->words + (bw->bits+msbits+lsbits+FLAC__BITS_PER_WORD-1)/FLAC__BITS_PER_WORD" */
/* OPT: pessimism may cause flurry of false calls to grow_ which eat up all savings before it */
if(bw->capacity <= bw->words + bw->bits + msbits + 1/*lsbits always fit in 1 bwword*/ && !bitwriter_grow_(bw, msbits+lsbits))
return false;
if(msbits) {
/* first part gets to word alignment */
if(bw->bits) {
left = FLAC__BITS_PER_WORD - bw->bits;
if(msbits < left) {
bw->accum <<= msbits;
bw->bits += msbits;
goto break1;
}
else {
bw->accum <<= left;
msbits -= left;
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
bw->bits = 0;
}
}
/* do whole words */
while(msbits >= FLAC__BITS_PER_WORD) {
bw->buffer[bw->words++] = 0;
msbits -= FLAC__BITS_PER_WORD;
}
/* do any leftovers */
if(msbits > 0) {
bw->accum = 0;
bw->bits = msbits;
}
}
break1:
uval |= mask1; /* set stop bit */
uval &= mask2; /* mask off unused top bits */
left = FLAC__BITS_PER_WORD - bw->bits;
if(lsbits < left) {
bw->accum <<= lsbits;
bw->accum |= uval;
bw->bits += lsbits;
}
else {
/* if bw->bits == 0, left==FLAC__BITS_PER_WORD which will always
* be > lsbits (because of previous assertions) so it would have
* triggered the (lsbits<left) case above.
*/
FLAC__ASSERT(bw->bits);
FLAC__ASSERT(left < FLAC__BITS_PER_WORD);
bw->accum <<= left;
bw->accum |= uval >> (bw->bits = lsbits - left);
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
bw->accum = uval;
}
#if 1
}
#endif
vals++;
nvals--;
}