mirror of https://github.com/xiph/flac
optimize special case in FLAC__bitwriter_write_rice_signed_block()
This commit is contained in:
parent
4cc675bf19
commit
eb6ef7d0fb
|
@ -556,60 +556,93 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL
|
|||
|
||||
msbits = uval >> parameter;
|
||||
|
||||
/* slightly pessimistic size check but faster than "<= bw->words + (bw->bits+msbits+lsbits+FLAC__BITS_PER_WORD-1)/FLAC__BITS_PER_WORD" */
|
||||
/* OPT: pessimism may cause flurry of false calls to grow_ which eat up all savings before it */
|
||||
if(bw->capacity <= bw->words + bw->bits + msbits + 1/*lsbits always fit in 1 bwword*/ && !bitwriter_grow_(bw, msbits+lsbits))
|
||||
return false;
|
||||
|
||||
if(msbits) {
|
||||
/* first part gets to word alignment */
|
||||
if(bw->bits) {
|
||||
left = FLAC__BITS_PER_WORD - bw->bits;
|
||||
if(msbits < left) {
|
||||
bw->accum <<= msbits;
|
||||
bw->bits += msbits;
|
||||
goto break1;
|
||||
}
|
||||
else {
|
||||
bw->accum <<= left;
|
||||
msbits -= left;
|
||||
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
|
||||
bw->bits = 0;
|
||||
}
|
||||
}
|
||||
/* do whole words */
|
||||
while(msbits >= FLAC__BITS_PER_WORD) {
|
||||
bw->buffer[bw->words++] = 0;
|
||||
msbits -= FLAC__BITS_PER_WORD;
|
||||
}
|
||||
/* do any leftovers */
|
||||
if(msbits > 0) {
|
||||
bw->accum = 0;
|
||||
bw->bits = msbits;
|
||||
}
|
||||
}
|
||||
break1:
|
||||
uval |= mask1; /* set stop bit */
|
||||
uval &= mask2; /* mask off unused top bits */
|
||||
|
||||
left = FLAC__BITS_PER_WORD - bw->bits;
|
||||
if(lsbits < left) {
|
||||
#if 1 /* OPT: can remove this special case if it doesn't make up for the extra compare */
|
||||
if(bw->bits + msbits + lsbits <= FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current bwword */
|
||||
bw->bits = bw->bits + msbits + lsbits;
|
||||
uval |= mask1; /* set stop bit */
|
||||
uval &= mask2; /* mask off unused top bits */
|
||||
/* NOT: bw->accum <<= msbits + lsbits because msbits+lsbits could be 32, then the shift would be a NOP */
|
||||
bw->accum <<= msbits;
|
||||
bw->accum <<= lsbits;
|
||||
bw->accum |= uval;
|
||||
bw->bits += lsbits;
|
||||
if(bw->bits == FLAC__BITS_PER_WORD) {
|
||||
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
|
||||
bw->bits = 0;
|
||||
/* burying the capacity check down here means we have to grow the buffer a little if there are more vals to do */
|
||||
if(bw->capacity <= bw->words && nvals > 1 && !bitwriter_grow_(bw, 1)) {
|
||||
FLAC__ASSERT(bw->capacity == bw->words);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* if bw->bits == 0, left==FLAC__BITS_PER_WORD which will always
|
||||
* be > lsbits (because of previous assertions) so it would have
|
||||
* triggered the (lsbits<left) case above.
|
||||
*/
|
||||
FLAC__ASSERT(bw->bits);
|
||||
FLAC__ASSERT(left < FLAC__BITS_PER_WORD);
|
||||
bw->accum <<= left;
|
||||
bw->accum |= uval >> (bw->bits = lsbits - left);
|
||||
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
|
||||
bw->accum = uval;
|
||||
#elif 0 /*@@@@@@ OPT: try this version with MSVC6 to see if better, not much difference for gcc-4 */
|
||||
if(bw->bits + msbits + lsbits < FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current bwword */
|
||||
bw->bits = bw->bits + msbits + lsbits;
|
||||
uval |= mask1; /* set stop bit */
|
||||
uval &= mask2; /* mask off unused top bits */
|
||||
bw->accum <<= msbits + lsbits;
|
||||
bw->accum |= uval;
|
||||
}
|
||||
else {
|
||||
#endif
|
||||
/* slightly pessimistic size check but faster than "<= bw->words + (bw->bits+msbits+lsbits+FLAC__BITS_PER_WORD-1)/FLAC__BITS_PER_WORD" */
|
||||
/* OPT: pessimism may cause flurry of false calls to grow_ which eat up all savings before it */
|
||||
if(bw->capacity <= bw->words + bw->bits + msbits + 1/*lsbits always fit in 1 bwword*/ && !bitwriter_grow_(bw, msbits+lsbits))
|
||||
return false;
|
||||
|
||||
if(msbits) {
|
||||
/* first part gets to word alignment */
|
||||
if(bw->bits) {
|
||||
left = FLAC__BITS_PER_WORD - bw->bits;
|
||||
if(msbits < left) {
|
||||
bw->accum <<= msbits;
|
||||
bw->bits += msbits;
|
||||
goto break1;
|
||||
}
|
||||
else {
|
||||
bw->accum <<= left;
|
||||
msbits -= left;
|
||||
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
|
||||
bw->bits = 0;
|
||||
}
|
||||
}
|
||||
/* do whole words */
|
||||
while(msbits >= FLAC__BITS_PER_WORD) {
|
||||
bw->buffer[bw->words++] = 0;
|
||||
msbits -= FLAC__BITS_PER_WORD;
|
||||
}
|
||||
/* do any leftovers */
|
||||
if(msbits > 0) {
|
||||
bw->accum = 0;
|
||||
bw->bits = msbits;
|
||||
}
|
||||
}
|
||||
break1:
|
||||
uval |= mask1; /* set stop bit */
|
||||
uval &= mask2; /* mask off unused top bits */
|
||||
|
||||
left = FLAC__BITS_PER_WORD - bw->bits;
|
||||
if(lsbits < left) {
|
||||
bw->accum <<= lsbits;
|
||||
bw->accum |= uval;
|
||||
bw->bits += lsbits;
|
||||
}
|
||||
else {
|
||||
/* if bw->bits == 0, left==FLAC__BITS_PER_WORD which will always
|
||||
* be > lsbits (because of previous assertions) so it would have
|
||||
* triggered the (lsbits<left) case above.
|
||||
*/
|
||||
FLAC__ASSERT(bw->bits);
|
||||
FLAC__ASSERT(left < FLAC__BITS_PER_WORD);
|
||||
bw->accum <<= left;
|
||||
bw->accum |= uval >> (bw->bits = lsbits - left);
|
||||
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
|
||||
bw->accum = uval;
|
||||
}
|
||||
#if 1
|
||||
}
|
||||
#endif
|
||||
vals++;
|
||||
nvals--;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue