mirror of https://github.com/xiph/flac
optimize special case in FLAC__bitwriter_write_rice_signed_block()
This commit is contained in:
parent
4cc675bf19
commit
eb6ef7d0fb
|
@ -556,60 +556,93 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL
|
||||||
|
|
||||||
msbits = uval >> parameter;
|
msbits = uval >> parameter;
|
||||||
|
|
||||||
/* slightly pessimistic size check but faster than "<= bw->words + (bw->bits+msbits+lsbits+FLAC__BITS_PER_WORD-1)/FLAC__BITS_PER_WORD" */
|
#if 1 /* OPT: can remove this special case if it doesn't make up for the extra compare */
|
||||||
/* OPT: pessimism may cause flurry of false calls to grow_ which eat up all savings before it */
|
if(bw->bits + msbits + lsbits <= FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current bwword */
|
||||||
if(bw->capacity <= bw->words + bw->bits + msbits + 1/*lsbits always fit in 1 bwword*/ && !bitwriter_grow_(bw, msbits+lsbits))
|
bw->bits = bw->bits + msbits + lsbits;
|
||||||
return false;
|
uval |= mask1; /* set stop bit */
|
||||||
|
uval &= mask2; /* mask off unused top bits */
|
||||||
if(msbits) {
|
/* NOT: bw->accum <<= msbits + lsbits because msbits+lsbits could be 32, then the shift would be a NOP */
|
||||||
/* first part gets to word alignment */
|
bw->accum <<= msbits;
|
||||||
if(bw->bits) {
|
|
||||||
left = FLAC__BITS_PER_WORD - bw->bits;
|
|
||||||
if(msbits < left) {
|
|
||||||
bw->accum <<= msbits;
|
|
||||||
bw->bits += msbits;
|
|
||||||
goto break1;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
bw->accum <<= left;
|
|
||||||
msbits -= left;
|
|
||||||
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
|
|
||||||
bw->bits = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* do whole words */
|
|
||||||
while(msbits >= FLAC__BITS_PER_WORD) {
|
|
||||||
bw->buffer[bw->words++] = 0;
|
|
||||||
msbits -= FLAC__BITS_PER_WORD;
|
|
||||||
}
|
|
||||||
/* do any leftovers */
|
|
||||||
if(msbits > 0) {
|
|
||||||
bw->accum = 0;
|
|
||||||
bw->bits = msbits;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break1:
|
|
||||||
uval |= mask1; /* set stop bit */
|
|
||||||
uval &= mask2; /* mask off unused top bits */
|
|
||||||
|
|
||||||
left = FLAC__BITS_PER_WORD - bw->bits;
|
|
||||||
if(lsbits < left) {
|
|
||||||
bw->accum <<= lsbits;
|
bw->accum <<= lsbits;
|
||||||
bw->accum |= uval;
|
bw->accum |= uval;
|
||||||
bw->bits += lsbits;
|
if(bw->bits == FLAC__BITS_PER_WORD) {
|
||||||
|
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
|
||||||
|
bw->bits = 0;
|
||||||
|
/* burying the capacity check down here means we have to grow the buffer a little if there are more vals to do */
|
||||||
|
if(bw->capacity <= bw->words && nvals > 1 && !bitwriter_grow_(bw, 1)) {
|
||||||
|
FLAC__ASSERT(bw->capacity == bw->words);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* if bw->bits == 0, left==FLAC__BITS_PER_WORD which will always
|
#elif 0 /*@@@@@@ OPT: try this version with MSVC6 to see if better, not much difference for gcc-4 */
|
||||||
* be > lsbits (because of previous assertions) so it would have
|
if(bw->bits + msbits + lsbits < FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current bwword */
|
||||||
* triggered the (lsbits<left) case above.
|
bw->bits = bw->bits + msbits + lsbits;
|
||||||
*/
|
uval |= mask1; /* set stop bit */
|
||||||
FLAC__ASSERT(bw->bits);
|
uval &= mask2; /* mask off unused top bits */
|
||||||
FLAC__ASSERT(left < FLAC__BITS_PER_WORD);
|
bw->accum <<= msbits + lsbits;
|
||||||
bw->accum <<= left;
|
bw->accum |= uval;
|
||||||
bw->accum |= uval >> (bw->bits = lsbits - left);
|
|
||||||
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
|
|
||||||
bw->accum = uval;
|
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
#endif
|
||||||
|
/* slightly pessimistic size check but faster than "<= bw->words + (bw->bits+msbits+lsbits+FLAC__BITS_PER_WORD-1)/FLAC__BITS_PER_WORD" */
|
||||||
|
/* OPT: pessimism may cause flurry of false calls to grow_ which eat up all savings before it */
|
||||||
|
if(bw->capacity <= bw->words + bw->bits + msbits + 1/*lsbits always fit in 1 bwword*/ && !bitwriter_grow_(bw, msbits+lsbits))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if(msbits) {
|
||||||
|
/* first part gets to word alignment */
|
||||||
|
if(bw->bits) {
|
||||||
|
left = FLAC__BITS_PER_WORD - bw->bits;
|
||||||
|
if(msbits < left) {
|
||||||
|
bw->accum <<= msbits;
|
||||||
|
bw->bits += msbits;
|
||||||
|
goto break1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
bw->accum <<= left;
|
||||||
|
msbits -= left;
|
||||||
|
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
|
||||||
|
bw->bits = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* do whole words */
|
||||||
|
while(msbits >= FLAC__BITS_PER_WORD) {
|
||||||
|
bw->buffer[bw->words++] = 0;
|
||||||
|
msbits -= FLAC__BITS_PER_WORD;
|
||||||
|
}
|
||||||
|
/* do any leftovers */
|
||||||
|
if(msbits > 0) {
|
||||||
|
bw->accum = 0;
|
||||||
|
bw->bits = msbits;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break1:
|
||||||
|
uval |= mask1; /* set stop bit */
|
||||||
|
uval &= mask2; /* mask off unused top bits */
|
||||||
|
|
||||||
|
left = FLAC__BITS_PER_WORD - bw->bits;
|
||||||
|
if(lsbits < left) {
|
||||||
|
bw->accum <<= lsbits;
|
||||||
|
bw->accum |= uval;
|
||||||
|
bw->bits += lsbits;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* if bw->bits == 0, left==FLAC__BITS_PER_WORD which will always
|
||||||
|
* be > lsbits (because of previous assertions) so it would have
|
||||||
|
* triggered the (lsbits<left) case above.
|
||||||
|
*/
|
||||||
|
FLAC__ASSERT(bw->bits);
|
||||||
|
FLAC__ASSERT(left < FLAC__BITS_PER_WORD);
|
||||||
|
bw->accum <<= left;
|
||||||
|
bw->accum |= uval >> (bw->bits = lsbits - left);
|
||||||
|
bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
|
||||||
|
bw->accum = uval;
|
||||||
|
}
|
||||||
|
#if 1
|
||||||
|
}
|
||||||
|
#endif
|
||||||
vals++;
|
vals++;
|
||||||
nvals--;
|
nvals--;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue