Speed up set_partitioned_rice_

This commit is contained in:
Martijn van Beurden 2023-03-08 12:26:44 +01:00 committed by GitHub
parent e2faeb80ec
commit 4d6d8b342f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -4417,9 +4417,10 @@ FLAC__bool set_partitioned_rice_(
uint32_t bits_ = FLAC__ENTROPY_CODING_METHOD_TYPE_LEN + FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ORDER_LEN;
uint32_t *parameters, *raw_bits;
uint32_t partition, residual_sample;
uint32_t partition_samples;
uint32_t partition_samples, partition_samples_base;
uint32_t partition_samples_fixed_point_divisor, partition_samples_fixed_point_divisor_base;
const uint32_t partitions = 1u << partition_order;
FLAC__uint64 mean, k;
FLAC__uint64 mean;
#ifdef ENABLE_RICE_PARAMETER_SEARCH
uint32_t min_rice_parameter, max_rice_parameter;
#else
@ -4431,52 +4432,38 @@ FLAC__bool set_partitioned_rice_(
parameters = partitioned_rice_contents->parameters;
raw_bits = partitioned_rice_contents->raw_bits;
partition_samples_base = (residual_samples+predictor_order) >> partition_order;
/* Integer division is slow. To speed things up, precalculate a fixed-point
* divisor (the reciprocal of the partition size, scaled by 2^18), as all
* partitions except the first are the same size. 18 bits are used because
* the maximum block size is 65535, so the maximum partition size for
* partitions other than 0 is 32767 (15 bits), and the maximum absolute
* residual is 2^31, which leaves 18 bits in the 64-bit intermediate */
partition_samples_fixed_point_divisor_base = 0x40000 / partition_samples_base;
for(partition = residual_sample = 0; partition < partitions; partition++) {
partition_samples = (residual_samples+predictor_order) >> partition_order;
if(partition == 0) {
partition_samples = partition_samples_base;
if(partition > 0) {
partition_samples_fixed_point_divisor = partition_samples_fixed_point_divisor_base;
}
else {
if(partition_samples <= predictor_order)
return false;
else
partition_samples -= predictor_order;
partition_samples_fixed_point_divisor = 0x40000 / partition_samples;
}
mean = abs_residual_partition_sums[partition];
/* we are basically calculating the size in bits of the
* average residual magnitude in the partition:
* rice_parameter = floor(log2(mean/partition_samples))
* 'mean' is not a good name for the variable, it is
/* 'mean' is not a good name for the variable, it is
* actually the sum of magnitudes of all residual values
* in the partition, so the actual mean is
* mean/partition_samples
*/
#if 0 /* old simple code */
for(rice_parameter = 0, k = partition_samples; k < mean; rice_parameter++, k <<= 1)
;
#else
#if defined FLAC__CPU_X86_64 /* and other 64-bit arch, too */
if(mean <= 0x80000000/512) { /* 512: more or less optimal for both 16- and 24-bit input */
#else
if(mean <= 0x80000000/8) { /* 32-bit arch: use 32-bit math if possible */
#endif
FLAC__uint32 k2, mean2 = (FLAC__uint32) mean;
rice_parameter = 0; k2 = partition_samples;
while(k2*8 < mean2) { /* requires: mean <= (2^31)/8 */
rice_parameter += 4; k2 <<= 4; /* tuned for 16-bit input */
}
while(k2 < mean2) { /* requires: mean <= 2^31 */
rice_parameter++; k2 <<= 1;
}
}
else {
rice_parameter = 0; k = partition_samples;
if(mean <= FLAC__U64L(0x8000000000000000)/128) /* usually mean is _much_ smaller than this value */
while(k*128 < mean) { /* requires: mean <= (2^63)/128 */
rice_parameter += 8; k <<= 8; /* tuned for 24-bit input */
}
while(k < mean) { /* requires: mean <= 2^63 */
rice_parameter++; k <<= 1;
}
}
#endif
if(mean < 2 || (((mean - 1)*partition_samples_fixed_point_divisor)>>18) == 0)
rice_parameter = 0;
else
rice_parameter = FLAC__bitmath_ilog2_wide(((mean - 1)*partition_samples_fixed_point_divisor)>>18) + 1;
if(rice_parameter >= rice_parameter_limit) {
#ifndef NDEBUG
fprintf(stderr, "clipping rice_parameter (%u -> %u) @6\n", rice_parameter, rice_parameter_limit - 1);