Enable encoder to use INT32_MIN as residual value
Because abs(INT32_MIN) is undefined, some extra work was needed to let the encoder use INT32_MIN as a residual value. Although the expected compression gain is zero, this is done to ensure full coverage of the specification in this regard.
This commit is contained in:
parent
633ab36ec5
commit
7e0a0e5723
@ -377,33 +377,32 @@ uint32_t FLAC__fixed_compute_best_predictor_limit_residual(const FLAC__int32 dat
|
||||
#endif
|
||||
{
|
||||
FLAC__uint64 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0, smallest_error = UINT64_MAX;
|
||||
FLAC__uint64 error_0, error_1, error_2, error_3, error_4;
|
||||
FLAC__int64 error_0, error_1, error_2, error_3, error_4;
|
||||
FLAC__bool order_0_is_valid = true, order_1_is_valid = true, order_2_is_valid = true, order_3_is_valid = true, order_4_is_valid = true;
|
||||
uint32_t order = 0;
|
||||
|
||||
for(int i = 0; i < (int)data_len; i++) {
|
||||
error_0 = local_abs64((FLAC__int64)data[i]);
|
||||
error_1 = (i > 0) ? local_abs64((FLAC__int64)data[i] - data[i-1]) : 0 ;
|
||||
error_2 = (i > 1) ? local_abs64((FLAC__int64)data[i] - 2 * (FLAC__int64)data[i-1] + data[i-2]) : 0;
|
||||
error_3 = (i > 2) ? local_abs64((FLAC__int64)data[i] - 3 * (FLAC__int64)data[i-1] + 3 * (FLAC__int64)data[i-2] - data[i-3]) : 0;
|
||||
error_4 = (i > 3) ? local_abs64((FLAC__int64)data[i] - 4 * (FLAC__int64)data[i-1] + 6 * (FLAC__int64)data[i-2] - 4 * (FLAC__int64)data[i-3] + data[i-4]) : 0;
|
||||
error_0 = (FLAC__int64)data[i];
|
||||
error_1 = (i > 0) ? (FLAC__int64)data[i] - data[i-1] : 0 ;
|
||||
error_2 = (i > 1) ? (FLAC__int64)data[i] - 2 * (FLAC__int64)data[i-1] + data[i-2] : 0;
|
||||
error_3 = (i > 2) ? (FLAC__int64)data[i] - 3 * (FLAC__int64)data[i-1] + 3 * (FLAC__int64)data[i-2] - data[i-3] : 0;
|
||||
error_4 = (i > 3) ? (FLAC__int64)data[i] - 4 * (FLAC__int64)data[i-1] + 6 * (FLAC__int64)data[i-2] - 4 * (FLAC__int64)data[i-3] + data[i-4] : 0;
|
||||
|
||||
total_error_0 += error_0;
|
||||
total_error_1 += error_1;
|
||||
total_error_2 += error_2;
|
||||
total_error_3 += error_3;
|
||||
total_error_4 += error_4;
|
||||
total_error_0 += local_abs64(error_0);
|
||||
total_error_1 += local_abs64(error_1);
|
||||
total_error_2 += local_abs64(error_2);
|
||||
total_error_3 += local_abs64(error_3);
|
||||
total_error_4 += local_abs64(error_4);
|
||||
|
||||
/* residual must not be INT32_MIN because abs(INT32_MIN) is undefined */
|
||||
if(error_0 > INT32_MAX)
|
||||
if(error_0 > INT32_MAX || error_0 < INT32_MIN)
|
||||
order_0_is_valid = false;
|
||||
if(error_1 > INT32_MAX)
|
||||
if(error_1 > INT32_MAX || error_1 < INT32_MIN)
|
||||
order_1_is_valid = false;
|
||||
if(error_2 > INT32_MAX)
|
||||
if(error_2 > INT32_MAX || error_2 < INT32_MIN)
|
||||
order_2_is_valid = false;
|
||||
if(error_3 > INT32_MAX)
|
||||
if(error_3 > INT32_MAX || error_3 < INT32_MIN)
|
||||
order_3_is_valid = false;
|
||||
if(error_4 > INT32_MAX)
|
||||
if(error_4 > INT32_MAX || error_4 < INT32_MIN)
|
||||
order_4_is_valid = false;
|
||||
}
|
||||
|
||||
@ -423,33 +422,33 @@ uint32_t FLAC__fixed_compute_best_predictor_limit_residual_33bit(const FLAC__int
|
||||
#endif
|
||||
{
|
||||
FLAC__uint64 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0, smallest_error = UINT64_MAX;
|
||||
FLAC__uint64 error_0, error_1, error_2, error_3, error_4;
|
||||
FLAC__int64 error_0, error_1, error_2, error_3, error_4;
|
||||
FLAC__bool order_0_is_valid = true, order_1_is_valid = true, order_2_is_valid = true, order_3_is_valid = true, order_4_is_valid = true;
|
||||
uint32_t order = 0;
|
||||
|
||||
for(int i = 0; i < (int)data_len; i++) {
|
||||
error_0 = local_abs64(data[i]);
|
||||
error_1 = (i > 0) ? local_abs64(data[i] - data[i-1]) : 0 ;
|
||||
error_2 = (i > 1) ? local_abs64(data[i] - 2 * data[i-1] + data[i-2]) : 0;
|
||||
error_3 = (i > 2) ? local_abs64(data[i] - 3 * data[i-1] + 3 * data[i-2] - data[i-3]) : 0;
|
||||
error_4 = (i > 3) ? local_abs64(data[i] - 4 * data[i-1] + 6 * data[i-2] - 4 * data[i-3] + data[i-4]) : 0;
|
||||
error_0 = data[i];
|
||||
error_1 = (i > 0) ? data[i] - data[i-1] : 0 ;
|
||||
error_2 = (i > 1) ? data[i] - 2 * data[i-1] + data[i-2] : 0;
|
||||
error_3 = (i > 2) ? data[i] - 3 * data[i-1] + 3 * data[i-2] - data[i-3] : 0;
|
||||
error_4 = (i > 3) ? data[i] - 4 * data[i-1] + 6 * data[i-2] - 4 * data[i-3] + data[i-4] : 0;
|
||||
|
||||
total_error_0 += error_0;
|
||||
total_error_1 += error_1;
|
||||
total_error_2 += error_2;
|
||||
total_error_3 += error_3;
|
||||
total_error_4 += error_4;
|
||||
total_error_0 += local_abs64(error_0);
|
||||
total_error_1 += local_abs64(error_1);
|
||||
total_error_2 += local_abs64(error_2);
|
||||
total_error_3 += local_abs64(error_3);
|
||||
total_error_4 += local_abs64(error_4);
|
||||
|
||||
/* residual must not be INT32_MIN because abs(INT32_MIN) is undefined */
|
||||
if(error_0 > INT32_MAX)
|
||||
|
||||
if(error_0 > INT32_MAX || error_0 < INT32_MIN)
|
||||
order_0_is_valid = false;
|
||||
if(error_1 > INT32_MAX)
|
||||
if(error_1 > INT32_MAX || error_1 < INT32_MIN)
|
||||
order_1_is_valid = false;
|
||||
if(error_2 > INT32_MAX)
|
||||
if(error_2 > INT32_MAX || error_2 < INT32_MIN)
|
||||
order_2_is_valid = false;
|
||||
if(error_3 > INT32_MAX)
|
||||
if(error_3 > INT32_MAX || error_3 < INT32_MIN)
|
||||
order_3_is_valid = false;
|
||||
if(error_4 > INT32_MAX)
|
||||
if(error_4 > INT32_MAX || error_4 < INT32_MIN)
|
||||
order_4_is_valid = false;
|
||||
}
|
||||
|
||||
|
@ -37,29 +37,23 @@
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This is used to avoid overflow with unusual signals in 32-bit
|
||||
* accumulator in the *precompute_partition_info_sums_* functions.
|
||||
*/
|
||||
#define FLAC__MAX_EXTRA_RESIDUAL_BPS 4
|
||||
|
||||
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
||||
#include "private/cpu.h"
|
||||
#include "FLAC/format.h"
|
||||
|
||||
#ifdef FLAC__SSE2_SUPPORTED
|
||||
extern void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
|
||||
uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps);
|
||||
uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps);
|
||||
#endif
|
||||
|
||||
#ifdef FLAC__SSSE3_SUPPORTED
|
||||
extern void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
|
||||
uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps);
|
||||
uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps);
|
||||
#endif
|
||||
|
||||
#ifdef FLAC__AVX2_SUPPORTED
|
||||
extern void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
|
||||
uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps);
|
||||
uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -828,8 +828,7 @@ FLAC__bool FLAC__lpc_compute_residual_from_qlp_coefficients_limit_residual(const
|
||||
case 1: sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1];
|
||||
}
|
||||
residual_to_check = data[i] - (sum >> lp_quantization);
|
||||
/* residual must not be INT32_MIN because abs(INT32_MIN) is undefined */
|
||||
if(residual_to_check <= INT32_MIN || residual_to_check > INT32_MAX)
|
||||
if(residual_to_check < INT32_MIN || residual_to_check > INT32_MAX)
|
||||
return false;
|
||||
else
|
||||
residual[i] = residual_to_check;
|
||||
@ -882,8 +881,7 @@ FLAC__bool FLAC__lpc_compute_residual_from_qlp_coefficients_limit_residual_33bit
|
||||
case 1: sum += qlp_coeff[ 0] * data[i- 1];
|
||||
}
|
||||
residual_to_check = data[i] - (sum >> lp_quantization);
|
||||
/* residual must not be INT32_MIN because abs(INT32_MIN) is undefined */
|
||||
if(residual_to_check <= INT32_MIN || residual_to_check > INT32_MAX)
|
||||
if(residual_to_check < INT32_MIN || residual_to_check > INT32_MAX)
|
||||
return false;
|
||||
else
|
||||
residual[i] = residual_to_check;
|
||||
|
@ -231,7 +231,7 @@ static uint32_t find_best_partition_order_(
|
||||
uint32_t rice_parameter_limit,
|
||||
uint32_t min_partition_order,
|
||||
uint32_t max_partition_order,
|
||||
uint32_t bps,
|
||||
uint32_t max_residual_bps,
|
||||
FLAC__bool do_escape_coding,
|
||||
uint32_t rice_parameter_search_dist,
|
||||
FLAC__EntropyCodingMethod *best_ecm
|
||||
@ -244,7 +244,7 @@ static void precompute_partition_info_sums_(
|
||||
uint32_t predictor_order,
|
||||
uint32_t min_partition_order,
|
||||
uint32_t max_partition_order,
|
||||
uint32_t bps
|
||||
uint32_t max_residual_bps
|
||||
);
|
||||
|
||||
static void precompute_partition_info_escapes_(
|
||||
@ -349,7 +349,7 @@ typedef struct FLAC__StreamEncoderPrivate {
|
||||
uint32_t current_frame_number;
|
||||
FLAC__MD5Context md5context;
|
||||
FLAC__CPUInfo cpuinfo;
|
||||
void (*local_precompute_partition_info_sums)(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[], uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps);
|
||||
void (*local_precompute_partition_info_sums)(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[], uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps);
|
||||
#ifndef FLAC__INTEGER_ONLY_LIBRARY
|
||||
uint32_t (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
|
||||
uint32_t (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
|
||||
@ -3873,7 +3873,7 @@ uint32_t evaluate_fixed_subframe_(
|
||||
rice_parameter_limit,
|
||||
min_partition_order,
|
||||
max_partition_order,
|
||||
subframe_bps,
|
||||
(subframe_bps + order),
|
||||
do_escape_coding,
|
||||
rice_parameter_search_dist,
|
||||
&subframe->data.fixed.entropy_coding_method
|
||||
@ -3972,7 +3972,7 @@ uint32_t evaluate_lpc_subframe_(
|
||||
rice_parameter_limit,
|
||||
min_partition_order,
|
||||
max_partition_order,
|
||||
subframe_bps,
|
||||
FLAC__lpc_max_residual_bps(subframe_bps, qlp_coeff, order, quantization),
|
||||
do_escape_coding,
|
||||
rice_parameter_search_dist,
|
||||
&subframe->data.lpc.entropy_coding_method
|
||||
@ -4046,7 +4046,7 @@ uint32_t find_best_partition_order_(
|
||||
uint32_t rice_parameter_limit,
|
||||
uint32_t min_partition_order,
|
||||
uint32_t max_partition_order,
|
||||
uint32_t bps,
|
||||
uint32_t max_residual_bps,
|
||||
FLAC__bool do_escape_coding,
|
||||
uint32_t rice_parameter_search_dist,
|
||||
FLAC__EntropyCodingMethod *best_ecm
|
||||
@ -4060,7 +4060,7 @@ uint32_t find_best_partition_order_(
|
||||
max_partition_order = FLAC__format_get_max_rice_partition_order_from_blocksize_limited_max_and_predictor_order(max_partition_order, blocksize, predictor_order);
|
||||
min_partition_order = flac_min(min_partition_order, max_partition_order);
|
||||
|
||||
private_->local_precompute_partition_info_sums(residual, abs_residual_partition_sums, residual_samples, predictor_order, min_partition_order, max_partition_order, bps);
|
||||
private_->local_precompute_partition_info_sums(residual, abs_residual_partition_sums, residual_samples, predictor_order, min_partition_order, max_partition_order, max_residual_bps);
|
||||
|
||||
if(do_escape_coding)
|
||||
precompute_partition_info_escapes_(residual, raw_bits_per_partition, residual_samples, predictor_order, min_partition_order, max_partition_order);
|
||||
@ -4138,7 +4138,7 @@ void precompute_partition_info_sums_(
|
||||
uint32_t predictor_order,
|
||||
uint32_t min_partition_order,
|
||||
uint32_t max_partition_order,
|
||||
uint32_t bps
|
||||
uint32_t max_residual_bps
|
||||
)
|
||||
{
|
||||
const uint32_t default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
|
||||
@ -4150,22 +4150,33 @@ void precompute_partition_info_sums_(
|
||||
{
|
||||
const uint32_t threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
|
||||
uint32_t partition, residual_sample, end = (uint32_t)(-(int)predictor_order);
|
||||
/* WATCHOUT: "bps + FLAC__MAX_EXTRA_RESIDUAL_BPS" is the maximum assumed size of the average residual magnitude */
|
||||
if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) {
|
||||
if(max_residual_bps < threshold) {
|
||||
for(partition = residual_sample = 0; partition < partitions; partition++) {
|
||||
FLAC__uint32 abs_residual_partition_sum = 0;
|
||||
end += default_partition_samples;
|
||||
for( ; residual_sample < end; residual_sample++)
|
||||
abs_residual_partition_sum += abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */
|
||||
abs_residual_partition_sum += abs(residual[residual_sample]);
|
||||
abs_residual_partition_sums[partition] = abs_residual_partition_sum;
|
||||
}
|
||||
}
|
||||
else { /* have to pessimistically use 64 bits for accumulator */
|
||||
else if(max_residual_bps < 32) { /* have to pessimistically use 64 bits for accumulator */
|
||||
for(partition = residual_sample = 0; partition < partitions; partition++) {
|
||||
FLAC__uint64 abs_residual_partition_sum64 = 0;
|
||||
end += default_partition_samples;
|
||||
for( ; residual_sample < end; residual_sample++)
|
||||
abs_residual_partition_sum64 += abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */
|
||||
abs_residual_partition_sum64 += abs(residual[residual_sample]);
|
||||
abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
|
||||
}
|
||||
}
|
||||
else { /* must handle abs(INT32_MIN) */
|
||||
for(partition = residual_sample = 0; partition < partitions; partition++) {
|
||||
FLAC__uint64 abs_residual_partition_sum64 = 0;
|
||||
end += default_partition_samples;
|
||||
for( ; residual_sample < end; residual_sample++)
|
||||
if(residual[residual_sample] == INT32_MIN)
|
||||
abs_residual_partition_sum64 -= (FLAC__int64)INT32_MIN;
|
||||
else
|
||||
abs_residual_partition_sum64 += abs(residual[residual_sample]);
|
||||
abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
|
||||
}
|
||||
}
|
||||
|
@ -48,7 +48,7 @@
|
||||
|
||||
FLAC__SSE_TARGET("avx2")
|
||||
void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
|
||||
uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps)
|
||||
uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps)
|
||||
{
|
||||
const uint32_t default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
|
||||
uint32_t partitions = 1u << max_partition_order;
|
||||
@ -60,7 +60,7 @@ void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual
|
||||
const uint32_t threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
|
||||
uint32_t partition, residual_sample, end = (uint32_t)(-(int32_t)predictor_order);
|
||||
|
||||
if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) {
|
||||
if(max_residual_bps < threshold) {
|
||||
for(partition = residual_sample = 0; partition < partitions; partition++) {
|
||||
__m256i sum256 = _mm256_setzero_si256();
|
||||
__m128i sum128;
|
||||
@ -92,7 +92,7 @@ void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else { /* have to pessimistically use 64 bits for accumulator */
|
||||
else if(max_residual_bps < 32) { /* have to pessimistically use 64 bits for accumulator */
|
||||
for(partition = residual_sample = 0; partition < partitions; partition++) {
|
||||
__m256i sum256 = _mm256_setzero_si256();
|
||||
__m128i sum128;
|
||||
@ -121,6 +121,18 @@ void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual
|
||||
_mm_storel_epi64((__m128i*)(void*)(abs_residual_partition_sums+partition), sum128);
|
||||
}
|
||||
}
|
||||
else { /* must handle abs(INT32_MIN) */
|
||||
for(partition = residual_sample = 0; partition < partitions; partition++) {
|
||||
FLAC__uint64 abs_residual_partition_sum64 = 0;
|
||||
end += default_partition_samples;
|
||||
for( ; residual_sample < end; residual_sample++)
|
||||
if(residual[residual_sample] == INT32_MIN)
|
||||
abs_residual_partition_sum64 -= (FLAC__int64)INT32_MIN;
|
||||
else
|
||||
abs_residual_partition_sum64 += abs(residual[residual_sample]);
|
||||
abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* now merge partitions for lower orders */
|
||||
|
@ -59,7 +59,7 @@ static inline __m128i local_abs_epi32(__m128i val)
|
||||
|
||||
FLAC__SSE_TARGET("sse2")
|
||||
void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
|
||||
uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps)
|
||||
uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps)
|
||||
{
|
||||
const uint32_t default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
|
||||
uint32_t partitions = 1u << max_partition_order;
|
||||
@ -71,7 +71,7 @@ void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual
|
||||
const uint32_t threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
|
||||
uint32_t partition, residual_sample, end = (uint32_t)(-(int32_t)predictor_order);
|
||||
|
||||
if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) {
|
||||
if(max_residual_bps < threshold) {
|
||||
for(partition = residual_sample = 0; partition < partitions; partition++) {
|
||||
__m128i mm_sum = _mm_setzero_si128();
|
||||
uint32_t e1, e3;
|
||||
@ -106,7 +106,7 @@ void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else { /* have to pessimistically use 64 bits for accumulator */
|
||||
else if(max_residual_bps < 32) { /* have to pessimistically use 64 bits for accumulator */
|
||||
for(partition = residual_sample = 0; partition < partitions; partition++) {
|
||||
__m128i mm_sum = _mm_setzero_si128();
|
||||
uint32_t e1, e3;
|
||||
@ -135,6 +135,19 @@ void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual
|
||||
_mm_storel_epi64((__m128i*)(void*)(abs_residual_partition_sums+partition), mm_sum);
|
||||
}
|
||||
}
|
||||
else { /* must handle abs(INT32_MIN) */
|
||||
for(partition = residual_sample = 0; partition < partitions; partition++) {
|
||||
FLAC__uint64 abs_residual_partition_sum64 = 0;
|
||||
end += default_partition_samples;
|
||||
for( ; residual_sample < end; residual_sample++)
|
||||
if(residual[residual_sample] == INT32_MIN)
|
||||
abs_residual_partition_sum64 -= (FLAC__int64)INT32_MIN;
|
||||
else
|
||||
abs_residual_partition_sum64 += abs(residual[residual_sample]);
|
||||
abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* now merge partitions for lower orders */
|
||||
|
@ -48,7 +48,7 @@
|
||||
|
||||
FLAC__SSE_TARGET("ssse3")
|
||||
void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
|
||||
uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps)
|
||||
uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t max_residual_bps)
|
||||
{
|
||||
const uint32_t default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
|
||||
uint32_t partitions = 1u << max_partition_order;
|
||||
@ -60,7 +60,7 @@ void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residua
|
||||
const uint32_t threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
|
||||
uint32_t partition, residual_sample, end = (uint32_t)(-(int32_t)predictor_order);
|
||||
|
||||
if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) {
|
||||
if(max_residual_bps < threshold) {
|
||||
for(partition = residual_sample = 0; partition < partitions; partition++) {
|
||||
__m128i mm_sum = _mm_setzero_si128();
|
||||
uint32_t e1, e3;
|
||||
@ -95,7 +95,7 @@ void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residua
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else { /* have to pessimistically use 64 bits for accumulator */
|
||||
else if(max_residual_bps < 32) { /* have to pessimistically use 64 bits for accumulator */
|
||||
for(partition = residual_sample = 0; partition < partitions; partition++) {
|
||||
__m128i mm_sum = _mm_setzero_si128();
|
||||
uint32_t e1, e3;
|
||||
@ -124,6 +124,18 @@ void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residua
|
||||
_mm_storel_epi64((__m128i*)(void*)(abs_residual_partition_sums+partition), mm_sum);
|
||||
}
|
||||
}
|
||||
else { /* must handle abs(INT32_MIN) */
|
||||
for(partition = residual_sample = 0; partition < partitions; partition++) {
|
||||
FLAC__uint64 abs_residual_partition_sum64 = 0;
|
||||
end += default_partition_samples;
|
||||
for( ; residual_sample < end; residual_sample++)
|
||||
if(residual[residual_sample] == INT32_MIN)
|
||||
abs_residual_partition_sum64 -= (FLAC__int64)INT32_MIN;
|
||||
else
|
||||
abs_residual_partition_sum64 += abs(residual[residual_sample]);
|
||||
abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* now merge partitions for lower orders */
|
||||
|
Loading…
x
Reference in New Issue
Block a user