Add intrinsics version of two lpc functions.

Functions:
- FLAC__fixed_compute_best_predictor
- FLAC__fixed_compute_best_predictor_wide

Patch-from: lvqcl <lvqcl.mail@gmail.com>
This commit is contained in:
Erik de Castro Lopo 2014-04-11 06:21:09 +10:00
parent d456cdd28a
commit 93f6109c90
6 changed files with 57 additions and 5 deletions

View File

@ -121,6 +121,8 @@ libFLAC_sources = \
cpu.c \
crc.c \
fixed.c \
fixed_intrin_sse2.c \
fixed_intrin_ssse3.c \
float.c \
format.c \
lpc.c \

View File

@ -85,6 +85,8 @@ SRCS_C = \
cpu.c \
crc.c \
fixed.c \
fixed_intrin_sse2.c \
fixed_intrin_ssse3.c \
float.c \
format.c \
lpc.c \

View File

@ -37,6 +37,7 @@
#include <config.h>
#endif
#include "private/cpu.h"
#include "private/float.h"
#include "FLAC/format.h"
@ -54,14 +55,22 @@
*/
#ifndef FLAC__INTEGER_ONLY_LIBRARY
unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
# ifndef FLAC__NO_ASM
# ifdef FLAC__CPU_IA32
# ifdef FLAC__HAS_NASM
unsigned FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
# ifdef FLAC__SSE2_SUPPORTED
unsigned FLAC__fixed_compute_best_predictor_intrin_sse2(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
unsigned FLAC__fixed_compute_best_predictor_wide_intrin_sse2(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
# endif
# ifdef FLAC__SSSE3_SUPPORTED
unsigned FLAC__fixed_compute_best_predictor_intrin_ssse3(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
unsigned FLAC__fixed_compute_best_predictor_wide_intrin_ssse3(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
# endif
# endif
# if defined FLAC__CPU_IA32 && defined FLAC__HAS_NASM
unsigned FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
# endif
# endif
unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
#else
unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);

View File

@ -310,6 +310,14 @@
RelativePath=".\fixed.c"
>
</File>
<File
RelativePath=".\fixed_intrin_sse2.c"
>
</File>
<File
RelativePath=".\fixed_intrin_ssse3.c"
>
</File>
<File
RelativePath=".\float.c"
>

View File

@ -323,6 +323,14 @@
RelativePath=".\fixed.c"
>
</File>
<File
RelativePath=".\fixed_intrin_sse2.c"
>
</File>
<File
RelativePath=".\fixed_intrin_ssse3.c"
>
</File>
<File
RelativePath=".\float.c"
>

View File

@ -348,8 +348,10 @@ typedef struct FLAC__StreamEncoderPrivate {
void (*local_precompute_partition_info_sums)(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[], unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps);
#ifndef FLAC__INTEGER_ONLY_LIBRARY
unsigned (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
unsigned (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
#else
unsigned (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
unsigned (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
#endif
#ifndef FLAC__INTEGER_ONLY_LIBRARY
void (*local_lpc_compute_autocorrelation)(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
@ -879,6 +881,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
#endif
encoder->private_->local_precompute_partition_info_sums = precompute_partition_info_sums_;
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor;
encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide;
#ifndef FLAC__INTEGER_ONLY_LIBRARY
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients;
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide;
@ -937,6 +940,17 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2;
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
}
# ifdef FLAC__SSSE3_SUPPORTED
if (encoder->private_->cpuinfo.ia32.ssse3) {
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_intrin_ssse3;
encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_ssse3;
}
else
# endif
if (encoder->private_->cpuinfo.ia32.sse2) {
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_intrin_sse2;
encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_sse2;
}
# endif
# ifdef FLAC__SSE4_1_SUPPORTED
if(encoder->private_->cpuinfo.ia32.sse41)
@ -959,6 +973,15 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
# ifdef FLAC__SSE2_SUPPORTED
/* encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2; // OPT: not faster than C; TODO: more tests on different CPUs */
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
# ifdef FLAC__SSSE3_SUPPORTED
if (encoder->private_->cpuinfo.x86_64.ssse3) {
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_intrin_ssse3;
encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_ssse3;
}
else
# endif
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_intrin_sse2;
encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_sse2;
# endif
# endif /* FLAC__HAS_X86INTRIN */
# endif /* FLAC__CPU_... */
@ -991,7 +1014,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
#endif /* !FLAC__NO_ASM && FLAC__HAS_X86INTRIN */
/* finally override based on wide-ness if necessary */
if(encoder->private_->use_wide_by_block) {
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_wide;
encoder->private_->local_fixed_compute_best_predictor = encoder->private_->local_fixed_compute_best_predictor_wide;
}
/* set state to OK; from here on, errors are fatal and we'll override the state then */