Add intrinsics versions of two fixed-predictor functions.

Functions:
- FLAC__fixed_compute_best_predictor
- FLAC__fixed_compute_best_predictor_wide

Patch-from: lvqcl <lvqcl.mail@gmail.com>
This commit is contained in:
parent
d456cdd28a
commit
93f6109c90
@ -121,6 +121,8 @@ libFLAC_sources = \
|
||||
cpu.c \
|
||||
crc.c \
|
||||
fixed.c \
|
||||
fixed_intrin_sse2.c \
|
||||
fixed_intrin_ssse3.c \
|
||||
float.c \
|
||||
format.c \
|
||||
lpc.c \
|
||||
|
@ -85,6 +85,8 @@ SRCS_C = \
|
||||
cpu.c \
|
||||
crc.c \
|
||||
fixed.c \
|
||||
fixed_intrin_sse2.c \
|
||||
fixed_intrin_ssse3.c \
|
||||
float.c \
|
||||
format.c \
|
||||
lpc.c \
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include "private/cpu.h"
|
||||
#include "private/float.h"
|
||||
#include "FLAC/format.h"
|
||||
|
||||
@ -54,14 +55,22 @@
|
||||
*/
|
||||
#ifndef FLAC__INTEGER_ONLY_LIBRARY
|
||||
unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
|
||||
unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
|
||||
# ifndef FLAC__NO_ASM
|
||||
# ifdef FLAC__CPU_IA32
|
||||
# ifdef FLAC__HAS_NASM
|
||||
unsigned FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
|
||||
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
||||
# ifdef FLAC__SSE2_SUPPORTED
|
||||
unsigned FLAC__fixed_compute_best_predictor_intrin_sse2(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
|
||||
unsigned FLAC__fixed_compute_best_predictor_wide_intrin_sse2(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
|
||||
# endif
|
||||
# ifdef FLAC__SSSE3_SUPPORTED
|
||||
unsigned FLAC__fixed_compute_best_predictor_intrin_ssse3(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
|
||||
unsigned FLAC__fixed_compute_best_predictor_wide_intrin_ssse3(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
|
||||
# endif
|
||||
# endif
|
||||
# if defined FLAC__CPU_IA32 && defined FLAC__HAS_NASM
|
||||
unsigned FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
|
||||
# endif
|
||||
# endif
|
||||
unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
|
||||
#else
|
||||
unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
|
||||
unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
|
||||
|
@ -310,6 +310,14 @@
|
||||
RelativePath=".\fixed.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\fixed_intrin_sse2.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\fixed_intrin_ssse3.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\float.c"
|
||||
>
|
||||
|
@ -323,6 +323,14 @@
|
||||
RelativePath=".\fixed.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\fixed_intrin_sse2.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\fixed_intrin_ssse3.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\float.c"
|
||||
>
|
||||
|
@ -348,8 +348,10 @@ typedef struct FLAC__StreamEncoderPrivate {
|
||||
void (*local_precompute_partition_info_sums)(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[], unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps);
|
||||
#ifndef FLAC__INTEGER_ONLY_LIBRARY
|
||||
unsigned (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
|
||||
unsigned (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
|
||||
#else
|
||||
unsigned (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
|
||||
unsigned (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
|
||||
#endif
|
||||
#ifndef FLAC__INTEGER_ONLY_LIBRARY
|
||||
void (*local_lpc_compute_autocorrelation)(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
|
||||
@ -879,6 +881,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
|
||||
#endif
|
||||
encoder->private_->local_precompute_partition_info_sums = precompute_partition_info_sums_;
|
||||
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor;
|
||||
encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide;
|
||||
#ifndef FLAC__INTEGER_ONLY_LIBRARY
|
||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients;
|
||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide;
|
||||
@ -937,6 +940,17 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
|
||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2;
|
||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
|
||||
}
|
||||
# ifdef FLAC__SSSE3_SUPPORTED
|
||||
if (encoder->private_->cpuinfo.ia32.ssse3) {
|
||||
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_intrin_ssse3;
|
||||
encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_ssse3;
|
||||
}
|
||||
else
|
||||
# endif
|
||||
if (encoder->private_->cpuinfo.ia32.sse2) {
|
||||
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_intrin_sse2;
|
||||
encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_sse2;
|
||||
}
|
||||
# endif
|
||||
# ifdef FLAC__SSE4_1_SUPPORTED
|
||||
if(encoder->private_->cpuinfo.ia32.sse41)
|
||||
@ -959,6 +973,15 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
|
||||
# ifdef FLAC__SSE2_SUPPORTED
|
||||
/* encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2; // OPT: not faster than C; TODO: more tests on different CPUs */
|
||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
|
||||
# ifdef FLAC__SSSE3_SUPPORTED
|
||||
if (encoder->private_->cpuinfo.x86_64.ssse3) {
|
||||
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_intrin_ssse3;
|
||||
encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_ssse3;
|
||||
}
|
||||
else
|
||||
# endif
|
||||
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_intrin_sse2;
|
||||
encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_sse2;
|
||||
# endif
|
||||
# endif /* FLAC__HAS_X86INTRIN */
|
||||
# endif /* FLAC__CPU_... */
|
||||
@ -991,7 +1014,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
|
||||
#endif /* !FLAC__NO_ASM && FLAC__HAS_X86INTRIN */
|
||||
/* finally override based on wide-ness if necessary */
|
||||
if(encoder->private_->use_wide_by_block) {
|
||||
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_wide;
|
||||
encoder->private_->local_fixed_compute_best_predictor = encoder->private_->local_fixed_compute_best_predictor_wide;
|
||||
}
|
||||
|
||||
/* set state to OK; from here on, errors are fatal and we'll override the state then */
|
||||
|
Loading…
Reference in New Issue
Block a user