From ae4d720417fbac1abecf8eaba47b122366211390 Mon Sep 17 00:00:00 2001 From: Erik de Castro Lopo Date: Sun, 15 Sep 2013 20:34:40 +1000 Subject: [PATCH] Fix/re-enable SSE/SSE2 lpc optimisations. --- configure.ac | 5 +++++ src/libFLAC/cpu.c | 4 ++-- src/libFLAC/include/private/cpu.h | 2 +- src/libFLAC/include/private/lpc.h | 8 ++------ src/libFLAC/lpc_x86intrin.c | 4 ++-- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/configure.ac b/configure.ac index f000a0b9..ed9713b2 100644 --- a/configure.ac +++ b/configure.ac @@ -415,6 +415,11 @@ if test x$ac_cv_c_compiler_gnu = xyes ; then XIPH_ADD_CFLAGS([-fgnu89-inline]) fi + if test "x$asm_optimisation" = "xyes" ; then + XIPH_ADD_CFLAGS([-msse]) + XIPH_ADD_CFLAGS([-msse2]) + fi + fi XIPH_ADD_CFLAGS([-Wextra]) diff --git a/src/libFLAC/cpu.c b/src/libFLAC/cpu.c index ec9704a7..493e1398 100644 --- a/src/libFLAC/cpu.c +++ b/src/libFLAC/cpu.c @@ -430,7 +430,7 @@ void FLAC__cpu_info(FLAC__CPUInfo *info) #endif } -#if defined FLAC__CPU_X86_64 && defined FLAC__HAS_X86INTRIN +#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN #if defined _MSC_VER && (_MSC_VER >= 1400) #include <intrin.h> /* for __cpuid() */ @@ -456,4 +456,4 @@ void FLAC__cpu_info_x86(FLAC__uint32 *flags_edx, FLAC__uint32 *flags_ecx) *flags_ecx = *flags_edx = 0; #endif } -#endif /* FLAC__HAS_X86INTRIN && FLAC__CPU_X86_64 */ +#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */ diff --git a/src/libFLAC/include/private/cpu.h b/src/libFLAC/include/private/cpu.h index 9501694c..4bfe1ae3 100644 --- a/src/libFLAC/include/private/cpu.h +++ b/src/libFLAC/include/private/cpu.h @@ -91,7 +91,7 @@ void FLAC__cpu_info_asm_ia32(FLAC__uint32 *flags_edx, FLAC__uint32 *flag FLAC__uint32 FLAC__cpu_info_extended_amd_asm_ia32(void); #endif -#if defined FLAC__CPU_X86_64 && defined FLAC__HAS_X86INTRIN +#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN void FLAC__cpu_info_x86(FLAC__uint32 
*flags_edx, FLAC__uint32 *flags_ecx); #endif diff --git a/src/libFLAC/include/private/lpc.h b/src/libFLAC/include/private/lpc.h index caee2b58..e3b2e389 100644 --- a/src/libFLAC/include/private/lpc.h +++ b/src/libFLAC/include/private/lpc.h @@ -79,13 +79,11 @@ void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16(const FLAC__real data void FLAC__lpc_compute_autocorrelation_asm_ia32_3dnow(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]); # endif # endif -# if defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64 -# ifdef FLAC__HAS_X86INTRIN +# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]); void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]); void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]); void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]); -# endif # endif #endif @@ -156,10 +154,8 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32(const FLAC__int32 void FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]); # endif # endif -# if defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64 -# ifdef FLAC__HAS_X86INTRIN +# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]); -# endif # endif #endif diff --git 
a/src/libFLAC/lpc_x86intrin.c b/src/libFLAC/lpc_x86intrin.c index b16f3a5f..ba26847c 100644 --- a/src/libFLAC/lpc_x86intrin.c +++ b/src/libFLAC/lpc_x86intrin.c @@ -36,7 +36,7 @@ #ifndef FLAC__INTEGER_ONLY_LIBRARY #ifndef FLAC__NO_ASM -#if defined FLAC__CPU_X86_64 && defined FLAC__HAS_X86INTRIN +#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN #include "FLAC/assert.h" #include "FLAC/format.h" @@ -561,6 +561,6 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC_ } } -#endif /* FLAC__CPU_X86_64 && FLAC__HAS_X86INTRIN */ +#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */ #endif /* FLAC__NO_ASM */ #endif /* FLAC__INTEGER_ONLY_LIBRARY */