Adds use of restrict keyword to improve encoding speed.
Restrict works very poorly in Visual Studio (much slower than without it), so flac_restrict is defined in share/compat.h and used in: lpc_compute_residual...() and lpc_restore_signal...(). As a result, FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41() offers no advantage for 64-bit compiles and was removed from the x86-64 part of stream_encoder.c. Patch-from: lvqcl <lvqcl.mail@gmail.com>
This commit is contained in:
parent
a1abfa3df2
commit
cf28c0144b
@ -76,6 +76,15 @@
|
||||
#define inline __inline
|
||||
#endif
|
||||
|
||||
#if defined __INTEL_COMPILER || (defined _MSC_VER && defined _WIN64)
|
||||
/* flac_restrict expands to the compiler-specific spelling of the restrict
 * pointer qualifier: __restrict for the Intel compiler and for 64-bit MSVC
 * builds, __restrict__ for GCC-compatible compilers, and nothing otherwise.
 * 32-bit MSVC is deliberately left out of the first branch:
 * MSVS generates VERY slow 32-bit code with __restrict */
|
||||
#define flac_restrict __restrict
|
||||
#elif defined __GNUC__
|
||||
#define flac_restrict __restrict__
|
||||
#else
|
||||
#define flac_restrict
|
||||
#endif
|
||||
|
||||
/* adjust for compilers that can't understand using LLU suffix for uint64_t literals */
|
||||
#ifdef _MSC_VER
|
||||
#define FLAC__U64L(x) x
|
||||
|
@ -260,7 +260,7 @@ int FLAC__lpc_quantize_coefficients(const FLAC__real lp_coeff[], unsigned order,
|
||||
return 0;
|
||||
}
|
||||
|
||||
void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
|
||||
void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 * flac_restrict data, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict residual)
|
||||
#if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
|
||||
{
|
||||
FLAC__int64 sumo;
|
||||
@ -520,7 +520,7 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, u
|
||||
}
|
||||
#endif
|
||||
|
||||
void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
|
||||
void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 * flac_restrict data, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict residual)
|
||||
#if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
|
||||
{
|
||||
unsigned i, j;
|
||||
@ -776,7 +776,7 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *da
|
||||
|
||||
#endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */
|
||||
|
||||
void FLAC__lpc_restore_signal(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
|
||||
void FLAC__lpc_restore_signal(const FLAC__int32 * flac_restrict residual, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict data)
|
||||
#if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
|
||||
{
|
||||
FLAC__int64 sumo;
|
||||
@ -1037,7 +1037,7 @@ void FLAC__lpc_restore_signal(const FLAC__int32 residual[], unsigned data_len, c
|
||||
}
|
||||
#endif
|
||||
|
||||
void FLAC__lpc_restore_signal_wide(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
|
||||
void FLAC__lpc_restore_signal_wide(const FLAC__int32 * flac_restrict residual, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict data)
|
||||
#if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
|
||||
{
|
||||
unsigned i, j;
|
||||
|
@ -942,10 +942,6 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
|
||||
|
||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2;
|
||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
|
||||
# ifdef FLAC__SSE4_SUPPORTED
|
||||
if(encoder->private_->cpuinfo.x86_64.sse41)
|
||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41;
|
||||
# endif
|
||||
# endif /* FLAC__HAS_X86INTRIN */
|
||||
# endif /* FLAC__CPU_... */
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user