mirror of https://github.com/xiph/flac
Remove all PPC-specific code (and a mention of SPARC)
Because the PPC-specific code turns out to provide no benefit with modern, autovectorizing compilers, and because there is a lot of cruft surrounding the PPC-specific parts, remove all of it. See also https://lists.xiph.org/pipermail/flac-dev/2022-December/006620.html
This commit is contained in:
parent
fe0bf2309a
commit
ba5172e554
|
@ -43,12 +43,8 @@ EXTRA_DIST = \
|
|||
flac-config.cmake.in \
|
||||
cmake/CheckA64NEON.c.in \
|
||||
cmake/CheckA64NEON.cmake \
|
||||
cmake/CheckAttribute.c.in \
|
||||
cmake/CheckAttribute.cmake \
|
||||
cmake/CheckCPUArch.c.in \
|
||||
cmake/CheckCPUArch.cmake \
|
||||
cmake/CheckVSX.c.in \
|
||||
cmake/CheckVSX.cmake \
|
||||
cmake/FindOgg.cmake \
|
||||
cmake/UseSystemExtensions.cmake \
|
||||
CHANGELOG.md \
|
||||
|
|
|
@ -1,6 +0,0 @@
|
|||
/* Compile-probe template. cmake/CheckAttribute.cmake runs configure_file()
 * on this file with @ONLY, replacing @CHECK_ATTRIBUTE@ with the attribute
 * under test, and then feeds the result to try_compile(). */
int main (void)
|
||||
{
|
||||
/* Local declaration that carries the attribute being probed. */
void foo(void) __attribute__ ((@CHECK_ATTRIBUTE@));
|
||||
;
|
||||
return 0;
|
||||
}
|
|
@ -1,23 +0,0 @@
|
|||
# _CHECK_ATTRIBUTE(ATTRIBUTE VARIABLE)
#
# Probes whether the C compiler accepts __attribute__((ATTRIBUTE)).
#   ATTRIBUTE - attribute text substituted into cmake/CheckAttribute.c.in
#   VARIABLE  - name of the result variable; set to 1 in the cache on success
#               and left untouched on failure.
# The try_compile() result itself is stored in HAVE_<VARIABLE>.
macro(_CHECK_ATTRIBUTE ATTRIBUTE VARIABLE)
|
||||
# Skip the probe when HAVE_<VARIABLE> is already defined.
if(NOT DEFINED HAVE_${VARIABLE})
|
||||
message(STATUS "Check for __attribute__ ((${ATTRIBUTE})) ")
|
||||
# CHECK_ATTRIBUTE is the placeholder name used inside the .c.in template.
set(CHECK_ATTRIBUTE ${ATTRIBUTE})
|
||||
# @ONLY: only @VAR@ references in the template are substituted.
configure_file(${PROJECT_SOURCE_DIR}/cmake/CheckAttribute.c.in ${PROJECT_BINARY_DIR}/CMakeFiles/CMakeTmp/CheckAttribute.c @ONLY)
|
||||
try_compile(HAVE_${VARIABLE} "${PROJECT_BINARY_DIR}"
|
||||
"${PROJECT_BINARY_DIR}/CMakeFiles/CMakeTmp/CheckAttribute.c")
|
||||
if(HAVE_${VARIABLE})
|
||||
message(STATUS "Check for __attribute__ ((${ATTRIBUTE})) - yes")
|
||||
# Publish the positive result under the caller-supplied name.
set(${VARIABLE} 1 CACHE INTERNAL "Result of CHECK_ATTRIBUTE ${ATTRIBUTE}" FORCE)
|
||||
else ()
|
||||
message(STATUS "Check for __attribute__ ((${ATTRIBUTE})) - no")
|
||||
endif()
|
||||
endif ()
|
||||
endmacro(_CHECK_ATTRIBUTE)
|
||||
|
||||
# CHECK_ATTRIBUTE_POWER8(VARIABLE)
# Runs _CHECK_ATTRIBUTE for __attribute__((target("cpu=power8"))) and reports
# the result through VARIABLE.
macro(CHECK_ATTRIBUTE_POWER8 VARIABLE)
|
||||
_CHECK_ATTRIBUTE("target(\"cpu=power8\")" ${VARIABLE})
|
||||
endmacro(CHECK_ATTRIBUTE_POWER8)
|
||||
|
||||
# CHECK_ATTRIBUTE_POWER9(VARIABLE)
# Runs _CHECK_ATTRIBUTE for __attribute__((target("cpu=power9"))) and reports
# the result through VARIABLE.
macro(CHECK_ATTRIBUTE_POWER9 VARIABLE)
|
||||
_CHECK_ATTRIBUTE("target(\"cpu=power9\")" ${VARIABLE})
|
||||
endmacro(CHECK_ATTRIBUTE_POWER9)
|
|
@ -22,10 +22,6 @@ macro(CHECK_CPU_ARCH_X86 VARIABLE)
|
|||
_CHECK_CPU_ARCH(x86 "defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) ||defined( __i386) || defined(_M_IX86)" ${VARIABLE})
|
||||
endmacro(CHECK_CPU_ARCH_X86)
|
||||
|
||||
# CHECK_CPU_ARCH_PPC64(VARIABLE)
# Forwards the architecture tag "ppc64", a preprocessor condition built from
# the 64-bit PowerPC predefined macros, and VARIABLE to _CHECK_CPU_ARCH.
# NOTE(review): _CHECK_CPU_ARCH is defined outside this view; presumably it
# sets VARIABLE when the condition holds for the target compiler - confirm.
macro(CHECK_CPU_ARCH_PPC64 VARIABLE)
|
||||
_CHECK_CPU_ARCH(ppc64 "defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) ||defined(_ARCH_PPC64)" ${VARIABLE})
|
||||
endmacro(CHECK_CPU_ARCH_PPC64)
|
||||
|
||||
# CHECK_CPU_ARCH_ARM64(VARIABLE)
# Forwards the architecture tag "arm64", a preprocessor condition testing
# __aarch64__ / __arm64__, and VARIABLE to _CHECK_CPU_ARCH.
# NOTE(review): _CHECK_CPU_ARCH is defined outside this view; presumably it
# sets VARIABLE when the condition holds for the target compiler - confirm.
macro(CHECK_CPU_ARCH_ARM64 VARIABLE)
|
||||
_CHECK_CPU_ARCH(arm64 "defined(__aarch64__) || defined(__arm64__)" ${VARIABLE})
|
||||
endmacro(CHECK_CPU_ARCH_ARM64)
|
||||
|
|
|
@ -1,6 +0,0 @@
|
|||
#include <altivec.h>
|
||||
/* Compile probe used by cmake/CheckVSX.cmake (via try_compile): it only
 * builds when <altivec.h> provides vec_doubleh() for a vector float. */
int main (void)
|
||||
{
|
||||
vector float d = {0.0f,0.0f,0.0f,0.0f};
|
||||
/* Result is discarded; only whether the call compiles matters. */
vec_doubleh(d);
|
||||
}
|
|
@ -1,14 +0,0 @@
|
|||
# CHECK_VSX(VARIABLE)
#
# Probes whether cmake/CheckVSX.c.in compiles with the current C compiler.
#   VARIABLE - name of the result variable; set to 1 in the cache on success
#              and left untouched on failure.
# The try_compile() result itself is stored in HAVE_<VARIABLE>.
macro(CHECK_VSX VARIABLE)
|
||||
# Skip the probe when HAVE_<VARIABLE> is already defined.
if(NOT DEFINED HAVE_${VARIABLE})
|
||||
message(STATUS "Check whether VSX can be used")
|
||||
# Copy the probe source into the build tree (@ONLY substitution mode).
configure_file(${PROJECT_SOURCE_DIR}/cmake/CheckVSX.c.in ${PROJECT_BINARY_DIR}/CMakeFiles/CMakeTmp/CheckVSX.c @ONLY)
|
||||
try_compile(HAVE_${VARIABLE} "${PROJECT_BINARY_DIR}"
|
||||
"${PROJECT_BINARY_DIR}/CMakeFiles/CMakeTmp/CheckVSX.c")
|
||||
if(HAVE_${VARIABLE})
|
||||
message(STATUS "Check whether VSX can be used - yes")
|
||||
# Publish the positive result under the caller-supplied name.
set(${VARIABLE} 1 CACHE INTERNAL "Result of CHECK_VSX" FORCE)
|
||||
else ()
|
||||
message(STATUS "Check whether VSX can be used - no")
|
||||
endif()
|
||||
endif ()
|
||||
endmacro(CHECK_VSX)
|
|
@ -9,12 +9,6 @@
|
|||
/* Target processor ARM64 */
|
||||
#cmakedefine FLAC__CPU_ARM64
|
||||
|
||||
/* Target processor PPC */
|
||||
#cmakedefine FLAC__CPU_PPC
|
||||
|
||||
/* Target processor PPC64 */
|
||||
#cmakedefine FLAC__CPU_PPC64
|
||||
|
||||
/* Set FLAC__BYTES_PER_WORD to 8 (4 is the default) */
|
||||
#cmakedefine01 ENABLE_64_BIT_WORDS
|
||||
|
||||
|
@ -28,12 +22,6 @@
|
|||
#cmakedefine01 OGG_FOUND
|
||||
#define FLAC__HAS_OGG OGG_FOUND
|
||||
|
||||
/* define if compiler has __attribute__((target("cpu=power8"))) support */
|
||||
#cmakedefine FLAC__HAS_TARGET_POWER8
|
||||
|
||||
/* define if compiler has __attribute__((target("cpu=power9"))) support */
|
||||
#cmakedefine FLAC__HAS_TARGET_POWER9
|
||||
|
||||
/* Set to 1 if <x86intrin.h> is available. */
|
||||
#cmakedefine01 FLAC__HAS_X86INTRIN
|
||||
|
||||
|
@ -49,18 +37,12 @@
|
|||
/* define if building for Linux */
|
||||
#cmakedefine FLAC__SYS_LINUX
|
||||
|
||||
/* define to enable use of Altivec instructions */
|
||||
#cmakedefine FLAC__USE_ALTIVEC
|
||||
|
||||
/* define to enable use of AVX instructions */
|
||||
#cmakedefine WITH_AVX
|
||||
#ifdef WITH_AVX
|
||||
#define FLAC__USE_AVX
|
||||
#endif
|
||||
|
||||
/* define to enable use of VSX instructions */
|
||||
#cmakedefine FLAC__USE_VSX
|
||||
|
||||
/* Define to the commit date of the current git HEAD */
|
||||
#cmakedefine GIT_COMMIT_DATE "@GIT_COMMIT_DATE@"
|
||||
|
||||
|
|
78
configure.ac
78
configure.ac
|
@ -126,36 +126,15 @@ case "$host_cpu" in
|
|||
AC_DEFINE(FLAC__CPU_IA32)
|
||||
AH_TEMPLATE(FLAC__CPU_IA32, [define if building for ia32/i386])
|
||||
;;
|
||||
powerpc64|powerpc64le)
|
||||
cpu_ppc64=true
|
||||
cpu_ppc=true
|
||||
AC_DEFINE(FLAC__CPU_PPC)
|
||||
AH_TEMPLATE(FLAC__CPU_PPC, [define if building for PowerPC])
|
||||
AC_DEFINE(FLAC__CPU_PPC64)
|
||||
AH_TEMPLATE(FLAC__CPU_PPC64, [define if building for PowerPC64])
|
||||
;;
|
||||
powerpc|powerpcle)
|
||||
cpu_ppc=true
|
||||
AC_DEFINE(FLAC__CPU_PPC)
|
||||
AH_TEMPLATE(FLAC__CPU_PPC, [define if building for PowerPC])
|
||||
;;
|
||||
arm64|aarch64)
|
||||
cpu_arm64=true
|
||||
AC_DEFINE(FLAC__CPU_ARM64)
|
||||
AH_TEMPLATE(FLAC__CPU_ARM64, [define if building for ARM])
|
||||
;;
|
||||
sparc)
|
||||
cpu_sparc=true
|
||||
AC_DEFINE(FLAC__CPU_SPARC)
|
||||
AH_TEMPLATE(FLAC__CPU_SPARC, [define if building for SPARC])
|
||||
;;
|
||||
esac
|
||||
AM_CONDITIONAL(FLAC__CPU_X86_64, test "x$cpu_x86_64" = xtrue)
|
||||
AM_CONDITIONAL(FLaC__CPU_IA32, test "x$cpu_ia32" = xtrue)
|
||||
AM_CONDITIONAL(FLaC__CPU_PPC, test "x$cpu_ppc" = xtrue)
|
||||
AM_CONDITIONAL(FLaC__CPU_PPC64, test "x$cpu_ppc64" = xtrue)
|
||||
AM_CONDITIONAL(FLAC__CPU_ARM64, test "x$cpu_arm64" = xtrue)
|
||||
AM_CONDITIONAL(FLaC__CPU_SPARC, test "x$cpu_sparc" = xtrue)
|
||||
|
||||
if test "x$ac_cv_header_x86intrin_h" = xyes -a "x$asm_opt" = xyes; then
|
||||
AC_DEFINE([FLAC__HAS_X86INTRIN], 1, [Set to 1 if <x86intrin.h> is available.])
|
||||
|
@ -183,36 +162,6 @@ else
|
|||
AC_DEFINE([FLAC__HAS_NEONINTRIN], 0)
|
||||
fi
|
||||
|
||||
if test x"$cpu_ppc64" = xtrue -a "x$asm_opt" = xyes ; then
|
||||
|
||||
AC_C_ATTRIBUTE([target("cpu=power8")],
|
||||
[have_cpu_power8=yes],
|
||||
[have_cpu_power8=no])
|
||||
if test x"$have_cpu_power8" = xyes ; then
|
||||
AC_DEFINE(FLAC__HAS_TARGET_POWER8)
|
||||
AH_TEMPLATE(FLAC__HAS_TARGET_POWER8, [define if compiler has __attribute__((target("cpu=power8"))) support])
|
||||
fi
|
||||
|
||||
AC_C_ATTRIBUTE([target("cpu=power9")],
|
||||
[have_cpu_power9=yes],
|
||||
[have_cpu_power9=no])
|
||||
if test x"$have_cpu_power9" = xyes ; then
|
||||
AC_DEFINE(FLAC__HAS_TARGET_POWER9)
|
||||
AH_TEMPLATE(FLAC__HAS_TARGET_POWER9, [define if compiler has __attribute__((target("cpu=power9"))) support])
|
||||
fi
|
||||
|
||||
if test x"$have_cpu_power8" = xyes || test x"$have_cpu_power9" = xyes ; then
|
||||
AC_MSG_CHECKING([whether altivec.h has vec_doubleh()])
|
||||
AC_COMPILE_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <altivec.h>]],
|
||||
[[vector float d = {0.0f,0.0f,0.0f,0.0f}; vec_doubleh(d);]])],
|
||||
[AC_MSG_RESULT([yes])
|
||||
has_vec_doubleh=true],
|
||||
[AC_MSG_RESULT([no])])
|
||||
fi
|
||||
|
||||
fi
|
||||
|
||||
case "$host" in
|
||||
i386-*-openbsd3.[[0-3]]) OBJ_FORMAT=aoutb ;;
|
||||
*-*-cygwin|*mingw*) OBJ_FORMAT=win32 ;;
|
||||
|
@ -254,33 +203,6 @@ fi
|
|||
|
||||
AM_CONDITIONAL([DEBUG], [test "x${ax_enable_debug}" = "xyes" || test "x${ax_enable_debug}" = "xinfo"])
|
||||
|
||||
AC_ARG_ENABLE(altivec,
|
||||
AS_HELP_STRING([--disable-altivec],[Disable use of Altivec instructions]),
|
||||
[case "${enableval}" in
|
||||
yes) use_altivec=true ;;
|
||||
no) use_altivec=false ;;
|
||||
*) AC_MSG_ERROR(bad value ${enableval} for --enable-altivec) ;;
|
||||
esac],[use_altivec=true])
|
||||
AM_CONDITIONAL(FLaC__USE_ALTIVEC, test "x$use_altivec" = xtrue)
|
||||
if test "x$use_altivec" = xtrue ; then
|
||||
AC_DEFINE(FLAC__USE_ALTIVEC)
|
||||
AH_TEMPLATE(FLAC__USE_ALTIVEC, [define to enable use of Altivec instructions])
|
||||
fi
|
||||
|
||||
AC_ARG_ENABLE(vsx,
|
||||
AS_HELP_STRING([--disable-vsx],[Disable VSX optimizations]),
|
||||
[case "${enableval}" in
|
||||
yes) use_vsx=true ;;
|
||||
no) use_vsx=false ;;
|
||||
*) AC_MSG_ERROR(bad value ${enableval} for --enable-vsx) ;;
|
||||
esac],[use_vsx=true])
|
||||
AM_CONDITIONAL(FLaC__USE_VSX, test "x$use_vsx" = xtrue)
|
||||
if test "x$use_vsx$has_vec_doubleh" = xtruetrue ; then
|
||||
AC_DEFINE(FLAC__USE_VSX)
|
||||
AH_TEMPLATE(FLAC__USE_VSX, [define to enable use of VSX instructions])
|
||||
asm_optimisation=yes
|
||||
fi
|
||||
|
||||
AC_ARG_ENABLE(avx,
|
||||
AS_HELP_STRING([--disable-avx],[Disable AVX, AVX2 optimizations. There is runtime detection of CPU features, so disabling is only necessary when a compiler does not know about them]),
|
||||
[case "${enableval}" in
|
||||
|
|
|
@ -8,8 +8,6 @@ check_function_exists(lround HAVE_LROUND)
|
|||
|
||||
include(CheckCSourceCompiles)
|
||||
include(CheckCPUArch)
|
||||
include(CheckAttribute)
|
||||
include(CheckVSX)
|
||||
include(CheckA64NEON)
|
||||
|
||||
check_cpu_arch_x64(FLAC__CPU_X86_64)
|
||||
|
@ -24,17 +22,9 @@ if(FLAC__CPU_X86_64 OR FLAC__CPU_IA32)
|
|||
set_source_files_properties(lpc_intrin_avx2.c stream_encoder_intrin_avx2.c lpc_intrin_fma.c PROPERTIES COMPILE_FLAGS /arch:AVX2)
|
||||
endif()
|
||||
else()
|
||||
check_cpu_arch_ppc64(FLAC__CPU_PPC64)
|
||||
if(FLAC__CPU_PPC64)
|
||||
set(FLAC__CPU_PPC 1 CACHE INTERNAL "Set because FLAC__CPU_PPC64 depends on it" FORCE)
|
||||
check_attribute_power8(FLAC__HAS_TARGET_POWER8)
|
||||
check_attribute_power9(FLAC__HAS_TARGET_POWER9)
|
||||
check_vsx(FLAC__USE_VSX)
|
||||
else()
|
||||
check_cpu_arch_arm64(FLAC__CPU_ARM64)
|
||||
if(FLAC__CPU_ARM64)
|
||||
check_a64neon(FLAC__HAS_A64NEONINTRIN)
|
||||
endif()
|
||||
check_cpu_arch_arm64(FLAC__CPU_ARM64)
|
||||
if(FLAC__CPU_ARM64)
|
||||
check_a64neon(FLAC__HAS_A64NEONINTRIN)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -61,7 +51,6 @@ add_library(FLAC
|
|||
lpc_intrin_sse41.c
|
||||
lpc_intrin_avx2.c
|
||||
lpc_intrin_fma.c
|
||||
lpc_intrin_vsx.c
|
||||
md5.c
|
||||
memory.c
|
||||
metadata_iterators.c
|
||||
|
|
|
@ -40,21 +40,7 @@ ASSOCMATHCFLAGS = -fassociative-math -fno-signed-zeros -fno-trapping-math -freci
|
|||
endif
|
||||
endif
|
||||
|
||||
# FIXME: The following logic should be part of configure, not of Makefile.am
|
||||
|
||||
if FLaC__CPU_PPC
|
||||
if FLaC__SYS_DARWIN
|
||||
CPUCFLAGS = -faltivec
|
||||
else
|
||||
CPUCFLAGS =
|
||||
if FLaC__USE_ALTIVEC
|
||||
CPUCFLAGS += -maltivec -mabi=altivec
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
AM_CFLAGS = $(DEBUGCFLAGS) $(CPUCFLAGS) ${ASSOCMATHCFLAGS} @OGG_CFLAGS@
|
||||
AM_CFLAGS = $(DEBUGCFLAGS) ${ASSOCMATHCFLAGS} @OGG_CFLAGS@
|
||||
|
||||
libFLAC_la_LIBADD = @OGG_LIBS@ -lm
|
||||
|
||||
|
@ -72,7 +58,6 @@ EXTRA_DIST = \
|
|||
libFLAC.m4 \
|
||||
deduplication/lpc_compute_autocorrelation_intrin.c \
|
||||
deduplication/lpc_compute_autocorrelation_intrin_sse2.c \
|
||||
deduplication/lpc_compute_autocorrelation_intrin_vsx.c \
|
||||
deduplication/lpc_compute_autocorrelation_intrin_neon.c
|
||||
|
||||
if OS_IS_WINDOWS
|
||||
|
@ -106,7 +91,6 @@ libFLAC_sources = \
|
|||
lpc_intrin_sse41.c \
|
||||
lpc_intrin_avx2.c \
|
||||
lpc_intrin_fma.c \
|
||||
lpc_intrin_vsx.c \
|
||||
lpc_intrin_neon.c \
|
||||
md5.c \
|
||||
memory.c \
|
||||
|
|
|
@ -228,47 +228,6 @@ x86_cpu_info (FLAC__CPUInfo *info)
|
|||
#endif
|
||||
}
|
||||
|
||||
/* Fill info->ppc with the PowerPC ISA level reported by the OS.
 *
 * On Linux (getauxval) and FreeBSD (elf_aux_info) the AT_HWCAP2 word is
 * tested: PPC_FEATURE2_ARCH_3_00 sets ppc.arch_3_00, otherwise
 * PPC_FEATURE2_ARCH_2_07 sets ppc.arch_2_07. At most one of the two flags
 * is set to true by this function; the other is not written in those
 * branches. On every other platform, and when FLAC__CPU_PPC is not
 * defined, both flags are set to false.
 */
static void
|
||||
ppc_cpu_info (FLAC__CPUInfo *info)
|
||||
{
|
||||
#if defined FLAC__CPU_PPC
|
||||
/* Fallback definitions for system headers that lack these HWCAP2 bits. */
#ifndef PPC_FEATURE2_ARCH_3_00
|
||||
#define PPC_FEATURE2_ARCH_3_00 0x00800000
|
||||
#endif
|
||||
|
||||
#ifndef PPC_FEATURE2_ARCH_2_07
|
||||
#define PPC_FEATURE2_ARCH_2_07 0x80000000
|
||||
#endif
|
||||
|
||||
#if defined (__linux__) && defined(HAVE_GETAUXVAL)
|
||||
if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_3_00) {
|
||||
info->ppc.arch_3_00 = true;
|
||||
} else if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07) {
|
||||
info->ppc.arch_2_07 = true;
|
||||
}
|
||||
#elif defined(__FreeBSD__) && defined(HAVE_SYS_AUXV_H)
|
||||
/* Start from 0: the return value of elf_aux_info() is not checked, so on
 * failure neither feature is reported instead of testing an indeterminate
 * value. */
unsigned long hwcaps = 0;
|
||||
elf_aux_info(AT_HWCAP2, &hwcaps, sizeof(hwcaps));
|
||||
if (hwcaps & PPC_FEATURE2_ARCH_3_00) {
|
||||
info->ppc.arch_3_00 = true;
|
||||
} else if (hwcaps & PPC_FEATURE2_ARCH_2_07) {
|
||||
info->ppc.arch_2_07 = true;
|
||||
}
|
||||
#elif defined(__APPLE__)
|
||||
/* no Mac OS X version supports a CPU with Power ISA v2.07 or better */
|
||||
info->ppc.arch_2_07 = false;
|
||||
info->ppc.arch_3_00 = false;
|
||||
#else
|
||||
info->ppc.arch_2_07 = false;
|
||||
info->ppc.arch_3_00 = false;
|
||||
#endif
|
||||
|
||||
#else
|
||||
info->ppc.arch_2_07 = false;
|
||||
info->ppc.arch_3_00 = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
void FLAC__cpu_info (FLAC__CPUInfo *info)
|
||||
{
|
||||
memset(info, 0, sizeof(*info));
|
||||
|
@ -277,8 +236,6 @@ void FLAC__cpu_info (FLAC__CPUInfo *info)
|
|||
info->type = FLAC__CPUINFO_TYPE_IA32;
|
||||
#elif defined FLAC__CPU_X86_64
|
||||
info->type = FLAC__CPUINFO_TYPE_X86_64;
|
||||
#elif defined FLAC__CPU_PPC
|
||||
info->type = FLAC__CPUINFO_TYPE_PPC;
|
||||
#else
|
||||
info->type = FLAC__CPUINFO_TYPE_UNKNOWN;
|
||||
#endif
|
||||
|
@ -288,9 +245,6 @@ void FLAC__cpu_info (FLAC__CPUInfo *info)
|
|||
case FLAC__CPUINFO_TYPE_X86_64:
|
||||
x86_cpu_info (info);
|
||||
break;
|
||||
case FLAC__CPUINFO_TYPE_PPC:
|
||||
ppc_cpu_info (info);
|
||||
break;
|
||||
default:
|
||||
info->use_asm = false;
|
||||
break;
|
||||
|
|
|
@ -1,179 +0,0 @@
|
|||
/* This code is imported several times in lpc_intrin_vsx.c with different
|
||||
* values for MAX_LAG. Comments are for MAX_LAG == 14 */
|
||||
|
||||
long i;
|
||||
long limit = (long)data_len - MAX_LAG;
|
||||
const FLAC__real *base;
|
||||
vector double d0, d1, d2, d3;
|
||||
vector double sum0 = { 0.0f, 0.0f};
|
||||
vector double sum10 = { 0.0f, 0.0f};
|
||||
vector double sum1 = { 0.0f, 0.0f};
|
||||
vector double sum11 = { 0.0f, 0.0f};
|
||||
vector double sum2 = { 0.0f, 0.0f};
|
||||
vector double sum12 = { 0.0f, 0.0f};
|
||||
vector double sum3 = { 0.0f, 0.0f};
|
||||
vector double sum13 = { 0.0f, 0.0f};
|
||||
#if MAX_LAG > 8
|
||||
vector double d4;
|
||||
vector double sum4 = { 0.0f, 0.0f};
|
||||
vector double sum14 = { 0.0f, 0.0f};
|
||||
#endif
|
||||
#if MAX_LAG > 10
|
||||
vector double d5, d6;
|
||||
vector double sum5 = { 0.0f, 0.0f};
|
||||
vector double sum15 = { 0.0f, 0.0f};
|
||||
vector double sum6 = { 0.0f, 0.0f};
|
||||
vector double sum16 = { 0.0f, 0.0f};
|
||||
#endif
|
||||
|
||||
vector float dtemp;
|
||||
|
||||
#if WORDS_BIGENDIAN
|
||||
vector unsigned long long vperm = { 0x08090A0B0C0D0E0F, 0x1011121314151617 };
|
||||
vector unsigned long long vsel = { 0x0000000000000000, 0xFFFFFFFFFFFFFFFF };
|
||||
#else
|
||||
vector unsigned long long vperm = { 0x0F0E0D0C0B0A0908, 0x1716151413121110 };
|
||||
vector unsigned long long vsel = { 0xFFFFFFFFFFFFFFFF, 0x0000000000000000 };
|
||||
#endif
|
||||
|
||||
(void) lag;
|
||||
FLAC__ASSERT(lag <= MAX_LAG);
|
||||
|
||||
base = data;
|
||||
|
||||
/* First, check whether it is possible to load
|
||||
* 16 elements at once */
|
||||
if(limit > 2){
|
||||
/* Convert all floats to doubles */
|
||||
dtemp = vec_vsx_ld(0, base);
|
||||
d0 = vec_doubleh(dtemp);
|
||||
d1 = vec_doublel(dtemp);
|
||||
dtemp = vec_vsx_ld(16, base);
|
||||
d2 = vec_doubleh(dtemp);
|
||||
d3 = vec_doublel(dtemp);
|
||||
#if MAX_LAG > 8
|
||||
dtemp = vec_vsx_ld(32, base);
|
||||
d4 = vec_doubleh(dtemp);
|
||||
#endif
|
||||
#if MAX_LAG > 10
|
||||
d5 = vec_doublel(dtemp);
|
||||
dtemp = vec_vsx_ld(48, base);
|
||||
d6 = vec_doubleh(dtemp);
|
||||
#endif
|
||||
|
||||
base += MAX_LAG;
|
||||
|
||||
/* Loop until nearing data_len */
|
||||
for (i = 0; i <= (limit-2); i += 2) {
|
||||
vector double d, dnext;
|
||||
|
||||
/* Load next 2 datapoints and convert to double
|
||||
* for lag 14 that is data[i+14] and data[i+15] */
|
||||
dtemp = vec_vsx_ld(0, base);
|
||||
dnext = vec_doubleh(dtemp);
|
||||
base += 2;
|
||||
|
||||
/* Create vector d with both elements set to the first
|
||||
* element of d0, so both elements data[i] */
|
||||
d = vec_splat(d0, 0);
|
||||
sum0 += d0 * d; // Multiply data[i] with data[i] and data[i+1]
|
||||
sum1 += d1 * d; // Multiply data[i] with data[i+2] and data[i+3]
|
||||
sum2 += d2 * d; // Multiply data[i] with data[i+4] and data[i+5]
|
||||
sum3 += d3 * d; // Multiply data[i] with data[i+6] and data[i+7]
|
||||
#if MAX_LAG > 8
|
||||
sum4 += d4 * d; // Multiply data[i] with data[i+8] and data[i+9]
|
||||
#endif
|
||||
#if MAX_LAG > 10
|
||||
sum5 += d5 * d; // Multiply data[i] with data[i+10] and data[i+11]
|
||||
sum6 += d6 * d; // Multiply data[i] with data[i+12] and data[i+13]
|
||||
#endif
|
||||
|
||||
/* Set both elements of d to data[i+1] */
|
||||
d = vec_splat(d0, 1);
|
||||
|
||||
/* Set d0 to data[i+14] and data[i+1] */
|
||||
d0 = vec_sel(d0, dnext, vsel);
|
||||
sum10 += d0 * d; /* Multiply data[i+1] with data[i+14] and data[i+1] */
|
||||
sum11 += d1 * d; /* Multiply data[i+1] with data[i+2] and data[i+3] */
|
||||
sum12 += d2 * d;
|
||||
sum13 += d3 * d;
|
||||
#if MAX_LAG > 8
|
||||
sum14 += d4 * d;
|
||||
#endif
|
||||
#if MAX_LAG > 10
|
||||
sum15 += d5 * d;
|
||||
sum16 += d6 * d; /* Multiply data[i+1] with data[i+12] and data[i+13] */
|
||||
#endif
|
||||
|
||||
/* Shift all loaded values one vector (2 elements) so the next
|
||||
* iteration aligns again */
|
||||
d0 = d1;
|
||||
d1 = d2;
|
||||
d2 = d3;
|
||||
#if MAX_LAG > 8
|
||||
d3 = d4;
|
||||
#endif
|
||||
#if MAX_LAG > 10
|
||||
d4 = d5;
|
||||
d5 = d6;
|
||||
#endif
|
||||
|
||||
#if MAX_LAG == 8
|
||||
d3 = dnext;
|
||||
#elif MAX_LAG == 10
|
||||
d4 = dnext;
|
||||
#elif MAX_LAG == 14
|
||||
d6 = dnext;
|
||||
#else
|
||||
#error "Unsupported lag";
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Because the values in sum10..sum16 do not align with
|
||||
* the values in sum0..sum6, these need to be 'left-rotated'
|
||||
* before adding them to sum0..sum6 */
|
||||
sum0 += vec_perm(sum10, sum11, (vector unsigned char)vperm);
|
||||
sum1 += vec_perm(sum11, sum12, (vector unsigned char)vperm);
|
||||
sum2 += vec_perm(sum12, sum13, (vector unsigned char)vperm);
|
||||
#if MAX_LAG > 8
|
||||
sum3 += vec_perm(sum13, sum14, (vector unsigned char)vperm);
|
||||
#endif
|
||||
#if MAX_LAG > 10
|
||||
sum4 += vec_perm(sum14, sum15, (vector unsigned char)vperm);
|
||||
sum5 += vec_perm(sum15, sum16, (vector unsigned char)vperm);
|
||||
#endif
|
||||
|
||||
#if MAX_LAG == 8
|
||||
sum3 += vec_perm(sum13, sum10, (vector unsigned char)vperm);
|
||||
#elif MAX_LAG == 10
|
||||
sum4 += vec_perm(sum14, sum10, (vector unsigned char)vperm);
|
||||
#elif MAX_LAG == 14
|
||||
sum6 += vec_perm(sum16, sum10, (vector unsigned char)vperm);
|
||||
#else
|
||||
#error "Unsupported lag";
|
||||
#endif
|
||||
}else{
|
||||
i = 0;
|
||||
}
|
||||
|
||||
/* Store result */
|
||||
vec_vsx_st(sum0, 0, autoc);
|
||||
vec_vsx_st(sum1, 16, autoc);
|
||||
vec_vsx_st(sum2, 32, autoc);
|
||||
vec_vsx_st(sum3, 48, autoc);
|
||||
#if MAX_LAG > 8
|
||||
vec_vsx_st(sum4, 64, autoc);
|
||||
#endif
|
||||
#if MAX_LAG > 10
|
||||
vec_vsx_st(sum5, 80, autoc);
|
||||
vec_vsx_st(sum6, 96, autoc);
|
||||
#endif
|
||||
|
||||
/* Process remainder of samples in a non-VSX way */
|
||||
for (; i < (long)data_len; i++) {
|
||||
uint32_t coeff;
|
||||
|
||||
FLAC__real d = data[i];
|
||||
for (coeff = 0; coeff < data_len - i; coeff++)
|
||||
autoc[coeff] += d * data[i+coeff];
|
||||
}
|
|
@ -153,7 +153,6 @@
|
|||
/* CPU family tag stored in FLAC__CPUInfo.type. FLAC__cpu_info() picks the
 * value from the FLAC__CPU_* build macros and falls back to
 * FLAC__CPUINFO_TYPE_UNKNOWN when none of them is defined. */
typedef enum {
|
||||
FLAC__CPUINFO_TYPE_IA32,
|
||||
FLAC__CPUINFO_TYPE_X86_64,
|
||||
FLAC__CPUINFO_TYPE_PPC,
|
||||
FLAC__CPUINFO_TYPE_UNKNOWN
|
||||
} FLAC__CPUInfo_Type;
|
||||
|
||||
|
@ -174,16 +173,10 @@ typedef struct {
|
|||
FLAC__bool fma;
|
||||
} FLAC__CPUInfo_x86;
|
||||
|
||||
/* PowerPC feature flags, filled in by ppc_cpu_info(). */
typedef struct {
|
||||
/* set when AT_HWCAP2 reports PPC_FEATURE2_ARCH_3_00 */
FLAC__bool arch_3_00;
|
||||
/* set when AT_HWCAP2 reports PPC_FEATURE2_ARCH_2_07 (and not ARCH_3_00) */
FLAC__bool arch_2_07;
|
||||
} FLAC__CPUInfo_ppc;
|
||||
|
||||
/* Aggregate CPU description produced by FLAC__cpu_info(), which zeroes the
 * whole structure before filling it in. */
typedef struct {
|
||||
/* set to false by FLAC__cpu_info() for CPU types it has no handler for */
FLAC__bool use_asm;
|
||||
/* CPU family selected at build time */
FLAC__CPUInfo_Type type;
|
||||
/* x86-specific feature flags */
FLAC__CPUInfo_x86 x86;
|
||||
/* PowerPC-specific feature flags */
FLAC__CPUInfo_ppc ppc;
|
||||
} FLAC__CPUInfo;
|
||||
|
||||
void FLAC__cpu_info(FLAC__CPUInfo *info);
|
||||
|
|
|
@ -87,18 +87,6 @@ void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_12(const FLAC__real data[]
|
|||
void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_16(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
|
||||
# endif
|
||||
# endif
|
||||
#if defined(FLAC__CPU_PPC64) && defined(FLAC__USE_VSX)
|
||||
#ifdef FLAC__HAS_TARGET_POWER9
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_10(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_14(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
|
||||
#endif
|
||||
#ifdef FLAC__HAS_TARGET_POWER8
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_10(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_14(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
|
||||
#endif
|
||||
#endif
|
||||
#if defined FLAC__CPU_ARM64 && FLAC__HAS_NEONINTRIN && FLAC__HAS_A64NEONINTRIN
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_neon_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_neon_lag_10(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
|
||||
|
|
|
@ -1,102 +0,0 @@
|
|||
/* libFLAC - Free Lossless Audio Codec library
|
||||
* Copyright (C) 2000-2009 Josh Coalson
|
||||
* Copyright (C) 2011-2022 Xiph.Org Foundation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* - Neither the name of the Xiph.org Foundation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
#endif
|
||||
|
||||
#ifndef FLAC__INTEGER_ONLY_LIBRARY
|
||||
#ifndef FLAC__NO_ASM
|
||||
#if defined(FLAC__CPU_PPC64) && defined(FLAC__USE_VSX)
|
||||
|
||||
#include "private/cpu.h"
|
||||
#include "private/lpc.h"
|
||||
#include "FLAC/assert.h"
|
||||
#include "FLAC/format.h"
|
||||
|
||||
#include <altivec.h>
|
||||
|
||||
#ifdef FLAC__HAS_TARGET_POWER8
|
||||
/* VSX autocorrelation variant built with the cpu=power8 target attribute
 * and MAX_LAG fixed at 14. The body is textually included from the shared
 * implementation, which asserts lag <= MAX_LAG and writes its sums to autoc. */
__attribute__((target("cpu=power8")))
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_14(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
|
||||
{
|
||||
/* Reset MAX_LAG so the included body is specialised for this function. */
#undef MAX_LAG
|
||||
#define MAX_LAG 14
|
||||
#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c"
|
||||
}
|
||||
|
||||
/* VSX autocorrelation variant built with the cpu=power8 target attribute
 * and MAX_LAG fixed at 10. The body is textually included from the shared
 * implementation, which asserts lag <= MAX_LAG and writes its sums to autoc. */
__attribute__((target("cpu=power8")))
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_10(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
|
||||
{
|
||||
/* Reset MAX_LAG so the included body is specialised for this function. */
#undef MAX_LAG
|
||||
#define MAX_LAG 10
|
||||
#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c"
|
||||
}
|
||||
|
||||
/* VSX autocorrelation variant built with the cpu=power8 target attribute
 * and MAX_LAG fixed at 8. The body is textually included from the shared
 * implementation, which asserts lag <= MAX_LAG and writes its sums to autoc. */
__attribute__((target("cpu=power8")))
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
|
||||
{
|
||||
/* Reset MAX_LAG so the included body is specialised for this function. */
#undef MAX_LAG
|
||||
#define MAX_LAG 8
|
||||
#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c"
|
||||
}
|
||||
#endif /* FLAC__HAS_TARGET_POWER8 */
|
||||
|
||||
#ifdef FLAC__HAS_TARGET_POWER9
|
||||
/* VSX autocorrelation variant built with the cpu=power9 target attribute
 * and MAX_LAG fixed at 14. The body is textually included from the shared
 * implementation, which asserts lag <= MAX_LAG and writes its sums to autoc. */
__attribute__((target("cpu=power9")))
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_14(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
|
||||
{
|
||||
/* Reset MAX_LAG so the included body is specialised for this function. */
#undef MAX_LAG
|
||||
#define MAX_LAG 14
|
||||
#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c"
|
||||
}
|
||||
|
||||
/* VSX autocorrelation variant built with the cpu=power9 target attribute
 * and MAX_LAG fixed at 10. The body is textually included from the shared
 * implementation, which asserts lag <= MAX_LAG and writes its sums to autoc. */
__attribute__((target("cpu=power9")))
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_10(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
|
||||
{
|
||||
/* Reset MAX_LAG so the included body is specialised for this function. */
#undef MAX_LAG
|
||||
#define MAX_LAG 10
|
||||
#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c"
|
||||
}
|
||||
|
||||
/* VSX autocorrelation variant built with the cpu=power9 target attribute
 * and MAX_LAG fixed at 8. The body is textually included from the shared
 * implementation, which asserts lag <= MAX_LAG and writes its sums to autoc. */
__attribute__((target("cpu=power9")))
|
||||
void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
|
||||
{
|
||||
/* Reset MAX_LAG so the included body is specialised for this function. */
#undef MAX_LAG
|
||||
#define MAX_LAG 8
|
||||
#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c"
|
||||
}
|
||||
#endif /* FLAC__HAS_TARGET_POWER9 */
|
||||
|
||||
#endif /* FLAC__CPU_PPC64 && FLAC__USE_VSX */
|
||||
#endif /* FLAC__NO_ASM */
|
||||
#endif /* FLAC__INTEGER_ONLY_LIBRARY */
|
|
@ -937,33 +937,6 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
|
|||
/* now override with asm where appropriate */
|
||||
#ifndef FLAC__INTEGER_ONLY_LIBRARY
|
||||
# ifndef FLAC__NO_ASM
|
||||
#if defined(FLAC__CPU_PPC64) && defined(FLAC__USE_VSX)
|
||||
#ifdef FLAC__HAS_TARGET_POWER8
|
||||
#ifdef FLAC__HAS_TARGET_POWER9
|
||||
if (encoder->private_->cpuinfo.ppc.arch_3_00) {
|
||||
if(encoder->protected_->max_lpc_order < 8)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_8;
|
||||
else if(encoder->protected_->max_lpc_order < 10)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_10;
|
||||
else if(encoder->protected_->max_lpc_order < 14)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_14;
|
||||
else
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
|
||||
} else
|
||||
#endif
|
||||
if (encoder->private_->cpuinfo.ppc.arch_2_07) {
|
||||
if(encoder->protected_->max_lpc_order < 8)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_8;
|
||||
else if(encoder->protected_->max_lpc_order < 10)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_10;
|
||||
else if(encoder->protected_->max_lpc_order < 14)
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_14;
|
||||
else
|
||||
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
|
||||
}
|
||||
#endif
|
||||
#endif /* defined(FLAC__CPU_PPC64) && defined(FLAC__USE_VSX) */
|
||||
|
||||
#if defined FLAC__CPU_ARM64 && FLAC__HAS_NEONINTRIN
|
||||
#if FLAC__HAS_A64NEONINTRIN
|
||||
if(encoder->protected_->max_lpc_order < 8)
|
||||
|
|
Loading…
Reference in New Issue