From ba5172e554fa830ebbdc8221f2631297a3b25f74 Mon Sep 17 00:00:00 2001 From: Martijn van Beurden Date: Fri, 27 Jan 2023 19:02:36 +0100 Subject: [PATCH] Remove all PPC-specific code (and a mention of SPARC) As it turns out that the PPC-specific code provides no benefit with modern, autovectorizing compilers, and given that there is a lot of cruft surrounding the PPC-specific parts, remove all of it. See also https://lists.xiph.org/pipermail/flac-dev/2022-December/006620.html --- Makefile.am | 4 - cmake/CheckAttribute.c.in | 6 - cmake/CheckAttribute.cmake | 23 --- cmake/CheckCPUArch.cmake | 4 - cmake/CheckVSX.c.in | 6 - cmake/CheckVSX.cmake | 14 -- config.cmake.h.in | 18 -- configure.ac | 78 -------- src/libFLAC/CMakeLists.txt | 17 +- src/libFLAC/Makefile.am | 18 +- src/libFLAC/cpu.c | 46 ----- .../lpc_compute_autocorrelation_intrin_vsx.c | 179 ------------------ src/libFLAC/include/private/cpu.h | 7 - src/libFLAC/include/private/lpc.h | 12 -- src/libFLAC/lpc_intrin_vsx.c | 102 ---------- src/libFLAC/stream_encoder.c | 27 --- 16 files changed, 4 insertions(+), 557 deletions(-) delete mode 100644 cmake/CheckAttribute.c.in delete mode 100644 cmake/CheckAttribute.cmake delete mode 100644 cmake/CheckVSX.c.in delete mode 100644 cmake/CheckVSX.cmake delete mode 100644 src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin_vsx.c delete mode 100644 src/libFLAC/lpc_intrin_vsx.c diff --git a/Makefile.am b/Makefile.am index 4484db92..36d823a8 100644 --- a/Makefile.am +++ b/Makefile.am @@ -43,12 +43,8 @@ EXTRA_DIST = \ flac-config.cmake.in \ cmake/CheckA64NEON.c.in \ cmake/CheckA64NEON.cmake \ - cmake/CheckAttribute.c.in \ - cmake/CheckAttribute.cmake \ cmake/CheckCPUArch.c.in \ cmake/CheckCPUArch.cmake \ - cmake/CheckVSX.c.in \ - cmake/CheckVSX.cmake \ cmake/FindOgg.cmake \ cmake/UseSystemExtensions.cmake \ CHANGELOG.md \ diff --git a/cmake/CheckAttribute.c.in b/cmake/CheckAttribute.c.in deleted file mode 100644 index a3441462..00000000 --- a/cmake/CheckAttribute.c.in +++ 
/dev/null @@ -1,6 +0,0 @@ -int main (void) -{ - void foo(void) __attribute__ ((@CHECK_ATTRIBUTE@)); - ; - return 0; -} \ No newline at end of file diff --git a/cmake/CheckAttribute.cmake b/cmake/CheckAttribute.cmake deleted file mode 100644 index d2e1b500..00000000 --- a/cmake/CheckAttribute.cmake +++ /dev/null @@ -1,23 +0,0 @@ -macro(_CHECK_ATTRIBUTE ATTRIBUTE VARIABLE) - if(NOT DEFINED HAVE_${VARIABLE}) - message(STATUS "Check for __attribute__ ((${ATTRIBUTE})) ") - set(CHECK_ATTRIBUTE ${ATTRIBUTE}) - configure_file(${PROJECT_SOURCE_DIR}/cmake/CheckAttribute.c.in ${PROJECT_BINARY_DIR}/CMakeFiles/CMakeTmp/CheckAttribute.c @ONLY) - try_compile(HAVE_${VARIABLE} "${PROJECT_BINARY_DIR}" - "${PROJECT_BINARY_DIR}/CMakeFiles/CMakeTmp/CheckAttribute.c") - if(HAVE_${VARIABLE}) - message(STATUS "Check for __attribute__ ((${ATTRIBUTE})) - yes") - set(${VARIABLE} 1 CACHE INTERNAL "Result of CHECK_ATTRIBUTE ${ATTRIBUTE}" FORCE) - else () - message(STATUS "Check for __attribute__ ((${ATTRIBUTE})) - no") - endif() - endif () -endmacro(_CHECK_ATTRIBUTE) - -macro(CHECK_ATTRIBUTE_POWER8 VARIABLE) - _CHECK_ATTRIBUTE("target(\"cpu=power8\")" ${VARIABLE}) -endmacro(CHECK_ATTRIBUTE_POWER8) - -macro(CHECK_ATTRIBUTE_POWER9 VARIABLE) - _CHECK_ATTRIBUTE("target(\"cpu=power9\")" ${VARIABLE}) -endmacro(CHECK_ATTRIBUTE_POWER9) diff --git a/cmake/CheckCPUArch.cmake b/cmake/CheckCPUArch.cmake index 4c444363..665fa615 100644 --- a/cmake/CheckCPUArch.cmake +++ b/cmake/CheckCPUArch.cmake @@ -22,10 +22,6 @@ macro(CHECK_CPU_ARCH_X86 VARIABLE) _CHECK_CPU_ARCH(x86 "defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) ||defined( __i386) || defined(_M_IX86)" ${VARIABLE}) endmacro(CHECK_CPU_ARCH_X86) -macro(CHECK_CPU_ARCH_PPC64 VARIABLE) - _CHECK_CPU_ARCH(ppc64 "defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) ||defined(_ARCH_PPC64)" ${VARIABLE}) -endmacro(CHECK_CPU_ARCH_PPC64) - macro(CHECK_CPU_ARCH_ARM64 VARIABLE) _CHECK_CPU_ARCH(arm64 "defined(__aarch64__) 
|| defined(__arm64__)" ${VARIABLE}) endmacro(CHECK_CPU_ARCH_ARM64) diff --git a/cmake/CheckVSX.c.in b/cmake/CheckVSX.c.in deleted file mode 100644 index b172a491..00000000 --- a/cmake/CheckVSX.c.in +++ /dev/null @@ -1,6 +0,0 @@ -#include <altivec.h> -int main (void) -{ - vector float d = {0.0f,0.0f,0.0f,0.0f}; - vec_doubleh(d); -} \ No newline at end of file diff --git a/cmake/CheckVSX.cmake b/cmake/CheckVSX.cmake deleted file mode 100644 index 1feddb18..00000000 --- a/cmake/CheckVSX.cmake +++ /dev/null @@ -1,14 +0,0 @@ -macro(CHECK_VSX VARIABLE) - if(NOT DEFINED HAVE_${VARIABLE}) - message(STATUS "Check whether VSX can be used") - configure_file(${PROJECT_SOURCE_DIR}/cmake/CheckVSX.c.in ${PROJECT_BINARY_DIR}/CMakeFiles/CMakeTmp/CheckVSX.c @ONLY) - try_compile(HAVE_${VARIABLE} "${PROJECT_BINARY_DIR}" - "${PROJECT_BINARY_DIR}/CMakeFiles/CMakeTmp/CheckVSX.c") - if(HAVE_${VARIABLE}) - message(STATUS "Check whether VSX can be used - yes") - set(${VARIABLE} 1 CACHE INTERNAL "Result of CHECK_VSX" FORCE) - else () - message(STATUS "Check whether VSX can be used - no") - endif() - endif () -endmacro(CHECK_VSX) diff --git a/config.cmake.h.in b/config.cmake.h.in index 10efc712..acc73f08 100644 --- a/config.cmake.h.in +++ b/config.cmake.h.in @@ -9,12 +9,6 @@ /* Target processor ARM64 */ #cmakedefine FLAC__CPU_ARM64 -/* Target processor PPC */ -#cmakedefine FLAC__CPU_PPC - -/* Target processor PPC64 */ -#cmakedefine FLAC__CPU_PPC64 - /* Set FLAC__BYTES_PER_WORD to 8 (4 is the default) */ #cmakedefine01 ENABLE_64_BIT_WORDS @@ -28,12 +22,6 @@ #cmakedefine01 OGG_FOUND #define FLAC__HAS_OGG OGG_FOUND -/* define if compiler has __attribute__((target("cpu=power8"))) support */ -#cmakedefine FLAC__HAS_TARGET_POWER8 - -/* define if compiler has __attribute__((target("cpu=power9"))) support */ -#cmakedefine FLAC__HAS_TARGET_POWER9 - /* Set to 1 if <x86intrin.h> is available.
*/ #cmakedefine01 FLAC__HAS_X86INTRIN @@ -49,18 +37,12 @@ /* define if building for Linux */ #cmakedefine FLAC__SYS_LINUX -/* define to enable use of Altivec instructions */ -#cmakedefine FLAC__USE_ALTIVEC - /* define to enable use of AVX instructions */ #cmakedefine WITH_AVX #ifdef WITH_AVX #define FLAC__USE_AVX #endif -/* define to enable use of VSX instructions */ -#cmakedefine FLAC__USE_VSX - /* Define to the commit date of the current git HEAD */ #cmakedefine GIT_COMMIT_DATE "@GIT_COMMIT_DATE@" diff --git a/configure.ac b/configure.ac index d505450f..6dcaba26 100644 --- a/configure.ac +++ b/configure.ac @@ -126,36 +126,15 @@ case "$host_cpu" in AC_DEFINE(FLAC__CPU_IA32) AH_TEMPLATE(FLAC__CPU_IA32, [define if building for ia32/i386]) ;; - powerpc64|powerpc64le) - cpu_ppc64=true - cpu_ppc=true - AC_DEFINE(FLAC__CPU_PPC) - AH_TEMPLATE(FLAC__CPU_PPC, [define if building for PowerPC]) - AC_DEFINE(FLAC__CPU_PPC64) - AH_TEMPLATE(FLAC__CPU_PPC64, [define if building for PowerPC64]) - ;; - powerpc|powerpcle) - cpu_ppc=true - AC_DEFINE(FLAC__CPU_PPC) - AH_TEMPLATE(FLAC__CPU_PPC, [define if building for PowerPC]) - ;; arm64|aarch64) cpu_arm64=true AC_DEFINE(FLAC__CPU_ARM64) AH_TEMPLATE(FLAC__CPU_ARM64, [define if building for ARM]) ;; - sparc) - cpu_sparc=true - AC_DEFINE(FLAC__CPU_SPARC) - AH_TEMPLATE(FLAC__CPU_SPARC, [define if building for SPARC]) - ;; esac AM_CONDITIONAL(FLAC__CPU_X86_64, test "x$cpu_x86_64" = xtrue) AM_CONDITIONAL(FLaC__CPU_IA32, test "x$cpu_ia32" = xtrue) -AM_CONDITIONAL(FLaC__CPU_PPC, test "x$cpu_ppc" = xtrue) -AM_CONDITIONAL(FLaC__CPU_PPC64, test "x$cpu_ppc64" = xtrue) AM_CONDITIONAL(FLAC__CPU_ARM64, test "x$cpu_arm64" = xtrue) -AM_CONDITIONAL(FLaC__CPU_SPARC, test "x$cpu_sparc" = xtrue) if test "x$ac_cv_header_x86intrin_h" = xyes -a "x$asm_opt" = xyes; then AC_DEFINE([FLAC__HAS_X86INTRIN], 1, [Set to 1 if <x86intrin.h> is available.]) @@ -183,36 +162,6 @@ else AC_DEFINE([FLAC__HAS_NEONINTRIN], 0) fi -if test x"$cpu_ppc64" = xtrue -a "x$asm_opt" = xyes ; then
- -AC_C_ATTRIBUTE([target("cpu=power8")], - [have_cpu_power8=yes], - [have_cpu_power8=no]) -if test x"$have_cpu_power8" = xyes ; then - AC_DEFINE(FLAC__HAS_TARGET_POWER8) - AH_TEMPLATE(FLAC__HAS_TARGET_POWER8, [define if compiler has __attribute__((target("cpu=power8"))) support]) -fi - -AC_C_ATTRIBUTE([target("cpu=power9")], - [have_cpu_power9=yes], - [have_cpu_power9=no]) -if test x"$have_cpu_power9" = xyes ; then - AC_DEFINE(FLAC__HAS_TARGET_POWER9) - AH_TEMPLATE(FLAC__HAS_TARGET_POWER9, [define if compiler has __attribute__((target("cpu=power9"))) support]) -fi - -if test x"$have_cpu_power8" = xyes || test x"$have_cpu_power9" = xyes ; then - AC_MSG_CHECKING([whether altivec.h has vec_doubleh()]) - AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM([[#include <altivec.h>]], - [[vector float d = {0.0f,0.0f,0.0f,0.0f}; vec_doubleh(d);]])], - [AC_MSG_RESULT([yes]) - has_vec_doubleh=true], - [AC_MSG_RESULT([no])]) -fi - -fi - case "$host" in i386-*-openbsd3.[[0-3]]) OBJ_FORMAT=aoutb ;; *-*-cygwin|*mingw*) OBJ_FORMAT=win32 ;; @@ -254,33 +203,6 @@ fi AM_CONDITIONAL([DEBUG], [test "x${ax_enable_debug}" = "xyes" || test "x${ax_enable_debug}" = "xinfo"]) -AC_ARG_ENABLE(altivec, -AS_HELP_STRING([--disable-altivec],[Disable use of Altivec instructions]), -[case "${enableval}" in - yes) use_altivec=true ;; - no) use_altivec=false ;; - *) AC_MSG_ERROR(bad value ${enableval} for --enable-altivec) ;; -esac],[use_altivec=true]) -AM_CONDITIONAL(FLaC__USE_ALTIVEC, test "x$use_altivec" = xtrue) -if test "x$use_altivec" = xtrue ; then -AC_DEFINE(FLAC__USE_ALTIVEC) -AH_TEMPLATE(FLAC__USE_ALTIVEC, [define to enable use of Altivec instructions]) -fi - -AC_ARG_ENABLE(vsx, -AS_HELP_STRING([--disable-vsx],[Disable VSX optimizations]), -[case "${enableval}" in - yes) use_vsx=true ;; - no) use_vsx=false ;; - *) AC_MSG_ERROR(bad value ${enableval} for --enable-vsx) ;; -esac],[use_vsx=true]) -AM_CONDITIONAL(FLaC__USE_VSX, test "x$use_vsx" = xtrue) -if test "x$use_vsx$has_vec_doubleh" = xtruetrue ; then
-AC_DEFINE(FLAC__USE_VSX) -AH_TEMPLATE(FLAC__USE_VSX, [define to enable use of VSX instructions]) -asm_optimisation=yes -fi - AC_ARG_ENABLE(avx, AS_HELP_STRING([--disable-avx],[Disable AVX, AVX2 optimizations. There is runtime detection of CPU features, so disabling is only necessary when a compiler does not know about them]), [case "${enableval}" in diff --git a/src/libFLAC/CMakeLists.txt b/src/libFLAC/CMakeLists.txt index caf0864d..55ed5ab6 100644 --- a/src/libFLAC/CMakeLists.txt +++ b/src/libFLAC/CMakeLists.txt @@ -8,8 +8,6 @@ check_function_exists(lround HAVE_LROUND) include(CheckCSourceCompiles) include(CheckCPUArch) -include(CheckAttribute) -include(CheckVSX) include(CheckA64NEON) check_cpu_arch_x64(FLAC__CPU_X86_64) @@ -24,17 +22,9 @@ if(FLAC__CPU_X86_64 OR FLAC__CPU_IA32) set_source_files_properties(lpc_intrin_avx2.c stream_encoder_intrin_avx2.c lpc_intrin_fma.c PROPERTIES COMPILE_FLAGS /arch:AVX2) endif() else() - check_cpu_arch_ppc64(FLAC__CPU_PPC64) - if(FLAC__CPU_PPC64) - set(FLAC__CPU_PPC 1 CACHE INTERNAL "Set because FLAC__CPU_PPC64 depends on it" FORCE) - check_attribute_power8(FLAC__HAS_TARGET_POWER8) - check_attribute_power9(FLAC__HAS_TARGET_POWER9) - check_vsx(FLAC__USE_VSX) - else() - check_cpu_arch_arm64(FLAC__CPU_ARM64) - if(FLAC__CPU_ARM64) - check_a64neon(FLAC__HAS_A64NEONINTRIN) - endif() + check_cpu_arch_arm64(FLAC__CPU_ARM64) + if(FLAC__CPU_ARM64) + check_a64neon(FLAC__HAS_A64NEONINTRIN) endif() endif() @@ -61,7 +51,6 @@ add_library(FLAC lpc_intrin_sse41.c lpc_intrin_avx2.c lpc_intrin_fma.c - lpc_intrin_vsx.c md5.c memory.c metadata_iterators.c diff --git a/src/libFLAC/Makefile.am b/src/libFLAC/Makefile.am index a7c8fb4a..0fc98459 100644 --- a/src/libFLAC/Makefile.am +++ b/src/libFLAC/Makefile.am @@ -40,21 +40,7 @@ ASSOCMATHCFLAGS = -fassociative-math -fno-signed-zeros -fno-trapping-math -freci endif endif -# FIXME: The following logic should be part of configure, not of Makefile.am - -if FLaC__CPU_PPC -if FLaC__SYS_DARWIN -CPUCFLAGS = 
-faltivec -else -CPUCFLAGS = -if FLaC__USE_ALTIVEC -CPUCFLAGS += -maltivec -mabi=altivec -endif -endif -endif - - -AM_CFLAGS = $(DEBUGCFLAGS) $(CPUCFLAGS) ${ASSOCMATHCFLAGS} @OGG_CFLAGS@ +AM_CFLAGS = $(DEBUGCFLAGS) ${ASSOCMATHCFLAGS} @OGG_CFLAGS@ libFLAC_la_LIBADD = @OGG_LIBS@ -lm @@ -72,7 +58,6 @@ EXTRA_DIST = \ libFLAC.m4 \ deduplication/lpc_compute_autocorrelation_intrin.c \ deduplication/lpc_compute_autocorrelation_intrin_sse2.c \ - deduplication/lpc_compute_autocorrelation_intrin_vsx.c \ deduplication/lpc_compute_autocorrelation_intrin_neon.c if OS_IS_WINDOWS @@ -106,7 +91,6 @@ libFLAC_sources = \ lpc_intrin_sse41.c \ lpc_intrin_avx2.c \ lpc_intrin_fma.c \ - lpc_intrin_vsx.c \ lpc_intrin_neon.c \ md5.c \ memory.c \ diff --git a/src/libFLAC/cpu.c b/src/libFLAC/cpu.c index cba0ad02..51f72ddf 100644 --- a/src/libFLAC/cpu.c +++ b/src/libFLAC/cpu.c @@ -228,47 +228,6 @@ x86_cpu_info (FLAC__CPUInfo *info) #endif } -static void -ppc_cpu_info (FLAC__CPUInfo *info) -{ -#if defined FLAC__CPU_PPC -#ifndef PPC_FEATURE2_ARCH_3_00 -#define PPC_FEATURE2_ARCH_3_00 0x00800000 -#endif - -#ifndef PPC_FEATURE2_ARCH_2_07 -#define PPC_FEATURE2_ARCH_2_07 0x80000000 -#endif - -#if defined (__linux__) && defined(HAVE_GETAUXVAL) - if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_3_00) { - info->ppc.arch_3_00 = true; - } else if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07) { - info->ppc.arch_2_07 = true; - } -#elif defined(__FreeBSD__) && defined(HAVE_SYS_AUXV_H) - unsigned long hwcaps; - elf_aux_info(AT_HWCAP2, &hwcaps, sizeof(hwcaps)); - if (hwcaps & PPC_FEATURE2_ARCH_3_00) { - info->ppc.arch_3_00 = true; - } else if (hwcaps & PPC_FEATURE2_ARCH_2_07) { - info->ppc.arch_2_07 = true; - } -#elif defined(__APPLE__) - /* no Mac OS X version supports CPU with Power AVI v2.07 or better */ - info->ppc.arch_2_07 = false; - info->ppc.arch_3_00 = false; -#else - info->ppc.arch_2_07 = false; - info->ppc.arch_3_00 = false; -#endif - -#else - info->ppc.arch_2_07 = false; - info->ppc.arch_3_00 = 
false; -#endif -} - void FLAC__cpu_info (FLAC__CPUInfo *info) { memset(info, 0, sizeof(*info)); @@ -277,8 +236,6 @@ void FLAC__cpu_info (FLAC__CPUInfo *info) info->type = FLAC__CPUINFO_TYPE_IA32; #elif defined FLAC__CPU_X86_64 info->type = FLAC__CPUINFO_TYPE_X86_64; -#elif defined FLAC__CPU_PPC - info->type = FLAC__CPUINFO_TYPE_PPC; #else info->type = FLAC__CPUINFO_TYPE_UNKNOWN; #endif @@ -288,9 +245,6 @@ void FLAC__cpu_info (FLAC__CPUInfo *info) case FLAC__CPUINFO_TYPE_X86_64: x86_cpu_info (info); break; - case FLAC__CPUINFO_TYPE_PPC: - ppc_cpu_info (info); - break; default: info->use_asm = false; break; diff --git a/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin_vsx.c b/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin_vsx.c deleted file mode 100644 index 721d2a0a..00000000 --- a/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin_vsx.c +++ /dev/null @@ -1,179 +0,0 @@ -/* This code is imported several times in lpc_intrin_vsx.c with different - * values for MAX_LAG. 
Comments are for MAX_LAG == 14 */ - -long i; -long limit = (long)data_len - MAX_LAG; -const FLAC__real *base; -vector double d0, d1, d2, d3; -vector double sum0 = { 0.0f, 0.0f}; -vector double sum10 = { 0.0f, 0.0f}; -vector double sum1 = { 0.0f, 0.0f}; -vector double sum11 = { 0.0f, 0.0f}; -vector double sum2 = { 0.0f, 0.0f}; -vector double sum12 = { 0.0f, 0.0f}; -vector double sum3 = { 0.0f, 0.0f}; -vector double sum13 = { 0.0f, 0.0f}; -#if MAX_LAG > 8 -vector double d4; -vector double sum4 = { 0.0f, 0.0f}; -vector double sum14 = { 0.0f, 0.0f}; -#endif -#if MAX_LAG > 10 -vector double d5, d6; -vector double sum5 = { 0.0f, 0.0f}; -vector double sum15 = { 0.0f, 0.0f}; -vector double sum6 = { 0.0f, 0.0f}; -vector double sum16 = { 0.0f, 0.0f}; -#endif - -vector float dtemp; - -#if WORDS_BIGENDIAN -vector unsigned long long vperm = { 0x08090A0B0C0D0E0F, 0x1011121314151617 }; -vector unsigned long long vsel = { 0x0000000000000000, 0xFFFFFFFFFFFFFFFF }; -#else -vector unsigned long long vperm = { 0x0F0E0D0C0B0A0908, 0x1716151413121110 }; -vector unsigned long long vsel = { 0xFFFFFFFFFFFFFFFF, 0x0000000000000000 }; -#endif - -(void) lag; -FLAC__ASSERT(lag <= MAX_LAG); - -base = data; - -/* First, check whether it is possible to load - * 16 elements at once */ -if(limit > 2){ - /* Convert all floats to doubles */ - dtemp = vec_vsx_ld(0, base); - d0 = vec_doubleh(dtemp); - d1 = vec_doublel(dtemp); - dtemp = vec_vsx_ld(16, base); - d2 = vec_doubleh(dtemp); - d3 = vec_doublel(dtemp); -#if MAX_LAG > 8 - dtemp = vec_vsx_ld(32, base); - d4 = vec_doubleh(dtemp); -#endif -#if MAX_LAG > 10 - d5 = vec_doublel(dtemp); - dtemp = vec_vsx_ld(48, base); - d6 = vec_doubleh(dtemp); -#endif - - base += MAX_LAG; - - /* Loop until nearing data_len */ - for (i = 0; i <= (limit-2); i += 2) { - vector double d, dnext; - - /* Load next 2 datapoints and convert to double - * for lag 14 that is data[i+14] and data[i+15] */ - dtemp = vec_vsx_ld(0, base); - dnext = vec_doubleh(dtemp); - base += 2; - 
- /* Create vector d with both elements set to the first - * element of d0, so both elements data[i] */ - d = vec_splat(d0, 0); - sum0 += d0 * d; // Multiply data[i] with data[i] and data[i+1] - sum1 += d1 * d; // Multiply data[i] with data[i+2] and data[i+3] - sum2 += d2 * d; // Multiply data[i] with data[i+4] and data[i+5] - sum3 += d3 * d; // Multiply data[i] with data[i+6] and data[i+7] -#if MAX_LAG > 8 - sum4 += d4 * d; // Multiply data[i] with data[i+8] and data[i+9] -#endif -#if MAX_LAG > 10 - sum5 += d5 * d; // Multiply data[i] with data[i+10] and data[i+11] - sum6 += d6 * d; // Multiply data[i] with data[i+12] and data[i+13] -#endif - - /* Set both elements of d to data[i+1] */ - d = vec_splat(d0, 1); - - /* Set d0 to data[i+14] and data[i+1] */ - d0 = vec_sel(d0, dnext, vsel); - sum10 += d0 * d; /* Multiply data[i+1] with data[i+14] and data[i+1] */ - sum11 += d1 * d; /* Multiply data[i+1] with data[i+2] and data[i+3] */ - sum12 += d2 * d; - sum13 += d3 * d; -#if MAX_LAG > 8 - sum14 += d4 * d; -#endif -#if MAX_LAG > 10 - sum15 += d5 * d; - sum16 += d6 * d; /* Multiply data[i+1] with data[i+12] and data[i+13] */ -#endif - - /* Shift all loaded values one vector (2 elements) so the next - * iterations aligns again */ - d0 = d1; - d1 = d2; - d2 = d3; -#if MAX_LAG > 8 - d3 = d4; -#endif -#if MAX_LAG > 10 - d4 = d5; - d5 = d6; -#endif - -#if MAX_LAG == 8 - d3 = dnext; -#elif MAX_LAG == 10 - d4 = dnext; -#elif MAX_LAG == 14 - d6 = dnext; -#else -#error "Unsupported lag"; -#endif - } - - /* Because the values in sum10..sum16 do not align with - * the values in sum0..sum6, these need to be 'left-rotated' - * before adding them to sum0..sum6 */ - sum0 += vec_perm(sum10, sum11, (vector unsigned char)vperm); - sum1 += vec_perm(sum11, sum12, (vector unsigned char)vperm); - sum2 += vec_perm(sum12, sum13, (vector unsigned char)vperm); -#if MAX_LAG > 8 - sum3 += vec_perm(sum13, sum14, (vector unsigned char)vperm); -#endif -#if MAX_LAG > 10 - sum4 += vec_perm(sum14, 
sum15, (vector unsigned char)vperm); - sum5 += vec_perm(sum15, sum16, (vector unsigned char)vperm); -#endif - -#if MAX_LAG == 8 - sum3 += vec_perm(sum13, sum10, (vector unsigned char)vperm); -#elif MAX_LAG == 10 - sum4 += vec_perm(sum14, sum10, (vector unsigned char)vperm); -#elif MAX_LAG == 14 - sum6 += vec_perm(sum16, sum10, (vector unsigned char)vperm); -#else -#error "Unsupported lag"; -#endif -}else{ - i = 0; -} - -/* Store result */ -vec_vsx_st(sum0, 0, autoc); -vec_vsx_st(sum1, 16, autoc); -vec_vsx_st(sum2, 32, autoc); -vec_vsx_st(sum3, 48, autoc); -#if MAX_LAG > 8 -vec_vsx_st(sum4, 64, autoc); -#endif -#if MAX_LAG > 10 -vec_vsx_st(sum5, 80, autoc); -vec_vsx_st(sum6, 96, autoc); -#endif - -/* Process remainder of samples in a non-VSX way */ -for (; i < (long)data_len; i++) { - uint32_t coeff; - - FLAC__real d = data[i]; - for (coeff = 0; coeff < data_len - i; coeff++) - autoc[coeff] += d * data[i+coeff]; -} diff --git a/src/libFLAC/include/private/cpu.h b/src/libFLAC/include/private/cpu.h index 4f1d16c1..5a76a753 100644 --- a/src/libFLAC/include/private/cpu.h +++ b/src/libFLAC/include/private/cpu.h @@ -153,7 +153,6 @@ typedef enum { FLAC__CPUINFO_TYPE_IA32, FLAC__CPUINFO_TYPE_X86_64, - FLAC__CPUINFO_TYPE_PPC, FLAC__CPUINFO_TYPE_UNKNOWN } FLAC__CPUInfo_Type; @@ -174,16 +173,10 @@ typedef struct { FLAC__bool fma; } FLAC__CPUInfo_x86; -typedef struct { - FLAC__bool arch_3_00; - FLAC__bool arch_2_07; -} FLAC__CPUInfo_ppc; - typedef struct { FLAC__bool use_asm; FLAC__CPUInfo_Type type; FLAC__CPUInfo_x86 x86; - FLAC__CPUInfo_ppc ppc; } FLAC__CPUInfo; void FLAC__cpu_info(FLAC__CPUInfo *info); diff --git a/src/libFLAC/include/private/lpc.h b/src/libFLAC/include/private/lpc.h index 8ff3b894..eed7b666 100644 --- a/src/libFLAC/include/private/lpc.h +++ b/src/libFLAC/include/private/lpc.h @@ -87,18 +87,6 @@ void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_12(const FLAC__real data[] void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_16(const FLAC__real data[], 
uint32_t data_len, uint32_t lag, double autoc[]); # endif # endif -#if defined(FLAC__CPU_PPC64) && defined(FLAC__USE_VSX) -#ifdef FLAC__HAS_TARGET_POWER9 -void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]); -void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_10(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]); -void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_14(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]); -#endif -#ifdef FLAC__HAS_TARGET_POWER8 -void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]); -void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_10(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]); -void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_14(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]); -#endif -#endif #if defined FLAC__CPU_ARM64 && FLAC__HAS_NEONINTRIN && FLAC__HAS_A64NEONINTRIN void FLAC__lpc_compute_autocorrelation_intrin_neon_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]); void FLAC__lpc_compute_autocorrelation_intrin_neon_lag_10(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]); diff --git a/src/libFLAC/lpc_intrin_vsx.c b/src/libFLAC/lpc_intrin_vsx.c deleted file mode 100644 index 3f823b99..00000000 --- a/src/libFLAC/lpc_intrin_vsx.c +++ /dev/null @@ -1,102 +0,0 @@ -/* libFLAC - Free Lossless Audio Codec library - * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2022 Xiph.Org Foundation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the 
following disclaimer. - * - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * - Neither the name of the Xiph.org Foundation nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif - -#ifndef FLAC__INTEGER_ONLY_LIBRARY -#ifndef FLAC__NO_ASM -#if defined(FLAC__CPU_PPC64) && defined(FLAC__USE_VSX) - -#include "private/cpu.h" -#include "private/lpc.h" -#include "FLAC/assert.h" -#include "FLAC/format.h" - -#include <altivec.h> - -#ifdef FLAC__HAS_TARGET_POWER8 -__attribute__((target("cpu=power8"))) -void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_14(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]) -{ -#undef MAX_LAG -#define MAX_LAG 14 -#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c" -} - -__attribute__((target("cpu=power8"))) -void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_10(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]) -{ -#undef MAX_LAG -#define MAX_LAG 10 -#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c" -} - -__attribute__((target("cpu=power8"))) -void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]) -{ -#undef MAX_LAG -#define MAX_LAG 8 -#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c" -} -#endif /* FLAC__HAS_TARGET_POWER8 */ - -#ifdef FLAC__HAS_TARGET_POWER9 -__attribute__((target("cpu=power9"))) -void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_14(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]) -{ -#undef MAX_LAG -#define MAX_LAG 14 -#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c" -} - -__attribute__((target("cpu=power9"))) -void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_10(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]) -{ -#undef MAX_LAG -#define MAX_LAG 10 -#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c" -} - -__attribute__((target("cpu=power9"))) -void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double
autoc[]) -{ -#undef MAX_LAG -#define MAX_LAG 8 -#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c" -} -#endif /* FLAC__HAS_TARGET_POWER9 */ - -#endif /* FLAC__CPU_PPC64 && FLAC__USE_VSX */ -#endif /* FLAC__NO_ASM */ -#endif /* FLAC__INTEGER_ONLY_LIBRARY */ diff --git a/src/libFLAC/stream_encoder.c b/src/libFLAC/stream_encoder.c index c950f136..cba69e60 100644 --- a/src/libFLAC/stream_encoder.c +++ b/src/libFLAC/stream_encoder.c @@ -937,33 +937,6 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_( /* now override with asm where appropriate */ #ifndef FLAC__INTEGER_ONLY_LIBRARY # ifndef FLAC__NO_ASM -#if defined(FLAC__CPU_PPC64) && defined(FLAC__USE_VSX) -#ifdef FLAC__HAS_TARGET_POWER8 -#ifdef FLAC__HAS_TARGET_POWER9 - if (encoder->private_->cpuinfo.ppc.arch_3_00) { - if(encoder->protected_->max_lpc_order < 8) - encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_8; - else if(encoder->protected_->max_lpc_order < 10) - encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_10; - else if(encoder->protected_->max_lpc_order < 14) - encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_14; - else - encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation; - } else -#endif - if (encoder->private_->cpuinfo.ppc.arch_2_07) { - if(encoder->protected_->max_lpc_order < 8) - encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_8; - else if(encoder->protected_->max_lpc_order < 10) - encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_10; - else if(encoder->protected_->max_lpc_order < 14) - encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_14; - else - 
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation; - } -#endif -#endif /* defined(FLAC__CPU_PPC64) && defined(FLAC__USE_VSX) */ - #if defined FLAC__CPU_ARM64 && FLAC__HAS_NEONINTRIN #if FLAC__HAS_A64NEONINTRIN if(encoder->protected_->max_lpc_order < 8)