Compile libFLAC with associative floating-point math

2022-10-07 09:14:03 +02:00 · 2022-10-07 09:14:03 +02:00 · 06769de241
parent f64f3782f0
commit 06769de241
6 changed files with 75 additions and 15 deletions
--- a/configure.ac
+++ b/configure.ac
@ -598,6 +598,9 @@ fi

 AC_SUBST(GIT_COMMIT_VERSION_HASH)

+dnl Probe the associative-math flag set; HAVE_ASSOC_MATH is forced to "no" on failure so a value inherited from the environment cannot enable it.
+AX_CHECK_COMPILE_FLAG([-fassociative-math -fno-signed-zeros -fno-trapping-math -freciprocal-math],[HAVE_ASSOC_MATH=yes],[HAVE_ASSOC_MATH=no])
+AM_CONDITIONAL([ASSOC_MATH_AVAILABLE], [test "x$HAVE_ASSOC_MATH" = "xyes"])

 AC_CONFIG_FILES([ \
 	Makefile \
--- a/m4/ax_check_compile_flag.m4
+++ b/m4/ax_check_compile_flag.m4
@ -0,0 +1,53 @@
+# ===========================================================================
+#  https://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT])
+#
+# DESCRIPTION
+#
+#   Check whether the given FLAG works with the current language's compiler
+#   or gives an error.  (Warnings, however, are ignored)
+#
+#   ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
+#   success/failure.
+#
+#   If EXTRA-FLAGS is defined, it is added to the current language's default
+#   flags (e.g. CFLAGS) when the check is done.  The check is thus made with
+#   the flags: "CFLAGS EXTRA-FLAGS FLAG".  This can for example be used to
+#   force the compiler to issue an error when a bad flag is given.
+#
+#   INPUT gives an alternative input source to AC_COMPILE_IFELSE.
+#
+#   NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
+#   macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
+#   Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com>
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved.  This file is offered as-is, without any
+#   warranty.
+
+#serial 6
+
+AC_DEFUN([AX_CHECK_COMPILE_FLAG],
+[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF
+AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
+AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
+  ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
+  _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
+  AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])],
+    [AS_VAR_SET(CACHEVAR,[yes])],
+    [AS_VAR_SET(CACHEVAR,[no])])
+  _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
+AS_VAR_IF(CACHEVAR,yes,
+  [m4_default([$2], :)],
+  [m4_default([$3], :)])
+AS_VAR_POPDEF([CACHEVAR])dnl
+])dnl AX_CHECK_COMPILE_FLAG
--- a/src/libFLAC/CMakeLists.txt
+++ b/src/libFLAC/CMakeLists.txt
@ -21,11 +21,7 @@ if(FLAC__CPU_X86_64 OR FLAC__CPU_IA32)
    set(FLAC__ALIGN_MALLOC_DATA 1)
    option(WITH_AVX "Enable AVX, AVX2 optimizations (with runtime detection, resulting binary does not require AVX2)" ON)
    if(WITH_AVX AND MSVC)
-        set_source_files_properties(lpc_intrin_avx2.c stream_encoder_intrin_avx2.c PROPERTIES COMPILE_FLAGS /arch:AVX2)
-        set_source_files_properties(lpc_intrin_fma.c PROPERTIES COMPILE_FLAGS "/arch:AVX2 /fp:fast")
-    endif()
-    if(WITH_AVX AND (CMAKE_C_COMPILER_ID MATCHES "Clang"))
-        set_source_files_properties(lpc_intrin_fma.c PROPERTIES COMPILE_FLAGS "-ffast-math")
+        set_source_files_properties(lpc_intrin_avx2.c stream_encoder_intrin_avx2.c lpc_intrin_fma.c PROPERTIES COMPILE_FLAGS /arch:AVX2)
    endif()
 else()
    check_cpu_arch_ppc64(FLAC__CPU_PPC64)
@ -132,8 +128,17 @@ if(BUILD_SHARED_LIBS)
    endif()
 endif()

-add_library(FLAC::FLAC ALIAS FLAC)
+# Probe whether the C compiler accepts the complete associative-math flag set.
+check_c_compiler_flag("-fassociative-math -fno-signed-zeros -fno-trapping-math -freciprocal-math" HAVE_ASSOC_MATH)

+# MSVC always gets /fp:fast; any other compiler gets the probed flag set, and only when the probe succeeded.
+if(MSVC)
+    target_compile_options(FLAC BEFORE PRIVATE "/fp:fast")
+elseif(HAVE_ASSOC_MATH)
+    target_compile_options(FLAC BEFORE PRIVATE -fassociative-math -fno-signed-zeros -fno-trapping-math -freciprocal-math)
+endif()
+
+add_library(FLAC::FLAC ALIAS FLAC)

 install(TARGETS FLAC EXPORT targets
    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/"
--- a/src/libFLAC/Makefile.am
+++ b/src/libFLAC/Makefile.am
@ -49,7 +49,11 @@ endif
 endif
 endif

-AM_CFLAGS = $(DEBUGCFLAGS) $(CPUCFLAGS) @OGG_CFLAGS@
+# ASSOCMATHCFLAGS is only defined when the ASSOC_MATH_AVAILABLE conditional is true; otherwise it expands to nothing.
+if ASSOC_MATH_AVAILABLE
+ASSOCMATHCFLAGS = -fassociative-math -fno-signed-zeros -fno-trapping-math -freciprocal-math
+endif
+AM_CFLAGS = $(DEBUGCFLAGS) $(CPUCFLAGS) $(ASSOCMATHCFLAGS) @OGG_CFLAGS@

 if FLaC__NO_ASM
 else
--- a/src/libFLAC/include/private/cpu.h
+++ b/src/libFLAC/include/private/cpu.h
@ -63,7 +63,6 @@
 /* SSE intrinsics support by ICC/MSVC/GCC */
 #if defined __INTEL_COMPILER
  #define FLAC__SSE_TARGET(x)
-  #define FLAC__FAST_MATH_TARGET(x)
  #define FLAC__SSE_SUPPORTED 1
  #define FLAC__SSE2_SUPPORTED 1
  #if (__INTEL_COMPILER >= 1000) /* Intel C++ Compiler 10.0 */
@ -81,7 +80,6 @@
  #endif
 #elif defined __clang__ && __has_attribute(__target__) /* clang */
  #define FLAC__SSE_TARGET(x) __attribute__ ((__target__ (x)))
-  #define FLAC__FAST_MATH_TARGET(x) __attribute__ ((__target__ (x)))
  #define FLAC__SSE_SUPPORTED 1
  #define FLAC__SSE2_SUPPORTED 1
  #define FLAC__SSSE3_SUPPORTED 1
@ -93,7 +91,6 @@
  #endif
 #elif defined __GNUC__ && !defined __clang__ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)) /* GCC 4.9+ */
  #define FLAC__SSE_TARGET(x) __attribute__ ((__target__ (x)))
-  #define FLAC__FAST_MATH_TARGET(x) __attribute__ ((__target__ (x), optimize("-ffast-math")))
  #define FLAC__SSE_SUPPORTED 1
  #define FLAC__SSE2_SUPPORTED 1
  #define FLAC__SSSE3_SUPPORTED 1
@ -105,7 +102,6 @@
  #endif
 #elif defined _MSC_VER
  #define FLAC__SSE_TARGET(x)
-  #define FLAC__FAST_MATH_TARGET(x)
  #define FLAC__SSE_SUPPORTED 1
  #define FLAC__SSE2_SUPPORTED 1
  #if (_MSC_VER >= 1500) /* MS Visual Studio 2008 */
@ -123,7 +119,6 @@
  #endif
 #else
  #define FLAC__SSE_TARGET(x)
-  #define FLAC__FAST_MATH_TARGET(x)
  #ifdef __SSE__
    #define FLAC__SSE_SUPPORTED 1
  #endif
--- a/src/libFLAC/lpc_intrin_fma.c
+++ b/src/libFLAC/lpc_intrin_fma.c
@ -43,7 +43,7 @@

 #include "FLAC/assert.h"

-FLAC__FAST_MATH_TARGET("fma")
+FLAC__SSE_TARGET("fma")
 void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
 {
 #undef MAX_LAG
@ -51,14 +51,14 @@ void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_8(const FLAC__real data[],
 #include "deduplication/lpc_compute_autocorrelation_intrin.c"
 }

-FLAC__FAST_MATH_TARGET("fma")
+FLAC__SSE_TARGET("fma")
 void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_12(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
 {
 #undef MAX_LAG
 #define MAX_LAG 12
 #include "deduplication/lpc_compute_autocorrelation_intrin.c"
 }
-FLAC__FAST_MATH_TARGET("fma")
+FLAC__SSE_TARGET("fma")
 void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_16(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
 {
 #undef MAX_LAG