mirror of https://github.com/xiph/flac
Remove all assembler
This commit is contained in:
parent
0665053c5d
commit
75ef7958df
|
@ -60,7 +60,6 @@ EXTRA_DIST = \
|
|||
autogen.sh \
|
||||
config.rpath \
|
||||
depcomp \
|
||||
ltmain.sh \
|
||||
strip_non_asm_libtool_args.sh
|
||||
ltmain.sh
|
||||
|
||||
CLEANFILES = *~
|
||||
|
|
|
@ -24,9 +24,6 @@
|
|||
/* define if you have docbook-to-man or docbook2man */
|
||||
#cmakedefine FLAC__HAS_DOCBOOK_TO_MAN
|
||||
|
||||
/* define if you are compiling for x86 and have the NASM assembler */
|
||||
#cmakedefine FLAC__HAS_NASM
|
||||
|
||||
/* define if you have the ogg library */
|
||||
#cmakedefine01 OGG_FOUND
|
||||
#define FLAC__HAS_OGG OGG_FOUND
|
||||
|
|
|
@ -471,14 +471,6 @@ AC_CHECK_LIB(rt, clock_gettime,
|
|||
AH_TEMPLATE(HAVE_CLOCK_GETTIME, [define if you have clock_gettime]))
|
||||
AC_SUBST(LIB_CLOCK_GETTIME)
|
||||
|
||||
# only matters for x86
|
||||
AC_CHECK_PROGS(NASM, nasm)
|
||||
AM_CONDITIONAL(FLaC__HAS_NASM, test -n "$NASM")
|
||||
if test -n "$NASM" ; then
|
||||
AC_DEFINE(FLAC__HAS_NASM)
|
||||
AH_TEMPLATE(FLAC__HAS_NASM, [define if you are compiling for x86 and have the NASM assembler])
|
||||
fi
|
||||
|
||||
dnl If debugging is disabled AND no CFLAGS/CXXFLAGS/CPPFLAGS/LDFLAGS
|
||||
dnl are provided, we can set defaults to our liking
|
||||
AS_IF([test "x${ax_enable_debug}" = "xno" && test "x${enable_flags_setting}" = "xyes"], [
|
||||
|
@ -607,7 +599,6 @@ AC_CONFIG_FILES([ \
|
|||
src/Makefile \
|
||||
src/libFLAC/Makefile \
|
||||
src/libFLAC/flac.pc \
|
||||
src/libFLAC/ia32/Makefile \
|
||||
src/libFLAC/include/Makefile \
|
||||
src/libFLAC/include/private/Makefile \
|
||||
src/libFLAC/include/protected/Makefile \
|
||||
|
|
|
@ -38,23 +38,11 @@ else()
|
|||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
include(CheckLanguage)
|
||||
check_language(ASM_NASM)
|
||||
if(CMAKE_ASM_NASM_COMPILER)
|
||||
enable_language(ASM_NASM)
|
||||
add_definitions(-DFLAC__HAS_NASM)
|
||||
endif()
|
||||
|
||||
if(NOT WITH_ASM)
|
||||
add_definitions(-DFLAC__NO_ASM)
|
||||
endif()
|
||||
|
||||
if(FLAC__CPU_IA32)
|
||||
if(WITH_ASM AND CMAKE_ASM_NASM_COMPILER)
|
||||
add_subdirectory(ia32)
|
||||
endif()
|
||||
|
||||
option(WITH_SSE "Enable SSE2 optimizations (WITHOUT runtime detection, resulting binary requires SSE2)" ON)
|
||||
check_c_compiler_flag(-msse2 HAVE_MSSE2_FLAG)
|
||||
if(WITH_SSE)
|
||||
|
|
|
@ -55,19 +55,9 @@ endif
|
|||
|
||||
AM_CFLAGS = $(DEBUGCFLAGS) $(CPUCFLAGS) ${ASSOCMATHCFLAGS} @OGG_CFLAGS@
|
||||
|
||||
if FLaC__NO_ASM
|
||||
else
|
||||
if FLaC__CPU_IA32
|
||||
if FLaC__HAS_NASM
|
||||
ARCH_SUBDIRS = ia32
|
||||
LOCAL_EXTRA_LIBADD = ia32/libFLAC-asm.la
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
libFLAC_la_LIBADD = @OGG_LIBS@ -lm
|
||||
|
||||
libFLAC_la_LIBADD = $(LOCAL_EXTRA_LIBADD) @OGG_LIBS@ -lm
|
||||
|
||||
SUBDIRS = $(ARCH_SUBDIRS) include .
|
||||
SUBDIRS = include .
|
||||
|
||||
m4datadir = $(datadir)/aclocal
|
||||
m4data_DATA = libFLAC.m4
|
||||
|
@ -97,7 +87,7 @@ extra_ogg_sources = \
|
|||
endif
|
||||
|
||||
# see 'http://www.gnu.org/software/libtool/manual/libtool.html#Libtool-versioning' for numbering convention
|
||||
libFLAC_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-info 12:0:0 $(LOCAL_EXTRA_LDFLAGS)
|
||||
libFLAC_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-info 12:0:0
|
||||
|
||||
libFLAC_sources = \
|
||||
bitmath.c \
|
||||
|
@ -134,5 +124,4 @@ libFLAC_sources = \
|
|||
libFLAC_la_SOURCES = $(libFLAC_sources)
|
||||
|
||||
# needed for test_libFLAC
|
||||
libFLAC_static_la_LIBADD = $(LOCAL_EXTRA_LIBADD)
|
||||
libFLAC_static_la_SOURCES = $(libFLAC_sources)
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#include <sys/auxv.h>
|
||||
#endif
|
||||
|
||||
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && (defined FLAC__HAS_NASM || FLAC__HAS_X86INTRIN) && !defined FLAC__NO_ASM
|
||||
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN && !defined FLAC__NO_ASM
|
||||
|
||||
/* these are flags in EDX of CPUID AX=00000001 */
|
||||
static const uint32_t FLAC__CPUINFO_X86_CPUID_CMOV = 0x00008000;
|
||||
|
@ -97,8 +97,6 @@ cpu_have_cpuid(void)
|
|||
#if defined FLAC__CPU_X86_64 || defined __i686__ || defined __SSE__ || (defined _M_IX86_FP && _M_IX86_FP > 0)
|
||||
/* target CPU does have CPUID instruction */
|
||||
return 1;
|
||||
#elif defined FLAC__HAS_NASM
|
||||
return FLAC__cpu_have_cpuid_asm_ia32();
|
||||
#elif defined __GNUC__ && defined HAVE_CPUID_H
|
||||
if (__get_cpuid_max(0, 0) != 0)
|
||||
return 1;
|
||||
|
@ -151,9 +149,6 @@ cpuinfo_x86(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint
|
|||
__cpuid_count(level, 0, *eax, *ebx, *ecx, *edx);
|
||||
return;
|
||||
}
|
||||
#elif defined FLAC__HAS_NASM && defined FLAC__CPU_IA32
|
||||
FLAC__cpu_info_asm_ia32(level, eax, ebx, ecx, edx);
|
||||
return;
|
||||
#endif
|
||||
*eax = *ebx = *ecx = *edx = 0;
|
||||
}
|
||||
|
@ -163,7 +158,7 @@ cpuinfo_x86(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint
|
|||
static void
|
||||
x86_cpu_info (FLAC__CPUInfo *info)
|
||||
{
|
||||
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && (defined FLAC__HAS_NASM || FLAC__HAS_X86INTRIN) && !defined FLAC__NO_ASM
|
||||
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN && !defined FLAC__NO_ASM
|
||||
FLAC__bool x86_osxsave = false;
|
||||
FLAC__bool os_avx = false;
|
||||
FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx;
|
||||
|
|
|
@ -1,17 +0,0 @@
|
|||
include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
add_compile_options(-I${CMAKE_CURRENT_SOURCE_DIR}/)
|
||||
|
||||
if(APPLE)
|
||||
add_compile_options(-dOBJ_FORMAT_macho)
|
||||
elseif(WIN32)
|
||||
#add_compile_options(-d OBJ_FORMAT_win32)
|
||||
# FIXME the command above doesn't seem to work on Windows
|
||||
set(CMAKE_ASM_NASM_FLAGS -dOBJ_FORMAT_win32)
|
||||
else()
|
||||
add_compile_options(-dOBJ_FORMAT_elf)
|
||||
endif()
|
||||
|
||||
add_library(FLAC-asm OBJECT
|
||||
cpu_asm.nasm
|
||||
fixed_asm.nasm
|
||||
lpc_asm.nasm)
|
|
@ -1,46 +0,0 @@
|
|||
# libFLAC - Free Lossless Audio Codec library
|
||||
# Copyright (C) 2001-2009 Josh Coalson
|
||||
# Copyright (C) 2011-2022 Xiph.Org Foundation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# - Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# - Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# - Neither the name of the Xiph.org Foundation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
SUFFIXES = .nasm .lo
|
||||
|
||||
STRIP_NON_ASM = sh $(top_srcdir)/strip_non_asm_libtool_args.sh
|
||||
AM_CPPFLAGS = -I$(top_builddir) -I$(srcdir)/include -I$(top_srcdir)/include
|
||||
.nasm.lo:
|
||||
$(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) $(NASM) -f $(OBJ_FORMAT) -d OBJ_FORMAT_$(OBJ_FORMAT) -i$(srcdir)/ $< -o $@
|
||||
|
||||
noinst_LTLIBRARIES = libFLAC-asm.la
|
||||
libFLAC_asm_la_SOURCES = \
|
||||
cpu_asm.nasm \
|
||||
fixed_asm.nasm \
|
||||
lpc_asm.nasm \
|
||||
nasm.h
|
||||
|
||||
EXTRA_DIST = CMakeLists.txt
|
|
@ -1,119 +0,0 @@
|
|||
; vim:filetype=nasm ts=8
|
||||
|
||||
; libFLAC - Free Lossless Audio Codec library
|
||||
; Copyright (C) 2001-2009 Josh Coalson
|
||||
; Copyright (C) 2011-2022 Xiph.Org Foundation
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
;
|
||||
; - Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
;
|
||||
; - Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in the
|
||||
; documentation and/or other materials provided with the distribution.
|
||||
;
|
||||
; - Neither the name of the Xiph.org Foundation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived from
|
||||
; this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "nasm.h"
|
||||
|
||||
data_section
|
||||
|
||||
cglobal FLAC__cpu_have_cpuid_asm_ia32
|
||||
cglobal FLAC__cpu_info_asm_ia32
|
||||
|
||||
code_section
|
||||
|
||||
; **********************************************************************
|
||||
;
|
||||
; FLAC__uint32 FLAC__cpu_have_cpuid_asm_ia32()
|
||||
;
|
||||
|
||||
cident FLAC__cpu_have_cpuid_asm_ia32
|
||||
pushfd
|
||||
pop eax
|
||||
mov edx, eax
|
||||
xor eax, 0x00200000
|
||||
push eax
|
||||
popfd
|
||||
pushfd
|
||||
pop eax
|
||||
xor eax, edx
|
||||
and eax, 0x00200000
|
||||
shr eax, 0x15
|
||||
push edx
|
||||
popfd
|
||||
ret
|
||||
|
||||
|
||||
; **********************************************************************
|
||||
;
|
||||
; void FLAC__cpu_info_asm_ia32(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint32 *ecx, FLAC__uint32 *edx)
|
||||
;
|
||||
|
||||
cident FLAC__cpu_info_asm_ia32
|
||||
;[esp + 8] == level
|
||||
;[esp + 12] == flags_eax
|
||||
;[esp + 16] == flags_ebx
|
||||
;[esp + 20] == flags_ecx
|
||||
;[esp + 24] == flags_edx
|
||||
|
||||
push ebx
|
||||
call FLAC__cpu_have_cpuid_asm_ia32
|
||||
test eax, eax
|
||||
jz .no_cpuid
|
||||
|
||||
mov eax, [esp + 8]
|
||||
and eax, 0x80000000
|
||||
cpuid
|
||||
cmp eax, [esp + 8]
|
||||
jb .no_cpuid
|
||||
xor ecx, ecx
|
||||
mov eax, [esp + 8]
|
||||
cpuid
|
||||
|
||||
push ebx
|
||||
;[esp + 16] == flags_eax
|
||||
mov ebx, [esp + 16]
|
||||
mov [ebx], eax
|
||||
pop eax
|
||||
;[esp + 16] == flags_ebx
|
||||
mov ebx, [esp + 16]
|
||||
mov [ebx], eax
|
||||
mov ebx, [esp + 20]
|
||||
mov [ebx], ecx
|
||||
mov ebx, [esp + 24]
|
||||
mov [ebx], edx
|
||||
jmp .end
|
||||
|
||||
.no_cpuid:
|
||||
xor eax, eax
|
||||
mov ebx, [esp + 12]
|
||||
mov [ebx], eax
|
||||
mov ebx, [esp + 16]
|
||||
mov [ebx], eax
|
||||
mov ebx, [esp + 20]
|
||||
mov [ebx], eax
|
||||
mov ebx, [esp + 24]
|
||||
mov [ebx], eax
|
||||
.end:
|
||||
pop ebx
|
||||
ret
|
||||
|
||||
; end
|
|
@ -1,309 +0,0 @@
|
|||
; vim:filetype=nasm ts=8
|
||||
|
||||
; libFLAC - Free Lossless Audio Codec library
|
||||
; Copyright (C) 2001-2009 Josh Coalson
|
||||
; Copyright (C) 2011-2022 Xiph.Org Foundation
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
;
|
||||
; - Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
;
|
||||
; - Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in the
|
||||
; documentation and/or other materials provided with the distribution.
|
||||
;
|
||||
; - Neither the name of the Xiph.org Foundation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived from
|
||||
; this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "nasm.h"
|
||||
|
||||
data_section
|
||||
|
||||
cglobal FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov
|
||||
|
||||
code_section
|
||||
|
||||
; **********************************************************************
|
||||
;
|
||||
; unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 *data, unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
|
||||
; {
|
||||
; FLAC__int32 last_error_0 = data[-1];
|
||||
; FLAC__int32 last_error_1 = data[-1] - data[-2];
|
||||
; FLAC__int32 last_error_2 = last_error_1 - (data[-2] - data[-3]);
|
||||
; FLAC__int32 last_error_3 = last_error_2 - (data[-2] - 2*data[-3] + data[-4]);
|
||||
; FLAC__int32 error, save;
|
||||
; FLAC__uint32 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0;
|
||||
; unsigned i, order;
|
||||
;
|
||||
; for(i = 0; i < data_len; i++) {
|
||||
; error = data[i] ; total_error_0 += local_abs(error); save = error;
|
||||
; error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error;
|
||||
; error -= last_error_1; total_error_2 += local_abs(error); last_error_1 = save; save = error;
|
||||
; error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error;
|
||||
; error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save;
|
||||
; }
|
||||
;
|
||||
; if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
|
||||
; order = 0;
|
||||
; else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
|
||||
; order = 1;
|
||||
; else if(total_error_2 < min(total_error_3, total_error_4))
|
||||
; order = 2;
|
||||
; else if(total_error_3 < total_error_4)
|
||||
; order = 3;
|
||||
; else
|
||||
; order = 4;
|
||||
;
|
||||
; residual_bits_per_sample[0] = (float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0);
|
||||
; residual_bits_per_sample[1] = (float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0);
|
||||
; residual_bits_per_sample[2] = (float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0);
|
||||
; residual_bits_per_sample[3] = (float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0);
|
||||
; residual_bits_per_sample[4] = (float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0);
|
||||
;
|
||||
; return order;
|
||||
; }
|
||||
ALIGN 16
|
||||
cident FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov
|
||||
|
||||
; esp + 36 == data[]
|
||||
; esp + 40 == data_len
|
||||
; esp + 44 == residual_bits_per_sample[]
|
||||
|
||||
push ebp
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
sub esp, byte 16
|
||||
; qword [esp] == temp space for loading FLAC__uint64s to FPU regs
|
||||
|
||||
; ebx == &data[i]
|
||||
; ecx == loop counter (i)
|
||||
; ebp == order
|
||||
; mm0 == total_error_1:total_error_0
|
||||
; mm1 == total_error_2:total_error_3
|
||||
; mm2 == :total_error_4
|
||||
; mm3 == last_error_1:last_error_0
|
||||
; mm4 == last_error_2:last_error_3
|
||||
|
||||
mov ecx, [esp + 40] ; ecx = data_len
|
||||
test ecx, ecx
|
||||
jz near .data_len_is_0
|
||||
|
||||
mov ebx, [esp + 36] ; ebx = data[]
|
||||
movd mm3, [ebx - 4] ; mm3 = 0:last_error_0
|
||||
movd mm2, [ebx - 8] ; mm2 = 0:data[-2]
|
||||
movd mm1, [ebx - 12] ; mm1 = 0:data[-3]
|
||||
movd mm0, [ebx - 16] ; mm0 = 0:data[-4]
|
||||
movq mm5, mm3 ; mm5 = 0:last_error_0
|
||||
psubd mm5, mm2 ; mm5 = 0:last_error_1
|
||||
punpckldq mm3, mm5 ; mm3 = last_error_1:last_error_0
|
||||
psubd mm2, mm1 ; mm2 = 0:data[-2] - data[-3]
|
||||
psubd mm5, mm2 ; mm5 = 0:last_error_2
|
||||
movq mm4, mm5 ; mm4 = 0:last_error_2
|
||||
psubd mm4, mm2 ; mm4 = 0:last_error_2 - (data[-2] - data[-3])
|
||||
paddd mm4, mm1 ; mm4 = 0:last_error_2 - (data[-2] - 2 * data[-3])
|
||||
psubd mm4, mm0 ; mm4 = 0:last_error_3
|
||||
punpckldq mm4, mm5 ; mm4 = last_error_2:last_error_3
|
||||
pxor mm0, mm0 ; mm0 = total_error_1:total_error_0
|
||||
pxor mm1, mm1 ; mm1 = total_error_2:total_error_3
|
||||
pxor mm2, mm2 ; mm2 = 0:total_error_4
|
||||
|
||||
ALIGN 16
|
||||
.loop:
|
||||
movd mm7, [ebx] ; mm7 = 0:error_0
|
||||
add ebx, byte 4
|
||||
movq mm6, mm7 ; mm6 = 0:error_0
|
||||
psubd mm7, mm3 ; mm7 = :error_1
|
||||
punpckldq mm6, mm7 ; mm6 = error_1:error_0
|
||||
movq mm5, mm6 ; mm5 = error_1:error_0
|
||||
movq mm7, mm6 ; mm7 = error_1:error_0
|
||||
psubd mm5, mm3 ; mm5 = error_2:
|
||||
movq mm3, mm6 ; mm3 = error_1:error_0
|
||||
psrad mm6, 31
|
||||
pxor mm7, mm6
|
||||
psubd mm7, mm6 ; mm7 = abs(error_1):abs(error_0)
|
||||
paddd mm0, mm7 ; mm0 = total_error_1:total_error_0
|
||||
movq mm6, mm5 ; mm6 = error_2:
|
||||
psubd mm5, mm4 ; mm5 = error_3:
|
||||
punpckhdq mm5, mm6 ; mm5 = error_2:error_3
|
||||
movq mm7, mm5 ; mm7 = error_2:error_3
|
||||
movq mm6, mm5 ; mm6 = error_2:error_3
|
||||
psubd mm5, mm4 ; mm5 = :error_4
|
||||
movq mm4, mm6 ; mm4 = error_2:error_3
|
||||
psrad mm6, 31
|
||||
pxor mm7, mm6
|
||||
psubd mm7, mm6 ; mm7 = abs(error_2):abs(error_3)
|
||||
paddd mm1, mm7 ; mm1 = total_error_2:total_error_3
|
||||
movq mm6, mm5 ; mm6 = :error_4
|
||||
psrad mm5, 31
|
||||
pxor mm6, mm5
|
||||
psubd mm6, mm5 ; mm6 = :abs(error_4)
|
||||
paddd mm2, mm6 ; mm2 = :total_error_4
|
||||
|
||||
dec ecx
|
||||
jnz short .loop
|
||||
|
||||
; if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
|
||||
; order = 0;
|
||||
; else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
|
||||
; order = 1;
|
||||
; else if(total_error_2 < min(total_error_3, total_error_4))
|
||||
; order = 2;
|
||||
; else if(total_error_3 < total_error_4)
|
||||
; order = 3;
|
||||
; else
|
||||
; order = 4;
|
||||
movq mm3, mm0 ; mm3 = total_error_1:total_error_0
|
||||
movd edi, mm2 ; edi = total_error_4
|
||||
movd esi, mm1 ; esi = total_error_3
|
||||
movd eax, mm0 ; eax = total_error_0
|
||||
punpckhdq mm1, mm1 ; mm1 = total_error_2:total_error_2
|
||||
punpckhdq mm3, mm3 ; mm3 = total_error_1:total_error_1
|
||||
movd edx, mm1 ; edx = total_error_2
|
||||
movd ecx, mm3 ; ecx = total_error_1
|
||||
|
||||
xor ebx, ebx
|
||||
xor ebp, ebp
|
||||
inc ebx
|
||||
cmp ecx, eax
|
||||
cmovb eax, ecx ; eax = min(total_error_0, total_error_1)
|
||||
cmovbe ebp, ebx
|
||||
inc ebx
|
||||
cmp edx, eax
|
||||
cmovb eax, edx ; eax = min(total_error_0, total_error_1, total_error_2)
|
||||
cmovbe ebp, ebx
|
||||
inc ebx
|
||||
cmp esi, eax
|
||||
cmovb eax, esi ; eax = min(total_error_0, total_error_1, total_error_2, total_error_3)
|
||||
cmovbe ebp, ebx
|
||||
inc ebx
|
||||
cmp edi, eax
|
||||
cmovb eax, edi ; eax = min(total_error_0, total_error_1, total_error_2, total_error_3, total_error_4)
|
||||
cmovbe ebp, ebx
|
||||
movd ebx, mm0 ; ebx = total_error_0
|
||||
emms
|
||||
|
||||
; residual_bits_per_sample[0] = (float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0);
|
||||
; residual_bits_per_sample[1] = (float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0);
|
||||
; residual_bits_per_sample[2] = (float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0);
|
||||
; residual_bits_per_sample[3] = (float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0);
|
||||
; residual_bits_per_sample[4] = (float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0);
|
||||
xor eax, eax
|
||||
fild dword [esp + 40] ; ST = data_len (NOTE: assumes data_len is <2gigs)
|
||||
.rbps_0:
|
||||
test ebx, ebx
|
||||
jz .total_error_0_is_0
|
||||
fld1 ; ST = 1.0 data_len
|
||||
mov [esp], ebx
|
||||
mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_0
|
||||
mov ebx, [esp + 44]
|
||||
fild qword [esp] ; ST = total_error_0 1.0 data_len
|
||||
fdiv st2 ; ST = total_error_0/data_len 1.0 data_len
|
||||
fldln2 ; ST = ln2 total_error_0/data_len 1.0 data_len
|
||||
fmulp st1 ; ST = ln2*total_error_0/data_len 1.0 data_len
|
||||
fyl2x ; ST = log2(ln2*total_error_0/data_len) data_len
|
||||
fstp dword [ebx] ; residual_bits_per_sample[0] = log2(ln2*total_error_0/data_len) ST = data_len
|
||||
jmp short .rbps_1
|
||||
.total_error_0_is_0:
|
||||
mov ebx, [esp + 44]
|
||||
mov [ebx], eax ; residual_bits_per_sample[0] = 0.0
|
||||
.rbps_1:
|
||||
test ecx, ecx
|
||||
jz .total_error_1_is_0
|
||||
fld1 ; ST = 1.0 data_len
|
||||
mov [esp], ecx
|
||||
mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_1
|
||||
fild qword [esp] ; ST = total_error_1 1.0 data_len
|
||||
fdiv st2 ; ST = total_error_1/data_len 1.0 data_len
|
||||
fldln2 ; ST = ln2 total_error_1/data_len 1.0 data_len
|
||||
fmulp st1 ; ST = ln2*total_error_1/data_len 1.0 data_len
|
||||
fyl2x ; ST = log2(ln2*total_error_1/data_len) data_len
|
||||
fstp dword [ebx + 4] ; residual_bits_per_sample[1] = log2(ln2*total_error_1/data_len) ST = data_len
|
||||
jmp short .rbps_2
|
||||
.total_error_1_is_0:
|
||||
mov [ebx + 4], eax ; residual_bits_per_sample[1] = 0.0
|
||||
.rbps_2:
|
||||
test edx, edx
|
||||
jz .total_error_2_is_0
|
||||
fld1 ; ST = 1.0 data_len
|
||||
mov [esp], edx
|
||||
mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_2
|
||||
fild qword [esp] ; ST = total_error_2 1.0 data_len
|
||||
fdiv st2 ; ST = total_error_2/data_len 1.0 data_len
|
||||
fldln2 ; ST = ln2 total_error_2/data_len 1.0 data_len
|
||||
fmulp st1 ; ST = ln2*total_error_2/data_len 1.0 data_len
|
||||
fyl2x ; ST = log2(ln2*total_error_2/data_len) data_len
|
||||
fstp dword [ebx + 8] ; residual_bits_per_sample[2] = log2(ln2*total_error_2/data_len) ST = data_len
|
||||
jmp short .rbps_3
|
||||
.total_error_2_is_0:
|
||||
mov [ebx + 8], eax ; residual_bits_per_sample[2] = 0.0
|
||||
.rbps_3:
|
||||
test esi, esi
|
||||
jz .total_error_3_is_0
|
||||
fld1 ; ST = 1.0 data_len
|
||||
mov [esp], esi
|
||||
mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_3
|
||||
fild qword [esp] ; ST = total_error_3 1.0 data_len
|
||||
fdiv st2 ; ST = total_error_3/data_len 1.0 data_len
|
||||
fldln2 ; ST = ln2 total_error_3/data_len 1.0 data_len
|
||||
fmulp st1 ; ST = ln2*total_error_3/data_len 1.0 data_len
|
||||
fyl2x ; ST = log2(ln2*total_error_3/data_len) data_len
|
||||
fstp dword [ebx + 12] ; residual_bits_per_sample[3] = log2(ln2*total_error_3/data_len) ST = data_len
|
||||
jmp short .rbps_4
|
||||
.total_error_3_is_0:
|
||||
mov [ebx + 12], eax ; residual_bits_per_sample[3] = 0.0
|
||||
.rbps_4:
|
||||
test edi, edi
|
||||
jz .total_error_4_is_0
|
||||
fld1 ; ST = 1.0 data_len
|
||||
mov [esp], edi
|
||||
mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_4
|
||||
fild qword [esp] ; ST = total_error_4 1.0 data_len
|
||||
fdiv st2 ; ST = total_error_4/data_len 1.0 data_len
|
||||
fldln2 ; ST = ln2 total_error_4/data_len 1.0 data_len
|
||||
fmulp st1 ; ST = ln2*total_error_4/data_len 1.0 data_len
|
||||
fyl2x ; ST = log2(ln2*total_error_4/data_len) data_len
|
||||
fstp dword [ebx + 16] ; residual_bits_per_sample[4] = log2(ln2*total_error_4/data_len) ST = data_len
|
||||
jmp short .rbps_end
|
||||
.total_error_4_is_0:
|
||||
mov [ebx + 16], eax ; residual_bits_per_sample[4] = 0.0
|
||||
.rbps_end:
|
||||
fstp st0 ; ST = [empty]
|
||||
jmp short .end
|
||||
.data_len_is_0:
|
||||
; data_len == 0, so residual_bits_per_sample[*] = 0.0
|
||||
xor ebp, ebp
|
||||
mov edi, [esp + 44]
|
||||
mov [edi], ebp
|
||||
mov [edi + 4], ebp
|
||||
mov [edi + 8], ebp
|
||||
mov [edi + 12], ebp
|
||||
mov [edi + 16], ebp
|
||||
add ebp, byte 4 ; order = 4
|
||||
|
||||
.end:
|
||||
mov eax, ebp ; return order
|
||||
add esp, byte 16
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
pop ebp
|
||||
ret
|
||||
|
||||
; end
|
|
@ -1,727 +0,0 @@
|
|||
; vim:filetype=nasm ts=8
|
||||
|
||||
; libFLAC - Free Lossless Audio Codec library
|
||||
; Copyright (C) 2001-2009 Josh Coalson
|
||||
; Copyright (C) 2011-2022 Xiph.Org Foundation
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
;
|
||||
; - Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
;
|
||||
; - Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in the
|
||||
; documentation and/or other materials provided with the distribution.
|
||||
;
|
||||
; - Neither the name of the Xiph.org Foundation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived from
|
||||
; this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
%include "nasm.h"
|
||||
|
||||
data_section
|
||||
|
||||
cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
|
||||
cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
|
||||
cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
|
||||
|
||||
code_section
|
||||
|
||||
;void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
|
||||
;
|
||||
; for(i = 0; i < data_len; i++) {
|
||||
; sum = 0;
|
||||
; for(j = 0; j < order; j++)
|
||||
; sum += qlp_coeff[j] * data[i-j-1];
|
||||
; residual[i] = data[i] - (sum >> lp_quantization);
|
||||
; }
|
||||
;
|
||||
ALIGN 16
|
||||
cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
|
||||
;[esp + 40] residual[]
|
||||
;[esp + 36] lp_quantization
|
||||
;[esp + 32] order
|
||||
;[esp + 28] qlp_coeff[]
|
||||
;[esp + 24] data_len
|
||||
;[esp + 20] data[]
|
||||
|
||||
;ASSERT(order > 0)
|
||||
|
||||
push ebp
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
|
||||
mov esi, [esp + 20] ; esi = data[]
|
||||
mov edi, [esp + 40] ; edi = residual[]
|
||||
mov eax, [esp + 32] ; eax = order
|
||||
mov ebx, [esp + 24] ; ebx = data_len
|
||||
|
||||
test ebx, ebx
|
||||
jz near .end ; do nothing if data_len == 0
|
||||
.begin:
|
||||
cmp eax, byte 1
|
||||
jg short .i_1more
|
||||
|
||||
mov ecx, [esp + 28]
|
||||
mov edx, [ecx] ; edx = qlp_coeff[0]
|
||||
mov eax, [esi - 4] ; eax = data[-1]
|
||||
mov ecx, [esp + 36] ; cl = lp_quantization
|
||||
ALIGN 16
|
||||
.i_1_loop_i:
|
||||
imul eax, edx
|
||||
sar eax, cl
|
||||
neg eax
|
||||
add eax, [esi]
|
||||
mov [edi], eax
|
||||
mov eax, [esi]
|
||||
add edi, byte 4
|
||||
add esi, byte 4
|
||||
dec ebx
|
||||
jnz .i_1_loop_i
|
||||
|
||||
jmp .end
|
||||
|
||||
.i_1more:
|
||||
cmp eax, byte 32 ; for order <= 32 there is a faster routine
|
||||
jbe short .i_32
|
||||
|
||||
; This version is here just for completeness, since FLAC__MAX_LPC_ORDER == 32
|
||||
ALIGN 16
|
||||
.i_32more_loop_i:
|
||||
xor ebp, ebp
|
||||
mov ecx, [esp + 32]
|
||||
mov edx, ecx
|
||||
shl edx, 2
|
||||
add edx, [esp + 28]
|
||||
neg ecx
|
||||
ALIGN 16
|
||||
.i_32more_loop_j:
|
||||
sub edx, byte 4
|
||||
mov eax, [edx]
|
||||
imul eax, [esi + 4 * ecx]
|
||||
add ebp, eax
|
||||
inc ecx
|
||||
jnz short .i_32more_loop_j
|
||||
|
||||
mov ecx, [esp + 36]
|
||||
sar ebp, cl
|
||||
neg ebp
|
||||
add ebp, [esi]
|
||||
mov [edi], ebp
|
||||
add esi, byte 4
|
||||
add edi, byte 4
|
||||
|
||||
dec ebx
|
||||
jnz .i_32more_loop_i
|
||||
|
||||
jmp .end
|
||||
|
||||
.mov_eip_to_eax:
|
||||
mov eax, [esp]
|
||||
ret
|
||||
|
||||
.i_32:
|
||||
sub edi, esi
|
||||
neg eax
|
||||
lea edx, [eax + eax * 8 + .jumper_0 - .get_eip0]
|
||||
call .mov_eip_to_eax
|
||||
.get_eip0:
|
||||
add edx, eax
|
||||
inc edx
|
||||
mov eax, [esp + 28] ; eax = qlp_coeff[]
|
||||
xor ebp, ebp
|
||||
jmp edx
|
||||
|
||||
mov ecx, [eax + 124]
|
||||
imul ecx, [esi - 128]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 120]
|
||||
imul ecx, [esi - 124]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 116]
|
||||
imul ecx, [esi - 120]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 112]
|
||||
imul ecx, [esi - 116]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 108]
|
||||
imul ecx, [esi - 112]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 104]
|
||||
imul ecx, [esi - 108]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 100]
|
||||
imul ecx, [esi - 104]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 96]
|
||||
imul ecx, [esi - 100]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 92]
|
||||
imul ecx, [esi - 96]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 88]
|
||||
imul ecx, [esi - 92]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 84]
|
||||
imul ecx, [esi - 88]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 80]
|
||||
imul ecx, [esi - 84]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 76]
|
||||
imul ecx, [esi - 80]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 72]
|
||||
imul ecx, [esi - 76]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 68]
|
||||
imul ecx, [esi - 72]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 64]
|
||||
imul ecx, [esi - 68]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 60]
|
||||
imul ecx, [esi - 64]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 56]
|
||||
imul ecx, [esi - 60]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 52]
|
||||
imul ecx, [esi - 56]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 48]
|
||||
imul ecx, [esi - 52]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 44]
|
||||
imul ecx, [esi - 48]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 40]
|
||||
imul ecx, [esi - 44]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 36]
|
||||
imul ecx, [esi - 40]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 32]
|
||||
imul ecx, [esi - 36]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 28]
|
||||
imul ecx, [esi - 32]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 24]
|
||||
imul ecx, [esi - 28]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 20]
|
||||
imul ecx, [esi - 24]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 16]
|
||||
imul ecx, [esi - 20]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 12]
|
||||
imul ecx, [esi - 16]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 8]
|
||||
imul ecx, [esi - 12]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax + 4]
|
||||
imul ecx, [esi - 8]
|
||||
add ebp, ecx
|
||||
mov ecx, [eax] ; there is one byte missing
|
||||
imul ecx, [esi - 4]
|
||||
add ebp, ecx
|
||||
.jumper_0:
|
||||
|
||||
mov ecx, [esp + 36]
|
||||
sar ebp, cl
|
||||
neg ebp
|
||||
add ebp, [esi]
|
||||
mov [edi + esi], ebp
|
||||
add esi, byte 4
|
||||
|
||||
dec ebx
|
||||
jz short .end
|
||||
xor ebp, ebp
|
||||
jmp edx
|
||||
|
||||
.end:
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
pop ebp
|
||||
ret
|
||||
|
||||
; WATCHOUT: this routine works on 16 bit data which means bits-per-sample for
|
||||
; the channel and qlp_coeffs must be <= 16. Especially note that this routine
|
||||
; cannot be used for side-channel coded 16bps channels since the effective bps
|
||||
; is 17.
|
||||
ALIGN 16
|
||||
cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
|
||||
;[esp + 40] residual[]
|
||||
;[esp + 36] lp_quantization
|
||||
;[esp + 32] order
|
||||
;[esp + 28] qlp_coeff[]
|
||||
;[esp + 24] data_len
|
||||
;[esp + 20] data[]
|
||||
|
||||
;ASSERT(order > 0)
|
||||
|
||||
push ebp
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
|
||||
mov esi, [esp + 20] ; esi = data[]
|
||||
mov edi, [esp + 40] ; edi = residual[]
|
||||
mov eax, [esp + 32] ; eax = order
|
||||
mov ebx, [esp + 24] ; ebx = data_len
|
||||
|
||||
test ebx, ebx
|
||||
jz near .end ; do nothing if data_len == 0
|
||||
dec ebx
|
||||
test ebx, ebx
|
||||
jz near .last_one
|
||||
|
||||
mov edx, [esp + 28] ; edx = qlp_coeff[]
|
||||
movd mm6, [esp + 36] ; mm6 = 0:lp_quantization
|
||||
mov ebp, esp
|
||||
|
||||
and esp, 0xfffffff8
|
||||
|
||||
xor ecx, ecx
|
||||
.copy_qlp_loop:
|
||||
push word [edx + 4 * ecx]
|
||||
inc ecx
|
||||
cmp ecx, eax
|
||||
jnz short .copy_qlp_loop
|
||||
|
||||
and ecx, 0x3
|
||||
test ecx, ecx
|
||||
je short .za_end
|
||||
sub ecx, byte 4
|
||||
.za_loop:
|
||||
push word 0
|
||||
inc eax
|
||||
inc ecx
|
||||
jnz short .za_loop
|
||||
.za_end:
|
||||
|
||||
movq mm5, [esp + 2 * eax - 8]
|
||||
movd mm4, [esi - 16]
|
||||
punpckldq mm4, [esi - 12]
|
||||
movd mm0, [esi - 8]
|
||||
punpckldq mm0, [esi - 4]
|
||||
packssdw mm4, mm0
|
||||
|
||||
cmp eax, byte 4
|
||||
jnbe short .mmx_4more
|
||||
|
||||
ALIGN 16
|
||||
.mmx_4_loop_i:
|
||||
movd mm1, [esi]
|
||||
movq mm3, mm4
|
||||
punpckldq mm1, [esi + 4]
|
||||
psrlq mm4, 16
|
||||
movq mm0, mm1
|
||||
psllq mm0, 48
|
||||
por mm4, mm0
|
||||
movq mm2, mm4
|
||||
psrlq mm4, 16
|
||||
pxor mm0, mm0
|
||||
punpckhdq mm0, mm1
|
||||
pmaddwd mm3, mm5
|
||||
pmaddwd mm2, mm5
|
||||
psllq mm0, 16
|
||||
por mm4, mm0
|
||||
movq mm0, mm3
|
||||
punpckldq mm3, mm2
|
||||
punpckhdq mm0, mm2
|
||||
paddd mm3, mm0
|
||||
psrad mm3, mm6
|
||||
psubd mm1, mm3
|
||||
movd [edi], mm1
|
||||
punpckhdq mm1, mm1
|
||||
movd [edi + 4], mm1
|
||||
|
||||
add edi, byte 8
|
||||
add esi, byte 8
|
||||
|
||||
sub ebx, 2
|
||||
jg .mmx_4_loop_i
|
||||
jmp .mmx_end
|
||||
|
||||
.mmx_4more:
|
||||
shl eax, 2
|
||||
neg eax
|
||||
add eax, byte 16
|
||||
|
||||
ALIGN 16
|
||||
.mmx_4more_loop_i:
|
||||
movd mm1, [esi]
|
||||
punpckldq mm1, [esi + 4]
|
||||
movq mm3, mm4
|
||||
psrlq mm4, 16
|
||||
movq mm0, mm1
|
||||
psllq mm0, 48
|
||||
por mm4, mm0
|
||||
movq mm2, mm4
|
||||
psrlq mm4, 16
|
||||
pxor mm0, mm0
|
||||
punpckhdq mm0, mm1
|
||||
pmaddwd mm3, mm5
|
||||
pmaddwd mm2, mm5
|
||||
psllq mm0, 16
|
||||
por mm4, mm0
|
||||
|
||||
mov ecx, esi
|
||||
add ecx, eax
|
||||
mov edx, esp
|
||||
|
||||
ALIGN 16
|
||||
.mmx_4more_loop_j:
|
||||
movd mm0, [ecx - 16]
|
||||
movd mm7, [ecx - 8]
|
||||
punpckldq mm0, [ecx - 12]
|
||||
punpckldq mm7, [ecx - 4]
|
||||
packssdw mm0, mm7
|
||||
pmaddwd mm0, [edx]
|
||||
punpckhdq mm7, mm7
|
||||
paddd mm3, mm0
|
||||
movd mm0, [ecx - 12]
|
||||
punpckldq mm0, [ecx - 8]
|
||||
punpckldq mm7, [ecx]
|
||||
packssdw mm0, mm7
|
||||
pmaddwd mm0, [edx]
|
||||
paddd mm2, mm0
|
||||
|
||||
add edx, byte 8
|
||||
add ecx, byte 16
|
||||
cmp ecx, esi
|
||||
jnz .mmx_4more_loop_j
|
||||
|
||||
movq mm0, mm3
|
||||
punpckldq mm3, mm2
|
||||
punpckhdq mm0, mm2
|
||||
paddd mm3, mm0
|
||||
psrad mm3, mm6
|
||||
psubd mm1, mm3
|
||||
movd [edi], mm1
|
||||
punpckhdq mm1, mm1
|
||||
movd [edi + 4], mm1
|
||||
|
||||
add edi, byte 8
|
||||
add esi, byte 8
|
||||
|
||||
sub ebx, 2
|
||||
jg near .mmx_4more_loop_i
|
||||
|
||||
.mmx_end:
|
||||
emms
|
||||
mov esp, ebp
|
||||
.last_one:
|
||||
mov eax, [esp + 32]
|
||||
inc ebx
|
||||
jnz near FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32.begin
|
||||
|
||||
.end:
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
pop ebp
|
||||
ret
|
||||
|
||||
; **********************************************************************
|
||||
;
|
||||
;void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
|
||||
; {
|
||||
; unsigned i, j;
|
||||
; FLAC__int64 sum;
|
||||
;
|
||||
; FLAC__ASSERT(order > 0);
|
||||
;
|
||||
; for(i = 0; i < data_len; i++) {
|
||||
; sum = 0;
|
||||
; for(j = 0; j < order; j++)
|
||||
; sum += qlp_coeff[j] * (FLAC__int64)data[i-j-1];
|
||||
; residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
|
||||
; }
|
||||
; }
|
||||
ALIGN 16
|
||||
cident FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
|
||||
;[esp + 40] residual[]
|
||||
;[esp + 36] lp_quantization
|
||||
;[esp + 32] order
|
||||
;[esp + 28] qlp_coeff[]
|
||||
;[esp + 24] data_len
|
||||
;[esp + 20] data[]
|
||||
|
||||
;ASSERT(order > 0)
|
||||
;ASSERT(order <= 32)
|
||||
;ASSERT(lp_quantization <= 31)
|
||||
|
||||
push ebp
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
|
||||
mov ebx, [esp + 24] ; ebx = data_len
|
||||
test ebx, ebx
|
||||
jz near .end ; do nothing if data_len == 0
|
||||
|
||||
.begin:
|
||||
mov eax, [esp + 32] ; eax = order
|
||||
cmp eax, 1
|
||||
jg short .i_32
|
||||
|
||||
mov esi, [esp + 40] ; esi = residual[]
|
||||
mov edi, [esp + 20] ; edi = data[]
|
||||
mov ecx, [esp + 28] ; ecx = qlp_coeff[]
|
||||
mov ebp, [ecx] ; ebp = qlp_coeff[0]
|
||||
mov eax, [edi - 4] ; eax = data[-1]
|
||||
mov ecx, [esp + 36] ; cl = lp_quantization
|
||||
ALIGN 16
|
||||
.i_1_loop_i:
|
||||
imul ebp ; edx:eax = qlp_coeff[0] * (FLAC__int64)data[i-1]
|
||||
shrd eax, edx, cl ; 0 <= lp_quantization <= 15
|
||||
neg eax
|
||||
add eax, [edi]
|
||||
mov [esi], eax
|
||||
mov eax, [edi]
|
||||
add esi, 4
|
||||
add edi, 4
|
||||
dec ebx
|
||||
jnz .i_1_loop_i
|
||||
jmp .end
|
||||
|
||||
.mov_eip_to_eax:
|
||||
mov eax, [esp]
|
||||
ret
|
||||
|
||||
.i_32: ; eax = order
|
||||
neg eax
|
||||
add eax, eax
|
||||
lea ebp, [eax + eax * 4 + .jumper_0 - .get_eip0]
|
||||
call .mov_eip_to_eax
|
||||
.get_eip0:
|
||||
add ebp, eax
|
||||
inc ebp ; compensate for the shorter opcode on the last iteration
|
||||
|
||||
mov ebx, [esp + 28] ; ebx = qlp_coeff[]
|
||||
mov edi, [esp + 20] ; edi = data[]
|
||||
sub [esp + 40], edi ; residual[] -= data[]
|
||||
|
||||
xor ecx, ecx
|
||||
xor esi, esi
|
||||
jmp ebp
|
||||
|
||||
;eax = --
|
||||
;edx = --
|
||||
;ecx = 0
|
||||
;esi = 0
|
||||
;
|
||||
;ebx = qlp_coeff[]
|
||||
;edi = data[]
|
||||
;ebp = @address
|
||||
|
||||
mov eax, [ebx + 124] ; eax = qlp_coeff[31]
|
||||
imul dword [edi - 128] ; edx:eax = qlp_coeff[31] * data[i-32]
|
||||
add ecx, eax
|
||||
adc esi, edx ; sum += qlp_coeff[31] * data[i-32]
|
||||
|
||||
mov eax, [ebx + 120] ; eax = qlp_coeff[30]
|
||||
imul dword [edi - 124] ; edx:eax = qlp_coeff[30] * data[i-31]
|
||||
add ecx, eax
|
||||
adc esi, edx ; sum += qlp_coeff[30] * data[i-31]
|
||||
|
||||
mov eax, [ebx + 116]
|
||||
imul dword [edi - 120]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 112]
|
||||
imul dword [edi - 116]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 108]
|
||||
imul dword [edi - 112]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 104]
|
||||
imul dword [edi - 108]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 100]
|
||||
imul dword [edi - 104]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 96]
|
||||
imul dword [edi - 100]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 92]
|
||||
imul dword [edi - 96]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 88]
|
||||
imul dword [edi - 92]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 84]
|
||||
imul dword [edi - 88]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 80]
|
||||
imul dword [edi - 84]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 76]
|
||||
imul dword [edi - 80]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 72]
|
||||
imul dword [edi - 76]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 68]
|
||||
imul dword [edi - 72]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 64]
|
||||
imul dword [edi - 68]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 60]
|
||||
imul dword [edi - 64]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 56]
|
||||
imul dword [edi - 60]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 52]
|
||||
imul dword [edi - 56]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 48]
|
||||
imul dword [edi - 52]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 44]
|
||||
imul dword [edi - 48]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 40]
|
||||
imul dword [edi - 44]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 36]
|
||||
imul dword [edi - 40]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 32]
|
||||
imul dword [edi - 36]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 28]
|
||||
imul dword [edi - 32]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 24]
|
||||
imul dword [edi - 28]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 20]
|
||||
imul dword [edi - 24]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 16]
|
||||
imul dword [edi - 20]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 12]
|
||||
imul dword [edi - 16]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 8]
|
||||
imul dword [edi - 12]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx + 4]
|
||||
imul dword [edi - 8]
|
||||
add ecx, eax
|
||||
adc esi, edx
|
||||
|
||||
mov eax, [ebx] ; eax = qlp_coeff[ 0] (NOTE: one byte missing from instruction)
|
||||
imul dword [edi - 4] ; edx:eax = qlp_coeff[ 0] * data[i- 1]
|
||||
add ecx, eax
|
||||
adc esi, edx ; sum += qlp_coeff[ 0] * data[i- 1]
|
||||
|
||||
.jumper_0:
|
||||
mov edx, ecx
|
||||
;esi:edx = sum
|
||||
mov ecx, [esp + 36] ; cl = lp_quantization
|
||||
shrd edx, esi, cl ; edx = (sum >> lp_quantization)
|
||||
;eax = --
|
||||
;ecx = --
|
||||
;edx = sum >> lp_q
|
||||
;esi = --
|
||||
neg edx ; edx = -(sum >> lp_quantization)
|
||||
mov eax, [esp + 40] ; residual[] - data[]
|
||||
add edx, [edi] ; edx = data[i] - (sum >> lp_quantization)
|
||||
mov [edi + eax], edx
|
||||
add edi, 4
|
||||
|
||||
dec dword [esp + 24]
|
||||
jz short .end
|
||||
xor ecx, ecx
|
||||
xor esi, esi
|
||||
jmp ebp
|
||||
|
||||
.end:
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
pop ebp
|
||||
ret
|
||||
|
||||
; end
|
|
@ -1,95 +0,0 @@
|
|||
; libFLAC - Free Lossless Audio Codec library
|
||||
; Copyright (C) 2001-2009 Josh Coalson
|
||||
; Copyright (C) 2011-2022 Xiph.Org Foundation
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
;
|
||||
; - Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
;
|
||||
; - Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in the
|
||||
; documentation and/or other materials provided with the distribution.
|
||||
;
|
||||
; - Neither the name of the Xiph.org Foundation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived from
|
||||
; this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
bits 32
|
||||
|
||||
%ifdef OBJ_FORMAT_win32
|
||||
%define FLAC__PUBLIC_NEEDS_UNDERSCORE
|
||||
%idefine code_section section .text align=16 class=CODE use32
|
||||
%idefine data_section section .data align=32 class=DATA use32
|
||||
%idefine bss_section section .bss align=32 class=DATA use32
|
||||
%elifdef OBJ_FORMAT_aout
|
||||
%define FLAC__PUBLIC_NEEDS_UNDERSCORE
|
||||
%idefine code_section section .text
|
||||
%idefine data_section section .data
|
||||
%idefine bss_section section .bss
|
||||
%elifdef OBJ_FORMAT_aoutb
|
||||
%define FLAC__PUBLIC_NEEDS_UNDERSCORE
|
||||
%idefine code_section section .text
|
||||
%idefine data_section section .data
|
||||
%idefine bss_section section .bss
|
||||
%elifdef OBJ_FORMAT_coff
|
||||
%define FLAC__PUBLIC_NEEDS_UNDERSCORE
|
||||
%idefine code_section section .text
|
||||
%idefine data_section section .data
|
||||
%idefine bss_section section .bss
|
||||
%elifdef OBJ_FORMAT_macho
|
||||
%define FLAC__PUBLIC_NEEDS_UNDERSCORE
|
||||
%idefine code_section section .text
|
||||
%idefine data_section section .data
|
||||
%idefine bss_section section .bss
|
||||
%elifdef OBJ_FORMAT_elf
|
||||
%idefine code_section section .text align=16
|
||||
%idefine data_section section .data align=32
|
||||
%idefine bss_section section .bss align=32
|
||||
%else
|
||||
%error unsupported object format! ; this directive doesn't really work here
|
||||
%endif
|
||||
|
||||
%imacro cglobal 1
|
||||
%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
|
||||
global _%1
|
||||
%else
|
||||
%if __NASM_MAJOR__ >= 2
|
||||
global %1:function hidden
|
||||
%else
|
||||
global %1
|
||||
%endif
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%imacro cextern 1
|
||||
%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
|
||||
extern _%1
|
||||
%else
|
||||
extern %1
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%imacro cident 1
|
||||
_%1:
|
||||
%1:
|
||||
%endmacro
|
||||
|
||||
%ifdef OBJ_FORMAT_elf
|
||||
section .note.GNU-stack progbits noalloc noexec nowrite align=1
|
||||
%endif
|
||||
|
|
@ -69,9 +69,6 @@ uint32_t FLAC__fixed_compute_best_predictor_intrin_ssse3(const FLAC__int32 data[
|
|||
uint32_t FLAC__fixed_compute_best_predictor_wide_intrin_ssse3(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
|
||||
# endif
|
||||
# endif
|
||||
# if defined FLAC__CPU_IA32 && defined FLAC__HAS_NASM
|
||||
uint32_t FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
|
||||
# endif
|
||||
# endif
|
||||
#else
|
||||
uint32_t FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], uint32_t data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
|
||||
|
|
|
@ -174,13 +174,6 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_neon(const FLAC__in
|
|||
void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_neon(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
|
||||
# endif
|
||||
|
||||
# ifdef FLAC__CPU_IA32
|
||||
# ifdef FLAC__HAS_NASM
|
||||
void FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
|
||||
void FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
|
||||
void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
|
||||
# endif
|
||||
# endif
|
||||
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN
|
||||
# ifdef FLAC__SSE2_SUPPORTED
|
||||
void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
|
||||
|
|
|
@ -949,20 +949,6 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
|
|||
if(encoder->private_->cpuinfo.use_asm) {
|
||||
# ifdef FLAC__CPU_IA32
|
||||
FLAC__ASSERT(encoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_IA32);
|
||||
# ifdef FLAC__HAS_NASM
|
||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32; /* OPT_IA32: was really necessary for GCC < 4.9 */
|
||||
if (encoder->private_->cpuinfo.x86.mmx) {
|
||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32;
|
||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx;
|
||||
}
|
||||
else {
|
||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32;
|
||||
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32;
|
||||
}
|
||||
|
||||
if (encoder->private_->cpuinfo.x86.mmx && encoder->private_->cpuinfo.x86.cmov)
|
||||
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov;
|
||||
# endif /* FLAC__HAS_NASM */
|
||||
# if FLAC__HAS_X86INTRIN
|
||||
# ifdef FLAC__SSE2_SUPPORTED
|
||||
if (encoder->private_->cpuinfo.x86.sse2) {
|
||||
|
|
|
@ -1,19 +0,0 @@
|
|||
#!/bin/sh
|
||||
#
|
||||
# libtool assumes that the compiler can handle the -fPIC flag.
|
||||
# This isn't always true (for example, nasm can't handle it).
|
||||
# Also, on some versions of OS X it tries to pass -fno-common
|
||||
# to 'as' which causes problems.
|
||||
command=""
|
||||
while [ $1 ]; do
|
||||
if [ "$1" != "-fPIC" ]; then
|
||||
if [ "$1" != "-DPIC" ]; then
|
||||
if [ "$1" != "-fno-common" ]; then
|
||||
command="$command $1"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
shift
|
||||
done
|
||||
echo $command
|
||||
exec $command
|
Loading…
Reference in New Issue