Remove all assembler

This commit is contained in:
Martijn van Beurden 2022-10-15 15:44:03 +02:00 committed by GitHub
parent 0665053c5d
commit 75ef7958df
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 6 additions and 1403 deletions

View File

@ -60,7 +60,6 @@ EXTRA_DIST = \
autogen.sh \
config.rpath \
depcomp \
ltmain.sh \
strip_non_asm_libtool_args.sh
ltmain.sh
CLEANFILES = *~

View File

@ -24,9 +24,6 @@
/* define if you have docbook-to-man or docbook2man */
#cmakedefine FLAC__HAS_DOCBOOK_TO_MAN
/* define if you are compiling for x86 and have the NASM assembler */
#cmakedefine FLAC__HAS_NASM
/* define if you have the ogg library */
#cmakedefine01 OGG_FOUND
#define FLAC__HAS_OGG OGG_FOUND

View File

@ -471,14 +471,6 @@ AC_CHECK_LIB(rt, clock_gettime,
AH_TEMPLATE(HAVE_CLOCK_GETTIME, [define if you have clock_gettime]))
AC_SUBST(LIB_CLOCK_GETTIME)
# only matters for x86
AC_CHECK_PROGS(NASM, nasm)
AM_CONDITIONAL(FLaC__HAS_NASM, test -n "$NASM")
if test -n "$NASM" ; then
AC_DEFINE(FLAC__HAS_NASM)
AH_TEMPLATE(FLAC__HAS_NASM, [define if you are compiling for x86 and have the NASM assembler])
fi
dnl If debugging is disabled AND no CFLAGS/CXXFLAGS/CPPFLAGS/LDFLAGS
dnl are provided, we can set defaults to our liking
AS_IF([test "x${ax_enable_debug}" = "xno" && test "x${enable_flags_setting}" = "xyes"], [
@ -607,7 +599,6 @@ AC_CONFIG_FILES([ \
src/Makefile \
src/libFLAC/Makefile \
src/libFLAC/flac.pc \
src/libFLAC/ia32/Makefile \
src/libFLAC/include/Makefile \
src/libFLAC/include/private/Makefile \
src/libFLAC/include/protected/Makefile \

View File

@ -38,23 +38,11 @@ else()
endif()
endif()
include(CheckLanguage)
check_language(ASM_NASM)
if(CMAKE_ASM_NASM_COMPILER)
enable_language(ASM_NASM)
add_definitions(-DFLAC__HAS_NASM)
endif()
if(NOT WITH_ASM)
add_definitions(-DFLAC__NO_ASM)
endif()
if(FLAC__CPU_IA32)
if(WITH_ASM AND CMAKE_ASM_NASM_COMPILER)
add_subdirectory(ia32)
endif()
option(WITH_SSE "Enable SSE2 optimizations (WITHOUT runtime detection, resulting binary requires SSE2)" ON)
check_c_compiler_flag(-msse2 HAVE_MSSE2_FLAG)
if(WITH_SSE)

View File

@ -55,19 +55,9 @@ endif
AM_CFLAGS = $(DEBUGCFLAGS) $(CPUCFLAGS) ${ASSOCMATHCFLAGS} @OGG_CFLAGS@
if FLaC__NO_ASM
else
if FLaC__CPU_IA32
if FLaC__HAS_NASM
ARCH_SUBDIRS = ia32
LOCAL_EXTRA_LIBADD = ia32/libFLAC-asm.la
endif
endif
endif
libFLAC_la_LIBADD = @OGG_LIBS@ -lm
libFLAC_la_LIBADD = $(LOCAL_EXTRA_LIBADD) @OGG_LIBS@ -lm
SUBDIRS = $(ARCH_SUBDIRS) include .
SUBDIRS = include .
m4datadir = $(datadir)/aclocal
m4data_DATA = libFLAC.m4
@ -97,7 +87,7 @@ extra_ogg_sources = \
endif
# see 'http://www.gnu.org/software/libtool/manual/libtool.html#Libtool-versioning' for numbering convention
libFLAC_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-info 12:0:0 $(LOCAL_EXTRA_LDFLAGS)
libFLAC_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-info 12:0:0
libFLAC_sources = \
bitmath.c \
@ -134,5 +124,4 @@ libFLAC_sources = \
libFLAC_la_SOURCES = $(libFLAC_sources)
# needed for test_libFLAC
libFLAC_static_la_LIBADD = $(LOCAL_EXTRA_LIBADD)
libFLAC_static_la_SOURCES = $(libFLAC_sources)

View File

@ -57,7 +57,7 @@
#include <sys/auxv.h>
#endif
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && (defined FLAC__HAS_NASM || FLAC__HAS_X86INTRIN) && !defined FLAC__NO_ASM
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN && !defined FLAC__NO_ASM
/* these are flags in EDX of CPUID AX=00000001 */
static const uint32_t FLAC__CPUINFO_X86_CPUID_CMOV = 0x00008000;
@ -97,8 +97,6 @@ cpu_have_cpuid(void)
#if defined FLAC__CPU_X86_64 || defined __i686__ || defined __SSE__ || (defined _M_IX86_FP && _M_IX86_FP > 0)
/* target CPU does have CPUID instruction */
return 1;
#elif defined FLAC__HAS_NASM
return FLAC__cpu_have_cpuid_asm_ia32();
#elif defined __GNUC__ && defined HAVE_CPUID_H
if (__get_cpuid_max(0, 0) != 0)
return 1;
@ -151,9 +149,6 @@ cpuinfo_x86(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint
__cpuid_count(level, 0, *eax, *ebx, *ecx, *edx);
return;
}
#elif defined FLAC__HAS_NASM && defined FLAC__CPU_IA32
FLAC__cpu_info_asm_ia32(level, eax, ebx, ecx, edx);
return;
#endif
*eax = *ebx = *ecx = *edx = 0;
}
@ -163,7 +158,7 @@ cpuinfo_x86(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint
static void
x86_cpu_info (FLAC__CPUInfo *info)
{
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && (defined FLAC__HAS_NASM || FLAC__HAS_X86INTRIN) && !defined FLAC__NO_ASM
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN && !defined FLAC__NO_ASM
FLAC__bool x86_osxsave = false;
FLAC__bool os_avx = false;
FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx;

View File

@ -1,17 +0,0 @@
# Builds the hand-written IA-32 NASM routines as the FLAC-asm object library.

# The .nasm sources pull in nasm.h via %include, so this directory must be on
# the assembler's include path. The explicit -I option carries the trailing
# slash on the path.
include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
add_compile_options(-I${CMAKE_CURRENT_SOURCE_DIR}/)

# nasm.h selects its section/symbol conventions from an OBJ_FORMAT_* define.
if(APPLE)
    add_compile_options(-dOBJ_FORMAT_macho)
elseif(WIN32)
    # FIXME: add_compile_options(-d OBJ_FORMAT_win32) doesn't seem to work on
    # Windows, so the define is passed through CMAKE_ASM_NASM_FLAGS instead.
    # Append rather than overwrite so that flags supplied by the user or the
    # toolchain file are not discarded.
    set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -dOBJ_FORMAT_win32")
else()
    add_compile_options(-dOBJ_FORMAT_elf)
endif()

add_library(FLAC-asm OBJECT
    cpu_asm.nasm
    fixed_asm.nasm
    lpc_asm.nasm)

View File

@ -1,46 +0,0 @@
# libFLAC - Free Lossless Audio Codec library
# Copyright (C) 2001-2009 Josh Coalson
# Copyright (C) 2011-2022 Xiph.Org Foundation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# - Neither the name of the Xiph.org Foundation nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Automake rules that assemble the IA-32 NASM sources into the libtool
# convenience library libFLAC-asm.la.

# Register the .nasm -> .lo suffix pair used by the rule below.
SUFFIXES = .nasm .lo
# Helper script run in front of $(NASM) inside the libtool compile command.
STRIP_NON_ASM = sh $(top_srcdir)/strip_non_asm_libtool_args.sh
AM_CPPFLAGS = -I$(top_builddir) -I$(srcdir)/include -I$(top_srcdir)/include
# Assemble one .nasm file through libtool. $(OBJ_FORMAT) is used both as the
# NASM output format (-f) and to build the OBJ_FORMAT_<fmt> define (-d);
# -i$(srcdir)/ puts this directory on the NASM include path.
.nasm.lo:
$(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) $(NASM) -f $(OBJ_FORMAT) -d OBJ_FORMAT_$(OBJ_FORMAT) -i$(srcdir)/ $< -o $@
# noinst: the library is built but not installed.
noinst_LTLIBRARIES = libFLAC-asm.la
libFLAC_asm_la_SOURCES = \
cpu_asm.nasm \
fixed_asm.nasm \
lpc_asm.nasm \
nasm.h
# Ship the CMake build description for this directory in the tarball as well.
EXTRA_DIST = CMakeLists.txt

View File

@ -1,119 +0,0 @@
; vim:filetype=nasm ts=8
; libFLAC - Free Lossless Audio Codec library
; Copyright (C) 2001-2009 Josh Coalson
; Copyright (C) 2011-2022 Xiph.Org Foundation
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
;
; - Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
;
; - Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
;
; - Neither the name of the Xiph.org Foundation nor the names of its
; contributors may be used to endorse or promote products derived from
; this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%include "nasm.h"
data_section
cglobal FLAC__cpu_have_cpuid_asm_ia32
cglobal FLAC__cpu_info_asm_ia32
code_section
; **********************************************************************
;
; FLAC__uint32 FLAC__cpu_have_cpuid_asm_ia32()
;
; Tests whether bit 21 of EFLAGS (mask 0x00200000) can be toggled by software.
; Takes no arguments. Returns 1 in eax if the bit changed, 0 if it did not.
; Uses eax and edx as scratch; the original EFLAGS value is restored on exit.
cident FLAC__cpu_have_cpuid_asm_ia32
; eax = current EFLAGS, edx = copy kept for comparison and restore
pushfd
pop eax
mov edx, eax
; flip bit 21 and try to write the result back into EFLAGS
xor eax, 0x00200000
push eax
popfd
; read EFLAGS again to see what was actually stored
pushfd
pop eax
; isolate bit 21 of (new XOR old) and shift it down to bit 0 (0x15 == 21)
xor eax, edx
and eax, 0x00200000
shr eax, 0x15
; put the original EFLAGS back
push edx
popfd
ret
; **********************************************************************
;
; void FLAC__cpu_info_asm_ia32(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint32 *ecx, FLAC__uint32 *edx)
;
; Executes CPUID for the requested level (with ecx = 0) and stores the
; resulting eax/ebx/ecx/edx through the four output pointers. All four outputs
; are set to 0 when FLAC__cpu_have_cpuid_asm_ia32 returns 0, or when the
; requested level is above the maximum reported by CPUID for its range.
; ebx is saved and restored; eax, ecx and edx are clobbered.
cident FLAC__cpu_info_asm_ia32
; The offsets below are valid after the 'push ebx' that follows.
;[esp + 8] == level
;[esp + 12] == flags_eax
;[esp + 16] == flags_ebx
;[esp + 20] == flags_ecx
;[esp + 24] == flags_edx
push ebx
call FLAC__cpu_have_cpuid_asm_ia32
test eax, eax
jz .no_cpuid
; Query the base leaf of the requested range (level with only its top bit
; kept, i.e. 0 or 0x80000000); CPUID leaves the comparison value in eax.
mov eax, [esp + 8]
and eax, 0x80000000
cpuid
; unsigned compare: bail out if the reported maximum is below 'level'
cmp eax, [esp + 8]
jb .no_cpuid
; the real query: eax = level, ecx = 0
xor ecx, ecx
mov eax, [esp + 8]
cpuid
; ebx holds a CPUID result but is also needed as the pointer scratch register,
; so park it on the stack; this shifts every argument offset by 4.
push ebx
;[esp + 16] == flags_eax
mov ebx, [esp + 16]
mov [ebx], eax
; eax = the ebx value returned by CPUID; offsets shift back by 4
pop eax
;[esp + 16] == flags_ebx
mov ebx, [esp + 16]
mov [ebx], eax
mov ebx, [esp + 20]
mov [ebx], ecx
mov ebx, [esp + 24]
mov [ebx], edx
jmp .end
.no_cpuid:
; store 0 through all four output pointers
xor eax, eax
mov ebx, [esp + 12]
mov [ebx], eax
mov ebx, [esp + 16]
mov [ebx], eax
mov ebx, [esp + 20]
mov [ebx], eax
mov ebx, [esp + 24]
mov [ebx], eax
.end:
pop ebx
ret
; end

View File

@ -1,309 +0,0 @@
; vim:filetype=nasm ts=8
; libFLAC - Free Lossless Audio Codec library
; Copyright (C) 2001-2009 Josh Coalson
; Copyright (C) 2011-2022 Xiph.Org Foundation
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
;
; - Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
;
; - Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
;
; - Neither the name of the Xiph.org Foundation nor the names of its
; contributors may be used to endorse or promote products derived from
; this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%include "nasm.h"
data_section
cglobal FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov
code_section
; **********************************************************************
;
; unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 *data, unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
; {
; FLAC__int32 last_error_0 = data[-1];
; FLAC__int32 last_error_1 = data[-1] - data[-2];
; FLAC__int32 last_error_2 = last_error_1 - (data[-2] - data[-3]);
; FLAC__int32 last_error_3 = last_error_2 - (data[-2] - 2*data[-3] + data[-4]);
; FLAC__int32 error, save;
; FLAC__uint32 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0;
; unsigned i, order;
;
; for(i = 0; i < data_len; i++) {
; error = data[i] ; total_error_0 += local_abs(error); save = error;
; error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error;
; error -= last_error_1; total_error_2 += local_abs(error); last_error_1 = save; save = error;
; error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error;
; error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save;
; }
;
; if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
; order = 0;
; else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
; order = 1;
; else if(total_error_2 < min(total_error_3, total_error_4))
; order = 2;
; else if(total_error_3 < total_error_4)
; order = 3;
; else
; order = 4;
;
; residual_bits_per_sample[0] = (float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0);
; residual_bits_per_sample[1] = (float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0);
; residual_bits_per_sample[2] = (float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0);
; residual_bits_per_sample[3] = (float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0);
; residual_bits_per_sample[4] = (float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0);
;
; return order;
; }
; MMX/CMOV implementation of the fixed-predictor order search.
; Phase 1 (.loop): accumulates the five 32-bit absolute-error totals in
; mm0/mm1/mm2, two totals per register.
; Phase 2: selects the order with the smallest total using cmov.
; Phase 3 (.rbps_*): converts each total to a float estimate with the x87 FPU
; and stores it in residual_bits_per_sample[].
; Returns the chosen order in eax; ebp, ebx, esi and edi are preserved.
; Reads data[-4] .. data[data_len-1] when data_len != 0.
ALIGN 16
cident FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov
; Offsets are valid after the four pushes and the 'sub esp' below.
; esp + 36 == data[]
; esp + 40 == data_len
; esp + 44 == residual_bits_per_sample[]
push ebp
push ebx
push esi
push edi
sub esp, byte 16
; qword [esp] == temp space for loading FLAC__uint64s to FPU regs
; ebx == &data[i]
; ecx == loop counter (i)
; ebp == order
; mm0 == total_error_1:total_error_0
; mm1 == total_error_2:total_error_3
; mm2 == :total_error_4
; mm3 == last_error_1:last_error_0
; mm4 == last_error_2:last_error_3
mov ecx, [esp + 40] ; ecx = data_len
test ecx, ecx
jz near .data_len_is_0
mov ebx, [esp + 36] ; ebx = data[]
movd mm3, [ebx - 4] ; mm3 = 0:last_error_0
movd mm2, [ebx - 8] ; mm2 = 0:data[-2]
movd mm1, [ebx - 12] ; mm1 = 0:data[-3]
movd mm0, [ebx - 16] ; mm0 = 0:data[-4]
movq mm5, mm3 ; mm5 = 0:last_error_0
psubd mm5, mm2 ; mm5 = 0:last_error_1
punpckldq mm3, mm5 ; mm3 = last_error_1:last_error_0
psubd mm2, mm1 ; mm2 = 0:data[-2] - data[-3]
psubd mm5, mm2 ; mm5 = 0:last_error_2
movq mm4, mm5 ; mm4 = 0:last_error_2
psubd mm4, mm2 ; mm4 = 0:last_error_2 - (data[-2] - data[-3])
paddd mm4, mm1 ; mm4 = 0:last_error_2 - (data[-2] - 2 * data[-3])
psubd mm4, mm0 ; mm4 = 0:last_error_3
punpckldq mm4, mm5 ; mm4 = last_error_2:last_error_3
pxor mm0, mm0 ; mm0 = total_error_1:total_error_0
pxor mm1, mm1 ; mm1 = total_error_2:total_error_3
pxor mm2, mm2 ; mm2 = 0:total_error_4
ALIGN 16
.loop:
movd mm7, [ebx] ; mm7 = 0:error_0
add ebx, byte 4
movq mm6, mm7 ; mm6 = 0:error_0
psubd mm7, mm3 ; mm7 = :error_1
punpckldq mm6, mm7 ; mm6 = error_1:error_0
movq mm5, mm6 ; mm5 = error_1:error_0
movq mm7, mm6 ; mm7 = error_1:error_0
psubd mm5, mm3 ; mm5 = error_2:
movq mm3, mm6 ; mm3 = error_1:error_0
; abs() idiom: mm6 becomes the per-dword sign mask (0 or -1);
; (x XOR mask) - mask == |x|
psrad mm6, 31
pxor mm7, mm6
psubd mm7, mm6 ; mm7 = abs(error_1):abs(error_0)
paddd mm0, mm7 ; mm0 = total_error_1:total_error_0
movq mm6, mm5 ; mm6 = error_2:
psubd mm5, mm4 ; mm5 = error_3:
punpckhdq mm5, mm6 ; mm5 = error_2:error_3
movq mm7, mm5 ; mm7 = error_2:error_3
movq mm6, mm5 ; mm6 = error_2:error_3
psubd mm5, mm4 ; mm5 = :error_4
movq mm4, mm6 ; mm4 = error_2:error_3
psrad mm6, 31
pxor mm7, mm6
psubd mm7, mm6 ; mm7 = abs(error_2):abs(error_3)
paddd mm1, mm7 ; mm1 = total_error_2:total_error_3
movq mm6, mm5 ; mm6 = :error_4
psrad mm5, 31
pxor mm6, mm5
psubd mm6, mm5 ; mm6 = :abs(error_4)
paddd mm2, mm6 ; mm2 = :total_error_4
dec ecx
jnz short .loop
; if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
; order = 0;
; else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
; order = 1;
; else if(total_error_2 < min(total_error_3, total_error_4))
; order = 2;
; else if(total_error_3 < total_error_4)
; order = 3;
; else
; order = 4;
movq mm3, mm0 ; mm3 = total_error_1:total_error_0
movd edi, mm2 ; edi = total_error_4
movd esi, mm1 ; esi = total_error_3
movd eax, mm0 ; eax = total_error_0
punpckhdq mm1, mm1 ; mm1 = total_error_2:total_error_2
punpckhdq mm3, mm3 ; mm3 = total_error_1:total_error_1
movd edx, mm1 ; edx = total_error_2
movd ecx, mm3 ; ecx = total_error_1
; ebx steps through candidate orders 1..4, eax tracks the running minimum and
; ebp the selected order. cmovbe (<=) lets a later order win a tie, which
; matches the strict '<' tests of the C reference above. Compares are unsigned.
xor ebx, ebx
xor ebp, ebp
inc ebx
cmp ecx, eax
cmovb eax, ecx ; eax = min(total_error_0, total_error_1)
cmovbe ebp, ebx
inc ebx
cmp edx, eax
cmovb eax, edx ; eax = min(total_error_0, total_error_1, total_error_2)
cmovbe ebp, ebx
inc ebx
cmp esi, eax
cmovb eax, esi ; eax = min(total_error_0, total_error_1, total_error_2, total_error_3)
cmovbe ebp, ebx
inc ebx
cmp edi, eax
cmovb eax, edi ; eax = min(total_error_0, total_error_1, total_error_2, total_error_3, total_error_4)
cmovbe ebp, ebx
movd ebx, mm0 ; ebx = total_error_0
; leave MMX state before the x87 instructions below
emms
; residual_bits_per_sample[0] = (float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0);
; residual_bits_per_sample[1] = (float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0);
; residual_bits_per_sample[2] = (float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0);
; residual_bits_per_sample[3] = (float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0);
; residual_bits_per_sample[4] = (float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0);
; eax = 0 for the rest of the routine: it is the high dword when a 32-bit
; total is widened to the 64-bit integer loaded by fild, and it is also the
; value stored as 0.0 when a total is zero.
xor eax, eax
fild dword [esp + 40] ; ST = data_len (NOTE: assumes data_len is <2gigs)
.rbps_0:
test ebx, ebx
jz .total_error_0_is_0
fld1 ; ST = 1.0 data_len
mov [esp], ebx
mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_0
mov ebx, [esp + 44]
fild qword [esp] ; ST = total_error_0 1.0 data_len
fdiv st2 ; ST = total_error_0/data_len 1.0 data_len
fldln2 ; ST = ln2 total_error_0/data_len 1.0 data_len
fmulp st1 ; ST = ln2*total_error_0/data_len 1.0 data_len
fyl2x ; ST = log2(ln2*total_error_0/data_len) data_len
fstp dword [ebx] ; residual_bits_per_sample[0] = log2(ln2*total_error_0/data_len) ST = data_len
jmp short .rbps_1
.total_error_0_is_0:
; from here on ebx == residual_bits_per_sample[] on both paths
mov ebx, [esp + 44]
mov [ebx], eax ; residual_bits_per_sample[0] = 0.0
.rbps_1:
test ecx, ecx
jz .total_error_1_is_0
fld1 ; ST = 1.0 data_len
mov [esp], ecx
mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_1
fild qword [esp] ; ST = total_error_1 1.0 data_len
fdiv st2 ; ST = total_error_1/data_len 1.0 data_len
fldln2 ; ST = ln2 total_error_1/data_len 1.0 data_len
fmulp st1 ; ST = ln2*total_error_1/data_len 1.0 data_len
fyl2x ; ST = log2(ln2*total_error_1/data_len) data_len
fstp dword [ebx + 4] ; residual_bits_per_sample[1] = log2(ln2*total_error_1/data_len) ST = data_len
jmp short .rbps_2
.total_error_1_is_0:
mov [ebx + 4], eax ; residual_bits_per_sample[1] = 0.0
.rbps_2:
test edx, edx
jz .total_error_2_is_0
fld1 ; ST = 1.0 data_len
mov [esp], edx
mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_2
fild qword [esp] ; ST = total_error_2 1.0 data_len
fdiv st2 ; ST = total_error_2/data_len 1.0 data_len
fldln2 ; ST = ln2 total_error_2/data_len 1.0 data_len
fmulp st1 ; ST = ln2*total_error_2/data_len 1.0 data_len
fyl2x ; ST = log2(ln2*total_error_2/data_len) data_len
fstp dword [ebx + 8] ; residual_bits_per_sample[2] = log2(ln2*total_error_2/data_len) ST = data_len
jmp short .rbps_3
.total_error_2_is_0:
mov [ebx + 8], eax ; residual_bits_per_sample[2] = 0.0
.rbps_3:
test esi, esi
jz .total_error_3_is_0
fld1 ; ST = 1.0 data_len
mov [esp], esi
mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_3
fild qword [esp] ; ST = total_error_3 1.0 data_len
fdiv st2 ; ST = total_error_3/data_len 1.0 data_len
fldln2 ; ST = ln2 total_error_3/data_len 1.0 data_len
fmulp st1 ; ST = ln2*total_error_3/data_len 1.0 data_len
fyl2x ; ST = log2(ln2*total_error_3/data_len) data_len
fstp dword [ebx + 12] ; residual_bits_per_sample[3] = log2(ln2*total_error_3/data_len) ST = data_len
jmp short .rbps_4
.total_error_3_is_0:
mov [ebx + 12], eax ; residual_bits_per_sample[3] = 0.0
.rbps_4:
test edi, edi
jz .total_error_4_is_0
fld1 ; ST = 1.0 data_len
mov [esp], edi
mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_4
fild qword [esp] ; ST = total_error_4 1.0 data_len
fdiv st2 ; ST = total_error_4/data_len 1.0 data_len
fldln2 ; ST = ln2 total_error_4/data_len 1.0 data_len
fmulp st1 ; ST = ln2*total_error_4/data_len 1.0 data_len
fyl2x ; ST = log2(ln2*total_error_4/data_len) data_len
fstp dword [ebx + 16] ; residual_bits_per_sample[4] = log2(ln2*total_error_4/data_len) ST = data_len
jmp short .rbps_end
.total_error_4_is_0:
mov [ebx + 16], eax ; residual_bits_per_sample[4] = 0.0
.rbps_end:
; pop the data_len value that was kept on the FPU stack throughout
fstp st0 ; ST = [empty]
jmp short .end
.data_len_is_0:
; data_len == 0, so residual_bits_per_sample[*] = 0.0
; ebp doubles as the zero being stored and, after the add, as the result
xor ebp, ebp
mov edi, [esp + 44]
mov [edi], ebp
mov [edi + 4], ebp
mov [edi + 8], ebp
mov [edi + 12], ebp
mov [edi + 16], ebp
add ebp, byte 4 ; order = 4
.end:
mov eax, ebp ; return order
add esp, byte 16
pop edi
pop esi
pop ebx
pop ebp
ret
; end

View File

@ -1,727 +0,0 @@
; vim:filetype=nasm ts=8
; libFLAC - Free Lossless Audio Codec library
; Copyright (C) 2001-2009 Josh Coalson
; Copyright (C) 2011-2022 Xiph.Org Foundation
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
;
; - Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
;
; - Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
;
; - Neither the name of the Xiph.org Foundation nor the names of its
; contributors may be used to endorse or promote products derived from
; this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%include "nasm.h"
data_section
cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
code_section
;void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
;
; for(i = 0; i < data_len; i++) {
; sum = 0;
; for(j = 0; j < order; j++)
; sum += qlp_coeff[j] * data[i-j-1];
; residual[i] = data[i] - (sum >> lp_quantization);
; }
;
; Scalar 32-bit implementation of the residual computation shown in the C
; reference above. Three code paths are selected on 'order':
;   order <= 1  : .i_1_loop_i, one multiply per sample
;   order > 32  : .i_32more_loop_i / .i_32more_loop_j, generic nested loops
;   otherwise   : .i_32, a fully unrolled multiply-accumulate table that is
;                 entered part-way through via a computed jump
; The .begin label is also a jump target of the _mmx variant, which enters
; with esi/edi/eax/ebx already set up and the same four registers pushed.
; ebp, ebx, esi and edi are preserved for the caller.
ALIGN 16
cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
; Offsets are valid after the four pushes below.
;[esp + 40] residual[]
;[esp + 36] lp_quantization
;[esp + 32] order
;[esp + 28] qlp_coeff[]
;[esp + 24] data_len
;[esp + 20] data[]
;ASSERT(order > 0)
push ebp
push ebx
push esi
push edi
mov esi, [esp + 20] ; esi = data[]
mov edi, [esp + 40] ; edi = residual[]
mov eax, [esp + 32] ; eax = order
mov ebx, [esp + 24] ; ebx = data_len
test ebx, ebx
jz near .end ; do nothing if data_len == 0
.begin:
cmp eax, byte 1
jg short .i_1more
; single-coefficient path: eax carries the previous sample between iterations
mov ecx, [esp + 28]
mov edx, [ecx] ; edx = qlp_coeff[0]
mov eax, [esi - 4] ; eax = data[-1]
mov ecx, [esp + 36] ; cl = lp_quantization
ALIGN 16
.i_1_loop_i:
imul eax, edx
sar eax, cl
neg eax
add eax, [esi]
mov [edi], eax
; reload the current sample; it is the "previous sample" for the next pass
mov eax, [esi]
add edi, byte 4
add esi, byte 4
dec ebx
jnz .i_1_loop_i
jmp .end
.i_1more:
cmp eax, byte 32 ; for order <= 32 there is a faster routine
jbe short .i_32
; This version is here just for completeness, since FLAC__MAX_LPC_ORDER == 32
ALIGN 16
.i_32more_loop_i:
; ebp = sum; ecx runs from -order up to 0 and indexes data backwards;
; edx starts at &qlp_coeff[order] and is pre-decremented in the inner loop
xor ebp, ebp
mov ecx, [esp + 32]
mov edx, ecx
shl edx, 2
add edx, [esp + 28]
neg ecx
ALIGN 16
.i_32more_loop_j:
sub edx, byte 4
mov eax, [edx]
imul eax, [esi + 4 * ecx]
add ebp, eax
inc ecx
jnz short .i_32more_loop_j
; residual[i] = data[i] - (sum >> lp_quantization)
mov ecx, [esp + 36]
sar ebp, cl
neg ebp
add ebp, [esi]
mov [edi], ebp
add esi, byte 4
add edi, byte 4
dec ebx
jnz .i_32more_loop_i
jmp .end
; Helper reached only via 'call': returns its own return address in eax,
; i.e. the run-time address of the instruction following the call.
.mov_eip_to_eax:
mov eax, [esp]
ret
.i_32:
; edi = residual[] - data[], so [edi + esi] addresses residual[i] while only
; esi is advanced.
sub edi, esi
; edx = entry address into the unrolled table below:
;   -9 * order + (.jumper_0 - .get_eip0) + address of .get_eip0 + 1
; The factor 9 comes from the lea scaling; the table is presumably laid out
; with 9 bytes of code per coefficient, and the final entry is one byte
; shorter (see the note on it), which the 'inc edx' compensates for.
neg eax
lea edx, [eax + eax * 8 + .jumper_0 - .get_eip0]
call .mov_eip_to_eax
.get_eip0:
add edx, eax
inc edx
mov eax, [esp + 28] ; eax = qlp_coeff[]
xor ebp, ebp
jmp edx
; Unrolled table: each entry adds qlp_coeff[j] * data[i-j-1] to ebp, from
; j = 31 down to j = 0. Execution enters at the entry for j = order - 1.
mov ecx, [eax + 124]
imul ecx, [esi - 128]
add ebp, ecx
mov ecx, [eax + 120]
imul ecx, [esi - 124]
add ebp, ecx
mov ecx, [eax + 116]
imul ecx, [esi - 120]
add ebp, ecx
mov ecx, [eax + 112]
imul ecx, [esi - 116]
add ebp, ecx
mov ecx, [eax + 108]
imul ecx, [esi - 112]
add ebp, ecx
mov ecx, [eax + 104]
imul ecx, [esi - 108]
add ebp, ecx
mov ecx, [eax + 100]
imul ecx, [esi - 104]
add ebp, ecx
mov ecx, [eax + 96]
imul ecx, [esi - 100]
add ebp, ecx
mov ecx, [eax + 92]
imul ecx, [esi - 96]
add ebp, ecx
mov ecx, [eax + 88]
imul ecx, [esi - 92]
add ebp, ecx
mov ecx, [eax + 84]
imul ecx, [esi - 88]
add ebp, ecx
mov ecx, [eax + 80]
imul ecx, [esi - 84]
add ebp, ecx
mov ecx, [eax + 76]
imul ecx, [esi - 80]
add ebp, ecx
mov ecx, [eax + 72]
imul ecx, [esi - 76]
add ebp, ecx
mov ecx, [eax + 68]
imul ecx, [esi - 72]
add ebp, ecx
mov ecx, [eax + 64]
imul ecx, [esi - 68]
add ebp, ecx
mov ecx, [eax + 60]
imul ecx, [esi - 64]
add ebp, ecx
mov ecx, [eax + 56]
imul ecx, [esi - 60]
add ebp, ecx
mov ecx, [eax + 52]
imul ecx, [esi - 56]
add ebp, ecx
mov ecx, [eax + 48]
imul ecx, [esi - 52]
add ebp, ecx
mov ecx, [eax + 44]
imul ecx, [esi - 48]
add ebp, ecx
mov ecx, [eax + 40]
imul ecx, [esi - 44]
add ebp, ecx
mov ecx, [eax + 36]
imul ecx, [esi - 40]
add ebp, ecx
mov ecx, [eax + 32]
imul ecx, [esi - 36]
add ebp, ecx
mov ecx, [eax + 28]
imul ecx, [esi - 32]
add ebp, ecx
mov ecx, [eax + 24]
imul ecx, [esi - 28]
add ebp, ecx
mov ecx, [eax + 20]
imul ecx, [esi - 24]
add ebp, ecx
mov ecx, [eax + 16]
imul ecx, [esi - 20]
add ebp, ecx
mov ecx, [eax + 12]
imul ecx, [esi - 16]
add ebp, ecx
mov ecx, [eax + 8]
imul ecx, [esi - 12]
add ebp, ecx
mov ecx, [eax + 4]
imul ecx, [esi - 8]
add ebp, ecx
mov ecx, [eax] ; there is one byte missing
imul ecx, [esi - 4]
add ebp, ecx
.jumper_0:
; residual[i] = data[i] - (sum >> lp_quantization)
mov ecx, [esp + 36]
sar ebp, cl
neg ebp
add ebp, [esi]
mov [edi + esi], ebp
add esi, byte 4
dec ebx
jz short .end
; next sample: clear the sum and re-enter the table at the same entry point
xor ebp, ebp
jmp edx
.end:
pop edi
pop esi
pop ebx
pop ebp
ret
; WATCHOUT: this routine works on 16 bit data which means bits-per-sample for
; the channel and qlp_coeffs must be <= 16. Especially note that this routine
; cannot be used for side-channel coded 16bps channels since the effective bps
; is 17.
;
; MMX variant with the same arguments as the scalar routine. It produces two
; residuals per loop iteration using pmaddwd on samples and coefficients that
; have been narrowed to 16 bits. A left-over final sample (and the whole job
; when data_len == 1) is handed to the scalar routine by jumping to its .begin
; label. ebp, ebx, esi and edi are preserved for the caller.
ALIGN 16
cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
; Offsets are valid after the four pushes below.
;[esp + 40] residual[]
;[esp + 36] lp_quantization
;[esp + 32] order
;[esp + 28] qlp_coeff[]
;[esp + 24] data_len
;[esp + 20] data[]
;ASSERT(order > 0)
push ebp
push ebx
push esi
push edi
mov esi, [esp + 20] ; esi = data[]
mov edi, [esp + 40] ; edi = residual[]
mov eax, [esp + 32] ; eax = order
mov ebx, [esp + 24] ; ebx = data_len
test ebx, ebx
jz near .end ; do nothing if data_len == 0
; ebx = data_len - 1; the MMX loops run while ebx > 0, consuming 2 per pass
dec ebx
test ebx, ebx
jz near .last_one
mov edx, [esp + 28] ; edx = qlp_coeff[]
movd mm6, [esp + 36] ; mm6 = 0:lp_quantization
; Build a 16-bit copy of the coefficients on an 8-byte aligned stack area.
; ebp keeps the original esp so it can be restored at .mmx_end.
mov ebp, esp
and esp, 0xfffffff8
xor ecx, ecx
.copy_qlp_loop:
; pushes the low 16 bits of qlp_coeff[ecx]
push word [edx + 4 * ecx]
inc ecx
cmp ecx, eax
jnz short .copy_qlp_loop
; pad with zero words until the count is a multiple of 4; eax is bumped for
; each pad word so it ends up as the padded coefficient count
and ecx, 0x3
test ecx, ecx
je short .za_end
sub ecx, byte 4
.za_loop:
push word 0
inc eax
inc ecx
jnz short .za_loop
.za_end:
; mm5 = the four words pushed first (qlp_coeff[0..3])
movq mm5, [esp + 2 * eax - 8]
; mm4 = data[-4..-1] packed to four signed 16-bit words
movd mm4, [esi - 16]
punpckldq mm4, [esi - 12]
movd mm0, [esi - 8]
punpckldq mm0, [esi - 4]
packssdw mm4, mm0
cmp eax, byte 4
jnbe short .mmx_4more
; padded order == 4: only the mm5 coefficients are needed
ALIGN 16
.mmx_4_loop_i:
; mm1 = data[i+1]:data[i]; mm3/mm2 = the 4-sample history windows for sample
; i and i+1; mm4 is shifted along by two samples for the next iteration
movd mm1, [esi]
movq mm3, mm4
punpckldq mm1, [esi + 4]
psrlq mm4, 16
movq mm0, mm1
psllq mm0, 48
por mm4, mm0
movq mm2, mm4
psrlq mm4, 16
pxor mm0, mm0
punpckhdq mm0, mm1
pmaddwd mm3, mm5
pmaddwd mm2, mm5
psllq mm0, 16
por mm4, mm0
; horizontal add: mm3 = sum(i+1):sum(i)
movq mm0, mm3
punpckldq mm3, mm2
punpckhdq mm0, mm2
paddd mm3, mm0
; residual = data - (sum >> lp_quantization), two samples at once
psrad mm3, mm6
psubd mm1, mm3
movd [edi], mm1
punpckhdq mm1, mm1
movd [edi + 4], mm1
add edi, byte 8
add esi, byte 8
sub ebx, 2
jg .mmx_4_loop_i
jmp .mmx_end
.mmx_4more:
; eax = 16 - 4 * (padded order): byte offset from &data[i] at which the
; inner loop's ecx pointer starts; the loop ends when ecx reaches esi
shl eax, 2
neg eax
add eax, byte 16
ALIGN 16
.mmx_4more_loop_i:
; same first stage as .mmx_4_loop_i, covering the four nearest history samples
movd mm1, [esi]
punpckldq mm1, [esi + 4]
movq mm3, mm4
psrlq mm4, 16
movq mm0, mm1
psllq mm0, 48
por mm4, mm0
movq mm2, mm4
psrlq mm4, 16
pxor mm0, mm0
punpckhdq mm0, mm1
pmaddwd mm3, mm5
pmaddwd mm2, mm5
psllq mm0, 16
por mm4, mm0
; ecx walks the older history in steps of four samples, edx walks the 16-bit
; coefficient copy on the stack in steps of four words
mov ecx, esi
add ecx, eax
mov edx, esp
ALIGN 16
.mmx_4more_loop_j:
; accumulate into mm3 (sample i) and mm2 (sample i+1)
movd mm0, [ecx - 16]
movd mm7, [ecx - 8]
punpckldq mm0, [ecx - 12]
punpckldq mm7, [ecx - 4]
packssdw mm0, mm7
pmaddwd mm0, [edx]
punpckhdq mm7, mm7
paddd mm3, mm0
movd mm0, [ecx - 12]
punpckldq mm0, [ecx - 8]
punpckldq mm7, [ecx]
packssdw mm0, mm7
pmaddwd mm0, [edx]
paddd mm2, mm0
add edx, byte 8
add ecx, byte 16
cmp ecx, esi
jnz .mmx_4more_loop_j
movq mm0, mm3
punpckldq mm3, mm2
punpckhdq mm0, mm2
paddd mm3, mm0
psrad mm3, mm6
psubd mm1, mm3
movd [edi], mm1
punpckhdq mm1, mm1
movd [edi + 4], mm1
add edi, byte 8
add esi, byte 8
sub ebx, 2
jg near .mmx_4more_loop_i
.mmx_end:
; leave MMX state and drop the coefficient copy / alignment padding
emms
mov esp, ebp
.last_one:
; ebx is 0 here if one sample is still pending and -1 if none is; after the
; inc a non-zero count is handed to the scalar routine with eax = order and
; esi/edi pointing at the remaining sample
mov eax, [esp + 32]
inc ebx
jnz near FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32.begin
.end:
pop edi
pop esi
pop ebx
pop ebp
ret
; **********************************************************************
;
;void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
; {
; unsigned i, j;
; FLAC__int64 sum;
;
; FLAC__ASSERT(order > 0);
;
; for(i = 0; i < data_len; i++) {
; sum = 0;
; for(j = 0; j < order; j++)
; sum += qlp_coeff[j] * (FLAC__int64)data[i-j-1];
; residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
; }
; }
; 64-bit-accumulator version of the residual computation. Two code paths:
;   order <= 1 : .i_1_loop_i, one widening multiply per sample
;   otherwise  : .i_32, a fully unrolled table entered via a computed jump,
;                accumulating the 64-bit sum in esi:ecx
; The unrolled path modifies two of its own stack argument slots in place:
; [esp + 40] becomes residual[] - data[] and [esp + 24] is used as the
; down-counter for the remaining samples.
; ebp, ebx, esi and edi are preserved for the caller.
ALIGN 16
cident FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
; Offsets are valid after the four pushes below.
;[esp + 40] residual[]
;[esp + 36] lp_quantization
;[esp + 32] order
;[esp + 28] qlp_coeff[]
;[esp + 24] data_len
;[esp + 20] data[]
;ASSERT(order > 0)
;ASSERT(order <= 32)
;ASSERT(lp_quantization <= 31)
push ebp
push ebx
push esi
push edi
mov ebx, [esp + 24] ; ebx = data_len
test ebx, ebx
jz near .end ; do nothing if data_len == 0
.begin:
mov eax, [esp + 32] ; eax = order
cmp eax, 1
jg short .i_32
mov esi, [esp + 40] ; esi = residual[]
mov edi, [esp + 20] ; edi = data[]
mov ecx, [esp + 28] ; ecx = qlp_coeff[]
mov ebp, [ecx] ; ebp = qlp_coeff[0]
mov eax, [edi - 4] ; eax = data[-1]
mov ecx, [esp + 36] ; cl = lp_quantization
ALIGN 16
.i_1_loop_i:
imul ebp ; edx:eax = qlp_coeff[0] * (FLAC__int64)data[i-1]
shrd eax, edx, cl ; 0 <= lp_quantization <= 15
neg eax
add eax, [edi]
mov [esi], eax
; reload the current sample; it is data[i-1] for the next pass
mov eax, [edi]
add esi, 4
add edi, 4
dec ebx
jnz .i_1_loop_i
jmp .end
; Helper reached only via 'call': returns its own return address in eax,
; i.e. the run-time address of the instruction following the call.
.mov_eip_to_eax:
mov eax, [esp]
ret
.i_32: ; eax = order
; ebp = entry address into the unrolled table below:
;   -10 * order + (.jumper_0 - .get_eip0) + address of .get_eip0 + 1
; (eax is doubled, then the lea scales it by 5). The table is presumably laid
; out with 10 bytes of code per coefficient, except for the final entry.
neg eax
add eax, eax
lea ebp, [eax + eax * 4 + .jumper_0 - .get_eip0]
call .mov_eip_to_eax
.get_eip0:
add ebp, eax
inc ebp ; compensate for the shorter opcode on the last iteration
mov ebx, [esp + 28] ; ebx = qlp_coeff[]
mov edi, [esp + 20] ; edi = data[]
sub [esp + 40], edi ; residual[] -= data[]
xor ecx, ecx
xor esi, esi
jmp ebp
;eax = --
;edx = --
;ecx = 0
;esi = 0
;
;ebx = qlp_coeff[]
;edi = data[]
;ebp = @address
; Unrolled table: each entry adds the 64-bit product
; qlp_coeff[j] * data[i-j-1] to esi:ecx, from j = 31 down to j = 0.
mov eax, [ebx + 124] ; eax = qlp_coeff[31]
imul dword [edi - 128] ; edx:eax = qlp_coeff[31] * data[i-32]
add ecx, eax
adc esi, edx ; sum += qlp_coeff[31] * data[i-32]
mov eax, [ebx + 120] ; eax = qlp_coeff[30]
imul dword [edi - 124] ; edx:eax = qlp_coeff[30] * data[i-31]
add ecx, eax
adc esi, edx ; sum += qlp_coeff[30] * data[i-31]
mov eax, [ebx + 116]
imul dword [edi - 120]
add ecx, eax
adc esi, edx
mov eax, [ebx + 112]
imul dword [edi - 116]
add ecx, eax
adc esi, edx
mov eax, [ebx + 108]
imul dword [edi - 112]
add ecx, eax
adc esi, edx
mov eax, [ebx + 104]
imul dword [edi - 108]
add ecx, eax
adc esi, edx
mov eax, [ebx + 100]
imul dword [edi - 104]
add ecx, eax
adc esi, edx
mov eax, [ebx + 96]
imul dword [edi - 100]
add ecx, eax
adc esi, edx
mov eax, [ebx + 92]
imul dword [edi - 96]
add ecx, eax
adc esi, edx
mov eax, [ebx + 88]
imul dword [edi - 92]
add ecx, eax
adc esi, edx
mov eax, [ebx + 84]
imul dword [edi - 88]
add ecx, eax
adc esi, edx
mov eax, [ebx + 80]
imul dword [edi - 84]
add ecx, eax
adc esi, edx
mov eax, [ebx + 76]
imul dword [edi - 80]
add ecx, eax
adc esi, edx
mov eax, [ebx + 72]
imul dword [edi - 76]
add ecx, eax
adc esi, edx
mov eax, [ebx + 68]
imul dword [edi - 72]
add ecx, eax
adc esi, edx
mov eax, [ebx + 64]
imul dword [edi - 68]
add ecx, eax
adc esi, edx
mov eax, [ebx + 60]
imul dword [edi - 64]
add ecx, eax
adc esi, edx
mov eax, [ebx + 56]
imul dword [edi - 60]
add ecx, eax
adc esi, edx
mov eax, [ebx + 52]
imul dword [edi - 56]
add ecx, eax
adc esi, edx
mov eax, [ebx + 48]
imul dword [edi - 52]
add ecx, eax
adc esi, edx
mov eax, [ebx + 44]
imul dword [edi - 48]
add ecx, eax
adc esi, edx
mov eax, [ebx + 40]
imul dword [edi - 44]
add ecx, eax
adc esi, edx
mov eax, [ebx + 36]
imul dword [edi - 40]
add ecx, eax
adc esi, edx
mov eax, [ebx + 32]
imul dword [edi - 36]
add ecx, eax
adc esi, edx
mov eax, [ebx + 28]
imul dword [edi - 32]
add ecx, eax
adc esi, edx
mov eax, [ebx + 24]
imul dword [edi - 28]
add ecx, eax
adc esi, edx
mov eax, [ebx + 20]
imul dword [edi - 24]
add ecx, eax
adc esi, edx
mov eax, [ebx + 16]
imul dword [edi - 20]
add ecx, eax
adc esi, edx
mov eax, [ebx + 12]
imul dword [edi - 16]
add ecx, eax
adc esi, edx
mov eax, [ebx + 8]
imul dword [edi - 12]
add ecx, eax
adc esi, edx
mov eax, [ebx + 4]
imul dword [edi - 8]
add ecx, eax
adc esi, edx
mov eax, [ebx] ; eax = qlp_coeff[ 0] (NOTE: one byte missing from instruction)
imul dword [edi - 4] ; edx:eax = qlp_coeff[ 0] * data[i- 1]
add ecx, eax
adc esi, edx ; sum += qlp_coeff[ 0] * data[i- 1]
.jumper_0:
; the low half of the sum moves to edx because cl is needed for the shift count
mov edx, ecx
;esi:edx = sum
mov ecx, [esp + 36] ; cl = lp_quantization
shrd edx, esi, cl ; edx = (sum >> lp_quantization)
;eax = --
;ecx = --
;edx = sum >> lp_q
;esi = --
neg edx ; edx = -(sum >> lp_quantization)
mov eax, [esp + 40] ; residual[] - data[]
add edx, [edi] ; edx = data[i] - (sum >> lp_quantization)
; edi + (residual[] - data[]) == &residual[i]
mov [edi + eax], edx
add edi, 4
; the remaining-sample counter lives in the data_len argument slot
dec dword [esp + 24]
jz short .end
; next sample: clear the 64-bit sum and re-enter the table at the same entry
xor ecx, ecx
xor esi, esi
jmp ebp
.end:
pop edi
pop esi
pop ebx
pop ebp
ret
; end

View File

@ -1,95 +0,0 @@
; libFLAC - Free Lossless Audio Codec library
; Copyright (C) 2001-2009 Josh Coalson
; Copyright (C) 2011-2022 Xiph.Org Foundation
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
;
; - Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
;
; - Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
;
; - Neither the name of the Xiph.org Foundation nor the names of its
; contributors may be used to endorse or promote products derived from
; this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
; Assemble everything that includes this file as 32-bit code.
bits 32
; Per-object-format setup.  Exactly one OBJ_FORMAT_<fmt> symbol is expected to
; be defined (presumably on the assembler command line by the build system --
; TODO confirm).  Each branch provides three case-insensitive single-line
; macros -- code_section, data_section and bss_section -- that expand to the
; matching `section` directive for that format.  Every format except ELF also
; defines FLAC__PUBLIC_NEEDS_UNDERSCORE, which the cglobal/cextern macros use
; to decide whether public symbol names get a leading underscore.
%ifdef OBJ_FORMAT_win32
%define FLAC__PUBLIC_NEEDS_UNDERSCORE
%idefine code_section section .text align=16 class=CODE use32
%idefine data_section section .data align=32 class=DATA use32
%idefine bss_section section .bss align=32 class=DATA use32
%elifdef OBJ_FORMAT_aout
; a.out: plain section names, no alignment or class attributes requested.
%define FLAC__PUBLIC_NEEDS_UNDERSCORE
%idefine code_section section .text
%idefine data_section section .data
%idefine bss_section section .bss
%elifdef OBJ_FORMAT_aoutb
; aoutb: same definitions as a.out.
%define FLAC__PUBLIC_NEEDS_UNDERSCORE
%idefine code_section section .text
%idefine data_section section .data
%idefine bss_section section .bss
%elifdef OBJ_FORMAT_coff
; COFF: same definitions as a.out.
%define FLAC__PUBLIC_NEEDS_UNDERSCORE
%idefine code_section section .text
%idefine data_section section .data
%idefine bss_section section .bss
%elifdef OBJ_FORMAT_macho
; Mach-O: same definitions as a.out.
%define FLAC__PUBLIC_NEEDS_UNDERSCORE
%idefine code_section section .text
%idefine data_section section .data
%idefine bss_section section .bss
%elifdef OBJ_FORMAT_elf
; ELF: the only format here that leaves FLAC__PUBLIC_NEEDS_UNDERSCORE
; undefined, so public symbols keep their plain names.
%idefine code_section section .text align=16
%idefine data_section section .data align=32
%idefine bss_section section .bss align=32
%else
; No recognised OBJ_FORMAT_<fmt> symbol was defined.
%error unsupported object format! ; this directive doesn't really work here
%endif
; cglobal <name>
;   Export <name> as a public symbol using the naming rule of the selected
;   object format:
;     - formats that define FLAC__PUBLIC_NEEDS_UNDERSCORE export `_<name>`;
;     - otherwise, NASM 2 and later export `<name>` typed as a function with
;       hidden visibility;
;     - otherwise (older NASM) export plain `<name>`.
%imacro cglobal 1
    %ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
        global _%1
    %elif __NASM_MAJOR__ >= 2
        global %1:function hidden
    %else
        global %1
    %endif
%endmacro
; cextern <name>
;   Declare <name> as an external symbol.  The plain name is used unless the
;   selected object format defined FLAC__PUBLIC_NEEDS_UNDERSCORE, in which
;   case the reference is to `_<name>`.
%imacro cextern 1
    %ifndef FLAC__PUBLIC_NEEDS_UNDERSCORE
        extern %1
    %else
        extern _%1
    %endif
%endmacro
; cident <name>
;   Define both `_<name>` and `<name>` as labels at the current position, so
;   the definition matches whichever of the two spellings cglobal exported.
;   NOTE(review): the plain label comes last, so any local (.xxx) labels that
;   follow presumably attach to `<name>` -- keep this order unless verified.
%imacro cident 1
_%1:
%1:
%endmacro
; ELF only: emit an empty .note.GNU-stack section flagged noalloc, noexec and
; nowrite.  Presumably this is the marker GNU toolchains use to record that
; the object does not need an executable stack -- confirm against the linker
; documentation.
%ifdef OBJ_FORMAT_elf
section .note.GNU-stack progbits noalloc noexec nowrite align=1
%endif

View File

@ -69,9 +69,6 @@ uint32_t FLAC__fixed_compute_best_predictor_intrin_ssse3(const FLAC__int32 data[
uint32_t FLAC__fixed_compute_best_predictor_wide_intrin_ssse3(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
# endif
# endif
# if defined FLAC__CPU_IA32 && defined FLAC__HAS_NASM
uint32_t FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
# endif
# endif
#else
uint32_t FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], uint32_t data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);

View File

@ -174,13 +174,6 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_neon(const FLAC__in
void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_neon(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
# endif
# ifdef FLAC__CPU_IA32
# ifdef FLAC__HAS_NASM
void FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
void FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
# endif
# endif
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN
# ifdef FLAC__SSE2_SUPPORTED
void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);

View File

@ -949,20 +949,6 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
if(encoder->private_->cpuinfo.use_asm) {
# ifdef FLAC__CPU_IA32
FLAC__ASSERT(encoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_IA32);
# ifdef FLAC__HAS_NASM
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32; /* OPT_IA32: was really necessary for GCC < 4.9 */
if (encoder->private_->cpuinfo.x86.mmx) {
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32;
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx;
}
else {
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32;
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32;
}
if (encoder->private_->cpuinfo.x86.mmx && encoder->private_->cpuinfo.x86.cmov)
encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov;
# endif /* FLAC__HAS_NASM */
# if FLAC__HAS_X86INTRIN
# ifdef FLAC__SSE2_SUPPORTED
if (encoder->private_->cpuinfo.x86.sse2) {

View File

@ -1,19 +0,0 @@
#!/bin/sh
#
# libtool assumes that the compiler can handle the -fPIC flag.
# This isn't always true (for example, nasm can't handle it).
# Also, on some versions of OS X it tries to pass -fno-common
# to 'as' which causes problems.
#
# Usage: strip_non_asm_libtool_args.sh COMMAND [ARG...]
#
# Removes every -fPIC, -DPIC and -fno-common argument from the command
# line, prints the resulting command, then replaces this shell with it.

# Filter the positional parameters in place instead of flattening them into
# a single string: `for arg` iterates over the original argument list, each
# iteration shifts the current argument off the front and re-appends it at
# the back only when it is to be kept.  This keeps arguments that contain
# whitespace or glob characters intact and does not stop at an empty
# argument, both of which the unquoted `[ $1 ]` / `exec $command` form got
# wrong.
for arg
do
	shift
	case "$arg" in
		-fPIC|-DPIC|-fno-common)
			# unsupported by the assembler: drop it
			;;
		*)
			set -- "$@" "$arg"
			;;
	esac
done

echo "$@"
exec "$@"