diff --git a/src/libFLAC/Makefile.am b/src/libFLAC/Makefile.am index 920ddf26..1eb99422 100644 --- a/src/libFLAC/Makefile.am +++ b/src/libFLAC/Makefile.am @@ -42,6 +42,10 @@ ARCH_SUBDIRS = ia32 libFLAC_la_LIBADD = ia32/libFLAC-asm.la endif endif +if FLaC__CPU_PPC +ARCH_SUBDIRS = ppc +libFLAC_la_LIBADD = ppc/libFLAC-asm.la +endif endif SUBDIRS = $(ARCH_SUBDIRS) include . diff --git a/src/libFLAC/ppc/Makefile.am b/src/libFLAC/ppc/Makefile.am new file mode 100644 index 00000000..cd39964f --- /dev/null +++ b/src/libFLAC/ppc/Makefile.am @@ -0,0 +1,38 @@ +# libFLAC - Free Lossless Audio Codec library +# Copyright (C) 2004 Josh Coalson +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# - Neither the name of the Xiph.org Foundation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +SUFFIXES = .s .lo + +.s.lo: + $(LIBTOOL) --mode=compile as -force_cpusubtype_ALL -o $@ $< + +noinst_LTLIBRARIES = libFLAC-asm.la +libFLAC_asm_la_SOURCES = \ + lpc_asm.s diff --git a/src/libFLAC/ppc/lpc_asm.s b/src/libFLAC/ppc/lpc_asm.s new file mode 100644 index 00000000..fb8af30e --- /dev/null +++ b/src/libFLAC/ppc/lpc_asm.s @@ -0,0 +1,428 @@ +; libFLAC - Free Lossless Audio Codec library +; Copyright (C) 2004 Josh Coalson +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; +; - Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; +; - Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in the +; documentation and/or other materials provided with the distribution. +; +; - Neither the name of the Xiph.org Foundation nor the names of its +; contributors may be used to endorse or promote products derived from +; this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR +; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +.text + .align 2 +.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16 +.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8 + +_FLAC__lpc_restore_signal_asm_ppc_altivec_16: +; r3: residual[] +; r4: data_len +; r5: qlp_coeff[] +; r6: order +; r7: lp_quantization +; r8: data[] + +; see src/libFLAC/lpc.c:FLAC__lpc_restore_signal() +; these is a PowerPC/Altivec assembly version which requires bps<=16 (or actual +; bps<=15 for mid-side coding, since that uses an extra bit) + +; these should be fast; the inner loop is unrolled (it takes no more than +; 3*(order%4) instructions, all of which are arithmetic), and all of the +; coefficients and all relevant history stay in registers, so the outer loop +; has only one load from memory (the residual) + +; I haven't yet run this through simg4, so there may be some avoidable stalls, +; and there may be a somewhat more clever way to do the outer loop + +; the branch mechanism may prevent dynamic loading; I still need to examine +; this issue, and there may be a more elegant method + + stmw r31,-4(r1) + + addi r9,r1,-28 + li r31,0xf + andc r9,r9,r31 ; for quadword-aligned stack data + + slwi r6,r6,2 ; adjust for word size + slwi r4,r4,2 + add r4,r4,r8 ; r4 = data+data_len + + mfspr r0,256 ; cache old vrsave + addis r31,0,hi16(0xfffffc00) + ori r31,r31,lo16(0xfffffc00) + mtspr 256,r31 ; declare VRs in vrsave + + cmplw cr0,r8,r4 ; i> lp_quantization + + lvewx v21,0,r3 ; v21[n]: *residual + vperm v21,v21,v21,v18 ; v21[3]: *residual + vaddsws v20,v21,v20 ; v20[3]: *residual + (sum >> lp_quantization) + vsldoi v18,v18,v18,4 ; increment shift vector + + vperm v21,v20,v20,v17 ; v21[n]: shift for storage + vsldoi v17,v17,v17,12 ; increment shift vector + stvewx v21,0,r8 + + vsldoi v20,v20,v20,12 + vsldoi v8,v8,v20,4 ; insert value onto history + + addi r3,r3,4 + addi r8,r8,4 + cmplw cr0,r8,r4 ; i> lp_quantization + + lvewx v9,0,r3 ; v9[n]: *residual + vperm v9,v9,v9,v6 ; v9[3]: *residual + vaddsws v8,v9,v8 ; v8[3]: *residual + (sum >> lp_quantization) + vsldoi v6,v6,v6,4 ; increment shift vector + + vperm v9,v8,v8,v5 ; v9[n]: shift for storage + vsldoi v5,v5,v5,12 ; increment shift vector + stvewx v9,0,r8 + + vsldoi v8,v8,v8,12 + vsldoi v2,v2,v8,4 ; insert value onto history + + addi r3,r3,4 + addi r8,r8,4 + cmplw cr0,r8,r4 ; i