mirror of https://github.com/xiph/flac
split PPC asm code into two directories, one holding asm compatible with OS X native 'as', and one for 'gas'
This commit is contained in:
parent
5f2b46d031
commit
47faab80d7
|
@ -44,7 +44,8 @@ endif
|
|||
endif
|
||||
if FLaC__CPU_PPC
|
||||
ARCH_SUBDIRS = ppc
|
||||
libFLAC_la_LIBADD = ppc/libFLAC-asm.la
|
||||
#@@@@@@ need to conditionalize this to gas or as depending on configure
|
||||
libFLAC_la_LIBADD = ppc/as/libFLAC-asm.la
|
||||
LOCAL_EXTRA_LDFLAGS = "-Wl,-read_only_relocs,warning"
|
||||
endif
|
||||
endif
|
||||
|
|
|
@ -49,7 +49,7 @@ DEBUG_CFLAGS = -DFLAC__OVERFLOW_DETECT
|
|||
|
||||
ifeq ($(DARWIN_BUILD),yes)
|
||||
SRCS_S = \
|
||||
ppc/lpc_asm.s
|
||||
ppc/as/lpc_asm.s
|
||||
else
|
||||
ifeq ($(SOLARIS_BUILD),yes)
|
||||
else
|
||||
|
|
|
@ -28,15 +28,6 @@
|
|||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
SUFFIXES = .s .lo
|
||||
|
||||
STRIP_NON_ASM = sh $(top_srcdir)/strip_non_asm_libtool_args.sh
|
||||
|
||||
# For some unknown reason libtool can't figure out the tag for 'as', so
|
||||
# we fake it with --tag=CC and strip out unwanted options.
|
||||
.s.lo:
|
||||
$(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) as -force_cpusubtype_ALL -o $@ $<
|
||||
|
||||
noinst_LTLIBRARIES = libFLAC-asm.la
|
||||
libFLAC_asm_la_SOURCES = \
|
||||
lpc_asm.s
|
||||
#@@@@@@ We actually want to only compile one based on some configuration,
|
||||
#@@@@@@ so we'll eventually have to conditionalize the sub-Makefile.am's
|
||||
SUBDIRS = as gas
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
# libFLAC - Free Lossless Audio Codec library
|
||||
# Copyright (C) 2004 Josh Coalson
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# - Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# - Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# - Neither the name of the Xiph.org Foundation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
SUFFIXES = .s .lo
|
||||
|
||||
STRIP_NON_ASM = sh $(top_srcdir)/strip_non_asm_libtool_args.sh
|
||||
|
||||
# For some unknown reason libtool can't figure out the tag for 'as', so
|
||||
# we fake it with --tag=CC and strip out unwanted options.
|
||||
.s.lo:
|
||||
$(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) as -force_cpusubtype_ALL -o $@ $<
|
||||
|
||||
noinst_LTLIBRARIES = libFLAC-asm.la
|
||||
libFLAC_asm_la_SOURCES = \
|
||||
lpc_asm.s
|
|
@ -0,0 +1,42 @@
|
|||
# libFLAC - Free Lossless Audio Codec library
|
||||
# Copyright (C) 2004 Josh Coalson
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# - Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# - Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# - Neither the name of the Xiph.org Foundation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
SUFFIXES = .s .lo
|
||||
|
||||
STRIP_NON_ASM = sh $(top_srcdir)/strip_non_asm_libtool_args.sh
|
||||
|
||||
# For some unknown reason libtool can't figure out the tag for 'gas', so
|
||||
# we fake it with --tag=CC and strip out unwanted options.
# NOTE(review): the -force_cpusubtype_ALL flag below is identical to the one
# used by the 'as' variant of this rule; it looks like an option of Apple's
# assembler -- confirm that 'gas' accepts it (gas may instead need something
# like -mregnames for the symbolic r31/v0 register names used in lpc_asm.s).
|
||||
.s.lo:
|
||||
$(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) gas -force_cpusubtype_ALL -o $@ $<
|
||||
|
||||
noinst_LTLIBRARIES = libFLAC-asm.la
|
||||
libFLAC_asm_la_SOURCES = \
|
||||
lpc_asm.s
|
|
@ -0,0 +1,431 @@
|
|||
# libFLAC - Free Lossless Audio Codec library
|
||||
# Copyright (C) 2004 Josh Coalson
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# - Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# - Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# - Neither the name of the Xiph.org Foundation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
.text
|
||||
.align 2
|
||||
.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16
|
||||
.type _FLAC__lpc_restore_signal_asm_ppc_altivec_16, @function
|
||||
|
||||
.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8
|
||||
.type _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8, @function
|
||||
|
||||
_FLAC__lpc_restore_signal_asm_ppc_altivec_16:
|
||||
# r3: residual[]
|
||||
# r4: data_len
|
||||
# r5: qlp_coeff[]
|
||||
# r6: order
|
||||
# r7: lp_quantization
|
||||
# r8: data[]
|
||||
|
||||
# see src/libFLAC/lpc.c:FLAC__lpc_restore_signal()
|
||||
# this is a PowerPC/Altivec assembly version which requires bps<=16 (or actual
|
||||
# bps<=15 for mid-side coding, since that uses an extra bit)
|
||||
|
||||
# these should be fast; the inner loop is unrolled (it takes no more than
|
||||
# 3*(order%4) instructions, all of which are arithmetic), and all of the
|
||||
# coefficients and all relevant history stay in registers, so the outer loop
|
||||
# has only one load from memory (the residual)
|
||||
|
||||
# I have not yet run this through simg4, so there may be some avoidable stalls,
|
||||
# and there may be a somewhat more clever way to do the outer loop
|
||||
|
||||
# the branch mechanism may prevent dynamic loading; I still need to examine
|
||||
# this issue, and there may be a more elegant method
|
||||
|
||||
stmw r31,-4(r1)
|
||||
|
||||
addi r9,r1,-28
|
||||
li r31,0xf
|
||||
andc r9,r9,r31 # for quadword-aligned stack data
|
||||
|
||||
slwi r6,r6,2 # adjust for word size
|
||||
slwi r4,r4,2
|
||||
add r4,r4,r8 # r4 = data+data_len
|
||||
|
||||
mfspr r0,256 # cache old vrsave
|
||||
addis r31,0,0xffff
|
||||
ori r31,r31,0xfc00
|
||||
mtspr 256,r31 # declare VRs in vrsave
|
||||
|
||||
cmplw cr0,r8,r4 # i<data_len
|
||||
bc 4,0,L1400
|
||||
|
||||
# load coefficients into v0-v7 and initial history into v8-v15
|
||||
li r31,0xf
|
||||
and r31,r8,r31 # r31: data%4
|
||||
li r11,16
|
||||
subf r31,r31,r11 # r31: 4-(data%4)
|
||||
slwi r31,r31,3 # convert to bits for vsro
|
||||
li r10,-4
|
||||
stw r31,-4(r9)
|
||||
lvewx v0,r10,r9
|
||||
vspltisb v18,-1
|
||||
vsro v18,v18,v0 # v18: mask vector
|
||||
|
||||
li r31,0x8
|
||||
lvsl v0,0,r31
|
||||
vsldoi v0,v0,v0,12
|
||||
li r31,0xc
|
||||
lvsl v1,0,r31
|
||||
vspltisb v2,0
|
||||
vspltisb v3,-1
|
||||
vmrglw v2,v2,v3
|
||||
vsel v0,v1,v0,v2 # v0: reversal permutation vector
|
||||
|
||||
add r10,r5,r6
|
||||
lvsl v17,0,r5 # v17: coefficient alignment permutation vector
|
||||
vperm v17,v17,v17,v0 # v17: reversal coefficient alignment permutation vector
|
||||
|
||||
mr r11,r8
|
||||
lvsl v16,0,r11 # v16: history alignment permutation vector
|
||||
|
||||
lvx v0,0,r5
|
||||
addi r5,r5,16
|
||||
lvx v1,0,r5
|
||||
vperm v0,v0,v1,v17
|
||||
lvx v8,0,r11
|
||||
addi r11,r11,-16
|
||||
lvx v9,0,r11
|
||||
vperm v8,v9,v8,v16
|
||||
cmplw cr0,r5,r10
|
||||
bc 12,0,L1101
|
||||
vand v0,v0,v18
|
||||
lis r31,L1307@ha # r31 = address of L1307 (moved to CTR at L1199); @ha is the carry-adjusted high half
|
||||
la r31,L1307@l(r31) # so the low half must be added sign-extended (la/addi), not OR-ed in with ori
|
||||
b L1199
|
||||
|
||||
L1101:
|
||||
addi r5,r5,16
|
||||
lvx v2,0,r5
|
||||
vperm v1,v1,v2,v17
|
||||
addi r11,r11,-16
|
||||
lvx v10,0,r11
|
||||
vperm v9,v10,v9,v16
|
||||
cmplw cr0,r5,r10
|
||||
bc 12,0,L1102
|
||||
vand v1,v1,v18
|
||||
lis r31,L1306@ha # r31 = address of L1306 (moved to CTR at L1199); @ha is the carry-adjusted high half
|
||||
la r31,L1306@l(r31) # so the low half must be added sign-extended (la/addi), not OR-ed in with ori
|
||||
b L1199
|
||||
|
||||
L1102:
|
||||
addi r5,r5,16
|
||||
lvx v3,0,r5
|
||||
vperm v2,v2,v3,v17
|
||||
addi r11,r11,-16
|
||||
lvx v11,0,r11
|
||||
vperm v10,v11,v10,v16
|
||||
cmplw cr0,r5,r10
|
||||
bc 12,0,L1103
|
||||
vand v2,v2,v18
|
||||
lis r31,L1305@ha
|
||||
la r31,L1305@l(r31)
|
||||
b L1199
|
||||
|
||||
L1103:
|
||||
addi r5,r5,16
|
||||
lvx v4,0,r5
|
||||
vperm v3,v3,v4,v17
|
||||
addi r11,r11,-16
|
||||
lvx v12,0,r11
|
||||
vperm v11,v12,v11,v16
|
||||
cmplw cr0,r5,r10
|
||||
bc 12,0,L1104
|
||||
vand v3,v3,v18
|
||||
lis r31,L1304@ha
|
||||
la r31,L1304@l(r31)
|
||||
b L1199
|
||||
|
||||
L1104:
|
||||
addi r5,r5,16
|
||||
lvx v5,0,r5
|
||||
vperm v4,v4,v5,v17
|
||||
addi r11,r11,-16
|
||||
lvx v13,0,r11
|
||||
vperm v12,v13,v12,v16
|
||||
cmplw cr0,r5,r10
|
||||
bc 12,0,L1105
|
||||
vand v4,v4,v18
|
||||
lis r31,L1303@ha
|
||||
la r31,L1303@l(r31)
|
||||
b L1199
|
||||
|
||||
L1105:
|
||||
addi r5,r5,16
|
||||
lvx v6,0,r5
|
||||
vperm v5,v5,v6,v17
|
||||
addi r11,r11,-16
|
||||
lvx v14,0,r11
|
||||
vperm v13,v14,v13,v16
|
||||
cmplw cr0,r5,r10
|
||||
bc 12,0,L1106
|
||||
vand v5,v5,v18
|
||||
lis r31,L1302@ha
|
||||
la r31,L1302@l(r31)
|
||||
b L1199
|
||||
|
||||
L1106:
|
||||
addi r5,r5,16
|
||||
lvx v7,0,r5
|
||||
vperm v6,v6,v7,v17
|
||||
addi r11,r11,-16
|
||||
lvx v15,0,r11
|
||||
vperm v14,v15,v14,v16
|
||||
cmplw cr0,r5,r10
|
||||
bc 12,0,L1107
|
||||
vand v6,v6,v18
|
||||
lis r31,L1301@ha
|
||||
la r31,L1301@l(r31)
|
||||
b L1199
|
||||
|
||||
L1107:
|
||||
addi r5,r5,16
|
||||
lvx v19,0,r5
|
||||
vperm v7,v7,v19,v17
|
||||
addi r11,r11,-16
|
||||
lvx v19,0,r11
|
||||
vperm v15,v19,v15,v16
|
||||
vand v7,v7,v18
|
||||
lis r31,L1300@ha
|
||||
la r31,L1300@l(r31)
|
||||
|
||||
L1199:
|
||||
mtctr r31
|
||||
|
||||
# set up invariant vectors
|
||||
vspltish v16,0 # v16: zero vector
|
||||
|
||||
li r10,-12
|
||||
lvsr v17,r10,r8 # v17: result shift vector
|
||||
lvsl v18,r10,r3 # v18: residual shift back vector
|
||||
|
||||
li r10,-4
|
||||
stw r7,-4(r9)
|
||||
lvewx v19,r10,r9 # v19: lp_quantization vector
|
||||
|
||||
L1200:
|
||||
vmulosh v20,v0,v8 # v20: sum vector
|
||||
bcctr 20,0
|
||||
|
||||
L1300:
|
||||
vmulosh v21,v7,v15
|
||||
vsldoi v15,v15,v14,4 # increment history
|
||||
vaddsws v20,v20,v21
|
||||
|
||||
L1301:
|
||||
vmulosh v21,v6,v14
|
||||
vsldoi v14,v14,v13,4
|
||||
vaddsws v20,v20,v21
|
||||
|
||||
L1302:
|
||||
vmulosh v21,v5,v13
|
||||
vsldoi v13,v13,v12,4
|
||||
vaddsws v20,v20,v21
|
||||
|
||||
L1303:
|
||||
vmulosh v21,v4,v12
|
||||
vsldoi v12,v12,v11,4
|
||||
vaddsws v20,v20,v21
|
||||
|
||||
L1304:
|
||||
vmulosh v21,v3,v11
|
||||
vsldoi v11,v11,v10,4
|
||||
vaddsws v20,v20,v21
|
||||
|
||||
L1305:
|
||||
vmulosh v21,v2,v10
|
||||
vsldoi v10,v10,v9,4
|
||||
vaddsws v20,v20,v21
|
||||
|
||||
L1306:
|
||||
vmulosh v21,v1,v9
|
||||
vsldoi v9,v9,v8,4
|
||||
vaddsws v20,v20,v21
|
||||
|
||||
L1307:
|
||||
vsumsws v20,v20,v16 # v20[3]: sum
|
||||
vsraw v20,v20,v19 # v20[3]: sum >> lp_quantization
|
||||
|
||||
lvewx v21,0,r3 # v21[n]: *residual
|
||||
vperm v21,v21,v21,v18 # v21[3]: *residual
|
||||
vaddsws v20,v21,v20 # v20[3]: *residual + (sum >> lp_quantization)
|
||||
vsldoi v18,v18,v18,4 # increment shift vector
|
||||
|
||||
vperm v21,v20,v20,v17 # v21[n]: shift for storage
|
||||
vsldoi v17,v17,v17,12 # increment shift vector
|
||||
stvewx v21,0,r8
|
||||
|
||||
vsldoi v20,v20,v20,12
|
||||
vsldoi v8,v8,v20,4 # insert value onto history
|
||||
|
||||
addi r3,r3,4
|
||||
addi r8,r8,4
|
||||
cmplw cr0,r8,r4 # i<data_len
|
||||
bc 12,0,L1200
|
||||
|
||||
L1400:
|
||||
mtspr 256,r0 # restore old vrsave
|
||||
lmw r31,-4(r1)
|
||||
blr
|
||||
|
||||
_FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8:
|
||||
# r3: residual[]
|
||||
# r4: data_len
|
||||
# r5: qlp_coeff[]
|
||||
# r6: order
|
||||
# r7: lp_quantization
|
||||
# r8: data[]
|
||||
|
||||
# see _FLAC__lpc_restore_signal_asm_ppc_altivec_16() above
|
||||
# this version assumes order<=8; it uses fewer vector registers, which should
|
||||
# save time in context switches, and has less code, which may improve
|
||||
# instruction caching
|
||||
|
||||
stmw r31,-4(r1)
|
||||
|
||||
addi r9,r1,-28
|
||||
li r31,0xf
|
||||
andc r9,r9,r31 # for quadword-aligned stack data
|
||||
|
||||
slwi r6,r6,2 # adjust for word size
|
||||
slwi r4,r4,2
|
||||
add r4,r4,r8 # r4 = data+data_len
|
||||
|
||||
mfspr r0,256 # cache old vrsave
|
||||
addis r31,0,0xffc0
|
||||
ori r31,r31,0x0000
|
||||
mtspr 256,r31 # declare VRs in vrsave
|
||||
|
||||
cmplw cr0,r8,r4 # i<data_len
|
||||
bc 4,0,L2400
|
||||
|
||||
# load coefficients into v0-v1 and initial history into v2-v3
|
||||
li r31,0xf
|
||||
and r31,r8,r31 # r31: data%4
|
||||
li r11,16
|
||||
subf r31,r31,r11 # r31: 4-(data%4)
|
||||
slwi r31,r31,3 # convert to bits for vsro
|
||||
li r10,-4
|
||||
stw r31,-4(r9)
|
||||
lvewx v0,r10,r9
|
||||
vspltisb v6,-1
|
||||
vsro v6,v6,v0 # v6: mask vector
|
||||
|
||||
li r31,0x8
|
||||
lvsl v0,0,r31
|
||||
vsldoi v0,v0,v0,12
|
||||
li r31,0xc
|
||||
lvsl v1,0,r31
|
||||
vspltisb v2,0
|
||||
vspltisb v3,-1
|
||||
vmrglw v2,v2,v3
|
||||
vsel v0,v1,v0,v2 # v0: reversal permutation vector
|
||||
|
||||
add r10,r5,r6
|
||||
lvsl v5,0,r5 # v5: coefficient alignment permutation vector
|
||||
vperm v5,v5,v5,v0 # v5: reversal coefficient alignment permutation vector
|
||||
|
||||
mr r11,r8
|
||||
lvsl v4,0,r11 # v4: history alignment permutation vector
|
||||
|
||||
lvx v0,0,r5
|
||||
addi r5,r5,16
|
||||
lvx v1,0,r5
|
||||
vperm v0,v0,v1,v5
|
||||
lvx v2,0,r11
|
||||
addi r11,r11,-16
|
||||
lvx v3,0,r11
|
||||
vperm v2,v3,v2,v4
|
||||
cmplw cr0,r5,r10
|
||||
bc 12,0,L2101
|
||||
vand v0,v0,v6
|
||||
lis r31,L2301@ha
|
||||
la r31,L2301@l(r31)
|
||||
b L2199
|
||||
|
||||
L2101:
|
||||
addi r5,r5,16
|
||||
lvx v7,0,r5
|
||||
vperm v1,v1,v7,v5
|
||||
addi r11,r11,-16
|
||||
lvx v7,0,r11
|
||||
vperm v3,v7,v3,v4
|
||||
vand v1,v1,v6
|
||||
lis r31,L2300@ha
|
||||
la r31,L2300@l(r31)
|
||||
|
||||
L2199:
|
||||
mtctr r31
|
||||
|
||||
# set up invariant vectors
|
||||
vspltish v4,0 # v4: zero vector
|
||||
|
||||
li r10,-12
|
||||
lvsr v5,r10,r8 # v5: result shift vector
|
||||
lvsl v6,r10,r3 # v6: residual shift back vector
|
||||
|
||||
li r10,-4
|
||||
stw r7,-4(r9)
|
||||
lvewx v7,r10,r9 # v7: lp_quantization vector
|
||||
|
||||
L2200:
|
||||
vmulosh v8,v0,v2 # v8: sum vector
|
||||
bcctr 20,0
|
||||
|
||||
L2300:
|
||||
vmulosh v9,v1,v3
|
||||
vsldoi v3,v3,v2,4
|
||||
vaddsws v8,v8,v9
|
||||
|
||||
L2301:
|
||||
vsumsws v8,v8,v4 # v8[3]: sum
|
||||
vsraw v8,v8,v7 # v8[3]: sum >> lp_quantization
|
||||
|
||||
lvewx v9,0,r3 # v9[n]: *residual
|
||||
vperm v9,v9,v9,v6 # v9[3]: *residual
|
||||
vaddsws v8,v9,v8 # v8[3]: *residual + (sum >> lp_quantization)
|
||||
vsldoi v6,v6,v6,4 # increment shift vector
|
||||
|
||||
vperm v9,v8,v8,v5 # v9[n]: shift for storage
|
||||
vsldoi v5,v5,v5,12 # increment shift vector
|
||||
stvewx v9,0,r8
|
||||
|
||||
vsldoi v8,v8,v8,12
|
||||
vsldoi v2,v2,v8,4 # insert value onto history
|
||||
|
||||
addi r3,r3,4
|
||||
addi r8,r8,4
|
||||
cmplw cr0,r8,r4 # i<data_len
|
||||
bc 12,0,L2200
|
||||
|
||||
L2400:
|
||||
mtspr 256,r0 # restore old vrsave
|
||||
lmw r31,-4(r1)
|
||||
blr
|
Loading…
Reference in New Issue