mirror of https://github.com/libsdl-org/SDL
Remove ARM32 assembly/pixman blitters
This commit is contained in:
parent
3ecb927587
commit
0f351cd6af
|
@ -292,7 +292,6 @@ dep_option(SDL_MMX "Use MMX assembly routines" ON "SDL_ASSEMBLY;
|
||||||
dep_option(SDL_ALTIVEC "Use Altivec assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_POWERPC32 OR SDL_CPU_POWERPC64" OFF)
|
dep_option(SDL_ALTIVEC "Use Altivec assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_POWERPC32 OR SDL_CPU_POWERPC64" OFF)
|
||||||
dep_option(SDL_ARMSIMD "Use SIMD assembly blitters on ARM" OFF "SDL_ASSEMBLY;SDL_CPU_ARM32" OFF)
|
dep_option(SDL_ARMSIMD "Use SIMD assembly blitters on ARM" OFF "SDL_ASSEMBLY;SDL_CPU_ARM32" OFF)
|
||||||
dep_option(SDL_ARMNEON "Use NEON assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_ARM32 OR SDL_CPU_ARM64" OFF)
|
dep_option(SDL_ARMNEON "Use NEON assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_ARM32 OR SDL_CPU_ARM64" OFF)
|
||||||
dep_option(SDL_ARMNEON_BLITTERS "Use NEON assembly blitters on ARM32" OFF "SDL_VIDEO;SDL_ASSEMBLY;SDL_ARMNEON;SDL_CPU_ARM32" OFF)
|
|
||||||
dep_option(SDL_LSX "Use LSX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_LOONGARCH64" OFF)
|
dep_option(SDL_LSX "Use LSX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_LOONGARCH64" OFF)
|
||||||
dep_option(SDL_LASX "Use LASX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_LOONGARCH64" OFF)
|
dep_option(SDL_LASX "Use LASX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_LOONGARCH64" OFF)
|
||||||
|
|
||||||
|
@ -883,67 +882,6 @@ if(SDL_ASSEMBLY)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(SDL_ARMSIMD)
|
|
||||||
cmake_push_check_state()
|
|
||||||
string(APPEND CMAKE_REQUIRED_FLAGS " -x assembler-with-cpp")
|
|
||||||
list(APPEND CMAKE_REQUIRED_LINK_OPTIONS -x none)
|
|
||||||
check_c_source_compiles("
|
|
||||||
.text
|
|
||||||
.arch armv6
|
|
||||||
.object_arch armv4
|
|
||||||
.arm
|
|
||||||
.altmacro
|
|
||||||
#ifndef __ARM_EABI__
|
|
||||||
#error EABI is required (to be sure that calling conventions are compatible)
|
|
||||||
#endif
|
|
||||||
main:
|
|
||||||
.global main
|
|
||||||
pld [r0]
|
|
||||||
uqadd8 r0, r0, r0
|
|
||||||
" ARMSIMD_FOUND)
|
|
||||||
cmake_pop_check_state()
|
|
||||||
|
|
||||||
if(ARMSIMD_FOUND)
|
|
||||||
set(HAVE_ARMSIMD TRUE)
|
|
||||||
set(SDL_ARM_SIMD_BLITTERS 1)
|
|
||||||
enable_language(ASM)
|
|
||||||
sdl_glob_sources("${SDL3_SOURCE_DIR}/src/video/arm/pixman-arm-simd*.S")
|
|
||||||
set_property(SOURCE ${ARMSIMD_SOURCES} APPEND PROPERTY COMPILE_OPTIONS -x assembler-with-cpp)
|
|
||||||
set(WARN_ABOUT_ARM_SIMD_ASM_MIT TRUE)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(SDL_ARMNEON_BLITTERS)
|
|
||||||
cmake_push_check_state()
|
|
||||||
string(APPEND CMAKE_REQUIRED_FLAGS " -x assembler-with-cpp")
|
|
||||||
list(APPEND CMAKE_REQUIRED_LINK_OPTIONS -x none)
|
|
||||||
check_c_source_compiles("
|
|
||||||
.text
|
|
||||||
.fpu neon
|
|
||||||
.arch armv7a
|
|
||||||
.object_arch armv4
|
|
||||||
.eabi_attribute 10, 0
|
|
||||||
.arm
|
|
||||||
.altmacro
|
|
||||||
#ifndef __ARM_EABI__
|
|
||||||
#error EABI is required (to be sure that calling conventions are compatible)
|
|
||||||
#endif
|
|
||||||
main:
|
|
||||||
.global main
|
|
||||||
pld [r0]
|
|
||||||
vmovn.u16 d0, q0
|
|
||||||
" COMPILER_SUPPORTS_ARMNEON_ASSEMBLY)
|
|
||||||
cmake_pop_check_state()
|
|
||||||
if(COMPILER_SUPPORTS_ARMNEON_ASSEMBLY)
|
|
||||||
set(HAVE_ARMNEON_BLITTERS TRUE)
|
|
||||||
set(SDL_ARM_NEON_BLITTERS 1)
|
|
||||||
enable_language(ASM)
|
|
||||||
sdl_glob_sources("${SDL3_SOURCE_DIR}/src/video/arm/pixman-arm-neon*.S")
|
|
||||||
set_property(SOURCE ${ARMNEON_SOURCES} APPEND PROPERTY COMPILE_OPTIONS -x assembler-with-cpp)
|
|
||||||
set(WARN_ABOUT_ARM_NEON_ASM_MIT TRUE)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(SDL_ARMNEON)
|
if(SDL_ARMNEON)
|
||||||
check_c_source_compiles("
|
check_c_source_compiles("
|
||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
|
|
|
@ -25,10 +25,6 @@ function(get_clang_tidy_ignored_files OUTVAR)
|
||||||
# HIDAPI Steam controller
|
# HIDAPI Steam controller
|
||||||
"controller_constants.h"
|
"controller_constants.h"
|
||||||
"controller_structs.h"
|
"controller_structs.h"
|
||||||
# Nokia Pixman
|
|
||||||
"pixman-arm-asm.h"
|
|
||||||
"pixman-arm-neon-asm.h"
|
|
||||||
"pixman-arm-simd-asm.h"
|
|
||||||
# YUV2RGB
|
# YUV2RGB
|
||||||
"yuv_rgb.c"
|
"yuv_rgb.c"
|
||||||
"yuv_rgb_lsx_func.h"
|
"yuv_rgb_lsx_func.h"
|
||||||
|
|
|
@ -23,12 +23,6 @@
|
||||||
#ifndef SDL_blit_h_
|
#ifndef SDL_blit_h_
|
||||||
#define SDL_blit_h_
|
#define SDL_blit_h_
|
||||||
|
|
||||||
/* pixman ARM blitters are 32 bit only : */
|
|
||||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
|
||||||
#undef SDL_ARM_SIMD_BLITTERS
|
|
||||||
#undef SDL_ARM_NEON_BLITTERS
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Table to do pixel byte expansion */
|
/* Table to do pixel byte expansion */
|
||||||
extern const Uint8 *SDL_expand_byte[9];
|
extern const Uint8 *SDL_expand_byte[9];
|
||||||
extern const Uint16 SDL_expand_byte_10[];
|
extern const Uint16 SDL_expand_byte_10[];
|
||||||
|
|
|
@ -421,66 +421,6 @@ static void SDL_TARGETING("mmx") BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info)
|
||||||
|
|
||||||
#endif /* SDL_MMX_INTRINSICS */
|
#endif /* SDL_MMX_INTRINSICS */
|
||||||
|
|
||||||
#ifdef SDL_ARM_SIMD_BLITTERS
|
|
||||||
void BlitARGBto565PixelAlphaARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
|
|
||||||
|
|
||||||
static void BlitARGBto565PixelAlphaARMSIMD(SDL_BlitInfo *info)
|
|
||||||
{
|
|
||||||
int32_t width = info->dst_w;
|
|
||||||
int32_t height = info->dst_h;
|
|
||||||
uint16_t *dstp = (uint16_t *)info->dst;
|
|
||||||
int32_t dststride = width + (info->dst_skip >> 1);
|
|
||||||
uint32_t *srcp = (uint32_t *)info->src;
|
|
||||||
int32_t srcstride = width + (info->src_skip >> 2);
|
|
||||||
|
|
||||||
BlitARGBto565PixelAlphaARMSIMDAsm(width, height, dstp, dststride, srcp, srcstride);
|
|
||||||
}
|
|
||||||
|
|
||||||
void BlitRGBtoRGBPixelAlphaARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
|
|
||||||
|
|
||||||
static void BlitRGBtoRGBPixelAlphaARMSIMD(SDL_BlitInfo *info)
|
|
||||||
{
|
|
||||||
int32_t width = info->dst_w;
|
|
||||||
int32_t height = info->dst_h;
|
|
||||||
uint32_t *dstp = (uint32_t *)info->dst;
|
|
||||||
int32_t dststride = width + (info->dst_skip >> 2);
|
|
||||||
uint32_t *srcp = (uint32_t *)info->src;
|
|
||||||
int32_t srcstride = width + (info->src_skip >> 2);
|
|
||||||
|
|
||||||
BlitRGBtoRGBPixelAlphaARMSIMDAsm(width, height, dstp, dststride, srcp, srcstride);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef SDL_ARM_NEON_BLITTERS
|
|
||||||
void BlitARGBto565PixelAlphaARMNEONAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
|
|
||||||
|
|
||||||
static void BlitARGBto565PixelAlphaARMNEON(SDL_BlitInfo *info)
|
|
||||||
{
|
|
||||||
int32_t width = info->dst_w;
|
|
||||||
int32_t height = info->dst_h;
|
|
||||||
uint16_t *dstp = (uint16_t *)info->dst;
|
|
||||||
int32_t dststride = width + (info->dst_skip >> 1);
|
|
||||||
uint32_t *srcp = (uint32_t *)info->src;
|
|
||||||
int32_t srcstride = width + (info->src_skip >> 2);
|
|
||||||
|
|
||||||
BlitARGBto565PixelAlphaARMNEONAsm(width, height, dstp, dststride, srcp, srcstride);
|
|
||||||
}
|
|
||||||
|
|
||||||
void BlitRGBtoRGBPixelAlphaARMNEONAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
|
|
||||||
|
|
||||||
static void BlitRGBtoRGBPixelAlphaARMNEON(SDL_BlitInfo *info)
|
|
||||||
{
|
|
||||||
int32_t width = info->dst_w;
|
|
||||||
int32_t height = info->dst_h;
|
|
||||||
uint32_t *dstp = (uint32_t *)info->dst;
|
|
||||||
int32_t dststride = width + (info->dst_skip >> 2);
|
|
||||||
uint32_t *srcp = (uint32_t *)info->src;
|
|
||||||
int32_t srcstride = width + (info->src_skip >> 2);
|
|
||||||
|
|
||||||
BlitRGBtoRGBPixelAlphaARMNEONAsm(width, height, dstp, dststride, srcp, srcstride);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
|
/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
|
||||||
static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info)
|
static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info)
|
||||||
{
|
{
|
||||||
|
@ -1274,21 +1214,7 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
|
||||||
}
|
}
|
||||||
|
|
||||||
case 2:
|
case 2:
|
||||||
#if defined(SDL_ARM_NEON_BLITTERS) || defined(SDL_ARM_SIMD_BLITTERS)
|
if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 && sf->Gmask == 0xff00 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
|
||||||
if (sf->bytes_per_pixel == 4 && sf->Amask == 0xff000000 && sf->Gmask == 0xff00 && df->Gmask == 0x7e0 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
|
|
||||||
#ifdef SDL_ARM_NEON_BLITTERS
|
|
||||||
if (SDL_HasNEON()) {
|
|
||||||
return BlitARGBto565PixelAlphaARMNEON;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#ifdef SDL_ARM_SIMD_BLITTERS
|
|
||||||
if (SDL_HasARMSIMD()) {
|
|
||||||
return BlitARGBto565PixelAlphaARMSIMD;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
if (sf->bytes_per_pixel == 4 && sf->Amask == 0xff000000 && sf->Gmask == 0xff00 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
|
|
||||||
if (df->Gmask == 0x7e0) {
|
if (df->Gmask == 0x7e0) {
|
||||||
return BlitARGBto565PixelAlpha;
|
return BlitARGBto565PixelAlpha;
|
||||||
} else if (df->Gmask == 0x3e0) {
|
} else if (df->Gmask == 0x3e0) {
|
||||||
|
@ -1311,18 +1237,6 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif /* SDL_MMX_INTRINSICS */
|
#endif /* SDL_MMX_INTRINSICS */
|
||||||
if (sf->Amask == 0xff000000) {
|
|
||||||
#ifdef SDL_ARM_NEON_BLITTERS
|
|
||||||
if (SDL_HasNEON()) {
|
|
||||||
return BlitRGBtoRGBPixelAlphaARMNEON;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#ifdef SDL_ARM_SIMD_BLITTERS
|
|
||||||
if (SDL_HasARMSIMD()) {
|
|
||||||
return BlitRGBtoRGBPixelAlphaARMSIMD;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return BlitNtoNPixelAlpha;
|
return BlitNtoNPixelAlpha;
|
||||||
|
|
||||||
|
|
|
@ -247,54 +247,6 @@ int SDL_FillSurfaceRect(SDL_Surface *dst, const SDL_Rect *rect, Uint32 color)
|
||||||
return SDL_FillSurfaceRects(dst, rect, 1, color);
|
return SDL_FillSurfaceRects(dst, rect, 1, color);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef SDL_ARM_NEON_BLITTERS
|
|
||||||
void FillSurfaceRect8ARMNEONAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
|
|
||||||
void FillSurfaceRect16ARMNEONAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
|
|
||||||
void FillSurfaceRect32ARMNEONAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);
|
|
||||||
|
|
||||||
static void fill_8_neon(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
|
|
||||||
{
|
|
||||||
FillSurfaceRect8ARMNEONAsm(w, h, (uint8_t *)pixels, pitch >> 0, color);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void fill_16_neon(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
|
|
||||||
{
|
|
||||||
FillSurfaceRect16ARMNEONAsm(w, h, (uint16_t *)pixels, pitch >> 1, color);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void fill_32_neon(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
|
|
||||||
{
|
|
||||||
FillSurfaceRect32ARMNEONAsm(w, h, (uint32_t *)pixels, pitch >> 2, color);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef SDL_ARM_SIMD_BLITTERS
|
|
||||||
void FillSurfaceRect8ARMSIMDAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
|
|
||||||
void FillSurfaceRect16ARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
|
|
||||||
void FillSurfaceRect32ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);
|
|
||||||
|
|
||||||
static void fill_8_simd(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
|
|
||||||
{
|
|
||||||
FillSurfaceRect8ARMSIMDAsm(w, h, (uint8_t *)pixels, pitch >> 0, color);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void fill_16_simd(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
|
|
||||||
{
|
|
||||||
FillSurfaceRect16ARMSIMDAsm(w, h, (uint16_t *)pixels, pitch >> 1, color);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void fill_32_simd(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
|
|
||||||
{
|
|
||||||
FillSurfaceRect32ARMSIMDAsm(w, h, (uint32_t *)pixels, pitch >> 2, color);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
|
int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
|
||||||
Uint32 color)
|
Uint32 color)
|
||||||
{
|
{
|
||||||
|
@ -339,39 +291,8 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
|
||||||
return SDL_SetError("SDL_FillSurfaceRects(): Unsupported surface format");
|
return SDL_SetError("SDL_FillSurfaceRects(): Unsupported surface format");
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef SDL_ARM_NEON_BLITTERS
|
if (fill_function == NULL) {
|
||||||
if (SDL_HasNEON() && dst->format->bytes_per_pixel != 3 && !fill_function) {
|
switch (dst->format->BytesPerPixel) {
|
||||||
switch (dst->format->bytes_per_pixel) {
|
|
||||||
case 1:
|
|
||||||
fill_function = fill_8_neon;
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
fill_function = fill_16_neon;
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
fill_function = fill_32_neon;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#ifdef SDL_ARM_SIMD_BLITTERS
|
|
||||||
if (SDL_HasARMSIMD() && dst->format->bytes_per_pixel != 3 && !fill_function) {
|
|
||||||
switch (dst->format->bytes_per_pixel) {
|
|
||||||
case 1:
|
|
||||||
fill_function = fill_8_simd;
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
fill_function = fill_16_simd;
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
fill_function = fill_32_simd;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (!fill_function) {
|
|
||||||
switch (dst->format->bytes_per_pixel) {
|
|
||||||
case 1:
|
case 1:
|
||||||
{
|
{
|
||||||
color |= (color << 8);
|
color |= (color << 8);
|
||||||
|
|
|
@ -1,36 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright © 2010 Nokia Corporation
|
|
||||||
*
|
|
||||||
* Permission to use, copy, modify, distribute, and sell this software and its
|
|
||||||
* documentation for any purpose is hereby granted without fee, provided that
|
|
||||||
* the above copyright notice appear in all copies and that both that
|
|
||||||
* copyright notice and this permission notice appear in supporting
|
|
||||||
* documentation, and that the name of Mozilla Corporation not be used in
|
|
||||||
* advertising or publicity pertaining to distribution of the software without
|
|
||||||
* specific, written prior permission. Mozilla Corporation makes no
|
|
||||||
* representations about the suitability of this software for any purpose. It
|
|
||||||
* is provided "as is" without express or implied warranty.
|
|
||||||
*
|
|
||||||
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
|
|
||||||
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|
||||||
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
||||||
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
||||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
|
|
||||||
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
|
|
||||||
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
|
|
||||||
* SOFTWARE.
|
|
||||||
*
|
|
||||||
* Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Supplementary macro for setting function attributes */
|
|
||||||
.macro pixman_asm_function fname
|
|
||||||
.func fname
|
|
||||||
.global fname
|
|
||||||
#ifdef __ELF__
|
|
||||||
.hidden fname
|
|
||||||
.type fname, %function
|
|
||||||
#endif
|
|
||||||
fname:
|
|
||||||
.endm
|
|
|
@ -1,375 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright © 2009 Nokia Corporation
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice (including the next
|
|
||||||
* paragraph) shall be included in all copies or substantial portions of the
|
|
||||||
* Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
*
|
|
||||||
* Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2018 RISC OS Open Ltd
|
|
||||||
*
|
|
||||||
* This software is provided 'as-is', without any express or implied
|
|
||||||
* warranty. In no event will the authors be held liable for any damages
|
|
||||||
* arising from the use of this software.
|
|
||||||
*
|
|
||||||
* Permission is granted to anyone to use this software for any purpose,
|
|
||||||
* including commercial applications, and to alter it and redistribute it
|
|
||||||
* freely, subject to the following restrictions:
|
|
||||||
*
|
|
||||||
* 1. The origin of this software must not be misrepresented; you must not
|
|
||||||
* claim that you wrote the original software. If you use this software
|
|
||||||
* in a product, an acknowledgment in the product documentation would be
|
|
||||||
* appreciated but is not required.
|
|
||||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
|
||||||
* misrepresented as being the original software.
|
|
||||||
* 3. This notice may not be removed or altered from any source distribution.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Prevent the stack from becoming executable for no reason... */
|
|
||||||
#if defined(__linux__) && defined(__ELF__)
|
|
||||||
.section .note.GNU-stack,"",%progbits
|
|
||||||
#endif
|
|
||||||
|
|
||||||
.text
|
|
||||||
.fpu neon
|
|
||||||
.arch armv7a
|
|
||||||
.object_arch armv4
|
|
||||||
.eabi_attribute 10, 0 /* suppress Tag_FP_arch */
|
|
||||||
.eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */
|
|
||||||
.arm
|
|
||||||
.altmacro
|
|
||||||
.p2align 2
|
|
||||||
|
|
||||||
#include "pixman-arm-asm.h"
|
|
||||||
#include "pixman-arm-neon-asm.h"
|
|
||||||
|
|
||||||
/* Global configuration options and preferences */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The code can optionally make use of unaligned memory accesses to improve
|
|
||||||
* performance of handling leading/trailing pixels for each scanline.
|
|
||||||
* Configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0 for
|
|
||||||
* example in linux if unaligned memory accesses are not configured to
|
|
||||||
* generate.exceptions.
|
|
||||||
*/
|
|
||||||
.set RESPECT_STRICT_ALIGNMENT, 1
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Set default prefetch type. There is a choice between the following options:
|
|
||||||
*
|
|
||||||
* PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work
|
|
||||||
* as NOP to workaround some HW bugs or for whatever other reason)
|
|
||||||
*
|
|
||||||
* PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where
|
|
||||||
* advanced prefetch introduces heavy overhead)
|
|
||||||
*
|
|
||||||
* PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8
|
|
||||||
* which can run ARM and NEON instructions simultaneously so that extra ARM
|
|
||||||
* instructions do not add (many) extra cycles, but improve prefetch efficiency)
|
|
||||||
*
|
|
||||||
* Note: some types of function can't support advanced prefetch and fallback
|
|
||||||
* to simple one (those which handle 24bpp pixels)
|
|
||||||
*/
|
|
||||||
.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED
|
|
||||||
|
|
||||||
/* Prefetch distance in pixels for simple prefetch */
|
|
||||||
.set PREFETCH_DISTANCE_SIMPLE, 64
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
/* We can actually do significantly better than the Pixman macros, at least for
|
|
||||||
* the case of fills, by using a carefully scheduled inner loop. Cortex-A53
|
|
||||||
* shows an improvement of up to 78% in ideal cases (large fills to L1 cache).
|
|
||||||
*/
|
|
||||||
|
|
||||||
.macro generate_fillrect_function name, bpp, log2Bpp
|
|
||||||
/*
|
|
||||||
* void name(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
|
|
||||||
* On entry:
|
|
||||||
* a1 = width, pixels
|
|
||||||
* a2 = height, rows
|
|
||||||
* a3 = pointer to top-left destination pixel
|
|
||||||
* a4 = stride, pixels
|
|
||||||
* [sp] = pixel value to fill with
|
|
||||||
* Within the function:
|
|
||||||
* v1 = width remaining
|
|
||||||
* v2 = vst offset
|
|
||||||
* v3 = alternate pointer
|
|
||||||
* ip = data ARM register
|
|
||||||
*/
|
|
||||||
pixman_asm_function name
|
|
||||||
vld1.\bpp {d0[],d1[]}, [sp]
|
|
||||||
sub a4, a1
|
|
||||||
vld1.\bpp {d2[],d3[]}, [sp]
|
|
||||||
cmp a1, #(15+64) >> \log2Bpp
|
|
||||||
push {v1-v3,lr}
|
|
||||||
vmov ip, s0
|
|
||||||
blo 51f
|
|
||||||
|
|
||||||
/* Long-row case */
|
|
||||||
mov v2, #64
|
|
||||||
1: mov v1, a1
|
|
||||||
ands v3, a3, #15
|
|
||||||
beq 2f
|
|
||||||
/* Leading pixels */
|
|
||||||
rsb v3, v3, #16 /* number of leading bytes until 16-byte aligned */
|
|
||||||
sub v1, v1, v3, lsr #\log2Bpp
|
|
||||||
rbit v3, v3
|
|
||||||
.if bpp <= 16
|
|
||||||
.if bpp == 8
|
|
||||||
tst a3, #1 /* bit 0 unaffected by rsb so can avoid register interlock */
|
|
||||||
strneb ip, [a3], #1
|
|
||||||
tst v3, #1<<30
|
|
||||||
.else
|
|
||||||
tst a3, #2 /* bit 1 unaffected by rsb (assuming halfword alignment) so can avoid register interlock */
|
|
||||||
.endif
|
|
||||||
strneh ip, [a3], #2
|
|
||||||
.endif
|
|
||||||
movs v3, v3, lsl #3
|
|
||||||
vstmcs a3!, {s0}
|
|
||||||
vstmmi a3!, {d0}
|
|
||||||
2: sub v1, v1, #64 >> \log2Bpp /* simplifies inner loop termination */
|
|
||||||
add v3, a3, #32
|
|
||||||
/* Inner loop */
|
|
||||||
3: vst1.\bpp {q0-q1}, [a3 :128], v2
|
|
||||||
subs v1, v1, #64 >> \log2Bpp
|
|
||||||
vst1.\bpp {q0-q1}, [v3 :128], v2
|
|
||||||
bhs 3b
|
|
||||||
/* Trailing pixels */
|
|
||||||
4: movs v1, v1, lsl #27 + \log2Bpp
|
|
||||||
bcc 5f
|
|
||||||
vst1.\bpp {q0-q1}, [a3 :128]!
|
|
||||||
5: bpl 6f
|
|
||||||
vst1.\bpp {q0}, [a3 :128]!
|
|
||||||
6: movs v1, v1, lsl #2
|
|
||||||
vstmcs a3!, {d0}
|
|
||||||
vstmmi a3!, {s0}
|
|
||||||
.if bpp <= 16
|
|
||||||
movs v1, v1, lsl #2
|
|
||||||
strcsh ip, [a3], #2
|
|
||||||
.if bpp == 8
|
|
||||||
strmib ip, [a3], #1
|
|
||||||
.endif
|
|
||||||
.endif
|
|
||||||
subs a2, a2, #1
|
|
||||||
add a3, a3, a4, lsl #\log2Bpp
|
|
||||||
bhi 1b
|
|
||||||
pop {v1-v3,pc}
|
|
||||||
|
|
||||||
/* Short-row case */
|
|
||||||
51: movs v1, a1
|
|
||||||
.if bpp == 8
|
|
||||||
tst a3, #3
|
|
||||||
beq 53f
|
|
||||||
52: subs v1, v1, #1
|
|
||||||
blo 57f
|
|
||||||
strb ip, [a3], #1
|
|
||||||
tst a3, #3
|
|
||||||
bne 52b
|
|
||||||
.elseif bpp == 16
|
|
||||||
tstne a3, #2
|
|
||||||
subne v1, v1, #1
|
|
||||||
strneh ip, [a3], #2
|
|
||||||
.endif
|
|
||||||
53: cmp v1, #32 >> \log2Bpp
|
|
||||||
bcc 54f
|
|
||||||
vst1.\bpp {q0-q1}, [a3]!
|
|
||||||
sub v1, v1, #32 >> \log2Bpp
|
|
||||||
/* Trailing pixels */
|
|
||||||
54: movs v1, v1, lsl #27 + \log2Bpp
|
|
||||||
bcc 55f
|
|
||||||
vst1.\bpp {q0-q1}, [a3]!
|
|
||||||
55: bpl 56f
|
|
||||||
vst1.\bpp {q0}, [a3]!
|
|
||||||
56: movs v1, v1, lsl #2
|
|
||||||
vstmcs a3!, {d0}
|
|
||||||
vstmmi a3!, {s0}
|
|
||||||
.if bpp <= 16
|
|
||||||
movs v1, v1, lsl #2
|
|
||||||
strcsh ip, [a3], #2
|
|
||||||
.if bpp == 8
|
|
||||||
strmib ip, [a3], #1
|
|
||||||
.endif
|
|
||||||
.endif
|
|
||||||
subs a2, a2, #1
|
|
||||||
add a3, a3, a4, lsl #\log2Bpp
|
|
||||||
bhi 51b
|
|
||||||
57: pop {v1-v3,pc}
|
|
||||||
|
|
||||||
.endfunc
|
|
||||||
.endm
|
|
||||||
|
|
||||||
generate_fillrect_function FillSurfaceRect32ARMNEONAsm, 32, 2
|
|
||||||
generate_fillrect_function FillSurfaceRect16ARMNEONAsm, 16, 1
|
|
||||||
generate_fillrect_function FillSurfaceRect8ARMNEONAsm, 8, 0
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
.macro RGBtoRGBPixelAlpha_process_pixblock_head
|
|
||||||
vmvn d30, d3 /* get inverted source alpha */
|
|
||||||
vmov d31, d7 /* dest alpha is always unchanged */
|
|
||||||
vmull.u8 q14, d0, d3
|
|
||||||
vmlal.u8 q14, d4, d30
|
|
||||||
vmull.u8 q0, d1, d3
|
|
||||||
vmlal.u8 q0, d5, d30
|
|
||||||
vmull.u8 q1, d2, d3
|
|
||||||
vmlal.u8 q1, d6, d30
|
|
||||||
vrshr.u16 q2, q14, #8
|
|
||||||
vrshr.u16 q3, q0, #8
|
|
||||||
vraddhn.u16 d28, q14, q2
|
|
||||||
vrshr.u16 q2, q1, #8
|
|
||||||
vraddhn.u16 d29, q0, q3
|
|
||||||
vraddhn.u16 d30, q1, q2
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro RGBtoRGBPixelAlpha_process_pixblock_tail
|
|
||||||
/* nothing */
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro RGBtoRGBPixelAlpha_process_pixblock_tail_head
|
|
||||||
vld4.8 {d0-d3}, [SRC]!
|
|
||||||
PF add PF_X, PF_X, #8
|
|
||||||
vst4.8 {d28-d31}, [DST_W :128]!
|
|
||||||
PF tst PF_CTL, #0xF
|
|
||||||
vld4.8 {d4-d7}, [DST_R :128]!
|
|
||||||
PF addne PF_X, PF_X, #8
|
|
||||||
vmvn d30, d3 /* get inverted source alpha */
|
|
||||||
vmov d31, d7 /* dest alpha is always unchanged */
|
|
||||||
vmull.u8 q14, d0, d3
|
|
||||||
PF subne PF_CTL, PF_CTL, #1
|
|
||||||
vmlal.u8 q14, d4, d30
|
|
||||||
PF cmp PF_X, ORIG_W
|
|
||||||
vmull.u8 q0, d1, d3
|
|
||||||
PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
|
|
||||||
vmlal.u8 q0, d5, d30
|
|
||||||
PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
|
|
||||||
vmull.u8 q1, d2, d3
|
|
||||||
PF subge PF_X, PF_X, ORIG_W
|
|
||||||
vmlal.u8 q1, d6, d30
|
|
||||||
PF subges PF_CTL, PF_CTL, #0x10
|
|
||||||
vrshr.u16 q2, q14, #8
|
|
||||||
PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
|
|
||||||
vrshr.u16 q3, q0, #8
|
|
||||||
PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
|
|
||||||
vraddhn.u16 d28, q14, q2
|
|
||||||
vrshr.u16 q2, q1, #8
|
|
||||||
vraddhn.u16 d29, q0, q3
|
|
||||||
vraddhn.u16 d30, q1, q2
|
|
||||||
.endm
|
|
||||||
|
|
||||||
generate_composite_function \
|
|
||||||
BlitRGBtoRGBPixelAlphaARMNEONAsm, 32, 0, 32, \
|
|
||||||
FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
|
|
||||||
8, /* number of pixels, processed in a single block */ \
|
|
||||||
5, /* prefetch distance */ \
|
|
||||||
default_init, \
|
|
||||||
default_cleanup, \
|
|
||||||
RGBtoRGBPixelAlpha_process_pixblock_head, \
|
|
||||||
RGBtoRGBPixelAlpha_process_pixblock_tail, \
|
|
||||||
RGBtoRGBPixelAlpha_process_pixblock_tail_head
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
.macro ARGBto565PixelAlpha_process_pixblock_head
|
|
||||||
vmvn d6, d3
|
|
||||||
vshr.u8 d1, #2
|
|
||||||
vshr.u8 d3, #3
|
|
||||||
vshr.u8 d0, #3
|
|
||||||
vshrn.u16 d7, q2, #3
|
|
||||||
vshrn.u16 d25, q2, #8
|
|
||||||
vbic.i16 q2, #0xe0
|
|
||||||
vshr.u8 d6, #3
|
|
||||||
vshr.u8 d7, #2
|
|
||||||
vshr.u8 d2, #3
|
|
||||||
vmovn.u16 d24, q2
|
|
||||||
vshr.u8 d25, #3
|
|
||||||
vmull.u8 q13, d1, d3
|
|
||||||
vmlal.u8 q13, d7, d6
|
|
||||||
vmull.u8 q14, d0, d3
|
|
||||||
vmlal.u8 q14, d24, d6
|
|
||||||
vmull.u8 q15, d2, d3
|
|
||||||
vmlal.u8 q15, d25, d6
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro ARGBto565PixelAlpha_process_pixblock_tail
|
|
||||||
vsra.u16 q13, #5
|
|
||||||
vsra.u16 q14, #5
|
|
||||||
vsra.u16 q15, #5
|
|
||||||
vrshr.u16 q13, #5
|
|
||||||
vrshr.u16 q14, #5
|
|
||||||
vrshr.u16 q15, #5
|
|
||||||
vsli.u16 q14, q13, #5
|
|
||||||
vsli.u16 q14, q15, #11
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro ARGBto565PixelAlpha_process_pixblock_tail_head
|
|
||||||
vld4.8 {d0-d3}, [SRC]!
|
|
||||||
PF add PF_X, PF_X, #8
|
|
||||||
vsra.u16 q13, #5
|
|
||||||
PF tst PF_CTL, #0xF
|
|
||||||
vsra.u16 q14, #5
|
|
||||||
PF addne PF_X, PF_X, #8
|
|
||||||
vsra.u16 q15, #5
|
|
||||||
PF subne PF_CTL, PF_CTL, #1
|
|
||||||
vrshr.u16 q13, #5
|
|
||||||
PF cmp PF_X, ORIG_W
|
|
||||||
vrshr.u16 q14, #5
|
|
||||||
PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
|
|
||||||
vrshr.u16 q15, #5
|
|
||||||
PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
|
|
||||||
vld1.8 {d4-d5}, [DST_R]!
|
|
||||||
PF subge PF_X, PF_X, ORIG_W
|
|
||||||
vsli.u16 q14, q13, #5
|
|
||||||
PF subges PF_CTL, PF_CTL, #0x10
|
|
||||||
vsli.u16 q14, q15, #11
|
|
||||||
PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
|
|
||||||
vst1.8 {q14}, [DST_W :128]!
|
|
||||||
vmvn d6, d3
|
|
||||||
vshr.u8 d1, #2
|
|
||||||
vshr.u8 d3, #3
|
|
||||||
vshr.u8 d0, #3
|
|
||||||
vshrn.u16 d7, q2, #3
|
|
||||||
vshrn.u16 d25, q2, #8
|
|
||||||
vbic.i16 q2, #0xe0
|
|
||||||
PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
|
|
||||||
vshr.u8 d6, #3
|
|
||||||
vshr.u8 d7, #2
|
|
||||||
vshr.u8 d2, #3
|
|
||||||
vmovn.u16 d24, q2
|
|
||||||
vshr.u8 d25, #3
|
|
||||||
vmull.u8 q13, d1, d3
|
|
||||||
vmlal.u8 q13, d7, d6
|
|
||||||
vmull.u8 q14, d0, d3
|
|
||||||
vmlal.u8 q14, d24, d6
|
|
||||||
vmull.u8 q15, d2, d3
|
|
||||||
vmlal.u8 q15, d25, d6
|
|
||||||
.endm
|
|
||||||
|
|
||||||
generate_composite_function \
|
|
||||||
BlitARGBto565PixelAlphaARMNEONAsm, 32, 0, 16, \
|
|
||||||
FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
|
|
||||||
8, /* number of pixels, processed in a single block */ \
|
|
||||||
6, /* prefetch distance */ \
|
|
||||||
default_init, \
|
|
||||||
default_cleanup, \
|
|
||||||
ARGBto565PixelAlpha_process_pixblock_head, \
|
|
||||||
ARGBto565PixelAlpha_process_pixblock_tail, \
|
|
||||||
ARGBto565PixelAlpha_process_pixblock_tail_head
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,532 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2016 RISC OS Open Ltd
|
|
||||||
*
|
|
||||||
* This software is provided 'as-is', without any express or implied
|
|
||||||
* warranty. In no event will the authors be held liable for any damages
|
|
||||||
* arising from the use of this software.
|
|
||||||
*
|
|
||||||
* Permission is granted to anyone to use this software for any purpose,
|
|
||||||
* including commercial applications, and to alter it and redistribute it
|
|
||||||
* freely, subject to the following restrictions:
|
|
||||||
*
|
|
||||||
* 1. The origin of this software must not be misrepresented; you must not
|
|
||||||
* claim that you wrote the original software. If you use this software
|
|
||||||
* in a product, an acknowledgment in the product documentation would be
|
|
||||||
* appreciated but is not required.
|
|
||||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
|
||||||
* misrepresented as being the original software.
|
|
||||||
* 3. This notice may not be removed or altered from any source distribution.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Prevent the stack from becoming executable */
|
|
||||||
/* An empty .note.GNU-stack section is emitted only for Linux ELF targets. */
#if defined(__linux__) && defined(__ELF__)
|
|
||||||
.section .note.GNU-stack,"",%progbits
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Assembler setup for everything that follows:
 *   .arch armv6        accept ARMv6 instructions (uxtb16, pkhbt, sel, ... are used below)
 *   .object_arch armv4 record ARMv4 as the architecture in the object file
 *   .arm               assemble ARM (not Thumb) encodings
 *   .altmacro          alternate macro syntax; the %(expr) arguments below rely on it
 *   .p2align 2         align to 4 bytes
 */
.text
|
|
||||||
.arch armv6
|
|
||||||
.object_arch armv4
|
|
||||||
.arm
|
|
||||||
.altmacro
|
|
||||||
.p2align 2
|
|
||||||
|
|
||||||
#include "pixman-arm-asm.h"
|
|
||||||
#include "pixman-arm-simd-asm.h"
|
|
||||||
|
|
||||||
/* A head macro should do all processing which results in an output of up to
|
|
||||||
* 16 bytes, as far as the final load instruction. The corresponding tail macro
|
|
||||||
* should complete the processing of the up-to-16 bytes. The calling macro will
|
|
||||||
* sometimes choose to insert a preload or a decrement of X between them.
|
|
||||||
* cond ARM condition code for code block
|
|
||||||
* numbytes Number of output bytes that should be generated this time
|
|
||||||
* firstreg First WK register in which to place output
|
|
||||||
* unaligned_src Whether to use non-wordaligned loads of source image
|
|
||||||
* unaligned_mask Whether to use non-wordaligned loads of mask image
|
|
||||||
* preload If outputting 16 bytes causes 64 bytes to be read, whether an extra preload should be output
|
|
||||||
*/
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
/* Init hook for the 32bpp fill: load the 32-bit fill value from the stack
 * slot at ARGS_STACK_OFFSET and copy it into SRC, STRIDE_S, MASK and
 * STRIDE_M. FillRect_process_tail aliases exactly these four registers as
 * WK4-WK7, so four words of the fill value are ready to be stored.
 */
.macro FillRect32_init
|
|
||||||
ldr SRC, [sp, #ARGS_STACK_OFFSET]
|
|
||||||
mov STRIDE_S, SRC
|
|
||||||
mov MASK, SRC
|
|
||||||
mov STRIDE_M, SRC
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Init hook for the 16bpp fill: load the 16-bit fill value from the stack
 * slot at ARGS_STACK_OFFSET, duplicate it into both halfwords of SRC, then
 * copy the resulting word into STRIDE_S, MASK and STRIDE_M (the registers
 * FillRect_process_tail aliases as WK5-WK7).
 */
.macro FillRect16_init
|
|
||||||
ldrh SRC, [sp, #ARGS_STACK_OFFSET]
|
|
||||||
orr SRC, SRC, lsl #16 /* SRC |= SRC << 16 */
|
|
||||||
mov STRIDE_S, SRC
|
|
||||||
mov MASK, SRC
|
|
||||||
mov STRIDE_M, SRC
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Init hook for the 8bpp fill: load the 8-bit fill value from the stack
 * slot at ARGS_STACK_OFFSET, replicate it into all four bytes of SRC, then
 * copy the resulting word into STRIDE_S, MASK and STRIDE_M (the registers
 * FillRect_process_tail aliases as WK5-WK7).
 */
.macro FillRect8_init
|
|
||||||
ldrb SRC, [sp, #ARGS_STACK_OFFSET]
|
|
||||||
orr SRC, SRC, lsl #8 /* byte -> halfword */
|
|
||||||
orr SRC, SRC, lsl #16 /* halfword -> word */
|
|
||||||
mov STRIDE_S, SRC
|
|
||||||
mov MASK, SRC
|
|
||||||
mov STRIDE_M, SRC
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Process-tail hook shared by the three fill routines.
 * Temporarily names SRC, STRIDE_S, MASK and STRIDE_M as WK4..WK7 (the
 * registers the FillRect*_init macros filled with the fill value), invokes
 * pixst with first-register index 4 and base DST, then drops the aliases.
 * pixst is defined outside this chunk; presumably it stores "numbytes" bytes
 * from consecutive WK registers under condition "cond" - TODO confirm.
 * The "firstreg" parameter is accepted but not used here.
 */
.macro FillRect_process_tail cond, numbytes, firstreg
|
|
||||||
WK4 .req SRC
|
|
||||||
WK5 .req STRIDE_S
|
|
||||||
WK6 .req MASK
|
|
||||||
WK7 .req STRIDE_M
|
|
||||||
pixst cond, numbytes, 4, DST
|
|
||||||
.unreq WK4
|
|
||||||
.unreq WK5
|
|
||||||
.unreq WK6
|
|
||||||
.unreq WK7
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Instantiate FillSurfaceRect32ARMSIMDAsm. Only the init hook
 * (FillRect32_init) and the process-tail hook (FillRect_process_tail) do any
 * work; the newline, cleanup and process-head hooks are nop_macro.
 * NOTE(review): "0, 0, 32" presumably means no source, no mask and a 32bpp
 * destination - confirm against generate_composite_function, which is
 * defined outside this chunk.
 * NOTE(review): several arguments below are separated by whitespace only
 * (after the flags expression, after FillRect32_init and after two of the
 * nop_macro entries). GNU as accepts blanks as macro-argument separators,
 * but confirm the omission of commas is intentional.
 */
generate_composite_function \
|
|
||||||
FillSurfaceRect32ARMSIMDAsm, 0, 0, 32, \
|
|
||||||
FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \
|
|
||||||
0, /* prefetch distance doesn't apply */ \
|
|
||||||
FillRect32_init \
|
|
||||||
nop_macro, /* newline */ \
|
|
||||||
nop_macro /* cleanup */ \
|
|
||||||
nop_macro /* process head */ \
|
|
||||||
FillRect_process_tail
|
|
||||||
|
|
||||||
/* Instantiate FillSurfaceRect16ARMSIMDAsm. Only the init hook
 * (FillRect16_init) and the process-tail hook (FillRect_process_tail) do any
 * work; the newline, cleanup and process-head hooks are nop_macro.
 * NOTE(review): "0, 0, 16" presumably means no source, no mask and a 16bpp
 * destination - confirm against generate_composite_function, which is
 * defined outside this chunk.
 * NOTE(review): several arguments below are separated by whitespace only
 * (after the flags expression, after FillRect16_init and after two of the
 * nop_macro entries). GNU as accepts blanks as macro-argument separators,
 * but confirm the omission of commas is intentional.
 */
generate_composite_function \
|
|
||||||
FillSurfaceRect16ARMSIMDAsm, 0, 0, 16, \
|
|
||||||
FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \
|
|
||||||
0, /* prefetch distance doesn't apply */ \
|
|
||||||
FillRect16_init \
|
|
||||||
nop_macro, /* newline */ \
|
|
||||||
nop_macro /* cleanup */ \
|
|
||||||
nop_macro /* process head */ \
|
|
||||||
FillRect_process_tail
|
|
||||||
|
|
||||||
/* Instantiate FillSurfaceRect8ARMSIMDAsm. Only the init hook
 * (FillRect8_init) and the process-tail hook (FillRect_process_tail) do any
 * work; the newline, cleanup and process-head hooks are nop_macro.
 * NOTE(review): "0, 0, 8" presumably means no source, no mask and an 8bpp
 * destination - confirm against generate_composite_function, which is
 * defined outside this chunk.
 * NOTE(review): several arguments below are separated by whitespace only
 * (after the flags expression, after FillRect8_init and after two of the
 * nop_macro entries). GNU as accepts blanks as macro-argument separators,
 * but confirm the omission of commas is intentional.
 */
generate_composite_function \
|
|
||||||
FillSurfaceRect8ARMSIMDAsm, 0, 0, 8, \
|
|
||||||
FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \
|
|
||||||
0, /* prefetch distance doesn't apply */ \
|
|
||||||
FillRect8_init \
|
|
||||||
nop_macro, /* newline */ \
|
|
||||||
nop_macro /* cleanup */ \
|
|
||||||
nop_macro /* process head */ \
|
|
||||||
FillRect_process_tail
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
/* This differs from the over_8888_8888 routine in Pixman in that the destination
|
|
||||||
* alpha component is always left unchanged, and RGB components are not
|
|
||||||
* premultiplied by alpha. It differs from BlitRGBtoRGBPixelAlpha in that
|
|
||||||
* renormalisation is done by multiplying by 257/256 (with rounding) rather than
|
|
||||||
* simply shifting right by 8 bits - removing the need to special-case alpha=0xff.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Init hook for the 32bpp -> 32bpp per-pixel-alpha blit.
 * Sets MASK to 0x80; RGBtoRGBPixelAlpha_process_tail passes MASK as the
 * "half" (rounding) operand of RGBtoRGBPixelAlpha_1pixel_translucent.
 * line_saved_regs is defined outside this chunk; presumably it declares
 * STRIDE_S and ORIG_W as registers to be saved per line so the process
 * macros may use them as temporaries - TODO confirm.
 */
.macro RGBtoRGBPixelAlpha_init
|
|
||||||
line_saved_regs STRIDE_S, ORIG_W
|
|
||||||
mov MASK, #0x80
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Blend one 32bpp source pixel onto one 32bpp destination pixel.
 *   s          source pixel; overwritten with its alpha byte (s >> 24)
 *   d          destination pixel; receives the blended result
 *   tmp0-tmp3  scratch registers (clobbered)
 *   half       rounding constant added to each product (callers pass MASK,
 *              which RGBtoRGBPixelAlpha_init sets to 0x80)
 * For each of bytes 0, 1 and 2 the macro computes
 *   delta = (src_byte - dst_byte) * alpha + half;  delta += delta >> 8;
 * and adds bits 8-15 of delta to the destination byte with UADD8.
 * Nothing is added to byte 3, so the destination alpha byte is unchanged.
 */
.macro RGBtoRGBPixelAlpha_1pixel_translucent s, d, tmp0, tmp1, tmp2, tmp3, half
|
|
||||||
uxtb tmp3, s
|
|
||||||
uxtb tmp0, d
|
|
||||||
sub tmp0, tmp3, tmp0 /* byte 0 difference */
|
|
||||||
uxtb tmp3, s, ror #16
|
|
||||||
uxtb tmp1, d, ror #16
|
|
||||||
sub tmp1, tmp3, tmp1 /* byte 2 difference */
|
|
||||||
uxtb tmp3, s, ror #8
|
|
||||||
mov s, s, lsr #24 /* s now holds only the source alpha */
|
|
||||||
uxtb tmp2, d, ror #8
|
|
||||||
sub tmp2, tmp3, tmp2 /* byte 1 difference */
|
|
||||||
smlabb tmp0, tmp0, s, half
|
|
||||||
smlabb tmp1, tmp1, s, half
|
|
||||||
smlabb tmp2, tmp2, s, half
|
|
||||||
add tmp0, tmp0, asr #8
|
|
||||||
add tmp1, tmp1, asr #8
|
|
||||||
add tmp2, tmp2, asr #8
|
|
||||||
pkhbt tmp0, tmp0, tmp1, lsl #16 /* low half = byte 0 delta, high half = byte 2 delta */
|
|
||||||
and tmp2, tmp2, #0xff00 /* byte 1 delta, already in byte position 1 */
|
|
||||||
uxtb16 tmp0, tmp0, ror #8 /* move bits 8-15 of each halfword down to bits 0-7 */
|
|
||||||
orr tmp0, tmp0, tmp2
|
|
||||||
uadd8 d, d, tmp0 /* per-byte add, no carry between bytes */
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Opaque case for one pixel: d = (d & 0xff000000) | (s & 0x00ffffff).
 * The destination keeps its own top byte and takes the low three bytes of
 * the source. s is modified (its top byte is cleared).
 */
.macro RGBtoRGBPixelAlpha_1pixel_opaque s, d
|
|
||||||
and d, d, #0xff000000
|
|
||||||
bic s, s, #0xff000000
|
|
||||||
orr d, d, s
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Process-head hook: load the pixels for this step and classify their alpha.
 * Source words go to WK0/WK1, destination words to WK2/WK3; SRC and DST are
 * post-incremented by "numbytes".
 * In the 16-byte case only the first two destination words are loaded here.
 * Source words 3 and 4 are held in STRIDE_S/STRIDE_M just long enough to
 * take part in the alpha test; RGBtoRGBPixelAlpha_process_tail reloads them.
 * On exit:
 *   Z flag  set when the alpha byte of every loaded source pixel is zero
 *           (result of the final TST; tested by BEQ in the tail macro)
 *   ORIG_W  bitwise AND of the loaded source pixels (8 and 16 byte cases),
 *           compared against 0xff000000 in the tail to detect "all opaque"
 * The cond, firstreg, unaligned_src, unaligned_mask and preload parameters
 * are not referenced in this body.
 */
.macro RGBtoRGBPixelAlpha_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
|
|
||||||
.if numbytes == 16
|
|
||||||
ldm SRC!, {WK0, WK1}
|
|
||||||
ldm SRC!, {STRIDE_S, STRIDE_M}
|
|
||||||
ldrd WK2, WK3, [DST], #16
|
|
||||||
orr SCRATCH, WK0, WK1
|
|
||||||
and ORIG_W, WK0, WK1
|
|
||||||
orr SCRATCH, SCRATCH, STRIDE_S
|
|
||||||
and ORIG_W, ORIG_W, STRIDE_S
|
|
||||||
orr SCRATCH, SCRATCH, STRIDE_M
|
|
||||||
and ORIG_W, ORIG_W, STRIDE_M
|
|
||||||
tst SCRATCH, #0xff000000
|
|
||||||
.elseif numbytes == 8
|
|
||||||
ldm SRC!, {WK0, WK1}
|
|
||||||
ldm DST!, {WK2, WK3}
|
|
||||||
orr SCRATCH, WK0, WK1
|
|
||||||
and ORIG_W, WK0, WK1
|
|
||||||
tst SCRATCH, #0xff000000
|
|
||||||
.else // numbytes == 4
|
|
||||||
ldr WK0, [SRC], #4
|
|
||||||
ldr WK2, [DST], #4
|
|
||||||
tst WK0, #0xff000000
|
|
||||||
.endif
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Process-tail hook: blend and store the pixels loaded by
 * RGBtoRGBPixelAlpha_process_head. Relies on the Z flag and ORIG_W left by
 * that macro.
 *   Z set                  -> every source alpha is zero: skip to 20 with
 *                             the destination untouched
 *   ORIG_W >= 0xff000000   -> every source alpha is 0xff: opaque path (10)
 *   otherwise              -> translucent blend of each pixel
 * Local labels: 10 = opaque path, 19 = common final store, 20 = exit.
 * DST and SRC were already advanced by the head macro, so all accesses here
 * use negative offsets. In the 16-byte case the first pixel pair is stored,
 * then the second source and destination pairs are (re)loaded into WK0-WK3.
 * STRIDE_S, STRIDE_M, SCRATCH and ORIG_W serve as temporaries for the
 * translucent blend, and MASK is its rounding constant.
 * The cond and firstreg parameters are not referenced in this body.
 */
.macro RGBtoRGBPixelAlpha_process_tail cond, numbytes, firstreg
|
|
||||||
beq 20f @ all transparent
|
|
||||||
.if numbytes == 16
|
|
||||||
cmp ORIG_W, #0xff000000
|
|
||||||
bhs 10f @ all opaque
|
|
||||||
RGBtoRGBPixelAlpha_1pixel_translucent WK0, WK2, STRIDE_S, STRIDE_M, SCRATCH, ORIG_W, MASK
|
|
||||||
RGBtoRGBPixelAlpha_1pixel_translucent WK1, WK3, STRIDE_S, STRIDE_M, SCRATCH, ORIG_W, MASK
|
|
||||||
strd WK2, WK3, [DST, #-16]
|
|
||||||
ldrd WK0, WK1, [SRC, #-8]
|
|
||||||
ldrd WK2, WK3, [DST, #-8]
|
|
||||||
RGBtoRGBPixelAlpha_1pixel_translucent WK0, WK2, STRIDE_S, STRIDE_M, SCRATCH, ORIG_W, MASK
|
|
||||||
RGBtoRGBPixelAlpha_1pixel_translucent WK1, WK3, STRIDE_S, STRIDE_M, SCRATCH, ORIG_W, MASK
|
|
||||||
b 19f
|
|
||||||
10: RGBtoRGBPixelAlpha_1pixel_opaque WK0, WK2
|
|
||||||
RGBtoRGBPixelAlpha_1pixel_opaque WK1, WK3
|
|
||||||
strd WK2, WK3, [DST, #-16]
|
|
||||||
ldrd WK0, WK1, [SRC, #-8]
|
|
||||||
ldrd WK2, WK3, [DST, #-8]
|
|
||||||
RGBtoRGBPixelAlpha_1pixel_opaque WK0, WK2
|
|
||||||
RGBtoRGBPixelAlpha_1pixel_opaque WK1, WK3
|
|
||||||
19: strd WK2, WK3, [DST, #-8]
|
|
||||||
.elseif numbytes == 8
|
|
||||||
cmp ORIG_W, #0xff000000
|
|
||||||
bhs 10f @ all opaque
|
|
||||||
RGBtoRGBPixelAlpha_1pixel_translucent WK0, WK2, STRIDE_S, STRIDE_M, SCRATCH, ORIG_W, MASK
|
|
||||||
RGBtoRGBPixelAlpha_1pixel_translucent WK1, WK3, STRIDE_S, STRIDE_M, SCRATCH, ORIG_W, MASK
|
|
||||||
b 19f
|
|
||||||
10: RGBtoRGBPixelAlpha_1pixel_opaque WK0, WK2
|
|
||||||
RGBtoRGBPixelAlpha_1pixel_opaque WK1, WK3
|
|
||||||
19: strd WK2, WK3, [DST, #-8]
|
|
||||||
.else // numbytes == 4
|
|
||||||
cmp WK0, #0xff000000
|
|
||||||
bhs 10f @ opaque
|
|
||||||
RGBtoRGBPixelAlpha_1pixel_translucent WK0, WK2, STRIDE_S, STRIDE_M, SCRATCH, ORIG_W, MASK
|
|
||||||
b 19f
|
|
||||||
10: RGBtoRGBPixelAlpha_1pixel_opaque WK0, WK2
|
|
||||||
19: str WK2, [DST, #-4]
|
|
||||||
.endif
|
|
||||||
20:
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Instantiate BlitRGBtoRGBPixelAlphaARMSIMDAsm from the RGBtoRGBPixelAlpha_*
 * hooks: init, process head and process tail are supplied; newline and
 * cleanup are nop_macro.
 * NOTE(review): "32, 0, 32" presumably gives source/mask/destination bits
 * per pixel - confirm against generate_composite_function, which is defined
 * outside this chunk.
 */
generate_composite_function \
|
|
||||||
BlitRGBtoRGBPixelAlphaARMSIMDAsm, 32, 0, 32, \
|
|
||||||
FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_WK0, \
|
|
||||||
2, /* prefetch distance */ \
|
|
||||||
RGBtoRGBPixelAlpha_init, \
|
|
||||||
nop_macro, /* newline */ \
|
|
||||||
nop_macro, /* cleanup */ \
|
|
||||||
RGBtoRGBPixelAlpha_process_head, \
|
|
||||||
RGBtoRGBPixelAlpha_process_tail
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
/* Init hook for the 32bpp -> 565 per-pixel-alpha blit.
 * Builds two constants the pixel macros expect:
 *   MASK     = 0x001f001f  (passed as "rbmask")
 *   STRIDE_M = 0x00100010  (passed as "rbhalf")
 * line_saved_regs is defined outside this chunk; presumably it declares
 * STRIDE_D, STRIDE_S and ORIG_W as registers to be saved per line so they
 * can be reused by the process macros - TODO confirm.
 */
.macro ARGBto565PixelAlpha_init
|
|
||||||
line_saved_regs STRIDE_D, STRIDE_S, ORIG_W
|
|
||||||
mov MASK, #0x001f
|
|
||||||
mov STRIDE_M, #0x0010
|
|
||||||
orr MASK, MASK, MASK, lsl #16
|
|
||||||
orr STRIDE_M, STRIDE_M, STRIDE_M, lsl #16
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Newline hook: set STRIDE_S to 0x0200, the value the pixel macros receive
 * as "ghalf".
 */
.macro ARGBto565PixelAlpha_newline
|
|
||||||
mov STRIDE_S, #0x0200
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* On entry:
|
|
||||||
* s holds 1 32bpp source pixel
|
|
||||||
* d holds 1 16bpp destination pixel
|
|
||||||
* rbmask, rbhalf, ghalf hold 0x001f001f, 0x00100010, 0x00000200 respectively
|
|
||||||
* other registers are temporaries
|
|
||||||
* On exit:
|
|
||||||
* Constant registers preserved
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Blend one 32bpp source pixel "s" onto one 565 destination pixel "d".
 * alpha, rb, g and misc are temporaries; s is clobbered.
 * Outline:
 *   alpha = s >> 27                    (top five bits of the source alpha)
 *   green: delta = (src green in bits 5-10) - (dst green in bits 5-10)
 *   red/blue: handled together, red in bits 16-20 and blue in bits 0-4
 *   each delta: delta = delta * alpha + half;  delta += delta << 5;
 *               channel += delta >> 10  (arithmetic), then re-masked
 * On exit the low 16 bits of d hold the blended 565 pixel. The upper half
 * still carries the red field from the final PKHBT, so callers store with
 * STRH.
 */
.macro ARGBto565PixelAlpha_1pixel_translucent s, d, rbmask, rbhalf, ghalf, alpha, rb, g, misc
|
|
||||||
mov alpha, s, lsr #27
|
|
||||||
and misc, s, #0xfc00 /* source green, six bits at 10-15 */
|
|
||||||
and g, d, #0x07e0 /* destination green, in place */
|
|
||||||
pkhbt rb, d, d, lsl #5 /* destination red moves to bits 16-20 */
|
|
||||||
rsb misc, g, misc, lsr #5 /* misc = (misc >> 5) - g */
|
|
||||||
and s, rbmask, s, lsr #3 /* source red at 16-20, blue at 0-4 */
|
|
||||||
and rb, rbmask, rb
|
|
||||||
sub s, s, rb
|
|
||||||
smlabb misc, misc, alpha, ghalf
|
|
||||||
mla s, s, alpha, rbhalf
|
|
||||||
add misc, misc, misc, lsl #5
|
|
||||||
add g, g, misc, asr #10
|
|
||||||
add s, s, s, lsl #5
|
|
||||||
and g, g, #0x07e0
|
|
||||||
add rb, rb, s, asr #10
|
|
||||||
and rb, rb, rbmask
|
|
||||||
pkhbt rb, rb, rb, lsl #11 /* red to bits 27-31, blue stays at 0-4 */
|
|
||||||
orr d, rb, g
|
|
||||||
orr d, d, rb, lsr #16 /* red into bits 11-15 */
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* On entry:
|
|
||||||
* s holds 1 32bpp source pixel
|
|
||||||
* d holds 1 16bpp destination pixel
|
|
||||||
* rbmask holds 0x001f001f
|
|
||||||
* On exit:
|
|
||||||
* Constant registers preserved
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Convert one 32bpp source pixel "s" to 565 in the low 16 bits of "d"
 * without blending. s is clobbered (reduced to its green field). The upper
 * half of d still carries the red field at bits 16-20, so callers store
 * with STRH.
 */
.macro ARGBto565PixelAlpha_1pixel_opaque s, d, rbmask
|
|
||||||
and d, rbmask, s, lsr #3 /* red at 16-20, blue at 0-4 */
|
|
||||||
and s, s, #0xfc00 /* green, six bits at 10-15 */
|
|
||||||
orr d, d, d, lsr #5 /* copy red down to bits 11-15 */
|
|
||||||
orr d, d, s, lsr #5 /* green into bits 5-10 */
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* On entry:
|
|
||||||
* s1, s2 hold 2 32bpp source pixels
|
|
||||||
* d holds 2 16bpp destination pixels
|
|
||||||
* rbmask, rbhalf, ghalf hold 0x001f001f, 0x00100010, 0x00000200 respectively
|
|
||||||
* other registers are temporaries
|
|
||||||
* On exit:
|
|
||||||
* Constant registers preserved
|
|
||||||
* Blended results have been written through destination pointer
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Blend two 32bpp source pixels (s1, s2) onto the two 565 pixels packed in
 * "d" and store both results with STRH at [DST, #-4] and [DST, #-2].
 * The arithmetic per pixel is the same as in
 * ARGBto565PixelAlpha_1pixel_translucent; the two computations are
 * interleaved here.
 * Register reuse to be aware of:
 *   - after "uxth d, d, ror #16", d holds the second (upper) destination pixel
 *   - s1 is reused as the green field of the second destination pixel
 *   - alpha is reused to assemble each finished pixel before it is stored
 * s1, s2, d, alpha, rb, g and misc are all clobbered.
 */
.macro ARGBto565PixelAlpha_2pixels_translucent s1, s2, d, rbmask, rbhalf, ghalf, alpha, rb, g, misc
|
|
||||||
mov alpha, s1, lsr #27
|
|
||||||
and misc, s1, #0xfc00
|
|
||||||
and g, d, #0x07e0
|
|
||||||
pkhbt rb, d, d, lsl #5
|
|
||||||
rsb misc, g, misc, lsr #5
|
|
||||||
and s1, rbmask, s1, lsr #3
|
|
||||||
and rb, rbmask, rb
|
|
||||||
sub s1, s1, rb
|
|
||||||
smlabb misc, misc, alpha, ghalf
|
|
||||||
mla s1, s1, alpha, rbhalf
|
|
||||||
uxth d, d, ror #16 /* switch d to the second destination pixel */
|
|
||||||
add misc, misc, misc, lsl #5
|
|
||||||
mov alpha, s2, lsr #27 /* alpha of the second source pixel */
|
|
||||||
add g, g, misc, asr #10
|
|
||||||
add s1, s1, s1, lsl #5
|
|
||||||
and g, g, #0x07e0
|
|
||||||
add rb, rb, s1, asr #10
|
|
||||||
and rb, rb, rbmask
|
|
||||||
and misc, s2, #0xfc00
|
|
||||||
pkhbt rb, rb, rb, lsl #11
|
|
||||||
and s1, d, #0x07e0 /* s1 now = green of the second destination pixel */
|
|
||||||
pkhbt d, d, d, lsl #5
|
|
||||||
rsb misc, s1, misc, lsr #5
|
|
||||||
and s2, rbmask, s2, lsr #3
|
|
||||||
and d, rbmask, d
|
|
||||||
sub s2, s2, d
|
|
||||||
smlabb misc, misc, alpha, ghalf
|
|
||||||
mla s2, s2, alpha, rbhalf
|
|
||||||
orr alpha, rb, g /* assemble the first result in alpha */
|
|
||||||
add misc, misc, misc, lsl #5
|
|
||||||
orr alpha, alpha, rb, lsr #16
|
|
||||||
add s1, s1, misc, asr #10
|
|
||||||
add s2, s2, s2, lsl #5
|
|
||||||
and s1, s1, #0x07e0
|
|
||||||
add d, d, s2, asr #10
|
|
||||||
and d, d, rbmask
|
|
||||||
strh alpha, [DST, #-4] /* first blended pixel */
|
|
||||||
pkhbt d, d, d, lsl #11
|
|
||||||
orr alpha, d, s1
|
|
||||||
orr alpha, alpha, d, lsr #16
|
|
||||||
strh alpha, [DST, #-2] /* second blended pixel */
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* On entry:
|
|
||||||
* s1, s2 hold 2 32bpp source pixels
|
|
||||||
* rbmask holds 0x001f001f
|
|
||||||
* other registers are temporaries
|
|
||||||
* On exit:
|
|
||||||
* Constant registers preserved
|
|
||||||
* Blended results have been written through destination pointer
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Convert two 32bpp source pixels (s1, s2) to 565 without blending and store
 * them with STRH at [DST, #-4] and [DST, #-2].
 * "d" is used only as a temporary for the first result; its incoming value
 * is not read. s1 is reused to build the second result. g is a temporary.
 */
.macro ARGBto565PixelAlpha_2pixels_opaque s1, s2, d, rbmask, g
|
|
||||||
and g, s1, #0xfc00
|
|
||||||
and d, rbmask, s1, lsr #3
|
|
||||||
and s1, rbmask, s2, lsr #3 /* s1 now carries red/blue of the second pixel */
|
|
||||||
orr d, d, d, lsr #5
|
|
||||||
orr d, d, g, lsr #5
|
|
||||||
and g, s2, #0xfc00
|
|
||||||
strh d, [DST, #-4]
|
|
||||||
orr s1, s1, s1, lsr #5
|
|
||||||
orr s1, s1, g, lsr #5
|
|
||||||
strh s1, [DST, #-2]
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Load one pair of pixels and classify their alpha.
 * Loads two source words into WK0/WK1 (SRC += 8) and one destination word,
 * i.e. two 565 pixels, into WK2 (DST += 4).
 * On exit:
 *   Z flag  set when the alpha byte of both source pixels is zero
 *   ORIG_W  WK0 AND WK1, used by ARGBto565PixelAlpha_2pixels_tail to detect
 *           the all-opaque case
 */
.macro ARGBto565PixelAlpha_2pixels_head
|
|
||||||
ldrd WK0, WK1, [SRC], #8
|
|
||||||
ldr WK2, [DST], #4
|
|
||||||
orr SCRATCH, WK0, WK1
|
|
||||||
and ORIG_W, WK0, WK1
|
|
||||||
tst SCRATCH, #0xff000000
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Finish the pixel pair loaded by ARGBto565PixelAlpha_2pixels_head, using
 * the Z flag and ORIG_W it left behind.
 *   Z set                 -> both pixels transparent: nothing is stored
 *   ORIG_W >= 0xff000000  -> both pixels opaque: plain conversion (label 10)
 *   otherwise             -> translucent blend
 * Both pixel macros perform the stores themselves. Register roles for the
 * translucent call: MASK = rbmask, STRIDE_M = rbhalf, STRIDE_S = ghalf;
 * STRIDE_D, WK3, SCRATCH and ORIG_W are temporaries.
 */
.macro ARGBto565PixelAlpha_2pixels_tail
|
|
||||||
beq 20f @ all transparent
|
|
||||||
cmp ORIG_W, #0xff000000
|
|
||||||
bhs 10f @ all opaque
|
|
||||||
ARGBto565PixelAlpha_2pixels_translucent WK0, WK1, WK2, MASK, STRIDE_M, STRIDE_S, STRIDE_D, WK3, SCRATCH, ORIG_W
|
|
||||||
b 20f
|
|
||||||
10: ARGBto565PixelAlpha_2pixels_opaque WK0, WK1, WK2, MASK, SCRATCH
|
|
||||||
20:
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Process-head hook. Pixels are handled in pairs (4 destination bytes each).
 * Every pair except the last is fully processed here (head + tail); for the
 * last pair only the head runs, leaving its tail to
 * ARGBto565PixelAlpha_process_tail:
 *   numbytes == 16 : three complete pairs, then the head of the fourth
 *   numbytes == 8  : one complete pair, then the head of the second
 *   numbytes == 4  : the head of the single pair
 *   numbytes == 2  : load one source word into WK0 and one 565 pixel into
 *                    WK2, and test the source alpha byte
 * The cond, firstreg, unaligned_src, unaligned_mask and preload parameters
 * are not referenced in this body.
 */
.macro ARGBto565PixelAlpha_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
|
|
||||||
.if numbytes == 16
|
|
||||||
ARGBto565PixelAlpha_2pixels_head
|
|
||||||
ARGBto565PixelAlpha_2pixels_tail
|
|
||||||
ARGBto565PixelAlpha_2pixels_head
|
|
||||||
ARGBto565PixelAlpha_2pixels_tail
|
|
||||||
.endif
|
|
||||||
.if numbytes >= 8
|
|
||||||
ARGBto565PixelAlpha_2pixels_head
|
|
||||||
ARGBto565PixelAlpha_2pixels_tail
|
|
||||||
.endif
|
|
||||||
.if numbytes >= 4
|
|
||||||
ARGBto565PixelAlpha_2pixels_head
|
|
||||||
.else // numbytes == 2
|
|
||||||
ldr WK0, [SRC], #4
|
|
||||||
ldrh WK2, [DST], #2
|
|
||||||
tst WK0, #0xff000000
|
|
||||||
.endif
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Process-tail hook: completes the work left pending by
 * ARGBto565PixelAlpha_process_head.
 *   numbytes >= 4 : run the pair tail for the last loaded pixel pair
 *   numbytes == 2 : single pixel - skip if its alpha is zero (Z set by the
 *                   head), convert directly if WK0 >= 0xff000000, otherwise
 *                   blend; the result in WK2 is stored with STRH at
 *                   [DST, #-2] (label 19)
 * The cond and firstreg parameters are not referenced in this body.
 */
.macro ARGBto565PixelAlpha_process_tail cond, numbytes, firstreg
|
|
||||||
.if numbytes >= 4
|
|
||||||
ARGBto565PixelAlpha_2pixels_tail
|
|
||||||
.else // numbytes == 2
|
|
||||||
beq 20f @ all transparent
|
|
||||||
cmp WK0, #0xff000000
|
|
||||||
bhs 10f @ opaque
|
|
||||||
ARGBto565PixelAlpha_1pixel_translucent WK0, WK2, MASK, STRIDE_M, STRIDE_S, STRIDE_D, WK3, SCRATCH, ORIG_W
|
|
||||||
b 19f
|
|
||||||
10: ARGBto565PixelAlpha_1pixel_opaque WK0, WK2, MASK
|
|
||||||
19: strh WK2, [DST, #-2]
|
|
||||||
20:
|
|
||||||
.endif
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Instantiate BlitARGBto565PixelAlphaARMSIMDAsm from the
 * ARGBto565PixelAlpha_* hooks: init, newline, process head and process tail
 * are supplied; cleanup is nop_macro.
 * NOTE(review): "32, 0, 16" presumably gives source/mask/destination bits
 * per pixel - confirm against generate_composite_function, which is defined
 * outside this chunk.
 */
generate_composite_function \
|
|
||||||
BlitARGBto565PixelAlphaARMSIMDAsm, 32, 0, 16, \
|
|
||||||
FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_WK0, \
|
|
||||||
2, /* prefetch distance */ \
|
|
||||||
ARGBto565PixelAlpha_init, \
|
|
||||||
ARGBto565PixelAlpha_newline, \
|
|
||||||
nop_macro, /* cleanup */ \
|
|
||||||
ARGBto565PixelAlpha_process_head, \
|
|
||||||
ARGBto565PixelAlpha_process_tail
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
/* Swap bytes 0 and 2 of one work register in place, leaving bytes 1 and 3
 * unchanged. Every instruction carries the condition suffix "cond".
 *   cond  ARM condition code appended to each mnemonic
 *   reg   index of the work register (the macro operates on WK followed by
 *         that index)
 *   tmp   scratch register (clobbered)
 */
.macro BGR888toRGB888_1pixel cond, reg, tmp
|
|
||||||
uxtb16&cond tmp, WK&reg, ror #8 /* tmp = bytes 3 and 1, each in the low byte of a halfword */
|
|
||||||
uxtb16&cond WK&reg, WK&reg, ror #16 /* bytes 0 and 2 exchanged, bytes 1 and 3 cleared */
|
|
||||||
orr&cond WK&reg, WK&reg, tmp, lsl #8 /* put bytes 1 and 3 back */
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Two-register version of BGR888toRGB888_1pixel: swaps bytes 0 and 2 of two
 * work registers in place, leaving bytes 1 and 3 unchanged. The two
 * conversions are interleaved. Every instruction carries the condition
 * suffix "cond".
 *   cond        ARM condition code appended to each mnemonic
 *   reg1, reg2  indices of the two work registers to convert
 *   tmp1, tmp2  scratch registers (clobbered)
 */
.macro BGR888toRGB888_2pixels cond, reg1, reg2, tmp1, tmp2
|
|
||||||
uxtb16&cond tmp1, WK&reg1, ror #8
|
|
||||||
uxtb16&cond WK&reg1, WK&reg1, ror #16
|
|
||||||
uxtb16&cond tmp2, WK&reg2, ror #8
|
|
||||||
uxtb16&cond WK&reg2, WK&reg2, ror #16
|
|
||||||
orr&cond WK&reg1, WK&reg1, tmp1, lsl #8
|
|
||||||
orr&cond WK&reg2, WK&reg2, tmp2, lsl #8
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Process-head hook: delegates entirely to pixld, which is defined outside
 * this chunk; presumably it loads "numbytes" bytes from SRC into work
 * registers starting at index "firstreg" - TODO confirm.
 * unaligned_mask and preload are accepted but not used here.
 */
.macro BGR888toRGB888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
|
|
||||||
pixld cond, numbytes, firstreg, SRC, unaligned_src
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Process-tail hook: byte-swap the work registers in place, one register per
 * 4 bytes of "numbytes", starting at index "firstreg".
 *   numbytes == 16 : registers firstreg+0 .. firstreg+3 (two pair calls)
 *   numbytes == 8  : registers firstreg+0 and firstreg+1
 *   otherwise      : register firstreg+0 only
 * %(expr) makes the assembler evaluate the index before substitution (the
 * file enables .altmacro). MASK and STRIDE_M are used as scratch registers.
 */
.macro BGR888toRGB888_process_tail cond, numbytes, firstreg
|
|
||||||
.if numbytes >= 8
|
|
||||||
BGR888toRGB888_2pixels cond, %(firstreg+0), %(firstreg+1), MASK, STRIDE_M
|
|
||||||
.if numbytes == 16
|
|
||||||
BGR888toRGB888_2pixels cond, %(firstreg+2), %(firstreg+3), MASK, STRIDE_M
|
|
||||||
.endif
|
|
||||||
.else @ numbytes == 4
|
|
||||||
BGR888toRGB888_1pixel cond, %(firstreg+0), MASK
|
|
||||||
.endif
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Instantiate Blit_XBGR8888_XRGB8888ARMSIMDAsm: only the process head and
 * process tail hooks are supplied; init, newline and cleanup are nop_macro.
 * NOTE(review): "32, 0, 32" presumably gives source/mask/destination bits
 * per pixel - confirm against generate_composite_function, which is defined
 * outside this chunk.
 */
generate_composite_function \
|
|
||||||
Blit_XBGR8888_XRGB8888ARMSIMDAsm, 32, 0, 32, \
|
|
||||||
FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_SCRATCH, \
|
|
||||||
2, /* prefetch distance */ \
|
|
||||||
nop_macro, /* init */ \
|
|
||||||
nop_macro, /* newline */ \
|
|
||||||
nop_macro, /* cleanup */ \
|
|
||||||
BGR888toRGB888_process_head, \
|
|
||||||
BGR888toRGB888_process_tail
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
/* Init hook for the 4444 -> 8888 expansion.
 * Loads the nibble mask 0x0f0f0f0f into MASK and writes 0x50000 to the
 * status field of CPSR, i.e. GE[3:0] = 0101. With that setting SEL takes
 * bytes 0 and 2 from its first operand and bytes 1 and 3 from its second.
 */
.macro RGB444toRGB888_init
|
|
||||||
ldr MASK, =0x0f0f0f0f
|
|
||||||
/* Set GE[3:0] to 0101 so SEL instructions do what we want */
|
|
||||||
msr CPSR_s, #0x50000
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Expand one 16-bit 4444 pixel held in the low half of a work register to a
 * 32-bit 8888 pixel in the same register, duplicating every nibble into a
 * full byte. The bit-layout comment on each line shows the register contents
 * after that instruction.
 *   reg   index of the work register (the macro operates on WK followed by
 *         that index)
 *   mask  register holding 0x0f0f0f0f
 *   tmp   scratch register (clobbered)
 * Requires GE[3:0] = 0101 for the final SEL, as set up by
 * RGB444toRGB888_init.
 */
.macro RGB444toRGB888_1pixel reg, mask, tmp
|
|
||||||
pkhbt WK&reg, WK&reg, WK&reg, lsl #12 @ 0000aaaarrrrggggaaaarrrrggggbbbb
|
|
||||||
and WK&reg, mask, WK&reg @ 0000aaaa0000gggg0000rrrr0000bbbb
|
|
||||||
orr WK&reg, WK&reg, WK&reg, lsl #4 @ aaaaaaaaggggggggrrrrrrrrbbbbbbbb
|
|
||||||
pkhtb tmp, WK&reg, WK&reg, asr #8 @ aaaaaaaaggggggggggggggggrrrrrrrr
|
|
||||||
pkhbt WK&reg, WK&reg, WK&reg, lsl #8 @ ggggggggrrrrrrrrrrrrrrrrbbbbbbbb
|
|
||||||
sel WK&reg, WK&reg, tmp @ aaaaaaaarrrrrrrrggggggggbbbbbbbb
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Expand the two 16-bit 4444 pixels packed in one work register into two
 * 32-bit 8888 pixels. In the bit-layout comments, lower-case letters are the
 * pixel from the low halfword and upper-case letters the pixel from the high
 * halfword.
 *   in          index of the input work register
 *   out1, out2  indices of the output work registers for the low and high
 *               pixel respectively; the input is read only by the first two
 *               instructions, so either output may be the same register
 *   mask        register holding 0x0f0f0f0f
 *   tmp1, tmp2  scratch registers (clobbered)
 * Requires GE[3:0] = 0101 for the SEL instructions, as set up by
 * RGB444toRGB888_init.
 */
.macro RGB444toRGB888_2pixels in, out1, out2, mask, tmp1, tmp2
|
|
||||||
and tmp1, mask, WK&in @ 0000RRRR0000BBBB0000rrrr0000bbbb
|
|
||||||
and tmp2, mask, WK&in, lsr #4 @ 0000AAAA0000GGGG0000aaaa0000gggg
|
|
||||||
orr tmp1, tmp1, tmp1, lsl #4 @ RRRRRRRRBBBBBBBBrrrrrrrrbbbbbbbb
|
|
||||||
orr tmp2, tmp2, tmp2, lsl #4 @ AAAAAAAAGGGGGGGGaaaaaaaagggggggg
|
|
||||||
pkhtb WK&out2, tmp2, tmp1, asr #16 @ AAAAAAAAGGGGGGGGRRRRRRRRBBBBBBBB
|
|
||||||
pkhbt WK&out1, tmp1, tmp2, lsl #16 @ aaaaaaaaggggggggrrrrrrrrbbbbbbbb
|
|
||||||
pkhtb tmp2, WK&out2, WK&out2, asr #8 @ AAAAAAAAGGGGGGGGGGGGGGGGRRRRRRRR
|
|
||||||
pkhtb tmp1, WK&out1, WK&out1, asr #8 @ aaaaaaaaggggggggggggggggrrrrrrrr
|
|
||||||
pkhbt WK&out1, WK&out1, WK&out1, lsl #8 @ ggggggggrrrrrrrrrrrrrrrrbbbbbbbb
|
|
||||||
pkhbt WK&out2, WK&out2, WK&out2, lsl #8 @ GGGGGGGGRRRRRRRRRRRRRRRRBBBBBBBB
|
|
||||||
sel WK&out1, WK&out1, tmp1 @ aaaaaaaarrrrrrrrggggggggbbbbbbbb
|
|
||||||
sel WK&out2, WK&out2, tmp2 @ AAAAAAAARRRRRRRRGGGGGGGGBBBBBBBB
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Process-head hook: delegates to pixld (defined outside this chunk) with a
 * byte count of numbytes/2, since each 16-bit source pixel produces a
 * 32-bit destination pixel. Presumably pixld loads that many bytes from SRC
 * into work registers starting at index "firstreg" - TODO confirm.
 * unaligned_mask and preload are accepted but not used here.
 */
.macro RGB444toRGB888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
|
|
||||||
pixld cond, numbytes/2, firstreg, SRC, unaligned_src
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Process-tail hook: expand the loaded 4444 pixels in place.
 *   numbytes == 16 : WK(firstreg+1) is expanded into firstreg+2/firstreg+3
 *                    first, then WK(firstreg+0) into firstreg+0/firstreg+1.
 *                    The order matters: the second call overwrites
 *                    firstreg+1, which is the input of the first call.
 *   numbytes == 8  : WK(firstreg+0) is expanded into firstreg+0/firstreg+1
 *   otherwise      : single pixel in WK(firstreg+0)
 * MASK holds the nibble mask; STRIDE_M and SCRATCH are scratch registers.
 * The cond parameter is not referenced in this body.
 */
.macro RGB444toRGB888_process_tail cond, numbytes, firstreg
|
|
||||||
.if numbytes >= 8
|
|
||||||
.if numbytes == 16
|
|
||||||
RGB444toRGB888_2pixels %(firstreg+1), %(firstreg+2), %(firstreg+3), MASK, STRIDE_M, SCRATCH
|
|
||||||
.endif
|
|
||||||
RGB444toRGB888_2pixels %(firstreg+0), %(firstreg+0), %(firstreg+1), MASK, STRIDE_M, SCRATCH
|
|
||||||
.else @ numbytes == 4
|
|
||||||
RGB444toRGB888_1pixel %(firstreg+0), MASK, SCRATCH
|
|
||||||
.endif
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/* Instantiate Blit_RGB444_XRGB8888ARMSIMDAsm from the RGB444toRGB888_*
 * hooks: init, process head and process tail are supplied; newline and
 * cleanup are nop_macro.
 * NOTE(review): "16, 0, 32" presumably gives source/mask/destination bits
 * per pixel - confirm against generate_composite_function, which is defined
 * outside this chunk.
 */
generate_composite_function \
|
|
||||||
Blit_RGB444_XRGB8888ARMSIMDAsm, 16, 0, 32, \
|
|
||||||
FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER, \
|
|
||||||
2, /* prefetch distance */ \
|
|
||||||
RGB444toRGB888_init, \
|
|
||||||
nop_macro, /* newline */ \
|
|
||||||
nop_macro, /* cleanup */ \
|
|
||||||
RGB444toRGB888_process_head, \
|
|
||||||
RGB444toRGB888_process_tail
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue