From 6608bd7c78bd1fec5a856e9dcc50d62375c54c93 Mon Sep 17 00:00:00 2001 From: David McPaul Date: Sun, 15 Mar 2009 01:34:21 +0000 Subject: [PATCH] sync with ffmpeg 0.5 release git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@29534 a95241bf-73f2-0310-859d-f6bbb57e9c96 --- .../media/plugins/avcodec/libswscale/Jamfile | 3 +- .../plugins/avcodec/libswscale/cs_test.c | 175 -- .../plugins/avcodec/libswscale/rgb2rgb.c | 206 +- .../plugins/avcodec/libswscale/rgb2rgb.h | 69 +- .../avcodec/libswscale/rgb2rgb_template.c | 488 ++-- .../avcodec/libswscale/swscale-example.c | 229 -- .../plugins/avcodec/libswscale/swscale.c | 1230 ++++++---- .../plugins/avcodec/libswscale/swscale.h | 130 +- .../libswscale/swscale_altivec_template.c | 10 +- .../avcodec/libswscale/swscale_avoption.c | 5 +- .../plugins/avcodec/libswscale/swscale_bfin.c | 15 +- .../avcodec/libswscale/swscale_internal.h | 75 +- .../avcodec/libswscale/swscale_template.c | 2078 ++++++++--------- .../plugins/avcodec/libswscale/yuv2rgb.c | 1099 ++++----- .../avcodec/libswscale/yuv2rgb_altivec.c | 43 +- .../plugins/avcodec/libswscale/yuv2rgb_bfin.c | 21 +- .../plugins/avcodec/libswscale/yuv2rgb_mlib.c | 2 +- .../avcodec/libswscale/yuv2rgb_template.c | 343 +-- .../plugins/avcodec/libswscale/yuv2rgb_vis.c | 12 +- 19 files changed, 2804 insertions(+), 3429 deletions(-) delete mode 100644 src/add-ons/media/plugins/avcodec/libswscale/cs_test.c delete mode 100644 src/add-ons/media/plugins/avcodec/libswscale/swscale-example.c diff --git a/src/add-ons/media/plugins/avcodec/libswscale/Jamfile b/src/add-ons/media/plugins/avcodec/libswscale/Jamfile index 10267c0230..d2cea3e719 100644 --- a/src/add-ons/media/plugins/avcodec/libswscale/Jamfile +++ b/src/add-ons/media/plugins/avcodec/libswscale/Jamfile @@ -1,5 +1,6 @@ SubDir HAIKU_TOP src add-ons media plugins avcodec libswscale ; +SubDirHdrs [ FDirName $(SUBDIR) .. ] ; SubDirHdrs [ FDirName $(SUBDIR) ../libavutil ] ; SubDirHdrs [ FDirName $(SUBDIR) ../libavcodec ] ; @@ -8,7 +9,7 @@ TARGET_WARNING_CCFLAGS = [ FFilter $(TARGET_WARNING_CCFLAGS) : -Wall -Wmissing-prototypes -Wsign-compare -Wpointer-arith ] ; SubDirCcFlags -fomit-frame-pointer -DPIC ; -SubDirCcFlags -DHAVE_AV_CONFIG_H=1 ; +#SubDirCcFlags -DHAVE_AV_CONFIG_H=1 ; StaticLibrary libswscale.a : rgb2rgb.c diff --git a/src/add-ons/media/plugins/avcodec/libswscale/cs_test.c b/src/add-ons/media/plugins/avcodec/libswscale/cs_test.c deleted file mode 100644 index d49a60582f..0000000000 --- a/src/add-ons/media/plugins/avcodec/libswscale/cs_test.c +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright (C) 2002 Michael Niedermayer - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include -#include /* for memset() */ -#include -#include -#include - -#include "swscale.h" -#include "rgb2rgb.h" - -#define SIZE 1000 -#define srcByte 0x55 -#define dstByte 0xBB - -#define FUNC(s,d,n) {s,d,#n,n} - -static int cpu_caps; - -static char *args_parse(int argc, char *argv[]) -{ - int o; - - while ((o = getopt(argc, argv, "m23")) != -1) { - switch (o) { - case 'm': - cpu_caps |= SWS_CPU_CAPS_MMX; - break; - case '2': - cpu_caps |= SWS_CPU_CAPS_MMX2; - break; - case '3': - cpu_caps |= SWS_CPU_CAPS_3DNOW; - break; - default: - av_log(NULL, AV_LOG_ERROR, "Unknown option %c\n", o); - } - } - - return argv[optind]; -} - -int main(int argc, char **argv) -{ - int i, funcNum; - uint8_t *srcBuffer= (uint8_t*)av_malloc(SIZE); - uint8_t *dstBuffer= (uint8_t*)av_malloc(SIZE); - int failedNum=0; - int passedNum=0; - - av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n"); - args_parse(argc, argv); - av_log(NULL, AV_LOG_INFO, "CPU capabilities forced to %x\n", cpu_caps); - sws_rgb2rgb_init(cpu_caps); - - for(funcNum=0; ; funcNum++){ - struct func_info_s { - int src_bpp; - int dst_bpp; - char *name; - void (*func)(const uint8_t *src, uint8_t *dst, long src_size); - } func_info[] = { - FUNC(2, 2, rgb15to16), - FUNC(2, 3, rgb15to24), - FUNC(2, 4, rgb15to32), - FUNC(2, 3, rgb16to24), - FUNC(2, 4, rgb16to32), - FUNC(3, 2, rgb24to15), - FUNC(3, 2, rgb24to16), - FUNC(3, 4, rgb24to32), - FUNC(4, 2, rgb32to15), - FUNC(4, 2, rgb32to16), - FUNC(4, 3, rgb32to24), - FUNC(2, 2, rgb16to15), - FUNC(2, 2, rgb15tobgr15), - FUNC(2, 2, rgb15tobgr16), - FUNC(2, 3, rgb15tobgr24), - FUNC(2, 4, rgb15tobgr32), - FUNC(2, 2, rgb16tobgr15), - FUNC(2, 2, rgb16tobgr16), - FUNC(2, 3, rgb16tobgr24), - FUNC(2, 4, rgb16tobgr32), - FUNC(3, 2, rgb24tobgr15), - FUNC(3, 2, rgb24tobgr16), - FUNC(3, 3, rgb24tobgr24), - FUNC(3, 4, rgb24tobgr32), - FUNC(4, 2, rgb32tobgr15), - FUNC(4, 2, rgb32tobgr16), - FUNC(4, 3, rgb32tobgr24), - FUNC(4, 4, rgb32tobgr32), - FUNC(0, 0, NULL) - }; - int width; - int failed=0; - int srcBpp=0; - int dstBpp=0; - - if (!func_info[funcNum].func) break; - - av_log(NULL, AV_LOG_INFO,"."); - memset(srcBuffer, srcByte, SIZE); - - for(width=63; width>0; width--){ - int dstOffset; - for(dstOffset=128; dstOffset<196; dstOffset+=4){ - int srcOffset; - memset(dstBuffer, dstByte, SIZE); - - for(srcOffset=128; srcOffset<196; srcOffset+=4){ - uint8_t *src= srcBuffer+srcOffset; - uint8_t *dst= dstBuffer+dstOffset; - char *name=NULL; - - if(failed) break; //don't fill the screen with shit ... - - srcBpp = func_info[funcNum].src_bpp; - dstBpp = func_info[funcNum].dst_bpp; - name = func_info[funcNum].name; - - func_info[funcNum].func(src, dst, width*srcBpp); - - if(!srcBpp) break; - - for(i=0; i #include "config.h" -#include "x86_cpu.h" -#include "bswap.h" +#include "libavutil/x86_cpu.h" +#include "libavutil/bswap.h" #include "rgb2rgb.h" #include "swscale.h" #include "swscale_internal.h" #define FAST_BGR2YV12 // use 7-bit instead of 15-bit coefficients -void (*rgb24to32)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb32to24)(const uint8_t *src, uint8_t *dst, long src_size); +void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size); +void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size); +void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size); +void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size); void (*rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size); void (*rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size); void (*rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb15to24)(const uint8_t *src, uint8_t *dst, long src_size); +void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size); void (*rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size); void (*rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb16to24)(const uint8_t *src, uint8_t *dst, long src_size); +void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size); void (*rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size); -//void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size); void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size); -void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size); +void (*rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size); +void (*rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size); void (*rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size); -//void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size); void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size); void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size); @@ -65,6 +63,9 @@ void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, long width, long height, long lumStride, long chromStride, long dstStride); +void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, + long width, long height, + long lumStride, long chromStride, long dstStride); void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, long width, long height, long lumStride, long chromStride, long srcStride); @@ -87,12 +88,13 @@ void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *sr long srcStride1, long srcStride2, long srcStride3, long dstStride); -#if defined(ARCH_X86) && defined(CONFIG_GPL) +#if ARCH_X86 && CONFIG_GPL DECLARE_ASM_CONST(8, uint64_t, mmx_null) = 0x0000000000000000ULL; DECLARE_ASM_CONST(8, uint64_t, mmx_one) = 0xFFFFFFFFFFFFFFFFULL; DECLARE_ASM_CONST(8, uint64_t, mask32b) = 0x000000FF000000FFULL; DECLARE_ASM_CONST(8, uint64_t, mask32g) = 0x0000FF000000FF00ULL; DECLARE_ASM_CONST(8, uint64_t, mask32r) = 0x00FF000000FF0000ULL; +DECLARE_ASM_CONST(8, uint64_t, mask32a) = 0xFF000000FF000000ULL; DECLARE_ASM_CONST(8, uint64_t, mask32) = 0x00FFFFFF00FFFFFFULL; DECLARE_ASM_CONST(8, uint64_t, mask3216br) = 0x00F800F800F800F8ULL; DECLARE_ASM_CONST(8, uint64_t, mask3216g) = 0x0000FC000000FC00ULL; @@ -121,22 +123,7 @@ DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL; DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL; DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL; DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; - -#if 0 -static volatile uint64_t __attribute__((aligned(8))) b5Dither; -static volatile uint64_t __attribute__((aligned(8))) g5Dither; -static volatile uint64_t __attribute__((aligned(8))) g6Dither; -static volatile uint64_t __attribute__((aligned(8))) r5Dither; - -static uint64_t __attribute__((aligned(8))) dither4[2]={ - 0x0103010301030103LL, - 0x0200020002000200LL,}; - -static uint64_t __attribute__((aligned(8))) dither8[2]={ - 0x0602060206020602LL, - 0x0004000400040004LL,}; -#endif -#endif /* defined(ARCH_X86) */ +#endif /* ARCH_X86 */ #define RGB2YUV_SHIFT 8 #define BY ((int)( 0.098*(1< dst format: ABC */ -void palette8torgb24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette) +void palette8topacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette) { long i; -/* - Writes 1 byte too much and might cause alignment issues on some architectures? - for (i=0; i> 2; @@ -349,14 +275,14 @@ void rgb32tobgr24(const uint8_t *src, uint8_t *dst, long src_size) } } -void rgb24tobgr32(const uint8_t *src, uint8_t *dst, long src_size) +void rgb24to32(const uint8_t *src, uint8_t *dst, long src_size) { long i; for (i=0; 3*i BGR32 (= A,R,G,B) */ - dst[4*i + 0] = 0; + dst[4*i + 0] = 255; dst[4*i + 1] = src[3*i + 0]; dst[4*i + 2] = src[3*i + 1]; dst[4*i + 3] = src[3*i + 2]; @@ -364,7 +290,7 @@ void rgb24tobgr32(const uint8_t *src, uint8_t *dst, long src_size) dst[4*i + 0] = src[3*i + 2]; dst[4*i + 1] = src[3*i + 1]; dst[4*i + 2] = src[3*i + 0]; - dst[4*i + 3] = 0; + dst[4*i + 3] = 255; #endif } } @@ -380,7 +306,7 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size) register uint16_t bgr; bgr = *s++; #ifdef WORDS_BIGENDIAN - *d++ = 0; + *d++ = 255; *d++ = (bgr&0x1F)<<3; *d++ = (bgr&0x7E0)>>3; *d++ = (bgr&0xF800)>>8; @@ -388,12 +314,12 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size) *d++ = (bgr&0xF800)>>8; *d++ = (bgr&0x7E0)>>3; *d++ = (bgr&0x1F)<<3; - *d++ = 0; + *d++ = 255; #endif } } -void rgb16tobgr24(const uint8_t *src, uint8_t *dst, long src_size) +void rgb16to24(const uint8_t *src, uint8_t *dst, long src_size) { const uint16_t *end; uint8_t *d = dst; @@ -416,13 +342,8 @@ void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size) for (i=0; i>5; - b = (rgb&0xF800)>>11; - dst[2*i] = (b&0x1F) | ((g&0x3F)<<5) | ((r&0x1F)<<11); + unsigned rgb = ((const uint16_t*)src)[i]; + ((uint16_t*)dst)[i] = (rgb>>11) | (rgb&0x7E0) | (rgb<<11); } } @@ -433,13 +354,8 @@ void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size) for (i=0; i>5; - b = (rgb&0xF800)>>11; - dst[2*i] = (b&0x1F) | ((g&0x1F)<<5) | ((r&0x1F)<<10); + unsigned rgb = ((const uint16_t*)src)[i]; + ((uint16_t*)dst)[i] = (rgb>>11) | ((rgb&0x7C0)>>1) | ((rgb&0x1F)<<10); } } @@ -454,7 +370,7 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size) register uint16_t bgr; bgr = *s++; #ifdef WORDS_BIGENDIAN - *d++ = 0; + *d++ = 255; *d++ = (bgr&0x1F)<<3; *d++ = (bgr&0x3E0)>>2; *d++ = (bgr&0x7C00)>>7; @@ -462,12 +378,12 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size) *d++ = (bgr&0x7C00)>>7; *d++ = (bgr&0x3E0)>>2; *d++ = (bgr&0x1F)<<3; - *d++ = 0; + *d++ = 255; #endif } } -void rgb15tobgr24(const uint8_t *src, uint8_t *dst, long src_size) +void rgb15to24(const uint8_t *src, uint8_t *dst, long src_size) { const uint16_t *end; uint8_t *d = dst; @@ -490,13 +406,8 @@ void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size) for (i=0; i>5; - b = (rgb&0x7C00)>>10; - dst[2*i] = (b&0x1F) | ((g&0x3F)<<5) | ((r&0x1F)<<11); + unsigned rgb = ((const uint16_t*)src)[i]; + ((uint16_t*)dst)[i] = ((rgb&0x7C00)>>10) | ((rgb&0x3E0)<<1) | (rgb<<11); } } @@ -507,17 +418,14 @@ void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size) for (i=0; i>5; - b = (rgb&0x7C00)>>10; - dst[2*i] = (b&0x1F) | ((g&0x1F)<<5) | ((r&0x1F)<<10); + unsigned br; + unsigned rgb = ((const uint16_t*)src)[i]; + br = rgb&0x7c1F; + ((uint16_t*)dst)[i] = (br>>10) | (rgb&0x3E0) | (br<<10); } } -void rgb8tobgr8(const uint8_t *src, uint8_t *dst, long src_size) +void bgr8torgb8(const uint8_t *src, uint8_t *dst, long src_size) { long i; long num_pixels = src_size; diff --git a/src/add-ons/media/plugins/avcodec/libswscale/rgb2rgb.h b/src/add-ons/media/plugins/avcodec/libswscale/rgb2rgb.h index f2697c65d6..df912c8533 100644 --- a/src/add-ons/media/plugins/avcodec/libswscale/rgb2rgb.h +++ b/src/add-ons/media/plugins/avcodec/libswscale/rgb2rgb.h @@ -23,58 +23,56 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef FFMPEG_RGB2RGB_H -#define FFMPEG_RGB2RGB_H +#ifndef SWSCALE_RGB2RGB_H +#define SWSCALE_RGB2RGB_H #include /* A full collection of RGB to RGB(BGR) converters */ -extern void (*rgb24to32) (const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb24to16) (const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb24to15) (const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb32to24) (const uint8_t *src, uint8_t *dst, long src_size); +extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size); +extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size); +extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size); +extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size); extern void (*rgb32to16) (const uint8_t *src, uint8_t *dst, long src_size); extern void (*rgb32to15) (const uint8_t *src, uint8_t *dst, long src_size); extern void (*rgb15to16) (const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb15to24) (const uint8_t *src, uint8_t *dst, long src_size); +extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size); extern void (*rgb15to32) (const uint8_t *src, uint8_t *dst, long src_size); extern void (*rgb16to15) (const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb16to24) (const uint8_t *src, uint8_t *dst, long src_size); +extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size); extern void (*rgb16to32) (const uint8_t *src, uint8_t *dst, long src_size); extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size); -extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size); +extern void (*rgb24to16) (const uint8_t *src, uint8_t *dst, long src_size); +extern void (*rgb24to15) (const uint8_t *src, uint8_t *dst, long src_size); extern void (*rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size); extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size); extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size); -extern void rgb24tobgr32(const uint8_t *src, uint8_t *dst, long src_size); -extern void rgb32tobgr24(const uint8_t *src, uint8_t *dst, long src_size); -extern void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size); -extern void rgb16tobgr24(const uint8_t *src, uint8_t *dst, long src_size); -extern void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size); -extern void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size); -extern void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size); -extern void rgb15tobgr24(const uint8_t *src, uint8_t *dst, long src_size); -extern void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size); -extern void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size); -extern void rgb8tobgr8 (const uint8_t *src, uint8_t *dst, long src_size); +void rgb24to32 (const uint8_t *src, uint8_t *dst, long src_size); +void rgb32to24 (const uint8_t *src, uint8_t *dst, long src_size); +void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size); +void rgb16to24 (const uint8_t *src, uint8_t *dst, long src_size); +void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size); +void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size); +void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size); +void rgb15to24 (const uint8_t *src, uint8_t *dst, long src_size); +void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size); +void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size); +void bgr8torgb8 (const uint8_t *src, uint8_t *dst, long src_size); -extern void palette8torgb32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); -extern void palette8tobgr32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); -extern void palette8torgb24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); -extern void palette8tobgr24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); -extern void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); -extern void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); -extern void palette8torgb15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); -extern void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); +void palette8topacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); +void palette8topacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); +void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); +void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); +void palette8torgb15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); +void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette); /** * Height should be a multiple of 2 and width should be a multiple of 16. * (If this is a problem for anyone then tell me, and I will fix it.) * Chrominance data is only taken from every second line, others are ignored. - * FIXME: Write HQ version. + * FIXME: Write high quality version. */ //void uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, @@ -109,11 +107,18 @@ extern void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_ long width, long height, long lumStride, long chromStride, long dstStride); +/** + * Width should be a multiple of 16. + */ +extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, + long width, long height, + long lumStride, long chromStride, long dstStride); + /** * Height should be a multiple of 2 and width should be a multiple of 2. * (If this is a problem for anyone then tell me, and I will fix it.) * Chrominance data is only taken from every second line, others are ignored. - * FIXME: Write HQ version. + * FIXME: Write high quality version. */ extern void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, long width, long height, @@ -139,4 +144,4 @@ extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint void sws_rgb2rgb_init(int flags); -#endif /* FFMPEG_RGB2RGB_H */ +#endif /* SWSCALE_RGB2RGB_H */ diff --git a/src/add-ons/media/plugins/avcodec/libswscale/rgb2rgb_template.c b/src/add-ons/media/plugins/avcodec/libswscale/rgb2rgb_template.c index ffbf2c734b..e95b628049 100644 --- a/src/add-ons/media/plugins/avcodec/libswscale/rgb2rgb_template.c +++ b/src/add-ons/media/plugins/avcodec/libswscale/rgb2rgb_template.c @@ -28,12 +28,6 @@ */ #include -#include /* for __WORDSIZE */ - -#ifndef __WORDSIZE -// #warning You have a misconfigured system and will probably lose performance! -#define __WORDSIZE MP_WORDSIZE -#endif #undef PREFETCH #undef MOVNTQ @@ -43,38 +37,33 @@ #undef PREFETCHW #undef PAVGB -#ifdef HAVE_SSE2 +#if HAVE_SSE2 #define MMREG_SIZE 16 #else #define MMREG_SIZE 8 #endif -#ifdef HAVE_3DNOW +#if HAVE_AMD3DNOW #define PREFETCH "prefetch" #define PREFETCHW "prefetchw" #define PAVGB "pavgusb" -#elif defined (HAVE_MMX2) +#elif HAVE_MMX2 #define PREFETCH "prefetchnta" #define PREFETCHW "prefetcht0" #define PAVGB "pavgb" #else -#ifdef __APPLE__ -#define PREFETCH "#" -#define PREFETCHW "#" -#else #define PREFETCH " # nop" #define PREFETCHW " # nop" #endif -#endif -#ifdef HAVE_3DNOW -/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */ +#if HAVE_AMD3DNOW +/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */ #define EMMS "femms" #else #define EMMS "emms" #endif -#ifdef HAVE_MMX2 +#if HAVE_MMX2 #define MOVNTQ "movntq" #define SFENCE "sfence" #else @@ -82,22 +71,22 @@ #define SFENCE " # nop" #endif -static inline void RENAME(rgb24to32)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) { uint8_t *dest = dst; const uint8_t *s = src; const uint8_t *end; - #ifdef HAVE_MMX + #if HAVE_MMX const uint8_t *mm_end; #endif end = s + src_size; - #ifdef HAVE_MMX - asm volatile(PREFETCH" %0"::"m"(*s):"memory"); + #if HAVE_MMX + __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); mm_end = end - 23; - asm volatile("movq %0, %%mm7"::"m"(mask32):"memory"); + __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory"); while (s < mm_end) { - asm volatile( + __asm__ volatile( PREFETCH" 32%1 \n\t" "movd %1, %%mm0 \n\t" "punpckldq 3%1, %%mm0 \n\t" @@ -107,10 +96,10 @@ static inline void RENAME(rgb24to32)(const uint8_t *src, uint8_t *dst, long src_ "punpckldq 15%1, %%mm2 \n\t" "movd 18%1, %%mm3 \n\t" "punpckldq 21%1, %%mm3 \n\t" - "pand %%mm7, %%mm0 \n\t" - "pand %%mm7, %%mm1 \n\t" - "pand %%mm7, %%mm2 \n\t" - "pand %%mm7, %%mm3 \n\t" + "por %%mm7, %%mm0 \n\t" + "por %%mm7, %%mm1 \n\t" + "por %%mm7, %%mm2 \n\t" + "por %%mm7, %%mm3 \n\t" MOVNTQ" %%mm0, %0 \n\t" MOVNTQ" %%mm1, 8%0 \n\t" MOVNTQ" %%mm2, 16%0 \n\t" @@ -121,14 +110,14 @@ static inline void RENAME(rgb24to32)(const uint8_t *src, uint8_t *dst, long src_ dest += 32; s += 24; } - asm volatile(SFENCE:::"memory"); - asm volatile(EMMS:::"memory"); + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); #endif while (s < end) { #ifdef WORDS_BIGENDIAN /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */ - *dest++ = 0; + *dest++ = 255; *dest++ = s[2]; *dest++ = s[1]; *dest++ = s[0]; @@ -137,26 +126,26 @@ static inline void RENAME(rgb24to32)(const uint8_t *src, uint8_t *dst, long src_ *dest++ = *s++; *dest++ = *s++; *dest++ = *s++; - *dest++ = 0; + *dest++ = 255; #endif } } -static inline void RENAME(rgb32to24)(const uint8_t *src, uint8_t *dst, long src_size) +static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) { uint8_t *dest = dst; const uint8_t *s = src; const uint8_t *end; -#ifdef HAVE_MMX +#if HAVE_MMX const uint8_t *mm_end; #endif end = s + src_size; -#ifdef HAVE_MMX - asm volatile(PREFETCH" %0"::"m"(*s):"memory"); +#if HAVE_MMX + __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); mm_end = end - 31; while (s < mm_end) { - asm volatile( + __asm__ volatile( PREFETCH" 32%1 \n\t" "movq %1, %%mm0 \n\t" "movq 8%1, %%mm1 \n\t" @@ -207,8 +196,8 @@ static inline void RENAME(rgb32to24)(const uint8_t *src, uint8_t *dst, long src_ dest += 24; s += 32; } - asm volatile(SFENCE:::"memory"); - asm volatile(EMMS:::"memory"); + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); #endif while (s < end) { @@ -241,13 +230,13 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_ register const uint8_t *end; const uint8_t *mm_end; end = s + src_size; -#ifdef HAVE_MMX - asm volatile(PREFETCH" %0"::"m"(*s)); - asm volatile("movq %0, %%mm4"::"m"(mask15s)); +#if HAVE_MMX + __asm__ volatile(PREFETCH" %0"::"m"(*s)); + __asm__ volatile("movq %0, %%mm4"::"m"(mask15s)); mm_end = end - 15; while (s>7; *d++ = (bgr&0x3E0)>>2; *d++ = (bgr&0x1F)<<3; @@ -1284,7 +1273,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_ *d++ = (bgr&0x1F)<<3; *d++ = (bgr&0x3E0)>>2; *d++ = (bgr&0x7C00)>>7; - *d++ = 0; + *d++ = 255; #endif #endif @@ -1294,19 +1283,19 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size) { const uint16_t *end; -#ifdef HAVE_MMX +#if HAVE_MMX const uint16_t *mm_end; #endif uint8_t *d = dst; const uint16_t *s = (const uint16_t*)src; end = s + src_size/2; -#ifdef HAVE_MMX - asm volatile(PREFETCH" %0"::"m"(*s):"memory"); - asm volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); +#if HAVE_MMX + __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); + __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); mm_end = end - 3; while (s < mm_end) { - asm volatile( + __asm__ volatile( PREFETCH" 32%1 \n\t" "movq %1, %%mm0 \n\t" "movq %1, %%mm1 \n\t" @@ -1342,15 +1331,15 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_ d += 16; s += 4; } - asm volatile(SFENCE:::"memory"); - asm volatile(EMMS:::"memory"); + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); #endif while (s < end) { register uint16_t bgr; bgr = *s++; #ifdef WORDS_BIGENDIAN - *d++ = 0; + *d++ = 255; *d++ = (bgr&0xF800)>>8; *d++ = (bgr&0x7E0)>>3; *d++ = (bgr&0x1F)<<3; @@ -1358,7 +1347,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_ *d++ = (bgr&0x1F)<<3; *d++ = (bgr&0x7E0)>>3; *d++ = (bgr&0xF800)>>8; - *d++ = 0; + *d++ = 255; #endif } } @@ -1368,8 +1357,8 @@ static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long s long idx = 15 - src_size; const uint8_t *s = src-idx; uint8_t *d = dst-idx; -#ifdef HAVE_MMX - asm volatile( +#if HAVE_MMX + __asm__ volatile( "test %0, %0 \n\t" "jns 2f \n\t" PREFETCH" (%1, %0) \n\t" @@ -1382,7 +1371,7 @@ static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long s PREFETCH" 32(%1, %0) \n\t" "movq (%1, %0), %%mm0 \n\t" "movq 8(%1, %0), %%mm1 \n\t" -# ifdef HAVE_MMX2 +# if HAVE_MMX2 "pshufw $177, %%mm0, %%mm3 \n\t" "pshufw $177, %%mm1, %%mm5 \n\t" "pand %%mm7, %%mm0 \n\t" @@ -1430,9 +1419,9 @@ static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long s static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) { unsigned i; -#ifdef HAVE_MMX +#if HAVE_MMX long mmx_size= 23 - src_size; - asm volatile ( + __asm__ volatile ( "test %%"REG_a", %%"REG_a" \n\t" "jns 2f \n\t" "movq "MANGLE(mask24r)", %%mm5 \n\t" @@ -1476,8 +1465,8 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s : "r" (src-mmx_size), "r"(dst-mmx_size) ); - asm volatile(SFENCE:::"memory"); - asm volatile(EMMS:::"memory"); + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); if (mmx_size==23) return; //finished, was multiple of 8 @@ -1505,9 +1494,9 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u const long chromWidth= width>>1; for (y=0; y= 64 +#elif HAVE_FAST_64BIT int i; uint64_t *ldst = (uint64_t *) dst; const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; @@ -1630,8 +1619,8 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u ysrc += lumStride; dst += dstStride; } -#ifdef HAVE_MMX -asm( EMMS" \n\t" +#if HAVE_MMX +__asm__( EMMS" \n\t" SFENCE" \n\t" :::"memory"); #endif @@ -1657,9 +1646,9 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u const long chromWidth= width>>1; for (y=0; yyuy2 -#if __WORDSIZE >= 64 +#if HAVE_FAST_64BIT int i; uint64_t *ldst = (uint64_t *) dst; const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; @@ -1736,8 +1725,8 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u ysrc += lumStride; dst += dstStride; } -#ifdef HAVE_MMX -asm( EMMS" \n\t" +#if HAVE_MMX +__asm__( EMMS" \n\t" SFENCE" \n\t" :::"memory"); #endif @@ -1755,6 +1744,16 @@ static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2); } +/** + * Width should be a multiple of 16. + */ +static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, + long width, long height, + long lumStride, long chromStride, long dstStride) +{ + RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1); +} + /** * Width should be a multiple of 16. */ @@ -1777,8 +1776,8 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t const long chromWidth= width>>1; for (y=0; y>1; for (y=0; y>1; -#ifdef HAVE_MMX +#if HAVE_MMX for (y=0; y>1); uint8_t* d=dst1+dstStride1*y; x=0; -#ifdef HAVE_MMX +#if HAVE_MMX for (;x>1); uint8_t* d=dst2+dstStride2*y; x=0; -#ifdef HAVE_MMX +#if HAVE_MMX for (;x>2); uint8_t* d=dst+dstStride*y; x=0; -#ifdef HAVE_MMX +#if HAVE_MMX for (;x - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include -#include -#include -#include -#include - -#undef HAVE_AV_CONFIG_H -#include "libavutil/avutil.h" -#include "swscale.h" -#include "swscale_internal.h" -#include "rgb2rgb.h" - -static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, int w, int h){ - int x,y; - uint64_t ssd=0; - -//printf("%d %d\n", w, h); - - for (y=0; y src -> dst -> out & compare out against ref -// ref & out are YV12 -static int doTest(uint8_t *ref[3], int refStride[3], int w, int h, int srcFormat, int dstFormat, - int srcW, int srcH, int dstW, int dstH, int flags){ - uint8_t *src[3]; - uint8_t *dst[3]; - uint8_t *out[3]; - int srcStride[3], dstStride[3]; - int i; - uint64_t ssdY, ssdU, ssdV; - struct SwsContext *srcContext, *dstContext, *outContext; - int res; - - res = 0; - for (i=0; i<3; i++){ - // avoid stride % bpp != 0 - if (srcFormat==PIX_FMT_RGB24 || srcFormat==PIX_FMT_BGR24) - srcStride[i]= srcW*3; - else - srcStride[i]= srcW*4; - - if (dstFormat==PIX_FMT_RGB24 || dstFormat==PIX_FMT_BGR24) - dstStride[i]= dstW*3; - else - dstStride[i]= dstW*4; - - src[i]= (uint8_t*) malloc(srcStride[i]*srcH); - dst[i]= (uint8_t*) malloc(dstStride[i]*dstH); - out[i]= (uint8_t*) malloc(refStride[i]*h); - if (!src[i] || !dst[i] || !out[i]) { - perror("Malloc"); - res = -1; - - goto end; - } - } - - dstContext = outContext = NULL; - srcContext= sws_getContext(w, h, PIX_FMT_YUV420P, srcW, srcH, srcFormat, flags, NULL, NULL, NULL); - if (!srcContext) { - fprintf(stderr, "Failed to get %s ---> %s\n", - sws_format_name(PIX_FMT_YUV420P), - sws_format_name(srcFormat)); - res = -1; - - goto end; - } - dstContext= sws_getContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, flags, NULL, NULL, NULL); - if (!dstContext) { - fprintf(stderr, "Failed to get %s ---> %s\n", - sws_format_name(srcFormat), - sws_format_name(dstFormat)); - res = -1; - - goto end; - } - outContext= sws_getContext(dstW, dstH, dstFormat, w, h, PIX_FMT_YUV420P, flags, NULL, NULL, NULL); - if (!outContext) { - fprintf(stderr, "Failed to get %s ---> %s\n", - sws_format_name(dstFormat), - sws_format_name(PIX_FMT_YUV420P)); - res = -1; - - goto end; - } -// printf("test %X %X %X -> %X %X %X\n", (int)ref[0], (int)ref[1], (int)ref[2], -// (int)src[0], (int)src[1], (int)src[2]); - - sws_scale(srcContext, ref, refStride, 0, h , src, srcStride); - sws_scale(dstContext, src, srcStride, 0, srcH, dst, dstStride); - sws_scale(outContext, dst, dstStride, 0, dstH, out, refStride); - -#if defined(ARCH_X86) - asm volatile ("emms\n\t"); -#endif - - ssdY= getSSD(ref[0], out[0], refStride[0], refStride[0], w, h); - ssdU= getSSD(ref[1], out[1], refStride[1], refStride[1], (w+1)>>1, (h+1)>>1); - ssdV= getSSD(ref[2], out[2], refStride[2], refStride[2], (w+1)>>1, (h+1)>>1); - - if (srcFormat == PIX_FMT_GRAY8 || dstFormat==PIX_FMT_GRAY8) ssdU=ssdV=0; //FIXME check that output is really gray - - ssdY/= w*h; - ssdU/= w*h/4; - ssdV/= w*h/4; - - if (ssdY>100 || ssdU>100 || ssdV>100){ - printf(" %s %dx%d -> %s %4dx%4d flags=%2d SSD=%5lld,%5lld,%5lld\n", - sws_format_name(srcFormat), srcW, srcH, - sws_format_name(dstFormat), dstW, dstH, - flags, - ssdY, ssdU, ssdV); - } - - end: - - sws_freeContext(srcContext); - sws_freeContext(dstContext); - sws_freeContext(outContext); - - for (i=0; i<3; i++){ - free(src[i]); - free(dst[i]); - free(out[i]); - } - - return res; -} - -void fast_memcpy(void *a, void *b, int s){ //FIXME - memcpy(a, b, s); -} - -static void selfTest(uint8_t *src[3], int stride[3], int w, int h){ - enum PixelFormat srcFormat, dstFormat; - int srcW, srcH, dstW, dstH; - int flags; - - for (srcFormat = 0; srcFormat < PIX_FMT_NB; srcFormat++) { - for (dstFormat = 0; dstFormat < PIX_FMT_NB; dstFormat++) { - printf("%s -> %s\n", - sws_format_name(srcFormat), - sws_format_name(dstFormat)); - - srcW= w; - srcH= h; - for (dstW=w - w/3; dstW<= 4*w/3; dstW+= w/3){ - for (dstH=h - h/3; dstH<= 4*h/3; dstH+= h/3){ - for (flags=1; flags<33; flags*=2) { - int res; - - res = doTest(src, stride, w, h, srcFormat, dstFormat, - srcW, srcH, dstW, dstH, flags); - if (res < 0) { - dstW = 4 * w / 3; - dstH = 4 * h / 3; - flags = 33; - } - } - } - } - } - } -} - -#define W 96 -#define H 96 - -int main(int argc, char **argv){ - uint8_t *rgb_data = malloc (W*H*4); - uint8_t *rgb_src[3]= {rgb_data, NULL, NULL}; - int rgb_stride[3]={4*W, 0, 0}; - uint8_t *data = malloc (3*W*H); - uint8_t *src[3]= {data, data+W*H, data+W*H*2}; - int stride[3]={W, W, W}; - int x, y; - struct SwsContext *sws; - - sws= sws_getContext(W/12, H/12, PIX_FMT_RGB32, W, H, PIX_FMT_YUV420P, 2, NULL, NULL, NULL); - - for (y=0; y YV12 untested special converters - YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be ok) + YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK) YV12/I420 -> YV12/I420 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format BGR24 -> BGR32 & RGB24 -> RGB32 @@ -54,6 +54,7 @@ untested special converters BGR24 -> YV12 */ +#define _SVID_SOURCE //needed for MAP_ANONYMOUS #include #include #include @@ -61,7 +62,7 @@ untested special converters #include #include "config.h" #include -#ifdef HAVE_SYS_MMAN_H +#if HAVE_SYS_MMAN_H #include #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS) #define MAP_ANONYMOUS MAP_ANON @@ -70,22 +71,27 @@ untested special converters #include "swscale.h" #include "swscale_internal.h" #include "rgb2rgb.h" -#include "x86_cpu.h" -#include "bswap.h" +#include "libavutil/x86_cpu.h" +#include "libavutil/bswap.h" + +unsigned swscale_version(void) +{ + return LIBSWSCALE_VERSION_INT; +} #undef MOVNTQ #undef PAVGB //#undef HAVE_MMX2 -//#define HAVE_3DNOW +//#define HAVE_AMD3DNOW //#undef HAVE_MMX //#undef ARCH_X86 //#define WORDS_BIGENDIAN #define DITHER1XBPP -#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit +#define FAST_BGR2YV12 // use 7 bit coefficients instead of 15 bit -#define RET 0xC3 //near return opcode for X86 +#define RET 0xC3 //near return opcode for x86 #ifdef M_PI #define PI M_PI @@ -99,15 +105,18 @@ untested special converters || (x)==PIX_FMT_YUYV422 \ || (x)==PIX_FMT_UYVY422 \ || (x)==PIX_FMT_RGB32 \ + || (x)==PIX_FMT_RGB32_1 \ || (x)==PIX_FMT_BGR24 \ || (x)==PIX_FMT_BGR565 \ || (x)==PIX_FMT_BGR555 \ || (x)==PIX_FMT_BGR32 \ + || (x)==PIX_FMT_BGR32_1 \ || (x)==PIX_FMT_RGB24 \ || (x)==PIX_FMT_RGB565 \ || (x)==PIX_FMT_RGB555 \ || (x)==PIX_FMT_GRAY8 \ || (x)==PIX_FMT_YUV410P \ + || (x)==PIX_FMT_YUV440P \ || (x)==PIX_FMT_GRAY16BE \ || (x)==PIX_FMT_GRAY16LE \ || (x)==PIX_FMT_YUV444P \ @@ -119,6 +128,8 @@ untested special converters || (x)==PIX_FMT_BGR4_BYTE \ || (x)==PIX_FMT_RGB4_BYTE \ || (x)==PIX_FMT_YUV440P \ + || (x)==PIX_FMT_MONOWHITE \ + || (x)==PIX_FMT_MONOBLACK \ ) #define isSupportedOut(x) ( \ (x)==PIX_FMT_YUV420P \ @@ -135,6 +146,7 @@ untested special converters || (x)==PIX_FMT_GRAY16LE \ || (x)==PIX_FMT_GRAY8 \ || (x)==PIX_FMT_YUV410P \ + || (x)==PIX_FMT_YUV440P \ ) #define isPacked(x) ( \ (x)==PIX_FMT_PAL8 \ @@ -143,19 +155,37 @@ untested special converters || isRGB(x) \ || isBGR(x) \ ) +#define usePal(x) ( \ + (x)==PIX_FMT_PAL8 \ + || (x)==PIX_FMT_BGR4_BYTE \ + || (x)==PIX_FMT_RGB4_BYTE \ + || (x)==PIX_FMT_BGR8 \ + || (x)==PIX_FMT_RGB8 \ + ) -#define RGB2YUV_SHIFT 16 -#define BY ((int)( 0.098*(1<BGR scaler */ -#if defined(ARCH_X86) && defined (CONFIG_GPL) +#if ARCH_X86 && CONFIG_GPL DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL; DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL; DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL; @@ -182,11 +212,6 @@ DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL; DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL; DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL; -static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither; -static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither; -static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither; -static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither; - const DECLARE_ALIGNED(8, uint64_t, ff_dither4[2]) = { 0x0103010301030103LL, 0x0200020002000200LL,}; @@ -218,18 +243,133 @@ DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL; DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL; DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL; DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; -#endif /* defined(ARCH_X86) */ + +DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL; +DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL; +DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL; +DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL; +DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL; + +DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV[2][4]) = { + {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL}, + {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL}, +}; + +DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL; + +#endif /* ARCH_X86 && CONFIG_GPL */ // clipping helper table for C implementations: static unsigned char clip_table[768]; static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b); -extern const uint8_t dither_2x2_4[2][8]; -extern const uint8_t dither_2x2_8[2][8]; -extern const uint8_t dither_8x8_32[8][8]; -extern const uint8_t dither_8x8_73[8][8]; -extern const uint8_t dither_8x8_220[8][8]; +static const uint8_t __attribute__((aligned(8))) dither_2x2_4[2][8]={ +{ 1, 3, 1, 3, 1, 3, 1, 3, }, +{ 2, 0, 2, 0, 2, 0, 2, 0, }, +}; + +static const uint8_t __attribute__((aligned(8))) dither_2x2_8[2][8]={ +{ 6, 2, 6, 2, 6, 2, 6, 2, }, +{ 0, 4, 0, 4, 0, 4, 0, 4, }, +}; + +const uint8_t __attribute__((aligned(8))) dither_8x8_32[8][8]={ +{ 17, 9, 23, 15, 16, 8, 22, 14, }, +{ 5, 29, 3, 27, 4, 28, 2, 26, }, +{ 21, 13, 19, 11, 20, 12, 18, 10, }, +{ 0, 24, 6, 30, 1, 25, 7, 31, }, +{ 16, 8, 22, 14, 17, 9, 23, 15, }, +{ 4, 28, 2, 26, 5, 29, 3, 27, }, +{ 20, 12, 18, 10, 21, 13, 19, 11, }, +{ 1, 25, 7, 31, 0, 24, 6, 30, }, +}; + +#if 0 +const uint8_t __attribute__((aligned(8))) dither_8x8_64[8][8]={ +{ 0, 48, 12, 60, 3, 51, 15, 63, }, +{ 32, 16, 44, 28, 35, 19, 47, 31, }, +{ 8, 56, 4, 52, 11, 59, 7, 55, }, +{ 40, 24, 36, 20, 43, 27, 39, 23, }, +{ 2, 50, 14, 62, 1, 49, 13, 61, }, +{ 34, 18, 46, 30, 33, 17, 45, 29, }, +{ 10, 58, 6, 54, 9, 57, 5, 53, }, +{ 42, 26, 38, 22, 41, 25, 37, 21, }, +}; +#endif + +const uint8_t __attribute__((aligned(8))) dither_8x8_73[8][8]={ +{ 0, 55, 14, 68, 3, 58, 17, 72, }, +{ 37, 18, 50, 32, 40, 22, 54, 35, }, +{ 9, 64, 5, 59, 13, 67, 8, 63, }, +{ 46, 27, 41, 23, 49, 31, 44, 26, }, +{ 2, 57, 16, 71, 1, 56, 15, 70, }, +{ 39, 21, 52, 34, 38, 19, 51, 33, }, +{ 11, 66, 7, 62, 10, 65, 6, 60, }, +{ 48, 30, 43, 25, 47, 29, 42, 24, }, +}; + +#if 0 +const uint8_t __attribute__((aligned(8))) dither_8x8_128[8][8]={ +{ 68, 36, 92, 60, 66, 34, 90, 58, }, +{ 20, 116, 12, 108, 18, 114, 10, 106, }, +{ 84, 52, 76, 44, 82, 50, 74, 42, }, +{ 0, 96, 24, 120, 6, 102, 30, 126, }, +{ 64, 32, 88, 56, 70, 38, 94, 62, }, +{ 16, 112, 8, 104, 22, 118, 14, 110, }, +{ 80, 48, 72, 40, 86, 54, 78, 46, }, +{ 4, 100, 28, 124, 2, 98, 26, 122, }, +}; +#endif + +#if 1 +const uint8_t __attribute__((aligned(8))) dither_8x8_220[8][8]={ +{117, 62, 158, 103, 113, 58, 155, 100, }, +{ 34, 199, 21, 186, 31, 196, 17, 182, }, +{144, 89, 131, 76, 141, 86, 127, 72, }, +{ 0, 165, 41, 206, 10, 175, 52, 217, }, +{110, 55, 151, 96, 120, 65, 162, 107, }, +{ 28, 193, 14, 179, 38, 203, 24, 189, }, +{138, 83, 124, 69, 148, 93, 134, 79, }, +{ 7, 172, 48, 213, 3, 168, 45, 210, }, +}; +#elif 1 +// tries to correct a gamma of 1.5 +const uint8_t __attribute__((aligned(8))) dither_8x8_220[8][8]={ +{ 0, 143, 18, 200, 2, 156, 25, 215, }, +{ 78, 28, 125, 64, 89, 36, 138, 74, }, +{ 10, 180, 3, 161, 16, 195, 8, 175, }, +{109, 51, 93, 38, 121, 60, 105, 47, }, +{ 1, 152, 23, 210, 0, 147, 20, 205, }, +{ 85, 33, 134, 71, 81, 30, 130, 67, }, +{ 14, 190, 6, 171, 12, 185, 5, 166, }, +{117, 57, 101, 44, 113, 54, 97, 41, }, +}; +#elif 1 +// tries to correct a gamma of 2.0 +const uint8_t __attribute__((aligned(8))) dither_8x8_220[8][8]={ +{ 0, 124, 8, 193, 0, 140, 12, 213, }, +{ 55, 14, 104, 42, 66, 19, 119, 52, }, +{ 3, 168, 1, 145, 6, 187, 3, 162, }, +{ 86, 31, 70, 21, 99, 39, 82, 28, }, +{ 0, 134, 11, 206, 0, 129, 9, 200, }, +{ 62, 17, 114, 48, 58, 16, 109, 45, }, +{ 5, 181, 2, 157, 4, 175, 1, 151, }, +{ 95, 36, 78, 26, 90, 34, 74, 24, }, +}; +#else +// tries to correct a gamma of 2.5 +const uint8_t __attribute__((aligned(8))) dither_8x8_220[8][8]={ +{ 0, 107, 3, 187, 0, 125, 6, 212, }, +{ 39, 7, 86, 28, 49, 11, 102, 36, }, +{ 1, 158, 0, 131, 3, 180, 1, 151, }, +{ 68, 19, 52, 12, 81, 25, 64, 17, }, +{ 0, 119, 5, 203, 0, 113, 4, 195, }, +{ 45, 9, 96, 33, 42, 8, 91, 30, }, +{ 2, 172, 1, 144, 2, 165, 0, 137, }, +{ 77, 23, 60, 15, 72, 21, 56, 14, }, +}; +#endif const char *sws_format_name(enum PixelFormat format) { @@ -312,6 +452,16 @@ const char *sws_format_name(enum PixelFormat format) return "nv21"; case PIX_FMT_YUV440P: return "yuv440p"; + case PIX_FMT_VDPAU_H264: + return "vdpau_h264"; + case PIX_FMT_VDPAU_MPEG1: + return "vdpau_mpeg1"; + case PIX_FMT_VDPAU_MPEG2: + return "vdpau_mpeg2"; + case PIX_FMT_VDPAU_WMV3: + return "vdpau_wmv3"; + case PIX_FMT_VDPAU_VC1: + return "vdpau_vc1"; default: return "Unknown format"; } @@ -321,7 +471,7 @@ static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW) { - //FIXME Optimize (just quickly writen not opti..) + //FIXME Optimize (just quickly written not optimized..) int i; for (i=0; i>1); i++){\ int j;\ int Y1 = 1<<18;\ @@ -425,6 +575,9 @@ static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFil Y2>>=19;\ U >>=19;\ V >>=19;\ + +#define YSCALE_YUV_2_PACKEDX_C(type) \ + YSCALE_YUV_2_PACKEDX_NOCLIP_C(type)\ if ((Y1|Y2|U|V)&256)\ {\ if (Y1>255) Y1=255; \ @@ -437,8 +590,70 @@ static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFil else if (V<0) V=0; \ } +#define YSCALE_YUV_2_PACKEDX_FULL_C \ + for (i=0; i>=10;\ + U >>=10;\ + V >>=10;\ + +#define YSCALE_YUV_2_RGBX_FULL_C(rnd) \ + YSCALE_YUV_2_PACKEDX_FULL_C\ + Y-= c->yuv2rgb_y_offset;\ + Y*= c->yuv2rgb_y_coeff;\ + Y+= rnd;\ + R= Y + V*c->yuv2rgb_v2r_coeff;\ + G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\ + B= Y + U*c->yuv2rgb_u2b_coeff;\ + if ((R|G|B)&(0xC0000000)){\ + if (R>=(256<<22)) R=(256<<22)-1; \ + else if (R<0)R=0; \ + if (G>=(256<<22)) G=(256<<22)-1; \ + else if (G<0)G=0; \ + if (B>=(256<<22)) B=(256<<22)-1; \ + else if (B<0)B=0; \ + }\ + + +#define YSCALE_YUV_2_GRAY16_C \ + for (i=0; i<(dstW>>1); i++){\ + int j;\ + int Y1 = 1<<18;\ + int Y2 = 1<<18;\ + int U = 1<<18;\ + int V = 1<<18;\ + \ + const int i2= 2*i;\ + \ + for (j=0; j>=11;\ + Y2>>=11;\ + if ((Y1|Y2|U|V)&65536)\ + {\ + if (Y1>65535) Y1=65535; \ + else if (Y1<0)Y1=0; \ + if (Y2>65535) Y2=65535; \ + else if (Y2<0)Y2=0; \ + } + #define YSCALE_YUV_2_RGBX_C(type) \ - YSCALE_YUV_2_PACKEDX_C(type) \ + YSCALE_YUV_2_PACKEDX_C(type) /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\ r = (type *)c->table_rV[V]; \ g = (type *)(c->table_gU[U] + c->table_gV[V]); \ b = (type *)c->table_bU[U]; \ @@ -451,6 +666,12 @@ static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFil int U= (uvbuf0[i ]*uvalpha1+uvbuf1[i ]*uvalpha)>>19; \ int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19; \ +#define YSCALE_YUV_2_GRAY16_2_C \ + for (i=0; i<(dstW>>1); i++){ \ + const int i2= 2*i; \ + int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>11; \ + int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11; \ + #define YSCALE_YUV_2_RGB2_C(type) \ YSCALE_YUV_2_PACKED2_C\ type *r, *b, *g;\ @@ -466,6 +687,12 @@ static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFil int U= (uvbuf1[i ])>>7;\ int V= (uvbuf1[i+VOFW])>>7;\ +#define YSCALE_YUV_2_GRAY16_1_C \ + for (i=0; i<(dstW>>1); i++){\ + const int i2= 2*i;\ + int Y1= buf0[i2 ]<<1;\ + int Y2= buf0[i2+1]<<1;\ + #define YSCALE_YUV_2_RGB1_C(type) \ YSCALE_YUV_2_PACKED1_C\ type *r, *b, *g;\ @@ -488,11 +715,63 @@ static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFil g = (type *)(c->table_gU[U] + c->table_gV[V]);\ b = (type *)c->table_bU[U];\ -#define YSCALE_YUV_2_ANYRGB_C(func, func2)\ +#define YSCALE_YUV_2_MONO2_C \ + const uint8_t * const d128=dither_8x8_220[y&7];\ + uint8_t *g= c->table_gU[128] + c->table_gV[128];\ + for (i=0; i>19) + d128[0]];\ + acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\ + acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\ + acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\ + acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\ + acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\ + acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\ + acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\ + ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\ + dest++;\ + }\ + + +#define YSCALE_YUV_2_MONOX_C \ + const uint8_t * const d128=dither_8x8_220[y&7];\ + uint8_t *g= c->table_gU[128] + c->table_gV[128];\ + int acc=0;\ + for (i=0; i>=19;\ + Y2>>=19;\ + if ((Y1|Y2)&256)\ + {\ + if (Y1>255) Y1=255;\ + else if (Y1<0)Y1=0;\ + if (Y2>255) Y2=255;\ + else if (Y2<0)Y2=0;\ + }\ + acc+= acc + g[Y1+d128[(i+0)&7]];\ + acc+= acc + g[Y2+d128[(i+1)&7]];\ + if ((i&7)==6){\ + ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\ + dest++;\ + }\ + } + + +#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\ switch(c->dstFormat)\ {\ case PIX_FMT_RGB32:\ case PIX_FMT_BGR32:\ + case PIX_FMT_RGB32_1:\ + case PIX_FMT_BGR32_1:\ func(uint32_t)\ ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\ ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\ @@ -584,67 +863,9 @@ static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFil }\ break;\ case PIX_FMT_MONOBLACK:\ + case PIX_FMT_MONOWHITE:\ {\ - const uint8_t * const d128=dither_8x8_220[y&7];\ - uint8_t *g= c->table_gU[128] + c->table_gV[128];\ - for (i=0; i>19) + d128[0]];\ - acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\ - acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\ - acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\ - acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\ - acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\ - acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\ - acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\ - ((uint8_t*)dest)[0]= acc;\ - dest++;\ - }\ -\ -/*\ -((uint8_t*)dest)-= dstW>>4;\ -{\ - int acc=0;\ - int left=0;\ - static int top[1024];\ - static int last_new[1024][1024];\ - static int last_in3[1024][1024];\ - static int drift[1024][1024];\ - int topLeft=0;\ - int shift=0;\ - int count=0;\ - const uint8_t * const d128=dither_8x8_220[y&7];\ - int error_new=0;\ - int error_in3=0;\ - int f=0;\ - \ - for (i=dstW>>1; i>19);\ - int in2 = (76309 * (in - 16) + 32768) >> 16;\ - int in3 = (in2 < 0) ? 0 : ((in2 > 255) ? 255 : in2);\ - int old= (left*7 + topLeft + top[i]*5 + top[i+1]*3)/20 + in3\ - + (last_new[y][i] - in3)*f/256;\ - int new= old> 128 ? 255 : 0;\ -\ - error_new+= FFABS(last_new[y][i] - new);\ - error_in3+= FFABS(last_in3[y][i] - in3);\ - f= error_new - error_in3*4;\ - if (f<0) f=0;\ - if (f>256) f=256;\ -\ - topLeft= top[i];\ - left= top[i]= old - new;\ - last_new[y][i]= new;\ - last_in3[y][i]= in3;\ -\ - acc+= acc + (new&1);\ - if ((i&7)==6){\ - ((uint8_t*)dest)[0]= acc;\ - ((uint8_t*)dest)++;\ - }\ - }\ -}\ -*/\ + func_monoblack\ }\ break;\ case PIX_FMT_YUYV422:\ @@ -663,6 +884,22 @@ static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFil ((uint8_t*)dest)[2*i2+3]= Y2;\ } \ break;\ + case PIX_FMT_GRAY16BE:\ + func_g16\ + ((uint8_t*)dest)[2*i2+0]= Y1>>8;\ + ((uint8_t*)dest)[2*i2+1]= Y1;\ + ((uint8_t*)dest)[2*i2+2]= Y2>>8;\ + ((uint8_t*)dest)[2*i2+3]= Y2;\ + } \ + break;\ + case PIX_FMT_GRAY16LE:\ + func_g16\ + ((uint8_t*)dest)[2*i2+0]= Y1;\ + ((uint8_t*)dest)[2*i2+1]= Y1>>8;\ + ((uint8_t*)dest)[2*i2+2]= Y2;\ + ((uint8_t*)dest)[2*i2+3]= Y2>>8;\ + } \ + break;\ }\ @@ -671,208 +908,109 @@ static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **l uint8_t *dest, int dstW, int y) { int i; - switch(c->dstFormat) - { - case PIX_FMT_BGR32: - case PIX_FMT_RGB32: - YSCALE_YUV_2_RGBX_C(uint32_t) - ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1]; - ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2]; - } - break; - case PIX_FMT_RGB24: - YSCALE_YUV_2_RGBX_C(uint8_t) - ((uint8_t*)dest)[0]= r[Y1]; - ((uint8_t*)dest)[1]= g[Y1]; - ((uint8_t*)dest)[2]= b[Y1]; - ((uint8_t*)dest)[3]= r[Y2]; - ((uint8_t*)dest)[4]= g[Y2]; - ((uint8_t*)dest)[5]= b[Y2]; - dest+=6; - } - break; - case PIX_FMT_BGR24: - YSCALE_YUV_2_RGBX_C(uint8_t) - ((uint8_t*)dest)[0]= b[Y1]; - ((uint8_t*)dest)[1]= g[Y1]; - ((uint8_t*)dest)[2]= r[Y1]; - ((uint8_t*)dest)[3]= b[Y2]; - ((uint8_t*)dest)[4]= g[Y2]; - ((uint8_t*)dest)[5]= r[Y2]; - dest+=6; - } - break; - case PIX_FMT_RGB565: - case PIX_FMT_BGR565: - { - const int dr1= dither_2x2_8[y&1 ][0]; - const int dg1= dither_2x2_4[y&1 ][0]; - const int db1= dither_2x2_8[(y&1)^1][0]; - const int dr2= dither_2x2_8[y&1 ][1]; - const int dg2= dither_2x2_4[y&1 ][1]; - const int db2= dither_2x2_8[(y&1)^1][1]; - YSCALE_YUV_2_RGBX_C(uint16_t) - ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1]; - ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2]; - } - } - break; - case PIX_FMT_RGB555: - case PIX_FMT_BGR555: - { - const int dr1= dither_2x2_8[y&1 ][0]; - const int dg1= dither_2x2_8[y&1 ][1]; - const int db1= dither_2x2_8[(y&1)^1][0]; - const int dr2= dither_2x2_8[y&1 ][1]; - const int dg2= dither_2x2_8[y&1 ][0]; - const int db2= dither_2x2_8[(y&1)^1][1]; - YSCALE_YUV_2_RGBX_C(uint16_t) - ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1]; - ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2]; - } - } - break; - case PIX_FMT_RGB8: - case PIX_FMT_BGR8: - { - const uint8_t * const d64= dither_8x8_73[y&7]; - const uint8_t * const d32= dither_8x8_32[y&7]; - YSCALE_YUV_2_RGBX_C(uint8_t) - ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]]; - ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]]; - } - } - break; - case PIX_FMT_RGB4: - case PIX_FMT_BGR4: - { - const uint8_t * const d64= dither_8x8_73 [y&7]; - const uint8_t * const d128=dither_8x8_220[y&7]; - YSCALE_YUV_2_RGBX_C(uint8_t) - ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]] - +((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4); - } - } - break; - case PIX_FMT_RGB4_BYTE: - case PIX_FMT_BGR4_BYTE: - { - const uint8_t * const d64= dither_8x8_73 [y&7]; - const uint8_t * const d128=dither_8x8_220[y&7]; - YSCALE_YUV_2_RGBX_C(uint8_t) - ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]; - ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]]; - } - } - break; - case PIX_FMT_MONOBLACK: - { - const uint8_t * const d128=dither_8x8_220[y&7]; - uint8_t *g= c->table_gU[128] + c->table_gV[128]; - int acc=0; - for (i=0; i>=19; - Y2>>=19; - if ((Y1|Y2)&256) - { - if (Y1>255) Y1=255; - else if (Y1<0)Y1=0; - if (Y2>255) Y2=255; - else if (Y2<0)Y2=0; - } - acc+= acc + g[Y1+d128[(i+0)&7]]; - acc+= acc + g[Y2+d128[(i+1)&7]]; - if ((i&7)==6){ - ((uint8_t*)dest)[0]= acc; - dest++; - } - } +static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, + int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, + uint8_t *dest, int dstW, int y) +{ + int i; + int step= fmt_depth(c->dstFormat)/8; + int aidx= 3; + + switch(c->dstFormat){ + case PIX_FMT_ARGB: + dest++; + aidx= -1; + case PIX_FMT_RGB24: + aidx--; + case PIX_FMT_RGBA: + YSCALE_YUV_2_RGBX_FULL_C(1<<21) + dest[aidx]= 255; + dest[0]= R>>22; + dest[1]= G>>22; + dest[2]= B>>22; + dest+= step; } break; - case PIX_FMT_YUYV422: - YSCALE_YUV_2_PACKEDX_C(void) - ((uint8_t*)dest)[2*i2+0]= Y1; - ((uint8_t*)dest)[2*i2+1]= U; - ((uint8_t*)dest)[2*i2+2]= Y2; - ((uint8_t*)dest)[2*i2+3]= V; - } - break; - case PIX_FMT_UYVY422: - YSCALE_YUV_2_PACKEDX_C(void) - ((uint8_t*)dest)[2*i2+0]= U; - ((uint8_t*)dest)[2*i2+1]= Y1; - ((uint8_t*)dest)[2*i2+2]= V; - ((uint8_t*)dest)[2*i2+3]= Y2; + case PIX_FMT_ABGR: + dest++; + aidx= -1; + case PIX_FMT_BGR24: + aidx--; + case PIX_FMT_BGRA: + YSCALE_YUV_2_RGBX_FULL_C(1<<21) + dest[aidx]= 255; + dest[0]= B>>22; + dest[1]= G>>22; + dest[2]= R>>22; + dest+= step; } break; + default: + assert(0); } } - -//Note: we have C, X86, MMX, MMX2, 3DNOW version therse no 3DNOW+MMX2 one +//Note: we have C, X86, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one //Plain C versions -#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) || !defined(CONFIG_GPL) +#if !HAVE_MMX || defined (RUNTIME_CPUDETECT) || !CONFIG_GPL #define COMPILE_C #endif -#ifdef ARCH_POWERPC -#if (defined (HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL) +#if ARCH_PPC +#if (HAVE_ALTIVEC || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL +#undef COMPILE_C #define COMPILE_ALTIVEC -#endif //HAVE_ALTIVEC -#endif //ARCH_POWERPC +#endif +#endif //ARCH_PPC -#if defined(ARCH_X86) +#if ARCH_X86 -#if ((defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL) +#if ((HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL #define COMPILE_MMX #endif -#if (defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL) +#if (HAVE_MMX2 || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL #define COMPILE_MMX2 #endif -#if ((defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL) +#if ((HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL #define COMPILE_3DNOW #endif -#endif //ARCH_X86 || ARCH_X86_64 +#endif //ARCH_X86 #undef HAVE_MMX #undef HAVE_MMX2 -#undef HAVE_3DNOW +#undef HAVE_AMD3DNOW +#undef HAVE_ALTIVEC +#define HAVE_MMX 0 +#define HAVE_MMX2 0 +#define HAVE_AMD3DNOW 0 +#define HAVE_ALTIVEC 0 #ifdef COMPILE_C -#undef HAVE_MMX -#undef HAVE_MMX2 -#undef HAVE_3DNOW -#undef HAVE_ALTIVEC #define RENAME(a) a ## _C #include "swscale_template.c" #endif #ifdef COMPILE_ALTIVEC #undef RENAME -#define HAVE_ALTIVEC +#undef HAVE_ALTIVEC +#define HAVE_ALTIVEC 1 #define RENAME(a) a ## _altivec #include "swscale_template.c" #endif -#if defined(ARCH_X86) +#if ARCH_X86 -//X86 versions +//x86 versions /* #undef RENAME #undef HAVE_MMX #undef HAVE_MMX2 -#undef HAVE_3DNOW +#undef HAVE_AMD3DNOW #define ARCH_X86 #define RENAME(a) a ## _X86 #include "swscale_template.c" @@ -880,9 +1018,12 @@ static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **l //MMX versions #ifdef COMPILE_MMX #undef RENAME -#define HAVE_MMX +#undef HAVE_MMX #undef HAVE_MMX2 -#undef HAVE_3DNOW +#undef HAVE_AMD3DNOW +#define HAVE_MMX 1 +#define HAVE_MMX2 0 +#define HAVE_AMD3DNOW 0 #define RENAME(a) a ## _MMX #include "swscale_template.c" #endif @@ -890,9 +1031,12 @@ static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **l //MMX2 versions #ifdef COMPILE_MMX2 #undef RENAME -#define HAVE_MMX -#define HAVE_MMX2 -#undef HAVE_3DNOW +#undef HAVE_MMX +#undef HAVE_MMX2 +#undef HAVE_AMD3DNOW +#define HAVE_MMX 1 +#define HAVE_MMX2 1 +#define HAVE_AMD3DNOW 0 #define RENAME(a) a ## _MMX2 #include "swscale_template.c" #endif @@ -900,16 +1044,19 @@ static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **l //3DNOW versions #ifdef COMPILE_3DNOW #undef RENAME -#define HAVE_MMX +#undef HAVE_MMX #undef HAVE_MMX2 -#define HAVE_3DNOW +#undef HAVE_AMD3DNOW +#define HAVE_MMX 1 +#define HAVE_MMX2 0 +#define HAVE_AMD3DNOW 1 #define RENAME(a) a ## _3DNow #include "swscale_template.c" #endif -#endif //ARCH_X86 || ARCH_X86_64 +#endif //ARCH_X86 -// minor note: the HAVE_xyz is messed up after that line so don't use it +// minor note: the HAVE_xyz are messed up after this line so don't use them static double getSplineCoeff(double a, double b, double c, double d, double dist) { @@ -930,27 +1077,27 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF int filterSize; int filter2Size; int minFilterSize; - double *filter=NULL; - double *filter2=NULL; + int64_t *filter=NULL; + int64_t *filter2=NULL; + const int64_t fone= 1LL<<54; int ret= -1; -#if defined(ARCH_X86) +#if ARCH_X86 if (flags & SWS_CPU_CAPS_MMX) - asm volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions) + __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions) #endif - // Note the +1 is for the MMXscaler which reads over the end + // NOTE: the +1 is for the MMX scaler which reads over the end *filterPos = av_malloc((dstW+1)*sizeof(int16_t)); if (FFABS(xInc - 0x10000) <10) // unscaled { int i; filterSize= 1; - filter= av_malloc(dstW*sizeof(double)*filterSize); - for (i=0; i>16; (*filterPos)[i]= xx; - filter[i]= 1.0; + filter[i]= fone; xDstInSrc+= xInc; } } @@ -979,7 +1126,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF if (flags&SWS_BICUBIC) filterSize= 4; else if (flags&SWS_X ) filterSize= 4; else filterSize= 2; // SWS_BILINEAR / SWS_AREA - filter= av_malloc(dstW*sizeof(double)*filterSize); + filter= av_malloc(dstW*sizeof(*filter)*filterSize); xDstInSrc= xInc/2 - 0x8000; for (i=0; i>16); if (coeff<0) coeff=0; filter[i*filterSize + j]= coeff; xx++; @@ -1002,52 +1148,59 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF } else { - double xDstInSrc; - double sizeFactor, filterSizeInSrc; - const double xInc1= (double)xInc / (double)(1<<16); + int xDstInSrc; + int sizeFactor; - if (flags&SWS_BICUBIC) sizeFactor= 4.0; - else if (flags&SWS_X) sizeFactor= 8.0; - else if (flags&SWS_AREA) sizeFactor= 1.0; //downscale only, for upscale it is bilinear - else if (flags&SWS_GAUSS) sizeFactor= 8.0; // infinite ;) - else if (flags&SWS_LANCZOS) sizeFactor= param[0] != SWS_PARAM_DEFAULT ? 2.0*param[0] : 6.0; - else if (flags&SWS_SINC) sizeFactor= 20.0; // infinite ;) - else if (flags&SWS_SPLINE) sizeFactor= 20.0; // infinite ;) - else if (flags&SWS_BILINEAR) sizeFactor= 2.0; + if (flags&SWS_BICUBIC) sizeFactor= 4; + else if (flags&SWS_X) sizeFactor= 8; + else if (flags&SWS_AREA) sizeFactor= 1; //downscale only, for upscale it is bilinear + else if (flags&SWS_GAUSS) sizeFactor= 8; // infinite ;) + else if (flags&SWS_LANCZOS) sizeFactor= param[0] != SWS_PARAM_DEFAULT ? ceil(2*param[0]) : 6; + else if (flags&SWS_SINC) sizeFactor= 20; // infinite ;) + else if (flags&SWS_SPLINE) sizeFactor= 20; // infinite ;) + else if (flags&SWS_BILINEAR) sizeFactor= 2; else { - sizeFactor= 0.0; //GCC warning killer + sizeFactor= 0; //GCC warning killer assert(0); } - if (xInc1 <= 1.0) filterSizeInSrc= sizeFactor; // upscale - else filterSizeInSrc= sizeFactor*srcW / (double)dstW; + if (xInc <= 1<<16) filterSize= 1 + sizeFactor; // upscale + else filterSize= 1 + (sizeFactor*srcW + dstW - 1)/ dstW; - filterSize= (int)ceil(1 + filterSizeInSrc); // will be reduced later if possible if (filterSize > srcW-2) filterSize=srcW-2; - filter= av_malloc(dstW*sizeof(double)*filterSize); + filter= av_malloc(dstW*sizeof(*filter)*filterSize); - xDstInSrc= xInc1 / 2.0 - 0.5; + xDstInSrc= xInc - 0x10000; for (i=0; i 1<<16) + d= d*dstW/srcW; + floatd= d * (1.0/(1<<30)); + if (flags & SWS_BICUBIC) { - double B= param[0] != SWS_PARAM_DEFAULT ? param[0] : 0.0; - double C= param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6; + int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1<<24); + int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24); + int64_t dd = ( d*d)>>30; + int64_t ddd= (dd*d)>>30; - if (d<1.0) - coeff = (12-9*B-6*C)*d*d*d + (-18+12*B+6*C)*d*d + 6-2*B; - else if (d<2.0) - coeff = (-B-6*C)*d*d*d + (6*B+30*C)*d*d + (-12*B-48*C)*d +8*B+24*C; + if (d < 1LL<<30) + coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30); + else if (d < 1LL<<31) + coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30); else coeff=0.0; + coeff *= fone>>(30+24); } /* else if (flags & SWS_X) { @@ -1058,46 +1211,49 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF else if (flags & SWS_X) { double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0; + double c; - if (d<1.0) - coeff = cos(d*PI); + if (floatd<1.0) + c = cos(floatd*PI); else - coeff=-1.0; - if (coeff<0.0) coeff= -pow(-coeff, A); - else coeff= pow( coeff, A); - coeff= coeff*0.5 + 0.5; + c=-1.0; + if (c<0.0) c= -pow(-c, A); + else c= pow( c, A); + coeff= (c*0.5 + 0.5)*fone; } else if (flags & SWS_AREA) { - double srcPixelSize= 1.0/xInc1; - if (d + srcPixelSize/2 < 0.5) coeff= 1.0; - else if (d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5; + int64_t d2= d - (1<<29); + if (d2*xInc < -(1LL<<(29+16))) coeff= 1.0 * (1LL<<(30+16)); + else if (d2*xInc < (1LL<<(29+16))) coeff= -d2*xInc + (1LL<<(29+16)); else coeff=0.0; + coeff *= fone>>(30+16); } else if (flags & SWS_GAUSS) { double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0; - coeff = pow(2.0, - p*d*d); + coeff = (pow(2.0, - p*floatd*floatd))*fone; } else if (flags & SWS_SINC) { - coeff = d ? sin(d*PI)/(d*PI) : 1.0; + coeff = (d ? sin(floatd*PI)/(floatd*PI) : 1.0)*fone; } else if (flags & SWS_LANCZOS) { double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0; - coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0; - if (d>p) coeff=0; + coeff = (d ? sin(floatd*PI)*sin(floatd*PI/p)/(floatd*floatd*PI*PI/p) : 1.0)*fone; + if (floatd>p) coeff=0; } else if (flags & SWS_BILINEAR) { - coeff= 1.0 - d; + coeff= (1<<30) - d; if (coeff<0) coeff=0; + coeff *= fone >> 30; } else if (flags & SWS_SPLINE) { double p=-2.196152422706632; - coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d); + coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, floatd) * fone; } else { coeff= 0.0; //GCC warning killer @@ -1107,7 +1263,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF filter[i*filterSize + j]= coeff; xx++; } - xDstInSrc+= xInc1; + xDstInSrc+= 2*xInc; } } @@ -1119,31 +1275,24 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF if (srcFilter) filter2Size+= srcFilter->length - 1; if (dstFilter) filter2Size+= dstFilter->length - 1; assert(filter2Size>0); - filter2= av_malloc(filter2Size*dstW*sizeof(double)); + filter2= av_mallocz(filter2Size*dstW*sizeof(*filter2)); for (i=0; ilength == filter2Size); + if(srcFilter){ + for (k=0; klength; k++){ + for (j=0; jcoeff[k]*filter[i*filterSize + j]; + } + }else{ + for (j=0; jlength; j++) - { - filter2[i*filter2Size + j]= outVec->coeff[j]; - } - (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2; - - if (outVec != &scaleFilter) sws_freeVec(outVec); } av_freep(&filter); @@ -1154,7 +1303,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF { int min= filter2Size; int j; - double cutOff=0.0; + int64_t cutOff=0.0; /* get rid off near zero elements on the left by shifting left */ for (j=0; j SWS_MAX_REDUCE_CUTOFF) break; + if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break; /* preserve monotonicity because the core can't handle the filter otherwise */ if (i= (*filterPos)[i+1]) break; - // Move filter coeffs left + // move filter coefficients left for (k=1; k0; j--) { cutOff += FFABS(filter2[i*filter2Size + j]); - if (cutOff > SWS_MAX_REDUCE_CUTOFF) break; + if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break; min--; } @@ -1193,10 +1342,10 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF if (minFilterSize < 5) filterAlign = 4; - // we really don't want to waste our time - // doing useless computation, so fall-back on - // the scalar C code for very small filter. - // vectorizing is worth it only if you have + // We really don't want to waste our time + // doing useless computation, so fall back on + // the scalar C code for very small filters. + // Vectorizing is worth it only if you have a // decent-sized vector. if (minFilterSize < 3) filterAlign = 1; @@ -1211,8 +1360,8 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF assert(minFilterSize > 0); filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1)); assert(filterSize > 0); - filter= av_malloc(filterSize*dstW*sizeof(double)); - if (filterSize >= MAX_FILTER_SIZE || !filter) + filter= av_malloc(filterSize*dstW*sizeof(*filter)); + if (filterSize >= MAX_FILTER_SIZE*16/((flags&SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter) goto error; *outFilterSize= filterSize; @@ -1225,13 +1374,15 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF for (j=0; j=filter2Size) filter[i*filterSize + j]= 0.0; + if (j>=filter2Size) filter[i*filterSize + j]= 0; else filter[i*filterSize + j]= filter2[i*filter2Size + j]; + if((flags & SWS_BITEXACT) && j>=minFilterSize) + filter[i*filterSize + j]= 0; } } - //FIXME try to align filterpos if possible + //FIXME try to align filterPos if possible //fix borders for (i=0; i srcW) { int shift= (*filterPos)[i] + filterSize - srcW; - // Move filter coeffs right to compensate for filterPos + // move filter coefficients right to compensate for filterPos for (j=filterSize-2; j>=0; j--) { int right= FFMIN(j + shift, filterSize-1); @@ -1263,29 +1414,28 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF } } - // Note the +1 is for the MMXscaler which reads over the end + // Note the +1 is for the MMX scaler which reads over the end /* align at 16 for AltiVec (needed by hScale_altivec_real) */ *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t)); - /* Normalize & Store in outFilter */ + /* normalize & store in outFilter */ for (i=0; i {RGB,BGR}{15,16,24,32} */ +static int YUV422PToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dstParam[], int dstStride[]){ + uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; + + yuv422ptoyuy2(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]); + + return srcSliceH; +} + +static int YUV422PToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dstParam[], int dstStride[]){ + uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; + + yuv422ptouyvy(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]); + + return srcSliceH; +} + +static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]){ + const enum PixelFormat srcFormat= c->srcFormat; + const enum PixelFormat dstFormat= c->dstFormat; + void (*conv)(const uint8_t *src, uint8_t *dst, long num_pixels, + const uint8_t *palette)=NULL; + int i; + uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; + uint8_t *srcPtr= src[0]; + + if (!usePal(srcFormat)) + av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", + sws_format_name(srcFormat), sws_format_name(dstFormat)); + + switch(dstFormat){ + case PIX_FMT_RGB32 : conv = palette8topacked32; break; + case PIX_FMT_BGR32 : conv = palette8topacked32; break; + case PIX_FMT_BGR32_1: conv = palette8topacked32; break; + case PIX_FMT_RGB32_1: conv = palette8topacked32; break; + case PIX_FMT_RGB24 : conv = palette8topacked24; break; + case PIX_FMT_BGR24 : conv = palette8topacked24; break; + default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", + sws_format_name(srcFormat), sws_format_name(dstFormat)); break; + } + + + for (i=0; isrcW, (uint8_t *) c->pal_rgb); + srcPtr+= srcStride[0]; + dstPtr+= dstStride[0]; + } + + return srcSliceH; +} + +/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */ static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]){ - const int srcFormat= c->srcFormat; - const int dstFormat= c->dstFormat; + const enum PixelFormat srcFormat= c->srcFormat; + const enum PixelFormat dstFormat= c->dstFormat; const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3; const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3; const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */ @@ -1627,12 +1830,15 @@ static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int sr if(conv) { + uint8_t *srcPtr= src[0]; + if(srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1) + srcPtr += ALT32_CORR; + if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0) - conv(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); + conv(srcPtr, dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); else { int i; - uint8_t *srcPtr= src[0]; uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; for (i=0; idstFormat) || isGray(c->dstFormat)) return -1; memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4); memcpy(c->dstColorspaceTable, table, sizeof(int)*4); @@ -1891,6 +2096,7 @@ int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange c->saturation= saturation; c->srcRange = srcRange; c->dstRange = dstRange; + if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return 0; c->uOffset= 0x0400040004000400LL; c->vOffset= 0x0400040004000400LL; @@ -1920,12 +2126,19 @@ int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange c->ugCoeff= roundToInt16(cgu*8192) * 0x0001000100010001ULL; c->yOffset= roundToInt16(oy * 8) * 0x0001000100010001ULL; - yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation); + c->yuv2rgb_y_coeff = (int16_t)roundToInt16(cy <<13); + c->yuv2rgb_y_offset = (int16_t)roundToInt16(oy << 9); + c->yuv2rgb_v2r_coeff= (int16_t)roundToInt16(crv<<13); + c->yuv2rgb_v2g_coeff= (int16_t)roundToInt16(cgv<<13); + c->yuv2rgb_u2g_coeff= (int16_t)roundToInt16(cgu<<13); + c->yuv2rgb_u2b_coeff= (int16_t)roundToInt16(cbu<<13); + + sws_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation); //FIXME factorize #ifdef COMPILE_ALTIVEC if (c->flags & SWS_CPU_CAPS_ALTIVEC) - yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation); + sws_yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation); #endif return 0; } @@ -1947,7 +2160,7 @@ int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int return 0; } -static int handle_jpeg(int *format) +static int handle_jpeg(enum PixelFormat *format) { switch (*format) { case PIX_FMT_YUVJ420P: @@ -1967,7 +2180,7 @@ static int handle_jpeg(int *format) } } -SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags, +SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int dstW, int dstH, enum PixelFormat dstFormat, int flags, SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){ SwsContext *c; @@ -1976,22 +2189,22 @@ SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH int unscaled, needsDither; int srcRange, dstRange; SwsFilter dummyFilter= {NULL, NULL, NULL, NULL}; -#if defined(ARCH_X86) +#if ARCH_X86 if (flags & SWS_CPU_CAPS_MMX) - asm volatile("emms\n\t"::: "memory"); + __asm__ volatile("emms\n\t"::: "memory"); #endif -#if !defined(RUNTIME_CPUDETECT) || !defined (CONFIG_GPL) //ensure that the flags match the compiled variant if cpudetect is off +#if !defined(RUNTIME_CPUDETECT) || !CONFIG_GPL //ensure that the flags match the compiled variant if cpudetect is off flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN); -#ifdef HAVE_MMX2 +#if HAVE_MMX2 flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2; -#elif defined (HAVE_3DNOW) +#elif HAVE_AMD3DNOW flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW; -#elif defined (HAVE_MMX) +#elif HAVE_MMX flags |= SWS_CPU_CAPS_MMX; -#elif defined (HAVE_ALTIVEC) +#elif HAVE_ALTIVEC flags |= SWS_CPU_CAPS_ALTIVEC; -#elif defined (ARCH_BFIN) +#elif ARCH_BFIN flags |= SWS_CPU_CAPS_BFIN; #endif #endif /* RUNTIME_CPUDETECT */ @@ -2030,11 +2243,10 @@ SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH |SWS_BICUBLIN); if(!i || (i & (i-1))) { - av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be choosen\n"); + av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be chosen\n"); return NULL; } - /* sanity check */ if (srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code { @@ -2043,7 +2255,7 @@ SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH return NULL; } if(srcW > VOFW || dstW > VOFW){ - av_log(NULL, AV_LOG_ERROR, "swScaler: Compile time max width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n"); + av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n"); return NULL; } @@ -2077,18 +2289,19 @@ SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat); getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat); - // reuse chroma for 2 pixles rgb/bgr unless user wants full chroma interpolation + // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1; // drop some chroma lines if the user wants it c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT; c->chrSrcVSubSample+= c->vChrDrop; - // drop every 2. pixel for chroma calculation unless user wants full chroma + // drop every other pixel for chroma calculation unless user wants full chroma if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP) && srcFormat!=PIX_FMT_RGB8 && srcFormat!=PIX_FMT_BGR8 && srcFormat!=PIX_FMT_RGB4 && srcFormat!=PIX_FMT_BGR4 - && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE) + && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE + && ((dstW>>c->chrDstHSubSample) <= (srcW>>1) || (flags&(SWS_FAST_BILINEAR|SWS_POINT)))) c->chrSrcHSubSample=1; if (param){ @@ -2108,34 +2321,33 @@ SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH c->chrDstW= -((-dstW) >> c->chrDstHSubSample); c->chrDstH= -((-dstH) >> c->chrDstVSubSample); - sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], srcRange, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16); + sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], srcRange, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16); - /* unscaled special Cases */ - if (unscaled && !usesHFilter && !usesVFilter) + /* unscaled special cases */ + if (unscaled && !usesHFilter && !usesVFilter && (srcRange == dstRange || isBGR(dstFormat) || isRGB(dstFormat))) { /* yv12_to_nv12 */ - if (srcFormat == PIX_FMT_YUV420P && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)) + if ((srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)) { c->swScale= PlanarToNV12Wrapper; } -#ifdef CONFIG_GPL /* yuv2bgr */ - if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P) && (isBGR(dstFormat) || isRGB(dstFormat))) + if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P || srcFormat==PIX_FMT_YUVA420P) && (isBGR(dstFormat) || isRGB(dstFormat)) + && !(flags & SWS_ACCURATE_RND) && !(dstH&1)) { - c->swScale= yuv2rgb_get_func_ptr(c); + c->swScale= sws_yuv2rgb_get_func_ptr(c); } -#endif - if (srcFormat==PIX_FMT_YUV410P && dstFormat==PIX_FMT_YUV420P) + if (srcFormat==PIX_FMT_YUV410P && dstFormat==PIX_FMT_YUV420P && !(flags & SWS_BITEXACT)) { c->swScale= yvu9toyv12Wrapper; } /* bgr24toYV12 */ - if (srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P) + if (srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P && !(flags & SWS_ACCURATE_RND)) c->swScale= bgr24toyv12Wrapper; - /* rgb/bgr -> rgb/bgr (no dither needed forms) */ + /* RGB/BGR -> RGB/BGR (no dither needed forms) */ if ( (isBGR(srcFormat) || isRGB(srcFormat)) && (isBGR(dstFormat) || isRGB(dstFormat)) && srcFormat != PIX_FMT_BGR8 && dstFormat != PIX_FMT_BGR8 @@ -2145,42 +2357,56 @@ SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK - && !needsDither) + && srcFormat != PIX_FMT_MONOWHITE && dstFormat != PIX_FMT_MONOWHITE + && dstFormat != PIX_FMT_RGB32_1 + && dstFormat != PIX_FMT_BGR32_1 + && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)))) c->swScale= rgb2rgbWrapper; + if ((usePal(srcFormat) && ( + dstFormat == PIX_FMT_RGB32 || + dstFormat == PIX_FMT_RGB32_1 || + dstFormat == PIX_FMT_RGB24 || + dstFormat == PIX_FMT_BGR32 || + dstFormat == PIX_FMT_BGR32_1 || + dstFormat == PIX_FMT_BGR24))) + c->swScale= pal2rgbWrapper; + + if (srcFormat == PIX_FMT_YUV422P) + { + if (dstFormat == PIX_FMT_YUYV422) + c->swScale= YUV422PToYuy2Wrapper; + else if (dstFormat == PIX_FMT_UYVY422) + c->swScale= YUV422PToUyvyWrapper; + } + /* LQ converters if -sws 0 or -sws 4*/ if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){ - /* rgb/bgr -> rgb/bgr (dither needed forms) */ - if ( (isBGR(srcFormat) || isRGB(srcFormat)) - && (isBGR(dstFormat) || isRGB(dstFormat)) - && needsDither) - c->swScale= rgb2rgbWrapper; - /* yv12_to_yuy2 */ - if (srcFormat == PIX_FMT_YUV420P && - (dstFormat == PIX_FMT_YUYV422 || dstFormat == PIX_FMT_UYVY422)) + if (srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) { if (dstFormat == PIX_FMT_YUYV422) c->swScale= PlanarToYuy2Wrapper; - else + else if (dstFormat == PIX_FMT_UYVY422) c->swScale= PlanarToUyvyWrapper; } } #ifdef COMPILE_ALTIVEC if ((c->flags & SWS_CPU_CAPS_ALTIVEC) && - ((srcFormat == PIX_FMT_YUV420P && - (dstFormat == PIX_FMT_YUYV422 || dstFormat == PIX_FMT_UYVY422)))) { + !(c->flags & SWS_BITEXACT) && + srcFormat == PIX_FMT_YUV420P) { // unscaled YV12 -> packed YUV, we want speed if (dstFormat == PIX_FMT_YUYV422) c->swScale= yv12toyuy2_unscaled_altivec; - else + else if (dstFormat == PIX_FMT_UYVY422) c->swScale= yv12touyvy_unscaled_altivec; } #endif /* simple copy */ if ( srcFormat == dstFormat + || (srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P) || (isPlanarYUV(srcFormat) && isGray(dstFormat)) || (isPlanarYUV(dstFormat) && isGray(srcFormat))) { @@ -2204,7 +2430,7 @@ SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH c->swScale= gray16swap; } -#ifdef ARCH_BFIN +#if ARCH_BFIN if (flags & SWS_CPU_CAPS_BFIN) ff_bfin_get_unscaled_swscale (c); #endif @@ -2223,7 +2449,7 @@ SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) { if (flags&SWS_PRINT_INFO) - av_log(c, AV_LOG_INFO, "output Width is not a multiple of 32 -> no MMX2 scaler\n"); + av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n"); } if (usesHFilter) c->canMMX2BeUsed=0; } @@ -2246,7 +2472,7 @@ SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH c->lumXInc+= 20; c->chrXInc+= 20; } - //we don't use the x86asm scaler if mmx is available + //we don't use the x86 asm scaler if MMX is available else if (flags & SWS_CPU_CAPS_MMX) { c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20; @@ -2292,7 +2518,7 @@ SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4); } #endif /* defined(COMPILE_MMX2) */ - } // Init Horizontal stuff + } // initialize horizontal stuff @@ -2304,15 +2530,15 @@ SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH 1; initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc, - srcH , dstH, filterAlign, (1<<12)-4, + srcH , dstH, filterAlign, (1<<12), (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, srcFilter->lumV, dstFilter->lumV, c->param); initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc, - c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4, + c->chrSrcH, c->chrDstH, filterAlign, (1<<12), (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, srcFilter->chrV, dstFilter->chrV, c->param); -#ifdef HAVE_ALTIVEC +#if HAVE_ALTIVEC c->vYCoeffsBank = av_malloc(sizeof (vector signed short)*c->vLumFilterSize*c->dstH); c->vCCoeffsBank = av_malloc(sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH); @@ -2332,7 +2558,7 @@ SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH #endif } - // Calculate Buffer Sizes so that they won't run out while handling these damn slices + // calculate buffer sizes so that they won't run out while handling these damn slices c->vLumBufSize= c->vLumFilterSize; c->vChrBufSize= c->vChrFilterSize; for (i=0; ilumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*)); c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*)); - //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000) + //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000) /* align at 16 bytes for AltiVec */ for (i=0; ivLumBufSize; i++) c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(VOF+1); @@ -2442,8 +2668,8 @@ SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH } else { -#if defined(ARCH_X86) - av_log(c, AV_LOG_VERBOSE, "using X86-Asm scaler for horizontal scaling\n"); +#if ARCH_X86 + av_log(c, AV_LOG_VERBOSE, "using x86 asm scaler for horizontal scaling\n"); #else if (flags & SWS_FAST_BILINEAR) av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR C scaler for horizontal scaling\n"); @@ -2470,22 +2696,22 @@ SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH } if (dstFormat==PIX_FMT_BGR24) - av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 Converter\n", + av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 converter\n", (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C")); else if (dstFormat==PIX_FMT_RGB32) - av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); + av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); else if (dstFormat==PIX_FMT_BGR565) - av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); + av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); else if (dstFormat==PIX_FMT_BGR555) - av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); + av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); } if (flags & SWS_PRINT_INFO) { - av_log(c, AV_LOG_DEBUG, "Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", + av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc); - av_log(c, AV_LOG_DEBUG, "Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", + av_log(c, AV_LOG_DEBUG, "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc); } @@ -2495,13 +2721,13 @@ SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH /** * swscale wrapper, so we don't need to export the SwsContext. - * assumes planar YUV to be in YUV order instead of YVU + * Assumes planar YUV to be in YUV order instead of YVU. */ int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]){ int i; uint8_t* src2[4]= {src[0], src[1], src[2]}; - uint32_t pal[256]; + if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) { av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n"); return 0; @@ -2510,18 +2736,65 @@ int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1; } - if (c->srcFormat == PIX_FMT_PAL8){ + if (usePal(c->srcFormat)){ for (i=0; i<256; i++){ - int p= ((uint32_t*)(src[1]))[i]; - int r= (p>>16)&0xFF; - int g= (p>> 8)&0xFF; - int b= p &0xFF; - int y= av_clip_uint8(((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16 ); - int u= av_clip_uint8(((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128); - int v= av_clip_uint8(((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128); - pal[i]= y + (u<<8) + (v<<16); + int p, r, g, b,y,u,v; + if(c->srcFormat == PIX_FMT_PAL8){ + p=((uint32_t*)(src[1]))[i]; + r= (p>>16)&0xFF; + g= (p>> 8)&0xFF; + b= p &0xFF; + }else if(c->srcFormat == PIX_FMT_RGB8){ + r= (i>>5 )*36; + g= ((i>>2)&7)*36; + b= (i&3 )*85; + }else if(c->srcFormat == PIX_FMT_BGR8){ + b= (i>>6 )*85; + g= ((i>>3)&7)*36; + r= (i&7 )*36; + }else if(c->srcFormat == PIX_FMT_RGB4_BYTE){ + r= (i>>3 )*255; + g= ((i>>1)&3)*85; + b= (i&1 )*255; + }else { + assert(c->srcFormat == PIX_FMT_BGR4_BYTE); + b= (i>>3 )*255; + g= ((i>>1)&3)*85; + r= (i&1 )*255; + } + y= av_clip_uint8((RY*r + GY*g + BY*b + ( 33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); + u= av_clip_uint8((RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); + v= av_clip_uint8((RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); + c->pal_yuv[i]= y + (u<<8) + (v<<16); + + + switch(c->dstFormat) { + case PIX_FMT_BGR32: +#ifndef WORDS_BIGENDIAN + case PIX_FMT_RGB24: +#endif + c->pal_rgb[i]= r + (g<<8) + (b<<16); + break; + case PIX_FMT_BGR32_1: +#ifdef WORDS_BIGENDIAN + case PIX_FMT_BGR24: +#endif + c->pal_rgb[i]= (r + (g<<8) + (b<<16)) << 8; + break; + case PIX_FMT_RGB32_1: +#ifdef WORDS_BIGENDIAN + case PIX_FMT_RGB24: +#endif + c->pal_rgb[i]= (b + (g<<8) + (r<<16)) << 8; + break; + case PIX_FMT_RGB32: +#ifndef WORDS_BIGENDIAN + case PIX_FMT_BGR24: +#endif + default: + c->pal_rgb[i]= b + (g<<8) + (r<<16); + } } - src2[1]= (uint8_t*)pal; } // copy strides, so they can safely be modified @@ -2539,7 +2812,7 @@ int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2]}; src2[0] += (srcSliceH-1)*srcStride[0]; - if (c->srcFormat != PIX_FMT_PAL8) + if (!usePal(c->srcFormat)) src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1]; src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2]; @@ -2547,13 +2820,12 @@ int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, } } -/** - * swscale wrapper, so we don't need to export the SwsContext - */ +#if LIBSWSCALE_VERSION_MAJOR < 1 int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]){ return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride); } +#endif SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, float lumaSharpen, float chromaSharpen, @@ -2607,16 +2879,12 @@ SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, sws_normalizeVec(filter->lumH, 1.0); sws_normalizeVec(filter->lumV, 1.0); - if (verbose) sws_printVec(filter->chrH); - if (verbose) sws_printVec(filter->lumH); + if (verbose) sws_printVec2(filter->chrH, NULL, AV_LOG_DEBUG); + if (verbose) sws_printVec2(filter->lumH, NULL, AV_LOG_DEBUG); return filter; } -/** - * returns a normalized gaussian curve used to filter stuff - * quality=3 is high quality, lowwer is lowwer quality - */ SwsVector *sws_getGaussianVec(double variance, double quality){ const int length= (int)(variance*quality + 0.5) | 1; int i; @@ -2799,7 +3067,7 @@ SwsVector *sws_cloneVec(SwsVector *a){ return vec; } -void sws_printVec(SwsVector *a){ +void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level){ int i; double max=0; double min=0; @@ -2816,12 +3084,18 @@ void sws_printVec(SwsVector *a){ for (i=0; ilength; i++) { int x= (int)((a->coeff[i]-min)*60.0/range +0.5); - av_log(NULL, AV_LOG_DEBUG, "%1.3f ", a->coeff[i]); - for (;x>0; x--) av_log(NULL, AV_LOG_DEBUG, " "); - av_log(NULL, AV_LOG_DEBUG, "|\n"); + av_log(log_ctx, log_level, "%1.3f ", a->coeff[i]); + for (;x>0; x--) av_log(log_ctx, log_level, " "); + av_log(log_ctx, log_level, "|\n"); } } +#if LIBSWSCALE_VERSION_MAJOR < 1 +void sws_printVec(SwsVector *a){ + sws_printVec2(a, NULL, AV_LOG_DEBUG); +} +#endif + void sws_freeVec(SwsVector *a){ if (!a) return; av_freep(&a->coeff); @@ -2862,7 +3136,7 @@ void sws_freeContext(SwsContext *c){ av_freep(&c->vChrFilter); av_freep(&c->hLumFilter); av_freep(&c->hChrFilter); -#ifdef HAVE_ALTIVEC +#if HAVE_ALTIVEC av_freep(&c->vYCoeffsBank); av_freep(&c->vCCoeffsBank); #endif @@ -2872,7 +3146,7 @@ void sws_freeContext(SwsContext *c){ av_freep(&c->hLumFilterPos); av_freep(&c->hChrFilterPos); -#if defined(ARCH_X86) && defined(CONFIG_GPL) +#if ARCH_X86 && CONFIG_GPL #ifdef MAP_ANONYMOUS if (c->funnyYCode) munmap(c->funnyYCode, MAX_FUNNY_CODE_SIZE); if (c->funnyUVCode) munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE); @@ -2882,7 +3156,7 @@ void sws_freeContext(SwsContext *c){ #endif c->funnyYCode=NULL; c->funnyUVCode=NULL; -#endif /* defined(ARCH_X86) */ +#endif /* ARCH_X86 && CONFIG_GPL */ av_freep(&c->lumMmx2Filter); av_freep(&c->chrMmx2Filter); @@ -2893,19 +3167,9 @@ void sws_freeContext(SwsContext *c){ av_free(c); } -/** - * Checks if context is valid or reallocs a new one instead. - * If context is NULL, just calls sws_getContext() to get a new one. - * Otherwise, checks if the parameters are the same already saved in context. - * If that is the case, returns the current context. - * Otherwise, frees context and gets a new one. - * - * Be warned that srcFilter, dstFilter are not checked, they are - * asumed to remain valid. - */ struct SwsContext *sws_getCachedContext(struct SwsContext *context, - int srcW, int srcH, int srcFormat, - int dstW, int dstH, int dstFormat, int flags, + int srcW, int srcH, enum PixelFormat srcFormat, + int dstW, int dstH, enum PixelFormat dstFormat, int flags, SwsFilter *srcFilter, SwsFilter *dstFilter, double *param) { static const double default_param[2] = {SWS_PARAM_DEFAULT, SWS_PARAM_DEFAULT}; diff --git a/src/add-ons/media/plugins/avcodec/libswscale/swscale.h b/src/add-ons/media/plugins/avcodec/libswscale/swscale.h index b58d358cf3..124a623338 100644 --- a/src/add-ons/media/plugins/avcodec/libswscale/swscale.h +++ b/src/add-ons/media/plugins/avcodec/libswscale/swscale.h @@ -18,19 +18,20 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef FFMPEG_SWSCALE_H -#define FFMPEG_SWSCALE_H +#ifndef SWSCALE_SWSCALE_H +#define SWSCALE_SWSCALE_H /** - * @file swscale.h + * @file libswscale/swscale.h * @brief * external api for the swscale stuff */ -#include "avutil.h" +#include "libavutil/avutil.h" +#include "libavutil/internal.h" #define LIBSWSCALE_VERSION_MAJOR 0 -#define LIBSWSCALE_VERSION_MINOR 5 +#define LIBSWSCALE_VERSION_MINOR 7 #define LIBSWSCALE_VERSION_MICRO 1 #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ @@ -43,6 +44,11 @@ #define LIBSWSCALE_IDENT "SwS" AV_STRINGIFY(LIBSWSCALE_VERSION) +/** + * Returns the LIBSWSCALE_VERSION_INT constant. + */ +unsigned swscale_version(void); + /* values for the flags, the stuff on the command line is different */ #define SWS_FAST_BILINEAR 1 #define SWS_BILINEAR 2 @@ -70,6 +76,7 @@ #define SWS_FULL_CHR_H_INP 0x4000 #define SWS_DIRECT_BGR 0x8000 #define SWS_ACCURATE_RND 0x40000 +#define SWS_BITEXACT 0x80000 #define SWS_CPU_CAPS_MMX 0x80000000 #define SWS_CPU_CAPS_MMX2 0x20000000 @@ -92,8 +99,8 @@ // when used for filters they must have an odd number of elements // coeffs cannot be shared between vectors typedef struct { - double *coeff; - int length; + double *coeff; ///< pointer to the list of coefficients + int length; ///< number of coefficients in the vector } SwsVector; // vectors can be shared @@ -108,39 +115,134 @@ struct SwsContext; void sws_freeContext(struct SwsContext *swsContext); -struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags, +/** + * Allocates and returns a SwsContext. You need it to perform + * scaling/conversion operations using sws_scale(). + * + * @param srcW the width of the source image + * @param srcH the height of the source image + * @param srcFormat the source image format + * @param dstW the width of the destination image + * @param dstH the height of the destination image + * @param dstFormat the destination image format + * @param flags specify which algorithm and options to use for rescaling + * @return a pointer to an allocated context, or NULL in case of error + */ +struct SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int dstW, int dstH, enum PixelFormat dstFormat, int flags, SwsFilter *srcFilter, SwsFilter *dstFilter, double *param); -int sws_scale(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY, + +/** + * Scales the image slice in \p srcSlice and puts the resulting scaled + * slice in the image in \p dst. A slice is a sequence of consecutive + * rows in an image. + * + * @param context the scaling context previously created with + * sws_getContext() + * @param srcSlice the array containing the pointers to the planes of + * the source slice + * @param srcStride the array containing the strides for each plane of + * the source image + * @param srcSliceY the position in the source image of the slice to + * process, that is the number (counted starting from + * zero) in the image of the first row of the slice + * @param srcSliceH the height of the source slice, that is the number + * of rows in the slice + * @param dst the array containing the pointers to the planes of + * the destination image + * @param dstStride the array containing the strides for each plane of + * the destination image + * @return the height of the output slice + */ +int sws_scale(struct SwsContext *context, uint8_t* srcSlice[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]); +#if LIBSWSCALE_VERSION_MAJOR < 1 +/** + * @deprecated Use sws_scale() instead. + */ int sws_scale_ordered(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]) attribute_deprecated; +#endif int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation); int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation); + +/** + * Returns a normalized Gaussian curve used to filter stuff + * quality=3 is high quality, lower is lower quality. + */ SwsVector *sws_getGaussianVec(double variance, double quality); + +/** + * Allocates and returns a vector with \p length coefficients, all + * with the same value \p c. + */ SwsVector *sws_getConstVec(double c, int length); + +/** + * Allocates and returns a vector with just one coefficient, with + * value 1.0. + */ SwsVector *sws_getIdentityVec(void); + +/** + * Scales all the coefficients of \p a by the \p scalar value. + */ void sws_scaleVec(SwsVector *a, double scalar); + +/** + * Scales all the coefficients of \p a so that their sum equals \p + * height." + */ void sws_normalizeVec(SwsVector *a, double height); void sws_convVec(SwsVector *a, SwsVector *b); void sws_addVec(SwsVector *a, SwsVector *b); void sws_subVec(SwsVector *a, SwsVector *b); void sws_shiftVec(SwsVector *a, int shift); + +/** + * Allocates and returns a clone of the vector \p a, that is a vector + * with the same coefficients as \p a. + */ SwsVector *sws_cloneVec(SwsVector *a); -void sws_printVec(SwsVector *a); +#if LIBSWSCALE_VERSION_MAJOR < 1 +/** + * @deprecated Use sws_printVec2() instead. + */ +attribute_deprecated void sws_printVec(SwsVector *a); +#endif + +/** + * Prints with av_log() a textual representation of the vector \p a + * if \p log_level <= av_log_level. + */ +void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level); + void sws_freeVec(SwsVector *a); SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, - float lumaSarpen, float chromaSharpen, + float lumaSharpen, float chromaSharpen, float chromaHShift, float chromaVShift, int verbose); void sws_freeFilter(SwsFilter *filter); +/** + * Checks if \p context can be reused, otherwise reallocates a new + * one. + * + * If \p context is NULL, just calls sws_getContext() to get a new + * context. Otherwise, checks if the parameters are the ones already + * saved in \p context. If that is the case, returns the current + * context. Otherwise, frees \p context and gets a new context with + * the new parameters. + * + * Be warned that \p srcFilter and \p dstFilter are not checked, they + * are assumed to remain the same. + */ struct SwsContext *sws_getCachedContext(struct SwsContext *context, - int srcW, int srcH, int srcFormat, - int dstW, int dstH, int dstFormat, int flags, + int srcW, int srcH, enum PixelFormat srcFormat, + int dstW, int dstH, enum PixelFormat dstFormat, int flags, SwsFilter *srcFilter, SwsFilter *dstFilter, double *param); -#endif /* FFMPEG_SWSCALE_H */ +#endif /* SWSCALE_SWSCALE_H */ diff --git a/src/add-ons/media/plugins/avcodec/libswscale/swscale_altivec_template.c b/src/add-ons/media/plugins/avcodec/libswscale/swscale_altivec_template.c index 2111cec410..a008b966e8 100644 --- a/src/add-ons/media/plugins/avcodec/libswscale/swscale_altivec_template.c +++ b/src/add-ons/media/plugins/avcodec/libswscale/swscale_altivec_template.c @@ -220,7 +220,7 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int for (j=0; j>7, 0, (1<<15)-1); + dst[i] = FFMIN(val>>7, (1<<15)-1); } } else @@ -259,7 +259,7 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int val_vEven = vec_mule(src_v, filter_v); val_s = vec_sums(val_vEven, vzero); vec_st(val_s, 0, tempo); - dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1); + dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1); } } break; @@ -286,7 +286,7 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int val_v = vec_msums(src_v, filter_v, (vector signed int)vzero); val_s = vec_sums(val_v, vzero); vec_st(val_s, 0, tempo); - dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1); + dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1); } } break; @@ -315,7 +315,7 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int vector signed int val_s = vec_sums(val_v, vzero); vec_st(val_s, 0, tempo); - dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1); + dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1); } } break; @@ -377,7 +377,7 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int val_s = vec_sums(val_v, vzero); vec_st(val_s, 0, tempo); - dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1); + dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1); } } diff --git a/src/add-ons/media/plugins/avcodec/libswscale/swscale_avoption.c b/src/add-ons/media/plugins/avcodec/libswscale/swscale_avoption.c index c16258d254..996843df1d 100644 --- a/src/add-ons/media/plugins/avcodec/libswscale/swscale_avoption.c +++ b/src/add-ons/media/plugins/avcodec/libswscale/swscale_avoption.c @@ -18,8 +18,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "avutil.h" -#include "opt.h" +#include "libavutil/avutil.h" +#include "libavcodec/opt.h" #include "swscale.h" #include "swscale_internal.h" @@ -53,6 +53,7 @@ static const AVOption options[] = { { "bfin", "Blackfin SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_BFIN, INT_MIN, INT_MAX, VE, "sws_flags" }, { "full_chroma_int", "full chroma interpolation", 0 , FF_OPT_TYPE_CONST, SWS_FULL_CHR_H_INT, INT_MIN, INT_MAX, VE, "sws_flags" }, { "full_chroma_inp", "full chroma input", 0 , FF_OPT_TYPE_CONST, SWS_FULL_CHR_H_INP, INT_MIN, INT_MAX, VE, "sws_flags" }, + { "bitexact", "", 0 , FF_OPT_TYPE_CONST, SWS_BITEXACT, INT_MIN, INT_MAX, VE, "sws_flags" }, { NULL } }; diff --git a/src/add-ons/media/plugins/avcodec/libswscale/swscale_bfin.c b/src/add-ons/media/plugins/avcodec/libswscale/swscale_bfin.c index 3e63bbd638..ed7d9579b6 100644 --- a/src/add-ons/media/plugins/avcodec/libswscale/swscale_bfin.c +++ b/src/add-ons/media/plugins/avcodec/libswscale/swscale_bfin.c @@ -26,9 +26,6 @@ #include #include #include "config.h" -#ifdef HAVE_MALLOC_H -#include -#endif #include #include "rgb2rgb.h" #include "swscale.h" @@ -40,13 +37,13 @@ #define L1CODE #endif -extern int ff_bfin_uyvytoyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - long width, long height, - long lumStride, long chromStride, long srcStride) L1CODE; +int ff_bfin_uyvytoyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + long width, long height, + long lumStride, long chromStride, long srcStride) L1CODE; -extern int ff_bfin_yuyvtoyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - long width, long height, - long lumStride, long chromStride, long srcStride) L1CODE; +int ff_bfin_yuyvtoyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, + long width, long height, + long lumStride, long chromStride, long srcStride) L1CODE; static int uyvytoyv12_unscaled (SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]) diff --git a/src/add-ons/media/plugins/avcodec/libswscale/swscale_internal.h b/src/add-ons/media/plugins/avcodec/libswscale/swscale_internal.h index 7aa3f9babd..cdf3754d14 100644 --- a/src/add-ons/media/plugins/avcodec/libswscale/swscale_internal.h +++ b/src/add-ons/media/plugins/avcodec/libswscale/swscale_internal.h @@ -18,22 +18,42 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef FFMPEG_SWSCALE_INTERNAL_H -#define FFMPEG_SWSCALE_INTERNAL_H +#ifndef SWSCALE_SWSCALE_INTERNAL_H +#define SWSCALE_SWSCALE_INTERNAL_H #include "config.h" -#ifdef HAVE_ALTIVEC_H +#if HAVE_ALTIVEC_H #include #endif -#include "avutil.h" +#include "libavutil/avutil.h" + +#define STR(s) AV_TOSTRING(s) //AV_STRINGIFY is too long #define MAX_FILTER_SIZE 256 #define VOFW 2048 #define VOF (VOFW*2) +#ifdef WORDS_BIGENDIAN +#define ALT32_CORR (-1) +#else +#define ALT32_CORR 1 +#endif + +#if ARCH_X86_64 +# define APCK_PTR2 8 +# define APCK_COEF 16 +# define APCK_SIZE 24 +#else +# define APCK_PTR2 4 +# define APCK_COEF 8 +# define APCK_SIZE 16 +#endif + +struct SwsContext; + typedef int (*SwsFunc)(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]); @@ -53,7 +73,7 @@ typedef struct SwsContext{ int chrSrcW, chrSrcH, chrDstW, chrDstH; int lumXInc, chrXInc; int lumYInc, chrYInc; - int dstFormat, srcFormat; ///< format 4:2:0 type is always YV12 + enum PixelFormat dstFormat, srcFormat; ///< format 4:2:0 type is always YV12 int origDstFormat, origSrcFormat; ///< format int chrSrcHSubSample, chrSrcVSubSample; int chrIntHSubSample, chrIntVSubSample; @@ -62,6 +82,9 @@ typedef struct SwsContext{ int sliceDir; double param[2]; + uint32_t pal_yuv[256]; + uint32_t pal_rgb[256]; + int16_t **lumPixBuf; int16_t **chrPixBuf; int16_t *hLumFilter; @@ -108,6 +131,12 @@ typedef struct SwsContext{ int srcColorspaceTable[4]; int dstColorspaceTable[4]; int srcRange, dstRange; + int yuv2rgb_y_offset; + int yuv2rgb_y_coeff; + int yuv2rgb_v2r_coeff; + int yuv2rgb_v2g_coeff; + int yuv2rgb_u2g_coeff; + int yuv2rgb_u2b_coeff; #define RED_DITHER "0*8" #define GREEN_DITHER "1*8" @@ -148,7 +177,7 @@ typedef struct SwsContext{ uint64_t u_temp __attribute__((aligned(8))); uint64_t v_temp __attribute__((aligned(8))); -#ifdef HAVE_ALTIVEC +#if HAVE_ALTIVEC vector signed short CY; vector signed short CRV; @@ -162,7 +191,7 @@ typedef struct SwsContext{ #endif -#ifdef ARCH_BFIN +#if ARCH_BFIN uint32_t oy __attribute__((aligned(4))); uint32_t oc __attribute__((aligned(4))); uint32_t zero __attribute__((aligned(4))); @@ -176,18 +205,18 @@ typedef struct SwsContext{ uint32_t gmask __attribute__((aligned(4))); #endif -#ifdef HAVE_VIS +#if HAVE_VIS uint64_t sparc_coeffs[10] __attribute__((aligned(8))); #endif } SwsContext; //FIXME check init (where 0) -SwsFunc yuv2rgb_get_func_ptr (SwsContext *c); -int yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation); +SwsFunc sws_yuv2rgb_get_func_ptr (SwsContext *c); +int sws_yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation); -void yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation); -SwsFunc yuv2rgb_init_altivec (SwsContext *c); +void sws_yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation); +SwsFunc sws_yuv2rgb_init_altivec (SwsContext *c); void altivec_yuv2packedX (SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, @@ -221,7 +250,8 @@ const char *sws_format_name(int format); || (x)==PIX_FMT_GRAY16LE \ ) #define isRGB(x) ( \ - (x)==PIX_FMT_BGR32 \ + (x)==PIX_FMT_RGB32 \ + || (x)==PIX_FMT_RGB32_1 \ || (x)==PIX_FMT_RGB24 \ || (x)==PIX_FMT_RGB565 \ || (x)==PIX_FMT_RGB555 \ @@ -229,9 +259,11 @@ const char *sws_format_name(int format); || (x)==PIX_FMT_RGB4 \ || (x)==PIX_FMT_RGB4_BYTE \ || (x)==PIX_FMT_MONOBLACK \ + || (x)==PIX_FMT_MONOWHITE \ ) #define isBGR(x) ( \ - (x)==PIX_FMT_RGB32 \ + (x)==PIX_FMT_BGR32 \ + || (x)==PIX_FMT_BGR32_1 \ || (x)==PIX_FMT_BGR24 \ || (x)==PIX_FMT_BGR565 \ || (x)==PIX_FMT_BGR555 \ @@ -239,6 +271,14 @@ const char *sws_format_name(int format); || (x)==PIX_FMT_BGR4 \ || (x)==PIX_FMT_BGR4_BYTE \ || (x)==PIX_FMT_MONOBLACK \ + || (x)==PIX_FMT_MONOWHITE \ + ) +#define isALPHA(x) ( \ + (x)==PIX_FMT_BGR32 \ + || (x)==PIX_FMT_BGR32_1 \ + || (x)==PIX_FMT_RGB32 \ + || (x)==PIX_FMT_RGB32_1 \ + || (x)==PIX_FMT_YUVA420P \ ) static inline int fmt_depth(int fmt) @@ -269,15 +309,16 @@ static inline int fmt_depth(int fmt) case PIX_FMT_RGB4_BYTE: return 4; case PIX_FMT_MONOBLACK: + case PIX_FMT_MONOWHITE: return 1; default: return 0; } } -extern const DECLARE_ALIGNED(8, uint64_t, ff_dither4[2]); -extern const DECLARE_ALIGNED(8, uint64_t, ff_dither8[2]); +extern const uint64_t ff_dither4[2]; +extern const uint64_t ff_dither8[2]; extern const AVClass sws_context_class; -#endif /* FFMPEG_SWSCALE_INTERNAL_H */ +#endif /* SWSCALE_SWSCALE_INTERNAL_H */ diff --git a/src/add-ons/media/plugins/avcodec/libswscale/swscale_template.c b/src/add-ons/media/plugins/avcodec/libswscale/swscale_template.c index 1280ba6c02..3262b6ee85 100644 --- a/src/add-ons/media/plugins/avcodec/libswscale/swscale_template.c +++ b/src/add-ons/media/plugins/avcodec/libswscale/swscale_template.c @@ -29,17 +29,17 @@ #undef EMMS #undef SFENCE -#ifdef HAVE_3DNOW -/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */ +#if HAVE_AMD3DNOW +/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */ #define EMMS "femms" #else #define EMMS "emms" #endif -#ifdef HAVE_3DNOW +#if HAVE_AMD3DNOW #define PREFETCH "prefetch" #define PREFETCHW "prefetchw" -#elif defined (HAVE_MMX2) +#elif HAVE_MMX2 #define PREFETCH "prefetchnta" #define PREFETCHW "prefetcht0" #else @@ -47,31 +47,31 @@ #define PREFETCHW " # nop" #endif -#ifdef HAVE_MMX2 +#if HAVE_MMX2 #define SFENCE "sfence" #else #define SFENCE " # nop" #endif -#ifdef HAVE_MMX2 +#if HAVE_MMX2 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t" -#elif defined (HAVE_3DNOW) +#elif HAVE_AMD3DNOW #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" #endif -#ifdef HAVE_MMX2 +#if HAVE_MMX2 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" #else #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t" #endif #define MOVNTQ(a,b) REAL_MOVNTQ(a,b) -#ifdef HAVE_ALTIVEC +#if HAVE_ALTIVEC #include "swscale_altivec_template.c" #endif #define YSCALEYUV2YV12X(x, offset, dest, width) \ - asm volatile(\ + __asm__ volatile(\ "xor %%"REG_a", %%"REG_a" \n\t"\ "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\ "movq %%mm3, %%mm4 \n\t"\ @@ -107,7 +107,7 @@ ); #define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \ - asm volatile(\ + __asm__ volatile(\ "lea " offset "(%0), %%"REG_d" \n\t"\ "xor %%"REG_a", %%"REG_a" \n\t"\ "pxor %%mm4, %%mm4 \n\t"\ @@ -119,19 +119,19 @@ "1: \n\t"\ "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\ "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\ - "mov 4(%%"REG_d"), %%"REG_S" \n\t"\ + "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" /* srcData */\ "movq %%mm0, %%mm3 \n\t"\ "punpcklwd %%mm1, %%mm0 \n\t"\ "punpckhwd %%mm1, %%mm3 \n\t"\ - "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ + "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ "pmaddwd %%mm1, %%mm0 \n\t"\ "pmaddwd %%mm1, %%mm3 \n\t"\ "paddd %%mm0, %%mm4 \n\t"\ "paddd %%mm3, %%mm5 \n\t"\ "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* srcData */\ - "mov 16(%%"REG_d"), %%"REG_S" \n\t"\ - "add $16, %%"REG_d" \n\t"\ + "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ + "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ "test %%"REG_S", %%"REG_S" \n\t"\ "movq %%mm2, %%mm0 \n\t"\ "punpcklwd %%mm3, %%mm2 \n\t"\ @@ -190,8 +190,8 @@ "1: \n\t"\ "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\ "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\ - "paddw %%mm7, %%mm0 \n\t"\ - "paddw %%mm7, %%mm1 \n\t"\ + "paddsw %%mm7, %%mm0 \n\t"\ + "paddsw %%mm7, %%mm1 \n\t"\ "psraw $7, %%mm0 \n\t"\ "psraw $7, %%mm1 \n\t"\ "packuswb %%mm1, %%mm0 \n\t"\ @@ -206,8 +206,8 @@ "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) : "%eax", "%ebx", "%ecx", "%edx", "%esi" */ -#define YSCALEYUV2PACKEDX \ - asm volatile(\ +#define YSCALEYUV2PACKEDX_UV \ + __asm__ volatile(\ "xor %%"REG_a", %%"REG_a" \n\t"\ ASMALIGN(4)\ "nop \n\t"\ @@ -229,8 +229,9 @@ "paddw %%mm5, %%mm4 \n\t"\ "test %%"REG_S", %%"REG_S" \n\t"\ " jnz 2b \n\t"\ -\ - "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\ + +#define YSCALEYUV2PACKEDX_YA(offset) \ + "lea "offset"(%0), %%"REG_d" \n\t"\ "mov (%%"REG_d"), %%"REG_S" \n\t"\ "movq "VROUNDER_OFFSET"(%0), %%mm1 \n\t"\ "movq %%mm1, %%mm7 \n\t"\ @@ -248,6 +249,10 @@ "test %%"REG_S", %%"REG_S" \n\t"\ " jnz 2b \n\t"\ +#define YSCALEYUV2PACKEDX \ + YSCALEYUV2PACKEDX_UV \ + YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET) \ + #define YSCALEYUV2PACKEDX_END \ :: "r" (&c->redDither), \ "m" (dummy), "m" (dummy), "m" (dummy),\ @@ -255,8 +260,8 @@ : "%"REG_a, "%"REG_d, "%"REG_S \ ); -#define YSCALEYUV2PACKEDX_ACCURATE \ - asm volatile(\ +#define YSCALEYUV2PACKEDX_ACCURATE_UV \ + __asm__ volatile(\ "xor %%"REG_a", %%"REG_a" \n\t"\ ASMALIGN(4)\ "nop \n\t"\ @@ -271,19 +276,19 @@ "2: \n\t"\ "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\ "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\ - "mov 4(%%"REG_d"), %%"REG_S" \n\t"\ + "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\ "movq %%mm0, %%mm3 \n\t"\ "punpcklwd %%mm1, %%mm0 \n\t"\ "punpckhwd %%mm1, %%mm3 \n\t"\ - "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ + "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" /* filterCoeff */\ "pmaddwd %%mm1, %%mm0 \n\t"\ "pmaddwd %%mm1, %%mm3 \n\t"\ "paddd %%mm0, %%mm4 \n\t"\ "paddd %%mm3, %%mm5 \n\t"\ "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\ - "mov 16(%%"REG_d"), %%"REG_S" \n\t"\ - "add $16, %%"REG_d" \n\t"\ + "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ + "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ "test %%"REG_S", %%"REG_S" \n\t"\ "movq %%mm2, %%mm0 \n\t"\ "punpcklwd %%mm3, %%mm2 \n\t"\ @@ -304,8 +309,9 @@ "paddw %%mm0, %%mm6 \n\t"\ "movq %%mm4, "U_TEMP"(%0) \n\t"\ "movq %%mm6, "V_TEMP"(%0) \n\t"\ -\ - "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\ + +#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \ + "lea "offset"(%0), %%"REG_d" \n\t"\ "mov (%%"REG_d"), %%"REG_S" \n\t"\ "pxor %%mm1, %%mm1 \n\t"\ "pxor %%mm5, %%mm5 \n\t"\ @@ -315,19 +321,19 @@ "2: \n\t"\ "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\ "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\ - "mov 4(%%"REG_d"), %%"REG_S" \n\t"\ + "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\ "movq %%mm0, %%mm3 \n\t"\ "punpcklwd %%mm4, %%mm0 \n\t"\ "punpckhwd %%mm4, %%mm3 \n\t"\ - "movq 8(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\ + "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\ "pmaddwd %%mm4, %%mm0 \n\t"\ "pmaddwd %%mm4, %%mm3 \n\t"\ "paddd %%mm0, %%mm1 \n\t"\ "paddd %%mm3, %%mm5 \n\t"\ "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\ - "mov 16(%%"REG_d"), %%"REG_S" \n\t"\ - "add $16, %%"REG_d" \n\t"\ + "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ + "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ "test %%"REG_S", %%"REG_S" \n\t"\ "movq %%mm2, %%mm0 \n\t"\ "punpcklwd %%mm3, %%mm2 \n\t"\ @@ -349,6 +355,10 @@ "movq "U_TEMP"(%0), %%mm3 \n\t"\ "movq "V_TEMP"(%0), %%mm4 \n\t"\ +#define YSCALEYUV2PACKEDX_ACCURATE \ + YSCALEYUV2PACKEDX_ACCURATE_UV \ + YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET) + #define YSCALEYUV2RGBX \ "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\ "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\ @@ -384,61 +394,6 @@ "packuswb %%mm0, %%mm2 \n\t"\ "packuswb %%mm6, %%mm5 \n\t"\ "packuswb %%mm3, %%mm4 \n\t"\ - "pxor %%mm7, %%mm7 \n\t" -#if 0 -#define FULL_YSCALEYUV2RGB \ - "pxor %%mm7, %%mm7 \n\t"\ - "movd %6, %%mm6 \n\t" /*yalpha1*/\ - "punpcklwd %%mm6, %%mm6 \n\t"\ - "punpcklwd %%mm6, %%mm6 \n\t"\ - "movd %7, %%mm5 \n\t" /*uvalpha1*/\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "punpcklwd %%mm5, %%mm5 \n\t"\ - "xor %%"REG_a", %%"REG_a" \n\t"\ - ASMALIGN(4)\ - "1: \n\t"\ - "movq (%0, %%"REG_a",2), %%mm0 \n\t" /*buf0[eax]*/\ - "movq (%1, %%"REG_a",2), %%mm1 \n\t" /*buf1[eax]*/\ - "movq (%2, %%"REG_a",2), %%mm2 \n\t" /* uvbuf0[eax]*/\ - "movq (%3, %%"REG_a",2), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ - "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ - "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ - "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ - "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ - "movq "AV_STRINGIFY(VOF)"(%2, %%"REG_a",2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ - "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ - "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ - "movq "AV_STRINGIFY(VOF)"(%3, %%"REG_a",2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\ - "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ - "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ - "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ - "psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\ - "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ -\ -\ - "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ - "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ - "pmulhw "MANGLE(ubCoeff)", %%mm3 \n\t"\ - "psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ - "pmulhw "MANGLE(ugCoeff)", %%mm2 \n\t"\ - "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ - "psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\ -\ -\ - "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\ - "pmulhw "MANGLE(vrCoeff)", %%mm0 \n\t"\ - "pmulhw "MANGLE(vgCoeff)", %%mm4 \n\t"\ - "paddw %%mm1, %%mm3 \n\t" /* B*/\ - "paddw %%mm1, %%mm0 \n\t" /* R*/\ - "packuswb %%mm3, %%mm3 \n\t"\ -\ - "packuswb %%mm0, %%mm0 \n\t"\ - "paddw %%mm4, %%mm2 \n\t"\ - "paddw %%mm2, %%mm1 \n\t" /* G*/\ -\ - "packuswb %%mm1, %%mm1 \n\t" -#endif #define REAL_YSCALEYUV2PACKED(index, c) \ "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ @@ -478,7 +433,7 @@ #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c) -#define REAL_YSCALEYUV2RGB(index, c) \ +#define REAL_YSCALEYUV2RGB_UV(index, c) \ "xor "#index", "#index" \n\t"\ ASMALIGN(4)\ "1: \n\t"\ @@ -502,6 +457,8 @@ "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\ "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\ /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ + +#define REAL_YSCALEYUV2RGB_YA(index, c) \ "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\ "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\ "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\ @@ -514,6 +471,8 @@ "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ + +#define REAL_YSCALEYUV2RGB_COEFF(c) \ "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\ "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\ "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ @@ -541,8 +500,13 @@ "packuswb %%mm0, %%mm2 \n\t"\ "packuswb %%mm6, %%mm5 \n\t"\ "packuswb %%mm3, %%mm4 \n\t"\ - "pxor %%mm7, %%mm7 \n\t" -#define YSCALEYUV2RGB(index, c) REAL_YSCALEYUV2RGB(index, c) + +#define YSCALEYUV2RGB_YA(index, c) REAL_YSCALEYUV2RGB_YA(index, c) + +#define YSCALEYUV2RGB(index, c) \ + REAL_YSCALEYUV2RGB_UV(index, c) \ + REAL_YSCALEYUV2RGB_YA(index, c) \ + REAL_YSCALEYUV2RGB_COEFF(c) #define REAL_YSCALEYUV2PACKED1(index, c) \ "xor "#index", "#index" \n\t"\ @@ -605,7 +569,7 @@ "packuswb %%mm0, %%mm2 \n\t"\ "packuswb %%mm6, %%mm5 \n\t"\ "packuswb %%mm3, %%mm4 \n\t"\ - "pxor %%mm7, %%mm7 \n\t" + #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c) #define REAL_YSCALEYUV2PACKED1b(index, c) \ @@ -677,35 +641,34 @@ "packuswb %%mm0, %%mm2 \n\t"\ "packuswb %%mm6, %%mm5 \n\t"\ "packuswb %%mm3, %%mm4 \n\t"\ - "pxor %%mm7, %%mm7 \n\t" + #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c) -#define REAL_WRITEBGR32(dst, dstw, index) \ - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ - "movq %%mm2, %%mm1 \n\t" /* B */\ - "movq %%mm5, %%mm6 \n\t" /* R */\ - "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ - "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ - "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ - "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ - "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ - "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ - "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ - "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ - "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ - "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ +#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \ + "movq "#b", "#q2" \n\t" /* B */\ + "movq "#r", "#t" \n\t" /* R */\ + "punpcklbw "#g", "#b" \n\t" /* GBGBGBGB 0 */\ + "punpcklbw "#a", "#r" \n\t" /* ARARARAR 0 */\ + "punpckhbw "#g", "#q2" \n\t" /* GBGBGBGB 2 */\ + "punpckhbw "#a", "#t" \n\t" /* ARARARAR 2 */\ + "movq "#b", "#q0" \n\t" /* GBGBGBGB 0 */\ + "movq "#q2", "#q3" \n\t" /* GBGBGBGB 2 */\ + "punpcklwd "#r", "#q0" \n\t" /* ARGBARGB 0 */\ + "punpckhwd "#r", "#b" \n\t" /* ARGBARGB 1 */\ + "punpcklwd "#t", "#q2" \n\t" /* ARGBARGB 2 */\ + "punpckhwd "#t", "#q3" \n\t" /* ARGBARGB 3 */\ \ - MOVNTQ(%%mm0, (dst, index, 4))\ - MOVNTQ(%%mm2, 8(dst, index, 4))\ - MOVNTQ(%%mm1, 16(dst, index, 4))\ - MOVNTQ(%%mm3, 24(dst, index, 4))\ + MOVNTQ( q0, (dst, index, 4))\ + MOVNTQ( b, 8(dst, index, 4))\ + MOVNTQ( q2, 16(dst, index, 4))\ + MOVNTQ( q3, 24(dst, index, 4))\ \ "add $8, "#index" \n\t"\ "cmp "#dstw", "#index" \n\t"\ " jb 1b \n\t" -#define WRITEBGR32(dst, dstw, index) REAL_WRITEBGR32(dst, dstw, index) +#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) -#define REAL_WRITEBGR16(dst, dstw, index) \ +#define REAL_WRITERGB16(dst, dstw, index) \ "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\ "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ @@ -731,9 +694,9 @@ "add $8, "#index" \n\t"\ "cmp "#dstw", "#index" \n\t"\ " jb 1b \n\t" -#define WRITEBGR16(dst, dstw, index) REAL_WRITEBGR16(dst, dstw, index) +#define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index) -#define REAL_WRITEBGR15(dst, dstw, index) \ +#define REAL_WRITERGB15(dst, dstw, index) \ "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\ "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ @@ -760,7 +723,7 @@ "add $8, "#index" \n\t"\ "cmp "#dstw", "#index" \n\t"\ " jb 1b \n\t" -#define WRITEBGR15(dst, dstw, index) REAL_WRITEBGR15(dst, dstw, index) +#define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index) #define WRITEBGR24OLD(dst, dstw, index) \ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ @@ -919,7 +882,7 @@ "cmp "#dstw", "#index" \n\t"\ " jb 1b \n\t" -#ifdef HAVE_MMX2 +#if HAVE_MMX2 #undef WRITEBGR24 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index) #else @@ -949,24 +912,27 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t * int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW) { -#ifdef HAVE_MMX - if (c->flags & SWS_ACCURATE_RND){ - if (uDest){ - YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW) - YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW) - } +#if HAVE_MMX + if(!(c->flags & SWS_BITEXACT)){ + if (c->flags & SWS_ACCURATE_RND){ + if (uDest){ + YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW) + YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW) + } - YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW) - }else{ - if (uDest){ - YSCALEYUV2YV12X( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW) - YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW) - } + YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW) + }else{ + if (uDest){ + YSCALEYUV2YV12X( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW) + YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW) + } - YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW) + YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW) + } + return; } -#else -#ifdef HAVE_ALTIVEC +#endif +#if HAVE_ALTIVEC yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize, chrFilter, chrSrc, chrFilterSize, dest, uDest, vDest, dstW, chrDstW); @@ -975,7 +941,6 @@ yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, chrFilter, chrSrc, chrFilterSize, dest, uDest, vDest, dstW, chrDstW); #endif //!HAVE_ALTIVEC -#endif /* HAVE_MMX */ } static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, @@ -990,34 +955,36 @@ yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize, static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chrSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW) { -#ifdef HAVE_MMX - long p= uDest ? 3 : 1; - uint8_t *src[3]= {lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW}; - uint8_t *dst[3]= {dest, uDest, vDest}; - long counter[3] = {dstW, chrDstW, chrDstW}; - - if (c->flags & SWS_ACCURATE_RND){ - while(p--){ - asm volatile( - YSCALEYUV2YV121_ACCURATE - :: "r" (src[p]), "r" (dst[p] + counter[p]), - "g" (-counter[p]) - : "%"REG_a - ); - } - }else{ - while(p--){ - asm volatile( - YSCALEYUV2YV121 - :: "r" (src[p]), "r" (dst[p] + counter[p]), - "g" (-counter[p]) - : "%"REG_a - ); - } - } - -#else int i; +#if HAVE_MMX + if(!(c->flags & SWS_BITEXACT)){ + long p= uDest ? 3 : 1; + uint8_t *src[3]= {lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW}; + uint8_t *dst[3]= {dest, uDest, vDest}; + long counter[3] = {dstW, chrDstW, chrDstW}; + + if (c->flags & SWS_ACCURATE_RND){ + while(p--){ + __asm__ volatile( + YSCALEYUV2YV121_ACCURATE + :: "r" (src[p]), "r" (dst[p] + counter[p]), + "g" (-counter[p]) + : "%"REG_a + ); + } + }else{ + while(p--){ + __asm__ volatile( + YSCALEYUV2YV121 + :: "r" (src[p]), "r" (dst[p] + counter[p]), + "g" (-counter[p]) + : "%"REG_a + ); + } + } + return; + } +#endif for (i=0; i>7; @@ -1046,7 +1013,6 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chr uDest[i]= u; vDest[i]= v; } -#endif } @@ -1057,137 +1023,148 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_ int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, uint8_t *dest, long dstW, long dstY) { -#ifdef HAVE_MMX +#if HAVE_MMX long dummy=0; - if (c->flags & SWS_ACCURATE_RND){ - switch(c->dstFormat){ - case PIX_FMT_RGB32: - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - WRITEBGR32(%4, %5, %%REGa) + if(!(c->flags & SWS_BITEXACT)){ + if (c->flags & SWS_ACCURATE_RND){ + switch(c->dstFormat){ + case PIX_FMT_RGB32: + YSCALEYUV2PACKEDX_ACCURATE + YSCALEYUV2RGBX + "pcmpeqd %%mm7, %%mm7 \n\t" + WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) - YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_BGR24: - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize - "add %4, %%"REG_c" \n\t" - WRITEBGR24(%%REGc, %5, %%REGa) + YSCALEYUV2PACKEDX_END + return; + case PIX_FMT_BGR24: + YSCALEYUV2PACKEDX_ACCURATE + YSCALEYUV2RGBX + "pxor %%mm7, %%mm7 \n\t" + "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize + "add %4, %%"REG_c" \n\t" + WRITEBGR24(%%REGc, %5, %%REGa) - :: "r" (&c->redDither), - "m" (dummy), "m" (dummy), "m" (dummy), - "r" (dest), "m" (dstW) - : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S - ); - return; - case PIX_FMT_BGR555: - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + :: "r" (&c->redDither), + "m" (dummy), "m" (dummy), "m" (dummy), + "r" (dest), "m" (dstW) + : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S + ); + return; + case PIX_FMT_RGB555: + YSCALEYUV2PACKEDX_ACCURATE + YSCALEYUV2RGBX + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2\n\t" - "paddusb "MANGLE(g5Dither)", %%mm4\n\t" - "paddusb "MANGLE(r5Dither)", %%mm5\n\t" + "paddusb "BLUE_DITHER"(%0), %%mm2\n\t" + "paddusb "GREEN_DITHER"(%0), %%mm4\n\t" + "paddusb "RED_DITHER"(%0), %%mm5\n\t" #endif - WRITEBGR15(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_BGR565: - YSCALEYUV2PACKEDX_ACCURATE - YSCALEYUV2RGBX - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + WRITERGB15(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END + return; + case PIX_FMT_RGB565: + YSCALEYUV2PACKEDX_ACCURATE + YSCALEYUV2RGBX + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2\n\t" - "paddusb "MANGLE(g6Dither)", %%mm4\n\t" - "paddusb "MANGLE(r5Dither)", %%mm5\n\t" + "paddusb "BLUE_DITHER"(%0), %%mm2\n\t" + "paddusb "GREEN_DITHER"(%0), %%mm4\n\t" + "paddusb "RED_DITHER"(%0), %%mm5\n\t" #endif - WRITEBGR16(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_YUYV422: - YSCALEYUV2PACKEDX_ACCURATE - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + WRITERGB16(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END + return; + case PIX_FMT_YUYV422: + YSCALEYUV2PACKEDX_ACCURATE + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ - "psraw $3, %%mm3 \n\t" - "psraw $3, %%mm4 \n\t" - "psraw $3, %%mm1 \n\t" - "psraw $3, %%mm7 \n\t" - WRITEYUY2(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - } - }else{ - switch(c->dstFormat) - { - case PIX_FMT_RGB32: - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - WRITEBGR32(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_BGR24: - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize - "add %4, %%"REG_c" \n\t" - WRITEBGR24(%%REGc, %5, %%REGa) + "psraw $3, %%mm3 \n\t" + "psraw $3, %%mm4 \n\t" + "psraw $3, %%mm1 \n\t" + "psraw $3, %%mm7 \n\t" + WRITEYUY2(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END + return; + } + }else{ + switch(c->dstFormat) + { + case PIX_FMT_RGB32: + YSCALEYUV2PACKEDX + YSCALEYUV2RGBX + "pcmpeqd %%mm7, %%mm7 \n\t" + WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) + YSCALEYUV2PACKEDX_END + return; + case PIX_FMT_BGR24: + YSCALEYUV2PACKEDX + YSCALEYUV2RGBX + "pxor %%mm7, %%mm7 \n\t" + "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize + "add %4, %%"REG_c" \n\t" + WRITEBGR24(%%REGc, %5, %%REGa) - :: "r" (&c->redDither), - "m" (dummy), "m" (dummy), "m" (dummy), - "r" (dest), "m" (dstW) - : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S - ); - return; - case PIX_FMT_BGR555: - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + :: "r" (&c->redDither), + "m" (dummy), "m" (dummy), "m" (dummy), + "r" (dest), "m" (dstW) + : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S + ); + return; + case PIX_FMT_RGB555: + YSCALEYUV2PACKEDX + YSCALEYUV2RGBX + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g5Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t" + "paddusb "RED_DITHER"(%0), %%mm5 \n\t" #endif - WRITEBGR15(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_BGR565: - YSCALEYUV2PACKEDX - YSCALEYUV2RGBX - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + WRITERGB15(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END + return; + case PIX_FMT_RGB565: + YSCALEYUV2PACKEDX + YSCALEYUV2RGBX + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g6Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t" + "paddusb "RED_DITHER"(%0), %%mm5 \n\t" #endif - WRITEBGR16(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; - case PIX_FMT_YUYV422: - YSCALEYUV2PACKEDX - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + WRITERGB16(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END + return; + case PIX_FMT_YUYV422: + YSCALEYUV2PACKEDX + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ - "psraw $3, %%mm3 \n\t" - "psraw $3, %%mm4 \n\t" - "psraw $3, %%mm1 \n\t" - "psraw $3, %%mm7 \n\t" - WRITEYUY2(%4, %5, %%REGa) - YSCALEYUV2PACKEDX_END - return; + "psraw $3, %%mm3 \n\t" + "psraw $3, %%mm4 \n\t" + "psraw $3, %%mm1 \n\t" + "psraw $3, %%mm7 \n\t" + WRITEYUY2(%4, %5, %%REGa) + YSCALEYUV2PACKEDX_END + return; + } } } #endif /* HAVE_MMX */ -#ifdef HAVE_ALTIVEC +#if HAVE_ALTIVEC /* The following list of supported dstFormat values should match what's found in the body of altivec_yuv2packedX() */ - if (c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA || + if (!(c->flags & SWS_BITEXACT) && + (c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA || c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 || - c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB) + c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB)) altivec_yuv2packedX (c, lumFilter, lumSrc, lumFilterSize, chrFilter, chrSrc, chrFilterSize, dest, dstW, dstY); @@ -1204,236 +1181,23 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_ static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) { - int yalpha1=yalpha^4095; - int uvalpha1=uvalpha^4095; + int yalpha1=4095- yalpha; + int uvalpha1=4095-uvalpha; int i; -#if 0 //isn't used - if (flags&SWS_FULL_CHR_H_INT) - { - switch(dstFormat) - { -#ifdef HAVE_MMX - case PIX_FMT_RGB32: - asm volatile( - - -FULL_YSCALEYUV2RGB - "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG - "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 - - "movq %%mm3, %%mm1 \n\t" - "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 - "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 - - MOVNTQ(%%mm3, (%4, %%REGa, 4)) - MOVNTQ(%%mm1, 8(%4, %%REGa, 4)) - - "add $4, %%"REG_a" \n\t" - "cmp %5, %%"REG_a" \n\t" - " jb 1b \n\t" - - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" ((long)dstW), - "m" (yalpha1), "m" (uvalpha1) - : "%"REG_a - ); - break; - case PIX_FMT_BGR24: - asm volatile( - -FULL_YSCALEYUV2RGB - - // lsb ... msb - "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG - "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 - - "movq %%mm3, %%mm1 \n\t" - "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 - "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 - - "movq %%mm3, %%mm2 \n\t" // BGR0BGR0 - "psrlq $8, %%mm3 \n\t" // GR0BGR00 - "pand "MANGLE(bm00000111)", %%mm2 \n\t" // BGR00000 - "pand "MANGLE(bm11111000)", %%mm3 \n\t" // 000BGR00 - "por %%mm2, %%mm3 \n\t" // BGRBGR00 - "movq %%mm1, %%mm2 \n\t" - "psllq $48, %%mm1 \n\t" // 000000BG - "por %%mm1, %%mm3 \n\t" // BGRBGRBG - - "movq %%mm2, %%mm1 \n\t" // BGR0BGR0 - "psrld $16, %%mm2 \n\t" // R000R000 - "psrlq $24, %%mm1 \n\t" // 0BGR0000 - "por %%mm2, %%mm1 \n\t" // RBGRR000 - - "mov %4, %%"REG_b" \n\t" - "add %%"REG_a", %%"REG_b" \n\t" - -#ifdef HAVE_MMX2 - //FIXME Alignment - "movntq %%mm3, (%%"REG_b", %%"REG_a", 2) \n\t" - "movntq %%mm1, 8(%%"REG_b", %%"REG_a", 2) \n\t" -#else - "movd %%mm3, (%%"REG_b", %%"REG_a", 2) \n\t" - "psrlq $32, %%mm3 \n\t" - "movd %%mm3, 4(%%"REG_b", %%"REG_a", 2) \n\t" - "movd %%mm1, 8(%%"REG_b", %%"REG_a", 2) \n\t" -#endif - "add $4, %%"REG_a" \n\t" - "cmp %5, %%"REG_a" \n\t" - " jb 1b \n\t" - - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), - "m" (yalpha1), "m" (uvalpha1) - : "%"REG_a, "%"REG_b - ); - break; - case PIX_FMT_BGR555: - asm volatile( - -FULL_YSCALEYUV2RGB -#ifdef DITHER1XBPP - "paddusb "MANGLE(g5Dither)", %%mm1 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm0 \n\t" - "paddusb "MANGLE(b5Dither)", %%mm3 \n\t" -#endif - "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G - "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B - "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R - - "psrlw $3, %%mm3 \n\t" - "psllw $2, %%mm1 \n\t" - "psllw $7, %%mm0 \n\t" - "pand "MANGLE(g15Mask)", %%mm1 \n\t" - "pand "MANGLE(r15Mask)", %%mm0 \n\t" - - "por %%mm3, %%mm1 \n\t" - "por %%mm1, %%mm0 \n\t" - - MOVNTQ(%%mm0, (%4, %%REGa, 2)) - - "add $4, %%"REG_a" \n\t" - "cmp %5, %%"REG_a" \n\t" - " jb 1b \n\t" - - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), - "m" (yalpha1), "m" (uvalpha1) - : "%"REG_a - ); - break; - case PIX_FMT_BGR565: - asm volatile( - -FULL_YSCALEYUV2RGB -#ifdef DITHER1XBPP - "paddusb "MANGLE(g6Dither)", %%mm1 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm0 \n\t" - "paddusb "MANGLE(b5Dither)", %%mm3 \n\t" -#endif - "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G - "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B - "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R - - "psrlw $3, %%mm3 \n\t" - "psllw $3, %%mm1 \n\t" - "psllw $8, %%mm0 \n\t" - "pand "MANGLE(g16Mask)", %%mm1 \n\t" - "pand "MANGLE(r16Mask)", %%mm0 \n\t" - - "por %%mm3, %%mm1 \n\t" - "por %%mm1, %%mm0 \n\t" - - MOVNTQ(%%mm0, (%4, %%REGa, 2)) - - "add $4, %%"REG_a" \n\t" - "cmp %5, %%"REG_a" \n\t" - " jb 1b \n\t" - - :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), - "m" (yalpha1), "m" (uvalpha1) - : "%"REG_a - ); - break; -#endif /* HAVE_MMX */ - case PIX_FMT_BGR32: -#ifndef HAVE_MMX - case PIX_FMT_RGB32: -#endif - if (dstFormat==PIX_FMT_RGB32) - { - int i; -#ifdef WORDS_BIGENDIAN - dest++; -#endif - for (i=0;i>19)]; - int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); - int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19); - dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; - dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; - dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; - dest+= 4; - } - } - else if (dstFormat==PIX_FMT_BGR24) - { - int i; - for (i=0;i>19)]; - int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); - int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19); - dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; - dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; - dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; - dest+= 3; - } - } - else if (dstFormat==PIX_FMT_BGR565) - { - int i; - for (i=0;i>19)]; - int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); - int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19); - - ((uint16_t*)dest)[i] = - clip_table16b[(Y + yuvtab_40cf[U]) >>13] | - clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | - clip_table16r[(Y + yuvtab_3343[V]) >>13]; - } - } - else if (dstFormat==PIX_FMT_BGR555) - { - int i; - for (i=0;i>19)]; - int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); - int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19); - - ((uint16_t*)dest)[i] = - clip_table15b[(Y + yuvtab_40cf[U]) >>13] | - clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | - clip_table15r[(Y + yuvtab_3343[V]) >>13]; - } - } - }//FULL_UV_IPOL - else - { -#endif // if 0 -#ifdef HAVE_MMX +#if HAVE_MMX + if(!(c->flags & SWS_BITEXACT)){ switch(c->dstFormat) { //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( case PIX_FMT_RGB32: - asm volatile( + __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" "push %%"REG_BP" \n\t" YSCALEYUV2RGB(%%REGBP, %5) - WRITEBGR32(%%REGb, 8280(%5), %%REGBP) + "pcmpeqd %%mm7, %%mm7 \n\t" + WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" @@ -1442,11 +1206,12 @@ FULL_YSCALEYUV2RGB ); return; case PIX_FMT_BGR24: - asm volatile( + __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" "push %%"REG_BP" \n\t" YSCALEYUV2RGB(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" WRITEBGR24(%%REGb, 8280(%5), %%REGBP) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" @@ -1454,20 +1219,21 @@ FULL_YSCALEYUV2RGB "a" (&c->redDither) ); return; - case PIX_FMT_BGR555: - asm volatile( + case PIX_FMT_RGB555: + __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" "push %%"REG_BP" \n\t" YSCALEYUV2RGB(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g5Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif - WRITEBGR15(%%REGb, 8280(%5), %%REGBP) + WRITERGB15(%%REGb, 8280(%5), %%REGBP) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" @@ -1475,20 +1241,21 @@ FULL_YSCALEYUV2RGB "a" (&c->redDither) ); return; - case PIX_FMT_BGR565: - asm volatile( + case PIX_FMT_RGB565: + __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" "push %%"REG_BP" \n\t" YSCALEYUV2RGB(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g6Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif - WRITEBGR16(%%REGb, 8280(%5), %%REGBP) + WRITERGB16(%%REGb, 8280(%5), %%REGBP) "pop %%"REG_BP" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), @@ -1496,7 +1263,7 @@ FULL_YSCALEYUV2RGB ); return; case PIX_FMT_YUYV422: - asm volatile( + __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" "push %%"REG_BP" \n\t" @@ -1510,8 +1277,9 @@ FULL_YSCALEYUV2RGB return; default: break; } + } #endif //HAVE_MMX -YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C) +YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C, YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C) } /** @@ -1532,200 +1300,210 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t * return; } -#ifdef HAVE_MMX - if (uvalpha < 2048) // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster - { - switch(dstFormat) +#if HAVE_MMX + if(!(flags & SWS_BITEXACT)){ + if (uvalpha < 2048) // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster { - case PIX_FMT_RGB32: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - WRITEBGR32(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + switch(dstFormat) + { + case PIX_FMT_RGB32: + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1(%%REGBP, %5) + "pcmpeqd %%mm7, %%mm7 \n\t" + WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_BGR24: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - WRITEBGR24(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + case PIX_FMT_BGR24: + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" + WRITEBGR24(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_BGR555: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + case PIX_FMT_RGB555: + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g5Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif - WRITEBGR15(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITERGB15(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_BGR565: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1(%%REGBP, %5) - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + case PIX_FMT_RGB565: + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g6Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif - WRITEBGR16(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITERGB16(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_YUYV422: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2PACKED1(%%REGBP, %5) - WRITEYUY2(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + case PIX_FMT_YUYV422: + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2PACKED1(%%REGBP, %5) + WRITEYUY2(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + } } - } - else - { - switch(dstFormat) + else { - case PIX_FMT_RGB32: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - WRITEBGR32(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + switch(dstFormat) + { + case PIX_FMT_RGB32: + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1b(%%REGBP, %5) + "pcmpeqd %%mm7, %%mm7 \n\t" + WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_BGR24: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - WRITEBGR24(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + case PIX_FMT_BGR24: + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1b(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" + WRITEBGR24(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_BGR555: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + case PIX_FMT_RGB555: + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1b(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g5Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif - WRITEBGR15(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITERGB15(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_BGR565: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2RGB1b(%%REGBP, %5) - /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + case PIX_FMT_RGB565: + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2RGB1b(%%REGBP, %5) + "pxor %%mm7, %%mm7 \n\t" + /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(g6Dither)", %%mm4 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm5 \n\t" + "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t" + "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" + "paddusb "RED_DITHER"(%5), %%mm5 \n\t" #endif - WRITEBGR16(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + WRITERGB16(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; - case PIX_FMT_YUYV422: - asm volatile( - "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" - "mov %4, %%"REG_b" \n\t" - "push %%"REG_BP" \n\t" - YSCALEYUV2PACKED1b(%%REGBP, %5) - WRITEYUY2(%%REGb, 8280(%5), %%REGBP) - "pop %%"REG_BP" \n\t" - "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + case PIX_FMT_YUYV422: + __asm__ volatile( + "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" + "mov %4, %%"REG_b" \n\t" + "push %%"REG_BP" \n\t" + YSCALEYUV2PACKED1b(%%REGBP, %5) + WRITEYUY2(%%REGb, 8280(%5), %%REGBP) + "pop %%"REG_BP" \n\t" + "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" - :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), - "a" (&c->redDither) - ); - return; + :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), + "a" (&c->redDither) + ); + return; + } } } #endif /* HAVE_MMX */ if (uvalpha < 2048) { - YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C) + YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C, YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C) }else{ - YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C) + YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C, YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C) } } //FIXME yuy2* can read up to 7 samples too much -static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width) +static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused) { -#ifdef HAVE_MMX - asm volatile( +#if HAVE_MMX + __asm__ volatile( "movq "MANGLE(bm01010101)", %%mm2 \n\t" "mov %0, %%"REG_a" \n\t" "1: \n\t" @@ -1747,10 +1525,10 @@ static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width) #endif } -static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width) +static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused) { -#ifdef HAVE_MMX - asm volatile( +#if HAVE_MMX + __asm__ volatile( "movq "MANGLE(bm01010101)", %%mm4 \n\t" "mov %0, %%"REG_a" \n\t" "1: \n\t" @@ -1784,10 +1562,10 @@ static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, /* This is almost identical to the previous, end exists only because * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */ -static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width) +static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused) { -#ifdef HAVE_MMX - asm volatile( +#if HAVE_MMX + __asm__ volatile( "mov %0, %%"REG_a" \n\t" "1: \n\t" "movq (%1, %%"REG_a",2), %%mm0 \n\t" @@ -1808,10 +1586,10 @@ static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width) #endif } -static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width) +static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused) { -#ifdef HAVE_MMX - asm volatile( +#if HAVE_MMX + __asm__ volatile( "movq "MANGLE(bm01010101)", %%mm4 \n\t" "mov %0, %%"REG_a" \n\t" "1: \n\t" @@ -1843,110 +1621,184 @@ static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, assert(src1 == src2); } -static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width) -{ - int i; - for (i=0; i>8)&0xFF; - int r= (((uint32_t*)src)[i]>>16)&0xFF; - - dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); - } +#define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\ +static inline void RENAME(name)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)\ +{\ + int i;\ + for (i=0; i>shb)&maskb;\ + int g= (((type*)src)[i]>>shg)&maskg;\ + int r= (((type*)src)[i]>>shr)&maskr;\ +\ + dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\ + }\ } -static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) -{ - int i; - assert(src1 == src2); - for (i=0; i>8; - const int r= l>>16; +BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8) +BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8) +BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY , RGB2YUV_SHIFT+8) +BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY , RGB2YUV_SHIFT+7) +BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8) +BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7) - dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128; - dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128; - } +#define BGR2UV(type, name, shr, shg, shb, maska, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S)\ +static inline void RENAME(name)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\ +{\ + int i;\ + for (i=0; i>shb;\ + int g= (((type*)src)[i]&maskg)>>shg;\ + int r= (((type*)src)[i]&maskr)>>shr;\ +\ + dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\ + dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\ + }\ +}\ +static inline void RENAME(name ## _half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\ +{\ + int i;\ + for (i=0; i>shb;\ + int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\ + g&= maskg|(2*maskg);\ +\ + g>>=shg;\ +\ + dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\ + dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\ + }\ } -static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width) +BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0xFF000000, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8) +BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0xFF000000, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8) +BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RU<<11, GU<<5, BU , RV<<11, GV<<5, BV , RGB2YUV_SHIFT+8) +BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<5, BU , RV<<10, GV<<5, BV , RGB2YUV_SHIFT+7) +BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8) +BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7) + +#if HAVE_MMX +static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, uint8_t *src, long width, int srcFormat) { -#ifdef HAVE_MMX - asm volatile( - "mov %2, %%"REG_a" \n\t" - "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t" - "movq "MANGLE(ff_w1111)", %%mm5 \n\t" - "pxor %%mm7, %%mm7 \n\t" - "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" - ASMALIGN(4) - "1: \n\t" - PREFETCH" 64(%0, %%"REG_d") \n\t" - "movd (%0, %%"REG_d"), %%mm0 \n\t" - "movd 3(%0, %%"REG_d"), %%mm1 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "movd 6(%0, %%"REG_d"), %%mm2 \n\t" - "movd 9(%0, %%"REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "pmaddwd %%mm6, %%mm0 \n\t" - "pmaddwd %%mm6, %%mm1 \n\t" - "pmaddwd %%mm6, %%mm2 \n\t" - "pmaddwd %%mm6, %%mm3 \n\t" -#ifndef FAST_BGR2YV12 - "psrad $8, %%mm0 \n\t" - "psrad $8, %%mm1 \n\t" - "psrad $8, %%mm2 \n\t" - "psrad $8, %%mm3 \n\t" -#endif - "packssdw %%mm1, %%mm0 \n\t" - "packssdw %%mm3, %%mm2 \n\t" - "pmaddwd %%mm5, %%mm0 \n\t" - "pmaddwd %%mm5, %%mm2 \n\t" - "packssdw %%mm2, %%mm0 \n\t" - "psraw $7, %%mm0 \n\t" - "movd 12(%0, %%"REG_d"), %%mm4 \n\t" - "movd 15(%0, %%"REG_d"), %%mm1 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "movd 18(%0, %%"REG_d"), %%mm2 \n\t" - "movd 21(%0, %%"REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "pmaddwd %%mm6, %%mm4 \n\t" - "pmaddwd %%mm6, %%mm1 \n\t" - "pmaddwd %%mm6, %%mm2 \n\t" - "pmaddwd %%mm6, %%mm3 \n\t" -#ifndef FAST_BGR2YV12 - "psrad $8, %%mm4 \n\t" - "psrad $8, %%mm1 \n\t" - "psrad $8, %%mm2 \n\t" - "psrad $8, %%mm3 \n\t" -#endif - "packssdw %%mm1, %%mm4 \n\t" - "packssdw %%mm3, %%mm2 \n\t" - "pmaddwd %%mm5, %%mm4 \n\t" - "pmaddwd %%mm5, %%mm2 \n\t" - "add $24, %%"REG_d" \n\t" - "packssdw %%mm2, %%mm4 \n\t" - "psraw $7, %%mm4 \n\t" + if(srcFormat == PIX_FMT_BGR24){ + __asm__ volatile( + "movq "MANGLE(ff_bgr24toY1Coeff)", %%mm5 \n\t" + "movq "MANGLE(ff_bgr24toY2Coeff)", %%mm6 \n\t" + : + ); + }else{ + __asm__ volatile( + "movq "MANGLE(ff_rgb24toY1Coeff)", %%mm5 \n\t" + "movq "MANGLE(ff_rgb24toY2Coeff)", %%mm6 \n\t" + : + ); + } - "packuswb %%mm4, %%mm0 \n\t" - "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t" - - "movq %%mm0, (%1, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" - " js 1b \n\t" - : : "r" (src+width*3), "r" (dst+width), "g" (-width) - : "%"REG_a, "%"REG_d + __asm__ volatile( + "movq "MANGLE(ff_bgr24toYOffset)", %%mm4 \n\t" + "mov %2, %%"REG_a" \n\t" + "pxor %%mm7, %%mm7 \n\t" + "1: \n\t" + PREFETCH" 64(%0) \n\t" + "movd (%0), %%mm0 \n\t" + "movd 2(%0), %%mm1 \n\t" + "movd 6(%0), %%mm2 \n\t" + "movd 8(%0), %%mm3 \n\t" + "add $12, %0 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm3 \n\t" + "pmaddwd %%mm5, %%mm0 \n\t" + "pmaddwd %%mm6, %%mm1 \n\t" + "pmaddwd %%mm5, %%mm2 \n\t" + "pmaddwd %%mm6, %%mm3 \n\t" + "paddd %%mm1, %%mm0 \n\t" + "paddd %%mm3, %%mm2 \n\t" + "paddd %%mm4, %%mm0 \n\t" + "paddd %%mm4, %%mm2 \n\t" + "psrad $15, %%mm0 \n\t" + "psrad $15, %%mm2 \n\t" + "packssdw %%mm2, %%mm0 \n\t" + "packuswb %%mm0, %%mm0 \n\t" + "movd %%mm0, (%1, %%"REG_a") \n\t" + "add $4, %%"REG_a" \n\t" + " js 1b \n\t" + : "+r" (src) + : "r" (dst+width), "g" (-width) + : "%"REG_a ); +} + +static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, long width, int srcFormat) +{ + __asm__ volatile( + "movq 24+%4, %%mm6 \n\t" + "mov %3, %%"REG_a" \n\t" + "pxor %%mm7, %%mm7 \n\t" + "1: \n\t" + PREFETCH" 64(%0) \n\t" + "movd (%0), %%mm0 \n\t" + "movd 2(%0), %%mm1 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + "movq %%mm0, %%mm2 \n\t" + "movq %%mm1, %%mm3 \n\t" + "pmaddwd %4, %%mm0 \n\t" + "pmaddwd 8+%4, %%mm1 \n\t" + "pmaddwd 16+%4, %%mm2 \n\t" + "pmaddwd %%mm6, %%mm3 \n\t" + "paddd %%mm1, %%mm0 \n\t" + "paddd %%mm3, %%mm2 \n\t" + + "movd 6(%0), %%mm1 \n\t" + "movd 8(%0), %%mm3 \n\t" + "add $12, %0 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm3 \n\t" + "movq %%mm1, %%mm4 \n\t" + "movq %%mm3, %%mm5 \n\t" + "pmaddwd %4, %%mm1 \n\t" + "pmaddwd 8+%4, %%mm3 \n\t" + "pmaddwd 16+%4, %%mm4 \n\t" + "pmaddwd %%mm6, %%mm5 \n\t" + "paddd %%mm3, %%mm1 \n\t" + "paddd %%mm5, %%mm4 \n\t" + + "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3 \n\t" + "paddd %%mm3, %%mm0 \n\t" + "paddd %%mm3, %%mm2 \n\t" + "paddd %%mm3, %%mm1 \n\t" + "paddd %%mm3, %%mm4 \n\t" + "psrad $15, %%mm0 \n\t" + "psrad $15, %%mm2 \n\t" + "psrad $15, %%mm1 \n\t" + "psrad $15, %%mm4 \n\t" + "packssdw %%mm1, %%mm0 \n\t" + "packssdw %%mm4, %%mm2 \n\t" + "packuswb %%mm0, %%mm0 \n\t" + "packuswb %%mm2, %%mm2 \n\t" + "movd %%mm0, (%1, %%"REG_a") \n\t" + "movd %%mm2, (%2, %%"REG_a") \n\t" + "add $4, %%"REG_a" \n\t" + " js 1b \n\t" + : "+r" (src) + : "r" (dstU+width), "r" (dstV+width), "g" (-width), "m"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24][0]) + : "%"REG_a + ); +} +#endif + +static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused) +{ +#if HAVE_MMX + RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24); #else int i; for (i=0; i>RGB2YUV_SHIFT; + dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; + } +#endif /* HAVE_MMX */ + assert(src1 == src2); +} -#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) - "movq 12(%0, %%"REG_d"), %%mm4 \n\t" - "movq 18(%0, %%"REG_d"), %%mm2 \n\t" - "movq %%mm4, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "psrlq $24, %%mm4 \n\t" - "psrlq $24, %%mm2 \n\t" - PAVGB(%%mm1, %%mm4) - PAVGB(%%mm3, %%mm2) - "punpcklbw %%mm7, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" -#else - "movd 12(%0, %%"REG_d"), %%mm4 \n\t" - "movd 15(%0, %%"REG_d"), %%mm2 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "paddw %%mm2, %%mm4 \n\t" - "movd 18(%0, %%"REG_d"), %%mm5 \n\t" - "movd 21(%0, %%"REG_d"), %%mm2 \n\t" - "punpcklbw %%mm7, %%mm5 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "paddw %%mm5, %%mm2 \n\t" - "movq "MANGLE(ff_w1111)", %%mm5 \n\t" - "psrlw $2, %%mm4 \n\t" - "psrlw $2, %%mm2 \n\t" -#endif - "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" - "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" - - "pmaddwd %%mm4, %%mm1 \n\t" - "pmaddwd %%mm2, %%mm3 \n\t" - "pmaddwd %%mm6, %%mm4 \n\t" - "pmaddwd %%mm6, %%mm2 \n\t" -#ifndef FAST_BGR2YV12 - "psrad $8, %%mm4 \n\t" - "psrad $8, %%mm1 \n\t" - "psrad $8, %%mm2 \n\t" - "psrad $8, %%mm3 \n\t" -#endif - "packssdw %%mm2, %%mm4 \n\t" - "packssdw %%mm3, %%mm1 \n\t" - "pmaddwd %%mm5, %%mm4 \n\t" - "pmaddwd %%mm5, %%mm1 \n\t" - "add $24, %%"REG_d" \n\t" - "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 - "psraw $7, %%mm4 \n\t" - - "movq %%mm0, %%mm1 \n\t" - "punpckldq %%mm4, %%mm0 \n\t" - "punpckhdq %%mm4, %%mm1 \n\t" - "packsswb %%mm1, %%mm0 \n\t" - "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t" - - "movd %%mm0, (%1, %%"REG_a") \n\t" - "punpckhdq %%mm0, %%mm0 \n\t" - "movd %%mm0, (%2, %%"REG_a") \n\t" - "add $4, %%"REG_a" \n\t" - " js 1b \n\t" - : : "r" (src1+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width) - : "%"REG_a, "%"REG_d - ); -#else +static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused) +{ int i; for (i=0; i>(RGB2YUV_SHIFT+1)) + 128; - dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128; + dstU[i]= (RU*r + GU*g + BU*b + (257<>(RGB2YUV_SHIFT+1); + dstV[i]= (RV*r + GV*g + BV*b + (257<>(RGB2YUV_SHIFT+1); } -#endif /* HAVE_MMX */ assert(src1 == src2); } -static inline void RENAME(rgb16ToY)(uint8_t *dst, uint8_t *src, int width) -{ - int i; - for (i=0; i>5)&0x3F; - int r= (d>>11)&0x1F; - - dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16; - } -} - -static inline void RENAME(rgb16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) -{ - int i; - assert(src1==src2); - for (i=0; i>5)&0x07C0F83F); - - int dh2= (dh>>11) + (dh<<21); - int d= dh2 + dl; - - int b= d&0x7F; - int r= (d>>11)&0x7F; - int g= d>>21; - dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+1-2)) + 128; - dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+1-2)) + 128; - } -} - -static inline void RENAME(rgb15ToY)(uint8_t *dst, uint8_t *src, int width) -{ - int i; - for (i=0; i>5)&0x1F; - int r= (d>>10)&0x1F; - - dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16; - } -} - -static inline void RENAME(rgb15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) -{ - int i; - assert(src1==src2); - for (i=0; i>5)&0x03E0F81F); - - int dh2= (dh>>11) + (dh<<21); - int d= dh2 + dl; - - int b= d&0x7F; - int r= (d>>10)&0x7F; - int g= d>>21; - dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1-3)) + 128; - dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1-3)) + 128; - } -} - - -static inline void RENAME(rgb32ToY)(uint8_t *dst, uint8_t *src, int width) -{ - int i; - for (i=0; i>8)&0xFF; - int b= (((uint32_t*)src)[i]>>16)&0xFF; - - dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); - } -} - -static inline void RENAME(rgb32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) -{ - int i; - assert(src1==src2); - for (i=0; i>8; - const int b= l>>16; - - dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128; - dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128; - } -} - -static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, int width) +static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused) { +#if HAVE_MMX + RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24); +#else int i; for (i=0; i>RGB2YUV_SHIFT); } +#endif } -static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) +static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused) +{ +#if HAVE_MMX + assert(src1==src2); + RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24); +#else + int i; + assert(src1==src2); + for (i=0; i>RGB2YUV_SHIFT; + dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; + } +#endif +} + +static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused) { int i; assert(src1==src2); @@ -2222,78 +1893,13 @@ static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 int g= src1[6*i + 1] + src1[6*i + 4]; int b= src1[6*i + 2] + src1[6*i + 5]; - dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128; - dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128; + dstU[i]= (RU*r + GU*g + BU*b + (257<>(RGB2YUV_SHIFT+1); + dstV[i]= (RV*r + GV*g + BV*b + (257<>(RGB2YUV_SHIFT+1); } } -static inline void RENAME(bgr16ToY)(uint8_t *dst, uint8_t *src, int width) -{ - int i; - for (i=0; i>5)&0x3F; - int b= (d>>11)&0x1F; - dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16; - } -} - -static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) -{ - int i; - assert(src1 == src2); - for (i=0; i>16) + (d0<<16))&0x07E0F81F); - - int r= d&0x3F; - int b= (d>>11)&0x3F; - int g= d>>21; - dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+1-2)) + 128; - dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+1-2)) + 128; - } -} - -static inline void RENAME(bgr15ToY)(uint8_t *dst, uint8_t *src, int width) -{ - int i; - for (i=0; i>5)&0x1F; - int b= (d>>10)&0x1F; - - dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16; - } -} - -static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) -{ - int i; - assert(src1 == src2); - for (i=0; i>16) + (d0<<16))&0x03E07C1F); - - int r= d&0x3F; - int b= (d>>10)&0x3F; - int g= d>>21; - dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1-3)) + 128; - dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1-3)) + 128; - } -} - -static inline void RENAME(palToY)(uint8_t *dst, uint8_t *src, int width, uint32_t *pal) +static inline void RENAME(palToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *pal) { int i; for (i=0; i>(7-j))&1)*255; + } +} + +static inline void RENAME(monoblack2Y)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused) +{ + int i, j; + for (i=0; i>(7-j))&1)*255; + } +} + // bilinear / bicubic scaling static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc, int16_t *filter, int16_t *filterPos, long filterSize) { -#ifdef HAVE_MMX +#if HAVE_MMX assert(filterSize % 4 == 0 && filterSize>0); if (filterSize==4) // Always true for upscaling, sometimes for down, too. { @@ -2329,12 +1955,11 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW filter-= counter*2; filterPos-= counter/2; dst-= counter/2; - asm volatile( + __asm__ volatile( #if defined(PIC) "push %%"REG_b" \n\t" #endif "pxor %%mm7, %%mm7 \n\t" - "movq "MANGLE(w02)", %%mm6 \n\t" "push %%"REG_BP" \n\t" // we use 7 regs here ... "mov %%"REG_a", %%"REG_BP" \n\t" ASMALIGN(4) @@ -2349,10 +1974,11 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW "punpcklbw %%mm7, %%mm2 \n\t" "pmaddwd %%mm1, %%mm0 \n\t" "pmaddwd %%mm2, %%mm3 \n\t" - "psrad $8, %%mm0 \n\t" - "psrad $8, %%mm3 \n\t" - "packssdw %%mm3, %%mm0 \n\t" - "pmaddwd %%mm6, %%mm0 \n\t" + "movq %%mm0, %%mm4 \n\t" + "punpckldq %%mm3, %%mm0 \n\t" + "punpckhdq %%mm3, %%mm4 \n\t" + "paddd %%mm4, %%mm0 \n\t" + "psrad $7, %%mm0 \n\t" "packssdw %%mm0, %%mm0 \n\t" "movd %%mm0, (%4, %%"REG_BP") \n\t" "add $4, %%"REG_BP" \n\t" @@ -2375,12 +2001,11 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW filter-= counter*4; filterPos-= counter/2; dst-= counter/2; - asm volatile( + __asm__ volatile( #if defined(PIC) "push %%"REG_b" \n\t" #endif "pxor %%mm7, %%mm7 \n\t" - "movq "MANGLE(w02)", %%mm6 \n\t" "push %%"REG_BP" \n\t" // we use 7 regs here ... "mov %%"REG_a", %%"REG_BP" \n\t" ASMALIGN(4) @@ -2406,11 +2031,11 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW "pmaddwd %%mm2, %%mm5 \n\t" "paddd %%mm4, %%mm0 \n\t" "paddd %%mm5, %%mm3 \n\t" - - "psrad $8, %%mm0 \n\t" - "psrad $8, %%mm3 \n\t" - "packssdw %%mm3, %%mm0 \n\t" - "pmaddwd %%mm6, %%mm0 \n\t" + "movq %%mm0, %%mm4 \n\t" + "punpckldq %%mm3, %%mm0 \n\t" + "punpckhdq %%mm3, %%mm4 \n\t" + "paddd %%mm4, %%mm0 \n\t" + "psrad $7, %%mm0 \n\t" "packssdw %%mm0, %%mm0 \n\t" "movd %%mm0, (%4, %%"REG_BP") \n\t" "add $4, %%"REG_BP" \n\t" @@ -2434,9 +2059,8 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW //filter-= counter*filterSize/2; filterPos-= counter/2; dst-= counter/2; - asm volatile( + __asm__ volatile( "pxor %%mm7, %%mm7 \n\t" - "movq "MANGLE(w02)", %%mm6 \n\t" ASMALIGN(4) "1: \n\t" "mov %2, %%"REG_c" \n\t" @@ -2461,10 +2085,11 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW "cmp %4, %%"REG_c" \n\t" " jb 2b \n\t" "add %6, %1 \n\t" - "psrad $8, %%mm4 \n\t" - "psrad $8, %%mm5 \n\t" - "packssdw %%mm5, %%mm4 \n\t" - "pmaddwd %%mm6, %%mm4 \n\t" + "movq %%mm4, %%mm0 \n\t" + "punpckldq %%mm5, %%mm4 \n\t" + "punpckhdq %%mm5, %%mm0 \n\t" + "paddd %%mm0, %%mm4 \n\t" + "psrad $7, %%mm4 \n\t" "packssdw %%mm4, %%mm4 \n\t" "mov %3, %%"REG_a" \n\t" "movd %%mm4, (%%"REG_a", %0) \n\t" @@ -2478,7 +2103,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW ); } #else -#ifdef HAVE_ALTIVEC +#if HAVE_ALTIVEC hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize); #else int i; @@ -2494,76 +2119,96 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW val += ((int)src[srcPos + j])*filter[filterSize*i + j]; } //filter += hFilterSize; - dst[i] = av_clip(val>>7, 0, (1<<15)-1); // the cubic equation does overflow ... + dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ... //dst[i] = val>>7; } #endif /* HAVE_ALTIVEC */ #endif /* HAVE_MMX */ } // *** horizontal scale Y line to temp buffer -static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc, +static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hLumFilter, int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode, int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter, - int32_t *mmx2FilterPos, uint8_t *pal) + int32_t *mmx2FilterPos, uint32_t *pal) { if (srcFormat==PIX_FMT_YUYV422 || srcFormat==PIX_FMT_GRAY16BE) { - RENAME(yuy2ToY)(formatConvBuffer, src, srcW); + RENAME(yuy2ToY)(formatConvBuffer, src, srcW, pal); src= formatConvBuffer; } else if (srcFormat==PIX_FMT_UYVY422 || srcFormat==PIX_FMT_GRAY16LE) { - RENAME(uyvyToY)(formatConvBuffer, src, srcW); + RENAME(uyvyToY)(formatConvBuffer, src, srcW, pal); src= formatConvBuffer; } else if (srcFormat==PIX_FMT_RGB32) { - RENAME(bgr32ToY)(formatConvBuffer, src, srcW); + RENAME(bgr32ToY)(formatConvBuffer, src, srcW, pal); + src= formatConvBuffer; + } + else if (srcFormat==PIX_FMT_RGB32_1) + { + RENAME(bgr32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal); src= formatConvBuffer; } else if (srcFormat==PIX_FMT_BGR24) { - RENAME(bgr24ToY)(formatConvBuffer, src, srcW); + RENAME(bgr24ToY)(formatConvBuffer, src, srcW, pal); src= formatConvBuffer; } else if (srcFormat==PIX_FMT_BGR565) { - RENAME(bgr16ToY)(formatConvBuffer, src, srcW); + RENAME(bgr16ToY)(formatConvBuffer, src, srcW, pal); src= formatConvBuffer; } else if (srcFormat==PIX_FMT_BGR555) { - RENAME(bgr15ToY)(formatConvBuffer, src, srcW); + RENAME(bgr15ToY)(formatConvBuffer, src, srcW, pal); src= formatConvBuffer; } else if (srcFormat==PIX_FMT_BGR32) { - RENAME(rgb32ToY)(formatConvBuffer, src, srcW); + RENAME(rgb32ToY)(formatConvBuffer, src, srcW, pal); + src= formatConvBuffer; + } + else if (srcFormat==PIX_FMT_BGR32_1) + { + RENAME(rgb32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal); src= formatConvBuffer; } else if (srcFormat==PIX_FMT_RGB24) { - RENAME(rgb24ToY)(formatConvBuffer, src, srcW); + RENAME(rgb24ToY)(formatConvBuffer, src, srcW, pal); src= formatConvBuffer; } else if (srcFormat==PIX_FMT_RGB565) { - RENAME(rgb16ToY)(formatConvBuffer, src, srcW); + RENAME(rgb16ToY)(formatConvBuffer, src, srcW, pal); src= formatConvBuffer; } else if (srcFormat==PIX_FMT_RGB555) { - RENAME(rgb15ToY)(formatConvBuffer, src, srcW); + RENAME(rgb15ToY)(formatConvBuffer, src, srcW, pal); src= formatConvBuffer; } else if (srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE || srcFormat==PIX_FMT_RGB4_BYTE) { - RENAME(palToY)(formatConvBuffer, src, srcW, (uint32_t*)pal); + RENAME(palToY)(formatConvBuffer, src, srcW, pal); + src= formatConvBuffer; + } + else if (srcFormat==PIX_FMT_MONOBLACK) + { + RENAME(monoblack2Y)(formatConvBuffer, src, srcW, pal); + src= formatConvBuffer; + } + else if (srcFormat==PIX_FMT_MONOWHITE) + { + RENAME(monowhite2Y)(formatConvBuffer, src, srcW, pal); src= formatConvBuffer; } -#ifdef HAVE_MMX +#if HAVE_MMX // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one). if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed)) #else @@ -2574,15 +2219,15 @@ static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, i } else // fast bilinear upscale / crap downscale { -#if defined(ARCH_X86) -#ifdef HAVE_MMX2 +#if ARCH_X86 +#if HAVE_MMX2 int i; #if defined(PIC) uint64_t ebxsave __attribute__((aligned(8))); #endif if (canMMX2BeUsed) { - asm volatile( + __asm__ volatile( #if defined(PIC) "mov %%"REG_b", %5 \n\t" #endif @@ -2596,7 +2241,7 @@ static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, i PREFETCH" 32(%%"REG_c") \n\t" PREFETCH" 64(%%"REG_c") \n\t" -#ifdef ARCH_X86_64 +#if ARCH_X86_64 #define FUNNY_Y_CODE \ "movl (%%"REG_b"), %%esi \n\t"\ @@ -2647,7 +2292,7 @@ FUNNY_Y_CODE long xInc_shr16 = xInc >> 16; uint16_t xInc_mask = xInc & 0xffff; //NO MMX just normal asm ... - asm volatile( + __asm__ volatile( "xor %%"REG_a", %%"REG_a" \n\t" // i "xor %%"REG_d", %%"REG_d" \n\t" // xx "xorl %%ecx, %%ecx \n\t" // 2*xalpha @@ -2686,7 +2331,7 @@ FUNNY_Y_CODE :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc_shr16), "m" (xInc_mask) : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi" ); -#ifdef HAVE_MMX2 +#if HAVE_MMX2 } //if MMX2 can't be used #endif #else @@ -2699,88 +2344,143 @@ FUNNY_Y_CODE dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; xpos+=xInc; } -#endif /* defined(ARCH_X86) */ +#endif /* ARCH_X86 */ + } + + if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){ + int i; + //FIXME all pal and rgb srcFormats could do this convertion as well + //FIXME all scalers more complex than bilinear could do half of this transform + if(c->srcRange){ + for (i=0; i>14; + }else{ + for (i=0; i>14; + } } } -inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2, +inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2, int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter, int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode, int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter, - int32_t *mmx2FilterPos, uint8_t *pal) + int32_t *mmx2FilterPos, uint32_t *pal) { if (srcFormat==PIX_FMT_YUYV422) { - RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); src1= formatConvBuffer; src2= formatConvBuffer+VOFW; } else if (srcFormat==PIX_FMT_UYVY422) { - RENAME(uyvyToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + RENAME(uyvyToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); src1= formatConvBuffer; src2= formatConvBuffer+VOFW; } else if (srcFormat==PIX_FMT_RGB32) { - RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + if(c->chrSrcHSubSample) + RENAME(bgr32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); + else + RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); + src1= formatConvBuffer; + src2= formatConvBuffer+VOFW; + } + else if (srcFormat==PIX_FMT_RGB32_1) + { + if(c->chrSrcHSubSample) + RENAME(bgr32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal); + else + RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal); src1= formatConvBuffer; src2= formatConvBuffer+VOFW; } else if (srcFormat==PIX_FMT_BGR24) { - RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + if(c->chrSrcHSubSample) + RENAME(bgr24ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); + else + RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); src1= formatConvBuffer; src2= formatConvBuffer+VOFW; } else if (srcFormat==PIX_FMT_BGR565) { - RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + if(c->chrSrcHSubSample) + RENAME(bgr16ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); + else + RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); src1= formatConvBuffer; src2= formatConvBuffer+VOFW; } else if (srcFormat==PIX_FMT_BGR555) { - RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + if(c->chrSrcHSubSample) + RENAME(bgr15ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); + else + RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); src1= formatConvBuffer; src2= formatConvBuffer+VOFW; } else if (srcFormat==PIX_FMT_BGR32) { - RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + if(c->chrSrcHSubSample) + RENAME(rgb32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); + else + RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); + src1= formatConvBuffer; + src2= formatConvBuffer+VOFW; + } + else if (srcFormat==PIX_FMT_BGR32_1) + { + if(c->chrSrcHSubSample) + RENAME(rgb32ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal); + else + RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1+ALT32_CORR, src2+ALT32_CORR, srcW, pal); src1= formatConvBuffer; src2= formatConvBuffer+VOFW; } else if (srcFormat==PIX_FMT_RGB24) { - RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + if(c->chrSrcHSubSample) + RENAME(rgb24ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); + else + RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); src1= formatConvBuffer; src2= formatConvBuffer+VOFW; } else if (srcFormat==PIX_FMT_RGB565) { - RENAME(rgb16ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + if(c->chrSrcHSubSample) + RENAME(rgb16ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); + else + RENAME(rgb16ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); src1= formatConvBuffer; src2= formatConvBuffer+VOFW; } else if (srcFormat==PIX_FMT_RGB555) { - RENAME(rgb15ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + if(c->chrSrcHSubSample) + RENAME(rgb15ToUV_half)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); + else + RENAME(rgb15ToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); src1= formatConvBuffer; src2= formatConvBuffer+VOFW; } - else if (isGray(srcFormat)) + else if (isGray(srcFormat) || srcFormat==PIX_FMT_MONOBLACK || srcFormat==PIX_FMT_MONOWHITE) { return; } else if (srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE || srcFormat==PIX_FMT_RGB4_BYTE) { - RENAME(palToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, (uint32_t*)pal); + RENAME(palToUV)(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); src1= formatConvBuffer; src2= formatConvBuffer+VOFW; } -#ifdef HAVE_MMX +#if HAVE_MMX // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one). if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed)) #else @@ -2792,15 +2492,15 @@ inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1, } else // fast bilinear upscale / crap downscale { -#if defined(ARCH_X86) -#ifdef HAVE_MMX2 +#if ARCH_X86 +#if HAVE_MMX2 int i; #if defined(PIC) uint64_t ebxsave __attribute__((aligned(8))); #endif if (canMMX2BeUsed) { - asm volatile( + __asm__ volatile( #if defined(PIC) "mov %%"REG_b", %6 \n\t" #endif @@ -2814,7 +2514,7 @@ inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1, PREFETCH" 32(%%"REG_c") \n\t" PREFETCH" 64(%%"REG_c") \n\t" -#ifdef ARCH_X86_64 +#if ARCH_X86_64 #define FUNNY_UV_CODE \ "movl (%%"REG_b"), %%esi \n\t"\ @@ -2877,7 +2577,7 @@ FUNNY_UV_CODE #endif /* HAVE_MMX2 */ long xInc_shr16 = (long) (xInc >> 16); uint16_t xInc_mask = xInc & 0xffff; - asm volatile( + __asm__ volatile( "xor %%"REG_a", %%"REG_a" \n\t" // i "xor %%"REG_d", %%"REG_d" \n\t" // xx "xorl %%ecx, %%ecx \n\t" // 2*xalpha @@ -2912,7 +2612,7 @@ FUNNY_UV_CODE /* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here, which is needed to support GCC 4.0. */ -#if defined(ARCH_X86_64) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +#if ARCH_X86_64 && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) :: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask), #else :: "m" (src1), "m" (dst), "m" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask), @@ -2920,7 +2620,7 @@ FUNNY_UV_CODE "r" (src2) : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi" ); -#ifdef HAVE_MMX2 +#if HAVE_MMX2 } //if MMX2 can't be used #endif #else @@ -2938,7 +2638,23 @@ FUNNY_UV_CODE */ xpos+=xInc; } -#endif /* defined(ARCH_X86) */ +#endif /* ARCH_X86 */ + } + if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){ + int i; + //FIXME all pal and rgb srcFormats could do this convertion as well + //FIXME all scalers more complex than bilinear could do half of this transform + if(c->srcRange){ + for (i=0; i>11; //1469 + dst[i+VOFW]= (dst[i+VOFW]*1799 + 4081085)>>11; //1469 + } + }else{ + for (i=0; i>12; //-264 + dst[i+VOFW]= (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264 + } + } } } @@ -2981,7 +2697,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample; const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); int lastDstY; - uint8_t *pal=NULL; + uint32_t *pal=c->pal_yuv; /* vars which will change and which we need to store back in the context */ int dstY= c->dstY; @@ -2991,7 +2707,6 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s int lastInChrBuf= c->lastInChrBuf; if (isPacked(c->srcFormat)){ - pal= src[1]; src[0]= src[1]= src[2]= src[0]; @@ -3020,12 +2735,12 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0) { - static int firstTime=1; //FIXME move this into the context perhaps - if (flags & SWS_PRINT_INFO && firstTime) + static int warnedAlready=0; //FIXME move this into the context perhaps + if (flags & SWS_PRINT_INFO && !warnedAlready) { av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n" " ->cannot do aligned memory accesses anymore\n"); - firstTime=0; + warnedAlready=1; } } @@ -3075,7 +2790,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s assert(lastInLumBuf + 1 - srcSliceY < srcSliceH); assert(lastInLumBuf + 1 - srcSliceY >= 0); //printf("%d %d\n", lumBufIndex, vLumBufSize); - RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, + RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, funnyYCode, c->srcFormat, formatConvBuffer, c->lumMmx2Filter, c->lumMmx2FilterPos, pal); @@ -3092,7 +2807,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s //FIXME replace parameters through context struct (some at least) if (!(isGray(srcFormat) || isGray(dstFormat))) - RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, + RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, funnyUVCode, c->srcFormat, formatConvBuffer, c->chrMmx2Filter, c->chrMmx2FilterPos, pal); @@ -3117,7 +2832,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s assert(lumBufIndex < 2*vLumBufSize); assert(lastInLumBuf + 1 - srcSliceY < srcSliceH); assert(lastInLumBuf + 1 - srcSliceY >= 0); - RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, + RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, funnyYCode, c->srcFormat, formatConvBuffer, c->lumMmx2Filter, c->lumMmx2FilterPos, pal); @@ -3133,7 +2848,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s assert(lastInChrBuf + 1 - chrSrcSliceY >= 0); if (!(isGray(srcFormat) || isGray(dstFormat))) - RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, + RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, funnyUVCode, c->srcFormat, formatConvBuffer, c->chrMmx2Filter, c->chrMmx2FilterPos, pal); @@ -3145,31 +2860,34 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s break; //we can't output a dstY line so let's try with the next slice } -#ifdef HAVE_MMX - b5Dither= ff_dither8[dstY&1]; - g6Dither= ff_dither4[dstY&1]; - g5Dither= ff_dither8[dstY&1]; - r5Dither= ff_dither8[(dstY+1)&1]; +#if HAVE_MMX + c->blueDither= ff_dither8[dstY&1]; + if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555) + c->greenDither= ff_dither8[dstY&1]; + else + c->greenDither= ff_dither4[dstY&1]; + c->redDither= ff_dither8[(dstY+1)&1]; #endif if (dstY < dstH-2) { int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; -#ifdef HAVE_MMX +#if HAVE_MMX int i; if (flags & SWS_ACCURATE_RND){ + int s= APCK_SIZE / 8; for (i=0; i1)]; - lumMmxFilter[2*i+2]= - lumMmxFilter[2*i+3]= vLumFilter[dstY*vLumFilterSize + i ] + *(void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ]; + *(void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)]; + lumMmxFilter[s*i+APCK_COEF/4 ]= + lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ] + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0); } for (i=0; i1)]; - chrMmxFilter[2*i+2]= - chrMmxFilter[2*i+3]= vChrFilter[chrDstY*vChrFilterSize + i ] + *(void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ]; + *(void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)]; + chrMmxFilter[s*i+APCK_COEF/4 ]= + chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ] + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0); } }else{ @@ -3199,7 +2917,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, dest, uDest, dstW, chrDstW, dstFormat); } - else if (isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 like + else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) //YV12 like { const int chrSkipMask= (1<chrDstVSubSample)-1; if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi @@ -3224,8 +2942,15 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s if (vLumFilterSize == 1 && vChrFilterSize == 2) //unscaled RGB { int chrAlpha= vChrFilter[2*dstY+1]; - RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), - dest, dstW, chrAlpha, dstFormat, flags, dstY); + if(flags & SWS_FULL_CHR_H_INT){ + yuv2rgbXinC_full(c, //FIXME write a packed1_full function + vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, + vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + dest, dstW, dstY); + }else{ + RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), + dest, dstW, chrAlpha, dstFormat, flags, dstY); + } } else if (vLumFilterSize == 2 && vChrFilterSize == 2) //bilinear upscale RGB { @@ -3235,15 +2960,29 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001; chrMmxFilter[2]= chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001; - RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), - dest, dstW, lumAlpha, chrAlpha, dstY); + if(flags & SWS_FULL_CHR_H_INT){ + yuv2rgbXinC_full(c, //FIXME write a packed2_full function + vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, + vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + dest, dstW, dstY); + }else{ + RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), + dest, dstW, lumAlpha, chrAlpha, dstY); + } } else //general RGB { - RENAME(yuv2packedX)(c, - vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, - vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, - dest, dstW, dstY); + if(flags & SWS_FULL_CHR_H_INT){ + yuv2rgbXinC_full(c, + vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, + vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + dest, dstW, dstY); + }else{ + RENAME(yuv2packedX)(c, + vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, + vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + dest, dstW, dstY); + } } } } @@ -3259,7 +2998,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, dest, uDest, dstW, chrDstW, dstFormat); } - else if (isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 + else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) //YV12 { const int chrSkipMask= (1<chrDstVSubSample)-1; if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi @@ -3272,17 +3011,24 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s { assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); - yuv2packedXinC(c, - vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, - vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, - dest, dstW, dstY); + if(flags & SWS_FULL_CHR_H_INT){ + yuv2rgbXinC_full(c, + vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, + vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + dest, dstW, dstY); + }else{ + yuv2packedXinC(c, + vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, + vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + dest, dstW, dstY); + } } } } -#ifdef HAVE_MMX - asm volatile(SFENCE:::"memory"); - asm volatile(EMMS:::"memory"); +#if HAVE_MMX + __asm__ volatile(SFENCE:::"memory"); + __asm__ volatile(EMMS:::"memory"); #endif /* store changed local vars back in the context */ c->dstY= dstY; diff --git a/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb.c b/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb.c index 83d65c5f2a..65af412c2c 100644 --- a/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb.c +++ b/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb.c @@ -1,32 +1,27 @@ /* - * yuv2rgb.c, Software YUV to RGB converter + * software YUV to RGB converter * - * Copyright (C) 1999, Aaron Holtzman + * Copyright (C) 2009 Konstantin Shishkov * - * Functions broken out from display_x11.c and several new modes - * added by HÃ¥kan Hjort + * MMX/MMX2 template stuff (needed for fast movntq support), + * 1,4,8bpp support and context / deglobalize stuff + * by Michael Niedermayer (michaelni@gmx.at) * - * 15 & 16 bpp support by Franck Sicard + * This file is part of FFmpeg. * - * MMX/MMX2 template stuff (needed for fast movntq support), - * 1,4,8bpp support and context / deglobalize stuff - * by Michael Niedermayer (michaelni@gmx.at) + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. * - * This file is part of mpeg2dec, a free MPEG-2 video decoder + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with mpeg2dec; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include @@ -41,148 +36,36 @@ #define DITHER1XBPP // only for MMX -const uint8_t __attribute__((aligned(8))) dither_2x2_4[2][8]={ -{ 1, 3, 1, 3, 1, 3, 1, 3, }, -{ 2, 0, 2, 0, 2, 0, 2, 0, }, -}; +extern const uint8_t dither_8x8_32[8][8]; +extern const uint8_t dither_8x8_73[8][8]; +extern const uint8_t dither_8x8_220[8][8]; -const uint8_t __attribute__((aligned(8))) dither_2x2_8[2][8]={ -{ 6, 2, 6, 2, 6, 2, 6, 2, }, -{ 0, 4, 0, 4, 0, 4, 0, 4, }, -}; - -const uint8_t __attribute__((aligned(8))) dither_8x8_32[8][8]={ -{ 17, 9, 23, 15, 16, 8, 22, 14, }, -{ 5, 29, 3, 27, 4, 28, 2, 26, }, -{ 21, 13, 19, 11, 20, 12, 18, 10, }, -{ 0, 24, 6, 30, 1, 25, 7, 31, }, -{ 16, 8, 22, 14, 17, 9, 23, 15, }, -{ 4, 28, 2, 26, 5, 29, 3, 27, }, -{ 20, 12, 18, 10, 21, 13, 19, 11, }, -{ 1, 25, 7, 31, 0, 24, 6, 30, }, -}; - -#if 0 -const uint8_t __attribute__((aligned(8))) dither_8x8_64[8][8]={ -{ 0, 48, 12, 60, 3, 51, 15, 63, }, -{ 32, 16, 44, 28, 35, 19, 47, 31, }, -{ 8, 56, 4, 52, 11, 59, 7, 55, }, -{ 40, 24, 36, 20, 43, 27, 39, 23, }, -{ 2, 50, 14, 62, 1, 49, 13, 61, }, -{ 34, 18, 46, 30, 33, 17, 45, 29, }, -{ 10, 58, 6, 54, 9, 57, 5, 53, }, -{ 42, 26, 38, 22, 41, 25, 37, 21, }, -}; -#endif - -const uint8_t __attribute__((aligned(8))) dither_8x8_73[8][8]={ -{ 0, 55, 14, 68, 3, 58, 17, 72, }, -{ 37, 18, 50, 32, 40, 22, 54, 35, }, -{ 9, 64, 5, 59, 13, 67, 8, 63, }, -{ 46, 27, 41, 23, 49, 31, 44, 26, }, -{ 2, 57, 16, 71, 1, 56, 15, 70, }, -{ 39, 21, 52, 34, 38, 19, 51, 33, }, -{ 11, 66, 7, 62, 10, 65, 6, 60, }, -{ 48, 30, 43, 25, 47, 29, 42, 24, }, -}; - -#if 0 -const uint8_t __attribute__((aligned(8))) dither_8x8_128[8][8]={ -{ 68, 36, 92, 60, 66, 34, 90, 58, }, -{ 20, 116, 12, 108, 18, 114, 10, 106, }, -{ 84, 52, 76, 44, 82, 50, 74, 42, }, -{ 0, 96, 24, 120, 6, 102, 30, 126, }, -{ 64, 32, 88, 56, 70, 38, 94, 62, }, -{ 16, 112, 8, 104, 22, 118, 14, 110, }, -{ 80, 48, 72, 40, 86, 54, 78, 46, }, -{ 4, 100, 28, 124, 2, 98, 26, 122, }, -}; -#endif - -#if 1 -const uint8_t __attribute__((aligned(8))) dither_8x8_220[8][8]={ -{117, 62, 158, 103, 113, 58, 155, 100, }, -{ 34, 199, 21, 186, 31, 196, 17, 182, }, -{144, 89, 131, 76, 141, 86, 127, 72, }, -{ 0, 165, 41, 206, 10, 175, 52, 217, }, -{110, 55, 151, 96, 120, 65, 162, 107, }, -{ 28, 193, 14, 179, 38, 203, 24, 189, }, -{138, 83, 124, 69, 148, 93, 134, 79, }, -{ 7, 172, 48, 213, 3, 168, 45, 210, }, -}; -#elif 1 -// tries to correct a gamma of 1.5 -const uint8_t __attribute__((aligned(8))) dither_8x8_220[8][8]={ -{ 0, 143, 18, 200, 2, 156, 25, 215, }, -{ 78, 28, 125, 64, 89, 36, 138, 74, }, -{ 10, 180, 3, 161, 16, 195, 8, 175, }, -{109, 51, 93, 38, 121, 60, 105, 47, }, -{ 1, 152, 23, 210, 0, 147, 20, 205, }, -{ 85, 33, 134, 71, 81, 30, 130, 67, }, -{ 14, 190, 6, 171, 12, 185, 5, 166, }, -{117, 57, 101, 44, 113, 54, 97, 41, }, -}; -#elif 1 -// tries to correct a gamma of 2.0 -const uint8_t __attribute__((aligned(8))) dither_8x8_220[8][8]={ -{ 0, 124, 8, 193, 0, 140, 12, 213, }, -{ 55, 14, 104, 42, 66, 19, 119, 52, }, -{ 3, 168, 1, 145, 6, 187, 3, 162, }, -{ 86, 31, 70, 21, 99, 39, 82, 28, }, -{ 0, 134, 11, 206, 0, 129, 9, 200, }, -{ 62, 17, 114, 48, 58, 16, 109, 45, }, -{ 5, 181, 2, 157, 4, 175, 1, 151, }, -{ 95, 36, 78, 26, 90, 34, 74, 24, }, -}; -#else -// tries to correct a gamma of 2.5 -const uint8_t __attribute__((aligned(8))) dither_8x8_220[8][8]={ -{ 0, 107, 3, 187, 0, 125, 6, 212, }, -{ 39, 7, 86, 28, 49, 11, 102, 36, }, -{ 1, 158, 0, 131, 3, 180, 1, 151, }, -{ 68, 19, 52, 12, 81, 25, 64, 17, }, -{ 0, 119, 5, 203, 0, 113, 4, 195, }, -{ 45, 9, 96, 33, 42, 8, 91, 30, }, -{ 2, 172, 1, 144, 2, 165, 0, 137, }, -{ 77, 23, 60, 15, 72, 21, 56, 14, }, -}; -#endif - -#ifdef HAVE_MMX +#if HAVE_MMX && CONFIG_GPL /* hope these constant values are cache line aligned */ DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw) = 0x00ff00ff00ff00ffULL; DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL; DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL; -// The volatile is required because gcc otherwise optimizes some writes away -// not knowing that these are read in the ASM block. -static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither; -static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither; -static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither; -static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither; - -#undef HAVE_MMX - //MMX versions #undef RENAME -#define HAVE_MMX #undef HAVE_MMX2 -#undef HAVE_3DNOW +#undef HAVE_AMD3DNOW +#define HAVE_MMX2 0 +#define HAVE_AMD3DNOW 0 #define RENAME(a) a ## _MMX #include "yuv2rgb_template.c" //MMX2 versions #undef RENAME -#define HAVE_MMX -#define HAVE_MMX2 -#undef HAVE_3DNOW +#undef HAVE_MMX2 +#define HAVE_MMX2 1 #define RENAME(a) a ## _MMX2 #include "yuv2rgb_template.c" -#endif /* HAVE_MMX */ +#endif /* HAVE_MMX && CONFIG_GPL */ -const int32_t Inverse_Table_6_9[8][4] = { +const int32_t ff_yuv2rgb_coeffs[8][4] = { {117504, 138453, 13954, 34903}, /* no sequence_display_extension */ {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */ {104597, 132201, 25675, 53279}, /* unspecified */ @@ -193,73 +76,55 @@ const int32_t Inverse_Table_6_9[8][4] = { {117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */ }; -#define RGB(i) \ +#define LOADCHROMA(i) \ U = pu[i]; \ V = pv[i]; \ r = (void *)c->table_rV[V]; \ g = (void *)(c->table_gU[U] + c->table_gV[V]); \ b = (void *)c->table_bU[U]; -#define DST1(i) \ - Y = py_1[2*i]; \ - dst_1[2*i] = r[Y] + g[Y] + b[Y]; \ - Y = py_1[2*i+1]; \ - dst_1[2*i+1] = r[Y] + g[Y] + b[Y]; +#define PUTRGB(dst,src,i,o) \ + Y = src[2*i+o]; \ + dst[2*i ] = r[Y] + g[Y] + b[Y]; \ + Y = src[2*i+1-o]; \ + dst[2*i+1] = r[Y] + g[Y] + b[Y]; -#define DST2(i) \ - Y = py_2[2*i]; \ - dst_2[2*i] = r[Y] + g[Y] + b[Y]; \ - Y = py_2[2*i+1]; \ - dst_2[2*i+1] = r[Y] + g[Y] + b[Y]; +#define PUTRGB24(dst,src,i) \ + Y = src[2*i]; \ + dst[6*i+0] = r[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = b[Y]; \ + Y = src[2*i+1]; \ + dst[6*i+3] = r[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = b[Y]; -#define DST1RGB(i) \ - Y = py_1[2*i]; \ - dst_1[6*i] = r[Y]; dst_1[6*i+1] = g[Y]; dst_1[6*i+2] = b[Y]; \ - Y = py_1[2*i+1]; \ - dst_1[6*i+3] = r[Y]; dst_1[6*i+4] = g[Y]; dst_1[6*i+5] = b[Y]; +#define PUTBGR24(dst,src,i) \ + Y = src[2*i]; \ + dst[6*i+0] = b[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = r[Y]; \ + Y = src[2*i+1]; \ + dst[6*i+3] = b[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = r[Y]; -#define DST2RGB(i) \ - Y = py_2[2*i]; \ - dst_2[6*i] = r[Y]; dst_2[6*i+1] = g[Y]; dst_2[6*i+2] = b[Y]; \ - Y = py_2[2*i+1]; \ - dst_2[6*i+3] = r[Y]; dst_2[6*i+4] = g[Y]; dst_2[6*i+5] = b[Y]; - -#define DST1BGR(i) \ - Y = py_1[2*i]; \ - dst_1[6*i] = b[Y]; dst_1[6*i+1] = g[Y]; dst_1[6*i+2] = r[Y]; \ - Y = py_1[2*i+1]; \ - dst_1[6*i+3] = b[Y]; dst_1[6*i+4] = g[Y]; dst_1[6*i+5] = r[Y]; - -#define DST2BGR(i) \ - Y = py_2[2*i]; \ - dst_2[6*i] = b[Y]; dst_2[6*i+1] = g[Y]; dst_2[6*i+2] = r[Y]; \ - Y = py_2[2*i+1]; \ - dst_2[6*i+3] = b[Y]; dst_2[6*i+4] = g[Y]; dst_2[6*i+5] = r[Y]; - -#define PROLOG(func_name, dst_type) \ +#define YUV2RGBFUNC(func_name, dst_type) \ static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, \ int srcSliceH, uint8_t* dst[], int dstStride[]){\ int y;\ \ - if (c->srcFormat == PIX_FMT_YUV422P){\ + if (c->srcFormat == PIX_FMT_YUV422P) {\ srcStride[1] *= 2;\ srcStride[2] *= 2;\ }\ - for (y=0; y>1)*srcStride[1];\ - uint8_t *pv= src[2] + (y>>1)*srcStride[2];\ - unsigned int h_size= c->dstW>>3;\ + uint8_t *py_1 = src[0] + y*srcStride[0];\ + uint8_t *py_2 = py_1 + srcStride[0];\ + uint8_t *pu = src[1] + (y>>1)*srcStride[1];\ + uint8_t *pv = src[2] + (y>>1)*srcStride[2];\ + unsigned int h_size = c->dstW>>3;\ while (h_size--) {\ int av_unused U, V;\ int Y;\ -#define EPILOG1(dst_delta)\ +#define ENDYUV2RGBLINE(dst_delta)\ pu += 4;\ pv += 4;\ py_1 += 8;\ @@ -270,392 +135,354 @@ static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSlic if (c->dstW & 4) {\ int av_unused Y, U, V;\ -#define EPILOG2()\ +#define ENDYUV2RGBFUNC()\ }\ }\ return srcSliceH;\ } -#define EPILOG(dst_delta)\ - EPILOG1(dst_delta)\ - EPILOG2() +#define CLOSEYUV2RGBFUNC(dst_delta)\ + ENDYUV2RGBLINE(dst_delta)\ + ENDYUV2RGBFUNC() -PROLOG(yuv2rgb_c_32, uint32_t) - RGB(0); - DST1(0); - DST2(0); +YUV2RGBFUNC(yuv2rgb_c_32, uint32_t) + LOADCHROMA(0); + PUTRGB(dst_1,py_1,0,0); + PUTRGB(dst_2,py_2,0,1); - RGB(1); - DST2(1); - DST1(1); + LOADCHROMA(1); + PUTRGB(dst_2,py_2,1,1); + PUTRGB(dst_1,py_1,1,0); + LOADCHROMA(1); + PUTRGB(dst_2,py_2,1,1); + PUTRGB(dst_1,py_1,1,0); - RGB(2); - DST1(2); - DST2(2); + LOADCHROMA(2); + PUTRGB(dst_1,py_1,2,0); + PUTRGB(dst_2,py_2,2,1); - RGB(3); - DST2(3); - DST1(3); -EPILOG1(8) - RGB(0); - DST1(0); - DST2(0); + LOADCHROMA(3); + PUTRGB(dst_2,py_2,3,1); + PUTRGB(dst_1,py_1,3,0); +ENDYUV2RGBLINE(8) + LOADCHROMA(0); + PUTRGB(dst_1,py_1,0,0); + PUTRGB(dst_2,py_2,0,1); - RGB(1); - DST2(1); - DST1(1); -EPILOG2() + LOADCHROMA(1); + PUTRGB(dst_2,py_2,1,1); + PUTRGB(dst_1,py_1,1,0); +ENDYUV2RGBFUNC() -PROLOG(yuv2rgb_c_24_rgb, uint8_t) - RGB(0); - DST1RGB(0); - DST2RGB(0); +YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t) + LOADCHROMA(0); + PUTRGB24(dst_1,py_1,0); + PUTRGB24(dst_2,py_2,0); - RGB(1); - DST2RGB(1); - DST1RGB(1); + LOADCHROMA(1); + PUTRGB24(dst_2,py_2,1); + PUTRGB24(dst_1,py_1,1); - RGB(2); - DST1RGB(2); - DST2RGB(2); + LOADCHROMA(2); + PUTRGB24(dst_1,py_1,2); + PUTRGB24(dst_2,py_2,2); - RGB(3); - DST2RGB(3); - DST1RGB(3); -EPILOG1(24) - RGB(0); - DST1RGB(0); - DST2RGB(0); + LOADCHROMA(3); + PUTRGB24(dst_2,py_2,3); + PUTRGB24(dst_1,py_1,3); +ENDYUV2RGBLINE(24) + LOADCHROMA(0); + PUTRGB24(dst_1,py_1,0); + PUTRGB24(dst_2,py_2,0); - RGB(1); - DST2RGB(1); - DST1RGB(1); -EPILOG2() + LOADCHROMA(1); + PUTRGB24(dst_2,py_2,1); + PUTRGB24(dst_1,py_1,1); +ENDYUV2RGBFUNC() // only trivial mods from yuv2rgb_c_24_rgb -PROLOG(yuv2rgb_c_24_bgr, uint8_t) - RGB(0); - DST1BGR(0); - DST2BGR(0); +YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t) + LOADCHROMA(0); + PUTBGR24(dst_1,py_1,0); + PUTBGR24(dst_2,py_2,0); - RGB(1); - DST2BGR(1); - DST1BGR(1); + LOADCHROMA(1); + PUTBGR24(dst_2,py_2,1); + PUTBGR24(dst_1,py_1,1); - RGB(2); - DST1BGR(2); - DST2BGR(2); + LOADCHROMA(2); + PUTBGR24(dst_1,py_1,2); + PUTBGR24(dst_2,py_2,2); - RGB(3); - DST2BGR(3); - DST1BGR(3); -EPILOG1(24) - RGB(0); - DST1BGR(0); - DST2BGR(0); + LOADCHROMA(3); + PUTBGR24(dst_2,py_2,3); + PUTBGR24(dst_1,py_1,3); +ENDYUV2RGBLINE(24) + LOADCHROMA(0); + PUTBGR24(dst_1,py_1,0); + PUTBGR24(dst_2,py_2,0); - RGB(1); - DST2BGR(1); - DST1BGR(1); -EPILOG2() + LOADCHROMA(1); + PUTBGR24(dst_2,py_2,1); + PUTBGR24(dst_1,py_1,1); +ENDYUV2RGBFUNC() // This is exactly the same code as yuv2rgb_c_32 except for the types of // r, g, b, dst_1, dst_2 -PROLOG(yuv2rgb_c_16, uint16_t) - RGB(0); - DST1(0); - DST2(0); +YUV2RGBFUNC(yuv2rgb_c_16, uint16_t) + LOADCHROMA(0); + PUTRGB(dst_1,py_1,0,0); + PUTRGB(dst_2,py_2,0,1); - RGB(1); - DST2(1); - DST1(1); + LOADCHROMA(1); + PUTRGB(dst_2,py_2,1,1); + PUTRGB(dst_1,py_1,1,0); - RGB(2); - DST1(2); - DST2(2); + LOADCHROMA(2); + PUTRGB(dst_1,py_1,2,0); + PUTRGB(dst_2,py_2,2,1); - RGB(3); - DST2(3); - DST1(3); -EPILOG(8) + LOADCHROMA(3); + PUTRGB(dst_2,py_2,3,1); + PUTRGB(dst_1,py_1,3,0); +CLOSEYUV2RGBFUNC(8) // This is exactly the same code as yuv2rgb_c_32 except for the types of // r, g, b, dst_1, dst_2 -PROLOG(yuv2rgb_c_8, uint8_t) - RGB(0); - DST1(0); - DST2(0); +YUV2RGBFUNC(yuv2rgb_c_8, uint8_t) + LOADCHROMA(0); + PUTRGB(dst_1,py_1,0,0); + PUTRGB(dst_2,py_2,0,1); - RGB(1); - DST2(1); - DST1(1); + LOADCHROMA(1); + PUTRGB(dst_2,py_2,1,1); + PUTRGB(dst_1,py_1,1,0); - RGB(2); - DST1(2); - DST2(2); + LOADCHROMA(2); + PUTRGB(dst_1,py_1,2,0); + PUTRGB(dst_2,py_2,2,1); - RGB(3); - DST2(3); - DST1(3); -EPILOG(8) + LOADCHROMA(3); + PUTRGB(dst_2,py_2,3,1); + PUTRGB(dst_1,py_1,3,0); +CLOSEYUV2RGBFUNC(8) // r, g, b, dst_1, dst_2 -PROLOG(yuv2rgb_c_8_ordered_dither, uint8_t) - const uint8_t *d32= dither_8x8_32[y&7]; - const uint8_t *d64= dither_8x8_73[y&7]; -#define DST1bpp8(i,o) \ - Y = py_1[2*i]; \ - dst_1[2*i] = r[Y+d32[0+o]] + g[Y+d32[0+o]] + b[Y+d64[0+o]]; \ - Y = py_1[2*i+1]; \ - dst_1[2*i+1] = r[Y+d32[1+o]] + g[Y+d32[1+o]] + b[Y+d64[1+o]]; +YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t) + const uint8_t *d32 = dither_8x8_32[y&7]; + const uint8_t *d64 = dither_8x8_73[y&7]; +#define PUTRGB8(dst,src,i,o) \ + Y = src[2*i]; \ + dst[2*i] = r[Y+d32[0+o]] + g[Y+d32[0+o]] + b[Y+d64[0+o]]; \ + Y = src[2*i+1]; \ + dst[2*i+1] = r[Y+d32[1+o]] + g[Y+d32[1+o]] + b[Y+d64[1+o]]; -#define DST2bpp8(i,o) \ - Y = py_2[2*i]; \ - dst_2[2*i] = r[Y+d32[8+o]] + g[Y+d32[8+o]] + b[Y+d64[8+o]]; \ - Y = py_2[2*i+1]; \ - dst_2[2*i+1] = r[Y+d32[9+o]] + g[Y+d32[9+o]] + b[Y+d64[9+o]]; + LOADCHROMA(0); + PUTRGB8(dst_1,py_1,0,0); + PUTRGB8(dst_2,py_2,0,0+8); + LOADCHROMA(1); + PUTRGB8(dst_2,py_2,1,2+8); + PUTRGB8(dst_1,py_1,1,2); - RGB(0); - DST1bpp8(0,0); - DST2bpp8(0,0); + LOADCHROMA(2); + PUTRGB8(dst_1,py_1,2,4); + PUTRGB8(dst_2,py_2,2,4+8); - RGB(1); - DST2bpp8(1,2); - DST1bpp8(1,2); - - RGB(2); - DST1bpp8(2,4); - DST2bpp8(2,4); - - RGB(3); - DST2bpp8(3,6); - DST1bpp8(3,6); -EPILOG(8) + LOADCHROMA(3); + PUTRGB8(dst_2,py_2,3,6+8); + PUTRGB8(dst_1,py_1,3,6); +CLOSEYUV2RGBFUNC(8) // This is exactly the same code as yuv2rgb_c_32 except for the types of // r, g, b, dst_1, dst_2 -PROLOG(yuv2rgb_c_4, uint8_t) +YUV2RGBFUNC(yuv2rgb_c_4, uint8_t) int acc; -#define DST1_4(i) \ - Y = py_1[2*i]; \ +#define PUTRGB4(dst,src,i) \ + Y = src[2*i]; \ acc = r[Y] + g[Y] + b[Y]; \ - Y = py_1[2*i+1]; \ + Y = src[2*i+1]; \ acc |= (r[Y] + g[Y] + b[Y])<<4; \ - dst_1[i] = acc; + dst[i] = acc; -#define DST2_4(i) \ - Y = py_2[2*i]; \ - acc = r[Y] + g[Y] + b[Y]; \ - Y = py_2[2*i+1]; \ - acc |= (r[Y] + g[Y] + b[Y])<<4; \ - dst_2[i] = acc; + LOADCHROMA(0); + PUTRGB4(dst_1,py_1,0); + PUTRGB4(dst_2,py_2,0); - RGB(0); - DST1_4(0); - DST2_4(0); + LOADCHROMA(1); + PUTRGB4(dst_2,py_2,1); + PUTRGB4(dst_1,py_1,1); - RGB(1); - DST2_4(1); - DST1_4(1); + LOADCHROMA(2); + PUTRGB4(dst_1,py_1,2); + PUTRGB4(dst_2,py_2,2); - RGB(2); - DST1_4(2); - DST2_4(2); + LOADCHROMA(3); + PUTRGB4(dst_2,py_2,3); + PUTRGB4(dst_1,py_1,3); +CLOSEYUV2RGBFUNC(4) - RGB(3); - DST2_4(3); - DST1_4(3); -EPILOG(4) - -PROLOG(yuv2rgb_c_4_ordered_dither, uint8_t) - const uint8_t *d64= dither_8x8_73[y&7]; - const uint8_t *d128=dither_8x8_220[y&7]; +YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t) + const uint8_t *d64 = dither_8x8_73[y&7]; + const uint8_t *d128 = dither_8x8_220[y&7]; int acc; -#define DST1bpp4(i,o) \ - Y = py_1[2*i]; \ +#define PUTRGB4D(dst,src,i,o) \ + Y = src[2*i]; \ acc = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \ - Y = py_1[2*i+1]; \ + Y = src[2*i+1]; \ acc |= (r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]])<<4; \ - dst_1[i]= acc; + dst[i]= acc; -#define DST2bpp4(i,o) \ - Y = py_2[2*i]; \ - acc = r[Y+d128[8+o]] + g[Y+d64[8+o]] + b[Y+d128[8+o]]; \ - Y = py_2[2*i+1]; \ - acc |= (r[Y+d128[9+o]] + g[Y+d64[9+o]] + b[Y+d128[9+o]])<<4; \ - dst_2[i]= acc; + LOADCHROMA(0); + PUTRGB4D(dst_1,py_1,0,0); + PUTRGB4D(dst_2,py_2,0,0+8); + LOADCHROMA(1); + PUTRGB4D(dst_2,py_2,1,2+8); + PUTRGB4D(dst_1,py_1,1,2); - RGB(0); - DST1bpp4(0,0); - DST2bpp4(0,0); + LOADCHROMA(2); + PUTRGB4D(dst_1,py_1,2,4); + PUTRGB4D(dst_2,py_2,2,4+8); - RGB(1); - DST2bpp4(1,2); - DST1bpp4(1,2); - - RGB(2); - DST1bpp4(2,4); - DST2bpp4(2,4); - - RGB(3); - DST2bpp4(3,6); - DST1bpp4(3,6); -EPILOG(4) + LOADCHROMA(3); + PUTRGB4D(dst_2,py_2,3,6+8); + PUTRGB4D(dst_1,py_1,3,6); +CLOSEYUV2RGBFUNC(4) // This is exactly the same code as yuv2rgb_c_32 except for the types of // r, g, b, dst_1, dst_2 -PROLOG(yuv2rgb_c_4b, uint8_t) - RGB(0); - DST1(0); - DST2(0); +YUV2RGBFUNC(yuv2rgb_c_4b, uint8_t) + LOADCHROMA(0); + PUTRGB(dst_1,py_1,0,0); + PUTRGB(dst_2,py_2,0,1); - RGB(1); - DST2(1); - DST1(1); + LOADCHROMA(1); + PUTRGB(dst_2,py_2,1,1); + PUTRGB(dst_1,py_1,1,0); - RGB(2); - DST1(2); - DST2(2); + LOADCHROMA(2); + PUTRGB(dst_1,py_1,2,0); + PUTRGB(dst_2,py_2,2,1); - RGB(3); - DST2(3); - DST1(3); -EPILOG(8) + LOADCHROMA(3); + PUTRGB(dst_2,py_2,3,1); + PUTRGB(dst_1,py_1,3,0); +CLOSEYUV2RGBFUNC(8) -PROLOG(yuv2rgb_c_4b_ordered_dither, uint8_t) - const uint8_t *d64= dither_8x8_73[y&7]; - const uint8_t *d128=dither_8x8_220[y&7]; +YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t) + const uint8_t *d64 = dither_8x8_73[y&7]; + const uint8_t *d128 = dither_8x8_220[y&7]; -#define DST1bpp4b(i,o) \ - Y = py_1[2*i]; \ - dst_1[2*i] = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \ - Y = py_1[2*i+1]; \ - dst_1[2*i+1] = r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]]; +#define PUTRGB4DB(dst,src,i,o) \ + Y = src[2*i]; \ + dst[2*i] = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \ + Y = src[2*i+1]; \ + dst[2*i+1] = r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]]; -#define DST2bpp4b(i,o) \ - Y = py_2[2*i]; \ - dst_2[2*i] = r[Y+d128[8+o]] + g[Y+d64[8+o]] + b[Y+d128[8+o]]; \ - Y = py_2[2*i+1]; \ - dst_2[2*i+1] = r[Y+d128[9+o]] + g[Y+d64[9+o]] + b[Y+d128[9+o]]; + LOADCHROMA(0); + PUTRGB4DB(dst_1,py_1,0,0); + PUTRGB4DB(dst_2,py_2,0,0+8); + LOADCHROMA(1); + PUTRGB4DB(dst_2,py_2,1,2+8); + PUTRGB4DB(dst_1,py_1,1,2); - RGB(0); - DST1bpp4b(0,0); - DST2bpp4b(0,0); + LOADCHROMA(2); + PUTRGB4DB(dst_1,py_1,2,4); + PUTRGB4DB(dst_2,py_2,2,4+8); - RGB(1); - DST2bpp4b(1,2); - DST1bpp4b(1,2); + LOADCHROMA(3); + PUTRGB4DB(dst_2,py_2,3,6+8); + PUTRGB4DB(dst_1,py_1,3,6); +CLOSEYUV2RGBFUNC(8) - RGB(2); - DST1bpp4b(2,4); - DST2bpp4b(2,4); - - RGB(3); - DST2bpp4b(3,6); - DST1bpp4b(3,6); -EPILOG(8) - -PROLOG(yuv2rgb_c_1_ordered_dither, uint8_t) - const uint8_t *d128=dither_8x8_220[y&7]; - char out_1=0, out_2=0; +YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t) + const uint8_t *d128 = dither_8x8_220[y&7]; + char out_1 = 0, out_2 = 0; g= c->table_gU[128] + c->table_gV[128]; -#define DST1bpp1(i,o) \ - Y = py_1[2*i]; \ - out_1+= out_1 + g[Y+d128[0+o]]; \ - Y = py_1[2*i+1]; \ - out_1+= out_1 + g[Y+d128[1+o]]; +#define PUTRGB1(out,src,i,o) \ + Y = src[2*i]; \ + out+= out + g[Y+d128[0+o]]; \ + Y = src[2*i+1]; \ + out+= out + g[Y+d128[1+o]]; -#define DST2bpp1(i,o) \ - Y = py_2[2*i]; \ - out_2+= out_2 + g[Y+d128[8+o]]; \ - Y = py_2[2*i+1]; \ - out_2+= out_2 + g[Y+d128[9+o]]; + PUTRGB1(out_1,py_1,0,0); + PUTRGB1(out_2,py_2,0,0+8); - DST1bpp1(0,0); - DST2bpp1(0,0); + PUTRGB1(out_2,py_2,1,2+8); + PUTRGB1(out_1,py_1,1,2); - DST2bpp1(1,2); - DST1bpp1(1,2); + PUTRGB1(out_1,py_1,2,4); + PUTRGB1(out_2,py_2,2,4+8); - DST1bpp1(2,4); - DST2bpp1(2,4); - - DST2bpp1(3,6); - DST1bpp1(3,6); + PUTRGB1(out_2,py_2,3,6+8); + PUTRGB1(out_1,py_1,3,6); dst_1[0]= out_1; dst_2[0]= out_2; -EPILOG(1) +CLOSEYUV2RGBFUNC(1) -SwsFunc yuv2rgb_get_func_ptr (SwsContext *c) +SwsFunc sws_yuv2rgb_get_func_ptr(SwsContext *c) { -#if defined(HAVE_MMX2) || defined(HAVE_MMX) - if (c->flags & SWS_CPU_CAPS_MMX2){ - switch(c->dstFormat){ + SwsFunc t = NULL; +#if (HAVE_MMX2 || HAVE_MMX) && CONFIG_GPL + if (c->flags & SWS_CPU_CAPS_MMX2) { + switch (c->dstFormat) { case PIX_FMT_RGB32: return yuv420_rgb32_MMX2; case PIX_FMT_BGR24: return yuv420_rgb24_MMX2; - case PIX_FMT_BGR565: return yuv420_rgb16_MMX2; - case PIX_FMT_BGR555: return yuv420_rgb15_MMX2; + case PIX_FMT_RGB565: return yuv420_rgb16_MMX2; + case PIX_FMT_RGB555: return yuv420_rgb15_MMX2; } } - if (c->flags & SWS_CPU_CAPS_MMX){ - switch(c->dstFormat){ + if (c->flags & SWS_CPU_CAPS_MMX) { + switch (c->dstFormat) { case PIX_FMT_RGB32: return yuv420_rgb32_MMX; case PIX_FMT_BGR24: return yuv420_rgb24_MMX; - case PIX_FMT_BGR565: return yuv420_rgb16_MMX; - case PIX_FMT_BGR555: return yuv420_rgb15_MMX; + case PIX_FMT_RGB565: return yuv420_rgb16_MMX; + case PIX_FMT_RGB555: return yuv420_rgb15_MMX; } } #endif -#ifdef HAVE_VIS - { - SwsFunc t= yuv2rgb_init_vis(c); - if (t) return t; - } +#if HAVE_VIS + t = sws_yuv2rgb_init_vis(c); #endif -#ifdef CONFIG_MLIB - { - SwsFunc t= yuv2rgb_init_mlib(c); - if (t) return t; - } +#if CONFIG_MLIB + t = sws_yuv2rgb_init_mlib(c); #endif -#ifdef HAVE_ALTIVEC +#if HAVE_ALTIVEC && CONFIG_GPL if (c->flags & SWS_CPU_CAPS_ALTIVEC) - { - SwsFunc t = yuv2rgb_init_altivec(c); - if (t) return t; - } + t = sws_yuv2rgb_init_altivec(c); #endif -#ifdef ARCH_BFIN +#if ARCH_BFIN if (c->flags & SWS_CPU_CAPS_BFIN) - { - SwsFunc t = ff_bfin_yuv2rgb_get_func_ptr (c); - if (t) return t; - } + t = sws_ff_bfin_yuv2rgb_get_func_ptr(c); #endif + if (t) + return t; + av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found.\n"); - switch(c->dstFormat){ + switch (c->dstFormat) { + case PIX_FMT_BGR32_1: + case PIX_FMT_RGB32_1: case PIX_FMT_BGR32: - case PIX_FMT_RGB32: return yuv2rgb_c_32; - case PIX_FMT_RGB24: return yuv2rgb_c_24_rgb; - case PIX_FMT_BGR24: return yuv2rgb_c_24_bgr; + case PIX_FMT_RGB32: return yuv2rgb_c_32; + case PIX_FMT_RGB24: return yuv2rgb_c_24_rgb; + case PIX_FMT_BGR24: return yuv2rgb_c_24_bgr; case PIX_FMT_RGB565: case PIX_FMT_BGR565: case PIX_FMT_RGB555: - case PIX_FMT_BGR555: return yuv2rgb_c_16; + case PIX_FMT_BGR555: return yuv2rgb_c_16; case PIX_FMT_RGB8: - case PIX_FMT_BGR8: return yuv2rgb_c_8_ordered_dither; + case PIX_FMT_BGR8: return yuv2rgb_c_8_ordered_dither; case PIX_FMT_RGB4: - case PIX_FMT_BGR4: return yuv2rgb_c_4_ordered_dither; + case PIX_FMT_BGR4: return yuv2rgb_c_4_ordered_dither; case PIX_FMT_RGB4_BYTE: case PIX_FMT_BGR4_BYTE: return yuv2rgb_c_4b_ordered_dither; case PIX_FMT_MONOBLACK: return yuv2rgb_c_1_ordered_dither; @@ -665,29 +492,49 @@ SwsFunc yuv2rgb_get_func_ptr (SwsContext *c) return NULL; } -static int div_round (int dividend, int divisor) +static void fill_table(uint8_t* table[256], const int elemsize, const int inc, uint8_t *y_table) { - if (dividend > 0) - return (dividend + (divisor>>1)) / divisor; - else - return -((-dividend + (divisor>>1)) / divisor); + int i; + int64_t cb = 0; + + y_table -= elemsize * (inc >> 9); + + for (i = 0; i < 256; i++) { + table[i] = y_table + elemsize * (cb >> 16); + cb += inc; + } } -int yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation) +static void fill_gv_table(int table[256], const int elemsize, const int inc) { - const int isRgb = isBGR(c->dstFormat); - const int bpp = fmt_depth(c->dstFormat); int i; - uint8_t table_Y[1024]; - uint32_t *table_32 = 0; - uint16_t *table_16 = 0; - uint8_t *table_8 = 0; - uint8_t *table_332 = 0; - uint8_t *table_121 = 0; - uint8_t *table_1 = 0; - int entry_size = 0; - void *table_r = 0, *table_g = 0, *table_b = 0; - void *table_start; + int64_t cb = 0; + int off = -(inc >> 9); + + for (i = 0; i < 256; i++) { + table[i] = elemsize * (off + (cb >> 16)); + cb += inc; + } +} + +av_cold int sws_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int fullRange, + int brightness, int contrast, int saturation) +{ + const int isRgb = c->dstFormat==PIX_FMT_RGB32 + || c->dstFormat==PIX_FMT_RGB32_1 + || c->dstFormat==PIX_FMT_BGR24 + || c->dstFormat==PIX_FMT_RGB565 + || c->dstFormat==PIX_FMT_RGB555 + || c->dstFormat==PIX_FMT_RGB8 + || c->dstFormat==PIX_FMT_RGB4 + || c->dstFormat==PIX_FMT_RGB4_BYTE + || c->dstFormat==PIX_FMT_MONOBLACK; + const int bpp = fmt_depth(c->dstFormat); + uint8_t *y_table; + uint16_t *y_table16; + uint32_t *y_table32; + int i, base, rbase, gbase, bbase, abase; + const int yoffs = fullRange ? 384 : 326; int64_t crv = inv_table[0]; int64_t cbu = inv_table[1]; @@ -696,186 +543,142 @@ int yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, int64_t cy = 1<<16; int64_t oy = 0; -//printf("%lld %lld %lld %lld %lld\n", cy, crv, cbu, cgu, cgv); - if (!fullRange){ - cy= (cy*255) / 219; - oy= 16<<16; - }else{ - crv= (crv*224) / 255; - cbu= (cbu*224) / 255; - cgu= (cgu*224) / 255; - cgv= (cgv*224) / 255; + int64_t yb = 0; + + if (!fullRange) { + cy = (cy*255) / 219; + oy = 16<<16; + } else { + crv = (crv*224) / 255; + cbu = (cbu*224) / 255; + cgu = (cgu*224) / 255; + cgv = (cgv*224) / 255; } - cy = (cy *contrast )>>16; - crv= (crv*contrast * saturation)>>32; - cbu= (cbu*contrast * saturation)>>32; - cgu= (cgu*contrast * saturation)>>32; - cgv= (cgv*contrast * saturation)>>32; -//printf("%lld %lld %lld %lld %lld\n", cy, crv, cbu, cgu, cgv); + cy = (cy *contrast ) >> 16; + crv = (crv*contrast * saturation) >> 32; + cbu = (cbu*contrast * saturation) >> 32; + cgu = (cgu*contrast * saturation) >> 32; + cgv = (cgv*contrast * saturation) >> 32; oy -= 256*brightness; - for (i = 0; i < 1024; i++) { - int j; + //scale coefficients by cy + crv = ((crv << 16) + 0x8000) / cy; + cbu = ((cbu << 16) + 0x8000) / cy; + cgu = ((cgu << 16) + 0x8000) / cy; + cgv = ((cgv << 16) + 0x8000) / cy; - j= (cy*(((i - 384)<<16) - oy) + (1<<31))>>32; - j = (j < 0) ? 0 : ((j > 255) ? 255 : j); - table_Y[i] = j; - } + av_free(c->yuvTable); switch (bpp) { - case 32: - table_start= table_32 = av_malloc ((197 + 2*682 + 256 + 132) * sizeof (uint32_t)); - - entry_size = sizeof (uint32_t); - table_r = table_32 + 197; - table_b = table_32 + 197 + 685; - table_g = table_32 + 197 + 2*682; - - for (i = -197; i < 256+197; i++) - ((uint32_t *)table_r)[i] = table_Y[i+384] << (isRgb ? 16 : 0); - for (i = -132; i < 256+132; i++) - ((uint32_t *)table_g)[i] = table_Y[i+384] << 8; - for (i = -232; i < 256+232; i++) - ((uint32_t *)table_b)[i] = table_Y[i+384] << (isRgb ? 0 : 16); - break; - - case 24: - table_start= table_8 = av_malloc ((256 + 2*232) * sizeof (uint8_t)); - - entry_size = sizeof (uint8_t); - table_r = table_g = table_b = table_8 + 232; - - for (i = -232; i < 256+232; i++) - ((uint8_t * )table_b)[i] = table_Y[i+384]; - break; - - case 15: - case 16: - table_start= table_16 = av_malloc ((197 + 2*682 + 256 + 132) * sizeof (uint16_t)); - - entry_size = sizeof (uint16_t); - table_r = table_16 + 197; - table_b = table_16 + 197 + 685; - table_g = table_16 + 197 + 2*682; - - for (i = -197; i < 256+197; i++) { - int j = table_Y[i+384] >> 3; - - if (isRgb) - j <<= ((bpp==16) ? 11 : 10); - - ((uint16_t *)table_r)[i] = j; - } - for (i = -132; i < 256+132; i++) { - int j = table_Y[i+384] >> ((bpp==16) ? 2 : 3); - - ((uint16_t *)table_g)[i] = j << 5; - } - for (i = -232; i < 256+232; i++) { - int j = table_Y[i+384] >> 3; - - if (!isRgb) - j <<= ((bpp==16) ? 11 : 10); - - ((uint16_t *)table_b)[i] = j; - } - break; - - case 8: - table_start= table_332 = av_malloc ((197 + 2*682 + 256 + 132) * sizeof (uint8_t)); - - entry_size = sizeof (uint8_t); - table_r = table_332 + 197; - table_b = table_332 + 197 + 685; - table_g = table_332 + 197 + 2*682; - - for (i = -197; i < 256+197; i++) { - int j = (table_Y[i+384 - 16] + 18)/36; - - if (isRgb) - j <<= 5; - - ((uint8_t *)table_r)[i] = j; - } - for (i = -132; i < 256+132; i++) { - int j = (table_Y[i+384 - 16] + 18)/36; - - if (!isRgb) - j <<= 1; - - ((uint8_t *)table_g)[i] = j << 2; - } - for (i = -232; i < 256+232; i++) { - int j = (table_Y[i+384 - 37] + 43)/85; - - if (!isRgb) - j <<= 6; - - ((uint8_t *)table_b)[i] = j; + case 1: + c->yuvTable = av_malloc(1024); + y_table = c->yuvTable; + yb = -(384<<16) - oy; + for (i = 0; i < 1024-110; i++) { + y_table[i+110] = av_clip_uint8((yb + 0x8000) >> 16) >> 7; + yb += cy; } + fill_table(c->table_gU, 1, cgu, y_table + yoffs); + fill_gv_table(c->table_gV, 1, cgv); break; case 4: case 4|128: - table_start= table_121 = av_malloc ((197 + 2*682 + 256 + 132) * sizeof (uint8_t)); - - entry_size = sizeof (uint8_t); - table_r = table_121 + 197; - table_b = table_121 + 197 + 685; - table_g = table_121 + 197 + 2*682; - - for (i = -197; i < 256+197; i++) { - int j = table_Y[i+384 - 110] >> 7; - - if (isRgb) - j <<= 3; - - ((uint8_t *)table_r)[i] = j; - } - for (i = -132; i < 256+132; i++) { - int j = (table_Y[i+384 - 37]+ 43)/85; - - ((uint8_t *)table_g)[i] = j << 1; - } - for (i = -232; i < 256+232; i++) { - int j =table_Y[i+384 - 110] >> 7; - - if (!isRgb) - j <<= 3; - - ((uint8_t *)table_b)[i] = j; + rbase = isRgb ? 3 : 0; + gbase = 1; + bbase = isRgb ? 0 : 3; + c->yuvTable = av_malloc(1024*3); + y_table = c->yuvTable; + yb = -(384<<16) - oy; + for (i = 0; i < 1024-110; i++) { + int yval = av_clip_uint8((yb + 0x8000) >> 16); + y_table[i+110 ] = (yval >> 7) << rbase; + y_table[i+ 37+1024] = ((yval + 43) / 85) << gbase; + y_table[i+110+2048] = (yval >> 7) << bbase; + yb += cy; } + fill_table(c->table_rV, 1, crv, y_table + yoffs); + fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024); + fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048); + fill_gv_table(c->table_gV, 1, cgv); break; - - case 1: - table_start= table_1 = av_malloc (256*2 * sizeof (uint8_t)); - - entry_size = sizeof (uint8_t); - table_g = table_1; - table_r = table_b = NULL; - - for (i = 0; i < 256+256; i++) { - int j = table_Y[i + 384 - 110]>>7; - - ((uint8_t *)table_g)[i] = j; + case 8: + rbase = isRgb ? 5 : 0; + gbase = isRgb ? 2 : 3; + bbase = isRgb ? 0 : 6; + c->yuvTable = av_malloc(1024*3); + y_table = c->yuvTable; + yb = -(384<<16) - oy; + for (i = 0; i < 1024-38; i++) { + int yval = av_clip_uint8((yb + 0x8000) >> 16); + y_table[i+16 ] = ((yval + 18) / 36) << rbase; + y_table[i+16+1024] = ((yval + 18) / 36) << gbase; + y_table[i+37+2048] = ((yval + 43) / 85) << bbase; + yb += cy; } + fill_table(c->table_rV, 1, crv, y_table + yoffs); + fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024); + fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048); + fill_gv_table(c->table_gV, 1, cgv); + break; + case 15: + case 16: + rbase = isRgb ? bpp - 5 : 0; + gbase = 5; + bbase = isRgb ? 0 : (bpp - 5); + c->yuvTable = av_malloc(1024*3*2); + y_table16 = c->yuvTable; + yb = -(384<<16) - oy; + for (i = 0; i < 1024; i++) { + uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16); + y_table16[i ] = (yval >> 3) << rbase; + y_table16[i+1024] = (yval >> (18 - bpp)) << gbase; + y_table16[i+2048] = (yval >> 3) << bbase; + yb += cy; + } + fill_table(c->table_rV, 2, crv, y_table16 + yoffs); + fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024); + fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048); + fill_gv_table(c->table_gV, 2, cgv); + break; + case 24: + c->yuvTable = av_malloc(1024); + y_table = c->yuvTable; + yb = -(384<<16) - oy; + for (i = 0; i < 1024; i++) { + y_table[i] = av_clip_uint8((yb + 0x8000) >> 16); + yb += cy; + } + fill_table(c->table_rV, 1, crv, y_table + yoffs); + fill_table(c->table_gU, 1, cgu, y_table + yoffs); + fill_table(c->table_bU, 1, cbu, y_table + yoffs); + fill_gv_table(c->table_gV, 1, cgv); + break; + case 32: + base = (c->dstFormat == PIX_FMT_RGB32_1 || c->dstFormat == PIX_FMT_BGR32_1) ? 8 : 0; + rbase = base + (isRgb ? 16 : 0); + gbase = base + 8; + bbase = base + (isRgb ? 0 : 16); + abase = (base + 24) & 31; + c->yuvTable = av_malloc(1024*3*4); + y_table32 = c->yuvTable; + yb = -(384<<16) - oy; + for (i = 0; i < 1024; i++) { + uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16); + y_table32[i ] = (yval << rbase) + (255 << abase); + y_table32[i+1024] = yval << gbase; + y_table32[i+2048] = yval << bbase; + yb += cy; + } + fill_table(c->table_rV, 4, crv, y_table32 + yoffs); + fill_table(c->table_gU, 4, cgu, y_table32 + yoffs + 1024); + fill_table(c->table_bU, 4, cbu, y_table32 + yoffs + 2048); + fill_gv_table(c->table_gV, 4, cgv); break; - default: - table_start= NULL; + c->yuvTable = NULL; av_log(c, AV_LOG_ERROR, "%ibpp not supported by yuv2rgb\n", bpp); - //free mem? return -1; } - - for (i = 0; i < 256; i++) { - c->table_rV[i] = (uint8_t *)table_r + entry_size * div_round (crv * (i-128), 76309); - c->table_gU[i] = (uint8_t *)table_g + entry_size * div_round (cgu * (i-128), 76309); - c->table_gV[i] = entry_size * div_round (cgv * (i-128), 76309); - c->table_bU[i] = (uint8_t *)table_b + entry_size * div_round (cbu * (i-128), 76309); - } - - av_free(c->yuvTable); - c->yuvTable= table_start; return 0; } diff --git a/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_altivec.c b/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_altivec.c index 43d224edfd..b3a87a0360 100644 --- a/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_altivec.c +++ b/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_altivec.c @@ -5,18 +5,18 @@ * * This file is part of FFmpeg. * - * FFmpeg is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. * * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with FFmpeg; if not, write to the Free Software + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -91,9 +91,6 @@ adjustment. #include #include #include "config.h" -#ifdef HAVE_MALLOC_H -#include -#endif #include "rgb2rgb.h" #include "swscale.h" #include "swscale_internal.h" @@ -154,7 +151,7 @@ const vector unsigned char #define vec_merge3(x2,x1,x0,y0,y1,y2) \ do { \ - typeof(x0) o0,o2,o3; \ + __typeof__(x0) o0,o2,o3; \ o0 = vec_mergeh (x0,x1); \ y0 = vec_perm (o0, x2, perm_rgb_0); \ o2 = vec_perm (o0, x2, perm_rgb_1); \ @@ -165,7 +162,7 @@ do { \ #define vec_mstbgr24(x0,x1,x2,ptr) \ do { \ - typeof(x0) _0,_1,_2; \ + __typeof__(x0) _0,_1,_2; \ vec_merge3 (x0,x1,x2,_0,_1,_2); \ vec_st (_0, 0, ptr++); \ vec_st (_1, 0, ptr++); \ @@ -174,7 +171,7 @@ do { \ #define vec_mstrgb24(x0,x1,x2,ptr) \ do { \ - typeof(x0) _0,_1,_2; \ + __typeof__(x0) _0,_1,_2; \ vec_merge3 (x2,x1,x0,_0,_1,_2); \ vec_st (_0, 0, ptr++); \ vec_st (_1, 0, ptr++); \ @@ -222,12 +219,12 @@ do { \ #define vec_unh(x) \ (vector signed short) \ - vec_perm(x,(typeof(x)){0}, \ + vec_perm(x,(__typeof__(x)){0}, \ ((vector unsigned char){0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03,\ 0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07})) #define vec_unl(x) \ (vector signed short) \ - vec_perm(x,(typeof(x)){0}, \ + vec_perm(x,(__typeof__(x)){0}, \ ((vector unsigned char){0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B,\ 0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F})) @@ -240,7 +237,7 @@ do { \ ((vector unsigned short)vec_max (x,((vector signed short) {0})), \ (vector unsigned short)vec_max (y,((vector signed short) {0}))) -//#define out_pixels(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a)){0}),a,a,a,ptr) +//#define out_pixels(a,b,c,ptr) vec_mstrgb32(__typeof__(a),((__typeof__ (a)){255}),a,a,a,ptr) static inline void cvtyuvtoRGB (SwsContext *c, @@ -441,10 +438,10 @@ static int altivec_##name (SwsContext *c, \ } -#define out_abgr(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a)){0}),c,b,a,ptr) -#define out_bgra(a,b,c,ptr) vec_mstrgb32(typeof(a),c,b,a,((typeof (a)){0}),ptr) -#define out_rgba(a,b,c,ptr) vec_mstrgb32(typeof(a),a,b,c,((typeof (a)){0}),ptr) -#define out_argb(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a)){0}),a,b,c,ptr) +#define out_abgr(a,b,c,ptr) vec_mstrgb32(__typeof__(a),((__typeof__ (a)){255}),c,b,a,ptr) +#define out_bgra(a,b,c,ptr) vec_mstrgb32(__typeof__(a),c,b,a,((__typeof__ (a)){255}),ptr) +#define out_rgba(a,b,c,ptr) vec_mstrgb32(__typeof__(a),a,b,c,((__typeof__ (a)){255}),ptr) +#define out_argb(a,b,c,ptr) vec_mstrgb32(__typeof__(a),((__typeof__ (a)){255}),a,b,c,ptr) #define out_rgb24(a,b,c,ptr) vec_mstrgb24(a,b,c,ptr) #define out_bgr24(a,b,c,ptr) vec_mstbgr24(a,b,c,ptr) @@ -693,7 +690,7 @@ static int altivec_uyvy_rgb32 (SwsContext *c, So we just fall back to the C codes for this. */ -SwsFunc yuv2rgb_init_altivec (SwsContext *c) +SwsFunc sws_yuv2rgb_init_altivec (SwsContext *c) { if (!(c->flags & SWS_CPU_CAPS_ALTIVEC)) return NULL; @@ -753,7 +750,7 @@ SwsFunc yuv2rgb_init_altivec (SwsContext *c) return NULL; } -void yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation) +void sws_yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation) { union { signed short tmp[8] __attribute__ ((aligned(16))); diff --git a/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_bfin.c b/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_bfin.c index 1500a96b25..58cc5b6a35 100644 --- a/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_bfin.c +++ b/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_bfin.c @@ -27,9 +27,6 @@ #include #include #include "config.h" -#ifdef HAVE_MALLOC_H -#include -#endif #include #include "rgb2rgb.h" #include "swscale.h" @@ -41,17 +38,17 @@ #define L1CODE #endif -extern void ff_bfin_yuv2rgb555_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out, - int w, uint32_t *coeffs) L1CODE; +void ff_bfin_yuv2rgb555_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out, + int w, uint32_t *coeffs) L1CODE; -extern void ff_bfin_yuv2rgb565_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out, - int w, uint32_t *coeffs) L1CODE; +void ff_bfin_yuv2rgb565_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out, + int w, uint32_t *coeffs) L1CODE; -extern void ff_bfin_yuv2rgb24_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out, - int w, uint32_t *coeffs) L1CODE; +void ff_bfin_yuv2rgb24_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out, + int w, uint32_t *coeffs) L1CODE; -typedef void (* ltransform_t)(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out, - int w, uint32_t *coeffs); +typedef void (* ltransform)(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out, + int w, uint32_t *coeffs); static void bfin_prepare_coefficients (SwsContext *c, int rgb, int masks) @@ -95,7 +92,7 @@ static int core_yuv420_rgb (SwsContext *c, uint8_t **in, int *instrides, int srcSliceY, int srcSliceH, uint8_t **oplanes, int *outstrides, - ltransform_t lcscf, int rgb, int masks) + ltransform lcscf, int rgb, int masks) { uint8_t *py,*pu,*pv,*op; int w = instrides[0]; diff --git a/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_mlib.c b/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_mlib.c index ff2e50a2b0..68247914e7 100644 --- a/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_mlib.c +++ b/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_mlib.c @@ -73,7 +73,7 @@ static int mlib_YUV2RGB420_24(SwsContext *c, uint8_t* src[], int srcStride[], in } -SwsFunc yuv2rgb_init_mlib(SwsContext *c) +SwsFunc sws_yuv2rgb_init_mlib(SwsContext *c) { switch(c->dstFormat){ case PIX_FMT_RGB24: return mlib_YUV2RGB420_24; diff --git a/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_template.c b/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_template.c index 1f8e225baa..f55568b0ab 100644 --- a/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_template.c +++ b/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_template.c @@ -1,7 +1,7 @@ /* * yuv2rgb_mmx.c, software YUV to RGB converter with Intel MMX "technology" * - * Copyright (C) 2000, Silicon Integrated System Corp. + * Copyright (C) 2000, Silicon Integrated System Corp * * Author: Olie Lho * @@ -30,14 +30,14 @@ #undef EMMS #undef SFENCE -#ifdef HAVE_3DNOW -/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */ +#if HAVE_AMD3DNOW +/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */ #define EMMS "femms" #else #define EMMS "emms" #endif -#ifdef HAVE_MMX2 +#if HAVE_MMX2 #define MOVNTQ "movntq" #define SFENCE "sfence" #else @@ -121,53 +121,73 @@ "punpcklbw %%mm5, %%mm2;" /* G7 G6 G5 G4 G3 G2 G1 G0 */\ +#define YUV422_UNSHIFT \ + if(c->srcFormat == PIX_FMT_YUV422P){ \ + srcStride[1] *= 2; \ + srcStride[2] *= 2; \ + } \ + +#define YUV2RGB_LOOP(depth) \ + h_size= (c->dstW+7)&~7; \ + if(h_size*depth > FFABS(dstStride[0])) h_size-=8; \ +\ + __asm__ volatile ("pxor %mm4, %mm4;" /* zero mm4 */ ); \ + for (y= 0; y>1)*srcStride[1]; \ + uint8_t *pv = src[2] + (y>>1)*srcStride[2]; \ + long index= -h_size/2; \ + +#define YUV2RGB_INIT \ + /* This MMX assembly code deals with a SINGLE scan line at a time, \ + * it converts 8 pixels in each iteration. */ \ + __asm__ volatile ( \ + /* load data for start of next scan line */ \ + "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ \ + "movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ \ + "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ \ + /* \ + ".balign 16 \n\t" \ + */ \ + "1: \n\t" \ + /* No speed difference on my p3@500 with prefetch, \ + * if it is faster for anyone with -benchmark then tell me. \ + PREFETCH" 64(%0) \n\t" \ + PREFETCH" 64(%1) \n\t" \ + PREFETCH" 64(%2) \n\t" \ + */ \ + +#define YUV2RGB_ENDLOOP(depth) \ + "add $"AV_STRINGIFY(depth*8)", %1 \n\t" \ + "add $4, %0 \n\t" \ + " js 1b \n\t" \ +\ + : "+r" (index), "+r" (image) \ + : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index) \ + ); \ + } \ + __asm__ volatile (EMMS); \ + return srcSliceH; \ + static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]){ int y, h_size; - if(c->srcFormat == PIX_FMT_YUV422P){ - srcStride[1] *= 2; - srcStride[2] *= 2; - } + YUV422_UNSHIFT + YUV2RGB_LOOP(2) - h_size= (c->dstW+7)&~7; - if(h_size*2 > FFABS(dstStride[0])) h_size-=8; + c->blueDither= ff_dither8[y&1]; + c->greenDither= ff_dither4[y&1]; + c->redDither= ff_dither8[(y+1)&1]; - asm volatile ("pxor %mm4, %mm4;" /* zero mm4 */ ); - //printf("%X %X %X %X %X %X %X %X %X %X\n", (int)&c->redDither, (int)&b5Dither, (int)src[0], (int)src[1], (int)src[2], (int)dst[0], - //srcStride[0],srcStride[1],srcStride[2],dstStride[0]); - for (y= 0; y>1)*srcStride[1]; - uint8_t *pv = src[2] + (y>>1)*srcStride[2]; - long index= -h_size/2; - - b5Dither= ff_dither8[y&1]; - g6Dither= ff_dither4[y&1]; - g5Dither= ff_dither8[y&1]; - r5Dither= ff_dither8[(y+1)&1]; - /* This MMX assembly code deals with a SINGLE scan line at a time, - * it converts 8 pixels in each iteration. */ - asm volatile ( - /* load data for start of next scan line */ - "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ - "movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ - "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ - //".balign 16 \n\t" - "1: \n\t" - /* No speed difference on my p3@500 with prefetch, - * if it is faster for anyone with -benchmark then tell me. - PREFETCH" 64(%0) \n\t" - PREFETCH" 64(%1) \n\t" - PREFETCH" 64(%2) \n\t" - */ -YUV2RGB + YUV2RGB_INIT + YUV2RGB #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm0;" - "paddusb "MANGLE(g6Dither)", %%mm2;" - "paddusb "MANGLE(r5Dither)", %%mm1;" + "paddusb "BLUE_DITHER"(%4), %%mm0;" + "paddusb "GREEN_DITHER"(%4), %%mm2;" + "paddusb "RED_DITHER"(%4), %%mm1;" #endif /* mask unneeded bits off */ "pand "MANGLE(mmx_redmask)", %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */ @@ -202,61 +222,27 @@ YUV2RGB MOVNTQ " %%mm5, 8 (%1);" /* store pixel 4-7 */ - "add $16, %1 \n\t" - "add $4, %0 \n\t" - " js 1b \n\t" - - : "+r" (index), "+r" (image) - : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index) - ); - } - - asm volatile (EMMS); - - return srcSliceH; + YUV2RGB_ENDLOOP(2) } static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]){ int y, h_size; - if(c->srcFormat == PIX_FMT_YUV422P){ - srcStride[1] *= 2; - srcStride[2] *= 2; - } + YUV422_UNSHIFT + YUV2RGB_LOOP(2) - h_size= (c->dstW+7)&~7; - if(h_size*2 > FFABS(dstStride[0])) h_size-=8; + c->blueDither= ff_dither8[y&1]; + c->greenDither= ff_dither8[y&1]; + c->redDither= ff_dither8[(y+1)&1]; - asm volatile ("pxor %mm4, %mm4;" /* zero mm4 */ ); - //printf("%X %X %X %X %X %X %X %X %X %X\n", (int)&c->redDither, (int)&b5Dither, (int)src[0], (int)src[1], (int)src[2], (int)dst[0], - //srcStride[0],srcStride[1],srcStride[2],dstStride[0]); - for (y= 0; y>1)*srcStride[1]; - uint8_t *pv = src[2] + (y>>1)*srcStride[2]; - long index= -h_size/2; - - b5Dither= ff_dither8[y&1]; - g6Dither= ff_dither4[y&1]; - g5Dither= ff_dither8[y&1]; - r5Dither= ff_dither8[(y+1)&1]; - /* This MMX assembly code deals with a SINGLE scan line at a time, - * it converts 8 pixels in each iteration. */ - asm volatile ( - /* load data for start of next scan line */ - "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ - "movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ - "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ - //".balign 16 \n\t" - "1: \n\t" -YUV2RGB + YUV2RGB_INIT + YUV2RGB #ifdef DITHER1XBPP - "paddusb "MANGLE(b5Dither)", %%mm0 \n\t" - "paddusb "MANGLE(g5Dither)", %%mm2 \n\t" - "paddusb "MANGLE(r5Dither)", %%mm1 \n\t" + "paddusb "BLUE_DITHER"(%4), %%mm0 \n\t" + "paddusb "GREEN_DITHER"(%4), %%mm2 \n\t" + "paddusb "RED_DITHER"(%4), %%mm1 \n\t" #endif /* mask unneeded bits off */ @@ -293,51 +279,20 @@ YUV2RGB MOVNTQ " %%mm5, 8 (%1);" /* store pixel 4-7 */ - "add $16, %1 \n\t" - "add $4, %0 \n\t" - " js 1b \n\t" - : "+r" (index), "+r" (image) - : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index) - ); - } - - asm volatile (EMMS); - return srcSliceH; + YUV2RGB_ENDLOOP(2) } static inline int RENAME(yuv420_rgb24)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]){ int y, h_size; - if(c->srcFormat == PIX_FMT_YUV422P){ - srcStride[1] *= 2; - srcStride[2] *= 2; - } + YUV422_UNSHIFT + YUV2RGB_LOOP(3) - h_size= (c->dstW+7)&~7; - if(h_size*3 > FFABS(dstStride[0])) h_size-=8; - - asm volatile ("pxor %mm4, %mm4;" /* zero mm4 */ ); - - for (y= 0; y>1)*srcStride[1]; - uint8_t *pv = src[2] + (y>>1)*srcStride[2]; - long index= -h_size/2; - - /* This MMX assembly code deals with a SINGLE scan line at a time, - * it converts 8 pixels in each iteration. */ - asm volatile ( - /* load data for start of next scan line */ - "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ - "movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ - "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ - //".balign 16 \n\t" - "1: \n\t" -YUV2RGB + YUV2RGB_INIT + YUV2RGB /* mm0=B, %%mm2=G, %%mm1=R */ -#ifdef HAVE_MMX2 +#if HAVE_MMX2 "movq "MANGLE(ff_M24A)", %%mm4 \n\t" "movq "MANGLE(ff_M24C)", %%mm7 \n\t" "pshufw $0x50, %%mm0, %%mm5 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */ @@ -438,101 +393,61 @@ YUV2RGB "pxor %%mm4, %%mm4 \n\t" #endif - "add $24, %1 \n\t" - "add $4, %0 \n\t" - " js 1b \n\t" - - : "+r" (index), "+r" (image) - : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index) - ); - } - - asm volatile (EMMS); - return srcSliceH; + YUV2RGB_ENDLOOP(3) } +#define RGB_PLANAR2PACKED32 \ + /* convert RGB plane to RGB packed format, \ + mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> A, \ + mm4 -> GB, mm5 -> AR pixel 4-7, \ + mm6 -> GB, mm7 -> AR pixel 0-3 */ \ + "movq %%mm0, %%mm6;" /* B7 B6 B5 B4 B3 B2 B1 B0 */ \ + "movq %%mm1, %%mm7;" /* R7 R6 R5 R4 R3 R2 R1 R0 */ \ +\ + "movq %%mm0, %%mm4;" /* B7 B6 B5 B4 B3 B2 B1 B0 */ \ + "movq %%mm1, %%mm5;" /* R7 R6 R5 R4 R3 R2 R1 R0 */ \ +\ + "punpcklbw %%mm2, %%mm6;" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \ + "punpcklbw %%mm3, %%mm7;" /* A3 R3 A2 R2 A1 R1 A0 R0 */ \ +\ + "punpcklwd %%mm7, %%mm6;" /* A1 R1 B1 G1 A0 R0 B0 G0 */ \ + MOVNTQ " %%mm6, (%1);" /* Store ARGB1 ARGB0 */ \ +\ + "movq %%mm0, %%mm6;" /* B7 B6 B5 B4 B3 B2 B1 B0 */ \ + "punpcklbw %%mm2, %%mm6;" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \ +\ + "punpckhwd %%mm7, %%mm6;" /* A3 R3 G3 B3 A2 R2 B3 G2 */ \ + MOVNTQ " %%mm6, 8 (%1);" /* Store ARGB3 ARGB2 */ \ +\ + "punpckhbw %%mm2, %%mm4;" /* G7 B7 G6 B6 G5 B5 G4 B4 */ \ + "punpckhbw %%mm3, %%mm5;" /* A7 R7 A6 R6 A5 R5 A4 R4 */ \ +\ + "punpcklwd %%mm5, %%mm4;" /* A5 R5 B5 G5 A4 R4 B4 G4 */ \ + MOVNTQ " %%mm4, 16 (%1);" /* Store ARGB5 ARGB4 */ \ +\ + "movq %%mm0, %%mm4;" /* B7 B6 B5 B4 B3 B2 B1 B0 */ \ + "punpckhbw %%mm2, %%mm4;" /* G7 B7 G6 B6 G5 B5 G4 B4 */ \ +\ + "punpckhwd %%mm5, %%mm4;" /* A7 R7 G7 B7 A6 R6 B6 G6 */ \ + MOVNTQ " %%mm4, 24 (%1);" /* Store ARGB7 ARGB6 */ \ +\ + "movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ \ + "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ \ +\ + "pxor %%mm4, %%mm4;" /* zero mm4 */ \ + "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ \ + static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]){ int y, h_size; - if(c->srcFormat == PIX_FMT_YUV422P){ - srcStride[1] *= 2; - srcStride[2] *= 2; - } + YUV422_UNSHIFT + YUV2RGB_LOOP(4) - h_size= (c->dstW+7)&~7; - if(h_size*4 > FFABS(dstStride[0])) h_size-=8; + YUV2RGB_INIT + YUV2RGB + "pcmpeqd %%mm3, %%mm3;" /* fill mm3 */ + RGB_PLANAR2PACKED32 - asm volatile ("pxor %mm4, %mm4;" /* zero mm4 */ ); - - for (y= 0; y>1)*srcStride[1]; - uint8_t *pv = src[2] + (y>>1)*srcStride[2]; - long index= -h_size/2; - - /* This MMX assembly code deals with a SINGLE scan line at a time, - * it converts 8 pixels in each iteration. */ - asm volatile ( - /* load data for start of next scan line */ - "movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ - "movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ - "movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ - //".balign 16 \n\t" - "1: \n\t" -YUV2RGB - /* convert RGB plane to RGB packed format, - mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0, - mm4 -> GB, mm5 -> AR pixel 4-7, - mm6 -> GB, mm7 -> AR pixel 0-3 */ - "pxor %%mm3, %%mm3;" /* zero mm3 */ - - "movq %%mm0, %%mm6;" /* B7 B6 B5 B4 B3 B2 B1 B0 */ - "movq %%mm1, %%mm7;" /* R7 R6 R5 R4 R3 R2 R1 R0 */ - - "movq %%mm0, %%mm4;" /* B7 B6 B5 B4 B3 B2 B1 B0 */ - "movq %%mm1, %%mm5;" /* R7 R6 R5 R4 R3 R2 R1 R0 */ - - "punpcklbw %%mm2, %%mm6;" /* G3 B3 G2 B2 G1 B1 G0 B0 */ - "punpcklbw %%mm3, %%mm7;" /* 00 R3 00 R2 00 R1 00 R0 */ - - "punpcklwd %%mm7, %%mm6;" /* 00 R1 B1 G1 00 R0 B0 G0 */ - MOVNTQ " %%mm6, (%1);" /* Store ARGB1 ARGB0 */ - - "movq %%mm0, %%mm6;" /* B7 B6 B5 B4 B3 B2 B1 B0 */ - "punpcklbw %%mm2, %%mm6;" /* G3 B3 G2 B2 G1 B1 G0 B0 */ - - "punpckhwd %%mm7, %%mm6;" /* 00 R3 G3 B3 00 R2 B3 G2 */ - MOVNTQ " %%mm6, 8 (%1);" /* Store ARGB3 ARGB2 */ - - "punpckhbw %%mm2, %%mm4;" /* G7 B7 G6 B6 G5 B5 G4 B4 */ - "punpckhbw %%mm3, %%mm5;" /* 00 R7 00 R6 00 R5 00 R4 */ - - "punpcklwd %%mm5, %%mm4;" /* 00 R5 B5 G5 00 R4 B4 G4 */ - MOVNTQ " %%mm4, 16 (%1);" /* Store ARGB5 ARGB4 */ - - "movq %%mm0, %%mm4;" /* B7 B6 B5 B4 B3 B2 B1 B0 */ - "punpckhbw %%mm2, %%mm4;" /* G7 B7 G6 B6 G5 B5 G4 B4 */ - - "punpckhwd %%mm5, %%mm4;" /* 00 R7 G7 B7 00 R6 B6 G6 */ - MOVNTQ " %%mm4, 24 (%1);" /* Store ARGB7 ARGB6 */ - - "movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ - "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ - - "pxor %%mm4, %%mm4;" /* zero mm4 */ - "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ - - "add $32, %1 \n\t" - "add $4, %0 \n\t" - " js 1b \n\t" - - : "+r" (index), "+r" (image) - : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index) - ); - } - - asm volatile (EMMS); - return srcSliceH; + YUV2RGB_ENDLOOP(4) } diff --git a/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_vis.c b/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_vis.c index 120fa56c71..2e2737aa9f 100644 --- a/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_vis.c +++ b/src/add-ons/media/plugins/avcodec/libswscale/yuv2rgb_vis.c @@ -80,12 +80,13 @@ +// FIXME: must be changed to set alpha to 255 instead of 0 static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]){ int y, out1, out2, out3, out4, out5, out6; for(y=0;y < srcSliceH;++y) { - asm volatile ( + __asm__ volatile ( YUV2RGB_INIT "wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */ "1: \n\t" @@ -131,12 +132,13 @@ static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int s return srcSliceH; } +// FIXME: must be changed to set alpha to 255 instead of 0 static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]){ int y, out1, out2, out3, out4, out5, out6; for(y=0;y < srcSliceH;++y) { - asm volatile ( + __asm__ volatile ( YUV2RGB_INIT "wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */ "1: \n\t" @@ -182,7 +184,7 @@ static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int s return srcSliceH; } -SwsFunc yuv2rgb_init_vis(SwsContext *c) { +SwsFunc sws_yuv2rgb_init_vis(SwsContext *c) { c->sparc_coeffs[5]=c->yCoeff; c->sparc_coeffs[6]=c->vgCoeff; c->sparc_coeffs[7]=c->vrCoeff; @@ -196,11 +198,11 @@ SwsFunc yuv2rgb_init_vis(SwsContext *c) { c->sparc_coeffs[4]=(((int16_t)c->vOffset*(int16_t)c->vrCoeff>>11) & 0xffff) * 0x0001000100010001ULL; if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV422P && (c->dstW & 7)==0) { - av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV422P -> RGB32\n"); + av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV422P -> RGB32 (WARNING: alpha value is wrong)\n"); return vis_422P_ARGB32; } else if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV420P && (c->dstW & 7)==0) { - av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV420P -> RGB32\n"); + av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV420P -> RGB32 (WARNING: alpha value is wrong)\n"); return vis_420P_ARGB32; } return NULL;