sync with ffmpeg 0.5 release

git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@29534 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
David McPaul 2009-03-15 01:34:21 +00:00
parent b38dbcd92d
commit 6608bd7c78
19 changed files with 2804 additions and 3429 deletions

View File

@ -1,5 +1,6 @@
SubDir HAIKU_TOP src add-ons media plugins avcodec libswscale ;
SubDirHdrs [ FDirName $(SUBDIR) .. ] ;
SubDirHdrs [ FDirName $(SUBDIR) ../libavutil ] ;
SubDirHdrs [ FDirName $(SUBDIR) ../libavcodec ] ;
@ -8,7 +9,7 @@ TARGET_WARNING_CCFLAGS = [ FFilter $(TARGET_WARNING_CCFLAGS)
: -Wall -Wmissing-prototypes -Wsign-compare -Wpointer-arith ] ;
SubDirCcFlags -fomit-frame-pointer -DPIC ;
SubDirCcFlags -DHAVE_AV_CONFIG_H=1 ;
#SubDirCcFlags -DHAVE_AV_CONFIG_H=1 ;
StaticLibrary libswscale.a :
rgb2rgb.c

View File

@ -1,175 +0,0 @@
/*
* Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdio.h>
#include <string.h> /* for memset() */
#include <unistd.h>
#include <stdlib.h>
#include <inttypes.h>
#include "swscale.h"
#include "rgb2rgb.h"
#define SIZE 1000
#define srcByte 0x55
#define dstByte 0xBB
#define FUNC(s,d,n) {s,d,#n,n}
static int cpu_caps;
/**
 * Parse the command-line switches that force CPU capability flags.
 *
 * Recognised options: -m (MMX), -2 (MMX2), -3 (3DNow!). Each one ORs the
 * matching SWS_CPU_CAPS_* bit into the file-level cpu_caps variable; any
 * other value returned by getopt() is reported through av_log().
 *
 * @param argc argument count as passed to main()
 * @param argv argument vector as passed to main()
 * @return argv[optind], i.e. the first argument getopt() did not consume
 */
static char *args_parse(int argc, char *argv[])
{
    int opt;

    for (;;) {
        opt = getopt(argc, argv, "m23");
        if (opt == -1)
            break;

        if (opt == 'm')
            cpu_caps |= SWS_CPU_CAPS_MMX;
        else if (opt == '2')
            cpu_caps |= SWS_CPU_CAPS_MMX2;
        else if (opt == '3')
            cpu_caps |= SWS_CPU_CAPS_3DNOW;
        else
            av_log(NULL, AV_LOG_ERROR, "Unknown option %c\n", opt);
    }

    return argv[optind];
}
int main(int argc, char **argv)
{
int i, funcNum;
uint8_t *srcBuffer= (uint8_t*)av_malloc(SIZE);
uint8_t *dstBuffer= (uint8_t*)av_malloc(SIZE);
int failedNum=0;
int passedNum=0;
av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n");
args_parse(argc, argv);
av_log(NULL, AV_LOG_INFO, "CPU capabilities forced to %x\n", cpu_caps);
sws_rgb2rgb_init(cpu_caps);
for(funcNum=0; ; funcNum++){
struct func_info_s {
int src_bpp;
int dst_bpp;
char *name;
void (*func)(const uint8_t *src, uint8_t *dst, long src_size);
} func_info[] = {
FUNC(2, 2, rgb15to16),
FUNC(2, 3, rgb15to24),
FUNC(2, 4, rgb15to32),
FUNC(2, 3, rgb16to24),
FUNC(2, 4, rgb16to32),
FUNC(3, 2, rgb24to15),
FUNC(3, 2, rgb24to16),
FUNC(3, 4, rgb24to32),
FUNC(4, 2, rgb32to15),
FUNC(4, 2, rgb32to16),
FUNC(4, 3, rgb32to24),
FUNC(2, 2, rgb16to15),
FUNC(2, 2, rgb15tobgr15),
FUNC(2, 2, rgb15tobgr16),
FUNC(2, 3, rgb15tobgr24),
FUNC(2, 4, rgb15tobgr32),
FUNC(2, 2, rgb16tobgr15),
FUNC(2, 2, rgb16tobgr16),
FUNC(2, 3, rgb16tobgr24),
FUNC(2, 4, rgb16tobgr32),
FUNC(3, 2, rgb24tobgr15),
FUNC(3, 2, rgb24tobgr16),
FUNC(3, 3, rgb24tobgr24),
FUNC(3, 4, rgb24tobgr32),
FUNC(4, 2, rgb32tobgr15),
FUNC(4, 2, rgb32tobgr16),
FUNC(4, 3, rgb32tobgr24),
FUNC(4, 4, rgb32tobgr32),
FUNC(0, 0, NULL)
};
int width;
int failed=0;
int srcBpp=0;
int dstBpp=0;
if (!func_info[funcNum].func) break;
av_log(NULL, AV_LOG_INFO,".");
memset(srcBuffer, srcByte, SIZE);
for(width=63; width>0; width--){
int dstOffset;
for(dstOffset=128; dstOffset<196; dstOffset+=4){
int srcOffset;
memset(dstBuffer, dstByte, SIZE);
for(srcOffset=128; srcOffset<196; srcOffset+=4){
uint8_t *src= srcBuffer+srcOffset;
uint8_t *dst= dstBuffer+dstOffset;
char *name=NULL;
if(failed) break; //don't fill the screen with shit ...
srcBpp = func_info[funcNum].src_bpp;
dstBpp = func_info[funcNum].dst_bpp;
name = func_info[funcNum].name;
func_info[funcNum].func(src, dst, width*srcBpp);
if(!srcBpp) break;
for(i=0; i<SIZE; i++){
if(srcBuffer[i]!=srcByte){
av_log(NULL, AV_LOG_INFO, "src damaged at %d w:%d src:%d dst:%d %s\n",
i, width, srcOffset, dstOffset, name);
failed=1;
break;
}
}
for(i=0; i<dstOffset; i++){
if(dstBuffer[i]!=dstByte){
av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n",
i, width, srcOffset, dstOffset, name);
failed=1;
break;
}
}
for(i=dstOffset + width*dstBpp; i<SIZE; i++){
if(dstBuffer[i]!=dstByte){
av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n",
i, width, srcOffset, dstOffset, name);
failed=1;
break;
}
}
}
}
}
if(failed) failedNum++;
else if(srcBpp) passedNum++;
}
av_log(NULL, AV_LOG_INFO, "\n%d converters passed, %d converters randomly overwrote memory\n", passedNum, failedNum);
return failedNum;
}

View File

@ -27,32 +27,30 @@
*/
#include <inttypes.h>
#include "config.h"
#include "x86_cpu.h"
#include "bswap.h"
#include "libavutil/x86_cpu.h"
#include "libavutil/bswap.h"
#include "rgb2rgb.h"
#include "swscale.h"
#include "swscale_internal.h"
#define FAST_BGR2YV12 // use 7-bit instead of 15-bit coefficients
void (*rgb24to32)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32to24)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb15to24)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb16to24)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size);
//void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
//void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
@ -65,6 +63,9 @@ void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc
void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
long width, long height,
long lumStride, long chromStride, long dstStride);
void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
long width, long height,
long lumStride, long chromStride, long dstStride);
void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
long width, long height,
long lumStride, long chromStride, long srcStride);
@ -87,12 +88,13 @@ void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *sr
long srcStride1, long srcStride2,
long srcStride3, long dstStride);
#if defined(ARCH_X86) && defined(CONFIG_GPL)
#if ARCH_X86 && CONFIG_GPL
DECLARE_ASM_CONST(8, uint64_t, mmx_null) = 0x0000000000000000ULL;
DECLARE_ASM_CONST(8, uint64_t, mmx_one) = 0xFFFFFFFFFFFFFFFFULL;
DECLARE_ASM_CONST(8, uint64_t, mask32b) = 0x000000FF000000FFULL;
DECLARE_ASM_CONST(8, uint64_t, mask32g) = 0x0000FF000000FF00ULL;
DECLARE_ASM_CONST(8, uint64_t, mask32r) = 0x00FF000000FF0000ULL;
DECLARE_ASM_CONST(8, uint64_t, mask32a) = 0xFF000000FF000000ULL;
DECLARE_ASM_CONST(8, uint64_t, mask32) = 0x00FFFFFF00FFFFFFULL;
DECLARE_ASM_CONST(8, uint64_t, mask3216br) = 0x00F800F800F800F8ULL;
DECLARE_ASM_CONST(8, uint64_t, mask3216g) = 0x0000FC000000FC00ULL;
@ -121,22 +123,7 @@ DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL;
DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL;
DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL;
DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
#if 0
static volatile uint64_t __attribute__((aligned(8))) b5Dither;
static volatile uint64_t __attribute__((aligned(8))) g5Dither;
static volatile uint64_t __attribute__((aligned(8))) g6Dither;
static volatile uint64_t __attribute__((aligned(8))) r5Dither;
static uint64_t __attribute__((aligned(8))) dither4[2]={
0x0103010301030103LL,
0x0200020002000200LL,};
static uint64_t __attribute__((aligned(8))) dither8[2]={
0x0602060206020602LL,
0x0004000400040004LL,};
#endif
#endif /* defined(ARCH_X86) */
#endif /* ARCH_X86 */
#define RGB2YUV_SHIFT 8
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
@ -153,37 +140,37 @@ static uint64_t __attribute__((aligned(8))) dither8[2]={
//plain C versions
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_AMD3DNOW
#undef HAVE_SSE2
#define HAVE_MMX 0
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 0
#define HAVE_SSE2 0
#define RENAME(a) a ## _C
#include "rgb2rgb_template.c"
#if defined(ARCH_X86) && defined(CONFIG_GPL)
#if ARCH_X86 && CONFIG_GPL
//MMX versions
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_SSE2
#undef HAVE_MMX
#define HAVE_MMX 1
#define RENAME(a) a ## _MMX
#include "rgb2rgb_template.c"
//MMX2 versions
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_SSE2
#undef HAVE_MMX2
#define HAVE_MMX2 1
#define RENAME(a) a ## _MMX2
#include "rgb2rgb_template.c"
//3DNOW versions
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#undef HAVE_SSE2
#undef HAVE_AMD3DNOW
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 1
#define RENAME(a) a ## _3DNOW
#include "rgb2rgb_template.c"
@ -197,7 +184,7 @@ static uint64_t __attribute__((aligned(8))) dither8[2]={
*/
void sws_rgb2rgb_init(int flags){
#if (defined(HAVE_MMX2) || defined(HAVE_3DNOW) || defined(HAVE_MMX)) && defined(CONFIG_GPL)
#if (HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX) && CONFIG_GPL
if (flags & SWS_CPU_CAPS_MMX2)
rgb2rgb_init_MMX2();
else if (flags & SWS_CPU_CAPS_3DNOW)
@ -205,89 +192,28 @@ void sws_rgb2rgb_init(int flags){
else if (flags & SWS_CPU_CAPS_MMX)
rgb2rgb_init_MMX();
else
#endif /* defined(HAVE_MMX2) || defined(HAVE_3DNOW) || defined(HAVE_MMX) */
#endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
rgb2rgb_init_C();
}
/**
* Palette is assumed to contain BGR32.
* Convert the palette to the same packed 32-bit format as the palette
*/
void palette8torgb32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
void palette8topacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
long i;
/*
for (i=0; i<num_pixels; i++)
((unsigned *)dst)[i] = ((unsigned *)palette)[src[i]];
*/
for (i=0; i<num_pixels; i++)
{
#ifdef WORDS_BIGENDIAN
dst[3]= palette[src[i]*4+2];
dst[2]= palette[src[i]*4+1];
dst[1]= palette[src[i]*4+0];
#else
//FIXME slow?
dst[0]= palette[src[i]*4+2];
dst[1]= palette[src[i]*4+1];
dst[2]= palette[src[i]*4+0];
//dst[3]= 0; /* do we need this cleansing? */
#endif
dst+= 4;
}
}
void palette8tobgr32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
long i;
for (i=0; i<num_pixels; i++)
{
#ifdef WORDS_BIGENDIAN
dst[3]= palette[src[i]*4+0];
dst[2]= palette[src[i]*4+1];
dst[1]= palette[src[i]*4+2];
#else
//FIXME slow?
dst[0]= palette[src[i]*4+0];
dst[1]= palette[src[i]*4+1];
dst[2]= palette[src[i]*4+2];
//dst[3]= 0; /* do we need this cleansing? */
#endif
dst+= 4;
}
((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i]];
}
/**
* Palette is assumed to contain BGR32.
* Palette format: ABCD -> dst format: ABC
*/
void palette8torgb24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
void palette8topacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
long i;
/*
Writes 1 byte too much and might cause alignment issues on some architectures?
for (i=0; i<num_pixels; i++)
((unsigned *)(&dst[i*3])) = ((unsigned *)palette)[src[i]];
*/
for (i=0; i<num_pixels; i++)
{
//FIXME slow?
dst[0]= palette[src[i]*4+2];
dst[1]= palette[src[i]*4+1];
dst[2]= palette[src[i]*4+0];
dst+= 3;
}
}
void palette8tobgr24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
long i;
/*
Writes 1 byte too much and might cause alignment issues on some architectures?
for (i=0; i<num_pixels; i++)
((unsigned *)(&dst[i*3])) = ((unsigned *)palette)[src[i]];
*/
for (i=0; i<num_pixels; i++)
{
//FIXME slow?
@ -330,7 +256,7 @@ void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const ui
((uint16_t *)dst)[i] = bswap_16(((const uint16_t *)palette)[src[i]]);
}
void rgb32tobgr24(const uint8_t *src, uint8_t *dst, long src_size)
void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size)
{
long i;
long num_pixels = src_size >> 2;
@ -349,14 +275,14 @@ void rgb32tobgr24(const uint8_t *src, uint8_t *dst, long src_size)
}
}
void rgb24tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
void rgb24to32(const uint8_t *src, uint8_t *dst, long src_size)
{
long i;
for (i=0; 3*i<src_size; i++)
{
#ifdef WORDS_BIGENDIAN
/* RGB24 (= R,G,B) -> BGR32 (= A,R,G,B) */
dst[4*i + 0] = 0;
dst[4*i + 0] = 255;
dst[4*i + 1] = src[3*i + 0];
dst[4*i + 2] = src[3*i + 1];
dst[4*i + 3] = src[3*i + 2];
@ -364,7 +290,7 @@ void rgb24tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
dst[4*i + 0] = src[3*i + 2];
dst[4*i + 1] = src[3*i + 1];
dst[4*i + 2] = src[3*i + 0];
dst[4*i + 3] = 0;
dst[4*i + 3] = 255;
#endif
}
}
@ -380,7 +306,7 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
register uint16_t bgr;
bgr = *s++;
#ifdef WORDS_BIGENDIAN
*d++ = 0;
*d++ = 255;
*d++ = (bgr&0x1F)<<3;
*d++ = (bgr&0x7E0)>>3;
*d++ = (bgr&0xF800)>>8;
@ -388,12 +314,12 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
*d++ = (bgr&0xF800)>>8;
*d++ = (bgr&0x7E0)>>3;
*d++ = (bgr&0x1F)<<3;
*d++ = 0;
*d++ = 255;
#endif
}
}
void rgb16tobgr24(const uint8_t *src, uint8_t *dst, long src_size)
void rgb16to24(const uint8_t *src, uint8_t *dst, long src_size)
{
const uint16_t *end;
uint8_t *d = dst;
@ -416,13 +342,8 @@ void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
for (i=0; i<num_pixels; i++)
{
unsigned b,g,r;
register uint16_t rgb;
rgb = src[2*i];
r = rgb&0x1F;
g = (rgb&0x7E0)>>5;
b = (rgb&0xF800)>>11;
dst[2*i] = (b&0x1F) | ((g&0x3F)<<5) | ((r&0x1F)<<11);
unsigned rgb = ((const uint16_t*)src)[i];
((uint16_t*)dst)[i] = (rgb>>11) | (rgb&0x7E0) | (rgb<<11);
}
}
@ -433,13 +354,8 @@ void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
for (i=0; i<num_pixels; i++)
{
unsigned b,g,r;
register uint16_t rgb;
rgb = src[2*i];
r = rgb&0x1F;
g = (rgb&0x7E0)>>5;
b = (rgb&0xF800)>>11;
dst[2*i] = (b&0x1F) | ((g&0x1F)<<5) | ((r&0x1F)<<10);
unsigned rgb = ((const uint16_t*)src)[i];
((uint16_t*)dst)[i] = (rgb>>11) | ((rgb&0x7C0)>>1) | ((rgb&0x1F)<<10);
}
}
@ -454,7 +370,7 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
register uint16_t bgr;
bgr = *s++;
#ifdef WORDS_BIGENDIAN
*d++ = 0;
*d++ = 255;
*d++ = (bgr&0x1F)<<3;
*d++ = (bgr&0x3E0)>>2;
*d++ = (bgr&0x7C00)>>7;
@ -462,12 +378,12 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
*d++ = (bgr&0x7C00)>>7;
*d++ = (bgr&0x3E0)>>2;
*d++ = (bgr&0x1F)<<3;
*d++ = 0;
*d++ = 255;
#endif
}
}
void rgb15tobgr24(const uint8_t *src, uint8_t *dst, long src_size)
void rgb15to24(const uint8_t *src, uint8_t *dst, long src_size)
{
const uint16_t *end;
uint8_t *d = dst;
@ -490,13 +406,8 @@ void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
for (i=0; i<num_pixels; i++)
{
unsigned b,g,r;
register uint16_t rgb;
rgb = src[2*i];
r = rgb&0x1F;
g = (rgb&0x3E0)>>5;
b = (rgb&0x7C00)>>10;
dst[2*i] = (b&0x1F) | ((g&0x3F)<<5) | ((r&0x1F)<<11);
unsigned rgb = ((const uint16_t*)src)[i];
((uint16_t*)dst)[i] = ((rgb&0x7C00)>>10) | ((rgb&0x3E0)<<1) | (rgb<<11);
}
}
@ -507,17 +418,14 @@ void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
for (i=0; i<num_pixels; i++)
{
unsigned b,g,r;
register uint16_t rgb;
rgb = src[2*i];
r = rgb&0x1F;
g = (rgb&0x3E0)>>5;
b = (rgb&0x7C00)>>10;
dst[2*i] = (b&0x1F) | ((g&0x1F)<<5) | ((r&0x1F)<<10);
unsigned br;
unsigned rgb = ((const uint16_t*)src)[i];
br = rgb&0x7c1F;
((uint16_t*)dst)[i] = (br>>10) | (rgb&0x3E0) | (br<<10);
}
}
void rgb8tobgr8(const uint8_t *src, uint8_t *dst, long src_size)
void bgr8torgb8(const uint8_t *src, uint8_t *dst, long src_size)
{
long i;
long num_pixels = src_size;

View File

@ -23,58 +23,56 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef FFMPEG_RGB2RGB_H
#define FFMPEG_RGB2RGB_H
#ifndef SWSCALE_RGB2RGB_H
#define SWSCALE_RGB2RGB_H
#include <inttypes.h>
/* A full collection of RGB to RGB(BGR) converters */
extern void (*rgb24to32) (const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb24to16) (const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb24to15) (const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb32to24) (const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb32to16) (const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb32to15) (const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb15to16) (const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb15to24) (const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb15to32) (const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb16to15) (const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb16to24) (const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb16to32) (const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb24to16) (const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb24to15) (const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
extern void rgb24tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
extern void rgb32tobgr24(const uint8_t *src, uint8_t *dst, long src_size);
extern void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
extern void rgb16tobgr24(const uint8_t *src, uint8_t *dst, long src_size);
extern void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
extern void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
extern void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
extern void rgb15tobgr24(const uint8_t *src, uint8_t *dst, long src_size);
extern void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
extern void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
extern void rgb8tobgr8 (const uint8_t *src, uint8_t *dst, long src_size);
void rgb24to32 (const uint8_t *src, uint8_t *dst, long src_size);
void rgb32to24 (const uint8_t *src, uint8_t *dst, long src_size);
void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
void rgb16to24 (const uint8_t *src, uint8_t *dst, long src_size);
void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
void rgb15to24 (const uint8_t *src, uint8_t *dst, long src_size);
void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
void bgr8torgb8 (const uint8_t *src, uint8_t *dst, long src_size);
extern void palette8torgb32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
extern void palette8tobgr32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
extern void palette8torgb24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
extern void palette8tobgr24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
extern void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
extern void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
extern void palette8torgb15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
extern void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
void palette8topacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
void palette8topacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
void palette8torgb15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
/**
* Height should be a multiple of 2 and width should be a multiple of 16.
* (If this is a problem for anyone then tell me, and I will fix it.)
* Chrominance data is only taken from every second line, others are ignored.
* FIXME: Write HQ version.
* FIXME: Write high quality version.
*/
//void uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
@ -109,11 +107,18 @@ extern void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_
long width, long height,
long lumStride, long chromStride, long dstStride);
/**
* Width should be a multiple of 16.
*/
extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
long width, long height,
long lumStride, long chromStride, long dstStride);
/**
* Height should be a multiple of 2 and width should be a multiple of 2.
* (If this is a problem for anyone then tell me, and I will fix it.)
* Chrominance data is only taken from every second line, others are ignored.
* FIXME: Write HQ version.
* FIXME: Write high quality version.
*/
extern void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
long width, long height,
@ -139,4 +144,4 @@ extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint
void sws_rgb2rgb_init(int flags);
#endif /* FFMPEG_RGB2RGB_H */
#endif /* SWSCALE_RGB2RGB_H */

View File

@ -1,229 +0,0 @@
/*
* Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <stdarg.h>
#undef HAVE_AV_CONFIG_H
#include "libavutil/avutil.h"
#include "swscale.h"
#include "swscale_internal.h"
#include "rgb2rgb.h"
/**
 * Sum of squared differences between two 8-bit planes.
 *
 * @param src1    first plane
 * @param src2    second plane
 * @param stride1 distance in bytes between rows of src1
 * @param stride2 distance in bytes between rows of src2
 * @param w       number of samples compared per row
 * @param h       number of rows compared
 * @return the accumulated squared difference over the w x h area
 */
static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, int w, int h){
    uint64_t total = 0;
    int rowStart1 = 0;   /* offset of the current row in src1 */
    int rowStart2 = 0;   /* offset of the current row in src2 */
    int row;

    for (row = 0; row < h; row++) {
        int col;

        for (col = 0; col < w; col++) {
            const int diff = src1[rowStart1 + col] - src2[rowStart2 + col];
            total += diff*diff;
        }
        rowStart1 += stride1;
        rowStart2 += stride2;
    }

    return total;
}
/**
 * Run one round-trip conversion test: ref -> src -> dst -> out, then
 * compare out against ref. ref and out are YV12 (PIX_FMT_YUV420P).
 *
 * @param ref       reference planes, w x h
 * @param refStride strides of the reference planes; also used for out
 * @param w         reference width
 * @param h         reference height
 * @param srcFormat intermediate format produced from the reference
 * @param dstFormat format the intermediate image is converted to
 * @param srcW      width of the intermediate (src) image
 * @param srcH      height of the intermediate (src) image
 * @param dstW      width of the dst image
 * @param dstH      height of the dst image
 * @param flags     scaler flags passed to every sws_getContext() call
 * @return 0 when the test ran (a line is printed if any per-plane mean
 *         squared difference exceeds 100), -1 if a buffer or a scaling
 *         context could not be obtained
 */
static int doTest(uint8_t *ref[3], int refStride[3], int w, int h, int srcFormat, int dstFormat,
                  int srcW, int srcH, int dstW, int dstH, int flags){
    /* Everything released at "end" starts out NULL, so that bailing out
     * halfway through the allocation loop never frees garbage. */
    uint8_t *src[3] = {NULL, NULL, NULL};
    uint8_t *dst[3] = {NULL, NULL, NULL};
    uint8_t *out[3] = {NULL, NULL, NULL};
    int srcStride[3], dstStride[3];
    int i;
    uint64_t ssdY, ssdU, ssdV;
    struct SwsContext *srcContext = NULL, *dstContext = NULL, *outContext = NULL;
    int res;

    res = 0;
    for (i=0; i<3; i++){
        // avoid stride % bpp != 0
        if (srcFormat==PIX_FMT_RGB24 || srcFormat==PIX_FMT_BGR24)
            srcStride[i]= srcW*3;
        else
            srcStride[i]= srcW*4;

        if (dstFormat==PIX_FMT_RGB24 || dstFormat==PIX_FMT_BGR24)
            dstStride[i]= dstW*3;
        else
            dstStride[i]= dstW*4;

        src[i]= (uint8_t*) malloc(srcStride[i]*srcH);
        dst[i]= (uint8_t*) malloc(dstStride[i]*dstH);
        out[i]= (uint8_t*) malloc(refStride[i]*h);
        if (!src[i] || !dst[i] || !out[i]) {
            perror("Malloc");
            res = -1;

            goto end;
        }
    }

    srcContext= sws_getContext(w, h, PIX_FMT_YUV420P, srcW, srcH, srcFormat, flags, NULL, NULL, NULL);
    if (!srcContext) {
        fprintf(stderr, "Failed to get %s ---> %s\n",
                sws_format_name(PIX_FMT_YUV420P),
                sws_format_name(srcFormat));
        res = -1;

        goto end;
    }
    dstContext= sws_getContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, flags, NULL, NULL, NULL);
    if (!dstContext) {
        fprintf(stderr, "Failed to get %s ---> %s\n",
                sws_format_name(srcFormat),
                sws_format_name(dstFormat));
        res = -1;

        goto end;
    }
    outContext= sws_getContext(dstW, dstH, dstFormat, w, h, PIX_FMT_YUV420P, flags, NULL, NULL, NULL);
    if (!outContext) {
        fprintf(stderr, "Failed to get %s ---> %s\n",
                sws_format_name(dstFormat),
                sws_format_name(PIX_FMT_YUV420P));
        res = -1;

        goto end;
    }

    sws_scale(srcContext, ref, refStride, 0, h   , src, srcStride);
    sws_scale(dstContext, src, srcStride, 0, srcH, dst, dstStride);
    sws_scale(outContext, dst, dstStride, 0, dstH, out, refStride);

#if defined(ARCH_X86)
    asm volatile ("emms\n\t");
#endif

    /* Chroma planes are compared at half resolution, rounded up. */
    ssdY= getSSD(ref[0], out[0], refStride[0], refStride[0], w, h);
    ssdU= getSSD(ref[1], out[1], refStride[1], refStride[1], (w+1)>>1, (h+1)>>1);
    ssdV= getSSD(ref[2], out[2], refStride[2], refStride[2], (w+1)>>1, (h+1)>>1);

    if (srcFormat == PIX_FMT_GRAY8 || dstFormat==PIX_FMT_GRAY8) ssdU=ssdV=0; //FIXME check that output is really gray

    /* Turn the sums into per-sample means. */
    ssdY/= w*h;
    ssdU/= w*h/4;
    ssdV/= w*h/4;

    if (ssdY>100 || ssdU>100 || ssdV>100){
        /* The values are uint64_t, so print them with PRIu64, not %lld. */
        printf(" %s %dx%d -> %s %4dx%4d flags=%2d SSD=%5"PRIu64",%5"PRIu64",%5"PRIu64"\n",
               sws_format_name(srcFormat), srcW, srcH,
               sws_format_name(dstFormat), dstW, dstH,
               flags,
               ssdY, ssdU, ssdV);
    }

    end:

    sws_freeContext(srcContext);
    sws_freeContext(dstContext);
    sws_freeContext(outContext);

    for (i=0; i<3; i++){
        free(src[i]);
        free(dst[i]);
        free(out[i]);
    }

    return res;
}
/**
 * Copy s bytes from b to a by forwarding to memcpy().
 * NOTE: tagged FIXME in the original, without further explanation.
 *
 * @param a destination buffer
 * @param b source buffer
 * @param s number of bytes to copy
 */
void fast_memcpy(void *a, void *b, int s)
{
    const size_t byteCount = s;

    (void) memcpy(a, b, byteCount);
}
static void selfTest(uint8_t *src[3], int stride[3], int w, int h){
enum PixelFormat srcFormat, dstFormat;
int srcW, srcH, dstW, dstH;
int flags;
for (srcFormat = 0; srcFormat < PIX_FMT_NB; srcFormat++) {
for (dstFormat = 0; dstFormat < PIX_FMT_NB; dstFormat++) {
printf("%s -> %s\n",
sws_format_name(srcFormat),
sws_format_name(dstFormat));
srcW= w;
srcH= h;
for (dstW=w - w/3; dstW<= 4*w/3; dstW+= w/3){
for (dstH=h - h/3; dstH<= 4*h/3; dstH+= h/3){
for (flags=1; flags<33; flags*=2) {
int res;
res = doTest(src, stride, w, h, srcFormat, dstFormat,
srcW, srcH, dstW, dstH, flags);
if (res < 0) {
dstW = 4 * w / 3;
dstH = 4 * h / 3;
flags = 33;
}
}
}
}
}
}
}
#define W 96
#define H 96
/**
 * Entry point of the swscale self test.
 *
 * Fills a W x H RGB32 buffer with random bytes, converts it to YUV420P
 * and hands the result to selfTest() as the reference image.
 *
 * @return 123 after the self test has run (historical exit status, kept
 *         unchanged), 1 if a buffer or the scaling context could not be
 *         obtained
 */
int main(int argc, char **argv){
    uint8_t *rgb_data = malloc (W*H*4);
    uint8_t *data = malloc (3*W*H);
    uint8_t *rgb_src[3]= {rgb_data, NULL, NULL};
    int rgb_stride[3]={4*W, 0, 0};
    uint8_t *src[3]= {NULL, NULL, NULL};
    int stride[3]={W, W, W};
    int x, y;
    struct SwsContext *sws = NULL;
    int ret = 1;

    /* Both buffers are written below; never touch them when malloc failed. */
    if (!rgb_data || !data) {
        perror("Malloc");
        goto end;
    }

    /* Three consecutive W*H planes inside the single "data" allocation. */
    src[0] = data;
    src[1] = data + W*H;
    src[2] = data + W*H*2;

    sws= sws_getContext(W/12, H/12, PIX_FMT_RGB32, W, H, PIX_FMT_YUV420P, 2, NULL, NULL, NULL);
    if (!sws) {
        fprintf(stderr, "Failed to get %s ---> %s\n",
                sws_format_name(PIX_FMT_RGB32),
                sws_format_name(PIX_FMT_YUV420P));
        goto end;
    }

    for (y=0; y<H; y++){
        for (x=0; x<W*4; x++){
            rgb_data[ x + y*4*W]= random();
        }
    }
#if defined(ARCH_X86)
    sws_rgb2rgb_init(SWS_CPU_CAPS_MMX*0);
#else
    sws_rgb2rgb_init(0);
#endif
    sws_scale(sws, rgb_src, rgb_stride, 0, H, src, stride);

#if defined(ARCH_X86)
    asm volatile ("emms\n\t");
#endif

    selfTest(src, stride, W, H);

    ret = 123;

    end:
    /* Release everything acquired above; the original leaked all of it. */
    if (sws)
        sws_freeContext(sws);
    free(data);
    free(rgb_data);

    return ret;
}

File diff suppressed because it is too large Load Diff

View File

@ -18,19 +18,20 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef FFMPEG_SWSCALE_H
#define FFMPEG_SWSCALE_H
#ifndef SWSCALE_SWSCALE_H
#define SWSCALE_SWSCALE_H
/**
* @file swscale.h
* @file libswscale/swscale.h
* @brief
* external api for the swscale stuff
*/
#include "avutil.h"
#include "libavutil/avutil.h"
#include "libavutil/internal.h"
#define LIBSWSCALE_VERSION_MAJOR 0
#define LIBSWSCALE_VERSION_MINOR 5
#define LIBSWSCALE_VERSION_MINOR 7
#define LIBSWSCALE_VERSION_MICRO 1
#define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \
@ -43,6 +44,11 @@
#define LIBSWSCALE_IDENT "SwS" AV_STRINGIFY(LIBSWSCALE_VERSION)
/**
* Returns the LIBSWSCALE_VERSION_INT constant.
*/
unsigned swscale_version(void);
/* values for the flags, the stuff on the command line is different */
#define SWS_FAST_BILINEAR 1
#define SWS_BILINEAR 2
@ -70,6 +76,7 @@
#define SWS_FULL_CHR_H_INP 0x4000
#define SWS_DIRECT_BGR 0x8000
#define SWS_ACCURATE_RND 0x40000
#define SWS_BITEXACT 0x80000
#define SWS_CPU_CAPS_MMX 0x80000000
#define SWS_CPU_CAPS_MMX2 0x20000000
@ -92,8 +99,8 @@
// when used for filters they must have an odd number of elements
// coeffs cannot be shared between vectors
typedef struct {
double *coeff;
int length;
double *coeff; ///< pointer to the list of coefficients
int length; ///< number of coefficients in the vector
} SwsVector;
// vectors can be shared
@ -108,39 +115,134 @@ struct SwsContext;
void sws_freeContext(struct SwsContext *swsContext);
struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags,
/**
* Allocates and returns a SwsContext. You need it to perform
* scaling/conversion operations using sws_scale().
*
* @param srcW the width of the source image
* @param srcH the height of the source image
* @param srcFormat the source image format
* @param dstW the width of the destination image
* @param dstH the height of the destination image
* @param dstFormat the destination image format
* @param flags specify which algorithm and options to use for rescaling
* @return a pointer to an allocated context, or NULL in case of error
*/
struct SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int dstW, int dstH, enum PixelFormat dstFormat, int flags,
SwsFilter *srcFilter, SwsFilter *dstFilter, double *param);
int sws_scale(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
/**
* Scales the image slice in \p srcSlice and puts the resulting scaled
* slice in the image in \p dst. A slice is a sequence of consecutive
* rows in an image.
*
* @param context the scaling context previously created with
* sws_getContext()
* @param srcSlice the array containing the pointers to the planes of
* the source slice
* @param srcStride the array containing the strides for each plane of
* the source image
* @param srcSliceY the position in the source image of the slice to
* process, that is the number (counted starting from
* zero) in the image of the first row of the slice
* @param srcSliceH the height of the source slice, that is the number
* of rows in the slice
* @param dst the array containing the pointers to the planes of
* the destination image
* @param dstStride the array containing the strides for each plane of
* the destination image
* @return the height of the output slice
*/
int sws_scale(struct SwsContext *context, uint8_t* srcSlice[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]);
#if LIBSWSCALE_VERSION_MAJOR < 1
/**
* @deprecated Use sws_scale() instead.
*/
int sws_scale_ordered(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]) attribute_deprecated;
#endif
int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation);
int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation);
/**
* Returns a normalized Gaussian curve used for filtering.
* quality=3 is high quality; lower values give lower quality.
*/
SwsVector *sws_getGaussianVec(double variance, double quality);
/**
* Allocates and returns a vector with \p length coefficients, all
* with the same value \p c.
*/
SwsVector *sws_getConstVec(double c, int length);
/**
* Allocates and returns a vector with just one coefficient, with
* value 1.0.
*/
SwsVector *sws_getIdentityVec(void);
/**
* Scales all the coefficients of \p a by the \p scalar value.
*/
void sws_scaleVec(SwsVector *a, double scalar);
/**
* Scales all the coefficients of \p a so that their sum equals \p
* height.
*/
void sws_normalizeVec(SwsVector *a, double height);
void sws_convVec(SwsVector *a, SwsVector *b);
void sws_addVec(SwsVector *a, SwsVector *b);
void sws_subVec(SwsVector *a, SwsVector *b);
void sws_shiftVec(SwsVector *a, int shift);
/**
* Allocates and returns a clone of the vector \p a, that is a vector
* with the same coefficients as \p a.
*/
SwsVector *sws_cloneVec(SwsVector *a);
void sws_printVec(SwsVector *a);
#if LIBSWSCALE_VERSION_MAJOR < 1
/**
* @deprecated Use sws_printVec2() instead.
*/
attribute_deprecated void sws_printVec(SwsVector *a);
#endif
/**
* Prints with av_log() a textual representation of the vector \p a
* if \p log_level <= av_log_level.
*/
void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level);
void sws_freeVec(SwsVector *a);
SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
float lumaSarpen, float chromaSharpen,
float lumaSharpen, float chromaSharpen,
float chromaHShift, float chromaVShift,
int verbose);
void sws_freeFilter(SwsFilter *filter);
/**
* Checks if \p context can be reused, otherwise reallocates a new
* one.
*
* If \p context is NULL, just calls sws_getContext() to get a new
* context. Otherwise, checks if the parameters are the ones already
* saved in \p context. If that is the case, returns the current
* context. Otherwise, frees \p context and gets a new context with
* the new parameters.
*
* Be warned that \p srcFilter and \p dstFilter are not checked; they
* are assumed to remain the same.
*/
struct SwsContext *sws_getCachedContext(struct SwsContext *context,
int srcW, int srcH, int srcFormat,
int dstW, int dstH, int dstFormat, int flags,
int srcW, int srcH, enum PixelFormat srcFormat,
int dstW, int dstH, enum PixelFormat dstFormat, int flags,
SwsFilter *srcFilter, SwsFilter *dstFilter, double *param);
#endif /* FFMPEG_SWSCALE_H */
#endif /* SWSCALE_SWSCALE_H */

View File

@ -220,7 +220,7 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int
for (j=0; j<filterSize; j++) {
val += ((int)src[srcPos + j])*filter[filterSize*i + j];
}
dst[i] = av_clip(val>>7, 0, (1<<15)-1);
dst[i] = FFMIN(val>>7, (1<<15)-1);
}
}
else
@ -259,7 +259,7 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int
val_vEven = vec_mule(src_v, filter_v);
val_s = vec_sums(val_vEven, vzero);
vec_st(val_s, 0, tempo);
dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1);
dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
}
}
break;
@ -286,7 +286,7 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int
val_v = vec_msums(src_v, filter_v, (vector signed int)vzero);
val_s = vec_sums(val_v, vzero);
vec_st(val_s, 0, tempo);
dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1);
dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
}
}
break;
@ -315,7 +315,7 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int
vector signed int val_s = vec_sums(val_v, vzero);
vec_st(val_s, 0, tempo);
dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1);
dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
}
}
break;
@ -377,7 +377,7 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int
val_s = vec_sums(val_v, vzero);
vec_st(val_s, 0, tempo);
dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1);
dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
}
}

View File

@ -18,8 +18,8 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "avutil.h"
#include "opt.h"
#include "libavutil/avutil.h"
#include "libavcodec/opt.h"
#include "swscale.h"
#include "swscale_internal.h"
@ -53,6 +53,7 @@ static const AVOption options[] = {
{ "bfin", "Blackfin SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_BFIN, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "full_chroma_int", "full chroma interpolation", 0 , FF_OPT_TYPE_CONST, SWS_FULL_CHR_H_INT, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "full_chroma_inp", "full chroma input", 0 , FF_OPT_TYPE_CONST, SWS_FULL_CHR_H_INP, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "bitexact", "", 0 , FF_OPT_TYPE_CONST, SWS_BITEXACT, INT_MIN, INT_MAX, VE, "sws_flags" },
{ NULL }
};

View File

@ -26,9 +26,6 @@
#include <inttypes.h>
#include <assert.h>
#include "config.h"
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#include <unistd.h>
#include "rgb2rgb.h"
#include "swscale.h"
@ -40,13 +37,13 @@
#define L1CODE
#endif
extern int ff_bfin_uyvytoyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
long width, long height,
long lumStride, long chromStride, long srcStride) L1CODE;
int ff_bfin_uyvytoyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
long width, long height,
long lumStride, long chromStride, long srcStride) L1CODE;
extern int ff_bfin_yuyvtoyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
long width, long height,
long lumStride, long chromStride, long srcStride) L1CODE;
int ff_bfin_yuyvtoyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
long width, long height,
long lumStride, long chromStride, long srcStride) L1CODE;
static int uyvytoyv12_unscaled (SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[])

View File

@ -18,22 +18,42 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef FFMPEG_SWSCALE_INTERNAL_H
#define FFMPEG_SWSCALE_INTERNAL_H
#ifndef SWSCALE_SWSCALE_INTERNAL_H
#define SWSCALE_SWSCALE_INTERNAL_H
#include "config.h"
#ifdef HAVE_ALTIVEC_H
#if HAVE_ALTIVEC_H
#include <altivec.h>
#endif
#include "avutil.h"
#include "libavutil/avutil.h"
#define STR(s) AV_TOSTRING(s) //AV_STRINGIFY is too long
#define MAX_FILTER_SIZE 256
#define VOFW 2048
#define VOF (VOFW*2)
#ifdef WORDS_BIGENDIAN
#define ALT32_CORR (-1)
#else
#define ALT32_CORR 1
#endif
#if ARCH_X86_64
# define APCK_PTR2 8
# define APCK_COEF 16
# define APCK_SIZE 24
#else
# define APCK_PTR2 4
# define APCK_COEF 8
# define APCK_SIZE 16
#endif
struct SwsContext;
typedef int (*SwsFunc)(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]);
@ -53,7 +73,7 @@ typedef struct SwsContext{
int chrSrcW, chrSrcH, chrDstW, chrDstH;
int lumXInc, chrXInc;
int lumYInc, chrYInc;
int dstFormat, srcFormat; ///< format 4:2:0 type is always YV12
enum PixelFormat dstFormat, srcFormat; ///< format 4:2:0 type is always YV12
int origDstFormat, origSrcFormat; ///< format
int chrSrcHSubSample, chrSrcVSubSample;
int chrIntHSubSample, chrIntVSubSample;
@ -62,6 +82,9 @@ typedef struct SwsContext{
int sliceDir;
double param[2];
uint32_t pal_yuv[256];
uint32_t pal_rgb[256];
int16_t **lumPixBuf;
int16_t **chrPixBuf;
int16_t *hLumFilter;
@ -108,6 +131,12 @@ typedef struct SwsContext{
int srcColorspaceTable[4];
int dstColorspaceTable[4];
int srcRange, dstRange;
int yuv2rgb_y_offset;
int yuv2rgb_y_coeff;
int yuv2rgb_v2r_coeff;
int yuv2rgb_v2g_coeff;
int yuv2rgb_u2g_coeff;
int yuv2rgb_u2b_coeff;
#define RED_DITHER "0*8"
#define GREEN_DITHER "1*8"
@ -148,7 +177,7 @@ typedef struct SwsContext{
uint64_t u_temp __attribute__((aligned(8)));
uint64_t v_temp __attribute__((aligned(8)));
#ifdef HAVE_ALTIVEC
#if HAVE_ALTIVEC
vector signed short CY;
vector signed short CRV;
@ -162,7 +191,7 @@ typedef struct SwsContext{
#endif
#ifdef ARCH_BFIN
#if ARCH_BFIN
uint32_t oy __attribute__((aligned(4)));
uint32_t oc __attribute__((aligned(4)));
uint32_t zero __attribute__((aligned(4)));
@ -176,18 +205,18 @@ typedef struct SwsContext{
uint32_t gmask __attribute__((aligned(4)));
#endif
#ifdef HAVE_VIS
#if HAVE_VIS
uint64_t sparc_coeffs[10] __attribute__((aligned(8)));
#endif
} SwsContext;
//FIXME check init (where 0)
SwsFunc yuv2rgb_get_func_ptr (SwsContext *c);
int yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation);
SwsFunc sws_yuv2rgb_get_func_ptr (SwsContext *c);
int sws_yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation);
void yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation);
SwsFunc yuv2rgb_init_altivec (SwsContext *c);
void sws_yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation);
SwsFunc sws_yuv2rgb_init_altivec (SwsContext *c);
void altivec_yuv2packedX (SwsContext *c,
int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
@ -221,7 +250,8 @@ const char *sws_format_name(int format);
|| (x)==PIX_FMT_GRAY16LE \
)
#define isRGB(x) ( \
(x)==PIX_FMT_BGR32 \
(x)==PIX_FMT_RGB32 \
|| (x)==PIX_FMT_RGB32_1 \
|| (x)==PIX_FMT_RGB24 \
|| (x)==PIX_FMT_RGB565 \
|| (x)==PIX_FMT_RGB555 \
@ -229,9 +259,11 @@ const char *sws_format_name(int format);
|| (x)==PIX_FMT_RGB4 \
|| (x)==PIX_FMT_RGB4_BYTE \
|| (x)==PIX_FMT_MONOBLACK \
|| (x)==PIX_FMT_MONOWHITE \
)
#define isBGR(x) ( \
(x)==PIX_FMT_RGB32 \
(x)==PIX_FMT_BGR32 \
|| (x)==PIX_FMT_BGR32_1 \
|| (x)==PIX_FMT_BGR24 \
|| (x)==PIX_FMT_BGR565 \
|| (x)==PIX_FMT_BGR555 \
@ -239,6 +271,14 @@ const char *sws_format_name(int format);
|| (x)==PIX_FMT_BGR4 \
|| (x)==PIX_FMT_BGR4_BYTE \
|| (x)==PIX_FMT_MONOBLACK \
|| (x)==PIX_FMT_MONOWHITE \
)
/* Evaluates to nonzero when x equals PIX_FMT_BGR32, PIX_FMT_BGR32_1,
 * PIX_FMT_RGB32, PIX_FMT_RGB32_1 or PIX_FMT_YUVA420P, and to zero otherwise.
 * NOTE(review): presumably these are the formats carrying an alpha channel --
 * confirm against the PixelFormat definitions.
 * x is expanded once per comparison, so pass an expression without side effects. */
#define isALPHA(x) ( \
(x)==PIX_FMT_BGR32 \
|| (x)==PIX_FMT_BGR32_1 \
|| (x)==PIX_FMT_RGB32 \
|| (x)==PIX_FMT_RGB32_1 \
|| (x)==PIX_FMT_YUVA420P \
)
static inline int fmt_depth(int fmt)
@ -269,15 +309,16 @@ static inline int fmt_depth(int fmt)
case PIX_FMT_RGB4_BYTE:
return 4;
case PIX_FMT_MONOBLACK:
case PIX_FMT_MONOWHITE:
return 1;
default:
return 0;
}
}
extern const DECLARE_ALIGNED(8, uint64_t, ff_dither4[2]);
extern const DECLARE_ALIGNED(8, uint64_t, ff_dither8[2]);
extern const uint64_t ff_dither4[2];
extern const uint64_t ff_dither8[2];
extern const AVClass sws_context_class;
#endif /* FFMPEG_SWSCALE_INTERNAL_H */
#endif /* SWSCALE_SWSCALE_INTERNAL_H */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -5,18 +5,18 @@
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with FFmpeg; if not, write to the Free Software
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@ -91,9 +91,6 @@ adjustment.
#include <inttypes.h>
#include <assert.h>
#include "config.h"
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#include "rgb2rgb.h"
#include "swscale.h"
#include "swscale_internal.h"
@ -154,7 +151,7 @@ const vector unsigned char
#define vec_merge3(x2,x1,x0,y0,y1,y2) \
do { \
typeof(x0) o0,o2,o3; \
__typeof__(x0) o0,o2,o3; \
o0 = vec_mergeh (x0,x1); \
y0 = vec_perm (o0, x2, perm_rgb_0); \
o2 = vec_perm (o0, x2, perm_rgb_1); \
@ -165,7 +162,7 @@ do { \
#define vec_mstbgr24(x0,x1,x2,ptr) \
do { \
typeof(x0) _0,_1,_2; \
__typeof__(x0) _0,_1,_2; \
vec_merge3 (x0,x1,x2,_0,_1,_2); \
vec_st (_0, 0, ptr++); \
vec_st (_1, 0, ptr++); \
@ -174,7 +171,7 @@ do { \
#define vec_mstrgb24(x0,x1,x2,ptr) \
do { \
typeof(x0) _0,_1,_2; \
__typeof__(x0) _0,_1,_2; \
vec_merge3 (x2,x1,x0,_0,_1,_2); \
vec_st (_0, 0, ptr++); \
vec_st (_1, 0, ptr++); \
@ -222,12 +219,12 @@ do { \
#define vec_unh(x) \
(vector signed short) \
vec_perm(x,(typeof(x)){0}, \
vec_perm(x,(__typeof__(x)){0}, \
((vector unsigned char){0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03,\
0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07}))
#define vec_unl(x) \
(vector signed short) \
vec_perm(x,(typeof(x)){0}, \
vec_perm(x,(__typeof__(x)){0}, \
((vector unsigned char){0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B,\
0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F}))
@ -240,7 +237,7 @@ do { \
((vector unsigned short)vec_max (x,((vector signed short) {0})), \
(vector unsigned short)vec_max (y,((vector signed short) {0})))
//#define out_pixels(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a)){0}),a,a,a,ptr)
//#define out_pixels(a,b,c,ptr) vec_mstrgb32(__typeof__(a),((__typeof__ (a)){255}),a,a,a,ptr)
static inline void cvtyuvtoRGB (SwsContext *c,
@ -441,10 +438,10 @@ static int altivec_##name (SwsContext *c, \
}
#define out_abgr(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a)){0}),c,b,a,ptr)
#define out_bgra(a,b,c,ptr) vec_mstrgb32(typeof(a),c,b,a,((typeof (a)){0}),ptr)
#define out_rgba(a,b,c,ptr) vec_mstrgb32(typeof(a),a,b,c,((typeof (a)){0}),ptr)
#define out_argb(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a)){0}),a,b,c,ptr)
#define out_abgr(a,b,c,ptr) vec_mstrgb32(__typeof__(a),((__typeof__ (a)){255}),c,b,a,ptr)
#define out_bgra(a,b,c,ptr) vec_mstrgb32(__typeof__(a),c,b,a,((__typeof__ (a)){255}),ptr)
#define out_rgba(a,b,c,ptr) vec_mstrgb32(__typeof__(a),a,b,c,((__typeof__ (a)){255}),ptr)
#define out_argb(a,b,c,ptr) vec_mstrgb32(__typeof__(a),((__typeof__ (a)){255}),a,b,c,ptr)
#define out_rgb24(a,b,c,ptr) vec_mstrgb24(a,b,c,ptr)
#define out_bgr24(a,b,c,ptr) vec_mstbgr24(a,b,c,ptr)
@ -693,7 +690,7 @@ static int altivec_uyvy_rgb32 (SwsContext *c,
So we just fall back to the C code for this.
*/
SwsFunc yuv2rgb_init_altivec (SwsContext *c)
SwsFunc sws_yuv2rgb_init_altivec (SwsContext *c)
{
if (!(c->flags & SWS_CPU_CAPS_ALTIVEC))
return NULL;
@ -753,7 +750,7 @@ SwsFunc yuv2rgb_init_altivec (SwsContext *c)
return NULL;
}
void yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation)
void sws_yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation)
{
union {
signed short tmp[8] __attribute__ ((aligned(16)));

View File

@ -27,9 +27,6 @@
#include <inttypes.h>
#include <assert.h>
#include "config.h"
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#include <unistd.h>
#include "rgb2rgb.h"
#include "swscale.h"
@ -41,17 +38,17 @@
#define L1CODE
#endif
extern void ff_bfin_yuv2rgb555_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
int w, uint32_t *coeffs) L1CODE;
void ff_bfin_yuv2rgb555_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
int w, uint32_t *coeffs) L1CODE;
extern void ff_bfin_yuv2rgb565_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
int w, uint32_t *coeffs) L1CODE;
void ff_bfin_yuv2rgb565_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
int w, uint32_t *coeffs) L1CODE;
extern void ff_bfin_yuv2rgb24_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
int w, uint32_t *coeffs) L1CODE;
void ff_bfin_yuv2rgb24_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
int w, uint32_t *coeffs) L1CODE;
typedef void (* ltransform_t)(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
int w, uint32_t *coeffs);
typedef void (* ltransform)(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
int w, uint32_t *coeffs);
static void bfin_prepare_coefficients (SwsContext *c, int rgb, int masks)
@ -95,7 +92,7 @@ static int core_yuv420_rgb (SwsContext *c,
uint8_t **in, int *instrides,
int srcSliceY, int srcSliceH,
uint8_t **oplanes, int *outstrides,
ltransform_t lcscf, int rgb, int masks)
ltransform lcscf, int rgb, int masks)
{
uint8_t *py,*pu,*pv,*op;
int w = instrides[0];

View File

@ -73,7 +73,7 @@ static int mlib_YUV2RGB420_24(SwsContext *c, uint8_t* src[], int srcStride[], in
}
SwsFunc yuv2rgb_init_mlib(SwsContext *c)
SwsFunc sws_yuv2rgb_init_mlib(SwsContext *c)
{
switch(c->dstFormat){
case PIX_FMT_RGB24: return mlib_YUV2RGB420_24;

View File

@ -1,7 +1,7 @@
/*
* yuv2rgb_mmx.c, software YUV to RGB converter with Intel MMX "technology"
*
* Copyright (C) 2000, Silicon Integrated System Corp.
* Copyright (C) 2000, Silicon Integrated System Corp
*
* Author: Olie Lho <ollie@sis.com.tw>
*
@ -30,14 +30,14 @@
#undef EMMS
#undef SFENCE
#ifdef HAVE_3DNOW
/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */
#if HAVE_AMD3DNOW
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
#define EMMS "femms"
#else
#define EMMS "emms"
#endif
#ifdef HAVE_MMX2
#if HAVE_MMX2
#define MOVNTQ "movntq"
#define SFENCE "sfence"
#else
@ -121,53 +121,73 @@
"punpcklbw %%mm5, %%mm2;" /* G7 G6 G5 G4 G3 G2 G1 G0 */\
#define YUV422_UNSHIFT \
if(c->srcFormat == PIX_FMT_YUV422P){ \
srcStride[1] *= 2; \
srcStride[2] *= 2; \
} \
#define YUV2RGB_LOOP(depth) \
h_size= (c->dstW+7)&~7; \
if(h_size*depth > FFABS(dstStride[0])) h_size-=8; \
\
__asm__ volatile ("pxor %mm4, %mm4;" /* zero mm4 */ ); \
for (y= 0; y<srcSliceH; y++ ) { \
uint8_t *image = dst[0] + (y+srcSliceY)*dstStride[0]; \
uint8_t *py = src[0] + y*srcStride[0]; \
uint8_t *pu = src[1] + (y>>1)*srcStride[1]; \
uint8_t *pv = src[2] + (y>>1)*srcStride[2]; \
long index= -h_size/2; \
#define YUV2RGB_INIT \
/* This MMX assembly code deals with a SINGLE scan line at a time, \
* it converts 8 pixels in each iteration. */ \
__asm__ volatile ( \
/* load data for start of next scan line */ \
"movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ \
"movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ \
"movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ \
/* \
".balign 16 \n\t" \
*/ \
"1: \n\t" \
/* No speed difference on my p3@500 with prefetch, \
* if it is faster for anyone with -benchmark then tell me. \
PREFETCH" 64(%0) \n\t" \
PREFETCH" 64(%1) \n\t" \
PREFETCH" 64(%2) \n\t" \
*/ \
#define YUV2RGB_ENDLOOP(depth) \
"add $"AV_STRINGIFY(depth*8)", %1 \n\t" \
"add $4, %0 \n\t" \
" js 1b \n\t" \
\
: "+r" (index), "+r" (image) \
: "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index) \
); \
} \
__asm__ volatile (EMMS); \
return srcSliceH; \
static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){
int y, h_size;
if(c->srcFormat == PIX_FMT_YUV422P){
srcStride[1] *= 2;
srcStride[2] *= 2;
}
YUV422_UNSHIFT
YUV2RGB_LOOP(2)
h_size= (c->dstW+7)&~7;
if(h_size*2 > FFABS(dstStride[0])) h_size-=8;
c->blueDither= ff_dither8[y&1];
c->greenDither= ff_dither4[y&1];
c->redDither= ff_dither8[(y+1)&1];
asm volatile ("pxor %mm4, %mm4;" /* zero mm4 */ );
//printf("%X %X %X %X %X %X %X %X %X %X\n", (int)&c->redDither, (int)&b5Dither, (int)src[0], (int)src[1], (int)src[2], (int)dst[0],
//srcStride[0],srcStride[1],srcStride[2],dstStride[0]);
for (y= 0; y<srcSliceH; y++ ) {
uint8_t *image = dst[0] + (y+srcSliceY)*dstStride[0];
uint8_t *py = src[0] + y*srcStride[0];
uint8_t *pu = src[1] + (y>>1)*srcStride[1];
uint8_t *pv = src[2] + (y>>1)*srcStride[2];
long index= -h_size/2;
b5Dither= ff_dither8[y&1];
g6Dither= ff_dither4[y&1];
g5Dither= ff_dither8[y&1];
r5Dither= ff_dither8[(y+1)&1];
/* This MMX assembly code deals with a SINGLE scan line at a time,
* it converts 8 pixels in each iteration. */
asm volatile (
/* load data for start of next scan line */
"movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
"movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
"movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
//".balign 16 \n\t"
"1: \n\t"
/* No speed difference on my p3@500 with prefetch,
* if it is faster for anyone with -benchmark then tell me.
PREFETCH" 64(%0) \n\t"
PREFETCH" 64(%1) \n\t"
PREFETCH" 64(%2) \n\t"
*/
YUV2RGB
YUV2RGB_INIT
YUV2RGB
#ifdef DITHER1XBPP
"paddusb "MANGLE(b5Dither)", %%mm0;"
"paddusb "MANGLE(g6Dither)", %%mm2;"
"paddusb "MANGLE(r5Dither)", %%mm1;"
"paddusb "BLUE_DITHER"(%4), %%mm0;"
"paddusb "GREEN_DITHER"(%4), %%mm2;"
"paddusb "RED_DITHER"(%4), %%mm1;"
#endif
/* mask unneeded bits off */
"pand "MANGLE(mmx_redmask)", %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */
@ -202,61 +222,27 @@ YUV2RGB
MOVNTQ " %%mm5, 8 (%1);" /* store pixel 4-7 */
"add $16, %1 \n\t"
"add $4, %0 \n\t"
" js 1b \n\t"
: "+r" (index), "+r" (image)
: "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index)
);
}
asm volatile (EMMS);
return srcSliceH;
YUV2RGB_ENDLOOP(2)
}
static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){
int y, h_size;
if(c->srcFormat == PIX_FMT_YUV422P){
srcStride[1] *= 2;
srcStride[2] *= 2;
}
YUV422_UNSHIFT
YUV2RGB_LOOP(2)
h_size= (c->dstW+7)&~7;
if(h_size*2 > FFABS(dstStride[0])) h_size-=8;
c->blueDither= ff_dither8[y&1];
c->greenDither= ff_dither8[y&1];
c->redDither= ff_dither8[(y+1)&1];
asm volatile ("pxor %mm4, %mm4;" /* zero mm4 */ );
//printf("%X %X %X %X %X %X %X %X %X %X\n", (int)&c->redDither, (int)&b5Dither, (int)src[0], (int)src[1], (int)src[2], (int)dst[0],
//srcStride[0],srcStride[1],srcStride[2],dstStride[0]);
for (y= 0; y<srcSliceH; y++ ) {
uint8_t *image = dst[0] + (y+srcSliceY)*dstStride[0];
uint8_t *py = src[0] + y*srcStride[0];
uint8_t *pu = src[1] + (y>>1)*srcStride[1];
uint8_t *pv = src[2] + (y>>1)*srcStride[2];
long index= -h_size/2;
b5Dither= ff_dither8[y&1];
g6Dither= ff_dither4[y&1];
g5Dither= ff_dither8[y&1];
r5Dither= ff_dither8[(y+1)&1];
/* This MMX assembly code deals with a SINGLE scan line at a time,
* it converts 8 pixels in each iteration. */
asm volatile (
/* load data for start of next scan line */
"movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
"movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
"movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
//".balign 16 \n\t"
"1: \n\t"
YUV2RGB
YUV2RGB_INIT
YUV2RGB
#ifdef DITHER1XBPP
"paddusb "MANGLE(b5Dither)", %%mm0 \n\t"
"paddusb "MANGLE(g5Dither)", %%mm2 \n\t"
"paddusb "MANGLE(r5Dither)", %%mm1 \n\t"
"paddusb "BLUE_DITHER"(%4), %%mm0 \n\t"
"paddusb "GREEN_DITHER"(%4), %%mm2 \n\t"
"paddusb "RED_DITHER"(%4), %%mm1 \n\t"
#endif
/* mask unneeded bits off */
@ -293,51 +279,20 @@ YUV2RGB
MOVNTQ " %%mm5, 8 (%1);" /* store pixel 4-7 */
"add $16, %1 \n\t"
"add $4, %0 \n\t"
" js 1b \n\t"
: "+r" (index), "+r" (image)
: "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index)
);
}
asm volatile (EMMS);
return srcSliceH;
YUV2RGB_ENDLOOP(2)
}
static inline int RENAME(yuv420_rgb24)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){
int y, h_size;
if(c->srcFormat == PIX_FMT_YUV422P){
srcStride[1] *= 2;
srcStride[2] *= 2;
}
YUV422_UNSHIFT
YUV2RGB_LOOP(3)
h_size= (c->dstW+7)&~7;
if(h_size*3 > FFABS(dstStride[0])) h_size-=8;
asm volatile ("pxor %mm4, %mm4;" /* zero mm4 */ );
for (y= 0; y<srcSliceH; y++ ) {
uint8_t *image = dst[0] + (y+srcSliceY)*dstStride[0];
uint8_t *py = src[0] + y*srcStride[0];
uint8_t *pu = src[1] + (y>>1)*srcStride[1];
uint8_t *pv = src[2] + (y>>1)*srcStride[2];
long index= -h_size/2;
/* This MMX assembly code deals with a SINGLE scan line at a time,
* it converts 8 pixels in each iteration. */
asm volatile (
/* load data for start of next scan line */
"movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
"movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
"movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
//".balign 16 \n\t"
"1: \n\t"
YUV2RGB
YUV2RGB_INIT
YUV2RGB
/* mm0=B, %%mm2=G, %%mm1=R */
#ifdef HAVE_MMX2
#if HAVE_MMX2
"movq "MANGLE(ff_M24A)", %%mm4 \n\t"
"movq "MANGLE(ff_M24C)", %%mm7 \n\t"
"pshufw $0x50, %%mm0, %%mm5 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */
@ -438,101 +393,61 @@ YUV2RGB
"pxor %%mm4, %%mm4 \n\t"
#endif
"add $24, %1 \n\t"
"add $4, %0 \n\t"
" js 1b \n\t"
: "+r" (index), "+r" (image)
: "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index)
);
}
asm volatile (EMMS);
return srcSliceH;
YUV2RGB_ENDLOOP(3)
}
#define RGB_PLANAR2PACKED32 \
/* convert RGB plane to RGB packed format, \
mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> A, \
mm4 -> GB, mm5 -> AR pixel 4-7, \
mm6 -> GB, mm7 -> AR pixel 0-3 */ \
"movq %%mm0, %%mm6;" /* B7 B6 B5 B4 B3 B2 B1 B0 */ \
"movq %%mm1, %%mm7;" /* R7 R6 R5 R4 R3 R2 R1 R0 */ \
\
"movq %%mm0, %%mm4;" /* B7 B6 B5 B4 B3 B2 B1 B0 */ \
"movq %%mm1, %%mm5;" /* R7 R6 R5 R4 R3 R2 R1 R0 */ \
\
"punpcklbw %%mm2, %%mm6;" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \
"punpcklbw %%mm3, %%mm7;" /* A3 R3 A2 R2 A1 R1 A0 R0 */ \
\
"punpcklwd %%mm7, %%mm6;" /* A1 R1 B1 G1 A0 R0 B0 G0 */ \
MOVNTQ " %%mm6, (%1);" /* Store ARGB1 ARGB0 */ \
\
"movq %%mm0, %%mm6;" /* B7 B6 B5 B4 B3 B2 B1 B0 */ \
"punpcklbw %%mm2, %%mm6;" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \
\
"punpckhwd %%mm7, %%mm6;" /* A3 R3 G3 B3 A2 R2 B3 G2 */ \
MOVNTQ " %%mm6, 8 (%1);" /* Store ARGB3 ARGB2 */ \
\
"punpckhbw %%mm2, %%mm4;" /* G7 B7 G6 B6 G5 B5 G4 B4 */ \
"punpckhbw %%mm3, %%mm5;" /* A7 R7 A6 R6 A5 R5 A4 R4 */ \
\
"punpcklwd %%mm5, %%mm4;" /* A5 R5 B5 G5 A4 R4 B4 G4 */ \
MOVNTQ " %%mm4, 16 (%1);" /* Store ARGB5 ARGB4 */ \
\
"movq %%mm0, %%mm4;" /* B7 B6 B5 B4 B3 B2 B1 B0 */ \
"punpckhbw %%mm2, %%mm4;" /* G7 B7 G6 B6 G5 B5 G4 B4 */ \
\
"punpckhwd %%mm5, %%mm4;" /* A7 R7 G7 B7 A6 R6 B6 G6 */ \
MOVNTQ " %%mm4, 24 (%1);" /* Store ARGB7 ARGB6 */ \
\
"movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */ \
"movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ \
\
"pxor %%mm4, %%mm4;" /* zero mm4 */ \
"movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ \
static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){
int y, h_size;
if(c->srcFormat == PIX_FMT_YUV422P){
srcStride[1] *= 2;
srcStride[2] *= 2;
}
YUV422_UNSHIFT
YUV2RGB_LOOP(4)
h_size= (c->dstW+7)&~7;
if(h_size*4 > FFABS(dstStride[0])) h_size-=8;
YUV2RGB_INIT
YUV2RGB
"pcmpeqd %%mm3, %%mm3;" /* fill mm3 */
RGB_PLANAR2PACKED32
asm volatile ("pxor %mm4, %mm4;" /* zero mm4 */ );
for (y= 0; y<srcSliceH; y++ ) {
uint8_t *image = dst[0] + (y+srcSliceY)*dstStride[0];
uint8_t *py = src[0] + y*srcStride[0];
uint8_t *pu = src[1] + (y>>1)*srcStride[1];
uint8_t *pv = src[2] + (y>>1)*srcStride[2];
long index= -h_size/2;
/* This MMX assembly code deals with a SINGLE scan line at a time,
* it converts 8 pixels in each iteration. */
asm volatile (
/* load data for start of next scan line */
"movd (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
"movd (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
"movq (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
//".balign 16 \n\t"
"1: \n\t"
YUV2RGB
/* convert RGB plane to RGB packed format,
mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0,
mm4 -> GB, mm5 -> AR pixel 4-7,
mm6 -> GB, mm7 -> AR pixel 0-3 */
"pxor %%mm3, %%mm3;" /* zero mm3 */
"movq %%mm0, %%mm6;" /* B7 B6 B5 B4 B3 B2 B1 B0 */
"movq %%mm1, %%mm7;" /* R7 R6 R5 R4 R3 R2 R1 R0 */
"movq %%mm0, %%mm4;" /* B7 B6 B5 B4 B3 B2 B1 B0 */
"movq %%mm1, %%mm5;" /* R7 R6 R5 R4 R3 R2 R1 R0 */
"punpcklbw %%mm2, %%mm6;" /* G3 B3 G2 B2 G1 B1 G0 B0 */
"punpcklbw %%mm3, %%mm7;" /* 00 R3 00 R2 00 R1 00 R0 */
"punpcklwd %%mm7, %%mm6;" /* 00 R1 B1 G1 00 R0 B0 G0 */
MOVNTQ " %%mm6, (%1);" /* Store ARGB1 ARGB0 */
"movq %%mm0, %%mm6;" /* B7 B6 B5 B4 B3 B2 B1 B0 */
"punpcklbw %%mm2, %%mm6;" /* G3 B3 G2 B2 G1 B1 G0 B0 */
"punpckhwd %%mm7, %%mm6;" /* 00 R3 G3 B3 00 R2 B3 G2 */
MOVNTQ " %%mm6, 8 (%1);" /* Store ARGB3 ARGB2 */
"punpckhbw %%mm2, %%mm4;" /* G7 B7 G6 B6 G5 B5 G4 B4 */
"punpckhbw %%mm3, %%mm5;" /* 00 R7 00 R6 00 R5 00 R4 */
"punpcklwd %%mm5, %%mm4;" /* 00 R5 B5 G5 00 R4 B4 G4 */
MOVNTQ " %%mm4, 16 (%1);" /* Store ARGB5 ARGB4 */
"movq %%mm0, %%mm4;" /* B7 B6 B5 B4 B3 B2 B1 B0 */
"punpckhbw %%mm2, %%mm4;" /* G7 B7 G6 B6 G5 B5 G4 B4 */
"punpckhwd %%mm5, %%mm4;" /* 00 R7 G7 B7 00 R6 B6 G6 */
MOVNTQ " %%mm4, 24 (%1);" /* Store ARGB7 ARGB6 */
"movd 4 (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
"movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
"pxor %%mm4, %%mm4;" /* zero mm4 */
"movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
"add $32, %1 \n\t"
"add $4, %0 \n\t"
" js 1b \n\t"
: "+r" (index), "+r" (image)
: "r" (pu - index), "r" (pv - index), "r"(&c->redDither), "r" (py - 2*index)
);
}
asm volatile (EMMS);
return srcSliceH;
YUV2RGB_ENDLOOP(4)
}

View File

@ -80,12 +80,13 @@
// FIXME: must be changed to set alpha to 255 instead of 0
static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){
int y, out1, out2, out3, out4, out5, out6;
for(y=0;y < srcSliceH;++y) {
asm volatile (
__asm__ volatile (
YUV2RGB_INIT
"wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */
"1: \n\t"
@ -131,12 +132,13 @@ static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int s
return srcSliceH;
}
// FIXME: must be changed to set alpha to 255 instead of 0
static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){
int y, out1, out2, out3, out4, out5, out6;
for(y=0;y < srcSliceH;++y) {
asm volatile (
__asm__ volatile (
YUV2RGB_INIT
"wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */
"1: \n\t"
@ -182,7 +184,7 @@ static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int s
return srcSliceH;
}
SwsFunc yuv2rgb_init_vis(SwsContext *c) {
SwsFunc sws_yuv2rgb_init_vis(SwsContext *c) {
c->sparc_coeffs[5]=c->yCoeff;
c->sparc_coeffs[6]=c->vgCoeff;
c->sparc_coeffs[7]=c->vrCoeff;
@ -196,11 +198,11 @@ SwsFunc yuv2rgb_init_vis(SwsContext *c) {
c->sparc_coeffs[4]=(((int16_t)c->vOffset*(int16_t)c->vrCoeff>>11) & 0xffff) * 0x0001000100010001ULL;
if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV422P && (c->dstW & 7)==0) {
av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV422P -> RGB32\n");
av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV422P -> RGB32 (WARNING: alpha value is wrong)\n");
return vis_422P_ARGB32;
}
else if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV420P && (c->dstW & 7)==0) {
av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV420P -> RGB32\n");
av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV420P -> RGB32 (WARNING: alpha value is wrong)\n");
return vis_420P_ARGB32;
}
return NULL;