Use width and height when deciding which conversion routine to select. Remove some per-frame checks from the SSE2 conversion routines. Hopefully they still handle misaligned buffers OK. Too many tests were hurting performance.

git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@34945 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
David McPaul 2010-01-08 11:25:48 +00:00
parent cd591a3e00
commit 96f1b483e4
4 changed files with 24 additions and 22 deletions

View File

@ -490,7 +490,8 @@ AVCodecDecoder::_NegotiateVideoOutputFormat(media_format* inOutFormat)
}
#else
fFormatConversionFunc = resolve_colorspace(
fOutputVideoFormat.display.format, fContext->pix_fmt);
fOutputVideoFormat.display.format, fContext->pix_fmt,
fContext->width, fContext->height);
}
if (fFormatConversionFunc != NULL)
break;
@ -749,7 +750,8 @@ AVCodecDecoder::_DecodeVideo(void* outBuffer, int64* outFrameCount,
#else
if (fFormatConversionFunc == NULL) {
fFormatConversionFunc = resolve_colorspace(
fOutputVideoFormat.display.format, fContext->pix_fmt);
fOutputVideoFormat.display.format, fContext->pix_fmt,
fContext->width, fContext->height);
}
#endif

View File

@ -41,27 +41,29 @@ void gfx_conv_yuv420p_rgb32_mmx(AVFrame *in, AVFrame *out, int width, int height
// Planar YUV420
void gfx_conv_yuv420p_rgba32_sse2(AVFrame *in, AVFrame *out, int width, int height)
{
// width must be divisible by 8 and height divisible by 2
// also in and out must be aligned to 32 bytes
if (width % 8 == 0 && height % 2 == 0
&& (off_t)out->data[0] % 32 == 0 && (off_t)in->data[0] % 32 == 0
&& (off_t)in->data[1] % 32 == 0 && (off_t)in->data[2] % 32 == 0) {
// The in and out buffers must be aligned to 32 bytes; in should already be aligned, as ffmpeg allocates it
if ((off_t)out->data[0] % 32 == 0) {
uint8 *ybase = (uint8 *)in->data[0];
uint8 *ubase = (uint8 *)in->data[1];
uint8 *vbase = (uint8 *)in->data[2];
uint8 *rgbbase = (uint8 *)out->data[0];
int yBaseInc = in->linesize[0];
int uBaseInc = in->linesize[1];
int vBaseInc = in->linesize[2];
int rgbBaseInc = out->linesize[0];
for (int i=0;i<height;i+=2) {
_Convert_YUV420P_RGBA32_SSE2(ybase, ubase, vbase, rgbbase, width); // First Y row
ybase += in->linesize[0];
rgbbase += out->linesize[0];
ybase += yBaseInc;
rgbbase += rgbBaseInc;
_Convert_YUV420P_RGBA32_SSE2(ybase, ubase, vbase, rgbbase, width); // Second Y row but same u and v row
ybase += in->linesize[0];
ubase += in->linesize[1];
vbase += in->linesize[2];
rgbbase += out->linesize[0];
ybase += yBaseInc;
ubase += uBaseInc;
vbase += vBaseInc;
rgbbase += rgbBaseInc;
}
} else {
gfx_conv_YCbCr420p_RGB32_c(in, out, width, height);
@ -71,10 +73,8 @@ void gfx_conv_yuv420p_rgba32_sse2(AVFrame *in, AVFrame *out, int width, int heig
// Packed YUV422
void gfx_conv_yuv422p_rgba32_sse2(AVFrame *in, AVFrame *out, int width, int height)
{
// width must be divisible by 8
// also in and out must be aligned to 32 bytes
if (width % 8 == 0
&& (off_t)out->data[0] % 32 == 0 && (off_t)in->data[0] % 32 == 0) {
// The in and out buffers must be aligned to 32 bytes; in should already be aligned, as ffmpeg allocates it
if ((off_t)out->data[0] % 32 == 0) {
uint8 *ybase = (uint8 *)in->data[0];
uint8 *rgbbase = (uint8 *)out->data[0];

View File

@ -18,7 +18,7 @@
// this function will try to find the best colorspaces for both the ff-codec and
// the Media Kit sides.
gfx_convert_func resolve_colorspace(color_space colorSpace, PixelFormat pixelFormat)
gfx_convert_func resolve_colorspace(color_space colorSpace, PixelFormat pixelFormat, int width, int height)
{
CPUCapabilities cpu;
@ -46,7 +46,7 @@ CPUCapabilities cpu;
}
if (pixelFormat == PIX_FMT_YUV420P || pixelFormat == PIX_FMT_YUVJ420P) {
if (cpu.HasSSE2()) {
if (cpu.HasSSE2() && width % 8 == 0 && height % 2 == 0 ) {
TRACE("resolve_colorspace: gfx_conv_yuv420p_rgba32_sse2\n");
return gfx_conv_yuv420p_rgba32_sse2;
} else {
@ -56,7 +56,7 @@ CPUCapabilities cpu;
}
if (pixelFormat == PIX_FMT_YUV422P || pixelFormat == PIX_FMT_YUVJ422P) {
if (cpu.HasSSE2()) {
if (cpu.HasSSE2() && width % 8 == 0) {
return gfx_conv_yuv422p_rgba32_sse2;
} else {
return gfx_conv_YCbCr422_RGB32_c;

View File

@ -32,7 +32,7 @@ typedef void (*gfx_convert_func) (AVFrame *in, AVFrame *out, int width, int heig
// this function will try to find the best colorspaces for both the ff-codec and
// the Media Kit sides.
gfx_convert_func resolve_colorspace(color_space cs, PixelFormat pixelFormat);
gfx_convert_func resolve_colorspace(color_space cs, PixelFormat pixelFormat, int width, int height);
const char *pixfmt_to_string(int format);