From 782872541396e00a09b479a780d26af3123f6390 Mon Sep 17 00:00:00 2001 From: erbth Date: Tue, 9 Sep 2014 00:13:18 +0200 Subject: [PATCH] YUV data conversion of H.264 implementation (egfx): only convert invalid areas SIMD SSSE3 conversion in primitives compiling all primitives sources with optimization and cleanup after last merge --- channels/drdynvc/client/dvcman.c | 9 +- include/freerdp/codec/h264.h | 6 +- libfreerdp/codec/CMakeLists.txt | 18 --- libfreerdp/codec/h264.c | 129 +++++----------- .../codec/test/Makefile.TestOpenH264ASM32 | 17 --- .../codec/test/Makefile.TestOpenH264ASM64 | 17 --- .../codec/test/Makefile.TestOpenH264SSSE3 | 14 -- libfreerdp/codec/test/TestOpenH264 | Bin 15584 -> 0 bytes libfreerdp/codec/test/TestOpenH264ASM.c | 92 ------------ libfreerdp/codec/test/TestOpenH264ASM.h | 7 - libfreerdp/primitives/CMakeLists.txt | 15 +- libfreerdp/primitives/prim_YUV.c | 138 +++++++++++------- libfreerdp/primitives/prim_YUV.h | 1 + .../prim_YUV_opt.c} | 97 ++++++------ winpr/libwinpr/utils/collections/StreamPool.c | 2 - 15 files changed, 199 insertions(+), 363 deletions(-) delete mode 100644 libfreerdp/codec/test/Makefile.TestOpenH264ASM32 delete mode 100644 libfreerdp/codec/test/Makefile.TestOpenH264ASM64 delete mode 100644 libfreerdp/codec/test/Makefile.TestOpenH264SSSE3 delete mode 100755 libfreerdp/codec/test/TestOpenH264 delete mode 100644 libfreerdp/codec/test/TestOpenH264ASM.c delete mode 100644 libfreerdp/codec/test/TestOpenH264ASM.h rename libfreerdp/{codec/h264_ssse3.c => primitives/prim_YUV_opt.c} (80%) diff --git a/channels/drdynvc/client/dvcman.c b/channels/drdynvc/client/dvcman.c index 001717e14..f9e4873b8 100644 --- a/channels/drdynvc/client/dvcman.c +++ b/channels/drdynvc/client/dvcman.c @@ -486,7 +486,6 @@ int dvcman_receive_channel_data(IWTSVirtualChannelManager* pChannelMgr, UINT32 C int status = 0; DVCMAN_CHANNEL* channel; UINT32 dataSize = Stream_GetRemainingLength(data); - wStream* s; channel = (DVCMAN_CHANNEL*) dvcman_find_channel_by_id(pChannelMgr, ChannelId); @@ -499,7 +498,7 @@ int dvcman_receive_channel_data(IWTSVirtualChannelManager* pChannelMgr, UINT32 C if (channel->dvc_data) { /* Fragmented data */ - if (Stream_GetPosition(channel->dvc_data) + dataSize > (UINT32) Stream_Length(channel->dvc_data)) + if (Stream_GetPosition(channel->dvc_data) + dataSize > (UINT32) Stream_Capacity(channel->dvc_data)) { CLOG_ERR("data exceeding declared length!"); Stream_Release(channel->dvc_data); @@ -513,11 +512,9 @@ int dvcman_receive_channel_data(IWTSVirtualChannelManager* pChannelMgr, UINT32 C { Stream_SealLength(channel->dvc_data); Stream_SetPosition(channel->dvc_data, 0); - s=channel->dvc_data; + status = channel->channel_callback->OnDataReceived(channel->channel_callback, channel->dvc_data); + Stream_Release(channel->dvc_data); channel->dvc_data = NULL; - - status = channel->channel_callback->OnDataReceived(channel->channel_callback, s); - Stream_Release(s); } } else diff --git a/include/freerdp/codec/h264.h b/include/freerdp/codec/h264.h index d29a9e243..969914709 100644 --- a/include/freerdp/codec/h264.h +++ b/include/freerdp/codec/h264.h @@ -29,8 +29,7 @@ typedef struct _H264_CONTEXT H264_CONTEXT; typedef BOOL (*pfnH264SubsystemInit)(H264_CONTEXT* h264); typedef void (*pfnH264SubsystemUninit)(H264_CONTEXT* h264); -typedef int (*pfnH264SubsystemDecompress)(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize, - BYTE* pDstData, DWORD DstFormat, int nDstStep, int nXDst, int nYDst, int nWidth, int nHeight); +typedef int (*pfnH264SubsystemDecompress)(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize); struct _H264_CONTEXT_SUBSYSTEM { @@ -50,6 +49,9 @@ struct _H264_CONTEXT UINT32 width; UINT32 height; //int scanline; + + BYTE* pYUVData[3]; + int iStride[3]; /* <<<<<<< HEAD diff --git a/libfreerdp/codec/CMakeLists.txt b/libfreerdp/codec/CMakeLists.txt index f8ac3faa5..75999d262 100644 --- a/libfreerdp/codec/CMakeLists.txt +++ b/libfreerdp/codec/CMakeLists.txt @@ -101,24 +101,6 @@ if(WITH_LIBAVCODEC) set(FREERDP_LIBAVCODEC_LIBS ${LIBAVCODEC_LIB} ${LIBAVUTIL_LIB}) endif() -if(WITH_LIBAVCODEC OR WITH_OPENH264) - if(WITH_H264_SSSE3) - add_definitions(-DWITH_H264_SSSE3) - set(${MODULE_PREFIX}_SRCS - ${${MODULE_PREFIX}_SRCS} - h264_ssse3.c) - - if(CMAKE_COMPILER_IS_GNUCC) - set(OPTIMIZATION "${OPTIMIZATION} -msse2 -mssse3") - endif() - - if(MSVC) - set(OPTIMIZATION "${OPTIMIZATION} /arch:SSE2") - endif() - - set_property(SOURCE h264_ssse3.c PROPERTY COMPILE_FLAGS ${OPTIMIZATION}) - endif() -endif() add_complex_library(MODULE ${MODULE_NAME} TYPE "OBJECT" MONOLITHIC ${MONOLITHIC_BUILD} diff --git a/libfreerdp/codec/h264.c b/libfreerdp/codec/h264.c index 77527a4de..5f8f688ab 100644 --- a/libfreerdp/codec/h264.c +++ b/libfreerdp/codec/h264.c @@ -28,39 +28,14 @@ #include #include -#ifdef WITH_LIBAVCODEC -int h264_prepare_rgb_buffer(H264_CONTEXT* h264, int width, int height) -{ - UINT32 size; +#include - h264->width = width; - h264->height = height; - h264->scanline = h264->width * 4; - size = h264->scanline * h264->height; - - if (size > h264->size) - { - h264->size = size; - - if (!h264->data) - h264->data = (BYTE*) _aligned_malloc(h264->size, 16); - else - h264->data = (BYTE*) _aligned_realloc(h264->data, h264->size, 16); - } - - if (!h264->data) - return -1; - - return 1; -} -#endif /** * Dummy subsystem */ -static int dummy_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize, - BYTE* pDstData, DWORD DstFormat, int nDstStep, int nXDst, int nYDst, int nWidth, int nHeight) +static int dummy_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize) { return -1; } @@ -107,13 +82,9 @@ static void openh264_trace_callback(H264_CONTEXT* h264, int level, const char* m static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize) { - int srcStep[3]; - prim_size_t roi; - BYTE* pYUVData[3]; DECODING_STATE state; SBufferInfo sBufferInfo; SSysMEMBuffer* pSystemBuffer; - primitives_t* prims = primitives_get(); H264_CONTEXT_OPENH264* sys = (H264_CONTEXT_OPENH264*) h264->pSystemData; struct timeval T1,T2; @@ -147,7 +118,7 @@ static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSiz */ if (sBufferInfo.iBufferStatus != 1) - state = (*sys->pDecoder)->DecodeFrame2(sys->pDecoder, NULL, 0, pYUVData, &sBufferInfo); + state = (*sys->pDecoder)->DecodeFrame2(sys->pDecoder, NULL, 0, h264->pYUVData, &sBufferInfo); gettimeofday(&T2,NULL); printf("OpenH264: decoding took: %u sec %u usec\n",(unsigned int)(T2.tv_sec-T1.tv_sec),(unsigned int)(T2.tv_usec-T1.tv_usec)); @@ -164,17 +135,19 @@ static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSiz if (state != 0) return -1; - if (!h264->pYUVData[0] || !h264->pYUVData[1] || !h264->pYUVData[2]) - return -1; - if (sBufferInfo.iBufferStatus != 1) - return -1; + return -2; if (pSystemBuffer->iFormat != videoFormatI420) return -1; + if (!h264->pYUVData[0] || !h264->pYUVData[1] || !h264->pYUVData[2]) + return -1; + h264->iStride[0] = pSystemBuffer->iStride[0]; h264->iStride[1] = pSystemBuffer->iStride[1]; + h264->iStride[2] = pSystemBuffer->iStride[1]; + h264->width = pSystemBuffer->iWidth; h264->height = pSystemBuffer->iHeight; @@ -305,16 +278,11 @@ struct _H264_CONTEXT_LIBAVCODEC }; typedef struct _H264_CONTEXT_LIBAVCODEC H264_CONTEXT_LIBAVCODEC; -static int libavcodec_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize, - BYTE* pDstData, DWORD DstFormat, int nDstStep, int nXDst, int nYDst, int nWidth, int nHeight) +static int libavcodec_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize) { int status; - int srcStep[3]; int gotFrame = 0; AVPacket packet; - prim_size_t roi; - const BYTE* pSrc[3]; - primitives_t* prims = primitives_get(); H264_CONTEXT_LIBAVCODEC* sys = (H264_CONTEXT_LIBAVCODEC*) h264->pSystemData; struct timeval T1,T2; @@ -346,22 +314,19 @@ static int libavcodec_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcS if (gotFrame) { - if (h264_prepare_rgb_buffer(h264, sys->videoFrame->width, sys->videoFrame->height) < 0) - return -1; + h264->pYUVData[0] = sys->videoFrame->data[0]; + h264->pYUVData[1] = sys->videoFrame->data[1]; + h264->pYUVData[2] = sys->videoFrame->data[2]; - roi.width = h264->width; - roi.height = h264->height; + h264->iStride[0] = sys->videoFrame->linesize[0]; + h264->iStride[1] = sys->videoFrame->linesize[1]; + h264->iStride[2] = sys->videoFrame->linesize[2]; - pSrc[0] = sys->videoFrame->data[0]; - pSrc[1] = sys->videoFrame->data[1]; - pSrc[2] = sys->videoFrame->data[2]; - - srcStep[0] = sys->videoFrame->linesize[0]; - srcStep[1] = sys->videoFrame->linesize[1]; - srcStep[2] = sys->videoFrame->linesize[2]; - - prims->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, h264->data, h264->scanline, &roi); + h264->width = sys->videoFrame->width; + h264->height = sys->videoFrame->height; } + else + return -2; return 1; } @@ -482,6 +447,8 @@ int h264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize, int* iStride; int ret, i, cx, cy; int UncompressedSize; + primitives_t *prims = primitives_get(); + prim_size_t roi; struct timeval T1,T2; @@ -489,24 +456,24 @@ int h264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize, return -1; #if 0 - printf("h264_decompress: pSrcData=%p, SrcSize=%u, pDstData=%p, nDstStep=%d, nXDst=%d, nYDst=%d, nWidth=%d, nHeight=%d)\n", - pSrcData, SrcSize, *ppDstData, nDstStep, nXDst, nYDst, nWidth, nHeight); + printf("h264_decompress: pSrcData=%p, SrcSize=%u, pDstData=%p, nDstStep=%d, nDstHeight=%d, numRegionRects=%d\n", + pSrcData, SrcSize, *ppDstData, nDstStep, nDstHeight, numRegionRects); #endif if (!(pDstData = *ppDstData)) return -1; -<<<<<<< HEAD - if (h264->subsystem->Decompress(h264, pSrcData, SrcSize, - pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight)) - return -1; + if ((ret = h264->subsystem->Decompress(h264, pSrcData, SrcSize)) < 0) + return ret; UncompressedSize = h264->width * h264->height * 4; if (UncompressedSize > (nDstStep * nDstHeight)) return -1; + pYUVData = h264->pYUVData; + iStride = h264->iStride; gettimeofday(&T1,NULL); for (i = 0; i < numRegionRects; i++){ @@ -517,32 +484,18 @@ int h264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize, pDstPoint = pDstData + rect->top * nDstStep + rect->left * 4; pYUVPoint[0] = pYUVData[0] + rect->top * iStride[0] + rect->left; - ret = rect->top/2 * iStride[1] + rect->left/2; - pYUVPoint[1] = pYUVData[1] + ret; - pYUVPoint[2] = pYUVData[2] + ret; + pYUVPoint[1] = pYUVData[1] + rect->top/2 * iStride[1] + rect->left/2; + pYUVPoint[2] = pYUVData[2] + rect->top/2 * iStride[2] + rect->left/2; #if 0 printf("regionRect: x: %d, y: %d, cx: %d, cy: %d\n", rect->left, rect->top, cx, cy); #endif -#ifdef WITH_H264_SSSE3 - freerdp_image_yuv420p_to_xrgb_ssse3(pDstPoint, pYUVPoint, cx, cy, iStride, nDstStep); -#else -/* roi.width = h264->width; - roi.height = h264->height; + roi.width = cx; + roi.height = cy; - pSrc[0] = sys->videoFrame->data[0]; - pSrc[1] = sys->videoFrame->data[1]; - pSrc[2] = sys->videoFrame->data[2]; - - srcStep[0] = sys->videoFrame->linesize[0]; - srcStep[1] = sys->videoFrame->linesize[1]; - srcStep[2] = sys->videoFrame->linesize[2]; - - prims->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, h264->data, h264->scanline, &roi) - */ -#endif + prims->YUV420ToRGB_8u_P3AC4R((const BYTE**) pYUVPoint, iStride, pDstPoint, nDstStep, &roi); } gettimeofday(&T2,NULL); printf("converting took %u sec %u usec\n",(unsigned int)(T2.tv_sec-T1.tv_sec),(unsigned int)(T2.tv_usec-T1.tv_usec)); @@ -582,24 +535,12 @@ H264_CONTEXT* h264_context_new(BOOL Compressor) h264 = (H264_CONTEXT*) calloc(1, sizeof(H264_CONTEXT)); -#ifdef WITH_H264_SSSE3 - if(freerdp_check_ssse3()){ - printf("SSSE3 seems to be not supported on this system, try without WITH_H264_SSSE3 ..."); - return NULL; - } -#endif - if (h264) { h264->Compressor = Compressor; h264->subsystem = &g_Subsystem_dummy; -#ifdef WITH_LIBAVCODEC - if (h264_prepare_rgb_buffer(h264, 256, 256) < 0) - return NULL; -#endif - if (!h264_context_init(h264)) { free(h264); @@ -614,10 +555,6 @@ void h264_context_free(H264_CONTEXT* h264) { if (h264) { -#ifdef WITH_LIBAVCODEC - _aligned_free(h264->data); -#endif - h264->subsystem->Uninit(h264); free(h264); diff --git a/libfreerdp/codec/test/Makefile.TestOpenH264ASM32 b/libfreerdp/codec/test/Makefile.TestOpenH264ASM32 deleted file mode 100644 index 2a0308db4..000000000 --- a/libfreerdp/codec/test/Makefile.TestOpenH264ASM32 +++ /dev/null @@ -1,17 +0,0 @@ -TestOpenH264ASM: TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o - gcc -o TestOpenH264ASM TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o -lwinpr - -h264_ssse3.asm.o: ../h264_ssse3_x32.asm - nasm -f elf32 -o h264_ssse3.asm.o ../h264_ssse3_x32.asm - -h264.asm.o: ../h264_x32.asm - nasm -f elf32 -o h264.asm.o ../h264_x32.asm - -TestOpenH264ASM.c.o: TestOpenH264ASM.c - gcc -c -o TestOpenH264ASM.c.o TestOpenH264ASM.c - -h264.c.o: ../h264.c - gcc -c -o h264.c.o ../h264.c - -clean: - rm -f TestOpenH264ASM TestOpenH264ASM.c.o h264_ssse3.asm.o h264.c.o h264.asm.o diff --git a/libfreerdp/codec/test/Makefile.TestOpenH264ASM64 b/libfreerdp/codec/test/Makefile.TestOpenH264ASM64 deleted file mode 100644 index 53e208b69..000000000 --- a/libfreerdp/codec/test/Makefile.TestOpenH264ASM64 +++ /dev/null @@ -1,17 +0,0 @@ -TestOpenH264ASM: TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o - gcc -o TestOpenH264ASM TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o -lwinpr - -h264_ssse3.asm.o: ../h264_ssse3_x64.asm - nasm -f elf64 -o h264_ssse3.asm.o ../h264_ssse3_x64.asm - -h264.asm.o: ../h264_x64.asm - nasm -f elf64 -o h264.asm.o ../h264_x64.asm - -TestOpenH264ASM.c.o: TestOpenH264ASM.c - gcc -c -o TestOpenH264ASM.c.o TestOpenH264ASM.c - -h264.c.o: ../h264.c - gcc -c -o h264.c.o ../h264.c - -clean: - rm -f TestOpenH264ASM TestOpenH264ASM.c.o h264_ssse3.asm.o h264.c.o h264.asm.o diff --git a/libfreerdp/codec/test/Makefile.TestOpenH264SSSE3 b/libfreerdp/codec/test/Makefile.TestOpenH264SSSE3 deleted file mode 100644 index 7709e9423..000000000 --- a/libfreerdp/codec/test/Makefile.TestOpenH264SSSE3 +++ /dev/null @@ -1,14 +0,0 @@ -TestOpenH264: TestOpenH264ASM.c.o h264.c.o h264_ssse3.c.o - gcc -o TestOpenH264 TestOpenH264ASM.c.o h264.c.o h264_ssse3.c.o -lwinpr - -h264_ssse3.c.o: ../h264_ssse3.c - gcc -c -O3 -o h264_ssse3.c.o ../h264_ssse3.c -mssse3 - -TestOpenH264ASM.c.o: TestOpenH264ASM.c - gcc -c -o TestOpenH264ASM.c.o TestOpenH264ASM.c - -h264.c.o: ../h264.c - gcc -c -o h264.c.o ../h264.c - -clean: - rm -f TestOpenH264 TestOpenH264ASM.c.o h264_ssse3.asm.o h264.c.o h264.asm.o diff --git a/libfreerdp/codec/test/TestOpenH264 b/libfreerdp/codec/test/TestOpenH264 deleted file mode 100755 index c92bd5af2190f0d681727a24c74e78bfb62ea1c6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15584 zcmcIre{@vUoxd{^AVkargc=d$iIbM>mJldlRID>F4_|bm6o}OpoP=Zo(Ik_d9}-$3 zamRS_c8Jnd_fWT-0%H;=e>8gs-6{Ai-pNxW#48*4d*#IWZW6+|2iiHR>pFflg(zc z*hC-`a5*`LVk0Hvm@qBlTp?#eoI`1=0IW_UA>vsEg&d3^J*1fO8w*a6G~&1C7^I9D zQ#e3+o>VKmXbw{hZ%WzkbQ0`3<%68tR+E zE%RE+mdsnSs3hnwS;+k_fI%wa9d}o9)3QEhf}*^qGcmmQZ3Q;?u`z82S2xURvKkLzk&WdCcpGJ~jg4pV<1 zA7mgO^CW@zN?cUlNw}yySa{ku3Q9EOw~u z3a-qc0H7-~dO5h5v%y0_8cy_+b zf=|xk=iV&x4`#vN%Yxqp`vvSwva6^H5@VrE;5#z#+q2+S;8W47rMw*Hzk$hVYzpg| z>>M+W)YBZdGtYJDIK#ZKslKT>KvThjk_F7`-O%W7@&-fIfsohBymvnAUFF+Q9}M{d z%NweLL0^!0%hyzRYkdI$uc=tx;BWG+sb1gUGkD{3svGJzH2G@1bpfA`l?PV_{58H{ z&>vXgs}6+&zLf!A(AN}FTbQ@5zNtRD1oMI&VqR&lvAUtbUqc0{p+YQSb#)El;6_&G zZ}v5@=5Q#;Y7CKJD9`|?(bpLCg;-rpgFonF!4QfPV0D`U^&ubI;0uN78-4z|+Um`y z3o!0Q*t62Of zHvS|gDWWa>&xmOrvamOyOY<0o@XUHlS5~$X?M|WJQN?4LtLTzJ_6M3PY0i^jRLIlZ zC`=eE(ccIcOBlyaC3QHC_aJOTxkSrp4BbiAIBoT^c5=~8;=;QP>qNzy}F;3?Z zO--5T;>5qt}CQVdv`j13YQzlk& z`Y6%Vgb637-z1uvE>X(q14L7kB@|BYC7PD-L;4f=|lzBM)f$M_S~>@X9siJ+ZD?PNwyA$9U9p@d_u4-vK)#+qY9) zYtae+d5yxf_E^Z8?BkWb-5mD(6b)6W&D()Ui;U)L`pHe!bqDt#Kdv;gZa=Y-1NPZ% zgzX_p%6E_2Ag}2MwC>ZlYnDDuKdc?O6o&p@lGWOehNS*SLci^{1l(casg=--KLSVB z9iH$3Aj`Rf)c#uJw%n^B5V|(y!qm$B19RY=elpp&F7ZRCBp$|X`*t$1T6=V87xLoL z@#|6VDCu03`iHQo>D_y&Q|_bgvwtI&i1-%N;U3^6 zS^Ts??J=n11{K>!EwZ2e@(~smJB0 z?$h6-WXD9P?SWdSW^0exweIdwD=b(JXweE`&C+Y%%{r`*i(M|)$K@)Aw3?%u+DSU1vb9N8{p4n)=WT>4*J`mla@e;YcG(a-9>G|HewVpdeMefKal z`$1Vix$V1SL~_O`QUfWmUO|zVeK%S`9pI^$zR02K-7b9;*~C)WBri}S#HqWAE;INL z^_3dZ?8kt!?=Il9VV5^~*@4h<=umxSh$6?{6g0rK$q7KnbU2miKOxh>D`jVR@)7pP zwn%H7h4S`ac|8Wu3t%I6v`|m{Gb6o+(jOb?7^VMUr28rTmXRKy^lVfcuR!h5Lo{f= zzwRK7VZHTYr1c`(Jc;uLuX?ogB8{O(Chb8xCI@X%^`aq&2~E+@=zY$P)}fAyc!zqz zz8l`F7ZJrHu{K>DMd}1kjX(?I=cC-HI#M5d3Uz4di^SWyM|0uGp?3m5b`q;5_n+v(f88~o`@-0B(EQajl14x{i z-1i1ADrPGMy^yL${M`s+Hu25GVCg{js1=YNdHb|&4qq}|MFErXXf8Szo zsV-L&h5|?1$p-Hd73gg3WS7r(B=iIcMb%MCzhp|+A)Ru_X*kqRxxf4#-Jj_rlX_<_ zqUem<4DSYrb7XJXW!~wd5bkUze#*O-3Dea$GLO?NOzX2T3*Swh=seibPRb?v4CN@) zT^OoI_O7}thi{DKKqcwm#%ZpL%Y12w5bHT&i9Bf!VB~o}<@t}C)lS9Kq7YU`d(;t# zu)zhI}qaNo@N7Vu?ihs2*%Iv>V=V~>*QB|R=dijc~4n)=G z!FVxRxvTplD;8Nk`nwEfyhk0TG9-#&3`Pnt)#SonR2`01%KZ0 z!HXyywHLJ~s`l^IotzwqsKcm~;ak<8*tfkUHv`xgRf)-n<`^^>dTn?Bs)#XsP{Jq{ zvXR5;Id)h*i|Y& zHI{lj+D_S`lNa2o9uJCKU!xvv-Z;2WTDfz&158@WuxpzBIt>66w{WaK_t{@@=Gb4M zu8kzKzY-HGy!y5QIVYyLPAjpEErHrA*o4`C#oZD;*f0ZssT zlQQ;7Zy7tO{aq=ucR_`3u(8Hduam<{Z^bGU9@eqx)GG!&y4ugB#KI1)X^C$g+DAX2 z_R|Cz{{{I#gD}!M%0e^X#b~s0$mr(OyI$knQXLiRS!AdH!&gX8A!!m)VdRa2_~App ziT2a4$M{QUlF9biUdkghbwB+;i*E(A;3ziKPh*d5{8n;(*LAN;f5%nn(%*NjDc48% z4}nUL{`zW7|8lmEm8d_Hn<>!&NCQydjh>|`S!*(#!I9R>Oaq4{kWtaWz( zgryUa-Hf&iHX`gq&T_<4|6O@S8;FWlxN@yEB<|_XPWiV4`~DCDt=pSKkw?iJ86I70W84o z&IgkfH76=+VjjKLQQ&$F>ps(JlI4-tEp=|Y|2hRuROp{-`YeYB<;gAYK9$pcw8DPR zsq*fV)(Xo&`%#bXb||hrw6p?$9j;$_0l%$1mL5&t;=tmy#ZgGMW}E;@i7N+@ARdP& zd_pkTVZ`>?ZpM~(F1_7@`f7i}^*d@74+@;?w!4$0ytoNs9{nx#hOac+T1Vjmj?SZB z^L6&mjwQZCZHeENbuN1oRdCT2V3tP?A*o6OvqX6ff=W#;N?+*aGSZ6;Oud~;m ztg}~tMKWi(%=^GjPB>-geGWe83m@US0J-5h`=!%prd@6D6zQ{+tF1h8(8#S>itiU+Pa4xVMj3+?o`3r(;Q^$8z%@K$WqNU3T|~>ouxFmoft5hj0x+>r|aRFp^7ZKmx<) zZl4}Rc|NPNpY8_mIkAo%06GXH!EGf#eGKYDPNB92(b9v5?CrmVb@j1N&bjPcE#YQQ zO&HXPy5;sJM=2MD67q%LCqPoJZZhO1ORD`CRb!9I2jowl!;4FBIB=d^JBMt~aCjJc zg9CIs6~6x*YUalBNOwUw_AgrGV8P879;tPn@3Gh)vtr&lXZLqen?TIJlPVBES>gkO zvy%qZS(kmdxXykIb?|4@kK(j1$c+e5C#8PLsY{3x!OVZwc6MqKK6TkgaOjVIXWeZ~8U*}ruTIYJe z<)ul-6wV#2qfla>{TTeEq0!*zr|~$TbCtB+$^dO~pq9Mmz4+>IBF3Y)*&wAW5<7N?0PlI?u{w<}+A5wzh=4O8& zM7Wo~IpQEAzI#Y%lh3HkjGDMeu==&PY9jQE>*kmHmIEHUYaHu@Ai zCl5D-lugw^rRYB6KTk3A1PP6b^3|&=Tx*tVN_9vns=Y-iT6mvw!)+AhG2g?!z~<1# z`lb!aJ$D)Y&kr>=&)?MG-!OlD&|6envT@;(MQqc0Him0Jh|}EM$W|`SV@0*Fo5vO| zMw+p^T^^+pkL49trQ-HA)HmW`df*nNzP1KgLyTSt_?uSwYC^XtEd+g2*=$5Lf~?^* z*`iS6Z-SE{a&eq`%Hxj&(J^@-c}h?$gTXOjZLH0LKP&!h_yd{tSIW}=+F;0h9kZnW zJ!MJ%Yhmesu#WM9%ZiJ#W8~i0#qo*Y<>AUV#7tzHHYOI{Ol^g1UL(E zDdM$=S0Zjh+>E#jaR*|C;o6V50P!%<2=bY>W{Yxp@G{7+ElM+uO2tpCOHAv=q}|LHmGqNolfpUT@}OcIZ-@|o3-}kn=VL_LQOEc{0sjQ}PP6_G$N2P1A?JKDd4rk1*5FgS z9tS@c{Q1Op=NCMfpUXT*jsRDHFUtY3EWhAM>$3d9Cv0lI(wU>?&)qhmJiqkuiSGQe z$RtmGvn@X-zifGasVjf(vU~-S&{zgbMxcD$s0Rg)yQn&3pn6&hpb)rD;CHr7czmKa z-?<_(=}GGowoc016#DS|ojPewKoKFG}zFDah|dt~E&h zT>&L$3c7zz6;DySP>|)5ZdO^HKpFe|QqoVcVslc&u?agm}faHvSS!doDK=TF!TQ&$?tSBGx}h%lS8LOd|t#8^9=lJBL2FFmxy?Ui0>6~ zoroV6@#7-?7ZLxvhh*|20VCchQ+Pq`3pBG5>_U7cu4U!(Le(1twV^3ovOZjo&w%P{ zSqV`atMRJ5q;_)?>=|(=U~sVh2C-(R5nk{DzJ_X25UJ*d5G&ztPDY;V=Z(gWoUS-0|C5d=NdBoU=74zy}Gf!23h-I5VsPZi(wIO+*wJD zzp)Wt3uIR3-%Qi^roJo34YN8yAqYT@Zvz~gUnG4a5;TUS{w%8#6y|dQD4ZI*$@xJd z3y?7DFSR;3YpKw8aBA$9sjq=WbBfHr%IXBgSd{=`oErN^AXm`RjvF+lcE8k@^OPbc zmZe$r1u#tREBeV@&Aty_^(A zOwU3n$oOfb&HD0t(qXZXtu=o%VbUWT6rROp)|czB=Q_?#`$mtDO#gQRBYj!_Ro6K| zq5lhg+Si(c^jl)PkTC1Z?=R(dnN&V=$jpB?m}Y%VadL4@;SqZ*BY)X`2A8oI64XXw zxTN+S^83|H`AnMjuy&Ka+?Vvv0f>+(pGnj23G!d&PhSvF82Bso{{&g;ds1Jn!*QWM zOvVw!a7g(h+u;;==KSS%P5ln8mXbH9vGT}r3})!deb9i=-z61AEcIkOlA+&GRK+Db ziVOwmr$LS7FX;4mW$ZqFQwxjvbV*-$HT-ATtM zGr69m<9ST36X`fsQaS&d7`Ab~=OnfbD}tP_P2#}wQ{VNc;UzM7_((&m`&P(a|49qiCW{9V4_!3;sBW9)zUuetuAssJF{r*YE zuVHdLr{iPa&8Fk>*WMV49*EwS&t_fW90QS}ahHSdc+)@-vv67P^?PQyih+|IXNH|` z3%of4|Bk>jf8TE4_Ghx4VxA%YC_IeIf~PZ?_52)g%C|4W{?EDms(@B;!rgV=&!(a#J2<@8|8gEQH#47>mZBs(t@IJt!%2|Etp zm;^;~YZm;+9KU>i`)L;WUuVG& zXTeW$JfA%;%0=rTg>wS`*J(~p^Jj%feGQAE!jbgbLf}--Qwp7&af4vHfltM*TF%3A zT&(8u={AV+Wx+QCS1uQ4PXR9&r@!swcJf)N@Mpfz{f)q9&v0^tKf6SVp4m>dUXwa^ zAjDapz_*L>EAc-GJM`Ht1)bmVyj0*-B47S&5xl7=IGvA>`zRsKt^-c>Rx#blA^xlt zsc&;!N{z*S9S z$2>SH5ejT(#)F~SaAPB#wJ=fqgh!g_8ecGU56*MYTPWA+3V2U%s1VgO)Hk#HEARD& z{N7b}EMptIB(j1YIhEH6${lWO=17ohd68qYm-M~-i4sro7k9i;R;+SWs9yDMHywZB zRV31PcHa`zW;7lMfq|@*@@F4M)u^$ z=J3OYALZH3#4zK~iPLF5U6NHACuoclF6r{(FiTA^Y@Dx=M_DqY43D{Ycn3#M^t^O1 z2j^`}y(r7UAUbQ4Zq9t1CY@tAVVvDb=kRko>Cj~danjks+s0;i8bq)1`2w}g-ulMs z4L)y;zj-rN@uG#L&D2L)0vp!af_x`A}IW}~m>AusQQqL2D$Oz{GFz0gl}$l9RayRo{dmJVAP=aZ;O`2$#K z%qV$Ap@nhti6vfCIA7oFZNhL4^Nt+G(Jt}+R-Bv4C>R}cGR{t=GmL{&>6meDUc3?b@heeGe!eT+=>G!h9cxDb diff --git a/libfreerdp/codec/test/TestOpenH264ASM.c b/libfreerdp/codec/test/TestOpenH264ASM.c deleted file mode 100644 index 040b1650d..000000000 --- a/libfreerdp/codec/test/TestOpenH264ASM.c +++ /dev/null @@ -1,92 +0,0 @@ -#include -#include -#include - -#include - -#include "TestOpenH264ASM.h" - -#define WIDTH 1920 -#define HEIGHT 1080 - -#define SSSE3 1 - - -int main(void){ - int i,j,k; - int ret; - unsigned char *pDstData_c,*pDstData_asm,*pSrcData[3]; - int nSrcStep[2]; - -#if SSSE3 - if(freerdp_check_ssse3()){ - fprintf(stderr,"ssse3 not supported!\n"); - return EXIT_FAILURE; - } -#endif - - struct timeval t1,t2,t3; - - pSrcData[0]=malloc(1984*HEIGHT*sizeof(char)); - pSrcData[1]=malloc(1984*HEIGHT/4*sizeof(char)); - pSrcData[2]=malloc(1984*HEIGHT/4*sizeof(char)); - pDstData_asm=_aligned_malloc(WIDTH*(HEIGHT+1)*4*sizeof(char),16); - pDstData_c=malloc(WIDTH*(HEIGHT+1)*4*sizeof(char)); - - memset(pDstData_asm,0xFF,WIDTH*(HEIGHT+1)*4*sizeof(char)); - memset(pDstData_c,0xFF,WIDTH*(HEIGHT+1)*4*sizeof(char)); - - for(i=0;iwidth & 0x01; + last_line = roi->height & 0x01; + + nWidth = (roi->width + 1) & ~0x0001; + nHeight = (roi->height + 1) & ~0x0001; - halfWidth = roi->width / 2; - halfHeight = roi->height / 2; + halfWidth = nWidth / 2; + halfHeight = nHeight / 2; - srcPad[0] = (srcStep[0] - roi->width); + srcPad[0] = (srcStep[0] - nWidth); srcPad[1] = (srcStep[1] - halfWidth); srcPad[2] = (srcStep[2] - halfWidth); - dstPad = (dstStep - (roi->width * 4)); + dstPad = (dstStep - (nWidth * 4)); - for (y = 0; y < halfHeight; y++) + for (y = 0; y < halfHeight; ) { - for (x = 0; x < halfWidth; x++) + y++; + if (y == halfHeight) + last_line = last_line << 1; + + for (x = 0; x < halfWidth; ) { + x++; + if (x == halfWidth) + last_column = last_column << 1; + U = *pU++; V = *pV++; @@ -105,32 +121,41 @@ pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], int srcStep[3], /* 2nd pixel */ - Y = *pY++; - Yp = Y << 8; + if (!(last_column & 0x02)) + { + Y = *pY++; + Yp = Y << 8; - R = (Yp + Vp403) >> 8; - G = (Yp - Up48 - Vp120) >> 8; - B = (Yp + Up475) >> 8; + R = (Yp + Vp403) >> 8; + G = (Yp - Up48 - Vp120) >> 8; + B = (Yp + Up475) >> 8; - if (R < 0) - R = 0; - else if (R > 255) - R = 255; + if (R < 0) + R = 0; + else if (R > 255) + R = 255; - if (G < 0) - G = 0; - else if (G > 255) - G = 255; + if (G < 0) + G = 0; + else if (G > 255) + G = 255; - if (B < 0) - B = 0; - else if (B > 255) - B = 255; + if (B < 0) + B = 0; + else if (B > 255) + B = 255; - *pRGB++ = (BYTE) B; - *pRGB++ = (BYTE) G; - *pRGB++ = (BYTE) R; - *pRGB++ = 0xFF; + *pRGB++ = (BYTE) B; + *pRGB++ = (BYTE) G; + *pRGB++ = (BYTE) R; + *pRGB++ = 0xFF; + } + else + { + pY++; + pRGB += 4; + last_column = last_column >> 1; + } } pY += srcPad[0]; @@ -138,8 +163,12 @@ pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], int srcStep[3], pV -= halfWidth; pRGB += dstPad; - for (x = 0; x < halfWidth; x++) + for (x = 0; x < halfWidth; ) { + x++; + if (x == halfWidth) + last_column = last_column << 1; + U = *pU++; V = *pV++; @@ -183,32 +212,41 @@ pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], int srcStep[3], /* 4th pixel */ - Y = *pY++; - Yp = Y << 8; + if(!(last_column & 0x02)) + { + Y = *pY++; + Yp = Y << 8; - R = (Yp + Vp403) >> 8; - G = (Yp - Up48 - Vp120) >> 8; - B = (Yp + Up475) >> 8; + R = (Yp + Vp403) >> 8; + G = (Yp - Up48 - Vp120) >> 8; + B = (Yp + Up475) >> 8; - if (R < 0) - R = 0; - else if (R > 255) - R = 255; + if (R < 0) + R = 0; + else if (R > 255) + R = 255; - if (G < 0) - G = 0; - else if (G > 255) - G = 255; + if (G < 0) + G = 0; + else if (G > 255) + G = 255; - if (B < 0) - B = 0; - else if (B > 255) - B = 255; + if (B < 0) + B = 0; + else if (B > 255) + B = 255; - *pRGB++ = (BYTE) B; - *pRGB++ = (BYTE) G; - *pRGB++ = (BYTE) R; - *pRGB++ = 0xFF; + *pRGB++ = (BYTE) B; + *pRGB++ = (BYTE) G; + *pRGB++ = (BYTE) R; + *pRGB++ = 0xFF; + } + else + { + pY++; + pRGB += 4; + last_column = last_column >> 1; + } } pY += srcPad[0]; @@ -223,6 +261,8 @@ pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], int srcStep[3], void primitives_init_YUV(primitives_t* prims) { prims->YUV420ToRGB_8u_P3AC4R = general_YUV420ToRGB_8u_P3AC4R; + + primitives_init_YUV_opt(prims); } void primitives_deinit_YUV(primitives_t* prims) diff --git a/libfreerdp/primitives/prim_YUV.h b/libfreerdp/primitives/prim_YUV.h index 12f796b61..99428ada6 100644 --- a/libfreerdp/primitives/prim_YUV.h +++ b/libfreerdp/primitives/prim_YUV.h @@ -22,6 +22,7 @@ pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(const INT16* pSrc[3], int srcStep, BYTE* pDst, int dstStep, const prim_size_t* roi); void primitives_init_YUV(primitives_t* prims); +void primitives_init_YUV_opt(primitives_t* prims); void primitives_deinit_YUV(primitives_t* prims); #endif /* FREERDP_PRIMITIVES_YUV_H */ diff --git a/libfreerdp/codec/h264_ssse3.c b/libfreerdp/primitives/prim_YUV_opt.c similarity index 80% rename from libfreerdp/codec/h264_ssse3.c rename to libfreerdp/primitives/prim_YUV_opt.c index 1774856b4..4b5cea145 100644 --- a/libfreerdp/codec/h264_ssse3.c +++ b/libfreerdp/primitives/prim_YUV_opt.c @@ -1,32 +1,32 @@ /** function for converting YUV420p data to the RGB format (but without any special upconverting) * It's completely written in nasm-x86-assembly for intel processors supporting SSSE3 and higher. - * The target scanline (6th parameter) must be a multiple of 16. - * iStride[0] must be (target scanline) / 4 or bigger and iStride[1] the next multiple of four - * of the half of iStride[0] or bigger + * The target dstStep (6th parameter) must be a multiple of 16. + * srcStep[0] must be (target dstStep) / 4 or bigger and srcStep[1] the next multiple of four + * of the half of srcStep[0] or bigger */ #include -#include -//#include -#include +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif #include #include - -int freerdp_check_ssse3() -{ - if(IsProcessorFeaturePresentEx(PF_EX_SSSE3)&&IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) - return 0; - - return 1; -} +#include +#include -int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidth,int nHeight,int *iStride,int scanline) +#ifdef WITH_SSE2 + +#include +#include + +pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, int *srcStep, + BYTE *pDst, int dstStep, const prim_size_t *roi) { char last_line,last_column; - int i,VaddDst,VaddY,VaddUV; + int i,nWidth,nHeight,VaddDst,VaddY,VaddU,VaddV; BYTE *UData,*VData,*YData; @@ -37,9 +37,12 @@ int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidt buffer=_aligned_malloc(4*16,16); - YData=pSrcData[0]; - UData=pSrcData[1]; - VData=pSrcData[2]; + YData=(BYTE *)pSrc[0]; + UData=(BYTE *)pSrc[1]; + VData=(BYTE *)pSrc[2]; + + nWidth=roi->width; + nHeight=roi->height; if((last_column=nWidth&3)){ @@ -48,7 +51,7 @@ int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidt case 2: r7=_mm_set_epi32(0,0,0xFFFFFFFF,0xFFFFFFFF); break; case 3: r7=_mm_set_epi32(0,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF); break; } - _mm_store_si128(buffer+48,r7); + _mm_store_si128(buffer+3,r7); last_column=1; } @@ -61,10 +64,10 @@ int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidt nHeight=nHeight>>1; - VaddDst=(scanline<<1)-(nWidth<<4); - VaddY=(iStride[0]<<1)-(nWidth<<2); - VaddUV=iStride[1]-(((nWidth<<1)+2)&0xFFFC); - + VaddDst=(dstStep<<1)-(nWidth<<4); + VaddY=(srcStep[0]<<1)-(nWidth<<2); + VaddU=srcStep[1]-(((nWidth<<1)+2)&0xFFFC); + VaddV=srcStep[2]-(((nWidth<<1)+2)&0xFFFC); while(nHeight-- >0){ @@ -129,7 +132,7 @@ int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidt r1=_mm_add_epi32(r1,r6); r7=_mm_add_epi32(r7,r6); - _mm_store_si128(buffer+16,r7); + _mm_store_si128(buffer+1,r7); /* Now we've prepared U-data. Preparing V-data is actually the same, just with other coefficients */ r2=_mm_cvtsi32_si128(*(UINT32 *)VData); @@ -153,7 +156,7 @@ int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidt r2=_mm_add_epi32(r2,r6); r7=_mm_add_epi32(r7,r6); - _mm_store_si128(buffer+32,r7); + _mm_store_si128(buffer+2,r7); @@ -170,8 +173,8 @@ int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidt _mm_store_si128(buffer,r4); }else{ - r1=_mm_load_si128(buffer+16); - r2=_mm_load_si128(buffer+32); + r1=_mm_load_si128(buffer+1); + r2=_mm_load_si128(buffer+2); r0=_mm_load_si128(buffer); } @@ -220,17 +223,17 @@ int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidt if(last_column&0x02){ - r6=_mm_load_si128(buffer+48); + r6=_mm_load_si128(buffer+3); r4=_mm_and_si128(r4,r6); - r5=_mm_lddqu_si128((__m128i *)pDstData); + r5=_mm_lddqu_si128((__m128i *)pDst); r6=_mm_andnot_si128(r6,r5); r4=_mm_or_si128(r4,r6); } - _mm_storeu_si128((__m128i *)pDstData,r4); + _mm_storeu_si128((__m128i *)pDst,r4); //Y data processing in secound line if(!(last_line&0x02)){ - r4=_mm_cvtsi32_si128(*(UINT32 *)(YData+iStride[0])); + r4=_mm_cvtsi32_si128(*(UINT32 *)(YData+srcStep[0])); r7=_mm_set_epi32(0x80800380,0x80800280,0x80800180,0x80800080); r4=_mm_shuffle_epi8(r4,r7); @@ -271,28 +274,40 @@ int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidt if(last_column&0x02){ - r6=_mm_load_si128(buffer+48); + r6=_mm_load_si128(buffer+3); r4=_mm_and_si128(r4,r6); - r5=_mm_lddqu_si128((__m128i *)(pDstData+scanline)); + r5=_mm_lddqu_si128((__m128i *)(pDst+dstStep)); r6=_mm_andnot_si128(r6,r5); r4=_mm_or_si128(r4,r6); last_column=last_column>>1; } - _mm_storeu_si128((__m128i *)(pDstData+scanline),r4); + _mm_storeu_si128((__m128i *)(pDst+dstStep),r4); } - pDstData+=16; + pDst+=16; YData+=4; }while(iYUV420ToRGB_8u_P3AC4R=ssse3_YUV420ToRGB_8u_P3AC4R; + } +#endif +} diff --git a/winpr/libwinpr/utils/collections/StreamPool.c b/winpr/libwinpr/utils/collections/StreamPool.c index 696ecd971..c95875fbe 100644 --- a/winpr/libwinpr/utils/collections/StreamPool.c +++ b/winpr/libwinpr/utils/collections/StreamPool.c @@ -155,8 +155,6 @@ wStream* StreamPool_Take(wStreamPool* pool, size_t size) Stream_SetPosition(s, 0); Stream_EnsureCapacity(s, size); - - Stream_SetLength(s,size); } s->pool = pool;