mirror of https://github.com/FreeRDP/FreeRDP
YUV data conversion of H.264 implementation (egfx):
only convert invalid areas SIMD SSSE3 conversion in primitives compiling all primitives sources with optimization and cleanup after last merge
This commit is contained in:
parent
cbc8b3a7e1
commit
7828725413
|
@ -486,7 +486,6 @@ int dvcman_receive_channel_data(IWTSVirtualChannelManager* pChannelMgr, UINT32 C
|
|||
int status = 0;
|
||||
DVCMAN_CHANNEL* channel;
|
||||
UINT32 dataSize = Stream_GetRemainingLength(data);
|
||||
wStream* s;
|
||||
|
||||
channel = (DVCMAN_CHANNEL*) dvcman_find_channel_by_id(pChannelMgr, ChannelId);
|
||||
|
||||
|
@ -499,7 +498,7 @@ int dvcman_receive_channel_data(IWTSVirtualChannelManager* pChannelMgr, UINT32 C
|
|||
if (channel->dvc_data)
|
||||
{
|
||||
/* Fragmented data */
|
||||
if (Stream_GetPosition(channel->dvc_data) + dataSize > (UINT32) Stream_Length(channel->dvc_data))
|
||||
if (Stream_GetPosition(channel->dvc_data) + dataSize > (UINT32) Stream_Capacity(channel->dvc_data))
|
||||
{
|
||||
CLOG_ERR("data exceeding declared length!");
|
||||
Stream_Release(channel->dvc_data);
|
||||
|
@ -513,11 +512,9 @@ int dvcman_receive_channel_data(IWTSVirtualChannelManager* pChannelMgr, UINT32 C
|
|||
{
|
||||
Stream_SealLength(channel->dvc_data);
|
||||
Stream_SetPosition(channel->dvc_data, 0);
|
||||
s=channel->dvc_data;
|
||||
status = channel->channel_callback->OnDataReceived(channel->channel_callback, channel->dvc_data);
|
||||
Stream_Release(channel->dvc_data);
|
||||
channel->dvc_data = NULL;
|
||||
|
||||
status = channel->channel_callback->OnDataReceived(channel->channel_callback, s);
|
||||
Stream_Release(s);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
|
@ -29,8 +29,7 @@ typedef struct _H264_CONTEXT H264_CONTEXT;
|
|||
typedef BOOL (*pfnH264SubsystemInit)(H264_CONTEXT* h264);
|
||||
typedef void (*pfnH264SubsystemUninit)(H264_CONTEXT* h264);
|
||||
|
||||
typedef int (*pfnH264SubsystemDecompress)(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize,
|
||||
BYTE* pDstData, DWORD DstFormat, int nDstStep, int nXDst, int nYDst, int nWidth, int nHeight);
|
||||
typedef int (*pfnH264SubsystemDecompress)(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize);
|
||||
|
||||
struct _H264_CONTEXT_SUBSYSTEM
|
||||
{
|
||||
|
@ -50,6 +49,9 @@ struct _H264_CONTEXT
|
|||
UINT32 width;
|
||||
UINT32 height;
|
||||
//int scanline;
|
||||
|
||||
BYTE* pYUVData[3];
|
||||
int iStride[3];
|
||||
|
||||
/*
|
||||
<<<<<<< HEAD
|
||||
|
|
|
@ -101,24 +101,6 @@ if(WITH_LIBAVCODEC)
|
|||
set(FREERDP_LIBAVCODEC_LIBS ${LIBAVCODEC_LIB} ${LIBAVUTIL_LIB})
|
||||
endif()
|
||||
|
||||
if(WITH_LIBAVCODEC OR WITH_OPENH264)
|
||||
if(WITH_H264_SSSE3)
|
||||
add_definitions(-DWITH_H264_SSSE3)
|
||||
set(${MODULE_PREFIX}_SRCS
|
||||
${${MODULE_PREFIX}_SRCS}
|
||||
h264_ssse3.c)
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCC)
|
||||
set(OPTIMIZATION "${OPTIMIZATION} -msse2 -mssse3")
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
set(OPTIMIZATION "${OPTIMIZATION} /arch:SSE2")
|
||||
endif()
|
||||
|
||||
set_property(SOURCE h264_ssse3.c PROPERTY COMPILE_FLAGS ${OPTIMIZATION})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
add_complex_library(MODULE ${MODULE_NAME} TYPE "OBJECT"
|
||||
MONOLITHIC ${MONOLITHIC_BUILD}
|
||||
|
|
|
@ -28,39 +28,14 @@
|
|||
#include <freerdp/primitives.h>
|
||||
#include <freerdp/codec/h264.h>
|
||||
|
||||
#ifdef WITH_LIBAVCODEC
|
||||
int h264_prepare_rgb_buffer(H264_CONTEXT* h264, int width, int height)
|
||||
{
|
||||
UINT32 size;
|
||||
#include <sys/time.h>
|
||||
|
||||
h264->width = width;
|
||||
h264->height = height;
|
||||
h264->scanline = h264->width * 4;
|
||||
size = h264->scanline * h264->height;
|
||||
|
||||
if (size > h264->size)
|
||||
{
|
||||
h264->size = size;
|
||||
|
||||
if (!h264->data)
|
||||
h264->data = (BYTE*) _aligned_malloc(h264->size, 16);
|
||||
else
|
||||
h264->data = (BYTE*) _aligned_realloc(h264->data, h264->size, 16);
|
||||
}
|
||||
|
||||
if (!h264->data)
|
||||
return -1;
|
||||
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Dummy subsystem
|
||||
*/
|
||||
|
||||
static int dummy_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize,
|
||||
BYTE* pDstData, DWORD DstFormat, int nDstStep, int nXDst, int nYDst, int nWidth, int nHeight)
|
||||
static int dummy_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
@ -107,13 +82,9 @@ static void openh264_trace_callback(H264_CONTEXT* h264, int level, const char* m
|
|||
|
||||
static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize)
|
||||
{
|
||||
int srcStep[3];
|
||||
prim_size_t roi;
|
||||
BYTE* pYUVData[3];
|
||||
DECODING_STATE state;
|
||||
SBufferInfo sBufferInfo;
|
||||
SSysMEMBuffer* pSystemBuffer;
|
||||
primitives_t* prims = primitives_get();
|
||||
H264_CONTEXT_OPENH264* sys = (H264_CONTEXT_OPENH264*) h264->pSystemData;
|
||||
|
||||
struct timeval T1,T2;
|
||||
|
@ -147,7 +118,7 @@ static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSiz
|
|||
*/
|
||||
|
||||
if (sBufferInfo.iBufferStatus != 1)
|
||||
state = (*sys->pDecoder)->DecodeFrame2(sys->pDecoder, NULL, 0, pYUVData, &sBufferInfo);
|
||||
state = (*sys->pDecoder)->DecodeFrame2(sys->pDecoder, NULL, 0, h264->pYUVData, &sBufferInfo);
|
||||
|
||||
gettimeofday(&T2,NULL);
|
||||
printf("OpenH264: decoding took: %u sec %u usec\n",(unsigned int)(T2.tv_sec-T1.tv_sec),(unsigned int)(T2.tv_usec-T1.tv_usec));
|
||||
|
@ -164,17 +135,19 @@ static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSiz
|
|||
if (state != 0)
|
||||
return -1;
|
||||
|
||||
if (!h264->pYUVData[0] || !h264->pYUVData[1] || !h264->pYUVData[2])
|
||||
return -1;
|
||||
|
||||
if (sBufferInfo.iBufferStatus != 1)
|
||||
return -1;
|
||||
return -2;
|
||||
|
||||
if (pSystemBuffer->iFormat != videoFormatI420)
|
||||
return -1;
|
||||
|
||||
if (!h264->pYUVData[0] || !h264->pYUVData[1] || !h264->pYUVData[2])
|
||||
return -1;
|
||||
|
||||
h264->iStride[0] = pSystemBuffer->iStride[0];
|
||||
h264->iStride[1] = pSystemBuffer->iStride[1];
|
||||
h264->iStride[2] = pSystemBuffer->iStride[1];
|
||||
|
||||
h264->width = pSystemBuffer->iWidth;
|
||||
h264->height = pSystemBuffer->iHeight;
|
||||
|
||||
|
@ -305,16 +278,11 @@ struct _H264_CONTEXT_LIBAVCODEC
|
|||
};
|
||||
typedef struct _H264_CONTEXT_LIBAVCODEC H264_CONTEXT_LIBAVCODEC;
|
||||
|
||||
static int libavcodec_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize,
|
||||
BYTE* pDstData, DWORD DstFormat, int nDstStep, int nXDst, int nYDst, int nWidth, int nHeight)
|
||||
static int libavcodec_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize)
|
||||
{
|
||||
int status;
|
||||
int srcStep[3];
|
||||
int gotFrame = 0;
|
||||
AVPacket packet;
|
||||
prim_size_t roi;
|
||||
const BYTE* pSrc[3];
|
||||
primitives_t* prims = primitives_get();
|
||||
H264_CONTEXT_LIBAVCODEC* sys = (H264_CONTEXT_LIBAVCODEC*) h264->pSystemData;
|
||||
|
||||
struct timeval T1,T2;
|
||||
|
@ -346,22 +314,19 @@ static int libavcodec_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcS
|
|||
|
||||
if (gotFrame)
|
||||
{
|
||||
if (h264_prepare_rgb_buffer(h264, sys->videoFrame->width, sys->videoFrame->height) < 0)
|
||||
return -1;
|
||||
h264->pYUVData[0] = sys->videoFrame->data[0];
|
||||
h264->pYUVData[1] = sys->videoFrame->data[1];
|
||||
h264->pYUVData[2] = sys->videoFrame->data[2];
|
||||
|
||||
roi.width = h264->width;
|
||||
roi.height = h264->height;
|
||||
h264->iStride[0] = sys->videoFrame->linesize[0];
|
||||
h264->iStride[1] = sys->videoFrame->linesize[1];
|
||||
h264->iStride[2] = sys->videoFrame->linesize[2];
|
||||
|
||||
pSrc[0] = sys->videoFrame->data[0];
|
||||
pSrc[1] = sys->videoFrame->data[1];
|
||||
pSrc[2] = sys->videoFrame->data[2];
|
||||
|
||||
srcStep[0] = sys->videoFrame->linesize[0];
|
||||
srcStep[1] = sys->videoFrame->linesize[1];
|
||||
srcStep[2] = sys->videoFrame->linesize[2];
|
||||
|
||||
prims->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, h264->data, h264->scanline, &roi);
|
||||
h264->width = sys->videoFrame->width;
|
||||
h264->height = sys->videoFrame->height;
|
||||
}
|
||||
else
|
||||
return -2;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -482,6 +447,8 @@ int h264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize,
|
|||
int* iStride;
|
||||
int ret, i, cx, cy;
|
||||
int UncompressedSize;
|
||||
primitives_t *prims = primitives_get();
|
||||
prim_size_t roi;
|
||||
|
||||
struct timeval T1,T2;
|
||||
|
||||
|
@ -489,24 +456,24 @@ int h264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize,
|
|||
return -1;
|
||||
|
||||
#if 0
|
||||
printf("h264_decompress: pSrcData=%p, SrcSize=%u, pDstData=%p, nDstStep=%d, nXDst=%d, nYDst=%d, nWidth=%d, nHeight=%d)\n",
|
||||
pSrcData, SrcSize, *ppDstData, nDstStep, nXDst, nYDst, nWidth, nHeight);
|
||||
printf("h264_decompress: pSrcData=%p, SrcSize=%u, pDstData=%p, nDstStep=%d, nDstHeight=%d, numRegionRects=%d\n",
|
||||
pSrcData, SrcSize, *ppDstData, nDstStep, nDstHeight, numRegionRects);
|
||||
#endif
|
||||
|
||||
if (!(pDstData = *ppDstData))
|
||||
return -1;
|
||||
|
||||
|
||||
<<<<<<< HEAD
|
||||
if (h264->subsystem->Decompress(h264, pSrcData, SrcSize,
|
||||
pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight))
|
||||
return -1;
|
||||
if ((ret = h264->subsystem->Decompress(h264, pSrcData, SrcSize)) < 0)
|
||||
return ret;
|
||||
|
||||
|
||||
UncompressedSize = h264->width * h264->height * 4;
|
||||
if (UncompressedSize > (nDstStep * nDstHeight))
|
||||
return -1;
|
||||
|
||||
pYUVData = h264->pYUVData;
|
||||
iStride = h264->iStride;
|
||||
|
||||
gettimeofday(&T1,NULL);
|
||||
for (i = 0; i < numRegionRects; i++){
|
||||
|
@ -517,32 +484,18 @@ int h264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize,
|
|||
pDstPoint = pDstData + rect->top * nDstStep + rect->left * 4;
|
||||
pYUVPoint[0] = pYUVData[0] + rect->top * iStride[0] + rect->left;
|
||||
|
||||
ret = rect->top/2 * iStride[1] + rect->left/2;
|
||||
pYUVPoint[1] = pYUVData[1] + ret;
|
||||
pYUVPoint[2] = pYUVData[2] + ret;
|
||||
pYUVPoint[1] = pYUVData[1] + rect->top/2 * iStride[1] + rect->left/2;
|
||||
pYUVPoint[2] = pYUVData[2] + rect->top/2 * iStride[2] + rect->left/2;
|
||||
|
||||
#if 0
|
||||
printf("regionRect: x: %d, y: %d, cx: %d, cy: %d\n",
|
||||
rect->left, rect->top, cx, cy);
|
||||
#endif
|
||||
|
||||
#ifdef WITH_H264_SSSE3
|
||||
freerdp_image_yuv420p_to_xrgb_ssse3(pDstPoint, pYUVPoint, cx, cy, iStride, nDstStep);
|
||||
#else
|
||||
/* roi.width = h264->width;
|
||||
roi.height = h264->height;
|
||||
roi.width = cx;
|
||||
roi.height = cy;
|
||||
|
||||
pSrc[0] = sys->videoFrame->data[0];
|
||||
pSrc[1] = sys->videoFrame->data[1];
|
||||
pSrc[2] = sys->videoFrame->data[2];
|
||||
|
||||
srcStep[0] = sys->videoFrame->linesize[0];
|
||||
srcStep[1] = sys->videoFrame->linesize[1];
|
||||
srcStep[2] = sys->videoFrame->linesize[2];
|
||||
|
||||
prims->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, h264->data, h264->scanline, &roi)
|
||||
*/
|
||||
#endif
|
||||
prims->YUV420ToRGB_8u_P3AC4R((const BYTE**) pYUVPoint, iStride, pDstPoint, nDstStep, &roi);
|
||||
}
|
||||
gettimeofday(&T2,NULL);
|
||||
printf("converting took %u sec %u usec\n",(unsigned int)(T2.tv_sec-T1.tv_sec),(unsigned int)(T2.tv_usec-T1.tv_usec));
|
||||
|
@ -582,24 +535,12 @@ H264_CONTEXT* h264_context_new(BOOL Compressor)
|
|||
|
||||
h264 = (H264_CONTEXT*) calloc(1, sizeof(H264_CONTEXT));
|
||||
|
||||
#ifdef WITH_H264_SSSE3
|
||||
if(freerdp_check_ssse3()){
|
||||
printf("SSSE3 seems to be not supported on this system, try without WITH_H264_SSSE3 ...");
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (h264)
|
||||
{
|
||||
h264->Compressor = Compressor;
|
||||
|
||||
h264->subsystem = &g_Subsystem_dummy;
|
||||
|
||||
#ifdef WITH_LIBAVCODEC
|
||||
if (h264_prepare_rgb_buffer(h264, 256, 256) < 0)
|
||||
return NULL;
|
||||
#endif
|
||||
|
||||
if (!h264_context_init(h264))
|
||||
{
|
||||
free(h264);
|
||||
|
@ -614,10 +555,6 @@ void h264_context_free(H264_CONTEXT* h264)
|
|||
{
|
||||
if (h264)
|
||||
{
|
||||
#ifdef WITH_LIBAVCODEC
|
||||
_aligned_free(h264->data);
|
||||
#endif
|
||||
|
||||
h264->subsystem->Uninit(h264);
|
||||
|
||||
free(h264);
|
||||
|
|
|
@ -1,17 +0,0 @@
|
|||
TestOpenH264ASM: TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o
|
||||
gcc -o TestOpenH264ASM TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o -lwinpr
|
||||
|
||||
h264_ssse3.asm.o: ../h264_ssse3_x32.asm
|
||||
nasm -f elf32 -o h264_ssse3.asm.o ../h264_ssse3_x32.asm
|
||||
|
||||
h264.asm.o: ../h264_x32.asm
|
||||
nasm -f elf32 -o h264.asm.o ../h264_x32.asm
|
||||
|
||||
TestOpenH264ASM.c.o: TestOpenH264ASM.c
|
||||
gcc -c -o TestOpenH264ASM.c.o TestOpenH264ASM.c
|
||||
|
||||
h264.c.o: ../h264.c
|
||||
gcc -c -o h264.c.o ../h264.c
|
||||
|
||||
clean:
|
||||
rm -f TestOpenH264ASM TestOpenH264ASM.c.o h264_ssse3.asm.o h264.c.o h264.asm.o
|
|
@ -1,17 +0,0 @@
|
|||
TestOpenH264ASM: TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o
|
||||
gcc -o TestOpenH264ASM TestOpenH264ASM.c.o h264.c.o h264_ssse3.asm.o h264.asm.o -lwinpr
|
||||
|
||||
h264_ssse3.asm.o: ../h264_ssse3_x64.asm
|
||||
nasm -f elf64 -o h264_ssse3.asm.o ../h264_ssse3_x64.asm
|
||||
|
||||
h264.asm.o: ../h264_x64.asm
|
||||
nasm -f elf64 -o h264.asm.o ../h264_x64.asm
|
||||
|
||||
TestOpenH264ASM.c.o: TestOpenH264ASM.c
|
||||
gcc -c -o TestOpenH264ASM.c.o TestOpenH264ASM.c
|
||||
|
||||
h264.c.o: ../h264.c
|
||||
gcc -c -o h264.c.o ../h264.c
|
||||
|
||||
clean:
|
||||
rm -f TestOpenH264ASM TestOpenH264ASM.c.o h264_ssse3.asm.o h264.c.o h264.asm.o
|
|
@ -1,14 +0,0 @@
|
|||
TestOpenH264: TestOpenH264ASM.c.o h264.c.o h264_ssse3.c.o
|
||||
gcc -o TestOpenH264 TestOpenH264ASM.c.o h264.c.o h264_ssse3.c.o -lwinpr
|
||||
|
||||
h264_ssse3.c.o: ../h264_ssse3.c
|
||||
gcc -c -O3 -o h264_ssse3.c.o ../h264_ssse3.c -mssse3
|
||||
|
||||
TestOpenH264ASM.c.o: TestOpenH264ASM.c
|
||||
gcc -c -o TestOpenH264ASM.c.o TestOpenH264ASM.c
|
||||
|
||||
h264.c.o: ../h264.c
|
||||
gcc -c -o h264.c.o ../h264.c
|
||||
|
||||
clean:
|
||||
rm -f TestOpenH264 TestOpenH264ASM.c.o h264_ssse3.asm.o h264.c.o h264.asm.o
|
Binary file not shown.
|
@ -1,92 +0,0 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <winpr/crt.h>
|
||||
|
||||
#include "TestOpenH264ASM.h"
|
||||
|
||||
#define WIDTH 1920
|
||||
#define HEIGHT 1080
|
||||
|
||||
#define SSSE3 1
|
||||
|
||||
|
||||
int main(void){
|
||||
int i,j,k;
|
||||
int ret;
|
||||
unsigned char *pDstData_c,*pDstData_asm,*pSrcData[3];
|
||||
int nSrcStep[2];
|
||||
|
||||
#if SSSE3
|
||||
if(freerdp_check_ssse3()){
|
||||
fprintf(stderr,"ssse3 not supported!\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
#endif
|
||||
|
||||
struct timeval t1,t2,t3;
|
||||
|
||||
pSrcData[0]=malloc(1984*HEIGHT*sizeof(char));
|
||||
pSrcData[1]=malloc(1984*HEIGHT/4*sizeof(char));
|
||||
pSrcData[2]=malloc(1984*HEIGHT/4*sizeof(char));
|
||||
pDstData_asm=_aligned_malloc(WIDTH*(HEIGHT+1)*4*sizeof(char),16);
|
||||
pDstData_c=malloc(WIDTH*(HEIGHT+1)*4*sizeof(char));
|
||||
|
||||
memset(pDstData_asm,0xFF,WIDTH*(HEIGHT+1)*4*sizeof(char));
|
||||
memset(pDstData_c,0xFF,WIDTH*(HEIGHT+1)*4*sizeof(char));
|
||||
|
||||
for(i=0;i<WIDTH*HEIGHT;i++){
|
||||
pSrcData[0][i]=i%255;
|
||||
pSrcData[1][i/4]=pSrcData[0][i];
|
||||
pSrcData[2][i/4]=255-pSrcData[0][i];
|
||||
}
|
||||
|
||||
nSrcStep[0]=1984;
|
||||
nSrcStep[1]=992;
|
||||
|
||||
gettimeofday(&t1,NULL);
|
||||
#if SSSE3
|
||||
ret=freerdp_image_yuv420p_to_xrgb_ssse3(pDstData_asm,pSrcData,WIDTH,HEIGHT,nSrcStep,WIDTH*4);
|
||||
#else
|
||||
ret=freerdp_image_yuv_to_xrgb_asm(pDstData_asm,pSrcData,WIDTH,HEIGHT,nSrcStep,WIDTH*4);
|
||||
#endif
|
||||
gettimeofday(&t2,NULL);
|
||||
freerdp_image_copy_yuv420p_to_xrgb(pDstData_c,WIDTH*4,0,0,WIDTH,HEIGHT,pSrcData,nSrcStep,0,0);
|
||||
gettimeofday(&t3,NULL);
|
||||
|
||||
printf("in asm (0x%08X) it took %u sec %u usec,\nin c %u sec %u usec.\n",ret,(int)(t2.tv_sec-t1.tv_sec),(int)(t2.tv_usec-t1.tv_usec),
|
||||
(int)(t3.tv_sec-t2.tv_sec),(int)(t3.tv_usec-t2.tv_usec));
|
||||
|
||||
printf("in asm the result was %X %X %X\n in c %X %X %X.\n",pDstData_asm[0],pDstData_asm[1],pDstData_asm[2],
|
||||
pDstData_c[0],pDstData_c[1],pDstData_c[2]);
|
||||
|
||||
/*k=0;
|
||||
for(i=0;i<HEIGHT+1;i++){
|
||||
for(j=0;j<WIDTH;j++){
|
||||
printf("%08X:%08X ",((unsigned int*)pDstData_asm)[k],((unsigned int*)pDstData_c)[k]);
|
||||
k++;
|
||||
}
|
||||
puts("\n");
|
||||
}*/
|
||||
|
||||
k=1;
|
||||
for(i=0;i<(WIDTH*HEIGHT*4);i++){
|
||||
if(pDstData_c[i]!=pDstData_asm[i]){
|
||||
k=0;
|
||||
printf("MISSMATCH at %d: %2X != %2X\n",i,(unsigned char)pDstData_asm[i],(unsigned char)pDstData_c[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(k)
|
||||
printf("everything OK\n");
|
||||
|
||||
free(pSrcData[0]);
|
||||
free(pSrcData[1]);
|
||||
free(pSrcData[2]);
|
||||
free(pDstData_c);
|
||||
_aligned_free(pDstData_asm);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -1,7 +0,0 @@
|
|||
int freerdp_image_copy_yuv420p_to_xrgb(unsigned char* pDstData, int nDstStep, int nXDst, int nYDst,
|
||||
int nWidth, int nHeight, unsigned char* pSrcData[3], int nSrcStep[2], int nXSrc, int nYSrc);
|
||||
|
||||
extern int freerdp_image_yuv_to_xrgb_asm(unsigned char *pDstData,unsigned char **pSrcData,int nWidth,int nHeight,int *istride,int scanline);
|
||||
|
||||
extern int freerdp_check_ssse3();
|
||||
extern int freerdp_image_yuv420p_to_xrgb_ssse3(unsigned char *pDstData,unsigned char **pSrcData,int nWidth,int nHeight,int *istride,int scanline);
|
|
@ -40,6 +40,7 @@ set(${MODULE_PREFIX}_OPT_SRCS
|
|||
prim_set_opt.c
|
||||
prim_shift_opt.c
|
||||
prim_sign_opt.c
|
||||
prim_YUV_opt.c
|
||||
prim_YCoCg_opt.c)
|
||||
|
||||
add_definitions(-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE})
|
||||
|
@ -55,11 +56,11 @@ endif()
|
|||
|
||||
if(WITH_SSE2)
|
||||
if(CMAKE_COMPILER_IS_GNUCC)
|
||||
set(OPTIMIZATION "${OPTIMIZATION} -msse2 -mssse3 -Wdeclaration-after-statement")
|
||||
set(OPTIMIZATION "${OPTIMIZATION} -msse2 -mssse3 -O2 -Wdeclaration-after-statement")
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
set(OPTIMIZATION "${OPTIMIZATION} /arch:SSE2")
|
||||
set(OPTIMIZATION "${OPTIMIZATION} /arch:SSE2 /O2")
|
||||
endif()
|
||||
elseif(WITH_NEON)
|
||||
if(CMAKE_COMPILER_IS_GNUCC)
|
||||
|
@ -70,6 +71,16 @@ endif()
|
|||
|
||||
set_property(SOURCE ${${MODULE_PREFIX}_OPT_SRCS} PROPERTY COMPILE_FLAGS ${OPTIMIZATION})
|
||||
|
||||
# always compile with optimization
|
||||
if(CMAKE_COMPILER_IS_GNUCC)
|
||||
set_property(SOURCE ${${MODULE_PREFIX}_SRCS} PROPERTY COMPILE_FLAGS "-O2")
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
set_property(SOURCE ${${MODULE_PREFIX}_SRCS} PROPERTY COMPILE_FLAGS "/O2")
|
||||
endif()
|
||||
|
||||
|
||||
set(${MODULE_PREFIX}_SRCS ${${MODULE_PREFIX}_SRCS} ${${MODULE_PREFIX}_OPT_SRCS})
|
||||
|
||||
add_complex_library(MODULE ${MODULE_NAME} TYPE "OBJECT"
|
||||
|
|
|
@ -44,24 +44,40 @@ pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], int srcStep[3],
|
|||
int Up48, Up475;
|
||||
int Vp403, Vp120;
|
||||
BYTE* pRGB = pDst;
|
||||
int nWidth, nHeight;
|
||||
int last_line, last_column;
|
||||
|
||||
pY = pSrc[0];
|
||||
pU = pSrc[1];
|
||||
pV = pSrc[2];
|
||||
|
||||
last_column = roi->width & 0x01;
|
||||
last_line = roi->height & 0x01;
|
||||
|
||||
nWidth = (roi->width + 1) & ~0x0001;
|
||||
nHeight = (roi->height + 1) & ~0x0001;
|
||||
|
||||
halfWidth = roi->width / 2;
|
||||
halfHeight = roi->height / 2;
|
||||
halfWidth = nWidth / 2;
|
||||
halfHeight = nHeight / 2;
|
||||
|
||||
srcPad[0] = (srcStep[0] - roi->width);
|
||||
srcPad[0] = (srcStep[0] - nWidth);
|
||||
srcPad[1] = (srcStep[1] - halfWidth);
|
||||
srcPad[2] = (srcStep[2] - halfWidth);
|
||||
|
||||
dstPad = (dstStep - (roi->width * 4));
|
||||
dstPad = (dstStep - (nWidth * 4));
|
||||
|
||||
for (y = 0; y < halfHeight; y++)
|
||||
for (y = 0; y < halfHeight; )
|
||||
{
|
||||
for (x = 0; x < halfWidth; x++)
|
||||
y++;
|
||||
if (y == halfHeight)
|
||||
last_line = last_line << 1;
|
||||
|
||||
for (x = 0; x < halfWidth; )
|
||||
{
|
||||
x++;
|
||||
if (x == halfWidth)
|
||||
last_column = last_column << 1;
|
||||
|
||||
U = *pU++;
|
||||
V = *pV++;
|
||||
|
||||
|
@ -105,32 +121,41 @@ pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], int srcStep[3],
|
|||
|
||||
/* 2nd pixel */
|
||||
|
||||
Y = *pY++;
|
||||
Yp = Y << 8;
|
||||
if (!(last_column & 0x02))
|
||||
{
|
||||
Y = *pY++;
|
||||
Yp = Y << 8;
|
||||
|
||||
R = (Yp + Vp403) >> 8;
|
||||
G = (Yp - Up48 - Vp120) >> 8;
|
||||
B = (Yp + Up475) >> 8;
|
||||
R = (Yp + Vp403) >> 8;
|
||||
G = (Yp - Up48 - Vp120) >> 8;
|
||||
B = (Yp + Up475) >> 8;
|
||||
|
||||
if (R < 0)
|
||||
R = 0;
|
||||
else if (R > 255)
|
||||
R = 255;
|
||||
if (R < 0)
|
||||
R = 0;
|
||||
else if (R > 255)
|
||||
R = 255;
|
||||
|
||||
if (G < 0)
|
||||
G = 0;
|
||||
else if (G > 255)
|
||||
G = 255;
|
||||
if (G < 0)
|
||||
G = 0;
|
||||
else if (G > 255)
|
||||
G = 255;
|
||||
|
||||
if (B < 0)
|
||||
B = 0;
|
||||
else if (B > 255)
|
||||
B = 255;
|
||||
if (B < 0)
|
||||
B = 0;
|
||||
else if (B > 255)
|
||||
B = 255;
|
||||
|
||||
*pRGB++ = (BYTE) B;
|
||||
*pRGB++ = (BYTE) G;
|
||||
*pRGB++ = (BYTE) R;
|
||||
*pRGB++ = 0xFF;
|
||||
*pRGB++ = (BYTE) B;
|
||||
*pRGB++ = (BYTE) G;
|
||||
*pRGB++ = (BYTE) R;
|
||||
*pRGB++ = 0xFF;
|
||||
}
|
||||
else
|
||||
{
|
||||
pY++;
|
||||
pRGB += 4;
|
||||
last_column = last_column >> 1;
|
||||
}
|
||||
}
|
||||
|
||||
pY += srcPad[0];
|
||||
|
@ -138,8 +163,12 @@ pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], int srcStep[3],
|
|||
pV -= halfWidth;
|
||||
pRGB += dstPad;
|
||||
|
||||
for (x = 0; x < halfWidth; x++)
|
||||
for (x = 0; x < halfWidth; )
|
||||
{
|
||||
x++;
|
||||
if (x == halfWidth)
|
||||
last_column = last_column << 1;
|
||||
|
||||
U = *pU++;
|
||||
V = *pV++;
|
||||
|
||||
|
@ -183,32 +212,41 @@ pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], int srcStep[3],
|
|||
|
||||
/* 4th pixel */
|
||||
|
||||
Y = *pY++;
|
||||
Yp = Y << 8;
|
||||
if(!(last_column & 0x02))
|
||||
{
|
||||
Y = *pY++;
|
||||
Yp = Y << 8;
|
||||
|
||||
R = (Yp + Vp403) >> 8;
|
||||
G = (Yp - Up48 - Vp120) >> 8;
|
||||
B = (Yp + Up475) >> 8;
|
||||
R = (Yp + Vp403) >> 8;
|
||||
G = (Yp - Up48 - Vp120) >> 8;
|
||||
B = (Yp + Up475) >> 8;
|
||||
|
||||
if (R < 0)
|
||||
R = 0;
|
||||
else if (R > 255)
|
||||
R = 255;
|
||||
if (R < 0)
|
||||
R = 0;
|
||||
else if (R > 255)
|
||||
R = 255;
|
||||
|
||||
if (G < 0)
|
||||
G = 0;
|
||||
else if (G > 255)
|
||||
G = 255;
|
||||
if (G < 0)
|
||||
G = 0;
|
||||
else if (G > 255)
|
||||
G = 255;
|
||||
|
||||
if (B < 0)
|
||||
B = 0;
|
||||
else if (B > 255)
|
||||
B = 255;
|
||||
if (B < 0)
|
||||
B = 0;
|
||||
else if (B > 255)
|
||||
B = 255;
|
||||
|
||||
*pRGB++ = (BYTE) B;
|
||||
*pRGB++ = (BYTE) G;
|
||||
*pRGB++ = (BYTE) R;
|
||||
*pRGB++ = 0xFF;
|
||||
*pRGB++ = (BYTE) B;
|
||||
*pRGB++ = (BYTE) G;
|
||||
*pRGB++ = (BYTE) R;
|
||||
*pRGB++ = 0xFF;
|
||||
}
|
||||
else
|
||||
{
|
||||
pY++;
|
||||
pRGB += 4;
|
||||
last_column = last_column >> 1;
|
||||
}
|
||||
}
|
||||
|
||||
pY += srcPad[0];
|
||||
|
@ -223,6 +261,8 @@ pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], int srcStep[3],
|
|||
void primitives_init_YUV(primitives_t* prims)
|
||||
{
|
||||
prims->YUV420ToRGB_8u_P3AC4R = general_YUV420ToRGB_8u_P3AC4R;
|
||||
|
||||
primitives_init_YUV_opt(prims);
|
||||
}
|
||||
|
||||
void primitives_deinit_YUV(primitives_t* prims)
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(const INT16* pSrc[3], int srcStep, BYTE* pDst, int dstStep, const prim_size_t* roi);
|
||||
|
||||
void primitives_init_YUV(primitives_t* prims);
|
||||
void primitives_init_YUV_opt(primitives_t* prims);
|
||||
void primitives_deinit_YUV(primitives_t* prims);
|
||||
|
||||
#endif /* FREERDP_PRIMITIVES_YUV_H */
|
||||
|
|
|
@ -1,32 +1,32 @@
|
|||
/** function for converting YUV420p data to the RGB format (but without any special upconverting)
|
||||
* It's completely written in nasm-x86-assembly for intel processors supporting SSSE3 and higher.
|
||||
* The target scanline (6th parameter) must be a multiple of 16.
|
||||
* iStride[0] must be (target scanline) / 4 or bigger and iStride[1] the next multiple of four
|
||||
* of the half of iStride[0] or bigger
|
||||
* The target dstStep (6th parameter) must be a multiple of 16.
|
||||
* srcStep[0] must be (target dstStep) / 4 or bigger and srcStep[1] the next multiple of four
|
||||
* of the half of srcStep[0] or bigger
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <emmintrin.h>
|
||||
//#include <immintrin.h>
|
||||
#include <tmmintrin.h>
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <winpr/sysinfo.h>
|
||||
#include <winpr/crt.h>
|
||||
|
||||
int freerdp_check_ssse3()
|
||||
{
|
||||
if(IsProcessorFeaturePresentEx(PF_EX_SSSE3)&&IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
#include <freerdp/types.h>
|
||||
#include <freerdp/primitives.h>
|
||||
|
||||
|
||||
int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidth,int nHeight,int *iStride,int scanline)
|
||||
#ifdef WITH_SSE2
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include <tmmintrin.h>
|
||||
|
||||
pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, int *srcStep,
|
||||
BYTE *pDst, int dstStep, const prim_size_t *roi)
|
||||
{
|
||||
char last_line,last_column;
|
||||
int i,VaddDst,VaddY,VaddUV;
|
||||
int i,nWidth,nHeight,VaddDst,VaddY,VaddU,VaddV;
|
||||
|
||||
BYTE *UData,*VData,*YData;
|
||||
|
||||
|
@ -37,9 +37,12 @@ int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidt
|
|||
buffer=_aligned_malloc(4*16,16);
|
||||
|
||||
|
||||
YData=pSrcData[0];
|
||||
UData=pSrcData[1];
|
||||
VData=pSrcData[2];
|
||||
YData=(BYTE *)pSrc[0];
|
||||
UData=(BYTE *)pSrc[1];
|
||||
VData=(BYTE *)pSrc[2];
|
||||
|
||||
nWidth=roi->width;
|
||||
nHeight=roi->height;
|
||||
|
||||
|
||||
if((last_column=nWidth&3)){
|
||||
|
@ -48,7 +51,7 @@ int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidt
|
|||
case 2: r7=_mm_set_epi32(0,0,0xFFFFFFFF,0xFFFFFFFF); break;
|
||||
case 3: r7=_mm_set_epi32(0,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF); break;
|
||||
}
|
||||
_mm_store_si128(buffer+48,r7);
|
||||
_mm_store_si128(buffer+3,r7);
|
||||
last_column=1;
|
||||
}
|
||||
|
||||
|
@ -61,10 +64,10 @@ int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidt
|
|||
nHeight=nHeight>>1;
|
||||
|
||||
|
||||
VaddDst=(scanline<<1)-(nWidth<<4);
|
||||
VaddY=(iStride[0]<<1)-(nWidth<<2);
|
||||
VaddUV=iStride[1]-(((nWidth<<1)+2)&0xFFFC);
|
||||
|
||||
VaddDst=(dstStep<<1)-(nWidth<<4);
|
||||
VaddY=(srcStep[0]<<1)-(nWidth<<2);
|
||||
VaddU=srcStep[1]-(((nWidth<<1)+2)&0xFFFC);
|
||||
VaddV=srcStep[2]-(((nWidth<<1)+2)&0xFFFC);
|
||||
|
||||
|
||||
while(nHeight-- >0){
|
||||
|
@ -129,7 +132,7 @@ int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidt
|
|||
r1=_mm_add_epi32(r1,r6);
|
||||
r7=_mm_add_epi32(r7,r6);
|
||||
|
||||
_mm_store_si128(buffer+16,r7);
|
||||
_mm_store_si128(buffer+1,r7);
|
||||
|
||||
/* Now we've prepared U-data. Preparing V-data is actually the same, just with other coefficients */
|
||||
r2=_mm_cvtsi32_si128(*(UINT32 *)VData);
|
||||
|
@ -153,7 +156,7 @@ int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidt
|
|||
r2=_mm_add_epi32(r2,r6);
|
||||
r7=_mm_add_epi32(r7,r6);
|
||||
|
||||
_mm_store_si128(buffer+32,r7);
|
||||
_mm_store_si128(buffer+2,r7);
|
||||
|
||||
|
||||
|
||||
|
@ -170,8 +173,8 @@ int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidt
|
|||
|
||||
_mm_store_si128(buffer,r4);
|
||||
}else{
|
||||
r1=_mm_load_si128(buffer+16);
|
||||
r2=_mm_load_si128(buffer+32);
|
||||
r1=_mm_load_si128(buffer+1);
|
||||
r2=_mm_load_si128(buffer+2);
|
||||
r0=_mm_load_si128(buffer);
|
||||
}
|
||||
|
||||
|
@ -220,17 +223,17 @@ int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidt
|
|||
|
||||
|
||||
if(last_column&0x02){
|
||||
r6=_mm_load_si128(buffer+48);
|
||||
r6=_mm_load_si128(buffer+3);
|
||||
r4=_mm_and_si128(r4,r6);
|
||||
r5=_mm_lddqu_si128((__m128i *)pDstData);
|
||||
r5=_mm_lddqu_si128((__m128i *)pDst);
|
||||
r6=_mm_andnot_si128(r6,r5);
|
||||
r4=_mm_or_si128(r4,r6);
|
||||
}
|
||||
_mm_storeu_si128((__m128i *)pDstData,r4);
|
||||
_mm_storeu_si128((__m128i *)pDst,r4);
|
||||
|
||||
//Y data processing in secound line
|
||||
if(!(last_line&0x02)){
|
||||
r4=_mm_cvtsi32_si128(*(UINT32 *)(YData+iStride[0]));
|
||||
r4=_mm_cvtsi32_si128(*(UINT32 *)(YData+srcStep[0]));
|
||||
r7=_mm_set_epi32(0x80800380,0x80800280,0x80800180,0x80800080);
|
||||
r4=_mm_shuffle_epi8(r4,r7);
|
||||
|
||||
|
@ -271,28 +274,40 @@ int freerdp_image_yuv420p_to_xrgb_ssse3(BYTE *pDstData,BYTE **pSrcData,int nWidt
|
|||
|
||||
|
||||
if(last_column&0x02){
|
||||
r6=_mm_load_si128(buffer+48);
|
||||
r6=_mm_load_si128(buffer+3);
|
||||
r4=_mm_and_si128(r4,r6);
|
||||
r5=_mm_lddqu_si128((__m128i *)(pDstData+scanline));
|
||||
r5=_mm_lddqu_si128((__m128i *)(pDst+dstStep));
|
||||
r6=_mm_andnot_si128(r6,r5);
|
||||
r4=_mm_or_si128(r4,r6);
|
||||
|
||||
last_column=last_column>>1;
|
||||
}
|
||||
_mm_storeu_si128((__m128i *)(pDstData+scanline),r4);
|
||||
_mm_storeu_si128((__m128i *)(pDst+dstStep),r4);
|
||||
}
|
||||
|
||||
pDstData+=16;
|
||||
pDst+=16;
|
||||
YData+=4;
|
||||
|
||||
}while(i<nWidth);
|
||||
|
||||
pDstData+=VaddDst;
|
||||
pDst+=VaddDst;
|
||||
YData+=VaddY;
|
||||
UData+=VaddUV;
|
||||
VData+=VaddUV;
|
||||
UData+=VaddU;
|
||||
VData+=VaddV;
|
||||
}
|
||||
|
||||
_aligned_free(buffer);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return PRIMITIVES_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
void primitives_init_YUV_opt(primitives_t *prims)
|
||||
{
|
||||
#ifdef WITH_SSE2
|
||||
if(IsProcessorFeaturePresentEx(PF_EX_SSSE3)&&IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
prims->YUV420ToRGB_8u_P3AC4R=ssse3_YUV420ToRGB_8u_P3AC4R;
|
||||
}
|
||||
#endif
|
||||
}
|
|
@ -155,8 +155,6 @@ wStream* StreamPool_Take(wStreamPool* pool, size_t size)
|
|||
|
||||
Stream_SetPosition(s, 0);
|
||||
Stream_EnsureCapacity(s, size);
|
||||
|
||||
Stream_SetLength(s,size);
|
||||
}
|
||||
|
||||
s->pool = pool;
|
||||
|
|
Loading…
Reference in New Issue