mirror of https://github.com/FreeRDP/FreeRDP
H.264: converting only clipping rects to XRGB
This commit is contained in:
parent
dee50a8ca2
commit
25593c7250
|
@ -23,8 +23,6 @@
|
|||
|
||||
#include "xf_gfx.h"
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
int xf_ResetGraphics(RdpgfxClientContext* context, RDPGFX_RESET_GRAPHICS_PDU* resetGraphics)
|
||||
{
|
||||
xfContext* xfc = (xfContext*) context->custom;
|
||||
|
@ -350,19 +348,10 @@ int xf_SurfaceCommand_Planar(xfContext* xfc, RdpgfxClientContext* context, RDPGF
|
|||
int xf_SurfaceCommand_H264(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd)
|
||||
{
|
||||
int status;
|
||||
UINT32 i, j;
|
||||
int nXDst, nYDst;
|
||||
int nWidth, nHeight;
|
||||
int nbUpdateRects;
|
||||
UINT32 i;
|
||||
BYTE* DstData = NULL;
|
||||
RDPGFX_RECT16* rect;
|
||||
H264_CONTEXT* h264;
|
||||
xfGfxSurface* surface;
|
||||
REGION16 updateRegion;
|
||||
RECTANGLE_16 updateRect;
|
||||
RECTANGLE_16* updateRects;
|
||||
REGION16 clippingRects;
|
||||
RECTANGLE_16 clippingRect;
|
||||
RDPGFX_H264_METABLOCK* meta;
|
||||
RDPGFX_H264_BITMAP_STREAM* bs;
|
||||
|
||||
|
@ -384,7 +373,7 @@ int xf_SurfaceCommand_H264(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_
|
|||
DstData = surface->data;
|
||||
|
||||
status = h264_decompress(xfc->h264, bs->data, bs->length, &DstData,
|
||||
PIXEL_FORMAT_XRGB32, surface->scanline, cmd->left, cmd->top, cmd->width, cmd->height);
|
||||
PIXEL_FORMAT_XRGB32, surface->scanline , surface->height, meta->regionRects, meta->numRegionRects);
|
||||
|
||||
if (status < 0)
|
||||
{
|
||||
|
@ -392,54 +381,11 @@ int xf_SurfaceCommand_H264(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_
|
|||
return -1;
|
||||
}
|
||||
|
||||
if (status < 0)
|
||||
return -1;
|
||||
|
||||
region16_init(&clippingRects);
|
||||
|
||||
for (i = 0; i < meta->numRegionRects; i++)
|
||||
{
|
||||
rect = &(meta->regionRects[i]);
|
||||
|
||||
clippingRect.left = rect->left;
|
||||
clippingRect.top = rect->top;
|
||||
clippingRect.right = rect->right;
|
||||
clippingRect.bottom = rect->bottom;
|
||||
|
||||
region16_union_rect(&clippingRects, &clippingRects, &clippingRect);
|
||||
region16_union_rect(&(xfc->invalidRegion), &(xfc->invalidRegion), (RECTANGLE_16*) &(meta->regionRects[i]));
|
||||
}
|
||||
|
||||
updateRect.left = cmd->left;
|
||||
updateRect.top = cmd->top;
|
||||
updateRect.right = cmd->right;
|
||||
updateRect.bottom = cmd->bottom;
|
||||
|
||||
region16_init(&updateRegion);
|
||||
region16_intersect_rect(&updateRegion, &clippingRects, &updateRect);
|
||||
|
||||
updateRects = (RECTANGLE_16*) region16_rects(&updateRegion, &nbUpdateRects);
|
||||
|
||||
|
||||
for (j = 0; j < nbUpdateRects; j++)
|
||||
{
|
||||
nXDst = updateRects[j].left;
|
||||
nYDst = updateRects[j].top;
|
||||
nWidth = updateRects[j].right - updateRects[j].left;
|
||||
nHeight = updateRects[j].bottom - updateRects[j].top;
|
||||
|
||||
/* update region from decoded H264 buffer */
|
||||
freerdp_image_copy(surface->data, PIXEL_FORMAT_XRGB32, surface->scanline,
|
||||
nXDst, nYDst, nWidth, nHeight,
|
||||
h264->data, PIXEL_FORMAT_XRGB32, h264->scanline, nXDst, nYDst);
|
||||
|
||||
|
||||
region16_union_rect(&(xfc->invalidRegion), &(xfc->invalidRegion), &updateRects[j]);
|
||||
}
|
||||
|
||||
region16_uninit(&updateRegion);
|
||||
region16_uninit(&clippingRects);
|
||||
|
||||
|
||||
if (!xfc->inGfxFrame)
|
||||
xf_OutputUpdate(xfc);
|
||||
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
|
||||
#include <freerdp/api.h>
|
||||
#include <freerdp/types.h>
|
||||
#include <freerdp/channels/rdpgfx.h>
|
||||
|
||||
#ifdef WITH_LIBAVCODEC
|
||||
#ifdef WITH_OPENH264
|
||||
|
@ -43,14 +44,16 @@ struct _H264_CONTEXT
|
|||
{
|
||||
BOOL Compressor;
|
||||
|
||||
BYTE* data;
|
||||
UINT32 size;
|
||||
//BYTE* data;
|
||||
//UINT32 size;
|
||||
UINT32 width;
|
||||
UINT32 height;
|
||||
int scanline;
|
||||
//int scanline;
|
||||
|
||||
#ifdef WITH_OPENH264
|
||||
ISVCDecoder* pDecoder;
|
||||
BYTE* pYUVData[3];
|
||||
int iStride[2];
|
||||
#endif
|
||||
|
||||
#ifdef WITH_LIBAVCODEC
|
||||
|
@ -69,7 +72,7 @@ extern "C" {
|
|||
FREERDP_API int h264_compress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize, BYTE** ppDstData, UINT32* pDstSize);
|
||||
|
||||
FREERDP_API int h264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize,
|
||||
BYTE** ppDstData, DWORD DstFormat, int nDstStep, int nXDst, int nYDst, int nWidth, int nHeight);
|
||||
BYTE** ppDstData, DWORD DstFormat, int nDstStep, int nDstHeight, RDPGFX_RECT16* regionRects, int numRegionRect);
|
||||
|
||||
FREERDP_API void h264_context_reset(H264_CONTEXT* h264);
|
||||
|
||||
|
|
|
@ -32,7 +32,7 @@
|
|||
|
||||
#ifdef WITH_H264_SSSE3
|
||||
extern int check_ssse3();
|
||||
extern int freerdp_image_yuv420p_to_xrgb(BYTE *pDstData,BYTE **pSrcData,int nWidth,int nHeight,int iStride0,int iStride1);
|
||||
extern int freerdp_image_yuv420p_to_xrgb(BYTE *pDstData,BYTE **pSrcData,int nWidth,int nHeight,int *iStride,int scanline);
|
||||
#else
|
||||
#ifdef WITH_H264_ASM
|
||||
extern int freerdp_image_yuv_to_xrgb_asm(BYTE *pDstData,BYTE **pSrcData,int nWidth,int nHeight,int iStride0,int iStride1);
|
||||
|
@ -204,6 +204,7 @@ void h264_dump_yuv_data(BYTE* yuv[], int width, int height, int stride[])
|
|||
fclose(fp);
|
||||
}
|
||||
|
||||
#ifdef WITH_LIBAVCODEC
|
||||
int h264_prepare_rgb_buffer(H264_CONTEXT* h264, int width, int height)
|
||||
{
|
||||
UINT32 size;
|
||||
|
@ -224,6 +225,7 @@ int h264_prepare_rgb_buffer(H264_CONTEXT* h264, int width, int height)
|
|||
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
int freerdp_image_copy_yuv420p_to_xrgb(BYTE* pDstData, int nDstStep, int nXDst, int nYDst,
|
||||
int nWidth, int nHeight, BYTE* pSrcData[3], int nSrcStep[2], int nXSrc, int nYSrc)
|
||||
|
@ -343,13 +345,11 @@ static void openh264_trace_callback(H264_CONTEXT* h264, int level, const char* m
|
|||
printf("%d - %s\n", level, message);
|
||||
}
|
||||
|
||||
static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize,
|
||||
BYTE* pDstData, DWORD DstFormat, int nDstStep, int nXDst, int nYDst, int nWidth, int nHeight)
|
||||
static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize)
|
||||
{
|
||||
DECODING_STATE state;
|
||||
SBufferInfo sBufferInfo;
|
||||
SSysMEMBuffer* pSystemBuffer;
|
||||
BYTE* pYUVData[3];
|
||||
|
||||
struct timeval T1,T2;
|
||||
|
||||
|
@ -360,9 +360,9 @@ static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSiz
|
|||
* Decompress the image. The RDP host only seems to send I420 format.
|
||||
*/
|
||||
|
||||
pYUVData[0] = NULL;
|
||||
pYUVData[1] = NULL;
|
||||
pYUVData[2] = NULL;
|
||||
h264->pYUVData[0] = NULL;
|
||||
h264->pYUVData[1] = NULL;
|
||||
h264->pYUVData[2] = NULL;
|
||||
|
||||
ZeroMemory(&sBufferInfo, sizeof(sBufferInfo));
|
||||
|
||||
|
@ -371,7 +371,7 @@ static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSiz
|
|||
h264->pDecoder,
|
||||
pSrcData,
|
||||
SrcSize,
|
||||
pYUVData,
|
||||
h264->pYUVData,
|
||||
&sBufferInfo);
|
||||
|
||||
/**
|
||||
|
@ -382,7 +382,7 @@ static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSiz
|
|||
*/
|
||||
|
||||
if (sBufferInfo.iBufferStatus != 1)
|
||||
state = (*h264->pDecoder)->DecodeFrame2(h264->pDecoder, NULL, 0, pYUVData, &sBufferInfo);
|
||||
state = (*h264->pDecoder)->DecodeFrame2(h264->pDecoder, NULL, 0, h264->pYUVData, &sBufferInfo);
|
||||
|
||||
gettimeofday(&T2,NULL);
|
||||
printf("OpenH264: decoding took: %u sec %u usec\n",(unsigned int)(T2.tv_sec-T1.tv_sec),(unsigned int)(T2.tv_usec-T1.tv_usec));
|
||||
|
@ -391,7 +391,7 @@ static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSiz
|
|||
|
||||
#if 0
|
||||
printf("h264_decompress: state=%u, pYUVData=[%p,%p,%p], bufferStatus=%d, width=%d, height=%d, format=%d, stride=[%d,%d]\n",
|
||||
state, pYUVData[0], pYUVData[1], pYUVData[2], sBufferInfo.iBufferStatus,
|
||||
state, h264->pYUVData[0], h264->pYUVData[1], h264->pYUVData[2], sBufferInfo.iBufferStatus,
|
||||
pSystemBuffer->iWidth, pSystemBuffer->iHeight, pSystemBuffer->iFormat,
|
||||
pSystemBuffer->iStride[0], pSystemBuffer->iStride[1]);
|
||||
#endif
|
||||
|
@ -399,7 +399,7 @@ static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSiz
|
|||
if (state != 0)
|
||||
return -1;
|
||||
|
||||
if (!pYUVData[0] || !pYUVData[1] || !pYUVData[2])
|
||||
if (!h264->pYUVData[0] || !h264->pYUVData[1] || !h264->pYUVData[2])
|
||||
return -1;
|
||||
|
||||
if (sBufferInfo.iBufferStatus != 1)
|
||||
|
@ -412,11 +412,18 @@ static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSiz
|
|||
|
||||
if (g_H264DumpFrames)
|
||||
{
|
||||
h264_dump_yuv_data(pYUVData, pSystemBuffer->iWidth, pSystemBuffer->iHeight, pSystemBuffer->iStride);
|
||||
h264_dump_yuv_data(h264->pYUVData, pSystemBuffer->iWidth, pSystemBuffer->iHeight, pSystemBuffer->iStride);
|
||||
}
|
||||
|
||||
g_H264FrameId++;
|
||||
|
||||
h264->iStride[0] = pSystemBuffer->iStride[0];
|
||||
h264->iStride[1] = pSystemBuffer->iStride[1];
|
||||
h264->width = pSystemBuffer->iWidth;
|
||||
h264->height = pSystemBuffer->iHeight;
|
||||
|
||||
|
||||
#if 0
|
||||
if (h264_prepare_rgb_buffer(h264, pSystemBuffer->iWidth, pSystemBuffer->iHeight) < 0)
|
||||
return -1;
|
||||
|
||||
|
@ -433,6 +440,7 @@ static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSiz
|
|||
#endif
|
||||
gettimeofday(&T2,NULL);
|
||||
printf("\tconverting took %u sec %u usec\n",(unsigned int)(T2.tv_sec-T1.tv_sec),(unsigned int)(T2.tv_usec-T1.tv_usec));
|
||||
#endif
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -662,10 +670,20 @@ EXCEPTION:
|
|||
|
||||
|
||||
int h264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize,
|
||||
BYTE** ppDstData, DWORD DstFormat, int nDstStep, int nXDst, int nYDst, int nWidth, int nHeight)
|
||||
BYTE** ppDstData, DWORD DstFormat, int nDstStep, int nDstHeight, RDPGFX_RECT16* regionRects, int numRegionRects)
|
||||
{
|
||||
UINT32 UncompressedSize;
|
||||
BYTE* pDstData;
|
||||
BYTE* pDstPoint;
|
||||
|
||||
BYTE** pYUVData;
|
||||
BYTE* pYUVPoint[2];
|
||||
|
||||
RDPGFX_RECT16* rect;
|
||||
int* iStride;
|
||||
int ret, i, cx, cy;
|
||||
|
||||
struct timeval T1,T2;
|
||||
|
||||
if (!h264)
|
||||
return -1;
|
||||
|
@ -675,39 +693,27 @@ int h264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize,
|
|||
#endif
|
||||
|
||||
#if 0
|
||||
printf("h264_decompress: pSrcData=%p, SrcSize=%u, pDstData=%p, nDstStep=%d, nXDst=%d, nYDst=%d, nWidth=%d, nHeight=%d)\n",
|
||||
pSrcData, SrcSize, *ppDstData, nDstStep, nXDst, nYDst, nWidth, nHeight);
|
||||
printf("h264_decompress: pSrcData=%p, SrcSize=%u, pDstData=%p, nDstStep=%d, numRegionRects=%d\n",
|
||||
pSrcData, SrcSize, *ppDstData, nDstStep, numRegionRects);
|
||||
#endif
|
||||
|
||||
/* Allocate a destination buffer (if needed). */
|
||||
|
||||
UncompressedSize = nWidth * nHeight * 4;
|
||||
|
||||
if (UncompressedSize == 0)
|
||||
if (!(pDstData = *ppDstData))
|
||||
return -1;
|
||||
|
||||
pDstData = *ppDstData;
|
||||
|
||||
if (!pDstData)
|
||||
{
|
||||
pDstData = (BYTE*) malloc(UncompressedSize);
|
||||
|
||||
if (!pDstData)
|
||||
return -1;
|
||||
|
||||
*ppDstData = pDstData;
|
||||
}
|
||||
|
||||
if (g_H264DumpFrames)
|
||||
{
|
||||
h264_dump_h264_data(pSrcData, SrcSize);
|
||||
}
|
||||
|
||||
|
||||
#ifdef WITH_OPENH264
|
||||
return openh264_decompress(
|
||||
h264, pSrcData, SrcSize,
|
||||
pDstData, DstFormat, nDstStep,
|
||||
nXDst, nYDst, nWidth, nHeight);
|
||||
ret = openh264_decompress(h264, pSrcData, SrcSize);
|
||||
if (ret != 1)
|
||||
return ret;
|
||||
|
||||
pYUVData = h264->pYUVData;
|
||||
iStride = h264->iStride;
|
||||
#endif
|
||||
|
||||
#ifdef WITH_LIBAVCODEC
|
||||
|
@ -717,6 +723,38 @@ int h264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize,
|
|||
nXDst, nYDst, nWidth, nHeight);
|
||||
#endif
|
||||
|
||||
|
||||
/* Convert I420 (same as IYUV) to XRGB. */
|
||||
UncompressedSize = h264->width * h264->height * 4;
|
||||
if (UncompressedSize > (nDstStep * nDstHeight))
|
||||
return -1;
|
||||
|
||||
|
||||
gettimeofday(&T1,NULL);
|
||||
for (i = 0; i < numRegionRects; i++){
|
||||
rect = &(regionRects[i]);
|
||||
cx = rect->right - rect->left;
|
||||
cy = rect->bottom - rect->top;
|
||||
|
||||
pDstPoint = pDstData + rect->top * nDstStep + rect->left * 4;
|
||||
pYUVPoint[0] = pYUVData[0] + rect->top * iStride[0] + rect->left;
|
||||
|
||||
ret = rect->top/2 * iStride[1] + rect->left/2;
|
||||
pYUVPoint[1] = pYUVData[1] + ret;
|
||||
pYUVPoint[2] = pYUVData[2] + ret;
|
||||
|
||||
#if 1
|
||||
printf("regionRect: x: %d, y: %d, cx: %d, cy: %d\n",
|
||||
rect->left, rect->top, cx, cy);
|
||||
#endif
|
||||
|
||||
#ifdef WITH_H264_SSSE3
|
||||
freerdp_image_yuv420p_to_xrgb(pDstPoint, pYUVPoint, cx, cy, iStride, nDstStep);
|
||||
#endif
|
||||
}
|
||||
gettimeofday(&T2,NULL);
|
||||
printf("converting took %u sec %u usec\n",(unsigned int)(T2.tv_sec-T1.tv_sec),(unsigned int)(T2.tv_usec-T1.tv_usec));
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -737,7 +775,7 @@ H264_CONTEXT* h264_context_new(BOOL Compressor)
|
|||
|
||||
#ifdef WITH_H264_SSSE3
|
||||
if(check_ssse3()){
|
||||
printf("SSSE3 seems to be not supported on this system, try without WITH_H264_ASM ...");
|
||||
printf("SSSE3 seems to be not supported on this system, try without WITH_H264_SSSE3 ...");
|
||||
return FALSE;
|
||||
}
|
||||
#endif
|
||||
|
@ -772,14 +810,13 @@ void h264_context_free(H264_CONTEXT* h264)
|
|||
{
|
||||
if (h264)
|
||||
{
|
||||
_aligne_free(h264->data);
|
||||
|
||||
#ifdef WITH_OPENH264
|
||||
openh264_free(h264);
|
||||
#endif
|
||||
|
||||
#ifdef WITH_LIBAVCODEC
|
||||
libavcodec_free(h264);
|
||||
_aligned_free(h264->data);
|
||||
#endif
|
||||
|
||||
free(h264);
|
||||
|
|
|
@ -73,7 +73,7 @@ freerdp_image_yuv420p_to_xrgb:
|
|||
mov ebp,esp
|
||||
|
||||
;"local variables"
|
||||
sub esp,318 ;res 8 -8,res 8 -16,res 8 -24,U 8 -32,nWidth 2 -34,nHeight 2 -36,iStride0 2 -38,iStride1 2 -40,last_column 1 -41,res 1 -42,G 16 -58,B 16 -74,
|
||||
sub esp,318 ;res 8 -8,res 8 -16,res 8 -24,U 8 -32,nWidth 2 -34,nHeight 2 -36,iStride0 2 -38,iStride1 2 -40,last_line 1 -41,res 1 -42,G 16 -58,B 16 -74,
|
||||
;R 16 -90,add:128 16 -106,sub:128 16 -122,mul:48 16 -138,mul:475 16 -154,mul:403 16 -170,mul:120 16 -186,VaddY 4 -190,VaddUV 4 -194,stack offset 8 -202,
|
||||
;cmp:255 16 -218,cmp:0 16 -234,shuflleR 16 -250,andG 16 -266,shuffleB 16 -280,shuffleY 16 -296,shuffleUV 16 -314,scanline 4 -318
|
||||
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
; a entire function for converting YUV420p data to the RGB format (without any special upconverting)
|
||||
; function for converting YUV420p data to the RGB format (but without any special upconverting)
|
||||
; It's completely written in nasm-x86-assembly for intel processors supporting SSSE3 and higher.
|
||||
; Restrictions are that output data has to be aligned to 16 byte (a question of REAL performance!)
|
||||
; and the width of the resolution must be divisible by four.
|
||||
; The target scanline (6th parameter) must be a multiple of 16.
|
||||
; iStride[0] must be (target scanline) / 4 or bigger and iStride[1] the next multiple of four
|
||||
; of the half of iStride[0] or bigger
|
||||
;
|
||||
section .text
|
||||
global check_ssse3
|
||||
|
@ -48,7 +49,7 @@ check_ssse3_end:
|
|||
ret
|
||||
|
||||
|
||||
;extern int freerdp_image_yuv420p_to_xrgb(unsigned char *pDstData,unsigned char **pSrcData,int nWidth,int nHeight,int istride0,int istride1)
|
||||
;extern int freerdp_image_yuv420p_to_xrgb(unsigned char *pDstData,unsigned char **pSrcData,int nWidth,int nHeight,int *istride,int scanline)
|
||||
global freerdp_image_yuv420p_to_xrgb
|
||||
freerdp_image_yuv420p_to_xrgb:
|
||||
push rbx
|
||||
|
@ -79,11 +80,13 @@ freerdp_image_yuv420p_to_xrgb:
|
|||
xor r14,r14
|
||||
|
||||
;"local variables"
|
||||
sub rsp,316 ;pDstData 8 -8,Y 8 -16,U 8 -24,V 8 -32,nWidth 2 -34,nHeight 2 -36,iStride0 2 -38,iStride1 2 -40,last_column 1 -41,res 1 -42,G 16 -58,B 16 -74,
|
||||
;R 16 -90,add:128 16 -106,sub:128 16 -122,mul:48 16 -138,mul:475 16 -154,mul:403 16 -170,mul:120 16 -186,VaddY 2 -188,VaddUV 2 -190,res 12 -202,cmp:255 16 -218,
|
||||
;cmp:0 16 -234,shuflleR 16 -250,andG 16 -266,shuffleB 16 -280,shuffleY 16 -296,shuffleUV 16 -314,scanline 2 -316
|
||||
sub rsp,338 ;pDstData 8 -8,Y 8 -16,U 8 -24,V 8 -32,nWidth 2 -34,nHeight 2 -36,iStride0 2 -38,iStride1 2 -40,last_line 1 -41,last_column 1 -42,
|
||||
;G 16 -58,B 16 -74,R 16 -90,add:128 16 -106,sub:128 16 -122,mul:48 16 -138,mul:475 16 -154,mul:403 16 -170,mul:120 16 -186,VaddY 2 -188,VaddUV 2 -190,
|
||||
;res 12 -202,cmp:255 16 -218,cmp:0 16 -234,shuflleR 16 -250,andG 16 -266,shuffleB 16 -280,shuffleY 16 -296,shuffleUV 16 -314,andRemainingColumns 16 -330,
|
||||
;VddDst 8 -338
|
||||
|
||||
;last_line: if the last (U,V doubled) line should be skipped, set to 1B
|
||||
;last_line: if the last (U,V doubled) line should be skipped, set to 10B
|
||||
;last_column: if it's the last column in a line, set to 10B (for handling line-endings not multiple by four)
|
||||
|
||||
mov [rbp-8],rdi
|
||||
|
||||
|
@ -97,28 +100,46 @@ freerdp_image_yuv420p_to_xrgb:
|
|||
mov [rbp-34],dx
|
||||
mov r13w,cx
|
||||
|
||||
and r8,0FFFFH
|
||||
mov [rbp-38],r8w
|
||||
and r9,0FFFFH
|
||||
mov [rbp-40],r9w
|
||||
mov r10w,r9w
|
||||
and r10,0FFFFH
|
||||
|
||||
|
||||
shl r8w,1
|
||||
sub r8w,dx
|
||||
mov r11w,r8w
|
||||
mov ecx,[r8]
|
||||
mov [rbp-38],ecx
|
||||
mov r12d,[r8+4]
|
||||
mov [rbp-40],r12w
|
||||
|
||||
mov r10w,dx
|
||||
shr dx,1
|
||||
sub r9w,dx
|
||||
mov r12w,r9w
|
||||
|
||||
mov [rbp-42],dl
|
||||
and byte [rbp-42],11B
|
||||
|
||||
|
||||
mov [rbp-338],r10
|
||||
shr word [rbp-338],1
|
||||
shl cx,1
|
||||
|
||||
mov r8w,[rbp-34]
|
||||
shr r8w,2
|
||||
shl r10w,2
|
||||
add r8w,3
|
||||
and r8w, 0FFFCH
|
||||
|
||||
sub [rbp-338],r8w
|
||||
sub cx,r8w
|
||||
|
||||
shr r8w,1
|
||||
|
||||
mov dx,r8w
|
||||
add dx,2
|
||||
and dx,0FFFCH
|
||||
sub r12w,dx
|
||||
|
||||
shl dword [rbp-338],2
|
||||
mov r11w,cx
|
||||
|
||||
shr r8w,1
|
||||
|
||||
mov r9w,[rbp-38]
|
||||
|
||||
|
||||
;and al,11B
|
||||
;jz no_column_rest
|
||||
|
||||
|
@ -238,11 +259,40 @@ freerdp_image_yuv420p_to_xrgb:
|
|||
mov eax,80038003H
|
||||
mov [rbp-302],eax
|
||||
|
||||
;remaining columns and mask
|
||||
cmp byte [rbp-42],0
|
||||
je freerdp_image_yuv420p_to_xrgb_no_columns_remain
|
||||
|
||||
mov dl,[rbp-42]
|
||||
xor ebx,ebx
|
||||
xor ecx,ecx
|
||||
xor esi,esi
|
||||
|
||||
mov eax,0FFFFFFFFH
|
||||
cmp dl,1H
|
||||
je freerdp_image_yuv420p_to_xrgb_write_columns_remain
|
||||
|
||||
mov ebx,0FFFFFFFFH
|
||||
cmp dl,2H
|
||||
je freerdp_image_yuv420p_to_xrgb_write_columns_remain
|
||||
|
||||
mov ecx,0FFFFFFFFH
|
||||
|
||||
freerdp_image_yuv420p_to_xrgb_write_columns_remain:
|
||||
mov [rbp-330],eax
|
||||
mov [rbp-326],ebx
|
||||
mov [rbp-322],ecx
|
||||
mov [rbp-318],esi
|
||||
mov byte [rbp-42],1
|
||||
|
||||
freerdp_image_yuv420p_to_xrgb_no_columns_remain:
|
||||
|
||||
|
||||
mov rsi,[rbp-16]
|
||||
mov rax,[rbp-24]
|
||||
mov rbx,[rbp-32]
|
||||
|
||||
;jmp freerdp_image_yuv420p_to_xrgb_end
|
||||
|
||||
freerdp_image_yuv420p_to_xrgb_hloop:
|
||||
dec r13w
|
||||
|
@ -254,7 +304,7 @@ not_last_line:
|
|||
|
||||
xor cx,cx
|
||||
freerdp_image_yuv420p_to_xrgb_wloop:
|
||||
;main loop
|
||||
; Well, in the end it should look like this:
|
||||
; C = Y;
|
||||
; D = U - 128;
|
||||
; E = V - 128;
|
||||
|
@ -264,21 +314,31 @@ freerdp_image_yuv420p_to_xrgb_wloop:
|
|||
; B = clip(( 256 * C + 475 * D + 128) >> 8);
|
||||
|
||||
test cx,1B
|
||||
jnz load_yuv_data
|
||||
jnz freerdp_image_yuv420p_to_xrgb_load_yuv_data
|
||||
|
||||
|
||||
;prepare U data
|
||||
; Y-, U- and V-data is stored in different arrays.
|
||||
; We start with processing U-data.
|
||||
|
||||
; at first we fetch four U-values from its array and shuffle them like this:
|
||||
; 0d0d 0c0c 0b0b 0a0a
|
||||
; we've done two things: converting the values to signed words and duplicating
|
||||
; each value, because always two pixel "share" the same U- (and V-) data
|
||||
movd xmm0,[rax]
|
||||
movdqa xmm5,[rbp-314]
|
||||
pshufb xmm0,xmm5 ;but this is the omest instruction of all!!
|
||||
pshufb xmm0,xmm5 ;but this is the awesomest instruction of all!!
|
||||
|
||||
add rax,4
|
||||
|
||||
; then we subtract 128 from each value, so we get D
|
||||
movdqa xmm3,[rbp-122]
|
||||
psubsw xmm0,xmm3
|
||||
|
||||
; we need to do two things with our D, so let's store it for later use
|
||||
movdqa xmm2,xmm0
|
||||
|
||||
; now we can multiply our D with 48 and unpack it to xmm4:xmm0
|
||||
; this is what we need to get G data later on
|
||||
movdqa xmm4,xmm0
|
||||
movdqa xmm7,[rbp-138]
|
||||
pmullw xmm0,xmm7
|
||||
|
@ -289,11 +349,16 @@ freerdp_image_yuv420p_to_xrgb_wloop:
|
|||
punpckhwd xmm7,xmm4
|
||||
movdqa xmm4,xmm7
|
||||
|
||||
; to complete this step, add (?) 128 to each value (rounding ?!)
|
||||
; yeah, add. in the end this will be subtracted from something,
|
||||
; because it's part of G: 256*C - (48*D + 120*E - 128), 48*D-128 !
|
||||
; by the way, our values have become signed dwords during multiplication!
|
||||
movdqa xmm6,[rbp-106]
|
||||
psubd xmm0,xmm6
|
||||
psubd xmm4,xmm6
|
||||
|
||||
|
||||
; to get B data, we need to prepare a second value, D*475+128
|
||||
movdqa xmm1,xmm2
|
||||
movdqa xmm7,[rbp-154]
|
||||
pmullw xmm1,xmm7
|
||||
|
@ -306,10 +371,14 @@ freerdp_image_yuv420p_to_xrgb_wloop:
|
|||
paddd xmm1,xmm6
|
||||
paddd xmm7,xmm6
|
||||
|
||||
; so we got something like this: xmm7:xmm1
|
||||
; this pair contains values for 16 pixel:
|
||||
; aabbccdd
|
||||
; aabbccdd, but we can only work on four pixel at once, so we need to save upper values
|
||||
movdqa [rbp-74],xmm7
|
||||
|
||||
|
||||
;prepare V data
|
||||
; Now we've prepared U-data. Preparing V-data is actually the same, just with other coefficients.
|
||||
movd xmm2,[rbx]
|
||||
pshufb xmm2,xmm5
|
||||
|
||||
|
@ -319,6 +388,7 @@ freerdp_image_yuv420p_to_xrgb_wloop:
|
|||
|
||||
movdqa xmm5,xmm2
|
||||
|
||||
; this is also known as E*403+128, we need it to convert R data
|
||||
movdqa xmm3,xmm2
|
||||
movdqa xmm7,[rbp-170]
|
||||
pmullw xmm2,xmm7
|
||||
|
@ -331,9 +401,11 @@ freerdp_image_yuv420p_to_xrgb_wloop:
|
|||
paddd xmm2,xmm6
|
||||
paddd xmm7,xmm6
|
||||
|
||||
; and preserve upper four values for future ...
|
||||
movdqa [rbp-90],xmm7
|
||||
|
||||
|
||||
; doing this step: E*120
|
||||
movdqa xmm3,xmm5
|
||||
movdqa xmm7,[rbp-186]
|
||||
pmullw xmm3,xmm7
|
||||
|
@ -343,59 +415,128 @@ freerdp_image_yuv420p_to_xrgb_wloop:
|
|||
punpcklwd xmm3,xmm5
|
||||
punpckhwd xmm7,xmm5
|
||||
|
||||
; now we complete what we've begun above:
|
||||
; (48*D-128) + (120*E) = (48*D +120*E -128)
|
||||
paddd xmm0,xmm3
|
||||
paddd xmm4,xmm7
|
||||
|
||||
; and store to memory !
|
||||
movdqa [rbp-58],xmm4
|
||||
|
||||
jmp valid_yuv_data
|
||||
; real assembly programmers do not only produce best results between 0 and 5 o'clock,
|
||||
; but are also kangaroos!
|
||||
jmp freerdp_image_yuv420p_to_xrgb_valid_yuv_data
|
||||
|
||||
load_yuv_data:
|
||||
freerdp_image_yuv420p_to_xrgb_load_yuv_data:
|
||||
; maybe you've wondered about the conditional jump to this label above ?
|
||||
; Well, we prepared UV data for eight pixel in each line, but can only process four
|
||||
; per loop. So we need to load the upper four pixel data from memory every second loop!
|
||||
movdqa xmm1,[rbp-74]
|
||||
movdqa xmm2,[rbp-90]
|
||||
movdqa xmm0,[rbp-58]
|
||||
|
||||
valid_yuv_data:
|
||||
freerdp_image_yuv420p_to_xrgb_valid_yuv_data:
|
||||
|
||||
inc cx
|
||||
cmp cx,r8w
|
||||
jne freerdp_image_yuv420p_to_xrgb_not_last_columns
|
||||
|
||||
shl byte [rbp-42],1
|
||||
|
||||
|
||||
;Y data processing
|
||||
freerdp_image_yuv420p_to_xrgb_not_last_columns:
|
||||
|
||||
; We didn't produce any output yet, so let's do so!
|
||||
; Ok, fetch four pixel from the Y-data array and shuffle them like this:
|
||||
; 00d0 00c0 00b0 00a0, to get signed dwords and multiply by 256
|
||||
movd xmm4,[rsi]
|
||||
pshufb xmm4,[rbp-298]
|
||||
|
||||
movdqa xmm5,xmm4
|
||||
movdqa xmm6,xmm4
|
||||
|
||||
; now we can perform the "real" conversion itself and produce output!
|
||||
paddd xmm4,xmm2
|
||||
psubd xmm5,xmm0
|
||||
paddd xmm6,xmm1
|
||||
|
||||
; in the end, we only need bytes for RGB values.
|
||||
; So, what do we do? right! shifting left makes values bigger and thats always good.
|
||||
; before we had dwords of data, and by shifting left and treating the result
|
||||
; as packed words, we get not only signed words, but do also divide by 256
|
||||
; imagine, data is now ordered this way: ddx0 ccx0 bbx0 aax0, and x is the least
|
||||
; significant byte, that we don't need anymore, because we've done some rounding
|
||||
pslld xmm4,8
|
||||
pslld xmm5,8
|
||||
pslld xmm6,8
|
||||
|
||||
; one thing we still have to face is the clip() function ...
|
||||
; we have still signed words, and there are those min/max instructions in SSE2 ...
|
||||
; the max instruction takes always the bigger of the two operands and stores it in the first one,
|
||||
; and it operates with signs !
|
||||
; if we feed it with our values and zeros, it takes the zeros if our values are smaller than
|
||||
; zero and otherwise our values
|
||||
movdqa xmm7,[rbp-234]
|
||||
pmaxsw xmm4,xmm7 ;what an awesome instruction!
|
||||
pmaxsw xmm5,xmm7
|
||||
pmaxsw xmm6,xmm7
|
||||
|
||||
; the same thing just completely different can be used to limit our values to 255,
|
||||
; but now using the min instruction and 255s
|
||||
movdqa xmm7,[rbp-218]
|
||||
pminsw xmm4,xmm7
|
||||
pminsw xmm5,xmm7
|
||||
pminsw xmm6,xmm7
|
||||
|
||||
; Now we got our bytes.
|
||||
; the moment has come to assemble the three channels R,G and B to the xrgb dwords
|
||||
; on the Red channel we just have to AND each future dword with 00FF0000H
|
||||
pand xmm4,[rbp-250]
|
||||
; on Green channel we have to shuffle somehow, so we get something like this:
|
||||
; 00d0 00c0 00b0 00a0
|
||||
pshufb xmm5,[rbp-266]
|
||||
; and on Blue channel that one:
|
||||
; 000d 000c 000b 000a
|
||||
pshufb xmm6,[rbp-282]
|
||||
|
||||
; and at last we or it together and get this one:
|
||||
; xrgb xrgb xrgb xrgb
|
||||
por xmm4,xmm5
|
||||
por xmm4,xmm6
|
||||
|
||||
movdqa [rdi],xmm4
|
||||
; The only thing left to do now is writing data to memory, but this gets a bit more
|
||||
; complicated if the width is not a multiple of four and it is the last column in line.
|
||||
; but otherwise just play the kangaroo
|
||||
test byte [rbp-42],2
|
||||
je freerdp_image_yuv420p_to_xrgb_column_process_complete
|
||||
|
||||
; let's say, we need to only convert six pixel in width
|
||||
; Ok, the first 4 pixel will be converted just like every 4 pixel else, but
|
||||
; if it's the last loop in line, [rbp-42] is shifted left by one (curious? have a look above),
|
||||
; and we land here. Through initialisation a mask was prepared. In this case it looks like
|
||||
; 0000FFFFH 0000FFFFH 0000FFFFH 0000FFFFH
|
||||
movdqa xmm6,[rbp-330]
|
||||
; we and our output data with this mask to get only the valid pixel
|
||||
pand xmm4,xmm6
|
||||
; then we fetch memory from the destination array ...
|
||||
movdqu xmm5,[rdi]
|
||||
; ... and and it with the inverse mask. We get only those pixel, which should not be updated
|
||||
pandn xmm6,xmm5
|
||||
; we only have to or the two values together and write it back to the destination array,
|
||||
; and only the pixel that should be updated really get changed.
|
||||
por xmm4,xmm6
|
||||
|
||||
freerdp_image_yuv420p_to_xrgb_column_process_complete:
|
||||
movdqu [rdi],xmm4
|
||||
|
||||
|
||||
;Y data processing in second line
|
||||
; Because UV data is the same for two lines, we can process the second line right here,
|
||||
; in the same loop. Only thing we need to do is to add some offsets to the Y- and destination
|
||||
; pointer. These offsets are iStride[0] and the target scanline.
|
||||
; But if we don't need to process the second line, like if we are in the last line of processing nine lines,
|
||||
; we just skip all this.
|
||||
test r14b,2
|
||||
jnz skip_last_line1
|
||||
jnz freerdp_yuv420p_to_xrgb_skip_last_line
|
||||
|
||||
movd xmm4,[rsi+r9]
|
||||
pshufb xmm4,[rbp-298]
|
||||
|
@ -429,21 +570,46 @@ valid_yuv_data:
|
|||
por xmm4,xmm5
|
||||
por xmm4,xmm6
|
||||
|
||||
movdqa [rdi+r10],xmm4
|
||||
test byte [rbp-42],2
|
||||
je freerdp_image_yuv420p_to_xrgb_column_process_complete2
|
||||
|
||||
skip_last_line1:
|
||||
movdqa xmm6,[rbp-330]
|
||||
pand xmm4,xmm6
|
||||
movdqu xmm5,[rdi+r10]
|
||||
pandn xmm6,xmm5
|
||||
por xmm4,xmm6
|
||||
|
||||
; only thing is, we should shift [rbp-42] back here, because we have processed the last column,
|
||||
; and this "special condition" can be released
|
||||
shr byte [rbp-42],1
|
||||
|
||||
freerdp_image_yuv420p_to_xrgb_column_process_complete2:
|
||||
movdqu [rdi+r10],xmm4
|
||||
|
||||
|
||||
freerdp_yuv420p_to_xrgb_skip_last_line:
|
||||
; after all we have to increase the destination- and Y-data pointer by four pixel
|
||||
add rdi,16
|
||||
add rsi,4
|
||||
|
||||
inc cx
|
||||
cmp cx,r8w
|
||||
jne freerdp_image_yuv420p_to_xrgb_wloop
|
||||
|
||||
freerdp_image_yuv420p_to_xrgb_wloop_end:
|
||||
add rdi,r10
|
||||
; after each line we have to add the scanline to the destination pointer, because
|
||||
; we are processing two lines at once, but only increasing the destination pointer
|
||||
; in the first line. Well, we only have one pointer, so it's the easiest way to access
|
||||
; the second line with the one pointer and an offset (scanline)
|
||||
; if we're not converting the full width of the scanline, like only 64 pixel, but the
|
||||
; output buffer was "designed" for 1920p HD, we have to add the remaining length for each line,
|
||||
; to get into the next line.
|
||||
add rdi,[rbp-338]
|
||||
|
||||
; same thing has to be done for Y-data, but with iStride[0] instead of the target scanline
|
||||
add rsi,r11
|
||||
|
||||
; and again for UV data, but here it's enough to add the remaining length, because
|
||||
; UV data is the same for two lines and there exists only one "UV line" on two "real lines"
|
||||
add rax,r12
|
||||
add rbx,r12
|
||||
;mov eax,r12d
|
||||
|
|
|
@ -67,14 +67,17 @@ YUV_to_RGB_asm31:
|
|||
|
||||
ret
|
||||
|
||||
;extern int freerdp_image_yuv_to_xrgb_asm(unsigned char *pDstData,unsigned char **pSrcData,int nWidth,int nHeight);
|
||||
;extern int freerdp_image_yuv_to_xrgb_asm(unsigned char *pDstData,unsigned char **pSrcData,int nWidth,int nHeight,int *istride,int scanline);
|
||||
global freerdp_image_yuv_to_xrgb_asm
|
||||
freerdp_image_yuv_to_xrgb_asm:
|
||||
push rbx
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
;cWidth: cx
|
||||
sub rsp,72 ;pDstData,pSrcData[3],nWidth,nHeight,cHeight,scanline,iStride[1]
|
||||
push rbx
|
||||
sub rsp,82 ;pDstData -8,pSrcData[3] -32,nWidth -40,nHeight -48,cHeight -56,scanline -64,iStride[0] -72,VaddDst -80,last_column 1 -81,last_line 1 -82
|
||||
|
||||
;last_column: set to 10B, if last column should be skipped ('cause UV data is the same for two columns and two columns are processed at once)
|
||||
;last_line: set to 10B, if last line should be skipped ('cause UV data is the same for two lines and two lines are processed at once)
|
||||
|
||||
|
||||
mov [rbp-8],rdi
|
||||
|
@ -86,126 +89,160 @@ freerdp_image_yuv_to_xrgb_asm:
|
|||
mov rax,[rsi+16]
|
||||
mov [rbp-32],rax
|
||||
|
||||
mov [rbp-40],rdx
|
||||
and rdx,0FFFFH
|
||||
;mov [rbp-40],rdx
|
||||
|
||||
|
||||
shr rcx,1 ;/2
|
||||
mov [rbp-48],rcx
|
||||
|
||||
|
||||
shl rdx,2
|
||||
mov [rbp-64],rdx
|
||||
and r9,0FFFFH
|
||||
mov [rbp-64],r9
|
||||
|
||||
shr r9d,1
|
||||
sub r9d,edx
|
||||
shl r9d,2
|
||||
mov [rbp-80],r9
|
||||
|
||||
|
||||
mov rax,[rbp-48]
|
||||
mov [rbp-56],rax
|
||||
|
||||
|
||||
mov [rbp-72],r8
|
||||
mov rax,[rbp-40]
|
||||
mov rcx,[r8]
|
||||
and rcx,0FFFFH
|
||||
mov [rbp-72],rcx
|
||||
shl dword [rbp-72],1
|
||||
sub [rbp-72],rax
|
||||
sub [rbp-72],rdx
|
||||
|
||||
mov r9,[r8+4]
|
||||
mov r8,rcx
|
||||
|
||||
and r9,0FFFFH
|
||||
shr rax,1
|
||||
sub r9,rax
|
||||
|
||||
|
||||
mov al,dl
|
||||
and al,1B
|
||||
mov [rbp-81],al
|
||||
inc dx
|
||||
shr edx,1
|
||||
mov [rbp-40],rdx
|
||||
|
||||
freerdp_image_yuv_to_xrgb_asm_loopH:
|
||||
mov rcx,[rbp-40]
|
||||
shr rcx,1
|
||||
mov cx,[rbp-40]
|
||||
|
||||
|
||||
freerdp_image_yuv_to_xrgb_asm_loopW:
|
||||
mov rax,[rbp-16]
|
||||
mov edi,[rax]
|
||||
and edi,0xFF
|
||||
|
||||
mov rax,[rbp-24]
|
||||
mov esi,[rax]
|
||||
and esi,0xFF
|
||||
|
||||
mov rax,[rbp-32]
|
||||
mov edx,[rax]
|
||||
and edx,0xFF
|
||||
|
||||
call YUV_to_RGB_asm
|
||||
|
||||
mov rbx,[rbp-8]
|
||||
mov [rbx],eax
|
||||
|
||||
|
||||
mov rax,[rbp-16]
|
||||
mov edi,[rax+r8]
|
||||
inc rax
|
||||
mov [rbp-16],rax
|
||||
and edi,0xFF
|
||||
|
||||
mov rax,[rbp-24]
|
||||
mov esi,[rax]
|
||||
and esi,0xFF
|
||||
|
||||
mov rax,[rbp-32]
|
||||
mov edx,[rax]
|
||||
and edx,0xFF
|
||||
|
||||
call YUV_to_RGB_asm
|
||||
|
||||
mov rbx,[rbp-8]
|
||||
mov rdx,[rbp-64]
|
||||
mov [rbx+rdx],eax
|
||||
add rbx,4
|
||||
mov [rbp-8],rbx
|
||||
|
||||
|
||||
mov rax,[rbp-16]
|
||||
mov edi,[rax]
|
||||
and edi,0xFF
|
||||
|
||||
mov rax,[rbp-24]
|
||||
mov esi,[rax]
|
||||
and esi,0xFF
|
||||
|
||||
mov rax,[rbp-32]
|
||||
mov edx,[rax]
|
||||
and edx,0xFF
|
||||
|
||||
call YUV_to_RGB_asm
|
||||
|
||||
mov rbx,[rbp-8]
|
||||
mov [rbx],eax
|
||||
|
||||
|
||||
mov rax,[rbp-16]
|
||||
mov edi,[rax+r8]
|
||||
inc rax
|
||||
mov [rbp-16],rax
|
||||
and edi,0xFF
|
||||
|
||||
mov rax,[rbp-24]
|
||||
mov esi,[rax]
|
||||
inc rax
|
||||
mov [rbp-24],rax
|
||||
and esi,0xFF
|
||||
|
||||
mov rax,[rbp-32]
|
||||
mov edx,[rax]
|
||||
inc rax
|
||||
mov [rbp-32],rax
|
||||
and edx,0xFF
|
||||
|
||||
call YUV_to_RGB_asm
|
||||
|
||||
mov rbx,[rbp-8]
|
||||
mov rdx,[rbp-64]
|
||||
mov [rbx+rdx],eax
|
||||
add rbx,4
|
||||
mov [rbp-8],rbx
|
||||
|
||||
dec cx
|
||||
jne freerdp_image_yuv_to_xrgb_asm_not_last_column
|
||||
|
||||
shl byte [rbp-81],1
|
||||
|
||||
freerdp_image_yuv_to_xrgb_asm_not_last_column:
|
||||
|
||||
|
||||
mov rax,[rbp-16]
|
||||
mov edi,[rax]
|
||||
and edi,0xFF
|
||||
|
||||
mov rax,[rbp-24]
|
||||
mov esi,[rax]
|
||||
and esi,0xFF
|
||||
|
||||
mov rax,[rbp-32]
|
||||
mov edx,[rax]
|
||||
and edx,0xFF
|
||||
|
||||
call YUV_to_RGB_asm
|
||||
|
||||
mov rbx,[rbp-8]
|
||||
mov [rbx],eax
|
||||
|
||||
|
||||
test byte [rbp-81],2
|
||||
jne freerdp_image_yuv_to_xrgb_asm_skip_last_column
|
||||
|
||||
mov rax,[rbp-16]
|
||||
mov edi,[rax+r8]
|
||||
and edi,0xFF
|
||||
|
||||
mov rax,[rbp-24]
|
||||
mov esi,[rax]
|
||||
and esi,0xFF
|
||||
|
||||
mov rax,[rbp-32]
|
||||
mov edx,[rax]
|
||||
and edx,0xFF
|
||||
|
||||
call YUV_to_RGB_asm
|
||||
|
||||
mov rbx,[rbp-8]
|
||||
mov rdx,[rbp-64]
|
||||
mov [rbx+rdx],eax
|
||||
|
||||
freerdp_image_yuv_to_xrgb_asm_skip_last_column:
|
||||
add qword [rbp-8],4
|
||||
inc qword [rbp-16]
|
||||
|
||||
|
||||
mov rax,[rbp-16]
|
||||
mov edi,[rax]
|
||||
and edi,0xFF
|
||||
|
||||
mov rax,[rbp-24]
|
||||
mov esi,[rax]
|
||||
and esi,0xFF
|
||||
|
||||
mov rax,[rbp-32]
|
||||
mov edx,[rax]
|
||||
and edx,0xFF
|
||||
|
||||
call YUV_to_RGB_asm
|
||||
|
||||
mov rbx,[rbp-8]
|
||||
mov [rbx],eax
|
||||
|
||||
|
||||
test byte [rbp-81],2
|
||||
jne freerdp_image_yuv_to_xrgb_asm_skip_last_column2
|
||||
|
||||
mov rax,[rbp-16]
|
||||
mov edi,[rax+r8]
|
||||
and edi,0xFF
|
||||
|
||||
mov rax,[rbp-24]
|
||||
mov esi,[rax]
|
||||
and esi,0xFF
|
||||
|
||||
mov rax,[rbp-32]
|
||||
mov edx,[rax]
|
||||
and edx,0xFF
|
||||
|
||||
call YUV_to_RGB_asm
|
||||
|
||||
;shr [rbp-81],1
|
||||
|
||||
mov rbx,[rbp-8]
|
||||
mov rdx,[rbp-64]
|
||||
mov [rbx+rdx],eax
|
||||
|
||||
freerdp_image_yuv_to_xrgb_asm_skip_last_column2:
|
||||
add qword [rbp-8],4
|
||||
inc qword [rbp-16]
|
||||
inc qword [rbp-24]
|
||||
inc qword [rbp-32]
|
||||
|
||||
|
||||
test cx,0FFFFH
|
||||
jne freerdp_image_yuv_to_xrgb_asm_loopW
|
||||
jmp END
|
||||
|
||||
|
||||
mov rax,[rbp-8]
|
||||
add rax,[rbp-64]
|
||||
add rax,[rbp-80]
|
||||
mov [rbp-8],rax
|
||||
|
||||
mov rax,[rbp-16]
|
||||
|
@ -226,7 +263,7 @@ freerdp_image_yuv_to_xrgb_asm_loopW:
|
|||
;END
|
||||
mov rax,0
|
||||
END:
|
||||
pop rbx
|
||||
mov rsp,rbp
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
|
@ -19,7 +19,7 @@ int main(void){
|
|||
int nSrcStep[2];
|
||||
|
||||
#if SSSE3
|
||||
if(check_ssse3()){
|
||||
if(freerdp_check_ssse3()){
|
||||
fprintf(stderr,"ssse3 not supported!\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
@ -30,8 +30,11 @@ int main(void){
|
|||
pSrcData[0]=malloc(1984*HEIGHT*sizeof(char));
|
||||
pSrcData[1]=malloc(1984*HEIGHT/4*sizeof(char));
|
||||
pSrcData[2]=malloc(1984*HEIGHT/4*sizeof(char));
|
||||
pDstData_asm=_aligned_malloc(WIDTH*HEIGHT*4*sizeof(char),16);
|
||||
pDstData_c=malloc(WIDTH*HEIGHT*4*sizeof(char));
|
||||
pDstData_asm=_aligned_malloc(WIDTH*(HEIGHT+1)*4*sizeof(char),16);
|
||||
pDstData_c=malloc(WIDTH*(HEIGHT+1)*4*sizeof(char));
|
||||
|
||||
memset(pDstData_asm,0xFF,WIDTH*(HEIGHT+1)*4*sizeof(char));
|
||||
memset(pDstData_c,0xFF,WIDTH*(HEIGHT+1)*4*sizeof(char));
|
||||
|
||||
for(i=0;i<WIDTH*HEIGHT;i++){
|
||||
pSrcData[0][i]=i%255;
|
||||
|
@ -44,9 +47,9 @@ int main(void){
|
|||
|
||||
gettimeofday(&t1,NULL);
|
||||
#if SSSE3
|
||||
ret=freerdp_image_yuv420p_to_xrgb(pDstData_asm,pSrcData,WIDTH,HEIGHT,nSrcStep[0],nSrcStep[1]);
|
||||
ret=freerdp_image_yuv420p_to_xrgb_ssse3(pDstData_asm,pSrcData,WIDTH,HEIGHT,nSrcStep,WIDTH*4);
|
||||
#else
|
||||
ret=freerdp_image_yuv_to_xrgb_asm(pDstData_asm,pSrcData,WIDTH,HEIGHT,nSrcStep[0],nSrcStep[1]);
|
||||
ret=freerdp_image_yuv_to_xrgb_asm(pDstData_asm,pSrcData,WIDTH,HEIGHT,nSrcStep,WIDTH*4);
|
||||
#endif
|
||||
gettimeofday(&t2,NULL);
|
||||
freerdp_image_copy_yuv420p_to_xrgb(pDstData_c,WIDTH*4,0,0,WIDTH,HEIGHT,pSrcData,nSrcStep,0,0);
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
int freerdp_image_copy_yuv420p_to_xrgb(unsigned char* pDstData, int nDstStep, int nXDst, int nYDst,
|
||||
int nWidth, int nHeight, unsigned char* pSrcData[3], int nSrcStep[2], int nXSrc, int nYSrc);
|
||||
|
||||
extern int freerdp_image_yuv_to_xrgb_asm(unsigned char *pDstData,unsigned char **pSrcData,int nWidth,int nHeight,int istride0,int istride1);
|
||||
extern int freerdp_image_yuv_to_xrgb_asm(unsigned char *pDstData,unsigned char **pSrcData,int nWidth,int nHeight,int *istride,int scanline);
|
||||
|
||||
extern int check_ssse3();
|
||||
extern int freerdp_image_yuv420p_to_xrgb(unsigned char *pDstData,unsigned char **pSrcData,int nWidth,int nHeight,int istride0,int istride1);
|
||||
extern int freerdp_check_ssse3();
|
||||
extern int freerdp_image_yuv420p_to_xrgb_ssse3(unsigned char *pDstData,unsigned char **pSrcData,int nWidth,int nHeight,int *istride,int scanline);
|
Loading…
Reference in New Issue