OpenH264 YUV data conversion with Intel SSSE3 in assembly
This commit is contained in:
parent
a8945306a1
commit
095a7aba99
2
.gitignore
vendored
2
.gitignore
vendored
@ -106,7 +106,7 @@ client/DirectFB/dfreerdp
|
|||||||
server/Sample/sfreerdp-server
|
server/Sample/sfreerdp-server
|
||||||
server/X11/xfreerdp-server
|
server/X11/xfreerdp-server
|
||||||
xcode
|
xcode
|
||||||
libfreerdp/codec/test/TestOpenH264
|
libfreerdp/codec/test/TestOpenH264ASM
|
||||||
|
|
||||||
# Other
|
# Other
|
||||||
*~
|
*~
|
||||||
|
@ -478,7 +478,6 @@ int dvcman_receive_channel_data_first(IWTSVirtualChannelManager* pChannelMgr, UI
|
|||||||
Stream_Release(channel->dvc_data);
|
Stream_Release(channel->dvc_data);
|
||||||
|
|
||||||
channel->dvc_data = StreamPool_Take(channel->dvcman->pool, length);
|
channel->dvc_data = StreamPool_Take(channel->dvcman->pool, length);
|
||||||
//Stream_AddRef(channel->dvc_data);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -488,6 +487,7 @@ int dvcman_receive_channel_data(IWTSVirtualChannelManager* pChannelMgr, UINT32 C
|
|||||||
int status = 0;
|
int status = 0;
|
||||||
DVCMAN_CHANNEL* channel;
|
DVCMAN_CHANNEL* channel;
|
||||||
UINT32 dataSize = Stream_GetRemainingLength(data);
|
UINT32 dataSize = Stream_GetRemainingLength(data);
|
||||||
|
wStream* s;
|
||||||
|
|
||||||
channel = (DVCMAN_CHANNEL*) dvcman_find_channel_by_id(pChannelMgr, ChannelId);
|
channel = (DVCMAN_CHANNEL*) dvcman_find_channel_by_id(pChannelMgr, ChannelId);
|
||||||
|
|
||||||
@ -500,7 +500,6 @@ int dvcman_receive_channel_data(IWTSVirtualChannelManager* pChannelMgr, UINT32 C
|
|||||||
if (channel->dvc_data)
|
if (channel->dvc_data)
|
||||||
{
|
{
|
||||||
/* Fragmented data */
|
/* Fragmented data */
|
||||||
//if (Stream_GetPosition(channel->dvc_data) + dataSize > (UINT32) Stream_Capacity(channel->dvc_data))
|
|
||||||
if (Stream_GetPosition(channel->dvc_data) + dataSize > (UINT32) Stream_Length(channel->dvc_data))
|
if (Stream_GetPosition(channel->dvc_data) + dataSize > (UINT32) Stream_Length(channel->dvc_data))
|
||||||
{
|
{
|
||||||
DEBUG_WARN("data exceeding declared length!");
|
DEBUG_WARN("data exceeding declared length!");
|
||||||
@ -511,14 +510,15 @@ int dvcman_receive_channel_data(IWTSVirtualChannelManager* pChannelMgr, UINT32 C
|
|||||||
|
|
||||||
Stream_Write(channel->dvc_data, Stream_Pointer(data), dataSize);
|
Stream_Write(channel->dvc_data, Stream_Pointer(data), dataSize);
|
||||||
|
|
||||||
//if (((size_t) Stream_GetPosition(channel->dvc_data)) >= Stream_Capacity(channel->dvc_data)-1)
|
|
||||||
if (((size_t) Stream_GetPosition(channel->dvc_data)) >= Stream_Length(channel->dvc_data)-1)
|
if (((size_t) Stream_GetPosition(channel->dvc_data)) >= Stream_Length(channel->dvc_data)-1)
|
||||||
{
|
{
|
||||||
Stream_SealLength(channel->dvc_data);
|
Stream_SealLength(channel->dvc_data);
|
||||||
Stream_SetPosition(channel->dvc_data, 0);
|
Stream_SetPosition(channel->dvc_data, 0);
|
||||||
status = channel->channel_callback->OnDataReceived(channel->channel_callback, channel->dvc_data);
|
s=channel->dvc_data;
|
||||||
Stream_Release(channel->dvc_data);
|
|
||||||
channel->dvc_data = NULL;
|
channel->dvc_data = NULL;
|
||||||
|
|
||||||
|
status = channel->channel_callback->OnDataReceived(channel->channel_callback, s);
|
||||||
|
Stream_Release(s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -139,7 +139,7 @@ int xf_OutputUpdate(xfContext* xfc)
|
|||||||
int xf_OutputExpose(xfContext* xfc, int x, int y, int width, int height)
|
int xf_OutputExpose(xfContext* xfc, int x, int y, int width, int height)
|
||||||
{
|
{
|
||||||
/** *********************************
|
/** *********************************
|
||||||
* to be improved
|
* to be improved?
|
||||||
* *********************************/
|
* *********************************/
|
||||||
RECTANGLE_16 invalidRect;
|
RECTANGLE_16 invalidRect;
|
||||||
|
|
||||||
@ -366,15 +366,6 @@ int xf_SurfaceCommand_H264(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_
|
|||||||
RDPGFX_H264_METABLOCK* meta;
|
RDPGFX_H264_METABLOCK* meta;
|
||||||
RDPGFX_H264_BITMAP_STREAM* bs;
|
RDPGFX_H264_BITMAP_STREAM* bs;
|
||||||
|
|
||||||
static struct timeval TGES1;
|
|
||||||
struct timeval TGES2,TDEC1,TDEC2;
|
|
||||||
|
|
||||||
TGES2.tv_usec=TGES1.tv_usec;
|
|
||||||
TGES2.tv_sec=TGES1.tv_sec;
|
|
||||||
|
|
||||||
gettimeofday(&TGES1,NULL);
|
|
||||||
printf("time since last xf_SurfaceCommand_H264: %d sec %d usec\n",(int)(TGES1.tv_sec-TGES2.tv_sec),(int)(TGES1.tv_usec-TGES2.tv_usec));
|
|
||||||
|
|
||||||
|
|
||||||
h264 = xfc->h264;
|
h264 = xfc->h264;
|
||||||
|
|
||||||
@ -392,13 +383,14 @@ int xf_SurfaceCommand_H264(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_
|
|||||||
|
|
||||||
DstData = surface->data;
|
DstData = surface->data;
|
||||||
|
|
||||||
gettimeofday(&TDEC1,NULL);
|
|
||||||
status = h264_decompress(xfc->h264, bs->data, bs->length, &DstData,
|
status = h264_decompress(xfc->h264, bs->data, bs->length, &DstData,
|
||||||
PIXEL_FORMAT_XRGB32, surface->scanline, cmd->left, cmd->top, cmd->width, cmd->height);
|
PIXEL_FORMAT_XRGB32, surface->scanline, cmd->left, cmd->top, cmd->width, cmd->height);
|
||||||
gettimeofday(&TDEC2,NULL);
|
|
||||||
//printf("decoding took %d sec %d usec\n",(int)(TDEC2.tv_sec-TDEC1.tv_sec),(int)(TDEC2.tv_usec-TDEC1.tv_usec));
|
|
||||||
|
|
||||||
//printf("xf_SurfaceCommand_H264: status: %d\n", status);
|
if (status < 0)
|
||||||
|
{
|
||||||
|
printf("h264_decompress failure: %d\n",status);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
if (status < 0)
|
if (status < 0)
|
||||||
return -1;
|
return -1;
|
||||||
@ -427,9 +419,6 @@ int xf_SurfaceCommand_H264(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_
|
|||||||
|
|
||||||
updateRects = (RECTANGLE_16*) region16_rects(&updateRegion, &nbUpdateRects);
|
updateRects = (RECTANGLE_16*) region16_rects(&updateRegion, &nbUpdateRects);
|
||||||
|
|
||||||
#if 0
|
|
||||||
printf("numRegionRects: %d nbUpdateRects: %d\n", meta->numRegionRects, nbUpdateRects);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for (j = 0; j < nbUpdateRects; j++)
|
for (j = 0; j < nbUpdateRects; j++)
|
||||||
{
|
{
|
||||||
@ -439,13 +428,6 @@ int xf_SurfaceCommand_H264(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_
|
|||||||
nHeight = updateRects[j].bottom - updateRects[j].top;
|
nHeight = updateRects[j].bottom - updateRects[j].top;
|
||||||
|
|
||||||
/* update region from decoded H264 buffer */
|
/* update region from decoded H264 buffer */
|
||||||
|
|
||||||
#if 0
|
|
||||||
printf("nXDst: %d nYDst: %d nWidth: %d nHeight: %d decoded: width: %d height: %d cmd: left: %d top: %d right: %d bottom: %d\n",
|
|
||||||
nXDst, nYDst, nWidth, nHeight, h264->width, h264->height,
|
|
||||||
cmd->left, cmd->top, cmd->right, cmd->bottom);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
freerdp_image_copy(surface->data, PIXEL_FORMAT_XRGB32, surface->scanline,
|
freerdp_image_copy(surface->data, PIXEL_FORMAT_XRGB32, surface->scanline,
|
||||||
nXDst, nYDst, nWidth, nHeight,
|
nXDst, nYDst, nWidth, nHeight,
|
||||||
h264->data, PIXEL_FORMAT_XRGB32, h264->scanline, nXDst, nYDst);
|
h264->data, PIXEL_FORMAT_XRGB32, h264->scanline, nXDst, nYDst);
|
||||||
@ -457,19 +439,9 @@ int xf_SurfaceCommand_H264(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_
|
|||||||
region16_uninit(&updateRegion);
|
region16_uninit(&updateRegion);
|
||||||
region16_uninit(&clippingRects);
|
region16_uninit(&clippingRects);
|
||||||
|
|
||||||
#if 0
|
|
||||||
/* fill with red for now to distinguish from the rest */
|
|
||||||
|
|
||||||
freerdp_image_fill(surface->data, PIXEL_FORMAT_XRGB32, surface->scanline,
|
if (!xfc->inGfxFrame)
|
||||||
cmd->left, cmd->top, cmd->width, cmd->height, 0xFF0000);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (!xfc->inGfxFrame){
|
|
||||||
xf_OutputUpdate(xfc);
|
xf_OutputUpdate(xfc);
|
||||||
}
|
|
||||||
|
|
||||||
gettimeofday(&TGES2,NULL);
|
|
||||||
printf("the whole command took %d sec %d usec\n",(int)(TGES2.tv_sec-TGES1.tv_sec),(int)(TGES2.tv_usec-TGES1.tv_usec));
|
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -92,17 +92,44 @@ if(WITH_OPENH264)
|
|||||||
add_definitions(-DWITH_OPENH264)
|
add_definitions(-DWITH_OPENH264)
|
||||||
include_directories(${OPENH264_INCLUDE_DIR})
|
include_directories(${OPENH264_INCLUDE_DIR})
|
||||||
set(FREERDP_OPENH264_LIBS ${OPENH264_LIBRARIES})
|
set(FREERDP_OPENH264_LIBS ${OPENH264_LIBRARIES})
|
||||||
|
|
||||||
|
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||||
|
set(arch64 TRUE)
|
||||||
|
else()
|
||||||
|
set(arch64 FALSE)
|
||||||
|
endif()
|
||||||
|
|
||||||
if(WITH_OPENH264_ASM)
|
if(WITH_OPENH264_ASM)
|
||||||
set(OPENH264_ASM OPENH264_ASM_o)
|
set(OPENH264_ASM OPENH264_ASM_o)
|
||||||
set(OBJ ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${OPENH264_ASM}.dir/h264.asm.o)
|
|
||||||
set(SRC ${CMAKE_CURRENT_SOURCE_DIR}/h264.asm)
|
|
||||||
|
|
||||||
add_definitions(-DWITH_OPENH264_ASM)
|
add_definitions(-DWITH_OPENH264_ASM)
|
||||||
add_custom_target(${OPENH264_ASM})
|
add_custom_target(${OPENH264_ASM})
|
||||||
add_custom_command(TARGET ${OPENH264_ASM}
|
|
||||||
COMMAND nasm ARGS -f elf64 -o ${OBJ} ${SRC}
|
if(arch64)
|
||||||
COMMENT "building H.264 asm objects ...")
|
set(SRC ${CMAKE_CURRENT_SOURCE_DIR}/h264_x64.asm)
|
||||||
|
set(OBJ ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${OPENH264_ASM}.dir/h264_x64.asm.o)
|
||||||
|
add_custom_command(TARGET ${OPENH264_ASM}
|
||||||
|
COMMAND nasm ARGS -f elf64 -o ${OBJ} ${SRC})
|
||||||
|
else()
|
||||||
|
message(FATAL_ERROR "OpenH264 YUV data converting is not implemented in 32 bit assembly yet.")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(FREERDP_OPENH264_LIBS ${OPENH264_LIBRARIES} ${OBJ})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(WITH_OPENH264_SSSE3)
|
||||||
|
set(OPENH264_ASM OPENH264_ASM_o)
|
||||||
|
add_definitions(-DWITH_OPENH264_SSSE3)
|
||||||
|
add_custom_target(${OPENH264_ASM})
|
||||||
|
|
||||||
|
if(arch64)
|
||||||
|
set(SRC ${CMAKE_CURRENT_SOURCE_DIR}/h264_ssse3_x64.asm)
|
||||||
|
set(OBJ ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${OPENH264_ASM}.dir/h264_ssse3_x64.asm.o)
|
||||||
|
add_custom_command(TARGET ${OPENH264_ASM}
|
||||||
|
COMMAND nasm ARGS -f elf64 -o ${OBJ} ${SRC})
|
||||||
|
else()
|
||||||
|
message(FATAL_ERROR "OpenH264 YUV data converting with SSSE3 is not implemented in 32 bit assembly yet.")
|
||||||
|
endif()
|
||||||
|
|
||||||
set(FREERDP_OPENH264_LIBS ${OPENH264_LIBRARIES} ${OBJ})
|
set(FREERDP_OPENH264_LIBS ${OPENH264_LIBRARIES} ${OBJ})
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
@ -144,7 +171,7 @@ else()
|
|||||||
install(TARGETS ${MODULE_NAME} DESTINATION ${CMAKE_INSTALL_LIBDIR} EXPORT FreeRDPTargets)
|
install(TARGETS ${MODULE_NAME} DESTINATION ${CMAKE_INSTALL_LIBDIR} EXPORT FreeRDPTargets)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_OPENH264_ASM)
|
if(WITH_OPENH264_ASM OR WITH_OPENH264_SSSE3)
|
||||||
add_dependencies(${MODULE_NAME} ${OPENH264_ASM})
|
add_dependencies(${MODULE_NAME} ${OPENH264_ASM})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@ -1,262 +0,0 @@
|
|||||||
;R=(256*Y+403*(V-128)+128)/256 =(256*Y+403*V-51456)/256
|
|
||||||
;G=(256*Y-48*(U-128)-120*(V-128)+128)/256 =(256*Y-48*U-120*V+21632)/256
|
|
||||||
;B=(256*Y+475*(U-128)+128)/256 =(256*Y+475*U-60672)/256
|
|
||||||
|
|
||||||
section .data
|
|
||||||
dbg1: db "DEBUG1",10
|
|
||||||
dbg2: db "DEBUG2",10
|
|
||||||
dbg3: db "DEBUG3",10
|
|
||||||
dbg4: db "DEBUG4",10
|
|
||||||
dbg equ $-dbg4
|
|
||||||
|
|
||||||
section .bss
|
|
||||||
temp1: resd 1
|
|
||||||
temp2: resd 1
|
|
||||||
temp3: resd 1
|
|
||||||
temp4: resd 1
|
|
||||||
|
|
||||||
section .text
|
|
||||||
extern printf
|
|
||||||
|
|
||||||
;global YUV_to_RGB_asm
|
|
||||||
YUV_to_RGB_asm:
|
|
||||||
shl edi,8
|
|
||||||
|
|
||||||
mov eax,edx
|
|
||||||
imul eax,403
|
|
||||||
mov [temp1],eax
|
|
||||||
add eax,edi
|
|
||||||
sub eax,51456
|
|
||||||
|
|
||||||
jae YUV_to_RGB_asm1
|
|
||||||
mov eax,0
|
|
||||||
jmp YUV_to_RGB_asm11
|
|
||||||
|
|
||||||
YUV_to_RGB_asm1:
|
|
||||||
cmp eax, 0xFFFF
|
|
||||||
jbe YUV_to_RGB_asm11
|
|
||||||
mov eax,0xFF00
|
|
||||||
|
|
||||||
YUV_to_RGB_asm11:
|
|
||||||
and eax,0xFF00
|
|
||||||
shl eax,8
|
|
||||||
|
|
||||||
mov ebx,esi
|
|
||||||
imul ebx,475
|
|
||||||
mov [temp2],ebx
|
|
||||||
add ebx,edi
|
|
||||||
sub ebx,60672
|
|
||||||
|
|
||||||
jae YUV_to_RGB_asm2
|
|
||||||
mov ebx, 0
|
|
||||||
jmp YUV_to_RGB_asm21
|
|
||||||
|
|
||||||
YUV_to_RGB_asm2:
|
|
||||||
cmp ebx,0xFFFF
|
|
||||||
jbe YUV_to_RGB_asm21
|
|
||||||
mov ebx,0xFF00
|
|
||||||
|
|
||||||
YUV_to_RGB_asm21:
|
|
||||||
and ebx,0xFF00
|
|
||||||
shr ebx,8
|
|
||||||
|
|
||||||
imul edx,120
|
|
||||||
mov [temp3],edx
|
|
||||||
sub edi,edx
|
|
||||||
imul esi,48
|
|
||||||
mov [temp4],esi
|
|
||||||
sub edi,esi
|
|
||||||
add edi,21632
|
|
||||||
|
|
||||||
jae YUV_to_RGB_asm3
|
|
||||||
mov edi, 0
|
|
||||||
jmp YUV_to_RGB_asm31
|
|
||||||
|
|
||||||
YUV_to_RGB_asm3:
|
|
||||||
cmp edi,0xFFFF
|
|
||||||
jbe YUV_to_RGB_asm31
|
|
||||||
mov edi, 0xFF00
|
|
||||||
|
|
||||||
YUV_to_RGB_asm31:
|
|
||||||
and edi,0xFF00
|
|
||||||
|
|
||||||
or eax,edi
|
|
||||||
or eax,ebx
|
|
||||||
|
|
||||||
ret
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
YUV_to_RGB_2asm:
|
|
||||||
shl edi,8
|
|
||||||
|
|
||||||
mov eax,[temp1]
|
|
||||||
add eax,edi
|
|
||||||
sub eax,51456
|
|
||||||
|
|
||||||
jae YUV_to_RGB_2asm1
|
|
||||||
mov eax,0
|
|
||||||
jmp YUV_to_RGB_2asm11
|
|
||||||
|
|
||||||
YUV_to_RGB_2asm1:
|
|
||||||
cmp eax, 0xFFFF
|
|
||||||
jbe YUV_to_RGB_2asm11
|
|
||||||
mov eax,0xFF00
|
|
||||||
|
|
||||||
YUV_to_RGB_2asm11:
|
|
||||||
and eax,0xFF00
|
|
||||||
shl eax,8
|
|
||||||
|
|
||||||
mov ebx,[temp2]
|
|
||||||
add ebx,edi
|
|
||||||
sub ebx,60672
|
|
||||||
|
|
||||||
jae YUV_to_RGB_2asm2
|
|
||||||
mov ebx, 0
|
|
||||||
jmp YUV_to_RGB_2asm21
|
|
||||||
|
|
||||||
YUV_to_RGB_2asm2:
|
|
||||||
cmp ebx,0xFFFF
|
|
||||||
jbe YUV_to_RGB_2asm21
|
|
||||||
mov ebx,0xFF00
|
|
||||||
|
|
||||||
YUV_to_RGB_2asm21:
|
|
||||||
and ebx,0xFF00
|
|
||||||
shr ebx,8
|
|
||||||
|
|
||||||
sub edi,[temp3]
|
|
||||||
sub edi,[temp4]
|
|
||||||
add edi,21632
|
|
||||||
|
|
||||||
jae YUV_to_RGB_2asm3
|
|
||||||
mov edi, 0
|
|
||||||
jmp YUV_to_RGB_2asm31
|
|
||||||
|
|
||||||
YUV_to_RGB_2asm3:
|
|
||||||
cmp edi,0xFFFF
|
|
||||||
jbe YUV_to_RGB_2asm31
|
|
||||||
mov edi, 0xFF00
|
|
||||||
|
|
||||||
YUV_to_RGB_2asm31:
|
|
||||||
and edi,0xFF00
|
|
||||||
|
|
||||||
or eax,edi
|
|
||||||
or eax,ebx
|
|
||||||
|
|
||||||
ret
|
|
||||||
|
|
||||||
|
|
||||||
;extern int freerdp_image_yuv_to_xrgb_asm(unsigned char *pDstData,unsigned char **pSrcData,int nWidth,int nHeight);
|
|
||||||
global freerdp_image_yuv_to_xrgb_asm
|
|
||||||
freerdp_image_yuv_to_xrgb_asm:
|
|
||||||
push rbp
|
|
||||||
mov rbp, rsp
|
|
||||||
;cWidth: cx
|
|
||||||
sub rsp,56 ;pDstData,pSrcData[3],nWidth,nHeight,cHeight
|
|
||||||
push rbx
|
|
||||||
|
|
||||||
|
|
||||||
mov [rbp-8],rdi
|
|
||||||
|
|
||||||
mov rax,[rsi]
|
|
||||||
mov [rbp-16],rax
|
|
||||||
mov rax,[rsi+8]
|
|
||||||
mov [rbp-24],rax
|
|
||||||
mov rax,[rsi+16]
|
|
||||||
mov [rbp-32],rax
|
|
||||||
|
|
||||||
mov [rbp-40],rdx
|
|
||||||
|
|
||||||
|
|
||||||
shr rcx,1 ;/2
|
|
||||||
mov [rbp-48],rcx
|
|
||||||
|
|
||||||
|
|
||||||
mov rax,[rbp-48]
|
|
||||||
mov [rbp-56],rax
|
|
||||||
|
|
||||||
freerdp_image_yuv_to_xrgb_asm_loopH:
|
|
||||||
mov rcx,[rbp-40]
|
|
||||||
shr rcx,1
|
|
||||||
|
|
||||||
|
|
||||||
freerdp_image_yuv_to_xrgb_asm_loopW:
|
|
||||||
mov rax,[rbp-16]
|
|
||||||
mov edi,[rax]
|
|
||||||
|
|
||||||
mov rax,[rbp-24]
|
|
||||||
mov esi,[rax]
|
|
||||||
inc rax
|
|
||||||
mov [rbp-24],rax
|
|
||||||
|
|
||||||
mov rax,[rbp-32]
|
|
||||||
mov edx,[rax]
|
|
||||||
inc rax
|
|
||||||
mov [rbp-32],rax
|
|
||||||
|
|
||||||
call YUV_to_RGB_asm
|
|
||||||
|
|
||||||
mov rbx,[rbp-8]
|
|
||||||
mov [rbx],eax
|
|
||||||
|
|
||||||
|
|
||||||
mov rax,[rbp-16]
|
|
||||||
mov rbx,[rbp-40]
|
|
||||||
mov edi,[rax+rbx]
|
|
||||||
inc rax
|
|
||||||
mov [rbp-16],rax
|
|
||||||
|
|
||||||
call YUV_to_RGB_2asm
|
|
||||||
|
|
||||||
mov rbx,[rbp-8]
|
|
||||||
mov rdx,[rbp-40]
|
|
||||||
mov [rbx+rdx],eax
|
|
||||||
add rbx,4
|
|
||||||
mov [rbp-8],rbx
|
|
||||||
|
|
||||||
|
|
||||||
mov rax,[rbp-16]
|
|
||||||
mov edi,[rax]
|
|
||||||
|
|
||||||
call YUV_to_RGB_2asm
|
|
||||||
|
|
||||||
mov rbx,[rbp-8]
|
|
||||||
mov [rbx],eax
|
|
||||||
|
|
||||||
|
|
||||||
mov rax,[rbp-16]
|
|
||||||
mov rbx,[rbp-40]
|
|
||||||
mov edi,[rax+rbx]
|
|
||||||
inc rax
|
|
||||||
mov [rbp-16],rax
|
|
||||||
|
|
||||||
call YUV_to_RGB_2asm
|
|
||||||
|
|
||||||
mov rbx,[rbp-8]
|
|
||||||
mov rdx,[rbp-40]
|
|
||||||
mov [rbx+rdx],eax
|
|
||||||
add rbx,4
|
|
||||||
mov [rbp-8],rbx
|
|
||||||
|
|
||||||
dec cx
|
|
||||||
jne freerdp_image_yuv_to_xrgb_asm_loopW
|
|
||||||
|
|
||||||
|
|
||||||
mov rax,[rbp-8]
|
|
||||||
add rax,[rbp-40]
|
|
||||||
mov [rbp-8],rax
|
|
||||||
|
|
||||||
mov rax,[rbp-16]
|
|
||||||
add rax,[rbp-40]
|
|
||||||
mov [rbp-16],rax
|
|
||||||
|
|
||||||
dec qword [rbp-56]
|
|
||||||
jne freerdp_image_yuv_to_xrgb_asm_loopH
|
|
||||||
|
|
||||||
;END
|
|
||||||
mov rax,0
|
|
||||||
END:
|
|
||||||
pop rbx
|
|
||||||
mov rsp,rbp
|
|
||||||
pop rbp
|
|
||||||
ret
|
|
@ -30,9 +30,14 @@
|
|||||||
|
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
|
|
||||||
|
#ifdef WITH_OPENH264_SSSE3
|
||||||
|
extern int check_ssse3();
|
||||||
|
extern int freerdp_image_yuv420p_to_xrgb(BYTE *pDstData,BYTE **pSrcData,int nWidth,int nHeight,int iStride0,int iStride1);
|
||||||
|
#else
|
||||||
#ifdef WITH_OPENH264_ASM
|
#ifdef WITH_OPENH264_ASM
|
||||||
extern int freerdp_image_yuv_to_xrgb_asm(BYTE *pDstData,BYTE **pSrcData,int nWidth,int nHeight,int iStride0,int iStride1);
|
extern int freerdp_image_yuv_to_xrgb_asm(BYTE *pDstData,BYTE **pSrcData,int nWidth,int nHeight,int iStride0,int iStride1);
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#define USE_GRAY_SCALE 0
|
#define USE_GRAY_SCALE 0
|
||||||
#define USE_UPCONVERT 0
|
#define USE_UPCONVERT 0
|
||||||
@ -381,7 +386,7 @@ static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSiz
|
|||||||
state = (*h264->pDecoder)->DecodeFrame2(h264->pDecoder, NULL, 0, pYUVData, &sBufferInfo);
|
state = (*h264->pDecoder)->DecodeFrame2(h264->pDecoder, NULL, 0, pYUVData, &sBufferInfo);
|
||||||
|
|
||||||
gettimeofday(&T2,NULL);
|
gettimeofday(&T2,NULL);
|
||||||
printf("\tdecoding took: %u sec %u usec\n",(unsigned int)(T2.tv_sec-T1.tv_sec),(unsigned int)(T2.tv_usec-T1.tv_usec));
|
//printf("\tdecoding took: %u sec %u usec\n",(unsigned int)(T2.tv_sec-T1.tv_sec),(unsigned int)(T2.tv_usec-T1.tv_usec));
|
||||||
|
|
||||||
pSystemBuffer = &sBufferInfo.UsrData.sSystemBuffer;
|
pSystemBuffer = &sBufferInfo.UsrData.sSystemBuffer;
|
||||||
|
|
||||||
@ -416,14 +421,15 @@ static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSiz
|
|||||||
if (h264_prepare_rgb_buffer(h264, pSystemBuffer->iWidth, pSystemBuffer->iHeight) < 0)
|
if (h264_prepare_rgb_buffer(h264, pSystemBuffer->iWidth, pSystemBuffer->iHeight) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
#ifdef WITH_OPENH264_SSSE3
|
||||||
|
freerdp_image_yuv420p_to_xrgb(h264->data,pYUVData,h264->width,h264->height,pSystemBuffer->iStride[0],pSystemBuffer->iStride[1]);
|
||||||
|
#else
|
||||||
#ifdef WITH_OPENH264_ASM
|
#ifdef WITH_OPENH264_ASM
|
||||||
gettimeofday(&T1,NULL);
|
|
||||||
freerdp_image_yuv_to_xrgb_asm(h264->data,pYUVData,h264->width,h264->height,pSystemBuffer->iStride[0],pSystemBuffer->iStride[1]);
|
freerdp_image_yuv_to_xrgb_asm(h264->data,pYUVData,h264->width,h264->height,pSystemBuffer->iStride[0],pSystemBuffer->iStride[1]);
|
||||||
gettimeofday(&T2,NULL);
|
|
||||||
printf("\tconverting took: %u sec %u usec\n",(unsigned int)(T2.tv_sec-T1.tv_sec),(unsigned int)(T2.tv_usec-T1.tv_usec));
|
|
||||||
#else
|
#else
|
||||||
freerdp_image_copy_yuv420p_to_xrgb(h264->data, h264->scanline, 0, 0,
|
freerdp_image_copy_yuv420p_to_xrgb(h264->data, h264->scanline, 0, 0,
|
||||||
h264->width, h264->height, pYUVData, pSystemBuffer->iStride, 0, 0);
|
h264->width, h264->height, pYUVData, pSystemBuffer->iStride, 0, 0);
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
@ -448,6 +454,13 @@ static BOOL openh264_init(H264_CONTEXT* h264)
|
|||||||
|
|
||||||
SDecodingParam sDecParam;
|
SDecodingParam sDecParam;
|
||||||
long status;
|
long status;
|
||||||
|
|
||||||
|
#ifdef WITH_OPENH264_SSSE3
|
||||||
|
if(check_ssse3()){
|
||||||
|
printf("SSSE3 seems to be not supported on this system, try without WITH_OPENH264_ASM ...");
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
WelsCreateDecoder(&h264->pDecoder);
|
WelsCreateDecoder(&h264->pDecoder);
|
||||||
|
|
||||||
|
447
libfreerdp/codec/h264_ssse3_x64.asm
Normal file
447
libfreerdp/codec/h264_ssse3_x64.asm
Normal file
@ -0,0 +1,447 @@
|
|||||||
|
section .text
|
||||||
|
global check_ssse3
|
||||||
|
|
||||||
|
check_ssse3:
|
||||||
|
push rbx
|
||||||
|
|
||||||
|
pushf
|
||||||
|
pop rax
|
||||||
|
or rax,1<<21
|
||||||
|
push rax
|
||||||
|
popf
|
||||||
|
pushf
|
||||||
|
pop rax
|
||||||
|
test rax,1<<21
|
||||||
|
jz check_ssse3_end
|
||||||
|
|
||||||
|
and rax,~(1<<21)
|
||||||
|
push rax
|
||||||
|
popf
|
||||||
|
|
||||||
|
|
||||||
|
mov eax,1
|
||||||
|
mov ebx,0
|
||||||
|
cpuid
|
||||||
|
test edx,1<<25 ;sse
|
||||||
|
jz check_ssse3_end
|
||||||
|
test edx,1<<26 ;sse2
|
||||||
|
jz check_ssse3_end
|
||||||
|
test ecx,1<<0 ;sse3
|
||||||
|
jz check_ssse3_end
|
||||||
|
test ecx,1<<9 ;ssse3
|
||||||
|
jz check_ssse3_end
|
||||||
|
|
||||||
|
|
||||||
|
pop rbx
|
||||||
|
mov eax,0
|
||||||
|
ret
|
||||||
|
|
||||||
|
|
||||||
|
check_ssse3_end:
|
||||||
|
pop rbx
|
||||||
|
mov eax,1
|
||||||
|
ret
|
||||||
|
|
||||||
|
|
||||||
|
;extern int freerdp_image_yuv420p_to_xrgb(unsigned char *pDstData,unsigned char **pSrcData,int nWidth,int nHeight,int istride0,int istride1)
|
||||||
|
global freerdp_image_yuv420p_to_xrgb
|
||||||
|
freerdp_image_yuv420p_to_xrgb:
|
||||||
|
push rbx
|
||||||
|
push rbp
|
||||||
|
|
||||||
|
;check whether stack is aligned to 16 byte boundary
|
||||||
|
mov rax,rsp
|
||||||
|
and rax,1111B
|
||||||
|
mov r15,22
|
||||||
|
sub r15b,al
|
||||||
|
sub rsp,r15
|
||||||
|
|
||||||
|
mov rbp,rsp
|
||||||
|
|
||||||
|
xor r10,r10
|
||||||
|
xor r11,r11
|
||||||
|
xor r12,r12
|
||||||
|
xor r13,r13
|
||||||
|
xor r14,r14
|
||||||
|
|
||||||
|
sub rsp,316 ;pDstData 8,Y 8,U 8,V 8,nWidth 2,nHeight 2,iStride0 2,iStride1 2,last_column 1,last_line 1,G 16,B 16,R 16,add:128 16
|
||||||
|
;sub:128 16,mul:48 16,mul:475 16,mul:403 16,mul:120 16,VaddY 2,VaddUV 2,res 12,cmp:255 16,cmp:0 16,shuffleR 16,andG 16,shuffleB 16,shuffleY 16,shuffleUV 16,scanline 2
|
||||||
|
|
||||||
|
;last_line: if the last (U,V doubled) line should be skipped, set to 1B
|
||||||
|
;last_column: if the last 4 columns should be skipped, set to 1B
|
||||||
|
|
||||||
|
mov [rbp-8],rdi
|
||||||
|
|
||||||
|
mov rax,[rsi]
|
||||||
|
mov [rbp-16],rax
|
||||||
|
mov rax,[rsi+8]
|
||||||
|
mov [rbp-24],rax
|
||||||
|
mov rax,[rsi+16]
|
||||||
|
mov [rbp-32],rax
|
||||||
|
|
||||||
|
mov [rbp-34],dx
|
||||||
|
mov r13w,cx
|
||||||
|
|
||||||
|
and r8,0FFFFH
|
||||||
|
mov [rbp-38],r8w
|
||||||
|
and r9,0FFFFH
|
||||||
|
mov [rbp-40],r9w
|
||||||
|
|
||||||
|
|
||||||
|
shl r8w,1
|
||||||
|
sub r8w,dx
|
||||||
|
mov r11w,r8w
|
||||||
|
|
||||||
|
mov r10w,dx
|
||||||
|
shr dx,1
|
||||||
|
sub r9w,dx
|
||||||
|
mov r12w,r9w
|
||||||
|
|
||||||
|
|
||||||
|
mov r8w,[rbp-34]
|
||||||
|
shr r8w,2
|
||||||
|
shl r10w,2
|
||||||
|
|
||||||
|
mov r9w,[rbp-38]
|
||||||
|
|
||||||
|
;and al,11B
|
||||||
|
;jz no_column_rest
|
||||||
|
|
||||||
|
;inc word [rbp-34]
|
||||||
|
|
||||||
|
;no_column_rest:
|
||||||
|
;mov [rbp-41],al
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
mov r14b,r13b
|
||||||
|
and r14b,1B
|
||||||
|
;jz no_line_rest
|
||||||
|
|
||||||
|
inc r13w
|
||||||
|
|
||||||
|
;no_line_rest:
|
||||||
|
shr r13w,1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
;init masks
|
||||||
|
mov eax,00000080H
|
||||||
|
mov [rbp-106],eax
|
||||||
|
mov [rbp-102],eax
|
||||||
|
mov [rbp-98],eax
|
||||||
|
mov [rbp-94],eax
|
||||||
|
|
||||||
|
mov eax,00800080H
|
||||||
|
mov [rbp-122],eax
|
||||||
|
mov [rbp-118],eax
|
||||||
|
mov [rbp-114],eax
|
||||||
|
mov [rbp-110],eax
|
||||||
|
|
||||||
|
mov eax,00300030H
|
||||||
|
mov [rbp-138],eax
|
||||||
|
mov [rbp-134],eax
|
||||||
|
mov [rbp-130],eax
|
||||||
|
mov [rbp-126],eax
|
||||||
|
|
||||||
|
mov eax,01DB01DBH
|
||||||
|
mov [rbp-154],eax
|
||||||
|
mov [rbp-150],eax
|
||||||
|
mov [rbp-146],eax
|
||||||
|
mov [rbp-142],eax
|
||||||
|
|
||||||
|
mov eax,01930193H
|
||||||
|
mov [rbp-170],eax
|
||||||
|
mov [rbp-166],eax
|
||||||
|
mov [rbp-162],eax
|
||||||
|
mov [rbp-158],eax
|
||||||
|
|
||||||
|
mov eax,00780078H
|
||||||
|
mov [rbp-186],eax
|
||||||
|
mov [rbp-182],eax
|
||||||
|
mov [rbp-178],eax
|
||||||
|
mov [rbp-174],eax
|
||||||
|
|
||||||
|
mov eax,000FF0000H
|
||||||
|
mov [rbp-218],eax
|
||||||
|
mov [rbp-214],eax
|
||||||
|
mov [rbp-210],eax
|
||||||
|
mov [rbp-206],eax
|
||||||
|
|
||||||
|
mov eax,00000000H
|
||||||
|
mov [rbp-234],eax
|
||||||
|
mov [rbp-230],eax
|
||||||
|
mov [rbp-226],eax
|
||||||
|
mov [rbp-222],eax
|
||||||
|
|
||||||
|
;shuffle masks
|
||||||
|
;00 xx 00 00 00 xx 00 00 00 xx 00 00 00 xx 00 00
|
||||||
|
;00 rr gg bb 00 rr gg bb 00 rr gg bb 00 rr gg bb
|
||||||
|
mov eax,00FF0000H
|
||||||
|
mov [rbp-250],eax
|
||||||
|
mov [rbp-246],eax
|
||||||
|
mov [rbp-242],eax
|
||||||
|
mov [rbp-238],eax
|
||||||
|
|
||||||
|
mov eax,80800280H
|
||||||
|
mov [rbp-266],eax
|
||||||
|
mov eax,80800680H
|
||||||
|
mov [rbp-262],eax
|
||||||
|
mov eax,80800A80H
|
||||||
|
mov [rbp-258],eax
|
||||||
|
mov eax,80800E80H
|
||||||
|
mov [rbp-254],eax
|
||||||
|
|
||||||
|
mov eax,80808002H
|
||||||
|
mov [rbp-282],eax
|
||||||
|
mov eax,80808006H
|
||||||
|
mov [rbp-278],eax
|
||||||
|
mov eax,8080800AH
|
||||||
|
mov [rbp-274],eax
|
||||||
|
mov eax,8080800EH
|
||||||
|
mov [rbp-270],eax
|
||||||
|
|
||||||
|
;dd cc bb aa
|
||||||
|
;00 00 dd 00 00 00 cc 00 00 00 bb 00 00 00 aa 00
|
||||||
|
mov eax,80800080H
|
||||||
|
mov [rbp-298],eax
|
||||||
|
mov eax,80800180H
|
||||||
|
mov [rbp-294],eax
|
||||||
|
mov eax,80800280H
|
||||||
|
mov [rbp-290],eax
|
||||||
|
mov eax,80800380H
|
||||||
|
mov [rbp-286],eax
|
||||||
|
|
||||||
|
;dd cc bb aa
|
||||||
|
;00 dd 00 dd 00 cc 00 cc 00 bb 00 bb 00 aa 00 aa
|
||||||
|
mov eax,80008000H
|
||||||
|
mov [rbp-314],eax
|
||||||
|
mov eax,80018001H
|
||||||
|
mov [rbp-310],eax
|
||||||
|
mov eax,80028002H
|
||||||
|
mov [rbp-306],eax
|
||||||
|
mov eax,80038003H
|
||||||
|
mov [rbp-302],eax
|
||||||
|
|
||||||
|
|
||||||
|
mov rsi,[rbp-16]
|
||||||
|
mov rax,[rbp-24]
|
||||||
|
mov rbx,[rbp-32]
|
||||||
|
|
||||||
|
|
||||||
|
freerdp_image_yuv420p_to_xrgb_hloop:
|
||||||
|
dec r13w
|
||||||
|
js freerdp_image_yuv420p_to_xrgb_hloop_end
|
||||||
|
jnz not_last_line
|
||||||
|
|
||||||
|
shl r14b,1
|
||||||
|
not_last_line:
|
||||||
|
|
||||||
|
xor cx,cx
|
||||||
|
freerdp_image_yuv420p_to_xrgb_wloop:
|
||||||
|
;main loop
|
||||||
|
; C = Y;
|
||||||
|
; D = U - 128;
|
||||||
|
; E = V - 128;
|
||||||
|
;
|
||||||
|
; R = clip(( 256 * C + 403 * E + 128) >> 8);
|
||||||
|
; G = clip(( 256 * C - 48 * D - 120 * E + 128) >> 8);
|
||||||
|
; B = clip(( 256 * C + 475 * D + 128) >> 8);
|
||||||
|
|
||||||
|
test cx,1B
|
||||||
|
jnz load_yuv_data
|
||||||
|
|
||||||
|
|
||||||
|
;prepare U data
|
||||||
|
movd xmm0,[rax]
|
||||||
|
movdqa xmm5,[rbp-314]
|
||||||
|
pshufb xmm0,xmm5
|
||||||
|
|
||||||
|
add rax,4
|
||||||
|
|
||||||
|
movdqa xmm3,[rbp-122]
|
||||||
|
psubsw xmm0,xmm3
|
||||||
|
|
||||||
|
movdqa xmm2,xmm0
|
||||||
|
|
||||||
|
movdqa xmm4,xmm0
|
||||||
|
movdqa xmm7,[rbp-138]
|
||||||
|
pmullw xmm0,xmm7
|
||||||
|
pmulhw xmm4,xmm7
|
||||||
|
|
||||||
|
movdqa xmm7,xmm0
|
||||||
|
punpcklwd xmm0,xmm4 ;what an awesome instruction!
|
||||||
|
punpckhwd xmm7,xmm4
|
||||||
|
movdqa xmm4,xmm7
|
||||||
|
|
||||||
|
movdqa xmm6,[rbp-106]
|
||||||
|
psubd xmm0,xmm6
|
||||||
|
psubd xmm4,xmm6
|
||||||
|
|
||||||
|
|
||||||
|
movdqa xmm1,xmm2
|
||||||
|
movdqa xmm7,[rbp-154]
|
||||||
|
pmullw xmm1,xmm7
|
||||||
|
pmulhw xmm2,xmm7
|
||||||
|
|
||||||
|
movdqa xmm7,xmm1
|
||||||
|
punpcklwd xmm1,xmm2
|
||||||
|
punpckhwd xmm7,xmm2
|
||||||
|
|
||||||
|
paddd xmm1,xmm6
|
||||||
|
paddd xmm7,xmm6
|
||||||
|
|
||||||
|
movdqa [rbp-74],xmm7
|
||||||
|
|
||||||
|
|
||||||
|
;prepare V data
|
||||||
|
movd xmm2,[rbx]
|
||||||
|
pshufb xmm2,xmm5
|
||||||
|
|
||||||
|
add rbx,4
|
||||||
|
|
||||||
|
psubsw xmm2,xmm3
|
||||||
|
|
||||||
|
movdqa xmm5,xmm2
|
||||||
|
|
||||||
|
movdqa xmm3,xmm2
|
||||||
|
movdqa xmm7,[rbp-170]
|
||||||
|
pmullw xmm2,xmm7
|
||||||
|
pmulhw xmm3,xmm7
|
||||||
|
|
||||||
|
movdqa xmm7,xmm2
|
||||||
|
punpcklwd xmm2,xmm3
|
||||||
|
punpckhwd xmm7,xmm3
|
||||||
|
|
||||||
|
paddd xmm2,xmm6
|
||||||
|
paddd xmm7,xmm6
|
||||||
|
|
||||||
|
movdqa [rbp-90],xmm7
|
||||||
|
|
||||||
|
|
||||||
|
movdqa xmm3,xmm5
|
||||||
|
movdqa xmm7,[rbp-186]
|
||||||
|
pmullw xmm3,xmm7
|
||||||
|
pmulhw xmm5,xmm7
|
||||||
|
|
||||||
|
movdqa xmm7,xmm3
|
||||||
|
punpcklwd xmm3,xmm5
|
||||||
|
punpckhwd xmm7,xmm5
|
||||||
|
|
||||||
|
paddd xmm0,xmm3
|
||||||
|
paddd xmm4,xmm7
|
||||||
|
|
||||||
|
movdqa [rbp-58],xmm4
|
||||||
|
|
||||||
|
jmp valid_yuv_data
|
||||||
|
|
||||||
|
load_yuv_data:
|
||||||
|
movdqa xmm1,[rbp-74]
|
||||||
|
movdqa xmm2,[rbp-90]
|
||||||
|
movdqa xmm0,[rbp-58]
|
||||||
|
|
||||||
|
valid_yuv_data:
|
||||||
|
|
||||||
|
|
||||||
|
;Y data processing
|
||||||
|
movd xmm4,[rsi]
|
||||||
|
pshufb xmm4,[rbp-298]
|
||||||
|
|
||||||
|
movdqa xmm5,xmm4
|
||||||
|
movdqa xmm6,xmm4
|
||||||
|
|
||||||
|
paddd xmm4,xmm2
|
||||||
|
psubd xmm5,xmm0
|
||||||
|
paddd xmm6,xmm1
|
||||||
|
|
||||||
|
pslld xmm4,8
|
||||||
|
pslld xmm5,8
|
||||||
|
pslld xmm6,8
|
||||||
|
|
||||||
|
movdqa xmm7,[rbp-234]
|
||||||
|
pmaxsw xmm4,xmm7 ;what an awesome instruction!
|
||||||
|
pmaxsw xmm5,xmm7
|
||||||
|
pmaxsw xmm6,xmm7
|
||||||
|
|
||||||
|
movdqa xmm7,[rbp-218]
|
||||||
|
pminsw xmm4,xmm7
|
||||||
|
pminsw xmm5,xmm7
|
||||||
|
pminsw xmm6,xmm7
|
||||||
|
|
||||||
|
pand xmm4,[rbp-250]
|
||||||
|
pshufb xmm5,[rbp-266]
|
||||||
|
pshufb xmm6,[rbp-282]
|
||||||
|
|
||||||
|
por xmm4,xmm5
|
||||||
|
por xmm4,xmm6
|
||||||
|
|
||||||
|
movdqa [rdi],xmm4
|
||||||
|
|
||||||
|
|
||||||
|
;Y data processing in second line
|
||||||
|
test r14b,2
|
||||||
|
jnz skip_last_line1
|
||||||
|
|
||||||
|
movd xmm4,[rsi+r9]
|
||||||
|
pshufb xmm4,[rbp-298]
|
||||||
|
|
||||||
|
|
||||||
|
movdqa xmm5,xmm4
|
||||||
|
movdqa xmm6,xmm4
|
||||||
|
|
||||||
|
paddd xmm4,xmm2
|
||||||
|
psubd xmm5,xmm0
|
||||||
|
paddd xmm6,xmm1
|
||||||
|
|
||||||
|
pslld xmm4,8
|
||||||
|
pslld xmm5,8
|
||||||
|
pslld xmm6,8
|
||||||
|
|
||||||
|
movdqa xmm7,[rbp-234]
|
||||||
|
pmaxsw xmm4,xmm7 ;what an awesome instruction!
|
||||||
|
pmaxsw xmm5,xmm7
|
||||||
|
pmaxsw xmm6,xmm7
|
||||||
|
|
||||||
|
movdqa xmm7,[rbp-218]
|
||||||
|
pminsw xmm4,xmm7
|
||||||
|
pminsw xmm5,xmm7
|
||||||
|
pminsw xmm6,xmm7
|
||||||
|
|
||||||
|
pand xmm4,[rbp-250]
|
||||||
|
pshufb xmm5,[rbp-266]
|
||||||
|
pshufb xmm6,[rbp-282]
|
||||||
|
|
||||||
|
por xmm4,xmm5
|
||||||
|
por xmm4,xmm6
|
||||||
|
|
||||||
|
movdqa [rdi+r10],xmm4
|
||||||
|
|
||||||
|
skip_last_line1:
|
||||||
|
add rdi,16
|
||||||
|
add rsi,4
|
||||||
|
|
||||||
|
inc cx
|
||||||
|
cmp cx,r8w
|
||||||
|
jne freerdp_image_yuv420p_to_xrgb_wloop
|
||||||
|
|
||||||
|
freerdp_image_yuv420p_to_xrgb_wloop_end:
|
||||||
|
add rdi,r10
|
||||||
|
|
||||||
|
add rsi,r11
|
||||||
|
|
||||||
|
add rax,r12
|
||||||
|
add rbx,r12
|
||||||
|
;mov eax,r12d
|
||||||
|
;jmp freerdp_image_yuv420p_to_xrgb_end
|
||||||
|
|
||||||
|
jmp freerdp_image_yuv420p_to_xrgb_hloop
|
||||||
|
|
||||||
|
freerdp_image_yuv420p_to_xrgb_hloop_end:
|
||||||
|
|
||||||
|
mov eax,0
|
||||||
|
freerdp_image_yuv420p_to_xrgb_end:
|
||||||
|
mov rsp,rbp
|
||||||
|
add rsp,r15
|
||||||
|
pop rbp
|
||||||
|
pop rbx
|
||||||
|
ret
|
@ -2,10 +2,6 @@
|
|||||||
;G=(256*Y-48*(U-128)-120*(V-128)+128)/256 =(256*Y-48*U-120*V+21632)/256
|
;G=(256*Y-48*(U-128)-120*(V-128)+128)/256 =(256*Y-48*U-120*V+21632)/256
|
||||||
;B=(256*Y+475*(U-128)+128)/256 =(256*Y+475*U-60672)/256
|
;B=(256*Y+475*(U-128)+128)/256 =(256*Y+475*U-60672)/256
|
||||||
|
|
||||||
section .data
|
|
||||||
debug: db "DEBUG",10
|
|
||||||
dblen: equ $-debug
|
|
||||||
|
|
||||||
section .text
|
section .text
|
||||||
;global YUV_to_RGB_asm
|
;global YUV_to_RGB_asm
|
||||||
YUV_to_RGB_asm:
|
YUV_to_RGB_asm:
|
20
libfreerdp/codec/test/Makefile.TestOpenH264ASM
Normal file
20
libfreerdp/codec/test/Makefile.TestOpenH264ASM
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# Stand-alone build rules for the TestOpenH264ASM test program.
# NOTE(review): recipe lines appear here without their leading TAB; make
# requires one - confirm against the real file.
#
# Default target: link the SSSE3 assembly object, the test driver object and
# the C codec object into the TestOpenH264ASM executable.
TestOpenH264ASM: h264_ssse3.asm.o TestOpenH264ASM.c.o h264.c.o
|
||||||
|
gcc -o TestOpenH264ASM h264_ssse3.asm.o TestOpenH264ASM.c.o h264.c.o
|
||||||
|
|
||||||
|
# Assemble ../h264_ssse3_x64.asm with nasm into an ELF64 object.
h264_ssse3.asm.o: ../h264_ssse3_x64.asm
|
||||||
|
nasm -f elf64 -o h264_ssse3.asm.o ../h264_ssse3_x64.asm
|
||||||
|
|
||||||
|
# Assemble ../h264.asm with nasm into an ELF64 object (only used by `old`).
h264.asm.o: ../h264.asm
|
||||||
|
nasm -f elf64 -o h264.asm.o ../h264.asm
|
||||||
|
|
||||||
|
# Compile the test driver (no optimisation flags are passed).
TestOpenH264ASM.c.o: TestOpenH264ASM.c
|
||||||
|
gcc -c -o TestOpenH264ASM.c.o TestOpenH264ASM.c
|
||||||
|
|
||||||
|
# Compile ../h264.c with -O3.
h264.c.o: ../h264.c
|
||||||
|
gcc -c -O3 -o h264.c.o ../h264.c
|
||||||
|
|
||||||
|
# Remove the executable and every object file produced by the rules above.
clean:
|
||||||
|
rm -f TestOpenH264ASM TestOpenH264ASM.c.o h264_ssse3.asm.o h264.c.o h264.asm.o
|
||||||
|
|
||||||
|
# Alternative link: same executable name, but built from h264.asm.o instead
# of h264_ssse3.asm.o.
old: h264.asm.o TestOpenH264ASM.c.o h264.c.o
|
||||||
|
gcc -o TestOpenH264ASM h264.asm.o TestOpenH264ASM.c.o h264.c.o
|
@ -4,49 +4,70 @@
|
|||||||
|
|
||||||
#include "TestOpenH264ASM.h"
|
#include "TestOpenH264ASM.h"
|
||||||
|
|
||||||
|
#define WIDTH 1920
|
||||||
|
#define HEIGHT 1080
|
||||||
|
|
||||||
int main(void){
|
int main(void){
|
||||||
int ret,i;
|
int i,j,k;
|
||||||
|
int ret;
|
||||||
unsigned char *pDstData_c,*pDstData_asm,*pSrcData[3];
|
unsigned char *pDstData_c,*pDstData_asm,*pSrcData[3];
|
||||||
int nSrcStep[2];
|
int nSrcStep[2];
|
||||||
|
|
||||||
|
if(check_ssse3()){
|
||||||
|
fprintf(stderr,"ssse3 not supported!\n");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
struct timeval t1,t2,t3;
|
struct timeval t1,t2,t3;
|
||||||
|
|
||||||
pSrcData[0]=malloc(1920*1080*sizeof(char));
|
pSrcData[0]=malloc(1984*HEIGHT*sizeof(char));
|
||||||
pSrcData[1]=malloc(1920*1080/4*sizeof(char));
|
pSrcData[1]=malloc(1984*HEIGHT/4*sizeof(char));
|
||||||
pSrcData[2]=malloc(1920*1080/4*sizeof(char));
|
pSrcData[2]=malloc(1984*HEIGHT/4*sizeof(char));
|
||||||
pDstData_asm=malloc(1920*1080*4*sizeof(char));
|
pDstData_asm=malloc(WIDTH*HEIGHT*4*sizeof(char));
|
||||||
pDstData_c=malloc(1920*1080*4*sizeof(char));
|
pDstData_c=malloc(WIDTH*HEIGHT*4*sizeof(char));
|
||||||
|
|
||||||
for(i=0;i<1920*1080;i++){
|
for(i=0;i<WIDTH*HEIGHT;i++){
|
||||||
pSrcData[0][i]=i%255;
|
pSrcData[0][i]=i%255;
|
||||||
pSrcData[1][i/4]=pSrcData[0][i];
|
pSrcData[1][i/4]=pSrcData[0][i];
|
||||||
pSrcData[2][i/4]=255-pSrcData[0][i];
|
pSrcData[2][i/4]=255-pSrcData[0][i];
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("%X\n",pSrcData[0][0]);
|
nSrcStep[0]=1984;
|
||||||
|
nSrcStep[1]=992;
|
||||||
nSrcStep[0]=1088;
|
|
||||||
nSrcStep[1]=544;
|
|
||||||
|
|
||||||
gettimeofday(&t1,NULL);
|
gettimeofday(&t1,NULL);
|
||||||
ret=freerdp_image_yuv_to_xrgb_asm(pDstData_asm,pSrcData,1024,768,1088,544);
|
ret=freerdp_image_yuv420p_to_xrgb(pDstData_asm,pSrcData,WIDTH,HEIGHT,nSrcStep[0],nSrcStep[1]);
|
||||||
gettimeofday(&t2,NULL);
|
gettimeofday(&t2,NULL);
|
||||||
freerdp_image_copy_yuv420p_to_xrgb(pDstData_c,1024*4,0,0,1024,768,pSrcData,nSrcStep,0,0);
|
freerdp_image_copy_yuv420p_to_xrgb(pDstData_c,WIDTH*4,0,0,WIDTH,HEIGHT,pSrcData,nSrcStep,0,0);
|
||||||
gettimeofday(&t3,NULL);
|
gettimeofday(&t3,NULL);
|
||||||
|
|
||||||
printf("in asm (%d) it took %u sec %u usec,\nin c %u sec %u usec.\n",ret,(int)(t2.tv_sec-t1.tv_sec),(int)(t2.tv_usec-t1.tv_usec),
|
printf("in asm (0x%08X) it took %u sec %u usec,\nin c %u sec %u usec.\n",ret,(int)(t2.tv_sec-t1.tv_sec),(int)(t2.tv_usec-t1.tv_usec),
|
||||||
(int)(t3.tv_sec-t2.tv_sec),(int)(t3.tv_usec-t2.tv_usec));
|
(int)(t3.tv_sec-t2.tv_sec),(int)(t3.tv_usec-t2.tv_usec));
|
||||||
|
|
||||||
printf("in asm the result was %X %X %X\n in c %X %X %X.\n",(unsigned char *)pDstData_asm[92],(unsigned char *)pDstData_asm[93],(unsigned char *)pDstData_asm[94],
|
printf("in asm the result was %X %X %X\n in c %X %X %X.\n",pDstData_asm[0],pDstData_asm[1],pDstData_asm[2],
|
||||||
(unsigned char *)pDstData_c[92],(unsigned char *)pDstData_c[93],(unsigned char *)pDstData_c[94]);
|
pDstData_c[0],pDstData_c[1],pDstData_c[2]);
|
||||||
|
|
||||||
for(i=0;i<(1920*1080*4);i++){
|
/*k=0;
|
||||||
|
for(i=0;i<HEIGHT+1;i++){
|
||||||
|
for(j=0;j<WIDTH;j++){
|
||||||
|
printf("%08X:%08X ",((unsigned int*)pDstData_asm)[k],((unsigned int*)pDstData_c)[k]);
|
||||||
|
k++;
|
||||||
|
}
|
||||||
|
puts("\n");
|
||||||
|
}*/
|
||||||
|
|
||||||
|
k=1;
|
||||||
|
for(i=0;i<(WIDTH*HEIGHT*4);i++){
|
||||||
if(pDstData_c[i]!=pDstData_asm[i]){
|
if(pDstData_c[i]!=pDstData_asm[i]){
|
||||||
|
k=0;
|
||||||
printf("MISSMATCH at %d: %2X != %2X\n",i,(unsigned char)pDstData_asm[i],(unsigned char)pDstData_c[i]);
|
printf("MISSMATCH at %d: %2X != %2X\n",i,(unsigned char)pDstData_asm[i],(unsigned char)pDstData_c[i]);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(k)
|
||||||
|
printf("everything OK\n");
|
||||||
|
|
||||||
free(pSrcData[0]);
|
free(pSrcData[0]);
|
||||||
free(pSrcData[1]);
|
free(pSrcData[1]);
|
||||||
free(pSrcData[2]);
|
free(pSrcData[2]);
|
||||||
|
@ -4,4 +4,7 @@ extern int YUV_to_RGB(unsigned char Y,unsigned char U,unsigned char V);
|
|||||||
|
|
||||||
extern int freerdp_image_yuv_to_xrgb_asm(unsigned char *pDstData,unsigned char **pSrcData,int nWidth,int nHeight,int istride0,int istride1);
|
extern int freerdp_image_yuv_to_xrgb_asm(unsigned char *pDstData,unsigned char **pSrcData,int nWidth,int nHeight,int istride0,int istride1);
|
||||||
int freerdp_image_copy_yuv420p_to_xrgb(unsigned char* pDstData, int nDstStep, int nXDst, int nYDst,
|
int freerdp_image_copy_yuv420p_to_xrgb(unsigned char* pDstData, int nDstStep, int nXDst, int nYDst,
|
||||||
int nWidth, int nHeight, unsigned char* pSrcData[3], int nSrcStep[2], int nXSrc, int nYSrc);
|
int nWidth, int nHeight, unsigned char* pSrcData[3], int nSrcStep[2], int nXSrc, int nYSrc);
|
||||||
|
|
||||||
|
extern int check_ssse3();
|
||||||
|
extern int freerdp_image_yuv420p_to_xrgb(unsigned char *pDstData,unsigned char **pSrcData,int nWidth,int nHeight,int istride0,int istride1);
|
Loading…
Reference in New Issue
Block a user