diff --git a/CMakeLists.txt b/CMakeLists.txt index a1113150f..3ae1759ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -217,10 +217,6 @@ if(CMAKE_COMPILER_IS_GNUCC) if(CMAKE_BUILD_TYPE STREQUAL "Release") set(CMAKE_C_FLAGS_RELEASE "-DNDEBUG") set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG") - if(NOT OPENBSD) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") - endif() CHECK_C_COMPILER_FLAG (-Wno-builtin-macro-redefined Wno-builtin-macro-redefined) if(Wno-builtin-macro-redefined) diff --git a/channels/rdpgfx/client/rdpgfx_codec.c b/channels/rdpgfx/client/rdpgfx_codec.c index 3eba6c363..bf28ddd4d 100644 --- a/channels/rdpgfx/client/rdpgfx_codec.c +++ b/channels/rdpgfx/client/rdpgfx_codec.c @@ -38,10 +38,11 @@ * * @return 0 on success, otherwise a Win32 error code */ -static UINT rdpgfx_read_h264_metablock(RDPGFX_PLUGIN* gfx, wStream* s, RDPGFX_H264_METABLOCK* meta) +static UINT rdpgfx_read_h264_metablock(RDPGFX_PLUGIN* gfx, wStream* s, + RDPGFX_H264_METABLOCK* meta) { UINT32 index; - RDPGFX_RECT16* regionRect; + RECTANGLE_16* regionRect; RDPGFX_H264_QUANT_QUALITY* quantQualityVal; UINT error = ERROR_INVALID_DATA; @@ -56,13 +57,13 @@ static UINT rdpgfx_read_h264_metablock(RDPGFX_PLUGIN* gfx, wStream* s, RDPGFX_H2 Stream_Read_UINT32(s, meta->numRegionRects); /* numRegionRects (4 bytes) */ - if (Stream_GetRemainingLength(s) < (meta->numRegionRects * 8)) + if (Stream_GetRemainingLength(s) < (meta->numRegionRects * sizeof(RECTANGLE_16))) { WLog_ERR(TAG, "not enough data!"); goto error_out; } - meta->regionRects = (RDPGFX_RECT16*) malloc(meta->numRegionRects * sizeof(RDPGFX_RECT16)); + meta->regionRects = (RECTANGLE_16*) malloc(meta->numRegionRects * sizeof(RECTANGLE_16)); if (!meta->regionRects) { @@ -128,11 +129,11 @@ error_out: * * @return 0 on success, otherwise a Win32 error code */ -static UINT rdpgfx_decode_h264(RDPGFX_PLUGIN* gfx, RDPGFX_SURFACE_COMMAND* cmd) +static UINT rdpgfx_decode_AVC420(RDPGFX_PLUGIN* gfx, RDPGFX_SURFACE_COMMAND* cmd) { UINT error; wStream* s; - RDPGFX_H264_BITMAP_STREAM h264; + RDPGFX_AVC420_BITMAP_STREAM h264; RdpgfxClientContext* context = (RdpgfxClientContext*) gfx->iface.pInterface; s = Stream_New(cmd->data, cmd->length); @@ -169,6 +170,91 @@ static UINT rdpgfx_decode_h264(RDPGFX_PLUGIN* gfx, RDPGFX_SURFACE_COMMAND* cmd) return error; } +/** + * Function description + * + * @return 0 on success, otherwise a Win32 error code + */ +static UINT rdpgfx_decode_AVC444(RDPGFX_PLUGIN* gfx, RDPGFX_SURFACE_COMMAND* cmd) +{ + UINT error; + UINT32 tmp; + size_t pos1, pos2; + wStream* s; + RDPGFX_AVC444_BITMAP_STREAM h264; + RdpgfxClientContext* context = (RdpgfxClientContext*) gfx->iface.pInterface; + + s = Stream_New(cmd->data, cmd->length); + + if (!s) + { + WLog_ERR(TAG, "Stream_New failed!"); + return CHANNEL_RC_NO_MEMORY; + } + + if (Stream_GetRemainingLength(s) < 4) + return ERROR_INVALID_DATA; + + Stream_Read_UINT32(s, tmp); + h264.cbAvc420EncodedBitstream1 = tmp & 0x3FFFFFFFUL; + h264.LC = (tmp >> 30UL) & 0x03UL; + + if (h264.LC == 0x03) + return ERROR_INVALID_DATA; + + pos1 = Stream_GetPosition(s); + if ((error = rdpgfx_read_h264_metablock(gfx, s, &(h264.bitstream[0].meta)))) + { + WLog_ERR(TAG, "rdpgfx_read_h264_metablock failed with error %lu!", error); + return error; + } + pos2 = Stream_GetPosition(s); + + h264.bitstream[0].data = Stream_Pointer(s); + + if (h264.LC == 0) + { + tmp = h264.cbAvc420EncodedBitstream1 - pos2 + pos1; + if (Stream_GetRemainingLength(s) < tmp) + return ERROR_INVALID_DATA; + + h264.bitstream[0].length = tmp; + Stream_Seek(s, tmp); + + if ((error = rdpgfx_read_h264_metablock(gfx, s, &(h264.bitstream[1].meta)))) + { + WLog_ERR(TAG, "rdpgfx_read_h264_metablock failed with error %lu!", error); + return error; + } + + h264.bitstream[1].data = Stream_Pointer(s); + h264.bitstream[1].length = Stream_GetRemainingLength(s); + } + else + { + h264.bitstream[0].length = Stream_GetRemainingLength(s); + memset(&h264.bitstream[1], 0, sizeof(h264.bitstream[1])); + } + + Stream_Free(s, FALSE); + + cmd->extra = (void*) &h264; + + if (context) + { + IFCALLRET(context->SurfaceCommand, error, context, cmd); + if (error) + WLog_ERR(TAG, "context->SurfaceCommand failed with error %lu", error); + } + + free(h264.bitstream[0].meta.regionRects); + free(h264.bitstream[0].meta.quantQualityVals); + free(h264.bitstream[1].meta.regionRects); + free(h264.bitstream[1].meta.quantQualityVals); + + return error; +} + /** * Function description * @@ -181,10 +267,18 @@ UINT rdpgfx_decode(RDPGFX_PLUGIN* gfx, RDPGFX_SURFACE_COMMAND* cmd) switch (cmd->codecId) { - case RDPGFX_CODECID_H264: - if ((error = rdpgfx_decode_h264(gfx, cmd))) + case RDPGFX_CODECID_AVC420: + if ((error = rdpgfx_decode_AVC420(gfx, cmd))) { - WLog_ERR(TAG, "rdpgfx_decode_h264 failed with error %lu", error); + WLog_ERR(TAG, "rdpgfx_decode_AVC420 failed with error %lu", error); + return error; + } + break; + + case RDPGFX_CODECID_AVC444: + if ((error = rdpgfx_decode_AVC444(gfx, cmd))) + { + WLog_ERR(TAG, "rdpgfx_decode_AVC444 failed with error %lu", error); return error; } break; diff --git a/channels/rdpgfx/client/rdpgfx_common.c b/channels/rdpgfx/client/rdpgfx_common.c index 8b6e07389..a7518a5f0 100644 --- a/channels/rdpgfx/client/rdpgfx_common.c +++ b/channels/rdpgfx/client/rdpgfx_common.c @@ -77,8 +77,10 @@ const char* rdpgfx_get_codec_id_string(UINT16 codecId) return "RDPGFX_CODECID_CLEARCODEC"; case RDPGFX_CODECID_PLANAR: return "RDPGFX_CODECID_PLANAR"; - case RDPGFX_CODECID_H264: - return "RDPGFX_CODECID_H264"; + case RDPGFX_CODECID_AVC420: + return "RDPGFX_CODECID_AVC420"; + case RDPGFX_CODECID_AVC444: + return "RDPGFX_CODECID_AVC444"; case RDPGFX_CODECID_ALPHA: return "RDPGFX_CODECID_ALPHA"; case RDPGFX_CODECID_CAPROGRESSIVE: @@ -161,7 +163,7 @@ UINT rdpgfx_write_point16(wStream* s, RDPGFX_POINT16* point16) * * @return 0 on success, otherwise a Win32 error code */ -UINT rdpgfx_read_rect16(wStream* s, RDPGFX_RECT16* rect16) +UINT rdpgfx_read_rect16(wStream* s, RECTANGLE_16* rect16) { if (Stream_GetRemainingLength(s) < 8) { @@ -182,7 +184,7 @@ UINT rdpgfx_read_rect16(wStream* s, RDPGFX_RECT16* rect16) * * @return 0 on success, otherwise a Win32 error code */ -UINT rdpgfx_write_rect16(wStream* s, RDPGFX_RECT16* rect16) +UINT rdpgfx_write_rect16(wStream* s, RECTANGLE_16* rect16) { Stream_Write_UINT16(s, rect16->left); /* left (2 bytes) */ Stream_Write_UINT16(s, rect16->top); /* top (2 bytes) */ diff --git a/channels/rdpgfx/client/rdpgfx_common.h b/channels/rdpgfx/client/rdpgfx_common.h index 188102cd8..9d082056d 100644 --- a/channels/rdpgfx/client/rdpgfx_common.h +++ b/channels/rdpgfx/client/rdpgfx_common.h @@ -36,8 +36,8 @@ UINT rdpgfx_write_header(wStream* s, RDPGFX_HEADER* header); UINT rdpgfx_read_point16(wStream* s, RDPGFX_POINT16* pt16); UINT rdpgfx_write_point16(wStream* s, RDPGFX_POINT16* point16); -UINT rdpgfx_read_rect16(wStream* s, RDPGFX_RECT16* rect16); -UINT rdpgfx_write_rect16(wStream* s, RDPGFX_RECT16* rect16); +UINT rdpgfx_read_rect16(wStream* s, RECTANGLE_16* rect16); +UINT rdpgfx_write_rect16(wStream* s, RECTANGLE_16* rect16); UINT rdpgfx_read_color32(wStream* s, RDPGFX_COLOR32* color32); UINT rdpgfx_write_color32(wStream* s, RDPGFX_COLOR32* color32); diff --git a/channels/rdpgfx/client/rdpgfx_main.c b/channels/rdpgfx/client/rdpgfx_main.c index 07ec66cc5..1ae288242 100644 --- a/channels/rdpgfx/client/rdpgfx_main.c +++ b/channels/rdpgfx/client/rdpgfx_main.c @@ -58,7 +58,7 @@ static UINT rdpgfx_send_caps_advertise_pdu(RDPGFX_CHANNEL_CALLBACK* callback) RDPGFX_PLUGIN* gfx; RDPGFX_HEADER header; RDPGFX_CAPSET* capsSet; - RDPGFX_CAPSET capsSets[2]; + RDPGFX_CAPSET capsSets[3]; RDPGFX_CAPS_ADVERTISE_PDU pdu; gfx = (RDPGFX_PLUGIN*) callback->plugin; @@ -90,7 +90,17 @@ static UINT rdpgfx_send_caps_advertise_pdu(RDPGFX_CHANNEL_CALLBACK* callback) capsSet->flags |= RDPGFX_CAPS_FLAG_SMALL_CACHE; if (gfx->H264) - capsSet->flags |= RDPGFX_CAPS_FLAG_H264ENABLED; + capsSet->flags |= RDPGFX_CAPS_FLAG_AVC420_ENABLED; + + capsSet = &capsSets[pdu.capsSetCount++]; + capsSet->version = RDPGFX_CAPVERSION_10; + capsSet->flags = 0; + + if (gfx->SmallCache) + capsSet->flags |= RDPGFX_CAPS_FLAG_SMALL_CACHE; + + if (!gfx->H264) + capsSet->flags |= RDPGFX_CAPS_FLAG_AVC_DISABLED; header.pduLength = RDPGFX_HEADER_SIZE + 2 + (pdu.capsSetCount * RDPGFX_CAPSET_SIZE); @@ -688,7 +698,7 @@ static UINT rdpgfx_recv_delete_encoding_context_pdu(RDPGFX_CHANNEL_CALLBACK* cal UINT rdpgfx_recv_solid_fill_pdu(RDPGFX_CHANNEL_CALLBACK* callback, wStream* s) { UINT16 index; - RDPGFX_RECT16* fillRect; + RECTANGLE_16* fillRect; RDPGFX_SOLID_FILL_PDU pdu; RDPGFX_PLUGIN* gfx = (RDPGFX_PLUGIN*) callback->plugin; RdpgfxClientContext* context = (RdpgfxClientContext*) gfx->iface.pInterface; @@ -714,7 +724,7 @@ UINT rdpgfx_recv_solid_fill_pdu(RDPGFX_CHANNEL_CALLBACK* callback, wStream* s) return ERROR_INVALID_DATA; } - pdu.fillRects = (RDPGFX_RECT16*) calloc(pdu.fillRectCount, sizeof(RDPGFX_RECT16)); + pdu.fillRects = (RECTANGLE_16*) calloc(pdu.fillRectCount, sizeof(RECTANGLE_16)); if (!pdu.fillRects) { @@ -742,7 +752,7 @@ UINT rdpgfx_recv_solid_fill_pdu(RDPGFX_CHANNEL_CALLBACK* callback, wStream* s) if (error) WLog_ERR(TAG, "context->SolidFill failed with error %lu", error); } - + free(pdu.fillRects); return error; diff --git a/client/X11/xf_gfx.c b/client/X11/xf_gfx.c index d87e79cdb..3c6363f7c 100644 --- a/client/X11/xf_gfx.c +++ b/client/X11/xf_gfx.c @@ -31,7 +31,7 @@ * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_ResetGraphics(RdpgfxClientContext* context, RDPGFX_RESET_GRAPHICS_PDU* resetGraphics) +static UINT xf_ResetGraphics(RdpgfxClientContext* context, RDPGFX_RESET_GRAPHICS_PDU* resetGraphics) { int index; UINT16 count; @@ -70,7 +70,7 @@ UINT xf_ResetGraphics(RdpgfxClientContext* context, RDPGFX_RESET_GRAPHICS_PDU* r return CHANNEL_RC_OK; } -int xf_OutputUpdate(xfContext* xfc, xfGfxSurface* surface) +static int xf_OutputUpdate(xfContext* xfc, xfGfxSurface* surface) { UINT16 width, height; UINT32 surfaceX, surfaceY; @@ -132,7 +132,7 @@ int xf_OutputUpdate(xfContext* xfc, xfGfxSurface* surface) return 1; } -int xf_UpdateSurfaces(xfContext* xfc) +static int xf_UpdateSurfaces(xfContext* xfc) { UINT16 count; int index; @@ -220,7 +220,7 @@ int xf_OutputExpose(xfContext* xfc, int x, int y, int width, int height) * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_StartFrame(RdpgfxClientContext* context, RDPGFX_START_FRAME_PDU* startFrame) +static UINT xf_StartFrame(RdpgfxClientContext* context, RDPGFX_START_FRAME_PDU* startFrame) { xfContext* xfc = (xfContext*) context->custom; @@ -234,7 +234,7 @@ UINT xf_StartFrame(RdpgfxClientContext* context, RDPGFX_START_FRAME_PDU* startFr * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_EndFrame(RdpgfxClientContext* context, RDPGFX_END_FRAME_PDU* endFrame) +static UINT xf_EndFrame(RdpgfxClientContext* context, RDPGFX_END_FRAME_PDU* endFrame) { xfContext* xfc = (xfContext*) context->custom; @@ -250,7 +250,7 @@ UINT xf_EndFrame(RdpgfxClientContext* context, RDPGFX_END_FRAME_PDU* endFrame) * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_SurfaceCommand_Uncompressed(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) +static UINT xf_SurfaceCommand_Uncompressed(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) { xfGfxSurface* surface; RECTANGLE_16 invalidRect; @@ -281,7 +281,7 @@ UINT xf_SurfaceCommand_Uncompressed(xfContext* xfc, RdpgfxClientContext* context * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_SurfaceCommand_RemoteFX(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) +static UINT xf_SurfaceCommand_RemoteFX(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) { int j; UINT16 i; @@ -371,7 +371,7 @@ UINT xf_SurfaceCommand_RemoteFX(xfContext* xfc, RdpgfxClientContext* context, RD * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_SurfaceCommand_ClearCodec(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) +static UINT xf_SurfaceCommand_ClearCodec(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) { int status; BYTE* DstData = NULL; @@ -415,7 +415,7 @@ UINT xf_SurfaceCommand_ClearCodec(xfContext* xfc, RdpgfxClientContext* context, * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_SurfaceCommand_Planar(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) +static UINT xf_SurfaceCommand_Planar(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) { int status; BYTE* DstData = NULL; @@ -453,45 +453,46 @@ UINT xf_SurfaceCommand_Planar(xfContext* xfc, RdpgfxClientContext* context, RDPG * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_SurfaceCommand_H264(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) +static UINT xf_SurfaceCommand_AVC420(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) { int status; UINT32 i; - BYTE* DstData = NULL; xfGfxSurface* surface; RDPGFX_H264_METABLOCK* meta; - RDPGFX_H264_BITMAP_STREAM* bs; + RDPGFX_AVC420_BITMAP_STREAM* bs; surface = (xfGfxSurface*) context->GetSurfaceData(context, cmd->surfaceId); if (!surface) return ERROR_INTERNAL_ERROR; - if (!freerdp_client_codecs_prepare(surface->codecs, FREERDP_CODEC_H264)) + if (!freerdp_client_codecs_prepare(surface->codecs, FREERDP_CODEC_AVC420)) return ERROR_INTERNAL_ERROR; - bs = (RDPGFX_H264_BITMAP_STREAM*) cmd->extra; + bs = (RDPGFX_AVC420_BITMAP_STREAM*) cmd->extra; if (!bs) return ERROR_INTERNAL_ERROR; meta = &(bs->meta); - DstData = surface->data; - - status = h264_decompress(surface->codecs->h264, bs->data, bs->length, &DstData, - surface->format, surface->scanline , surface->width, - surface->height, meta->regionRects, meta->numRegionRects); + status = avc420_decompress(surface->codecs->h264, bs->data, bs->length, + surface->data, surface->format, + surface->scanline , surface->width, + surface->height, meta->regionRects, + meta->numRegionRects); if (status < 0) { - WLog_WARN(TAG, "h264_decompress failure: %d, ignoring update.", status); + WLog_WARN(TAG, "avc420_decompress failure: %d, ignoring update.", status); return CHANNEL_RC_OK; } for (i = 0; i < meta->numRegionRects; i++) { - region16_union_rect(&surface->invalidRegion, &surface->invalidRegion, (RECTANGLE_16*) &(meta->regionRects[i])); + region16_union_rect(&surface->invalidRegion, + &surface->invalidRegion, + &(meta->regionRects[i])); } if (!xfc->inGfxFrame) @@ -505,7 +506,77 @@ UINT xf_SurfaceCommand_H264(xfContext* xfc, RdpgfxClientContext* context, RDPGFX * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_SurfaceCommand_Alpha(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) +static UINT xf_SurfaceCommand_AVC444(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) +{ + int status; + UINT32 i; + xfGfxSurface* surface; + RDPGFX_AVC444_BITMAP_STREAM* bs; + RDPGFX_AVC420_BITMAP_STREAM* avc1; + RDPGFX_AVC420_BITMAP_STREAM* avc2; + RDPGFX_H264_METABLOCK* meta1; + RDPGFX_H264_METABLOCK* meta2; + RECTANGLE_16* regionRects = NULL; + + surface = (xfGfxSurface*) context->GetSurfaceData(context, cmd->surfaceId); + + if (!surface) + return ERROR_INTERNAL_ERROR; + + if (!freerdp_client_codecs_prepare(surface->codecs, FREERDP_CODEC_AVC444)) + return ERROR_INTERNAL_ERROR; + + bs = (RDPGFX_AVC444_BITMAP_STREAM*) cmd->extra; + + if (!bs) + return ERROR_INTERNAL_ERROR; + + avc1 = &bs->bitstream[0]; + avc2 = &bs->bitstream[1]; + meta1 = &avc1->meta; + meta2 = &avc2->meta; + + status = avc444_decompress(surface->codecs->h264, bs->LC, + meta1->regionRects, meta1->numRegionRects, + avc1->data, avc1->length, + meta2->regionRects, meta2->numRegionRects, + avc2->data, avc2->length, surface->data, + surface->format, surface->scanline, + surface->width, surface->height); + + if (status < 0) + { + WLog_WARN(TAG, "avc444_decompress failure: %d, ignoring update.", status); + return CHANNEL_RC_OK; + } + + for (i = 0; i < meta1->numRegionRects; i++) + { + region16_union_rect(&surface->invalidRegion, + &surface->invalidRegion, + &(meta1->regionRects[i])); + } + for (i = 0; i < meta2->numRegionRects; i++) + { + region16_union_rect(&surface->invalidRegion, + &surface->invalidRegion, + &(meta2->regionRects[i])); + } + + if (!xfc->inGfxFrame) + xf_UpdateSurfaces(xfc); + + free (regionRects); + + return CHANNEL_RC_OK; +} + +/** + * Function description + * + * @return 0 on success, otherwise a Win32 error code + */ +static UINT xf_SurfaceCommand_Alpha(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) { int status = 0; xfGfxSurface* surface; @@ -543,7 +614,7 @@ UINT xf_SurfaceCommand_Alpha(xfContext* xfc, RdpgfxClientContext* context, RDPGF * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_SurfaceCommand_Progressive(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) +static UINT xf_SurfaceCommand_Progressive(xfContext* xfc, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) { int i, j; int status; @@ -645,7 +716,7 @@ UINT xf_SurfaceCommand_Progressive(xfContext* xfc, RdpgfxClientContext* context, * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_SurfaceCommand(RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) +static UINT xf_SurfaceCommand(RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) { UINT status = CHANNEL_RC_OK; xfContext* xfc = (xfContext*) context->custom; @@ -668,8 +739,12 @@ UINT xf_SurfaceCommand(RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd status = xf_SurfaceCommand_Planar(xfc, context, cmd); break; - case RDPGFX_CODECID_H264: - status = xf_SurfaceCommand_H264(xfc, context, cmd); + case RDPGFX_CODECID_AVC420: + status = xf_SurfaceCommand_AVC420(xfc, context, cmd); + break; + + case RDPGFX_CODECID_AVC444: + status = xf_SurfaceCommand_AVC444(xfc, context, cmd); break; case RDPGFX_CODECID_ALPHA: @@ -697,7 +772,7 @@ UINT xf_SurfaceCommand(RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_DeleteEncodingContext(RdpgfxClientContext* context, RDPGFX_DELETE_ENCODING_CONTEXT_PDU* deleteEncodingContext) +static UINT xf_DeleteEncodingContext(RdpgfxClientContext* context, RDPGFX_DELETE_ENCODING_CONTEXT_PDU* deleteEncodingContext) { return CHANNEL_RC_OK; } @@ -707,7 +782,7 @@ UINT xf_DeleteEncodingContext(RdpgfxClientContext* context, RDPGFX_DELETE_ENCODI * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_CreateSurface(RdpgfxClientContext* context, RDPGFX_CREATE_SURFACE_PDU* createSurface) +static UINT xf_CreateSurface(RdpgfxClientContext* context, RDPGFX_CREATE_SURFACE_PDU* createSurface) { size_t size; UINT32 bytesPerPixel; @@ -795,7 +870,7 @@ UINT xf_CreateSurface(RdpgfxClientContext* context, RDPGFX_CREATE_SURFACE_PDU* c * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_DeleteSurface(RdpgfxClientContext* context, RDPGFX_DELETE_SURFACE_PDU* deleteSurface) +static UINT xf_DeleteSurface(RdpgfxClientContext* context, RDPGFX_DELETE_SURFACE_PDU* deleteSurface) { rdpCodecs* codecs = NULL; xfGfxSurface* surface = NULL; @@ -827,13 +902,13 @@ UINT xf_DeleteSurface(RdpgfxClientContext* context, RDPGFX_DELETE_SURFACE_PDU* d * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_SolidFill(RdpgfxClientContext* context, RDPGFX_SOLID_FILL_PDU* solidFill) +static UINT xf_SolidFill(RdpgfxClientContext* context, RDPGFX_SOLID_FILL_PDU* solidFill) { UINT16 index; UINT32 color; BYTE a, r, g, b; int nWidth, nHeight; - RDPGFX_RECT16* rect; + RECTANGLE_16* rect; xfGfxSurface* surface; RECTANGLE_16 invalidRect; xfContext* xfc = (xfContext*) context->custom; @@ -879,12 +954,12 @@ UINT xf_SolidFill(RdpgfxClientContext* context, RDPGFX_SOLID_FILL_PDU* solidFill * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_SurfaceToSurface(RdpgfxClientContext* context, RDPGFX_SURFACE_TO_SURFACE_PDU* surfaceToSurface) +static UINT xf_SurfaceToSurface(RdpgfxClientContext* context, RDPGFX_SURFACE_TO_SURFACE_PDU* surfaceToSurface) { UINT16 index; BOOL sameSurface; int nWidth, nHeight; - RDPGFX_RECT16* rectSrc; + RECTANGLE_16* rectSrc; RDPGFX_POINT16* destPt; RECTANGLE_16 invalidRect; xfGfxSurface* surfaceSrc; @@ -944,10 +1019,10 @@ UINT xf_SurfaceToSurface(RdpgfxClientContext* context, RDPGFX_SURFACE_TO_SURFACE * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_SurfaceToCache(RdpgfxClientContext* context, RDPGFX_SURFACE_TO_CACHE_PDU* surfaceToCache) +static UINT xf_SurfaceToCache(RdpgfxClientContext* context, RDPGFX_SURFACE_TO_CACHE_PDU* surfaceToCache) { size_t size; - RDPGFX_RECT16* rect; + RECTANGLE_16* rect; xfGfxSurface* surface; xfGfxCacheEntry* cacheEntry; xfContext* xfc = (xfContext*) context->custom; @@ -997,7 +1072,7 @@ UINT xf_SurfaceToCache(RdpgfxClientContext* context, RDPGFX_SURFACE_TO_CACHE_PDU * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_CacheToSurface(RdpgfxClientContext* context, RDPGFX_CACHE_TO_SURFACE_PDU* cacheToSurface) +static UINT xf_CacheToSurface(RdpgfxClientContext* context, RDPGFX_CACHE_TO_SURFACE_PDU* cacheToSurface) { UINT16 index; RDPGFX_POINT16* destPt; @@ -1039,7 +1114,7 @@ UINT xf_CacheToSurface(RdpgfxClientContext* context, RDPGFX_CACHE_TO_SURFACE_PDU * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_CacheImportReply(RdpgfxClientContext* context, RDPGFX_CACHE_IMPORT_REPLY_PDU* cacheImportReply) +static UINT xf_CacheImportReply(RdpgfxClientContext* context, RDPGFX_CACHE_IMPORT_REPLY_PDU* cacheImportReply) { return CHANNEL_RC_OK; } @@ -1049,7 +1124,7 @@ UINT xf_CacheImportReply(RdpgfxClientContext* context, RDPGFX_CACHE_IMPORT_REPLY * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_EvictCacheEntry(RdpgfxClientContext* context, RDPGFX_EVICT_CACHE_ENTRY_PDU* evictCacheEntry) +static UINT xf_EvictCacheEntry(RdpgfxClientContext* context, RDPGFX_EVICT_CACHE_ENTRY_PDU* evictCacheEntry) { xfGfxCacheEntry* cacheEntry; @@ -1071,7 +1146,7 @@ UINT xf_EvictCacheEntry(RdpgfxClientContext* context, RDPGFX_EVICT_CACHE_ENTRY_P * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_MapSurfaceToOutput(RdpgfxClientContext* context, RDPGFX_MAP_SURFACE_TO_OUTPUT_PDU* surfaceToOutput) +static UINT xf_MapSurfaceToOutput(RdpgfxClientContext* context, RDPGFX_MAP_SURFACE_TO_OUTPUT_PDU* surfaceToOutput) { xfGfxSurface* surface; @@ -1094,7 +1169,7 @@ UINT xf_MapSurfaceToOutput(RdpgfxClientContext* context, RDPGFX_MAP_SURFACE_TO_O * * @return 0 on success, otherwise a Win32 error code */ -UINT xf_MapSurfaceToWindow(RdpgfxClientContext* context, RDPGFX_MAP_SURFACE_TO_WINDOW_PDU* surfaceToWindow) +static UINT xf_MapSurfaceToWindow(RdpgfxClientContext* context, RDPGFX_MAP_SURFACE_TO_WINDOW_PDU* surfaceToWindow) { return CHANNEL_RC_OK; } diff --git a/include/freerdp/channels/rdpgfx.h b/include/freerdp/channels/rdpgfx.h index 24943ff5f..e19714916 100644 --- a/include/freerdp/channels/rdpgfx.h +++ b/include/freerdp/channels/rdpgfx.h @@ -37,15 +37,6 @@ struct _RDPGFX_POINT16 }; typedef struct _RDPGFX_POINT16 RDPGFX_POINT16; -struct _RDPGFX_RECT16 -{ - UINT16 left; - UINT16 top; - UINT16 right; - UINT16 bottom; -}; -typedef struct _RDPGFX_RECT16 RDPGFX_RECT16; - struct _RDPGFX_COLOR32 { BYTE B; @@ -99,6 +90,7 @@ typedef struct _RDPGFX_HEADER RDPGFX_HEADER; #define RDPGFX_CAPVERSION_8 0x00080004 #define RDPGFX_CAPVERSION_81 0x00080105 +#define RDPGFX_CAPVERSION_10 0x000A0002 #define RDPGFX_CAPSET_SIZE 12 @@ -111,7 +103,8 @@ typedef struct _RDPGFX_CAPSET RDPGFX_CAPSET; #define RDPGFX_CAPS_FLAG_THINCLIENT 0x00000001 /* 8.0+ */ #define RDPGFX_CAPS_FLAG_SMALL_CACHE 0x00000002 /* 8.0+ */ -#define RDPGFX_CAPS_FLAG_H264ENABLED 0x00000010 /* 8.1+ */ +#define RDPGFX_CAPS_FLAG_AVC420_ENABLED 0x00000010 /* 8.1+ */ +#define RDPGFX_CAPS_FLAG_AVC_DISABLED 0x00000020 /* 10.0+ */ struct _RDPGFX_CAPSET_VERSION8 { @@ -129,6 +122,14 @@ struct _RDPGFX_CAPSET_VERSION81 }; typedef struct _RDPGFX_CAPSET_VERSION81 RDPGFX_CAPSET_VERSION81; +struct _RDPGFX_CAPSET_VERSION10 +{ + UINT32 version; + UINT32 capsDataLength; + UINT32 flags; +}; +typedef struct _RDPGFX_CAPSET_VERSION10 RDPGFX_CAPSET_VERSION10; + /** * Graphics Messages */ @@ -137,15 +138,16 @@ typedef struct _RDPGFX_CAPSET_VERSION81 RDPGFX_CAPSET_VERSION81; #define RDPGFX_CODECID_CAVIDEO 0x0003 #define RDPGFX_CODECID_CLEARCODEC 0x0008 #define RDPGFX_CODECID_PLANAR 0x000A -#define RDPGFX_CODECID_H264 0x000B +#define RDPGFX_CODECID_AVC420 0x000B #define RDPGFX_CODECID_ALPHA 0x000C +#define RDPGFX_CODECID_AVC444 0x000E struct _RDPGFX_WIRE_TO_SURFACE_PDU_1 { UINT16 surfaceId; UINT16 codecId; RDPGFX_PIXELFORMAT pixelFormat; - RDPGFX_RECT16 destRect; + RECTANGLE_16 destRect; UINT32 bitmapDataLength; BYTE* bitmapData; }; @@ -195,7 +197,7 @@ struct _RDPGFX_SOLID_FILL_PDU UINT16 surfaceId; RDPGFX_COLOR32 fillPixel; UINT16 fillRectCount; - RDPGFX_RECT16* fillRects; + RECTANGLE_16* fillRects; }; typedef struct _RDPGFX_SOLID_FILL_PDU RDPGFX_SOLID_FILL_PDU; @@ -203,7 +205,7 @@ struct _RDPGFX_SURFACE_TO_SURFACE_PDU { UINT16 surfaceIdSrc; UINT16 surfaceIdDest; - RDPGFX_RECT16 rectSrc; + RECTANGLE_16 rectSrc; UINT16 destPtsCount; RDPGFX_POINT16* destPts; }; @@ -214,7 +216,7 @@ struct _RDPGFX_SURFACE_TO_CACHE_PDU UINT16 surfaceId; UINT64 cacheKey; UINT16 cacheSlot; - RDPGFX_RECT16 rectSrc; + RECTANGLE_16 rectSrc; }; typedef struct _RDPGFX_SURFACE_TO_CACHE_PDU RDPGFX_SURFACE_TO_CACHE_PDU; @@ -349,18 +351,27 @@ typedef struct _RDPGFX_H264_QUANT_QUALITY RDPGFX_H264_QUANT_QUALITY; struct _RDPGFX_H264_METABLOCK { UINT32 numRegionRects; - RDPGFX_RECT16* regionRects; + RECTANGLE_16* regionRects; RDPGFX_H264_QUANT_QUALITY* quantQualityVals; }; typedef struct _RDPGFX_H264_METABLOCK RDPGFX_H264_METABLOCK; -struct _RDPGFX_H264_BITMAP_STREAM +struct _RDPGFX_AVC420_BITMAP_STREAM { RDPGFX_H264_METABLOCK meta; UINT32 length; BYTE* data; }; -typedef struct _RDPGFX_H264_BITMAP_STREAM RDPGFX_H264_BITMAP_STREAM; +typedef struct _RDPGFX_AVC420_BITMAP_STREAM RDPGFX_AVC420_BITMAP_STREAM; + +struct _RDPGFX_AVC444_BITMAP_STREAM +{ + UINT32 cbAvc420EncodedBitstream1; + BYTE LC; + RDPGFX_AVC420_BITMAP_STREAM bitstream[2]; +}; +typedef struct _RDPGFX_AVC444_BITMAP_STREAM RDPGFX_AVC444_BITMAP_STREAM; + #endif /* FREERDP_CHANNEL_RDPGFX_H */ diff --git a/include/freerdp/codec/h264.h b/include/freerdp/codec/h264.h index 86e2a2c2c..83e3ca51d 100644 --- a/include/freerdp/codec/h264.h +++ b/include/freerdp/codec/h264.h @@ -29,8 +29,10 @@ typedef struct _H264_CONTEXT H264_CONTEXT; typedef BOOL (*pfnH264SubsystemInit)(H264_CONTEXT* h264); typedef void (*pfnH264SubsystemUninit)(H264_CONTEXT* h264); -typedef int (*pfnH264SubsystemDecompress)(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize); -typedef int (*pfnH264SubsystemCompress)(H264_CONTEXT* h264, BYTE** ppDstData, UINT32* pDstSize); +typedef int (*pfnH264SubsystemDecompress)(H264_CONTEXT* h264, BYTE* pSrcData, + UINT32 SrcSize, UINT32 plane); +typedef int (*pfnH264SubsystemCompress)(H264_CONTEXT* h264, BYTE** ppDstData, + UINT32* pDstSize, UINT32 plane); struct _H264_CONTEXT_SUBSYSTEM { @@ -62,9 +64,14 @@ struct _H264_CONTEXT UINT32 QP; UINT32 NumberOfThreads; - int iStride[3]; - BYTE* pYUVData[3]; + UINT32 iStride[2][3]; + BYTE* pYUVData[2][3]; + UINT32 iYUV444Size[3]; + UINT32 iYUV444Stride[3]; + BYTE* pYUV444Data[3]; + + UINT32 numSystemData; void* pSystemData; H264_CONTEXT_SUBSYSTEM* subsystem; }; @@ -73,12 +80,30 @@ struct _H264_CONTEXT extern "C" { #endif -FREERDP_API int h264_compress(H264_CONTEXT* h264, BYTE* pSrcData, DWORD SrcFormat, - int nSrcStep, int nSrcWidth, int nSrcHeight, BYTE** ppDstData, UINT32* pDstSize); +FREERDP_API INT32 avc420_compress(H264_CONTEXT* h264, BYTE* pSrcData, + DWORD SrcFormat, UINT32 nSrcStep, + UINT32 nSrcWidth, UINT32 nSrcHeight, + BYTE** ppDstData, UINT32* pDstSize); -FREERDP_API int h264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize, - BYTE** ppDstData, DWORD DstFormat, int nDstStep, int nDstWidth, int nDstHeight, - RDPGFX_RECT16* regionRects, int numRegionRect); +FREERDP_API INT32 avc420_decompress(H264_CONTEXT* h264, BYTE* pSrcData, + UINT32 SrcSize, BYTE* pDstData, + DWORD DstFormat, UINT32 nDstStep, + UINT32 nDstWidth, UINT32 nDstHeight, + RECTANGLE_16* regionRects, UINT32 numRegionRect); + +FREERDP_API INT32 avc444_compress(H264_CONTEXT* h264, BYTE* pSrcData, DWORD SrcFormat, + UINT32 nSrcStep, UINT32 nSrcWidth, UINT32 nSrcHeight, + BYTE* op, + BYTE** pDstData, UINT32* pDstSize, + BYTE** pAuxDstData, UINT32* pAuxDstSize); + +FREERDP_API INT32 avc444_decompress(H264_CONTEXT* h264, BYTE op, + RECTANGLE_16* regionRects, UINT32 numRegionRect, + BYTE* pSrcData, UINT32 SrcSize, + RECTANGLE_16* auxRegionRects, UINT32 numAuxRegionRect, + BYTE* pAuxSrcData, UINT32 AuxSrcSize, + BYTE* pDstData, DWORD DstFormat, + UINT32 nDstStep, UINT32 nDstWidth, UINT32 nDstHeight); FREERDP_API BOOL h264_context_reset(H264_CONTEXT* h264, UINT32 width, UINT32 height); diff --git a/include/freerdp/codecs.h b/include/freerdp/codecs.h index 09dd04783..9066d90ee 100644 --- a/include/freerdp/codecs.h +++ b/include/freerdp/codecs.h @@ -39,7 +39,8 @@ #define FREERDP_CODEC_CLEARCODEC 0x00000010 #define FREERDP_CODEC_ALPHACODEC 0x00000020 #define FREERDP_CODEC_PROGRESSIVE 0x00000040 -#define FREERDP_CODEC_H264 0x00000080 +#define FREERDP_CODEC_AVC420 0x00000080 +#define FREERDP_CODEC_AVC444 0x00000100 #define FREERDP_CODEC_ALL 0xFFFFFFFF struct rdp_codecs diff --git a/include/freerdp/primitives.h b/include/freerdp/primitives.h index 1eb51b200..c6a9cb345 100644 --- a/include/freerdp/primitives.h +++ b/include/freerdp/primitives.h @@ -58,8 +58,8 @@ typedef INT32 pstatus_t; /* match IppStatus. */ /* Structures compatible with IPP */ typedef struct { - INT32 width; - INT32 height; + UINT32 width; + UINT32 height; } prim_size_t; /* like IppiSize */ /* Function prototypes for all of the supported primitives. */ @@ -74,7 +74,7 @@ typedef pstatus_t (*__copy_8u_t)( typedef pstatus_t (*__copy_8u_AC4r_t)( const BYTE *pSrc, INT32 srcStep, /* bytes */ - BYTE *pDst, + BYTE *pDst, INT32 dstStep, /* bytes */ INT32 width, INT32 height); /* pixels */ typedef pstatus_t (*__set_8u_t)( @@ -169,13 +169,31 @@ typedef pstatus_t (*__RGB565ToARGB_16u32u_C3C4_t)( UINT32 width, UINT32 height, BOOL alpha, BOOL invert); typedef pstatus_t (*__YUV420ToRGB_8u_P3AC4R_t)( - const BYTE* pSrc[3], INT32 srcStep[3], - BYTE* pDst, INT32 dstStep, + const BYTE* pSrc[3], const UINT32 srcStep[3], + BYTE* pDst, UINT32 dstStep, + const prim_size_t* roi); +typedef pstatus_t (*__YUV444ToRGB_8u_P3AC4R_t)( + const BYTE* pSrc[3], const UINT32 srcStep[3], + BYTE* pDst, UINT32 dstStep, const prim_size_t* roi); typedef pstatus_t (*__RGBToYUV420_8u_P3AC4R_t)( - const BYTE* pSrc, INT32 srcStep, - BYTE* pDst[3], INT32 dstStep[3], + const BYTE* pSrc, UINT32 srcStep, + BYTE* pDst[3], UINT32 dstStep[3], const prim_size_t* roi); +typedef pstatus_t (*__RGBToYUV444_8u_P3AC4R_t)( + const BYTE* pSrc, UINT32 srcStep, + BYTE* pDst[3], UINT32 dstStep[3], + const prim_size_t* roi); +typedef pstatus_t (*__YUV420CombineToYUV444_t)( + const BYTE* pMainSrc[3], const UINT32 srcMainStep[3], + const BYTE* pAuxSrc[3], const UINT32 srcAuxStep[3], + BYTE* pDst[3], const UINT32 dstStep[3], + const prim_size_t* roi); +typedef pstatus_t (*__YUV444SplitToYUV420_t)( + const BYTE* pSrc[3], const UINT32 srcStep[3], + BYTE* pMainDst[3], const UINT32 dstMainStep[3], + BYTE* pAuxDst[3], const UINT32 srcAuxStep[3], + const prim_size_t* roi); typedef pstatus_t (*__andC_32u_t)( const UINT32 *pSrc, UINT32 val, @@ -224,6 +242,10 @@ typedef struct __RGB565ToARGB_16u32u_C3C4_t RGB565ToARGB_16u32u_C3C4; __YUV420ToRGB_8u_P3AC4R_t YUV420ToRGB_8u_P3AC4R; __RGBToYUV420_8u_P3AC4R_t RGBToYUV420_8u_P3AC4R; + __RGBToYUV444_8u_P3AC4R_t RGBToYUV444_8u_P3AC4R; + __YUV420CombineToYUV444_t YUV420CombineToYUV444; + __YUV444SplitToYUV420_t YUV444SplitToYUV420; + __YUV420ToRGB_8u_P3AC4R_t YUV444ToRGB_8u_P3AC4R; } primitives_t; #ifdef __cplusplus diff --git a/libfreerdp/CMakeLists.txt b/libfreerdp/CMakeLists.txt index 41159a56c..95fa781d9 100644 --- a/libfreerdp/CMakeLists.txt +++ b/libfreerdp/CMakeLists.txt @@ -234,7 +234,7 @@ endif() if(WITH_SSE2) if(CMAKE_COMPILER_IS_GNUCC) - set(OPTIMIZATION "${OPTIMIZATION} -msse2 -mssse3 -O2 -Wdeclaration-after-statement") + set(OPTIMIZATION "${OPTIMIZATION} -msse2 -mssse3 -Wdeclaration-after-statement") endif() if(MSVC) @@ -251,12 +251,6 @@ if(DEFINED OPTIMIZATION) set_source_files_properties(${PRIMITIVES_OPT_SRCS} PROPERTIES COMPILE_FLAGS ${OPTIMIZATION}) endif() - -# always compile with optimization -if(CMAKE_COMPILER_IS_GNUCC) - set_source_files_properties(${PRIMITIVES_SRCS} PROPERTIES COMPILE_FLAGS "-O2") -endif() - set(PRIMITIVES_SRCS ${PRIMITIVES_SRCS} ${PRIMITIVES_OPT_SRCS}) freerdp_module_add(${PRIMITIVES_SRCS}) diff --git a/libfreerdp/codec/h264.c b/libfreerdp/codec/h264.c index 230b3ab11..fe2d0a637 100644 --- a/libfreerdp/codec/h264.c +++ b/libfreerdp/codec/h264.c @@ -35,7 +35,7 @@ * Dummy subsystem */ -static int dummy_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize) +static int dummy_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize, UINT32 plane) { return -1; } @@ -231,7 +231,7 @@ error: return hr; } -static int mf_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize) +static int mf_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize, UINT32 plane) { HRESULT hr; BYTE* pbBuffer = NULL; @@ -243,6 +243,8 @@ static int mf_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize) IMFMediaBuffer* outputBuffer = NULL; MFT_OUTPUT_DATA_BUFFER outputDataBuffer; H264_CONTEXT_MF* sys = (H264_CONTEXT_MF*) h264->pSystemData; + INT32* iStride = h264->iStride[plane]; + BYTE** pYUVData = h264->pYUVData[plane]; hr = sys->MFCreateMemoryBuffer(SrcSize, &inputBuffer); @@ -321,7 +323,7 @@ static int mf_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize) if (hr == MF_E_TRANSFORM_STREAM_CHANGE) { - BYTE* pYUVData; + BYTE* pTmpYUVData; int offset = 0; UINT32 stride = 0; UINT64 frameSize = 0; @@ -376,20 +378,20 @@ static int mf_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize) goto error; } - h264->iStride[0] = stride; - h264->iStride[1] = stride / 2; - h264->iStride[2] = stride / 2; + iStride[0] = stride; + iStride[1] = stride / 2; + iStride[2] = stride / 2; - pYUVData = (BYTE*) calloc(1, 2 * stride * sys->frameHeight); + pTmpYUVData = (BYTE*) calloc(1, 2 * stride * sys->frameHeight); - h264->pYUVData[0] = &pYUVData[offset]; - pYUVData += h264->iStride[0] * sys->frameHeight; + pYUVData[0] = &pTmpYUVData[offset]; + pTmpYUVData += iStride[0] * sys->frameHeight; - h264->pYUVData[1] = &pYUVData[offset]; - pYUVData += h264->iStride[1] * (sys->frameHeight / 2); + pYUVData[1] = &pTmpYUVData[offset]; + pTmpYUVData += iStride[1] * (sys->frameHeight / 2); - h264->pYUVData[2] = &pYUVData[offset]; - pYUVData += h264->iStride[2] * (sys->frameHeight / 2); + pYUVData[2] = &pTmpYUVData[offset]; + pTmpYUVData += iStride[2] * (sys->frameHeight / 2); h264->width = sys->frameWidth; h264->height = sys->frameHeight; @@ -435,14 +437,14 @@ static int mf_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize) goto error; } - CopyMemory(h264->pYUVData[0], &buffer[offset], h264->iStride[0] * sys->frameHeight); - offset += h264->iStride[0] * sys->frameHeight; + CopyMemory(pYUVData[0], &buffer[offset], iStride[0] * sys->frameHeight); + offset += iStride[0] * sys->frameHeight; - CopyMemory(h264->pYUVData[1], &buffer[offset], h264->iStride[1] * (sys->frameHeight / 2)); - offset += h264->iStride[1] * (sys->frameHeight / 2); + CopyMemory(pYUVData[1], &buffer[offset], iStride[1] * (sys->frameHeight / 2)); + offset += iStride[1] * (sys->frameHeight / 2); - CopyMemory(h264->pYUVData[2], &buffer[offset], h264->iStride[2] * (sys->frameHeight / 2)); - offset += h264->iStride[2] * (sys->frameHeight / 2); + CopyMemory(pYUVData[2], &buffer[offset], iStride[2] * (sys->frameHeight / 2)); + offset += iStride[2] * (sys->frameHeight / 2); hr = outputBuffer->lpVtbl->Unlock(outputBuffer); @@ -464,7 +466,7 @@ error: return -1; } -static int mf_compress(H264_CONTEXT* h264, BYTE** ppDstData, UINT32* pDstSize) +static int mf_compress(H264_CONTEXT* h264, BYTE** ppDstData, UINT32* pDstSize, UINT32 plane) { H264_CONTEXT_MF* sys = (H264_CONTEXT_MF*) h264->pSystemData; @@ -473,6 +475,7 @@ static int mf_compress(H264_CONTEXT* h264, BYTE** ppDstData, UINT32* pDstSize) static void mf_uninit(H264_CONTEXT* h264) { + UINT32 x; H264_CONTEXT_MF* sys = (H264_CONTEXT_MF*) h264->pSystemData; if (sys) @@ -513,9 +516,11 @@ static void mf_uninit(H264_CONTEXT* h264) sys->mfplat = NULL; } - free(h264->pYUVData[0]); - h264->pYUVData[0] = h264->pYUVData[1] = h264->pYUVData[2] = NULL; - h264->iStride[0] = h264->iStride[1] = h264->iStride[2] = 0; + for (x=0; xpYUVData) / sizeof(h264->pYUVData[0]); x++) + free (h264->pYUVData[x][0]); + + memset(h264->pYUVData, 0, sizeof(h264->pYUVData)); + memset(h264->iStride, 0, sizeof(h264->iStride)); sys->MFShutdown(); @@ -708,14 +713,14 @@ struct _H264_CONTEXT_X264 }; typedef struct _H264_CONTEXT_X264 H264_CONTEXT_X264; -static int x264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize) +static int x264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize, UINT32 plane) { //H264_CONTEXT_X264* sys = (H264_CONTEXT_X264*) h264->pSystemData; return 1; } -static int x264_compress(H264_CONTEXT* h264, BYTE** ppDstData, UINT32* pDstSize) +static int x264_compress(H264_CONTEXT* h264, BYTE** ppDstData, UINT32* pDstSize, UINT32 plane) { //H264_CONTEXT_X264* sys = (H264_CONTEXT_X264*) h264->pSystemData; @@ -737,7 +742,9 @@ static BOOL x264_init(H264_CONTEXT* h264) { H264_CONTEXT_X264* sys; - sys = (H264_CONTEXT_X264*) calloc(1, sizeof(H264_CONTEXT_X264)); + h264->numSystemData = 1; + sys = (H264_CONTEXT_X264*) calloc(h264->numSystemData, + sizeof(H264_CONTEXT_X264)); if (!sys) { @@ -815,12 +822,16 @@ static void openh264_trace_callback(H264_CONTEXT* h264, int level, const char* m WLog_INFO(TAG, "%d - %s", level, message); } -static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize) +static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize, UINT32 plane) { DECODING_STATE state; SBufferInfo sBufferInfo; SSysMEMBuffer* pSystemBuffer; H264_CONTEXT_OPENH264* sys = (H264_CONTEXT_OPENH264*) h264->pSystemData; + UINT32* iStride = h264->iStride[plane]; + BYTE** pYUVData = h264->pYUVData[plane]; + + sys = &((H264_CONTEXT_OPENH264*) h264->pSystemData)[0]; if (!sys->pDecoder) return -2001; @@ -829,25 +840,25 @@ static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSiz * Decompress the image. The RDP host only seems to send I420 format. */ - h264->pYUVData[0] = NULL; - h264->pYUVData[1] = NULL; - h264->pYUVData[2] = NULL; + pYUVData[0] = NULL; + pYUVData[1] = NULL; + pYUVData[2] = NULL; ZeroMemory(&sBufferInfo, sizeof(sBufferInfo)); - state = (*sys->pDecoder)->DecodeFrame2(sys->pDecoder, pSrcData, SrcSize, h264->pYUVData, &sBufferInfo); + state = (*sys->pDecoder)->DecodeFrame2(sys->pDecoder, pSrcData, SrcSize, pYUVData, &sBufferInfo); if (sBufferInfo.iBufferStatus != 1) { if (state == dsNoParamSets) { /* this happens on the first frame due to missing parameter sets */ - state = (*sys->pDecoder)->DecodeFrame2(sys->pDecoder, NULL, 0, h264->pYUVData, &sBufferInfo); + state = (*sys->pDecoder)->DecodeFrame2(sys->pDecoder, NULL, 0, pYUVData, &sBufferInfo); } else if (state == dsErrorFree) { /* call DecodeFrame2 again to decode without delay */ - state = (*sys->pDecoder)->DecodeFrame2(sys->pDecoder, NULL, 0, h264->pYUVData, &sBufferInfo); + state = (*sys->pDecoder)->DecodeFrame2(sys->pDecoder, NULL, 0, pYUVData, &sBufferInfo); } else { @@ -856,6 +867,12 @@ static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSiz } } + pSystemBuffer = &sBufferInfo.UsrData.sSystemBuffer; + + iStride[0] = pSystemBuffer->iStride[0]; + iStride[1] = pSystemBuffer->iStride[1]; + iStride[2] = pSystemBuffer->iStride[1]; + if (sBufferInfo.iBufferStatus != 1) { WLog_WARN(TAG, "DecodeFrame2 iBufferStatus: %d", sBufferInfo.iBufferStatus); @@ -868,44 +885,39 @@ static int openh264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSiz return -2003; } - pSystemBuffer = &sBufferInfo.UsrData.sSystemBuffer; - #if 0 WLog_INFO(TAG, "h264_decompress: state=%u, pYUVData=[%p,%p,%p], bufferStatus=%d, width=%d, height=%d, format=%d, stride=[%d,%d]", - state, h264->pYUVData[0], h264->pYUVData[1], h264->pYUVData[2], sBufferInfo.iBufferStatus, - pSystemBuffer->iWidth, pSystemBuffer->iHeight, pSystemBuffer->iFormat, - pSystemBuffer->iStride[0], pSystemBuffer->iStride[1]); + state, pYUVData[0], pYUVData[1], pYUVData[2], sBufferInfo.iBufferStatus, + pSystemBuffer->iWidth, pSystemBuffer->iHeight, pSystemBuffer->iFormat, + pSystemBuffer->iStride[0], pSystemBuffer->iStride[1]); #endif if (pSystemBuffer->iFormat != videoFormatI420) return -2004; - if (!h264->pYUVData[0] || !h264->pYUVData[1] || !h264->pYUVData[2]) + if (!pYUVData[0] || !pYUVData[1] || !pYUVData[2]) return -2005; - h264->iStride[0] = pSystemBuffer->iStride[0]; - h264->iStride[1] = pSystemBuffer->iStride[1]; - h264->iStride[2] = pSystemBuffer->iStride[1]; - - h264->width = pSystemBuffer->iWidth; - h264->height = pSystemBuffer->iHeight; - return 1; } -static int openh264_compress(H264_CONTEXT* h264, BYTE** ppDstData, UINT32* pDstSize) +static int openh264_compress(H264_CONTEXT* h264, BYTE** ppDstData, UINT32* pDstSize, UINT32 plane) { int i, j; int status; SFrameBSInfo info; SSourcePicture pic; SBitrateInfo bitrate; - H264_CONTEXT_OPENH264* sys = (H264_CONTEXT_OPENH264*) h264->pSystemData; + H264_CONTEXT_OPENH264* sys; + BYTE** pYUVData = h264->pYUVData[plane]; + UINT32* iStride = h264->iStride[plane]; + + sys = &((H264_CONTEXT_OPENH264*) h264->pSystemData)[0]; if (!sys->pEncoder) return -1; - if (!h264->pYUVData[0] || !h264->pYUVData[1] || !h264->pYUVData[2]) + if (!pYUVData[0] || !pYUVData[1] || !pYUVData[2]) return -1; if ((sys->EncParamExt.iPicWidth != h264->width) || (sys->EncParamExt.iPicHeight != h264->height)) @@ -935,16 +947,16 @@ static int openh264_compress(H264_CONTEXT* h264, BYTE** ppDstData, UINT32* pDstS switch (h264->RateControlMode) { - case H264_RATECONTROL_VBR: - sys->EncParamExt.iRCMode = RC_BITRATE_MODE; - sys->EncParamExt.iTargetBitrate = h264->BitRate; - sys->EncParamExt.sSpatialLayers[0].iSpatialBitrate = sys->EncParamExt.iTargetBitrate; - break; + case H264_RATECONTROL_VBR: + sys->EncParamExt.iRCMode = RC_BITRATE_MODE; + sys->EncParamExt.iTargetBitrate = h264->BitRate; + sys->EncParamExt.sSpatialLayers[0].iSpatialBitrate = sys->EncParamExt.iTargetBitrate; + break; - case H264_RATECONTROL_CQP: - sys->EncParamExt.iRCMode = RC_OFF_MODE; - sys->EncParamExt.sSpatialLayers[0].iDLayerQp = h264->QP; - break; + case H264_RATECONTROL_CQP: + sys->EncParamExt.iRCMode = RC_OFF_MODE; + sys->EncParamExt.sSpatialLayers[0].iDLayerQp = h264->QP; + break; } if (sys->EncParamExt.iMultipleThreadIdc > 1) @@ -961,7 +973,7 @@ static int openh264_compress(H264_CONTEXT* h264, BYTE** ppDstData, UINT32* pDstS } status = (*sys->pEncoder)->GetOption(sys->pEncoder, ENCODER_OPTION_SVC_ENCODE_PARAM_EXT, - &sys->EncParamExt); + &sys->EncParamExt); if (status < 0) { @@ -973,52 +985,52 @@ static int openh264_compress(H264_CONTEXT* h264, BYTE** ppDstData, UINT32* pDstS { switch (h264->RateControlMode) { - case H264_RATECONTROL_VBR: - if (sys->EncParamExt.iTargetBitrate != h264->BitRate) + case H264_RATECONTROL_VBR: + if (sys->EncParamExt.iTargetBitrate != h264->BitRate) + { + sys->EncParamExt.iTargetBitrate = h264->BitRate; + bitrate.iLayer = SPATIAL_LAYER_ALL; + bitrate.iBitrate = h264->BitRate; + + status = (*sys->pEncoder)->SetOption(sys->pEncoder, ENCODER_OPTION_BITRATE, + &bitrate); + + if (status < 0) { - sys->EncParamExt.iTargetBitrate = h264->BitRate; - bitrate.iLayer = SPATIAL_LAYER_ALL; - bitrate.iBitrate = h264->BitRate; - - status = (*sys->pEncoder)->SetOption(sys->pEncoder, ENCODER_OPTION_BITRATE, - &bitrate); - - if (status < 0) - { - WLog_ERR(TAG, "Failed to set encoder bitrate (status=%ld)", status); - return status; - } + WLog_ERR(TAG, "Failed to set encoder bitrate (status=%ld)", status); + return status; } - if (sys->EncParamExt.fMaxFrameRate != h264->FrameRate) + } + if (sys->EncParamExt.fMaxFrameRate != h264->FrameRate) + { + sys->EncParamExt.fMaxFrameRate = h264->FrameRate; + + status = (*sys->pEncoder)->SetOption(sys->pEncoder, ENCODER_OPTION_FRAME_RATE, + &sys->EncParamExt.fMaxFrameRate); + + if (status < 0) { - sys->EncParamExt.fMaxFrameRate = h264->FrameRate; - - status = (*sys->pEncoder)->SetOption(sys->pEncoder, ENCODER_OPTION_FRAME_RATE, - &sys->EncParamExt.fMaxFrameRate); - - if (status < 0) - { - WLog_ERR(TAG, "Failed to set encoder framerate (status=%ld)", status); - return status; - } + WLog_ERR(TAG, "Failed to set encoder framerate (status=%ld)", status); + return status; } - break; + } + break; - case H264_RATECONTROL_CQP: - if (sys->EncParamExt.sSpatialLayers[0].iDLayerQp != h264->QP) + case H264_RATECONTROL_CQP: + if (sys->EncParamExt.sSpatialLayers[0].iDLayerQp != h264->QP) + { + sys->EncParamExt.sSpatialLayers[0].iDLayerQp = h264->QP; + + status = (*sys->pEncoder)->SetOption(sys->pEncoder, ENCODER_OPTION_SVC_ENCODE_PARAM_EXT, + &sys->EncParamExt); + + if (status < 0) { - sys->EncParamExt.sSpatialLayers[0].iDLayerQp = h264->QP; - - status = (*sys->pEncoder)->SetOption(sys->pEncoder, ENCODER_OPTION_SVC_ENCODE_PARAM_EXT, - &sys->EncParamExt); - - if (status < 0) - { - WLog_ERR(TAG, "Failed to set encoder parameters (status=%ld)", status); - return status; - } + WLog_ERR(TAG, "Failed to set encoder parameters (status=%ld)", status); + return status; } - break; + } + break; } } @@ -1027,12 +1039,12 @@ static int openh264_compress(H264_CONTEXT* h264, BYTE** ppDstData, UINT32* pDstS pic.iPicWidth = h264->width; pic.iPicHeight = h264->height; pic.iColorFormat = videoFormatI420; - pic.iStride[0] = h264->iStride[0]; - pic.iStride[1] = h264->iStride[1]; - pic.iStride[2] = h264->iStride[2]; - pic.pData[0] = h264->pYUVData[0]; - pic.pData[1] = h264->pYUVData[1]; - pic.pData[2] = h264->pYUVData[2]; + pic.iStride[0] = iStride[0]; + pic.iStride[1] = iStride[1]; + pic.iStride[2] = iStride[2]; + pic.pData[0] = pYUVData[0]; + pic.pData[1] = pYUVData[1]; + pic.pData[2] = pYUVData[2]; status = (*sys->pEncoder)->EncodeFrame(sys->pEncoder, &pic, &info); @@ -1058,108 +1070,138 @@ static int openh264_compress(H264_CONTEXT* h264, BYTE** ppDstData, UINT32* pDstS static void openh264_uninit(H264_CONTEXT* h264) { - H264_CONTEXT_OPENH264* sys = (H264_CONTEXT_OPENH264*) h264->pSystemData; + UINT32 x; + H264_CONTEXT_OPENH264* sysContexts = (H264_CONTEXT_OPENH264*) h264->pSystemData; - if (sys) + if (sysContexts) { - if (sys->pDecoder) + for (x=0; xnumSystemData; x++) { - (*sys->pDecoder)->Uninitialize(sys->pDecoder); - WelsDestroyDecoder(sys->pDecoder); - sys->pDecoder = NULL; - } + H264_CONTEXT_OPENH264* sys = &sysContexts[x]; - if (sys->pEncoder) - { - (*sys->pEncoder)->Uninitialize(sys->pEncoder); - WelsDestroySVCEncoder(sys->pEncoder); - sys->pEncoder = NULL; - } + if (sys->pDecoder) + { + (*sys->pDecoder)->Uninitialize(sys->pDecoder); + WelsDestroyDecoder(sys->pDecoder); + sys->pDecoder = NULL; + } - free(sys); + if (sys->pEncoder) + { + (*sys->pEncoder)->Uninitialize(sys->pEncoder); + WelsDestroySVCEncoder(sys->pEncoder); + sys->pEncoder = NULL; + } + } + free(h264->pSystemData); h264->pSystemData = NULL; } } static BOOL openh264_init(H264_CONTEXT* h264) { + UINT32 x; long status; SDecodingParam sDecParam; - H264_CONTEXT_OPENH264* sys; + H264_CONTEXT_OPENH264* sysContexts; static int traceLevel = WELS_LOG_DEBUG; static EVideoFormatType videoFormat = videoFormatI420; static WelsTraceCallback traceCallback = (WelsTraceCallback) openh264_trace_callback; - sys = (H264_CONTEXT_OPENH264*) calloc(1, sizeof(H264_CONTEXT_OPENH264)); + h264->numSystemData = 1; - if (!sys) - { + sysContexts = (H264_CONTEXT_OPENH264*) calloc(h264->numSystemData, + sizeof(H264_CONTEXT_OPENH264)); + + if (!sysContexts) goto EXCEPTION; - } - h264->pSystemData = (void*) sys; + h264->pSystemData = (void*) sysContexts; - if (h264->Compressor) + for (x=0; xnumSystemData; x++) { - WelsCreateSVCEncoder(&sys->pEncoder); + H264_CONTEXT_OPENH264* sys = &sysContexts[x]; - if (!sys->pEncoder) + if (h264->Compressor) { - WLog_ERR(TAG, "Failed to create OpenH264 encoder"); - goto EXCEPTION; - } - } - else - { - WelsCreateDecoder(&sys->pDecoder); + WelsCreateSVCEncoder(&sys->pEncoder); - if (!sys->pDecoder) - { - WLog_ERR(TAG, "Failed to create OpenH264 decoder"); - goto EXCEPTION; - } - - ZeroMemory(&sDecParam, sizeof(sDecParam)); - sDecParam.eOutputColorFormat = videoFormatI420; - sDecParam.eEcActiveIdc = ERROR_CON_FRAME_COPY; - sDecParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_AVC; - - status = (*sys->pDecoder)->Initialize(sys->pDecoder, &sDecParam); - - if (status != 0) - { - WLog_ERR(TAG, "Failed to initialize OpenH264 decoder (status=%ld)", status); - goto EXCEPTION; - } - - status = (*sys->pDecoder)->SetOption(sys->pDecoder, DECODER_OPTION_DATAFORMAT, &videoFormat); - - if (status != 0) - { - WLog_ERR(TAG, "Failed to set data format option on OpenH264 decoder (status=%ld)", status); - } - - if (g_openh264_trace_enabled) - { - status = (*sys->pDecoder)->SetOption(sys->pDecoder, DECODER_OPTION_TRACE_LEVEL, &traceLevel); - - if (status != 0) + if (!sys->pEncoder) { - WLog_ERR(TAG, "Failed to set trace level option on OpenH264 decoder (status=%ld)", status); + WLog_ERR(TAG, "Failed to create OpenH264 encoder"); + goto EXCEPTION; + } + } + else + { + WelsCreateDecoder(&sys->pDecoder); + + if (!sys->pDecoder) + { + WLog_ERR(TAG, "Failed to create OpenH264 decoder"); + goto EXCEPTION; } - status = (*sys->pDecoder)->SetOption(sys->pDecoder, DECODER_OPTION_TRACE_CALLBACK, &traceCallback); + ZeroMemory(&sDecParam, sizeof(sDecParam)); + sDecParam.eOutputColorFormat = videoFormatI420; + sDecParam.eEcActiveIdc = ERROR_CON_FRAME_COPY; + sDecParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_AVC; + + status = (*sys->pDecoder)->Initialize(sys->pDecoder, &sDecParam); if (status != 0) { - WLog_ERR(TAG, "Failed to set trace callback option on OpenH264 decoder (status=%ld)", status); + WLog_ERR(TAG, "Failed to initialize OpenH264 decoder (status=%ld)", + status); + goto EXCEPTION; } - status = (*sys->pDecoder)->SetOption(sys->pDecoder, DECODER_OPTION_TRACE_CALLBACK_CONTEXT, &h264); + status = (*sys->pDecoder)->SetOption( + sys->pDecoder, DECODER_OPTION_DATAFORMAT, + &videoFormat); if (status != 0) { - WLog_ERR(TAG, "Failed to set trace callback context option on OpenH264 decoder (status=%ld)", status); + WLog_ERR(TAG, "Failed to set data format option on OpenH264 decoder (status=%ld)", + status); + goto EXCEPTION; + } + + if (g_openh264_trace_enabled) + { + status = (*sys->pDecoder)->SetOption( + sys->pDecoder, DECODER_OPTION_TRACE_LEVEL, + &traceLevel); + + if (status != 0) + { + WLog_ERR(TAG, "Failed to set trace level option on OpenH264 decoder (status=%ld)", + status); + goto EXCEPTION; + } + + status = (*sys->pDecoder)->SetOption( + sys->pDecoder, DECODER_OPTION_TRACE_CALLBACK, + &traceCallback); + + if (status != 0) + { + WLog_ERR(TAG, "Failed to set trace callback option on OpenH264 decoder (status=%ld)", + status); + goto EXCEPTION; + } + + status = (*sys->pDecoder)->SetOption( + sys->pDecoder, + DECODER_OPTION_TRACE_CALLBACK_CONTEXT, + &h264); + + if (status != 0) + { + WLog_ERR(TAG, "Failed to set trace callback context option on OpenH264 decoder (status=%ld)", + status); + goto EXCEPTION; + } } } } @@ -1201,12 +1243,14 @@ struct _H264_CONTEXT_LIBAVCODEC }; typedef struct _H264_CONTEXT_LIBAVCODEC H264_CONTEXT_LIBAVCODEC; -static int libavcodec_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize) +static int libavcodec_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize, UINT32 plane) { int status; int gotFrame = 0; AVPacket packet; H264_CONTEXT_LIBAVCODEC* sys = (H264_CONTEXT_LIBAVCODEC*) h264->pSystemData; + BYTE** pYUVData = h264->pYUVData[plane]; + INT32* iStride = h264->iStride[plane]; av_init_packet(&packet); @@ -1223,21 +1267,21 @@ static int libavcodec_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcS #if 0 WLog_INFO(TAG, "libavcodec_decompress: frame decoded (status=%d, gotFrame=%d, width=%d, height=%d, Y=[%p,%d], U=[%p,%d], V=[%p,%d])", - status, gotFrame, sys->videoFrame->width, sys->videoFrame->height, - sys->videoFrame->data[0], sys->videoFrame->linesize[0], - sys->videoFrame->data[1], sys->videoFrame->linesize[1], - sys->videoFrame->data[2], sys->videoFrame->linesize[2]); + status, gotFrame, sys->videoFrame->width, sys->videoFrame->height, + sys->videoFrame->data[0], sys->videoFrame->linesize[0], + sys->videoFrame->data[1], sys->videoFrame->linesize[1], + sys->videoFrame->data[2], sys->videoFrame->linesize[2]); #endif if (gotFrame) { - h264->pYUVData[0] = sys->videoFrame->data[0]; - h264->pYUVData[1] = sys->videoFrame->data[1]; - h264->pYUVData[2] = sys->videoFrame->data[2]; + pYUVData[0] = sys->videoFrame->data[0]; + pYUVData[1] = sys->videoFrame->data[1]; + pYUVData[2] = sys->videoFrame->data[2]; - h264->iStride[0] = sys->videoFrame->linesize[0]; - h264->iStride[1] = sys->videoFrame->linesize[1]; - h264->iStride[2] = sys->videoFrame->linesize[2]; + iStride[0] = sys->videoFrame->linesize[0]; + iStride[1] = sys->videoFrame->linesize[1]; + iStride[2] = sys->videoFrame->linesize[2]; h264->width = sys->videoFrame->width; h264->height = sys->videoFrame->height; @@ -1351,34 +1395,111 @@ static H264_CONTEXT_SUBSYSTEM g_Subsystem_libavcodec = #endif -int h264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize, - BYTE** ppDstData, DWORD DstFormat, int nDstStep, int nDstWidth, - int nDstHeight, RDPGFX_RECT16* regionRects, int numRegionRects) +static BOOL check_rect(const H264_CONTEXT* h264, const RECTANGLE_16* rect, + UINT32 nDstWidth, UINT32 nDstHeight) { - int index; - int status; - int* iStride; - BYTE* pDstData; + /* Check, if the output rectangle is valid in decoded h264 frame. */ + if ((rect->right > h264->width) || (rect->left > h264->width)) + return FALSE; + if ((rect->top > h264->height) || (rect->bottom > h264->height)) + return FALSE; + + /* Check, if the output rectangle is valid in destination buffer. */ + if ((rect->right > nDstWidth) || (rect->left > nDstWidth)) + return FALSE; + if ((rect->bottom > nDstHeight) || (rect->top > nDstHeight)) + return FALSE; + + return TRUE; +} + +static BOOL avc_yuv_to_rgb(H264_CONTEXT* h264, const RECTANGLE_16* regionRects, + UINT32 numRegionRects, UINT32 nDstWidth, + UINT32 nDstHeight, UINT32 nDstStep, BYTE* pDstData, + DWORD DstFormat, BOOL use444) +{ + UINT32 x; BYTE* pDstPoint; prim_size_t roi; - BYTE** pYUVData; int width, height; - BYTE* pYUVPoint[3]; - RDPGFX_RECT16* rect; + const BYTE* pYUVPoint[3]; primitives_t* prims = primitives_get(); + for (x=0; xiYUV444Stride; + ppYUVData = h264->pYUV444Data; + } + else + { + iStride = h264->iStride[0]; + ppYUVData = h264->pYUVData[0]; + } + + if (!check_rect(h264, rect, nDstWidth, nDstHeight)) + return -1003; + + width = rect->right - rect->left; + height = rect->bottom - rect->top; + + pDstPoint = pDstData + rect->top * nDstStep + rect->left * 4; + + pYUVPoint[0] = ppYUVData[0] + rect->top * iStride[0] + rect->left; + pYUVPoint[1] = ppYUVData[1]; + pYUVPoint[2] = ppYUVData[2]; + if (use444) + { + pYUVPoint[1] += rect->top * iStride[1] + rect->left; + pYUVPoint[2] += rect->top * iStride[2] + rect->left; + } + else + { + pYUVPoint[1] += rect->top/2 * iStride[1] + rect->left/2; + pYUVPoint[2] += rect->top/2 * iStride[2] + rect->left/2; + } + + + roi.width = width; + roi.height = height; + + if (use444) + { + if (prims->YUV444ToRGB_8u_P3AC4R( + pYUVPoint, iStride, pDstPoint, + nDstStep, &roi) != PRIMITIVES_SUCCESS) + { + return FALSE; + } + } + else + { + if (prims->YUV420ToRGB_8u_P3AC4R(pYUVPoint, iStride, pDstPoint, + nDstStep, &roi) != PRIMITIVES_SUCCESS) + return FALSE; + } + } + + return TRUE; +} + +INT32 avc420_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize, + BYTE* pDstData, DWORD DstFormat, UINT32 nDstStep, + UINT32 nDstWidth, UINT32 nDstHeight, + RECTANGLE_16* regionRects, UINT32 numRegionRects) +{ + int status; + if (!h264) return -1001; -#if 0 - WLog_INFO(TAG, "h264_decompress: pSrcData=%p, SrcSize=%u, pDstData=%p, nDstStep=%d, nDstHeight=%d, numRegionRects=%d", - pSrcData, SrcSize, *ppDstData, nDstStep, nDstHeight, numRegionRects); -#endif - - if (!(pDstData = *ppDstData)) - return -1002; - - status = h264->subsystem->Decompress(h264, pSrcData, SrcSize); + status = h264->subsystem->Decompress(h264, pSrcData, SrcSize, 0); if (status == 0) return 1; @@ -1386,93 +1507,324 @@ int h264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize, if (status < 0) return status; - pYUVData = h264->pYUVData; - iStride = h264->iStride; - - for (index = 0; index < numRegionRects; index++) - { - rect = &(regionRects[index]); - - /* Check, if the output rectangle is valid in decoded h264 frame. */ - if ((rect->right > h264->width) || (rect->left > h264->width)) - return -1003; - if ((rect->top > h264->height) || (rect->bottom > h264->height)) - return -1004; - - /* Check, if the output rectangle is valid in destination buffer. */ - if ((rect->right > nDstWidth) || (rect->left > nDstWidth)) - return -1005; - if ((rect->bottom > nDstHeight) || (rect->top > nDstHeight)) - return -1006; - - width = rect->right - rect->left; - height = rect->bottom - rect->top; - - pDstPoint = pDstData + rect->top * nDstStep + rect->left * 4; - pYUVPoint[0] = pYUVData[0] + rect->top * iStride[0] + rect->left; - - pYUVPoint[1] = pYUVData[1] + rect->top/2 * iStride[1] + rect->left/2; - pYUVPoint[2] = pYUVData[2] + rect->top/2 * iStride[2] + rect->left/2; - -#if 0 - WLog_INFO(TAG, "regionRect: x: %d y: %d width: %d height: %d", - rect->left, rect->top, width, height); -#endif - - roi.width = width; - roi.height = height; - - prims->YUV420ToRGB_8u_P3AC4R((const BYTE**) pYUVPoint, iStride, pDstPoint, nDstStep, &roi); - } + if (!avc_yuv_to_rgb(h264, regionRects, numRegionRects, nDstWidth, + nDstHeight, nDstStep, pDstData, DstFormat, FALSE)) + return -1002; return 1; } -int h264_compress(H264_CONTEXT* h264, BYTE* pSrcData, DWORD SrcFormat, - int nSrcStep, int nSrcWidth, int nSrcHeight, BYTE** ppDstData, UINT32* pDstSize) +INT32 avc420_compress(H264_CONTEXT* h264, BYTE* pSrcData, DWORD SrcFormat, + UINT32 nSrcStep, UINT32 nSrcWidth, UINT32 nSrcHeight, + BYTE** ppDstData, UINT32* pDstSize) { int status = -1; prim_size_t roi; int nWidth, nHeight; primitives_t* prims = primitives_get(); + UINT32* iStride; + BYTE** pYUVData; if (!h264) return -1; + if (!h264->subsystem->Compress) return -1; + iStride = h264->iStride[0]; + pYUVData = h264->pYUVData[0]; + nWidth = (nSrcWidth + 1) & ~1; nHeight = (nSrcHeight + 1) & ~1; - if (!(h264->pYUVData[0] = (BYTE*) malloc(nWidth * nHeight))) + if (!(pYUVData[0] = (BYTE*) malloc(nWidth * nHeight))) return -1; - h264->iStride[0] = nWidth; + iStride[0] = nWidth; - if (!(h264->pYUVData[1] = (BYTE*) malloc(nWidth * nHeight / 4))) + if (!(pYUVData[1] = (BYTE*) malloc(nWidth * nHeight))) goto error_1; - h264->iStride[1] = nWidth / 2; + iStride[1] = nWidth / 2; - if (!(h264->pYUVData[2] = (BYTE*) malloc(nWidth * nHeight / 4))) + if (!(pYUVData[2] = (BYTE*) malloc(nWidth * nHeight))) goto error_2; - h264->iStride[2] = nWidth / 2; + iStride[2] = nWidth / 2; - h264->width = nWidth; - h264->height = nHeight; roi.width = nSrcWidth; roi.height = nSrcHeight; - prims->RGBToYUV420_8u_P3AC4R(pSrcData, nSrcStep, h264->pYUVData, h264->iStride, &roi); + prims->RGBToYUV420_8u_P3AC4R(pSrcData, nSrcStep, pYUVData, iStride, &roi); - status = h264->subsystem->Compress(h264, ppDstData, pDstSize); + status = h264->subsystem->Compress(h264, ppDstData, pDstSize, 0); - free(h264->pYUVData[2]); - h264->pYUVData[2] = NULL; + free(pYUVData[2]); + pYUVData[2] = NULL; error_2: - free(h264->pYUVData[1]); - h264->pYUVData[1] = NULL; + free(pYUVData[1]); + pYUVData[1] = NULL; error_1: - free(h264->pYUVData[0]); - h264->pYUVData[0] = NULL; + free(pYUVData[0]); + pYUVData[0] = NULL; + + return status; +} + +INT32 avc444_compress(H264_CONTEXT* h264, BYTE* pSrcData, DWORD SrcFormat, + UINT32 nSrcStep, UINT32 nSrcWidth, UINT32 nSrcHeight, + BYTE* op, BYTE** ppDstData, UINT32* pDstSize, + BYTE** ppAuxDstData, UINT32* pAuxDstSize) +{ + return -1; +} + +static BOOL avc444_process_rect(H264_CONTEXT* h264, + const RECTANGLE_16* rect, + UINT32 nDstWidth, UINT32 nDstHeight) +{ + const primitives_t* prims = primitives_get(); + prim_size_t roi; + UINT16 width, height; + const BYTE* pYUVMainPoint[3]; + const BYTE* pYUVAuxPoint[3]; + BYTE* pYUVDstPoint[3]; + + UINT32* piDstStride = h264->iYUV444Stride; + BYTE** ppYUVDstData = h264->pYUV444Data; + const UINT32* piAuxStride = h264->iStride[1]; + const UINT32* piMainStride = h264->iStride[0]; + BYTE** ppYUVAuxData = h264->pYUVData[1]; + BYTE** ppYUVMainData = h264->pYUVData[0]; + + if (!check_rect(h264, rect, nDstWidth, nDstHeight)) + return FALSE; + + width = rect->right - rect->left + 1; + height = rect->bottom - rect->top + 1; + + roi.width = width; + roi.height = height; + + pYUVMainPoint[0] = ppYUVMainData[0] + rect->top * piMainStride[0] + + rect->left; + pYUVMainPoint[1] = ppYUVMainData[1] + rect->top/2 * piMainStride[1] + + rect->left/2; + pYUVMainPoint[2] = ppYUVMainData[2] + rect->top/2 * piMainStride[2] + + rect->left/2; + pYUVDstPoint[0] = ppYUVDstData[0] + rect->top * piDstStride[0] + + rect->left; + pYUVDstPoint[1] = ppYUVDstData[1] + rect->top * piDstStride[1] + + rect->left; + pYUVDstPoint[2] = ppYUVDstData[2] + rect->top * piDstStride[2] + + rect->left; + + pYUVAuxPoint[0] = ppYUVAuxData[0] + rect->top * piAuxStride[0] + + rect->left; + pYUVAuxPoint[1] = ppYUVAuxData[1] + rect->top/2 * piAuxStride[1] + + rect->left/2; + pYUVAuxPoint[2] = ppYUVAuxData[2] + rect->top/2 * piAuxStride[2] + + rect->left/2; + pYUVDstPoint[0] = ppYUVDstData[0] + rect->top * piDstStride[0] + + rect->left; + pYUVDstPoint[1] = ppYUVDstData[1] + rect->top * piDstStride[1] + + rect->left; + pYUVDstPoint[2] = ppYUVDstData[2] + rect->top * piDstStride[2] + + rect->left; + + if (prims->YUV420CombineToYUV444(pYUVMainPoint, piMainStride, + NULL, NULL, + pYUVDstPoint, piDstStride, + &roi) != PRIMITIVES_SUCCESS) + return FALSE; + return TRUE; +} + +static void avc444_rectangle_max(RECTANGLE_16* dst, const RECTANGLE_16* add) +{ + if (dst->left > add->left) + dst->left = add->left; + if (dst->right < add->right) + dst->right = add->right; + if (dst->top > add->top) + dst->top = add->top; + if (dst->bottom < add->bottom) + dst->bottom = add->bottom; +} + +static BOOL avc444_combine_yuv(H264_CONTEXT* h264, + const RECTANGLE_16* mainRegionRects, + UINT32 numMainRegionRect, + const RECTANGLE_16* auxRegionRects, + UINT32 numAuxRegionRect, UINT32 nDstWidth, + DWORD nDstHeight, UINT32 nDstStep) +{ + UINT32 x; + RECTANGLE_16 rect; + const UINT32* piMainStride = h264->iStride[0]; + UINT32* piDstSize = h264->iYUV444Size; + UINT32* piDstStride = h264->iYUV444Stride; + BYTE** ppYUVDstData = h264->pYUV444Data; + UINT32 padDstHeight = nDstHeight + 16; /* Need alignment to 16x16 blocks */ + + if ((piMainStride[0] != piDstStride[0]) || + (piDstSize[0] != piMainStride[0] * padDstHeight)) + { + for (x=0; x<3; x++) + { + BYTE* ppYUVTmpData; + + piDstStride[x] = piMainStride[0]; + piDstSize[x] = piDstStride[x] * padDstHeight; + + ppYUVTmpData = realloc(ppYUVDstData[x], piDstSize[x]); + + if (!ppYUVTmpData) + goto fail; + + ppYUVDstData[x] = ppYUVTmpData; + + memset(ppYUVDstData[x], 0, piDstSize[x]); + } + } + + for (x=0; x<3; x++) + { + if (!ppYUVDstData[x] || (piDstSize[x] == 0) || (piDstStride[x] == 0)) + { + WLog_ERR(TAG, "YUV buffer not initialized! check your decoder settings"); + goto fail; + } + } + + rect.right = 0; + rect.bottom = 0; + rect.left = 0xFFFF; + rect.top = 0xFFFF; + for (x=0; xsubsystem->Decompress(h264, pSrcData, SrcSize, 0); + if (status >= 0) + status = h264->subsystem->Decompress(h264, pAuxSrcData, AuxSrcSize, 1); + break; + case 2: /* Chroma420 in stream 1 */ + status = h264->subsystem->Decompress(h264, pSrcData, SrcSize, 1); + numChromaRects = numRegionRects; + chromaRects = regionRects; + break; + case 1: /* YUV420 in stream 1 */ + status = h264->subsystem->Decompress(h264, pSrcData, SrcSize, 0); + numYuvRects = numRegionRects; + yuvRects = regionRects; + break; + default: /* WTF? */ + break; + } + +#if defined(AVC444_FRAME_STAT) + switch(op) + { + case 0: + op1sum = avg(&op1, op1sum, SrcSize + AuxSrcSize); + break; + case 1: + op2sum = avg(&op2, op2sum, SrcSize); + break; + case 2: + op3sum = avg(&op3, op3sum, SrcSize); + break; + default: + break; + } + + WLog_INFO(TAG, "luma=%llu [avg=%lf] chroma=%llu [avg=%lf] combined=%llu [avg=%lf]", + op1, op1sum, op2, op2sum, op3, op3sum); +#endif + + if (status >= 0) + { + if (!avc444_combine_yuv(h264, yuvRects, numYuvRects, + chromaRects, numChromaRects, + nDstWidth, nDstHeight, nDstStep)) + status = -1002; + else + { + if (numYuvRects > 0) + { + if (!avc_yuv_to_rgb(h264, regionRects, numRegionRects, nDstWidth, + nDstHeight, nDstStep, pDstData, DstFormat, TRUE)) + status = -1003; + } + + if (numChromaRects > 0) + { + if (!avc_yuv_to_rgb(h264, auxRegionRects, numAuxRegionRect, + nDstWidth, nDstHeight, nDstStep, pDstData, + DstFormat, TRUE)) + status = -1004; + } + } + } return status; } @@ -1560,6 +1912,9 @@ void h264_context_free(H264_CONTEXT* h264) { h264->subsystem->Uninit(h264); + free (h264->pYUV444Data[0]); + free (h264->pYUV444Data[1]); + free (h264->pYUV444Data[2]); free(h264); } } diff --git a/libfreerdp/core/codecs.c b/libfreerdp/core/codecs.c index 11fbfa135..af1d9b16c 100644 --- a/libfreerdp/core/codecs.c +++ b/libfreerdp/core/codecs.c @@ -88,7 +88,7 @@ BOOL freerdp_client_codecs_prepare(rdpCodecs* codecs, UINT32 flags) } } - if ((flags & FREERDP_CODEC_H264) && !codecs->h264) + if ((flags & (FREERDP_CODEC_AVC420 | FREERDP_CODEC_AVC444)) && !codecs->h264) { if (!(codecs->h264 = h264_context_new(FALSE))) { @@ -161,7 +161,7 @@ BOOL freerdp_client_codecs_reset(rdpCodecs* codecs, UINT32 flags, } } - if (flags & FREERDP_CODEC_H264) + if (flags & (FREERDP_CODEC_AVC420 | FREERDP_CODEC_AVC444)) { if (codecs->h264) { @@ -179,9 +179,7 @@ rdpCodecs* codecs_new(rdpContext* context) codecs = (rdpCodecs*) calloc(1, sizeof(rdpCodecs)); if (codecs) - { codecs->context = context; - } return codecs; } diff --git a/libfreerdp/gdi/gfx.c b/libfreerdp/gdi/gfx.c index 523734038..df53ca063 100644 --- a/libfreerdp/gdi/gfx.c +++ b/libfreerdp/gdi/gfx.c @@ -461,39 +461,38 @@ UINT gdi_SurfaceCommand_Planar(rdpGdi* gdi, RdpgfxClientContext* context, RDPGFX * * @return 0 on success, otherwise a Win32 error code */ -UINT gdi_SurfaceCommand_H264(rdpGdi* gdi, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) +static UINT gdi_SurfaceCommand_AVC420(rdpGdi* gdi, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) { int status; UINT32 i; - BYTE* DstData = NULL; gdiGfxSurface* surface; RDPGFX_H264_METABLOCK* meta; - RDPGFX_H264_BITMAP_STREAM* bs; + RDPGFX_AVC420_BITMAP_STREAM* bs; surface = (gdiGfxSurface*) context->GetSurfaceData(context, cmd->surfaceId); if (!surface) return ERROR_INTERNAL_ERROR; - if (!freerdp_client_codecs_prepare(surface->codecs, FREERDP_CODEC_H264)) + if (!freerdp_client_codecs_prepare(surface->codecs, FREERDP_CODEC_AVC420)) return ERROR_INTERNAL_ERROR; - bs = (RDPGFX_H264_BITMAP_STREAM*) cmd->extra; + bs = (RDPGFX_AVC420_BITMAP_STREAM*) cmd->extra; if (!bs) return ERROR_INTERNAL_ERROR; meta = &(bs->meta); - DstData = surface->data; - - status = h264_decompress(surface->codecs->h264, bs->data, bs->length, &DstData, - PIXEL_FORMAT_XRGB32, surface->scanline, surface->width, surface->height, - meta->regionRects, meta->numRegionRects); + status = avc420_decompress(surface->codecs->h264, bs->data, bs->length, + surface->data, PIXEL_FORMAT_XRGB32, + surface->scanline, surface->width, + surface->height, meta->regionRects, + meta->numRegionRects); if (status < 0) { - WLog_WARN(TAG, "h264_decompress failure: %d, ignoring update.", status); + WLog_WARN(TAG, "avc420_decompress failure: %d, ignoring update.", status); return CHANNEL_RC_OK; } @@ -508,6 +507,77 @@ UINT gdi_SurfaceCommand_H264(rdpGdi* gdi, RdpgfxClientContext* context, RDPGFX_S return CHANNEL_RC_OK; } +/** + * Function description + * + * @return 0 on success, otherwise a Win32 error code + */ +static UINT gdi_SurfaceCommand_AVC444(rdpGdi* gdi, RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cmd) +{ + int status; + UINT32 i; + gdiGfxSurface* surface; + RDPGFX_AVC444_BITMAP_STREAM* bs; + RDPGFX_AVC420_BITMAP_STREAM* avc1; + RDPGFX_H264_METABLOCK* meta1; + RDPGFX_AVC420_BITMAP_STREAM* avc2; + RDPGFX_H264_METABLOCK* meta2; + RECTANGLE_16* regionRects = NULL; + + surface = (gdiGfxSurface*) context->GetSurfaceData(context, cmd->surfaceId); + + if (!surface) + return ERROR_INTERNAL_ERROR; + + if (!freerdp_client_codecs_prepare(surface->codecs, FREERDP_CODEC_AVC444)) + return ERROR_INTERNAL_ERROR; + + bs = (RDPGFX_AVC444_BITMAP_STREAM*) cmd->extra; + + if (!bs) + return ERROR_INTERNAL_ERROR; + + avc1 = &bs->bitstream[0]; + avc2 = &bs->bitstream[1]; + meta1 = &avc1->meta; + meta2 = &avc2->meta; + status = avc444_decompress(surface->codecs->h264, bs->LC, + meta1->regionRects, meta1->numRegionRects, + avc1->data, avc1->length, + meta2->regionRects, meta2->numRegionRects, + avc2->data, avc2->length, + surface->data, PIXEL_FORMAT_XRGB32, + surface->scanline, surface->width, + surface->height); + + if (status < 0) + { + WLog_WARN(TAG, "avc444_decompress failure: %d, ignoring update.", status); + return CHANNEL_RC_OK; + } + + for (i = 0; i < meta1->numRegionRects; i++) + { + region16_union_rect(&(surface->invalidRegion), + &(surface->invalidRegion), + &(meta1->regionRects[i])); + } + + for (i = 0; i < meta2->numRegionRects; i++) + { + region16_union_rect(&(surface->invalidRegion), + &(surface->invalidRegion), + &(meta2->regionRects[i])); + } + + if (!gdi->inGfxFrame) + gdi_UpdateSurfaces(gdi); + + free(regionRects); + + return CHANNEL_RC_OK; +} + /** * Function description * @@ -677,8 +747,12 @@ UINT gdi_SurfaceCommand(RdpgfxClientContext* context, RDPGFX_SURFACE_COMMAND* cm status = gdi_SurfaceCommand_Planar(gdi, context, cmd); break; - case RDPGFX_CODECID_H264: - status = gdi_SurfaceCommand_H264(gdi, context, cmd); + case RDPGFX_CODECID_AVC420: + status = gdi_SurfaceCommand_AVC420(gdi, context, cmd); + break; + + case RDPGFX_CODECID_AVC444: + status = gdi_SurfaceCommand_AVC444(gdi, context, cmd); break; case RDPGFX_CODECID_ALPHA: @@ -807,7 +881,7 @@ UINT gdi_SolidFill(RdpgfxClientContext* context, RDPGFX_SOLID_FILL_PDU* solidFil UINT32 color; BYTE a, r, g, b; int nWidth, nHeight; - RDPGFX_RECT16* rect; + RECTANGLE_16* rect; gdiGfxSurface* surface; RECTANGLE_16 invalidRect; rdpGdi* gdi = (rdpGdi*) context->custom; @@ -861,7 +935,7 @@ UINT gdi_SurfaceToSurface(RdpgfxClientContext* context, RDPGFX_SURFACE_TO_SURFAC UINT16 index; BOOL sameSurface; int nWidth, nHeight; - RDPGFX_RECT16* rectSrc; + RECTANGLE_16* rectSrc; RDPGFX_POINT16* destPt; RECTANGLE_16 invalidRect; gdiGfxSurface* surfaceSrc; @@ -922,7 +996,7 @@ UINT gdi_SurfaceToSurface(RdpgfxClientContext* context, RDPGFX_SURFACE_TO_SURFAC */ UINT gdi_SurfaceToCache(RdpgfxClientContext* context, RDPGFX_SURFACE_TO_CACHE_PDU* surfaceToCache) { - RDPGFX_RECT16* rect; + RECTANGLE_16* rect; gdiGfxSurface* surface; gdiGfxCacheEntry* cacheEntry; rdpGdi* gdi = (rdpGdi*) context->custom; diff --git a/libfreerdp/primitives/prim_YUV.c b/libfreerdp/primitives/prim_YUV.c index c8cddd50f..9e8b293ed 100644 --- a/libfreerdp/primitives/prim_YUV.c +++ b/libfreerdp/primitives/prim_YUV.c @@ -26,43 +26,368 @@ #include "prim_YUV.h" +static INLINE BYTE CLIP(INT32 X) +{ + if (X > 255L) + return 255L; + if (X < 0L) + return 0L; + return X; +} + +/** + * @brief general_YUV420CombineToYUV444 + * + * @param pMainSrc Pointer to luma YUV420 data + * @param srcMainStep Step width in luma YUV420 data + * @param pAuxSrc Pointer to chroma YUV420 data + * @param srcAuxStep Step width in chroma YUV420 data + * @param pDst Pointer to YUV444 data + * @param dstStep Step width in YUV444 data + * @param roi Region of source to combine in destination. + * + * @return PRIMITIVES_SUCCESS on success, an error code otherwise. + */ +static pstatus_t general_YUV420CombineToYUV444( + const BYTE* pMainSrc[3], const UINT32 srcMainStep[3], + const BYTE* pAuxSrc[3], const UINT32 srcAuxStep[3], + BYTE* pDst[3], const UINT32 dstStep[3], + const prim_size_t* roi) +{ + const UINT32 mod = 16; + UINT32 uY = 0; + UINT32 vY = 0; + UINT32 x, y; + UINT32 nWidth, nHeight; + UINT32 halfWidth, halfHeight; + const UINT32 oddY = 1; + const UINT32 evenY = 0; + const UINT32 oddX = 1; + const UINT32 evenX = 0; + + /* The auxilary frame is aligned to multiples of 16x16. + * We need the padded height for B4 and B5 conversion. */ + const UINT32 padHeigth = roi->height + 16 - roi->height % 16; + + nWidth = roi->width; + nHeight = roi->height; + halfWidth = (nWidth ) / 2; + halfHeight = (nHeight) / 2; + + if (pMainSrc) + { + /* Y data is already here... */ + /* B1 */ + for (y=0; y= nHeight) + continue; + pX = pDst[1] + dstStep[1] * pos; + } + else + { + const UINT32 pos = (2 * vY++ + oddY); + if (pos >= nHeight) + continue; + pX = pDst[2] + dstStep[2] * pos; + } + + memcpy(pX, Ya, nWidth); + } + + /* B6 and B7 */ + for (y=0; y nHeight) + continue; + + for (x=0; x nWidth) + continue; + + u2020 = up - pU[val2x1] - pU1[val2x] - pU1[val2x1]; + v2020 = vp - pV[val2x1] - pV1[val2x] - pV1[val2x1]; + + pU[val2x] = CLIP(u2020); + pV[val2x] = CLIP(v2020); + } + } + + return PRIMITIVES_SUCCESS; +} + +static pstatus_t general_YUV444SplitToYUV420( + const BYTE* pSrc[3], const UINT32 srcStep[3], + BYTE* pMainDst[3], const UINT32 dstMainStep[3], + BYTE* pAuxDst[3], const UINT32 dstAuxStep[3], + const prim_size_t* roi) +{ + UINT32 x, y, uY = 0, vY = 0; + UINT32 halfWidth, halfHeight; + /* The auxilary frame is aligned to multiples of 16x16. + * We need the padded height for B4 and B5 conversion. */ + const UINT32 padHeigth = roi->height + 16 - roi->height % 16; + + halfWidth = (roi->width + 1) / 2; + halfHeight = (roi->height + 1) / 2; + + /* B1 */ + for (y=0; yheight; y++) + { + const BYTE* pSrcY = pSrc[0] + y * srcStep[0]; + BYTE* pY = pMainDst[0] + y * dstMainStep[0]; + memcpy(pY, pSrcY, roi->width); + } + + /* B2 and B3 */ + for (y=0; y= roi->height) + continue; + memcpy(pY, pSrcU, roi->width); + } + else + { + const UINT32 pos = (2 * vY++ + 1); + const BYTE* pSrcV = pSrc[2] + pos * srcStep[2]; + if (pos >= roi->height) + continue; + memcpy(pY, pSrcV, roi->width); + } + } + + /* B6 and B7 */ + for (y=0; y> 8 + * | B | ( | 256 475 0 | | V - 128 | ) + */ +static INLINE INT32 C(INT32 Y) +{ + return (Y) - 0L; +} + +static INLINE INT32 D(INT32 U) +{ + return (U) - 128L; +} + +static INLINE INT32 E(INT32 V) +{ + return (V) - 128L; +} + +static INLINE BYTE YUV2R(INT32 Y, INT32 U, INT32 V) +{ + const INT32 r = ( 256L * C(Y) + 0L * D(U) + 403L * E(V)); + const INT32 r8 = r >> 8L; + return CLIP(r8); +} + +static INLINE BYTE YUV2G(INT32 Y, INT32 U, INT32 V) +{ + const INT32 g = ( 256L * C(Y) - 48L * D(U) - 120L * E(V)); + const INT32 g8 = g >> 8L; + return CLIP(g8); +} + +static INLINE BYTE YUV2B(INT32 Y, INT32 U, INT32 V) +{ + const INT32 b = ( 256L * C(Y) + 475L * D(U) + 0L * E(V)); + const INT32 b8 = b >> 8L; + return CLIP(b8); +} + +static pstatus_t general_YUV444ToRGB_8u_P3AC4R( + const BYTE* pSrc[3], const UINT32 srcStep[3], + BYTE* pDst, UINT32 dstStep, const prim_size_t* roi) +{ + UINT32 x, y; + UINT32 nWidth, nHeight; + + nWidth = roi->width; + nHeight = roi->height; + + for (y = 0; y < nHeight; y++) + { + const BYTE* pY = pSrc[0] + y * srcStep[0]; + const BYTE* pU = pSrc[1] + y * srcStep[1]; + const BYTE* pV = pSrc[2] + y * srcStep[2]; + BYTE* pRGB = pDst + y * dstStep; + + for (x = 0; x < nWidth; x++) + { + const BYTE Y = pY[x]; + const INT32 U = pU[x]; + const INT32 V = pV[x]; + + pRGB[4*x+0] = YUV2B(Y, U, V); + pRGB[4*x+1] = YUV2G(Y, U, V); + pRGB[4*x+2] = YUV2R(Y, U, V); + pRGB[4*x+3] = 0xFF; + } + } + + return PRIMITIVES_SUCCESS; +} + /** * | R | ( | 256 0 403 | | Y | ) * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 * | B | ( | 256 475 0 | | V - 128 | ) - * - * | Y | ( | 54 183 18 | | R | ) | 0 | - * | U | = ( | -29 -99 128 | | G | ) >> 8 + | 128 | - * | V | ( | 128 -116 -12 | | B | ) | 128 | */ -pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], int srcStep[3], - BYTE* pDst, int dstStep, const prim_size_t* roi) +static pstatus_t general_YUV420ToRGB_8u_P3AC4R( + const BYTE* pSrc[3], const UINT32 srcStep[3], + BYTE* pDst, UINT32 dstStep, const prim_size_t* roi) { - int x, y; - int dstPad; - int srcPad[3]; + UINT32 x, y; + UINT32 dstPad; + UINT32 srcPad[3]; BYTE Y, U, V; - int halfWidth; - int halfHeight; + UINT32 halfWidth; + UINT32 halfHeight; const BYTE* pY; const BYTE* pU; const BYTE* pV; - int R, G, B; - int Yp, Up, Vp; - int Up48, Up475; - int Vp403, Vp120; BYTE* pRGB = pDst; - int nWidth, nHeight; - int lastRow, lastCol; + UINT32 nWidth, nHeight; + UINT32 lastRow, lastCol; pY = pSrc[0]; pU = pSrc[1]; pV = pSrc[2]; - + lastCol = roi->width & 0x01; lastRow = roi->height & 0x01; - + nWidth = (roi->width + 1) & ~0x0001; nHeight = (roi->height + 1) & ~0x0001; @@ -88,73 +413,22 @@ pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], int srcStep[3], U = *pU++; V = *pV++; - Up = U - 128; - Vp = V - 128; - - Up48 = 48 * Up; - Up475 = 475 * Up; - - Vp403 = Vp * 403; - Vp120 = Vp * 120; - /* 1st pixel */ - Y = *pY++; - Yp = Y << 8; - R = (Yp + Vp403) >> 8; - G = (Yp - Up48 - Vp120) >> 8; - B = (Yp + Up475) >> 8; - - if (R < 0) - R = 0; - else if (R > 255) - R = 255; - - if (G < 0) - G = 0; - else if (G > 255) - G = 255; - - if (B < 0) - B = 0; - else if (B > 255) - B = 255; - - *pRGB++ = (BYTE) B; - *pRGB++ = (BYTE) G; - *pRGB++ = (BYTE) R; + *pRGB++ = YUV2B(Y, U, V); + *pRGB++ = YUV2G(Y, U, V); + *pRGB++ = YUV2R(Y, U, V); *pRGB++ = 0xFF; /* 2nd pixel */ - if (!(lastCol & 0x02)) { Y = *pY++; - Yp = Y << 8; - R = (Yp + Vp403) >> 8; - G = (Yp - Up48 - Vp120) >> 8; - B = (Yp + Up475) >> 8; - - if (R < 0) - R = 0; - else if (R > 255) - R = 255; - - if (G < 0) - G = 0; - else if (G > 255) - G = 255; - - if (B < 0) - B = 0; - else if (B > 255) - B = 255; - - *pRGB++ = (BYTE) B; - *pRGB++ = (BYTE) G; - *pRGB++ = (BYTE) R; + *pRGB++ = YUV2B(Y, U, V); + *pRGB++ = YUV2G(Y, U, V); + *pRGB++ = YUV2R(Y, U, V); *pRGB++ = 0xFF; } else @@ -170,6 +444,9 @@ pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], int srcStep[3], pV -= halfWidth; pRGB += dstPad; + if (lastRow & 0x02) + break; + for (x = 0; x < halfWidth; ) { if (++x == halfWidth) @@ -178,73 +455,22 @@ pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], int srcStep[3], U = *pU++; V = *pV++; - Up = U - 128; - Vp = V - 128; - - Up48 = 48 * Up; - Up475 = 475 * Up; - - Vp403 = Vp * 403; - Vp120 = Vp * 120; - /* 3rd pixel */ - Y = *pY++; - Yp = Y << 8; - R = (Yp + Vp403) >> 8; - G = (Yp - Up48 - Vp120) >> 8; - B = (Yp + Up475) >> 8; - - if (R < 0) - R = 0; - else if (R > 255) - R = 255; - - if (G < 0) - G = 0; - else if (G > 255) - G = 255; - - if (B < 0) - B = 0; - else if (B > 255) - B = 255; - - *pRGB++ = (BYTE) B; - *pRGB++ = (BYTE) G; - *pRGB++ = (BYTE) R; + *pRGB++ = YUV2B(Y, U, V); + *pRGB++ = YUV2G(Y, U, V); + *pRGB++ = YUV2R(Y, U, V); *pRGB++ = 0xFF; /* 4th pixel */ - if (!(lastCol & 0x02)) { Y = *pY++; - Yp = Y << 8; - R = (Yp + Vp403) >> 8; - G = (Yp - Up48 - Vp120) >> 8; - B = (Yp + Up475) >> 8; - - if (R < 0) - R = 0; - else if (R > 255) - R = 255; - - if (G < 0) - G = 0; - else if (G > 255) - G = 255; - - if (B < 0) - B = 0; - else if (B > 255) - B = 255; - - *pRGB++ = (BYTE) B; - *pRGB++ = (BYTE) G; - *pRGB++ = (BYTE) R; + *pRGB++ = YUV2B(Y, U, V); + *pRGB++ = YUV2G(Y, U, V); + *pRGB++ = YUV2R(Y, U, V); *pRGB++ = 0xFF; } else @@ -264,102 +490,142 @@ pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], int srcStep[3], return PRIMITIVES_SUCCESS; } -pstatus_t general_RGBToYUV420_8u_P3AC4R(const BYTE* pSrc, INT32 srcStep, - BYTE* pDst[3], INT32 dstStep[3], const prim_size_t* roi) +/** + * | Y | ( | 54 183 18 | | R | ) | 0 | + * | U | = ( | -29 -99 128 | | G | ) >> 8 + | 128 | + * | V | ( | 128 -116 -12 | | B | ) | 128 | + */ +static INLINE BYTE RGB2Y(INT32 R, INT32 G, INT32 B) { - int x, y; - int dstPad[3]; - int halfWidth; - int halfHeight; - BYTE* pY; - BYTE* pU; - BYTE* pV; - int Y, U, V; - int R, G, B; - int Ra, Ga, Ba; - const BYTE* pRGB; - int nWidth, nHeight; + const INT32 y = ( 54L * (R) + 183L * (G) + 18L * (B)); + const INT32 y8 = (y >> 8L); - pU = pDst[1]; - pV = pDst[2]; + return CLIP(y8); +} - nWidth = (roi->width + 1) & ~0x0001; - nHeight = (roi->height + 1) & ~0x0001; +static INLINE BYTE RGB2U(INT32 R, INT32 G, INT32 B) +{ + const INT32 u = ( -29L * (R) - 99L * (G) + 128L * (B)); + const INT32 u8 = (u >> 8L) + 128L; - halfWidth = nWidth / 2; - halfHeight = nHeight / 2; + return CLIP(u8); +} - dstPad[0] = (dstStep[0] - nWidth); - dstPad[1] = (dstStep[1] - halfWidth); - dstPad[2] = (dstStep[2] - halfWidth); +static INLINE BYTE RGB2V(INT32 R, INT32 G, INT32 B) +{ + const INT32 v = ( 128L * (R) - 116L * (G) - 12L * (B)); + const INT32 v8 = (v >> 8L) + 128L; + + return CLIP(v8); +} + +static pstatus_t general_RGBToYUV444_8u_P3AC4R( + const BYTE* pSrc, const UINT32 srcStep, + BYTE* pDst[3], UINT32 dstStep[3], const prim_size_t* roi) +{ + UINT32 x, y; + UINT32 nWidth, nHeight; + + nWidth = roi->width; + nHeight = roi->height; + + for (y=0; ywidth + roi->width % 2; + nHeight = roi->height + roi->height % 2; + + halfWidth = (nWidth + nWidth % 2) / 2; + halfHeight = (nHeight + nHeight % 2) / 2; for (y = 0; y < halfHeight; y++) { + const UINT32 val2y = (y * 2); + const UINT32 val2y1 = val2y + 1; + const BYTE* pRGB = pSrc + val2y * srcStep; + const BYTE* pRGB1 = pSrc + val2y1 * srcStep; + + BYTE* pY = pDst[0] + val2y * dstStep[0]; + BYTE* pY1 = pDst[0] + val2y1 * dstStep[0]; + BYTE* pU = pDst[1] + y * dstStep[1]; + BYTE* pV = pDst[2] + y * dstStep[2]; + for (x = 0; x < halfWidth; x++) { - /* 1st pixel */ - pRGB = pSrc + y * 2 * srcStep + x * 2 * 4; - pY = pDst[0] + y * 2 * dstStep[0] + x * 2; - Ba = B = pRGB[0]; - Ga = G = pRGB[1]; - Ra = R = pRGB[2]; - Y = (54 * R + 183 * G + 18 * B) >> 8; - pY[0] = (BYTE) Y; + INT32 R, G, B; + INT32 Ra, Ga, Ba; + const UINT32 val2x = (x * 2); + const UINT32 val2x1 = val2x + 1; - if (x * 2 + 1 < roi->width) + /* 1st pixel */ + Ba = B = pRGB[val2x * 4 + 0]; + Ga = G = pRGB[val2x * 4 + 1]; + Ra = R = pRGB[val2x * 4 + 2]; + pY[val2x] = RGB2Y(R, G, B); + + if (val2x1 < nWidth) { /* 2nd pixel */ - Ba += B = pRGB[4]; - Ga += G = pRGB[5]; - Ra += R = pRGB[6]; - Y = (54 * R + 183 * G + 18 * B) >> 8; - pY[1] = (BYTE) Y; + Ba += B = pRGB[val2x * 4 + 4]; + Ga += G = pRGB[val2x * 4 + 5]; + Ra += R = pRGB[val2x * 4 + 6]; + pY[val2x1] = RGB2Y(R, G, B); } - if (y * 2 + 1 < roi->height) + if (val2y1 < nHeight) { /* 3rd pixel */ - pRGB += srcStep; - pY += dstStep[0]; - Ba += B = pRGB[0]; - Ga += G = pRGB[1]; - Ra += R = pRGB[2]; - Y = (54 * R + 183 * G + 18 * B) >> 8; - pY[0] = (BYTE) Y; + Ba += B = pRGB1[val2x * 4 + 0]; + Ga += G = pRGB1[val2x * 4 + 1]; + Ra += R = pRGB1[val2x * 4 + 2]; + pY1[val2x] = RGB2Y(R, G, B); - if (x * 2 + 1 < roi->width) + if (val2x1 < nWidth) { /* 4th pixel */ - Ba += B = pRGB[4]; - Ga += G = pRGB[5]; - Ra += R = pRGB[6]; - Y = (54 * R + 183 * G + 18 * B) >> 8; - pY[1] = (BYTE) Y; + Ba += B = pRGB1[val2x * 4 + 4]; + Ga += G = pRGB1[val2x * 4 + 5]; + Ra += R = pRGB1[val2x * 4 + 6]; + pY1[val2x1] = RGB2Y(R, G, B); } } - /* U */ Ba >>= 2; Ga >>= 2; Ra >>= 2; - U = ((-29 * Ra - 99 * Ga + 128 * Ba) >> 8) + 128; - if (U < 0) - U = 0; - else if (U > 255) - U = 255; - *pU++ = (BYTE) U; - /* V */ - V = ((128 * Ra - 116 * Ga - 12 * Ba) >> 8) + 128; - if (V < 0) - V = 0; - else if (V > 255) - V = 255; - *pV++ = (BYTE) V; + pU[x] = RGB2U(Ra, Ga, Ba); + pV[x] = RGB2V(Ra, Ga, Ba); } - - pU += dstPad[1]; - pV += dstPad[2]; } return PRIMITIVES_SUCCESS; @@ -368,8 +634,12 @@ pstatus_t general_RGBToYUV420_8u_P3AC4R(const BYTE* pSrc, INT32 srcStep, void primitives_init_YUV(primitives_t* prims) { prims->YUV420ToRGB_8u_P3AC4R = general_YUV420ToRGB_8u_P3AC4R; + prims->YUV444ToRGB_8u_P3AC4R = general_YUV444ToRGB_8u_P3AC4R; prims->RGBToYUV420_8u_P3AC4R = general_RGBToYUV420_8u_P3AC4R; - + prims->RGBToYUV444_8u_P3AC4R = general_RGBToYUV444_8u_P3AC4R; + prims->YUV420CombineToYUV444 = general_YUV420CombineToYUV444; + prims->YUV444SplitToYUV420 = general_YUV444SplitToYUV420; + primitives_init_YUV_opt(prims); } diff --git a/libfreerdp/primitives/prim_YUV_opt.c b/libfreerdp/primitives/prim_YUV_opt.c index 7b80a4522..45688df55 100644 --- a/libfreerdp/primitives/prim_YUV_opt.c +++ b/libfreerdp/primitives/prim_YUV_opt.c @@ -22,27 +22,27 @@ #include #include -pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, int *srcStep, - BYTE *pDst, int dstStep, const prim_size_t *roi) +pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, const UINT32 *srcStep, + BYTE *pDst, UINT32 dstStep, const prim_size_t *roi) { - int lastRow, lastCol; + UINT32 lastRow, lastCol; BYTE *UData,*VData,*YData; - int i,nWidth,nHeight,VaddDst,VaddY,VaddU,VaddV; + UINT32 i,nWidth,nHeight,VaddDst,VaddY,VaddU,VaddV; __m128i r0,r1,r2,r3,r4,r5,r6,r7; __m128i *buffer; - + /* last_line: if the last (U,V doubled) line should be skipped, set to 10B * last_column: if it's the last column in a line, set to 10B (for handling line-endings not multiple by four) */ buffer = _aligned_malloc(4 * 16, 16); - + YData = (BYTE*) pSrc[0]; UData = (BYTE*) pSrc[1]; VData = (BYTE*) pSrc[2]; - + nWidth = roi->width; nHeight = roi->height; - + if ((lastCol = (nWidth & 3))) { switch (lastCol) @@ -63,26 +63,26 @@ pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, int *srcStep, _mm_store_si128(buffer+3,r7); lastCol = 1; } - + nWidth += 3; nWidth = nWidth >> 2; - + lastRow = nHeight & 1; nHeight++; nHeight = nHeight >> 1; - + VaddDst = (dstStep << 1) - (nWidth << 4); VaddY = (srcStep[0] << 1) - (nWidth << 2); VaddU = srcStep[1] - (((nWidth << 1) + 2) & 0xFFFC); VaddV = srcStep[2] - (((nWidth << 1) + 2) & 0xFFFC); - + while (nHeight-- > 0) { if (nHeight == 0) lastRow <<= 1; i = 0; - + do { if (!(i & 0x01)) @@ -97,16 +97,16 @@ pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, int *srcStep, r0 = _mm_cvtsi32_si128(*(UINT32 *)UData); r5 = _mm_set_epi32(0x80038003,0x80028002,0x80018001,0x80008000); r0 = _mm_shuffle_epi8(r0,r5); - + UData += 4; - + /* then we subtract 128 from each value, so we get D */ r3 = _mm_set_epi16(128,128,128,128,128,128,128,128); r0 = _mm_subs_epi16(r0,r3); - + /* we need to do two things with our D, so let's store it for later use */ r2 = r0; - + /* now we can multiply our D with 48 and unpack it to xmm4:xmm0 * this is what we need to get G data later on */ r4 = r0; @@ -116,7 +116,7 @@ pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, int *srcStep, r7 = r0; r0 = _mm_unpacklo_epi16(r0,r4); r4 = _mm_unpackhi_epi16(r7,r4); - + /* to get B data, we need to prepare a second value, D*475 */ r1 = r2; r7 = _mm_set_epi16(475,475,475,475,475,475,475,475); @@ -125,23 +125,23 @@ pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, int *srcStep, r7 = r1; r1 = _mm_unpacklo_epi16(r1,r2); r7 = _mm_unpackhi_epi16(r7,r2); - + /* so we got something like this: xmm7:xmm1 * this pair contains values for 16 pixel: * aabbccdd * aabbccdd, but we can only work on four pixel at once, so we need to save upper values */ _mm_store_si128(buffer+1,r7); - + /* Now we've prepared U-data. Preparing V-data is actually the same, just with other coefficients */ r2 = _mm_cvtsi32_si128(*(UINT32 *)VData); r2 = _mm_shuffle_epi8(r2,r5); - + VData += 4; - + r2 = _mm_subs_epi16(r2,r3); - + r5 = r2; - + /* this is also known as E*403, we need it to convert R data */ r3 = r2; r7 = _mm_set_epi16(403,403,403,403,403,403,403,403); @@ -150,10 +150,10 @@ pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, int *srcStep, r7 = r2; r2 = _mm_unpacklo_epi16(r2,r3); r7 = _mm_unpackhi_epi16(r7,r3); - + /* and preserve upper four values for future ... */ _mm_store_si128(buffer+2,r7); - + /* doing this step: E*120 */ r3 = r5; r7 = _mm_set_epi16(120,120,120,120,120,120,120,120); @@ -162,12 +162,12 @@ pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, int *srcStep, r7 = r3; r3 = _mm_unpacklo_epi16(r3,r5); r7 = _mm_unpackhi_epi16(r7,r5); - + /* now we complete what we've begun above: * (48*D) + (120*E) = (48*D +120*E) */ r0 = _mm_add_epi32(r0,r3); r4 = _mm_add_epi32(r4,r7); - + /* and store to memory ! */ _mm_store_si128(buffer,r4); } @@ -180,25 +180,25 @@ pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, int *srcStep, r2 = _mm_load_si128(buffer+2); r0 = _mm_load_si128(buffer); } - + if (++i == nWidth) lastCol <<= 1; - + /* We didn't produce any output yet, so let's do so! * Ok, fetch four pixel from the Y-data array and shuffle them like this: * 00d0 00c0 00b0 00a0, to get signed dwords and multiply by 256 */ r4 = _mm_cvtsi32_si128(*(UINT32 *)YData); r7 = _mm_set_epi32(0x80800380,0x80800280,0x80800180,0x80800080); r4 = _mm_shuffle_epi8(r4,r7); - + r5 = r4; r6 = r4; - + /* no we can perform the "real" conversion itself and produce output! */ r4 = _mm_add_epi32(r4,r2); r5 = _mm_sub_epi32(r5,r0); r6 = _mm_add_epi32(r6,r1); - + /* in the end, we only need bytes for RGB values. * So, what do we do? right! shifting left makes values bigger and thats always good. * before we had dwords of data, and by shifting left and treating the result @@ -208,7 +208,7 @@ pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, int *srcStep, r4 = _mm_slli_epi32(r4,8); r5 = _mm_slli_epi32(r5,8); r6 = _mm_slli_epi32(r6,8); - + /* one thing we still have to face is the clip() function ... * we have still signed words, and there are those min/max instructions in SSE2 ... * the max instruction takes always the bigger of the two operands and stores it in the first one, @@ -219,35 +219,35 @@ pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, int *srcStep, r4 = _mm_max_epi16(r4,r7); r5 = _mm_max_epi16(r5,r7); r6 = _mm_max_epi16(r6,r7); - + /* the same thing just completely different can be used to limit our values to 255, * but now using the min instruction and 255s */ r7 = _mm_set_epi32(0x00FF0000,0x00FF0000,0x00FF0000,0x00FF0000); r4 = _mm_min_epi16(r4,r7); r5 = _mm_min_epi16(r5,r7); r6 = _mm_min_epi16(r6,r7); - + /* Now we got our bytes. * the moment has come to assemble the three channels R,G and B to the xrgb dwords * on Red channel we just have to and each futural dword with 00FF0000H */ //r7=_mm_set_epi32(0x00FF0000,0x00FF0000,0x00FF0000,0x00FF0000); r4 = _mm_and_si128(r4,r7); - + /* on Green channel we have to shuffle somehow, so we get something like this: * 00d0 00c0 00b0 00a0 */ r7 = _mm_set_epi32(0x80800E80,0x80800A80,0x80800680,0x80800280); r5 = _mm_shuffle_epi8(r5,r7); - + /* and on Blue channel that one: * 000d 000c 000b 000a */ r7 = _mm_set_epi32(0x8080800E,0x8080800A,0x80808006,0x80808002); r6 = _mm_shuffle_epi8(r6,r7); - + /* and at last we or it together and get this one: * xrgb xrgb xrgb xrgb */ r4 = _mm_or_si128(r4,r5); r4 = _mm_or_si128(r4,r6); - + /* Only thing to do know is writing data to memory, but this gets a bit more * complicated if the width is not a multiple of four and it is the last column in line. */ if (lastCol & 0x02) @@ -269,7 +269,7 @@ pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, int *srcStep, r4 = _mm_or_si128(r4,r6); } _mm_storeu_si128((__m128i *)pDst,r4); - + if (!(lastRow & 0x02)) { /* Because UV data is the same for two lines, we can process the secound line just here, @@ -280,40 +280,40 @@ pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, int *srcStep, r4 = _mm_cvtsi32_si128(*(UINT32 *)(YData+srcStep[0])); r7 = _mm_set_epi32(0x80800380,0x80800280,0x80800180,0x80800080); r4 = _mm_shuffle_epi8(r4,r7); - + r5 = r4; r6 = r4; - + r4 = _mm_add_epi32(r4,r2); r5 = _mm_sub_epi32(r5,r0); r6 = _mm_add_epi32(r6,r1); - + r4 = _mm_slli_epi32(r4,8); r5 = _mm_slli_epi32(r5,8); r6 = _mm_slli_epi32(r6,8); - + r7 = _mm_set_epi32(0,0,0,0); r4 = _mm_max_epi16(r4,r7); r5 = _mm_max_epi16(r5,r7); r6 = _mm_max_epi16(r6,r7); - + r7 = _mm_set_epi32(0x00FF0000,0x00FF0000,0x00FF0000,0x00FF0000); r4 = _mm_min_epi16(r4,r7); r5 = _mm_min_epi16(r5,r7); r6 = _mm_min_epi16(r6,r7); - + r7 = _mm_set_epi32(0x00FF0000,0x00FF0000,0x00FF0000,0x00FF0000); r4 = _mm_and_si128(r4,r7); - + r7 = _mm_set_epi32(0x80800E80,0x80800A80,0x80800680,0x80800280); r5 = _mm_shuffle_epi8(r5,r7); - + r7 = _mm_set_epi32(0x8080800E,0x8080800A,0x80808006,0x80808002); r6 = _mm_shuffle_epi8(r6,r7); - + r4 = _mm_or_si128(r4,r5); r4 = _mm_or_si128(r4,r6); - + if (lastCol & 0x02) { r6 = _mm_load_si128(buffer+3); @@ -321,20 +321,20 @@ pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, int *srcStep, r5 = _mm_lddqu_si128((__m128i *)(pDst+dstStep)); r6 = _mm_andnot_si128(r6,r5); r4 = _mm_or_si128(r4,r6); - + /* only thing is, we should shift [rbp-42] back here, because we have processed the last column, * and this "special condition" can be released */ lastCol >>= 1; } _mm_storeu_si128((__m128i *)(pDst+dstStep),r4); } - + /* after all we have to increase the destination- and Y-data pointer by four pixel */ pDst += 16; YData += 4; } while (i < nWidth); - + /* after each line we have to add the scanline to the destination pointer, because * we are processing two lines at once, but only increasing the destination pointer * in the first line. Well, we only have one pointer, so it's the easiest way to access @@ -343,10 +343,10 @@ pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, int *srcStep, * output buffer was "designed" for 1920p HD, we have to add the remaining length for each line, * to get into the next line. */ pDst += VaddDst; - + /* same thing has to be done for Y-data, but with iStride[0] instead of the target scanline */ YData += VaddY; - + /* and again for UV data, but here it's enough to add the remaining length, because * UV data is the same for two lines and there exists only one "UV line" on two "real lines" */ UData += VaddU; @@ -354,7 +354,7 @@ pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE **pSrc, int *srcStep, } _aligned_free(buffer); - + return PRIMITIVES_SUCCESS; } #endif diff --git a/libfreerdp/primitives/test/CMakeLists.txt b/libfreerdp/primitives/test/CMakeLists.txt index 9a652a36a..caf651bc5 100644 --- a/libfreerdp/primitives/test/CMakeLists.txt +++ b/libfreerdp/primitives/test/CMakeLists.txt @@ -14,6 +14,7 @@ set(${MODULE_PREFIX}_TESTS TestPrimitivesSet.c TestPrimitivesShift.c TestPrimitivesSign.c + TestPrimitivesYUV.c TestPrimitivesYCbCr.c TestPrimitivesYCoCg.c) diff --git a/libfreerdp/primitives/test/TestPrimitives16to32bpp.c b/libfreerdp/primitives/test/TestPrimitives16to32bpp.c index 03a070e40..ed985bdef 100644 --- a/libfreerdp/primitives/test/TestPrimitives16to32bpp.c +++ b/libfreerdp/primitives/test/TestPrimitives16to32bpp.c @@ -57,8 +57,10 @@ static BOOL try_16To32( const UINT16 *src; UINT32 ALIGN(outNN1[4096+3]), ALIGN(outAN1[4096+3]), ALIGN(outNI1[4096+3]), ALIGN(outAI1[4096+3]); +#ifdef WITH_SSE2 UINT32 ALIGN(outNN2[4096+3]), ALIGN(outAN2[4096+3]), ALIGN(outNI2[4096+3]), ALIGN(outAI2[4096+3]); +#endif assert(sOffset < 4); assert(dOffset < 4); @@ -161,7 +163,7 @@ int test_RGB565ToARGB_16u32u_C3C4_func(void) STD_SPEED_TEST( test16to32_speed, UINT16, UINT32, PRIM_NOP, TRUE, general_RGB565ToARGB_16u32u_C3C4( - (const UINT16 *) src1, 64*2, (UINT32 *) dst, 64*4, + (const UINT16 *) src1, 64*2, (UINT32 *) dst, 64*4, 64,64, TRUE, TRUE), #ifdef WITH_SSE2 TRUE, sse3_RGB565ToARGB_16u32u_C3C4( @@ -182,7 +184,7 @@ int test_RGB565ToARGB_16u32u_C3C4_speed(void) get_random_data(src, sizeof(src)); - test16to32_speed("16-to-32bpp", "aligned", + test16to32_speed("16-to-32bpp", "aligned", (const UINT16 *) src, 0, 0, (UINT32 *) dst, size_array, 1, RGB_TRIAL_ITERATIONS, TEST_TIME); return SUCCESS; diff --git a/libfreerdp/primitives/test/TestPrimitivesYCoCg.c b/libfreerdp/primitives/test/TestPrimitivesYCoCg.c index 4c6a4b8e4..9fb0fd326 100644 --- a/libfreerdp/primitives/test/TestPrimitivesYCoCg.c +++ b/libfreerdp/primitives/test/TestPrimitivesYCoCg.c @@ -38,12 +38,14 @@ extern pstatus_t ssse3_YCoCgRToRGB_8u_AC4R(const BYTE *pSrc, INT32 srcStep, /* ------------------------------------------------------------------------- */ int test_YCoCgRToRGB_8u_AC4R_func(void) { +#ifdef WITH_SSE2 + int i; + INT32 ALIGN(out_sse[4098]), ALIGN(out_sse_inv[4098]); +#endif INT32 ALIGN(in[4098]); INT32 ALIGN(out_c[4098]), ALIGN(out_c_inv[4098]); - INT32 ALIGN(out_sse[4098]), ALIGN(out_sse_inv[4098]); char testStr[256]; BOOL failed = FALSE; - int i; testStr[0] = '\0'; get_random_data(in, sizeof(in)); diff --git a/libfreerdp/primitives/test/TestPrimitivesYUV.c b/libfreerdp/primitives/test/TestPrimitivesYUV.c new file mode 100644 index 000000000..a4b34f7e8 --- /dev/null +++ b/libfreerdp/primitives/test/TestPrimitivesYUV.c @@ -0,0 +1,427 @@ + +#include "prim_test.h" + +#include +#include +#include + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#define TAG __FILE__ + +/* YUV to RGB conversion is lossy, so consider every value only + * differing by less than 2 abs equal. */ +static BOOL similar(const BYTE* src, const BYTE* dst, size_t size) +{ + size_t x; + + for (x=0; x 2) + { + fprintf(stderr, "%zd %02X : %02X diff=%lf\n", x, val1, val2, diff); + return FALSE; + } + } + + return TRUE; +} + +static void get_size(UINT32* width, UINT32* height) +{ + winpr_RAND((BYTE*)width, sizeof(*width)); + winpr_RAND((BYTE*)height, sizeof(*height)); + + // TODO: Algorithm only works on even resolutions... + *width = (*width % 4000) << 1; + *height = (*height % 4000 << 1); +} + +static BOOL check_padding(const BYTE* psrc, size_t size, size_t padding, const char* buffer) +{ + size_t x; + BOOL rc = TRUE; + const BYTE* src; + const BYTE* esrc; + size_t halfPad = (padding+1)/2; + + if (!psrc) + return FALSE; + + src = psrc - halfPad; + esrc = src + size + halfPad; + for (x=0; xYUV420CombineToYUV444) + goto fail; + + for (x=0; x<3; x++) + { + size_t halfStride = ((x>0)?awidth/2:awidth); + size_t size = aheight * awidth; + size_t halfSize = ((x>0)?halfStride*aheight/2:awidth*aheight); + + yuvStride[x] = awidth; + if (!(yuv[x] = set_padding(size, padding))) + goto fail; + + lumaStride[x] = halfStride; + if (!(luma[x] = set_padding(halfSize, padding))) + goto fail; + + if (!(pmain[x] = set_padding(halfSize, padding))) + goto fail; + + chromaStride[x] = halfStride; + if (!(chroma[x] = set_padding(halfSize, padding))) + goto fail; + + if (!(paux[x] = set_padding(halfSize, padding))) + goto fail; + + memset(luma[x], 0xAB + 3*x, halfSize); + memset(chroma[x], 0x80 + 2*x, halfSize); + + if (!check_padding(luma[x], halfSize, padding, "luma")) + goto fail; + if (!check_padding(chroma[x], halfSize, padding, "chroma")) + goto fail; + if (!check_padding(pmain[x], halfSize, padding, "main")) + goto fail; + if (!check_padding(paux[x], halfSize, padding, "aux")) + goto fail; + if (!check_padding(yuv[x], size, padding, "yuv")) + goto fail; + } + + if (prims->YUV420CombineToYUV444((const BYTE**)luma, lumaStride, + (const BYTE**) chroma, chromaStride, + yuv, yuvStride, &roi) != PRIMITIVES_SUCCESS) + goto fail; + + for (x=0; x<3; x++) + { + size_t halfStride = ((x>0)?awidth/2:awidth); + size_t size = aheight * awidth; + size_t halfSize = ((x>0)?halfStride*aheight/2:awidth*aheight); + + if (!check_padding(luma[x], halfSize, padding, "luma")) + goto fail; + if (!check_padding(chroma[x], halfSize, padding, "chroma")) + goto fail; + if (!check_padding(yuv[x], size, padding, "yuv")) + goto fail; + } + + if (prims->YUV444SplitToYUV420(yuv, yuvStride, pmain, lumaStride, + paux, chromaStride, &roi) != PRIMITIVES_SUCCESS) + goto fail; + + for (x=0; x<3; x++) + { + size_t halfStride = ((x>0)?awidth/2:awidth); + size_t size = aheight * awidth; + size_t halfSize = ((x>0)?halfStride*aheight/2:awidth*aheight); + + if (!check_padding(pmain[x], halfSize, padding, "main")) + goto fail; + if (!check_padding(paux[x], halfSize, padding, "aux")) + goto fail; + if (!check_padding(yuv[x], size, padding, "yuv")) + goto fail; + } + + for (i=0; i<3; i++) + { + for (y=0; y 0) + { + w = (roi.width+3) / 4; + if (roi.height > (roi.height+1)/2) + continue; + } + + if (!similar(luma[i] + y * lstride, + pmain[i] + y * lstride, + w)) + goto fail; + + /* Need to ignore lines of destination Y plane, + * if the lines are not a multiple of 16 + * as the UV planes are packed in 8 line stripes. */ + if (i == 0) + { + /* TODO: This check is not perfect, it does not + * include the last V lines packed to the Y + * frame. */ + UINT32 rem = roi.height % 16; + if (y > roi.height - rem) + continue; + } + + if (!similar(chroma[i] + y * cstride, + paux[i] + y * cstride, + w)) + goto fail; + } + } + + rc = TRUE; +fail: + for (x=0; x<3; x++) + { + free_padding(yuv[x], padding); + free_padding(luma[x], padding); + free_padding(chroma[x], padding); + free_padding(pmain[x], padding); + free_padding(paux[x], padding); + } + + return rc; +} + +static BOOL TestPrimitiveYUV(BOOL use444) +{ + BOOL rc = FALSE; + UINT32 x, y; + UINT32 awidth, aheight; + BYTE* yuv[3] = {0}; + UINT32 yuv_step[3]; + prim_size_t roi; + BYTE* rgb = NULL; + BYTE* rgb_dst = NULL; + size_t size; + primitives_t* prims = primitives_get(); + size_t uvsize, uvwidth; + size_t padding = 10000; + size_t stride; + + get_size(&roi.width, &roi.height); + + /* Buffers need to be 16x16 aligned. */ + awidth = roi.width + 16 - roi.width % 16; + aheight = roi.height + 16 - roi.height % 16; + + stride = awidth * sizeof(UINT32); + size = awidth * aheight; + if (use444) + { + uvwidth = awidth; + uvsize = size; + if (!prims || !prims->RGBToYUV444_8u_P3AC4R || !prims->YUV444ToRGB_8u_P3AC4R) + return FALSE; + } + else + { + uvwidth = (awidth + 1) / 2; + uvsize = (aheight + 1) / 2 * uvwidth; + if (!prims || !prims->RGBToYUV420_8u_P3AC4R || !prims->YUV420ToRGB_8u_P3AC4R) + return FALSE; + } + + fprintf(stderr, "Running AVC%s on frame size %lux%lu\n", use444 ? "444" : "420", + roi.width, roi.height); + + /* Test RGB to YUV444 conversion and vice versa */ + if (!(rgb = set_padding(size * sizeof(UINT32), padding))) + goto fail; + + if (!(rgb_dst = set_padding(size * sizeof(UINT32), padding))) + goto fail; + + if (!(yuv[0] = set_padding(size, padding))) + goto fail; + + if (!(yuv[1] = set_padding(uvsize, padding))) + goto fail; + + if (!(yuv[2] = set_padding(uvsize, padding))) + goto fail; + + for (y=0; yRGBToYUV444_8u_P3AC4R(rgb, stride, yuv, yuv_step, &roi) != PRIMITIVES_SUCCESS) + goto fail; + } + else if (prims->RGBToYUV420_8u_P3AC4R(rgb, stride, yuv, yuv_step, &roi) != PRIMITIVES_SUCCESS) + goto fail; + + if (!check_padding(rgb, size * sizeof(UINT32), padding, "rgb")) + goto fail; + + if ((!check_padding(yuv[0], size, padding, "Y")) || + (!check_padding(yuv[1], uvsize, padding, "U")) || + (!check_padding(yuv[2], uvsize, padding, "V"))) + goto fail; + + if (use444) + { + if (prims->YUV444ToRGB_8u_P3AC4R((const BYTE**)yuv, yuv_step, rgb_dst, stride, &roi) != PRIMITIVES_SUCCESS) + goto fail; + } + else if (prims->YUV420ToRGB_8u_P3AC4R((const BYTE**)yuv, yuv_step, rgb_dst, stride, &roi) != PRIMITIVES_SUCCESS) + goto fail; + + if (!check_padding(rgb_dst, size * sizeof(UINT32), padding, "rgb dst")) + goto fail; + + if ((!check_padding(yuv[0], size, padding, "Y")) || + (!check_padding(yuv[1], uvsize, padding, "U")) || + (!check_padding(yuv[2], uvsize, padding, "V"))) + goto fail; + + for (y=0; y