Merge pull request #3701 from akallabeth/progressive_inlined

[performance] Progressive inlined
This commit is contained in:
Martin Fleisz 2017-01-16 10:36:28 +01:00 committed by GitHub
commit e218dc03af
10 changed files with 111 additions and 189 deletions

View File

@ -108,7 +108,6 @@ set(CODEC_SRCS
codec/rfx_constants.h
codec/rfx_decode.c
codec/rfx_decode.h
codec/rfx_differential.c
codec/rfx_differential.h
codec/rfx_dwt.c
codec/rfx_dwt.h

View File

@ -43,45 +43,36 @@ static const char* progressive_get_block_type_string(UINT16 blockType)
{
case PROGRESSIVE_WBT_SYNC:
return "PROGRESSIVE_WBT_SYNC";
break;
case PROGRESSIVE_WBT_FRAME_BEGIN:
return "PROGRESSIVE_WBT_FRAME_BEGIN";
break;
case PROGRESSIVE_WBT_FRAME_END:
return "PROGRESSIVE_WBT_FRAME_END";
break;
case PROGRESSIVE_WBT_CONTEXT:
return "PROGRESSIVE_WBT_CONTEXT";
break;
case PROGRESSIVE_WBT_REGION:
return "PROGRESSIVE_WBT_REGION";
break;
case PROGRESSIVE_WBT_TILE_SIMPLE:
return "PROGRESSIVE_WBT_TILE_SIMPLE";
break;
case PROGRESSIVE_WBT_TILE_FIRST:
return "PROGRESSIVE_WBT_TILE_FIRST";
break;
case PROGRESSIVE_WBT_TILE_UPGRADE:
return "PROGRESSIVE_WBT_TILE_UPGRADE";
break;
default:
return "PROGRESSIVE_WBT_UNKNOWN";
break;
}
return "PROGRESSIVE_WBT_UNKNOWN";
}
static void progressive_component_codec_quant_read(const BYTE* block,
static INLINE void progressive_component_codec_quant_read(const BYTE* block,
RFX_COMPONENT_CODEC_QUANT* quantVal)
{
quantVal->LL3 = block[0] & 0x0F;
@ -96,7 +87,7 @@ static void progressive_component_codec_quant_read(const BYTE* block,
quantVal->HH1 = block[4] >> 4;
}
static void progressive_rfx_quant_ladd(RFX_COMPONENT_CODEC_QUANT* q, int val)
static INLINE void progressive_rfx_quant_ladd(RFX_COMPONENT_CODEC_QUANT* q, int val)
{
q->HL1 += val; /* HL1 */
q->LH1 += val; /* LH1 */
@ -110,9 +101,9 @@ static void progressive_rfx_quant_ladd(RFX_COMPONENT_CODEC_QUANT* q, int val)
q->LL3 += val; /* LL3 */
}
static void progressive_rfx_quant_add(RFX_COMPONENT_CODEC_QUANT* q1,
RFX_COMPONENT_CODEC_QUANT* q2,
RFX_COMPONENT_CODEC_QUANT* dst)
static INLINE void progressive_rfx_quant_add(RFX_COMPONENT_CODEC_QUANT* q1,
RFX_COMPONENT_CODEC_QUANT* q2,
RFX_COMPONENT_CODEC_QUANT* dst)
{
dst->HL1 = q1->HL1 + q2->HL1; /* HL1 */
dst->LH1 = q1->LH1 + q2->LH1; /* LH1 */
@ -126,7 +117,7 @@ static void progressive_rfx_quant_add(RFX_COMPONENT_CODEC_QUANT* q1,
dst->LL3 = q1->LL3 + q2->LL3; /* LL3 */
}
static void progressive_rfx_quant_lsub(RFX_COMPONENT_CODEC_QUANT* q, int val)
static INLINE void progressive_rfx_quant_lsub(RFX_COMPONENT_CODEC_QUANT* q, int val)
{
q->HL1 -= val; /* HL1 */
q->LH1 -= val; /* LH1 */
@ -140,9 +131,9 @@ static void progressive_rfx_quant_lsub(RFX_COMPONENT_CODEC_QUANT* q, int val)
q->LL3 -= val; /* LL3 */
}
static void progressive_rfx_quant_sub(RFX_COMPONENT_CODEC_QUANT* q1,
RFX_COMPONENT_CODEC_QUANT* q2,
RFX_COMPONENT_CODEC_QUANT* dst)
static INLINE void progressive_rfx_quant_sub(RFX_COMPONENT_CODEC_QUANT* q1,
RFX_COMPONENT_CODEC_QUANT* q2,
RFX_COMPONENT_CODEC_QUANT* dst)
{
dst->HL1 = q1->HL1 - q2->HL1; /* HL1 */
dst->LH1 = q1->LH1 - q2->LH1; /* LH1 */
@ -156,7 +147,7 @@ static void progressive_rfx_quant_sub(RFX_COMPONENT_CODEC_QUANT* q1,
dst->LL3 = q1->LL3 - q2->LL3; /* LL3 */
}
static BOOL progressive_rfx_quant_lcmp_less_equal(RFX_COMPONENT_CODEC_QUANT* q,
static INLINE BOOL progressive_rfx_quant_lcmp_less_equal(RFX_COMPONENT_CODEC_QUANT* q,
int val)
{
if (q->HL1 > val) return FALSE; /* HL1 */
@ -182,7 +173,7 @@ static BOOL progressive_rfx_quant_lcmp_less_equal(RFX_COMPONENT_CODEC_QUANT* q,
return TRUE;
}
static BOOL progressive_rfx_quant_cmp_less_equal(RFX_COMPONENT_CODEC_QUANT* q1,
static INLINE BOOL progressive_rfx_quant_cmp_less_equal(RFX_COMPONENT_CODEC_QUANT* q1,
RFX_COMPONENT_CODEC_QUANT* q2)
{
if (q1->HL1 > q2->HL1) return FALSE; /* HL1 */
@ -208,7 +199,7 @@ static BOOL progressive_rfx_quant_cmp_less_equal(RFX_COMPONENT_CODEC_QUANT* q1,
return TRUE;
}
static BOOL progressive_rfx_quant_lcmp_greater_equal(RFX_COMPONENT_CODEC_QUANT*
static INLINE BOOL progressive_rfx_quant_lcmp_greater_equal(RFX_COMPONENT_CODEC_QUANT*
q,
int val)
{
@ -235,7 +226,7 @@ static BOOL progressive_rfx_quant_lcmp_greater_equal(RFX_COMPONENT_CODEC_QUANT*
return TRUE;
}
static BOOL progressive_rfx_quant_cmp_greater_equal(RFX_COMPONENT_CODEC_QUANT*
static INLINE BOOL progressive_rfx_quant_cmp_greater_equal(RFX_COMPONENT_CODEC_QUANT*
q1,
RFX_COMPONENT_CODEC_QUANT* q2)
{
@ -262,7 +253,7 @@ static BOOL progressive_rfx_quant_cmp_greater_equal(RFX_COMPONENT_CODEC_QUANT*
return TRUE;
}
static BOOL progressive_rfx_quant_cmp_equal(RFX_COMPONENT_CODEC_QUANT* q1,
static INLINE BOOL progressive_rfx_quant_cmp_equal(RFX_COMPONENT_CODEC_QUANT* q1,
RFX_COMPONENT_CODEC_QUANT* q2)
{
if (q1->HL1 != q2->HL1) return FALSE; /* HL1 */
@ -297,8 +288,8 @@ static void progressive_rfx_quant_print(RFX_COMPONENT_CODEC_QUANT* q,
q->LL3);
}
static int progressive_set_surface_data(PROGRESSIVE_CONTEXT* progressive,
UINT16 surfaceId, void* pData)
static INLINE int progressive_set_surface_data(PROGRESSIVE_CONTEXT* progressive,
UINT16 surfaceId, void* pData)
{
ULONG_PTR key;
key = ((ULONG_PTR) surfaceId) + 1;
@ -311,7 +302,7 @@ static int progressive_set_surface_data(PROGRESSIVE_CONTEXT* progressive,
return 1;
}
static void* progressive_get_surface_data(PROGRESSIVE_CONTEXT* progressive,
static INLINE void* progressive_get_surface_data(PROGRESSIVE_CONTEXT* progressive,
UINT16 surfaceId)
{
ULONG_PTR key;
@ -427,10 +418,10 @@ int progressive_delete_surface_context(PROGRESSIVE_CONTEXT* progressive,
* LL3 4015 9x9 81
*/
static void progressive_rfx_idwt_x(INT16* pLowBand, int nLowStep,
INT16* pHighBand,
int nHighStep, INT16* pDstBand, int nDstStep,
int nLowCount, int nHighCount, int nDstCount)
static INLINE void progressive_rfx_idwt_x(INT16* pLowBand, int nLowStep,
INT16* pHighBand,
int nHighStep, INT16* pDstBand, int nDstStep,
int nLowCount, int nHighCount, int nDstCount)
{
int i, j;
INT16 L0;
@ -501,10 +492,10 @@ static void progressive_rfx_idwt_x(INT16* pLowBand, int nLowStep,
}
}
static void progressive_rfx_idwt_y(INT16* pLowBand, int nLowStep,
INT16* pHighBand,
int nHighStep, INT16* pDstBand, int nDstStep,
int nLowCount, int nHighCount, int nDstCount)
static INLINE void progressive_rfx_idwt_y(INT16* pLowBand, int nLowStep,
INT16* pHighBand,
int nHighStep, INT16* pDstBand, int nDstStep,
int nLowCount, int nHighCount, int nDstCount)
{
int i, j;
INT16 L0;
@ -585,12 +576,12 @@ static void progressive_rfx_idwt_y(INT16* pLowBand, int nLowStep,
}
}
static int progressive_rfx_get_band_l_count(int level)
static INLINE int progressive_rfx_get_band_l_count(int level)
{
	/* 64 halved once per decomposition level, plus one extra entry. */
	const int halved = 64 >> level;
	return halved + 1;
}
static int progressive_rfx_get_band_h_count(int level)
static INLINE int progressive_rfx_get_band_h_count(int level)
{
if (level == 1)
return (64 >> 1) - 1;
@ -598,7 +589,7 @@ static int progressive_rfx_get_band_h_count(int level)
return (64 + (1 << (level - 1))) >> level;
}
static void progressive_rfx_dwt_2d_decode_block(INT16* buffer, INT16* temp,
static INLINE void progressive_rfx_dwt_2d_decode_block(INT16* buffer, INT16* temp,
int level)
{
int offset;
@ -675,7 +666,7 @@ static void progressive_rfx_dwt_2d_decode_block(INT16* buffer, INT16* temp,
pDstBand[2], nDstStep[2], nLowCount[2], nHighCount[2], nDstCount[2]);
}
static void progressive_rfx_dwt_2d_decode(INT16* buffer, INT16* temp,
static INLINE void progressive_rfx_dwt_2d_decode(INT16* buffer, INT16* temp,
INT16* current, INT16* sign, BOOL diff)
{
const primitives_t* prims = primitives_get();
@ -689,7 +680,7 @@ static void progressive_rfx_dwt_2d_decode(INT16* buffer, INT16* temp,
progressive_rfx_dwt_2d_decode_block(&buffer[0], temp, 1);
}
static void progressive_rfx_decode_block(const primitives_t* prims,
static INLINE void progressive_rfx_decode_block(const primitives_t* prims,
INT16* buffer,
int length, UINT32 shift)
{
@ -699,7 +690,7 @@ static void progressive_rfx_decode_block(const primitives_t* prims,
prims->lShiftC_16s(buffer, shift, buffer, length);
}
static int progressive_rfx_decode_component(PROGRESSIVE_CONTEXT* progressive,
static INLINE int progressive_rfx_decode_component(PROGRESSIVE_CONTEXT* progressive,
RFX_COMPONENT_CODEC_QUANT* shift,
const BYTE* data, int length,
INT16* buffer, INT16* current,
@ -731,7 +722,7 @@ static int progressive_rfx_decode_component(PROGRESSIVE_CONTEXT* progressive,
return 1;
}
static int progressive_decompress_tile_first(PROGRESSIVE_CONTEXT* progressive,
static INLINE int progressive_decompress_tile_first(PROGRESSIVE_CONTEXT* progressive,
RFX_PROGRESSIVE_TILE* tile)
{
BOOL diff;
@ -881,8 +872,8 @@ struct _RFX_PROGRESSIVE_UPGRADE_STATE
};
typedef struct _RFX_PROGRESSIVE_UPGRADE_STATE RFX_PROGRESSIVE_UPGRADE_STATE;
static INT16 progressive_rfx_srl_read(RFX_PROGRESSIVE_UPGRADE_STATE* state,
UINT32 numBits)
static INLINE INT16 progressive_rfx_srl_read(RFX_PROGRESSIVE_UPGRADE_STATE* state,
UINT32 numBits)
{
int k;
UINT32 bit;
@ -968,7 +959,7 @@ static INT16 progressive_rfx_srl_read(RFX_PROGRESSIVE_UPGRADE_STATE* state,
return sign ? -1 * mag : mag;
}
static int progressive_rfx_upgrade_state_finish(RFX_PROGRESSIVE_UPGRADE_STATE*
static INLINE int progressive_rfx_upgrade_state_finish(RFX_PROGRESSIVE_UPGRADE_STATE*
state)
{
int pad;
@ -993,7 +984,7 @@ static int progressive_rfx_upgrade_state_finish(RFX_PROGRESSIVE_UPGRADE_STATE*
return 1;
}
static int progressive_rfx_upgrade_block(RFX_PROGRESSIVE_UPGRADE_STATE* state,
static INLINE int progressive_rfx_upgrade_block(RFX_PROGRESSIVE_UPGRADE_STATE* state,
INT16* buffer, INT16* sign, UINT32 length,
UINT32 shift, UINT32 bitPos, UINT32 numBits)
{
@ -1051,7 +1042,7 @@ static int progressive_rfx_upgrade_block(RFX_PROGRESSIVE_UPGRADE_STATE* state,
return 1;
}
static int progressive_rfx_upgrade_component(PROGRESSIVE_CONTEXT* progressive,
static INLINE int progressive_rfx_upgrade_component(PROGRESSIVE_CONTEXT* progressive,
RFX_COMPONENT_CODEC_QUANT* shift,
RFX_COMPONENT_CODEC_QUANT* bitPos,
RFX_COMPONENT_CODEC_QUANT* numBits,
@ -1115,7 +1106,8 @@ static int progressive_rfx_upgrade_component(PROGRESSIVE_CONTEXT* progressive,
if (srlLen)
pSrlLen = (int)((((float) aSrlLen) / ((float) srlLen)) * 100.0f);
WLog_INFO(TAG, "RAW: %"PRIu32"/%"PRIu32" %d%% (%"PRIu32"/%"PRIu32":%"PRIu32")\tSRL: %"PRIu32"/%"PRIu32" %d%% (%"PRIu32"/%"PRIu32":%"PRIu32")",
WLog_INFO(TAG,
"RAW: %"PRIu32"/%"PRIu32" %d%% (%"PRIu32"/%"PRIu32":%"PRIu32")\tSRL: %"PRIu32"/%"PRIu32" %d%% (%"PRIu32"/%"PRIu32":%"PRIu32")",
aRawLen, rawLen, pRawLen, state.raw->position, rawLen * 8,
(rawLen * 8) - state.raw->position,
aSrlLen, srlLen, pSrlLen, state.srl->position, srlLen * 8,
@ -1132,7 +1124,7 @@ static int progressive_rfx_upgrade_component(PROGRESSIVE_CONTEXT* progressive,
return 1;
}
static int progressive_decompress_tile_upgrade(PROGRESSIVE_CONTEXT* progressive,
static INLINE int progressive_decompress_tile_upgrade(PROGRESSIVE_CONTEXT* progressive,
RFX_PROGRESSIVE_TILE* tile)
{
int status;
@ -1287,9 +1279,9 @@ static int progressive_decompress_tile_upgrade(PROGRESSIVE_CONTEXT* progressive,
return 1;
}
static int progressive_process_tiles(PROGRESSIVE_CONTEXT* progressive,
const BYTE* blocks, UINT32 blocksLen,
const PROGRESSIVE_SURFACE_CONTEXT* surface)
static INLINE int progressive_process_tiles(PROGRESSIVE_CONTEXT* progressive,
const BYTE* blocks, UINT32 blocksLen,
const PROGRESSIVE_SURFACE_CONTEXT* surface)
{
int status = -1;
const BYTE* block;

View File

@ -1,53 +0,0 @@
/**
* FreeRDP: A Remote Desktop Protocol Implementation
* RemoteFX Codec Library - Differential Encoding
*
* Copyright 2011 Vic Lee
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "rfx_differential.h"
/**
 * Undo differential encoding in place: every element after the first is
 * replaced by the sum of itself and the (already decoded) element before it.
 *
 * @param buffer values to decode; modified in place
 * @param size   number of INT16 elements in buffer
 */
void rfx_differential_decode(INT16* buffer, int size)
{
	INT16* ptr;
	INT16* end;

	/* With fewer than two elements there is nothing to accumulate. Without
	 * this guard a size of 0 (or less) puts 'end' before 'buffer', so the
	 * 'ptr != end' loop below would never terminate inside the array. */
	if (!buffer || (size < 2))
		return;

	ptr = buffer;
	end = &buffer[size - 1];

	while (ptr != end)
	{
		ptr[1] += ptr[0];
		ptr++;
	}
}
/**
 * Differentially encode a buffer in place: every element after the first is
 * replaced by its difference to the original value of its predecessor.
 * The first element is left unchanged.
 *
 * @param buffer values to encode; modified in place
 * @param size   number of INT16 elements in buffer
 */
void rfx_differential_encode(INT16* buffer, int size)
{
	INT16 previous = buffer[0];
	int index;

	for (index = 1; index < size; index++)
	{
		/* Remember the unencoded value before overwriting it. */
		const INT16 original = buffer[index];
		buffer[index] -= previous;
		previous = original;
	}
}

View File

@ -23,7 +23,29 @@
#include <freerdp/codec/rfx.h>
#include <freerdp/api.h>
FREERDP_LOCAL void rfx_differential_decode(INT16* buffer, int size);
FREERDP_LOCAL void rfx_differential_encode(INT16* buffer, int size);
/**
 * Undo differential encoding in place: every element after the first is
 * replaced by the sum of itself and the (already decoded) element before it.
 *
 * @param buffer values to decode; modified in place
 * @param size   number of INT16 elements in buffer
 */
static INLINE void rfx_differential_decode(INT16* buffer, int size)
{
	INT16* ptr;
	INT16* end;

	/* With fewer than two elements there is nothing to accumulate. Without
	 * this guard a size of 0 (or less) puts 'end' before 'buffer', so the
	 * 'ptr != end' loop below would never terminate inside the array. */
	if (!buffer || (size < 2))
		return;

	ptr = buffer;
	end = &buffer[size - 1];

	while (ptr != end)
	{
		ptr[1] += ptr[0];
		ptr++;
	}
}
/**
 * Differentially encode a buffer in place: every element after the first is
 * replaced by its difference to the original value of its predecessor.
 * The first element is left unchanged.
 *
 * @param buffer values to encode; modified in place
 * @param size   number of INT16 elements in buffer
 */
static INLINE void rfx_differential_encode(INT16* buffer, int size)
{
	INT16 previous = buffer[0];
	int index;

	for (index = 1; index < size; index++)
	{
		/* Remember the unencoded value before overwriting it. */
		const INT16 original = buffer[index];
		buffer[index] -= previous;
		previous = original;
	}
}
#endif /* __RFX_DIFFERENTIAL_H */

View File

@ -26,11 +26,6 @@
#include "prim_internal.h"
#ifndef MINMAX
#define MINMAX(_v_, _l_, _h_) \
((_v_) < (_l_) ? (_l_) : ((_v_) > (_h_) ? (_h_) : (_v_)))
#endif /* !MINMAX */
/* ------------------------------------------------------------------------- */
static pstatus_t general_YCoCgToRGB_8u_AC4R(
const BYTE* pSrc, INT32 srcStep,
@ -67,8 +62,8 @@ static pstatus_t general_YCoCgToRGB_8u_AC4R(
R = T + Co;
G = Y + Cg;
B = T - Co;
dptr = (*writePixel)(dptr, formatSize, DstFormat, MINMAX(R, 0, 255),
MINMAX(G, 0, 255), MINMAX(B, 0, 255), A);
dptr = (*writePixel)(dptr, formatSize, DstFormat, CLIP(R),
CLIP(G), CLIP(B), A);
}
sptr += srcPad;

View File

@ -25,17 +25,6 @@
#include <freerdp/codec/color.h>
#include "prim_internal.h"
/* Saturate a signed 32-bit value to the range [0, 255]. */
static INLINE BYTE CLIP(INT32 X)
{
	if (X < 0L)
		return 0L;

	return (X > 255L) ? 255L : X;
}
/**
* @brief general_YUV420CombineToYUV444
*

View File

@ -35,11 +35,9 @@ static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(
UINT32 i, nWidth, nHeight, VaddDst, VaddY, VaddU, VaddV;
__m128i r0, r1, r2, r3, r4, r5, r6, r7;
__m128i* buffer;
// TODO: Need to implement proper color conversion!!!!!
return generic->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep,
DstFormat, roi);
DstFormat, roi);
/* last_line: if the last (U,V doubled) line should be skipped, set to 10B
* last_column: if it's the last column in a line, set to 10B (for handling line-endings not multiple by four) */
buffer = _aligned_malloc(4 * 16, 16);

View File

@ -31,7 +31,6 @@
#define MINMAX(_v_, _l_, _h_) \
((_v_) < (_l_) ? (_l_) : ((_v_) > (_h_) ? (_h_) : (_v_)))
#endif /* !MINMAX */
/* ------------------------------------------------------------------------- */
static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(
const INT16* pSrc[3], UINT32 srcStep,
@ -60,23 +59,8 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(
R = ((INT16)(((Cr * 1.402525f) + Y + 16.0f)) >> 5);
G = ((INT16)((Y - (Cb * 0.343730f) - (Cr * 0.714401f) + 16.0f)) >> 5);
B = ((INT16)(((Cb * 1.769905f) + Y + 16.0f)) >> 5);
if (R < 0)
R = 0;
else if (R > 255)
R = 255;
if (G < 0)
G = 0;
else if (G > 255)
G = 255;
if (B < 0)
B = 0;
else if (B > 255)
B = 255;
pRGB = (*writePixel)(pRGB, formatSize, DstFormat, R, G, B, 0xFF);
pRGB = (*writePixel)(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G),
CLIP(B), 0xFF);
pY++;
pCb++;
pCr++;
@ -118,23 +102,8 @@ static pstatus_t general_yCbCrToBGR_16s8u_P3AC4R(
R = ((INT16)(((Cr * 1.402525f) + Y + 16.0f)) >> 5);
G = ((INT16)((Y - (Cb * 0.343730f) - (Cr * 0.714401f) + 16.0f)) >> 5);
B = ((INT16)(((Cb * 1.769905f) + Y + 16.0f)) >> 5);
if (R < 0)
R = 0;
else if (R > 255)
R = 255;
if (G < 0)
G = 0;
else if (G > 255)
G = 255;
if (B < 0)
B = 0;
else if (B > 255)
B = 255;
pRGB = (*writePixel)(pRGB, formatSize, DstFormat, R, G, B, 0xFF);
pRGB = (*writePixel)(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G),
CLIP(B), 0xFF);
pY++;
pCb++;
pCr++;
@ -196,9 +165,9 @@ static pstatus_t general_yCbCrToRGB_16s16s_P3P3(
* r = y + cr*1.403f;
* g = y - cb*0.344f - cr*0.714f;
* b = y + cb*1.770f;
* y_r_buf[i] = MINMAX(r>>5, 0, 255);
* cb_g_buf[i] = MINMAX(g>>5, 0, 255);
* cr_b_buf[i] = MINMAX(b>>5, 0, 255);
* y_r_buf[i] = CLIP(r>>5);
* cb_g_buf[i] = CLIP(g>>5);
* cr_b_buf[i] = CLIP(b>>5);
*/
/*
* We scale the factors by << 16 into 32-bit integers in order to
@ -214,9 +183,9 @@ static pstatus_t general_yCbCrToRGB_16s16s_P3P3(
r = y + cr * 91947;
g = y - cb * 22544 - cr * 46792;
b = y + cb * 115998;
*rptr++ = MINMAX(r >> 21, 0, 255);
*gptr++ = MINMAX(g >> 21, 0, 255);
*bptr++ = MINMAX(b >> 21, 0, 255);
*rptr++ = CLIP(r >> 21);
*gptr++ = CLIP(g >> 21);
*bptr++ = CLIP(b >> 21);
}
yptr += srcbump;

View File

@ -172,20 +172,20 @@ static pstatus_t sse2_yCbCrToRGB_16s16s_P3P3(
/* (y + HIWORD(cr*22986)) >> 3 */
r = _mm_add_epi16(y, _mm_mulhi_epi16(cr, r_cr));
r = _mm_srai_epi16(r, 3);
/* r_buf[i] = MINMAX(r, 0, 255); */
/* r_buf[i] = CLIP(r); */
_mm_between_epi16(r, zero, max);
_mm_store_si128(r_buf + i, r);
/* (y + HIWORD(cb*-5636) + HIWORD(cr*-11698)) >> 3 */
g = _mm_add_epi16(y, _mm_mulhi_epi16(cb, g_cb));
g = _mm_add_epi16(g, _mm_mulhi_epi16(cr, g_cr));
g = _mm_srai_epi16(g, 3);
/* g_buf[i] = MINMAX(g, 0, 255); */
/* g_buf[i] = CLIP(g); */
_mm_between_epi16(g, zero, max);
_mm_store_si128(g_buf + i, g);
/* (y + HIWORD(cb*28999)) >> 3 */
b = _mm_add_epi16(y, _mm_mulhi_epi16(cb, b_cb));
b = _mm_srai_epi16(b, 3);
/* b_buf[i] = MINMAX(b, 0, 255); */
/* b_buf[i] = CLIP(b); */
_mm_between_epi16(b, zero, max);
_mm_store_si128(b_buf + i, b);
}
@ -517,20 +517,20 @@ static pstatus_t neon_yCbCrToRGB_16s16s_P3P3(
/* (y + HIWORD(cr*22986)) >> 3 */
int16x8_t r = vaddq_s16(y, vshrq_n_s16(vqdmulhq_s16(cr, r_cr), 1));
r = vshrq_n_s16(r, 3);
/* r_buf[i] = MINMAX(r, 0, 255); */
/* r_buf[i] = CLIP(r); */
r = vminq_s16(vmaxq_s16(r, zero), max);
vst1q_s16((INT16*)&r_buf[i], r);
/* (y + HIWORD(cb*-5636) + HIWORD(cr*-11698)) >> 3 */
int16x8_t g = vaddq_s16(y, vshrq_n_s16(vqdmulhq_s16(cb, g_cb), 1));
g = vaddq_s16(g, vshrq_n_s16(vqdmulhq_s16(cr, g_cr), 1));
g = vshrq_n_s16(g, 3);
/* g_buf[i] = MINMAX(g, 0, 255); */
/* g_buf[i] = CLIP(g); */
g = vminq_s16(vmaxq_s16(g, zero), max);
vst1q_s16((INT16*)&g_buf[i], g);
/* (y + HIWORD(cb*28999)) >> 3 */
int16x8_t b = vaddq_s16(y, vshrq_n_s16(vqdmulhq_s16(cb, b_cb), 1));
b = vshrq_n_s16(b, 3);
/* b_buf[i] = MINMAX(b, 0, 255); */
/* b_buf[i] = CLIP(b); */
b = vminq_s16(vmaxq_s16(b, zero), max);
vst1q_s16((INT16*)&b_buf[i], b);
}

View File

@ -88,27 +88,38 @@ static INLINE fkt_writePixel getPixelWriteFunction(DWORD format)
{
switch (format)
{
case PIXEL_FORMAT_ARGB32:
case PIXEL_FORMAT_XRGB32:
return writePixelXRGB;
case PIXEL_FORMAT_ARGB32:
case PIXEL_FORMAT_XRGB32:
return writePixelXRGB;
case PIXEL_FORMAT_ABGR32:
case PIXEL_FORMAT_XBGR32:
return writePixelXBGR;
case PIXEL_FORMAT_ABGR32:
case PIXEL_FORMAT_XBGR32:
return writePixelXBGR;
case PIXEL_FORMAT_RGBA32:
case PIXEL_FORMAT_RGBX32:
return writePixelRGBX;
case PIXEL_FORMAT_RGBA32:
case PIXEL_FORMAT_RGBX32:
return writePixelRGBX;
case PIXEL_FORMAT_BGRA32:
case PIXEL_FORMAT_BGRX32:
return writePixelBGRX;
case PIXEL_FORMAT_BGRA32:
case PIXEL_FORMAT_BGRX32:
return writePixelBGRX;
default:
return writePixelGeneric;
default:
return writePixelGeneric;
}
}
/* Saturate a signed 32-bit value to the range [0, 255]. */
static INLINE BYTE CLIP(INT32 X)
{
	if (X < 0L)
		return 0L;

	return (X > 255L) ? 255L : X;
}
/* Function prototypes for all the init/deinit routines. */
FREERDP_LOCAL void primitives_init_copy(primitives_t* prims);
FREERDP_LOCAL void primitives_init_set(primitives_t* prims);