From 2e672d5bdbc09d6fa34005118ecf299cf39a9a5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Moreau?= Date: Mon, 18 Aug 2014 21:10:56 -0400 Subject: [PATCH] libfreerdp-codec: optimize RemoteFX dequantization and differentiation --- libfreerdp/codec/progressive.c | 23 ++++++-------- libfreerdp/codec/rfx_differential.c | 15 ++++----- libfreerdp/codec/rfx_differential.h | 4 +-- libfreerdp/codec/rfx_neon.c | 25 +++++++-------- libfreerdp/codec/rfx_quantization.c | 47 ++++++++++++++--------------- libfreerdp/codec/rfx_sse2.c | 24 +++++++-------- 6 files changed, 64 insertions(+), 74 deletions(-) diff --git a/libfreerdp/codec/progressive.c b/libfreerdp/codec/progressive.c index c9e3adf1d..c36b29a8a 100644 --- a/libfreerdp/codec/progressive.c +++ b/libfreerdp/codec/progressive.c @@ -107,19 +107,16 @@ int progressive_rfx_decode_component(PROGRESSIVE_CONTEXT* progressive, rfx_differential_decode(&buffer[4015], 81); /* LL3 */ - /* Scale the values so that they are represented as 11.5 fixed-point number */ - rfx_quantization_decode_block(prims, buffer, 4096, 5); - - rfx_quantization_decode_block(prims, &buffer[0], 1023, (quant->HL1 - 6)); /* HL1 */ - rfx_quantization_decode_block(prims, &buffer[1023], 1023, (quant->LH1 - 6)); /* LH1 */ - rfx_quantization_decode_block(prims, &buffer[2046], 961, (quant->HH1 - 6)); /* HH1 */ - rfx_quantization_decode_block(prims, &buffer[3007], 272, (quant->HL2 - 6)); /* HL2 */ - rfx_quantization_decode_block(prims, &buffer[3279], 272, (quant->LH2 - 6)); /* LH2 */ - rfx_quantization_decode_block(prims, &buffer[3551], 256, (quant->HH2 - 6)); /* HH2 */ - rfx_quantization_decode_block(prims, &buffer[3807], 72, (quant->HL3 - 6)); /* HL3 */ - rfx_quantization_decode_block(prims, &buffer[3879], 72, (quant->LH3 - 6)); /* LH3 */ - rfx_quantization_decode_block(prims, &buffer[3951], 64, (quant->HH3 - 6)); /* HH3 */ - rfx_quantization_decode_block(prims, &buffer[4015], 81, (quant->LL3 - 6)); /* LL3 */ + rfx_quantization_decode_block(prims, &buffer[0], 1023, (quant->HL1 - 1)); /* HL1 */ + rfx_quantization_decode_block(prims, &buffer[1023], 1023, (quant->LH1 - 1)); /* LH1 */ + rfx_quantization_decode_block(prims, &buffer[2046], 961, (quant->HH1 - 1)); /* HH1 */ + rfx_quantization_decode_block(prims, &buffer[3007], 272, (quant->HL2 - 1)); /* HL2 */ + rfx_quantization_decode_block(prims, &buffer[3279], 272, (quant->LH2 - 1)); /* LH2 */ + rfx_quantization_decode_block(prims, &buffer[3551], 256, (quant->HH2 - 1)); /* HH2 */ + rfx_quantization_decode_block(prims, &buffer[3807], 72, (quant->HL3 - 1)); /* HL3 */ + rfx_quantization_decode_block(prims, &buffer[3879], 72, (quant->LH3 - 1)); /* LH3 */ + rfx_quantization_decode_block(prims, &buffer[3951], 64, (quant->HH3 - 1)); /* HH3 */ + rfx_quantization_decode_block(prims, &buffer[4015], 81, (quant->LL3 - 1)); /* LL3 */ return 1; } diff --git a/libfreerdp/codec/rfx_differential.c b/libfreerdp/codec/rfx_differential.c index e80aa98f3..4429fb8c0 100644 --- a/libfreerdp/codec/rfx_differential.c +++ b/libfreerdp/codec/rfx_differential.c @@ -27,23 +27,24 @@ #include "rfx_differential.h" -void rfx_differential_decode(INT16* buffer, int buffer_size) +void rfx_differential_decode(INT16* buffer, int size) { - INT16* src; - INT16* dst; + INT16* ptr = buffer; + INT16* end = &buffer[size - 1]; - for (src = buffer, dst = buffer + 1; buffer_size > 1; src++, dst++, buffer_size--) + while (ptr != end) { - *dst += *src; + ptr[1] += ptr[0]; + ptr++; } } -void rfx_differential_encode(INT16* buffer, int buffer_size) +void rfx_differential_encode(INT16* buffer, int size) { INT16 n1, n2; INT16* dst; - for (n1 = *buffer, dst = buffer + 1; buffer_size > 1; dst++, buffer_size--) + for (n1 = *buffer, dst = buffer + 1; size > 1; dst++, size--) { n2 = *dst; *dst -= n1; diff --git a/libfreerdp/codec/rfx_differential.h b/libfreerdp/codec/rfx_differential.h index 3f6dbf623..47e1a4241 100644 --- a/libfreerdp/codec/rfx_differential.h +++ b/libfreerdp/codec/rfx_differential.h @@ -22,7 +22,7 @@ #include -void rfx_differential_decode(INT16* buffer, int buffer_size); -void rfx_differential_encode(INT16* buffer, int buffer_size); +void rfx_differential_decode(INT16* buffer, int size); +void rfx_differential_encode(INT16* buffer, int size); #endif /* __RFX_DIFFERENTIAL_H */ diff --git a/libfreerdp/codec/rfx_neon.c b/libfreerdp/codec/rfx_neon.c index 6fe8ec83d..1632f983a 100644 --- a/libfreerdp/codec/rfx_neon.c +++ b/libfreerdp/codec/rfx_neon.c @@ -51,21 +51,18 @@ rfx_quantization_decode_block_NEON(INT16 * buffer, const int buffer_size, const while(buf < buf_end); } -void -rfx_quantization_decode_NEON(INT16 * buffer, const UINT32 * quantization_values) +void rfx_quantization_decode_NEON(INT16 * buffer, const UINT32 * quantVals) { - rfx_quantization_decode_block_NEON(buffer, 4096, 5); - - rfx_quantization_decode_block_NEON(buffer, 1024, quantization_values[8] - 6); /* HL1 */ - rfx_quantization_decode_block_NEON(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */ - rfx_quantization_decode_block_NEON(buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */ - rfx_quantization_decode_block_NEON(buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */ - rfx_quantization_decode_block_NEON(buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */ - rfx_quantization_decode_block_NEON(buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */ - rfx_quantization_decode_block_NEON(buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */ - rfx_quantization_decode_block_NEON(buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */ - rfx_quantization_decode_block_NEON(buffer + 3968, 64, quantization_values[3] - 6); /* HH3 */ - rfx_quantization_decode_block_NEON(buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */ + rfx_quantization_decode_block_NEON(&buffer[0], 1024, quantVals[8] - 1); /* HL1 */ + rfx_quantization_decode_block_NEON(&buffer[1024], 1024, quantVals[7] - 1); /* LH1 */ + rfx_quantization_decode_block_NEON(&buffer[2048], 1024, quantVals[9] - 1); /* HH1 */ + rfx_quantization_decode_block_NEON(&buffer[3072], 256, quantVals[5] - 1); /* HL2 */ + rfx_quantization_decode_block_NEON(&buffer[3328], 256, quantVals[4] - 1); /* LH2 */ + rfx_quantization_decode_block_NEON(&buffer[3584], 256, quantVals[6] - 1); /* HH2 */ + rfx_quantization_decode_block_NEON(&buffer[3840], 64, quantVals[2] - 1); /* HL3 */ + rfx_quantization_decode_block_NEON(&buffer[3904], 64, quantVals[1] - 1); /* LH3 */ + rfx_quantization_decode_block_NEON(&buffer[3968], 64, quantVals[3] - 1); /* HH3 */ + rfx_quantization_decode_block_NEON(&buffer[4032], 64, quantVals[0] - 1); /* LL3 */ } diff --git a/libfreerdp/codec/rfx_quantization.c b/libfreerdp/codec/rfx_quantization.c index b5c79e5f2..6c497a65a 100644 --- a/libfreerdp/codec/rfx_quantization.c +++ b/libfreerdp/codec/rfx_quantization.c @@ -26,21 +26,21 @@ #include "rfx_quantization.h" /* - * Band Offset Size + * Band Offset Dimensions Size * - * HL1 0 1024 - * LH1 1024 1024 - * HH1 2048 1024 + * HL1 0 32x32 1024 + * LH1 1024 32x32 1024 + * HH1 2048 32x32 1024 * - * HL2 3072 256 - * LH2 3328 256 - * HH2 3584 256 + * HL2 3072 16x16 256 + * LH2 3328 16x16 256 + * HH2 3584 16x16 256 * - * HL3 3840 64 - * LH3 3904 64 - * HH3 3968 64 + * HL3 3840 8x8 64 + * LH3 3904 8x8 64 + * HH3 3968 8x8 64 * - * LL3 4032 64 + * LL3 4032 8x8 64 */ void rfx_quantization_decode_block(const primitives_t *prims, INT16* buffer, int buffer_size, UINT32 factor) @@ -51,23 +51,20 @@ void rfx_quantization_decode_block(const primitives_t *prims, INT16* buffer, int prims->lShiftC_16s(buffer, factor, buffer, buffer_size); } -void rfx_quantization_decode(INT16* buffer, const UINT32* quantization_values) +void rfx_quantization_decode(INT16* buffer, const UINT32* quantVals) { const primitives_t* prims = primitives_get(); - /* Scale the values so that they are represented as 11.5 fixed-point number */ - rfx_quantization_decode_block(prims, buffer, 4096, 5); - - rfx_quantization_decode_block(prims, buffer, 1024, quantization_values[8] - 6); /* HL1 */ - rfx_quantization_decode_block(prims, buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */ - rfx_quantization_decode_block(prims, buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */ - rfx_quantization_decode_block(prims, buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */ - rfx_quantization_decode_block(prims, buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */ - rfx_quantization_decode_block(prims, buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */ - rfx_quantization_decode_block(prims, buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */ - rfx_quantization_decode_block(prims, buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */ - rfx_quantization_decode_block(prims, buffer + 3968, 64, quantization_values[3] - 6); /* HH3 */ - rfx_quantization_decode_block(prims, buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */ + rfx_quantization_decode_block(prims, &buffer[0], 1024, quantVals[8] - 1); /* HL1 */ + rfx_quantization_decode_block(prims, &buffer[1024], 1024, quantVals[7] - 1); /* LH1 */ + rfx_quantization_decode_block(prims, &buffer[2048], 1024, quantVals[9] - 1); /* HH1 */ + rfx_quantization_decode_block(prims, &buffer[3072], 256, quantVals[5] - 1); /* HL2 */ + rfx_quantization_decode_block(prims, &buffer[3328], 256, quantVals[4] - 1); /* LH2 */ + rfx_quantization_decode_block(prims, &buffer[3584], 256, quantVals[6] - 1); /* HH2 */ + rfx_quantization_decode_block(prims, &buffer[3840], 64, quantVals[2] - 1); /* HL3 */ + rfx_quantization_decode_block(prims, &buffer[3904], 64, quantVals[1] - 1); /* LH3 */ + rfx_quantization_decode_block(prims, &buffer[3968], 64, quantVals[3] - 1); /* HH3 */ + rfx_quantization_decode_block(prims, &buffer[4032], 64, quantVals[0] - 1); /* LL3 */ } static void rfx_quantization_encode_block(INT16* buffer, int buffer_size, UINT32 factor) diff --git a/libfreerdp/codec/rfx_sse2.c b/libfreerdp/codec/rfx_sse2.c index 47dbac32f..4f54d17cd 100644 --- a/libfreerdp/codec/rfx_sse2.c +++ b/libfreerdp/codec/rfx_sse2.c @@ -82,22 +82,20 @@ rfx_quantization_decode_block_sse2(INT16* buffer, const int buffer_size, const U } while(ptr < buf_end); } -static void rfx_quantization_decode_sse2(INT16* buffer, const UINT32* quantization_values) +static void rfx_quantization_decode_sse2(INT16* buffer, const UINT32* quantVals) { _mm_prefetch_buffer((char*) buffer, 4096 * sizeof(INT16)); - rfx_quantization_decode_block_sse2(buffer, 4096, 5); - - rfx_quantization_decode_block_sse2(buffer, 1024, quantization_values[8] - 6); /* HL1 */ - rfx_quantization_decode_block_sse2(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */ - rfx_quantization_decode_block_sse2(buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */ - rfx_quantization_decode_block_sse2(buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */ - rfx_quantization_decode_block_sse2(buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */ - rfx_quantization_decode_block_sse2(buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */ - rfx_quantization_decode_block_sse2(buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */ - rfx_quantization_decode_block_sse2(buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */ - rfx_quantization_decode_block_sse2(buffer + 3968, 64, quantization_values[3] - 6); /* HH3 */ - rfx_quantization_decode_block_sse2(buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */ + rfx_quantization_decode_block_sse2(&buffer[0], 1024, quantVals[8] - 1); /* HL1 */ + rfx_quantization_decode_block_sse2(&buffer[1024], 1024, quantVals[7] - 1); /* LH1 */ + rfx_quantization_decode_block_sse2(&buffer[2048], 1024, quantVals[9] - 1); /* HH1 */ + rfx_quantization_decode_block_sse2(&buffer[3072], 256, quantVals[5] - 1); /* HL2 */ + rfx_quantization_decode_block_sse2(&buffer[3328], 256, quantVals[4] - 1); /* LH2 */ + rfx_quantization_decode_block_sse2(&buffer[3584], 256, quantVals[6] - 1); /* HH2 */ + rfx_quantization_decode_block_sse2(&buffer[3840], 64, quantVals[2] - 1); /* HL3 */ + rfx_quantization_decode_block_sse2(&buffer[3904], 64, quantVals[1] - 1); /* LH3 */ + rfx_quantization_decode_block_sse2(&buffer[3968], 64, quantVals[3] - 1); /* HH3 */ + rfx_quantization_decode_block_sse2(&buffer[4032], 64, quantVals[0] - 1); /* LL3 */ } static __inline void __attribute__((ATTRIBUTES))