libfreerdp-codec: optimize RemoteFX dequantization and differentiation

This commit is contained in:
Marc-André Moreau 2014-08-18 21:10:56 -04:00
parent 6744bbe7ae
commit 2e672d5bdb
6 changed files with 64 additions and 74 deletions

View File

@ -107,19 +107,16 @@ int progressive_rfx_decode_component(PROGRESSIVE_CONTEXT* progressive,
rfx_differential_decode(&buffer[4015], 81); /* LL3 */
/* Scale the values so that they are represented as 11.5 fixed-point number */
rfx_quantization_decode_block(prims, buffer, 4096, 5);
rfx_quantization_decode_block(prims, &buffer[0], 1023, (quant->HL1 - 6)); /* HL1 */
rfx_quantization_decode_block(prims, &buffer[1023], 1023, (quant->LH1 - 6)); /* LH1 */
rfx_quantization_decode_block(prims, &buffer[2046], 961, (quant->HH1 - 6)); /* HH1 */
rfx_quantization_decode_block(prims, &buffer[3007], 272, (quant->HL2 - 6)); /* HL2 */
rfx_quantization_decode_block(prims, &buffer[3279], 272, (quant->LH2 - 6)); /* LH2 */
rfx_quantization_decode_block(prims, &buffer[3551], 256, (quant->HH2 - 6)); /* HH2 */
rfx_quantization_decode_block(prims, &buffer[3807], 72, (quant->HL3 - 6)); /* HL3 */
rfx_quantization_decode_block(prims, &buffer[3879], 72, (quant->LH3 - 6)); /* LH3 */
rfx_quantization_decode_block(prims, &buffer[3951], 64, (quant->HH3 - 6)); /* HH3 */
rfx_quantization_decode_block(prims, &buffer[4015], 81, (quant->LL3 - 6)); /* LL3 */
rfx_quantization_decode_block(prims, &buffer[0], 1023, (quant->HL1 - 1)); /* HL1 */
rfx_quantization_decode_block(prims, &buffer[1023], 1023, (quant->LH1 - 1)); /* LH1 */
rfx_quantization_decode_block(prims, &buffer[2046], 961, (quant->HH1 - 1)); /* HH1 */
rfx_quantization_decode_block(prims, &buffer[3007], 272, (quant->HL2 - 1)); /* HL2 */
rfx_quantization_decode_block(prims, &buffer[3279], 272, (quant->LH2 - 1)); /* LH2 */
rfx_quantization_decode_block(prims, &buffer[3551], 256, (quant->HH2 - 1)); /* HH2 */
rfx_quantization_decode_block(prims, &buffer[3807], 72, (quant->HL3 - 1)); /* HL3 */
rfx_quantization_decode_block(prims, &buffer[3879], 72, (quant->LH3 - 1)); /* LH3 */
rfx_quantization_decode_block(prims, &buffer[3951], 64, (quant->HH3 - 1)); /* HH3 */
rfx_quantization_decode_block(prims, &buffer[4015], 81, (quant->LL3 - 1)); /* LL3 */
return 1;
}

View File

@ -27,23 +27,24 @@
#include "rfx_differential.h"
/**
 * Undo differential (delta) coding in place.
 *
 * Every element after the first is replaced by the running sum of all
 * elements up to and including itself: buffer[i] += buffer[i - 1], walking
 * from the front of the buffer to the back.
 *
 * @param buffer coefficients to decode; modified in place
 * @param size   number of INT16 elements in buffer
 */
void rfx_differential_decode(INT16* buffer, int size)
{
	INT16* ptr = buffer;
	INT16* end;

	/*
	 * With fewer than two elements there is nothing to accumulate. The guard
	 * is required for correctness: for size <= 0, &buffer[size - 1] lies
	 * before the start of the buffer and the "ptr != end" loop below would
	 * never terminate.
	 */
	if (size < 2)
		return;

	/* Last element; the loop stops once it has been written. */
	end = &buffer[size - 1];

	while (ptr != end)
	{
		ptr[1] += ptr[0];
		ptr++;
	}
}
void rfx_differential_encode(INT16* buffer, int buffer_size)
void rfx_differential_encode(INT16* buffer, int size)
{
INT16 n1, n2;
INT16* dst;
for (n1 = *buffer, dst = buffer + 1; buffer_size > 1; dst++, buffer_size--)
for (n1 = *buffer, dst = buffer + 1; size > 1; dst++, size--)
{
n2 = *dst;
*dst -= n1;

View File

@ -22,7 +22,7 @@
#include <freerdp/codec/rfx.h>
void rfx_differential_decode(INT16* buffer, int buffer_size);
void rfx_differential_encode(INT16* buffer, int buffer_size);
void rfx_differential_decode(INT16* buffer, int size);
void rfx_differential_encode(INT16* buffer, int size);
#endif /* __RFX_DIFFERENTIAL_H */

View File

@ -51,21 +51,18 @@ rfx_quantization_decode_block_NEON(INT16 * buffer, const int buffer_size, const
while(buf < buf_end);
}
/**
 * Dequantize one 4096-coefficient tile component in place (NEON path).
 *
 * Each subband is handed to rfx_quantization_decode_block_NEON() together
 * with its own factor, quantVals[n] - 1. This single per-band pass replaces
 * the earlier two-step scheme (one pass over all 4096 values with factor 5,
 * then a per-band pass with quantVals[n] - 6): 5 + (q - 6) == q - 1.
 *
 * Subband layout and quantVals index used for each band:
 *   HL1 @    0 (1024) -> [8]    LH1 @ 1024 (1024) -> [7]    HH1 @ 2048 (1024) -> [9]
 *   HL2 @ 3072 ( 256) -> [5]    LH2 @ 3328 ( 256) -> [4]    HH2 @ 3584 ( 256) -> [6]
 *   HL3 @ 3840 (  64) -> [2]    LH3 @ 3904 (  64) -> [1]    HH3 @ 3968 (  64) -> [3]
 *   LL3 @ 4032 (  64) -> [0]
 *
 * NOTE(review): quantVals entries are unsigned; an entry of 0 would wrap the
 * factor to a huge value. Presumably the values are validated before they
 * reach this function - confirm against the caller.
 *
 * @param buffer    4096 INT16 coefficients, modified in place
 * @param quantVals at least 10 per-band quantization values, indexed as above
 */
void rfx_quantization_decode_NEON(INT16 * buffer, const UINT32 * quantVals)
{
	rfx_quantization_decode_block_NEON(&buffer[0], 1024, quantVals[8] - 1); /* HL1 */
	rfx_quantization_decode_block_NEON(&buffer[1024], 1024, quantVals[7] - 1); /* LH1 */
	rfx_quantization_decode_block_NEON(&buffer[2048], 1024, quantVals[9] - 1); /* HH1 */
	rfx_quantization_decode_block_NEON(&buffer[3072], 256, quantVals[5] - 1); /* HL2 */
	rfx_quantization_decode_block_NEON(&buffer[3328], 256, quantVals[4] - 1); /* LH2 */
	rfx_quantization_decode_block_NEON(&buffer[3584], 256, quantVals[6] - 1); /* HH2 */
	rfx_quantization_decode_block_NEON(&buffer[3840], 64, quantVals[2] - 1); /* HL3 */
	rfx_quantization_decode_block_NEON(&buffer[3904], 64, quantVals[1] - 1); /* LH3 */
	rfx_quantization_decode_block_NEON(&buffer[3968], 64, quantVals[3] - 1); /* HH3 */
	rfx_quantization_decode_block_NEON(&buffer[4032], 64, quantVals[0] - 1); /* LL3 */
}

View File

@ -26,21 +26,21 @@
#include "rfx_quantization.h"
/*
 * Band		Offset		Dimensions	Size
 *
 * HL1		0		32x32		1024
 * LH1		1024		32x32		1024
 * HH1		2048		32x32		1024
 *
 * HL2		3072		16x16		256
 * LH2		3328		16x16		256
 * HH2		3584		16x16		256
 *
 * HL3		3840		8x8		64
 * LH3		3904		8x8		64
 * HH3		3968		8x8		64
 *
 * LL3		4032		8x8		64
 */
void rfx_quantization_decode_block(const primitives_t *prims, INT16* buffer, int buffer_size, UINT32 factor)
@ -51,23 +51,20 @@ void rfx_quantization_decode_block(const primitives_t *prims, INT16* buffer, int
prims->lShiftC_16s(buffer, factor, buffer, buffer_size);
}
/**
 * Dequantize one 4096-coefficient tile component in place (generic path).
 *
 * Each subband is passed to rfx_quantization_decode_block(), which left-shifts
 * the coefficients by the given factor through the primitives table. The
 * factor quantVals[n] - 1 folds two former steps into one pass per band:
 * the shift by 5 that scaled the whole tile to 11.5 fixed-point, and the
 * per-band shift by quantVals[n] - 6 (5 + (q - 6) == q - 1).
 *
 * quantVals index per band: HL1 [8], LH1 [7], HH1 [9], HL2 [5], LH2 [4],
 * HH2 [6], HL3 [2], LH3 [1], HH3 [3], LL3 [0].
 *
 * NOTE(review): quantVals entries are UINT32; an entry of 0 would wrap the
 * factor to a huge shift count. Presumably the values are validated before
 * they reach this function - confirm against the caller.
 *
 * @param buffer    4096 INT16 coefficients, modified in place
 * @param quantVals at least 10 per-band quantization values, indexed as above
 */
void rfx_quantization_decode(INT16* buffer, const UINT32* quantVals)
{
	const primitives_t* prims = primitives_get();

	rfx_quantization_decode_block(prims, &buffer[0], 1024, quantVals[8] - 1); /* HL1 */
	rfx_quantization_decode_block(prims, &buffer[1024], 1024, quantVals[7] - 1); /* LH1 */
	rfx_quantization_decode_block(prims, &buffer[2048], 1024, quantVals[9] - 1); /* HH1 */
	rfx_quantization_decode_block(prims, &buffer[3072], 256, quantVals[5] - 1); /* HL2 */
	rfx_quantization_decode_block(prims, &buffer[3328], 256, quantVals[4] - 1); /* LH2 */
	rfx_quantization_decode_block(prims, &buffer[3584], 256, quantVals[6] - 1); /* HH2 */
	rfx_quantization_decode_block(prims, &buffer[3840], 64, quantVals[2] - 1); /* HL3 */
	rfx_quantization_decode_block(prims, &buffer[3904], 64, quantVals[1] - 1); /* LH3 */
	rfx_quantization_decode_block(prims, &buffer[3968], 64, quantVals[3] - 1); /* HH3 */
	rfx_quantization_decode_block(prims, &buffer[4032], 64, quantVals[0] - 1); /* LL3 */
}
static void rfx_quantization_encode_block(INT16* buffer, int buffer_size, UINT32 factor)

View File

@ -82,22 +82,20 @@ rfx_quantization_decode_block_sse2(INT16* buffer, const int buffer_size, const U
} while(ptr < buf_end);
}
/**
 * Dequantize one 4096-coefficient tile component in place (SSE2 path).
 *
 * The whole buffer is prefetched first, then each subband is handed to
 * rfx_quantization_decode_block_sse2() with its own factor,
 * quantVals[n] - 1. This single per-band pass replaces the earlier two-step
 * scheme (one pass over all 4096 values with factor 5, then a per-band pass
 * with quantVals[n] - 6): 5 + (q - 6) == q - 1.
 *
 * quantVals index per band: HL1 [8], LH1 [7], HH1 [9], HL2 [5], LH2 [4],
 * HH2 [6], HL3 [2], LH3 [1], HH3 [3], LL3 [0].
 *
 * NOTE(review): quantVals entries are UINT32; an entry of 0 would wrap the
 * factor to a huge value. Presumably the values are validated before they
 * reach this function - confirm against the caller.
 *
 * @param buffer    4096 INT16 coefficients, modified in place
 * @param quantVals at least 10 per-band quantization values, indexed as above
 */
static void rfx_quantization_decode_sse2(INT16* buffer, const UINT32* quantVals)
{
	_mm_prefetch_buffer((char*) buffer, 4096 * sizeof(INT16));

	rfx_quantization_decode_block_sse2(&buffer[0], 1024, quantVals[8] - 1); /* HL1 */
	rfx_quantization_decode_block_sse2(&buffer[1024], 1024, quantVals[7] - 1); /* LH1 */
	rfx_quantization_decode_block_sse2(&buffer[2048], 1024, quantVals[9] - 1); /* HH1 */
	rfx_quantization_decode_block_sse2(&buffer[3072], 256, quantVals[5] - 1); /* HL2 */
	rfx_quantization_decode_block_sse2(&buffer[3328], 256, quantVals[4] - 1); /* LH2 */
	rfx_quantization_decode_block_sse2(&buffer[3584], 256, quantVals[6] - 1); /* HH2 */
	rfx_quantization_decode_block_sse2(&buffer[3840], 64, quantVals[2] - 1); /* HL3 */
	rfx_quantization_decode_block_sse2(&buffer[3904], 64, quantVals[1] - 1); /* LH3 */
	rfx_quantization_decode_block_sse2(&buffer[3968], 64, quantVals[3] - 1); /* HH3 */
	rfx_quantization_decode_block_sse2(&buffer[4032], 64, quantVals[0] - 1); /* LL3 */
}
static __inline void __attribute__((ATTRIBUTES))