libfreerdp-rfx: fix rounding of fixed-point number.

The rounding of fixed-point numbers must be done in the quantization phase; otherwise the differential result will be wrong.
This commit is contained in:
Vic Lee 2011-09-04 02:34:49 +08:00
parent 7997865e66
commit 4a25533599
5 changed files with 63 additions and 60 deletions

View File

@ -95,7 +95,7 @@ void rfx_decode_ycbcr_to_rgb(sint16* y_r_buf, sint16* cb_g_buf, sint16* cr_b_buf
*
* However only 7 integer bits will be actually used since the value range is [-128.0, 127.0].
* In other words, the decoded coefficients are scaled by << 5 when interpreted as sint16.
* It was scaled in the first RLGR decoding phase, so we must scale it back here.
* It was scaled in the quantization phase, so we must scale it back here.
*/
for (i = 0; i < 4096; i++)
{

View File

@ -132,7 +132,7 @@ void rfx_encode_rgb_to_ycbcr(sint16* y_r_buf, sint16* cb_g_buf, sint16* cr_b_buf
*
* However only 7 integer bits will be actually used since the value range is [-128.0, 127.0].
* In other words, the encoded coefficients are scaled by << 5 when interpreted as sint16.
* It will be scaled down to original during the last RLGR encoding phase.
* It will be scaled down to original during the quantization phase.
*/
for (i = 0; i < 4096; i++)
{
@ -143,17 +143,17 @@ void rfx_encode_rgb_to_ycbcr(sint16* y_r_buf, sint16* cb_g_buf, sint16* cr_b_buf
y = ((r << 3) + (r) + (r >> 1) + (r >> 4) + (r >> 7)) +
((g << 4) + (g << 1) + (g >> 1) + (g >> 2) + (g >> 5)) +
((b << 1) + (b) + (b >> 1) + (b >> 3) + (b >> 6) + (b >> 7));
y_r_buf[i] = MINMAX(y, 0, (255 << 5)) - (128 << 5);
y_r_buf[i] = MINMAX(y - 4096, -4096, 4095);
/* 0.168935 << 5 = 101.01100111(b), 0.331665 << 5 = 1010.10011100(b), 0.50059 << 5 = 10000.00000100(b) */
cb = 0 - ((r << 2) + (r) + (r >> 2) + (r >> 3) + (r >> 5)) -
((g << 3) + (g << 1) + (g >> 1) + (g >> 4) + (g >> 5) + (g >> 6)) +
((b << 4) + (b >> 6));
cb_g_buf[i] = MINMAX(cb, (-128 << 5), (127 << 5));
cb_g_buf[i] = MINMAX(cb, -4096, 4095);
/* 0.499813 << 5 = 1111.11111110(b), 0.418531 << 5 = 1101.01100100(b), 0.081282 << 5 = 10.10011001(b) */
cr = ((r << 4) - (r >> 7)) -
((g << 3) + (g << 2) + (g) + (g >> 2) + (g >> 3) + (g >> 6)) -
((b << 1) + (b >> 1) + (b >> 4) + (b >> 5) + (b >> 7));
cr_b_buf[i] = MINMAX(cr, (-128 << 5), (127 << 5));
cr_b_buf[i] = MINMAX(cr, -4096, 4095);
}
}

View File

@ -23,10 +23,9 @@ static void rfx_quantization_decode_block(sint16* buffer, int buffer_size, uint3
{
sint16* dst;
if (factor <= 6)
if (factor == 0)
return;
factor -= 6;
for (dst = buffer; buffer_size > 0; dst++, buffer_size--)
{
*dst <<= factor;
@ -35,26 +34,28 @@ static void rfx_quantization_decode_block(sint16* buffer, int buffer_size, uint3
/*
 * Apply the de-quantization left shifts to one 4096-coefficient buffer.
 *
 * Each call passes one sub-band slice (start offset, length) and a shift
 * factor to rfx_quantization_decode_block(). The slices are laid out as:
 *   HL1 0, LH1 1024, HH1 2048            (1024 coefficients each)
 *   HL2 3072, LH2 3328, HH2 3584         (256 coefficients each)
 *   HL3 3840, LH3 3904, HH3 3968, LL3 4032 (64 coefficients each)
 *
 * HH3 must start at 3968: the former offset 3868 overlapped the HL3 slice
 * (3840..3903) and left 3968..4031 untouched.
 *
 * NOTE(review): this listing looks like a rendered diff hunk, so the calls
 * taking plain quantization_values[i] appear to be the superseded lines and
 * the 4096-element pass plus the "- 6" calls their replacements - confirm
 * against the real file.
 * NOTE(review): quantization_values[] is uint32, so "value - 6" wraps to a
 * huge shift count for values below 6 - presumably callers guarantee >= 6;
 * verify.
 */
void rfx_quantization_decode(sint16* buffer, const uint32* quantization_values)
{
	rfx_quantization_decode_block(buffer, 1024, quantization_values[8]); /* HL1 */
	rfx_quantization_decode_block(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
	rfx_quantization_decode_block(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
	rfx_quantization_decode_block(buffer + 3072, 256, quantization_values[5]); /* HL2 */
	rfx_quantization_decode_block(buffer + 3328, 256, quantization_values[4]); /* LH2 */
	rfx_quantization_decode_block(buffer + 3584, 256, quantization_values[6]); /* HH2 */
	rfx_quantization_decode_block(buffer + 3840, 64, quantization_values[2]); /* HL3 */
	rfx_quantization_decode_block(buffer + 3904, 64, quantization_values[1]); /* LH3 */
	rfx_quantization_decode_block(buffer + 3968, 64, quantization_values[3]); /* HH3 */
	rfx_quantization_decode_block(buffer + 4032, 64, quantization_values[0]); /* LL3 */
	/* Scale the values so that they are represented as 11.5 fixed-point numbers */
	rfx_quantization_decode_block(buffer, 4096, 5);
	rfx_quantization_decode_block(buffer, 1024, quantization_values[8] - 6); /* HL1 */
	rfx_quantization_decode_block(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */
	rfx_quantization_decode_block(buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */
	rfx_quantization_decode_block(buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */
	rfx_quantization_decode_block(buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */
	rfx_quantization_decode_block(buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */
	rfx_quantization_decode_block(buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */
	rfx_quantization_decode_block(buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */
	rfx_quantization_decode_block(buffer + 3968, 64, quantization_values[3] - 6); /* HH3 */
	rfx_quantization_decode_block(buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */
}
static void rfx_quantization_encode_block(sint16* buffer, int buffer_size, uint32 factor)
{
sint16* dst;
if (factor <= 6)
if (factor == 0)
return;
factor -= 6;
for (dst = buffer; buffer_size > 0; dst++, buffer_size--)
{
*dst >>= factor;
@ -63,14 +64,17 @@ static void rfx_quantization_encode_block(sint16* buffer, int buffer_size, uint3
/*
 * Apply the quantization right shifts to one 4096-coefficient buffer.
 *
 * Each call passes one sub-band slice (start offset, length) and a shift
 * factor to rfx_quantization_encode_block(). The slices are laid out as:
 *   HL1 0, LH1 1024, HH1 2048            (1024 coefficients each)
 *   HL2 3072, LH2 3328, HH2 3584         (256 coefficients each)
 *   HL3 3840, LH3 3904, HH3 3968, LL3 4032 (64 coefficients each)
 *
 * HH3 must start at 3968: the former offset 3868 overlapped the HL3 slice
 * (3840..3903) and left 3968..4031 untouched.
 *
 * NOTE(review): this listing looks like a rendered diff hunk, so the calls
 * taking plain quantization_values[i] appear to be the superseded lines and
 * the "- 6" calls plus the trailing 4096-element pass their replacements -
 * confirm against the real file.
 * NOTE(review): quantization_values[] is uint32, so "value - 6" wraps to a
 * huge shift count for values below 6 - presumably callers guarantee >= 6;
 * verify.
 */
void rfx_quantization_encode(sint16* buffer, const uint32* quantization_values)
{
	rfx_quantization_encode_block(buffer, 1024, quantization_values[8]); /* HL1 */
	rfx_quantization_encode_block(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
	rfx_quantization_encode_block(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
	rfx_quantization_encode_block(buffer + 3072, 256, quantization_values[5]); /* HL2 */
	rfx_quantization_encode_block(buffer + 3328, 256, quantization_values[4]); /* LH2 */
	rfx_quantization_encode_block(buffer + 3584, 256, quantization_values[6]); /* HH2 */
	rfx_quantization_encode_block(buffer + 3840, 64, quantization_values[2]); /* HL3 */
	rfx_quantization_encode_block(buffer + 3904, 64, quantization_values[1]); /* LH3 */
	rfx_quantization_encode_block(buffer + 3968, 64, quantization_values[3]); /* HH3 */
	rfx_quantization_encode_block(buffer + 4032, 64, quantization_values[0]); /* LL3 */
	rfx_quantization_encode_block(buffer, 1024, quantization_values[8] - 6); /* HL1 */
	rfx_quantization_encode_block(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */
	rfx_quantization_encode_block(buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */
	rfx_quantization_encode_block(buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */
	rfx_quantization_encode_block(buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */
	rfx_quantization_encode_block(buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */
	rfx_quantization_encode_block(buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */
	rfx_quantization_encode_block(buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */
	rfx_quantization_encode_block(buffer + 3968, 64, quantization_values[3] - 6); /* HH3 */
	rfx_quantization_encode_block(buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */
	/* The coefficients are scaled by << 5 at RGB->YCbCr phase, so we round it back here */
	rfx_quantization_encode_block(buffer, 4096, 5);
}

View File

@ -42,11 +42,10 @@
#define GetBits(nBits) rfx_bitstream_get_bits(bs, nBits)
/* From current output pointer, write "value", check and update buffer_size */
/* The value is represented as 11.5 fixed-point number */
/*
 * Uses `dst` and `buffer_size` from the expansion site; `buffer_size` is
 * decremented on every use, outside the `if`.
 * NOTE(review): the two `*dst++ = ...` lines appear to be the old and new
 * form of the same store in this diff listing - confirm which one is live.
 */
#define WriteValue(value) \
{ \
if (buffer_size > 0) \
*dst++ = ((value) << 5); \
*dst++ = (value); \
buffer_size--; \
}
@ -232,13 +231,11 @@ int rfx_rlgr_decode(RLGR_MODE mode, const uint8* data, int data_size, sint16* bu
}
/* Returns the next coefficient (a signed int) to encode, from the input stream */
/* The coefficients are scaled by << 5 at RGB->YCbCr phase, so we round it back here */
#define GetNextInput(_n) \
{ \
if (data_size > 0) \
{ \
_n = *data++; \
_n = ((_n) + (1 << 4)) >> 5; \
data_size--; \
} \
else \

View File

@ -218,17 +218,16 @@ static void rfx_encode_rgb_to_ycbcr_sse2(sint16* y_r_buffer, sint16* cb_g_buffer
static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
rfx_quantization_decode_block_sse2(sint16* buffer, const int buffer_size, const uint32 factor)
{
int shift = factor-6;
if (shift <= 0)
if (factor == 0)
return;
__m128i a;
__m128i * ptr = (__m128i*) buffer;
__m128i * buf_end = (__m128i*) (buffer + buffer_size);
do
{
a = _mm_load_si128(ptr);
a = _mm_slli_epi16(a, shift);
a = _mm_slli_epi16(a, factor);
_mm_store_si128(ptr, a);
ptr++;
@ -239,23 +238,24 @@ static void rfx_quantization_decode_sse2(sint16* buffer, const uint32* quantizat
{
	/*
	 * SSE2 de-quantization of one 4096-coefficient buffer: the buffer is
	 * prefetched, then each sub-band slice (start offset, length) is passed
	 * with its shift factor to rfx_quantization_decode_block_sse2().
	 *
	 * Slice layout: HL1 0, LH1 1024, HH1 2048 (1024 each); HL2 3072,
	 * LH2 3328, HH2 3584 (256 each); HL3 3840, LH3 3904, HH3 3968,
	 * LL3 4032 (64 each). HH3 must start at 3968: the former offset 3868
	 * overlapped HL3 (3840..3903) and left 3968..4031 untouched.
	 *
	 * NOTE(review): this listing looks like a rendered diff hunk, so the
	 * calls taking plain quantization_values[i] appear to be the superseded
	 * lines and the 4096-element pass plus the "- 6" calls their
	 * replacements - confirm against the real file.
	 * NOTE(review): quantization_values[] is uint32, so "value - 6" wraps
	 * for values below 6 - presumably callers guarantee >= 6; verify.
	 */
	_mm_prefetch_buffer((char*) buffer, 4096 * sizeof(sint16));
	rfx_quantization_decode_block_sse2(buffer, 1024, quantization_values[8]); /* HL1 */
	rfx_quantization_decode_block_sse2(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
	rfx_quantization_decode_block_sse2(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
	rfx_quantization_decode_block_sse2(buffer + 3072, 256, quantization_values[5]); /* HL2 */
	rfx_quantization_decode_block_sse2(buffer + 3328, 256, quantization_values[4]); /* LH2 */
	rfx_quantization_decode_block_sse2(buffer + 3584, 256, quantization_values[6]); /* HH2 */
	rfx_quantization_decode_block_sse2(buffer + 3840, 64, quantization_values[2]); /* HL3 */
	rfx_quantization_decode_block_sse2(buffer + 3904, 64, quantization_values[1]); /* LH3 */
	rfx_quantization_decode_block_sse2(buffer + 3968, 64, quantization_values[3]); /* HH3 */
	rfx_quantization_decode_block_sse2(buffer + 4032, 64, quantization_values[0]); /* LL3 */
	rfx_quantization_decode_block_sse2(buffer, 4096, 5);
	rfx_quantization_decode_block_sse2(buffer, 1024, quantization_values[8] - 6); /* HL1 */
	rfx_quantization_decode_block_sse2(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */
	rfx_quantization_decode_block_sse2(buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */
	rfx_quantization_decode_block_sse2(buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */
	rfx_quantization_decode_block_sse2(buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */
	rfx_quantization_decode_block_sse2(buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */
	rfx_quantization_decode_block_sse2(buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */
	rfx_quantization_decode_block_sse2(buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */
	rfx_quantization_decode_block_sse2(buffer + 3968, 64, quantization_values[3] - 6); /* HH3 */
	rfx_quantization_decode_block_sse2(buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */
}
static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
rfx_quantization_encode_block_sse2(sint16* buffer, const int buffer_size, const uint32 factor)
{
int shift = factor-6;
if (shift <= 0)
if (factor == 0)
return;
__m128i a;
@ -264,7 +264,7 @@ rfx_quantization_encode_block_sse2(sint16* buffer, const int buffer_size, const
do
{
a = _mm_load_si128(ptr);
a = _mm_srai_epi16(a, shift);
a = _mm_srai_epi16(a, factor);
_mm_store_si128(ptr, a);
ptr++;
@ -275,16 +275,18 @@ static void rfx_quantization_encode_sse2(sint16* buffer, const uint32* quantizat
{
	/*
	 * SSE2 quantization of one 4096-coefficient buffer: the buffer is
	 * prefetched, then each sub-band slice (start offset, length) is passed
	 * with its shift factor to rfx_quantization_encode_block_sse2().
	 *
	 * Slice layout: HL1 0, LH1 1024, HH1 2048 (1024 each); HL2 3072,
	 * LH2 3328, HH2 3584 (256 each); HL3 3840, LH3 3904, HH3 3968,
	 * LL3 4032 (64 each). HH3 must start at 3968: the former offset 3868
	 * overlapped HL3 (3840..3903) and left 3968..4031 untouched.
	 *
	 * NOTE(review): this listing looks like a rendered diff hunk, so the
	 * calls taking plain quantization_values[i] appear to be the superseded
	 * lines and the "- 6" calls plus the trailing 4096-element pass their
	 * replacements - confirm against the real file.
	 * NOTE(review): quantization_values[] is uint32, so "value - 6" wraps
	 * for values below 6 - presumably callers guarantee >= 6; verify.
	 */
	_mm_prefetch_buffer((char*) buffer, 4096 * sizeof(sint16));
	rfx_quantization_encode_block_sse2(buffer, 1024, quantization_values[8]); /* HL1 */
	rfx_quantization_encode_block_sse2(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
	rfx_quantization_encode_block_sse2(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
	rfx_quantization_encode_block_sse2(buffer + 3072, 256, quantization_values[5]); /* HL2 */
	rfx_quantization_encode_block_sse2(buffer + 3328, 256, quantization_values[4]); /* LH2 */
	rfx_quantization_encode_block_sse2(buffer + 3584, 256, quantization_values[6]); /* HH2 */
	rfx_quantization_encode_block_sse2(buffer + 3840, 64, quantization_values[2]); /* HL3 */
	rfx_quantization_encode_block_sse2(buffer + 3904, 64, quantization_values[1]); /* LH3 */
	rfx_quantization_encode_block_sse2(buffer + 3968, 64, quantization_values[3]); /* HH3 */
	rfx_quantization_encode_block_sse2(buffer + 4032, 64, quantization_values[0]); /* LL3 */
	rfx_quantization_encode_block_sse2(buffer, 1024, quantization_values[8] - 6); /* HL1 */
	rfx_quantization_encode_block_sse2(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */
	rfx_quantization_encode_block_sse2(buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */
	rfx_quantization_encode_block_sse2(buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */
	rfx_quantization_encode_block_sse2(buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */
	rfx_quantization_encode_block_sse2(buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */
	rfx_quantization_encode_block_sse2(buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */
	rfx_quantization_encode_block_sse2(buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */
	rfx_quantization_encode_block_sse2(buffer + 3968, 64, quantization_values[3] - 6); /* HH3 */
	rfx_quantization_encode_block_sse2(buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */
	rfx_quantization_encode_block_sse2(buffer, 4096, 5);
}
static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))