libfreerdp-rfx: fix rounding of fixed-point numbers.

The rounding of fixed-point numbers must be done in the quantization phase; otherwise the differential result will be wrong.
2011-09-04 02:34:49 +08:00 · 2011-09-04 02:34:49 +08:00 · 4a25533599
commit 4a25533599
parent 7997865e66
5 changed files with 63 additions and 60 deletions
--- a/libfreerdp-rfx/rfx_decode.c
+++ b/libfreerdp-rfx/rfx_decode.c
@ -95,7 +95,7 @@ void rfx_decode_ycbcr_to_rgb(sint16* y_r_buf, sint16* cb_g_buf, sint16* cr_b_buf
 	 *
 	 * However only 7 integer bits will be actually used since the value range is [-128.0, 127.0].
 	 * In other words, the decoded coeffectients is scaled by << 5 when intepreted as sint16.
-	 * It was scaled in the first RLGR decoding phase, so we must scale it back here.
+	 * It was scaled in the quantization phase, so we must scale it back here.
 	 */
 	for (i = 0; i < 4096; i++)
 	{
--- a/libfreerdp-rfx/rfx_encode.c
+++ b/libfreerdp-rfx/rfx_encode.c
@ -132,7 +132,7 @@ void rfx_encode_rgb_to_ycbcr(sint16* y_r_buf, sint16* cb_g_buf, sint16* cr_b_buf
 	 *
 	 * However only 7 integer bits will be actually used since the value range is [-128.0, 127.0].
 	 * In other words, the encoded coeffectients is scaled by << 5 when intepreted as sint16.
-	 * It will be scaled down to original during the last RLGR encoding phase.
+	 * It will be scaled down to original during the quantization phase.
 	 */
 	for (i = 0; i < 4096; i++)
 	{
@ -143,17 +143,17 @@ void rfx_encode_rgb_to_ycbcr(sint16* y_r_buf, sint16* cb_g_buf, sint16* cr_b_buf
 		y = ((r << 3) + (r) + (r >> 1) + (r >> 4) + (r >> 7)) +
 			((g << 4) + (g << 1) + (g >> 1) + (g >> 2) + (g >> 5)) +
 			((b << 1) + (b) + (b >> 1) + (b >> 3) + (b >> 6) + (b >> 7));
-		y_r_buf[i] = MINMAX(y, 0, (255 << 5)) - (128 << 5);
+		y_r_buf[i] = MINMAX(y - 4096, -4096, 4095);
 		/* 0.168935 << 5 = 101.01100111(b), 0.331665 << 5 = 1010.10011100(b), 0.50059 << 5 = 10000.00000100(b) */
 		cb = 0 - ((r << 2) + (r) + (r >> 2) + (r >> 3) + (r >> 5)) -
 			((g << 3) + (g << 1) + (g >> 1) + (g >> 4) + (g >> 5) + (g >> 6)) +
 			((b << 4) + (b >> 6));
-		cb_g_buf[i] = MINMAX(cb, (-128 << 5), (127 << 5));
+		cb_g_buf[i] = MINMAX(cb, -4096, 4095);
 		/* 0.499813 << 5 = 1111.11111110(b), 0.418531 << 5 = 1101.01100100(b), 0.081282 << 5 = 10.10011001(b) */
 		cr = ((r << 4) - (r >> 7)) -
 			((g << 3) + (g << 2) + (g) + (g >> 2) + (g >> 3) + (g >> 6)) -
 			((b << 1) + (b >> 1) + (b >> 4) + (b >> 5) + (b >> 7));
-		cr_b_buf[i] = MINMAX(cr, (-128 << 5), (127 << 5));
+		cr_b_buf[i] = MINMAX(cr, -4096, 4095);
 	}
 }

--- a/libfreerdp-rfx/rfx_quantization.c
+++ b/libfreerdp-rfx/rfx_quantization.c
@ -23,10 +23,9 @@ static void rfx_quantization_decode_block(sint16* buffer, int buffer_size, uint3
 {
 	sint16* dst;

-	if (factor <= 6)
+	if (factor == 0)
 		return;

-	factor -= 6;
 	for (dst = buffer; buffer_size > 0; dst++, buffer_size--)
 	{
 		*dst <<= factor;
@ -35,26 +34,28 @@ static void rfx_quantization_decode_block(sint16* buffer, int buffer_size, uint3

 void rfx_quantization_decode(sint16* buffer, const uint32* quantization_values)
 {
-	rfx_quantization_decode_block(buffer, 1024, quantization_values[8]); /* HL1 */
-	rfx_quantization_decode_block(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
-	rfx_quantization_decode_block(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
-	rfx_quantization_decode_block(buffer + 3072, 256, quantization_values[5]); /* HL2 */
-	rfx_quantization_decode_block(buffer + 3328, 256, quantization_values[4]); /* LH2 */
-	rfx_quantization_decode_block(buffer + 3584, 256, quantization_values[6]); /* HH2 */
-	rfx_quantization_decode_block(buffer + 3840, 64, quantization_values[2]); /* HL3 */
-	rfx_quantization_decode_block(buffer + 3904, 64, quantization_values[1]); /* LH3 */
-	rfx_quantization_decode_block(buffer + 3868, 64, quantization_values[3]); /* HH3 */
-	rfx_quantization_decode_block(buffer + 4032, 64, quantization_values[0]); /* LL3 */
+	/* Scale the values so that they are represented as 11.5 fixed-point numbers */
+	rfx_quantization_decode_block(buffer, 4096, 5);
+
+	rfx_quantization_decode_block(buffer, 1024, quantization_values[8] - 6); /* HL1 */
+	rfx_quantization_decode_block(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */
+	rfx_quantization_decode_block(buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */
+	rfx_quantization_decode_block(buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */
+	rfx_quantization_decode_block(buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */
+	rfx_quantization_decode_block(buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */
+	rfx_quantization_decode_block(buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */
+	rfx_quantization_decode_block(buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */
+	rfx_quantization_decode_block(buffer + 3868, 64, quantization_values[3] - 6); /* HH3 */
+	rfx_quantization_decode_block(buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */
 }

 static void rfx_quantization_encode_block(sint16* buffer, int buffer_size, uint32 factor)
 {
 	sint16* dst;

-	if (factor <= 6)
+	if (factor == 0)
 		return;

-	factor -= 6;
 	for (dst = buffer; buffer_size > 0; dst++, buffer_size--)
 	{
 		*dst >>= factor;
@ -63,14 +64,17 @@ static void rfx_quantization_encode_block(sint16* buffer, int buffer_size, uint3

 void rfx_quantization_encode(sint16* buffer, const uint32* quantization_values)
 {
-	rfx_quantization_encode_block(buffer, 1024, quantization_values[8]); /* HL1 */
-	rfx_quantization_encode_block(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
-	rfx_quantization_encode_block(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
-	rfx_quantization_encode_block(buffer + 3072, 256, quantization_values[5]); /* HL2 */
-	rfx_quantization_encode_block(buffer + 3328, 256, quantization_values[4]); /* LH2 */
-	rfx_quantization_encode_block(buffer + 3584, 256, quantization_values[6]); /* HH2 */
-	rfx_quantization_encode_block(buffer + 3840, 64, quantization_values[2]); /* HL3 */
-	rfx_quantization_encode_block(buffer + 3904, 64, quantization_values[1]); /* LH3 */
-	rfx_quantization_encode_block(buffer + 3868, 64, quantization_values[3]); /* HH3 */
-	rfx_quantization_encode_block(buffer + 4032, 64, quantization_values[0]); /* LL3 */
+	rfx_quantization_encode_block(buffer, 1024, quantization_values[8] - 6); /* HL1 */
+	rfx_quantization_encode_block(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */
+	rfx_quantization_encode_block(buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */
+	rfx_quantization_encode_block(buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */
+	rfx_quantization_encode_block(buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */
+	rfx_quantization_encode_block(buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */
+	rfx_quantization_encode_block(buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */
+	rfx_quantization_encode_block(buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */
+	rfx_quantization_encode_block(buffer + 3868, 64, quantization_values[3] - 6); /* HH3 */
+	rfx_quantization_encode_block(buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */
+
+	/* The coefficients are scaled by << 5 in the RGB->YCbCr phase, so we round them back here */
+	rfx_quantization_encode_block(buffer, 4096, 5);
 }
--- a/libfreerdp-rfx/rfx_rlgr.c
+++ b/libfreerdp-rfx/rfx_rlgr.c
@ -42,11 +42,10 @@
 #define GetBits(nBits) rfx_bitstream_get_bits(bs, nBits)

 /* From current output pointer, write "value", check and update buffer_size */
-/* The value is represented as 11.5 fixed-point number */
 #define WriteValue(value) \
 { \
 	if (buffer_size > 0) \
-		*dst++ = ((value) << 5); \
+		*dst++ = (value); \
 	buffer_size--; \
 }

@ -232,13 +231,11 @@ int rfx_rlgr_decode(RLGR_MODE mode, const uint8* data, int data_size, sint16* bu
 }

 /* Returns the next coefficient (a signed int) to encode, from the input stream */
-/* The coefficients are scaled by << 5 at RGB->YCbCr phase, so we round it back here */
 #define GetNextInput(_n) \
 { \
 	if (data_size > 0) \
 	{ \
 		_n = *data++; \
-		_n = ((_n) + (1 << 4)) >> 5; \
 		data_size--; \
 	} \
 	else \
--- a/libfreerdp-rfx/sse2/rfx_sse2.c
+++ b/libfreerdp-rfx/sse2/rfx_sse2.c
@ -218,17 +218,16 @@ static void rfx_encode_rgb_to_ycbcr_sse2(sint16* y_r_buffer, sint16* cb_g_buffer
 static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 rfx_quantization_decode_block_sse2(sint16* buffer, const int buffer_size, const uint32 factor)
 {
-	int shift = factor-6;
-	if (shift <= 0)
+	if (factor == 0)
 		return;
-	
+
 	__m128i a;
 	__m128i * ptr = (__m128i*) buffer;
 	__m128i * buf_end = (__m128i*) (buffer + buffer_size);
 	do
 	{
 		a = _mm_load_si128(ptr);
-		a = _mm_slli_epi16(a, shift);
+		a = _mm_slli_epi16(a, factor);
 		_mm_store_si128(ptr, a);

 		ptr++;
@ -239,23 +238,24 @@ static void rfx_quantization_decode_sse2(sint16* buffer, const uint32* quantizat
 {
 	_mm_prefetch_buffer((char*) buffer, 4096 * sizeof(sint16));

-	rfx_quantization_decode_block_sse2(buffer, 1024, quantization_values[8]); /* HL1 */
-	rfx_quantization_decode_block_sse2(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
-	rfx_quantization_decode_block_sse2(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
-	rfx_quantization_decode_block_sse2(buffer + 3072, 256, quantization_values[5]); /* HL2 */
-	rfx_quantization_decode_block_sse2(buffer + 3328, 256, quantization_values[4]); /* LH2 */
-	rfx_quantization_decode_block_sse2(buffer + 3584, 256, quantization_values[6]); /* HH2 */
-	rfx_quantization_decode_block_sse2(buffer + 3840, 64, quantization_values[2]); /* HL3 */
-	rfx_quantization_decode_block_sse2(buffer + 3904, 64, quantization_values[1]); /* LH3 */
-	rfx_quantization_decode_block_sse2(buffer + 3868, 64, quantization_values[3]); /* HH3 */
-	rfx_quantization_decode_block_sse2(buffer + 4032, 64, quantization_values[0]); /* LL3 */
+	rfx_quantization_decode_block_sse2(buffer, 4096, 5);
+
+	rfx_quantization_decode_block_sse2(buffer, 1024, quantization_values[8] - 6); /* HL1 */
+	rfx_quantization_decode_block_sse2(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */
+	rfx_quantization_decode_block_sse2(buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */
+	rfx_quantization_decode_block_sse2(buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */
+	rfx_quantization_decode_block_sse2(buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */
+	rfx_quantization_decode_block_sse2(buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */
+	rfx_quantization_decode_block_sse2(buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */
+	rfx_quantization_decode_block_sse2(buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */
+	rfx_quantization_decode_block_sse2(buffer + 3868, 64, quantization_values[3] - 6); /* HH3 */
+	rfx_quantization_decode_block_sse2(buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */
 }

 static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 rfx_quantization_encode_block_sse2(sint16* buffer, const int buffer_size, const uint32 factor)
 {
-	int shift = factor-6;
-	if (shift <= 0)
+	if (factor == 0)
 		return;
 	
 	__m128i a;
@ -264,7 +264,7 @@ rfx_quantization_encode_block_sse2(sint16* buffer, const int buffer_size, const
 	do
 	{
 		a = _mm_load_si128(ptr);
-		a = _mm_srai_epi16(a, shift);
+		a = _mm_srai_epi16(a, factor);
 		_mm_store_si128(ptr, a);

 		ptr++;
@ -275,16 +275,18 @@ static void rfx_quantization_encode_sse2(sint16* buffer, const uint32* quantizat
 {
 	_mm_prefetch_buffer((char*) buffer, 4096 * sizeof(sint16));

-	rfx_quantization_encode_block_sse2(buffer, 1024, quantization_values[8]); /* HL1 */
-	rfx_quantization_encode_block_sse2(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
-	rfx_quantization_encode_block_sse2(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
-	rfx_quantization_encode_block_sse2(buffer + 3072, 256, quantization_values[5]); /* HL2 */
-	rfx_quantization_encode_block_sse2(buffer + 3328, 256, quantization_values[4]); /* LH2 */
-	rfx_quantization_encode_block_sse2(buffer + 3584, 256, quantization_values[6]); /* HH2 */
-	rfx_quantization_encode_block_sse2(buffer + 3840, 64, quantization_values[2]); /* HL3 */
-	rfx_quantization_encode_block_sse2(buffer + 3904, 64, quantization_values[1]); /* LH3 */
-	rfx_quantization_encode_block_sse2(buffer + 3868, 64, quantization_values[3]); /* HH3 */
-	rfx_quantization_encode_block_sse2(buffer + 4032, 64, quantization_values[0]); /* LL3 */
+	rfx_quantization_encode_block_sse2(buffer, 1024, quantization_values[8] - 6); /* HL1 */
+	rfx_quantization_encode_block_sse2(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */
+	rfx_quantization_encode_block_sse2(buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */
+	rfx_quantization_encode_block_sse2(buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */
+	rfx_quantization_encode_block_sse2(buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */
+	rfx_quantization_encode_block_sse2(buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */
+	rfx_quantization_encode_block_sse2(buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */
+	rfx_quantization_encode_block_sse2(buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */
+	rfx_quantization_encode_block_sse2(buffer + 3868, 64, quantization_values[3] - 6); /* HH3 */
+	rfx_quantization_encode_block_sse2(buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */
+
+	rfx_quantization_encode_block_sse2(buffer, 4096, 5);
 }

 static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))