libfreerdp-rfx: fix rounding of fixed-point number.
The rounding of fixed-point numbers must be done in the quantization phase; otherwise the differential result will be wrong.
This commit is contained in:
parent
7997865e66
commit
4a25533599
@ -95,7 +95,7 @@ void rfx_decode_ycbcr_to_rgb(sint16* y_r_buf, sint16* cb_g_buf, sint16* cr_b_buf
|
||||
*
|
||||
* However only 7 integer bits will be actually used since the value range is [-128.0, 127.0].
|
||||
* In other words, the decoded coefficients are scaled by << 5 when interpreted as sint16.
|
||||
* It was scaled in the first RLGR decoding phase, so we must scale it back here.
|
||||
* It was scaled in the quantization phase, so we must scale it back here.
|
||||
*/
|
||||
for (i = 0; i < 4096; i++)
|
||||
{
|
||||
|
@ -132,7 +132,7 @@ void rfx_encode_rgb_to_ycbcr(sint16* y_r_buf, sint16* cb_g_buf, sint16* cr_b_buf
|
||||
*
|
||||
* However only 7 integer bits will be actually used since the value range is [-128.0, 127.0].
|
||||
* In other words, the encoded coefficients are scaled by << 5 when interpreted as sint16.
|
||||
* It will be scaled down to original during the last RLGR encoding phase.
|
||||
* It will be scaled down to original during the quantization phase.
|
||||
*/
|
||||
for (i = 0; i < 4096; i++)
|
||||
{
|
||||
@ -143,17 +143,17 @@ void rfx_encode_rgb_to_ycbcr(sint16* y_r_buf, sint16* cb_g_buf, sint16* cr_b_buf
|
||||
y = ((r << 3) + (r) + (r >> 1) + (r >> 4) + (r >> 7)) +
|
||||
((g << 4) + (g << 1) + (g >> 1) + (g >> 2) + (g >> 5)) +
|
||||
((b << 1) + (b) + (b >> 1) + (b >> 3) + (b >> 6) + (b >> 7));
|
||||
y_r_buf[i] = MINMAX(y, 0, (255 << 5)) - (128 << 5);
|
||||
y_r_buf[i] = MINMAX(y - 4096, -4096, 4095);
|
||||
/* 0.168935 << 5 = 101.01100111(b), 0.331665 << 5 = 1010.10011100(b), 0.50059 << 5 = 10000.00000100(b) */
|
||||
cb = 0 - ((r << 2) + (r) + (r >> 2) + (r >> 3) + (r >> 5)) -
|
||||
((g << 3) + (g << 1) + (g >> 1) + (g >> 4) + (g >> 5) + (g >> 6)) +
|
||||
((b << 4) + (b >> 6));
|
||||
cb_g_buf[i] = MINMAX(cb, (-128 << 5), (127 << 5));
|
||||
cb_g_buf[i] = MINMAX(cb, -4096, 4095);
|
||||
/* 0.499813 << 5 = 1111.11111110(b), 0.418531 << 5 = 1101.01100100(b), 0.081282 << 5 = 10.10011001(b) */
|
||||
cr = ((r << 4) - (r >> 7)) -
|
||||
((g << 3) + (g << 2) + (g) + (g >> 2) + (g >> 3) + (g >> 6)) -
|
||||
((b << 1) + (b >> 1) + (b >> 4) + (b >> 5) + (b >> 7));
|
||||
cr_b_buf[i] = MINMAX(cr, (-128 << 5), (127 << 5));
|
||||
cr_b_buf[i] = MINMAX(cr, -4096, 4095);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -23,10 +23,9 @@ static void rfx_quantization_decode_block(sint16* buffer, int buffer_size, uint3
|
||||
{
|
||||
sint16* dst;
|
||||
|
||||
if (factor <= 6)
|
||||
if (factor == 0)
|
||||
return;
|
||||
|
||||
factor -= 6;
|
||||
for (dst = buffer; buffer_size > 0; dst++, buffer_size--)
|
||||
{
|
||||
*dst <<= factor;
|
||||
@ -35,26 +34,28 @@ static void rfx_quantization_decode_block(sint16* buffer, int buffer_size, uint3
|
||||
|
||||
void rfx_quantization_decode(sint16* buffer, const uint32* quantization_values)
|
||||
{
|
||||
rfx_quantization_decode_block(buffer, 1024, quantization_values[8]); /* HL1 */
|
||||
rfx_quantization_decode_block(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
|
||||
rfx_quantization_decode_block(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
|
||||
rfx_quantization_decode_block(buffer + 3072, 256, quantization_values[5]); /* HL2 */
|
||||
rfx_quantization_decode_block(buffer + 3328, 256, quantization_values[4]); /* LH2 */
|
||||
rfx_quantization_decode_block(buffer + 3584, 256, quantization_values[6]); /* HH2 */
|
||||
rfx_quantization_decode_block(buffer + 3840, 64, quantization_values[2]); /* HL3 */
|
||||
rfx_quantization_decode_block(buffer + 3904, 64, quantization_values[1]); /* LH3 */
|
||||
rfx_quantization_decode_block(buffer + 3868, 64, quantization_values[3]); /* HH3 */
|
||||
rfx_quantization_decode_block(buffer + 4032, 64, quantization_values[0]); /* LL3 */
|
||||
/* Scale the values so that they are represented as 11.5 fixed-point numbers */
|
||||
rfx_quantization_decode_block(buffer, 4096, 5);
|
||||
|
||||
rfx_quantization_decode_block(buffer, 1024, quantization_values[8] - 6); /* HL1 */
|
||||
rfx_quantization_decode_block(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */
|
||||
rfx_quantization_decode_block(buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */
|
||||
rfx_quantization_decode_block(buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */
|
||||
rfx_quantization_decode_block(buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */
|
||||
rfx_quantization_decode_block(buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */
|
||||
rfx_quantization_decode_block(buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */
|
||||
rfx_quantization_decode_block(buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */
|
||||
rfx_quantization_decode_block(buffer + 3868, 64, quantization_values[3] - 6); /* HH3 */
|
||||
rfx_quantization_decode_block(buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */
|
||||
}
|
||||
|
||||
static void rfx_quantization_encode_block(sint16* buffer, int buffer_size, uint32 factor)
|
||||
{
|
||||
sint16* dst;
|
||||
|
||||
if (factor <= 6)
|
||||
if (factor == 0)
|
||||
return;
|
||||
|
||||
factor -= 6;
|
||||
for (dst = buffer; buffer_size > 0; dst++, buffer_size--)
|
||||
{
|
||||
*dst >>= factor;
|
||||
@ -63,14 +64,17 @@ static void rfx_quantization_encode_block(sint16* buffer, int buffer_size, uint3
|
||||
|
||||
void rfx_quantization_encode(sint16* buffer, const uint32* quantization_values)
|
||||
{
|
||||
rfx_quantization_encode_block(buffer, 1024, quantization_values[8]); /* HL1 */
|
||||
rfx_quantization_encode_block(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
|
||||
rfx_quantization_encode_block(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
|
||||
rfx_quantization_encode_block(buffer + 3072, 256, quantization_values[5]); /* HL2 */
|
||||
rfx_quantization_encode_block(buffer + 3328, 256, quantization_values[4]); /* LH2 */
|
||||
rfx_quantization_encode_block(buffer + 3584, 256, quantization_values[6]); /* HH2 */
|
||||
rfx_quantization_encode_block(buffer + 3840, 64, quantization_values[2]); /* HL3 */
|
||||
rfx_quantization_encode_block(buffer + 3904, 64, quantization_values[1]); /* LH3 */
|
||||
rfx_quantization_encode_block(buffer + 3868, 64, quantization_values[3]); /* HH3 */
|
||||
rfx_quantization_encode_block(buffer + 4032, 64, quantization_values[0]); /* LL3 */
|
||||
rfx_quantization_encode_block(buffer, 1024, quantization_values[8] - 6); /* HL1 */
|
||||
rfx_quantization_encode_block(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */
|
||||
rfx_quantization_encode_block(buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */
|
||||
rfx_quantization_encode_block(buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */
|
||||
rfx_quantization_encode_block(buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */
|
||||
rfx_quantization_encode_block(buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */
|
||||
rfx_quantization_encode_block(buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */
|
||||
rfx_quantization_encode_block(buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */
|
||||
rfx_quantization_encode_block(buffer + 3868, 64, quantization_values[3] - 6); /* HH3 */
|
||||
rfx_quantization_encode_block(buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */
|
||||
|
||||
/* The coefficients are scaled by << 5 in the RGB->YCbCr phase, so we round them back here */
|
||||
rfx_quantization_encode_block(buffer, 4096, 5);
|
||||
}
|
||||
|
@ -42,11 +42,10 @@
|
||||
#define GetBits(nBits) rfx_bitstream_get_bits(bs, nBits)
|
||||
|
||||
/* From current output pointer, write "value", check and update buffer_size */
|
||||
/* The value is represented as 11.5 fixed-point number */
|
||||
#define WriteValue(value) \
|
||||
{ \
|
||||
if (buffer_size > 0) \
|
||||
*dst++ = ((value) << 5); \
|
||||
*dst++ = (value); \
|
||||
buffer_size--; \
|
||||
}
|
||||
|
||||
@ -232,13 +231,11 @@ int rfx_rlgr_decode(RLGR_MODE mode, const uint8* data, int data_size, sint16* bu
|
||||
}
|
||||
|
||||
/* Returns the next coefficient (a signed int) to encode, from the input stream */
|
||||
/* The coefficients are scaled by << 5 at RGB->YCbCr phase, so we round it back here */
|
||||
#define GetNextInput(_n) \
|
||||
{ \
|
||||
if (data_size > 0) \
|
||||
{ \
|
||||
_n = *data++; \
|
||||
_n = ((_n) + (1 << 4)) >> 5; \
|
||||
data_size--; \
|
||||
} \
|
||||
else \
|
||||
|
@ -218,17 +218,16 @@ static void rfx_encode_rgb_to_ycbcr_sse2(sint16* y_r_buffer, sint16* cb_g_buffer
|
||||
static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
rfx_quantization_decode_block_sse2(sint16* buffer, const int buffer_size, const uint32 factor)
|
||||
{
|
||||
int shift = factor-6;
|
||||
if (shift <= 0)
|
||||
if (factor == 0)
|
||||
return;
|
||||
|
||||
|
||||
__m128i a;
|
||||
__m128i * ptr = (__m128i*) buffer;
|
||||
__m128i * buf_end = (__m128i*) (buffer + buffer_size);
|
||||
do
|
||||
{
|
||||
a = _mm_load_si128(ptr);
|
||||
a = _mm_slli_epi16(a, shift);
|
||||
a = _mm_slli_epi16(a, factor);
|
||||
_mm_store_si128(ptr, a);
|
||||
|
||||
ptr++;
|
||||
@ -239,23 +238,24 @@ static void rfx_quantization_decode_sse2(sint16* buffer, const uint32* quantizat
|
||||
{
|
||||
_mm_prefetch_buffer((char*) buffer, 4096 * sizeof(sint16));
|
||||
|
||||
rfx_quantization_decode_block_sse2(buffer, 1024, quantization_values[8]); /* HL1 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 3072, 256, quantization_values[5]); /* HL2 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 3328, 256, quantization_values[4]); /* LH2 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 3584, 256, quantization_values[6]); /* HH2 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 3840, 64, quantization_values[2]); /* HL3 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 3904, 64, quantization_values[1]); /* LH3 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 3868, 64, quantization_values[3]); /* HH3 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 4032, 64, quantization_values[0]); /* LL3 */
|
||||
rfx_quantization_decode_block_sse2(buffer, 4096, 5);
|
||||
|
||||
rfx_quantization_decode_block_sse2(buffer, 1024, quantization_values[8] - 6); /* HL1 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 3868, 64, quantization_values[3] - 6); /* HH3 */
|
||||
rfx_quantization_decode_block_sse2(buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */
|
||||
}
|
||||
|
||||
static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
rfx_quantization_encode_block_sse2(sint16* buffer, const int buffer_size, const uint32 factor)
|
||||
{
|
||||
int shift = factor-6;
|
||||
if (shift <= 0)
|
||||
if (factor == 0)
|
||||
return;
|
||||
|
||||
__m128i a;
|
||||
@ -264,7 +264,7 @@ rfx_quantization_encode_block_sse2(sint16* buffer, const int buffer_size, const
|
||||
do
|
||||
{
|
||||
a = _mm_load_si128(ptr);
|
||||
a = _mm_srai_epi16(a, shift);
|
||||
a = _mm_srai_epi16(a, factor);
|
||||
_mm_store_si128(ptr, a);
|
||||
|
||||
ptr++;
|
||||
@ -275,16 +275,18 @@ static void rfx_quantization_encode_sse2(sint16* buffer, const uint32* quantizat
|
||||
{
|
||||
_mm_prefetch_buffer((char*) buffer, 4096 * sizeof(sint16));
|
||||
|
||||
rfx_quantization_encode_block_sse2(buffer, 1024, quantization_values[8]); /* HL1 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 1024, 1024, quantization_values[7]); /* LH1 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 2048, 1024, quantization_values[9]); /* HH1 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 3072, 256, quantization_values[5]); /* HL2 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 3328, 256, quantization_values[4]); /* LH2 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 3584, 256, quantization_values[6]); /* HH2 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 3840, 64, quantization_values[2]); /* HL3 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 3904, 64, quantization_values[1]); /* LH3 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 3868, 64, quantization_values[3]); /* HH3 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 4032, 64, quantization_values[0]); /* LL3 */
|
||||
rfx_quantization_encode_block_sse2(buffer, 1024, quantization_values[8] - 6); /* HL1 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 1024, 1024, quantization_values[7] - 6); /* LH1 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 2048, 1024, quantization_values[9] - 6); /* HH1 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 3072, 256, quantization_values[5] - 6); /* HL2 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 3328, 256, quantization_values[4] - 6); /* LH2 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 3584, 256, quantization_values[6] - 6); /* HH2 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 3840, 64, quantization_values[2] - 6); /* HL3 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 3904, 64, quantization_values[1] - 6); /* LH3 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 3868, 64, quantization_values[3] - 6); /* HH3 */
|
||||
rfx_quantization_encode_block_sse2(buffer + 4032, 64, quantization_values[0] - 6); /* LL3 */
|
||||
|
||||
rfx_quantization_encode_block_sse2(buffer, 4096, 5);
|
||||
}
|
||||
|
||||
static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
|
Loading…
Reference in New Issue
Block a user