/** * FreeRDP: A Remote Desktop Protocol Implementation * Generic YUV/RGB conversion operations * * Copyright 2014 Marc-Andre Moreau * Copyright 2015-2017 Armin Novak * Copyright 2015-2017 Norbert Federa * Copyright 2015-2017 Vic Lee * Copyright 2015-2017 Thincast Technologies GmbH * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include "prim_internal.h" #include "prim_YUV.h" static pstatus_t general_LumaToYUV444(const BYTE* const WINPR_RESTRICT pSrcRaw[3], const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDstRaw[3], const UINT32 dstStep[3], const RECTANGLE_16* WINPR_RESTRICT roi) { const UINT32 nWidth = roi->right - roi->left; const UINT32 nHeight = roi->bottom - roi->top; const UINT32 halfWidth = (nWidth + 1) / 2; const UINT32 halfHeight = (nHeight + 1) / 2; const UINT32 oddY = 1; const UINT32 evenY = 0; const UINT32 oddX = 1; const UINT32 evenX = 0; const BYTE* pSrc[3] = { pSrcRaw[0] + 1ULL * roi->top * srcStep[0] + roi->left, pSrcRaw[1] + 1ULL * roi->top / 2 * srcStep[1] + roi->left / 2, pSrcRaw[2] + 1ULL * roi->top / 2 * srcStep[2] + roi->left / 2 }; BYTE* pDst[3] = { pDstRaw[0] + 1ULL * roi->top * dstStep[0] + roi->left, pDstRaw[1] + 1ULL * roi->top * dstStep[1] + roi->left, pDstRaw[2] + 1ULL * roi->top * dstStep[2] + roi->left }; /* Y data is already here... */ /* B1 */ for (size_t y = 0; y < nHeight; y++) { const BYTE* Ym = pSrc[0] + y * srcStep[0]; BYTE* pY = pDst[0] + dstStep[0] * y; memcpy(pY, Ym, nWidth); } /* The first half of U, V are already here part of this frame. */ /* B2 and B3 */ for (size_t y = 0; y < halfHeight; y++) { const UINT32 val2y = (2ULL * y + evenY); const UINT32 val2y1 = val2y + oddY; const BYTE* Um = pSrc[1] + y * srcStep[1]; const BYTE* Vm = pSrc[2] + y * srcStep[2]; BYTE* pU = pDst[1] + 1ULL * dstStep[1] * val2y; BYTE* pV = pDst[2] + 1ULL * dstStep[2] * val2y; BYTE* pU1 = pDst[1] + 1ULL * dstStep[1] * val2y1; BYTE* pV1 = pDst[2] + 1ULL * dstStep[2] * val2y1; for (size_t x = 0; x < halfWidth; x++) { const UINT32 val2x = 2UL * x + evenX; const UINT32 val2x1 = val2x + oddX; pU[val2x] = Um[x]; pV[val2x] = Vm[x]; pU[val2x1] = Um[x]; pV[val2x1] = Vm[x]; pU1[val2x] = Um[x]; pV1[val2x] = Vm[x]; pU1[val2x1] = Um[x]; pV1[val2x1] = Vm[x]; } } return PRIMITIVES_SUCCESS; } static pstatus_t general_ChromaFilter(BYTE* WINPR_RESTRICT pDst[3], const UINT32 dstStep[3], const RECTANGLE_16* WINPR_RESTRICT roi) { const UINT32 oddY = 1; const UINT32 evenY = 0; const UINT32 nWidth = roi->right - roi->left; const UINT32 nHeight = roi->bottom - roi->top; const UINT32 halfHeight = (nHeight + 1) / 2; const UINT32 halfWidth = (nWidth + 1) / 2; /* Filter */ for (UINT32 y = roi->top; y < halfHeight + roi->top; y++) { const UINT32 val2y = (y * 2 + evenY); const UINT32 val2y1 = val2y + oddY; BYTE* pU1 = pDst[1] + 1ULL * dstStep[1] * val2y1; BYTE* pV1 = pDst[2] + 1ULL * dstStep[2] * val2y1; BYTE* pU = pDst[1] + 1ULL * dstStep[1] * val2y; BYTE* pV = pDst[2] + 1ULL * dstStep[2] * val2y; if (val2y1 > nHeight) continue; for (UINT32 x = roi->left; x < halfWidth + roi->left; x++) { const UINT32 val2x = (x * 2); const UINT32 val2x1 = val2x + 1; const BYTE inU = pU[val2x]; const BYTE inV = pV[val2x]; const INT32 up = inU * 4; const INT32 vp = inV * 4; INT32 u2020 = 0; INT32 v2020 = 0; if (val2x1 > nWidth) continue; u2020 = up - pU[val2x1] - pU1[val2x] - pU1[val2x1]; v2020 = vp - pV[val2x1] - pV1[val2x] - pV1[val2x1]; pU[val2x] = CONDITIONAL_CLIP(u2020, inU); pV[val2x] = CONDITIONAL_CLIP(v2020, inV); } } return PRIMITIVES_SUCCESS; } static pstatus_t general_ChromaV1ToYUV444(const BYTE* const WINPR_RESTRICT pSrcRaw[3], const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDstRaw[3], const UINT32 dstStep[3], const RECTANGLE_16* WINPR_RESTRICT roi) { const UINT32 mod = 16; UINT32 uY = 0; UINT32 vY = 0; const UINT32 nWidth = roi->right - roi->left; const UINT32 nHeight = roi->bottom - roi->top; const UINT32 halfWidth = (nWidth) / 2; const UINT32 halfHeight = (nHeight) / 2; const UINT32 oddY = 1; const UINT32 evenY = 0; const UINT32 oddX = 1; /* The auxilary frame is aligned to multiples of 16x16. * We need the padded height for B4 and B5 conversion. */ const UINT32 padHeigth = nHeight + 16 - nHeight % 16; const BYTE* pSrc[3] = { pSrcRaw[0] + 1ULL * roi->top * srcStep[0] + roi->left, pSrcRaw[1] + 1ULL * roi->top / 2 * srcStep[1] + roi->left / 2, pSrcRaw[2] + 1ULL * roi->top / 2 * srcStep[2] + roi->left / 2 }; BYTE* pDst[3] = { pDstRaw[0] + 1ULL * roi->top * dstStep[0] + roi->left, pDstRaw[1] + 1ULL * roi->top * dstStep[1] + roi->left, pDstRaw[2] + 1ULL * roi->top * dstStep[2] + roi->left }; /* The second half of U and V is a bit more tricky... */ /* B4 and B5 */ for (size_t y = 0; y < padHeigth; y++) { const BYTE* Ya = pSrc[0] + y * srcStep[0]; BYTE* pX = NULL; if ((y) % mod < (mod + 1) / 2) { const size_t pos = (2 * uY++ + oddY); if (pos >= nHeight) continue; pX = pDst[1] + dstStep[1] * pos; } else { const size_t pos = (2 * vY++ + oddY); if (pos >= nHeight) continue; pX = pDst[2] + dstStep[2] * pos; } memcpy(pX, Ya, nWidth); } /* B6 and B7 */ for (size_t y = 0; y < halfHeight; y++) { const UINT32 val2y = (y * 2ULL + evenY); const BYTE* Ua = pSrc[1] + y * srcStep[1]; const BYTE* Va = pSrc[2] + y * srcStep[2]; BYTE* pU = pDst[1] + 1ULL * dstStep[1] * val2y; BYTE* pV = pDst[2] + 1ULL * dstStep[2] * val2y; for (size_t x = 0; x < halfWidth; x++) { const UINT32 val2x1 = (x * 2 + oddX); pU[val2x1] = Ua[x]; pV[val2x1] = Va[x]; } } /* Filter */ return general_ChromaFilter(pDst, dstStep, roi); } static pstatus_t general_ChromaV2ToYUV444(const BYTE* const WINPR_RESTRICT pSrc[3], const UINT32 srcStep[3], UINT32 nTotalWidth, UINT32 nTotalHeight, BYTE* WINPR_RESTRICT pDst[3], const UINT32 dstStep[3], const RECTANGLE_16* WINPR_RESTRICT roi) { const UINT32 nWidth = roi->right - roi->left; const UINT32 nHeight = roi->bottom - roi->top; const UINT32 halfWidth = (nWidth + 1) / 2; const UINT32 halfHeight = (nHeight + 1) / 2; const UINT32 quaterWidth = (nWidth + 3) / 4; /* B4 and B5: odd UV values for width/2, height */ for (UINT32 y = 0; y < nHeight; y++) { const UINT32 yTop = y + roi->top; const BYTE* pYaU = pSrc[0] + 1ULL * srcStep[0] * yTop + roi->left / 2; const BYTE* pYaV = pYaU + nTotalWidth / 2; BYTE* pU = pDst[1] + 1ULL * dstStep[1] * yTop + roi->left; BYTE* pV = pDst[2] + 1ULL * dstStep[2] * yTop + roi->left; for (size_t x = 0; x < halfWidth; x++) { const UINT32 odd = 2 * x + 1; pU[odd] = *pYaU++; pV[odd] = *pYaV++; } } /* B6 - B9 */ for (size_t y = 0; y < halfHeight; y++) { const BYTE* pUaU = pSrc[1] + srcStep[1] * (y + roi->top / 2) + roi->left / 4; const BYTE* pUaV = pUaU + nTotalWidth / 4; const BYTE* pVaU = pSrc[2] + srcStep[2] * (y + roi->top / 2) + roi->left / 4; const BYTE* pVaV = pVaU + nTotalWidth / 4; BYTE* pU = pDst[1] + 1ULL * dstStep[1] * (2ULL * y + 1 + roi->top) + roi->left; BYTE* pV = pDst[2] + 1ULL * dstStep[2] * (2ULL * y + 1 + roi->top) + roi->left; for (size_t x = 0; x < quaterWidth; x++) { pU[4 * x + 0] = *pUaU++; pV[4 * x + 0] = *pUaV++; pU[4 * x + 2] = *pVaU++; pV[4 * x + 2] = *pVaV++; } } return general_ChromaFilter(pDst, dstStep, roi); } static pstatus_t general_YUV420CombineToYUV444(avc444_frame_type type, const BYTE* const WINPR_RESTRICT pSrc[3], const UINT32 srcStep[3], UINT32 nWidth, UINT32 nHeight, BYTE* WINPR_RESTRICT pDst[3], const UINT32 dstStep[3], const RECTANGLE_16* WINPR_RESTRICT roi) { if (!pSrc || !pSrc[0] || !pSrc[1] || !pSrc[2]) return -1; if (!pDst || !pDst[0] || !pDst[1] || !pDst[2]) return -1; if (!roi) return -1; switch (type) { case AVC444_LUMA: return general_LumaToYUV444(pSrc, srcStep, pDst, dstStep, roi); case AVC444_CHROMAv1: return general_ChromaV1ToYUV444(pSrc, srcStep, pDst, dstStep, roi); case AVC444_CHROMAv2: return general_ChromaV2ToYUV444(pSrc, srcStep, nWidth, nHeight, pDst, dstStep, roi); default: return -1; } } static pstatus_t general_YUV444SplitToYUV420(const BYTE* const WINPR_RESTRICT pSrc[3], const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pMainDst[3], const UINT32 dstMainStep[3], BYTE* WINPR_RESTRICT pAuxDst[3], const UINT32 dstAuxStep[3], const prim_size_t* WINPR_RESTRICT roi) { UINT32 uY = 0; UINT32 vY = 0; UINT32 halfWidth = 0; UINT32 halfHeight = 0; /* The auxilary frame is aligned to multiples of 16x16. * We need the padded height for B4 and B5 conversion. */ const UINT32 padHeigth = roi->height + 16 - roi->height % 16; halfWidth = (roi->width + 1) / 2; halfHeight = (roi->height + 1) / 2; /* B1 */ for (size_t y = 0; y < roi->height; y++) { const BYTE* pSrcY = pSrc[0] + y * srcStep[0]; BYTE* pY = pMainDst[0] + y * dstMainStep[0]; memcpy(pY, pSrcY, roi->width); } /* B2 and B3 */ for (size_t y = 0; y < halfHeight; y++) { const BYTE* pSrcU = pSrc[1] + 2ULL * y * srcStep[1]; const BYTE* pSrcV = pSrc[2] + 2ULL * y * srcStep[2]; const BYTE* pSrcU1 = pSrc[1] + (2ULL * y + 1ULL) * srcStep[1]; const BYTE* pSrcV1 = pSrc[2] + (2ULL * y + 1ULL) * srcStep[2]; BYTE* pU = pMainDst[1] + y * dstMainStep[1]; BYTE* pV = pMainDst[2] + y * dstMainStep[2]; for (size_t x = 0; x < halfWidth; x++) { /* Filter */ const INT32 u = pSrcU[2 * x] + pSrcU[2 * x + 1] + pSrcU1[2 * x] + pSrcU1[2 * x + 1]; const INT32 v = pSrcV[2 * x] + pSrcV[2 * x + 1] + pSrcV1[2 * x] + pSrcV1[2 * x + 1]; pU[x] = CLIP(u / 4L); pV[x] = CLIP(v / 4L); } } /* B4 and B5 */ for (size_t y = 0; y < padHeigth; y++) { BYTE* pY = pAuxDst[0] + y * dstAuxStep[0]; if (y % 16 < 8) { const size_t pos = (2 * uY++ + 1); const BYTE* pSrcU = pSrc[1] + pos * srcStep[1]; if (pos >= roi->height) continue; memcpy(pY, pSrcU, roi->width); } else { const size_t pos = (2 * vY++ + 1); const BYTE* pSrcV = pSrc[2] + pos * srcStep[2]; if (pos >= roi->height) continue; memcpy(pY, pSrcV, roi->width); } } /* B6 and B7 */ for (size_t y = 0; y < halfHeight; y++) { const BYTE* pSrcU = pSrc[1] + 2 * y * srcStep[1]; const BYTE* pSrcV = pSrc[2] + 2 * y * srcStep[2]; BYTE* pU = pAuxDst[1] + y * dstAuxStep[1]; BYTE* pV = pAuxDst[2] + y * dstAuxStep[2]; for (size_t x = 0; x < halfWidth; x++) { pU[x] = pSrcU[2 * x + 1]; pV[x] = pSrcV[2 * x + 1]; } } return PRIMITIVES_SUCCESS; } static pstatus_t general_YUV444ToRGB_8u_P3AC4R_general(const BYTE* const WINPR_RESTRICT pSrc[3], const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, UINT32 dstStep, UINT32 DstFormat, const prim_size_t* WINPR_RESTRICT roi) { const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat); fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, FALSE); WINPR_ASSERT(pSrc); WINPR_ASSERT(pDst); WINPR_ASSERT(roi); const UINT32 nWidth = roi->width; const UINT32 nHeight = roi->height; for (size_t y = 0; y < nHeight; y++) { const BYTE* pY = pSrc[0] + y * srcStep[0]; const BYTE* pU = pSrc[1] + y * srcStep[1]; const BYTE* pV = pSrc[2] + y * srcStep[2]; BYTE* pRGB = pDst + y * dstStep; for (size_t x = 0; x < nWidth; x++) { const BYTE Y = pY[x]; const BYTE U = pU[x]; const BYTE V = pV[x]; const BYTE r = YUV2R(Y, U, V); const BYTE g = YUV2G(Y, U, V); const BYTE b = YUV2B(Y, U, V); pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0); } } return PRIMITIVES_SUCCESS; } static pstatus_t general_YUV444ToRGB_8u_P3AC4R_BGRX(const BYTE* const WINPR_RESTRICT pSrc[3], const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, UINT32 dstStep, UINT32 DstFormat, const prim_size_t* WINPR_RESTRICT roi) { const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat); WINPR_ASSERT(pSrc); WINPR_ASSERT(pDst); WINPR_ASSERT(roi); const UINT32 nWidth = roi->width; const UINT32 nHeight = roi->height; for (size_t y = 0; y < nHeight; y++) { const BYTE* pY = pSrc[0] + y * srcStep[0]; const BYTE* pU = pSrc[1] + y * srcStep[1]; const BYTE* pV = pSrc[2] + y * srcStep[2]; BYTE* pRGB = pDst + y * dstStep; for (size_t x = 0; x < nWidth; x++) { const BYTE Y = pY[x]; const BYTE U = pU[x]; const BYTE V = pV[x]; const BYTE r = YUV2R(Y, U, V); const BYTE g = YUV2G(Y, U, V); const BYTE b = YUV2B(Y, U, V); pRGB = writePixelBGRX(pRGB, formatSize, DstFormat, r, g, b, 0); } } return PRIMITIVES_SUCCESS; } static pstatus_t general_YUV444ToRGB_8u_P3AC4R(const BYTE* const WINPR_RESTRICT pSrc[3], const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, UINT32 dstStep, UINT32 DstFormat, const prim_size_t* WINPR_RESTRICT roi) { switch (DstFormat) { case PIXEL_FORMAT_BGRA32: case PIXEL_FORMAT_BGRX32: return general_YUV444ToRGB_8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi); default: return general_YUV444ToRGB_8u_P3AC4R_general(pSrc, srcStep, pDst, dstStep, DstFormat, roi); } } /** * | R | ( | 256 0 403 | | Y | ) * | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8 * | B | ( | 256 475 0 | | V - 128 | ) */ static pstatus_t general_YUV420ToRGB_8u_P3AC4R(const BYTE* const WINPR_RESTRICT pSrc[3], const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, UINT32 dstStep, UINT32 DstFormat, const prim_size_t* WINPR_RESTRICT roi) { UINT32 dstPad = 0; UINT32 srcPad[3]; BYTE Y = 0; BYTE U = 0; BYTE V = 0; UINT32 halfWidth = 0; UINT32 halfHeight = 0; const BYTE* pY = NULL; const BYTE* pU = NULL; const BYTE* pV = NULL; BYTE* pRGB = pDst; UINT32 nWidth = 0; UINT32 nHeight = 0; UINT32 lastRow = 0; UINT32 lastCol = 0; const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat); fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, FALSE); pY = pSrc[0]; pU = pSrc[1]; pV = pSrc[2]; lastCol = roi->width & 0x01; lastRow = roi->height & 0x01; nWidth = (roi->width + 1) & ~0x0001; nHeight = (roi->height + 1) & ~0x0001; halfWidth = nWidth / 2; halfHeight = nHeight / 2; srcPad[0] = (srcStep[0] - nWidth); srcPad[1] = (srcStep[1] - halfWidth); srcPad[2] = (srcStep[2] - halfWidth); dstPad = (dstStep - (nWidth * 4)); for (UINT32 y = 0; y < halfHeight;) { if (++y == halfHeight) lastRow <<= 1; for (UINT32 x = 0; x < halfWidth;) { BYTE r = 0; BYTE g = 0; BYTE b = 0; if (++x == halfWidth) lastCol <<= 1; U = *pU++; V = *pV++; /* 1st pixel */ Y = *pY++; r = YUV2R(Y, U, V); g = YUV2G(Y, U, V); b = YUV2B(Y, U, V); pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0); /* 2nd pixel */ if (!(lastCol & 0x02)) { Y = *pY++; r = YUV2R(Y, U, V); g = YUV2G(Y, U, V); b = YUV2B(Y, U, V); pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0); } else { pY++; pRGB += formatSize; lastCol >>= 1; } } pY += srcPad[0]; pU -= halfWidth; pV -= halfWidth; pRGB += dstPad; if (lastRow & 0x02) break; for (UINT32 x = 0; x < halfWidth;) { BYTE r = 0; BYTE g = 0; BYTE b = 0; if (++x == halfWidth) lastCol <<= 1; U = *pU++; V = *pV++; /* 3rd pixel */ Y = *pY++; r = YUV2R(Y, U, V); g = YUV2G(Y, U, V); b = YUV2B(Y, U, V); pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0); /* 4th pixel */ if (!(lastCol & 0x02)) { Y = *pY++; r = YUV2R(Y, U, V); g = YUV2G(Y, U, V); b = YUV2B(Y, U, V); pRGB = writePixel(pRGB, formatSize, DstFormat, r, g, b, 0); } else { pY++; pRGB += formatSize; lastCol >>= 1; } } pY += srcPad[0]; pU += srcPad[1]; pV += srcPad[2]; pRGB += dstPad; } return PRIMITIVES_SUCCESS; } /** * | Y | ( | 54 183 18 | | R | ) | 0 | * | U | = ( | -29 -99 128 | | G | ) >> 8 + | 128 | * | V | ( | 128 -116 -12 | | B | ) | 128 | */ static INLINE BYTE RGB2Y(BYTE R, BYTE G, BYTE B) { return (54 * R + 183 * G + 18 * B) >> 8; } static INLINE BYTE RGB2U(BYTE R, BYTE G, BYTE B) { return ((-29 * R - 99 * G + 128 * B) >> 8) + 128; } static INLINE BYTE RGB2V(INT32 R, INT32 G, INT32 B) { return ((128 * R - 116 * G - 12 * B) >> 8) + 128; } static pstatus_t general_RGBToYUV444_8u_P3AC4R(const BYTE* WINPR_RESTRICT pSrc, UINT32 SrcFormat, const UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3], UINT32 dstStep[3], const prim_size_t* WINPR_RESTRICT roi) { const UINT32 bpp = FreeRDPGetBytesPerPixel(SrcFormat); UINT32 nWidth = 0; UINT32 nHeight = 0; nWidth = roi->width; nHeight = roi->height; for (size_t y = 0; y < nHeight; y++) { const BYTE* pRGB = pSrc + y * srcStep; BYTE* pY = pDst[0] + y * dstStep[0]; BYTE* pU = pDst[1] + y * dstStep[1]; BYTE* pV = pDst[2] + y * dstStep[2]; for (size_t x = 0; x < nWidth; x++) { BYTE B = 0; BYTE G = 0; BYTE R = 0; const UINT32 color = FreeRDPReadColor(&pRGB[x * bpp], SrcFormat); FreeRDPSplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL); pY[x] = RGB2Y(R, G, B); pU[x] = RGB2U(R, G, B); pV[x] = RGB2V(R, G, B); } } return PRIMITIVES_SUCCESS; } static INLINE pstatus_t general_RGBToYUV420_BGRX(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3], const UINT32 dstStep[3], const prim_size_t* WINPR_RESTRICT roi) { size_t i = 0; size_t x1 = 0; size_t x2 = 4; size_t x3 = srcStep; size_t x4 = srcStep + 4; size_t y1 = 0; size_t y2 = 1; size_t y3 = dstStep[0]; size_t y4 = dstStep[0] + 1; UINT32 max_x = roi->width - 1; UINT32 max_y = roi->height - 1; for (size_t y = i = 0; y < roi->height; y += 2, i++) { const BYTE* src = pSrc + y * srcStep; BYTE* ydst = pDst[0] + y * dstStep[0]; BYTE* udst = pDst[1] + i * dstStep[1]; BYTE* vdst = pDst[2] + i * dstStep[2]; for (size_t x = 0; x < roi->width; x += 2) { BYTE R = 0; BYTE G = 0; BYTE B = 0; INT32 Ra = 0; INT32 Ga = 0; INT32 Ba = 0; /* row 1, pixel 1 */ Ba = B = *(src + x1 + 0); Ga = G = *(src + x1 + 1); Ra = R = *(src + x1 + 2); ydst[y1] = RGB2Y(R, G, B); if (x < max_x) { /* row 1, pixel 2 */ Ba += B = *(src + x2 + 0); Ga += G = *(src + x2 + 1); Ra += R = *(src + x2 + 2); ydst[y2] = RGB2Y(R, G, B); } if (y < max_y) { /* row 2, pixel 1 */ Ba += B = *(src + x3 + 0); Ga += G = *(src + x3 + 1); Ra += R = *(src + x3 + 2); ydst[y3] = RGB2Y(R, G, B); if (x < max_x) { /* row 2, pixel 2 */ Ba += B = *(src + x4 + 0); Ga += G = *(src + x4 + 1); Ra += R = *(src + x4 + 2); ydst[y4] = RGB2Y(R, G, B); } } Ba >>= 2; Ga >>= 2; Ra >>= 2; *udst++ = RGB2U(Ra, Ga, Ba); *vdst++ = RGB2V(Ra, Ga, Ba); ydst += 2; src += 8; } } return PRIMITIVES_SUCCESS; } static INLINE pstatus_t general_RGBToYUV420_RGBX(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3], const UINT32 dstStep[3], const prim_size_t* WINPR_RESTRICT roi) { size_t x1 = 0; size_t x2 = 4; size_t x3 = srcStep; size_t x4 = srcStep + 4; size_t y1 = 0; size_t y2 = 1; size_t y3 = dstStep[0]; size_t y4 = dstStep[0] + 1; UINT32 max_x = roi->width - 1; UINT32 max_y = roi->height - 1; for (size_t y = 0, i = 0; y < roi->height; y += 2, i++) { const BYTE* src = pSrc + y * srcStep; BYTE* ydst = pDst[0] + y * dstStep[0]; BYTE* udst = pDst[1] + i * dstStep[1]; BYTE* vdst = pDst[2] + i * dstStep[2]; for (UINT32 x = 0; x < roi->width; x += 2) { BYTE R = 0; BYTE G = 0; BYTE B = 0; INT32 Ra = 0; INT32 Ga = 0; INT32 Ba = 0; /* row 1, pixel 1 */ Ra = R = *(src + x1 + 0); Ga = G = *(src + x1 + 1); Ba = B = *(src + x1 + 2); ydst[y1] = RGB2Y(R, G, B); if (x < max_x) { /* row 1, pixel 2 */ Ra += R = *(src + x2 + 0); Ga += G = *(src + x2 + 1); Ba += B = *(src + x2 + 2); ydst[y2] = RGB2Y(R, G, B); } if (y < max_y) { /* row 2, pixel 1 */ Ra += R = *(src + x3 + 0); Ga += G = *(src + x3 + 1); Ba += B = *(src + x3 + 2); ydst[y3] = RGB2Y(R, G, B); if (x < max_x) { /* row 2, pixel 2 */ Ra += R = *(src + x4 + 0); Ga += G = *(src + x4 + 1); Ba += B = *(src + x4 + 2); ydst[y4] = RGB2Y(R, G, B); } } Ba >>= 2; Ga >>= 2; Ra >>= 2; *udst++ = RGB2U(Ra, Ga, Ba); *vdst++ = RGB2V(Ra, Ga, Ba); ydst += 2; src += 8; } } return PRIMITIVES_SUCCESS; } static INLINE pstatus_t general_RGBToYUV420_ANY(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat, UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3], const UINT32 dstStep[3], const prim_size_t* WINPR_RESTRICT roi) { const UINT32 bpp = FreeRDPGetBytesPerPixel(srcFormat); size_t x1 = 0; size_t x2 = bpp; size_t x3 = srcStep; size_t x4 = srcStep + bpp; size_t y1 = 0; size_t y2 = 1; size_t y3 = dstStep[0]; size_t y4 = dstStep[0] + 1; UINT32 max_x = roi->width - 1; UINT32 max_y = roi->height - 1; for (size_t y = 0, i = 0; y < roi->height; y += 2, i++) { const BYTE* src = pSrc + y * srcStep; BYTE* ydst = pDst[0] + y * dstStep[0]; BYTE* udst = pDst[1] + i * dstStep[1]; BYTE* vdst = pDst[2] + i * dstStep[2]; for (size_t x = 0; x < roi->width; x += 2) { BYTE R = 0; BYTE G = 0; BYTE B = 0; INT32 Ra = 0; INT32 Ga = 0; INT32 Ba = 0; UINT32 color = 0; /* row 1, pixel 1 */ color = FreeRDPReadColor(src + x1, srcFormat); FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL); Ra = R; Ga = G; Ba = B; ydst[y1] = RGB2Y(R, G, B); if (x < max_x) { /* row 1, pixel 2 */ color = FreeRDPReadColor(src + x2, srcFormat); FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL); Ra += R; Ga += G; Ba += B; ydst[y2] = RGB2Y(R, G, B); } if (y < max_y) { /* row 2, pixel 1 */ color = FreeRDPReadColor(src + x3, srcFormat); FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL); Ra += R; Ga += G; Ba += B; ydst[y3] = RGB2Y(R, G, B); if (x < max_x) { /* row 2, pixel 2 */ color = FreeRDPReadColor(src + x4, srcFormat); FreeRDPSplitColor(color, srcFormat, &R, &G, &B, NULL, NULL); Ra += R; Ga += G; Ba += B; ydst[y4] = RGB2Y(R, G, B); } } Ra >>= 2; Ga >>= 2; Ba >>= 2; *udst++ = RGB2U(Ra, Ga, Ba); *vdst++ = RGB2V(Ra, Ga, Ba); ydst += 2; src += 2ULL * bpp; } } return PRIMITIVES_SUCCESS; } static pstatus_t general_RGBToYUV420_8u_P3AC4R(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat, UINT32 srcStep, BYTE* WINPR_RESTRICT pDst[3], const UINT32 dstStep[3], const prim_size_t* WINPR_RESTRICT roi) { switch (srcFormat) { case PIXEL_FORMAT_BGRA32: case PIXEL_FORMAT_BGRX32: return general_RGBToYUV420_BGRX(pSrc, srcStep, pDst, dstStep, roi); case PIXEL_FORMAT_RGBA32: case PIXEL_FORMAT_RGBX32: return general_RGBToYUV420_RGBX(pSrc, srcStep, pDst, dstStep, roi); default: return general_RGBToYUV420_ANY(pSrc, srcFormat, srcStep, pDst, dstStep, roi); } } static INLINE void general_RGBToAVC444YUV_BGRX_DOUBLE_ROW( const BYTE* WINPR_RESTRICT srcEven, const BYTE* WINPR_RESTRICT srcOdd, BYTE* WINPR_RESTRICT b1Even, BYTE* WINPR_RESTRICT b1Odd, BYTE* WINPR_RESTRICT b2, BYTE* WINPR_RESTRICT b3, BYTE* WINPR_RESTRICT b4, BYTE* WINPR_RESTRICT b5, BYTE* WINPR_RESTRICT b6, BYTE* WINPR_RESTRICT b7, UINT32 width) { for (UINT32 x = 0; x < width; x += 2) { const BOOL lastX = (x + 1) >= width; BYTE Y1e = 0; BYTE Y2e = 0; BYTE U1e = 0; BYTE V1e = 0; BYTE U2e = 0; BYTE V2e = 0; BYTE Y1o = 0; BYTE Y2o = 0; BYTE U1o = 0; BYTE V1o = 0; BYTE U2o = 0; BYTE V2o = 0; /* Read 4 pixels, 2 from even, 2 from odd lines */ { const BYTE b = *srcEven++; const BYTE g = *srcEven++; const BYTE r = *srcEven++; srcEven++; Y1e = Y2e = Y1o = Y2o = RGB2Y(r, g, b); U1e = U2e = U1o = U2o = RGB2U(r, g, b); V1e = V2e = V1o = V2o = RGB2V(r, g, b); } if (!lastX) { const BYTE b = *srcEven++; const BYTE g = *srcEven++; const BYTE r = *srcEven++; srcEven++; Y2e = RGB2Y(r, g, b); U2e = RGB2U(r, g, b); V2e = RGB2V(r, g, b); } if (b1Odd) { const BYTE b = *srcOdd++; const BYTE g = *srcOdd++; const BYTE r = *srcOdd++; srcOdd++; Y1o = Y2o = RGB2Y(r, g, b); U1o = U2o = RGB2U(r, g, b); V1o = V2o = RGB2V(r, g, b); } if (b1Odd && !lastX) { const BYTE b = *srcOdd++; const BYTE g = *srcOdd++; const BYTE r = *srcOdd++; srcOdd++; Y2o = RGB2Y(r, g, b); U2o = RGB2U(r, g, b); V2o = RGB2V(r, g, b); } /* We have 4 Y pixels, so store them. */ *b1Even++ = Y1e; *b1Even++ = Y2e; if (b1Odd) { *b1Odd++ = Y1o; *b1Odd++ = Y2o; } /* 2x 2y pixel in luma UV plane use averaging */ { const BYTE Uavg = ((UINT16)U1e + (UINT16)U2e + (UINT16)U1o + (UINT16)U2o) / 4; const BYTE Vavg = ((UINT16)V1e + (UINT16)V2e + (UINT16)V1o + (UINT16)V2o) / 4; *b2++ = Uavg; *b3++ = Vavg; } /* UV from 2x, 2y+1 */ if (b1Odd) { *b4++ = U1o; *b5++ = V1o; if (!lastX) { *b4++ = U2o; *b5++ = V2o; } } /* UV from 2x+1, 2y */ if (!lastX) { *b6++ = U2e; *b7++ = V2e; } } } static INLINE pstatus_t general_RGBToAVC444YUV_BGRX(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep, BYTE* WINPR_RESTRICT pDst1[3], const UINT32 dst1Step[3], BYTE* WINPR_RESTRICT pDst2[3], const UINT32 dst2Step[3], const prim_size_t* WINPR_RESTRICT roi) { /** * Note: * Read information in function general_RGBToAVC444YUV_ANY below ! */ const BYTE* pMaxSrc = pSrc + 1ULL * (roi->height - 1) * srcStep; for (size_t y = 0; y < roi->height; y += 2) { const BOOL last = (y >= (roi->height - 1)); const BYTE* srcEven = y < roi->height ? pSrc + y * srcStep : pMaxSrc; const BYTE* srcOdd = !last ? pSrc + (y + 1) * srcStep : pMaxSrc; const UINT32 i = y >> 1; const UINT32 n = (i & ~7) + i; BYTE* b1Even = pDst1[0] + y * dst1Step[0]; BYTE* b1Odd = !last ? (b1Even + dst1Step[0]) : NULL; BYTE* b2 = pDst1[1] + (y / 2) * dst1Step[1]; BYTE* b3 = pDst1[2] + (y / 2) * dst1Step[2]; BYTE* b4 = pDst2[0] + 1ULL * dst2Step[0] * n; BYTE* b5 = b4 + 8ULL * dst2Step[0]; BYTE* b6 = pDst2[1] + (y / 2) * dst2Step[1]; BYTE* b7 = pDst2[2] + (y / 2) * dst2Step[2]; general_RGBToAVC444YUV_BGRX_DOUBLE_ROW(srcEven, srcOdd, b1Even, b1Odd, b2, b3, b4, b5, b6, b7, roi->width); } return PRIMITIVES_SUCCESS; } static INLINE void general_RGBToAVC444YUV_RGBX_DOUBLE_ROW( const BYTE* WINPR_RESTRICT srcEven, const BYTE* WINPR_RESTRICT srcOdd, BYTE* WINPR_RESTRICT b1Even, BYTE* WINPR_RESTRICT b1Odd, BYTE* WINPR_RESTRICT b2, BYTE* WINPR_RESTRICT b3, BYTE* WINPR_RESTRICT b4, BYTE* WINPR_RESTRICT b5, BYTE* WINPR_RESTRICT b6, BYTE* WINPR_RESTRICT b7, UINT32 width) { for (UINT32 x = 0; x < width; x += 2) { const BOOL lastX = (x + 1) >= width; BYTE Y1e = 0; BYTE Y2e = 0; BYTE U1e = 0; BYTE V1e = 0; BYTE U2e = 0; BYTE V2e = 0; BYTE Y1o = 0; BYTE Y2o = 0; BYTE U1o = 0; BYTE V1o = 0; BYTE U2o = 0; BYTE V2o = 0; /* Read 4 pixels, 2 from even, 2 from odd lines */ { const BYTE r = *srcEven++; const BYTE g = *srcEven++; const BYTE b = *srcEven++; srcEven++; Y1e = Y2e = Y1o = Y2o = RGB2Y(r, g, b); U1e = U2e = U1o = U2o = RGB2U(r, g, b); V1e = V2e = V1o = V2o = RGB2V(r, g, b); } if (!lastX) { const BYTE r = *srcEven++; const BYTE g = *srcEven++; const BYTE b = *srcEven++; srcEven++; Y2e = RGB2Y(r, g, b); U2e = RGB2U(r, g, b); V2e = RGB2V(r, g, b); } if (b1Odd) { const BYTE r = *srcOdd++; const BYTE g = *srcOdd++; const BYTE b = *srcOdd++; srcOdd++; Y1o = Y2o = RGB2Y(r, g, b); U1o = U2o = RGB2U(r, g, b); V1o = V2o = RGB2V(r, g, b); } if (b1Odd && !lastX) { const BYTE r = *srcOdd++; const BYTE g = *srcOdd++; const BYTE b = *srcOdd++; srcOdd++; Y2o = RGB2Y(r, g, b); U2o = RGB2U(r, g, b); V2o = RGB2V(r, g, b); } /* We have 4 Y pixels, so store them. */ *b1Even++ = Y1e; *b1Even++ = Y2e; if (b1Odd) { *b1Odd++ = Y1o; *b1Odd++ = Y2o; } /* 2x 2y pixel in luma UV plane use averaging */ { const BYTE Uavg = ((UINT16)U1e + (UINT16)U2e + (UINT16)U1o + (UINT16)U2o) / 4; const BYTE Vavg = ((UINT16)V1e + (UINT16)V2e + (UINT16)V1o + (UINT16)V2o) / 4; *b2++ = Uavg; *b3++ = Vavg; } /* UV from 2x, 2y+1 */ if (b1Odd) { *b4++ = U1o; *b5++ = V1o; if (!lastX) { *b4++ = U2o; *b5++ = V2o; } } /* UV from 2x+1, 2y */ if (!lastX) { *b6++ = U2e; *b7++ = V2e; } } } static INLINE pstatus_t general_RGBToAVC444YUV_RGBX(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep, BYTE* WINPR_RESTRICT pDst1[3], const UINT32 dst1Step[3], BYTE* WINPR_RESTRICT pDst2[3], const UINT32 dst2Step[3], const prim_size_t* WINPR_RESTRICT roi) { /** * Note: * Read information in function general_RGBToAVC444YUV_ANY below ! */ const BYTE* pMaxSrc = pSrc + 1ULL * (roi->height - 1) * srcStep; for (size_t y = 0; y < roi->height; y += 2) { const BOOL last = (y >= (roi->height - 1)); const BYTE* srcEven = y < roi->height ? pSrc + y * srcStep : pMaxSrc; const BYTE* srcOdd = !last ? pSrc + (y + 1) * srcStep : pMaxSrc; const UINT32 i = y >> 1; const UINT32 n = (i & ~7) + i; BYTE* b1Even = pDst1[0] + y * dst1Step[0]; BYTE* b1Odd = !last ? (b1Even + dst1Step[0]) : NULL; BYTE* b2 = pDst1[1] + (y / 2) * dst1Step[1]; BYTE* b3 = pDst1[2] + (y / 2) * dst1Step[2]; BYTE* b4 = pDst2[0] + 1ULL * dst2Step[0] * n; BYTE* b5 = b4 + 8ULL * dst2Step[0]; BYTE* b6 = pDst2[1] + (y / 2) * dst2Step[1]; BYTE* b7 = pDst2[2] + (y / 2) * dst2Step[2]; general_RGBToAVC444YUV_RGBX_DOUBLE_ROW(srcEven, srcOdd, b1Even, b1Odd, b2, b3, b4, b5, b6, b7, roi->width); } return PRIMITIVES_SUCCESS; } static INLINE void general_RGBToAVC444YUV_ANY_DOUBLE_ROW( const BYTE* WINPR_RESTRICT srcEven, const BYTE* WINPR_RESTRICT srcOdd, UINT32 srcFormat, BYTE* WINPR_RESTRICT b1Even, BYTE* WINPR_RESTRICT b1Odd, BYTE* WINPR_RESTRICT b2, BYTE* WINPR_RESTRICT b3, BYTE* WINPR_RESTRICT b4, BYTE* WINPR_RESTRICT b5, BYTE* WINPR_RESTRICT b6, BYTE* WINPR_RESTRICT b7, UINT32 width) { const UINT32 bpp = FreeRDPGetBytesPerPixel(srcFormat); for (UINT32 x = 0; x < width; x += 2) { const BOOL lastX = (x + 1) >= width; BYTE Y1e = 0; BYTE Y2e = 0; BYTE U1e = 0; BYTE V1e = 0; BYTE U2e = 0; BYTE V2e = 0; BYTE Y1o = 0; BYTE Y2o = 0; BYTE U1o = 0; BYTE V1o = 0; BYTE U2o = 0; BYTE V2o = 0; /* Read 4 pixels, 2 from even, 2 from odd lines */ { BYTE r = 0; BYTE g = 0; BYTE b = 0; const UINT32 color = FreeRDPReadColor(srcEven, srcFormat); srcEven += bpp; FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL); Y1e = Y2e = Y1o = Y2o = RGB2Y(r, g, b); U1e = U2e = U1o = U2o = RGB2U(r, g, b); V1e = V2e = V1o = V2o = RGB2V(r, g, b); } if (!lastX) { BYTE r = 0; BYTE g = 0; BYTE b = 0; const UINT32 color = FreeRDPReadColor(srcEven, srcFormat); srcEven += bpp; FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL); Y2e = RGB2Y(r, g, b); U2e = RGB2U(r, g, b); V2e = RGB2V(r, g, b); } if (b1Odd) { BYTE r = 0; BYTE g = 0; BYTE b = 0; const UINT32 color = FreeRDPReadColor(srcOdd, srcFormat); srcOdd += bpp; FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL); Y1o = Y2o = RGB2Y(r, g, b); U1o = U2o = RGB2U(r, g, b); V1o = V2o = RGB2V(r, g, b); } if (b1Odd && !lastX) { BYTE r = 0; BYTE g = 0; BYTE b = 0; const UINT32 color = FreeRDPReadColor(srcOdd, srcFormat); srcOdd += bpp; FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL); Y2o = RGB2Y(r, g, b); U2o = RGB2U(r, g, b); V2o = RGB2V(r, g, b); } /* We have 4 Y pixels, so store them. */ *b1Even++ = Y1e; *b1Even++ = Y2e; if (b1Odd) { *b1Odd++ = Y1o; *b1Odd++ = Y2o; } /* 2x 2y pixel in luma UV plane use averaging */ { const BYTE Uavg = ((UINT16)U1e + (UINT16)U2e + (UINT16)U1o + (UINT16)U2o) / 4; const BYTE Vavg = ((UINT16)V1e + (UINT16)V2e + (UINT16)V1o + (UINT16)V2o) / 4; *b2++ = Uavg; *b3++ = Vavg; } /* UV from 2x, 2y+1 */ if (b1Odd) { *b4++ = U1o; *b5++ = V1o; if (!lastX) { *b4++ = U2o; *b5++ = V2o; } } /* UV from 2x+1, 2y */ if (!lastX) { *b6++ = U2e; *b7++ = V2e; } } } static INLINE pstatus_t general_RGBToAVC444YUV_ANY( const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat, UINT32 srcStep, BYTE* WINPR_RESTRICT pDst1[3], const UINT32 dst1Step[3], BYTE* WINPR_RESTRICT pDst2[3], const UINT32 dst2Step[3], const prim_size_t* WINPR_RESTRICT roi) { /** * Note: According to [MS-RDPEGFX 2.2.4.4 RFX_AVC420_BITMAP_STREAM] the * width and height of the MPEG-4 AVC/H.264 codec bitstream MUST be aligned * to a multiple of 16. * Hence the passed destination YUV420/CHROMA420 buffers must have been * allocated accordingly !! */ /** * [MS-RDPEGFX 3.3.8.3.2 YUV420p Stream Combination] defines the following "Bx areas": * * YUV420 frame (main view): * B1: From Y444 all pixels * B2: From U444 all pixels in even rows with even columns * B3: From V444 all pixels in even rows with even columns * * Chroma420 frame (auxillary view): * B45: From U444 and V444 all pixels from all odd rows * (The odd U444 and V444 rows must be interleaved in 8-line blocks in B45 !!!) * B6: From U444 all pixels in even rows with odd columns * B7: From V444 all pixels in even rows with odd columns * * Microsoft's horrible unclear description in MS-RDPEGFX translated to pseudo code looks like * this: * * for (y = 0; y < fullHeight; y++) * { * for (x = 0; x < fullWidth; x++) * { * B1[x,y] = Y444[x,y]; * } * } * * for (y = 0; y < halfHeight; y++) * { * for (x = 0; x < halfWidth; x++) * { * B2[x,y] = U444[2 * x, 2 * y]; * B3[x,y] = V444[2 * x, 2 * y]; * B6[x,y] = U444[2 * x + 1, 2 * y]; * B7[x,y] = V444[2 * x + 1, 2 * y]; * } * } * * for (y = 0; y < halfHeight; y++) * { * yU = (y / 8) * 16; // identify first row of correct 8-line U block in B45 * yU += (y % 8); // add offset rows in destination block * yV = yU + 8; // the corresponding v line is always 8 rows ahead * * for (x = 0; x < fullWidth; x++) * { * B45[x,yU] = U444[x, 2 * y + 1]; * B45[x,yV] = V444[x, 2 * y + 1]; * } * } * */ const BYTE* pMaxSrc = pSrc + 1ULL * (roi->height - 1) * srcStep; for (size_t y = 0; y < roi->height; y += 2) { const BOOL last = (y >= (roi->height - 1)); const BYTE* srcEven = y < roi->height ? pSrc + y * srcStep : pMaxSrc; const BYTE* srcOdd = !last ? pSrc + (y + 1) * srcStep : pMaxSrc; const UINT32 i = y >> 1; const UINT32 n = (i & ~7) + i; BYTE* b1Even = pDst1[0] + y * dst1Step[0]; BYTE* b1Odd = !last ? (b1Even + dst1Step[0]) : NULL; BYTE* b2 = pDst1[1] + (y / 2) * dst1Step[1]; BYTE* b3 = pDst1[2] + (y / 2) * dst1Step[2]; BYTE* b4 = pDst2[0] + 1ULL * dst2Step[0] * n; BYTE* b5 = b4 + 8ULL * dst2Step[0]; BYTE* b6 = pDst2[1] + (y / 2) * dst2Step[1]; BYTE* b7 = pDst2[2] + (y / 2) * dst2Step[2]; general_RGBToAVC444YUV_ANY_DOUBLE_ROW(srcEven, srcOdd, srcFormat, b1Even, b1Odd, b2, b3, b4, b5, b6, b7, roi->width); } return PRIMITIVES_SUCCESS; } static INLINE pstatus_t general_RGBToAVC444YUV(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat, UINT32 srcStep, BYTE* WINPR_RESTRICT pDst1[3], const UINT32 dst1Step[3], BYTE* WINPR_RESTRICT pDst2[3], const UINT32 dst2Step[3], const prim_size_t* WINPR_RESTRICT roi) { if (!pSrc || !pDst1 || !dst1Step || !pDst2 || !dst2Step) return -1; if (!pDst1[0] || !pDst1[1] || !pDst1[2]) return -1; if (!dst1Step[0] || !dst1Step[1] || !dst1Step[2]) return -1; if (!pDst2[0] || !pDst2[1] || !pDst2[2]) return -1; if (!dst2Step[0] || !dst2Step[1] || !dst2Step[2]) return -1; switch (srcFormat) { case PIXEL_FORMAT_BGRA32: case PIXEL_FORMAT_BGRX32: return general_RGBToAVC444YUV_BGRX(pSrc, srcStep, pDst1, dst1Step, pDst2, dst2Step, roi); case PIXEL_FORMAT_RGBA32: case PIXEL_FORMAT_RGBX32: return general_RGBToAVC444YUV_RGBX(pSrc, srcStep, pDst1, dst1Step, pDst2, dst2Step, roi); default: return general_RGBToAVC444YUV_ANY(pSrc, srcFormat, srcStep, pDst1, dst1Step, pDst2, dst2Step, roi); } return !PRIMITIVES_SUCCESS; } static INLINE void general_RGBToAVC444YUVv2_ANY_DOUBLE_ROW( const BYTE* WINPR_RESTRICT srcEven, const BYTE* WINPR_RESTRICT srcOdd, UINT32 srcFormat, BYTE* WINPR_RESTRICT yLumaDstEven, BYTE* WINPR_RESTRICT yLumaDstOdd, BYTE* WINPR_RESTRICT uLumaDst, BYTE* WINPR_RESTRICT vLumaDst, BYTE* WINPR_RESTRICT yEvenChromaDst1, BYTE* WINPR_RESTRICT yEvenChromaDst2, BYTE* WINPR_RESTRICT yOddChromaDst1, BYTE* WINPR_RESTRICT yOddChromaDst2, BYTE* WINPR_RESTRICT uChromaDst1, BYTE* WINPR_RESTRICT uChromaDst2, BYTE* WINPR_RESTRICT vChromaDst1, BYTE* WINPR_RESTRICT vChromaDst2, UINT32 width) { const UINT32 bpp = FreeRDPGetBytesPerPixel(srcFormat); for (UINT32 x = 0; x < width; x += 2) { BYTE Ya = 0; BYTE Ua = 0; BYTE Va = 0; BYTE Yb = 0; BYTE Ub = 0; BYTE Vb = 0; BYTE Yc = 0; BYTE Uc = 0; BYTE Vc = 0; BYTE Yd = 0; BYTE Ud = 0; BYTE Vd = 0; { BYTE b = 0; BYTE g = 0; BYTE r = 0; const UINT32 color = FreeRDPReadColor(srcEven, srcFormat); srcEven += bpp; FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL); Ya = RGB2Y(r, g, b); Ua = RGB2U(r, g, b); Va = RGB2V(r, g, b); } if (x < width - 1) { BYTE b = 0; BYTE g = 0; BYTE r = 0; const UINT32 color = FreeRDPReadColor(srcEven, srcFormat); srcEven += bpp; FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL); Yb = RGB2Y(r, g, b); Ub = RGB2U(r, g, b); Vb = RGB2V(r, g, b); } else { Yb = Ya; Ub = Ua; Vb = Va; } if (srcOdd) { BYTE b = 0; BYTE g = 0; BYTE r = 0; const UINT32 color = FreeRDPReadColor(srcOdd, srcFormat); srcOdd += bpp; FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL); Yc = RGB2Y(r, g, b); Uc = RGB2U(r, g, b); Vc = RGB2V(r, g, b); } else { Yc = Ya; Uc = Ua; Vc = Va; } if (srcOdd && (x < width - 1)) { BYTE b = 0; BYTE g = 0; BYTE r = 0; const UINT32 color = FreeRDPReadColor(srcOdd, srcFormat); srcOdd += bpp; FreeRDPSplitColor(color, srcFormat, &r, &g, &b, NULL, NULL); Yd = RGB2Y(r, g, b); Ud = RGB2U(r, g, b); Vd = RGB2V(r, g, b); } else { Yd = Ya; Ud = Ua; Vd = Va; } /* Y [b1] */ *yLumaDstEven++ = Ya; if (x < width - 1) *yLumaDstEven++ = Yb; if (srcOdd) *yLumaDstOdd++ = Yc; if (srcOdd && (x < width - 1)) *yLumaDstOdd++ = Yd; /* 2x 2y [b2,b3] */ *uLumaDst++ = (Ua + Ub + Uc + Ud) / 4; *vLumaDst++ = (Va + Vb + Vc + Vd) / 4; /* 2x+1, y [b4,b5] even */ if (x < width - 1) { *yEvenChromaDst1++ = Ub; *yEvenChromaDst2++ = Vb; } if (srcOdd) { /* 2x+1, y [b4,b5] odd */ if (x < width - 1) { *yOddChromaDst1++ = Ud; *yOddChromaDst2++ = Vd; } /* 4x 2y+1 [b6, b7] */ if (x % 4 == 0) { *uChromaDst1++ = Uc; *uChromaDst2++ = Vc; } /* 4x+2 2y+1 [b8, b9] */ else { *vChromaDst1++ = Uc; *vChromaDst2++ = Vc; } } } } static INLINE pstatus_t general_RGBToAVC444YUVv2_ANY( const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat, UINT32 srcStep, BYTE* WINPR_RESTRICT pDst1[3], const UINT32 dst1Step[3], BYTE* WINPR_RESTRICT pDst2[3], const UINT32 dst2Step[3], const prim_size_t* WINPR_RESTRICT roi) { /** * Note: According to [MS-RDPEGFX 2.2.4.4 RFX_AVC420_BITMAP_STREAM] the * width and height of the MPEG-4 AVC/H.264 codec bitstream MUST be aligned * to a multiple of 16. * Hence the passed destination YUV420/CHROMA420 buffers must have been * allocated accordingly !! */ /** * [MS-RDPEGFX 3.3.8.3.3 YUV420p Stream Combination for YUV444v2 mode] defines the following "Bx * areas": * * YUV420 frame (main view): * B1: From Y444 all pixels * B2: From U444 all pixels in even rows with even rows and columns * B3: From V444 all pixels in even rows with even rows and columns * * Chroma420 frame (auxillary view): * B45: From U444 and V444 all pixels from all odd columns * B67: From U444 and V444 every 4th pixel in odd rows * B89: From U444 and V444 every 4th pixel (initial offset of 2) in odd rows * * Chroma Bxy areas correspond to the left and right half of the YUV420 plane. * for (y = 0; y < fullHeight; y++) * { * for (x = 0; x < fullWidth; x++) * { * B1[x,y] = Y444[x,y]; * } * * for (x = 0; x < halfWidth; x++) * { * B4[x,y] = U444[2 * x, 2 * y]; * B5[x,y] = V444[2 * x, 2 * y]; * } * } * * for (y = 0; y < halfHeight; y++) * { * for (x = 0; x < halfWidth; x++) * { * B2[x,y] = U444[2 * x, 2 * y]; * B3[x,y] = V444[2 * x, 2 * y]; * B6[x,y] = U444[4 * x, 2 * y + 1]; * B7[x,y] = V444[4 * x, 2 * y + 1]; * B8[x,y] = V444[4 * x + 2, 2 * y + 1]; * B9[x,y] = V444[4 * x + 2, 2 * y] + 1; * } * } * */ if (roi->height < 1 || roi->width < 1) return !PRIMITIVES_SUCCESS; for (size_t y = 0; y < roi->height; y += 2) { const BYTE* srcEven = (pSrc + y * srcStep); const BYTE* srcOdd = (y < roi->height - 1) ? (srcEven + srcStep) : NULL; BYTE* dstLumaYEven = (pDst1[0] + y * dst1Step[0]); BYTE* dstLumaYOdd = (dstLumaYEven + dst1Step[0]); BYTE* dstLumaU = (pDst1[1] + (y / 2) * dst1Step[1]); BYTE* dstLumaV = (pDst1[2] + (y / 2) * dst1Step[2]); BYTE* dstEvenChromaY1 = (pDst2[0] + y * dst2Step[0]); BYTE* dstEvenChromaY2 = dstEvenChromaY1 + roi->width / 2; BYTE* dstOddChromaY1 = dstEvenChromaY1 + dst2Step[0]; BYTE* dstOddChromaY2 = dstEvenChromaY2 + dst2Step[0]; BYTE* dstChromaU1 = (pDst2[1] + (y / 2) * dst2Step[1]); BYTE* dstChromaV1 = (pDst2[2] + (y / 2) * dst2Step[2]); BYTE* dstChromaU2 = dstChromaU1 + roi->width / 4; BYTE* dstChromaV2 = dstChromaV1 + roi->width / 4; general_RGBToAVC444YUVv2_ANY_DOUBLE_ROW( srcEven, srcOdd, srcFormat, dstLumaYEven, dstLumaYOdd, dstLumaU, dstLumaV, dstEvenChromaY1, dstEvenChromaY2, dstOddChromaY1, dstOddChromaY2, dstChromaU1, dstChromaU2, dstChromaV1, dstChromaV2, roi->width); } return PRIMITIVES_SUCCESS; } static INLINE void general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW( const BYTE* WINPR_RESTRICT srcEven, const BYTE* WINPR_RESTRICT srcOdd, BYTE* WINPR_RESTRICT yLumaDstEven, BYTE* WINPR_RESTRICT yLumaDstOdd, BYTE* WINPR_RESTRICT uLumaDst, BYTE* WINPR_RESTRICT vLumaDst, BYTE* WINPR_RESTRICT yEvenChromaDst1, BYTE* WINPR_RESTRICT yEvenChromaDst2, BYTE* WINPR_RESTRICT yOddChromaDst1, BYTE* WINPR_RESTRICT yOddChromaDst2, BYTE* WINPR_RESTRICT uChromaDst1, BYTE* WINPR_RESTRICT uChromaDst2, BYTE* WINPR_RESTRICT vChromaDst1, BYTE* WINPR_RESTRICT vChromaDst2, UINT32 width) { for (UINT32 x = 0; x < width; x += 2) { BYTE Ya = 0; BYTE Ua = 0; BYTE Va = 0; BYTE Yb = 0; BYTE Ub = 0; BYTE Vb = 0; BYTE Yc = 0; BYTE Uc = 0; BYTE Vc = 0; BYTE Yd = 0; BYTE Ud = 0; BYTE Vd = 0; { const BYTE b = *srcEven++; const BYTE g = *srcEven++; const BYTE r = *srcEven++; srcEven++; Ya = RGB2Y(r, g, b); Ua = RGB2U(r, g, b); Va = RGB2V(r, g, b); } if (x < width - 1) { const BYTE b = *srcEven++; const BYTE g = *srcEven++; const BYTE r = *srcEven++; srcEven++; Yb = RGB2Y(r, g, b); Ub = RGB2U(r, g, b); Vb = RGB2V(r, g, b); } else { Yb = Ya; Ub = Ua; Vb = Va; } if (srcOdd) { const BYTE b = *srcOdd++; const BYTE g = *srcOdd++; const BYTE r = *srcOdd++; srcOdd++; Yc = RGB2Y(r, g, b); Uc = RGB2U(r, g, b); Vc = RGB2V(r, g, b); } else { Yc = Ya; Uc = Ua; Vc = Va; } if (srcOdd && (x < width - 1)) { const BYTE b = *srcOdd++; const BYTE g = *srcOdd++; const BYTE r = *srcOdd++; srcOdd++; Yd = RGB2Y(r, g, b); Ud = RGB2U(r, g, b); Vd = RGB2V(r, g, b); } else { Yd = Ya; Ud = Ua; Vd = Va; } /* Y [b1] */ *yLumaDstEven++ = Ya; if (x < width - 1) *yLumaDstEven++ = Yb; if (srcOdd) *yLumaDstOdd++ = Yc; if (srcOdd && (x < width - 1)) *yLumaDstOdd++ = Yd; /* 2x 2y [b2,b3] */ *uLumaDst++ = (Ua + Ub + Uc + Ud) / 4; *vLumaDst++ = (Va + Vb + Vc + Vd) / 4; /* 2x+1, y [b4,b5] even */ if (x < width - 1) { *yEvenChromaDst1++ = Ub; *yEvenChromaDst2++ = Vb; } if (srcOdd) { /* 2x+1, y [b4,b5] odd */ if (x < width - 1) { *yOddChromaDst1++ = Ud; *yOddChromaDst2++ = Vd; } /* 4x 2y+1 [b6, b7] */ if (x % 4 == 0) { *uChromaDst1++ = Uc; *uChromaDst2++ = Vc; } /* 4x+2 2y+1 [b8, b9] */ else { *vChromaDst1++ = Uc; *vChromaDst2++ = Vc; } } } } static INLINE pstatus_t general_RGBToAVC444YUVv2_BGRX(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcStep, BYTE* WINPR_RESTRICT pDst1[3], const UINT32 dst1Step[3], BYTE* WINPR_RESTRICT pDst2[3], const UINT32 dst2Step[3], const prim_size_t* WINPR_RESTRICT roi) { if (roi->height < 1 || roi->width < 1) return !PRIMITIVES_SUCCESS; for (size_t y = 0; y < roi->height; y += 2) { const BYTE* srcEven = (pSrc + y * srcStep); const BYTE* srcOdd = (y < roi->height - 1) ? (srcEven + srcStep) : NULL; BYTE* dstLumaYEven = (pDst1[0] + y * dst1Step[0]); BYTE* dstLumaYOdd = (dstLumaYEven + dst1Step[0]); BYTE* dstLumaU = (pDst1[1] + (y / 2) * dst1Step[1]); BYTE* dstLumaV = (pDst1[2] + (y / 2) * dst1Step[2]); BYTE* dstEvenChromaY1 = (pDst2[0] + y * dst2Step[0]); BYTE* dstEvenChromaY2 = dstEvenChromaY1 + roi->width / 2; BYTE* dstOddChromaY1 = dstEvenChromaY1 + dst2Step[0]; BYTE* dstOddChromaY2 = dstEvenChromaY2 + dst2Step[0]; BYTE* dstChromaU1 = (pDst2[1] + (y / 2) * dst2Step[1]); BYTE* dstChromaV1 = (pDst2[2] + (y / 2) * dst2Step[2]); BYTE* dstChromaU2 = dstChromaU1 + roi->width / 4; BYTE* dstChromaV2 = dstChromaV1 + roi->width / 4; general_RGBToAVC444YUVv2_BGRX_DOUBLE_ROW( srcEven, srcOdd, dstLumaYEven, dstLumaYOdd, dstLumaU, dstLumaV, dstEvenChromaY1, dstEvenChromaY2, dstOddChromaY1, dstOddChromaY2, dstChromaU1, dstChromaU2, dstChromaV1, dstChromaV2, roi->width); } return PRIMITIVES_SUCCESS; } static INLINE pstatus_t general_RGBToAVC444YUVv2(const BYTE* WINPR_RESTRICT pSrc, UINT32 srcFormat, UINT32 srcStep, BYTE* WINPR_RESTRICT pDst1[3], const UINT32 dst1Step[3], BYTE* WINPR_RESTRICT pDst2[3], const UINT32 dst2Step[3], const prim_size_t* WINPR_RESTRICT roi) { switch (srcFormat) { case PIXEL_FORMAT_BGRA32: case PIXEL_FORMAT_BGRX32: return general_RGBToAVC444YUVv2_BGRX(pSrc, srcStep, pDst1, dst1Step, pDst2, dst2Step, roi); default: return general_RGBToAVC444YUVv2_ANY(pSrc, srcFormat, srcStep, pDst1, dst1Step, pDst2, dst2Step, roi); } return !PRIMITIVES_SUCCESS; } void primitives_init_YUV(primitives_t* WINPR_RESTRICT prims) { prims->YUV420ToRGB_8u_P3AC4R = general_YUV420ToRGB_8u_P3AC4R; prims->YUV444ToRGB_8u_P3AC4R = general_YUV444ToRGB_8u_P3AC4R; prims->RGBToYUV420_8u_P3AC4R = general_RGBToYUV420_8u_P3AC4R; prims->RGBToYUV444_8u_P3AC4R = general_RGBToYUV444_8u_P3AC4R; prims->YUV420CombineToYUV444 = general_YUV420CombineToYUV444; prims->YUV444SplitToYUV420 = general_YUV444SplitToYUV420; prims->RGBToAVC444YUV = general_RGBToAVC444YUV; prims->RGBToAVC444YUVv2 = general_RGBToAVC444YUVv2; } void primitives_init_YUV_opt(primitives_t* WINPR_RESTRICT prims) { primitives_init_YUV_ssse3(prims); primitives_init_YUV_neon(prims); }