Added surface width and height for YUV combine.

This commit is contained in:
Armin Novak 2017-04-18 14:12:40 +02:00
parent fcd2d820af
commit a2167d5417
5 changed files with 22 additions and 26 deletions

View File

@ -190,6 +190,7 @@ typedef pstatus_t (*__RGBToYUV444_8u_P3AC4R_t)(
/*
 * Function pointer type for the YUV420CombineToYUV444 primitive.
 *
 * type     - avc444_frame_type value selecting how the source frame is
 *            interpreted.
 * pSrc     - array of three source plane pointers.
 * srcStep  - array of three step values, one per source plane.
 * nWidth,
 * nHeight  - NOTE(review): appear to be the dimensions of the whole surface
 *            rather than of the region of interest (the decoder call site
 *            passes h264->width / h264->height) -- confirm.
 * pDst     - array of three destination plane pointers.
 * dstStep  - array of three step values, one per destination plane.
 * roi      - rectangle describing the region to process.
 *
 * Returns a pstatus_t; callers compare the result against PRIMITIVES_SUCCESS.
 */
typedef pstatus_t (*__YUV420CombineToYUV444_t)(
avc444_frame_type type,
const BYTE* pSrc[3], const UINT32 srcStep[3],
UINT32 nWidth, UINT32 nHeight,
BYTE* pDst[3], const UINT32 dstStep[3],
const RECTANGLE_16* roi);
typedef pstatus_t (*__YUV444SplitToYUV420_t)(

View File

@ -1725,6 +1725,7 @@ static BOOL avc444_process_rects(H264_CONTEXT* h264, const BYTE* pSrcData,
continue;
if (prims->YUV420CombineToYUV444(type, ppYUVData, piStride,
h264->width, h264->height,
ppYUVDstData, piDstStride,
rect) != PRIMITIVES_SUCCESS)
return FALSE;

View File

@ -224,6 +224,7 @@ static pstatus_t general_ChromaV1ToYUV444(const BYTE* pSrcRaw[3], const UINT32 s
}
static pstatus_t general_ChromaV2ToYUV444(const BYTE* pSrc[3], const UINT32 srcStep[3],
UINT32 nTotalWidth, UINT32 nTotalHeight,
BYTE* pDst[3], const UINT32 dstStep[3],
const RECTANGLE_16* roi)
{
@ -233,14 +234,13 @@ static pstatus_t general_ChromaV2ToYUV444(const BYTE* pSrc[3], const UINT32 srcS
const UINT32 halfWidth = (nWidth + 1) / 2;
const UINT32 halfHeight = (nHeight + 1) / 2;
const UINT32 quaterWidth = (nWidth + 3) / 4;
const UINT32 quaterHeight = (nHeight + 3) / 4;
/* B4 and B5: odd UV values for width/2, height */
for (y = 0; y < nHeight; y++)
{
const UINT32 yTop = y + roi->top;
const BYTE* pYaU = pSrc[0] + srcStep[0] * yTop + roi->left / 2;
const BYTE* pYaV = pYaU + srcStep[0] / 2;
const BYTE* pYaV = pYaU + nTotalWidth / 2;
BYTE* pU = pDst[1] + dstStep[1] * yTop + roi->left;
BYTE* pV = pDst[2] + dstStep[2] * yTop + roi->left;
@ -256,9 +256,9 @@ static pstatus_t general_ChromaV2ToYUV444(const BYTE* pSrc[3], const UINT32 srcS
for (y = 0; y < halfHeight; y++)
{
const BYTE* pUaU = pSrc[1] + srcStep[1] * (y + roi->top / 2) + roi->left / 4;
const BYTE* pUaV = pUaU + srcStep[1] / 2;
const BYTE* pUaV = pUaU + nTotalWidth / 4;
const BYTE* pVaU = pSrc[2] + srcStep[2] * (y + roi->top / 2) + roi->left / 4;
const BYTE* pVaV = pVaU + srcStep[2] / 2;
const BYTE* pVaV = pVaU + nTotalWidth / 4;
BYTE* pU = pDst[1] + dstStep[1] * (2 * y + 1 + roi->top) + roi->left;
BYTE* pV = pDst[2] + dstStep[2] * (2 * y + 1 + roi->top) + roi->left;
@ -274,22 +274,10 @@ static pstatus_t general_ChromaV2ToYUV444(const BYTE* pSrc[3], const UINT32 srcS
return general_ChromaFilter(pDst, dstStep, roi);
}
/**
* @brief general_YUV420CombineToYUV444
*
* @param pMainSrc Pointer to luma YUV420 data
* @param srcMainStep Step width in luma YUV420 data
* @param pAuxSrc Pointer to chroma YUV420 data
* @param srcAuxStep Step width in chroma YUV420 data
* @param pDst Pointer to YUV444 data
* @param dstStep Step width in YUV444 data
* @param roi Region of source to combine in destination.
*
* @return PRIMITIVES_SUCCESS on success, an error code otherwise.
*/
static pstatus_t general_YUV420CombineToYUV444(
avc444_frame_type type,
const BYTE* pSrc[3], const UINT32 srcStep[3],
UINT32 nWidth, UINT32 nHeight,
BYTE* pDst[3], const UINT32 dstStep[3],
const RECTANGLE_16* roi)
{
@ -311,7 +299,7 @@ static pstatus_t general_YUV420CombineToYUV444(
return general_ChromaV1ToYUV444(pSrc, srcStep, pDst, dstStep, roi);
case AVC444_CHROMAv2:
return general_ChromaV2ToYUV444(pSrc, srcStep, pDst, dstStep, roi);
return general_ChromaV2ToYUV444(pSrc, srcStep, nWidth, nHeight, pDst, dstStep, roi);
default:
return -1;

View File

@ -932,6 +932,7 @@ static pstatus_t ssse3_ChromaV1ToYUV444(const BYTE* pSrcRaw[3], const UINT32 src
}
static pstatus_t ssse3_ChromaV2ToYUV444(const BYTE* pSrc[3], const UINT32 srcStep[3],
UINT32 nTotalWidth, UINT32 nTotalHeight,
BYTE* pDst[3], const UINT32 dstStep[3],
const RECTANGLE_16* roi)
{
@ -956,7 +957,7 @@ static pstatus_t ssse3_ChromaV2ToYUV444(const BYTE* pSrc[3], const UINT32 srcSte
{
const UINT32 yTop = y + roi->top;
const BYTE* pYaU = pSrc[0] + srcStep[0] * yTop + roi->left / 2;
const BYTE* pYaV = pYaU + srcStep[0] / 2;
const BYTE* pYaV = pYaU + nTotalWidth / 2;
BYTE* pU = pDst[1] + dstStep[1] * yTop + roi->left;
BYTE* pV = pDst[2] + dstStep[2] * yTop + roi->left;
@ -990,9 +991,9 @@ static pstatus_t ssse3_ChromaV2ToYUV444(const BYTE* pSrc[3], const UINT32 srcSte
for (y = 0; y < halfHeight; y++)
{
const BYTE* pUaU = pSrc[1] + srcStep[1] * (y + roi->top / 2) + roi->left / 4;
const BYTE* pUaV = pUaU + srcStep[1] / 2;
const BYTE* pUaV = pUaU + nTotalWidth / 4;
const BYTE* pVaU = pSrc[2] + srcStep[2] * (y + roi->top / 2) + roi->left / 4;
const BYTE* pVaV = pVaU + srcStep[2] / 2;
const BYTE* pVaV = pVaU + nTotalWidth / 4;
BYTE* pU = pDst[1] + dstStep[1] * (2 * y + 1 + roi->top) + roi->left;
BYTE* pV = pDst[2] + dstStep[2] * (2 * y + 1 + roi->top) + roi->left;
@ -1043,6 +1044,7 @@ static pstatus_t ssse3_ChromaV2ToYUV444(const BYTE* pSrc[3], const UINT32 srcSte
static pstatus_t ssse3_YUV420CombineToYUV444(
avc444_frame_type type,
const BYTE* pSrc[3], const UINT32 srcStep[3],
UINT32 nWidth, UINT32 nHeight,
BYTE* pDst[3], const UINT32 dstStep[3],
const RECTANGLE_16* roi)
{
@ -1064,7 +1066,7 @@ static pstatus_t ssse3_YUV420CombineToYUV444(
return ssse3_ChromaV1ToYUV444(pSrc, srcStep, pDst, dstStep, roi);
case AVC444_CHROMAv2:
return ssse3_ChromaV2ToYUV444(pSrc, srcStep, pDst, dstStep, roi);
return ssse3_ChromaV2ToYUV444(pSrc, srcStep, nWidth, nHeight, pDst, dstStep, roi);
default:
return -1;
@ -1677,6 +1679,7 @@ static pstatus_t neon_ChromaV1ToYUV444(const BYTE* pSrcRaw[3], const UINT32 srcS
}
static pstatus_t neon_ChromaV2ToYUV444(const BYTE* pSrc[3], const UINT32 srcStep[3],
UINT32 nTotalWidth, UINT32 nTotalHeight,
BYTE* pDst[3], const UINT32 dstStep[3],
const RECTANGLE_16* roi)
{
@ -1694,7 +1697,7 @@ static pstatus_t neon_ChromaV2ToYUV444(const BYTE* pSrc[3], const UINT32 srcStep
{
const UINT32 yTop = y + roi->top;
const BYTE* pYaU = pSrc[0] + srcStep[0] * yTop + roi->left / 2;
const BYTE* pYaV = pYaU + srcStep[0] / 2;
const BYTE* pYaV = pYaU + nTotalWidth / 2;
BYTE* pU = pDst[1] + dstStep[1] * yTop + roi->left;
BYTE* pV = pDst[2] + dstStep[2] * yTop + roi->left;
@ -1724,9 +1727,9 @@ static pstatus_t neon_ChromaV2ToYUV444(const BYTE* pSrc[3], const UINT32 srcStep
for (y = 0; y < halfHeight; y++)
{
const BYTE* pUaU = pSrc[1] + srcStep[1] * (y + roi->top / 2) + roi->left / 4;
const BYTE* pUaV = pUaU + srcStep[1] / 2;
const BYTE* pUaV = pUaU + nTotalWidth / 4;
const BYTE* pVaU = pSrc[2] + srcStep[2] * (y + roi->top / 2) + roi->left / 4;
const BYTE* pVaV = pVaU + srcStep[2] / 2;
const BYTE* pVaV = pVaU + nTotalWidth / 4;
BYTE* pU = pDst[1] + dstStep[1] * (2 * y + 1 + roi->top) + roi->left;
BYTE* pV = pDst[2] + dstStep[2] * (2 * y + 1 + roi->top) + roi->left;
@ -1761,6 +1764,7 @@ static pstatus_t neon_ChromaV2ToYUV444(const BYTE* pSrc[3], const UINT32 srcStep
static pstatus_t neon_YUV420CombineToYUV444(
avc444_frame_type type,
const BYTE* pSrc[3], const UINT32 srcStep[3],
UINT32 nWidth, UINT32 nHeight,
BYTE* pDst[3], const UINT32 dstStep[3],
const RECTANGLE_16* roi)
{
@ -1782,7 +1786,7 @@ static pstatus_t neon_YUV420CombineToYUV444(
return neon_ChromaV1ToYUV444(pSrc, srcStep, pDst, dstStep, roi);
case AVC444_CHROMAv2:
return neon_ChromaV2ToYUV444(pSrc, srcStep, pDst, dstStep, roi);
return neon_ChromaV2ToYUV444(pSrc, srcStep, nWidth, nHeight, pDst, dstStep, roi);
default:
return -1;

View File

@ -241,6 +241,7 @@ static BOOL TestPrimitiveYUVCombine(primitives_t* prims, prim_size_t roi)
if (prims->YUV420CombineToYUV444(AVC444_LUMA,
(const BYTE**)luma, lumaStride,
roi.width, roi.height,
yuv, yuvStride, &rect) != PRIMITIVES_SUCCESS)
{
PROFILER_EXIT(yuvCombine);
@ -249,6 +250,7 @@ static BOOL TestPrimitiveYUVCombine(primitives_t* prims, prim_size_t roi)
if (prims->YUV420CombineToYUV444(AVC444_CHROMAv1,
(const BYTE**)chroma, chromaStride,
roi.width, roi.height,
yuv, yuvStride, &rect) != PRIMITIVES_SUCCESS)
{
PROFILER_EXIT(yuvCombine);