Added YUV444 kernel

This commit is contained in:
Armin Novak 2019-11-13 16:37:28 +01:00 committed by akallabeth
parent b3a3a6b9c2
commit e347c7b3de
2 changed files with 101 additions and 18 deletions

View File

@ -350,6 +350,34 @@ static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], const UINT32
return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
}
static pstatus_t opencl_YUV444ToRGB_8u_P3AC4R(const BYTE* pSrc[3], const UINT32 srcStep[3],
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
const prim_size_t* roi)
{
const char* kernel_name;
switch (DstFormat)
{
case PIXEL_FORMAT_BGRA32:
case PIXEL_FORMAT_BGRX32:
kernel_name = "yuv444_to_bgra_1b";
break;
case PIXEL_FORMAT_XRGB32:
case PIXEL_FORMAT_ARGB32:
kernel_name = "yuv444_to_argb_1b";
break;
default:
{
primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
if (!p)
return -1;
return p->YUV444ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
}
}
return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
}
BOOL primitives_init_opencl(primitives_t* prims)
{
primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
@ -361,6 +389,7 @@ BOOL primitives_init_opencl(primitives_t* prims)
return FALSE;
prims->YUV420ToRGB_8u_P3AC4R = opencl_YUV420ToRGB_8u_P3AC4R;
prims->YUV444ToRGB_8u_P3AC4R = opencl_YUV444ToRGB_8u_P3AC4R;
prims->flags |= PRIM_FLAGS_HAVE_EXTGPU;
prims->uninit = primitives_uninit_opencl;
return TRUE;

View File

@ -19,29 +19,29 @@
#define STRINGIFY(x) #x
STRINGIFY(
unsigned char clamp_uc(int v, int l, int h)
uchar clamp_uc(int v, short l, short h)
{
if (v > h)
v = h;
if (v < l)
v = l;
return (unsigned char)v;
return (uchar)v;
}
__kernel void yuv420_to_argb_1b(
__global unsigned char *bufY, int strideY,
__global unsigned char *bufU, int strideU,
__global unsigned char *bufV, int strideV,
__global unsigned char *dest, int strideDest)
__global const uchar *bufY, int strideY,
__global const uchar *bufU, int strideU,
__global const uchar *bufV, int strideV,
__global uchar *dest, int strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
int Y = bufY[y * strideY + x];
int Udim = bufU[(y / 2) * strideU + (x / 2)] - 128;
int Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128;
short Y = bufY[y * strideY + x];
short Udim = bufU[(y / 2) * strideU + (x / 2)] - 128;
short Vdim = bufV[(y / 2) * strideV + (x / 2)] - 128;
__global unsigned char *destPtr = dest + (strideDest * y) + (x * 4);
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
@ -56,19 +56,19 @@ __kernel void yuv420_to_argb_1b(
}
__kernel void yuv420_to_bgra_1b(
__global unsigned char *bufY, int strideY,
__global unsigned char *bufU, int strideU,
__global unsigned char *bufV, int strideV,
__global unsigned char *dest, int strideDest)
__global const uchar *bufY, int strideY,
__global const uchar *bufU, int strideU,
__global const uchar *bufV, int strideV,
__global uchar *dest, int strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
int Y = bufY[y * strideY + x];
int U = bufU[(y / 2) * strideU + (x / 2)] - 128;
int V = bufV[(y / 2) * strideV + (x / 2)] - 128;
short Y = bufY[y * strideY + x];
short U = bufU[(y / 2) * strideU + (x / 2)] - 128;
short V = bufV[(y / 2) * strideV + (x / 2)] - 128;
__global unsigned char *destPtr = dest + (strideDest * y) + (x * 4);
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
@ -81,4 +81,58 @@ __kernel void yuv420_to_bgra_1b(
destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
destPtr[3] = 0xff; /* A */
}
__kernel void yuv444_to_bgra_1b(
__global const uchar *bufY, int strideY,
__global const uchar *bufU, int strideU,
__global const uchar *bufV, int strideV,
__global uchar *dest, int strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
short Y = bufY[y * strideY + x];
short U = bufU[y * strideU + x] - 128;
short V = bufV[y * strideV + x] - 128;
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
* | B | ( | 256 475 0 | | V - 128 | )
*/
int y256 = 256 * Y;
destPtr[0] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
destPtr[1] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
destPtr[2] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
destPtr[3] = 0xff; /* A */
}
__kernel void yuv444_to_argb_1b(
__global const uchar *bufY, int strideY,
__global const uchar *bufU, int strideU,
__global const uchar *bufV, int strideV,
__global uchar *dest, int strideDest)
{
unsigned int x = get_global_id(0);
unsigned int y = get_global_id(1);
short Y = bufY[y * strideY + x];
short U = bufU[y * strideU + x] - 128;
short V = bufV[y * strideV + x] - 128;
__global uchar *destPtr = dest + (strideDest * y) + (x * 4);
/**
* | R | ( | 256 0 403 | | Y | )
* | G | = ( | 256 -48 -120 | | U - 128 | ) >> 8
* | B | ( | 256 475 0 | | V - 128 | )
*/
int y256 = 256 * Y;
destPtr[3] = clamp_uc((y256 + (475 * U)) >> 8, 0, 255); /* B */
destPtr[2] = clamp_uc((y256 - ( 48 * U) - (120 * V)) >> 8 , 0, 255); /* G */
destPtr[1] = clamp_uc((y256 + (403 * V)) >> 8, 0, 255); /* R */
destPtr[0] = 0xff; /* A */
}
)