diff --git a/libfreerdp/codec/progressive.c b/libfreerdp/codec/progressive.c
index 09078d292..8b926f03d 100644
--- a/libfreerdp/codec/progressive.c
+++ b/libfreerdp/codec/progressive.c
@@ -846,8 +846,8 @@ static INLINE int progressive_decompress_tile_first(PROGRESSIVE_CONTEXT* progres
 	                                 tile->crLen,
 	                                 pSrcDst[2], pCurrent[2], pSign[2], diff); /* Cr */
 	prims->yCbCrToRGB_16s8u_P3AC4R((const INT16**) pSrcDst, 64 * 2,
-	                               tile->data, tile->format,
-	                               tile->stride, &roi_64x64);
+	                               tile->data, tile->stride, tile->format,
+	                               &roi_64x64);
 	BufferPool_Return(progressive->bufferPool, pBuffer);
 	return 1;
 }
@@ -1261,8 +1261,8 @@ static INLINE int progressive_decompress_tile_upgrade(PROGRESSIVE_CONTEXT* progr
 		return -1;
 
 	prims->yCbCrToRGB_16s8u_P3AC4R((const INT16**) pSrcDst, 64 * 2,
-	                               tile->data, tile->format,
-	                               tile->stride, &roi_64x64);
+	                               tile->data, tile->stride, tile->format,
+	                               &roi_64x64);
 	BufferPool_Return(progressive->bufferPool, pBuffer);
 	return 1;
 }
diff --git a/libfreerdp/codec/rfx.c b/libfreerdp/codec/rfx.c
index 56834570d..ea044232e 100644
--- a/libfreerdp/codec/rfx.c
+++ b/libfreerdp/codec/rfx.c
@@ -91,8 +91,6 @@ static void rfx_profiler_create(RFX_CONTEXT* context)
 	                "rfx_quantization_decode");
 	PROFILER_CREATE(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode");
 	PROFILER_CREATE(context->priv->prof_rfx_ycbcr_to_rgb, "prims->yCbCrToRGB");
-	PROFILER_CREATE(context->priv->prof_rfx_decode_format_rgb,
-	                "rfx_decode_format_rgb");
 	PROFILER_CREATE(context->priv->prof_rfx_encode_rgb, "rfx_encode_rgb");
 	PROFILER_CREATE(context->priv->prof_rfx_encode_component,
 	                "rfx_encode_component");
@@ -116,7 +114,6 @@ static void rfx_profiler_free(RFX_CONTEXT* context)
 	PROFILER_FREE(context->priv->prof_rfx_quantization_decode);
 	PROFILER_FREE(context->priv->prof_rfx_dwt_2d_decode);
 	PROFILER_FREE(context->priv->prof_rfx_ycbcr_to_rgb);
-	PROFILER_FREE(context->priv->prof_rfx_decode_format_rgb);
 	PROFILER_FREE(context->priv->prof_rfx_encode_rgb);
 	PROFILER_FREE(context->priv->prof_rfx_encode_component);
 	PROFILER_FREE(context->priv->prof_rfx_rlgr_encode);
@@ -137,7 +134,6 @@ static void rfx_profiler_print(RFX_CONTEXT* context)
 	PROFILER_PRINT(context->priv->prof_rfx_quantization_decode);
 	PROFILER_PRINT(context->priv->prof_rfx_dwt_2d_decode);
 	PROFILER_PRINT(context->priv->prof_rfx_ycbcr_to_rgb);
-	PROFILER_PRINT(context->priv->prof_rfx_decode_format_rgb);
 	PROFILER_PRINT(context->priv->prof_rfx_encode_rgb);
 	PROFILER_PRINT(context->priv->prof_rfx_encode_component);
 	PROFILER_PRINT(context->priv->prof_rfx_rlgr_encode);
@@ -708,7 +704,8 @@ static BOOL rfx_process_message_region(RFX_CONTEXT* context,
 		Stream_Read_UINT16(s, rect->y); /* y (2 bytes) */
 		Stream_Read_UINT16(s, rect->width); /* width (2 bytes) */
 		Stream_Read_UINT16(s, rect->height); /* height (2 bytes) */
-		WLog_Print(context->priv->log, WLOG_DEBUG, "rect %d (x,y=%"PRIu16",%"PRIu16" w,h=%"PRIu16" %"PRIu16").", i,
+		WLog_Print(context->priv->log, WLOG_DEBUG,
+		           "rect %d (x,y=%"PRIu16",%"PRIu16" w,h=%"PRIu16" %"PRIu16").", i,
 		           rect->x, rect->y,
 		           rect->width, rect->height);
 	}
diff --git a/libfreerdp/codec/rfx_decode.c b/libfreerdp/codec/rfx_decode.c
index 05a1852d1..11d762996 100644
--- a/libfreerdp/codec/rfx_decode.c
+++ b/libfreerdp/codec/rfx_decode.c
@@ -37,26 +37,6 @@
 
 #include "rfx_decode.h"
 
-/* stride is bytes between rows in the output buffer. */
-static void rfx_decode_format_rgb(const INT16* r_buf, const INT16* g_buf,
-                                  const INT16* b_buf, UINT32 pixel_format,
-                                  BYTE* dst_buf, UINT32 stride)
-{
-	primitives_t* prims = primitives_get();
-	const INT16* r = r_buf;
-	const INT16* g = g_buf;
-	const INT16* b = b_buf;
-	const INT16* pSrc[3];
-	static const prim_size_t roi_64x64 = { 64, 64 };
-	BYTE* dst = dst_buf;
-	pSrc[0] = r;
-	pSrc[1] = g;
-	pSrc[2] = b;
-	prims->RGBToRGB_16s8u_P3AC4R(
-	    (const INT16**) pSrc, 64 * sizeof(INT16),
-	    dst, stride, pixel_format, &roi_64x64);
-}
-
 static void rfx_decode_component(RFX_CONTEXT* context,
                                  const UINT32* quantization_values,
                                  const BYTE* data, int size, INT16* buffer)
@@ -86,6 +66,7 @@ static void rfx_decode_component(RFX_CONTEXT* context,
 BOOL rfx_decode_rgb(RFX_CONTEXT* context, RFX_TILE* tile, BYTE* rgb_buffer,
                     int stride)
 {
+	BOOL rc = TRUE;
 	BYTE* pBuffer;
 	INT16* pSrcDst[3];
 	UINT32* y_quants, *cb_quants, *cr_quants;
@@ -109,14 +90,13 @@ BOOL rfx_decode_rgb(RFX_CONTEXT* context, RFX_TILE* tile, BYTE* rgb_buffer,
 	rfx_decode_component(context, cr_quants, tile->CrData, tile->CrLen,
 	                     pSrcDst[2]); /* CrData */
 	PROFILER_ENTER(context->priv->prof_rfx_ycbcr_to_rgb);
-	prims->yCbCrToRGB_16s16s_P3P3((const INT16**) pSrcDst, 64 * sizeof(INT16),
-	                              pSrcDst, 64 * sizeof(INT16), &roi_64x64);
+
+	if (prims->yCbCrToRGB_16s8u_P3AC4R((const INT16**)pSrcDst, 64 * sizeof(INT16),
+	                                   rgb_buffer, stride, context->pixel_format, &roi_64x64) != PRIMITIVES_SUCCESS)
+		rc = FALSE;
+
 	PROFILER_EXIT(context->priv->prof_rfx_ycbcr_to_rgb);
-	PROFILER_ENTER(context->priv->prof_rfx_decode_format_rgb);
-	rfx_decode_format_rgb(pSrcDst[0], pSrcDst[1], pSrcDst[2],
-	                      context->pixel_format, rgb_buffer, stride);
-	PROFILER_EXIT(context->priv->prof_rfx_decode_format_rgb);
 	PROFILER_EXIT(context->priv->prof_rfx_decode_rgb);
 	BufferPool_Return(context->priv->BufferPool, pBuffer);
-	return TRUE;
+	return rc;
 }
diff --git a/libfreerdp/codec/rfx_types.h b/libfreerdp/codec/rfx_types.h
index d85265519..082e14187 100644
--- a/libfreerdp/codec/rfx_types.h
+++ b/libfreerdp/codec/rfx_types.h
@@ -17,8 +17,8 @@
  * limitations under the License.
  */
 
-#ifndef __RFX_TYPES_H
-#define __RFX_TYPES_H
+#ifndef CODEC_RFX_TYPES_H
+#define CODEC_RFX_TYPES_H
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
@@ -66,7 +66,6 @@ struct _RFX_CONTEXT_PRIV
 	PROFILER_DEFINE(prof_rfx_quantization_decode);
 	PROFILER_DEFINE(prof_rfx_dwt_2d_decode);
 	PROFILER_DEFINE(prof_rfx_ycbcr_to_rgb);
-	PROFILER_DEFINE(prof_rfx_decode_format_rgb);
 
 	PROFILER_DEFINE(prof_rfx_encode_rgb);
 	PROFILER_DEFINE(prof_rfx_encode_component);
diff --git a/libfreerdp/primitives/prim_YCoCg_opt.c b/libfreerdp/primitives/prim_YCoCg_opt.c
index b9fde7257..f8bee4de4 100644
--- a/libfreerdp/primitives/prim_YCoCg_opt.c
+++ b/libfreerdp/primitives/prim_YCoCg_opt.c
@@ -50,7 +50,7 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert(
 	BYTE* dptr = (BYTE*) pDst;
 	int sRowBump = srcStep - width * sizeof(UINT32);
 	int dRowBump = dstStep - width * sizeof(UINT32);
-    UINT32 h;
+	UINT32 h;
 	/* Shift left by "shift" and divide by two is the same as shift
 	 * left by "shift-1".
 	 */
@@ -70,28 +70,30 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert(
 	{
 		/* Too small, or we'll never hit a 16-byte boundary.  Punt. */
 		return generic->YCoCgToRGB_8u_AC4R(
-			   pSrc, srcStep, pDst, DstFormat, dstStep,
-			   width, height, shift, withAlpha);
+		           pSrc, srcStep, pDst, DstFormat, dstStep,
+		           width, height, shift, withAlpha);
 	}
 
 	for (h = 0; h < height; h++)
 	{
-        UINT32 w = width;
+		UINT32 w = width;
 		BOOL onStride;
 
 		/* Get to a 16-byte destination boundary. */
 		if ((ULONG_PTR) dptr & 0x0f)
 		{
 			pstatus_t status;
-            UINT32 startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4;
+			UINT32 startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4;
 
 			if (startup > width) startup = width;
 
 			status = generic->YCoCgToRGB_8u_AC4R(
-					 sptr, srcStep, dptr, DstFormat, dstStep,
-					 startup, 1, shift, withAlpha);
+			             sptr, srcStep, dptr, DstFormat, dstStep,
+			             startup, 1, shift, withAlpha);
+
 			if (status != PRIMITIVES_SUCCESS)
 				return status;
+
 			sptr += startup * sizeof(UINT32);
 			dptr += startup * sizeof(UINT32);
 			w -= startup;
@@ -201,8 +203,9 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert(
 		{
 			pstatus_t status;
 			status = generic->YCoCgToRGB_8u_AC4R(
-					 sptr, srcStep, dptr, DstFormat, dstStep,
-					 w, 1, shift, withAlpha);
+			             sptr, srcStep, dptr, DstFormat, dstStep,
+			             w, 1, shift, withAlpha);
+
 			if (status != PRIMITIVES_SUCCESS)
 				return status;
 
@@ -229,7 +232,7 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
 	BYTE* dptr = (BYTE*) pDst;
 	int sRowBump = srcStep - width * sizeof(UINT32);
 	int dRowBump = dstStep - width * sizeof(UINT32);
-    UINT32 h;
+	UINT32 h;
 	/* Shift left by "shift" and divide by two is the same as shift
 	 * left by "shift-1".
 	 */
@@ -249,8 +252,8 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
 	{
 		/* Too small, or we'll never hit a 16-byte boundary.  Punt. */
 		return generic->YCoCgToRGB_8u_AC4R(
-					pSrc, srcStep, pDst, DstFormat, dstStep,
-					width, height, shift, withAlpha);
+		           pSrc, srcStep, pDst, DstFormat, dstStep,
+		           width, height, shift, withAlpha);
 	}
 
 	for (h = 0; h < height; h++)
@@ -262,13 +265,14 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
 		if ((ULONG_PTR) dptr & 0x0f)
 		{
 			pstatus_t status;
-            UINT32 startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4;
+			UINT32 startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4;
 
 			if (startup > width) startup = width;
 
 			status = generic->YCoCgToRGB_8u_AC4R(
-						sptr, srcStep, dptr, DstFormat,
-						dstStep, startup, 1, shift, withAlpha);
+			             sptr, srcStep, dptr, DstFormat,
+			             dstStep, startup, 1, shift, withAlpha);
+
 			if (status != PRIMITIVES_SUCCESS)
 				return status;
 
@@ -385,8 +389,9 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
 		{
 			pstatus_t status;
 			status = generic->YCoCgToRGB_8u_AC4R(
-					 sptr, srcStep, dptr, DstFormat, dstStep,
-					 w, 1, shift, withAlpha);
+			             sptr, srcStep, dptr, DstFormat, dstStep,
+			             w, 1, shift, withAlpha);
+
 			if (status != PRIMITIVES_SUCCESS)
 				return status;
 
@@ -411,24 +416,23 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R(
     UINT8 shift,
     BOOL withAlpha)
 {
-	// TODO: Need to implement proper color conversion!!!
-	return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat,
-					   dstStep, width, height, shift, withAlpha);
-
-	switch(DstFormat)
+	switch (DstFormat)
 	{
-	case PIXEL_FORMAT_BGRX32:
-	case PIXEL_FORMAT_BGRA32:
-		return ssse3_YCoCgRToRGB_8u_AC4R_invert(
-					pSrc, srcStep, pDst, DstFormat, dstStep,
-					width, height, shift, withAlpha);
-	case PIXEL_FORMAT_RGBX32:
-	case PIXEL_FORMAT_RGBA32:
-		return ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
-					pSrc, srcStep, pDst, DstFormat, dstStep,
-					width, height, shift, withAlpha);
-	default:
-		return -1;
+		case PIXEL_FORMAT_BGRX32:
+		case PIXEL_FORMAT_BGRA32:
+			return ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
+			           pSrc, srcStep, pDst, DstFormat, dstStep,
+			           width, height, shift, withAlpha);
+
+		case PIXEL_FORMAT_RGBX32:
+		case PIXEL_FORMAT_RGBA32:
+			return ssse3_YCoCgRToRGB_8u_AC4R_invert(
+			           pSrc, srcStep, pDst, DstFormat, dstStep,
+			           width, height, shift, withAlpha);
+
+		default:
+			return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat,
+			                                   dstStep, width, height, shift, withAlpha);
 	}
 }
 #endif /* WITH_SSE2 */
diff --git a/libfreerdp/primitives/prim_YUV_opt.c b/libfreerdp/primitives/prim_YUV_opt.c
index cc902f14a..e0dda085d 100644
--- a/libfreerdp/primitives/prim_YUV_opt.c
+++ b/libfreerdp/primitives/prim_YUV_opt.c
@@ -25,7 +25,7 @@ static primitives_t* generic = NULL;
 #include <emmintrin.h>
 #include <tmmintrin.h>
 
-static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(
+static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R_BGRX(
     const BYTE** pSrc, const UINT32* srcStep,
     BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
     const prim_size_t* roi)
@@ -35,9 +35,6 @@ static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(
 	UINT32 i, nWidth, nHeight, VaddDst, VaddY, VaddU, VaddV;
 	__m128i r0, r1, r2, r3, r4, r5, r6, r7;
 	__m128i* buffer;
-	// TODO: Need to implement proper color conversion!!!!!
-	return generic->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep,
-	                                      DstFormat, roi);
 	/* last_line: if the last (U,V doubled) line should be skipped, set to 10B
 	 * last_column: if it's the last column in a line, set to 10B (for handling line-endings not multiple by four) */
 	buffer = _aligned_malloc(4 * 16, 16);
@@ -324,6 +321,21 @@ static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(
 	_aligned_free(buffer);
 	return PRIMITIVES_SUCCESS;
 }
+static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE** pSrc, const UINT32* srcStep,
+        BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
+        const prim_size_t* roi)
+{
+	/* Dispatch on the destination format: BGRX32/BGRA32 are handed to the
+	 * SSSE3 kernel, every other format is forwarded to the generic primitive. */
+	const BOOL bgrx = (DstFormat == PIXEL_FORMAT_BGRX32) ||
+	                  (DstFormat == PIXEL_FORMAT_BGRA32);
+
+	if (!bgrx)
+		return generic->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
+
+	return ssse3_YUV420ToRGB_8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
+}
+
 #endif
 
 void primitives_init_YUV_opt(primitives_t* prims)
diff --git a/libfreerdp/primitives/prim_colors.c b/libfreerdp/primitives/prim_colors.c
index 2f71895d2..545e93581 100644
--- a/libfreerdp/primitives/prim_colors.c
+++ b/libfreerdp/primitives/prim_colors.c
@@ -34,7 +34,7 @@
 /* ------------------------------------------------------------------------- */
 static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX(
     const INT16* pSrc[3], UINT32 srcStep,
-    BYTE* pDst, UINT32 DstFormat, UINT32 dstStep,
+    BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
     const prim_size_t* roi)
 {
 	UINT32 x, y;
@@ -51,23 +51,19 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX(
 		for (x = 0; x < roi->width; x++)
 		{
 			INT16 R, G, B;
-			const INT64 divisor = 20;
-			const INT64 Y = (pY[0] + 4096);
-			const INT64 Cb = (pCb[0]);
-			const INT64 Cr = (pCr[0]);
-			const INT64 CrR = Cr * (INT64)(1.402525f * (1 << divisor));
-			const INT64 CrG = Cr * (INT64)(0.714401f * (1 << divisor));
-			const INT64 CbG = Cb * (INT64)(0.343730f * (1 << divisor));
-			const INT64 CbB = Cb * (INT64)(1.769905f * (1 << divisor));
-			const INT64 Ytmp = (Y + 16) << divisor;
-			R = ((INT16)((CrR + Ytmp) >> divisor) >> 5);
-			G = ((INT16)((Ytmp - CbG - CrG) >> divisor) >> 5);
-			B = ((INT16)((CbB + Ytmp) >> divisor) >> 5);
+			const INT32 divisor = 16;
+			const INT32 Y = ((*pY++) + 4096) << divisor;
+			const INT32 Cb = (*pCb++);
+			const INT32 Cr = (*pCr++);
+			const INT32 CrR = Cr * (INT32)(1.402525f * (1 << divisor));
+			const INT32 CrG = Cr * (INT32)(0.714401f * (1 << divisor));
+			const INT32 CbG = Cb * (INT32)(0.343730f * (1 << divisor));
+			const INT32 CbB = Cb * (INT32)(1.769905f * (1 << divisor));
+			R = ((INT16)((CrR + Y) >> divisor) >> 5);
+			G = ((INT16)((Y - CbG - CrG) >> divisor) >> 5);
+			B = ((INT16)((CbB + Y) >> divisor) >> 5);
 			pRGB = writePixelBGRX(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G),
 			                      CLIP(B), 0xFF);
-			pY++;
-			pCb++;
-			pCr++;
 		}
 
 		pY += srcPad;
@@ -81,7 +77,7 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX(
 
 static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general(
     const INT16* pSrc[3], UINT32 srcStep,
-    BYTE* pDst, UINT32 DstFormat, UINT32 dstStep,
+    BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
     const prim_size_t* roi)
 {
 	UINT32 x, y;
@@ -99,23 +95,19 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general(
 		for (x = 0; x < roi->width; x++)
 		{
 			INT16 R, G, B;
-			const INT64 divisor = 20;
-			const INT64 Y = (pY[0] + 4096);
-			const INT64 Cb = (pCb[0]);
-			const INT64 Cr = (pCr[0]);
-			const INT64 CrR = Cr * (INT64)(1.402525f * (1 << divisor));
-			const INT64 CrG = Cr * (INT64)(0.714401f * (1 << divisor));
-			const INT64 CbG = Cb * (INT64)(0.343730f * (1 << divisor));
-			const INT64 CbB = Cb * (INT64)(1.769905f * (1 << divisor));
-			const INT64 Ytmp = (Y + 16) << divisor;
-			R = ((INT16)((CrR + Ytmp) >> divisor) >> 5);
-			G = ((INT16)((Ytmp - CbG - CrG) >> divisor) >> 5);
-			B = ((INT16)((CbB + Ytmp) >> divisor) >> 5);
+			const INT32 divisor = 16;
+			const INT32 Y = ((*pY++) + 4096) << divisor;
+			const INT32 Cb = (*pCb++);
+			const INT32 Cr = (*pCr++);
+			const INT32 CrR = Cr * (INT32)(1.402525f * (1 << divisor));
+			const INT32 CrG = Cr * (INT32)(0.714401f * (1 << divisor));
+			const INT32 CbG = Cb * (INT32)(0.343730f * (1 << divisor));
+			const INT32 CbB = Cb * (INT32)(1.769905f * (1 << divisor));
+			R = ((INT16)((CrR + Y) >> divisor) >> 5);
+			G = ((INT16)((Y - CbG - CrG) >> divisor) >> 5);
+			B = ((INT16)((CbB + Y) >> divisor) >> 5);
 			pRGB = (*writePixel)(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G),
 			                     CLIP(B), 0xFF);
-			pY++;
-			pCb++;
-			pCr++;
 		}
 
 		pY += srcPad;
@@ -129,16 +121,17 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general(
 
 static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(
     const INT16* pSrc[3], UINT32 srcStep,
-    BYTE* pDst, UINT32 DstFormat, UINT32 dstStep,
+    BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
     const prim_size_t* roi)
 {
 	switch (DstFormat)
 	{
+		case PIXEL_FORMAT_BGRA32:
 		case PIXEL_FORMAT_BGRX32:
-			return general_yCbCrToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, DstFormat, dstStep, roi);
+			return general_yCbCrToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
 
 		default:
-			return general_yCbCrToRGB_16s8u_P3AC4R_general(pSrc, srcStep, pDst, DstFormat, dstStep, roi);
+			return general_yCbCrToRGB_16s8u_P3AC4R_general(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
 	}
 }
 
diff --git a/libfreerdp/primitives/prim_colors_opt.c b/libfreerdp/primitives/prim_colors_opt.c
index 68e789780..de9baa50c 100644
--- a/libfreerdp/primitives/prim_colors_opt.c
+++ b/libfreerdp/primitives/prim_colors_opt.c
@@ -356,7 +356,7 @@ static pstatus_t sse2_RGBToYCbCr_16s16s_P3P3(
 #define XMM_ALL_ONES \
 	_mm_set1_epi32(0xFFFFFFFFU)
 
-pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
+static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_BGRX(
     const INT16* const pSrc[3],	/* 16-bit R,G, and B arrays */
     UINT32 srcStep,			/* bytes between rows in source data */
     BYTE* pDst,				/* 32-bit interleaved ARGB (ABGR?) data */
@@ -388,9 +388,6 @@ pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
 		                                      dstStep, DstFormat, roi);
 	}
 
-	// TODO: Need to update SSE code to allow color conversion!!!
-	return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst,
-	                                      dstStep, DstFormat, roi);
 	out = (BYTE*) pDst;
 	srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16);
 	dstbump = (dstStep - (roi->width * sizeof(UINT32)));
@@ -453,15 +450,31 @@ pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
 
 	return PRIMITIVES_SUCCESS;
 }
+static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
+    const INT16* const pSrc[3],	/* 16-bit R,G, and B arrays */
+    UINT32 srcStep,			/* bytes between rows in source data */
+    BYTE* pDst,				/* 32-bit interleaved ARGB (ABGR?) data */
+    UINT32 dstStep,			/* bytes between rows in dest data */
+    UINT32 DstFormat,
+    const prim_size_t* roi)
+{
+	/* Dispatch on the destination format: BGRA32/BGRX32 are handed to the
+	 * SSE2 kernel, every other format is forwarded to the generic primitive. */
+	const BOOL bgrx = (DstFormat == PIXEL_FORMAT_BGRA32) ||
+	                  (DstFormat == PIXEL_FORMAT_BGRX32);
+
+	if (!bgrx)
+		return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
+
+	return sse2_RGBToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
+}
 #endif /* WITH_SSE2 */
 
 /*---------------------------------------------------------------------------*/
 #ifdef WITH_NEON
 static pstatus_t neon_yCbCrToRGB_16s16s_P3P3(
-    const INT16* pSrc[3],
-    int srcStep,
-    INT16* pDst[3],
-    int dstStep,
+    const INT16* pSrc[3],  INT32 srcStep,
+    INT16* pDst[3],  INT32 dstStep,
     const prim_size_t* roi)	/* region of interest */
 {
 	/* TODO: If necessary, check alignments and call the general version. */
@@ -545,6 +558,7 @@ static pstatus_t neon_yCbCrToRGB_16s16s_P3P3(
 
 	return PRIMITIVES_SUCCESS;
 }
+
 #endif /* WITH_NEON */
 
 
diff --git a/libfreerdp/primitives/test/TestPrimitivesYCbCr.c b/libfreerdp/primitives/test/TestPrimitivesYCbCr.c
index 8b728c8d6..3ed66715d 100644
--- a/libfreerdp/primitives/test/TestPrimitivesYCbCr.c
+++ b/libfreerdp/primitives/test/TestPrimitivesYCbCr.c
@@ -2080,7 +2080,7 @@ static UINT32 TEST_XRGB_IMAGE[4096] =
 };
 
 static int test_bmp_cmp_count(const BYTE* mem1, const BYTE* mem2, int size,
-			      int channel, int margin)
+                              int channel, int margin)
 {
 	int error;
 	int count = 0;
@@ -2107,7 +2107,7 @@ static int test_bmp_cmp_count(const BYTE* mem1, const BYTE* mem2, int size,
 }
 
 static int test_bmp_cmp_dump(const BYTE* actual, const BYTE* expected, int size,
-			     int channel, int margin)
+                             int channel, int margin)
 {
 	int x, y;
 	int error[3];
@@ -2155,7 +2155,7 @@ static int test_bmp_cmp_dump(const BYTE* actual, const BYTE* expected, int size,
 }
 
 static void test_fill_bitmap_channel(BYTE* data, int width, int height,
-				     BYTE value, int nChannel)
+                                     BYTE value, int nChannel)
 {
 	int x, y;
 	BYTE* pChannel;
@@ -2299,8 +2299,8 @@ int TestPrimitivesYCbCr(int argc, char* argv[])
 	if (1)
 	{
 		status = prims->yCbCrToRGB_16s8u_P3AC4R((const INT16**) pYCbCr, 64 * 2,
-					       actual, PIXEL_FORMAT_BGRA32,
-					       64 * 4, &roi_64x64);
+		                                        actual, 64 * 4, PIXEL_FORMAT_BGRA32,
+		                                        &roi_64x64);
 	}
 	else
 	{
@@ -2312,9 +2312,9 @@ int TestPrimitivesYCbCr(int argc, char* argv[])
 		CopyMemory(pSrcDst[1], pYCbCr[1], 4096 * 2);
 		CopyMemory(pSrcDst[2], pYCbCr[2], 4096 * 2);
 		prims->yCbCrToRGB_16s16s_P3P3((const INT16**) pSrcDst, 64 * 2,
-					      pSrcDst, 64 * 2, &roi_64x64);
+		                              pSrcDst, 64 * 2, &roi_64x64);
 		prims->RGBToRGB_16s8u_P3AC4R((const INT16**) pSrcDst, 64 * 2,
-					     actual, 64 * 4, PIXEL_FORMAT_BGRA32, &roi_64x64);
+		                             actual, 64 * 4, PIXEL_FORMAT_BGRA32, &roi_64x64);
 		_aligned_free(pSrcDst[0]);
 		_aligned_free(pSrcDst[1]);
 		_aligned_free(pSrcDst[2]);
diff --git a/libfreerdp/primitives/test/TestPrimitivesYUV.c b/libfreerdp/primitives/test/TestPrimitivesYUV.c
index e14aeb7ec..d7f48bc44 100644
--- a/libfreerdp/primitives/test/TestPrimitivesYUV.c
+++ b/libfreerdp/primitives/test/TestPrimitivesYUV.c
@@ -1,13 +1,14 @@
 
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
 #include "prim_test.h"
 
 #include <winpr/wlog.h>
 #include <winpr/crypto.h>
 #include <freerdp/primitives.h>
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
+#include <freerdp/utils/profiler.h>
 
 #define TAG __FILE__
 
@@ -31,6 +32,52 @@ static BOOL similar(const BYTE* src, const BYTE* dst, size_t size)
 	return TRUE;
 }
 
+/* Compare `size` pixels of pixel format `format` read from `src` and `dst`.
+ * Returns TRUE when every R, G and B channel differs by at most 2. Alpha is
+ * compared the same way when ColorHasAlpha(format) is true; otherwise the
+ * destination alpha must read back as 0xFF. A diagnostic goes to stderr on
+ * the first mismatch.
+ * NOTE(review): dst is now really decoded (it used to be a second split of the
+ * source pixel), so the tolerance and alpha rule may need tuning - confirm. */
+static BOOL similarRGB(const BYTE* src, const BYTE* dst, size_t size, UINT32 format)
+{
+	size_t x;
+	const UINT32 bpp = GetBytesPerPixel(format);
+	const BOOL alpha = ColorHasAlpha(format);
+
+	for (x = 0; x < size; x++, src += bpp, dst += bpp)
+	{
+		BYTE sR, sG, sB, sA;
+		BYTE dR, dG, dB, dA;
+
+		SplitColor(ReadColor(src, format), format, &sR, &sG, &sB, &sA, NULL);
+		SplitColor(ReadColor(dst, format), format, &dR, &dG, &dB, &dA, NULL);
+
+		if ((abs(sR - dR) > 2) || (abs(sG - dG) > 2) || (abs(sB - dB) > 2))
+		{
+			fprintf(stderr, "Color value  mismatch R[%02X %02X], G[%02X %02X], B[%02X %02X] at position %"PRIuz"\n",
+			        sR, dR, sG, dG, sB, dB, x);
+			return FALSE;
+		}
+
+		/* Alpha: compare when the format has it, otherwise require 0xFF. */
+		if (alpha)
+		{
+			if (abs(sA - dA) > 2)
+			{
+				fprintf(stderr, "Alpha value  mismatch %02X %02X at position %"PRIuz"\n", sA, dA, x);
+				return FALSE;
+			}
+		}
+		else if (dA != 0xFF)
+		{
+			fprintf(stderr, "Invalid destination alpha value %02X at position %"PRIuz"\n", dA, x);
+			return FALSE;
+		}
+	}
+
+	return TRUE;
+}
+
 static void get_size(UINT32* width, UINT32* height)
 {
 	winpr_RAND((BYTE*)width, sizeof(*width));
@@ -41,7 +88,7 @@ static void get_size(UINT32* width, UINT32* height)
 }
 
 static BOOL check_padding(const BYTE* psrc, size_t size, size_t padding,
-			  const char* buffer)
+                          const char* buffer)
 {
 	size_t x;
 	BOOL rc = TRUE;
@@ -68,7 +115,7 @@ static BOOL check_padding(const BYTE* psrc, size_t size, size_t padding,
 				x++;
 
 			fprintf(stderr, "Buffer underflow detected %02"PRIx8" != %02X %s [%"PRIuz"-%"PRIuz"]\n",
-				d, 'A', buffer, start, x);
+			        d, 'A', buffer, start, x);
 			return FALSE;
 		}
 
@@ -80,7 +127,7 @@ static BOOL check_padding(const BYTE* psrc, size_t size, size_t padding,
 				x++;
 
 			fprintf(stderr, "Buffer overflow detected %02"PRIx8" != %02X %s [%"PRIuz"-%"PRIuz"]\n",
-				d, 'A', buffer, start, x);
+			        d, 'A', buffer, start, x);
 			return FALSE;
 		}
 	}
@@ -137,13 +184,17 @@ static BOOL TestPrimitiveYUVCombine(void)
 	UINT32 chromaStride[3];
 	UINT32 yuvStride[3];
 	size_t padding = 10000;
+	PROFILER_DEFINE(yuvCombine);
+	PROFILER_DEFINE(yuvSplit);
 	prim_size_t roi;
 	primitives_t* prims = primitives_get();
 	get_size(&roi.width, &roi.height);
 	awidth = roi.width + 16 - roi.width % 16;
 	aheight = roi.height + 16 - roi.height % 16;
 	fprintf(stderr, "Running YUVCombine on frame size %"PRIu32"x%"PRIu32" [%"PRIu32"x%"PRIu32"]\n",
-		roi.width, roi.height, awidth, aheight);
+	        roi.width, roi.height, awidth, aheight);
+	PROFILER_CREATE(yuvCombine, "YUV420CombineToYUV444");
+	PROFILER_CREATE(yuvSplit, "YUV444SplitToYUV420");
 
 	if (!prims || !prims->YUV420CombineToYUV444)
 		goto fail;
@@ -193,10 +244,18 @@ static BOOL TestPrimitiveYUVCombine(void)
 			goto fail;
 	}
 
+	PROFILER_ENTER(yuvCombine);
+
 	if (prims->YUV420CombineToYUV444((const BYTE**)luma, lumaStride,
-					 (const BYTE**)chroma, chromaStride,
-					 yuv, yuvStride, &roi) != PRIMITIVES_SUCCESS)
+	                                 (const BYTE**)chroma, chromaStride,
+	                                 yuv, yuvStride, &roi) != PRIMITIVES_SUCCESS)
+	{
+		PROFILER_EXIT(yuvCombine);
 		goto fail;
+	}
+
+	PROFILER_EXIT(yuvCombine);
+	PROFILER_PRINT(yuvCombine);
 
 	for (x = 0; x < 3; x++)
 	{
@@ -214,9 +273,17 @@ static BOOL TestPrimitiveYUVCombine(void)
 			goto fail;
 	}
 
+	PROFILER_ENTER(yuvSplit);
+
 	if (prims->YUV444SplitToYUV420((const BYTE**)yuv, yuvStride, pmain, lumaStride,
-				       paux, chromaStride, &roi) != PRIMITIVES_SUCCESS)
+	                               paux, chromaStride, &roi) != PRIMITIVES_SUCCESS)
+	{
+		PROFILER_EXIT(yuvSplit);
 		goto fail;
+	}
+
+	PROFILER_EXIT(yuvSplit);
+	PROFILER_PRINT(yuvSplit);
 
 	for (x = 0; x < 3; x++)
 	{
@@ -251,8 +318,8 @@ static BOOL TestPrimitiveYUVCombine(void)
 			}
 
 			if (!similar(luma[i] + y * lstride,
-				     pmain[i]  + y * lstride,
-				     w))
+			             pmain[i]  + y * lstride,
+			             w))
 				goto fail;
 
 			/* Need to ignore lines of destination Y plane,
@@ -270,14 +337,16 @@ static BOOL TestPrimitiveYUVCombine(void)
 			}
 
 			if (!similar(chroma[i] + y * cstride,
-				     paux[i]  + y * cstride,
-				     w))
+			             paux[i]  + y * cstride,
+			             w))
 				goto fail;
 		}
 	}
 
 	rc = TRUE;
 fail:
+	PROFILER_FREE(yuvCombine);
+	PROFILER_FREE(yuvSplit);
 
 	for (x = 0; x < 3; x++)
 	{
@@ -306,12 +375,31 @@ static BOOL TestPrimitiveYUV(BOOL use444)
 	size_t uvsize, uvwidth;
 	size_t padding = 10000;
 	size_t stride;
+	const UINT32 formats[] =
+	{
+		PIXEL_FORMAT_XRGB32,
+		PIXEL_FORMAT_XBGR32,
+		PIXEL_FORMAT_ARGB32,
+		PIXEL_FORMAT_ABGR32,
+		PIXEL_FORMAT_RGBA32,
+		PIXEL_FORMAT_RGBX32,
+		PIXEL_FORMAT_BGRA32,
+		PIXEL_FORMAT_BGRX32
+	};
+	PROFILER_DEFINE(rgbToYUV420);
+	PROFILER_DEFINE(rgbToYUV444);
+	PROFILER_DEFINE(yuv420ToRGB);
+	PROFILER_DEFINE(yuv444ToRGB);
 	get_size(&roi.width, &roi.height);
 	/* Buffers need to be 16x16 aligned. */
 	awidth = roi.width + 16 - roi.width % 16;
 	aheight = roi.height + 16 - roi.height % 16;
 	stride = awidth * sizeof(UINT32);
 	size = awidth * aheight;
+	PROFILER_CREATE(rgbToYUV420, "RGBToYUV420");
+	PROFILER_CREATE(rgbToYUV444, "RGBToYUV444");
+	PROFILER_CREATE(yuv420ToRGB, "YUV420ToRGB");
+	PROFILER_CREATE(yuv444ToRGB, "YUV444ToRGB");
 
 	if (use444)
 	{
@@ -331,7 +419,7 @@ static BOOL TestPrimitiveYUV(BOOL use444)
 	}
 
 	fprintf(stderr, "Running AVC%s on frame size %"PRIu32"x%"PRIu32"\n", use444 ? "444" : "420",
-		roi.width, roi.height);
+	        roi.width, roi.height);
 
 	/* Test RGB to YUV444 conversion and vice versa */
 	if (!(rgb = set_padding(size * sizeof(UINT32), padding)))
@@ -366,56 +454,103 @@ static BOOL TestPrimitiveYUV(BOOL use444)
 	yuv_step[1] = uvwidth;
 	yuv_step[2] = uvwidth;
 
-	if (use444)
+	for (x = 0; x < sizeof(formats) / sizeof(formats[0]); x++)
 	{
-		if (prims->RGBToYUV444_8u_P3AC4R(rgb, PIXEL_FORMAT_BGRA32,
-						 stride, yuv, yuv_step,
-						 &roi) != PRIMITIVES_SUCCESS)
+		const UINT32 DstFormat = formats[x];
+
+		if (use444)
+		{
+			PROFILER_ENTER(rgbToYUV444);
+
+			if (prims->RGBToYUV444_8u_P3AC4R(rgb, DstFormat,
+			                                 stride, yuv, yuv_step,
+			                                 &roi) != PRIMITIVES_SUCCESS)
+			{
+				PROFILER_EXIT(rgbToYUV444);
+				goto fail;
+			}
+
+			PROFILER_EXIT(rgbToYUV444);
+			PROFILER_PRINT(rgbToYUV444);
+		}
+		else
+		{
+			PROFILER_ENTER(rgbToYUV420);
+
+			if (prims->RGBToYUV420_8u_P3AC4R(rgb, DstFormat,
+			                                 stride, yuv, yuv_step,
+			                                 &roi) != PRIMITIVES_SUCCESS)
+			{
+				PROFILER_EXIT(rgbToYUV420);
+				goto fail;
+			}
+
+			PROFILER_EXIT(rgbToYUV420);
+			PROFILER_PRINT(rgbToYUV420);
+		}
+
+		if (!check_padding(rgb, size * sizeof(UINT32), padding, "rgb"))
 			goto fail;
-	}
-	else if (prims->RGBToYUV420_8u_P3AC4R(rgb, PIXEL_FORMAT_BGRA32,
-					      stride, yuv, yuv_step,
-					      &roi) != PRIMITIVES_SUCCESS)
-		goto fail;
 
-	if (!check_padding(rgb, size * sizeof(UINT32), padding, "rgb"))
-		goto fail;
-
-	if ((!check_padding(yuv[0], size, padding, "Y")) ||
-	    (!check_padding(yuv[1], uvsize, padding, "U")) ||
-	    (!check_padding(yuv[2], uvsize, padding, "V")))
-		goto fail;
-
-	if (use444)
-	{
-		if (prims->YUV444ToRGB_8u_P3AC4R((const BYTE**)yuv, yuv_step, rgb_dst, stride,
-						 PIXEL_FORMAT_BGRA32,
-						 &roi) != PRIMITIVES_SUCCESS)
+		if ((!check_padding(yuv[0], size, padding, "Y")) ||
+		    (!check_padding(yuv[1], uvsize, padding, "U")) ||
+		    (!check_padding(yuv[2], uvsize, padding, "V")))
 			goto fail;
-	}
-	else if (prims->YUV420ToRGB_8u_P3AC4R((const BYTE**)yuv, yuv_step, rgb_dst,
-					      stride, PIXEL_FORMAT_BGRA32, &roi) != PRIMITIVES_SUCCESS)
-		goto fail;
 
-	if (!check_padding(rgb_dst, size * sizeof(UINT32), padding, "rgb dst"))
-		goto fail;
+		if (use444)
+		{
+			PROFILER_ENTER(yuv444ToRGB);
 
-	if ((!check_padding(yuv[0], size, padding, "Y")) ||
-	    (!check_padding(yuv[1], uvsize, padding, "U")) ||
-	    (!check_padding(yuv[2], uvsize, padding, "V")))
-		goto fail;
+			if (prims->YUV444ToRGB_8u_P3AC4R((const BYTE**)yuv, yuv_step, rgb_dst, stride,
+			                                 DstFormat,
+			                                 &roi) != PRIMITIVES_SUCCESS)
+			{
+				PROFILER_EXIT(yuv444ToRGB);
+				goto fail;
+			}
 
-	for (y = 0; y < roi.height; y++)
-	{
-		BYTE* srgb = &rgb[y * stride];
-		BYTE* drgb = &rgb_dst[y * stride];
+			PROFILER_EXIT(yuv444ToRGB);
+			PROFILER_PRINT(yuv444ToRGB);
+		}
+		else
+		{
+			PROFILER_ENTER(yuv420ToRGB);
 
-		if (!similar(srgb, drgb, roi.width * sizeof(UINT32)))
+			if (prims->YUV420ToRGB_8u_P3AC4R((const BYTE**)yuv, yuv_step, rgb_dst,
+			                                 stride, DstFormat, &roi) != PRIMITIVES_SUCCESS)
+			{
+				PROFILER_EXIT(yuv420ToRGB);
+				goto fail;
+			}
+
+			PROFILER_EXIT(yuv420ToRGB);
+			PROFILER_PRINT(yuv420ToRGB);
+		}
+
+		if (!check_padding(rgb_dst, size * sizeof(UINT32), padding, "rgb dst"))
 			goto fail;
+
+		if ((!check_padding(yuv[0], size, padding, "Y")) ||
+		    (!check_padding(yuv[1], uvsize, padding, "U")) ||
+		    (!check_padding(yuv[2], uvsize, padding, "V")))
+			goto fail;
+
+		for (y = 0; y < roi.height; y++)
+		{
+			BYTE* srgb = &rgb[y * stride];
+			BYTE* drgb = &rgb_dst[y * stride];
+
+			if (!similarRGB(srgb, drgb, roi.width, DstFormat))
+				goto fail;
+		}
 	}
 
 	rc = TRUE;
 fail:
+	PROFILER_FREE(rgbToYUV420);
+	PROFILER_FREE(rgbToYUV444);
+	PROFILER_FREE(yuv420ToRGB);
+	PROFILER_FREE(yuv444ToRGB);
 	free_padding(rgb, padding);
 	free_padding(rgb_dst, padding);
 	free_padding(yuv[0], padding);
@@ -428,7 +563,6 @@ int TestPrimitivesYUV(int argc, char* argv[])
 {
 	UINT32 x;
 	int rc = -1;
-
 	prim_test_setup(FALSE);
 
 	for (x = 0; x < 10; x++)
diff --git a/libfreerdp/primitives/test/measure.h b/libfreerdp/primitives/test/measure.h
index 4d4c13d29..00ab2e7e0 100644
--- a/libfreerdp/primitives/test/measure.h
+++ b/libfreerdp/primitives/test/measure.h
@@ -22,8 +22,12 @@
  * Define GOOGLE_PROFILER if you want gperftools included.
  */
 
-#ifndef __MEASURE_H_INCLUDED__
-#define __MEASURE_H_INCLUDED__
+#ifndef TEST_MEASURE_H_INCLUDED
+#define TEST_MEASURE_H_INCLUDED
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
 
 #include <time.h>
 
diff --git a/winpr/include/winpr/platform.h b/winpr/include/winpr/platform.h
index 91291ed94..bc1c984cc 100644
--- a/winpr/include/winpr/platform.h
+++ b/winpr/include/winpr/platform.h
@@ -86,6 +86,15 @@
 #endif
 #endif
 
+/* MIPS64 (_M_MIPS64) */
+
+#if defined(mips64) || defined(__mips64) || \
+	defined(__mips64__) || defined(__MIPS64__)
+#ifndef _M_MIPS64
+#define	 _M_MIPS64	1
+#endif
+#endif
+
 /* PowerPC (_M_PPC) */
 
 #if defined(__ppc__) || defined(__powerpc) || \
diff --git a/winpr/include/winpr/sysinfo.h b/winpr/include/winpr/sysinfo.h
index 331e29921..b04ce9cdb 100644
--- a/winpr/include/winpr/sysinfo.h
+++ b/winpr/include/winpr/sysinfo.h
@@ -44,6 +44,8 @@ extern "C" {
 #define PROCESSOR_ARCHITECTURE_AMD64			9
 #define PROCESSOR_ARCHITECTURE_IA32_ON_WIN64		10
 #define PROCESSOR_ARCHITECTURE_NEUTRAL			11
+#define PROCESSOR_ARCHITECTURE_ARM64			12
+#define PROCESSOR_ARCHITECTURE_MIPS64			13
 #define PROCESSOR_ARCHITECTURE_UNKNOWN			0xFFFF
 
 #define PROCESSOR_INTEL_386				386
@@ -189,7 +191,8 @@ WINPR_API VOID GetLocalTime(LPSYSTEMTIME lpSystemTime);
 WINPR_API BOOL SetLocalTime(CONST SYSTEMTIME* lpSystemTime);
 
 WINPR_API VOID GetSystemTimeAsFileTime(LPFILETIME lpSystemTimeAsFileTime);
-WINPR_API BOOL GetSystemTimeAdjustment(PDWORD lpTimeAdjustment, PDWORD lpTimeIncrement, PBOOL lpTimeAdjustmentDisabled);
+WINPR_API BOOL GetSystemTimeAdjustment(PDWORD lpTimeAdjustment, PDWORD lpTimeIncrement,
+                                       PBOOL lpTimeAdjustmentDisabled);
 
 WINPR_API BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature);
 
diff --git a/winpr/libwinpr/sysinfo/CMakeLists.txt b/winpr/libwinpr/sysinfo/CMakeLists.txt
index 60592ba1a..f9b7f69bf 100644
--- a/winpr/libwinpr/sysinfo/CMakeLists.txt
+++ b/winpr/libwinpr/sysinfo/CMakeLists.txt
@@ -15,6 +15,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+if(ANDROID)
+	add_subdirectory(cpufeatures)
+endif()
+
 winpr_module_add(sysinfo.c)
 
 if((NOT WIN32) AND (NOT APPLE) AND (NOT ANDROID) AND (NOT OPENBSD))
diff --git a/winpr/libwinpr/sysinfo/cpufeatures/CMakeLists.txt b/winpr/libwinpr/sysinfo/cpufeatures/CMakeLists.txt
new file mode 100644
index 000000000..f1b93df73
--- /dev/null
+++ b/winpr/libwinpr/sysinfo/cpufeatures/CMakeLists.txt
@@ -0,0 +1,20 @@
+# WinPR: Windows Portable Runtime
+# libwinpr-sysinfo cmake build script
+#
+# Copyright 2017 Armin Novak <armin.novak@thincast.com>
+# Copyright 2017 Thincast Technologies GmbH
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+winpr_module_add(cpu-features.c cpu-features.h)
+
diff --git a/winpr/libwinpr/sysinfo/cpufeatures/NOTICE b/winpr/libwinpr/sysinfo/cpufeatures/NOTICE
new file mode 100644
index 000000000..d6c092292
--- /dev/null
+++ b/winpr/libwinpr/sysinfo/cpufeatures/NOTICE
@@ -0,0 +1,13 @@
+Copyright (C) 2016 The Android Open Source Project
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/winpr/libwinpr/sysinfo/cpufeatures/README b/winpr/libwinpr/sysinfo/cpufeatures/README
new file mode 100644
index 000000000..ba85c2097
--- /dev/null
+++ b/winpr/libwinpr/sysinfo/cpufeatures/README
@@ -0,0 +1,4 @@
+Android CPUFeatures Library
+
+https://developer.android.com/ndk/guides/cpu-features.html
+https://android.googlesource.com/platform/ndk/+/master/sources/android/cpufeatures
diff --git a/winpr/libwinpr/sysinfo/cpufeatures/cpu-features.c b/winpr/libwinpr/sysinfo/cpufeatures/cpu-features.c
new file mode 100644
index 000000000..adff1d7b1
--- /dev/null
+++ b/winpr/libwinpr/sysinfo/cpufeatures/cpu-features.c
@@ -0,0 +1,1472 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* ChangeLog for this library:
+ *
+ * NDK r10e?: Add MIPS MSA feature.
+ *
+ * NDK r10: Support for 64-bit CPUs (Intel, ARM & MIPS).
+ *
+ * NDK r8d: Add android_setCpu().
+ *
+ * NDK r8c: Add new ARM CPU features: VFPv2, VFP_D32, VFP_FP16,
+ *          VFP_FMA, NEON_FMA, IDIV_ARM, IDIV_THUMB2 and iWMMXt.
+ *
+ *          Rewrite the code to parse /proc/self/auxv instead of
+ *          the "Features" field in /proc/cpuinfo.
+ *
+ *          Dynamically allocate the buffer that hold the content
+ *          of /proc/cpuinfo to deal with newer hardware.
+ *
+ * NDK r7c: Fix CPU count computation. The old method only reported the
+ *           number of _active_ CPUs when the library was initialized,
+ *           which could be less than the real total.
+ *
+ * NDK r5: Handle buggy kernels which report a CPU Architecture number of 7
+ *         for an ARMv6 CPU (see below).
+ *
+ *         Handle kernels that only report 'neon', and not 'vfpv3'
+ *         (VFPv3 is mandated by the ARM architecture if Neon is implemented)
+ *
+ *         Handle kernels that only report 'vfpv3d16', and not 'vfpv3'
+ *
+ *         Fix x86 compilation. Report ANDROID_CPU_FAMILY_X86 in
+ *         android_getCpuFamily().
+ *
+ * NDK r4: Initial release
+ */
+
+#include "cpu-features.h"
+
+#include <dlfcn.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/system_properties.h>
+#include <unistd.h>
+
+static  pthread_once_t     g_once;
+static  int                g_inited;
+static  AndroidCpuFamily   g_cpuFamily;
+static  uint64_t           g_cpuFeatures;
+static  int                g_cpuCount;
+
+#ifdef __arm__
+static  uint32_t           g_cpuIdArm;
+#endif
+
+static const int android_cpufeatures_debug = 0;
+
+#define  D(...) \
+	do { \
+		if (android_cpufeatures_debug) { \
+			printf(__VA_ARGS__); fflush(stdout); \
+		} \
+	} while (0)
+
+#ifdef __i386__
+static __inline__ void x86_cpuid(int func, int values[4])
+{
+	int a, b, c, d;
+	/* EBX must be preserved since we're compiling PIC code, so "=b" cannot */
+	/* be an output: EBX is copied into operand %1 and then restored. */
+	__asm__ __volatile__(\
+	                     "push %%ebx\n"
+	                     "cpuid\n" \
+	                     "mov %%ebx, %1\n"
+	                     "pop %%ebx\n"
+	                     : "=a"(a), "=r"(b), "=c"(c), "=d"(d) \
+	                     : "a"(func) \
+	                    );
+	values[0] = a; /* EAX */
+	values[1] = b; /* EBX */
+	values[2] = c; /* ECX */
+	values[3] = d; /* EDX */
+}
+#elif defined(__x86_64__)
+static __inline__ void x86_cpuid(int func, int values[4])
+{
+	int64_t a, b, c, d;
+	/* RBX is saved and restored around CPUID (PIC code), so "=b" is not */
+	/* used as an output: RBX is copied into operand %1 instead. */
+	__asm__ __volatile__(\
+	                     "push %%rbx\n"
+	                     "cpuid\n" \
+	                     "mov %%rbx, %1\n"
+	                     "pop %%rbx\n"
+	                     : "=a"(a), "=r"(b), "=c"(c), "=d"(d) \
+	                     : "a"(func) \
+	                    );
+	values[0] = a; /* RAX, narrowed from int64_t to int */
+	values[1] = b; /* RBX, narrowed from int64_t to int */
+	values[2] = c; /* RCX, narrowed from int64_t to int */
+	values[3] = d; /* RDX, narrowed from int64_t to int */
+}
+#endif
+
+/* Return the byte size of 'pathname' by reading it to EOF (-1 if open()
+ * fails; a read error yields the bytes counted so far). Needed because /proc
+ * files do not always report a valid size via fseek()/ftell(), nor can they be mmap()-ed.
+ */
+static int
+get_file_size(const char* pathname)
+{
+	int fd, result = 0; /* 'result' accumulates the number of bytes read */
+	char buffer[256]; /* scratch space only; the data itself is discarded */
+	fd = open(pathname, O_RDONLY);
+
+	if (fd < 0)
+	{
+		D("Can't open %s: %s\n", pathname, strerror(errno));
+		return -1;
+	}
+
+	for (;;)
+	{
+		int ret = read(fd, buffer, sizeof buffer);
+
+		if (ret < 0)
+		{
+			if (errno == EINTR) /* interrupted by a signal: retry the read */
+				continue;
+
+			D("Error while reading %s: %s\n", pathname, strerror(errno));
+			break; /* falls through to return the partial count */
+		}
+
+		if (ret == 0) /* end of file */
+			break;
+
+		result += ret;
+	}
+
+	close(fd);
+	return result;
+}
+
+/* Read the content of 'pathname' into a user-provided buffer.
+ * Return the length of the data, or -1 if the file cannot be opened or a
+ * read fails before any byte was stored (a later error returns the partial
+ * length). Does *not* zero-terminate; reads at most 'buffsize' bytes.
+ */
+static int
+read_file(const char*  pathname, char*  buffer, size_t  buffsize)
+{
+	int  fd, count; /* 'count' is the number of bytes stored in 'buffer' so far */
+	fd = open(pathname, O_RDONLY);
+
+	if (fd < 0)
+	{
+		D("Could not open %s: %s\n", pathname, strerror(errno));
+		return -1;
+	}
+
+	count = 0;
+
+	while (count < (int)buffsize)
+	{
+		int ret = read(fd, buffer + count, buffsize - count);
+
+		if (ret < 0)
+		{
+			if (errno == EINTR) /* interrupted by a signal: retry the read */
+				continue;
+
+			D("Error while reading from %s: %s\n", pathname, strerror(errno));
+
+			if (count == 0) /* nothing read yet: report a hard error */
+				count = -1;
+
+			break;
+		}
+
+		if (ret == 0) /* end of file */
+			break;
+
+		count += ret;
+	}
+
+	close(fd);
+	return count;
+}
+
+#ifdef __arm__
+/* Extract the content of the first occurrence of a given field in
+ * the content of /proc/cpuinfo and return it as a heap-allocated,
+ * NUL-terminated string that must be freed by the caller.
+ * 'buffer' holds 'buflen' bytes and does not need to be NUL-terminated.
+ * Return NULL if not found, if no ": " follows, or if allocation fails.
+ */
+static char*
+extract_cpuinfo_field(const char* buffer, int buflen, const char* field)
+{
+	int  fieldlen = strlen(field);
+	const char* bufend = buffer + buflen;
+	char* result = NULL;
+	int len;
+	const char* p, *q;
+	/* Look for first field occurrence, and ensure it starts the line. */
+	p = buffer;
+
+	for (;;)
+	{
+		p = memmem(p, bufend - p, field, fieldlen);
+
+		if (p == NULL)
+			goto EXIT;
+
+		if (p == buffer || p[-1] == '\n')
+			break;
+
+		p += fieldlen; /* match was mid-line: resume the search after it */
+	}
+
+	/* Skip to the first colon, which must be followed by a space */
+	p += fieldlen;
+	p  = memchr(p, ':', bufend - p);
+
+	if (p == NULL || p + 1 >= bufend || p[1] != ' ') /* never read past bufend */
+		goto EXIT;
+
+	/* Find the end of the line */
+	p += 2;
+	q = memchr(p, '\n', bufend - p);
+
+	if (q == NULL) /* last line without a trailing newline */
+		q = bufend;
+
+	/* Copy the line into a heap-allocated buffer */
+	len = q - p;
+	result = malloc(len + 1);
+
+	if (result == NULL)
+		goto EXIT;
+
+	memcpy(result, p, len);
+	result[len] = '\0';
+EXIT:
+	return result;
+}
+
+/* Checks that a space- or tab-separated 'list' contains exactly 'item'.
+ * Returns 1 if found, 0 otherwise (including when 'list' is NULL).
+ */
+static int
+has_list_item(const char* list, const char* item)
+{
+	const char*  p = list;
+	int itemlen = strlen(item); /* 'item' must not be NULL */
+
+	if (list == NULL)
+		return 0;
+
+	while (*p)
+	{
+		const char*  q;
+
+		/* skip spaces */
+		while (*p == ' ' || *p == '\t')
+			p++;
+
+		/* find end of current list item */
+		q = p;
+
+		while (*q && *q != ' ' && *q != '\t')
+			q++;
+
+		if (itemlen == q - p && !memcmp(p, item, itemlen)) /* whole-token match only */
+			return 1;
+
+		/* skip to next item */
+		p = q;
+	}
+
+	return 0;
+}
+#endif /* __arm__ */
+
+/* Parse a number in the given 'base' starting from 'input', but not going
+ * further than 'limit'. Return the value into '*result'.
+ *
+ * NOTE: Does not skip over leading spaces, or deal with sign characters.
+ * NOTE: Ignores overflows. Digits are 0-9, a-f and A-F, so 'base' <= 16.
+ *
+ * The function returns NULL in case of error (no digit consumed), or the new
+ * position after the parsed number in case of success (which will always
+ * be <= 'limit'). '*result' is left untouched when NULL is returned.
+ */
+static const char*
+parse_number(const char* input, const char* limit, int base, int* result)
+{
+	const char* p = input;
+	int val = 0;
+
+	while (p < limit)
+	{
+		int d = (*p - '0'); /* digit value if *p is '0'..'9' */
+
+		if ((unsigned)d >= 10U) /* not a decimal digit: try a hex letter */
+		{
+			d = (*p - 'a');
+
+			if ((unsigned)d >= 6U)
+				d = (*p - 'A');
+
+			if ((unsigned)d >= 6U) /* neither a-f nor A-F: end of number */
+				break;
+
+			d += 10;
+		}
+
+		if (d >= base) /* digit not valid in this base: end of number */
+			break;
+
+		val = val * base + d;
+		p++;
+	}
+
+	if (p == input) /* nothing was consumed */
+		return NULL;
+
+	*result = val;
+	return p;
+}
+
+static const char*
+parse_decimal(const char* input, const char* limit, int* result)
+{
+	return parse_number(input, limit, 10, result); /* base-10 wrapper; same return contract */
+}
+
+#ifdef __arm__
+static const char*
+parse_hexadecimal(const char* input, const char* limit, int* result)
+{
+	return parse_number(input, limit, 16, result); /* base-16 wrapper; does not skip a "0x" prefix */
+}
+#endif /* __arm__ */
+
+/* This small data type is used to represent a CPU list / mask, as read
+ * from sysfs on Linux. See http://www.kernel.org/doc/Documentation/cputopology.txt
+ *
+ * For now, we don't expect more than 32 cores on mobile devices, so keep
+ * everything simple.
+ */
+typedef struct
+{
+	uint32_t mask; /* bit N set means CPU index N is in the list (N in 0..31) */
+} CpuList;
+
+static __inline__ void
+cpulist_init(CpuList* list)
+{
+	list->mask = 0; /* start from the empty set */
+}
+
+static __inline__ void
+cpulist_and(CpuList* list1, CpuList* list2)
+{
+	list1->mask &= list2->mask; /* list1 becomes the intersection; list2 is unchanged */
+}
+
+static __inline__ void
+cpulist_set(CpuList* list, int index)
+{
+	if ((unsigned)index < 32) /* negative or >= 32 indices are silently ignored */
+	{
+		list->mask |= (uint32_t)(1U << index);
+	}
+}
+
+static __inline__ int
+cpulist_count(CpuList* list)
+{
+	return __builtin_popcount(list->mask); /* number of CPUs in the list = number of set bits */
+}
+
+/* Parse a textual list of cpus and store the result inside a CpuList object.
+ * Input format is the following:
+ * - comma-separated list of items (no spaces)
+ * - each item is either a single decimal number (cpu index), or a range made
+ *   of two numbers separated by a single dash (-). Ranges are inclusive.
+ * Bits are OR-ed into 'list'; parsing stops at the first malformed item.
+ * Examples:   0
+ *             2,4-127,128-143
+ *             0-1
+ */
+static void
+cpulist_parse(CpuList* list, const char* line, int line_len)
+{
+	const char* p = line;
+	const char* end = p + line_len;
+	const char* q;
+
+	/* NOTE: the input line coming from sysfs typically contains a
+	 * trailing newline, so take care of it in the code below
+	 */
+	while (p < end && *p != '\n')
+	{
+		int val, start_value, end_value;
+		/* Find the end of current item, and put it into 'q' */
+		q = memchr(p, ',', end - p);
+
+		if (q == NULL)
+		{
+			q = end;
+		}
+
+		/* Get first value */
+		p = parse_decimal(p, q, &start_value);
+
+		if (p == NULL)
+			goto BAD_FORMAT;
+
+		end_value = start_value;
+
+		/* If we're not at the end of the item, expect a dash and
+		 * an integer; extract end value.
+		 */
+		if (p < q && *p == '-')
+		{
+			p = parse_decimal(p + 1, q, &end_value);
+
+			if (p == NULL)
+				goto BAD_FORMAT;
+		}
+
+		/* Set the CPU list bits; cpulist_set() ignores indices >= 32 */
+		for (val = start_value; val <= end_value; val++)
+		{
+			cpulist_set(list, val);
+		}
+
+		/* Jump to next item */
+		p = q;
+
+		if (p < end)
+			p++;
+	}
+
+BAD_FORMAT:
+	;
+}
+
+/* Read a CPU list from one sysfs file; 'list' is left empty if it cannot be read */
+static void
+cpulist_read_from(CpuList* list, const char* filename)
+{
+	char   file[64]; /* at most the first 64 bytes of the file are parsed */
+	int    filelen;
+	cpulist_init(list);
+	filelen = read_file(filename, file, sizeof file);
+
+	if (filelen < 0)
+	{
+		D("Could not read %s: %s\n", filename, strerror(errno));
+		return;
+	}
+
+	cpulist_parse(list, file, filelen);
+}
+#if defined(__aarch64__)
+// see <uapi/asm/hwcap.h> kernel header
+#define HWCAP_FP                (1 << 0)
+#define HWCAP_ASIMD             (1 << 1)
+#define HWCAP_AES               (1 << 3)
+#define HWCAP_PMULL             (1 << 4)
+#define HWCAP_SHA1              (1 << 5)
+#define HWCAP_SHA2              (1 << 6)
+#define HWCAP_CRC32             (1 << 7)
+#endif
+
+#if defined(__arm__)
+
+// See <asm/hwcap.h> kernel header.
+#define HWCAP_VFP       (1 << 6)
+#define HWCAP_IWMMXT    (1 << 9)
+#define HWCAP_NEON      (1 << 12)
+#define HWCAP_VFPv3     (1 << 13)
+#define HWCAP_VFPv3D16  (1 << 14)
+#define HWCAP_VFPv4     (1 << 16)
+#define HWCAP_IDIVA     (1 << 17)
+#define HWCAP_IDIVT     (1 << 18)
+
+// see <uapi/asm/hwcap.h> kernel header
+#define HWCAP2_AES     (1 << 0)
+#define HWCAP2_PMULL   (1 << 1)
+#define HWCAP2_SHA1    (1 << 2)
+#define HWCAP2_SHA2    (1 << 3)
+#define HWCAP2_CRC32   (1 << 4)
+
+// This is the list of 32-bit ARMv7 optional features that are _always_
+// supported by ARMv8 CPUs, as mandated by the ARM Architecture Reference
+// Manual.
+#define HWCAP_SET_FOR_ARMV8  \
+	( HWCAP_VFP | \
+	  HWCAP_NEON | \
+	  HWCAP_VFPv3 | \
+	  HWCAP_VFPv4 | \
+	  HWCAP_IDIVA | \
+	  HWCAP_IDIVT )
+#endif
+
+#if defined(__mips__)
+// see <uapi/asm/hwcap.h> kernel header
+#define HWCAP_MIPS_R6           (1 << 0)
+#define HWCAP_MIPS_MSA          (1 << 1)
+#endif
+
+#if defined(__arm__) || defined(__aarch64__) || defined(__mips__)
+
+#define AT_HWCAP 16
+#define AT_HWCAP2 26
+
+// Probe the system's C library for a 'getauxval' function and call it if
+// it exists, or return 0 for failure. This function is available since API
+// level 20.
+//
+// This code does *NOT* check for '__ANDROID_API__ >= 20' to support the
+// edge case where some NDK developers use headers for a platform that is
+// newer than the one really targeted by their application.
+// This is typically done to use newer native APIs only when running on more
+// recent Android versions, and requires careful symbol management.
+//
+// Note that getauxval() can't really be re-implemented here, because
+// its implementation does not parse /proc/self/auxv. Instead it depends
+// on values that are passed by the kernel at process-init time to the
+// C runtime initialization layer.
+static uint32_t
+get_elf_hwcap_from_getauxval(int hwcap_type)
+{
+	typedef unsigned long getauxval_func_t(unsigned long);
+	dlerror(); /* discard any pending error message before dlopen() */
+	void* libc_handle = dlopen("libc.so", RTLD_NOW);
+
+	if (!libc_handle)
+	{
+		D("Could not dlopen() C library: %s\n", dlerror());
+		return 0;
+	}
+
+	uint32_t ret = 0;
+	getauxval_func_t* func = (getauxval_func_t*)
+	                         dlsym(libc_handle, "getauxval");
+
+	if (!func)
+	{
+		D("Could not find getauxval() in C library\n");
+	}
+	else
+	{
+		// Note: getauxval() returns 0 on failure. Doesn't touch errno.
+		ret = (uint32_t)(*func)(hwcap_type); // unsigned long result narrowed to uint32_t
+	}
+
+	dlclose(libc_handle);
+	return ret;
+}
+#endif
+
+#if defined(__arm__)
+// Parse /proc/self/auxv to extract the ELF HW capabilities bitmap for the
+// current CPU. Note that this file is not accessible from regular
+// application processes on some Android platform releases.
+// On success, return the AT_HWCAP value, or 0 on failure / if not found.
+static uint32_t
+get_elf_hwcap_from_proc_self_auxv(void)
+{
+	const char filepath[] = "/proc/self/auxv";
+	int fd = TEMP_FAILURE_RETRY(open(filepath, O_RDONLY));
+
+	if (fd < 0)
+	{
+		D("Could not open %s: %s\n", filepath, strerror(errno));
+		return 0;
+	}
+
+	struct
+	{
+		uint32_t tag;
+		uint32_t value;
+	} entry; // one auxiliary-vector record as laid out for a 32-bit process
+
+	uint32_t result = 0;
+
+	for (;;)
+	{
+		int ret = TEMP_FAILURE_RETRY(read(fd, (char*)&entry, sizeof entry));
+
+		if (ret < 0)
+		{
+			D("Error while reading %s: %s\n", filepath, strerror(errno));
+			break;
+		}
+
+		// Stop on EOF, on a truncated record, or on the all-zero terminator.
+		if (ret != (int)sizeof entry || (entry.tag == 0 && entry.value == 0))
+			break;
+
+		if (entry.tag == AT_HWCAP)
+		{
+			result = entry.value;
+			break;
+		}
+	}
+
+	close(fd);
+	return result;
+}
+
+/* Compute the ELF HWCAP flags from the content of /proc/cpuinfo.
+ * This works by parsing the 'Features' line, which lists which optional
+ * features the device's CPU supports, on top of its reference
+ * architecture. Returns 0 when no known feature is listed.
+ */
+static uint32_t
+get_elf_hwcap_from_proc_cpuinfo(const char* cpuinfo, int cpuinfo_len)
+{
+	uint32_t hwcaps = 0;
+	long architecture = 0;
+	char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture");
+
+	if (cpuArch)
+	{
+		architecture = strtol(cpuArch, NULL, 10);
+		free(cpuArch);
+
+		if (architecture >= 8L)
+		{
+			// This is a 32-bit ARM binary running on a 64-bit ARM64 kernel.
+			// The 'Features' line only lists the optional features that the
+			// device's CPU supports, compared to its reference architecture
+			// which are of no use for this process.
+			D("Faking 32-bit ARM HWCaps on ARMv%ld CPU\n", architecture);
+			return HWCAP_SET_FOR_ARMV8;
+		}
+	}
+
+	char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");
+
+	if (cpuFeatures != NULL)
+	{
+		D("Found cpuFeatures = '%s'\n", cpuFeatures);
+
+		if (has_list_item(cpuFeatures, "vfp")) /* exact token; does not match "vfpv3" */
+			hwcaps |= HWCAP_VFP;
+
+		if (has_list_item(cpuFeatures, "vfpv3"))
+			hwcaps |= HWCAP_VFPv3;
+
+		if (has_list_item(cpuFeatures, "vfpv3d16"))
+			hwcaps |= HWCAP_VFPv3D16;
+
+		if (has_list_item(cpuFeatures, "vfpv4"))
+			hwcaps |= HWCAP_VFPv4;
+
+		if (has_list_item(cpuFeatures, "neon"))
+			hwcaps |= HWCAP_NEON;
+
+		if (has_list_item(cpuFeatures, "idiva"))
+			hwcaps |= HWCAP_IDIVA;
+
+		if (has_list_item(cpuFeatures, "idivt"))
+			hwcaps |= HWCAP_IDIVT;
+
+		if (has_list_item(cpuFeatures, "idiv")) /* plain "idiv" sets both ARM and Thumb bits */
+			hwcaps |= HWCAP_IDIVA | HWCAP_IDIVT;
+
+		if (has_list_item(cpuFeatures, "iwmmxt"))
+			hwcaps |= HWCAP_IWMMXT;
+
+		free(cpuFeatures);
+	}
+
+	return hwcaps;
+}
+#endif  /* __arm__ */
+
+/* Return the number of cpus present on a given device.
+ * Returns 0 if a sysfs list cannot be read or the two lists do not overlap.
+ * To handle all weird kernel configurations, we need to compute the
+ * intersection of the 'present' and 'possible' CPU lists and count
+ * the result.
+ */
+static int
+get_cpu_count(void)
+{
+	CpuList cpus_present[1]; /* one-element arrays so the names decay to CpuList* */
+	CpuList cpus_possible[1];
+	cpulist_read_from(cpus_present, "/sys/devices/system/cpu/present");
+	cpulist_read_from(cpus_possible, "/sys/devices/system/cpu/possible");
+	/* Compute the intersection of both sets to get the actual number of
+	 * CPU cores that can be used on this device by the kernel.
+	 */
+	cpulist_and(cpus_present, cpus_possible);
+	return cpulist_count(cpus_present);
+}
+
+static void
+android_cpuInitFamily(void) /* sets g_cpuFamily from the compiler's target-architecture macros */
+{
+#if defined(__arm__)
+	g_cpuFamily = ANDROID_CPU_FAMILY_ARM;
+#elif defined(__i386__)
+	g_cpuFamily = ANDROID_CPU_FAMILY_X86;
+#elif defined(__mips64)
+	/* Needs to be before __mips__ since the compiler defines both */
+	g_cpuFamily = ANDROID_CPU_FAMILY_MIPS64;
+#elif defined(__mips__)
+	g_cpuFamily = ANDROID_CPU_FAMILY_MIPS;
+#elif defined(__aarch64__)
+	g_cpuFamily = ANDROID_CPU_FAMILY_ARM64;
+#elif defined(__x86_64__)
+	g_cpuFamily = ANDROID_CPU_FAMILY_X86_64;
+#else
+	g_cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN; /* no recognised target macro is defined */
+#endif
+}
+
+static void
+android_cpuInit(void)
+{
+	char* cpuinfo = NULL;
+	int   cpuinfo_len;
+	android_cpuInitFamily();
+	g_cpuFeatures = 0;
+	g_cpuCount    = 1;
+	g_inited      = 1;
+	cpuinfo_len = get_file_size("/proc/cpuinfo");
+
+	if (cpuinfo_len < 0)
+	{
+		D("cpuinfo_len cannot be computed!");
+		return;
+	}
+
+	cpuinfo = malloc(cpuinfo_len);
+
+	if (cpuinfo == NULL)
+	{
+		D("cpuinfo buffer could not be allocated");
+		return;
+	}
+
+	cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len);
+	D("cpuinfo_len is (%d):\n%.*s\n", cpuinfo_len,
+	  cpuinfo_len >= 0 ? cpuinfo_len : 0, cpuinfo);
+
+	if (cpuinfo_len < 0)  /* should not happen */
+	{
+		free(cpuinfo);
+		return;
+	}
+
+	/* Count the CPU cores, the value may be 0 for single-core CPUs */
+	g_cpuCount = get_cpu_count();
+
+	if (g_cpuCount == 0)
+	{
+		g_cpuCount = 1;
+	}
+
+	D("found cpuCount = %d\n", g_cpuCount);
+#ifdef __arm__
+	{
+		/* Extract architecture from the "CPU Architecture" field.
+		 * The list is well-known, unlike the output of
+		 * the 'Processor' field which can vary greatly.
+		 *
+		 * See the definition of the 'proc_arch' array in
+		 * $KERNEL/arch/arm/kernel/setup.c and the 'c_show' function in
+		 * same file.
+		 */
+		char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture");
+
+		if (cpuArch != NULL)
+		{
+			char*  end;
+			long   archNumber;
+			int    hasARMv7 = 0;
+			D("found cpuArch = '%s'\n", cpuArch);
+			/* read the initial decimal number, ignore the rest */
+			archNumber = strtol(cpuArch, &end, 10);
+
+			/* Note that ARMv8 is upwards compatible with ARMv7. */
+			if (end > cpuArch && archNumber >= 7)
+			{
+				hasARMv7 = 1;
+			}
+
+			/* Unfortunately, it seems that certain ARMv6-based CPUs
+			 * report an incorrect architecture number of 7!
+			 *
+			 * See http://code.google.com/p/android/issues/detail?id=10812
+			 *
+			 * We try to correct this by looking at the 'elf_format'
+			 * field reported by the 'Processor' field, which is of the
+			 * form of "(v7l)" for an ARMv7-based CPU, and "(v6l)" for
+			 * an ARMv6-one.
+			 */
+			if (hasARMv7)
+			{
+				char* cpuProc = extract_cpuinfo_field(cpuinfo, cpuinfo_len,
+				                                      "Processor");
+
+				if (cpuProc != NULL)
+				{
+					D("found cpuProc = '%s'\n", cpuProc);
+
+					if (has_list_item(cpuProc, "(v6l)"))
+					{
+						D("CPU processor and architecture mismatch!!\n");
+						hasARMv7 = 0;
+					}
+
+					free(cpuProc);
+				}
+			}
+
+			if (hasARMv7)
+			{
+				g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_ARMv7;
+			}
+
+			/* The LDREX / STREX instructions are available from ARMv6 */
+			if (archNumber >= 6)
+			{
+				g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_LDREX_STREX;
+			}
+
+			free(cpuArch);
+		}
+
+		/* Extract the list of CPU features from ELF hwcaps */
+		uint32_t hwcaps = 0;
+		hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP);
+
+		if (!hwcaps)
+		{
+			D("Parsing /proc/self/auxv to extract ELF hwcaps!\n");
+			hwcaps = get_elf_hwcap_from_proc_self_auxv();
+		}
+
+		if (!hwcaps)
+		{
+			// Parsing /proc/self/auxv will fail from regular application
+			// processes on some Android platform versions, when this happens
+			// parse proc/cpuinfo instead.
+			D("Parsing /proc/cpuinfo to extract ELF hwcaps!\n");
+			hwcaps = get_elf_hwcap_from_proc_cpuinfo(cpuinfo, cpuinfo_len);
+		}
+
+		if (hwcaps != 0)
+		{
+			int has_vfp = (hwcaps & HWCAP_VFP);
+			int has_vfpv3 = (hwcaps & HWCAP_VFPv3);
+			int has_vfpv3d16 = (hwcaps & HWCAP_VFPv3D16);
+			int has_vfpv4 = (hwcaps & HWCAP_VFPv4);
+			int has_neon = (hwcaps & HWCAP_NEON);
+			int has_idiva = (hwcaps & HWCAP_IDIVA);
+			int has_idivt = (hwcaps & HWCAP_IDIVT);
+			int has_iwmmxt = (hwcaps & HWCAP_IWMMXT);
+
+			// The kernel does a poor job at ensuring consistency when
+			// describing CPU features. So lots of guessing is needed.
+
+			// 'vfpv4' implies VFPv3|VFP_FMA|FP16
+			if (has_vfpv4)
+				g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3    |
+				                 ANDROID_CPU_ARM_FEATURE_VFP_FP16 |
+				                 ANDROID_CPU_ARM_FEATURE_VFP_FMA;
+
+			// 'vfpv3' or 'vfpv3d16' imply VFPv3. Note that unlike GCC,
+			// a value of 'vfpv3' doesn't necessarily mean that the D32
+			// feature is present, so be conservative. All CPUs in the
+			// field that support D32 also support NEON, so this should
+			// not be a problem in practice.
+			if (has_vfpv3 || has_vfpv3d16)
+				g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3;
+
+			// 'vfp' is super ambiguous. Depending on the kernel, it can
+			// either mean VFPv2 or VFPv3. Make it depend on ARMv7.
+			if (has_vfp)
+			{
+				if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7)
+					g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3;
+				else
+					g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2;
+			}
+
+			// Neon implies VFPv3|D32, and if vfpv4 is detected, NEON_FMA
+			if (has_neon)
+			{
+				g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 |
+				                 ANDROID_CPU_ARM_FEATURE_NEON |
+				                 ANDROID_CPU_ARM_FEATURE_VFP_D32;
+
+				if (has_vfpv4)
+					g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON_FMA;
+			}
+
+			// VFPv3 implies VFPv2 and ARMv7
+			if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3)
+				g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2 |
+				                 ANDROID_CPU_ARM_FEATURE_ARMv7;
+
+			if (has_idiva)
+				g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM;
+
+			if (has_idivt)
+				g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2;
+
+			if (has_iwmmxt)
+				g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_iWMMXt;
+		}
+
+		/* Extract the list of CPU features from ELF hwcaps2 */
+		uint32_t hwcaps2 = 0;
+		hwcaps2 = get_elf_hwcap_from_getauxval(AT_HWCAP2);
+
+		if (hwcaps2 != 0)
+		{
+			int has_aes     = (hwcaps2 & HWCAP2_AES);
+			int has_pmull   = (hwcaps2 & HWCAP2_PMULL);
+			int has_sha1    = (hwcaps2 & HWCAP2_SHA1);
+			int has_sha2    = (hwcaps2 & HWCAP2_SHA2);
+			int has_crc32   = (hwcaps2 & HWCAP2_CRC32);
+
+			if (has_aes)
+				g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_AES;
+
+			if (has_pmull)
+				g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_PMULL;
+
+			if (has_sha1)
+				g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_SHA1;
+
+			if (has_sha2)
+				g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_SHA2;
+
+			if (has_crc32)
+				g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_CRC32;
+		}
+
+		/* Extract the cpuid value from various fields */
+		// The CPUID value is broken up in several entries in /proc/cpuinfo.
+		// This table is used to rebuild it from the entries.
+		static const struct CpuIdEntry
+		{
+			const char* field;
+			char        format;
+			char        bit_lshift;
+			char        bit_length;
+		} cpu_id_entries[] =
+		{
+			{ "CPU implementer", 'x', 24, 8 },
+			{ "CPU variant", 'x', 20, 4 },
+			{ "CPU part", 'x', 4, 12 },
+			{ "CPU revision", 'd', 0, 4 },
+		};
+		size_t i;
+		D("Parsing /proc/cpuinfo to recover CPUID\n");
+
+		for (i = 0;
+		     i < sizeof(cpu_id_entries) / sizeof(cpu_id_entries[0]);
+		     ++i)
+		{
+			const struct CpuIdEntry* entry = &cpu_id_entries[i];
+			char* value = extract_cpuinfo_field(cpuinfo,
+			                                    cpuinfo_len,
+			                                    entry->field);
+
+			if (value == NULL)
+				continue;
+
+			D("field=%s value='%s'\n", entry->field, value);
+			char* value_end = value + strlen(value);
+			int val = 0;
+			const char* start = value;
+			const char* p;
+
+			if (value[0] == '0' && (value[1] == 'x' || value[1] == 'X'))
+			{
+				start += 2;
+				p = parse_hexadecimal(start, value_end, &val);
+			}
+			else if (entry->format == 'x')
+				p = parse_hexadecimal(value, value_end, &val);
+			else
+				p = parse_decimal(value, value_end, &val);
+
+			if (p > (const char*)start)
+			{
+				val &= ((1 << entry->bit_length) - 1);
+				val <<= entry->bit_lshift;
+				g_cpuIdArm |= (uint32_t) val;
+			}
+
+			free(value);
+		}
+
+		// Handle kernel configuration bugs that prevent the correct
+		// reporting of CPU features.
+		static const struct CpuFix
+		{
+			uint32_t  cpuid;
+			uint64_t  or_flags;
+		} cpu_fixes[] =
+		{
+			/* The Nexus 4 (Qualcomm Krait) kernel configuration
+			 * forgets to report IDIV support. */
+			{
+				0x510006f2, ANDROID_CPU_ARM_FEATURE_IDIV_ARM |
+				ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2
+			},
+			{
+				0x510006f3, ANDROID_CPU_ARM_FEATURE_IDIV_ARM |
+				ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2
+			},
+		};
+		size_t n;
+
+		for (n = 0; n < sizeof(cpu_fixes) / sizeof(cpu_fixes[0]); ++n)
+		{
+			const struct CpuFix* entry = &cpu_fixes[n];
+
+			if (g_cpuIdArm == entry->cpuid)
+				g_cpuFeatures |= entry->or_flags;
+		}
+
+		// Special case: The emulator-specific Android 4.2 kernel fails
+		// to report support for the 32-bit ARM IDIV instruction.
+		// Technically, this is a feature of the virtual CPU implemented
+		// by the emulator. Note that it could also support Thumb IDIV
+		// in the future, and this will have to be slightly updated.
+		char* hardware = extract_cpuinfo_field(cpuinfo,
+		                                       cpuinfo_len,
+		                                       "Hardware");
+
+		if (hardware)
+		{
+			if (!strcmp(hardware, "Goldfish") &&
+			    g_cpuIdArm == 0x4100c080 &&
+			    (g_cpuFamily & ANDROID_CPU_ARM_FEATURE_ARMv7) != 0)
+			{
+				g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM;
+			}
+
+			free(hardware);
+		}
+	}
+#endif /* __arm__ */
+#ifdef __aarch64__
+	{
+		/* Extract the list of CPU features from ELF hwcaps */
+		uint32_t hwcaps = 0;
+		hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP);
+
+		if (hwcaps != 0)
+		{
+			int has_fp      = (hwcaps & HWCAP_FP);
+			int has_asimd   = (hwcaps & HWCAP_ASIMD);
+			int has_aes     = (hwcaps & HWCAP_AES);
+			int has_pmull   = (hwcaps & HWCAP_PMULL);
+			int has_sha1    = (hwcaps & HWCAP_SHA1);
+			int has_sha2    = (hwcaps & HWCAP_SHA2);
+			int has_crc32   = (hwcaps & HWCAP_CRC32);
+
+			if (has_fp == 0)
+			{
+				D("ERROR: Floating-point unit missing, but is required by Android on AArch64 CPUs\n");
+			}
+
+			if (has_asimd == 0)
+			{
+				D("ERROR: ASIMD unit missing, but is required by Android on AArch64 CPUs\n");
+			}
+
+			if (has_fp)
+				g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_FP;
+
+			if (has_asimd)
+				g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_ASIMD;
+
+			if (has_aes)
+				g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_AES;
+
+			if (has_pmull)
+				g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_PMULL;
+
+			if (has_sha1)
+				g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_SHA1;
+
+			if (has_sha2)
+				g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_SHA2;
+
+			if (has_crc32)
+				g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_CRC32;
+		}
+	}
+#endif /* __aarch64__ */
+#if defined(__i386__) || defined(__x86_64__)
+	int regs[4];
+	/* According to http://en.wikipedia.org/wiki/CPUID */
+#define VENDOR_INTEL_b  0x756e6547
+#define VENDOR_INTEL_c  0x6c65746e
+#define VENDOR_INTEL_d  0x49656e69
+	x86_cpuid(0, regs);
+	int vendorIsIntel = (regs[1] == VENDOR_INTEL_b &&
+	                     regs[2] == VENDOR_INTEL_c &&
+	                     regs[3] == VENDOR_INTEL_d);
+	x86_cpuid(1, regs);
+
+	if ((regs[2] & (1 << 9)) != 0)
+	{
+		g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSSE3;
+	}
+
+	if ((regs[2] & (1 << 23)) != 0)
+	{
+		g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_POPCNT;
+	}
+
+	if ((regs[2] & (1 << 19)) != 0)
+	{
+		g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSE4_1;
+	}
+
+	if ((regs[2] & (1 << 20)) != 0)
+	{
+		g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSE4_2;
+	}
+
+	if (vendorIsIntel && (regs[2] & (1 << 22)) != 0)
+	{
+		g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_MOVBE;
+	}
+
+	if ((regs[2] & (1 << 25)) != 0)
+	{
+		g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AES_NI;
+	}
+
+	if ((regs[2] & (1 << 28)) != 0)
+	{
+		g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AVX;
+	}
+
+	if ((regs[2] & (1 << 30)) != 0)
+	{
+		g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_RDRAND;
+	}
+
+	x86_cpuid(7, regs);
+
+	if ((regs[1] & (1 << 5)) != 0)
+	{
+		g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AVX2;
+	}
+
+	if ((regs[1] & (1 << 29)) != 0)
+	{
+		g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SHA_NI;
+	}
+
+#endif
+#if defined( __mips__)
+	{
+		/* MIPS and MIPS64 */
+		/* Extract the list of CPU features from ELF hwcaps */
+		uint32_t hwcaps = 0;
+		hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP);
+
+		if (hwcaps != 0)
+		{
+			int has_r6      = (hwcaps & HWCAP_MIPS_R6);
+			int has_msa     = (hwcaps & HWCAP_MIPS_MSA);
+
+			if (has_r6)
+				g_cpuFeatures |= ANDROID_CPU_MIPS_FEATURE_R6;
+
+			if (has_msa)
+				g_cpuFeatures |= ANDROID_CPU_MIPS_FEATURE_MSA;
+		}
+	}
+#endif /* __mips__ */
+	free(cpuinfo);
+}
+
+/* Return g_cpuFamily; pthread_once() on g_once gets the chance to run android_cpuInit first. */
+AndroidCpuFamily
+android_getCpuFamily(void)
+{
+	pthread_once(&g_once, android_cpuInit);
+	return g_cpuFamily;
+}
+
+/* Return the g_cpuFeatures bitmask; pthread_once() on g_once gets the chance to run android_cpuInit first. */
+uint64_t
+android_getCpuFeatures(void)
+{
+	pthread_once(&g_once, android_cpuInit);
+	return g_cpuFeatures;
+}
+
+/* Return g_cpuCount; pthread_once() on g_once gets the chance to run android_cpuInit first. */
+int
+android_getCpuCount(void)
+{
+	pthread_once(&g_once, android_cpuInit);
+	return g_cpuCount;
+}
+
+static void
+android_cpuInitDummy(void)
+{
+	g_inited = 1; /* only raises the "initialized" flag; performs no CPU detection */
+}
+
+int
+android_setCpu(int cpu_count, uint64_t cpu_features)
+{
+	/* Fail if the library was already initialized. */
+	if (g_inited)
+		return 0;
+
+	android_cpuInitFamily();
+	g_cpuCount = (cpu_count <= 0 ? 1 : cpu_count); /* non-positive counts are stored as 1 */
+	g_cpuFeatures = cpu_features; /* caller-supplied mask is stored verbatim */
+	pthread_once(&g_once, android_cpuInitDummy); /* if this is the first pthread_once() on g_once, later getters will not run android_cpuInit */
+	return 1;
+}
+
+#ifdef __arm__ /* CPUID accessors, compiled only when __arm__ is defined */
+uint32_t
+android_getCpuIdArm(void)
+{
+	pthread_once(&g_once, android_cpuInit); /* same one-time init path as the other getters */
+	return g_cpuIdArm;
+}
+
+int
+android_setCpuArm(int cpu_count, uint64_t cpu_features, uint32_t cpu_id)
+{
+	if (!android_setCpu(cpu_count, cpu_features))
+		return 0; /* android_setCpu() refused; g_cpuIdArm is left unchanged */
+
+	g_cpuIdArm = cpu_id; /* stored only after count and features were accepted */
+	return 1;
+}
+#endif  /* __arm__ */
+
+/*
+ * Technical note: Making sense of ARM's FPU architecture versions.
+ *
+ * FPA was ARM's first attempt at an FPU architecture. There is no Android
+ * device that actually uses it since this technology was already obsolete
+ * when the project started. If you see references to FPA instructions
+ * somewhere, you can be sure that this doesn't apply to Android at all.
+ *
+ * FPA was followed by "VFP", soon renamed "VFPv1" due to the emergence of
+ * new versions / additions to it. ARM considers this obsolete right now,
+ * and no known Android device implements it either.
+ *
+ * VFPv2 added a few instructions to VFPv1, and is an *optional* extension
+ * supported by some ARMv5TE, ARMv6 and ARMv6T2 CPUs. Note that a device
+ * supporting the 'armeabi' ABI doesn't necessarily support these.
+ *
+ * VFPv3-D16 adds a few instructions on top of VFPv2 and is typically used
+ * on ARMv7-A CPUs which implement an FPU. Note that it is also mandated
+ * by the Android 'armeabi-v7a' ABI. The -D16 suffix in its name means
+ * that it provides 16 double-precision FPU registers (d0-d15) and 32
+ * single-precision ones (s0-s31) which happen to be mapped to the same
+ * register banks.
+ *
+ * VFPv3-D32 is the name of an extension to VFPv3-D16 that provides 16
+ * additional double precision registers (d16-d31). Note that there are
+ * still only 32 single precision registers.
+ *
+ * VFPv3xD is a *subset* of VFPv3-D16 that only provides single-precision
+ * registers. It is only used on ARMv7-M (i.e. on micro-controllers) which
+ * are not supported by Android. Note that it is not compatible with VFPv2.
+ *
+ * NOTE: The term 'VFPv3' usually designates either VFPv3-D16 or VFPv3-D32
+ *       depending on context. For example GCC uses it for VFPv3-D32, but
+ *       the Linux kernel code uses it for VFPv3-D16 (especially in
+ *       /proc/cpuinfo). Always try to use the full designation when
+ *       possible.
+ *
+ * NEON, a.k.a. "ARM Advanced SIMD" is an extension that provides
+ * instructions to perform parallel computations on vectors of 8, 16,
+ * 32, 64 and 128 bit quantities. NEON requires VFPv3-D32 since all
+ * NEON registers are also mapped to the same register banks.
+ *
+ * VFPv4-D16, adds a few instructions on top of VFPv3-D16 in order to
+ * perform fused multiply-accumulate on VFP registers, as well as
+ * half-precision (16-bit) conversion operations.
+ *
+ * VFPv4-D32 is VFPv4-D16 with 32, instead of 16, FPU double precision
+ * registers.
+ *
+ * VFPv4-NEON is VFPv4-D32 with NEON instructions. It also adds fused
+ * multiply-accumulate instructions that work on the NEON registers.
+ *
+ * NOTE: Similarly, "VFPv4" might either reference VFPv4-D16 or VFPv4-D32
+ *       depending on context.
+ *
+ * The following information was determined by scanning the binutils-2.22
+ * sources:
+ *
+ * Basic VFP instruction subsets:
+ *
+ * #define FPU_VFP_EXT_V1xD 0x08000000     // Base VFP instruction set.
+ * #define FPU_VFP_EXT_V1   0x04000000     // Double-precision insns.
+ * #define FPU_VFP_EXT_V2   0x02000000     // ARM10E VFPr1.
+ * #define FPU_VFP_EXT_V3xD 0x01000000     // VFPv3 single-precision.
+ * #define FPU_VFP_EXT_V3   0x00800000     // VFPv3 double-precision.
+ * #define FPU_NEON_EXT_V1  0x00400000     // Neon (SIMD) insns.
+ * #define FPU_VFP_EXT_D32  0x00200000     // Registers D16-D31.
+ * #define FPU_VFP_EXT_FP16 0x00100000     // Half-precision extensions.
+ * #define FPU_NEON_EXT_FMA 0x00080000     // Neon fused multiply-add
+ * #define FPU_VFP_EXT_FMA  0x00040000     // VFP fused multiply-add
+ *
+ * FPU types (excluding NEON)
+ *
+ * FPU_VFP_V1xD (EXT_V1xD)
+ *    |
+ *    +--------------------------+
+ *    |                          |
+ * FPU_VFP_V1 (+EXT_V1)       FPU_VFP_V3xD (+EXT_V2+EXT_V3xD)
+ *    |                          |
+ *    |                          |
+ * FPU_VFP_V2 (+EXT_V2)       FPU_VFP_V4_SP_D16 (+EXT_FP16+EXT_FMA)
+ *    |
+ * FPU_VFP_V3D16 (+EXT_V3xD+EXT_V3)
+ *    |
+ *    +--------------------------+
+ *    |                          |
+ * FPU_VFP_V3 (+EXT_D32)     FPU_VFP_V4D16 (+EXT_FP16+EXT_FMA)
+ *    |                          |
+ *    |                      FPU_VFP_V4 (+EXT_D32)
+ *    |
+ * FPU_VFP_HARD (+EXT_FMA+NEON_EXT_FMA)
+ *
+ * VFP architectures:
+ *
+ * ARCH_VFP_V1xD  (EXT_V1xD)
+ *   |
+ *   +------------------+
+ *   |                  |
+ *   |             ARCH_VFP_V3xD (+EXT_V2+EXT_V3xD)
+ *   |                  |
+ *   |             ARCH_VFP_V3xD_FP16 (+EXT_FP16)
+ *   |                  |
+ *   |             ARCH_VFP_V4_SP_D16 (+EXT_FMA)
+ *   |
+ * ARCH_VFP_V1 (+EXT_V1)
+ *   |
+ * ARCH_VFP_V2 (+EXT_V2)
+ *   |
+ * ARCH_VFP_V3D16 (+EXT_V3xD+EXT_V3)
+ *   |
+ *   +-------------------+
+ *   |                   |
+ *   |         ARCH_VFP_V3D16_FP16  (+EXT_FP16)
+ *   |
+ *   +-------------------+
+ *   |                   |
+ *   |         ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
+ *   |                   |
+ *   |         ARCH_VFP_V4 (+EXT_D32)
+ *   |                   |
+ *   |         ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
+ *   |
+ * ARCH_VFP_V3 (+EXT_D32)
+ *   |
+ *   +-------------------+
+ *   |                   |
+ *   |         ARCH_VFP_V3_FP16 (+EXT_FP16)
+ *   |
+ * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
+ *   |
+ * ARCH_NEON_FP16 (+EXT_FP16)
+ *
+ * -mfpu=<name> values and their correspondence with FPU architectures above:
+ *
+ *   {"vfp",               FPU_ARCH_VFP_V2},
+ *   {"vfp9",              FPU_ARCH_VFP_V2},
+ *   {"vfp3",              FPU_ARCH_VFP_V3}, // For backwards compatibility.
+ *   {"vfp10",             FPU_ARCH_VFP_V2},
+ *   {"vfp10-r0",          FPU_ARCH_VFP_V1},
+ *   {"vfpxd",             FPU_ARCH_VFP_V1xD},
+ *   {"vfpv2",             FPU_ARCH_VFP_V2},
+ *   {"vfpv3",             FPU_ARCH_VFP_V3},
+ *   {"vfpv3-fp16",        FPU_ARCH_VFP_V3_FP16},
+ *   {"vfpv3-d16",         FPU_ARCH_VFP_V3D16},
+ *   {"vfpv3-d16-fp16",    FPU_ARCH_VFP_V3D16_FP16},
+ *   {"vfpv3xd",           FPU_ARCH_VFP_V3xD},
+ *   {"vfpv3xd-fp16",      FPU_ARCH_VFP_V3xD_FP16},
+ *   {"neon",              FPU_ARCH_VFP_V3_PLUS_NEON_V1},
+ *   {"neon-fp16",         FPU_ARCH_NEON_FP16},
+ *   {"vfpv4",             FPU_ARCH_VFP_V4},
+ *   {"vfpv4-d16",         FPU_ARCH_VFP_V4D16},
+ *   {"fpv4-sp-d16",       FPU_ARCH_VFP_V4_SP_D16},
+ *   {"neon-vfpv4",        FPU_ARCH_NEON_VFP_V4},
+ *
+ *
+ * Simplified diagram that only includes FPUs supported by Android:
+ * Only ARCH_VFP_V3D16 is actually mandated by the armeabi-v7a ABI,
+ * all others are optional and must be probed at runtime.
+ *
+ * ARCH_VFP_V3D16 (EXT_V1xD+EXT_V1+EXT_V2+EXT_V3xD+EXT_V3)
+ *   |
+ *   +-------------------+
+ *   |                   |
+ *   |         ARCH_VFP_V3D16_FP16  (+EXT_FP16)
+ *   |
+ *   +-------------------+
+ *   |                   |
+ *   |         ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
+ *   |                   |
+ *   |         ARCH_VFP_V4 (+EXT_D32)
+ *   |                   |
+ *   |         ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
+ *   |
+ * ARCH_VFP_V3 (+EXT_D32)
+ *   |
+ *   +-------------------+
+ *   |                   |
+ *   |         ARCH_VFP_V3_FP16 (+EXT_FP16)
+ *   |
+ * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
+ *   |
+ * ARCH_NEON_FP16 (+EXT_FP16)
+ *
+ */
diff --git a/winpr/libwinpr/sysinfo/cpufeatures/cpu-features.h b/winpr/libwinpr/sysinfo/cpufeatures/cpu-features.h
new file mode 100644
index 000000000..e95ed9dc6
--- /dev/null
+++ b/winpr/libwinpr/sysinfo/cpufeatures/cpu-features.h
@@ -0,0 +1,328 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef CPU_FEATURES_H
+#define CPU_FEATURES_H
+
+#include <sys/cdefs.h>
+#include <stdint.h>
+
+__BEGIN_DECLS
+
+/* A list of valid values returned by android_getCpuFamily().
+ * They describe the CPU Architecture of the current process.
+ */
+typedef enum
+{
+	ANDROID_CPU_FAMILY_UNKNOWN = 0,
+	ANDROID_CPU_FAMILY_ARM,     /* 32-bit ARM process; features use ANDROID_CPU_ARM_FEATURE_* */
+	ANDROID_CPU_FAMILY_X86,     /* features use ANDROID_CPU_X86_FEATURE_* */
+	ANDROID_CPU_FAMILY_MIPS,    /* features use ANDROID_CPU_MIPS_FEATURE_* */
+	ANDROID_CPU_FAMILY_ARM64,   /* 64-bit ARM process; features use ANDROID_CPU_ARM64_FEATURE_* */
+	ANDROID_CPU_FAMILY_X86_64,  /* features use ANDROID_CPU_X86_FEATURE_* */
+	ANDROID_CPU_FAMILY_MIPS64,  /* features use ANDROID_CPU_MIPS_FEATURE_* */
+
+	ANDROID_CPU_FAMILY_MAX  /* do not remove; last enumerator, so it equals the number of values above */
+
+} AndroidCpuFamily;
+
+/* Return the CPU family of the current process.
+ *
+ * Note that this matches the bitness of the current process. I.e. when
+ * running a 32-bit binary on a 64-bit capable CPU, this will return the
+ * 32-bit CPU family value.
+ */
+extern AndroidCpuFamily android_getCpuFamily(void);
+
+/* Return a bitmap describing a set of optional CPU features that are
+ * supported by the current device's CPU. The exact bit-flags returned
+ * depend on the value returned by android_getCpuFamily(). See the
+ * documentation for the ANDROID_CPU_*_FEATURE_* flags below for details.
+ */
+extern uint64_t android_getCpuFeatures(void);
+
+/* The list of feature flags for ANDROID_CPU_FAMILY_ARM that can be
+ * recognized by the library (see note below for 64-bit ARM). Value details
+ * are:
+ *
+ *   VFPv2:
+ *     CPU supports the VFPv2 instruction set. Many, but not all, ARMv6 CPUs
+ *     support these instructions. VFPv2 is a subset of VFPv3 so this will
+ *     be set whenever VFPv3 is set too.
+ *
+ *   ARMv7:
+ *     CPU supports the ARMv7-A basic instruction set.
+ *     This feature is mandated by the 'armeabi-v7a' ABI.
+ *
+ *   VFPv3:
+ *     CPU supports the VFPv3-D16 instruction set, providing hardware FPU
+ *     support for single and double precision floating point registers.
+ *     Note that only 16 FPU registers are available by default, unless
+ *     the D32 bit is set too. This feature is also mandated by the
+ *     'armeabi-v7a' ABI.
+ *
+ *   VFP_D32:
+ *     CPU VFP optional extension that provides 32 FPU registers,
+ *     instead of 16. Note that ARM mandates this feature if the 'NEON'
+ *     feature is implemented by the CPU.
+ *
+ *   NEON:
+ *     CPU FPU supports "ARM Advanced SIMD" instructions, also known as
+ *     NEON. Note that this mandates the VFP_D32 feature as well, per the
+ *     ARM Architecture specification.
+ *
+ *   VFP_FP16:
+ *     Half-width floating precision VFP extension. If set, the CPU
+ *     supports instructions to perform floating-point operations on
+ *     16-bit registers. This is part of the VFPv4 specification, but
+ *     not mandated by any Android ABI.
+ *
+ *   VFP_FMA:
+ *     Fused multiply-accumulate VFP instructions extension. Also part of
+ *     the VFPv4 specification, but not mandated by any Android ABI.
+ *
+ *   NEON_FMA:
+ *     Fused multiply-accumulate NEON instructions extension. Optional
+ *     extension from the VFPv4 specification, but not mandated by any
+ *     Android ABI.
+ *
+ *   IDIV_ARM:
+ *     Integer division available in ARM mode. Only available
+ *     on recent CPUs (e.g. Cortex-A15).
+ *
+ *   IDIV_THUMB2:
+ *     Integer division available in Thumb-2 mode. Only available
+ *     on recent CPUs (e.g. Cortex-A15).
+ *
+ *   iWMMXt:
+ *     Optional extension that adds MMX registers and operations to an
+ *     ARM CPU. This is only available on a few XScale-based CPU designs
+ *     sold by Marvell. Pretty rare in practice.
+ *
+ *   AES:
+ *     CPU supports AES instructions. These instructions are only
+ *     available for 32-bit applications running on ARMv8 CPU.
+ *
+ *   CRC32:
+ *     CPU supports CRC32 instructions. These instructions are only
+ *     available for 32-bit applications running on ARMv8 CPU.
+ *
+ *   SHA2:
+ *     CPU supports SHA2 instructions. These instructions are only
+ *     available for 32-bit applications running on ARMv8 CPU.
+ *
+ *   SHA1:
+ *     CPU supports SHA1 instructions. These instructions are only
+ *     available for 32-bit applications running on ARMv8 CPU.
+ *
+ *   PMULL:
+ *     CPU supports 64-bit PMULL and PMULL2 instructions. These
+ *     instructions are only available for 32-bit applications
+ *     running on ARMv8 CPU.
+ *
+ * If you want to tell the compiler to generate code that targets one of
+ * the feature set above, you should probably use one of the following
+ * flags (for more details, see technical note at the end of this file):
+ *
+ *   -mfpu=vfp
+ *   -mfpu=vfpv2
+ *     These are equivalent and tell GCC to use VFPv2 instructions for
+ *     floating-point operations. Use this if you want your code to
+ *     run on *some* ARMv6 devices, and any ARMv7-A device supported
+ *     by Android.
+ *
+ *     Generated code requires VFPv2 feature.
+ *
+ *   -mfpu=vfpv3-d16
+ *     Tell GCC to use VFPv3 instructions (using only 16 FPU registers).
+ *     This should be generic code that runs on any CPU that supports the
+ *     'armeabi-v7a' Android ABI. Note that no ARMv6 CPU supports this.
+ *
+ *     Generated code requires VFPv3 feature.
+ *
+ *   -mfpu=vfpv3
+ *     Tell GCC to use VFPv3 instructions with 32 FPU registers.
+ *     Generated code requires VFPv3|VFP_D32 features.
+ *
+ *   -mfpu=neon
+ *     Tell GCC to use VFPv3 instructions with 32 FPU registers, and
+ *     also support NEON intrinsics (see <arm_neon.h>).
+ *     Generated code requires VFPv3|VFP_D32|NEON features.
+ *
+ *   -mfpu=vfpv4-d16
+ *     Generated code requires VFPv3|VFP_FP16|VFP_FMA features.
+ *
+ *   -mfpu=vfpv4
+ *     Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32 features.
+ *
+ *   -mfpu=neon-vfpv4
+ *     Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32|NEON|NEON_FMA
+ *     features.
+ *
+ *   -mcpu=cortex-a7
+ *   -mcpu=cortex-a15
+ *     Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32|
+ *                             NEON|NEON_FMA|IDIV_ARM|IDIV_THUMB2
+ *     This flag implies -mfpu=neon-vfpv4.
+ *
+ *   -mcpu=iwmmxt
+ *     Allows the use of iWMMXt intrinsics with GCC.
+ *
+ * IMPORTANT NOTE: These flags should only be tested when
+ * android_getCpuFamily() returns ANDROID_CPU_FAMILY_ARM, i.e. this is a
+ * 32-bit process.
+ *
+ * When running a 64-bit ARM process on an ARMv8 CPU,
+ * android_getCpuFeatures() will return a different set of bitflags (the ANDROID_CPU_ARM64_FEATURE_* values).
+ */
+enum
+{
+	ANDROID_CPU_ARM_FEATURE_ARMv7       = (1 << 0),
+	ANDROID_CPU_ARM_FEATURE_VFPv3       = (1 << 1),
+	ANDROID_CPU_ARM_FEATURE_NEON        = (1 << 2),
+	ANDROID_CPU_ARM_FEATURE_LDREX_STREX = (1 << 3), /* NOTE(review): has no entry in the feature list above */
+	ANDROID_CPU_ARM_FEATURE_VFPv2       = (1 << 4),
+	ANDROID_CPU_ARM_FEATURE_VFP_D32     = (1 << 5),
+	ANDROID_CPU_ARM_FEATURE_VFP_FP16    = (1 << 6),
+	ANDROID_CPU_ARM_FEATURE_VFP_FMA     = (1 << 7),
+	ANDROID_CPU_ARM_FEATURE_NEON_FMA    = (1 << 8),
+	ANDROID_CPU_ARM_FEATURE_IDIV_ARM    = (1 << 9), /* also forced on for CPUIDs 0x510006f2/0x510006f3 and for "Goldfish" hardware with CPUID 0x4100c080 */
+	ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 = (1 << 10), /* also forced on for CPUIDs 0x510006f2/0x510006f3 */
+	ANDROID_CPU_ARM_FEATURE_iWMMXt      = (1 << 11),
+	ANDROID_CPU_ARM_FEATURE_AES         = (1 << 12),
+	ANDROID_CPU_ARM_FEATURE_PMULL       = (1 << 13),
+	ANDROID_CPU_ARM_FEATURE_SHA1        = (1 << 14), /* detection code sets this from the HWCAP2_SHA1 bit */
+	ANDROID_CPU_ARM_FEATURE_SHA2        = (1 << 15), /* detection code sets this from the HWCAP2_SHA2 bit */
+	ANDROID_CPU_ARM_FEATURE_CRC32       = (1 << 16), /* detection code sets this from the HWCAP2_CRC32 bit */
+};
+
+/* The bit flags corresponding to the output of android_getCpuFeatures()
+ * when android_getCpuFamily() returns ANDROID_CPU_FAMILY_ARM64. Value details
+ * are:
+ *
+ *   FP:
+ *     CPU has Floating-point unit.
+ *
+ *   ASIMD:
+ *     CPU has Advanced SIMD unit.
+ *
+ *   AES:
+ *     CPU supports AES instructions.
+ *
+ *   CRC32:
+ *     CPU supports CRC32 instructions.
+ *
+ *   SHA2:
+ *     CPU supports SHA2 instructions.
+ *
+ *   SHA1:
+ *     CPU supports SHA1 instructions.
+ *
+ *   PMULL:
+ *     CPU supports 64-bit PMULL and PMULL2 instructions.
+ */
+enum
+{
+	ANDROID_CPU_ARM64_FEATURE_FP      = (1 << 0), /* detection code sets this from the HWCAP_FP bit of AT_HWCAP */
+	ANDROID_CPU_ARM64_FEATURE_ASIMD   = (1 << 1), /* from HWCAP_ASIMD */
+	ANDROID_CPU_ARM64_FEATURE_AES     = (1 << 2), /* from HWCAP_AES */
+	ANDROID_CPU_ARM64_FEATURE_PMULL   = (1 << 3), /* from HWCAP_PMULL */
+	ANDROID_CPU_ARM64_FEATURE_SHA1    = (1 << 4), /* from HWCAP_SHA1 */
+	ANDROID_CPU_ARM64_FEATURE_SHA2    = (1 << 5), /* from HWCAP_SHA2 */
+	ANDROID_CPU_ARM64_FEATURE_CRC32   = (1 << 6), /* from HWCAP_CRC32 */
+};
+
+/* The bit flags corresponding to the output of android_getCpuFeatures()
+ * when android_getCpuFamily() returns ANDROID_CPU_FAMILY_X86 or
+ * ANDROID_CPU_FAMILY_X86_64.
+ */
+enum
+{
+	ANDROID_CPU_X86_FEATURE_SSSE3  = (1 << 0), /* detection code tests cpuid leaf 1, regs[2] bit 9 */
+	ANDROID_CPU_X86_FEATURE_POPCNT = (1 << 1), /* cpuid leaf 1, regs[2] bit 23 */
+	ANDROID_CPU_X86_FEATURE_MOVBE  = (1 << 2), /* cpuid leaf 1, regs[2] bit 22; reported only when the VENDOR_INTEL_* check matches */
+	ANDROID_CPU_X86_FEATURE_SSE4_1 = (1 << 3), /* cpuid leaf 1, regs[2] bit 19 */
+	ANDROID_CPU_X86_FEATURE_SSE4_2 = (1 << 4), /* cpuid leaf 1, regs[2] bit 20 */
+	ANDROID_CPU_X86_FEATURE_AES_NI = (1 << 5), /* cpuid leaf 1, regs[2] bit 25 */
+	ANDROID_CPU_X86_FEATURE_AVX = (1 << 6), /* cpuid leaf 1, regs[2] bit 28; NOTE(review): only the cpuid bit is tested, no OS-support check is visible */
+	ANDROID_CPU_X86_FEATURE_RDRAND = (1 << 7), /* cpuid leaf 1, regs[2] bit 30 */
+	ANDROID_CPU_X86_FEATURE_AVX2 = (1 << 8), /* cpuid leaf 7, regs[1] bit 5 */
+	ANDROID_CPU_X86_FEATURE_SHA_NI = (1 << 9), /* cpuid leaf 7, regs[1] bit 29 */
+};
+
+/* The bit flags corresponding to the output of android_getCpuFeatures()
+ * when android_getCpuFamily() returns ANDROID_CPU_FAMILY_MIPS
+ * or ANDROID_CPU_FAMILY_MIPS64.  Values are:
+ *
+ *   R6:
+ *     CPU executes MIPS Release 6 instructions natively, and
+ *     supports obsoleted R1..R5 instructions only via kernel traps.
+ *
+ *   MSA:
+ *     CPU supports MIPS SIMD Architecture instructions.
+ */
+enum
+{
+	ANDROID_CPU_MIPS_FEATURE_R6    = (1 << 0), /* detection code sets this from the HWCAP_MIPS_R6 bit of AT_HWCAP */
+	ANDROID_CPU_MIPS_FEATURE_MSA   = (1 << 1), /* from HWCAP_MIPS_MSA */
+};
+
+
+/* Return the number of CPU cores detected on this device. */
+extern int android_getCpuCount(void);
+
+/* The following is used to force the CPU count and features
+ * mask in sandboxed processes. Under 4.1 and higher, these processes
+ * cannot access /proc, which is the only way to get information from
+ * the kernel about the current hardware (at least on ARM).
+ *
+ * It _must_ be called only once, and before any android_getCpuXXX
+ * function; any other use will fail.
+ *
+ * This function returns 1 on success, and 0 on failure.
+ */
+extern int android_setCpu(int      cpu_count,
+                          uint64_t cpu_features);
+
+#ifdef __arm__
+/* Retrieve the ARM 32-bit CPUID value from the kernel.
+ * Note that this cannot work on sandboxed processes under 4.1 and
+ * higher, unless you called android_setCpuArm() before.
+ */
+extern uint32_t android_getCpuIdArm(void);
+
+/* An ARM-specific variant of android_setCpu() that also allows you
+ * to set the ARM CPUID field.
+ */
+extern int android_setCpuArm(int      cpu_count,
+                             uint64_t cpu_features,
+                             uint32_t cpu_id);
+#endif
+
+__END_DECLS
+
+#endif /* CPU_FEATURES_H */
diff --git a/winpr/libwinpr/sysinfo/sysinfo.c b/winpr/libwinpr/sysinfo/sysinfo.c
index 4121ff80f..27e9c5780 100644
--- a/winpr/libwinpr/sysinfo/sysinfo.c
+++ b/winpr/libwinpr/sysinfo/sysinfo.c
@@ -25,7 +25,11 @@
 #include <winpr/sysinfo.h>
 #include <winpr/platform.h>
 
-#if defined(__linux__) && defined(__GNUC__)
+#if defined(ANDROID)
+#include "cpufeatures/cpu-features.h"
+#endif
+
+#if defined(__linux__)
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
@@ -72,19 +76,49 @@ defined(__OpenBSD__) || defined(__DragonFly__)
 #include <sys/sysctl.h>
 #endif
 
-static DWORD GetProcessorArchitecture()
+static DWORD GetProcessorArchitecture(void)
 {
 	DWORD cpuArch = PROCESSOR_ARCHITECTURE_UNKNOWN;
-#if defined(_M_AMD64)
-	cpuArch = PROCESSOR_ARCHITECTURE_AMD64;
-#elif defined(_M_IX86)
-	cpuArch = PROCESSOR_ARCHITECTURE_INTEL;
+#if defined(ANDROID)
+	AndroidCpuFamily family = android_getCpuFamily();
+
+	switch (family)
+	{
+	    case ANDROID_CPU_FAMILY_ARM:
+		    return PROCESSOR_ARCHITECTURE_ARM;
+
+	    case ANDROID_CPU_FAMILY_X86:
+		    return PROCESSOR_ARCHITECTURE_INTEL;
+
+	    case ANDROID_CPU_FAMILY_MIPS:
+		    return PROCESSOR_ARCHITECTURE_MIPS;
+
+	    case ANDROID_CPU_FAMILY_ARM64:
+		    return PROCESSOR_ARCHITECTURE_ARM64;
+
+	    case ANDROID_CPU_FAMILY_X86_64:
+		    return PROCESSOR_ARCHITECTURE_AMD64;
+
+	    case ANDROID_CPU_FAMILY_MIPS64:
+		    return PROCESSOR_ARCHITECTURE_MIPS64;
+
+	    default:
+		    return PROCESSOR_ARCHITECTURE_UNKNOWN;
+	}
+
 #elif defined(_M_ARM)
 	cpuArch = PROCESSOR_ARCHITECTURE_ARM;
-#elif defined(_M_IA64)
-	cpuArch = PROCESSOR_ARCHITECTURE_IA64;
+#elif defined(_M_IX86)
+	cpuArch = PROCESSOR_ARCHITECTURE_INTEL;
+#elif defined(_M_MIPS64)
+	/* Needs to be before __mips__ since the compiler defines both */
+	cpuArch = PROCESSOR_ARCHITECTURE_MIPS64;
 #elif defined(_M_MIPS)
 	cpuArch = PROCESSOR_ARCHITECTURE_MIPS;
+#elif defined(_M_ARM64)
+	cpuArch = PROCESSOR_ARCHITECTURE_ARM64;
+#elif defined(_M_AMD64)
+	cpuArch = PROCESSOR_ARCHITECTURE_AMD64;
 #elif defined(_M_PPC)
 	cpuArch = PROCESSOR_ARCHITECTURE_PPC;
 #elif defined(_M_ALPHA)
@@ -93,11 +127,13 @@ static DWORD GetProcessorArchitecture()
 	return cpuArch;
 }
 
-static DWORD GetNumberOfProcessors()
+static DWORD GetNumberOfProcessors(void)
 {
 	DWORD numCPUs = 1;
+#if defined(ANDROID)
+	return android_getCpuCount();
 	/* TODO: iOS */
-#if defined(__linux__) || defined(__sun) || defined(_AIX)
+#elif defined(__linux__) || defined(__sun) || defined(_AIX)
 	numCPUs = (DWORD) sysconf(_SC_NPROCESSORS_ONLN);
 #elif defined(__MACOSX__) || \
 	defined(__FreeBSD__) || defined(__NetBSD__) || \
@@ -130,19 +166,21 @@ static DWORD GetNumberOfProcessors()
 	return numCPUs;
 }
 
-static DWORD GetSystemPageSize()
+static DWORD GetSystemPageSize(void)
 {
 	DWORD dwPageSize = 0;
 	long sc_page_size = -1;
-
 #if defined(_SC_PAGESIZE)
+
 	if (sc_page_size < 0)
 		sc_page_size = sysconf(_SC_PAGESIZE);
-#endif
 
+#endif
 #if defined(_SC_PAGE_SIZE)
+
 	if (sc_page_size < 0)
 		sc_page_size = sysconf(_SC_PAGE_SIZE);
+
 #endif
 
 	if (sc_page_size > 0)
@@ -199,6 +237,7 @@ void GetSystemTime(LPSYSTEMTIME lpSystemTime)
 
 BOOL SetSystemTime(CONST SYSTEMTIME* lpSystemTime)
 {
+	/* TODO: Implement */
 	return FALSE;
 }
 
@@ -227,6 +266,7 @@ VOID GetLocalTime(LPSYSTEMTIME lpSystemTime)
 
 BOOL SetLocalTime(CONST SYSTEMTIME* lpSystemTime)
 {
+	/* TODO: Implement */
 	return FALSE;
 }
 
@@ -241,8 +281,10 @@ VOID GetSystemTimeAsFileTime(LPFILETIME lpSystemTimeAsFileTime)
 	lpSystemTimeAsFileTime->dwHighDateTime = time64.HighPart;
 }
 
-BOOL GetSystemTimeAdjustment(PDWORD lpTimeAdjustment, PDWORD lpTimeIncrement, PBOOL lpTimeAdjustmentDisabled)
+BOOL GetSystemTimeAdjustment(PDWORD lpTimeAdjustment, PDWORD lpTimeIncrement,
+                             PBOOL lpTimeAdjustmentDisabled)
 {
+	/* TODO: Implement */
 	return FALSE;
 }
 
@@ -283,9 +325,10 @@ DWORD GetTickCount(void)
 BOOL GetVersionExA(LPOSVERSIONINFOA lpVersionInformation)
 {
 #ifdef _UWP
+
 	/* Windows 10 Version Info */
 	if ((lpVersionInformation->dwOSVersionInfoSize == sizeof(OSVERSIONINFOA)) ||
-		(lpVersionInformation->dwOSVersionInfoSize == sizeof(OSVERSIONINFOEXA)))
+	    (lpVersionInformation->dwOSVersionInfoSize == sizeof(OSVERSIONINFOEXA)))
 	{
 		lpVersionInformation->dwMajorVersion = 10;
 		lpVersionInformation->dwMinorVersion = 0;
@@ -305,10 +348,12 @@ BOOL GetVersionExA(LPOSVERSIONINFOA lpVersionInformation)
 
 		return TRUE;
 	}
+
 #else
+
 	/* Windows 7 SP1 Version Info */
 	if ((lpVersionInformation->dwOSVersionInfoSize == sizeof(OSVERSIONINFOA)) ||
-		(lpVersionInformation->dwOSVersionInfoSize == sizeof(OSVERSIONINFOEXA)))
+	    (lpVersionInformation->dwOSVersionInfoSize == sizeof(OSVERSIONINFOEXA)))
 	{
 		lpVersionInformation->dwMajorVersion = 6;
 		lpVersionInformation->dwMinorVersion = 1;
@@ -328,8 +373,8 @@ BOOL GetVersionExA(LPOSVERSIONINFOA lpVersionInformation)
 
 		return TRUE;
 	}
-#endif
 
+#endif
 	return FALSE;
 }
 
@@ -356,7 +401,7 @@ BOOL GetComputerNameA(LPSTR lpBuffer, LPDWORD lpnSize)
 	dot = strchr(hostname, '.');
 
 	if (dot)
-		length = (int) (dot - hostname);
+		length = (int)(dot - hostname);
 
 	if (*lpnSize <= (DWORD) length)
 	{
@@ -371,7 +416,6 @@ BOOL GetComputerNameA(LPSTR lpBuffer, LPDWORD lpnSize)
 	CopyMemory(lpBuffer, hostname, length);
 	lpBuffer[length] = '\0';
 	*lpnSize = length;
-
 	return TRUE;
 }
 
@@ -390,13 +434,13 @@ BOOL GetComputerNameExA(COMPUTER_NAME_FORMAT NameType, LPSTR lpBuffer, LPDWORD l
 
 	switch (NameType)
 	{
-		case ComputerNameDnsHostname:
-		case ComputerNameDnsDomain:
-		case ComputerNameDnsFullyQualified:
-		case ComputerNamePhysicalDnsHostname:
-		case ComputerNamePhysicalDnsDomain:
-		case ComputerNamePhysicalDnsFullyQualified:
-			if (*lpnSize <= (DWORD) length)
+	    case ComputerNameDnsHostname:
+	    case ComputerNameDnsDomain:
+	    case ComputerNameDnsFullyQualified:
+	    case ComputerNamePhysicalDnsHostname:
+	    case ComputerNamePhysicalDnsDomain:
+	    case ComputerNamePhysicalDnsFullyQualified:
+		    if (*lpnSize <= (DWORD) length)
 			{
 				*lpnSize = length + 1;
 				SetLastError(ERROR_MORE_DATA);
@@ -408,10 +452,10 @@ BOOL GetComputerNameExA(COMPUTER_NAME_FORMAT NameType, LPSTR lpBuffer, LPDWORD l
 
 			CopyMemory(lpBuffer, hostname, length);
 			lpBuffer[length] = '\0';
-			break;
+		    break;
 
-		default:
-			return FALSE;
+	    default:
+		    return FALSE;
 	}
 
 	return TRUE;
@@ -493,31 +537,31 @@ ULONGLONG winpr_GetTickCount64(void)
 #define E_BITS_AVX      (E_BIT_XMM|E_BIT_YMM)
 
 static void cpuid(
-	unsigned info,
-	unsigned* eax,
-	unsigned* ebx,
-	unsigned* ecx,
-	unsigned* edx)
+    unsigned info,
+    unsigned* eax,
+    unsigned* ebx,
+    unsigned* ecx,
+    unsigned* edx)
 {
 #ifdef __GNUC__
 	*eax = *ebx = *ecx = *edx = 0;
 	__asm volatile
 	(
-		/* The EBX (or RBX register on x86_64) is used for the PIC base address
+	    /* The EBX (or RBX register on x86_64) is used for the PIC base address
 		 * and must not be corrupted by our inline assembly.
 		 */
 #ifdef _M_IX86
-		"mov %%ebx, %%esi;"
-		"cpuid;"
-		"xchg %%ebx, %%esi;"
+	    "mov %%ebx, %%esi;"
+	    "cpuid;"
+	    "xchg %%ebx, %%esi;"
 #else
-		"mov %%rbx, %%rsi;"
-		"cpuid;"
-		"xchg %%rbx, %%rsi;"
+	    "mov %%rbx, %%rsi;"
+	    "cpuid;"
+	    "xchg %%rbx, %%rsi;"
 #endif
-	: "=a"(*eax), "=S"(*ebx), "=c"(*ecx), "=d"(*edx)
-			: "0"(info)
-		);
+	    : "=a"(*eax), "=S"(*ebx), "=c"(*ecx), "=d"(*edx)
+	    : "0"(info)
+	);
 #elif defined(_MSC_VER)
 	int a[4];
 	__cpuid(a, info);
@@ -595,86 +639,98 @@ static unsigned GetARMCPUCaps(void)
 BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature)
 {
 	BOOL ret = FALSE;
-#ifdef _M_ARM
-#ifdef __linux__
-	unsigned caps;
-	caps = GetARMCPUCaps();
+#if defined(ANDROID)
+	const uint64_t features = android_getCpuFeatures();
 
 	switch (ProcessorFeature)
 	{
-		case PF_ARM_NEON_INSTRUCTIONS_AVAILABLE:
-		case PF_ARM_NEON:
-			if (caps & HWCAP_NEON)
+	    case PF_ARM_NEON_INSTRUCTIONS_AVAILABLE:
+	    case PF_ARM_NEON:
+		    return features & ANDROID_CPU_ARM_FEATURE_NEON;
+
+	    default:
+		    return FALSE;
+	}
+
+#elif defined(_M_ARM)
+#ifdef __linux__
+	const unsigned caps = GetARMCPUCaps();
+
+	switch (ProcessorFeature)
+	{
+	    case PF_ARM_NEON_INSTRUCTIONS_AVAILABLE:
+	    case PF_ARM_NEON:
+		    if (caps & HWCAP_NEON)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_ARM_THUMB:
-			if (caps & HWCAP_THUMB)
+	    case PF_ARM_THUMB:
+		    if (caps & HWCAP_THUMB)
 				ret = TRUE;
 
-		case PF_ARM_VFP_32_REGISTERS_AVAILABLE:
-			if (caps & HWCAP_VFPD32)
+	    case PF_ARM_VFP_32_REGISTERS_AVAILABLE:
+		    if (caps & HWCAP_VFPD32)
 				ret = TRUE;
 
-		case PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE:
-			if ((caps & HWCAP_IDIVA) || (caps & HWCAP_IDIVT))
+	    case PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE:
+		    if ((caps & HWCAP_IDIVA) || (caps & HWCAP_IDIVT))
 				ret = TRUE;
 
-		case PF_ARM_VFP3:
-			if (caps & HWCAP_VFPv3)
+	    case PF_ARM_VFP3:
+		    if (caps & HWCAP_VFPv3)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_ARM_JAZELLE:
-			if (caps & HWCAP_JAVA)
+	    case PF_ARM_JAZELLE:
+		    if (caps & HWCAP_JAVA)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_ARM_DSP:
-			if (caps & HWCAP_EDSP)
+	    case PF_ARM_DSP:
+		    if (caps & HWCAP_EDSP)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_ARM_MPU:
-			if (caps & HWCAP_EDSP)
+	    case PF_ARM_MPU:
+		    if (caps & HWCAP_EDSP)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_ARM_THUMB2:
-			if ((caps & HWCAP_IDIVT) || (caps & HWCAP_VFPv4))
+	    case PF_ARM_THUMB2:
+		    if ((caps & HWCAP_IDIVT) || (caps & HWCAP_VFPv4))
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_ARM_T2EE:
-			if (caps & HWCAP_THUMBEE)
+	    case PF_ARM_T2EE:
+		    if (caps & HWCAP_THUMBEE)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_ARM_INTEL_WMMX:
-			if (caps & HWCAP_IWMMXT)
+	    case PF_ARM_INTEL_WMMX:
+		    if (caps & HWCAP_IWMMXT)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		default:
-			break;
+	    default:
+		    break;
 	}
 
 #elif defined(__APPLE__) // __linux__
 
 	switch (ProcessorFeature)
 	{
-		case PF_ARM_NEON_INSTRUCTIONS_AVAILABLE:
-		case PF_ARM_NEON:
-			ret = TRUE;
-			break;
+	    case PF_ARM_NEON_INSTRUCTIONS_AVAILABLE:
+	    case PF_ARM_NEON:
+		    ret = TRUE;
+		    break;
 	}
 
 #endif // __linux__
@@ -685,38 +741,38 @@ BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature)
 
 	switch (ProcessorFeature)
 	{
-		case PF_MMX_INSTRUCTIONS_AVAILABLE:
-			if (d & D_BIT_MMX)
+	    case PF_MMX_INSTRUCTIONS_AVAILABLE:
+		    if (d & D_BIT_MMX)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_XMMI_INSTRUCTIONS_AVAILABLE:
-			if (d & D_BIT_SSE)
+	    case PF_XMMI_INSTRUCTIONS_AVAILABLE:
+		    if (d & D_BIT_SSE)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_XMMI64_INSTRUCTIONS_AVAILABLE:
-			if (d & D_BIT_SSE2)
+	    case PF_XMMI64_INSTRUCTIONS_AVAILABLE:
+		    if (d & D_BIT_SSE2)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_3DNOW_INSTRUCTIONS_AVAILABLE:
-			if (d & D_BIT_3DN)
+	    case PF_3DNOW_INSTRUCTIONS_AVAILABLE:
+		    if (d & D_BIT_3DN)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_SSE3_INSTRUCTIONS_AVAILABLE:
-			if (c & C_BIT_SSE3)
+	    case PF_SSE3_INSTRUCTIONS_AVAILABLE:
+		    if (c & C_BIT_SSE3)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		default:
-			break;
+	    default:
+		    break;
 	}
 
 #endif // __GNUC__
@@ -731,11 +787,9 @@ DWORD GetTickCountPrecise(void)
 #ifdef _WIN32
 	LARGE_INTEGER freq;
 	LARGE_INTEGER current;
-
 	QueryPerformanceFrequency(&freq);
 	QueryPerformanceCounter(&current);
-
-	return (DWORD) (current.QuadPart * 1000LL / freq.QuadPart);
+	return (DWORD)(current.QuadPart * 1000LL / freq.QuadPart);
 #else
 	return GetTickCount();
 #endif
@@ -751,35 +805,35 @@ BOOL IsProcessorFeaturePresentEx(DWORD ProcessorFeature)
 
 	switch (ProcessorFeature)
 	{
-		case PF_EX_ARM_VFP1:
-			if (caps & HWCAP_VFP)
+	    case PF_EX_ARM_VFP1:
+		    if (caps & HWCAP_VFP)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_EX_ARM_VFP3D16:
-			if (caps & HWCAP_VFPv3D16)
+	    case PF_EX_ARM_VFP3D16:
+		    if (caps & HWCAP_VFPv3D16)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_EX_ARM_VFP4:
-			if (caps & HWCAP_VFPv4)
+	    case PF_EX_ARM_VFP4:
+		    if (caps & HWCAP_VFPv4)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_EX_ARM_IDIVA:
-			if (caps & HWCAP_IDIVA)
+	    case PF_EX_ARM_IDIVA:
+		    if (caps & HWCAP_IDIVA)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_EX_ARM_IDIVT:
-			if (caps & HWCAP_IDIVT)
+	    case PF_EX_ARM_IDIVT:
+		    if (caps & HWCAP_IDIVT)
 				ret = TRUE;
 
-			break;
+		    break;
 	}
 
 #endif // __linux__
@@ -789,47 +843,48 @@ BOOL IsProcessorFeaturePresentEx(DWORD ProcessorFeature)
 
 	switch (ProcessorFeature)
 	{
-		case PF_EX_LZCNT:
-			{
-				unsigned a81, b81, c81, d81;
+	    case PF_EX_LZCNT:
+	        {
+		        unsigned a81, b81, c81, d81;
 				cpuid(0x80000001, &a81, &b81, &c81, &d81);
+
 				if (c81 & C81_BIT_LZCNT)
 					ret = TRUE;
-			}
-			break;
+	        }
+		    break;
 
-		case PF_EX_3DNOW_PREFETCH:
-			if (c & C_BIT_3DNP)
+	    case PF_EX_3DNOW_PREFETCH:
+		    if (c & C_BIT_3DNP)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_EX_SSSE3:
-			if (c & C_BIT_SSSE3)
+	    case PF_EX_SSSE3:
+		    if (c & C_BIT_SSSE3)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_EX_SSE41:
-			if (c & C_BIT_SSE41)
+	    case PF_EX_SSE41:
+		    if (c & C_BIT_SSE41)
 				ret = TRUE;
 
-			break;
+		    break;
 
-		case PF_EX_SSE42:
-			if (c & C_BIT_SSE42)
+	    case PF_EX_SSE42:
+		    if (c & C_BIT_SSE42)
 				ret = TRUE;
 
-			break;
+		    break;
 #if defined(__GNUC__) && defined(__AVX__)
 
-		case PF_EX_AVX:
-		case PF_EX_FMA:
-		case PF_EX_AVX_AES:
-		case PF_EX_AVX_PCLMULQDQ:
-			{
-				/* Check for general AVX support */
-				if ((c & C_BITS_AVX) != C_BITS_AVX)
+	    case PF_EX_AVX:
+	    case PF_EX_FMA:
+	    case PF_EX_AVX_AES:
+	    case PF_EX_AVX_PCLMULQDQ:
+	        {
+		        /* Check for general AVX support */
+		        if ((c & C_BITS_AVX) != C_BITS_AVX)
 					break;
 
 				int e, f;
@@ -840,35 +895,35 @@ BOOL IsProcessorFeaturePresentEx(DWORD ProcessorFeature)
 				{
 					switch (ProcessorFeature)
 					{
-						case PF_EX_AVX:
-							ret = TRUE;
-							break;
+					    case PF_EX_AVX:
+						    ret = TRUE;
+						    break;
 
-						case PF_EX_FMA:
-							if (c & C_BIT_FMA)
+					    case PF_EX_FMA:
+						    if (c & C_BIT_FMA)
 								ret = TRUE;
 
-							break;
+						    break;
 
-						case PF_EX_AVX_AES:
-							if (c & C_BIT_AES)
+					    case PF_EX_AVX_AES:
+						    if (c & C_BIT_AES)
 								ret = TRUE;
 
-							break;
+						    break;
 
-						case PF_EX_AVX_PCLMULQDQ:
-							if (c & C_BIT_PCLMULQDQ)
+					    case PF_EX_AVX_PCLMULQDQ:
+						    if (c & C_BIT_PCLMULQDQ)
 								ret = TRUE;
 
-							break;
+						    break;
 					}
 				}
-			}
-			break;
+	        }
+		    break;
 #endif //__AVX__
 
-		default:
-			break;
+	    default:
+		    break;
 	}
 
 #endif