From d2e1248b099f44675b0a1c827ed83562c5223557 Mon Sep 17 00:00:00 2001 From: Armin Novak Date: Thu, 15 Nov 2018 17:52:43 +0100 Subject: [PATCH] Fixed profiler API * Duplicate the name internally to avoid issues with stack * Made API opaque and removed direct dereferencing of struct. --- include/freerdp/utils/profiler.h | 11 +-- libfreerdp/codec/nsc_sse2.c | 2 +- libfreerdp/codec/rfx_neon.c | 129 +++++++++++++------------------ libfreerdp/codec/rfx_sse2.c | 8 +- libfreerdp/utils/profiler.c | 26 ++++--- 5 files changed, 80 insertions(+), 96 deletions(-) diff --git a/include/freerdp/utils/profiler.h b/include/freerdp/utils/profiler.h index e80ca1ce5..a0ff76619 100644 --- a/include/freerdp/utils/profiler.h +++ b/include/freerdp/utils/profiler.h @@ -23,18 +23,13 @@ #include #include -struct _PROFILER -{ - char* name; - STOPWATCH* stopwatch; -}; typedef struct _PROFILER PROFILER; #ifdef __cplusplus extern "C" { #endif -FREERDP_API PROFILER* profiler_create(char* name); +FREERDP_API PROFILER* profiler_create(const char* name); FREERDP_API void profiler_free(PROFILER* profiler); FREERDP_API void profiler_enter(PROFILER* profiler); @@ -49,7 +44,7 @@ FREERDP_API void profiler_print_footer(void); #endif #ifdef WITH_PROFILER -#define IF_PROFILER(then) then +#define PROFILER_RENAME(prof, name) do { profiler_free(prof); prof = profiler_create(name); } while(0) #define PROFILER_DEFINE(prof) PROFILER* prof; #define PROFILER_CREATE(prof,name) prof = profiler_create(name); #define PROFILER_FREE(prof) profiler_free(prof); @@ -59,7 +54,7 @@ FREERDP_API void profiler_print_footer(void); #define PROFILER_PRINT(prof) profiler_print(prof); #define PROFILER_PRINT_FOOTER profiler_print_footer(); #else -#define IF_PROFILER(then) do { } while (0) +#define PROFILER_RENAME(prof, name) do { } while (0) #define PROFILER_DEFINE(prof) #define PROFILER_CREATE(prof,name) do { } while (0); diff --git a/libfreerdp/codec/nsc_sse2.c b/libfreerdp/codec/nsc_sse2.c index 6bd7147b3..149e80a83 100644 --- a/libfreerdp/codec/nsc_sse2.c +++ b/libfreerdp/codec/nsc_sse2.c @@ -401,6 +401,6 @@ void nsc_init_sse2(NSC_CONTEXT* context) if (!IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) return; - IF_PROFILER(context->priv->prof_nsc_encode->name = "nsc_encode_sse2"); + PROFILER_RENAME(context->priv->prof_nsc_encode, "nsc_encode_sse2"); context->encode = nsc_encode_sse2; } diff --git a/libfreerdp/codec/rfx_neon.c b/libfreerdp/codec/rfx_neon.c index 1632f983a..f96f57d70 100644 --- a/libfreerdp/codec/rfx_neon.c +++ b/libfreerdp/codec/rfx_neon.c @@ -35,7 +35,7 @@ /* rfx_decode_YCbCr_to_RGB_NEON code now resides in the primitives library. */ static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -rfx_quantization_decode_block_NEON(INT16 * buffer, const int buffer_size, const UINT32 factor) +rfx_quantization_decode_block_NEON(INT16* buffer, const int buffer_size, const UINT32 factor) { int16x8_t quantFactors = vdupq_n_s16(factor); int16x8_t* buf = (int16x8_t*)buffer; @@ -48,10 +48,10 @@ rfx_quantization_decode_block_NEON(INT16 * buffer, const int buffer_size, const vst1q_s16((INT16*)buf, val); buf++; } - while(buf < buf_end); + while (buf < buf_end); } -void rfx_quantization_decode_NEON(INT16 * buffer, const UINT32 * quantVals) +void rfx_quantization_decode_NEON(INT16* buffer, const UINT32* quantVals) { rfx_quantization_decode_block_NEON(&buffer[0], 1024, quantVals[8] - 1); /* HL1 */ rfx_quantization_decode_block_NEON(&buffer[1024], 1024, quantVals[7] - 1); /* LH1 */ @@ -68,21 +68,20 @@ void rfx_quantization_decode_NEON(INT16 * buffer, const UINT32 * quantVals) static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -rfx_dwt_2d_decode_block_horiz_NEON(INT16 * l, INT16 * h, INT16 * dst, int subband_width) +rfx_dwt_2d_decode_block_horiz_NEON(INT16* l, INT16* h, INT16* dst, int subband_width) { int y, n; - INT16 * l_ptr = l; - INT16 * h_ptr = h; - INT16 * dst_ptr = dst; + INT16* l_ptr = l; + INT16* h_ptr = h; + INT16* dst_ptr = dst; for (y = 0; y < subband_width; y++) { /* Even coefficients */ - for (n = 0; n < subband_width; n+=8) + for (n = 0; n < subband_width; n += 8) { // dst[2n] = l[n] - ((h[n-1] + h[n] + 1) >> 1); int16x8_t l_n = vld1q_s16(l_ptr); - int16x8_t h_n = vld1q_s16(h_ptr); int16x8_t h_n_m = vld1q_s16(h_ptr - 1); @@ -95,29 +94,25 @@ rfx_dwt_2d_decode_block_horiz_NEON(INT16 * l, INT16 * h, INT16 * dst, int subban int16x8_t tmp_n = vaddq_s16(h_n, h_n_m); tmp_n = vaddq_s16(tmp_n, vdupq_n_s16(1)); tmp_n = vshrq_n_s16(tmp_n, 1); - int16x8_t dst_n = vsubq_s16(l_n, tmp_n); - vst1q_s16(l_ptr, dst_n); - - l_ptr+=8; - h_ptr+=8; + l_ptr += 8; + h_ptr += 8; } + l_ptr -= subband_width; h_ptr -= subband_width; /* Odd coefficients */ - for (n = 0; n < subband_width; n+=8) + for (n = 0; n < subband_width; n += 8) { // dst[2n + 1] = (h[n] << 1) + ((dst[2n] + dst[2n + 2]) >> 1); - int16x8_t h_n = vld1q_s16(h_ptr); - h_n = vshlq_n_s16(h_n, 1); - int16x8x2_t dst_n; dst_n.val[0] = vld1q_s16(l_ptr); int16x8_t dst_n_p = vld1q_s16(l_ptr + 1); + if (n == subband_width - 8) { int16_t last = vgetq_lane_s16(dst_n_p, 6); @@ -126,39 +121,34 @@ rfx_dwt_2d_decode_block_horiz_NEON(INT16 * l, INT16 * h, INT16 * dst, int subban dst_n.val[1] = vaddq_s16(dst_n_p, dst_n.val[0]); dst_n.val[1] = vshrq_n_s16(dst_n.val[1], 1); - dst_n.val[1] = vaddq_s16(dst_n.val[1], h_n); - vst2q_s16(dst_ptr, dst_n); - - l_ptr+=8; - h_ptr+=8; - dst_ptr+=16; + l_ptr += 8; + h_ptr += 8; + dst_ptr += 16; } } } static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -rfx_dwt_2d_decode_block_vert_NEON(INT16 * l, INT16 * h, INT16 * dst, int subband_width) +rfx_dwt_2d_decode_block_vert_NEON(INT16* l, INT16* h, INT16* dst, int subband_width) { int x, n; - INT16 * l_ptr = l; - INT16 * h_ptr = h; - INT16 * dst_ptr = dst; - + INT16* l_ptr = l; + INT16* h_ptr = h; + INT16* dst_ptr = dst; int total_width = subband_width + subband_width; /* Even coefficients */ for (n = 0; n < subband_width; n++) { - for (x = 0; x < total_width; x+=8) + for (x = 0; x < total_width; x += 8) { // dst[2n] = l[n] - ((h[n-1] + h[n] + 1) >> 1); - int16x8_t l_n = vld1q_s16(l_ptr); int16x8_t h_n = vld1q_s16(h_ptr); - int16x8_t tmp_n = vaddq_s16(h_n, vdupq_n_s16(1));; + if (n == 0) tmp_n = vaddq_s16(tmp_n, h_n); else @@ -166,16 +156,16 @@ rfx_dwt_2d_decode_block_vert_NEON(INT16 * l, INT16 * h, INT16 * dst, int subband int16x8_t h_n_m = vld1q_s16((h_ptr - total_width)); tmp_n = vaddq_s16(tmp_n, h_n_m); } - tmp_n = vshrq_n_s16(tmp_n, 1); + tmp_n = vshrq_n_s16(tmp_n, 1); int16x8_t dst_n = vsubq_s16(l_n, tmp_n); vst1q_s16(dst_ptr, dst_n); - - l_ptr+=8; - h_ptr+=8; - dst_ptr+=8; + l_ptr += 8; + h_ptr += 8; + dst_ptr += 8; } - dst_ptr+=total_width; + + dst_ptr += total_width; } h_ptr = h; @@ -184,78 +174,69 @@ rfx_dwt_2d_decode_block_vert_NEON(INT16 * l, INT16 * h, INT16 * dst, int subband /* Odd coefficients */ for (n = 0; n < subband_width; n++) { - for (x = 0; x < total_width; x+=8) + for (x = 0; x < total_width; x += 8) { - // dst[2n + 1] = (h[n] << 1) + ((dst[2n] + dst[2n + 2]) >> 1); - int16x8_t h_n = vld1q_s16(h_ptr); - int16x8_t dst_n_m = vld1q_s16(dst_ptr - total_width); + // dst[2n + 1] = (h[n] << 1) + ((dst[2n] + dst[2n + 2]) >> 1); + int16x8_t h_n = vld1q_s16(h_ptr); + int16x8_t dst_n_m = vld1q_s16(dst_ptr - total_width); + h_n = vshlq_n_s16(h_n, 1); + int16x8_t tmp_n = dst_n_m; - h_n = vshlq_n_s16(h_n, 1); + if (n == subband_width - 1) + tmp_n = vaddq_s16(tmp_n, dst_n_m); + else + { + int16x8_t dst_n_p = vld1q_s16((dst_ptr + total_width)); + tmp_n = vaddq_s16(tmp_n, dst_n_p); + } - int16x8_t tmp_n = dst_n_m; - if (n == subband_width - 1) - tmp_n = vaddq_s16(tmp_n, dst_n_m); - else - { - int16x8_t dst_n_p = vld1q_s16((dst_ptr + total_width)); - tmp_n = vaddq_s16(tmp_n, dst_n_p); + tmp_n = vshrq_n_s16(tmp_n, 1); + int16x8_t dst_n = vaddq_s16(tmp_n, h_n); + vst1q_s16(dst_ptr, dst_n); + h_ptr += 8; + dst_ptr += 8; } - tmp_n = vshrq_n_s16(tmp_n, 1); - int16x8_t dst_n = vaddq_s16(tmp_n, h_n); - vst1q_s16(dst_ptr, dst_n); - - h_ptr+=8; - dst_ptr+=8; + dst_ptr += total_width; } - dst_ptr+=total_width; -} } static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -rfx_dwt_2d_decode_block_NEON(INT16 * buffer, INT16 * idwt, int subband_width) +rfx_dwt_2d_decode_block_NEON(INT16* buffer, INT16* idwt, int subband_width) { - INT16 * hl, * lh, * hh, * ll; - INT16 * l_dst, * h_dst; - + INT16* hl, * lh, * hh, * ll; + INT16* l_dst, * h_dst; /* Inverse DWT in horizontal direction, results in 2 sub-bands in L, H order in tmp buffer idwt. */ /* The 4 sub-bands are stored in HL(0), LH(1), HH(2), LL(3) order. */ /* The lower part L uses LL(3) and HL(0). */ /* The higher part H uses LH(1) and HH(2). */ - ll = buffer + subband_width * subband_width * 3; hl = buffer; l_dst = idwt; - rfx_dwt_2d_decode_block_horiz_NEON(ll, hl, l_dst, subband_width); - lh = buffer + subband_width * subband_width; hh = buffer + subband_width * subband_width * 2; h_dst = idwt + subband_width * subband_width * 2; - rfx_dwt_2d_decode_block_horiz_NEON(lh, hh, h_dst, subband_width); - /* Inverse DWT in vertical direction, results are stored in original buffer. */ rfx_dwt_2d_decode_block_vert_NEON(l_dst, h_dst, buffer, subband_width); } -void rfx_dwt_2d_decode_NEON(INT16 * buffer, INT16 * dwt_buffer) +void rfx_dwt_2d_decode_NEON(INT16* buffer, INT16* dwt_buffer) { rfx_dwt_2d_decode_block_NEON(buffer + 3840, dwt_buffer, 8); rfx_dwt_2d_decode_block_NEON(buffer + 3072, dwt_buffer, 16); rfx_dwt_2d_decode_block_NEON(buffer, dwt_buffer, 32); } -void rfx_init_neon(RFX_CONTEXT * context) +void rfx_init_neon(RFX_CONTEXT* context) { if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) { DEBUG_RFX("Using NEON optimizations"); - - IF_PROFILER(context->priv->prof_rfx_ycbcr_to_rgb->name = "rfx_decode_YCbCr_to_RGB_NEON"); - IF_PROFILER(context->priv->prof_rfx_quantization_decode->name = "rfx_quantization_decode_NEON"); - IF_PROFILER(context->priv->prof_rfx_dwt_2d_decode->name = "rfx_dwt_2d_decode_NEON"); - + PROFILER_RENAME(context->priv->prof_rfx_ycbcr_to_rgb, "rfx_decode_YCbCr_to_RGB_NEON"); + PROFILER_RENAME(context->priv->prof_rfx_quantization_decode, "rfx_quantization_decode_NEON"); + PROFILER_RENAME(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode_NEON"); context->quantization_decode = rfx_quantization_decode_NEON; context->dwt_2d_decode = rfx_dwt_2d_decode_NEON; } diff --git a/libfreerdp/codec/rfx_sse2.c b/libfreerdp/codec/rfx_sse2.c index 94c6b21f5..0a28087cd 100644 --- a/libfreerdp/codec/rfx_sse2.c +++ b/libfreerdp/codec/rfx_sse2.c @@ -455,10 +455,10 @@ void rfx_init_sse2(RFX_CONTEXT* context) if (!IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) return; - IF_PROFILER(context->priv->prof_rfx_quantization_decode->name = "rfx_quantization_decode_sse2"); - IF_PROFILER(context->priv->prof_rfx_quantization_encode->name = "rfx_quantization_encode_sse2"); - IF_PROFILER(context->priv->prof_rfx_dwt_2d_decode->name = "rfx_dwt_2d_decode_sse2"); - IF_PROFILER(context->priv->prof_rfx_dwt_2d_encode->name = "rfx_dwt_2d_encode_sse2"); + PROFILER_RENAME(context->priv->prof_rfx_quantization_decode, "rfx_quantization_decode_sse2"); + PROFILER_RENAME(context->priv->prof_rfx_quantization_encode, "rfx_quantization_encode_sse2"); + PROFILER_RENAME(context->priv->prof_rfx_dwt_2d_decode, "rfx_dwt_2d_decode_sse2"); + PROFILER_RENAME(context->priv->prof_rfx_dwt_2d_encode, "rfx_dwt_2d_encode_sse2"); context->quantization_decode = rfx_quantization_decode_sse2; context->quantization_encode = rfx_quantization_encode_sse2; context->dwt_2d_decode = rfx_dwt_2d_decode_sse2; diff --git a/libfreerdp/utils/profiler.c b/libfreerdp/utils/profiler.c index 61b7a5298..a240e4d00 100644 --- a/libfreerdp/utils/profiler.c +++ b/libfreerdp/utils/profiler.c @@ -29,30 +29,38 @@ #define TAG FREERDP_TAG("utils") -PROFILER* profiler_create(char* name) +struct _PROFILER { - PROFILER* profiler; - profiler = (PROFILER*) malloc(sizeof(PROFILER)); + char* name; + STOPWATCH* stopwatch; +}; + +PROFILER* profiler_create(const char* name) +{ + PROFILER* profiler = (PROFILER*) calloc(1, sizeof(PROFILER)); if (!profiler) return NULL; - profiler->name = name; + profiler->name = _strdup(name); profiler->stopwatch = stopwatch_create(); - if (!profiler->stopwatch) - { - free(profiler); - return NULL; - } + if (!profiler->name || !profiler->stopwatch) + goto fail; return profiler; +fail: + profiler_free(profiler); + return NULL; } void profiler_free(PROFILER* profiler) { if (profiler) + { + free(profiler->name); stopwatch_free(profiler->stopwatch); + } free(profiler); }