From 4566eae245fd8bb3c80e758b161122222ea18b6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Molinari?= Date: Mon, 6 May 2024 09:27:04 +0200 Subject: [PATCH] gl-renderer: Improve wireframe rendering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Render wireframe within paint nodes instead of drawing lines in a second pass. The wireframe is blended over the node in a single draw call. This slightly simplifies the logic by removing the computation of a second set of indices and enables wireframe anti-aliasing using Celes and Abraham's "Fast and versatile texture-based wireframe rendering" paper from 2011. Celes and Abraham use a one-dimensional set of texture coords for each triangle edge, 1.0 for the 2 vertices defining the edge and 0.0 for the other vertex, which basically define barycentric coords. Texture mapping and the mip chain are then exploited to give a constant-width edge. The main drawback of the technique is that contour edges of a node's damage mesh are drawn half as thick as interior lines since each triangle draws half of each line's thickness.
Signed-off-by: Loïc Molinari --- libweston/renderer-gl/fragment.glsl | 22 +- libweston/renderer-gl/gl-renderer-internal.h | 15 +- libweston/renderer-gl/gl-renderer.c | 243 ++++++++++--------- libweston/renderer-gl/gl-shaders.c | 17 +- libweston/renderer-gl/vertex.glsl | 6 +- shared/helpers.h | 55 +++++ 6 files changed, 239 insertions(+), 119 deletions(-) diff --git a/libweston/renderer-gl/fragment.glsl b/libweston/renderer-gl/fragment.glsl index ba6aa2d9..1eed4b32 100644 --- a/libweston/renderer-gl/fragment.glsl +++ b/libweston/renderer-gl/fragment.glsl @@ -131,8 +131,11 @@ uniform sampler2D tex; varying HIGHPRECISION vec2 v_texcoord; varying HIGHPRECISION vec4 v_color; +varying HIGHPRECISION vec3 v_barycentric; + uniform sampler2D tex1; uniform sampler2D tex2; +uniform sampler2D tex_wireframe; uniform float view_alpha; uniform vec4 unicolor; @@ -428,6 +431,17 @@ color_pipeline(vec4 color) return color; } +vec4 +wireframe() +{ + float edge1 = texture2D(tex_wireframe, vec2(v_barycentric.x, 0.5)).r; + float edge2 = texture2D(tex_wireframe, vec2(v_barycentric.y, 0.5)).r; + float edge3 = texture2D(tex_wireframe, vec2(v_barycentric.z, 0.5)).r; + float edge = clamp(edge1 + edge2 + edge3, 0.0, 1.0); + + return vec4(edge) * v_color; +} + void main() { @@ -436,9 +450,6 @@ main() /* Electrical (non-linear) RGBA values, may be premult or not */ color = sample_input_texture(); - if (c_wireframe) - color *= v_color; - if (c_need_color_pipeline) color = color_pipeline(color); /* Produces straight alpha */ @@ -451,5 +462,10 @@ main() if (c_green_tint) color = vec4(0.0, 0.3, 0.0, 0.2) + color * 0.8; + if (c_wireframe) { + vec4 src = wireframe(); + color = color * vec4(1.0 - src.a) + src; + } + gl_FragColor = color; } diff --git a/libweston/renderer-gl/gl-renderer-internal.h b/libweston/renderer-gl/gl-renderer-internal.h index 10f3a3fa..72037212 100644 --- a/libweston/renderer-gl/gl-renderer-internal.h +++ b/libweston/renderer-gl/gl-renderer-internal.h @@ -76,6 +76,7 @@ enum 
gl_shader_attrib_loc { SHADER_ATTRIB_LOC_POSITION = 0, SHADER_ATTRIB_LOC_TEXCOORD, SHADER_ATTRIB_LOC_COLOR, + SHADER_ATTRIB_LOC_BARYCENTRIC, }; /** GL shader requirements key @@ -115,6 +116,8 @@ struct gl_shader; struct weston_color_transform; #define GL_SHADER_INPUT_TEX_MAX 3 +#define GL_SHADER_WIREFRAME_TEX_UNIT GL_SHADER_INPUT_TEX_MAX + struct gl_shader_config { struct gl_shader_requirements req; @@ -124,6 +127,7 @@ struct gl_shader_config { GLfloat unicolor[4]; GLint input_tex_filter; /* GL_NEAREST or GL_LINEAR */ GLuint input_tex[GL_SHADER_INPUT_TEX_MAX]; + GLuint wireframe_tex; union { struct { @@ -161,10 +165,14 @@ struct gl_renderer { struct weston_compositor *compositor; struct weston_log_scope *renderer_scope; - bool fragment_shader_debug; - bool wireframe_debug; struct weston_binding *fragment_binding; + bool fragment_shader_debug; + struct weston_binding *wireframe_binding; + bool wireframe_debug; + bool wireframe_dirty; + GLuint wireframe_tex; + int wireframe_size; EGLenum platform; EGLDisplay egl_display; @@ -176,7 +184,8 @@ struct gl_renderer { /* Vertex streams. */ struct wl_array position_stream; struct wl_array color_stream; - struct wl_array indices[2]; + struct wl_array barycentric_stream; + struct wl_array indices; EGLDeviceEXT egl_device; const char *drm_device; diff --git a/libweston/renderer-gl/gl-renderer.c b/libweston/renderer-gl/gl-renderer.c index 1f50333d..9a9580a8 100644 --- a/libweston/renderer-gl/gl-renderer.c +++ b/libweston/renderer-gl/gl-renderer.c @@ -1300,66 +1300,40 @@ transform_damage(const struct weston_paint_node *pnode, free(rects); } -/* Colorise a wireframe sub-mesh. 8 colors (32 bytes) are stored unconditionally - * into 'color_stream'. +/* Colorise and set barycentric coordinates of a sub-mesh of 'count' vertices. 8 + * colors (32 bytes) and 8 barycentric coordinates (32 bytes too) are stored + * unconditionally into 'color_stream' and 'barycentric_stream'. 
*/ static void -store_wireframes(uint32_t *color_stream) +store_wireframes(size_t count, + uint32_t *restrict color_stream, + uint32_t *restrict barycentric_stream) { static const uint32_t colors[] = - { 0xff0000ff, 0xff00ff00, 0xffff0000, 0xfffffff }; + { 0xff0000ff, 0xff00ff00, 0xffff0000, 0xffffffff }; + enum { x = 0xff0000, y = 0x00ff00, z = 0x0000ff }; + static const uint32_t barycentrics[][8] = { + {}, {}, {}, + { x, z, y, 0, 0, 0, 0, 0 }, + { x, z, x, y, 0, 0, 0, 0 }, + { x, z, y, x, y, 0, 0, 0 }, + { x, z, y, z, x, y, 0, 0 }, + { x, z, y, x, z, x, y, 0 }, + { x, z, y, x, y, z, x, y }, + }; static size_t idx = 0; int i; - for (i = 0; i < 8; i++) + assert(count < ARRAY_LENGTH(barycentrics)); + + for (i = 0; i < 8; i++) { + barycentric_stream[i] = barycentrics[count][i]; color_stream[i] = colors[idx % ARRAY_LENGTH(colors)]; + } idx++; } -/* Triangulate a wireframe sub-mesh of 'count' vertices as indexed lines. 'bias' - * is added to each index. 'count' must be less than or equal to 8. 32 indices - * (64 bytes) are stored unconditionally into 'indices'. The return value is the - * index count. - */ -static int -store_lines(size_t count, - uint16_t bias, - uint16_t *indices) - { - /* Look-up table of triangle lines with last entry storing the index - * count. Padded to 32 elements for compilers to emit packed adds. 
*/ - static const uint16_t lines[][32] = { - {}, {}, {}, { - 2, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6 - },{ - 3, 0, 0, 1, 1, 2, 2, 3, 3, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10 - },{ - 4, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 1, 1, 3, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14 - },{ - 5, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 1, 1, 4, - 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18 - },{ - 6, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 1, - 1, 5, 5, 2, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22 - },{ - 7, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, - 7, 1, 1, 6, 6, 2, 2, 5, 5, 3, 0, 0, 0, 0, 0, 26 - }, - }; - int i; - - assert(count < ARRAY_LENGTH(lines)); - - for (i = 0; i < 32; i++) - indices[i] = lines[count][i] + bias; - - return lines[count][31]; -} - /* Triangulate a sub-mesh of 'count' vertices as an indexed triangle strip. * 'bias' is added to each index. In order to chain sub-meshes, the last index * is followed by 2 indices creating 4 degenerate triangles. 'count' must be @@ -1368,9 +1342,9 @@ store_lines(size_t count, * indices. */ static int -store_strips(size_t count, - uint16_t bias, - uint16_t *indices) +store_indices(size_t count, + uint16_t bias, + uint16_t *indices) { /* Look-up table of triangle strips with last entry storing the index * count. Padded to 16 elements for compilers to emit packed adds. 
*/ @@ -1396,54 +1370,42 @@ store_strips(size_t count, static void draw_mesh(struct gl_renderer *gr, struct weston_paint_node *pnode, - const struct gl_shader_config *sconf, + struct gl_shader_config *sconf, const struct clipper_vertex *positions, const uint32_t *colors, + const uint32_t *barycentrics, const uint16_t *strip, int nstrip, - const uint16_t *lines, - int nlines) + bool wireframe) { - struct gl_shader_config alt; - struct weston_color_transform *ctransf; - assert(nstrip > 0); + if (wireframe) { + /* Wireframe rendering is based on Celes & Abraham's "Fast and + * versatile texture-based wireframe rendering", 2011. */ + glEnableVertexAttribArray(SHADER_ATTRIB_LOC_COLOR); + glEnableVertexAttribArray(SHADER_ATTRIB_LOC_BARYCENTRIC); + glVertexAttribPointer(SHADER_ATTRIB_LOC_COLOR, 4, + GL_UNSIGNED_BYTE, GL_TRUE, 0, colors); + glVertexAttribPointer(SHADER_ATTRIB_LOC_BARYCENTRIC, 4, + GL_UNSIGNED_BYTE, GL_TRUE, 0, + barycentrics); + + sconf->req.wireframe = wireframe; + sconf->wireframe_tex = gr->wireframe_tex; + } + if (!gl_renderer_use_program(gr, sconf)) gl_renderer_send_shader_error(pnode); /* Use fallback shader. */ - glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, positions); + glVertexAttribPointer(SHADER_ATTRIB_LOC_POSITION, 2, GL_FLOAT, GL_FALSE, + 0, positions); glDrawElements(GL_TRIANGLE_STRIP, nstrip, GL_UNSIGNED_SHORT, strip); - if (nlines == 0) - return; - - /* Wireframe debugging is rendered as lines colored with the solid - * shader variant filtered per sub-mesh using vertex colors. 
*/ - alt = (struct gl_shader_config) { - .req = { - .variant = SHADER_VARIANT_SOLID, - .input_is_premult = true, - .wireframe = true, - }, - .projection = sconf->projection, - .view_alpha = 1.0f, - .unicolor = { 1.0f, 1.0f, 1.0f, 1.0f }, - }; - ctransf = pnode->output->color_outcome->from_sRGB_to_blend; - if (!gl_shader_config_set_color_transform(gr, &alt, ctransf)) { - weston_log("GL-renderer: %s failed to generate a color " - "transformation.\n", __func__); - return; + if (wireframe) { + glDisableVertexAttribArray(SHADER_ATTRIB_LOC_BARYCENTRIC); + glDisableVertexAttribArray(SHADER_ATTRIB_LOC_COLOR); } - if (!gl_renderer_use_program(gr, &alt)) - return; - - glEnableVertexAttribArray(SHADER_ATTRIB_LOC_COLOR); - glVertexAttribPointer(SHADER_ATTRIB_LOC_COLOR, 4, GL_UNSIGNED_BYTE, - GL_FALSE, 0, colors); - glDrawElements(GL_LINES, nlines, GL_UNSIGNED_SHORT, lines); - glDisableVertexAttribArray(SHADER_ATTRIB_LOC_COLOR); } static void @@ -1452,22 +1414,20 @@ repaint_region(struct gl_renderer *gr, struct clipper_quad *quads, int nquads, pixman_region32_t *region, - const struct gl_shader_config *sconf) + struct gl_shader_config *sconf) { pixman_box32_t *rects; struct clipper_vertex *positions; - uint32_t *colors = NULL; - uint16_t *strips, *lines = NULL; - int i, j, n, nrects, positions_size, colors_size, strips_size; - int lines_size, nvtx = 0, nstrips = 0, nlines = 0; + uint32_t *colors = NULL, *barycentrics = NULL; + uint16_t *indices; + int i, j, n, nrects, positions_size, colors_size, barycentrics_size; + int indices_size, nvtx = 0, nidx = 0; bool wireframe = gr->wireframe_debug; /* Build-time sub-mesh constants. Clipping emits 8 vertices max. - * store_strips() and store_lines() respectively store 10 and 26 indices - * at most. */ + * store_indices() store at most 10 indices. 
*/ const int nvtx_max = 8; - const int nstrips_max = 10; - const int nlines_max = 26; + const int nidx_max = 10; rects = pixman_region32_rectangles(region, &nrects); assert((nrects > 0) && (nquads > 0)); @@ -1476,14 +1436,15 @@ repaint_region(struct gl_renderer *gr, n = nquads * nrects; positions_size = n * nvtx_max * sizeof *positions; colors_size = ROUND_UP_N(n * nvtx_max * sizeof *colors, 32); - strips_size = ROUND_UP_N(n * nstrips_max * sizeof *strips, 32); - lines_size = ROUND_UP_N(n * nlines_max * sizeof *lines, 64); + barycentrics_size = ROUND_UP_N(n * nvtx_max * sizeof *barycentrics, 32); + indices_size = ROUND_UP_N(n * nidx_max * sizeof *indices, 32); positions = wl_array_add(&gr->position_stream, positions_size); - strips = wl_array_add(&gr->indices[0], strips_size); + indices = wl_array_add(&gr->indices, indices_size); if (wireframe) { colors = wl_array_add(&gr->color_stream, colors_size); - lines = wl_array_add(&gr->indices[1], lines_size); + barycentrics = wl_array_add(&gr->barycentric_stream, + barycentrics_size); } /* A node's damage mesh is created by clipping damage quads to surface @@ -1508,32 +1469,32 @@ repaint_region(struct gl_renderer *gr, for (j = 0; j < nrects; j++) { n = clipper_quad_clip_box32(&quads[i], &rects[j], &positions[nvtx]); - nstrips += store_strips(n, nvtx, &strips[nstrips]); - if (wireframe) { - store_wireframes(&colors[nvtx]); - nlines += store_lines(n, nvtx, &lines[nlines]); - } + nidx += store_indices(n, nvtx, &indices[nidx]); + if (wireframe) + store_wireframes(n, &colors[nvtx], + &barycentrics[nvtx]); nvtx += n; /* Highly unlikely flush to prevent index wraparound. * Subtracting 2 removes the last chaining indices. 
*/ if ((nvtx + nvtx_max) > UINT16_MAX) { draw_mesh(gr, pnode, sconf, positions, colors, - strips, nstrips - 2, lines, nlines); - nvtx = nstrips = nlines = 0; + barycentrics, indices, nidx - 2, + wireframe); + nvtx = nidx = 0; } } } if (nvtx) - draw_mesh(gr, pnode, sconf, positions, colors, strips, - nstrips - 2, lines, nlines); + draw_mesh(gr, pnode, sconf, positions, colors, barycentrics, + indices, nidx - 2, wireframe); gr->position_stream.size = 0; - gr->indices[0].size = 0; + gr->indices.size = 0; if (wireframe) { gr->color_stream.size = 0; - gr->indices[1].size = 0; + gr->barycentric_stream.size = 0; } } @@ -1731,6 +1692,55 @@ update_buffer_release_fences(struct weston_compositor *compositor, } } +/* Update the wireframe texture. The texture is either created, deleted or + * resized depending on the wireframe debugging state and the area. + */ +static void +update_wireframe_tex(struct gl_renderer *gr, + const struct weston_geometry *area) +{ + int new_size, i; + uint8_t *buffer; + + if (!gr->wireframe_debug) { + if (gr->wireframe_size) { + glDeleteTextures(1, &gr->wireframe_tex); + gr->wireframe_size = 0; + } + return; + } + + /* Texture size at mip level 0 should be at least as large as the area + * in order to correctly anti-alias triangles covering it entirely. 
*/ + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &new_size); + new_size = MIN(round_up_pow2_32(MAX(area->width, area->height)), + round_down_pow2_32(new_size)); + if (new_size <= gr->wireframe_size) + return; + + if (gr->wireframe_size == 0) { + glGenTextures(1, &gr->wireframe_tex); + glBindTexture(GL_TEXTURE_2D, gr->wireframe_tex); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, + GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, + GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, + GL_LINEAR_MIPMAP_LINEAR); + } else { + glBindTexture(GL_TEXTURE_2D, gr->wireframe_tex); + } + gr->wireframe_size = new_size; + + /* Generate mip chain with a wireframe thickness of 1.0. */ + buffer = xzalloc(new_size); + buffer[0] = 0xff; + for (i = 0; new_size; i++, new_size >>= 1) + glTexImage2D(GL_TEXTURE_2D, i, GL_LUMINANCE, new_size, 1, 0, + GL_LUMINANCE, GL_UNSIGNED_BYTE, buffer); + free(buffer); +} + static void draw_output_border_texture(struct gl_renderer *gr, struct gl_output_state *go, @@ -2207,6 +2217,11 @@ gl_renderer_repaint_output(struct weston_output *output, go->area.width, go->area.height); } + if (gr->wireframe_dirty) { + update_wireframe_tex(gr, &go->area); + gr->wireframe_dirty = false; + } + /* In wireframe debug mode, redraw everything to make sure that we clear * any wireframes left over from previous draws on this buffer. 
This * precludes the use of EGL_EXT_swap_buffers_with_damage and @@ -3888,6 +3903,7 @@ gl_renderer_resize_output(struct weston_output *output, const struct weston_size *fb_size, const struct weston_geometry *area) { + struct gl_renderer *gr = get_renderer(output->compositor); struct gl_output_state *go = get_output_state(output); const struct pixel_format_info *shfmt = go->shadow_format; bool ret; @@ -3898,6 +3914,7 @@ gl_renderer_resize_output(struct weston_output *output, go->fb_size = *fb_size; go->area = *area; + gr->wireframe_dirty = true; weston_output_update_capture_info(output, WESTON_OUTPUT_CAPTURE_SOURCE_FRAMEBUFFER, @@ -4112,6 +4129,9 @@ gl_renderer_destroy(struct weston_compositor *ec) if (gr->fallback_shader) gl_shader_destroy(gr, gr->fallback_shader); + if (gr->wireframe_size) + glDeleteTextures(1, &gr->wireframe_tex); + /* Work around crash in egl_dri2.c's dri2_make_current() - when does this apply? */ eglMakeCurrent(gr->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, @@ -4127,8 +4147,8 @@ gl_renderer_destroy(struct weston_compositor *ec) wl_array_release(&gr->position_stream); wl_array_release(&gr->color_stream); - wl_array_release(&gr->indices[0]); - wl_array_release(&gr->indices[1]); + wl_array_release(&gr->barycentric_stream); + wl_array_release(&gr->indices); if (gr->fragment_binding) weston_binding_destroy(gr->fragment_binding); @@ -4357,6 +4377,7 @@ wireframe_debug_repaint_binding(struct weston_keyboard *keyboard, struct gl_renderer *gr = get_renderer(compositor); gr->wireframe_debug = !gr->wireframe_debug; + gr->wireframe_dirty = true; weston_compositor_damage_all(compositor); } diff --git a/libweston/renderer-gl/gl-shaders.c b/libweston/renderer-gl/gl-shaders.c index 776061b3..51df3cf7 100644 --- a/libweston/renderer-gl/gl-shaders.c +++ b/libweston/renderer-gl/gl-shaders.c @@ -61,6 +61,7 @@ struct gl_shader { GLint proj_uniform; GLint surface_to_buffer_uniform; GLint tex_uniforms[3]; + GLint tex_uniform_wireframe; GLint view_alpha_uniform; 
GLint color_uniform; union { @@ -359,9 +360,13 @@ gl_shader_create(struct gl_renderer *gr, if (requirements->texcoord_input == SHADER_TEXCOORD_INPUT_ATTRIB) glBindAttribLocation(shader->program, SHADER_ATTRIB_LOC_TEXCOORD, "texcoord"); - if (requirements->wireframe) + if (requirements->wireframe) { glBindAttribLocation(shader->program, SHADER_ATTRIB_LOC_COLOR, "color"); + glBindAttribLocation(shader->program, + SHADER_ATTRIB_LOC_BARYCENTRIC, + "barycentric"); + } glLinkProgram(shader->program); glGetProgramiv(shader->program, GL_LINK_STATUS, &status); @@ -380,6 +385,9 @@ gl_shader_create(struct gl_renderer *gr, shader->tex_uniforms[0] = glGetUniformLocation(shader->program, "tex"); shader->tex_uniforms[1] = glGetUniformLocation(shader->program, "tex1"); shader->tex_uniforms[2] = glGetUniformLocation(shader->program, "tex2"); + if (requirements->wireframe) + shader->tex_uniform_wireframe = + glGetUniformLocation(shader->program, "tex_wireframe"); shader->view_alpha_uniform = glGetUniformLocation(shader->program, "view_alpha"); if (requirements->variant == SHADER_VARIANT_SOLID) { shader->color_uniform = glGetUniformLocation(shader->program, @@ -753,6 +761,13 @@ gl_shader_load_config(struct gl_shader *shader, sconf->color_post_curve.parametric.clamped_input); break; } + + if (sconf->req.wireframe) { + assert(sconf->wireframe_tex != 0); + glUniform1i(shader->tex_uniform_wireframe, GL_SHADER_WIREFRAME_TEX_UNIT); + glActiveTexture(GL_TEXTURE0 + GL_SHADER_WIREFRAME_TEX_UNIT); + glBindTexture(GL_TEXTURE_2D, sconf->wireframe_tex); + } } bool diff --git a/libweston/renderer-gl/vertex.glsl b/libweston/renderer-gl/vertex.glsl index 2b41de25..64a5d879 100644 --- a/libweston/renderer-gl/vertex.glsl +++ b/libweston/renderer-gl/vertex.glsl @@ -47,10 +47,12 @@ uniform mat4 surface_to_buffer; attribute vec2 position; attribute vec2 texcoord; attribute vec4 color; +attribute vec4 barycentric; /* Match the varying precision to the fragment shader */ varying FRAG_PRECISION vec2 
v_texcoord; varying FRAG_PRECISION vec4 v_color; +varying FRAG_PRECISION vec3 v_barycentric; compile_const int c_texcoord_input = DEF_TEXCOORD_INPUT; compile_const bool c_wireframe = DEF_WIREFRAME; @@ -64,6 +66,8 @@ void main() else if (c_texcoord_input == SHADER_TEXCOORD_INPUT_SURFACE) v_texcoord = vec2(surface_to_buffer * vec4(position, 0.0, 1.0)); - if (c_wireframe) + if (c_wireframe) { v_color = color; + v_barycentric = barycentric.xyz; + } } diff --git a/shared/helpers.h b/shared/helpers.h index b5309ce4..dc203d74 100644 --- a/shared/helpers.h +++ b/shared/helpers.h @@ -275,6 +275,61 @@ bswap32(uint32_t x) #endif } +/** + * Returns the highest power of two less than or equal to 32-bit value x. + * Saturated to 0 (which isn't a power of two) if x is less than 2^0. + * + * @param x a 32-bit value. + * @return the rounded down 32-bit value. + */ +static inline uint32_t +round_down_pow2_32(uint32_t x) +{ +#if defined(HAVE_BUILTIN_CLZ) + /* clz depends on the underlying architecture when x is 0. */ + return x ? (1u << ((32 - __builtin_clz(x)) - 1)) : 0; +#else + /* See Hacker's Delight 2nd Edition, Chapter 3-2. */ + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x -= x >> 1; + + return x; +#endif +} + +/** + * Returns the smallest power of two greater than or equal to 32-bit value x. + * Saturated to 2^32 - 1 (which isn't a power of two) if x is greater than 2^31. + * + * @param x a 32-bit value. + * @return the rounded up 32-bit value. + */ +static inline uint32_t +round_up_pow2_32(uint32_t x) +{ + if (x > (1u << 31)) + return UINT32_MAX; + +#if defined(HAVE_BUILTIN_CLZ) + return (x > 1) ? (1u << (32 - __builtin_clz(x - 1))) : 1; +#else + /* Slight change from the Hacker's Delight version (which subtracts 1 + * unconditionally) in order to return 1 if x is 0. */ + x -= x != 0; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + + return x + 1; +#endif +} + #ifdef __cplusplus } #endif