gl-renderer: Get rid of begin fence sync

Output repaint uses a pair of fence syncs to profile GPU execution by
retrieving their timestamps once signalled. While the end timestamp
can be rather inaccurate in some cases (drivers reusing sync objects
from previous command buffers), the begin timestamp is never correct
because fence syncs are signalled on command buffer completion.

Get rid of the begin fence sync and use the EXT_disjoint_timer_query
extension to measure the actual repaint duration and extrapolate the
begin timestamp from the end one.

Fixes #342

Signed-off-by: Loïc Molinari <loic.molinari@gmail.com>
This commit is contained in:
Loïc Molinari 2023-01-12 08:04:40 +01:00
parent 14fc87cc42
commit 7c9c545b4e
2 changed files with 116 additions and 37 deletions

View File

@ -200,6 +200,16 @@ struct gl_renderer {
bool has_wait_sync; bool has_wait_sync;
PFNEGLWAITSYNCKHRPROC wait_sync; PFNEGLWAITSYNCKHRPROC wait_sync;
bool has_disjoint_timer_query;
PFNGLGENQUERIESEXTPROC gen_queries;
PFNGLDELETEQUERIESEXTPROC delete_queries;
PFNGLBEGINQUERYEXTPROC begin_query;
PFNGLENDQUERYEXTPROC end_query;
#if !defined(NDEBUG)
PFNGLGETQUERYOBJECTIVEXTPROC get_query_object_iv;
#endif
PFNGLGETQUERYOBJECTUI64VEXTPROC get_query_object_ui64v;
bool gl_supports_color_transforms; bool gl_supports_color_transforms;
/** Shader program cache in most recently used order /** Shader program cache in most recently used order

View File

@ -104,7 +104,8 @@ struct gl_output_state {
struct weston_matrix output_matrix; struct weston_matrix output_matrix;
EGLSyncKHR begin_render_sync, end_render_sync; EGLSyncKHR render_sync;
GLuint render_query;
/* struct timeline_render_point::link */ /* struct timeline_render_point::link */
struct wl_list timeline_render_point_list; struct wl_list timeline_render_point_list;
@ -188,16 +189,11 @@ struct gl_surface_state {
struct wl_listener renderer_destroy_listener; struct wl_listener renderer_destroy_listener;
}; };
enum timeline_render_point_type {
TIMELINE_RENDER_POINT_TYPE_BEGIN,
TIMELINE_RENDER_POINT_TYPE_END
};
struct timeline_render_point { struct timeline_render_point {
struct wl_list link; /* gl_output_state::timeline_render_point_list */ struct wl_list link; /* gl_output_state::timeline_render_point_list */
enum timeline_render_point_type type;
int fd; int fd;
GLuint query;
struct weston_output *output; struct weston_output *output;
struct wl_event_source *event_source; struct wl_event_source *event_source;
}; };
@ -316,6 +312,23 @@ struct yuv_format_descriptor yuv_formats[] = {
} }
}; };
/** Start the elapsed-time GPU query used for timeline profiling.
 *
 * \param gr    The GL renderer holding the query entry points.
 * \param query The GL query object to begin.
 *
 * Nothing is done unless the compositor's timeline log scope is enabled
 * and the renderer has both native fence sync and disjoint timer query
 * support flagged, which are the same preconditions checked by
 * timeline_end_render_query().
 */
static void
timeline_begin_render_query(struct gl_renderer *gr, GLuint query)
{
	if (!weston_log_scope_is_enabled(gr->compositor->timeline) ||
	    !gr->has_native_fence_sync ||
	    !gr->has_disjoint_timer_query)
		return;

	gr->begin_query(GL_TIME_ELAPSED_EXT, query);
}
/** Stop the elapsed-time GPU query used for timeline profiling.
 *
 * \param gr The GL renderer holding the query entry points.
 *
 * Ends the GL_TIME_ELAPSED_EXT query, under the same preconditions as
 * timeline_begin_render_query(): the timeline log scope must be enabled
 * and the renderer must have both native fence sync and disjoint timer
 * query support flagged. Otherwise this is a no-op.
 */
static void
timeline_end_render_query(struct gl_renderer *gr)
{
	if (!weston_log_scope_is_enabled(gr->compositor->timeline) ||
	    !gr->has_native_fence_sync ||
	    !gr->has_disjoint_timer_query)
		return;

	gr->end_query(GL_TIME_ELAPSED_EXT);
}
static void static void
timeline_render_point_destroy(struct timeline_render_point *trp) timeline_render_point_destroy(struct timeline_render_point *trp)
@ -330,17 +343,33 @@ static int
timeline_render_point_handler(int fd, uint32_t mask, void *data) timeline_render_point_handler(int fd, uint32_t mask, void *data)
{ {
struct timeline_render_point *trp = data; struct timeline_render_point *trp = data;
const char *tp_name = trp->type == TIMELINE_RENDER_POINT_TYPE_BEGIN ? struct timespec end;
"renderer_gpu_begin" : "renderer_gpu_end";
if (mask & WL_EVENT_READABLE) { if ((mask & WL_EVENT_READABLE) &&
struct timespec tspec = { 0 }; (weston_linux_sync_file_read_timestamp(trp->fd, &end) == 0)) {
struct gl_renderer *gr = get_renderer(trp->output->compositor);
struct timespec begin;
GLuint64 elapsed;
#if !defined(NDEBUG)
GLint result_available;
if (weston_linux_sync_file_read_timestamp(trp->fd, /* The elapsed time result must now be available since the
&tspec) == 0) { * begin/end queries are meant to be queued prior to fence sync
TL_POINT(trp->output->compositor, tp_name, TLP_GPU(&tspec), * creation. */
TLP_OUTPUT(trp->output), TLP_END); gr->get_query_object_iv(trp->query,
} GL_QUERY_RESULT_AVAILABLE_EXT,
&result_available);
assert(result_available == GL_TRUE);
#endif
gr->get_query_object_ui64v(trp->query, GL_QUERY_RESULT_EXT,
&elapsed);
timespec_add_nsec(&begin, &end, -elapsed);
TL_POINT(trp->output->compositor, "renderer_gpu_begin",
TLP_GPU(&begin), TLP_OUTPUT(trp->output), TLP_END);
TL_POINT(trp->output->compositor, "renderer_gpu_end",
TLP_GPU(&end), TLP_OUTPUT(trp->output), TLP_END);
} }
timeline_render_point_destroy(trp); timeline_render_point_destroy(trp);
@ -364,7 +393,7 @@ static void
timeline_submit_render_sync(struct gl_renderer *gr, timeline_submit_render_sync(struct gl_renderer *gr,
struct weston_output *output, struct weston_output *output,
EGLSyncKHR sync, EGLSyncKHR sync,
enum timeline_render_point_type type) GLuint query)
{ {
struct gl_output_state *go; struct gl_output_state *go;
struct wl_event_loop *loop; struct wl_event_loop *loop;
@ -373,6 +402,7 @@ timeline_submit_render_sync(struct gl_renderer *gr,
if (!weston_log_scope_is_enabled(gr->compositor->timeline) || if (!weston_log_scope_is_enabled(gr->compositor->timeline) ||
!gr->has_native_fence_sync || !gr->has_native_fence_sync ||
!gr->has_disjoint_timer_query ||
sync == EGL_NO_SYNC_KHR) sync == EGL_NO_SYNC_KHR)
return; return;
@ -389,8 +419,8 @@ timeline_submit_render_sync(struct gl_renderer *gr,
return; return;
} }
trp->type = type;
trp->fd = fd; trp->fd = fd;
trp->query = query;
trp->output = output; trp->output = output;
trp->event_source = wl_event_loop_add_fd(loop, fd, trp->event_source = wl_event_loop_add_fd(loop, fd,
WL_EVENT_READABLE, WL_EVENT_READABLE,
@ -1756,12 +1786,7 @@ gl_renderer_repaint_output(struct weston_output *output,
} }
} }
if (go->begin_render_sync != EGL_NO_SYNC_KHR) timeline_begin_render_query(gr, go->render_query);
gr->destroy_sync(gr->egl_display, go->begin_render_sync);
if (go->end_render_sync != EGL_NO_SYNC_KHR)
gr->destroy_sync(gr->egl_display, go->end_render_sync);
go->begin_render_sync = create_render_sync(gr);
/* Calculate the global GL matrix */ /* Calculate the global GL matrix */
go->output_matrix = output->matrix; go->output_matrix = output->matrix;
@ -1853,7 +1878,11 @@ gl_renderer_repaint_output(struct weston_output *output,
WESTON_OUTPUT_CAPTURE_SOURCE_FULL_FRAMEBUFFER); WESTON_OUTPUT_CAPTURE_SOURCE_FULL_FRAMEBUFFER);
wl_signal_emit(&output->frame_signal, output_damage); wl_signal_emit(&output->frame_signal, output_damage);
go->end_render_sync = create_render_sync(gr); timeline_end_render_query(gr);
if (go->render_sync != EGL_NO_SYNC_KHR)
gr->destroy_sync(gr->egl_display, go->render_sync);
go->render_sync = create_render_sync(gr);
if (gr->swap_buffers_with_damage && !gr->fan_debug) { if (gr->swap_buffers_with_damage && !gr->fan_debug) {
int n_egl_rects; int n_egl_rects;
@ -1883,10 +1912,8 @@ gl_renderer_repaint_output(struct weston_output *output,
/* We have to submit the render sync objects after swap buffers, since /* We have to submit the render sync objects after swap buffers, since
* the objects get assigned a valid sync file fd only after a gl flush. * the objects get assigned a valid sync file fd only after a gl flush.
*/ */
timeline_submit_render_sync(gr, output, go->begin_render_sync, timeline_submit_render_sync(gr, output, go->render_sync,
TIMELINE_RENDER_POINT_TYPE_BEGIN); go->render_query);
timeline_submit_render_sync(gr, output, go->end_render_sync,
TIMELINE_RENDER_POINT_TYPE_END);
update_buffer_release_fences(compositor, output); update_buffer_release_fences(compositor, output);
@ -3505,10 +3532,12 @@ gl_renderer_output_create(struct weston_output *output,
for (i = 0; i < BUFFER_DAMAGE_COUNT; i++) for (i = 0; i < BUFFER_DAMAGE_COUNT; i++)
pixman_region32_init(&go->buffer_damage[i]); pixman_region32_init(&go->buffer_damage[i]);
if (gr->has_disjoint_timer_query)
gr->gen_queries(1, &go->render_query);
wl_list_init(&go->timeline_render_point_list); wl_list_init(&go->timeline_render_point_list);
go->begin_render_sync = EGL_NO_SYNC_KHR; go->render_sync = EGL_NO_SYNC_KHR;
go->end_render_sync = EGL_NO_SYNC_KHR;
if ((output->color_outcome->from_blend_to_output != NULL && if ((output->color_outcome->from_blend_to_output != NULL &&
output->from_blend_to_output_by_backend == false) || output->from_blend_to_output_by_backend == false) ||
@ -3642,13 +3671,14 @@ gl_renderer_output_destroy(struct weston_output *output)
weston_log("warning: discarding pending timeline render" weston_log("warning: discarding pending timeline render"
"objects at output destruction"); "objects at output destruction");
if (gr->has_disjoint_timer_query)
gr->delete_queries(1, &go->render_query);
wl_list_for_each_safe(trp, tmp, &go->timeline_render_point_list, link) wl_list_for_each_safe(trp, tmp, &go->timeline_render_point_list, link)
timeline_render_point_destroy(trp); timeline_render_point_destroy(trp);
if (go->begin_render_sync != EGL_NO_SYNC_KHR) if (go->render_sync != EGL_NO_SYNC_KHR)
gr->destroy_sync(gr->egl_display, go->begin_render_sync); gr->destroy_sync(gr->egl_display, go->render_sync);
if (go->end_render_sync != EGL_NO_SYNC_KHR)
gr->destroy_sync(gr->egl_display, go->end_render_sync);
free(go); free(go);
} }
@ -3660,10 +3690,10 @@ gl_renderer_create_fence_fd(struct weston_output *output)
struct gl_renderer *gr = get_renderer(output->compositor); struct gl_renderer *gr = get_renderer(output->compositor);
int fd; int fd;
if (go->end_render_sync == EGL_NO_SYNC_KHR) if (go->render_sync == EGL_NO_SYNC_KHR)
return -1; return -1;
fd = gr->dup_native_fence_fd(gr->egl_display, go->end_render_sync); fd = gr->dup_native_fence_fd(gr->egl_display, go->render_sync);
if (fd == EGL_NO_NATIVE_FENCE_FD_ANDROID) if (fd == EGL_NO_NATIVE_FENCE_FD_ANDROID)
return -1; return -1;
@ -4103,6 +4133,45 @@ gl_renderer_setup(struct weston_compositor *ec, EGLSurface egl_surface)
gr->gl_supports_color_transforms = true; gr->gl_supports_color_transforms = true;
} }
if (weston_check_egl_extension(extensions, "GL_EXT_disjoint_timer_query")) {
PFNGLGETQUERYIVEXTPROC get_query_iv =
(void *) eglGetProcAddress("glGetQueryivEXT");
int elapsed_bits;
assert(get_query_iv);
get_query_iv(GL_TIME_ELAPSED_EXT, GL_QUERY_COUNTER_BITS_EXT,
&elapsed_bits);
if (elapsed_bits != 0) {
gr->gen_queries =
(void *) eglGetProcAddress("glGenQueriesEXT");
gr->delete_queries =
(void *) eglGetProcAddress("glDeleteQueriesEXT");
gr->begin_query = (void *) eglGetProcAddress("glBeginQueryEXT");
gr->end_query = (void *) eglGetProcAddress("glEndQueryEXT");
#if !defined(NDEBUG)
gr->get_query_object_iv =
(void *) eglGetProcAddress("glGetQueryObjectivEXT");
#endif
gr->get_query_object_ui64v =
(void *) eglGetProcAddress("glGetQueryObjectui64vEXT");
assert(gr->gen_queries);
assert(gr->delete_queries);
assert(gr->begin_query);
assert(gr->end_query);
assert(gr->get_query_object_iv);
assert(gr->get_query_object_ui64v);
gr->has_disjoint_timer_query = true;
} else {
weston_log("warning: Disabling render GPU timeline due "
"to lack of support for elapsed counters by "
"the GL_EXT_disjoint_timer_query "
"extension\n");
}
} else if (gr->has_native_fence_sync) {
weston_log("warning: Disabling render GPU timeline due to "
"missing GL_EXT_disjoint_timer_query extension\n");
}
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);
gr->fallback_shader = gl_renderer_create_fallback_shader(gr); gr->fallback_shader = gl_renderer_create_fallback_shader(gr);