From 827001e74a917deaed07996cf1cff9839fa21b24 Mon Sep 17 00:00:00 2001 From: attilaz Date: Wed, 13 Feb 2019 19:09:31 +0100 Subject: [PATCH] 37-gpudrivenrendering fix for OpenGL NOTE: still has problem with flat varying Needs shader recompile --- .../37-gpudrivenrendering/cs_gdr_copy_z.sc | 26 ++++++++++++ .../cs_gdr_downscale_hi_z.sc | 30 +++++--------- .../cs_gdr_occlude_props.sc | 7 ++++ .../cs_gdr_stream_compaction.sc | 4 +- .../gpudrivenrendering.cpp | 41 ++++++++++--------- 5 files changed, 68 insertions(+), 40 deletions(-) create mode 100644 examples/37-gpudrivenrendering/cs_gdr_copy_z.sc diff --git a/examples/37-gpudrivenrendering/cs_gdr_copy_z.sc b/examples/37-gpudrivenrendering/cs_gdr_copy_z.sc new file mode 100644 index 000000000..b6365bdc6 --- /dev/null +++ b/examples/37-gpudrivenrendering/cs_gdr_copy_z.sc @@ -0,0 +1,26 @@ +/* + * Copyright 2018 Kostas Anagnostou. All rights reserved. + * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause + */ + +#include "bgfx_compute.sh" + +SAMPLER2D(s_texOcclusionDepth, 0); +IMAGE2D_WR(s_texOcclusionDepthOut, r32f, 1); + +uniform vec4 u_inputRTSize; + +NUM_THREADS(16, 16, 1) +void main() +{ + // this shader only copies mip 0 of the input depth texture over to the output image, one texel per thread; it does not downscale. 
+ + ivec2 coord = ivec2(gl_GlobalInvocationID.xy); + + if (all(lessThan(coord.xy, u_inputRTSize.xy) ) ) + { + float maxDepth = texelFetch(s_texOcclusionDepth, coord.xy, 0).x; + + imageStore(s_texOcclusionDepthOut, coord, vec4(maxDepth,0,0,1) ); + } +} diff --git a/examples/37-gpudrivenrendering/cs_gdr_downscale_hi_z.sc b/examples/37-gpudrivenrendering/cs_gdr_downscale_hi_z.sc index aecd7e416..9b03498f9 100644 --- a/examples/37-gpudrivenrendering/cs_gdr_downscale_hi_z.sc +++ b/examples/37-gpudrivenrendering/cs_gdr_downscale_hi_z.sc @@ -21,26 +21,18 @@ void main() { float maxDepth = 1.0; - if (u_inputRTSize.z > 1) - { - vec4 depths = vec4( - imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy ) ).x - , imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(1.0, 0.0) ) ).x - , imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(0.0, 1.0) ) ).x - , imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(1.0, 1.0) ) ).x - ); + vec4 depths = vec4( + imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy ) ).x + , imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(1.0, 0.0) ) ).x + , imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(0.0, 1.0) ) ).x + , imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(1.0, 1.0) ) ).x + ); - // find and return max depth - maxDepth = max( - max(depths.x, depths.y) - , max(depths.z, depths.w) - ); - } - else - { - // do not downscale, just copy the value over to the output rendertarget - maxDepth = imageLoad(s_texOcclusionDepthIn, coord.xy).x; - } + // find and return max depth + maxDepth = max( + max(depths.x, depths.y) + , max(depths.z, depths.w) + ); imageStore(s_texOcclusionDepthOut, coord, vec4(maxDepth,0,0,1) ); } diff --git a/examples/37-gpudrivenrendering/cs_gdr_occlude_props.sc b/examples/37-gpudrivenrendering/cs_gdr_occlude_props.sc index aced7da3c..2648eb4d0 100644 --- 
a/examples/37-gpudrivenrendering/cs_gdr_occlude_props.sc +++ b/examples/37-gpudrivenrendering/cs_gdr_occlude_props.sc @@ -51,6 +51,9 @@ void main() //transform World space aaBox to NDC vec4 clipPos = mul( u_viewProj, vec4(boxCorners[i], 1) ); +#if BGFX_SHADER_LANGUAGE_GLSL + clipPos.z = 0.5 * ( clipPos.z + clipPos.w ); +#endif clipPos.z = max(clipPos.z, 0); clipPos.xyz = clipPos.xyz / clipPos.w; @@ -83,6 +86,10 @@ void main() if (dims.x <= 2 && dims.y <= 2) mip = level_lower; +#if BGFX_SHADER_LANGUAGE_GLSL + boxUVs.y = 1.0 - boxUVs.y; + boxUVs.w = 1.0 - boxUVs.w; +#endif //load depths from high z buffer vec4 depth = { diff --git a/examples/37-gpudrivenrendering/cs_gdr_stream_compaction.sc b/examples/37-gpudrivenrendering/cs_gdr_stream_compaction.sc index 3c35acdc7..e8fcc64f3 100644 --- a/examples/37-gpudrivenrendering/cs_gdr_stream_compaction.sc +++ b/examples/37-gpudrivenrendering/cs_gdr_stream_compaction.sc @@ -32,8 +32,8 @@ void main() int NoofDrawcalls = int(u_cullingConfig.w); int offset = 1; - temp[2 * tID ] = uint(instancePredicates[2 * tID ]); // load input into shared memory - temp[2 * tID + 1] = uint(instancePredicates[2 * tID + 1]); + temp[2 * tID ] = uint(instancePredicates[2 * tID ] ? 1 : 0); // load input into shared memory + temp[2 * tID + 1] = uint(instancePredicates[2 * tID + 1] ? 1 : 0); int d; diff --git a/examples/37-gpudrivenrendering/gpudrivenrendering.cpp b/examples/37-gpudrivenrendering/gpudrivenrendering.cpp index 046f88a85..fec214a0f 100644 --- a/examples/37-gpudrivenrendering/gpudrivenrendering.cpp +++ b/examples/37-gpudrivenrendering/gpudrivenrendering.cpp @@ -604,6 +604,7 @@ public: // Create programs from shaders for occlusion pass. 
m_programOcclusionPass = loadProgram("vs_gdr_render_occlusion", NULL); + m_programCopyZ = loadProgram("cs_gdr_copy_z", NULL); m_programDownscaleHiZ = loadProgram("cs_gdr_downscale_hi_z", NULL); m_programOccludeProps = loadProgram("cs_gdr_occlude_props", NULL); m_programStreamCompaction = loadProgram("cs_gdr_stream_compaction", NULL); @@ -706,6 +707,7 @@ public: bgfx::destroy(m_programMainPass); bgfx::destroy(m_programOcclusionPass); + bgfx::destroy(m_programCopyZ); bgfx::destroy(m_programDownscaleHiZ); bgfx::destroy(m_programOccludeProps); bgfx::destroy(m_programStreamCompaction); @@ -813,29 +815,29 @@ public: uint32_t width = m_hiZwidth; uint32_t height = m_hiZheight; - for (uint8_t lod = 0; lod < m_noofHiZMips; ++lod) + // copy mip zero over to the hi Z buffer. + // We can't currently use blit as it requires same format and CopyResource is not exposed. { - float coordinateScale = lod > 0 ? 2.0f : 1.0f; - - float inputRendertargetSize[4] = { (float)width, (float)height, coordinateScale, coordinateScale }; + float inputRendertargetSize[4] = { (float)width, (float)height, 0.0f, 0.0f }; bgfx::setUniform(u_inputRTSize, inputRendertargetSize); - if (lod > 0) - { - // down scale mip 1 onwards - width /= 2; - height /= 2; + bgfx::setTexture(0, s_texOcclusionDepth, getTexture(m_hiZDepthBuffer, 0)); + bgfx::setImage(1, getTexture(m_hiZBuffer, 0), 0, bgfx::Access::Write); + + bgfx::dispatch(RENDER_PASS_HIZ_DOWNSCALE_ID, m_programCopyZ, width/16, height/16); + } - bgfx::setImage(0, getTexture(m_hiZBuffer, 0), lod - 1, bgfx::Access::Read); - bgfx::setImage(1, getTexture(m_hiZBuffer, 0), lod, bgfx::Access::Write); - } - else - { - // copy mip zero over to the hi Z buffer. - // We can't currently use blit as it requires same format and CopyResource is not exposed. 
- bgfx::setImage(0, getTexture(m_hiZDepthBuffer, 0), 0, bgfx::Access::Read); - bgfx::setImage(1, getTexture(m_hiZBuffer, 0), 0, bgfx::Access::Write); - } + for (uint8_t lod = 1; lod < m_noofHiZMips; ++lod) + { + float inputRendertargetSize[4] = { (float)width, (float)height, 2.0f, 2.0f }; + bgfx::setUniform(u_inputRTSize, inputRendertargetSize); + + // down scale mip 1 onwards + width /= 2; + height /= 2; + + bgfx::setImage(0, getTexture(m_hiZBuffer, 0), lod - 1, bgfx::Access::Read); + bgfx::setImage(1, getTexture(m_hiZBuffer, 0), lod, bgfx::Access::Write); bgfx::dispatch(RENDER_PASS_HIZ_DOWNSCALE_ID, m_programDownscaleHiZ, width/16, height/16); } @@ -1086,6 +1088,7 @@ public: bgfx::ProgramHandle m_programMainPass; bgfx::ProgramHandle m_programOcclusionPass; + bgfx::ProgramHandle m_programCopyZ; bgfx::ProgramHandle m_programDownscaleHiZ; bgfx::ProgramHandle m_programOccludeProps; bgfx::ProgramHandle m_programStreamCompaction;