Merge pull request #1655 from attilaz/37-gdr-ogl

37-gpudrivenrendering fix for OpenGL
2019-02-13 10:38:18 -08:00 · 2019-02-13 10:38:18 -08:00 · 0997b034ed
commit 0997b034ed
parent 8d39ab4e3b 827001e74a
5 changed files with 68 additions and 40 deletions
--- a/examples/37-gpudrivenrendering/cs_gdr_copy_z.sc
+++ b/examples/37-gpudrivenrendering/cs_gdr_copy_z.sc
@ -0,0 +1,26 @@
+/*
+ * Copyright 2018 Kostas Anagnostou. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+#include "bgfx_compute.sh"
+
+SAMPLER2D(s_texOcclusionDepth, 0);
+IMAGE2D_WR(s_texOcclusionDepthOut, r32f, 1);
+
+uniform vec4 u_inputRTSize;
+
+NUM_THREADS(16, 16, 1)
+void main()
+{
+	// this shader only copies mip 0 of the input depth texture over to the output; downscaling is done by cs_gdr_downscale_hi_z.
+
+	ivec2 coord = ivec2(gl_GlobalInvocationID.xy);
+
+	if (all(lessThan(coord.xy, u_inputRTSize.xy) ) )
+	{
+		float maxDepth = texelFetch(s_texOcclusionDepth, coord.xy, 0).x;
+
+		imageStore(s_texOcclusionDepthOut, coord, vec4(maxDepth,0,0,1) );
+	}
+}
--- a/examples/37-gpudrivenrendering/cs_gdr_downscale_hi_z.sc
+++ b/examples/37-gpudrivenrendering/cs_gdr_downscale_hi_z.sc
@ -21,26 +21,18 @@ void main()
 	{
 		float maxDepth = 1.0;

-		if (u_inputRTSize.z > 1)
-		{
-			vec4 depths = vec4(
-				  imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy                   ) ).x
-				, imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(1.0, 0.0) ) ).x
-				, imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(0.0, 1.0) ) ).x
-				, imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(1.0, 1.0) ) ).x
-				);
+		vec4 depths = vec4(
+				imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy                   ) ).x
+			, imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(1.0, 0.0) ) ).x
+			, imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(0.0, 1.0) ) ).x
+			, imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(1.0, 1.0) ) ).x
+			);

-			// find and return max depth
-			maxDepth = max(
-				  max(depths.x, depths.y)
-				, max(depths.z, depths.w)
-				);
-		}
-		else
-		{
-			// do not downscale, just copy the value over to the output rendertarget
-			maxDepth = imageLoad(s_texOcclusionDepthIn, coord.xy).x;
-		}
+		// find and return max depth
+		maxDepth = max(
+				max(depths.x, depths.y)
+			, max(depths.z, depths.w)
+			);

 		imageStore(s_texOcclusionDepthOut, coord, vec4(maxDepth,0,0,1) );
 	}
--- a/examples/37-gpudrivenrendering/cs_gdr_occlude_props.sc
+++ b/examples/37-gpudrivenrendering/cs_gdr_occlude_props.sc
@ -51,6 +51,9 @@ void main()
 			//transform World space aaBox to NDC
 			vec4 clipPos = mul( u_viewProj, vec4(boxCorners[i], 1) );

+#if BGFX_SHADER_LANGUAGE_GLSL 
+			clipPos.z = 0.5 * ( clipPos.z + clipPos.w );
+#endif
 			clipPos.z = max(clipPos.z, 0);

 			clipPos.xyz = clipPos.xyz / clipPos.w;
@ -83,6 +86,10 @@ void main()
 		if (dims.x <= 2 && dims.y <= 2)
 			mip = level_lower;

+#if BGFX_SHADER_LANGUAGE_GLSL
+		boxUVs.y = 1.0 - boxUVs.y;
+		boxUVs.w = 1.0 - boxUVs.w;
+#endif
 		//load depths from high z buffer
 		vec4 depth =
 		{
--- a/examples/37-gpudrivenrendering/cs_gdr_stream_compaction.sc
+++ b/examples/37-gpudrivenrendering/cs_gdr_stream_compaction.sc
@ -32,8 +32,8 @@ void main()
 	int NoofDrawcalls = int(u_cullingConfig.w);

 	int offset = 1;
-	temp[2 * tID    ] = uint(instancePredicates[2 * tID    ]); // load input into shared memory
-	temp[2 * tID + 1] = uint(instancePredicates[2 * tID + 1]);
+	temp[2 * tID    ] = uint(instancePredicates[2 * tID    ] ? 1 : 0); // load input into shared memory
+	temp[2 * tID + 1] = uint(instancePredicates[2 * tID + 1] ? 1 : 0);

 	int d;

--- a/examples/37-gpudrivenrendering/gpudrivenrendering.cpp
+++ b/examples/37-gpudrivenrendering/gpudrivenrendering.cpp
@ -604,6 +604,7 @@ public:

 			// Create programs from shaders for occlusion pass.
 			m_programOcclusionPass    = loadProgram("vs_gdr_render_occlusion", NULL);
+			m_programCopyZ            = loadProgram("cs_gdr_copy_z", NULL);
 			m_programDownscaleHiZ     = loadProgram("cs_gdr_downscale_hi_z", NULL);
 			m_programOccludeProps     = loadProgram("cs_gdr_occlude_props", NULL);
 			m_programStreamCompaction = loadProgram("cs_gdr_stream_compaction", NULL);
@ -706,6 +707,7 @@ public:

 		bgfx::destroy(m_programMainPass);
 		bgfx::destroy(m_programOcclusionPass);
+		bgfx::destroy(m_programCopyZ);
 		bgfx::destroy(m_programDownscaleHiZ);
 		bgfx::destroy(m_programOccludeProps);
 		bgfx::destroy(m_programStreamCompaction);
@ -813,29 +815,29 @@ public:
 		uint32_t width = m_hiZwidth;
 		uint32_t height = m_hiZheight;

-		for (uint8_t lod = 0; lod < m_noofHiZMips; ++lod)
+		// copy mip zero over to the hi Z buffer.
+		// We can't currently use blit as it requires same format and CopyResource is not exposed.
 		{
-			float coordinateScale = lod > 0 ? 2.0f : 1.0f;
-
-			float inputRendertargetSize[4] = { (float)width, (float)height, coordinateScale, coordinateScale };
+			float inputRendertargetSize[4] = { (float)width, (float)height, 0.0f, 0.0f };
 			bgfx::setUniform(u_inputRTSize, inputRendertargetSize);

-			if (lod > 0)
-			{
-				// down scale mip 1 onwards
-				width /= 2;
-				height /= 2;
+			bgfx::setTexture(0, s_texOcclusionDepth, getTexture(m_hiZDepthBuffer, 0));
+			bgfx::setImage(1, getTexture(m_hiZBuffer,      0), 0, bgfx::Access::Write);
+		
+			bgfx::dispatch(RENDER_PASS_HIZ_DOWNSCALE_ID, m_programCopyZ, width/16, height/16);
+		}

-				bgfx::setImage(0, getTexture(m_hiZBuffer, 0), lod - 1, bgfx::Access::Read);
-				bgfx::setImage(1, getTexture(m_hiZBuffer, 0), lod,     bgfx::Access::Write);
-			}
-			else
-			{
-				// copy mip zero over to the hi Z buffer.
-				// We can't currently use blit as it requires same format and CopyResource is not exposed.
-				bgfx::setImage(0, getTexture(m_hiZDepthBuffer, 0), 0, bgfx::Access::Read);
-				bgfx::setImage(1, getTexture(m_hiZBuffer,      0), 0, bgfx::Access::Write);
-			}
+		for (uint8_t lod = 1; lod < m_noofHiZMips; ++lod)
+		{
+			float inputRendertargetSize[4] = { (float)width, (float)height, 2.0f, 2.0f };
+			bgfx::setUniform(u_inputRTSize, inputRendertargetSize);
+
+			// down scale mip 1 onwards
+			width /= 2;
+			height /= 2;
+
+			bgfx::setImage(0, getTexture(m_hiZBuffer, 0), lod - 1, bgfx::Access::Read);
+			bgfx::setImage(1, getTexture(m_hiZBuffer, 0), lod,     bgfx::Access::Write);

 			bgfx::dispatch(RENDER_PASS_HIZ_DOWNSCALE_ID, m_programDownscaleHiZ, width/16, height/16);
 		}
@ -1086,6 +1088,7 @@ public:

 	bgfx::ProgramHandle m_programMainPass;
 	bgfx::ProgramHandle m_programOcclusionPass;
+	bgfx::ProgramHandle m_programCopyZ;
 	bgfx::ProgramHandle m_programDownscaleHiZ;
 	bgfx::ProgramHandle m_programOccludeProps;
 	bgfx::ProgramHandle m_programStreamCompaction;