Merge pull request #1655 from attilaz/37-gdr-ogl

37-gpudrivenrendering fix for OpenGL
This commit is contained in:
Бранимир Караџић 2019-02-13 10:38:18 -08:00 committed by GitHub
commit 0997b034ed
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 68 additions and 40 deletions

View File

@ -0,0 +1,26 @@
/*
* Copyright 2018 Kostas Anagnostou. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_compute.sh"
// Source depth texture (slot 0); main() reads it with texelFetch at mip level 0.
SAMPLER2D(s_texOcclusionDepth, 0);
// Write-only r32f destination image (slot 1); main() stores one texel per invocation.
IMAGE2D_WR(s_texOcclusionDepthOut, r32f, 1);
// .xy is used by main() as the exclusive upper bound for the invocation coordinate.
// .zw is not read by this shader.
uniform vec4 u_inputRTSize;
NUM_THREADS(16, 16, 1)
void main()
{
	// Copies a single texel from s_texOcclusionDepth (mip level 0) to the same
	// coordinate of s_texOcclusionDepthOut. This shader only copies; it does
	// not downscale.
	ivec2 coord = ivec2(gl_GlobalInvocationID.xy);

	// u_inputRTSize.xy is a float pair, so convert the integer coordinate
	// explicitly. GLSL ES has no implicit int -> float conversion, and
	// lessThan() needs both operands to have the same vector type.
	// Invocations at or beyond the bound do nothing.
	if (all(lessThan(vec2(coord), u_inputRTSize.xy) ) )
	{
		float depth = texelFetch(s_texOcclusionDepth, coord, 0).x;
		imageStore(s_texOcclusionDepthOut, coord, vec4(depth, 0.0, 0.0, 1.0) );
	}
}

View File

@ -21,26 +21,18 @@ void main()
{ {
float maxDepth = 1.0; float maxDepth = 1.0;
if (u_inputRTSize.z > 1) vec4 depths = vec4(
{ imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy ) ).x
vec4 depths = vec4( , imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(1.0, 0.0) ) ).x
imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy ) ).x , imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(0.0, 1.0) ) ).x
, imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(1.0, 0.0) ) ).x , imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(1.0, 1.0) ) ).x
, imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(0.0, 1.0) ) ).x );
, imageLoad(s_texOcclusionDepthIn, ivec2(u_inputRTSize.zw * coord.xy + ivec2(1.0, 1.0) ) ).x
);
// find and return max depth // find and return max depth
maxDepth = max( maxDepth = max(
max(depths.x, depths.y) max(depths.x, depths.y)
, max(depths.z, depths.w) , max(depths.z, depths.w)
); );
}
else
{
// do not downscale, just copy the value over to the output rendertarget
maxDepth = imageLoad(s_texOcclusionDepthIn, coord.xy).x;
}
imageStore(s_texOcclusionDepthOut, coord, vec4(maxDepth,0,0,1) ); imageStore(s_texOcclusionDepthOut, coord, vec4(maxDepth,0,0,1) );
} }

View File

@ -51,6 +51,9 @@ void main()
//transform World space aaBox to NDC //transform World space aaBox to NDC
vec4 clipPos = mul( u_viewProj, vec4(boxCorners[i], 1) ); vec4 clipPos = mul( u_viewProj, vec4(boxCorners[i], 1) );
#if BGFX_SHADER_LANGUAGE_GLSL
clipPos.z = 0.5 * ( clipPos.z + clipPos.w );
#endif
clipPos.z = max(clipPos.z, 0); clipPos.z = max(clipPos.z, 0);
clipPos.xyz = clipPos.xyz / clipPos.w; clipPos.xyz = clipPos.xyz / clipPos.w;
@ -83,6 +86,10 @@ void main()
if (dims.x <= 2 && dims.y <= 2) if (dims.x <= 2 && dims.y <= 2)
mip = level_lower; mip = level_lower;
#if BGFX_SHADER_LANGUAGE_GLSL
boxUVs.y = 1.0 - boxUVs.y;
boxUVs.w = 1.0 - boxUVs.w;
#endif
//load depths from high z buffer //load depths from high z buffer
vec4 depth = vec4 depth =
{ {

View File

@ -32,8 +32,8 @@ void main()
int NoofDrawcalls = int(u_cullingConfig.w); int NoofDrawcalls = int(u_cullingConfig.w);
int offset = 1; int offset = 1;
temp[2 * tID ] = uint(instancePredicates[2 * tID ]); // load input into shared memory temp[2 * tID ] = uint(instancePredicates[2 * tID ] ? 1 : 0); // load input into shared memory
temp[2 * tID + 1] = uint(instancePredicates[2 * tID + 1]); temp[2 * tID + 1] = uint(instancePredicates[2 * tID + 1] ? 1 : 0);
int d; int d;

View File

@ -604,6 +604,7 @@ public:
// Create programs from shaders for occlusion pass. // Create programs from shaders for occlusion pass.
m_programOcclusionPass = loadProgram("vs_gdr_render_occlusion", NULL); m_programOcclusionPass = loadProgram("vs_gdr_render_occlusion", NULL);
m_programCopyZ = loadProgram("cs_gdr_copy_z", NULL);
m_programDownscaleHiZ = loadProgram("cs_gdr_downscale_hi_z", NULL); m_programDownscaleHiZ = loadProgram("cs_gdr_downscale_hi_z", NULL);
m_programOccludeProps = loadProgram("cs_gdr_occlude_props", NULL); m_programOccludeProps = loadProgram("cs_gdr_occlude_props", NULL);
m_programStreamCompaction = loadProgram("cs_gdr_stream_compaction", NULL); m_programStreamCompaction = loadProgram("cs_gdr_stream_compaction", NULL);
@ -706,6 +707,7 @@ public:
bgfx::destroy(m_programMainPass); bgfx::destroy(m_programMainPass);
bgfx::destroy(m_programOcclusionPass); bgfx::destroy(m_programOcclusionPass);
bgfx::destroy(m_programCopyZ);
bgfx::destroy(m_programDownscaleHiZ); bgfx::destroy(m_programDownscaleHiZ);
bgfx::destroy(m_programOccludeProps); bgfx::destroy(m_programOccludeProps);
bgfx::destroy(m_programStreamCompaction); bgfx::destroy(m_programStreamCompaction);
@ -813,29 +815,29 @@ public:
uint32_t width = m_hiZwidth; uint32_t width = m_hiZwidth;
uint32_t height = m_hiZheight; uint32_t height = m_hiZheight;
for (uint8_t lod = 0; lod < m_noofHiZMips; ++lod) // copy mip zero over to the hi Z buffer.
// We can't currently use blit as it requires same format and CopyResource is not exposed.
{ {
float coordinateScale = lod > 0 ? 2.0f : 1.0f; float inputRendertargetSize[4] = { (float)width, (float)height, 0.0f, 0.0f };
float inputRendertargetSize[4] = { (float)width, (float)height, coordinateScale, coordinateScale };
bgfx::setUniform(u_inputRTSize, inputRendertargetSize); bgfx::setUniform(u_inputRTSize, inputRendertargetSize);
if (lod > 0) bgfx::setTexture(0, s_texOcclusionDepth, getTexture(m_hiZDepthBuffer, 0));
{ bgfx::setImage(1, getTexture(m_hiZBuffer, 0), 0, bgfx::Access::Write);
// down scale mip 1 onwards
width /= 2; bgfx::dispatch(RENDER_PASS_HIZ_DOWNSCALE_ID, m_programCopyZ, width/16, height/16);
height /= 2; }
bgfx::setImage(0, getTexture(m_hiZBuffer, 0), lod - 1, bgfx::Access::Read); for (uint8_t lod = 1; lod < m_noofHiZMips; ++lod)
bgfx::setImage(1, getTexture(m_hiZBuffer, 0), lod, bgfx::Access::Write); {
} float inputRendertargetSize[4] = { (float)width, (float)height, 2.0f, 2.0f };
else bgfx::setUniform(u_inputRTSize, inputRendertargetSize);
{
// copy mip zero over to the hi Z buffer. // down scale mip 1 onwards
// We can't currently use blit as it requires same format and CopyResource is not exposed. width /= 2;
bgfx::setImage(0, getTexture(m_hiZDepthBuffer, 0), 0, bgfx::Access::Read); height /= 2;
bgfx::setImage(1, getTexture(m_hiZBuffer, 0), 0, bgfx::Access::Write);
} bgfx::setImage(0, getTexture(m_hiZBuffer, 0), lod - 1, bgfx::Access::Read);
bgfx::setImage(1, getTexture(m_hiZBuffer, 0), lod, bgfx::Access::Write);
bgfx::dispatch(RENDER_PASS_HIZ_DOWNSCALE_ID, m_programDownscaleHiZ, width/16, height/16); bgfx::dispatch(RENDER_PASS_HIZ_DOWNSCALE_ID, m_programDownscaleHiZ, width/16, height/16);
} }
@ -1086,6 +1088,7 @@ public:
bgfx::ProgramHandle m_programMainPass; bgfx::ProgramHandle m_programMainPass;
bgfx::ProgramHandle m_programOcclusionPass; bgfx::ProgramHandle m_programOcclusionPass;
bgfx::ProgramHandle m_programCopyZ;
bgfx::ProgramHandle m_programDownscaleHiZ; bgfx::ProgramHandle m_programDownscaleHiZ;
bgfx::ProgramHandle m_programOccludeProps; bgfx::ProgramHandle m_programOccludeProps;
bgfx::ProgramHandle m_programStreamCompaction; bgfx::ProgramHandle m_programStreamCompaction;