Initial commit of gpu occlusion culling with multidraw indirect example (#1344)
This commit is contained in:
parent
985f0b2436
commit
e474666a55
44
examples/37-gpudrivenrendering/cs_downscaleHiZ.sc
Normal file
44
examples/37-gpudrivenrendering/cs_downscaleHiZ.sc
Normal file
@ -0,0 +1,44 @@
|
||||
|
||||
/*
|
||||
* Copyright 2018 Kostas Anagnostou. All rights reserved.
|
||||
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
|
||||
*/
|
||||
|
||||
#include "bgfx_compute.sh"
|
||||
|
||||
IMAGE2D_RO(s_texOcclusionDepthIn, r32f, 0);
|
||||
IMAGE2D_WR(s_texOcclusionDepthOut, r32f, 1);
|
||||
|
||||
uniform vec4 u_inputRTSize;
|
||||
|
||||
// Hi-Z build pass. Each thread writes one texel of s_texOcclusionDepthOut,
// either by copying the matching input texel or by taking the maximum of a
// 2x2 quad of input texels.
//
// u_inputRTSize.xy : exclusive upper bound of the texel coordinates written.
// u_inputRTSize.zw : factor applied to the output coordinate to locate the
//                    source texel. Downscaling is selected when z > 1
//                    (presumably z == w == 2 in that case -- confirm against
//                    the CPU-side setup).
NUM_THREADS(16, 16, 1)
void main()
{
	//this shader can be used to both copy a mip over to the output and downscale it.

	// Explicit conversion: gl_GlobalInvocationID is unsigned.
	ivec2 coord = ivec2(gl_GlobalInvocationID.xy);

	// Component-wise scalar compares instead of all(vector < vector), which
	// only compiles as HLSL.
	if (float(coord.x) < u_inputRTSize.x
	&&  float(coord.y) < u_inputRTSize.y)
	{
		float maxDepth = 1.0;

		if (u_inputRTSize.z > 1.0)
		{
			// Top-left texel of the source quad. imageLoad takes integer
			// coordinates, so the float product is converted once here.
			ivec2 srcCoord = ivec2(u_inputRTSize.zw * vec2(coord) );

			vec4 depths = vec4(
				  imageLoad(s_texOcclusionDepthIn, srcCoord              ).r
				, imageLoad(s_texOcclusionDepthIn, srcCoord + ivec2(1, 0) ).r
				, imageLoad(s_texOcclusionDepthIn, srcCoord + ivec2(0, 1) ).r
				, imageLoad(s_texOcclusionDepthIn, srcCoord + ivec2(1, 1) ).r
				);

			//find and return max depth
			maxDepth = max(max(depths.x, depths.y), max(depths.z, depths.w) );
		}
		else
		{
			//do not downscale, just copy the value over to the output rendertarget
			maxDepth = imageLoad(s_texOcclusionDepthIn, coord).r;
		}

		imageStore(s_texOcclusionDepthOut, coord, vec4(maxDepth, 0.0, 0.0, 1.0) );
	}
}
|
106
examples/37-gpudrivenrendering/cs_occludeProps.sc
Normal file
106
examples/37-gpudrivenrendering/cs_occludeProps.sc
Normal file
@ -0,0 +1,106 @@
|
||||
|
||||
/*
|
||||
* Copyright 2018 Kostas Anagnostou. All rights reserved.
|
||||
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
|
||||
*/
|
||||
|
||||
#include "bgfx_compute.sh"
|
||||
|
||||
SAMPLER2D(s_texOcclusionDepth, 0);
|
||||
|
||||
BUFFER_RO(instanceDataIn, vec4, 1);
|
||||
BUFFER_RW(drawcallInstanceCount, uint, 2);
|
||||
BUFFER_WR(instancePredicates, bool, 3);
|
||||
|
||||
uniform vec4 u_inputRTSize;
|
||||
uniform vec4 u_cullingConfig;
|
||||
|
||||
// Hi-Z occlusion test, one thread per instance.
//
// Inputs read by this shader:
//   instanceDataIn[2*i]       : xyz = bounding box min, w = drawcall ID
//   instanceDataIn[2*i+1].xyz : bounding box max
//   u_cullingConfig.x         : number of instances to process
//   u_cullingConfig.z         : highest mip level that may be sampled
//   u_inputRTSize.xy          : size used to convert a UV extent to texels
//
// Outputs:
//   instancePredicates[i]     : true when the instance passed the test
//   drawcallInstanceCount[id] : atomically incremented per passing instance
//
// The predicate is written for every thread, including threads past the
// instance count (those always store false).
NUM_THREADS(64, 1, 1)
void main()
{
	bool predicate = false;

	//make sure that we are not processing more instances than available
	if (gl_GlobalInvocationID.x < uint(u_cullingConfig.x) )
	{
		//get the bounding box for this instance
		vec4 bboxMin = instanceDataIn[2 * gl_GlobalInvocationID.x];
		vec3 bboxMax = instanceDataIn[2 * gl_GlobalInvocationID.x + 1].xyz;

		int drawcallID = int(bboxMin.w);

		//Adapted from http://blog.selfshadow.com/publications/practical-visibility/
		vec3 bboxSize = bboxMax.xyz - bboxMin.xyz;

		// The eight corners of the axis-aligned box.
		vec3 boxCorners[8];
		boxCorners[0] = bboxMin.xyz;
		boxCorners[1] = bboxMin.xyz + vec3(bboxSize.x, 0.0, 0.0);
		boxCorners[2] = bboxMin.xyz + vec3(0.0, bboxSize.y, 0.0);
		boxCorners[3] = bboxMin.xyz + vec3(0.0, 0.0, bboxSize.z);
		boxCorners[4] = bboxMin.xyz + vec3(bboxSize.xy, 0.0);
		boxCorners[5] = bboxMin.xyz + vec3(0.0, bboxSize.yz);
		boxCorners[6] = bboxMin.xyz + vec3(bboxSize.x, 0.0, bboxSize.z);
		boxCorners[7] = bboxMin.xyz + bboxSize.xyz;

		float minZ = 1.0;
		vec2 minXY = vec2(1.0, 1.0);
		vec2 maxXY = vec2(0.0, 0.0);

		[unroll]
		for (int i = 0; i < 8; i++)
		{
			//transform World space aaBox to NDC
			vec4 clipPos = mul(u_viewProj, vec4(boxCorners[i], 1.0) );

			clipPos.z = max(clipPos.z, 0.0);

			// NOTE(review): corners with clipPos.w <= 0 are not handled
			// specially before the divide.
			clipPos.xyz = clipPos.xyz / clipPos.w;

			// NDC [-1,1] -> UV [0,1] with y flipped.
			clipPos.xy = clamp(clipPos.xy, vec2(-1.0, -1.0), vec2(1.0, 1.0) );
			clipPos.xy = clipPos.xy * vec2(0.5, -0.5) + vec2(0.5, 0.5);

			minXY = min(clipPos.xy, minXY);
			maxXY = max(clipPos.xy, maxXY);

			minZ = saturate(min(minZ, clipPos.z) );
		}

		vec4 boxUVs = vec4(minXY, maxXY);

		// Calculate hi-Z buffer mip
		ivec2 size = ivec2( (maxXY - minXY) * u_inputRTSize.xy);
		float mip = ceil(log2(float(max(size.x, size.y) ) ) );

		mip = clamp(mip, 0.0, u_cullingConfig.z);

		// Texel footprint for the lower (finer-grained) level.
		// The box extent must be in texels of that level here: with raw
		// [0,1] UVs, floor()/ceil() always give a footprint of at most one
		// texel, so the finer level would be chosen unconditionally and the
		// four taps below could miss part of the box.
		float level_lower = max(mip - 1.0, 0.0);
		float scale = exp2(-level_lower);
		vec2 a = floor(boxUVs.xy * u_inputRTSize.xy * scale);
		vec2 b = ceil (boxUVs.zw * u_inputRTSize.xy * scale);
		vec2 dims = b - a;

		// Use the lower level if we only touch <= 2 texels in both dimensions
		if (dims.x <= 2.0 && dims.y <= 2.0)
		{
			mip = level_lower;
		}

		//load depths from high z buffer
		vec4 depth = vec4(
			  texture2DLod(s_texOcclusionDepth, boxUVs.xy, mip).x
			, texture2DLod(s_texOcclusionDepth, boxUVs.zy, mip).x
			, texture2DLod(s_texOcclusionDepth, boxUVs.xw, mip).x
			, texture2DLod(s_texOcclusionDepth, boxUVs.zw, mip).x
			);

		//find the max depth
		float maxDepth = max(max(depth.x, depth.y), max(depth.z, depth.w) );

		// Passes when the nearest box depth is not behind the farthest
		// depth sampled from the hi-Z buffer.
		if (minZ <= maxDepth)
		{
			predicate = true;

			//increase instance count for this particular prop type
			InterlockedAdd(drawcallInstanceCount[drawcallID], 1);
		}
	}

	instancePredicates[gl_GlobalInvocationID.x] = predicate;
}
|
122
examples/37-gpudrivenrendering/cs_streamCompaction.sc
Normal file
122
examples/37-gpudrivenrendering/cs_streamCompaction.sc
Normal file
@ -0,0 +1,122 @@
|
||||
|
||||
/*
|
||||
* Copyright 2018 Kostas Anagnostou. All rights reserved.
|
||||
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
|
||||
*/
|
||||
|
||||
#include "bgfx_compute.sh"
|
||||
|
||||
//the per drawcall data that is constant (number of indices and offsets into the vertex/index buffers)
|
||||
BUFFER_RO(drawcallConstData, uint, 0);
|
||||
//instance data for all instances (pre culling)
|
||||
BUFFER_RO(instanceDataIn, vec4, 1);
|
||||
//per instance visibility (output of culling pass)
|
||||
BUFFER_RO(instancePredicates, bool, 2);
|
||||
|
||||
//how many instances per drawcall
|
||||
BUFFER_RW(drawcallInstanceCount, uint, 3);
|
||||
//drawcall data that will drive drawIndirect
|
||||
BUFFER_RW(drawcallData, uvec4, 4);
|
||||
//culled instance data
|
||||
BUFFER_WR(instanceDataOut, vec4, 5);
|
||||
|
||||
uniform vec4 u_cullingConfig;
|
||||
|
||||
// Based on Parallel Prefix Sum (Scan) with CUDA by Mark Harris
|
||||
groupshared uint temp[2048];
|
||||
|
||||
// Stream compaction of the culled instances plus indirect argument setup.
//
// 1. Each thread stages two visibility predicates into the shared array.
// 2. An up-sweep / down-sweep scan over u_cullingConfig.y elements turns the
//    predicates into exclusive prefix sums, i.e. temp[i] is the number of
//    visible instances that precede instance i.
// 3. Every visible instance (four vec4 each) is copied to slot temp[i] of
//    instanceDataOut.
// 4. Thread 0 emits one indexed indirect draw per drawcall
//    (u_cullingConfig.w of them) and resets the per-drawcall counters.
//
// NOTE(review): the shared array is indexed with the global invocation ID, so
// this appears to assume a single-group dispatch -- confirm with the caller.
NUM_THREADS(1024, 1, 1)
void main()
{
	int lane          = gl_GlobalInvocationID.x;
	int paddedCount   = u_cullingConfig.y;
	int drawcallCount = u_cullingConfig.w;

	// The two elements owned by this thread.
	int first  = 2 * lane;
	int second = first + 1;

	// load input into shared memory
	temp[first]  = instancePredicates[first];
	temp[second] = instancePredicates[second];

	int stride = 1;
	int d;

	// Up-sweep: build partial sums in place.
	for (d = paddedCount >> 1; d > 0; d >>= 1)
	{
		GroupMemoryBarrierWithGroupSync();

		if (lane < d)
		{
			int ai = stride * (first + 1) - 1;
			int bi = stride * (first + 2) - 1;
			temp[bi] += temp[ai];
		}

		stride *= 2;
	}

	// Clear the last element so the down-sweep yields an exclusive scan.
	if (lane == 0)
	{
		temp[paddedCount - 1] = 0;
	}

	// Down-sweep: distribute the partial sums back down the tree.
	for (d = 1; d < paddedCount; d *= 2)
	{
		stride >>= 1;

		GroupMemoryBarrierWithGroupSync();

		if (lane < d)
		{
			int ai = stride * (first + 1) - 1;
			int bi = stride * (first + 2) - 1;
			int t = temp[ai];
			temp[ai] = temp[bi];
			temp[bi] += t;
		}
	}

	// All scan results must be visible before they are used as addresses.
	GroupMemoryBarrierWithGroupSync();

	// Scatter: copy both owned instances, if visible, to their compacted slot.
	for (int e = 0; e < 2; ++e)
	{
		int index = first + e;

		if (instancePredicates[index] != 0)
		{
			for (int row = 0; row < 4; ++row)
			{
				instanceDataOut[4 * temp[index] + row] = instanceDataIn[4 * index + row];
			}
		}
	}

	if (lane == 0)
	{
		// Running offset into the compacted instance buffer.
		uint startInstance = 0;

		// Copy data to the indirect buffer; could possibly be done in a
		// different compute shader.
		for (int k = 0; k < drawcallCount; k++)
		{
			uint visibleCount = drawcallInstanceCount[k];

			drawIndexedIndirect(
				  drawcallData
				, k
				, drawcallConstData[k * 3]     //number of indices
				, visibleCount                 //number of instances
				, drawcallConstData[k * 3 + 1] //offset into the index buffer
				, drawcallConstData[k * 3 + 2] //offset into the vertex buffer
				, startInstance                //offset into the instance buffer
				);

			startInstance += visibleCount;

			// Reset the counter after its value has been consumed.
			drawcallInstanceCount[k] = 0;
		}
	}
}
|
@ -0,0 +1,24 @@
|
||||
$input v_materialID
|
||||
|
||||
/*
|
||||
* Copyright 2018 Kostas Anagnostou. All rights reserved.
|
||||
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
|
||||
*/
|
||||
|
||||
#include "../common/common.sh"
|
||||
|
||||
uniform vec4 u_colour[50];
|
||||
|
||||
// Flat-shades the instance with the colour selected by its material ID.
// Materials whose alpha (colour.w) is below 1 are drawn with a checkerboard
// discard pattern; the output alpha is always 1.
void main()
{
	vec4 colour = u_colour[v_materialID.x];

	if (colour.w < 1.0)
	{
		//render dithered alpha
		// Integer pixel coordinates: '%' on floats only compiles as HLSL.
		// Pixels whose x and y parity match are dropped, as before.
		ivec2 pixel = ivec2(gl_FragCoord.xy);

		if ( (pixel.x % 2) == (pixel.y % 2) )
		{
			discard;
		}
	}

	gl_FragColor = vec4(colour.xyz, 1.0);
}
|
13
examples/37-gpudrivenrendering/fs_renderOcclusion.sc
Normal file
13
examples/37-gpudrivenrendering/fs_renderOcclusion.sc
Normal file
@ -0,0 +1,13 @@
|
||||
|
||||
/*
|
||||
* Copyright 2018 Kostas Anagnostou. All rights reserved.
|
||||
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
|
||||
*/
|
||||
|
||||
#include "../common/common.sh"
|
||||
|
||||
// Placeholder fragment shader for the occlusion buffer pass; it can be
// dropped once bgfx supports rendering with a null shader.
// Always outputs zero in every channel.
void main()
{
	gl_FragColor = vec4(0.0, 0.0, 0.0, 0.0);
}
|
1229
examples/37-gpudrivenrendering/gpudrivenrendering.cpp
Normal file
1229
examples/37-gpudrivenrendering/gpudrivenrendering.cpp
Normal file
File diff suppressed because it is too large
Load Diff
10
examples/37-gpudrivenrendering/makefile
Normal file
10
examples/37-gpudrivenrendering/makefile
Normal file
@ -0,0 +1,10 @@
|
||||
#
|
||||
# Copyright 2011-2018 Branimir Karadzic. All rights reserved.
|
||||
# License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
|
||||
#
|
||||
|
||||
BGFX_DIR=../..
|
||||
RUNTIME_DIR=$(BGFX_DIR)/examples/runtime
|
||||
BUILD_DIR=../../.build
|
||||
|
||||
include $(BGFX_DIR)/scripts/shader.mk
|
7
examples/37-gpudrivenrendering/varying.def.sc
Normal file
7
examples/37-gpudrivenrendering/varying.def.sc
Normal file
@ -0,0 +1,7 @@
|
||||
uint v_materialID : TEXCOORD0;
|
||||
|
||||
vec3 a_position : POSITION;
|
||||
vec4 i_data0 : TEXCOORD7;
|
||||
vec4 i_data1 : TEXCOORD6;
|
||||
vec4 i_data2 : TEXCOORD5;
|
||||
vec4 i_data3 : TEXCOORD4;
|
4
examples/37-gpudrivenrendering/varying_pos_tex0.def.sc
Normal file
4
examples/37-gpudrivenrendering/varying_pos_tex0.def.sc
Normal file
@ -0,0 +1,4 @@
|
||||
vec2 v_texcoord0 : TEXCOORD0;
|
||||
|
||||
vec3 a_position : POSITION;
|
||||
vec2 a_texcoord0 : TEXCOORD0;
|
@ -0,0 +1,24 @@
|
||||
$input a_position, i_data0, i_data1, i_data2, i_data3
|
||||
$output v_materialID
|
||||
|
||||
/*
|
||||
* Copyright 2018 Kostas Anagnostou. All rights reserved.
|
||||
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
|
||||
*/
|
||||
|
||||
#include "../common/common.sh"
|
||||
|
||||
// Instanced vertex shader: builds the model matrix from the four per-instance
// vectors and forwards the material ID to the fragment stage.
void main()
{
	// The material ID travels in the w component of the first instance
	// vector; it is handed on here and replaced by 0 in the matrix below.
	v_materialID = i_data0.w;

	mat4 model;
	model[0] = vec4(i_data0.xyz, 0);
	model[1] = i_data1;
	model[2] = i_data2;
	model[3] = i_data3;

	vec4 worldPos = instMul(model, vec4(a_position, 1.0) );
	gl_Position = mul(u_viewProj, worldPos);
}
|
20
examples/37-gpudrivenrendering/vs_renderOcclusion.sc
Normal file
20
examples/37-gpudrivenrendering/vs_renderOcclusion.sc
Normal file
@ -0,0 +1,20 @@
|
||||
$input a_position, i_data0, i_data1, i_data2, i_data3, i_data4
|
||||
|
||||
/*
|
||||
* Copyright 2018 Kostas Anagnostou. All rights reserved.
|
||||
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
|
||||
*/
|
||||
|
||||
#include "../common/common.sh"
|
||||
|
||||
// Vertex shader for the occlusion pass: transforms the vertex by the
// per-instance model matrix and then by the view-projection matrix.
void main()
{
	// Assemble the model matrix from the four per-instance vectors.
	mat4 instanceTransform;
	instanceTransform[0] = i_data0;
	instanceTransform[1] = i_data1;
	instanceTransform[2] = i_data2;
	instanceTransform[3] = i_data3;

	gl_Position = mul(u_viewProj, instMul(instanceTransform, vec4(a_position, 1.0) ) );
}
|
Loading…
Reference in New Issue
Block a user