Parallelised compute shader in example 48 (#2906)

* Parallelised compute shader in example 48

* Ex48 - Dispatch numToDraw/64 workgroups of 64 local threads

* fixed vs build (again)
This commit is contained in:
SnapperTT 2022-08-29 23:44:55 +10:00 committed by GitHub
parent 8d412e16e4
commit 4613bd1e02
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 19 additions and 8 deletions

View File

@ -14,19 +14,26 @@ BUFFER_WR(instanceBufferOut, vec4, 2);
uniform vec4 u_drawParams;
NUM_THREADS(1, 1, 1)
// Use 64*1*1 local threads
NUM_THREADS(64, 1, 1)
void main()
{
int tId = int(gl_GlobalInvocationID.x);
int numDrawItems = int(u_drawParams.x);
int sideSize = int(u_drawParams.y);
float time = u_drawParams.z;
// Prepare draw mtx
// Work out the amount of work we're going to do here
int maxToDraw = min(sideSize*sideSize, numDrawItems);
for (int k = 0; k < maxToDraw; k++) {
int numToDrawPerThread = maxToDraw/64 + 1;
int idxStart = tId*numToDrawPerThread;
int idxMax = min(maxToDraw, (tId+1)*numToDrawPerThread);
// Prepare draw mtx
for (int k = idxStart; k < idxMax; k++) {
int yy = k / sideSize;
int xx = k % sideSize;
@ -58,7 +65,7 @@ void main()
// Fill indirect buffer
for (int k = 0; k < maxToDraw; k++) {
for (int k = idxStart; k < idxMax; k++) {
drawIndexedIndirect(
// Target location params:
indirectBuffer, // target buffer

View File

@ -344,26 +344,30 @@ public:
// The model matrix for each instance is also set on compute
// you could modify this to, e.g., do frustum culling on the GPU
float ud[4] = { float(m_nDrawElements), float(m_sideSize), float(time), 0 };
uint32_t numToDraw = (m_sideSize*m_sideSize);
bgfx::setUniform(u_drawParams, ud);
bgfx::setBuffer(0, m_object_list_buffer, bgfx::Access::Read);
bgfx::setBuffer(1, m_indirect_buffer_handle, bgfx::Access::Write);
bgfx::setBuffer(2, m_instance_buffer, bgfx::Access::Write);
bgfx::dispatch(0, m_indirect_program);
// Dispatch the call. We are using 64 local threads per workgroup on the GPU to process the object list,
// so let's dispatch numToDraw/64 + 1 workgroups of 64 local threads (at least ceil(numToDraw/64))
bgfx::dispatch(0, m_indirect_program, uint32_t(numToDraw/64 + 1), 1, 1);
// Submit our 1 draw call
// Set vertex and index buffer.
bgfx::setIndexBuffer(m_ibh);
bgfx::setVertexBuffer(0, m_vbh);
bgfx::setInstanceDataBuffer(m_instance_buffer, 0, m_sideSize*m_sideSize);
bgfx::setInstanceDataBuffer(m_instance_buffer, 0, numToDraw);
// Set render states.
bgfx::setState(BGFX_STATE_DEFAULT);
// Submit primitive for rendering to view 0.
// note that this submission requires the draw count
bgfx::submit(0, m_program, m_indirect_buffer_handle, 0, uint16_t(m_sideSize*m_sideSize));
bgfx::submit(0, m_program, m_indirect_buffer_handle, 0, uint16_t(numToDraw));
}
else
{