mirror of https://github.com/bkaradzic/bgfx
Parallelised compute shader in example 48 (#2906)
* Parallelised compute shader in example 48 * Ex48 - Dispatch numToDraw/64 workgroups of 64 local threads * fixed vs build (again)
This commit is contained in:
parent
8d412e16e4
commit
4613bd1e02
|
@ -14,19 +14,26 @@ BUFFER_WR(instanceBufferOut, vec4, 2);
|
|||
|
||||
uniform vec4 u_drawParams;
|
||||
|
||||
NUM_THREADS(1, 1, 1)
|
||||
// Use 64*1*1 local threads
|
||||
NUM_THREADS(64, 1, 1)
|
||||
|
||||
void main()
|
||||
{
|
||||
int tId = int(gl_GlobalInvocationID.x);
|
||||
int numDrawItems = int(u_drawParams.x);
|
||||
int sideSize = int(u_drawParams.y);
|
||||
float time = u_drawParams.z;
|
||||
|
||||
// Prepare draw mtx
|
||||
|
||||
// Work out the amount of work we're going to do here
|
||||
int maxToDraw = min(sideSize*sideSize, numDrawItems);
|
||||
|
||||
for (int k = 0; k < maxToDraw; k++) {
|
||||
int numToDrawPerThread = maxToDraw/64 + 1;
|
||||
|
||||
int idxStart = tId*numToDrawPerThread;
|
||||
int idxMax = min(maxToDraw, (tId+1)*numToDrawPerThread);
|
||||
|
||||
// Prepare draw mtx
|
||||
for (int k = idxStart; k < idxMax; k++) {
|
||||
int yy = k / sideSize;
|
||||
int xx = k % sideSize;
|
||||
|
||||
|
@ -58,7 +65,7 @@ void main()
|
|||
|
||||
// Fill indirect buffer
|
||||
|
||||
for (int k = 0; k < maxToDraw; k++) {
|
||||
for (int k = idxStart; k < idxMax; k++) {
|
||||
drawIndexedIndirect(
|
||||
// Target location params:
|
||||
indirectBuffer, // target buffer
|
||||
|
|
|
@ -344,26 +344,30 @@ public:
|
|||
// The model matrix for each instance is also set on compute
|
||||
// you could modify this to, e.g., do frustum culling on the GPU
|
||||
float ud[4] = { float(m_nDrawElements), float(m_sideSize), float(time), 0 };
|
||||
uint32_t numToDraw = (m_sideSize*m_sideSize);
|
||||
|
||||
bgfx::setUniform(u_drawParams, ud);
|
||||
|
||||
bgfx::setBuffer(0, m_object_list_buffer, bgfx::Access::Read);
|
||||
bgfx::setBuffer(1, m_indirect_buffer_handle, bgfx::Access::Write);
|
||||
bgfx::setBuffer(2, m_instance_buffer, bgfx::Access::Write);
|
||||
|
||||
bgfx::dispatch(0, m_indirect_program);
|
||||
// Dispatch the call. We are using 64 local threads on the GPU to process the object list
|
||||
// So let's dispatch (numToDraw/64 + 1) workgroups of 64 local threads, enough to cover all numToDraw objects
|
||||
bgfx::dispatch(0, m_indirect_program, uint32_t(numToDraw/64 + 1), 1, 1);
|
||||
|
||||
// Submit our 1 draw call
|
||||
// Set vertex and index buffer.
|
||||
bgfx::setIndexBuffer(m_ibh);
|
||||
bgfx::setVertexBuffer(0, m_vbh);
|
||||
bgfx::setInstanceDataBuffer(m_instance_buffer, 0, m_sideSize*m_sideSize);
|
||||
bgfx::setInstanceDataBuffer(m_instance_buffer, 0, numToDraw);
|
||||
|
||||
// Set render states.
|
||||
bgfx::setState(BGFX_STATE_DEFAULT);
|
||||
|
||||
// Submit primitive for rendering to view 0.
|
||||
// note that this submission requires the draw count
|
||||
bgfx::submit(0, m_program, m_indirect_buffer_handle, 0, uint16_t(m_sideSize*m_sideSize));
|
||||
bgfx::submit(0, m_program, m_indirect_buffer_handle, 0, uint16_t(numToDraw));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue