example 41-tess (#1821)

* new example

new tess example

* renaming

Rename clock to counter...

* clean

Remove comments

* clean up

removed vector and const char* path

* ups

Removing compiled shaders...
This commit is contained in:
DanielGavin 2019-07-21 02:04:35 +02:00 committed by Бранимир Караџић
parent bc419eed10
commit caccb56707
18 changed files with 1543 additions and 0 deletions

View File

@ -0,0 +1,209 @@
#pragma once
#include "common.h"
// Labels shown in the "Shading" combo box. The selected index is used directly
// as the draw-program index, so the order here must follow the shading enum
// (PROGRAM_TERRAIN_NORMAL first, PROGRAM_TERRAIN second).
//
// The array itself is const (not only the characters): that gives the
// definition internal linkage in C++, so including this header from more than
// one translation unit cannot produce duplicate-symbol errors.
const char* const shader_options[] = {
	"Normal",
	"Diffuse"
};
//// Instanced patch geometry at various subdivision levels ////
// Every vertex is a pair of floats inside the unit right triangle
// (0,0)-(1,0)-(0,1). Index lists hold three 32-bit indices per triangle.
//
// gpuSubd == 0: a single triangle (3 vertices, 1 triangle).
const float verticesL0[] = {
0.0f, 0.0f,
1.0f, 0.0f,
0.0f, 1.0f
};
const uint32_t indexesL0[] = { 0u, 1u, 2u };
// gpuSubd == 1: 6 vertices, 4 triangles fanned around vertex 1 (0.5, 0.5).
const float verticesL1[] = {
0.0f, 1.0f,
0.5f, 0.5f,
0.0f, 0.5f,
0.0f, 0.0f,
0.5f, 0.0f,
1.0f, 0.0f
};
// Three indices per triangle; every triangle starts at the fan centre (1).
const uint32_t indexesL1[] = {
1u, 0u, 2u,
1u, 2u, 3u,
1u, 3u, 4u,
1u, 4u, 5u
};
// gpuSubd == 2: 15 vertices, 16 triangles.
const float verticesL2[] = {
0.25f, 0.75f,
0.0f, 1.0f,
0.0f, 0.75f,
0.0f, 0.5f,
0.25f, 0.5f,
0.5f, 0.5f,
0.25f, 0.25f,
0.0f, 0.25f,
0.0f, 0.0f,
0.25f, 0.0f,
0.5f, 0.0f,
0.5f, 0.25f,
0.75f, 0.25f,
0.75f, 0.0f,
1.0f, 0.0f // vertex 14
};
// Three fans, centred on vertices 0, 6 and 12.
const uint32_t indexesL2[] = {
0u, 1u, 2u,
0u, 2u, 3u,
0u, 3u, 4u,
0u, 4u, 5u,
6u, 5u, 4u,
6u, 4u, 3u,
6u, 3u, 7u,
6u, 7u, 8u,
6u, 8u, 9u,
6u, 9u, 10u,
6u, 10u, 11u,
6u, 11u, 5u,
12u, 5u, 11u,
12u, 11u, 10u,
12u, 10u, 13u,
12u, 13u, 14u
};
// gpuSubd == 3: 45 vertices, 64 triangles.
// Vertices 0..14 are the level-2 vertices scaled by 0.5 and shifted up by 0.5
// in y; the remaining vertices are written out directly. Trailing "// N"
// comments give the index of the vertex on that line.
const float verticesL3[] = {
0.25f*0.5f, 0.75f*0.5f + 0.5f,
0.0f*0.5f, 1.0f*0.5f + 0.5f,
0.0f*0.5f, 0.75f*0.5f + 0.5f,
0.0f*0.5f , 0.5f*0.5f + 0.5f,
0.25f*0.5f, 0.5f*0.5f + 0.5f,
0.5f*0.5f, 0.5f*0.5f + 0.5f,
0.25f*0.5f, 0.25f*0.5f + 0.5f,
0.0f*0.5f, 0.25f*0.5f + 0.5f,
0.0f*0.5f, 0.0f*0.5f + 0.5f,
0.25f*0.5f, 0.0f*0.5f + 0.5f,
0.5f*0.5f, 0.0f*0.5f + 0.5f,
0.5f*0.5f, 0.25f*0.5f + 0.5f,
0.75f*0.5f, 0.25f*0.5f + 0.5f,
0.75f*0.5f, 0.0f*0.5f + 0.5f,
1.0f*0.5f, 0.0f*0.5f + 0.5f, // 14
0.375f, 0.375f,
0.25f, 0.375f,
0.25f, 0.25f,
0.375f, 0.25f,
0.5f, 0.25f,
0.5f, 0.375f, // 20
0.125f, 0.375f,
0.0f, 0.375f,
0.0f, 0.25f,
0.125f, 0.25f, // 24
0.125f, 0.125f,
0.0f, 0.125f,
0.0f, 0.0f,
0.125f, 0.0f,
0.25f, 0.0f,
0.25f, 0.125f, // 30
0.375f, 0.125f,
0.375f, 0.0f,
0.5f, 0.0f,
0.5f, 0.125f, // 34
0.625f, 0.375f,
0.625f, 0.25f,
0.75f, 0.25f, // 37
0.625f, 0.125f,
0.625f, 0.0f,
0.75f, 0.0f,
0.75f, 0.125f, // 41
0.875f, 0.125f,
0.875f, 0.0f,
1.0f, 0.0f // 44
};
// Three indices per triangle. The first 16 triangles repeat the level-2 index
// list on vertices 0..14.
const uint32_t indexesL3[] = {
0u, 1u, 2u,
0u, 2u, 3u,
0u, 3u, 4u,
0u, 4u, 5u,
6u, 5u, 4u,
6u, 4u, 3u,
6u, 3u, 7u,
6u, 7u, 8u,
6u, 8u, 9u,
6u, 9u, 10u,
6u, 10u, 11u,
6u, 11u, 5u,
12u, 5u, 11u,
12u, 11u, 10u,
12u, 10u, 13u,
12u, 13u, 14u, // End of first big triangle
15u, 14u, 13u,
15u, 13u, 10u,
15u, 10u, 16u,
15u, 16u, 17u,
15u, 17u, 18u,
15u, 18u, 19u,
15u, 19u, 20u,
15u, 20u, 14u,
21u, 10u, 9u,
21u, 9u, 8u,
21u, 8u, 22u,
21u, 22u, 23u,
21u, 23u, 24u,
21u, 24u, 17u,
21u, 17u, 16u,
21u, 16u, 10u,
25u, 17u, 24u,
25u, 24u, 23u,
25u, 23u, 26u,
25u, 26u, 27u,
25u, 27u, 28u,
25u, 28u, 29u,
25u, 29u, 30u,
25u, 30u, 17u,
31u, 19u, 18u,
31u, 18u, 17u,
31u, 17u, 30u,
31u, 30u, 29u,
31u, 29u, 32u,
31u, 32u, 33u,
31u, 33u, 34u,
31u, 34u, 19u,
35u, 14u, 20u,
35u, 20u, 19u,
35u, 19u, 36u,
35u, 36u, 37u,
38u, 37u, 36u,
38u, 36u, 19u,
38u, 19u, 34u,
38u, 34u, 33u,
38u, 33u, 39u,
38u, 39u, 40u,
38u, 40u, 41u,
38u, 41u, 37u,
42u, 37u, 41u,
42u, 41u, 40u,
42u, 40u, 43u,
42u, 43u, 44u
};

View File

@ -0,0 +1,49 @@
#include "bgfx_compute.sh"
#include "uniforms.sh"
// Buffer slots used by the initialisation pass.
BUFFER_WR(u_SubdBufferOut, uint, 1);
BUFFER_RW(u_CulledSubdBuffer, uint, 2);
BUFFER_RW(indirectBuffer, uvec4, 3);
BUFFER_RW(atomicCounterBuffer, uint, 4);
BUFFER_WR(u_SubdBufferIn, uint, 8);

/**
 * Initialisation pass (single invocation).
 *
 * Writes the initial indirect draw/dispatch arguments, seeds all three key
 * buffers with two (primID, key) pairs - (0, 1) and (1, 1) - and resets the
 * atomic counters to 0, 0 and 2.
 */
NUM_THREADS(1u, 1u, 1u)
void main()
{
	// Index count of the instanced patch: 3, 12, 48, 192 for levels 0..3.
	// Written as 3 << (2 * level) instead of 6 << (2 * level - 1) so that
	// level 0 never evaluates a shift by a negative amount; the values for
	// levels >= 1 are unchanged, and levels <= 0 still yield 3.
	uint level = uint(max(u_gpu_subd, 0));
	uint subd = 3u << (2u * level);

	drawIndexedIndirect(indirectBuffer, 0u, subd, 0u, 0u, 0u, 0u);
	dispatchIndirect(indirectBuffer, 1u, 2u / UPDATE_INDIRECT_VALUE_DIVIDE + 1u, 1u, 1u);

	// Two initial entries per buffer, stored as consecutive (primID, key) pairs.
	u_SubdBufferOut[0] = 0u;
	u_SubdBufferOut[1] = 1u;
	u_SubdBufferOut[2] = 1u;
	u_SubdBufferOut[3] = 1u;

	u_CulledSubdBuffer[0] = 0u;
	u_CulledSubdBuffer[1] = 1u;
	u_CulledSubdBuffer[2] = 1u;
	u_CulledSubdBuffer[3] = 1u;

	u_SubdBufferIn[0] = 0u;
	u_SubdBufferIn[1] = 1u;
	u_SubdBufferIn[2] = 1u;
	u_SubdBufferIn[3] = 1u;

	// Counters 0 and 1 start empty; counter 2 is set to the two seeded keys.
	uint tmp;
	atomicFetchAndExchange(atomicCounterBuffer[0], 0u, tmp);
	atomicFetchAndExchange(atomicCounterBuffer[1], 0u, tmp);
	atomicFetchAndExchange(atomicCounterBuffer[2], 2u, tmp);
}

View File

@ -0,0 +1,86 @@
////////////////////////////////////////////////////////////////////////////////
// Implicit Subdivision Shader for Terrain Rendering
//
#include "terrain_common.sh"
#include "fcull.sh"
// Buffer slots used by the LoD pass.
BUFFER_RO(u_SubdBufferIn, uint, 8);
BUFFER_RW(u_CulledSubdBuffer, uint, 2);
BUFFER_RO(u_VertexBuffer, vec4, 6);
BUFFER_RO(u_IndexBuffer, uint, 7);

// -----------------------------------------------------------------------------
/**
 * LoD compute pass
 *
 * One invocation handles one (primID, key) pair of the input subdivision
 * buffer. It emits the pair's successor(s) through updateSubdBuffer() and, if
 * the sub-triangle passes the frustum test (or culling is switched off),
 * appends the pair to the culled buffer.
 */
NUM_THREADS(COMPUTE_THREAD_COUNT, 1u, 1u)
void main()
{
	// Invocations beyond the limit stored in counter 2 have nothing to do.
	uint slot = gl_GlobalInvocationID.x;
	if (slot >= u_AtomicCounterBuffer[2])
	{
		return;
	}

	// Each entry is two uints: coarse triangle index, then subdivision key.
	uint primID = u_SubdBufferIn[slot*2];
	uint key = u_SubdBufferIn[slot*2+1];

	// Fetch the coarse triangle referenced by primID.
	vec4 coarse[3];
	coarse[0] = u_VertexBuffer[u_IndexBuffer[primID * 3 ]];
	coarse[1] = u_VertexBuffer[u_IndexBuffer[primID * 3 + 1]];
	coarse[2] = u_VertexBuffer[u_IndexBuffer[primID * 3 + 2]];

	// Sub-triangle for this key and for its parent key.
	vec4 tri[3];
	vec4 parentTri[3];
	subd(key, coarse, tri, parentTri);

	// Distance-based LoD, or the key's own level when subdivision is frozen.
	uint targetLod;
	uint parentLod;
	if (u_freeze == 0)
	{
		targetLod = uint(computeLod(tri));
		parentLod = uint(computeLod(parentTri));
	}
	else
	{
		parentLod = findMSB(key);
		targetLod = parentLod;
	}
	updateSubdBuffer(primID, key, targetLod, parentLod);

	// Bounding box of the sub-triangle; the z range is replaced by
	// [0, u_DmapFactor] to account for displacement.
	vec4 bmin = min(min(tri[0], tri[1]), tri[2]);
	vec4 bmax = max(max(tri[0], tri[1]), tri[2]);
	bmin.z = 0;
	bmax.z = u_DmapFactor;

	// Append to the culled buffer, reserving two uints through counter 1.
	if (u_cull == 0 || frustumCullingTest(u_modelViewProj, bmin.xyz, bmax.xyz))
	{
		uint idx = 0;
		atomicFetchAndAdd(u_AtomicCounterBuffer[1], 2, idx);
		u_CulledSubdBuffer[idx] = primID;
		u_CulledSubdBuffer[idx+1] = key;
	}
}

View File

@ -0,0 +1,20 @@
#include "bgfx_compute.sh"
#include "uniforms.sh"
// Buffer slots used by the draw-argument pass.
BUFFER_RW(indirectBuffer, uvec4, 3);
BUFFER_RW(atomicCounterBuffer, uint, 4);

/**
 * Draw-argument pass (single invocation).
 *
 * Writes the indexed indirect draw arguments at slot 0: the index count of the
 * instanced patch and an instance count of counter[1] / 2 (counter 1 advances
 * by two uints per key written to the culled buffer).
 */
NUM_THREADS(1u, 1u, 1u)
void main()
{
	uint counter = atomicCounterBuffer[1];

	// Index count of the instanced patch: 3, 12, 48, 192 for levels 0..3.
	// Written as 3 << (2 * level) instead of 6 << (2 * level - 1) so that
	// level 0 never evaluates a shift by a negative amount; the values for
	// levels >= 1 are unchanged, and levels <= 0 still yield 3.
	uint level = uint(max(u_gpu_subd, 0));
	uint subd = 3u << (2u * level);

	drawIndexedIndirect(indirectBuffer, 0u, subd, counter / 2u, 0u, 0u, 0u);
}

View File

@ -0,0 +1,24 @@
#include "bgfx_compute.sh"
#include "uniforms.sh"
// Buffer slots used by the dispatch-argument pass.
BUFFER_RW(indirectBuffer, uvec4, 3);
BUFFER_RW(atomicCounterBuffer, uint, 4);

/**
 * Dispatch-argument pass (single invocation).
 *
 * Reads and clears counters 0 and 1, stores the number of keys
 * (counter 0 / 2) in counter 2, and writes the indirect dispatch arguments at
 * slot 1 with keyCount / UPDATE_INDIRECT_VALUE_DIVIDE + 1 groups.
 */
NUM_THREADS(1u, 1u, 1u)
void main()
{
	// Fetch the previous values while resetting both counters to zero.
	uint written;
	uint culled;
	atomicFetchAndExchange(atomicCounterBuffer[0], 0u, written);
	atomicFetchAndExchange(atomicCounterBuffer[1], 0u, culled);

	// Each key occupies two uints in the subdivision buffer.
	uint keyCount = written / 2u;

	uint previous;
	atomicFetchAndExchange(atomicCounterBuffer[2], keyCount, previous);

	uint groups = keyCount / UPDATE_INDIRECT_VALUE_DIVIDE + 1u;
	dispatchIndirect(indirectBuffer, 1u, groups, 1u, 1u);
}

78
examples/41-tess/fcull.sh Normal file
View File

@ -0,0 +1,78 @@
//////////////////////////////////////////////////////////////////////////////
//
// Frustum Culling API
//
bool frustumCullingTest(mat4 mvp, vec3 bmin, vec3 bmax);
//
//
//// end header file /////////////////////////////////////////////////////
// *****************************************************************************
// Frustum Implementation
// Six frustum planes, one vec4 each. loadFrustum() fills them in pairs:
// planes[2*i] and planes[2*i+1] both derive from matrix element index i.
struct Frustum {
vec4 planes[6];
};
/**
 * Extract Frustum Planes from MVP Matrix
 *
 * Based on "Fast Extraction of Viewing Frustum Planes from the World-
 * View-Projection Matrix", by Gil Gribb and Klaus Hartmann.
 * This procedure computes the six planes of the frustum and normalizes
 * them so that each plane's xyz part has unit length.
 *
 * For i in 0..2, plane 2*i is built from element 3 plus element i, and
 * plane 2*i+1 from element 3 minus element i.
 */
void loadFrustum(out Frustum f, mat4 mvp)
{
	for (int i = 0; i < 3; ++i)
	{
		for (int j = 0; j < 2; ++j)
		{
			vec4 plane;
			plane.x = mtxGetElement(mvp, 0, 3) + (j == 0 ? mtxGetElement(mvp, 0, i) : -mtxGetElement(mvp, 0, i));
			plane.y = mtxGetElement(mvp, 1, 3) + (j == 0 ? mtxGetElement(mvp, 1, i) : -mtxGetElement(mvp, 1, i));
			plane.z = mtxGetElement(mvp, 2, 3) + (j == 0 ? mtxGetElement(mvp, 2, i) : -mtxGetElement(mvp, 2, i));
			plane.w = mtxGetElement(mvp, 3, 3) + (j == 0 ? mtxGetElement(mvp, 3, i) : -mtxGetElement(mvp, 3, i));

			// Normalizing means dividing by the normal's length; the previous
			// code multiplied by it. The sign of a point/plane dot product is
			// the same either way, so culling decisions are not affected.
			// A zero-length normal is left untouched to avoid dividing by zero.
			float len = length(plane.xyz);
			if (len > 0.0)
			{
				plane /= len;
			}

			f.planes[i*2+j] = plane;
		}
	}
}
/**
 * Corner of an AABB selected by a plane normal
 *
 * For every axis this returns the bmax component where the corresponding
 * component of n is greater than zero, and the bmin component otherwise.
 * NOTE(review): that is the corner lying farthest along n, which the
 * referenced tutorial appears to call the "positive vertex" despite this
 * function's name - confirm against the tutorial.
 * See the View Frustum Culling tutorial @ LightHouse3D.com
 * http://www.lighthouse3d.com/tutorials/view-frustum-culling/geometric-approach-testing-boxes-ii/
 */
vec3 negativeVertex(vec3 bmin, vec3 bmax, vec3 n)
{
	return mix(bmin, bmax, greaterThan(n, vec3(0.0, 0.0, 0.0)));
}
/**
 * Frustum-AABB Culling Test
 *
 * Returns true if the AABB [bmin, bmax] is inside or intersects the frustum
 * described by mvp, and false otherwise. For each plane, the box corner
 * chosen by negativeVertex() is tested; the loop stops at the first plane
 * for which that corner has a negative signed distance.
 * The test is based on the View Frustum Culling tutorial @ LightHouse3D.com
 * http://www.lighthouse3d.com/tutorials/view-frustum-culling/geometric-approach-testing-boxes-ii/
 */
bool frustumCullingTest(mat4 mvp, vec3 bmin, vec3 bmax)
{
	Frustum f;
	loadFrustum(f, mvp);

	bool inside = true;
	for (int i = 0; i < 6 && inside; ++i)
	{
		vec3 corner = negativeVertex(bmin, bmax, f.planes[i].xyz);
		inside = dot(vec4(corner, 1.0f), f.planes[i]) >= 0.0f;
	}
	return inside;
}

View File

@ -0,0 +1,13 @@
$input v_texcoord0
#include "terrain_common.sh"
// Diffuse shading: builds a normal from the slope map and outputs a grey
// value of clamp(normal.z, 0, 1) / 3.14159.
void main()
{
	vec2 slope = texture2D(u_SmapSampler, v_texcoord0).rg * u_DmapFactor;
	vec3 normal = normalize(vec3(-slope, 1));
	float diffuse = clamp(normal.z, 0.0, 1.0) / 3.14159;
	gl_FragColor = vec4(diffuse, diffuse, diffuse, 1);
}

View File

@ -0,0 +1,11 @@
$input v_texcoord0
#include "terrain_common.sh"
// Normal visualisation: builds a normal from the slope map and outputs the
// absolute value of its components as the colour.
void main()
{
	vec2 slope = texture2D(u_SmapSampler, v_texcoord0).rg * u_DmapFactor;
	vec3 normal = normalize(vec3(-slope.x, -slope.y, 1));
	gl_FragColor = vec4(abs(normal), 1);
}

128
examples/41-tess/isubd.sh Normal file
View File

@ -0,0 +1,128 @@
// Index of the most significant set bit of x (bit 0 is the least significant).
// Returns 0xFFFFFFFF when x is zero.
uint findMSB(uint x)
{
	uint res = 0xFFFFFFFFu;
	for (uint i = 0u; i < 32u; i++)
	{
		// Unsigned literal on purpose: a signed 0x80000000 is shifted
		// arithmetically, which drags the sign bit into the mask.
		uint mask = 0x80000000u >> i;
		if ((x & mask) != 0u)
		{
			res = 31u - i;
			break;
		}
	}
	return res;
}
// Key of the parent node: the key with its lowest bit dropped.
uint parentKey(in uint key)
{
	uint parent = key >> 1u;
	return parent;
}
// Keys of the two children: the key shifted left by one, with the new lowest
// bit set to 0 (children[0]) and to 1 (children[1]).
void childrenKeys(in uint key, out uint children[2])
{
	uint shifted = key << 1u;
	children[0] = shifted;
	children[1] = shifted | 1u;
}
// True when the key is the root key, i.e. exactly 1.
bool isRootKey(in uint key)
{
	const uint rootKey = 1u;
	return key == rootKey;
}
// True when the key's highest set bit is bit 31, so it cannot be shifted
// left again to form child keys.
bool isLeafKey(in uint key)
{
	uint depth = findMSB(key);
	return depth == 31;
}
// True when the key's lowest bit is 0, i.e. it is the first of its parent's
// two children.
bool isChildZeroKey(in uint key)
{
	uint lowBit = key & 1u;
	return lowBit == 0u;
}
// Barycentric interpolation over a triangle of vec3:
// v[0] + u.x * (v[1] - v[0]) + u.y * (v[2] - v[0]).
vec3 berp(in vec3 v[3], in vec2 u)
{
	vec3 edge01 = v[1] - v[0];
	vec3 edge02 = v[2] - v[0];
	return v[0] + u.x * edge01 + u.y * edge02;
}
// Barycentric interpolation over a triangle of vec4:
// v[0] + u.x * (v[1] - v[0]) + u.y * (v[2] - v[0]).
vec4 berp(in vec4 v[3], in vec2 u)
{
	vec4 edge01 = v[1] - v[0];
	vec4 edge02 = v[2] - v[0];
	return v[0] + u.x * edge01 + u.y * edge02;
}
// Transform for one subdivision step, selected by a key bit (0 or 1).
// Columns: (0, 1-bit, bit), (0.5, bit, 0), (0.5, 0, 1-bit).
mat3 bitToXform(in uint bit)
{
	float on = float(bit);
	float off = 1.0f - on;
	return mtxFromCols(
		  vec3(0.0f, off , on  )
		, vec3(0.5f, on  , 0.0f)
		, vec3(0.5f, 0.0f, off )
		);
}
// Transform for a whole key: starting from identity, multiplies in the
// per-bit transform of each key bit, lowest bit first, stopping when only the
// leading 1 bit remains.
mat3 keyToXform(in uint key)
{
	mat3 xf = mtxFromCols(
		  vec3(1.0f, 0.0f, 0.0f)
		, vec3(0.0f, 1.0f, 0.0f)
		, vec3(0.0f, 0.0f, 1.0f)
		);

	for (uint bits = key; bits > 1u; bits = bits >> 1u)
	{
		xf = mul(xf, bitToXform(bits & 1u));
	}

	return xf;
}
// Transform for a key (returned) together with the transform of its parent
// key (written to xfp).
mat3 keyToXform(in uint key, out mat3 xfp)
{
	mat3 xf = keyToXform(key);
	xfp = keyToXform(parentKey(key));
	return xf;
}
// Subdivision routine (vertex positions only): applies the key's transform to
// the three input vertices and writes the resulting sub-triangle to v_out.
void subd(in uint key, in vec4 v_in[3], out vec4 v_out[3])
{
	mat4x3 corners = mtxFromRows(v_in[0], v_in[1], v_in[2]);
	mat4x3 moved = mul(keyToXform(key), corners);

	v_out[0] = mtxGetRow(moved, 0);
	v_out[1] = mtxGetRow(moved, 1);
	v_out[2] = mtxGetRow(moved, 2);
}
// Subdivision routine (vertex positions only) that also produces the parent:
// v_out receives the sub-triangle for key, v_out_p the sub-triangle for the
// parent key.
void subd(in uint key, in vec4 v_in[3], out vec4 v_out[3], out vec4 v_out_p[3])
{
	mat3 parentXf;
	mat3 ownXf = keyToXform(key, parentXf);

	mat4x3 corners = mtxFromRows(v_in[0], v_in[1], v_in[2]);
	mat4x3 moved = mul(ownXf, corners);
	mat4x3 parentMoved = mul(parentXf, corners);

	v_out[0] = mtxGetRow(moved, 0);
	v_out[1] = mtxGetRow(moved, 1);
	v_out[2] = mtxGetRow(moved, 2);

	v_out_p[0] = mtxGetRow(parentMoved, 0);
	v_out_p[1] = mtxGetRow(parentMoved, 1);
	v_out_p[2] = mtxGetRow(parentMoved, 2);
}

10
examples/41-tess/makefile Normal file
View File

@ -0,0 +1,10 @@
#
# Copyright 2011-2019 Branimir Karadzic. All rights reserved.
# License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
#
BGFX_DIR=../..
RUNTIME_DIR=$(BGFX_DIR)/examples/runtime
BUILD_DIR=../../.build
include $(BGFX_DIR)/scripts/shader.mk

View File

@ -0,0 +1,68 @@
// The non-square matrix definitions are kept local to this example, since it is not yet settled how non-square matrices should be treated in bgfx itself (Daniel Gavin)
#ifndef MATRICES_H_HEADER_GUARD
#define MATRICES_H_HEADER_GUARD
#ifndef __cplusplus
#if BGFX_SHADER_LANGUAGE_HLSL || BGFX_SHADER_LANGUAGE_PSSL || BGFX_SHADER_LANGUAGE_SPIRV || BGFX_SHADER_LANGUAGE_METAL
# define mat3x4 float4x3
# define mat4x3 float3x4
#else
#endif // BGFX_SHADER_LANGUAGE_*
// Builds a mat4x3 from three vec4 arguments.
// GLSL: the arguments become the columns of a mat3x4, which is then transposed.
// Other backends: the arguments are passed straight to the mat4x3 constructor
// (mat4x3 is #defined to float3x4 above for HLSL/PSSL/SPIRV/Metal).
mat4x3 mtxFromRows(vec4 _0, vec4 _1, vec4 _2)
{
#if BGFX_SHADER_LANGUAGE_GLSL
return transpose(mat3x4(_0, _1, _2) );
#else
return mat4x3(_0, _1, _2);
#endif // BGFX_SHADER_LANGUAGE_GLSL
}
// Returns row `row` of a mat4x3 as a vec4.
// GLSL: gathers element `row` from each of the four columns.
// Other backends: indexes the matrix directly with `row`.
vec4 mtxGetRow(mat4x3 _0, uint row)
{
#if BGFX_SHADER_LANGUAGE_GLSL
return vec4(_0[0][row], _0[1][row], _0[2][row], _0[3][row]);
#else
return vec4(_0[row]);
#endif // BGFX_SHADER_LANGUAGE_GLSL
}
// Returns row `row` of a mat4 as a vec4.
// GLSL: gathers element `row` from each of the four columns.
// Other backends: indexes the matrix directly with `row`.
vec4 mtxGetRow(mat4 _0, uint row)
{
#if BGFX_SHADER_LANGUAGE_GLSL
return vec4(_0[0][row], _0[1][row], _0[2][row], _0[3][row]);
#else
return vec4(_0[row]);
#endif // BGFX_SHADER_LANGUAGE_GLSL
}
// Returns column `column` of a mat4 as a vec4.
// GLSL: indexes the matrix directly with `column`.
// Other backends: gathers element `column` from each of the four rows.
vec4 mtxGetColumn(mat4 _0, uint column)
{
#if BGFX_SHADER_LANGUAGE_GLSL
return vec4(_0[column]);
#else
return vec4(_0[0][column], _0[1][column], _0[2][column], _0[3][column]);
#endif // BGFX_SHADER_LANGUAGE_GLSL
}
// Returns a single element of a mat4, addressed as (column, row).
// The index order is swapped between the GLSL branch ([column][row]) and the
// other backends ([row][column]).
float mtxGetElement(mat4 _0, uint column, uint row)
{
#if BGFX_SHADER_LANGUAGE_GLSL
return _0[column][row];
#else
return _0[row][column];
#endif // BGFX_SHADER_LANGUAGE_GLSL
}
#endif // __cplusplus
#endif // MATRICES_H_HEADER_GUARD

View File

@ -0,0 +1,101 @@
#include "bgfx_compute.sh"
#include "matrices.sh"
#include "isubd.sh"
#include "uniforms.sh"
// Resources shared by the shaders that include this file.
// Counter 0 is advanced by writeKey(); the output key buffer receives
// (primID, key) pairs.
BUFFER_RW(u_AtomicCounterBuffer, uint, 4);
BUFFER_RW(u_SubdBufferOut, uint, 1);
SAMPLER2D(u_DmapSampler, 0); // displacement map
SAMPLER2D(u_SmapSampler, 1); // slope map
// Displacement at `pos`: samples the x channel of the displacement map at
// pos * 0.5 + 0.5 (mip level 0) and scales it by u_DmapFactor.
float dmap(vec2 pos)
{
	vec2 uv = pos * 0.5 + 0.5;
	float height = texture2DLod(u_DmapSampler, uv, 0).x;
	return height * u_DmapFactor;
}
// Maps a distance to a level of detail: -2 * log2(clamp(z * lodFactor, 0, 1)).
float distanceToLod(float z, float lodFactor)
{
	float scaled = clamp(z * lodFactor, 0.0f, 1.0f);

	// The factor of two is there because triangle edge lengths halve only
	// every two subdivision steps.
	return -2.0 * log2(scaled);
}
// LoD for a point c: raises c.z by the displacement sampled at the xy of
// column 3 of u_invView, transforms the point by u_modelView and converts the
// length of the result to a LoD with u_LodFactor.
// NOTE(review): column 3 of u_invView is presumably the camera position, so
// the displacement is taken under the camera rather than at c - confirm.
float computeLod(vec3 c)
{
	vec2 sampleXY = mtxGetColumn(u_invView, 3).xy;
	c.z += dmap(sampleXY);

	vec3 cxf = mul(u_modelView, vec4(c, 1)).xyz;
	return distanceToLod(length(cxf), u_LodFactor);
}
// LoD for a triangle of vec4, evaluated at the midpoint of the edge between
// v[1] and v[2].
float computeLod(in vec4 v[3])
{
	vec3 edgeSum = v[1].xyz + v[2].xyz;
	return computeLod(edgeSum / 2.0);
}
// LoD for a triangle of vec3, evaluated at the midpoint of the edge between
// v[1] and v[2].
float computeLod(in vec3 v[3])
{
	vec3 edgeSum = v[1].xyz + v[2].xyz;
	return computeLod(edgeSum / 2.0);
}
// Appends a (primID, key) pair to the output buffer. Counter 0 is advanced by
// two, and the value it held before the add is used as the write position.
void writeKey(uint primID, uint key)
{
	uint base = 0;
	atomicFetchAndAdd(u_AtomicCounterBuffer[0], 2, base);

	u_SubdBufferOut[base] = primID;
	u_SubdBufferOut[base+1] = key;
}
// Emits the successor(s) of one key into the output buffer:
//  - subdivide: key level below targetLod, not a leaf, and visible
//               -> both child keys are written;
//  - keep:      key level below parentLod + 1, and visible
//               -> the key itself is written;
//  - merge:     otherwise the root key is written unchanged, a zero child
//               writes its parent key, and a one child writes nothing.
void updateSubdBuffer(
	uint primID,
	uint key,
	uint targetLod,
	uint parentLod,
	bool isVisible
) {
	// subdivision level associated to the key
	uint keyLod = findMSB(key);

	bool subdivide = keyLod < targetLod && !isLeafKey(key) && isVisible;
	if (subdivide)
	{
		uint children[2];
		childrenKeys(key, children);
		writeKey(primID, children[0]);
		writeKey(primID, children[1]);
		return;
	}

	bool keep = keyLod < (parentLod + 1) && isVisible;
	if (keep)
	{
		writeKey(primID, key);
		return;
	}

	// merge
	if (isRootKey(key))
	{
		writeKey(primID, key);
	}
	else if (isChildZeroKey(key))
	{
		writeKey(primID, parentKey(key));
	}
}
// Convenience overload that treats the key as visible.
void updateSubdBuffer(uint primID, uint key, uint targetLod, uint parentLod)
{
	const bool isVisible = true;
	updateSubdBuffer(primID, key, targetLod, parentLod, isVisible);
}

676
examples/41-tess/tess.cpp Normal file
View File

@ -0,0 +1,676 @@
/*
* Copyright 2019 Daniel Gavin. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
/*
* Reference(s):
* - Adaptive GPU Tessellation with Compute Shaders by Jad Khoury, Jonathan Dupuy, and Christophe Riccio
* http://onrendering.com/data/papers/isubd/isubd.pdf
* - Based on Demo
* https://github.com/jdupuy/opengl-framework/tree/master/demo-isubd-terrain#implicit-subdivision-on-the-gpu
*/
#include "common.h"
#include "bgfx_utils.h"
#include "imgui/imgui.h"
#include "camera.h"
#include "bounds.h"
#include <bx/allocator.h>
#include <bx/debug.h>
#include <bx/math.h>
#include <bx/file.h>
#include <vector>
#include "constants.h"
namespace
{
// Indices into m_programsDraw; also the value stored in m_shading by the
// "Shading" combo box.
enum {
PROGRAM_TERRAIN_NORMAL,
PROGRAM_TERRAIN,
SHADING_COUNT };
// Base index into m_bufferSubd (the pair is BUFFER_SUBD and BUFFER_SUBD + 1).
enum {
BUFFER_SUBD
};
// Indices into m_programsCompute.
enum {
PROGRAM_SUBD_CS_LOD,
PROGRAM_UPDATE_INDIRECT,
PROGRAM_INIT_INDIRECT,
PROGRAM_UPDATE_DRAW,
PROGRAM_COUNT
};
// Indices into m_samplers.
enum {
TERRAIN_DMAP_SAMPLER,
TERRAIN_SMAP_SAMPLER,
SAMPLER_COUNT
};
// Indices into m_textures.
enum {
TEXTURE_DMAP,
TEXTURE_SMAP,
TEXTURE_COUNT
};
struct Uniforms
{
enum { NumVec4 = 2 };
void init()
{
u_params = bgfx::createUniform("u_params", bgfx::UniformType::Vec4, NumVec4);
cull = 1;
freeze = 0;
gpuSubd = 3;
}
void submit()
{
bgfx::setUniform(u_params, params, NumVec4);
}
void destroy()
{
bgfx::destroy(u_params);
}
union
{
struct
{
float dmapFactor; float lodFactor; float cull; float freeze;
float gpuSubd; float padding[3];
};
float params[NumVec4 * 4];
};
bgfx::UniformHandle u_params;
};
class ExampleTessellation : public entry::AppI
{
public:
// Forwards the example name and description to entry::AppI. Members are not
// initialized here; init() assigns them.
ExampleTessellation(const char* _name, const char* _description)
: entry::AppI(_name, _description)
{
}
// Initializes bgfx, the example's settings, ImGui, the camera and all GPU
// resources.
void init(int32_t _argc, const char* const* _argv, uint32_t _width, uint32_t _height) override
{
Args args(_argc, _argv);
m_width = _width;
m_height = _height;
m_debug = BGFX_DEBUG_NONE;
m_reset = BGFX_RESET_NONE;
bgfx::Init init;
init.type = args.m_type;
init.vendorId = args.m_pciId;
init.resolution.width = m_width;
init.resolution.height = m_height;
init.resolution.reset = m_reset;
bgfx::init(init);
// Displacement map file and the scale stored into the dmapFactor uniform.
m_dmap = { "textures/dmap.png", 0.45f };
m_computeThreadCount = 5;
m_shading = PROGRAM_TERRAIN;
m_primitivePixelLengthTarget = 7.0f;
m_fovy = 60.0f;
m_pingPong = 0;
// Makes the first update() take the reset branch (buffers rebuilt, init pass dispatched).
m_reset_gpu = true;
// Enable m_debug text.
bgfx::setDebug(m_debug);
// Set view 0 clear state.
bgfx::setViewClear(0
, BGFX_CLEAR_COLOR | BGFX_CLEAR_DEPTH
, 0x303030ff
, 1.0f
, 0
);
// Set view 1 clear state.
bgfx::setViewClear(1
, BGFX_CLEAR_COLOR | BGFX_CLEAR_DEPTH
, 0x303030ff
, 1.0f
, 0
);
// Imgui.
imguiCreate();
m_timeOffset = bx::getHPCounter();
m_oldWidth = 0;
m_oldHeight = 0;
m_oldReset = m_reset;
cameraCreate();
cameraSetPosition({ 0.0f, 0.5f, 0.0f });
cameraSetVerticalAngle(0);
// Initial checkbox states; they match the defaults set by Uniforms::init().
is_wireframe = false;
is_frozen = false;
is_culled = true;
// loadPrograms() must run first: it initializes m_uniforms, whose gpuSubd
// value is read by loadBuffers() when it picks the instanced patch mesh.
loadPrograms();
loadBuffers();
loadTextures();
createAtomicCounters();
m_dispatchIndirect = bgfx::createIndirectBuffer(2);
}
virtual int shutdown() override
{
// Cleanup.
cameraDestroy();
imguiDestroy();
m_uniforms.destroy();
bgfx::destroy(m_bufferCounter);
bgfx::destroy(m_bufferCulledSubd);
bgfx::destroy(m_bufferSubd[0]);
bgfx::destroy(m_bufferSubd[1]);
bgfx::destroy(m_dispatchIndirect);
bgfx::destroy(m_geometryIndices);
bgfx::destroy(m_geometryVertices);
bgfx::destroy(m_instancedGeometryIndices);
bgfx::destroy(m_instancedGeometryVertices);
for (uint32_t i = 0; i < PROGRAM_COUNT; ++i) {
bgfx::destroy(m_programsCompute[i]);
}
for (uint32_t i = 0; i < SHADING_COUNT; ++i) {
bgfx::destroy(m_programsDraw[i]);
}
for (uint32_t i = 0; i < SAMPLER_COUNT; ++i) {
bgfx::destroy(m_samplers[i]);
}
for (uint32_t i = 0; i < TEXTURE_COUNT; ++i) {
bgfx::destroy(m_textures[i]);
}
// Shutdown bgfx.
bgfx::shutdown();
return 0;
}
// Per-frame update: processes input, draws the settings UI, runs the compute
// passes (init or update-indirect, LoD, update-draw) and submits the indirect
// terrain draw. Returns false when the application should exit.
bool update() override
{
	if (entry::processEvents(m_width, m_height, m_debug, m_reset, &m_mouseState))
	{
		return false;
	}

	int64_t now = bx::getHPCounter();
	static int64_t last = now;
	const int64_t frameTime = now - last;
	last = now;
	const double freq = double(bx::getHPFrequency());
	const float deltaTime = float(frameTime / freq);

	imguiBeginFrame(m_mouseState.m_mx
		, m_mouseState.m_my
		, (m_mouseState.m_buttons[entry::MouseButton::Left] ? IMGUI_MBUT_LEFT : 0)
		| (m_mouseState.m_buttons[entry::MouseButton::Right] ? IMGUI_MBUT_RIGHT : 0)
		| (m_mouseState.m_buttons[entry::MouseButton::Middle] ? IMGUI_MBUT_MIDDLE : 0)
		, m_mouseState.m_mz
		, uint16_t(m_width)
		, uint16_t(m_height)
		);

	showExampleDialog(this);

	ImGui::SetNextWindowPos(
		  ImVec2(m_width - m_width / 5.0f - 10.0f, 10.0f)
		, ImGuiCond_FirstUseEver
		);
	ImGui::SetNextWindowSize(
		  ImVec2(m_width / 5.0f, m_height / 3.0f)
		, ImGuiCond_FirstUseEver
		);
	ImGui::Begin("Settings"
		, NULL
		, 0
		);

	if (ImGui::Checkbox("Debug wireframe", &is_wireframe))
	{
		bgfx::setDebug(is_wireframe ? BGFX_DEBUG_WIREFRAME : BGFX_DEBUG_NONE);
	}

	ImGui::SameLine();

	if (ImGui::Checkbox("Cull", &is_culled))
	{
		m_uniforms.cull = is_culled ? 1.0f : 0.0f;
	}

	ImGui::SameLine();

	if (ImGui::Checkbox("Freeze subdividing", &is_frozen))
	{
		m_uniforms.freeze = is_frozen ? 1.0f : 0.0f;
	}

	ImGui::SliderFloat("Pixels per edge", &m_primitivePixelLengthTarget, 1, 20);

	// Changing the patch level needs a different instanced mesh, so it
	// triggers the buffer rebuild below.
	int gpuSlider = (int)m_uniforms.gpuSubd;
	if (ImGui::SliderInt("Triangle Patch level", &gpuSlider, 0, 3))
	{
		m_reset_gpu = true;
		m_uniforms.gpuSubd = (float)gpuSlider;
	}

	// The item count is tied to the shading enum instead of a literal, so the
	// combo can never select an index outside m_programsDraw.
	ImGui::Combo("Shading", &m_shading, shader_options, SHADING_COUNT);

	ImGui::Text("Some variables require rebuilding the subdivide buffers and causes a stutter.");

	ImGui::End();

	if (!ImGui::MouseOverArea())
	{
		// Update camera.
		cameraUpdate(deltaTime, m_mouseState);
	}

	bgfx::touch(0);
	bgfx::touch(1);

	configureUniforms();

	cameraGetViewMtx(m_viewMtx);

	float model[16];
	bx::mtxRotateX(model, bx::toRad(90));

	bx::mtxProj(m_projMtx, m_fovy, float(m_width) / float(m_height), 0.0001f, 2000.0f, bgfx::getCaps()->homogeneousDepth);

	// Set view 0
	bgfx::setViewTransform(0, m_viewMtx, m_projMtx);

	// Set view 1
	bgfx::setViewRect(1, 0, 0, uint16_t(m_width), uint16_t(m_height));
	bgfx::setViewTransform(1, m_viewMtx, m_projMtx);

	m_uniforms.submit();

	// update the subd buffers
	if (m_reset_gpu)
	{
		m_pingPong = 1;

		bgfx::destroy(m_instancedGeometryVertices);
		bgfx::destroy(m_instancedGeometryIndices);

		bgfx::destroy(m_bufferSubd[BUFFER_SUBD]);
		bgfx::destroy(m_bufferSubd[BUFFER_SUBD + 1]);
		bgfx::destroy(m_bufferCulledSubd);

		loadInstancedGeometryBuffers();
		loadSubdivisionBuffers();

		//init indirect
		bgfx::setBuffer(1, m_bufferSubd[m_pingPong], bgfx::Access::ReadWrite);
		bgfx::setBuffer(2, m_bufferCulledSubd, bgfx::Access::ReadWrite);
		bgfx::setBuffer(3, m_dispatchIndirect, bgfx::Access::ReadWrite);
		bgfx::setBuffer(4, m_bufferCounter, bgfx::Access::ReadWrite);
		bgfx::setBuffer(8, m_bufferSubd[1 - m_pingPong], bgfx::Access::ReadWrite);
		bgfx::dispatch(0, m_programsCompute[PROGRAM_INIT_INDIRECT], 1, 1, 1);

		m_reset_gpu = false;
	}
	else
	{
		// update batch
		bgfx::setBuffer(3, m_dispatchIndirect, bgfx::Access::ReadWrite);
		bgfx::setBuffer(4, m_bufferCounter, bgfx::Access::ReadWrite);
		bgfx::dispatch(0, m_programsCompute[PROGRAM_UPDATE_INDIRECT], 1, 1, 1);
	}

	// LoD pass: reads keys from slot 8, writes the next key set to slot 1 and
	// the culled key set to slot 2.
	bgfx::setBuffer(1, m_bufferSubd[m_pingPong], bgfx::Access::ReadWrite);
	bgfx::setBuffer(2, m_bufferCulledSubd, bgfx::Access::ReadWrite);
	bgfx::setBuffer(4, m_bufferCounter, bgfx::Access::ReadWrite);
	bgfx::setBuffer(6, m_geometryVertices, bgfx::Access::Read);
	bgfx::setBuffer(7, m_geometryIndices, bgfx::Access::Read);
	bgfx::setBuffer(8, m_bufferSubd[1 - m_pingPong], bgfx::Access::Read);
	bgfx::setTransform(model);

	bgfx::setTexture(0, m_samplers[TERRAIN_DMAP_SAMPLER], m_textures[TEXTURE_DMAP], BGFX_SAMPLER_U_CLAMP | BGFX_SAMPLER_V_CLAMP);

	m_uniforms.submit();

	// update the subd buffer
	bgfx::dispatch(0, m_programsCompute[PROGRAM_SUBD_CS_LOD], m_dispatchIndirect, 1);

	// update draw
	bgfx::setBuffer(3, m_dispatchIndirect, bgfx::Access::ReadWrite);
	bgfx::setBuffer(4, m_bufferCounter, bgfx::Access::ReadWrite);

	m_uniforms.submit();

	bgfx::dispatch(1, m_programsCompute[PROGRAM_UPDATE_DRAW], 1, 1, 1);

	// render the terrain
	bgfx::setTexture(0, m_samplers[TERRAIN_DMAP_SAMPLER], m_textures[TEXTURE_DMAP], BGFX_SAMPLER_U_CLAMP | BGFX_SAMPLER_V_CLAMP);
	bgfx::setTexture(1, m_samplers[TERRAIN_SMAP_SAMPLER], m_textures[TEXTURE_SMAP], BGFX_SAMPLER_MIN_ANISOTROPIC | BGFX_SAMPLER_MAG_ANISOTROPIC);

	bgfx::setTransform(model);
	bgfx::setVertexBuffer(0, m_instancedGeometryVertices);
	bgfx::setIndexBuffer(m_instancedGeometryIndices);
	bgfx::setBuffer(2, m_bufferCulledSubd, bgfx::Access::Read);
	bgfx::setBuffer(3, m_geometryVertices, bgfx::Access::Read);
	bgfx::setBuffer(4, m_geometryIndices, bgfx::Access::Read);
	bgfx::setState(BGFX_STATE_WRITE_RGB | BGFX_STATE_WRITE_Z | BGFX_STATE_DEPTH_TEST_LESS);

	m_uniforms.submit();

	bgfx::submit(1, m_programsDraw[m_shading], m_dispatchIndirect, 0, true);

	// Swap the roles of the two subdivision buffers for the next frame.
	m_pingPong = 1 - m_pingPong;

	imguiEndFrame();

	// Advance to next frame. Rendering thread will be kicked to
	// process submitted rendering primitives.
	bgfx::frame(false);

	return true;
}
void createAtomicCounters()
{
m_bufferCounter = bgfx::createDynamicIndexBuffer(3, BGFX_BUFFER_INDEX32 | BGFX_BUFFER_COMPUTE_READ_WRITE);
}
// Recomputes the uniforms that depend on the current settings: lodFactor from
// the field of view, window width, patch level and pixel target, and
// dmapFactor from the displacement scale.
void configureUniforms()
{
	const float tanHalfFovy = bx::tan(bx::toRad(m_fovy) / 2.0f);
	const int patchScale = 1 << (int)m_uniforms.gpuSubd;

	m_uniforms.lodFactor = 2.0f * tanHalfFovy
		/ m_width * patchScale
		* m_primitivePixelLengthTarget;
	m_uniforms.dmapFactor = m_dmap.scale;
}
/**
* Load the Terrain Program
*
* This program renders an adaptive terrain using the implicit subdivision
* technique discribed in GPU Zen 2.
**/
void loadPrograms()
{
m_samplers[TERRAIN_DMAP_SAMPLER] = bgfx::createUniform("u_DmapSampler", bgfx::UniformType::Sampler);
m_samplers[TERRAIN_SMAP_SAMPLER] = bgfx::createUniform("u_SmapSampler", bgfx::UniformType::Sampler);
m_uniforms.init();
m_programsDraw[PROGRAM_TERRAIN] = loadProgram("vs_terrain_render", "fs_terrain_render");
m_programsDraw[PROGRAM_TERRAIN_NORMAL] = loadProgram("vs_terrain_render", "fs_terrain_render_normal");
m_programsCompute[PROGRAM_SUBD_CS_LOD] = bgfx::createProgram(loadShader("cs_terrain_lod"), true);
m_programsCompute[PROGRAM_UPDATE_INDIRECT] = bgfx::createProgram(loadShader("cs_terrain_update_indirect"), true);
m_programsCompute[PROGRAM_UPDATE_DRAW] = bgfx::createProgram(loadShader("cs_terrain_update_draw"), true);
m_programsCompute[PROGRAM_INIT_INDIRECT] = bgfx::createProgram(loadShader("cs_terrain_init"), true);
}
// Builds the slope map (RG32F) from the displacement image loaded by
// loadDmapTexture(): for every texel, the x and y slopes are central
// differences of the neighbouring heights (clamped at the borders), scaled by
// half the image width and height respectively.
void loadSmapTexture()
{
	const int w = dmap->m_width;
	const int h = dmap->m_height;
	const uint16_t* texels = (const uint16_t*)dmap->m_data;

	// Two floats (slope_x, slope_y) per texel, base level only.
	const bgfx::Memory* mem = bgfx::alloc(uint32_t(w * h * 2 * sizeof(float)));
	float* smap = (float*)mem->data;

	for (int j = 0; j < h; ++j)
	{
		const int j1 = bx::max(0, j - 1);
		const int j2 = bx::min(h - 1, j + 1);

		for (int i = 0; i < w; ++i)
		{
			const int i1 = bx::max(0, i - 1);
			const int i2 = bx::min(w - 1, i + 1);

			const uint16_t px_l = texels[i1 + w * j]; // in [0,2^16-1]
			const uint16_t px_r = texels[i2 + w * j]; // in [0,2^16-1]
			const uint16_t px_b = texels[i + w * j1]; // in [0,2^16-1]
			const uint16_t px_t = texels[i + w * j2]; // in [0,2^16-1]

			const float z_l = (float)px_l / 65535.0f; // in [0, 1]
			const float z_r = (float)px_r / 65535.0f; // in [0, 1]
			const float z_b = (float)px_b / 65535.0f; // in [0, 1]
			const float z_t = (float)px_t / 65535.0f; // in [0, 1]

			const float slope_x = (float)w * 0.5f * (z_r - z_l);
			const float slope_y = (float)h * 0.5f * (z_t - z_b);

			smap[2 * (i + w * j)] = slope_x;
			smap[1 + 2 * (i + w * j)] = slope_y;
		}
	}

	// Only the base level is allocated and filled above, so the texture is
	// created without a mip chain. The previous `mipcnt > 1` argument would
	// have declared mips that the supplied memory does not contain.
	m_textures[TEXTURE_SMAP] = bgfx::createTexture2D((uint16_t)w, (uint16_t)h, false, 1, bgfx::TextureFormat::RG32F,
		BGFX_TEXTURE_NONE, mem);
}
/**
* Load the Displacement Texture
*
* This loads an R16 texture used as a displacement map
*/
void loadDmapTexture()
{
dmap = imageLoad(m_dmap.pathToFile.getCPtr(), bgfx::TextureFormat::R16);
m_textures[TEXTURE_DMAP] = bgfx::createTexture2D((uint16_t)dmap->m_width, (uint16_t)dmap->m_height, false, 1, bgfx::TextureFormat::R16,
BGFX_TEXTURE_NONE, bgfx::makeRef(dmap->m_data, dmap->m_size));
}
/**
 * Load All Textures
 *
 * The displacement map must be loaded first: loadSmapTexture() reads the
 * image that loadDmapTexture() stores in `dmap`.
 */
void loadTextures()
{
loadDmapTexture();
loadSmapTexture();
}
/**
* Load the Geometry Buffer
*
* This procedure loads the scene geometry into an index and
* vertex buffer. Here, we only load 2 triangles to define the
* terrain.
**/
void loadGeometryBuffers()
{
float vertices[] = {
-1.0f, -1.0f, 0.0f, 1.0f,
+1.0f, -1.0f, 0.0f, 1.0f,
+1.0f, +1.0f, 0.0f, 1.0f,
-1.0f, +1.0f, 0.0f, 1.0f
};
uint32_t indices[] = {
0,
1,
3,
2,
3,
1
};
m_geometryDecl.begin().add(bgfx::Attrib::Position, 4, bgfx::AttribType::Float).end();
m_geometryVertices = bgfx::createVertexBuffer(bgfx::copy(vertices, sizeof(vertices)), m_geometryDecl, BGFX_BUFFER_COMPUTE_READ);
m_geometryIndices = bgfx::createIndexBuffer(bgfx::copy(indices, sizeof(indices)), BGFX_BUFFER_COMPUTE_READ | BGFX_BUFFER_INDEX32);
}
void loadSubdivisionBuffers()
{
const size_t bufferCapacity = 1 << 27;
m_bufferSubd[BUFFER_SUBD] = bgfx::createDynamicIndexBuffer(bufferCapacity, BGFX_BUFFER_COMPUTE_READ_WRITE | BGFX_BUFFER_INDEX32);
m_bufferSubd[BUFFER_SUBD + 1] = bgfx::createDynamicIndexBuffer(bufferCapacity, BGFX_BUFFER_COMPUTE_READ_WRITE | BGFX_BUFFER_INDEX32);
m_bufferCulledSubd = bgfx::createDynamicIndexBuffer(bufferCapacity, BGFX_BUFFER_COMPUTE_READ_WRITE | BGFX_BUFFER_INDEX32);
}
/**
 * Load All Buffers
 *
 * Creates the subdivision key buffers, the coarse scene geometry and the
 * instanced patch geometry. The last step reads m_uniforms.gpuSubd, so
 * m_uniforms must already be initialized.
 */
void loadBuffers()
{
loadSubdivisionBuffers();
loadGeometryBuffers();
loadInstancedGeometryBuffers();
}
/**
* This will be used to instantiate a triangle grid for each subdivision
* key present in the subd buffer.
*/
void loadInstancedGeometryBuffers()
{
const float* vertices;
const uint32_t* indexes;
if (m_uniforms.gpuSubd == 0) {
m_instancedMeshVertexCount = 3;
m_instancedMeshPrimitiveCount = 1;
vertices = verticesL0;
indexes = indexesL0;
}
else if (m_uniforms.gpuSubd == 1) {
m_instancedMeshVertexCount = 6;
m_instancedMeshPrimitiveCount = 4;
vertices = verticesL1;
indexes = indexesL1;
}
else if (m_uniforms.gpuSubd == 2) {
m_instancedMeshVertexCount = 15;
m_instancedMeshPrimitiveCount = 16;
vertices = verticesL2;
indexes = indexesL2;
}
else { //(m_settings.gpuSubd == 3) {
m_instancedMeshVertexCount = 45;
m_instancedMeshPrimitiveCount = 64;
vertices = verticesL3;
indexes = indexesL3;
}
m_instancedGeometryDecl.begin().add(bgfx::Attrib::TexCoord0, 2, bgfx::AttribType::Float).end();
m_instancedGeometryVertices = bgfx::createVertexBuffer(bgfx::makeRef(vertices, sizeof(float) * 2 * m_instancedMeshVertexCount), m_instancedGeometryDecl);
m_instancedGeometryIndices = bgfx::createIndexBuffer(bgfx::makeRef(indexes, sizeof(uint32_t) * m_instancedMeshPrimitiveCount * 3), BGFX_BUFFER_INDEX32);
}
// Shader parameters uploaded as "u_params".
Uniforms m_uniforms;
// Programs, textures and sampler uniforms, indexed by the enums above.
bgfx::ProgramHandle m_programsCompute[PROGRAM_COUNT];
bgfx::ProgramHandle m_programsDraw[SHADING_COUNT];
bgfx::TextureHandle m_textures[TEXTURE_COUNT];
bgfx::UniformHandle m_samplers[SAMPLER_COUNT];
// Subdivision key buffers; update() swaps their roles each frame via m_pingPong.
bgfx::DynamicIndexBufferHandle m_bufferSubd[2];
// Keys appended by the LoD pass for drawing.
bgfx::DynamicIndexBufferHandle m_bufferCulledSubd;
// Three 32-bit counters used by the compute passes.
bgfx::DynamicIndexBufferHandle m_bufferCounter;
// Coarse scene geometry, read by the compute and vertex shaders.
bgfx::IndexBufferHandle m_geometryIndices;
bgfx::VertexBufferHandle m_geometryVertices;
bgfx::VertexDecl m_geometryDecl;
// Triangle patch drawn once per key.
bgfx::IndexBufferHandle m_instancedGeometryIndices;
bgfx::VertexBufferHandle m_instancedGeometryVertices;
bgfx::VertexDecl m_instancedGeometryDecl;
// Indirect arguments: slot 0 is written as draw arguments, slot 1 as dispatch arguments.
bgfx::IndirectBufferHandle m_dispatchIndirect;
// CPU-side displacement image, set by loadDmapTexture().
bimg::ImageContainer* dmap;
float m_viewMtx[16];
float m_projMtx[16];
uint32_t m_width;
uint32_t m_height;
uint32_t m_debug;
uint32_t m_reset;
// Assigned in init(); not read elsewhere in this file.
uint32_t m_oldWidth;
uint32_t m_oldHeight;
uint32_t m_oldReset;
// Vertex and triangle counts of the currently loaded patch mesh.
uint32_t m_instancedMeshVertexCount;
uint32_t m_instancedMeshPrimitiveCount;
entry::MouseState m_mouseState;
// Assigned in init(); not read elsewhere in this file.
int64_t m_timeOffset;
// Displacement map file and the scale copied into the dmapFactor uniform.
struct {
bx::FilePath pathToFile;
float scale;
} m_dmap;
// Assigned in init(); not read elsewhere in this file.
int m_computeThreadCount;
// Selected draw program (index into m_programsDraw).
int m_shading;
// NOTE(review): never assigned or read in this file; the patch level lives in m_uniforms.gpuSubd.
int m_gpuSubd;
// "Pixels per edge" slider value, used by configureUniforms().
float m_primitivePixelLengthTarget;
// Vertical field of view in degrees (converted with bx::toRad in configureUniforms()).
float m_fovy;
// Index (0 or 1) selecting which m_bufferSubd entry is bound to slot 1 this frame.
int m_pingPong;
// When true, update() rebuilds the patch and key buffers and runs the init pass.
bool m_reset_gpu;
// Checkbox states of the settings window.
bool is_wireframe;
bool is_culled;
bool is_frozen;
};
} // namespace
ENTRY_IMPLEMENT_MAIN(ExampleTessellation, "41-tess", "Adaptive Gpu Tessellation.");

View File

@ -0,0 +1,15 @@
// Parameter block filled by the application: two vec4 values.
uniform vec4 u_params[2];
// First vec4.
#define u_DmapFactor u_params[0].x
#define u_LodFactor u_params[0].y
#define u_cull u_params[0].z
#define u_freeze u_params[0].w
// Second vec4: the patch level is stored as a float and converted to int here.
#define u_gpu_subd int(u_params[1].x)
// Work-group size of the LoD pass, and the divisor used when converting a key
// count into a work-group count for the indirect dispatch.
#define COMPUTE_THREAD_COUNT 32u
#define UPDATE_INDIRECT_VALUE_DIVIDE 32u

View File

@ -0,0 +1,13 @@
vec2 v_texcoord0 : TEXCOORD0 = vec2(0.0, 0.0);
vec3 v_position : TEXCOORD1 = vec3(0.0, 0.0, 0.0);
vec3 v_view : TEXCOORD2 = vec3(0.0, 0.0, 0.0);
vec3 v_normal : NORMAL = vec3(0.0, 0.0, 1.0);
vec3 v_tangent : TANGENT = vec3(1.0, 0.0, 0.0);
vec3 v_bitangent : BINORMAL = vec3(0.0, 1.0, 0.0);
vec4 v_color0 : COLOR = vec4(1.0, 0.0, 0.0, 1.0);
vec3 a_position : POSITION;
vec4 a_normal : NORMAL;
vec4 a_tangent : TANGENT;
vec2 a_texcoord0 : TEXCOORD0;
vec4 a_color0 : COLOR0;

View File

@ -0,0 +1,41 @@
$input a_texcoord0
$output v_texcoord0
#include "terrain_common.sh"
// Buffer slots used by the terrain vertex shader.
BUFFER_RO(u_CulledSubdBuffer, uint, 2);
BUFFER_RO(u_VertexBuffer, vec4, 3);
BUFFER_RO(u_IndexBuffer, uint, 4);

// Terrain vertex shader: each instance draws the patch for one (primID, key)
// pair of the culled buffer. The patch vertex (a_texcoord0) is interpolated
// inside the key's sub-triangle, displaced in z, and projected.
void main()
{
	// one instance per key
	int instance = gl_InstanceID;
	uint primID = u_CulledSubdBuffer[instance*2];
	uint key = u_CulledSubdBuffer[instance*2+1];

	// coarse triangle referenced by primID
	vec4 coarse[3];
	coarse[0] = u_VertexBuffer[u_IndexBuffer[primID * 3 ]];
	coarse[1] = u_VertexBuffer[u_IndexBuffer[primID * 3 + 1]];
	coarse[2] = u_VertexBuffer[u_IndexBuffer[primID * 3 + 2]];

	// sub-triangle associated to the key
	vec4 tri[3];
	subd(key, coarse, tri);

	// vertex location inside the sub-triangle, raised by the displacement map
	vec4 position = berp(tri, a_texcoord0);
	position.z += dmap(position.xy);

	v_texcoord0 = position.xy * 0.5 + 0.5;
	gl_Position = mul(u_modelViewProj, position);
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 MiB

View File

@ -494,6 +494,7 @@ or _OPTIONS["with-combined-examples"] then
, "38-bloom"
, "39-assao"
, "40-svt"
, "41-tess"
)
-- C99 source doesn't compile under WinRT settings