Revert "Revert "Avoid redundant uniform uploads (#2090)""
This reverts commit c023ac4620.
This commit is contained in:
parent
c023ac4620
commit
1361ccf211
@ -205,6 +205,7 @@ typedef void (GL_APIENTRYP PFNGLUNIFORM1IVPROC) (GLint location, GLsiz
|
|||||||
typedef void (GL_APIENTRYP PFNGLUNIFORM2FVPROC) (GLint location, GLsizei count, const GLfloat *value);
|
typedef void (GL_APIENTRYP PFNGLUNIFORM2FVPROC) (GLint location, GLsizei count, const GLfloat *value);
|
||||||
typedef void (GL_APIENTRYP PFNGLUNIFORM3FVPROC) (GLint location, GLsizei count, const GLfloat *value);
|
typedef void (GL_APIENTRYP PFNGLUNIFORM3FVPROC) (GLint location, GLsizei count, const GLfloat *value);
|
||||||
typedef void (GL_APIENTRYP PFNGLUNIFORM4FVPROC) (GLint location, GLsizei count, const GLfloat *value);
|
typedef void (GL_APIENTRYP PFNGLUNIFORM4FVPROC) (GLint location, GLsizei count, const GLfloat *value);
|
||||||
|
typedef void (GL_APIENTRYP PFNGLUNIFORM4FPROC) (GLint location, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
|
||||||
typedef void (GL_APIENTRYP PFNGLUNIFORMMATRIX3FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
|
typedef void (GL_APIENTRYP PFNGLUNIFORMMATRIX3FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
|
||||||
typedef void (GL_APIENTRYP PFNGLUNIFORMMATRIX4FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
|
typedef void (GL_APIENTRYP PFNGLUNIFORMMATRIX4FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value);
|
||||||
typedef void (GL_APIENTRYP PFNGLUSEPROGRAMPROC) (GLuint program);
|
typedef void (GL_APIENTRYP PFNGLUSEPROGRAMPROC) (GLuint program);
|
||||||
@ -402,6 +403,7 @@ GL_IMPORT______(false, PFNGLUNIFORM1FVPROC, glUniform1fv)
|
|||||||
GL_IMPORT______(false, PFNGLUNIFORM2FVPROC, glUniform2fv);
|
GL_IMPORT______(false, PFNGLUNIFORM2FVPROC, glUniform2fv);
|
||||||
GL_IMPORT______(false, PFNGLUNIFORM3FVPROC, glUniform3fv);
|
GL_IMPORT______(false, PFNGLUNIFORM3FVPROC, glUniform3fv);
|
||||||
GL_IMPORT______(false, PFNGLUNIFORM4FVPROC, glUniform4fv);
|
GL_IMPORT______(false, PFNGLUNIFORM4FVPROC, glUniform4fv);
|
||||||
|
GL_IMPORT______(false, PFNGLUNIFORM4FPROC, glUniform4f);
|
||||||
GL_IMPORT______(false, PFNGLUNIFORMMATRIX3FVPROC, glUniformMatrix3fv);
|
GL_IMPORT______(false, PFNGLUNIFORMMATRIX3FVPROC, glUniformMatrix3fv);
|
||||||
GL_IMPORT______(false, PFNGLUNIFORMMATRIX4FVPROC, glUniformMatrix4fv);
|
GL_IMPORT______(false, PFNGLUNIFORMMATRIX4FVPROC, glUniformMatrix4fv);
|
||||||
GL_IMPORT______(false, PFNGLUSEPROGRAMPROC, glUseProgram);
|
GL_IMPORT______(false, PFNGLUSEPROGRAMPROC, glUseProgram);
|
||||||
|
@ -3442,17 +3442,17 @@ namespace bgfx { namespace gl
|
|||||||
GL_CHECK(glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE) );
|
GL_CHECK(glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE) );
|
||||||
|
|
||||||
ProgramGL& program = m_program[_blitter.m_program.idx];
|
ProgramGL& program = m_program[_blitter.m_program.idx];
|
||||||
GL_CHECK(glUseProgram(program.m_id) );
|
GlUseProgram(program.m_id);
|
||||||
GL_CHECK(glUniform1i(program.m_sampler[0].loc, 0) );
|
GlUniform1i(program.m_sampler[0].loc, 0);
|
||||||
|
|
||||||
float proj[16];
|
float proj[16];
|
||||||
bx::mtxOrtho(proj, 0.0f, (float)width, (float)height, 0.0f, 0.0f, 1000.0f, 0.0f, g_caps.homogeneousDepth);
|
bx::mtxOrtho(proj, 0.0f, (float)width, (float)height, 0.0f, 0.0f, 1000.0f, 0.0f, g_caps.homogeneousDepth);
|
||||||
|
|
||||||
GL_CHECK(glUniformMatrix4fv(program.m_predefined[0].m_loc
|
GlUniformMatrix4fv(program.m_predefined[0].m_loc
|
||||||
, 1
|
, 1
|
||||||
, GL_FALSE
|
, GL_FALSE
|
||||||
, proj
|
, proj
|
||||||
) );
|
);
|
||||||
|
|
||||||
GL_CHECK(glActiveTexture(GL_TEXTURE0) );
|
GL_CHECK(glActiveTexture(GL_TEXTURE0) );
|
||||||
GL_CHECK(glBindTexture(GL_TEXTURE_2D, m_textures[_blitter.m_texture.idx].m_id) );
|
GL_CHECK(glBindTexture(GL_TEXTURE_2D, m_textures[_blitter.m_texture.idx].m_id) );
|
||||||
@ -3553,19 +3553,19 @@ namespace bgfx { namespace gl
|
|||||||
|
|
||||||
void setShaderUniform4f(uint8_t /*_flags*/, uint32_t _regIndex, const void* _val, uint32_t _numRegs)
|
void setShaderUniform4f(uint8_t /*_flags*/, uint32_t _regIndex, const void* _val, uint32_t _numRegs)
|
||||||
{
|
{
|
||||||
GL_CHECK(glUniform4fv(_regIndex
|
GlUniform4fv(_regIndex
|
||||||
, _numRegs
|
, _numRegs
|
||||||
, (const GLfloat*)_val
|
, (const GLfloat*)_val
|
||||||
) );
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
void setShaderUniform4x4f(uint8_t /*_flags*/, uint32_t _regIndex, const void* _val, uint32_t _numRegs)
|
void setShaderUniform4x4f(uint8_t /*_flags*/, uint32_t _regIndex, const void* _val, uint32_t _numRegs)
|
||||||
{
|
{
|
||||||
GL_CHECK(glUniformMatrix4fv(_regIndex
|
GlUniformMatrix4fv(_regIndex
|
||||||
, _numRegs
|
, _numRegs
|
||||||
, GL_FALSE
|
, GL_FALSE
|
||||||
, (const GLfloat*)_val
|
, (const GLfloat*)_val
|
||||||
) );
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t setFrameBuffer(FrameBufferHandle _fbh, uint32_t _height, uint16_t _discard = BGFX_CLEAR_NONE, bool _msaa = true)
|
uint32_t setFrameBuffer(FrameBufferHandle _fbh, uint32_t _height, uint16_t _discard = BGFX_CLEAR_NONE, bool _msaa = true)
|
||||||
@ -4029,7 +4029,7 @@ namespace bgfx { namespace gl
|
|||||||
case UniformType::_uniform: \
|
case UniformType::_uniform: \
|
||||||
{ \
|
{ \
|
||||||
_type* value = (_type*)data; \
|
_type* value = (_type*)data; \
|
||||||
GL_CHECK(glUniform##_glsuffix(loc, num, value) ); \
|
GlUniform##_glsuffix(loc, num, value); \
|
||||||
} \
|
} \
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -4037,7 +4037,7 @@ namespace bgfx { namespace gl
|
|||||||
case UniformType::_uniform: \
|
case UniformType::_uniform: \
|
||||||
{ \
|
{ \
|
||||||
_type* value = (_type*)data; \
|
_type* value = (_type*)data; \
|
||||||
GL_CHECK(glUniform##_glsuffix(loc, num, GL_FALSE, value) ); \
|
GlUniform##_glsuffix(loc, num, GL_FALSE, value); \
|
||||||
} \
|
} \
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -4048,12 +4048,12 @@ namespace bgfx { namespace gl
|
|||||||
// since they need to marshal an array over from Wasm to JS, so optimize the case when there is exactly one
|
// since they need to marshal an array over from Wasm to JS, so optimize the case when there is exactly one
|
||||||
// uniform to upload.
|
// uniform to upload.
|
||||||
case UniformType::Sampler:
|
case UniformType::Sampler:
|
||||||
if (num > 1) glUniform1iv(loc, num, (int*)data);
|
if (num > 1) GlUniform1iv(loc, num, (int*)data);
|
||||||
else glUniform1i(loc, *(int*)data);
|
else GlUniform1i(loc, *(int*)data);
|
||||||
break;
|
break;
|
||||||
case UniformType::Vec4:
|
case UniformType::Vec4:
|
||||||
if (num > 1) glUniform4fv(loc, num, (float*)data);
|
if (num > 1) GlUniform4fv(loc, num, (float*)data);
|
||||||
else glUniform4f(loc, ((float*)data)[0], ((float*)data)[1], ((float*)data)[2], ((float*)data)[3]);
|
else GlUniform4f(loc, ((float*)data)[0], ((float*)data)[1], ((float*)data)[2], ((float*)data)[3]);
|
||||||
break;
|
break;
|
||||||
#else
|
#else
|
||||||
CASE_IMPLEMENT_UNIFORM(Sampler, 1iv, I, int);
|
CASE_IMPLEMENT_UNIFORM(Sampler, 1iv, I, int);
|
||||||
@ -4178,7 +4178,7 @@ namespace bgfx { namespace gl
|
|||||||
GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, vb.m_id) );
|
GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, vb.m_id) );
|
||||||
|
|
||||||
ProgramGL& program = m_program[_clearQuad.m_program[numMrt-1].idx];
|
ProgramGL& program = m_program[_clearQuad.m_program[numMrt-1].idx];
|
||||||
GL_CHECK(glUseProgram(program.m_id) );
|
GlUseProgram(program.m_id);
|
||||||
program.bindAttributesBegin();
|
program.bindAttributesBegin();
|
||||||
program.bindAttributes(layout, 0);
|
program.bindAttributes(layout, 0);
|
||||||
program.bindAttributesEnd();
|
program.bindAttributesEnd();
|
||||||
@ -4237,6 +4237,95 @@ namespace bgfx { namespace gl
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Binds `program` as the active GL program. The uniform state cache is told
// about the program first, so cached uniform values recorded afterwards are
// keyed against it.
void GlUseProgram(GLuint program)
{
	m_uniformStateCache.saveCurrentProgram(program);

	GL_CHECK(glUseProgram(program) );
}
|
||||||
|
|
||||||
|
// Cache uniform uploads to avoid redundant uploading of state that is
|
||||||
|
// already set to a shader program
|
||||||
|
void GlUniform1i(uint32_t loc, int value)
|
||||||
|
{
|
||||||
|
if (m_uniformStateCache.updateUniformCache(loc, value))
|
||||||
|
{
|
||||||
|
GL_CHECK(glUniform1i(loc, value) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void GlUniform1iv(uint32_t loc, int num, const int *data)
|
||||||
|
{
|
||||||
|
bool changed = false;
|
||||||
|
for(int i = 0; i < num; ++i)
|
||||||
|
{
|
||||||
|
if (m_uniformStateCache.updateUniformCache(loc+i, data[i]))
|
||||||
|
{
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (changed)
|
||||||
|
{
|
||||||
|
GL_CHECK(glUniform1iv(loc, num, data) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void GlUniform4f(uint32_t loc, float x, float y, float z, float w)
|
||||||
|
{
|
||||||
|
UniformStateCache::f4 f; f.val[0] = x; f.val[1] = y; f.val[2] = z; f.val[3] = w;
|
||||||
|
if (m_uniformStateCache.updateUniformCache(loc, f))
|
||||||
|
{
|
||||||
|
GL_CHECK(glUniform4f(loc, x, y, z, w));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void GlUniform4fv(uint32_t loc, int num, const float *data)
|
||||||
|
{
|
||||||
|
bool changed = false;
|
||||||
|
for(int i = 0; i < num; ++i)
|
||||||
|
{
|
||||||
|
if (m_uniformStateCache.updateUniformCache(loc+i, *(const UniformStateCache::f4*)&data[4*i]))
|
||||||
|
{
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (changed)
|
||||||
|
{
|
||||||
|
GL_CHECK(glUniform4fv(loc, num, data) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void GlUniformMatrix3fv(uint32_t loc, int num, GLboolean transpose, const float *data)
|
||||||
|
{
|
||||||
|
bool changed = false;
|
||||||
|
for(int i = 0; i < num; ++i)
|
||||||
|
{
|
||||||
|
if (m_uniformStateCache.updateUniformCache(loc+i, *(const UniformStateCache::f3x3*)&data[9*i]))
|
||||||
|
{
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (changed)
|
||||||
|
{
|
||||||
|
GL_CHECK(glUniformMatrix3fv(loc, num, transpose, data) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void GlUniformMatrix4fv(uint32_t loc, int num, GLboolean transpose, const float *data)
|
||||||
|
{
|
||||||
|
bool changed = false;
|
||||||
|
for(int i = 0; i < num; ++i)
|
||||||
|
{
|
||||||
|
if (m_uniformStateCache.updateUniformCache(loc+i, *(const UniformStateCache::f4x4*)&data[16*i]))
|
||||||
|
{
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (changed)
|
||||||
|
{
|
||||||
|
GL_CHECK(glUniformMatrix4fv(loc, num, transpose, data) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void* m_renderdocdll;
|
void* m_renderdocdll;
|
||||||
|
|
||||||
uint16_t m_numWindows;
|
uint16_t m_numWindows;
|
||||||
@ -4256,6 +4345,7 @@ namespace bgfx { namespace gl
|
|||||||
OcclusionQueryGL m_occlusionQuery;
|
OcclusionQueryGL m_occlusionQuery;
|
||||||
|
|
||||||
SamplerStateCache m_samplerStateCache;
|
SamplerStateCache m_samplerStateCache;
|
||||||
|
UniformStateCache m_uniformStateCache;
|
||||||
|
|
||||||
TextVideoMem m_textVideoMem;
|
TextVideoMem m_textVideoMem;
|
||||||
bool m_rtMsaa;
|
bool m_rtMsaa;
|
||||||
@ -4664,7 +4754,7 @@ namespace bgfx { namespace gl
|
|||||||
|
|
||||||
if (0 != m_id)
|
if (0 != m_id)
|
||||||
{
|
{
|
||||||
GL_CHECK(glUseProgram(0) );
|
s_renderGL->GlUseProgram(0);
|
||||||
GL_CHECK(glDeleteProgram(m_id) );
|
GL_CHECK(glDeleteProgram(m_id) );
|
||||||
m_id = 0;
|
m_id = 0;
|
||||||
}
|
}
|
||||||
@ -7155,7 +7245,7 @@ namespace bgfx { namespace gl
|
|||||||
const RenderCompute& compute = renderItem.compute;
|
const RenderCompute& compute = renderItem.compute;
|
||||||
|
|
||||||
ProgramGL& program = m_program[key.m_program.idx];
|
ProgramGL& program = m_program[key.m_program.idx];
|
||||||
GL_CHECK(glUseProgram(program.m_id) );
|
GlUseProgram(program.m_id);
|
||||||
|
|
||||||
GLbitfield barrier = 0;
|
GLbitfield barrier = 0;
|
||||||
for (uint32_t ii = 0; ii < maxComputeBindings; ++ii)
|
for (uint32_t ii = 0; ii < maxComputeBindings; ++ii)
|
||||||
@ -7653,7 +7743,7 @@ namespace bgfx { namespace gl
|
|||||||
// Skip rendering if program index is valid, but program is invalid.
|
// Skip rendering if program index is valid, but program is invalid.
|
||||||
currentProgram = 0 == id ? ProgramHandle{kInvalidHandle} : currentProgram;
|
currentProgram = 0 == id ? ProgramHandle{kInvalidHandle} : currentProgram;
|
||||||
|
|
||||||
GL_CHECK(glUseProgram(id) );
|
GlUseProgram(id);
|
||||||
programChanged =
|
programChanged =
|
||||||
constantsChanged =
|
constantsChanged =
|
||||||
bindAttribs = true;
|
bindAttribs = true;
|
||||||
|
@ -32,6 +32,10 @@
|
|||||||
|| BX_PLATFORM_WINDOWS \
|
|| BX_PLATFORM_WINDOWS \
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Keep a state cache of GL uniform values to avoid redundant uploads
|
||||||
|
// on the following platforms.
|
||||||
|
#define BGFX_GL_CONFIG_UNIFORM_CACHE BX_PLATFORM_EMSCRIPTEN
|
||||||
|
|
||||||
#define BGFX_GL_PROFILER_BEGIN(_view, _abgr) \
|
#define BGFX_GL_PROFILER_BEGIN(_view, _abgr) \
|
||||||
BX_MACRO_BLOCK_BEGIN \
|
BX_MACRO_BLOCK_BEGIN \
|
||||||
GL_CHECK(glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, s_viewName[view]) ); \
|
GL_CHECK(glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, s_viewName[view]) ); \
|
||||||
@ -1222,6 +1226,69 @@ namespace bgfx { namespace gl
|
|||||||
#define GL_IMPORT(_optional, _proto, _func, _import) extern _proto _func
|
#define GL_IMPORT(_optional, _proto, _func, _import) extern _proto _func
|
||||||
#include "glimports.h"
|
#include "glimports.h"
|
||||||
|
|
||||||
|
class UniformStateCache
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
struct f4 { float val[4]; bool operator ==(const f4 &rhs) { const uint64_t *a = (const uint64_t *)this; const uint64_t *b = (const uint64_t *)&rhs; return a[0] == b[0] && a[1] == b[1]; }};
|
||||||
|
struct f3x3 { float val[9]; bool operator ==(const f3x3 &rhs) { const uint64_t *a = (const uint64_t *)this; const uint64_t *b = (const uint64_t *)&rhs; return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3] && ((const uint32_t*)a)[8] == ((const uint32_t*)b)[8]; }};
|
||||||
|
struct f4x4 { float val[16]; bool operator ==(const f4x4 &rhs) { const uint64_t *a = (const uint64_t *)this; const uint64_t *b = (const uint64_t *)&rhs; return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3] && a[4] == b[4] && a[5] == b[5] && a[6] == b[6] && a[7] == b[7]; }};
|
||||||
|
|
||||||
|
UniformStateCache()
|
||||||
|
:currentProgram(0)
|
||||||
|
{}
|
||||||
|
|
||||||
|
// Inserts the new value into the uniform cache, and returns true
|
||||||
|
// if the old value was different than the new one.
|
||||||
|
template<typename T>
|
||||||
|
bool updateUniformCache(uint32_t loc, const T &value)
|
||||||
|
{
|
||||||
|
#if BGFX_GL_CONFIG_UNIFORM_CACHE
|
||||||
|
// Uniform state cache for various types.
|
||||||
|
stl::unordered_map<uint64_t, T> &uniformCacheMap = getUniformCache<T>();
|
||||||
|
|
||||||
|
uint64_t key = ((uint64_t)currentProgram << 32) | loc;
|
||||||
|
|
||||||
|
auto iter = uniformCacheMap.find(key);
|
||||||
|
// Not found in the cache? Add it.
|
||||||
|
if (iter == uniformCacheMap.end())
|
||||||
|
{
|
||||||
|
uniformCacheMap[key] = value;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// Value in the cache was the same as new state? Skip reuploading this state.
|
||||||
|
if (iter->second == value)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
iter->second = value;
|
||||||
|
#endif
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void saveCurrentProgram(GLuint program)
|
||||||
|
{
|
||||||
|
#if BGFX_GL_CONFIG_UNIFORM_CACHE
|
||||||
|
currentProgram = program;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
GLuint currentProgram;
|
||||||
|
|
||||||
|
stl::unordered_map<uint64_t, int> uniformiCacheMap;
|
||||||
|
stl::unordered_map<uint64_t, f4> uniformf4CacheMap;
|
||||||
|
stl::unordered_map<uint64_t, f3x3> uniformf3x3CacheMap;
|
||||||
|
stl::unordered_map<uint64_t, f4x4> uniformf4x4CacheMap;
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
stl::unordered_map<uint64_t, T> &getUniformCache();
|
||||||
|
};
|
||||||
|
|
||||||
|
// Explicit specializations that map each cached value type to its backing
// map. A full specialization of a function template is an ordinary function
// for linkage purposes, so these are marked `inline` to stay ODR-safe if this
// file is included by more than one translation unit.
template<> inline stl::unordered_map<uint64_t, int> &UniformStateCache::getUniformCache() { return uniformiCacheMap; }
template<> inline stl::unordered_map<uint64_t, UniformStateCache::f4> &UniformStateCache::getUniformCache() { return uniformf4CacheMap; }
template<> inline stl::unordered_map<uint64_t, UniformStateCache::f3x3> &UniformStateCache::getUniformCache() { return uniformf3x3CacheMap; }
template<> inline stl::unordered_map<uint64_t, UniformStateCache::f4x4> &UniformStateCache::getUniformCache() { return uniformf4x4CacheMap; }
|
||||||
|
|
||||||
class SamplerStateCache
|
class SamplerStateCache
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
Loading…
Reference in New Issue
Block a user