diff --git a/include/bgfx.h b/include/bgfx.h index 01c92b8da..fbc7b1943 100644 --- a/include/bgfx.h +++ b/include/bgfx.h @@ -49,6 +49,18 @@ namespace bgfx }; }; + struct Access + { + enum Enum + { + Read, + Write, + ReadWrite, + + Count + }; + }; + struct Attrib { enum Enum // corresponds to vertex shader attribute: @@ -1082,7 +1094,16 @@ namespace bgfx /// uint32_t submitMask(uint32_t _viewMask, int32_t _depth = 0); - /// Discard all previously set state for draw call. + /// Set compute image from texture. + void setImage(uint8_t _stage, UniformHandle _sampler, TextureHandle _handle, uint8_t _mip, TextureFormat::Enum _format, Access::Enum _access); + + /// Set compute image from frame buffer texture. + void setImage(uint8_t _stage, UniformHandle _sampler, FrameBufferHandle _handle, uint8_t _attachment, TextureFormat::Enum _format, Access::Enum _access); + + /// Dispatch compute. + void dispatch(uint8_t _id, ProgramHandle _handle, uint16_t _numX = 1, uint16_t _numY = 1, uint16_t _numZ = 1); + + /// Discard all previously set state for draw or compute call. void discard(); /// Request screen shot. 
diff --git a/include/bgfxdefines.h b/include/bgfxdefines.h index b2253accc..fc9851b5a 100644 --- a/include/bgfxdefines.h +++ b/include/bgfxdefines.h @@ -236,6 +236,7 @@ #define BGFX_TEXTURE_COMPARE_ALWAYS UINT32_C(0x00080000) #define BGFX_TEXTURE_COMPARE_SHIFT 16 #define BGFX_TEXTURE_COMPARE_MASK UINT32_C(0x000f0000) +#define BGFX_TEXTURE_COMPUTE_WRITE UINT32_C(0x00100000) #define BGFX_TEXTURE_RESERVED_SHIFT 24 #define BGFX_TEXTURE_RESERVED_MASK UINT32_C(0xff000000) @@ -295,6 +296,7 @@ #define BGFX_CAPS_RENDERER_MULTITHREADED UINT64_C(0x0000000020000000) #define BGFX_CAPS_FRAGMENT_DEPTH UINT64_C(0x0000000040000000) #define BGFX_CAPS_BLEND_INDEPENDENT UINT64_C(0x0000000080000000) +#define BGFX_CAPS_COMPUTE UINT64_C(0x0000000100000000) #define BGFX_CAPS_TEXTURE_DEPTH_MASK (0 \ | BGFX_CAPS_TEXTURE_FORMAT_D16 \ diff --git a/premake/shader.mk b/premake/shader.mk index c6b88ad5a..cac92c423 100644 --- a/premake/shader.mk +++ b/premake/shader.mk @@ -46,6 +46,7 @@ else ifeq ($(TARGET), 1) VS_FLAGS=--platform windows -p vs_4_0 -O 3 FS_FLAGS=--platform windows -p ps_4_0 -O 3 +CS_FLAGS=--platform windows -p cs_5_0 -O 3 SHADER_PATH=shaders/dx11 else ifeq ($(TARGET), 2) @@ -56,11 +57,13 @@ else ifeq ($(TARGET), 3) VS_FLAGS=--platform android FS_FLAGS=--platform android +CS_FLAGS=--platform android SHADER_PATH=shaders/gles else ifeq ($(TARGET), 4) VS_FLAGS=--platform linux -p 120 FS_FLAGS=--platform linux -p 120 +CS_FLAGS=--platform linux -p 430 SHADER_PATH=shaders/glsl endif endif @@ -71,6 +74,7 @@ endif THISDIR := $(dir $(lastword $(MAKEFILE_LIST))) VS_FLAGS+=-i $(THISDIR)../src/ FS_FLAGS+=-i $(THISDIR)../src/ +CS_FLAGS+=-i $(THISDIR)../src/ BUILD_OUTPUT_DIR=$(addprefix ./, $(RUNTIME_DIR)/$(SHADER_PATH)) BUILD_INTERMEDIATE_DIR=$(addprefix $(BUILD_DIR)/, $(SHADER_PATH)) @@ -81,12 +85,31 @@ VS_DEPS=$(addprefix $(BUILD_INTERMEDIATE_DIR)/,$(addsuffix .bin.d, $(basename $( FS_SOURCES=$(wildcard fs_*.sc) FS_DEPS=$(addprefix $(BUILD_INTERMEDIATE_DIR)/,$(addsuffix .bin.d, $(basename 
$(FS_SOURCES)))) +CS_SOURCES=$(wildcard cs_*.sc) +CS_DEPS=$(addprefix $(BUILD_INTERMEDIATE_DIR)/,$(addsuffix .bin.d, $(basename $(CS_SOURCES)))) + VS_BIN = $(addprefix $(BUILD_INTERMEDIATE_DIR)/, $(addsuffix .bin, $(basename $(VS_SOURCES)))) FS_BIN = $(addprefix $(BUILD_INTERMEDIATE_DIR)/, $(addsuffix .bin, $(basename $(FS_SOURCES)))) +CS_BIN = $(addprefix $(BUILD_INTERMEDIATE_DIR)/, $(addsuffix .bin, $(basename $(CS_SOURCES)))) BIN = $(VS_BIN) $(FS_BIN) ASM = $(VS_ASM) $(FS_ASM) +ifeq ($(TARGET), 1) +BIN += $(CS_BIN) +ASM += $(CS_ASM) +else +ifeq ($(TARGET), 3) +BIN += $(CS_BIN) +ASM += $(CS_ASM) +else +ifeq ($(TARGET), 4) +BIN += $(CS_BIN) +ASM += $(CS_ASM) +endif +endif +endif + $(BUILD_INTERMEDIATE_DIR)/vs_%.bin : vs_%.sc @echo [$(<)] $(SILENT) $(SHADERC) $(VS_FLAGS) --type vertex --depends -o $(@) -f $(<) --disasm @@ -97,6 +120,11 @@ $(BUILD_INTERMEDIATE_DIR)/fs_%.bin : fs_%.sc $(SILENT) $(SHADERC) $(FS_FLAGS) --type fragment --depends -o $(@) -f $(<) --disasm $(SILENT) cp $(@) $(BUILD_OUTPUT_DIR)/$(@F) +$(BUILD_INTERMEDIATE_DIR)/cs_%.bin : cs_%.sc + @echo [$(<)] + $(SILENT) $(SHADERC) $(CS_FLAGS) --type compute --depends -o $(@) -f $(<) --disasm + $(SILENT) cp $(@) $(BUILD_OUTPUT_DIR)/$(@F) + .PHONY: all all: dirs $(BIN) @echo Target $(SHADER_PATH) @@ -119,3 +147,4 @@ endif # TARGET -include $(VS_DEPS) -include $(FS_DEPS) +-include $(CS_DEPS) diff --git a/src/bgfx.cpp b/src/bgfx.cpp index 878ac0556..9d2e69658 100644 --- a/src/bgfx.cpp +++ b/src/bgfx.cpp @@ -644,12 +644,14 @@ namespace bgfx } if (BGFX_CONFIG_MAX_DRAW_CALLS-1 <= m_num - || (0 == m_state.m_numVertices && 0 == m_state.m_numIndices) ) + || (0 == m_draw.m_numVertices && 0 == m_draw.m_numIndices) ) { ++m_numDropped; return m_num; } + m_constEnd = m_constantBuffer->getPos(); + BX_WARN(invalidHandle != m_key.m_program, "Program with invalid handle"); if (invalidHandle != m_key.m_program) { @@ -657,18 +659,20 @@ namespace bgfx m_key.m_view = _id; m_key.m_seq = s_ctx->m_seq[_id] & s_ctx->m_seqMask[_id]; 
s_ctx->m_seq[_id]++; - uint64_t key = m_key.encode(); + uint64_t key = m_key.encodeDraw(); m_sortKeys[m_num] = key; - m_sortValues[m_num] = m_numRenderStates; + m_sortValues[m_num] = m_numRenderItems; ++m_num; - m_state.m_constEnd = m_constantBuffer->getPos(); - m_state.m_flags |= m_flags; - m_renderState[m_numRenderStates] = m_state; - ++m_numRenderStates; + m_draw.m_constBegin = m_constBegin; + m_draw.m_constEnd = m_constEnd; + m_draw.m_flags |= m_flags; + m_renderItem[m_numRenderItems].draw = m_draw; + ++m_numRenderItems; } - m_state.clear(); + m_draw.clear(); + m_constBegin = m_constEnd; m_flags = BGFX_STATE_NONE; return m_num; @@ -683,12 +687,14 @@ namespace bgfx } if (BGFX_CONFIG_MAX_DRAW_CALLS-1 <= m_num - || (0 == m_state.m_numVertices && 0 == m_state.m_numIndices) ) + || (0 == m_draw.m_numVertices && 0 == m_draw.m_numIndices) ) { m_numDropped += bx::uint32_cntbits(_viewMask); return m_num; } + m_constEnd = m_constantBuffer->getPos(); + BX_WARN(invalidHandle != m_key.m_program, "Program with invalid handle"); if (invalidHandle != m_key.m_program) { @@ -702,24 +708,69 @@ namespace bgfx m_key.m_view = id; m_key.m_seq = s_ctx->m_seq[id] & s_ctx->m_seqMask[id]; s_ctx->m_seq[id]++; - uint64_t key = m_key.encode(); + uint64_t key = m_key.encodeDraw(); m_sortKeys[m_num] = key; - m_sortValues[m_num] = m_numRenderStates; + m_sortValues[m_num] = m_numRenderItems; ++m_num; } - m_state.m_constEnd = m_constantBuffer->getPos(); - m_state.m_flags |= m_flags; - m_renderState[m_numRenderStates] = m_state; - ++m_numRenderStates; + m_draw.m_constBegin = m_constBegin; + m_draw.m_constEnd = m_constEnd; + m_draw.m_flags |= m_flags; + m_renderItem[m_numRenderItems].draw = m_draw; + ++m_numRenderItems; } - m_state.clear(); + m_draw.clear(); + m_constBegin = m_constEnd; m_flags = BGFX_STATE_NONE; return m_num; } + uint32_t Frame::dispatch(uint8_t _id, ProgramHandle _handle, uint16_t _numX, uint16_t _numY, uint16_t _numZ) + { + if (m_discard) + { + discard(); + return m_num; + } + + 
if (BGFX_CONFIG_MAX_DRAW_CALLS-1 <= m_num) + { + ++m_numDropped; + return m_num; + } + + m_constEnd = m_constantBuffer->getPos(); + + m_compute.m_numX = bx::uint16_max(_numX, 1); + m_compute.m_numY = bx::uint16_max(_numY, 1); + m_compute.m_numZ = bx::uint16_max(_numZ, 1); + m_key.m_program = _handle.idx; + if (invalidHandle != m_key.m_program) + { + m_key.m_depth = 0; + m_key.m_view = _id; + m_key.m_seq = s_ctx->m_seq[_id] & s_ctx->m_seqMask[_id]; + s_ctx->m_seq[_id]++; + uint64_t key = m_key.encodeCompute(); + m_sortKeys[m_num] = key; + m_sortValues[m_num] = m_numRenderItems; + ++m_num; + + m_compute.m_constBegin = m_constBegin; + m_compute.m_constEnd = m_constEnd; + m_renderItem[m_numRenderItems].compute = m_compute; + ++m_numRenderItems; + } + + m_compute.clear(); + m_constBegin = m_constEnd; + + return m_num; + } + void Frame::sort() { bx::radixSort64(m_sortKeys, s_ctx->m_tempKeys, m_sortValues, s_ctx->m_tempValues, m_num); @@ -821,6 +872,7 @@ namespace bgfx CAPS_FLAGS(BGFX_CAPS_RENDERER_MULTITHREADED), CAPS_FLAGS(BGFX_CAPS_FRAGMENT_DEPTH), CAPS_FLAGS(BGFX_CAPS_BLEND_INDEPENDENT), + CAPS_FLAGS(BGFX_CAPS_COMPUTE), #undef CAPS_FLAGS }; @@ -2113,6 +2165,19 @@ again: return handle; } + ProgramHandle createProgram(ShaderHandle _vsh, bool _destroyShaders) + { + BGFX_CHECK_MAIN_THREAD(); + ProgramHandle handle = s_ctx->createProgram(_vsh); + + if (_destroyShaders) + { + destroyShader(_vsh); + } + + return handle; + } + void destroyProgram(ProgramHandle _handle) { BGFX_CHECK_MAIN_THREAD(); @@ -2592,6 +2657,24 @@ again: return s_ctx->submitMask(_viewMask, _depth); } + void setImage(uint8_t _stage, UniformHandle _sampler, TextureHandle _handle, uint8_t _mip, TextureFormat::Enum _format, Access::Enum _access) + { + BGFX_CHECK_MAIN_THREAD(); + s_ctx->setImage(_stage, _sampler, _handle, _mip, _format, _access); + } + + void setImage(uint8_t _stage, UniformHandle _sampler, FrameBufferHandle _handle, uint8_t _attachment, TextureFormat::Enum _format, Access::Enum _access) + { 
+ BGFX_CHECK_MAIN_THREAD(); + s_ctx->setImage(_stage, _sampler, _handle, _attachment, _format, _access); + } + + void dispatch(uint8_t _id, ProgramHandle _handle, uint16_t _numX, uint16_t _numY, uint16_t _numZ) + { + BGFX_CHECK_MAIN_THREAD(); + s_ctx->dispatch(_id, _handle, _numX, _numY, _numZ); + } + void discard() { BGFX_CHECK_MAIN_THREAD(); diff --git a/src/bgfx_compute.sh b/src/bgfx_compute.sh new file mode 100644 index 000000000..ca2582718 --- /dev/null +++ b/src/bgfx_compute.sh @@ -0,0 +1,121 @@ +/* + * Copyright 2011-2014 Branimir Karadzic. All rights reserved. + * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef BGFX_COMPUTE_H_HEADER_GUARD +#define BGFX_COMPUTE_H_HEADER_GUARD + +#ifndef __cplusplus + +#if BGFX_SHADER_LANGUAGE_HLSL + +#define IMAGE2D_RO(_name, _reg) Texture2D _name : register(t[_reg]) +#define IMAGE2D_RW(_name, _reg) RWTexture2D _name : register(u[_reg]) +#define IMAGE2D_WR(_name, _reg) IMAGE2D_RW(_name, _reg) + +#define BUFFER_RO(_name, _struct, _reg) StructuredBuffer<_struct> _name : register(t[_reg]) +#define BUFFER_RW(_name, _struct, _reg) RWStructuredBuffer<_struct> _name : register(u[_reg]) +#define BUFFER_WR(_name, _struct, _reg) BUFFER_RW(_name, _struct, _reg) + +#define NUM_THREADS(_x, _y, _z) [numthreads(_x, _y, _z)] + +vec4 imageLoad(Texture2D _image, ivec2 _uv) +{ + return _image.Load(uint3(_uv.xy, 0) ); +} + +ivec2 imageSize(Texture2D _image) +{ + ivec2 result; + _image.GetDimensions(result.x, result.y); + return result; +} + +//vec4 imageLoad(RWTexture2D _image, ivec2 _uv) +//{ +// return _image[_uv]; +//} + +ivec2 imageSize(RWTexture2D _image) +{ + ivec2 result; + _image.GetDimensions(result.x, result.y); + return result; +} + +void imageStore(RWTexture2D _image, ivec2 _uv, vec4 _rgba) +{ + _image[_uv] = _rgba; +} + +#define __ATOMIC_IMPL_TYPE(_genType, _glFunc, _dxFunc) \ + _genType _glFunc(_genType _mem, _genType _data) \ + { \ + _genType result; \ + _dxFunc(_mem, _data, result); \ + return result; \ 
+ } + +#define __ATOMIC_IMPL(_glFunc, _dxFunc) \ + __ATOMIC_IMPL_TYPE(int, _glFunc, _dxFunc) \ + __ATOMIC_IMPL_TYPE(uint, _glFunc, _dxFunc) + +__ATOMIC_IMPL(atomicAdd, InterlockedAdd); +__ATOMIC_IMPL(atomicAnd, InterlockedAnd); +__ATOMIC_IMPL(atomicExchange, InterlockedExchange); +__ATOMIC_IMPL(atomicMax, InterlockedMax); +__ATOMIC_IMPL(atomicMin, InterlockedMin); +__ATOMIC_IMPL(atomicOr, InterlockedOr); +__ATOMIC_IMPL(atomicXor, InterlockedXor); + +int atomicCompSwap(int _mem, int _compare, int _data) +{ + int result; + InterlockedCompareExchange(_mem, _compare, _data, result); + return result; +} + +uint atomicCompSwap(uint _mem, uint _compare, uint _data) +{ + uint result; + InterlockedCompareExchange(_mem, _compare, _data, result); + return result; +} + +// InterlockedCompareStore + +#define barrier() GroupMemoryBarrierWithGroupSync() +#define memoryBarrier() GroupMemoryBarrierWithGroupSync() +#define memoryBarrierAtomicCounter() GroupMemoryBarrierWithGroupSync() +#define memoryBarrierBuffer() GroupMemoryBarrierWithGroupSync() +#define memoryBarrierImage() GroupMemoryBarrierWithGroupSync() +#define memoryBarrierShared() GroupMemoryBarrierWithGroupSync() +#define groupMemoryBarrier() GroupMemoryBarrierWithGroupSync() + +#else + +#define __IMAGE2D_XX(_name, _reg, _access) \ + layout(rgba8, binding=_reg) _access uniform highp image2D _name + +#define IMAGE2D_RO(_name, _reg) __IMAGE2D_XX(_name, _reg, readonly) +#define IMAGE2D_RW(_name, _reg) __IMAGE2D_XX(_name, _reg, readwrite) +#define IMAGE2D_WR(_name, _reg) __IMAGE2D_XX(_name, _reg, writeonly) + +#define __BUFFER_XX(_name, _type, _reg, _access) \ + layout(std430, binding=_reg) _access buffer _name ## Buffer \ + { \ + _type _name[]; \ + } + +#define BUFFER_RO(_name, _type, _reg) __BUFFER_XX(_name, _type, _reg, readonly) +#define BUFFER_RW(_name, _type, _reg) __BUFFER_XX(_name, _type, _reg, readwrite) +#define BUFFER_WR(_name, _type, _reg) __BUFFER_XX(_name, _type, _reg, writeonly) + +#define NUM_THREADS(_x, _y, 
_z) layout (local_size_x = _x, local_size_y = _y, local_size_z = _z) in; + +#endif // BGFX_SHADER_LANGUAGE_HLSL + +#endif // __cplusplus + +#endif // BGFX_COMPUTE_H_HEADER_GUARD diff --git a/src/bgfx_p.h b/src/bgfx_p.h index 184f8ff3d..cd8e9305b 100644 --- a/src/bgfx_p.h +++ b/src/bgfx_p.h @@ -138,6 +138,8 @@ namespace stl #define BGFX_STATE_TEX_MASK UINT64_C(0xff00000000000000) #define BGFX_STATE_TEX_COUNT 8 +#define BGFX_MAX_COMPUTE_BINDINGS 8 + #define BGFX_SAMPLER_DEFAULT_FLAGS UINT32_C(0x10000000) #define BGFX_RENDERER_DIRECT3D9_NAME "Direct3D 9" @@ -623,32 +625,60 @@ namespace bgfx void operator=(const CommandBuffer&); }; +#define SORT_KEY_RENDER_DRAW UINT64_C(0x0000000800000000) struct SortKey { - uint64_t encode() + uint64_t encodeDraw() { // | 3 2 1 0| // |fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210| - // | vvvvvsssssssssssttmmmmmmmmmdddddddddddddddddddddddd| - // | ^ ^ ^ ^ ^| - // | | | | | || + // | vvvvvsssssssssssdttpppppppppdddddddddddddddddddddddd| + // | ^ ^^ ^ ^ ^| + // | | || | | || + // | view-+ seq-+| +-trans +-program depth-+| + // | +-draw | - const uint64_t tmp0 = m_depth; - const uint64_t tmp1 = uint64_t(m_program)<<0x18; - const uint64_t tmp2 = uint64_t(m_trans )<<0x21; - const uint64_t tmp3 = uint64_t(m_seq )<<0x23; - const uint64_t tmp4 = uint64_t(m_view )<<0x2e; - const uint64_t key = tmp0|tmp1|tmp2|tmp3|tmp4; + const uint64_t depth = m_depth; + const uint64_t program = uint64_t(m_program)<<0x18; + const uint64_t trans = uint64_t(m_trans )<<0x21; + const uint64_t seq = uint64_t(m_seq )<<0x24; + const uint64_t view = uint64_t(m_view )<<0x2f; + const uint64_t key = depth|program|trans|SORT_KEY_RENDER_DRAW|seq|view; return key; } - void decode(uint64_t _key) + uint64_t encodeCompute() { - m_depth = _key & 0xffffffff; - m_program = (_key>>0x18)&(BGFX_CONFIG_MAX_PROGRAMS-1); - m_trans = (_key>>0x21)& 0x3; - m_seq = (_key>>0x23)& 0x7ff; - m_view = (_key>>0x2e)&(BGFX_CONFIG_MAX_VIEWS-1); + // | 3 2 1 0| + // 
|fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210| + // | vvvvvsssssssssssdppppppppp | + // | ^ ^^ ^ | + // | | || | | + // | view-+ seq-+| +-program | + // | +-draw | + + const uint64_t program = uint64_t(m_program)<<0x1a; + const uint64_t seq = uint64_t(m_seq )<<0x24; + const uint64_t view = uint64_t(m_view )<<0x2f; + const uint64_t key = program|seq|view; + return key; + } + + /// Returns true if item is a compute command, false if it is a draw call. + bool decode(uint64_t _key) + { + m_seq = (_key>>0x24)& 0x7ff; + m_view = (_key>>0x2f)&(BGFX_CONFIG_MAX_VIEWS-1); + if (_key & SORT_KEY_RENDER_DRAW) + { + m_depth = _key & 0xffffffff; + m_program = (_key>>0x18)&(BGFX_CONFIG_MAX_PROGRAMS-1); + m_trans = (_key>>0x21)& 0x3; + return false; // draw + } + + m_program = (_key>>0x1a)&(BGFX_CONFIG_MAX_PROGRAMS-1); + return true; // compute } void reset() @@ -660,12 +690,13 @@ namespace bgfx m_trans = 0; } - int32_t m_depth; + int32_t m_depth; uint16_t m_program; uint16_t m_seq; - uint8_t m_view; - uint8_t m_trans; + uint8_t m_view; + uint8_t m_trans; }; +#undef SORT_KEY_RENDER_DRAW BX_ALIGN_STRUCT_16(struct) Matrix4 { @@ -749,12 +780,6 @@ namespace bgfx uint32_t m_num; }; - struct Sampler - { - uint32_t m_flags; - uint16_t m_idx; - }; - #define CONSTANT_OPCODE_TYPE_SHIFT 27 #define CONSTANT_OPCODE_TYPE_MASK UINT32_C(0xf8000000) #define CONSTANT_OPCODE_LOC_SHIFT 11 @@ -934,17 +959,18 @@ namespace bgfx UniformHashMap m_uniforms; }; - struct RenderState + struct Sampler { - void reset() - { - m_constEnd = 0; - clear(); - } + uint32_t m_flags; + uint16_t m_idx; + }; + struct RenderDraw + { void clear() { - m_constBegin = m_constEnd; + m_constBegin = 0; + m_constEnd = 0; m_flags = BGFX_STATE_DEFAULT; m_stencil = packStencil(BGFX_STENCIL_DEFAULT, BGFX_STENCIL_DEFAULT); m_rgba = 0; @@ -987,12 +1013,61 @@ namespace bgfx uint16_t m_scissor; VertexBufferHandle m_vertexBuffer; - VertexDeclHandle m_vertexDecl; - IndexBufferHandle m_indexBuffer; + VertexDeclHandle m_vertexDecl; + IndexBufferHandle m_indexBuffer; 
VertexBufferHandle m_instanceDataBuffer; Sampler m_sampler[BGFX_STATE_TEX_COUNT]; }; + struct ComputeBinding + { + enum Enum + { + Image, + Buffer, + + Count + }; + + uint16_t m_idx; + uint8_t m_format; + uint8_t m_access; + uint8_t m_mip; + uint8_t m_type; + }; + + struct RenderCompute + { + void clear() + { + m_constBegin = 0; + m_constEnd = 0; + m_numX = 0; + m_numY = 0; + m_numZ = 0; + + for (uint32_t ii = 0; ii < BGFX_MAX_COMPUTE_BINDINGS; ++ii) + { + m_bind[ii].m_idx = invalidHandle; + } + } + + uint32_t m_constBegin; + uint32_t m_constEnd; + + uint16_t m_numX; + uint16_t m_numY; + uint16_t m_numZ; + + ComputeBinding m_bind[BGFX_MAX_COMPUTE_BINDINGS]; + }; + + union RenderItem + { + RenderDraw draw; + RenderCompute compute; + }; + struct Resolution { Resolution() @@ -1063,12 +1138,15 @@ namespace bgfx void start() { m_flags = BGFX_STATE_NONE; - m_state.reset(); + m_constBegin = 0; + m_constEnd = 0; + m_draw.clear(); + m_compute.clear(); m_matrixCache.reset(); m_rectCache.reset(); m_key.reset(); m_num = 0; - m_numRenderStates = 0; + m_numRenderItems = 0; m_numDropped = 0; m_iboffset = 0; m_vboffset = 0; @@ -1105,86 +1183,86 @@ namespace bgfx uint8_t blend = ( (_state&BGFX_STATE_BLEND_MASK)>>BGFX_STATE_BLEND_SHIFT)&0xff; // transparency sort order table m_key.m_trans = "\x0\x1\x1\x2\x2\x1\x2\x1\x2\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1"[( (blend)&0xf) + (!!blend)]; - m_state.m_flags = _state; - m_state.m_rgba = _rgba; + m_draw.m_flags = _state; + m_draw.m_rgba = _rgba; } void setStencil(uint32_t _fstencil, uint32_t _bstencil) { - m_state.m_stencil = packStencil(_fstencil, _bstencil); + m_draw.m_stencil = packStencil(_fstencil, _bstencil); } uint16_t setScissor(uint16_t _x, uint16_t _y, uint16_t _width, uint16_t _height) { uint16_t scissor = (uint16_t)m_rectCache.add(_x, _y, _width, _height); - m_state.m_scissor = scissor; + m_draw.m_scissor = scissor; return scissor; } void setScissor(uint16_t _cache) { - m_state.m_scissor = _cache; + m_draw.m_scissor = _cache; } 
uint32_t setTransform(const void* _mtx, uint16_t _num) { - m_state.m_matrix = m_matrixCache.add(_mtx, _num); - m_state.m_num = _num; + m_draw.m_matrix = m_matrixCache.add(_mtx, _num); + m_draw.m_num = _num; - return m_state.m_matrix; + return m_draw.m_matrix; } void setTransform(uint32_t _cache, uint16_t _num) { - m_state.m_matrix = _cache; - m_state.m_num = _num; + m_draw.m_matrix = _cache; + m_draw.m_num = _num; } void setIndexBuffer(IndexBufferHandle _handle, uint32_t _firstIndex, uint32_t _numIndices) { - m_state.m_startIndex = _firstIndex; - m_state.m_numIndices = _numIndices; - m_state.m_indexBuffer = _handle; + m_draw.m_startIndex = _firstIndex; + m_draw.m_numIndices = _numIndices; + m_draw.m_indexBuffer = _handle; } void setIndexBuffer(const TransientIndexBuffer* _tib, uint32_t _firstIndex, uint32_t _numIndices) { - m_state.m_indexBuffer = _tib->handle; - m_state.m_startIndex = _firstIndex; - m_state.m_numIndices = _numIndices; + m_draw.m_indexBuffer = _tib->handle; + m_draw.m_startIndex = _firstIndex; + m_draw.m_numIndices = _numIndices; m_discard = 0 == _numIndices; } void setVertexBuffer(VertexBufferHandle _handle, uint32_t _startVertex, uint32_t _numVertices) { BX_CHECK(_handle.idx < BGFX_CONFIG_MAX_VERTEX_BUFFERS, "Invalid vertex buffer handle. 
%d (< %d)", _handle.idx, BGFX_CONFIG_MAX_VERTEX_BUFFERS); - m_state.m_startVertex = _startVertex; - m_state.m_numVertices = _numVertices; - m_state.m_vertexBuffer = _handle; + m_draw.m_startVertex = _startVertex; + m_draw.m_numVertices = _numVertices; + m_draw.m_vertexBuffer = _handle; } void setVertexBuffer(const DynamicVertexBuffer& _dvb, uint32_t _numVertices) { - m_state.m_startVertex = _dvb.m_startVertex; - m_state.m_numVertices = bx::uint32_min(_dvb.m_numVertices, _numVertices); - m_state.m_vertexBuffer = _dvb.m_handle; - m_state.m_vertexDecl = _dvb.m_decl; + m_draw.m_startVertex = _dvb.m_startVertex; + m_draw.m_numVertices = bx::uint32_min(_dvb.m_numVertices, _numVertices); + m_draw.m_vertexBuffer = _dvb.m_handle; + m_draw.m_vertexDecl = _dvb.m_decl; } void setVertexBuffer(const TransientVertexBuffer* _tvb, uint32_t _startVertex, uint32_t _numVertices) { - m_state.m_startVertex = _startVertex; - m_state.m_numVertices = bx::uint32_min(_tvb->size/_tvb->stride, _numVertices); - m_state.m_vertexBuffer = _tvb->handle; - m_state.m_vertexDecl = _tvb->decl; + m_draw.m_startVertex = _startVertex; + m_draw.m_numVertices = bx::uint32_min(_tvb->size/_tvb->stride, _numVertices); + m_draw.m_vertexBuffer = _tvb->handle; + m_draw.m_vertexDecl = _tvb->decl; } void setInstanceDataBuffer(const InstanceDataBuffer* _idb, uint16_t _num) { - m_state.m_instanceDataOffset = _idb->offset; - m_state.m_instanceDataStride = _idb->stride; - m_state.m_numInstances = bx::uint16_min( (uint16_t)_idb->num, _num); - m_state.m_instanceDataBuffer = _idb->handle; + m_draw.m_instanceDataOffset = _idb->offset; + m_draw.m_instanceDataStride = _idb->stride; + m_draw.m_numInstances = bx::uint16_min( (uint16_t)_idb->num, _num); + m_draw.m_instanceDataBuffer = _idb->handle; BX_FREE(g_allocator, const_cast(_idb) ); } @@ -1197,7 +1275,7 @@ namespace bgfx void setTexture(uint8_t _stage, UniformHandle _sampler, TextureHandle _handle, uint32_t _flags) { m_flags |= BGFX_STATE_TEX0<<_stage; - Sampler& sampler 
= m_state.m_sampler[_stage]; + Sampler& sampler = m_draw.m_sampler[_stage]; sampler.m_idx = _handle.idx; sampler.m_flags = (_flags&BGFX_SAMPLER_DEFAULT_FLAGS) ? BGFX_SAMPLER_DEFAULT_FLAGS : _flags; @@ -1209,15 +1287,34 @@ namespace bgfx } } + void setImage(uint8_t _stage, UniformHandle _sampler, TextureHandle _handle, uint8_t _mip, TextureFormat::Enum _format, Access::Enum _access) + { + ComputeBinding& bind = m_compute.m_bind[_stage]; + bind.m_idx = _handle.idx; + bind.m_format = uint8_t(_format); + bind.m_access = uint8_t(_access); + bind.m_mip = _mip; + bind.m_type = uint8_t(ComputeBinding::Image); + + if (isValid(_sampler) + && (BX_ENABLED(BGFX_CONFIG_RENDERER_OPENGL) || BX_ENABLED(BGFX_CONFIG_RENDERER_OPENGLES) ) ) + { + uint32_t stage = _stage; + setUniform(_sampler, &stage); + } + } + void discard() { m_discard = false; - m_state.clear(); + m_draw.clear(); + m_compute.clear(); m_flags = BGFX_STATE_NONE; } uint32_t submit(uint8_t _id, int32_t _depth); uint32_t submitMask(uint32_t _viewMask, int32_t _depth); + uint32_t dispatch(uint8_t _id, ProgramHandle _handle, uint16_t _numX, uint16_t _numY, uint16_t _numZ); void sort(); bool checkAvailTransientIndexBuffer(uint32_t _num) @@ -1332,14 +1429,17 @@ namespace bgfx uint64_t m_sortKeys[BGFX_CONFIG_MAX_DRAW_CALLS]; uint16_t m_sortValues[BGFX_CONFIG_MAX_DRAW_CALLS]; - RenderState m_renderState[BGFX_CONFIG_MAX_DRAW_CALLS]; - RenderState m_state; + RenderItem m_renderItem[BGFX_CONFIG_MAX_DRAW_CALLS]; + RenderDraw m_draw; + RenderCompute m_compute; uint64_t m_flags; + uint32_t m_constBegin; + uint32_t m_constEnd; ConstantBuffer* m_constantBuffer; uint16_t m_num; - uint16_t m_numRenderStates; + uint16_t m_numRenderItems; uint16_t m_numDropped; MatrixCache m_matrixCache; @@ -2213,6 +2313,35 @@ namespace bgfx return handle; } + BGFX_API_FUNC(ProgramHandle createProgram(ShaderHandle _vsh) ) + { + if (!isValid(_vsh) ) + { + BX_WARN(false, "Compute shader is invalid (vsh %d).", _vsh.idx); + ProgramHandle invalid = 
BGFX_INVALID_HANDLE; + return invalid; + } + + ProgramHandle handle; + handle.idx = m_programHandle.alloc(); + + BX_WARN(isValid(handle), "Failed to allocate program handle."); + if (isValid(handle) ) + { + shaderIncRef(_vsh); + m_programRef[handle.idx].m_vsh = _vsh; + + CommandBuffer& cmdbuf = getCommandBuffer(CommandBuffer::CreateProgram); + cmdbuf.write(handle); + cmdbuf.write(_vsh); + + ShaderHandle invalid = BGFX_INVALID_HANDLE; + cmdbuf.write(invalid); + } + + return handle; + } + BGFX_API_FUNC(void destroyProgram(ProgramHandle _handle) ) { CommandBuffer& cmdbuf = getCommandBuffer(CommandBuffer::DestroyProgram); @@ -2687,6 +2816,29 @@ namespace bgfx return m_submit->submitMask(_viewMask, _depth); } + BGFX_API_FUNC(void setImage(uint8_t _stage, UniformHandle _sampler, TextureHandle _handle, uint8_t _mip, TextureFormat::Enum _format, Access::Enum _access) ) + { + m_submit->setImage(_stage, _sampler, _handle, _mip, _format, _access); + } + + BGFX_API_FUNC(void setImage(uint8_t _stage, UniformHandle _sampler, FrameBufferHandle _handle, uint8_t _attachment, TextureFormat::Enum _format, Access::Enum _access) ) + { + BX_CHECK(_attachment < g_caps.maxFBAttachments, "Frame buffer attachment index %d is invalid.", _attachment); + TextureHandle textureHandle = BGFX_INVALID_HANDLE; + if (isValid(_handle) ) + { + textureHandle = m_frameBufferRef[_handle.idx].m_th[_attachment]; + BX_CHECK(isValid(textureHandle), "Frame buffer texture %d is invalid.", _attachment); + } + + setImage(_stage, _sampler, textureHandle, 0, _format, _access); + } + + BGFX_API_FUNC(uint32_t dispatch(uint8_t _id, ProgramHandle _handle, uint16_t _numX, uint16_t _numY, uint16_t _numZ) ) + { + return m_submit->dispatch(_id, _handle, _numX, _numY, _numZ); + } + BGFX_API_FUNC(void discard() ) { m_submit->discard(); diff --git a/src/config.h b/src/config.h index fb81c5d36..3bc0bd3da 100644 --- a/src/config.h +++ b/src/config.h @@ -135,6 +135,7 @@ #endif // BGFX_CONFIG_MAX_RECT_CACHE #ifndef 
BGFX_CONFIG_MAX_VIEWS +// Do not change. Must be power of 2. # define BGFX_CONFIG_MAX_VIEWS 32 #endif // BGFX_CONFIG_MAX_VIEWS @@ -171,6 +172,7 @@ #endif // BGFX_CONFIG_MAX_FRAGMENT_SHADERS #ifndef BGFX_CONFIG_MAX_PROGRAMS +// Must be power of 2. # define BGFX_CONFIG_MAX_PROGRAMS 512 #endif // BGFX_CONFIG_MAX_PROGRAMS diff --git a/src/glcontext_egl.cpp b/src/glcontext_egl.cpp index fe63fb6e8..5a084f796 100644 --- a/src/glcontext_egl.cpp +++ b/src/glcontext_egl.cpp @@ -10,6 +10,14 @@ # if BGFX_USE_EGL +#ifndef EGL_CONTEXT_MAJOR_VERSION_KHR +# define EGL_CONTEXT_MAJOR_VERSION_KHR EGL_CONTEXT_CLIENT_VERSION +#endif // EGL_CONTEXT_MAJOR_VERSION_KHR + +#ifndef EGL_CONTEXT_MINOR_VERSION_KHR +# define EGL_CONTEXT_MINOR_VERSION_KHR 0x30FB +#endif // EGL_CONTEXT_MINOR_VERSION_KHR + namespace bgfx { #if BGFX_USE_GL_DYNAMIC_LIB @@ -137,9 +145,15 @@ EGL_IMPORT EGLint contextAttrs[] = { # if BGFX_CONFIG_RENDERER_OPENGLES >= 30 - EGL_CONTEXT_CLIENT_VERSION, 3, + EGL_CONTEXT_MAJOR_VERSION_KHR, 3, +# if BGFX_CONFIG_RENDERER_OPENGLES >= 31 + EGL_CONTEXT_MINOR_VERSION_KHR, 1, +# else +// EGL_CONTEXT_MINOR_VERSION_KHR, 0, +# endif // BGFX_CONFIG_RENDERER_OPENGLES >= 31 # elif BGFX_CONFIG_RENDERER_OPENGLES - EGL_CONTEXT_CLIENT_VERSION, 2, + EGL_CONTEXT_MAJOR_VERSION_KHR, 2, +// EGL_CONTEXT_MINOR_VERSION_KHR, 0, # endif // BGFX_CONFIG_RENDERER_ EGL_NONE diff --git a/src/glimports.h b/src/glimports.h index dc1806641..e6a2be6bb 100644 --- a/src/glimports.h +++ b/src/glimports.h @@ -32,13 +32,15 @@ #if GL_IMPORT_TYPEDEFS typedef void (GL_APIENTRYP GLDEBUGPROC)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); - typedef void (GL_APIENTRYP PFNGLACTIVETEXTUREPROC) (GLenum texture); typedef void (GL_APIENTRYP PFNGLATTACHSHADERPROC) (GLuint program, GLuint shader); typedef void (GL_APIENTRYP PFNGLBEGINQUERYPROC) (GLenum target, GLuint id); typedef void (GL_APIENTRYP PFNGLBINDBUFFERPROC) (GLenum target, GLuint buffer); +typedef void 
(GL_APIENTRYP PFNGLBINDBUFFERBASEPROC) (GLenum target, GLuint index, GLuint buffer); +typedef void (GL_APIENTRYP PFNGLBINDBUFFERRANGEPROC) (GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size); typedef void (GL_APIENTRYP PFNGLBINDFRAGDATALOCATIONPROC) (GLuint program, GLuint color, const GLchar *name); typedef void (GL_APIENTRYP PFNGLBINDFRAMEBUFFERPROC) (GLenum target, GLuint framebuffer); +typedef void (GL_APIENTRYP PFNGLBINDIMAGETEXTUREPROC) (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format); typedef void (GL_APIENTRYP PFNGLBINDRENDERBUFFERPROC) (GLenum target, GLuint renderbuffer); typedef void (GL_APIENTRYP PFNGLBINDSAMPLERPROC) (GLuint unit, GLuint sampler); typedef void (GL_APIENTRYP PFNGLBINDTEXTUREPROC) (GLenum target, GLuint texture); @@ -88,6 +90,8 @@ typedef void (GL_APIENTRYP PFNGLDETACHSHADERPROC) (GLuint program, GLu typedef void (GL_APIENTRYP PFNGLDISABLEPROC) (GLenum cap); typedef void (GL_APIENTRYP PFNGLDISABLEIPROC) (GLenum cap, GLuint index); typedef void (GL_APIENTRYP PFNGLDISABLEVERTEXATTRIBARRAYPROC) (GLuint index); +typedef void (GL_APIENTRYP PFNGLDISPATCHCOMPUTEPROC) (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z); +typedef void (GL_APIENTRYP PFNGLDISPATCHCOMPUTEINDIRECTPROC) (GLintptr indirect); typedef void (GL_APIENTRYP PFNGLDRAWARRAYSPROC) (GLenum mode, GLint first, GLsizei count); typedef void (GL_APIENTRYP PFNGLDRAWARRAYSINSTANCEDPROC) (GLenum mode, GLint first, GLsizei count, GLsizei instancecount); typedef void (GL_APIENTRYP PFNGLDRAWBUFFERPROC) (GLenum mode); @@ -120,6 +124,12 @@ typedef void (GL_APIENTRYP PFNGLGETPOINTERVPROC) (GLenum pname, void * typedef void (GL_APIENTRYP PFNGLGETPROGRAMBINARYPROC) (GLuint program, GLsizei bufSize, GLsizei *length, GLenum *binaryFormat, void *binary); typedef void (GL_APIENTRYP PFNGLGETPROGRAMINFOLOGPROC) (GLuint program, GLsizei bufSize, GLsizei *length, GLchar *infoLog); typedef void (GL_APIENTRYP 
PFNGLGETPROGRAMIVPROC) (GLuint program, GLenum pname, GLint *params); +typedef void (GL_APIENTRYP PFNGLGETPROGRAMINTERFACEIVPROC) (GLuint program, GLenum programInterface, GLenum pname, GLint *params); +typedef GLuint (GL_APIENTRYP PFNGLGETPROGRAMRESOURCEINDEXPROC) (GLuint program, GLenum programInterface, const GLchar *name); +typedef void (GL_APIENTRYP PFNGLGETPROGRAMRESOURCEIVPROC) (GLuint program, GLenum programInterface, GLuint index, GLsizei propCount, const GLenum *props, GLsizei bufSize, GLsizei *length, GLint *params); +typedef void (GL_APIENTRYP PFNGLGETPROGRAMRESOURCENAMEPROC) (GLuint program, GLenum programInterface, GLuint index, GLsizei bufSize, GLsizei *length, GLchar *name); +typedef GLint (GL_APIENTRYP PFNGLGETPROGRAMRESOURCELOCATIONPROC) (GLuint program, GLenum programInterface, const GLchar *name); +typedef GLint (GL_APIENTRYP PFNGLGETPROGRAMRESOURCELOCATIONINDEXPROC) (GLuint program, GLenum programInterface, const GLchar *name); typedef void (GL_APIENTRYP PFNGLGETQUERYIVPROC) (GLenum target, GLenum pname, GLint *params); typedef void (GL_APIENTRYP PFNGLGETQUERYOBJECTIVPROC) (GLuint id, GLenum pname, GLint *params); typedef void (GL_APIENTRYP PFNGLGETQUERYOBJECTI64VPROC) (GLuint id, GLenum pname, GLint64 *params); @@ -130,6 +140,7 @@ typedef void (GL_APIENTRYP PFNGLGETSHADERIVPROC) (GLuint shader, GLenu typedef const GLubyte* (GL_APIENTRYP PFNGLGETSTRINGPROC) (GLenum name); typedef GLint (GL_APIENTRYP PFNGLGETUNIFORMLOCATIONPROC) (GLuint program, const GLchar *name); typedef void (GL_APIENTRYP PFNGLLINKPROGRAMPROC) (GLuint program); +typedef void (GL_APIENTRYP PFNGLMEMORYBARRIERPROC) (GLbitfield barriers); typedef void (GL_APIENTRYP PFNGLOBJECTLABELPROC) (GLenum identifier, GLuint name, GLsizei length, const GLchar *label); typedef void (GL_APIENTRYP PFNGLOBJECTPTRLABELPROC) (const void *ptr, GLsizei length, const GLchar *label); typedef void (GL_APIENTRYP PFNGLPIXELSTOREIPROC) (GLenum pname, GLint param); @@ -194,8 +205,11 @@ 
GL_IMPORT______(false, PFNGLACTIVETEXTUREPROC, glActiveTextu GL_IMPORT______(false, PFNGLATTACHSHADERPROC, glAttachShader); GL_IMPORT______(true, PFNGLBEGINQUERYPROC, glBeginQuery); GL_IMPORT______(false, PFNGLBINDBUFFERPROC, glBindBuffer); +GL_IMPORT______(true, PFNGLBINDBUFFERBASEPROC, glBindBufferBase); +GL_IMPORT______(true, PFNGLBINDBUFFERRANGEPROC, glBindBufferRange); GL_IMPORT______(true, PFNGLBINDFRAGDATALOCATIONPROC, glBindFragDataLocation); GL_IMPORT______(true, PFNGLBINDFRAMEBUFFERPROC, glBindFramebuffer); +GL_IMPORT______(true, PFNGLBINDIMAGETEXTUREPROC, glBindImageTexture); GL_IMPORT______(true, PFNGLBINDRENDERBUFFERPROC, glBindRenderbuffer); GL_IMPORT______(true, PFNGLBINDSAMPLERPROC, glBindSampler); GL_IMPORT______(false, PFNGLBINDTEXTUREPROC, glBindTexture); @@ -243,6 +257,8 @@ GL_IMPORT______(false, PFNGLDETACHSHADERPROC, glDetachShade GL_IMPORT______(false, PFNGLDISABLEPROC, glDisable); GL_IMPORT______(true, PFNGLDISABLEIPROC, glDisablei); GL_IMPORT______(false, PFNGLDISABLEVERTEXATTRIBARRAYPROC, glDisableVertexAttribArray); +GL_IMPORT______(true, PFNGLDISPATCHCOMPUTEPROC, glDispatchCompute); +GL_IMPORT______(true, PFNGLDISPATCHCOMPUTEINDIRECTPROC, glDispatchComputeIndirect); GL_IMPORT______(false, PFNGLDRAWARRAYSPROC, glDrawArrays); GL_IMPORT______(true, PFNGLDRAWARRAYSINSTANCEDPROC, glDrawArraysInstanced); GL_IMPORT______(true, PFNGLDRAWBUFFERPROC, glDrawBuffer); @@ -275,6 +291,12 @@ GL_IMPORT______(true, PFNGLGETPOINTERVPROC, glGetPointerv GL_IMPORT______(true, PFNGLGETPROGRAMBINARYPROC, glGetProgramBinary); GL_IMPORT______(false, PFNGLGETPROGRAMIVPROC, glGetProgramiv); GL_IMPORT______(false, PFNGLGETPROGRAMINFOLOGPROC, glGetProgramInfoLog); +GL_IMPORT______(true, PFNGLGETPROGRAMINTERFACEIVPROC, glGetProgramInterfaceiv); +GL_IMPORT______(true, PFNGLGETPROGRAMRESOURCEINDEXPROC, glGetProgramResourceIndex); +GL_IMPORT______(true, PFNGLGETPROGRAMRESOURCEIVPROC, glGetProgramResourceiv); +GL_IMPORT______(true, PFNGLGETPROGRAMRESOURCENAMEPROC, 
glGetProgramResourceName); +GL_IMPORT______(true, PFNGLGETPROGRAMRESOURCELOCATIONPROC, glGetProgramResourceLocation); +GL_IMPORT______(true, PFNGLGETPROGRAMRESOURCELOCATIONINDEXPROC, glGetProgramResourceLocationIndex); GL_IMPORT______(true, PFNGLGETQUERYIVPROC, glGetQueryiv); GL_IMPORT______(true, PFNGLGETQUERYOBJECTIVPROC, glGetQueryObjectiv); GL_IMPORT______(true, PFNGLGETQUERYOBJECTI64VPROC, glGetQueryObjecti64v); @@ -285,6 +307,7 @@ GL_IMPORT______(false, PFNGLGETSHADERINFOLOGPROC, glGetShaderIn GL_IMPORT______(false, PFNGLGETSTRINGPROC, glGetString); GL_IMPORT______(false, PFNGLGETUNIFORMLOCATIONPROC, glGetUniformLocation); GL_IMPORT______(false, PFNGLLINKPROGRAMPROC, glLinkProgram); +GL_IMPORT______(true, PFNGLMEMORYBARRIERPROC, glMemoryBarrier); GL_IMPORT______(true, PFNGLOBJECTLABELPROC, glObjectLabel); GL_IMPORT______(true, PFNGLOBJECTPTRLABELPROC, glObjectPtrLabel); GL_IMPORT______(false, PFNGLPIXELSTOREIPROC, glPixelStorei); @@ -430,6 +453,19 @@ GL_IMPORT_____x(true, PFNGLBINDSAMPLERPROC, glBindSampler GL_IMPORT_____x(true, PFNGLSAMPLERPARAMETERFPROC, glSamplerParameterf); GL_IMPORT_____x(true, PFNGLSAMPLERPARAMETERIPROC, glSamplerParameteri); +GL_IMPORT_____x(true, PFNGLBINDBUFFERBASEPROC, glBindBufferBase); +GL_IMPORT_____x(true, PFNGLBINDBUFFERRANGEPROC, glBindBufferRange); +GL_IMPORT_____x(true, PFNGLBINDIMAGETEXTUREPROC, glBindImageTexture); +GL_IMPORT_____x(true, PFNGLGETPROGRAMINTERFACEIVPROC, glGetProgramInterfaceiv); +GL_IMPORT_____x(true, PFNGLGETPROGRAMRESOURCEINDEXPROC, glGetProgramResourceIndex); +GL_IMPORT_____x(true, PFNGLGETPROGRAMRESOURCEIVPROC, glGetProgramResourceiv); +GL_IMPORT_____x(true, PFNGLGETPROGRAMRESOURCENAMEPROC, glGetProgramResourceName); +GL_IMPORT_____x(true, PFNGLGETPROGRAMRESOURCELOCATIONPROC, glGetProgramResourceLocation); +GL_IMPORT_____x(true, PFNGLGETPROGRAMRESOURCELOCATIONINDEXPROC, glGetProgramResourceLocationIndex); +GL_IMPORT_____x(true, PFNGLMEMORYBARRIERPROC, glMemoryBarrier); +GL_IMPORT_____x(true, 
PFNGLDISPATCHCOMPUTEPROC, glDispatchCompute); +GL_IMPORT_____x(true, PFNGLDISPATCHCOMPUTEINDIRECTPROC, glDispatchComputeIndirect); + GL_IMPORT_NV___(true, PFNGLDRAWBUFFERSPROC, glDrawBuffers); GL_IMPORT_NV___(true, PFNGLGENQUERIESPROC, glGenQueries); GL_IMPORT_NV___(true, PFNGLDELETEQUERIESPROC, glDeleteQueries); diff --git a/src/image.cpp b/src/image.cpp index 13439a18a..48c72f9b4 100644 --- a/src/image.cpp +++ b/src/image.cpp @@ -10,7 +10,7 @@ namespace bgfx { - static const ImageBlockInfo s_imageBlockInfo[TextureFormat::Count] = + static const ImageBlockInfo s_imageBlockInfo[] = { { 4, 4, 4, 8 }, // BC1 { 8, 4, 4, 16 }, // BC2 @@ -48,8 +48,9 @@ namespace bgfx { 32, 1, 1, 4 }, // D32F { 8, 1, 1, 1 }, // D0S8 }; + BX_STATIC_ASSERT(TextureFormat::Count == BX_COUNTOF(s_imageBlockInfo) ); - static const char* s_textureFormatName[TextureFormat::Count] = + static const char* s_textureFormatName[] = { "BC1", // BC1 "BC2", // BC2 @@ -87,6 +88,7 @@ namespace bgfx "D32F", // D32F "D0S8", // D0S8 }; + BX_STATIC_ASSERT(TextureFormat::Count == BX_COUNTOF(s_textureFormatName) ); bool isCompressed(TextureFormat::Enum _format) { diff --git a/src/renderer_d3d11.cpp b/src/renderer_d3d11.cpp index 99e37916b..c23728953 100644 --- a/src/renderer_d3d11.cpp +++ b/src/renderer_d3d11.cpp @@ -197,7 +197,7 @@ namespace bgfx # define DXGI_FORMAT_B4G4R4A4_UNORM DXGI_FORMAT(115) #endif // DXGI_FORMAT_B4G4R4A4_UNORM - static const TextureFormatInfo s_textureFormat[TextureFormat::Count] = + static const TextureFormatInfo s_textureFormat[] = { { DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_UNKNOWN }, // BC1 { DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_UNKNOWN }, // BC2 @@ -235,6 +235,7 @@ namespace bgfx { DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_D32_FLOAT }, // D32F { DXGI_FORMAT_R24G8_TYPELESS, DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT }, // D0S8 }; + BX_STATIC_ASSERT(TextureFormat::Count == BX_COUNTOF(s_textureFormat) ); 
static const D3D11_INPUT_ELEMENT_DESC s_attrib[Attrib::Count] = { @@ -548,6 +549,7 @@ namespace bgfx | BGFX_CAPS_VERTEX_ATTRIB_HALF | BGFX_CAPS_FRAGMENT_DEPTH | BGFX_CAPS_BLEND_INDEPENDENT + | BGFX_CAPS_COMPUTE ); g_caps.maxTextureSize = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; g_caps.maxFBAttachments = bx::uint32_min(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS); @@ -677,7 +679,7 @@ namespace bgfx void createProgram(ProgramHandle _handle, ShaderHandle _vsh, ShaderHandle _fsh) BX_OVERRIDE { - m_program[_handle.idx].create(m_shaders[_vsh.idx], m_shaders[_fsh.idx]); + m_program[_handle.idx].create(&m_shaders[_vsh.idx], isValid(_fsh) ? &m_shaders[_fsh.idx] : NULL); } void destroyProgram(ProgramHandle _handle) BX_OVERRIDE @@ -847,9 +849,9 @@ namespace bgfx ProgramD3D11& program = m_program[_blitter.m_program.idx]; m_currentProgram = &program; - deviceCtx->VSSetShader( (ID3D11VertexShader*)program.m_vsh->m_ptr, NULL, 0); + deviceCtx->VSSetShader(program.m_vsh->m_vertexShader, NULL, 0); deviceCtx->VSSetConstantBuffers(0, 1, &program.m_vsh->m_buffer); - deviceCtx->PSSetShader( (ID3D11PixelShader*)program.m_fsh->m_ptr, NULL, 0); + deviceCtx->PSSetShader(program.m_fsh->m_pixelShader, NULL, 0); deviceCtx->PSSetConstantBuffers(0, 1, &program.m_fsh->m_buffer); VertexBufferD3D11& vb = m_vertexBuffers[_blitter.m_vb->handle.idx]; @@ -1634,11 +1636,11 @@ namespace bgfx ProgramD3D11& program = m_program[_clearQuad.m_program[numMrt].idx]; m_currentProgram = &program; - deviceCtx->VSSetShader( (ID3D11VertexShader*)program.m_vsh->m_ptr, NULL, 0); + deviceCtx->VSSetShader(program.m_vsh->m_vertexShader, NULL, 0); deviceCtx->VSSetConstantBuffers(0, 0, NULL); if (NULL != m_currentColor) { - deviceCtx->PSSetShader( (ID3D11PixelShader*)program.m_fsh->m_ptr, NULL, 0); + deviceCtx->PSSetShader(program.m_fsh->m_pixelShader, NULL, 0); deviceCtx->PSSetConstantBuffers(0, 0, NULL); } else @@ -1883,6 +1885,7 @@ namespace bgfx switch (magic) { + case 
BGFX_CHUNK_MAGIC_CSH: case BGFX_CHUNK_MAGIC_FSH: case BGFX_CHUNK_MAGIC_VSH: break; @@ -1903,7 +1906,10 @@ namespace bgfx m_numPredefined = 0; m_numUniforms = count; - BX_TRACE("Shader consts %d", count); + BX_TRACE("%s Shader consts %d" + , BGFX_CHUNK_MAGIC_FSH == magic ? "Fragment" : BGFX_CHUNK_MAGIC_VSH == magic ? "Vertex" : "Compute" + , count + ); uint8_t fragmentBit = fragment ? BGFX_UNIFORM_FRAGMENTBIT : 0; @@ -1954,10 +1960,10 @@ namespace bgfx } } - BX_TRACE("\t%s: %s, type %2d, num %2d, r.index %3d, r.count %2d" + BX_TRACE("\t%s: %s (%s), num %2d, r.index %3d, r.count %2d" , kind , name - , type + , getUniformTypeName(UniformType::Enum(type&~BGFX_UNIFORM_FRAGMENTBIT) ) , num , regIndex , regCount @@ -1973,20 +1979,25 @@ namespace bgfx const DWORD* code = (const DWORD*)reader.getDataPtr(); bx::skip(&reader, shaderSize+1); - if (fragment) + if (BGFX_CHUNK_MAGIC_FSH == magic) { - DX_CHECK(s_renderD3D11->m_device->CreatePixelShader(code, shaderSize, NULL, (ID3D11PixelShader**)&m_ptr) ); + DX_CHECK(s_renderD3D11->m_device->CreatePixelShader(code, shaderSize, NULL, &m_pixelShader) ); BGFX_FATAL(NULL != m_ptr, bgfx::Fatal::InvalidShader, "Failed to create fragment shader."); } - else + else if (BGFX_CHUNK_MAGIC_VSH == magic) { m_hash = bx::hashMurmur2A(code, shaderSize); m_code = alloc(shaderSize); memcpy(m_code->data, code, shaderSize); - DX_CHECK(s_renderD3D11->m_device->CreateVertexShader(code, shaderSize, NULL, (ID3D11VertexShader**)&m_ptr) ); + DX_CHECK(s_renderD3D11->m_device->CreateVertexShader(code, shaderSize, NULL, &m_vertexShader) ); BGFX_FATAL(NULL != m_ptr, bgfx::Fatal::InvalidShader, "Failed to create vertex shader."); } + else + { + DX_CHECK(s_renderD3D11->m_device->CreateComputeShader(code, shaderSize, NULL, &m_computeShader) ); + BGFX_FATAL(NULL != m_ptr, bgfx::Fatal::InvalidShader, "Failed to create compute shader."); + } bx::read(&reader, m_attrMask, sizeof(m_attrMask) ); @@ -2056,8 +2067,9 @@ namespace bgfx uint32_t kk = 0; const bool 
compressed = isCompressed(TextureFormat::Enum(m_textureFormat) ); + const bool swizzle = TextureFormat::BGRA8 == m_textureFormat && 0 != (m_flags&BGFX_TEXTURE_COMPUTE_WRITE); - BX_TRACE("Texture %3d: %s (requested: %s), %dx%d%s%s." + BX_TRACE("Texture %3d: %s (requested: %s), %dx%d%s%s%s." , this - s_renderD3D11->m_textures , getName( (TextureFormat::Enum)m_textureFormat) , getName( (TextureFormat::Enum)m_requestedFormat) @@ -2065,6 +2077,7 @@ namespace bgfx , textureHeight , imageContainer.m_cubeMap ? "x6" : "" , 0 != (m_flags&BGFX_TEXTURE_RT_MASK) ? " (render target)" : "" + , swizzle ? " (swizzle BGRA8 -> RGBA8)" : "" ); for (uint8_t side = 0, numSides = imageContainer.m_cubeMap ? 6 : 1; side < numSides; ++side) @@ -2095,7 +2108,7 @@ namespace bgfx } else if (compressed) { - srd[kk].SysMemPitch = (mip.m_width/blockInfo.blockWidth)*mip.m_blockSize; + srd[kk].SysMemPitch = (mip.m_width /blockInfo.blockWidth )*mip.m_blockSize; srd[kk].SysMemSlicePitch = (mip.m_height/blockInfo.blockHeight)*srd[kk].SysMemPitch; } else @@ -2103,6 +2116,11 @@ namespace bgfx srd[kk].SysMemPitch = mip.m_width*mip.m_bpp/8; } + if (swizzle) + { +// imageSwizzleBgra8(width, height, mip.m_width*4, data, temp); + } + srd[kk].SysMemSlicePitch = mip.m_height*srd[kk].SysMemPitch; ++kk; } @@ -2113,17 +2131,23 @@ namespace bgfx } } - D3D11_SHADER_RESOURCE_VIEW_DESC srvd; - memset(&srvd, 0, sizeof(srvd) ); - srvd.Format = s_textureFormat[m_textureFormat].m_fmtSrv; - - const DXGI_FORMAT format = s_textureFormat[m_textureFormat].m_fmt; - const bool bufferOnly = 0 != (m_flags&BGFX_TEXTURE_RT_BUFFER_ONLY); + const bool computeWrite = 0 != (m_flags&BGFX_TEXTURE_COMPUTE_WRITE); const bool renderTarget = 0 != (m_flags&BGFX_TEXTURE_RT_MASK); const uint32_t msaaQuality = bx::uint32_satsub( (m_flags&BGFX_TEXTURE_RT_MSAA_MASK)>>BGFX_TEXTURE_RT_MSAA_SHIFT, 1); const DXGI_SAMPLE_DESC& msaa = s_msaa[msaaQuality]; + D3D11_SHADER_RESOURCE_VIEW_DESC srvd; + memset(&srvd, 0, sizeof(srvd) ); + srvd.Format = 
s_textureFormat[m_textureFormat].m_fmtSrv; + DXGI_FORMAT format = s_textureFormat[m_textureFormat].m_fmt; + + if (swizzle) + { + format = DXGI_FORMAT_R8G8B8A8_UNORM; + srvd.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + } + switch (m_type) { case Texture2D: @@ -2150,6 +2174,12 @@ namespace bgfx desc.Usage = D3D11_USAGE_DEFAULT; } + if (computeWrite) + { + desc.BindFlags |= D3D11_BIND_UNORDERED_ACCESS; + desc.Usage = D3D11_USAGE_DEFAULT; + } + if (imageContainer.m_cubeMap) { desc.ArraySize = 6; @@ -2182,6 +2212,12 @@ namespace bgfx desc.CPUAccessFlags = 0; desc.MiscFlags = 0; + if (computeWrite) + { + desc.BindFlags |= D3D11_BIND_UNORDERED_ACCESS; + desc.Usage = D3D11_USAGE_DEFAULT; + } + srvd.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; srvd.Texture3D.MipLevels = numMips; @@ -2195,6 +2231,11 @@ namespace bgfx DX_CHECK(s_renderD3D11->m_device->CreateShaderResourceView(m_ptr, &srvd, &m_srv) ); } + if (computeWrite) + { + DX_CHECK(s_renderD3D11->m_device->CreateUnorderedAccessView(m_ptr, NULL, &m_uav) ); + } + if (convert && 0 != kk) { @@ -2214,6 +2255,7 @@ namespace bgfx void TextureD3D11::destroy() { DX_RELEASE(m_srv, 0); + DX_RELEASE(m_uav, 0); DX_RELEASE(m_ptr, 0); } @@ -2376,8 +2418,8 @@ namespace bgfx _render->sort(); - RenderState currentState; - currentState.reset(); + RenderDraw currentState; + currentState.clear(); currentState.m_flags = BGFX_STATE_NONE; currentState.m_stencil = packStencil(BGFX_STENCIL_NONE, BGFX_STENCIL_NONE); @@ -2409,6 +2451,7 @@ namespace bgfx PrimInfo prim = s_primInfo[primIndex]; deviceCtx->IASetPrimitiveTopology(prim.m_type); + bool wasCompute = false; bool viewHasScissor = false; Rect viewScissorRect; viewScissorRect.clear(); @@ -2422,26 +2465,13 @@ namespace bgfx { for (uint32_t item = 0, numItems = _render->m_num; item < numItems; ++item) { - key.decode(_render->m_sortKeys[item]); - const RenderState& state = _render->m_renderState[_render->m_sortValues[item] ]; + const bool isCompute = key.decode(_render->m_sortKeys[item]); + const 
bool viewChanged = key.m_view != view; - const uint64_t newFlags = state.m_flags; - uint64_t changedFlags = currentState.m_flags ^ state.m_flags; - currentState.m_flags = newFlags; + const RenderItem& renderItem = _render->m_renderItem[_render->m_sortValues[item] ]; - const uint64_t newStencil = state.m_stencil; - uint64_t changedStencil = currentState.m_stencil ^ state.m_stencil; - currentState.m_stencil = newStencil; - - if (key.m_view != view) + if (viewChanged) { - currentState.clear(); - currentState.m_scissor = !state.m_scissor; - changedFlags = BGFX_STATE_MASK; - changedStencil = packStencil(BGFX_STENCIL_MASK, BGFX_STENCIL_MASK); - currentState.m_flags = newFlags; - currentState.m_stencil = newStencil; - PIX_ENDEVENT(); PIX_BEGINEVENT(D3DCOLOR_RGBA(0xff, 0x00, 0x00, 0xff), s_viewNameW[key.m_view]); @@ -2473,7 +2503,147 @@ namespace bgfx { clearQuad(_clearQuad, rect, clear); } + } + if (isCompute) + { + if (!wasCompute) + { + wasCompute = true; + + ID3D11ShaderResourceView* srv[BGFX_STATE_TEX_COUNT] = {}; + deviceCtx->VSSetShaderResources(0, BGFX_STATE_TEX_COUNT, srv); + deviceCtx->PSSetShaderResources(0, BGFX_STATE_TEX_COUNT, srv); + + ID3D11SamplerState* sampler[BGFX_STATE_TEX_COUNT] = {}; + deviceCtx->VSSetSamplers(0, BGFX_STATE_TEX_COUNT, sampler); + deviceCtx->PSSetSamplers(0, BGFX_STATE_TEX_COUNT, sampler); + } + + const RenderCompute& compute = renderItem.compute; + + bool programChanged = false; + bool constantsChanged = compute.m_constBegin < compute.m_constEnd; + rendererUpdateUniforms(this, _render->m_constantBuffer, compute.m_constBegin, compute.m_constEnd); + + if (key.m_program != programIdx) + { + programIdx = key.m_program; + + ProgramD3D11& program = m_program[key.m_program]; + m_currentProgram = &program; + + deviceCtx->CSSetShader(program.m_vsh->m_computeShader, NULL, 0); + deviceCtx->CSSetConstantBuffers(0, 1, &program.m_vsh->m_buffer); + + programChanged = + constantsChanged = true; + } + + if (invalidHandle != programIdx) + { + 
ProgramD3D11& program = m_program[programIdx]; + + if (constantsChanged) + { + ConstantBuffer* vcb = program.m_vsh->m_constantBuffer; + if (NULL != vcb) + { + commit(*vcb); + } + } + + if (constantsChanged + || program.m_numPredefined > 0) + { + commitShaderConstants(); + } + } + + ID3D11UnorderedAccessView* uav[BGFX_MAX_COMPUTE_BINDINGS] = {}; + ID3D11ShaderResourceView* srv[BGFX_MAX_COMPUTE_BINDINGS] = {}; + ID3D11SamplerState* sampler[BGFX_STATE_TEX_COUNT] = {}; + + for (uint32_t ii = 0; ii < BGFX_MAX_COMPUTE_BINDINGS; ++ii) + { + const ComputeBinding& bind = compute.m_bind[ii]; + if (invalidHandle != bind.m_idx) + { + switch (bind.m_type) + { + case ComputeBinding::Image: + { + const TextureD3D11& texture = m_textures[bind.m_idx]; + if (Access::Read != bind.m_access) + { + uav[ii] = texture.m_uav; + } + else + { + srv[ii] = texture.m_srv; + sampler[ii] = texture.m_sampler; + } + } + break; + + case ComputeBinding::Buffer: + { + const VertexBufferD3D11& vertexBuffer = m_vertexBuffers[bind.m_idx]; + BX_UNUSED(vertexBuffer); + } + break; + } + } + } + + deviceCtx->CSSetUnorderedAccessViews(0, BGFX_MAX_COMPUTE_BINDINGS, uav, NULL); + deviceCtx->CSSetShaderResources(0, BGFX_MAX_COMPUTE_BINDINGS, srv); + deviceCtx->CSSetSamplers(0, BGFX_MAX_COMPUTE_BINDINGS, sampler); + + deviceCtx->Dispatch(compute.m_numX, compute.m_numY, compute.m_numZ); + + continue; + } + + if (wasCompute) + { + wasCompute = false; + + programIdx = invalidHandle; + m_currentProgram = NULL; + + deviceCtx->CSSetShader(NULL, NULL, 0); + + ID3D11UnorderedAccessView* uav[BGFX_STATE_TEX_COUNT] = {}; + deviceCtx->CSSetUnorderedAccessViews(0, BGFX_STATE_TEX_COUNT, uav, NULL); + + ID3D11ShaderResourceView* srv[BGFX_STATE_TEX_COUNT] = {}; + deviceCtx->CSSetShaderResources(0, BGFX_STATE_TEX_COUNT, srv); + + ID3D11SamplerState* samplers[BGFX_STATE_TEX_COUNT] = {}; + m_deviceCtx->CSSetSamplers(0, BGFX_STATE_TEX_COUNT, samplers); + } + + const RenderDraw& draw = renderItem.draw; + + const uint64_t newFlags = 
draw.m_flags; + uint64_t changedFlags = currentState.m_flags ^ draw.m_flags; + currentState.m_flags = newFlags; + + const uint64_t newStencil = draw.m_stencil; + uint64_t changedStencil = currentState.m_stencil ^ draw.m_stencil; + currentState.m_stencil = newStencil; + + if (viewChanged) + { + currentState.clear(); + currentState.m_scissor = !draw.m_scissor; + changedFlags = BGFX_STATE_MASK; + changedStencil = packStencil(BGFX_STENCIL_MASK, BGFX_STENCIL_MASK); + currentState.m_flags = newFlags; + currentState.m_stencil = newStencil; + + uint64_t newFlags = renderItem.draw.m_flags; setBlendState(newFlags); setDepthStencilState(newFlags, packStencil(BGFX_STENCIL_DEFAULT, BGFX_STENCIL_DEFAULT) ); @@ -2486,7 +2656,7 @@ namespace bgfx } } - uint16_t scissor = state.m_scissor; + uint16_t scissor = draw.m_scissor; if (currentState.m_scissor != scissor) { currentState.m_scissor = scissor; @@ -2540,7 +2710,7 @@ namespace bgfx { if ( (BGFX_STATE_BLEND_MASK|BGFX_STATE_BLEND_EQUATION_MASK|BGFX_STATE_ALPHA_WRITE|BGFX_STATE_RGB_WRITE) & changedFlags) { - setBlendState(newFlags, state.m_rgba); + setBlendState(newFlags, draw.m_rgba); } if ( (BGFX_STATE_CULL_MASK|BGFX_STATE_MSAA) & changedFlags) @@ -2564,8 +2734,8 @@ namespace bgfx } bool programChanged = false; - bool constantsChanged = state.m_constBegin < state.m_constEnd; - rendererUpdateUniforms(this, _render->m_constantBuffer, state.m_constBegin, state.m_constEnd); + bool constantsChanged = draw.m_constBegin < draw.m_constEnd; + rendererUpdateUniforms(this, _render->m_constantBuffer, draw.m_constBegin, draw.m_constEnd); if (key.m_program != programIdx) { @@ -2583,13 +2753,14 @@ namespace bgfx ProgramD3D11& program = m_program[programIdx]; m_currentProgram = &program; - deviceCtx->VSSetShader( (ID3D11VertexShader*)program.m_vsh->m_ptr, NULL, 0); - deviceCtx->VSSetConstantBuffers(0, 1, &program.m_vsh->m_buffer); + const ShaderD3D11* vsh = program.m_vsh; + deviceCtx->VSSetShader(vsh->m_vertexShader, NULL, 0); + 
deviceCtx->VSSetConstantBuffers(0, 1, &vsh->m_buffer); if (NULL != m_currentColor) { const ShaderD3D11* fsh = program.m_fsh; - deviceCtx->PSSetShader( (ID3D11PixelShader*)fsh->m_ptr, NULL, 0); + deviceCtx->PSSetShader(fsh->m_pixelShader, NULL, 0); deviceCtx->PSSetConstantBuffers(0, 1, &fsh->m_buffer); } else @@ -2705,15 +2876,15 @@ namespace bgfx case PredefinedUniform::Model: { - const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix]; - setShaderConstant(flags, predefined.m_loc, model.un.val, bx::uint32_min(state.m_num*4, predefined.m_count) ); + const Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix]; + setShaderConstant(flags, predefined.m_loc, model.un.val, bx::uint32_min(draw.m_num*4, predefined.m_count) ); } break; case PredefinedUniform::ModelView: { Matrix4 modelView; - const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix]; + const Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix]; bx::float4x4_mul(&modelView.un.f4x4, &model.un.f4x4, &_render->m_view[view].un.f4x4); setShaderConstant(flags, predefined.m_loc, modelView.un.val, bx::uint32_min(4, predefined.m_count) ); } @@ -2722,7 +2893,7 @@ namespace bgfx case PredefinedUniform::ModelViewProj: { Matrix4 modelViewProj; - const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix]; + const Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix]; bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProj[view].un.f4x4); setShaderConstant(flags, predefined.m_loc, modelViewProj.un.val, bx::uint32_min(4, predefined.m_count) ); } @@ -2753,7 +2924,7 @@ namespace bgfx uint64_t flag = BGFX_STATE_TEX0; for (uint32_t stage = 0; stage < BGFX_STATE_TEX_COUNT; ++stage) { - const Sampler& sampler = state.m_sampler[stage]; + const Sampler& sampler = draw.m_sampler[stage]; Sampler& current = currentState.m_sampler[stage]; if (current.m_idx != sampler.m_idx || current.m_flags != sampler.m_flags @@ -2784,33 +2955,33 @@ namespace bgfx } if 
(programChanged - || currentState.m_vertexBuffer.idx != state.m_vertexBuffer.idx - || currentState.m_instanceDataBuffer.idx != state.m_instanceDataBuffer.idx - || currentState.m_instanceDataOffset != state.m_instanceDataOffset - || currentState.m_instanceDataStride != state.m_instanceDataStride) + || currentState.m_vertexBuffer.idx != draw.m_vertexBuffer.idx + || currentState.m_instanceDataBuffer.idx != draw.m_instanceDataBuffer.idx + || currentState.m_instanceDataOffset != draw.m_instanceDataOffset + || currentState.m_instanceDataStride != draw.m_instanceDataStride) { - currentState.m_vertexBuffer = state.m_vertexBuffer; - currentState.m_instanceDataBuffer.idx = state.m_instanceDataBuffer.idx; - currentState.m_instanceDataOffset = state.m_instanceDataOffset; - currentState.m_instanceDataStride = state.m_instanceDataStride; + currentState.m_vertexBuffer = draw.m_vertexBuffer; + currentState.m_instanceDataBuffer.idx = draw.m_instanceDataBuffer.idx; + currentState.m_instanceDataOffset = draw.m_instanceDataOffset; + currentState.m_instanceDataStride = draw.m_instanceDataStride; - uint16_t handle = state.m_vertexBuffer.idx; + uint16_t handle = draw.m_vertexBuffer.idx; if (invalidHandle != handle) { const VertexBufferD3D11& vb = m_vertexBuffers[handle]; - uint16_t decl = !isValid(vb.m_decl) ? state.m_vertexDecl.idx : vb.m_decl.idx; + uint16_t decl = !isValid(vb.m_decl) ? 
draw.m_vertexDecl.idx : vb.m_decl.idx; const VertexDecl& vertexDecl = m_vertexDecls[decl]; uint32_t stride = vertexDecl.m_stride; uint32_t offset = 0; deviceCtx->IASetVertexBuffers(0, 1, &vb.m_ptr, &stride, &offset); - if (isValid(state.m_instanceDataBuffer) ) + if (isValid(draw.m_instanceDataBuffer) ) { - const VertexBufferD3D11& inst = m_vertexBuffers[state.m_instanceDataBuffer.idx]; - uint32_t instStride = state.m_instanceDataStride; - deviceCtx->IASetVertexBuffers(1, 1, &inst.m_ptr, &instStride, &state.m_instanceDataOffset); - setInputLayout(vertexDecl, m_program[programIdx], state.m_instanceDataStride/16); + const VertexBufferD3D11& inst = m_vertexBuffers[draw.m_instanceDataBuffer.idx]; + uint32_t instStride = draw.m_instanceDataStride; + deviceCtx->IASetVertexBuffers(1, 1, &inst.m_ptr, &instStride, &draw.m_instanceDataOffset); + setInputLayout(vertexDecl, m_program[programIdx], draw.m_instanceDataStride/16); } else { @@ -2824,11 +2995,11 @@ namespace bgfx } } - if (currentState.m_indexBuffer.idx != state.m_indexBuffer.idx) + if (currentState.m_indexBuffer.idx != draw.m_indexBuffer.idx) { - currentState.m_indexBuffer = state.m_indexBuffer; + currentState.m_indexBuffer = draw.m_indexBuffer; - uint16_t handle = state.m_indexBuffer.idx; + uint16_t handle = draw.m_indexBuffer.idx; if (invalidHandle != handle) { const IndexBufferD3D11& ib = m_indexBuffers[handle]; @@ -2842,11 +3013,11 @@ namespace bgfx if (isValid(currentState.m_vertexBuffer) ) { - uint32_t numVertices = state.m_numVertices; + uint32_t numVertices = draw.m_numVertices; if (UINT32_MAX == numVertices) { const VertexBufferD3D11& vb = m_vertexBuffers[currentState.m_vertexBuffer.idx]; - uint16_t decl = !isValid(vb.m_decl) ? state.m_vertexDecl.idx : vb.m_decl.idx; + uint16_t decl = !isValid(vb.m_decl) ? 
draw.m_vertexDecl.idx : vb.m_decl.idx; const VertexDecl& vertexDecl = m_vertexDecls[decl]; numVertices = vb.m_size/vertexDecl.m_stride; } @@ -2856,33 +3027,33 @@ namespace bgfx uint32_t numInstances = 0; uint32_t numPrimsRendered = 0; - if (isValid(state.m_indexBuffer) ) + if (isValid(draw.m_indexBuffer) ) { - if (UINT32_MAX == state.m_numIndices) + if (UINT32_MAX == draw.m_numIndices) { - numIndices = m_indexBuffers[state.m_indexBuffer.idx].m_size/2; + numIndices = m_indexBuffers[draw.m_indexBuffer.idx].m_size/2; numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub; - numInstances = state.m_numInstances; - numPrimsRendered = numPrimsSubmitted*state.m_numInstances; + numInstances = draw.m_numInstances; + numPrimsRendered = numPrimsSubmitted*draw.m_numInstances; deviceCtx->DrawIndexedInstanced(numIndices - , state.m_numInstances + , draw.m_numInstances , 0 - , state.m_startVertex + , draw.m_startVertex , 0 ); } - else if (prim.m_min <= state.m_numIndices) + else if (prim.m_min <= draw.m_numIndices) { - numIndices = state.m_numIndices; + numIndices = draw.m_numIndices; numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub; - numInstances = state.m_numInstances; - numPrimsRendered = numPrimsSubmitted*state.m_numInstances; + numInstances = draw.m_numInstances; + numPrimsRendered = numPrimsSubmitted*draw.m_numInstances; deviceCtx->DrawIndexedInstanced(numIndices - , state.m_numInstances - , state.m_startIndex - , state.m_startVertex + , draw.m_numInstances + , draw.m_startIndex + , draw.m_startVertex , 0 ); } @@ -2890,12 +3061,12 @@ namespace bgfx else { numPrimsSubmitted = numVertices/prim.m_div - prim.m_sub; - numInstances = state.m_numInstances; - numPrimsRendered = numPrimsSubmitted*state.m_numInstances; + numInstances = draw.m_numInstances; + numPrimsRendered = numPrimsSubmitted*draw.m_numInstances; deviceCtx->DrawInstanced(numVertices - , state.m_numInstances - , state.m_startVertex + , draw.m_numInstances + , draw.m_startVertex , 0 ); } diff --git 
a/src/renderer_d3d11.h b/src/renderer_d3d11.h index ac068c016..0448537c0 100644 --- a/src/renderer_d3d11.h +++ b/src/renderer_d3d11.h @@ -156,7 +156,13 @@ namespace bgfx } } - IUnknown* m_ptr; + union + { + ID3D11ComputeShader* m_computeShader; + ID3D11PixelShader* m_pixelShader; + ID3D11VertexShader* m_vertexShader; + IUnknown* m_ptr; + }; const Memory* m_code; ID3D11Buffer* m_buffer; ConstantBuffer* m_constantBuffer; @@ -178,17 +184,20 @@ namespace bgfx { } - void create(const ShaderD3D11& _vsh, const ShaderD3D11& _fsh) + void create(const ShaderD3D11* _vsh, const ShaderD3D11* _fsh) { - BX_CHECK(NULL != _vsh.m_ptr, "Vertex shader doesn't exist."); - m_vsh = &_vsh; - memcpy(&m_predefined[0], _vsh.m_predefined, _vsh.m_numPredefined*sizeof(PredefinedUniform) ); - m_numPredefined = _vsh.m_numPredefined; + BX_CHECK(NULL != _vsh->m_ptr, "Vertex shader doesn't exist."); + m_vsh = _vsh; + memcpy(&m_predefined[0], _vsh->m_predefined, _vsh->m_numPredefined*sizeof(PredefinedUniform) ); + m_numPredefined = _vsh->m_numPredefined; - BX_CHECK(NULL != _fsh.m_ptr, "Fragment shader doesn't exist."); - m_fsh = &_fsh; - memcpy(&m_predefined[m_numPredefined], _fsh.m_predefined, _fsh.m_numPredefined*sizeof(PredefinedUniform) ); - m_numPredefined += _fsh.m_numPredefined; + if (NULL != _fsh) + { + BX_CHECK(NULL != _fsh->m_ptr, "Fragment shader doesn't exist."); + m_fsh = _fsh; + memcpy(&m_predefined[m_numPredefined], _fsh->m_predefined, _fsh->m_numPredefined*sizeof(PredefinedUniform) ); + m_numPredefined += _fsh->m_numPredefined; + } } void destroy() @@ -217,6 +226,7 @@ namespace bgfx TextureD3D11() : m_ptr(NULL) , m_srv(NULL) + , m_uav(NULL) , m_sampler(NULL) , m_numMips(0) { @@ -236,6 +246,7 @@ namespace bgfx }; ID3D11ShaderResourceView* m_srv; + ID3D11UnorderedAccessView* m_uav; ID3D11SamplerState* m_sampler; uint32_t m_flags; uint8_t m_type; diff --git a/src/renderer_d3d9.cpp b/src/renderer_d3d9.cpp index 2e9503407..9b181e0e4 100644 --- a/src/renderer_d3d9.cpp +++ 
b/src/renderer_d3d9.cpp @@ -183,7 +183,7 @@ namespace bgfx D3DFORMAT m_fmt; }; - static TextureFormatInfo s_textureFormat[TextureFormat::Count] = + static TextureFormatInfo s_textureFormat[] = { { D3DFMT_DXT1 }, // BC1 { D3DFMT_DXT3 }, // BC2 @@ -225,6 +225,7 @@ namespace bgfx { D3DFMT_INTZ /*D3DFMT_S8_LOCKABLE*/ }, // D0S8 #endif // defined(D3D_DISABLE_9EX) }; + BX_STATIC_ASSERT(TextureFormat::Count == BX_COUNTOF(s_textureFormat) ); static ExtendedFormat s_extendedFormats[ExtendedFormat::Count] = { @@ -822,8 +823,8 @@ namespace bgfx DX_CHECK(device->SetRenderState(D3DRS_FILLMODE, D3DFILL_SOLID) ); ProgramD3D9& program = m_program[_blitter.m_program.idx]; - DX_CHECK(device->SetVertexShader( (IDirect3DVertexShader9*)program.m_vsh->m_ptr) ); - DX_CHECK(device->SetPixelShader( (IDirect3DPixelShader9*)program.m_fsh->m_ptr) ); + DX_CHECK(device->SetVertexShader(program.m_vsh->m_vertexShader) ); + DX_CHECK(device->SetPixelShader(program.m_fsh->m_pixelShader) ); VertexBufferD3D9& vb = m_vertexBuffers[_blitter.m_vb->handle.idx]; VertexDeclaration& vertexDecl = m_vertexDecls[_blitter.m_vb->decl.idx]; @@ -1735,13 +1736,15 @@ namespace bgfx if (fragment) { - DX_CHECK(s_renderD3D9->m_device->CreatePixelShader(code, (IDirect3DPixelShader9**)&m_ptr) ); - BGFX_FATAL(NULL != m_ptr, bgfx::Fatal::InvalidShader, "Failed to create fragment shader."); + m_type = 1; + DX_CHECK(s_renderD3D9->m_device->CreatePixelShader(code, &m_pixelShader) ); + BGFX_FATAL(NULL != m_pixelShader, bgfx::Fatal::InvalidShader, "Failed to create fragment shader."); } else { - DX_CHECK(s_renderD3D9->m_device->CreateVertexShader(code, (IDirect3DVertexShader9**)&m_ptr) ); - BGFX_FATAL(NULL != m_ptr, bgfx::Fatal::InvalidShader, "Failed to create vertex shader."); + m_type = 0; + DX_CHECK(s_renderD3D9->m_device->CreateVertexShader(code, &m_vertexShader) ); + BGFX_FATAL(NULL != m_vertexShader, bgfx::Fatal::InvalidShader, "Failed to create vertex shader."); } } @@ -2451,8 +2454,8 @@ namespace bgfx _render->sort(); - 
RenderState currentState; - currentState.reset(); + RenderDraw currentState; + currentState.clear(); currentState.m_flags = BGFX_STATE_NONE; currentState.m_stencil = packStencil(BGFX_STENCIL_NONE, BGFX_STENCIL_NONE); @@ -2496,21 +2499,28 @@ namespace bgfx { for (uint32_t item = 0, numItems = _render->m_num; item < numItems; ++item) { - key.decode(_render->m_sortKeys[item]); - const RenderState& state = _render->m_renderState[_render->m_sortValues[item] ]; + const bool isCompute = key.decode(_render->m_sortKeys[item]); - const uint64_t newFlags = state.m_flags; - uint64_t changedFlags = currentState.m_flags ^ state.m_flags; + if (isCompute) + { + BX_CHECK(false, "Compute is not supported on DirectX 9."); + continue; + } + + const RenderDraw& draw = _render->m_renderItem[_render->m_sortValues[item] ].draw; + + const uint64_t newFlags = draw.m_flags; + uint64_t changedFlags = currentState.m_flags ^ draw.m_flags; currentState.m_flags = newFlags; - const uint64_t newStencil = state.m_stencil; - uint64_t changedStencil = currentState.m_stencil ^ state.m_stencil; + const uint64_t newStencil = draw.m_stencil; + uint64_t changedStencil = currentState.m_stencil ^ draw.m_stencil; currentState.m_stencil = newStencil; if (key.m_view != view) { currentState.clear(); - currentState.m_scissor = !state.m_scissor; + currentState.m_scissor = !draw.m_scissor; changedFlags = BGFX_STATE_MASK; changedStencil = packStencil(BGFX_STENCIL_MASK, BGFX_STENCIL_MASK); currentState.m_flags = newFlags; @@ -2590,7 +2600,7 @@ namespace bgfx DX_CHECK(device->SetRenderState(D3DRS_ALPHAFUNC, D3DCMP_GREATER) ); } - uint16_t scissor = state.m_scissor; + uint16_t scissor = draw.m_scissor; if (currentState.m_scissor != scissor) { currentState.m_scissor = scissor; @@ -2737,7 +2747,7 @@ namespace bgfx } if ( (BGFX_STATE_BLEND_MASK|BGFX_STATE_BLEND_EQUATION_MASK) & changedFlags - || blendFactor != state.m_rgba) + || blendFactor != draw.m_rgba) { bool enabled = !!(BGFX_STATE_BLEND_MASK & newFlags); 
DX_CHECK(device->SetRenderState(D3DRS_ALPHABLENDENABLE, enabled) ); @@ -2770,9 +2780,9 @@ namespace bgfx } if ( (s_blendFactor[srcRGB].m_factor || s_blendFactor[dstRGB].m_factor) - && blendFactor != state.m_rgba) + && blendFactor != draw.m_rgba) { - const uint32_t rgba = state.m_rgba; + const uint32_t rgba = draw.m_rgba; D3DCOLOR color = D3DCOLOR_RGBA(rgba>>24 , (rgba>>16)&0xff , (rgba>> 8)&0xff @@ -2782,7 +2792,7 @@ namespace bgfx } } - blendFactor = state.m_rgba; + blendFactor = draw.m_rgba; } const uint64_t pt = _render->m_debug&BGFX_DEBUG_WIREFRAME ? BGFX_STATE_PT_LINES : newFlags&BGFX_STATE_PT_MASK; @@ -2791,8 +2801,8 @@ namespace bgfx } bool programChanged = false; - bool constantsChanged = state.m_constBegin < state.m_constEnd; - rendererUpdateUniforms(this, _render->m_constantBuffer, state.m_constBegin, state.m_constEnd); + bool constantsChanged = draw.m_constBegin < draw.m_constEnd; + rendererUpdateUniforms(this, _render->m_constantBuffer, draw.m_constBegin, draw.m_constEnd); if (key.m_program != programIdx) { @@ -2806,8 +2816,8 @@ namespace bgfx else { ProgramD3D9& program = m_program[programIdx]; - device->SetVertexShader( (IDirect3DVertexShader9*)program.m_vsh->m_ptr); - device->SetPixelShader( (IDirect3DPixelShader9*)program.m_fsh->m_ptr); + device->SetVertexShader(program.m_vsh->m_vertexShader); + device->SetPixelShader(program.m_fsh->m_pixelShader); } programChanged = @@ -2917,15 +2927,15 @@ namespace bgfx case PredefinedUniform::Model: { - const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix]; - setShaderConstantF(flags, predefined.m_loc, model.un.val, bx::uint32_min(state.m_num*4, predefined.m_count) ); + const Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix]; + setShaderConstantF(flags, predefined.m_loc, model.un.val, bx::uint32_min(draw.m_num*4, predefined.m_count) ); } break; case PredefinedUniform::ModelView: { Matrix4 modelView; - const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix]; + const 
Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix]; bx::float4x4_mul(&modelView.un.f4x4, &model.un.f4x4, &_render->m_view[view].un.f4x4); setShaderConstantF(flags, predefined.m_loc, modelView.un.val, bx::uint32_min(4, predefined.m_count) ); } @@ -2934,7 +2944,7 @@ namespace bgfx case PredefinedUniform::ModelViewProj: { Matrix4 modelViewProj; - const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix]; + const Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix]; bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProj[view].un.f4x4); setShaderConstantF(flags, predefined.m_loc, modelViewProj.un.val, bx::uint32_min(4, predefined.m_count) ); } @@ -2958,7 +2968,7 @@ namespace bgfx uint64_t flag = BGFX_STATE_TEX0; for (uint32_t stage = 0; stage < BGFX_STATE_TEX_COUNT; ++stage) { - const Sampler& sampler = state.m_sampler[stage]; + const Sampler& sampler = draw.m_sampler[stage]; Sampler& current = currentState.m_sampler[stage]; if (current.m_idx != sampler.m_idx || current.m_flags != sampler.m_flags @@ -2980,34 +2990,34 @@ namespace bgfx } if (programChanged - || currentState.m_vertexBuffer.idx != state.m_vertexBuffer.idx - || currentState.m_instanceDataBuffer.idx != state.m_instanceDataBuffer.idx - || currentState.m_instanceDataOffset != state.m_instanceDataOffset - || currentState.m_instanceDataStride != state.m_instanceDataStride) + || currentState.m_vertexBuffer.idx != draw.m_vertexBuffer.idx + || currentState.m_instanceDataBuffer.idx != draw.m_instanceDataBuffer.idx + || currentState.m_instanceDataOffset != draw.m_instanceDataOffset + || currentState.m_instanceDataStride != draw.m_instanceDataStride) { - currentState.m_vertexBuffer = state.m_vertexBuffer; - currentState.m_instanceDataBuffer.idx = state.m_instanceDataBuffer.idx; - currentState.m_instanceDataOffset = state.m_instanceDataOffset; - currentState.m_instanceDataStride = state.m_instanceDataStride; + currentState.m_vertexBuffer = draw.m_vertexBuffer; + 
currentState.m_instanceDataBuffer.idx = draw.m_instanceDataBuffer.idx; + currentState.m_instanceDataOffset = draw.m_instanceDataOffset; + currentState.m_instanceDataStride = draw.m_instanceDataStride; - uint16_t handle = state.m_vertexBuffer.idx; + uint16_t handle = draw.m_vertexBuffer.idx; if (invalidHandle != handle) { const VertexBufferD3D9& vb = m_vertexBuffers[handle]; - uint16_t decl = !isValid(vb.m_decl) ? state.m_vertexDecl.idx : vb.m_decl.idx; + uint16_t decl = !isValid(vb.m_decl) ? draw.m_vertexDecl.idx : vb.m_decl.idx; const VertexDeclaration& vertexDecl = m_vertexDecls[decl]; DX_CHECK(device->SetStreamSource(0, vb.m_ptr, 0, vertexDecl.m_decl.m_stride) ); - if (isValid(state.m_instanceDataBuffer) + if (isValid(draw.m_instanceDataBuffer) && m_instancing) { - const VertexBufferD3D9& inst = m_vertexBuffers[state.m_instanceDataBuffer.idx]; - DX_CHECK(device->SetStreamSourceFreq(0, D3DSTREAMSOURCE_INDEXEDDATA|state.m_numInstances) ); + const VertexBufferD3D9& inst = m_vertexBuffers[draw.m_instanceDataBuffer.idx]; + DX_CHECK(device->SetStreamSourceFreq(0, D3DSTREAMSOURCE_INDEXEDDATA|draw.m_numInstances) ); DX_CHECK(device->SetStreamSourceFreq(1, UINT(D3DSTREAMSOURCE_INSTANCEDATA|1) ) ); - DX_CHECK(device->SetStreamSource(1, inst.m_ptr, state.m_instanceDataOffset, state.m_instanceDataStride) ); + DX_CHECK(device->SetStreamSource(1, inst.m_ptr, draw.m_instanceDataOffset, draw.m_instanceDataStride) ); - IDirect3DVertexDeclaration9* ptr = createVertexDeclaration(vertexDecl.m_decl, state.m_instanceDataStride/16); + IDirect3DVertexDeclaration9* ptr = createVertexDeclaration(vertexDecl.m_decl, draw.m_instanceDataStride/16); DX_CHECK(device->SetVertexDeclaration(ptr) ); DX_RELEASE(ptr, 0); } @@ -3025,11 +3035,11 @@ namespace bgfx } } - if (currentState.m_indexBuffer.idx != state.m_indexBuffer.idx) + if (currentState.m_indexBuffer.idx != draw.m_indexBuffer.idx) { - currentState.m_indexBuffer = state.m_indexBuffer; + currentState.m_indexBuffer = draw.m_indexBuffer; - 
uint16_t handle = state.m_indexBuffer.idx; + uint16_t handle = draw.m_indexBuffer.idx; if (invalidHandle != handle) { const IndexBufferD3D9& ib = m_indexBuffers[handle]; @@ -3043,11 +3053,11 @@ namespace bgfx if (isValid(currentState.m_vertexBuffer) ) { - uint32_t numVertices = state.m_numVertices; + uint32_t numVertices = draw.m_numVertices; if (UINT32_MAX == numVertices) { const VertexBufferD3D9& vb = m_vertexBuffers[currentState.m_vertexBuffer.idx]; - uint16_t decl = !isValid(vb.m_decl) ? state.m_vertexDecl.idx : vb.m_decl.idx; + uint16_t decl = !isValid(vb.m_decl) ? draw.m_vertexDecl.idx : vb.m_decl.idx; const VertexDeclaration& vertexDecl = m_vertexDecls[decl]; numVertices = vb.m_size/vertexDecl.m_decl.m_stride; } @@ -3057,35 +3067,35 @@ namespace bgfx uint32_t numInstances = 0; uint32_t numPrimsRendered = 0; - if (isValid(state.m_indexBuffer) ) + if (isValid(draw.m_indexBuffer) ) { - if (UINT32_MAX == state.m_numIndices) + if (UINT32_MAX == draw.m_numIndices) { - numIndices = m_indexBuffers[state.m_indexBuffer.idx].m_size/2; + numIndices = m_indexBuffers[draw.m_indexBuffer.idx].m_size/2; numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub; - numInstances = state.m_numInstances; - numPrimsRendered = numPrimsSubmitted*state.m_numInstances; + numInstances = draw.m_numInstances; + numPrimsRendered = numPrimsSubmitted*draw.m_numInstances; DX_CHECK(device->DrawIndexedPrimitive(prim.m_type - , state.m_startVertex + , draw.m_startVertex , 0 , numVertices , 0 , numPrimsSubmitted ) ); } - else if (prim.m_min <= state.m_numIndices) + else if (prim.m_min <= draw.m_numIndices) { - numIndices = state.m_numIndices; + numIndices = draw.m_numIndices; numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub; - numInstances = state.m_numInstances; - numPrimsRendered = numPrimsSubmitted*state.m_numInstances; + numInstances = draw.m_numInstances; + numPrimsRendered = numPrimsSubmitted*draw.m_numInstances; DX_CHECK(device->DrawIndexedPrimitive(prim.m_type - , state.m_startVertex 
+ , draw.m_startVertex , 0 , numVertices - , state.m_startIndex + , draw.m_startIndex , numPrimsSubmitted ) ); } @@ -3093,11 +3103,11 @@ namespace bgfx else { numPrimsSubmitted = numVertices/prim.m_div - prim.m_sub; - numInstances = state.m_numInstances; - numPrimsRendered = numPrimsSubmitted*state.m_numInstances; + numInstances = draw.m_numInstances; + numPrimsRendered = numPrimsSubmitted*draw.m_numInstances; DX_CHECK(device->DrawPrimitive(prim.m_type - , state.m_startVertex + , draw.m_startVertex , numPrimsSubmitted ) ); } diff --git a/src/renderer_d3d9.h b/src/renderer_d3d9.h index 692d81b4b..de7a708b0 100644 --- a/src/renderer_d3d9.h +++ b/src/renderer_d3d9.h @@ -218,9 +218,10 @@ namespace bgfx struct ShaderD3D9 { ShaderD3D9() - : m_ptr(NULL) + : m_vertexShader(NULL) , m_constantBuffer(NULL) , m_numPredefined(0) + , m_type(0) { } @@ -236,23 +237,33 @@ namespace bgfx } m_numPredefined = 0; - DX_RELEASE(m_ptr, 0); + switch (m_type) + { + case 0: DX_RELEASE(m_vertexShader, 0); break; + default: DX_RELEASE(m_pixelShader, 0); break; + } } - IUnknown* m_ptr; + union + { + // X360 doesn't have interface inheritance (can't use IUnknown*). 
+ IDirect3DVertexShader9* m_vertexShader; + IDirect3DPixelShader9* m_pixelShader; + }; ConstantBuffer* m_constantBuffer; PredefinedUniform m_predefined[PredefinedUniform::Count]; uint8_t m_numPredefined; + uint8_t m_type; }; struct ProgramD3D9 { void create(const ShaderD3D9& _vsh, const ShaderD3D9& _fsh) { - BX_CHECK(NULL != _vsh.m_ptr, "Vertex shader doesn't exist."); + BX_CHECK(NULL != _vsh.m_vertexShader, "Vertex shader doesn't exist."); m_vsh = &_vsh; - BX_CHECK(NULL != _fsh.m_ptr, "Fragment shader doesn't exist."); + BX_CHECK(NULL != _fsh.m_pixelShader, "Fragment shader doesn't exist."); m_fsh = &_fsh; memcpy(&m_predefined[0], _vsh.m_predefined, _vsh.m_numPredefined*sizeof(PredefinedUniform) ); @@ -318,10 +329,10 @@ namespace bgfx union { - IDirect3DBaseTexture9* m_ptr; - IDirect3DTexture9* m_texture2d; + IDirect3DBaseTexture9* m_ptr; + IDirect3DTexture9* m_texture2d; IDirect3DVolumeTexture9* m_texture3d; - IDirect3DCubeTexture9* m_textureCube; + IDirect3DCubeTexture9* m_textureCube; }; IDirect3DSurface9* m_surface; diff --git a/src/renderer_gl.cpp b/src/renderer_gl.cpp index 15a4b1376..7168080c3 100644 --- a/src/renderer_gl.cpp +++ b/src/renderer_gl.cpp @@ -66,6 +66,13 @@ namespace bgfx "i_data4", }; + static const GLenum s_access[Access::Count] = + { + GL_READ_ONLY, + GL_WRITE_ONLY, + GL_READ_WRITE, + }; + static const GLenum s_attribType[AttribType::Count] = { GL_UNSIGNED_BYTE, @@ -169,7 +176,7 @@ namespace bgfx bool m_supported; }; - static TextureFormatInfo s_textureFormat[TextureFormat::Count] = + static TextureFormatInfo s_textureFormat[] = { { GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_ZERO, false }, // BC1 { GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_ZERO, false }, // BC2 @@ -207,6 +214,47 @@ namespace bgfx { GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, false }, // D32F { GL_STENCIL_INDEX8, GL_DEPTH_STENCIL, GL_UNSIGNED_BYTE, false }, // D0S8 }; + BX_STATIC_ASSERT(TextureFormat::Count == 
BX_COUNTOF(s_textureFormat) ); + + static GLenum s_imageFormat[] = + { + GL_ZERO, // BC1 + GL_ZERO, // BC2 + GL_ZERO, // BC3 + GL_ZERO, // BC4 + GL_ZERO, // BC5 + GL_ZERO, // ETC1 + GL_ZERO, // ETC2 + GL_ZERO, // ETC2A + GL_ZERO, // ETC2A1 + GL_ZERO, // PTC12 + GL_ZERO, // PTC14 + GL_ZERO, // PTC12A + GL_ZERO, // PTC14A + GL_ZERO, // PTC22 + GL_ZERO, // PTC24 + GL_ZERO, // Unknown + GL_R8, // R8 + GL_R16, // R16 + GL_R16F, // R16F + GL_RGBA8, // BGRA8 + GL_RGBA16, // RGBA16 + GL_RGBA16F, // RGBA16F + GL_RGB565, // R5G6B5 + GL_RGBA4, // RGBA4 + GL_RGB5_A1, // RGB5A1 + GL_RGB10_A2, // RGB10A2 + GL_ZERO, // UnknownDepth + GL_ZERO, // D16 + GL_ZERO, // D24 + GL_ZERO, // D24S8 + GL_ZERO, // D32 + GL_ZERO, // D16F + GL_ZERO, // D24F + GL_ZERO, // D32F + GL_ZERO, // D0S8 + }; + BX_STATIC_ASSERT(TextureFormat::Count == BX_COUNTOF(s_imageFormat) ); struct Extension { @@ -224,6 +272,7 @@ namespace bgfx APPLE_texture_format_BGRA8888, APPLE_texture_max_level, + ARB_compute_shader, ARB_debug_label, ARB_debug_output, ARB_depth_clamp, @@ -237,8 +286,11 @@ namespace bgfx ARB_instanced_arrays, ARB_map_buffer_range, ARB_multisample, + ARB_program_interface_query, ARB_sampler_objects, ARB_seamless_cube_map, + ARB_shader_image_load_store, + ARB_shader_storage_buffer_object, ARB_shader_texture_lod, ARB_texture_compression_rgtc, ARB_texture_float, @@ -269,6 +321,7 @@ namespace bgfx EXT_framebuffer_sRGB, EXT_occlusion_query_boolean, EXT_read_format_bgra, + EXT_shader_image_load_store, EXT_shader_texture_lod, EXT_shadow_samplers, EXT_texture_array, @@ -297,6 +350,8 @@ namespace bgfx IMG_texture_compression_pvrtc2, IMG_texture_format_BGRA8888, + INTEL_fragment_shader_ordering, + KHR_debug, MOZ_WEBGL_compressed_texture_s3tc, @@ -357,6 +412,7 @@ namespace bgfx { "APPLE_texture_format_BGRA8888", false, true }, { "APPLE_texture_max_level", false, true }, + { "ARB_compute_shader", BGFX_CONFIG_RENDERER_OPENGL >= 43, true }, { "ARB_debug_label", false, true }, { "ARB_debug_output", 
BGFX_CONFIG_RENDERER_OPENGL >= 43, true }, { "ARB_depth_clamp", BGFX_CONFIG_RENDERER_OPENGL >= 32, true }, @@ -370,8 +426,11 @@ namespace bgfx { "ARB_instanced_arrays", BGFX_CONFIG_RENDERER_OPENGL >= 33, true }, { "ARB_map_buffer_range", BGFX_CONFIG_RENDERER_OPENGL >= 30, true }, { "ARB_multisample", false, true }, + { "ARB_program_interface_query", BGFX_CONFIG_RENDERER_OPENGL >= 43, true }, { "ARB_sampler_objects", BGFX_CONFIG_RENDERER_OPENGL >= 33, true }, { "ARB_seamless_cube_map", BGFX_CONFIG_RENDERER_OPENGL >= 32, true }, + { "ARB_shader_image_load_store", BGFX_CONFIG_RENDERER_OPENGL >= 42, true }, + { "ARB_shader_storage_buffer_object", BGFX_CONFIG_RENDERER_OPENGL >= 43, true }, { "ARB_shader_texture_lod", BGFX_CONFIG_RENDERER_OPENGL >= 30, true }, { "ARB_texture_compression_rgtc", BGFX_CONFIG_RENDERER_OPENGL >= 30, true }, { "ARB_texture_float", BGFX_CONFIG_RENDERER_OPENGL >= 30, true }, @@ -402,6 +461,7 @@ namespace bgfx { "EXT_framebuffer_sRGB", BGFX_CONFIG_RENDERER_OPENGL >= 30, true }, { "EXT_occlusion_query_boolean", false, true }, { "EXT_read_format_bgra", false, true }, + { "EXT_shader_image_load_store", false, true }, { "EXT_shader_texture_lod", false, true }, // GLES2 extension. { "EXT_shadow_samplers", false, true }, { "EXT_texture_array", BGFX_CONFIG_RENDERER_OPENGL >= 30, true }, @@ -430,6 +490,8 @@ namespace bgfx { "IMG_texture_compression_pvrtc2", false, true }, { "IMG_texture_format_BGRA8888", false, true }, + { "INTEL_fragment_shader_ordering", false, true }, + { "KHR_debug", BGFX_CONFIG_RENDERER_OPENGL >= 43, true }, { "MOZ_WEBGL_compressed_texture_s3tc", false, true }, @@ -1127,6 +1189,12 @@ namespace bgfx : 0 ; + g_caps.supported |= !!(BGFX_CONFIG_RENDERER_OPENGLES >= 31) + || s_extension[Extension::ARB_compute_shader].m_supported + ? 
BGFX_CAPS_COMPUTE + : 0 + ; + if (s_extension[Extension::EXT_texture_filter_anisotropic].m_supported) { GL_CHECK(glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &m_maxAnisotropy) ); @@ -2139,6 +2207,14 @@ namespace bgfx switch (_type) { + GLSL_TYPE(GL_INT); + GLSL_TYPE(GL_INT_VEC2); + GLSL_TYPE(GL_INT_VEC3); + GLSL_TYPE(GL_INT_VEC4); + GLSL_TYPE(GL_UNSIGNED_INT); + GLSL_TYPE(GL_UNSIGNED_INT_VEC2); + GLSL_TYPE(GL_UNSIGNED_INT_VEC3); + GLSL_TYPE(GL_UNSIGNED_INT_VEC4); GLSL_TYPE(GL_FLOAT); GLSL_TYPE(GL_FLOAT_VEC2); GLSL_TYPE(GL_FLOAT_VEC3); @@ -2158,10 +2234,15 @@ namespace bgfx GLSL_TYPE(GL_SAMPLER_CUBE); // GLSL_TYPE(GL_SAMPLER_1D_SHADOW); GLSL_TYPE(GL_SAMPLER_2D_SHADOW); + GLSL_TYPE(GL_IMAGE_1D); + GLSL_TYPE(GL_IMAGE_2D); + GLSL_TYPE(GL_IMAGE_3D); + GLSL_TYPE(GL_IMAGE_CUBE); } #undef GLSL_TYPE + BX_CHECK(false, "Unknown GLSL type? %x", _type); return "UNKNOWN GLSL TYPE!"; } @@ -2188,6 +2269,7 @@ namespace bgfx #undef GLENUM + BX_WARN(false, "Unknown enum? %x", _enum); return ""; } @@ -2196,6 +2278,7 @@ namespace bgfx switch (_type) { case GL_INT: + case GL_UNSIGNED_INT: return UniformType::Uniform1iv; case GL_FLOAT: @@ -2233,6 +2316,10 @@ namespace bgfx case GL_SAMPLER_CUBE: // case GL_SAMPLER_1D_SHADOW: case GL_SAMPLER_2D_SHADOW: + case GL_IMAGE_1D: + case GL_IMAGE_2D: + case GL_IMAGE_3D: + case GL_IMAGE_CUBE: return UniformType::Uniform1iv; }; @@ -2251,7 +2338,12 @@ namespace bgfx if (!cached) { GL_CHECK(glAttachShader(m_id, _vsh.m_id) ); - GL_CHECK(glAttachShader(m_id, _fsh.m_id) ); + + if (0 != _fsh.m_id) + { + GL_CHECK(glAttachShader(m_id, _fsh.m_id) ); + } + GL_CHECK(glLinkProgram(m_id) ); GLint linked = 0; @@ -2277,7 +2369,11 @@ namespace bgfx // Must be after init, otherwise init might fail to lookup shader // info (NVIDIA Tegra 3 OpenGL ES 2.0 14.01003). 
GL_CHECK(glDetachShader(m_id, _vsh.m_id) ); - GL_CHECK(glDetachShader(m_id, _fsh.m_id) ); + + if (0 != _fsh.m_id) + { + GL_CHECK(glDetachShader(m_id, _fsh.m_id) ); + } } } @@ -2302,19 +2398,30 @@ namespace bgfx void ProgramGL::init() { - GLint activeAttribs; - GLint activeUniforms; + GLint activeAttribs = 0; + GLint activeUniforms = 0; + GLint activeBuffers = 0; #if BGFX_CONFIG_RENDERER_OPENGL >= 31 GL_CHECK(glBindFragDataLocation(m_id, 0, "bgfx_FragColor") ); #endif // BGFX_CONFIG_RENDERER_OPENGL >= 31 - GL_CHECK(glGetProgramiv(m_id, GL_ACTIVE_ATTRIBUTES, &activeAttribs) ); - GL_CHECK(glGetProgramiv(m_id, GL_ACTIVE_UNIFORMS, &activeUniforms) ); + if (s_extension[Extension::ARB_program_interface_query].m_supported + || BX_ENABLED(BGFX_CONFIG_RENDERER_OPENGLES >= 31) ) + { + GL_CHECK(glGetProgramInterfaceiv(m_id, GL_PROGRAM_INPUT, GL_ACTIVE_RESOURCES, &activeAttribs ) ); + GL_CHECK(glGetProgramInterfaceiv(m_id, GL_UNIFORM, GL_ACTIVE_RESOURCES, &activeUniforms) ); + GL_CHECK(glGetProgramInterfaceiv(m_id, GL_BUFFER_VARIABLE, GL_ACTIVE_RESOURCES, &activeBuffers ) ); + } + else + { + GL_CHECK(glGetProgramiv(m_id, GL_ACTIVE_ATTRIBUTES, &activeAttribs ) ); + GL_CHECK(glGetProgramiv(m_id, GL_ACTIVE_UNIFORMS, &activeUniforms) ); + } GLint max0, max1; GL_CHECK(glGetProgramiv(m_id, GL_ACTIVE_ATTRIBUTE_MAX_LENGTH, &max0) ); - GL_CHECK(glGetProgramiv(m_id, GL_ACTIVE_UNIFORM_MAX_LENGTH, &max1) ); + GL_CHECK(glGetProgramiv(m_id, GL_ACTIVE_UNIFORM_MAX_LENGTH, &max1) ); uint32_t maxLength = bx::uint32_max(max0, max1); char* name = (char*)alloca(maxLength + 1); @@ -2365,7 +2472,11 @@ namespace bgfx case GL_SAMPLER_3D: case GL_SAMPLER_CUBE: case GL_SAMPLER_2D_SHADOW: - BX_TRACE("Sampler %d at %d.", m_numSamplers, loc); + case GL_IMAGE_1D: + case GL_IMAGE_2D: + case GL_IMAGE_3D: + case GL_IMAGE_CUBE: + BX_TRACE("Sampler #%d at location %d.", m_numSamplers, loc); m_sampler[m_numSamplers] = loc; m_numSamplers++; break; @@ -2405,6 +2516,76 @@ namespace bgfx BX_UNUSED(offset); } + if 
(s_extension[Extension::ARB_program_interface_query].m_supported + || BX_ENABLED(BGFX_CONFIG_RENDERER_OPENGLES >= 31) ) + { + struct VariableInfo + { + GLenum type; +// GLint loc; + }; + VariableInfo vi; + GLenum props[] = { GL_TYPE }; + +#if 0 + BX_TRACE("Buffers (%d):", activeUniforms); + for (int32_t ii = 0; ii < activeUniforms; ++ii) + { + GL_CHECK(glGetProgramResourceiv(m_id + , GL_UNIFORM + , ii + , BX_COUNTOF(props) + , props + , BX_COUNTOF(props) + , NULL + , (GLint*)&vi + ) ); + + GL_CHECK(glGetProgramResourceName(m_id + , GL_UNIFORM + , ii + , maxLength + 1 + , NULL + , name + ) ); + + BX_TRACE("\tuniform %s %s is at location %d" + , glslTypeName(vi.type) + , name + , 0 //vi.loc + ); + } +#endif // 0 + + BX_TRACE("Buffers (%d):", activeBuffers); + for (int32_t ii = 0; ii < activeBuffers; ++ii) + { + GL_CHECK(glGetProgramResourceiv(m_id + , GL_BUFFER_VARIABLE + , ii + , BX_COUNTOF(props) + , props + , BX_COUNTOF(props) + , NULL + , (GLint*)&vi + ) ); + + GL_CHECK(glGetProgramResourceName(m_id + , GL_BUFFER_VARIABLE + , ii + , maxLength + 1 + , NULL + , name + ) ); + + BX_TRACE("\t%s %s at %d" + , glslTypeName(vi.type) + , name + , 0 //vi.loc + ); + } + } + m_constantBuffer->finish(); memset(m_attributes, 0xff, sizeof(m_attributes) ); @@ -3076,6 +3257,11 @@ namespace bgfx uint16_t count; bx::read(&reader, count); + BX_TRACE("%s Shader consts %d" + , BGFX_CHUNK_MAGIC_FSH == magic ? "Fragment" : BGFX_CHUNK_MAGIC_VSH == magic ? 
"Vertex" : "Compute" + , count + ); + for (uint32_t ii = 0; ii < count; ++ii) { uint8_t nameSize; @@ -3585,8 +3771,8 @@ namespace bgfx _render->sort(); - RenderState currentState; - currentState.reset(); + RenderDraw currentState; + currentState.clear(); currentState.m_flags = BGFX_STATE_NONE; currentState.m_stencil = packStencil(BGFX_STENCIL_NONE, BGFX_STENCIL_NONE); @@ -3622,6 +3808,9 @@ namespace bgfx viewScissorRect.clear(); const bool blendIndependentSupported = s_extension[Extension::ARB_draw_buffers_blend].m_supported; + const bool computeSupported = (BX_ENABLED(BGFX_CONFIG_RENDERER_OPENGL) && s_extension[Extension::ARB_compute_shader].m_supported) + || BX_ENABLED(BGFX_CONFIG_RENDERER_OPENGLES >= 31) + ; uint32_t statsNumPrimsSubmitted[BX_COUNTOF(s_primInfo)] = {}; uint32_t statsNumPrimsRendered[BX_COUNTOF(s_primInfo)] = {}; @@ -3634,26 +3823,13 @@ namespace bgfx for (uint32_t item = 0, numItems = _render->m_num; item < numItems; ++item) { - key.decode(_render->m_sortKeys[item]); - const RenderState& state = _render->m_renderState[_render->m_sortValues[item] ]; + const bool isCompute = key.decode(_render->m_sortKeys[item]); + const bool viewChanged = key.m_view != view; - const uint64_t newFlags = state.m_flags; - uint64_t changedFlags = currentState.m_flags ^ state.m_flags; - currentState.m_flags = newFlags; + const RenderItem& renderItem = _render->m_renderItem[_render->m_sortValues[item] ]; - const uint64_t newStencil = state.m_stencil; - uint64_t changedStencil = currentState.m_stencil ^ state.m_stencil; - currentState.m_stencil = newStencil; - - if (key.m_view != view) + if (viewChanged) { - currentState.clear(); - currentState.m_scissor = !state.m_scissor; - changedFlags = BGFX_STATE_MASK; - changedStencil = packStencil(BGFX_STENCIL_MASK, BGFX_STENCIL_MASK); - currentState.m_flags = newFlags; - currentState.m_stencil = newStencil; - GL_CHECK(glInsertEventMarker(0, s_viewName[key.m_view]) ); view = key.m_view; @@ -3686,7 +3862,81 @@ namespace bgfx 
GL_CHECK(glDisable(GL_BLEND) ); } - uint16_t scissor = state.m_scissor; + if (isCompute) + { + if (computeSupported) + { + const RenderCompute& compute = renderItem.compute; + + ProgramGL& program = m_program[key.m_program]; + GL_CHECK(glUseProgram(program.m_id) ); programIdx = key.m_program; // Keep the cached program in sync so the next draw call rebinds its own program. + + GLbitfield barrier = 0; + for (uint32_t ii = 0; ii < BGFX_MAX_COMPUTE_BINDINGS; ++ii) + { + const ComputeBinding& bind = compute.m_bind[ii]; + if (invalidHandle != bind.m_idx) + { + switch (bind.m_type) + { + case ComputeBinding::Image: + { + const TextureGL& texture = m_textures[bind.m_idx]; + GL_CHECK(glBindImageTexture(ii, texture.m_id, bind.m_mip, GL_FALSE, 0, s_access[bind.m_access], s_imageFormat[bind.m_format]) ); + barrier |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT; + } + break; + + case ComputeBinding::Buffer: + { +// const VertexBufferGL& vertexBuffer = m_vertexBuffers[bind.m_idx]; +// GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, ii, vertexBuffer.m_id) ); +// barrier |= GL_SHADER_STORAGE_BARRIER_BIT; + } + break; + } + } + } + + if (0 != barrier) + { + bool constantsChanged = compute.m_constBegin < compute.m_constEnd; + rendererUpdateUniforms(this, _render->m_constantBuffer, compute.m_constBegin, compute.m_constEnd); + + if (constantsChanged) + { + commit(*program.m_constantBuffer); + } + + GL_CHECK(glDispatchCompute(compute.m_numX, compute.m_numY, compute.m_numZ) ); + GL_CHECK(glMemoryBarrier(barrier) ); + } + } + + continue; + } + + const RenderDraw& draw = renderItem.draw; + + const uint64_t newFlags = draw.m_flags; + uint64_t changedFlags = currentState.m_flags ^ draw.m_flags; + currentState.m_flags = newFlags; + + const uint64_t newStencil = draw.m_stencil; + uint64_t changedStencil = currentState.m_stencil ^ draw.m_stencil; + currentState.m_stencil = newStencil; + + if (viewChanged) + { + currentState.clear(); + currentState.m_scissor = !draw.m_scissor; + changedFlags = BGFX_STATE_MASK; + changedStencil = packStencil(BGFX_STENCIL_MASK, BGFX_STENCIL_MASK); + currentState.m_flags = 
newFlags; + currentState.m_stencil = newStencil; + } + + uint16_t scissor = draw.m_scissor; if (currentState.m_scissor != scissor) { currentState.m_scissor = scissor; @@ -3843,10 +4093,10 @@ namespace bgfx } if ( (BGFX_STATE_BLEND_MASK|BGFX_STATE_BLEND_EQUATION_MASK|BGFX_STATE_BLEND_INDEPENDENT) & changedFlags - || blendFactor != state.m_rgba) + || blendFactor != draw.m_rgba) { if ( (BGFX_STATE_BLEND_MASK|BGFX_STATE_BLEND_EQUATION_MASK|BGFX_STATE_BLEND_INDEPENDENT) & newFlags - || blendFactor != state.m_rgba) + || blendFactor != draw.m_rgba) { const bool enabled = !!(BGFX_STATE_BLEND_MASK & newFlags); const bool independent = !!(BGFX_STATE_BLEND_INDEPENDENT & newFlags) @@ -3864,7 +4114,7 @@ namespace bgfx const uint32_t equRGB = (equation )&0x7; const uint32_t equA = (equation>>3)&0x7; - const uint32_t numRt = s_renderGL->getNumRt(); + const uint32_t numRt = getNumRt(); if (!BX_ENABLED(BGFX_CONFIG_RENDERER_OPENGL) || 1 >= numRt @@ -3881,9 +4131,9 @@ namespace bgfx GL_CHECK(glBlendEquationSeparate(s_blendEquation[equRGB], s_blendEquation[equA]) ); if ( (s_blendFactor[srcRGB].m_factor || s_blendFactor[dstRGB].m_factor) - && blendFactor != state.m_rgba) + && blendFactor != draw.m_rgba) { - const uint32_t rgba = state.m_rgba; + const uint32_t rgba = draw.m_rgba; GLclampf rr = ( (rgba>>24) )/255.0f; GLclampf gg = ( (rgba>>16)&0xff)/255.0f; GLclampf bb = ( (rgba>> 8)&0xff)/255.0f; @@ -3918,7 +4168,7 @@ namespace bgfx GL_CHECK(glDisablei(GL_BLEND, 0) ); } - for (uint32_t ii = 1, rgba = state.m_rgba; ii < numRt; ++ii, rgba >>= 11) + for (uint32_t ii = 1, rgba = draw.m_rgba; ii < numRt; ++ii, rgba >>= 11) { if (0 != (rgba&0x7ff) ) { @@ -3941,7 +4191,7 @@ namespace bgfx GL_CHECK(glDisable(GL_BLEND) ); } - blendFactor = state.m_rgba; + blendFactor = draw.m_rgba; } const uint64_t pt = _render->m_debug&BGFX_DEBUG_WIREFRAME ? 
BGFX_STATE_PT_LINES : newFlags&BGFX_STATE_PT_MASK; @@ -3950,14 +4200,14 @@ namespace bgfx } bool programChanged = false; - bool constantsChanged = state.m_constBegin < state.m_constEnd; + bool constantsChanged = draw.m_constBegin < draw.m_constEnd; bool bindAttribs = false; - rendererUpdateUniforms(this, _render->m_constantBuffer, state.m_constBegin, state.m_constEnd); + rendererUpdateUniforms(this, _render->m_constantBuffer, draw.m_constBegin, draw.m_constEnd); if (key.m_program != programIdx) { programIdx = key.m_program; - GLuint id = invalidHandle == programIdx ? 0 : s_renderGL->m_program[programIdx].m_id; + GLuint id = invalidHandle == programIdx ? 0 : m_program[programIdx].m_id; GL_CHECK(glUseProgram(id) ); programChanged = constantsChanged = @@ -3966,7 +4216,7 @@ namespace bgfx if (invalidHandle != programIdx) { - ProgramGL& program = s_renderGL->m_program[programIdx]; + ProgramGL& program = m_program[programIdx]; if (constantsChanged) { @@ -4086,9 +4336,9 @@ namespace bgfx case PredefinedUniform::Model: { - const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix]; + const Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix]; GL_CHECK(glUniformMatrix4fv(predefined.m_loc - , bx::uint32_min(predefined.m_count, state.m_num) + , bx::uint32_min(predefined.m_count, draw.m_num) , GL_FALSE , model.un.val ) ); @@ -4098,7 +4348,7 @@ namespace bgfx case PredefinedUniform::ModelView: { Matrix4 modelView; - const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix]; + const Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix]; bx::float4x4_mul(&modelView.un.f4x4, &model.un.f4x4, &_render->m_view[view].un.f4x4); GL_CHECK(glUniformMatrix4fv(predefined.m_loc @@ -4112,7 +4362,7 @@ namespace bgfx case PredefinedUniform::ModelViewProj: { Matrix4 modelViewProj; - const Matrix4& model = _render->m_matrixCache.m_cache[state.m_matrix]; + const Matrix4& model = _render->m_matrixCache.m_cache[draw.m_matrix]; 
bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProj[view].un.f4x4); GL_CHECK(glUniformMatrix4fv(predefined.m_loc @@ -4139,7 +4389,7 @@ namespace bgfx uint64_t flag = BGFX_STATE_TEX0; for (uint32_t stage = 0; stage < BGFX_STATE_TEX_COUNT; ++stage) { - const Sampler& sampler = state.m_sampler[stage]; + const Sampler& sampler = draw.m_sampler[stage]; Sampler& current = currentState.m_sampler[stage]; if (current.m_idx != sampler.m_idx || current.m_flags != sampler.m_flags @@ -4147,7 +4397,7 @@ namespace bgfx { if (invalidHandle != sampler.m_idx) { - TextureGL& texture = s_renderGL->m_textures[sampler.m_idx]; + TextureGL& texture = m_textures[sampler.m_idx]; texture.commit(stage, sampler.m_flags); } } @@ -4158,33 +4408,33 @@ namespace bgfx } if (0 != defaultVao - && 0 == state.m_startVertex - && 0 == state.m_instanceDataOffset) + && 0 == draw.m_startVertex + && 0 == draw.m_instanceDataOffset) { if (programChanged - || currentState.m_vertexBuffer.idx != state.m_vertexBuffer.idx - || currentState.m_indexBuffer.idx != state.m_indexBuffer.idx - || currentState.m_instanceDataBuffer.idx != state.m_instanceDataBuffer.idx - || currentState.m_instanceDataOffset != state.m_instanceDataOffset - || currentState.m_instanceDataStride != state.m_instanceDataStride) + || currentState.m_vertexBuffer.idx != draw.m_vertexBuffer.idx + || currentState.m_indexBuffer.idx != draw.m_indexBuffer.idx + || currentState.m_instanceDataBuffer.idx != draw.m_instanceDataBuffer.idx + || currentState.m_instanceDataOffset != draw.m_instanceDataOffset + || currentState.m_instanceDataStride != draw.m_instanceDataStride) { bx::HashMurmur2A murmur; murmur.begin(); - murmur.add(state.m_vertexBuffer.idx); - murmur.add(state.m_indexBuffer.idx); - murmur.add(state.m_instanceDataBuffer.idx); - murmur.add(state.m_instanceDataOffset); - murmur.add(state.m_instanceDataStride); + murmur.add(draw.m_vertexBuffer.idx); + murmur.add(draw.m_indexBuffer.idx); + murmur.add(draw.m_instanceDataBuffer.idx); + 
murmur.add(draw.m_instanceDataOffset); + murmur.add(draw.m_instanceDataStride); murmur.add(programIdx); uint32_t hash = murmur.end(); - currentState.m_vertexBuffer = state.m_vertexBuffer; - currentState.m_indexBuffer = state.m_indexBuffer; - currentState.m_instanceDataOffset = state.m_instanceDataOffset; - currentState.m_instanceDataStride = state.m_instanceDataStride; - baseVertex = state.m_startVertex; + currentState.m_vertexBuffer = draw.m_vertexBuffer; + currentState.m_indexBuffer = draw.m_indexBuffer; + currentState.m_instanceDataOffset = draw.m_instanceDataOffset; + currentState.m_instanceDataStride = draw.m_instanceDataStride; + baseVertex = draw.m_startVertex; - GLuint id = s_renderGL->m_vaoStateCache.find(hash); + GLuint id = m_vaoStateCache.find(hash); if (UINT32_MAX != id) { currentVao = id; @@ -4192,28 +4442,28 @@ namespace bgfx } else { - id = s_renderGL->m_vaoStateCache.add(hash); + id = m_vaoStateCache.add(hash); currentVao = id; GL_CHECK(glBindVertexArray(id) ); - ProgramGL& program = s_renderGL->m_program[programIdx]; + ProgramGL& program = m_program[programIdx]; program.add(hash); - if (isValid(state.m_vertexBuffer) ) + if (isValid(draw.m_vertexBuffer) ) { - VertexBufferGL& vb = s_renderGL->m_vertexBuffers[state.m_vertexBuffer.idx]; + VertexBufferGL& vb = m_vertexBuffers[draw.m_vertexBuffer.idx]; vb.add(hash); GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, vb.m_id) ); - uint16_t decl = !isValid(vb.m_decl) ? state.m_vertexDecl.idx : vb.m_decl.idx; - program.bindAttributes(s_renderGL->m_vertexDecls[decl], state.m_startVertex); + uint16_t decl = !isValid(vb.m_decl) ? 
draw.m_vertexDecl.idx : vb.m_decl.idx; + program.bindAttributes(m_vertexDecls[decl], draw.m_startVertex); - if (isValid(state.m_instanceDataBuffer) ) + if (isValid(draw.m_instanceDataBuffer) ) { - VertexBufferGL& instanceVb = s_renderGL->m_vertexBuffers[state.m_instanceDataBuffer.idx]; + VertexBufferGL& instanceVb = m_vertexBuffers[draw.m_instanceDataBuffer.idx]; instanceVb.add(hash); GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, instanceVb.m_id) ); - program.bindInstanceData(state.m_instanceDataStride, state.m_instanceDataOffset); + program.bindInstanceData(draw.m_instanceDataStride, draw.m_instanceDataOffset); } } else @@ -4221,9 +4471,9 @@ namespace bgfx GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, 0) ); } - if (isValid(state.m_indexBuffer) ) + if (isValid(draw.m_indexBuffer) ) { - IndexBufferGL& ib = s_renderGL->m_indexBuffers[state.m_indexBuffer.idx]; + IndexBufferGL& ib = m_indexBuffers[draw.m_indexBuffer.idx]; ib.add(hash); GL_CHECK(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib.m_id) ); } @@ -4247,20 +4497,20 @@ namespace bgfx } if (programChanged - || currentState.m_vertexBuffer.idx != state.m_vertexBuffer.idx - || currentState.m_instanceDataBuffer.idx != state.m_instanceDataBuffer.idx - || currentState.m_instanceDataOffset != state.m_instanceDataOffset - || currentState.m_instanceDataStride != state.m_instanceDataStride) + || currentState.m_vertexBuffer.idx != draw.m_vertexBuffer.idx + || currentState.m_instanceDataBuffer.idx != draw.m_instanceDataBuffer.idx + || currentState.m_instanceDataOffset != draw.m_instanceDataOffset + || currentState.m_instanceDataStride != draw.m_instanceDataStride) { - currentState.m_vertexBuffer = state.m_vertexBuffer; - currentState.m_instanceDataBuffer.idx = state.m_instanceDataBuffer.idx; - currentState.m_instanceDataOffset = state.m_instanceDataOffset; - currentState.m_instanceDataStride = state.m_instanceDataStride; + currentState.m_vertexBuffer = draw.m_vertexBuffer; + currentState.m_instanceDataBuffer.idx = draw.m_instanceDataBuffer.idx; 
+ currentState.m_instanceDataOffset = draw.m_instanceDataOffset; + currentState.m_instanceDataStride = draw.m_instanceDataStride; - uint16_t handle = state.m_vertexBuffer.idx; + uint16_t handle = draw.m_vertexBuffer.idx; if (invalidHandle != handle) { - VertexBufferGL& vb = s_renderGL->m_vertexBuffers[handle]; + VertexBufferGL& vb = m_vertexBuffers[handle]; GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, vb.m_id) ); bindAttribs = true; } @@ -4270,14 +4520,14 @@ namespace bgfx } } - if (currentState.m_indexBuffer.idx != state.m_indexBuffer.idx) + if (currentState.m_indexBuffer.idx != draw.m_indexBuffer.idx) { - currentState.m_indexBuffer = state.m_indexBuffer; + currentState.m_indexBuffer = draw.m_indexBuffer; - uint16_t handle = state.m_indexBuffer.idx; + uint16_t handle = draw.m_indexBuffer.idx; if (invalidHandle != handle) { - IndexBufferGL& ib = s_renderGL->m_indexBuffers[handle]; + IndexBufferGL& ib = m_indexBuffers[handle]; GL_CHECK(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib.m_id) ); } else @@ -4288,19 +4538,19 @@ namespace bgfx if (isValid(currentState.m_vertexBuffer) ) { - if (baseVertex != state.m_startVertex + if (baseVertex != draw.m_startVertex || bindAttribs) { - baseVertex = state.m_startVertex; - const VertexBufferGL& vb = s_renderGL->m_vertexBuffers[state.m_vertexBuffer.idx]; - uint16_t decl = !isValid(vb.m_decl) ? state.m_vertexDecl.idx : vb.m_decl.idx; - const ProgramGL& program = s_renderGL->m_program[programIdx]; - program.bindAttributes(s_renderGL->m_vertexDecls[decl], state.m_startVertex); + baseVertex = draw.m_startVertex; + const VertexBufferGL& vb = m_vertexBuffers[draw.m_vertexBuffer.idx]; + uint16_t decl = !isValid(vb.m_decl) ? 
draw.m_vertexDecl.idx : vb.m_decl.idx; + const ProgramGL& program = m_program[programIdx]; + program.bindAttributes(m_vertexDecls[decl], draw.m_startVertex); - if (isValid(state.m_instanceDataBuffer) ) + if (isValid(draw.m_instanceDataBuffer) ) { - GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, s_renderGL->m_vertexBuffers[state.m_instanceDataBuffer.idx].m_id) ); - program.bindInstanceData(state.m_instanceDataStride, state.m_instanceDataOffset); + GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, m_vertexBuffers[draw.m_instanceDataBuffer.idx].m_id) ); + program.bindInstanceData(draw.m_instanceDataStride, draw.m_instanceDataOffset); } } } @@ -4308,12 +4558,12 @@ namespace bgfx if (isValid(currentState.m_vertexBuffer) ) { - uint32_t numVertices = state.m_numVertices; + uint32_t numVertices = draw.m_numVertices; if (UINT32_MAX == numVertices) { - const VertexBufferGL& vb = s_renderGL->m_vertexBuffers[currentState.m_vertexBuffer.idx]; - uint16_t decl = !isValid(vb.m_decl) ? state.m_vertexDecl.idx : vb.m_decl.idx; - const VertexDecl& vertexDecl = s_renderGL->m_vertexDecls[decl]; + const VertexBufferGL& vb = m_vertexBuffers[currentState.m_vertexBuffer.idx]; + uint16_t decl = !isValid(vb.m_decl) ? 
draw.m_vertexDecl.idx : vb.m_decl.idx; + const VertexDecl& vertexDecl = m_vertexDecls[decl]; numVertices = vb.m_size/vertexDecl.m_stride; } @@ -4322,47 +4572,47 @@ namespace bgfx uint32_t numInstances = 0; uint32_t numPrimsRendered = 0; - if (isValid(state.m_indexBuffer) ) + if (isValid(draw.m_indexBuffer) ) { - if (UINT32_MAX == state.m_numIndices) + if (UINT32_MAX == draw.m_numIndices) { - numIndices = s_renderGL->m_indexBuffers[state.m_indexBuffer.idx].m_size/2; + numIndices = m_indexBuffers[draw.m_indexBuffer.idx].m_size/2; numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub; - numInstances = state.m_numInstances; - numPrimsRendered = numPrimsSubmitted*state.m_numInstances; + numInstances = draw.m_numInstances; + numPrimsRendered = numPrimsSubmitted*draw.m_numInstances; GL_CHECK(glDrawElementsInstanced(prim.m_type , numIndices , GL_UNSIGNED_SHORT , (void*)0 - , state.m_numInstances + , draw.m_numInstances ) ); } - else if (prim.m_min <= state.m_numIndices) + else if (prim.m_min <= draw.m_numIndices) { - numIndices = state.m_numIndices; + numIndices = draw.m_numIndices; numPrimsSubmitted = numIndices/prim.m_div - prim.m_sub; - numInstances = state.m_numInstances; - numPrimsRendered = numPrimsSubmitted*state.m_numInstances; + numInstances = draw.m_numInstances; + numPrimsRendered = numPrimsSubmitted*draw.m_numInstances; GL_CHECK(glDrawElementsInstanced(prim.m_type , numIndices , GL_UNSIGNED_SHORT - , (void*)(uintptr_t)(state.m_startIndex*2) - , state.m_numInstances + , (void*)(uintptr_t)(draw.m_startIndex*2) + , draw.m_numInstances ) ); } } else { numPrimsSubmitted = numVertices/prim.m_div - prim.m_sub; - numInstances = state.m_numInstances; - numPrimsRendered = numPrimsSubmitted*state.m_numInstances; + numInstances = draw.m_numInstances; + numPrimsRendered = numPrimsSubmitted*draw.m_numInstances; GL_CHECK(glDrawArraysInstanced(prim.m_type , 0 , numVertices - , state.m_numInstances + , draw.m_numInstances ) ); } @@ -4374,12 +4624,12 @@ namespace bgfx } } - 
s_renderGL->blitMsaaFbo(); + blitMsaaFbo(); if (0 < _render->m_num) { captureElapsed = -bx::getHPCounter(); - s_renderGL->capture(); + capture(); captureElapsed += bx::getHPCounter(); } } @@ -4400,12 +4650,12 @@ namespace bgfx { double elapsedGpuMs = 0.0; #if BGFX_CONFIG_RENDERER_OPENGL - s_renderGL->m_queries.end(GL_TIME_ELAPSED); - uint64_t elapsedGl = s_renderGL->m_queries.getResult(0); + m_queries.end(GL_TIME_ELAPSED); + uint64_t elapsedGl = m_queries.getResult(0); elapsedGpuMs = double(elapsedGl)/1e6; #endif // BGFX_CONFIG_RENDERER_OPENGL - TextVideoMem& tvm = s_renderGL->m_textVideoMem; + TextVideoMem& tvm = m_textVideoMem; static int64_t next = now; @@ -4420,10 +4670,10 @@ namespace bgfx tvm.printf(0, pos++, BGFX_CONFIG_DEBUG ? 0x89 : 0x8f, " %s / " BX_COMPILER_NAME " / " BX_CPU_NAME " / " BX_ARCH_NAME " / " BX_PLATFORM_NAME " " , getRendererName() ); - tvm.printf(0, pos++, 0x0f, " Vendor: %s", s_renderGL->m_vendor); - tvm.printf(0, pos++, 0x0f, " Renderer: %s", s_renderGL->m_renderer); - tvm.printf(0, pos++, 0x0f, " Version: %s", s_renderGL->m_version); - tvm.printf(0, pos++, 0x0f, "GLSL version: %s", s_renderGL->m_glslVersion); + tvm.printf(0, pos++, 0x0f, " Vendor: %s", m_vendor); + tvm.printf(0, pos++, 0x0f, " Renderer: %s", m_renderer); + tvm.printf(0, pos++, 0x0f, " Version: %s", m_version); + tvm.printf(0, pos++, 0x0f, "GLSL version: %s", m_glslVersion); pos = 10; tvm.printf(10, pos++, 0x8e, " Frame CPU: %7.3f, % 7.3f \x1f, % 7.3f \x1e [ms] / % 6.2f FPS " diff --git a/src/renderer_gl.h b/src/renderer_gl.h index 40dab1ffc..22323c826 100644 --- a/src/renderer_gl.h +++ b/src/renderer_gl.h @@ -340,6 +340,86 @@ typedef uint64_t GLuint64; # define GL_COMPUTE_SHADER 0x91B9 #endif // GL_COMPUTE_SHADER +#ifndef GL_READ_ONLY +# define GL_READ_ONLY 0x88B8 +#endif // GL_READ_ONLY + +#ifndef GL_WRITE_ONLY +# define GL_WRITE_ONLY 0x88B9 +#endif // GL_WRITE_ONLY + +#ifndef GL_READ_WRITE +# define GL_READ_WRITE 0x88BA +#endif // GL_READ_WRITE + +#ifndef 
GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT +# define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001 +#endif // GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT + +#ifndef GL_ELEMENT_ARRAY_BARRIER_BIT +# define GL_ELEMENT_ARRAY_BARRIER_BIT 0x00000002 +#endif // GL_ELEMENT_ARRAY_BARRIER_BIT + +#ifndef GL_SHADER_IMAGE_ACCESS_BARRIER_BIT +# define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020 +#endif // GL_SHADER_IMAGE_ACCESS_BARRIER_BIT + +#ifndef GL_SHADER_STORAGE_BARRIER_BIT +# define GL_SHADER_STORAGE_BARRIER_BIT 0x00002000 +#endif // GL_SHADER_STORAGE_BARRIER_BIT + +#ifndef GL_SHADER_STORAGE_BUFFER +# define GL_SHADER_STORAGE_BUFFER 0x90D2 +#endif // GL_SHADER_STORAGE_BUFFER + +#ifndef GL_IMAGE_1D +# define GL_IMAGE_1D 0x904C +#endif // GL_IMAGE_1D + +#ifndef GL_IMAGE_2D +# define GL_IMAGE_2D 0x904D +#endif // GL_IMAGE_2D + +#ifndef GL_IMAGE_3D +# define GL_IMAGE_3D 0x904E +#endif // GL_IMAGE_3D + +#ifndef GL_IMAGE_CUBE +# define GL_IMAGE_CUBE 0x9050 +#endif // GL_IMAGE_CUBE + +#ifndef GL_PROGRAM_INPUT +# define GL_PROGRAM_INPUT 0x92E3 +#endif // GL_PROGRAM_INPUT + +#ifndef GL_ACTIVE_RESOURCES +# define GL_ACTIVE_RESOURCES 0x92F5 +#endif // GL_ACTIVE_RESOURCES + +#ifndef GL_UNIFORM +# define GL_UNIFORM 0x92E1 +#endif // GL_UNIFORM + +#ifndef GL_BUFFER_VARIABLE +# define GL_BUFFER_VARIABLE 0x92E5 +#endif // GL_BUFFER_VARIABLE + +#ifndef GL_UNSIGNED_INT_VEC2 +# define GL_UNSIGNED_INT_VEC2 0x8DC6 +#endif // GL_UNSIGNED_INT_VEC2 + +#ifndef GL_UNSIGNED_INT_VEC3 +# define GL_UNSIGNED_INT_VEC3 0x8DC7 +#endif // GL_UNSIGNED_INT_VEC3 + +#ifndef GL_UNSIGNED_INT_VEC4 +# define GL_UNSIGNED_INT_VEC4 0x8DC8 +#endif // GL_UNSIGNED_INT_VEC4 + +#ifndef GL_TYPE +# define GL_TYPE 0x92FA +#endif // GL_TYPE + #if BX_PLATFORM_NACL # include "glcontext_ppapi.h" #elif BX_PLATFORM_WINDOWS diff --git a/tools/shaderc/shaderc.cpp b/tools/shaderc/shaderc.cpp index ab866ac10..a4b5633d2 100644 --- a/tools/shaderc/shaderc.cpp +++ b/tools/shaderc/shaderc.cpp @@ -1651,6 +1651,7 @@ void addFragData(Preprocessor& 
_preprocessor, char* _data, uint32_t _idx, bool _ // 4.1 410 // 4.2 420 11.0 vhdgf+c 5.0 // 4.3 430 vhdgf+c +// 4.4 440 void help(const char* _error = NULL) { @@ -1991,8 +1992,6 @@ int main(int _argc, const char* _argv[]) } } -BX_TRACE("1"); - InOut shaderInputs; InOut shaderOutputs; uint32_t inputHash = 0; @@ -2056,75 +2055,255 @@ BX_TRACE("1"); } } -BX_TRACE("2"); - if (raw) { + bx::CrtFileWriter* writer = NULL; + + if (NULL != bin2c) { - bx::CrtFileWriter* writer = NULL; + writer = new Bin2cWriter(bin2c); + } + else + { + writer = new bx::CrtFileWriter; + } - if (NULL != bin2c) + if (0 != writer->open(outFilePath) ) + { + fprintf(stderr, "Unable to open output file '%s'.", outFilePath); + return EXIT_FAILURE; + } + + uint32_t inputHash = 0; + uint32_t outputHash = 0; + + if ('f' == shaderType) + { + bx::write(writer, BGFX_CHUNK_MAGIC_FSH); + bx::write(writer, inputHash); + } + else if ('v' == shaderType) + { + bx::write(writer, BGFX_CHUNK_MAGIC_VSH); + bx::write(writer, outputHash); + } + else + { + bx::write(writer, BGFX_CHUNK_MAGIC_CSH); + bx::write(writer, outputHash); + } + + if (glsl) + { + bx::write(writer, uint16_t(0) ); + + uint32_t shaderSize = (uint32_t)strlen(input); + bx::write(writer, shaderSize); + bx::write(writer, input, shaderSize); + bx::write(writer, uint8_t(0) ); + + compiled = true; + } + else + { + if (hlsl > 3) { - writer = new Bin2cWriter(bin2c); + compiled = compileHLSLShaderDx11(cmdLine, input, writer); } else { - writer = new bx::CrtFileWriter; - } - - if (0 != writer->open(outFilePath) ) - { - fprintf(stderr, "Unable to open output file '%s'.", outFilePath); - return EXIT_FAILURE; - } - - uint32_t inputHash = 0; - uint32_t outputHash = 0; - - if ('f' == shaderType) - { - bx::write(writer, BGFX_CHUNK_MAGIC_FSH); - bx::write(writer, inputHash); - } - else if ('v' == shaderType) - { - bx::write(writer, BGFX_CHUNK_MAGIC_VSH); - bx::write(writer, outputHash); - } - else - { - bx::write(writer, BGFX_CHUNK_MAGIC_CSH); - bx::write(writer, 
outputHash); + compiled = compileHLSLShaderDx9(cmdLine, input, writer); } + } + writer->close(); + delete writer; + } + else if ('c' == shaderType) // Compute + { + char* entry = strstr(input, "void main()"); + if (NULL == entry) + { + fprintf(stderr, "Shader entry point 'void main()' is not found.\n"); + } + else + { if (glsl) { - bx::write(writer, uint16_t(0) ); - - uint32_t shaderSize = (uint32_t)strlen(input); - bx::write(writer, shaderSize); - bx::write(writer, input, shaderSize); - bx::write(writer, uint8_t(0) ); - - compiled = true; } else { - if (hlsl > 3) + preprocessor.writef( + "#define lowp\n" + "#define mediump\n" + "#define highp\n" + "#define ivec2 int2\n" + "#define ivec3 int3\n" + "#define ivec4 int4\n" + "#define uvec2 uint2\n" + "#define uvec3 uint3\n" + "#define uvec4 uint4\n" + "#define vec2 float2\n" + "#define vec3 float3\n" + "#define vec4 float4\n" + "#define mat2 float2x2\n" + "#define mat3 float3x3\n" + "#define mat4 float4x4\n" + ); + + entry[4] = '_'; + + preprocessor.writef("#define void_main()"); + preprocessor.writef(" \\\n\tvoid main("); + + uint32_t arg = 0; + + const bool hasLocalInvocationID = NULL != strstr(input, "gl_LocalInvocationID"); + const bool hasLocalInvocationIndex = NULL != strstr(input, "gl_LocalInvocationIndex"); + const bool hasGlobalInvocationID = NULL != strstr(input, "gl_GlobalInvocationID"); + const bool hasWorkGroupID = NULL != strstr(input, "gl_WorkGroupID"); + + if (hasLocalInvocationID) { - compiled = compileHLSLShaderDx11(cmdLine, preprocessor.m_preprocessed, writer); + preprocessor.writef( + " \\\n\t%sint3 gl_LocalInvocationID : SV_GroupThreadID" + , arg++ > 0 ? ", " : " " + ); } - else + + if (hasLocalInvocationIndex) { - compiled = compileHLSLShaderDx9(cmdLine, preprocessor.m_preprocessed, writer); + preprocessor.writef( + " \\\n\t%sint gl_LocalInvocationIndex : SV_GroupIndex" + , arg++ > 0 ? 
", " : " " + ); } + + if (hasGlobalInvocationID) + { + preprocessor.writef( + " \\\n\t%sint3 gl_GlobalInvocationID : SV_DispatchThreadID" + , arg++ > 0 ? ", " : " " + ); + } + + if (hasWorkGroupID) + { + preprocessor.writef( + " \\\n\t%sint3 gl_WorkGroupID : SV_GroupID" + , arg++ > 0 ? ", " : " " + ); + } + + preprocessor.writef( + " \\\n\t)\n" + ); } - writer->close(); - delete writer; + if (preprocessor.run(input) ) + { + BX_TRACE("Input file: %s", filePath); + BX_TRACE("Output file: %s", outFilePath); + + if (preprocessOnly) + { + bx::CrtFileWriter writer; + + if (0 != writer.open(outFilePath) ) + { + fprintf(stderr, "Unable to open output file '%s'.", outFilePath); + return EXIT_FAILURE; + } + + writer.write(preprocessor.m_preprocessed.c_str(), (int32_t)preprocessor.m_preprocessed.size() ); + writer.close(); + + return EXIT_SUCCESS; + } + + { + bx::CrtFileWriter* writer = NULL; + + if (NULL != bin2c) + { + writer = new Bin2cWriter(bin2c); + } + else + { + writer = new bx::CrtFileWriter; + } + + if (0 != writer->open(outFilePath) ) + { + fprintf(stderr, "Unable to open output file '%s'.", outFilePath); + return EXIT_FAILURE; + } + + bx::write(writer, BGFX_CHUNK_MAGIC_CSH); + bx::write(writer, outputHash); + + if (glsl) + { + std::string code; + + if (gles) + { + bx::stringPrintf(code, "#version 310 es\n"); + } + else + { + int32_t version = atoi(profile); + bx::stringPrintf(code, "#version %d\n", version == 0 ? 
430 : version); + } + + code += preprocessor.m_preprocessed; +#if 1 + bx::write(writer, uint16_t(0) ); + + uint32_t shaderSize = (uint32_t)code.size(); + bx::write(writer, shaderSize); + bx::write(writer, code.c_str(), shaderSize); + bx::write(writer, uint8_t(0) ); + + compiled = true; +#else + compiled = compileGLSLShader(cmdLine, gles, code, writer); +#endif // 0 + } + else + { + if (hlsl > 3) + { + compiled = compileHLSLShaderDx11(cmdLine, preprocessor.m_preprocessed, writer); + } + else + { + compiled = compileHLSLShaderDx9(cmdLine, preprocessor.m_preprocessed, writer); + } + } + + writer->close(); + delete writer; + } + + if (compiled) + { + if (depends) + { + std::string ofp = outFilePath; + ofp += ".d"; + bx::CrtFileWriter writer; + if (0 == writer.open(ofp.c_str() ) ) + { + writef(&writer, "%s : %s\n", outFilePath, preprocessor.m_depends.c_str() ); + writer.close(); + } + } + } + } } } - else + else // Vertex/Fragment { char* entry = strstr(input, "void main()"); if (NULL == entry) @@ -2204,6 +2383,9 @@ BX_TRACE("2"); "#define ivec2 int2\n" "#define ivec3 int3\n" "#define ivec4 int4\n" + "#define uvec2 uint2\n" + "#define uvec3 uint3\n" + "#define uvec4 uint4\n" "#define vec2 float2\n" "#define vec3 float3\n" "#define vec4 float4\n"