Merge pull request #834 from attilaz/metal_wip2

metal backend
This commit is contained in:
Branimir Karadžić 2016-07-01 07:55:54 -07:00 committed by GitHub
commit b7913b4dbf
2 changed files with 124 additions and 52 deletions

View File

@ -23,6 +23,8 @@ namespace bgfx { namespace mtl
// objects with creation functions starting with 'new' have a refcount of 1 after creation; such objects must be destroyed with release.
// commandBuffer and commandEncoders are autoreleased objects, so an AutoreleasePool is required!
#define MTL_MAX_FRAMES_IN_FLIGHT (3)
#define MTL_CLASS(name) \
class name \
{ \
@ -35,6 +37,13 @@ namespace bgfx { namespace mtl
typedef void (*mtlCallback)(void* userData);
// Wrapper class named BlitCommandEncoder, declared through the MTL_CLASS / MTL_CLASS_END macro pair.
MTL_CLASS(BlitCommandEncoder)
// Forwards the endEncoding message to the wrapped object m_obj.
void endEncoding()
{
[m_obj endEncoding];
}
MTL_CLASS_END
MTL_CLASS(Buffer)
void* contents()
{
@ -148,8 +157,14 @@ namespace bgfx { namespace mtl
id<MTLLibrary> newLibraryWithSource(const char* _source)
{
MTLCompileOptions* options = [MTLCompileOptions new];
//NOTE: fast math is turned off on iOS because 'When using the fast variants, math functions execute more quickly,
// but operate over a **LIMITED RANGE** and their behavior when handling NaN values is not defined.'
if (BX_ENABLED(BX_PLATFORM_IOS))
options.fastMathEnabled = NO;
NSError* error;
id<MTLLibrary> lib = [m_obj newLibraryWithSource:@(_source) options:nil error:&error];
id<MTLLibrary> lib = [m_obj newLibraryWithSource:@(_source) options:options error:&error];
BX_WARN(NULL == error
, "Shader compilation failed: %s"
, [error.localizedDescription cStringUsingEncoding:NSASCIIStringEncoding]
@ -583,10 +598,12 @@ namespace bgfx { namespace mtl
// Buffer resource that keeps one Buffer slot per in-flight frame: m_buffers has
// MTL_MAX_FRAMES_IN_FLIGHT entries and m_bufferIndex selects the one returned by getBuffer().
// NOTE(review): this span is taken from a rendered diff without +/- markers; the lines that
// reference the single m_buffer member look like the removed pre-change versions of the
// neighbouring m_buffers[] lines — confirm against the actual header before relying on them.
struct BufferMtl
{
// Starts with no flags, non-dynamic, slot index 0 and every slot set to NULL.
BufferMtl()
: m_buffer(NULL)
, m_flags(BGFX_BUFFER_NONE)
: m_flags(BGFX_BUFFER_NONE)
, m_dynamic(false)
, m_bufferIndex(0)
{
for (uint32_t ii = 0; ii < MTL_MAX_FRAMES_IN_FLIGHT; ++ii)
m_buffers[ii] = NULL;
}
// Declared here only; the definition is not part of this struct body.
void create(uint32_t _size, void* _data, uint16_t _flags, uint16_t _stride = 0, bool _vertex = false);
@ -594,18 +611,22 @@ namespace bgfx { namespace mtl
// Passes every slot to MTL_RELEASE and clears the dynamic flag.
void destroy()
{
if (NULL != m_buffer)
for (uint32_t ii = 0; ii < MTL_MAX_FRAMES_IN_FLIGHT; ++ii)
{
[m_buffer release];
m_buffer = NULL;
m_dynamic = false;
MTL_RELEASE(m_buffers[ii]);
}
m_dynamic = false;
}
// Returns the slot currently selected by m_bufferIndex.
Buffer getBuffer() const { return m_buffers[m_bufferIndex]; }
Buffer m_buffer;
uint32_t m_size;
uint16_t m_flags;
bool m_dynamic;
// The slot array and its index are private; outside code reads the active slot through getBuffer().
private:
uint8_t m_bufferIndex;
Buffer m_buffers[MTL_MAX_FRAMES_IN_FLIGHT];
};
typedef BufferMtl IndexBufferMtl;

View File

@ -18,34 +18,35 @@
#import <Foundation/Foundation.h>
#define UNIFORM_BUFFER_SIZE (8*1024*1024)
#define UNIFORM_BUFFER_COUNT (3)
/*
// known metal shader generation issues:
03-raymarch: OSX nothing is visible ( depth/color order should be swapped in fragment output struct)
15-shadowmaps-simple: shader compilation error
16-shadowmaps: //problem with essl -> metal: SAMPLER2D(u_shadowMap0, 4); sampler index is lost. Shadowmap is set to slot 4, but
metal shader uses sampler/texture slot 0. this could require changes outside of renderer_mtl?
packFloatToRGBA needs highp. currently it uses half.
24-nbody: no generated compute shaders for metal
27-terrain: shaderc generates invalid metal shader for vs_terrain_height_texture. vertex output: half4 gl_Position [[position]], should be float4
Known issues(driver problems??):
OSX mac mini(late 2014), OSX10.11.3 : nanovg-rendering: color writemask off causes problem...
iPad mini 2, iOS 8.1.1: 21-deferred: scissor not working properly
26-occlusion: doesn't work with two rendercommandencoders, merge should fix this
TODO: check if swap really solves this? 03-raymarch: OSX nothing is visible ( depth/color order should be swapped in fragment output struct)
iPad mini 2, iOS 8.1.1: 21-deferred: scissor not working properly
26-occlusion: query doesn't work with two rendercommandencoders, merge should fix this
Only on this device ( no problem on iPad Air 2 with iOS9.3.1)
TODOs:
07-callback, saveScreenshot should be implemented with one frame latency (using saveScreenshotBegin and End)
- iOS device orientation change is not handled properly
22-windows: todo support multiple windows
- optimization: remove heavy sync, merge views with same fb and no clear.
- optimization: remove sync points, merge views with same fb and no clear.
13-stencil and 16-shadowmaps are very inefficient. every view stores/loads backbuffer data
- 15-shadowmaps-simple (example needs modification mtxCrop znew = z * 0.5 + 0.5 is not needed ) could be hacked in shader too
BGFX_RESET_FLIP_AFTER_RENDER on low level renderers should be true? (crashes even with BGFX_RESET_FLIP_AFTER_RENDER because there is
one rendering frame before reset). Do I absolutely need to send the result to the View at flip, or can I do it in submit?
*/
@ -334,7 +335,7 @@ namespace bgfx { namespace mtl
: m_metalLayer(NULL)
, m_backBufferPixelFormatHash(0)
, m_maxAnisotropy(1)
, m_uniformBufferIndex(0)
, m_bufferIndex(0)
, m_numWindows(1)
, m_rtMsaa(false)
, m_drawable(NULL)
@ -405,7 +406,8 @@ namespace bgfx { namespace mtl
m_textureDescriptor = newTextureDescriptor();
m_samplerDescriptor = newSamplerDescriptor();
for (uint8_t i=0; i < UNIFORM_BUFFER_COUNT; ++i)
m_framesSemaphore.post(MTL_MAX_FRAMES_IN_FLIGHT);
for (uint8_t i=0; i < MTL_MAX_FRAMES_IN_FLIGHT; ++i)
{
m_uniformBuffers[i] = m_device.newBufferWithLength(UNIFORM_BUFFER_SIZE, 0);
}
@ -585,7 +587,7 @@ namespace bgfx { namespace mtl
MTL_RELEASE(m_backBufferStencil);
}
for (uint8_t i=0; i < UNIFORM_BUFFER_COUNT; ++i)
for (uint8_t i=0; i < MTL_MAX_FRAMES_IN_FLIGHT; ++i)
{
MTL_RELEASE(m_uniformBuffers[i]);
}
@ -806,7 +808,7 @@ namespace bgfx { namespace mtl
return;
}
//TODO: we should wait for completion of pending commandBuffers
sync();
//TODO: implement this with saveScreenshotBegin/End
Texture backBuffer = m_drawable.texture;
@ -908,7 +910,7 @@ namespace bgfx { namespace mtl
}
VertexBufferMtl& vb = m_vertexBuffers[_blitter.m_vb->handle.idx];
rce.setVertexBuffer(vb.m_buffer, 0, 1);
rce.setVertexBuffer(vb.getBuffer(), 0, 1);
float proj[16];
bx::mtxOrtho(proj, 0.0f, (float)width, (float)height, 0.0f, 0.0f, 1000.0f);
@ -925,13 +927,20 @@ namespace bgfx { namespace mtl
const uint32_t numVertices = _numIndices*4/6;
if (0 < numVertices)
{
m_indexBuffers [_blitter.m_ib->handle.idx].update(0, _numIndices*2, _blitter.m_ib->data);
m_indexBuffers [_blitter.m_ib->handle.idx].update(0, _numIndices*2, _blitter.m_ib->data, true);
m_vertexBuffers[_blitter.m_vb->handle.idx].update(0, numVertices*_blitter.m_decl.m_stride, _blitter.m_vb->data, true);
m_renderCommandEncoder.drawIndexedPrimitives(MTLPrimitiveTypeTriangle, _numIndices, MTLIndexTypeUInt16, m_indexBuffers[_blitter.m_ib->handle.idx].m_buffer, 0, 1);
m_renderCommandEncoder.drawIndexedPrimitives(MTLPrimitiveTypeTriangle, _numIndices, MTLIndexTypeUInt16, m_indexBuffers[_blitter.m_ib->handle.idx].getBuffer(), 0, 1);
}
}
// Completion callback handed to addCompletedHandler() in flip(), with the renderer as user data.
// Posts m_framesSemaphore once (the semaphore submit() waits on); does nothing when _data is NULL.
static void commandBufferFinishedCallback(void* _data)
{
	RendererContextMtl* context = (RendererContextMtl*)_data;
	if (NULL == context)
	{
		return;
	}

	context->m_framesSemaphore.post();
}
void flip(HMD& /*_hmd*/) BX_OVERRIDE
{
if (NULL == m_drawable
@ -944,11 +953,13 @@ namespace bgfx { namespace mtl
m_commandBuffer.presentDrawable(m_drawable);
MTL_RELEASE(m_drawable);
m_commandBuffer.addCompletedHandler(commandBufferFinishedCallback, this);
m_commandBuffer.commit();
// using heavy syncing now
// TODO: refactor it with double/triple buffering frame data
m_commandBuffer.waitUntilCompleted();
MTL_RELEASE(m_prevCommandBuffer);
m_prevCommandBuffer = m_commandBuffer;
retain(m_commandBuffer);
MTL_RELEASE(m_commandBuffer);
@ -1306,6 +1317,29 @@ namespace bgfx { namespace mtl
return m_backBufferDepth.height();
}
// Calls waitUntilCompleted() on m_prevCommandBuffer (the command buffer saved by flip());
// returns immediately when no previous command buffer is set.
void sync()
{
	if (NULL == m_prevCommandBuffer)
	{
		return;
	}

	m_prevCommandBuffer.waitUntilCompleted();
}
// Returns the cached blit command encoder, creating it on first use.
// If no command buffer exists yet, one is obtained from m_commandQueue and retained
// before the encoder is requested from it.
BlitCommandEncoder getBlitCommandEncoder()
{
	// Fast path: an encoder is already cached.
	if (NULL != m_blitCommandEncoder)
	{
		return m_blitCommandEncoder;
	}

	if (NULL == m_commandBuffer)
	{
		m_commandBuffer = m_commandQueue.commandBuffer();
		retain(m_commandBuffer);
	}

	m_blitCommandEncoder = m_commandBuffer.blitCommandEncoder();
	return m_blitCommandEncoder;
}
Device m_device;
CommandQueue m_commandQueue;
CAMetalLayer* m_metalLayer;
@ -1320,11 +1354,14 @@ namespace bgfx { namespace mtl
OcclusionQueryMTL m_occlusionQuery;
bx::Semaphore m_framesSemaphore;
Buffer m_uniformBuffer;
Buffer m_uniformBuffers[UNIFORM_BUFFER_COUNT];
Buffer m_uniformBuffers[MTL_MAX_FRAMES_IN_FLIGHT];
uint32_t m_uniformBufferVertexOffset;
uint32_t m_uniformBufferFragmentOffset;
uint8_t m_uniformBufferIndex;
uint8_t m_bufferIndex;
uint16_t m_numWindows;
FrameBufferHandle m_windows[BGFX_CONFIG_MAX_FRAME_BUFFERS];
@ -1361,6 +1398,8 @@ namespace bgfx { namespace mtl
// currently active objects data
id <CAMetalDrawable> m_drawable;
CommandBuffer m_commandBuffer;
CommandBuffer m_prevCommandBuffer;
BlitCommandEncoder m_blitCommandEncoder;
RenderCommandEncoder m_renderCommandEncoder;
};
@ -1454,14 +1493,6 @@ namespace bgfx { namespace mtl
char* temp = (char*)alloca(tempLen);
bx::StaticMemoryBlockWriter writer(temp, tempLen);
//TODO: remove this hack. some shaders have problem with half<->float conversion
writeString(&writer
, "#define half float\n"
"#define half2 float2\n"
"#define half3 float3\n"
"#define half4 float4\n"
);
bx::write(&writer, code, codeLen);
bx::write(&writer, '\0');
code = temp;
@ -1892,14 +1923,16 @@ namespace bgfx { namespace mtl
m_size = _size;
m_flags = _flags;
m_dynamic = false; //NULL == _data;
if (NULL == _data)
{
m_buffer = s_renderMtl->m_device.newBufferWithLength(_size, 0);
for (uint32_t ii = 0; ii < MTL_MAX_FRAMES_IN_FLIGHT; ++ii)
m_buffers[ii] = s_renderMtl->m_device.newBufferWithLength(_size, 0);
}
else
{
m_buffer = s_renderMtl->m_device.newBufferWithBytes(_data, _size, 0);
m_buffers[0] = s_renderMtl->m_device.newBufferWithBytes(_data, _size, 0);
}
}
@ -1907,7 +1940,12 @@ namespace bgfx { namespace mtl
{
BX_UNUSED(_discard);
memcpy( (uint8_t*)m_buffer.contents() + _offset, _data, _size);
//TODO: cannot call this more than once per frame
if ( m_dynamic && _discard )
m_bufferIndex = (m_bufferIndex + 1) % MTL_MAX_FRAMES_IN_FLIGHT;
else
s_renderMtl->sync();
memcpy( (uint8_t*)getBuffer().contents() + _offset, _data, _size);
}
void VertexBufferMtl::create(uint32_t _size, void* _data, VertexDeclHandle _declHandle, uint16_t _flags)
@ -2007,7 +2045,7 @@ namespace bgfx { namespace mtl
desc.storageMode = (MTLStorageMode)(writeOnly||isDepth(TextureFormat::Enum(m_textureFormat))
? 2 /*MTLStorageModePrivate*/
: 1 /*MTLStorageModeManaged*/
: ((BX_ENABLED(BX_PLATFORM_IOS)) ? 0 /* MTLStorageModeShared */ : 1 /*MTLStorageModeManaged*/)
);
desc.usage = MTLTextureUsageShaderRead;
@ -2109,6 +2147,8 @@ namespace bgfx { namespace mtl
void TextureMtl::update(uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem)
{
s_renderMtl->sync();
MTLRegion region =
{
{ _rect.m_x, _rect.m_y, _z },
@ -2268,8 +2308,19 @@ namespace bgfx { namespace mtl
void RendererContextMtl::submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter) BX_OVERRIDE
{
m_commandBuffer = m_commandQueue.commandBuffer();
retain(m_commandBuffer); // keep alive to be useable at 'flip'
m_framesSemaphore.wait();
if ( m_commandBuffer == NULL )
{
m_commandBuffer = m_commandQueue.commandBuffer();
retain(m_commandBuffer); // keep alive to be useable at 'flip'
}
if ( m_blitCommandEncoder )
{
m_blitCommandEncoder.endEncoding();
m_blitCommandEncoder = 0;
}
//TODO: multithreading with multiple commandbuffer
// is there a FAST way to tell which view is active?
@ -2280,8 +2331,8 @@ namespace bgfx { namespace mtl
retain(m_drawable); // keep alive to be useable at 'flip'
#endif
m_uniformBuffer = m_uniformBuffers[m_uniformBufferIndex];
m_uniformBufferIndex = (m_uniformBufferIndex + 1) % UNIFORM_BUFFER_COUNT;
m_uniformBuffer = m_uniformBuffers[m_bufferIndex];
m_bufferIndex = (m_bufferIndex + 1) % MTL_MAX_FRAMES_IN_FLIGHT;
m_uniformBufferVertexOffset = 0;
m_uniformBufferFragmentOffset = 0;
@ -2299,13 +2350,13 @@ namespace bgfx { namespace mtl
if (0 < _render->m_iboffset)
{
TransientIndexBuffer* ib = _render->m_transientIb;
m_indexBuffers[ib->handle.idx].update(0, _render->m_iboffset, ib->data);
m_indexBuffers[ib->handle.idx].update(0, _render->m_iboffset, ib->data, true);
}
if (0 < _render->m_vboffset)
{
TransientVertexBuffer* vb = _render->m_transientVb;
m_vertexBuffers[vb->handle.idx].update(0, _render->m_vboffset, vb->data);
m_vertexBuffers[vb->handle.idx].update(0, _render->m_vboffset, vb->data, true);
}
_render->sort();
@ -2818,12 +2869,12 @@ namespace bgfx { namespace mtl
const VertexDecl& vertexDecl = m_vertexDecls[decl];
uint32_t offset = draw.m_startVertex * vertexDecl.getStride();
rce.setVertexBuffer(vb.m_buffer, offset, 1);
rce.setVertexBuffer(vb.getBuffer(), offset, 1);
if (isValid(draw.m_instanceDataBuffer) )
{
const VertexBufferMtl& inst = m_vertexBuffers[draw.m_instanceDataBuffer.idx];
rce.setVertexBuffer(inst.m_buffer, draw.m_instanceDataOffset, 2);
rce.setVertexBuffer(inst.getBuffer(), draw.m_instanceDataOffset, 2);
}
}
}
@ -2868,7 +2919,7 @@ namespace bgfx { namespace mtl
numInstances = draw.m_numInstances;
numPrimsRendered = numPrimsSubmitted*draw.m_numInstances;
rce.drawIndexedPrimitives(prim.m_type, numIndices, indexType, ib.m_buffer, 0, draw.m_numInstances);
rce.drawIndexedPrimitives(prim.m_type, numIndices, indexType, ib.getBuffer(), 0, draw.m_numInstances);
}
else if (prim.m_min <= draw.m_numIndices)
{
@ -2878,7 +2929,7 @@ namespace bgfx { namespace mtl
numInstances = draw.m_numInstances;
numPrimsRendered = numPrimsSubmitted*draw.m_numInstances;
rce.drawIndexedPrimitives(prim.m_type, numIndices, indexType, ib.m_buffer, draw.m_startIndex * indexSize,numInstances);
rce.drawIndexedPrimitives(prim.m_type, numIndices, indexType, ib.getBuffer(), draw.m_startIndex * indexSize,numInstances);
}
}
else