Added GPU timer.

This commit is contained in:
Branimir Karadžić 2015-05-12 17:03:25 -07:00
parent 146829b057
commit 7d50012dbe
5 changed files with 278 additions and 6 deletions

View File

@ -1468,6 +1468,8 @@ BX_PRAGMA_DIAGNOSTIC_POP();
{
ovrPreReset();
m_gpuTimer.destroy();
if (NULL == g_platformData.backBufferDS)
{
DX_RELEASE(m_backBufferDepthStencil, 0);
@ -1510,6 +1512,8 @@ BX_PRAGMA_DIAGNOSTIC_POP();
DX_RELEASE(color, 0);
}
m_gpuTimer.create();
ovrPostReset();
// If OVR doesn't create separate depth stencil view, create default one.
@ -2608,9 +2612,10 @@ BX_PRAGMA_DIAGNOSTIC_POP();
uint16_t m_numWindows;
FrameBufferHandle m_windows[BGFX_CONFIG_MAX_FRAME_BUFFERS];
ID3D11Device* m_device;
ID3D11DeviceContext* m_deviceCtx;
ID3D11InfoQueue* m_infoQueue;
ID3D11Device* m_device;
ID3D11DeviceContext* m_deviceCtx;
ID3D11InfoQueue* m_infoQueue;
TimerQueryD3D11 m_gpuTimer;
ID3D11RenderTargetView* m_backBufferColor;
ID3D11DepthStencilView* m_backBufferDepthStencil;
@ -3471,6 +3476,88 @@ BX_PRAGMA_DIAGNOSTIC_POP();
}
}
void TimerQueryD3D11::create()
{
ID3D11Device* device = s_renderD3D11->m_device;
D3D11_QUERY_DESC query;
query.MiscFlags = 0;
for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii)
{
Frame& frame = m_frame[ii];
query.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
DX_CHECK(device->CreateQuery(&query, &frame.m_disjoint) );
query.Query = D3D11_QUERY_TIMESTAMP;
DX_CHECK(device->CreateQuery(&query, &frame.m_start) );
DX_CHECK(device->CreateQuery(&query, &frame.m_end) );
}
m_elapsed = 0;
m_frequency = 1;
}
void TimerQueryD3D11::destroy()
{
for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii)
{
Frame& frame = m_frame[ii];
DX_RELEASE(frame.m_disjoint, 0);
DX_RELEASE(frame.m_start, 0);
DX_RELEASE(frame.m_end, 0);
}
}
void TimerQueryD3D11::begin()
{
ID3D11DeviceContext* deviceCtx = s_renderD3D11->m_deviceCtx;
while (0 == m_control.reserve(1) )
{
get();
}
Frame& frame = m_frame[m_control.m_current];
deviceCtx->Begin(frame.m_disjoint);
deviceCtx->End(frame.m_start);
}
void TimerQueryD3D11::end()
{
ID3D11DeviceContext* deviceCtx = s_renderD3D11->m_deviceCtx;
Frame& frame = m_frame[m_control.m_current];
deviceCtx->End(frame.m_end);
deviceCtx->End(frame.m_disjoint);
m_control.commit(1);
}
bool TimerQueryD3D11::get()
{
ID3D11DeviceContext* deviceCtx = s_renderD3D11->m_deviceCtx;
Frame& frame = m_frame[m_control.m_read];
uint64_t end;
HRESULT hr = deviceCtx->GetData(frame.m_end, &end, sizeof(end), 0);
if (S_OK == hr)
{
m_control.consume(1);
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
deviceCtx->GetData(frame.m_disjoint, &disjoint, sizeof(disjoint), 0);
uint64_t start;
deviceCtx->GetData(frame.m_start, &start, sizeof(start), 0);
m_frequency = disjoint.Frequency;
m_elapsed = end - start;
return true;
}
return false;
}
void RendererContextD3D11::submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter)
{
PIX_BEGINEVENT(D3DCOLOR_RGBA(0xff, 0x00, 0x00, 0xff), L"rendererSubmit");
@ -3482,6 +3569,11 @@ BX_PRAGMA_DIAGNOSTIC_POP();
int64_t elapsed = -bx::getHPCounter();
int64_t captureElapsed = 0;
if (_render->m_debug & (BGFX_DEBUG_IFH|BGFX_DEBUG_STATS) )
{
m_gpuTimer.begin();
}
if (0 < _render->m_iboffset)
{
TransientIndexBuffer* ib = _render->m_transientIb;
@ -4256,6 +4348,20 @@ BX_PRAGMA_DIAGNOSTIC_POP();
{
PIX_BEGINEVENT(D3DCOLOR_RGBA(0x40, 0x40, 0x40, 0xff), L"debugstats");
static uint32_t maxGpuLatency = 0;
static double maxGpuElapsed = 0.0f;
double elapsedGpuMs = 0.0;
m_gpuTimer.end();
while (m_gpuTimer.get() )
{
double toGpuMs = 1000.0 / double(m_gpuTimer.m_frequency);
elapsedGpuMs = m_gpuTimer.m_elapsed * toGpuMs;
maxGpuElapsed = elapsedGpuMs > maxGpuElapsed ? elapsedGpuMs : maxGpuElapsed;
}
maxGpuLatency = bx::uint32_max(maxGpuLatency, m_gpuTimer.m_control.available()-1);
TextVideoMem& tvm = m_textVideoMem;
static int64_t next = now;
@ -4314,12 +4420,18 @@ BX_PRAGMA_DIAGNOSTIC_POP();
);
double elapsedCpuMs = double(elapsed)*toMs;
tvm.printf(10, pos++, 0x8e, " Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms]"
tvm.printf(10, pos++, 0x8e, " Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms] %c GPU %3.4f [ms] (latency %d)"
, _render->m_num
, statsKeyType[0]
, statsKeyType[1]
, elapsedCpuMs
, elapsedCpuMs > maxGpuElapsed ? '>' : '<'
, maxGpuElapsed
, maxGpuLatency
);
maxGpuLatency = 0;
maxGpuElapsed = 0.0;
for (uint32_t ii = 0; ii < BX_COUNTOF(s_primName); ++ii)
{
tvm.printf(10, pos++, 0x8e, " %9s: %7d (#inst: %5d), submitted: %7d, indirect %7d"

View File

@ -297,6 +297,33 @@ namespace bgfx { namespace d3d11
TextureHandle m_th[BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS];
};
struct TimerQueryD3D11
{
TimerQueryD3D11()
: m_control(BX_COUNTOF(m_frame) )
{
}
void create();
void destroy();
void begin();
void end();
bool get();
struct Frame
{
ID3D11Query* m_disjoint;
ID3D11Query* m_start;
ID3D11Query* m_end;
};
uint64_t m_elapsed;
uint64_t m_frequency;
Frame m_frame[4];
bx::RingBufferControl m_control;
};
} /* namespace d3d11 */ } // namespace bgfx
#endif // BGFX_RENDERER_D3D11_H_HEADER_GUARD

View File

@ -1239,6 +1239,8 @@ namespace bgfx { namespace d3d9
capturePreReset();
m_gpuTimer.destroy();
for (uint32_t ii = 0; ii < BX_COUNTOF(m_indexBuffers); ++ii)
{
m_indexBuffers[ii].preReset();
@ -1266,6 +1268,8 @@ namespace bgfx { namespace d3d9
DX_CHECK(m_swapChain->GetBackBuffer(0, D3DBACKBUFFER_TYPE_MONO, &m_backBufferColor) );
DX_CHECK(m_device->GetDepthStencilSurface(&m_backBufferDepthStencil) );
m_gpuTimer.create();
capturePostReset();
for (uint32_t ii = 0; ii < BX_COUNTOF(m_indexBuffers); ++ii)
@ -1719,8 +1723,9 @@ namespace bgfx { namespace d3d9
IDirect3DDevice9Ex* m_deviceEx;
#endif // BGFX_CONFIG_RENDERER_DIRECT3D9EX
IDirect3D9* m_d3d9;
IDirect3D9* m_d3d9;
IDirect3DDevice9* m_device;
TimerQueryD3D9 m_gpuTimer;
D3DPOOL m_pool;
IDirect3DSwapChain9* m_swapChain;
@ -2892,6 +2897,80 @@ namespace bgfx { namespace d3d9
) );
}
void TimerQueryD3D9::create()
{
IDirect3DDevice9* device = s_renderD3D9->m_device;
for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii)
{
Frame& frame = m_frame[ii];
DX_CHECK(device->CreateQuery(D3DQUERYTYPE_TIMESTAMPDISJOINT, &frame.m_disjoint) );
DX_CHECK(device->CreateQuery(D3DQUERYTYPE_TIMESTAMP, &frame.m_start) );
DX_CHECK(device->CreateQuery(D3DQUERYTYPE_TIMESTAMP, &frame.m_end) );
DX_CHECK(device->CreateQuery(D3DQUERYTYPE_TIMESTAMPFREQ, &frame.m_freq) );
}
m_elapsed = 0;
m_frequency = 1;
}
void TimerQueryD3D9::destroy()
{
for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii)
{
Frame& frame = m_frame[ii];
DX_RELEASE(frame.m_disjoint, 0);
DX_RELEASE(frame.m_start, 0);
DX_RELEASE(frame.m_end, 0);
DX_RELEASE(frame.m_freq, 0);
}
}
void TimerQueryD3D9::begin()
{
while (0 == m_control.reserve(1) )
{
get();
}
Frame& frame = m_frame[m_control.m_current];
frame.m_disjoint->Issue(D3DISSUE_BEGIN);
frame.m_start->Issue(D3DISSUE_END);
}
// Closes the GPU timing interval opened by begin().
//
// Issues the end timestamp, closes the disjoint bracket, issues the
// frequency query and commits the slot to the ring so get() can pick it up.
void TimerQueryD3D9::end()
{
	Frame& frame = m_frame[m_control.m_current];
	frame.m_end->Issue(D3DISSUE_END);

	// begin() issues D3DISSUE_BEGIN on the disjoint query; without a matching
	// END the query is left open and is re-begun when the slot is reused.
	frame.m_disjoint->Issue(D3DISSUE_END);

	// Keep the frequency query last: get() polls it to decide whether the
	// whole slot has finished.
	frame.m_freq->Issue(D3DISSUE_END);
	m_control.commit(1);
}
bool TimerQueryD3D9::get()
{
Frame& frame = m_frame[m_control.m_read];
uint64_t freq;
HRESULT hr = frame.m_freq->GetData(&freq, sizeof(freq), 0);
if (S_OK == hr)
{
m_control.consume(1);
uint64_t start;
DX_CHECK(frame.m_start->GetData(&start, sizeof(start), 0) );
uint64_t end;
DX_CHECK(frame.m_end->GetData(&end, sizeof(end), 0) );
m_frequency = freq;
m_elapsed = end - start;
return true;
}
return false;
}
void RendererContextD3D9::submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter)
{
IDirect3DDevice9* device = m_device;
@ -2905,6 +2984,11 @@ namespace bgfx { namespace d3d9
device->BeginScene();
if (_render->m_debug & (BGFX_DEBUG_IFH|BGFX_DEBUG_STATS) )
{
m_gpuTimer.begin();
}
if (0 < _render->m_iboffset)
{
TransientIndexBuffer* ib = _render->m_transientIb;
@ -3453,6 +3537,20 @@ namespace bgfx { namespace d3d9
{
PIX_BEGINEVENT(D3DCOLOR_RGBA(0x40, 0x40, 0x40, 0xff), L"debugstats");
static uint32_t maxGpuLatency = 0;
static double maxGpuElapsed = 0.0f;
double elapsedGpuMs = 0.0;
m_gpuTimer.end();
while (m_gpuTimer.get() )
{
double toGpuMs = 1000.0 / double(m_gpuTimer.m_frequency);
elapsedGpuMs = m_gpuTimer.m_elapsed * toGpuMs;
maxGpuElapsed = elapsedGpuMs > maxGpuElapsed ? elapsedGpuMs : maxGpuElapsed;
}
maxGpuLatency = bx::uint32_max(maxGpuLatency, m_gpuTimer.m_control.available()-1);
TextVideoMem& tvm = m_textVideoMem;
static int64_t next = now;
@ -3490,12 +3588,18 @@ namespace bgfx { namespace d3d9
);
double elapsedCpuMs = double(elapsed)*toMs;
tvm.printf(10, pos++, 0x8e, " Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms]"
tvm.printf(10, pos++, 0x8e, " Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms] %c GPU %3.4f [ms] (latency %d)"
, _render->m_num
, statsKeyType[0]
, statsKeyType[1]
, elapsedCpuMs
, elapsedCpuMs > maxGpuElapsed ? '>' : '<'
, maxGpuElapsed
, maxGpuLatency
);
maxGpuLatency = 0;
maxGpuElapsed = 0.0;
for (uint32_t ii = 0; ii < BX_COUNTOF(s_primName); ++ii)
{
tvm.printf(10, pos++, 0x8e, " %9s: %7d (#inst: %5d), submitted: %7d"

View File

@ -389,6 +389,34 @@ namespace bgfx { namespace d3d9
bool m_needResolve;
};
struct TimerQueryD3D9
{
TimerQueryD3D9()
: m_control(BX_COUNTOF(m_frame) )
{
}
void create();
void destroy();
void begin();
void end();
bool get();
struct Frame
{
IDirect3DQuery9* m_disjoint;
IDirect3DQuery9* m_start;
IDirect3DQuery9* m_end;
IDirect3DQuery9* m_freq;
};
uint64_t m_elapsed;
uint64_t m_frequency;
Frame m_frame[4];
bx::RingBufferControl m_control;
};
} /* namespace d3d9 */ } // namespace bgfx
#endif // BGFX_RENDERER_D3D9_H_HEADER_GUARD

View File

@ -5661,6 +5661,7 @@ namespace bgfx { namespace gl
, elapsedCpuMs > elapsedGpuMs ? '>' : '<'
, elapsedGpuMs
);
for (uint32_t ii = 0; ii < BX_COUNTOF(s_primInfo); ++ii)
{
tvm.printf(10, pos++, 0x8e, " %9s: %7d (#inst: %5d), submitted: %7d"