Added GPU timer.
This commit is contained in:
parent
146829b057
commit
7d50012dbe
@ -1468,6 +1468,8 @@ BX_PRAGMA_DIAGNOSTIC_POP();
|
||||
{
|
||||
ovrPreReset();
|
||||
|
||||
m_gpuTimer.destroy();
|
||||
|
||||
if (NULL == g_platformData.backBufferDS)
|
||||
{
|
||||
DX_RELEASE(m_backBufferDepthStencil, 0);
|
||||
@ -1510,6 +1512,8 @@ BX_PRAGMA_DIAGNOSTIC_POP();
|
||||
DX_RELEASE(color, 0);
|
||||
}
|
||||
|
||||
m_gpuTimer.create();
|
||||
|
||||
ovrPostReset();
|
||||
|
||||
// If OVR doesn't create separate depth stencil view, create default one.
|
||||
@ -2608,9 +2612,10 @@ BX_PRAGMA_DIAGNOSTIC_POP();
|
||||
uint16_t m_numWindows;
|
||||
FrameBufferHandle m_windows[BGFX_CONFIG_MAX_FRAME_BUFFERS];
|
||||
|
||||
ID3D11Device* m_device;
|
||||
ID3D11DeviceContext* m_deviceCtx;
|
||||
ID3D11InfoQueue* m_infoQueue;
|
||||
ID3D11Device* m_device;
|
||||
ID3D11DeviceContext* m_deviceCtx;
|
||||
ID3D11InfoQueue* m_infoQueue;
|
||||
TimerQueryD3D11 m_gpuTimer;
|
||||
|
||||
ID3D11RenderTargetView* m_backBufferColor;
|
||||
ID3D11DepthStencilView* m_backBufferDepthStencil;
|
||||
@ -3471,6 +3476,88 @@ BX_PRAGMA_DIAGNOSTIC_POP();
|
||||
}
|
||||
}
|
||||
|
||||
void TimerQueryD3D11::create()
|
||||
{
|
||||
ID3D11Device* device = s_renderD3D11->m_device;
|
||||
|
||||
D3D11_QUERY_DESC query;
|
||||
query.MiscFlags = 0;
|
||||
for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii)
|
||||
{
|
||||
Frame& frame = m_frame[ii];
|
||||
|
||||
query.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
|
||||
DX_CHECK(device->CreateQuery(&query, &frame.m_disjoint) );
|
||||
|
||||
query.Query = D3D11_QUERY_TIMESTAMP;
|
||||
DX_CHECK(device->CreateQuery(&query, &frame.m_start) );
|
||||
DX_CHECK(device->CreateQuery(&query, &frame.m_end) );
|
||||
}
|
||||
|
||||
m_elapsed = 0;
|
||||
m_frequency = 1;
|
||||
}
|
||||
|
||||
void TimerQueryD3D11::destroy()
|
||||
{
|
||||
for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii)
|
||||
{
|
||||
Frame& frame = m_frame[ii];
|
||||
DX_RELEASE(frame.m_disjoint, 0);
|
||||
DX_RELEASE(frame.m_start, 0);
|
||||
DX_RELEASE(frame.m_end, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void TimerQueryD3D11::begin()
|
||||
{
|
||||
ID3D11DeviceContext* deviceCtx = s_renderD3D11->m_deviceCtx;
|
||||
|
||||
while (0 == m_control.reserve(1) )
|
||||
{
|
||||
get();
|
||||
}
|
||||
|
||||
Frame& frame = m_frame[m_control.m_current];
|
||||
deviceCtx->Begin(frame.m_disjoint);
|
||||
deviceCtx->End(frame.m_start);
|
||||
}
|
||||
|
||||
void TimerQueryD3D11::end()
|
||||
{
|
||||
ID3D11DeviceContext* deviceCtx = s_renderD3D11->m_deviceCtx;
|
||||
Frame& frame = m_frame[m_control.m_current];
|
||||
deviceCtx->End(frame.m_end);
|
||||
deviceCtx->End(frame.m_disjoint);
|
||||
m_control.commit(1);
|
||||
}
|
||||
|
||||
bool TimerQueryD3D11::get()
|
||||
{
|
||||
ID3D11DeviceContext* deviceCtx = s_renderD3D11->m_deviceCtx;
|
||||
Frame& frame = m_frame[m_control.m_read];
|
||||
|
||||
uint64_t end;
|
||||
HRESULT hr = deviceCtx->GetData(frame.m_end, &end, sizeof(end), 0);
|
||||
if (S_OK == hr)
|
||||
{
|
||||
m_control.consume(1);
|
||||
|
||||
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
|
||||
deviceCtx->GetData(frame.m_disjoint, &disjoint, sizeof(disjoint), 0);
|
||||
|
||||
uint64_t start;
|
||||
deviceCtx->GetData(frame.m_start, &start, sizeof(start), 0);
|
||||
|
||||
m_frequency = disjoint.Frequency;
|
||||
m_elapsed = end - start;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void RendererContextD3D11::submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter)
|
||||
{
|
||||
PIX_BEGINEVENT(D3DCOLOR_RGBA(0xff, 0x00, 0x00, 0xff), L"rendererSubmit");
|
||||
@ -3482,6 +3569,11 @@ BX_PRAGMA_DIAGNOSTIC_POP();
|
||||
int64_t elapsed = -bx::getHPCounter();
|
||||
int64_t captureElapsed = 0;
|
||||
|
||||
if (_render->m_debug & (BGFX_DEBUG_IFH|BGFX_DEBUG_STATS) )
|
||||
{
|
||||
m_gpuTimer.begin();
|
||||
}
|
||||
|
||||
if (0 < _render->m_iboffset)
|
||||
{
|
||||
TransientIndexBuffer* ib = _render->m_transientIb;
|
||||
@ -4256,6 +4348,20 @@ BX_PRAGMA_DIAGNOSTIC_POP();
|
||||
{
|
||||
PIX_BEGINEVENT(D3DCOLOR_RGBA(0x40, 0x40, 0x40, 0xff), L"debugstats");
|
||||
|
||||
static uint32_t maxGpuLatency = 0;
|
||||
static double maxGpuElapsed = 0.0f;
|
||||
double elapsedGpuMs = 0.0;
|
||||
|
||||
m_gpuTimer.end();
|
||||
|
||||
while (m_gpuTimer.get() )
|
||||
{
|
||||
double toGpuMs = 1000.0 / double(m_gpuTimer.m_frequency);
|
||||
elapsedGpuMs = m_gpuTimer.m_elapsed * toGpuMs;
|
||||
maxGpuElapsed = elapsedGpuMs > maxGpuElapsed ? elapsedGpuMs : maxGpuElapsed;
|
||||
}
|
||||
maxGpuLatency = bx::uint32_max(maxGpuLatency, m_gpuTimer.m_control.available()-1);
|
||||
|
||||
TextVideoMem& tvm = m_textVideoMem;
|
||||
|
||||
static int64_t next = now;
|
||||
@ -4314,12 +4420,18 @@ BX_PRAGMA_DIAGNOSTIC_POP();
|
||||
);
|
||||
|
||||
double elapsedCpuMs = double(elapsed)*toMs;
|
||||
tvm.printf(10, pos++, 0x8e, " Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms]"
|
||||
tvm.printf(10, pos++, 0x8e, " Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms] %c GPU %3.4f [ms] (latency %d)"
|
||||
, _render->m_num
|
||||
, statsKeyType[0]
|
||||
, statsKeyType[1]
|
||||
, elapsedCpuMs
|
||||
, elapsedCpuMs > maxGpuElapsed ? '>' : '<'
|
||||
, maxGpuElapsed
|
||||
, maxGpuLatency
|
||||
);
|
||||
maxGpuLatency = 0;
|
||||
maxGpuElapsed = 0.0;
|
||||
|
||||
for (uint32_t ii = 0; ii < BX_COUNTOF(s_primName); ++ii)
|
||||
{
|
||||
tvm.printf(10, pos++, 0x8e, " %9s: %7d (#inst: %5d), submitted: %7d, indirect %7d"
|
||||
|
@ -297,6 +297,33 @@ namespace bgfx { namespace d3d11
|
||||
TextureHandle m_th[BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS];
|
||||
};
|
||||
|
||||
struct TimerQueryD3D11
|
||||
{
|
||||
TimerQueryD3D11()
|
||||
: m_control(BX_COUNTOF(m_frame) )
|
||||
{
|
||||
}
|
||||
|
||||
void create();
|
||||
void destroy();
|
||||
void begin();
|
||||
void end();
|
||||
bool get();
|
||||
|
||||
struct Frame
|
||||
{
|
||||
ID3D11Query* m_disjoint;
|
||||
ID3D11Query* m_start;
|
||||
ID3D11Query* m_end;
|
||||
};
|
||||
|
||||
uint64_t m_elapsed;
|
||||
uint64_t m_frequency;
|
||||
|
||||
Frame m_frame[4];
|
||||
bx::RingBufferControl m_control;
|
||||
};
|
||||
|
||||
} /* namespace d3d11 */ } // namespace bgfx
|
||||
|
||||
#endif // BGFX_RENDERER_D3D11_H_HEADER_GUARD
|
||||
|
@ -1239,6 +1239,8 @@ namespace bgfx { namespace d3d9
|
||||
|
||||
capturePreReset();
|
||||
|
||||
m_gpuTimer.destroy();
|
||||
|
||||
for (uint32_t ii = 0; ii < BX_COUNTOF(m_indexBuffers); ++ii)
|
||||
{
|
||||
m_indexBuffers[ii].preReset();
|
||||
@ -1266,6 +1268,8 @@ namespace bgfx { namespace d3d9
|
||||
DX_CHECK(m_swapChain->GetBackBuffer(0, D3DBACKBUFFER_TYPE_MONO, &m_backBufferColor) );
|
||||
DX_CHECK(m_device->GetDepthStencilSurface(&m_backBufferDepthStencil) );
|
||||
|
||||
m_gpuTimer.create();
|
||||
|
||||
capturePostReset();
|
||||
|
||||
for (uint32_t ii = 0; ii < BX_COUNTOF(m_indexBuffers); ++ii)
|
||||
@ -1719,8 +1723,9 @@ namespace bgfx { namespace d3d9
|
||||
IDirect3DDevice9Ex* m_deviceEx;
|
||||
#endif // BGFX_CONFIG_RENDERER_DIRECT3D9EX
|
||||
|
||||
IDirect3D9* m_d3d9;
|
||||
IDirect3D9* m_d3d9;
|
||||
IDirect3DDevice9* m_device;
|
||||
TimerQueryD3D9 m_gpuTimer;
|
||||
D3DPOOL m_pool;
|
||||
|
||||
IDirect3DSwapChain9* m_swapChain;
|
||||
@ -2892,6 +2897,80 @@ namespace bgfx { namespace d3d9
|
||||
) );
|
||||
}
|
||||
|
||||
void TimerQueryD3D9::create()
|
||||
{
|
||||
IDirect3DDevice9* device = s_renderD3D9->m_device;
|
||||
|
||||
for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii)
|
||||
{
|
||||
Frame& frame = m_frame[ii];
|
||||
DX_CHECK(device->CreateQuery(D3DQUERYTYPE_TIMESTAMPDISJOINT, &frame.m_disjoint) );
|
||||
DX_CHECK(device->CreateQuery(D3DQUERYTYPE_TIMESTAMP, &frame.m_start) );
|
||||
DX_CHECK(device->CreateQuery(D3DQUERYTYPE_TIMESTAMP, &frame.m_end) );
|
||||
DX_CHECK(device->CreateQuery(D3DQUERYTYPE_TIMESTAMPFREQ, &frame.m_freq) );
|
||||
}
|
||||
|
||||
m_elapsed = 0;
|
||||
m_frequency = 1;
|
||||
}
|
||||
|
||||
void TimerQueryD3D9::destroy()
|
||||
{
|
||||
for (uint32_t ii = 0; ii < BX_COUNTOF(m_frame); ++ii)
|
||||
{
|
||||
Frame& frame = m_frame[ii];
|
||||
DX_RELEASE(frame.m_disjoint, 0);
|
||||
DX_RELEASE(frame.m_start, 0);
|
||||
DX_RELEASE(frame.m_end, 0);
|
||||
DX_RELEASE(frame.m_freq, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void TimerQueryD3D9::begin()
|
||||
{
|
||||
while (0 == m_control.reserve(1) )
|
||||
{
|
||||
get();
|
||||
}
|
||||
|
||||
Frame& frame = m_frame[m_control.m_current];
|
||||
frame.m_disjoint->Issue(D3DISSUE_BEGIN);
|
||||
frame.m_start->Issue(D3DISSUE_END);
|
||||
}
|
||||
|
||||
void TimerQueryD3D9::end()
|
||||
{
|
||||
Frame& frame = m_frame[m_control.m_current];
|
||||
frame.m_end->Issue(D3DISSUE_END);
|
||||
frame.m_freq->Issue(D3DISSUE_END);
|
||||
m_control.commit(1);
|
||||
}
|
||||
|
||||
bool TimerQueryD3D9::get()
|
||||
{
|
||||
Frame& frame = m_frame[m_control.m_read];
|
||||
|
||||
uint64_t freq;
|
||||
HRESULT hr = frame.m_freq->GetData(&freq, sizeof(freq), 0);
|
||||
if (S_OK == hr)
|
||||
{
|
||||
m_control.consume(1);
|
||||
|
||||
uint64_t start;
|
||||
DX_CHECK(frame.m_start->GetData(&start, sizeof(start), 0) );
|
||||
|
||||
uint64_t end;
|
||||
DX_CHECK(frame.m_end->GetData(&end, sizeof(end), 0) );
|
||||
|
||||
m_frequency = freq;
|
||||
m_elapsed = end - start;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void RendererContextD3D9::submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter)
|
||||
{
|
||||
IDirect3DDevice9* device = m_device;
|
||||
@ -2905,6 +2984,11 @@ namespace bgfx { namespace d3d9
|
||||
|
||||
device->BeginScene();
|
||||
|
||||
if (_render->m_debug & (BGFX_DEBUG_IFH|BGFX_DEBUG_STATS) )
|
||||
{
|
||||
m_gpuTimer.begin();
|
||||
}
|
||||
|
||||
if (0 < _render->m_iboffset)
|
||||
{
|
||||
TransientIndexBuffer* ib = _render->m_transientIb;
|
||||
@ -3453,6 +3537,20 @@ namespace bgfx { namespace d3d9
|
||||
{
|
||||
PIX_BEGINEVENT(D3DCOLOR_RGBA(0x40, 0x40, 0x40, 0xff), L"debugstats");
|
||||
|
||||
static uint32_t maxGpuLatency = 0;
|
||||
static double maxGpuElapsed = 0.0f;
|
||||
double elapsedGpuMs = 0.0;
|
||||
|
||||
m_gpuTimer.end();
|
||||
|
||||
while (m_gpuTimer.get() )
|
||||
{
|
||||
double toGpuMs = 1000.0 / double(m_gpuTimer.m_frequency);
|
||||
elapsedGpuMs = m_gpuTimer.m_elapsed * toGpuMs;
|
||||
maxGpuElapsed = elapsedGpuMs > maxGpuElapsed ? elapsedGpuMs : maxGpuElapsed;
|
||||
}
|
||||
maxGpuLatency = bx::uint32_max(maxGpuLatency, m_gpuTimer.m_control.available()-1);
|
||||
|
||||
TextVideoMem& tvm = m_textVideoMem;
|
||||
|
||||
static int64_t next = now;
|
||||
@ -3490,12 +3588,18 @@ namespace bgfx { namespace d3d9
|
||||
);
|
||||
|
||||
double elapsedCpuMs = double(elapsed)*toMs;
|
||||
tvm.printf(10, pos++, 0x8e, " Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms]"
|
||||
tvm.printf(10, pos++, 0x8e, " Submitted: %4d (draw %4d, compute %4d) / CPU %3.4f [ms] %c GPU %3.4f [ms] (latency %d)"
|
||||
, _render->m_num
|
||||
, statsKeyType[0]
|
||||
, statsKeyType[1]
|
||||
, elapsedCpuMs
|
||||
, elapsedCpuMs > maxGpuElapsed ? '>' : '<'
|
||||
, maxGpuElapsed
|
||||
, maxGpuLatency
|
||||
);
|
||||
maxGpuLatency = 0;
|
||||
maxGpuElapsed = 0.0;
|
||||
|
||||
for (uint32_t ii = 0; ii < BX_COUNTOF(s_primName); ++ii)
|
||||
{
|
||||
tvm.printf(10, pos++, 0x8e, " %9s: %7d (#inst: %5d), submitted: %7d"
|
||||
|
@ -389,6 +389,34 @@ namespace bgfx { namespace d3d9
|
||||
bool m_needResolve;
|
||||
};
|
||||
|
||||
struct TimerQueryD3D9
|
||||
{
|
||||
TimerQueryD3D9()
|
||||
: m_control(BX_COUNTOF(m_frame) )
|
||||
{
|
||||
}
|
||||
|
||||
void create();
|
||||
void destroy();
|
||||
void begin();
|
||||
void end();
|
||||
bool get();
|
||||
|
||||
struct Frame
|
||||
{
|
||||
IDirect3DQuery9* m_disjoint;
|
||||
IDirect3DQuery9* m_start;
|
||||
IDirect3DQuery9* m_end;
|
||||
IDirect3DQuery9* m_freq;
|
||||
};
|
||||
|
||||
uint64_t m_elapsed;
|
||||
uint64_t m_frequency;
|
||||
|
||||
Frame m_frame[4];
|
||||
bx::RingBufferControl m_control;
|
||||
};
|
||||
|
||||
} /* namespace d3d9 */ } // namespace bgfx
|
||||
|
||||
#endif // BGFX_RENDERER_D3D9_H_HEADER_GUARD
|
||||
|
@ -5661,6 +5661,7 @@ namespace bgfx { namespace gl
|
||||
, elapsedCpuMs > elapsedGpuMs ? '>' : '<'
|
||||
, elapsedGpuMs
|
||||
);
|
||||
|
||||
for (uint32_t ii = 0; ii < BX_COUNTOF(s_primInfo); ++ii)
|
||||
{
|
||||
tvm.printf(10, pos++, 0x8e, " %9s: %7d (#inst: %5d), submitted: %7d"
|
||||
|
Loading…
Reference in New Issue
Block a user