New GUC, track_iotiming, to track I/O timings.

Currently, the only way to see the numbers this gathers is via
EXPLAIN (ANALYZE, BUFFERS), but the plan is to add visibility through
the stats collector and pg_stat_statements in subsequent patches.

Ants Aasma, reviewed by Greg Smith, with some further changes by me.
This commit is contained in:
Robert Haas 2012-03-27 14:52:37 -04:00
parent 98316e211b
commit 40b9b95769
8 changed files with 72 additions and 0 deletions

View File

@ -4284,6 +4284,21 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
</listitem>
</varlistentry>
<varlistentry id="guc-track-iotiming" xreflabel="track_iotiming">
<term><varname>track_iotiming</varname> (<type>boolean</type>)</term>
<indexterm>
<primary><varname>track_iotiming</> configuration parameter</primary>
</indexterm>
<listitem>
<para>
Enables timing of database I/O calls. This parameter is off by
default, because it will repeatedly query the operating system for
the current time, which may cause significant overhead on some
platforms. Only superusers can change this setting.
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-track-functions" xreflabel="track_functions">
<term><varname>track_functions</varname> (<type>enum</type>)</term>
<indexterm>

View File

@ -1236,6 +1236,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
usage->local_blks_written > 0);
bool has_temp = (usage->temp_blks_read > 0 ||
usage->temp_blks_written > 0);
bool has_timing = (!INSTR_TIME_IS_ZERO(usage->time_read) ||
!INSTR_TIME_IS_ZERO(usage->time_write));
/* Show only positive counter values. */
if (has_shared || has_local || has_temp)
@ -1291,6 +1293,20 @@ ExplainNode(PlanState *planstate, List *ancestors,
}
appendStringInfoChar(es->str, '\n');
}
/* As above, show only positive counter values. */
if (has_timing)
{
appendStringInfoSpaces(es->str, es->indent * 2);
appendStringInfoString(es->str, "I/O Timings:");
if (!INSTR_TIME_IS_ZERO(usage->time_read))
appendStringInfo(es->str, " read=%0.2f",
INSTR_TIME_GET_MILLISEC(usage->time_read));
if (!INSTR_TIME_IS_ZERO(usage->time_write))
appendStringInfo(es->str, " write=%0.2f",
INSTR_TIME_GET_MILLISEC(usage->time_write));
appendStringInfoChar(es->str, '\n');
}
}
else
{
@ -1304,6 +1320,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
ExplainPropertyLong("Local Written Blocks", usage->local_blks_written, es);
ExplainPropertyLong("Temp Read Blocks", usage->temp_blks_read, es);
ExplainPropertyLong("Temp Written Blocks", usage->temp_blks_written, es);
ExplainPropertyFloat("I/O Read Time", INSTR_TIME_GET_MILLISEC(usage->time_read), 3, es);
ExplainPropertyFloat("I/O Write Time", INSTR_TIME_GET_MILLISEC(usage->time_write), 3, es);
}
}

View File

@ -145,4 +145,6 @@ BufferUsageAccumDiff(BufferUsage *dst,
dst->local_blks_written += add->local_blks_written - sub->local_blks_written;
dst->temp_blks_read += add->temp_blks_read - sub->temp_blks_read;
dst->temp_blks_written += add->temp_blks_written - sub->temp_blks_written;
INSTR_TIME_ACCUM_DIFF(dst->time_read, add->time_read, sub->time_read);
INSTR_TIME_ACCUM_DIFF(dst->time_write, add->time_write, sub->time_write);
}

View File

@ -67,6 +67,7 @@
bool zero_damaged_pages = false;
int bgwriter_lru_maxpages = 100;
double bgwriter_lru_multiplier = 2.0;
bool track_iotiming = false;
/*
* How many buffers PrefetchBuffer callers should try to stay ahead of their
@ -437,8 +438,21 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
MemSet((char *) bufBlock, 0, BLCKSZ);
else
{
instr_time io_start,
io_time;
if (track_iotiming)
INSTR_TIME_SET_CURRENT(io_start);
smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
if (track_iotiming)
{
INSTR_TIME_SET_CURRENT(io_time);
INSTR_TIME_SUBTRACT(io_time, io_start);
INSTR_TIME_ADD(pgBufferUsage.time_read, io_time);
}
/* check for garbage data */
if (!PageHeaderIsValid((PageHeader) bufBlock))
{
@ -1874,6 +1888,7 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
{
XLogRecPtr recptr;
ErrorContextCallback errcontext;
instr_time io_start, io_end;
/*
* Acquire the buffer's io_in_progress lock. If StartBufferIO returns
@ -1921,12 +1936,21 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
buf->flags &= ~BM_JUST_DIRTIED;
UnlockBufHdr(buf);
if (track_iotiming)
INSTR_TIME_SET_CURRENT(io_start);
smgrwrite(reln,
buf->tag.forkNum,
buf->tag.blockNum,
(char *) BufHdrGetBlock(buf),
false);
if (track_iotiming)
{
INSTR_TIME_SET_CURRENT(io_end);
INSTR_TIME_ACCUM_DIFF(pgBufferUsage.time_write, io_end, io_start);
}
pgBufferUsage.shared_blks_written++;
/*

View File

@ -1018,6 +1018,15 @@ static struct config_bool ConfigureNamesBool[] =
true,
NULL, NULL, NULL
},
{
{"track_iotiming", PGC_SUSET, STATS_COLLECTOR,
gettext_noop("Collects timing information for database IO activity."),
NULL
},
&track_iotiming,
false,
NULL, NULL, NULL
},
{
{"update_process_title", PGC_SUSET, STATS_COLLECTOR,

View File

@ -424,6 +424,7 @@
#track_activities = on
#track_counts = on
#track_iotiming = off
#track_functions = none # none, pl, all
#track_activity_query_size = 1024 # (change requires restart)
#update_process_title = on

View File

@ -28,6 +28,8 @@ typedef struct BufferUsage
long local_blks_written; /* # of local disk blocks written */
long temp_blks_read; /* # of temp blocks read */
long temp_blks_written; /* # of temp blocks written */
instr_time time_read; /* time spent reading */
instr_time time_write; /* time spent writing */
} BufferUsage;
/* Flag bits included in InstrAlloc's instrument_options bitmask */

View File

@ -48,6 +48,7 @@ extern PGDLLIMPORT int NBuffers;
extern bool zero_damaged_pages;
extern int bgwriter_lru_maxpages;
extern double bgwriter_lru_multiplier;
extern bool track_iotiming;
extern int target_prefetch_pages;
/* in buf_init.c */