Make EXPLAIN sampling smarter, to avoid excessive sampling delay.
Martijn van Oosterhout
This commit is contained in:
parent
53d669e5c6
commit
87bd07d979
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.201 2006/04/27 00:33:41 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.202 2006/05/30 14:01:57 momjian Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -1306,7 +1306,7 @@ ExecCallTriggerFunc(TriggerData *trigdata,
|
|||||||
* one "tuple returned" (really the number of firings).
|
* one "tuple returned" (really the number of firings).
|
||||||
*/
|
*/
|
||||||
if (instr)
|
if (instr)
|
||||||
InstrStopNode(instr + tgindx, true);
|
InstrStopNode(instr + tgindx, 1);
|
||||||
|
|
||||||
return (HeapTuple) DatumGetPointer(result);
|
return (HeapTuple) DatumGetPointer(result);
|
||||||
}
|
}
|
||||||
@ -2154,7 +2154,7 @@ AfterTriggerExecute(AfterTriggerEvent event,
|
|||||||
* one "tuple returned" (really the number of firings).
|
* one "tuple returned" (really the number of firings).
|
||||||
*/
|
*/
|
||||||
if (instr)
|
if (instr)
|
||||||
InstrStopNode(instr + tgindx, true);
|
InstrStopNode(instr + tgindx, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/executor/execProcnode.c,v 1.54 2006/03/05 15:58:26 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/executor/execProcnode.c,v 1.55 2006/05/30 14:01:57 momjian Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -423,7 +423,7 @@ ExecProcNode(PlanState *node)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (node->instrument)
|
if (node->instrument)
|
||||||
InstrStopNode(node->instrument, !TupIsNull(result));
|
InstrStopNode(node->instrument, TupIsNull(result) ? 0 : 1 );
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -7,16 +7,78 @@
|
|||||||
* Copyright (c) 2001-2006, PostgreSQL Global Development Group
|
* Copyright (c) 2001-2006, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.14 2006/03/05 15:58:26 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.15 2006/05/30 14:01:58 momjian Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
#include "executor/instrument.h"
|
#include "executor/instrument.h"
|
||||||
|
|
||||||
|
/* This is the function that is used to determine the sampling intervals. In
|
||||||
|
* general, if the function is f(x), then for N tuples we will take on the
|
||||||
|
* order of integral(1/f(x), x=0..N) samples. Some examples follow, with the
|
||||||
|
* number of samples that would be collected over 1,000,000 tuples.
|
||||||
|
|
||||||
|
f(x) = x => log2(N) 20
|
||||||
|
f(x) = x^(1/2) => 2 * N^(1/2) 2000
|
||||||
|
f(x) = x^(1/3) => 1.5 * N^(2/3) 15000
|
||||||
|
|
||||||
|
* I've chosen the last one as it seems to provide a good compromise between
|
||||||
|
* low overhead but still getting a meaningful number of samples. However,
|
||||||
|
* not all machines have the cbrt() function so on those we substitute
|
||||||
|
* sqrt(). The difference is not very significant in the tests I made.
|
||||||
|
*/
|
||||||
|
#ifdef HAVE_CBRT
|
||||||
|
#define SampleFunc cbrt
|
||||||
|
#else
|
||||||
|
#define SampleFunc sqrt
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define SAMPLE_THRESHOLD 50
|
||||||
|
|
||||||
|
static double SampleOverhead;
|
||||||
|
static bool SampleOverheadCalculated;
|
||||||
|
|
||||||
|
static void
|
||||||
|
CalculateSampleOverhead()
|
||||||
|
{
|
||||||
|
Instrumentation instr;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* We want to determine the sampling overhead, to correct
|
||||||
|
* calculations later. This only needs to be done once per backend.
|
||||||
|
* Is this the place? A wrong value here (due to a mistimed
|
||||||
|
* task-switch) will cause bad calculations later.
|
||||||
|
*
|
||||||
|
* To minimize the risk we do it a few times and take the lowest.
|
||||||
|
*/
|
||||||
|
|
||||||
|
SampleOverhead = 1.0e6;
|
||||||
|
|
||||||
|
for( i = 0; i<5; i++ )
|
||||||
|
{
|
||||||
|
int j;
|
||||||
|
double overhead;
|
||||||
|
|
||||||
|
memset( &instr, 0, sizeof(instr) );
|
||||||
|
|
||||||
|
/* Loop SAMPLE_THRESHOLD times or 100 microseconds, whichever is faster */
|
||||||
|
for( j=0; j<SAMPLE_THRESHOLD && INSTR_TIME_GET_DOUBLE(instr.counter) < 100e-6; i++ )
|
||||||
|
{
|
||||||
|
InstrStartNode( &instr );
|
||||||
|
InstrStopNode( &instr, 1 );
|
||||||
|
}
|
||||||
|
overhead = INSTR_TIME_GET_DOUBLE(instr.counter) / instr.samplecount;
|
||||||
|
if( overhead < SampleOverhead )
|
||||||
|
SampleOverhead = overhead;
|
||||||
|
}
|
||||||
|
|
||||||
|
SampleOverheadCalculated = true;
|
||||||
|
}
|
||||||
|
|
||||||
/* Allocate new instrumentation structure(s) */
|
/* Allocate new instrumentation structure(s) */
|
||||||
Instrumentation *
|
Instrumentation *
|
||||||
@ -26,6 +88,9 @@ InstrAlloc(int n)
|
|||||||
|
|
||||||
/* we don't need to do any initialization except zero 'em */
|
/* we don't need to do any initialization except zero 'em */
|
||||||
|
|
||||||
|
/* Calculate overhead, if not done yet */
|
||||||
|
if( !SampleOverheadCalculated )
|
||||||
|
CalculateSampleOverhead();
|
||||||
return instr;
|
return instr;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -34,21 +99,38 @@ void
|
|||||||
InstrStartNode(Instrumentation *instr)
|
InstrStartNode(Instrumentation *instr)
|
||||||
{
|
{
|
||||||
if (INSTR_TIME_IS_ZERO(instr->starttime))
|
if (INSTR_TIME_IS_ZERO(instr->starttime))
|
||||||
|
{
|
||||||
|
/* We always sample the first SAMPLE_THRESHOLD tuples, so small nodes are always accurate */
|
||||||
|
if (instr->tuplecount < SAMPLE_THRESHOLD)
|
||||||
|
instr->sampling = true;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Otherwise we go to sampling, see the comments on SampleFunc at the top of the file */
|
||||||
|
if( instr->tuplecount > instr->nextsample )
|
||||||
|
{
|
||||||
|
instr->sampling = true;
|
||||||
|
/* The doubling is so the random will average 1 over time */
|
||||||
|
instr->nextsample += 2.0 * SampleFunc(instr->tuplecount) * (double)rand() / (double)RAND_MAX;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (instr->sampling)
|
||||||
INSTR_TIME_SET_CURRENT(instr->starttime);
|
INSTR_TIME_SET_CURRENT(instr->starttime);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
elog(DEBUG2, "InstrStartNode called twice in a row");
|
elog(DEBUG2, "InstrStartNode called twice in a row");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Exit from a plan node */
|
/* Exit from a plan node */
|
||||||
void
|
void
|
||||||
InstrStopNode(Instrumentation *instr, bool returnedTuple)
|
InstrStopNode(Instrumentation *instr, double nTuples)
|
||||||
{
|
{
|
||||||
instr_time endtime;
|
instr_time endtime;
|
||||||
|
|
||||||
/* count the returned tuples */
|
/* count the returned tuples */
|
||||||
if (returnedTuple)
|
instr->tuplecount += nTuples;
|
||||||
instr->tuplecount += 1;
|
|
||||||
|
|
||||||
|
if (instr->sampling)
|
||||||
|
{
|
||||||
if (INSTR_TIME_IS_ZERO(instr->starttime))
|
if (INSTR_TIME_IS_ZERO(instr->starttime))
|
||||||
{
|
{
|
||||||
elog(DEBUG2, "InstrStopNode called without start");
|
elog(DEBUG2, "InstrStopNode called without start");
|
||||||
@ -77,6 +159,9 @@ InstrStopNode(Instrumentation *instr, bool returnedTuple)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
INSTR_TIME_SET_ZERO(instr->starttime);
|
INSTR_TIME_SET_ZERO(instr->starttime);
|
||||||
|
instr->samplecount += nTuples;
|
||||||
|
instr->sampling = false;
|
||||||
|
}
|
||||||
|
|
||||||
/* Is this the first tuple of this cycle? */
|
/* Is this the first tuple of this cycle? */
|
||||||
if (!instr->running)
|
if (!instr->running)
|
||||||
@ -86,17 +171,6 @@ InstrStopNode(Instrumentation *instr, bool returnedTuple)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* As above, but count multiple tuples returned at once */
|
|
||||||
void
|
|
||||||
InstrStopNodeMulti(Instrumentation *instr, double nTuples)
|
|
||||||
{
|
|
||||||
/* count the returned tuples */
|
|
||||||
instr->tuplecount += nTuples;
|
|
||||||
|
|
||||||
/* delegate the rest */
|
|
||||||
InstrStopNode(instr, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Finish a run cycle for a plan node */
|
/* Finish a run cycle for a plan node */
|
||||||
void
|
void
|
||||||
InstrEndLoop(Instrumentation *instr)
|
InstrEndLoop(Instrumentation *instr)
|
||||||
@ -114,8 +188,24 @@ InstrEndLoop(Instrumentation *instr)
|
|||||||
totaltime = INSTR_TIME_GET_DOUBLE(instr->counter);
|
totaltime = INSTR_TIME_GET_DOUBLE(instr->counter);
|
||||||
|
|
||||||
instr->startup += instr->firsttuple;
|
instr->startup += instr->firsttuple;
|
||||||
|
|
||||||
|
/* Here we take into account sampling effects. Doing it naively ends
|
||||||
|
* up assuming the sampling overhead applies to all tuples, even the
|
||||||
|
* ones we didn't measure. We've calculated an overhead, so we
|
||||||
|
* subtract that for all samples we didn't measure. The first tuple
|
||||||
|
* is also special cased, because it usually takes longer. */
|
||||||
|
|
||||||
|
if( instr->samplecount < instr->tuplecount )
|
||||||
|
{
|
||||||
|
double pertuple = (totaltime - instr->firsttuple) / (instr->samplecount - 1);
|
||||||
|
instr->total += instr->firsttuple + (pertuple * (instr->samplecount - 1))
|
||||||
|
+ ((pertuple - SampleOverhead) * (instr->tuplecount - instr->samplecount));
|
||||||
|
}
|
||||||
|
else
|
||||||
instr->total += totaltime;
|
instr->total += totaltime;
|
||||||
|
|
||||||
instr->ntuples += instr->tuplecount;
|
instr->ntuples += instr->tuplecount;
|
||||||
|
instr->nsamples += instr->samplecount;
|
||||||
instr->nloops += 1;
|
instr->nloops += 1;
|
||||||
|
|
||||||
/* Reset for next cycle (if any) */
|
/* Reset for next cycle (if any) */
|
||||||
@ -123,5 +213,6 @@ InstrEndLoop(Instrumentation *instr)
|
|||||||
INSTR_TIME_SET_ZERO(instr->starttime);
|
INSTR_TIME_SET_ZERO(instr->starttime);
|
||||||
INSTR_TIME_SET_ZERO(instr->counter);
|
INSTR_TIME_SET_ZERO(instr->counter);
|
||||||
instr->firsttuple = 0;
|
instr->firsttuple = 0;
|
||||||
|
instr->samplecount = 0;
|
||||||
instr->tuplecount = 0;
|
instr->tuplecount = 0;
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/executor/nodeBitmapAnd.c,v 1.7 2006/05/23 15:21:52 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/executor/nodeBitmapAnd.c,v 1.8 2006/05/30 14:01:58 momjian Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -161,7 +161,7 @@ MultiExecBitmapAnd(BitmapAndState *node)
|
|||||||
|
|
||||||
/* must provide our own instrumentation support */
|
/* must provide our own instrumentation support */
|
||||||
if (node->ps.instrument)
|
if (node->ps.instrument)
|
||||||
InstrStopNodeMulti(node->ps.instrument, 0 /* XXX */ );
|
InstrStopNode(node->ps.instrument, 0 /* XXX */ );
|
||||||
|
|
||||||
return (Node *) result;
|
return (Node *) result;
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/executor/nodeBitmapIndexscan.c,v 1.18 2006/05/23 15:21:52 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/executor/nodeBitmapIndexscan.c,v 1.19 2006/05/30 14:01:58 momjian Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -112,7 +112,7 @@ MultiExecBitmapIndexScan(BitmapIndexScanState *node)
|
|||||||
|
|
||||||
/* must provide our own instrumentation support */
|
/* must provide our own instrumentation support */
|
||||||
if (node->ss.ps.instrument)
|
if (node->ss.ps.instrument)
|
||||||
InstrStopNodeMulti(node->ss.ps.instrument, nTuples);
|
InstrStopNode(node->ss.ps.instrument, nTuples);
|
||||||
|
|
||||||
return (Node *) tbm;
|
return (Node *) tbm;
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/executor/nodeBitmapOr.c,v 1.6 2006/05/23 15:21:52 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/executor/nodeBitmapOr.c,v 1.7 2006/05/30 14:01:58 momjian Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -177,7 +177,7 @@ MultiExecBitmapOr(BitmapOrState *node)
|
|||||||
|
|
||||||
/* must provide our own instrumentation support */
|
/* must provide our own instrumentation support */
|
||||||
if (node->ps.instrument)
|
if (node->ps.instrument)
|
||||||
InstrStopNodeMulti(node->ps.instrument, 0 /* XXX */ );
|
InstrStopNode(node->ps.instrument, 0 /* XXX */ );
|
||||||
|
|
||||||
return (Node *) result;
|
return (Node *) result;
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.102 2006/05/23 15:21:52 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.103 2006/05/30 14:01:58 momjian Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -97,7 +97,7 @@ MultiExecHash(HashState *node)
|
|||||||
|
|
||||||
/* must provide our own instrumentation support */
|
/* must provide our own instrumentation support */
|
||||||
if (node->ps.instrument)
|
if (node->ps.instrument)
|
||||||
InstrStopNodeMulti(node->ps.instrument, hashtable->totalTuples);
|
InstrStopNode(node->ps.instrument, hashtable->totalTuples);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We do not return the hash table directly because it's not a subtype of
|
* We do not return the hash table directly because it's not a subtype of
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
*
|
*
|
||||||
* Copyright (c) 2001-2006, PostgreSQL Global Development Group
|
* Copyright (c) 2001-2006, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/executor/instrument.h,v 1.13 2006/03/05 15:58:56 momjian Exp $
|
* $PostgreSQL: pgsql/src/include/executor/instrument.h,v 1.14 2006/05/30 14:01:58 momjian Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -61,17 +61,21 @@ typedef struct Instrumentation
|
|||||||
instr_time counter; /* Accumulated runtime for this node */
|
instr_time counter; /* Accumulated runtime for this node */
|
||||||
double firsttuple; /* Time for first tuple of this cycle */
|
double firsttuple; /* Time for first tuple of this cycle */
|
||||||
double tuplecount; /* Tuples emitted so far this cycle */
|
double tuplecount; /* Tuples emitted so far this cycle */
|
||||||
|
double samplecount; /* Samples collected this cycle */
|
||||||
/* Accumulated statistics across all completed cycles: */
|
/* Accumulated statistics across all completed cycles: */
|
||||||
double startup; /* Total startup time (in seconds) */
|
double startup; /* Total startup time (in seconds) */
|
||||||
double total; /* Total total time (in seconds) */
|
double total; /* Total total time (in seconds) */
|
||||||
double ntuples; /* Total tuples produced */
|
double ntuples; /* Total tuples produced */
|
||||||
double nloops; /* # of run cycles for this node */
|
double nloops; /* # of run cycles for this node */
|
||||||
|
double nsamples; /* # of samples taken */
|
||||||
|
/* Tracking for sampling */
|
||||||
|
bool sampling; /* Are we sampling this iteration */
|
||||||
|
double nextsample; /* The next tuplecount we're going to sample */
|
||||||
} Instrumentation;
|
} Instrumentation;
|
||||||
|
|
||||||
extern Instrumentation *InstrAlloc(int n);
|
extern Instrumentation *InstrAlloc(int n);
|
||||||
extern void InstrStartNode(Instrumentation *instr);
|
extern void InstrStartNode(Instrumentation *instr);
|
||||||
extern void InstrStopNode(Instrumentation *instr, bool returnedTuple);
|
extern void InstrStopNode(Instrumentation *instr, double nTuples);
|
||||||
extern void InstrStopNodeMulti(Instrumentation *instr, double nTuples);
|
|
||||||
extern void InstrEndLoop(Instrumentation *instr);
|
extern void InstrEndLoop(Instrumentation *instr);
|
||||||
|
|
||||||
#endif /* INSTRUMENT_H */
|
#endif /* INSTRUMENT_H */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user