From 5de0cbdf0cfbd78b4b4d91f9e8c68e3e75f59f99 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 9 Jun 2006 19:30:56 +0000 Subject: [PATCH] Revert sampling patch for EXPLAIN ANALYZE; it turns out to be too unreliable because node timing is much less predictable than the patch expects. I kept the API change for InstrStopNode, however. --- src/backend/executor/execProcnode.c | 4 +- src/backend/executor/instrument.c | 199 ++++------------------------ src/include/executor/instrument.h | 6 +- 3 files changed, 30 insertions(+), 179 deletions(-) diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 2b8bee97dd..879e94c0b3 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -12,7 +12,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/execProcnode.c,v 1.55 2006/05/30 14:01:57 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/executor/execProcnode.c,v 1.56 2006/06/09 19:30:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -423,7 +423,7 @@ ExecProcNode(PlanState *node) } if (node->instrument) - InstrStopNode(node->instrument, TupIsNull(result) ? 0 : 1 ); + InstrStopNode(node->instrument, TupIsNull(result) ? 0.0 : 1.0); return result; } diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c index ff3c4eddaa..b4c613891c 100644 --- a/src/backend/executor/instrument.c +++ b/src/backend/executor/instrument.c @@ -7,112 +7,22 @@ * Copyright (c) 2001-2006, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.17 2006/06/07 18:49:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.18 2006/06/09 19:30:56 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" -#include #include #include "executor/instrument.h" -/* - * As of PostgreSQL 8.2, we try to reduce the overhead of EXPLAIN ANALYZE - * by not calling INSTR_TIME_SET_CURRENT() for every single node execution. - * (Because that requires a kernel call on most systems, it's expensive.) - * - * This macro determines the sampling interval: that is, after how many more - * iterations will we take the next time sample, given that niters iterations - * have occurred already. In general, if the function is f(x), then for N - * iterations we will take on the order of integral(1/f(x), x=0..N) - * samples. Some examples follow, with the number of samples that would be - * collected over 1,000,000 iterations: - * - * f(x) = x => log2(N) 20 - * f(x) = x^(1/2) => 2 * N^(1/2) 2000 - * f(x) = x^(1/3) => 1.5 * N^(2/3) 15000 - * - * I've chosen the last one as it seems to provide a good compromise between - * low overhead but still getting a meaningful number of samples. However, - * not all machines have the cbrt() function so on those we substitute - * sqrt(). The difference is not very significant in the tests I made. - * - * The actual sampling interval is randomized with the SampleFunc() value - * as the mean; this hopefully will reduce any measurement bias due to - * variation in the node execution time. - */ -#ifdef HAVE_CBRT -#define SampleFunc(niters) cbrt(niters) -#else -#define SampleFunc(niters) sqrt(niters) -#endif - -#define SampleInterval(niters) \ - (SampleFunc(niters) * (double) random() / (double) (MAX_RANDOM_VALUE/2)) - -/* - * We sample at every node iteration until we've reached this threshold, - * so that nodes not called a large number of times are completely accurate. - * (Perhaps this should be a GUC variable?) - */ -#define SAMPLE_THRESHOLD 50 - -/* - * Determine the sampling overhead. This only needs to be done once per - * backend (actually, probably once per postmaster would do ...) - */ -static double SampleOverhead; -static bool SampleOverheadCalculated = false; - -static void -CalculateSampleOverhead(void) -{ - int i; - - /* - * We could get a wrong result due to being interrupted by task switch. - * To minimize the risk we do it a few times and take the lowest. - */ - SampleOverhead = 1.0e6; - - for (i = 0; i < 5; i++) - { - Instrumentation timer; - instr_time tmptime; - int j; - double overhead; - - memset(&timer, 0, sizeof(timer)); - InstrStartNode(&timer); -#define TEST_COUNT 100 - for (j = 0; j < TEST_COUNT; j++) - { - INSTR_TIME_SET_CURRENT(tmptime); - } - InstrStopNode(&timer, 1); - overhead = INSTR_TIME_GET_DOUBLE(timer.counter) / TEST_COUNT; - if (overhead < SampleOverhead) - SampleOverhead = overhead; - } - - SampleOverheadCalculated = true; -} - - /* Allocate new instrumentation structure(s) */ Instrumentation * InstrAlloc(int n) { - Instrumentation *instr; - - /* Calculate sampling overhead, if not done yet in this backend */ - if (!SampleOverheadCalculated) - CalculateSampleOverhead(); - - instr = palloc0(n * sizeof(Instrumentation)); + Instrumentation *instr = palloc0(n * sizeof(Instrumentation)); /* we don't need to do any initialization except zero 'em */ @@ -124,22 +34,7 @@ void InstrStartNode(Instrumentation *instr) { if (INSTR_TIME_IS_ZERO(instr->starttime)) - { - /* - * Always sample if not yet up to threshold, else check whether - * next threshold has been reached - */ - if (instr->itercount < SAMPLE_THRESHOLD) - instr->sampling = true; - else if (instr->itercount >= instr->nextsample) - { - instr->sampling = true; - instr->nextsample = - instr->itercount + SampleInterval(instr->itercount); - } - if (instr->sampling) - INSTR_TIME_SET_CURRENT(instr->starttime); - } + INSTR_TIME_SET_CURRENT(instr->starttime); else elog(DEBUG2, "InstrStartNode called twice in a row"); } @@ -148,53 +43,39 @@ InstrStartNode(Instrumentation *instr) void InstrStopNode(Instrumentation *instr, double nTuples) { - /* count the returned tuples and iterations */ + instr_time endtime; + + /* count the returned tuples */ instr->tuplecount += nTuples; - instr->itercount += 1; - /* measure runtime if appropriate */ - if (instr->sampling) + if (INSTR_TIME_IS_ZERO(instr->starttime)) { - instr_time endtime; + elog(DEBUG2, "InstrStopNode called without start"); + return; + } - /* - * To be sure that SampleOverhead accurately reflects the extra - * overhead, we must do INSTR_TIME_SET_CURRENT() as the *first* - * action that is different between the sampling and non-sampling - * code paths. - */ - INSTR_TIME_SET_CURRENT(endtime); - - if (INSTR_TIME_IS_ZERO(instr->starttime)) - { - elog(DEBUG2, "InstrStopNode called without start"); - return; - } + INSTR_TIME_SET_CURRENT(endtime); #ifndef WIN32 - instr->counter.tv_sec += endtime.tv_sec - instr->starttime.tv_sec; - instr->counter.tv_usec += endtime.tv_usec - instr->starttime.tv_usec; + instr->counter.tv_sec += endtime.tv_sec - instr->starttime.tv_sec; + instr->counter.tv_usec += endtime.tv_usec - instr->starttime.tv_usec; - /* Normalize after each add to avoid overflow/underflow of tv_usec */ - while (instr->counter.tv_usec < 0) - { - instr->counter.tv_usec += 1000000; - instr->counter.tv_sec--; - } - while (instr->counter.tv_usec >= 1000000) - { - instr->counter.tv_usec -= 1000000; - instr->counter.tv_sec++; - } + /* Normalize after each add to avoid overflow/underflow of tv_usec */ + while (instr->counter.tv_usec < 0) + { + instr->counter.tv_usec += 1000000; + instr->counter.tv_sec--; + } + while (instr->counter.tv_usec >= 1000000) + { + instr->counter.tv_usec -= 1000000; + instr->counter.tv_sec++; + } #else /* WIN32 */ - instr->counter.QuadPart += (endtime.QuadPart - instr->starttime.QuadPart); + instr->counter.QuadPart += (endtime.QuadPart - instr->starttime.QuadPart); #endif - INSTR_TIME_SET_ZERO(instr->starttime); - - instr->samplecount += 1; - instr->sampling = false; - } + INSTR_TIME_SET_ZERO(instr->starttime); /* Is this the first tuple of this cycle? */ if (!instr->running) @@ -217,31 +98,9 @@ InstrEndLoop(Instrumentation *instr) if (!INSTR_TIME_IS_ZERO(instr->starttime)) elog(DEBUG2, "InstrEndLoop called on running node"); - /* Compute time spent in node */ + /* Accumulate per-cycle statistics into totals */ totaltime = INSTR_TIME_GET_DOUBLE(instr->counter); - /* - * If we didn't measure runtime on every iteration, then we must increase - * the measured total to account for the other iterations. Naively - * multiplying totaltime by itercount/samplecount would be wrong because - * it effectively assumes the sampling overhead applies to all iterations, - * even the ones we didn't measure. (Note that what we are trying to - * estimate here is the actual time spent in the node, including the - * actual measurement overhead; not the time exclusive of measurement - * overhead.) We exclude the first iteration from the correction basis, - * because it often takes longer than others. - */ - if (instr->itercount > instr->samplecount) - { - double per_iter; - - per_iter = (totaltime - instr->firsttuple) / (instr->samplecount - 1) - - SampleOverhead; - if (per_iter > 0) /* sanity check */ - totaltime += per_iter * (instr->itercount - instr->samplecount); - } - - /* Accumulate per-cycle statistics into totals */ instr->startup += instr->firsttuple; instr->total += totaltime; instr->ntuples += instr->tuplecount; @@ -249,12 +108,8 @@ InstrEndLoop(Instrumentation *instr) /* Reset for next cycle (if any) */ instr->running = false; - instr->sampling = false; INSTR_TIME_SET_ZERO(instr->starttime); INSTR_TIME_SET_ZERO(instr->counter); instr->firsttuple = 0; instr->tuplecount = 0; - instr->itercount = 0; - instr->samplecount = 0; - instr->nextsample = 0; } diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h index fd57f86578..8afe2a5f73 100644 --- a/src/include/executor/instrument.h +++ b/src/include/executor/instrument.h @@ -6,7 +6,7 @@ * * Copyright (c) 2001-2006, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/executor/instrument.h,v 1.15 2006/05/30 19:24:25 tgl Exp $ + * $PostgreSQL: pgsql/src/include/executor/instrument.h,v 1.16 2006/06/09 19:30:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -57,14 +57,10 @@ typedef struct Instrumentation { /* Info about current plan cycle: */ bool running; /* TRUE if we've completed first tuple */ - bool sampling; /* Are we sampling in current iteration? */ instr_time starttime; /* Start time of current iteration of node */ instr_time counter; /* Accumulated runtime for this node */ double firsttuple; /* Time for first tuple of this cycle */ double tuplecount; /* Tuples emitted so far this cycle */ - double itercount; /* Plan node iterations this cycle */ - double samplecount; /* Iterations in which we sampled runtime */ - double nextsample; /* Next itercount to sample at */ /* Accumulated statistics across all completed cycles: */ double startup; /* Total startup time (in seconds) */ double total; /* Total total time (in seconds) */