Diffstat (limited to 'src')
-rw-r--r--	src/backend/executor/instrument.c	178
-rw-r--r--	src/include/executor/instrument.h	 11
2 files changed, 113 insertions, 76 deletions
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index 41b061bd526..d094ffbbcf6 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -7,90 +7,117 @@
  * Copyright (c) 2001-2006, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.15 2006/05/30 14:01:58 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.16 2006/05/30 19:24:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include "postgres.h"
 
-#include <unistd.h>
 #include <math.h>
+#include <unistd.h>
 
 #include "executor/instrument.h"
 
-/* This is the function that is used to determine the sampling intervals. In
- * general, if the function is f(x), then for N tuples we will take on the
- * order of integral(1/f(x), x=0..N) samples. Some examples follow, with the
- * number of samples that would be collected over 1,000,000 tuples.
-  f(x) = x         =>   log2(N)                        20
-  f(x) = x^(1/2)   =>   2 * N^(1/2)                  2000
-  f(x) = x^(1/3)   =>   1.5 * N^(2/3)               15000
-
+/*
+ * As of PostgreSQL 8.2, we try to reduce the overhead of EXPLAIN ANALYZE
+ * by not calling INSTR_TIME_SET_CURRENT() for every single node execution.
+ * (Because that requires a kernel call on most systems, it's expensive.)
+ *
+ * This macro determines the sampling interval: that is, after how many more
+ * iterations will we take the next time sample, given that niters iterations
+ * have occurred already.  In general, if the function is f(x), then for N
+ * iterations we will take on the order of integral(1/f(x), x=0..N)
+ * samples.  Some examples follow, with the number of samples that would be
+ * collected over 1,000,000 iterations:
+ *
+ *		f(x) = x         =>   log2(N)                        20
+ *		f(x) = x^(1/2)   =>   2 * N^(1/2)                  2000
+ *		f(x) = x^(1/3)   =>   1.5 * N^(2/3)               15000
+ *
  * I've chosen the last one as it seems to provide a good compromise between
  * low overhead but still getting a meaningful number of samples. However,
  * not all machines have the cbrt() function so on those we substitute
  * sqrt(). The difference is not very significant in the tests I made.
-*/
+ *
+ * The actual sampling interval is randomized with the SampleFunc() value
+ * as the mean; this hopefully will reduce any measurement bias due to
+ * cyclic variation in the node execution time.
+ */
 #ifdef HAVE_CBRT
-#define SampleFunc cbrt
+#define SampleFunc(niters) cbrt(niters)
 #else
-#define SampleFunc sqrt
+#define SampleFunc(niters) sqrt(niters)
 #endif
 
+#define SampleInterval(niters) \
+	(SampleFunc(niters) * (double) random() / (double) (MAX_RANDOM_VALUE/2))
+
+/*
+ * We sample at every node iteration until we've reached this threshold,
+ * so that nodes not called a large number of times are completely accurate.
+ * (Perhaps this should be a GUC variable?  But beware of messing up
+ * CalculateSampleOverhead if value is too small.)
+ */
 #define SAMPLE_THRESHOLD 50
 
+/*
+ * Determine the sampling overhead.  This only needs to be done once per
+ * backend (actually, probably once per postmaster would do ...)
+ */
 static double SampleOverhead;
-static bool SampleOverheadCalculated;
+static bool SampleOverheadCalculated = false;
 
 static void
-CalculateSampleOverhead()
+CalculateSampleOverhead(void)
 {
 	Instrumentation instr;
 	int i;
-	
-	/* We want to determine the sampling overhead, to correct
-	 * calculations later. This only needs to be done once per backend.
-	 * Is this the place? A wrong value here (due to a mistimed
-	 * task-switch) will cause bad calculations later.
-	 *
+
+	/*
+	 * We could get a wrong result due to being interrupted by task switch.
 	 * To minimize the risk we do it a few times and take the lowest.
 	 */
-	
 	SampleOverhead = 1.0e6;
-	
-	for( i = 0; i<5; i++ )
+
+	for (i = 0; i < 5; i++)
 	{
 		int j;
 		double overhead;
-		
-		memset( &instr, 0, sizeof(instr) );
-	
-		/* Loop SAMPLE_THRESHOLD times or 100 microseconds, whichever is faster */
-		for( j=0; j<SAMPLE_THRESHOLD && INSTR_TIME_GET_DOUBLE(instr.counter) < 100e-6; i++ )
+
+		memset(&instr, 0, sizeof(instr));
+		/*
+		 * We know that samples will actually be taken up to SAMPLE_THRESHOLD,
+		 * so that's as far as we can test.
+		 */
+		for (j=0; j < SAMPLE_THRESHOLD; j++)
 		{
-			InstrStartNode( &instr );
-			InstrStopNode( &instr, 1 );
+			InstrStartNode(&instr);
+			InstrStopNode(&instr, 1);
 		}
 		overhead = INSTR_TIME_GET_DOUBLE(instr.counter) / instr.samplecount;
-		if( overhead < SampleOverhead )
+		if (overhead < SampleOverhead)
 			SampleOverhead = overhead;
 	}
-		
+
 	SampleOverheadCalculated = true;
 }
 
+
 /* Allocate new instrumentation structure(s) */
 Instrumentation *
 InstrAlloc(int n)
 {
-	Instrumentation *instr = palloc0(n * sizeof(Instrumentation));
+	Instrumentation *instr;
 
-	/* we don't need to do any initialization except zero 'em */
-	
-	/* Calculate overhead, if not done yet */
-	if( !SampleOverheadCalculated )
+	/* Calculate sampling overhead, if not done yet in this backend */
+	if (!SampleOverheadCalculated)
 		CalculateSampleOverhead();
+
+	instr = palloc0(n * sizeof(Instrumentation));
+
+	/* we don't need to do any initialization except zero 'em */
+
 	return instr;
 }
 
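The rewritten header comment asserts that f(x) = x^(1/3) yields on the order of 1.5 * N^(2/3) samples, i.e. roughly 15,000 over a million iterations. That figure is easy to check outside the backend; the following is only a standalone sketch of the schedule, with rand()/RAND_MAX standing in for the backend's random()/MAX_RANDOM_VALUE arithmetic and cbrt() assumed to exist (the HAVE_CBRT fallback is ignored).

/*
 * Sketch of the sampling schedule; NOT PostgreSQL code.  Build with -lm.
 */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#define SAMPLE_THRESHOLD 50

int
main(void)
{
	double		niters = 1000000;	/* pretend one node runs a million times */
	double		itercount;
	double		nextsample = 0;
	long		samples = 0;

	srand(1);
	for (itercount = 0; itercount < niters; itercount++)
	{
		if (itercount < SAMPLE_THRESHOLD)
			samples++;				/* below threshold: always "timed" */
		else if (itercount >= nextsample)
		{
			samples++;				/* take a sample, schedule the next one */
			nextsample = itercount +
				cbrt(itercount) * 2.0 * rand() / (double) RAND_MAX;
		}
	}
	printf("samples: %ld, predicted 1.5*N^(2/3): %.0f\n",
		   samples, 1.5 * pow(niters, 2.0 / 3.0));
	return 0;
}

The printed count should stay within a few percent of the prediction: the random factor is uniform on [0, 2], so the expected gap after iteration k is exactly cbrt(k), which is what the integral argument in the comment assumes.
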
@@ -100,18 +127,17 @@ InstrStartNode(Instrumentation *instr)
 {
 	if (INSTR_TIME_IS_ZERO(instr->starttime))
 	{
-		/* We always sample the first SAMPLE_THRESHOLD tuples, so small nodes are always accurate */
-		if (instr->tuplecount < SAMPLE_THRESHOLD)
+		/*
+		 * Always sample if not yet up to threshold, else check whether
+		 * next threshold has been reached
+		 */
+		if (instr->itercount < SAMPLE_THRESHOLD)
 			instr->sampling = true;
-		else
+		else if (instr->itercount >= instr->nextsample)
 		{
-			/* Otherwise we go to sampling, see the comments on SampleFunc at the top of the file */
-			if( instr->tuplecount > instr->nextsample )
-			{
-				instr->sampling = true;
-				/* The doubling is so the random will average 1 over time */
-				instr->nextsample += 2.0 * SampleFunc(instr->tuplecount) * (double)rand() / (double)RAND_MAX;
-			}
+			instr->sampling = true;
+			instr->nextsample =
+				instr->itercount + SampleInterval(instr->itercount);
 		}
 		if (instr->sampling)
 			INSTR_TIME_SET_CURRENT(instr->starttime);
@@ -124,13 +150,15 @@ InstrStartNode(Instrumentation *instr)
 void
 InstrStopNode(Instrumentation *instr, double nTuples)
 {
-	instr_time	endtime;
-
-	/* count the returned tuples */
+	/* count the returned tuples and iterations */
 	instr->tuplecount += nTuples;
+	instr->itercount += 1;
 
+	/* measure runtime if appropriate */
 	if (instr->sampling)
 	{
+		instr_time	endtime;
+
 		if (INSTR_TIME_IS_ZERO(instr->starttime))
 		{
 			elog(DEBUG2, "InstrStopNode called without start");
@@ -159,7 +187,8 @@ InstrStopNode(Instrumentation *instr, double nTuples)
 #endif
 
 		INSTR_TIME_SET_ZERO(instr->starttime);
-		instr->samplecount += nTuples;
+
+		instr->samplecount += 1;
 		instr->sampling = false;
 	}
 
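These hunks separate three counters that the previous code conflated: tuplecount (rows returned), the new itercount (one per InstrStartNode/InstrStopNode pair), and samplecount, which now counts timed iterations instead of being bumped by nTuples. The old accounting under-counts whenever a timed call returns zero rows, e.g. the final call that reports end-of-scan. A toy illustration with invented values (not PostgreSQL code):

/* Toy model of the per-cycle counters; struct-free, numbers invented. */
#include <stdio.h>

int
main(void)
{
	double		rows_per_call[] = {1, 1, 1, 1, 0};	/* last call finds no row */
	double		tuplecount = 0;
	double		itercount = 0;
	double		samplecount_new = 0;	/* new code: one per timed iteration */
	double		samplecount_old = 0;	/* old code: incremented by nTuples */
	int			i;

	for (i = 0; i < 5; i++)
	{
		tuplecount += rows_per_call[i];
		itercount += 1;
		/* pretend every call fell below SAMPLE_THRESHOLD and was timed */
		samplecount_new += 1;
		samplecount_old += rows_per_call[i];
	}
	printf("tuples=%.0f iters=%.0f sampled(new)=%.0f sampled(old)=%.0f\n",
		   tuplecount, itercount, samplecount_new, samplecount_old);
	return 0;
}

Averaging the time of five timed calls over the old count of four would overstate the per-iteration cost that InstrEndLoop later extrapolates from.
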
@@ -184,35 +213,44 @@ InstrEndLoop(Instrumentation *instr)
 	if (!INSTR_TIME_IS_ZERO(instr->starttime))
 		elog(DEBUG2, "InstrEndLoop called on running node");
 
-	/* Accumulate per-cycle statistics into totals */
+	/* Compute time spent in node */
 	totaltime = INSTR_TIME_GET_DOUBLE(instr->counter);
-	instr->startup += instr->firsttuple;
-
-	/* Here we take into account sampling effects. Doing it naively ends
-	 * up assuming the sampling overhead applies to all tuples, even the
-	 * ones we didn't measure. We've calculated an overhead, so we
-	 * subtract that for all samples we didn't measure. The first tuple
-	 * is also special cased, because it usually takes longer. */
-
-	if( instr->samplecount < instr->tuplecount )
+
+	/*
+	 * If we didn't measure runtime on every iteration, then we must increase
+	 * the measured total to account for the other iterations.  Naively
+	 * multiplying totaltime by itercount/samplecount would be wrong because
+	 * it effectively assumes the sampling overhead applies to all iterations,
+	 * even the ones we didn't measure.  (Note that what we are trying to
+	 * estimate here is the actual time spent in the node, including the
+	 * actual measurement overhead; not the time exclusive of measurement
+	 * overhead.)  We exclude the first iteration from the correction basis,
+	 * because it often takes longer than others.
+	 */
+	if (instr->itercount > instr->samplecount)
 	{
-		double pertuple = (totaltime - instr->firsttuple) / (instr->samplecount - 1);
-		instr->total += instr->firsttuple + (pertuple * (instr->samplecount - 1))
-		                                  + ((pertuple - SampleOverhead) * (instr->tuplecount - instr->samplecount));
+		double per_iter;
+
+		per_iter = (totaltime - instr->firsttuple) / (instr->samplecount - 1)
+			- SampleOverhead;
+		if (per_iter > 0)				/* sanity check */
+			totaltime += per_iter * (instr->itercount - instr->samplecount);
 	}
-	else
-		instr->total += totaltime;
-		
+
+	/* Accumulate per-cycle statistics into totals */
+	instr->startup += instr->firsttuple;
+	instr->total += totaltime;
 	instr->ntuples += instr->tuplecount;
-	instr->nsamples += instr->samplecount;
 	instr->nloops += 1;
 
 	/* Reset for next cycle (if any) */
 	instr->running = false;
+	instr->sampling = false;
 	INSTR_TIME_SET_ZERO(instr->starttime);
 	INSTR_TIME_SET_ZERO(instr->counter);
 	instr->firsttuple = 0;
-	instr->samplecount = 0;
 	instr->tuplecount = 0;
+	instr->itercount = 0;
+	instr->samplecount = 0;
+	instr->nextsample = 0;
 }
 
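The extrapolation in this hunk reads as: average the timed iterations, excluding the first one (tracked separately as firsttuple), subtract the calibrated SampleOverhead, and charge that per-iteration figure to every iteration that was not timed. A worked example with invented numbers, mirroring only the arithmetic:

/* Worked example of the InstrEndLoop correction; all values are made up. */
#include <stdio.h>

int
main(void)
{
	double		totaltime = 0.0210;		/* seconds measured over timed iterations */
	double		firsttuple = 0.0010;	/* first iteration, excluded from the average */
	double		samplecount = 201;		/* iterations actually timed */
	double		itercount = 10000;		/* iterations overall */
	double		SampleOverhead = 2e-6;	/* calibrated cost of taking one sample */
	double		per_iter;

	per_iter = (totaltime - firsttuple) / (samplecount - 1) - SampleOverhead;
	if (per_iter > 0)					/* same sanity check as the patch */
		totaltime += per_iter * (itercount - samplecount);

	/* 0.0210 + (1e-4 - 2e-6) * 9799 = about 0.981 seconds */
	printf("estimated node time = %.6f s\n", totaltime);
	return 0;
}

So a node that was timed on only 201 of its 10,000 iterations is still credited with roughly 0.98 s rather than the 21 ms that was actually measured.
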
diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h
index fbf578d473f..fd57f865788 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 2001-2006, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/executor/instrument.h,v 1.14 2006/05/30 14:01:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/executor/instrument.h,v 1.15 2006/05/30 19:24:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -57,20 +57,19 @@ typedef struct Instrumentation
 {
 	/* Info about current plan cycle: */
 	bool		running;		/* TRUE if we've completed first tuple */
+	bool		sampling;		/* Are we sampling in current iteration? */
 	instr_time	starttime;		/* Start time of current iteration of node */
 	instr_time	counter;		/* Accumulated runtime for this node */
 	double		firsttuple;		/* Time for first tuple of this cycle */
 	double		tuplecount;		/* Tuples emitted so far this cycle */
-	double		samplecount;		/* Samples collected this cycle */
+	double		itercount;		/* Plan node iterations this cycle */
+	double		samplecount;	/* Iterations in which we sampled runtime */
+	double		nextsample;		/* Next itercount to sample at */
 	/* Accumulated statistics across all completed cycles: */
 	double		startup;		/* Total startup time (in seconds) */
 	double		total;			/* Total total time (in seconds) */
 	double		ntuples;		/* Total tuples produced */
 	double		nloops;			/* # of run cycles for this node */
-	double		nsamples;		/* # of samples taken */
-	/* Tracking for sampling */
-	bool		sampling;		/* Are we sampling this iteration */
-	double		nextsample;		/* The next tuplecount we're going to sample */
 } Instrumentation;
 
 extern Instrumentation *InstrAlloc(int n);
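For context, the accumulated fields at the bottom of the struct are what EXPLAIN ANALYZE ultimately reports: explain.c divides them by nloops to print per-loop averages in milliseconds. A rough sketch of that arithmetic (field values invented, and this is not the literal explain.c code):

/* How the accumulated fields map onto EXPLAIN ANALYZE's printed numbers. */
#include <stdio.h>

int
main(void)
{
	double		startup = 0.000250;	/* seconds, summed over all loops */
	double		total = 1.962604;	/* includes the extrapolated time */
	double		ntuples = 20000;
	double		nloops = 2;

	printf("actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
		   1000.0 * startup / nloops,	/* ms to first tuple, per loop */
		   1000.0 * total / nloops,		/* ms to completion, per loop */
		   ntuples / nloops,
		   nloops);
	return 0;
}

Because total already contains the correction applied in InstrEndLoop, timed and untimed iterations are indistinguishable in the printed figures.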
