diff options
| author | Tom Lane <tgl@sss.pgh.pa.us> | 2010-11-19 22:28:25 -0500 | 
|---|---|---|
| committer | Tom Lane <tgl@sss.pgh.pa.us> | 2010-11-19 22:28:25 -0500 | 
| commit | b5efc094042638154d74fdb1e8344ae0ba977617 (patch) | |
| tree | f35e1fb179c14e625cc8cd18a1587b57a139fa1c /src | |
| parent | b11accc9a9ade1b5da3ba0791b725dd29f9abeb7 (diff) | |
Fix leakage of cost_limit when multiple autovacuum workers are active.
When using default autovacuum_vac_cost_limit, autovac_balance_cost relied
on VacuumCostLimit to contain the correct global value ... but after the
first time through in a particular worker process, it didn't, because we'd
trashed it in previous iterations.  Depending on the state of other autovac
workers, this could result in a steady reduction of the effective
cost_limit setting as a particular worker processed more and more tables,
causing it to go slower and slower.  Spotted by Simon Poole (bug #5759).
Fix by saving and restoring the GUC variables in the loop in do_autovacuum.
In passing, improve a few comments.
Back-patch to 8.3 ... the cost rebalancing code has been buggy since it was
put in.
Diffstat (limited to 'src')
| -rw-r--r-- | src/backend/postmaster/autovacuum.c | 63 | 
1 files changed, 45 insertions, 18 deletions
| diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 6e2741531b5..55e647022de 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -190,7 +190,7 @@ typedef struct autovac_table   *   * wi_links		entry into free list or running list   * wi_dboid		OID of the database this worker is supposed to work on - * wi_tableoid	OID of the table currently being vacuumed + * wi_tableoid	OID of the table currently being vacuumed, if any   * wi_proc		pointer to PGPROC of the running worker, NULL if not started   * wi_launchtime Time at which this worker was launched   * wi_cost_*	Vacuum cost-based delay parameters current in this worker @@ -1629,7 +1629,7 @@ FreeWorkerInfo(int code, Datum arg)  		 * limit setting of the remaining workers.  		 *  		 * We somewhat ignore the risk that the launcher changes its PID -		 * between we reading it and the actual kill; we expect ProcKill to be +		 * between us reading it and the actual kill; we expect ProcKill to be  		 * called shortly after us, and we assume that PIDs are not reused too  		 * quickly after a process exits.  		 */ @@ -1673,16 +1673,18 @@ AutoVacuumUpdateDelay(void)  /*   * autovac_balance_cost - *		Recalculate the cost limit setting for each active workers. + *		Recalculate the cost limit setting for each active worker.   *   * Caller must hold the AutovacuumLock in exclusive mode.   */  static void  autovac_balance_cost(void)  { -	WorkerInfo	worker; -  	/* +	 * The idea here is that we ration out I/O equally.  The amount of I/O +	 * that a worker can consume is determined by cost_limit/cost_delay, so +	 * we try to equalize those ratios rather than the raw limit settings. +	 *  	 * note: in cost_limit, zero also means use value from elsewhere, because  	 * zero is not a valid value.  	 */ @@ -1692,6 +1694,7 @@ autovac_balance_cost(void)  								autovacuum_vac_cost_delay : VacuumCostDelay);  	double		cost_total;  	double		cost_avail; +	WorkerInfo	worker;  	/* not set? nothing to do */  	if (vac_cost_limit <= 0 || vac_cost_delay <= 0) @@ -1718,7 +1721,7 @@ autovac_balance_cost(void)  		return;  	/* -	 * Adjust each cost limit of active workers to balance the total of cost +	 * Adjust cost limit of each active worker to balance the total of cost  	 * limit to autovacuum_vacuum_cost_limit.  	 */  	cost_avail = (double) vac_cost_limit / vac_cost_delay; @@ -1734,14 +1737,19 @@ autovac_balance_cost(void)  			(cost_avail * worker->wi_cost_limit_base / cost_total);  			/* -			 * We put a lower bound of 1 to the cost_limit, to avoid division- -			 * by-zero in the vacuum code. +			 * We put a lower bound of 1 on the cost_limit, to avoid division- +			 * by-zero in the vacuum code.  Also, in case of roundoff trouble +			 * in these calculations, let's be sure we don't ever set +			 * cost_limit to more than the base value.  			 */ -			worker->wi_cost_limit = Max(Min(limit, worker->wi_cost_limit_base), 1); - -			elog(DEBUG2, "autovac_balance_cost(pid=%u db=%u, rel=%u, cost_limit=%d, cost_delay=%d)", -				 worker->wi_proc->pid, worker->wi_dboid, -				 worker->wi_tableoid, worker->wi_cost_limit, worker->wi_cost_delay); +			worker->wi_cost_limit = Max(Min(limit, +											worker->wi_cost_limit_base), +										1); + +			elog(DEBUG2, "autovac_balance_cost(pid=%u db=%u, rel=%u, cost_limit=%d, cost_limit_base=%d, cost_delay=%d)", +				 worker->wi_proc->pid, worker->wi_dboid, worker->wi_tableoid, +				 worker->wi_cost_limit, worker->wi_cost_limit_base, +				 worker->wi_cost_delay);  		}  		worker = (WorkerInfo) SHMQueueNext(&AutoVacuumShmem->av_runningWorkers, @@ -2125,6 +2133,8 @@ do_autovacuum(void)  		autovac_table *tab;  		WorkerInfo	worker;  		bool		skipit; +		int			stdVacuumCostDelay; +		int			stdVacuumCostLimit;  		CHECK_FOR_INTERRUPTS(); @@ -2198,11 +2208,15 @@ do_autovacuum(void)  		MyWorkerInfo->wi_tableoid = relid;  		LWLockRelease(AutovacuumScheduleLock); -		/* Set the initial vacuum cost parameters for this table */ -		VacuumCostDelay = tab->at_vacuum_cost_delay; -		VacuumCostLimit = tab->at_vacuum_cost_limit; +		/* +		 * Remember the prevailing values of the vacuum cost GUCs.  We have +		 * to restore these at the bottom of the loop, else we'll compute +		 * wrong values in the next iteration of autovac_balance_cost(). +		 */ +		stdVacuumCostDelay = VacuumCostDelay; +		stdVacuumCostLimit = VacuumCostLimit; -		/* Last fixups before actually starting to work */ +		/* Must hold AutovacuumLock while mucking with cost balance info */  		LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);  		/* advertise my cost delay parameters for the balancing algorithm */ @@ -2213,6 +2227,9 @@ do_autovacuum(void)  		/* do a balance */  		autovac_balance_cost(); +		/* set the active cost parameters from the result of that */ +		AutoVacuumUpdateDelay(); +  		/* done */  		LWLockRelease(AutovacuumLock); @@ -2290,10 +2307,20 @@ deleted:  			pfree(tab->at_relname);  		pfree(tab); -		/* remove my info from shared memory */ +		/* +		 * Remove my info from shared memory.  We could, but intentionally +		 * don't, clear wi_cost_limit and friends --- this is on the +		 * assumption that we probably have more to do with similar cost +		 * settings, so we don't want to give up our share of I/O for a very +		 * short interval and thereby thrash the global balance. +		 */  		LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);  		MyWorkerInfo->wi_tableoid = InvalidOid;  		LWLockRelease(AutovacuumLock); + +		/* restore vacuum cost GUCs for the next iteration */ +		VacuumCostDelay = stdVacuumCostDelay; +		VacuumCostLimit = stdVacuumCostLimit;  	}  	/* | 
