diff options
Diffstat (limited to 'src/backend/commands/vacuum.c')
| -rw-r--r-- | src/backend/commands/vacuum.c | 98 | 
1 files changed, 82 insertions, 16 deletions
| diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index e77430e6e4e..5decc5cac67 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -20,6 +20,8 @@   */  #include "postgres.h" +#include <math.h> +  #include "access/clog.h"  #include "access/genam.h"  #include "access/heapam.h" @@ -63,7 +65,7 @@ static List *get_rel_oids(Oid relid, const RangeVar *vacrel,  			 const char *stmttype);  static void vac_truncate_clog(TransactionId frozenXID);  static void vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, -		   bool for_wraparound, bool *scanned_all); +		   bool for_wraparound);  /* @@ -224,11 +226,9 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,  		foreach(cur, relations)  		{  			Oid			relid = lfirst_oid(cur); -			bool		scanned_all = false;  			if (vacstmt->options & VACOPT_VACUUM) -				vacuum_rel(relid, vacstmt, do_toast, for_wraparound, -						   &scanned_all); +				vacuum_rel(relid, vacstmt, do_toast, for_wraparound);  			if (vacstmt->options & VACOPT_ANALYZE)  			{ @@ -243,7 +243,7 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,  					PushActiveSnapshot(GetTransactionSnapshot());  				} -				analyze_rel(relid, vacstmt, vac_strategy, !scanned_all); +				analyze_rel(relid, vacstmt, vac_strategy);  				if (use_own_xacts)  				{ @@ -456,6 +456,79 @@ vacuum_set_xid_limits(int freeze_min_age,  /* + * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples + * + *		If we scanned the whole relation then we should just use the count of + *		live tuples seen; but if we did not, we should not trust the count + *		unreservedly, especially not in VACUUM, which may have scanned a quite + *		nonrandom subset of the table.  When we have only partial information, + *		we take the old value of pg_class.reltuples as a measurement of the + *		tuple density in the unscanned pages. + * + *		This routine is shared by VACUUM and ANALYZE. + */ +double +vac_estimate_reltuples(Relation relation, bool is_analyze, +					   BlockNumber total_pages, +					   BlockNumber scanned_pages, +					   double scanned_tuples) +{ +	BlockNumber	old_rel_pages = relation->rd_rel->relpages; +	double		old_rel_tuples = relation->rd_rel->reltuples; +	double		old_density; +	double		new_density; +	double		multiplier; +	double		updated_density; + +	/* If we did scan the whole table, just use the count as-is */ +	if (scanned_pages >= total_pages) +		return scanned_tuples; + +	/* +	 * If scanned_pages is zero but total_pages isn't, keep the existing +	 * value of reltuples. +	 */ +	if (scanned_pages == 0) +		return old_rel_tuples; + +	/* +	 * If old value of relpages is zero, old density is indeterminate; we +	 * can't do much except scale up scanned_tuples to match total_pages. +	 */ +	if (old_rel_pages == 0) +		return floor((scanned_tuples / scanned_pages) * total_pages + 0.5); + +	/* +	 * Okay, we've covered the corner cases.  The normal calculation is to +	 * convert the old measurement to a density (tuples per page), then +	 * update the density using an exponential-moving-average approach, +	 * and finally compute reltuples as updated_density * total_pages. +	 * +	 * For ANALYZE, the moving average multiplier is just the fraction of +	 * the table's pages we scanned.  This is equivalent to assuming +	 * that the tuple density in the unscanned pages didn't change.  Of +	 * course, it probably did, if the new density measurement is different. +	 * But over repeated cycles, the value of reltuples will converge towards +	 * the correct value, if repeated measurements show the same new density. +	 * +	 * For VACUUM, the situation is a bit different: we have looked at a +	 * nonrandom sample of pages, but we know for certain that the pages we +	 * didn't look at are precisely the ones that haven't changed lately. +	 * Thus, there is a reasonable argument for doing exactly the same thing +	 * as for the ANALYZE case, that is use the old density measurement as +	 * the value for the unscanned pages. +	 * +	 * This logic could probably use further refinement. +	 */ +	old_density = old_rel_tuples / old_rel_pages; +	new_density = scanned_tuples / scanned_pages; +	multiplier = (double) scanned_pages / (double) total_pages; +	updated_density = old_density + (new_density - old_density) * multiplier; +	return floor(updated_density * total_pages + 0.5); +} + + +/*   *	vac_update_relstats() -- update statistics for one relation   *   *		Update the whole-relation statistics that are kept in its pg_class @@ -482,7 +555,7 @@ vacuum_set_xid_limits(int freeze_min_age,   *		somebody vacuuming pg_class might think they could delete a tuple   *		marked with xmin = our xid.   * - *		This routine is shared by VACUUM and stand-alone ANALYZE. + *		This routine is shared by VACUUM and ANALYZE.   */  void  vac_update_relstats(Relation relation, @@ -769,14 +842,10 @@ vac_truncate_clog(TransactionId frozenXID)   *		many small transactions.  Otherwise, two-phase locking would require   *		us to lock the entire database during one pass of the vacuum cleaner.   * - *		We'll return true in *scanned_all if the vacuum scanned all heap - *		pages, and updated pg_class. - *   *		At entry and exit, we are not inside a transaction.   */  static void -vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound, -		   bool *scanned_all) +vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound)  {  	LOCKMODE	lmode;  	Relation	onerel; @@ -786,9 +855,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,  	int			save_sec_context;  	int			save_nestlevel; -	if (scanned_all) -		*scanned_all = false; -  	/* Begin a transaction for vacuuming this relation */  	StartTransactionCommand(); @@ -967,7 +1033,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,  					vacstmt->freeze_min_age, vacstmt->freeze_table_age);  	}  	else -		lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all); +		lazy_vacuum_rel(onerel, vacstmt, vac_strategy);  	/* Roll back any GUC changes executed by index functions */  	AtEOXact_GUC(false, save_nestlevel); @@ -993,7 +1059,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,  	 * totally unimportant for toast relations.  	 */  	if (toast_relid != InvalidOid) -		vacuum_rel(toast_relid, vacstmt, false, for_wraparound, NULL); +		vacuum_rel(toast_relid, vacstmt, false, for_wraparound);  	/*  	 * Now release the session-level lock on the master table. | 
