diff options
Diffstat (limited to 'src/backend/optimizer/path')
| -rw-r--r-- | src/backend/optimizer/path/clausesel.c | 245 | 
1 files changed, 235 insertions, 10 deletions
| diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index d3a494f9bc9..edce3d21291 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -7,7 +7,7 @@   *   *   * IDENTIFICATION - *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.28 2000/01/23 02:06:58 tgl Exp $ + *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.29 2000/01/24 07:16:46 tgl Exp $   *   *-------------------------------------------------------------------------   */ @@ -23,6 +23,23 @@  #include "utils/lsyscache.h" +/* + * Data structure for accumulating info about possible range-query + * clause pairs in clauselist_selectivity. + */ +typedef struct RangeQueryClause { +	struct RangeQueryClause *next; /* next in linked list */ +	Node	   *var;			/* The common variable of the clauses */ +	bool		have_lobound;	/* found a low-bound clause yet? */ +	bool		have_hibound;	/* found a high-bound clause yet? */ +	Selectivity	lobound;		/* Selectivity of a var > something clause */ +	Selectivity	hibound;		/* Selectivity of a var < something clause */ +} RangeQueryClause; + +static void addRangeClause(RangeQueryClause **rqlist, Node *clause, +						   int flag, bool isLTsel, Selectivity s2); + +  /****************************************************************************   *		ROUTINES TO COMPUTE SELECTIVITIES   ****************************************************************************/ @@ -55,30 +72,238 @@ restrictlist_selectivity(Query *root,   *	  must be returned.   *   * See clause_selectivity() for the meaning of the varRelid parameter. + * + * Our basic approach is to take the product of the selectivities of the + * subclauses.  However, that's only right if the subclauses have independent + * probabilities, and in reality they are often NOT independent.  So, + * we want to be smarter where we can. + + * Currently, the only extra smarts we have is to recognize "range queries", + * such as "x > 34 AND x < 42".  Clauses are recognized as possible range + * query components if they are restriction opclauses whose operators have + * scalarltsel() or scalargtsel() as their restriction selectivity estimator. + * We pair up clauses of this form that refer to the same variable.  An + * unpairable clause of this kind is simply multiplied into the selectivity + * product in the normal way.  But when we find a pair, we know that the + * selectivities represent the relative positions of the low and high bounds + * within the column's range, so instead of figuring the selectivity as + * hisel * losel, we can figure it as hisel + losel - 1.  (To visualize this, + * see that hisel is the fraction of the range below the high bound, while + * losel is the fraction above the low bound; so hisel can be interpreted + * directly as a 0..1 value but we need to convert losel to 1-losel before + * interpreting it as a value.  Then the available range is 1-losel to hisel.) + * If the calculation yields zero or negative, however, we chicken out and + * use the default interpretation; that probably means that one or both + * selectivities is a default estimate rather than an actual range value. + * Of course this is all very dependent on the behavior of + * scalarltsel/scalargtsel; perhaps some day we can generalize the approach.   */  Selectivity  clauselist_selectivity(Query *root,  					   List *clauses,  					   int varRelid)  { -	Selectivity		s1 = 1.0; -	List		   *clause; +	Selectivity			s1 = 1.0; +	RangeQueryClause   *rqlist = NULL; +	List			   *clist; -	/* Use the product of the selectivities of the subclauses. -	 * XXX this is too optimistic, since the subclauses -	 * are very likely not independent... +	/* +	 * Initial scan over clauses.  Anything that doesn't look like a +	 * potential rangequery clause gets multiplied into s1 and forgotten. +	 * Anything that does gets inserted into an rqlist entry.  	 */ -	foreach(clause, clauses) +	foreach(clist, clauses)  	{ -		Selectivity	s2 = clause_selectivity(root, -											(Node *) lfirst(clause), -											varRelid); +		Node	   *clause = (Node *) lfirst(clist); +		Selectivity	s2; + +		/* +		 * See if it looks like a restriction clause with a constant. +		 * (If it's not a constant we can't really trust the selectivity!) +		 * NB: for consistency of results, this fragment of code had +		 * better match what clause_selectivity() would do. +		 */ +		if (varRelid != 0 || NumRelids(clause) == 1) +		{ +			int			relidx; +			AttrNumber	attno; +			Datum		constval; +			int			flag; + +			get_relattval(clause, varRelid, +						  &relidx, &attno, &constval, &flag); +			if (relidx != 0 && (flag & SEL_CONSTANT)) +			{ +				/* if get_relattval succeeded, it must be an opclause */ +				Oid			opno = ((Oper *) ((Expr *) clause)->oper)->opno; +				RegProcedure oprrest = get_oprrest(opno); + +				if (!oprrest) +					s2 = (Selectivity) 0.5; +				else +					s2 = restriction_selectivity(oprrest, opno, +												 getrelid(relidx, +														  root->rtable), +												 attno, +												 constval, flag); +				/* +				 * If we reach here, we have computed the same result +				 * that clause_selectivity would, so we can just use s2 +				 * if it's the wrong oprrest.  But if it's the right +				 * oprrest, add the clause to rqlist for later processing. +				 */ +				switch (oprrest) +				{ +					case F_SCALARLTSEL: +						addRangeClause(&rqlist, clause, flag, true, s2); +						break; +					case F_SCALARGTSEL: +						addRangeClause(&rqlist, clause, flag, false, s2); +						break; +					default: +						/* Just merge the selectivity in generically */ +						s1 = s1 * s2; +						break; +				} +				continue; /* drop to loop bottom */ +			} +		} +		/* Not the right form, so treat it generically. */ +		s2 = clause_selectivity(root, clause, varRelid);  		s1 = s1 * s2;  	} + +	/* +	 * Now scan the rangequery pair list. +	 */ +	while (rqlist != NULL) +	{ +		RangeQueryClause   *rqnext; + +		if (rqlist->have_lobound && rqlist->have_hibound) +		{ +			/* Successfully matched a pair of range clauses */ +			Selectivity	s2 = rqlist->hibound + rqlist->lobound - 1.0; + +			if (s2 > 0.0) +			{ +				/* All our hard work has paid off! */ +				s1 *= s2; +			} +			else +			{ +				/* One or both is probably a default estimate, +				 * so punt and just merge them in generically. +				 */ +				s1 *= rqlist->hibound * rqlist->lobound; +			} +		} +		else +		{ +			/* Only found one of a pair, merge it in generically */ +			if (rqlist->have_lobound) +				s1 *= rqlist->lobound; +			else +				s1 *= rqlist->hibound; +		} +		/* release storage and advance */ +		rqnext = rqlist->next; +		pfree(rqlist); +		rqlist = rqnext; +	} +  	return s1;  }  /* + * addRangeClause --- add a new range clause for clauselist_selectivity + * + * Here is where we try to match up pairs of range-query clauses + */ +static void +addRangeClause(RangeQueryClause **rqlist, Node *clause, +			   int flag, bool isLTsel, Selectivity s2) +{ +	RangeQueryClause   *rqelem; +	Node			   *var; +	bool				is_lobound; + +	/* get_relattval sets flag&SEL_RIGHT if the var is on the LEFT. */ +	if (flag & SEL_RIGHT) +	{ +		var = (Node *) get_leftop((Expr *) clause); +		is_lobound = ! isLTsel;	/* x < something is high bound */ +	} +	else +	{ +		var = (Node *) get_rightop((Expr *) clause); +		is_lobound = isLTsel;	/* something < x is low bound */ +	} + +	for (rqelem = *rqlist; rqelem; rqelem = rqelem->next) +	{ +		/* We use full equal() here because the "var" might be a function +		 * of one or more attributes of the same relation... +		 */ +		if (! equal(var, rqelem->var)) +			continue; +		/* Found the right group to put this clause in */ +		if (is_lobound) +		{ +			if (! rqelem->have_lobound) +			{ +				rqelem->have_lobound = true; +				rqelem->lobound = s2; +			} +			else +			{ +				/* We have found two similar clauses, such as +				 * x < y AND x < z.  Keep only the more restrictive one. +				 */ +				if (rqelem->lobound > s2) +					rqelem->lobound = s2; +			} +		} +		else +		{ +			if (! rqelem->have_hibound) +			{ +				rqelem->have_hibound = true; +				rqelem->hibound = s2; +			} +			else +			{ +				/* We have found two similar clauses, such as +				 * x > y AND x > z.  Keep only the more restrictive one. +				 */ +				if (rqelem->hibound > s2) +					rqelem->hibound = s2; +			} +		} +		return; +	} + +	/* No matching var found, so make a new clause-pair data structure */ +	rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause)); +	rqelem->var = var; +	if (is_lobound) +	{ +		rqelem->have_lobound = true; +		rqelem->have_hibound = false; +		rqelem->lobound = s2; +	} +	else +	{ +		rqelem->have_lobound = false; +		rqelem->have_hibound = true; +		rqelem->hibound = s2; +	} +	rqelem->next = *rqlist; +	*rqlist = rqelem; +} + + +/*   * clause_selectivity -   *	  Compute the selectivity of a general boolean expression clause.   * | 
