| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
 | /*-------------------------------------------------------------------------
 *
 * orindxpath.c
 *	  Routines to find index paths that match a set of OR clauses
 *
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.57 2004/01/05 23:39:54 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/restrictinfo.h"
static IndexPath *best_or_subclause_indexes(Query *root, RelOptInfo *rel,
											List *subclauses);
static bool best_or_subclause_index(Query *root,
						RelOptInfo *rel,
						Expr *subclause,
						IndexOptInfo **retIndexInfo,
						List **retIndexClauses,
						List **retIndexQuals,
						Cost *retStartupCost,
						Cost *retTotalCost);
/*----------
 * create_or_index_quals
 *	  Examine join OR-of-AND quals to see if any useful restriction OR
 *	  clauses can be extracted.  If so, add them to the query.
 *
 * Although a join clause must reference other relations overall,
 * an OR of ANDs clause might contain sub-clauses that reference just this
 * relation and can be used to build a restriction clause.
 * For example consider
 *		WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
 * We can transform this into
 *		WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
 *			AND (a.x = 42 OR a.x = 44)
 *			AND (b.y = 43 OR b.z = 45);
 * which opens the potential to build OR indexscans on a and b.  In essence
 * this is a partial transformation to CNF (AND of ORs format).  It is not
 * complete, however, because we do not unravel the original OR --- doing so
 * would usually bloat the qualification expression to little gain.
 *
 * The added quals are partially redundant with the original OR, and therefore
 * will cause the size of the joinrel to be underestimated when it is finally
 * formed.  (This would be true of a full transformation to CNF as well; the
 * fault is not really in the transformation, but in clauselist_selectivity's
 * inability to recognize redundant conditions.)  To minimize the collateral
 * damage, we want to minimize the number of quals added.  Therefore we do
 * not add every possible extracted restriction condition to the query.
 * Instead, we search for the single restriction condition that generates
 * the most useful (cheapest) OR indexscan, and add only that condition.
 * This is a pretty ad-hoc heuristic, but quite useful.
 *
 * We can then compensate for the redundancy of the added qual by poking
 * the recorded selectivity of the original OR clause, thereby ensuring
 * the added qual doesn't change the estimated size of the joinrel when
 * it is finally formed.  This is a MAJOR HACK: it depends on the fact
 * that clause selectivities are cached and on the fact that the same
 * RestrictInfo node will appear in every joininfo list that might be used
 * when the joinrel is formed.  And it probably isn't right in cases where
 * the size estimation is nonlinear (i.e., outer and IN joins).  But it
 * beats not doing anything.
 *
 * NOTE: one might think this messiness could be worked around by generating
 * the indexscan path with a small path->rows value, and not touching the
 * rel's baserestrictinfo or rel->rows.  However, that does not work.
 * The optimizer's fundamental design assumes that every general-purpose
 * Path for a given relation generates the same number of rows.  Without
 * this assumption we'd not be able to optimize solely on the cost of Paths,
 * but would have to take number of output rows into account as well.
 * (Perhaps someday that'd be worth doing, but it's a pretty big change...)
 *
 * 'rel' is the relation entry for which quals are to be created
 *
 * If successful, adds qual(s) to rel->baserestrictinfo and returns TRUE.
 * If no quals available, returns FALSE and doesn't change rel.
 *
 * Note: check_partial_indexes() must have been run previously.
 *----------
 */
bool
create_or_index_quals(Query *root, RelOptInfo *rel)
{
	IndexPath  *bestpath = NULL;
	RestrictInfo *bestrinfo = NULL;
	List	   *newrinfos;
	RestrictInfo *or_rinfo;
	Selectivity or_selec,
				orig_selec;
	List	   *i;
	/*
	 * We use the best_or_subclause_indexes() machinery to locate the
	 * best combination of restriction subclauses.  Note we must ignore
	 * any joinclauses that are not marked valid_everywhere, because they
	 * cannot be pushed down due to outer-join rules.
	 */
	foreach(i, rel->joininfo)
	{
		JoinInfo   *joininfo = (JoinInfo *) lfirst(i);
		List	   *j;
		foreach(j, joininfo->jinfo_restrictinfo)
		{
			RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
			if (restriction_is_or_clause(rinfo) &&
				rinfo->valid_everywhere)
			{
				IndexPath  *pathnode;
				pathnode = best_or_subclause_indexes(root,
													 rel,
										((BoolExpr *) rinfo->orclause)->args);
				if (pathnode)
				{
					if (bestpath == NULL ||
						pathnode->path.total_cost < bestpath->path.total_cost)
					{
						bestpath = pathnode;
						bestrinfo = rinfo;
					}
				}
			}
		}
	}
	/* Fail if no suitable clauses found */
	if (bestpath == NULL)
		return false;
	/*
	 * Convert the indexclauses structure to a RestrictInfo tree,
	 * and add it to the rel's restriction list.
	 */
	newrinfos = make_restrictinfo_from_indexclauses(bestpath->indexclauses,
													true, true);
	Assert(length(newrinfos) == 1);
	or_rinfo = (RestrictInfo *) lfirst(newrinfos);
	rel->baserestrictinfo = nconc(rel->baserestrictinfo, newrinfos);
	/*
	 * Adjust the original OR clause's cached selectivity to compensate
	 * for the selectivity of the added (but redundant) lower-level qual.
	 * This should result in the join rel getting approximately the same
	 * rows estimate as it would have gotten without all these shenanigans.
	 * (XXX major hack alert ... this depends on the assumption that the
	 * selectivity will stay cached ...)
	 */
	or_selec = clause_selectivity(root, (Node *) or_rinfo,
								  0, JOIN_INNER);
	if (or_selec > 0 && or_selec < 1)
	{
		orig_selec = clause_selectivity(root, (Node *) bestrinfo,
										0, JOIN_INNER);
		bestrinfo->this_selec = orig_selec / or_selec;
		/* clamp result to sane range */
		if (bestrinfo->this_selec > 1)
			bestrinfo->this_selec = 1;
	}
	/* Tell caller to recompute rel's rows estimate */
	return true;
}
/*
 * create_or_index_paths
 *	  Creates multi-scan index paths for indexes that match OR clauses.
 *
 * 'rel' is the relation entry for which the paths are to be created
 *
 * Returns nothing, but adds paths to rel->pathlist via add_path().
 *
 * Note: check_partial_indexes() must have been run previously.
 */
void
create_or_index_paths(Query *root, RelOptInfo *rel)
{
	List	   *i;
	/*
	 * Check each restriction clause to see if it is an OR clause, and if so,
	 * try to make a path using it.
	 */
	foreach(i, rel->baserestrictinfo)
	{
		RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
		if (restriction_is_or_clause(rinfo))
		{
			IndexPath  *pathnode;
			pathnode = best_or_subclause_indexes(root,
												 rel,
							   ((BoolExpr *) rinfo->orclause)->args);
			if (pathnode)
				add_path(rel, (Path *) pathnode);
		}
	}
}
/*
 * best_or_subclause_indexes
 *	  Determine the best index to be used in conjunction with each subclause
 *	  of an OR clause, and build a Path for a multi-index scan.
 *
 * 'rel' is the node of the relation to be scanned
 * 'subclauses' are the subclauses of the OR clause (must be the modified
 *		form that includes sub-RestrictInfo clauses)
 *
 * Returns an IndexPath if successful, or NULL if it is not possible to
 * find an index for each OR subclause.
 *
 * NOTE: we choose each scan on the basis of its total cost, ignoring startup
 * cost.  This is reasonable as long as all index types have zero or small
 * startup cost, but we might have to work harder if any index types with
 * nontrivial startup cost are ever invented.
 *
 * This routine also creates the indexqual list that will be needed by
 * the executor.  The indexqual list has one entry for each scan of the base
 * rel, which is a sublist of indexqual conditions to apply in that scan.
 * The implicit semantics are AND across each sublist of quals, and OR across
 * the toplevel list (note that the executor takes care not to return any
 * single tuple more than once).
 */
static IndexPath *
best_or_subclause_indexes(Query *root,
						  RelOptInfo *rel,
						  List *subclauses)
{
	FastList	infos;
	FastList	clauses;
	FastList	quals;
	Cost		path_startup_cost;
	Cost		path_total_cost;
	List	   *slist;
	IndexPath  *pathnode;
	FastListInit(&infos);
	FastListInit(&clauses);
	FastListInit(&quals);
	path_startup_cost = 0;
	path_total_cost = 0;
	/* Gather info for each OR subclause */
	foreach(slist, subclauses)
	{
		Expr	   *subclause = lfirst(slist);
		IndexOptInfo *best_indexinfo;
		List	   *best_indexclauses;
		List	   *best_indexquals;
		Cost		best_startup_cost;
		Cost		best_total_cost;
		if (!best_or_subclause_index(root, rel, subclause,
									 &best_indexinfo,
									 &best_indexclauses, &best_indexquals,
									 &best_startup_cost, &best_total_cost))
			return NULL;		/* failed to match this subclause */
		FastAppend(&infos, best_indexinfo);
		FastAppend(&clauses, best_indexclauses);
		FastAppend(&quals, best_indexquals);
		/*
		 * Path startup_cost is the startup cost for the first index scan only;
		 * startup costs for later scans will be paid later on, so they just
		 * get reflected in total_cost.
		 *
		 * Total cost is sum of the per-scan costs.
		 */
		if (slist == subclauses)	/* first scan? */
			path_startup_cost = best_startup_cost;
		path_total_cost += best_total_cost;
	}
	/* We succeeded, so build an IndexPath node */
	pathnode = makeNode(IndexPath);
	pathnode->path.pathtype = T_IndexScan;
	pathnode->path.parent = rel;
	pathnode->path.startup_cost = path_startup_cost;
	pathnode->path.total_cost = path_total_cost;
	/*
	 * This is an IndexScan, but the overall result will consist of tuples
	 * extracted in multiple passes (one for each subclause of the OR),
	 * so the result cannot be claimed to have any particular ordering.
	 */
	pathnode->path.pathkeys = NIL;
	pathnode->indexinfo = FastListValue(&infos);
	pathnode->indexclauses = FastListValue(&clauses);
	pathnode->indexquals = FastListValue(&quals);
	/* It's not an innerjoin path. */
	pathnode->isjoininner = false;
	/* We don't actually care what order the index scans in. */
	pathnode->indexscandir = NoMovementScanDirection;
	/*
	 * The number of rows is the same as the parent rel's estimate, since
	 * this isn't a join inner indexscan.
	 */
	pathnode->rows = rel->rows;
	return pathnode;
}
/*
 * best_or_subclause_index
 *	  Determines which is the best index to be used with a subclause of an
 *	  OR clause by estimating the cost of using each index and selecting
 *	  the least expensive (considering total cost only, for now).
 *
 * Returns FALSE if no index exists that can be used with this OR subclause;
 * in that case the output parameters are not set.
 *
 * 'rel' is the node of the relation to be scanned
 * 'subclause' is the OR subclause being considered
 *
 * '*retIndexInfo' gets the IndexOptInfo of the best index
 * '*retIndexClauses' gets a list of the index clauses for the best index
 * '*retIndexQuals' gets a list of the expanded indexquals for the best index
 * '*retStartupCost' gets the startup cost of a scan with that index
 * '*retTotalCost' gets the total cost of a scan with that index
 */
static bool
best_or_subclause_index(Query *root,
						RelOptInfo *rel,
						Expr *subclause,
						IndexOptInfo **retIndexInfo,	/* return value */
						List **retIndexClauses,	/* return value */
						List **retIndexQuals,	/* return value */
						Cost *retStartupCost,	/* return value */
						Cost *retTotalCost)		/* return value */
{
	bool		found = false;
	List	   *ilist;
	foreach(ilist, rel->indexlist)
	{
		IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
		List	   *indexclauses;
		List	   *indexquals;
		Path		subclause_path;
		/* Ignore partial indexes that do not match the query */
		if (index->indpred != NIL && !index->predOK)
			continue;
		/* Collect index clauses usable with this index */
		indexclauses = group_clauses_by_indexkey_for_or(rel, index, subclause);
		/* Ignore index if it doesn't match the subclause at all */
		if (indexclauses == NIL)
			continue;
		/* Convert clauses to indexquals the executor can handle */
		indexquals = expand_indexqual_conditions(index, indexclauses);
		cost_index(&subclause_path, root, rel, index, indexquals, false);
		if (!found || subclause_path.total_cost < *retTotalCost)
		{
			*retIndexInfo = index;
			*retIndexClauses = flatten_clausegroups_list(indexclauses);
			*retIndexQuals = indexquals;
			*retStartupCost = subclause_path.startup_cost;
			*retTotalCost = subclause_path.total_cost;
			found = true;
		}
	}
	return found;
}
 |