diff options
Diffstat (limited to 'src/backend/optimizer')
51 files changed, 0 insertions, 26609 deletions
diff --git a/src/backend/optimizer/Makefile b/src/backend/optimizer/Makefile deleted file mode 100644 index 39d9dcb0ad8..00000000000 --- a/src/backend/optimizer/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -# -# Makefile for optimizer -# -# $Header: /cvsroot/pgsql/src/backend/optimizer/Makefile,v 1.9 2000/08/31 16:10:07 petere Exp $ -# - -subdir = src/backend/optimizer -top_builddir = ../../.. -include $(top_builddir)/src/Makefile.global - -SUBDIRS := geqo path plan prep util -SUBDIROBJS := $(SUBDIRS:%=%/SUBSYS.o) - -all: SUBSYS.o - -SUBSYS.o: $(SUBDIROBJS) - $(LD) $(LDREL) $(LDOUT) $@ $^ - -$(SUBDIROBJS): $(SUBDIRS:%=%-recursive) - -.PHONY: $(SUBDIRS:%=%-recursive) -$(SUBDIRS:%=%-recursive): - $(MAKE) -C $(subst -recursive,,$@) SUBSYS.o - -clean: - for dir in $(SUBDIRS); do $(MAKE) -C $$dir $@ || exit; done - rm -f SUBSYS.o - -dep depend: - for dir in $(SUBDIRS); do $(MAKE) -C $$dir $@ || exit; done diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README deleted file mode 100644 index 14e39909220..00000000000 --- a/src/backend/optimizer/README +++ /dev/null @@ -1,501 +0,0 @@ -Summary -------- - -These directories take the Query structure returned by the parser, and -generate a plan used by the executor. The /plan directory generates the -actual output plan, the /path code generates all possible ways to join the -tables, and /prep handles various preprocessing steps for special cases. -/util is utility stuff. /geqo is the separate "genetic optimization" planner ---- it does a semi-random search through the join tree space, rather than -exhaustively considering all possible join trees. (But each join considered -by /geqo is given to /path to create paths for, so we consider all possible -implementation paths for each specific join pair even in GEQO mode.) - - -Paths and Join Pairs --------------------- - -During the planning/optimizing process, we build "Path" trees representing -the different ways of doing a query. 
We select the cheapest Path that -generates the desired relation and turn it into a Plan to pass to the -executor. (There is pretty much a one-to-one correspondence between the -Path and Plan trees, but Path nodes omit info that won't be needed during -planning, and include info needed for planning that won't be needed by the -executor.) - -The optimizer builds a RelOptInfo structure for each base relation used in -the query. Base rels are either primitive tables, or subquery subselects -that are planned via a separate recursive invocation of the planner. A -RelOptInfo is also built for each join relation that is considered during -planning. A join rel is simply a combination of base rels. There is only -one join RelOptInfo for any given set of baserels --- for example, the join -{A B C} is represented by the same RelOptInfo no matter whether we build it -by joining A and B first and then adding C, or joining B and C first and -then adding A, etc. These different means of building the joinrel are -represented as Paths. For each RelOptInfo we build a list of Paths that -represent plausible ways to implement the scan or join of that relation. -Once we've considered all the plausible Paths for a rel, we select the one -that is cheapest according to the planner's cost estimates. The final plan -is derived from the cheapest Path for the RelOptInfo that includes all the -base rels of the query. - -Possible Paths for a primitive table relation include plain old sequential -scan, plus index scans for any indexes that exist on the table. A subquery -base relation just has one Path, a "SubqueryScan" path (which links to the -subplan that was built by a recursive invocation of the planner). Likewise -a function-RTE base relation has only one possible Path. - -Joins always occur using two RelOptInfos. One is outer, the other inner. -Outers drive lookups of values in the inner. 
In a nested loop, lookups of -values in the inner occur by scanning the inner path once per outer tuple -to find each matching inner row. In a mergejoin, inner and outer rows are -ordered, and are accessed in order, so only one scan is required to perform -the entire join: both inner and outer paths are scanned in sync. (There's -not a lot of difference between inner and outer in a mergejoin...) In a -hashjoin, the inner is scanned first and all its rows are entered in a -hashtable, then the outer is scanned and for each row we look up the join -key in the hashtable. - -A Path for a join relation is actually a tree structure, with the top -Path node representing the join method. It has left and right subpaths -that represent the scan or join methods used for the two input relations. - - -Join Tree Construction ----------------------- - -The optimizer generates optimal query plans by doing a more-or-less -exhaustive search through the ways of executing the query. The best Path -tree is found by a recursive process: - -1) Take each base relation in the query, and make a RelOptInfo structure -for it. Find each potentially useful way of accessing the relation, -including sequential and index scans, and make a Path representing that -way. All the Paths made for a given relation are placed in its -RelOptInfo.pathlist. (Actually, we discard Paths that are obviously -inferior alternatives before they ever get into the pathlist --- what -ends up in the pathlist is the cheapest way of generating each potentially -useful sort ordering of the relation.) Also create RelOptInfo.joininfo -nodes that list all the join clauses that involve this relation. For -example, the WHERE clause "tab1.col1 = tab2.col1" generates a JoinInfo -for tab1 listing tab2 as an unjoined relation, and also one for tab2 -showing tab1 as an unjoined relation. - -If we have only a single base relation in the query, we are done.
-Otherwise we have to figure out how to join the base relations into a -single join relation. - -2) If the query's FROM clause contains explicit JOIN clauses, we join -those pairs of relations in exactly the tree structure indicated by the -JOIN clauses. (This is absolutely necessary when dealing with outer JOINs. -For inner JOINs we have more flexibility in theory, but don't currently -exploit it in practice.) For each such join pair, we generate a Path -for each feasible join method, and select the cheapest Path. Note that -the JOIN clause structure determines the join Path structure, but it -doesn't constrain the join implementation method at each join (nestloop, -merge, hash), nor does it say which rel is considered outer or inner at -each join. We consider all these possibilities in building Paths. - -3) At the top level of the FROM clause we will have a list of relations -that are either base rels or joinrels constructed per JOIN directives. -We can join these rels together in any order the planner sees fit. -The standard (non-GEQO) planner does this as follows: - -Consider joining each RelOptInfo to each other RelOptInfo specified in its -RelOptInfo.joininfo, and generate a Path for each possible join method for -each such pair. (If we have a RelOptInfo with no join clauses, we have no -choice but to generate a clauseless Cartesian-product join; so we consider -joining that rel to each other available rel. But in the presence of join -clauses we will only consider joins that use available join clauses.) - -If we only had two relations in the FROM list, we are done: we just pick -the cheapest path for the join RelOptInfo. If we had more than two, we now -need to consider ways of joining join RelOptInfos to each other to make -join RelOptInfos that represent more than two FROM items. 
- -The join tree is constructed using a "dynamic programming" algorithm: -in the first pass (already described) we consider ways to create join rels -representing exactly two FROM items. The second pass considers ways -to make join rels that represent exactly three FROM items; the next pass, -four items, etc. The last pass considers how to make the final join -relation that includes all FROM items --- obviously there can be only one -join rel at this top level, whereas there can be more than one join rel -at lower levels. At each level we use joins that follow available join -clauses, if possible, just as described for the first level. - -For example: - - SELECT * - FROM tab1, tab2, tab3, tab4 - WHERE tab1.col = tab2.col AND - tab2.col = tab3.col AND - tab3.col = tab4.col - - Tables 1, 2, 3, and 4 are joined as: - {1 2},{2 3},{3 4} - {1 2 3},{2 3 4} - {1 2 3 4} - (other possibilities will be excluded for lack of join clauses) - - SELECT * - FROM tab1, tab2, tab3, tab4 - WHERE tab1.col = tab2.col AND - tab1.col = tab3.col AND - tab1.col = tab4.col - - Tables 1, 2, 3, and 4 are joined as: - {1 2},{1 3},{1 4} - {1 2 3},{1 3 4},{1 2 4} - {1 2 3 4} - -We consider left-handed plans (the outer rel of an upper join is a joinrel, -but the inner is always a single FROM item); right-handed plans (outer rel -is always a single item); and bushy plans (both inner and outer can be -joins themselves). For example, when building {1 2 3 4} we consider -joining {1 2 3} to {4} (left-handed), {4} to {1 2 3} (right-handed), and -{1 2} to {3 4} (bushy), among other choices. Although the jointree -scanning code produces these potential join combinations one at a time, -all the ways to produce the same set of joined base rels will share the -same RelOptInfo, so the paths produced from different join combinations -that produce equivalent joinrels will compete in add_path. 
- -Once we have built the final join rel, we use either the cheapest path -for it or the cheapest path with the desired ordering (if that's cheaper -than applying a sort to the cheapest other path). - - -Pulling up subqueries ---------------------- - -As we described above, a subquery appearing in the range table is planned -independently and treated as a "black box" during planning of the outer -query. This is necessary when the subquery uses features such as -aggregates, GROUP, or DISTINCT. But if the subquery is just a simple -scan or join, treating the subquery as a black box may produce a poor plan -compared to considering it as part of the entire plan search space. -Therefore, at the start of the planning process the planner looks for -simple subqueries and pulls them up into the main query's jointree. - -Pulling up a subquery may result in FROM-list joins appearing below the top -of the join tree. Each FROM-list is planned using the dynamic-programming -search method described above. - -If pulling up a subquery produces a FROM-list as a direct child of another -FROM-list (with no explicit JOIN directives between), then we can merge the -two FROM-lists together. Once that's done, the subquery is an absolutely -integral part of the outer query and will not constrain the join tree -search space at all. However, that could result in unpleasant growth of -planning time, since the dynamic-programming search has runtime exponential -in the number of FROM-items considered. Therefore, we don't merge -FROM-lists if the result would have too many FROM-items in one list. - - -Optimizer Functions -------------------- - -The primary entry point is planner(). - -planner() - set up for recursive handling of subqueries - do final cleanup after planning. --subquery_planner() - pull up subqueries from rangetable, if possible - simplify constant expressions - canonicalize qual - Attempt to reduce WHERE clause to either CNF or DNF canonical form. 
- CNF (top-level-AND) is preferred, since the optimizer can then use - any of the AND subclauses to filter tuples; but quals that are in - or close to DNF form will suffer exponential expansion if we try to - force them to CNF. In pathological cases either transform may expand - the qual unreasonably; so we may have to leave it un-normalized, - thereby reducing the accuracy of selectivity estimates. - process sublinks - convert Vars of outer query levels into Params ---grouping_planner() - preprocess target list for non-SELECT queries - handle UNION/INTERSECT/EXCEPT, GROUP BY, HAVING, aggregates, - ORDER BY, DISTINCT, LIMIT ---query_planner() - pull out constant quals, which can be used to gate execution of the - whole plan (if any are found, we make a top-level Result node - to do the gating) - make a simplified target list that only contains Vars, no expressions ----subplanner() - make list of base relations used in query - split up the qual into restrictions (a=1) and joins (b=c) - find qual clauses that enable merge and hash joins -----make_one_rel() - set_base_rel_pathlist() - find scan and all index paths for each base relation - find selectivity of columns used in joins ------make_one_rel_by_joins() - jump to geqo if needed - else call make_rels_by_joins() for each level of join tree needed - make_rels_by_joins(): - For each joinrel of the prior level, do make_rels_by_clause_joins() - if it has join clauses, or make_rels_by_clauseless_joins() if not. - Also generate "bushy plan" joins between joinrels of lower levels. - Back at make_one_rel_by_joins(), apply set_cheapest() to extract the - cheapest path for each newly constructed joinrel. - Loop back if this wasn't the top join level. 
- Back at query_planner: - put back constant quals and non-simplified target list - Back at grouping_planner: - do grouping(GROUP) - do aggregates - make unique(DISTINCT) - make sort(ORDER BY) - make limit(LIMIT/OFFSET) - - -Optimizer Data Structures -------------------------- - -RelOptInfo - a relation or joined relations - - RestrictInfo - restriction clauses, like "x = 3" - JoinInfo - join clauses, including the relids needed for the join - - Path - every way to generate a RelOptInfo(sequential,index,joins) - SeqScan - a plain Path node with nodeTag = T_SeqScan - IndexPath - index scans - NestPath - nested-loop joins - MergePath - merge joins - HashPath - hash joins - - PathKeys - a data structure representing the ordering of a path - -The optimizer spends a good deal of its time worrying about the ordering -of the tuples returned by a path. The reason this is useful is that by -knowing the sort ordering of a path, we may be able to use that path as -the left or right input of a mergejoin and avoid an explicit sort step. -Nestloops and hash joins don't really care what the order of their inputs -is, but mergejoin needs suitably ordered inputs. Therefore, all paths -generated during the optimization process are marked with their sort order -(to the extent that it is known) for possible use by a higher-level merge. - -It is also possible to avoid an explicit sort step to implement a user's -ORDER BY clause if the final path has the right ordering already, so the -sort ordering is of interest even at the top level. subplanner() will -look for the cheapest path with a sort order matching the desired order, -and will compare its cost to the cost of using the cheapest-overall path -and doing an explicit sort. - -When we are generating paths for a particular RelOptInfo, we discard a path -if it is more expensive than another known path that has the same or better -sort order. 
We will never discard a path that is the only known way to -achieve a given sort order (without an explicit sort, that is). In this -way, the next level up will have the maximum freedom to build mergejoins -without sorting, since it can pick from any of the paths retained for its -inputs. - - -PathKeys --------- - -The PathKeys data structure represents what is known about the sort order -of a particular Path. - -Path.pathkeys is a List of Lists of PathKeyItem nodes that represent -the sort order of the result generated by the Path. The n'th sublist -represents the n'th sort key of the result. - -In single/base relation RelOptInfo's, the Paths represent various ways -of scanning the relation and the resulting ordering of the tuples. -Sequential scan Paths have NIL pathkeys, indicating no known ordering. -Index scans have Path.pathkeys that represent the chosen index's ordering, -if any. A single-key index would create a pathkey with a single sublist, -e.g. ( (tab1.indexkey1/sortop1) ). A multi-key index generates a sublist -per key, e.g. ( (tab1.indexkey1/sortop1) (tab1.indexkey2/sortop2) ) which -shows major sort by indexkey1 (ordering by sortop1) and minor sort by -indexkey2 with sortop2. - -Note that a multi-pass indexscan (OR clause scan) has NIL pathkeys since -we can say nothing about the overall order of its result. Also, an -indexscan on an unordered type of index generates NIL pathkeys. However, -we can always create a pathkey by doing an explicit sort. The pathkeys -for a Sort plan's output just represent the sort key fields and the -ordering operators used. - -Things get more interesting when we consider joins. Suppose we do a -mergejoin between A and B using the mergeclause A.X = B.Y. The output -of the mergejoin is sorted by X --- but it is also sorted by Y. We -represent this fact by listing both keys in a single pathkey sublist: -( (A.X/xsortop B.Y/ysortop) ). 
This pathkey asserts that the major -sort order of the Path can be taken to be *either* A.X or B.Y. -They are equal, so they are both primary sort keys. By doing this, -we allow future joins to use either var as a pre-sorted key, so upper -Mergejoins may be able to avoid having to re-sort the Path. This is -why pathkeys is a List of Lists. - -We keep a sortop associated with each PathKeyItem because cross-data-type -mergejoins are possible; for example int4 = int8 is mergejoinable. -In this case we need to remember that the left var is ordered by int4lt -while the right var is ordered by int8lt. So the different members of -each sublist could have different sortops. - -Note that while the order of the top list is meaningful (primary vs. -secondary sort key), the order of each sublist is arbitrary. Each sublist -should be regarded as a set of equivalent keys, with no significance -to the list order. - -With a little further thought, it becomes apparent that pathkeys for -joins need not only come from mergejoins. For example, if we do a -nestloop join between outer relation A and inner relation B, then any -pathkeys relevant to A are still valid for the join result: we have -not altered the order of the tuples from A. Even more interesting, -if there was a mergeclause (more formally, an "equijoin clause") A.X=B.Y, -and A.X was a pathkey for the outer relation A, then we can assert that -B.Y is a pathkey for the join result; X was ordered before and still is, -and the joined values of Y are equal to the joined values of X, so Y -must now be ordered too. This is true even though we used neither an -explicit sort nor a mergejoin on Y. - -More generally, whenever we have an equijoin clause A.X = B.Y and a -pathkey A.X, we can add B.Y to that pathkey if B is part of the joined -relation the pathkey is for, *no matter how we formed the join*. It works -as long as the clause has been applied at some point while forming the -join relation. 
(In the current implementation, we always apply qual -clauses as soon as possible, ie, as far down in the plan tree as possible. -So we can treat the pathkeys as equivalent everywhere. The exception is -when the relations A and B are joined inside the nullable side of an -OUTER JOIN and the equijoin clause comes from above the OUTER JOIN. In this -case we cannot apply the qual as soon as A and B are joined, so we do not -consider the pathkeys to be equivalent. This could be improved if we wanted -to go to the trouble of making pathkey equivalence be context-dependent, -but that seems much more complex than it's worth.) - -In short, then: when producing the pathkeys for a merge or nestloop join, -we can keep all of the keys of the outer path, since the ordering of the -outer path will be preserved in the result. Furthermore, we can add to -each pathkey sublist any inner vars that are equijoined to any of the -outer vars in the sublist; this works regardless of whether we are -implementing the join using that equijoin clause as a mergeclause, -or merely enforcing the clause after-the-fact as a qpqual filter. - -Although Hashjoins also work only with equijoin operators, it is *not* -safe to consider the output of a Hashjoin to be sorted in any particular -order --- not even the outer path's order. This is true because the -executor might have to split the join into multiple batches. Therefore -a Hashjoin is always given NIL pathkeys. (Also, we need to use only -mergejoinable operators when deducing which inner vars are now sorted, -because a mergejoin operator tells us which left- and right-datatype -sortops can be considered equivalent, whereas a hashjoin operator -doesn't imply anything about sort order.) - -Pathkeys are also useful to represent an ordering that we wish to achieve, -since they are easily compared to the pathkeys of a potential candidate -path. So, SortClause lists are turned into pathkeys lists for use inside -the optimizer. 
- -OK, now for how it *really* works: - -We did implement pathkeys just as described above, and found that the -planner spent a huge amount of time comparing pathkeys, because the -representation of pathkeys as unordered lists made it expensive to decide -whether two were equal or not. So, we've modified the representation -as described next. - -If we scan the WHERE clause for equijoin clauses (mergejoinable clauses) -during planner startup, we can construct lists of equivalent pathkey items -for the query. There could be more than two items per equivalence set; -for example, WHERE A.X = B.Y AND B.Y = C.Z AND D.R = E.S creates the -equivalence sets { A.X B.Y C.Z } and { D.R E.S } (plus associated sortops). -Any pathkey item that belongs to an equivalence set implies that all the -other items in its set apply to the relation too, or at least all the ones -that are for fields present in the relation. (Some of the items in the -set might be for as-yet-unjoined relations.) Furthermore, any multi-item -pathkey sublist that appears at any stage of planning the query *must* be -a subset of one or another of these equivalence sets; there's no way we'd -have put two items in the same pathkey sublist unless they were equijoined -in WHERE. - -Now suppose that we allow a pathkey sublist to contain pathkey items for -vars that are not yet part of the pathkey's relation. This introduces -no logical difficulty, because such items can easily be seen to be -irrelevant; we just mandate that they be ignored. But having allowed -this, we can declare (by fiat) that any multiple-item pathkey sublist -must be "equal()" to the appropriate equivalence set. In effect, -whenever we make a pathkey sublist that mentions any var appearing in an -equivalence set, we instantly add all the other vars equivalenced to it, -whether they appear yet in the pathkey's relation or not. And we also -mandate that the pathkey sublist appear in the same order as the -equivalence set it comes from. 
- -In fact, we can go even further, and say that the canonical representation -of a pathkey sublist is a pointer directly to the relevant equivalence set, -which is kept in a list of pathkey equivalence sets for the query. Then -pathkey sublist comparison reduces to pointer-equality checking! To do this -we also have to add single-element pathkey sublists to the query's list of -equivalence sets, but that's a small price to pay. - -By the way, it's OK and even useful for us to build equivalence sets -that mention multiple vars from the same relation. For example, if -we have WHERE A.X = A.Y and we are scanning A using an index on X, -we can legitimately conclude that the path is sorted by Y as well; -and this could be handy if Y is the variable used in other join clauses -or ORDER BY. So, any WHERE clause with a mergejoinable operator can -contribute to an equivalence set, even if it's not a join clause. - -As sketched so far, equijoin operators allow us to conclude that -A.X = B.Y and B.Y = C.Z together imply A.X = C.Z, even when different -datatypes are involved. What is not immediately obvious is that to use -the "canonical pathkey" representation, we *must* make this deduction. -An example (from a real bug in Postgres 7.0) is a mergejoin for a query -like - SELECT * FROM t1, t2 WHERE t1.f2 = t2.f3 AND t1.f1 = t2.f3; -The canonical-pathkey mechanism is able to deduce that t1.f1 = t1.f2 -(ie, both appear in the same canonical pathkey set). If we sort t1 -and then apply a mergejoin, we *must* filter the t1 tuples using the -implied qualification f1 = f2, because otherwise the output of the sort -will be ordered by f1 or f2 (whichever we sort on) but not both. The -merge will then fail since (depending on which qual clause it applies -first) it's expecting either ORDER BY f1,f2 or ORDER BY f2,f1, but the -actual output of the sort has neither of these orderings. 
The best fix -for this is to generate all the implied equality constraints for each -equijoin set and add these clauses to the query's qualification list. -In other words, we *explicitly* deduce f1 = f2 and add this to the WHERE -clause. The constraint will be applied as a qpqual to the output of the -scan on t1, resulting in sort output that is indeed ordered by both vars. -This approach provides more information to the selectivity estimation -code than it would otherwise have, and reduces the number of tuples -processed in join stages, so it's a win to make these deductions even -if we weren't forced to. - -When we generate implied equality constraints, we may find ourselves -adding redundant clauses to specific relations. For example, consider - SELECT * FROM t1, t2, t3 WHERE t1.a = t2.b AND t2.b = t3.c; -We will generate the implied clause t1.a = t3.c and add it to the tree. -This is good since it allows us to consider joining t1 and t3 directly, -which we otherwise wouldn't do. But when we reach the stage of joining -all three relations, we will have redundant join clauses --- eg, if we -join t1 and t2 first, then the path that joins (t1 t2) to t3 will have -both t2.b = t3.c and t1.a = t3.c as restriction clauses. This is bad; -not only is evaluation of the extra clause useless work at runtime, -but the selectivity estimator routines will underestimate the number -of tuples produced since they won't know that the two clauses are -perfectly redundant. We fix this by detecting and removing redundant -clauses as the restriction clause list is built for each join. (We -can't do it sooner, since which clauses are redundant will vary depending -on the join order.) - -Yet another implication of all this is that mergejoinable operators -must form closed equivalence sets. For example, if "int2 = int4" -and "int4 = int8" are both marked mergejoinable, then there had better -be a mergejoinable "int2 = int8" operator as well. 
Otherwise, when -we're given WHERE int2var = int4var AND int4var = int8var, we'll fail -while trying to create a representation of the implied clause -int2var = int8var. - -An additional refinement we can make is to insist that canonical pathkey -lists (sort orderings) do not mention the same pathkey set more than once. -For example, a pathkey list ((A) (B) (A)) is redundant --- the second -occurrence of (A) does not change the ordering, since the data must already -be sorted by A. Although a user probably wouldn't write ORDER BY A,B,A -directly, such redundancies are more probable once equijoin equivalences -have been considered. Also, the system is likely to generate redundant -pathkey lists when computing the sort ordering needed for a mergejoin. By -eliminating the redundancy, we save time and improve planning, since the -planner will more easily recognize equivalent orderings as being equivalent. - --- bjm & tgl diff --git a/src/backend/optimizer/geqo/Makefile b/src/backend/optimizer/geqo/Makefile deleted file mode 100644 index cc2c4bd667d..00000000000 --- a/src/backend/optimizer/geqo/Makefile +++ /dev/null @@ -1,34 +0,0 @@ -#------------------------------------------------------------------------- -# -# Makefile-- -# Makefile for the genetic query optimizer module -# -# Copyright (c) 1994, Regents of the University of California -# -# $Header: /cvsroot/pgsql/src/backend/optimizer/geqo/Makefile,v 1.16 2000/08/31 16:10:08 petere Exp $ -# -#------------------------------------------------------------------------- - -subdir = src/backend/optimizer/geqo -top_builddir = ../../../.. 
-include $(top_builddir)/src/Makefile.global - -OBJS = geqo_copy.o geqo_eval.o geqo_main.o geqo_misc.o \ - geqo_pool.o geqo_recombination.o \ - geqo_selection.o \ - geqo_erx.o geqo_pmx.o geqo_cx.o geqo_px.o geqo_ox1.o geqo_ox2.o - -all: SUBSYS.o - -SUBSYS.o: $(OBJS) - $(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS) - -depend dep: - $(CC) -MM $(CFLAGS) *.c >depend - -clean: - rm -f SUBSYS.o $(OBJS) - -ifeq (depend,$(wildcard depend)) -include depend -endif diff --git a/src/backend/optimizer/geqo/geqo_copy.c b/src/backend/optimizer/geqo/geqo_copy.c deleted file mode 100644 index 41d09dc02c5..00000000000 --- a/src/backend/optimizer/geqo/geqo_copy.c +++ /dev/null @@ -1,53 +0,0 @@ -/*------------------------------------------------------------------------ - * - * geqo_copy.c - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * $Id: geqo_copy.c,v 1.11 2002/06/20 20:29:29 momjian Exp $ - * - *------------------------------------------------------------------------- - */ - -/* contributed by: - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - * Martin Utesch * Institute of Automatic Control * - = = University of Mining and Technology = - * utesch@aut.tu-freiberg.de * Freiberg, Germany * - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - */ - -/* this is adopted from D. Whitley's Genitor algorithm */ - -/*************************************************************/ -/* */ -/* Copyright (c) 1990 */ -/* Darrell L. Whitley */ -/* Computer Science Department */ -/* Colorado State University */ -/* */ -/* Permission is hereby granted to copy all or any part of */ -/* this program for free distribution. The author's name */ -/* and this copyright notice must be included in any copy. 
*/ -/* */ -/*************************************************************/ - -#include "postgres.h" -#include "optimizer/geqo_copy.h" - -/* geqo_copy - * - * copies the first string_length entries of chromo2->string, and chromo2->worth, into chromo1 (chromo1 is the destination, chromo2 the source) - * - */ -void -geqo_copy(Chromosome *chromo1, Chromosome *chromo2, int string_length) -{ - int i; - - for (i = 0; i < string_length; i++) - chromo1->string[i] = chromo2->string[i]; - - chromo1->worth = chromo2->worth; -} diff --git a/src/backend/optimizer/geqo/geqo_cx.c b/src/backend/optimizer/geqo/geqo_cx.c deleted file mode 100644 index fc7f72050b0..00000000000 --- a/src/backend/optimizer/geqo/geqo_cx.c +++ /dev/null @@ -1,121 +0,0 @@ -/*------------------------------------------------------------------------ -* -* geqo_cx.c -* -* cycle crossover [CX] routines; -* CX operator according to Oliver et al -* (Proc 2nd Int'l Conf on GA's) -* -* $Id: geqo_cx.c,v 1.9 1999/07/16 04:59:07 momjian Exp $ -* -*------------------------------------------------------------------------- -*/ - -/* contributed by: - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - * Martin Utesch * Institute of Automatic Control * - = = University of Mining and Technology = - * utesch@aut.tu-freiberg.de * Freiberg, Germany * - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - */ - -/* the cx algorithm is adopted from Genitor : */ -/*************************************************************/ -/* */ -/* Copyright (c) 1990 */ -/* Darrell L. Whitley */ -/* Computer Science Department */ -/* Colorado State University */ -/* */ -/* Permission is hereby granted to copy all or any part of */ -/* this program for free distribution. The author's name */ -/* and this copyright notice must be included in any copy.
*/ -/* */ -/*************************************************************/ - - -#include "postgres.h" -#include "optimizer/geqo_recombination.h" -#include "optimizer/geqo_random.h" - - -/* cx - * - * cycle crossover - */ -int -cx(Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table) -{ - - int i, - start_pos, - curr_pos; - int count = 0; - int num_diffs = 0; - - /* initialize city table */ - for (i = 1; i <= num_gene; i++) - { - city_table[i].used = 0; - city_table[tour2[i - 1]].tour2_position = i - 1; - city_table[tour1[i - 1]].tour1_position = i - 1; - } - - /* choose random cycle starting position */ - start_pos = geqo_randint(num_gene - 1, 0); - - /* child inherits first city */ - offspring[start_pos] = tour1[start_pos]; - - /* begin cycle with tour1 */ - curr_pos = start_pos; - city_table[(int) tour1[start_pos]].used = 1; - - count++; - - /* cx main part */ - - -/* STEP 1 */ - - while (tour2[curr_pos] != tour1[start_pos]) - { - city_table[(int) tour2[curr_pos]].used = 1; - curr_pos = city_table[(int) tour2[curr_pos]].tour1_position; - offspring[curr_pos] = tour1[curr_pos]; - count++; - } - - -/* STEP 2 */ - - /* failed to create a complete tour */ - if (count < num_gene) - { - for (i = 1; i <= num_gene; i++) - { - if (!city_table[i].used) - { - offspring[city_table[i].tour2_position] = - tour2[(int) city_table[i].tour2_position]; - count++; - } - } - } - - -/* STEP 3 */ - - /* still failed to create a complete tour */ - if (count < num_gene) - { - - /* count the number of differences between mom and offspring */ - for (i = 0; i < num_gene; i++) - if (tour1[i] != offspring[i]) - num_diffs++; - - } - - return num_diffs; -} diff --git a/src/backend/optimizer/geqo/geqo_erx.c b/src/backend/optimizer/geqo/geqo_erx.c deleted file mode 100644 index fe0baa42f1c..00000000000 --- a/src/backend/optimizer/geqo/geqo_erx.c +++ /dev/null @@ -1,472 +0,0 @@ -/*------------------------------------------------------------------------ -* -* geqo_erx.c -* edge 
recombination crossover [ER] -* -* $Id: geqo_erx.c,v 1.17 2002/03/02 21:39:26 momjian Exp $ -* -*------------------------------------------------------------------------- -*/ - -/* contributed by: - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - * Martin Utesch * Institute of Automatic Control * - = = University of Mining and Technology = - * utesch@aut.tu-freiberg.de * Freiberg, Germany * - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - */ - -/* the edge recombination algorithm is adopted from Genitor : */ -/*************************************************************/ -/* */ -/* Copyright (c) 1990 */ -/* Darrell L. Whitley */ -/* Computer Science Department */ -/* Colorado State University */ -/* */ -/* Permission is hereby granted to copy all or any part of */ -/* this program for free distribution. The author's name */ -/* and this copyright notice must be included in any copy. */ -/* */ -/*************************************************************/ - - -#include "postgres.h" -#include "optimizer/geqo_recombination.h" -#include "optimizer/geqo_random.h" - - -static int gimme_edge(Gene gene1, Gene gene2, Edge *edge_table); -static void remove_gene(Gene gene, Edge edge, Edge *edge_table); -static Gene gimme_gene(Edge edge, Edge *edge_table); - -static Gene edge_failure(Gene *gene, int index, Edge *edge_table, int num_gene); - - -/* alloc_edge_table - * - * allocate memory for edge table - * - */ - -Edge * -alloc_edge_table(int num_gene) -{ - Edge *edge_table; - - /* - * palloc one extra location so that nodes numbered 1..n can be - * indexed directly; 0 will not be used - */ - - edge_table = (Edge *) palloc((num_gene + 1) * sizeof(Edge)); - - return edge_table; -} - -/* free_edge_table - * - * deallocate memory of edge table - * - */ -void -free_edge_table(Edge *edge_table) -{ - pfree(edge_table); -} - -/* gimme_edge_table - * - * fills a data structure which represents the set of explicit - * edges between 
points in the (2) input genes - * - * assumes circular tours and bidirectional edges - * - * gimme_edge() will set "shared" edges to negative values - * - * returns average number edges/city in range 2.0 - 4.0 - * where 2.0=homogeneous; 4.0=diverse - * - */ -float -gimme_edge_table(Gene *tour1, Gene *tour2, int num_gene, Edge *edge_table) -{ - int i, - index1, - index2; - int edge_total; /* total number of unique edges in two - * genes */ - - /* at first clear the edge table's old data */ - for (i = 1; i <= num_gene; i++) - { - edge_table[i].total_edges = 0; - edge_table[i].unused_edges = 0; - } - - /* fill edge table with new data */ - - edge_total = 0; - - for (index1 = 0; index1 < num_gene; index1++) - { - /* - * presume the tour is circular, i.e. 1->2, 2->3, 3->1 this - * operaton maps n back to 1 - */ - - index2 = (index1 + 1) % num_gene; - - /* - * edges are bidirectional, i.e. 1->2 is same as 2->1 call - * gimme_edge twice per edge - */ - - edge_total += gimme_edge(tour1[index1], tour1[index2], edge_table); - gimme_edge(tour1[index2], tour1[index1], edge_table); - - edge_total += gimme_edge(tour2[index1], tour2[index2], edge_table); - gimme_edge(tour2[index2], tour2[index1], edge_table); - } - - /* return average number of edges per index */ - return ((float) (edge_total * 2) / (float) num_gene); -} - -/* gimme_edge - * - * registers edge from city1 to city2 in input edge table - * - * no assumptions about directionality are made; - * therefor it is up to the calling routine to - * call gimme_edge twice to make a bi-directional edge - * between city1 and city2; - * uni-directional edges are possible as well (just call gimme_edge - * once with the direction from city1 to city2) - * - * returns 1 if edge was not already registered and was just added; - * 0 if edge was already registered and edge_table is unchanged - */ -static int -gimme_edge(Gene gene1, Gene gene2, Edge *edge_table) -{ - int i; - int edges; - int city1 = (int) gene1; - int city2 = (int) 
gene2; - - - /* check whether edge city1->city2 already exists */ - edges = edge_table[city1].total_edges; - - for (i = 0; i < edges; i++) - { - if ((Gene) Abs(edge_table[city1].edge_list[i]) == city2) - { - - /* mark shared edges as negative */ - edge_table[city1].edge_list[i] = 0 - city2; - - return 0; - } - } - - /* add city1->city2; */ - edge_table[city1].edge_list[edges] = city2; - - /* increment the number of edges from city1 */ - edge_table[city1].total_edges++; - edge_table[city1].unused_edges++; - - return 1; -} - -/* gimme_tour - * - * creates a new tour using edges from the edge table. - * priority is given to "shared" edges (i.e. edges which - * all parent genes possess and are marked as negative - * in the edge table.) - * - */ -int -gimme_tour(Edge *edge_table, Gene *new_gene, int num_gene) -{ - int i; - int edge_failures = 0; - - new_gene[0] = (Gene) geqo_randint(num_gene, 1); /* choose int between 1 - * and num_gene */ - - for (i = 1; i < num_gene; i++) - { - /* - * as each point is entered into the tour, remove it from the edge - * table - */ - - remove_gene(new_gene[i - 1], edge_table[(int) new_gene[i - 1]], edge_table); - - /* find destination for the newly entered point */ - - if (edge_table[new_gene[i - 1]].unused_edges > 0) - new_gene[i] = gimme_gene(edge_table[(int) new_gene[i - 1]], edge_table); - - else - { /* cope with fault */ - edge_failures++; - - new_gene[i] = edge_failure(new_gene, i - 1, edge_table, num_gene); - } - - /* mark this node as incorporated */ - edge_table[(int) new_gene[i - 1]].unused_edges = -1; - - } /* for (i=1; i<num_gene; i++) */ - - return edge_failures; - -} - -/* remove_gene - * - * removes input gene from edge_table. - * input edge is used - * to identify deletion locations within edge table. - * - */ -static void -remove_gene(Gene gene, Edge edge, Edge *edge_table) -{ - int i, - j; - int possess_edge; - int genes_remaining; - - /* - * do for every gene known to have an edge to input gene (i.e. 
in - * edge_list for input edge) - */ - - for (i = 0; i < edge.unused_edges; i++) - { - possess_edge = (int) Abs(edge.edge_list[i]); - genes_remaining = edge_table[possess_edge].unused_edges; - - /* find the input gene in all edge_lists and delete it */ - for (j = 0; j < genes_remaining; j++) - { - - if ((Gene) Abs(edge_table[possess_edge].edge_list[j]) == gene) - { - - edge_table[possess_edge].unused_edges--; - - edge_table[possess_edge].edge_list[j] = - edge_table[possess_edge].edge_list[genes_remaining - 1]; - - break; - } - } - } -} - -/* gimme_gene - * - * priority is given to "shared" edges - * (i.e. edges which both genes possess) - * - */ -static Gene -gimme_gene(Edge edge, Edge *edge_table) -{ - int i; - Gene friend; - int minimum_edges; - int minimum_count = -1; - int rand_decision; - - /* - * no point has edges to more than 4 other points thus, this contrived - * minimum will be replaced - */ - - minimum_edges = 5; - - /* consider candidate destination points in edge list */ - - for (i = 0; i < edge.unused_edges; i++) - { - friend = (Gene) edge.edge_list[i]; - - /* - * give priority to shared edges that are negative; so return 'em - */ - - /* - * negative values are caught here so we need not worry about - * converting to absolute values - */ - if (friend < 0) - return (Gene) Abs(friend); - - - /* - * give priority to candidates with fewest remaining unused edges; - * find out what the minimum number of unused edges is - * (minimum_edges); if there is more than one cadidate with the - * minimum number of unused edges keep count of this number - * (minimum_count); - */ - - /* - * The test for minimum_count can probably be removed at some - * point but comments should probably indicate exactly why it is - * guaranteed that the test will always succeed the first time - * around. 
If it can fail then the code is in error - */ - - - if (edge_table[(int) friend].unused_edges < minimum_edges) - { - minimum_edges = edge_table[(int) friend].unused_edges; - minimum_count = 1; - } - else if (minimum_count == -1) - elog(ERROR, "gimme_gene: Internal error - minimum_count not set"); - else if (edge_table[(int) friend].unused_edges == minimum_edges) - minimum_count++; - - } /* for (i=0; i<edge.unused_edges; i++) */ - - - /* random decision of the possible candidates to use */ - rand_decision = (int) geqo_randint(minimum_count - 1, 0); - - - for (i = 0; i < edge.unused_edges; i++) - { - friend = (Gene) edge.edge_list[i]; - - /* return the chosen candidate point */ - if (edge_table[(int) friend].unused_edges == minimum_edges) - { - minimum_count--; - - if (minimum_count == rand_decision) - return friend; - } - } - - /* ... should never be reached */ - elog(ERROR, "gimme_gene: neither shared nor minimum number nor random edge found"); - return 0; /* to keep the compiler quiet */ -} - -/* edge_failure - * - * routine for handling edge failure - * - */ -static Gene -edge_failure(Gene *gene, int index, Edge *edge_table, int num_gene) -{ - int i; - Gene fail_gene = gene[index]; - int remaining_edges = 0; - int four_count = 0; - int rand_decision; - - - /* - * how many edges remain? how many gene with four total (initial) - * edges remain? 
- */ - - for (i = 1; i <= num_gene; i++) - { - if ((edge_table[i].unused_edges != -1) && (i != (int) fail_gene)) - { - remaining_edges++; - - if (edge_table[i].total_edges == 4) - four_count++; - } - } - - /* - * random decision of the gene with remaining edges and whose - * total_edges == 4 - */ - - if (four_count != 0) - { - - rand_decision = (int) geqo_randint(four_count - 1, 0); - - for (i = 1; i <= num_gene; i++) - { - - if ((Gene) i != fail_gene && - edge_table[i].unused_edges != -1 && - edge_table[i].total_edges == 4) - { - - four_count--; - - if (rand_decision == four_count) - return (Gene) i; - } - } - - elog(LOG, "edge_failure(1): no edge found via random decision and total_edges == 4"); - } - - else -/* random decision of the gene with remaining edges */ - - if (remaining_edges != 0) - { - - rand_decision = (int) geqo_randint(remaining_edges - 1, 0); - - for (i = 1; i <= num_gene; i++) - { - - if ((Gene) i != fail_gene && - edge_table[i].unused_edges != -1) - { - - remaining_edges--; - - if (rand_decision == remaining_edges) - return i; - } - } - - elog(LOG, "edge_failure(2): no edge found via random decision and remainig edges"); - } - - /* - * edge table seems to be empty; this happens sometimes on the last - * point due to the fact that the first point is removed from the - * table even though only one of its edges has been determined - */ - - else - { /* occurs only at the last point in the - * tour; simply look for the point which - * is not yet used */ - - for (i = 1; i <= num_gene; i++) - if (edge_table[i].unused_edges >= 0) - return (Gene) i; - - elog(LOG, "edge_failure(3): no edge found via looking for the last ununsed point"); - } - - -/* ... 
should never be reached */ - elog(ERROR, "edge_failure: no edge detected"); - return 0; /* to keep the compiler quiet */ -} diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c deleted file mode 100644 index 3e915747da9..00000000000 --- a/src/backend/optimizer/geqo/geqo_eval.c +++ /dev/null @@ -1,155 +0,0 @@ -/*------------------------------------------------------------------------ - * - * geqo_eval.c - * Routines to evaluate query trees - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * $Id: geqo_eval.c,v 1.59 2002/06/20 20:29:29 momjian Exp $ - * - *------------------------------------------------------------------------- - */ - -/* contributed by: - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - * Martin Utesch * Institute of Automatic Control * - = = University of Mining and Technology = - * utesch@aut.tu-freiberg.de * Freiberg, Germany * - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - */ - -#include "postgres.h" - -#include <math.h> -#include <limits.h> - -#include "optimizer/geqo.h" -#include "optimizer/pathnode.h" -#include "optimizer/paths.h" -#include "utils/memutils.h" - - -/* - * geqo_eval - * - * Returns cost of a query tree as an individual of the population. - */ -Cost -geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene) -{ - MemoryContext mycontext; - MemoryContext oldcxt; - RelOptInfo *joinrel; - Cost fitness; - List *savelist; - - /* - * Create a private memory context that will hold all temp storage - * allocated inside gimme_tree(). - * - * Since geqo_eval() will be called many times, we can't afford to let - * all that memory go unreclaimed until end of statement. Note we - * make the temp context a child of TransactionCommandContext, so that - * it will be freed even if we abort via elog(ERROR). 
- */ - mycontext = AllocSetContextCreate(TransactionCommandContext, - "GEQO", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); - oldcxt = MemoryContextSwitchTo(mycontext); - - /* preserve root->join_rel_list, which gimme_tree changes */ - savelist = root->join_rel_list; - - /* construct the best path for the given combination of relations */ - joinrel = gimme_tree(root, initial_rels, tour, num_gene, 0, NULL); - - /* - * compute fitness - * - * XXX geqo does not currently support optimization for partial result - * retrieval --- how to fix? - */ - fitness = joinrel->cheapest_total_path->total_cost; - - /* restore join_rel_list */ - root->join_rel_list = savelist; - - /* release all the memory acquired within gimme_tree */ - MemoryContextSwitchTo(oldcxt); - MemoryContextDelete(mycontext); - - return fitness; -} - -/* - * gimme_tree - * this routine considers only LEFT-SIDED TREES! - * - * 'root' is the Query - * 'initial_rels' is the list of initial relations (FROM-list items) - * 'tour' is the proposed join order, of length 'num_gene' - * 'rel_count' is number of initial_rels items already joined (initially 0) - * 'old_rel' is the preceding join (initially NULL) - * - * Returns a new join relation incorporating all joins in a left-sided tree. 
- */ -RelOptInfo * -gimme_tree(Query *root, List *initial_rels, - Gene *tour, int num_gene, - int rel_count, RelOptInfo *old_rel) -{ - RelOptInfo *inner_rel; /* current relation */ - int init_rel_index; - - if (rel_count < num_gene) - { - /* tree not yet finished */ - init_rel_index = (int) tour[rel_count]; - - inner_rel = (RelOptInfo *) nth(init_rel_index - 1, initial_rels); - - if (rel_count == 0) - { - /* processing first join with init_rel_index = (int) tour[0] */ - rel_count++; - return gimme_tree(root, initial_rels, - tour, num_gene, - rel_count, inner_rel); - } - else - { - /* tree main part */ - List *acceptable_rels = makeList1(inner_rel); - List *new_rels; - RelOptInfo *new_rel; - - new_rels = make_rels_by_clause_joins(root, old_rel, - acceptable_rels); - /* Shouldn't get more than one result */ - Assert(length(new_rels) <= 1); - if (new_rels == NIL) - { - new_rels = make_rels_by_clauseless_joins(root, old_rel, - acceptable_rels); - Assert(length(new_rels) <= 1); - if (new_rels == NIL) - elog(ERROR, "gimme_tree: failed to construct join rel"); - } - new_rel = (RelOptInfo *) lfirst(new_rels); - - /* Find and save the cheapest paths for this rel */ - set_cheapest(new_rel); - - /* and recurse... */ - rel_count++; - return gimme_tree(root, initial_rels, - tour, num_gene, - rel_count, new_rel); - } - } - - return old_rel; /* tree finished ... 
*/ -} diff --git a/src/backend/optimizer/geqo/geqo_main.c b/src/backend/optimizer/geqo/geqo_main.c deleted file mode 100644 index 198eb6167b8..00000000000 --- a/src/backend/optimizer/geqo/geqo_main.c +++ /dev/null @@ -1,325 +0,0 @@ -/*------------------------------------------------------------------------ - * - * geqo_main.c - * solution of the query optimization problem - * by means of a Genetic Algorithm (GA) - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * $Id: geqo_main.c,v 1.31 2002/06/20 20:29:29 momjian Exp $ - * - *------------------------------------------------------------------------- - */ - -/* contributed by: - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - * Martin Utesch * Institute of Automatic Control * - = = University of Mining and Technology = - * utesch@aut.tu-freiberg.de * Freiberg, Germany * - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - */ - -/* -- parts of this are adapted from D. 
Whitley's Genitor algorithm -- */ - -#include "postgres.h" - -#include <time.h> -#include <math.h> - -#include "optimizer/geqo.h" -#include "optimizer/geqo_misc.h" -#include "optimizer/geqo_pool.h" -#include "optimizer/geqo_selection.h" - - -/* - * Configuration options - */ -int Geqo_pool_size; -int Geqo_effort; -int Geqo_generations; -double Geqo_selection_bias; -int Geqo_random_seed; - - -static int gimme_pool_size(int nr_rel); -static int gimme_number_generations(int pool_size, int effort); - - -/* define edge recombination crossover [ERX] per default */ -#if !defined(ERX) && \ - !defined(PMX) && \ - !defined(CX) && \ - !defined(PX) && \ - !defined(OX1) && \ - !defined(OX2) -#define ERX -#endif - - -/* - * geqo - * solution of the query optimization problem - * similar to a constrained Traveling Salesman Problem (TSP) - */ - -RelOptInfo * -geqo(Query *root, int number_of_rels, List *initial_rels) -{ - int generation; - Chromosome *momma; - Chromosome *daddy; - Chromosome *kid; - Pool *pool; - int pool_size, - number_generations, - status_interval; - Gene *best_tour; - RelOptInfo *best_rel; - -#if defined(ERX) - Edge *edge_table; /* list of edges */ - int edge_failures = 0; - float difference; -#endif -#if defined(CX) || defined(PX) || defined(OX1) || defined(OX2) - City *city_table; /* list of cities */ -#endif -#if defined(CX) - int cycle_diffs = 0; - int mutations = 0; -#endif - -/* set GA parameters */ - pool_size = gimme_pool_size(number_of_rels); - number_generations = gimme_number_generations(pool_size, Geqo_effort); - status_interval = 10; - -/* seed random number generator */ -/* XXX why is this done every time around? 
*/ - if (Geqo_random_seed >= 0) - srandom((unsigned int) Geqo_random_seed); - else - srandom((unsigned int) time(NULL)); - -/* allocate genetic pool memory */ - pool = alloc_pool(pool_size, number_of_rels); - -/* random initialization of the pool */ - random_init_pool(root, initial_rels, pool, 0, pool->size); - -/* sort the pool according to cheapest path as fitness */ - sort_pool(pool); /* we have to do it only one time, since - * all kids replace the worst individuals - * in future (-> geqo_pool.c:spread_chromo - * ) */ - -/* allocate chromosome momma and daddy memory */ - momma = alloc_chromo(pool->string_length); - daddy = alloc_chromo(pool->string_length); - -#if defined (ERX) - elog(LOG, "geqo_main: using edge recombination crossover [ERX]"); -/* allocate edge table memory */ - edge_table = alloc_edge_table(pool->string_length); -#elif defined(PMX) - elog(LOG, "geqo_main: using partially matched crossover [PMX]"); -/* allocate chromosome kid memory */ - kid = alloc_chromo(pool->string_length); -#elif defined(CX) - elog(LOG, "geqo_main: using cycle crossover [CX]"); -/* allocate city table memory */ - kid = alloc_chromo(pool->string_length); - city_table = alloc_city_table(pool->string_length); -#elif defined(PX) - elog(LOG, "geqo_main: using position crossover [PX]"); -/* allocate city table memory */ - kid = alloc_chromo(pool->string_length); - city_table = alloc_city_table(pool->string_length); -#elif defined(OX1) - elog(LOG, "geqo_main: using order crossover [OX1]"); -/* allocate city table memory */ - kid = alloc_chromo(pool->string_length); - city_table = alloc_city_table(pool->string_length); -#elif defined(OX2) - elog(LOG, "geqo_main: using order crossover [OX2]"); -/* allocate city table memory */ - kid = alloc_chromo(pool->string_length); - city_table = alloc_city_table(pool->string_length); -#endif - - -/* my pain main part: */ -/* iterative optimization */ - - for (generation = 0; generation < number_generations; generation++) - { - - /* SELECTION 
*/ - geqo_selection(momma, daddy, pool, Geqo_selection_bias); /* using linear bias - * function */ - - - -#if defined (ERX) - /* EDGE RECOMBINATION CROSSOVER */ - difference = gimme_edge_table(momma->string, daddy->string, pool->string_length, edge_table); - - /* let the kid grow in momma's womb (storage) for nine months ;-) */ - /* sleep(23328000) -- har har har */ - kid = momma; - - /* are there any edge failures ? */ - edge_failures += gimme_tour(edge_table, kid->string, pool->string_length); -#elif defined(PMX) - /* PARTIALLY MATCHED CROSSOVER */ - pmx(momma->string, daddy->string, kid->string, pool->string_length); -#elif defined(CX) - /* CYCLE CROSSOVER */ - cycle_diffs = cx(momma->string, daddy->string, kid->string, pool->string_length, city_table); - /* mutate the child */ - if (cycle_diffs == 0) - { - mutations++; - geqo_mutation(kid->string, pool->string_length); - } -#elif defined(PX) - /* POSITION CROSSOVER */ - px(momma->string, daddy->string, kid->string, pool->string_length, city_table); -#elif defined(OX1) - /* ORDER CROSSOVER */ - ox1(momma->string, daddy->string, kid->string, pool->string_length, city_table); -#elif defined(OX2) - /* ORDER CROSSOVER */ - ox2(momma->string, daddy->string, kid->string, pool->string_length, city_table); -#endif - - - /* EVALUATE FITNESS */ - kid->worth = geqo_eval(root, initial_rels, - kid->string, pool->string_length); - - /* push the kid into the wilderness of life according to its worth */ - spread_chromo(kid, pool); - - -#ifdef GEQO_DEBUG - if (status_interval && !(generation % status_interval)) - print_gen(stdout, pool, generation); -#endif - - } /* end of iterative optimization */ - - -#if defined(ERX) && defined(GEQO_DEBUG) - if (edge_failures != 0) - elog(LOG, "[GEQO] failures: %d, average: %d", - edge_failures, (int) generation / edge_failures); - else - elog(LOG, "[GEQO] No edge failures detected."); -#endif - - -#if defined(CX) && defined(GEQO_DEBUG) - if (mutations != 0) - elog(LOG, "[GEQO] mutations: %d, 
generations: %d", mutations, generation); - else - elog(LOG, "[GEQO] No mutations processed."); -#endif - - -#ifdef GEQO_DEBUG - print_pool(stdout, pool, 0, pool_size - 1); -#endif - - -/* got the cheapest query tree processed by geqo; - first element of the population indicates the best query tree */ - - best_tour = (Gene *) pool->data[0].string; - -/* root->join_rel_list will be modified during this ! */ - best_rel = gimme_tree(root, initial_rels, - best_tour, pool->string_length, - 0, NULL); - -/* DBG: show the query plan -print_plan(best_plan, root); - DBG */ - -/* ... free memory stuff */ - free_chromo(momma); - free_chromo(daddy); - -#if defined (ERX) - free_edge_table(edge_table); -#elif defined(PMX) - free_chromo(kid); -#elif defined(CX) - free_chromo(kid); - free_city_table(city_table); -#elif defined(PX) - free_chromo(kid); - free_city_table(city_table); -#elif defined(OX1) - free_chromo(kid); - free_city_table(city_table); -#elif defined(OX2) - free_chromo(kid); - free_city_table(city_table); -#endif - - free_pool(pool); - - return best_rel; -} - - - -/* - * Return either configured pool size or - * a good default based on query size (no. of relations) - * = 2^(QS+1) - * also constrain between 128 and 1024 - */ -static int -gimme_pool_size(int nr_rel) -{ - double size; - - if (Geqo_pool_size != 0) - { - if (Geqo_pool_size < MIN_GEQO_POOL_SIZE) - return MIN_GEQO_POOL_SIZE; - else if (Geqo_pool_size > MAX_GEQO_POOL_SIZE) - return MAX_GEQO_POOL_SIZE; - else - return Geqo_pool_size; - } - - size = pow(2.0, nr_rel + 1.0); - - if (size < MIN_GEQO_POOL_SIZE) - return MIN_GEQO_POOL_SIZE; - else if (size > MAX_GEQO_POOL_SIZE) - return MAX_GEQO_POOL_SIZE; - else - return (int) ceil(size); -} - - - -/* - * Return either configured number of generations or - * some reasonable default calculated on the fly. 
- * = Effort * Log2(PoolSize) - */ -static int -gimme_number_generations(int pool_size, int effort) -{ - if (Geqo_generations <= 0) - return effort * (int) ceil(log((double) pool_size) / log(2.0)); - else - return Geqo_generations; -} diff --git a/src/backend/optimizer/geqo/geqo_misc.c b/src/backend/optimizer/geqo/geqo_misc.c deleted file mode 100644 index 5385fc57fc7..00000000000 --- a/src/backend/optimizer/geqo/geqo_misc.c +++ /dev/null @@ -1,250 +0,0 @@ -/*------------------------------------------------------------------------ - * - * geqo_misc.c - * misc. printout and debug stuff - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * $Id: geqo_misc.c,v 1.32 2002/06/20 20:29:29 momjian Exp $ - * - *------------------------------------------------------------------------- - */ - -/* contributed by: - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - * Martin Utesch * Institute of Automatic Control * - = = University of Mining and Technology = - * utesch@aut.tu-freiberg.de * Freiberg, Germany * - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - */ - - - -#include "postgres.h" - -#include "optimizer/geqo_misc.h" -#include "nodes/print.h" - - -static float avg_pool(Pool *pool); - -/* avg_pool - * - */ -static float -avg_pool(Pool *pool) -{ - int i; - double cumulative = 0.0; - - if (pool->size == 0) - elog(ERROR, "avg_pool: pool_size of zero"); - - for (i = 0; i < pool->size; i++) - cumulative = cumulative + pool->data[i].worth; - - return (float) cumulative / pool->size; -} - -/* print_pool - */ -void -print_pool(FILE *fp, Pool *pool, int start, int stop) -{ - int i, - j; - - /* be extra careful that start and stop are valid inputs */ - - if (start < 0) - start = 0; - if (stop > pool->size) - stop = pool->size; - - if (start + stop > pool->size) - { - start = 0; - stop = pool->size; - } - - for (i = start; i < stop; 
i++) - { - fprintf(fp, "%d)\t", i); - for (j = 0; j < pool->string_length; j++) - fprintf(fp, "%d ", pool->data[i].string[j]); - fprintf(fp, "%f\n", pool->data[i].worth); - } -} - -/* print_gen - * - * printout for chromosome: best, worst, mean, average - * - */ -void -print_gen(FILE *fp, Pool *pool, int generation) -{ - int lowest; - - /* Get index to lowest ranking gene in poplulation. */ - /* Use 2nd to last since last is buffer. */ - lowest = pool->size > 1 ? pool->size - 2 : 0; - - fprintf(fp, - "%5d | Bst: %f Wst: %f Mean: %f Avg: %f\n", - generation, - pool->data[0].worth, - pool->data[lowest].worth, - pool->data[pool->size / 2].worth, - avg_pool(pool)); -} - - -void -print_edge_table(FILE *fp, Edge *edge_table, int num_gene) -{ - int i, - j; - - fprintf(fp, "\nEDGE TABLE\n"); - - for (i = 1; i <= num_gene; i++) - { - fprintf(fp, "%d :", i); - for (j = 0; j < edge_table[i].unused_edges; j++) - fprintf(fp, " %d", edge_table[i].edge_list[j]); - fprintf(fp, "\n"); - } - - fprintf(fp, "\n"); -} - -/************************************************************* - Debug output subroutines - *************************************************************/ - -void -geqo_print_joinclauses(Query *root, List *clauses) -{ - List *l; - - foreach(l, clauses) - { - RestrictInfo *c = lfirst(l); - - print_expr((Node *) c->clause, root->rtable); - if (lnext(l)) - printf(" "); - } -} - -void -geqo_print_path(Query *root, Path *path, int indent) -{ - char *ptype = NULL; - JoinPath *jp; - bool join = false; - int i; - - for (i = 0; i < indent; i++) - printf("\t"); - - switch (nodeTag(path)) - { - case T_Path: - ptype = "SeqScan"; - join = false; - break; - case T_IndexPath: - ptype = "IdxScan"; - join = false; - break; - case T_NestPath: - ptype = "Nestloop"; - join = true; - break; - case T_MergePath: - ptype = "MergeJoin"; - join = true; - break; - case T_HashPath: - ptype = "HashJoin"; - join = true; - break; - default: - break; - } - if (join) - { - jp = (JoinPath *) path; - 
printf("%s rows=%.0f cost=%.2f..%.2f\n", - ptype, path->parent->rows, - path->startup_cost, path->total_cost); - switch (nodeTag(path)) - { - case T_MergePath: - case T_HashPath: - for (i = 0; i < indent + 1; i++) - printf("\t"); - printf(" clauses=("); - geqo_print_joinclauses(root, jp->joinrestrictinfo); - printf(")\n"); - - if (nodeTag(path) == T_MergePath) - { - MergePath *mp = (MergePath *) path; - - if (mp->outersortkeys || mp->innersortkeys) - { - for (i = 0; i < indent + 1; i++) - printf("\t"); - printf(" sortouter=%d sortinner=%d\n", - ((mp->outersortkeys) ? 1 : 0), - ((mp->innersortkeys) ? 1 : 0)); - } - } - break; - default: - break; - } - geqo_print_path(root, jp->outerjoinpath, indent + 1); - geqo_print_path(root, jp->innerjoinpath, indent + 1); - } - else - { - int relid = lfirsti(path->parent->relids); - - printf("%s(%d) rows=%.0f cost=%.2f..%.2f\n", - ptype, relid, path->parent->rows, - path->startup_cost, path->total_cost); - - if (IsA(path, IndexPath)) - { - printf(" pathkeys="); - print_pathkeys(path->pathkeys, root->rtable); - } - } -} - -void -geqo_print_rel(Query *root, RelOptInfo *rel) -{ - List *l; - - printf("______________________________\n"); - printf("("); - foreach(l, rel->relids) - printf("%d ", lfirsti(l)); - printf("): rows=%.0f width=%d\n", rel->rows, rel->width); - - printf("\tpath list:\n"); - foreach(l, rel->pathlist) - geqo_print_path(root, lfirst(l), 1); - - printf("\n\tcheapest startup path:\n"); - geqo_print_path(root, rel->cheapest_startup_path, 1); - - printf("\n\tcheapest total path:\n"); - geqo_print_path(root, rel->cheapest_total_path, 1); -} diff --git a/src/backend/optimizer/geqo/geqo_mutation.c b/src/backend/optimizer/geqo/geqo_mutation.c deleted file mode 100644 index 037af7e3050..00000000000 --- a/src/backend/optimizer/geqo/geqo_mutation.c +++ /dev/null @@ -1,62 +0,0 @@ -/*------------------------------------------------------------------------ -* -* geqo_mutation.c -* -* TSP mutation routines -* -* $Id: 
geqo_mutation.c,v 1.8 1999/07/16 04:59:10 momjian Exp $ -* -*------------------------------------------------------------------------- -*/ - -/* contributed by: - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - * Martin Utesch * Institute of Automatic Control * - = = University of Mining and Technology = - * utesch@aut.tu-freiberg.de * Freiberg, Germany * - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - */ - -/* this is adopted from Genitor : */ -/*************************************************************/ -/* */ -/* Copyright (c) 1990 */ -/* Darrell L. Whitley */ -/* Computer Science Department */ -/* Colorado State University */ -/* */ -/* Permission is hereby granted to copy all or any part of */ -/* this program for free distribution. The author's name */ -/* and this copyright notice must be included in any copy. */ -/* */ -/*************************************************************/ - -#include "postgres.h" -#include "optimizer/geqo_mutation.h" -#include "optimizer/geqo_random.h" - -void -geqo_mutation(Gene *tour, int num_gene) -{ - int swap1; - int swap2; - int num_swaps = geqo_randint(num_gene / 3, 0); - Gene temp; - - - while (num_swaps > 0) - { - swap1 = geqo_randint(num_gene - 1, 0); - swap2 = geqo_randint(num_gene - 1, 0); - - while (swap1 == swap2) - swap2 = geqo_randint(num_gene - 1, 0); - - temp = tour[swap1]; - tour[swap1] = tour[swap2]; - tour[swap2] = temp; - - - num_swaps -= 1; - } -} diff --git a/src/backend/optimizer/geqo/geqo_ox1.c b/src/backend/optimizer/geqo/geqo_ox1.c deleted file mode 100644 index 14c63551afb..00000000000 --- a/src/backend/optimizer/geqo/geqo_ox1.c +++ /dev/null @@ -1,91 +0,0 @@ -/*------------------------------------------------------------------------ -* -* geqo_ox1.c -* -* order crossover [OX] routines; -* OX1 operator according to Davis -* (Proc Int'l Joint Conf on AI) -* -* $Id: geqo_ox1.c,v 1.8 1999/07/16 04:59:10 momjian Exp $ -* 
-*------------------------------------------------------------------------- -*/ - -/* contributed by: - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - * Martin Utesch * Institute of Automatic Control * - = = University of Mining and Technology = - * utesch@aut.tu-freiberg.de * Freiberg, Germany * - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - */ - -/* the ox algorithm is adopted from Genitor : */ -/*************************************************************/ -/* */ -/* Copyright (c) 1990 */ -/* Darrell L. Whitley */ -/* Computer Science Department */ -/* Colorado State University */ -/* */ -/* Permission is hereby granted to copy all or any part of */ -/* this program for free distribution. The author's name */ -/* and this copyright notice must be included in any copy. */ -/* */ -/*************************************************************/ - -#include "postgres.h" -#include "optimizer/geqo_random.h" -#include "optimizer/geqo_recombination.h" - - -/* ox1 - * - * position crossover - */ -void -ox1(Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table) -{ - int left, - right, - k, - p, - temp; - - /* initialize city table */ - for (k = 1; k <= num_gene; k++) - city_table[k].used = 0; - - /* select portion to copy from tour1 */ - left = geqo_randint(num_gene - 1, 0); - right = geqo_randint(num_gene - 1, 0); - - if (left > right) - { - temp = left; - left = right; - right = temp; - } - - /* copy portion from tour1 to offspring */ - for (k = left; k <= right; k++) - { - offspring[k] = tour1[k]; - city_table[(int) tour1[k]].used = 1; - } - - k = (right + 1) % num_gene; /* index into offspring */ - p = k; /* index into tour2 */ - - /* copy stuff from tour2 to offspring */ - while (k != left) - { - if (!city_table[(int) tour2[p]].used) - { - offspring[k] = tour2[p]; - k = (k + 1) % num_gene; - city_table[(int) tour2[p]].used = 1; - } - p = (p + 1) % num_gene; /* increment tour2-index */ - } - -} 
diff --git a/src/backend/optimizer/geqo/geqo_ox2.c b/src/backend/optimizer/geqo/geqo_ox2.c deleted file mode 100644 index 2270ac2aeb7..00000000000 --- a/src/backend/optimizer/geqo/geqo_ox2.c +++ /dev/null @@ -1,109 +0,0 @@ -/*------------------------------------------------------------------------ -* -* geqo_ox2.c -* -* order crossover [OX] routines; -* OX2 operator according to Syswerda -* (The Genetic Algorithms Handbook, ed L Davis) -* -* $Id: geqo_ox2.c,v 1.8 1999/07/16 04:59:10 momjian Exp $ -* -*------------------------------------------------------------------------- -*/ - -/* contributed by: - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - * Martin Utesch * Institute of Automatic Control * - = = University of Mining and Technology = - * utesch@aut.tu-freiberg.de * Freiberg, Germany * - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - */ - -/* the ox algorithm is adopted from Genitor : */ -/*************************************************************/ -/* */ -/* Copyright (c) 1990 */ -/* Darrell L. Whitley */ -/* Computer Science Department */ -/* Colorado State University */ -/* */ -/* Permission is hereby granted to copy all or any part of */ -/* this program for free distribution. The author's name */ -/* and this copyright notice must be included in any copy. 
*/ -/* */ -/*************************************************************/ - -#include "postgres.h" -#include "optimizer/geqo_random.h" -#include "optimizer/geqo_recombination.h" - - -/* ox2 - * - * position crossover - */ -void -ox2(Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table) -{ - int k, - j, - count, - pos, - select, - num_positions; - - /* initialize city table */ - for (k = 1; k <= num_gene; k++) - { - city_table[k].used = 0; - city_table[k - 1].select_list = -1; - } - - /* determine the number of positions to be inherited from tour1 */ - num_positions = geqo_randint(2 * num_gene / 3, num_gene / 3); - - /* make a list of selected cities */ - for (k = 0; k < num_positions; k++) - { - pos = geqo_randint(num_gene - 1, 0); - city_table[pos].select_list = (int) tour1[pos]; - city_table[(int) tour1[pos]].used = 1; /* mark used */ - } - - - count = 0; - k = 0; - - /* consolidate the select list to adjacent positions */ - while (count < num_positions) - { - if (city_table[k].select_list == -1) - { - j = k + 1; - while ((city_table[j].select_list == -1) && (j < num_gene)) - j++; - - city_table[k].select_list = city_table[j].select_list; - city_table[j].select_list = -1; - count++; - } - else - count++; - k++; - } - - select = 0; - - for (k = 0; k < num_gene; k++) - { - if (city_table[(int) tour2[k]].used) - { - offspring[k] = (Gene) city_table[select].select_list; - select++; /* next city in the select list */ - } - else -/* city isn't used yet, so inherit from tour2 */ - offspring[k] = tour2[k]; - } - -} diff --git a/src/backend/optimizer/geqo/geqo_pmx.c b/src/backend/optimizer/geqo/geqo_pmx.c deleted file mode 100644 index 93e944c88a6..00000000000 --- a/src/backend/optimizer/geqo/geqo_pmx.c +++ /dev/null @@ -1,221 +0,0 @@ -/*------------------------------------------------------------------------ -* -* geqo_pmx.c -* -* partially matched crossover [PMX] routines; -* PMX operator according to Goldberg & Lingle -* (Proc Int'l Conf on GA's) 
-* -* $Id: geqo_pmx.c,v 1.9 1999/07/16 04:59:11 momjian Exp $ -* -*------------------------------------------------------------------------- -*/ - -/* contributed by: - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - * Martin Utesch * Institute of Automatic Control * - = = University of Mining and Technology = - * utesch@aut.tu-freiberg.de * Freiberg, Germany * - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - */ - -/* the pmx algorithm is adopted from Genitor : */ -/*************************************************************/ -/* */ -/* Copyright (c) 1990 */ -/* Darrell L. Whitley */ -/* Computer Science Department */ -/* Colorado State University */ -/* */ -/* Permission is hereby granted to copy all or any part of */ -/* this program for free distribution. The author's name */ -/* and this copyright notice must be included in any copy. */ -/* */ -/*************************************************************/ - -#include "postgres.h" -#include "optimizer/geqo_random.h" -#include "optimizer/geqo_recombination.h" - - -/* pmx - * - * partially matched crossover - */ -void -pmx(Gene *tour1, Gene *tour2, Gene *offspring, int num_gene) -{ - int *failed = (int *) palloc((num_gene + 1) * sizeof(int)); - int *from = (int *) palloc((num_gene + 1) * sizeof(int)); - int *indx = (int *) palloc((num_gene + 1) * sizeof(int)); - int *check_list = (int *) palloc((num_gene + 1) * sizeof(int)); - - int left, - right, - temp, - i, - j, - k; - int mx_fail, - found, - mx_hold; - - -/* no mutation so start up the pmx replacement algorithm */ -/* initialize failed[], from[], check_list[] */ - for (k = 0; k < num_gene; k++) - { - failed[k] = -1; - from[k] = -1; - check_list[k + 1] = 0; - } - -/* locate crossover points */ - left = geqo_randint(num_gene - 1, 0); - right = geqo_randint(num_gene - 1, 0); - - if (left > right) - { - temp = left; - left = right; - right = temp; - } - - -/* copy tour2 into offspring */ - for (k = 0; k < 
num_gene; k++) - { - offspring[k] = tour2[k]; - from[k] = DAD; - check_list[tour2[k]]++; - } - -/* copy tour1 into offspring */ - for (k = left; k <= right; k++) - { - check_list[offspring[k]]--; - offspring[k] = tour1[k]; - from[k] = MOM; - check_list[tour1[k]]++; - } - - -/* pmx main part */ - - mx_fail = 0; - -/* STEP 1 */ - - for (k = left; k <= right; k++) - { /* for all elements in the tour1-2 */ - - if (tour1[k] == tour2[k]) - found = 1; /* find match in tour2 */ - - else - { - found = 0; /* substitute elements */ - - j = 0; - while (!(found) && (j < num_gene)) - { - if ((offspring[j] == tour1[k]) && (from[j] == DAD)) - { - - check_list[offspring[j]]--; - offspring[j] = tour2[k]; - found = 1; - check_list[tour2[k]]++; - } - - j++; - } - - } - - if (!(found)) - { /* failed to replace gene */ - failed[mx_fail] = (int) tour1[k]; - indx[mx_fail] = k; - mx_fail++; - } - - } /* ... for */ - - -/* STEP 2 */ - - /* see if any genes could not be replaced */ - if (mx_fail > 0) - { - mx_hold = mx_fail; - - for (k = 0; k < mx_hold; k++) - { - found = 0; - - j = 0; - while (!(found) && (j < num_gene)) - { - - if ((failed[k] == (int) offspring[j]) && (from[j] == DAD)) - { - check_list[offspring[j]]--; - offspring[j] = tour2[indx[k]]; - check_list[tour2[indx[k]]]++; - - found = 1; - failed[k] = -1; - mx_fail--; - } - - j++; - } - - } /* ... for */ - - } /* ... if */ - - -/* STEP 3 */ - - for (k = 1; k <= num_gene; k++) - { - - if (check_list[k] > 1) - { - i = 0; - - while (i < num_gene) - { - if ((offspring[i] == (Gene) k) && (from[i] == DAD)) - { - j = 1; - - while (j <= num_gene) - { - if (check_list[j] == 0) - { - offspring[i] = (Gene) j; - check_list[k]--; - check_list[j]++; - i = num_gene + 1; - j = i; - } - - j++; - } - - } /* ... if */ - - i++; - } /* end while */ - - } - } /* ... 
for */ - - pfree(failed); - pfree(from); - pfree(indx); - pfree(check_list); -} diff --git a/src/backend/optimizer/geqo/geqo_pool.c b/src/backend/optimizer/geqo/geqo_pool.c deleted file mode 100644 index 2c9826a9ab9..00000000000 --- a/src/backend/optimizer/geqo/geqo_pool.c +++ /dev/null @@ -1,243 +0,0 @@ -/*------------------------------------------------------------------------ - * - * geqo_pool.c - * Genetic Algorithm (GA) pool stuff - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * $Id: geqo_pool.c,v 1.20 2002/06/20 20:29:29 momjian Exp $ - * - *------------------------------------------------------------------------- - */ - -/* contributed by: - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - * Martin Utesch * Institute of Automatic Control * - = = University of Mining and Technology = - * utesch@aut.tu-freiberg.de * Freiberg, Germany * - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - */ - -/* -- parts of this are adapted from D. 
Whitley's Genitor algorithm -- */ - -#include "postgres.h" -#include "optimizer/geqo.h" -#include "optimizer/geqo_copy.h" -#include "optimizer/geqo_pool.h" -#include "optimizer/geqo_recombination.h" - - -static int compare(const void *arg1, const void *arg2); - -/* - * alloc_pool - * allocates memory for GA pool - */ -Pool * -alloc_pool(int pool_size, int string_length) -{ - Pool *new_pool; - Chromosome *chromo; - int i; - - /* pool */ - new_pool = (Pool *) palloc(sizeof(Pool)); - new_pool->size = (int) pool_size; - new_pool->string_length = (int) string_length; - - /* all chromosome */ - new_pool->data = (Chromosome *) palloc(pool_size * sizeof(Chromosome)); - - /* all gene */ - chromo = (Chromosome *) new_pool->data; /* vector of all chromos */ - for (i = 0; i < pool_size; i++) - chromo[i].string = palloc((string_length + 1) * sizeof(Gene)); - - return new_pool; -} - -/* - * free_pool - * deallocates memory for GA pool - */ -void -free_pool(Pool *pool) -{ - Chromosome *chromo; - int i; - - /* all gene */ - chromo = (Chromosome *) pool->data; /* vector of all chromos */ - for (i = 0; i < pool->size; i++) - pfree(chromo[i].string); - - /* all chromosome */ - pfree(pool->data); - - /* pool */ - pfree(pool); -} - -/* - * random_init_pool - * initialize genetic pool - */ -void -random_init_pool(Query *root, List *initial_rels, - Pool *pool, int strt, int stp) -{ - Chromosome *chromo = (Chromosome *) pool->data; - int i; - - for (i = strt; i < stp; i++) - { - init_tour(chromo[i].string, pool->string_length); - pool->data[i].worth = geqo_eval(root, initial_rels, - chromo[i].string, - pool->string_length); - } -} - -/* - * sort_pool - * sorts input pool according to worth, from smallest to largest - * - * maybe you have to change compare() for different ordering ... 
- */ -void -sort_pool(Pool *pool) -{ - qsort(pool->data, pool->size, sizeof(Chromosome), compare); -} - -/* - * compare - * static input function for pg_sort - * - * return values for sort from smallest to largest are prooved! - * don't change them! - */ -static int -compare(const void *arg1, const void *arg2) -{ - Chromosome chromo1 = *(Chromosome *) arg1; - Chromosome chromo2 = *(Chromosome *) arg2; - - if (chromo1.worth == chromo2.worth) - return 0; - else if (chromo1.worth > chromo2.worth) - return 1; - else - return -1; -} - -/* alloc_chromo - * allocates a chromosome and string space - */ -Chromosome * -alloc_chromo(int string_length) -{ - Chromosome *chromo; - - chromo = (Chromosome *) palloc(sizeof(Chromosome)); - chromo->string = (Gene *) palloc((string_length + 1) * sizeof(Gene)); - - return chromo; -} - -/* free_chromo - * deallocates a chromosome and string space - */ -void -free_chromo(Chromosome *chromo) -{ - pfree(chromo->string); - pfree(chromo); -} - -/* spread_chromo - * inserts a new chromosome into the pool, displacing worst gene in pool - * assumes best->worst = smallest->largest - */ -void -spread_chromo(Chromosome *chromo, Pool *pool) -{ - int top, - mid, - bot; - int i, - index; - Chromosome swap_chromo, - tmp_chromo; - - /* new chromo is so bad we can't use it */ - if (chromo->worth > pool->data[pool->size - 1].worth) - return; - - /* do a binary search to find the index of the new chromo */ - - top = 0; - mid = pool->size / 2; - bot = pool->size - 1; - index = -1; - - while (index == -1) - { - /* these 4 cases find a new location */ - - if (chromo->worth <= pool->data[top].worth) - index = top; - else if (chromo->worth == pool->data[mid].worth) - index = mid; - else if (chromo->worth == pool->data[bot].worth) - index = bot; - else if (bot - top <= 1) - index = bot; - - - /* - * these 2 cases move the search indices since a new location has - * not yet been found. 
- */ - - else if (chromo->worth < pool->data[mid].worth) - { - bot = mid; - mid = top + ((bot - top) / 2); - } - else - { /* (chromo->worth > pool->data[mid].worth) */ - top = mid; - mid = top + ((bot - top) / 2); - } - } /* ... while */ - - /* now we have index for chromo */ - - /* - * move every gene from index on down one position to make room for - * chromo - */ - - /* - * copy new gene into pool storage; always replace worst gene in pool - */ - - geqo_copy(&pool->data[pool->size - 1], chromo, pool->string_length); - - swap_chromo.string = pool->data[pool->size - 1].string; - swap_chromo.worth = pool->data[pool->size - 1].worth; - - for (i = index; i < pool->size; i++) - { - tmp_chromo.string = pool->data[i].string; - tmp_chromo.worth = pool->data[i].worth; - - pool->data[i].string = swap_chromo.string; - pool->data[i].worth = swap_chromo.worth; - - swap_chromo.string = tmp_chromo.string; - swap_chromo.worth = tmp_chromo.worth; - } -} diff --git a/src/backend/optimizer/geqo/geqo_px.c b/src/backend/optimizer/geqo/geqo_px.c deleted file mode 100644 index 0567cb11d25..00000000000 --- a/src/backend/optimizer/geqo/geqo_px.c +++ /dev/null @@ -1,107 +0,0 @@ -/*------------------------------------------------------------------------ -* -* geqo_px.c -* -* position crossover [PX] routines; -* PX operator according to Syswerda -* (The Genetic Algorithms Handbook, L Davis, ed) -* -* $Id: geqo_px.c,v 1.9 1999/07/16 04:59:12 momjian Exp $ -* -*------------------------------------------------------------------------- -*/ - -/* contributed by: - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - * Martin Utesch * Institute of Automatic Control * - = = University of Mining and Technology = - * utesch@aut.tu-freiberg.de * Freiberg, Germany * - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - */ - -/* the px algorithm is adopted from Genitor : */ -/*************************************************************/ -/* */ -/* Copyright (c) 
1990 */ -/* Darrell L. Whitley */ -/* Computer Science Department */ -/* Colorado State University */ -/* */ -/* Permission is hereby granted to copy all or any part of */ -/* this program for free distribution. The author's name */ -/* and this copyright notice must be included in any copy. */ -/* */ -/*************************************************************/ - -#include "postgres.h" -#include "optimizer/geqo_random.h" -#include "optimizer/geqo_recombination.h" - - -/* px - * - * position crossover - */ -void -px(Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table) -{ - - int num_positions; - int i, - pos, - tour2_index, - offspring_index; - - /* initialize city table */ - for (i = 1; i <= num_gene; i++) - city_table[i].used = 0; - - /* choose random positions that will be inherited directly from parent */ - num_positions = geqo_randint(2 * num_gene / 3, num_gene / 3); - - /* choose random position */ - for (i = 0; i < num_positions; i++) - { - pos = geqo_randint(num_gene - 1, 0); - - offspring[pos] = tour1[pos]; /* transfer cities to child */ - city_table[(int) tour1[pos]].used = 1; /* mark city used */ - } - - tour2_index = 0; - offspring_index = 0; - - - /* px main part */ - - while (offspring_index < num_gene) - { - - /* next position in offspring filled */ - if (!city_table[(int) tour1[offspring_index]].used) - { - - /* next city in tour1 not used */ - if (!city_table[(int) tour2[tour2_index]].used) - { - - /* inherit from tour1 */ - offspring[offspring_index] = tour2[tour2_index]; - - tour2_index++; - offspring_index++; - } - else - { /* next city in tour2 has been used */ - tour2_index++; - } - - } - else - { /* next position in offspring is filled */ - offspring_index++; - } - - } - -} diff --git a/src/backend/optimizer/geqo/geqo_recombination.c b/src/backend/optimizer/geqo/geqo_recombination.c deleted file mode 100644 index 6f74ce94ebc..00000000000 --- a/src/backend/optimizer/geqo/geqo_recombination.c +++ /dev/null @@ -1,95 +0,0 
@@ -/*------------------------------------------------------------------------ -* -* geqo_recombination.c -* misc recombination procedures -* -* $Id: geqo_recombination.c,v 1.11 1999/07/16 04:59:13 momjian Exp $ -* -*------------------------------------------------------------------------- -*/ - -/* contributed by: - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - * Martin Utesch * Institute of Automatic Control * - = = University of Mining and Technology = - * utesch@aut.tu-freiberg.de * Freiberg, Germany * - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - */ - -/* -- parts of this are adapted from D. Whitley's Genitor algorithm -- */ - -#include "postgres.h" -#include "optimizer/geqo_random.h" -#include "optimizer/geqo_recombination.h" - - -/* - * init_tour - * - * Randomly generates a legal "traveling salesman" tour - * (i.e. where each point is visited only once.) - * Essentially, this routine fills an array with all possible - * points on the tour and randomly chooses the 'next' city from - * this array. When a city is chosen, the array is shortened - * and the procedure repeated. - * - */ -void -init_tour(Gene *tour, int num_gene) -{ - Gene *tmp; - int remainder; - int next, - i; - - tmp = (Gene *) palloc(num_gene * sizeof(Gene)); - - for (i = 0; i < num_gene; i++) - { - tmp[i] = (Gene) i + 1; /* builds tours "1 - 2 - 3" etc. 
*/ - } - - remainder = num_gene - 1; - - for (i = 0; i < num_gene; i++) - { - next = (int) geqo_randint(remainder, 0); /* choose city between 0 - * and remainder */ - tour[i] = tmp[next]; - tmp[next] = tmp[remainder]; - remainder--; - } - - pfree(tmp); -} - -/* alloc_city_table - * - * allocate memory for city table - * - */ -City * -alloc_city_table(int num_gene) -{ - City *city_table; - - /* - * palloc one extra location so that nodes numbered 1..n can be - * indexed directly; 0 will not be used - */ - - city_table = (City *) palloc((num_gene + 1) * sizeof(City)); - - return city_table; -} - -/* free_city_table - * - * deallocate memory of city table - * - */ -void -free_city_table(City *city_table) -{ - pfree(city_table); -} diff --git a/src/backend/optimizer/geqo/geqo_selection.c b/src/backend/optimizer/geqo/geqo_selection.c deleted file mode 100644 index ae4cbbf426f..00000000000 --- a/src/backend/optimizer/geqo/geqo_selection.c +++ /dev/null @@ -1,90 +0,0 @@ -/*------------------------------------------------------------------------- - * - * geqo_selection.c - * linear selection scheme for the genetic query optimizer - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * $Id: geqo_selection.c,v 1.13 2002/06/20 20:29:29 momjian Exp $ - * - *------------------------------------------------------------------------- - */ - -/* contributed by: - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - * Martin Utesch * Institute of Automatic Control * - = = University of Mining and Technology = - * utesch@aut.tu-freiberg.de * Freiberg, Germany * - =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= - */ - -/* this is adopted from D. Whitley's Genitor algorithm */ - -/*************************************************************/ -/* */ -/* Copyright (c) 1990 */ -/* Darrell L. 
Whitley */ -/* Computer Science Department */ -/* Colorado State University */ -/* */ -/* Permission is hereby granted to copy all or any part of */ -/* this program for free distribution. The author's name */ -/* and this copyright notice must be included in any copy. */ -/* */ -/*************************************************************/ - -#include <math.h> - -#include "postgres.h" -#include "optimizer/geqo_copy.h" -#include "optimizer/geqo_random.h" -#include "optimizer/geqo_selection.h" - -static int linear(int max, double bias); - -/* geqo_selection - * - * according to bias described by input parameters, - * second genes are selected from the pool - */ -void -geqo_selection(Chromosome *momma, Chromosome *daddy, Pool *pool, double bias) -{ - int first, - second; - - first = (int) linear(pool->size, bias); - second = (int) linear(pool->size, bias); - - if (pool->size > 1) - { - while (first == second) - second = (int) linear(pool->size, bias); - } - - geqo_copy(momma, &pool->data[first], pool->string_length); - geqo_copy(daddy, &pool->data[second], pool->string_length); -} - -/* linear - * generates random integer between 0 and input max number - * using input linear bias - * - * probability distribution function is: f(x) = bias - 2(bias - 1)x - * bias = (prob of first rule) / (prob of middle rule) - * - */ - -static int -linear(int pool_size, double bias) /* bias is y-intercept of linear - * distribution */ -{ - double index; /* index between 0 and pop_size */ - double max = (double) pool_size; - - index = max * (bias - sqrt((bias * bias) - 4.0 * (bias - 1.0) * geqo_rand())) - / 2.0 / (bias - 1.0); - - return (int) index; -} diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile deleted file mode 100644 index 50428922ea3..00000000000 --- a/src/backend/optimizer/path/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -#------------------------------------------------------------------------- -# -# Makefile-- -# Makefile for 
optimizer/path -# -# IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/optimizer/path/Makefile,v 1.15 2000/08/31 16:10:09 petere Exp $ -# -#------------------------------------------------------------------------- - -subdir = src/backend/optimizer/path -top_builddir = ../../../.. -include $(top_builddir)/src/Makefile.global - -OBJS = allpaths.o clausesel.o costsize.o indxpath.o \ - joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o - -all: SUBSYS.o - -SUBSYS.o: $(OBJS) - $(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS) - -depend dep: - $(CC) -MM $(CFLAGS) *.c >depend - -clean: - rm -f SUBSYS.o $(OBJS) - -ifeq (depend,$(wildcard depend)) -include depend -endif diff --git a/src/backend/optimizer/path/_deadcode/predmig.c b/src/backend/optimizer/path/_deadcode/predmig.c deleted file mode 100644 index 5ce4083ab56..00000000000 --- a/src/backend/optimizer/path/_deadcode/predmig.c +++ /dev/null @@ -1,810 +0,0 @@ -/*------------------------------------------------------------------------- - * - * predmig.c - * - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/_deadcode/Attic/predmig.c,v 1.15 2002/06/20 20:29:30 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -/* -** DESCRIPTION -** Main Routines to handle Predicate Migration (i.e. correct optimization -** of queries with expensive functions.) -** -** The reasoning behind some of these algorithms is rather detailed. -** Have a look at Sequoia Tech Report 92/13 for more info. Also -** see Monma and Sidney's paper "Sequencing with Series-Parallel -** Precedence Constraints", in "Mathematics of Operations Research", -** volume 4 (1979), pp. 215-224. 
-** -** The main thing that this code does that wasn't handled in xfunc.c is -** it considers the possibility that two joins in a stream may not -** be ordered by ascending rank -- in such a scenario, it may be optimal -** to pullup more restrictions than we did via xfunc_try_pullup. -** -** This code in some sense generalizes xfunc_try_pullup; if you -** run postgres -x noprune, you'll turn off xfunc_try_pullup, and this -** code will do everything that xfunc_try_pullup would have, and maybe -** more. However, this results in no pruning, which may slow down the -** optimizer and/or cause the system to run out of memory. -** -- JMH, 11/13/92 -*/ - -#include "nodes/pg_list.h" -#include "nodes/nodes.h" -#include "nodes/primnodes.h" -#include "nodes/relation.h" -#include "optimizer/pathnode.h" -#include "optimizer/internal.h" -#include "optimizer/cost.h" -#include "optimizer/keys.h" -#include "optimizer/tlist.h" - -#define is_clause(node) (get_cinfo(node)) /* a stream node - * represents a clause - * (not a join) iff it has - * a non-NULL cinfo field */ - -static void xfunc_predmig(JoinPath pathnode, Stream streamroot, - Stream laststream, bool *progressp); -static bool xfunc_series_llel(Stream stream); -static bool xfunc_llel_chains(Stream root, Stream bottom); -static Stream xfunc_complete_stream(Stream stream); -static bool xfunc_prdmig_pullup(Stream origstream, Stream pullme, - JoinPath joinpath); -static void xfunc_form_groups(Stream root, Stream bottom); -static void xfunc_free_stream(Stream root); -static Stream xfunc_add_clauses(Stream current); -static void xfunc_setup_group(Stream node, Stream bottom); -static Stream xfunc_streaminsert(RestrictInfo restrictinfo, Stream current, - int clausetype); -static int xfunc_num_relids(Stream node); -static StreamPtr xfunc_get_downjoin(Stream node); -static StreamPtr xfunc_get_upjoin(Stream node); -static Stream xfunc_stream_qsort(Stream root, Stream bottom); -static int xfunc_stream_compare(void *arg1, void *arg2); 
-static bool xfunc_check_stream(Stream node); -static bool xfunc_in_stream(Stream node, Stream stream); - -/* ----------------- MAIN FUNCTIONS ------------------------ */ -/* -** xfunc_do_predmig -** wrapper for Predicate Migration. It calls xfunc_predmig until no -** more progress is made. -** return value says if any changes were ever made. -*/ -bool -xfunc_do_predmig(Path root) -{ - bool progress, - changed = false; - - if (is_join(root)) - do - { - progress = false; - Assert(IsA(root, JoinPath)); - xfunc_predmig((JoinPath) root, (Stream) NULL, (Stream) NULL, - &progress); - if (changed && progress) - elog(DEBUG, "Needed to do a second round of predmig!\n"); - if (progress) - changed = true; - } while (progress); - return changed; -} - - -/* - ** xfunc_predmig - ** The main routine for Predicate Migration. It traverses a join tree, - ** and for each root-to-leaf path in the plan tree it constructs a - ** "Stream", which it passes to xfunc_series_llel for optimization. - ** Destructively modifies the join tree (via predicate pullup). - */ -static void -xfunc_predmig(JoinPath pathnode, /* root of the join tree */ - Stream streamroot, - Stream laststream,/* for recursive calls -- these are the - * root of the stream under construction, - * and the lowest node created so far */ - bool *progressp) -{ - Stream newstream; - - /* - * * traverse the join tree dfs-style, constructing a stream as you - * go. * When you hit a scan node, pass the stream off to - * xfunc_series_llel. 
- */ - - /* sanity check */ - if ((!streamroot && laststream) || - (streamroot && !laststream)) - elog(ERROR, "called xfunc_predmig with bad inputs"); - if (streamroot) - Assert(xfunc_check_stream(streamroot)); - - /* add path node to stream */ - newstream = RMakeStream(); - if (!streamroot) - streamroot = newstream; - set_upstream(newstream, (StreamPtr) laststream); - if (laststream) - set_downstream(laststream, (StreamPtr) newstream); - set_downstream(newstream, (StreamPtr) NULL); - set_pathptr(newstream, (pathPtr) pathnode); - set_cinfo(newstream, (RestrictInfo) NULL); - set_clausetype(newstream, XFUNC_UNKNOWN); - - /* base case: we're at a leaf, call xfunc_series_llel */ - if (!is_join(pathnode)) - { - /* form a fleshed-out copy of the stream */ - Stream fullstream = xfunc_complete_stream(streamroot); - - /* sort it via series-llel */ - if (xfunc_series_llel(fullstream)) - *progressp = true; - - /* free up the copy */ - xfunc_free_stream(fullstream); - } - else - { - /* visit left child */ - xfunc_predmig((JoinPath) get_outerjoinpath(pathnode), - streamroot, newstream, progressp); - - /* visit right child */ - xfunc_predmig((JoinPath) get_innerjoinpath(pathnode), - streamroot, newstream, progressp); - } - - /* remove this node */ - if (get_upstream(newstream)) - set_downstream((Stream) get_upstream(newstream), (StreamPtr) NULL); - pfree(newstream); -} - -/* - ** xfunc_series_llel - ** A flavor of Monma and Sidney's Series-Parallel algorithm. - ** Traverse stream downwards. When you find a node with restrictions on it, - ** call xfunc_llel_chains on the substream from root to that node. 
- */ -static bool -xfunc_series_llel(Stream stream) -{ - Stream temp, - next; - bool progress = false; - - for (temp = stream; temp != (Stream) NULL; temp = next) - { - next = (Stream) xfunc_get_downjoin(temp); - - /* - * * if there are restrictions/secondary join clauses above this * - * node, call xfunc_llel_chains - */ - if (get_upstream(temp) && is_clause((Stream) get_upstream(temp))) - if (xfunc_llel_chains(stream, temp)) - progress = true; - } - return progress; -} - -/* - ** xfunc_llel_chains - ** A flavor of Monma and Sidney's Parallel Chains algorithm. - ** Given a stream which has been well-ordered except for its lowermost - ** restrictions/2-ary joins, pull up the restrictions/2-arys as appropriate. - ** What that means here is to form groups in the chain above the lowest - ** join node above bottom inclusive, and then take all the restrictions - ** following bottom, and try to pull them up as far as possible. - */ -static bool -xfunc_llel_chains(Stream root, Stream bottom) -{ - bool progress = false; - Stream origstream; - Stream tmpstream, - pathstream; - Stream rootcopy = root; - - Assert(xfunc_check_stream(root)); - - /* xfunc_prdmig_pullup will need an unmodified copy of the stream */ - origstream = (Stream) copyObject((Node) root); - - /* form groups among ill-ordered nodes */ - xfunc_form_groups(root, bottom); - - /* sort chain by rank */ - Assert(xfunc_in_stream(bottom, root)); - rootcopy = xfunc_stream_qsort(root, bottom); - - /* - * * traverse sorted stream -- if any restriction has moved above a - * join, * we must pull it up in the plan. That is, make plan tree * - * reflect order of sorted stream. 
- */ - for (tmpstream = rootcopy, - pathstream = (Stream) xfunc_get_downjoin(rootcopy); - tmpstream != (Stream) NULL && pathstream != (Stream) NULL; - tmpstream = (Stream) get_downstream(tmpstream)) - { - if (is_clause(tmpstream) - && get_pathptr(pathstream) != get_pathptr(tmpstream)) - { - /* - * * If restriction moved above a Join after sort, we pull it * - * up in the join plan. * If restriction moved down, we - * ignore it. * This is because Joey's Sequoia paper proves - * that * restrictions should never move down. If this * one - * were moved down, it would violate "semantic correctness", * - * i.e. it would be lower than the attributes it references. - */ - Assert(xfunc_num_relids(pathstream) > xfunc_num_relids(tmpstream)); - progress = xfunc_prdmig_pullup(origstream, tmpstream, - (JoinPath) get_pathptr(pathstream)); - } - if (get_downstream(tmpstream)) - pathstream = (Stream) xfunc_get_downjoin((Stream) get_downstream(tmpstream)); - } - - /* free up origstream */ - xfunc_free_stream(origstream); - return progress; -} - -/* - ** xfunc_complete_stream - ** Given a stream composed of join nodes only, make a copy containing the - ** join nodes along with the associated restriction nodes. - */ -static Stream -xfunc_complete_stream(Stream stream) -{ - Stream tmpstream, - copystream, - curstream = (Stream) NULL; - - copystream = (Stream) copyObject((Node) stream); - Assert(xfunc_check_stream(copystream)); - - curstream = copystream; - Assert(!is_clause(curstream)); - - /* curstream = (Stream)xfunc_get_downjoin(curstream); */ - - while (curstream != (Stream) NULL) - { - xfunc_add_clauses(curstream); - curstream = (Stream) xfunc_get_downjoin(curstream); - } - - /* find top of stream and return it */ - for (tmpstream = copystream; get_upstream(tmpstream) != (StreamPtr) NULL; - tmpstream = (Stream) get_upstream(tmpstream)) - /* no body in for loop */ ; - - return tmpstream; -} - -/* - ** xfunc_prdmig_pullup - ** pullup a clause in a path above joinpath. 
Since the JoinPath tree - ** doesn't have upward pointers, it's difficult to deal with. Thus we - ** require the original stream, which maintains pointers to all the path - ** nodes. We use the original stream to find out what joins are - ** above the clause. - */ -static bool -xfunc_prdmig_pullup(Stream origstream, Stream pullme, JoinPath joinpath) -{ - RestrictInfo restrictinfo = get_cinfo(pullme); - bool progress = false; - Stream upjoin, - orignode, - temp; - int whichchild; - - /* find node in origstream that contains clause */ - for (orignode = origstream; - orignode != (Stream) NULL - && get_cinfo(orignode) != restrictinfo; - orignode = (Stream) get_downstream(orignode)) - /* empty body in for loop */ ; - if (!orignode) - elog(ERROR, "Didn't find matching node in original stream"); - - - /* pull up this node as far as it should go */ - for (upjoin = (Stream) xfunc_get_upjoin(orignode); - upjoin != (Stream) NULL - && (JoinPath) get_pathptr((Stream) xfunc_get_downjoin(upjoin)) - != joinpath; - upjoin = (Stream) xfunc_get_upjoin(upjoin)) - { -#ifdef DEBUG - elog(DEBUG, "pulling up in xfunc_predmig_pullup!"); -#endif - /* move clause up in path */ - if (get_pathptr((Stream) get_downstream(upjoin)) - == (pathPtr) get_outerjoinpath((JoinPath) get_pathptr(upjoin))) - whichchild = OUTER; - else - whichchild = INNER; - restrictinfo = xfunc_pullup((Path) get_pathptr((Stream) get_downstream(upjoin)), - (JoinPath) get_pathptr(upjoin), - restrictinfo, - whichchild, - get_clausetype(orignode)); - set_pathptr(pullme, get_pathptr(upjoin)); - /* pullme has been moved into locrestrictinfo */ - set_clausetype(pullme, XFUNC_LOCPRD); - - /* - * * xfunc_pullup makes new path nodes for children of * - * get_pathptr(current). 
We must modify the stream nodes to point * - * to these path nodes - */ - if (whichchild == OUTER) - { - for (temp = (Stream) get_downstream(upjoin); is_clause(temp); - temp = (Stream) get_downstream(temp)) - set_pathptr - (temp, (pathPtr) - get_outerjoinpath((JoinPath) get_pathptr(upjoin))); - set_pathptr - (temp, - (pathPtr) get_outerjoinpath((JoinPath) get_pathptr(upjoin))); - } - else - { - for (temp = (Stream) get_downstream(upjoin); is_clause(temp); - temp = (Stream) get_downstream(temp)) - set_pathptr - (temp, (pathPtr) - get_innerjoinpath((JoinPath) get_pathptr(upjoin))); - set_pathptr - (temp, (pathPtr) - get_innerjoinpath((JoinPath) get_pathptr(upjoin))); - } - progress = true; - } - if (!progress) - elog(DEBUG, "didn't succeed in pulling up in xfunc_prdmig_pullup"); - return progress; -} - -/* - ** xfunc_form_groups - ** A group is a pair of stream nodes a,b such that a is constrained to - ** precede b (for instance if a and b are both joins), but rank(a) > rank(b). - ** In such a situation, Monma and Sidney prove that no clauses should end - ** up between a and b, and therefore we may treat them as a group, with - ** selectivity equal to the product of their selectivities, and cost - ** equal to the cost of the first plus the selectivity of the first times the - ** cost of the second. We define each node to be in a group by itself, - ** and then repeatedly find adjacent groups which are ordered by descending - ** rank, and make larger groups. You know that two adjacent nodes are in a - ** group together if the lower has groupup set to true. They will both have - ** the same groupcost and groupsel (since they're in the same group!) 
- */ -static void -xfunc_form_groups(Query *queryInfo, Stream root, Stream bottom) -{ - Stream temp, - parent; - int lowest = xfunc_num_relids((Stream) xfunc_get_upjoin(bottom)); - bool progress; - LispValue primjoin; - int whichchild; - - if (!lowest) - return; /* no joins in stream, so no groups */ - - /* initialize groups to be single nodes */ - for (temp = root; - temp != (Stream) NULL && temp != bottom; - temp = (Stream) get_downstream(temp)) - { - /* if a Join node */ - if (!is_clause(temp)) - { - if (get_pathptr((Stream) get_downstream(temp)) - == (pathPtr) get_outerjoinpath((JoinPath) get_pathptr(temp))) - whichchild = OUTER; - else - whichchild = INNER; - set_groupcost(temp, - xfunc_join_expense((JoinPath) get_pathptr(temp), - whichchild)); - if (primjoin = xfunc_primary_join((JoinPath) get_pathptr(temp))) - { - set_groupsel(temp, - compute_clause_selec(queryInfo, - primjoin, NIL)); - } - else - set_groupsel(temp, 1.0); - } - else -/* a restriction, or 2-ary join pred */ - { - set_groupcost(temp, - xfunc_expense(queryInfo, - get_clause(get_cinfo(temp)))); - set_groupsel(temp, - compute_clause_selec(queryInfo, - get_clause(get_cinfo(temp)), - NIL)); - } - set_groupup(temp, false); - } - - /* make passes upwards, forming groups */ - do - { - progress = false; - for (temp = (Stream) get_upstream(bottom); - temp != (Stream) NULL; - temp = (Stream) get_upstream(temp)) - { - /* check for grouping with node upstream */ - if (!get_groupup(temp) && /* not already grouped */ - (parent = (Stream) get_upstream(temp)) != (Stream) NULL && - /* temp is a join or temp is the top of a group */ - (is_join((Path) get_pathptr(temp)) || - get_downstream(temp) && - get_groupup((Stream) get_downstream(temp))) && - get_grouprank(parent) < get_grouprank(temp)) - { - progress = true; /* we formed a new group */ - set_groupup(temp, true); - set_groupcost(temp, - get_groupcost(temp) + - get_groupsel(temp) * get_groupcost(parent)); - set_groupsel(temp, get_groupsel(temp) * 
get_groupsel(parent)); - - /* fix costs and sels of all members of group */ - xfunc_setup_group(temp, bottom); - } - } - } while (progress); -} - - -/* ------------------- UTILITY FUNCTIONS ------------------------- */ - -/* - ** xfunc_free_stream - ** walk down a stream and pfree it - */ -static void -xfunc_free_stream(Stream root) -{ - Stream cur, - next; - - Assert(xfunc_check_stream(root)); - - if (root != (Stream) NULL) - for (cur = root; cur != (Stream) NULL; cur = next) - { - next = (Stream) get_downstream(cur); - pfree(cur); - } -} - -/* - ** xfunc_add<_clauses - ** find any clauses above current, and insert them into stream as - ** appropriate. Return uppermost clause inserted, or current if none. - */ -static Stream -xfunc_add_clauses(Stream current) -{ - Stream topnode = current; - LispValue temp; - LispValue primjoin; - - /* first add in the local clauses */ - foreach(temp, get_loc_restrictinfo((Path) get_pathptr(current))) - { - topnode = xfunc_streaminsert((RestrictInfo) lfirst(temp), topnode, - XFUNC_LOCPRD); - } - - /* and add in the join clauses */ - if (IsA(get_pathptr(current), JoinPath)) - { - primjoin = xfunc_primary_join((JoinPath) get_pathptr(current)); - foreach(temp, get_pathrestrictinfo((JoinPath) get_pathptr(current))) - { - if (!equal(get_clause((RestrictInfo) lfirst(temp)), primjoin)) - topnode = xfunc_streaminsert((RestrictInfo) lfirst(temp), topnode, - XFUNC_JOINPRD); - } - } - return topnode; -} - - -/* - ** xfunc_setup_group - ** find all elements of stream that are grouped with node and are above - ** bottom, and set their groupcost and groupsel to be the same as node's. 
- */ -static void -xfunc_setup_group(Stream node, Stream bottom) -{ - Stream temp; - - if (node != bottom) - /* traverse downwards */ - for (temp = (Stream) get_downstream(node); - temp != (Stream) NULL && temp != bottom; - temp = (Stream) get_downstream(temp)) - { - if (!get_groupup(temp)) - break; - else - { - set_groupcost(temp, get_groupcost(node)); - set_groupsel(temp, get_groupsel(node)); - } - } - - /* traverse upwards */ - for (temp = (Stream) get_upstream(node); temp != (Stream) NULL; - temp = (Stream) get_upstream(temp)) - { - if (!get_groupup((Stream) get_downstream(temp))) - break; - else - { - set_groupcost(temp, get_groupcost(node)); - set_groupsel(temp, get_groupsel(node)); - } - } -} - - -/* - ** xfunc_streaminsert - ** Make a new Stream node to hold clause, and insert it above current. - ** Return new node. - */ -static Stream -xfunc_streaminsert(RestrictInfo restrictinfo, - Stream current, - int clausetype) /* XFUNC_LOCPRD or XFUNC_JOINPRD */ -{ - Stream newstream = RMakeStream(); - - set_upstream(newstream, get_upstream(current)); - if (get_upstream(current)) - set_downstream((Stream) (get_upstream(current)), (StreamPtr) newstream); - set_upstream(current, (StreamPtr) newstream); - set_downstream(newstream, (StreamPtr) current); - set_pathptr(newstream, get_pathptr(current)); - set_cinfo(newstream, restrictinfo); - set_clausetype(newstream, clausetype); - return newstream; -} - -/* - ** Given a Stream node, find the number of relids referenced in the pathnode - ** associated with the stream node. The number of relids gives a unique - ** ordering on the joins in a stream, which we use to compare the height of - ** join nodes. 
- */ -static int -xfunc_num_relids(Stream node) -{ - if (!node || !IsA(get_pathptr(node), JoinPath)) - return 0; - else - return (length - (get_relids(get_parent((JoinPath) get_pathptr(node))))); -} - -/* - ** xfunc_get_downjoin - ** Given a stream node, find the next lowest node which points to a - ** join predicate or a scan node. - */ -static StreamPtr -xfunc_get_downjoin(Stream node) -{ - Stream temp; - - if (!is_clause(node)) /* if this is a join */ - node = (Stream) get_downstream(node); - for (temp = node; temp && is_clause(temp); - temp = (Stream) get_downstream(temp)) - /* empty body in for loop */ ; - - return (StreamPtr) temp; -} - -/* - ** xfunc_get_upjoin - ** same as above, but upwards. - */ -static StreamPtr -xfunc_get_upjoin(Stream node) -{ - Stream temp; - - if (!is_clause(node)) /* if this is a join */ - node = (Stream) get_upstream(node); - for (temp = node; temp && is_clause(temp); - temp = (Stream) get_upstream(temp)) - /* empty body in for loop */ ; - - return (StreamPtr) temp; -} - -/* - ** xfunc_stream_qsort - ** Given a stream, sort by group rank the elements in the stream from the - ** node "bottom" up. DESTRUCTIVELY MODIFIES STREAM! Returns new root. 
- */ -static Stream -xfunc_stream_qsort(Stream root, Stream bottom) -{ - int i; - size_t num; - Stream *nodearray, - output; - Stream tmp; - - /* find size of list */ - for (num = 0, tmp = root; tmp != bottom; - tmp = (Stream) get_downstream(tmp)) - num++; - if (num <= 1) - return root; - - /* copy elements of the list into an array */ - nodearray = (Stream *) palloc(num * sizeof(Stream)); - - for (tmp = root, i = 0; tmp != bottom; - tmp = (Stream) get_downstream(tmp), i++) - nodearray[i] = tmp; - - /* sort the array */ - qsort(nodearray, num, sizeof(LispValue), xfunc_stream_compare); - - /* paste together the array elements */ - output = nodearray[num - 1]; - set_upstream(output, (StreamPtr) NULL); - for (i = num - 2; i >= 0; i--) - { - set_downstream(nodearray[i + 1], (StreamPtr) nodearray[i]); - set_upstream(nodearray[i], (StreamPtr) nodearray[i + 1]); - } - set_downstream(nodearray[0], (StreamPtr) bottom); - if (bottom) - set_upstream(bottom, (StreamPtr) nodearray[0]); - - Assert(xfunc_check_stream(output)); - return output; -} - -/* - ** xfunc_stream_compare - ** comparison function for xfunc_stream_qsort. - ** Compare nodes by group rank. If group ranks are equal, ensure that - ** join nodes appear in same order as in plan tree. 
- */ -static int -xfunc_stream_compare(void *arg1, void *arg2) -{ - Stream stream1 = *(Stream *) arg1; - Stream stream2 = *(Stream *) arg2; - Cost rank1, - rank2; - - rank1 = get_grouprank(stream1); - rank2 = get_grouprank(stream2); - - if (rank1 > rank2) - return 1; - else if (rank1 < rank2) - return -1; - else - { - if (is_clause(stream1) && is_clause(stream2)) - return 0; /* doesn't matter what order if both are - * restrictions */ - else if (!is_clause(stream1) && !is_clause(stream2)) - { - if (xfunc_num_relids(stream1) < xfunc_num_relids(stream2)) - return -1; - else - return 1; - } - else if (is_clause(stream1) && !is_clause(stream2)) - { - if (xfunc_num_relids(stream1) == xfunc_num_relids(stream2)) - /* stream1 is a restriction over stream2 */ - return 1; - else - return -1; - } - else if (!is_clause(stream1) && is_clause(stream2)) - { - /* stream2 is a restriction over stream1: never push down */ - return -1; - } - } -} - -/* ------------------ DEBUGGING ROUTINES ---------------------------- */ - -/* - ** Make sure all pointers in stream make sense. Make sure no joins are - ** out of order. 
- */ -static bool -xfunc_check_stream(Stream node) -{ - Stream temp; - int numrelids, - tmp; - - /* set numrelids higher than max */ - if (!is_clause(node)) - numrelids = xfunc_num_relids(node) + 1; - else if (xfunc_get_downjoin(node)) - numrelids = xfunc_num_relids((Stream) xfunc_get_downjoin(node)) + 1; - else - numrelids = 1; - - for (temp = node; get_downstream(temp); temp = (Stream) get_downstream(temp)) - { - if ((Stream) get_upstream((Stream) get_downstream(temp)) != temp) - { - elog(ERROR, "bad pointers in stream"); - return false; - } - if (!is_clause(temp)) - { - if ((tmp = xfunc_num_relids(temp)) >= numrelids) - { - elog(ERROR, "Joins got reordered!"); - return false; - } - numrelids = tmp; - } - } - - return true; -} - -/* - ** xfunc_in_stream - ** check if node is in stream - */ -static bool -xfunc_in_stream(Stream node, Stream stream) -{ - Stream temp; - - for (temp = stream; temp; temp = (Stream) get_downstream(temp)) - if (temp == node) - return 1; - return 0; -} diff --git a/src/backend/optimizer/path/_deadcode/xfunc.c b/src/backend/optimizer/path/_deadcode/xfunc.c deleted file mode 100644 index 80087652c79..00000000000 --- a/src/backend/optimizer/path/_deadcode/xfunc.c +++ /dev/null @@ -1,1479 +0,0 @@ -/*------------------------------------------------------------------------- - * - * xfunc.c - * Utility routines to handle expensive function optimization. - * Includes xfunc_trypullup(), which attempts early pullup of predicates - * to allow for maximal pruning. 
- * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/_deadcode/Attic/xfunc.c,v 1.19 2002/06/20 20:29:30 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include <math.h> - -#ifdef HAVE_VALUES_H -#include <values.h> -#endif - -#include "postgres.h" - -#include "access/heapam.h" -#include "access/htup.h" -#include "catalog/pg_language.h" -#include "catalog/pg_proc.h" -#include "catalog/pg_type.h" -#include "lib/lispsort.h" -#include "nodes/nodes.h" -#include "nodes/pg_list.h" -#include "nodes/primnodes.h" -#include "nodes/relation.h" -#include "optimizer/clauses.h" -#include "optimizer/cost.h" -#include "optimizer/internal.h" -#include "optimizer/keys.h" -#include "optimizer/pathnode.h" -#include "optimizer/tlist.h" -#include "storage/buf_internals.h" -#include "tcop/dest.h" -#include "utils/syscache.h" - -#define ever ; 1 ; - -/* local funcs */ -static int xfunc_card_unreferenced(Query *queryInfo, - Expr *clause, Relids referenced); - -*/ - -/* -** xfunc_trypullup -** Preliminary pullup of predicates, to allow for maximal pruning. -** Given a relation, check each of its paths and see if you can -** pullup clauses from its inner and outer. -*/ - -void -xfunc_trypullup(RelOptInfo rel) -{ - LispValue y; /* list ptr */ - RestrictInfo maxcinfo; /* The RestrictInfo to pull up, as - * calculated by xfunc_shouldpull() */ - JoinPath curpath; /* current path in list */ - int progress; /* has progress been made this time - * through? */ - int clausetype; - - do - { - progress = false; /* no progress yet in this iteration */ - foreach(y, get_pathlist(rel)) - { - curpath = (JoinPath) lfirst(y); - - /* - * * for each operand, attempt to pullup predicates until - * first * failure. - */ - for (ever) - { - /* No, the following should NOT be '==' !! 
*/ - if (clausetype = xfunc_shouldpull((Path) get_innerjoinpath(curpath), - curpath, INNER, &maxcinfo)) - { - - xfunc_pullup((Path) get_innerjoinpath(curpath), - curpath, maxcinfo, INNER, clausetype); - progress = true; - } - else - break; - } - for (ever) - { - - /* No, the following should NOT be '==' !! */ - if (clausetype = xfunc_shouldpull((Path) get_outerjoinpath(curpath), - curpath, OUTER, &maxcinfo)) - { - - xfunc_pullup((Path) get_outerjoinpath(curpath), - curpath, maxcinfo, OUTER, clausetype); - progress = true; - } - else - break; - } - - /* - * * make sure the unpruneable flag bubbles up, i.e. * if - * anywhere below us in the path pruneable is false, * then - * pruneable should be false here - */ - if (get_pruneable(get_parent(curpath)) && - (!get_pruneable(get_parent - ((Path) get_innerjoinpath(curpath))) || - !get_pruneable(get_parent((Path) - get_outerjoinpath(curpath))))) - { - - set_pruneable(get_parent(curpath), false); - progress = true; - } - } - } while (progress); -} - -/* - ** xfunc_shouldpull - ** find clause with highest rank, and decide whether to pull it up - ** from child to parent. Currently we only pullup secondary join clauses - ** that are in the pathrestrictinfo. Secondary hash and sort clauses are - ** left where they are. - ** If we find an expensive function but decide *not* to pull it up, - ** we'd better set the unpruneable flag. 
-- JMH, 11/11/92 - ** - ** Returns: 0 if nothing left to pullup - ** XFUNC_LOCPRD if a local predicate is to be pulled up - ** XFUNC_JOINPRD if a secondary join predicate is to be pulled up - */ -int -xfunc_shouldpull(Query *queryInfo, - Path childpath, - JoinPath parentpath, - int whichchild, - RestrictInfo *maxcinfopt) /* Out: pointer to clause - * to pullup */ -{ - LispValue clauselist, - tmplist; /* lists of clauses */ - RestrictInfo maxcinfo; /* clause to pullup */ - LispValue primjoinclause /* primary join clause */ - = xfunc_primary_join(parentpath); - Cost tmprank, - maxrank = (-1 * MAXFLOAT); /* ranks of clauses */ - Cost joinselec = 0; /* selectivity of the join predicate */ - Cost joincost = 0; /* join cost + primjoinclause cost */ - int retval = XFUNC_LOCPRD; - - clauselist = get_loc_restrictinfo(childpath); - - if (clauselist != LispNil) - { - /* find local predicate with maximum rank */ - for (tmplist = clauselist, - maxcinfo = (RestrictInfo) lfirst(tmplist), - maxrank = xfunc_rank(get_clause(maxcinfo)); - tmplist != LispNil; - tmplist = lnext(tmplist)) - { - - if ((tmprank = xfunc_rank(get_clause((RestrictInfo) lfirst(tmplist)))) - > maxrank) - { - maxcinfo = (RestrictInfo) lfirst(tmplist); - maxrank = tmprank; - } - } - } - - /* - * * If child is a join path, and there are multiple join clauses, * - * see if any join clause has even higher rank than the highest * - * local predicate - */ - if (is_join(childpath) && xfunc_num_join_clauses((JoinPath) childpath) > 1) - for (tmplist = get_pathrestrictinfo((JoinPath) childpath); - tmplist != LispNil; - tmplist = lnext(tmplist)) - { - - if (tmplist != LispNil && - (tmprank = xfunc_rank(get_clause((RestrictInfo) lfirst(tmplist)))) - > maxrank) - { - maxcinfo = (RestrictInfo) lfirst(tmplist); - maxrank = tmprank; - retval = XFUNC_JOINPRD; - } - } - if (maxrank == (-1 * MAXFLOAT)) /* no expensive clauses */ - return 0; - - /* - * * Pullup over join if clause is higher rank than join, or if * join - * is 
nested loop and current path is inner child (note that * - * restrictions on the inner of a nested loop don't buy you anything - * -- * you still have to scan the entire inner relation each time). * - * Note that the cost of a secondary join clause is only what's * - * calculated by xfunc_expense(), since the actual joining * (i.e. the - * usual path_cost) is paid for by the primary join clause. - */ - if (primjoinclause != LispNil) - { - joinselec = compute_clause_selec(queryInfo, primjoinclause, LispNil); - joincost = xfunc_join_expense(parentpath, whichchild); - - if (XfuncMode == XFUNC_PULLALL || - (XfuncMode != XFUNC_WAIT && - ((joincost != 0 && - (maxrank = xfunc_rank(get_clause(maxcinfo))) > - ((joinselec - 1.0) / joincost)) - || (joincost == 0 && joinselec < 1) - || (!is_join(childpath) - && (whichchild == INNER) - && IsA(parentpath, NestPath) - &&!IsA(parentpath, HashPath) - &&!IsA(parentpath, MergePath))))) - { - - *maxcinfopt = maxcinfo; - return retval; - - } - else if (maxrank != -(MAXFLOAT)) - { - /* - * * we've left an expensive restriction below a join. Since * - * we may pullup this restriction in predmig.c, we'd best * - * set the RelOptInfo of this join to be unpruneable - */ - set_pruneable(get_parent(parentpath), false); - /* and fall through */ - } - } - return 0; -} - - -/* - ** xfunc_pullup - ** move clause from child pathnode to parent pathnode. This operation - ** makes the child pathnode produce a larger relation than it used to. - ** This means that we must construct a new RelOptInfo just for the childpath, - ** although this RelOptInfo will not be added to the list of Rels to be joined up - ** in the query; it's merely a parent for the new childpath. - ** We also have to fix up the path costs of the child and parent. - ** - ** Now returns a pointer to the new pulled-up RestrictInfo. 
-- JMH, 11/18/92 - */ -RestrictInfo -xfunc_pullup(Query *queryInfo, - Path childpath, - JoinPath parentpath, - RestrictInfo cinfo, /* clause to pull up */ - int whichchild, /* whether child is INNER or OUTER of join */ - int clausetype) /* whether clause to pull is join or local */ -{ - Path newkid; - RelOptInfo newrel; - Cost pulled_selec; - Cost cost; - RestrictInfo newinfo; - - /* remove clause from childpath */ - newkid = (Path) copyObject((Node) childpath); - if (clausetype == XFUNC_LOCPRD) - { - set_locrestrictinfo(newkid, - xfunc_LispRemove((LispValue) cinfo, - (List) get_loc_restrictinfo(newkid))); - } - else - { - set_pathrestrictinfo - ((JoinPath) newkid, - xfunc_LispRemove((LispValue) cinfo, - (List) get_pathrestrictinfo((JoinPath) newkid))); - } - - /* - * * give the new child path its own RelOptInfo node that reflects the * - * lack of the pulled-up predicate - */ - pulled_selec = compute_clause_selec(queryInfo, - get_clause(cinfo), LispNil); - xfunc_copyrel(get_parent(newkid), &newrel); - set_parent(newkid, newrel); - set_pathlist(newrel, makeList1(newkid)); - set_unorderedpath(newrel, (PathPtr) newkid); - set_cheapestpath(newrel, (PathPtr) newkid); - set_size(newrel, - (Count) ((Cost) get_size(get_parent(childpath)) / pulled_selec)); - - /* - * * fix up path cost of newkid. To do this we subtract away all the * - * xfunc_costs of childpath, then recompute the xfunc_costs of newkid - */ - cost = get_path_cost(newkid) - xfunc_get_path_cost(childpath); - Assert(cost >= 0); - set_path_cost(newkid, cost); - cost = get_path_cost(newkid) + xfunc_get_path_cost(newkid); - set_path_cost(newkid, cost); - - /* - * * We copy the cinfo, since it may appear in other plans, and we're - * going * to munge it. 
-- JMH, 7/22/92 - */ - newinfo = (RestrictInfo) copyObject((Node) cinfo); - - /* - * * Fix all vars in the clause * to point to the right varno and - * varattno in parentpath - */ - xfunc_fixvars(get_clause(newinfo), newrel, whichchild); - - /* add clause to parentpath, and fix up its cost. */ - set_locrestrictinfo(parentpath, - lispCons((LispValue) newinfo, - (LispValue) get_loc_restrictinfo(parentpath))); - /* put new childpath into the path tree */ - if (whichchild == INNER) - set_innerjoinpath(parentpath, (pathPtr) newkid); - else - set_outerjoinpath(parentpath, (pathPtr) newkid); - - /* - * * recompute parentpath cost from scratch -- the cost * of the join - * method has changed - */ - cost = xfunc_total_path_cost(parentpath); - set_path_cost(parentpath, cost); - - return newinfo; -} - -/* - ** calculate (selectivity-1)/cost. - */ -Cost -xfunc_rank(Query *queryInfo, LispValue clause) -{ - Cost selec = compute_clause_selec(queryInfo, clause, LispNil); - Cost cost = xfunc_expense(queryInfo, clause); - - if (cost == 0) - if (selec > 1) - return MAXFLOAT; - else - return -(MAXFLOAT); - return (selec - 1) / cost; -} - -/* - ** Find the "global" expense of a clause; i.e. the local expense divided - ** by the cardinalities of all the base relations of the query that are *not* - ** referenced in the clause. - */ -Cost -xfunc_expense(Query *queryInfo, clause) -LispValue clause; -{ - Cost cost = xfunc_local_expense(clause); - - if (cost) - { - Count card = xfunc_card_unreferenced(queryInfo, clause, LispNil); - - if (card) - cost /= card; - } - - return cost; -} - -/* - ** xfunc_join_expense - ** Find global expense of a join clause - */ -Cost -xfunc_join_expense(Query *queryInfo, JoinPath path, int whichchild) -{ - LispValue primjoinclause = xfunc_primary_join(path); - - /* - * * the second argument to xfunc_card_unreferenced reflects all the * - * relations involved in the join clause, i.e. 
all the relids in the - * RelOptInfo * of the join clause - */ - Count card = 0; - Cost cost = xfunc_expense_per_tuple(path, whichchild); - - card = xfunc_card_unreferenced(queryInfo, - primjoinclause, - get_relids(get_parent(path))); - if (primjoinclause) - cost += xfunc_local_expense(primjoinclause); - - if (card) - cost /= card; - - return cost; -} - -/* - ** Recursively find the per-tuple expense of a clause. See - ** xfunc_func_expense for more discussion. - */ -Cost -xfunc_local_expense(LispValue clause) -{ - Cost cost = 0; /* running expense */ - LispValue tmpclause; - - /* First handle the base case */ - if (IsA(clause, Const) ||IsA(clause, Var) ||IsA(clause, Param)) - return 0; - /* now other stuff */ - else if (IsA(clause, Iter)) - /* Too low. Should multiply by the expected number of iterations. */ - return xfunc_local_expense(get_iterexpr((Iter) clause)); - else if (IsA(clause, ArrayRef)) - return xfunc_local_expense(get_refexpr((ArrayRef) clause)); - else if (fast_is_clause(clause)) - return (xfunc_func_expense((LispValue) get_op(clause), - (LispValue) get_opargs(clause))); - else if (fast_is_funcclause(clause)) - return (xfunc_func_expense((LispValue) get_function(clause), - (LispValue) get_funcargs(clause))); - else if (fast_not_clause(clause)) - return xfunc_local_expense(lsecond(clause)); - else if (fast_or_clause(clause) || fast_and_clause(clause)) - { - /* find cost of evaluating each disjunct */ - for (tmpclause = lnext(clause); tmpclause != LispNil; - tmpclause = lnext(tmpclause)) - cost += xfunc_local_expense(lfirst(tmpclause)); - return cost; - } - else - { - elog(ERROR, "Clause node of undetermined type"); - return -1; - } -} - -/* - ** xfunc_func_expense - ** given a Func or Oper and its args, find its expense. - ** Note: in Stonebraker's SIGMOD '91 paper, he uses a more complicated metric - ** than the one here. We can ignore the expected number of tuples for - ** our calculations; we just need the per-tuple expense. 
But he also - ** proposes components to take into account the costs of accessing disk and - ** archive. We didn't adopt that scheme here; eventually the vacuum - ** cleaner should be able to tell us what percentage of bytes to find on - ** which storage level, and that should be multiplied in appropriately - ** in the cost function below. Right now we don't model the cost of - ** accessing secondary or tertiary storage, since we don't have sufficient - ** stats to do it right. - */ -Cost -xfunc_func_expense(LispValue node, LispValue args) -{ - HeapTuple tupl; /* the pg_proc tuple for each function */ - Form_pg_proc proc; /* a data structure to hold the pg_proc - * tuple */ - int width = 0; /* byte width of the field referenced by - * each clause */ - RegProcedure funcid; /* ID of function associate with node */ - Cost cost = 0; /* running expense */ - LispValue tmpclause; - LispValue operand; /* one operand of an operator */ - - if (IsA(node, Oper)) - { - /* don't trust the opid in the Oper node. Use the opno. */ - if (!(funcid = get_opcode(get_opno((Oper) node)))) - elog(ERROR, "Oper's function is undefined"); - } - else - funcid = get_funcid((Func) node); - - /* look up tuple in cache */ - tupl = SearchSysCacheTuple(PROCOID, - ObjectIdGetDatum(funcid), - 0, 0, 0); - if (!HeapTupleIsValid(tupl)) - elog(ERROR, "Cache lookup failed for procedure %u", funcid); - proc = (Form_pg_proc) GETSTRUCT(tupl); - - /* - * * if it's a Postquel function, its cost is stored in the * - * associated plan. - */ - if (proc->prolang == SQLlanguageId) - { - LispValue tmpplan; - List planlist; - - if (IsA(node, Oper) ||get_func_planlist((Func) node) == LispNil) - { - Oid *argOidVect; /* vector of argtypes */ - char *pq_src; /* text of PQ function */ - int nargs; /* num args to PQ function */ - QueryTreeList *queryTree_list; /* dummy variable */ - - /* - * * plan the function, storing it in the Func node for later * - * use by the executor. 
- */ - pq_src = (char *) textout(&(proc->prosrc)); - nargs = proc->pronargs; - if (nargs > 0) - argOidVect = proc->proargtypes; - planlist = (List) pg_parse_and_plan(pq_src, argOidVect, nargs, - &parseTree_list, None, FALSE); - if (IsA(node, Func)) - set_func_planlist((Func) node, planlist); - - } - else - { /* plan has been cached inside the Func - * node already */ - planlist = get_func_planlist((Func) node); - } - - /* - * * Return the sum of the costs of the plans (the PQ function * - * may have many queries in its body). - */ - foreach(tmpplan, planlist) - cost += get_cost((Plan) lfirst(tmpplan)); - return cost; - } - else - { /* it's a C function */ - - /* - * * find the cost of evaluating the function's arguments * and - * the width of the operands - */ - for (tmpclause = args; tmpclause != LispNil; - tmpclause = lnext(tmpclause)) - { - - if ((operand = lfirst(tmpclause)) != LispNil) - { - cost += xfunc_local_expense(operand); - width += xfunc_width(operand); - } - } - - /* - * * when stats become available, add in cost of accessing - * secondary * and tertiary storage here. 
- */ - return (cost + - (Cost) proc->propercall_cpu + - (Cost) proc->properbyte_cpu * (Cost) proc->probyte_pct / 100.00 * - (Cost) width - - /* - * Pct_of_obj_in_mem DISK_COST * proc->probyte_pct/100.00 * width - * Pct_of_obj_on_disk + ARCH_COST * proc->probyte_pct/100.00 * - * width Pct_of_obj_on_arch - */ - ); - } -} - -/* - ** xfunc_width - ** recursively find the width of a expression - */ - -int -xfunc_width(LispValue clause) -{ - Relation rd; /* Relation Descriptor */ - HeapTuple tupl; /* structure to hold a cached tuple */ - Form_pg_type type; /* structure to hold a type tuple */ - int retval = 0; - - if (IsA(clause, Const)) - { - /* base case: width is the width of this constant */ - retval = get_constlen((Const) clause); - goto exit; - } - else if (IsA(clause, ArrayRef)) - { - /* base case: width is width of the refelem within the array */ - retval = get_refelemlength((ArrayRef) clause); - goto exit; - } - else if (IsA(clause, Var)) - { - /* base case: width is width of this attribute */ - tupl = SearchSysCacheTuple(TYPEOID, - ObjectIdGetDatum(get_vartype((Var) clause)), - 0, 0, 0); - if (!HeapTupleIsValid(tupl)) - elog(ERROR, "Cache lookup failed for type %u", - get_vartype((Var) clause)); - type = (Form_pg_type) GETSTRUCT(tupl); - if (get_varattno((Var) clause) == 0) - { - /* clause is a tuple. Get its width */ - rd = heap_open(type->typrelid); - retval = xfunc_tuple_width(rd); - heap_close(rd); - } - else - { - /* attribute is a base type */ - retval = type->typlen; - } - goto exit; - } - else if (IsA(clause, Param)) - { - if (typeidTypeRelids(get_paramtype((Param) clause))) - { - /* Param node returns a tuple. 
Find its width */ - rd = heap_open(typeidTypeRelids(get_paramtype((Param) clause))); - retval = xfunc_tuple_width(rd); - heap_close(rd); - } - else if (get_param_tlist((Param) clause) != LispNil) - { - /* Param node projects a complex type */ - Assert(length(get_param_tlist((Param) clause)) == 1); /* sanity */ - retval = xfunc_width((LispValue) - get_expr(lfirst(get_param_tlist((Param) clause)))); - } - else - { - /* Param node returns a base type */ - retval = typeLen(typeidType(get_paramtype((Param) clause))); - } - goto exit; - } - else if (IsA(clause, Iter)) - { - /* - * * An Iter returns a setof things, so return the width of a - * single * thing. * Note: THIS MAY NOT WORK RIGHT WHEN AGGS GET - * FIXED, * SINCE AGG FUNCTIONS CHEW ON THE WHOLE SETOF THINGS!!!! * - * This whole Iter business is bogus, anyway. - */ - retval = xfunc_width(get_iterexpr((Iter) clause)); - goto exit; - } - else if (fast_is_clause(clause)) - { - /* - * * get function associated with this Oper, and treat this as * a - * Func - */ - tupl = SearchSysCacheTuple(OPEROID, - ObjectIdGetDatum(get_opno((Oper) get_op(clause))), - 0, 0, 0); - if (!HeapTupleIsValid(tupl)) - elog(ERROR, "Cache lookup failed for procedure %u", - get_opno((Oper) get_op(clause))); - return (xfunc_func_width - ((RegProcedure) (((Form_pg_operator) (GETSTRUCT(tupl)))->oprcode), - (LispValue) get_opargs(clause))); - } - else if (fast_is_funcclause(clause)) - { - Func func = (Func) get_function(clause); - - if (get_func_tlist(func) != LispNil) - { - /* - * this function has a projection on it. 
Get the length of - * the projected attribute - */ - Assert(length(get_func_tlist(func)) == 1); /* sanity */ - retval = xfunc_width((LispValue) - get_expr(lfirst(get_func_tlist(func)))); - goto exit; - } - else - { - return (xfunc_func_width((RegProcedure) get_funcid(func), - (LispValue) get_funcargs(clause))); - } - } - else - { - elog(ERROR, "Clause node of undetermined type"); - return -1; - } - -exit: - if (retval == -1) - retval = VARLEN_DEFAULT; - return retval; -} - -/* - ** xfunc_card_unreferenced: - ** find all relations not referenced in clause, and multiply their - ** cardinalities. Ignore relation of cardinality 0. - ** User may pass in referenced list, if they know it (useful - ** for joins). - */ -static Count -xfunc_card_unreferenced(Query *queryInfo, - LispValue clause, Relids referenced) -{ - Relids unreferenced, - allrelids = LispNil; - LispValue temp; - - /* find all relids of base relations referenced in query */ - foreach(temp, queryInfo->base_rel_list) - { - Assert(lnext(get_relids((RelOptInfo) lfirst(temp))) == LispNil); - allrelids = lappend(allrelids, - lfirst(get_relids((RelOptInfo) lfirst(temp)))); - } - - /* find all relids referenced in query but not in clause */ - if (!referenced) - referenced = xfunc_find_references(clause); - unreferenced = set_difference(allrelids, referenced); - - return xfunc_card_product(unreferenced); -} - -/* - ** xfunc_card_product - ** multiple together cardinalities of a list relations. 
- */ -Count -xfunc_card_product(Query *queryInfo, Relids relids) -{ - LispValue cinfonode; - LispValue temp; - RelOptInfo currel; - Cost tuples; - Count retval = 0; - - foreach(temp, relids) - { - currel = get_rel(lfirst(temp)); - tuples = get_tuples(currel); - - if (tuples) - { /* not of cardinality 0 */ - /* factor in the selectivity of all zero-cost clauses */ - foreach(cinfonode, get_restrictinfo(currel)) - { - if (!xfunc_expense(queryInfo, get_clause((RestrictInfo) lfirst(cinfonode)))) - tuples *= compute_clause_selec(queryInfo, - get_clause((RestrictInfo) lfirst(cinfonode)), - LispNil); - } - - if (retval == 0) - retval = tuples; - else - retval *= tuples; - } - } - if (retval == 0) - retval = 1; /* saves caller from dividing by zero */ - return retval; -} - - -/* - ** xfunc_find_references: - ** Traverse a clause and find all relids referenced in the clause. - */ -List -xfunc_find_references(LispValue clause) -{ - List retval = (List) LispNil; - LispValue tmpclause; - - /* Base cases */ - if (IsA(clause, Var)) - return lispCons(lfirst(get_varid((Var) clause)), LispNil); - else if (IsA(clause, Const) ||IsA(clause, Param)) - return (List) LispNil; - - /* recursion */ - else if (IsA(clause, Iter)) - - /* - * Too low. Should multiply by the expected number of iterations. 
- * maybe - */ - return xfunc_find_references(get_iterexpr((Iter) clause)); - else if (IsA(clause, ArrayRef)) - return xfunc_find_references(get_refexpr((ArrayRef) clause)); - else if (fast_is_clause(clause)) - { - /* string together result of all operands of Oper */ - for (tmpclause = (LispValue) get_opargs(clause); tmpclause != LispNil; - tmpclause = lnext(tmpclause)) - retval = nconc(retval, xfunc_find_references(lfirst(tmpclause))); - return retval; - } - else if (fast_is_funcclause(clause)) - { - /* string together result of all args of Func */ - for (tmpclause = (LispValue) get_funcargs(clause); - tmpclause != LispNil; - tmpclause = lnext(tmpclause)) - retval = nconc(retval, xfunc_find_references(lfirst(tmpclause))); - return retval; - } - else if (fast_not_clause(clause)) - return xfunc_find_references(lsecond(clause)); - else if (fast_or_clause(clause) || fast_and_clause(clause)) - { - /* string together result of all operands of OR */ - for (tmpclause = lnext(clause); tmpclause != LispNil; - tmpclause = lnext(tmpclause)) - retval = nconc(retval, xfunc_find_references(lfirst(tmpclause))); - return retval; - } - else - { - elog(ERROR, "Clause node of undetermined type"); - return (List) LispNil; - } -} - -/* - ** xfunc_primary_join: - ** Find the primary join clause: for Hash and Merge Joins, this is the - ** min rank Hash or Merge clause, while for Nested Loop it's the - ** min rank pathclause - */ -LispValue -xfunc_primary_join(JoinPath pathnode) -{ - LispValue joinclauselist = get_pathrestrictinfo(pathnode); - RestrictInfo mincinfo; - LispValue tmplist; - LispValue minclause = LispNil; - Cost minrank, - tmprank; - - if (IsA(pathnode, MergePath)) - { - for (tmplist = get_path_mergeclauses((MergePath) pathnode), - minclause = lfirst(tmplist), - minrank = xfunc_rank(minclause); - tmplist != LispNil; - tmplist = lnext(tmplist)) - if ((tmprank = xfunc_rank(lfirst(tmplist))) - < minrank) - { - minrank = tmprank; - minclause = lfirst(tmplist); - } - return 
minclause; - } - else if (IsA(pathnode, HashPath)) - { - for (tmplist = get_path_hashclauses((HashPath) pathnode), - minclause = lfirst(tmplist), - minrank = xfunc_rank(minclause); - tmplist != LispNil; - tmplist = lnext(tmplist)) - if ((tmprank = xfunc_rank(lfirst(tmplist))) - < minrank) - { - minrank = tmprank; - minclause = lfirst(tmplist); - } - return minclause; - } - - /* if we drop through, it's nested loop join */ - if (joinclauselist == LispNil) - return LispNil; - - for (tmplist = joinclauselist, mincinfo = (RestrictInfo) lfirst(joinclauselist), - minrank = xfunc_rank(get_clause((RestrictInfo) lfirst(tmplist))); - tmplist != LispNil; - tmplist = lnext(tmplist)) - if ((tmprank = xfunc_rank(get_clause((RestrictInfo) lfirst(tmplist)))) - < minrank) - { - minrank = tmprank; - mincinfo = (RestrictInfo) lfirst(tmplist); - } - return (LispValue) get_clause(mincinfo); -} - -/* - ** xfunc_get_path_cost - ** get the expensive function costs of the path - */ -Cost -xfunc_get_path_cost(Query *queryInfo, Path pathnode) -{ - Cost cost = 0; - LispValue tmplist; - Cost selec = 1.0; - - /* - * * first add in the expensive local function costs. * We ensure that - * the clauses are sorted by rank, so that we * know (via - * selectivities) the number of tuples that will be checked * by each - * function. If we're not doing any optimization of expensive * - * functions, we don't sort. - */ - if (XfuncMode != XFUNC_OFF) - set_locrestrictinfo(pathnode, lisp_qsort(get_loc_restrictinfo(pathnode), - xfunc_cinfo_compare)); - for (tmplist = get_loc_restrictinfo(pathnode), selec = 1.0; - tmplist != LispNil; - tmplist = lnext(tmplist)) - { - cost += (Cost) (xfunc_local_expense(get_clause((RestrictInfo) lfirst(tmplist))) - * (Cost) get_tuples(get_parent(pathnode)) * selec); - selec *= compute_clause_selec(queryInfo, - get_clause((RestrictInfo) lfirst(tmplist)), - LispNil); - } - - /* - * * Now add in any node-specific expensive function costs. 
* Again, - * we must ensure that the clauses are sorted by rank. - */ - if (IsA(pathnode, JoinPath)) - { - if (XfuncMode != XFUNC_OFF) - set_pathrestrictinfo((JoinPath) pathnode, lisp_qsort - (get_pathrestrictinfo((JoinPath) pathnode), - xfunc_cinfo_compare)); - for (tmplist = get_pathrestrictinfo((JoinPath) pathnode), selec = 1.0; - tmplist != LispNil; - tmplist = lnext(tmplist)) - { - cost += (Cost) (xfunc_local_expense(get_clause((RestrictInfo) lfirst(tmplist))) - * (Cost) get_tuples(get_parent(pathnode)) * selec); - selec *= compute_clause_selec(queryInfo, - get_clause((RestrictInfo) lfirst(tmplist)), - LispNil); - } - } - if (IsA(pathnode, HashPath)) - { - if (XfuncMode != XFUNC_OFF) - set_path_hashclauses - ((HashPath) pathnode, - lisp_qsort(get_path_hashclauses((HashPath) pathnode), - xfunc_clause_compare)); - for (tmplist = get_path_hashclauses((HashPath) pathnode), selec = 1.0; - tmplist != LispNil; - tmplist = lnext(tmplist)) - { - cost += (Cost) (xfunc_local_expense(lfirst(tmplist)) - * (Cost) get_tuples(get_parent(pathnode)) * selec); - selec *= compute_clause_selec(queryInfo, - lfirst(tmplist), LispNil); - } - } - if (IsA(pathnode, MergePath)) - { - if (XfuncMode != XFUNC_OFF) - set_path_mergeclauses - ((MergePath) pathnode, - lisp_qsort(get_path_mergeclauses((MergePath) pathnode), - xfunc_clause_compare)); - for (tmplist = get_path_mergeclauses((MergePath) pathnode), selec = 1.0; - tmplist != LispNil; - tmplist = lnext(tmplist)) - { - cost += (Cost) (xfunc_local_expense(lfirst(tmplist)) - * (Cost) get_tuples(get_parent(pathnode)) * selec); - selec *= compute_clause_selec(queryInfo, - lfirst(tmplist), LispNil); - } - } - Assert(cost >= 0); - return cost; -} - -/* - ** Recalculate the cost of a path node. This includes the basic cost of the - ** node, as well as the cost of its expensive functions. - ** We need to do this to the parent after pulling a clause from a child into a - ** parent. Thus we should only be calling this function on JoinPaths. 
- */ -Cost -xfunc_total_path_cost(JoinPath pathnode) -{ - Cost cost = xfunc_get_path_cost((Path) pathnode); - - Assert(IsA(pathnode, JoinPath)); - if (IsA(pathnode, MergePath)) - { - MergePath mrgnode = (MergePath) pathnode; - - cost += cost_mergejoin(get_path_cost((Path) get_outerjoinpath(mrgnode)), - get_path_cost((Path) get_innerjoinpath(mrgnode)), - get_outersortkeys(mrgnode), - get_innersortkeys(mrgnode), - get_tuples(get_parent((Path) get_outerjoinpath - (mrgnode))), - get_tuples(get_parent((Path) get_innerjoinpath - (mrgnode))), - get_width(get_parent((Path) get_outerjoinpath - (mrgnode))), - get_width(get_parent((Path) get_innerjoinpath - (mrgnode)))); - Assert(cost >= 0); - return cost; - } - else if (IsA(pathnode, HashPath)) - { - HashPath hashnode = (HashPath) pathnode; - - cost += cost_hashjoin(get_path_cost((Path) get_outerjoinpath(hashnode)), - get_path_cost((Path) get_innerjoinpath(hashnode)), - get_outerhashkeys(hashnode), - get_innerhashkeys(hashnode), - get_tuples(get_parent((Path) get_outerjoinpath - (hashnode))), - get_tuples(get_parent((Path) get_innerjoinpath - (hashnode))), - get_width(get_parent((Path) get_outerjoinpath - (hashnode))), - get_width(get_parent((Path) get_innerjoinpath - (hashnode)))); - Assert(cost >= 0); - return cost; - } - else -/* Nested Loop Join */ - { - cost += cost_nestloop(get_path_cost((Path) get_outerjoinpath(pathnode)), - get_path_cost((Path) get_innerjoinpath(pathnode)), - get_tuples(get_parent((Path) get_outerjoinpath - (pathnode))), - get_tuples(get_parent((Path) get_innerjoinpath - (pathnode))), - get_pages(get_parent((Path) get_outerjoinpath - (pathnode))), - IsA(get_innerjoinpath(pathnode), IndexPath)); - Assert(cost >= 0); - return cost; - } -} - - -/* - ** xfunc_expense_per_tuple - ** return the expense of the join *per-tuple* of the input relation. 
- ** The cost model here is that a join costs - ** k*card(outer)*card(inner) + l*card(outer) + m*card(inner) + n - ** - ** We treat the l and m terms by considering them to be like restrictions - ** constrained to be right under the join. Thus the cost per inner and - ** cost per outer of the join is different, reflecting these virtual nodes. - ** - ** The cost per tuple of outer is k + l/referenced(inner). Cost per tuple - ** of inner is k + m/referenced(outer). - ** The constants k, l, m and n depend on the join method. Measures here are - ** based on the costs in costsize.c, with fudging for HashJoin and Sorts to - ** make it fit our model (the 'q' in HashJoin results in a - ** card(outer)/card(inner) term, and sorting results in a log term. - - */ -Cost -xfunc_expense_per_tuple(JoinPath joinnode, int whichchild) -{ - RelOptInfo outerrel = get_parent((Path) get_outerjoinpath(joinnode)); - RelOptInfo innerrel = get_parent((Path) get_innerjoinpath(joinnode)); - Count outerwidth = get_width(outerrel); - Count outers_per_page = ceil(BLCKSZ / (outerwidth + MinTupleSize)); - - if (IsA(joinnode, HashPath)) - { - if (whichchild == INNER) - return (1 + cpu_page_weight) * outers_per_page / NBuffers; - else - return (((1 + cpu_page_weight) * outers_per_page / NBuffers) - + cpu_page_weight - / xfunc_card_product(get_relids(innerrel))); - } - else if (IsA(joinnode, MergePath)) - { - /* assumes sort exists, and costs one (I/O + CPU) per tuple */ - if (whichchild == INNER) - return ((2 * cpu_page_weight + 1) - / xfunc_card_product(get_relids(outerrel))); - else - return ((2 * cpu_page_weight + 1) - / xfunc_card_product(get_relids(innerrel))); - } - else -/* nestloop */ - { - Assert(IsA(joinnode, JoinPath)); - return cpu_page_weight; - } -} - -/* - ** xfunc_fixvars - ** After pulling up a clause, we must walk its expression tree, fixing Var - ** nodes to point to the correct varno (either INNER or OUTER, depending - ** on which child the clause was pulled from), and the right 
varattno in the - ** target list of the child's former relation. If the target list of the - ** child RelOptInfo does not contain the attribute we need, we add it. - */ -void -xfunc_fixvars(LispValue clause, /* clause being pulled up */ - RelOptInfo rel, /* rel it's being pulled from */ - int varno) /* whether rel is INNER or OUTER of join */ -{ - LispValue tmpclause; /* temporary variable */ - TargetEntry *tle; /* tlist member corresponding to var */ - - - if (IsA(clause, Const) ||IsA(clause, Param)) - return; - else if (IsA(clause, Var)) - { - /* here's the meat */ - tle = tlistentry_member((Var) clause, get_targetlist(rel)); - if (tle == LispNil) - { - /* - * * The attribute we need is not in the target list, * so we - * have to add it. * - * - */ - add_var_to_tlist(rel, (Var) clause); - tle = tlistentry_member((Var) clause, get_targetlist(rel)); - } - set_varno(((Var) clause), varno); - set_varattno(((Var) clause), get_resno(get_resdom(get_entry(tle)))); - } - else if (IsA(clause, Iter)) - xfunc_fixvars(get_iterexpr((Iter) clause), rel, varno); - else if (fast_is_clause(clause)) - { - xfunc_fixvars(lfirst(lnext(clause)), rel, varno); - xfunc_fixvars(lfirst(lnext(lnext(clause))), rel, varno); - } - else if (fast_is_funcclause(clause)) - for (tmpclause = lnext(clause); tmpclause != LispNil; - tmpclause = lnext(tmpclause)) - xfunc_fixvars(lfirst(tmpclause), rel, varno); - else if (fast_not_clause(clause)) - xfunc_fixvars(lsecond(clause), rel, varno); - else if (fast_or_clause(clause) || fast_and_clause(clause)) - for (tmpclause = lnext(clause); tmpclause != LispNil; - tmpclause = lnext(tmpclause)) - xfunc_fixvars(lfirst(tmpclause), rel, varno); - else - elog(ERROR, "Clause node of undetermined type"); -} - - -/* - ** Comparison function for lisp_qsort() on a list of RestrictInfo's. - ** arg1 and arg2 should really be of type (RestrictInfo *). 
- */ -int -xfunc_cinfo_compare(void *arg1, void *arg2) -{ - RestrictInfo info1 = *(RestrictInfo *) arg1; - RestrictInfo info2 = *(RestrictInfo *) arg2; - - LispValue clause1 = (LispValue) get_clause(info1), - clause2 = (LispValue) get_clause(info2); - - return xfunc_clause_compare((void *) &clause1, (void *) &clause2); -} - -/* - ** xfunc_clause_compare: comparison function for lisp_qsort() that compares two - ** clauses based on expense/(1 - selectivity) - ** arg1 and arg2 are really pointers to clauses. - */ -int -xfunc_clause_compare(void *arg1, void *arg2) -{ - LispValue clause1 = *(LispValue *) arg1; - LispValue clause2 = *(LispValue *) arg2; - Cost rank1, /* total xfunc rank of clause1 */ - rank2; /* total xfunc rank of clause2 */ - - rank1 = xfunc_rank(clause1); - rank2 = xfunc_rank(clause2); - - if (rank1 < rank2) - return -1; - else if (rank1 == rank2) - return 0; - else - return 1; -} - -/* - ** xfunc_disjunct_sort - ** given a list of clauses, for each clause sort the disjuncts by cost - ** (this assumes the predicates have been converted to Conjunctive NF) - ** Modifies the clause list! - */ -void -xfunc_disjunct_sort(LispValue clause_list) -{ - LispValue temp; - - foreach(temp, clause_list) - if (or_clause(lfirst(temp))) - lnext(lfirst(temp)) = lisp_qsort(lnext(lfirst(temp)), xfunc_disjunct_compare); -} - - -/* - ** xfunc_disjunct_compare: comparison function for qsort() that compares two - ** disjuncts based on cost/selec. 
- ** arg1 and arg2 are really pointers to disjuncts - */ -int -xfunc_disjunct_compare(Query *queryInfo, void *arg1, void *arg2) -{ - LispValue disjunct1 = *(LispValue *) arg1; - LispValue disjunct2 = *(LispValue *) arg2; - Cost cost1, /* total cost of disjunct1 */ - cost2, /* total cost of disjunct2 */ - selec1, - selec2; - Cost rank1, - rank2; - - cost1 = xfunc_expense(queryInfo, disjunct1); - cost2 = xfunc_expense(queryInfo, disjunct2); - selec1 = compute_clause_selec(queryInfo, - disjunct1, LispNil); - selec2 = compute_clause_selec(queryInfo, - disjunct2, LispNil); - - if (selec1 == 0) - rank1 = MAXFLOAT; - else if (cost1 == 0) - rank1 = 0; - else - rank1 = cost1 / selec1; - - if (selec2 == 0) - rank2 = MAXFLOAT; - else if (cost2 == 0) - rank2 = 0; - else - rank2 = cost2 / selec2; - - if (rank1 < rank2) - return -1; - else if (rank1 == rank2) - return 0; - else - return 1; -} - -/* ------------------------ UTILITY FUNCTIONS ------------------------------- */ -/* - ** xfunc_func_width - ** Given a function OID and operands, find the width of the return value. 
- */ -int -xfunc_func_width(RegProcedure funcid, LispValue args) -{ - Relation rd; /* Relation Descriptor */ - HeapTuple tupl; /* structure to hold a cached tuple */ - Form_pg_proc proc; /* structure to hold the pg_proc tuple */ - Form_pg_type type; /* structure to hold the pg_type tuple */ - LispValue tmpclause; - int retval; - - /* lookup function and find its return type */ - Assert(RegProcedureIsValid(funcid)); - tupl = SearchSysCacheTuple(PROCOID, - ObjectIdGetDatum(funcid), - 0, 0, 0); - if (!HeapTupleIsValid(tupl)) - elog(ERROR, "Cache lookup failed for procedure %u", funcid); - proc = (Form_pg_proc) GETSTRUCT(tupl); - - /* if function returns a tuple, get the width of that */ - if (typeidTypeRelids(proc->prorettype)) - { - rd = heap_open(typeidTypeRelids(proc->prorettype)); - retval = xfunc_tuple_width(rd); - heap_close(rd); - goto exit; - } - else -/* function returns a base type */ - { - tupl = SearchSysCacheTuple(TYPEOID, - ObjectIdGetDatum(proc->prorettype), - 0, 0, 0); - if (!HeapTupleIsValid(tupl)) - elog(ERROR, "Cache lookup failed for type %u", proc->prorettype); - type = (Form_pg_type) GETSTRUCT(tupl); - /* if the type length is known, return that */ - if (type->typlen != -1) - { - retval = type->typlen; - goto exit; - } - else -/* estimate the return size */ - { - /* find width of the function's arguments */ - for (tmpclause = args; tmpclause != LispNil; - tmpclause = lnext(tmpclause)) - retval += xfunc_width(lfirst(tmpclause)); - /* multiply by outin_ratio */ - retval = (int) (proc->prooutin_ratio / 100.0 * retval); - goto exit; - } - } -exit: - return retval; -} - -/* - ** xfunc_tuple_width - ** Return the sum of the lengths of all the attributes of a given relation - */ -int -xfunc_tuple_width(Relation rd) -{ - int i; - int retval = 0; - TupleDesc tdesc = RelationGetDescr(rd); - - for (i = 0; i < tdesc->natts; i++) - { - if (tdesc->attrs[i]->attlen != -1) - retval += tdesc->attrs[i]->attlen; - else - retval += VARLEN_DEFAULT; - } - - return 
retval; -} - -/* - ** xfunc_num_join_clauses - ** Find the number of join clauses associated with this join path - */ -int -xfunc_num_join_clauses(JoinPath path) -{ - int num = length(get_pathrestrictinfo(path)); - - if (IsA(path, MergePath)) - return num + length(get_path_mergeclauses((MergePath) path)); - else if (IsA(path, HashPath)) - return num + length(get_path_hashclauses((HashPath) path)); - else - return num; -} - -/* - ** xfunc_LispRemove - ** Just like LispRemove, but it whines if the item to be removed ain't there - */ -LispValue -xfunc_LispRemove(LispValue foo, List bar) -{ - LispValue temp = LispNil; - LispValue result = LispNil; - int sanity = false; - - for (temp = bar; !null(temp); temp = lnext(temp)) - if (!equal((Node) (foo), (Node) (lfirst(temp)))) - result = lappend(result, lfirst(temp)); - else - sanity = true; /* found a matching item to remove! */ - - if (!sanity) - elog(ERROR, "xfunc_LispRemove: didn't find a match!"); - - return result; -} - -#define Node_Copy(a, b, c, d) \ -do { \ - if (NodeCopy((Node)((a)->d), (Node*)&((b)->d), c) != true) \ - { \ - return false; \ - } \ -} while(0) - -/* - ** xfunc_copyrel - ** Just like _copyRel, but doesn't copy the paths - */ -bool -xfunc_copyrel(RelOptInfo from, RelOptInfo *to) -{ - RelOptInfo newnode; - - Pointer (*alloc) () = palloc; - - /* COPY_CHECKARGS() */ - if (to == NULL) - return false; - - /* COPY_CHECKNULL() */ - if (from == NULL) - { - (*to) = NULL; - return true; - } - - /* COPY_NEW(c) */ - newnode = (RelOptInfo) (*alloc) (classSize(RelOptInfo)); - if (newnode == NULL) - return false; - - /* - * copy node superclass fields - */ - CopyNodeFields((Node) from, (Node) newnode, alloc); - - /* - * copy remainder of node - */ - Node_Copy(from, newnode, alloc, relids); - - newnode->indexed = from->indexed; - newnode->pages = from->pages; - newnode->tuples = from->tuples; - newnode->size = from->size; - newnode->width = from->width; - - Node_Copy(from, newnode, alloc, targetlist); - - /* - * 
No!!!! Node_Copy(from, newnode, alloc, pathlist); - * Node_Copy(from, newnode, alloc, unorderedpath); Node_Copy(from, - * newnode, alloc, cheapestpath); - */ -#if 0 /* can't use Node_copy now. 2/95 -ay */ - Node_Copy(from, newnode, alloc, classlist); - Node_Copy(from, newnode, alloc, indexkeys); - Node_Copy(from, newnode, alloc, ordering); -#endif - Node_Copy(from, newnode, alloc, restrictinfo); - Node_Copy(from, newnode, alloc, joininfo); - Node_Copy(from, newnode, alloc, innerjoin); - - (*to) = newnode; - return true; -} diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c deleted file mode 100644 index 64e1c059dad..00000000000 --- a/src/backend/optimizer/path/allpaths.c +++ /dev/null @@ -1,704 +0,0 @@ -/*------------------------------------------------------------------------- - * - * allpaths.c - * Routines to find possible search paths for processing a query - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.86 2002/06/20 20:29:29 momjian Exp $ - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" - -#ifdef OPTIMIZER_DEBUG -#include "nodes/print.h" -#endif -#include "optimizer/clauses.h" -#include "optimizer/cost.h" -#include "optimizer/geqo.h" -#include "optimizer/pathnode.h" -#include "optimizer/paths.h" -#include "optimizer/plancat.h" -#include "optimizer/planner.h" -#include "optimizer/prep.h" -#include "parser/parsetree.h" -#include "rewrite/rewriteManip.h" - - -bool enable_geqo = true; -int geqo_rels = DEFAULT_GEQO_RELS; - - -static void set_base_rel_pathlists(Query *root); -static void set_plain_rel_pathlist(Query *root, RelOptInfo *rel, - RangeTblEntry *rte); -static void set_inherited_rel_pathlist(Query *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte, - 
List *inheritlist); -static void set_subquery_pathlist(Query *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte); -static void set_function_pathlist(Query *root, RelOptInfo *rel, - RangeTblEntry *rte); -static RelOptInfo *make_one_rel_by_joins(Query *root, int levels_needed, - List *initial_rels); - - -/* - * make_one_rel - * Finds all possible access paths for executing a query, returning a - * single rel that represents the join of all base rels in the query. - */ -RelOptInfo * -make_one_rel(Query *root) -{ - RelOptInfo *rel; - - /* - * Generate access paths for the base rels. - */ - set_base_rel_pathlists(root); - - /* - * Generate access paths for the entire join tree. - */ - Assert(root->jointree != NULL && IsA(root->jointree, FromExpr)); - - rel = make_fromexpr_rel(root, root->jointree); - - /* - * The result should join all the query's base rels. - */ - Assert(length(rel->relids) == length(root->base_rel_list)); - - return rel; -} - -/* - * set_base_rel_pathlists - * Finds all paths available for scanning each base-relation entry. - * Sequential scan and any available indices are considered. - * Each useful path is attached to its relation's 'pathlist' field. 
- */ -static void -set_base_rel_pathlists(Query *root) -{ - List *rellist; - - foreach(rellist, root->base_rel_list) - { - RelOptInfo *rel = (RelOptInfo *) lfirst(rellist); - Index rti; - RangeTblEntry *rte; - List *inheritlist; - - Assert(length(rel->relids) == 1); /* better be base rel */ - rti = lfirsti(rel->relids); - rte = rt_fetch(rti, root->rtable); - - if (rel->rtekind == RTE_SUBQUERY) - { - /* Subquery --- generate a separate plan for it */ - set_subquery_pathlist(root, rel, rti, rte); - } - else if (rel->rtekind == RTE_FUNCTION) - { - /* RangeFunction --- generate a separate plan for it */ - set_function_pathlist(root, rel, rte); - } - else if ((inheritlist = expand_inherted_rtentry(root, rti, true)) - != NIL) - { - /* Relation is root of an inheritance tree, process specially */ - set_inherited_rel_pathlist(root, rel, rti, rte, inheritlist); - } - else - { - /* Plain relation */ - set_plain_rel_pathlist(root, rel, rte); - } - -#ifdef OPTIMIZER_DEBUG - debug_print_rel(root, rel); -#endif - } -} - -/* - * set_plain_rel_pathlist - * Build access paths for a plain relation (no subquery, no inheritance) - */ -static void -set_plain_rel_pathlist(Query *root, RelOptInfo *rel, RangeTblEntry *rte) -{ - /* Mark rel with estimated output rows, width, etc */ - set_baserel_size_estimates(root, rel); - - /* - * Generate paths and add them to the rel's pathlist. - * - * Note: add_path() will discard any paths that are dominated by another - * available path, keeping only those paths that are superior along at - * least one dimension of cost or sortedness. 
- */ - - /* Consider sequential scan */ - add_path(rel, create_seqscan_path(root, rel)); - - /* Consider TID scans */ - create_tidscan_paths(root, rel); - - /* Consider index paths for both simple and OR index clauses */ - create_index_paths(root, rel); - - /* create_index_paths must be done before create_or_index_paths */ - create_or_index_paths(root, rel); - - /* Now find the cheapest of the paths for this rel */ - set_cheapest(rel); -} - -/* - * set_inherited_rel_pathlist - * Build access paths for a inheritance tree rooted at rel - * - * inheritlist is a list of RT indexes of all tables in the inheritance tree, - * including a duplicate of the parent itself. Note we will not come here - * unless there's at least one child in addition to the parent. - * - * NOTE: the passed-in rel and RTE will henceforth represent the appended - * result of the whole inheritance tree. The members of inheritlist represent - * the individual tables --- in particular, the inheritlist member that is a - * duplicate of the parent RTE represents the parent table alone. - * We will generate plans to scan the individual tables that refer to - * the inheritlist RTEs, whereas Vars elsewhere in the plan tree that - * refer to the original RTE are taken to refer to the append output. - * In particular, this means we have separate RelOptInfos for the parent - * table and for the append output, which is a good thing because they're - * not the same size. - */ -static void -set_inherited_rel_pathlist(Query *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte, - List *inheritlist) -{ - int parentRTindex = rti; - Oid parentOID = rte->relid; - List *subpaths = NIL; - List *il; - - /* - * XXX for now, can't handle inherited expansion of FOR UPDATE; can we - * do better? 
- */ - if (intMember(parentRTindex, root->rowMarks)) - elog(ERROR, "SELECT FOR UPDATE is not supported for inherit queries"); - - /* - * The executor will check the parent table's access permissions when - * it examines the parent's inheritlist entry. There's no need to - * check twice, so turn off access check bits in the original RTE. - */ - rte->checkForRead = false; - rte->checkForWrite = false; - - /* - * Initialize to compute size estimates for whole inheritance tree - */ - rel->rows = 0; - rel->width = 0; - - /* - * Generate access paths for each table in the tree (parent AND - * children), and pick the cheapest path for each table. - */ - foreach(il, inheritlist) - { - int childRTindex = lfirsti(il); - RangeTblEntry *childrte; - Oid childOID; - RelOptInfo *childrel; - - childrte = rt_fetch(childRTindex, root->rtable); - childOID = childrte->relid; - - /* - * Make a RelOptInfo for the child so we can do planning. Do NOT - * attach the RelOptInfo to the query's base_rel_list, however, - * since the child is not part of the main join tree. Instead, - * the child RelOptInfo is added to other_rel_list. - */ - childrel = build_other_rel(root, childRTindex); - - /* - * Copy the parent's targetlist and restriction quals to the - * child, with attribute-number adjustment as needed. We don't - * bother to copy the join quals, since we can't do any joining of - * the individual tables. - */ - childrel->targetlist = (List *) - adjust_inherited_attrs((Node *) rel->targetlist, - parentRTindex, - parentOID, - childRTindex, - childOID); - childrel->baserestrictinfo = (List *) - adjust_inherited_attrs((Node *) rel->baserestrictinfo, - parentRTindex, - parentOID, - childRTindex, - childOID); - childrel->baserestrictcost = rel->baserestrictcost; - - /* - * Now compute child access paths, and save the cheapest. 
- */ - set_plain_rel_pathlist(root, childrel, childrte); - - subpaths = lappend(subpaths, childrel->cheapest_total_path); - - /* Also update total size estimates */ - rel->rows += childrel->rows; - if (childrel->width > rel->width) - rel->width = childrel->width; - } - - /* - * Finally, build Append path and install it as the only access path - * for the parent rel. - */ - add_path(rel, (Path *) create_append_path(rel, subpaths)); - - /* Select cheapest path (pretty easy in this case...) */ - set_cheapest(rel); -} - -/* - * set_subquery_pathlist - * Build the (single) access path for a subquery RTE - */ -static void -set_subquery_pathlist(Query *root, RelOptInfo *rel, - Index rti, RangeTblEntry *rte) -{ - Query *subquery = rte->subquery; - - /* - * If there are any restriction clauses that have been attached to the - * subquery relation, consider pushing them down to become HAVING - * quals of the subquery itself. (Not WHERE clauses, since they may - * refer to subquery outputs that are aggregate results. But - * planner.c will transfer them into the subquery's WHERE if they do - * not.) This transformation is useful because it may allow us to - * generate a better plan for the subquery than evaluating all the - * subquery output rows and then filtering them. - * - * There are several cases where we cannot push down clauses: - * - * 1. If the subquery contains set ops (UNION/INTERSECT/EXCEPT) we do not - * push down any qual clauses, since the planner doesn't support quals - * at the top level of a setop. (With suitable analysis we could try - * to push the quals down into the component queries of the setop, but - * getting it right seems nontrivial. Work on this later.) - * - * 2. If the subquery has a LIMIT clause or a DISTINCT ON clause, we must - * not push down any quals, since that could change the set of rows - * returned. 
(Actually, we could push down quals into a DISTINCT ON - * subquery if they refer only to DISTINCT-ed output columns, but - * checking that seems more work than it's worth. In any case, a - * plain DISTINCT is safe to push down past.) - * - * 3. If the subquery has any functions returning sets in its target list, - * we do not push down any quals, since the quals - * might refer to those tlist items, which would mean we'd introduce - * functions-returning-sets into the subquery's WHERE/HAVING quals. - * (It'd be sufficient to not push down quals that refer to those - * particular tlist items, but that's much clumsier to check.) - * - * 4. We do not push down clauses that contain subselects, mainly because - * I'm not sure it will work correctly (the subplan hasn't yet - * transformed sublinks to subselects). - * - * Non-pushed-down clauses will get evaluated as qpquals of the - * SubqueryScan node. - * - * XXX Are there any cases where we want to make a policy decision not to - * push down, because it'd result in a worse plan? - */ - if (subquery->setOperations == NULL && - subquery->limitOffset == NULL && - subquery->limitCount == NULL && - !has_distinct_on_clause(subquery) && - !expression_returns_set((Node *) subquery->targetList)) - { - /* OK to consider pushing down individual quals */ - List *upperrestrictlist = NIL; - List *lst; - - foreach(lst, rel->baserestrictinfo) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lst); - Node *clause = (Node *) rinfo->clause; - - if (contain_subplans(clause)) - { - /* Keep it in the upper query */ - upperrestrictlist = lappend(upperrestrictlist, rinfo); - } - else - { - /* - * We need to replace Vars in the clause (which must refer - * to outputs of the subquery) with copies of the - * subquery's targetlist expressions. Note that at this - * point, any uplevel Vars in the clause should have been - * replaced with Params, so they need no work. 
- */ - clause = ResolveNew(clause, rti, 0, - subquery->targetList, - CMD_SELECT, 0); - subquery->havingQual = make_and_qual(subquery->havingQual, - clause); - - /* - * We need not change the subquery's hasAggs or - * hasSublinks flags, since we can't be pushing down any - * aggregates that weren't there before, and we don't push - * down subselects at all. - */ - } - } - rel->baserestrictinfo = upperrestrictlist; - } - - /* Generate the plan for the subquery */ - rel->subplan = subquery_planner(subquery, - -1.0 /* default case */ ); - - /* Copy number of output rows from subplan */ - rel->tuples = rel->subplan->plan_rows; - - /* Mark rel with estimated output rows, width, etc */ - set_baserel_size_estimates(root, rel); - - /* Generate appropriate path */ - add_path(rel, create_subqueryscan_path(rel)); - - /* Select cheapest path (pretty easy in this case...) */ - set_cheapest(rel); -} - -/* - * set_function_pathlist - * Build the (single) access path for a function RTE - */ -static void -set_function_pathlist(Query *root, RelOptInfo *rel, RangeTblEntry *rte) -{ - /* Mark rel with estimated output rows, width, etc */ - set_function_size_estimates(root, rel); - - /* Generate appropriate path */ - add_path(rel, create_functionscan_path(root, rel)); - - /* Select cheapest path (pretty easy in this case...) */ - set_cheapest(rel); -} - -/* - * make_fromexpr_rel - * Build access paths for a FromExpr jointree node. - */ -RelOptInfo * -make_fromexpr_rel(Query *root, FromExpr *from) -{ - int levels_needed; - List *initial_rels = NIL; - List *jt; - - /* - * Count the number of child jointree nodes. This is the depth of the - * dynamic-programming algorithm we must employ to consider all ways - * of joining the child nodes. - */ - levels_needed = length(from->fromlist); - - if (levels_needed <= 0) - return NULL; /* nothing to do? */ - - /* - * Construct a list of rels corresponding to the child jointree nodes. 
- * This may contain both base rels and rels constructed according to - * explicit JOIN directives. - */ - foreach(jt, from->fromlist) - { - Node *jtnode = (Node *) lfirst(jt); - - initial_rels = lappend(initial_rels, - make_jointree_rel(root, jtnode)); - } - - if (levels_needed == 1) - { - /* - * Single jointree node, so we're done. - */ - return (RelOptInfo *) lfirst(initial_rels); - } - else - { - /* - * Consider the different orders in which we could join the rels, - * using either GEQO or regular optimizer. - */ - if (enable_geqo && levels_needed >= geqo_rels) - return geqo(root, levels_needed, initial_rels); - else - return make_one_rel_by_joins(root, levels_needed, initial_rels); - } -} - -/* - * make_one_rel_by_joins - * Find all possible joinpaths for a query by successively finding ways - * to join component relations into join relations. - * - * 'levels_needed' is the number of iterations needed, ie, the number of - * independent jointree items in the query. This is > 1. - * - * 'initial_rels' is a list of RelOptInfo nodes for each independent - * jointree item. These are the components to be joined together. - * - * Returns the final level of join relations, i.e., the relation that is - * the result of joining all the original relations together. - */ -static RelOptInfo * -make_one_rel_by_joins(Query *root, int levels_needed, List *initial_rels) -{ - List **joinitems; - int lev; - RelOptInfo *rel; - - /* - * We employ a simple "dynamic programming" algorithm: we first find - * all ways to build joins of two jointree items, then all ways to - * build joins of three items (from two-item joins and single items), - * then four-item joins, and so on until we have considered all ways - * to join all the items into one rel. - * - * joinitems[j] is a list of all the j-item rels. Initially we set - * joinitems[1] to represent all the single-jointree-item relations. 
- */ - joinitems = (List **) palloc((levels_needed + 1) * sizeof(List *)); - MemSet(joinitems, 0, (levels_needed + 1) * sizeof(List *)); - - joinitems[1] = initial_rels; - - for (lev = 2; lev <= levels_needed; lev++) - { - List *x; - - /* - * Determine all possible pairs of relations to be joined at this - * level, and build paths for making each one from every available - * pair of lower-level relations. - */ - joinitems[lev] = make_rels_by_joins(root, lev, joinitems); - - /* - * Do cleanup work on each just-processed rel. - */ - foreach(x, joinitems[lev]) - { - rel = (RelOptInfo *) lfirst(x); - -#ifdef NOT_USED - - /* - * * for each expensive predicate in each path in each - * distinct rel, * consider doing pullup -- JMH - */ - if (XfuncMode != XFUNC_NOPULL && XfuncMode != XFUNC_OFF) - xfunc_trypullup(rel); -#endif - - /* Find and save the cheapest paths for this rel */ - set_cheapest(rel); - -#ifdef OPTIMIZER_DEBUG - debug_print_rel(root, rel); -#endif - } - } - - /* - * We should have a single rel at the final level. 
- */ - Assert(length(joinitems[levels_needed]) == 1); - - rel = (RelOptInfo *) lfirst(joinitems[levels_needed]); - - return rel; -} - -/***************************************************************************** - * - *****************************************************************************/ - -#ifdef OPTIMIZER_DEBUG - -static void -print_relids(Relids relids) -{ - List *l; - - foreach(l, relids) - { - printf("%d", lfirsti(l)); - if (lnext(l)) - printf(" "); - } -} - -static void -print_restrictclauses(Query *root, List *clauses) -{ - List *l; - - foreach(l, clauses) - { - RestrictInfo *c = lfirst(l); - - print_expr((Node *) c->clause, root->rtable); - if (lnext(l)) - printf(", "); - } -} - -static void -print_path(Query *root, Path *path, int indent) -{ - const char *ptype; - bool join; - int i; - - switch (nodeTag(path)) - { - case T_Path: - ptype = "SeqScan"; - join = false; - break; - case T_IndexPath: - ptype = "IdxScan"; - join = false; - break; - case T_TidPath: - ptype = "TidScan"; - join = false; - break; - case T_NestPath: - ptype = "Nestloop"; - join = true; - break; - case T_MergePath: - ptype = "MergeJoin"; - join = true; - break; - case T_HashPath: - ptype = "HashJoin"; - join = true; - break; - default: - ptype = "???Path"; - join = false; - break; - } - - for (i = 0; i < indent; i++) - printf("\t"); - printf("%s(", ptype); - print_relids(path->parent->relids); - printf(") rows=%.0f cost=%.2f..%.2f\n", - path->parent->rows, path->startup_cost, path->total_cost); - - if (path->pathkeys) - { - for (i = 0; i < indent; i++) - printf("\t"); - printf(" pathkeys: "); - print_pathkeys(path->pathkeys, root->rtable); - } - - if (join) - { - JoinPath *jp = (JoinPath *) path; - - for (i = 0; i < indent; i++) - printf("\t"); - printf(" clauses: "); - print_restrictclauses(root, jp->joinrestrictinfo); - printf("\n"); - - if (nodeTag(path) == T_MergePath) - { - MergePath *mp = (MergePath *) path; - - if (mp->outersortkeys || mp->innersortkeys) - { - for (i = 
0; i < indent; i++) - printf("\t"); - printf(" sortouter=%d sortinner=%d\n", - ((mp->outersortkeys) ? 1 : 0), - ((mp->innersortkeys) ? 1 : 0)); - } - } - - print_path(root, jp->outerjoinpath, indent + 1); - print_path(root, jp->innerjoinpath, indent + 1); - } -} - -void -debug_print_rel(Query *root, RelOptInfo *rel) -{ - List *l; - - printf("RELOPTINFO ("); - print_relids(rel->relids); - printf("): rows=%.0f width=%d\n", rel->rows, rel->width); - - if (rel->baserestrictinfo) - { - printf("\tbaserestrictinfo: "); - print_restrictclauses(root, rel->baserestrictinfo); - printf("\n"); - } - - foreach(l, rel->joininfo) - { - JoinInfo *j = (JoinInfo *) lfirst(l); - - printf("\tjoininfo ("); - print_relids(j->unjoined_relids); - printf("): "); - print_restrictclauses(root, j->jinfo_restrictinfo); - printf("\n"); - } - - printf("\tpath list:\n"); - foreach(l, rel->pathlist) - print_path(root, lfirst(l), 1); - printf("\n\tcheapest startup path:\n"); - print_path(root, rel->cheapest_startup_path, 1); - printf("\n\tcheapest total path:\n"); - print_path(root, rel->cheapest_total_path, 1); - printf("\n"); - fflush(stdout); -} - -#endif /* OPTIMIZER_DEBUG */ diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c deleted file mode 100644 index 25350c480cc..00000000000 --- a/src/backend/optimizer/path/clausesel.c +++ /dev/null @@ -1,526 +0,0 @@ -/*------------------------------------------------------------------------- - * - * clausesel.c - * Routines to compute clause selectivities - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.51 2002/06/20 20:29:29 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include "catalog/pg_operator.h" -#include "catalog/pg_type.h" 
-#include "nodes/makefuncs.h" -#include "optimizer/clauses.h" -#include "optimizer/cost.h" -#include "optimizer/plancat.h" -#include "optimizer/restrictinfo.h" -#include "parser/parsetree.h" -#include "utils/fmgroids.h" -#include "utils/lsyscache.h" -#include "utils/selfuncs.h" - - -/* note that pg_type.h hardwires size of bool as 1 ... duplicate it */ -#define MAKEBOOLCONST(val,isnull) \ - ((Node *) makeConst(BOOLOID, 1, (Datum) (val), \ - (isnull), true, false, false)) - - -/* - * Data structure for accumulating info about possible range-query - * clause pairs in clauselist_selectivity. - */ -typedef struct RangeQueryClause -{ - struct RangeQueryClause *next; /* next in linked list */ - Node *var; /* The common variable of the clauses */ - bool have_lobound; /* found a low-bound clause yet? */ - bool have_hibound; /* found a high-bound clause yet? */ - Selectivity lobound; /* Selectivity of a var > something clause */ - Selectivity hibound; /* Selectivity of a var < something clause */ -} RangeQueryClause; - -static void addRangeClause(RangeQueryClause **rqlist, Node *clause, - bool varonleft, bool isLTsel, Selectivity s2); - - -/**************************************************************************** - * ROUTINES TO COMPUTE SELECTIVITIES - ****************************************************************************/ - -/* - * restrictlist_selectivity - - * Compute the selectivity of an implicitly-ANDed list of RestrictInfo - * clauses. - * - * This is the same as clauselist_selectivity except for the representation - * of the clause list. - */ -Selectivity -restrictlist_selectivity(Query *root, - List *restrictinfo_list, - int varRelid) -{ - List *clauselist = get_actual_clauses(restrictinfo_list); - Selectivity result; - - result = clauselist_selectivity(root, clauselist, varRelid); - freeList(clauselist); - return result; -} - -/* - * clauselist_selectivity - - * Compute the selectivity of an implicitly-ANDed list of boolean - * expression clauses. 
The list can be empty, in which case 1.0 - * must be returned. - * - * See clause_selectivity() for the meaning of the varRelid parameter. - * - * Our basic approach is to take the product of the selectivities of the - * subclauses. However, that's only right if the subclauses have independent - * probabilities, and in reality they are often NOT independent. So, - * we want to be smarter where we can. - - * Currently, the only extra smarts we have is to recognize "range queries", - * such as "x > 34 AND x < 42". Clauses are recognized as possible range - * query components if they are restriction opclauses whose operators have - * scalarltsel() or scalargtsel() as their restriction selectivity estimator. - * We pair up clauses of this form that refer to the same variable. An - * unpairable clause of this kind is simply multiplied into the selectivity - * product in the normal way. But when we find a pair, we know that the - * selectivities represent the relative positions of the low and high bounds - * within the column's range, so instead of figuring the selectivity as - * hisel * losel, we can figure it as hisel + losel - 1. (To visualize this, - * see that hisel is the fraction of the range below the high bound, while - * losel is the fraction above the low bound; so hisel can be interpreted - * directly as a 0..1 value but we need to convert losel to 1-losel before - * interpreting it as a value. Then the available range is 1-losel to hisel.) - * If the calculation yields zero or negative, however, we chicken out and - * use a default estimate; that probably means that one or both - * selectivities is a default estimate rather than an actual range value. - * Of course this is all very dependent on the behavior of - * scalarltsel/scalargtsel; perhaps some day we can generalize the approach. 
- */ -Selectivity -clauselist_selectivity(Query *root, - List *clauses, - int varRelid) -{ - Selectivity s1 = 1.0; - RangeQueryClause *rqlist = NULL; - List *clist; - - /* - * Initial scan over clauses. Anything that doesn't look like a - * potential rangequery clause gets multiplied into s1 and forgotten. - * Anything that does gets inserted into an rqlist entry. - */ - foreach(clist, clauses) - { - Node *clause = (Node *) lfirst(clist); - Selectivity s2; - - /* - * See if it looks like a restriction clause with a pseudoconstant - * on one side. (Anything more complicated than that might not - * behave in the simple way we are expecting.) - * - * NB: for consistency of results, this fragment of code had better - * match what clause_selectivity() would do in the cases it - * handles. - */ - if (is_opclause(clause) && - (varRelid != 0 || NumRelids(clause) == 1)) - { - Expr *expr = (Expr *) clause; - - if (length(expr->args) == 2) - { - bool varonleft = true; - - if (is_pseudo_constant_clause(lsecond(expr->args)) || - (varonleft = false, - is_pseudo_constant_clause(lfirst(expr->args)))) - { - Oid opno = ((Oper *) expr->oper)->opno; - RegProcedure oprrest = get_oprrest(opno); - - s2 = restriction_selectivity(root, opno, - expr->args, varRelid); - - /* - * If we reach here, we have computed the same result - * that clause_selectivity would, so we can just use - * s2 if it's the wrong oprrest. But if it's the - * right oprrest, add the clause to rqlist for later - * processing. - */ - switch (oprrest) - { - case F_SCALARLTSEL: - addRangeClause(&rqlist, clause, - varonleft, true, s2); - break; - case F_SCALARGTSEL: - addRangeClause(&rqlist, clause, - varonleft, false, s2); - break; - default: - /* Just merge the selectivity in generically */ - s1 = s1 * s2; - break; - } - continue; /* drop to loop bottom */ - } - } - } - /* Not the right form, so treat it generically. 
*/ - s2 = clause_selectivity(root, clause, varRelid); - s1 = s1 * s2; - } - - /* - * Now scan the rangequery pair list. - */ - while (rqlist != NULL) - { - RangeQueryClause *rqnext; - - if (rqlist->have_lobound && rqlist->have_hibound) - { - /* Successfully matched a pair of range clauses */ - Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0; - - /* - * A zero or slightly negative s2 should be converted into a - * small positive value; we probably are dealing with a very - * tight range and got a bogus result due to roundoff errors. - * However, if s2 is very negative, then we probably have - * default selectivity estimates on one or both sides of the - * range. In that case, insert a not-so-wildly-optimistic - * default estimate. - */ - if (s2 <= 0.0) - { - if (s2 < -0.01) - { - /* - * No data available --- use a default estimate that - * is small, but not real small. - */ - s2 = 0.005; - } - else - { - /* - * It's just roundoff error; use a small positive - * value - */ - s2 = 1.0e-10; - } - } - /* Merge in the selectivity of the pair of clauses */ - s1 *= s2; - } - else - { - /* Only found one of a pair, merge it in generically */ - if (rqlist->have_lobound) - s1 *= rqlist->lobound; - else - s1 *= rqlist->hibound; - } - /* release storage and advance */ - rqnext = rqlist->next; - pfree(rqlist); - rqlist = rqnext; - } - - return s1; -} - -/* - * addRangeClause --- add a new range clause for clauselist_selectivity - * - * Here is where we try to match up pairs of range-query clauses - */ -static void -addRangeClause(RangeQueryClause **rqlist, Node *clause, - bool varonleft, bool isLTsel, Selectivity s2) -{ - RangeQueryClause *rqelem; - Node *var; - bool is_lobound; - - if (varonleft) - { - var = (Node *) get_leftop((Expr *) clause); - is_lobound = !isLTsel; /* x < something is high bound */ - } - else - { - var = (Node *) get_rightop((Expr *) clause); - is_lobound = isLTsel; /* something < x is low bound */ - } - - for (rqelem = *rqlist; rqelem; rqelem = 
rqelem->next) - { - /* - * We use full equal() here because the "var" might be a function - * of one or more attributes of the same relation... - */ - if (!equal(var, rqelem->var)) - continue; - /* Found the right group to put this clause in */ - if (is_lobound) - { - if (!rqelem->have_lobound) - { - rqelem->have_lobound = true; - rqelem->lobound = s2; - } - else - { - - /*------ - * We have found two similar clauses, such as - * x < y AND x < z. - * Keep only the more restrictive one. - *------ - */ - if (rqelem->lobound > s2) - rqelem->lobound = s2; - } - } - else - { - if (!rqelem->have_hibound) - { - rqelem->have_hibound = true; - rqelem->hibound = s2; - } - else - { - - /*------ - * We have found two similar clauses, such as - * x > y AND x > z. - * Keep only the more restrictive one. - *------ - */ - if (rqelem->hibound > s2) - rqelem->hibound = s2; - } - } - return; - } - - /* No matching var found, so make a new clause-pair data structure */ - rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause)); - rqelem->var = var; - if (is_lobound) - { - rqelem->have_lobound = true; - rqelem->have_hibound = false; - rqelem->lobound = s2; - } - else - { - rqelem->have_lobound = false; - rqelem->have_hibound = true; - rqelem->hibound = s2; - } - rqelem->next = *rqlist; - *rqlist = rqelem; -} - - -/* - * clause_selectivity - - * Compute the selectivity of a general boolean expression clause. - * - * varRelid is either 0 or a rangetable index. - * - * When varRelid is not 0, only variables belonging to that relation are - * considered in computing selectivity; other vars are treated as constants - * of unknown values. This is appropriate for estimating the selectivity of - * a join clause that is being used as a restriction clause in a scan of a - * nestloop join's inner relation --- varRelid should then be the ID of the - * inner relation. - * - * When varRelid is 0, all variables are treated as variables. 
This - * is appropriate for ordinary join clauses and restriction clauses. - */ -Selectivity -clause_selectivity(Query *root, - Node *clause, - int varRelid) -{ - Selectivity s1 = 1.0; /* default for any unhandled clause type */ - - if (clause == NULL) - return s1; - if (IsA(clause, Var)) - { - Var *var = (Var *) clause; - - /* - * We probably shouldn't ever see an uplevel Var here, but if we - * do, return the default selectivity... - */ - if (var->varlevelsup == 0 && - (varRelid == 0 || varRelid == (int) var->varno)) - { - RangeTblEntry *rte = rt_fetch(var->varno, root->rtable); - - if (rte->rtekind == RTE_SUBQUERY) - { - /* - * XXX not smart about subquery references... any way to - * do better? - */ - s1 = 0.5; - } - else - { - /* - * A Var at the top of a clause must be a bool Var. This - * is equivalent to the clause reln.attribute = 't', so we - * compute the selectivity as if that is what we have. - */ - s1 = restriction_selectivity(root, - BooleanEqualOperator, - makeList2(var, - MAKEBOOLCONST(true, - false)), - varRelid); - } - } - } - else if (IsA(clause, Param)) - { - /* XXX any way to do better? */ - s1 = 1.0; - } - else if (IsA(clause, Const)) - { - /* bool constant is pretty easy... */ - s1 = ((bool) ((Const *) clause)->constvalue) ? 1.0 : 0.0; - } - else if (not_clause(clause)) - { - /* inverse of the selectivity of the underlying clause */ - s1 = 1.0 - clause_selectivity(root, - (Node *) get_notclausearg((Expr *) clause), - varRelid); - } - else if (and_clause(clause)) - { - /* share code with clauselist_selectivity() */ - s1 = clauselist_selectivity(root, - ((Expr *) clause)->args, - varRelid); - } - else if (or_clause(clause)) - { - /* - * Selectivities for an 'or' clause are computed as s1+s2 - s1*s2 - * to account for the probable overlap of selected tuple sets. XXX - * is this too conservative? 
- */ - List *arg; - - s1 = 0.0; - foreach(arg, ((Expr *) clause)->args) - { - Selectivity s2 = clause_selectivity(root, - (Node *) lfirst(arg), - varRelid); - - s1 = s1 + s2 - s1 * s2; - } - } - else if (is_opclause(clause)) - { - Oid opno = ((Oper *) ((Expr *) clause)->oper)->opno; - bool is_join_clause; - - if (varRelid != 0) - { - /* - * If we are considering a nestloop join then all clauses are - * restriction clauses, since we are only interested in the - * one relation. - */ - is_join_clause = false; - } - else - { - /* - * Otherwise, it's a join if there's more than one relation - * used. - */ - is_join_clause = (NumRelids(clause) > 1); - } - - if (is_join_clause) - { - /* Estimate selectivity for a join clause. */ - s1 = join_selectivity(root, opno, - ((Expr *) clause)->args); - } - else - { - /* Estimate selectivity for a restriction clause. */ - s1 = restriction_selectivity(root, opno, - ((Expr *) clause)->args, varRelid); - } - } - else if (is_funcclause(clause)) - { - /* - * This is not an operator, so we guess at the selectivity. THIS - * IS A HACK TO GET V4 OUT THE DOOR. FUNCS SHOULD BE ABLE TO HAVE - * SELECTIVITIES THEMSELVES. -- JMH 7/9/92 - */ - s1 = (Selectivity) 0.3333333; - } - else if (is_subplan(clause)) - { - /* - * Just for the moment! FIX ME! 
- vadim 02/04/98 - */ - s1 = (Selectivity) 0.5; - } - else if (IsA(clause, NullTest)) - { - /* Use node specific selectivity calculation function */ - s1 = nulltestsel(root, (NullTest *) clause, varRelid); - } - else if (IsA(clause, BooleanTest)) - { - /* Use node specific selectivity calculation function */ - s1 = booltestsel(root, (BooleanTest *) clause, varRelid); - } - else if (IsA(clause, RelabelType)) - { - /* Not sure this case is needed, but it can't hurt */ - s1 = clause_selectivity(root, - ((RelabelType *) clause)->arg, - varRelid); - } - -#ifdef SELECTIVITY_DEBUG - elog(DEBUG3, "clause_selectivity: s1 %f", s1); -#endif /* SELECTIVITY_DEBUG */ - - return s1; -} diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c deleted file mode 100644 index 3bf6d2f4dc8..00000000000 --- a/src/backend/optimizer/path/costsize.c +++ /dev/null @@ -1,1458 +0,0 @@ -/*------------------------------------------------------------------------- - * - * costsize.c - * Routines to compute (and set) relation sizes and path costs - * - * Path costs are measured in units of disk accesses: one sequential page - * fetch has cost 1. All else is scaled relative to a page fetch, using - * the scaling parameters - * - * random_page_cost Cost of a non-sequential page fetch - * cpu_tuple_cost Cost of typical CPU time to process a tuple - * cpu_index_tuple_cost Cost of typical CPU time to process an index tuple - * cpu_operator_cost Cost of CPU time to process a typical WHERE operator - * - * We also use a rough estimate "effective_cache_size" of the number of - * disk pages in Postgres + OS-level disk cache. (We can't simply use - * NBuffers for this purpose because that would ignore the effects of - * the kernel's disk cache.) - * - * Obviously, taking constants for these values is an oversimplification, - * but it's tough enough to get any useful estimates even at this level of - * detail. 
Note that all of these parameters are user-settable, in case - * the default values are drastically off for a particular platform. - * - * We compute two separate costs for each path: - * total_cost: total estimated cost to fetch all tuples - * startup_cost: cost that is expended before first tuple is fetched - * In some scenarios, such as when there is a LIMIT or we are implementing - * an EXISTS(...) sub-select, it is not necessary to fetch all tuples of the - * path's result. A caller can estimate the cost of fetching a partial - * result by interpolating between startup_cost and total_cost. In detail: - * actual_cost = startup_cost + - * (total_cost - startup_cost) * tuples_to_fetch / path->parent->rows; - * Note that a base relation's rows count (and, by extension, plan_rows for - * plan nodes below the LIMIT node) are set without regard to any LIMIT, so - * that this equation works properly. (Also, these routines guarantee not to - * set the rows count to zero, so there will be no zero divide.) The LIMIT is - * applied as a top-level plan node. 
- * - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.85 2002/06/20 20:29:29 momjian Exp $ - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" - -#include <math.h> - -#include "catalog/pg_statistic.h" -#include "executor/nodeHash.h" -#include "miscadmin.h" -#include "optimizer/clauses.h" -#include "optimizer/cost.h" -#include "optimizer/pathnode.h" -#include "parser/parsetree.h" -#include "utils/selfuncs.h" -#include "utils/lsyscache.h" -#include "utils/syscache.h" - - -#define LOG2(x) (log(x) / 0.693147180559945) -#define LOG6(x) (log(x) / 1.79175946922805) - - -double effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE; -double random_page_cost = DEFAULT_RANDOM_PAGE_COST; -double cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST; -double cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST; -double cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST; - -Cost disable_cost = 100000000.0; - -bool enable_seqscan = true; -bool enable_indexscan = true; -bool enable_tidscan = true; -bool enable_sort = true; -bool enable_nestloop = true; -bool enable_mergejoin = true; -bool enable_hashjoin = true; - - -static Selectivity estimate_hash_bucketsize(Query *root, Var *var); -static bool cost_qual_eval_walker(Node *node, Cost *total); -static Selectivity approx_selectivity(Query *root, List *quals); -static void set_rel_width(Query *root, RelOptInfo *rel); -static double relation_byte_size(double tuples, int width); -static double page_size(double tuples, int width); - - -/* - * cost_seqscan - * Determines and returns the cost of scanning a relation sequentially. - * - * Note: for historical reasons, this routine and the others in this module - * use the passed result Path only to store their startup_cost and total_cost - * results into. 
All the input data they need is passed as separate - * parameters, even though much of it could be extracted from the Path. - */ -void -cost_seqscan(Path *path, Query *root, - RelOptInfo *baserel) -{ - Cost startup_cost = 0; - Cost run_cost = 0; - Cost cpu_per_tuple; - - /* Should only be applied to base relations */ - Assert(length(baserel->relids) == 1); - Assert(baserel->rtekind == RTE_RELATION); - - if (!enable_seqscan) - startup_cost += disable_cost; - - /* - * disk costs - * - * The cost of reading a page sequentially is 1.0, by definition. Note - * that the Unix kernel will typically do some amount of read-ahead - * optimization, so that this cost is less than the true cost of - * reading a page from disk. We ignore that issue here, but must take - * it into account when estimating the cost of non-sequential - * accesses! - */ - run_cost += baserel->pages; /* sequential fetches with cost 1.0 */ - - /* CPU costs */ - cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost; - run_cost += cpu_per_tuple * baserel->tuples; - - path->startup_cost = startup_cost; - path->total_cost = startup_cost + run_cost; -} - -/* - * cost_nonsequential_access - * Estimate the cost of accessing one page at random from a relation - * (or sort temp file) of the given size in pages. - * - * The simplistic model that the cost is random_page_cost is what we want - * to use for large relations; but for small ones that is a serious - * overestimate because of the effects of caching. This routine tries to - * account for that. - * - * Unfortunately we don't have any good way of estimating the effective cache - * size we are working with --- we know that Postgres itself has NBuffers - * internal buffers, but the size of the kernel's disk cache is uncertain, - * and how much of it we get to use is even less certain. We punt the problem - * for now by assuming we are given an effective_cache_size parameter. 
- * - * Given a guesstimated cache size, we estimate the actual I/O cost per page - * with the entirely ad-hoc equations: - * for rel_size <= effective_cache_size: - * 1 + (random_page_cost/2-1) * (rel_size/effective_cache_size) ** 2 - * for rel_size >= effective_cache_size: - * random_page_cost * (1 - (effective_cache_size/rel_size)/2) - * These give the right asymptotic behavior (=> 1.0 as rel_size becomes - * small, => random_page_cost as it becomes large) and meet in the middle - * with the estimate that the cache is about 50% effective for a relation - * of the same size as effective_cache_size. (XXX this is probably all - * wrong, but I haven't been able to find any theory about how effective - * a disk cache should be presumed to be.) - */ -static Cost -cost_nonsequential_access(double relpages) -{ - double relsize; - - /* don't crash on bad input data */ - if (relpages <= 0.0 || effective_cache_size <= 0.0) - return random_page_cost; - - relsize = relpages / effective_cache_size; - - if (relsize >= 1.0) - return random_page_cost * (1.0 - 0.5 / relsize); - else - return 1.0 + (random_page_cost * 0.5 - 1.0) * relsize * relsize; -} - -/* - * cost_index - * Determines and returns the cost of scanning a relation using an index. - * - * NOTE: an indexscan plan node can actually represent several passes, - * but here we consider the cost of just one pass. - * - * 'root' is the query root - * 'baserel' is the base relation the index is for - * 'index' is the index to be used - * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics) - * 'is_injoin' is T if we are considering using the index scan as the inside - * of a nestloop join (hence, some of the indexQuals are join clauses) - * - * NOTE: 'indexQuals' must contain only clauses usable as index restrictions. 
- * Any additional quals evaluated as qpquals may reduce the number of returned - * tuples, but they won't reduce the number of tuples we have to fetch from - * the table, so they don't reduce the scan cost. - */ -void -cost_index(Path *path, Query *root, - RelOptInfo *baserel, - IndexOptInfo *index, - List *indexQuals, - bool is_injoin) -{ - Cost startup_cost = 0; - Cost run_cost = 0; - Cost indexStartupCost; - Cost indexTotalCost; - Selectivity indexSelectivity; - double indexCorrelation, - csquared; - Cost min_IO_cost, - max_IO_cost; - Cost cpu_per_tuple; - double tuples_fetched; - double pages_fetched; - double T, - b; - - /* Should only be applied to base relations */ - Assert(IsA(baserel, RelOptInfo) && - IsA(index, IndexOptInfo)); - Assert(length(baserel->relids) == 1); - Assert(baserel->rtekind == RTE_RELATION); - - if (!enable_indexscan && !is_injoin) - startup_cost += disable_cost; - - /* - * Call index-access-method-specific code to estimate the processing - * cost for scanning the index, as well as the selectivity of the - * index (ie, the fraction of main-table tuples we will have to - * retrieve) and its correlation to the main-table tuple order. - */ - OidFunctionCall8(index->amcostestimate, - PointerGetDatum(root), - PointerGetDatum(baserel), - PointerGetDatum(index), - PointerGetDatum(indexQuals), - PointerGetDatum(&indexStartupCost), - PointerGetDatum(&indexTotalCost), - PointerGetDatum(&indexSelectivity), - PointerGetDatum(&indexCorrelation)); - - /* all costs for touching index itself included here */ - startup_cost += indexStartupCost; - run_cost += indexTotalCost - indexStartupCost; - - /*---------- - * Estimate number of main-table tuples and pages fetched. - * - * When the index ordering is uncorrelated with the table ordering, - * we use an approximation proposed by Mackert and Lohman, "Index Scans - * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions - * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424. 
- * The Mackert and Lohman approximation is that the number of pages - * fetched is - * PF = - * min(2TNs/(2T+Ns), T) when T <= b - * 2TNs/(2T+Ns) when T > b and Ns <= 2Tb/(2T-b) - * b + (Ns - 2Tb/(2T-b))*(T-b)/T when T > b and Ns > 2Tb/(2T-b) - * where - * T = # pages in table - * N = # tuples in table - * s = selectivity = fraction of table to be scanned - * b = # buffer pages available (we include kernel space here) - * - * When the index ordering is exactly correlated with the table ordering - * (just after a CLUSTER, for example), the number of pages fetched should - * be just sT. What's more, these will be sequential fetches, not the - * random fetches that occur in the uncorrelated case. So, depending on - * the extent of correlation, we should estimate the actual I/O cost - * somewhere between s * T * 1.0 and PF * random_cost. We currently - * interpolate linearly between these two endpoints based on the - * correlation squared (XXX is that appropriate?). - * - * In any case the number of tuples fetched is Ns. - *---------- - */ - - tuples_fetched = indexSelectivity * baserel->tuples; - /* Don't believe estimates less than 1... */ - if (tuples_fetched < 1.0) - tuples_fetched = 1.0; - - /* This part is the Mackert and Lohman formula */ - - T = (baserel->pages > 1) ? (double) baserel->pages : 1.0; - b = (effective_cache_size > 1) ? effective_cache_size : 1.0; - - if (T <= b) - { - pages_fetched = - (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); - if (pages_fetched > T) - pages_fetched = T; - } - else - { - double lim; - - lim = (2.0 * T * b) / (2.0 * T - b); - if (tuples_fetched <= lim) - { - pages_fetched = - (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); - } - else - { - pages_fetched = - b + (tuples_fetched - lim) * (T - b) / T; - } - } - - /* - * min_IO_cost corresponds to the perfectly correlated case - * (csquared=1), max_IO_cost to the perfectly uncorrelated case - * (csquared=0). 
Note that we just charge random_page_cost per page - * in the uncorrelated case, rather than using - * cost_nonsequential_access, since we've already accounted for - * caching effects by using the Mackert model. - */ - min_IO_cost = ceil(indexSelectivity * T); - max_IO_cost = pages_fetched * random_page_cost; - - /* - * Now interpolate based on estimated index order correlation to get - * total disk I/O cost for main table accesses. - */ - csquared = indexCorrelation * indexCorrelation; - - run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost); - - /* - * Estimate CPU costs per tuple. - * - * Normally the indexquals will be removed from the list of restriction - * clauses that we have to evaluate as qpquals, so we should subtract - * their costs from baserestrictcost. XXX For a lossy index, not all - * the quals will be removed and so we really shouldn't subtract their - * costs; but detecting that seems more expensive than it's worth. - * Also, if we are doing a join then some of the indexquals are join - * clauses and shouldn't be subtracted. Rather than work out exactly - * how much to subtract, we don't subtract anything. - */ - cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost; - - if (!is_injoin) - cpu_per_tuple -= cost_qual_eval(indexQuals); - - run_cost += cpu_per_tuple * tuples_fetched; - - path->startup_cost = startup_cost; - path->total_cost = startup_cost + run_cost; -} - -/* - * cost_tidscan - * Determines and returns the cost of scanning a relation using TIDs. 
- */ -void -cost_tidscan(Path *path, Query *root, - RelOptInfo *baserel, List *tideval) -{ - Cost startup_cost = 0; - Cost run_cost = 0; - Cost cpu_per_tuple; - int ntuples = length(tideval); - - /* Should only be applied to base relations */ - Assert(length(baserel->relids) == 1); - Assert(baserel->rtekind == RTE_RELATION); - - if (!enable_tidscan) - startup_cost += disable_cost; - - /* disk costs --- assume each tuple on a different page */ - run_cost += random_page_cost * ntuples; - - /* CPU costs */ - cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost; - run_cost += cpu_per_tuple * ntuples; - - path->startup_cost = startup_cost; - path->total_cost = startup_cost + run_cost; -} - -/* - * cost_functionscan - * Determines and returns the cost of scanning a function RTE. - */ -void -cost_functionscan(Path *path, Query *root, RelOptInfo *baserel) -{ - Cost startup_cost = 0; - Cost run_cost = 0; - Cost cpu_per_tuple; - - /* Should only be applied to base relations that are functions */ - Assert(length(baserel->relids) == 1); - Assert(baserel->rtekind == RTE_FUNCTION); - - /* - * For now, estimate function's cost at one operator eval per function - * call. Someday we should revive the function cost estimate columns in - * pg_proc... - */ - cpu_per_tuple = cpu_operator_cost; - - /* Add scanning CPU costs */ - cpu_per_tuple += cpu_tuple_cost + baserel->baserestrictcost; - run_cost += cpu_per_tuple * baserel->tuples; - - path->startup_cost = startup_cost; - path->total_cost = startup_cost + run_cost; -} - -/* - * cost_sort - * Determines and returns the cost of sorting a relation. - * - * The cost of supplying the input data is NOT included; the caller should - * add that cost to both startup and total costs returned from this routine! - * - * If the total volume of data to sort is less than SortMem, we will do - * an in-memory sort, which requires no I/O and about t*log2(t) tuple - * comparisons for t tuples. 
- * - * If the total volume exceeds SortMem, we switch to a tape-style merge - * algorithm. There will still be about t*log2(t) tuple comparisons in - * total, but we will also need to write and read each tuple once per - * merge pass. We expect about ceil(log6(r)) merge passes where r is the - * number of initial runs formed (log6 because tuplesort.c uses six-tape - * merging). Since the average initial run should be about twice SortMem, - * we have - * disk traffic = 2 * relsize * ceil(log6(p / (2*SortMem))) - * cpu = comparison_cost * t * log2(t) - * - * The disk traffic is assumed to be half sequential and half random - * accesses (XXX can't we refine that guess?) - * - * We charge two operator evals per tuple comparison, which should be in - * the right ballpark in most cases. - * - * 'pathkeys' is a list of sort keys - * 'tuples' is the number of tuples in the relation - * 'width' is the average tuple width in bytes - * - * NOTE: some callers currently pass NIL for pathkeys because they - * can't conveniently supply the sort keys. Since this routine doesn't - * currently do anything with pathkeys anyway, that doesn't matter... - * but if it ever does, it should react gracefully to lack of key data. - */ -void -cost_sort(Path *path, Query *root, - List *pathkeys, double tuples, int width) -{ - Cost startup_cost = 0; - Cost run_cost = 0; - double nbytes = relation_byte_size(tuples, width); - long sortmembytes = SortMem * 1024L; - - if (!enable_sort) - startup_cost += disable_cost; - - /* - * We want to be sure the cost of a sort is never estimated as zero, - * even if passed-in tuple count is zero. Besides, mustn't do - * log(0)... 
- */ - if (tuples < 2.0) - tuples = 2.0; - - /* - * CPU costs - * - * Assume about two operator evals per tuple comparison and N log2 N - * comparisons - */ - startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples); - - /* disk costs */ - if (nbytes > sortmembytes) - { - double npages = ceil(nbytes / BLCKSZ); - double nruns = nbytes / (sortmembytes * 2); - double log_runs = ceil(LOG6(nruns)); - double npageaccesses; - - if (log_runs < 1.0) - log_runs = 1.0; - npageaccesses = 2.0 * npages * log_runs; - /* Assume half are sequential (cost 1), half are not */ - startup_cost += npageaccesses * - (1.0 + cost_nonsequential_access(npages)) * 0.5; - } - - /* - * Also charge a small amount (arbitrarily set equal to operator cost) - * per extracted tuple. - */ - run_cost += cpu_operator_cost * tuples; - - path->startup_cost = startup_cost; - path->total_cost = startup_cost + run_cost; -} - - -/* - * cost_nestloop - * Determines and returns the cost of joining two relations using the - * nested loop algorithm. - * - * 'outer_path' is the path for the outer relation - * 'inner_path' is the path for the inner relation - * 'restrictlist' are the RestrictInfo nodes to be applied at the join - */ -void -cost_nestloop(Path *path, Query *root, - Path *outer_path, - Path *inner_path, - List *restrictlist) -{ - Cost startup_cost = 0; - Cost run_cost = 0; - Cost cpu_per_tuple; - double ntuples; - - if (!enable_nestloop) - startup_cost += disable_cost; - - /* cost of source data */ - - /* - * NOTE: clearly, we must pay both outer and inner paths' startup_cost - * before we can start returning tuples, so the join's startup cost is - * their sum. What's not so clear is whether the inner path's - * startup_cost must be paid again on each rescan of the inner path. - * This is not true if the inner path is materialized, but probably is - * true otherwise. 
Since we don't yet have clean handling of the - * decision whether to materialize a path, we can't tell here which - * will happen. As a compromise, charge 50% of the inner startup cost - * for each restart. - */ - startup_cost += outer_path->startup_cost + inner_path->startup_cost; - run_cost += outer_path->total_cost - outer_path->startup_cost; - run_cost += outer_path->parent->rows * - (inner_path->total_cost - inner_path->startup_cost); - if (outer_path->parent->rows > 1) - run_cost += (outer_path->parent->rows - 1) * - inner_path->startup_cost * 0.5; - - /* - * Number of tuples processed (not number emitted!). If inner path is - * an indexscan, be sure to use its estimated output row count, which - * may be lower than the restriction-clause-only row count of its - * parent. - */ - if (IsA(inner_path, IndexPath)) - ntuples = ((IndexPath *) inner_path)->rows; - else - ntuples = inner_path->parent->rows; - ntuples *= outer_path->parent->rows; - - /* CPU costs */ - cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist); - run_cost += cpu_per_tuple * ntuples; - - path->startup_cost = startup_cost; - path->total_cost = startup_cost + run_cost; -} - -/* - * cost_mergejoin - * Determines and returns the cost of joining two relations using the - * merge join algorithm. 
- * - * 'outer_path' is the path for the outer relation - * 'inner_path' is the path for the inner relation - * 'restrictlist' are the RestrictInfo nodes to be applied at the join - * 'mergeclauses' are the RestrictInfo nodes to use as merge clauses - * (this should be a subset of the restrictlist) - * 'outersortkeys' and 'innersortkeys' are lists of the keys to be used - * to sort the outer and inner relations, or NIL if no explicit - * sort is needed because the source path is already ordered - */ -void -cost_mergejoin(Path *path, Query *root, - Path *outer_path, - Path *inner_path, - List *restrictlist, - List *mergeclauses, - List *outersortkeys, - List *innersortkeys) -{ - Cost startup_cost = 0; - Cost run_cost = 0; - Cost cpu_per_tuple; - RestrictInfo *firstclause; - Var *leftvar; - double outer_rows, - inner_rows; - double ntuples; - Selectivity outerscansel, - innerscansel; - Path sort_path; /* dummy for result of cost_sort */ - - if (!enable_mergejoin) - startup_cost += disable_cost; - - /* - * A merge join will stop as soon as it exhausts either input stream. - * Estimate fraction of the left and right inputs that will actually - * need to be scanned. We use only the first (most significant) - * merge clause for this purpose. - * - * Since this calculation is somewhat expensive, and will be the same - * for all mergejoin paths associated with the merge clause, we cache - * the results in the RestrictInfo node. - */ - firstclause = (RestrictInfo *) lfirst(mergeclauses); - if (firstclause->left_mergescansel < 0) /* not computed yet? 
*/ - mergejoinscansel(root, (Node *) firstclause->clause, - &firstclause->left_mergescansel, - &firstclause->right_mergescansel); - - leftvar = get_leftop(firstclause->clause); - Assert(IsA(leftvar, Var)); - if (VARISRELMEMBER(leftvar->varno, outer_path->parent)) - { - /* left side of clause is outer */ - outerscansel = firstclause->left_mergescansel; - innerscansel = firstclause->right_mergescansel; - } - else - { - /* left side of clause is inner */ - outerscansel = firstclause->right_mergescansel; - innerscansel = firstclause->left_mergescansel; - } - - outer_rows = outer_path->parent->rows * outerscansel; - inner_rows = inner_path->parent->rows * innerscansel; - - /* cost of source data */ - - /* - * Note we are assuming that each source tuple is fetched just once, - * which is not right in the presence of equal keys. If we had a way - * of estimating the proportion of equal keys, we could apply a - * correction factor... - */ - if (outersortkeys) /* do we need to sort outer? */ - { - startup_cost += outer_path->total_cost; - cost_sort(&sort_path, - root, - outersortkeys, - outer_path->parent->rows, - outer_path->parent->width); - startup_cost += sort_path.startup_cost; - run_cost += (sort_path.total_cost - sort_path.startup_cost) - * outerscansel; - } - else - { - startup_cost += outer_path->startup_cost; - run_cost += (outer_path->total_cost - outer_path->startup_cost) - * outerscansel; - } - - if (innersortkeys) /* do we need to sort inner? 
*/ - { - startup_cost += inner_path->total_cost; - cost_sort(&sort_path, - root, - innersortkeys, - inner_path->parent->rows, - inner_path->parent->width); - startup_cost += sort_path.startup_cost; - run_cost += (sort_path.total_cost - sort_path.startup_cost) - * innerscansel; - } - else - { - startup_cost += inner_path->startup_cost; - run_cost += (inner_path->total_cost - inner_path->startup_cost) - * innerscansel; - } - - /* - * The number of tuple comparisons needed depends drastically on the - * number of equal keys in the two source relations, which we have no - * good way of estimating. (XXX could the MCV statistics help?) - * Somewhat arbitrarily, we charge one tuple - * comparison (one cpu_operator_cost) for each tuple in the two source - * relations. This is probably a lower bound. - */ - run_cost += cpu_operator_cost * (outer_rows + inner_rows); - - /* - * For each tuple that gets through the mergejoin proper, we charge - * cpu_tuple_cost plus the cost of evaluating additional restriction - * clauses that are to be applied at the join. It's OK to use an - * approximate selectivity here, since in most cases this is a minor - * component of the cost. NOTE: it's correct to use the unscaled rows - * counts here, not the scaled-down counts we obtained above. - */ - ntuples = approx_selectivity(root, mergeclauses) * - outer_path->parent->rows * inner_path->parent->rows; - - /* CPU costs */ - cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist); - run_cost += cpu_per_tuple * ntuples; - - path->startup_cost = startup_cost; - path->total_cost = startup_cost + run_cost; -} - -/* - * cost_hashjoin - * Determines and returns the cost of joining two relations using the - * hash join algorithm. 
- * - * 'outer_path' is the path for the outer relation - * 'inner_path' is the path for the inner relation - * 'restrictlist' are the RestrictInfo nodes to be applied at the join - * 'hashclauses' is a list of the hash join clause (always a 1-element list) - * (this should be a subset of the restrictlist) - */ -void -cost_hashjoin(Path *path, Query *root, - Path *outer_path, - Path *inner_path, - List *restrictlist, - List *hashclauses) -{ - Cost startup_cost = 0; - Cost run_cost = 0; - Cost cpu_per_tuple; - double ntuples; - double outerbytes = relation_byte_size(outer_path->parent->rows, - outer_path->parent->width); - double innerbytes = relation_byte_size(inner_path->parent->rows, - inner_path->parent->width); - long hashtablebytes = SortMem * 1024L; - RestrictInfo *restrictinfo; - Var *left, - *right; - Selectivity innerbucketsize; - - if (!enable_hashjoin) - startup_cost += disable_cost; - - /* cost of source data */ - startup_cost += outer_path->startup_cost; - run_cost += outer_path->total_cost - outer_path->startup_cost; - startup_cost += inner_path->total_cost; - - /* cost of computing hash function: must do it once per input tuple */ - startup_cost += cpu_operator_cost * inner_path->parent->rows; - run_cost += cpu_operator_cost * outer_path->parent->rows; - - /* - * Determine bucketsize fraction for inner relation. First we have to - * figure out which side of the hashjoin clause is the inner side. - */ - Assert(length(hashclauses) == 1); - Assert(IsA(lfirst(hashclauses), RestrictInfo)); - restrictinfo = (RestrictInfo *) lfirst(hashclauses); - /* these must be OK, since check_hashjoinable accepted the clause */ - left = get_leftop(restrictinfo->clause); - right = get_rightop(restrictinfo->clause); - - /* - * Since we tend to visit the same clauses over and over when planning - * a large query, we cache the bucketsize estimate in the RestrictInfo - * node to avoid repeated lookups of statistics. 
- */ - if (VARISRELMEMBER(right->varno, inner_path->parent)) - { - /* righthand side is inner */ - innerbucketsize = restrictinfo->right_bucketsize; - if (innerbucketsize < 0) - { - /* not cached yet */ - innerbucketsize = estimate_hash_bucketsize(root, right); - restrictinfo->right_bucketsize = innerbucketsize; - } - } - else - { - Assert(VARISRELMEMBER(left->varno, inner_path->parent)); - /* lefthand side is inner */ - innerbucketsize = restrictinfo->left_bucketsize; - if (innerbucketsize < 0) - { - /* not cached yet */ - innerbucketsize = estimate_hash_bucketsize(root, left); - restrictinfo->left_bucketsize = innerbucketsize; - } - } - - /* - * The number of tuple comparisons needed is the number of outer - * tuples times the typical number of tuples in a hash bucket, which - * is the inner relation size times its bucketsize fraction. We charge - * one cpu_operator_cost per tuple comparison. - */ - run_cost += cpu_operator_cost * outer_path->parent->rows * - ceil(inner_path->parent->rows * innerbucketsize); - - /* - * For each tuple that gets through the hashjoin proper, we charge - * cpu_tuple_cost plus the cost of evaluating additional restriction - * clauses that are to be applied at the join. It's OK to use an - * approximate selectivity here, since in most cases this is a minor - * component of the cost. - */ - ntuples = approx_selectivity(root, hashclauses) * - outer_path->parent->rows * inner_path->parent->rows; - - /* CPU costs */ - cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist); - run_cost += cpu_per_tuple * ntuples; - - /* - * if inner relation is too big then we will need to "batch" the join, - * which implies writing and reading most of the tuples to disk an - * extra time. Charge one cost unit per page of I/O (correct since it - * should be nice and sequential...). Writing the inner rel counts as - * startup cost, all the rest as run cost. 
- */ - if (innerbytes > hashtablebytes) - { - double outerpages = page_size(outer_path->parent->rows, - outer_path->parent->width); - double innerpages = page_size(inner_path->parent->rows, - inner_path->parent->width); - - startup_cost += innerpages; - run_cost += innerpages + 2 * outerpages; - } - - /* - * Bias against putting larger relation on inside. We don't want an - * absolute prohibition, though, since larger relation might have - * better bucketsize --- and we can't trust the size estimates - * unreservedly, anyway. Instead, inflate the startup cost by the - * square root of the size ratio. (Why square root? No real good - * reason, but it seems reasonable...) - */ - if (innerbytes > outerbytes && outerbytes > 0) - startup_cost *= sqrt(innerbytes / outerbytes); - - path->startup_cost = startup_cost; - path->total_cost = startup_cost + run_cost; -} - -/* - * Estimate hash bucketsize fraction (ie, number of entries in a bucket - * divided by total tuples in relation) if the specified Var is used - * as a hash key. - * - * XXX This is really pretty bogus since we're effectively assuming that the - * distribution of hash keys will be the same after applying restriction - * clauses as it was in the underlying relation. However, we are not nearly - * smart enough to figure out how the restrict clauses might change the - * distribution, so this will have to do for now. - * - * We can get the number of buckets the executor will use for the given - * input relation. If the data were perfectly distributed, with the same - * number of tuples going into each available bucket, then the bucketsize - * fraction would be 1/nbuckets. But this happy state of affairs will occur - * only if (a) there are at least nbuckets distinct data values, and (b) - * we have a not-too-skewed data distribution. Otherwise the buckets will - * be nonuniformly occupied. 
If the other relation in the join has a key - * distribution similar to this one's, then the most-loaded buckets are - * exactly those that will be probed most often. Therefore, the "average" - * bucket size for costing purposes should really be taken as something close - * to the "worst case" bucket size. We try to estimate this by adjusting the - * fraction if there are too few distinct data values, and then scaling up - * by the ratio of the most common value's frequency to the average frequency. - * - * If no statistics are available, use a default estimate of 0.1. This will - * discourage use of a hash rather strongly if the inner relation is large, - * which is what we want. We do not want to hash unless we know that the - * inner rel is well-dispersed (or the alternatives seem much worse). - */ -static Selectivity -estimate_hash_bucketsize(Query *root, Var *var) -{ - Oid relid; - RelOptInfo *rel; - int virtualbuckets; - int physicalbuckets; - int numbatches; - HeapTuple tuple; - Form_pg_statistic stats; - double estfract, - ndistinct, - mcvfreq, - avgfreq; - float4 *numbers; - int nnumbers; - - /* - * Lookup info about var's relation and attribute; if none available, - * return default estimate. - */ - if (!IsA(var, Var)) - return 0.1; - - relid = getrelid(var->varno, root->rtable); - if (relid == InvalidOid) - return 0.1; - - rel = find_base_rel(root, var->varno); - - if (rel->tuples <= 0.0 || rel->rows <= 0.0) - return 0.1; /* ensure we can divide below */ - - /* Get hash table size that executor would use for this relation */ - ExecChooseHashTableSize(rel->rows, rel->width, - &virtualbuckets, - &physicalbuckets, - &numbatches); - - tuple = SearchSysCache(STATRELATT, - ObjectIdGetDatum(relid), - Int16GetDatum(var->varattno), - 0, 0); - if (!HeapTupleIsValid(tuple)) - { - /* - * Perhaps the Var is a system attribute; if so, it will have no - * entry in pg_statistic, but we may be able to guess something - * about its distribution anyway. 
- */ - switch (var->varattno) - { - case ObjectIdAttributeNumber: - case SelfItemPointerAttributeNumber: - /* these are unique, so buckets should be well-distributed */ - return 1.0 / (double) virtualbuckets; - case TableOidAttributeNumber: - /* hashing this is a terrible idea... */ - return 1.0; - } - return 0.1; - } - stats = (Form_pg_statistic) GETSTRUCT(tuple); - - /* - * Obtain number of distinct data values in raw relation. - */ - ndistinct = stats->stadistinct; - if (ndistinct < 0.0) - ndistinct = -ndistinct * rel->tuples; - - if (ndistinct <= 0.0) /* ensure we can divide */ - { - ReleaseSysCache(tuple); - return 0.1; - } - - /* Also compute avg freq of all distinct data values in raw relation */ - avgfreq = (1.0 - stats->stanullfrac) / ndistinct; - - /* - * Adjust ndistinct to account for restriction clauses. Observe we - * are assuming that the data distribution is affected uniformly by - * the restriction clauses! - * - * XXX Possibly better way, but much more expensive: multiply by - * selectivity of rel's restriction clauses that mention the target - * Var. - */ - ndistinct *= rel->rows / rel->tuples; - - /* - * Initial estimate of bucketsize fraction is 1/nbuckets as long as - * the number of buckets is less than the expected number of distinct - * values; otherwise it is 1/ndistinct. - */ - if (ndistinct > (double) virtualbuckets) - estfract = 1.0 / (double) virtualbuckets; - else - estfract = 1.0 / ndistinct; - - /* - * Look up the frequency of the most common value, if available. - */ - mcvfreq = 0.0; - - if (get_attstatsslot(tuple, var->vartype, var->vartypmod, - STATISTIC_KIND_MCV, InvalidOid, - NULL, NULL, &numbers, &nnumbers)) - { - /* - * The first MCV stat is for the most common value. - */ - if (nnumbers > 0) - mcvfreq = numbers[0]; - free_attstatsslot(var->vartype, NULL, 0, - numbers, nnumbers); - } - - /* - * Adjust estimated bucketsize upward to account for skewed - * distribution. 
- */ - if (avgfreq > 0.0 && mcvfreq > avgfreq) - estfract *= mcvfreq / avgfreq; - - ReleaseSysCache(tuple); - - return (Selectivity) estfract; -} - - -/* - * cost_qual_eval - * Estimate the CPU cost of evaluating a WHERE clause (once). - * The input can be either an implicitly-ANDed list of boolean - * expressions, or a list of RestrictInfo nodes. - */ -Cost -cost_qual_eval(List *quals) -{ - Cost total = 0; - List *l; - - /* We don't charge any cost for the implicit ANDing at top level ... */ - - foreach(l, quals) - { - Node *qual = (Node *) lfirst(l); - - /* - * RestrictInfo nodes contain an eval_cost field reserved for this - * routine's use, so that it's not necessary to evaluate the qual - * clause's cost more than once. If the clause's cost hasn't been - * computed yet, the field will contain -1. - */ - if (qual && IsA(qual, RestrictInfo)) - { - RestrictInfo *restrictinfo = (RestrictInfo *) qual; - - if (restrictinfo->eval_cost < 0) - { - restrictinfo->eval_cost = 0; - cost_qual_eval_walker((Node *) restrictinfo->clause, - &restrictinfo->eval_cost); - } - total += restrictinfo->eval_cost; - } - else - { - /* If it's a bare expression, must always do it the hard way */ - cost_qual_eval_walker(qual, &total); - } - } - return total; -} - -static bool -cost_qual_eval_walker(Node *node, Cost *total) -{ - if (node == NULL) - return false; - - /* - * Our basic strategy is to charge one cpu_operator_cost for each - * operator or function node in the given tree. Vars and Consts are - * charged zero, and so are boolean operators (AND, OR, NOT). - * Simplistic, but a lot better than no model at all. - * - * Should we try to account for the possibility of short-circuit - * evaluation of AND/OR? 
- */ - if (IsA(node, Expr)) - { - Expr *expr = (Expr *) node; - - switch (expr->opType) - { - case OP_EXPR: - case FUNC_EXPR: - *total += cpu_operator_cost; - break; - case OR_EXPR: - case AND_EXPR: - case NOT_EXPR: - break; - case SUBPLAN_EXPR: - - /* - * A subplan node in an expression indicates that the - * subplan will be executed on each evaluation, so charge - * accordingly. (We assume that sub-selects that can be - * executed as InitPlans have already been removed from - * the expression.) - * - * NOTE: this logic should agree with the estimates used by - * make_subplan() in plan/subselect.c. - */ - { - SubPlan *subplan = (SubPlan *) expr->oper; - Plan *plan = subplan->plan; - Cost subcost; - - if (subplan->sublink->subLinkType == EXISTS_SUBLINK) - { - /* we only need to fetch 1 tuple */ - subcost = plan->startup_cost + - (plan->total_cost - plan->startup_cost) / plan->plan_rows; - } - else if (subplan->sublink->subLinkType == ALL_SUBLINK || - subplan->sublink->subLinkType == ANY_SUBLINK) - { - /* assume we need 50% of the tuples */ - subcost = plan->startup_cost + - 0.50 * (plan->total_cost - plan->startup_cost); - /* XXX what if subplan has been materialized? */ - } - else - { - /* assume we need all tuples */ - subcost = plan->total_cost; - } - *total += subcost; - } - break; - } - /* fall through to examine args of Expr node */ - } - return expression_tree_walker(node, cost_qual_eval_walker, - (void *) total); -} - - -/* - * approx_selectivity - * Quick-and-dirty estimation of clause selectivities. - * The input can be either an implicitly-ANDed list of boolean - * expressions, or a list of RestrictInfo nodes (typically the latter). - * - * The "quick" part comes from caching the selectivity estimates so we can - * avoid recomputing them later. (Since the same clauses are typically - * examined over and over in different possible join trees, this makes a - * big difference.) 
- * - * The "dirty" part comes from the fact that the selectivities of multiple - * clauses are estimated independently and multiplied together. Now - * clauselist_selectivity often can't do any better than that anyhow, but - * for some situations (such as range constraints) it is smarter. - * - * Since we are only using the results to estimate how many potential - * output tuples are generated and passed through qpqual checking, it - * seems OK to live with the approximation. - */ -static Selectivity -approx_selectivity(Query *root, List *quals) -{ - Selectivity total = 1.0; - List *l; - - foreach(l, quals) - { - Node *qual = (Node *) lfirst(l); - Selectivity selec; - - /* - * RestrictInfo nodes contain a this_selec field reserved for this - * routine's use, so that it's not necessary to evaluate the qual - * clause's selectivity more than once. If the clause's - * selectivity hasn't been computed yet, the field will contain - * -1. - */ - if (qual && IsA(qual, RestrictInfo)) - { - RestrictInfo *restrictinfo = (RestrictInfo *) qual; - - if (restrictinfo->this_selec < 0) - restrictinfo->this_selec = - clause_selectivity(root, - (Node *) restrictinfo->clause, - 0); - selec = restrictinfo->this_selec; - } - else - { - /* If it's a bare expression, must always do it the hard way */ - selec = clause_selectivity(root, qual, 0); - } - total *= selec; - } - return total; -} - - -/* - * set_baserel_size_estimates - * Set the size estimates for the given base relation. - * - * The rel's targetlist and restrictinfo list must have been constructed - * already. - * - * We set the following fields of the rel node: - * rows: the estimated number of output tuples (after applying - * restriction clauses). - * width: the estimated average output tuple width in bytes. - * baserestrictcost: estimated cost of evaluating baserestrictinfo clauses. 
- */ -void -set_baserel_size_estimates(Query *root, RelOptInfo *rel) -{ - /* Should only be applied to base relations */ - Assert(length(rel->relids) == 1); - - rel->rows = rel->tuples * - restrictlist_selectivity(root, - rel->baserestrictinfo, - lfirsti(rel->relids)); - - /* - * Force estimate to be at least one row, to make explain output look - * better and to avoid possible divide-by-zero when interpolating - * cost. - */ - if (rel->rows < 1.0) - rel->rows = 1.0; - - rel->baserestrictcost = cost_qual_eval(rel->baserestrictinfo); - - set_rel_width(root, rel); -} - -/* - * set_joinrel_size_estimates - * Set the size estimates for the given join relation. - * - * The rel's targetlist must have been constructed already, and a - * restriction clause list that matches the given component rels must - * be provided. - * - * Since there is more than one way to make a joinrel for more than two - * base relations, the results we get here could depend on which component - * rel pair is provided. In theory we should get the same answers no matter - * which pair is provided; in practice, since the selectivity estimation - * routines don't handle all cases equally well, we might not. But there's - * not much to be done about it. (Would it make sense to repeat the - * calculations for each pair of input rels that's encountered, and somehow - * average the results? Probably way more trouble than it's worth.) - * - * We set the same relnode fields as set_baserel_size_estimates() does. - */ -void -set_joinrel_size_estimates(Query *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - JoinType jointype, - List *restrictlist) -{ - double temp; - - /* Start with the Cartesian product */ - temp = outer_rel->rows * inner_rel->rows; - - /* - * Apply join restrictivity. 
Note that we are only considering - * clauses that become restriction clauses at this join level; we are - * not double-counting them because they were not considered in - * estimating the sizes of the component rels. - */ - temp *= restrictlist_selectivity(root, - restrictlist, - 0); - - /* - * If we are doing an outer join, take that into account: the output - * must be at least as large as the non-nullable input. (Is there any - * chance of being even smarter?) - */ - switch (jointype) - { - case JOIN_INNER: - break; - case JOIN_LEFT: - if (temp < outer_rel->rows) - temp = outer_rel->rows; - break; - case JOIN_RIGHT: - if (temp < inner_rel->rows) - temp = inner_rel->rows; - break; - case JOIN_FULL: - if (temp < outer_rel->rows) - temp = outer_rel->rows; - if (temp < inner_rel->rows) - temp = inner_rel->rows; - break; - default: - elog(ERROR, "set_joinrel_size_estimates: unsupported join type %d", - (int) jointype); - break; - } - - /* - * Force estimate to be at least one row, to make explain output look - * better and to avoid possible divide-by-zero when interpolating - * cost. - */ - if (temp < 1.0) - temp = 1.0; - - rel->rows = temp; - - /* - * We could apply set_rel_width() to compute the output tuple width - * from scratch, but at present it's always just the sum of the input - * widths, so why work harder than necessary? If relnode.c is ever - * taught to remove unneeded columns from join targetlists, go back to - * using set_rel_width here. - */ - rel->width = outer_rel->width + inner_rel->width; -} - -/* - * set_function_size_estimates - * Set the size estimates for a base relation that is a function call. - * - * The rel's targetlist and restrictinfo list must have been constructed - * already. - * - * We set the following fields of the rel node: - * rows: the estimated number of output tuples (after applying - * restriction clauses). - * width: the estimated average output tuple width in bytes. 
- * baserestrictcost: estimated cost of evaluating baserestrictinfo clauses. - */ -void -set_function_size_estimates(Query *root, RelOptInfo *rel) -{ - /* Should only be applied to base relations that are functions */ - Assert(length(rel->relids) == 1); - Assert(rel->rtekind == RTE_FUNCTION); - - /* - * Estimate number of rows the function itself will return. - * - * XXX no idea how to do this yet; but should at least check whether - * function returns set or not... - */ - rel->tuples = 1000; - - /* Now estimate number of output rows */ - rel->rows = rel->tuples * - restrictlist_selectivity(root, - rel->baserestrictinfo, - lfirsti(rel->relids)); - - /* - * Force estimate to be at least one row, to make explain output look - * better and to avoid possible divide-by-zero when interpolating - * cost. - */ - if (rel->rows < 1.0) - rel->rows = 1.0; - - rel->baserestrictcost = cost_qual_eval(rel->baserestrictinfo); - - set_rel_width(root, rel); -} - - -/* - * set_rel_width - * Set the estimated output width of the relation. - * - * NB: this works best on base relations because it prefers to look at - * real Vars. It will fail to make use of pg_statistic info when applied - * to a subquery relation, even if the subquery outputs are simple vars - * that we could have gotten info for. Is it worth trying to be smarter - * about subqueries? - */ -static void -set_rel_width(Query *root, RelOptInfo *rel) -{ - int32 tuple_width = 0; - List *tllist; - - foreach(tllist, rel->targetlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(tllist); - int32 item_width; - - /* - * If it's a Var, try to get statistical info from pg_statistic. 
- */ - if (tle->expr && IsA(tle->expr, Var)) - { - Var *var = (Var *) tle->expr; - Oid relid; - - relid = getrelid(var->varno, root->rtable); - if (relid != InvalidOid) - { - item_width = get_attavgwidth(relid, var->varattno); - if (item_width > 0) - { - tuple_width += item_width; - continue; - } - } - } - - /* - * Not a Var, or can't find statistics for it. Estimate using - * just the type info. - */ - item_width = get_typavgwidth(tle->resdom->restype, - tle->resdom->restypmod); - Assert(item_width > 0); - tuple_width += item_width; - } - Assert(tuple_width >= 0); - rel->width = tuple_width; -} - -/* - * relation_byte_size - * Estimate the storage space in bytes for a given number of tuples - * of a given width (size in bytes). - */ -static double -relation_byte_size(double tuples, int width) -{ - return tuples * ((double) MAXALIGN(width + sizeof(HeapTupleData))); -} - -/* - * page_size - * Returns an estimate of the number of pages covered by a given - * number of tuples of a given width (size in bytes). - */ -static double -page_size(double tuples, int width) -{ - return ceil(relation_byte_size(tuples, width) / BLCKSZ); -} diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c deleted file mode 100644 index baeed1f34b2..00000000000 --- a/src/backend/optimizer/path/indxpath.c +++ /dev/null @@ -1,2188 +0,0 @@ -/*------------------------------------------------------------------------- - * - * indxpath.c - * Routines to determine which indices are usable for scanning a - * given relation, and create IndexPaths accordingly. 
- * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.119 2002/06/20 20:29:29 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include <math.h> - -#include "access/heapam.h" -#include "access/nbtree.h" -#include "catalog/catname.h" -#include "catalog/pg_amop.h" -#include "catalog/pg_namespace.h" -#include "catalog/pg_operator.h" -#include "executor/executor.h" -#include "nodes/makefuncs.h" -#include "nodes/nodeFuncs.h" -#include "optimizer/clauses.h" -#include "optimizer/cost.h" -#include "optimizer/pathnode.h" -#include "optimizer/paths.h" -#include "optimizer/restrictinfo.h" -#include "optimizer/var.h" -#include "parser/parse_coerce.h" -#include "parser/parse_expr.h" -#include "parser/parse_oper.h" -#include "utils/builtins.h" -#include "utils/fmgroids.h" -#include "utils/lsyscache.h" -#include "utils/selfuncs.h" -#include "utils/syscache.h" - - -/* - * DoneMatchingIndexKeys() - MACRO - * - * Determine whether we should continue matching index keys in a clause. - * Depends on if there are more to match or if this is a functional index. - * In the latter case we stop after the first match since there can - * be only 1 key (i.e. the function's return value) and the attributes in - * keys list represent the arguments to the function. -mer 3 Oct. 
1991 - */ -#define DoneMatchingIndexKeys(indexkeys, index) \ - (indexkeys[0] == 0 || \ - (index->indproc != InvalidOid)) - -#define is_indexable_operator(clause,opclass,indexkey_on_left) \ - (indexable_operator(clause,opclass,indexkey_on_left) != InvalidOid) - - -static void match_index_orclauses(RelOptInfo *rel, IndexOptInfo *index, - List *restrictinfo_list); -static List *match_index_orclause(RelOptInfo *rel, IndexOptInfo *index, - List *or_clauses, - List *other_matching_indices); -static bool match_or_subclause_to_indexkey(RelOptInfo *rel, - IndexOptInfo *index, - Expr *clause); -static List *group_clauses_by_indexkey(RelOptInfo *rel, IndexOptInfo *index, - int *indexkeys, Oid *classes, - List *restrictinfo_list); -static List *group_clauses_by_ikey_for_joins(RelOptInfo *rel, - IndexOptInfo *index, - int *indexkeys, Oid *classes, - List *join_cinfo_list, - List *restr_cinfo_list); -static bool match_clause_to_indexkey(RelOptInfo *rel, IndexOptInfo *index, - int indexkey, Oid opclass, - Expr *clause, bool join); -static bool pred_test(List *predicate_list, List *restrictinfo_list, - List *joininfo_list); -static bool pred_test_restrict_list(Expr *predicate, List *restrictinfo_list); -static bool pred_test_recurse_clause(Expr *predicate, Node *clause); -static bool pred_test_recurse_pred(Expr *predicate, Node *clause); -static bool pred_test_simple_clause(Expr *predicate, Node *clause); -static void indexable_joinclauses(RelOptInfo *rel, IndexOptInfo *index, - List *joininfo_list, List *restrictinfo_list, - List **clausegroups, List **outerrelids); -static List *index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index, - List *clausegroup_list, List *outerrelids_list); -static bool match_index_to_operand(int indexkey, Var *operand, - RelOptInfo *rel, IndexOptInfo *index); -static bool function_index_operand(Expr *funcOpnd, RelOptInfo *rel, - IndexOptInfo *index); -static bool match_special_index_operator(Expr *clause, Oid opclass, - bool 
indexkey_on_left); -static List *prefix_quals(Var *leftop, Oid expr_op, - char *prefix, Pattern_Prefix_Status pstatus); -static List *network_prefix_quals(Var *leftop, Oid expr_op, Datum rightop); -static Oid find_operator(const char *opname, Oid datatype); -static Datum string_to_datum(const char *str, Oid datatype); -static Const *string_to_const(const char *str, Oid datatype); - - -/* - * create_index_paths() - * Generate all interesting index paths for the given relation. - * Candidate paths are added to the rel's pathlist (using add_path). - * Additional IndexPath nodes may also be added to rel's innerjoin list. - * - * To be considered for an index scan, an index must match one or more - * restriction clauses or join clauses from the query's qual condition, - * or match the query's ORDER BY condition. - * - * There are two basic kinds of index scans. A "plain" index scan uses - * only restriction clauses (possibly none at all) in its indexqual, - * so it can be applied in any context. An "innerjoin" index scan uses - * join clauses (plus restriction clauses, if available) in its indexqual. - * Therefore it can only be used as the inner relation of a nestloop - * join against an outer rel that includes all the other rels mentioned - * in its join clauses. In that context, values for the other rels' - * attributes are available and fixed during any one scan of the indexpath. - * - * An IndexPath is generated and submitted to add_path() for each index - * this routine deems potentially interesting for the current query. - * An innerjoin path is also generated for each interesting combination of - * outer join relations. The innerjoin paths are *not* passed to add_path(), - * but are appended to the "innerjoin" list of the relation for later - * consideration in nested-loop joins. 
- * - * 'rel' is the relation for which we want to generate index paths - */ -void -create_index_paths(Query *root, RelOptInfo *rel) -{ - List *restrictinfo_list = rel->baserestrictinfo; - List *joininfo_list = rel->joininfo; - List *ilist; - - foreach(ilist, rel->indexlist) - { - IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist); - List *restrictclauses; - List *index_pathkeys; - List *useful_pathkeys; - bool index_is_ordered; - List *joinclausegroups; - List *joinouterrelids; - - /* - * If this is a partial index, we can only use it if it passes the - * predicate test. - */ - if (index->indpred != NIL) - if (!pred_test(index->indpred, restrictinfo_list, joininfo_list)) - continue; - - /* - * 1. Try matching the index against subclauses of restriction - * 'or' clauses (ie, 'or' clauses that reference only this - * relation). The restrictinfo nodes for the 'or' clauses are - * marked with lists of the matching indices. No paths are - * actually created now; that will be done in orindxpath.c after - * all indexes for the rel have been examined. (We need to do it - * that way because we can potentially use a different index for - * each subclause of an 'or', so we can't build a path for an 'or' - * clause until all indexes have been matched against it.) - * - * We don't even think about special handling of 'or' clauses that - * involve more than one relation (ie, are join clauses). Can we - * do anything useful with those? - */ - match_index_orclauses(rel, index, restrictinfo_list); - - /* - * 2. Match the index against non-'or' restriction clauses. - */ - restrictclauses = group_clauses_by_indexkey(rel, - index, - index->indexkeys, - index->classlist, - restrictinfo_list); - - /* - * 3. Compute pathkeys describing index's ordering, if any, then - * see how many of them are actually useful for this query. 
- */ - index_pathkeys = build_index_pathkeys(root, rel, index, - ForwardScanDirection); - index_is_ordered = (index_pathkeys != NIL); - useful_pathkeys = truncate_useless_pathkeys(root, rel, - index_pathkeys); - - /* - * 4. Generate an indexscan path if there are relevant restriction - * clauses OR the index ordering is potentially useful for later - * merging or final output ordering. - * - * If there is a predicate, consider it anyway since the index - * predicate has already been found to match the query. The - * selectivity of the predicate might alone make the index useful. - */ - if (restrictclauses != NIL || - useful_pathkeys != NIL || - index->indpred != NIL) - add_path(rel, (Path *) - create_index_path(root, rel, index, - restrictclauses, - useful_pathkeys, - index_is_ordered ? - ForwardScanDirection : - NoMovementScanDirection)); - - /* - * 5. If the index is ordered, a backwards scan might be - * interesting. Currently this is only possible for a DESC query - * result ordering. - */ - if (index_is_ordered) - { - index_pathkeys = build_index_pathkeys(root, rel, index, - BackwardScanDirection); - useful_pathkeys = truncate_useless_pathkeys(root, rel, - index_pathkeys); - if (useful_pathkeys != NIL) - add_path(rel, (Path *) - create_index_path(root, rel, index, - restrictclauses, - useful_pathkeys, - BackwardScanDirection)); - } - - /* - * 6. Create an innerjoin index path for each combination of other - * rels used in available join clauses. These paths will be - * considered as the inner side of nestloop joins against those - * sets of other rels. indexable_joinclauses() finds sets of - * clauses that can be used with each combination of outer rels, - * and index_innerjoin builds the paths themselves. We add the - * paths to the rel's innerjoin list, NOT to the result list. 
- */ - indexable_joinclauses(rel, index, - joininfo_list, restrictinfo_list, - &joinclausegroups, - &joinouterrelids); - if (joinclausegroups != NIL) - { - rel->innerjoin = nconc(rel->innerjoin, - index_innerjoin(root, rel, index, - joinclausegroups, - joinouterrelids)); - } - } -} - - -/**************************************************************************** - * ---- ROUTINES TO PROCESS 'OR' CLAUSES ---- - ****************************************************************************/ - - -/* - * match_index_orclauses - * Attempt to match an index against subclauses within 'or' clauses. - * Each subclause that does match is marked with the index's node. - * - * Essentially, this adds 'index' to the list of subclause indices in - * the RestrictInfo field of each of the 'or' clauses where it matches. - * NOTE: we can use storage in the RestrictInfo for this purpose because - * this processing is only done on single-relation restriction clauses. - * Therefore, we will never have indexes for more than one relation - * mentioned in the same RestrictInfo node's list. - * - * 'rel' is the node of the relation on which the index is defined. - * 'index' is the index node. - * 'restrictinfo_list' is the list of available restriction clauses. - */ -static void -match_index_orclauses(RelOptInfo *rel, - IndexOptInfo *index, - List *restrictinfo_list) -{ - List *i; - - foreach(i, restrictinfo_list) - { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i); - - if (restriction_is_or_clause(restrictinfo)) - { - /* - * Add this index to the subclause index list for each - * subclause that it matches. - */ - restrictinfo->subclauseindices = - match_index_orclause(rel, index, - restrictinfo->clause->args, - restrictinfo->subclauseindices); - } - } -} - -/* - * match_index_orclause - * Attempts to match an index against the subclauses of an 'or' clause. 
- * - * A match means that: - * (1) the operator within the subclause can be used with the - * index's specified operator class, and - * (2) one operand of the subclause matches the index key. - * - * If a subclause is an 'and' clause, then it matches if any of its - * subclauses is an opclause that matches. - * - * 'or_clauses' is the list of subclauses within the 'or' clause - * 'other_matching_indices' is the list of information on other indices - * that have already been matched to subclauses within this - * particular 'or' clause (i.e., a list previously generated by - * this routine), or NIL if this routine has not previously been - * run for this 'or' clause. - * - * Returns a list of the form ((a b c) (d e f) nil (g h) ...) where - * a,b,c are nodes of indices that match the first subclause in - * 'or-clauses', d,e,f match the second subclause, no indices - * match the third, g,h match the fourth, etc. - */ -static List * -match_index_orclause(RelOptInfo *rel, - IndexOptInfo *index, - List *or_clauses, - List *other_matching_indices) -{ - List *matching_indices; - List *index_list; - List *clist; - - /* - * first time through, we create list of same length as OR clause, - * containing an empty sublist for each subclause. - */ - if (!other_matching_indices) - { - matching_indices = NIL; - foreach(clist, or_clauses) - matching_indices = lcons(NIL, matching_indices); - } - else - matching_indices = other_matching_indices; - - index_list = matching_indices; - - foreach(clist, or_clauses) - { - Expr *clause = lfirst(clist); - - if (match_or_subclause_to_indexkey(rel, index, clause)) - { - /* OK to add this index to sublist for this subclause */ - lfirst(matching_indices) = lcons(index, - lfirst(matching_indices)); - } - - matching_indices = lnext(matching_indices); - } - - return index_list; -} - -/* - * See if a subclause of an OR clause matches an index. 
- * - * We accept the subclause if it is an operator clause that matches the - * index, or if it is an AND clause any of whose members is an opclause - * that matches the index. - * - * For multi-key indexes, we only look for matches to the first key; - * without such a match the index is useless. If the clause is an AND - * then we may be able to extract additional subclauses to use with the - * later indexkeys, but we need not worry about that until - * extract_or_indexqual_conditions() is called (if it ever is). - */ -static bool -match_or_subclause_to_indexkey(RelOptInfo *rel, - IndexOptInfo *index, - Expr *clause) -{ - int indexkey = index->indexkeys[0]; - Oid opclass = index->classlist[0]; - - if (and_clause((Node *) clause)) - { - List *item; - - foreach(item, clause->args) - { - if (match_clause_to_indexkey(rel, index, indexkey, opclass, - lfirst(item), false)) - return true; - } - return false; - } - else - return match_clause_to_indexkey(rel, index, indexkey, opclass, - clause, false); -} - -/*---------- - * Given an OR subclause that has previously been determined to match - * the specified index, extract a list of specific opclauses that can be - * used as indexquals. - * - * In the simplest case this just means making a one-element list of the - * given opclause. However, if the OR subclause is an AND, we have to - * scan it to find the opclause(s) that match the index. (There should - * be at least one, if match_or_subclause_to_indexkey succeeded, but there - * could be more.) - * - * Also, we can look at other restriction clauses of the rel to discover - * additional candidate indexquals: for example, consider - * ... where (a = 11 or a = 12) and b = 42; - * If we are dealing with an index on (a,b) then we can include the clause - * b = 42 in the indexqual list generated for each of the OR subclauses. - * Essentially, we are making an index-specific transformation from CNF to - * DNF. 
(NOTE: when we do this, we end up with a slightly inefficient plan - * because create_indexscan_plan is not very bright about figuring out which - * restriction clauses are implied by the generated indexqual condition. - * Currently we'll end up rechecking both the OR clause and the transferred - * restriction clause as qpquals. FIXME someday.) - * - * Also, we apply expand_indexqual_conditions() to convert any special - * matching opclauses to indexable operators. - * - * The passed-in clause is not changed. - *---------- - */ -List * -extract_or_indexqual_conditions(RelOptInfo *rel, - IndexOptInfo *index, - Expr *orsubclause) -{ - List *quals = NIL; - int *indexkeys = index->indexkeys; - Oid *classes = index->classlist; - - /* - * Extract relevant indexclauses in indexkey order. This is - * essentially just like group_clauses_by_indexkey() except that the - * input and output are lists of bare clauses, not of RestrictInfo - * nodes. - */ - do - { - int curIndxKey = indexkeys[0]; - Oid curClass = classes[0]; - List *clausegroup = NIL; - List *item; - - if (and_clause((Node *) orsubclause)) - { - foreach(item, orsubclause->args) - { - Expr *subsubclause = (Expr *) lfirst(item); - - if (match_clause_to_indexkey(rel, index, - curIndxKey, curClass, - subsubclause, false)) - clausegroup = lappend(clausegroup, subsubclause); - } - } - else if (match_clause_to_indexkey(rel, index, - curIndxKey, curClass, - orsubclause, false)) - clausegroup = makeList1(orsubclause); - - /* - * If we found no clauses for this indexkey in the OR subclause - * itself, try looking in the rel's top-level restriction list. 
- */ - if (clausegroup == NIL) - { - foreach(item, rel->baserestrictinfo) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(item); - - if (match_clause_to_indexkey(rel, index, - curIndxKey, curClass, - rinfo->clause, false)) - clausegroup = lappend(clausegroup, rinfo->clause); - } - } - - /* - * If still no clauses match this key, we're done; we don't want - * to look at keys to its right. - */ - if (clausegroup == NIL) - break; - - quals = nconc(quals, clausegroup); - - indexkeys++; - classes++; - } while (!DoneMatchingIndexKeys(indexkeys, index)); - - if (quals == NIL) - elog(ERROR, "extract_or_indexqual_conditions: no matching clause"); - - return expand_indexqual_conditions(quals); -} - - -/**************************************************************************** - * ---- ROUTINES TO CHECK RESTRICTIONS ---- - ****************************************************************************/ - - -/* - * group_clauses_by_indexkey - * Generates a list of restriction clauses that can be used with an index. - * - * 'rel' is the node of the relation itself. - * 'index' is a index on 'rel'. - * 'indexkeys' are the index keys to be matched. - * 'classes' are the classes of the index operators on those keys. - * 'restrictinfo_list' is the list of available restriction clauses for 'rel'. - * - * Returns a list of all the RestrictInfo nodes for clauses that can be - * used with this index. - * - * The list is ordered by index key. (This is not depended on by any part - * of the planner, as far as I can tell; but some parts of the executor - * do assume that the indxqual list ultimately delivered to the executor - * is so ordered. One such place is _bt_orderkeys() in the btree support. - * Perhaps that ought to be fixed someday --- tgl 7/00) - * - * Note that in a multi-key index, we stop if we find a key that cannot be - * used with any clause. 
For example, given an index on (A,B,C), we might - * return (C1 C2 C3 C4) if we find that clauses C1 and C2 use column A, - * clauses C3 and C4 use column B, and no clauses use column C. But if - * no clauses match B we will return (C1 C2), whether or not there are - * clauses matching column C, because the executor couldn't use them anyway. - */ -static List * -group_clauses_by_indexkey(RelOptInfo *rel, - IndexOptInfo *index, - int *indexkeys, - Oid *classes, - List *restrictinfo_list) -{ - List *clausegroup_list = NIL; - - if (restrictinfo_list == NIL || indexkeys[0] == 0) - return NIL; - - do - { - int curIndxKey = indexkeys[0]; - Oid curClass = classes[0]; - List *clausegroup = NIL; - List *curCinfo; - - foreach(curCinfo, restrictinfo_list) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(curCinfo); - - if (match_clause_to_indexkey(rel, - index, - curIndxKey, - curClass, - rinfo->clause, - false)) - clausegroup = lappend(clausegroup, rinfo); - } - - /* - * If no clauses match this key, we're done; we don't want to look - * at keys to its right. - */ - if (clausegroup == NIL) - break; - - clausegroup_list = nconc(clausegroup_list, clausegroup); - - indexkeys++; - classes++; - - } while (!DoneMatchingIndexKeys(indexkeys, index)); - - /* clausegroup_list holds all matched clauses ordered by indexkeys */ - return clausegroup_list; -} - -/* - * group_clauses_by_ikey_for_joins - * Generates a list of join clauses that can be used with an index - * to scan the inner side of a nestloop join. - * - * This is much like group_clauses_by_indexkey(), but we consider both - * join and restriction clauses. For each indexkey in the index, we - * accept both join and restriction clauses that match it, since both - * will make useful indexquals if the index is being used to scan the - * inner side of a nestloop join. But there must be at least one matching - * join clause, or we return NIL indicating that this index isn't useful - * for nestloop joining. 
- */ -static List * -group_clauses_by_ikey_for_joins(RelOptInfo *rel, - IndexOptInfo *index, - int *indexkeys, - Oid *classes, - List *join_cinfo_list, - List *restr_cinfo_list) -{ - List *clausegroup_list = NIL; - bool jfound = false; - - if (join_cinfo_list == NIL || indexkeys[0] == 0) - return NIL; - - do - { - int curIndxKey = indexkeys[0]; - Oid curClass = classes[0]; - List *clausegroup = NIL; - List *curCinfo; - - foreach(curCinfo, join_cinfo_list) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(curCinfo); - - if (match_clause_to_indexkey(rel, - index, - curIndxKey, - curClass, - rinfo->clause, - true)) - { - clausegroup = lappend(clausegroup, rinfo); - jfound = true; - } - } - foreach(curCinfo, restr_cinfo_list) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(curCinfo); - - if (match_clause_to_indexkey(rel, - index, - curIndxKey, - curClass, - rinfo->clause, - false)) - clausegroup = lappend(clausegroup, rinfo); - } - - /* - * If no clauses match this key, we're done; we don't want to look - * at keys to its right. - */ - if (clausegroup == NIL) - break; - - clausegroup_list = nconc(clausegroup_list, clausegroup); - - indexkeys++; - classes++; - - } while (!DoneMatchingIndexKeys(indexkeys, index)); - - /* - * if no join clause was matched then there ain't clauses for joins at - * all. - */ - if (!jfound) - { - freeList(clausegroup_list); - return NIL; - } - - /* clausegroup_list holds all matched clauses ordered by indexkeys */ - return clausegroup_list; -} - - -/* - * match_clause_to_indexkey() - * Determines whether a restriction or join clause matches - * a key of an index. 
- * - * To match, the clause: - * - * (1a) for a restriction clause: must be in the form (indexkey op const) - * or (const op indexkey), or - * (1b) for a join clause: must be in the form (indexkey op others) - * or (others op indexkey), where others is an expression involving - * only vars of the other relation(s); and - * (2) must contain an operator which is in the same class as the index - * operator for this key, or is a "special" operator as recognized - * by match_special_index_operator(). - * - * Presently, the executor can only deal with indexquals that have the - * indexkey on the left, so we can only use clauses that have the indexkey - * on the right if we can commute the clause to put the key on the left. - * We do not actually do the commuting here, but we check whether a - * suitable commutator operator is available. - * - * Note that in the join case, we already know that the clause as a - * whole uses vars from the interesting set of relations. But we need - * to defend against expressions like (a.f1 OP (b.f2 OP a.f3)); that's - * not processable by an indexscan nestloop join, whereas - * (a.f1 OP (b.f2 OP c.f3)) is. - * - * 'rel' is the relation of interest. - * 'index' is an index on 'rel'. - * 'indexkey' is a key of 'index'. - * 'opclass' is the corresponding operator class. - * 'clause' is the clause to be tested. - * 'join' is true if we are considering this clause for joins. - * - * Returns true if the clause can be used with this index key. - * - * NOTE: returns false if clause is an OR or AND clause; it is the - * responsibility of higher-level routines to cope with those. - */ -static bool -match_clause_to_indexkey(RelOptInfo *rel, - IndexOptInfo *index, - int indexkey, - Oid opclass, - Expr *clause, - bool join) -{ - Var *leftop, - *rightop; - - /* Clause must be a binary opclause. 
*/ - if (!is_opclause((Node *) clause)) - return false; - leftop = get_leftop(clause); - rightop = get_rightop(clause); - if (!leftop || !rightop) - return false; - - if (!join) - { - /* - * Not considering joins, so check for clauses of the form: - * (indexkey operator constant) or (constant operator indexkey). - * Anything that is a "pseudo constant" expression will do. - */ - - if (match_index_to_operand(indexkey, leftop, rel, index) && - is_pseudo_constant_clause((Node *) rightop)) - { - if (is_indexable_operator(clause, opclass, true)) - return true; - - /* - * If we didn't find a member of the index's opclass, see - * whether it is a "special" indexable operator. - */ - if (match_special_index_operator(clause, opclass, true)) - return true; - return false; - } - if (match_index_to_operand(indexkey, rightop, rel, index) && - is_pseudo_constant_clause((Node *) leftop)) - { - if (is_indexable_operator(clause, opclass, false)) - return true; - - /* - * If we didn't find a member of the index's opclass, see - * whether it is a "special" indexable operator. - */ - if (match_special_index_operator(clause, opclass, false)) - return true; - return false; - } - } - else - { - /* - * Check for an indexqual that could be handled by a nestloop - * join. We need the index key to be compared against an - * expression that uses none of the indexed relation's vars and - * contains no volatile functions. 
- */ - if (match_index_to_operand(indexkey, leftop, rel, index)) - { - List *othervarnos = pull_varnos((Node *) rightop); - bool isIndexable; - - isIndexable = - !intMember(lfirsti(rel->relids), othervarnos) && - !contain_volatile_functions((Node *) rightop) && - is_indexable_operator(clause, opclass, true); - freeList(othervarnos); - return isIndexable; - } - else if (match_index_to_operand(indexkey, rightop, rel, index)) - { - List *othervarnos = pull_varnos((Node *) leftop); - bool isIndexable; - - isIndexable = - !intMember(lfirsti(rel->relids), othervarnos) && - !contain_volatile_functions((Node *) leftop) && - is_indexable_operator(clause, opclass, false); - freeList(othervarnos); - return isIndexable; - } - } - - return false; -} - -/* - * indexable_operator - * Does a binary opclause contain an operator matching the index opclass? - * - * If the indexkey is on the right, what we actually want to know - * is whether the operator has a commutator operator that matches - * the index's opclass. - * - * We try both the straightforward match and matches that rely on - * recognizing binary-compatible datatypes. For example, if we have - * an expression like "oid = 123", the operator will be oideqint4, - * which we need to replace with oideq in order to recognize it as - * matching an oid_ops index on the oid field. A variant case is where - * the expression is like "oid::int4 = 123", where the given operator - * will be int4eq and again we need to intuit that we want to use oideq. - * - * Returns the OID of the matching operator, or InvalidOid if no match. - * Note that the returned OID will be different from the one in the given - * expression if we used a binary-compatible substitution. Also note that - * if indexkey_on_left is FALSE (meaning we need to commute), the returned - * OID is *not* commuted; it can be plugged directly into the given clause. 
- */ -Oid -indexable_operator(Expr *clause, Oid opclass, bool indexkey_on_left) -{ - Oid expr_op = ((Oper *) clause->oper)->opno; - Oid commuted_op, - new_op; - Operator oldoptup; - Form_pg_operator oldopform; - char *opname; - Oid ltype, - rtype, - indexkeytype; - - /* Get the commuted operator if necessary */ - if (indexkey_on_left) - commuted_op = expr_op; - else - commuted_op = get_commutator(expr_op); - if (commuted_op == InvalidOid) - return InvalidOid; - - /* Done if the (commuted) operator is a member of the index's opclass */ - if (op_in_opclass(commuted_op, opclass)) - return expr_op; - - /* - * Maybe the index uses a binary-compatible operator set. - * - * Get the nominal input types of the given operator and the actual type - * (before binary-compatible relabeling) of the index key. - */ - oldoptup = SearchSysCache(OPEROID, - ObjectIdGetDatum(expr_op), - 0, 0, 0); - if (!HeapTupleIsValid(oldoptup)) - return InvalidOid; /* probably can't happen */ - oldopform = (Form_pg_operator) GETSTRUCT(oldoptup); - opname = pstrdup(NameStr(oldopform->oprname)); - ltype = oldopform->oprleft; - rtype = oldopform->oprright; - ReleaseSysCache(oldoptup); - - if (indexkey_on_left) - { - Node *leftop = (Node *) get_leftop(clause); - - if (leftop && IsA(leftop, RelabelType)) - leftop = ((RelabelType *) leftop)->arg; - indexkeytype = exprType(leftop); - } - else - { - Node *rightop = (Node *) get_rightop(clause); - - if (rightop && IsA(rightop, RelabelType)) - rightop = ((RelabelType *) rightop)->arg; - indexkeytype = exprType(rightop); - } - - /* - * Make sure we have different but binary-compatible types. - */ - if (ltype == indexkeytype && rtype == indexkeytype) - return InvalidOid; /* no chance for a different operator */ - if (!IsBinaryCompatible(ltype, indexkeytype)) - return InvalidOid; - if (!IsBinaryCompatible(rtype, indexkeytype)) - return InvalidOid; - - /* - * OK, look for operator of the same name with the indexkey's data - * type. 
(In theory this might find a non-semantically-comparable - * operator, but in practice that seems pretty unlikely for - * binary-compatible types.) - */ - new_op = compatible_oper_opid(makeList1(makeString(opname)), - indexkeytype, indexkeytype, true); - - if (OidIsValid(new_op)) - { - if (new_op != expr_op) - { - /* - * OK, we found a binary-compatible operator of the same name; - * now does it match the index? - */ - if (indexkey_on_left) - commuted_op = new_op; - else - commuted_op = get_commutator(new_op); - if (commuted_op == InvalidOid) - return InvalidOid; - - if (op_in_opclass(commuted_op, opclass)) - return new_op; - } - } - - return InvalidOid; -} - -/**************************************************************************** - * ---- ROUTINES TO DO PARTIAL INDEX PREDICATE TESTS ---- - ****************************************************************************/ - -/* - * pred_test - * Does the "predicate inclusion test" for partial indexes. - * - * Recursively checks whether the clauses in restrictinfo_list imply - * that the given predicate is true. - * - * This routine (together with the routines it calls) iterates over - * ANDs in the predicate first, then reduces the qualification - * clauses down to their constituent terms, and iterates over ORs - * in the predicate last. This order is important to make the test - * succeed whenever possible (assuming the predicate has been converted - * to CNF format). --Nels, Jan '93 - */ -static bool -pred_test(List *predicate_list, List *restrictinfo_list, List *joininfo_list) -{ - List *pred; - - /* - * Note: if Postgres tried to optimize queries by forming equivalence - * classes over equi-joined attributes (i.e., if it recognized that a - * qualification such as "where a.b=c.d and a.b=5" could make use of - * an index on c.d), then we could use that equivalence class info - * here with joininfo_list to do more complete tests for the usability - * of a partial index. 
For now, the test only uses restriction - * clauses (those in restrictinfo_list). --Nels, Dec '92 - * - * XXX as of 7.1, equivalence class info *is* available. Consider - * improving this code as foreseen by Nels. - */ - - if (predicate_list == NIL) - return true; /* no predicate: the index is usable */ - if (restrictinfo_list == NIL) - return false; /* no restriction clauses: the test must - * fail */ - - foreach(pred, predicate_list) - { - /* - * if any clause is not implied, the whole predicate is not - * implied. Note we assume that any sub-ANDs have been flattened - * when the predicate was fed through canonicalize_qual(). - */ - if (!pred_test_restrict_list(lfirst(pred), restrictinfo_list)) - return false; - } - return true; -} - - -/* - * pred_test_restrict_list - * Does the "predicate inclusion test" for one conjunct of a predicate - * expression. - */ -static bool -pred_test_restrict_list(Expr *predicate, List *restrictinfo_list) -{ - List *item; - - foreach(item, restrictinfo_list) - { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(item); - - /* if any clause implies the predicate, return true */ - if (pred_test_recurse_clause(predicate, - (Node *) restrictinfo->clause)) - return true; - } - return false; -} - - -/* - * pred_test_recurse_clause - * Does the "predicate inclusion test" for a general restriction-clause - * expression. Here we recursively deal with the possibility that the - * restriction clause is itself an AND or OR structure. 
- */ -static bool -pred_test_recurse_clause(Expr *predicate, Node *clause) -{ - List *items, - *item; - - Assert(clause != NULL); - if (or_clause(clause)) - { - items = ((Expr *) clause)->args; - foreach(item, items) - { - /* if any OR item doesn't imply the predicate, clause doesn't */ - if (!pred_test_recurse_clause(predicate, lfirst(item))) - return false; - } - return true; - } - else if (and_clause(clause)) - { - items = ((Expr *) clause)->args; - foreach(item, items) - { - /* - * if any AND item implies the predicate, the whole clause - * does - */ - if (pred_test_recurse_clause(predicate, lfirst(item))) - return true; - } - return false; - } - else - return pred_test_recurse_pred(predicate, clause); -} - - -/* - * pred_test_recurse_pred - * Does the "predicate inclusion test" for one conjunct of a predicate - * expression for a simple restriction clause. Here we recursively deal - * with the possibility that the predicate conjunct is itself an AND or - * OR structure. - */ -static bool -pred_test_recurse_pred(Expr *predicate, Node *clause) -{ - List *items, - *item; - - Assert(predicate != NULL); - if (or_clause((Node *) predicate)) - { - items = predicate->args; - foreach(item, items) - { - /* if any item is implied, the whole predicate is implied */ - if (pred_test_recurse_pred(lfirst(item), clause)) - return true; - } - return false; - } - else if (and_clause((Node *) predicate)) - { - items = predicate->args; - foreach(item, items) - { - /* - * if any item is not implied, the whole predicate is not - * implied - */ - if (!pred_test_recurse_pred(lfirst(item), clause)) - return false; - } - return true; - } - else - return pred_test_simple_clause(predicate, clause); -} - - -/* - * Define an "operator implication table" for btree operators ("strategies"). 
- * The "strategy numbers" are: (1) < (2) <= (3) = (4) >= (5) > - * - * The interpretation of: - * - * test_op = BT_implic_table[given_op-1][target_op-1] - * - * where test_op, given_op and target_op are strategy numbers (from 1 to 5) - * of btree operators, is as follows: - * - * If you know, for some ATTR, that "ATTR given_op CONST1" is true, and you - * want to determine whether "ATTR target_op CONST2" must also be true, then - * you can use "CONST1 test_op CONST2" as a test. If this test returns true, - * then the target expression must be true; if the test returns false, then - * the target expression may be false. - * - * An entry where test_op==0 means the implication cannot be determined, i.e., - * this test should always be considered false. - */ - -static const StrategyNumber - BT_implic_table[BTMaxStrategyNumber][BTMaxStrategyNumber] = { - {2, 2, 0, 0, 0}, - {1, 2, 0, 0, 0}, - {1, 2, 3, 4, 5}, - {0, 0, 0, 4, 5}, - {0, 0, 0, 4, 4} -}; - - -/* - * pred_test_simple_clause - * Does the "predicate inclusion test" for a "simple clause" predicate - * and a "simple clause" restriction. - * - * We have two strategies for determining whether one simple clause - * implies another. A simple and general way is to see if they are - * equal(); this works for any kind of expression. (Actually, there - * is an implied assumption that the functions in the expression are - * immutable, ie dependent only on their input arguments --- but this - * was checked for the predicate by CheckPredicate().) - * - * Our other way works only for (binary boolean) operators that are - * in some btree operator class. We use the above operator implication - * table to be able to derive implications between nonidentical clauses. - * - * Eventually, rtree operators could also be handled by defining an - * appropriate "RT_implic_table" array. 
- */ -static bool -pred_test_simple_clause(Expr *predicate, Node *clause) -{ - Var *pred_var, - *clause_var; - Const *pred_const, - *clause_const; - Oid pred_op, - clause_op, - test_op; - Oid opclass_id = InvalidOid; - StrategyNumber pred_strategy = 0, - clause_strategy, - test_strategy; - Oper *test_oper; - Expr *test_expr; - Datum test_result; - bool isNull; - Relation relation; - HeapScanDesc scan; - HeapTuple tuple; - ScanKeyData entry[1]; - Form_pg_amop aform; - ExprContext *econtext; - - /* First try the equal() test */ - if (equal((Node *) predicate, clause)) - return true; - - /* - * Can't do anything more unless they are both binary opclauses with a - * Var on the left and a Const on the right. - */ - if (!is_opclause((Node *) predicate)) - return false; - pred_var = (Var *) get_leftop(predicate); - pred_const = (Const *) get_rightop(predicate); - - if (!is_opclause(clause)) - return false; - clause_var = (Var *) get_leftop((Expr *) clause); - clause_const = (Const *) get_rightop((Expr *) clause); - - if (!IsA(clause_var, Var) || - clause_const == NULL || - !IsA(clause_const, Const) || - !IsA(pred_var, Var) || - pred_const == NULL || - !IsA(pred_const, Const)) - return false; - - /* - * The implication can't be determined unless the predicate and the - * clause refer to the same attribute. - */ - if (clause_var->varno != pred_var->varno || - clause_var->varattno != pred_var->varattno) - return false; - - /* Get the operators for the two clauses we're comparing */ - pred_op = ((Oper *) ((Expr *) predicate)->oper)->opno; - clause_op = ((Oper *) ((Expr *) clause)->oper)->opno; - - /* - * 1. Find a "btree" strategy number for the pred_op - * - * The following assumes that any given operator will only be in a single - * btree operator class. This is true at least for all the - * pre-defined operator classes. 
If it isn't true, then whichever - * operator class happens to be returned first for the given operator - * will be used to find the associated strategy numbers for the test. - * --Nels, Jan '93 - */ - ScanKeyEntryInitialize(&entry[0], 0x0, - Anum_pg_amop_amopopr, - F_OIDEQ, - ObjectIdGetDatum(pred_op)); - - relation = heap_openr(AccessMethodOperatorRelationName, AccessShareLock); - scan = heap_beginscan(relation, SnapshotNow, 1, entry); - - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) - { - aform = (Form_pg_amop) GETSTRUCT(tuple); - if (opclass_is_btree(aform->amopclaid)) - { - /* Get the predicate operator's btree strategy number (1 to 5) */ - pred_strategy = (StrategyNumber) aform->amopstrategy; - Assert(pred_strategy >= 1 && pred_strategy <= 5); - - /* - * Remember which operator class this strategy number came - * from - */ - opclass_id = aform->amopclaid; - break; - } - } - - heap_endscan(scan); - heap_close(relation, AccessShareLock); - - if (!OidIsValid(opclass_id)) - { - /* predicate operator isn't btree-indexable */ - return false; - } - - /* - * 2. From the same opclass, find a strategy num for the clause_op - */ - tuple = SearchSysCache(AMOPOPID, - ObjectIdGetDatum(opclass_id), - ObjectIdGetDatum(clause_op), - 0, 0); - if (!HeapTupleIsValid(tuple)) - { - /* clause operator isn't btree-indexable, or isn't in this opclass */ - return false; - } - aform = (Form_pg_amop) GETSTRUCT(tuple); - - /* Get the restriction clause operator's strategy number (1 to 5) */ - clause_strategy = (StrategyNumber) aform->amopstrategy; - Assert(clause_strategy >= 1 && clause_strategy <= 5); - - ReleaseSysCache(tuple); - - /* - * 3. Look up the "test" strategy number in the implication table - */ - test_strategy = BT_implic_table[clause_strategy - 1][pred_strategy - 1]; - if (test_strategy == 0) - { - return false; /* the implication cannot be determined */ - } - - /* - * 4. 
From the same opclass, find the operator for the test strategy - */ - tuple = SearchSysCache(AMOPSTRATEGY, - ObjectIdGetDatum(opclass_id), - Int16GetDatum(test_strategy), - 0, 0); - if (!HeapTupleIsValid(tuple)) - { - /* this probably shouldn't fail? */ - elog(LOG, "pred_test_simple_clause: unknown test_op"); - return false; - } - aform = (Form_pg_amop) GETSTRUCT(tuple); - - /* Get the test operator */ - test_op = aform->amopopr; - - ReleaseSysCache(tuple); - - /* - * 5. Evaluate the test - */ - test_oper = makeOper(test_op, /* opno */ - InvalidOid, /* opid */ - BOOLOID, /* opresulttype */ - false); /* opretset */ - replace_opid(test_oper); - test_expr = make_opclause(test_oper, - (Var *) clause_const, - (Var *) pred_const); - - econtext = MakeExprContext(NULL, TransactionCommandContext); - test_result = ExecEvalExprSwitchContext((Node *) test_expr, econtext, - &isNull, NULL); - FreeExprContext(econtext); - - if (isNull) - { - elog(LOG, "pred_test_simple_clause: null test result"); - return false; - } - return DatumGetBool(test_result); -} - - -/**************************************************************************** - * ---- ROUTINES TO CHECK JOIN CLAUSES ---- - ****************************************************************************/ - -/* - * indexable_joinclauses - * Finds all groups of join clauses from among 'joininfo_list' that can - * be used in conjunction with 'index' for the inner scan of a nestjoin. - * - * Each clause group comes from a single joininfo node plus the current - * rel's restrictinfo list. Therefore, every clause in the group references - * the current rel plus the same set of other rels (except for the restrict - * clauses, which only reference the current rel). Therefore, this set - * of clauses could be used as an indexqual if the relation is scanned - * as the inner side of a nestloop join when the outer side contains - * (at least) all those "other rels". 
- * - * XXX Actually, given that we are considering a join that requires an - * outer rel set (A,B,C), we should use all qual clauses that reference - * any subset of these rels, not just the full set or none. This is - * doable with a doubly nested loop over joininfo_list; is it worth it? - * - * Returns two parallel lists of the same length: the clause groups, - * and the required outer rel set for each one. - * - * 'rel' is the relation for which 'index' is defined - * 'joininfo_list' is the list of JoinInfo nodes for 'rel' - * 'restrictinfo_list' is the list of restriction clauses for 'rel' - * '*clausegroups' receives a list of clause sublists - * '*outerrelids' receives a list of relid lists - */ -static void -indexable_joinclauses(RelOptInfo *rel, IndexOptInfo *index, - List *joininfo_list, List *restrictinfo_list, - List **clausegroups, List **outerrelids) -{ - List *cg_list = NIL; - List *relid_list = NIL; - List *i; - - foreach(i, joininfo_list) - { - JoinInfo *joininfo = (JoinInfo *) lfirst(i); - List *clausegroup; - - clausegroup = group_clauses_by_ikey_for_joins(rel, - index, - index->indexkeys, - index->classlist, - joininfo->jinfo_restrictinfo, - restrictinfo_list); - - if (clausegroup != NIL) - { - cg_list = lappend(cg_list, clausegroup); - relid_list = lappend(relid_list, joininfo->unjoined_relids); - } - } - - *clausegroups = cg_list; - *outerrelids = relid_list; -} - -/**************************************************************************** - * ---- PATH CREATION UTILITIES ---- - ****************************************************************************/ - -/* - * index_innerjoin - * Creates index path nodes corresponding to paths to be used as inner - * relations in nestloop joins. - * - * 'rel' is the relation for which 'index' is defined - * 'clausegroup_list' is a list of lists of restrictinfo nodes which can use - * 'index'. Each sublist refers to the same set of outer rels. 
- * 'outerrelids_list' is a list of the required outer rels for each sublist - * of join clauses. - * - * Returns a list of index pathnodes. - */ -static List * -index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index, - List *clausegroup_list, List *outerrelids_list) -{ - List *path_list = NIL; - List *i; - - foreach(i, clausegroup_list) - { - List *clausegroup = lfirst(i); - IndexPath *pathnode = makeNode(IndexPath); - List *indexquals = NIL; - bool alljoinquals = true; - List *temp; - - /* XXX this code ought to be merged with create_index_path? */ - - pathnode->path.pathtype = T_IndexScan; - pathnode->path.parent = rel; - - /* - * There's no point in marking the path with any pathkeys, since - * it will only ever be used as the inner path of a nestloop, and - * so its ordering does not matter. - */ - pathnode->path.pathkeys = NIL; - - /* extract bare indexqual clauses, check whether all from JOIN/ON */ - foreach(temp, clausegroup) - { - RestrictInfo *clause = (RestrictInfo *) lfirst(temp); - - indexquals = lappend(indexquals, clause->clause); - if (clause->ispusheddown) - alljoinquals = false; - } - - /* expand special operators to indexquals the executor can handle */ - indexquals = expand_indexqual_conditions(indexquals); - - /* - * Note that we are making a pathnode for a single-scan indexscan; - * therefore, both indexinfo and indexqual should be - * single-element lists. - */ - pathnode->indexinfo = makeList1(index); - pathnode->indexqual = makeList1(indexquals); - - /* We don't actually care what order the index scans in ... */ - pathnode->indexscandir = NoMovementScanDirection; - - /* joinrelids saves the rels needed on the outer side of the join */ - pathnode->joinrelids = lfirst(outerrelids_list); - - pathnode->alljoinquals = alljoinquals; - - /* - * We must compute the estimated number of output rows for the - * indexscan. This is less than rel->rows because of the - * additional selectivity of the join clauses. 
Since clausegroup - * may contain both restriction and join clauses, we have to do a - * set union to get the full set of clauses that must be - * considered to compute the correct selectivity. (We can't just - * nconc the two lists; then we might have some restriction - * clauses appearing twice, which'd mislead - * restrictlist_selectivity into double-counting their - * selectivity.) - */ - pathnode->rows = rel->tuples * - restrictlist_selectivity(root, - set_union(rel->baserestrictinfo, - clausegroup), - lfirsti(rel->relids)); - /* Like costsize.c, force estimate to be at least one row */ - if (pathnode->rows < 1.0) - pathnode->rows = 1.0; - - cost_index(&pathnode->path, root, rel, index, indexquals, true); - - path_list = lappend(path_list, pathnode); - outerrelids_list = lnext(outerrelids_list); - } - return path_list; -} - -/**************************************************************************** - * ---- ROUTINES TO CHECK OPERANDS ---- - ****************************************************************************/ - -/* - * match_index_to_operand() - * Generalized test for a match between an index's key - * and the operand on one side of a restriction or join clause. - * Now check for functional indices as well. - */ -static bool -match_index_to_operand(int indexkey, - Var *operand, - RelOptInfo *rel, - IndexOptInfo *index) -{ - /* - * Ignore any RelabelType node above the indexkey. This is needed to - * be able to apply indexscanning in binary-compatible-operator cases. - * Note: we can assume there is at most one RelabelType node; - * eval_const_expressions() will have simplified if more than one. - */ - if (operand && IsA(operand, RelabelType)) - operand = (Var *) ((RelabelType *) operand)->arg; - - if (index->indproc == InvalidOid) - { - /* - * Simple index. - */ - if (operand && IsA(operand, Var) && - lfirsti(rel->relids) == operand->varno && - indexkey == operand->varattno) - return true; - else - return false; - } - - /* - * Functional index. 
- */ - return function_index_operand((Expr *) operand, rel, index); -} - -static bool -function_index_operand(Expr *funcOpnd, RelOptInfo *rel, IndexOptInfo *index) -{ - int relvarno = lfirsti(rel->relids); - Func *function; - List *funcargs; - int *indexKeys = index->indexkeys; - List *arg; - int i; - - /* - * sanity check, make sure we know what we're dealing with here. - */ - if (funcOpnd == NULL || !IsA(funcOpnd, Expr) || - funcOpnd->opType != FUNC_EXPR || - funcOpnd->oper == NULL || indexKeys == NULL) - return false; - - function = (Func *) funcOpnd->oper; - funcargs = funcOpnd->args; - - if (function->funcid != index->indproc) - return false; - - /*---------- - * Check that the arguments correspond to the same arguments used to - * create the functional index. To do this we must check that - * 1. they refer to the right relation. - * 2. the args have the right attr. numbers in the right order. - * We must ignore RelabelType nodes above the argument Vars in order - * to recognize binary-compatible-function cases correctly. - *---------- - */ - i = 0; - foreach(arg, funcargs) - { - Var *var = (Var *) lfirst(arg); - - if (var && IsA(var, RelabelType)) - var = (Var *) ((RelabelType *) var)->arg; - if (var == NULL || !IsA(var, Var)) - return false; - if (indexKeys[i] == 0) - return false; - if (var->varno != relvarno || var->varattno != indexKeys[i]) - return false; - - i++; - } - - if (indexKeys[i] != 0) - return false; /* not enough arguments */ - - return true; -} - -/**************************************************************************** - * ---- ROUTINES FOR "SPECIAL" INDEXABLE OPERATORS ---- - ****************************************************************************/ - -/*---------- - * These routines handle special optimization of operators that can be - * used with index scans even though they are not known to the executor's - * indexscan machinery. 
The key idea is that these operators allow us - * to derive approximate indexscan qual clauses, such that any tuples - * that pass the operator clause itself must also satisfy the simpler - * indexscan condition(s). Then we can use the indexscan machinery - * to avoid scanning as much of the table as we'd otherwise have to, - * while applying the original operator as a qpqual condition to ensure - * we deliver only the tuples we want. (In essence, we're using a regular - * index as if it were a lossy index.) - * - * An example of what we're doing is - * textfield LIKE 'abc%' - * from which we can generate the indexscanable conditions - * textfield >= 'abc' AND textfield < 'abd' - * which allow efficient scanning of an index on textfield. - * (In reality, character set and collation issues make the transformation - * from LIKE to indexscan limits rather harder than one might think ... - * but that's the basic idea.) - * - * Two routines are provided here, match_special_index_operator() and - * expand_indexqual_conditions(). match_special_index_operator() is - * just an auxiliary function for match_clause_to_indexkey(); after - * the latter fails to recognize a restriction opclause's operator - * as a member of an index's opclass, it asks match_special_index_operator() - * whether the clause should be considered an indexqual anyway. - * expand_indexqual_conditions() converts a list of "raw" indexqual - * conditions (with implicit AND semantics across list elements) into - * a list that the executor can actually handle. For operators that - * are members of the index's opclass this transformation is a no-op, - * but operators recognized by match_special_index_operator() must be - * converted into one or more "regular" indexqual conditions. - *---------- - */ - -/* - * match_special_index_operator - * Recognize restriction clauses that can be used to generate - * additional indexscanable qualifications. 
- * - * The given clause is already known to be a binary opclause having - * the form (indexkey OP pseudoconst) or (pseudoconst OP indexkey), - * but the OP proved not to be one of the index's opclass operators. - * Return 'true' if we can do something with it anyway. - */ -static bool -match_special_index_operator(Expr *clause, Oid opclass, - bool indexkey_on_left) -{ - bool isIndexable = false; - Var *leftop, - *rightop; - Oid expr_op; - Datum constvalue; - char *patt; - char *prefix; - char *rest; - - /* - * Currently, all known special operators require the indexkey on the - * left, but this test could be pushed into the switch statement if - * some are added that do not... - */ - if (!indexkey_on_left) - return false; - - /* we know these will succeed */ - leftop = get_leftop(clause); - rightop = get_rightop(clause); - expr_op = ((Oper *) clause->oper)->opno; - - /* again, required for all current special ops: */ - if (!IsA(rightop, Const) || - ((Const *) rightop)->constisnull) - return false; - constvalue = ((Const *) rightop)->constvalue; - - switch (expr_op) - { - case OID_TEXT_LIKE_OP: - case OID_BPCHAR_LIKE_OP: - case OID_VARCHAR_LIKE_OP: - case OID_NAME_LIKE_OP: - if (locale_is_like_safe()) - { - /* the right-hand const is type text for all of these */ - patt = DatumGetCString(DirectFunctionCall1(textout, - constvalue)); - isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like, - &prefix, &rest) != Pattern_Prefix_None; - if (prefix) - pfree(prefix); - pfree(patt); - } - break; - - case OID_TEXT_ICLIKE_OP: - case OID_BPCHAR_ICLIKE_OP: - case OID_VARCHAR_ICLIKE_OP: - case OID_NAME_ICLIKE_OP: - if (locale_is_like_safe()) - { - /* the right-hand const is type text for all of these */ - patt = DatumGetCString(DirectFunctionCall1(textout, - constvalue)); - isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, - &prefix, &rest) != Pattern_Prefix_None; - if (prefix) - pfree(prefix); - pfree(patt); - } - break; - - case OID_TEXT_REGEXEQ_OP: - case 
OID_BPCHAR_REGEXEQ_OP: - case OID_VARCHAR_REGEXEQ_OP: - case OID_NAME_REGEXEQ_OP: - if (locale_is_like_safe()) - { - /* the right-hand const is type text for all of these */ - patt = DatumGetCString(DirectFunctionCall1(textout, - constvalue)); - isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex, - &prefix, &rest) != Pattern_Prefix_None; - if (prefix) - pfree(prefix); - pfree(patt); - } - break; - - case OID_TEXT_ICREGEXEQ_OP: - case OID_BPCHAR_ICREGEXEQ_OP: - case OID_VARCHAR_ICREGEXEQ_OP: - case OID_NAME_ICREGEXEQ_OP: - if (locale_is_like_safe()) - { - /* the right-hand const is type text for all of these */ - patt = DatumGetCString(DirectFunctionCall1(textout, - constvalue)); - isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, - &prefix, &rest) != Pattern_Prefix_None; - if (prefix) - pfree(prefix); - pfree(patt); - } - break; - - case OID_INET_SUB_OP: - case OID_INET_SUBEQ_OP: - case OID_CIDR_SUB_OP: - case OID_CIDR_SUBEQ_OP: - isIndexable = true; - break; - } - - /* done if the expression doesn't look indexable */ - if (!isIndexable) - return false; - - /* - * Must also check that index's opclass supports the operators we will - * want to apply. (A hash index, for example, will not support ">=".) - * We cheat a little by not checking for availability of "=" ... any - * index type should support "=", methinks. 
- */ - switch (expr_op) - { - case OID_TEXT_LIKE_OP: - case OID_TEXT_ICLIKE_OP: - case OID_TEXT_REGEXEQ_OP: - case OID_TEXT_ICREGEXEQ_OP: - if (!op_in_opclass(find_operator(">=", TEXTOID), opclass) || - !op_in_opclass(find_operator("<", TEXTOID), opclass)) - isIndexable = false; - break; - - case OID_BPCHAR_LIKE_OP: - case OID_BPCHAR_ICLIKE_OP: - case OID_BPCHAR_REGEXEQ_OP: - case OID_BPCHAR_ICREGEXEQ_OP: - if (!op_in_opclass(find_operator(">=", BPCHAROID), opclass) || - !op_in_opclass(find_operator("<", BPCHAROID), opclass)) - isIndexable = false; - break; - - case OID_VARCHAR_LIKE_OP: - case OID_VARCHAR_ICLIKE_OP: - case OID_VARCHAR_REGEXEQ_OP: - case OID_VARCHAR_ICREGEXEQ_OP: - if (!op_in_opclass(find_operator(">=", VARCHAROID), opclass) || - !op_in_opclass(find_operator("<", VARCHAROID), opclass)) - isIndexable = false; - break; - - case OID_NAME_LIKE_OP: - case OID_NAME_ICLIKE_OP: - case OID_NAME_REGEXEQ_OP: - case OID_NAME_ICREGEXEQ_OP: - if (!op_in_opclass(find_operator(">=", NAMEOID), opclass) || - !op_in_opclass(find_operator("<", NAMEOID), opclass)) - isIndexable = false; - break; - - case OID_INET_SUB_OP: - case OID_INET_SUBEQ_OP: - /* for SUB we actually need ">" not ">=", but this should do */ - if (!op_in_opclass(find_operator(">=", INETOID), opclass) || - !op_in_opclass(find_operator("<=", INETOID), opclass)) - isIndexable = false; - break; - - case OID_CIDR_SUB_OP: - case OID_CIDR_SUBEQ_OP: - /* for SUB we actually need ">" not ">=", but this should do */ - if (!op_in_opclass(find_operator(">=", CIDROID), opclass) || - !op_in_opclass(find_operator("<=", CIDROID), opclass)) - isIndexable = false; - break; - } - - return isIndexable; -} - -/* - * expand_indexqual_conditions - * Given a list of (implicitly ANDed) indexqual clauses, - * expand any "special" index operators into clauses that the indexscan - * machinery will know what to do with. Clauses that were not - * recognized by match_special_index_operator() must be passed through - * unchanged. 
- */ -List * -expand_indexqual_conditions(List *indexquals) -{ - List *resultquals = NIL; - List *q; - - foreach(q, indexquals) - { - Expr *clause = (Expr *) lfirst(q); - - /* we know these will succeed */ - Var *leftop = get_leftop(clause); - Var *rightop = get_rightop(clause); - Oid expr_op = ((Oper *) clause->oper)->opno; - Datum constvalue; - char *patt; - char *prefix; - char *rest; - Pattern_Prefix_Status pstatus; - - switch (expr_op) - { - /* - * LIKE and regex operators are not members of any index - * opclass, so if we find one in an indexqual list we can - * assume that it was accepted by - * match_special_index_operator(). - */ - case OID_TEXT_LIKE_OP: - case OID_BPCHAR_LIKE_OP: - case OID_VARCHAR_LIKE_OP: - case OID_NAME_LIKE_OP: - /* the right-hand const is type text for all of these */ - constvalue = ((Const *) rightop)->constvalue; - patt = DatumGetCString(DirectFunctionCall1(textout, - constvalue)); - pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, - &prefix, &rest); - resultquals = nconc(resultquals, - prefix_quals(leftop, expr_op, - prefix, pstatus)); - if (prefix) - pfree(prefix); - pfree(patt); - break; - - case OID_TEXT_ICLIKE_OP: - case OID_BPCHAR_ICLIKE_OP: - case OID_VARCHAR_ICLIKE_OP: - case OID_NAME_ICLIKE_OP: - /* the right-hand const is type text for all of these */ - constvalue = ((Const *) rightop)->constvalue; - patt = DatumGetCString(DirectFunctionCall1(textout, - constvalue)); - pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, - &prefix, &rest); - resultquals = nconc(resultquals, - prefix_quals(leftop, expr_op, - prefix, pstatus)); - if (prefix) - pfree(prefix); - pfree(patt); - break; - - case OID_TEXT_REGEXEQ_OP: - case OID_BPCHAR_REGEXEQ_OP: - case OID_VARCHAR_REGEXEQ_OP: - case OID_NAME_REGEXEQ_OP: - /* the right-hand const is type text for all of these */ - constvalue = ((Const *) rightop)->constvalue; - patt = DatumGetCString(DirectFunctionCall1(textout, - constvalue)); - pstatus = pattern_fixed_prefix(patt, 
Pattern_Type_Regex, - &prefix, &rest); - resultquals = nconc(resultquals, - prefix_quals(leftop, expr_op, - prefix, pstatus)); - if (prefix) - pfree(prefix); - pfree(patt); - break; - - case OID_TEXT_ICREGEXEQ_OP: - case OID_BPCHAR_ICREGEXEQ_OP: - case OID_VARCHAR_ICREGEXEQ_OP: - case OID_NAME_ICREGEXEQ_OP: - /* the right-hand const is type text for all of these */ - constvalue = ((Const *) rightop)->constvalue; - patt = DatumGetCString(DirectFunctionCall1(textout, - constvalue)); - pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, - &prefix, &rest); - resultquals = nconc(resultquals, - prefix_quals(leftop, expr_op, - prefix, pstatus)); - if (prefix) - pfree(prefix); - pfree(patt); - break; - - case OID_INET_SUB_OP: - case OID_INET_SUBEQ_OP: - case OID_CIDR_SUB_OP: - case OID_CIDR_SUBEQ_OP: - constvalue = ((Const *) rightop)->constvalue; - resultquals = nconc(resultquals, - network_prefix_quals(leftop, expr_op, - constvalue)); - break; - - default: - resultquals = lappend(resultquals, clause); - break; - } - } - - return resultquals; -} - -/* - * Given a fixed prefix that all the "leftop" values must have, - * generate suitable indexqual condition(s). expr_op is the original - * LIKE or regex operator; we use it to deduce the appropriate comparison - * operators. 
- */ -static List * -prefix_quals(Var *leftop, Oid expr_op, - char *prefix, Pattern_Prefix_Status pstatus) -{ - List *result; - Oid datatype; - Oid oproid; - Const *con; - Oper *op; - Expr *expr; - char *greaterstr; - - Assert(pstatus != Pattern_Prefix_None); - - switch (expr_op) - { - case OID_TEXT_LIKE_OP: - case OID_TEXT_ICLIKE_OP: - case OID_TEXT_REGEXEQ_OP: - case OID_TEXT_ICREGEXEQ_OP: - datatype = TEXTOID; - break; - - case OID_BPCHAR_LIKE_OP: - case OID_BPCHAR_ICLIKE_OP: - case OID_BPCHAR_REGEXEQ_OP: - case OID_BPCHAR_ICREGEXEQ_OP: - datatype = BPCHAROID; - break; - - case OID_VARCHAR_LIKE_OP: - case OID_VARCHAR_ICLIKE_OP: - case OID_VARCHAR_REGEXEQ_OP: - case OID_VARCHAR_ICREGEXEQ_OP: - datatype = VARCHAROID; - break; - - case OID_NAME_LIKE_OP: - case OID_NAME_ICLIKE_OP: - case OID_NAME_REGEXEQ_OP: - case OID_NAME_ICREGEXEQ_OP: - datatype = NAMEOID; - break; - - default: - elog(ERROR, "prefix_quals: unexpected operator %u", expr_op); - return NIL; - } - - /* - * If we found an exact-match pattern, generate an "=" indexqual. - */ - if (pstatus == Pattern_Prefix_Exact) - { - oproid = find_operator("=", datatype); - if (oproid == InvalidOid) - elog(ERROR, "prefix_quals: no = operator for type %u", datatype); - con = string_to_const(prefix, datatype); - op = makeOper(oproid, InvalidOid, BOOLOID, false); - expr = make_opclause(op, leftop, (Var *) con); - result = makeList1(expr); - return result; - } - - /* - * Otherwise, we have a nonempty required prefix of the values. - * - * We can always say "x >= prefix". - */ - oproid = find_operator(">=", datatype); - if (oproid == InvalidOid) - elog(ERROR, "prefix_quals: no >= operator for type %u", datatype); - con = string_to_const(prefix, datatype); - op = makeOper(oproid, InvalidOid, BOOLOID, false); - expr = make_opclause(op, leftop, (Var *) con); - result = makeList1(expr); - - /*------- - * If we can create a string larger than the prefix, we can say - * "x < greaterstr". 
- *------- - */ - greaterstr = make_greater_string(prefix, datatype); - if (greaterstr) - { - oproid = find_operator("<", datatype); - if (oproid == InvalidOid) - elog(ERROR, "prefix_quals: no < operator for type %u", datatype); - con = string_to_const(greaterstr, datatype); - op = makeOper(oproid, InvalidOid, BOOLOID, false); - expr = make_opclause(op, leftop, (Var *) con); - result = lappend(result, expr); - pfree(greaterstr); - } - - return result; -} - -/* - * Given a leftop and a rightop, and a inet-class sup/sub operator, - * generate suitable indexqual condition(s). expr_op is the original - * operator. - */ -static List * -network_prefix_quals(Var *leftop, Oid expr_op, Datum rightop) -{ - bool is_eq; - char *opr1name; - Datum opr1right; - Datum opr2right; - Oid opr1oid; - Oid opr2oid; - List *result; - Oid datatype; - Oper *op; - Expr *expr; - - switch (expr_op) - { - case OID_INET_SUB_OP: - datatype = INETOID; - is_eq = false; - break; - case OID_INET_SUBEQ_OP: - datatype = INETOID; - is_eq = true; - break; - case OID_CIDR_SUB_OP: - datatype = CIDROID; - is_eq = false; - break; - case OID_CIDR_SUBEQ_OP: - datatype = CIDROID; - is_eq = true; - break; - default: - elog(ERROR, "network_prefix_quals: unexpected operator %u", - expr_op); - return NIL; - } - - /* - * create clause "key >= network_scan_first( rightop )", or ">" if the - * operator disallows equality. - */ - - opr1name = is_eq ? 
">=" : ">"; - opr1oid = find_operator(opr1name, datatype); - if (opr1oid == InvalidOid) - elog(ERROR, "network_prefix_quals: no %s operator for type %u", - opr1name, datatype); - - opr1right = network_scan_first(rightop); - - op = makeOper(opr1oid, InvalidOid, BOOLOID, false); - expr = make_opclause(op, leftop, - (Var *) makeConst(datatype, -1, opr1right, - false, false, false, false)); - result = makeList1(expr); - - /* create clause "key <= network_scan_last( rightop )" */ - - opr2oid = find_operator("<=", datatype); - if (opr2oid == InvalidOid) - elog(ERROR, "network_prefix_quals: no <= operator for type %u", - datatype); - - opr2right = network_scan_last(rightop); - - op = makeOper(opr2oid, InvalidOid, BOOLOID, false); - expr = make_opclause(op, leftop, - (Var *) makeConst(datatype, -1, opr2right, - false, false, false, false)); - result = lappend(result, expr); - - return result; -} - -/* - * Handy subroutines for match_special_index_operator() and friends. - */ - -/* See if there is a binary op of the given name for the given datatype */ -/* NB: we assume that only built-in system operators are searched for */ -static Oid -find_operator(const char *opname, Oid datatype) -{ - return GetSysCacheOid(OPERNAMENSP, - PointerGetDatum(opname), - ObjectIdGetDatum(datatype), - ObjectIdGetDatum(datatype), - ObjectIdGetDatum(PG_CATALOG_NAMESPACE)); -} - -/* - * Generate a Datum of the appropriate type from a C string. - * Note that all of the supported types are pass-by-ref, so the - * returned value should be pfree'd if no longer needed. - */ -static Datum -string_to_datum(const char *str, Oid datatype) -{ - /* - * We cheat a little by assuming that textin() will do for bpchar and - * varchar constants too... - */ - if (datatype == NAMEOID) - return DirectFunctionCall1(namein, CStringGetDatum(str)); - else - return DirectFunctionCall1(textin, CStringGetDatum(str)); -} - -/* - * Generate a Const node of the appropriate type from a C string. 
- */ -static Const * -string_to_const(const char *str, Oid datatype) -{ - Datum conval = string_to_datum(str, datatype); - - return makeConst(datatype, ((datatype == NAMEOID) ? NAMEDATALEN : -1), - conval, false, false, false, false); -} diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c deleted file mode 100644 index d208d5dbbac..00000000000 --- a/src/backend/optimizer/path/joinpath.c +++ /dev/null @@ -1,960 +0,0 @@ -/*------------------------------------------------------------------------- - * - * joinpath.c - * Routines to find all possible paths for processing a set of joins - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.69 2002/06/20 20:29:30 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include <sys/types.h> -#include <math.h> - -#include "optimizer/clauses.h" -#include "optimizer/cost.h" -#include "optimizer/pathnode.h" -#include "optimizer/paths.h" -#include "parser/parsetree.h" -#include "utils/lsyscache.h" - - -static void sort_inner_and_outer(Query *root, RelOptInfo *joinrel, - RelOptInfo *outerrel, RelOptInfo *innerrel, - List *restrictlist, List *mergeclause_list, - JoinType jointype); -static void match_unsorted_outer(Query *root, RelOptInfo *joinrel, - RelOptInfo *outerrel, RelOptInfo *innerrel, - List *restrictlist, List *mergeclause_list, - JoinType jointype); - -#ifdef NOT_USED -static void match_unsorted_inner(Query *root, RelOptInfo *joinrel, - RelOptInfo *outerrel, RelOptInfo *innerrel, - List *restrictlist, List *mergeclause_list, - JoinType jointype); -#endif -static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel, - RelOptInfo *outerrel, RelOptInfo *innerrel, - List *restrictlist, JoinType jointype); -static Path 
*best_innerjoin(List *join_paths, List *outer_relid, - JoinType jointype); -static List *select_mergejoin_clauses(RelOptInfo *joinrel, - RelOptInfo *outerrel, - RelOptInfo *innerrel, - List *restrictlist, - JoinType jointype); - - -/* - * add_paths_to_joinrel - * Given a join relation and two component rels from which it can be made, - * consider all possible paths that use the two component rels as outer - * and inner rel respectively. Add these paths to the join rel's pathlist - * if they survive comparison with other paths (and remove any existing - * paths that are dominated by these paths). - * - * Modifies the pathlist field of the joinrel node to contain the best - * paths found so far. - */ -void -add_paths_to_joinrel(Query *root, - RelOptInfo *joinrel, - RelOptInfo *outerrel, - RelOptInfo *innerrel, - JoinType jointype, - List *restrictlist) -{ - List *mergeclause_list = NIL; - - /* - * Find potential mergejoin clauses. We can skip this if we are not - * interested in doing a mergejoin. However, mergejoin is currently - * our only way of implementing full outer joins, so override - * mergejoin disable if it's a full join. - */ - if (enable_mergejoin || jointype == JOIN_FULL) - mergeclause_list = select_mergejoin_clauses(joinrel, - outerrel, - innerrel, - restrictlist, - jointype); - - /* - * 1. Consider mergejoin paths where both relations must be explicitly - * sorted. - */ - sort_inner_and_outer(root, joinrel, outerrel, innerrel, - restrictlist, mergeclause_list, jointype); - - /* - * 2. Consider paths where the outer relation need not be explicitly - * sorted. This includes both nestloops and mergejoins where the outer - * path is already ordered. - */ - match_unsorted_outer(root, joinrel, outerrel, innerrel, - restrictlist, mergeclause_list, jointype); - -#ifdef NOT_USED - - /* - * 3. Consider paths where the inner relation need not be explicitly - * sorted. This includes mergejoins only (nestloops were already - * built in match_unsorted_outer). 
- * - * Diked out as redundant 2/13/2000 -- tgl. There isn't any really - * significant difference between the inner and outer side of a - * mergejoin, so match_unsorted_inner creates no paths that aren't - * equivalent to those made by match_unsorted_outer when - * add_paths_to_joinrel() is invoked with the two rels given in the - * other order. - */ - match_unsorted_inner(root, joinrel, outerrel, innerrel, - restrictlist, mergeclause_list, jointype); -#endif - - /* - * 4. Consider paths where both outer and inner relations must be - * hashed before being joined. - */ - if (enable_hashjoin) - hash_inner_and_outer(root, joinrel, outerrel, innerrel, - restrictlist, jointype); -} - -/* - * sort_inner_and_outer - * Create mergejoin join paths by explicitly sorting both the outer and - * inner join relations on each available merge ordering. - * - * 'joinrel' is the join relation - * 'outerrel' is the outer join relation - * 'innerrel' is the inner join relation - * 'restrictlist' contains all of the RestrictInfo nodes for restriction - * clauses that apply to this join - * 'mergeclause_list' is a list of RestrictInfo nodes for available - * mergejoin clauses in this join - * 'jointype' is the type of join to do - */ -static void -sort_inner_and_outer(Query *root, - RelOptInfo *joinrel, - RelOptInfo *outerrel, - RelOptInfo *innerrel, - List *restrictlist, - List *mergeclause_list, - JoinType jointype) -{ - bool useallclauses; - List *all_pathkeys; - List *i; - - /* - * If we are doing a right or full join, we must use *all* the - * mergeclauses as join clauses, else we will not have a valid plan. 
- */ - switch (jointype) - { - case JOIN_INNER: - case JOIN_LEFT: - useallclauses = false; - break; - case JOIN_RIGHT: - case JOIN_FULL: - useallclauses = true; - break; - default: - elog(ERROR, "sort_inner_and_outer: unexpected join type %d", - (int) jointype); - useallclauses = false; /* keep compiler quiet */ - break; - } - - /* - * Each possible ordering of the available mergejoin clauses will - * generate a differently-sorted result path at essentially the same - * cost. We have no basis for choosing one over another at this level - * of joining, but some sort orders may be more useful than others for - * higher-level mergejoins, so it's worth considering multiple - * orderings. - * - * Actually, it's not quite true that every mergeclause ordering will - * generate a different path order, because some of the clauses may be - * redundant. Therefore, what we do is convert the mergeclause list - * to a list of canonical pathkeys, and then consider different - * orderings of the pathkeys. - * - * Generating a path for *every* permutation of the pathkeys doesn't seem - * like a winning strategy; the cost in planning time is too high. For - * now, we generate one path for each pathkey, listing that pathkey - * first and the rest in random order. This should allow at least a - * one-clause mergejoin without re-sorting against any other possible - * mergejoin partner path. But if we've not guessed the right - * ordering of secondary keys, we may end up evaluating clauses as - * qpquals when they could have been done as mergeclauses. We need to - * figure out a better way. (Two possible approaches: look at all the - * relevant index relations to suggest plausible sort orders, or make - * just one output path and somehow mark it as having a sort-order - * that can be rearranged freely.) 
- */ - all_pathkeys = make_pathkeys_for_mergeclauses(root, - mergeclause_list, - outerrel); - - foreach(i, all_pathkeys) - { - List *front_pathkey = lfirst(i); - List *cur_pathkeys; - List *cur_mergeclauses; - List *outerkeys; - List *innerkeys; - List *merge_pathkeys; - - /* Make a pathkey list with this guy first. */ - if (i != all_pathkeys) - cur_pathkeys = lcons(front_pathkey, - lremove(front_pathkey, - listCopy(all_pathkeys))); - else - cur_pathkeys = all_pathkeys; /* no work at first one... */ - - /* - * Select mergeclause(s) that match this sort ordering. If we had - * redundant merge clauses then we will get a subset of the - * original clause list. There had better be some match, - * however... - */ - cur_mergeclauses = find_mergeclauses_for_pathkeys(root, - cur_pathkeys, - mergeclause_list); - Assert(cur_mergeclauses != NIL); - - /* Forget it if can't use all the clauses in right/full join */ - if (useallclauses && - length(cur_mergeclauses) != length(mergeclause_list)) - continue; - - /* - * Build sort pathkeys for both sides. - * - * Note: it's possible that the cheapest paths will already be sorted - * properly. create_mergejoin_path will detect that case and - * suppress an explicit sort step, so we needn't do so here. - */ - outerkeys = make_pathkeys_for_mergeclauses(root, - cur_mergeclauses, - outerrel); - innerkeys = make_pathkeys_for_mergeclauses(root, - cur_mergeclauses, - innerrel); - /* Build pathkeys representing output sort order. */ - merge_pathkeys = build_join_pathkeys(root, joinrel, outerkeys); - - /* - * And now we can make the path. We only consider the cheapest- - * total-cost input paths, since we are assuming here that a sort - * is required. We will consider cheapest-startup-cost input - * paths later, and only if they don't need a sort. 
- */ - add_path(joinrel, (Path *) - create_mergejoin_path(root, - joinrel, - jointype, - outerrel->cheapest_total_path, - innerrel->cheapest_total_path, - restrictlist, - merge_pathkeys, - cur_mergeclauses, - outerkeys, - innerkeys)); - } -} - -/* - * match_unsorted_outer - * Creates possible join paths for processing a single join relation - * 'joinrel' by employing either iterative substitution or - * mergejoining on each of its possible outer paths (considering - * only outer paths that are already ordered well enough for merging). - * - * We always generate a nestloop path for each available outer path. - * In fact we may generate as many as three: one on the cheapest-total-cost - * inner path, one on the cheapest-startup-cost inner path (if different), - * and one on the best inner-indexscan path (if any). - * - * We also consider mergejoins if mergejoin clauses are available. We have - * two ways to generate the inner path for a mergejoin: sort the cheapest - * inner path, or use an inner path that is already suitably ordered for the - * merge. If we have several mergeclauses, it could be that there is no inner - * path (or only a very expensive one) for the full list of mergeclauses, but - * better paths exist if we truncate the mergeclause list (thereby discarding - * some sort key requirements). So, we consider truncations of the - * mergeclause list as well as the full list. (Ideally we'd consider all - * subsets of the mergeclause list, but that seems way too expensive.) 
- * - * 'joinrel' is the join relation - * 'outerrel' is the outer join relation - * 'innerrel' is the inner join relation - * 'restrictlist' contains all of the RestrictInfo nodes for restriction - * clauses that apply to this join - * 'mergeclause_list' is a list of RestrictInfo nodes for available - * mergejoin clauses in this join - * 'jointype' is the type of join to do - */ -static void -match_unsorted_outer(Query *root, - RelOptInfo *joinrel, - RelOptInfo *outerrel, - RelOptInfo *innerrel, - List *restrictlist, - List *mergeclause_list, - JoinType jointype) -{ - bool nestjoinOK; - bool useallclauses; - Path *bestinnerjoin; - List *i; - - /* - * Nestloop only supports inner and left joins. Also, if we are doing - * a right or full join, we must use *all* the mergeclauses as join - * clauses, else we will not have a valid plan. (Although these two - * flags are currently inverses, keep them separate for clarity and - * possible future changes.) - */ - switch (jointype) - { - case JOIN_INNER: - case JOIN_LEFT: - nestjoinOK = true; - useallclauses = false; - break; - case JOIN_RIGHT: - case JOIN_FULL: - nestjoinOK = false; - useallclauses = true; - break; - default: - elog(ERROR, "match_unsorted_outer: unexpected join type %d", - (int) jointype); - nestjoinOK = false; /* keep compiler quiet */ - useallclauses = false; - break; - } - - /* - * Get the best innerjoin indexpath (if any) for this outer rel. It's - * the same for all outer paths. 
- */ - bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids, - jointype); - - foreach(i, outerrel->pathlist) - { - Path *outerpath = (Path *) lfirst(i); - List *merge_pathkeys; - List *mergeclauses; - List *innersortkeys; - List *trialsortkeys; - Path *cheapest_startup_inner; - Path *cheapest_total_inner; - int num_sortkeys; - int sortkeycnt; - - /* - * The result will have this sort order (even if it is implemented - * as a nestloop, and even if some of the mergeclauses are - * implemented by qpquals rather than as true mergeclauses): - */ - merge_pathkeys = build_join_pathkeys(root, joinrel, - outerpath->pathkeys); - - if (nestjoinOK) - { - /* - * Always consider a nestloop join with this outer and - * cheapest-total-cost inner. Consider nestloops using the - * cheapest-startup-cost inner as well, and the best innerjoin - * indexpath. - */ - add_path(joinrel, (Path *) - create_nestloop_path(root, - joinrel, - jointype, - outerpath, - innerrel->cheapest_total_path, - restrictlist, - merge_pathkeys)); - if (innerrel->cheapest_startup_path != - innerrel->cheapest_total_path) - add_path(joinrel, (Path *) - create_nestloop_path(root, - joinrel, - jointype, - outerpath, - innerrel->cheapest_startup_path, - restrictlist, - merge_pathkeys)); - if (bestinnerjoin != NULL) - add_path(joinrel, (Path *) - create_nestloop_path(root, - joinrel, - jointype, - outerpath, - bestinnerjoin, - restrictlist, - merge_pathkeys)); - } - - /* Look for useful mergeclauses (if any) */ - mergeclauses = find_mergeclauses_for_pathkeys(root, - outerpath->pathkeys, - mergeclause_list); - - /* Done with this outer path if no chance for a mergejoin */ - if (mergeclauses == NIL) - continue; - if (useallclauses && length(mergeclauses) != length(mergeclause_list)) - continue; - - /* Compute the required ordering of the inner path */ - innersortkeys = make_pathkeys_for_mergeclauses(root, - mergeclauses, - innerrel); - - /* - * Generate a mergejoin on the basis of sorting the cheapest - 
* inner. Since a sort will be needed, only cheapest total cost - * matters. (But create_mergejoin_path will do the right thing if - * innerrel->cheapest_total_path is already correctly sorted.) - */ - add_path(joinrel, (Path *) - create_mergejoin_path(root, - joinrel, - jointype, - outerpath, - innerrel->cheapest_total_path, - restrictlist, - merge_pathkeys, - mergeclauses, - NIL, - innersortkeys)); - - /* - * Look for presorted inner paths that satisfy the innersortkey - * list --- or any truncation thereof, if we are allowed to build - * a mergejoin using a subset of the merge clauses. Here, we - * consider both cheap startup cost and cheap total cost. Ignore - * innerrel->cheapest_total_path, since we already made a path - * with it. - */ - num_sortkeys = length(innersortkeys); - if (num_sortkeys > 1 && !useallclauses) - trialsortkeys = listCopy(innersortkeys); /* need modifiable copy */ - else - trialsortkeys = innersortkeys; /* won't really truncate */ - cheapest_startup_inner = NULL; - cheapest_total_inner = NULL; - - for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--) - { - Path *innerpath; - List *newclauses = NIL; - - /* - * Look for an inner path ordered well enough for the first - * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is - * modified destructively, which is why we made a copy... 
- */ - trialsortkeys = ltruncate(sortkeycnt, trialsortkeys); - innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, - trialsortkeys, - TOTAL_COST); - if (innerpath != NULL && - innerpath != innerrel->cheapest_total_path && - (cheapest_total_inner == NULL || - compare_path_costs(innerpath, cheapest_total_inner, - TOTAL_COST) < 0)) - { - /* Found a cheap (or even-cheaper) sorted path */ - /* Select the right mergeclauses, if we didn't already */ - if (sortkeycnt < num_sortkeys) - { - newclauses = - find_mergeclauses_for_pathkeys(root, - trialsortkeys, - mergeclauses); - Assert(newclauses != NIL); - } - else - newclauses = mergeclauses; - add_path(joinrel, (Path *) - create_mergejoin_path(root, - joinrel, - jointype, - outerpath, - innerpath, - restrictlist, - merge_pathkeys, - newclauses, - NIL, - NIL)); - cheapest_total_inner = innerpath; - } - /* Same on the basis of cheapest startup cost ... */ - innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, - trialsortkeys, - STARTUP_COST); - if (innerpath != NULL && - innerpath != innerrel->cheapest_total_path && - (cheapest_startup_inner == NULL || - compare_path_costs(innerpath, cheapest_startup_inner, - STARTUP_COST) < 0)) - { - /* Found a cheap (or even-cheaper) sorted path */ - if (innerpath != cheapest_total_inner) - { - /* - * Avoid rebuilding clause list if we already made - * one; saves memory in big join trees... - */ - if (newclauses == NIL) - { - if (sortkeycnt < num_sortkeys) - { - newclauses = - find_mergeclauses_for_pathkeys(root, - trialsortkeys, - mergeclauses); - Assert(newclauses != NIL); - } - else - newclauses = mergeclauses; - } - add_path(joinrel, (Path *) - create_mergejoin_path(root, - joinrel, - jointype, - outerpath, - innerpath, - restrictlist, - merge_pathkeys, - newclauses, - NIL, - NIL)); - } - cheapest_startup_inner = innerpath; - } - - /* - * Don't consider truncated sortkeys if we need all clauses. 
- */ - if (useallclauses) - break; - } - } -} - -#ifdef NOT_USED - -/* - * match_unsorted_inner - * Generate mergejoin paths that use an explicit sort of the outer path - * with an already-ordered inner path. - * - * 'joinrel' is the join result relation - * 'outerrel' is the outer join relation - * 'innerrel' is the inner join relation - * 'restrictlist' contains all of the RestrictInfo nodes for restriction - * clauses that apply to this join - * 'mergeclause_list' is a list of RestrictInfo nodes for available - * mergejoin clauses in this join - * 'jointype' is the type of join to do - */ -static void -match_unsorted_inner(Query *root, - RelOptInfo *joinrel, - RelOptInfo *outerrel, - RelOptInfo *innerrel, - List *restrictlist, - List *mergeclause_list, - JoinType jointype) -{ - bool useallclauses; - List *i; - - switch (jointype) - { - case JOIN_INNER: - case JOIN_LEFT: - useallclauses = false; - break; - case JOIN_RIGHT: - case JOIN_FULL: - useallclauses = true; - break; - default: - elog(ERROR, "match_unsorted_inner: unexpected join type %d", - (int) jointype); - useallclauses = false; /* keep compiler quiet */ - break; - } - - foreach(i, innerrel->pathlist) - { - Path *innerpath = (Path *) lfirst(i); - List *mergeclauses; - List *outersortkeys; - List *merge_pathkeys; - Path *totalouterpath; - Path *startupouterpath; - - /* Look for useful mergeclauses (if any) */ - mergeclauses = find_mergeclauses_for_pathkeys(root, - innerpath->pathkeys, - mergeclause_list); - - /* Done with this inner path if no chance for a mergejoin */ - if (mergeclauses == NIL) - continue; - if (useallclauses && length(mergeclauses) != length(mergeclause_list)) - continue; - - /* Compute the required ordering of the outer path */ - outersortkeys = make_pathkeys_for_mergeclauses(root, - mergeclauses, - outerrel); - - /* - * Generate a mergejoin on the basis of sorting the cheapest - * outer. Since a sort will be needed, only cheapest total cost - * matters. 
- */ - merge_pathkeys = build_join_pathkeys(root, joinrel, outersortkeys); - add_path(joinrel, (Path *) - create_mergejoin_path(root, - joinrel, - jointype, - outerrel->cheapest_total_path, - innerpath, - restrictlist, - merge_pathkeys, - mergeclauses, - outersortkeys, - NIL)); - - /* - * Now generate mergejoins based on already-sufficiently-ordered - * outer paths. There's likely to be some redundancy here with - * paths already generated by merge_unsorted_outer ... but since - * merge_unsorted_outer doesn't consider all permutations of the - * mergeclause list, it may fail to notice that this particular - * innerpath could have been used with this outerpath. - */ - totalouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist, - outersortkeys, - TOTAL_COST); - if (totalouterpath == NULL) - continue; /* there won't be a startup-cost path - * either */ - - merge_pathkeys = build_join_pathkeys(root, joinrel, - totalouterpath->pathkeys); - add_path(joinrel, (Path *) - create_mergejoin_path(root, - joinrel, - jointype, - totalouterpath, - innerpath, - restrictlist, - merge_pathkeys, - mergeclauses, - NIL, - NIL)); - - startupouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist, - outersortkeys, - STARTUP_COST); - if (startupouterpath != NULL && startupouterpath != totalouterpath) - { - merge_pathkeys = build_join_pathkeys(root, joinrel, - startupouterpath->pathkeys); - add_path(joinrel, (Path *) - create_mergejoin_path(root, - joinrel, - jointype, - startupouterpath, - innerpath, - restrictlist, - merge_pathkeys, - mergeclauses, - NIL, - NIL)); - } - } -} -#endif - -/* - * hash_inner_and_outer - * Create hashjoin join paths by explicitly hashing both the outer and - * inner join relations of each available hash clause. 
- * - * 'joinrel' is the join relation - * 'outerrel' is the outer join relation - * 'innerrel' is the inner join relation - * 'restrictlist' contains all of the RestrictInfo nodes for restriction - * clauses that apply to this join - * 'jointype' is the type of join to do - */ -static void -hash_inner_and_outer(Query *root, - RelOptInfo *joinrel, - RelOptInfo *outerrel, - RelOptInfo *innerrel, - List *restrictlist, - JoinType jointype) -{ - bool isouterjoin; - List *i; - - /* - * Hashjoin only supports inner and left joins. - */ - switch (jointype) - { - case JOIN_INNER: - isouterjoin = false; - break; - case JOIN_LEFT: - isouterjoin = true; - break; - default: - return; - } - - /* - * Scan the join's restrictinfo list to find hashjoinable clauses that - * are usable with this pair of sub-relations. Since we currently - * accept only var-op-var clauses as hashjoinable, we need only check - * the membership of the vars to determine whether a particular clause - * can be used with this pair of sub-relations. This code would need - * to be upgraded if we wanted to allow more-complex expressions in - * hash joins. - */ - foreach(i, restrictlist) - { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i); - Var *left, - *right; - List *hashclauses; - - if (restrictinfo->hashjoinoperator == InvalidOid) - continue; /* not hashjoinable */ - - /* - * If processing an outer join, only use its own join clauses for - * hashing. For inner joins we need not be so picky. - */ - if (isouterjoin && restrictinfo->ispusheddown) - continue; - - /* these must be OK, since check_hashjoinable accepted the clause */ - left = get_leftop(restrictinfo->clause); - right = get_rightop(restrictinfo->clause); - - /* - * Check if clause is usable with these input rels. 
- */ - if (VARISRELMEMBER(left->varno, outerrel) && - VARISRELMEMBER(right->varno, innerrel)) - { - /* righthand side is inner */ - } - else if (VARISRELMEMBER(left->varno, innerrel) && - VARISRELMEMBER(right->varno, outerrel)) - { - /* lefthand side is inner */ - } - else - continue; /* no good for these input relations */ - - /* always a one-element list of hash clauses */ - hashclauses = makeList1(restrictinfo); - - /* - * We consider both the cheapest-total-cost and - * cheapest-startup-cost outer paths. There's no need to consider - * any but the cheapest-total-cost inner path, however. - */ - add_path(joinrel, (Path *) - create_hashjoin_path(root, - joinrel, - jointype, - outerrel->cheapest_total_path, - innerrel->cheapest_total_path, - restrictlist, - hashclauses)); - if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path) - add_path(joinrel, (Path *) - create_hashjoin_path(root, - joinrel, - jointype, - outerrel->cheapest_startup_path, - innerrel->cheapest_total_path, - restrictlist, - hashclauses)); - } -} - -/* - * best_innerjoin - * Find the cheapest index path that has already been identified by - * indexable_joinclauses() as being a possible inner path for the given - * outer relation(s) in a nestloop join. - * - * We compare indexpaths on total_cost only, assuming that they will all have - * zero or negligible startup_cost. We might have to think harder someday... - * - * 'join_paths' is a list of potential inner indexscan join paths - * 'outer_relids' is the relid list of the outer join relation - * - * Returns the pathnode of the best path, or NULL if there's no - * usable path. - */ -static Path * -best_innerjoin(List *join_paths, Relids outer_relids, JoinType jointype) -{ - Path *cheapest = (Path *) NULL; - bool isouterjoin; - List *join_path; - - /* - * Nestloop only supports inner and left joins. 
- */ - switch (jointype) - { - case JOIN_INNER: - isouterjoin = false; - break; - case JOIN_LEFT: - isouterjoin = true; - break; - default: - return NULL; - } - - foreach(join_path, join_paths) - { - IndexPath *path = (IndexPath *) lfirst(join_path); - - Assert(IsA(path, IndexPath)); - - /* - * If processing an outer join, only use explicit join clauses in - * the inner indexscan. For inner joins we need not be so picky. - */ - if (isouterjoin && !path->alljoinquals) - continue; - - /* - * path->joinrelids is the set of base rels that must be part of - * outer_relids in order to use this inner path, because those - * rels are used in the index join quals of this inner path. - */ - if (is_subseti(path->joinrelids, outer_relids) && - (cheapest == NULL || - compare_path_costs((Path *) path, cheapest, TOTAL_COST) < 0)) - cheapest = (Path *) path; - } - return cheapest; -} - -/* - * select_mergejoin_clauses - * Select mergejoin clauses that are usable for a particular join. - * Returns a list of RestrictInfo nodes for those clauses. - * - * We examine each restrictinfo clause known for the join to see - * if it is mergejoinable and involves vars from the two sub-relations - * currently of interest. - * - * Since we currently allow only plain Vars as the left and right sides - * of mergejoin clauses, this test is relatively simple. This routine - * would need to be upgraded to support more-complex expressions - * as sides of mergejoins. In theory, we could allow arbitrarily complex - * expressions in mergejoins, so long as one side uses only vars from one - * sub-relation and the other side uses only vars from the other. 
- */ -static List * -select_mergejoin_clauses(RelOptInfo *joinrel, - RelOptInfo *outerrel, - RelOptInfo *innerrel, - List *restrictlist, - JoinType jointype) -{ - List *result_list = NIL; - bool isouterjoin = IS_OUTER_JOIN(jointype); - List *i; - - foreach(i, restrictlist) - { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i); - Expr *clause; - Var *left, - *right; - - /* - * If processing an outer join, only use its own join clauses in - * the merge. For inner joins we need not be so picky. - * - * Furthermore, if it is a right/full join then *all* the explicit - * join clauses must be mergejoinable, else the executor will - * fail. If we are asked for a right join then just return NIL to - * indicate no mergejoin is possible (we can handle it as a left - * join instead). If we are asked for a full join then emit an - * error, because there is no fallback. - */ - if (isouterjoin) - { - if (restrictinfo->ispusheddown) - continue; - switch (jointype) - { - case JOIN_RIGHT: - if (restrictinfo->mergejoinoperator == InvalidOid) - return NIL; /* not mergejoinable */ - break; - case JOIN_FULL: - if (restrictinfo->mergejoinoperator == InvalidOid) - elog(ERROR, "FULL JOIN is only supported with mergejoinable join conditions"); - break; - default: - /* otherwise, it's OK to have nonmergeable join quals */ - break; - } - } - - if (restrictinfo->mergejoinoperator == InvalidOid) - continue; /* not mergejoinable */ - - clause = restrictinfo->clause; - /* these must be OK, since check_mergejoinable accepted the clause */ - left = get_leftop(clause); - right = get_rightop(clause); - - if ((VARISRELMEMBER(left->varno, outerrel) && - VARISRELMEMBER(right->varno, innerrel)) || - (VARISRELMEMBER(left->varno, innerrel) && - VARISRELMEMBER(right->varno, outerrel))) - result_list = lcons(restrictinfo, result_list); - } - - return result_list; -} diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c deleted file mode 100644 index 
58772c0bc03..00000000000 --- a/src/backend/optimizer/path/joinrels.c +++ /dev/null @@ -1,444 +0,0 @@ -/*------------------------------------------------------------------------- - * - * joinrels.c - * Routines to determine which relations should be joined - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.57 2002/06/20 20:29:30 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include "optimizer/pathnode.h" -#include "optimizer/paths.h" - - -static RelOptInfo *make_join_rel(Query *root, RelOptInfo *rel1, - RelOptInfo *rel2, JoinType jointype); - - -/* - * make_rels_by_joins - * Consider ways to produce join relations containing exactly 'level' - * jointree items. (This is one step of the dynamic-programming method - * embodied in make_one_rel_by_joins.) Join rel nodes for each feasible - * combination of lower-level rels are created and returned in a list. - * Implementation paths are created for each such joinrel, too. - * - * level: level of rels we want to make this time. - * joinrels[j], 1 <= j < level, is a list of rels containing j items. - */ -List * -make_rels_by_joins(Query *root, int level, List **joinrels) -{ - List *result_rels = NIL; - List *new_rels; - List *nr; - List *r; - int k; - - /* - * First, consider left-sided and right-sided plans, in which rels of - * exactly level-1 member relations are joined against initial - * relations. We prefer to join using join clauses, but if we find a - * rel of level-1 members that has no join clauses, we will generate - * Cartesian-product joins against all initial rels not already - * contained in it. - * - * In the first pass (level == 2), we try to join each initial rel to - * each initial rel that appears later in joinrels[1]. 
(The - * mirror-image joins are handled automatically by make_join_rel.) In - * later passes, we try to join rels of size level-1 from - * joinrels[level-1] to each initial rel in joinrels[1]. - */ - foreach(r, joinrels[level - 1]) - { - RelOptInfo *old_rel = (RelOptInfo *) lfirst(r); - List *other_rels; - - if (level == 2) - other_rels = lnext(r); /* only consider remaining initial - * rels */ - else - other_rels = joinrels[1]; /* consider all initial rels */ - - if (old_rel->joininfo != NIL) - { - /* - * Note that if all available join clauses for this rel - * require more than one other rel, we will fail to make any - * joins against it here. That's OK; it'll be considered by - * "bushy plan" join code in a higher-level pass where we have - * those other rels collected into a join rel. See also the - * last-ditch case below. - */ - new_rels = make_rels_by_clause_joins(root, - old_rel, - other_rels); - } - else - { - /* - * Oops, we have a relation that is not joined to any other - * relation. Cartesian product time. - */ - new_rels = make_rels_by_clauseless_joins(root, - old_rel, - other_rels); - } - - /* - * At levels above 2 we will generate the same joined relation in - * multiple ways --- for example (a join b) join c is the same - * RelOptInfo as (b join c) join a, though the second case will - * add a different set of Paths to it. To avoid making extra work - * for subsequent passes, do not enter the same RelOptInfo into - * our output list multiple times. - */ - foreach(nr, new_rels) - { - RelOptInfo *jrel = (RelOptInfo *) lfirst(nr); - - if (!ptrMember(jrel, result_rels)) - result_rels = lcons(jrel, result_rels); - } - } - - /* - * Now, consider "bushy plans" in which relations of k initial rels - * are joined to relations of level-k initial rels, for 2 <= k <= - * level-2. - * - * We only consider bushy-plan joins for pairs of rels where there is a - * suitable join clause, in order to avoid unreasonable growth of - * planning time. 
- */ - for (k = 2;; k++) - { - int other_level = level - k; - - /* - * Since make_join_rel(x, y) handles both x,y and y,x cases, we - * only need to go as far as the halfway point. - */ - if (k > other_level) - break; - - foreach(r, joinrels[k]) - { - RelOptInfo *old_rel = (RelOptInfo *) lfirst(r); - List *other_rels; - List *r2; - - if (old_rel->joininfo == NIL) - continue; /* we ignore clauseless joins here */ - - if (k == other_level) - other_rels = lnext(r); /* only consider remaining rels */ - else - other_rels = joinrels[other_level]; - - foreach(r2, other_rels) - { - RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2); - - if (nonoverlap_setsi(old_rel->relids, new_rel->relids)) - { - List *i; - - /* - * OK, we can build a rel of the right level from this - * pair of rels. Do so if there is at least one - * usable join clause. - */ - foreach(i, old_rel->joininfo) - { - JoinInfo *joininfo = (JoinInfo *) lfirst(i); - - if (is_subseti(joininfo->unjoined_relids, - new_rel->relids)) - { - RelOptInfo *jrel; - - jrel = make_join_rel(root, old_rel, new_rel, - JOIN_INNER); - /* Avoid making duplicate entries ... */ - if (!ptrMember(jrel, result_rels)) - result_rels = lcons(jrel, result_rels); - break; /* need not consider more - * joininfos */ - } - } - } - } - } - } - - /* - * Last-ditch effort: if we failed to find any usable joins so far, - * force a set of cartesian-product joins to be generated. This - * handles the special case where all the available rels have join - * clauses but we cannot use any of the joins yet. An example is - * - * SELECT * FROM a,b,c WHERE (a.f1 + b.f2 + c.f3) = 0; - * - * The join clause will be usable at level 3, but at level 2 we have no - * choice but to make cartesian joins. We consider only left-sided - * and right-sided cartesian joins in this case (no bushy). - */ - if (result_rels == NIL) - { - /* - * This loop is just like the first one, except we always call - * make_rels_by_clauseless_joins(). 
- */ - foreach(r, joinrels[level - 1]) - { - RelOptInfo *old_rel = (RelOptInfo *) lfirst(r); - List *other_rels; - - if (level == 2) - other_rels = lnext(r); /* only consider remaining initial - * rels */ - else - other_rels = joinrels[1]; /* consider all initial - * rels */ - - new_rels = make_rels_by_clauseless_joins(root, - old_rel, - other_rels); - - foreach(nr, new_rels) - { - RelOptInfo *jrel = (RelOptInfo *) lfirst(nr); - - if (!ptrMember(jrel, result_rels)) - result_rels = lcons(jrel, result_rels); - } - } - - if (result_rels == NIL) - elog(ERROR, "make_rels_by_joins: failed to build any %d-way joins", - level); - } - - return result_rels; -} - -/* - * make_rels_by_clause_joins - * Build joins between the given relation 'old_rel' and other relations - * that are mentioned within old_rel's joininfo nodes (i.e., relations - * that participate in join clauses that 'old_rel' also participates in). - * The join rel nodes are returned in a list. - * - * 'old_rel' is the relation entry for the relation to be joined - * 'other_rels': other rels to be considered for joining - * - * Currently, this is only used with initial rels in other_rels, but it - * will work for joining to joinrels too, if the caller ensures there is no - * membership overlap between old_rel and the rels in other_rels. (We need - * no extra test for overlap for initial rels, since the is_subset test can - * only succeed when other_rel is not already part of old_rel.) 
- */ -List * -make_rels_by_clause_joins(Query *root, - RelOptInfo *old_rel, - List *other_rels) -{ - List *result = NIL; - List *i, - *j; - - foreach(i, old_rel->joininfo) - { - JoinInfo *joininfo = (JoinInfo *) lfirst(i); - Relids unjoined_relids = joininfo->unjoined_relids; - - foreach(j, other_rels) - { - RelOptInfo *other_rel = (RelOptInfo *) lfirst(j); - - if (is_subseti(unjoined_relids, other_rel->relids)) - { - RelOptInfo *jrel; - - jrel = make_join_rel(root, old_rel, other_rel, JOIN_INNER); - - /* - * Avoid entering same joinrel into our output list more - * than once. (make_rels_by_joins doesn't really care, - * but GEQO does.) - */ - if (!ptrMember(jrel, result)) - result = lcons(jrel, result); - } - } - } - - return result; -} - -/* - * make_rels_by_clauseless_joins - * Given a relation 'old_rel' and a list of other relations - * 'other_rels', create a join relation between 'old_rel' and each - * member of 'other_rels' that isn't already included in 'old_rel'. - * The join rel nodes are returned in a list. - * - * 'old_rel' is the relation entry for the relation to be joined - * 'other_rels': other rels to be considered for joining - * - * Currently, this is only used with initial rels in other_rels, but it would - * work for joining to joinrels too. - */ -List * -make_rels_by_clauseless_joins(Query *root, - RelOptInfo *old_rel, - List *other_rels) -{ - List *result = NIL; - List *i; - - foreach(i, other_rels) - { - RelOptInfo *other_rel = (RelOptInfo *) lfirst(i); - - if (nonoverlap_setsi(other_rel->relids, old_rel->relids)) - { - RelOptInfo *jrel; - - jrel = make_join_rel(root, old_rel, other_rel, JOIN_INNER); - - /* - * As long as given other_rels are distinct, don't need to - * test to see if jrel is already part of output list. - */ - result = lcons(jrel, result); - } - } - - return result; -} - - -/* - * make_jointree_rel - * Find or build a RelOptInfo join rel representing a specific - * jointree item. 
For JoinExprs, we only consider the construction - * path that corresponds exactly to what the user wrote. - */ -RelOptInfo * -make_jointree_rel(Query *root, Node *jtnode) -{ - if (IsA(jtnode, RangeTblRef)) - { - int varno = ((RangeTblRef *) jtnode)->rtindex; - - return find_base_rel(root, varno); - } - else if (IsA(jtnode, FromExpr)) - { - FromExpr *f = (FromExpr *) jtnode; - - /* Recurse back to multi-way-join planner */ - return make_fromexpr_rel(root, f); - } - else if (IsA(jtnode, JoinExpr)) - { - JoinExpr *j = (JoinExpr *) jtnode; - RelOptInfo *rel, - *lrel, - *rrel; - - /* Recurse */ - lrel = make_jointree_rel(root, j->larg); - rrel = make_jointree_rel(root, j->rarg); - - /* Make this join rel */ - rel = make_join_rel(root, lrel, rrel, j->jointype); - - /* - * Since we are only going to consider this one way to do it, - * we're done generating Paths for this joinrel and can now select - * the cheapest. In fact we *must* do so now, since next level up - * will need it! - */ - set_cheapest(rel); - -#ifdef OPTIMIZER_DEBUG - debug_print_rel(root, rel); -#endif - - return rel; - } - else - elog(ERROR, "make_jointree_rel: unexpected node type %d", - nodeTag(jtnode)); - return NULL; /* keep compiler quiet */ -} - - -/* - * make_join_rel - * Find or create a join RelOptInfo that represents the join of - * the two given rels, and add to it path information for paths - * created with the two rels as outer and inner rel. - * (The join rel may already contain paths generated from other - * pairs of rels that add up to the same set of base rels.) - */ -static RelOptInfo * -make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2, - JoinType jointype) -{ - RelOptInfo *joinrel; - List *restrictlist; - - /* - * Find or build the join RelOptInfo, and compute the restrictlist - * that goes with this particular joining. - */ - joinrel = build_join_rel(root, rel1, rel2, jointype, &restrictlist); - - /* - * Consider paths using each rel as both outer and inner. 
- */ - switch (jointype) - { - case JOIN_INNER: - add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_INNER, - restrictlist); - add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_INNER, - restrictlist); - break; - case JOIN_LEFT: - add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_LEFT, - restrictlist); - add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_RIGHT, - restrictlist); - break; - case JOIN_FULL: - add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_FULL, - restrictlist); - add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_FULL, - restrictlist); - break; - case JOIN_RIGHT: - add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_RIGHT, - restrictlist); - add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_LEFT, - restrictlist); - break; - default: - elog(ERROR, "make_join_rel: unsupported join type %d", - (int) jointype); - break; - } - - return joinrel; -} diff --git a/src/backend/optimizer/path/orindxpath.c b/src/backend/optimizer/path/orindxpath.c deleted file mode 100644 index f0c1a44196d..00000000000 --- a/src/backend/optimizer/path/orindxpath.c +++ /dev/null @@ -1,241 +0,0 @@ -/*------------------------------------------------------------------------- - * - * orindxpath.c - * Routines to find index paths that match a set of 'or' clauses - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.47 2002/06/20 20:29:30 momjian Exp $ - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" - -#include "optimizer/cost.h" -#include "optimizer/pathnode.h" -#include "optimizer/paths.h" -#include "optimizer/restrictinfo.h" - - -static void best_or_subclause_indices(Query *root, RelOptInfo *rel, - List *subclauses, List *indices, - IndexPath *pathnode); -static void best_or_subclause_index(Query *root, 
RelOptInfo *rel, - Expr *subclause, List *indices, - IndexOptInfo **retIndexInfo, - List **retIndexQual, - Cost *retStartupCost, - Cost *retTotalCost); - - -/* - * create_or_index_paths - * Creates index paths for indices that match 'or' clauses. - * create_index_paths() must already have been called. - * - * 'rel' is the relation entry for which the paths are to be created - * - * Returns nothing, but adds paths to rel->pathlist via add_path(). - */ -void -create_or_index_paths(Query *root, RelOptInfo *rel) -{ - List *rlist; - - foreach(rlist, rel->baserestrictinfo) - { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(rlist); - - /* - * Check to see if this clause is an 'or' clause, and, if so, - * whether or not each of the subclauses within the 'or' clause - * has been matched by an index. The information used was saved - * by create_index_paths(). - */ - if (restriction_is_or_clause(restrictinfo) && - restrictinfo->subclauseindices) - { - bool all_indexable = true; - List *temp; - - foreach(temp, restrictinfo->subclauseindices) - { - if (lfirst(temp) == NIL) - { - all_indexable = false; - break; - } - } - if (all_indexable) - { - /* - * OK, build an IndexPath for this OR clause, using the - * best available index for each subclause. - */ - IndexPath *pathnode = makeNode(IndexPath); - - pathnode->path.pathtype = T_IndexScan; - pathnode->path.parent = rel; - - /* - * This is an IndexScan, but the overall result will - * consist of tuples extracted in multiple passes (one for - * each subclause of the OR), so the result cannot be - * claimed to have any particular ordering. - */ - pathnode->path.pathkeys = NIL; - - /* We don't actually care what order the index scans in. 
*/ - pathnode->indexscandir = NoMovementScanDirection; - - /* This isn't a nestloop innerjoin, so: */ - pathnode->joinrelids = NIL; /* no join clauses here */ - pathnode->alljoinquals = false; - pathnode->rows = rel->rows; - - best_or_subclause_indices(root, - rel, - restrictinfo->clause->args, - restrictinfo->subclauseindices, - pathnode); - - add_path(rel, (Path *) pathnode); - } - } - } -} - -/* - * best_or_subclause_indices - * Determines the best index to be used in conjunction with each subclause - * of an 'or' clause and the cost of scanning a relation using these - * indices. The cost is the sum of the individual index costs, since - * the executor will perform a scan for each subclause of the 'or'. - * Returns a list of IndexOptInfo nodes, one per scan. - * - * This routine also creates the indexqual list that will be needed by - * the executor. The indexqual list has one entry for each scan of the base - * rel, which is a sublist of indexqual conditions to apply in that scan. - * The implicit semantics are AND across each sublist of quals, and OR across - * the toplevel list (note that the executor takes care not to return any - * single tuple more than once). - * - * 'rel' is the node of the relation on which the indexes are defined - * 'subclauses' are the subclauses of the 'or' clause - * 'indices' is a list of sublists of the IndexOptInfo nodes that matched - * each subclause of the 'or' clause - * 'pathnode' is the IndexPath node being built. - * - * Results are returned by setting these fields of the passed pathnode: - * 'indexinfo' gets a list of the index IndexOptInfo nodes, one per scan - * 'indexqual' gets the constructed indexquals for the path (a list - * of sublists of clauses, one sublist per scan of the base rel) - * 'startup_cost' and 'total_cost' get the complete path costs. 
- * - * 'startup_cost' is the startup cost for the first index scan only; - * startup costs for later scans will be paid later on, so they just - * get reflected in total_cost. - * - * NOTE: we choose each scan on the basis of its total cost, ignoring startup - * cost. This is reasonable as long as all index types have zero or small - * startup cost, but we might have to work harder if any index types with - * nontrivial startup cost are ever invented. - */ -static void -best_or_subclause_indices(Query *root, - RelOptInfo *rel, - List *subclauses, - List *indices, - IndexPath *pathnode) -{ - List *slist; - - pathnode->indexinfo = NIL; - pathnode->indexqual = NIL; - pathnode->path.startup_cost = 0; - pathnode->path.total_cost = 0; - - foreach(slist, subclauses) - { - Expr *subclause = lfirst(slist); - IndexOptInfo *best_indexinfo; - List *best_indexqual; - Cost best_startup_cost; - Cost best_total_cost; - - best_or_subclause_index(root, rel, subclause, lfirst(indices), - &best_indexinfo, &best_indexqual, - &best_startup_cost, &best_total_cost); - - Assert(best_indexinfo != NULL); - - pathnode->indexinfo = lappend(pathnode->indexinfo, best_indexinfo); - pathnode->indexqual = lappend(pathnode->indexqual, best_indexqual); - if (slist == subclauses) /* first scan? */ - pathnode->path.startup_cost = best_startup_cost; - pathnode->path.total_cost += best_total_cost; - - indices = lnext(indices); - } -} - -/* - * best_or_subclause_index - * Determines which is the best index to be used with a subclause of an - * 'or' clause by estimating the cost of using each index and selecting - * the least expensive (considering total cost only, for now). 
- * - * 'rel' is the node of the relation on which the index is defined - * 'subclause' is the OR subclause being considered - * 'indices' is a list of IndexOptInfo nodes that match the subclause - * '*retIndexInfo' gets the IndexOptInfo of the best index - * '*retIndexQual' gets a list of the indexqual conditions for the best index - * '*retStartupCost' gets the startup cost of a scan with that index - * '*retTotalCost' gets the total cost of a scan with that index - */ -static void -best_or_subclause_index(Query *root, - RelOptInfo *rel, - Expr *subclause, - List *indices, - IndexOptInfo **retIndexInfo, /* return value */ - List **retIndexQual, /* return value */ - Cost *retStartupCost, /* return value */ - Cost *retTotalCost) /* return value */ -{ - bool first_time = true; - List *ilist; - - /* if we don't match anything, return zeros */ - *retIndexInfo = NULL; - *retIndexQual = NIL; - *retStartupCost = 0; - *retTotalCost = 0; - - foreach(ilist, indices) - { - IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist); - List *indexqual; - Path subclause_path; - - Assert(IsA(index, IndexOptInfo)); - - /* Convert this 'or' subclause to an indexqual list */ - indexqual = extract_or_indexqual_conditions(rel, index, subclause); - - cost_index(&subclause_path, root, rel, index, indexqual, false); - - if (first_time || subclause_path.total_cost < *retTotalCost) - { - *retIndexInfo = index; - *retIndexQual = indexqual; - *retStartupCost = subclause_path.startup_cost; - *retTotalCost = subclause_path.total_cost; - first_time = false; - } - } -} diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c deleted file mode 100644 index 5e4aff32473..00000000000 --- a/src/backend/optimizer/path/pathkeys.c +++ /dev/null @@ -1,1025 +0,0 @@ -/*------------------------------------------------------------------------- - * - * pathkeys.c - * Utilities for matching and building path keys - * - * See src/backend/optimizer/README for a great deal of 
information about - * the nature and use of path keys. - * - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.39 2002/06/20 20:29:30 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include "nodes/makefuncs.h" -#include "optimizer/clauses.h" -#include "optimizer/pathnode.h" -#include "optimizer/paths.h" -#include "optimizer/planmain.h" -#include "optimizer/tlist.h" -#include "parser/parsetree.h" -#include "parser/parse_func.h" -#include "utils/lsyscache.h" - - -static PathKeyItem *makePathKeyItem(Node *key, Oid sortop); -static List *make_canonical_pathkey(Query *root, PathKeyItem *item); -static Var *find_indexkey_var(Query *root, RelOptInfo *rel, - AttrNumber varattno); - - -/* - * makePathKeyItem - * create a PathKeyItem node - */ -static PathKeyItem * -makePathKeyItem(Node *key, Oid sortop) -{ - PathKeyItem *item = makeNode(PathKeyItem); - - item->key = key; - item->sortop = sortop; - return item; -} - -/* - * add_equijoined_keys - * The given clause has a mergejoinable operator, so its two sides - * can be considered equal after restriction clause application; in - * particular, any pathkey mentioning one side (with the correct sortop) - * can be expanded to include the other as well. Record the vars and - * associated sortops in the query's equi_key_list for future use. - * - * The query's equi_key_list field points to a list of sublists of PathKeyItem - * nodes, where each sublist is a set of two or more vars+sortops that have - * been identified as logically equivalent (and, therefore, we may consider - * any two in a set to be equal). As described above, we will subsequently - * use direct pointers to one of these sublists to represent any pathkey - * that involves an equijoined variable. 
- * - * This code would actually work fine with expressions more complex than - * a single Var, but currently it won't see any because check_mergejoinable - * won't accept such clauses as mergejoinable. - */ -void -add_equijoined_keys(Query *root, RestrictInfo *restrictinfo) -{ - Expr *clause = restrictinfo->clause; - PathKeyItem *item1 = makePathKeyItem((Node *) get_leftop(clause), - restrictinfo->left_sortop); - PathKeyItem *item2 = makePathKeyItem((Node *) get_rightop(clause), - restrictinfo->right_sortop); - List *newset, - *cursetlink; - - /* We might see a clause X=X; don't make a single-element list from it */ - if (equal(item1, item2)) - return; - - /* - * Our plan is to make a two-element set, then sweep through the - * existing equijoin sets looking for matches to item1 or item2. When - * we find one, we remove that set from equi_key_list and union it - * into our new set. When done, we add the new set to the front of - * equi_key_list. - * - * It may well be that the two items we're given are already known to be - * equijoin-equivalent, in which case we don't need to change our data - * structure. If we find both of them in the same equivalence set to - * start with, we can quit immediately. - * - * This is a standard UNION-FIND problem, for which there exist better - * data structures than simple lists. If this code ever proves to be - * a bottleneck then it could be sped up --- but for now, simple is - * beautiful. - */ - newset = NIL; - - foreach(cursetlink, root->equi_key_list) - { - List *curset = lfirst(cursetlink); - bool item1here = member(item1, curset); - bool item2here = member(item2, curset); - - if (item1here || item2here) - { - /* - * If find both in same equivalence set, no need to do any - * more - */ - if (item1here && item2here) - { - /* Better not have seen only one in an earlier set... 
*/ - Assert(newset == NIL); - return; - } - - /* Build the new set only when we know we must */ - if (newset == NIL) - newset = makeList2(item1, item2); - - /* Found a set to merge into our new set */ - newset = set_union(newset, curset); - - /* - * Remove old set from equi_key_list. NOTE this does not - * change lnext(cursetlink), so the foreach loop doesn't - * break. - */ - root->equi_key_list = lremove(curset, root->equi_key_list); - freeList(curset); /* might as well recycle old cons cells */ - } - } - - /* Build the new set only when we know we must */ - if (newset == NIL) - newset = makeList2(item1, item2); - - root->equi_key_list = lcons(newset, root->equi_key_list); -} - -/* - * generate_implied_equalities - * Scan the completed equi_key_list for the query, and generate explicit - * qualifications (WHERE clauses) for all the pairwise equalities not - * already mentioned in the quals. This is useful because the additional - * clauses help the selectivity-estimation code, and in fact it's - * *necessary* to ensure that sort keys we think are equivalent really - * are (see src/backend/optimizer/README for more info). - * - * This routine just walks the equi_key_list to find all pairwise equalities. - * We call process_implied_equality (in plan/initsplan.c) to determine whether - * each is already known and add it to the proper restrictinfo list if not. - */ -void -generate_implied_equalities(Query *root) -{ - List *cursetlink; - - foreach(cursetlink, root->equi_key_list) - { - List *curset = lfirst(cursetlink); - List *ptr1; - - /* - * A set containing only two items cannot imply any equalities - * beyond the one that created the set, so we can skip it. - */ - if (length(curset) < 3) - continue; - - /* - * Match each item in the set with all that appear after it (it's - * sufficient to generate A=B, need not process B=A too). 
- */ - foreach(ptr1, curset) - { - PathKeyItem *item1 = (PathKeyItem *) lfirst(ptr1); - List *ptr2; - - foreach(ptr2, lnext(ptr1)) - { - PathKeyItem *item2 = (PathKeyItem *) lfirst(ptr2); - - process_implied_equality(root, item1->key, item2->key, - item1->sortop, item2->sortop); - } - } - } -} - -/* - * make_canonical_pathkey - * Given a PathKeyItem, find the equi_key_list subset it is a member of, - * if any. If so, return a pointer to that sublist, which is the - * canonical representation (for this query) of that PathKeyItem's - * equivalence set. If it is not found, add a singleton "equivalence set" - * to the equi_key_list and return that --- see compare_pathkeys. - * - * Note that this function must not be used until after we have completed - * scanning the WHERE clause for equijoin operators. - */ -static List * -make_canonical_pathkey(Query *root, PathKeyItem *item) -{ - List *cursetlink; - List *newset; - - foreach(cursetlink, root->equi_key_list) - { - List *curset = lfirst(cursetlink); - - if (member(item, curset)) - return curset; - } - newset = makeList1(item); - root->equi_key_list = lcons(newset, root->equi_key_list); - return newset; -} - -/* - * canonicalize_pathkeys - * Convert a not-necessarily-canonical pathkeys list to canonical form. - * - * Note that this function must not be used until after we have completed - * scanning the WHERE clause for equijoin operators. - */ -List * -canonicalize_pathkeys(Query *root, List *pathkeys) -{ - List *new_pathkeys = NIL; - List *i; - - foreach(i, pathkeys) - { - List *pathkey = (List *) lfirst(i); - PathKeyItem *item; - List *cpathkey; - - /* - * It's sufficient to look at the first entry in the sublist; if - * there are more entries, they're already part of an equivalence - * set by definition. 
- */ - Assert(pathkey != NIL); - item = (PathKeyItem *) lfirst(pathkey); - cpathkey = make_canonical_pathkey(root, item); - - /* - * Eliminate redundant ordering requests --- ORDER BY A,A is the - * same as ORDER BY A. We want to check this only after we have - * canonicalized the keys, so that equivalent-key knowledge is - * used when deciding if an item is redundant. - */ - if (!ptrMember(cpathkey, new_pathkeys)) - new_pathkeys = lappend(new_pathkeys, cpathkey); - } - return new_pathkeys; -} - -/**************************************************************************** - * PATHKEY COMPARISONS - ****************************************************************************/ - -/* - * compare_pathkeys - * Compare two pathkeys to see if they are equivalent, and if not whether - * one is "better" than the other. - * - * This function may only be applied to canonicalized pathkey lists. - * In the canonical representation, sublists can be checked for equality - * by simple pointer comparison. - */ -PathKeysComparison -compare_pathkeys(List *keys1, List *keys2) -{ - List *key1, - *key2; - - for (key1 = keys1, key2 = keys2; - key1 != NIL && key2 != NIL; - key1 = lnext(key1), key2 = lnext(key2)) - { - List *subkey1 = lfirst(key1); - List *subkey2 = lfirst(key2); - - /* - * XXX would like to check that we've been given canonicalized - * input, but query root not accessible here... - */ -#ifdef NOT_USED - Assert(ptrMember(subkey1, root->equi_key_list)); - Assert(ptrMember(subkey2, root->equi_key_list)); -#endif - - /* - * We will never have two subkeys where one is a subset of the - * other, because of the canonicalization process. Either they - * are equal or they ain't. Furthermore, we only need pointer - * comparison to detect equality. - */ - if (subkey1 != subkey2) - return PATHKEYS_DIFFERENT; /* no need to keep looking */ - } - - /* - * If we reached the end of only one list, the other is longer and - * therefore not a subset. 
(We assume the additional sublist(s) of - * the other list are not NIL --- no pathkey list should ever have a - * NIL sublist.) - */ - if (key1 == NIL && key2 == NIL) - return PATHKEYS_EQUAL; - if (key1 != NIL) - return PATHKEYS_BETTER1; /* key1 is longer */ - return PATHKEYS_BETTER2; /* key2 is longer */ -} - -/* - * compare_noncanonical_pathkeys - * Compare two pathkeys to see if they are equivalent, and if not whether - * one is "better" than the other. This is used when we must compare - * non-canonicalized pathkeys. - * - * A pathkey can be considered better than another if it is a superset: - * it contains all the keys of the other plus more. For example, either - * ((A) (B)) or ((A B)) is better than ((A)). - * - * Currently, the only user of this routine is grouping_planner(), - * and it will only pass single-element sublists (from - * make_pathkeys_for_sortclauses). Therefore we don't have to do the - * full two-way-subset-inclusion test on each pair of sublists that is - * implied by the above statement. Instead we just verify they are - * singleton lists and then do an equal(). This could be improved if - * necessary. - */ -PathKeysComparison -compare_noncanonical_pathkeys(List *keys1, List *keys2) -{ - List *key1, - *key2; - - for (key1 = keys1, key2 = keys2; - key1 != NIL && key2 != NIL; - key1 = lnext(key1), key2 = lnext(key2)) - { - List *subkey1 = lfirst(key1); - List *subkey2 = lfirst(key2); - - Assert(length(subkey1) == 1); - Assert(length(subkey2) == 1); - if (!equal(subkey1, subkey2)) - return PATHKEYS_DIFFERENT; /* no need to keep looking */ - } - - /* - * If we reached the end of only one list, the other is longer and - * therefore not a subset. (We assume the additional sublist(s) of - * the other list are not NIL --- no pathkey list should ever have a - * NIL sublist.) 
- */ - if (key1 == NIL && key2 == NIL) - return PATHKEYS_EQUAL; - if (key1 != NIL) - return PATHKEYS_BETTER1; /* key1 is longer */ - return PATHKEYS_BETTER2; /* key2 is longer */ -} - -/* - * pathkeys_contained_in - * Common special case of compare_pathkeys: we just want to know - * if keys2 are at least as well sorted as keys1. - */ -bool -pathkeys_contained_in(List *keys1, List *keys2) -{ - switch (compare_pathkeys(keys1, keys2)) - { - case PATHKEYS_EQUAL: - case PATHKEYS_BETTER2: - return true; - default: - break; - } - return false; -} - -/* - * noncanonical_pathkeys_contained_in - * The same, when we don't have canonical pathkeys. - */ -bool -noncanonical_pathkeys_contained_in(List *keys1, List *keys2) -{ - switch (compare_noncanonical_pathkeys(keys1, keys2)) - { - case PATHKEYS_EQUAL: - case PATHKEYS_BETTER2: - return true; - default: - break; - } - return false; -} - -/* - * get_cheapest_path_for_pathkeys - * Find the cheapest path (according to the specified criterion) that - * satisfies the given pathkeys. Return NULL if no such path. - * - * 'paths' is a list of possible paths that all generate the same relation - * 'pathkeys' represents a required ordering (already canonicalized!) - * 'cost_criterion' is STARTUP_COST or TOTAL_COST - */ -Path * -get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, - CostSelector cost_criterion) -{ - Path *matched_path = NULL; - List *i; - - foreach(i, paths) - { - Path *path = (Path *) lfirst(i); - - /* - * Since cost comparison is a lot cheaper than pathkey comparison, - * do that first. (XXX is that still true?) - */ - if (matched_path != NULL && - compare_path_costs(matched_path, path, cost_criterion) <= 0) - continue; - - if (pathkeys_contained_in(pathkeys, path->pathkeys)) - matched_path = path; - } - return matched_path; -} - -/* - * get_cheapest_fractional_path_for_pathkeys - * Find the cheapest path (for retrieving a specified fraction of all - * the tuples) that satisfies the given pathkeys. 
- * Return NULL if no such path. - * - * See compare_fractional_path_costs() for the interpretation of the fraction - * parameter. - * - * 'paths' is a list of possible paths that all generate the same relation - * 'pathkeys' represents a required ordering (already canonicalized!) - * 'fraction' is the fraction of the total tuples expected to be retrieved - */ -Path * -get_cheapest_fractional_path_for_pathkeys(List *paths, - List *pathkeys, - double fraction) -{ - Path *matched_path = NULL; - List *i; - - foreach(i, paths) - { - Path *path = (Path *) lfirst(i); - - /* - * Since cost comparison is a lot cheaper than pathkey comparison, - * do that first. - */ - if (matched_path != NULL && - compare_fractional_path_costs(matched_path, path, fraction) <= 0) - continue; - - if (pathkeys_contained_in(pathkeys, path->pathkeys)) - matched_path = path; - } - return matched_path; -} - -/**************************************************************************** - * NEW PATHKEY FORMATION - ****************************************************************************/ - -/* - * build_index_pathkeys - * Build a pathkeys list that describes the ordering induced by an index - * scan using the given index. (Note that an unordered index doesn't - * induce any ordering; such an index will have no sortop OIDS in - * its "ordering" field, and we will return NIL.) - * - * If 'scandir' is BackwardScanDirection, attempt to build pathkeys - * representing a backwards scan of the index. Return NIL if can't do it. - */ -List * -build_index_pathkeys(Query *root, - RelOptInfo *rel, - IndexOptInfo *index, - ScanDirection scandir) -{ - List *retval = NIL; - int *indexkeys = index->indexkeys; - Oid *ordering = index->ordering; - PathKeyItem *item; - Oid sortop; - - if (!indexkeys || indexkeys[0] == 0 || - !ordering || ordering[0] == InvalidOid) - return NIL; /* unordered index? 
*/ - - if (index->indproc) - { - /* Functional index: build a representation of the function call */ - Func *funcnode = makeNode(Func); - List *funcargs = NIL; - - funcnode->funcid = index->indproc; - funcnode->funcresulttype = get_func_rettype(index->indproc); - funcnode->funcretset = false; /* can never be a set */ - funcnode->func_fcache = NULL; - - while (*indexkeys != 0) - { - funcargs = lappend(funcargs, - find_indexkey_var(root, rel, *indexkeys)); - indexkeys++; - } - - sortop = *ordering; - if (ScanDirectionIsBackward(scandir)) - { - sortop = get_commutator(sortop); - if (sortop == InvalidOid) - return NIL; /* oops, no reverse sort operator? */ - } - - /* Make a one-sublist pathkeys list for the function expression */ - item = makePathKeyItem((Node *) make_funcclause(funcnode, funcargs), - sortop); - retval = makeList1(make_canonical_pathkey(root, item)); - } - else - { - /* Normal non-functional index */ - while (*indexkeys != 0 && *ordering != InvalidOid) - { - Var *relvar = find_indexkey_var(root, rel, *indexkeys); - List *cpathkey; - - sortop = *ordering; - if (ScanDirectionIsBackward(scandir)) - { - sortop = get_commutator(sortop); - if (sortop == InvalidOid) - break; /* oops, no reverse sort operator? */ - } - - /* OK, make a sublist for this sort key */ - item = makePathKeyItem((Node *) relvar, sortop); - cpathkey = make_canonical_pathkey(root, item); - - /* - * Eliminate redundant ordering info; could happen if query is - * such that index keys are equijoined... - */ - if (!ptrMember(cpathkey, retval)) - retval = lappend(retval, cpathkey); - indexkeys++; - ordering++; - } - } - - return retval; -} - -/* - * Find or make a Var node for the specified attribute of the rel. - * - * We first look for the var in the rel's target list, because that's - * easy and fast. But the var might not be there (this should normally - * only happen for vars that are used in WHERE restriction clauses, - * but not in join clauses or in the SELECT target list). 
In that case, - * gin up a Var node the hard way. - */ -static Var * -find_indexkey_var(Query *root, RelOptInfo *rel, AttrNumber varattno) -{ - List *temp; - int relid; - Oid reloid, - vartypeid; - int32 type_mod; - - foreach(temp, rel->targetlist) - { - Var *tle_var = get_expr(lfirst(temp)); - - if (IsA(tle_var, Var) &&tle_var->varattno == varattno) - return tle_var; - } - - relid = lfirsti(rel->relids); - reloid = getrelid(relid, root->rtable); - vartypeid = get_atttype(reloid, varattno); - type_mod = get_atttypmod(reloid, varattno); - - return makeVar(relid, varattno, vartypeid, type_mod, 0); -} - -/* - * build_join_pathkeys - * Build the path keys for a join relation constructed by mergejoin or - * nestloop join. These keys should include all the path key vars of the - * outer path (since the join will retain the ordering of the outer path) - * plus any vars of the inner path that are equijoined to the outer vars. - * - * Per the discussion in backend/optimizer/README, equijoined inner vars - * can be considered path keys of the result, just the same as the outer - * vars they were joined with; furthermore, it doesn't matter what kind - * of join algorithm is actually used. - * - * 'joinrel' is the join relation that paths are being formed for - * 'outer_pathkeys' is the list of the current outer path's path keys - * - * Returns the list of new path keys. - */ -List * -build_join_pathkeys(Query *root, - RelOptInfo *joinrel, - List *outer_pathkeys) -{ - /* - * This used to be quite a complex bit of code, but now that all - * pathkey sublists start out life canonicalized, we don't have to do - * a darn thing here! The inner-rel vars we used to need to add are - * *already* part of the outer pathkey! - * - * We do, however, need to truncate the pathkeys list, since it may - * contain pathkeys that were useful for forming this joinrel but are - * uninteresting to higher levels. 
- */ - return truncate_useless_pathkeys(root, joinrel, outer_pathkeys); -} - -/**************************************************************************** - * PATHKEYS AND SORT CLAUSES - ****************************************************************************/ - -/* - * make_pathkeys_for_sortclauses - * Generate a pathkeys list that represents the sort order specified - * by a list of SortClauses (GroupClauses will work too!) - * - * NB: the result is NOT in canonical form, but must be passed through - * canonicalize_pathkeys() before it can be used for comparisons or - * labeling relation sort orders. (We do things this way because - * grouping_planner needs to be able to construct requested pathkeys - * before the pathkey equivalence sets have been created for the query.) - * - * 'sortclauses' is a list of SortClause or GroupClause nodes - * 'tlist' is the targetlist to find the referenced tlist entries in - */ -List * -make_pathkeys_for_sortclauses(List *sortclauses, - List *tlist) -{ - List *pathkeys = NIL; - List *i; - - foreach(i, sortclauses) - { - SortClause *sortcl = (SortClause *) lfirst(i); - Node *sortkey; - PathKeyItem *pathkey; - - sortkey = get_sortgroupclause_expr(sortcl, tlist); - pathkey = makePathKeyItem(sortkey, sortcl->sortop); - - /* - * The pathkey becomes a one-element sublist, for now; - * canonicalize_pathkeys() might replace it with a longer sublist - * later. - */ - pathkeys = lappend(pathkeys, makeList1(pathkey)); - } - return pathkeys; -} - -/**************************************************************************** - * PATHKEYS AND MERGECLAUSES - ****************************************************************************/ - -/* - * cache_mergeclause_pathkeys - * Make the cached pathkeys valid in a mergeclause restrictinfo. - * - * RestrictInfo contains fields in which we may cache the result - * of looking up the canonical pathkeys for the left and right sides - * of the mergeclause. 
(Note that in normal cases they will be the - * same, but not if the mergeclause appears above an OUTER JOIN.) - * This is a worthwhile savings because these routines will be invoked - * many times when dealing with a many-relation query. - */ -void -cache_mergeclause_pathkeys(Query *root, RestrictInfo *restrictinfo) -{ - Node *key; - PathKeyItem *item; - - Assert(restrictinfo->mergejoinoperator != InvalidOid); - - if (restrictinfo->left_pathkey == NIL) - { - key = (Node *) get_leftop(restrictinfo->clause); - item = makePathKeyItem(key, restrictinfo->left_sortop); - restrictinfo->left_pathkey = make_canonical_pathkey(root, item); - } - if (restrictinfo->right_pathkey == NIL) - { - key = (Node *) get_rightop(restrictinfo->clause); - item = makePathKeyItem(key, restrictinfo->right_sortop); - restrictinfo->right_pathkey = make_canonical_pathkey(root, item); - } -} - -/* - * find_mergeclauses_for_pathkeys - * This routine attempts to find a set of mergeclauses that can be - * used with a specified ordering for one of the input relations. - * If successful, it returns a list of mergeclauses. - * - * 'pathkeys' is a pathkeys list showing the ordering of an input path. - * It doesn't matter whether it is for the inner or outer path. - * 'restrictinfos' is a list of mergejoinable restriction clauses for the - * join relation being formed. - * - * The result is NIL if no merge can be done, else a maximal list of - * usable mergeclauses (represented as a list of their restrictinfo nodes). - * - * XXX Ideally we ought to be considering context, ie what path orderings - * are available on the other side of the join, rather than just making - * an arbitrary choice among the mergeclauses that will work for this side - * of the join. 
- */ -List * -find_mergeclauses_for_pathkeys(Query *root, - List *pathkeys, - List *restrictinfos) -{ - List *mergeclauses = NIL; - List *i; - - /* make sure we have pathkeys cached in the clauses */ - foreach(i, restrictinfos) - { - RestrictInfo *restrictinfo = lfirst(i); - - cache_mergeclause_pathkeys(root, restrictinfo); - } - - foreach(i, pathkeys) - { - List *pathkey = lfirst(i); - List *matched_restrictinfos = NIL; - List *j; - - /* - * We can match a pathkey against either left or right side of any - * mergejoin clause. (We examine both sides since we aren't told if - * the given pathkeys are for inner or outer input path; no confusion - * is possible.) Furthermore, if there are multiple matching - * clauses, take them all. In plain inner-join scenarios we expect - * only one match, because redundant-mergeclause elimination will - * have removed any redundant mergeclauses from the input list. - * However, in outer-join scenarios there might be multiple matches. - * An example is - * - * select * from a full join b on - * a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2; - * - * Given the pathkeys ((a.v1), (a.v2)) it is okay to return all - * three clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) - * and indeed we *must* do so or we will be unable to form a - * valid plan. - */ - foreach(j, restrictinfos) - { - RestrictInfo *restrictinfo = lfirst(j); - - /* - * We can compare canonical pathkey sublists by simple pointer - * equality; see compare_pathkeys. - */ - if ((pathkey == restrictinfo->left_pathkey || - pathkey == restrictinfo->right_pathkey) && - !ptrMember(restrictinfo, mergeclauses)) - { - matched_restrictinfos = lappend(matched_restrictinfos, - restrictinfo); - } - } - - /* - * If we didn't find a mergeclause, we're done --- any additional - * sort-key positions in the pathkeys are useless. (But we can - * still mergejoin if we found at least one mergeclause.) 
- */ - if (matched_restrictinfos == NIL) - break; - - /* - * If we did find usable mergeclause(s) for this sort-key position, - * add them to result list. - */ - mergeclauses = nconc(mergeclauses, matched_restrictinfos); - } - - return mergeclauses; -} - -/* - * make_pathkeys_for_mergeclauses - * Builds a pathkey list representing the explicit sort order that - * must be applied to a path in order to make it usable for the - * given mergeclauses. - * - * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses - * that will be used in a merge join. - * 'rel' is the relation the pathkeys will apply to (ie, either the inner - * or outer side of the proposed join rel). - * - * Returns a pathkeys list that can be applied to the indicated relation. - * - * Note that it is not this routine's job to decide whether sorting is - * actually needed for a particular input path. Assume a sort is necessary; - * just make the keys, eh? - */ -List * -make_pathkeys_for_mergeclauses(Query *root, - List *mergeclauses, - RelOptInfo *rel) -{ - List *pathkeys = NIL; - List *i; - - foreach(i, mergeclauses) - { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i); - Node *key; - List *pathkey; - - cache_mergeclause_pathkeys(root, restrictinfo); - - key = (Node *) get_leftop(restrictinfo->clause); - if (IsA(key, Var) && - VARISRELMEMBER(((Var *) key)->varno, rel)) - { - /* Rel is left side of mergeclause */ - pathkey = restrictinfo->left_pathkey; - } - else - { - key = (Node *) get_rightop(restrictinfo->clause); - if (IsA(key, Var) && - VARISRELMEMBER(((Var *) key)->varno, rel)) - { - /* Rel is right side of mergeclause */ - pathkey = restrictinfo->right_pathkey; - } - else - { - elog(ERROR, "make_pathkeys_for_mergeclauses: can't identify which side of mergeclause to use"); - pathkey = NIL; /* keep compiler quiet */ - } - } - - /* - * When we are given multiple merge clauses, it's possible that - * some clauses refer to the same vars as earlier clauses. 
There's - * no reason for us to specify sort keys like (A,B,A) when (A,B) - * will do --- and adding redundant sort keys makes add_path think - * that this sort order is different from ones that are really the - * same, so don't do it. Since we now have a canonicalized - * pathkey, a simple ptrMember test is sufficient to detect - * redundant keys. - */ - if (!ptrMember(pathkey, pathkeys)) - pathkeys = lappend(pathkeys, pathkey); - } - - return pathkeys; -} - -/**************************************************************************** - * PATHKEY USEFULNESS CHECKS - * - * We only want to remember as many of the pathkeys of a path as have some - * potential use, either for subsequent mergejoins or for meeting the query's - * requested output ordering. This ensures that add_path() won't consider - * a path to have a usefully different ordering unless it really is useful. - * These routines check for usefulness of given pathkeys. - ****************************************************************************/ - -/* - * pathkeys_useful_for_merging - * Count the number of pathkeys that may be useful for mergejoins - * above the given relation (by looking at its joininfo lists). - * - * We consider a pathkey potentially useful if it corresponds to the merge - * ordering of either side of any joinclause for the rel. This might be - * overoptimistic, since joinclauses that appear in different join lists - * might never be usable at the same time, but trying to be exact is likely - * to be more trouble than it's worth. 
- */ -int -pathkeys_useful_for_merging(Query *root, RelOptInfo *rel, List *pathkeys) -{ - int useful = 0; - List *i; - - foreach(i, pathkeys) - { - List *pathkey = lfirst(i); - bool matched = false; - List *j; - - foreach(j, rel->joininfo) - { - JoinInfo *joininfo = (JoinInfo *) lfirst(j); - List *k; - - foreach(k, joininfo->jinfo_restrictinfo) - { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(k); - - if (restrictinfo->mergejoinoperator == InvalidOid) - continue; - cache_mergeclause_pathkeys(root, restrictinfo); - - /* - * We can compare canonical pathkey sublists by simple - * pointer equality; see compare_pathkeys. - */ - if (pathkey == restrictinfo->left_pathkey || - pathkey == restrictinfo->right_pathkey) - { - matched = true; - break; - } - } - - if (matched) - break; - } - - /* - * If we didn't find a mergeclause, we're done --- any additional - * sort-key positions in the pathkeys are useless. (But we can - * still mergejoin if we found at least one mergeclause.) - */ - if (matched) - useful++; - else - break; - } - - return useful; -} - -/* - * pathkeys_useful_for_ordering - * Count the number of pathkeys that are useful for meeting the - * query's requested output ordering. - * - * Unlike merge pathkeys, this is an all-or-nothing affair: it does us - * no good to order by just the first key(s) of the requested ordering. - * So the result is always either 0 or length(root->query_pathkeys). - */ -int -pathkeys_useful_for_ordering(Query *root, List *pathkeys) -{ - if (root->query_pathkeys == NIL) - return 0; /* no special ordering requested */ - - if (pathkeys == NIL) - return 0; /* unordered path */ - - if (pathkeys_contained_in(root->query_pathkeys, pathkeys)) - { - /* It's useful ... or at least the first N keys are */ - return length(root->query_pathkeys); - } - - return 0; /* path ordering not useful */ -} - -/* - * truncate_useless_pathkeys - * Shorten the given pathkey list to just the useful pathkeys. 
- */ -List * -truncate_useless_pathkeys(Query *root, - RelOptInfo *rel, - List *pathkeys) -{ - int nuseful; - int nuseful2; - - nuseful = pathkeys_useful_for_merging(root, rel, pathkeys); - nuseful2 = pathkeys_useful_for_ordering(root, pathkeys); - if (nuseful2 > nuseful) - nuseful = nuseful2; - - /* - * Note: not safe to modify input list destructively, but we can avoid - * copying the list if we're not actually going to change it - */ - if (nuseful == length(pathkeys)) - return pathkeys; - else - return ltruncate(nuseful, listCopy(pathkeys)); -} diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c deleted file mode 100644 index c9ab25d626e..00000000000 --- a/src/backend/optimizer/path/tidpath.c +++ /dev/null @@ -1,291 +0,0 @@ -/*------------------------------------------------------------------------- - * - * tidpath.c - * Routines to determine which tids are usable for scanning a - * given relation, and create TidPaths accordingly. - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.10 2002/06/20 20:29:30 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include <math.h> - -#include "postgres.h" - -#include "catalog/pg_operator.h" -#include "optimizer/clauses.h" -#include "optimizer/cost.h" -#include "optimizer/pathnode.h" -#include "optimizer/paths.h" -#include "parser/parse_coerce.h" -#include "utils/lsyscache.h" - -static void create_tidscan_joinpaths(Query *root, RelOptInfo *rel); -static List *TidqualFromRestrictinfo(List *relids, List *restrictinfo); -static bool isEvaluable(int varno, Node *node); -static Node *TidequalClause(int varno, Expr *node); -static List *TidqualFromExpr(int varno, Expr *expr); - -static -bool -isEvaluable(int varno, Node *node) -{ - List *lst; - Expr 
*expr; - - if (IsA(node, Const)) - return true; - if (IsA(node, Param)) - return true; - if (IsA(node, Var)) - { - Var *var = (Var *) node; - - if (var->varno == varno) - return false; - return true; - } - if (!is_funcclause(node)) - return false; - expr = (Expr *) node; - foreach(lst, expr->args) - { - if (!isEvaluable(varno, lfirst(lst))) - return false; - } - - return true; -} - -/* - * The 2nd parameter should be an opclause - * Extract the right node if the opclause is CTID= .... - * or the left node if the opclause is ....=CTID - */ -static -Node * -TidequalClause(int varno, Expr *node) -{ - Node *rnode = 0, - *arg1, - *arg2, - *arg; - Oper *oper; - Var *var; - Const *aconst; - Param *param; - Expr *expr; - - if (!node->oper) - return rnode; - if (!node->args) - return rnode; - if (length(node->args) != 2) - return rnode; - oper = (Oper *) node->oper; - if (oper->opno != TIDEqualOperator) - return rnode; - arg1 = lfirst(node->args); - arg2 = lsecond(node->args); - - arg = (Node *) 0; - if (IsA(arg1, Var)) - { - var = (Var *) arg1; - if (var->varno == varno && - var->varattno == SelfItemPointerAttributeNumber && - var->vartype == TIDOID) - arg = arg2; - else if (var->varnoold == varno && - var->varoattno == SelfItemPointerAttributeNumber && - var->vartype == TIDOID) - arg = arg2; - } - if ((!arg) && IsA(arg2, Var)) - { - var = (Var *) arg2; - if (var->varno == varno && - var->varattno == SelfItemPointerAttributeNumber && - var->vartype == TIDOID) - arg = arg1; - } - if (!arg) - return rnode; - switch (nodeTag(arg)) - { - case T_Const: - aconst = (Const *) arg; - if (aconst->consttype != TIDOID) - return rnode; - if (aconst->constbyval) - return rnode; - rnode = arg; - break; - case T_Param: - param = (Param *) arg; - if (param->paramtype != TIDOID) - return rnode; - rnode = arg; - break; - case T_Var: - var = (Var *) arg; - if (var->varno == varno || - var->vartype != TIDOID) - return rnode; - rnode = arg; - break; - case T_Expr: - expr = (Expr *) arg; - if 
(expr->typeOid != TIDOID) - return rnode; - if (expr->opType != FUNC_EXPR) - return rnode; - if (isEvaluable(varno, (Node *) expr)) - rnode = arg; - break; - default: - break; - } - return rnode; -} - -/* - * Extract the list of CTID values from a specified expr node. - * When the expr node is an or_clause,we try to extract CTID - * values from all member nodes. However we would discard them - * all if we couldn't extract CTID values from a member node. - * When the expr node is an and_clause,we return the list of - * CTID values if we could extract the CTID values from a member - * node. - */ -static -List * -TidqualFromExpr(int varno, Expr *expr) -{ - List *rlst = NIL, - *lst, - *frtn; - Node *node = (Node *) expr, - *rnode; - - if (is_opclause(node)) - { - rnode = TidequalClause(varno, expr); - if (rnode) - rlst = lcons(rnode, rlst); - } - else if (and_clause(node)) - { - foreach(lst, expr->args) - { - node = lfirst(lst); - if (!IsA(node, Expr)) - continue; - rlst = TidqualFromExpr(varno, (Expr *) node); - if (rlst) - break; - } - } - else if (or_clause(node)) - { - foreach(lst, expr->args) - { - node = lfirst(lst); - if (IsA(node, Expr) && - (frtn = TidqualFromExpr(varno, (Expr *) node))) - rlst = nconc(rlst, frtn); - else - { - if (rlst) - freeList(rlst); - rlst = NIL; - break; - } - } - } - return rlst; -} - -static List * -TidqualFromRestrictinfo(List *relids, List *restrictinfo) -{ - List *lst, - *rlst = NIL; - int varno; - Node *node; - Expr *expr; - - if (length(relids) != 1) - return NIL; - varno = lfirsti(relids); - foreach(lst, restrictinfo) - { - node = lfirst(lst); - if (!IsA(node, RestrictInfo)) - continue; - expr = ((RestrictInfo *) node)->clause; - rlst = TidqualFromExpr(varno, expr); - if (rlst) - break; - } - return rlst; -} - -/* - * create_tidscan_joinpaths - * Create innerjoin paths if there are suitable joinclauses. - * - * XXX does this actually work? 
- */ -static void -create_tidscan_joinpaths(Query *root, RelOptInfo *rel) -{ - List *rlst = NIL, - *lst; - - foreach(lst, rel->joininfo) - { - JoinInfo *joininfo = (JoinInfo *) lfirst(lst); - List *restinfo, - *tideval; - - restinfo = joininfo->jinfo_restrictinfo; - tideval = TidqualFromRestrictinfo(rel->relids, restinfo); - if (length(tideval) == 1) - { - TidPath *pathnode = makeNode(TidPath); - - pathnode->path.pathtype = T_TidScan; - pathnode->path.parent = rel; - pathnode->path.pathkeys = NIL; - pathnode->tideval = tideval; - pathnode->unjoined_relids = joininfo->unjoined_relids; - - cost_tidscan(&pathnode->path, root, rel, tideval); - - rlst = lappend(rlst, pathnode); - } - } - rel->innerjoin = nconc(rel->innerjoin, rlst); -} - -/* - * create_tidscan_paths - * Creates paths corresponding to tid direct scans of the given rel. - * Candidate paths are added to the rel's pathlist (using add_path). - */ -void -create_tidscan_paths(Query *root, RelOptInfo *rel) -{ - List *tideval = TidqualFromRestrictinfo(rel->relids, - rel->baserestrictinfo); - - if (tideval) - add_path(rel, (Path *) create_tidscan_path(root, rel, tideval)); - create_tidscan_joinpaths(root, rel); -} diff --git a/src/backend/optimizer/plan/Makefile b/src/backend/optimizer/plan/Makefile deleted file mode 100644 index a5e7b17007c..00000000000 --- a/src/backend/optimizer/plan/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -#------------------------------------------------------------------------- -# -# Makefile-- -# Makefile for optimizer/plan -# -# IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/optimizer/plan/Makefile,v 1.11 2000/08/31 16:10:11 petere Exp $ -# -#------------------------------------------------------------------------- - -subdir = src/backend/optimizer/plan -top_builddir = ../../../.. 
-include $(top_builddir)/src/Makefile.global - -OBJS = createplan.o initsplan.o planmain.o planner.o setrefs.o subselect.o - -all: SUBSYS.o - -SUBSYS.o: $(OBJS) - $(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS) - -depend dep: - $(CC) -MM $(CFLAGS) *.c >depend - -clean: - rm -f SUBSYS.o $(OBJS) - -ifeq (depend,$(wildcard depend)) -include depend -endif diff --git a/src/backend/optimizer/plan/README b/src/backend/optimizer/plan/README deleted file mode 100644 index d3a516834cc..00000000000 --- a/src/backend/optimizer/plan/README +++ /dev/null @@ -1,156 +0,0 @@ -Subselect notes from Vadim. - - - -From owner-pgsql-hackers@hub.org Fri Feb 13 09:01:19 1998 -Received: from renoir.op.net (root@renoir.op.net [209.152.193.4]) - by candle.pha.pa.us (8.8.5/8.8.5) with ESMTP id JAA11576 - for <maillist@candle.pha.pa.us>; Fri, 13 Feb 1998 09:01:17 -0500 (EST) -Received: from hub.org (hub.org [209.47.148.200]) by renoir.op.net (o1/$Revision: 1.1 $) with ESMTP id IAA09761 for <maillist@candle.pha.pa.us>; Fri, 13 Feb 1998 08:41:22 -0500 (EST) -Received: from localhost (majordom@localhost) by hub.org (8.8.8/8.7.5) with SMTP id IAA08135; Fri, 13 Feb 1998 08:40:17 -0500 (EST) -Received: by hub.org (TLB v0.10a (1.23 tibbs 1997/01/09 00:29:32)); Fri, 13 Feb 1998 08:38:42 -0500 (EST) -Received: (from majordom@localhost) by hub.org (8.8.8/8.7.5) id IAA06646 for pgsql-hackers-outgoing; Fri, 13 Feb 1998 08:38:35 -0500 (EST) -Received: from dune.krasnet.ru (dune.krasnet.ru [193.125.44.86]) by hub.org (8.8.8/8.7.5) with ESMTP id IAA04568 for <hackers@postgreSQL.org>; Fri, 13 Feb 1998 08:37:16 -0500 (EST) -Received: from sable.krasnoyarsk.su (dune.krasnet.ru [193.125.44.86]) - by dune.krasnet.ru (8.8.7/8.8.7) with ESMTP id UAA13717 - for <hackers@postgreSQL.org>; Fri, 13 Feb 1998 20:51:03 +0700 (KRS) - (envelope-from vadim@sable.krasnoyarsk.su) -Message-ID: <34E44FBA.D64E7997@sable.krasnoyarsk.su> -Date: Fri, 13 Feb 1998 20:50:50 +0700 -From: "Vadim B. 
Mikheev" <vadim@sable.krasnoyarsk.su> -Organization: ITTS (Krasnoyarsk) -X-Mailer: Mozilla 4.04 [en] (X11; I; FreeBSD 2.2.5-RELEASE i386) -MIME-Version: 1.0 -To: PostgreSQL Developers List <hackers@postgreSQL.org> -Subject: [HACKERS] Subselects are in CVS... -Content-Type: text/plain; charset=us-ascii -Content-Transfer-Encoding: 7bit -Sender: owner-pgsql-hackers@hub.org -Precedence: bulk -Status: OR - -These are some implementation notes and open issues... - -First, the implementation uses a new type of parameter - PARAM_EXEC - to deal -with correlation Vars. When query_planner() is called, it first tries to -replace all upper-query Vars referenced in the current query with Params of -this type. Some global variables are used to keep the mapping of Vars to -Params and Params to Vars. - -After this, all of the current query's SubLinks are processed: for each SubLink -found in the query's qual, union_planner() (the old planner() function) will be -called to plan the corresponding subselect (union_planner() calls -query_planner() for a "simple" query and supports UNIONs). After the subselects -are planned, the optimizer knows whether each one is a correlated, uncorrelated, or -_indirectly_ correlated (references some grandparent Vars but no parent -ones: uncorrelated from the parent's point of view) query. - -For uncorrelated and indirectly correlated subqueries of the EXPRession or -EXISTS type, SubLinks will be replaced with "normal" clauses from the -SubLink->Oper list (I changed this list to be a list of EXPR nodes, -not just Oper ones). The right sides of these nodes are replaced with -PARAM_EXEC parameters. This is the second use of the new parameter type. -At run time these parameters get their values from the result of subquery -evaluation (i.e., from the target list of the subquery). The execution plan of the -subquery itself becomes an init plan of the parent query.
InitPlan knows -what parameters are to get values from subquery' results and will be -executed "on-demand" (for query select * from table where x > 0 and -y > (select max(a) from table_a) subquery will not be executed at all -if there are no tuples with x > 0 _and_ y is not used in index scan). - -SubLinks for subqueries of all other types are transformed into -new type of Expr node - SUBPLAN_EXPR. Expr->args are just correlation -variables from _parent_ query. Expr->oper is new SubPlan node. - -This node is used for InitPlan too. It keeps subquery range table, -indices of Params which are to get value from _parent_ query Vars -(i.e. - from Expr->args), indices of Params into which subquery' -results are to be substituted (this is for InitPlans), SubLink -and subquery' execution plan. - -Plan node was changed to know about dependencies on Params from -parent queries and InitPlans, to keep list of changed Params -(from the above) and so be re-scanned if this list is not NULL. -Also, added list of InitPlans (actually, all of them for current -query are in topmost plan node now) and other SubPlans (from -plan->qual) - to initialize them and let them know about changed -Params (from the list of their "interests"). - -After all SubLinks are processed, query_planner() calls qual' -canonificator and does "normal" work. By using Params optimizer -is mostly unchanged. - -Well, Executor. To get subplans re-evaluated without ExecutorStart() -and ExecutorEnd() (without opening and closing relations and indices -and without many palloc() and pfree() - this is what SQL-funcs does -on each call) ExecReScan() now supports most of Plan types... - -Explanation of EXPLAIN. 
- -vac=> explain select * from tmp where x >= (select max(x2) from test2 -where y2 = y and exists (select * from tempx where tx = x)); -NOTICE: QUERY PLAN: - -Seq Scan on tmp (cost=40.03 size=101 width=8) - SubPlan - ^^^^^^^ subquery is in Seq Scan' qual, its plan is below - -> Aggregate (cost=2.05 size=0 width=0) - InitPlan - ^^^^^^^^ EXISTS subsubquery is InitPlan of subquery - -> Seq Scan on tempx (cost=4.33 size=1 width=4) - -> Result (cost=2.05 size=0 width=0) - ^^^^^^ EXISTS subsubquery was transformed into Param - and so we have Result node here - -> Index Scan on test2 (cost=2.05 size=1 width=4) - - -Opened issues. - -1. No read permissions checking (easy, just not done yet). -2. readfuncs.c can't read subplan-s (easy, not critical, because of - we currently nowhere use ascii representation of execution plans). -3. ExecReScan() doesn't support all plan types. At least support for - MergeJoin has to be implemented. -4. Memory leaks in ExecReScan(). -5. I need in advice: if subquery introduced with NOT IN doesn't return - any tuples then qualification is failed, yes ? -6. Regression tests !!!!!!!!!!!!!!!!!!!! - (Could we use data/queries from MySQL' crash.me ? - Copyright-ed ? Could they give us rights ?) -7. Performance. - - Should be good when subquery is transformed into InitPlan. - - Something should be done for uncorrelated subqueries introduced - with ANY/ALL - keep thinking. Currently, subplan will be re-scanned - for each parent tuple - very slow... - -Results of some test. TMP is table with x,y (int4-s), x in 0-9, -y = 100 - x, 1000 tuples (10 duplicates of each tuple). TEST2 is table -with x2, y2 (int4-s), x2 in 1-99, y2 = 100 -x2, 10000 tuples (100 dups). 
- - Trying - -select * from tmp where x >= (select max(x2) from test2 where y2 = y); - - and - -begin; -select y as ty, max(x2) as mx into table tsub from test2, tmp -where y2 = y group by ty; -vacuum tsub; -select x, y from tmp, tsub where x >= mx and y = ty; -drop table tsub; -end; - - Without index on test2(y2): - -SubSelect -> 320 sec -Using temp table -> 32 sec - - Having index - -SubSelect -> 17 sec (2M of memory) -Using temp table -> 32 sec (12M of memory: -S 8192) - -Vadim - - diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c deleted file mode 100644 index f6792789de9..00000000000 --- a/src/backend/optimizer/plan/createplan.c +++ /dev/null @@ -1,1974 +0,0 @@ -/*------------------------------------------------------------------------- - * - * createplan.c - * Routines to create the desired plan for processing a query. - * Planning is complete, we just need to convert the selected - * Path into a Plan. - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.116 2002/06/20 20:29:30 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include <sys/types.h> - -#include "nodes/makefuncs.h" -#include "nodes/nodeFuncs.h" -#include "optimizer/clauses.h" -#include "optimizer/cost.h" -#include "optimizer/paths.h" -#include "optimizer/planmain.h" -#include "optimizer/restrictinfo.h" -#include "optimizer/tlist.h" -#include "optimizer/var.h" -#include "parser/parse_expr.h" -#include "utils/lsyscache.h" -#include "utils/syscache.h" - - -static Scan *create_scan_plan(Query *root, Path *best_path); -static Join *create_join_plan(Query *root, JoinPath *best_path); -static Append *create_append_plan(Query *root, AppendPath *best_path); -static SeqScan 
*create_seqscan_plan(Path *best_path, List *tlist, - List *scan_clauses); -static IndexScan *create_indexscan_plan(Query *root, IndexPath *best_path, - List *tlist, List *scan_clauses); -static TidScan *create_tidscan_plan(TidPath *best_path, List *tlist, - List *scan_clauses); -static SubqueryScan *create_subqueryscan_plan(Path *best_path, - List *tlist, List *scan_clauses); -static FunctionScan *create_functionscan_plan(Path *best_path, - List *tlist, List *scan_clauses); -static NestLoop *create_nestloop_plan(Query *root, - NestPath *best_path, List *tlist, - List *joinclauses, List *otherclauses, - Plan *outer_plan, List *outer_tlist, - Plan *inner_plan, List *inner_tlist); -static MergeJoin *create_mergejoin_plan(Query *root, - MergePath *best_path, List *tlist, - List *joinclauses, List *otherclauses, - Plan *outer_plan, List *outer_tlist, - Plan *inner_plan, List *inner_tlist); -static HashJoin *create_hashjoin_plan(Query *root, - HashPath *best_path, List *tlist, - List *joinclauses, List *otherclauses, - Plan *outer_plan, List *outer_tlist, - Plan *inner_plan, List *inner_tlist); -static void fix_indxqual_references(List *indexquals, IndexPath *index_path, - List **fixed_indexquals, - List **recheck_indexquals); -static void fix_indxqual_sublist(List *indexqual, int baserelid, - IndexOptInfo *index, - List **fixed_quals, List **recheck_quals); -static Node *fix_indxqual_operand(Node *node, int baserelid, - IndexOptInfo *index, - Oid *opclass); -static List *switch_outer(List *clauses); -static void copy_path_costsize(Plan *dest, Path *src); -static void copy_plan_costsize(Plan *dest, Plan *src); -static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid); -static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid, - List *indxid, List *indxqual, - List *indxqualorig, - ScanDirection indexscandir); -static TidScan *make_tidscan(List *qptlist, List *qpqual, Index scanrelid, - List *tideval); -static FunctionScan 
*make_functionscan(List *qptlist, List *qpqual, - Index scanrelid); -static NestLoop *make_nestloop(List *tlist, - List *joinclauses, List *otherclauses, - Plan *lefttree, Plan *righttree, - JoinType jointype); -static HashJoin *make_hashjoin(List *tlist, - List *joinclauses, List *otherclauses, - List *hashclauses, - Plan *lefttree, Plan *righttree, - JoinType jointype); -static Hash *make_hash(List *tlist, Node *hashkey, Plan *lefttree); -static MergeJoin *make_mergejoin(List *tlist, - List *joinclauses, List *otherclauses, - List *mergeclauses, - Plan *lefttree, Plan *righttree, - JoinType jointype); - -/* - * create_plan - * Creates the access plan for a query by tracing backwards through the - * desired chain of pathnodes, starting at the node 'best_path'. For - * every pathnode found: - * (1) Create a corresponding plan node containing appropriate id, - * target list, and qualification information. - * (2) Modify qual clauses of join nodes so that subplan attributes are - * referenced using relative values. - * (3) Target lists are not modified, but will be in setrefs.c. - * - * best_path is the best access path - * - * Returns a Plan tree. 
- */ -Plan * -create_plan(Query *root, Path *best_path) -{ - Plan *plan; - - switch (best_path->pathtype) - { - case T_IndexScan: - case T_SeqScan: - case T_TidScan: - case T_SubqueryScan: - case T_FunctionScan: - plan = (Plan *) create_scan_plan(root, best_path); - break; - case T_HashJoin: - case T_MergeJoin: - case T_NestLoop: - plan = (Plan *) create_join_plan(root, - (JoinPath *) best_path); - break; - case T_Append: - plan = (Plan *) create_append_plan(root, - (AppendPath *) best_path); - break; - default: - elog(ERROR, "create_plan: unknown pathtype %d", - best_path->pathtype); - plan = NULL; /* keep compiler quiet */ - break; - } - -#ifdef NOT_USED /* fix xfunc */ - /* sort clauses by cost/(1-selectivity) -- JMH 2/26/92 */ - if (XfuncMode != XFUNC_OFF) - { - set_qpqual((Plan) plan, - lisp_qsort(get_qpqual((Plan) plan), - xfunc_clause_compare)); - if (XfuncMode != XFUNC_NOR) - /* sort the disjuncts within each clause by cost -- JMH 3/4/92 */ - xfunc_disjunct_sort(plan->qpqual); - } -#endif - - return plan; -} - -/* - * create_scan_plan - * Create a scan plan for the parent relation of 'best_path'. - * - * Returns a Plan node. - */ -static Scan * -create_scan_plan(Query *root, Path *best_path) -{ - Scan *plan; - List *tlist = best_path->parent->targetlist; - List *scan_clauses; - - /* - * Extract the relevant restriction clauses from the parent relation; - * the executor must apply all these restrictions during the scan. 
- */ - scan_clauses = get_actual_clauses(best_path->parent->baserestrictinfo); - - switch (best_path->pathtype) - { - case T_SeqScan: - plan = (Scan *) create_seqscan_plan(best_path, - tlist, - scan_clauses); - break; - - case T_IndexScan: - plan = (Scan *) create_indexscan_plan(root, - (IndexPath *) best_path, - tlist, - scan_clauses); - break; - - case T_TidScan: - plan = (Scan *) create_tidscan_plan((TidPath *) best_path, - tlist, - scan_clauses); - break; - - case T_SubqueryScan: - plan = (Scan *) create_subqueryscan_plan(best_path, - tlist, - scan_clauses); - break; - - case T_FunctionScan: - plan = (Scan *) create_functionscan_plan(best_path, - tlist, - scan_clauses); - break; - - default: - elog(ERROR, "create_scan_plan: unknown node type: %d", - best_path->pathtype); - plan = NULL; /* keep compiler quiet */ - break; - } - - return plan; -} - -/* - * create_join_plan - * Create a join plan for 'best_path' and (recursively) plans for its - * inner and outer paths. - * - * Returns a Plan node. 
- */ -static Join * -create_join_plan(Query *root, JoinPath *best_path) -{ - List *join_tlist = best_path->path.parent->targetlist; - Plan *outer_plan; - List *outer_tlist; - Plan *inner_plan; - List *inner_tlist; - List *joinclauses; - List *otherclauses; - Join *plan; - - outer_plan = create_plan(root, best_path->outerjoinpath); - outer_tlist = outer_plan->targetlist; - - inner_plan = create_plan(root, best_path->innerjoinpath); - inner_tlist = inner_plan->targetlist; - - if (IS_OUTER_JOIN(best_path->jointype)) - { - get_actual_join_clauses(best_path->joinrestrictinfo, - &joinclauses, &otherclauses); - } - else - { - /* We can treat all clauses alike for an inner join */ - joinclauses = get_actual_clauses(best_path->joinrestrictinfo); - otherclauses = NIL; - } - - switch (best_path->path.pathtype) - { - case T_MergeJoin: - plan = (Join *) create_mergejoin_plan(root, - (MergePath *) best_path, - join_tlist, - joinclauses, - otherclauses, - outer_plan, - outer_tlist, - inner_plan, - inner_tlist); - break; - case T_HashJoin: - plan = (Join *) create_hashjoin_plan(root, - (HashPath *) best_path, - join_tlist, - joinclauses, - otherclauses, - outer_plan, - outer_tlist, - inner_plan, - inner_tlist); - break; - case T_NestLoop: - plan = (Join *) create_nestloop_plan(root, - (NestPath *) best_path, - join_tlist, - joinclauses, - otherclauses, - outer_plan, - outer_tlist, - inner_plan, - inner_tlist); - break; - default: - elog(ERROR, "create_join_plan: unknown node type: %d", - best_path->path.pathtype); - plan = NULL; /* keep compiler quiet */ - break; - } - -#ifdef NOT_USED - - /* - * * Expensive function pullups may have pulled local predicates * - * into this path node. Put them in the qpqual of the plan node. 
* - * JMH, 6/15/92 - */ - if (get_loc_restrictinfo(best_path) != NIL) - set_qpqual((Plan) plan, - nconc(get_qpqual((Plan) plan), - get_actual_clauses(get_loc_restrictinfo(best_path)))); -#endif - - return plan; -} - -/* - * create_append_plan - * Create an Append plan for 'best_path' and (recursively) plans - * for its subpaths. - * - * Returns a Plan node. - */ -static Append * -create_append_plan(Query *root, AppendPath *best_path) -{ - Append *plan; - List *tlist = best_path->path.parent->targetlist; - List *subplans = NIL; - List *subpaths; - - foreach(subpaths, best_path->subpaths) - { - Path *subpath = (Path *) lfirst(subpaths); - - subplans = lappend(subplans, create_plan(root, subpath)); - } - - plan = make_append(subplans, false, tlist); - - return plan; -} - - -/***************************************************************************** - * - * BASE-RELATION SCAN METHODS - * - *****************************************************************************/ - - -/* - * create_seqscan_plan - * Returns a seqscan plan for the base relation scanned by 'best_path' - * with restriction clauses 'scan_clauses' and targetlist 'tlist'. - */ -static SeqScan * -create_seqscan_plan(Path *best_path, List *tlist, List *scan_clauses) -{ - SeqScan *scan_plan; - Index scan_relid; - - /* there should be exactly one base rel involved... */ - Assert(length(best_path->parent->relids) == 1); - Assert(best_path->parent->rtekind == RTE_RELATION); - - scan_relid = (Index) lfirsti(best_path->parent->relids); - - scan_plan = make_seqscan(tlist, - scan_clauses, - scan_relid); - - copy_path_costsize(&scan_plan->plan, best_path); - - return scan_plan; -} - -/* - * create_indexscan_plan - * Returns a indexscan plan for the base relation scanned by 'best_path' - * with restriction clauses 'scan_clauses' and targetlist 'tlist'. 
- * - * The indexqual of the path contains a sublist of implicitly-ANDed qual - * conditions for each scan of the index(es); if there is more than one - * scan then the retrieved tuple sets are ORed together. The indexqual - * and indexinfo lists must have the same length, ie, the number of scans - * that will occur. Note it is possible for a qual condition sublist - * to be empty --- then no index restrictions will be applied during that - * scan. - */ -static IndexScan * -create_indexscan_plan(Query *root, - IndexPath *best_path, - List *tlist, - List *scan_clauses) -{ - List *indxqual = best_path->indexqual; - Index baserelid; - List *qpqual; - Expr *indxqual_or_expr = NULL; - List *fixed_indxqual; - List *recheck_indxqual; - List *indexids; - List *ixinfo; - IndexScan *scan_plan; - - /* there should be exactly one base rel involved... */ - Assert(length(best_path->path.parent->relids) == 1); - Assert(best_path->path.parent->rtekind == RTE_RELATION); - - baserelid = lfirsti(best_path->path.parent->relids); - - /* - * Build list of index OIDs. - */ - indexids = NIL; - foreach(ixinfo, best_path->indexinfo) - { - IndexOptInfo *index = (IndexOptInfo *) lfirst(ixinfo); - - indexids = lappendi(indexids, index->indexoid); - } - - /* - * The qpqual list must contain all restrictions not automatically - * handled by the index. Normally the predicates in the indxqual are - * checked fully by the index, but if the index is "lossy" for a - * particular operator (as signaled by the amopreqcheck flag in - * pg_amop), then we need to double-check that predicate in qpqual, - * because the index may return more tuples than match the predicate. - * - * Since the indexquals were generated from the restriction clauses given - * by scan_clauses, there will normally be some duplications between - * the lists. We get rid of the duplicates, then add back if lossy. 
- */ - if (length(indxqual) > 1) - { - /* - * Build an expression representation of the indexqual, expanding - * the implicit OR and AND semantics of the first- and - * second-level lists. - */ - List *orclauses = NIL; - List *orclause; - - foreach(orclause, indxqual) - { - orclauses = lappend(orclauses, - make_ands_explicit(lfirst(orclause))); - } - indxqual_or_expr = make_orclause(orclauses); - - qpqual = set_difference(scan_clauses, makeList1(indxqual_or_expr)); - } - else if (indxqual != NIL) - { - /* - * Here, we can simply treat the first sublist as an independent - * set of qual expressions, since there is no top-level OR - * behavior. - */ - qpqual = set_difference(scan_clauses, lfirst(indxqual)); - } - else - qpqual = scan_clauses; - - /* - * The executor needs a copy with the indexkey on the left of each - * clause and with index attr numbers substituted for table ones. This - * pass also looks for "lossy" operators. - */ - fix_indxqual_references(indxqual, best_path, - &fixed_indxqual, &recheck_indxqual); - - /* - * If there were any "lossy" operators, need to add back the - * appropriate qual clauses to the qpqual. When there is just one - * indexscan being performed (ie, we have simple AND semantics), we - * can just add the lossy clauses themselves to qpqual. If we have - * OR-of-ANDs, we'd better add the entire original indexqual to make - * sure that the semantics are correct. 
- */ - if (recheck_indxqual != NIL) - { - if (indxqual_or_expr) - { - /* Better do a deep copy of the original scanclauses */ - qpqual = lappend(qpqual, copyObject(indxqual_or_expr)); - } - else - { - /* Subroutine already copied quals, so just append to list */ - Assert(length(recheck_indxqual) == 1); - qpqual = nconc(qpqual, (List *) lfirst(recheck_indxqual)); - } - } - - /* Finally ready to build the plan node */ - scan_plan = make_indexscan(tlist, - qpqual, - baserelid, - indexids, - fixed_indxqual, - indxqual, - best_path->indexscandir); - - copy_path_costsize(&scan_plan->scan.plan, &best_path->path); - /* use the indexscan-specific rows estimate, not the parent rel's */ - scan_plan->scan.plan.plan_rows = best_path->rows; - - return scan_plan; -} - -/* - * create_tidscan_plan - * Returns a tidscan plan for the base relation scanned by 'best_path' - * with restriction clauses 'scan_clauses' and targetlist 'tlist'. - */ -static TidScan * -create_tidscan_plan(TidPath *best_path, List *tlist, List *scan_clauses) -{ - TidScan *scan_plan; - Index scan_relid; - - /* there should be exactly one base rel involved... */ - Assert(length(best_path->path.parent->relids) == 1); - Assert(best_path->path.parent->rtekind == RTE_RELATION); - - scan_relid = (Index) lfirsti(best_path->path.parent->relids); - - scan_plan = make_tidscan(tlist, - scan_clauses, - scan_relid, - best_path->tideval); - - if (best_path->unjoined_relids) - scan_plan->needRescan = true; - - copy_path_costsize(&scan_plan->scan.plan, &best_path->path); - - return scan_plan; -} - -/* - * create_subqueryscan_plan - * Returns a subqueryscan plan for the base relation scanned by 'best_path' - * with restriction clauses 'scan_clauses' and targetlist 'tlist'. - */ -static SubqueryScan * -create_subqueryscan_plan(Path *best_path, List *tlist, List *scan_clauses) -{ - SubqueryScan *scan_plan; - Index scan_relid; - - /* there should be exactly one base rel involved... 
*/ - Assert(length(best_path->parent->relids) == 1); - /* and it must be a subquery */ - Assert(best_path->parent->rtekind == RTE_SUBQUERY); - - scan_relid = (Index) lfirsti(best_path->parent->relids); - - scan_plan = make_subqueryscan(tlist, - scan_clauses, - scan_relid, - best_path->parent->subplan); - - return scan_plan; -} - -/* - * create_functionscan_plan - * Returns a functionscan plan for the base relation scanned by 'best_path' - * with restriction clauses 'scan_clauses' and targetlist 'tlist'. - */ -static FunctionScan * -create_functionscan_plan(Path *best_path, List *tlist, List *scan_clauses) -{ - FunctionScan *scan_plan; - Index scan_relid; - - /* there should be exactly one base rel involved... */ - Assert(length(best_path->parent->relids) == 1); - /* and it must be a function */ - Assert(best_path->parent->rtekind == RTE_FUNCTION); - - scan_relid = (Index) lfirsti(best_path->parent->relids); - - scan_plan = make_functionscan(tlist, scan_clauses, scan_relid); - - copy_path_costsize(&scan_plan->scan.plan, best_path); - - return scan_plan; -} - -/***************************************************************************** - * - * JOIN METHODS - * - * A general note about join_references() processing in these routines: - * once we have changed a Var node to refer to a subplan output rather than - * the original relation, it is no longer equal() to an unmodified Var node - * for the same var. So, we cannot easily compare reference-adjusted qual - * clauses to clauses that have not been adjusted. Fortunately, that - * doesn't seem to be necessary; all the decisions are made before we do - * the reference adjustments. - * - * A cleaner solution would be to not call join_references() here at all, - * but leave it for setrefs.c to do at the end of plan tree construction. - * But that would make switch_outer() much more complicated, and some care - * would be needed to get setrefs.c to do the right thing with nestloop - * inner indexscan quals. 
So, we do subplan reference adjustment here for - * quals of join nodes (and *only* for quals of join nodes). - * - *****************************************************************************/ - -static NestLoop * -create_nestloop_plan(Query *root, - NestPath *best_path, - List *tlist, - List *joinclauses, - List *otherclauses, - Plan *outer_plan, - List *outer_tlist, - Plan *inner_plan, - List *inner_tlist) -{ - NestLoop *join_plan; - - if (IsA(inner_plan, IndexScan)) - { - /* - * An index is being used to reduce the number of tuples scanned - * in the inner relation. If there are join clauses being used - * with the index, we must update their outer-rel var nodes to - * refer to the outer side of the join. - * - * We can also remove those join clauses from the list of clauses - * that have to be checked as qpquals at the join node, but only - * if there's just one indexscan in the inner path (otherwise, - * several different sets of clauses are being ORed together). - * - * Note: if the index is lossy, the same clauses may also be getting - * checked as qpquals in the indexscan. We can still remove them - * from the nestloop's qpquals, but we gotta update the outer-rel - * vars in the indexscan's qpquals too. - * - * Note: we can safely do set_difference() against my clauses and - * join_references() because the innerscan is a primitive plan, - * and therefore has not itself done join_references renumbering - * of the vars in its quals. - */ - IndexScan *innerscan = (IndexScan *) inner_plan; - List *indxqualorig = innerscan->indxqualorig; - - /* No work needed if indxqual refers only to its own relation... */ - if (NumRelids((Node *) indxqualorig) > 1) - { - Index innerrel = innerscan->scan.scanrelid; - - /* - * Remove redundant tests from my clauses, if possible. Note - * we must compare against indxqualorig not the "fixed" - * indxqual (which has index attnos instead of relation - * attnos, and may have been commuted as well). 
- */ - if (length(indxqualorig) == 1) /* single indexscan? */ - joinclauses = set_difference(joinclauses, - lfirst(indxqualorig)); - - /* only refs to outer vars get changed in the inner indexqual */ - innerscan->indxqualorig = join_references(indxqualorig, - root->rtable, - outer_tlist, - NIL, - innerrel); - innerscan->indxqual = join_references(innerscan->indxqual, - root->rtable, - outer_tlist, - NIL, - innerrel); - /* fix the inner qpqual too, if it has join clauses */ - if (NumRelids((Node *) inner_plan->qual) > 1) - inner_plan->qual = join_references(inner_plan->qual, - root->rtable, - outer_tlist, - NIL, - innerrel); - } - } - else if (IsA(inner_plan, TidScan)) - { - TidScan *innerscan = (TidScan *) inner_plan; - - innerscan->tideval = join_references(innerscan->tideval, - root->rtable, - outer_tlist, - inner_tlist, - innerscan->scan.scanrelid); - } - else if (IsA_Join(inner_plan)) - { - /* - * Materialize the inner join for speed reasons. - * - * XXX It is probably *not* always fastest to materialize an inner - * join --- how can we estimate whether this is a good thing to - * do? - */ - inner_plan = (Plan *) make_material(inner_tlist, - inner_plan); - } - - /* - * Set quals to contain INNER/OUTER var references. 
- */ - joinclauses = join_references(joinclauses, - root->rtable, - outer_tlist, - inner_tlist, - (Index) 0); - otherclauses = join_references(otherclauses, - root->rtable, - outer_tlist, - inner_tlist, - (Index) 0); - - join_plan = make_nestloop(tlist, - joinclauses, - otherclauses, - outer_plan, - inner_plan, - best_path->jointype); - - copy_path_costsize(&join_plan->join.plan, &best_path->path); - - return join_plan; -} - -static MergeJoin * -create_mergejoin_plan(Query *root, - MergePath *best_path, - List *tlist, - List *joinclauses, - List *otherclauses, - Plan *outer_plan, - List *outer_tlist, - Plan *inner_plan, - List *inner_tlist) -{ - List *mergeclauses; - MergeJoin *join_plan; - - mergeclauses = get_actual_clauses(best_path->path_mergeclauses); - - /* - * Remove the mergeclauses from the list of join qual clauses, leaving - * the list of quals that must be checked as qpquals. Set those - * clauses to contain INNER/OUTER var references. - */ - joinclauses = join_references(set_difference(joinclauses, mergeclauses), - root->rtable, - outer_tlist, - inner_tlist, - (Index) 0); - - /* - * Fix the additional qpquals too. - */ - otherclauses = join_references(otherclauses, - root->rtable, - outer_tlist, - inner_tlist, - (Index) 0); - - /* - * Now set the references in the mergeclauses and rearrange them so - * that the outer variable is always on the left. - */ - mergeclauses = switch_outer(join_references(mergeclauses, - root->rtable, - outer_tlist, - inner_tlist, - (Index) 0)); - - /* - * Create explicit sort nodes for the outer and inner join paths if - * necessary. The sort cost was already accounted for in the path. 
- */ - if (best_path->outersortkeys) - outer_plan = (Plan *) - make_sort_from_pathkeys(root, - outer_tlist, - outer_plan, - best_path->outersortkeys); - - if (best_path->innersortkeys) - inner_plan = (Plan *) - make_sort_from_pathkeys(root, - inner_tlist, - inner_plan, - best_path->innersortkeys); - - /* - * The executor requires the inner side of a mergejoin to support - * "mark" and "restore" operations. Not all plan types do, so we must - * be careful not to generate an invalid plan. If necessary, an - * invalid inner plan can be handled by inserting a Materialize node. - * - * Since the inner side must be ordered, and only Sorts and IndexScans - * can create order to begin with, you might think there's no problem - * --- but you'd be wrong. Nestloop and merge joins can *preserve* - * the order of their inputs, so they can be selected as the input of - * a mergejoin, and that won't work in the present executor. - * - * Doing this here is a bit of a kluge since the cost of the Materialize - * wasn't taken into account in our earlier decisions. But - * Materialize is hard to estimate a cost for, and the above - * consideration shows that this is a rare case anyway, so this seems - * an acceptable way to proceed. - * - * This check must agree with ExecMarkPos/ExecRestrPos in - * executor/execAmi.c! - */ - switch (nodeTag(inner_plan)) - { - case T_SeqScan: - case T_IndexScan: - case T_FunctionScan: - case T_Material: - case T_Sort: - /* OK, these inner plans support mark/restore */ - break; - - default: - /* Ooops, need to materialize the inner plan */ - inner_plan = (Plan *) make_material(inner_tlist, - inner_plan); - break; - } - - /* - * Now we can build the mergejoin node. 
- */ - join_plan = make_mergejoin(tlist, - joinclauses, - otherclauses, - mergeclauses, - outer_plan, - inner_plan, - best_path->jpath.jointype); - - copy_path_costsize(&join_plan->join.plan, &best_path->jpath.path); - - return join_plan; -} - -static HashJoin * -create_hashjoin_plan(Query *root, - HashPath *best_path, - List *tlist, - List *joinclauses, - List *otherclauses, - Plan *outer_plan, - List *outer_tlist, - Plan *inner_plan, - List *inner_tlist) -{ - List *hashclauses; - HashJoin *join_plan; - Hash *hash_plan; - Node *innerhashkey; - - /* - * NOTE: there will always be exactly one hashclause in the list - * best_path->path_hashclauses (cf. hash_inner_and_outer()). We - * represent it as a list anyway, for convenience with routines that - * want to work on lists of clauses. - */ - hashclauses = get_actual_clauses(best_path->path_hashclauses); - - /* - * Remove the hashclauses from the list of join qual clauses, leaving - * the list of quals that must be checked as qpquals. Set those - * clauses to contain INNER/OUTER var references. - */ - joinclauses = join_references(set_difference(joinclauses, hashclauses), - root->rtable, - outer_tlist, - inner_tlist, - (Index) 0); - - /* - * Fix the additional qpquals too. - */ - otherclauses = join_references(otherclauses, - root->rtable, - outer_tlist, - inner_tlist, - (Index) 0); - - /* - * Now set the references in the hashclauses and rearrange them so - * that the outer variable is always on the left. - */ - hashclauses = switch_outer(join_references(hashclauses, - root->rtable, - outer_tlist, - inner_tlist, - (Index) 0)); - - /* Now the righthand op of the sole hashclause is the inner hash key. */ - innerhashkey = (Node *) get_rightop(lfirst(hashclauses)); - - /* - * Build the hash node and hash join node. 
- */ - hash_plan = make_hash(inner_tlist, innerhashkey, inner_plan); - join_plan = make_hashjoin(tlist, - joinclauses, - otherclauses, - hashclauses, - outer_plan, - (Plan *) hash_plan, - best_path->jpath.jointype); - - copy_path_costsize(&join_plan->join.plan, &best_path->jpath.path); - - return join_plan; -} - - -/***************************************************************************** - * - * SUPPORTING ROUTINES - * - *****************************************************************************/ - -/* - * fix_indxqual_references - * Adjust indexqual clauses to the form the executor's indexqual - * machinery needs, and check for recheckable (lossy) index conditions. - * - * We have four tasks here: - * * Index keys must be represented by Var nodes with varattno set to the - * index's attribute number, not the attribute number in the original rel. - * * indxpath.c may have selected an index that is binary-compatible with - * the actual expression operator, but not exactly the same datatype. - * We must replace the expression's operator with the binary-compatible - * equivalent operator that the index will recognize. - * * If the index key is on the right, commute the clause to put it on the - * left. (Someday the executor might not need this, but for now it does.) - * * If the indexable operator is marked 'amopreqcheck' in pg_amop, then - * the index is "lossy" for this operator: it may return more tuples than - * actually satisfy the operator condition. For each such operator, we - * must add (the original form of) the indexqual clause to the "qpquals" - * of the indexscan node, where the operator will be re-evaluated to - * ensure it passes. - * - * This code used to be entirely bogus for multi-index scans. Now it keeps - * track of which index applies to each subgroup of index qual clauses... 
- * - * Both the input list and the output lists have the form of lists of sublists - * of qual clauses --- the top-level list has one entry for each indexscan - * to be performed. The semantics are OR-of-ANDs. - * - * fixed_indexquals receives a modified copy of the indexqual list --- the - * original is not changed. Note also that the copy shares no substructure - * with the original; this is needed in case there is a subplan in it (we need - * two separate copies of the subplan tree, or things will go awry). - * - * recheck_indexquals similarly receives a full copy of whichever clauses - * need rechecking. - */ -static void -fix_indxqual_references(List *indexquals, IndexPath *index_path, - List **fixed_indexquals, List **recheck_indexquals) -{ - List *fixed_quals = NIL; - List *recheck_quals = NIL; - int baserelid = lfirsti(index_path->path.parent->relids); - List *ixinfo = index_path->indexinfo; - List *i; - - foreach(i, indexquals) - { - List *indexqual = lfirst(i); - IndexOptInfo *index = (IndexOptInfo *) lfirst(ixinfo); - List *fixed_qual; - List *recheck_qual; - - fix_indxqual_sublist(indexqual, baserelid, index, - &fixed_qual, &recheck_qual); - fixed_quals = lappend(fixed_quals, fixed_qual); - if (recheck_qual != NIL) - recheck_quals = lappend(recheck_quals, recheck_qual); - - ixinfo = lnext(ixinfo); - } - - *fixed_indexquals = fixed_quals; - *recheck_indexquals = recheck_quals; -} - -/* - * Fix the sublist of indexquals to be used in a particular scan. - * - * For each qual clause, commute if needed to put the indexkey operand on the - * left, and then fix its varattno. (We do not need to change the other side - * of the clause.) Also change the operator if necessary, and check for - * lossy index behavior. - * - * Returns two lists: the list of fixed indexquals, and the list (usually - * empty) of original clauses that must be rechecked as qpquals because - * the index is lossy for this operator type. 
- */ -static void -fix_indxqual_sublist(List *indexqual, int baserelid, IndexOptInfo *index, - List **fixed_quals, List **recheck_quals) -{ - List *fixed_qual = NIL; - List *recheck_qual = NIL; - List *i; - - foreach(i, indexqual) - { - Expr *clause = (Expr *) lfirst(i); - Expr *newclause; - List *leftvarnos; - Oid opclass, - newopno; - - if (!is_opclause((Node *) clause) || length(clause->args) != 2) - elog(ERROR, "fix_indxqual_sublist: indexqual clause is not binary opclause"); - - /* - * Make a copy that will become the fixed clause. - * - * We used to try to do a shallow copy here, but that fails if there - * is a subplan in the arguments of the opclause. So just do a - * full copy. - */ - newclause = (Expr *) copyObject((Node *) clause); - - /* - * Check to see if the indexkey is on the right; if so, commute - * the clause. The indexkey should be the side that refers to - * (only) the base relation. - */ - leftvarnos = pull_varnos((Node *) lfirst(newclause->args)); - if (length(leftvarnos) != 1 || lfirsti(leftvarnos) != baserelid) - CommuteClause(newclause); - freeList(leftvarnos); - - /* - * Now, determine which index attribute this is, change the - * indexkey operand as needed, and get the index opclass. - */ - lfirst(newclause->args) = fix_indxqual_operand(lfirst(newclause->args), - baserelid, - index, - &opclass); - - /* - * Substitute the appropriate operator if the expression operator - * is merely binary-compatible with the index. This shouldn't - * fail, since indxpath.c found it before... - */ - newopno = indexable_operator(newclause, opclass, true); - if (newopno == InvalidOid) - elog(ERROR, "fix_indxqual_sublist: failed to find substitute op"); - ((Oper *) newclause->oper)->opno = newopno; - - fixed_qual = lappend(fixed_qual, newclause); - - /* - * Finally, check to see if index is lossy for this operator. If - * so, add (a copy of) original form of clause to recheck list. 
- */ - if (op_requires_recheck(newopno, opclass)) - recheck_qual = lappend(recheck_qual, - copyObject((Node *) clause)); - } - - *fixed_quals = fixed_qual; - *recheck_quals = recheck_qual; -} - -static Node * -fix_indxqual_operand(Node *node, int baserelid, IndexOptInfo *index, - Oid *opclass) -{ - /* - * Remove any binary-compatible relabeling of the indexkey - */ - if (IsA(node, RelabelType)) - node = ((RelabelType *) node)->arg; - - /* - * We represent index keys by Var nodes having the varno of the base - * table but varattno equal to the index's attribute number (index - * column position). This is a bit hokey ... would be cleaner to use - * a special-purpose node type that could not be mistaken for a - * regular Var. But it will do for now. - */ - if (IsA(node, Var)) - { - /* If it's a var, find which index key position it occupies */ - Assert(index->indproc == InvalidOid); - - if (((Var *) node)->varno == baserelid) - { - int varatt = ((Var *) node)->varattno; - int pos; - - for (pos = 0; pos < index->nkeys; pos++) - { - if (index->indexkeys[pos] == varatt) - { - Node *newnode = copyObject(node); - - ((Var *) newnode)->varattno = pos + 1; - /* return the correct opclass, too */ - *opclass = index->classlist[pos]; - return newnode; - } - } - } - - /* - * Oops, this Var isn't an indexkey! - */ - elog(ERROR, "fix_indxqual_operand: var is not index attribute"); - } - - /* - * Else, it must be a func expression matching a functional index. - * Since we currently only support single-column functional indexes, - * the returned varattno must be 1. 
- */ - Assert(index->indproc != InvalidOid); - Assert(is_funcclause(node)); /* not a very thorough check, but easy */ - - /* classlist[0] is the only class of a functional index */ - *opclass = index->classlist[0]; - - return (Node *) makeVar(baserelid, 1, exprType(node), -1, 0); -} - -/* - * switch_outer - * Given a list of merge or hash joinclauses, rearrange the elements within - * the clauses so the outer join variable is on the left and the inner is - * on the right. The original list is not touched; a modified list - * is returned. - */ -static List * -switch_outer(List *clauses) -{ - List *t_list = NIL; - List *i; - - foreach(i, clauses) - { - Expr *clause = (Expr *) lfirst(i); - Var *op; - - Assert(is_opclause((Node *) clause)); - op = get_rightop(clause); - Assert(op && IsA(op, Var)); - if (var_is_outer(op)) - { - /* - * Duplicate just enough of the structure to allow commuting - * the clause without changing the original list. Could use - * copyObject, but a complete deep copy is overkill. - */ - Expr *temp; - - temp = make_clause(clause->opType, clause->oper, - listCopy(clause->args)); - /* Commute it --- note this modifies the temp node in-place. */ - CommuteClause(temp); - t_list = lappend(t_list, temp); - } - else - t_list = lappend(t_list, clause); - } - return t_list; -} - -/* - * Copy cost and size info from a Path node to the Plan node created from it. - * The executor won't use this info, but it's needed by EXPLAIN. - */ -static void -copy_path_costsize(Plan *dest, Path *src) -{ - if (src) - { - dest->startup_cost = src->startup_cost; - dest->total_cost = src->total_cost; - dest->plan_rows = src->parent->rows; - dest->plan_width = src->parent->width; - } - else - { - dest->startup_cost = 0; - dest->total_cost = 0; - dest->plan_rows = 0; - dest->plan_width = 0; - } -} - -/* - * Copy cost and size info from a lower plan node to an inserted node. 
- * This is not critical, since the decisions have already been made, - * but it helps produce more reasonable-looking EXPLAIN output. - * (Some callers alter the info after copying it.) - */ -static void -copy_plan_costsize(Plan *dest, Plan *src) -{ - if (src) - { - dest->startup_cost = src->startup_cost; - dest->total_cost = src->total_cost; - dest->plan_rows = src->plan_rows; - dest->plan_width = src->plan_width; - } - else - { - dest->startup_cost = 0; - dest->total_cost = 0; - dest->plan_rows = 0; - dest->plan_width = 0; - } -} - - -/***************************************************************************** - * - * PLAN NODE BUILDING ROUTINES - * - * Some of these are exported because they are called to build plan nodes - * in contexts where we're not deriving the plan node from a path node. - * - *****************************************************************************/ - -static SeqScan * -make_seqscan(List *qptlist, - List *qpqual, - Index scanrelid) -{ - SeqScan *node = makeNode(SeqScan); - Plan *plan = &node->plan; - - /* cost should be inserted by caller */ - plan->state = (EState *) NULL; - plan->targetlist = qptlist; - plan->qual = qpqual; - plan->lefttree = NULL; - plan->righttree = NULL; - node->scanrelid = scanrelid; - node->scanstate = (CommonScanState *) NULL; - - return node; -} - -static IndexScan * -make_indexscan(List *qptlist, - List *qpqual, - Index scanrelid, - List *indxid, - List *indxqual, - List *indxqualorig, - ScanDirection indexscandir) -{ - IndexScan *node = makeNode(IndexScan); - Plan *plan = &node->scan.plan; - - /* cost should be inserted by caller */ - plan->state = (EState *) NULL; - plan->targetlist = qptlist; - plan->qual = qpqual; - plan->lefttree = NULL; - plan->righttree = NULL; - node->scan.scanrelid = scanrelid; - node->indxid = indxid; - node->indxqual = indxqual; - node->indxqualorig = indxqualorig; - node->indxorderdir = indexscandir; - node->scan.scanstate = (CommonScanState *) NULL; - - return node; -} - 
-static TidScan * -make_tidscan(List *qptlist, - List *qpqual, - Index scanrelid, - List *tideval) -{ - TidScan *node = makeNode(TidScan); - Plan *plan = &node->scan.plan; - - /* cost should be inserted by caller */ - plan->state = (EState *) NULL; - plan->targetlist = qptlist; - plan->qual = qpqual; - plan->lefttree = NULL; - plan->righttree = NULL; - node->scan.scanrelid = scanrelid; - node->tideval = copyObject(tideval); /* XXX do we really need a - * copy? */ - node->needRescan = false; - node->scan.scanstate = (CommonScanState *) NULL; - - return node; -} - -SubqueryScan * -make_subqueryscan(List *qptlist, - List *qpqual, - Index scanrelid, - Plan *subplan) -{ - SubqueryScan *node = makeNode(SubqueryScan); - Plan *plan = &node->scan.plan; - - copy_plan_costsize(plan, subplan); - plan->state = (EState *) NULL; - plan->targetlist = qptlist; - plan->qual = qpqual; - plan->lefttree = NULL; - plan->righttree = NULL; - node->scan.scanrelid = scanrelid; - node->subplan = subplan; - node->scan.scanstate = (CommonScanState *) NULL; - - return node; -} - -static FunctionScan * -make_functionscan(List *qptlist, - List *qpqual, - Index scanrelid) -{ - FunctionScan *node = makeNode(FunctionScan); - Plan *plan = &node->scan.plan; - - /* cost should be inserted by caller */ - plan->state = (EState *) NULL; - plan->targetlist = qptlist; - plan->qual = qpqual; - plan->lefttree = NULL; - plan->righttree = NULL; - node->scan.scanrelid = scanrelid; - node->scan.scanstate = (CommonScanState *) NULL; - - return node; -} - -Append * -make_append(List *appendplans, bool isTarget, List *tlist) -{ - Append *node = makeNode(Append); - Plan *plan = &node->plan; - List *subnode; - - /* compute costs from subplan costs */ - plan->startup_cost = 0; - plan->total_cost = 0; - plan->plan_rows = 0; - plan->plan_width = 0; - foreach(subnode, appendplans) - { - Plan *subplan = (Plan *) lfirst(subnode); - - if (subnode == appendplans) /* first node? 
*/ - plan->startup_cost = subplan->startup_cost; - plan->total_cost += subplan->total_cost; - plan->plan_rows += subplan->plan_rows; - if (plan->plan_width < subplan->plan_width) - plan->plan_width = subplan->plan_width; - } - plan->state = (EState *) NULL; - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = NULL; - plan->righttree = NULL; - node->appendplans = appendplans; - node->isTarget = isTarget; - - return node; -} - -static NestLoop * -make_nestloop(List *tlist, - List *joinclauses, - List *otherclauses, - Plan *lefttree, - Plan *righttree, - JoinType jointype) -{ - NestLoop *node = makeNode(NestLoop); - Plan *plan = &node->join.plan; - - /* cost should be inserted by caller */ - plan->state = (EState *) NULL; - plan->targetlist = tlist; - plan->qual = otherclauses; - plan->lefttree = lefttree; - plan->righttree = righttree; - node->join.jointype = jointype; - node->join.joinqual = joinclauses; - - return node; -} - -static HashJoin * -make_hashjoin(List *tlist, - List *joinclauses, - List *otherclauses, - List *hashclauses, - Plan *lefttree, - Plan *righttree, - JoinType jointype) -{ - HashJoin *node = makeNode(HashJoin); - Plan *plan = &node->join.plan; - - /* cost should be inserted by caller */ - plan->state = (EState *) NULL; - plan->targetlist = tlist; - plan->qual = otherclauses; - plan->lefttree = lefttree; - plan->righttree = righttree; - node->hashclauses = hashclauses; - node->join.jointype = jointype; - node->join.joinqual = joinclauses; - - return node; -} - -static Hash * -make_hash(List *tlist, Node *hashkey, Plan *lefttree) -{ - Hash *node = makeNode(Hash); - Plan *plan = &node->plan; - - copy_plan_costsize(plan, lefttree); - - /* - * For plausibility, make startup & total costs equal total cost of - * input plan; this only affects EXPLAIN display not decisions. 
- */ - plan->startup_cost = plan->total_cost; - plan->state = (EState *) NULL; - plan->targetlist = tlist; - plan->qual = NULL; - plan->lefttree = lefttree; - plan->righttree = NULL; - node->hashkey = hashkey; - - return node; -} - -static MergeJoin * -make_mergejoin(List *tlist, - List *joinclauses, - List *otherclauses, - List *mergeclauses, - Plan *lefttree, - Plan *righttree, - JoinType jointype) -{ - MergeJoin *node = makeNode(MergeJoin); - Plan *plan = &node->join.plan; - - /* cost should be inserted by caller */ - plan->state = (EState *) NULL; - plan->targetlist = tlist; - plan->qual = otherclauses; - plan->lefttree = lefttree; - plan->righttree = righttree; - node->mergeclauses = mergeclauses; - node->join.jointype = jointype; - node->join.joinqual = joinclauses; - - return node; -} - -/* - * To use make_sort directly, you must already have marked the tlist - * with reskey and reskeyop information. The keys had better be - * non-redundant, too (ie, there had better be tlist items marked with - * each key number from 1 to keycount), or the executor will get confused! 
- */ -Sort * -make_sort(Query *root, List *tlist, Plan *lefttree, int keycount) -{ - Sort *node = makeNode(Sort); - Plan *plan = &node->plan; - Path sort_path; /* dummy for result of cost_sort */ - - copy_plan_costsize(plan, lefttree); /* only care about copying size */ - cost_sort(&sort_path, root, NIL, - lefttree->plan_rows, lefttree->plan_width); - plan->startup_cost = sort_path.startup_cost + lefttree->total_cost; - plan->total_cost = sort_path.total_cost + lefttree->total_cost; - plan->state = (EState *) NULL; - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = lefttree; - plan->righttree = NULL; - node->keycount = keycount; - - return node; -} - -/* - * make_sort_from_pathkeys - * Create sort plan to sort according to given pathkeys - * - * 'tlist' is the target list of the input plan - * 'lefttree' is the node which yields input tuples - * 'pathkeys' is the list of pathkeys by which the result is to be sorted - * - * We must convert the pathkey information into reskey and reskeyop fields - * of resdom nodes in the sort plan's target list. - */ -Sort * -make_sort_from_pathkeys(Query *root, List *tlist, - Plan *lefttree, List *pathkeys) -{ - List *sort_tlist; - List *i; - int numsortkeys = 0; - - /* Create a new target list for the sort, with sort keys set. */ - sort_tlist = new_unsorted_tlist(tlist); - - foreach(i, pathkeys) - { - List *keysublist = (List *) lfirst(i); - PathKeyItem *pathkey = NULL; - Resdom *resdom = NULL; - List *j; - - /* - * We can sort by any one of the sort key items listed in this - * sublist. For now, we take the first one that corresponds to an - * available Var in the sort_tlist. - * - * XXX if we have a choice, is there any way of figuring out which - * might be cheapest to execute? (For example, int4lt is likely - * much cheaper to execute than numericlt, but both might appear - * in the same pathkey sublist...) Not clear that we ever will - * have a choice in practice, so it may not matter. 
- */ - foreach(j, keysublist) - { - pathkey = lfirst(j); - Assert(IsA(pathkey, PathKeyItem)); - resdom = tlist_member(pathkey->key, sort_tlist); - if (resdom) - break; - } - if (!resdom) - elog(ERROR, "make_sort_from_pathkeys: cannot find tlist item to sort"); - - /* - * The resdom might be already marked as a sort key, if the - * pathkeys contain duplicate entries. (This can happen in - * scenarios where multiple mergejoinable clauses mention the same - * var, for example.) In that case the current pathkey is - * essentially a no-op, because only one value can be seen within - * any subgroup where it would be consulted. We can ignore it. - */ - if (resdom->reskey == 0) - { - /* OK, mark it as a sort key and set the sort operator */ - resdom->reskey = ++numsortkeys; - resdom->reskeyop = pathkey->sortop; - } - } - - Assert(numsortkeys > 0); - - return make_sort(root, sort_tlist, lefttree, numsortkeys); -} - -Material * -make_material(List *tlist, Plan *lefttree) -{ - Material *node = makeNode(Material); - Plan *plan = &node->plan; - - copy_plan_costsize(plan, lefttree); - - /* - * For plausibility, make startup & total costs equal total cost of - * input plan; this only affects EXPLAIN display not decisions. - * - * XXX shouldn't we charge some additional cost for materialization? - */ - plan->startup_cost = plan->total_cost; - plan->state = (EState *) NULL; - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = lefttree; - plan->righttree = NULL; - - return node; -} - -Agg * -make_agg(List *tlist, List *qual, Plan *lefttree) -{ - Agg *node = makeNode(Agg); - Plan *plan = &node->plan; - - copy_plan_costsize(plan, lefttree); - - /* - * Charge one cpu_operator_cost per aggregate function per input - * tuple. 
- */ - plan->total_cost += cpu_operator_cost * plan->plan_rows * - (length(pull_agg_clause((Node *) tlist)) + - length(pull_agg_clause((Node *) qual))); - - /* - * We will produce a single output tuple if the input is not a Group, - * and a tuple per group otherwise. For now, estimate the number of - * groups as 10% of the number of tuples --- bogus, but how to do - * better? (Note we assume the input Group node is in "tuplePerGroup" - * mode, so it didn't reduce its row count already.) - */ - if (IsA(lefttree, Group)) - { - plan->plan_rows *= 0.1; - if (plan->plan_rows < 1) - plan->plan_rows = 1; - } - else - { - plan->plan_rows = 1; - plan->startup_cost = plan->total_cost; - } - - plan->state = (EState *) NULL; - plan->qual = qual; - plan->targetlist = tlist; - plan->lefttree = lefttree; - plan->righttree = (Plan *) NULL; - - return node; -} - -Group * -make_group(List *tlist, - bool tuplePerGroup, - int ngrp, - AttrNumber *grpColIdx, - Plan *lefttree) -{ - Group *node = makeNode(Group); - Plan *plan = &node->plan; - - copy_plan_costsize(plan, lefttree); - - /* - * Charge one cpu_operator_cost per comparison per input tuple. We - * assume all columns get compared at most of the tuples. - */ - plan->total_cost += cpu_operator_cost * plan->plan_rows * ngrp; - - /* - * If tuplePerGroup (which is named exactly backwards) is true, we - * will return all the input tuples, so the input node's row count is - * OK. Otherwise, we'll return only one tuple from each group. For - * now, estimate the number of groups as 10% of the number of tuples - * --- bogus, but how to do better? 
- */ - if (!tuplePerGroup) - { - plan->plan_rows *= 0.1; - if (plan->plan_rows < 1) - plan->plan_rows = 1; - } - - plan->state = (EState *) NULL; - plan->qual = NULL; - plan->targetlist = tlist; - plan->lefttree = lefttree; - plan->righttree = (Plan *) NULL; - node->tuplePerGroup = tuplePerGroup; - node->numCols = ngrp; - node->grpColIdx = grpColIdx; - - return node; -} - -/* - * distinctList is a list of SortClauses, identifying the targetlist items - * that should be considered by the Unique filter. - */ - -Unique * -make_unique(List *tlist, Plan *lefttree, List *distinctList) -{ - Unique *node = makeNode(Unique); - Plan *plan = &node->plan; - int numCols = length(distinctList); - int keyno = 0; - AttrNumber *uniqColIdx; - List *slitem; - - copy_plan_costsize(plan, lefttree); - - /* - * Charge one cpu_operator_cost per comparison per input tuple. We - * assume all columns get compared at most of the tuples. - */ - plan->total_cost += cpu_operator_cost * plan->plan_rows * numCols; - - /* - * As for Group, we make the unsupported assumption that there will be - * 10% as many tuples out as in. - */ - plan->plan_rows *= 0.1; - if (plan->plan_rows < 1) - plan->plan_rows = 1; - - plan->state = (EState *) NULL; - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = lefttree; - plan->righttree = NULL; - - /* - * convert SortClause list into array of attr indexes, as wanted by - * exec - */ - Assert(numCols > 0); - uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); - - foreach(slitem, distinctList) - { - SortClause *sortcl = (SortClause *) lfirst(slitem); - TargetEntry *tle = get_sortgroupclause_tle(sortcl, tlist); - - uniqColIdx[keyno++] = tle->resdom->resno; - } - - node->numCols = numCols; - node->uniqColIdx = uniqColIdx; - - return node; -} - -/* - * distinctList is a list of SortClauses, identifying the targetlist items - * that should be considered by the SetOp filter. 
- */ - -SetOp * -make_setop(SetOpCmd cmd, List *tlist, Plan *lefttree, - List *distinctList, AttrNumber flagColIdx) -{ - SetOp *node = makeNode(SetOp); - Plan *plan = &node->plan; - int numCols = length(distinctList); - int keyno = 0; - AttrNumber *dupColIdx; - List *slitem; - - copy_plan_costsize(plan, lefttree); - - /* - * Charge one cpu_operator_cost per comparison per input tuple. We - * assume all columns get compared at most of the tuples. - */ - plan->total_cost += cpu_operator_cost * plan->plan_rows * numCols; - - /* - * As for Group, we make the unsupported assumption that there will be - * 10% as many tuples out as in. - */ - plan->plan_rows *= 0.1; - if (plan->plan_rows < 1) - plan->plan_rows = 1; - - plan->state = (EState *) NULL; - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = lefttree; - plan->righttree = NULL; - - /* - * convert SortClause list into array of attr indexes, as wanted by - * exec - */ - Assert(numCols > 0); - dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); - - foreach(slitem, distinctList) - { - SortClause *sortcl = (SortClause *) lfirst(slitem); - TargetEntry *tle = get_sortgroupclause_tle(sortcl, tlist); - - dupColIdx[keyno++] = tle->resdom->resno; - } - - node->cmd = cmd; - node->numCols = numCols; - node->dupColIdx = dupColIdx; - node->flagColIdx = flagColIdx; - - return node; -} - -Limit * -make_limit(List *tlist, Plan *lefttree, - Node *limitOffset, Node *limitCount) -{ - Limit *node = makeNode(Limit); - Plan *plan = &node->plan; - - copy_plan_costsize(plan, lefttree); - - /* - * If offset/count are constants, adjust the output rows count and - * costs accordingly. This is only a cosmetic issue if we are at top - * level, but if we are building a subquery then it's important to - * report correct info to the outer planner. 
- */ - if (limitOffset && IsA(limitOffset, Const)) - { - Const *limito = (Const *) limitOffset; - int32 offset = DatumGetInt32(limito->constvalue); - - if (!limito->constisnull && offset > 0) - { - if (offset > plan->plan_rows) - offset = (int32) plan->plan_rows; - if (plan->plan_rows > 0) - plan->startup_cost += - (plan->total_cost - plan->startup_cost) - * ((double) offset) / plan->plan_rows; - plan->plan_rows -= offset; - if (plan->plan_rows < 1) - plan->plan_rows = 1; - } - } - if (limitCount && IsA(limitCount, Const)) - { - Const *limitc = (Const *) limitCount; - int32 count = DatumGetInt32(limitc->constvalue); - - if (!limitc->constisnull && count >= 0) - { - if (count > plan->plan_rows) - count = (int32) plan->plan_rows; - if (plan->plan_rows > 0) - plan->total_cost = plan->startup_cost + - (plan->total_cost - plan->startup_cost) - * ((double) count) / plan->plan_rows; - plan->plan_rows = count; - if (plan->plan_rows < 1) - plan->plan_rows = 1; - } - } - - plan->state = (EState *) NULL; - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = lefttree; - plan->righttree = NULL; - - node->limitOffset = limitOffset; - node->limitCount = limitCount; - - return node; -} - -Result * -make_result(List *tlist, - Node *resconstantqual, - Plan *subplan) -{ - Result *node = makeNode(Result); - Plan *plan = &node->plan; - -#ifdef NOT_USED - tlist = generate_fjoin(tlist); -#endif - if (subplan) - copy_plan_costsize(plan, subplan); - else - { - plan->startup_cost = 0; - plan->total_cost = cpu_tuple_cost; - plan->plan_rows = 1; /* wrong if we have a set-valued function? */ - plan->plan_width = 0; /* XXX try to be smarter? 
*/ - } - - plan->state = (EState *) NULL; - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = subplan; - plan->righttree = NULL; - node->resconstantqual = resconstantqual; - node->resstate = NULL; - - return node; -} - -#ifdef NOT_USED -List * -generate_fjoin(List *tlist) -{ - List tlistP; - List newTlist = NIL; - List fjoinList = NIL; - int nIters = 0; - - /* - * Break the target list into elements with Iter nodes, and those - * without them. - */ - foreach(tlistP, tlist) - { - List tlistElem; - - tlistElem = lfirst(tlistP); - if (IsA(lsecond(tlistElem), Iter)) - { - nIters++; - fjoinList = lappend(fjoinList, tlistElem); - } - else - newTlist = lappend(newTlist, tlistElem); - } - - /* - * if we have an Iter node then we need to flatten. - */ - if (nIters > 0) - { - List *inner; - List *tempList; - Fjoin *fjoinNode; - DatumPtr results = (DatumPtr) palloc(nIters * sizeof(Datum)); - BoolPtr alwaysDone = (BoolPtr) palloc(nIters * sizeof(bool)); - - inner = lfirst(fjoinList); - fjoinList = lnext(fjoinList); - fjoinNode = (Fjoin) MakeFjoin(false, - nIters, - inner, - results, - alwaysDone); - tempList = lcons(fjoinNode, fjoinList); - newTlist = lappend(newTlist, tempList); - } - return newTlist; - return tlist; /* do nothing for now - ay 10/94 */ -} - -#endif diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c deleted file mode 100644 index 12c6d8f3521..00000000000 --- a/src/backend/optimizer/plan/initsplan.c +++ /dev/null @@ -1,968 +0,0 @@ -/*------------------------------------------------------------------------- - * - * initsplan.c - * Target list, qualification, joininfo initialization routines - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.73 2002/06/20 20:29:30 momjian Exp $ - * - 
*------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include <sys/types.h> - -#include "catalog/pg_operator.h" -#include "catalog/pg_type.h" -#include "nodes/makefuncs.h" -#include "optimizer/clauses.h" -#include "optimizer/cost.h" -#include "optimizer/joininfo.h" -#include "optimizer/pathnode.h" -#include "optimizer/paths.h" -#include "optimizer/planmain.h" -#include "optimizer/tlist.h" -#include "optimizer/var.h" -#include "parser/parsetree.h" -#include "parser/parse_expr.h" -#include "parser/parse_oper.h" -#include "utils/builtins.h" -#include "utils/lsyscache.h" -#include "utils/syscache.h" - - -static void mark_baserels_for_outer_join(Query *root, Relids rels, - Relids outerrels); -static void distribute_qual_to_rels(Query *root, Node *clause, - bool ispusheddown, - bool isouterjoin, - bool isdeduced, - Relids qualscope); -static void add_join_info_to_rels(Query *root, RestrictInfo *restrictinfo, - Relids join_relids); -static void add_vars_to_targetlist(Query *root, List *vars); -static bool qual_is_redundant(Query *root, RestrictInfo *restrictinfo, - List *restrictlist); -static void check_mergejoinable(RestrictInfo *restrictinfo); -static void check_hashjoinable(RestrictInfo *restrictinfo); - - -/***************************************************************************** - * - * JOIN TREES - * - *****************************************************************************/ - -/* - * add_base_rels_to_query - * - * Scan the query's jointree and create baserel RelOptInfos for all - * the base relations (ie, table and subquery RTEs) appearing in the - * jointree. Also, create otherrel RelOptInfos for join RTEs. - * - * The return value is a list of all the baserel indexes (but not join RTE - * indexes) included in the scanned jointree. This is actually just an - * internal convenience for marking join otherrels properly; no outside - * caller uses the result. 
- * - * At the end of this process, there should be one baserel RelOptInfo for - * every non-join RTE that is used in the query. Therefore, this routine - * is the only place that should call build_base_rel. But build_other_rel - * will be used again later to build rels for inheritance children. - */ -List * -add_base_rels_to_query(Query *root, Node *jtnode) -{ - List *result = NIL; - - if (jtnode == NULL) - return NIL; - if (IsA(jtnode, RangeTblRef)) - { - int varno = ((RangeTblRef *) jtnode)->rtindex; - - build_base_rel(root, varno); - result = makeListi1(varno); - } - else if (IsA(jtnode, FromExpr)) - { - FromExpr *f = (FromExpr *) jtnode; - List *l; - - foreach(l, f->fromlist) - { - result = nconc(result, - add_base_rels_to_query(root, lfirst(l))); - } - } - else if (IsA(jtnode, JoinExpr)) - { - JoinExpr *j = (JoinExpr *) jtnode; - RelOptInfo *jrel; - - result = add_base_rels_to_query(root, j->larg); - result = nconc(result, - add_base_rels_to_query(root, j->rarg)); - /* the join's own rtindex is NOT added to result */ - jrel = build_other_rel(root, j->rtindex); - /* - * Mark the join's otherrel with outerjoinset = list of baserel ids - * included in the join. Note we must copy here because result list - * is destructively modified by nconcs at higher levels. - */ - jrel->outerjoinset = listCopy(result); - /* - * Safety check: join RTEs should not be SELECT FOR UPDATE targets - */ - if (intMember(j->rtindex, root->rowMarks)) - elog(ERROR, "SELECT FOR UPDATE cannot be applied to a join"); - } - else - elog(ERROR, "add_base_rels_to_query: unexpected node type %d", - nodeTag(jtnode)); - return result; -} - - -/***************************************************************************** - * - * TARGET LISTS - * - *****************************************************************************/ - -/* - * build_base_rel_tlists - * Creates targetlist entries for each var seen in 'tlist' and adds - * them to the tlist of the appropriate rel node. 
- */ -void -build_base_rel_tlists(Query *root, List *tlist) -{ - List *tlist_vars = pull_var_clause((Node *) tlist, false); - - add_vars_to_targetlist(root, tlist_vars); - freeList(tlist_vars); -} - -/* - * add_vars_to_targetlist - * For each variable appearing in the list, add it to the owning - * relation's targetlist if not already present. - * - * Note that join alias variables will be attached to the otherrel for - * the join RTE. They will later be transferred to the tlist of - * the corresponding joinrel. We will also cause entries to be made - * for the Vars that the alias will eventually depend on. - */ -static void -add_vars_to_targetlist(Query *root, List *vars) -{ - List *temp; - - foreach(temp, vars) - { - Var *var = (Var *) lfirst(temp); - RelOptInfo *rel = find_base_rel(root, var->varno); - - add_var_to_tlist(rel, var); - - if (rel->reloptkind == RELOPT_OTHER_JOIN_REL) - { - /* Var is an alias */ - Node *expansion; - List *varsused; - - expansion = flatten_join_alias_vars((Node *) var, - root->rtable, true); - varsused = pull_var_clause(expansion, false); - add_vars_to_targetlist(root, varsused); - freeList(varsused); - } - } -} - - -/***************************************************************************** - * - * QUALIFICATIONS - * - *****************************************************************************/ - - -/* - * distribute_quals_to_rels - * Recursively scan the query's join tree for WHERE and JOIN/ON qual - * clauses, and add these to the appropriate RestrictInfo and JoinInfo - * lists belonging to base RelOptInfos. Also, base RelOptInfos are marked - * with outerjoinset information, to aid in proper positioning of qual - * clauses that appear above outer joins. - * - * NOTE: when dealing with inner joins, it is appropriate to let a qual clause - * be evaluated at the lowest level where all the variables it mentions are - * available. 
However, we cannot push a qual down into the nullable side(s) - * of an outer join since the qual might eliminate matching rows and cause a - * NULL row to be incorrectly emitted by the join. Therefore, rels appearing - * within the nullable side(s) of an outer join are marked with - * outerjoinset = list of Relids used at the outer join node. - * This list will be added to the list of rels referenced by quals using such - * a rel, thereby forcing them up the join tree to the right level. - * - * To ease the calculation of these values, distribute_quals_to_rels() returns - * the list of base Relids involved in its own level of join. This is just an - * internal convenience; no outside callers pay attention to the result. - */ -Relids -distribute_quals_to_rels(Query *root, Node *jtnode) -{ - Relids result = NIL; - - if (jtnode == NULL) - return result; - if (IsA(jtnode, RangeTblRef)) - { - int varno = ((RangeTblRef *) jtnode)->rtindex; - - /* No quals to deal with, just return correct result */ - result = makeListi1(varno); - } - else if (IsA(jtnode, FromExpr)) - { - FromExpr *f = (FromExpr *) jtnode; - List *l; - List *qual; - - /* - * First, recurse to handle child joins. - * - * Note: we assume it's impossible to see same RT index from more - * than one subtree, so nconc() is OK rather than set_unioni(). - */ - foreach(l, f->fromlist) - { - result = nconc(result, - distribute_quals_to_rels(root, lfirst(l))); - } - - /* - * Now process the top-level quals. These are always marked as - * "pushed down", since they clearly didn't come from a JOIN expr. - */ - foreach(qual, (List *) f->quals) - distribute_qual_to_rels(root, (Node *) lfirst(qual), - true, false, false, result); - } - else if (IsA(jtnode, JoinExpr)) - { - JoinExpr *j = (JoinExpr *) jtnode; - Relids leftids, - rightids; - bool isouterjoin; - List *qual; - - /* - * Order of operations here is subtle and critical. First we - * recurse to handle sub-JOINs. 
Their join quals will be placed - * without regard for whether this level is an outer join, which - * is correct. Then, if we are an outer join, we mark baserels - * contained within the nullable side(s) with our own rel list; - * this will restrict placement of subsequent quals using those - * rels, including our own quals and quals above us in the join - * tree. Finally we place our own join quals. - */ - leftids = distribute_quals_to_rels(root, j->larg); - rightids = distribute_quals_to_rels(root, j->rarg); - - result = nconc(listCopy(leftids), rightids); - - isouterjoin = false; - switch (j->jointype) - { - case JOIN_INNER: - /* Inner join adds no restrictions for quals */ - break; - case JOIN_LEFT: - mark_baserels_for_outer_join(root, rightids, result); - isouterjoin = true; - break; - case JOIN_FULL: - mark_baserels_for_outer_join(root, result, result); - isouterjoin = true; - break; - case JOIN_RIGHT: - mark_baserels_for_outer_join(root, leftids, result); - isouterjoin = true; - break; - case JOIN_UNION: - - /* - * This is where we fail if upper levels of planner - * haven't rewritten UNION JOIN as an Append ... - */ - elog(ERROR, "UNION JOIN is not implemented yet"); - break; - default: - elog(ERROR, - "distribute_quals_to_rels: unsupported join type %d", - (int) j->jointype); - break; - } - - foreach(qual, (List *) j->quals) - distribute_qual_to_rels(root, (Node *) lfirst(qual), - false, isouterjoin, false, result); - } - else - elog(ERROR, "distribute_quals_to_rels: unexpected node type %d", - nodeTag(jtnode)); - return result; -} - -/* - * mark_baserels_for_outer_join - * Mark all base rels listed in 'rels' as having the given outerjoinset. 
- */ -static void -mark_baserels_for_outer_join(Query *root, Relids rels, Relids outerrels) -{ - List *relid; - - foreach(relid, rels) - { - int relno = lfirsti(relid); - RelOptInfo *rel = find_base_rel(root, relno); - - /* - * Since we do this bottom-up, any outer-rels previously marked - * should be within the new outer join set. - */ - Assert(is_subseti(rel->outerjoinset, outerrels)); - - /* - * Presently the executor cannot support FOR UPDATE marking of - * rels appearing on the nullable side of an outer join. (It's - * somewhat unclear what that would mean, anyway: what should we - * mark when a result row is generated from no element of the - * nullable relation?) So, complain if target rel is FOR UPDATE. - * It's sufficient to make this check once per rel, so do it only - * if rel wasn't already known nullable. - */ - if (rel->outerjoinset == NIL) - { - if (intMember(relno, root->rowMarks)) - elog(ERROR, "SELECT FOR UPDATE cannot be applied to the nullable side of an OUTER JOIN"); - } - - rel->outerjoinset = outerrels; - } -} - -/* - * distribute_qual_to_rels - * Add clause information to either the 'RestrictInfo' or 'JoinInfo' field - * (depending on whether the clause is a join) of each base relation - * mentioned in the clause. A RestrictInfo node is created and added to - * the appropriate list for each rel. Also, if the clause uses a - * mergejoinable operator and is not an outer-join qual, enter the left- - * and right-side expressions into the query's lists of equijoined vars. - * - * 'clause': the qual clause to be distributed - * 'ispusheddown': if TRUE, force the clause to be marked 'ispusheddown' - * (this indicates the clause came from a FromExpr, not a JoinExpr) - * 'isouterjoin': TRUE if the qual came from an OUTER JOIN's ON-clause - * 'isdeduced': TRUE if the qual came from implied-equality deduction - * 'qualscope': list of baserels the qual's syntactic scope covers - * - * 'qualscope' identifies what level of JOIN the qual came from. 
For a top - * level qual (WHERE qual), qualscope lists all baserel ids and in addition - * 'ispusheddown' will be TRUE. - */ -static void -distribute_qual_to_rels(Query *root, Node *clause, - bool ispusheddown, - bool isouterjoin, - bool isdeduced, - Relids qualscope) -{ - RestrictInfo *restrictinfo = makeNode(RestrictInfo); - Relids relids; - List *vars; - bool can_be_equijoin; - - restrictinfo->clause = (Expr *) clause; - restrictinfo->subclauseindices = NIL; - restrictinfo->eval_cost = -1; /* not computed until needed */ - restrictinfo->this_selec = -1; /* not computed until needed */ - restrictinfo->mergejoinoperator = InvalidOid; - restrictinfo->left_sortop = InvalidOid; - restrictinfo->right_sortop = InvalidOid; - restrictinfo->left_pathkey = NIL; /* not computable yet */ - restrictinfo->right_pathkey = NIL; - restrictinfo->left_mergescansel = -1; /* not computed until needed */ - restrictinfo->right_mergescansel = -1; - restrictinfo->hashjoinoperator = InvalidOid; - restrictinfo->left_bucketsize = -1; /* not computed until needed */ - restrictinfo->right_bucketsize = -1; - - /* - * Retrieve all relids and vars contained within the clause. - */ - clause_get_relids_vars(clause, &relids, &vars); - - /* - * The clause might contain some join alias vars; if so, we want to - * remove the join otherrelids from relids and add the referent joins' - * scope lists instead (thus ensuring that the clause can be evaluated - * no lower than that join node). We rely here on the marking done - * earlier by add_base_rels_to_query. - * - * We can combine this step with a cross-check that the clause contains - * no relids not within its scope. If the first crosscheck succeeds, - * the clause contains no aliases and we needn't look more closely. 
- */ - if (!is_subseti(relids, qualscope)) - { - Relids newrelids = NIL; - List *relid; - - foreach(relid, relids) - { - RelOptInfo *rel = find_other_rel(root, lfirsti(relid)); - - if (rel && rel->outerjoinset) - { - /* this relid is for a join RTE */ - newrelids = set_unioni(newrelids, rel->outerjoinset); - } - else - { - /* this relid is for a true baserel */ - newrelids = lappendi(newrelids, lfirsti(relid)); - } - } - relids = newrelids; - /* Now repeat the crosscheck */ - if (!is_subseti(relids, qualscope)) - elog(ERROR, "JOIN qualification may not refer to other relations"); - } - - /* - * If the clause is variable-free, we force it to be evaluated at its - * original syntactic level. Note that this should not happen for - * top-level clauses, because query_planner() special-cases them. But - * it will happen for variable-free JOIN/ON clauses. We don't have to - * be real smart about such a case, we just have to be correct. - */ - if (relids == NIL) - relids = qualscope; - - /* - * For an outer-join qual, pretend that the clause references all rels - * appearing within its syntactic scope, even if it really doesn't. - * This ensures that the clause will be evaluated exactly at the level - * of joining corresponding to the outer join. - * - * For a non-outer-join qual, we can evaluate the qual as soon as (1) we - * have all the rels it mentions, and (2) we are at or above any outer - * joins that can null any of these rels and are below the syntactic - * location of the given qual. To enforce the latter, scan the base - * rels listed in relids, and merge their outer-join lists into the - * clause's own reference list. At the time we are called, the - * outerjoinset list of each baserel will show exactly those outer - * joins that are below the qual in the join tree. - * - * If the qual came from implied-equality deduction, we can evaluate the - * qual at its natural semantic level. 
- * - */ - if (isdeduced) - { - Assert(sameseti(relids, qualscope)); - can_be_equijoin = true; - } - else if (isouterjoin) - { - relids = qualscope; - can_be_equijoin = false; - } - else - { - Relids newrelids = relids; - List *relid; - - /* - * We rely on set_unioni to be nondestructive of its input - * lists... - */ - can_be_equijoin = true; - foreach(relid, relids) - { - RelOptInfo *rel = find_base_rel(root, lfirsti(relid)); - - if (rel->outerjoinset && - !is_subseti(rel->outerjoinset, relids)) - { - newrelids = set_unioni(newrelids, rel->outerjoinset); - - /* - * Because application of the qual will be delayed by - * outer join, we mustn't assume its vars are equal - * everywhere. - */ - can_be_equijoin = false; - } - } - relids = newrelids; - /* Should still be a subset of current scope ... */ - Assert(is_subseti(relids, qualscope)); - } - - /* - * Mark the qual as "pushed down" if it can be applied at a level - * below its original syntactic level. This allows us to distinguish - * original JOIN/ON quals from higher-level quals pushed down to the - * same joinrel. A qual originating from WHERE is always considered - * "pushed down". - */ - restrictinfo->ispusheddown = ispusheddown || !sameseti(relids, - qualscope); - - if (length(relids) == 1) - { - /* - * There is only one relation participating in 'clause', so - * 'clause' is a restriction clause for that relation. - */ - RelOptInfo *rel = find_base_rel(root, lfirsti(relids)); - - /* - * Check for a "mergejoinable" clause even though it's not a join - * clause. This is so that we can recognize that "a.x = a.y" - * makes x and y eligible to be considered equal, even when they - * belong to the same rel. Without this, we would not recognize - * that "a.x = a.y AND a.x = b.z AND a.y = c.q" allows us to - * consider z and q equal after their rels are joined. 
- */ - if (can_be_equijoin) - check_mergejoinable(restrictinfo); - - /* - * If the clause was deduced from implied equality, check to see - * whether it is redundant with restriction clauses we already - * have for this rel. Note we cannot apply this check to - * user-written clauses, since we haven't found the canonical - * pathkey sets yet while processing user clauses. (NB: no - * comparable check is done in the join-clause case; redundancy - * will be detected when the join clause is moved into a join - * rel's restriction list.) - */ - if (!isdeduced || - !qual_is_redundant(root, restrictinfo, rel->baserestrictinfo)) - { - /* Add clause to rel's restriction list */ - rel->baserestrictinfo = lappend(rel->baserestrictinfo, - restrictinfo); - } - } - else if (relids != NIL) - { - /* - * 'clause' is a join clause, since there is more than one rel in - * the relid list. Set additional RestrictInfo fields for - * joining. - * - * We don't bother setting the merge/hashjoin info if we're not going - * to need it. We do want to know about mergejoinable ops in any - * potential equijoin clause (see later in this routine), and we - * ignore enable_mergejoin if isouterjoin is true, because - * mergejoin is the only implementation we have for full and right - * outer joins. - */ - if (enable_mergejoin || isouterjoin || can_be_equijoin) - check_mergejoinable(restrictinfo); - if (enable_hashjoin) - check_hashjoinable(restrictinfo); - - /* - * Add clause to the join lists of all the relevant relations. - */ - add_join_info_to_rels(root, restrictinfo, relids); - - /* - * Add vars used in the join clause to targetlists of their - * relations, so that they will be emitted by the plan nodes that - * scan those relations (else they won't be available at the join - * node!). - */ - add_vars_to_targetlist(root, vars); - } - else - { - /* - * 'clause' references no rels, and therefore we have no place to - * attach it. Shouldn't get here if callers are working properly. 
- */ - elog(ERROR, "distribute_qual_to_rels: can't cope with variable-free clause"); - } - - /* - * If the clause has a mergejoinable operator, and is not an - * outer-join qualification nor bubbled up due to an outer join, then - * the two sides represent equivalent PathKeyItems for path keys: any - * path that is sorted by one side will also be sorted by the other - * (as soon as the two rels are joined, that is). Record the key - * equivalence for future use. (We can skip this for a deduced - * clause, since the keys are already known equivalent in that case.) - */ - if (can_be_equijoin && restrictinfo->mergejoinoperator != InvalidOid && - !isdeduced) - add_equijoined_keys(root, restrictinfo); -} - -/* - * add_join_info_to_rels - * For every relation participating in a join clause, add 'restrictinfo' to - * the appropriate joininfo list (creating a new list and adding it to the - * appropriate rel node if necessary). - * - * 'restrictinfo' describes the join clause - * 'join_relids' is the list of relations participating in the join clause - */ -static void -add_join_info_to_rels(Query *root, RestrictInfo *restrictinfo, - Relids join_relids) -{ - List *join_relid; - - /* For every relid, find the joininfo, and add the proper join entries */ - foreach(join_relid, join_relids) - { - int cur_relid = lfirsti(join_relid); - Relids unjoined_relids = NIL; - JoinInfo *joininfo; - List *otherrel; - - /* Get the relids not equal to the current relid */ - foreach(otherrel, join_relids) - { - if (lfirsti(otherrel) != cur_relid) - unjoined_relids = lappendi(unjoined_relids, lfirsti(otherrel)); - } - - /* - * Find or make the joininfo node for this combination of rels, - * and add the restrictinfo node to it. 
- */ - joininfo = find_joininfo_node(find_base_rel(root, cur_relid), - unjoined_relids); - joininfo->jinfo_restrictinfo = lappend(joininfo->jinfo_restrictinfo, - restrictinfo); - } -} - -/* - * process_implied_equality - * Check to see whether we already have a restrictinfo item that says - * item1 = item2, and create one if not. This is a consequence of - * transitivity of mergejoin equality: if we have mergejoinable - * clauses A = B and B = C, we can deduce A = C (where = is an - * appropriate mergejoinable operator). - */ -void -process_implied_equality(Query *root, Node *item1, Node *item2, - Oid sortop1, Oid sortop2) -{ - Index irel1; - Index irel2; - RelOptInfo *rel1; - List *restrictlist; - List *itm; - Oid ltype, - rtype; - Operator eq_operator; - Form_pg_operator pgopform; - Expr *clause; - - /* - * Currently, since check_mergejoinable only accepts Var = Var - * clauses, we should only see Var nodes here. Would have to work a - * little harder to locate the right rel(s) if more-general mergejoin - * clauses were accepted. - */ - Assert(IsA(item1, Var)); - irel1 = ((Var *) item1)->varno; - Assert(IsA(item2, Var)); - irel2 = ((Var *) item2)->varno; - - /* - * If both vars belong to same rel, we need to look at that rel's - * baserestrictinfo list. If different rels, each will have a - * joininfo node for the other, and we can scan either list. - */ - rel1 = find_base_rel(root, irel1); - if (irel1 == irel2) - restrictlist = rel1->baserestrictinfo; - else - { - JoinInfo *joininfo = find_joininfo_node(rel1, - makeListi1(irel2)); - - restrictlist = joininfo->jinfo_restrictinfo; - } - - /* - * Scan to see if equality is already known. 
- */ - foreach(itm, restrictlist) - { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(itm); - Node *left, - *right; - - if (restrictinfo->mergejoinoperator == InvalidOid) - continue; /* ignore non-mergejoinable clauses */ - /* We now know the restrictinfo clause is a binary opclause */ - left = (Node *) get_leftop(restrictinfo->clause); - right = (Node *) get_rightop(restrictinfo->clause); - if ((equal(item1, left) && equal(item2, right)) || - (equal(item2, left) && equal(item1, right))) - return; /* found a matching clause */ - } - - /* - * This equality is new information, so construct a clause - * representing it to add to the query data structures. - */ - ltype = exprType(item1); - rtype = exprType(item2); - eq_operator = compatible_oper(makeList1(makeString("=")), - ltype, rtype, true); - if (!HeapTupleIsValid(eq_operator)) - { - /* - * Would it be safe to just not add the equality to the query if - * we have no suitable equality operator for the combination of - * datatypes? NO, because sortkey selection may screw up anyway. - */ - elog(ERROR, "Unable to identify an equality operator for types '%s' and '%s'", - format_type_be(ltype), format_type_be(rtype)); - } - pgopform = (Form_pg_operator) GETSTRUCT(eq_operator); - - /* - * Let's just make sure this appears to be a compatible operator. 
- */ - if (pgopform->oprlsortop != sortop1 || - pgopform->oprrsortop != sortop2 || - pgopform->oprresult != BOOLOID) - elog(ERROR, "Equality operator for types '%s' and '%s' should be mergejoinable, but isn't", - format_type_be(ltype), format_type_be(rtype)); - - clause = makeNode(Expr); - clause->typeOid = BOOLOID; - clause->opType = OP_EXPR; - clause->oper = (Node *) makeOper(oprid(eq_operator),/* opno */ - InvalidOid, /* opid */ - BOOLOID, /* opresulttype */ - false); /* opretset */ - clause->args = makeList2(item1, item2); - - ReleaseSysCache(eq_operator); - - /* - * Note: we mark the qual "pushed down" to ensure that it can never be - * taken for an original JOIN/ON clause. - */ - distribute_qual_to_rels(root, (Node *) clause, - true, false, true, - pull_varnos((Node *) clause)); -} - -/* - * qual_is_redundant - * Detect whether an implied-equality qual that turns out to be a - * restriction clause for a single base relation is redundant with - * already-known restriction clauses for that rel. This occurs with, - * for example, - * SELECT * FROM tab WHERE f1 = f2 AND f2 = f3; - * We need to suppress the redundant condition to avoid computing - * too-small selectivity, not to mention wasting time at execution. - */ -static bool -qual_is_redundant(Query *root, - RestrictInfo *restrictinfo, - List *restrictlist) -{ - List *oldquals; - List *olditem; - Node *newleft; - Node *newright; - List *equalvars; - bool someadded; - - /* - * Set cached pathkeys. NB: it is okay to do this now because this - * routine is only invoked while we are generating implied equalities. - * Therefore, the equi_key_list is already complete and so we can - * correctly determine canonical pathkeys. - */ - cache_mergeclause_pathkeys(root, restrictinfo); - /* If different, say "not redundant" (should never happen) */ - if (restrictinfo->left_pathkey != restrictinfo->right_pathkey) - return false; - - /* - * Scan existing quals to find those referencing same pathkeys. 
- * Usually there will be few, if any, so build a list of just the - * interesting ones. - */ - oldquals = NIL; - foreach(olditem, restrictlist) - { - RestrictInfo *oldrinfo = (RestrictInfo *) lfirst(olditem); - - if (oldrinfo->mergejoinoperator != InvalidOid) - { - cache_mergeclause_pathkeys(root, oldrinfo); - if (restrictinfo->left_pathkey == oldrinfo->left_pathkey && - restrictinfo->right_pathkey == oldrinfo->right_pathkey) - oldquals = lcons(oldrinfo, oldquals); - } - } - if (oldquals == NIL) - return false; - - /* - * Now, we want to develop a list of Vars that are known equal to the - * left side of the new qual. We traverse the old-quals list - * repeatedly to transitively expand the Vars list. If at any point - * we find we can reach the right-side Var of the new qual, we are - * done. We give up when we can't expand the equalvars list any more. - */ - newleft = (Node *) get_leftop(restrictinfo->clause); - newright = (Node *) get_rightop(restrictinfo->clause); - equalvars = makeList1(newleft); - do - { - someadded = false; - foreach(olditem, oldquals) - { - RestrictInfo *oldrinfo = (RestrictInfo *) lfirst(olditem); - Node *oldleft = (Node *) get_leftop(oldrinfo->clause); - Node *oldright = (Node *) get_rightop(oldrinfo->clause); - Node *newguy = NULL; - - if (member(oldleft, equalvars)) - newguy = oldright; - else if (member(oldright, equalvars)) - newguy = oldleft; - else - continue; - if (equal(newguy, newright)) - return true; /* we proved new clause is redundant */ - equalvars = lcons(newguy, equalvars); - someadded = true; - - /* - * Remove this qual from list, since we don't need it anymore. - * Note this doesn't break the foreach() loop, since lremove - * doesn't touch the next-link of the removed cons cell. 
- */ - oldquals = lremove(oldrinfo, oldquals); - } - } while (someadded); - - return false; /* it's not redundant */ -} - - -/***************************************************************************** - * - * CHECKS FOR MERGEJOINABLE AND HASHJOINABLE CLAUSES - * - *****************************************************************************/ - -/* - * check_mergejoinable - * If the restrictinfo's clause is mergejoinable, set the mergejoin - * info fields in the restrictinfo. - * - * Currently, we support mergejoin for binary opclauses where - * both operands are simple Vars and the operator is a mergejoinable - * operator. - */ -static void -check_mergejoinable(RestrictInfo *restrictinfo) -{ - Expr *clause = restrictinfo->clause; - Var *left, - *right; - Oid opno, - leftOp, - rightOp; - - if (!is_opclause((Node *) clause)) - return; - - left = get_leftop(clause); - right = get_rightop(clause); - - /* caution: is_opclause accepts more than I do, so check it */ - if (!right) - return; /* unary opclauses need not apply */ - if (!IsA(left, Var) ||!IsA(right, Var)) - return; - - opno = ((Oper *) clause->oper)->opno; - - if (op_mergejoinable(opno, - left->vartype, - right->vartype, - &leftOp, - &rightOp)) - { - restrictinfo->mergejoinoperator = opno; - restrictinfo->left_sortop = leftOp; - restrictinfo->right_sortop = rightOp; - } -} - -/* - * check_hashjoinable - * If the restrictinfo's clause is hashjoinable, set the hashjoin - * info fields in the restrictinfo. - * - * Currently, we support hashjoin for binary opclauses where - * both operands are simple Vars and the operator is a hashjoinable - * operator. 
- */ -static void -check_hashjoinable(RestrictInfo *restrictinfo) -{ - Expr *clause = restrictinfo->clause; - Var *left, - *right; - Oid opno; - - if (!is_opclause((Node *) clause)) - return; - - left = get_leftop(clause); - right = get_rightop(clause); - - /* caution: is_opclause accepts more than I do, so check it */ - if (!right) - return; /* unary opclauses need not apply */ - if (!IsA(left, Var) ||!IsA(right, Var)) - return; - - opno = ((Oper *) clause->oper)->opno; - - if (op_hashjoinable(opno, - left->vartype, - right->vartype)) - restrictinfo->hashjoinoperator = opno; -} diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c deleted file mode 100644 index 8efbf36f921..00000000000 --- a/src/backend/optimizer/plan/planmain.c +++ /dev/null @@ -1,330 +0,0 @@ -/*------------------------------------------------------------------------- - * - * planmain.c - * Routines to plan a single query - * - * What's in a name, anyway? The top-level entry point of the planner/ - * optimizer is over in planner.c, not here as you might think from the - * file name. But this is the main code for planning a basic join operation, - * shorn of features like subselects, inheritance, aggregates, grouping, - * and so on. (Those are the things planner.c deals with.) 
- * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.69 2002/06/20 20:29:31 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include <sys/types.h> - -#include "optimizer/clauses.h" -#include "optimizer/cost.h" -#include "optimizer/pathnode.h" -#include "optimizer/paths.h" -#include "optimizer/planmain.h" -#include "optimizer/tlist.h" -#include "parser/parsetree.h" -#include "utils/memutils.h" - - -static Plan *subplanner(Query *root, List *flat_tlist, - double tuple_fraction); - - -/*-------------------- - * query_planner - * Generate a plan for a basic query, which may involve joins but - * not any fancier features. - * - * tlist is the target list the query should produce (NOT root->targetList!) - * tuple_fraction is the fraction of tuples we expect will be retrieved - * - * Note: the Query node now also includes a query_pathkeys field, which - * is both an input and an output of query_planner(). The input value - * signals query_planner that the indicated sort order is wanted in the - * final output plan. The output value is the actual pathkeys of the - * selected path. This might not be the same as what the caller requested; - * the caller must do pathkeys_contained_in() to decide whether an - * explicit sort is still needed. (The main reason query_pathkeys is a - * Query field and not a passed parameter is that the low-level routines - * in indxpath.c need to see it.) The pathkeys value passed to query_planner - * has not yet been "canonicalized", since the necessary info does not get - * computed until subplanner() scans the qual clauses. We canonicalize it - * inside subplanner() as soon as that task is done. The output value - * will be in canonical form as well. 
- * - * tuple_fraction is interpreted as follows: - * 0 (or less): expect all tuples to be retrieved (normal case) - * 0 < tuple_fraction < 1: expect the given fraction of tuples available - * from the plan to be retrieved - * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples - * expected to be retrieved (ie, a LIMIT specification) - * Note that while this routine and its subroutines treat a negative - * tuple_fraction the same as 0, grouping_planner has a different - * interpretation. - * - * Returns a query plan. - *-------------------- - */ -Plan * -query_planner(Query *root, - List *tlist, - double tuple_fraction) -{ - List *constant_quals; - List *var_only_tlist; - Plan *subplan; - - /* - * If the query has an empty join tree, then it's something easy like - * "SELECT 2+2;" or "INSERT ... VALUES()". Fall through quickly. - */ - if (root->jointree->fromlist == NIL) - { - root->query_pathkeys = NIL; /* signal unordered result */ - - /* Make childless Result node to evaluate given tlist. */ - return (Plan *) make_result(tlist, root->jointree->quals, - (Plan *) NULL); - } - - /* - * Pull out any non-variable WHERE clauses so these can be put in a - * toplevel "Result" node, where they will gate execution of the whole - * plan (the Result will not invoke its descendant plan unless the - * quals are true). Note that any *really* non-variable quals will - * have been optimized away by eval_const_expressions(). What we're - * mostly interested in here is quals that depend only on outer-level - * vars, although if the qual reduces to "WHERE FALSE" this path will - * also be taken. - */ - root->jointree->quals = (Node *) - pull_constant_clauses((List *) root->jointree->quals, - &constant_quals); - - /* - * Create a target list that consists solely of (resdom var) target - * list entries, i.e., contains no arbitrary expressions. - * - * All subplan nodes will have "flat" (var-only) tlists. 
- * - * This implies that all expression evaluations are done at the root of - * the plan tree. Once upon a time there was code to try to push - * expensive function calls down to lower plan nodes, but that's dead - * code and has been for a long time... - */ - var_only_tlist = flatten_tlist(tlist); - - /* - * Choose the best access path and build a plan for it. - */ - subplan = subplanner(root, var_only_tlist, tuple_fraction); - - /* - * Build a result node to control the plan if we have constant quals, - * or if the top-level plan node is one that cannot do expression - * evaluation (it won't be able to evaluate the requested tlist). - * Currently, the only plan node we might see here that falls into - * that category is Append. - * - * XXX future improvement: if the given tlist is flat anyway, we don't - * really need a Result node. - */ - if (constant_quals || IsA(subplan, Append)) - { - /* - * The result node will also be responsible for evaluating the - * originally requested tlist. - */ - subplan = (Plan *) make_result(tlist, - (Node *) constant_quals, - subplan); - } - else - { - /* - * Replace the toplevel plan node's flattened target list with the - * targetlist given by my caller, so that expressions are - * evaluated. - */ - subplan->targetlist = tlist; - } - - return subplan; -} - -/* - * subplanner - * - * Subplanner creates an entire plan consisting of joins and scans - * for processing a single level of attributes. - * - * flat_tlist is the flattened target list - * tuple_fraction is the fraction of tuples we expect will be retrieved - * - * See query_planner() comments about the interpretation of tuple_fraction. - * - * Returns a subplan. 
- */ -static Plan * -subplanner(Query *root, - List *flat_tlist, - double tuple_fraction) -{ - RelOptInfo *final_rel; - Plan *resultplan; - Path *cheapestpath; - Path *presortedpath; - - /* init lists to empty */ - root->base_rel_list = NIL; - root->other_rel_list = NIL; - root->join_rel_list = NIL; - root->equi_key_list = NIL; - - /* - * Construct RelOptInfo nodes for all base relations in query. - */ - (void) add_base_rels_to_query(root, (Node *) root->jointree); - - /* - * Examine the targetlist and qualifications, adding entries to - * baserel targetlists for all referenced Vars. Restrict and join - * clauses are added to appropriate lists belonging to the mentioned - * relations. We also build lists of equijoined keys for pathkey - * construction. - */ - build_base_rel_tlists(root, flat_tlist); - - (void) distribute_quals_to_rels(root, (Node *) root->jointree); - - /* - * Use the completed lists of equijoined keys to deduce any implied - * but unstated equalities (for example, A=B and B=C imply A=C). - */ - generate_implied_equalities(root); - - /* - * We should now have all the pathkey equivalence sets built, so it's - * now possible to convert the requested query_pathkeys to canonical - * form. - */ - root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys); - - /* - * Ready to do the primary planning. - */ - final_rel = make_one_rel(root); - - if (!final_rel) - elog(ERROR, "subplanner: failed to construct a relation"); - -#ifdef NOT_USED /* fix xfunc */ - - /* - * Perform Predicate Migration on each path, to optimize and correctly - * assess the cost of each before choosing the cheapest one. -- JMH, - * 11/16/92 - * - * Needn't do so if the top rel is pruneable: that means there's no - * expensive functions left to pull up. 
-- JMH, 11/22/92 - */ - if (XfuncMode != XFUNC_OFF && XfuncMode != XFUNC_NOPM && - XfuncMode != XFUNC_NOPULL && !final_rel->pruneable) - { - List *pathnode; - - foreach(pathnode, final_rel->pathlist) - { - if (xfunc_do_predmig((Path *) lfirst(pathnode))) - set_cheapest(final_rel); - } - } -#endif - - /* - * Now that we have an estimate of the final rel's size, we can - * convert a tuple_fraction specified as an absolute count (ie, a - * LIMIT option) into a fraction of the total tuples. - */ - if (tuple_fraction >= 1.0) - tuple_fraction /= final_rel->rows; - - /* - * Determine the cheapest path, independently of any ordering - * considerations. We do, however, take into account whether the - * whole plan is expected to be evaluated or not. - */ - if (tuple_fraction <= 0.0 || tuple_fraction >= 1.0) - cheapestpath = final_rel->cheapest_total_path; - else - cheapestpath = - get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist, - NIL, - tuple_fraction); - - Assert(cheapestpath != NULL); - - /* - * Select the best path and create a subplan to execute it. - * - * If no special sort order is wanted, or if the cheapest path is already - * appropriately ordered, we use the cheapest path found above. - */ - if (root->query_pathkeys == NIL || - pathkeys_contained_in(root->query_pathkeys, - cheapestpath->pathkeys)) - { - root->query_pathkeys = cheapestpath->pathkeys; - resultplan = create_plan(root, cheapestpath); - goto plan_built; - } - - /* - * Otherwise, look to see if we have an already-ordered path that is - * cheaper than doing an explicit sort on the cheapest-total-cost - * path. 
- */ - cheapestpath = final_rel->cheapest_total_path; - presortedpath = - get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist, - root->query_pathkeys, - tuple_fraction); - if (presortedpath) - { - Path sort_path; /* dummy for result of cost_sort */ - - cost_sort(&sort_path, root, root->query_pathkeys, - final_rel->rows, final_rel->width); - sort_path.startup_cost += cheapestpath->total_cost; - sort_path.total_cost += cheapestpath->total_cost; - if (compare_fractional_path_costs(presortedpath, &sort_path, - tuple_fraction) <= 0) - { - /* Presorted path is cheaper, use it */ - root->query_pathkeys = presortedpath->pathkeys; - resultplan = create_plan(root, presortedpath); - goto plan_built; - } - /* otherwise, doing it the hard way is still cheaper */ - } - - /* - * Nothing for it but to sort the cheapest-total-cost path --- but we - * let the caller do that. grouping_planner has to be able to add a - * sort node anyway, so no need for extra code here. (Furthermore, - * the given pathkeys might involve something we can't compute here, - * such as an aggregate function...) - */ - root->query_pathkeys = cheapestpath->pathkeys; - resultplan = create_plan(root, cheapestpath); - -plan_built: - - return resultplan; -} diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c deleted file mode 100644 index 999702a05a9..00000000000 --- a/src/backend/optimizer/plan/planner.c +++ /dev/null @@ -1,1563 +0,0 @@ -/*------------------------------------------------------------------------- - * - * planner.c - * The query optimizer external interface. 
- * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.122 2002/06/20 20:29:31 momjian Exp $ - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" - -#include "catalog/pg_type.h" -#include "nodes/makefuncs.h" -#ifdef OPTIMIZER_DEBUG -#include "nodes/print.h" -#endif -#include "optimizer/clauses.h" -#include "optimizer/paths.h" -#include "optimizer/planmain.h" -#include "optimizer/planner.h" -#include "optimizer/prep.h" -#include "optimizer/subselect.h" -#include "optimizer/tlist.h" -#include "optimizer/var.h" -#include "parser/analyze.h" -#include "parser/parsetree.h" -#include "parser/parse_expr.h" -#include "rewrite/rewriteManip.h" -#include "utils/lsyscache.h" - - -/* Expression kind codes for preprocess_expression */ -#define EXPRKIND_TARGET 0 -#define EXPRKIND_WHERE 1 -#define EXPRKIND_HAVING 2 - - -static Node *pull_up_subqueries(Query *parse, Node *jtnode, - bool below_outer_join); -static bool is_simple_subquery(Query *subquery); -static bool has_nullable_targetlist(Query *subquery); -static void resolvenew_in_jointree(Node *jtnode, int varno, List *subtlist); -static Node *preprocess_jointree(Query *parse, Node *jtnode); -static Node *preprocess_expression(Query *parse, Node *expr, int kind); -static void preprocess_qual_conditions(Query *parse, Node *jtnode); -static Plan *inheritance_planner(Query *parse, List *inheritlist); -static Plan *grouping_planner(Query *parse, double tuple_fraction); -static List *make_subplanTargetList(Query *parse, List *tlist, - AttrNumber **groupColIdx); -static Plan *make_groupplan(Query *parse, - List *group_tlist, bool tuplePerGroup, - List *groupClause, AttrNumber *grpColIdx, - bool is_presorted, Plan *subplan); -static List *postprocess_setop_tlist(List *new_tlist, List 
*orig_tlist); - - -/***************************************************************************** - * - * Query optimizer entry point - * - *****************************************************************************/ -Plan * -planner(Query *parse) -{ - Plan *result_plan; - Index save_PlannerQueryLevel; - List *save_PlannerParamVar; - - /* - * The planner can be called recursively (an example is when - * eval_const_expressions tries to pre-evaluate an SQL function). So, - * these global state variables must be saved and restored. - * - * These vars cannot be moved into the Query structure since their whole - * purpose is communication across multiple sub-Queries. - * - * Note we do NOT save and restore PlannerPlanId: it exists to assign - * unique IDs to SubPlan nodes, and we want those IDs to be unique for - * the life of a backend. Also, PlannerInitPlan is saved/restored in - * subquery_planner, not here. - */ - save_PlannerQueryLevel = PlannerQueryLevel; - save_PlannerParamVar = PlannerParamVar; - - /* Initialize state for handling outer-level references and params */ - PlannerQueryLevel = 0; /* will be 1 in top-level subquery_planner */ - PlannerParamVar = NIL; - - /* primary planning entry point (may recurse for subqueries) */ - result_plan = subquery_planner(parse, -1.0 /* default case */ ); - - Assert(PlannerQueryLevel == 0); - - /* executor wants to know total number of Params used overall */ - result_plan->nParamExec = length(PlannerParamVar); - - /* final cleanup of the plan */ - set_plan_references(result_plan, parse->rtable); - - /* restore state for outer planner, if any */ - PlannerQueryLevel = save_PlannerQueryLevel; - PlannerParamVar = save_PlannerParamVar; - - return result_plan; -} - - -/*-------------------- - * subquery_planner - * Invokes the planner on a subquery. We recurse to here for each - * sub-SELECT found in the query tree. - * - * parse is the querytree produced by the parser & rewriter. 
- * tuple_fraction is the fraction of tuples we expect will be retrieved. - * tuple_fraction is interpreted as explained for grouping_planner, below. - * - * Basically, this routine does the stuff that should only be done once - * per Query object. It then calls grouping_planner. At one time, - * grouping_planner could be invoked recursively on the same Query object; - * that's not currently true, but we keep the separation between the two - * routines anyway, in case we need it again someday. - * - * subquery_planner will be called recursively to handle sub-Query nodes - * found within the query's expressions and rangetable. - * - * Returns a query plan. - *-------------------- - */ -Plan * -subquery_planner(Query *parse, double tuple_fraction) -{ - List *saved_initplan = PlannerInitPlan; - int saved_planid = PlannerPlanId; - Plan *plan; - List *newHaving; - List *lst; - - /* Set up for a new level of subquery */ - PlannerQueryLevel++; - PlannerInitPlan = NIL; - - /* - * Check to see if any subqueries in the rangetable can be merged into - * this query. - */ - parse->jointree = (FromExpr *) - pull_up_subqueries(parse, (Node *) parse->jointree, false); - - /* - * If so, we may have created opportunities to simplify the jointree. - */ - parse->jointree = (FromExpr *) - preprocess_jointree(parse, (Node *) parse->jointree); - - /* - * Do expression preprocessing on targetlist and quals. 
- */ - parse->targetList = (List *) - preprocess_expression(parse, (Node *) parse->targetList, - EXPRKIND_TARGET); - - preprocess_qual_conditions(parse, (Node *) parse->jointree); - - parse->havingQual = preprocess_expression(parse, parse->havingQual, - EXPRKIND_HAVING); - - /* Also need to preprocess expressions for function RTEs */ - foreach(lst, parse->rtable) - { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(lst); - - if (rte->rtekind == RTE_FUNCTION) - rte->funcexpr = preprocess_expression(parse, rte->funcexpr, - EXPRKIND_TARGET); - /* These are not targetlist items, but close enough... */ - } - - /* - * Check for ungrouped variables passed to subplans in targetlist and - * HAVING clause (but not in WHERE or JOIN/ON clauses, since those are - * evaluated before grouping). We can't do this any earlier because - * we must use the preprocessed targetlist for comparisons of grouped - * expressions. - */ - if (parse->hasSubLinks && - (parse->groupClause != NIL || parse->hasAggs)) - check_subplans_for_ungrouped_vars(parse); - - /* - * A HAVING clause without aggregates is equivalent to a WHERE clause - * (except it can only refer to grouped fields). Transfer any - * agg-free clauses of the HAVING qual into WHERE. This may seem like - * wasting cycles to cater to stupidly-written queries, but there are - * other reasons for doing it. Firstly, if the query contains no aggs - * at all, then we aren't going to generate an Agg plan node, and so - * there'll be no place to execute HAVING conditions; without this - * transfer, we'd lose the HAVING condition entirely, which is wrong. - * Secondly, when we push down a qual condition into a sub-query, it's - * easiest to push the qual into HAVING always, in case it contains - * aggs, and then let this code sort it out. - * - * Note that both havingQual and parse->jointree->quals are in - * implicitly-ANDed-list form at this point, even though they are - * declared as Node *. 
Also note that contain_agg_clause does not - * recurse into sub-selects, which is exactly what we need here. - */ - newHaving = NIL; - foreach(lst, (List *) parse->havingQual) - { - Node *havingclause = (Node *) lfirst(lst); - - if (contain_agg_clause(havingclause)) - newHaving = lappend(newHaving, havingclause); - else - parse->jointree->quals = (Node *) - lappend((List *) parse->jointree->quals, havingclause); - } - parse->havingQual = (Node *) newHaving; - - /* - * Do the main planning. If we have an inherited target relation, - * that needs special processing, else go straight to - * grouping_planner. - */ - if (parse->resultRelation && - (lst = expand_inherted_rtentry(parse, parse->resultRelation, false)) - != NIL) - plan = inheritance_planner(parse, lst); - else - plan = grouping_planner(parse, tuple_fraction); - - /* - * If any subplans were generated, or if we're inside a subplan, build - * subPlan, extParam and locParam lists for plan nodes. - */ - if (PlannerPlanId != saved_planid || PlannerQueryLevel > 1) - { - (void) SS_finalize_plan(plan, parse->rtable); - - /* - * At the moment, SS_finalize_plan doesn't handle initPlans and so - * we assign them to the topmost plan node. - */ - plan->initPlan = PlannerInitPlan; - /* Must add the initPlans' extParams to the topmost node's, too */ - foreach(lst, plan->initPlan) - { - SubPlan *subplan = (SubPlan *) lfirst(lst); - - plan->extParam = set_unioni(plan->extParam, - subplan->plan->extParam); - } - } - - /* Return to outer subquery context */ - PlannerQueryLevel--; - PlannerInitPlan = saved_initplan; - /* we do NOT restore PlannerPlanId; that's not an oversight! */ - - return plan; -} - -/* - * pull_up_subqueries - * Look for subqueries in the rangetable that can be pulled up into - * the parent query. If the subquery has no special features like - * grouping/aggregation then we can merge it into the parent's jointree. 
- * - * below_outer_join is true if this jointree node is within the nullable - * side of an outer join. This restricts what we can do. - * - * A tricky aspect of this code is that if we pull up a subquery we have - * to replace Vars that reference the subquery's outputs throughout the - * parent query, including quals attached to jointree nodes above the one - * we are currently processing! We handle this by being careful not to - * change the jointree structure while recursing: no nodes other than - * subquery RangeTblRef entries will be replaced. Also, we can't turn - * ResolveNew loose on the whole jointree, because it'll return a mutated - * copy of the tree; we have to invoke it just on the quals, instead. - */ -static Node * -pull_up_subqueries(Query *parse, Node *jtnode, bool below_outer_join) -{ - if (jtnode == NULL) - return NULL; - if (IsA(jtnode, RangeTblRef)) - { - int varno = ((RangeTblRef *) jtnode)->rtindex; - RangeTblEntry *rte = rt_fetch(varno, parse->rtable); - Query *subquery = rte->subquery; - - /* - * Is this a subquery RTE, and if so, is the subquery simple - * enough to pull up? (If not, do nothing at this node.) - * - * If we are inside an outer join, only pull up subqueries whose - * targetlists are nullable --- otherwise substituting their tlist - * entries for upper Var references would do the wrong thing - * (the results wouldn't become NULL when they're supposed to). - * XXX This could be improved by generating pseudo-variables for - * such expressions; we'd have to figure out how to get the pseudo- - * variables evaluated at the right place in the modified plan tree. - * Fix it someday. - * - * Note: even if the subquery itself is simple enough, we can't pull - * it up if there is a reference to its whole tuple result. Perhaps - * a pseudo-variable is the answer here too. 
- */ - if (rte->rtekind == RTE_SUBQUERY && is_simple_subquery(subquery) && - (!below_outer_join || has_nullable_targetlist(subquery)) && - !contain_whole_tuple_var((Node *) parse, varno, 0)) - { - int rtoffset; - List *subtlist; - List *rt; - - /* - * First, recursively pull up the subquery's subqueries, so - * that this routine's processing is complete for its jointree - * and rangetable. NB: if the same subquery is referenced - * from multiple jointree items (which can't happen normally, - * but might after rule rewriting), then we will invoke this - * processing multiple times on that subquery. OK because - * nothing will happen after the first time. We do have to be - * careful to copy everything we pull up, however, or risk - * having chunks of structure multiply linked. - */ - subquery->jointree = (FromExpr *) - pull_up_subqueries(subquery, (Node *) subquery->jointree, - below_outer_join); - - /* - * Now make a modifiable copy of the subquery that we can - * run OffsetVarNodes on. - */ - subquery = copyObject(subquery); - - /* - * Adjust varnos in subquery so that we can append its - * rangetable to upper query's. - */ - rtoffset = length(parse->rtable); - OffsetVarNodes((Node *) subquery, rtoffset, 0); - - /* - * Replace all of the top query's references to the subquery's - * outputs with copies of the adjusted subtlist items, being - * careful not to replace any of the jointree structure. - * (This'd be a lot cleaner if we could use query_tree_mutator.) 
- */ - subtlist = subquery->targetList; - parse->targetList = (List *) - ResolveNew((Node *) parse->targetList, - varno, 0, subtlist, CMD_SELECT, 0); - resolvenew_in_jointree((Node *) parse->jointree, varno, subtlist); - Assert(parse->setOperations == NULL); - parse->havingQual = - ResolveNew(parse->havingQual, - varno, 0, subtlist, CMD_SELECT, 0); - - foreach(rt, parse->rtable) - { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt); - - if (rte->rtekind == RTE_JOIN) - rte->joinaliasvars = (List *) - ResolveNew((Node *) rte->joinaliasvars, - varno, 0, subtlist, CMD_SELECT, 0); - } - - /* - * Now append the adjusted rtable entries to upper query. - * (We hold off until after fixing the upper rtable entries; - * no point in running that code on the subquery ones too.) - */ - parse->rtable = nconc(parse->rtable, subquery->rtable); - - /* - * Pull up any FOR UPDATE markers, too. (OffsetVarNodes - * already adjusted the marker values, so just nconc the list.) - */ - parse->rowMarks = nconc(parse->rowMarks, subquery->rowMarks); - - /* - * Miscellaneous housekeeping. - */ - parse->hasSubLinks |= subquery->hasSubLinks; - /* subquery won't be pulled up if it hasAggs, so no work there */ - - /* - * Return the adjusted subquery jointree to replace the - * RangeTblRef entry in my jointree. 
- */ - return (Node *) subquery->jointree; - } - } - else if (IsA(jtnode, FromExpr)) - { - FromExpr *f = (FromExpr *) jtnode; - List *l; - - foreach(l, f->fromlist) - lfirst(l) = pull_up_subqueries(parse, lfirst(l), - below_outer_join); - } - else if (IsA(jtnode, JoinExpr)) - { - JoinExpr *j = (JoinExpr *) jtnode; - - /* Recurse, being careful to tell myself when inside outer join */ - switch (j->jointype) - { - case JOIN_INNER: - j->larg = pull_up_subqueries(parse, j->larg, - below_outer_join); - j->rarg = pull_up_subqueries(parse, j->rarg, - below_outer_join); - break; - case JOIN_LEFT: - j->larg = pull_up_subqueries(parse, j->larg, - below_outer_join); - j->rarg = pull_up_subqueries(parse, j->rarg, - true); - break; - case JOIN_FULL: - j->larg = pull_up_subqueries(parse, j->larg, - true); - j->rarg = pull_up_subqueries(parse, j->rarg, - true); - break; - case JOIN_RIGHT: - j->larg = pull_up_subqueries(parse, j->larg, - true); - j->rarg = pull_up_subqueries(parse, j->rarg, - below_outer_join); - break; - case JOIN_UNION: - - /* - * This is where we fail if upper levels of planner - * haven't rewritten UNION JOIN as an Append ... - */ - elog(ERROR, "UNION JOIN is not implemented yet"); - break; - default: - elog(ERROR, "pull_up_subqueries: unexpected join type %d", - j->jointype); - break; - } - } - else - elog(ERROR, "pull_up_subqueries: unexpected node type %d", - nodeTag(jtnode)); - return jtnode; -} - -/* - * is_simple_subquery - * Check a subquery in the range table to see if it's simple enough - * to pull up into the parent query. - */ -static bool -is_simple_subquery(Query *subquery) -{ - /* - * Let's just make sure it's a valid subselect ... - */ - if (!IsA(subquery, Query) || - subquery->commandType != CMD_SELECT || - subquery->resultRelation != 0 || - subquery->into != NULL || - subquery->isPortal) - elog(ERROR, "is_simple_subquery: subquery is bogus"); - - /* - * Can't currently pull up a query with setops. Maybe after querytree - * redesign... 
- */ - if (subquery->setOperations) - return false; - - /* - * Can't pull up a subquery involving grouping, aggregation, sorting, - * or limiting. - */ - if (subquery->hasAggs || - subquery->groupClause || - subquery->havingQual || - subquery->sortClause || - subquery->distinctClause || - subquery->limitOffset || - subquery->limitCount) - return false; - - /* - * Don't pull up a subquery that has any set-returning functions in - * its targetlist. Otherwise we might well wind up inserting - * set-returning functions into places where they mustn't go, - * such as quals of higher queries. - */ - if (expression_returns_set((Node *) subquery->targetList)) - return false; - - /* - * Hack: don't try to pull up a subquery with an empty jointree. - * query_planner() will correctly generate a Result plan for a - * jointree that's totally empty, but I don't think the right things - * happen if an empty FromExpr appears lower down in a jointree. Not - * worth working hard on this, just to collapse SubqueryScan/Result - * into Result... - */ - if (subquery->jointree->fromlist == NIL) - return false; - - return true; -} - -/* - * has_nullable_targetlist - * Check a subquery in the range table to see if all the non-junk - * targetlist items are simple variables (and, hence, will correctly - * go to NULL when examined above the point of an outer join). - * - * A possible future extension is to accept strict functions of simple - * variables, eg, "x + 1". - */ -static bool -has_nullable_targetlist(Query *subquery) -{ - List *l; - - foreach(l, subquery->targetList) - { - TargetEntry *tle = (TargetEntry *) lfirst(l); - - /* ignore resjunk columns */ - if (tle->resdom->resjunk) - continue; - - /* Okay if tlist item is a simple Var */ - if (tle->expr && IsA(tle->expr, Var)) - continue; - - return false; - } - return true; -} - -/* - * Helper routine for pull_up_subqueries: do ResolveNew on every expression - * in the jointree, without changing the jointree structure itself. 
Ugly, - * but there's no other way... - */ -static void -resolvenew_in_jointree(Node *jtnode, int varno, List *subtlist) -{ - if (jtnode == NULL) - return; - if (IsA(jtnode, RangeTblRef)) - { - /* nothing to do here */ - } - else if (IsA(jtnode, FromExpr)) - { - FromExpr *f = (FromExpr *) jtnode; - List *l; - - foreach(l, f->fromlist) - resolvenew_in_jointree(lfirst(l), varno, subtlist); - f->quals = ResolveNew(f->quals, - varno, 0, subtlist, CMD_SELECT, 0); - } - else if (IsA(jtnode, JoinExpr)) - { - JoinExpr *j = (JoinExpr *) jtnode; - - resolvenew_in_jointree(j->larg, varno, subtlist); - resolvenew_in_jointree(j->rarg, varno, subtlist); - j->quals = ResolveNew(j->quals, - varno, 0, subtlist, CMD_SELECT, 0); - - /* - * We don't bother to update the colvars list, since it won't be - * used again ... - */ - } - else - elog(ERROR, "resolvenew_in_jointree: unexpected node type %d", - nodeTag(jtnode)); -} - -/* - * preprocess_jointree - * Attempt to simplify a query's jointree. - * - * If we succeed in pulling up a subquery then we might form a jointree - * in which a FromExpr is a direct child of another FromExpr. In that - * case we can consider collapsing the two FromExprs into one. This is - * an optional conversion, since the planner will work correctly either - * way. But we may find a better plan (at the cost of more planning time) - * if we merge the two nodes. - * - * NOTE: don't try to do this in the same jointree scan that does subquery - * pullup! Since we're changing the jointree structure here, that wouldn't - * work reliably --- see comments for pull_up_subqueries(). - */ -static Node * -preprocess_jointree(Query *parse, Node *jtnode) -{ - if (jtnode == NULL) - return NULL; - if (IsA(jtnode, RangeTblRef)) - { - /* nothing to do here... 
*/ - } - else if (IsA(jtnode, FromExpr)) - { - FromExpr *f = (FromExpr *) jtnode; - List *newlist = NIL; - List *l; - - foreach(l, f->fromlist) - { - Node *child = (Node *) lfirst(l); - - /* Recursively simplify the child... */ - child = preprocess_jointree(parse, child); - /* Now, is it a FromExpr? */ - if (child && IsA(child, FromExpr)) - { - /* - * Yes, so do we want to merge it into parent? Always do - * so if child has just one element (since that doesn't - * make the parent's list any longer). Otherwise we have - * to be careful about the increase in planning time - * caused by combining the two join search spaces into - * one. Our heuristic is to merge if the merge will - * produce a join list no longer than GEQO_RELS/2. - * (Perhaps need an additional user parameter?) - */ - FromExpr *subf = (FromExpr *) child; - int childlen = length(subf->fromlist); - int myothers = length(newlist) + length(lnext(l)); - - if (childlen <= 1 || (childlen + myothers) <= geqo_rels / 2) - { - newlist = nconc(newlist, subf->fromlist); - f->quals = make_and_qual(subf->quals, f->quals); - } - else - newlist = lappend(newlist, child); - } - else - newlist = lappend(newlist, child); - } - f->fromlist = newlist; - } - else if (IsA(jtnode, JoinExpr)) - { - JoinExpr *j = (JoinExpr *) jtnode; - - /* Can't usefully change the JoinExpr, but recurse on children */ - j->larg = preprocess_jointree(parse, j->larg); - j->rarg = preprocess_jointree(parse, j->rarg); - } - else - elog(ERROR, "preprocess_jointree: unexpected node type %d", - nodeTag(jtnode)); - return jtnode; -} - -/* - * preprocess_expression - * Do subquery_planner's preprocessing work for an expression, - * which can be a targetlist, a WHERE clause (including JOIN/ON - * conditions), or a HAVING clause. - */ -static Node * -preprocess_expression(Query *parse, Node *expr, int kind) -{ - bool has_join_rtes; - List *rt; - - /* - * Simplify constant expressions. 
- * - * Note that at this point quals have not yet been converted to - * implicit-AND form, so we can apply eval_const_expressions directly. - * Also note that we need to do this before SS_process_sublinks, - * because that routine inserts bogus "Const" nodes. - */ - expr = eval_const_expressions(expr); - - /* - * If it's a qual or havingQual, canonicalize it, and convert it to - * implicit-AND format. - * - * XXX Is there any value in re-applying eval_const_expressions after - * canonicalize_qual? - */ - if (kind != EXPRKIND_TARGET) - { - expr = (Node *) canonicalize_qual((Expr *) expr, true); - -#ifdef OPTIMIZER_DEBUG - printf("After canonicalize_qual()\n"); - pprint(expr); -#endif - } - - /* Expand SubLinks to SubPlans */ - if (parse->hasSubLinks) - expr = SS_process_sublinks(expr); - - /* Replace uplevel vars with Param nodes */ - if (PlannerQueryLevel > 1) - expr = SS_replace_correlation_vars(expr); - - /* - * If the query has any join RTEs, try to replace join alias variables - * with base-relation variables, to allow quals to be pushed down. - * We must do this after sublink processing, since it does not recurse - * into sublinks. - * - * The flattening pass is expensive enough that it seems worthwhile to - * scan the rangetable to see if we can avoid it. - */ - has_join_rtes = false; - foreach(rt, parse->rtable) - { - RangeTblEntry *rte = lfirst(rt); - - if (rte->rtekind == RTE_JOIN) - { - has_join_rtes = true; - break; - } - } - if (has_join_rtes) - expr = flatten_join_alias_vars(expr, parse->rtable, false); - - return expr; -} - -/* - * preprocess_qual_conditions - * Recursively scan the query's jointree and do subquery_planner's - * preprocessing work on each qual condition found therein. 
- */ -static void -preprocess_qual_conditions(Query *parse, Node *jtnode) -{ - if (jtnode == NULL) - return; - if (IsA(jtnode, RangeTblRef)) - { - /* nothing to do here */ - } - else if (IsA(jtnode, FromExpr)) - { - FromExpr *f = (FromExpr *) jtnode; - List *l; - - foreach(l, f->fromlist) - preprocess_qual_conditions(parse, lfirst(l)); - - f->quals = preprocess_expression(parse, f->quals, EXPRKIND_WHERE); - } - else if (IsA(jtnode, JoinExpr)) - { - JoinExpr *j = (JoinExpr *) jtnode; - - preprocess_qual_conditions(parse, j->larg); - preprocess_qual_conditions(parse, j->rarg); - - j->quals = preprocess_expression(parse, j->quals, EXPRKIND_WHERE); - } - else - elog(ERROR, "preprocess_qual_conditions: unexpected node type %d", - nodeTag(jtnode)); -} - -/*-------------------- - * inheritance_planner - * Generate a plan in the case where the result relation is an - * inheritance set. - * - * We have to handle this case differently from cases where a source - * relation is an inheritance set. Source inheritance is expanded at - * the bottom of the plan tree (see allpaths.c), but target inheritance - * has to be expanded at the top. The reason is that for UPDATE, each - * target relation needs a different targetlist matching its own column - * set. (This is not so critical for DELETE, but for simplicity we treat - * inherited DELETE the same way.) Fortunately, the UPDATE/DELETE target - * can never be the nullable side of an outer join, so it's OK to generate - * the plan this way. - * - * parse is the querytree produced by the parser & rewriter. - * inheritlist is an integer list of RT indexes for the result relation set. - * - * Returns a query plan. 
- *-------------------- - */ -static Plan * -inheritance_planner(Query *parse, List *inheritlist) -{ - int parentRTindex = parse->resultRelation; - Oid parentOID = getrelid(parentRTindex, parse->rtable); - List *subplans = NIL; - List *tlist = NIL; - List *l; - - foreach(l, inheritlist) - { - int childRTindex = lfirsti(l); - Oid childOID = getrelid(childRTindex, parse->rtable); - Query *subquery; - Plan *subplan; - - /* Generate modified query with this rel as target */ - subquery = (Query *) adjust_inherited_attrs((Node *) parse, - parentRTindex, parentOID, - childRTindex, childOID); - /* Generate plan */ - subplan = grouping_planner(subquery, 0.0 /* retrieve all tuples */ ); - subplans = lappend(subplans, subplan); - /* Save preprocessed tlist from first rel for use in Append */ - if (tlist == NIL) - tlist = subplan->targetlist; - } - - /* Save the target-relations list for the executor, too */ - parse->resultRelations = inheritlist; - - return (Plan *) make_append(subplans, true, tlist); -} - -/*-------------------- - * grouping_planner - * Perform planning steps related to grouping, aggregation, etc. - * This primarily means adding top-level processing to the basic - * query plan produced by query_planner. - * - * parse is the querytree produced by the parser & rewriter. - * tuple_fraction is the fraction of tuples we expect will be retrieved - * - * tuple_fraction is interpreted as follows: - * < 0: determine fraction by inspection of query (normal case) - * 0: expect all tuples to be retrieved - * 0 < tuple_fraction < 1: expect the given fraction of tuples available - * from the plan to be retrieved - * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples - * expected to be retrieved (ie, a LIMIT specification) - * The normal case is to pass -1, but some callers pass values >= 0 to - * override this routine's determination of the appropriate fraction. - * - * Returns a query plan. 
- *-------------------- - */ -static Plan * -grouping_planner(Query *parse, double tuple_fraction) -{ - List *tlist = parse->targetList; - Plan *result_plan; - List *current_pathkeys; - List *group_pathkeys; - List *sort_pathkeys; - AttrNumber *groupColIdx = NULL; - - if (parse->setOperations) - { - /* - * Construct the plan for set operations. The result will not - * need any work except perhaps a top-level sort and/or LIMIT. - */ - result_plan = plan_set_operations(parse); - - /* - * We should not need to call preprocess_targetlist, since we must - * be in a SELECT query node. Instead, use the targetlist - * returned by plan_set_operations (since this tells whether it - * returned any resjunk columns!), and transfer any sort key - * information from the original tlist. - */ - Assert(parse->commandType == CMD_SELECT); - - tlist = postprocess_setop_tlist(result_plan->targetlist, tlist); - - /* - * Can't handle FOR UPDATE here (parser should have checked - * already, but let's make sure). - */ - if (parse->rowMarks) - elog(ERROR, "SELECT FOR UPDATE is not allowed with UNION/INTERSECT/EXCEPT"); - - /* - * We set current_pathkeys NIL indicating we do not know sort - * order. This is correct when the top set operation is UNION - * ALL, since the appended-together results are unsorted even if - * the subplans were sorted. For other set operations we could be - * smarter --- room for future improvement! - */ - current_pathkeys = NIL; - - /* - * Calculate pathkeys that represent grouping/ordering - * requirements (grouping should always be null, but...) - */ - group_pathkeys = make_pathkeys_for_sortclauses(parse->groupClause, - tlist); - sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause, - tlist); - } - else - { - List *sub_tlist; - - /* Preprocess targetlist in case we are inside an INSERT/UPDATE. 
*/ - tlist = preprocess_targetlist(tlist, - parse->commandType, - parse->resultRelation, - parse->rtable); - - /* - * Add TID targets for rels selected FOR UPDATE (should this be - * done in preprocess_targetlist?). The executor uses the TID to - * know which rows to lock, much as for UPDATE or DELETE. - */ - if (parse->rowMarks) - { - List *l; - - /* - * We've got trouble if the FOR UPDATE appears inside - * grouping, since grouping renders a reference to individual - * tuple CTIDs invalid. This is also checked at parse time, - * but that's insufficient because of rule substitution, query - * pullup, etc. - */ - CheckSelectForUpdate(parse); - - /* - * Currently the executor only supports FOR UPDATE at top - * level - */ - if (PlannerQueryLevel > 1) - elog(ERROR, "SELECT FOR UPDATE is not allowed in subselects"); - - foreach(l, parse->rowMarks) - { - Index rti = lfirsti(l); - char *resname; - Resdom *resdom; - Var *var; - TargetEntry *ctid; - - resname = (char *) palloc(32); - sprintf(resname, "ctid%u", rti); - resdom = makeResdom(length(tlist) + 1, - TIDOID, - -1, - resname, - true); - - var = makeVar(rti, - SelfItemPointerAttributeNumber, - TIDOID, - -1, - 0); - - ctid = makeTargetEntry(resdom, (Node *) var); - tlist = lappend(tlist, ctid); - } - } - - /* - * Generate appropriate target list for subplan; may be different - * from tlist if grouping or aggregation is needed. - */ - sub_tlist = make_subplanTargetList(parse, tlist, &groupColIdx); - - /* - * Calculate pathkeys that represent grouping/ordering - * requirements - */ - group_pathkeys = make_pathkeys_for_sortclauses(parse->groupClause, - tlist); - sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause, - tlist); - - /* - * Figure out whether we need a sorted result from query_planner. - * - * If we have a GROUP BY clause, then we want a result sorted - * properly for grouping. Otherwise, if there is an ORDER BY - * clause, we want to sort by the ORDER BY clause. 
(Note: if we - * have both, and ORDER BY is a superset of GROUP BY, it would be - * tempting to request sort by ORDER BY --- but that might just - * leave us failing to exploit an available sort order at all. - * Needs more thought...) - */ - if (parse->groupClause) - parse->query_pathkeys = group_pathkeys; - else if (parse->sortClause) - parse->query_pathkeys = sort_pathkeys; - else - parse->query_pathkeys = NIL; - - /* - * Figure out whether we expect to retrieve all the tuples that - * the plan can generate, or to stop early due to outside factors - * such as a cursor. If the caller passed a value >= 0, believe - * that value, else do our own examination of the query context. - */ - if (tuple_fraction < 0.0) - { - /* Initial assumption is we need all the tuples */ - tuple_fraction = 0.0; - - /* - * Check for retrieve-into-portal, ie DECLARE CURSOR. - * - * We have no real idea how many tuples the user will ultimately - * FETCH from a cursor, but it seems a good bet that he - * doesn't want 'em all. Optimize for 10% retrieval (you - * gotta better number? Should this be a SETtable parameter?) - */ - if (parse->isPortal) - tuple_fraction = 0.10; - } - - /* - * Adjust tuple_fraction if we see that we are going to apply - * limiting/grouping/aggregation/etc. This is not overridable by - * the caller, since it reflects plan actions that this routine - * will certainly take, not assumptions about context. - */ - if (parse->limitCount != NULL) - { - /* - * A LIMIT clause limits the absolute number of tuples - * returned. However, if it's not a constant LIMIT then we - * have to punt; for lack of a better idea, assume 10% of the - * plan's result is wanted. - */ - double limit_fraction = 0.0; - - if (IsA(parse->limitCount, Const)) - { - Const *limitc = (Const *) parse->limitCount; - int32 count = DatumGetInt32(limitc->constvalue); - - /* - * A NULL-constant LIMIT represents "LIMIT ALL", which we - * treat the same as no limit (ie, expect to retrieve all - * the tuples). 
- */ - if (!limitc->constisnull && count > 0) - { - limit_fraction = (double) count; - /* We must also consider the OFFSET, if present */ - if (parse->limitOffset != NULL) - { - if (IsA(parse->limitOffset, Const)) - { - int32 offset; - - limitc = (Const *) parse->limitOffset; - offset = DatumGetInt32(limitc->constvalue); - if (!limitc->constisnull && offset > 0) - limit_fraction += (double) offset; - } - else - { - /* OFFSET is an expression ... punt ... */ - limit_fraction = 0.10; - } - } - } - } - else - { - /* LIMIT is an expression ... punt ... */ - limit_fraction = 0.10; - } - - if (limit_fraction > 0.0) - { - /* - * If we have absolute limits from both caller and LIMIT, - * use the smaller value; if one is fractional and the - * other absolute, treat the fraction as a fraction of the - * absolute value; else we can multiply the two fractions - * together. - */ - if (tuple_fraction >= 1.0) - { - if (limit_fraction >= 1.0) - { - /* both absolute */ - tuple_fraction = Min(tuple_fraction, limit_fraction); - } - else - { - /* caller absolute, limit fractional */ - tuple_fraction *= limit_fraction; - if (tuple_fraction < 1.0) - tuple_fraction = 1.0; - } - } - else if (tuple_fraction > 0.0) - { - if (limit_fraction >= 1.0) - { - /* caller fractional, limit absolute */ - tuple_fraction *= limit_fraction; - if (tuple_fraction < 1.0) - tuple_fraction = 1.0; - } - else - { - /* both fractional */ - tuple_fraction *= limit_fraction; - } - } - else - { - /* no info from caller, just use limit */ - tuple_fraction = limit_fraction; - } - } - } - - if (parse->groupClause) - { - /* - * In GROUP BY mode, we have the little problem that we don't - * really know how many input tuples will be needed to make a - * group, so we can't translate an output LIMIT count into an - * input count. For lack of a better idea, assume 25% of the - * input data will be processed if there is any output limit. 
- * However, if the caller gave us a fraction rather than an - * absolute count, we can keep using that fraction (which - * amounts to assuming that all the groups are about the same - * size). - */ - if (tuple_fraction >= 1.0) - tuple_fraction = 0.25; - - /* - * If both GROUP BY and ORDER BY are specified, we will need - * two levels of sort --- and, therefore, certainly need to - * read all the input tuples --- unless ORDER BY is a subset - * of GROUP BY. (We have not yet canonicalized the pathkeys, - * so must use the slower noncanonical comparison method.) - */ - if (parse->groupClause && parse->sortClause && - !noncanonical_pathkeys_contained_in(sort_pathkeys, - group_pathkeys)) - tuple_fraction = 0.0; - } - else if (parse->hasAggs) - { - /* - * Ungrouped aggregate will certainly want all the input - * tuples. - */ - tuple_fraction = 0.0; - } - else if (parse->distinctClause) - { - /* - * SELECT DISTINCT, like GROUP, will absorb an unpredictable - * number of input tuples per output tuple. Handle the same - * way. - */ - if (tuple_fraction >= 1.0) - tuple_fraction = 0.25; - } - - /* Generate the basic plan for this Query */ - result_plan = query_planner(parse, - sub_tlist, - tuple_fraction); - - /* - * query_planner returns actual sort order (which is not - * necessarily what we requested) in query_pathkeys. - */ - current_pathkeys = parse->query_pathkeys; - } - - /* - * We couldn't canonicalize group_pathkeys and sort_pathkeys before - * running query_planner(), so do it now. - */ - group_pathkeys = canonicalize_pathkeys(parse, group_pathkeys); - sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys); - - /* - * If we have a GROUP BY clause, insert a group node (plus the - * appropriate sort node, if necessary). - */ - if (parse->groupClause) - { - bool tuplePerGroup; - List *group_tlist; - bool is_sorted; - - /* - * Decide whether how many tuples per group the Group node needs - * to return. 
(Needs only one tuple per group if no aggregate is - * present. Otherwise, need every tuple from the group to do the - * aggregation.) Note tuplePerGroup is named backwards :-( - */ - tuplePerGroup = parse->hasAggs; - - /* - * If there are aggregates then the Group node should just return - * the same set of vars as the subplan did. If there are no aggs - * then the Group node had better compute the final tlist. - */ - if (parse->hasAggs) - group_tlist = new_unsorted_tlist(result_plan->targetlist); - else - group_tlist = tlist; - - /* - * Figure out whether the path result is already ordered the way - * we need it --- if so, no need for an explicit sort step. - */ - if (pathkeys_contained_in(group_pathkeys, current_pathkeys)) - { - is_sorted = true; /* no sort needed now */ - /* current_pathkeys remains unchanged */ - } - else - { - /* - * We will need to do an explicit sort by the GROUP BY clause. - * make_groupplan will do the work, but set current_pathkeys - * to indicate the resulting order. - */ - is_sorted = false; - current_pathkeys = group_pathkeys; - } - - result_plan = make_groupplan(parse, - group_tlist, - tuplePerGroup, - parse->groupClause, - groupColIdx, - is_sorted, - result_plan); - } - - /* - * If aggregate is present, insert the Agg node - * - * HAVING clause, if any, becomes qual of the Agg node - */ - if (parse->hasAggs) - { - result_plan = (Plan *) make_agg(tlist, - (List *) parse->havingQual, - result_plan); - /* Note: Agg does not affect any existing sort order of the tuples */ - } - else - { - /* If there are no Aggs, we shouldn't have any HAVING qual anymore */ - Assert(parse->havingQual == NULL); - } - - /* - * If we were not able to make the plan come out in the right order, - * add an explicit sort step. 
- */ - if (parse->sortClause) - { - if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys)) - result_plan = make_sortplan(parse, tlist, result_plan, - parse->sortClause); - } - - /* - * If there is a DISTINCT clause, add the UNIQUE node. - */ - if (parse->distinctClause) - { - result_plan = (Plan *) make_unique(tlist, result_plan, - parse->distinctClause); - } - - /* - * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node. - */ - if (parse->limitOffset || parse->limitCount) - { - result_plan = (Plan *) make_limit(tlist, result_plan, - parse->limitOffset, - parse->limitCount); - } - - return result_plan; -} - -/*--------------- - * make_subplanTargetList - * Generate appropriate target list when grouping is required. - * - * When grouping_planner inserts Aggregate and/or Group plan nodes above - * the result of query_planner, we typically want to pass a different - * target list to query_planner than the outer plan nodes should have. - * This routine generates the correct target list for the subplan. - * - * The initial target list passed from the parser already contains entries - * for all ORDER BY and GROUP BY expressions, but it will not have entries - * for variables used only in HAVING clauses; so we need to add those - * variables to the subplan target list. Also, if we are doing either - * grouping or aggregation, we flatten all expressions except GROUP BY items - * into their component variables; the other expressions will be computed by - * the inserted nodes rather than by the subplan. For example, - * given a query like - * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b; - * we want to pass this targetlist to the subplan: - * a,b,c,d,a+b - * where the a+b target will be used by the Sort/Group steps, and the - * other targets will be used for computing the final results. 
(In the - * above example we could theoretically suppress the a and b targets and - * pass down only c,d,a+b, but it's not really worth the trouble to - * eliminate simple var references from the subplan. We will avoid doing - * the extra computation to recompute a+b at the outer level; see - * replace_vars_with_subplan_refs() in setrefs.c.) - * - * 'parse' is the query being processed. - * 'tlist' is the query's target list. - * 'groupColIdx' receives an array of column numbers for the GROUP BY - * expressions (if there are any) in the subplan's target list. - * - * The result is the targetlist to be passed to the subplan. - *--------------- - */ -static List * -make_subplanTargetList(Query *parse, - List *tlist, - AttrNumber **groupColIdx) -{ - List *sub_tlist; - List *extravars; - int numCols; - - *groupColIdx = NULL; - - /* - * If we're not grouping or aggregating, nothing to do here; - * query_planner should receive the unmodified target list. - */ - if (!parse->hasAggs && !parse->groupClause && !parse->havingQual) - return tlist; - - /* - * Otherwise, start with a "flattened" tlist (having just the vars - * mentioned in the targetlist and HAVING qual --- but not upper- - * level Vars; they will be replaced by Params later on). - */ - sub_tlist = flatten_tlist(tlist); - extravars = pull_var_clause(parse->havingQual, false); - sub_tlist = add_to_flat_tlist(sub_tlist, extravars); - freeList(extravars); - - /* - * If grouping, create sub_tlist entries for all GROUP BY expressions - * (GROUP BY items that are simple Vars should be in the list - * already), and make an array showing where the group columns are in - * the sub_tlist. 
- */ - numCols = length(parse->groupClause); - if (numCols > 0) - { - int keyno = 0; - AttrNumber *grpColIdx; - List *gl; - - grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); - *groupColIdx = grpColIdx; - - foreach(gl, parse->groupClause) - { - GroupClause *grpcl = (GroupClause *) lfirst(gl); - Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist); - TargetEntry *te = NULL; - List *sl; - - /* Find or make a matching sub_tlist entry */ - foreach(sl, sub_tlist) - { - te = (TargetEntry *) lfirst(sl); - if (equal(groupexpr, te->expr)) - break; - } - if (!sl) - { - te = makeTargetEntry(makeResdom(length(sub_tlist) + 1, - exprType(groupexpr), - exprTypmod(groupexpr), - NULL, - false), - groupexpr); - sub_tlist = lappend(sub_tlist, te); - } - - /* and save its resno */ - grpColIdx[keyno++] = te->resdom->resno; - } - } - - return sub_tlist; -} - -/* - * make_groupplan - * Add a Group node for GROUP BY processing. - * If we couldn't make the subplan produce presorted output for grouping, - * first add an explicit Sort node. - */ -static Plan * -make_groupplan(Query *parse, - List *group_tlist, - bool tuplePerGroup, - List *groupClause, - AttrNumber *grpColIdx, - bool is_presorted, - Plan *subplan) -{ - int numCols = length(groupClause); - - if (!is_presorted) - { - /* - * The Sort node always just takes a copy of the subplan's tlist - * plus ordering information. (This might seem inefficient if the - * subplan contains complex GROUP BY expressions, but in fact Sort - * does not evaluate its targetlist --- it only outputs the same - * tuples in a new order. So the expressions we might be copying - * are just dummies with no extra execution cost.) 
- */ - List *sort_tlist = new_unsorted_tlist(subplan->targetlist); - int keyno = 0; - List *gl; - - foreach(gl, groupClause) - { - GroupClause *grpcl = (GroupClause *) lfirst(gl); - TargetEntry *te = nth(grpColIdx[keyno] - 1, sort_tlist); - Resdom *resdom = te->resdom; - - /* - * Check for the possibility of duplicate group-by clauses --- - * the parser should have removed 'em, but the Sort executor - * will get terribly confused if any get through! - */ - if (resdom->reskey == 0) - { - /* OK, insert the ordering info needed by the executor. */ - resdom->reskey = ++keyno; - resdom->reskeyop = grpcl->sortop; - } - } - - Assert(keyno > 0); - - subplan = (Plan *) make_sort(parse, sort_tlist, subplan, keyno); - } - - return (Plan *) make_group(group_tlist, tuplePerGroup, numCols, - grpColIdx, subplan); -} - -/* - * make_sortplan - * Add a Sort node to implement an explicit ORDER BY clause. - */ -Plan * -make_sortplan(Query *parse, List *tlist, Plan *plannode, List *sortcls) -{ - List *sort_tlist; - List *i; - int keyno = 0; - - /* - * First make a copy of the tlist so that we don't corrupt the - * original. - */ - sort_tlist = new_unsorted_tlist(tlist); - - foreach(i, sortcls) - { - SortClause *sortcl = (SortClause *) lfirst(i); - TargetEntry *tle = get_sortgroupclause_tle(sortcl, sort_tlist); - Resdom *resdom = tle->resdom; - - /* - * Check for the possibility of duplicate order-by clauses --- the - * parser should have removed 'em, but the executor will get - * terribly confused if any get through! - */ - if (resdom->reskey == 0) - { - /* OK, insert the ordering info needed by the executor. */ - resdom->reskey = ++keyno; - resdom->reskeyop = sortcl->sortop; - } - } - - Assert(keyno > 0); - - return (Plan *) make_sort(parse, sort_tlist, plannode, keyno); -} - -/* - * postprocess_setop_tlist - * Fix up targetlist returned by plan_set_operations(). - * - * We need to transpose sort key info from the orig_tlist into new_tlist. 
- * NOTE: this would not be good enough if we supported resjunk sort keys - * for results of set operations --- then, we'd need to project a whole - * new tlist to evaluate the resjunk columns. For now, just elog if we - * find any resjunk columns in orig_tlist. - */ -static List * -postprocess_setop_tlist(List *new_tlist, List *orig_tlist) -{ - List *l; - - foreach(l, new_tlist) - { - TargetEntry *new_tle = (TargetEntry *) lfirst(l); - TargetEntry *orig_tle; - - /* ignore resjunk columns in setop result */ - if (new_tle->resdom->resjunk) - continue; - - Assert(orig_tlist != NIL); - orig_tle = (TargetEntry *) lfirst(orig_tlist); - orig_tlist = lnext(orig_tlist); - if (orig_tle->resdom->resjunk) - elog(ERROR, "postprocess_setop_tlist: resjunk output columns not implemented"); - Assert(new_tle->resdom->resno == orig_tle->resdom->resno); - Assert(new_tle->resdom->restype == orig_tle->resdom->restype); - new_tle->resdom->ressortgroupref = orig_tle->resdom->ressortgroupref; - } - if (orig_tlist != NIL) - elog(ERROR, "postprocess_setop_tlist: resjunk output columns not implemented"); - return new_tlist; -} diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c deleted file mode 100644 index bcbf9c8e992..00000000000 --- a/src/backend/optimizer/plan/setrefs.c +++ /dev/null @@ -1,580 +0,0 @@ -/*------------------------------------------------------------------------- - * - * setrefs.c - * Post-processing of a completed plan tree: fix references to subplan - * vars, and compute regproc values for operators - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.79 2002/06/20 20:29:31 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include <sys/types.h> - -#include 
"nodes/makefuncs.h" -#include "nodes/nodeFuncs.h" -#include "optimizer/clauses.h" -#include "optimizer/planmain.h" -#include "optimizer/tlist.h" -#include "optimizer/var.h" -#include "parser/parsetree.h" - - -typedef struct -{ - List *rtable; - List *outer_tlist; - List *inner_tlist; - Index acceptable_rel; -} join_references_context; - -typedef struct -{ - Index subvarno; - List *subplan_targetlist; - bool tlist_has_non_vars; -} replace_vars_with_subplan_refs_context; - -static void fix_expr_references(Plan *plan, Node *node); -static void set_join_references(Join *join, List *rtable); -static void set_uppernode_references(Plan *plan, Index subvarno); -static Node *join_references_mutator(Node *node, - join_references_context *context); -static Node *replace_vars_with_subplan_refs(Node *node, - Index subvarno, - List *subplan_targetlist, - bool tlist_has_non_vars); -static Node *replace_vars_with_subplan_refs_mutator(Node *node, - replace_vars_with_subplan_refs_context *context); -static bool fix_opids_walker(Node *node, void *context); - -/***************************************************************************** - * - * SUBPLAN REFERENCES - * - *****************************************************************************/ - -/* - * set_plan_references - * This is the final processing pass of the planner/optimizer. The plan - * tree is complete; we just have to adjust some representational details - * for the convenience of the executor. We update Vars in upper plan nodes - * to refer to the outputs of their subplans, and we compute regproc OIDs - * for operators (ie, we look up the function that implements each op). - * We must also build lists of all the subplan nodes present in each - * plan node's expression trees. - * - * set_plan_references recursively traverses the whole plan tree. - * - * Returns nothing of interest, but modifies internal fields of nodes. 
- */ -void -set_plan_references(Plan *plan, List *rtable) -{ - List *pl; - - if (plan == NULL) - return; - - /* - * We must rebuild the plan's list of subplan nodes, since we are - * copying/mutating its expression trees. - */ - plan->subPlan = NIL; - - /* - * Plan-type-specific fixes - */ - switch (nodeTag(plan)) - { - case T_SeqScan: - fix_expr_references(plan, (Node *) plan->targetlist); - fix_expr_references(plan, (Node *) plan->qual); - break; - case T_IndexScan: - fix_expr_references(plan, (Node *) plan->targetlist); - fix_expr_references(plan, (Node *) plan->qual); - fix_expr_references(plan, - (Node *) ((IndexScan *) plan)->indxqual); - fix_expr_references(plan, - (Node *) ((IndexScan *) plan)->indxqualorig); - break; - case T_TidScan: - fix_expr_references(plan, (Node *) plan->targetlist); - fix_expr_references(plan, (Node *) plan->qual); - break; - case T_SubqueryScan: - { - RangeTblEntry *rte; - - /* - * We do not do set_uppernode_references() here, because a - * SubqueryScan will always have been created with correct - * references to its subplan's outputs to begin with. 
- */ - fix_expr_references(plan, (Node *) plan->targetlist); - fix_expr_references(plan, (Node *) plan->qual); - - /* Recurse into subplan too */ - rte = rt_fetch(((SubqueryScan *) plan)->scan.scanrelid, - rtable); - Assert(rte->rtekind == RTE_SUBQUERY); - set_plan_references(((SubqueryScan *) plan)->subplan, - rte->subquery->rtable); - } - break; - case T_FunctionScan: - { - RangeTblEntry *rte; - - fix_expr_references(plan, (Node *) plan->targetlist); - fix_expr_references(plan, (Node *) plan->qual); - rte = rt_fetch(((FunctionScan *) plan)->scan.scanrelid, - rtable); - Assert(rte->rtekind == RTE_FUNCTION); - fix_expr_references(plan, rte->funcexpr); - } - break; - case T_NestLoop: - set_join_references((Join *) plan, rtable); - fix_expr_references(plan, (Node *) plan->targetlist); - fix_expr_references(plan, (Node *) plan->qual); - fix_expr_references(plan, (Node *) ((Join *) plan)->joinqual); - break; - case T_MergeJoin: - set_join_references((Join *) plan, rtable); - fix_expr_references(plan, (Node *) plan->targetlist); - fix_expr_references(plan, (Node *) plan->qual); - fix_expr_references(plan, (Node *) ((Join *) plan)->joinqual); - fix_expr_references(plan, - (Node *) ((MergeJoin *) plan)->mergeclauses); - break; - case T_HashJoin: - set_join_references((Join *) plan, rtable); - fix_expr_references(plan, (Node *) plan->targetlist); - fix_expr_references(plan, (Node *) plan->qual); - fix_expr_references(plan, (Node *) ((Join *) plan)->joinqual); - fix_expr_references(plan, - (Node *) ((HashJoin *) plan)->hashclauses); - break; - case T_Material: - case T_Sort: - case T_Unique: - case T_SetOp: - case T_Limit: - case T_Hash: - - /* - * These plan types don't actually bother to evaluate their - * targetlists or quals (because they just return their - * unmodified input tuples). The optimizer is lazy about - * creating really valid targetlists for them. Best to just - * leave the targetlist alone. 
In particular, we do not want - * to pull a subplan list for them, since we will likely end - * up with duplicate list entries for subplans that also - * appear in lower levels of the plan tree! - */ - break; - case T_Agg: - case T_Group: - set_uppernode_references(plan, (Index) 0); - fix_expr_references(plan, (Node *) plan->targetlist); - fix_expr_references(plan, (Node *) plan->qual); - break; - case T_Result: - - /* - * Result may or may not have a subplan; no need to fix up - * subplan references if it hasn't got one... - * - * XXX why does Result use a different subvarno from Agg/Group? - */ - if (plan->lefttree != NULL) - set_uppernode_references(plan, (Index) OUTER); - fix_expr_references(plan, (Node *) plan->targetlist); - fix_expr_references(plan, (Node *) plan->qual); - fix_expr_references(plan, ((Result *) plan)->resconstantqual); - break; - case T_Append: - - /* - * Append, like Sort et al, doesn't actually evaluate its - * targetlist or quals, and we haven't bothered to give it its - * own tlist copy. So, don't fix targetlist/qual. But do - * recurse into subplans. - */ - foreach(pl, ((Append *) plan)->appendplans) - set_plan_references((Plan *) lfirst(pl), rtable); - break; - default: - elog(ERROR, "set_plan_references: unknown plan type %d", - nodeTag(plan)); - break; - } - - /* - * Now recurse into subplans, if any - * - * NOTE: it is essential that we recurse into subplans AFTER we set - * subplan references in this plan's tlist and quals. If we did the - * reference-adjustments bottom-up, then we would fail to match this - * plan's var nodes against the already-modified nodes of the - * subplans. 
- */ - set_plan_references(plan->lefttree, rtable); - set_plan_references(plan->righttree, rtable); - foreach(pl, plan->initPlan) - { - SubPlan *sp = (SubPlan *) lfirst(pl); - - Assert(IsA(sp, SubPlan)); - set_plan_references(sp->plan, sp->rtable); - } - foreach(pl, plan->subPlan) - { - SubPlan *sp = (SubPlan *) lfirst(pl); - - Assert(IsA(sp, SubPlan)); - set_plan_references(sp->plan, sp->rtable); - } -} - -/* - * fix_expr_references - * Do final cleanup on expressions (targetlists or quals). - * - * This consists of looking up operator opcode info for Oper nodes - * and adding subplans to the Plan node's list of contained subplans. - */ -static void -fix_expr_references(Plan *plan, Node *node) -{ - fix_opids(node); - plan->subPlan = nconc(plan->subPlan, pull_subplans(node)); -} - -/* - * set_join_references - * Modifies the target list of a join node to reference its subplans, - * by setting the varnos to OUTER or INNER and setting attno values to the - * result domain number of either the corresponding outer or inner join - * tuple item. - * - * Note: this same transformation has already been applied to the quals - * of the join by createplan.c. It's a little odd to do it here for the - * targetlist and there for the quals, but it's easier that way. (Look - * at switch_outer() and the handling of nestloop inner indexscans to - * see why.) - * - * Because the quals are reference-adjusted sooner, we cannot do equal() - * comparisons between qual and tlist var nodes during the time between - * creation of a plan node by createplan.c and its fixing by this module. - * Fortunately, there doesn't seem to be any need to do that. - * - * 'join' is a join plan node - * 'rtable' is the associated range table - */ -static void -set_join_references(Join *join, List *rtable) -{ - Plan *outer = join->plan.lefttree; - Plan *inner = join->plan.righttree; - List *outer_tlist = ((outer == NULL) ? NIL : outer->targetlist); - List *inner_tlist = ((inner == NULL) ? 
NIL : inner->targetlist); - - join->plan.targetlist = join_references(join->plan.targetlist, - rtable, - outer_tlist, - inner_tlist, - (Index) 0); -} - -/* - * set_uppernode_references - * Update the targetlist and quals of an upper-level plan node - * to refer to the tuples returned by its lefttree subplan. - * - * This is used for single-input plan types like Agg, Group, Result. - * - * In most cases, we have to match up individual Vars in the tlist and - * qual expressions with elements of the subplan's tlist (which was - * generated by flatten_tlist() from these selfsame expressions, so it - * should have all the required variables). There is an important exception, - * however: GROUP BY and ORDER BY expressions will have been pushed into the - * subplan tlist unflattened. If these values are also needed in the output - * then we want to reference the subplan tlist element rather than recomputing - * the expression. - */ -static void -set_uppernode_references(Plan *plan, Index subvarno) -{ - Plan *subplan = plan->lefttree; - List *subplan_targetlist, - *output_targetlist, - *l; - bool tlist_has_non_vars; - - if (subplan != NULL) - subplan_targetlist = subplan->targetlist; - else - subplan_targetlist = NIL; - - /* - * Detect whether subplan tlist has any non-Vars (typically it won't - * because it's been flattened). This allows us to save comparisons - * in common cases. 
- */ - tlist_has_non_vars = false; - foreach(l, subplan_targetlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(l); - - if (tle->expr && !IsA(tle->expr, Var)) - { - tlist_has_non_vars = true; - break; - } - } - - output_targetlist = NIL; - foreach(l, plan->targetlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(l); - Node *newexpr; - - newexpr = replace_vars_with_subplan_refs(tle->expr, - subvarno, - subplan_targetlist, - tlist_has_non_vars); - output_targetlist = lappend(output_targetlist, - makeTargetEntry(tle->resdom, newexpr)); - } - plan->targetlist = output_targetlist; - - plan->qual = (List *) - replace_vars_with_subplan_refs((Node *) plan->qual, - subvarno, - subplan_targetlist, - tlist_has_non_vars); -} - -/* - * join_references - * Creates a new set of targetlist entries or join qual clauses by - * changing the varno/varattno values of variables in the clauses - * to reference target list values from the outer and inner join - * relation target lists. Also, any join alias variables in the - * clauses are expanded into references to their component variables. - * - * This is used in two different scenarios: a normal join clause, where - * all the Vars in the clause *must* be replaced by OUTER or INNER references; - * and an indexscan being used on the inner side of a nestloop join. - * In the latter case we want to replace the outer-relation Vars by OUTER - * references, but not touch the Vars of the inner relation. - * - * For a normal join, acceptable_rel should be zero so that any failure to - * match a Var will be reported as an error. For the indexscan case, - * pass inner_tlist = NIL and acceptable_rel = the ID of the inner relation. 
- * - * 'clauses' is the targetlist or list of join clauses - * 'rtable' is the current range table - * 'outer_tlist' is the target list of the outer join relation - * 'inner_tlist' is the target list of the inner join relation, or NIL - * 'acceptable_rel' is either zero or the rangetable index of a relation - * whose Vars may appear in the clause without provoking an error. - * - * Returns the new expression tree. The original clause structure is - * not modified. - */ -List * -join_references(List *clauses, - List *rtable, - List *outer_tlist, - List *inner_tlist, - Index acceptable_rel) -{ - join_references_context context; - - context.rtable = rtable; - context.outer_tlist = outer_tlist; - context.inner_tlist = inner_tlist; - context.acceptable_rel = acceptable_rel; - return (List *) join_references_mutator((Node *) clauses, &context); -} - -static Node * -join_references_mutator(Node *node, - join_references_context *context) -{ - if (node == NULL) - return NULL; - if (IsA(node, Var)) - { - Var *var = (Var *) node; - Resdom *resdom; - Node *newnode; - - /* First look for the var in the input tlists */ - resdom = tlist_member((Node *) var, context->outer_tlist); - if (resdom) - { - Var *newvar = (Var *) copyObject(var); - - newvar->varno = OUTER; - newvar->varattno = resdom->resno; - return (Node *) newvar; - } - resdom = tlist_member((Node *) var, context->inner_tlist); - if (resdom) - { - Var *newvar = (Var *) copyObject(var); - - newvar->varno = INNER; - newvar->varattno = resdom->resno; - return (Node *) newvar; - } - - /* Perhaps it's a join alias that can be resolved to input vars? */ - newnode = flatten_join_alias_vars((Node *) var, - context->rtable, - true); - if (!equal(newnode, (Node *) var)) - { - /* Must now resolve the input vars... */ - newnode = join_references_mutator(newnode, context); - return newnode; - } - - /* - * No referent found for Var --- either raise an error, or return - * the Var unmodified if it's for acceptable_rel. 
- */ - if (var->varno != context->acceptable_rel) - elog(ERROR, "join_references: variable not in subplan target lists"); - return (Node *) copyObject(var); - } - return expression_tree_mutator(node, - join_references_mutator, - (void *) context); -} - -/* - * replace_vars_with_subplan_refs - * This routine modifies an expression tree so that all Var nodes - * reference target nodes of a subplan. It is used to fix up - * target and qual expressions of non-join upper-level plan nodes. - * - * An error is raised if no matching var can be found in the subplan tlist - * --- so this routine should only be applied to nodes whose subplans' - * targetlists were generated via flatten_tlist() or some such method. - * - * If tlist_has_non_vars is true, then we try to match whole subexpressions - * against elements of the subplan tlist, so that we can avoid recomputing - * expressions that were already computed by the subplan. (This is relatively - * expensive, so we don't want to try it in the common case where the - * subplan tlist is just a flattened list of Vars.) - * - * 'node': the tree to be fixed (a target item or qual) - * 'subvarno': varno to be assigned to all Vars - * 'subplan_targetlist': target list for subplan - * 'tlist_has_non_vars': true if subplan_targetlist contains non-Var exprs - * - * The resulting tree is a copy of the original in which all Var nodes have - * varno = subvarno, varattno = resno of corresponding subplan target. - * The original tree is not modified. 
- */ -static Node * -replace_vars_with_subplan_refs(Node *node, - Index subvarno, - List *subplan_targetlist, - bool tlist_has_non_vars) -{ - replace_vars_with_subplan_refs_context context; - - context.subvarno = subvarno; - context.subplan_targetlist = subplan_targetlist; - context.tlist_has_non_vars = tlist_has_non_vars; - return replace_vars_with_subplan_refs_mutator(node, &context); -} - -static Node * -replace_vars_with_subplan_refs_mutator(Node *node, - replace_vars_with_subplan_refs_context *context) -{ - if (node == NULL) - return NULL; - if (IsA(node, Var)) - { - Var *var = (Var *) node; - Resdom *resdom; - Var *newvar; - - resdom = tlist_member((Node *) var, context->subplan_targetlist); - if (!resdom) - elog(ERROR, "replace_vars_with_subplan_refs: variable not in subplan target list"); - newvar = (Var *) copyObject(var); - newvar->varno = context->subvarno; - newvar->varattno = resdom->resno; - return (Node *) newvar; - } - /* Try matching more complex expressions too, if tlist has any */ - if (context->tlist_has_non_vars) - { - Resdom *resdom; - - resdom = tlist_member(node, context->subplan_targetlist); - if (resdom) - { - /* Found a matching subplan output expression */ - Var *newvar; - - newvar = makeVar(context->subvarno, - resdom->resno, - resdom->restype, - resdom->restypmod, - 0); - newvar->varnoold = 0; /* wasn't ever a plain Var */ - newvar->varoattno = 0; - return (Node *) newvar; - } - } - return expression_tree_mutator(node, - replace_vars_with_subplan_refs_mutator, - (void *) context); -} - -/***************************************************************************** - * OPERATOR REGPROC LOOKUP - *****************************************************************************/ - -/* - * fix_opids - * Calculate opid field from opno for each Oper node in given tree. - * The given tree can be anything expression_tree_walker handles. - * - * The argument is modified in-place. 
(This is OK since we'd want the - * same change for any node, even if it gets visited more than once due to - * shared structure.) - */ -void -fix_opids(Node *node) -{ - /* This tree walk requires no special setup, so away we go... */ - fix_opids_walker(node, NULL); -} - -static bool -fix_opids_walker(Node *node, void *context) -{ - if (node == NULL) - return false; - if (is_opclause(node)) - replace_opid((Oper *) ((Expr *) node)->oper); - return expression_tree_walker(node, fix_opids_walker, context); -} diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c deleted file mode 100644 index c1ab186b011..00000000000 --- a/src/backend/optimizer/plan/subselect.c +++ /dev/null @@ -1,741 +0,0 @@ -/*------------------------------------------------------------------------- - * - * subselect.c - * Planning routines for subselects and parameters. - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.54 2002/06/20 20:29:31 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include "catalog/pg_operator.h" -#include "catalog/pg_type.h" -#include "nodes/makefuncs.h" -#include "optimizer/clauses.h" -#include "optimizer/cost.h" -#include "optimizer/planmain.h" -#include "optimizer/planner.h" -#include "optimizer/subselect.h" -#include "parser/parsetree.h" -#include "parser/parse_expr.h" -#include "parser/parse_oper.h" -#include "utils/syscache.h" - - -Index PlannerQueryLevel; /* level of current query */ -List *PlannerInitPlan; /* init subplans for current query */ -List *PlannerParamVar; /* to get Var from Param->paramid */ - -int PlannerPlanId = 0; /* to assign unique ID to subquery plans */ - -/*-------------------- - * PlannerParamVar is a list of Var nodes, wherein the n'th 
entry - * (n counts from 0) corresponds to Param->paramid = n. The Var nodes - * are ordinary except for one thing: their varlevelsup field does NOT - * have the usual interpretation of "subplan levels out from current". - * Instead, it contains the absolute plan level, with the outermost - * plan being level 1 and nested plans having higher level numbers. - * This nonstandardness is useful because we don't have to run around - * and update the list elements when we enter or exit a subplan - * recursion level. But we must pay attention not to confuse this - * meaning with the normal meaning of varlevelsup. - *-------------------- - */ - - -/* - * Create a new entry in the PlannerParamVar list, and return its index. - * - * var contains the data to be copied, except for varlevelsup which - * is set from the absolute level value given by varlevel. - */ -static int -new_param(Var *var, Index varlevel) -{ - Var *paramVar = (Var *) copyObject(var); - - paramVar->varlevelsup = varlevel; - - PlannerParamVar = lappend(PlannerParamVar, paramVar); - - return length(PlannerParamVar) - 1; -} - -/* - * Generate a Param node to replace the given Var, - * which is expected to have varlevelsup > 0 (ie, it is not local). - */ -static Param * -replace_var(Var *var) -{ - List *ppv; - Param *retval; - Index varlevel; - int i; - - Assert(var->varlevelsup > 0 && var->varlevelsup < PlannerQueryLevel); - varlevel = PlannerQueryLevel - var->varlevelsup; - - /* - * If there's already a PlannerParamVar entry for this same Var, just - * use it. NOTE: in sufficiently complex querytrees, it is possible - * for the same varno/varlevel to refer to different RTEs in different - * parts of the parsetree, so that different fields might end up - * sharing the same Param number. As long as we check the vartype as - * well, I believe that this sort of aliasing will cause no trouble. - * The correct field should get stored into the Param slot at - * execution in each part of the tree. 
- */ - i = 0; - foreach(ppv, PlannerParamVar) - { - Var *pvar = lfirst(ppv); - - if (pvar->varno == var->varno && - pvar->varattno == var->varattno && - pvar->varlevelsup == varlevel && - pvar->vartype == var->vartype) - break; - i++; - } - - if (!ppv) - { - /* Nope, so make a new one */ - i = new_param(var, varlevel); - } - - retval = makeNode(Param); - retval->paramkind = PARAM_EXEC; - retval->paramid = (AttrNumber) i; - retval->paramtype = var->vartype; - - return retval; -} - -/* - * Convert a bare SubLink (as created by the parser) into a SubPlan. - */ -static Node * -make_subplan(SubLink *slink) -{ - SubPlan *node = makeNode(SubPlan); - Query *subquery = (Query *) (slink->subselect); - Oid result_type = exprType((Node *) slink); - double tuple_fraction; - Plan *plan; - List *lst; - Node *result; - - /* - * Check to see if this node was already processed; if so we have - * trouble. We check to see if the linked-to Query appears to have - * been planned already, too. - */ - if (subquery == NULL) - elog(ERROR, "make_subplan: invalid expression structure (SubLink already processed?)"); - if (subquery->base_rel_list != NIL) - elog(ERROR, "make_subplan: invalid expression structure (subquery already processed?)"); - - /* - * Copy the source Query node. This is a quick and dirty kluge to - * resolve the fact that the parser can generate trees with multiple - * links to the same sub-Query node, but the planner wants to scribble - * on the Query. Try to clean this up when we do querytree redesign... - */ - subquery = (Query *) copyObject(subquery); - - /* - * For an EXISTS subplan, tell lower-level planner to expect that only - * the first tuple will be retrieved. For ALL and ANY subplans, we - * will be able to stop evaluating if the test condition fails, so - * very often not all the tuples will be retrieved; for lack of a - * better idea, specify 50% retrieval. For EXPR and MULTIEXPR - * subplans, use default behavior (we're only expecting one row out, - * anyway). 
- * - * NOTE: if you change these numbers, also change cost_qual_eval_walker() - * in path/costsize.c. - * - * XXX If an ALL/ANY subplan is uncorrelated, we may decide to - * materialize its result below. In that case it would've been better - * to specify full retrieval. At present, however, we can only detect - * correlation or lack of it after we've made the subplan :-(. Perhaps - * detection of correlation should be done as a separate step. - * Meanwhile, we don't want to be too optimistic about the percentage - * of tuples retrieved, for fear of selecting a plan that's bad for - * the materialization case. - */ - if (slink->subLinkType == EXISTS_SUBLINK) - tuple_fraction = 1.0; /* just like a LIMIT 1 */ - else if (slink->subLinkType == ALL_SUBLINK || - slink->subLinkType == ANY_SUBLINK) - tuple_fraction = 0.5; /* 50% */ - else - tuple_fraction = -1.0; /* default behavior */ - - /* - * Generate the plan for the subquery. - */ - node->plan = plan = subquery_planner(subquery, tuple_fraction); - - node->plan_id = PlannerPlanId++; /* Assign unique ID to this - * SubPlan */ - - node->rtable = subquery->rtable; - node->sublink = slink; - - slink->subselect = NULL; /* cool ?! see error check above! */ - - /* - * Make parParam list of params that current query level will pass to - * this child plan. - */ - foreach(lst, plan->extParam) - { - int paramid = lfirsti(lst); - Var *var = nth(paramid, PlannerParamVar); - - /* note varlevelsup is absolute level number */ - if (var->varlevelsup == PlannerQueryLevel) - node->parParam = lappendi(node->parParam, paramid); - } - - /* - * Un-correlated or undirect correlated plans of EXISTS, EXPR, or - * MULTIEXPR types can be used as initPlans. For EXISTS or EXPR, we - * just produce a Param referring to the result of evaluating the - * initPlan. For MULTIEXPR, we must build an AND or OR-clause of the - * individual comparison operators, using the appropriate lefthand - * side expressions and Params for the initPlan's target items. 
- */ - if (node->parParam == NIL && slink->subLinkType == EXISTS_SUBLINK) - { - Var *var = makeVar(0, 0, BOOLOID, -1, 0); - Param *prm = makeNode(Param); - - prm->paramkind = PARAM_EXEC; - prm->paramid = (AttrNumber) new_param(var, PlannerQueryLevel); - prm->paramtype = var->vartype; - pfree(var); /* var is only needed for new_param */ - node->setParam = lappendi(node->setParam, prm->paramid); - PlannerInitPlan = lappend(PlannerInitPlan, node); - result = (Node *) prm; - } - else if (node->parParam == NIL && slink->subLinkType == EXPR_SUBLINK) - { - TargetEntry *te = lfirst(plan->targetlist); - - /* need a var node just to pass to new_param()... */ - Var *var = makeVar(0, 0, te->resdom->restype, - te->resdom->restypmod, 0); - Param *prm = makeNode(Param); - - prm->paramkind = PARAM_EXEC; - prm->paramid = (AttrNumber) new_param(var, PlannerQueryLevel); - prm->paramtype = var->vartype; - pfree(var); /* var is only needed for new_param */ - node->setParam = lappendi(node->setParam, prm->paramid); - PlannerInitPlan = lappend(PlannerInitPlan, node); - result = (Node *) prm; - } - else if (node->parParam == NIL && slink->subLinkType == MULTIEXPR_SUBLINK) - { - List *newoper = NIL; - int i = 0; - - /* - * Convert oper list of Opers into a list of Exprs, using lefthand - * arguments and Params representing inside results. - */ - foreach(lst, slink->oper) - { - Oper *oper = (Oper *) lfirst(lst); - Node *lefthand = nth(i, slink->lefthand); - TargetEntry *te = nth(i, plan->targetlist); - - /* need a var node just to pass to new_param()... 
*/ - Var *var = makeVar(0, 0, te->resdom->restype, - te->resdom->restypmod, 0); - Param *prm = makeNode(Param); - Operator tup; - Form_pg_operator opform; - Node *left, - *right; - - prm->paramkind = PARAM_EXEC; - prm->paramid = (AttrNumber) new_param(var, PlannerQueryLevel); - prm->paramtype = var->vartype; - pfree(var); /* var is only needed for new_param */ - - Assert(IsA(oper, Oper)); - tup = SearchSysCache(OPEROID, - ObjectIdGetDatum(oper->opno), - 0, 0, 0); - if (!HeapTupleIsValid(tup)) - elog(ERROR, "cache lookup failed for operator %u", oper->opno); - opform = (Form_pg_operator) GETSTRUCT(tup); - - /* - * Note: we use make_operand in case runtime type conversion - * function calls must be inserted for this operator! - */ - left = make_operand(lefthand, - exprType(lefthand), opform->oprleft); - right = make_operand((Node *) prm, - prm->paramtype, opform->oprright); - ReleaseSysCache(tup); - - newoper = lappend(newoper, - make_opclause(oper, - (Var *) left, - (Var *) right)); - node->setParam = lappendi(node->setParam, prm->paramid); - i++; - } - slink->oper = newoper; - slink->lefthand = NIL; - PlannerInitPlan = lappend(PlannerInitPlan, node); - if (i > 1) - result = (Node *) ((slink->useor) ? make_orclause(newoper) : - make_andclause(newoper)); - else - result = (Node *) lfirst(newoper); - } - else - { - Expr *expr = makeNode(Expr); - List *args = NIL; - List *newoper = NIL; - int i = 0; - - /* - * We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types - * to initPlans, even when they are uncorrelated or undirect - * correlated, because we need to scan the output of the subplan - * for each outer tuple. However, we have the option to tack a - * MATERIAL node onto the top of an uncorrelated/undirect - * correlated subplan, which lets us do the work of evaluating the - * subplan only once. 
We do this if the subplan's top plan node - * is anything more complicated than a plain sequential scan, and - * we do it even for seqscan if the qual appears selective enough - * to eliminate many tuples. - * - * XXX It's pretty ugly to be inserting a MATERIAL node at this - * point. Since subquery_planner has already run SS_finalize_plan - * on the subplan tree, we have to kluge up parameter lists for - * the MATERIAL node. Possibly this could be fixed by postponing - * SS_finalize_plan processing until setrefs.c is run. - */ - if (node->parParam == NIL) - { - bool use_material; - - switch (nodeTag(plan)) - { - case T_SeqScan: - if (plan->initPlan || plan->subPlan) - use_material = true; - else - { - Selectivity qualsel; - - qualsel = clauselist_selectivity(subquery, - plan->qual, - 0); - /* Is 10% selectivity a good threshold?? */ - use_material = qualsel < 0.10; - } - break; - case T_Material: - case T_FunctionScan: - case T_Sort: - - /* - * Don't add another Material node if there's one - * already, nor if the top node is any other type that - * materializes its output anyway. - */ - use_material = false; - break; - default: - use_material = true; - break; - } - if (use_material) - { - Plan *matplan; - - matplan = (Plan *) make_material(plan->targetlist, plan); - /* kluge --- see comments above */ - matplan->extParam = listCopy(plan->extParam); - matplan->locParam = listCopy(plan->locParam); - node->plan = plan = matplan; - } - } - - /* - * Make expression of SUBPLAN type - */ - expr->typeOid = result_type; - expr->opType = SUBPLAN_EXPR; - expr->oper = (Node *) node; - - /* - * Make expr->args from parParam. - */ - foreach(lst, node->parParam) - { - Var *var = nth(lfirsti(lst), PlannerParamVar); - - var = (Var *) copyObject(var); - - /* - * Must fix absolute-level varlevelsup from the - * PlannerParamVar entry. 
But since var is at current subplan - * level, this is easy: - */ - var->varlevelsup = 0; - args = lappend(args, var); - } - expr->args = args; - - /* - * Convert oper list of Opers into a list of Exprs, using lefthand - * arguments and Consts representing inside results. - */ - foreach(lst, slink->oper) - { - Oper *oper = (Oper *) lfirst(lst); - Node *lefthand = nth(i, slink->lefthand); - TargetEntry *te = nth(i, plan->targetlist); - Const *con; - Operator tup; - Form_pg_operator opform; - Node *left, - *right; - - con = makeNullConst(te->resdom->restype); - - Assert(IsA(oper, Oper)); - tup = SearchSysCache(OPEROID, - ObjectIdGetDatum(oper->opno), - 0, 0, 0); - if (!HeapTupleIsValid(tup)) - elog(ERROR, "cache lookup failed for operator %u", oper->opno); - opform = (Form_pg_operator) GETSTRUCT(tup); - - /* - * Note: we use make_operand in case runtime type conversion - * function calls must be inserted for this operator! - */ - left = make_operand(lefthand, - exprType(lefthand), opform->oprleft); - right = make_operand((Node *) con, - con->consttype, opform->oprright); - ReleaseSysCache(tup); - - newoper = lappend(newoper, - make_opclause(oper, - (Var *) left, - (Var *) right)); - i++; - } - slink->oper = newoper; - slink->lefthand = NIL; - result = (Node *) expr; - } - - return result; -} - -/* - * finalize_primnode: build lists of subplans and params appearing - * in the given expression tree. NOTE: items are added to lists passed in, - * so caller must initialize lists to NIL before first call! - * - * Note: the subplan list that is constructed here and assigned to the - * plan's subPlan field will be replaced with an up-to-date list in - * set_plan_references(). We could almost dispense with building this - * subplan list at all; I believe the only place that uses it is the - * check in make_subplan to see whether a subselect has any subselects. 
- */ - -typedef struct finalize_primnode_results -{ - List *subplans; /* List of subplans found in expr */ - List *paramids; /* List of PARAM_EXEC paramids found */ -} finalize_primnode_results; - -static bool -finalize_primnode(Node *node, finalize_primnode_results *results) -{ - if (node == NULL) - return false; - if (IsA(node, Param)) - { - if (((Param *) node)->paramkind == PARAM_EXEC) - { - int paramid = (int) ((Param *) node)->paramid; - - if (!intMember(paramid, results->paramids)) - results->paramids = lconsi(paramid, results->paramids); - } - return false; /* no more to do here */ - } - if (is_subplan(node)) - { - SubPlan *subplan = (SubPlan *) ((Expr *) node)->oper; - List *lst; - - /* Add subplan to subplans list */ - results->subplans = lappend(results->subplans, subplan); - /* Check extParam list for params to add to paramids */ - foreach(lst, subplan->plan->extParam) - { - int paramid = lfirsti(lst); - Var *var = nth(paramid, PlannerParamVar); - - /* note varlevelsup is absolute level number */ - if (var->varlevelsup < PlannerQueryLevel && - !intMember(paramid, results->paramids)) - results->paramids = lconsi(paramid, results->paramids); - } - /* fall through to recurse into subplan args */ - } - return expression_tree_walker(node, finalize_primnode, - (void *) results); -} - -/* - * Replace correlation vars (uplevel vars) with Params. 
- */ - -static Node *replace_correlation_vars_mutator(Node *node, void *context); - -Node * -SS_replace_correlation_vars(Node *expr) -{ - /* No setup needed for tree walk, so away we go */ - return replace_correlation_vars_mutator(expr, NULL); -} - -static Node * -replace_correlation_vars_mutator(Node *node, void *context) -{ - if (node == NULL) - return NULL; - if (IsA(node, Var)) - { - if (((Var *) node)->varlevelsup > 0) - return (Node *) replace_var((Var *) node); - } - return expression_tree_mutator(node, - replace_correlation_vars_mutator, - context); -} - -/* - * Expand SubLinks to SubPlans in the given expression. - */ - -static Node *process_sublinks_mutator(Node *node, void *context); - -Node * -SS_process_sublinks(Node *expr) -{ - /* No setup needed for tree walk, so away we go */ - return process_sublinks_mutator(expr, NULL); -} - -static Node * -process_sublinks_mutator(Node *node, void *context) -{ - if (node == NULL) - return NULL; - if (IsA(node, SubLink)) - { - SubLink *sublink = (SubLink *) node; - - /* - * First, scan the lefthand-side expressions, if any. This is a - * tad klugy since we modify the input SubLink node, but that - * should be OK (make_subplan does it too!) - */ - sublink->lefthand = (List *) - process_sublinks_mutator((Node *) sublink->lefthand, context); - /* Now build the SubPlan node and make the expr to return */ - return make_subplan(sublink); - } - - /* - * Note that we will never see a SubPlan expression in the input - * (since this is the very routine that creates 'em to begin with). So - * the code in expression_tree_mutator() that might do inappropriate - * things with SubPlans or SubLinks will not be exercised. 
- */ - Assert(!is_subplan(node)); - - return expression_tree_mutator(node, - process_sublinks_mutator, - context); -} - -List * -SS_finalize_plan(Plan *plan, List *rtable) -{ - List *extParam = NIL; - List *locParam = NIL; - finalize_primnode_results results; - List *lst; - - if (plan == NULL) - return NIL; - - results.subplans = NIL; /* initialize lists to NIL */ - results.paramids = NIL; - - /* - * When we call finalize_primnode, results.paramids lists are - * automatically merged together. But when recursing to self, we have - * to do it the hard way. We want the paramids list to include params - * in subplans as well as at this level. (We don't care about finding - * subplans of subplans, though.) - */ - - /* Find params and subplans in targetlist and qual */ - finalize_primnode((Node *) plan->targetlist, &results); - finalize_primnode((Node *) plan->qual, &results); - - /* Check additional node-type-specific fields */ - switch (nodeTag(plan)) - { - case T_Result: - finalize_primnode(((Result *) plan)->resconstantqual, - &results); - break; - - case T_IndexScan: - finalize_primnode((Node *) ((IndexScan *) plan)->indxqual, - &results); - - /* - * we need not look at indxqualorig, since it will have the - * same param references as indxqual, and we aren't really - * concerned yet about having a complete subplan list. - */ - break; - - case T_TidScan: - finalize_primnode((Node *) ((TidScan *) plan)->tideval, - &results); - break; - - case T_SubqueryScan: - - /* - * In a SubqueryScan, SS_finalize_plan has already been run on - * the subplan by the inner invocation of subquery_planner, so - * there's no need to do it again. Instead, just pull out the - * subplan's extParams list, which represents the params it - * needs from my level and higher levels. 
- */ - results.paramids = set_unioni(results.paramids, - ((SubqueryScan *) plan)->subplan->extParam); - break; - - case T_FunctionScan: - { - RangeTblEntry *rte; - - rte = rt_fetch(((FunctionScan *) plan)->scan.scanrelid, - rtable); - Assert(rte->rtekind == RTE_FUNCTION); - finalize_primnode(rte->funcexpr, &results); - } - break; - - case T_Append: - foreach(lst, ((Append *) plan)->appendplans) - results.paramids = set_unioni(results.paramids, - SS_finalize_plan((Plan *) lfirst(lst), - rtable)); - break; - - case T_NestLoop: - finalize_primnode((Node *) ((Join *) plan)->joinqual, - &results); - break; - - case T_MergeJoin: - finalize_primnode((Node *) ((Join *) plan)->joinqual, - &results); - finalize_primnode((Node *) ((MergeJoin *) plan)->mergeclauses, - &results); - break; - - case T_HashJoin: - finalize_primnode((Node *) ((Join *) plan)->joinqual, - &results); - finalize_primnode((Node *) ((HashJoin *) plan)->hashclauses, - &results); - break; - - case T_Hash: - finalize_primnode(((Hash *) plan)->hashkey, - &results); - break; - - case T_Agg: - case T_SeqScan: - case T_Material: - case T_Sort: - case T_Unique: - case T_SetOp: - case T_Limit: - case T_Group: - break; - - default: - elog(ERROR, "SS_finalize_plan: node %d unsupported", - nodeTag(plan)); - } - - /* Process left and right subplans, if any */ - results.paramids = set_unioni(results.paramids, - SS_finalize_plan(plan->lefttree, - rtable)); - results.paramids = set_unioni(results.paramids, - SS_finalize_plan(plan->righttree, - rtable)); - - /* Now we have all the paramids and subplans */ - - foreach(lst, results.paramids) - { - int paramid = lfirsti(lst); - Var *var = nth(paramid, PlannerParamVar); - - /* note varlevelsup is absolute level number */ - if (var->varlevelsup < PlannerQueryLevel) - extParam = lappendi(extParam, paramid); - else if (var->varlevelsup > PlannerQueryLevel) - elog(ERROR, "SS_finalize_plan: plan shouldn't reference subplan's variable"); - else - { - Assert(var->varno == 0 && 
var->varattno == 0); - locParam = lappendi(locParam, paramid); - } - } - - plan->extParam = extParam; - plan->locParam = locParam; - plan->subPlan = results.subplans; - - return results.paramids; -} diff --git a/src/backend/optimizer/prep/Makefile b/src/backend/optimizer/prep/Makefile deleted file mode 100644 index 60925de441f..00000000000 --- a/src/backend/optimizer/prep/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -#------------------------------------------------------------------------- -# -# Makefile-- -# Makefile for optimizer/prep -# -# IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/optimizer/prep/Makefile,v 1.13 2002/06/16 00:09:11 momjian Exp $ -# -#------------------------------------------------------------------------- - -subdir = src/backend/optimizer/prep -top_builddir = ../../../.. -include $(top_builddir)/src/Makefile.global - -OBJS = prepqual.o preptlist.o prepunion.o - -all: SUBSYS.o - -SUBSYS.o: $(OBJS) - $(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS) - -depend dep: - $(CC) -MM $(CFLAGS) *.c >depend - -clean: - rm -f SUBSYS.o $(OBJS) - -ifeq (depend,$(wildcard depend)) -include depend -endif - diff --git a/src/backend/optimizer/prep/_deadcode/prepkeyset.c b/src/backend/optimizer/prep/_deadcode/prepkeyset.c deleted file mode 100644 index b67431716fb..00000000000 --- a/src/backend/optimizer/prep/_deadcode/prepkeyset.c +++ /dev/null @@ -1,222 +0,0 @@ -/*------------------------------------------------------------------------- - * - * prepkeyset.c - * Special preperation for keyset queries. - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - *------------------------------------------------------------------------- - */ - -/* THIS FILE WAS USED FOR KSQO, WHICH IS DISABLED NOW. 
bjm 2002-06-15 */ - -#include "postgres.h" -#include "optimizer/planmain.h" - -/* - * Node_Copy - * a macro to simplify calling of copyObject on the specified field - */ -#define Node_Copy(from, newnode, field) newnode->field = copyObject(from->field) - -bool _use_keyset_query_optimizer = FALSE; - -#ifdef ENABLE_KEY_SET_QUERY - -static int inspectOpNode(Expr *expr); -static int inspectAndNode(Expr *expr); -static int inspectOrNode(Expr *expr); -static int TotalExpr; - -/********************************************************************** - * This routine transforms query trees with the following form: - * SELECT a,b, ... FROM one_table WHERE - * (v1 = const1 AND v2 = const2 [ vn = constn ]) OR - * (v1 = const3 AND v2 = const4 [ vn = constn ]) OR - * (v1 = const5 AND v2 = const6 [ vn = constn ]) OR - * ... - * [(v1 = constn AND v2 = constn [ vn = constn ])] - * - * into - * - * SELECT a,b, ... FROM one_table WHERE - * (v1 = const1 AND v2 = const2 [ vn = constn ]) UNION - * SELECT a,b, ... FROM one_table WHERE - * (v1 = const3 AND v2 = const4 [ vn = constn ]) UNION - * SELECT a,b, ... FROM one_table WHERE - * (v1 = const5 AND v2 = const6 [ vn = constn ]) UNION - * ... - * SELECT a,b, ... FROM one_table WHERE - * [(v1 = constn AND v2 = constn [ vn = constn ])] - * - * - * To qualify for transformation the query must not be a sub select, - * a HAVING, or a GROUP BY. It must be a single table and have KSQO - * set to 'on'. - * - * The primary use of this transformation is to avoid the exponrntial - * memory consumption of cnfify() and to make use of index access - * methods. - * - * daveh@insightdist.com 1998-08-31 - * - * May want to also prune out duplicate terms. 
- **********************************************************************/ -void -transformKeySetQuery(Query *origNode) -{ - /* Qualify as a key set query candidate */ - if (_use_keyset_query_optimizer == FALSE || - origNode->groupClause || - origNode->havingQual || - origNode->hasAggs || - origNode->utilityStmt || - origNode->unionClause || - origNode->unionall || - origNode->hasSubLinks || - origNode->commandType != CMD_SELECT) - return; - - /* Qualify single table query */ - if (length(origNode->rtable) != 1) - return; - - /* Sorry about the global, not worth passing around */ - /* 9 expressions seems like a good number. More than 9 */ - /* and it starts to slow down quite a bit */ - TotalExpr = 0; - /*************************/ - /* Qualify where clause */ - /*************************/ - if (!inspectOrNode((Expr *) origNode->jointree->quals) || TotalExpr < 9) - return; - - /* Copy essential elements into a union node */ - while (((Expr *) origNode->jointree->quals)->opType == OR_EXPR) - { - Query *unionNode = makeNode(Query); - List *qualargs = ((Expr *) origNode->jointree->quals)->args; - - unionNode->commandType = origNode->commandType; - unionNode->resultRelation = origNode->resultRelation; - unionNode->isPortal = origNode->isPortal; - unionNode->isBinary = origNode->isBinary; - - Node_Copy(origNode, unionNode, distinctClause); - Node_Copy(origNode, unionNode, sortClause); - Node_Copy(origNode, unionNode, rtable); - origNode->jointree->quals = NULL; /* avoid unnecessary - * copying */ - Node_Copy(origNode, unionNode, jointree); - Node_Copy(origNode, unionNode, targetList); - - /* Pull up Expr = */ - unionNode->jointree->quals = lsecond(qualargs); - - /* Pull up balance of tree */ - origNode->jointree->quals = lfirst(qualargs); - - origNode->unionClause = lappend(origNode->unionClause, unionNode); - } - return; -} - - - - -static int -/********************************************************************** - * Checks for 1 or more OR terms w/ 1 or more AND 
terms. - * AND terms must be equal in size. - * Returns the number of each AND term. - **********************************************************************/ -inspectOrNode(Expr *expr) -{ - int rc; - Expr *firstExpr, - *secondExpr; - - if (!(expr && nodeTag(expr) == T_Expr && expr->opType == OR_EXPR)) - return 0; - - firstExpr = lfirst(expr->args); - secondExpr = lsecond(expr->args); - if (nodeTag(firstExpr) != T_Expr || nodeTag(secondExpr) != T_Expr) - return 0; - - if (firstExpr->opType == OR_EXPR && secondExpr->opType == AND_EXPR) - { - if ((rc = inspectOrNode(firstExpr)) == 0) - return 0; - - return (rc == inspectAndNode(secondExpr)) ? rc : 0; - } - else if (firstExpr->opType == AND_EXPR && secondExpr->opType == AND_EXPR) - { - if ((rc = inspectAndNode(firstExpr)) == 0) - return 0; - - return (rc == inspectAndNode(secondExpr)) ? rc : 0; - - } - - return 0; -} - - -static int -/********************************************************************** - * Check for one or more AND terms. Each sub-term must be a T_Const - * T_Var expression. - * Returns the number of AND terms. - **********************************************************************/ -inspectAndNode(Expr *expr) -{ - int rc; - Expr *firstExpr, - *secondExpr; - - if (!(expr && nodeTag(expr) == T_Expr && expr->opType == AND_EXPR)) - return 0; - - firstExpr = lfirst(expr->args); - secondExpr = lsecond(expr->args); - if (nodeTag(firstExpr) != T_Expr || nodeTag(secondExpr) != T_Expr) - return 0; - - if (firstExpr->opType == AND_EXPR && - secondExpr->opType == OP_EXPR && inspectOpNode(secondExpr)) - { - rc = inspectAndNode(firstExpr); - return ((rc) ? 
(rc + 1) : 0); /* Add up the AND nodes */ - } - else if (firstExpr->opType == OP_EXPR && inspectOpNode(firstExpr) && - secondExpr->opType == OP_EXPR && inspectOpNode(secondExpr)) - return 1; - - return 0; -} - - -static int -/****************************************************************** - * Return TRUE if T_Var = T_Const, else FALSE - * Actually it does not test for =. Need to do this! - ******************************************************************/ -inspectOpNode(Expr *expr) -{ - Expr *firstExpr, - *secondExpr; - - if (nodeTag(expr) != T_Expr || expr->opType != OP_EXPR) - return FALSE; - - TotalExpr++; - - firstExpr = lfirst(expr->args); - secondExpr = lsecond(expr->args); - return (firstExpr && secondExpr && nodeTag(firstExpr) == T_Var && nodeTag(secondExpr) == T_Const); -} - -#endif /* ENABLE_KEY_SET_QUERY */ diff --git a/src/backend/optimizer/prep/prepqual.c b/src/backend/optimizer/prep/prepqual.c deleted file mode 100644 index 188765f856b..00000000000 --- a/src/backend/optimizer/prep/prepqual.c +++ /dev/null @@ -1,973 +0,0 @@ -/*------------------------------------------------------------------------- - * - * prepqual.c - * Routines for preprocessing qualification expressions - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.32 2002/06/20 20:29:31 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include <sys/types.h> - -#include "postgres.h" - -#include "nodes/makefuncs.h" -#include "optimizer/clauses.h" -#include "optimizer/prep.h" -#include "utils/lsyscache.h" - -static Expr *flatten_andors(Expr *qual); -static List *pull_ors(List *orlist); -static List *pull_ands(List *andlist); -static Expr *find_nots(Expr *qual); -static Expr *push_nots(Expr *qual); -static Expr *find_ors(Expr *qual); -static Expr 
*or_normalize(List *orlist); -static Expr *find_ands(Expr *qual); -static Expr *and_normalize(List *andlist); -static Expr *qual_cleanup(Expr *qual); -static List *remove_duplicates(List *list); -static void count_bool_nodes(Expr *qual, double *nodes, - double *cnfnodes, double *dnfnodes); - -/***************************************************************************** - * - * CNF/DNF CONVERSION ROUTINES - * - * These routines convert an arbitrary boolean expression into - * conjunctive normal form or disjunctive normal form. - * - * Normalization is only carried out in the top AND/OR/NOT portion - * of the given tree; we do not attempt to normalize boolean expressions - * that may appear as arguments of operators or functions in the tree. - * - * Query qualifications (WHERE clauses) are ordinarily transformed into - * CNF, ie, AND-of-ORs form, because then the optimizer can use any one - * of the independent AND clauses as a filtering qualification. However, - * quals that are naturally expressed as OR-of-ANDs can suffer an - * exponential growth in size in this transformation, so we also consider - * converting to DNF (OR-of-ANDs), and we may also leave well enough alone - * if both transforms cause unreasonable growth. The OR-of-ANDs format - * is useful for indexscan implementation, so we prefer that format when - * there is just one relation involved. - * - * canonicalize_qual() does "smart" conversion to either CNF or DNF, per - * the above considerations, while cnfify() and dnfify() simply perform - * the demanded transformation. The latter two may become dead code - * eventually. - *****************************************************************************/ - - -/* - * canonicalize_qual - * Convert a qualification to the most useful normalized form. - * - * Returns the modified qualification. - * - * If 'removeAndFlag' is true then it removes explicit AND at the top level, - * producing a list of implicitly-ANDed conditions. 
Otherwise, a regular - * boolean expression is returned. Since most callers pass 'true', we - * prefer to declare the result as List *, not Expr *. - * - * XXX This code could be much smarter, at the cost of also being slower, - * if we tried to compute selectivities and/or see whether there are - * actually indexes to support an indexscan implementation of a DNF qual. - * We could even try converting the CNF clauses that mention a single - * relation into a single DNF clause to see if that looks cheaper to - * implement. For now, though, we just try to avoid doing anything - * quite as stupid as unconditionally converting to CNF was... - */ -List * -canonicalize_qual(Expr *qual, bool removeAndFlag) -{ - Expr *newqual; - double nodes, - cnfnodes, - dnfnodes; - bool cnfok, - dnfok; - - if (qual == NULL) - return NIL; - - /* - * Flatten AND and OR groups throughout the tree. This improvement is - * always worthwhile, so do it unconditionally. - */ - qual = flatten_andors(qual); - - /* - * Push down NOTs. We do this only in the top-level boolean - * expression, without examining arguments of operators/functions. - * Even so, it might not be a win if we are unable to find negators - * for all the operators involved; perhaps we should compare before- - * and-after tree sizes? - */ - newqual = find_nots(qual); - - /* - * Choose whether to convert to CNF, or DNF, or leave well enough - * alone. - * - * We make an approximate estimate of the number of bottom-level nodes - * that will appear in the CNF and DNF forms of the query. - */ - count_bool_nodes(newqual, &nodes, &cnfnodes, &dnfnodes); - - /* - * First heuristic is to forget about *both* normal forms if there are - * a huge number of terms in the qual clause. This would only happen - * with machine-generated queries, presumably; and most likely such a - * query is already in either CNF or DNF. 
- */ - cnfok = dnfok = true; - if (nodes >= 500.0) - cnfok = dnfok = false; - - /* - * Second heuristic is to forget about either CNF or DNF if it shows - * unreasonable growth compared to the original form of the qual, - * where we define "unreasonable" a tad arbitrarily as 4x more - * operators. - */ - if (cnfnodes >= 4.0 * nodes) - cnfok = false; - if (dnfnodes >= 4.0 * nodes) - dnfok = false; - - /* - * Third heuristic is to prefer DNF if top level is already an OR, and - * only one relation is mentioned, and DNF is no larger than the CNF - * representation. (Pretty shaky; can we improve on this?) - */ - if (cnfok && dnfok && dnfnodes <= cnfnodes && - or_clause((Node *) newqual) && - NumRelids((Node *) newqual) == 1) - cnfok = false; - - /* - * Otherwise, we prefer CNF. - * - * XXX obviously, these rules could be improved upon. - */ - if (cnfok) - { - /* - * Normalize into conjunctive normal form, and clean up the - * result. - */ - newqual = qual_cleanup(find_ors(newqual)); - } - else if (dnfok) - { - /* - * Normalize into disjunctive normal form, and clean up the - * result. - */ - newqual = qual_cleanup(find_ands(newqual)); - } - - /* Convert to implicit-AND list if requested */ - if (removeAndFlag) - newqual = (Expr *) make_ands_implicit(newqual); - - return (List *) newqual; -} - -/* - * cnfify - * Convert a qualification to conjunctive normal form by applying - * successive normalizations. - * - * Returns the modified qualification. - * - * If 'removeAndFlag' is true then it removes explicit AND at the top level, - * producing a list of implicitly-ANDed conditions. Otherwise, a regular - * boolean expression is returned. Since most callers pass 'true', we - * prefer to declare the result as List *, not Expr *. - */ -List * -cnfify(Expr *qual, bool removeAndFlag) -{ - Expr *newqual; - - if (qual == NULL) - return NIL; - - /* - * Flatten AND and OR groups throughout the tree. This improvement is - * always worthwhile. 
- */ - newqual = flatten_andors(qual); - - /* - * Push down NOTs. We do this only in the top-level boolean - * expression, without examining arguments of operators/functions. - */ - newqual = find_nots(newqual); - /* Normalize into conjunctive normal form. */ - newqual = find_ors(newqual); - /* Clean up the result. */ - newqual = qual_cleanup(newqual); - - if (removeAndFlag) - newqual = (Expr *) make_ands_implicit(newqual); - - return (List *) newqual; -} - -#ifdef NOT_USED -/* - * dnfify - * Convert a qualification to disjunctive normal form by applying - * successive normalizations. - * - * Returns the modified qualification. - * - * We do not offer a 'removeOrFlag' in this case; the usages are - * different. - */ -static Expr * -dnfify(Expr *qual) -{ - Expr *newqual; - - if (qual == NULL) - return NULL; - - /* - * Flatten AND and OR groups throughout the tree. This improvement is - * always worthwhile. - */ - newqual = flatten_andors(qual); - - /* - * Push down NOTs. We do this only in the top-level boolean - * expression, without examining arguments of operators/functions. - */ - newqual = find_nots(newqual); - /* Normalize into disjunctive normal form. */ - newqual = find_ands(newqual); - /* Clean up the result. */ - newqual = qual_cleanup(newqual); - - return newqual; -} -#endif - -/*-------------------- - * The parser regards AND and OR as purely binary operators, so a qual like - * (A = 1) OR (A = 2) OR (A = 3) ... - * will produce a nested parsetree - * (OR (A = 1) (OR (A = 2) (OR (A = 3) ...))) - * In reality, the optimizer and executor regard AND and OR as n-argument - * operators, so this tree can be flattened to - * (OR (A = 1) (A = 2) (A = 3) ...) - * which is the responsibility of the routines below. - * - * flatten_andors() does the basic transformation with no initial assumptions. - * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR - * tree after local transformations that might introduce nested AND/ORs. 
- *-------------------- - */ - -/*-------------------- - * flatten_andors - * Given a qualification, simplify nested AND/OR clauses into flat - * AND/OR clauses with more arguments. - * - * Returns the rebuilt expr (note original list structure is not touched). - *-------------------- - */ -static Expr * -flatten_andors(Expr *qual) -{ - if (qual == NULL) - return NULL; - - if (and_clause((Node *) qual)) - { - List *out_list = NIL; - List *arg; - - foreach(arg, qual->args) - { - Expr *subexpr = flatten_andors((Expr *) lfirst(arg)); - - /* - * Note: we can destructively nconc the subexpression's - * arglist because we know the recursive invocation of - * flatten_andors will have built a new arglist not shared - * with any other expr. Otherwise we'd need a listCopy here. - */ - if (and_clause((Node *) subexpr)) - out_list = nconc(out_list, subexpr->args); - else - out_list = lappend(out_list, subexpr); - } - return make_andclause(out_list); - } - else if (or_clause((Node *) qual)) - { - List *out_list = NIL; - List *arg; - - foreach(arg, qual->args) - { - Expr *subexpr = flatten_andors((Expr *) lfirst(arg)); - - /* - * Note: we can destructively nconc the subexpression's - * arglist because we know the recursive invocation of - * flatten_andors will have built a new arglist not shared - * with any other expr. Otherwise we'd need a listCopy here. 
- */ - if (or_clause((Node *) subexpr)) - out_list = nconc(out_list, subexpr->args); - else - out_list = lappend(out_list, subexpr); - } - return make_orclause(out_list); - } - else if (not_clause((Node *) qual)) - return make_notclause(flatten_andors(get_notclausearg(qual))); - else if (is_opclause((Node *) qual)) - { - Expr *left = (Expr *) get_leftop(qual); - Expr *right = (Expr *) get_rightop(qual); - - if (right) - return make_clause(qual->opType, qual->oper, - lcons(flatten_andors(left), - lcons(flatten_andors(right), - NIL))); - else - return make_clause(qual->opType, qual->oper, - lcons(flatten_andors(left), - NIL)); - } - else - return qual; -} - -/* - * pull_ors - * Pull the arguments of an 'or' clause nested within another 'or' - * clause up into the argument list of the parent. - * - * Input is the arglist of an OR clause. - * Returns the rebuilt arglist (note original list structure is not touched). - */ -static List * -pull_ors(List *orlist) -{ - List *out_list = NIL; - List *arg; - - foreach(arg, orlist) - { - Expr *subexpr = (Expr *) lfirst(arg); - - /* - * Note: we can destructively nconc the subexpression's arglist - * because we know the recursive invocation of pull_ors will have - * built a new arglist not shared with any other expr. Otherwise - * we'd need a listCopy here. - */ - if (or_clause((Node *) subexpr)) - out_list = nconc(out_list, pull_ors(subexpr->args)); - else - out_list = lappend(out_list, subexpr); - } - return out_list; -} - -/* - * pull_ands - * Pull the arguments of an 'and' clause nested within another 'and' - * clause up into the argument list of the parent. - * - * Returns the modified list. 
- */ -static List * -pull_ands(List *andlist) -{ - List *out_list = NIL; - List *arg; - - foreach(arg, andlist) - { - Expr *subexpr = (Expr *) lfirst(arg); - - /* - * Note: we can destructively nconc the subexpression's arglist - * because we know the recursive invocation of pull_ands will have - * built a new arglist not shared with any other expr. Otherwise - * we'd need a listCopy here. - */ - if (and_clause((Node *) subexpr)) - out_list = nconc(out_list, pull_ands(subexpr->args)); - else - out_list = lappend(out_list, subexpr); - } - return out_list; -} - -/* - * find_nots - * Traverse the qualification, looking for 'NOT's to take care of. - * For 'NOT' clauses, apply push_not() to try to push down the 'NOT'. - * For all other clause types, simply recurse. - * - * Returns the modified qualification. AND/OR flatness is preserved. - */ -static Expr * -find_nots(Expr *qual) -{ - if (qual == NULL) - return NULL; - -#ifdef NOT_USED - /* recursing into operator expressions is probably not worth it. */ - if (is_opclause((Node *) qual)) - { - Expr *left = (Expr *) get_leftop(qual); - Expr *right = (Expr *) get_rightop(qual); - - if (right) - return make_clause(qual->opType, qual->oper, - lcons(find_nots(left), - lcons(find_nots(right), - NIL))); - else - return make_clause(qual->opType, qual->oper, - lcons(find_nots(left), - NIL)); - } -#endif - if (and_clause((Node *) qual)) - { - List *t_list = NIL; - List *temp; - - foreach(temp, qual->args) - t_list = lappend(t_list, find_nots(lfirst(temp))); - return make_andclause(pull_ands(t_list)); - } - else if (or_clause((Node *) qual)) - { - List *t_list = NIL; - List *temp; - - foreach(temp, qual->args) - t_list = lappend(t_list, find_nots(lfirst(temp))); - return make_orclause(pull_ors(t_list)); - } - else if (not_clause((Node *) qual)) - return push_nots(get_notclausearg(qual)); - else - return qual; -} - -/* - * push_nots - * Push down a 'NOT' as far as possible. 
- * - * Input is an expression to be negated (e.g., the argument of a NOT clause). - * Returns a new qual equivalent to the negation of the given qual. - */ -static Expr * -push_nots(Expr *qual) -{ - if (qual == NULL) - return make_notclause(qual); /* XXX is this right? Or - * possible? */ - - /* - * Negate an operator clause if possible: ("NOT" (< A B)) => (> A B) - * Otherwise, retain the clause as it is (the 'not' can't be pushed - * down any farther). - */ - if (is_opclause((Node *) qual)) - { - Oper *oper = (Oper *) ((Expr *) qual)->oper; - Oid negator = get_negator(oper->opno); - - if (negator) - { - Oper *op = (Oper *) makeOper(negator, - InvalidOid, - oper->opresulttype, - oper->opretset); - - return make_opclause(op, get_leftop(qual), get_rightop(qual)); - } - else - return make_notclause(qual); - } - else if (and_clause((Node *) qual)) - { - /*-------------------- - * Apply DeMorgan's Laws: - * ("NOT" ("AND" A B)) => ("OR" ("NOT" A) ("NOT" B)) - * ("NOT" ("OR" A B)) => ("AND" ("NOT" A) ("NOT" B)) - * i.e., swap AND for OR and negate all the subclauses. - *-------------------- - */ - List *t_list = NIL; - List *temp; - - foreach(temp, qual->args) - t_list = lappend(t_list, push_nots(lfirst(temp))); - return make_orclause(pull_ors(t_list)); - } - else if (or_clause((Node *) qual)) - { - List *t_list = NIL; - List *temp; - - foreach(temp, qual->args) - t_list = lappend(t_list, push_nots(lfirst(temp))); - return make_andclause(pull_ands(t_list)); - } - else if (not_clause((Node *) qual)) - { - /* - * Another 'not' cancels this 'not', so eliminate the 'not' and - * stop negating this branch. But search the subexpression for - * more 'not's to simplify. - */ - return find_nots(get_notclausearg(qual)); - } - else - { - /* - * We don't know how to negate anything else, place a 'not' at - * this level. 
- */ - return make_notclause(qual); - } -} - -/* - * find_ors - * Given a qualification tree with the 'not's pushed down, convert it - * to a tree in CNF by repeatedly applying the rule: - * ("OR" A ("AND" B C)) => ("AND" ("OR" A B) ("OR" A C)) - * - * Note that 'or' clauses will always be turned into 'and' clauses - * if they contain any 'and' subclauses. - * - * Returns the modified qualification. AND/OR flatness is preserved. - */ -static Expr * -find_ors(Expr *qual) -{ - if (qual == NULL) - return NULL; - - /* We used to recurse into opclauses here, but I see no reason to... */ - if (and_clause((Node *) qual)) - { - List *andlist = NIL; - List *temp; - - foreach(temp, qual->args) - andlist = lappend(andlist, find_ors(lfirst(temp))); - return make_andclause(pull_ands(andlist)); - } - else if (or_clause((Node *) qual)) - { - List *orlist = NIL; - List *temp; - - foreach(temp, qual->args) - orlist = lappend(orlist, find_ors(lfirst(temp))); - return or_normalize(pull_ors(orlist)); - } - else if (not_clause((Node *) qual)) - return make_notclause(find_ors(get_notclausearg(qual))); - else - return qual; -} - -/* - * or_normalize - * Given a list of exprs which are 'or'ed together, try to apply - * the distributive law - * ("OR" A ("AND" B C)) => ("AND" ("OR" A B) ("OR" A C)) - * to convert the top-level OR clause to a top-level AND clause. - * - * Returns the resulting expression (could be an AND clause, an OR - * clause, or maybe even a single subexpression). - */ -static Expr * -or_normalize(List *orlist) -{ - Expr *distributable = NULL; - int num_subclauses = 1; - List *andclauses = NIL; - List *temp; - - if (orlist == NIL) - return NULL; /* probably can't happen */ - if (lnext(orlist) == NIL) - return lfirst(orlist); /* single-expression OR (can this happen?) */ - - /* - * If we have a choice of AND clauses, pick the one with the most - * subclauses. Because we initialized num_subclauses = 1, any AND - * clauses with only one arg will be ignored as useless. 
- */ - foreach(temp, orlist) - { - Expr *clause = lfirst(temp); - - if (and_clause((Node *) clause)) - { - int nclauses = length(clause->args); - - if (nclauses > num_subclauses) - { - distributable = clause; - num_subclauses = nclauses; - } - } - } - - /* if there's no suitable AND clause, we can't transform the OR */ - if (!distributable) - return make_orclause(orlist); - - /* - * Caution: lremove destructively modifies the input orlist. This - * should be OK, since or_normalize is only called with freshly - * constructed lists that are not referenced elsewhere. - */ - orlist = lremove(distributable, orlist); - - foreach(temp, distributable->args) - { - Expr *andclause = lfirst(temp); - List *neworlist; - - /* - * We are going to insert the orlist into multiple places in the - * result expression. For most expression types, it'd be OK to - * just have multiple links to the same subtree, but this fails - * badly for SubLinks (and perhaps other cases?). For safety, we - * make a distinct copy for each place the orlist is inserted. - */ - if (lnext(temp) == NIL) - neworlist = orlist; /* can use original tree at the end */ - else - neworlist = copyObject(orlist); - - /* - * pull_ors is needed here in case andclause has a top-level OR. - * Then we recursively apply or_normalize, since there might be an - * AND subclause in the resulting OR-list. - */ - andclause = or_normalize(pull_ors(lcons(andclause, neworlist))); - andclauses = lappend(andclauses, andclause); - } - - /* pull_ands is needed in case any sub-or_normalize succeeded */ - return make_andclause(pull_ands(andclauses)); -} - -/* - * find_ands - * Given a qualification tree with the 'not's pushed down, convert it - * to a tree in DNF by repeatedly applying the rule: - * ("AND" A ("OR" B C)) => ("OR" ("AND" A B) ("AND" A C)) - * - * Note that 'and' clauses will always be turned into 'or' clauses - * if they contain any 'or' subclauses. - * - * Returns the modified qualification. AND/OR flatness is preserved. 
- */ -static Expr * -find_ands(Expr *qual) -{ - if (qual == NULL) - return NULL; - - /* We used to recurse into opclauses here, but I see no reason to... */ - if (or_clause((Node *) qual)) - { - List *orlist = NIL; - List *temp; - - foreach(temp, qual->args) - orlist = lappend(orlist, find_ands(lfirst(temp))); - return make_orclause(pull_ors(orlist)); - } - else if (and_clause((Node *) qual)) - { - List *andlist = NIL; - List *temp; - - foreach(temp, qual->args) - andlist = lappend(andlist, find_ands(lfirst(temp))); - return and_normalize(pull_ands(andlist)); - } - else if (not_clause((Node *) qual)) - return make_notclause(find_ands(get_notclausearg(qual))); - else - return qual; -} - -/* - * and_normalize - * Given a list of exprs which are 'and'ed together, try to apply - * the distributive law - * ("AND" A ("OR" B C)) => ("OR" ("AND" A B) ("AND" A C)) - * to convert the top-level AND clause to a top-level OR clause. - * - * Returns the resulting expression (could be an AND clause, an OR - * clause, or maybe even a single subexpression). - */ -static Expr * -and_normalize(List *andlist) -{ - Expr *distributable = NULL; - int num_subclauses = 1; - List *orclauses = NIL; - List *temp; - - if (andlist == NIL) - return NULL; /* probably can't happen */ - if (lnext(andlist) == NIL) - return lfirst(andlist); /* single-expression AND (can this - * happen?) */ - - /* - * If we have a choice of OR clauses, pick the one with the most - * subclauses. Because we initialized num_subclauses = 1, any OR - * clauses with only one arg will be ignored as useless. 
- */ - foreach(temp, andlist) - { - Expr *clause = lfirst(temp); - - if (or_clause((Node *) clause)) - { - int nclauses = length(clause->args); - - if (nclauses > num_subclauses) - { - distributable = clause; - num_subclauses = nclauses; - } - } - } - - /* if there's no suitable OR clause, we can't transform the AND */ - if (!distributable) - return make_andclause(andlist); - - /* - * Caution: lremove destructively modifies the input andlist. This - * should be OK, since and_normalize is only called with freshly - * constructed lists that are not referenced elsewhere. - */ - andlist = lremove(distributable, andlist); - - foreach(temp, distributable->args) - { - Expr *orclause = lfirst(temp); - List *newandlist; - - /* - * We are going to insert the andlist into multiple places in the - * result expression. For most expression types, it'd be OK to - * just have multiple links to the same subtree, but this fails - * badly for SubLinks (and perhaps other cases?). For safety, we - * make a distinct copy for each place the andlist is inserted. - */ - if (lnext(temp) == NIL) - newandlist = andlist; /* can use original tree at the - * end */ - else - newandlist = copyObject(andlist); - - /* - * pull_ands is needed here in case orclause has a top-level AND. - * Then we recursively apply and_normalize, since there might be - * an OR subclause in the resulting AND-list. - */ - orclause = and_normalize(pull_ands(lcons(orclause, newandlist))); - orclauses = lappend(orclauses, orclause); - } - - /* pull_ors is needed in case any sub-and_normalize succeeded */ - return make_orclause(pull_ors(orclauses)); -} - -/* - * qual_cleanup - * Fix up a qualification by removing duplicate entries (which could be - * created during normalization, if identical subexpressions from different - * parts of the tree are brought together). Also, check for AND and OR - * clauses with only one remaining subexpression, and simplify. - * - * Returns the modified qualification. 
- */ -static Expr * -qual_cleanup(Expr *qual) -{ - if (qual == NULL) - return NULL; - - if (and_clause((Node *) qual)) - { - List *andlist = NIL; - List *temp; - - foreach(temp, qual->args) - andlist = lappend(andlist, qual_cleanup(lfirst(temp))); - - andlist = remove_duplicates(pull_ands(andlist)); - - if (length(andlist) > 1) - return make_andclause(andlist); - else - return lfirst(andlist); - } - else if (or_clause((Node *) qual)) - { - List *orlist = NIL; - List *temp; - - foreach(temp, qual->args) - orlist = lappend(orlist, qual_cleanup(lfirst(temp))); - - orlist = remove_duplicates(pull_ors(orlist)); - - if (length(orlist) > 1) - return make_orclause(orlist); - else - return lfirst(orlist); - } - else if (not_clause((Node *) qual)) - return make_notclause(qual_cleanup(get_notclausearg(qual))); - else - return qual; -} - -/* - * remove_duplicates - */ -static List * -remove_duplicates(List *list) -{ - List *result = NIL; - List *i; - - if (length(list) <= 1) - return list; - - foreach(i, list) - { - if (!member(lfirst(i), result)) - result = lappend(result, lfirst(i)); - } - return result; -} - -/* - * count_bool_nodes - * Support for heuristics in canonicalize_qual(): count the - * number of nodes that are inputs to the top level AND/OR/NOT - * part of a qual tree, and estimate how many nodes will appear - * in the CNF'ified or DNF'ified equivalent of the expression. - * - * This is just an approximate calculation; it doesn't deal with NOTs - * very well, and of course it cannot detect possible simplifications - * from eliminating duplicate subclauses. The idea is just to cheaply - * determine whether CNF will be markedly worse than DNF or vice versa. - * - * The counts/estimates are represented as doubles to avoid risk of overflow. 
- */ -static void -count_bool_nodes(Expr *qual, - double *nodes, - double *cnfnodes, - double *dnfnodes) -{ - List *temp; - double subnodes, - subcnfnodes, - subdnfnodes; - - if (and_clause((Node *) qual)) - { - *nodes = *cnfnodes = 0.0; - *dnfnodes = 1.0; /* DNF nodes will be product of sub-counts */ - - foreach(temp, qual->args) - { - count_bool_nodes(lfirst(temp), - &subnodes, &subcnfnodes, &subdnfnodes); - *nodes += subnodes; - *cnfnodes += subcnfnodes; - *dnfnodes *= subdnfnodes; - } - - /* - * we could get dnfnodes < cnfnodes here, if all the sub-nodes are - * simple ones with count 1. Make sure dnfnodes isn't too small. - */ - if (*dnfnodes < *cnfnodes) - *dnfnodes = *cnfnodes; - } - else if (or_clause((Node *) qual)) - { - *nodes = *dnfnodes = 0.0; - *cnfnodes = 1.0; /* CNF nodes will be product of sub-counts */ - - foreach(temp, qual->args) - { - count_bool_nodes(lfirst(temp), - &subnodes, &subcnfnodes, &subdnfnodes); - *nodes += subnodes; - *cnfnodes *= subcnfnodes; - *dnfnodes += subdnfnodes; - } - - /* - * we could get cnfnodes < dnfnodes here, if all the sub-nodes are - * simple ones with count 1. Make sure cnfnodes isn't too small. - */ - if (*cnfnodes < *dnfnodes) - *cnfnodes = *dnfnodes; - } - else if (not_clause((Node *) qual)) - { - count_bool_nodes(get_notclausearg(qual), - nodes, cnfnodes, dnfnodes); - } - else if (contain_subplans((Node *) qual)) - { - /* - * charge extra for subexpressions containing sub-SELECTs, to - * discourage us from rearranging them in a way that might - * generate N copies of a subselect rather than one. The magic - * constant here interacts with the "4x maximum growth" heuristic - * in canonicalize_qual(). 
- */ - *nodes = 1.0; - *cnfnodes = *dnfnodes = 25.0; - } - else - { - /* anything else counts 1 for my purposes */ - *nodes = *cnfnodes = *dnfnodes = 1.0; - } -} diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c deleted file mode 100644 index 1cc9f5af489..00000000000 --- a/src/backend/optimizer/prep/preptlist.c +++ /dev/null @@ -1,237 +0,0 @@ -/*------------------------------------------------------------------------- - * - * preptlist.c - * Routines to preprocess the parse tree target list - * - * This module takes care of altering the query targetlist as needed for - * INSERT, UPDATE, and DELETE queries. For INSERT and UPDATE queries, - * the targetlist must contain an entry for each attribute of the target - * relation in the correct order. For both UPDATE and DELETE queries, - * we need a junk targetlist entry holding the CTID attribute --- the - * executor relies on this to find the tuple to be replaced/deleted. - * - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/preptlist.c,v 1.53 2002/06/20 20:29:31 momjian Exp $ - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" - -#include "access/heapam.h" -#include "catalog/pg_type.h" -#include "nodes/makefuncs.h" -#include "optimizer/prep.h" -#include "parser/parsetree.h" - - -static List *expand_targetlist(List *tlist, int command_type, - Index result_relation, List *range_table); - - -/* - * preprocess_targetlist - * Driver for preprocessing the parse tree targetlist. - * - * Returns the new targetlist. - */ -List * -preprocess_targetlist(List *tlist, - int command_type, - Index result_relation, - List *range_table) -{ - /* - * Sanity check: if there is a result relation, it'd better be a real - * relation not a subquery. 
Else parser or rewriter messed up. - */ - if (result_relation) - { - RangeTblEntry *rte = rt_fetch(result_relation, range_table); - - if (rte->subquery != NULL || rte->relid == InvalidOid) - elog(ERROR, "preprocess_targetlist: subquery cannot be result relation"); - } - - /* - * for heap_formtuple to work, the targetlist must match the exact - * order of the attributes. We also need to fill in any missing - * attributes. -ay 10/94 - */ - if (command_type == CMD_INSERT || command_type == CMD_UPDATE) - tlist = expand_targetlist(tlist, command_type, - result_relation, range_table); - - /* - * for "update" and "delete" queries, add ctid of the result relation - * into the target list so that the ctid will propagate through - * execution and ExecutePlan() will be able to identify the right - * tuple to replace or delete. This extra field is marked "junk" so - * that it is not stored back into the tuple. - */ - if (command_type == CMD_UPDATE || command_type == CMD_DELETE) - { - Resdom *resdom; - Var *var; - - resdom = makeResdom(length(tlist) + 1, - TIDOID, - -1, - pstrdup("ctid"), - true); - - var = makeVar(result_relation, SelfItemPointerAttributeNumber, - TIDOID, -1, 0); - - /* - * For an UPDATE, expand_targetlist already created a fresh tlist. - * For DELETE, better do a listCopy so that we don't destructively - * modify the original tlist (is this really necessary?). - */ - if (command_type == CMD_DELETE) - tlist = listCopy(tlist); - - tlist = lappend(tlist, makeTargetEntry(resdom, (Node *) var)); - } - - return tlist; -} - -/***************************************************************************** - * - * TARGETLIST EXPANSION - * - *****************************************************************************/ - -/* - * expand_targetlist - * Given a target list as generated by the parser and a result relation, - * add targetlist entries for any missing attributes, and ensure the - * non-junk attributes appear in proper field order. 
- * - * NOTE: if you are tempted to put more processing here, consider whether - * it shouldn't go in the rewriter's rewriteTargetList() instead. - */ -static List * -expand_targetlist(List *tlist, int command_type, - Index result_relation, List *range_table) -{ - List *new_tlist = NIL; - Relation rel; - int attrno, - numattrs; - - /* - * The rewriter should have already ensured that the TLEs are in - * correct order; but we have to insert TLEs for any missing attributes. - * - * Scan the tuple description in the relation's relcache entry to make - * sure we have all the user attributes in the right order. - */ - rel = heap_open(getrelid(result_relation, range_table), AccessShareLock); - - numattrs = RelationGetNumberOfAttributes(rel); - - for (attrno = 1; attrno <= numattrs; attrno++) - { - Form_pg_attribute att_tup = rel->rd_att->attrs[attrno - 1]; - TargetEntry *new_tle = NULL; - - if (tlist != NIL) - { - TargetEntry *old_tle = (TargetEntry *) lfirst(tlist); - Resdom *resdom = old_tle->resdom; - - if (!resdom->resjunk && resdom->resno == attrno) - { - Assert(strcmp(resdom->resname, - NameStr(att_tup->attname)) == 0); - new_tle = old_tle; - tlist = lnext(tlist); - } - } - - if (new_tle == NULL) - { - /* - * Didn't find a matching tlist entry, so make one. - * - * For INSERT, generate a NULL constant. (We assume the - * rewriter would have inserted any available default value.) - * - * For UPDATE, generate a Var reference to the existing value of - * the attribute, so that it gets copied to the new tuple. 
- */ - Oid atttype = att_tup->atttypid; - int32 atttypmod = att_tup->atttypmod; - Node *new_expr; - - switch (command_type) - { - case CMD_INSERT: - new_expr = (Node *) makeConst(atttype, - att_tup->attlen, - (Datum) 0, - true, /* isnull */ - att_tup->attbyval, - false, /* not a set */ - false); - break; - case CMD_UPDATE: - new_expr = (Node *) makeVar(result_relation, - attrno, - atttype, - atttypmod, - 0); - break; - default: - elog(ERROR, "expand_targetlist: unexpected command_type"); - new_expr = NULL; /* keep compiler quiet */ - break; - } - - new_tle = makeTargetEntry(makeResdom(attrno, - atttype, - atttypmod, - pstrdup(NameStr(att_tup->attname)), - false), - new_expr); - } - - new_tlist = lappend(new_tlist, new_tle); - } - - /* - * The remaining tlist entries should be resjunk; append them all to - * the end of the new tlist, making sure they have resnos higher than - * the last real attribute. (Note: although the rewriter already did - * such renumbering, we have to do it again here in case we are doing - * an UPDATE in an inheritance child table with more columns.) 
- */ - while (tlist) - { - TargetEntry *old_tle = (TargetEntry *) lfirst(tlist); - Resdom *resdom = old_tle->resdom; - - if (!resdom->resjunk) - elog(ERROR, "expand_targetlist: targetlist is not sorted correctly"); - /* Get the resno right, but don't copy unnecessarily */ - if (resdom->resno != attrno) - { - resdom = (Resdom *) copyObject((Node *) resdom); - resdom->resno = attrno; - old_tle = makeTargetEntry(resdom, old_tle->expr); - } - new_tlist = lappend(new_tlist, old_tle); - attrno++; - tlist = lnext(tlist); - } - - heap_close(rel, AccessShareLock); - - return new_tlist; -} diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c deleted file mode 100644 index 367ef4a58a7..00000000000 --- a/src/backend/optimizer/prep/prepunion.c +++ /dev/null @@ -1,886 +0,0 @@ -/*------------------------------------------------------------------------- - * - * prepunion.c - * Routines to plan set-operation queries. The filename is a leftover - * from a time when only UNIONs were implemented. - * - * There is also some code here to support planning of queries that use - * inheritance (SELECT FROM foo*). This no longer has much connection - * to the processing of UNION queries, but it's still here. 
- * - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.74 2002/06/20 20:29:31 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include <sys/types.h> - -#include "catalog/pg_type.h" -#include "nodes/makefuncs.h" -#include "optimizer/clauses.h" -#include "optimizer/plancat.h" -#include "optimizer/planmain.h" -#include "optimizer/planner.h" -#include "optimizer/prep.h" -#include "optimizer/tlist.h" -#include "parser/parse_clause.h" -#include "parser/parse_coerce.h" -#include "parser/parsetree.h" -#include "utils/lsyscache.h" - -/* macros borrowed from expression_tree_mutator */ - -#define FLATCOPY(newnode, node, nodetype) \ - ( (newnode) = makeNode(nodetype), \ - memcpy((newnode), (node), sizeof(nodetype)) ) - -typedef struct -{ - Index old_rt_index; - Index new_rt_index; - Oid old_relid; - Oid new_relid; -} adjust_inherited_attrs_context; - -static Plan *recurse_set_operations(Node *setOp, Query *parse, - List *colTypes, bool junkOK, - int flag, List *refnames_tlist); -static Plan *generate_union_plan(SetOperationStmt *op, Query *parse, - List *refnames_tlist); -static Plan *generate_nonunion_plan(SetOperationStmt *op, Query *parse, - List *refnames_tlist); -static List *recurse_union_children(Node *setOp, Query *parse, - SetOperationStmt *top_union, - List *refnames_tlist); -static List *generate_setop_tlist(List *colTypes, int flag, - bool hack_constants, - List *input_tlist, - List *refnames_tlist); -static List *generate_append_tlist(List *colTypes, bool flag, - List *input_plans, - List *refnames_tlist); -static bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK); -static Node *adjust_inherited_attrs_mutator(Node *node, - adjust_inherited_attrs_context *context); - - -/* - * 
plan_set_operations - * - * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT) - * - * This routine only deals with the setOperations tree of the given query. - * Any top-level ORDER BY requested in parse->sortClause will be added - * when we return to grouping_planner. - */ -Plan * -plan_set_operations(Query *parse) -{ - SetOperationStmt *topop = (SetOperationStmt *) parse->setOperations; - Node *node; - Query *leftmostQuery; - - Assert(topop && IsA(topop, SetOperationStmt)); - - /* - * Find the leftmost component Query. We need to use its column names - * for all generated tlists (else SELECT INTO won't work right). - */ - node = topop->larg; - while (node && IsA(node, SetOperationStmt)) - node = ((SetOperationStmt *) node)->larg; - Assert(node && IsA(node, RangeTblRef)); - leftmostQuery = rt_fetch(((RangeTblRef *) node)->rtindex, - parse->rtable)->subquery; - Assert(leftmostQuery != NULL); - - /* - * Recurse on setOperations tree to generate plans for set ops. The - * final output plan should have just the column types shown as the - * output from the top-level node, plus possibly a resjunk working - * column (we can rely on upper-level nodes to deal with that). 
- */ - return recurse_set_operations((Node *) topop, parse, - topop->colTypes, true, -1, - leftmostQuery->targetList); -} - -/* - * recurse_set_operations - * Recursively handle one step in a tree of set operations - * - * colTypes: integer list of type OIDs of expected output columns - * junkOK: if true, child resjunk columns may be left in the result - * flag: if >= 0, add a resjunk output column indicating value of flag - * refnames_tlist: targetlist to take column names from - */ -static Plan * -recurse_set_operations(Node *setOp, Query *parse, - List *colTypes, bool junkOK, - int flag, List *refnames_tlist) -{ - if (IsA(setOp, RangeTblRef)) - { - RangeTblRef *rtr = (RangeTblRef *) setOp; - RangeTblEntry *rte = rt_fetch(rtr->rtindex, parse->rtable); - Query *subquery = rte->subquery; - Plan *subplan, - *plan; - - Assert(subquery != NULL); - - /* - * Generate plan for primitive subquery - */ - subplan = subquery_planner(subquery, - -1.0 /* default case */ ); - - /* - * Add a SubqueryScan with the caller-requested targetlist - */ - plan = (Plan *) - make_subqueryscan(generate_setop_tlist(colTypes, flag, true, - subplan->targetlist, - refnames_tlist), - NIL, - rtr->rtindex, - subplan); - return plan; - } - else if (IsA(setOp, SetOperationStmt)) - { - SetOperationStmt *op = (SetOperationStmt *) setOp; - Plan *plan; - - /* UNIONs are much different from INTERSECT/EXCEPT */ - if (op->op == SETOP_UNION) - plan = generate_union_plan(op, parse, refnames_tlist); - else - plan = generate_nonunion_plan(op, parse, refnames_tlist); - - /* - * If necessary, add a Result node to project the caller-requested - * output columns. - * - * XXX you don't really want to know about this: setrefs.c will apply - * replace_vars_with_subplan_refs() to the Result node's tlist. - * This would fail if the Vars generated by generate_setop_tlist() - * were not exactly equal() to the corresponding tlist entries of - * the subplan. 
However, since the subplan was generated by - * generate_union_plan() or generate_nonunion_plan(), and hence its - * tlist was generated by generate_append_tlist(), this will work. - */ - if (flag >= 0 || - !tlist_same_datatypes(plan->targetlist, colTypes, junkOK)) - { - plan = (Plan *) - make_result(generate_setop_tlist(colTypes, flag, false, - plan->targetlist, - refnames_tlist), - NULL, - plan); - } - return plan; - } - else - { - elog(ERROR, "recurse_set_operations: unexpected node %d", - (int) nodeTag(setOp)); - return NULL; /* keep compiler quiet */ - } -} - -/* - * Generate plan for a UNION or UNION ALL node - */ -static Plan * -generate_union_plan(SetOperationStmt *op, Query *parse, - List *refnames_tlist) -{ - List *planlist; - List *tlist; - Plan *plan; - - /* - * If any of my children are identical UNION nodes (same op, all-flag, - * and colTypes) then they can be merged into this node so that we - * generate only one Append and Sort for the lot. Recurse to find - * such nodes and compute their children's plans. - */ - planlist = nconc(recurse_union_children(op->larg, parse, - op, refnames_tlist), - recurse_union_children(op->rarg, parse, - op, refnames_tlist)); - - /* - * Generate tlist for Append plan node. - * - * The tlist for an Append plan isn't important as far as the Append is - * concerned, but we must make it look real anyway for the benefit of - * the next plan level up. - */ - tlist = generate_append_tlist(op->colTypes, false, - planlist, refnames_tlist); - - /* - * Append the child results together. - */ - plan = (Plan *) make_append(planlist, false, tlist); - - /* - * For UNION ALL, we just need the Append plan. For UNION, need to - * add Sort and Unique nodes to produce unique output. 
- */ - if (!op->all) - { - List *sortList; - - tlist = new_unsorted_tlist(tlist); - sortList = addAllTargetsToSortList(NIL, tlist); - plan = make_sortplan(parse, tlist, plan, sortList); - plan = (Plan *) make_unique(tlist, plan, copyObject(sortList)); - } - return plan; -} - -/* - * Generate plan for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node - */ -static Plan * -generate_nonunion_plan(SetOperationStmt *op, Query *parse, - List *refnames_tlist) -{ - Plan *lplan, - *rplan, - *plan; - List *tlist, - *sortList, - *planlist; - SetOpCmd cmd; - - /* Recurse on children, ensuring their outputs are marked */ - lplan = recurse_set_operations(op->larg, parse, - op->colTypes, false, 0, - refnames_tlist); - rplan = recurse_set_operations(op->rarg, parse, - op->colTypes, false, 1, - refnames_tlist); - planlist = makeList2(lplan, rplan); - - /* - * Generate tlist for Append plan node. - * - * The tlist for an Append plan isn't important as far as the Append is - * concerned, but we must make it look real anyway for the benefit of - * the next plan level up. In fact, it has to be real enough that the - * flag column is shown as a variable not a constant, else setrefs.c - * will get confused. - */ - tlist = generate_append_tlist(op->colTypes, true, - planlist, refnames_tlist); - - /* - * Append the child results together. - */ - plan = (Plan *) make_append(planlist, false, tlist); - - /* - * Sort the child results, then add a SetOp plan node to generate the - * correct output. - */ - tlist = new_unsorted_tlist(tlist); - sortList = addAllTargetsToSortList(NIL, tlist); - plan = make_sortplan(parse, tlist, plan, sortList); - switch (op->op) - { - case SETOP_INTERSECT: - cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT; - break; - case SETOP_EXCEPT: - cmd = op->all ? 
SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT; - break; - default: - elog(ERROR, "generate_nonunion_plan: bogus operation code"); - cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */ - break; - } - plan = (Plan *) make_setop(cmd, tlist, plan, sortList, - length(op->colTypes) + 1); - return plan; -} - -/* - * Pull up children of a UNION node that are identically-propertied UNIONs. - * - * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct - * output rows will be lost anyway. - */ -static List * -recurse_union_children(Node *setOp, Query *parse, - SetOperationStmt *top_union, - List *refnames_tlist) -{ - if (IsA(setOp, SetOperationStmt)) - { - SetOperationStmt *op = (SetOperationStmt *) setOp; - - if (op->op == top_union->op && - (op->all == top_union->all || op->all) && - equali(op->colTypes, top_union->colTypes)) - { - /* Same UNION, so fold children into parent's subplan list */ - return nconc(recurse_union_children(op->larg, parse, - top_union, - refnames_tlist), - recurse_union_children(op->rarg, parse, - top_union, - refnames_tlist)); - } - } - - /* - * Not same, so plan this child separately. - * - * Note we disallow any resjunk columns in child results. This is - * necessary since the Append node that implements the union won't do - * any projection, and upper levels will get confused if some of our - * output tuples have junk and some don't. This case only arises when - * we have an EXCEPT or INTERSECT as child, else there won't be - * resjunk anyway. 
- */ - return makeList1(recurse_set_operations(setOp, parse, - top_union->colTypes, false, - -1, refnames_tlist)); -} - -/* - * Generate targetlist for a set-operation plan node - * - * colTypes: column datatypes for non-junk columns - * flag: -1 if no flag column needed, 0 or 1 to create a const flag column - * hack_constants: true to copy up constants (see comments in code) - * input_tlist: targetlist of this node's input node - * refnames_tlist: targetlist to take column names from - */ -static List * -generate_setop_tlist(List *colTypes, int flag, - bool hack_constants, - List *input_tlist, - List *refnames_tlist) -{ - List *tlist = NIL; - int resno = 1; - List *i; - Resdom *resdom; - Node *expr; - - foreach(i, colTypes) - { - Oid colType = (Oid) lfirsti(i); - TargetEntry *inputtle = (TargetEntry *) lfirst(input_tlist); - TargetEntry *reftle = (TargetEntry *) lfirst(refnames_tlist); - int32 colTypmod; - - Assert(inputtle->resdom->resno == resno); - Assert(reftle->resdom->resno == resno); - Assert(!inputtle->resdom->resjunk); - Assert(!reftle->resdom->resjunk); - - /* - * Generate columns referencing input columns and having - * appropriate data types and column names. Insert datatype - * coercions where necessary. - * - * HACK: constants in the input's targetlist are copied up as-is - * rather than being referenced as subquery outputs. This is - * mainly to ensure that when we try to coerce them to the output - * column's datatype, the right things happen for UNKNOWN - * constants. But do this only at the first level of - * subquery-scan plans; we don't want phony constants appearing in - * the output tlists of upper-level nodes! 
- */ - if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const)) - expr = inputtle->expr; - else - expr = (Node *) makeVar(0, - inputtle->resdom->resno, - inputtle->resdom->restype, - inputtle->resdom->restypmod, - 0); - if (inputtle->resdom->restype == colType) - { - /* no coercion needed, and believe the input typmod */ - colTypmod = inputtle->resdom->restypmod; - } - else - { - expr = coerce_to_common_type(NULL, - expr, - colType, - "UNION/INTERSECT/EXCEPT"); - colTypmod = -1; - } - resdom = makeResdom((AttrNumber) resno++, - colType, - colTypmod, - pstrdup(reftle->resdom->resname), - false); - tlist = lappend(tlist, makeTargetEntry(resdom, expr)); - input_tlist = lnext(input_tlist); - refnames_tlist = lnext(refnames_tlist); - } - - if (flag >= 0) - { - /* Add a resjunk flag column */ - resdom = makeResdom((AttrNumber) resno++, - INT4OID, - -1, - pstrdup("flag"), - true); - /* flag value is the given constant */ - expr = (Node *) makeConst(INT4OID, - sizeof(int4), - Int32GetDatum(flag), - false, - true, - false, - false); - tlist = lappend(tlist, makeTargetEntry(resdom, expr)); - } - - return tlist; -} - -/* - * Generate targetlist for a set-operation Append node - * - * colTypes: column datatypes for non-junk columns - * flag: true to create a flag column copied up from subplans - * input_plans: list of sub-plans of the Append - * refnames_tlist: targetlist to take column names from - * - * The entries in the Append's targetlist should always be simple Vars; - * we just have to make sure they have the right datatypes and typmods. - */ -static List * -generate_append_tlist(List *colTypes, bool flag, - List *input_plans, - List *refnames_tlist) -{ - List *tlist = NIL; - int resno = 1; - List *curColType; - int colindex; - Resdom *resdom; - Node *expr; - List *planl; - int32 *colTypmods; - - /* - * First extract typmods to use. - * - * If the inputs all agree on type and typmod of a particular column, - * use that typmod; else use -1. 
- */ - colTypmods = (int32 *) palloc(length(colTypes) * sizeof(int32)); - - foreach(planl, input_plans) - { - Plan *subplan = (Plan *) lfirst(planl); - List *subtlist; - - curColType = colTypes; - colindex = 0; - foreach(subtlist, subplan->targetlist) - { - TargetEntry *subtle = (TargetEntry *) lfirst(subtlist); - - if (subtle->resdom->resjunk) - continue; - Assert(curColType != NIL); - if (subtle->resdom->restype == (Oid) lfirsti(curColType)) - { - /* If first subplan, copy the typmod; else compare */ - if (planl == input_plans) - colTypmods[colindex] = subtle->resdom->restypmod; - else if (subtle->resdom->restypmod != colTypmods[colindex]) - colTypmods[colindex] = -1; - } - else - { - /* types disagree, so force typmod to -1 */ - colTypmods[colindex] = -1; - } - curColType = lnext(curColType); - colindex++; - } - Assert(curColType == NIL); - } - - /* - * Now we can build the tlist for the Append. - */ - colindex = 0; - foreach(curColType, colTypes) - { - Oid colType = (Oid) lfirsti(curColType); - int32 colTypmod = colTypmods[colindex++]; - TargetEntry *reftle = (TargetEntry *) lfirst(refnames_tlist); - - Assert(reftle->resdom->resno == resno); - Assert(!reftle->resdom->resjunk); - expr = (Node *) makeVar(0, - resno, - colType, - colTypmod, - 0); - resdom = makeResdom((AttrNumber) resno++, - colType, - colTypmod, - pstrdup(reftle->resdom->resname), - false); - tlist = lappend(tlist, makeTargetEntry(resdom, expr)); - refnames_tlist = lnext(refnames_tlist); - } - - if (flag) - { - /* Add a resjunk flag column */ - resdom = makeResdom((AttrNumber) resno++, - INT4OID, - -1, - pstrdup("flag"), - true); - /* flag value is shown as copied up from subplan */ - expr = (Node *) makeVar(0, - resdom->resno, - INT4OID, - -1, - 0); - tlist = lappend(tlist, makeTargetEntry(resdom, expr)); - } - - pfree(colTypmods); - - return tlist; -} - -/* - * Does tlist have same datatypes as requested colTypes? 
- * - * Resjunk columns are ignored if junkOK is true; otherwise presence of - * a resjunk column will always cause a 'false' result. - */ -static bool -tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK) -{ - List *i; - - foreach(i, tlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(i); - - if (tle->resdom->resjunk) - { - if (!junkOK) - return false; - } - else - { - if (colTypes == NIL) - return false; - if (tle->resdom->restype != (Oid) lfirsti(colTypes)) - return false; - colTypes = lnext(colTypes); - } - } - if (colTypes != NIL) - return false; - return true; -} - - -/* - * find_all_inheritors - - * Returns an integer list of relids including the given rel plus - * all relations that inherit from it, directly or indirectly. - */ -List * -find_all_inheritors(Oid parentrel) -{ - List *examined_relids = NIL; - List *unexamined_relids = makeListi1(parentrel); - - /* - * While the queue of unexamined relids is nonempty, remove the first - * element, mark it examined, and find its direct descendants. NB: - * cannot use foreach(), since we modify the queue inside loop. - */ - while (unexamined_relids != NIL) - { - Oid currentrel = lfirsti(unexamined_relids); - List *currentchildren; - - unexamined_relids = lnext(unexamined_relids); - examined_relids = lappendi(examined_relids, currentrel); - currentchildren = find_inheritance_children(currentrel); - - /* - * Add to the queue only those children not already seen. This - * avoids making duplicate entries in case of multiple inheritance - * paths from the same parent. (It'll also keep us from getting - * into an infinite loop, though theoretically there can't be any - * cycles in the inheritance graph anyway.) - */ - currentchildren = set_differencei(currentchildren, examined_relids); - unexamined_relids = set_unioni(unexamined_relids, currentchildren); - } - - return examined_relids; -} - -/* - * expand_inherted_rtentry - * Check whether a rangetable entry represents an inheritance set. 
- * If so, add entries for all the child tables to the query's - * rangetable, and return an integer list of RT indexes for the - * whole inheritance set (parent and children). - * If not, return NIL. - * - * When dup_parent is false, the initially given RT index is part of the - * returned list (if any). When dup_parent is true, the given RT index - * is *not* in the returned list; a duplicate RTE will be made for the - * parent table. - * - * A childless table is never considered to be an inheritance set; therefore - * the result will never be a one-element list. It'll be either empty - * or have two or more elements. - * - * NOTE: after this routine executes, the specified RTE will always have - * its inh flag cleared, whether or not there were any children. This - * ensures we won't expand the same RTE twice, which would otherwise occur - * for the case of an inherited UPDATE/DELETE target relation. - */ -List * -expand_inherted_rtentry(Query *parse, Index rti, bool dup_parent) -{ - RangeTblEntry *rte = rt_fetch(rti, parse->rtable); - Oid parentOID; - List *inhOIDs; - List *inhRTIs; - List *l; - - /* Does RT entry allow inheritance? */ - if (!rte->inh) - return NIL; - Assert(rte->rtekind == RTE_RELATION); - /* Always clear the parent's inh flag, see above comments */ - rte->inh = false; - /* Fast path for common case of childless table */ - parentOID = rte->relid; - if (!has_subclass(parentOID)) - return NIL; - /* Scan for all members of inheritance set */ - inhOIDs = find_all_inheritors(parentOID); - - /* - * Check that there's at least one descendant, else treat as no-child - * case. This could happen despite above has_subclass() check, if - * table once had a child but no longer does. 
- */ - if (lnext(inhOIDs) == NIL) - return NIL; - /* OK, it's an inheritance set; expand it */ - if (dup_parent) - inhRTIs = NIL; - else - inhRTIs = makeListi1(rti); /* include original RTE in result */ - - foreach(l, inhOIDs) - { - Oid childOID = (Oid) lfirsti(l); - RangeTblEntry *childrte; - Index childRTindex; - - /* parent will be in the list too; skip it if not dup requested */ - if (childOID == parentOID && !dup_parent) - continue; - - /* - * Build an RTE for the child, and attach to query's rangetable - * list. We copy most fields of the parent's RTE, but replace - * relation real name and OID. Note that inh will be false at - * this point. - */ - childrte = copyObject(rte); - childrte->relid = childOID; - parse->rtable = lappend(parse->rtable, childrte); - childRTindex = length(parse->rtable); - - inhRTIs = lappendi(inhRTIs, childRTindex); - } - - return inhRTIs; -} - -/* - * adjust_inherited_attrs - * Copy the specified query or expression and translate Vars referring - * to old_rt_index to refer to new_rt_index. - * - * We also adjust varattno to match the new table by column name, rather - * than column number. This hack makes it possible for child tables to have - * different column positions for the "same" attribute as a parent, which - * helps ALTER TABLE ADD COLUMN. Unfortunately this isn't nearly enough to - * make it work transparently; there are other places where things fall down - * if children and parents don't have the same column numbers for inherited - * attributes. It'd be better to rip this code out and fix ALTER TABLE... - */ -Node * -adjust_inherited_attrs(Node *node, - Index old_rt_index, Oid old_relid, - Index new_rt_index, Oid new_relid) -{ - adjust_inherited_attrs_context context; - - /* Handle simple case simply... 
*/ - if (old_rt_index == new_rt_index) - { - Assert(old_relid == new_relid); - return copyObject(node); - } - - context.old_rt_index = old_rt_index; - context.new_rt_index = new_rt_index; - context.old_relid = old_relid; - context.new_relid = new_relid; - - /* - * Must be prepared to start with a Query or a bare expression tree. - */ - if (node && IsA(node, Query)) - { - Query *query = (Query *) node; - Query *newnode; - - FLATCOPY(newnode, query, Query); - if (newnode->resultRelation == old_rt_index) - newnode->resultRelation = new_rt_index; - query_tree_mutator(newnode, adjust_inherited_attrs_mutator, - (void *) &context, false); - return (Node *) newnode; - } - else - return adjust_inherited_attrs_mutator(node, &context); -} - -static Node * -adjust_inherited_attrs_mutator(Node *node, - adjust_inherited_attrs_context *context) -{ - if (node == NULL) - return NULL; - if (IsA(node, Var)) - { - Var *var = (Var *) copyObject(node); - - if (var->varlevelsup == 0 && - var->varno == context->old_rt_index) - { - var->varno = context->new_rt_index; - if (var->varattno > 0) - var->varattno = get_attnum(context->new_relid, - get_attname(context->old_relid, - var->varattno)); - } - return (Node *) var; - } - if (IsA(node, RangeTblRef)) - { - RangeTblRef *rtr = (RangeTblRef *) copyObject(node); - - if (rtr->rtindex == context->old_rt_index) - rtr->rtindex = context->new_rt_index; - return (Node *) rtr; - } - if (IsA(node, JoinExpr)) - { - /* Copy the JoinExpr node with correct mutation of subnodes */ - JoinExpr *j; - - j = (JoinExpr *) expression_tree_mutator(node, - adjust_inherited_attrs_mutator, - (void *) context); - /* now fix JoinExpr's rtindex */ - if (j->rtindex == context->old_rt_index) - j->rtindex = context->new_rt_index; - return (Node *) j; - } - - /* - * We have to process RestrictInfo nodes specially: we do NOT want to - * copy the original subclauseindices list, since the new rel may have - * different indices. The list will be rebuilt during later planning. 
- */ - if (IsA(node, RestrictInfo)) - { - RestrictInfo *oldinfo = (RestrictInfo *) node; - RestrictInfo *newinfo = makeNode(RestrictInfo); - - /* Copy all flat-copiable fields */ - memcpy(newinfo, oldinfo, sizeof(RestrictInfo)); - - newinfo->clause = (Expr *) - adjust_inherited_attrs_mutator((Node *) oldinfo->clause, context); - - newinfo->subclauseindices = NIL; - newinfo->eval_cost = -1; /* reset these too */ - newinfo->this_selec = -1; - newinfo->left_pathkey = NIL; /* and these */ - newinfo->right_pathkey = NIL; - newinfo->left_mergescansel = -1; - newinfo->right_mergescansel = -1; - newinfo->left_bucketsize = -1; - newinfo->right_bucketsize = -1; - - return (Node *) newinfo; - } - - /* - * NOTE: we do not need to recurse into sublinks, because they should - * already have been converted to subplans before we see them. - */ - - /* - * BUT: although we don't need to recurse into subplans, we do need to - * make sure that they are copied, not just referenced as - * expression_tree_mutator will do by default. Otherwise we'll have - * the same subplan node referenced from each arm of the inheritance - * APPEND plan, which will cause trouble in the executor. This is a - * kluge that should go away when we redesign querytrees. 
- */ - if (is_subplan(node)) - { - SubPlan *subplan; - - /* Copy the node and process subplan args */ - node = expression_tree_mutator(node, adjust_inherited_attrs_mutator, - (void *) context); - /* Make sure we have separate copies of subplan and its rtable */ - subplan = (SubPlan *) ((Expr *) node)->oper; - subplan->plan = copyObject(subplan->plan); - subplan->rtable = copyObject(subplan->rtable); - return node; - } - - return expression_tree_mutator(node, adjust_inherited_attrs_mutator, - (void *) context); -} diff --git a/src/backend/optimizer/util/Makefile b/src/backend/optimizer/util/Makefile deleted file mode 100644 index 471cfdf6b9b..00000000000 --- a/src/backend/optimizer/util/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -#------------------------------------------------------------------------- -# -# Makefile-- -# Makefile for optimizer/util -# -# IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/optimizer/util/Makefile,v 1.14 2000/09/29 18:21:23 tgl Exp $ -# -#------------------------------------------------------------------------- - -subdir = src/backend/optimizer/util -top_builddir = ../../../.. 
-include $(top_builddir)/src/Makefile.global - -OBJS = restrictinfo.o clauses.o plancat.o \ - joininfo.o pathnode.o relnode.o tlist.o var.o - -all: SUBSYS.o - -SUBSYS.o: $(OBJS) - $(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS) - -depend dep: - $(CC) -MM $(CFLAGS) *.c >depend - -clean: - rm -f SUBSYS.o $(OBJS) - -ifeq (depend,$(wildcard depend)) -include depend -endif diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c deleted file mode 100644 index 366a23c5cd0..00000000000 --- a/src/backend/optimizer/util/clauses.c +++ /dev/null @@ -1,2324 +0,0 @@ -/*------------------------------------------------------------------------- - * - * clauses.c - * routines to manipulate qualification clauses - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.101 2002/06/20 20:29:31 momjian Exp $ - * - * HISTORY - * AUTHOR DATE MAJOR EVENT - * Andrew Yu Nov 3, 1994 clause.c and clauses.c combined - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" - -#include "catalog/pg_operator.h" -#include "catalog/pg_proc.h" -#include "catalog/pg_type.h" -#include "executor/executor.h" -#include "nodes/makefuncs.h" -#include "nodes/nodeFuncs.h" -#include "optimizer/clauses.h" -#include "optimizer/tlist.h" -#include "optimizer/var.h" -#include "parser/parsetree.h" -#include "utils/datum.h" -#include "utils/lsyscache.h" -#include "utils/syscache.h" - - -/* note that pg_type.h hardwires size of bool as 1 ... 
duplicate it */ -#define MAKEBOOLCONST(val,isnull) \ - ((Node *) makeConst(BOOLOID, 1, (Datum) (val), \ - (isnull), true, false, false)) - -typedef struct -{ - Query *query; - List *groupClauses; -} check_subplans_for_ungrouped_vars_context; - -static bool contain_agg_clause_walker(Node *node, void *context); -static bool pull_agg_clause_walker(Node *node, List **listptr); -static bool expression_returns_set_walker(Node *node, void *context); -static bool contain_subplans_walker(Node *node, void *context); -static bool pull_subplans_walker(Node *node, List **listptr); -static bool check_subplans_for_ungrouped_vars_walker(Node *node, - check_subplans_for_ungrouped_vars_context * context); -static bool contain_mutable_functions_walker(Node *node, void *context); -static bool contain_volatile_functions_walker(Node *node, void *context); -static Node *eval_const_expressions_mutator(Node *node, void *context); -static Expr *simplify_op_or_func(Expr *expr, List *args); - - -Expr * -make_clause(int type, Node *oper, List *args) -{ - Expr *expr = makeNode(Expr); - - switch (type) - { - case AND_EXPR: - case OR_EXPR: - case NOT_EXPR: - expr->typeOid = BOOLOID; - break; - case OP_EXPR: - expr->typeOid = ((Oper *) oper)->opresulttype; - break; - case FUNC_EXPR: - expr->typeOid = ((Func *) oper)->funcresulttype; - break; - default: - elog(ERROR, "make_clause: unsupported type %d", type); - break; - } - expr->opType = type; - expr->oper = oper; /* ignored for AND, OR, NOT */ - expr->args = args; - return expr; -} - - -/***************************************************************************** - * OPERATOR clause functions - *****************************************************************************/ - - -/* - * is_opclause - * - * Returns t iff the clause is an operator clause: - * (op expr expr) or (op expr). - * - * [historical note: is_clause has the exact functionality and is used - * throughout the code. They're renamed to is_opclause for clarity. - * - ay 10/94.] 
- */ -bool -is_opclause(Node *clause) -{ - return (clause != NULL && - IsA(clause, Expr) && - ((Expr *) clause)->opType == OP_EXPR); -} - -/* - * make_opclause - * Creates a clause given its operator left operand and right - * operand (if it is non-null). - * - */ -Expr * -make_opclause(Oper *op, Var *leftop, Var *rightop) -{ - Expr *expr = makeNode(Expr); - - expr->typeOid = op->opresulttype; - expr->opType = OP_EXPR; - expr->oper = (Node *) op; - if (rightop) - expr->args = makeList2(leftop, rightop); - else - expr->args = makeList1(leftop); - return expr; -} - -/* - * get_leftop - * - * Returns the left operand of a clause of the form (op expr expr) - * or (op expr) - * - * NB: for historical reasons, the result is declared Var *, even - * though many callers can cope with results that are not Vars. - * The result really ought to be declared Expr * or Node *. - */ -Var * -get_leftop(Expr *clause) -{ - if (clause->args != NULL) - return lfirst(clause->args); - else - return NULL; -} - -/* - * get_rightop - * - * Returns the right operand in a clause of the form (op expr expr). - * NB: result will be NULL if applied to a unary op clause. - */ -Var * -get_rightop(Expr *clause) -{ - if (clause->args != NULL && lnext(clause->args) != NULL) - return lfirst(lnext(clause->args)); - else - return NULL; -} - -/***************************************************************************** - * FUNC clause functions - *****************************************************************************/ - -/* - * is_funcclause - * - * Returns t iff the clause is a function clause: (func { expr }). - * - */ -bool -is_funcclause(Node *clause) -{ - return (clause != NULL && - IsA(clause, Expr) && - ((Expr *) clause)->opType == FUNC_EXPR); -} - -/* - * make_funcclause - * - * Creates a function clause given the FUNC node and the functional - * arguments. 
- * - */ -Expr * -make_funcclause(Func *func, List *funcargs) -{ - Expr *expr = makeNode(Expr); - - expr->typeOid = func->funcresulttype; - expr->opType = FUNC_EXPR; - expr->oper = (Node *) func; - expr->args = funcargs; - return expr; -} - -/***************************************************************************** - * OR clause functions - *****************************************************************************/ - -/* - * or_clause - * - * Returns t iff the clause is an 'or' clause: (OR { expr }). - * - */ -bool -or_clause(Node *clause) -{ - return (clause != NULL && - IsA(clause, Expr) && - ((Expr *) clause)->opType == OR_EXPR); -} - -/* - * make_orclause - * - * Creates an 'or' clause given a list of its subclauses. - * - */ -Expr * -make_orclause(List *orclauses) -{ - Expr *expr = makeNode(Expr); - - expr->typeOid = BOOLOID; - expr->opType = OR_EXPR; - expr->oper = NULL; - expr->args = orclauses; - return expr; -} - -/***************************************************************************** - * NOT clause functions - *****************************************************************************/ - -/* - * not_clause - * - * Returns t iff this is a 'not' clause: (NOT expr). - * - */ -bool -not_clause(Node *clause) -{ - return (clause != NULL && - IsA(clause, Expr) && - ((Expr *) clause)->opType == NOT_EXPR); -} - -/* - * make_notclause - * - * Create a 'not' clause given the expression to be negated. 
- * - */ -Expr * -make_notclause(Expr *notclause) -{ - Expr *expr = makeNode(Expr); - - expr->typeOid = BOOLOID; - expr->opType = NOT_EXPR; - expr->oper = NULL; - expr->args = makeList1(notclause); - return expr; -} - -/* - * get_notclausearg - * - * Retrieve the clause within a 'not' clause - * - */ -Expr * -get_notclausearg(Expr *notclause) -{ - return lfirst(notclause->args); -} - -/***************************************************************************** - * AND clause functions - *****************************************************************************/ - - -/* - * and_clause - * - * Returns t iff its argument is an 'and' clause: (AND { expr }). - * - */ -bool -and_clause(Node *clause) -{ - return (clause != NULL && - IsA(clause, Expr) && - ((Expr *) clause)->opType == AND_EXPR); -} - -/* - * make_andclause - * - * Create an 'and' clause given its arguments in a list. - */ -Expr * -make_andclause(List *andclauses) -{ - Expr *expr = makeNode(Expr); - - expr->typeOid = BOOLOID; - expr->opType = AND_EXPR; - expr->oper = NULL; - expr->args = andclauses; - return expr; -} - -/* - * make_and_qual - * - * Variant of make_andclause for ANDing two qual conditions together. - * Qual conditions have the property that a NULL nodetree is interpreted - * as 'true'. - */ -Node * -make_and_qual(Node *qual1, Node *qual2) -{ - if (qual1 == NULL) - return qual2; - if (qual2 == NULL) - return qual1; - return (Node *) make_andclause(makeList2(qual1, qual2)); -} - -/* - * Sometimes (such as in the result of canonicalize_qual or the input of - * ExecQual), we use lists of expression nodes with implicit AND semantics. - * - * These functions convert between an AND-semantics expression list and the - * ordinary representation of a boolean expression. - * - * Note that an empty list is considered equivalent to TRUE. 
- */ -Expr * -make_ands_explicit(List *andclauses) -{ - if (andclauses == NIL) - return (Expr *) MAKEBOOLCONST(true, false); - else if (lnext(andclauses) == NIL) - return (Expr *) lfirst(andclauses); - else - return make_andclause(andclauses); -} - -List * -make_ands_implicit(Expr *clause) -{ - /* - * NB: because the parser sets the qual field to NULL in a query that - * has no WHERE clause, we must consider a NULL input clause as TRUE, - * even though one might more reasonably think it FALSE. Grumble. If - * this causes trouble, consider changing the parser's behavior. - */ - if (clause == NULL) - return NIL; /* NULL -> NIL list == TRUE */ - else if (and_clause((Node *) clause)) - return clause->args; - else if (IsA(clause, Const) && - !((Const *) clause)->constisnull && - DatumGetBool(((Const *) clause)->constvalue)) - return NIL; /* constant TRUE input -> NIL list */ - else - return makeList1(clause); -} - - -/***************************************************************************** - * Aggregate-function clause manipulation - *****************************************************************************/ - -/* - * contain_agg_clause - * Recursively search for Aggref nodes within a clause. - * - * Returns true if any aggregate found. - */ -bool -contain_agg_clause(Node *clause) -{ - return contain_agg_clause_walker(clause, NULL); -} - -static bool -contain_agg_clause_walker(Node *node, void *context) -{ - if (node == NULL) - return false; - if (IsA(node, Aggref)) - return true; /* abort the tree traversal and return - * true */ - return expression_tree_walker(node, contain_agg_clause_walker, context); -} - -/* - * pull_agg_clause - * Recursively pulls all Aggref nodes from an expression tree. - * - * Returns list of Aggref nodes found. Note the nodes themselves are not - * copied, only referenced. - * - * Note: this also checks for nested aggregates, which are an error. 
- */ -List * -pull_agg_clause(Node *clause) -{ - List *result = NIL; - - pull_agg_clause_walker(clause, &result); - return result; -} - -static bool -pull_agg_clause_walker(Node *node, List **listptr) -{ - if (node == NULL) - return false; - if (IsA(node, Aggref)) - { - *listptr = lappend(*listptr, node); - - /* - * Complain if the aggregate's argument contains any aggregates; - * nested agg functions are semantically nonsensical. - */ - if (contain_agg_clause(((Aggref *) node)->target)) - elog(ERROR, "Aggregate function calls may not be nested"); - - /* - * Having checked that, we need not recurse into the argument. - */ - return false; - } - return expression_tree_walker(node, pull_agg_clause_walker, - (void *) listptr); -} - - -/***************************************************************************** - * Support for expressions returning sets - *****************************************************************************/ - -/* - * expression_returns_set - * Test whethe an expression returns a set result. - * - * Because we use expression_tree_walker(), this can also be applied to - * whole targetlists; it'll produce TRUE if any one of the tlist items - * returns a set. 
- */ -bool -expression_returns_set(Node *clause) -{ - return expression_returns_set_walker(clause, NULL); -} - -static bool -expression_returns_set_walker(Node *node, void *context) -{ - if (node == NULL) - return false; - if (IsA(node, Expr)) - { - Expr *expr = (Expr *) node; - - switch (expr->opType) - { - case OP_EXPR: - if (((Oper *) expr->oper)->opretset) - return true; - /* else fall through to check args */ - break; - case FUNC_EXPR: - if (((Func *) expr->oper)->funcretset) - return true; - /* else fall through to check args */ - break; - case OR_EXPR: - case AND_EXPR: - case NOT_EXPR: - /* Booleans can't return a set, so no need to recurse */ - return false; - case SUBPLAN_EXPR: - /* Subplans can't presently return sets either */ - return false; - } - } - /* Avoid recursion for some other cases that can't return a set */ - if (IsA(node, Aggref)) - return false; - if (IsA(node, SubLink)) - return false; - return expression_tree_walker(node, expression_returns_set_walker, - context); -} - -/***************************************************************************** - * Subplan clause manipulation - *****************************************************************************/ - -/* - * contain_subplans - * Recursively search for subplan nodes within a clause. - * - * If we see a SubLink node, we will return TRUE. This is only possible if - * the expression tree hasn't yet been transformed by subselect.c. We do not - * know whether the node will produce a true subplan or just an initplan, - * but we make the conservative assumption that it will be a subplan. - * - * Returns true if any subplan found. 
- */ -bool -contain_subplans(Node *clause) -{ - return contain_subplans_walker(clause, NULL); -} - -static bool -contain_subplans_walker(Node *node, void *context) -{ - if (node == NULL) - return false; - if (is_subplan(node) || IsA(node, SubLink)) - return true; /* abort the tree traversal and return - * true */ - return expression_tree_walker(node, contain_subplans_walker, context); -} - -/* - * pull_subplans - * Recursively pulls all subplans from an expression tree. - * - * Returns list of subplan nodes found. Note the nodes themselves are not - * copied, only referenced. - */ -List * -pull_subplans(Node *clause) -{ - List *result = NIL; - - pull_subplans_walker(clause, &result); - return result; -} - -static bool -pull_subplans_walker(Node *node, List **listptr) -{ - if (node == NULL) - return false; - if (is_subplan(node)) - { - *listptr = lappend(*listptr, ((Expr *) node)->oper); - /* fall through to check args to subplan */ - } - return expression_tree_walker(node, pull_subplans_walker, - (void *) listptr); -} - -/* - * check_subplans_for_ungrouped_vars - * Check for subplans that are being passed ungrouped variables as - * parameters; generate an error message if any are found. - * - * In most contexts, ungrouped variables will be detected by the parser (see - * parse_agg.c, check_ungrouped_columns()). But that routine currently does - * not check subplans, because the necessary info is not computed until the - * planner runs. So we do it here, after we have processed sublinks into - * subplans. This ought to be cleaned up someday. - * - * A deficiency in this scheme is that any outer reference var must be - * grouped by itself; we don't recognize groupable expressions within - * subselects. For example, consider - * SELECT - * (SELECT x FROM bar where y = (foo.a + foo.b)) - * FROM foo - * GROUP BY a + b; - * This query will be rejected although it could be allowed. 
- */ -void -check_subplans_for_ungrouped_vars(Query *query) -{ - check_subplans_for_ungrouped_vars_context context; - List *gl; - - context.query = query; - - /* - * Build a list of the acceptable GROUP BY expressions for use in the - * walker (to avoid repeated scans of the targetlist within the - * recursive routine). - */ - context.groupClauses = NIL; - foreach(gl, query->groupClause) - { - GroupClause *grpcl = lfirst(gl); - Node *expr; - - expr = get_sortgroupclause_expr(grpcl, query->targetList); - context.groupClauses = lcons(expr, context.groupClauses); - } - - /* - * Recursively scan the targetlist and the HAVING clause. WHERE and - * JOIN/ON conditions are not examined, since they are evaluated - * before grouping. - */ - check_subplans_for_ungrouped_vars_walker((Node *) query->targetList, - &context); - check_subplans_for_ungrouped_vars_walker(query->havingQual, - &context); - - freeList(context.groupClauses); -} - -static bool -check_subplans_for_ungrouped_vars_walker(Node *node, - check_subplans_for_ungrouped_vars_context * context) -{ - List *gl; - - if (node == NULL) - return false; - if (IsA(node, Const) ||IsA(node, Param)) - return false; /* constants are always acceptable */ - - /* - * If we find an aggregate function, do not recurse into its - * arguments. Subplans invoked within aggregate calls are allowed to - * receive ungrouped variables. (This test and the next one should - * match the logic in parse_agg.c's check_ungrouped_columns().) - */ - if (IsA(node, Aggref)) - return false; - - /* - * Check to see if subexpression as a whole matches any GROUP BY item. - * We need to do this at every recursion level so that we recognize - * GROUPed-BY expressions before reaching variables within them. - */ - foreach(gl, context->groupClauses) - { - if (equal(node, lfirst(gl))) - return false; /* acceptable, do not descend more */ - } - - /* - * We can ignore Vars other than in subplan args lists, since the - * parser already checked 'em. 
- */ - if (is_subplan(node)) - { - /* - * The args list of the subplan node represents attributes from - * outside passed into the sublink. - */ - List *t; - - foreach(t, ((Expr *) node)->args) - { - Node *thisarg = lfirst(t); - Var *var; - bool contained_in_group_clause; - - /* - * We do not care about args that are not local variables; - * params or outer-level vars are not our responsibility to - * check. (The outer-level query passing them to us needs to - * worry, instead.) - */ - if (!IsA(thisarg, Var)) - continue; - var = (Var *) thisarg; - if (var->varlevelsup > 0) - continue; - - /* - * Else, see if it is a grouping column. - */ - contained_in_group_clause = false; - foreach(gl, context->groupClauses) - { - if (equal(thisarg, lfirst(gl))) - { - contained_in_group_clause = true; - break; - } - } - - if (!contained_in_group_clause) - { - /* Found an ungrouped argument. Complain. */ - RangeTblEntry *rte; - char *attname; - - Assert(var->varno > 0 && - (int) var->varno <= length(context->query->rtable)); - rte = rt_fetch(var->varno, context->query->rtable); - attname = get_rte_attribute_name(rte, var->varattno); - elog(ERROR, "Sub-SELECT uses un-GROUPed attribute %s.%s from outer query", - rte->eref->aliasname, attname); - } - } - } - return expression_tree_walker(node, - check_subplans_for_ungrouped_vars_walker, - (void *) context); -} - - -/***************************************************************************** - * Check clauses for mutable functions - *****************************************************************************/ - -/* - * contain_mutable_functions - * Recursively search for mutable functions within a clause. - * - * Returns true if any mutable function (or operator implemented by a - * mutable function) is found. This test is needed so that we don't - * mistakenly think that something like "WHERE random() < 0.5" can be treated - * as a constant qualification. 
- * - * XXX we do not examine sublinks/subplans to see if they contain uses of - * mutable functions. It's not real clear if that is correct or not... - */ -bool -contain_mutable_functions(Node *clause) -{ - return contain_mutable_functions_walker(clause, NULL); -} - -static bool -contain_mutable_functions_walker(Node *node, void *context) -{ - if (node == NULL) - return false; - if (IsA(node, Expr)) - { - Expr *expr = (Expr *) node; - - switch (expr->opType) - { - case OP_EXPR: - if (op_volatile(((Oper *) expr->oper)->opno) != PROVOLATILE_IMMUTABLE) - return true; - break; - case FUNC_EXPR: - if (func_volatile(((Func *) expr->oper)->funcid) != PROVOLATILE_IMMUTABLE) - return true; - break; - default: - break; - } - } - return expression_tree_walker(node, contain_mutable_functions_walker, - context); -} - - -/***************************************************************************** - * Check clauses for volatile functions - *****************************************************************************/ - -/* - * contain_volatile_functions - * Recursively search for volatile functions within a clause. - * - * Returns true if any volatile function (or operator implemented by a - * volatile function) is found. This test prevents invalid conversions - * of volatile expressions into indexscan quals. - * - * XXX we do not examine sublinks/subplans to see if they contain uses of - * volatile functions. It's not real clear if that is correct or not... 
- */ -bool -contain_volatile_functions(Node *clause) -{ - return contain_volatile_functions_walker(clause, NULL); -} - -static bool -contain_volatile_functions_walker(Node *node, void *context) -{ - if (node == NULL) - return false; - if (IsA(node, Expr)) - { - Expr *expr = (Expr *) node; - - switch (expr->opType) - { - case OP_EXPR: - if (op_volatile(((Oper *) expr->oper)->opno) == PROVOLATILE_VOLATILE) - return true; - break; - case FUNC_EXPR: - if (func_volatile(((Func *) expr->oper)->funcid) == PROVOLATILE_VOLATILE) - return true; - break; - default: - break; - } - } - return expression_tree_walker(node, contain_volatile_functions_walker, - context); -} - - -/***************************************************************************** - * Check for "pseudo-constant" clauses - *****************************************************************************/ - -/* - * is_pseudo_constant_clause - * Detect whether a clause is "constant", ie, it contains no variables - * of the current query level and no uses of volatile functions. - * Such a clause is not necessarily a true constant: it can still contain - * Params and outer-level Vars, not to mention functions whose results - * may vary from one statement to the next. However, the clause's value - * will be constant over any one scan of the current query, so it can be - * used as an indexscan key or (if a top-level qual) can be pushed up to - * become a gating qual. - */ -bool -is_pseudo_constant_clause(Node *clause) -{ - /* - * We could implement this check in one recursive scan. But since the - * check for volatile functions is both moderately expensive and - * unlikely to fail, it seems better to look for Vars first and only - * check for volatile functions if we find no Vars. 
- */ - if (!contain_var_clause(clause) && - !contain_volatile_functions(clause)) - return true; - return false; -} - -/* - * pull_constant_clauses - * Scan through a list of qualifications and separate "constant" quals - * from those that are not. - * - * Returns a list of the pseudo-constant clauses in constantQual and the - * remaining quals as the return value. - */ -List * -pull_constant_clauses(List *quals, List **constantQual) -{ - List *constqual = NIL; - List *restqual = NIL; - List *q; - - foreach(q, quals) - { - Node *qual = (Node *) lfirst(q); - - if (is_pseudo_constant_clause(qual)) - constqual = lappend(constqual, qual); - else - restqual = lappend(restqual, qual); - } - *constantQual = constqual; - return restqual; -} - - -/***************************************************************************** - * Tests on clauses of queries - * - * Possibly this code should go someplace else, since this isn't quite the - * same meaning of "clause" as is used elsewhere in this module. But I can't - * think of a better place for it... - *****************************************************************************/ - -/* - * Test whether a sort/group reference value appears in the given list of - * SortClause (or GroupClause) nodes. - * - * Because GroupClause is typedef'd as SortClause, either kind of - * node list can be passed without casting. - */ -static bool -sortgroupref_is_present(Index sortgroupref, List *clauselist) -{ - List *clause; - - foreach(clause, clauselist) - { - SortClause *scl = (SortClause *) lfirst(clause); - - if (scl->tleSortGroupRef == sortgroupref) - return true; - } - return false; -} - -/* - * Test whether a query uses DISTINCT ON, ie, has a distinct-list that is - * not the same as the set of output columns. - */ -bool -has_distinct_on_clause(Query *query) -{ - List *targetList; - - /* Is there a DISTINCT clause at all? 
*/ - if (query->distinctClause == NIL) - return false; - - /* - * If the DISTINCT list contains all the nonjunk targetlist items, and - * nothing else (ie, no junk tlist items), then it's a simple - * DISTINCT, else it's DISTINCT ON. We do not require the lists to be - * in the same order (since the parser may have adjusted the DISTINCT - * clause ordering to agree with ORDER BY). Furthermore, a - * non-DISTINCT junk tlist item that is in the sortClause is also - * evidence of DISTINCT ON, since we don't allow ORDER BY on junk - * tlist items when plain DISTINCT is used. - * - * This code assumes that the DISTINCT list is valid, ie, all its entries - * match some entry of the tlist. - */ - foreach(targetList, query->targetList) - { - TargetEntry *tle = (TargetEntry *) lfirst(targetList); - Index ressortgroupref = tle->resdom->ressortgroupref; - - if (ressortgroupref == 0) - { - if (tle->resdom->resjunk) - continue; /* we can ignore unsorted junk cols */ - return true; /* definitely not in DISTINCT list */ - } - if (sortgroupref_is_present(ressortgroupref, query->distinctClause)) - { - if (tle->resdom->resjunk) - return true; /* junk TLE in DISTINCT means DISTINCT ON */ - /* else this TLE is okay, keep looking */ - } - else - { - /* This TLE is not in DISTINCT list */ - if (!tle->resdom->resjunk) - return true; /* non-junk, non-DISTINCT, so DISTINCT ON */ - if (sortgroupref_is_present(ressortgroupref, query->sortClause)) - return true; /* sorted, non-distinct junk */ - /* unsorted junk is okay, keep looking */ - } - } - /* It's a simple DISTINCT */ - return false; -} - - -/***************************************************************************** - * * - * General clause-manipulating routines * - * * - *****************************************************************************/ - -/* - * clause_get_relids_vars - * Retrieves distinct relids and vars appearing within a clause. 
- * - * '*relids' is set to an integer list of all distinct "varno"s appearing - * in Vars within the clause. - * '*vars' is set to a list of all distinct Vars appearing within the clause. - * Var nodes are considered distinct if they have different varno - * or varattno values. If there are several occurrences of the same - * varno/varattno, you get a randomly chosen one... - * - * Note that upper-level vars are ignored, since they normally will - * become Params with respect to this query level. - */ -void -clause_get_relids_vars(Node *clause, Relids *relids, List **vars) -{ - List *clvars = pull_var_clause(clause, false); - List *varno_list = NIL; - List *var_list = NIL; - List *i; - - foreach(i, clvars) - { - Var *var = (Var *) lfirst(i); - List *vi; - - if (!intMember(var->varno, varno_list)) - varno_list = lconsi(var->varno, varno_list); - foreach(vi, var_list) - { - Var *in_list = (Var *) lfirst(vi); - - if (in_list->varno == var->varno && - in_list->varattno == var->varattno) - break; - } - if (vi == NIL) - var_list = lcons(var, var_list); - } - freeList(clvars); - - *relids = varno_list; - *vars = var_list; -} - -/* - * NumRelids - * (formerly clause_relids) - * - * Returns the number of different relations referenced in 'clause'. - */ -int -NumRelids(Node *clause) -{ - List *varno_list = pull_varnos(clause); - int result = length(varno_list); - - freeList(varno_list); - return result; -} - -/*-------------------- - * CommuteClause: commute a binary operator clause - * - * XXX the clause is destructively modified! 
- *-------------------- - */ -void -CommuteClause(Expr *clause) -{ - Oid opoid; - HeapTuple optup; - Form_pg_operator commuTup; - Oper *commu; - Node *temp; - - if (!is_opclause((Node *) clause) || - length(clause->args) != 2) - elog(ERROR, "CommuteClause: applied to non-binary-operator clause"); - - opoid = ((Oper *) clause->oper)->opno; - - optup = SearchSysCache(OPEROID, - ObjectIdGetDatum(get_commutator(opoid)), - 0, 0, 0); - if (!HeapTupleIsValid(optup)) - elog(ERROR, "CommuteClause: no commutator for operator %u", opoid); - - commuTup = (Form_pg_operator) GETSTRUCT(optup); - - commu = makeOper(optup->t_data->t_oid, - commuTup->oprcode, - commuTup->oprresult, - ((Oper *) clause->oper)->opretset); - - ReleaseSysCache(optup); - - /* - * re-form the clause in-place! - */ - clause->oper = (Node *) commu; - temp = lfirst(clause->args); - lfirst(clause->args) = lsecond(clause->args); - lsecond(clause->args) = temp; -} - - -/*-------------------- - * eval_const_expressions - * - * Reduce any recognizably constant subexpressions of the given - * expression tree, for example "2 + 2" => "4". More interestingly, - * we can reduce certain boolean expressions even when they contain - * non-constant subexpressions: "x OR true" => "true" no matter what - * the subexpression x is. (XXX We assume that no such subexpression - * will have important side-effects, which is not necessarily a good - * assumption in the presence of user-defined functions; do we need a - * pg_proc flag that prevents discarding the execution of a function?) - * - * We do understand that certain functions may deliver non-constant - * results even with constant inputs, "nextval()" being the classic - * example. Functions that are not marked "immutable" in pg_proc - * will not be pre-evaluated here, although we will reduce their - * arguments as far as possible. 
- * - * We assume that the tree has already been type-checked and contains - * only operators and functions that are reasonable to try to execute. - * - * This routine should be invoked before converting sublinks to subplans - * (subselect.c's SS_process_sublinks()). The converted form contains - * bogus "Const" nodes that are actually placeholders where the executor - * will insert values from the inner plan, and obviously we mustn't try - * to reduce the expression as though these were really constants. - * As a safeguard, if we happen to find an already-converted SubPlan node, - * we will return it unchanged rather than recursing into it. - *-------------------- - */ -Node * -eval_const_expressions(Node *node) -{ - /* no context or special setup needed, so away we go... */ - return eval_const_expressions_mutator(node, NULL); -} - -static Node * -eval_const_expressions_mutator(Node *node, void *context) -{ - if (node == NULL) - return NULL; - if (IsA(node, Expr)) - { - Expr *expr = (Expr *) node; - List *args; - Const *const_input; - Expr *newexpr; - - /* - * Reduce constants in the Expr's arguments. We know args is - * either NIL or a List node, so we can call - * expression_tree_mutator directly rather than recursing to self. - */ - args = (List *) expression_tree_mutator((Node *) expr->args, - eval_const_expressions_mutator, - (void *) context); - - switch (expr->opType) - { - case OP_EXPR: - case FUNC_EXPR: - - /* - * Code for op/func case is pretty bulky, so split it out - * as a separate function. 
- */ - newexpr = simplify_op_or_func(expr, args); - if (newexpr) /* successfully simplified it */ - return (Node *) newexpr; - - /* - * else fall out to build new Expr node with simplified - * args - */ - break; - case OR_EXPR: - { - - /*---------- - * OR arguments are handled as follows: - * non constant: keep - * FALSE: drop (does not affect result) - * TRUE: force result to TRUE - * NULL: keep only one - * We keep one NULL input because ExecEvalOr returns NULL - * when no input is TRUE and at least one is NULL. - *---------- - */ - List *newargs = NIL; - List *arg; - bool haveNull = false; - bool forceTrue = false; - - foreach(arg, args) - { - if (!IsA(lfirst(arg), Const)) - { - newargs = lappend(newargs, lfirst(arg)); - continue; - } - const_input = (Const *) lfirst(arg); - if (const_input->constisnull) - haveNull = true; - else if (DatumGetBool(const_input->constvalue)) - forceTrue = true; - /* otherwise, we can drop the constant-false input */ - } - - /* - * We could return TRUE before falling out of the - * loop, but this coding method will be easier to - * adapt if we ever add a notion of non-removable - * functions. We'd need to check all the inputs for - * non-removability. - */ - if (forceTrue) - return MAKEBOOLCONST(true, false); - if (haveNull) - newargs = lappend(newargs, MAKEBOOLCONST(false, true)); - /* If all the inputs are FALSE, result is FALSE */ - if (newargs == NIL) - return MAKEBOOLCONST(false, false); - /* If only one nonconst-or-NULL input, it's the result */ - if (lnext(newargs) == NIL) - return (Node *) lfirst(newargs); - /* Else we still need an OR node */ - return (Node *) make_orclause(newargs); - } - case AND_EXPR: - { - - /*---------- - * AND arguments are handled as follows: - * non constant: keep - * TRUE: drop (does not affect result) - * FALSE: force result to FALSE - * NULL: keep only one - * We keep one NULL input because ExecEvalAnd returns NULL - * when no input is FALSE and at least one is NULL. 
- *---------- - */ - List *newargs = NIL; - List *arg; - bool haveNull = false; - bool forceFalse = false; - - foreach(arg, args) - { - if (!IsA(lfirst(arg), Const)) - { - newargs = lappend(newargs, lfirst(arg)); - continue; - } - const_input = (Const *) lfirst(arg); - if (const_input->constisnull) - haveNull = true; - else if (!DatumGetBool(const_input->constvalue)) - forceFalse = true; - /* otherwise, we can drop the constant-true input */ - } - - /* - * We could return FALSE before falling out of the - * loop, but this coding method will be easier to - * adapt if we ever add a notion of non-removable - * functions. We'd need to check all the inputs for - * non-removability. - */ - if (forceFalse) - return MAKEBOOLCONST(false, false); - if (haveNull) - newargs = lappend(newargs, MAKEBOOLCONST(false, true)); - /* If all the inputs are TRUE, result is TRUE */ - if (newargs == NIL) - return MAKEBOOLCONST(true, false); - /* If only one nonconst-or-NULL input, it's the result */ - if (lnext(newargs) == NIL) - return (Node *) lfirst(newargs); - /* Else we still need an AND node */ - return (Node *) make_andclause(newargs); - } - case NOT_EXPR: - Assert(length(args) == 1); - if (!IsA(lfirst(args), Const)) - break; - const_input = (Const *) lfirst(args); - /* NOT NULL => NULL */ - if (const_input->constisnull) - return MAKEBOOLCONST(false, true); - /* otherwise pretty easy */ - return MAKEBOOLCONST(!DatumGetBool(const_input->constvalue), - false); - case SUBPLAN_EXPR: - - /* - * Safety measure per notes at head of this routine: - * return a SubPlan unchanged. Too late to do anything - * with it. The arglist simplification above was wasted - * work (the list probably only contains Var nodes - * anyway). 
- */ - return (Node *) expr; - default: - elog(ERROR, "eval_const_expressions: unexpected opType %d", - (int) expr->opType); - break; - } - - /* - * If we break out of the above switch on opType, then the - * expression cannot be simplified any further, so build and - * return a replacement Expr node using the possibly-simplified - * arguments and the original oper node. Can't use make_clause() - * here because we want to be sure the typeOid field is - * preserved... - */ - newexpr = makeNode(Expr); - newexpr->typeOid = expr->typeOid; - newexpr->opType = expr->opType; - newexpr->oper = expr->oper; - newexpr->args = args; - return (Node *) newexpr; - } - if (IsA(node, RelabelType)) - { - /* - * If we can simplify the input to a constant, then we don't need - * the RelabelType node anymore: just change the type field of the - * Const node. Otherwise, must copy the RelabelType node. - */ - RelabelType *relabel = (RelabelType *) node; - Node *arg; - - arg = eval_const_expressions_mutator(relabel->arg, context); - - /* - * If we find stacked RelabelTypes (eg, from foo :: int :: oid) we - * can discard all but the top one. - */ - while (arg && IsA(arg, RelabelType)) - arg = ((RelabelType *) arg)->arg; - - if (arg && IsA(arg, Const)) - { - Const *con = (Const *) arg; - - con->consttype = relabel->resulttype; - - /* - * relabel's resulttypmod is discarded, which is OK for now; - * if the type actually needs a runtime length coercion then - * there should be a function call to do it just above this - * node. 
- */ - return (Node *) con; - } - else - { - RelabelType *newrelabel = makeNode(RelabelType); - - newrelabel->arg = arg; - newrelabel->resulttype = relabel->resulttype; - newrelabel->resulttypmod = relabel->resulttypmod; - return (Node *) newrelabel; - } - } - if (IsA(node, CaseExpr)) - { - - /*---------- - * CASE expressions can be simplified if there are constant - * condition clauses: - * FALSE (or NULL): drop the alternative - * TRUE: drop all remaining alternatives - * If the first non-FALSE alternative is a constant TRUE, we can - * simplify the entire CASE to that alternative's expression. - * If there are no non-FALSE alternatives, we simplify the entire - * CASE to the default result (ELSE result). - *---------- - */ - CaseExpr *caseexpr = (CaseExpr *) node; - CaseExpr *newcase; - List *newargs = NIL; - Node *defresult; - Const *const_input; - List *arg; - - foreach(arg, caseexpr->args) - { - /* Simplify this alternative's condition and result */ - CaseWhen *casewhen = (CaseWhen *) - expression_tree_mutator((Node *) lfirst(arg), - eval_const_expressions_mutator, - (void *) context); - - Assert(IsA(casewhen, CaseWhen)); - if (casewhen->expr == NULL || - !IsA(casewhen->expr, Const)) - { - newargs = lappend(newargs, casewhen); - continue; - } - const_input = (Const *) casewhen->expr; - if (const_input->constisnull || - !DatumGetBool(const_input->constvalue)) - continue; /* drop alternative with FALSE condition */ - - /* - * Found a TRUE condition. If it's the first (un-dropped) - * alternative, the CASE reduces to just this alternative. - */ - if (newargs == NIL) - return casewhen->result; - - /* - * Otherwise, add it to the list, and drop all the rest. 
- */ - newargs = lappend(newargs, casewhen); - break; - } - - /* Simplify the default result */ - defresult = eval_const_expressions_mutator(caseexpr->defresult, - context); - - /* - * If no non-FALSE alternatives, CASE reduces to the default - * result - */ - if (newargs == NIL) - return defresult; - /* Otherwise we need a new CASE node */ - newcase = makeNode(CaseExpr); - newcase->casetype = caseexpr->casetype; - newcase->arg = NULL; - newcase->args = newargs; - newcase->defresult = defresult; - return (Node *) newcase; - } - - /* - * For any node type not handled above, we recurse using - * expression_tree_mutator, which will copy the node unchanged but try - * to simplify its arguments (if any) using this routine. For example: - * we cannot eliminate an ArrayRef node, but we might be able to - * simplify constant expressions in its subscripts. - */ - return expression_tree_mutator(node, eval_const_expressions_mutator, - (void *) context); -} - -/* - * Subroutine for eval_const_expressions: try to evaluate an op or func - * - * Inputs are the op or func Expr node, and the pre-simplified argument list. - * Returns a simplified expression if successful, or NULL if cannot - * simplify the op/func. - * - * XXX Possible future improvement: if the func is SQL-language, and its - * definition is simply "SELECT expression", we could parse and substitute - * the expression here. This would avoid much runtime overhead, and perhaps - * expose opportunities for constant-folding within the expression even if - * not all the func's input args are constants. It'd be appropriate to do - * that here, not in the parser, since we wouldn't want it to happen until - * after rule substitution/rewriting. 
- */ -static Expr * -simplify_op_or_func(Expr *expr, List *args) -{ - List *arg; - Oid funcid; - Oid result_typeid; - HeapTuple func_tuple; - Form_pg_proc funcform; - char provolatile; - bool proisstrict; - bool proretset; - int16 resultTypLen; - bool resultTypByVal; - Expr *newexpr; - ExprContext *econtext; - Datum const_val; - bool has_nonconst_input = false; - bool has_null_input = false; - bool const_is_null; - - /* - * Check for constant inputs and especially constant-NULL inputs. - */ - foreach(arg, args) - { - if (IsA(lfirst(arg), Const)) - has_null_input |= ((Const *) lfirst(arg))->constisnull; - else - has_nonconst_input = true; - } - - /* - * If the function is strict and has a constant-NULL input, it will - * never be called at all, so we can replace the call by a NULL - * constant even if there are other inputs that aren't constant. - * Otherwise, we can only simplify if all inputs are constants. We can - * skip the function lookup if neither case applies. - */ - if (has_nonconst_input && !has_null_input) - return NULL; - - /* - * Get the function procedure's OID and look to see whether it is - * marked immutable. - * - * Note we take the result type from the Oper or Func node, not the - * pg_proc tuple; probably necessary for binary-compatibility cases. - * - */ - if (expr->opType == OP_EXPR) - { - Oper *oper = (Oper *) expr->oper; - - replace_opid(oper); /* OK to scribble on input to this extent */ - funcid = oper->opid; - result_typeid = oper->opresulttype; - } - else - { - Func *func = (Func *) expr->oper; - - funcid = func->funcid; - result_typeid = func->funcresulttype; - } - - /* - * we could use func_volatile() here, but we need several fields out - * of the func tuple, so might as well just look it up once. 
- */ - func_tuple = SearchSysCache(PROCOID, - ObjectIdGetDatum(funcid), - 0, 0, 0); - if (!HeapTupleIsValid(func_tuple)) - elog(ERROR, "Function OID %u does not exist", funcid); - funcform = (Form_pg_proc) GETSTRUCT(func_tuple); - provolatile = funcform->provolatile; - proisstrict = funcform->proisstrict; - proretset = funcform->proretset; - ReleaseSysCache(func_tuple); - - if (provolatile != PROVOLATILE_IMMUTABLE) - return NULL; - - /* - * Also check to make sure it doesn't return a set. - */ - if (proretset) - return NULL; - - /* - * Now that we know if the function is strict, we can finish the - * checks for simplifiable inputs that we started above. - */ - if (proisstrict && has_null_input) - { - /* - * It's strict and has NULL input, so must produce NULL output. - * Return a NULL constant of the right type. - */ - return (Expr *) makeNullConst(result_typeid); - } - - /* - * Otherwise, can simplify only if all inputs are constants. (For a - * non-strict function, constant NULL inputs are treated the same as - * constant non-NULL inputs.) - */ - if (has_nonconst_input) - return NULL; - - /* - * OK, looks like we can simplify this operator/function. - * - * We use the executor's routine ExecEvalExpr() to avoid duplication of - * code and ensure we get the same result as the executor would get. - * - * Build a new Expr node containing the already-simplified arguments. The - * only other setup needed here is the replace_opid() that we already - * did for the OP_EXPR case. - */ - newexpr = makeNode(Expr); - newexpr->typeOid = expr->typeOid; - newexpr->opType = expr->opType; - newexpr->oper = expr->oper; - newexpr->args = args; - - /* Get info needed about result datatype */ - get_typlenbyval(result_typeid, &resultTypLen, &resultTypByVal); - - /* - * It is OK to pass a dummy econtext because none of the - * ExecEvalExpr() code used in this situation will use econtext. 
That - * might seem fortuitous, but it's not so unreasonable --- a constant - * expression does not depend on context, by definition, n'est ce pas? - */ - econtext = MakeExprContext(NULL, CurrentMemoryContext); - - const_val = ExecEvalExprSwitchContext((Node *) newexpr, econtext, - &const_is_null, NULL); - - /* Must copy result out of sub-context used by expression eval */ - if (!const_is_null) - const_val = datumCopy(const_val, resultTypByVal, resultTypLen); - - FreeExprContext(econtext); - pfree(newexpr); - - /* - * Make the constant result node. - */ - return (Expr *) makeConst(result_typeid, resultTypLen, - const_val, const_is_null, - resultTypByVal, false, false); -} - - -/* - * Standard expression-tree walking support - * - * We used to have near-duplicate code in many different routines that - * understood how to recurse through an expression node tree. That was - * a pain to maintain, and we frequently had bugs due to some particular - * routine neglecting to support a particular node type. In most cases, - * these routines only actually care about certain node types, and don't - * care about other types except insofar as they have to recurse through - * non-primitive node types. Therefore, we now provide generic tree-walking - * logic to consolidate the redundant "boilerplate" code. There are - * two versions: expression_tree_walker() and expression_tree_mutator(). - */ - -/*-------------------- - * expression_tree_walker() is designed to support routines that traverse - * a tree in a read-only fashion (although it will also work for routines - * that modify nodes in-place but never add/delete/replace nodes). - * A walker routine should look like this: - * - * bool my_walker (Node *node, my_struct *context) - * { - * if (node == NULL) - * return false; - * // check for nodes that special work is required for, eg: - * if (IsA(node, Var)) - * { - * ... do special actions for Var nodes - * } - * else if (IsA(node, ...)) - * { - * ... 
do special actions for other node types - * } - * // for any node type not specially processed, do: - * return expression_tree_walker(node, my_walker, (void *) context); - * } - * - * The "context" argument points to a struct that holds whatever context - * information the walker routine needs --- it can be used to return data - * gathered by the walker, too. This argument is not touched by - * expression_tree_walker, but it is passed down to recursive sub-invocations - * of my_walker. The tree walk is started from a setup routine that - * fills in the appropriate context struct, calls my_walker with the top-level - * node of the tree, and then examines the results. - * - * The walker routine should return "false" to continue the tree walk, or - * "true" to abort the walk and immediately return "true" to the top-level - * caller. This can be used to short-circuit the traversal if the walker - * has found what it came for. "false" is returned to the top-level caller - * iff no invocation of the walker returned "true". - * - * The node types handled by expression_tree_walker include all those - * normally found in target lists and qualifier clauses during the planning - * stage. In particular, it handles List nodes since a cnf-ified qual clause - * will have List structure at the top level, and it handles TargetEntry nodes - * so that a scan of a target list can be handled without additional code. - * (But only the "expr" part of a TargetEntry is examined, unless the walker - * chooses to process TargetEntry nodes specially.) Also, RangeTblRef, - * FromExpr, JoinExpr, and SetOperationStmt nodes are handled, so that query - * jointrees and setOperation trees can be processed without additional code. - * - * expression_tree_walker will handle SubLink and SubPlan nodes by recursing - * normally into the "lefthand" arguments (which belong to the outer plan). 
- * It will also call the walker on the sub-Query node; however, when - * expression_tree_walker itself is called on a Query node, it does nothing - * and returns "false". The net effect is that unless the walker does - * something special at a Query node, sub-selects will not be visited - * during an expression tree walk. This is exactly the behavior wanted - * in many cases --- and for those walkers that do want to recurse into - * sub-selects, special behavior is typically needed anyway at the entry - * to a sub-select (such as incrementing a depth counter). A walker that - * wants to examine sub-selects should include code along the lines of: - * - * if (IsA(node, Query)) - * { - * adjust context for subquery; - * result = query_tree_walker((Query *) node, my_walker, context, - * true); // to visit subquery RTEs too - * restore context if needed; - * return result; - * } - * - * query_tree_walker is a convenience routine (see below) that calls the - * walker on all the expression subtrees of the given Query node. - * - * NOTE: currently, because make_subplan() clears the subselect link in - * a SubLink node, it is not actually possible to recurse into subselects - * of an already-planned expression tree. This is OK for current uses, - * but ought to be cleaned up when we redesign querytree processing. - *-------------------- - */ - -bool -expression_tree_walker(Node *node, - bool (*walker) (), - void *context) -{ - List *temp; - - /* - * The walker has already visited the current node, and so we need - * only recurse into any sub-nodes it has. - * - * We assume that the walker is not interested in List nodes per se, so - * when we expect a List we just recurse directly to self without - * bothering to call the walker. 
- */ - if (node == NULL) - return false; - switch (nodeTag(node)) - { - case T_Const: - case T_Var: - case T_Param: - case T_RangeTblRef: - /* primitive node types with no subnodes */ - break; - case T_Expr: - { - Expr *expr = (Expr *) node; - - if (expr->opType == SUBPLAN_EXPR) - { - /* recurse to the SubLink node (skipping SubPlan!) */ - if (walker((Node *) ((SubPlan *) expr->oper)->sublink, - context)) - return true; - } - /* for all Expr node types, examine args list */ - if (expression_tree_walker((Node *) expr->args, - walker, context)) - return true; - } - break; - case T_Aggref: - return walker(((Aggref *) node)->target, context); - case T_ArrayRef: - { - ArrayRef *aref = (ArrayRef *) node; - - /* recurse directly for upper/lower array index lists */ - if (expression_tree_walker((Node *) aref->refupperindexpr, - walker, context)) - return true; - if (expression_tree_walker((Node *) aref->reflowerindexpr, - walker, context)) - return true; - /* walker must see the refexpr and refassgnexpr, however */ - if (walker(aref->refexpr, context)) - return true; - if (walker(aref->refassgnexpr, context)) - return true; - } - break; - case T_FieldSelect: - return walker(((FieldSelect *) node)->arg, context); - case T_RelabelType: - return walker(((RelabelType *) node)->arg, context); - case T_CaseExpr: - { - CaseExpr *caseexpr = (CaseExpr *) node; - - /* we assume walker doesn't care about CaseWhens, either */ - foreach(temp, caseexpr->args) - { - CaseWhen *when = (CaseWhen *) lfirst(temp); - - Assert(IsA(when, CaseWhen)); - if (walker(when->expr, context)) - return true; - if (walker(when->result, context)) - return true; - } - /* caseexpr->arg should be null, but we'll check it anyway */ - if (walker(caseexpr->arg, context)) - return true; - if (walker(caseexpr->defresult, context)) - return true; - } - break; - case T_NullTest: - return walker(((NullTest *) node)->arg, context); - case T_BooleanTest: - return walker(((BooleanTest *) node)->arg, context); - case 
T_SubLink: - { - SubLink *sublink = (SubLink *) node; - - /* - * If the SubLink has already been processed by - * subselect.c, it will have lefthand=NIL, and we need to - * scan the oper list. Otherwise we only need to look at - * the lefthand list (the incomplete Oper nodes in the - * oper list are deemed uninteresting, perhaps even - * confusing). - */ - if (sublink->lefthand) - { - if (walker((Node *) sublink->lefthand, context)) - return true; - } - else - { - if (walker((Node *) sublink->oper, context)) - return true; - } - - /* - * Also invoke the walker on the sublink's Query node, so - * it can recurse into the sub-query if it wants to. - */ - return walker(sublink->subselect, context); - } - break; - case T_Query: - /* Do nothing with a sub-Query, per discussion above */ - break; - case T_List: - foreach(temp, (List *) node) - { - if (walker((Node *) lfirst(temp), context)) - return true; - } - break; - case T_TargetEntry: - return walker(((TargetEntry *) node)->expr, context); - case T_FromExpr: - { - FromExpr *from = (FromExpr *) node; - - if (walker(from->fromlist, context)) - return true; - if (walker(from->quals, context)) - return true; - } - break; - case T_JoinExpr: - { - JoinExpr *join = (JoinExpr *) node; - - if (walker(join->larg, context)) - return true; - if (walker(join->rarg, context)) - return true; - if (walker(join->quals, context)) - return true; - /* - * alias clause, using list are deemed uninteresting. 
- */ - } - break; - case T_SetOperationStmt: - { - SetOperationStmt *setop = (SetOperationStmt *) node; - - if (walker(setop->larg, context)) - return true; - if (walker(setop->rarg, context)) - return true; - } - break; - default: - elog(ERROR, "expression_tree_walker: Unexpected node type %d", - nodeTag(node)); - break; - } - return false; -} - -/* - * query_tree_walker --- initiate a walk of a Query's expressions - * - * This routine exists just to reduce the number of places that need to know - * where all the expression subtrees of a Query are. Note it can be used - * for starting a walk at top level of a Query regardless of whether the - * walker intends to descend into subqueries. It is also useful for - * descending into subqueries within a walker. - * - * If visitQueryRTEs is true, the walker will also be called on sub-Query - * nodes present in subquery rangetable entries of the given Query. This - * is optional since some callers handle those sub-queries separately, - * or don't really want to see subqueries anyway. 
- */ -bool -query_tree_walker(Query *query, - bool (*walker) (), - void *context, - bool visitQueryRTEs) -{ - List *rt; - - Assert(query != NULL && IsA(query, Query)); - - if (walker((Node *) query->targetList, context)) - return true; - if (walker((Node *) query->jointree, context)) - return true; - if (walker(query->setOperations, context)) - return true; - if (walker(query->havingQual, context)) - return true; - foreach(rt, query->rtable) - { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt); - - switch (rte->rtekind) - { - case RTE_RELATION: - case RTE_SPECIAL: - /* nothing to do */ - break; - case RTE_SUBQUERY: - if (visitQueryRTEs) - if (walker(rte->subquery, context)) - return true; - break; - case RTE_JOIN: - if (walker(rte->joinaliasvars, context)) - return true; - break; - case RTE_FUNCTION: - if (walker(rte->funcexpr, context)) - return true; - break; - } - } - return false; -} - - -/*-------------------- - * expression_tree_mutator() is designed to support routines that make a - * modified copy of an expression tree, with some nodes being added, - * removed, or replaced by new subtrees. The original tree is (normally) - * not changed. Each recursion level is responsible for returning a copy of - * (or appropriately modified substitute for) the subtree it is handed. - * A mutator routine should look like this: - * - * Node * my_mutator (Node *node, my_struct *context) - * { - * if (node == NULL) - * return NULL; - * // check for nodes that special work is required for, eg: - * if (IsA(node, Var)) - * { - * ... create and return modified copy of Var node - * } - * else if (IsA(node, ...)) - * { - * ... 
do special transformations of other node types - * } - * // for any node type not specially processed, do: - * return expression_tree_mutator(node, my_mutator, (void *) context); - * } - * - * The "context" argument points to a struct that holds whatever context - * information the mutator routine needs --- it can be used to return extra - * data gathered by the mutator, too. This argument is not touched by - * expression_tree_mutator, but it is passed down to recursive sub-invocations - * of my_mutator. The tree walk is started from a setup routine that - * fills in the appropriate context struct, calls my_mutator with the - * top-level node of the tree, and does any required post-processing. - * - * Each level of recursion must return an appropriately modified Node. - * If expression_tree_mutator() is called, it will make an exact copy - * of the given Node, but invoke my_mutator() to copy the sub-node(s) - * of that Node. In this way, my_mutator() has full control over the - * copying process but need not directly deal with expression trees - * that it has no interest in. - * - * Just as for expression_tree_walker, the node types handled by - * expression_tree_mutator include all those normally found in target lists - * and qualifier clauses during the planning stage. - * - * expression_tree_mutator will handle a SUBPLAN_EXPR node by recursing into - * the args and slink->oper lists (which belong to the outer plan), but it - * will simply copy the link to the inner plan, since that's typically what - * expression tree mutators want. A mutator that wants to modify the subplan - * can force appropriate behavior by recognizing subplan expression nodes - * and doing the right thing. - * - * Bare SubLink nodes (without a SUBPLAN_EXPR) are handled by recursing into - * the "lefthand" argument list only. (A bare SubLink should be seen only if - * the tree has not yet been processed by subselect.c.) 
Again, this can be - * overridden by the mutator, but it seems to be the most useful default - * behavior. - *-------------------- - */ - -Node * -expression_tree_mutator(Node *node, - Node *(*mutator) (), - void *context) -{ - /* - * The mutator has already decided not to modify the current node, but - * we must call the mutator for any sub-nodes. - */ - -#define FLATCOPY(newnode, node, nodetype) \ - ( (newnode) = makeNode(nodetype), \ - memcpy((newnode), (node), sizeof(nodetype)) ) - -#define CHECKFLATCOPY(newnode, node, nodetype) \ - ( AssertMacro(IsA((node), nodetype)), \ - (newnode) = makeNode(nodetype), \ - memcpy((newnode), (node), sizeof(nodetype)) ) - -#define MUTATE(newfield, oldfield, fieldtype) \ - ( (newfield) = (fieldtype) mutator((Node *) (oldfield), context) ) - - if (node == NULL) - return NULL; - switch (nodeTag(node)) - { - case T_Const: - case T_Var: - case T_Param: - case T_RangeTblRef: - /* primitive node types with no subnodes */ - return (Node *) copyObject(node); - case T_Expr: - { - Expr *expr = (Expr *) node; - Expr *newnode; - - FLATCOPY(newnode, expr, Expr); - - if (expr->opType == SUBPLAN_EXPR) - { - SubLink *oldsublink = ((SubPlan *) expr->oper)->sublink; - SubPlan *newsubplan; - - /* flat-copy the oper node, which is a SubPlan */ - CHECKFLATCOPY(newsubplan, expr->oper, SubPlan); - newnode->oper = (Node *) newsubplan; - /* likewise its SubLink node */ - CHECKFLATCOPY(newsubplan->sublink, oldsublink, SubLink); - - /* - * transform args list (params to be passed to - * subplan) - */ - MUTATE(newnode->args, expr->args, List *); - /* transform sublink's oper list as well */ - MUTATE(newsubplan->sublink->oper, oldsublink->oper, List *); - - /* - * but not the subplan itself, which is referenced - * as-is - */ - } - else - { - /* - * for other Expr node types, just transform args - * list, linking to original oper node (OK?) 
- */ - MUTATE(newnode->args, expr->args, List *); - } - return (Node *) newnode; - } - break; - case T_Aggref: - { - Aggref *aggref = (Aggref *) node; - Aggref *newnode; - - FLATCOPY(newnode, aggref, Aggref); - MUTATE(newnode->target, aggref->target, Node *); - return (Node *) newnode; - } - break; - case T_ArrayRef: - { - ArrayRef *arrayref = (ArrayRef *) node; - ArrayRef *newnode; - - FLATCOPY(newnode, arrayref, ArrayRef); - MUTATE(newnode->refupperindexpr, arrayref->refupperindexpr, - List *); - MUTATE(newnode->reflowerindexpr, arrayref->reflowerindexpr, - List *); - MUTATE(newnode->refexpr, arrayref->refexpr, - Node *); - MUTATE(newnode->refassgnexpr, arrayref->refassgnexpr, - Node *); - return (Node *) newnode; - } - break; - case T_FieldSelect: - { - FieldSelect *fselect = (FieldSelect *) node; - FieldSelect *newnode; - - FLATCOPY(newnode, fselect, FieldSelect); - MUTATE(newnode->arg, fselect->arg, Node *); - return (Node *) newnode; - } - break; - case T_RelabelType: - { - RelabelType *relabel = (RelabelType *) node; - RelabelType *newnode; - - FLATCOPY(newnode, relabel, RelabelType); - MUTATE(newnode->arg, relabel->arg, Node *); - return (Node *) newnode; - } - break; - case T_CaseExpr: - { - CaseExpr *caseexpr = (CaseExpr *) node; - CaseExpr *newnode; - - FLATCOPY(newnode, caseexpr, CaseExpr); - MUTATE(newnode->args, caseexpr->args, List *); - /* caseexpr->arg should be null, but we'll check it anyway */ - MUTATE(newnode->arg, caseexpr->arg, Node *); - MUTATE(newnode->defresult, caseexpr->defresult, Node *); - return (Node *) newnode; - } - break; - case T_CaseWhen: - { - CaseWhen *casewhen = (CaseWhen *) node; - CaseWhen *newnode; - - FLATCOPY(newnode, casewhen, CaseWhen); - MUTATE(newnode->expr, casewhen->expr, Node *); - MUTATE(newnode->result, casewhen->result, Node *); - return (Node *) newnode; - } - break; - case T_NullTest: - { - NullTest *ntest = (NullTest *) node; - NullTest *newnode; - - FLATCOPY(newnode, ntest, NullTest); - MUTATE(newnode->arg, 
ntest->arg, Node *); - return (Node *) newnode; - } - break; - case T_BooleanTest: - { - BooleanTest *btest = (BooleanTest *) node; - BooleanTest *newnode; - - FLATCOPY(newnode, btest, BooleanTest); - MUTATE(newnode->arg, btest->arg, Node *); - return (Node *) newnode; - } - break; - case T_SubLink: - { - /* - * A "bare" SubLink (note we will not come here if we - * found a SUBPLAN_EXPR node above it). Transform the - * lefthand side, but not the oper list nor the subquery. - */ - SubLink *sublink = (SubLink *) node; - SubLink *newnode; - - FLATCOPY(newnode, sublink, SubLink); - MUTATE(newnode->lefthand, sublink->lefthand, List *); - return (Node *) newnode; - } - break; - case T_List: - { - /* - * We assume the mutator isn't interested in the list - * nodes per se, so just invoke it on each list element. - * NOTE: this would fail badly on a list with integer - * elements! - */ - List *resultlist = NIL; - List *temp; - - foreach(temp, (List *) node) - { - resultlist = lappend(resultlist, - mutator((Node *) lfirst(temp), - context)); - } - return (Node *) resultlist; - } - break; - case T_TargetEntry: - { - /* - * We mutate the expression, but not the resdom, by - * default. 
- */ - TargetEntry *targetentry = (TargetEntry *) node; - TargetEntry *newnode; - - FLATCOPY(newnode, targetentry, TargetEntry); - MUTATE(newnode->expr, targetentry->expr, Node *); - return (Node *) newnode; - } - break; - case T_FromExpr: - { - FromExpr *from = (FromExpr *) node; - FromExpr *newnode; - - FLATCOPY(newnode, from, FromExpr); - MUTATE(newnode->fromlist, from->fromlist, List *); - MUTATE(newnode->quals, from->quals, Node *); - return (Node *) newnode; - } - break; - case T_JoinExpr: - { - JoinExpr *join = (JoinExpr *) node; - JoinExpr *newnode; - - FLATCOPY(newnode, join, JoinExpr); - MUTATE(newnode->larg, join->larg, Node *); - MUTATE(newnode->rarg, join->rarg, Node *); - MUTATE(newnode->quals, join->quals, Node *); - /* We do not mutate alias or using by default */ - return (Node *) newnode; - } - break; - case T_SetOperationStmt: - { - SetOperationStmt *setop = (SetOperationStmt *) node; - SetOperationStmt *newnode; - - FLATCOPY(newnode, setop, SetOperationStmt); - MUTATE(newnode->larg, setop->larg, Node *); - MUTATE(newnode->rarg, setop->rarg, Node *); - return (Node *) newnode; - } - break; - default: - elog(ERROR, "expression_tree_mutator: Unexpected node type %d", - nodeTag(node)); - break; - } - /* can't get here, but keep compiler happy */ - return NULL; -} - - -/* - * query_tree_mutator --- initiate modification of a Query's expressions - * - * This routine exists just to reduce the number of places that need to know - * where all the expression subtrees of a Query are. Note it can be used - * for starting a walk at top level of a Query regardless of whether the - * mutator intends to descend into subqueries. It is also useful for - * descending into subqueries within a mutator. - * - * The specified Query node is modified-in-place; do a FLATCOPY() beforehand - * if you don't want to change the original. All substructure is safely - * copied, however. 
- * - * If visitQueryRTEs is true, the mutator will also be called on sub-Query - * nodes present in subquery rangetable entries of the given Query. This - * is optional since some callers handle those sub-queries separately, - * or don't really want to see subqueries anyway. - */ -void -query_tree_mutator(Query *query, - Node *(*mutator) (), - void *context, - bool visitQueryRTEs) -{ - List *newrt = NIL; - List *rt; - - Assert(query != NULL && IsA(query, Query)); - - MUTATE(query->targetList, query->targetList, List *); - MUTATE(query->jointree, query->jointree, FromExpr *); - MUTATE(query->setOperations, query->setOperations, Node *); - MUTATE(query->havingQual, query->havingQual, Node *); - foreach(rt, query->rtable) - { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt); - RangeTblEntry *newrte; - - switch (rte->rtekind) - { - case RTE_RELATION: - case RTE_SPECIAL: - /* nothing to do, don't bother to make a copy */ - break; - case RTE_SUBQUERY: - if (visitQueryRTEs) - { - FLATCOPY(newrte, rte, RangeTblEntry); - CHECKFLATCOPY(newrte->subquery, rte->subquery, Query); - MUTATE(newrte->subquery, newrte->subquery, Query *); - rte = newrte; - } - break; - case RTE_JOIN: - FLATCOPY(newrte, rte, RangeTblEntry); - MUTATE(newrte->joinaliasvars, rte->joinaliasvars, List *); - rte = newrte; - break; - case RTE_FUNCTION: - FLATCOPY(newrte, rte, RangeTblEntry); - MUTATE(newrte->funcexpr, rte->funcexpr, Node *); - rte = newrte; - break; - } - newrt = lappend(newrt, rte); - } - query->rtable = newrt; -} diff --git a/src/backend/optimizer/util/joininfo.c b/src/backend/optimizer/util/joininfo.c deleted file mode 100644 index 0f3cf201908..00000000000 --- a/src/backend/optimizer/util/joininfo.c +++ /dev/null @@ -1,76 +0,0 @@ -/*------------------------------------------------------------------------- - * - * joininfo.c - * JoinInfo node manipulation routines - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of 
the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/joininfo.c,v 1.31 2002/06/20 20:29:31 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - - -#include "optimizer/joininfo.h" - -static JoinInfo *joininfo_member(List *join_relids, List *joininfo_list); - -/* - * joininfo_member - * Determines whether a node has already been created for a join - * between a set of join relations and the relation described by - * 'joininfo_list'. - * - * 'join_relids' is a list of relids corresponding to the join relation - * 'joininfo_list' is the list of joininfo nodes against which this is - * checked - * - * Returns the corresponding node in 'joininfo_list' if such a node - * exists. - * - */ -static JoinInfo * -joininfo_member(List *join_relids, List *joininfo_list) -{ - List *i; - - foreach(i, joininfo_list) - { - JoinInfo *joininfo = (JoinInfo *) lfirst(i); - - if (sameseti(join_relids, joininfo->unjoined_relids)) - return joininfo; - } - return NULL; -} - - -/* - * find_joininfo_node - * Find the joininfo node within a relation entry corresponding - * to a join between 'this_rel' and the relations in 'join_relids'. - * A new node is created and added to the relation entry's joininfo - * field if the desired one can't be found. - * - * Returns a joininfo node. 
- * - */ -JoinInfo * -find_joininfo_node(RelOptInfo *this_rel, Relids join_relids) -{ - JoinInfo *joininfo = joininfo_member(join_relids, - this_rel->joininfo); - - if (joininfo == NULL) - { - joininfo = makeNode(JoinInfo); - joininfo->unjoined_relids = join_relids; - joininfo->jinfo_restrictinfo = NIL; - this_rel->joininfo = lcons(joininfo, this_rel->joininfo); - } - return joininfo; -} diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c deleted file mode 100644 index 4b3c9809b8b..00000000000 --- a/src/backend/optimizer/util/pathnode.c +++ /dev/null @@ -1,620 +0,0 @@ -/*------------------------------------------------------------------------- - * - * pathnode.c - * Routines to manipulate pathlists and create path nodes - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.78 2002/06/20 20:29:31 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include <math.h> - -#include "nodes/plannodes.h" -#include "optimizer/cost.h" -#include "optimizer/pathnode.h" -#include "optimizer/paths.h" -#include "optimizer/restrictinfo.h" - - -/***************************************************************************** - * MISC. PATH UTILITIES - *****************************************************************************/ - -/* - * compare_path_costs - * Return -1, 0, or +1 according as path1 is cheaper, the same cost, - * or more expensive than path2 for the specified criterion. 
- */ -int -compare_path_costs(Path *path1, Path *path2, CostSelector criterion) -{ - if (criterion == STARTUP_COST) - { - if (path1->startup_cost < path2->startup_cost) - return -1; - if (path1->startup_cost > path2->startup_cost) - return +1; - - /* - * If paths have the same startup cost (not at all unlikely), - * order them by total cost. - */ - if (path1->total_cost < path2->total_cost) - return -1; - if (path1->total_cost > path2->total_cost) - return +1; - } - else - { - if (path1->total_cost < path2->total_cost) - return -1; - if (path1->total_cost > path2->total_cost) - return +1; - - /* - * If paths have the same total cost, order them by startup cost. - */ - if (path1->startup_cost < path2->startup_cost) - return -1; - if (path1->startup_cost > path2->startup_cost) - return +1; - } - return 0; -} - -/* - * compare_path_fractional_costs - * Return -1, 0, or +1 according as path1 is cheaper, the same cost, - * or more expensive than path2 for fetching the specified fraction - * of the total tuples. - * - * If fraction is <= 0 or > 1, we interpret it as 1, ie, we select the - * path with the cheaper total_cost. - */ -int -compare_fractional_path_costs(Path *path1, Path *path2, - double fraction) -{ - Cost cost1, - cost2; - - if (fraction <= 0.0 || fraction >= 1.0) - return compare_path_costs(path1, path2, TOTAL_COST); - cost1 = path1->startup_cost + - fraction * (path1->total_cost - path1->startup_cost); - cost2 = path2->startup_cost + - fraction * (path2->total_cost - path2->startup_cost); - if (cost1 < cost2) - return -1; - if (cost1 > cost2) - return +1; - return 0; -} - -/* - * set_cheapest - * Find the minimum-cost paths from among a relation's paths, - * and save them in the rel's cheapest-path fields. - * - * This is normally called only after we've finished constructing the path - * list for the rel node. - * - * If we find two paths of identical costs, try to keep the better-sorted one. 
- * The paths might have unrelated sort orderings, in which case we can only - * guess which might be better to keep, but if one is superior then we - * definitely should keep it. - */ -void -set_cheapest(RelOptInfo *parent_rel) -{ - List *pathlist = parent_rel->pathlist; - List *p; - Path *cheapest_startup_path; - Path *cheapest_total_path; - - Assert(IsA(parent_rel, RelOptInfo)); - - if (pathlist == NIL) - elog(ERROR, "Unable to devise a query plan for the given query"); - - cheapest_startup_path = cheapest_total_path = (Path *) lfirst(pathlist); - - foreach(p, lnext(pathlist)) - { - Path *path = (Path *) lfirst(p); - int cmp; - - cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST); - if (cmp > 0 || - (cmp == 0 && - compare_pathkeys(cheapest_startup_path->pathkeys, - path->pathkeys) == PATHKEYS_BETTER2)) - cheapest_startup_path = path; - - cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST); - if (cmp > 0 || - (cmp == 0 && - compare_pathkeys(cheapest_total_path->pathkeys, - path->pathkeys) == PATHKEYS_BETTER2)) - cheapest_total_path = path; - } - - parent_rel->cheapest_startup_path = cheapest_startup_path; - parent_rel->cheapest_total_path = cheapest_total_path; -} - -/* - * add_path - * Consider a potential implementation path for the specified parent rel, - * and add it to the rel's pathlist if it is worthy of consideration. - * A path is worthy if it has either a better sort order (better pathkeys) - * or cheaper cost (on either dimension) than any of the existing old paths. - * - * Unless parent_rel->pruneable is false, we also remove from the rel's - * pathlist any old paths that are dominated by new_path --- that is, - * new_path is both cheaper and at least as well ordered. - * - * The pathlist is kept sorted by TOTAL_COST metric, with cheaper paths - * at the front. No code depends on that for correctness; it's simply - * a speed hack within this routine. 
Doing it that way makes it more - * likely that we will reject an inferior path after a few comparisons, - * rather than many comparisons. - * - * NOTE: discarded Path objects are immediately pfree'd to reduce planner - * memory consumption. We dare not try to free the substructure of a Path, - * since much of it may be shared with other Paths or the query tree itself; - * but just recycling discarded Path nodes is a very useful savings in - * a large join tree. We can recycle the List nodes of pathlist, too. - * - * 'parent_rel' is the relation entry to which the path corresponds. - * 'new_path' is a potential path for parent_rel. - * - * Returns nothing, but modifies parent_rel->pathlist. - */ -void -add_path(RelOptInfo *parent_rel, Path *new_path) -{ - bool accept_new = true; /* unless we find a superior old - * path */ - List *insert_after = NIL; /* where to insert new item */ - List *p1_prev = NIL; - List *p1; - - /* - * Loop to check proposed new path against old paths. Note it is - * possible for more than one old path to be tossed out because - * new_path dominates it. - */ - p1 = parent_rel->pathlist; /* cannot use foreach here */ - while (p1 != NIL) - { - Path *old_path = (Path *) lfirst(p1); - bool remove_old = false; /* unless new proves superior */ - int costcmp; - - costcmp = compare_path_costs(new_path, old_path, TOTAL_COST); - - /* - * If the two paths compare differently for startup and total - * cost, then we want to keep both, and we can skip the (much - * slower) comparison of pathkeys. If they compare the same, - * proceed with the pathkeys comparison. Note: this test relies - * on the fact that compare_path_costs will only return 0 if both - * costs are equal (and, therefore, there's no need to call it - * twice in that case). 
- */ - if (costcmp == 0 || - costcmp == compare_path_costs(new_path, old_path, - STARTUP_COST)) - { - switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys)) - { - case PATHKEYS_EQUAL: - if (costcmp < 0) - remove_old = true; /* new dominates old */ - else - accept_new = false; /* old equals or dominates - * new */ - break; - case PATHKEYS_BETTER1: - if (costcmp <= 0) - remove_old = true; /* new dominates old */ - break; - case PATHKEYS_BETTER2: - if (costcmp >= 0) - accept_new = false; /* old dominates new */ - break; - case PATHKEYS_DIFFERENT: - /* keep both paths, since they have different ordering */ - break; - } - } - - /* - * Remove current element from pathlist if dominated by new, - * unless xfunc told us not to remove any paths. - */ - if (remove_old && parent_rel->pruneable) - { - List *p1_next = lnext(p1); - - if (p1_prev) - lnext(p1_prev) = p1_next; - else - parent_rel->pathlist = p1_next; - pfree(old_path); - pfree(p1); /* this is why we can't use foreach */ - p1 = p1_next; - } - else - { - /* new belongs after this old path if it has cost >= old's */ - if (costcmp >= 0) - insert_after = p1; - p1_prev = p1; - p1 = lnext(p1); - } - - /* - * If we found an old path that dominates new_path, we can quit - * scanning the pathlist; we will not add new_path, and we assume - * new_path cannot dominate any other elements of the pathlist. 
- */ - if (!accept_new) - break; - } - - if (accept_new) - { - /* Accept the new path: insert it at proper place in pathlist */ - if (insert_after) - lnext(insert_after) = lcons(new_path, lnext(insert_after)); - else - parent_rel->pathlist = lcons(new_path, parent_rel->pathlist); - } - else - { - /* Reject and recycle the new path */ - pfree(new_path); - } -} - - -/***************************************************************************** - * PATH NODE CREATION ROUTINES - *****************************************************************************/ - -/* - * create_seqscan_path - * Creates a path corresponding to a sequential scan, returning the - * pathnode. - */ -Path * -create_seqscan_path(Query *root, RelOptInfo *rel) -{ - Path *pathnode = makeNode(Path); - - pathnode->pathtype = T_SeqScan; - pathnode->parent = rel; - pathnode->pathkeys = NIL; /* seqscan has unordered result */ - - cost_seqscan(pathnode, root, rel); - - return pathnode; -} - -/* - * create_index_path - * Creates a path node for an index scan. - * - * 'rel' is the parent rel - * 'index' is an index on 'rel' - * 'restriction_clauses' is a list of RestrictInfo nodes - * to be used as index qual conditions in the scan. - * 'pathkeys' describes the ordering of the path. - * 'indexscandir' is ForwardScanDirection or BackwardScanDirection - * for an ordered index, or NoMovementScanDirection for - * an unordered index. - * - * Returns the new path node. 
- */ -IndexPath * -create_index_path(Query *root, - RelOptInfo *rel, - IndexOptInfo *index, - List *restriction_clauses, - List *pathkeys, - ScanDirection indexscandir) -{ - IndexPath *pathnode = makeNode(IndexPath); - List *indexquals; - - pathnode->path.pathtype = T_IndexScan; - pathnode->path.parent = rel; - pathnode->path.pathkeys = pathkeys; - - indexquals = get_actual_clauses(restriction_clauses); - /* expand special operators to indexquals the executor can handle */ - indexquals = expand_indexqual_conditions(indexquals); - - /* - * We are making a pathnode for a single-scan indexscan; therefore, - * both indexinfo and indexqual should be single-element lists. - */ - pathnode->indexinfo = makeList1(index); - pathnode->indexqual = makeList1(indexquals); - - pathnode->indexscandir = indexscandir; - - /* - * This routine is only used to generate "standalone" indexpaths, not - * nestloop inner indexpaths. So joinrelids is always NIL and the - * number of rows is the same as the parent rel's estimate. - */ - pathnode->joinrelids = NIL; /* no join clauses here */ - pathnode->alljoinquals = false; - pathnode->rows = rel->rows; - - /* - * Not sure if this is necessary, but it should help if the statistics - * are too far off - */ - if (index->indpred && index->tuples < pathnode->rows) - pathnode->rows = index->tuples; - - cost_index(&pathnode->path, root, rel, index, indexquals, false); - - return pathnode; -} - -/* - * create_tidscan_path - * Creates a path corresponding to a tid_direct scan, returning the - * pathnode. - */ -TidPath * -create_tidscan_path(Query *root, RelOptInfo *rel, List *tideval) -{ - TidPath *pathnode = makeNode(TidPath); - - pathnode->path.pathtype = T_TidScan; - pathnode->path.parent = rel; - pathnode->path.pathkeys = NIL; - pathnode->tideval = copyObject(tideval); /* is copy really - * necessary? 
*/ - pathnode->unjoined_relids = NIL; - - cost_tidscan(&pathnode->path, root, rel, tideval); - - /* - * divide selectivity for each clause to get an equal selectivity as - * IndexScan does OK ? - */ - - return pathnode; -} - -/* - * create_append_path - * Creates a path corresponding to an Append plan, returning the - * pathnode. - * - */ -AppendPath * -create_append_path(RelOptInfo *rel, List *subpaths) -{ - AppendPath *pathnode = makeNode(AppendPath); - List *l; - - pathnode->path.pathtype = T_Append; - pathnode->path.parent = rel; - pathnode->path.pathkeys = NIL; /* result is always considered - * unsorted */ - pathnode->subpaths = subpaths; - - pathnode->path.startup_cost = 0; - pathnode->path.total_cost = 0; - foreach(l, subpaths) - { - Path *subpath = (Path *) lfirst(l); - - if (l == subpaths) /* first node? */ - pathnode->path.startup_cost = subpath->startup_cost; - pathnode->path.total_cost += subpath->total_cost; - } - - return pathnode; -} - -/* - * create_subqueryscan_path - * Creates a path corresponding to a sequential scan of a subquery, - * returning the pathnode. - */ -Path * -create_subqueryscan_path(RelOptInfo *rel) -{ - Path *pathnode = makeNode(Path); - - pathnode->pathtype = T_SubqueryScan; - pathnode->parent = rel; - pathnode->pathkeys = NIL; /* for now, assume unordered result */ - - /* just copy the subplan's cost estimates */ - pathnode->startup_cost = rel->subplan->startup_cost; - pathnode->total_cost = rel->subplan->total_cost; - - return pathnode; -} - -/* - * create_functionscan_path - * Creates a path corresponding to a sequential scan of a function, - * returning the pathnode. 
- */ -Path * -create_functionscan_path(Query *root, RelOptInfo *rel) -{ - Path *pathnode = makeNode(Path); - - pathnode->pathtype = T_FunctionScan; - pathnode->parent = rel; - pathnode->pathkeys = NIL; /* for now, assume unordered result */ - - cost_functionscan(pathnode, root, rel); - - return pathnode; -} - -/* - * create_nestloop_path - * Creates a pathnode corresponding to a nestloop join between two - * relations. - * - * 'joinrel' is the join relation. - * 'jointype' is the type of join required - * 'outer_path' is the outer path - * 'inner_path' is the inner path - * 'restrict_clauses' are the RestrictInfo nodes to apply at the join - * 'pathkeys' are the path keys of the new join path - * - * Returns the resulting path node. - */ -NestPath * -create_nestloop_path(Query *root, - RelOptInfo *joinrel, - JoinType jointype, - Path *outer_path, - Path *inner_path, - List *restrict_clauses, - List *pathkeys) -{ - NestPath *pathnode = makeNode(NestPath); - - pathnode->path.pathtype = T_NestLoop; - pathnode->path.parent = joinrel; - pathnode->jointype = jointype; - pathnode->outerjoinpath = outer_path; - pathnode->innerjoinpath = inner_path; - pathnode->joinrestrictinfo = restrict_clauses; - pathnode->path.pathkeys = pathkeys; - - cost_nestloop(&pathnode->path, root, outer_path, inner_path, - restrict_clauses); - - return pathnode; -} - -/* - * create_mergejoin_path - * Creates a pathnode corresponding to a mergejoin join between - * two relations - * - * 'joinrel' is the join relation - * 'jointype' is the type of join required - * 'outer_path' is the outer path - * 'inner_path' is the inner path - * 'restrict_clauses' are the RestrictInfo nodes to apply at the join - * 'pathkeys' are the path keys of the new join path - * 'mergeclauses' are the RestrictInfo nodes to use as merge clauses - * (this should be a subset of the restrict_clauses list) - * 'outersortkeys' are the sort varkeys for the outer relation - * 'innersortkeys' are the sort varkeys for the inner 
relation - */ -MergePath * -create_mergejoin_path(Query *root, - RelOptInfo *joinrel, - JoinType jointype, - Path *outer_path, - Path *inner_path, - List *restrict_clauses, - List *pathkeys, - List *mergeclauses, - List *outersortkeys, - List *innersortkeys) -{ - MergePath *pathnode = makeNode(MergePath); - - /* - * If the given paths are already well enough ordered, we can skip - * doing an explicit sort. - */ - if (outersortkeys && - pathkeys_contained_in(outersortkeys, outer_path->pathkeys)) - outersortkeys = NIL; - if (innersortkeys && - pathkeys_contained_in(innersortkeys, inner_path->pathkeys)) - innersortkeys = NIL; - - pathnode->jpath.path.pathtype = T_MergeJoin; - pathnode->jpath.path.parent = joinrel; - pathnode->jpath.jointype = jointype; - pathnode->jpath.outerjoinpath = outer_path; - pathnode->jpath.innerjoinpath = inner_path; - pathnode->jpath.joinrestrictinfo = restrict_clauses; - pathnode->jpath.path.pathkeys = pathkeys; - pathnode->path_mergeclauses = mergeclauses; - pathnode->outersortkeys = outersortkeys; - pathnode->innersortkeys = innersortkeys; - - cost_mergejoin(&pathnode->jpath.path, - root, - outer_path, - inner_path, - restrict_clauses, - mergeclauses, - outersortkeys, - innersortkeys); - - return pathnode; -} - -/* - * create_hashjoin_path - * Creates a pathnode corresponding to a hash join between two relations. 
- * - * 'joinrel' is the join relation - * 'jointype' is the type of join required - * 'outer_path' is the cheapest outer path - * 'inner_path' is the cheapest inner path - * 'restrict_clauses' are the RestrictInfo nodes to apply at the join - * 'hashclauses' is a list of the hash join clause (always a 1-element list) - * (this should be a subset of the restrict_clauses list) - */ -HashPath * -create_hashjoin_path(Query *root, - RelOptInfo *joinrel, - JoinType jointype, - Path *outer_path, - Path *inner_path, - List *restrict_clauses, - List *hashclauses) -{ - HashPath *pathnode = makeNode(HashPath); - - pathnode->jpath.path.pathtype = T_HashJoin; - pathnode->jpath.path.parent = joinrel; - pathnode->jpath.jointype = jointype; - pathnode->jpath.outerjoinpath = outer_path; - pathnode->jpath.innerjoinpath = inner_path; - pathnode->jpath.joinrestrictinfo = restrict_clauses; - /* A hashjoin never has pathkeys, since its ordering is unpredictable */ - pathnode->jpath.path.pathkeys = NIL; - pathnode->path_hashclauses = hashclauses; - - cost_hashjoin(&pathnode->jpath.path, - root, - outer_path, - inner_path, - restrict_clauses, - hashclauses); - - return pathnode; -} diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c deleted file mode 100644 index b9a91d18627..00000000000 --- a/src/backend/optimizer/util/plancat.c +++ /dev/null @@ -1,363 +0,0 @@ -/*------------------------------------------------------------------------- - * - * plancat.c - * routines for accessing the system catalogs - * - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.73 2002/06/20 20:29:31 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include <math.h> - -#include "access/genam.h" -#include 
"access/heapam.h" -#include "catalog/catname.h" -#include "catalog/pg_amop.h" -#include "catalog/pg_inherits.h" -#include "catalog/pg_index.h" -#include "optimizer/clauses.h" -#include "optimizer/plancat.h" -#include "parser/parsetree.h" -#include "utils/builtins.h" -#include "utils/fmgroids.h" -#include "utils/lsyscache.h" -#include "utils/relcache.h" -#include "utils/syscache.h" -#include "catalog/catalog.h" -#include "miscadmin.h" - - -/* - * get_relation_info - - * Retrieves catalog information for a given relation. - * Given the Oid of the relation, return the following info: - * whether the relation has secondary indices - * number of pages - * number of tuples - */ -void -get_relation_info(Oid relationObjectId, - bool *hasindex, long *pages, double *tuples) -{ - HeapTuple relationTuple; - Form_pg_class relation; - - relationTuple = SearchSysCache(RELOID, - ObjectIdGetDatum(relationObjectId), - 0, 0, 0); - if (!HeapTupleIsValid(relationTuple)) - elog(ERROR, "get_relation_info: Relation %u not found", - relationObjectId); - relation = (Form_pg_class) GETSTRUCT(relationTuple); - - if (IsIgnoringSystemIndexes() && IsSystemClass(relation)) - *hasindex = false; - else - *hasindex = relation->relhasindex; - - *pages = relation->relpages; - *tuples = relation->reltuples; - - ReleaseSysCache(relationTuple); -} - -/* - * find_secondary_indexes - * Creates a list of IndexOptInfo nodes containing information for each - * secondary index defined on the specified relation. - * - * 'relationObjectId' is the OID of the relation for which indices are wanted - * - * Returns a list of new IndexOptInfo nodes. - */ -List * -find_secondary_indexes(Oid relationObjectId) -{ - List *indexinfos = NIL; - List *indexoidlist, - *indexoidscan; - Relation relation; - - /* - * We used to scan pg_index directly, but now the relcache offers a - * cached list of OID indexes for each relation. So, get that list - * and then use the syscache to obtain pg_index entries. 
- */ - relation = heap_open(relationObjectId, AccessShareLock); - indexoidlist = RelationGetIndexList(relation); - - foreach(indexoidscan, indexoidlist) - { - Oid indexoid = lfirsti(indexoidscan); - Relation indexRelation; - Form_pg_index index; - IndexOptInfo *info; - int i; - int16 amorderstrategy; - - /* Extract info from the relation descriptor for the index */ - indexRelation = index_open(indexoid); - - info = makeNode(IndexOptInfo); - - /* - * Need to make these arrays large enough to be sure there is room - * for a terminating 0 at the end of each one. - */ - info->classlist = (Oid *) palloc(sizeof(Oid) * (INDEX_MAX_KEYS + 1)); - info->indexkeys = (int *) palloc(sizeof(int) * (INDEX_MAX_KEYS + 1)); - info->ordering = (Oid *) palloc(sizeof(Oid) * (INDEX_MAX_KEYS + 1)); - - /* Extract info from the pg_index tuple */ - index = indexRelation->rd_index; - info->indexoid = index->indexrelid; - info->indproc = index->indproc; /* functional index ?? */ - if (VARSIZE(&index->indpred) > VARHDRSZ) /* partial index ?? 
*/ - { - char *predString; - - predString = DatumGetCString(DirectFunctionCall1(textout, - PointerGetDatum(&index->indpred))); - info->indpred = (List *) stringToNode(predString); - pfree(predString); - } - else - info->indpred = NIL; - info->unique = index->indisunique; - - for (i = 0; i < INDEX_MAX_KEYS; i++) - { - if (index->indclass[i] == (Oid) 0) - break; - info->classlist[i] = index->indclass[i]; - } - info->classlist[i] = (Oid) 0; - info->ncolumns = i; - - for (i = 0; i < INDEX_MAX_KEYS; i++) - { - if (index->indkey[i] == 0) - break; - info->indexkeys[i] = index->indkey[i]; - } - info->indexkeys[i] = 0; - info->nkeys = i; - - info->relam = indexRelation->rd_rel->relam; - info->pages = indexRelation->rd_rel->relpages; - info->tuples = indexRelation->rd_rel->reltuples; - info->amcostestimate = index_cost_estimator(indexRelation); - amorderstrategy = indexRelation->rd_am->amorderstrategy; - - /* - * Fetch the ordering operators associated with the index, if any. - */ - MemSet(info->ordering, 0, sizeof(Oid) * (INDEX_MAX_KEYS + 1)); - if (amorderstrategy != 0) - { - int oprindex = amorderstrategy - 1; - - for (i = 0; i < info->ncolumns; i++) - { - info->ordering[i] = indexRelation->rd_operator[oprindex]; - oprindex += indexRelation->rd_am->amstrategies; - } - } - - index_close(indexRelation); - - indexinfos = lcons(info, indexinfos); - } - - freeList(indexoidlist); - - /* XXX keep the lock here? */ - heap_close(relation, AccessShareLock); - - return indexinfos; -} - -/* - * restriction_selectivity - * - * Returns the selectivity of a specified restriction operator clause. - * This code executes registered procedures stored in the - * operator relation, by calling the function manager. - * - * varRelid is either 0 or a rangetable index. See clause_selectivity() - * for details about its meaning. 
- */ -Selectivity -restriction_selectivity(Query *root, - Oid operator, - List *args, - int varRelid) -{ - RegProcedure oprrest = get_oprrest(operator); - float8 result; - - /* - * if the oprrest procedure is missing for whatever reason, use a - * selectivity of 0.5 - */ - if (!oprrest) - return (Selectivity) 0.5; - - result = DatumGetFloat8(OidFunctionCall4(oprrest, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - Int32GetDatum(varRelid))); - - if (result < 0.0 || result > 1.0) - elog(ERROR, "restriction_selectivity: bad value %f", result); - - return (Selectivity) result; -} - -/* - * join_selectivity - * - * Returns the selectivity of a specified join operator clause. - * This code executes registered procedures stored in the - * operator relation, by calling the function manager. - */ -Selectivity -join_selectivity(Query *root, - Oid operator, - List *args) -{ - RegProcedure oprjoin = get_oprjoin(operator); - float8 result; - - /* - * if the oprjoin procedure is missing for whatever reason, use a - * selectivity of 0.5 - */ - if (!oprjoin) - return (Selectivity) 0.5; - - result = DatumGetFloat8(OidFunctionCall3(oprjoin, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args))); - - if (result < 0.0 || result > 1.0) - elog(ERROR, "join_selectivity: bad value %f", result); - - return (Selectivity) result; -} - -/* - * find_inheritance_children - * - * Returns an integer list containing the OIDs of all relations which - * inherit *directly* from the relation with OID 'inhparent'. - * - * XXX might be a good idea to create an index on pg_inherits' inhparent - * field, so that we can use an indexscan instead of sequential scan here. - * However, in typical databases pg_inherits won't have enough entries to - * justify an indexscan... 
- */ -List * -find_inheritance_children(Oid inhparent) -{ - List *list = NIL; - Relation relation; - HeapScanDesc scan; - HeapTuple inheritsTuple; - Oid inhrelid; - ScanKeyData key[1]; - - /* - * Can skip the scan if pg_class shows the relation has never had a - * subclass. - */ - if (!has_subclass(inhparent)) - return NIL; - - ScanKeyEntryInitialize(&key[0], - (bits16) 0x0, - (AttrNumber) Anum_pg_inherits_inhparent, - (RegProcedure) F_OIDEQ, - ObjectIdGetDatum(inhparent)); - relation = heap_openr(InheritsRelationName, AccessShareLock); - scan = heap_beginscan(relation, SnapshotNow, 1, key); - while ((inheritsTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) - { - inhrelid = ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhrelid; - list = lappendi(list, inhrelid); - } - heap_endscan(scan); - heap_close(relation, AccessShareLock); - return list; -} - -/* - * has_subclass - * - * In the current implementation, has_subclass returns whether a - * particular class *might* have a subclass. It will not return the - * correct result if a class had a subclass which was later dropped. - * This is because relhassubclass in pg_class is not updated when a - * subclass is dropped, primarily because of concurrency concerns. - * - * Currently has_subclass is only used as an efficiency hack to skip - * unnecessary inheritance searches, so this is OK. - */ -bool -has_subclass(Oid relationId) -{ - HeapTuple tuple; - bool result; - - tuple = SearchSysCache(RELOID, - ObjectIdGetDatum(relationId), - 0, 0, 0); - if (!HeapTupleIsValid(tuple)) - elog(ERROR, "has_subclass: Relation %u not found", relationId); - - result = ((Form_pg_class) GETSTRUCT(tuple))->relhassubclass; - ReleaseSysCache(tuple); - return result; -} - -/* - * has_unique_index - * - * Detect whether there is a unique index on the specified attribute - * of the specified relation, thus allowing us to conclude that all - * the (non-null) values of the attribute are distinct. 
- */ -bool -has_unique_index(RelOptInfo *rel, AttrNumber attno) -{ - List *ilist; - - foreach(ilist, rel->indexlist) - { - IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist); - - /* - * Note: ignore functional and partial indexes, since they don't - * allow us to conclude that all attr values are distinct. Also, a - * multicolumn unique index doesn't allow us to conclude that just - * the specified attr is unique. - */ - if (index->unique && - index->nkeys == 1 && - index->indexkeys[0] == attno && - index->indproc == InvalidOid && - index->indpred == NIL) - return true; - } - return false; -} diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c deleted file mode 100644 index 978d9f0b303..00000000000 --- a/src/backend/optimizer/util/relnode.c +++ /dev/null @@ -1,679 +0,0 @@ -/*------------------------------------------------------------------------- - * - * relnode.c - * Relation-node lookup/construction routines - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.38 2002/06/20 20:29:31 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include "optimizer/cost.h" -#include "optimizer/joininfo.h" -#include "optimizer/pathnode.h" -#include "optimizer/paths.h" -#include "optimizer/plancat.h" -#include "optimizer/tlist.h" -#include "parser/parsetree.h" - - -static RelOptInfo *make_base_rel(Query *root, int relid); -static List *new_join_tlist(List *tlist, int first_resdomno); -static List *build_joinrel_restrictlist(Query *root, - RelOptInfo *joinrel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel); -static void build_joinrel_joinlist(RelOptInfo *joinrel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel); -static List *subbuild_joinrel_restrictlist(RelOptInfo 
*joinrel, - List *joininfo_list); -static void subbuild_joinrel_joinlist(RelOptInfo *joinrel, - List *joininfo_list); - - -/* - * build_base_rel - * Construct a new base relation RelOptInfo, and put it in the query's - * base_rel_list. - */ -void -build_base_rel(Query *root, int relid) -{ - List *rels; - RelOptInfo *rel; - - /* Rel should not exist already */ - foreach(rels, root->base_rel_list) - { - rel = (RelOptInfo *) lfirst(rels); - - /* length(rel->relids) == 1 for all members of base_rel_list */ - if (lfirsti(rel->relids) == relid) - elog(ERROR, "build_base_rel: rel already exists"); - } - - /* It should not exist as an "other" rel, either */ - foreach(rels, root->other_rel_list) - { - rel = (RelOptInfo *) lfirst(rels); - - if (lfirsti(rel->relids) == relid) - elog(ERROR, "build_base_rel: rel already exists as 'other' rel"); - } - - /* No existing RelOptInfo for this base rel, so make a new one */ - rel = make_base_rel(root, relid); - - /* and add it to the list */ - root->base_rel_list = lcons(rel, root->base_rel_list); -} - -/* - * build_other_rel - * Returns relation entry corresponding to 'relid', creating a new one - * if necessary. This is for 'other' relations, which are much like - * base relations except that they live in a different list. - */ -RelOptInfo * -build_other_rel(Query *root, int relid) -{ - List *rels; - RelOptInfo *rel; - - /* Already made? 
*/ - foreach(rels, root->other_rel_list) - { - rel = (RelOptInfo *) lfirst(rels); - - /* length(rel->relids) == 1 for all members of other_rel_list */ - if (lfirsti(rel->relids) == relid) - return rel; - } - - /* It should not exist as a base rel */ - foreach(rels, root->base_rel_list) - { - rel = (RelOptInfo *) lfirst(rels); - - if (lfirsti(rel->relids) == relid) - elog(ERROR, "build_other_rel: rel already exists as base rel"); - } - - /* No existing RelOptInfo for this other rel, so make a new one */ - rel = make_base_rel(root, relid); - - /* if it's not a join rel, must be a child rel */ - if (rel->reloptkind == RELOPT_BASEREL) - rel->reloptkind = RELOPT_OTHER_CHILD_REL; - - /* and add it to the list */ - root->other_rel_list = lcons(rel, root->other_rel_list); - - return rel; -} - -/* - * make_base_rel - * Construct a base-relation RelOptInfo for the specified rangetable index. - * - * Common code for build_base_rel and build_other_rel. - */ -static RelOptInfo * -make_base_rel(Query *root, int relid) -{ - RelOptInfo *rel = makeNode(RelOptInfo); - RangeTblEntry *rte = rt_fetch(relid, root->rtable); - - rel->reloptkind = RELOPT_BASEREL; - rel->relids = makeListi1(relid); - rel->rows = 0; - rel->width = 0; - rel->targetlist = NIL; - rel->pathlist = NIL; - rel->cheapest_startup_path = NULL; - rel->cheapest_total_path = NULL; - rel->pruneable = true; - rel->rtekind = rte->rtekind; - rel->indexlist = NIL; - rel->pages = 0; - rel->tuples = 0; - rel->subplan = NULL; - rel->joinrti = 0; - rel->joinrteids = NIL; - rel->baserestrictinfo = NIL; - rel->baserestrictcost = 0; - rel->outerjoinset = NIL; - rel->joininfo = NIL; - rel->innerjoin = NIL; - - /* Check type of rtable entry */ - switch (rte->rtekind) - { - case RTE_RELATION: - { - /* Table --- retrieve statistics from the system catalogs */ - bool indexed; - - get_relation_info(rte->relid, - &indexed, &rel->pages, &rel->tuples); - if (indexed) - rel->indexlist = find_secondary_indexes(rte->relid); - break; - } - case 
RTE_SUBQUERY: - case RTE_FUNCTION: - /* Subquery or function --- nothing to do here */ - break; - case RTE_JOIN: - /* Join --- must be an otherrel */ - rel->reloptkind = RELOPT_OTHER_JOIN_REL; - break; - default: - elog(ERROR, "make_base_rel: unsupported RTE kind %d", - (int) rte->rtekind); - break; - } - - return rel; -} - -/* - * find_base_rel - * Find a base or other relation entry, which must already exist - * (since we'd have no idea which list to add it to). - */ -RelOptInfo * -find_base_rel(Query *root, int relid) -{ - List *rels; - RelOptInfo *rel; - - foreach(rels, root->base_rel_list) - { - rel = (RelOptInfo *) lfirst(rels); - - /* length(rel->relids) == 1 for all members of base_rel_list */ - if (lfirsti(rel->relids) == relid) - return rel; - } - - foreach(rels, root->other_rel_list) - { - rel = (RelOptInfo *) lfirst(rels); - - if (lfirsti(rel->relids) == relid) - return rel; - } - - elog(ERROR, "find_base_rel: no relation entry for relid %d", relid); - - return NULL; /* keep compiler quiet */ -} - -/* - * find_other_rel - * Find an otherrel entry, if one exists for the given relid. - * Return NULL if no entry. - */ -RelOptInfo * -find_other_rel(Query *root, int relid) -{ - List *rels; - - foreach(rels, root->other_rel_list) - { - RelOptInfo *rel = (RelOptInfo *) lfirst(rels); - - if (lfirsti(rel->relids) == relid) - return rel; - } - return NULL; -} - -/* - * find_other_rel_for_join - * Look for an otherrel for a join RTE matching the given baserel set. - * Return NULL if no entry. - */ -RelOptInfo * -find_other_rel_for_join(Query *root, List *relids) -{ - List *rels; - - foreach(rels, root->other_rel_list) - { - RelOptInfo *rel = (RelOptInfo *) lfirst(rels); - - if (rel->reloptkind == RELOPT_OTHER_JOIN_REL - && sameseti(relids, rel->outerjoinset)) - return rel; - } - return NULL; -} - -/* - * find_join_rel - * Returns relation entry corresponding to 'relids' (a list of RT indexes), - * or NULL if none exists. This is for join relations. 
- * - * Note: there is probably no good reason for this to be called from - * anywhere except build_join_rel, but keep it as a separate routine - * just in case. - */ -static RelOptInfo * -find_join_rel(Query *root, Relids relids) -{ - List *joinrels; - - foreach(joinrels, root->join_rel_list) - { - RelOptInfo *rel = (RelOptInfo *) lfirst(joinrels); - - if (sameseti(rel->relids, relids)) - return rel; - } - - return NULL; -} - -/* - * build_join_rel - * Returns relation entry corresponding to the union of two given rels, - * creating a new relation entry if none already exists. - * - * 'outer_rel' and 'inner_rel' are relation nodes for the relations to be - * joined - * 'jointype': type of join (inner/outer) - * 'restrictlist_ptr': result variable. If not NULL, *restrictlist_ptr - * receives the list of RestrictInfo nodes that apply to this - * particular pair of joinable relations. - * - * restrictlist_ptr makes the routine's API a little grotty, but it saves - * duplicated calculation of the restrictlist... - */ -RelOptInfo * -build_join_rel(Query *root, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - JoinType jointype, - List **restrictlist_ptr) -{ - List *joinrelids; - RelOptInfo *joinrel; - RelOptInfo *joinrterel; - List *restrictlist; - List *new_outer_tlist; - List *new_inner_tlist; - - /* We should never try to join two overlapping sets of rels. */ - Assert(nonoverlap_setsi(outer_rel->relids, inner_rel->relids)); - - /* - * See if we already have a joinrel for this set of base rels. - * - * nconc(listCopy(x), y) is an idiom for making a new list without - * changing either input list. - */ - joinrelids = nconc(listCopy(outer_rel->relids), inner_rel->relids); - joinrel = find_join_rel(root, joinrelids); - - if (joinrel) - { - /* - * Yes, so we only need to figure the restrictlist for this - * particular pair of component relations. 
- */ - if (restrictlist_ptr) - *restrictlist_ptr = build_joinrel_restrictlist(root, - joinrel, - outer_rel, - inner_rel); - return joinrel; - } - - /* - * Nope, so make one. - */ - joinrel = makeNode(RelOptInfo); - joinrel->reloptkind = RELOPT_JOINREL; - joinrel->relids = joinrelids; - joinrel->rows = 0; - joinrel->width = 0; - joinrel->targetlist = NIL; - joinrel->pathlist = NIL; - joinrel->cheapest_startup_path = NULL; - joinrel->cheapest_total_path = NULL; - joinrel->pruneable = true; - joinrel->rtekind = RTE_JOIN; - joinrel->indexlist = NIL; - joinrel->pages = 0; - joinrel->tuples = 0; - joinrel->subplan = NULL; - joinrel->joinrti = 0; - joinrel->joinrteids = nconc(listCopy(outer_rel->joinrteids), - inner_rel->joinrteids); - joinrel->baserestrictinfo = NIL; - joinrel->baserestrictcost = 0; - joinrel->outerjoinset = NIL; - joinrel->joininfo = NIL; - joinrel->innerjoin = NIL; - - /* Is there a join RTE matching this join? */ - joinrterel = find_other_rel_for_join(root, joinrelids); - if (joinrterel) - { - /* Yes, remember its RT index */ - joinrel->joinrti = lfirsti(joinrterel->relids); - joinrel->joinrteids = lconsi(joinrel->joinrti, joinrel->joinrteids); - } - - /* - * Create a new tlist by removing irrelevant elements from both tlists - * of the outer and inner join relations and then merging the results - * together. - * - * XXX right now we don't remove any irrelevant elements, we just - * append the two tlists together. Someday consider pruning vars from the - * join's targetlist if they are needed only to evaluate restriction - * clauses of this join, and will never be accessed at higher levels of - * the plantree. - * - * NOTE: the tlist order for a join rel will depend on which pair of - * outer and inner rels we first try to build it from. But the - * contents should be the same regardless. 
- */ - new_outer_tlist = new_join_tlist(outer_rel->targetlist, 1); - new_inner_tlist = new_join_tlist(inner_rel->targetlist, - length(new_outer_tlist) + 1); - joinrel->targetlist = nconc(new_outer_tlist, new_inner_tlist); - - /* - * If there are any alias variables attached to the matching join RTE, - * attach them to the tlist too, so that they will be evaluated for use - * at higher plan levels. - */ - if (joinrterel) - { - List *jrtetl; - - foreach(jrtetl, joinrterel->targetlist) - { - TargetEntry *jrtete = lfirst(jrtetl); - - add_var_to_tlist(joinrel, (Var *) jrtete->expr); - } - } - - /* - * Construct restrict and join clause lists for the new joinrel. (The - * caller might or might not need the restrictlist, but I need it - * anyway for set_joinrel_size_estimates().) - */ - restrictlist = build_joinrel_restrictlist(root, - joinrel, - outer_rel, - inner_rel); - if (restrictlist_ptr) - *restrictlist_ptr = restrictlist; - build_joinrel_joinlist(joinrel, outer_rel, inner_rel); - - /* - * Set estimates of the joinrel's size. - */ - set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel, - jointype, restrictlist); - - /* - * Add the joinrel to the query's joinrel list. - */ - root->join_rel_list = lcons(joinrel, root->join_rel_list); - - return joinrel; -} - -/* - * new_join_tlist - * Builds a join relation's target list by keeping those elements that - * will be in the final target list and any other elements that are still - * needed for future joins. For a target list entry to still be needed - * for future joins, its 'joinlist' field must not be empty after removal - * of all relids in 'other_relids'. - * - * XXX the above comment refers to code that is long dead and gone; - * we don't keep track of joinlists for individual targetlist entries - * anymore. For now, all vars present in either input tlist will be - * emitted in the join's tlist. 
- * - * 'tlist' is the target list of one of the join relations - * 'first_resdomno' is the resdom number to use for the first created - * target list entry - * - * Returns the new target list. - */ -static List * -new_join_tlist(List *tlist, - int first_resdomno) -{ - int resdomno = first_resdomno - 1; - List *t_list = NIL; - List *i; - - foreach(i, tlist) - { - TargetEntry *xtl = lfirst(i); - - resdomno += 1; - t_list = lappend(t_list, - create_tl_element(get_expr(xtl), resdomno)); - } - - return t_list; -} - -/* - * build_joinrel_restrictlist - * build_joinrel_joinlist - * These routines build lists of restriction and join clauses for a - * join relation from the joininfo lists of the relations it joins. - * - * These routines are separate because the restriction list must be - * built afresh for each pair of input sub-relations we consider, whereas - * the join lists need only be computed once for any join RelOptInfo. - * The join lists are fully determined by the set of rels making up the - * joinrel, so we should get the same results (up to ordering) from any - * candidate pair of sub-relations. But the restriction list is whatever - * is not handled in the sub-relations, so it depends on which - * sub-relations are considered. - * - * If a join clause from an input relation refers to base rels still not - * present in the joinrel, then it is still a join clause for the joinrel; - * we put it into an appropriate JoinInfo list for the joinrel. Otherwise, - * the clause is now a restrict clause for the joined relation, and we - * return it to the caller of build_joinrel_restrictlist() to be stored in - * join paths made from this pair of sub-relations. (It will not need to - * be considered further up the join tree.) - * - * When building a restriction list, we eliminate redundant clauses. 
- * We don't try to do that for join clause lists, since the join clauses - * aren't really doing anything, just waiting to become part of higher - * levels' restriction lists. - * - * 'joinrel' is a join relation node - * 'outer_rel' and 'inner_rel' are a pair of relations that can be joined - * to form joinrel. - * - * build_joinrel_restrictlist() returns a list of relevant restrictinfos, - * whereas build_joinrel_joinlist() stores its results in the joinrel's - * joininfo lists. One or the other must accept each given clause! - * - * NB: Formerly, we made deep(!) copies of each input RestrictInfo to pass - * up to the join relation. I believe this is no longer necessary, because - * RestrictInfo nodes are no longer context-dependent. Instead, just include - * the original nodes in the lists made for the join relation. - */ -static List * -build_joinrel_restrictlist(Query *root, - RelOptInfo *joinrel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel) -{ - List *result = NIL; - List *rlist; - List *item; - - /* - * Collect all the clauses that syntactically belong at this level. - */ - rlist = nconc(subbuild_joinrel_restrictlist(joinrel, - outer_rel->joininfo), - subbuild_joinrel_restrictlist(joinrel, - inner_rel->joininfo)); - - /* - * Eliminate duplicate and redundant clauses. - * - * We must eliminate duplicates, since we will see many of the same - * clauses arriving from both input relations. Also, if a clause is a - * mergejoinable clause, it's possible that it is redundant with - * previous clauses (see optimizer/README for discussion). We detect - * that case and omit the redundant clause from the result list. - * - * We can detect redundant mergejoinable clauses very cheaply by using - * their left and right pathkeys, which uniquely identify the sets of - * equijoined variables in question. All the members of a pathkey set - * that are in the left relation have already been forced to be equal; - * likewise for those in the right relation. 
So, we need to have only - * one clause that checks equality between any set member on the left - * and any member on the right; by transitivity, all the rest are then - * equal. - */ - foreach(item, rlist) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(item); - - /* eliminate duplicates */ - if (member(rinfo, result)) - continue; - - /* check for redundant merge clauses */ - if (rinfo->mergejoinoperator != InvalidOid) - { - bool redundant = false; - List *olditem; - - cache_mergeclause_pathkeys(root, rinfo); - - foreach(olditem, result) - { - RestrictInfo *oldrinfo = (RestrictInfo *) lfirst(olditem); - - if (oldrinfo->mergejoinoperator != InvalidOid && - rinfo->left_pathkey == oldrinfo->left_pathkey && - rinfo->right_pathkey == oldrinfo->right_pathkey) - { - redundant = true; - break; - } - } - - if (redundant) - continue; - } - - /* otherwise, add it to result list */ - result = lappend(result, rinfo); - } - - freeList(rlist); - - return result; -} - -static void -build_joinrel_joinlist(RelOptInfo *joinrel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel) -{ - subbuild_joinrel_joinlist(joinrel, outer_rel->joininfo); - subbuild_joinrel_joinlist(joinrel, inner_rel->joininfo); -} - -static List * -subbuild_joinrel_restrictlist(RelOptInfo *joinrel, - List *joininfo_list) -{ - List *restrictlist = NIL; - List *xjoininfo; - - foreach(xjoininfo, joininfo_list) - { - JoinInfo *joininfo = (JoinInfo *) lfirst(xjoininfo); - - if (is_subseti(joininfo->unjoined_relids, joinrel->relids)) - { - /* - * Clauses in this JoinInfo list become restriction clauses - * for the joinrel, since they refer to no outside rels. - * - * We must copy the list to avoid disturbing the input relation, - * but we can use a shallow copy. - */ - restrictlist = nconc(restrictlist, - listCopy(joininfo->jinfo_restrictinfo)); - } - else - { - /* - * These clauses are still join clauses at this level, so we - * ignore them in this routine. 
- */ - } - } - - return restrictlist; -} - -static void -subbuild_joinrel_joinlist(RelOptInfo *joinrel, - List *joininfo_list) -{ - List *xjoininfo; - - foreach(xjoininfo, joininfo_list) - { - JoinInfo *joininfo = (JoinInfo *) lfirst(xjoininfo); - Relids new_unjoined_relids; - - new_unjoined_relids = set_differencei(joininfo->unjoined_relids, - joinrel->relids); - if (new_unjoined_relids == NIL) - { - /* - * Clauses in this JoinInfo list become restriction clauses - * for the joinrel, since they refer to no outside rels. So we - * can ignore them in this routine. - */ - } - else - { - /* - * These clauses are still join clauses at this level, so find - * or make the appropriate JoinInfo item for the joinrel, and - * add the clauses to it (eliminating duplicates). - */ - JoinInfo *new_joininfo; - - new_joininfo = find_joininfo_node(joinrel, new_unjoined_relids); - new_joininfo->jinfo_restrictinfo = - set_union(new_joininfo->jinfo_restrictinfo, - joininfo->jinfo_restrictinfo); - } - } -} diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c deleted file mode 100644 index c9f1e75e232..00000000000 --- a/src/backend/optimizer/util/restrictinfo.c +++ /dev/null @@ -1,82 +0,0 @@ -/*------------------------------------------------------------------------- - * - * restrictinfo.c - * RestrictInfo node manipulation routines. - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.14 2002/06/20 20:29:31 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - - -#include "optimizer/clauses.h" -#include "optimizer/restrictinfo.h" - -/* - * restriction_is_or_clause - * - * Returns t iff the restrictinfo node contains an 'or' clause. 
- * - */ -bool -restriction_is_or_clause(RestrictInfo *restrictinfo) -{ - if (restrictinfo != NULL && - or_clause((Node *) restrictinfo->clause)) - return true; - else - return false; -} - -/* - * get_actual_clauses - * - * Returns a list containing the clauses from 'restrictinfo_list'. - * - */ -List * -get_actual_clauses(List *restrictinfo_list) -{ - List *result = NIL; - List *temp; - - foreach(temp, restrictinfo_list) - { - RestrictInfo *clause = (RestrictInfo *) lfirst(temp); - - result = lappend(result, clause->clause); - } - return result; -} - -/* - * get_actual_join_clauses - * - * Extract clauses from 'restrictinfo_list', separating those that - * syntactically match the join level from those that were pushed down. - */ -void -get_actual_join_clauses(List *restrictinfo_list, - List **joinquals, List **otherquals) -{ - List *temp; - - *joinquals = NIL; - *otherquals = NIL; - - foreach(temp, restrictinfo_list) - { - RestrictInfo *clause = (RestrictInfo *) lfirst(temp); - - if (clause->ispusheddown) - *otherquals = lappend(*otherquals, clause->clause); - else - *joinquals = lappend(*joinquals, clause->clause); - } -} diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c deleted file mode 100644 index fa8c89862f4..00000000000 --- a/src/backend/optimizer/util/tlist.c +++ /dev/null @@ -1,257 +0,0 @@ -/*------------------------------------------------------------------------- - * - * tlist.c - * Target list manipulation routines - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/tlist.c,v 1.52 2002/06/20 20:29:31 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include "nodes/makefuncs.h" -#include "optimizer/tlist.h" -#include "optimizer/var.h" - - 
-/***************************************************************************** - * ---------- RELATION node target list routines ---------- - *****************************************************************************/ - -/* - * tlistentry_member - * Finds the (first) member of the given tlist whose expression is - * equal() to the given expression. Result is NULL if no such member. - */ -TargetEntry * -tlistentry_member(Node *node, List *targetlist) -{ - List *temp; - - foreach(temp, targetlist) - { - TargetEntry *tlentry = (TargetEntry *) lfirst(temp); - - if (equal(node, tlentry->expr)) - return tlentry; - } - return NULL; -} - -#ifdef NOT_USED -/* - * matching_tlist_expr - * Same as tlistentry_member(), except returns the tlist expression - * rather than its parent TargetEntry node. - */ -Node * -matching_tlist_expr(Node *node, List *targetlist) -{ - TargetEntry *tlentry; - - tlentry = tlistentry_member(node, targetlist); - if (tlentry) - return tlentry->expr; - - return (Node *) NULL; -} -#endif - -/* - * tlist_member - * Same as tlistentry_member(), except returns the Resdom node - * rather than its parent TargetEntry node. - */ -Resdom * -tlist_member(Node *node, List *targetlist) -{ - TargetEntry *tlentry; - - tlentry = tlistentry_member(node, targetlist); - if (tlentry) - return tlentry->resdom; - - return (Resdom *) NULL; -} - -/* - * add_var_to_tlist - * Creates a targetlist entry corresponding to the supplied var node - * 'var' and adds the new targetlist entry to the targetlist field of - * 'rel'. No entry is created if 'var' is already in the tlist. - */ -void -add_var_to_tlist(RelOptInfo *rel, Var *var) -{ - if (!tlistentry_member((Node *) var, rel->targetlist)) - { - /* XXX is copyObject necessary here? 
*/ - rel->targetlist = lappend(rel->targetlist, - create_tl_element((Var *) copyObject(var), - length(rel->targetlist) + 1)); - } -} - -/* - * create_tl_element - * Creates a target list entry node and its associated (resdom var) pair - * with its resdom number equal to 'resdomno'. - */ -TargetEntry * -create_tl_element(Var *var, int resdomno) -{ - return makeTargetEntry(makeResdom(resdomno, - var->vartype, - var->vartypmod, - NULL, - false), - (Node *) var); -} - -/***************************************************************************** - * ---------- GENERAL target list routines ---------- - *****************************************************************************/ - -/* - * new_unsorted_tlist - * Creates a copy of a target list by creating new resdom nodes - * without sort information. - * - * 'targetlist' is the target list to be copied. - * - * Returns the resulting target list. - * - */ -List * -new_unsorted_tlist(List *targetlist) -{ - List *new_targetlist = (List *) copyObject((Node *) targetlist); - List *x; - - foreach(x, new_targetlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(x); - - tle->resdom->reskey = 0; - tle->resdom->reskeyop = (Oid) 0; - } - return new_targetlist; -} - -/* - * flatten_tlist - * Create a target list that only contains unique variables. - * - * Note that Vars with varlevelsup > 0 are not included in the output - * tlist. We expect that those will eventually be replaced with Params, - * but that probably has not happened at the time this routine is called. - * - * 'tlist' is the current target list - * - * Returns the "flattened" new target list. - * - * The result is entirely new structure sharing no nodes with the original. - * Copying the Var nodes is probably overkill, but be safe for now. 
- */ -List * -flatten_tlist(List *tlist) -{ - List *vlist = pull_var_clause((Node *) tlist, false); - List *new_tlist; - - new_tlist = add_to_flat_tlist(NIL, vlist); - freeList(vlist); - return new_tlist; -} - -/* - * add_to_flat_tlist - * Add more vars to a flattened tlist (if they're not already in it) - * - * 'tlist' is the flattened tlist - * 'vars' is a list of var nodes - * - * Returns the extended tlist. - */ -List * -add_to_flat_tlist(List *tlist, List *vars) -{ - int next_resdomno = length(tlist) + 1; - List *v; - - foreach(v, vars) - { - Var *var = lfirst(v); - - if (!tlistentry_member((Node *) var, tlist)) - { - Resdom *r; - - r = makeResdom(next_resdomno++, - var->vartype, - var->vartypmod, - NULL, - false); - tlist = lappend(tlist, - makeTargetEntry(r, copyObject(var))); - } - } - return tlist; -} - -Var * -get_expr(TargetEntry *tle) -{ - Assert(tle != NULL); - Assert(tle->expr != NULL); - - return (Var *) tle->expr; -} - -/* - * get_sortgroupclause_tle - * Find the targetlist entry matching the given SortClause - * (or GroupClause) by ressortgroupref, and return it. - * - * Because GroupClause is typedef'd as SortClause, either kind of - * node can be passed without casting. - */ -TargetEntry * -get_sortgroupclause_tle(SortClause *sortClause, - List *targetList) -{ - Index refnumber = sortClause->tleSortGroupRef; - List *l; - - foreach(l, targetList) - { - TargetEntry *tle = (TargetEntry *) lfirst(l); - - if (tle->resdom->ressortgroupref == refnumber) - return tle; - } - - elog(ERROR, "get_sortgroupclause_tle: ORDER/GROUP BY expression not found in targetlist"); - return NULL; /* keep compiler quiet */ -} - -/* - * get_sortgroupclause_expr - * Find the targetlist entry matching the given SortClause - * (or GroupClause) by ressortgroupref, and return its expression. - * - * Because GroupClause is typedef'd as SortClause, either kind of - * node can be passed without casting. 
- */ -Node * -get_sortgroupclause_expr(SortClause *sortClause, List *targetList) -{ - TargetEntry *tle = get_sortgroupclause_tle(sortClause, targetList); - - return tle->expr; -} diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c deleted file mode 100644 index 776636ff595..00000000000 --- a/src/backend/optimizer/util/var.c +++ /dev/null @@ -1,370 +0,0 @@ -/*------------------------------------------------------------------------- - * - * var.c - * Var node manipulation routines - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/var.c,v 1.38 2002/06/20 20:29:31 momjian Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include "nodes/plannodes.h" -#include "optimizer/clauses.h" -#include "optimizer/var.h" -#include "parser/parsetree.h" - - -typedef struct -{ - List *varlist; - int sublevels_up; -} pull_varnos_context; - -typedef struct -{ - int varno; - int varattno; - int sublevels_up; -} contain_var_reference_context; - -typedef struct -{ - List *varlist; - bool includeUpperVars; -} pull_var_clause_context; - -typedef struct -{ - List *rtable; - bool force; -} flatten_join_alias_vars_context; - -static bool pull_varnos_walker(Node *node, - pull_varnos_context *context); -static bool contain_var_reference_walker(Node *node, - contain_var_reference_context *context); -static bool contain_var_clause_walker(Node *node, void *context); -static bool pull_var_clause_walker(Node *node, - pull_var_clause_context *context); -static Node *flatten_join_alias_vars_mutator(Node *node, - flatten_join_alias_vars_context *context); - - -/* - * pull_varnos - * - * Create a list of all the distinct varnos present in a parsetree. - * Only varnos that reference level-zero rtable entries are considered. 
- * - * NOTE: this is used on not-yet-planned expressions. It may therefore find - * bare SubLinks, and if so it needs to recurse into them to look for uplevel - * references to the desired rtable level! But when we find a completed - * SubPlan, we only need to look at the parameters passed to the subplan. - */ -List * -pull_varnos(Node *node) -{ - pull_varnos_context context; - - context.varlist = NIL; - context.sublevels_up = 0; - - /* - * Must be prepared to start with a Query or a bare expression tree; - * if it's a Query, go straight to query_tree_walker to make sure that - * sublevels_up doesn't get incremented prematurely. - */ - if (node && IsA(node, Query)) - query_tree_walker((Query *) node, pull_varnos_walker, - (void *) &context, true); - else - pull_varnos_walker(node, &context); - - return context.varlist; -} - -static bool -pull_varnos_walker(Node *node, pull_varnos_context *context) -{ - if (node == NULL) - return false; - if (IsA(node, Var)) - { - Var *var = (Var *) node; - - if (var->varlevelsup == context->sublevels_up && - !intMember(var->varno, context->varlist)) - context->varlist = lconsi(var->varno, context->varlist); - return false; - } - if (is_subplan(node)) - { - /* - * Already-planned subquery. Examine the args list (parameters to - * be passed to subquery), as well as the "oper" list which is - * executed by the outer query. But short-circuit recursion into - * the subquery itself, which would be a waste of effort. 
- */ - Expr *expr = (Expr *) node; - - if (pull_varnos_walker((Node *) ((SubPlan *) expr->oper)->sublink->oper, - context)) - return true; - if (pull_varnos_walker((Node *) expr->args, - context)) - return true; - return false; - } - if (IsA(node, Query)) - { - /* Recurse into RTE subquery or not-yet-planned sublink subquery */ - bool result; - - context->sublevels_up++; - result = query_tree_walker((Query *) node, pull_varnos_walker, - (void *) context, true); - context->sublevels_up--; - return result; - } - return expression_tree_walker(node, pull_varnos_walker, - (void *) context); -} - - -/* - * contain_var_reference - * - * Detect whether a parsetree contains any references to a specified - * attribute of a specified rtable entry. - * - * NOTE: this is used on not-yet-planned expressions. It may therefore find - * bare SubLinks, and if so it needs to recurse into them to look for uplevel - * references to the desired rtable entry! But when we find a completed - * SubPlan, we only need to look at the parameters passed to the subplan. - */ -bool -contain_var_reference(Node *node, int varno, int varattno, int levelsup) -{ - contain_var_reference_context context; - - context.varno = varno; - context.varattno = varattno; - context.sublevels_up = levelsup; - - /* - * Must be prepared to start with a Query or a bare expression tree; - * if it's a Query, go straight to query_tree_walker to make sure that - * sublevels_up doesn't get incremented prematurely. 
- */ - if (node && IsA(node, Query)) - return query_tree_walker((Query *) node, - contain_var_reference_walker, - (void *) &context, true); - else - return contain_var_reference_walker(node, &context); -} - -static bool -contain_var_reference_walker(Node *node, - contain_var_reference_context *context) -{ - if (node == NULL) - return false; - if (IsA(node, Var)) - { - Var *var = (Var *) node; - - if (var->varno == context->varno && - var->varattno == context->varattno && - var->varlevelsup == context->sublevels_up) - return true; - return false; - } - if (is_subplan(node)) - { - /* - * Already-planned subquery. Examine the args list (parameters to - * be passed to subquery), as well as the "oper" list which is - * executed by the outer query. But short-circuit recursion into - * the subquery itself, which would be a waste of effort. - */ - Expr *expr = (Expr *) node; - - if (contain_var_reference_walker((Node *) ((SubPlan *) expr->oper)->sublink->oper, - context)) - return true; - if (contain_var_reference_walker((Node *) expr->args, - context)) - return true; - return false; - } - if (IsA(node, Query)) - { - /* Recurse into RTE subquery or not-yet-planned sublink subquery */ - bool result; - - context->sublevels_up++; - result = query_tree_walker((Query *) node, - contain_var_reference_walker, - (void *) context, true); - context->sublevels_up--; - return result; - } - return expression_tree_walker(node, contain_var_reference_walker, - (void *) context); -} - - -/* - * contain_whole_tuple_var - * - * Detect whether a parsetree contains any references to the whole - * tuple of a given rtable entry (ie, a Var with varattno = 0). - */ -bool -contain_whole_tuple_var(Node *node, int varno, int levelsup) -{ - return contain_var_reference(node, varno, InvalidAttrNumber, levelsup); -} - - -/* - * contain_var_clause - * Recursively scan a clause to discover whether it contains any Var nodes - * (of the current query level). - * - * Returns true if any varnode found. 
- * - * Does not examine subqueries, therefore must only be used after reduction - * of sublinks to subplans! - */ -bool -contain_var_clause(Node *node) -{ - return contain_var_clause_walker(node, NULL); -} - -static bool -contain_var_clause_walker(Node *node, void *context) -{ - if (node == NULL) - return false; - if (IsA(node, Var)) - { - if (((Var *) node)->varlevelsup == 0) - return true; /* abort the tree traversal and return - * true */ - return false; - } - return expression_tree_walker(node, contain_var_clause_walker, context); -} - - -/* - * pull_var_clause - * Recursively pulls all var nodes from an expression clause. - * - * Upper-level vars (with varlevelsup > 0) are included only - * if includeUpperVars is true. Most callers probably want - * to ignore upper-level vars. - * - * Returns list of varnodes found. Note the varnodes themselves are not - * copied, only referenced. - * - * Does not examine subqueries, therefore must only be used after reduction - * of sublinks to subplans! - */ -List * -pull_var_clause(Node *node, bool includeUpperVars) -{ - pull_var_clause_context context; - - context.varlist = NIL; - context.includeUpperVars = includeUpperVars; - - pull_var_clause_walker(node, &context); - return context.varlist; -} - -static bool -pull_var_clause_walker(Node *node, pull_var_clause_context *context) -{ - if (node == NULL) - return false; - if (IsA(node, Var)) - { - if (((Var *) node)->varlevelsup == 0 || context->includeUpperVars) - context->varlist = lappend(context->varlist, node); - return false; - } - return expression_tree_walker(node, pull_var_clause_walker, - (void *) context); -} - - -/* - * flatten_join_alias_vars - * Replace Vars that reference JOIN outputs with references to the original - * relation variables instead. This allows quals involving such vars to be - * pushed down. 
- * - * If force is TRUE then we will reduce all JOIN alias Vars to non-alias Vars - * or expressions thereof (there may be COALESCE and/or type conversions - * involved). If force is FALSE we will not expand a Var to a non-Var - * expression. This is a hack to avoid confusing mergejoin planning, which - * currently cannot cope with non-Var join items --- we leave the join vars - * as Vars till after planning is done, then expand them during setrefs.c. - * - * Upper-level vars (with varlevelsup > 0) are ignored; normally there - * should not be any by the time this routine is called. - * - * Does not examine subqueries, therefore must only be used after reduction - * of sublinks to subplans! - */ -Node * -flatten_join_alias_vars(Node *node, List *rtable, bool force) -{ - flatten_join_alias_vars_context context; - - context.rtable = rtable; - context.force = force; - - return flatten_join_alias_vars_mutator(node, &context); -} - -static Node * -flatten_join_alias_vars_mutator(Node *node, - flatten_join_alias_vars_context *context) -{ - if (node == NULL) - return NULL; - if (IsA(node, Var)) - { - Var *var = (Var *) node; - RangeTblEntry *rte; - Node *newvar; - - if (var->varlevelsup != 0) - return node; /* no need to copy, really */ - rte = rt_fetch(var->varno, context->rtable); - if (rte->rtekind != RTE_JOIN) - return node; - Assert(var->varattno > 0); - newvar = (Node *) nth(var->varattno - 1, rte->joinaliasvars); - if (IsA(newvar, Var) || context->force) - { - /* expand it; recurse in case join input is itself a join */ - return flatten_join_alias_vars_mutator(newvar, context); - } - /* we don't want to force expansion of this alias Var */ - return node; - } - return expression_tree_mutator(node, flatten_join_alias_vars_mutator, - (void *) context); -} |