diff options
| author | Tom Lane <tgl@sss.pgh.pa.us> | 2008-08-14 18:48:00 +0000 |
|---|---|---|
| committer | Tom Lane <tgl@sss.pgh.pa.us> | 2008-08-14 18:48:00 +0000 |
| commit | e006a24ad152b3faec748afe8c1ff0829699b2e6 (patch) | |
| tree | d00e01d25270b4b04aac3c723b9e440a56d8a085 /src/include/nodes/relation.h | |
| parent | ef1c807c25b47960aa86cd185fb74371e88d0cbf (diff) | |
Implement SEMI and ANTI joins in the planner and executor. (Semijoins replace
the old JOIN_IN code, but antijoins are new functionality.) Teach the planner
to convert appropriate EXISTS and NOT EXISTS subqueries into semi and anti
joins respectively. Also, LEFT JOINs with suitable upper-level IS NULL
filters are recognized as being anti joins. Unify the InClauseInfo and
OuterJoinInfo infrastructure into "SpecialJoinInfo". With that change,
it becomes possible to associate a SpecialJoinInfo with every join attempt,
which permits some cleanup of join selectivity estimation. That needs to be
taken much further than this patch does, but the next step is to change the
API for oprjoin selectivity functions, which seems like material for a
separate patch. So for the moment the output size estimates for semi and
especially anti joins are quite bogus.
Diffstat (limited to 'src/include/nodes/relation.h')
| -rw-r--r-- | src/include/nodes/relation.h | 108 |
1 files changed, 63 insertions, 45 deletions
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index f8c23071661..8fb6861e50c 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.157 2008/08/05 02:43:17 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.158 2008/08/14 18:48:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -153,9 +153,7 @@ typedef struct PlannerInfo List *full_join_clauses; /* list of RestrictInfos for * mergejoinable full join clauses */ - List *oj_info_list; /* list of OuterJoinInfos */ - - List *in_info_list; /* list of InClauseInfos */ + List *join_info_list; /* list of SpecialJoinInfos */ List *append_rel_list; /* list of AppendRelInfos */ @@ -175,7 +173,6 @@ typedef struct PlannerInfo double tuple_fraction; /* tuple_fraction passed to query_planner */ bool hasJoinRTEs; /* true if any RTEs are RTE_JOIN kind */ - bool hasOuterJoins; /* true if any RTEs are outer joins */ bool hasHavingQual; /* true if havingQual was non-null */ bool hasPseudoConstantQuals; /* true if any RestrictInfo has * pseudoconstant = true */ @@ -756,6 +753,8 @@ typedef struct UniquePath Path path; Path *subpath; UniquePathMethod umethod; + List *in_operators; /* equality operators of the IN clause */ + List *uniq_exprs; /* expressions to be made unique */ double rows; /* estimated number of result tuples */ } UniquePath; @@ -1053,18 +1052,49 @@ typedef struct InnerIndexscanInfo } InnerIndexscanInfo; /* - * Outer join info. + * "Flattened SubLinks" + * + * When we pull an IN or EXISTS SubLink up into the parent query, the + * join conditions extracted from the IN/EXISTS clause need to be specially + * treated in distribute_qual_to_rels processing. We handle this by + * wrapping such expressions in a FlattenedSubLink node that identifies + * the join they come from. The FlattenedSubLink node is discarded after + * distribute_qual_to_rels, having served its purpose. + * + * Although the planner treats this as an expression node type, it is not + * recognized by the parser or executor, so we declare it here rather than + * in primnodes.h. + */ + +typedef struct FlattenedSubLink +{ + Expr xpr; + JoinType jointype; /* must be JOIN_SEMI or JOIN_ANTI */ + Relids lefthand; /* base relids treated as syntactic LHS */ + Relids righthand; /* base relids syntactically within RHS */ + Expr *quals; /* join quals (in explicit-AND format) */ +} FlattenedSubLink; + +/* + * "Special join" info. * * One-sided outer joins constrain the order of joining partially but not * completely. We flatten such joins into the planner's top-level list of - * relations to join, but record information about each outer join in an - * OuterJoinInfo struct. These structs are kept in the PlannerInfo node's - * oj_info_list. + * relations to join, but record information about each outer join in a + * SpecialJoinInfo struct. These structs are kept in the PlannerInfo node's + * join_info_list. + * + * Similarly, semijoins and antijoins created by flattening IN (subselect) + * and EXISTS(subselect) clauses create partial constraints on join order. + * These are likewise recorded in SpecialJoinInfo structs. + * + * We make SpecialJoinInfos for FULL JOINs even though there is no flexibility + * of planning for them, because this simplifies make_join_rel()'s API. * * min_lefthand and min_righthand are the sets of base relids that must be - * available on each side when performing the outer join. lhs_strict is - * true if the outer join's condition cannot succeed when the LHS variables - * are all NULL (this means that the outer join can commute with upper-level + * available on each side when performing the special join. lhs_strict is + * true if the special join's condition cannot succeed when the LHS variables + * are all NULL (this means that an outer join can commute with upper-level * outer joins even if it appears in their RHS). We don't bother to set * lhs_strict for FULL JOINs, however. * @@ -1072,9 +1102,8 @@ typedef struct InnerIndexscanInfo * if they were, this would break the logic that enforces join order. * * syn_lefthand and syn_righthand are the sets of base relids that are - * syntactically below this outer join. (These are needed to help compute - * min_lefthand and min_righthand for higher joins, but are not used - * thereafter.) + * syntactically below this special join. (These are needed to help compute + * min_lefthand and min_righthand for higher joins.) * * delay_upper_joins is set TRUE if we detect a pushed-down clause that has * to be evaluated after this join is formed (because it references the RHS). @@ -1082,46 +1111,35 @@ typedef struct InnerIndexscanInfo * commute with this join, because that would leave noplace to check the * pushed-down clause. (We don't track this for FULL JOINs, either.) * - * Note: OuterJoinInfo directly represents only LEFT JOIN and FULL JOIN; - * RIGHT JOIN is handled by switching the inputs to make it a LEFT JOIN. - * We make an OuterJoinInfo for FULL JOINs even though there is no flexibility - * of planning for them, because this simplifies make_join_rel()'s API. + * join_quals is an implicit-AND list of the quals syntactically associated + * with the join (they may or may not end up being applied at the join level). + * This is just a side list and does not drive actual application of quals. + * For JOIN_SEMI joins, this is cleared to NIL in create_unique_path() if + * the join is found not to be suitable for a uniqueify-the-RHS plan. + * + * jointype is never JOIN_RIGHT; a RIGHT JOIN is handled by switching + * the inputs to make it a LEFT JOIN. So the allowed values of jointype + * in a join_info_list member are only LEFT, FULL, SEMI, or ANTI. + * + * For purposes of join selectivity estimation, we create transient + * SpecialJoinInfo structures for regular inner joins; so it is possible + * to have jointype == JOIN_INNER in such a structure, even though this is + * not allowed within join_info_list. Note that lhs_strict, delay_upper_joins, + * and join_quals are not set meaningfully for such structs. */ -typedef struct OuterJoinInfo +typedef struct SpecialJoinInfo { NodeTag type; Relids min_lefthand; /* base relids in minimum LHS for join */ Relids min_righthand; /* base relids in minimum RHS for join */ Relids syn_lefthand; /* base relids syntactically within LHS */ Relids syn_righthand; /* base relids syntactically within RHS */ - bool is_full_join; /* it's a FULL OUTER JOIN */ + JoinType jointype; /* always INNER, LEFT, FULL, SEMI, or ANTI */ bool lhs_strict; /* joinclause is strict for some LHS rel */ bool delay_upper_joins; /* can't commute with upper RHS */ -} OuterJoinInfo; - -/* - * IN clause info. - * - * When we convert top-level IN quals into join operations, we must restrict - * the order of joining and use special join methods at some join points. - * We record information about each such IN clause in an InClauseInfo struct. - * These structs are kept in the PlannerInfo node's in_info_list. - * - * Note: sub_targetlist is a bit misnamed; it is a list of the expressions - * on the RHS of the IN's join clauses. (This normally starts out as a list - * of Vars referencing the subquery outputs, but can get mutated if the - * subquery is flattened into the main query.) - */ - -typedef struct InClauseInfo -{ - NodeTag type; - Relids lefthand; /* base relids in lefthand expressions */ - Relids righthand; /* base relids coming from the subselect */ - List *sub_targetlist; /* RHS expressions of the IN's comparisons */ - List *in_operators; /* OIDs of the IN's equality operators */ -} InClauseInfo; + List *join_quals; /* join quals, in implicit-AND list format */ +} SpecialJoinInfo; /* * Append-relation info. |
