diff options
| author | Tom Lane <tgl@sss.pgh.pa.us> | 2008-08-14 18:48:00 +0000 |
|---|---|---|
| committer | Tom Lane <tgl@sss.pgh.pa.us> | 2008-08-14 18:48:00 +0000 |
| commit | e006a24ad152b3faec748afe8c1ff0829699b2e6 (patch) | |
| tree | d00e01d25270b4b04aac3c723b9e440a56d8a085 /src/include/nodes | |
| parent | ef1c807c25b47960aa86cd185fb74371e88d0cbf (diff) | |
Implement SEMI and ANTI joins in the planner and executor. (Semijoins replace
the old JOIN_IN code, but antijoins are new functionality.) Teach the planner
to convert appropriate EXISTS and NOT EXISTS subqueries into semi and anti
joins respectively. Also, LEFT JOINs with suitable upper-level IS NULL
filters are recognized as being anti joins. Unify the InClauseInfo and
OuterJoinInfo infrastructure into "SpecialJoinInfo". With that change,
it becomes possible to associate a SpecialJoinInfo with every join attempt,
which permits some cleanup of join selectivity estimation. That needs to be
taken much further than this patch does, but the next step is to change the
API for oprjoin selectivity functions, which seems like material for a
separate patch. So for the moment the output size estimates for semi and
especially anti joins are quite bogus.
Diffstat (limited to 'src/include/nodes')
| -rw-r--r-- | src/include/nodes/nodes.h | 52 | ||||
| -rw-r--r-- | src/include/nodes/pg_list.h | 5 | ||||
| -rw-r--r-- | src/include/nodes/relation.h | 108 |
3 files changed, 102 insertions, 63 deletions
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index e35356ab55f..5fdd23c5a91 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.207 2008/08/02 21:32:00 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.208 2008/08/14 18:48:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -202,8 +202,8 @@ typedef enum NodeTag T_PathKey, T_RestrictInfo, T_InnerIndexscanInfo, - T_OuterJoinInfo, - T_InClauseInfo, + T_FlattenedSubLink, + T_SpecialJoinInfo, T_AppendRelInfo, T_PlannerParamItem, @@ -474,31 +474,49 @@ typedef enum CmdType typedef enum JoinType { /* - * The canonical kinds of joins + * The canonical kinds of joins according to the SQL JOIN syntax. + * Only these codes can appear in parser output (e.g., JoinExpr nodes). */ JOIN_INNER, /* matching tuple pairs only */ - JOIN_LEFT, /* pairs + unmatched outer tuples */ - JOIN_FULL, /* pairs + unmatched outer + unmatched inner */ - JOIN_RIGHT, /* pairs + unmatched inner tuples */ + JOIN_LEFT, /* pairs + unmatched LHS tuples */ + JOIN_FULL, /* pairs + unmatched LHS + unmatched RHS */ + JOIN_RIGHT, /* pairs + unmatched RHS tuples */ /* - * These are used for queries like WHERE foo IN (SELECT bar FROM ...). - * Only JOIN_IN is actually implemented in the executor; the others are - * defined for internal use in the planner. + * Semijoins and anti-semijoins (as defined in relational theory) do + * not appear in the SQL JOIN syntax, but there are standard idioms for + * representing them (e.g., using EXISTS). The planner recognizes these + * cases and converts them to joins. So the planner and executor must + * support these codes. NOTE: in JOIN_SEMI output, it is unspecified + * which matching RHS row is joined to. In JOIN_ANTI output, the row + * is guaranteed to be null-extended. */ - JOIN_IN, /* at most one result per outer row */ - JOIN_REVERSE_IN, /* at most one result per inner row */ - JOIN_UNIQUE_OUTER, /* outer path must be made unique */ - JOIN_UNIQUE_INNER /* inner path must be made unique */ + JOIN_SEMI, /* 1 copy of each LHS row that has match(es) */ + JOIN_ANTI, /* 1 copy of each LHS row that has no match */ + + /* + * These codes are used internally in the planner, but are not supported + * by the executor (nor, indeed, by most of the planner). + */ + JOIN_UNIQUE_OUTER, /* LHS path must be made unique */ + JOIN_UNIQUE_INNER /* RHS path must be made unique */ /* * We might need additional join types someday. */ } JoinType; +/* + * OUTER joins are those for which pushed-down quals must behave differently + * from the join's own quals. This is in fact everything except INNER joins. + * However, this macro must also exclude the JOIN_UNIQUE symbols since those + * are temporary proxies for what will eventually be an INNER join. + * + * Note: in some places it is preferable to treat JOIN_SEMI as not being + * an outer join, since it doesn't produce null-extended rows. Be aware + * of that distinction when deciding whether to use this macro. + */ #define IS_OUTER_JOIN(jointype) \ - ((jointype) == JOIN_LEFT || \ - (jointype) == JOIN_FULL || \ - (jointype) == JOIN_RIGHT) + ((jointype) > JOIN_INNER && (jointype) < JOIN_UNIQUE_OUTER) #endif /* NODES_H */ diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h index fc9331939d7..fc7630b7e31 100644 --- a/src/include/nodes/pg_list.h +++ b/src/include/nodes/pg_list.h @@ -30,7 +30,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/pg_list.h,v 1.58 2008/03/17 02:18:55 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/pg_list.h,v 1.59 2008/08/14 18:48:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -218,6 +218,9 @@ extern List *list_union_ptr(List *list1, List *list2); extern List *list_union_int(List *list1, List *list2); extern List *list_union_oid(List *list1, List *list2); +extern List *list_intersection(List *list1, List *list2); +/* currently, there's no need for list_intersection_int etc */ + extern List *list_difference(List *list1, List *list2); extern List *list_difference_ptr(List *list1, List *list2); extern List *list_difference_int(List *list1, List *list2); diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index f8c23071661..8fb6861e50c 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.157 2008/08/05 02:43:17 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.158 2008/08/14 18:48:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -153,9 +153,7 @@ typedef struct PlannerInfo List *full_join_clauses; /* list of RestrictInfos for * mergejoinable full join clauses */ - List *oj_info_list; /* list of OuterJoinInfos */ - - List *in_info_list; /* list of InClauseInfos */ + List *join_info_list; /* list of SpecialJoinInfos */ List *append_rel_list; /* list of AppendRelInfos */ @@ -175,7 +173,6 @@ typedef struct PlannerInfo double tuple_fraction; /* tuple_fraction passed to query_planner */ bool hasJoinRTEs; /* true if any RTEs are RTE_JOIN kind */ - bool hasOuterJoins; /* true if any RTEs are outer joins */ bool hasHavingQual; /* true if havingQual was non-null */ bool hasPseudoConstantQuals; /* true if any RestrictInfo has * pseudoconstant = true */ @@ -756,6 +753,8 @@ typedef struct UniquePath Path path; Path *subpath; UniquePathMethod umethod; + List *in_operators; /* equality operators of the IN clause */ + List *uniq_exprs; /* expressions to be made unique */ double rows; /* estimated number of result tuples */ } UniquePath; @@ -1053,18 +1052,49 @@ typedef struct InnerIndexscanInfo } InnerIndexscanInfo; /* - * Outer join info. + * "Flattened SubLinks" + * + * When we pull an IN or EXISTS SubLink up into the parent query, the + * join conditions extracted from the IN/EXISTS clause need to be specially + * treated in distribute_qual_to_rels processing. We handle this by + * wrapping such expressions in a FlattenedSubLink node that identifies + * the join they come from. The FlattenedSubLink node is discarded after + * distribute_qual_to_rels, having served its purpose. + * + * Although the planner treats this as an expression node type, it is not + * recognized by the parser or executor, so we declare it here rather than + * in primnodes.h. + */ + +typedef struct FlattenedSubLink +{ + Expr xpr; + JoinType jointype; /* must be JOIN_SEMI or JOIN_ANTI */ + Relids lefthand; /* base relids treated as syntactic LHS */ + Relids righthand; /* base relids syntactically within RHS */ + Expr *quals; /* join quals (in explicit-AND format) */ +} FlattenedSubLink; + +/* + * "Special join" info. * * One-sided outer joins constrain the order of joining partially but not * completely. We flatten such joins into the planner's top-level list of - * relations to join, but record information about each outer join in an - * OuterJoinInfo struct. These structs are kept in the PlannerInfo node's - * oj_info_list. + * relations to join, but record information about each outer join in a + * SpecialJoinInfo struct. These structs are kept in the PlannerInfo node's + * join_info_list. + * + * Similarly, semijoins and antijoins created by flattening IN (subselect) + * and EXISTS(subselect) clauses create partial constraints on join order. + * These are likewise recorded in SpecialJoinInfo structs. + * + * We make SpecialJoinInfos for FULL JOINs even though there is no flexibility + * of planning for them, because this simplifies make_join_rel()'s API. * * min_lefthand and min_righthand are the sets of base relids that must be - * available on each side when performing the outer join. lhs_strict is - * true if the outer join's condition cannot succeed when the LHS variables - * are all NULL (this means that the outer join can commute with upper-level + * available on each side when performing the special join. lhs_strict is + * true if the special join's condition cannot succeed when the LHS variables + * are all NULL (this means that an outer join can commute with upper-level * outer joins even if it appears in their RHS). We don't bother to set * lhs_strict for FULL JOINs, however. * @@ -1072,9 +1102,8 @@ typedef struct InnerIndexscanInfo * if they were, this would break the logic that enforces join order. * * syn_lefthand and syn_righthand are the sets of base relids that are - * syntactically below this outer join. (These are needed to help compute - * min_lefthand and min_righthand for higher joins, but are not used - * thereafter.) + * syntactically below this special join. (These are needed to help compute + * min_lefthand and min_righthand for higher joins.) * * delay_upper_joins is set TRUE if we detect a pushed-down clause that has * to be evaluated after this join is formed (because it references the RHS). @@ -1082,46 +1111,35 @@ typedef struct InnerIndexscanInfo * commute with this join, because that would leave noplace to check the * pushed-down clause. (We don't track this for FULL JOINs, either.) * - * Note: OuterJoinInfo directly represents only LEFT JOIN and FULL JOIN; - * RIGHT JOIN is handled by switching the inputs to make it a LEFT JOIN. - * We make an OuterJoinInfo for FULL JOINs even though there is no flexibility - * of planning for them, because this simplifies make_join_rel()'s API. + * join_quals is an implicit-AND list of the quals syntactically associated + * with the join (they may or may not end up being applied at the join level). + * This is just a side list and does not drive actual application of quals. + * For JOIN_SEMI joins, this is cleared to NIL in create_unique_path() if + * the join is found not to be suitable for a uniqueify-the-RHS plan. + * + * jointype is never JOIN_RIGHT; a RIGHT JOIN is handled by switching + * the inputs to make it a LEFT JOIN. So the allowed values of jointype + * in a join_info_list member are only LEFT, FULL, SEMI, or ANTI. + * + * For purposes of join selectivity estimation, we create transient + * SpecialJoinInfo structures for regular inner joins; so it is possible + * to have jointype == JOIN_INNER in such a structure, even though this is + * not allowed within join_info_list. Note that lhs_strict, delay_upper_joins, + * and join_quals are not set meaningfully for such structs. */ -typedef struct OuterJoinInfo +typedef struct SpecialJoinInfo { NodeTag type; Relids min_lefthand; /* base relids in minimum LHS for join */ Relids min_righthand; /* base relids in minimum RHS for join */ Relids syn_lefthand; /* base relids syntactically within LHS */ Relids syn_righthand; /* base relids syntactically within RHS */ - bool is_full_join; /* it's a FULL OUTER JOIN */ + JoinType jointype; /* always INNER, LEFT, FULL, SEMI, or ANTI */ bool lhs_strict; /* joinclause is strict for some LHS rel */ bool delay_upper_joins; /* can't commute with upper RHS */ -} OuterJoinInfo; - -/* - * IN clause info. - * - * When we convert top-level IN quals into join operations, we must restrict - * the order of joining and use special join methods at some join points. - * We record information about each such IN clause in an InClauseInfo struct. - * These structs are kept in the PlannerInfo node's in_info_list. - * - * Note: sub_targetlist is a bit misnamed; it is a list of the expressions - * on the RHS of the IN's join clauses. (This normally starts out as a list - * of Vars referencing the subquery outputs, but can get mutated if the - * subquery is flattened into the main query.) - */ - -typedef struct InClauseInfo -{ - NodeTag type; - Relids lefthand; /* base relids in lefthand expressions */ - Relids righthand; /* base relids coming from the subselect */ - List *sub_targetlist; /* RHS expressions of the IN's comparisons */ - List *in_operators; /* OIDs of the IN's equality operators */ -} InClauseInfo; + List *join_quals; /* join quals, in implicit-AND list format */ +} SpecialJoinInfo; /* * Append-relation info. |
