diff options
Diffstat (limited to 'src/backend/optimizer/plan/analyzejoins.c')
-rw-r--r-- | src/backend/optimizer/plan/analyzejoins.c | 1211 |
1 files changed, 1154 insertions, 57 deletions
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 5f3cce873a0..2e14268e188 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -22,6 +22,7 @@ */ #include "postgres.h" +#include "catalog/pg_class.h" #include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include "optimizer/joininfo.h" @@ -33,10 +34,45 @@ #include "optimizer/tlist.h" #include "utils/lsyscache.h" +/* + * UniqueRelInfo caches a fact that a relation is unique when being joined + * to other relation(s) specified by outerrelids. + * 'extra_clauses' contains additional clauses from a baserestrictinfo list that + * were used to prove uniqueness. We cache it for the SJ checking procedure: SJ + * can be removed if the outer relation contains strictly the same set of + * clauses. + */ +typedef struct UniqueRelInfo +{ + Relids outerrelids; + List *extra_clauses; +} UniqueRelInfo; + +/* + * The context for replace_varno_walker() containing source and target relids. + */ +typedef struct +{ + int from; + int to; +} ReplaceVarnoContext; + +/* + * The struct containing self-join candidate. Used to find duplicate reloids. + */ +typedef struct +{ + int relid; + Oid reloid; +} SelfJoinCandidate; + +bool enable_self_join_removal; + /* local functions */ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); -static void remove_rel_from_query(PlannerInfo *root, int relid, - SpecialJoinInfo *sjinfo); + +static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid, + SpecialJoinInfo *sjinfo); static void remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid); static void remove_rel_from_eclass(EquivalenceClass *ec, @@ -44,14 +80,20 @@ static void remove_rel_from_eclass(EquivalenceClass *ec, static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, - List *clause_list); + List *clause_list, List **extra_clauses); static Oid distinct_col_search(int colno, List *colnos, List *opids); static bool is_innerrel_unique_for(PlannerInfo *root, Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, - List *restrictlist); + List *restrictlist, + List **extra_clauses); +static Bitmapset *replace_relid(Relids relids, int oldId, int newId); +static void replace_varno(Node *node, int from, int to); +static bool replace_varno_walker(Node *node, ReplaceVarnoContext *ctx); +static int self_join_candidates_cmp(const void *a, const void *b); + /* @@ -89,7 +131,7 @@ restart: */ innerrelid = bms_singleton_member(sjinfo->min_righthand); - remove_rel_from_query(root, innerrelid, sjinfo); + remove_leftjoinrel_from_query(root, innerrelid, sjinfo); /* We verify that exactly one reference gets removed from joinlist */ nremoved = 0; @@ -308,7 +350,7 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) * Now that we have the relevant equality join clauses, try to prove the * innerrel distinct. */ - if (rel_is_distinct_for(root, innerrel, clause_list)) + if (rel_is_distinct_for(root, innerrel, clause_list, NULL)) return true; /* @@ -320,29 +362,24 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) /* - * Remove the target relid and references to the target join from the + * Remove the target rel->relid and references to the target join from the * planner's data structures, having determined that there is no need - * to include them in the query. + * to include them in the query. Optionally replace them with subst if subst + * is non-negative. * - * We are not terribly thorough here. We only bother to update parts of - * the planner's data structures that will actually be consulted later. + * This function updates only parts needed for both left-join removal and + * self-join removal. */ static void -remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) +remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel, + int subst, SpecialJoinInfo *sjinfo, + Relids joinrelids) { - RelOptInfo *rel = find_base_rel(root, relid); - int ojrelid = sjinfo->ojrelid; - Relids joinrelids; - Relids join_plus_commute; - List *joininfos; + int relid = rel->relid; + int ojrelid = (sjinfo != NULL) ? sjinfo->ojrelid : -1; Index rti; ListCell *l; - /* Compute the relid set for the join we are considering */ - joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); - Assert(ojrelid != 0); - joinrelids = bms_add_member(joinrelids, ojrelid); - /* * Remove references to the rel from other baserels' attr_needed arrays. */ @@ -366,19 +403,22 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) attroff--) { otherrel->attr_needed[attroff] = - bms_del_member(otherrel->attr_needed[attroff], relid); + replace_relid(otherrel->attr_needed[attroff], relid, subst); otherrel->attr_needed[attroff] = - bms_del_member(otherrel->attr_needed[attroff], ojrelid); + replace_relid(otherrel->attr_needed[attroff], ojrelid, subst); } + + /* Update lateral references. */ + replace_varno((Node *) otherrel->lateral_vars, relid, subst); } /* * Update all_baserels and related relid sets. */ - root->all_baserels = bms_del_member(root->all_baserels, relid); - root->outer_join_rels = bms_del_member(root->outer_join_rels, ojrelid); - root->all_query_rels = bms_del_member(root->all_query_rels, relid); - root->all_query_rels = bms_del_member(root->all_query_rels, ojrelid); + root->all_baserels = replace_relid(root->all_baserels, relid, subst); + root->outer_join_rels = replace_relid(root->outer_join_rels, ojrelid, subst); + root->all_query_rels = replace_relid(root->all_query_rels, relid, subst); + root->all_query_rels = replace_relid(root->all_query_rels, ojrelid, subst); /* * Likewise remove references from SpecialJoinInfo data structures. @@ -392,19 +432,21 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) { SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l); - sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, relid); - sjinf->min_righthand = bms_del_member(sjinf->min_righthand, relid); - sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, relid); - sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, relid); - sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, ojrelid); - sjinf->min_righthand = bms_del_member(sjinf->min_righthand, ojrelid); - sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, ojrelid); - sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, ojrelid); + sjinf->min_lefthand = replace_relid(sjinf->min_lefthand, relid, subst); + sjinf->min_righthand = replace_relid(sjinf->min_righthand, relid, subst); + sjinf->syn_lefthand = replace_relid(sjinf->syn_lefthand, relid, subst); + sjinf->syn_righthand = replace_relid(sjinf->syn_righthand, relid, subst); + sjinf->min_lefthand = replace_relid(sjinf->min_lefthand, ojrelid, subst); + sjinf->min_righthand = replace_relid(sjinf->min_righthand, ojrelid, subst); + sjinf->syn_lefthand = replace_relid(sjinf->syn_lefthand, ojrelid, subst); + sjinf->syn_righthand = replace_relid(sjinf->syn_righthand, ojrelid, subst); /* relid cannot appear in these fields, but ojrelid can: */ - sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l, ojrelid); - sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r, ojrelid); - sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l, ojrelid); - sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r, ojrelid); + sjinf->commute_above_l = replace_relid(sjinf->commute_above_l, ojrelid, subst); + sjinf->commute_above_r = replace_relid(sjinf->commute_above_r, ojrelid, subst); + sjinf->commute_below_l = replace_relid(sjinf->commute_below_l, ojrelid, subst); + sjinf->commute_below_r = replace_relid(sjinf->commute_below_r, ojrelid, subst); + + replace_varno((Node *) sjinf->semi_rhs_exprs, relid, subst); } /* @@ -438,18 +480,47 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) { PlaceHolderVar *phv = phinfo->ph_var; - phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid); - phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, ojrelid); + phinfo->ph_eval_at = replace_relid(phinfo->ph_eval_at, relid, subst); + phinfo->ph_eval_at = replace_relid(phinfo->ph_eval_at, ojrelid, subst); Assert(!bms_is_empty(phinfo->ph_eval_at)); /* checked previously */ - phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid); - phinfo->ph_needed = bms_del_member(phinfo->ph_needed, ojrelid); + phinfo->ph_needed = replace_relid(phinfo->ph_needed, relid, subst); + phinfo->ph_needed = replace_relid(phinfo->ph_needed, ojrelid, subst); /* ph_needed might or might not become empty */ - phv->phrels = bms_del_member(phv->phrels, relid); - phv->phrels = bms_del_member(phv->phrels, ojrelid); + phv->phrels = replace_relid(phv->phrels, relid, subst); + phv->phrels = replace_relid(phv->phrels, ojrelid, subst); + phinfo->ph_lateral = replace_relid(phinfo->ph_lateral, relid, subst); + phinfo->ph_var->phrels = replace_relid(phinfo->ph_var->phrels, relid, subst); Assert(!bms_is_empty(phv->phrels)); Assert(phv->phnullingrels == NULL); /* no need to adjust */ } } +} + +/* + * Remove the target relid and references to the target join from the + * planner's data structures, having determined that there is no need + * to include them in the query. + * + * We are not terribly thorough here. We only bother to update parts of + * the planner's data structures that will actually be consulted later. + */ +static void +remove_leftjoinrel_from_query(PlannerInfo *root, int relid, + SpecialJoinInfo *sjinfo) +{ + List *joininfos; + int ojrelid = sjinfo->ojrelid; + RelOptInfo *rel = find_base_rel(root, relid); + Relids join_plus_commute; + Relids joinrelids; + ListCell *l; + + /* Compute the relid set for the join we are considering */ + joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + Assert(ojrelid != 0); + joinrelids = bms_add_member(joinrelids, ojrelid); + + remove_rel_from_query(root, rel, -1, sjinfo, joinrelids); /* * Remove any joinquals referencing the rel from the joininfo lists. @@ -844,9 +915,14 @@ rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) * Note that the passed-in clause_list may be destructively modified! This * is OK for current uses, because the clause_list is built by the caller for * the sole purpose of passing to this function. + * + * outer_exprs contains the right sides of baserestrictinfo clauses looking + * like x = const if distinctness is derived from such clauses, not joininfo + * clause. Pass NULL to the outer_exprs, if its value is not needed. */ static bool -rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) +rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list, + List **extra_clauses) { /* * We could skip a couple of tests here if we assume all callers checked @@ -859,10 +935,11 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) { /* * Examine the indexes to see if we have a matching unique index. - * relation_has_unique_index_for automatically adds any usable + * relation_has_unique_index_ext automatically adds any usable * restriction clauses for the rel, so we needn't do that here. */ - if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL)) + if (relation_has_unique_index_ext(root, rel, clause_list, NIL, NIL, + extra_clauses)) return true; } else if (rel->rtekind == RTE_SUBQUERY) @@ -1177,8 +1254,29 @@ innerrel_is_unique(PlannerInfo *root, List *restrictlist, bool force_cache) { + return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel, + jointype, restrictlist, force_cache, NULL); +} + +/* + * innerrel_is_unique_ext + * Do the same as innerrel_is_unique(), but also return additional clauses + * from a baserestrictinfo list that were used to prove uniqueness. + */ +bool +innerrel_is_unique_ext(PlannerInfo *root, + Relids joinrelids, + Relids outerrelids, + RelOptInfo *innerrel, + JoinType jointype, + List *restrictlist, + bool force_cache, + List **extra_clauses) +{ MemoryContext old_context; ListCell *lc; + UniqueRelInfo *uniqueRelInfo; + List *outer_exprs = NIL; /* Certainly can't prove uniqueness when there are no joinclauses */ if (restrictlist == NIL) @@ -1200,10 +1298,14 @@ innerrel_is_unique(PlannerInfo *root, */ foreach(lc, innerrel->unique_for_rels) { - Relids unique_for_rels = (Relids) lfirst(lc); + uniqueRelInfo = (UniqueRelInfo *) lfirst(lc); - if (bms_is_subset(unique_for_rels, outerrelids)) + if (bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) + { + if (extra_clauses) + *extra_clauses = uniqueRelInfo->extra_clauses; return true; /* Success! */ + } } /* @@ -1220,7 +1322,7 @@ innerrel_is_unique(PlannerInfo *root, /* No cached information, so try to make the proof. */ if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel, - jointype, restrictlist)) + jointype, restrictlist, &outer_exprs)) { /* * Cache the positive result for future probes, being sure to keep it @@ -1233,10 +1335,15 @@ innerrel_is_unique(PlannerInfo *root, * supersets of them anyway. */ old_context = MemoryContextSwitchTo(root->planner_cxt); + uniqueRelInfo = palloc(sizeof(UniqueRelInfo)); + uniqueRelInfo->extra_clauses = outer_exprs; + uniqueRelInfo->outerrelids = bms_copy(outerrelids); innerrel->unique_for_rels = lappend(innerrel->unique_for_rels, - bms_copy(outerrelids)); + uniqueRelInfo); MemoryContextSwitchTo(old_context); + if (extra_clauses) + *extra_clauses = outer_exprs; return true; /* Success! */ } else @@ -1282,7 +1389,8 @@ is_innerrel_unique_for(PlannerInfo *root, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, - List *restrictlist) + List *restrictlist, + List **extra_clauses) { List *clause_list = NIL; ListCell *lc; @@ -1312,17 +1420,1006 @@ is_innerrel_unique_for(PlannerInfo *root, continue; /* not mergejoinable */ /* - * Check if clause has the form "outer op inner" or "inner op outer", - * and if so mark which side is inner. + * Check if the clause has the form "outer op inner" or "inner op + * outer", and if so mark which side is inner. */ if (!clause_sides_match_join(restrictinfo, outerrelids, innerrel->relids)) continue; /* no good for these input relations */ - /* OK, add to list */ + /* OK, add to the list */ clause_list = lappend(clause_list, restrictinfo); } /* Let rel_is_distinct_for() do the hard work */ - return rel_is_distinct_for(root, innerrel, clause_list); + return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses); +} + +/* + * Replace each occurrence of removing relid with the keeping one + */ +static void +replace_varno(Node *node, int from, int to) +{ + ReplaceVarnoContext ctx; + + if (to <= 0) + return; + + ctx.from = from; + ctx.to = to; + replace_varno_walker(node, &ctx); +} + +/* + * Walker function for replace_varno() + */ +static bool +replace_varno_walker(Node *node, ReplaceVarnoContext *ctx) +{ + if (node == NULL) + return false; + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + if (var->varno == ctx->from) + { + var->varno = ctx->to; + var->varnosyn = ctx->to; + } + return false; + } + if (IsA(node, RestrictInfo)) + { + RestrictInfo *rinfo = (RestrictInfo *) node; + int relid = -1; + bool is_req_equal = + (rinfo->required_relids == rinfo->clause_relids); + + if (bms_is_member(ctx->from, rinfo->clause_relids)) + { + replace_varno((Node *) rinfo->clause, ctx->from, ctx->to); + replace_varno((Node *) rinfo->orclause, ctx->from, ctx->to); + rinfo->clause_relids = replace_relid(rinfo->clause_relids, ctx->from, ctx->to); + rinfo->left_relids = replace_relid(rinfo->left_relids, ctx->from, ctx->to); + rinfo->right_relids = replace_relid(rinfo->right_relids, ctx->from, ctx->to); + } + + if (is_req_equal) + rinfo->required_relids = rinfo->clause_relids; + else + rinfo->required_relids = replace_relid(rinfo->required_relids, ctx->from, ctx->to); + + rinfo->outer_relids = replace_relid(rinfo->outer_relids, ctx->from, ctx->to); + rinfo->incompatible_relids = replace_relid(rinfo->incompatible_relids, ctx->from, ctx->to); + + if (rinfo->mergeopfamilies && + bms_get_singleton_member(rinfo->clause_relids, &relid) && + relid == ctx->to && IsA(rinfo->clause, OpExpr)) + { + Expr *leftOp; + Expr *rightOp; + + leftOp = (Expr *) get_leftop(rinfo->clause); + rightOp = (Expr *) get_rightop(rinfo->clause); + + if (leftOp != NULL && equal(leftOp, rightOp)) + { + NullTest *ntest = makeNode(NullTest); + + ntest->arg = leftOp; + ntest->nulltesttype = IS_NOT_NULL; + ntest->argisrow = false; + ntest->location = -1; + rinfo->clause = (Expr *) ntest; + rinfo->mergeopfamilies = NIL; + } + Assert(rinfo->orclause == NULL); + } + + return false; + } + return expression_tree_walker(node, replace_varno_walker, (void *) ctx); +} + +/* + * Substitute newId by oldId in relids. + */ +static Bitmapset * +replace_relid(Relids relids, int oldId, int newId) +{ + if (oldId < 0) + return relids; + + if (newId < 0) + /* Delete relid without substitution. */ + return bms_del_member(relids, oldId); + + if (bms_is_member(oldId, relids)) + return bms_add_member(bms_del_member(relids, oldId), newId); + + return relids; +} + +/* + * Update EC members to point to the remaining relation instead of the removed + * one, removing duplicates. + * + * Restriction clauses for base relations are already distributed to + * the respective baserestrictinfo lists (see + * generate_implied_equalities_for_column). The above code has already processed + * this list, and updated these clauses to reference the remaining + * relation, so we can skip them here based on their relids. + * + * Likewise, we have already processed the join clauses that join the + * removed relation to the remaining one. + * + * Finally, there are join clauses that join the removed relation to + * some third relation. We can't just delete the source clauses and + * regenerate them from the EC because the corresponding equality + * operators might be missing (see the handling of ec_broken). + * Therefore, we will update the references in the source clauses. + * + * Derived clauses can be generated again, so it is simpler to just + * delete them. + */ +static void +update_eclasses(EquivalenceClass *ec, int from, int to) +{ + List *new_members = NIL; + List *new_sources = NIL; + ListCell *lc; + ListCell *lc1; + + foreach(lc, ec->ec_members) + { + EquivalenceMember *em = lfirst_node(EquivalenceMember, lc); + bool is_redundant = false; + + if (!bms_is_member(from, em->em_relids)) + { + new_members = lappend(new_members, em); + continue; + } + + em->em_relids = replace_relid(em->em_relids, from, to); + em->em_jdomain->jd_relids = replace_relid(em->em_jdomain->jd_relids, from, to); + + /* We only process inner joins */ + replace_varno((Node *) em->em_expr, from, to); + + foreach(lc1, new_members) + { + EquivalenceMember *other = lfirst_node(EquivalenceMember, lc1); + + if (!equal(em->em_relids, other->em_relids)) + continue; + + if (equal(em->em_expr, other->em_expr)) + { + is_redundant = true; + break; + } + } + + if (!is_redundant) + new_members = lappend(new_members, em); + } + + list_free(ec->ec_members); + ec->ec_members = new_members; + + list_free(ec->ec_derives); + ec->ec_derives = NULL; + + /* Update EC source expressions */ + foreach(lc, ec->ec_sources) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + bool is_redundant = false; + + if (!bms_is_member(from, rinfo->required_relids)) + { + new_sources = lappend(new_sources, rinfo); + continue; + } + + replace_varno((Node *) rinfo, from, to); + + /* + * After switching the clause to the remaining relation, check it for + * redundancy with existing ones. We don't have to check for + * redundancy with derived clauses, because we've just deleted them. + */ + foreach(lc1, new_sources) + { + RestrictInfo *other = lfirst_node(RestrictInfo, lc1); + + if (!equal(rinfo->clause_relids, other->clause_relids)) + continue; + + if (equal(rinfo->clause, other->clause)) + { + is_redundant = true; + break; + } + } + + if (!is_redundant) + new_sources = lappend(new_sources, rinfo); + } + + list_free(ec->ec_sources); + ec->ec_sources = new_sources; + ec->ec_relids = replace_relid(ec->ec_relids, from, to); +} + +static bool +sje_walker(Node *node, ReplaceVarnoContext *ctx) +{ + if (node == NULL) + return false; + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + if (var->varno == ctx->from) + { + var->varno = ctx->to; + var->varnosyn = ctx->to; + } + return false; + } + return expression_tree_walker(node, sje_walker, (void *) ctx); +} + +/* + * Remove a relation after we have proven that it participates only in an + * unneeded unique self join. + * + * Replace any links in planner info structures. + * + * Transfer join and restriction clauses from the removed relation to the + * remaining one. We change the Vars of the clause to point to the + * remaining relation instead of the removed one. The clauses that require + * a subset of joinrelids become restriction clauses of the remaining + * relation, and others remain join clauses. We append them to + * baserestrictinfo and joininfo respectively, trying not to introduce + * duplicates. + * + * We also have to process the 'joinclauses' list here, because it + * contains EC-derived join clauses which must become filter clauses. It + * is not enough to just correct the ECs because the EC-derived + * restrictions are generated before join removal (see + * generate_base_implied_equalities). + */ +static void +remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark, + RelOptInfo *toKeep, RelOptInfo *toRemove, + List *restrictlist) +{ + List *joininfos; + ListCell *lc; + int i; + List *jinfo_candidates = NIL; + List *binfo_candidates = NIL; + ReplaceVarnoContext ctx = {.from = toRemove->relid,.to = toKeep->relid}; + + Assert(toKeep->relid != -1); + + /* + * Replace the index of the removing table with the keeping one. The + * technique of removing/distributing restrictinfo is used here to attach + * just appeared (for keeping relation) join clauses and avoid adding + * duplicates of those that already exist in the joininfo list. + */ + joininfos = list_copy(toRemove->joininfo); + foreach(lc, joininfos) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + + remove_join_clause_from_rels(root, rinfo, rinfo->required_relids); + replace_varno((Node *) rinfo, toRemove->relid, toKeep->relid); + + if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE) + jinfo_candidates = lappend(jinfo_candidates, rinfo); + else + binfo_candidates = lappend(binfo_candidates, rinfo); + } + + /* + * Concatenate restrictlist to the list of base restrictions of the + * removing table just to simplify the replacement procedure: all of them + * weren't connected to any keeping relations and need to be added to some + * rels. + */ + toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo, + restrictlist); + foreach(lc, toRemove->baserestrictinfo) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + + replace_varno((Node *) rinfo, toRemove->relid, toKeep->relid); + + if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE) + jinfo_candidates = lappend(jinfo_candidates, rinfo); + else + binfo_candidates = lappend(binfo_candidates, rinfo); + } + + /* + * Now, add all non-redundant clauses to the keeping relation. + * Contradictory operation. On the one side, we reduce the length of + * restrict lists that can impact planning or executing time. + * Additionally, we improve the accuracy of cardinality estimation. On the + * other side, it is one more place that can make planning time much + * longer in specific cases. It would have been better to avoid calling + * the equal() function here, but it's the only way to detect duplicated + * inequality expressions. + */ + foreach(lc, binfo_candidates) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + ListCell *olc = NULL; + bool is_redundant = false; + + Assert(!bms_is_member(toRemove->relid, rinfo->required_relids)); + + foreach(olc, toKeep->baserestrictinfo) + { + RestrictInfo *src = lfirst_node(RestrictInfo, olc); + + if (!bms_equal(src->clause_relids, rinfo->clause_relids)) + /* Const and non-const expressions can't be equal */ + continue; + + if (src == rinfo || + (rinfo->parent_ec != NULL + && src->parent_ec == rinfo->parent_ec) + || equal(rinfo->clause, src->clause)) + { + is_redundant = true; + break; + } + } + if (!is_redundant) + distribute_restrictinfo_to_rels(root, rinfo); + } + foreach(lc, jinfo_candidates) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + ListCell *olc = NULL; + bool is_redundant = false; + + Assert(!bms_is_member(toRemove->relid, rinfo->required_relids)); + + foreach(olc, toKeep->joininfo) + { + RestrictInfo *src = lfirst_node(RestrictInfo, olc); + + if (!bms_equal(src->clause_relids, rinfo->clause_relids)) + /* Can't compare trivially different clauses */ + continue; + + if (src == rinfo || + (rinfo->parent_ec != NULL + && src->parent_ec == rinfo->parent_ec) + || equal(rinfo->clause, src->clause)) + { + is_redundant = true; + break; + } + } + if (!is_redundant) + distribute_restrictinfo_to_rels(root, rinfo); + } + list_free(binfo_candidates); + list_free(jinfo_candidates); + + /* + * Arrange equivalence classes, mentioned removing a table, with the + * keeping one: varno of removing table should be replaced in members and + * sources lists. Also, remove duplicated elements if this replacement + * procedure created them. + */ + i = -1; + while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0) + { + EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i); + + update_eclasses(ec, toRemove->relid, toKeep->relid); + toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i); + } + + /* + * Transfer the targetlist and attr_needed flags. + */ + + foreach(lc, toRemove->reltarget->exprs) + { + Node *node = lfirst(lc); + + replace_varno(node, toRemove->relid, toKeep->relid); + if (!list_member(toKeep->reltarget->exprs, node)) + toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node); + } + + for (i = toKeep->min_attr; i <= toKeep->max_attr; i++) + { + int attno = i - toKeep->min_attr; + + toRemove->attr_needed[attno] = replace_relid(toRemove->attr_needed[attno], + toRemove->relid, toKeep->relid); + toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno], + toRemove->attr_needed[attno]); + } + + /* + * If the removed relation has a row mark, transfer it to the remaining + * one. + * + * If both rels have row marks, just keep the one corresponding to the + * remaining relation, because we verified earlier that they have the same + * strength. + */ + if (rmark) + { + if (kmark) + { + Assert(kmark->markType == rmark->markType); + + root->rowMarks = list_delete_ptr(root->rowMarks, rmark); + } + else + { + /* Shouldn't have inheritance children here. */ + Assert(rmark->rti == rmark->prti); + + rmark->rti = rmark->prti = toKeep->relid; + } + } + + /* Replace varno in all the query structures */ + query_tree_walker(root->parse, sje_walker, &ctx, QTW_EXAMINE_SORTGROUP); + + /* Replace links in the planner info */ + remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL); + + /* At last, replace varno in root targetlist and HAVING clause */ + replace_varno((Node *) root->processed_tlist, + toRemove->relid, toKeep->relid); + replace_varno((Node *) root->processed_groupClause, + toRemove->relid, toKeep->relid); + + /* + * There may be references to the rel in root->fkey_list, but if so, + * match_foreign_keys_to_quals() will get rid of them. + */ + + /* + * Finally, remove the rel from the baserel array to prevent it from being + * referenced again. (We can't do this earlier because + * remove_join_clause_from_rels will touch it.) + */ + root->simple_rel_array[toRemove->relid] = NULL; + + /* And nuke the RelOptInfo, just in case there's another access path */ + pfree(toRemove); +} + +/* + * split_selfjoin_quals + * Processes 'joinquals' by building two lists: one containing the quals + * where the columns/exprs are on either side of the join match, and + * another one containing the remaining quals. + * + * 'joinquals' must only contain quals for a RTE_RELATION being joined to + * itself. + */ +static void +split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals, + List **otherjoinquals, int from, int to) +{ + ListCell *lc; + List *sjoinquals = NIL; + List *ojoinquals = NIL; + + foreach(lc, joinquals) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + OpExpr *expr; + Node *leftexpr; + Node *rightexpr; + + /* In general, clause looks like F(arg1) = G(arg2) */ + if (!rinfo->mergeopfamilies || + bms_num_members(rinfo->clause_relids) != 2 || + bms_membership(rinfo->left_relids) != BMS_SINGLETON || + bms_membership(rinfo->right_relids) != BMS_SINGLETON) + { + ojoinquals = lappend(ojoinquals, rinfo); + continue; + } + + expr = (OpExpr *) rinfo->clause; + + if (!IsA(expr, OpExpr) || list_length(expr->args) != 2) + { + ojoinquals = lappend(ojoinquals, rinfo); + continue; + } + + leftexpr = get_leftop(rinfo->clause); + rightexpr = copyObject(get_rightop(rinfo->clause)); + + if (leftexpr && IsA(leftexpr, RelabelType)) + leftexpr = (Node *) ((RelabelType *) leftexpr)->arg; + if (rightexpr && IsA(rightexpr, RelabelType)) + rightexpr = (Node *) ((RelabelType *) rightexpr)->arg; + + /* + * Quite an expensive operation, narrowing the use case. For example, + * when we have cast of the same var to different (but compatible) + * types. + */ + replace_varno(rightexpr, bms_singleton_member(rinfo->right_relids), + bms_singleton_member(rinfo->left_relids)); + + if (equal(leftexpr, rightexpr)) + sjoinquals = lappend(sjoinquals, rinfo); + else + ojoinquals = lappend(ojoinquals, rinfo); + } + + *selfjoinquals = sjoinquals; + *otherjoinquals = ojoinquals; +} + +/* + * Check for a case when uniqueness is at least partly derived from a + * baserestrictinfo clause. In this case, we have a chance to return only + * one row (if such clauses on both sides of SJ are equal) or nothing (if they + * are different). + */ +static bool +match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses, + Index relid) +{ + ListCell *lc; + + foreach(lc, uclauses) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + Expr *clause; + Node *iclause; + Node *c1; + bool matched = false; + ListCell *olc; + + Assert(outer->relid > 0 && relid > 0); + + /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */ + Assert(bms_is_empty(rinfo->left_relids) ^ + bms_is_empty(rinfo->right_relids)); + + clause = (Expr *) copyObject(rinfo->clause); + replace_varno((Node *) clause, relid, outer->relid); + + iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) : + get_leftop(clause); + c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) : + get_rightop(clause); + + /* + * Compare these left and right sides with the corresponding sides of + * the outer's filters. If no one is detected - return immediately. + */ + foreach(olc, outer->baserestrictinfo) + { + RestrictInfo *orinfo = lfirst_node(RestrictInfo, olc); + Node *oclause; + Node *c2; + + if (orinfo->mergeopfamilies == NIL) + /* Don't consider clauses which aren't similar to 'F(X)=G(Y)' */ + continue; + + Assert(is_opclause(orinfo->clause)); + + oclause = bms_is_empty(orinfo->left_relids) ? + get_rightop(orinfo->clause) : get_leftop(orinfo->clause); + c2 = (bms_is_empty(orinfo->left_relids) ? + get_leftop(orinfo->clause) : get_rightop(orinfo->clause)); + + if (equal(iclause, oclause) && equal(c1, c2)) + { + matched = true; + break; + } + } + + if (!matched) + return false; + } + + return true; +} + +/* + * Find and remove unique self joins in a group of base relations that have + * the same Oid. + * + * Returns a set of relids that were removed. + */ +static Relids +remove_self_joins_one_group(PlannerInfo *root, Relids relids) +{ + Relids result = NULL; + int k; /* Index of kept relation */ + int r = -1; /* Index of removed relation */ + + while ((r = bms_next_member(relids, r)) > 0) + { + RelOptInfo *inner = root->simple_rel_array[r]; + + k = r; + + while ((k = bms_next_member(relids, k)) > 0) + { + Relids joinrelids = NULL; + RelOptInfo *outer = root->simple_rel_array[k]; + List *restrictlist; + List *selfjoinquals; + List *otherjoinquals; + ListCell *lc; + bool jinfo_check = true; + PlanRowMark *omark = NULL; + PlanRowMark *imark = NULL; + List *uclauses = NIL; + + /* A sanity check: the relations have the same Oid. */ + Assert(root->simple_rte_array[k]->relid == + root->simple_rte_array[r]->relid); + + /* + * It is impossible to eliminate join of two relations if they + * belong to different rules of order. Otherwise planner can't be + * able to find any variants of correct query plan. + */ + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc); + + if ((bms_is_member(k, info->syn_lefthand) ^ + bms_is_member(r, info->syn_lefthand)) || + (bms_is_member(k, info->syn_righthand) ^ + bms_is_member(r, info->syn_righthand))) + { + jinfo_check = false; + break; + } + } + if (!jinfo_check) + continue; + + /* + * Check Row Marks equivalence. We can't remove the join if the + * relations have row marks of different strength (e.g. one is + * locked FOR UPDATE and another just has ROW_MARK_REFERENCE for + * EvalPlanQual rechecking). + */ + foreach(lc, root->rowMarks) + { + PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc); + + if (rowMark->rti == k) + { + Assert(imark == NULL); + imark = rowMark; + } + else if (rowMark->rti == r) + { + Assert(omark == NULL); + omark = rowMark; + } + + if (omark && imark) + break; + } + if (omark && imark && omark->markType != imark->markType) + continue; + + /* + * We only deal with base rels here, so their relids bitset + * contains only one member -- their relid. + */ + joinrelids = bms_add_member(joinrelids, r); + joinrelids = bms_add_member(joinrelids, k); + + /* + * Be safe to do not remove tables participated in complicated PH + */ + foreach(lc, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc); + + /* there isn't any other place to eval PHV */ + if (bms_is_subset(phinfo->ph_eval_at, joinrelids) || + bms_is_subset(phinfo->ph_needed, joinrelids)) + break; + } + if (lc) + continue; + + /* + * At this stage, joininfo lists of inner and outer can contain + * only clauses, required for a superior outer join that can't + * influence this optimization. So, we can avoid to call the + * build_joinrel_restrictlist() routine. + */ + restrictlist = generate_join_implied_equalities(root, joinrelids, + inner->relids, + outer, NULL); + + /* + * Process restrictlist to separate the self join quals out of the + * other quals. e.g x = x goes to selfjoinquals and a = b to + * otherjoinquals. + */ + split_selfjoin_quals(root, restrictlist, &selfjoinquals, + &otherjoinquals, inner->relid, outer->relid); + + /* + * To enable SJE for the only degenerate case without any self + * join clauses at all, add baserestrictinfo to this list. The + * degenerate case works only if both sides have the same clause. + * So doesn't matter which side to add. + */ + selfjoinquals = list_concat(selfjoinquals, outer->baserestrictinfo); + + /* + * Determine if the inner table can duplicate outer rows. We must + * bypass the unique rel cache here since we're possibly using a + * subset of join quals. We can use 'force_cache' == true when all + * join quals are self-join quals. Otherwise, we could end up + * putting false negatives in the cache. + */ + if (!innerrel_is_unique_ext(root, joinrelids, inner->relids, + outer, JOIN_INNER, selfjoinquals, + list_length(otherjoinquals) == 0, + &uclauses)) + continue; + + /* + * We have proven that for both relations, the same unique index + * guarantees that there is at most one row where columns equal + * given values. These values must be the same for both relations, + * or else we won't match the same row on each side of the join. + */ + if (!match_unique_clauses(root, inner, uclauses, outer->relid)) + continue; + + /* + * We can remove either relation, so remove the inner one in order + * to simplify this loop. + */ + remove_self_join_rel(root, omark, imark, outer, inner, restrictlist); + + result = bms_add_member(result, r); + + /* We have removed the outer relation, try the next one. */ + break; + } + } + + return result; +} + +/* + * Gather indexes of base relations from the joinlist and try to eliminate self + * joins. To avoid complexity, limit the max power of this set by a GUC. + */ +static Relids +remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove) +{ + ListCell *jl; + Relids relids = NULL; + SelfJoinCandidate *candidates = NULL; + int i; + int j; + int numRels; + + /* Collect indexes of base relations of the join tree */ + foreach(jl, joinlist) + { + Node *jlnode = (Node *) lfirst(jl); + + if (IsA(jlnode, RangeTblRef)) + { + RangeTblRef *ref = (RangeTblRef *) jlnode; + RangeTblEntry *rte = root->simple_rte_array[ref->rtindex]; + + /* + * We only care about base relations from which we select + * something. + */ + if (rte->rtekind == RTE_RELATION && + rte->relkind == RELKIND_RELATION && + root->simple_rel_array[ref->rtindex] != NULL) + { + Assert(!bms_is_member(ref->rtindex, relids)); + relids = bms_add_member(relids, ref->rtindex); + } + } + else if (IsA(jlnode, List)) + /* Recursively go inside the sub-joinlist */ + toRemove = remove_self_joins_recurse(root, (List *) jlnode, + toRemove); + else + elog(ERROR, "unrecognized joinlist node type: %d", + (int) nodeTag(jlnode)); + } + + numRels = bms_num_members(relids); + + /* Need at least two relations for the join */ + if (numRels < 2) + return toRemove; + + /* + * In order to find relations with the same oid we first build an array of + * candidates and then sort it by oid. + */ + candidates = (SelfJoinCandidate *) palloc(sizeof(SelfJoinCandidate) * + numRels); + i = -1; + j = 0; + while ((i = bms_next_member(relids, i)) >= 0) + { + candidates[j].relid = i; + candidates[j].reloid = root->simple_rte_array[i]->relid; + j++; + } + + pg_qsort(candidates, numRels, sizeof(SelfJoinCandidate), + self_join_candidates_cmp); + + /* + * Iteratively form a group of relation indexes with the same oid and + * launch the routine that detects self-joins in this group and removes + * excessive range table entries. + * + * At the end of the iteration, exclude the group from the overall relids + * list. So each next iteration of the cycle will involve less and less + * value of relids. + */ + i = 0; + for (j = 1; j < numRels + 1; j++) + { + if (j == numRels || candidates[j].reloid != candidates[i].reloid) + { + if (j - i >= 2) + { + /* Create a group of relation indexes with the same oid */ + Relids group = NULL; + Relids removed; + + while (i < j) + { + group = bms_add_member(group, candidates[i].relid); + i++; + } + + relids = bms_del_members(relids, group); + + /* + * Try to remove self-joins from a group of identical entries. + * Make the next attempt iteratively - if something is deleted + * from a group, changes in clauses and equivalence classes + * can give us a chance to find more candidates. + */ + do + { + Assert(!bms_overlap(group, toRemove)); + removed = remove_self_joins_one_group(root, group); + toRemove = bms_add_members(toRemove, removed); + group = bms_del_members(group, removed); + } while (!bms_is_empty(removed) && + bms_membership(group) == BMS_MULTIPLE); + bms_free(removed); + bms_free(group); + } + else + { + /* Single relation, just remove it from the set */ + relids = bms_del_member(relids, candidates[i].relid); + i = j; + } + } + } + + Assert(bms_is_empty(relids)); + + return toRemove; +} + +/* + * Compare self-join candidates by their oids. + */ +static int +self_join_candidates_cmp(const void *a, const void *b) +{ + const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a; + const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b; + + if (ca->reloid != cb->reloid) + return (ca->reloid < cb->reloid ? -1 : 1); + else + return 0; +} + +/* + * Find and remove useless self joins. + * + * Search for joins where a relation is joined to itself. If the join clause + * for each tuple from one side of the join is proven to match the same + * physical row (or nothing) on the other side, that self-join can be + * eliminated from the query. Suitable join clauses are assumed to be in the + * form of X = X, and can be replaced with NOT NULL clauses. + * + * For the sake of simplicity, we don't apply this optimization to special + * joins. Here is a list of what we could do in some particular cases: + * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins, + * and then removed normally. + * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND + * (IS NULL on join columns OR NOT inner quals)'. + * 'a a1 left join a a2': could simplify to a scan like inner but without + * NOT NULL conditions on join columns. + * 'a a1 left join (a a2 join b)': can't simplify this, because join to b + * can both remove rows and introduce duplicates. + * + * To search for removable joins, we order all the relations on their Oid, + * go over each set with the same Oid, and consider each pair of relations + * in this set. + * + * To remove the join, we mark one of the participating relations as dead + * and rewrite all references to it to point to the remaining relation. + * This includes modifying RestrictInfos, EquivalenceClasses, and + * EquivalenceMembers. We also have to modify the row marks. The join clauses + * of the removed relation become either restriction or join clauses, based on + * whether they reference any relations not participating in the removed join. + * + * 'targetlist' is the top-level targetlist of the query. If it has any + * references to the removed relations, we update them to point to the + * remaining ones. + */ +List * +remove_useless_self_joins(PlannerInfo *root, List *joinlist) +{ + Relids toRemove = NULL; + int relid = -1; + + if (!enable_self_join_removal || joinlist == NIL || + (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List))) + return joinlist; + + /* + * Merge pairs of relations participated in self-join. Remove unnecessary + * range table entries. + */ + toRemove = remove_self_joins_recurse(root, joinlist, toRemove); + + if (unlikely(toRemove != NULL)) + { + int nremoved = 0; + + /* At the end, remove orphaned relation links */ + while ((relid = bms_next_member(toRemove, relid)) >= 0) + joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved); + } + + return joinlist; } |