summaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path/equivclass.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2020-10-28 11:15:47 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2020-10-28 11:15:47 -0400
commitad1c36b0709e47cdb3cc4abd6c939fe64279b63f (patch)
tree61b6c959d98f9f1dc52bf7f359d703c1e4b2a964 /src/backend/optimizer/path/equivclass.c
parentce7f772c5e6066e0bbafea5759e652c9757c8e6b (diff)
Fix foreign-key selectivity estimation in the presence of constants.
get_foreign_key_join_selectivity() looks for join clauses that equate the two sides of the FK constraint. However, if we have a query like "WHERE fktab.a = pktab.a and fktab.a = 1", it won't find any such join clause, because equivclass.c replaces the given clauses with "fktab.a = 1 and pktab.a = 1", which can be enforced at the scan level, leaving nothing to be done for column "a" at the join level. We can fix that expectation without much trouble, but then a new problem arises: applying the foreign-key-based selectivity rule produces a rowcount underestimate, because we're effectively double-counting the selectivity of the "fktab.a = 1" clause. So we have to cancel that selectivity out of the estimate. To fix, refactor process_implied_equality() so that it can pass back the new RestrictInfo to its callers in equivclass.c, allowing the generated "fktab.a = 1" clause to be saved in the EquivalenceClass's ec_derives list. Then it's not much trouble to dig out the relevant RestrictInfo when we need to adjust an FK selectivity estimate. (While at it, we can also remove the expensive use of initialize_mergeclause_eclasses() to set up the new RestrictInfo's left_ec and right_ec pointers. The equivclass.c code can set those basically for free.) This seems like clearly a bug fix, but I'm hesitant to back-patch it, first because there's some API/ABI risk for extensions and second because we're usually loath to destabilize plan choices in stable branches. Per report from Sigrid Ehrenreich. Discussion: https://postgr.es/m/1019549.1603770457@sss.pgh.pa.us Discussion: https://postgr.es/m/AM6PR02MB5287A0ADD936C1FA80973E72AB190@AM6PR02MB5287.eurprd02.prod.outlook.com
Diffstat (limited to 'src/backend/optimizer/path/equivclass.c')
-rw-r--r--src/backend/optimizer/path/equivclass.c121
1 files changed, 96 insertions, 25 deletions
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 690b753369e..a21b3b4756c 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -840,10 +840,8 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
* scanning of the quals and before Path construction begins.
*
* We make no attempt to avoid generating duplicate RestrictInfos here: we
- * don't search ec_sources for matches, nor put the created RestrictInfos
- * into ec_derives. Doing so would require some slightly ugly changes in
- * initsplan.c's API, and there's no real advantage, because the clauses
- * generated here can't duplicate anything we will generate for joins anyway.
+ * don't search ec_sources or ec_derives for matches. It doesn't really
+ * seem worth the trouble to do so.
*/
void
generate_base_implied_equalities(PlannerInfo *root)
@@ -969,6 +967,7 @@ generate_base_implied_equalities_const(PlannerInfo *root,
{
EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
Oid eq_op;
+ RestrictInfo *rinfo;
Assert(!cur_em->em_is_child); /* no children yet */
if (cur_em == const_em)
@@ -982,14 +981,31 @@ generate_base_implied_equalities_const(PlannerInfo *root,
ec->ec_broken = true;
break;
}
- process_implied_equality(root, eq_op, ec->ec_collation,
- cur_em->em_expr, const_em->em_expr,
- bms_copy(ec->ec_relids),
- bms_union(cur_em->em_nullable_relids,
- const_em->em_nullable_relids),
- ec->ec_min_security,
- ec->ec_below_outer_join,
- cur_em->em_is_const);
+ rinfo = process_implied_equality(root, eq_op, ec->ec_collation,
+ cur_em->em_expr, const_em->em_expr,
+ bms_copy(ec->ec_relids),
+ bms_union(cur_em->em_nullable_relids,
+ const_em->em_nullable_relids),
+ ec->ec_min_security,
+ ec->ec_below_outer_join,
+ cur_em->em_is_const);
+
+ /*
+ * If the clause didn't degenerate to a constant, fill in the correct
+ * markings for a mergejoinable clause, and save it in ec_derives. (We
+ * will not re-use such clauses directly, but selectivity estimation
+ * may consult the list later. Note that this use of ec_derives does
+ * not overlap with its use for join clauses, since we never generate
+ * join clauses from an ec_has_const eclass.)
+ */
+ if (rinfo && rinfo->mergeopfamilies)
+ {
+ /* it's not redundant, so don't set parent_ec */
+ rinfo->left_ec = rinfo->right_ec = ec;
+ rinfo->left_em = cur_em;
+ rinfo->right_em = const_em;
+ ec->ec_derives = lappend(ec->ec_derives, rinfo);
+ }
}
}
@@ -1028,6 +1044,7 @@ generate_base_implied_equalities_no_const(PlannerInfo *root,
{
EquivalenceMember *prev_em = prev_ems[relid];
Oid eq_op;
+ RestrictInfo *rinfo;
eq_op = select_equality_operator(ec,
prev_em->em_datatype,
@@ -1038,14 +1055,29 @@ generate_base_implied_equalities_no_const(PlannerInfo *root,
ec->ec_broken = true;
break;
}
- process_implied_equality(root, eq_op, ec->ec_collation,
- prev_em->em_expr, cur_em->em_expr,
- bms_copy(ec->ec_relids),
- bms_union(prev_em->em_nullable_relids,
- cur_em->em_nullable_relids),
- ec->ec_min_security,
- ec->ec_below_outer_join,
- false);
+ rinfo = process_implied_equality(root, eq_op, ec->ec_collation,
+ prev_em->em_expr, cur_em->em_expr,
+ bms_copy(ec->ec_relids),
+ bms_union(prev_em->em_nullable_relids,
+ cur_em->em_nullable_relids),
+ ec->ec_min_security,
+ ec->ec_below_outer_join,
+ false);
+
+ /*
+ * If the clause didn't degenerate to a constant, fill in the
+ * correct markings for a mergejoinable clause. We don't put it
+ * in ec_derives however; we don't currently need to re-find such
+ * clauses, and we don't want to clutter that list with non-join
+ * clauses.
+ */
+ if (rinfo && rinfo->mergeopfamilies)
+ {
+ /* it's not redundant, so don't set parent_ec */
+ rinfo->left_ec = rinfo->right_ec = ec;
+ rinfo->left_em = prev_em;
+ rinfo->right_em = cur_em;
+ }
}
prev_ems[relid] = cur_em;
}
@@ -2151,6 +2183,10 @@ exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2)
* we ignore that fine point here.) This is much like exprs_known_equal,
* except that we insist on the comparison operator matching the eclass, so
* that the result is definite not approximate.
+ *
+ * On success, we also set fkinfo->eclass[colno] to the matching eclass,
+ * and set fkinfo->fk_eclass_member[colno] to the eclass member for the
+ * referencing Var.
*/
EquivalenceClass *
match_eclasses_to_foreign_key_col(PlannerInfo *root,
@@ -2180,8 +2216,8 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root,
{
EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes,
i);
- bool item1member = false;
- bool item2member = false;
+ EquivalenceMember *item1_em = NULL;
+ EquivalenceMember *item2_em = NULL;
ListCell *lc2;
/* Never match to a volatile EC */
@@ -2206,12 +2242,12 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root,
/* Match? */
if (var->varno == var1varno && var->varattno == var1attno)
- item1member = true;
+ item1_em = em;
else if (var->varno == var2varno && var->varattno == var2attno)
- item2member = true;
+ item2_em = em;
/* Have we found both PK and FK column in this EC? */
- if (item1member && item2member)
+ if (item1_em && item2_em)
{
/*
* Succeed if eqop matches EC's opfamilies. We could test
@@ -2221,7 +2257,11 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root,
if (opfamilies == NIL) /* compute if we didn't already */
opfamilies = get_mergejoin_opfamilies(eqop);
if (equal(opfamilies, ec->ec_opfamilies))
+ {
+ fkinfo->eclass[colno] = ec;
+ fkinfo->fk_eclass_member[colno] = item2_em;
return ec;
+ }
/* Otherwise, done with this EC, move on to the next */
break;
}
@@ -2230,6 +2270,37 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root,
return NULL;
}
+/*
+ * find_derived_clause_for_ec_member
+ * Search for a previously-derived clause mentioning the given EM.
+ *
+ * The eclass should be an ec_has_const EC, of which the EM is a non-const
+ * member. This should ensure there is just one derived clause mentioning
+ * the EM (and equating it to a constant).
+ * Returns NULL if no such clause can be found.
+ */
+RestrictInfo *
+find_derived_clause_for_ec_member(EquivalenceClass *ec,
+ EquivalenceMember *em)
+{
+ ListCell *lc;
+
+ Assert(ec->ec_has_const);
+ Assert(!em->em_is_const);
+ foreach(lc, ec->ec_derives)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ /*
+ * generate_base_implied_equalities_const will have put non-const
+ * members on the left side of derived clauses.
+ */
+ if (rinfo->left_em == em)
+ return rinfo;
+ }
+ return NULL;
+}
+
/*
* add_child_rel_equivalences