summaryrefslogtreecommitdiff
path: root/src/backend/optimizer/util/restrictinfo.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2023-01-30 13:16:20 -0500
committerTom Lane <tgl@sss.pgh.pa.us>2023-01-30 13:16:20 -0500
commit2489d76c4906f4461a364ca8ad7e0751ead8aa0d (patch)
tree145ebc28d5ea8f5a5ba340b9e353a11de786adae /src/backend/optimizer/util/restrictinfo.c
parentec7e053a98f39a9e3c7e6d35f0d2e83933882399 (diff)
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value of a table column everywhere in parse and plan trees. This choice predates our support for SQL outer joins, and it's really a pretty bad idea with outer joins, because the Var's value can depend on where it is in the tree: it might go to NULL above an outer join. So expression nodes that are equal() per equalfuncs.c might not represent the same value, which is a huge correctness hazard for the planner. To improve this, decorate Var nodes with a bitmapset showing which outer joins (identified by RTE indexes) may have nulled them at the point in the parse tree where the Var appears. This allows us to trust that equal() Vars represent the same value. A certain amount of klugery is still needed to cope with cases where we re-order two outer joins, but it's possible to make it work without sacrificing that core principle. PlaceHolderVars receive similar decoration for the same reason. In the planner, we include these outer join bitmapsets into the relids that an expression is considered to depend on, and in consequence also add outer-join relids to the relids of join RelOptInfos. This allows us to correctly perceive whether an expression can be calculated above or below a particular outer join. This change affects FDWs that want to plan foreign joins. They *must* follow suit when labeling foreign joins in order to match with the core planner, but for many purposes (if postgres_fdw is any guide) they'd prefer to consider only base relations within the join. To support both requirements, redefine ForeignScan.fs_relids as base+OJ relids, and add a new field fs_base_relids that's set up by the core planner. Large though it is, this commit just does the minimum necessary to install the new mechanisms and get check-world passing again. Follow-up patches will perform some cleanup. (The README additions and comments mention some stuff that will appear in the follow-up.) Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
Diffstat (limited to 'src/backend/optimizer/util/restrictinfo.c')
-rw-r--r--src/backend/optimizer/util/restrictinfo.c93
1 files changed, 92 insertions, 1 deletions
diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c
index 0c3878a805b..1350f011a62 100644
--- a/src/backend/optimizer/util/restrictinfo.c
+++ b/src/backend/optimizer/util/restrictinfo.c
@@ -53,6 +53,10 @@ static Expr *make_sub_restrictinfos(PlannerInfo *root,
* required_relids can be NULL, in which case it defaults to the actual clause
* contents (i.e., clause_relids).
*
+ * Note that there aren't options to set the has_clone and is_clone flags:
+ * we always initialize those to false. There's just one place that wants
+ * something different, so making all callers pass them seems inconvenient.
+ *
* We initialize fields that depend only on the given subexpression, leaving
* others that depend on context (or may never be needed at all) to be filled
* later.
@@ -116,12 +120,15 @@ make_restrictinfo_internal(PlannerInfo *root,
Relids nullable_relids)
{
RestrictInfo *restrictinfo = makeNode(RestrictInfo);
+ Relids baserels;
restrictinfo->clause = clause;
restrictinfo->orclause = orclause;
restrictinfo->is_pushed_down = is_pushed_down;
restrictinfo->outerjoin_delayed = outerjoin_delayed;
restrictinfo->pseudoconstant = pseudoconstant;
+ restrictinfo->has_clone = false; /* may get set by caller */
+ restrictinfo->is_clone = false; /* may get set by caller */
restrictinfo->can_join = false; /* may get set below */
restrictinfo->security_level = security_level;
restrictinfo->outer_relids = outer_relids;
@@ -188,6 +195,25 @@ make_restrictinfo_internal(PlannerInfo *root,
restrictinfo->required_relids = restrictinfo->clause_relids;
/*
+ * Count the number of base rels appearing in clause_relids. To do this,
+ * we just delete rels mentioned in root->outer_join_rels and count the
+ * survivors. Because we are called during deconstruct_jointree which is
+ * the same tree walk that populates outer_join_rels, this is a little bit
+ * unsafe-looking; but it should be fine because the recursion in
+ * deconstruct_jointree should already have visited any outer join that
+ * could be mentioned in this clause.
+ */
+ baserels = bms_difference(restrictinfo->clause_relids,
+ root->outer_join_rels);
+ restrictinfo->num_base_rels = bms_num_members(baserels);
+ bms_free(baserels);
+
+ /*
+ * Label this RestrictInfo with a fresh serial number.
+ */
+ restrictinfo->rinfo_serial = ++(root->last_rinfo_serial);
+
+ /*
* Fill in all the cacheable fields with "not yet set" markers. None of
* these will be computed until/unless needed. Note in particular that we
* don't mark a binary opclause as mergejoinable or hashjoinable here;
@@ -350,7 +376,7 @@ commute_restrictinfo(RestrictInfo *rinfo, Oid comm_op)
* ... and adjust those we need to change. Note in particular that we can
* preserve any cached selectivity or cost estimates, since those ought to
* be the same for the new clause. Likewise we can keep the source's
- * parent_ec.
+ * parent_ec. It's also important that we keep the same rinfo_serial.
*/
result->clause = (Expr *) newclause;
result->left_relids = rinfo->right_relids;
@@ -497,6 +523,58 @@ extract_actual_join_clauses(List *restrictinfo_list,
}
}
+/*
+ * clause_is_computable_at
+ * Test whether a clause is computable at a given evaluation level.
+ *
+ * There are two conditions for whether an expression can actually be
+ * evaluated at a given join level: the evaluation context must include
+ * all the relids (both base and OJ) used by the expression, and we must
+ * not have already evaluated any outer joins that null Vars/PHVs of the
+ * expression and are not listed in their nullingrels.
+ *
+ * This function checks the second condition; we assume the caller already
+ * saw to the first one.
+ *
+ * For speed reasons, we don't individually examine each Var/PHV of the
+ * expression, but just look at the overall clause_relids (the union of the
+ * varnos and varnullingrels). This could give a misleading answer if the
+ * Vars of a given varno don't all have the same varnullingrels; but that
+ * really shouldn't happen within a single scalar expression or RestrictInfo
+ * clause. Despite that, this is still annoyingly expensive :-(
+ */
+bool
+clause_is_computable_at(PlannerInfo *root,
+ Relids clause_relids,
+ Relids eval_relids)
+{
+ ListCell *lc;
+
+ /* Nothing to do if no outer joins have been performed yet. */
+ if (!bms_overlap(eval_relids, root->outer_join_rels))
+ return true;
+
+ foreach(lc, root->join_info_list)
+ {
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+
+ /* Ignore outer joins that are not yet performed. */
+ if (!bms_is_member(sjinfo->ojrelid, eval_relids))
+ continue;
+
+ /* OK if clause lists it (we assume all Vars in it agree). */
+ if (bms_is_member(sjinfo->ojrelid, clause_relids))
+ continue;
+
+ /* Else, trouble if clause mentions any nullable Vars. */
+ if (bms_overlap(clause_relids, sjinfo->min_righthand) ||
+ (sjinfo->jointype == JOIN_FULL &&
+ bms_overlap(clause_relids, sjinfo->min_lefthand)))
+ return false; /* doesn't work */
+ }
+
+ return true; /* OK */
+}
/*
* join_clause_is_movable_to
@@ -522,6 +600,12 @@ extract_actual_join_clauses(List *restrictinfo_list,
* Also, the join clause must not use any relations that have LATERAL
* references to the target relation, since we could not put such rels on
* the outer side of a nestloop with the target relation.
+ *
+ * Also, we reject is_clone versions of outer-join clauses. This has the
+ * effect of preventing us from generating variant parameterized paths
+ * that differ only in which outer joins null the parameterization rel(s).
+ * Generating one path from the minimally-parameterized has_clone version
+ * is sufficient.
*/
bool
join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel)
@@ -542,6 +626,10 @@ join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel)
if (bms_overlap(baserel->lateral_referencers, rinfo->clause_relids))
return false;
+ /* Ignore clones, too */
+ if (rinfo->is_clone)
+ return false;
+
return true;
}
@@ -587,6 +675,9 @@ join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel)
* moved for some valid set of outer rels, so we don't have the benefit of
* relying on prior checks for lateral-reference validity.
*
+ * Likewise, we don't check is_clone here: rejecting the inappropriate
+ * variants of a cloned clause must be handled upstream.
+ *
* Note: if this returns true, it means that the clause could be moved to
* this join relation, but that doesn't mean that this is the lowest join
* it could be moved to. Caller may need to make additional calls to verify