summaryrefslogtreecommitdiff
path: root/src/backend/optimizer/util
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2023-01-30 13:16:20 -0500
committerTom Lane <tgl@sss.pgh.pa.us>2023-01-30 13:16:20 -0500
commit2489d76c4906f4461a364ca8ad7e0751ead8aa0d (patch)
tree145ebc28d5ea8f5a5ba340b9e353a11de786adae /src/backend/optimizer/util
parentec7e053a98f39a9e3c7e6d35f0d2e83933882399 (diff)
Make Vars be outer-join-aware.
Traditionally we used the same Var struct to represent the value of a table column everywhere in parse and plan trees. This choice predates our support for SQL outer joins, and it's really a pretty bad idea with outer joins, because the Var's value can depend on where it is in the tree: it might go to NULL above an outer join. So expression nodes that are equal() per equalfuncs.c might not represent the same value, which is a huge correctness hazard for the planner. To improve this, decorate Var nodes with a bitmapset showing which outer joins (identified by RTE indexes) may have nulled them at the point in the parse tree where the Var appears. This allows us to trust that equal() Vars represent the same value. A certain amount of klugery is still needed to cope with cases where we re-order two outer joins, but it's possible to make it work without sacrificing that core principle. PlaceHolderVars receive similar decoration for the same reason. In the planner, we include these outer join bitmapsets into the relids that an expression is considered to depend on, and in consequence also add outer-join relids to the relids of join RelOptInfos. This allows us to correctly perceive whether an expression can be calculated above or below a particular outer join. This change affects FDWs that want to plan foreign joins. They *must* follow suit when labeling foreign joins in order to match with the core planner, but for many purposes (if postgres_fdw is any guide) they'd prefer to consider only base relations within the join. To support both requirements, redefine ForeignScan.fs_relids as base+OJ relids, and add a new field fs_base_relids that's set up by the core planner. Large though it is, this commit just does the minimum necessary to install the new mechanisms and get check-world passing again. Follow-up patches will perform some cleanup. (The README additions and comments mention some stuff that will appear in the follow-up.) Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
Diffstat (limited to 'src/backend/optimizer/util')
-rw-r--r--src/backend/optimizer/util/appendinfo.c59
-rw-r--r--src/backend/optimizer/util/clauses.c6
-rw-r--r--src/backend/optimizer/util/joininfo.c19
-rw-r--r--src/backend/optimizer/util/orclauses.c4
-rw-r--r--src/backend/optimizer/util/pathnode.c15
-rw-r--r--src/backend/optimizer/util/placeholder.c123
-rw-r--r--src/backend/optimizer/util/relnode.c389
-rw-r--r--src/backend/optimizer/util/restrictinfo.c93
-rw-r--r--src/backend/optimizer/util/var.c252
9 files changed, 871 insertions, 89 deletions
diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c
index cd45ab48993..d449b5c2746 100644
--- a/src/backend/optimizer/util/appendinfo.c
+++ b/src/backend/optimizer/util/appendinfo.c
@@ -228,6 +228,28 @@ adjust_appendrel_attrs_mutator(Node *node,
if (var->varlevelsup != 0)
return (Node *) var; /* no changes needed */
+ /*
+ * You might think we need to adjust var->varnullingrels, but that
+ * shouldn't need any changes. It will contain outer-join relids,
+ * while the transformation we are making affects only baserels.
+ * Below, we just propagate var->varnullingrels into the translated
+ * Var.
+ *
+ * If var->varnullingrels isn't empty, and the translation wouldn't be
+ * a Var, we have to fail. One could imagine wrapping the translated
+ * expression in a PlaceHolderVar, but that won't work because this is
+ * typically used after freezing placeholders. Fortunately, the case
+ * appears unreachable at the moment. We can see nonempty
+ * var->varnullingrels here, but only in cases involving partitionwise
+ * joining, and in such cases the translations will always be Vars.
+ * (Non-Var translations occur only for appendrels made by flattening
+ * UNION ALL subqueries.) Should we need to make this work in future,
+ * a possible fix is to mandate that prepjointree.c create PHVs for
+ * all non-Var outputs of such subqueries, and then we could look up
+ * the pre-existing PHV here. Or perhaps just wrap the translations
+ * that way to begin with?
+ */
+
for (cnt = 0; cnt < nappinfos; cnt++)
{
if (var->varno == appinfos[cnt]->parent_relid)
@@ -255,6 +277,10 @@ adjust_appendrel_attrs_mutator(Node *node,
if (newnode == NULL)
elog(ERROR, "attribute %d of relation \"%s\" does not exist",
var->varattno, get_rel_name(appinfo->parent_reloid));
+ if (IsA(newnode, Var))
+ ((Var *) newnode)->varnullingrels = var->varnullingrels;
+ else if (var->varnullingrels != NULL)
+ elog(ERROR, "failed to apply nullingrels to a non-Var");
return newnode;
}
else if (var->varattno == 0)
@@ -308,6 +334,9 @@ adjust_appendrel_attrs_mutator(Node *node,
rowexpr->colnames = copyObject(rte->eref->colnames);
rowexpr->location = -1;
+ if (var->varnullingrels != NULL)
+ elog(ERROR, "failed to apply nullingrels to a non-Var");
+
return (Node *) rowexpr;
}
}
@@ -348,6 +377,8 @@ adjust_appendrel_attrs_mutator(Node *node,
var = copyObject(ridinfo->rowidvar);
/* ... but use the correct relid */
var->varno = leaf_relid;
+ /* identity vars shouldn't have nulling rels */
+ Assert(var->varnullingrels == NULL);
/* varnosyn in the RowIdentityVarInfo is probably wrong */
var->varnosyn = 0;
var->varattnosyn = 0;
@@ -392,8 +423,11 @@ adjust_appendrel_attrs_mutator(Node *node,
(void *) context);
/* now fix PlaceHolderVar's relid sets */
if (phv->phlevelsup == 0)
- phv->phrels = adjust_child_relids(phv->phrels, context->nappinfos,
- context->appinfos);
+ {
+ phv->phrels = adjust_child_relids(phv->phrels,
+ nappinfos, appinfos);
+ /* as above, we needn't touch phnullingrels */
+ }
return (Node *) phv;
}
/* Shouldn't need to handle planner auxiliary nodes here */
@@ -412,7 +446,7 @@ adjust_appendrel_attrs_mutator(Node *node,
RestrictInfo *oldinfo = (RestrictInfo *) node;
RestrictInfo *newinfo = makeNode(RestrictInfo);
- /* Copy all flat-copiable fields */
+ /* Copy all flat-copiable fields, notably including rinfo_serial */
memcpy(newinfo, oldinfo, sizeof(RestrictInfo));
/* Recursively fix the clause itself */
@@ -688,7 +722,11 @@ get_translated_update_targetlist(PlannerInfo *root, Index relid,
/*
* find_appinfos_by_relids
- * Find AppendRelInfo structures for all relations specified by relids.
+ * Find AppendRelInfo structures for base relations listed in relids.
+ *
+ * The relids argument is typically a join relation's relids, which can
+ * include outer-join RT indexes in addition to baserels. We silently
+ * ignore the outer joins.
*
* The AppendRelInfos are returned in an array, which can be pfree'd by the
* caller. *nappinfos is set to the number of entries in the array.
@@ -700,8 +738,9 @@ find_appinfos_by_relids(PlannerInfo *root, Relids relids, int *nappinfos)
int cnt = 0;
int i;
- *nappinfos = bms_num_members(relids);
- appinfos = (AppendRelInfo **) palloc(sizeof(AppendRelInfo *) * *nappinfos);
+ /* Allocate an array that's certainly big enough */
+ appinfos = (AppendRelInfo **)
+ palloc(sizeof(AppendRelInfo *) * bms_num_members(relids));
i = -1;
while ((i = bms_next_member(relids, i)) >= 0)
@@ -709,10 +748,17 @@ find_appinfos_by_relids(PlannerInfo *root, Relids relids, int *nappinfos)
AppendRelInfo *appinfo = root->append_rel_array[i];
if (!appinfo)
+ {
+ /* Probably i is an OJ index, but let's check */
+ if (find_base_rel_ignore_join(root, i) == NULL)
+ continue;
+ /* It's a base rel, but we lack an append_rel_array entry */
elog(ERROR, "child rel %d not found in append_rel_array", i);
+ }
appinfos[cnt++] = appinfo;
}
+ *nappinfos = cnt;
return appinfos;
}
@@ -754,6 +800,7 @@ add_row_identity_var(PlannerInfo *root, Var *orig_var,
Assert(IsA(orig_var, Var));
Assert(orig_var->varno == rtindex);
Assert(orig_var->varlevelsup == 0);
+ Assert(orig_var->varnullingrels == NULL);
/*
* If we're doing non-inherited UPDATE/DELETE/MERGE, there's little need
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index aa584848cf9..76e25118f94 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -2004,14 +2004,16 @@ is_pseudo_constant_clause_relids(Node *clause, Relids relids)
* NumRelids
* (formerly clause_relids)
*
- * Returns the number of different relations referenced in 'clause'.
+ * Returns the number of different base relations referenced in 'clause'.
*/
int
NumRelids(PlannerInfo *root, Node *clause)
{
+ int result;
Relids varnos = pull_varnos(root, clause);
- int result = bms_num_members(varnos);
+ varnos = bms_del_members(varnos, root->outer_join_rels);
+ result = bms_num_members(varnos);
bms_free(varnos);
return result;
}
diff --git a/src/backend/optimizer/util/joininfo.c b/src/backend/optimizer/util/joininfo.c
index 197f20faec7..968a5a488ef 100644
--- a/src/backend/optimizer/util/joininfo.c
+++ b/src/backend/optimizer/util/joininfo.c
@@ -88,8 +88,8 @@ have_relevant_joinclause(PlannerInfo *root,
* not depend on context).
*
* 'restrictinfo' describes the join clause
- * 'join_relids' is the list of relations participating in the join clause
- * (there must be more than one)
+ * 'join_relids' is the set of relations participating in the join clause
+ * (some of these could be outer joins)
*/
void
add_join_clause_to_rels(PlannerInfo *root,
@@ -101,8 +101,11 @@ add_join_clause_to_rels(PlannerInfo *root,
cur_relid = -1;
while ((cur_relid = bms_next_member(join_relids, cur_relid)) >= 0)
{
- RelOptInfo *rel = find_base_rel(root, cur_relid);
+ RelOptInfo *rel = find_base_rel_ignore_join(root, cur_relid);
+ /* We only need to add the clause to baserels */
+ if (rel == NULL)
+ continue;
rel->joininfo = lappend(rel->joininfo, restrictinfo);
}
}
@@ -115,8 +118,8 @@ add_join_clause_to_rels(PlannerInfo *root,
* discover that a relation need not be joined at all.
*
* 'restrictinfo' describes the join clause
- * 'join_relids' is the list of relations participating in the join clause
- * (there must be more than one)
+ * 'join_relids' is the set of relations participating in the join clause
+ * (some of these could be outer joins)
*/
void
remove_join_clause_from_rels(PlannerInfo *root,
@@ -128,7 +131,11 @@ remove_join_clause_from_rels(PlannerInfo *root,
cur_relid = -1;
while ((cur_relid = bms_next_member(join_relids, cur_relid)) >= 0)
{
- RelOptInfo *rel = find_base_rel(root, cur_relid);
+ RelOptInfo *rel = find_base_rel_ignore_join(root, cur_relid);
+
+ /* We would only have added the clause to baserels */
+ if (rel == NULL)
+ continue;
/*
* Remove the restrictinfo from the list. Pointer comparison is
diff --git a/src/backend/optimizer/util/orclauses.c b/src/backend/optimizer/util/orclauses.c
index 493aaa7aedc..abc994dbf2e 100644
--- a/src/backend/optimizer/util/orclauses.c
+++ b/src/backend/optimizer/util/orclauses.c
@@ -338,6 +338,10 @@ consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
sjinfo.syn_lefthand = sjinfo.min_lefthand;
sjinfo.syn_righthand = sjinfo.min_righthand;
sjinfo.jointype = JOIN_INNER;
+ sjinfo.ojrelid = 0;
+ sjinfo.commute_above_l = NULL;
+ sjinfo.commute_above_r = NULL;
+ sjinfo.commute_below = NULL;
/* we don't bother trying to make the remaining fields valid */
sjinfo.lhs_strict = false;
sjinfo.delay_upper_joins = false;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 4478036bb6a..f2bf68d33be 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1307,7 +1307,7 @@ create_append_path(PlannerInfo *root,
* Apply query-wide LIMIT if known and path is for sole base relation.
* (Handling this at this low level is a bit klugy.)
*/
- if (root != NULL && bms_equal(rel->relids, root->all_baserels))
+ if (root != NULL && bms_equal(rel->relids, root->all_query_rels))
pathnode->limit_tuples = root->limit_tuples;
else
pathnode->limit_tuples = -1.0;
@@ -1436,7 +1436,7 @@ create_merge_append_path(PlannerInfo *root,
* Apply query-wide LIMIT if known and path is for sole base relation.
* (Handling this at this low level is a bit klugy.)
*/
- if (bms_equal(rel->relids, root->all_baserels))
+ if (bms_equal(rel->relids, root->all_query_rels))
pathnode->limit_tuples = root->limit_tuples;
else
pathnode->limit_tuples = -1.0;
@@ -2442,12 +2442,12 @@ create_nestloop_path(PlannerInfo *root,
* restrict_clauses that are due to be moved into the inner path. We have
* to do this now, rather than postpone the work till createplan time,
* because the restrict_clauses list can affect the size and cost
- * estimates for this path.
+ * estimates for this path. We detect such clauses by checking for serial
+ * number match to clauses already enforced in the inner path.
*/
if (bms_overlap(inner_req_outer, outer_path->parent->relids))
{
- Relids inner_and_outer = bms_union(inner_path->parent->relids,
- inner_req_outer);
+ Bitmapset *enforced_serials = get_param_path_clause_serials(inner_path);
List *jclauses = NIL;
ListCell *lc;
@@ -2455,9 +2455,7 @@ create_nestloop_path(PlannerInfo *root,
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
- if (!join_clause_is_movable_into(rinfo,
- inner_path->parent->relids,
- inner_and_outer))
+ if (!bms_is_member(rinfo->rinfo_serial, enforced_serials))
jclauses = lappend(jclauses, rinfo);
}
restrict_clauses = jclauses;
@@ -4298,6 +4296,7 @@ do { \
new_ppi->ppi_rows = old_ppi->ppi_rows;
new_ppi->ppi_clauses = old_ppi->ppi_clauses;
ADJUST_CHILD_ATTRS(new_ppi->ppi_clauses);
+ new_ppi->ppi_serials = bms_copy(old_ppi->ppi_serials);
rel->ppilist = lappend(rel->ppilist, new_ppi);
MemoryContextSwitchTo(oldcontext);
diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c
index 72b9977022b..af10dbd124f 100644
--- a/src/backend/optimizer/util/placeholder.c
+++ b/src/backend/optimizer/util/placeholder.c
@@ -23,17 +23,32 @@
#include "optimizer/planmain.h"
#include "utils/lsyscache.h"
+
+typedef struct contain_placeholder_references_context
+{
+ int relid;
+ int sublevels_up;
+} contain_placeholder_references_context;
+
/* Local functions */
static void find_placeholders_recurse(PlannerInfo *root, Node *jtnode);
static void find_placeholders_in_expr(PlannerInfo *root, Node *expr);
+static bool contain_placeholder_references_walker(Node *node,
+ contain_placeholder_references_context *context);
/*
* make_placeholder_expr
* Make a PlaceHolderVar for the given expression.
*
- * phrels is the syntactic location (as a set of baserels) to attribute
+ * phrels is the syntactic location (as a set of relids) to attribute
* to the expression.
+ *
+ * The caller is responsible for adjusting phlevelsup and phnullingrels
+ * as needed. Because we do not know here which query level the PHV
+ * will be associated with, it's important that this function touches
+ * only root->glob; messing with other parts of PlannerInfo would be
+ * likely to do the wrong thing.
*/
PlaceHolderVar *
make_placeholder_expr(PlannerInfo *root, Expr *expr, Relids phrels)
@@ -42,8 +57,9 @@ make_placeholder_expr(PlannerInfo *root, Expr *expr, Relids phrels)
phv->phexpr = expr;
phv->phrels = phrels;
+ phv->phnullingrels = NULL; /* caller may change this later */
phv->phid = ++(root->glob->lastPHId);
- phv->phlevelsup = 0;
+ phv->phlevelsup = 0; /* caller may change this later */
return phv;
}
@@ -93,6 +109,15 @@ find_placeholder_info(PlannerInfo *root, PlaceHolderVar *phv)
phinfo->ph_var = copyObject(phv);
/*
+ * By convention, phinfo->ph_var->phnullingrels is always empty, since the
+ * PlaceHolderInfo represents the initially-calculated state of the
+ * PlaceHolderVar. PlaceHolderVars appearing in the query tree might have
+ * varying values of phnullingrels, reflecting outer joins applied above
+ * the calculation level.
+ */
+ phinfo->ph_var->phnullingrels = NULL;
+
+ /*
* Any referenced rels that are outside the PHV's syntactic scope are
* LATERAL references, which should be included in ph_lateral but not in
* ph_eval_at. If no referenced rels are within the syntactic scope,
@@ -339,6 +364,8 @@ update_placeholder_eval_levels(PlannerInfo *root, SpecialJoinInfo *new_sjinfo)
sjinfo->min_lefthand);
eval_at = bms_add_members(eval_at,
sjinfo->min_righthand);
+ if (sjinfo->ojrelid)
+ eval_at = bms_add_member(eval_at, sjinfo->ojrelid);
/* we'll need another iteration */
found_some = true;
}
@@ -413,6 +440,14 @@ add_placeholders_to_base_rels(PlannerInfo *root)
{
RelOptInfo *rel = find_base_rel(root, varno);
+ /*
+ * As in add_vars_to_targetlist(), a value computed at scan level
+ * has not yet been nulled by any outer join, so its phnullingrels
+ * should be empty.
+ */
+ Assert(phinfo->ph_var->phnullingrels == NULL);
+
+ /* Copying the PHV might be unnecessary here, but be safe */
rel->reltarget->exprs = lappend(rel->reltarget->exprs,
copyObject(phinfo->ph_var));
/* reltarget's cost and width fields will be updated later */
@@ -435,7 +470,8 @@ add_placeholders_to_base_rels(PlannerInfo *root)
*/
void
add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
- RelOptInfo *outer_rel, RelOptInfo *inner_rel)
+ RelOptInfo *outer_rel, RelOptInfo *inner_rel,
+ SpecialJoinInfo *sjinfo)
{
Relids relids = joinrel->relids;
ListCell *lc;
@@ -466,9 +502,17 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
if (!bms_is_subset(phinfo->ph_eval_at, outer_rel->relids) &&
!bms_is_subset(phinfo->ph_eval_at, inner_rel->relids))
{
- PlaceHolderVar *phv = phinfo->ph_var;
+ /* Copying might be unnecessary here, but be safe */
+ PlaceHolderVar *phv = copyObject(phinfo->ph_var);
QualCost cost;
+ /*
+ * It'll start out not nulled by anything. Joins above
+ * this one might add to its phnullingrels later, in much
+ * the same way as for Vars.
+ */
+ Assert(phv->phnullingrels == NULL);
+
joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs,
phv);
cost_qual_eval_node(&cost, (Node *) phv->phexpr, root);
@@ -499,3 +543,74 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
}
}
}
+
+/*
+ * contain_placeholder_references_to
+ * Detect whether any PlaceHolderVars in the given clause contain
+ * references to the given relid (typically an OJ relid).
+ *
+ * "Contain" means that there's a use of the relid inside the PHV's
+ * contained expression, so that changing the nullability status of
+ * the rel might change what the PHV computes.
+ *
+ * The code here to cope with upper-level PHVs is likely dead, but keep it
+ * anyway just in case.
+ */
+bool
+contain_placeholder_references_to(PlannerInfo *root, Node *clause,
+ int relid)
+{
+ contain_placeholder_references_context context;
+
+ /* We can answer quickly in the common case that there's no PHVs at all */
+ if (root->glob->lastPHId == 0)
+ return false;
+ /* Else run the recursive search */
+ context.relid = relid;
+ context.sublevels_up = 0;
+ return contain_placeholder_references_walker(clause, &context);
+}
+
+static bool
+contain_placeholder_references_walker(Node *node,
+ contain_placeholder_references_context *context)
+{
+ if (node == NULL)
+ return false;
+ if (IsA(node, PlaceHolderVar))
+ {
+ PlaceHolderVar *phv = (PlaceHolderVar *) node;
+
+ /* We should just look through PHVs of other query levels */
+ if (phv->phlevelsup == context->sublevels_up)
+ {
+ /* If phrels matches, we found what we came for */
+ if (bms_is_member(context->relid, phv->phrels))
+ return true;
+
+ /*
+ * We should not examine phnullingrels: what we are looking for is
+ * references in the contained expression, not OJs that might null
+ * the result afterwards. Also, we don't need to recurse into the
+ * contained expression, because phrels should adequately
+ * summarize what's in there. So we're done here.
+ */
+ return false;
+ }
+ }
+ else if (IsA(node, Query))
+ {
+ /* Recurse into RTE subquery or not-yet-planned sublink subquery */
+ bool result;
+
+ context->sublevels_up++;
+ result = query_tree_walker((Query *) node,
+ contain_placeholder_references_walker,
+ context,
+ 0);
+ context->sublevels_up--;
+ return result;
+ }
+ return expression_tree_walker(node, contain_placeholder_references_walker,
+ context);
+}
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 0a5632699d6..ebfb4ddd121 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -28,6 +28,7 @@
#include "optimizer/plancat.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
+#include "rewrite/rewriteManip.h"
#include "parser/parse_relation.h"
#include "utils/hsearch.h"
#include "utils/lsyscache.h"
@@ -40,7 +41,9 @@ typedef struct JoinHashEntry
} JoinHashEntry;
static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
- RelOptInfo *input_rel);
+ RelOptInfo *input_rel,
+ SpecialJoinInfo *sjinfo,
+ bool can_null);
static List *build_joinrel_restrictlist(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outer_rel,
@@ -48,8 +51,10 @@ static List *build_joinrel_restrictlist(PlannerInfo *root,
static void build_joinrel_joinlist(RelOptInfo *joinrel,
RelOptInfo *outer_rel,
RelOptInfo *inner_rel);
-static List *subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
- List *joininfo_list,
+static List *subbuild_joinrel_restrictlist(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *input_rel,
+ Relids both_input_relids,
List *new_restrictlist);
static List *subbuild_joinrel_joinlist(RelOptInfo *joinrel,
List *joininfo_list,
@@ -57,10 +62,12 @@ static List *subbuild_joinrel_joinlist(RelOptInfo *joinrel,
static void set_foreign_rel_properties(RelOptInfo *joinrel,
RelOptInfo *outer_rel, RelOptInfo *inner_rel);
static void add_join_rel(PlannerInfo *root, RelOptInfo *joinrel);
-static void build_joinrel_partition_info(RelOptInfo *joinrel,
+static void build_joinrel_partition_info(PlannerInfo *root,
+ RelOptInfo *joinrel,
RelOptInfo *outer_rel, RelOptInfo *inner_rel,
- List *restrictlist, JoinType jointype);
-static bool have_partkey_equi_join(RelOptInfo *joinrel,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist);
+static bool have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *rel1, RelOptInfo *rel2,
JoinType jointype, List *restrictlist);
static int match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel,
@@ -373,7 +380,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
/*
* find_base_rel
- * Find a base or other relation entry, which must already exist.
+ * Find a base or otherrel relation entry, which must already exist.
*/
RelOptInfo *
find_base_rel(PlannerInfo *root, int relid)
@@ -395,6 +402,44 @@ find_base_rel(PlannerInfo *root, int relid)
}
/*
+ * find_base_rel_ignore_join
+ * Find a base or otherrel relation entry, which must already exist.
+ *
+ * Unlike find_base_rel, if relid references an outer join then this
+ * will return NULL rather than raising an error. This is convenient
+ * for callers that must deal with relid sets including both base and
+ * outer joins.
+ */
+RelOptInfo *
+find_base_rel_ignore_join(PlannerInfo *root, int relid)
+{
+ Assert(relid > 0);
+
+ if (relid < root->simple_rel_array_size)
+ {
+ RelOptInfo *rel;
+ RangeTblEntry *rte;
+
+ rel = root->simple_rel_array[relid];
+ if (rel)
+ return rel;
+
+ /*
+ * We could just return NULL here, but for debugging purposes it seems
+ * best to actually verify that the relid is an outer join and not
+ * something weird.
+ */
+ rte = root->simple_rte_array[relid];
+ if (rte && rte->rtekind == RTE_JOIN && rte->jointype != JOIN_INNER)
+ return NULL;
+ }
+
+ elog(ERROR, "no relation entry for relid %d", relid);
+
+ return NULL; /* keep compiler quiet */
+}
+
+/*
* build_join_rel_hash
* Construct the auxiliary hash table for join relations.
*/
@@ -692,9 +737,11 @@ build_join_rel(PlannerInfo *root,
* and inner rels we first try to build it from. But the contents should
* be the same regardless.
*/
- build_joinrel_tlist(root, joinrel, outer_rel);
- build_joinrel_tlist(root, joinrel, inner_rel);
- add_placeholders_to_joinrel(root, joinrel, outer_rel, inner_rel);
+ build_joinrel_tlist(root, joinrel, outer_rel, sjinfo,
+ (sjinfo->jointype == JOIN_FULL));
+ build_joinrel_tlist(root, joinrel, inner_rel, sjinfo,
+ (sjinfo->jointype != JOIN_INNER));
+ add_placeholders_to_joinrel(root, joinrel, outer_rel, inner_rel, sjinfo);
/*
* add_placeholders_to_joinrel also took care of adding the ph_lateral
@@ -726,8 +773,8 @@ build_join_rel(PlannerInfo *root,
joinrel->has_eclass_joins = has_relevant_eclass_joinclause(root, joinrel);
/* Store the partition information. */
- build_joinrel_partition_info(joinrel, outer_rel, inner_rel, restrictlist,
- sjinfo->jointype);
+ build_joinrel_partition_info(root, joinrel, outer_rel, inner_rel, sjinfo,
+ restrictlist);
/*
* Set estimates of the joinrel's size.
@@ -783,16 +830,14 @@ build_join_rel(PlannerInfo *root,
* 'parent_joinrel' is the RelOptInfo representing the join between parent
* relations. Some of the members of new RelOptInfo are produced by
* translating corresponding members of this RelOptInfo
- * 'sjinfo': child-join context info
* 'restrictlist': list of RestrictInfo nodes that apply to this particular
* pair of joinable relations
- * 'jointype' is the join type (inner, left, full, etc)
+ * 'sjinfo': child join's join-type details
*/
RelOptInfo *
build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
RelOptInfo *inner_rel, RelOptInfo *parent_joinrel,
- List *restrictlist, SpecialJoinInfo *sjinfo,
- JoinType jointype)
+ List *restrictlist, SpecialJoinInfo *sjinfo)
{
RelOptInfo *joinrel = makeNode(RelOptInfo);
AppendRelInfo **appinfos;
@@ -806,6 +851,8 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
joinrel->reloptkind = RELOPT_OTHER_JOINREL;
joinrel->relids = bms_union(outer_rel->relids, inner_rel->relids);
+ if (sjinfo->ojrelid != 0)
+ joinrel->relids = bms_add_member(joinrel->relids, sjinfo->ojrelid);
joinrel->rows = 0;
/* cheap startup cost is interesting iff not all tuples to be retrieved */
joinrel->consider_startup = (root->tuple_fraction > 0);
@@ -892,8 +939,8 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
joinrel->has_eclass_joins = parent_joinrel->has_eclass_joins;
/* Is the join between partitions itself partitioned? */
- build_joinrel_partition_info(joinrel, outer_rel, inner_rel, restrictlist,
- jointype);
+ build_joinrel_partition_info(root, joinrel, outer_rel, inner_rel, sjinfo,
+ restrictlist);
/* Child joinrel is parallel safe if parent is parallel safe. */
joinrel->consider_parallel = parent_joinrel->consider_parallel;
@@ -975,10 +1022,41 @@ min_join_parameterization(PlannerInfo *root,
*
* We also compute the expected width of the join's output, making use
* of data that was cached at the baserel level by set_rel_width().
+ *
+ * Pass can_null as true if the join is an outer join that can null Vars
+ * from this input relation. If so, we will (normally) add the join's relid
+ * to the nulling bitmaps of Vars and PHVs bubbled up from the input.
+ *
+ * When forming an outer join's target list, special handling is needed
+ * in case the outer join was commuted with another one per outer join
+ * identity 3 (see optimizer/README). We must take steps to ensure that
+ * the output Vars have the same nulling bitmaps that they would if the
+ * two joins had been done in syntactic order; else they won't match Vars
+ * appearing higher in the query tree. We need to do two things:
+ *
+ * First, sjinfo->commute_above_r is added to the nulling bitmaps of RHS Vars.
+ * This takes care of the case where we implement
+ * A leftjoin (B leftjoin C on (Pbc)) on (Pab)
+ * as
+ * (A leftjoin B on (Pab)) leftjoin C on (Pbc)
+ * The C columns emitted by the B/C join need to be shown as nulled by both
+ * the B/C and A/B joins, even though they've not traversed the A/B join.
+ * (If the joins haven't been commuted, we are adding the nullingrel bits
+ * prematurely; but that's okay because the C columns can't be referenced
+ * between here and the upper join.)
+ *
+ * Second, if a RHS Var has any of the relids in sjinfo->commute_above_l
+ * already set in its nulling bitmap, then we *don't* add sjinfo->ojrelid
+ * to its nulling bitmap (but we do still add commute_above_r). This takes
+ * care of the reverse transformation: if the original syntax was
+ * (A leftjoin B on (Pab)) leftjoin C on (Pbc)
+ * then the now-upper A/B join must not mark C columns as nulled by itself.
*/
static void
build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
- RelOptInfo *input_rel)
+ RelOptInfo *input_rel,
+ SpecialJoinInfo *sjinfo,
+ bool can_null)
{
Relids relids = joinrel->relids;
ListCell *vars;
@@ -998,7 +1076,24 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
/* Is it still needed above this joinrel? */
if (bms_nonempty_difference(phinfo->ph_needed, relids))
{
- /* Yup, add it to the output */
+ /*
+ * Yup, add it to the output. If this join potentially nulls
+ * this input, we have to update the PHV's phnullingrels,
+ * which means making a copy.
+ */
+ if (can_null)
+ {
+ phv = copyObject(phv);
+ /* See comments above to understand this logic */
+ if (sjinfo->ojrelid != 0 &&
+ !bms_overlap(phv->phnullingrels, sjinfo->commute_above_l))
+ phv->phnullingrels = bms_add_member(phv->phnullingrels,
+ sjinfo->ojrelid);
+ if (sjinfo->commute_above_r)
+ phv->phnullingrels = bms_add_members(phv->phnullingrels,
+ sjinfo->commute_above_r);
+ }
+
joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs,
phv);
/* Bubbling up the precomputed result has cost zero */
@@ -1022,9 +1117,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
RowIdentityVarInfo *ridinfo = (RowIdentityVarInfo *)
list_nth(root->row_identity_vars, var->varattno - 1);
- joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs,
- var);
- /* Vars have cost zero, so no need to adjust reltarget->cost */
+ /* Update reltarget width estimate from RowIdentityVarInfo */
joinrel->reltarget->width += ridinfo->rowidwidth;
}
else
@@ -1037,15 +1130,35 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
/* Is it still needed above this joinrel? */
ndx = var->varattno - baserel->min_attr;
- if (bms_nonempty_difference(baserel->attr_needed[ndx], relids))
- {
- /* Yup, add it to the output */
- joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs,
- var);
- /* Vars have cost zero, so no need to adjust reltarget->cost */
- joinrel->reltarget->width += baserel->attr_widths[ndx];
- }
+ if (!bms_nonempty_difference(baserel->attr_needed[ndx], relids))
+ continue; /* nope, skip it */
+
+ /* Update reltarget width estimate from baserel's attr_widths */
+ joinrel->reltarget->width += baserel->attr_widths[ndx];
}
+
+ /*
+ * Add the Var to the output. If this join potentially nulls this
+ * input, we have to update the Var's varnullingrels, which means
+ * making a copy.
+ */
+ if (can_null)
+ {
+ var = copyObject(var);
+ /* See comments above to understand this logic */
+ if (sjinfo->ojrelid != 0 &&
+ !bms_overlap(var->varnullingrels, sjinfo->commute_above_l))
+ var->varnullingrels = bms_add_member(var->varnullingrels,
+ sjinfo->ojrelid);
+ if (sjinfo->commute_above_r)
+ var->varnullingrels = bms_add_members(var->varnullingrels,
+ sjinfo->commute_above_r);
+ }
+
+ joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs,
+ var);
+
+ /* Vars have cost zero, so no need to adjust reltarget->cost */
}
}
@@ -1064,7 +1177,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
* is not handled in the sub-relations, so it depends on which
* sub-relations are considered.
*
- * If a join clause from an input relation refers to base rels still not
+ * If a join clause from an input relation refers to base+OJ rels still not
* present in the joinrel, then it is still a join clause for the joinrel;
* we put it into the joininfo list for the joinrel. Otherwise,
* the clause is now a restrict clause for the joined relation, and we
@@ -1098,14 +1211,19 @@ build_joinrel_restrictlist(PlannerInfo *root,
RelOptInfo *inner_rel)
{
List *result;
+ Relids both_input_relids;
+
+ both_input_relids = bms_union(outer_rel->relids, inner_rel->relids);
/*
* Collect all the clauses that syntactically belong at this level,
* eliminating any duplicates (important since we will see many of the
* same clauses arriving from both input relations).
*/
- result = subbuild_joinrel_restrictlist(joinrel, outer_rel->joininfo, NIL);
- result = subbuild_joinrel_restrictlist(joinrel, inner_rel->joininfo, result);
+ result = subbuild_joinrel_restrictlist(root, joinrel, outer_rel,
+ both_input_relids, NIL);
+ result = subbuild_joinrel_restrictlist(root, joinrel, inner_rel,
+ both_input_relids, result);
/*
* Add on any clauses derived from EquivalenceClasses. These cannot be
@@ -1140,24 +1258,63 @@ build_joinrel_joinlist(RelOptInfo *joinrel,
}
static List *
-subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
- List *joininfo_list,
+subbuild_joinrel_restrictlist(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *input_rel,
+ Relids both_input_relids,
List *new_restrictlist)
{
ListCell *l;
- foreach(l, joininfo_list)
+ foreach(l, input_rel->joininfo)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
if (bms_is_subset(rinfo->required_relids, joinrel->relids))
{
/*
- * This clause becomes a restriction clause for the joinrel, since
- * it refers to no outside rels. Add it to the list, being
- * careful to eliminate duplicates. (Since RestrictInfo nodes in
- * different joinlists will have been multiply-linked rather than
- * copied, pointer equality should be a sufficient test.)
+ * This clause should become a restriction clause for the joinrel,
+ * since it refers to no outside rels. However, if it's a clone
+ * clause then it might be too late to evaluate it, so we have to
+ * check. (If it is too late, just ignore the clause, taking it
+ * on faith that another clone was or will be selected.) Clone
+ * clauses should always be outer-join clauses, so we compare
+ * against both_input_relids.
+ */
+ if (rinfo->has_clone || rinfo->is_clone)
+ {
+ Assert(!RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids));
+ if (!bms_is_subset(rinfo->required_relids, both_input_relids))
+ continue;
+ if (!clause_is_computable_at(root, rinfo->clause_relids,
+ both_input_relids))
+ continue;
+ }
+ else
+ {
+ /*
+ * For non-clone clauses, we just Assert it's OK. These might
+ * be either join or filter clauses.
+ */
+#ifdef USE_ASSERT_CHECKING
+ if (RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
+ Assert(clause_is_computable_at(root, rinfo->clause_relids,
+ joinrel->relids));
+ else
+ {
+ Assert(bms_is_subset(rinfo->required_relids,
+ both_input_relids));
+ Assert(clause_is_computable_at(root, rinfo->clause_relids,
+ both_input_relids));
+ }
+#endif
+ }
+
+ /*
+ * OK, so add it to the list, being careful to eliminate
+ * duplicates. (Since RestrictInfo nodes in different joinlists
+ * will have been multiply-linked rather than copied, pointer
+ * equality should be a sufficient test.)
*/
new_restrictlist = list_append_unique_ptr(new_restrictlist, rinfo);
}
@@ -1319,6 +1476,7 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel,
ParamPathInfo *ppi;
Relids joinrelids;
List *pclauses;
+ Bitmapset *pserials;
double rows;
ListCell *lc;
@@ -1361,6 +1519,15 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel,
required_outer,
baserel));
+ /* Compute set of serial numbers of the enforced clauses */
+ pserials = NULL;
+ foreach(lc, pclauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ pserials = bms_add_member(pserials, rinfo->rinfo_serial);
+ }
+
/* Estimate the number of rows returned by the parameterized scan */
rows = get_parameterized_baserel_size(root, baserel, pclauses);
@@ -1369,6 +1536,7 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel,
ppi->ppi_req_outer = required_outer;
ppi->ppi_rows = rows;
ppi->ppi_clauses = pclauses;
+ ppi->ppi_serials = pserials;
baserel->ppilist = lappend(baserel->ppilist, ppi);
return ppi;
@@ -1594,6 +1762,7 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel,
ppi->ppi_req_outer = required_outer;
ppi->ppi_rows = rows;
ppi->ppi_clauses = NIL;
+ ppi->ppi_serials = NULL;
joinrel->ppilist = lappend(joinrel->ppilist, ppi);
return ppi;
@@ -1632,6 +1801,7 @@ get_appendrel_parampathinfo(RelOptInfo *appendrel, Relids required_outer)
ppi->ppi_req_outer = required_outer;
ppi->ppi_rows = 0;
ppi->ppi_clauses = NIL;
+ ppi->ppi_serials = NULL;
appendrel->ppilist = lappend(appendrel->ppilist, ppi);
return ppi;
@@ -1658,15 +1828,110 @@ find_param_path_info(RelOptInfo *rel, Relids required_outer)
}
/*
+ * get_param_path_clause_serials
+ * Given a parameterized Path, return the set of pushed-down clauses
+ * (identified by rinfo_serial numbers) enforced within the Path.
+ */
+Bitmapset *
+get_param_path_clause_serials(Path *path)
+{
+ if (path->param_info == NULL)
+ return NULL; /* not parameterized */
+ if (IsA(path, NestPath) ||
+ IsA(path, MergePath) ||
+ IsA(path, HashPath))
+ {
+ /*
+ * For a join path, combine clauses enforced within either input path
+ * with those enforced as joinrestrictinfo in this path. Note that
+ * joinrestrictinfo may include some non-pushed-down clauses, but for
+ * current purposes it's okay if we include those in the result. (To
+ * be more careful, we could check for clause_relids overlapping the
+ * path parameterization, but it's not worth the cycles for now.)
+ */
+ JoinPath *jpath = (JoinPath *) path;
+ Bitmapset *pserials;
+ ListCell *lc;
+
+ pserials = NULL;
+ pserials = bms_add_members(pserials,
+ get_param_path_clause_serials(jpath->outerjoinpath));
+ pserials = bms_add_members(pserials,
+ get_param_path_clause_serials(jpath->innerjoinpath));
+ foreach(lc, jpath->joinrestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ pserials = bms_add_member(pserials, rinfo->rinfo_serial);
+ }
+ return pserials;
+ }
+ else if (IsA(path, AppendPath))
+ {
+ /*
+ * For an appendrel, take the intersection of the sets of clauses
+ * enforced in each input path.
+ */
+ AppendPath *apath = (AppendPath *) path;
+ Bitmapset *pserials;
+ ListCell *lc;
+
+ pserials = NULL;
+ foreach(lc, apath->subpaths)
+ {
+ Path *subpath = (Path *) lfirst(lc);
+ Bitmapset *subserials;
+
+ subserials = get_param_path_clause_serials(subpath);
+ if (lc == list_head(apath->subpaths))
+ pserials = bms_copy(subserials);
+ else
+ pserials = bms_int_members(pserials, subserials);
+ }
+ return pserials;
+ }
+ else if (IsA(path, MergeAppendPath))
+ {
+ /* Same as AppendPath case */
+ MergeAppendPath *apath = (MergeAppendPath *) path;
+ Bitmapset *pserials;
+ ListCell *lc;
+
+ pserials = NULL;
+ foreach(lc, apath->subpaths)
+ {
+ Path *subpath = (Path *) lfirst(lc);
+ Bitmapset *subserials;
+
+ subserials = get_param_path_clause_serials(subpath);
+ if (lc == list_head(apath->subpaths))
+ pserials = bms_copy(subserials);
+ else
+ pserials = bms_int_members(pserials, subserials);
+ }
+ return pserials;
+ }
+ else
+ {
+ /*
+ * Otherwise, it's a baserel path and we can use the
+ * previously-computed set of serial numbers.
+ */
+ return path->param_info->ppi_serials;
+ }
+}
+
+/*
* build_joinrel_partition_info
* Checks if the two relations being joined can use partitionwise join
* and if yes, initialize partitioning information of the resulting
* partitioned join relation.
*/
static void
-build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel,
- RelOptInfo *inner_rel, List *restrictlist,
- JoinType jointype)
+build_joinrel_partition_info(PlannerInfo *root,
+ RelOptInfo *joinrel, RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo,
+ List *restrictlist)
{
PartitionScheme part_scheme;
@@ -1692,8 +1957,8 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel,
!outer_rel->consider_partitionwise_join ||
!inner_rel->consider_partitionwise_join ||
outer_rel->part_scheme != inner_rel->part_scheme ||
- !have_partkey_equi_join(joinrel, outer_rel, inner_rel,
- jointype, restrictlist))
+ !have_partkey_equi_join(root, joinrel, outer_rel, inner_rel,
+ sjinfo->jointype, restrictlist))
{
Assert(!IS_PARTITIONED_REL(joinrel));
return;
@@ -1717,7 +1982,8 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel,
* child-join relations of the join relation in try_partitionwise_join().
*/
joinrel->part_scheme = part_scheme;
- set_joinrel_partition_key_exprs(joinrel, outer_rel, inner_rel, jointype);
+ set_joinrel_partition_key_exprs(joinrel, outer_rel, inner_rel,
+ sjinfo->jointype);
/*
* Set the consider_partitionwise_join flag.
@@ -1735,7 +2001,7 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel,
* partition keys.
*/
static bool
-have_partkey_equi_join(RelOptInfo *joinrel,
+have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *rel1, RelOptInfo *rel2,
JoinType jointype, List *restrictlist)
{
@@ -1801,6 +2067,24 @@ have_partkey_equi_join(RelOptInfo *joinrel,
strict_op = op_strict(opexpr->opno);
/*
+ * Vars appearing in the relation's partition keys will not have any
+ * varnullingrels, but those in expr1 and expr2 will if we're above
+ * outer joins that could null the respective rels. It's okay to
+ * match anyway, if the join operator is strict.
+ */
+ if (strict_op)
+ {
+ if (bms_overlap(rel1->relids, root->outer_join_rels))
+ expr1 = (Expr *) remove_nulling_relids((Node *) expr1,
+ root->outer_join_rels,
+ NULL);
+ if (bms_overlap(rel2->relids, root->outer_join_rels))
+ expr2 = (Expr *) remove_nulling_relids((Node *) expr2,
+ root->outer_join_rels,
+ NULL);
+ }
+
+ /*
* Only clauses referencing the partition keys are useful for
* partitionwise join.
*/
@@ -2012,7 +2296,12 @@ set_joinrel_partition_key_exprs(RelOptInfo *joinrel,
* partitionwise nesting of any outer join.) We assume no
* type coercions are needed to make the coalesce expressions,
* since columns of different types won't have gotten
- * classified as the same PartitionScheme.
+ * classified as the same PartitionScheme. Note that we
+ * intentionally leave out the varnullingrels decoration that
+ * would ordinarily appear on the Vars inside these
+ * CoalesceExprs, because have_partkey_equi_join will strip
+ * varnullingrels from the expressions it will compare to the
+ * partexprs.
*/
foreach(lc, list_concat_copy(outer_expr, outer_null_expr))
{
diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c
index 0c3878a805b..1350f011a62 100644
--- a/src/backend/optimizer/util/restrictinfo.c
+++ b/src/backend/optimizer/util/restrictinfo.c
@@ -53,6 +53,10 @@ static Expr *make_sub_restrictinfos(PlannerInfo *root,
* required_relids can be NULL, in which case it defaults to the actual clause
* contents (i.e., clause_relids).
*
+ * Note that there aren't options to set the has_clone and is_clone flags:
+ * we always initialize those to false. There's just one place that wants
+ * something different, so making all callers pass them seems inconvenient.
+ *
* We initialize fields that depend only on the given subexpression, leaving
* others that depend on context (or may never be needed at all) to be filled
* later.
@@ -116,12 +120,15 @@ make_restrictinfo_internal(PlannerInfo *root,
Relids nullable_relids)
{
RestrictInfo *restrictinfo = makeNode(RestrictInfo);
+ Relids baserels;
restrictinfo->clause = clause;
restrictinfo->orclause = orclause;
restrictinfo->is_pushed_down = is_pushed_down;
restrictinfo->outerjoin_delayed = outerjoin_delayed;
restrictinfo->pseudoconstant = pseudoconstant;
+ restrictinfo->has_clone = false; /* may get set by caller */
+ restrictinfo->is_clone = false; /* may get set by caller */
restrictinfo->can_join = false; /* may get set below */
restrictinfo->security_level = security_level;
restrictinfo->outer_relids = outer_relids;
@@ -188,6 +195,25 @@ make_restrictinfo_internal(PlannerInfo *root,
restrictinfo->required_relids = restrictinfo->clause_relids;
/*
+ * Count the number of base rels appearing in clause_relids. To do this,
+ * we just delete rels mentioned in root->outer_join_rels and count the
+ * survivors. Because we are called during deconstruct_jointree which is
+ * the same tree walk that populates outer_join_rels, this is a little bit
+ * unsafe-looking; but it should be fine because the recursion in
+ * deconstruct_jointree should already have visited any outer join that
+ * could be mentioned in this clause.
+ */
+ baserels = bms_difference(restrictinfo->clause_relids,
+ root->outer_join_rels);
+ restrictinfo->num_base_rels = bms_num_members(baserels);
+ bms_free(baserels);
+
+ /*
+ * Label this RestrictInfo with a fresh serial number.
+ */
+ restrictinfo->rinfo_serial = ++(root->last_rinfo_serial);
+
+ /*
* Fill in all the cacheable fields with "not yet set" markers. None of
* these will be computed until/unless needed. Note in particular that we
* don't mark a binary opclause as mergejoinable or hashjoinable here;
@@ -350,7 +376,7 @@ commute_restrictinfo(RestrictInfo *rinfo, Oid comm_op)
* ... and adjust those we need to change. Note in particular that we can
* preserve any cached selectivity or cost estimates, since those ought to
* be the same for the new clause. Likewise we can keep the source's
- * parent_ec.
+ * parent_ec. It's also important that we keep the same rinfo_serial.
*/
result->clause = (Expr *) newclause;
result->left_relids = rinfo->right_relids;
@@ -497,6 +523,58 @@ extract_actual_join_clauses(List *restrictinfo_list,
}
}
+/*
+ * clause_is_computable_at
+ * Test whether a clause is computable at a given evaluation level.
+ *
+ * There are two conditions for whether an expression can actually be
+ * evaluated at a given join level: the evaluation context must include
+ * all the relids (both base and OJ) used by the expression, and we must
+ * not have already evaluated any outer joins that null Vars/PHVs of the
+ * expression and are not listed in their nullingrels.
+ *
+ * This function checks the second condition; we assume the caller already
+ * saw to the first one.
+ *
+ * For speed reasons, we don't individually examine each Var/PHV of the
+ * expression, but just look at the overall clause_relids (the union of the
+ * varnos and varnullingrels). This could give a misleading answer if the
+ * Vars of a given varno don't all have the same varnullingrels; but that
+ * really shouldn't happen within a single scalar expression or RestrictInfo
+ * clause. Despite that, this is still annoyingly expensive :-(
+ */
+bool
+clause_is_computable_at(PlannerInfo *root,
+ Relids clause_relids,
+ Relids eval_relids)
+{
+ ListCell *lc;
+
+ /* Nothing to do if no outer joins have been performed yet. */
+ if (!bms_overlap(eval_relids, root->outer_join_rels))
+ return true;
+
+ foreach(lc, root->join_info_list)
+ {
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+
+ /* Ignore outer joins that are not yet performed. */
+ if (!bms_is_member(sjinfo->ojrelid, eval_relids))
+ continue;
+
+ /* OK if clause lists it (we assume all Vars in it agree). */
+ if (bms_is_member(sjinfo->ojrelid, clause_relids))
+ continue;
+
+ /* Else, trouble if clause mentions any nullable Vars. */
+ if (bms_overlap(clause_relids, sjinfo->min_righthand) ||
+ (sjinfo->jointype == JOIN_FULL &&
+ bms_overlap(clause_relids, sjinfo->min_lefthand)))
+ return false; /* doesn't work */
+ }
+
+ return true; /* OK */
+}
/*
* join_clause_is_movable_to
@@ -522,6 +600,12 @@ extract_actual_join_clauses(List *restrictinfo_list,
* Also, the join clause must not use any relations that have LATERAL
* references to the target relation, since we could not put such rels on
* the outer side of a nestloop with the target relation.
+ *
+ * Also, we reject is_clone versions of outer-join clauses. This has the
+ * effect of preventing us from generating variant parameterized paths
+ * that differ only in which outer joins null the parameterization rel(s).
+ * Generating one path from the minimally-parameterized has_clone version
+ * is sufficient.
*/
bool
join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel)
@@ -542,6 +626,10 @@ join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel)
if (bms_overlap(baserel->lateral_referencers, rinfo->clause_relids))
return false;
+ /* Ignore clones, too */
+ if (rinfo->is_clone)
+ return false;
+
return true;
}
@@ -587,6 +675,9 @@ join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel)
* moved for some valid set of outer rels, so we don't have the benefit of
* relying on prior checks for lateral-reference validity.
*
+ * Likewise, we don't check is_clone here: rejecting the inappropriate
+ * variants of a cloned clause must be handled upstream.
+ *
* Note: if this returns true, it means that the clause could be moved to
* this join relation, but that doesn't mean that this is the lowest join
* it could be moved to. Caller may need to make additional calls to verify
diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c
index 09cc7d6f518..8c9824e54d6 100644
--- a/src/backend/optimizer/util/var.c
+++ b/src/backend/optimizer/util/var.c
@@ -62,6 +62,7 @@ typedef struct
typedef struct
{
+ PlannerInfo *root; /* could be NULL! */
Query *query; /* outer Query */
int sublevels_up;
bool possible_sublink; /* could aliases include a SubLink? */
@@ -80,6 +81,10 @@ static bool pull_var_clause_walker(Node *node,
pull_var_clause_context *context);
static Node *flatten_join_alias_vars_mutator(Node *node,
flatten_join_alias_vars_context *context);
+static Node *add_nullingrels_if_needed(PlannerInfo *root, Node *newnode,
+ Var *oldvar);
+static bool is_standard_join_alias_expression(Node *newnode, Var *oldvar);
+static void adjust_standard_join_alias_expression(Node *newnode, Var *oldvar);
static Relids alias_relid_set(Query *query, Relids relids);
@@ -88,6 +93,9 @@ static Relids alias_relid_set(Query *query, Relids relids);
* Create a set of all the distinct varnos present in a parsetree.
* Only varnos that reference level-zero rtable entries are considered.
*
+ * The result includes outer-join relids mentioned in Var.varnullingrels and
+ * PlaceHolderVar.phnullingrels fields in the parsetree.
+ *
* "root" can be passed as NULL if it is not necessary to process
* PlaceHolderVars.
*
@@ -153,7 +161,11 @@ pull_varnos_walker(Node *node, pull_varnos_context *context)
Var *var = (Var *) node;
if (var->varlevelsup == context->sublevels_up)
+ {
context->varnos = bms_add_member(context->varnos, var->varno);
+ context->varnos = bms_add_members(context->varnos,
+ var->varnullingrels);
+ }
return false;
}
if (IsA(node, CurrentOfExpr))
@@ -244,6 +256,14 @@ pull_varnos_walker(Node *node, pull_varnos_context *context)
context->varnos = bms_join(context->varnos,
newevalat);
}
+
+ /*
+ * In all three cases, include phnullingrels in the result. We
+ * don't worry about possibly needing to translate it, because
+ * appendrels only translate varnos of baserels, not outer joins.
+ */
+ context->varnos = bms_add_members(context->varnos,
+ phv->phnullingrels);
return false; /* don't recurse into expression */
}
}
@@ -707,26 +727,42 @@ pull_var_clause_walker(Node *node, pull_var_clause_context *context)
* is the only way that the executor can directly handle whole-row Vars.
*
* This also adjusts relid sets found in some expression node types to
- * substitute the contained base rels for any join relid.
+ * substitute the contained base+OJ rels for any join relid.
*
* If a JOIN contains sub-selects that have been flattened, its join alias
* entries might now be arbitrary expressions, not just Vars. This affects
- * this function in one important way: we might find ourselves inserting
- * SubLink expressions into subqueries, and we must make sure that their
- * Query.hasSubLinks fields get set to true if so. If there are any
+ * this function in two important ways. First, we might find ourselves
+ * inserting SubLink expressions into subqueries, and we must make sure that
+ * their Query.hasSubLinks fields get set to true if so. If there are any
* SubLinks in the join alias lists, the outer Query should already have
* hasSubLinks = true, so this is only relevant to un-flattened subqueries.
+ * Second, we have to preserve any varnullingrels info attached to the
+ * alias Vars we're replacing. If the replacement expression is a Var or
+ * PlaceHolderVar or constructed from those, we can just add the
+ * varnullingrels bits to the existing nullingrels field(s); otherwise
+ * we have to add a PlaceHolderVar wrapper.
*
- * NOTE: this is used on not-yet-planned expressions. We do not expect it
- * to be applied directly to the whole Query, so if we see a Query to start
- * with, we do want to increment sublevels_up (this occurs for LATERAL
- * subqueries).
+ * NOTE: this is also used by the parser, to expand join alias Vars before
+ * checking GROUP BY validity. For that use-case, root will be NULL, which
+ * is why we have to pass the Query separately. We need the root itself only
+ * for making PlaceHolderVars. We can avoid making PlaceHolderVars in the
+ * parser's usage because it won't be dealing with arbitrary expressions:
+ * so long as adjust_standard_join_alias_expression can handle everything
+ * the parser would make as a join alias expression, we're OK.
*/
Node *
-flatten_join_alias_vars(Query *query, Node *node)
+flatten_join_alias_vars(PlannerInfo *root, Query *query, Node *node)
{
flatten_join_alias_vars_context context;
+ /*
+ * We do not expect this to be applied to the whole Query, only to
+ * expressions or LATERAL subqueries. Hence, if the top node is a Query,
+ * it's okay to immediately increment sublevels_up.
+ */
+ Assert(node != (Node *) query);
+
+ context.root = root;
context.query = query;
context.sublevels_up = 0;
/* flag whether join aliases could possibly contain SubLinks */
@@ -797,7 +833,9 @@ flatten_join_alias_vars_mutator(Node *node,
rowexpr->colnames = colnames;
rowexpr->location = var->location;
- return (Node *) rowexpr;
+ /* Lastly, add any varnullingrels to the replacement expression */
+ return add_nullingrels_if_needed(context->root, (Node *) rowexpr,
+ var);
}
/* Expand join alias reference */
@@ -824,7 +862,8 @@ flatten_join_alias_vars_mutator(Node *node,
if (context->possible_sublink && !context->inserted_sublink)
context->inserted_sublink = checkExprHasSubLink(newvar);
- return newvar;
+ /* Lastly, add any varnullingrels to the replacement expression */
+ return add_nullingrels_if_needed(context->root, newvar, var);
}
if (IsA(node, PlaceHolderVar))
{
@@ -839,6 +878,7 @@ flatten_join_alias_vars_mutator(Node *node,
{
phv->phrels = alias_relid_set(context->query,
phv->phrels);
+ /* we *don't* change phnullingrels */
}
return (Node *) phv;
}
@@ -873,8 +913,196 @@ flatten_join_alias_vars_mutator(Node *node,
}
/*
+ * Add oldvar's varnullingrels, if any, to a flattened join alias expression.
+ * The newnode has been copied, so we can modify it freely.
+ */
+static Node *
+add_nullingrels_if_needed(PlannerInfo *root, Node *newnode, Var *oldvar)
+{
+ if (oldvar->varnullingrels == NULL)
+ return newnode; /* nothing to do */
+ /* If possible, do it by adding to existing nullingrel fields */
+ if (is_standard_join_alias_expression(newnode, oldvar))
+ adjust_standard_join_alias_expression(newnode, oldvar);
+ else if (root)
+ {
+ /*
+ * We can insert a PlaceHolderVar to carry the nullingrels. However,
+ * deciding where to evaluate the PHV is slightly tricky. We first
+ * try to evaluate it at the natural semantic level of the new
+ * expression; but if that expression is variable-free, fall back to
+ * evaluating it at the join that the oldvar is an alias Var for.
+ */
+ PlaceHolderVar *newphv;
+ Index levelsup = oldvar->varlevelsup;
+ Relids phrels = pull_varnos_of_level(root, newnode, levelsup);
+
+ if (bms_is_empty(phrels)) /* variable-free? */
+ {
+ if (levelsup != 0) /* this won't work otherwise */
+ elog(ERROR, "unsupported join alias expression");
+ phrels = get_relids_for_join(root->parse, oldvar->varno);
+ /* If it's an outer join, eval below not above the join */
+ phrels = bms_del_member(phrels, oldvar->varno);
+ Assert(!bms_is_empty(phrels));
+ }
+ newphv = make_placeholder_expr(root, (Expr *) newnode, phrels);
+ /* newphv has zero phlevelsup and NULL phnullingrels; fix it */
+ newphv->phlevelsup = levelsup;
+ newphv->phnullingrels = bms_copy(oldvar->varnullingrels);
+ newnode = (Node *) newphv;
+ }
+ else
+ {
+ /* ooops, we're missing support for something the parser can make */
+ elog(ERROR, "unsupported join alias expression");
+ }
+ return newnode;
+}
+
+/*
+ * Check to see if we can insert nullingrels into this join alias expression
+ * without use of a separate PlaceHolderVar.
+ *
+ * This will handle Vars, PlaceHolderVars, and implicit-coercion and COALESCE
+ * expressions built from those. This coverage needs to handle anything
+ * that the parser would put into joinaliasvars.
+ */
+static bool
+is_standard_join_alias_expression(Node *newnode, Var *oldvar)
+{
+ if (newnode == NULL)
+ return false;
+ if (IsA(newnode, Var) &&
+ ((Var *) newnode)->varlevelsup == oldvar->varlevelsup)
+ return true;
+ else if (IsA(newnode, PlaceHolderVar) &&
+ ((PlaceHolderVar *) newnode)->phlevelsup == oldvar->varlevelsup)
+ return true;
+ else if (IsA(newnode, FuncExpr))
+ {
+ FuncExpr *fexpr = (FuncExpr *) newnode;
+
+ /*
+ * We need to assume that the function wouldn't produce non-NULL from
+ * NULL, which is reasonable for implicit coercions but otherwise not
+ * so much. (Looking at its strictness is likely overkill, and anyway
+ * it would cause us to fail if someone forgot to mark an implicit
+ * coercion as strict.)
+ */
+ if (fexpr->funcformat != COERCE_IMPLICIT_CAST ||
+ fexpr->args == NIL)
+ return false;
+
+ /*
+ * Examine only the first argument --- coercions might have additional
+ * arguments that are constants.
+ */
+ return is_standard_join_alias_expression(linitial(fexpr->args), oldvar);
+ }
+ else if (IsA(newnode, RelabelType))
+ {
+ RelabelType *relabel = (RelabelType *) newnode;
+
+ /* This definitely won't produce non-NULL from NULL */
+ return is_standard_join_alias_expression((Node *) relabel->arg, oldvar);
+ }
+ else if (IsA(newnode, CoerceViaIO))
+ {
+ CoerceViaIO *iocoerce = (CoerceViaIO *) newnode;
+
+ /* This definitely won't produce non-NULL from NULL */
+ return is_standard_join_alias_expression((Node *) iocoerce->arg, oldvar);
+ }
+ else if (IsA(newnode, ArrayCoerceExpr))
+ {
+ ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) newnode;
+
+ /* This definitely won't produce non-NULL from NULL (at array level) */
+ return is_standard_join_alias_expression((Node *) acoerce->arg, oldvar);
+ }
+ else if (IsA(newnode, CoalesceExpr))
+ {
+ CoalesceExpr *cexpr = (CoalesceExpr *) newnode;
+ ListCell *lc;
+
+ Assert(cexpr->args != NIL);
+ foreach(lc, cexpr->args)
+ {
+ if (!is_standard_join_alias_expression(lfirst(lc), oldvar))
+ return false;
+ }
+ return true;
+ }
+ else
+ return false;
+}
+
+/*
+ * Insert nullingrels into an expression accepted by
+ * is_standard_join_alias_expression.
+ */
+static void
+adjust_standard_join_alias_expression(Node *newnode, Var *oldvar)
+{
+ if (IsA(newnode, Var) &&
+ ((Var *) newnode)->varlevelsup == oldvar->varlevelsup)
+ {
+ Var *newvar = (Var *) newnode;
+
+ newvar->varnullingrels = bms_add_members(newvar->varnullingrels,
+ oldvar->varnullingrels);
+ }
+ else if (IsA(newnode, PlaceHolderVar) &&
+ ((PlaceHolderVar *) newnode)->phlevelsup == oldvar->varlevelsup)
+ {
+ PlaceHolderVar *newphv = (PlaceHolderVar *) newnode;
+
+ newphv->phnullingrels = bms_add_members(newphv->phnullingrels,
+ oldvar->varnullingrels);
+ }
+ else if (IsA(newnode, FuncExpr))
+ {
+ FuncExpr *fexpr = (FuncExpr *) newnode;
+
+ adjust_standard_join_alias_expression(linitial(fexpr->args), oldvar);
+ }
+ else if (IsA(newnode, RelabelType))
+ {
+ RelabelType *relabel = (RelabelType *) newnode;
+
+ adjust_standard_join_alias_expression((Node *) relabel->arg, oldvar);
+ }
+ else if (IsA(newnode, CoerceViaIO))
+ {
+ CoerceViaIO *iocoerce = (CoerceViaIO *) newnode;
+
+ adjust_standard_join_alias_expression((Node *) iocoerce->arg, oldvar);
+ }
+ else if (IsA(newnode, ArrayCoerceExpr))
+ {
+ ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) newnode;
+
+ adjust_standard_join_alias_expression((Node *) acoerce->arg, oldvar);
+ }
+ else if (IsA(newnode, CoalesceExpr))
+ {
+ CoalesceExpr *cexpr = (CoalesceExpr *) newnode;
+ ListCell *lc;
+
+ Assert(cexpr->args != NIL);
+ foreach(lc, cexpr->args)
+ {
+ adjust_standard_join_alias_expression(lfirst(lc), oldvar);
+ }
+ }
+ else
+ Assert(false);
+}
+
+/*
* alias_relid_set: in a set of RT indexes, replace joins by their
- * underlying base relids
+ * underlying base+OJ relids
*/
static Relids
alias_relid_set(Query *query, Relids relids)