Diffstat (limited to 'src/backend/optimizer/path/joinpath.c')
 src/backend/optimizer/path/joinpath.c | 214 ++++++++++++++++++++++++++++++
 1 file changed, 214 insertions(+), 0 deletions(-)
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 57ce97fd53b..3894991a95d 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -18,10 +18,13 @@
#include "executor/executor.h"
#include "foreign/fdwapi.h"
+#include "nodes/nodeFuncs.h"
#include "optimizer/cost.h"
+#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
+#include "utils/typcache.h"
/* Hook for plugins to get control in add_paths_to_joinrel() */
set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
@@ -52,6 +55,9 @@ static void try_partial_mergejoin_path(PlannerInfo *root,
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
JoinType jointype, JoinPathExtraData *extra);
+static inline bool clause_sides_match_join(RestrictInfo *rinfo,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel);
static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
JoinType jointype, JoinPathExtraData *extra);
@@ -163,6 +169,11 @@ add_paths_to_joinrel(PlannerInfo *root,
{
case JOIN_SEMI:
case JOIN_ANTI:
+
+ /*
+ * XXX it may be worth proving this to allow a ResultCache to be
+ * considered for Nested Loop Semi/Anti Joins.
+ */
extra.inner_unique = false; /* well, unproven */
break;
case JOIN_UNIQUE_INNER:
@@ -355,6 +366,180 @@ allow_star_schema_join(PlannerInfo *root,
}
/*
+ * paraminfo_get_equal_hashops
+ * Determine if the clauses in param_info and innerrel's lateral_vars
+ * can be hashed.
+ * Returns true if hashing is possible, otherwise returns false.
+ *
+ * Additionally, we collect the outer expressions and the hash equality
+ * operators for each parameter to innerrel. These are set in
+ * 'param_exprs' and 'operators' when we return true.
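+ *
+ * For example (an illustrative clause, not from this patch): given the
+ * parameterized clause t2.x = t1.x, with t1 as the outer rel, we collect
+ * the outer-side expression t1.x into *param_exprs and the clause's hash
+ * equality operator into *operators.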
+ */
+static bool
+paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
+ RelOptInfo *outerrel, RelOptInfo *innerrel,
+ List **param_exprs, List **operators)
+{
+ ListCell *lc;
+
+ *param_exprs = NIL;
+ *operators = NIL;
+
+ if (param_info != NULL)
+ {
+ List *clauses = param_info->ppi_clauses;
+
+ foreach(lc, clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+ OpExpr *opexpr;
+ Node *expr;
+
+ /* can't use result cache without a valid hash equality operator */
+ if (!OidIsValid(rinfo->hasheqoperator) ||
+ !clause_sides_match_join(rinfo, outerrel, innerrel))
+ {
+ list_free(*operators);
+ list_free(*param_exprs);
+ return false;
+ }
+
+ /*
+ * We already checked that this is an OpExpr with 2 args when
+ * setting hasheqoperator.
+ */
+ opexpr = (OpExpr *) rinfo->clause;
+ if (rinfo->outer_is_left)
+ expr = (Node *) linitial(opexpr->args);
+ else
+ expr = (Node *) lsecond(opexpr->args);
+
+ *operators = lappend_oid(*operators, rinfo->hasheqoperator);
+ *param_exprs = lappend(*param_exprs, expr);
+ }
+ }
+
+ /* Now add any lateral vars to the cache key too */
+ foreach(lc, innerrel->lateral_vars)
+ {
+ Node *expr = (Node *) lfirst(lc);
+ TypeCacheEntry *typentry;
+
+ /* Reject if there are any volatile functions */
+ if (contain_volatile_functions(expr))
+ {
+ list_free(*operators);
+ list_free(*param_exprs);
+ return false;
+ }
+
+ typentry = lookup_type_cache(exprType(expr),
+ TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR);
+
+ /* can't use result cache without a valid hash equality operator */
+ if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
+ {
+ list_free(*operators);
+ list_free(*param_exprs);
+ return false;
+ }
+
+ *operators = lappend_oid(*operators, typentry->eq_opr);
+ *param_exprs = lappend(*param_exprs, expr);
+ }
+
+ /* We're okay to use result cache */
+ return true;
+}
+
+/*
+ * get_resultcache_path
+ * If possible, make and return a Result Cache path atop 'inner_path'.
+ * Otherwise return NULL.
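+ *
+ * For illustration only (hypothetical query, index name and plan shape,
+ * not taken from this patch): for a query such as
+ *
+ *   SELECT * FROM t1 JOIN t2 ON t1.a = t2.a;
+ *
+ * this allows the planner to consider a plan like:
+ *
+ *   Nested Loop
+ *     ->  Seq Scan on t1
+ *     ->  Result Cache
+ *           Cache Key: t1.a
+ *           ->  Index Scan using t2_a_idx on t2
+ *                 Index Cond: (a = t1.a)
+ *
+ * where repeated inner scans for an already-seen value of t1.a are
+ * answered from the cache rather than by rescanning t2.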
+ */
+static Path *
+get_resultcache_path(PlannerInfo *root, RelOptInfo *innerrel,
+ RelOptInfo *outerrel, Path *inner_path,
+ Path *outer_path, JoinType jointype,
+ JoinPathExtraData *extra)
+{
+ List *param_exprs;
+ List *hash_operators;
+ ListCell *lc;
+
+ /* Obviously not if it's disabled */
+ if (!enable_resultcache)
+ return NULL;
+
+ /*
+ * We can safely not bother with all this unless we expect to perform more
+ * than one inner scan. The first scan is always going to be a cache
+ * miss. This would likely fail later anyway based on costs, so this is
+ * really just to save some wasted effort.
+ */
+ if (outer_path->parent->rows < 2)
+ return NULL;
+
+ /*
+ * We can only have a result cache when there's some kind of cache key,
+ * either parameterized path clauses or lateral Vars. With no cache key,
+ * a Materialize node is likely to be more useful.
+ */
+ if ((inner_path->param_info == NULL ||
+ inner_path->param_info->ppi_clauses == NIL) &&
+ innerrel->lateral_vars == NIL)
+ return NULL;
+
+ /*
+ * Currently we don't do this for SEMI and ANTI joins unless they're
+ * marked as inner_unique. This is because nested loop SEMI/ANTI joins
+ * don't scan the inner node to completion, which means the result cache
+ * cannot mark the cache entry as complete.
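+ *
+ * For example (illustrative, not from this patch), with
+ * WHERE EXISTS (SELECT 1 FROM t2 WHERE t2.a = t1.a), the inner scan
+ * stops as soon as the first matching row is found, so the tuples
+ * cached for that parameter value would be an incomplete set.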
+ *
+ * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique
+ * = true. Should we? See add_paths_to_joinrel()
+ */
+ if (!extra->inner_unique && (jointype == JOIN_SEMI ||
+ jointype == JOIN_ANTI))
+ return NULL;
+
+ /*
+ * We can't use a result cache if there are volatile functions in the
+ * inner rel's target list or restrict list. A cache hit could reduce the
+ * number of calls to these functions.
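+ *
+ * For example (illustrative), caching the result of a qual such as
+ * t2.b < random() could reduce the number of times random() is
+ * evaluated, changing the query's behavior.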
+ */
+ if (contain_volatile_functions((Node *) innerrel->reltarget))
+ return NULL;
+
+ foreach(lc, innerrel->baserestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ if (contain_volatile_functions((Node *) rinfo))
+ return NULL;
+ }
+
+ /* Check if we have hash ops for each parameter to the path */
+ if (paraminfo_get_equal_hashops(root,
+ inner_path->param_info,
+ outerrel,
+ innerrel,
+ &param_exprs,
+ &hash_operators))
+ {
+ return (Path *) create_resultcache_path(root,
+ innerrel,
+ inner_path,
+ param_exprs,
+ hash_operators,
+ extra->inner_unique,
+ outer_path->parent->rows);
+ }
+
+ return NULL;
+}
+
+/*
* try_nestloop_path
* Consider a nestloop join path; if it appears useful, push it into
* the joinrel's pathlist via add_path().
@@ -1471,6 +1656,7 @@ match_unsorted_outer(PlannerInfo *root,
foreach(lc2, innerrel->cheapest_parameterized_paths)
{
Path *innerpath = (Path *) lfirst(lc2);
+ Path *rcpath;
try_nestloop_path(root,
joinrel,
@@ -1479,6 +1665,22 @@ match_unsorted_outer(PlannerInfo *root,
merge_pathkeys,
jointype,
extra);
+
+ /*
+ * Try generating a result cache path and see if that makes the
+ * nested loop any cheaper.
+ */
+ rcpath = get_resultcache_path(root, innerrel, outerrel,
+ innerpath, outerpath, jointype,
+ extra);
+ if (rcpath != NULL)
+ try_nestloop_path(root,
+ joinrel,
+ outerpath,
+ rcpath,
+ merge_pathkeys,
+ jointype,
+ extra);
}
/* Also consider materialized form of the cheapest inner path */
@@ -1633,6 +1835,7 @@ consider_parallel_nestloop(PlannerInfo *root,
foreach(lc2, innerrel->cheapest_parameterized_paths)
{
Path *innerpath = (Path *) lfirst(lc2);
+ Path *rcpath;
/* Can't join to an inner path that is not parallel-safe */
if (!innerpath->parallel_safe)
@@ -1657,6 +1860,17 @@ consider_parallel_nestloop(PlannerInfo *root,
try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
pathkeys, jointype, extra);
+
+ /*
+ * Try generating a result cache path and see if that makes the
+ * nested loop any cheaper.
+ */
+ rcpath = get_resultcache_path(root, innerrel, outerrel,
+ innerpath, outerpath, jointype,
+ extra);
+ if (rcpath != NULL)
+ try_partial_nestloop_path(root, joinrel, outerpath, rcpath,
+ pathkeys, jointype, extra);
}
}
}