1 files changed, 35 insertions, 178 deletions
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 1395a211398..faca30b322d 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1088,12 +1088,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	Path		sort_path;		/* dummy for result of cost_sort */
 	Path		agg_path;		/* dummy for result of cost_agg */
 	MemoryContext oldcontext;
-	List	   *in_operators;
-	List	   *uniq_exprs;
-	bool		all_btree;
-	bool		all_hash;
 	int			numCols;
-	ListCell   *lc;
 
 	/* Caller made a mistake if subpath isn't cheapest_total ... */
 	Assert(subpath == rel->cheapest_total_path);
@@ -1106,8 +1101,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	if (rel->cheapest_unique_path)
 		return (UniquePath *) rel->cheapest_unique_path;
 
-	/* If we previously failed, return NULL quickly */
-	if (sjinfo->join_quals == NIL)
+	/* If it's not possible to unique-ify, return NULL */
+	if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash))
 		return NULL;
 
 	/*
@@ -1116,150 +1111,6 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	 */
 	oldcontext = MemoryContextSwitchTo(root->planner_cxt);
 
-	/*----------
-	 * Look to see whether the semijoin's join quals consist of AND'ed
-	 * equality operators, with (only) RHS variables on only one side of
-	 * each one.  If so, we can figure out how to enforce uniqueness for
-	 * the RHS.
-	 *
-	 * Note that the input join_quals list is the list of quals that are
-	 * *syntactically* associated with the semijoin, which in practice means
-	 * the synthesized comparison list for an IN or the WHERE of an EXISTS.
-	 * Particularly in the latter case, it might contain clauses that aren't
-	 * *semantically* associated with the join, but refer to just one side or
-	 * the other.  We can ignore such clauses here, as they will just drop
-	 * down to be processed within one side or the other.  (It is okay to
-	 * consider only the syntactically-associated clauses here because for a
-	 * semijoin, no higher-level quals could refer to the RHS, and so there
-	 * can be no other quals that are semantically associated with this join.
-	 * We do things this way because it is useful to be able to run this test
-	 * before we have extracted the list of quals that are actually
-	 * semantically associated with the particular join.)
-	 *
-	 * Note that the in_operators list consists of the joinqual operators
-	 * themselves (but commuted if needed to put the RHS value on the right).
-	 * These could be cross-type operators, in which case the operator
-	 * actually needed for uniqueness is a related single-type operator.
-	 * We assume here that that operator will be available from the btree
-	 * or hash opclass when the time comes ... if not, create_unique_plan()
-	 * will fail.
-	 *----------
-	 */
-	in_operators = NIL;
-	uniq_exprs = NIL;
-	all_btree = true;
-	all_hash = enable_hashagg;	/* don't consider hash if not enabled */
-	foreach(lc, sjinfo->join_quals)
-	{
-		OpExpr	   *op = (OpExpr *) lfirst(lc);
-		Oid			opno;
-		Node	   *left_expr;
-		Node	   *right_expr;
-		Relids		left_varnos;
-		Relids		right_varnos;
-		Relids		all_varnos;
-		Oid			opinputtype;
-
-		/* Is it a binary opclause? */
-		if (!IsA(op, OpExpr) ||
-			list_length(op->args) != 2)
-		{
-			/* No, but does it reference both sides? */
-			all_varnos = pull_varnos((Node *) op);
-			if (!bms_overlap(all_varnos, sjinfo->syn_righthand) ||
-				bms_is_subset(all_varnos, sjinfo->syn_righthand))
-			{
-				/*
-				 * Clause refers to only one rel, so ignore it --- unless it
-				 * contains volatile functions, in which case we'd better
-				 * punt.
-				 */
-				if (contain_volatile_functions((Node *) op))
-					goto no_unique_path;
-				continue;
-			}
-			/* Non-operator clause referencing both sides, must punt */
-			goto no_unique_path;
-		}
-
-		/* Extract data from binary opclause */
-		opno = op->opno;
-		left_expr = linitial(op->args);
-		right_expr = lsecond(op->args);
-		left_varnos = pull_varnos(left_expr);
-		right_varnos = pull_varnos(right_expr);
-		all_varnos = bms_union(left_varnos, right_varnos);
-		opinputtype = exprType(left_expr);
-
-		/* Does it reference both sides? */
-		if (!bms_overlap(all_varnos, sjinfo->syn_righthand) ||
-			bms_is_subset(all_varnos, sjinfo->syn_righthand))
-		{
-			/*
-			 * Clause refers to only one rel, so ignore it --- unless it
-			 * contains volatile functions, in which case we'd better punt.
-			 */
-			if (contain_volatile_functions((Node *) op))
-				goto no_unique_path;
-			continue;
-		}
-
-		/* check rel membership of arguments */
-		if (!bms_is_empty(right_varnos) &&
-			bms_is_subset(right_varnos, sjinfo->syn_righthand) &&
-			!bms_overlap(left_varnos, sjinfo->syn_righthand))
-		{
-			/* typical case, right_expr is RHS variable */
-		}
-		else if (!bms_is_empty(left_varnos) &&
-				 bms_is_subset(left_varnos, sjinfo->syn_righthand) &&
-				 !bms_overlap(right_varnos, sjinfo->syn_righthand))
-		{
-			/* flipped case, left_expr is RHS variable */
-			opno = get_commutator(opno);
-			if (!OidIsValid(opno))
-				goto no_unique_path;
-			right_expr = left_expr;
-		}
-		else
-			goto no_unique_path;
-
-		/* all operators must be btree equality or hash equality */
-		if (all_btree)
-		{
-			/* oprcanmerge is considered a hint... */
-			if (!op_mergejoinable(opno, opinputtype) ||
-				get_mergejoin_opfamilies(opno) == NIL)
-				all_btree = false;
-		}
-		if (all_hash)
-		{
-			/* ... but oprcanhash had better be correct */
-			if (!op_hashjoinable(opno, opinputtype))
-				all_hash = false;
-		}
-		if (!(all_btree || all_hash))
-			goto no_unique_path;
-
-		/* so far so good, keep building lists */
-		in_operators = lappend_oid(in_operators, opno);
-		uniq_exprs = lappend(uniq_exprs, copyObject(right_expr));
-	}
-
-	/* Punt if we didn't find at least one column to unique-ify */
-	if (uniq_exprs == NIL)
-		goto no_unique_path;
-
-	/*
-	 * The expressions we'd need to unique-ify mustn't be volatile.
-	 */
-	if (contain_volatile_functions((Node *) uniq_exprs))
-		goto no_unique_path;
-
-	/*
-	 * If we get here, we can unique-ify using at least one of sorting and
-	 * hashing.  Start building the result Path object.
-	 */
 	pathnode = makeNode(UniquePath);
 
 	pathnode->path.pathtype = T_Unique;
@@ -1273,18 +1124,19 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	pathnode->path.pathkeys = NIL;
 
 	pathnode->subpath = subpath;
-	pathnode->in_operators = in_operators;
-	pathnode->uniq_exprs = uniq_exprs;
+	pathnode->in_operators = sjinfo->semi_operators;
+	pathnode->uniq_exprs = sjinfo->semi_rhs_exprs;
 
 	/*
 	 * If the input is a relation and it has a unique index that proves the
-	 * uniq_exprs are unique, then we don't need to do anything.  Note that
-	 * relation_has_unique_index_for automatically considers restriction
+	 * semi_rhs_exprs are unique, then we don't need to do anything.  Note
+	 * that relation_has_unique_index_for automatically considers restriction
 	 * clauses for the rel, as well.
 	 */
-	if (rel->rtekind == RTE_RELATION && all_btree &&
+	if (rel->rtekind == RTE_RELATION && sjinfo->semi_can_btree &&
 		relation_has_unique_index_for(root, rel, NIL,
-									  uniq_exprs, in_operators))
+									  sjinfo->semi_rhs_exprs,
+									  sjinfo->semi_operators))
 	{
 		pathnode->umethod = UNIQUE_PATH_NOOP;
 		pathnode->path.rows = rel->rows;
@@ -1304,7 +1156,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	 * don't need to do anything.  The test for uniqueness has to consider
 	 * exactly which columns we are extracting; for example "SELECT DISTINCT
 	 * x,y" doesn't guarantee that x alone is distinct. So we cannot check for
-	 * this optimization unless uniq_exprs consists only of simple Vars
+	 * this optimization unless semi_rhs_exprs consists only of simple Vars
 	 * referencing subquery outputs.  (Possibly we could do something with
 	 * expressions in the subquery outputs, too, but for now keep it simple.)
 	 */
@@ -1316,11 +1168,13 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 		{
 			List	   *sub_tlist_colnos;
 
-			sub_tlist_colnos = translate_sub_tlist(uniq_exprs, rel->relid);
+			sub_tlist_colnos = translate_sub_tlist(sjinfo->semi_rhs_exprs,
+												   rel->relid);
 
 			if (sub_tlist_colnos &&
 				query_is_distinct_for(rte->subquery,
-									  sub_tlist_colnos, in_operators))
+									  sub_tlist_colnos,
+									  sjinfo->semi_operators))
 			{
 				pathnode->umethod = UNIQUE_PATH_NOOP;
 				pathnode->path.rows = rel->rows;
@@ -1338,10 +1192,12 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	}
 
 	/* Estimate number of output rows */
-	pathnode->path.rows = estimate_num_groups(root, uniq_exprs, rel->rows);
-	numCols = list_length(uniq_exprs);
+	pathnode->path.rows = estimate_num_groups(root,
+											  sjinfo->semi_rhs_exprs,
+											  rel->rows);
+	numCols = list_length(sjinfo->semi_rhs_exprs);
 
-	if (all_btree)
+	if (sjinfo->semi_can_btree)
 	{
 		/*
 		 * Estimate cost for sort+unique implementation
@@ -1363,7 +1219,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 		sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
 	}
 
-	if (all_hash)
+	if (sjinfo->semi_can_hash)
 	{
 		/*
 		 * Estimate the overhead per hashtable entry at 64 bytes (same as in
@@ -1372,7 +1228,13 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 		int			hashentrysize = rel->width + 64;
 
 		if (hashentrysize * pathnode->path.rows > work_mem * 1024L)
-			all_hash = false;	/* don't try to hash */
+		{
+			/*
+			 * We should not try to hash.  Hack the SpecialJoinInfo to
+			 * remember this, in case we come through here again.
+			 */
+			sjinfo->semi_can_hash = false;
+		}
 		else
 			cost_agg(&agg_path, root,
 					 AGG_HASHED, NULL,
@@ -1382,19 +1244,23 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 					 rel->rows);
 	}
 
-	if (all_btree && all_hash)
+	if (sjinfo->semi_can_btree && sjinfo->semi_can_hash)
 	{
 		if (agg_path.total_cost < sort_path.total_cost)
 			pathnode->umethod = UNIQUE_PATH_HASH;
 		else
 			pathnode->umethod = UNIQUE_PATH_SORT;
 	}
-	else if (all_btree)
+	else if (sjinfo->semi_can_btree)
 		pathnode->umethod = UNIQUE_PATH_SORT;
-	else if (all_hash)
+	else if (sjinfo->semi_can_hash)
 		pathnode->umethod = UNIQUE_PATH_HASH;
 	else
-		goto no_unique_path;
+	{
+		/* we can get here only if we abandoned hashing above */
+		MemoryContextSwitchTo(oldcontext);
+		return NULL;
+	}
 
 	if (pathnode->umethod == UNIQUE_PATH_HASH)
 	{
@@ -1412,15 +1278,6 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	MemoryContextSwitchTo(oldcontext);
 
 	return pathnode;
-
-no_unique_path:			/* failure exit */
-
-	/* Mark the SpecialJoinInfo as not unique-able */
-	sjinfo->join_quals = NIL;
-
-	MemoryContextSwitchTo(oldcontext);
-
-	return NULL;
 }
 
 /*