Diffstat (limited to 'src')

 src/backend/executor/execGrouping.c    | 59
 src/backend/executor/nodeSetOp.c       |  9
 src/backend/executor/nodeSubplan.c     | 53
 src/backend/optimizer/plan/subselect.c | 45
 src/backend/optimizer/util/pathnode.c  | 12
 src/include/executor/executor.h        |  3
 src/include/executor/nodeSetOp.h       |  2
 src/include/executor/nodeSubplan.h     |  4
 src/include/lib/simplehash.h           | 50

 9 files changed, 206 insertions, 31 deletions
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index b4bdaa3c305..e1a3a813dd9 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -14,6 +14,7 @@
  */
 #include "postgres.h"
 
+#include "access/htup_details.h"
 #include "access/parallel.h"
 #include "common/hashfn.h"
 #include "executor/executor.h"
@@ -303,6 +304,64 @@ ResetTupleHashTable(TupleHashTable hashtable)
 }
 
 /*
+ * Estimate the amount of space needed for a TupleHashTable with nentries
+ * entries, if the tuples have average data width tupleWidth and the caller
+ * requires additionalsize extra space per entry.
+ *
+ * Return SIZE_MAX if it'd overflow size_t.
+ *
+ * nentries is "double" because this is meant for use by the planner,
+ * which typically works with double rowcount estimates.  So we'd need to
+ * clamp to integer somewhere and that might as well be here.  We do expect
+ * the value not to be NaN or negative, else the result will be garbage.
+ */
+Size
+EstimateTupleHashTableSpace(double nentries,
+                            Size tupleWidth,
+                            Size additionalsize)
+{
+    Size        sh_space;
+    double      tuples_space;
+
+    /* First estimate the space needed for the simplehash table */
+    sh_space = tuplehash_estimate_space(nentries);
+
+    /* Give up if that's already too big */
+    if (sh_space >= SIZE_MAX)
+        return sh_space;
+
+    /*
+     * Compute space needed for hashed tuples with additional data.  nentries
+     * must be somewhat sane, so it should be safe to compute this product.
+     *
+     * We assume that the hashed tuples will be kept in a BumpContext so that
+     * there is no additional per-tuple overhead.
+     *
+     * (Note that this is only accurate if MEMORY_CONTEXT_CHECKING is off,
+     * else bump.c will add a MemoryChunk header to each tuple.  However, it
+     * seems undesirable for debug builds to make different planning choices
+     * than production builds, so we assume the production behavior always.)
+     */
+    tuples_space = nentries * (MAXALIGN(SizeofMinimalTupleHeader) +
+                               MAXALIGN(tupleWidth) +
+                               MAXALIGN(additionalsize));
+
+    /*
+     * Check for size_t overflow.  This coding is trickier than it may appear,
+     * because on 64-bit machines SIZE_MAX cannot be represented exactly as a
+     * double.  We must cast it explicitly to suppress compiler warnings about
+     * an inexact conversion, and we must trust that any double value that
+     * compares strictly less than "(double) SIZE_MAX" will cast to a
+     * representable size_t value.
+     */
+    if (sh_space + tuples_space >= (double) SIZE_MAX)
+        return SIZE_MAX;
+
+    /* We don't bother estimating size of the miscellaneous overhead data */
+    return (Size) (sh_space + tuples_space);
+}
+
+/*
  * Find or create a hashtable entry for the tuple group containing the
  * given tuple.  The tuple must be the same type as the hashtable entries.
  *
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index 7b223a7ca3a..5aabed18a09 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -111,6 +111,15 @@ build_hash_table(SetOpState *setopstate)
                            false);
 }
 
+/* Planner support routine to estimate space needed for hash table */
+Size
+EstimateSetOpHashTableSpace(double nentries, Size tupleWidth)
+{
+    return EstimateTupleHashTableSpace(nentries,
+                                       tupleWidth,
+                                       sizeof(SetOpStatePerGroupData));
+}
+
 /*
  * We've completed processing a tuple group.  Decide how many copies (if any)
  * of its representative row to emit, and store the count into numOutput.
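For readers outside the executor code, here is a minimal standalone sketch (not part of the patch) of the overflow-check idiom used in EstimateTupleHashTableSpace: accumulate in double, compare against (double) SIZE_MAX, and only then convert back to size_t. The ALIGN8 macro and the 24-byte header constant are simplified stand-ins for MAXALIGN and SizeofMinimalTupleHeader.

#include <stdint.h>
#include <stdio.h>

/* simplified stand-in for MAXALIGN, assuming 8-byte alignment */
#define ALIGN8(len) (((uint64_t) (len) + 7) & ~(uint64_t) 7)

static size_t
estimate_tuples_space(double nentries, size_t tupleWidth, size_t additionalsize)
{
    /* 24 is a made-up stand-in for SizeofMinimalTupleHeader */
    double      space = nentries * (ALIGN8(24) +
                                    ALIGN8(tupleWidth) +
                                    ALIGN8(additionalsize));

    /*
     * On 64-bit machines (double) SIZE_MAX rounds up to 2^64, so any double
     * strictly below it converts to size_t without overflow; testing with
     * ">=" therefore saturates safely.
     */
    if (space >= (double) SIZE_MAX)
        return SIZE_MAX;
    return (size_t) space;
}

int
main(void)
{
    /* 1e6 entries of width 64 -> (24 + 64) bytes apiece after alignment */
    printf("%zu\n", estimate_tuples_space(1e6, 64, 0));  /* prints 88000000 */
    printf("%zu\n", estimate_tuples_space(1e30, 64, 0)); /* saturates to SIZE_MAX */
    return 0;
}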
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 9f6e45bcb0b..1cd0988bb49 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -525,7 +525,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
                                               node->tab_hash_funcs,
                                               node->tab_collations,
                                               nbuckets,
-                                              0,
+                                              0,    /* no additional data */
                                               node->planstate->state->es_query_cxt,
                                               node->tuplesContext,
                                               innerecontext->ecxt_per_tuple_memory,
@@ -554,7 +554,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
                                                   node->tab_hash_funcs,
                                                   node->tab_collations,
                                                   nbuckets,
-                                                  0,
+                                                  0,    /* no additional data */
                                                   node->planstate->state->es_query_cxt,
                                                   node->tuplesContext,
                                                   innerecontext->ecxt_per_tuple_memory,
@@ -636,6 +636,55 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
     MemoryContextSwitchTo(oldcontext);
 }
 
+/* Planner support routine to estimate space needed for hash table(s) */
+Size
+EstimateSubplanHashTableSpace(double nentries,
+                              Size tupleWidth,
+                              bool unknownEqFalse)
+{
+    Size        tab1space,
+                tab2space;
+
+    /* Estimate size of main hashtable */
+    tab1space = EstimateTupleHashTableSpace(nentries,
+                                            tupleWidth,
+                                            0 /* no additional data */ );
+
+    /* Give up if that's already too big */
+    if (tab1space >= SIZE_MAX)
+        return tab1space;
+
+    /* Done if we don't need a hashnulls table */
+    if (unknownEqFalse)
+        return tab1space;
+
+    /*
+     * Adjust the rowcount estimate in the same way that buildSubPlanHash
+     * will, except that we don't bother with the special case for a single
+     * hash column.  (We skip that detail because it'd be notationally
+     * painful for our caller to provide the column count, and this table
+     * has relatively little impact on the total estimate anyway.)
+     */
+    nentries /= 16;
+    if (nentries < 1)
+        nentries = 1;
+
+    /*
+     * It might be sane to also reduce the tupleWidth, but on the other hand
+     * we are not accounting for the space taken by the tuples' null bitmaps.
+     * Leave it alone for now.
+     */
+    tab2space = EstimateTupleHashTableSpace(nentries,
+                                            tupleWidth,
+                                            0 /* no additional data */ );
+
+    /* Guard against overflow */
+    if (tab2space >= SIZE_MAX - tab1space)
+        return SIZE_MAX;
+
+    return tab1space + tab2space;
+}
+
 /*
  * execTuplesUnequal
  *      Return true if two tuples are definitely unequal in the indicated
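The final guard above is a saturating addition: either table's estimate may already be SIZE_MAX ("too big to hash"), so the sum is formed only after proving it cannot wrap. A standalone sketch of that idiom, not part of the patch:

#include <stdint.h>

/*
 * Saturating add of two size estimates, as in EstimateSubplanHashTableSpace:
 * if tab2space >= SIZE_MAX - tab1space, the true sum would reach or exceed
 * SIZE_MAX, so report SIZE_MAX ("don't hash") instead of wrapping around.
 * Note this also handles tab1space == SIZE_MAX, since anything is >= 0.
 */
static size_t
add_saturating(size_t tab1space, size_t tab2space)
{
    if (tab2space >= SIZE_MAX - tab1space)
        return SIZE_MAX;
    return tab1space + tab2space;
}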
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 14192a13236..ff63d20f8d5 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -20,6 +20,7 @@
 #include "catalog/pg_operator.h"
 #include "catalog/pg_type.h"
 #include "executor/executor.h"
+#include "executor/nodeSubplan.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
@@ -79,8 +80,8 @@ static Node *convert_testexpr(PlannerInfo *root,
                               List *subst_nodes);
 static Node *convert_testexpr_mutator(Node *node,
                                       convert_testexpr_context *context);
-static bool subplan_is_hashable(Plan *plan);
-static bool subpath_is_hashable(Path *path);
+static bool subplan_is_hashable(Plan *plan, bool unknownEqFalse);
+static bool subpath_is_hashable(Path *path, bool unknownEqFalse);
 static bool testexpr_is_hashable(Node *testexpr, List *param_ids);
 static bool test_opexpr_is_hashable(OpExpr *testexpr, List *param_ids);
 static bool hash_ok_operator(OpExpr *expr);
@@ -283,7 +284,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
         best_path = final_rel->cheapest_total_path;
 
         /* Now we can check if it'll fit in hash_mem */
-        if (subpath_is_hashable(best_path))
+        if (subpath_is_hashable(best_path, true))
         {
             SubPlan    *hashplan;
             AlternativeSubPlan *asplan;
@@ -524,7 +525,7 @@ build_subplan(PlannerInfo *root, Plan *plan, Path *path,
      */
     if (subLinkType == ANY_SUBLINK &&
         splan->parParam == NIL &&
-        subplan_is_hashable(plan) &&
+        subplan_is_hashable(plan, unknownEqFalse) &&
         testexpr_is_hashable(splan->testexpr, splan->paramIds))
         splan->useHashTable = true;
 
@@ -711,19 +712,19 @@ convert_testexpr_mutator(Node *node,
  * is suitable for hashing.  We only look at the subquery itself.
  */
 static bool
-subplan_is_hashable(Plan *plan)
+subplan_is_hashable(Plan *plan, bool unknownEqFalse)
 {
-    double      subquery_size;
+    Size        hashtablesize;
 
     /*
-     * The estimated size of the subquery result must fit in hash_mem. (Note:
-     * we use heap tuple overhead here even though the tuples will actually be
-     * stored as MinimalTuples; this provides some fudge factor for hashtable
-     * overhead.)
+     * The estimated size of the hashtable holding the subquery result must
+     * fit in hash_mem.  (Note: reject on equality, to ensure that an estimate
+     * of SIZE_MAX disables hashing regardless of the hash_mem limit.)
      */
-    subquery_size = plan->plan_rows *
-        (MAXALIGN(plan->plan_width) + MAXALIGN(SizeofHeapTupleHeader));
-    if (subquery_size > get_hash_memory_limit())
+    hashtablesize = EstimateSubplanHashTableSpace(plan->plan_rows,
+                                                  plan->plan_width,
+                                                  unknownEqFalse);
+    if (hashtablesize >= get_hash_memory_limit())
         return false;
 
     return true;
@@ -735,19 +736,19 @@ subplan_is_hashable(Plan *plan)
  * Identical to subplan_is_hashable, but work from a Path for the subplan.
  */
 static bool
-subpath_is_hashable(Path *path)
+subpath_is_hashable(Path *path, bool unknownEqFalse)
 {
-    double      subquery_size;
+    Size        hashtablesize;
 
     /*
-     * The estimated size of the subquery result must fit in hash_mem. (Note:
-     * we use heap tuple overhead here even though the tuples will actually be
-     * stored as MinimalTuples; this provides some fudge factor for hashtable
-     * overhead.)
+     * The estimated size of the hashtable holding the subquery result must
+     * fit in hash_mem.  (Note: reject on equality, to ensure that an estimate
+     * of SIZE_MAX disables hashing regardless of the hash_mem limit.)
      */
-    subquery_size = path->rows *
-        (MAXALIGN(path->pathtarget->width) + MAXALIGN(SizeofHeapTupleHeader));
-    if (subquery_size > get_hash_memory_limit())
+    hashtablesize = EstimateSubplanHashTableSpace(path->rows,
+                                                  path->pathtarget->width,
+                                                  unknownEqFalse);
+    if (hashtablesize >= get_hash_memory_limit())
         return false;
 
     return true;
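Why ">=" rather than ">": estimates saturate at SIZE_MAX, and a saturated estimate must disable hashing no matter how high hash_mem is set; rejecting on equality guarantees that even in the degenerate case where the limit itself is SIZE_MAX. A hypothetical one-function sketch of the convention (not in the patch):

#include <stdbool.h>
#include <stdint.h>

/*
 * Illustrative helper: "estimate >= limit" rejects hashing even when
 * limit == SIZE_MAX, which a strict ">" comparison would fail to do.
 */
static bool
fits_in_hash_mem(size_t estimate, size_t limit)
{
    return estimate < limit;
}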
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 44ac5312edd..e4fd6950fad 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -17,6 +17,7 @@
 #include <math.h>
 
 #include "access/htup_details.h"
+#include "executor/nodeSetOp.h"
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
 #include "nodes/extensible.h"
@@ -3461,7 +3462,7 @@ create_setop_path(PlannerInfo *root,
     }
     else
     {
-        Size        hashentrysize;
+        Size        hashtablesize;
 
         /*
          * In hashed mode, we must read all the input before we can emit
@@ -3490,11 +3491,12 @@ create_setop_path(PlannerInfo *root,
 
         /*
          * Also disable if it doesn't look like the hashtable will fit into
-         * hash_mem.
+         * hash_mem.  (Note: reject on equality, to ensure that an estimate of
+         * SIZE_MAX disables hashing regardless of the hash_mem limit.)
          */
-        hashentrysize = MAXALIGN(leftpath->pathtarget->width) +
-            MAXALIGN(SizeofMinimalTupleHeader);
-        if (hashentrysize * numGroups > get_hash_memory_limit())
+        hashtablesize = EstimateSetOpHashTableSpace(numGroups,
+                                                    leftpath->pathtarget->width);
+        if (hashtablesize >= get_hash_memory_limit())
             pathnode->path.disabled_nodes++;
     }
     pathnode->path.rows = outputRows;
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 8e7a5453064..086f52cff3d 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -157,6 +157,9 @@ extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable,
                                          ExprState *eqcomp,
                                          ExprState *hashexpr);
 extern void ResetTupleHashTable(TupleHashTable hashtable);
+extern Size EstimateTupleHashTableSpace(double nentries,
+                                        Size tupleWidth,
+                                        Size additionalsize);
 
 #ifndef FRONTEND
 /*
diff --git a/src/include/executor/nodeSetOp.h b/src/include/executor/nodeSetOp.h
index 024c6ba1fce..302936df8be 100644
--- a/src/include/executor/nodeSetOp.h
+++ b/src/include/executor/nodeSetOp.h
@@ -20,4 +20,6 @@ extern SetOpState *ExecInitSetOp(SetOp *node, EState *estate, int eflags);
 extern void ExecEndSetOp(SetOpState *node);
 extern void ExecReScanSetOp(SetOpState *node);
 
+extern Size EstimateSetOpHashTableSpace(double nentries, Size tupleWidth);
+
 #endif                          /* NODESETOP_H */
diff --git a/src/include/executor/nodeSubplan.h b/src/include/executor/nodeSubplan.h
index a1cafbcc694..301c29d1f24 100644
--- a/src/include/executor/nodeSubplan.h
+++ b/src/include/executor/nodeSubplan.h
@@ -20,6 +20,10 @@ extern SubPlanState *ExecInitSubPlan(SubPlan *subplan, PlanState *parent);
 extern Datum ExecSubPlan(SubPlanState *node,
                          ExprContext *econtext,
                          bool *isNull);
 
+extern Size EstimateSubplanHashTableSpace(double nentries,
+                                          Size tupleWidth,
+                                          bool unknownEqFalse);
+
 extern void ExecReScanSetParamPlan(SubPlanState *node, PlanState *parent);
 extern void ExecSetParamPlan(SubPlanState *node, ExprContext *econtext);
diff --git a/src/include/lib/simplehash.h b/src/include/lib/simplehash.h
index 9622131ede6..031a377da84 100644
--- a/src/include/lib/simplehash.h
+++ b/src/include/lib/simplehash.h
@@ -125,6 +125,7 @@
 #define SH_ITERATE SH_MAKE_NAME(iterate)
 #define SH_ALLOCATE SH_MAKE_NAME(allocate)
 #define SH_FREE SH_MAKE_NAME(free)
+#define SH_ESTIMATE_SPACE SH_MAKE_NAME(estimate_space)
 #define SH_STAT SH_MAKE_NAME(stat)
 
 /* internal helper functions (no externally visible prototypes) */
@@ -242,7 +243,10 @@ SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);
 /* <element> *<prefix>_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
 SH_SCOPE SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
 
-/* void <prefix>_stat(<prefix>_hash *tb */
+/* size_t <prefix>_estimate_space(double nentries) */
+SH_SCOPE size_t SH_ESTIMATE_SPACE(double nentries);
+
+/* void <prefix>_stat(<prefix>_hash *tb) */
 SH_SCOPE void SH_STAT(SH_TYPE * tb);
 
 #endif                          /* SH_DECLARE */
@@ -305,7 +309,7 @@ SH_SCOPE void SH_STAT(SH_TYPE * tb);
 
 /*
  * Compute allocation size for hashtable.  Result can be passed to
- * SH_UPDATE_PARAMETERS.
+ * SH_UPDATE_PARAMETERS.  (Keep SH_ESTIMATE_SPACE in sync with this!)
  */
 static inline uint64
 SH_COMPUTE_SIZE(uint64 newsize)
@@ -1069,6 +1073,47 @@ SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
 }
 
 /*
+ * Estimate the amount of space needed for a hashtable with nentries entries.
+ * Return SIZE_MAX if that's too many entries.
+ *
+ * nentries is "double" because this is meant for use by the planner,
+ * which typically works with double rowcount estimates.  So we'd need to
+ * clamp to integer somewhere and that might as well be here.  We do expect
+ * the value not to be NaN or negative, else the result will be garbage.
+ */
+SH_SCOPE size_t
+SH_ESTIMATE_SPACE(double nentries)
+{
+    uint64      size;
+    uint64      space;
+
+    /* scale request by SH_FILLFACTOR, as SH_CREATE does */
+    nentries = nentries / SH_FILLFACTOR;
+
+    /* fail if we'd overrun SH_MAX_SIZE entries */
+    if (nentries >= SH_MAX_SIZE)
+        return SIZE_MAX;
+
+    /* should be safe to convert to uint64 */
+    size = (uint64) nentries;
+
+    /* supporting zero sized hashes would complicate matters */
+    size = Max(size, 2);
+
+    /* round up size to the next power of 2, that's how bucketing works */
+    size = pg_nextpower2_64(size);
+
+    /* calculate space needed for ->data */
+    space = ((uint64) sizeof(SH_ELEMENT_TYPE)) * size;
+
+    /* verify that allocation of ->data is possible on this platform */
+    if (space >= SIZE_MAX / 2)
+        return SIZE_MAX;
+
+    return (size_t) space + sizeof(SH_TYPE);
+}
+
+/*
  * Report some statistics about the state of the hashtable.  For
  * debugging/profiling purposes only.
  */
@@ -1195,6 +1240,7 @@ SH_STAT(SH_TYPE * tb)
 #undef SH_ITERATE
 #undef SH_ALLOCATE
 #undef SH_FREE
+#undef SH_ESTIMATE_SPACE
 #undef SH_STAT
 
 /* internal function names */
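To make the arithmetic concrete, here is a standalone walk-through (illustrative, not part of the patch) of what SH_ESTIMATE_SPACE computes for 1000 requested entries. SH_FILLFACTOR is 0.9 in simplehash.h; the 48-byte bucket size is a made-up stand-in for sizeof(SH_ELEMENT_TYPE), and the trailing sizeof(SH_TYPE) header term is omitted for brevity.

#include <stdint.h>
#include <stdio.h>

/* round v up to the next power of two (v >= 1) */
static uint64_t
next_pow2(uint64_t v)
{
    uint64_t    p = 1;

    while (p < v)
        p <<= 1;
    return p;
}

int
main(void)
{
    double      nentries = 1000.0;
    uint64_t    size;

    size = (uint64_t) (nentries / 0.9); /* 1000 / 0.9 -> 1111 slots */
    if (size < 2)
        size = 2;               /* zero-sized tables aren't supported */
    size = next_pow2(size);     /* 1111 -> 2048 buckets */

    /* 2048 buckets * 48 bytes apiece = 98304 bytes for ->data */
    printf("%llu bytes\n", (unsigned long long) (size * 48));
    return 0;
}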
