Diffstat (limited to 'src/backend/executor/execGrouping.c')
 src/backend/executor/execGrouping.c | 153
 1 file changed, 117 insertions(+), 36 deletions(-)
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 75087204f0c..8b64a625ca5 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -14,15 +14,18 @@
*/
#include "postgres.h"
+#include <math.h>
+
+#include "access/htup_details.h"
#include "access/parallel.h"
#include "common/hashfn.h"
#include "executor/executor.h"
#include "miscadmin.h"
#include "utils/lsyscache.h"
-static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
+static int TupleHashTableMatch(struct tuplehash_hash *tb, MinimalTuple tuple1, MinimalTuple tuple2);
static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
- const MinimalTuple tuple);
+ MinimalTuple tuple);
static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
TupleTableSlot *slot,
bool *isnew, uint32 hash);
@@ -143,10 +146,10 @@ execTuplesHashPrepare(int numCols,
* eqfuncoids: OIDs of equality comparison functions to use
* hashfunctions: FmgrInfos of datatype-specific hashing functions to use
* collations: collations to use in comparisons
- * nbuckets: initial estimate of hashtable size
+ * nelements: initial estimate of hashtable size
* additionalsize: size of data that may be stored along with the hash entry
- * metacxt: memory context for long-lived allocation, but not per-entry data
- * tablecxt: memory context in which to store table entries
+ * metacxt: memory context for long-lived data and the simplehash table
+ * tuplescxt: memory context in which to store the hashed tuples themselves
* tempcxt: short-lived context for evaluation of hash and comparison functions
* use_variable_hash_iv: if true, adjust hash IV per-parallel-worker
*
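For concreteness, here is a hedged sketch of a caller wiring the parameters just described together. Everything lowercase below (estate, parent, inputDesc, inputOps, numCols, keyColIdx, eqfuncoids, hashfunctions, collations, nelements, econtext) is assumed caller state for illustration and is not taken from this patch:

    /* Sketch only: all lowercase identifiers are assumed caller state. */
    MemoryContext metacxt = estate->es_query_cxt;   /* long-lived, supports pfree */
    MemoryContext tuplescxt = BumpContextCreate(metacxt,
                                                "hashed tuples",
                                                ALLOCSET_DEFAULT_SIZES);
    TupleHashTable hashtable;

    hashtable = BuildTupleHashTable(parent,         /* PlanState of owning node */
                                    inputDesc,
                                    inputOps,       /* slot ops of input tuples */
                                    numCols,
                                    keyColIdx,
                                    eqfuncoids,
                                    hashfunctions,
                                    collations,
                                    nelements,      /* double rowcount estimate */
                                    0,              /* no additional per-entry data */
                                    metacxt,
                                    tuplescxt,
                                    econtext->ecxt_per_tuple_memory,  /* reset per tuple */
                                    false);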
@@ -157,11 +160,25 @@ execTuplesHashPrepare(int numCols,
* Note that the keyColIdx, hashfunctions, and collations arrays must be
* allocated in storage that will live as long as the hashtable does.
*
+ * The metacxt and tuplescxt are separate because it's usually desirable for
+ * tuplescxt to be a BumpContext to avoid memory wastage, while metacxt must
+ * support pfree in case the simplehash table needs to be enlarged. (We could
+ * simplify the API of TupleHashTables by managing the tuplescxt internally.
+ * But that would be disadvantageous to nodeAgg.c and nodeSubplan.c, which use
+ * a single tuplescxt for multiple TupleHashTables that are reset together.)
+ *
* LookupTupleHashEntry, FindTupleHashEntry, and related functions may leak
* memory in the tempcxt. It is caller's responsibility to reset that context
* reasonably often, typically once per tuple. (We do it that way, rather
* than managing an extra context within the hashtable, because in many cases
* the caller can specify a tempcxt that it needs to reset per-tuple anyway.)
+ *
+ * We don't currently provide DestroyTupleHashTable functionality; the hash
+ * table will be cleaned up at destruction of the metacxt. (Some callers
+ * bother to delete the tuplescxt explicitly, though it'd be sufficient to
+ * ensure it's a child of the metacxt.) There's not much point in working
+ * harder than this so long as the expression-evaluation infrastructure
+ * behaves similarly.
*/
TupleHashTable
BuildTupleHashTable(PlanState *parent,
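Given the cleanup note above, a hedged sketch of the intended teardown, assuming tuplescxt was created as a child of metacxt:

    /*
     * Sketch, not from this patch: if tuplescxt is a child of metacxt,
     * one delete releases the simplehash table, the stored tuples, and
     * the TupleHashTable struct together; no destroy function is needed.
     */
    MemoryContextDelete(metacxt);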
@@ -172,28 +189,38 @@ BuildTupleHashTable(PlanState *parent,
const Oid *eqfuncoids,
FmgrInfo *hashfunctions,
Oid *collations,
- long nbuckets,
+ double nelements,
Size additionalsize,
MemoryContext metacxt,
- MemoryContext tablecxt,
+ MemoryContext tuplescxt,
MemoryContext tempcxt,
bool use_variable_hash_iv)
{
TupleHashTable hashtable;
- Size entrysize;
- Size hash_mem_limit;
+ uint32 nbuckets;
MemoryContext oldcontext;
- bool allow_jit;
uint32 hash_iv = 0;
- Assert(nbuckets > 0);
- additionalsize = MAXALIGN(additionalsize);
- entrysize = sizeof(TupleHashEntryData) + additionalsize;
+ /*
+ * tuplehash_create requires a uint32 element count, so we had better
+ * clamp the given nelements to fit in that. As long as we have to do
+ * that, we might as well protect against completely insane input like
+ * zero or NaN. But it is not our job here to enforce limits like staying
+ * within hash_mem: the caller should have done that, and we don't have
+ * enough info to second-guess.
+ */
+ if (isnan(nelements) || nelements <= 0)
+ nbuckets = 1;
+ else if (nelements >= PG_UINT32_MAX)
+ nbuckets = PG_UINT32_MAX;
+ else
+ nbuckets = (uint32) nelements;
- /* Limit initial table size request to not more than hash_mem */
- hash_mem_limit = get_hash_memory_limit() / entrysize;
- if (nbuckets > hash_mem_limit)
- nbuckets = hash_mem_limit;
+ /* tuplescxt must be separate, else ResetTupleHashTable breaks things */
+ Assert(metacxt != tuplescxt);
+
+ /* ensure additionalsize is maxalign'ed */
+ additionalsize = MAXALIGN(additionalsize);
oldcontext = MemoryContextSwitchTo(metacxt);
@@ -202,7 +229,7 @@ BuildTupleHashTable(PlanState *parent,
hashtable->numCols = numCols;
hashtable->keyColIdx = keyColIdx;
hashtable->tab_collations = collations;
- hashtable->tablecxt = tablecxt;
+ hashtable->tuplescxt = tuplescxt;
hashtable->tempcxt = tempcxt;
hashtable->additionalsize = additionalsize;
hashtable->tableslot = NULL; /* will be made on first lookup */
@@ -230,16 +257,6 @@ BuildTupleHashTable(PlanState *parent,
hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
&TTSOpsMinimalTuple);
- /*
- * If the caller fails to make the metacxt different from the tablecxt,
- * allowing JIT would lead to the generated functions to a) live longer
- * than the query or b) be re-generated each time the table is being
- * reset. Therefore prevent JIT from being used in that case, by not
- * providing a parent node (which prevents accessing the JitContext in the
- * EState).
- */
- allow_jit = (metacxt != tablecxt);
-
/* build hash ExprState for all columns */
hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc,
inputOps,
@@ -247,7 +264,7 @@ BuildTupleHashTable(PlanState *parent,
collations,
numCols,
keyColIdx,
- allow_jit ? parent : NULL,
+ parent,
hash_iv);
/* build comparator for all columns */
@@ -256,7 +273,7 @@ BuildTupleHashTable(PlanState *parent,
&TTSOpsMinimalTuple,
numCols,
keyColIdx, eqfuncoids, collations,
- allow_jit ? parent : NULL);
+ parent);
/*
* While not pretty, it's ok to not shut down this context, but instead
@@ -273,13 +290,77 @@ BuildTupleHashTable(PlanState *parent,
/*
* Reset contents of the hashtable to be empty, preserving all the non-content
- * state. Note that the tablecxt passed to BuildTupleHashTable() should
- * also be reset, otherwise there will be leaks.
+ * state.
+ *
+ * Note: in usages where several TupleHashTables share a tuplescxt, all must
+ * be reset together, as the first one's reset call will destroy all their
+ * data. The additional reset calls for the rest will redundantly reset the
+ * tuplescxt. But because of mcxt.c's isReset flag, that's cheap enough that
+ * we need not avoid it.
*/
void
ResetTupleHashTable(TupleHashTable hashtable)
{
tuplehash_reset(hashtable->hashtab);
+ MemoryContextReset(hashtable->tuplescxt);
+}
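To illustrate the shared-tuplescxt note above, a hedged sketch in the style of callers such as nodeSubplan.c; the "node" fields are assumed for illustration:

    /*
     * Sketch only: two TupleHashTables sharing one tuplescxt must be
     * reset together.  The first call empties the shared context; the
     * second call's MemoryContextReset is nearly free because mcxt.c's
     * isReset flag shows there is nothing left to do.
     */
    ResetTupleHashTable(node->hashtable);
    ResetTupleHashTable(node->hashnulls);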
+
+/*
+ * Estimate the amount of space needed for a TupleHashTable with nentries
+ * entries, if the tuples have average data width tupleWidth and the caller
+ * requires additionalsize extra space per entry.
+ *
+ * Return SIZE_MAX if it'd overflow size_t.
+ *
+ * nentries is "double" because this is meant for use by the planner,
+ * which typically works with double rowcount estimates. So we'd need to
+ * clamp to integer somewhere and that might as well be here. We do expect
+ * the value not to be NaN or negative, else the result will be garbage.
+ */
+Size
+EstimateTupleHashTableSpace(double nentries,
+ Size tupleWidth,
+ Size additionalsize)
+{
+ Size sh_space;
+ double tuples_space;
+
+ /* First estimate the space needed for the simplehash table */
+ sh_space = tuplehash_estimate_space(nentries);
+
+ /* Give up if that's already too big */
+ if (sh_space >= SIZE_MAX)
+ return sh_space;
+
+ /*
+ * Compute space needed for hashed tuples with additional data. nentries
+ * must be somewhat sane, so it should be safe to compute this product.
+ *
+ * We assume that the hashed tuples will be kept in a BumpContext so that
+ * there is no additional per-tuple overhead.
+ *
+ * (Note that this is only accurate if MEMORY_CONTEXT_CHECKING is off,
+ * else bump.c will add a MemoryChunk header to each tuple. However, it
+ * seems undesirable for debug builds to make different planning choices
+ * than production builds, so we assume the production behavior always.)
+ */
+ tuples_space = nentries * (MAXALIGN(SizeofMinimalTupleHeader) +
+ MAXALIGN(tupleWidth) +
+ MAXALIGN(additionalsize));
+
+ /*
+ * Check for size_t overflow. This coding is trickier than it may appear,
+ * because on 64-bit machines SIZE_MAX cannot be represented exactly as a
+ * double. We must cast it explicitly to suppress compiler warnings about
+ * an inexact conversion, and we must trust that any double value that
+ * compares strictly less than "(double) SIZE_MAX" will cast to a
+ * representable size_t value.
+ */
+ if (sh_space + tuples_space >= (double) SIZE_MAX)
+ return SIZE_MAX;
+
+ /* We don't bother estimating size of the miscellaneous overhead data */
+ return (Size) (sh_space + tuples_space);
}
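As a usage illustration, a hedged sketch of the planner-side check this function is meant for; the estimate variables and the fallback flag are assumed, not part of this patch:

    /*
     * Sketch, not from this patch: compare the estimated hash-table
     * footprint for "dNumGroups" groups of width "tupleWidth" (both
     * assumed to be available planner estimates) against hash_mem.
     */
    Size        est = EstimateTupleHashTableSpace(dNumGroups, tupleWidth, 0);

    if (est > get_hash_memory_limit())
        consider_hashed = false;    /* assumed flag: prefer a sort-based plan */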
/*
@@ -419,7 +500,7 @@ FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
*/
static uint32
TupleHashTableHash_internal(struct tuplehash_hash *tb,
- const MinimalTuple tuple)
+ MinimalTuple tuple)
{
TupleHashTable hashtable = (TupleHashTable) tb->private_data;
uint32 hashkey;
@@ -489,10 +570,10 @@ LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
/* created new entry */
*isnew = true;
- MemoryContextSwitchTo(hashtable->tablecxt);
+ MemoryContextSwitchTo(hashtable->tuplescxt);
/*
- * Copy the first tuple into the table context, and request
+ * Copy the first tuple into the tuples context, and request
* additionalsize extra bytes before the allocation.
*
* The caller can get a pointer to the additional data with
@@ -517,7 +598,7 @@ LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
* See whether two tuples (presumably of the same hash value) match
*/
static int
-TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
+TupleHashTableMatch(struct tuplehash_hash *tb, MinimalTuple tuple1, MinimalTuple tuple2)
{
TupleTableSlot *slot1;
TupleTableSlot *slot2;