Diffstat (limited to 'src/backend/executor/execGrouping.c')
-rw-r--r--   src/backend/executor/execGrouping.c   153
1 file changed, 117 insertions(+), 36 deletions(-)
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 75087204f0c..8b64a625ca5 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -14,15 +14,18 @@
  */
 #include "postgres.h"
 
+#include <math.h>
+
+#include "access/htup_details.h"
 #include "access/parallel.h"
 #include "common/hashfn.h"
 #include "executor/executor.h"
 #include "miscadmin.h"
 #include "utils/lsyscache.h"
 
-static int  TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
+static int  TupleHashTableMatch(struct tuplehash_hash *tb, MinimalTuple tuple1, MinimalTuple tuple2);
 static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
-                                                 const MinimalTuple tuple);
+                                                 MinimalTuple tuple);
 static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
                                                            TupleTableSlot *slot,
                                                            bool *isnew, uint32 hash);
@@ -143,10 +146,10 @@ execTuplesHashPrepare(int numCols,
  * eqfuncoids: OIDs of equality comparison functions to use
  * hashfunctions: FmgrInfos of datatype-specific hashing functions to use
  * collations: collations to use in comparisons
- * nbuckets: initial estimate of hashtable size
+ * nelements: initial estimate of hashtable size
  * additionalsize: size of data that may be stored along with the hash entry
- * metacxt: memory context for long-lived allocation, but not per-entry data
- * tablecxt: memory context in which to store table entries
+ * metacxt: memory context for long-lived data and the simplehash table
+ * tuplescxt: memory context in which to store the hashed tuples themselves
  * tempcxt: short-lived context for evaluation hash and comparison functions
  * use_variable_hash_iv: if true, adjust hash IV per-parallel-worker
  *
@@ -157,11 +160,25 @@ execTuplesHashPrepare(int numCols,
  * Note that the keyColIdx, hashfunctions, and collations arrays must be
  * allocated in storage that will live as long as the hashtable does.
  *
+ * The metacxt and tuplescxt are separate because it's usually desirable for
+ * tuplescxt to be a BumpContext to avoid memory wastage, while metacxt must
+ * support pfree in case the simplehash table needs to be enlarged.  (We could
+ * simplify the API of TupleHashTables by managing the tuplescxt internally.
+ * But that would be disadvantageous to nodeAgg.c and nodeSubplan.c, which use
+ * a single tuplescxt for multiple TupleHashTables that are reset together.)
+ *
  * LookupTupleHashEntry, FindTupleHashEntry, and related functions may leak
  * memory in the tempcxt.  It is caller's responsibility to reset that context
  * reasonably often, typically once per tuple.  (We do it that way, rather
  * than managing an extra context within the hashtable, because in many cases
  * the caller can specify a tempcxt that it needs to reset per-tuple anyway.)
+ *
+ * We don't currently provide DestroyTupleHashTable functionality; the hash
+ * table will be cleaned up at destruction of the metacxt.  (Some callers
+ * bother to delete the tuplescxt explicitly, though it'd be sufficient to
+ * ensure it's a child of the metacxt.)  There's not much point in working
+ * harder than this so long as the expression-evaluation infrastructure
+ * behaves similarly.
  */
 TupleHashTable
 BuildTupleHashTable(PlanState *parent,
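
(For orientation, not part of the patch: a minimal sketch of a caller following the
convention described in the comment above, assuming PostgreSQL 17+ where
BumpContextCreate() is available.  The context names and the surrounding variables
such as parent, inputDesc, and nelements are illustrative only.)

    /* metacxt must support pfree, so an ordinary aset context is suitable */
    MemoryContext metacxt = AllocSetContextCreate(CurrentMemoryContext,
                                                  "hashtable metadata",
                                                  ALLOCSET_DEFAULT_SIZES);

    /*
     * tuplescxt as a bump context: no per-chunk headers, reset only in bulk.
     * Making it a child of metacxt also covers cleanup, per the note above
     * about the missing DestroyTupleHashTable.
     */
    MemoryContext tuplescxt = BumpContextCreate(metacxt,
                                                "hashtable tuples",
                                                ALLOCSET_DEFAULT_SIZES);

    hashtable = BuildTupleHashTable(parent, inputDesc, inputOps,
                                    numCols, keyColIdx,
                                    eqfuncoids, hashfunctions, collations,
                                    nelements, additionalsize,
                                    metacxt, tuplescxt, tempcxt,
                                    false);
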
@@ -172,28 +189,38 @@ BuildTupleHashTable(PlanState *parent,
                     const Oid *eqfuncoids,
                     FmgrInfo *hashfunctions,
                     Oid *collations,
-                    long nbuckets,
+                    double nelements,
                     Size additionalsize,
                     MemoryContext metacxt,
-                    MemoryContext tablecxt,
+                    MemoryContext tuplescxt,
                     MemoryContext tempcxt,
                     bool use_variable_hash_iv)
 {
     TupleHashTable hashtable;
-    Size        entrysize;
-    Size        hash_mem_limit;
+    uint32      nbuckets;
     MemoryContext oldcontext;
-    bool        allow_jit;
     uint32      hash_iv = 0;
 
-    Assert(nbuckets > 0);
-    additionalsize = MAXALIGN(additionalsize);
-    entrysize = sizeof(TupleHashEntryData) + additionalsize;
+    /*
+     * tuplehash_create requires a uint32 element count, so we had better
+     * clamp the given nelements to fit in that.  As long as we have to do
+     * that, we might as well protect against completely insane input like
+     * zero or NaN.  But it is not our job here to enforce issues like staying
+     * within hash_mem: the caller should have done that, and we don't have
+     * enough info to second-guess.
+     */
+    if (isnan(nelements) || nelements <= 0)
+        nbuckets = 1;
+    else if (nelements >= PG_UINT32_MAX)
+        nbuckets = PG_UINT32_MAX;
+    else
+        nbuckets = (uint32) nelements;
 
-    /* Limit initial table size request to not more than hash_mem */
-    hash_mem_limit = get_hash_memory_limit() / entrysize;
-    if (nbuckets > hash_mem_limit)
-        nbuckets = hash_mem_limit;
+    /* tuplescxt must be separate, else ResetTupleHashTable breaks things */
+    Assert(metacxt != tuplescxt);
+
+    /* ensure additionalsize is maxalign'ed */
+    additionalsize = MAXALIGN(additionalsize);
 
     oldcontext = MemoryContextSwitchTo(metacxt);
@@ -202,7 +229,7 @@ BuildTupleHashTable(PlanState *parent,
     hashtable->numCols = numCols;
     hashtable->keyColIdx = keyColIdx;
     hashtable->tab_collations = collations;
-    hashtable->tablecxt = tablecxt;
+    hashtable->tuplescxt = tuplescxt;
     hashtable->tempcxt = tempcxt;
     hashtable->additionalsize = additionalsize;
     hashtable->tableslot = NULL;    /* will be made on first lookup */
@@ -230,16 +257,6 @@ BuildTupleHashTable(PlanState *parent,
         hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
                                                         &TTSOpsMinimalTuple);
 
-    /*
-     * If the caller fails to make the metacxt different from the tablecxt,
-     * allowing JIT would lead to the generated functions to a) live longer
-     * than the query or b) be re-generated each time the table is being
-     * reset. Therefore prevent JIT from being used in that case, by not
-     * providing a parent node (which prevents accessing the JitContext in the
-     * EState).
-     */
-    allow_jit = (metacxt != tablecxt);
-
     /* build hash ExprState for all columns */
     hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc,
                                                         inputOps,
@@ -247,7 +264,7 @@ BuildTupleHashTable(PlanState *parent,
                                                         collations,
                                                         numCols,
                                                         keyColIdx,
-                                                        allow_jit ? parent : NULL,
+                                                        parent,
                                                         hash_iv);
 
     /* build comparator for all columns */
@@ -256,7 +273,7 @@ BuildTupleHashTable(PlanState *parent,
                                                       &TTSOpsMinimalTuple,
                                                       numCols,
                                                       keyColIdx, eqfuncoids, collations,
-                                                      allow_jit ? parent : NULL);
+                                                      parent);
 
     /*
      * While not pretty, it's ok to not shut down this context, but instead
@@ -273,13 +290,77 @@ BuildTupleHashTable(PlanState *parent,
 
 /*
  * Reset contents of the hashtable to be empty, preserving all the non-content
- * state.  Note that the tablecxt passed to BuildTupleHashTable() should
- * also be reset, otherwise there will be leaks.
+ * state.
+ *
+ * Note: in usages where several TupleHashTables share a tuplescxt, all must
+ * be reset together, as the first one's reset call will destroy all their
+ * data.  The additional reset calls for the rest will redundantly reset the
+ * tuplescxt.  But because of mcxt.c's isReset flag, that's cheap enough that
+ * we need not avoid it.
  */
 void
 ResetTupleHashTable(TupleHashTable hashtable)
 {
     tuplehash_reset(hashtable->hashtab);
+    MemoryContextReset(hashtable->tuplescxt);
+}
+
+/*
+ * Estimate the amount of space needed for a TupleHashTable with nentries
+ * entries, if the tuples have average data width tupleWidth and the caller
+ * requires additionalsize extra space per entry.
+ *
+ * Return SIZE_MAX if it'd overflow size_t.
+ *
+ * nentries is "double" because this is meant for use by the planner,
+ * which typically works with double rowcount estimates.  So we'd need to
+ * clamp to integer somewhere and that might as well be here.  We do expect
+ * the value not to be NaN or negative, else the result will be garbage.
+ */
+Size
+EstimateTupleHashTableSpace(double nentries,
+                            Size tupleWidth,
+                            Size additionalsize)
+{
+    Size        sh_space;
+    double      tuples_space;
+
+    /* First estimate the space needed for the simplehash table */
+    sh_space = tuplehash_estimate_space(nentries);
+
+    /* Give up if that's already too big */
+    if (sh_space >= SIZE_MAX)
+        return sh_space;
+
+    /*
+     * Compute space needed for hashed tuples with additional data.  nentries
+     * must be somewhat sane, so it should be safe to compute this product.
+     *
+     * We assume that the hashed tuples will be kept in a BumpContext so that
+     * there is not additional per-tuple overhead.
+     *
+     * (Note that this is only accurate if MEMORY_CONTEXT_CHECKING is off,
+     * else bump.c will add a MemoryChunk header to each tuple.  However, it
+     * seems undesirable for debug builds to make different planning choices
+     * than production builds, so we assume the production behavior always.)
+     */
+    tuples_space = nentries * (MAXALIGN(SizeofMinimalTupleHeader) +
+                               MAXALIGN(tupleWidth) +
+                               MAXALIGN(additionalsize));
+
+    /*
+     * Check for size_t overflow.  This coding is trickier than it may appear,
+     * because on 64-bit machines SIZE_MAX cannot be represented exactly as a
+     * double.  We must cast it explicitly to suppress compiler warnings about
+     * an inexact conversion, and we must trust that any double value that
+     * compares strictly less than "(double) SIZE_MAX" will cast to a
+     * representable size_t value.
+     */
+    if (sh_space + tuples_space >= (double) SIZE_MAX)
+        return SIZE_MAX;
+
+    /* We don't bother estimating size of the miscellaneous overhead data */
+    return (Size) (sh_space + tuples_space);
 }
 
 /*
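
(The size_t overflow check in EstimateTupleHashTableSpace leans on a floating-point
subtlety that is easy to get wrong.  A self-contained sketch, not part of the patch,
demonstrating the rounding behavior the comment relies on:)

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        /*
         * On 64-bit machines SIZE_MAX is 2^64 - 1, which is not exactly
         * representable as a double: the cast rounds it up to 2^64.  The
         * largest double strictly below (double) SIZE_MAX is 2^64 - 2048,
         * which converts back to size_t safely -- hence the strict
         * comparison used in the patch.
         */
        printf("(double) SIZE_MAX == 2^64: %d\n",
               (double) SIZE_MAX == 18446744073709551616.0);

        double      total = 1e18;   /* stand-in for sh_space + tuples_space */

        if (total >= (double) SIZE_MAX)
            printf("would overflow size_t\n");
        else
            printf("fits in size_t: %zu\n", (size_t) total);
        return 0;
    }
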
@@ -419,7 +500,7 @@ FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
  */
 static uint32
 TupleHashTableHash_internal(struct tuplehash_hash *tb,
-                            const MinimalTuple tuple)
+                            MinimalTuple tuple)
 {
     TupleHashTable hashtable = (TupleHashTable) tb->private_data;
     uint32      hashkey;
@@ -489,10 +570,10 @@ LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
         /* created new entry */
         *isnew = true;
 
-        MemoryContextSwitchTo(hashtable->tablecxt);
+        MemoryContextSwitchTo(hashtable->tuplescxt);
 
         /*
-         * Copy the first tuple into the table context, and request
+         * Copy the first tuple into the tuples context, and request
          * additionalsize extra bytes before the allocation.
          *
          * The caller can get a pointer to the additional data with
@@ -517,7 +598,7 @@ LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
  * See whether two tuples (presumably of the same hash value) match
  */
 static int
-TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
+TupleHashTableMatch(struct tuplehash_hash *tb, MinimalTuple tuple1, MinimalTuple tuple2)
 {
     TupleTableSlot *slot1;
     TupleTableSlot *slot2;
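
(A brief usage sketch of the shared-tuplescxt pattern that the new
ResetTupleHashTable() comment describes; the node fields are in the style of
nodeSubplan.c and shown for illustration only:)

    /*
     * Both hash tables keep their tuples in the same tuplescxt.  The first
     * call's MemoryContextReset frees the tuples of both tables; the second
     * call's reset is nearly free thanks to mcxt.c's isReset flag.
     */
    ResetTupleHashTable(node->hashtable);
    ResetTupleHashTable(node->hashnulls);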
