 src/backend/executor/execGrouping.c       | 66
 src/backend/executor/nodeAgg.c            | 32
 src/backend/executor/nodeRecursiveunion.c | 25
 src/backend/executor/nodeSetOp.c          | 26
 src/backend/executor/nodeSubplan.c        | 17
 src/include/executor/executor.h           |  2
 src/include/nodes/execnodes.h             | 20
 7 files changed, 103 insertions(+), 85 deletions(-)
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index f34530fdacf..b4bdaa3c305 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -145,8 +145,8 @@ execTuplesHashPrepare(int numCols,
* collations: collations to use in comparisons
* nbuckets: initial estimate of hashtable size
* additionalsize: size of data that may be stored along with the hash entry
- * metacxt: memory context for long-lived allocation, but not per-entry data
- * tablecxt: memory context in which to store table entries
+ * metacxt: memory context for long-lived data and the simplehash table
+ * tuplescxt: memory context in which to store the hashed tuples themselves
* tempcxt: short-lived context for evaluation hash and comparison functions
* use_variable_hash_iv: if true, adjust hash IV per-parallel-worker
*
@@ -157,11 +157,25 @@ execTuplesHashPrepare(int numCols,
* Note that the keyColIdx, hashfunctions, and collations arrays must be
* allocated in storage that will live as long as the hashtable does.
*
+ * The metacxt and tuplescxt are separate because it's usually desirable for
+ * tuplescxt to be a BumpContext to avoid memory wastage, while metacxt must
+ * support pfree in case the simplehash table needs to be enlarged. (We could
+ * simplify the API of TupleHashTables by managing the tuplescxt internally.
+ * But that would be disadvantageous to nodeAgg.c and nodeSubplan.c, which use
+ * a single tuplescxt for multiple TupleHashTables that are reset together.)
+ *
* LookupTupleHashEntry, FindTupleHashEntry, and related functions may leak
* memory in the tempcxt. It is caller's responsibility to reset that context
* reasonably often, typically once per tuple. (We do it that way, rather
* than managing an extra context within the hashtable, because in many cases
* the caller can specify a tempcxt that it needs to reset per-tuple anyway.)
+ *
+ * We don't currently provide DestroyTupleHashTable functionality; the hash
+ * table will be cleaned up at destruction of the metacxt. (Some callers
+ * bother to delete the tuplescxt explicitly, though it'd be sufficient to
+ * ensure it's a child of the metacxt.) There's not much point in working
+ * harder than this so long as the expression-evaluation infrastructure
+ * behaves similarly.
*/
TupleHashTable
BuildTupleHashTable(PlanState *parent,
@@ -175,7 +189,7 @@ BuildTupleHashTable(PlanState *parent,
long nbuckets,
Size additionalsize,
MemoryContext metacxt,
- MemoryContext tablecxt,
+ MemoryContext tuplescxt,
MemoryContext tempcxt,
bool use_variable_hash_iv)
{
@@ -183,14 +197,24 @@ BuildTupleHashTable(PlanState *parent,
Size entrysize;
Size hash_mem_limit;
MemoryContext oldcontext;
- bool allow_jit;
uint32 hash_iv = 0;
Assert(nbuckets > 0);
+
+ /* tuplescxt must be separate, else ResetTupleHashTable breaks things */
+ Assert(metacxt != tuplescxt);
+
+ /* ensure additionalsize is maxalign'ed */
additionalsize = MAXALIGN(additionalsize);
- entrysize = sizeof(TupleHashEntryData) + additionalsize;
- /* Limit initial table size request to not more than hash_mem */
+ /*
+ * Limit initial table size request to not more than hash_mem.
+ *
+ * XXX this calculation seems pretty misguided, as it counts only overhead
+ * and not the tuples themselves. But we have no knowledge of the
+ * expected tuple width here.
+ */
+ entrysize = sizeof(TupleHashEntryData) + additionalsize;
hash_mem_limit = get_hash_memory_limit() / entrysize;
if (nbuckets > hash_mem_limit)
nbuckets = hash_mem_limit;
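To make the XXX above concrete, here is a rough worked example of the cap, assuming stock settings and a 64-bit build; the numbers are illustrative and not part of this commit:

```c
/*
 * Illustrative only: with the default work_mem = 4MB and
 * hash_mem_multiplier = 2.0, get_hash_memory_limit() returns 8MB.
 * On a 64-bit build sizeof(TupleHashEntryData) is 16 bytes, so with
 * additionalsize = 0:
 *
 *     entrysize      = 16
 *     hash_mem_limit = 8MB / 16 = 524288
 *
 * i.e. the initial nbuckets request is capped at ~512K buckets, even
 * though the tuples themselves (which usually dominate the memory
 * consumption) are not counted at all.
 */
```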
@@ -202,7 +226,7 @@ BuildTupleHashTable(PlanState *parent,
hashtable->numCols = numCols;
hashtable->keyColIdx = keyColIdx;
hashtable->tab_collations = collations;
- hashtable->tablecxt = tablecxt;
+ hashtable->tuplescxt = tuplescxt;
hashtable->tempcxt = tempcxt;
hashtable->additionalsize = additionalsize;
hashtable->tableslot = NULL; /* will be made on first lookup */
@@ -230,16 +254,6 @@ BuildTupleHashTable(PlanState *parent,
hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
&TTSOpsMinimalTuple);
- /*
- * If the caller fails to make the metacxt different from the tablecxt,
- * allowing JIT would lead to the generated functions to a) live longer
- * than the query or b) be re-generated each time the table is being
- * reset. Therefore prevent JIT from being used in that case, by not
- * providing a parent node (which prevents accessing the JitContext in the
- * EState).
- */
- allow_jit = (metacxt != tablecxt);
-
/* build hash ExprState for all columns */
hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc,
inputOps,
@@ -247,7 +261,7 @@ BuildTupleHashTable(PlanState *parent,
collations,
numCols,
keyColIdx,
- allow_jit ? parent : NULL,
+ parent,
hash_iv);
/* build comparator for all columns */
@@ -256,7 +270,7 @@ BuildTupleHashTable(PlanState *parent,
&TTSOpsMinimalTuple,
numCols,
keyColIdx, eqfuncoids, collations,
- allow_jit ? parent : NULL);
+ parent);
/*
* While not pretty, it's ok to not shut down this context, but instead
@@ -273,13 +287,19 @@ BuildTupleHashTable(PlanState *parent,
/*
* Reset contents of the hashtable to be empty, preserving all the non-content
- * state. Note that the tablecxt passed to BuildTupleHashTable() should
- * also be reset, otherwise there will be leaks.
+ * state.
+ *
+ * Note: in usages where several TupleHashTables share a tuplescxt, all must
+ * be reset together, as the first one's reset call will destroy all their
+ * data. The additional reset calls for the rest will redundantly reset the
+ * tuplescxt. But because of mcxt.c's isReset flag, that's cheap enough that
+ * we need not avoid it.
*/
void
ResetTupleHashTable(TupleHashTable hashtable)
{
tuplehash_reset(hashtable->hashtab);
+ MemoryContextReset(hashtable->tuplescxt);
}
/*
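The shared-tuplescxt rule in the comment above is exactly what nodeAgg.c relies on; a minimal sketch of the pattern, mirroring the agg_refill_hash_table() loop later in this commit:

```c
/* All TupleHashTables sharing one tuplescxt must be reset together: the
 * first ResetTupleHashTable() call empties the shared context, and the
 * later calls' redundant MemoryContextReset()s are near-free thanks to
 * mcxt.c's isReset flag. */
for (int setno = 0; setno < aggstate->num_hashes; setno++)
    ResetTupleHashTable(aggstate->perhash[setno].hashtable);
```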
@@ -489,10 +509,10 @@ LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
/* created new entry */
*isnew = true;
- MemoryContextSwitchTo(hashtable->tablecxt);
+ MemoryContextSwitchTo(hashtable->tuplescxt);
/*
- * Copy the first tuple into the table context, and request
+ * Copy the first tuple into the tuples context, and request
* additionalsize extra bytes before the allocation.
*
* The caller can get a pointer to the additional data with
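For reference, here is a hedged sketch of the whole caller protocol after this commit, modeled on the build_hash_table() callers below. The parameter order follows this tree's executor.h; mystate, tmpcxt, and the other lowercase locals are placeholders, not names from the patch:

```c
/* Sketch only: metacxt and tuplescxt must differ (see the new Assert), and
 * tuplescxt can be a BumpContext because entries are never pfree'd. */
MemoryContext tuplescxt = BumpContextCreate(CurrentMemoryContext,
                                            "example hashed tuples",
                                            ALLOCSET_DEFAULT_SIZES);
TupleHashTable hashtable;
bool        isnew;

hashtable = BuildTupleHashTable(&mystate->ps,    /* parent PlanState */
                                inputDesc, inputOps,
                                numCols, keyColIdx,
                                eqfuncoids, hashfunctions, collations,
                                nbuckets,
                                0,                      /* additionalsize */
                                estate->es_query_cxt,   /* metacxt: pfree-able */
                                tuplescxt,              /* hashed tuples */
                                tmpcxt,                 /* short-lived */
                                false);

/* Per input tuple: the lookup may leak into tmpcxt, so reset it each time. */
(void) LookupTupleHashEntry(hashtable, slot, &isnew, NULL);
MemoryContextReset(tmpcxt);

/* Emptying the table: a single call now resets tuplescxt as well. */
ResetTupleHashTable(hashtable);
```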
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 64643c3943a..759ffeed2e6 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1457,7 +1457,7 @@ find_cols_walker(Node *node, FindColsContext *context)
* We have a separate hashtable and associated perhash data structure for each
* grouping set for which we're doing hashing.
*
- * The contents of the hash tables always live in the hashcontext's per-tuple
+ * The contents of the hash tables live in the aggstate's hash_tuplescxt
* memory context (there is only one of these for all tables together, since
* they are all reset at the same time).
*/
@@ -1509,7 +1509,7 @@ build_hash_table(AggState *aggstate, int setno, long nbuckets)
{
AggStatePerHash perhash = &aggstate->perhash[setno];
MemoryContext metacxt = aggstate->hash_metacxt;
- MemoryContext tablecxt = aggstate->hash_tablecxt;
+ MemoryContext tuplescxt = aggstate->hash_tuplescxt;
MemoryContext tmpcxt = aggstate->tmpcontext->ecxt_per_tuple_memory;
Size additionalsize;
@@ -1535,7 +1535,7 @@ build_hash_table(AggState *aggstate, int setno, long nbuckets)
nbuckets,
additionalsize,
metacxt,
- tablecxt,
+ tuplescxt,
tmpcxt,
DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
}
@@ -1868,7 +1868,7 @@ hash_agg_check_limits(AggState *aggstate)
uint64 ngroups = aggstate->hash_ngroups_current;
Size meta_mem = MemoryContextMemAllocated(aggstate->hash_metacxt,
true);
- Size entry_mem = MemoryContextMemAllocated(aggstate->hash_tablecxt,
+ Size entry_mem = MemoryContextMemAllocated(aggstate->hash_tuplescxt,
true);
Size tval_mem = MemoryContextMemAllocated(aggstate->hashcontext->ecxt_per_tuple_memory,
true);
@@ -1959,7 +1959,7 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
meta_mem = MemoryContextMemAllocated(aggstate->hash_metacxt, true);
/* memory for hash entries */
- entry_mem = MemoryContextMemAllocated(aggstate->hash_tablecxt, true);
+ entry_mem = MemoryContextMemAllocated(aggstate->hash_tuplescxt, true);
/* memory for byref transition states */
hashkey_mem = MemoryContextMemAllocated(aggstate->hashcontext->ecxt_per_tuple_memory, true);
@@ -2042,11 +2042,11 @@ hash_create_memory(AggState *aggstate)
/* and no smaller than ALLOCSET_DEFAULT_INITSIZE */
maxBlockSize = Max(maxBlockSize, ALLOCSET_DEFAULT_INITSIZE);
- aggstate->hash_tablecxt = BumpContextCreate(aggstate->ss.ps.state->es_query_cxt,
- "HashAgg table context",
- ALLOCSET_DEFAULT_MINSIZE,
- ALLOCSET_DEFAULT_INITSIZE,
- maxBlockSize);
+ aggstate->hash_tuplescxt = BumpContextCreate(aggstate->ss.ps.state->es_query_cxt,
+ "HashAgg hashed tuples",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ maxBlockSize);
}
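The switch from AllocSet to Bump here is safe because hash entries are never individually freed; what a bump context buys, in sketch form (behavior per src/backend/utils/mmgr/bump.c, illustrative only):

```c
/* Bump contexts omit per-chunk bookkeeping, so allocations pack densely
 * and reset/delete are the only ways to release memory. */
MemoryContext cxt = BumpContextCreate(CurrentMemoryContext,
                                      "bump example",
                                      ALLOCSET_DEFAULT_SIZES);
char *p = MemoryContextAlloc(cxt, 64);  /* fine: bump-allocated */
/* pfree(p) would elog(ERROR): retail free is unsupported by bump.c */
MemoryContextReset(cxt);                /* releases everything at once */
MemoryContextDelete(cxt);
```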
@@ -2707,7 +2707,6 @@ agg_refill_hash_table(AggState *aggstate)
/* free memory and reset hash tables */
ReScanExprContext(aggstate->hashcontext);
- MemoryContextReset(aggstate->hash_tablecxt);
for (int setno = 0; setno < aggstate->num_hashes; setno++)
ResetTupleHashTable(aggstate->perhash[setno].hashtable);
@@ -4428,18 +4427,18 @@ ExecEndAgg(AggState *node)
hashagg_reset_spill_state(node);
+ /* Release hash tables too */
if (node->hash_metacxt != NULL)
{
MemoryContextDelete(node->hash_metacxt);
node->hash_metacxt = NULL;
}
- if (node->hash_tablecxt != NULL)
+ if (node->hash_tuplescxt != NULL)
{
- MemoryContextDelete(node->hash_tablecxt);
- node->hash_tablecxt = NULL;
+ MemoryContextDelete(node->hash_tuplescxt);
+ node->hash_tuplescxt = NULL;
}
-
for (transno = 0; transno < node->numtrans; transno++)
{
AggStatePerTrans pertrans = &node->pertrans[transno];
@@ -4555,8 +4554,7 @@ ExecReScanAgg(AggState *node)
node->hash_ngroups_current = 0;
ReScanExprContext(node->hashcontext);
- MemoryContextReset(node->hash_tablecxt);
- /* Rebuild an empty hash table */
+ /* Rebuild empty hash table(s) */
build_hash_tables(node);
node->table_filled = false;
/* iterator will be reset when the table is filled */
diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c
index 40f66fd0680..ebb7919b49b 100644
--- a/src/backend/executor/nodeRecursiveunion.c
+++ b/src/backend/executor/nodeRecursiveunion.c
@@ -53,7 +53,7 @@ build_hash_table(RecursiveUnionState *rustate)
node->numGroups,
0,
rustate->ps.state->es_query_cxt,
- rustate->tableContext,
+ rustate->tuplesContext,
rustate->tempContext,
false);
}
@@ -197,7 +197,7 @@ ExecInitRecursiveUnion(RecursiveUnion *node, EState *estate, int eflags)
rustate->hashfunctions = NULL;
rustate->hashtable = NULL;
rustate->tempContext = NULL;
- rustate->tableContext = NULL;
+ rustate->tuplesContext = NULL;
/* initialize processing state */
rustate->recursing = false;
@@ -209,7 +209,8 @@ ExecInitRecursiveUnion(RecursiveUnion *node, EState *estate, int eflags)
* If hashing, we need a per-tuple memory context for comparisons, and a
* longer-lived context to store the hash table. The table can't just be
* kept in the per-query context because we want to be able to throw it
- * away when rescanning.
+ * away when rescanning. We can use a BumpContext to save storage,
+ * because we will have no need to delete individual table entries.
*/
if (node->numCols > 0)
{
@@ -217,10 +218,10 @@ ExecInitRecursiveUnion(RecursiveUnion *node, EState *estate, int eflags)
AllocSetContextCreate(CurrentMemoryContext,
"RecursiveUnion",
ALLOCSET_DEFAULT_SIZES);
- rustate->tableContext =
- AllocSetContextCreate(CurrentMemoryContext,
- "RecursiveUnion hash table",
- ALLOCSET_DEFAULT_SIZES);
+ rustate->tuplesContext =
+ BumpContextCreate(CurrentMemoryContext,
+ "RecursiveUnion hashed tuples",
+ ALLOCSET_DEFAULT_SIZES);
}
/*
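One subtlety in the hunk above: ALLOCSET_DEFAULT_SIZES is just a comma-separated triple, so it drops into BumpContextCreate()'s (minContextSize, initBlockSize, maxBlockSize) parameters as readily as into AllocSetContextCreate()'s. From memutils.h (values as of this era of the tree):

```c
#define ALLOCSET_DEFAULT_MINSIZE   0
#define ALLOCSET_DEFAULT_INITSIZE  (8 * 1024)
#define ALLOCSET_DEFAULT_MAXSIZE   (8 * 1024 * 1024)
#define ALLOCSET_DEFAULT_SIZES \
	ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE
```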
@@ -288,11 +289,11 @@ ExecEndRecursiveUnion(RecursiveUnionState *node)
tuplestore_end(node->working_table);
tuplestore_end(node->intermediate_table);
- /* free subsidiary stuff including hashtable */
+ /* free subsidiary stuff including hashtable data */
if (node->tempContext)
MemoryContextDelete(node->tempContext);
- if (node->tableContext)
- MemoryContextDelete(node->tableContext);
+ if (node->tuplesContext)
+ MemoryContextDelete(node->tuplesContext);
/*
* close down subplans
@@ -328,10 +329,6 @@ ExecReScanRecursiveUnion(RecursiveUnionState *node)
if (outerPlan->chgParam == NULL)
ExecReScan(outerPlan);
- /* Release any hashtable storage */
- if (node->tableContext)
- MemoryContextReset(node->tableContext);
-
/* Empty hashtable if needed */
if (plan->numCols > 0)
ResetTupleHashTable(node->hashtable);
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index 4068481a523..7b223a7ca3a 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -106,7 +106,7 @@ build_hash_table(SetOpState *setopstate)
node->numGroups,
sizeof(SetOpStatePerGroupData),
setopstate->ps.state->es_query_cxt,
- setopstate->tableContext,
+ setopstate->tuplesContext,
econtext->ecxt_per_tuple_memory,
false);
}
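Here additionalsize is sizeof(SetOpStatePerGroupData), so each entry carries per-group counters in its extra bytes. A hedged sketch of how that data is reached; the accessor name and the per-group field names are assumptions based on this tree's headers, not text from this patch:

```c
/* Assumed accessor: TupleHashEntryGetAdditional(hashtable, entry) returns
 * the MAXALIGN'ed additionalsize bytes requested at table build time. */
TupleHashEntry entry;
bool        isnew;
SetOpStatePerGroup pergroup;

entry = LookupTupleHashEntry(setopstate->hashtable, outerslot, &isnew, NULL);
pergroup = (SetOpStatePerGroup)
    TupleHashEntryGetAdditional(setopstate->hashtable, entry);
if (isnew)
    pergroup->numLeft = pergroup->numRight = 0; /* fresh per-group state */
```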
@@ -589,13 +589,15 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
/*
* If hashing, we also need a longer-lived context to store the hash
* table. The table can't just be kept in the per-query context because
- * we want to be able to throw it away in ExecReScanSetOp.
+ * we want to be able to throw it away in ExecReScanSetOp. We can use a
+ * BumpContext to save storage, because we will have no need to delete
+ * individual table entries.
*/
if (node->strategy == SETOP_HASHED)
- setopstate->tableContext =
- AllocSetContextCreate(CurrentMemoryContext,
- "SetOp hash table",
- ALLOCSET_DEFAULT_SIZES);
+ setopstate->tuplesContext =
+ BumpContextCreate(CurrentMemoryContext,
+ "SetOp hashed tuples",
+ ALLOCSET_DEFAULT_SIZES);
/*
* initialize child nodes
@@ -680,9 +682,9 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
void
ExecEndSetOp(SetOpState *node)
{
- /* free subsidiary stuff including hashtable */
- if (node->tableContext)
- MemoryContextDelete(node->tableContext);
+ /* free subsidiary stuff including hashtable data */
+ if (node->tuplesContext)
+ MemoryContextDelete(node->tuplesContext);
ExecEndNode(outerPlanState(node));
ExecEndNode(innerPlanState(node));
@@ -721,11 +723,7 @@ ExecReScanSetOp(SetOpState *node)
return;
}
- /* Release any hashtable storage */
- if (node->tableContext)
- MemoryContextReset(node->tableContext);
-
- /* And rebuild an empty hashtable */
+ /* Else, we must rebuild the hashtable */
ResetTupleHashTable(node->hashtable);
node->table_filled = false;
}
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 53fb56f7388..9f6e45bcb0b 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -506,7 +506,6 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
* saves a needless fetch inner op step for the hashing ExprState created
* in BuildTupleHashTable().
*/
- MemoryContextReset(node->hashtablecxt);
node->havehashrows = false;
node->havenullrows = false;
@@ -528,7 +527,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
nbuckets,
0,
node->planstate->state->es_query_cxt,
- node->hashtablecxt,
+ node->tuplesContext,
innerecontext->ecxt_per_tuple_memory,
false);
@@ -557,7 +556,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
nbuckets,
0,
node->planstate->state->es_query_cxt,
- node->hashtablecxt,
+ node->tuplesContext,
innerecontext->ecxt_per_tuple_memory,
false);
}
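Both of buildSubPlanHash()'s tables now share node->tuplesContext, which is exactly the multi-table case the new ResetTupleHashTable() comment describes. The reset sequence becomes (a sketch of the pattern, not a verbatim quote from the function):

```c
/* hashtable and hashnulls share tuplesContext: resetting both in a row is
 * correct, and the second underlying MemoryContextReset() is a no-op. */
if (node->hashtable)
    ResetTupleHashTable(node->hashtable);
if (node->hashnulls)
    ResetTupleHashTable(node->hashnulls);
```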
@@ -838,7 +837,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
sstate->projRight = NULL;
sstate->hashtable = NULL;
sstate->hashnulls = NULL;
- sstate->hashtablecxt = NULL;
+ sstate->tuplesContext = NULL;
sstate->innerecontext = NULL;
sstate->keyColIdx = NULL;
sstate->tab_eq_funcoids = NULL;
@@ -889,11 +888,11 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
*righttlist;
ListCell *l;
- /* We need a memory context to hold the hash table(s) */
- sstate->hashtablecxt =
- AllocSetContextCreate(CurrentMemoryContext,
- "Subplan HashTable Context",
- ALLOCSET_DEFAULT_SIZES);
+ /* We need a memory context to hold the hash table(s)' tuples */
+ sstate->tuplesContext =
+ BumpContextCreate(CurrentMemoryContext,
+ "SubPlan hashed tuples",
+ ALLOCSET_DEFAULT_SIZES);
/* and a short-lived exprcontext for function evaluation */
sstate->innerecontext = CreateExprContext(estate);
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 26db9522d8b..8e7a5453064 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -141,7 +141,7 @@ extern TupleHashTable BuildTupleHashTable(PlanState *parent,
long nbuckets,
Size additionalsize,
MemoryContext metacxt,
- MemoryContext tablecxt,
+ MemoryContext tuplescxt,
MemoryContext tempcxt,
bool use_variable_hash_iv);
extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index a36653c37f9..18ae8f0d4bb 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -844,9 +844,15 @@ typedef struct ExecAuxRowMark
typedef struct TupleHashEntryData *TupleHashEntry;
typedef struct TupleHashTableData *TupleHashTable;
+/*
+ * TupleHashEntryData is a slot in the tuplehash_hash table. If it's
+ * populated, it contains a pointer to a MinimalTuple that can also have
+ * associated "additional data". That's stored in the TupleHashTable's
+ * tuplescxt.
+ */
typedef struct TupleHashEntryData
{
- MinimalTuple firstTuple; /* copy of first tuple in this group */
+ MinimalTuple firstTuple; /* -> copy of first tuple in this group */
uint32 status; /* hash status */
uint32 hash; /* hash value (cached) */
} TupleHashEntryData;
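Putting the two halves together, the storage picture for a populated entry after this commit, per the "additionalsize extra bytes before the allocation" comment in execGrouping.c (illustrative diagram, not a struct definition):

```c
/*
 * simplehash slot (lives in metacxt)       allocation in tuplescxt
 *
 * TupleHashEntryData:                      [ additionalsize bytes  ] <- caller
 *   MinimalTuple firstTuple;  ---+         [ (MAXALIGN'ed), e.g.   ]    data
 *   uint32       status;         |         [ per-group state       ]
 *   uint32       hash;           +-------> [ MinimalTuple: copy of ]
 *                                          [ the group's 1st tuple ]
 */
```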
@@ -861,13 +867,13 @@ typedef struct TupleHashEntryData
typedef struct TupleHashTableData
{
- tuplehash_hash *hashtab; /* underlying hash table */
+ tuplehash_hash *hashtab; /* underlying simplehash hash table */
int numCols; /* number of columns in lookup key */
AttrNumber *keyColIdx; /* attr numbers of key columns */
ExprState *tab_hash_expr; /* ExprState for hashing table datatype(s) */
ExprState *tab_eq_func; /* comparator for table datatype(s) */
Oid *tab_collations; /* collations for hash and comparison */
- MemoryContext tablecxt; /* memory context containing table */
+ MemoryContext tuplescxt; /* memory context storing hashed tuples */
MemoryContext tempcxt; /* context for function evaluations */
Size additionalsize; /* size of additional data */
TupleTableSlot *tableslot; /* slot for referencing table entries */
@@ -1017,7 +1023,7 @@ typedef struct SubPlanState
TupleHashTable hashnulls; /* hash table for rows with null(s) */
bool havehashrows; /* true if hashtable is not empty */
bool havenullrows; /* true if hashnulls is not empty */
- MemoryContext hashtablecxt; /* memory context containing hash tables */
+ MemoryContext tuplesContext; /* context containing hash tables' tuples */
ExprContext *innerecontext; /* econtext for computing inner tuples */
int numCols; /* number of columns being hashed */
/* each of the remaining fields is an array of length numCols: */
@@ -1566,7 +1572,7 @@ typedef struct RecursiveUnionState
FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
MemoryContext tempContext; /* short-term context for comparisons */
TupleHashTable hashtable; /* hash table for tuples already seen */
- MemoryContext tableContext; /* memory context containing hash table */
+ MemoryContext tuplesContext; /* context containing hash table's tuples */
} RecursiveUnionState;
/* ----------------
@@ -2567,7 +2573,7 @@ typedef struct AggState
bool table_filled; /* hash table filled yet? */
int num_hashes;
MemoryContext hash_metacxt; /* memory for hash table bucket array */
- MemoryContext hash_tablecxt; /* memory for hash table entries */
+ MemoryContext hash_tuplescxt; /* memory for hash table tuples */
struct LogicalTapeSet *hash_tapeset; /* tape set for hash spill tapes */
struct HashAggSpill *hash_spills; /* HashAggSpill for each grouping set,
* exists only during first pass */
@@ -2866,7 +2872,7 @@ typedef struct SetOpState
Oid *eqfuncoids; /* per-grouping-field equality fns */
FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
TupleHashTable hashtable; /* hash table with one entry per group */
- MemoryContext tableContext; /* memory context containing hash table */
+ MemoryContext tuplesContext; /* context containing hash table's tuples */
bool table_filled; /* hash table filled yet? */
TupleHashIterator hashiter; /* for iterating through hash table */
} SetOpState;