diff options
| author | David Rowley <drowley@postgresql.org> | 2024-08-20 13:38:22 +1200 |
|---|---|---|
| committer | David Rowley <drowley@postgresql.org> | 2024-08-20 13:38:22 +1200 |
| commit | adf97c1562380e02acd60dc859c289ed3a8352ee (patch) | |
| tree | bbc199a61078c00d997903c4d5ce0c2fdccc7224 /src/include/executor | |
| parent | 9380e5f129d2a160ecc2444f61bb7cb97fd51fbb (diff) | |
Speed up Hash Join by making ExprStates support hashing
Here we add ExprState support for obtaining a 32-bit hash value from a
list of expressions. This allows both faster hashing and also JIT
compilation of these expressions. This is especially useful when hash
joins have multiple join keys. The previous code called ExecEvalExpr on
each hash join key individually, which was inefficient because tuple
deformation only took one key into account at a time, and that
could lead to walking the tuple once for each join key. With the new
code, we'll determine the maximum attribute required and deform the tuple
to that point only once.
Some performance tests done with this change have shown up to a 20%
performance increase of a query containing a Hash Join without JIT
compilation and up to a 26% performance increase when JIT is enabled and
optimization and inlining were performed by the JIT compiler. The
performance increase with 1 join column was smaller, at 14% both
with and without JIT. This test was done using a fairly small hash
table and a large number of hash probes. The increase will likely be
less with large tables, especially ones larger than the L3 cache, as memory
pressure is more likely to be the limiting factor there.
This commit only addresses Hash Joins, but puts in place the expression
evaluation and JIT compilation infrastructure for other hashing needs such as Hash
Aggregate.
Author: David Rowley
Reviewed-by: Alexey Dvoichenkov <alexey@hyperplane.net>
Reviewed-by: Tels <nospam-pg-abuse@bloodgate.com>
Discussion: https://postgr.es/m/CAApHDvoexAxgQFNQD_GRkr2O_eJUD1-wUGm%3Dm0L%2BGc%3DT%3DkEa4g%40mail.gmail.com
Diffstat (limited to 'src/include/executor')
| -rw-r--r-- | src/include/executor/execExpr.h | 24 | ||||
| -rw-r--r-- | src/include/executor/executor.h | 7 | ||||
| -rw-r--r-- | src/include/executor/hashjoin.h | 12 | ||||
| -rw-r--r-- | src/include/executor/nodeHash.h | 9 |
4 files changed, 32 insertions, 20 deletions
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h index 845f3422dea..eec0aa699e5 100644 --- a/src/include/executor/execExpr.h +++ b/src/include/executor/execExpr.h @@ -235,6 +235,13 @@ typedef enum ExprEvalOp /* evaluate a single domain CHECK constraint */ EEOP_DOMAIN_CHECK, + /* evaluation steps for hashing */ + EEOP_HASHDATUM_SET_INITVAL, + EEOP_HASHDATUM_FIRST, + EEOP_HASHDATUM_FIRST_STRICT, + EEOP_HASHDATUM_NEXT32, + EEOP_HASHDATUM_NEXT32_STRICT, + /* evaluate assorted special-purpose expression types */ EEOP_CONVERT_ROWTYPE, EEOP_SCALARARRAYOP, @@ -558,6 +565,23 @@ typedef struct ExprEvalStep ErrorSaveContext *escontext; } domaincheck; + /* for EEOP_HASH_SET_INITVAL */ + struct + { + Datum init_value; + + } hashdatum_initvalue; + + /* for EEOP_HASHDATUM_(FIRST|NEXT32)[_STRICT] */ + struct + { + FmgrInfo *finfo; /* function's lookup data */ + FunctionCallInfo fcinfo_data; /* arguments etc */ + /* faster to access without additional indirection: */ + PGFunction fn_addr; /* actual call address */ + int jumpdone; /* jump here on null */ + } hashdatum; + /* for EEOP_CONVERT_ROWTYPE */ struct { diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 9770752ea3c..046a7fb69b0 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -285,6 +285,13 @@ extern ExprState *ExecInitCheck(List *qual, PlanState *parent); extern List *ExecInitExprList(List *nodes, PlanState *parent); extern ExprState *ExecBuildAggTrans(AggState *aggstate, struct AggStatePerPhaseData *phase, bool doSort, bool doHash, bool nullcheck); +extern ExprState *ExecBuildHash32Expr(TupleDesc desc, + const TupleTableSlotOps *ops, + const Oid *hashfunc_oids, + const List *collations, + const List *hash_exprs, + const bool *opstrict, PlanState *parent, + uint32 init_value, bool keep_nulls); extern ExprState *ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, const TupleTableSlotOps *lops, const TupleTableSlotOps 
*rops, int numCols, diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index 9197846cda7..2d8ed8688cd 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -313,8 +313,6 @@ typedef struct HashJoinTableData dsa_pointer_atomic *shared; } buckets; - bool keepNulls; /* true to store unmatchable NULL tuples */ - bool skewEnabled; /* are we using skew optimization? */ HashSkewBucket **skewBucket; /* hashtable of skew buckets */ int skewBucketLen; /* size of skewBucket array (a power of 2!) */ @@ -343,16 +341,6 @@ typedef struct HashJoinTableData BufFile **innerBatchFile; /* buffered virtual temp file per batch */ BufFile **outerBatchFile; /* buffered virtual temp file per batch */ - /* - * Info about the datatype-specific hash functions for the datatypes being - * hashed. These are arrays of the same length as the number of hash join - * clauses (hash keys). - */ - FmgrInfo *outer_hashfunctions; /* lookup data for hash functions */ - FmgrInfo *inner_hashfunctions; /* lookup data for hash functions */ - bool *hashStrict; /* is each hash join operator strict? 
*/ - Oid *collations; - Size spaceUsed; /* memory space currently used by tuples */ Size spaceAllowed; /* upper limit for space used */ Size spacePeak; /* peak space used */ diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h index a95911c2fee..e4eb7bc6359 100644 --- a/src/include/executor/nodeHash.h +++ b/src/include/executor/nodeHash.h @@ -24,8 +24,7 @@ extern Node *MultiExecHash(HashState *node); extern void ExecEndHash(HashState *node); extern void ExecReScanHash(HashState *node); -extern HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, - bool keepNulls); +extern HashJoinTable ExecHashTableCreate(HashState *state); extern void ExecParallelHashTableAlloc(HashJoinTable hashtable, int batchno); extern void ExecHashTableDestroy(HashJoinTable hashtable); @@ -43,12 +42,6 @@ extern void ExecParallelHashTableInsert(HashJoinTable hashtable, extern void ExecParallelHashTableInsertCurrentBatch(HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue); -extern bool ExecHashGetHashValue(HashJoinTable hashtable, - ExprContext *econtext, - List *hashkeys, - bool outer_tuple, - bool keep_nulls, - uint32 *hashvalue); extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable, uint32 hashvalue, int *bucketno, |
