summaryrefslogtreecommitdiff
path: root/src/backend/utils
diff options
context:
space:
mode:
authorTomas Vondra <tomas.vondra@postgresql.org>2025-03-03 16:53:03 +0100
committerTomas Vondra <tomas.vondra@postgresql.org>2025-03-03 16:53:06 +0100
commit8492feb98f6df3f0f03e84ed56f0d1cbb2ac514c (patch)
tree8b5775ca7cdb77a61c4ada41b45579e50bc3cf35 /src/backend/utils
parent3f1db99bfabbb9d4afc41f362d9801512f4c7c65 (diff)
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE and BRIN. For large tables this may result in significant speedup when the build is CPU-bound. The work is divided so that each worker builds index entries on a subset of the table, determined by the regular parallel scan used to read the data. Each worker uses a local tuplesort to sort and merge the entries for the same key. The TID lists do not overlap (for a given key), which means the merge sort simply concatenates the two lists. The merged entries are written into a shared tuplesort for the leader. The leader needs to merge the sorted entries again, before writing them into the index. But this way a significant part of the work happens in the workers, and the leader is left with merging fewer large entries, which is more efficient. Most of the parallelism infrastructure is a simplified copy of the code used by BTREE indexes, omitting the parts irrelevant for GIN indexes (e.g. uniqueness checks). Original patch by me, with reviews and substantial improvements by Matthias van de Meent, certainly enough to make him a co-author. Author: Tomas Vondra, Matthias van de Meent Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
Diffstat (limited to 'src/backend/utils')
-rw-r--r--src/backend/utils/sort/tuplesortvariants.c198
1 files changed, 198 insertions, 0 deletions
diff --git a/src/backend/utils/sort/tuplesortvariants.c b/src/backend/utils/sort/tuplesortvariants.c
index 913c4ef455e..eb8601e2257 100644
--- a/src/backend/utils/sort/tuplesortvariants.c
+++ b/src/backend/utils/sort/tuplesortvariants.c
@@ -20,10 +20,12 @@
#include "postgres.h"
#include "access/brin_tuple.h"
+#include "access/gin_tuple.h"
#include "access/hash.h"
#include "access/htup_details.h"
#include "access/nbtree.h"
#include "catalog/index.h"
+#include "catalog/pg_collation.h"
#include "executor/executor.h"
#include "pg_trace.h"
#include "utils/datum.h"
@@ -46,6 +48,8 @@ static void removeabbrev_index(Tuplesortstate *state, SortTuple *stups,
int count);
static void removeabbrev_index_brin(Tuplesortstate *state, SortTuple *stups,
int count);
+static void removeabbrev_index_gin(Tuplesortstate *state, SortTuple *stups,
+ int count);
static void removeabbrev_datum(Tuplesortstate *state, SortTuple *stups,
int count);
static int comparetup_heap(const SortTuple *a, const SortTuple *b,
@@ -74,6 +78,8 @@ static int comparetup_index_hash_tiebreak(const SortTuple *a, const SortTuple *b
Tuplesortstate *state);
static int comparetup_index_brin(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
+static int comparetup_index_gin(const SortTuple *a, const SortTuple *b,
+ Tuplesortstate *state);
static void writetup_index(Tuplesortstate *state, LogicalTape *tape,
SortTuple *stup);
static void readtup_index(Tuplesortstate *state, SortTuple *stup,
@@ -82,6 +88,10 @@ static void writetup_index_brin(Tuplesortstate *state, LogicalTape *tape,
SortTuple *stup);
static void readtup_index_brin(Tuplesortstate *state, SortTuple *stup,
LogicalTape *tape, unsigned int len);
+static void writetup_index_gin(Tuplesortstate *state, LogicalTape *tape,
+ SortTuple *stup);
+static void readtup_index_gin(Tuplesortstate *state, SortTuple *stup,
+ LogicalTape *tape, unsigned int len);
static int comparetup_datum(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static int comparetup_datum_tiebreak(const SortTuple *a, const SortTuple *b,
@@ -569,6 +579,77 @@ tuplesort_begin_index_brin(int workMem,
}
Tuplesortstate *
+tuplesort_begin_index_gin(Relation heapRel,
+ Relation indexRel,
+ int workMem, SortCoordinate coordinate,
+ int sortopt)
+{
+ Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+ sortopt);
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ MemoryContext oldcontext;
+ int i;
+ TupleDesc desc = RelationGetDescr(indexRel);
+
+ oldcontext = MemoryContextSwitchTo(base->maincontext);
+
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "begin index sort: workMem = %d, randomAccess = %c",
+ workMem,
+ sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
+#endif
+
+ /*
+ * Multi-column GIN indexes expand the row into a separate index entry for
+ * attribute, and that's what we write into the tuplesort. But we still
+ * need to initialize sortsupport for all the attributes.
+ */
+ base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
+
+ /* Prepare SortSupport data for each column */
+ base->sortKeys = (SortSupport) palloc0(base->nKeys *
+ sizeof(SortSupportData));
+
+ for (i = 0; i < base->nKeys; i++)
+ {
+ SortSupport sortKey = base->sortKeys + i;
+ Form_pg_attribute att = TupleDescAttr(desc, i);
+ TypeCacheEntry *typentry;
+
+ sortKey->ssup_cxt = CurrentMemoryContext;
+ sortKey->ssup_collation = indexRel->rd_indcollation[i];
+ sortKey->ssup_nulls_first = false;
+ sortKey->ssup_attno = i + 1;
+ sortKey->abbreviate = false;
+
+ Assert(sortKey->ssup_attno != 0);
+
+ if (!OidIsValid(sortKey->ssup_collation))
+ sortKey->ssup_collation = DEFAULT_COLLATION_OID;
+
+ /*
+ * Look for a ordering for the index key data type, and then the sort
+ * support function.
+ */
+ typentry = lookup_type_cache(att->atttypid, TYPECACHE_LT_OPR);
+ PrepareSortSupportFromOrderingOp(typentry->lt_opr, sortKey);
+ }
+
+ base->removeabbrev = removeabbrev_index_gin;
+ base->comparetup = comparetup_index_gin;
+ base->writetup = writetup_index_gin;
+ base->readtup = readtup_index_gin;
+ base->haveDatum1 = false;
+ base->arg = NULL;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return state;
+}
+
+Tuplesortstate *
tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
bool nullsFirstFlag, int workMem,
SortCoordinate coordinate, int sortopt)
@@ -803,6 +884,37 @@ tuplesort_putbrintuple(Tuplesortstate *state, BrinTuple *tuple, Size size)
MemoryContextSwitchTo(oldcontext);
}
+void
+tuplesort_putgintuple(Tuplesortstate *state, GinTuple *tuple, Size size)
+{
+ SortTuple stup;
+ GinTuple *ctup;
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
+ Size tuplen;
+
+ /* copy the GinTuple into the right memory context */
+ ctup = palloc(size);
+ memcpy(ctup, tuple, size);
+
+ stup.tuple = ctup;
+ stup.datum1 = (Datum) 0;
+ stup.isnull1 = false;
+
+ /* GetMemoryChunkSpace is not supported for bump contexts */
+ if (TupleSortUseBumpTupleCxt(base->sortopt))
+ tuplen = MAXALIGN(size);
+ else
+ tuplen = GetMemoryChunkSpace(ctup);
+
+ tuplesort_puttuple_common(state, &stup,
+ base->sortKeys &&
+ base->sortKeys->abbrev_converter &&
+ !stup.isnull1, tuplen);
+
+ MemoryContextSwitchTo(oldcontext);
+}
+
/*
* Accept one Datum while collecting input data for sort.
*
@@ -975,6 +1087,29 @@ tuplesort_getbrintuple(Tuplesortstate *state, Size *len, bool forward)
return &btup->tuple;
}
+GinTuple *
+tuplesort_getgintuple(Tuplesortstate *state, Size *len, bool forward)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
+ SortTuple stup;
+ GinTuple *tup;
+
+ if (!tuplesort_gettuple_common(state, forward, &stup))
+ stup.tuple = NULL;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ if (!stup.tuple)
+ return false;
+
+ tup = (GinTuple *) stup.tuple;
+
+ *len = tup->tuplen;
+
+ return tup;
+}
+
/*
* Fetch the next Datum in either forward or back direction.
* Returns false if no more datums.
@@ -1764,6 +1899,69 @@ readtup_index_brin(Tuplesortstate *state, SortTuple *stup,
}
/*
+ * Routines specialized for GIN case
+ */
+
+static void
+removeabbrev_index_gin(Tuplesortstate *state, SortTuple *stups, int count)
+{
+ Assert(false);
+ elog(ERROR, "removeabbrev_index_gin not implemented");
+}
+
+static int
+comparetup_index_gin(const SortTuple *a, const SortTuple *b,
+ Tuplesortstate *state)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+
+ Assert(!TuplesortstateGetPublic(state)->haveDatum1);
+
+ return _gin_compare_tuples((GinTuple *) a->tuple,
+ (GinTuple *) b->tuple,
+ base->sortKeys);
+}
+
+static void
+writetup_index_gin(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ GinTuple *tuple = (GinTuple *) stup->tuple;
+ unsigned int tuplen = tuple->tuplen;
+
+ tuplen = tuplen + sizeof(tuplen);
+ LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
+ LogicalTapeWrite(tape, tuple, tuple->tuplen);
+ if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
+ LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
+}
+
+static void
+readtup_index_gin(Tuplesortstate *state, SortTuple *stup,
+ LogicalTape *tape, unsigned int len)
+{
+ GinTuple *tuple;
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ unsigned int tuplen = len - sizeof(unsigned int);
+
+ /*
+ * Allocate space for the GIN sort tuple, which already has the proper
+ * length included in the header.
+ */
+ tuple = (GinTuple *) tuplesort_readtup_alloc(state, tuplen);
+
+ tuple->tuplen = tuplen;
+
+ LogicalTapeReadExact(tape, tuple, tuplen);
+ if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
+ LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
+ stup->tuple = (void *) tuple;
+
+ /* no abbreviations (FIXME maybe use attrnum for this?) */
+ stup->datum1 = (Datum) 0;
+}
+
+/*
* Routines specialized for DatumTuple case
*/