summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTomas Vondra <tomas.vondra@postgresql.org>2023-12-30 22:50:54 +0100
committerTomas Vondra <tomas.vondra@postgresql.org>2023-12-30 23:15:04 +0100
commit6c63bcbf3ccf555f02a8fd8b2acaf01a6e94cb97 (patch)
treec7bfb309c0aa09e27ba008b3c01e8bea1ef9c249
parent5632d6e18a8afd586a99f680eddddc38b4ead769 (diff)
Minor cleanup of the BRIN parallel build code
Commit b437571714 added support for parallel builds for BRIN indexes, using code similar to BTREE parallel builds, and also a new tuplesort variant. This commit simplifies the new code in two ways: * The "spool" grouping tuplesort and the heap/index is not necessary. The heap/index are available as separate arguments, causing confusion. So remove the spool, and use the tuplesort directly. * The new tuplesort variant does not need the heap/index, as it sorts simply by the range block number, without accessing the tuple data. So simplify that too. Initial report and patch by Ranier Vilela, further cleanup by me. Author: Ranier Vilela Discussion: https://postgr.es/m/CAEudQAqD7f2i4iyEaAz-5o-bf6zXVX-AkNUBm-YjUXEemaEh6A%40mail.gmail.com
-rw-r--r--src/backend/access/brin/brin.c98
-rw-r--r--src/backend/utils/sort/tuplesortvariants.c26
-rw-r--r--src/include/utils/tuplesort.h4
-rw-r--r--src/tools/pgindent/typedefs.list1
4 files changed, 39 insertions, 90 deletions
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 23f081389b2..5e662a4d4ee 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -51,16 +51,6 @@
#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xB000000000000005)
/*
- * Status record for spooling/sorting phase.
- */
-typedef struct BrinSpool
-{
- Tuplesortstate *sortstate; /* state data for tuplesort.c */
- Relation heap;
- Relation index;
-} BrinSpool;
-
-/*
* Status for index builds performed in parallel. This is allocated in a
* dynamic shared memory segment.
*/
@@ -183,7 +173,13 @@ typedef struct BrinBuildState
*/
BrinLeader *bs_leader;
int bs_worker_id;
- BrinSpool *bs_spool;
+
+ /*
+ * The sortstate is used by workers (including the leader). It has to be
+ * part of the build state, because that's the only thing passed to the
+ * build callback etc.
+ */
+ Tuplesortstate *bs_sortstate;
} BrinBuildState;
/*
@@ -231,12 +227,11 @@ static void brin_fill_empty_ranges(BrinBuildState *state,
/* parallel index builds */
static void _brin_begin_parallel(BrinBuildState *buildstate, Relation heap, Relation index,
bool isconcurrent, int request);
-static void _brin_end_parallel(BrinLeader *btleader, BrinBuildState *state);
+static void _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state);
static Size _brin_parallel_estimate_shared(Relation heap, Snapshot snapshot);
static void _brin_leader_participate_as_worker(BrinBuildState *buildstate,
Relation heap, Relation index);
static void _brin_parallel_scan_and_build(BrinBuildState *buildstate,
- BrinSpool *brinspool,
BrinShared *brinshared,
Sharedsort *sharedsort,
Relation heap, Relation index,
@@ -1143,10 +1138,6 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
state = initialize_brin_buildstate(index, revmap, pagesPerRange,
RelationGetNumberOfBlocks(heap));
- state->bs_spool = (BrinSpool *) palloc0(sizeof(BrinSpool));
- state->bs_spool->heap = heap;
- state->bs_spool->index = index;
-
/*
* Attempt to launch parallel worker scan when required
*
@@ -1160,11 +1151,13 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
indexInfo->ii_ParallelWorkers);
/*
- * Now scan the relation. No syncscan allowed here because we want the
- * heap blocks in physical order.
- *
* If parallel build requested and at least one worker process was
- * successfully launched, set up coordination state
+ * successfully launched, set up coordination state, wait for workers to
+ * complete. Then read all tuples from the shared tuplesort and insert
+ * them into the index.
+ *
+ * In serial mode, simply scan the table and build the index one index
+ * tuple at a time.
*/
if (state->bs_leader)
{
@@ -1176,9 +1169,8 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
state->bs_leader->nparticipanttuplesorts;
coordinate->sharedsort = state->bs_leader->sharedsort;
-
/*
- * Begin serial/leader tuplesort.
+ * Begin leader tuplesort.
*
* In cases where parallelism is involved, the leader receives the
* same share of maintenance_work_mem as a serial sort (it is
@@ -1199,19 +1191,18 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
* INDEX operation, regardless of the use of parallelism or any other
* factor.
*/
- state->bs_spool->sortstate =
- tuplesort_begin_index_brin(heap, index,
- maintenance_work_mem, coordinate,
+ state->bs_sortstate =
+ tuplesort_begin_index_brin(maintenance_work_mem, coordinate,
TUPLESORT_NONE);
- /*
- * In parallel mode, wait for workers to complete, and then read all
- * tuples from the shared tuplesort and insert them into the index.
- */
_brin_end_parallel(state->bs_leader, state);
}
else /* no parallel index build */
{
+ /*
+ * Now scan the relation. No syncscan allowed here because we want
+ * the heap blocks in physical order.
+ */
reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
brinbuildCallback, (void *) state, NULL);
@@ -1671,7 +1662,7 @@ initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap,
state->bs_dtuple = brin_new_memtuple(state->bs_bdesc);
state->bs_leader = NULL;
state->bs_worker_id = 0;
- state->bs_spool = NULL;
+ state->bs_sortstate = NULL;
state->bs_context = CurrentMemoryContext;
state->bs_emptyTuple = NULL;
state->bs_emptyTupleLen = 0;
@@ -2002,7 +1993,7 @@ form_and_spill_tuple(BrinBuildState *state)
state->bs_dtuple, &size);
/* write the BRIN tuple to the tuplesort */
- tuplesort_putbrintuple(state->bs_spool->sortstate, tup, size);
+ tuplesort_putbrintuple(state->bs_sortstate, tup, size);
state->bs_numtuples++;
@@ -2522,7 +2513,6 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
Size tuplen;
BrinShared *brinshared = brinleader->brinshared;
BlockNumber prevblkno = InvalidBlockNumber;
- BrinSpool *spool;
MemoryContext rangeCxt,
oldCxt;
@@ -2541,8 +2531,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
state->bs_numtuples = brinshared->indtuples;
/* do the actual sort in the leader */
- spool = state->bs_spool;
- tuplesort_performsort(spool->sortstate);
+ tuplesort_performsort(state->bs_sortstate);
/*
* Initialize BrinMemTuple we'll use to union summaries from workers (in
@@ -2568,7 +2557,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
* That probably gives us an index that is cheaper to scan, thanks to
* mostly getting data from the same index page as before.
*/
- while ((btup = tuplesort_getbrintuple(spool->sortstate, &tuplen, true)) != NULL)
+ while ((btup = tuplesort_getbrintuple(state->bs_sortstate, &tuplen, true)) != NULL)
{
/* Ranges should be multiples of pages_per_range for the index. */
Assert(btup->bt_blkno % brinshared->pagesPerRange == 0);
@@ -2640,7 +2629,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
prevblkno = btup->bt_blkno;
}
- tuplesort_end(spool->sortstate);
+ tuplesort_end(state->bs_sortstate);
/* Fill the BRIN tuple for the last page range with data. */
if (prevblkno != InvalidBlockNumber)
@@ -2704,11 +2693,6 @@ _brin_leader_participate_as_worker(BrinBuildState *buildstate, Relation heap, Re
BrinLeader *brinleader = buildstate->bs_leader;
int sortmem;
- /* Allocate memory and initialize private spool */
- buildstate->bs_spool = (BrinSpool *) palloc0(sizeof(BrinSpool));
- buildstate->bs_spool->heap = buildstate->bs_spool->heap;
- buildstate->bs_spool->index = buildstate->bs_spool->index;
-
/*
* Might as well use reliable figure when doling out maintenance_work_mem
* (when requested number of workers were not launched, this will be
@@ -2717,16 +2701,14 @@ _brin_leader_participate_as_worker(BrinBuildState *buildstate, Relation heap, Re
sortmem = maintenance_work_mem / brinleader->nparticipanttuplesorts;
/* Perform work common to all participants */
- _brin_parallel_scan_and_build(buildstate, buildstate->bs_spool, brinleader->brinshared,
+ _brin_parallel_scan_and_build(buildstate, brinleader->brinshared,
brinleader->sharedsort, heap, index, sortmem, true);
}
/*
* Perform a worker's portion of a parallel sort.
*
- * This generates a tuplesort for passed btspool, and a second tuplesort
- * state if a second btspool is need (i.e. for unique index builds). All
- * other spool fields should already be set when this is called.
+ * This generates a tuplesort for the worker portion of the table.
*
* sortmem is the amount of working memory to use within each worker,
* expressed in KBs.
@@ -2734,10 +2716,10 @@ _brin_leader_participate_as_worker(BrinBuildState *buildstate, Relation heap, Re
* When this returns, workers are done, and need only release resources.
*/
static void
-_brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
+_brin_parallel_scan_and_build(BrinBuildState *state,
BrinShared *brinshared, Sharedsort *sharedsort,
- Relation heap, Relation index, int sortmem,
- bool progress)
+ Relation heap, Relation index,
+ int sortmem, bool progress)
{
SortCoordinate coordinate;
TableScanDesc scan;
@@ -2751,10 +2733,8 @@ _brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
coordinate->sharedsort = sharedsort;
/* Begin "partial" tuplesort */
- brinspool->sortstate = tuplesort_begin_index_brin(brinspool->heap,
- brinspool->index,
- sortmem, coordinate,
- TUPLESORT_NONE);
+ state->bs_sortstate = tuplesort_begin_index_brin(sortmem, coordinate,
+ TUPLESORT_NONE);
/* Join parallel scan */
indexInfo = BuildIndexInfo(index);
@@ -2770,7 +2750,7 @@ _brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
form_and_spill_tuple(state);
/* sort the BRIN ranges built by this worker */
- tuplesort_performsort(brinspool->sortstate);
+ tuplesort_performsort(state->bs_sortstate);
state->bs_reltuples += reltuples;
@@ -2786,7 +2766,7 @@ _brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
/* Notify leader */
ConditionVariableSignal(&brinshared->workersdonecv);
- tuplesort_end(brinspool->sortstate);
+ tuplesort_end(state->bs_sortstate);
}
/*
@@ -2844,11 +2824,6 @@ _brin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
brinshared->pagesPerRange,
InvalidBlockNumber);
- /* Initialize worker's own spool */
- buildstate->bs_spool = (BrinSpool *) palloc0(sizeof(BrinSpool));
- buildstate->bs_spool->heap = heapRel;
- buildstate->bs_spool->index = indexRel;
-
/* Look up shared state private to tuplesort.c */
sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
tuplesort_attach_shared(sharedsort, seg);
@@ -2863,8 +2838,7 @@ _brin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
*/
sortmem = maintenance_work_mem / brinshared->scantuplesortstates;
- _brin_parallel_scan_and_build(buildstate, buildstate->bs_spool,
- brinshared, sharedsort,
+ _brin_parallel_scan_and_build(buildstate, brinshared, sharedsort,
heapRel, indexRel, sortmem, false);
/* Report WAL/buffer usage during parallel execution */
diff --git a/src/backend/utils/sort/tuplesortvariants.c b/src/backend/utils/sort/tuplesortvariants.c
index 90fc605f1ca..27425880a5f 100644
--- a/src/backend/utils/sort/tuplesortvariants.c
+++ b/src/backend/utils/sort/tuplesortvariants.c
@@ -138,16 +138,6 @@ typedef struct
} TuplesortIndexHashArg;
/*
- * Data struture pointed by "TuplesortPublic.arg" for the index_brin subcase.
- */
-typedef struct
-{
- TuplesortIndexArg index;
-
- /* XXX do we need something here? */
-} TuplesortIndexBrinArg;
-
-/*
* Data struture pointed by "TuplesortPublic.arg" for the Datum case.
* Set by tuplesort_begin_datum and used only by the DatumTuple routines.
*/
@@ -562,20 +552,13 @@ tuplesort_begin_index_gist(Relation heapRel,
}
Tuplesortstate *
-tuplesort_begin_index_brin(Relation heapRel,
- Relation indexRel,
- int workMem,
+tuplesort_begin_index_brin(int workMem,
SortCoordinate coordinate,
int sortopt)
{
Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
sortopt);
TuplesortPublic *base = TuplesortstateGetPublic(state);
- MemoryContext oldcontext;
- TuplesortIndexBrinArg *arg;
-
- oldcontext = MemoryContextSwitchTo(base->maincontext);
- arg = (TuplesortIndexBrinArg *) palloc(sizeof(TuplesortIndexBrinArg));
#ifdef TRACE_SORT
if (trace_sort)
@@ -592,12 +575,7 @@ tuplesort_begin_index_brin(Relation heapRel,
base->writetup = writetup_index_brin;
base->readtup = readtup_index_brin;
base->haveDatum1 = true;
- base->arg = arg;
-
- arg->index.heapRel = heapRel;
- arg->index.indexRel = indexRel;
-
- MemoryContextSwitchTo(oldcontext);
+ base->arg = NULL;
return state;
}
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index 357eb35311d..103ced4005f 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -430,9 +430,7 @@ extern Tuplesortstate *tuplesort_begin_index_gist(Relation heapRel,
Relation indexRel,
int workMem, SortCoordinate coordinate,
int sortopt);
-extern Tuplesortstate *tuplesort_begin_index_brin(Relation heapRel,
- Relation indexRel,
- int workMem, SortCoordinate coordinate,
+extern Tuplesortstate *tuplesort_begin_index_brin(int workMem, SortCoordinate coordinate,
int sortopt);
extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
Oid sortOperator, Oid sortCollation,
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index e37ef9aa76d..3310726cdd6 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -307,7 +307,6 @@ BrinRevmap
BrinShared
BrinSortTuple
BrinSpecialSpace
-BrinSpool
BrinStatsData
BrinTuple
BrinValues