7 files changed, 262 insertions, 56 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 4b0c49f4bb0..0a820bab87a 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -258,7 +258,9 @@ heap_scan_stream_read_next_parallel(ReadStream *stream,
 		/* parallel scan */
 		table_block_parallelscan_startblock_init(scan->rs_base.rs_rd,
 												 scan->rs_parallelworkerdata,
-												 (ParallelBlockTableScanDesc) scan->rs_base.rs_parallel);
+												 (ParallelBlockTableScanDesc) scan->rs_base.rs_parallel,
+												 scan->rs_startblock,
+												 scan->rs_numblocks);
 
 		/* may return InvalidBlockNumber if there are no more blocks */
 		scan->rs_prefetch_block = table_block_parallelscan_nextpage(scan->rs_base.rs_rd,
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index 5e41404937e..1e099febdc8 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -188,6 +188,37 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
 											pscan, flags);
 }
 
+TableScanDesc
+table_beginscan_parallel_tidrange(Relation relation,
+								  ParallelTableScanDesc pscan)
+{
+	Snapshot	snapshot;
+	uint32		flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE;
+	TableScanDesc sscan;
+
+	Assert(RelFileLocatorEquals(relation->rd_locator, pscan->phs_locator));
+
+	/* disable syncscan in parallel tid range scan. */
+	pscan->phs_syncscan = false;
+
+	if (!pscan->phs_snapshot_any)
+	{
+		/* Snapshot was serialized -- restore it */
+		snapshot = RestoreSnapshot((char *) pscan + pscan->phs_snapshot_off);
+		RegisterSnapshot(snapshot);
+		flags |= SO_TEMP_SNAPSHOT;
+	}
+	else
+	{
+		/* SnapshotAny passed by caller (not serialized) */
+		snapshot = SnapshotAny;
+	}
+
+	sscan = relation->rd_tableam->scan_begin(relation, snapshot, 0, NULL,
+											 pscan, flags);
+	return sscan;
+}
+
 
 /* ----------------------------------------------------------------------------
  * Index scan related functions.
@@ -398,6 +429,7 @@ table_block_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan)
 		bpscan->phs_nblocks > NBuffers / 4;
 	SpinLockInit(&bpscan->phs_mutex);
 	bpscan->phs_startblock = InvalidBlockNumber;
+	bpscan->phs_numblock = InvalidBlockNumber;
 	pg_atomic_init_u64(&bpscan->phs_nallocated, 0);
 
 	return sizeof(ParallelBlockTableScanDescData);
@@ -416,14 +448,22 @@ table_block_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
  *
  * Determine where the parallel seq scan should start.  This function may be
  * called many times, once by each parallel worker.  We must be careful only
- * to set the startblock once.
+ * to set the phs_startblock and phs_numblock fields once.
+ *
+ * Callers may optionally specify a non-InvalidBlockNumber value for
+ * 'startblock' to force the scan to start at the given page.  Likewise,
+ * 'numblocks' can be specified as a non-InvalidBlockNumber to limit the
+ * number of blocks to scan to that many blocks.
  */
 void
 table_block_parallelscan_startblock_init(Relation rel,
 										 ParallelBlockTableScanWorker pbscanwork,
-										 ParallelBlockTableScanDesc pbscan)
+										 ParallelBlockTableScanDesc pbscan,
+										 BlockNumber startblock,
+										 BlockNumber numblocks)
 {
 	BlockNumber sync_startpage = InvalidBlockNumber;
+	BlockNumber scan_nblocks;
 
 	/* Reset the state we use for controlling allocation size. */
 	memset(pbscanwork, 0, sizeof(*pbscanwork));
@@ -431,42 +471,36 @@ table_block_parallelscan_startblock_init(Relation rel,
 	StaticAssertStmt(MaxBlockNumber <= 0xFFFFFFFE,
 					 "pg_nextpower2_32 may be too small for non-standard BlockNumber width");
 
-	/*
-	 * We determine the chunk size based on the size of the relation. First we
-	 * split the relation into PARALLEL_SEQSCAN_NCHUNKS chunks but we then
-	 * take the next highest power of 2 number of the chunk size.  This means
-	 * we split the relation into somewhere between PARALLEL_SEQSCAN_NCHUNKS
-	 * and PARALLEL_SEQSCAN_NCHUNKS / 2 chunks.
-	 */
-	pbscanwork->phsw_chunk_size = pg_nextpower2_32(Max(pbscan->phs_nblocks /
-													   PARALLEL_SEQSCAN_NCHUNKS, 1));
-
-	/*
-	 * Ensure we don't go over the maximum chunk size with larger tables. This
-	 * means we may get much more than PARALLEL_SEQSCAN_NCHUNKS for larger
-	 * tables.  Too large a chunk size has been shown to be detrimental to
-	 * synchronous scan performance.
-	 */
-	pbscanwork->phsw_chunk_size = Min(pbscanwork->phsw_chunk_size,
-									  PARALLEL_SEQSCAN_MAX_CHUNK_SIZE);
-
 retry:
 	/* Grab the spinlock. */
 	SpinLockAcquire(&pbscan->phs_mutex);
 
 	/*
-	 * If the scan's startblock has not yet been initialized, we must do so
-	 * now.  If this is not a synchronized scan, we just start at block 0, but
-	 * if it is a synchronized scan, we must get the starting position from
-	 * the synchronized scan machinery.  We can't hold the spinlock while
-	 * doing that, though, so release the spinlock, get the information we
-	 * need, and retry.  If nobody else has initialized the scan in the
-	 * meantime, we'll fill in the value we fetched on the second time
-	 * through.
+	 * When the caller specified a limit on the number of blocks to scan, set
+	 * that in the ParallelBlockTableScanDesc, if it's not been done by
+	 * another worker already.
+	 */
+	if (numblocks != InvalidBlockNumber &&
+		pbscan->phs_numblock == InvalidBlockNumber)
+	{
+		pbscan->phs_numblock = numblocks;
+	}
+
+	/*
+	 * If the scan's phs_startblock has not yet been initialized, we must do
+	 * so now.  If a startblock was specified, start there, otherwise if this
+	 * is not a synchronized scan, we just start at block 0, but if it is a
+	 * synchronized scan, we must get the starting position from the
+	 * synchronized scan machinery.  We can't hold the spinlock while doing
+	 * that, though, so release the spinlock, get the information we need, and
+	 * retry.  If nobody else has initialized the scan in the meantime, we'll
+	 * fill in the value we fetched on the second time through.
 	 */
 	if (pbscan->phs_startblock == InvalidBlockNumber)
 	{
-		if (!pbscan->base.phs_syncscan)
+		if (startblock != InvalidBlockNumber)
+			pbscan->phs_startblock = startblock;
+		else if (!pbscan->base.phs_syncscan)
 			pbscan->phs_startblock = 0;
 		else if (sync_startpage != InvalidBlockNumber)
 			pbscan->phs_startblock = sync_startpage;
@@ -478,6 +512,34 @@ retry:
 		}
 	}
 	SpinLockRelease(&pbscan->phs_mutex);
+
+	/*
+	 * Figure out how many blocks we're going to scan; either all of them, or
+	 * just phs_numblock's worth, if a limit has been imposed.
+	 */
+	if (pbscan->phs_numblock == InvalidBlockNumber)
+		scan_nblocks = pbscan->phs_nblocks;
+	else
+		scan_nblocks = pbscan->phs_numblock;
+
+	/*
+	 * We determine the chunk size based on scan_nblocks.  First we split
+	 * scan_nblocks into PARALLEL_SEQSCAN_NCHUNKS chunks then we calculate the
+	 * next highest power of 2 number of the result.  This means we split the
+	 * blocks we're scanning into somewhere between PARALLEL_SEQSCAN_NCHUNKS
+	 * and PARALLEL_SEQSCAN_NCHUNKS / 2 chunks.
+	 */
+	pbscanwork->phsw_chunk_size = pg_nextpower2_32(Max(scan_nblocks /
+													   PARALLEL_SEQSCAN_NCHUNKS, 1));
+
+	/*
+	 * Ensure we don't go over the maximum chunk size with larger tables. This
+	 * means we may get much more than PARALLEL_SEQSCAN_NCHUNKS for larger
+	 * tables.  Too large a chunk size has been shown to be detrimental to
+	 * sequential scan performance.
+	 */
+	pbscanwork->phsw_chunk_size = Min(pbscanwork->phsw_chunk_size,
+									  PARALLEL_SEQSCAN_MAX_CHUNK_SIZE);
 }
 
 /*
@@ -493,6 +555,7 @@ table_block_parallelscan_nextpage(Relation rel,
 								  ParallelBlockTableScanWorker pbscanwork,
 								  ParallelBlockTableScanDesc pbscan)
 {
+	BlockNumber scan_nblocks;
 	BlockNumber page;
 	uint64		nallocated;
 
@@ -513,7 +576,7 @@ table_block_parallelscan_nextpage(Relation rel,
 	 *
 	 * Here we name these ranges of blocks "chunks".  The initial size of
 	 * these chunks is determined in table_block_parallelscan_startblock_init
-	 * based on the size of the relation.  Towards the end of the scan, we
+	 * based on the number of blocks to scan.  Towards the end of the scan, we
 	 * start making reductions in the size of the chunks in order to attempt
 	 * to divide the remaining work over all the workers as evenly as
 	 * possible.
@@ -530,17 +593,23 @@ table_block_parallelscan_nextpage(Relation rel,
 	 * phs_nallocated counter will exceed rs_nblocks, because workers will
 	 * still increment the value, when they try to allocate the next block but
 	 * all blocks have been allocated already. The counter must be 64 bits
-	 * wide because of that, to avoid wrapping around when rs_nblocks is close
-	 * to 2^32.
+	 * wide because of that, to avoid wrapping around when scan_nblocks is
+	 * close to 2^32.
 	 *
 	 * The actual block to return is calculated by adding the counter to the
-	 * starting block number, modulo nblocks.
+	 * starting block number, modulo phs_nblocks.
 	 */
 
+	/* First, figure out how many blocks we're planning on scanning */
+	if (pbscan->phs_numblock == InvalidBlockNumber)
+		scan_nblocks = pbscan->phs_nblocks;
+	else
+		scan_nblocks = pbscan->phs_numblock;
+
 	/*
-	 * First check if we have any remaining blocks in a previous chunk for
-	 * this worker.  We must consume all of the blocks from that before we
-	 * allocate a new chunk to the worker.
+	 * Now check if we have any remaining blocks in a previous chunk for this
+	 * worker.  We must consume all of the blocks from that before we allocate
+	 * a new chunk to the worker.
 	 */
 	if (pbscanwork->phsw_chunk_remaining > 0)
 	{
@@ -562,7 +631,7 @@ table_block_parallelscan_nextpage(Relation rel,
 		 * chunk size set to 1.
 		 */
 		if (pbscanwork->phsw_chunk_size > 1 &&
-			pbscanwork->phsw_nallocated > pbscan->phs_nblocks -
+			pbscanwork->phsw_nallocated > scan_nblocks -
 			(pbscanwork->phsw_chunk_size * PARALLEL_SEQSCAN_RAMPDOWN_CHUNKS))
 			pbscanwork->phsw_chunk_size >>= 1;
 
@@ -577,7 +646,8 @@ table_block_parallelscan_nextpage(Relation rel,
 		pbscanwork->phsw_chunk_remaining = pbscanwork->phsw_chunk_size - 1;
 	}
 
-	if (nallocated >= pbscan->phs_nblocks)
+	/* Check if we've run out of blocks to scan */
+	if (nallocated >= scan_nblocks)
 		page = InvalidBlockNumber;	/* all blocks have been allocated */
 	else
 		page = (nallocated + pbscan->phs_startblock) % pbscan->phs_nblocks;
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index f098a5557cf..0125464d942 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -40,6 +40,7 @@
 #include "executor/nodeSeqscan.h"
 #include "executor/nodeSort.h"
 #include "executor/nodeSubplan.h"
+#include "executor/nodeTidrangescan.h"
 #include "executor/tqueue.h"
 #include "jit/jit.h"
 #include "nodes/nodeFuncs.h"
@@ -266,6 +267,11 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e)
 				ExecForeignScanEstimate((ForeignScanState *) planstate,
 										e->pcxt);
 			break;
+		case T_TidRangeScanState:
+			if (planstate->plan->parallel_aware)
+				ExecTidRangeScanEstimate((TidRangeScanState *) planstate,
+										 e->pcxt);
+			break;
 		case T_AppendState:
 			if (planstate->plan->parallel_aware)
 				ExecAppendEstimate((AppendState *) planstate,
@@ -493,6 +499,11 @@ ExecParallelInitializeDSM(PlanState *planstate,
 				ExecForeignScanInitializeDSM((ForeignScanState *) planstate,
 											 d->pcxt);
 			break;
+		case T_TidRangeScanState:
+			if (planstate->plan->parallel_aware)
+				ExecTidRangeScanInitializeDSM((TidRangeScanState *) planstate,
+											  d->pcxt);
+			break;
 		case T_AppendState:
 			if (planstate->plan->parallel_aware)
 				ExecAppendInitializeDSM((AppendState *) planstate,
@@ -994,6 +1005,11 @@ ExecParallelReInitializeDSM(PlanState *planstate,
 				ExecForeignScanReInitializeDSM((ForeignScanState *) planstate,
 											   pcxt);
 			break;
+		case T_TidRangeScanState:
+			if (planstate->plan->parallel_aware)
+				ExecTidRangeScanReInitializeDSM((TidRangeScanState *) planstate,
+												pcxt);
+			break;
 		case T_AppendState:
 			if (planstate->plan->parallel_aware)
 				ExecAppendReInitializeDSM((AppendState *) planstate, pcxt);
@@ -1362,6 +1378,11 @@ ExecParallelInitializeWorker(PlanState *planstate, ParallelWorkerContext *pwcxt)
 				ExecForeignScanInitializeWorker((ForeignScanState *) planstate,
 												pwcxt);
 			break;
+		case T_TidRangeScanState:
+			if (planstate->plan->parallel_aware)
+				ExecTidRangeScanInitializeWorker((TidRangeScanState *) planstate,
+												 pwcxt);
+			break;
 		case T_AppendState:
 			if (planstate->plan->parallel_aware)
 				ExecAppendInitializeWorker((AppendState *) planstate, pwcxt);
diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c
index 1bce8d6cbfe..6fd9f68cddd 100644
--- a/src/backend/executor/nodeTidrangescan.c
+++ b/src/backend/executor/nodeTidrangescan.c
@@ -415,3 +415,83 @@ ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags)
 	 */
 	return tidrangestate;
 }
+
+/* ----------------------------------------------------------------
+ *						Parallel Scan Support
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------------------------------------------------------
+ *		ExecTidRangeScanEstimate
+ *
+ *		Compute the amount of space we'll need in the parallel
+ *		query DSM, and inform pcxt->estimator about our needs.
+ * ----------------------------------------------------------------
+ */
+void
+ExecTidRangeScanEstimate(TidRangeScanState *node, ParallelContext *pcxt)
+{
+	EState	   *estate = node->ss.ps.state;
+
+	node->trss_pscanlen =
+		table_parallelscan_estimate(node->ss.ss_currentRelation,
+									estate->es_snapshot);
+	shm_toc_estimate_chunk(&pcxt->estimator, node->trss_pscanlen);
+	shm_toc_estimate_keys(&pcxt->estimator, 1);
+}
+
+/* ----------------------------------------------------------------
+ *		ExecTidRangeScanInitializeDSM
+ *
+ *		Set up a parallel TID range scan descriptor.
+ * ----------------------------------------------------------------
+ */
+void
+ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt)
+{
+	EState	   *estate = node->ss.ps.state;
+	ParallelTableScanDesc pscan;
+
+	pscan = shm_toc_allocate(pcxt->toc, node->trss_pscanlen);
+	table_parallelscan_initialize(node->ss.ss_currentRelation,
+								  pscan,
+								  estate->es_snapshot);
+	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan);
+	node->ss.ss_currentScanDesc =
+		table_beginscan_parallel_tidrange(node->ss.ss_currentRelation,
+										  pscan);
+}
+
+/* ----------------------------------------------------------------
+ *		ExecTidRangeScanReInitializeDSM
+ *
+ *		Reset shared state before beginning a fresh scan.
+ * ----------------------------------------------------------------
+ */
+void
+ExecTidRangeScanReInitializeDSM(TidRangeScanState *node,
+								ParallelContext *pcxt)
+{
+	ParallelTableScanDesc pscan;
+
+	pscan = node->ss.ss_currentScanDesc->rs_parallel;
+	table_parallelscan_reinitialize(node->ss.ss_currentRelation, pscan);
+}
+
+/* ----------------------------------------------------------------
+ *		ExecTidRangeScanInitializeWorker
+ *
+ *		Copy relevant information from TOC into planstate.
+ * ----------------------------------------------------------------
+ */
+void
+ExecTidRangeScanInitializeWorker(TidRangeScanState *node,
+								 ParallelWorkerContext *pwcxt)
+{
+	ParallelTableScanDesc pscan;
+
+	pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
+	node->ss.ss_currentScanDesc =
+		table_beginscan_parallel_tidrange(node->ss.ss_currentRelation,
+										  pscan);
+}
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 8335cf5b5c5..5a7283bd2f5 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1340,8 +1340,9 @@ cost_tidrangescan(Path *path, PlannerInfo *root,
 {
 	Selectivity selectivity;
 	double		pages;
-	Cost		startup_cost = 0;
-	Cost		run_cost = 0;
+	Cost		startup_cost;
+	Cost		cpu_run_cost;
+	Cost		disk_run_cost;
 	QualCost	qpqual_cost;
 	Cost		cpu_per_tuple;
 	QualCost	tid_qual_cost;
@@ -1373,8 +1374,8 @@ cost_tidrangescan(Path *path, PlannerInfo *root,
 	 * page is just a normal sequential page read. NOTE: it's desirable for
 	 * TID Range Scans to cost more than the equivalent Sequential Scans,
 	 * because Seq Scans have some performance advantages such as scan
-	 * synchronization and parallelizability, and we'd prefer one of them to
-	 * be picked unless a TID Range Scan really is better.
+	 * synchronization, and we'd prefer one of them to be picked unless a TID
+	 * Range Scan really is better.
 	 */
 	ntuples = selectivity * baserel->tuples;
 	nseqpages = pages - 1.0;
@@ -1391,7 +1392,7 @@ cost_tidrangescan(Path *path, PlannerInfo *root,
 							  &spc_seq_page_cost);
 
 	/* disk costs; 1 random page and the remainder as seq pages */
-	run_cost += spc_random_page_cost + spc_seq_page_cost * nseqpages;
+	disk_run_cost = spc_random_page_cost + spc_seq_page_cost * nseqpages;
 
 	/* Add scanning CPU costs */
 	get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
@@ -1403,20 +1404,35 @@ cost_tidrangescan(Path *path, PlannerInfo *root,
 	 * can't be removed, this is a mistake and we're going to underestimate
 	 * the CPU cost a bit.)
 	 */
-	startup_cost += qpqual_cost.startup + tid_qual_cost.per_tuple;
+	startup_cost = qpqual_cost.startup + tid_qual_cost.per_tuple;
 	cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple -
 		tid_qual_cost.per_tuple;
-	run_cost += cpu_per_tuple * ntuples;
+	cpu_run_cost = cpu_per_tuple * ntuples;
 
 	/* tlist eval costs are paid per output row, not per tuple scanned */
 	startup_cost += path->pathtarget->cost.startup;
-	run_cost += path->pathtarget->cost.per_tuple * path->rows;
+	cpu_run_cost += path->pathtarget->cost.per_tuple * path->rows;
+
+	/* Adjust costing for parallelism, if used. */
+	if (path->parallel_workers > 0)
+	{
+		double		parallel_divisor = get_parallel_divisor(path);
+
+		/* The CPU cost is divided among all the workers. */
+		cpu_run_cost /= parallel_divisor;
+
+		/*
+		 * In the case of a parallel plan, the row count needs to represent
+		 * the number of tuples processed per worker.
+		 */
+		path->rows = clamp_row_est(path->rows / parallel_divisor);
+	}
 
 	/* we should not generate this path type when enable_tidscan=false */
 	Assert(enable_tidscan);
 	path->disabled_nodes = 0;
 	path->startup_cost = startup_cost;
-	path->total_cost = startup_cost + run_cost;
+	path->total_cost = startup_cost + cpu_run_cost + disk_run_cost;
 }
 
 /*
diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c
index 2bfb338b81c..3ddbc10bbdf 100644
--- a/src/backend/optimizer/path/tidpath.c
+++ b/src/backend/optimizer/path/tidpath.c
@@ -490,9 +490,8 @@ ec_member_matches_ctid(PlannerInfo *root, RelOptInfo *rel,
 
 /*
  * create_tidscan_paths
- *	  Create paths corresponding to direct TID scans of the given rel.
- *
- *	  Candidate paths are added to the rel's pathlist (using add_path).
+ *	  Create paths corresponding to direct TID scans of the given rel and add
+ *	  them to the corresponding path list via add_path or add_partial_path.
  */
 bool
 create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel)
@@ -553,7 +552,24 @@ create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel)
 
 		add_path(rel, (Path *) create_tidrangescan_path(root, rel,
 														tidrangequals,
-														required_outer));
+														required_outer,
+														0));
+
+		/* If appropriate, consider parallel tid range scan. */
+		if (rel->consider_parallel && required_outer == NULL)
+		{
+			int			parallel_workers;
+
+			parallel_workers = compute_parallel_worker(rel, rel->pages, -1,
+													   max_parallel_workers_per_gather);
+
+			if (parallel_workers > 0)
+				add_partial_path(rel, (Path *) create_tidrangescan_path(root,
+																		rel,
+																		tidrangequals,
+																		required_outer,
+																		parallel_workers));
+		}
 	}
 
 	/*
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e4fd6950fad..fd4bd5f93f0 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1262,7 +1262,8 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals,
  */
 TidRangePath *
 create_tidrangescan_path(PlannerInfo *root, RelOptInfo *rel,
-						 List *tidrangequals, Relids required_outer)
+						 List *tidrangequals, Relids required_outer,
+						 int parallel_workers)
 {
 	TidRangePath *pathnode = makeNode(TidRangePath);
 
@@ -1271,9 +1272,9 @@ create_tidrangescan_path(PlannerInfo *root, RelOptInfo *rel,
 	pathnode->path.pathtarget = rel->reltarget;
 	pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
 														  required_outer);
-	pathnode->path.parallel_aware = false;
+	pathnode->path.parallel_aware = (parallel_workers > 0);
 	pathnode->path.parallel_safe = rel->consider_parallel;
-	pathnode->path.parallel_workers = 0;
+	pathnode->path.parallel_workers = parallel_workers;
 	pathnode->path.pathkeys = NIL;	/* always unordered */
 
 	pathnode->tidrangequals = tidrangequals;