author     Melanie Plageman <melanieplageman@gmail.com>  2025-03-15 10:34:42 -0400
committer  Melanie Plageman <melanieplageman@gmail.com>  2025-03-15 10:34:42 -0400
commit     2b73a8cd33b745c5b8a7f44322f86642519e3a40 (patch)
tree       43c4cb7aa83247b634ff15d4df76a75f16221230 /src/backend/access/heap/heapam_handler.c
parent     944e81bf99db2b5b70b8a389d4f273534da73f74 (diff)
BitmapHeapScan uses the read stream API
Make Bitmap Heap Scan use the read stream API instead of invoking
ReadBuffer() for each block indicated by the bitmap. The read stream
API handles prefetching, so remove all of the explicit prefetching
from bitmap heap scan code. Now, the heap table AM implements a read
stream callback which uses the bitmap iterator to return the next
required block to the read stream code.

Tomas Vondra conducted extensive regression testing of this feature.
Andres Freund, Thomas Munro, and I analyzed regressions and Thomas
Munro patched the read stream API.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Tomas Vondra <tomas@vondra.me>
Tested-by: Tomas Vondra <tomas@vondra.me>
Tested-by: Andres Freund <andres@anarazel.de>
Tested-by: Thomas Munro <thomas.munro@gmail.com>
Tested-by: Nazir Bilal Yavuz <byavuz81@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZwCwWFeL_H3ia26bP2e7HiKLWt0ZmGXPVwPO6uXq0vaA%40mail.gmail.com
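As context for the mechanism described above, here is a minimal sketch of the
shape of such a read stream callback; it is not the committed code. The
function name bitmapheap_stream_read_next and the exact descriptor fields are
illustrative assumptions. The callback signature, tbm_iterate(), and
CHECK_FOR_INTERRUPTS() are existing PostgreSQL facilities, and the committed
callback additionally implements the skip-fetch accounting discussed in the
diff below.

static BlockNumber
bitmapheap_stream_read_next(ReadStream *stream,
                            void *callback_private_data,
                            void *per_buffer_data)
{
    TBMIterateResult *tbmres = per_buffer_data;
    HeapScanDesc hscan = (HeapScanDesc) callback_private_data;

    for (;;)
    {
        CHECK_FOR_INTERRUPTS();

        /* Bitmap exhausted: signal end-of-stream. */
        if (!tbm_iterate(&hscan->rs_base.st.rs_tbmiterator, tbmres))
            return InvalidBlockNumber;

        /*
         * Ignore entries past what we think is the end of the relation;
         * it may have been extended after the scan started.  (The
         * committed code keeps such entries under SERIALIZABLE
         * isolation.)
         */
        if (tbmres->blockno >= hscan->rs_nblocks)
            continue;

        /*
         * The TBMIterateResult rides along as per-buffer data and is
         * handed back by read_stream_next_buffer() together with the
         * pinned buffer for this block.
         */
        return tbmres->blockno;
    }
}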
Diffstat (limited to 'src/backend/access/heap/heapam_handler.c')
-rw-r--r--  src/backend/access/heap/heapam_handler.c  |  90
1 file changed, 40 insertions(+), 50 deletions(-)
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 25d26409e2c..3035adacade 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -2117,82 +2117,72 @@ heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
static bool
heapam_scan_bitmap_next_block(TableScanDesc scan,
- BlockNumber *blockno, bool *recheck,
+ bool *recheck,
uint64 *lossy_pages, uint64 *exact_pages)
{
BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) scan;
HeapScanDesc hscan = (HeapScanDesc) bscan;
BlockNumber block;
+ void *per_buffer_data;
Buffer buffer;
Snapshot snapshot;
int ntup;
- TBMIterateResult tbmres;
+ TBMIterateResult *tbmres;
OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE];
int noffsets = -1;
Assert(scan->rs_flags & SO_TYPE_BITMAPSCAN);
+ Assert(hscan->rs_read_stream);
hscan->rs_cindex = 0;
hscan->rs_ntuples = 0;
- *blockno = InvalidBlockNumber;
- *recheck = true;
-
- do
+ /* Release buffer containing previous block. */
+ if (BufferIsValid(hscan->rs_cbuf))
{
- CHECK_FOR_INTERRUPTS();
+ ReleaseBuffer(hscan->rs_cbuf);
+ hscan->rs_cbuf = InvalidBuffer;
+ }
- if (!tbm_iterate(&scan->st.rs_tbmiterator, &tbmres))
- return false;
+ hscan->rs_cbuf = read_stream_next_buffer(hscan->rs_read_stream,
+ &per_buffer_data);
- /* Exact pages need their tuple offsets extracted. */
- if (!tbmres.lossy)
- noffsets = tbm_extract_page_tuple(&tbmres, offsets,
- TBM_MAX_TUPLES_PER_PAGE);
+ if (BufferIsInvalid(hscan->rs_cbuf))
+ {
+ if (BufferIsValid(bscan->rs_vmbuffer))
+ {
+ ReleaseBuffer(bscan->rs_vmbuffer);
+ bscan->rs_vmbuffer = InvalidBuffer;
+ }
/*
- * Ignore any claimed entries past what we think is the end of the
- * relation. It may have been extended after the start of our scan (we
- * only hold an AccessShareLock, and it could be inserts from this
- * backend). We don't take this optimization in SERIALIZABLE
- * isolation though, as we need to examine all invisible tuples
- * reachable by the index.
+ * Bitmap is exhausted. Time to emit empty tuples if relevant. We emit
+ * all empty tuples at the end instead of emitting them per block we
+ * skip fetching. This is necessary because the streaming read API
+ * will only return TBMIterateResults for blocks actually fetched.
+ * When we skip fetching a block, we keep track of how many empty
+ * tuples to emit at the end of the BitmapHeapScan. We do not recheck
+ * all NULL tuples.
*/
- } while (!IsolationIsSerializable() &&
- tbmres.blockno >= hscan->rs_nblocks);
+ *recheck = false;
+ return bscan->rs_empty_tuples_pending > 0;
+ }
- /* Got a valid block */
- *blockno = tbmres.blockno;
- *recheck = tbmres.recheck;
+ Assert(per_buffer_data);
- /*
- * We can skip fetching the heap page if we don't need any fields from the
- * heap, the bitmap entries don't need rechecking, and all tuples on the
- * page are visible to our transaction.
- */
- if (!(scan->rs_flags & SO_NEED_TUPLES) &&
- !tbmres.recheck &&
- VM_ALL_VISIBLE(scan->rs_rd, tbmres.blockno, &bscan->rs_vmbuffer))
- {
- /* can't be lossy in the skip_fetch case */
- Assert(!tbmres.lossy);
- Assert(bscan->rs_empty_tuples_pending >= 0);
- Assert(noffsets > -1);
+ tbmres = per_buffer_data;
- bscan->rs_empty_tuples_pending += noffsets;
+ Assert(BlockNumberIsValid(tbmres->blockno));
+ Assert(BufferGetBlockNumber(hscan->rs_cbuf) == tbmres->blockno);
- return true;
- }
+ /* Exact pages need their tuple offsets extracted. */
+ if (!tbmres->lossy)
+ noffsets = tbm_extract_page_tuple(tbmres, offsets,
+ TBM_MAX_TUPLES_PER_PAGE);
- block = tbmres.blockno;
+ *recheck = tbmres->recheck;
- /*
- * Acquire pin on the target heap page, trading in any pin we held before.
- */
- hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf,
- scan->rs_rd,
- block);
- hscan->rs_cblock = block;
+ block = hscan->rs_cblock = tbmres->blockno;
buffer = hscan->rs_cbuf;
snapshot = scan->rs_snapshot;
@@ -2213,7 +2203,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
/*
* We need two separate strategies for lossy and non-lossy cases.
*/
- if (!tbmres.lossy)
+ if (!tbmres->lossy)
{
/*
* Bitmap is non-lossy, so we just look through the offsets listed in
@@ -2277,7 +2267,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
Assert(ntup <= MaxHeapTuplesPerPage);
hscan->rs_ntuples = ntup;
- if (tbmres.lossy)
+ if (tbmres->lossy)
(*lossy_pages)++;
else
(*exact_pages)++;
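A closing note on the rs_empty_tuples_pending bookkeeping introduced above:
the counter is drained on the tuple-fetching side. A minimal sketch, assuming
the next-tuple path looks roughly like the fragment below (the function name
is illustrative; ExecStoreAllNullTuple() is the existing executor helper for
an all-NULL virtual tuple):

static bool
bitmap_next_tuple_sketch(BitmapHeapScanDesc bscan, TupleTableSlot *slot)
{
    /*
     * Each pending entry stands for a tuple on an all-visible page whose
     * fetch was skipped (no heap fields needed, no recheck), so an
     * all-NULL virtual tuple can be returned without any page access.
     */
    if (bscan->rs_empty_tuples_pending > 0)
    {
        ExecStoreAllNullTuple(slot);
        bscan->rs_empty_tuples_pending--;
        return true;
    }

    /* ... otherwise return the next tuple from the current page ... */
    return false;
}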