author     Melanie Plageman <melanieplageman@gmail.com>  2025-03-15 10:34:42 -0400
committer  Melanie Plageman <melanieplageman@gmail.com>  2025-03-15 10:34:42 -0400
commit     2b73a8cd33b745c5b8a7f44322f86642519e3a40
tree       43c4cb7aa83247b634ff15d4df76a75f16221230 /src/backend/access/heap/heapam_handler.c
parent     944e81bf99db2b5b70b8a389d4f273534da73f74
BitmapHeapScan uses the read stream API
Make Bitmap Heap Scan use the read stream API instead of invoking
ReadBuffer() for each block indicated by the bitmap.
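To illustrate the shape of that change, here is a minimal consumer-side
sketch, assuming a stream already created for this scan (a setup sketch
follows after the next paragraph); the variable names are invented for
this example and it is not the committed code:

	/*
	 * Hedged sketch: instead of one ReadBuffer() call per bitmap block,
	 * the consumer pulls already-pinned buffers from the read stream.
	 * "stream" is assumed to have been created with per_buffer_data_size
	 * set to sizeof(TBMIterateResult), so each buffer arrives paired
	 * with its iterator result.
	 */
	Buffer		buf;
	void	   *per_buffer_data;

	while ((buf = read_stream_next_buffer(stream, &per_buffer_data)) != InvalidBuffer)
	{
		TBMIterateResult *tbmres = per_buffer_data;

		/* ... process the tuples of block tbmres->blockno in buf ... */
		ReleaseBuffer(buf);
	}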
The read stream API handles prefetching, so remove all of the explicit
prefetching from the bitmap heap scan code.
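For context, a sketch of how such a stream might be set up.
read_stream_begin_relation() and READ_STREAM_DEFAULT are the public read
stream API; the callback name bhs_read_next (sketched further below) and
the argument choices are hypothetical:

	/*
	 * Illustrative setup, not the committed code.  The stream owns the
	 * lookahead distance and issues prefetches itself, which is why the
	 * hand-rolled PrefetchBuffer() logic can be removed from the scan.
	 */
	hscan->rs_read_stream =
		read_stream_begin_relation(READ_STREAM_DEFAULT,
								   NULL,	/* default buffer access strategy */
								   scan->rs_rd,
								   MAIN_FORKNUM,
								   bhs_read_next,	/* hypothetical callback */
								   scan,	/* callback private data */
								   sizeof(TBMIterateResult));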
The heap table AM now implements a read stream callback which uses the
bitmap iterator to return the next required block to the read stream
code.
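A hedged sketch of that callback's shape follows; the function name is
hypothetical, and the skip-fetch handling present in the real callback is
omitted here. The read stream invokes it whenever it wants another block
number, and returning InvalidBlockNumber ends the stream:

	static BlockNumber
	bhs_read_next(ReadStream *stream, void *callback_private_data,
				  void *per_buffer_data)
	{
		TableScanDesc scan = callback_private_data;
		TBMIterateResult *tbmres = per_buffer_data;

		/*
		 * Advance the bitmap iterator, writing the result into the
		 * per-buffer data slot so it travels with the buffer back to
		 * heapam_scan_bitmap_next_block().
		 */
		if (!tbm_iterate(&scan->st.rs_tbmiterator, tbmres))
			return InvalidBlockNumber;	/* bitmap exhausted */

		return tbmres->blockno;
	}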
Tomas Vondra conducted extensive regression testing of this feature.
Andres Freund, Thomas Munro, and I analyzed regressions, and Thomas
Munro patched the read stream API.
Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Tomas Vondra <tomas@vondra.me>
Tested-by: Tomas Vondra <tomas@vondra.me>
Tested-by: Andres Freund <andres@anarazel.de>
Tested-by: Thomas Munro <thomas.munro@gmail.com>
Tested-by: Nazir Bilal Yavuz <byavuz81@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZwCwWFeL_H3ia26bP2e7HiKLWt0ZmGXPVwPO6uXq0vaA%40mail.gmail.com
Diffstat (limited to 'src/backend/access/heap/heapam_handler.c')
-rw-r--r--  src/backend/access/heap/heapam_handler.c | 90
1 file changed, 40 insertions(+), 50 deletions(-)
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 25d26409e2c..3035adacade 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -2117,82 +2117,72 @@ heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
 static bool
 heapam_scan_bitmap_next_block(TableScanDesc scan,
-                              BlockNumber *blockno, bool *recheck,
+                              bool *recheck,
                               uint64 *lossy_pages, uint64 *exact_pages)
 {
     BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) scan;
     HeapScanDesc hscan = (HeapScanDesc) bscan;
     BlockNumber block;
+    void       *per_buffer_data;
     Buffer      buffer;
     Snapshot    snapshot;
     int         ntup;
-    TBMIterateResult tbmres;
+    TBMIterateResult *tbmres;
     OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE];
     int         noffsets = -1;
 
     Assert(scan->rs_flags & SO_TYPE_BITMAPSCAN);
+    Assert(hscan->rs_read_stream);
 
     hscan->rs_cindex = 0;
     hscan->rs_ntuples = 0;
 
-    *blockno = InvalidBlockNumber;
-    *recheck = true;
-
-    do
+    /* Release buffer containing previous block. */
+    if (BufferIsValid(hscan->rs_cbuf))
     {
-        CHECK_FOR_INTERRUPTS();
+        ReleaseBuffer(hscan->rs_cbuf);
+        hscan->rs_cbuf = InvalidBuffer;
+    }
 
-        if (!tbm_iterate(&scan->st.rs_tbmiterator, &tbmres))
-            return false;
+    hscan->rs_cbuf = read_stream_next_buffer(hscan->rs_read_stream,
+                                             &per_buffer_data);
 
-        /* Exact pages need their tuple offsets extracted. */
-        if (!tbmres.lossy)
-            noffsets = tbm_extract_page_tuple(&tbmres, offsets,
-                                              TBM_MAX_TUPLES_PER_PAGE);
+    if (BufferIsInvalid(hscan->rs_cbuf))
+    {
+        if (BufferIsValid(bscan->rs_vmbuffer))
+        {
+            ReleaseBuffer(bscan->rs_vmbuffer);
+            bscan->rs_vmbuffer = InvalidBuffer;
+        }
 
         /*
-         * Ignore any claimed entries past what we think is the end of the
-         * relation. It may have been extended after the start of our scan (we
-         * only hold an AccessShareLock, and it could be inserts from this
-         * backend). We don't take this optimization in SERIALIZABLE
-         * isolation though, as we need to examine all invisible tuples
-         * reachable by the index.
+         * Bitmap is exhausted. Time to emit empty tuples if relevant. We emit
+         * all empty tuples at the end instead of emitting them per block we
+         * skip fetching. This is necessary because the streaming read API
+         * will only return TBMIterateResults for blocks actually fetched.
+         * When we skip fetching a block, we keep track of how many empty
+         * tuples to emit at the end of the BitmapHeapScan. We do not recheck
+         * all NULL tuples.
          */
-    } while (!IsolationIsSerializable() &&
-             tbmres.blockno >= hscan->rs_nblocks);
+        *recheck = false;
+        return bscan->rs_empty_tuples_pending > 0;
+    }
 
-    /* Got a valid block */
-    *blockno = tbmres.blockno;
-    *recheck = tbmres.recheck;
+    Assert(per_buffer_data);
 
-    /*
-     * We can skip fetching the heap page if we don't need any fields from the
-     * heap, the bitmap entries don't need rechecking, and all tuples on the
-     * page are visible to our transaction.
-     */
-    if (!(scan->rs_flags & SO_NEED_TUPLES) &&
-        !tbmres.recheck &&
-        VM_ALL_VISIBLE(scan->rs_rd, tbmres.blockno, &bscan->rs_vmbuffer))
-    {
-        /* can't be lossy in the skip_fetch case */
-        Assert(!tbmres.lossy);
-        Assert(bscan->rs_empty_tuples_pending >= 0);
-        Assert(noffsets > -1);
+    tbmres = per_buffer_data;
 
-        bscan->rs_empty_tuples_pending += noffsets;
+    Assert(BlockNumberIsValid(tbmres->blockno));
+    Assert(BufferGetBlockNumber(hscan->rs_cbuf) == tbmres->blockno);
 
-        return true;
-    }
+    /* Exact pages need their tuple offsets extracted. */
+    if (!tbmres->lossy)
+        noffsets = tbm_extract_page_tuple(tbmres, offsets,
+                                          TBM_MAX_TUPLES_PER_PAGE);
 
-    block = tbmres.blockno;
+    *recheck = tbmres->recheck;
 
-    /*
-     * Acquire pin on the target heap page, trading in any pin we held before.
-     */
-    hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf,
-                                          scan->rs_rd,
-                                          block);
-    hscan->rs_cblock = block;
+    block = hscan->rs_cblock = tbmres->blockno;
     buffer = hscan->rs_cbuf;
     snapshot = scan->rs_snapshot;
 
@@ -2213,7 +2203,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
     /*
      * We need two separate strategies for lossy and non-lossy cases.
      */
-    if (!tbmres.lossy)
+    if (!tbmres->lossy)
     {
         /*
          * Bitmap is non-lossy, so we just look through the offsets listed in
@@ -2277,7 +2267,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
     Assert(ntup <= MaxHeapTuplesPerPage);
     hscan->rs_ntuples = ntup;
 
-    if (tbmres.lossy)
+    if (tbmres->lossy)
         (*lossy_pages)++;
     else
         (*exact_pages)++;
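One consequence of the new exhaustion path above: blocks skipped by the
skip-fetch optimization never come back from the stream, so their
all-null tuples are emitted only once the stream runs dry. A hedged
sketch of how the pending counter might be drained on the next-tuple
side (not part of this hunk; ExecStoreAllNullTuple() is the real
executor helper, but the surrounding control flow is assumed):

	if (bscan->rs_empty_tuples_pending > 0)
	{
		/*
		 * The page was all-visible and needed no recheck, so no fields
		 * from the heap are required: return an all-null tuple.
		 */
		ExecStoreAllNullTuple(slot);
		bscan->rs_empty_tuples_pending--;
		return true;
	}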