diff options
Diffstat (limited to 'src/backend/access/heap/heapam.c')
-rw-r--r-- | src/backend/access/heap/heapam.c | 126 |
1 file changed, 112 insertions, 14 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 0b20e5e9a8d..a0b561c209e 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.234 2007/05/30 20:11:53 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.235 2007/06/08 18:23:52 tgl Exp $ * * * INTERFACE ROUTINES @@ -78,29 +78,44 @@ initscan(HeapScanDesc scan, ScanKey key) * Determine the number of blocks we have to scan. * * It is sufficient to do this once at scan start, since any tuples added - * while the scan is in progress will be invisible to my transaction - * anyway... + * while the scan is in progress will be invisible to my snapshot + * anyway. (That is not true when using a non-MVCC snapshot. However, + * we couldn't guarantee to return tuples added after scan start anyway, + * since they might go into pages we already scanned. To guarantee + * consistent results for a non-MVCC snapshot, the caller must hold some + * higher-level lock that ensures the interesting tuple(s) won't change.) */ scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd); /* * If the table is large relative to NBuffers, use a bulk-read access - * strategy, else use the default random-access strategy. During a - * rescan, don't make a new strategy object if we don't have to. + * strategy and enable synchronized scanning (see syncscan.c). Although + * the thresholds for these features could be different, we make them the + * same so that there are only two behaviors to tune rather than four. + * + * During a rescan, don't make a new strategy object if we don't have to. 
*/ if (scan->rs_nblocks > NBuffers / 4 && !scan->rs_rd->rd_istemp) { if (scan->rs_strategy == NULL) scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD); + + scan->rs_syncscan = true; + scan->rs_startblock = ss_get_location(scan->rs_rd, scan->rs_nblocks); } else { if (scan->rs_strategy != NULL) FreeAccessStrategy(scan->rs_strategy); scan->rs_strategy = NULL; + + scan->rs_syncscan = false; + scan->rs_startblock = 0; } + /* rs_pageatatime was set when the snapshot was filled in */ + scan->rs_inited = false; scan->rs_ctup.t_data = NULL; ItemPointerSetInvalid(&scan->rs_ctup.t_self); @@ -229,6 +244,7 @@ heapgettup(HeapScanDesc scan, Snapshot snapshot = scan->rs_snapshot; bool backward = ScanDirectionIsBackward(dir); BlockNumber page; + bool finished; Page dp; int lines; OffsetNumber lineoff; @@ -251,7 +267,7 @@ heapgettup(HeapScanDesc scan, tuple->t_data = NULL; return; } - page = 0; /* first page */ + page = scan->rs_startblock; /* first page */ heapgetpage(scan, page); lineoff = FirstOffsetNumber; /* first offnum */ scan->rs_inited = true; @@ -285,7 +301,18 @@ heapgettup(HeapScanDesc scan, tuple->t_data = NULL; return; } - page = scan->rs_nblocks - 1; /* final page */ + /* + * Disable reporting to syncscan logic in a backwards scan; it's + * not very likely anyone else is doing the same thing at the same + * time, and much more likely that we'll just bollix things for + * forward scanners. 
+ */ + scan->rs_syncscan = false; + /* start from last page of the scan */ + if (scan->rs_startblock > 0) + page = scan->rs_startblock - 1; + else + page = scan->rs_nblocks - 1; heapgetpage(scan, page); } else @@ -398,9 +425,42 @@ heapgettup(HeapScanDesc scan, LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); /* + * advance to next/prior page and detect end of scan + */ + if (backward) + { + finished = (page == scan->rs_startblock); + if (page == 0) + page = scan->rs_nblocks; + page--; + } + else + { + page++; + if (page >= scan->rs_nblocks) + page = 0; + finished = (page == scan->rs_startblock); + + /* + * Report our new scan position for synchronization purposes. + * We don't do that when moving backwards, however. That would + * just mess up any other forward-moving scanners. + * + * Note: we do this before checking for end of scan so that the + * final state of the position hint is back at the start of the + * rel. That's not strictly necessary, but otherwise when you run + * the same query multiple times the starting position would shift + * a little bit backwards on every invocation, which is confusing. + * We don't guarantee any specific ordering in general, though. + */ + if (scan->rs_syncscan) + ss_report_location(scan->rs_rd, page); + } + + /* * return NULL if we've exhausted all the pages */ - if (backward ? (page == 0) : (page + 1 >= scan->rs_nblocks)) + if (finished) { if (BufferIsValid(scan->rs_cbuf)) ReleaseBuffer(scan->rs_cbuf); @@ -411,8 +471,6 @@ heapgettup(HeapScanDesc scan, return; } - page = backward ? 
(page - 1) : (page + 1); - heapgetpage(scan, page); LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); @@ -455,6 +513,7 @@ heapgettup_pagemode(HeapScanDesc scan, HeapTuple tuple = &(scan->rs_ctup); bool backward = ScanDirectionIsBackward(dir); BlockNumber page; + bool finished; Page dp; int lines; int lineindex; @@ -478,7 +537,7 @@ heapgettup_pagemode(HeapScanDesc scan, tuple->t_data = NULL; return; } - page = 0; /* first page */ + page = scan->rs_startblock; /* first page */ heapgetpage(scan, page); lineindex = 0; scan->rs_inited = true; @@ -509,7 +568,18 @@ heapgettup_pagemode(HeapScanDesc scan, tuple->t_data = NULL; return; } - page = scan->rs_nblocks - 1; /* final page */ + /* + * Disable reporting to syncscan logic in a backwards scan; it's + * not very likely anyone else is doing the same thing at the same + * time, and much more likely that we'll just bollix things for + * forward scanners. + */ + scan->rs_syncscan = false; + /* start from last page of the scan */ + if (scan->rs_startblock > 0) + page = scan->rs_startblock - 1; + else + page = scan->rs_nblocks - 1; heapgetpage(scan, page); } else @@ -616,11 +686,40 @@ heapgettup_pagemode(HeapScanDesc scan, * if we get here, it means we've exhausted the items on this page and * it's time to move to the next. */ + if (backward) + { + finished = (page == scan->rs_startblock); + if (page == 0) + page = scan->rs_nblocks; + page--; + } + else + { + page++; + if (page >= scan->rs_nblocks) + page = 0; + finished = (page == scan->rs_startblock); + + /* + * Report our new scan position for synchronization purposes. + * We don't do that when moving backwards, however. That would + * just mess up any other forward-moving scanners. + * + * Note: we do this before checking for end of scan so that the + * final state of the position hint is back at the start of the + * rel. 
That's not strictly necessary, but otherwise when you run + * the same query multiple times the starting position would shift + * a little bit backwards on every invocation, which is confusing. + * We don't guarantee any specific ordering in general, though. + */ + if (scan->rs_syncscan) + ss_report_location(scan->rs_rd, page); + } /* * return NULL if we've exhausted all the pages */ - if (backward ? (page == 0) : (page + 1 >= scan->rs_nblocks)) + if (finished) { if (BufferIsValid(scan->rs_cbuf)) ReleaseBuffer(scan->rs_cbuf); @@ -631,7 +730,6 @@ heapgettup_pagemode(HeapScanDesc scan, return; } - page = backward ? (page - 1) : (page + 1); heapgetpage(scan, page); dp = (Page) BufferGetPage(scan->rs_cbuf); |