diff options
author | Robert Haas <rhaas@postgresql.org> | 2017-09-22 13:26:25 -0400 |
---|---|---|
committer | Robert Haas <rhaas@postgresql.org> | 2017-09-22 13:56:27 -0400 |
commit | 7c75ef571579a3ad7a1d3ee909f11dba5e0b9440 (patch) | |
tree | 4e8f45e95db7f7168d0f0ee0202da0093538d88a /src/include | |
parent | 0f574a7afb5c998d19dc3d981e45cb10267286ed (diff) |
hash: Implement page-at-a-time scan.
Commit 09cb5c0e7d6fbc9dee26dc429e4fc0f2a88e5272 added a similar
optimization to btree back in 2006, but nobody bothered to implement
the same thing for hash indexes, probably because they weren't
WAL-logged and had lots of other performance problems as well. As
with the corresponding btree case, this eliminates the problem of
potentially needing to refind our position within the page, and cuts
down on pin/unpin traffic as well.
Ashutosh Sharma, reviewed by Alexander Korotkov, Jesper Pedersen,
Amit Kapila, and me. Some final edits to comments and README by
me.
Discussion: http://postgr.es/m/CAE9k0Pm3KTx93K8_5j6VMzG4h5F+SyknxUwXrN-zqSZ9X8ZS3w@mail.gmail.com
Diffstat (limited to 'src/include')
-rw-r--r-- | src/include/access/hash.h | 70 |
1 files changed, 54 insertions, 16 deletions
```diff
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index c06dcb214f0..0e0f3e17a7c 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -114,6 +114,53 @@ typedef struct HashScanPosItem /* what we remember about each match */
 	OffsetNumber indexOffset; /* index item's location within page */
 } HashScanPosItem;
 
+typedef struct HashScanPosData
+{
+	Buffer buf; /* if valid, the buffer is pinned */
+	XLogRecPtr lsn; /* pos in the WAL stream when page was read */
+	BlockNumber currPage; /* current hash index page */
+	BlockNumber nextPage; /* next overflow page */
+	BlockNumber prevPage; /* prev overflow or bucket page */
+
+	/*
+	 * The items array is always ordered in index order (ie, increasing
+	 * indexoffset). When scanning backwards it is convenient to fill the
+	 * array back-to-front, so we start at the last slot and fill downwards.
+	 * Hence we need both a first-valid-entry and a last-valid-entry counter.
+	 * itemIndex is a cursor showing which entry was last returned to caller.
+	 */
+	int firstItem; /* first valid index in items[] */
+	int lastItem; /* last valid index in items[] */
+	int itemIndex; /* current index in items[] */
+
+	HashScanPosItem items[MaxIndexTuplesPerPage]; /* MUST BE LAST */
+} HashScanPosData;
+
+#define HashScanPosIsPinned(scanpos) \
+( \
+	AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
+				!BufferIsValid((scanpos).buf)), \
+	BufferIsValid((scanpos).buf) \
+)
+
+#define HashScanPosIsValid(scanpos) \
+( \
+	AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
+				!BufferIsValid((scanpos).buf)), \
+	BlockNumberIsValid((scanpos).currPage) \
+)
+
+#define HashScanPosInvalidate(scanpos) \
+	do { \
+		(scanpos).buf = InvalidBuffer; \
+		(scanpos).lsn = InvalidXLogRecPtr; \
+		(scanpos).currPage = InvalidBlockNumber; \
+		(scanpos).nextPage = InvalidBlockNumber; \
+		(scanpos).prevPage = InvalidBlockNumber; \
+		(scanpos).firstItem = 0; \
+		(scanpos).lastItem = 0; \
+		(scanpos).itemIndex = 0; \
+	} while (0);
 
 /*
  * HashScanOpaqueData is private state for a hash index scan.
@@ -123,14 +170,6 @@ typedef struct HashScanOpaqueData
 	/* Hash value of the scan key, ie, the hash key we seek */
 	uint32 hashso_sk_hash;
 
-	/*
-	 * We also want to remember which buffer we're currently examining in the
-	 * scan. We keep the buffer pinned (but not locked) across hashgettuple
-	 * calls, in order to avoid doing a ReadBuffer() for every tuple in the
-	 * index.
-	 */
-	Buffer hashso_curbuf;
-
 	/* remember the buffer associated with primary bucket */
 	Buffer hashso_bucket_buf;
 
@@ -141,12 +180,6 @@ typedef struct HashScanOpaqueData
 	 */
 	Buffer hashso_split_bucket_buf;
 
-	/* Current position of the scan, as an index TID */
-	ItemPointerData hashso_curpos;
-
-	/* Current position of the scan, as a heap TID */
-	ItemPointerData hashso_heappos;
-
 	/* Whether scan starts on bucket being populated due to split */
 	bool hashso_buc_populated;
 
@@ -156,8 +189,14 @@ typedef struct HashScanOpaqueData
 	 */
 	bool hashso_buc_split;
 	/* info about killed items if any (killedItems is NULL if never used) */
-	HashScanPosItem *killedItems; /* tids and offset numbers of killed items */
+	int *killedItems; /* currPos.items indexes of killed items */
 	int numKilled; /* number of currently stored items */
+
+	/*
+	 * Identify all the matching items on a page and save them in
+	 * HashScanPosData
+	 */
+	HashScanPosData currPos; /* current position data */
 } HashScanOpaqueData;
 
 typedef HashScanOpaqueData *HashScanOpaque;
@@ -401,7 +440,6 @@ extern void _hash_finish_split(Relation rel, Buffer metabuf, Buffer obuf,
 /* hashsearch.c */
 extern bool _hash_next(IndexScanDesc scan, ScanDirection dir);
 extern bool _hash_first(IndexScanDesc scan, ScanDirection dir);
-extern bool _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir);
 
 /* hashsort.c */
 typedef struct HSpool HSpool; /* opaque struct in hashsort.c */
```