summaryrefslogtreecommitdiff
path: root/src/backend/access/hash/hashinsert.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/hash/hashinsert.c')
-rw-r--r--src/backend/access/hash/hashinsert.c243
1 files changed, 116 insertions, 127 deletions
diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c
index d9da5e1ee37..4cb3266a944 100644
--- a/src/backend/access/hash/hashinsert.c
+++ b/src/backend/access/hash/hashinsert.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.27 2003/08/04 02:39:57 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.27.2.1 2003/09/07 04:36:47 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -16,136 +16,124 @@
#include "postgres.h"
#include "access/hash.h"
+#include "storage/lmgr.h"
+
+
+static OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf,
+ Size itemsize, HashItem hitem);
-static InsertIndexResult _hash_insertonpg(Relation rel, Buffer buf, int keysz, ScanKey scankey, HashItem hitem, Buffer metabuf);
-static OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, int keysz, ScanKey itup_scankey, Size itemsize, HashItem hitem);
/*
* _hash_doinsert() -- Handle insertion of a single HashItem in the table.
*
* This routine is called by the public interface routines, hashbuild
- * and hashinsert. By here, hashitem is filled in, and has a unique
- * (xid, seqno) pair. The datum to be used as a "key" is in the
- * hashitem.
+ * and hashinsert. By here, hashitem is completely filled in.
+ * The datum to be used as a "key" is in the hashitem.
*/
InsertIndexResult
_hash_doinsert(Relation rel, HashItem hitem)
{
Buffer buf;
Buffer metabuf;
- BlockNumber blkno;
HashMetaPage metap;
IndexTuple itup;
+ BlockNumber itup_blkno;
+ OffsetNumber itup_off;
InsertIndexResult res;
- ScanKey itup_scankey;
- int natts;
+ BlockNumber blkno;
Page page;
-
- metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
- metap = (HashMetaPage) BufferGetPage(metabuf);
- _hash_checkpage((Page) metap, LH_META_PAGE);
-
- /* we need a scan key to do our search, so build one */
- itup = &(hitem->hash_itup);
- if ((natts = rel->rd_rel->relnatts) != 1)
- elog(ERROR, "Hash indexes support only one index key");
- itup_scankey = _hash_mkscankey(rel, itup);
+ HashPageOpaque pageopaque;
+ Size itemsz;
+ bool do_expand;
+ uint32 hashkey;
+ Bucket bucket;
+ Datum datum;
+ bool isnull;
/*
- * find the first page in the bucket chain containing this key and
- * place it in buf. _hash_search obtains a read lock for us.
+ * Compute the hash key for the item. We do this first so as not to
+ * need to hold any locks while running the hash function.
*/
- _hash_search(rel, natts, itup_scankey, &buf, metap);
- page = BufferGetPage(buf);
- _hash_checkpage(page, LH_BUCKET_PAGE);
+ itup = &(hitem->hash_itup);
+ if (rel->rd_rel->relnatts != 1)
+ elog(ERROR, "hash indexes support only one index key");
+ datum = index_getattr(itup, 1, RelationGetDescr(rel), &isnull);
+ Assert(!isnull);
+ hashkey = _hash_datum2hashkey(rel, datum);
+
+ /* compute item size too */
+ itemsz = IndexTupleDSize(hitem->hash_itup)
+ + (sizeof(HashItemData) - sizeof(IndexTupleData));
+
+ itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this but
+ * we need to be consistent */
/*
- * trade in our read lock for a write lock so that we can do the
- * insertion.
+ * Acquire shared split lock so we can compute the target bucket
+ * safely (see README).
*/
- blkno = BufferGetBlockNumber(buf);
- _hash_relbuf(rel, buf, HASH_READ);
- buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+ _hash_getlock(rel, 0, HASH_SHARE);
+ /* Read the metapage */
+ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
+ metap = (HashMetaPage) BufferGetPage(metabuf);
+ _hash_checkpage(rel, (Page) metap, LH_META_PAGE);
/*
- * XXX btree comment (haven't decided what to do in hash): don't think
- * the bucket can be split while we're reading the metapage.
- *
- * If the page was split between the time that we surrendered our read
- * lock and acquired our write lock, then this page may no longer be
- * the right place for the key we want to insert.
+ * Check whether the item can fit on a hash page at all. (Eventually,
+ * we ought to try to apply TOAST methods if not.) Note that at this
+ * point, itemsz doesn't include the ItemId.
*/
+ if (itemsz > HashMaxItemSize((Page) metap))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("index tuple size %lu exceeds hash maximum, %lu",
+ (unsigned long) itemsz,
+ (unsigned long) HashMaxItemSize((Page) metap))));
- /* do the insertion */
- res = _hash_insertonpg(rel, buf, natts, itup_scankey,
- hitem, metabuf);
+ /*
+ * Compute the target bucket number, and convert to block number.
+ */
+ bucket = _hash_hashkey2bucket(hashkey,
+ metap->hashm_maxbucket,
+ metap->hashm_highmask,
+ metap->hashm_lowmask);
- /* be tidy */
- _hash_freeskey(itup_scankey);
+ blkno = BUCKET_TO_BLKNO(metap, bucket);
- return res;
-}
+ /* release lock on metapage, but keep pin since we'll need it again */
+ _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
-/*
- * _hash_insertonpg() -- Insert a tuple on a particular page in the table.
- *
- * This recursive procedure does the following things:
- *
- * + if necessary, splits the target page.
- * + inserts the tuple.
- *
- * On entry, we must have the right buffer on which to do the
- * insertion, and the buffer must be pinned and locked. On return,
- * we will have dropped both the pin and the write lock on the buffer.
- *
- */
-static InsertIndexResult
-_hash_insertonpg(Relation rel,
- Buffer buf,
- int keysz,
- ScanKey scankey,
- HashItem hitem,
- Buffer metabuf)
-{
- InsertIndexResult res;
- Page page;
- BlockNumber itup_blkno;
- OffsetNumber itup_off;
- Size itemsz;
- HashPageOpaque pageopaque;
- bool do_expand = false;
- Buffer ovflbuf;
- HashMetaPage metap;
- Bucket bucket;
+ /*
+ * Acquire share lock on target bucket; then we can release split lock.
+ */
+ _hash_getlock(rel, blkno, HASH_SHARE);
- metap = (HashMetaPage) BufferGetPage(metabuf);
- _hash_checkpage((Page) metap, LH_META_PAGE);
+ _hash_droplock(rel, 0, HASH_SHARE);
+ /* Fetch the primary bucket page for the bucket */
+ buf = _hash_getbuf(rel, blkno, HASH_WRITE);
page = BufferGetPage(buf);
- _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ _hash_checkpage(rel, page, LH_BUCKET_PAGE);
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
- bucket = pageopaque->hasho_bucket;
-
- itemsz = IndexTupleDSize(hitem->hash_itup)
- + (sizeof(HashItemData) - sizeof(IndexTupleData));
- itemsz = MAXALIGN(itemsz);
+ Assert(pageopaque->hasho_bucket == bucket);
+ /* Do the insertion */
while (PageGetFreeSpace(page) < itemsz)
{
/*
* no space on this page; check for an overflow page
*/
- if (BlockNumberIsValid(pageopaque->hasho_nextblkno))
+ BlockNumber nextblkno = pageopaque->hasho_nextblkno;
+
+ if (BlockNumberIsValid(nextblkno))
{
/*
* ovfl page exists; go get it. if it doesn't have room,
* we'll find out next pass through the loop test above.
*/
- ovflbuf = _hash_getbuf(rel, pageopaque->hasho_nextblkno,
- HASH_WRITE);
- _hash_relbuf(rel, buf, HASH_WRITE);
- buf = ovflbuf;
+ _hash_relbuf(rel, buf);
+ buf = _hash_getbuf(rel, nextblkno, HASH_WRITE);
page = BufferGetPage(buf);
}
else
@@ -154,68 +142,72 @@ _hash_insertonpg(Relation rel,
* we're at the end of the bucket chain and we haven't found a
* page with enough room. allocate a new overflow page.
*/
- do_expand = true;
- ovflbuf = _hash_addovflpage(rel, &metabuf, buf);
- _hash_relbuf(rel, buf, HASH_WRITE);
- buf = ovflbuf;
+
+ /* release our write lock without modifying buffer */
+ _hash_chgbufaccess(rel, buf, HASH_READ, HASH_NOLOCK);
+
+ /* chain to a new overflow page */
+ buf = _hash_addovflpage(rel, metabuf, buf);
page = BufferGetPage(buf);
- if (PageGetFreeSpace(page) < itemsz)
- {
- /* it doesn't fit on an empty page -- give up */
- elog(ERROR, "hash item too large");
- }
+ /* should fit now, given test above */
+ Assert(PageGetFreeSpace(page) >= itemsz);
}
- _hash_checkpage(page, LH_OVERFLOW_PAGE);
+ _hash_checkpage(rel, page, LH_OVERFLOW_PAGE);
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
Assert(pageopaque->hasho_bucket == bucket);
}
- itup_off = _hash_pgaddtup(rel, buf, keysz, scankey, itemsz, hitem);
+ /* found page with enough space, so add the item here */
+ itup_off = _hash_pgaddtup(rel, buf, itemsz, hitem);
itup_blkno = BufferGetBlockNumber(buf);
- /* by here, the new tuple is inserted */
- res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
+ /* write and release the modified page */
+ _hash_wrtbuf(rel, buf);
- ItemPointerSet(&(res->pointerData), itup_blkno, itup_off);
+ /* We can drop the bucket lock now */
+ _hash_droplock(rel, blkno, HASH_SHARE);
- if (res != NULL)
- {
- /*
- * Increment the number of keys in the table. We switch lock
- * access type just for a moment to allow greater accessibility to
- * the metapage.
- */
- metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
- HASH_READ, HASH_WRITE);
- metap->hashm_nkeys += 1;
- metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
- HASH_WRITE, HASH_READ);
+ /*
+ * Write-lock the metapage so we can increment the tuple count.
+ * After incrementing it, check to see if it's time for a split.
+ */
+ _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
- }
+ metap->hashm_ntuples += 1;
- _hash_wrtbuf(rel, buf);
+ /* Make sure this stays in sync with _hash_expandtable() */
+ do_expand = metap->hashm_ntuples >
+ (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);
- if (do_expand ||
- (metap->hashm_nkeys / (metap->hashm_maxbucket + 1))
- > metap->hashm_ffactor)
+ /* Write out the metapage and drop lock, but keep pin */
+ _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
+
+ /* Attempt to split if a split is needed */
+ if (do_expand)
_hash_expandtable(rel, metabuf);
- _hash_relbuf(rel, metabuf, HASH_READ);
+
+ /* Finally drop our pin on the metapage */
+ _hash_dropbuf(rel, metabuf);
+
+ /* Create the return data structure */
+ res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
+
+ ItemPointerSet(&(res->pointerData), itup_blkno, itup_off);
+
return res;
}
/*
* _hash_pgaddtup() -- add a tuple to a particular page in the index.
*
- * This routine adds the tuple to the page as requested, and keeps the
- * write lock and reference associated with the page's buffer. It is
- * an error to call pgaddtup() without a write lock and reference.
+ * This routine adds the tuple to the page as requested; it does
+ * not write out the page. It is an error to call pgaddtup() without
+ * a write lock and pin.
*/
static OffsetNumber
_hash_pgaddtup(Relation rel,
Buffer buf,
- int keysz,
- ScanKey itup_scankey,
Size itemsize,
HashItem hitem)
{
@@ -223,7 +215,7 @@ _hash_pgaddtup(Relation rel,
Page page;
page = BufferGetPage(buf);
- _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ _hash_checkpage(rel, page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page));
if (PageAddItem(page, (Item) hitem, itemsize, itup_off, LP_USED)
@@ -231,8 +223,5 @@ _hash_pgaddtup(Relation rel,
elog(ERROR, "failed to add index item to \"%s\"",
RelationGetRelationName(rel));
- /* write the buffer, but hold our lock */
- _hash_wrtnorelbuf(buf);
-
return itup_off;
}