path: root/src/backend/utils/hash
Diffstat (limited to 'src/backend/utils/hash')
-rw-r--r--  src/backend/utils/hash/dynahash.c  558
-rw-r--r--  src/backend/utils/hash/hashfn.c    160
2 files changed, 282 insertions, 436 deletions
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index fd9b11ba98e..92e775bfe86 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -1,14 +1,15 @@
/*-------------------------------------------------------------------------
*
* dynahash.c
- * dynamic hashing
+ * dynamic hash tables
+ *
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.36 2001/06/22 19:16:23 wieck Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.37 2001/10/01 05:36:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -40,46 +41,40 @@
* Modified by sullivan@postgres.berkeley.edu April 1990
* changed ctl structure for shared memory
*/
-#include <sys/types.h>
#include "postgres.h"
+
+#include <sys/types.h>
+
#include "utils/dynahash.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
/*
- * Fast MOD arithmetic, assuming that y is a power of 2 !
+ * Key (also entry) part of a HASHELEMENT
*/
+#define ELEMENTKEY(helem) (((char *)(helem)) + MAXALIGN(sizeof(HASHELEMENT)))
+/*
+ * Fast MOD arithmetic, assuming that y is a power of 2 !
+ */
#define MOD(x,y) ((x) & ((y)-1))
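
The two macros above carry the core layout assumptions: ELEMENTKEY says an entry's key lives immediately after the MAXALIGN'd HASHELEMENT header, and MOD relies on every segment and directory size in dynahash being a power of two, so a bit-mask replaces the division. A tiny standalone sketch of the latter (plain C, not part of the patch):

/* Standalone sketch: the bit-mask trick behind MOD. */
#include <stdio.h>

#define MOD(x,y) ((x) & ((y)-1))        /* correct only when y is a power of 2 */

int
main(void)
{
    long    x = 37;
    long    y = 8;                      /* power of 2, as all dynahash sizes are */

    /* y-1 is a mask of the low-order bits, so the AND equals the remainder */
    printf("%ld %% %ld = %ld, MOD = %ld\n", x, y, x % y, MOD(x, y));
    return 0;
}
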
/*
* Private function prototypes
*/
static void *DynaHashAlloc(Size size);
-static uint32 call_hash(HTAB *hashp, char *k);
-static SEG_OFFSET seg_alloc(HTAB *hashp);
-static int bucket_alloc(HTAB *hashp);
-static int dir_realloc(HTAB *hashp);
-static int expand_table(HTAB *hashp);
-static int hdefault(HTAB *hashp);
-static int init_htab(HTAB *hashp, int nelem);
+static uint32 call_hash(HTAB *hashp, void *k);
+static HASHSEGMENT seg_alloc(HTAB *hashp);
+static bool element_alloc(HTAB *hashp);
+static bool dir_realloc(HTAB *hashp);
+static bool expand_table(HTAB *hashp);
+static bool hdefault(HTAB *hashp);
+static bool init_htab(HTAB *hashp, long nelem);
-/* ----------------
+/*
* memory allocation routines
- *
- * for postgres: all hash elements have to be in
- * the global cache context. Otherwise the postgres
- * garbage collector is going to corrupt them. -wei
- *
- * ??? the "cache" memory context is intended to store only
- * system cache information. The user of the hashing
- * routines should specify which context to use or we
- * should create a separate memory context for these
- * hash routines. For now I have modified this code to
- * do the latter -cim 1/19/91
- * ----------------
*/
static MemoryContext DynaHashCxt = NULL;
static MemoryContext CurrentDynaHashCxt = NULL;
@@ -95,39 +90,22 @@ DynaHashAlloc(Size size)
#define MEM_FREE pfree
-/*
- * pointer access macros. Shared memory implementation cannot
- * store pointers in the hash table data structures because
- * pointer values will be different in different address spaces.
- * these macros convert offsets to pointers and pointers to offsets.
- * Shared memory need not be contiguous, but all addresses must be
- * calculated relative to some offset (segbase).
- */
-
-#define GET_SEG(hp,seg_num)\
- (SEGMENT) (((unsigned long) (hp)->segbase) + (hp)->dir[seg_num])
-
-#define GET_BUCKET(hp,bucket_offs)\
- (ELEMENT *) (((unsigned long) (hp)->segbase) + bucket_offs)
-
-#define MAKE_HASHOFFSET(hp,ptr)\
- ( ((unsigned long) ptr) - ((unsigned long) (hp)->segbase) )
-
#if HASH_STATISTICS
static long hash_accesses,
hash_collisions,
hash_expansions;
-
#endif
+
/************************** CREATE ROUTINES **********************/
HTAB *
-hash_create(int nelem, HASHCTL *info, int flags)
+hash_create(long nelem, HASHCTL *info, int flags)
{
- HHDR *hctl;
HTAB *hashp;
+ HASHHDR *hctl;
+ /* First time through, create a memory context for hash tables */
if (!DynaHashCxt)
DynaHashCxt = AllocSetContextCreate(TopMemoryContext,
"DynaHash",
@@ -135,62 +113,57 @@ hash_create(int nelem, HASHCTL *info, int flags)
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
+ /* Select allocation context for this hash table */
if (flags & HASH_CONTEXT)
CurrentDynaHashCxt = info->hcxt;
else
CurrentDynaHashCxt = DynaHashCxt;
+ /* Initialize the hash header */
hashp = (HTAB *) MEM_ALLOC(sizeof(HTAB));
+ if (!hashp)
+ return NULL;
MemSet(hashp, 0, sizeof(HTAB));
if (flags & HASH_FUNCTION)
hashp->hash = info->hash;
else
- {
- /* default */
- hashp->hash = string_hash;
- }
+ hashp->hash = string_hash; /* default hash function */
if (flags & HASH_SHARED_MEM)
{
-
/*
* ctl structure is preallocated for shared memory tables. Note
* that HASH_DIRSIZE had better be set as well.
*/
-
- hashp->hctl = (HHDR *) info->hctl;
- hashp->segbase = (char *) info->segbase;
+ hashp->hctl = info->hctl;
+ hashp->dir = info->dir;
hashp->alloc = info->alloc;
- hashp->dir = (SEG_OFFSET *) info->dir;
hashp->hcxt = NULL;
/* hash table already exists, we're just attaching to it */
if (flags & HASH_ATTACH)
return hashp;
-
}
else
{
/* setup hash table defaults */
-
hashp->hctl = NULL;
- hashp->alloc = MEM_ALLOC;
hashp->dir = NULL;
- hashp->segbase = NULL;
+ hashp->alloc = MEM_ALLOC;
hashp->hcxt = DynaHashCxt;
-
}
if (!hashp->hctl)
{
- hashp->hctl = (HHDR *) hashp->alloc(sizeof(HHDR));
+ hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR));
if (!hashp->hctl)
- return 0;
+ return NULL;
}
if (!hdefault(hashp))
- return 0;
+ return NULL;
+
hctl = hashp->hctl;
#ifdef HASH_STATISTICS
hctl->accesses = hctl->collisions = 0;
@@ -222,24 +195,26 @@ hash_create(int nelem, HASHCTL *info, int flags)
if (flags & HASH_ELEM)
{
hctl->keysize = info->keysize;
- hctl->datasize = info->datasize;
+ hctl->entrysize = info->entrysize;
}
+
if (flags & HASH_ALLOC)
hashp->alloc = info->alloc;
else
{
if (flags & HASH_CONTEXT)
{
+ /* hash table structures live in child of given context */
CurrentDynaHashCxt = AllocSetContextCreate(info->hcxt,
"DynaHashTable",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
-
hashp->hcxt = CurrentDynaHashCxt;
}
else
{
+ /* hash table structures live in child of DynaHashCxt */
CurrentDynaHashCxt = AllocSetContextCreate(DynaHashCxt,
"DynaHashTable",
ALLOCSET_DEFAULT_MINSIZE,
@@ -249,57 +224,54 @@ hash_create(int nelem, HASHCTL *info, int flags)
}
}
- if (init_htab(hashp, nelem))
+ if (!init_htab(hashp, nelem))
{
hash_destroy(hashp);
- return 0;
+ return NULL;
}
return hashp;
}
/*
- * Set default HHDR parameters.
+ * Set default HASHHDR parameters.
*/
-static int
+static bool
hdefault(HTAB *hashp)
{
- HHDR *hctl;
+ HASHHDR *hctl = hashp->hctl;
- MemSet(hashp->hctl, 0, sizeof(HHDR));
+ MemSet(hctl, 0, sizeof(HASHHDR));
- hctl = hashp->hctl;
hctl->ssize = DEF_SEGSIZE;
hctl->sshift = DEF_SEGSIZE_SHIFT;
hctl->dsize = DEF_DIRSIZE;
hctl->ffactor = DEF_FFACTOR;
- hctl->nkeys = 0;
+ hctl->nentries = 0;
hctl->nsegs = 0;
/* I added these MS. */
- /* default memory allocation for hash buckets */
+ /* rather pointless defaults for key & entry size */
hctl->keysize = sizeof(char *);
- hctl->datasize = sizeof(char *);
+ hctl->entrysize = 2 * sizeof(char *);
/* table has no fixed maximum size */
hctl->max_dsize = NO_MAX_DSIZE;
/* garbage collection for HASH_REMOVE */
- hctl->freeBucketIndex = INVALID_INDEX;
+ hctl->freeList = NULL;
- return 1;
+ return true;
}
-static int
-init_htab(HTAB *hashp, int nelem)
+static bool
+init_htab(HTAB *hashp, long nelem)
{
- SEG_OFFSET *segp;
+ HASHHDR *hctl = hashp->hctl;
+ HASHSEGMENT *segp;
int nbuckets;
int nsegs;
- HHDR *hctl;
-
- hctl = hashp->hctl;
/*
* Divide number of elements by the fill factor to determine a desired
@@ -329,29 +301,29 @@ init_htab(HTAB *hashp, int nelem)
if (!(hashp->dir))
hctl->dsize = nsegs;
else
- return -1;
+ return false;
}
/* Allocate a directory */
if (!(hashp->dir))
{
CurrentDynaHashCxt = hashp->hcxt;
- hashp->dir = (SEG_OFFSET *)
- hashp->alloc(hctl->dsize * sizeof(SEG_OFFSET));
+ hashp->dir = (HASHSEGMENT *)
+ hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT));
if (!hashp->dir)
- return -1;
+ return false;
}
/* Allocate initial segments */
for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)
{
*segp = seg_alloc(hashp);
- if (*segp == (SEG_OFFSET) 0)
- return -1;
+ if (*segp == NULL)
+ return false;
}
#if HASH_DEBUG
- fprintf(stderr, "%s\n%s%x\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
+ fprintf(stderr, "%s\n%s%p\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
"init_htab:",
"TABLE POINTER ", hashp,
"DIRECTORY SIZE ", hctl->dsize,
@@ -362,9 +334,9 @@ init_htab(HTAB *hashp, int nelem)
"HIGH MASK ", hctl->high_mask,
"LOW MASK ", hctl->low_mask,
"NSEGS ", hctl->nsegs,
- "NKEYS ", hctl->nkeys);
+ "NENTRIES ", hctl->nentries);
#endif
- return 0;
+ return true;
}
/*
@@ -375,14 +347,14 @@ init_htab(HTAB *hashp, int nelem)
* NB: assumes that all hash structure parameters have default values!
*/
long
-hash_estimate_size(long num_entries, long keysize, long datasize)
+hash_estimate_size(long num_entries, long entrysize)
{
long size = 0;
long nBuckets,
nSegments,
nDirEntries,
- nRecordAllocs,
- recordSize;
+ nElementAllocs,
+ elementSize;
/* estimate number of buckets wanted */
nBuckets = 1L << my_log2((num_entries - 1) / DEF_FFACTOR + 1);
@@ -394,16 +366,15 @@ hash_estimate_size(long num_entries, long keysize, long datasize)
nDirEntries <<= 1; /* dir_alloc doubles dsize at each call */
/* fixed control info */
- size += MAXALIGN(sizeof(HHDR)); /* but not HTAB, per above */
+ size += MAXALIGN(sizeof(HASHHDR)); /* but not HTAB, per above */
/* directory */
- size += MAXALIGN(nDirEntries * sizeof(SEG_OFFSET));
+ size += MAXALIGN(nDirEntries * sizeof(HASHSEGMENT));
/* segments */
- size += nSegments * MAXALIGN(DEF_SEGSIZE * sizeof(BUCKET_INDEX));
- /* records --- allocated in groups of BUCKET_ALLOC_INCR */
- recordSize = sizeof(BUCKET_INDEX) + keysize + datasize;
- recordSize = MAXALIGN(recordSize);
- nRecordAllocs = (num_entries - 1) / BUCKET_ALLOC_INCR + 1;
- size += nRecordAllocs * BUCKET_ALLOC_INCR * recordSize;
+ size += nSegments * MAXALIGN(DEF_SEGSIZE * sizeof(HASHBUCKET));
+ /* elements --- allocated in groups of HASHELEMENT_ALLOC_INCR */
+ elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
+ nElementAllocs = (num_entries - 1) / HASHELEMENT_ALLOC_INCR + 1;
+ size += nElementAllocs * HASHELEMENT_ALLOC_INCR * elementSize;
return size;
}
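
hash_estimate_size now sizes elements as a MAXALIGN'd HASHELEMENT header plus the MAXALIGN'd entrysize, allocated in groups of HASHELEMENT_ALLOC_INCR. A back-of-envelope sketch of just that term, using assumed stand-in constants (8-byte alignment, 32 elements per allocation, an 8-byte header); none of these values are read from the real headers:

/* Back-of-envelope sketch only; constants below are assumptions. */
#include <stdio.h>

#define MY_MAXALIGN(x)  (((x) + 7) & ~7L)   /* assume 8-byte alignment */
#define MY_ALLOC_INCR   32                  /* stand-in for HASHELEMENT_ALLOC_INCR */

int
main(void)
{
    long    num_entries = 1000;
    long    entrysize = 64;                 /* caller-chosen entry size */
    long    header = 8;                     /* stand-in for sizeof(HASHELEMENT) */

    long    elementSize = MY_MAXALIGN(header) + MY_MAXALIGN(entrysize);
    long    nAllocs = (num_entries - 1) / MY_ALLOC_INCR + 1;

    printf("element space: %ld bytes\n", nAllocs * MY_ALLOC_INCR * elementSize);
    return 0;
}
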
@@ -439,40 +410,11 @@ hash_select_dirsize(long num_entries)
/********************** DESTROY ROUTINES ************************/
-/*
- * XXX this sure looks thoroughly broken to me --- tgl 2/99.
- * It's freeing every entry individually --- but they weren't
- * allocated individually, see bucket_alloc!! Why doesn't it crash?
- * ANSWER: it probably does crash, but is never invoked in normal
- * operations...
- *
- * Thomas is right, it does crash. Therefore I changed the code
- * to use a separate memory context which is a child of the DynaHashCxt
- * by default. And the HASHCTL structure got extended with a hcxt
- * field, where someone can specify an explicit context (giving new
- * flag HASH_CONTEXT) and forget about hash_destroy() completely.
- * The shmem operations aren't changed, but in shmem mode a destroy
- * doesn't work anyway. Jan Wieck 03/2001.
- */
-
void
hash_destroy(HTAB *hashp)
{
if (hashp != NULL)
{
-#if 0
- SEG_OFFSET segNum;
- SEGMENT segp;
- int nsegs = hashp->hctl->nsegs;
- int j;
- BUCKET_INDEX *elp,
- p,
- q;
- ELEMENT *curr;
-#endif
-
- /* cannot destroy a shared memory hash table */
- Assert(!hashp->segbase);
/* allocation method must be one we know how to free, too */
Assert(hashp->alloc == MEM_ALLOC);
 /* so this hashtable must have its own context */
@@ -481,40 +423,18 @@ hash_destroy(HTAB *hashp)
hash_stats("destroy", hashp);
/*
- * Free buckets, dir etc. by destroying the hash tables
+ * Free buckets, dir etc. by destroying the hash table's
* memory context.
*/
MemoryContextDelete(hashp->hcxt);
-#if 0
- /*
- * Dead code - replaced by MemoryContextDelete() above
- */
- for (segNum = 0; nsegs > 0; nsegs--, segNum++)
- {
-
- segp = GET_SEG(hashp, segNum);
- for (j = 0, elp = segp; j < hashp->hctl->ssize; j++, elp++)
- {
- for (p = *elp; p != INVALID_INDEX; p = q)
- {
- curr = GET_BUCKET(hashp, p);
- q = curr->next;
- MEM_FREE((char *) curr);
- }
- }
- MEM_FREE((char *) segp);
- }
- MEM_FREE((char *) hashp->dir);
-#endif
-
/*
* Free the HTAB and control structure, which are allocated
* in the parent context (DynaHashCxt or the context given
- * by the caller of hash_create().
+ * by the caller of hash_create()).
*/
- MEM_FREE((char *) hashp->hctl);
- MEM_FREE((char *) hashp);
+ MEM_FREE(hashp->hctl);
+ MEM_FREE(hashp);
}
}
@@ -526,8 +446,8 @@ hash_stats(char *where, HTAB *hashp)
fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
where, hashp->hctl->accesses, hashp->hctl->collisions);
- fprintf(stderr, "hash_stats: keys %ld keysize %ld maxp %d segmentcount %d\n",
- hashp->hctl->nkeys, hashp->hctl->keysize,
+ fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %d segmentcount %d\n",
+ hashp->hctl->nentries, hashp->hctl->keysize,
hashp->hctl->max_bucket, hashp->hctl->nsegs);
fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
where, hash_accesses, hash_collisions);
@@ -541,9 +461,9 @@ hash_stats(char *where, HTAB *hashp)
/*******************************SEARCH ROUTINES *****************************/
static uint32
-call_hash(HTAB *hashp, char *k)
+call_hash(HTAB *hashp, void *k)
{
- HHDR *hctl = hashp->hctl;
+ HASHHDR *hctl = hashp->hctl;
long hash_val,
bucket;
@@ -553,7 +473,7 @@ call_hash(HTAB *hashp, char *k)
if (bucket > hctl->max_bucket)
bucket = bucket & hctl->low_mask;
- return bucket;
+ return (uint32) bucket;
}
/*
@@ -566,31 +486,34 @@ call_hash(HTAB *hashp, char *k)
* foundPtr is TRUE if we found an element in the table
* (FALSE if we entered one).
*/
-long *
+void *
hash_search(HTAB *hashp,
- char *keyPtr,
+ void *keyPtr,
HASHACTION action, /* HASH_FIND / HASH_ENTER / HASH_REMOVE
* HASH_FIND_SAVE / HASH_REMOVE_SAVED */
bool *foundPtr)
{
+ HASHHDR *hctl;
uint32 bucket;
long segment_num;
long segment_ndx;
- SEGMENT segp;
- ELEMENT *curr;
- HHDR *hctl;
- BUCKET_INDEX currIndex;
- BUCKET_INDEX *prevIndexPtr;
- char *destAddr;
+ HASHSEGMENT segp;
+ HASHBUCKET currBucket;
+ HASHBUCKET *prevBucketPtr;
+
static struct State
{
- ELEMENT *currElem;
- BUCKET_INDEX currIndex;
- BUCKET_INDEX *prevIndex;
+ HASHBUCKET currBucket;
+ HASHBUCKET *prevBucketPtr;
} saveState;
- Assert((hashp && keyPtr));
- Assert((action == HASH_FIND) || (action == HASH_REMOVE) || (action == HASH_ENTER) || (action == HASH_FIND_SAVE) || (action == HASH_REMOVE_SAVED));
+ Assert(hashp);
+ Assert(keyPtr);
+ Assert((action == HASH_FIND) ||
+ (action == HASH_REMOVE) ||
+ (action == HASH_ENTER) ||
+ (action == HASH_FIND_SAVE) ||
+ (action == HASH_REMOVE_SAVED));
hctl = hashp->hctl;
@@ -598,16 +521,16 @@ hash_search(HTAB *hashp,
hash_accesses++;
hashp->hctl->accesses++;
#endif
+
if (action == HASH_REMOVE_SAVED)
{
- curr = saveState.currElem;
- currIndex = saveState.currIndex;
- prevIndexPtr = saveState.prevIndex;
+ currBucket = saveState.currBucket;
+ prevBucketPtr = saveState.prevBucketPtr;
/*
* Try to catch subsequent errors
*/
- Assert(saveState.currElem && !(saveState.currElem = 0));
+ Assert(currBucket && !(saveState.currBucket = NULL));
}
else
{
@@ -615,25 +538,22 @@ hash_search(HTAB *hashp,
segment_num = bucket >> hctl->sshift;
segment_ndx = MOD(bucket, hctl->ssize);
- segp = GET_SEG(hashp, segment_num);
+ segp = hashp->dir[segment_num];
Assert(segp);
- prevIndexPtr = &segp[segment_ndx];
- currIndex = *prevIndexPtr;
+ prevBucketPtr = &segp[segment_ndx];
+ currBucket = *prevBucketPtr;
/*
- * Follow collision chain
+ * Follow collision chain looking for matching key
*/
- for (curr = NULL; currIndex != INVALID_INDEX;)
+ while (currBucket != NULL)
{
- /* coerce bucket index into a pointer */
- curr = GET_BUCKET(hashp, currIndex);
-
- if (!memcmp((char *) &(curr->key), keyPtr, hctl->keysize))
+ if (memcmp(ELEMENTKEY(currBucket), keyPtr, hctl->keysize) == 0)
break;
- prevIndexPtr = &(curr->next);
- currIndex = *prevIndexPtr;
+ prevBucketPtr = &(currBucket->link);
+ currBucket = *prevBucketPtr;
#if HASH_STATISTICS
hash_collisions++;
hashp->hctl->collisions++;
@@ -645,48 +565,52 @@ hash_search(HTAB *hashp,
* if we found an entry or if we weren't trying to insert, we're done
* now.
*/
- *foundPtr = (bool) (currIndex != INVALID_INDEX);
+ *foundPtr = (bool) (currBucket != NULL);
+
switch (action)
{
case HASH_ENTER:
- if (currIndex != INVALID_INDEX)
- return &(curr->key);
+ if (currBucket != NULL)
+ return (void *) ELEMENTKEY(currBucket);
break;
+
case HASH_REMOVE:
case HASH_REMOVE_SAVED:
- if (currIndex != INVALID_INDEX)
+ if (currBucket != NULL)
{
- Assert(hctl->nkeys > 0);
- hctl->nkeys--;
+ Assert(hctl->nentries > 0);
+ hctl->nentries--;
/* remove record from hash bucket's chain. */
- *prevIndexPtr = curr->next;
+ *prevBucketPtr = currBucket->link;
/* add the record to the freelist for this table. */
- curr->next = hctl->freeBucketIndex;
- hctl->freeBucketIndex = currIndex;
+ currBucket->link = hctl->freeList;
+ hctl->freeList = currBucket;
/*
* better hope the caller is synchronizing access to this
* element, because someone else is going to reuse it the
* next time something is added to the table
*/
- return &(curr->key);
+ return (void *) ELEMENTKEY(currBucket);
}
- return (long *) TRUE;
+ return (void *) TRUE;
+
case HASH_FIND:
- if (currIndex != INVALID_INDEX)
- return &(curr->key);
- return (long *) TRUE;
+ if (currBucket != NULL)
+ return (void *) ELEMENTKEY(currBucket);
+ return (void *) TRUE;
+
case HASH_FIND_SAVE:
- if (currIndex != INVALID_INDEX)
+ if (currBucket != NULL)
{
- saveState.currElem = curr;
- saveState.prevIndex = prevIndexPtr;
- saveState.currIndex = currIndex;
- return &(curr->key);
+ saveState.currBucket = currBucket;
+ saveState.prevBucketPtr = prevBucketPtr;
+ return (void *) ELEMENTKEY(currBucket);
}
- return (long *) TRUE;
+ return (void *) TRUE;
+
default:
/* can't get here */
return NULL;
@@ -696,39 +620,36 @@ hash_search(HTAB *hashp,
* If we got here, then we didn't find the element and we have to
* insert it into the hash table
*/
- Assert(currIndex == INVALID_INDEX);
+ Assert(currBucket == NULL);
/* get the next free bucket */
- currIndex = hctl->freeBucketIndex;
- if (currIndex == INVALID_INDEX)
+ currBucket = hctl->freeList;
+ if (currBucket == NULL)
{
/* no free elements. allocate another chunk of buckets */
- if (!bucket_alloc(hashp))
+ if (!element_alloc(hashp))
return NULL;
- currIndex = hctl->freeBucketIndex;
+ currBucket = hctl->freeList;
}
- Assert(currIndex != INVALID_INDEX);
+ Assert(currBucket != NULL);
- curr = GET_BUCKET(hashp, currIndex);
- hctl->freeBucketIndex = curr->next;
+ hctl->freeList = currBucket->link;
/* link into chain */
- *prevIndexPtr = currIndex;
+ *prevBucketPtr = currBucket;
+ currBucket->link = NULL;
/* copy key into record */
- destAddr = (char *) &(curr->key);
- memmove(destAddr, keyPtr, hctl->keysize);
- curr->next = INVALID_INDEX;
+ memcpy(ELEMENTKEY(currBucket), keyPtr, hctl->keysize);
/*
* let the caller initialize the data field after hash_search returns.
*/
- /* memmove(destAddr,keyPtr,hctl->keysize+hctl->datasize); */
/*
* Check if it is time to split the segment
*/
- if (++hctl->nkeys / (hctl->max_bucket + 1) > hctl->ffactor)
+ if (++hctl->nentries / (hctl->max_bucket + 1) > hctl->ffactor)
{
/*
@@ -737,14 +658,15 @@ hash_search(HTAB *hashp,
*/
expand_table(hashp);
}
- return &(curr->key);
+
+ return (void *) ELEMENTKEY(currBucket);
}
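
As the comment in the insertion path notes, hash_search now returns a pointer to the entry itself (key first, caller data after), and with HASH_ENTER the caller initializes the data part when *foundPtr comes back false, while a NULL return means out of memory. A hedged usage sketch against this interface; the MyEntry struct, field names, and messages are invented for illustration, not taken from the patch:

#include "postgres.h"

#include <string.h>

#include "utils/hsearch.h"

/* Hypothetical entry layout: key first, caller data after. */
typedef struct
{
    char    name[32];       /* key; keysize given to hash_create would be 32 */
    int     count;          /* caller-maintained data */
} MyEntry;

void
bump_count(HTAB *table, const char *name)
{
    char    key[32];
    MyEntry *entry;
    bool    found;

    MemSet(key, 0, sizeof(key));    /* zero-pad: lookups memcmp all 32 bytes */
    strncpy(key, name, sizeof(key) - 1);

    entry = (MyEntry *) hash_search(table, (void *) key, HASH_ENTER, &found);
    if (entry == NULL)
        elog(ERROR, "bump_count: out of memory");

    if (!found)
        entry->count = 0;           /* new entry: caller initializes the data */
    entry->count++;
}

If lookups should not create entries, HASH_FIND with the same zero-padded key buffer works the same way, except that a return of (void *) TRUE simply means "not found".
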
/*
* hash_seq_init/_search
* Sequentially search through hash table and return
* all the elements one by one, return NULL on error and
- * return (long *) TRUE in the end.
+ * return (void *) TRUE in the end.
*
* NOTE: caller may delete the returned element before continuing the scan.
* However, deleting any other element while the scan is in progress is
@@ -757,31 +679,31 @@ hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
{
status->hashp = hashp;
status->curBucket = 0;
- status->curIndex = INVALID_INDEX;
+ status->curEntry = NULL;
}
-long *
+void *
hash_seq_search(HASH_SEQ_STATUS *status)
{
HTAB *hashp = status->hashp;
- HHDR *hctl = hashp->hctl;
+ HASHHDR *hctl = hashp->hctl;
while (status->curBucket <= hctl->max_bucket)
{
long segment_num;
long segment_ndx;
- SEGMENT segp;
+ HASHSEGMENT segp;
- if (status->curIndex != INVALID_INDEX)
+ if (status->curEntry != NULL)
{
/* Continuing scan of curBucket... */
- ELEMENT *curElem;
+ HASHELEMENT *curElem;
- curElem = GET_BUCKET(hashp, status->curIndex);
- status->curIndex = curElem->next;
- if (status->curIndex == INVALID_INDEX) /* end of this bucket */
+ curElem = status->curEntry;
+ status->curEntry = curElem->link;
+ if (status->curEntry == NULL) /* end of this bucket */
++status->curBucket;
- return &(curElem->key);
+ return (void *) ELEMENTKEY(curElem);
}
/*
@@ -793,10 +715,10 @@ hash_seq_search(HASH_SEQ_STATUS *status)
/*
* first find the right segment in the table directory.
*/
- segp = GET_SEG(hashp, segment_num);
+ segp = hashp->dir[segment_num];
if (segp == NULL)
/* this is probably an error */
- return (long *) NULL;
+ return NULL;
/*
* now find the right index into the segment for the first item in
@@ -806,13 +728,13 @@ hash_seq_search(HASH_SEQ_STATUS *status)
* directory of valid stuff. if there are elements in the bucket
* chains that point to the freelist we're in big trouble.
*/
- status->curIndex = segp[segment_ndx];
+ status->curEntry = segp[segment_ndx];
- if (status->curIndex == INVALID_INDEX) /* empty bucket */
+ if (status->curEntry == NULL) /* empty bucket */
++status->curBucket;
}
- return (long *) TRUE; /* out of buckets */
+ return (void *) TRUE; /* out of buckets */
}
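
The scan protocol spelled out above is: hash_seq_init once, then call hash_seq_search until it returns (void *) TRUE (out of buckets); NULL signals an error. A hypothetical scan loop over the same table, reusing the invented MyEntry from the earlier sketch:

void
print_all(HTAB *table)
{
    HASH_SEQ_STATUS status;
    MyEntry    *entry;

    hash_seq_init(&status, table);
    while ((entry = (MyEntry *) hash_seq_search(&status)) != (MyEntry *) TRUE)
    {
        if (entry == NULL)
            elog(ERROR, "print_all: hash_seq_search failed");
        elog(NOTICE, "%s = %d", entry->name, entry->count);
    }
}
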
@@ -821,11 +743,11 @@ hash_seq_search(HASH_SEQ_STATUS *status)
/*
* Expand the table by adding one more hash bucket.
*/
-static int
+static bool
expand_table(HTAB *hashp)
{
- HHDR *hctl;
- SEGMENT old_seg,
+ HASHHDR *hctl = hashp->hctl;
+ HASHSEGMENT old_seg,
new_seg;
long old_bucket,
new_bucket;
@@ -833,18 +755,15 @@ expand_table(HTAB *hashp)
new_segndx;
long old_segnum,
old_segndx;
- ELEMENT *chain;
- BUCKET_INDEX *old,
- *newbi;
- BUCKET_INDEX chainIndex,
- nextIndex;
+ HASHBUCKET *oldlink,
+ *newlink;
+ HASHBUCKET currElement,
+ nextElement;
#ifdef HASH_STATISTICS
hash_expansions++;
#endif
- hctl = hashp->hctl;
-
new_bucket = hctl->max_bucket + 1;
new_segnum = new_bucket >> hctl->sshift;
new_segndx = MOD(new_bucket, hctl->ssize);
@@ -854,9 +773,9 @@ expand_table(HTAB *hashp)
/* Allocate new segment if necessary -- could fail if dir full */
if (new_segnum >= hctl->dsize)
if (!dir_realloc(hashp))
- return 0;
+ return false;
if (!(hashp->dir[new_segnum] = seg_alloc(hashp)))
- return 0;
+ return false;
hctl->nsegs++;
}
@@ -890,137 +809,118 @@ expand_table(HTAB *hashp)
old_segnum = old_bucket >> hctl->sshift;
old_segndx = MOD(old_bucket, hctl->ssize);
- old_seg = GET_SEG(hashp, old_segnum);
- new_seg = GET_SEG(hashp, new_segnum);
+ old_seg = hashp->dir[old_segnum];
+ new_seg = hashp->dir[new_segnum];
- old = &old_seg[old_segndx];
- newbi = &new_seg[new_segndx];
- for (chainIndex = *old;
- chainIndex != INVALID_INDEX;
- chainIndex = nextIndex)
+ oldlink = &old_seg[old_segndx];
+ newlink = &new_seg[new_segndx];
+
+ for (currElement = *oldlink;
+ currElement != NULL;
+ currElement = nextElement)
{
- chain = GET_BUCKET(hashp, chainIndex);
- nextIndex = chain->next;
- if ((long) call_hash(hashp, (char *) &(chain->key)) == old_bucket)
+ nextElement = currElement->link;
+ if ((long) call_hash(hashp, (void *) ELEMENTKEY(currElement))
+ == old_bucket)
{
- *old = chainIndex;
- old = &chain->next;
+ *oldlink = currElement;
+ oldlink = &currElement->link;
}
else
{
- *newbi = chainIndex;
- newbi = &chain->next;
+ *newlink = currElement;
+ newlink = &currElement->link;
}
}
/* don't forget to terminate the rebuilt hash chains... */
- *old = INVALID_INDEX;
- *newbi = INVALID_INDEX;
- return 1;
+ *oldlink = NULL;
+ *newlink = NULL;
+
+ return true;
}
-static int
+static bool
dir_realloc(HTAB *hashp)
{
- char *p;
- char *old_p;
+ HASHSEGMENT *p;
+ HASHSEGMENT *old_p;
long new_dsize;
long old_dirsize;
long new_dirsize;
if (hashp->hctl->max_dsize != NO_MAX_DSIZE)
- return 0;
+ return false;
/* Reallocate directory */
new_dsize = hashp->hctl->dsize << 1;
- old_dirsize = hashp->hctl->dsize * sizeof(SEG_OFFSET);
- new_dirsize = new_dsize * sizeof(SEG_OFFSET);
+ old_dirsize = hashp->hctl->dsize * sizeof(HASHSEGMENT);
+ new_dirsize = new_dsize * sizeof(HASHSEGMENT);
+ old_p = hashp->dir;
CurrentDynaHashCxt = hashp->hcxt;
- old_p = (char *) hashp->dir;
- p = (char *) hashp->alloc((Size) new_dirsize);
+ p = (HASHSEGMENT *) hashp->alloc((Size) new_dirsize);
if (p != NULL)
{
- memmove(p, old_p, old_dirsize);
- MemSet(p + old_dirsize, 0, new_dirsize - old_dirsize);
+ memcpy(p, old_p, old_dirsize);
+ MemSet(((char *) p) + old_dirsize, 0, new_dirsize - old_dirsize);
MEM_FREE((char *) old_p);
- hashp->dir = (SEG_OFFSET *) p;
+ hashp->dir = p;
hashp->hctl->dsize = new_dsize;
- return 1;
+ return true;
}
- return 0;
+
+ return false;
}
-static SEG_OFFSET
+static HASHSEGMENT
seg_alloc(HTAB *hashp)
{
- SEGMENT segp;
- SEG_OFFSET segOffset;
+ HASHSEGMENT segp;
CurrentDynaHashCxt = hashp->hcxt;
- segp = (SEGMENT) hashp->alloc(sizeof(BUCKET_INDEX) * hashp->hctl->ssize);
+ segp = (HASHSEGMENT) hashp->alloc(sizeof(HASHBUCKET) * hashp->hctl->ssize);
if (!segp)
- return 0;
+ return NULL;
- MemSet((char *) segp, 0,
- (long) sizeof(BUCKET_INDEX) * hashp->hctl->ssize);
+ MemSet(segp, 0, sizeof(HASHBUCKET) * hashp->hctl->ssize);
- segOffset = MAKE_HASHOFFSET(hashp, segp);
- return segOffset;
+ return segp;
}
/*
- * allocate some new buckets and link them into the free list
+ * allocate some new elements and link them into the free list
*/
-static int
-bucket_alloc(HTAB *hashp)
+static bool
+element_alloc(HTAB *hashp)
{
+ HASHHDR *hctl = hashp->hctl;
+ Size elementSize;
+ HASHELEMENT *tmpElement;
int i;
- ELEMENT *tmpBucket;
- long bucketSize;
- BUCKET_INDEX tmpIndex,
- lastIndex;
-
- /* Each bucket has a BUCKET_INDEX header plus user data. */
- bucketSize = sizeof(BUCKET_INDEX) + hashp->hctl->keysize + hashp->hctl->datasize;
- /* make sure its aligned correctly */
- bucketSize = MAXALIGN(bucketSize);
+ /* Each element has a HASHELEMENT header plus user data. */
+ elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize);
CurrentDynaHashCxt = hashp->hcxt;
- tmpBucket = (ELEMENT *) hashp->alloc(BUCKET_ALLOC_INCR * bucketSize);
-
- if (!tmpBucket)
- return 0;
+ tmpElement = (HASHELEMENT *)
+ hashp->alloc(HASHELEMENT_ALLOC_INCR * elementSize);
- /* tmpIndex is the shmem offset into the first bucket of the array */
- tmpIndex = MAKE_HASHOFFSET(hashp, tmpBucket);
+ if (!tmpElement)
+ return false;
- /* set the freebucket list to point to the first bucket */
- lastIndex = hashp->hctl->freeBucketIndex;
- hashp->hctl->freeBucketIndex = tmpIndex;
-
- /*
- * initialize each bucket to point to the one behind it. NOTE: loop
- * sets last bucket incorrectly; we fix below.
- */
- for (i = 0; i < BUCKET_ALLOC_INCR; i++)
+ /* link all the new entries into the freelist */
+ for (i = 0; i < HASHELEMENT_ALLOC_INCR; i++)
{
- tmpBucket = GET_BUCKET(hashp, tmpIndex);
- tmpIndex += bucketSize;
- tmpBucket->next = tmpIndex;
+ tmpElement->link = hctl->freeList;
+ hctl->freeList = tmpElement;
+ tmpElement = (HASHELEMENT *) (((char *) tmpElement) + elementSize);
}
- /*
- * the last bucket points to the old freelist head (which is probably
- * invalid or we wouldn't be here)
- */
- tmpBucket->next = lastIndex;
-
- return 1;
+ return true;
}
/* calculate ceil(log base 2) of num */
diff --git a/src/backend/utils/hash/hashfn.c b/src/backend/utils/hash/hashfn.c
index 889837b528d..958deee804f 100644
--- a/src/backend/utils/hash/hashfn.c
+++ b/src/backend/utils/hash/hashfn.c
@@ -1,6 +1,7 @@
/*-------------------------------------------------------------------------
*
* hashfn.c
+ * Hash functions for use in dynahash.c hashtables
*
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
@@ -8,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.13 2001/01/24 19:43:15 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.14 2001/10/01 05:36:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -17,155 +18,100 @@
#include "utils/hsearch.h"
/*
- * Assume that we've already split the bucket to which this
- * key hashes, calculate that bucket, and check that in fact
- * we did already split it.
+ * string_hash: hash function for keys that are null-terminated strings.
+ *
+ * NOTE: since dynahash.c backs this up with a fixed-length memcmp(),
+ * the key must actually be zero-padded to the specified maximum length
+ * to work correctly. However, if it is known that nothing after the
+ * first zero byte is interesting, this is the right hash function to use.
+ *
+ * NOTE: this is the default hash function if none is specified.
*/
long
-string_hash(char *key, int keysize)
+string_hash(void *key, int keysize)
{
- int h;
unsigned char *k = (unsigned char *) key;
+ long h = 0;
- h = 0;
-
- /*
- * Convert string to integer
- */
while (*k)
- h = h * PRIME1 ^ (*k++ - ' ');
+ h = (h * PRIME1) ^ (*k++);
+
h %= PRIME2;
return h;
}
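
The NOTE above is easy to trip over: dynahash compares keys with memcmp() over the full declared keysize, so a string key must be zero-padded out to that length before being handed to hash_search. A throwaway demonstration of why (plain C, keysize and values invented):

/* Why zero-padding matters: assume keysize is 8. */
#include <stdio.h>
#include <string.h>

int
main(void)
{
    char    a[8];
    char    b[8];

    /* Same logical string, different garbage after the terminator... */
    memset(a, 0xAA, sizeof(a));
    memset(b, 0xBB, sizeof(b));
    strcpy(a, "cat");
    strcpy(b, "cat");
    /* ...so a fixed-length compare over keysize bytes disagrees: */
    printf("unpadded equal: %d\n", memcmp(a, b, sizeof(a)) == 0);   /* 0 */

    /* Zero-padding first makes the full-width compare reliable. */
    memset(a, 0, sizeof(a));
    memset(b, 0, sizeof(b));
    strcpy(a, "cat");
    strcpy(b, "cat");
    printf("padded equal:   %d\n", memcmp(a, b, sizeof(a)) == 0);   /* 1 */
    return 0;
}
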
-
+/*
+ * tag_hash: hash function for fixed-size tag values
+ *
+ * NB: we assume that the supplied key is aligned at least on an 'int'
+ * boundary, if its size is >= sizeof(int).
+ */
long
-tag_hash(int *key, int keysize)
+tag_hash(void *key, int keysize)
{
+ int *k = (int *) key;
long h = 0;
/*
- * Convert tag to integer; Use four byte chunks in a "jump table" to
- * go a little faster. Currently the maximum keysize is 16 (mar 17
- * 1992) I have put in cases for up to 24. Bigger than this will
- * resort to the old behavior of the for loop. (see the default case).
+ * Use four byte chunks in a "jump table" to go a little faster.
+ *
+ * Currently the maximum keysize is 16 (mar 17 1992). I have put in
+ * cases for up to 32. Bigger than this will resort to a for loop
+ * (see the default case).
*/
switch (keysize)
{
+ case 8 * sizeof(int):
+ h = (h * PRIME1) ^ (*k++);
+ /* fall through */
+
+ case 7 * sizeof(int):
+ h = (h * PRIME1) ^ (*k++);
+ /* fall through */
+
case 6 * sizeof(int):
- h = h * PRIME1 ^ (*key);
- key++;
+ h = (h * PRIME1) ^ (*k++);
/* fall through */
case 5 * sizeof(int):
- h = h * PRIME1 ^ (*key);
- key++;
+ h = (h * PRIME1) ^ (*k++);
/* fall through */
case 4 * sizeof(int):
- h = h * PRIME1 ^ (*key);
- key++;
+ h = (h * PRIME1) ^ (*k++);
/* fall through */
case 3 * sizeof(int):
- h = h * PRIME1 ^ (*key);
- key++;
+ h = (h * PRIME1) ^ (*k++);
/* fall through */
case 2 * sizeof(int):
- h = h * PRIME1 ^ (*key);
- key++;
+ h = (h * PRIME1) ^ (*k++);
/* fall through */
case sizeof(int):
- h = h * PRIME1 ^ (*key);
- key++;
+ h = (h * PRIME1) ^ (*k++);
break;
default:
- for (; keysize >= (int) sizeof(int); keysize -= sizeof(int), key++)
- h = h * PRIME1 ^ (*key);
-
- /*
- * now let's grab the last few bytes of the tag if the tag has
- * (size % 4) != 0 (which it sometimes will on a sun3).
- */
- if (keysize)
+ /* Do an int at a time */
+ for (; keysize >= (int) sizeof(int); keysize -= sizeof(int))
+ h = (h * PRIME1) ^ (*k++);
+
+ /* Cope with any partial-int leftover bytes */
+ if (keysize > 0)
{
- char *keytmp = (char *) key;
-
- switch (keysize)
- {
- case 3:
- h = h * PRIME1 ^ (*keytmp);
- keytmp++;
- /* fall through */
- case 2:
- h = h * PRIME1 ^ (*keytmp);
- keytmp++;
- /* fall through */
- case 1:
- h = h * PRIME1 ^ (*keytmp);
- break;
- }
+ unsigned char *keybyte = (unsigned char *) k;
+
+ do
+ h = (h * PRIME1) ^ (*keybyte++);
+ while (--keysize > 0);
}
break;
}
h %= PRIME2;
- return h;
-}
-
-/*
- * This is INCREDIBLY ugly, but fast.
- * We break the string up into 8 byte units. On the first time
- * through the loop we get the "leftover bytes" (strlen % 8).
- * On every other iteration, we perform 8 HASHC's so we handle
- * all 8 bytes. Essentially, this saves us 7 cmp & branch
- * instructions. If this routine is heavily used enough, it's
- * worth the ugly coding
- */
-#ifdef NOT_USED
-long
-disk_hash(char *key)
-{
- int n = 0;
- char *str = key;
- int len = strlen(key);
- int loop;
-#define HASHC n = *str++ + 65599 * n
-
- if (len > 0)
- {
- loop = (len + 8 - 1) >> 3;
-
- switch (len & (8 - 1))
- {
- case 0:
- do
- { /* All fall throughs */
- HASHC;
- case 7:
- HASHC;
- case 6:
- HASHC;
- case 5:
- HASHC;
- case 4:
- HASHC;
- case 3:
- HASHC;
- case 2:
- HASHC;
- case 1:
- HASHC;
- } while (--loop);
- }
-
- }
- return n;
+ return h;
}
-
-#endif
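
tag_hash, per the NB above, expects a fixed-size binary key that is at least int-aligned, and it hashes exactly keysize bytes, so the key struct should be fully cleared before its fields are set or stray padding bytes can make equal keys hash differently. A hedged sketch of a typical caller; the SampleTag struct and helper are invented, not from this patch:

#include "postgres.h"

#include "utils/hsearch.h"

/* Hypothetical fixed-size tag key. */
typedef struct
{
    int     dbid;
    int     relid;
} SampleTag;

static long
hash_sample_tag(int dbid, int relid)
{
    SampleTag   tag;

    MemSet(&tag, 0, sizeof(tag));   /* clear the struct so any padding bytes are zero */
    tag.dbid = dbid;
    tag.relid = relid;

    /* keysize passed here must match the keysize given to hash_create() */
    return tag_hash((void *) &tag, sizeof(SampleTag));
}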