summaryrefslogtreecommitdiff
path: root/src/backend/access
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2002-03-09 17:35:37 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2002-03-09 17:35:37 +0000
commitc422b5ca6b0dd9b8a2d1d7b8b437e14f3ca79052 (patch)
treefc242c201ff8748e98bfbb610b312205c44b769b /src/backend/access
parent1eb31d197d8eadc5340f0dfe7e2c7169e1005275 (diff)
Code review for improved-hashing patch. Fix some portability issues
(char != unsigned char, Datum != uint32); make use of new hash code in dynahash hash tables and hash joins.
Diffstat (limited to 'src/backend/access')
-rw-r--r--src/backend/access/hash/hash.c5
-rw-r--r--src/backend/access/hash/hashfunc.c113
2 files changed, 61 insertions, 57 deletions
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index ebad04c9fe5..c0a22b62456 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.55 2002/03/06 20:49:37 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.56 2002/03/09 17:35:35 tgl Exp $
*
* NOTES
* This file contains only the public interface routines.
@@ -164,6 +164,9 @@ hashinsert(PG_FUNCTION_ARGS)
Datum *datum = (Datum *) PG_GETARG_POINTER(1);
char *nulls = (char *) PG_GETARG_POINTER(2);
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
+#ifdef NOT_USED
+ Relation heapRel = (Relation) PG_GETARG_POINTER(4);
+#endif
InsertIndexResult res;
HashItem hitem;
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index 2e0f181939d..b1e3fbf4a08 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.32 2002/03/06 20:49:38 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.33 2002/03/09 17:35:35 tgl Exp $
*
* NOTES
* These functions are stored in pg_amproc. For each operator class
@@ -58,7 +58,7 @@ hashfloat4(PG_FUNCTION_ARGS)
{
float4 key = PG_GETARG_FLOAT4(0);
- return hash_any((char *) &key, sizeof(key));
+ return hash_any((unsigned char *) &key, sizeof(key));
}
Datum
@@ -66,7 +66,7 @@ hashfloat8(PG_FUNCTION_ARGS)
{
float8 key = PG_GETARG_FLOAT8(0);
- return hash_any((char *) &key, sizeof(key));
+ return hash_any((unsigned char *) &key, sizeof(key));
}
Datum
@@ -74,7 +74,7 @@ hashoidvector(PG_FUNCTION_ARGS)
{
Oid *key = (Oid *) PG_GETARG_POINTER(0);
- return hash_any((char *) key, INDEX_MAX_KEYS * sizeof(Oid));
+ return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(Oid));
}
/*
@@ -87,17 +87,18 @@ hashint2vector(PG_FUNCTION_ARGS)
{
int16 *key = (int16 *) PG_GETARG_POINTER(0);
- return hash_any((char *) key, INDEX_MAX_KEYS * sizeof(int16));
+ return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(int16));
}
Datum
hashname(PG_FUNCTION_ARGS)
{
char *key = NameStr(*PG_GETARG_NAME(0));
+ int keylen = strlen(key);
- Assert(strlen(key) <= NAMEDATALEN);
+ Assert(keylen < NAMEDATALEN); /* else it's not truncated correctly */
- return hash_any(key, strlen(key));
+ return hash_any((unsigned char *) key, keylen);
}
/*
@@ -110,7 +111,8 @@ hashvarlena(PG_FUNCTION_ARGS)
struct varlena *key = PG_GETARG_VARLENA_P(0);
Datum result;
- result = hash_any(VARDATA(key), VARSIZE(key) - VARHDRSZ);
+ result = hash_any((unsigned char *) VARDATA(key),
+ VARSIZE(key) - VARHDRSZ);
/* Avoid leaking memory for toasted inputs */
PG_FREE_IF_COPY(key, 0);
@@ -118,13 +120,15 @@ hashvarlena(PG_FUNCTION_ARGS)
return result;
}
-/* This hash function was written by Bob Jenkins
+/*
+ * This hash function was written by Bob Jenkins
* (bob_jenkins@burtleburtle.net), and superficially adapted
* for PostgreSQL by Neil Conway. For more information on this
- * hash function, see http://burtleburtle.net/bob/hash/doobs.html
+ * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
+ * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
*/
-/*
+/*----------
* mix -- mix 3 32-bit values reversibly.
* For every delta with one or two bits set, and the deltas of all three
* high bits or all three low bits, whether the original value of a,b,c
@@ -133,6 +137,7 @@ hashvarlena(PG_FUNCTION_ARGS)
* have at least 1/4 probability of changing.
* - If mix() is run forward, every bit of c will change between 1/3 and
* 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
+ *----------
*/
#define mix(a,b,c) \
{ \
@@ -151,56 +156,52 @@ hashvarlena(PG_FUNCTION_ARGS)
* hash_any() -- hash a variable-length key into a 32-bit value
* k : the key (the unaligned variable-length array of bytes)
* len : the length of the key, counting by bytes
- * Returns a 32-bit value. Every bit of the key affects every bit of
+ *
+ * Returns a uint32 value. Every bit of the key affects every bit of
* the return value. Every 1-bit and 2-bit delta achieves avalanche.
* About 6*len+35 instructions. The best hash table sizes are powers
* of 2. There is no need to do mod a prime (mod is sooo slow!).
* If you need less than 32 bits, use a bitmask.
*/
Datum
-hash_any(register const char *k, register int keylen)
+hash_any(register const unsigned char *k, register int keylen)
{
- register Datum a,b,c,len;
-
- /* Set up the internal state */
- len = keylen;
- a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
- /* Another arbitrary value. If the hash function is called
- * multiple times, this could be the previously generated
- * hash value; however, the interface currently doesn't allow
- * this. AFAIK this isn't a big deal.
- */
- c = 3923095;
-
- /* handle most of the key */
- while (len >= 12)
- {
- a += (k[0] +((Datum)k[1]<<8) +((Datum)k[2]<<16) +((Datum)k[3]<<24));
- b += (k[4] +((Datum)k[5]<<8) +((Datum)k[6]<<16) +((Datum)k[7]<<24));
- c += (k[8] +((Datum)k[9]<<8) +((Datum)k[10]<<16)+((Datum)k[11]<<24));
- mix(a,b,c);
- k += 12; len -= 12;
- }
-
- /* handle the last 11 bytes */
- c += keylen;
- switch(len) /* all the case statements fall through */
- {
- case 11: c+=((Datum)k[10]<<24);
- case 10: c+=((Datum)k[9]<<16);
- case 9 : c+=((Datum)k[8]<<8);
- /* the first byte of c is reserved for the length */
- case 8 : b+=((Datum)k[7]<<24);
- case 7 : b+=((Datum)k[6]<<16);
- case 6 : b+=((Datum)k[5]<<8);
- case 5 : b+=k[4];
- case 4 : a+=((Datum)k[3]<<24);
- case 3 : a+=((Datum)k[2]<<16);
- case 2 : a+=((Datum)k[1]<<8);
- case 1 : a+=k[0];
- /* case 0: nothing left to add */
- }
- mix(a,b,c);
- /* report the result */
- return c;
+ register uint32 a,b,c,len;
+
+ /* Set up the internal state */
+ len = keylen;
+ a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
+ c = 3923095; /* initialize with an arbitrary value */
+
+ /* handle most of the key */
+ while (len >= 12)
+ {
+ a += (k[0] +((uint32)k[1]<<8) +((uint32)k[2]<<16) +((uint32)k[3]<<24));
+ b += (k[4] +((uint32)k[5]<<8) +((uint32)k[6]<<16) +((uint32)k[7]<<24));
+ c += (k[8] +((uint32)k[9]<<8) +((uint32)k[10]<<16)+((uint32)k[11]<<24));
+ mix(a,b,c);
+ k += 12; len -= 12;
+ }
+
+ /* handle the last 11 bytes */
+ c += keylen;
+ switch (len) /* all the case statements fall through */
+ {
+ case 11: c+=((uint32)k[10]<<24);
+ case 10: c+=((uint32)k[9]<<16);
+ case 9 : c+=((uint32)k[8]<<8);
+ /* the first byte of c is reserved for the length */
+ case 8 : b+=((uint32)k[7]<<24);
+ case 7 : b+=((uint32)k[6]<<16);
+ case 6 : b+=((uint32)k[5]<<8);
+ case 5 : b+=k[4];
+ case 4 : a+=((uint32)k[3]<<24);
+ case 3 : a+=((uint32)k[2]<<16);
+ case 2 : a+=((uint32)k[1]<<8);
+ case 1 : a+=k[0];
+ /* case 0: nothing left to add */
+ }
+ mix(a,b,c);
+ /* report the result */
+ return UInt32GetDatum(c);
}