diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2002-03-09 17:35:37 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2002-03-09 17:35:37 +0000 |
commit | c422b5ca6b0dd9b8a2d1d7b8b437e14f3ca79052 (patch) | |
tree | fc242c201ff8748e98bfbb610b312205c44b769b /src/backend/access | |
parent | 1eb31d197d8eadc5340f0dfe7e2c7169e1005275 (diff) |
Code review for improved-hashing patch. Fix some portability issues
(char != unsigned char, Datum != uint32); make use of new hash code in
dynahash hash tables and hash joins.
Diffstat (limited to 'src/backend/access')
-rw-r--r-- | src/backend/access/hash/hash.c | 5 | ||||
-rw-r--r-- | src/backend/access/hash/hashfunc.c | 113 |
2 files changed, 61 insertions, 57 deletions
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index ebad04c9fe5..c0a22b62456 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.55 2002/03/06 20:49:37 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.56 2002/03/09 17:35:35 tgl Exp $ * * NOTES * This file contains only the public interface routines. @@ -164,6 +164,9 @@ hashinsert(PG_FUNCTION_ARGS) Datum *datum = (Datum *) PG_GETARG_POINTER(1); char *nulls = (char *) PG_GETARG_POINTER(2); ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); +#ifdef NOT_USED + Relation heapRel = (Relation) PG_GETARG_POINTER(4); +#endif InsertIndexResult res; HashItem hitem; diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index 2e0f181939d..b1e3fbf4a08 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.32 2002/03/06 20:49:38 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.33 2002/03/09 17:35:35 tgl Exp $ * * NOTES * These functions are stored in pg_amproc. For each operator class @@ -58,7 +58,7 @@ hashfloat4(PG_FUNCTION_ARGS) { float4 key = PG_GETARG_FLOAT4(0); - return hash_any((char *) &key, sizeof(key)); + return hash_any((unsigned char *) &key, sizeof(key)); } Datum @@ -66,7 +66,7 @@ hashfloat8(PG_FUNCTION_ARGS) { float8 key = PG_GETARG_FLOAT8(0); - return hash_any((char *) &key, sizeof(key)); + return hash_any((unsigned char *) &key, sizeof(key)); } Datum @@ -74,7 +74,7 @@ hashoidvector(PG_FUNCTION_ARGS) { Oid *key = (Oid *) PG_GETARG_POINTER(0); - return hash_any((char *) key, INDEX_MAX_KEYS * sizeof(Oid)); + return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(Oid)); } /* @@ -87,17 +87,18 @@ hashint2vector(PG_FUNCTION_ARGS) { int16 *key = (int16 *) PG_GETARG_POINTER(0); - return hash_any((char *) key, INDEX_MAX_KEYS * sizeof(int16)); + return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(int16)); } Datum hashname(PG_FUNCTION_ARGS) { char *key = NameStr(*PG_GETARG_NAME(0)); + int keylen = strlen(key); - Assert(strlen(key) <= NAMEDATALEN); + Assert(keylen < NAMEDATALEN); /* else it's not truncated correctly */ - return hash_any(key, strlen(key)); + return hash_any((unsigned char *) key, keylen); } /* @@ -110,7 +111,8 @@ hashvarlena(PG_FUNCTION_ARGS) struct varlena *key = PG_GETARG_VARLENA_P(0); Datum result; - result = hash_any(VARDATA(key), VARSIZE(key) - VARHDRSZ); + result = hash_any((unsigned char *) VARDATA(key), + VARSIZE(key) - VARHDRSZ); /* Avoid leaking memory for toasted inputs */ PG_FREE_IF_COPY(key, 0); @@ -118,13 +120,15 @@ hashvarlena(PG_FUNCTION_ARGS) return result; } -/* This hash function was written by Bob Jenkins +/* + * This hash function was written by Bob Jenkins * (bob_jenkins@burtleburtle.net), and superficially adapted * for PostgreSQL by Neil Conway. For more information on this - * hash function, see http://burtleburtle.net/bob/hash/doobs.html + * hash function, see http://burtleburtle.net/bob/hash/doobs.html, + * or Bob's article in Dr. Dobb's Journal, Sept. 1997. */ -/* +/*---------- * mix -- mix 3 32-bit values reversibly. * For every delta with one or two bits set, and the deltas of all three * high bits or all three low bits, whether the original value of a,b,c @@ -133,6 +137,7 @@ hashvarlena(PG_FUNCTION_ARGS) * have at least 1/4 probability of changing. * - If mix() is run forward, every bit of c will change between 1/3 and * 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.) + *---------- */ #define mix(a,b,c) \ { \ @@ -151,56 +156,52 @@ hashvarlena(PG_FUNCTION_ARGS) * hash_any() -- hash a variable-length key into a 32-bit value * k : the key (the unaligned variable-length array of bytes) * len : the length of the key, counting by bytes - * Returns a 32-bit value. Every bit of the key affects every bit of + * + * Returns a uint32 value. Every bit of the key affects every bit of * the return value. Every 1-bit and 2-bit delta achieves avalanche. * About 6*len+35 instructions. The best hash table sizes are powers * of 2. There is no need to do mod a prime (mod is sooo slow!). * If you need less than 32 bits, use a bitmask. */ Datum -hash_any(register const char *k, register int keylen) +hash_any(register const unsigned char *k, register int keylen) { - register Datum a,b,c,len; - - /* Set up the internal state */ - len = keylen; - a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ - /* Another arbitrary value. If the hash function is called - * multiple times, this could be the previously generated - * hash value; however, the interface currently doesn't allow - * this. AFAIK this isn't a big deal. - */ - c = 3923095; - - /* handle most of the key */ - while (len >= 12) - { - a += (k[0] +((Datum)k[1]<<8) +((Datum)k[2]<<16) +((Datum)k[3]<<24)); - b += (k[4] +((Datum)k[5]<<8) +((Datum)k[6]<<16) +((Datum)k[7]<<24)); - c += (k[8] +((Datum)k[9]<<8) +((Datum)k[10]<<16)+((Datum)k[11]<<24)); - mix(a,b,c); - k += 12; len -= 12; - } - - /* handle the last 11 bytes */ - c += keylen; - switch(len) /* all the case statements fall through */ - { - case 11: c+=((Datum)k[10]<<24); - case 10: c+=((Datum)k[9]<<16); - case 9 : c+=((Datum)k[8]<<8); - /* the first byte of c is reserved for the length */ - case 8 : b+=((Datum)k[7]<<24); - case 7 : b+=((Datum)k[6]<<16); - case 6 : b+=((Datum)k[5]<<8); - case 5 : b+=k[4]; - case 4 : a+=((Datum)k[3]<<24); - case 3 : a+=((Datum)k[2]<<16); - case 2 : a+=((Datum)k[1]<<8); - case 1 : a+=k[0]; - /* case 0: nothing left to add */ - } - mix(a,b,c); - /* report the result */ - return c; + register uint32 a,b,c,len; + + /* Set up the internal state */ + len = keylen; + a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ + c = 3923095; /* initialize with an arbitrary value */ + + /* handle most of the key */ + while (len >= 12) + { + a += (k[0] +((uint32)k[1]<<8) +((uint32)k[2]<<16) +((uint32)k[3]<<24)); + b += (k[4] +((uint32)k[5]<<8) +((uint32)k[6]<<16) +((uint32)k[7]<<24)); + c += (k[8] +((uint32)k[9]<<8) +((uint32)k[10]<<16)+((uint32)k[11]<<24)); + mix(a,b,c); + k += 12; len -= 12; + } + + /* handle the last 11 bytes */ + c += keylen; + switch (len) /* all the case statements fall through */ + { + case 11: c+=((uint32)k[10]<<24); + case 10: c+=((uint32)k[9]<<16); + case 9 : c+=((uint32)k[8]<<8); + /* the first byte of c is reserved for the length */ + case 8 : b+=((uint32)k[7]<<24); + case 7 : b+=((uint32)k[6]<<16); + case 6 : b+=((uint32)k[5]<<8); + case 5 : b+=k[4]; + case 4 : a+=((uint32)k[3]<<24); + case 3 : a+=((uint32)k[2]<<16); + case 2 : a+=((uint32)k[1]<<8); + case 1 : a+=k[0]; + /* case 0: nothing left to add */ + } + mix(a,b,c); + /* report the result */ + return UInt32GetDatum(c); } |