summaryrefslogtreecommitdiff
path: root/contrib/pg_trgm/trgm_gist.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/pg_trgm/trgm_gist.c')
-rw-r--r--contrib/pg_trgm/trgm_gist.c227
1 files changed, 127 insertions, 100 deletions
diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c
index 35a75c60668..9937ef92531 100644
--- a/contrib/pg_trgm/trgm_gist.c
+++ b/contrib/pg_trgm/trgm_gist.c
@@ -3,11 +3,23 @@
*/
#include "postgres.h"
+#include "access/reloptions.h"
#include "access/stratnum.h"
#include "fmgr.h"
#include "port/pg_bitutils.h"
#include "trgm.h"
+/* gist_trgm_ops opclass options */
+typedef struct
+{
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ int siglen; /* signature length in bytes */
+} TrgmGistOptions;
+
+#define LTREE_GET_ASIGLEN() (PG_HAS_OPCLASS_OPTIONS() ? \
+ ((TrgmGistOptions *) PG_GET_OPCLASS_OPTIONS())->siglen : \
+ SIGLEN_DEFAULT)
+
typedef struct
{
/* most recent inputs to gtrgm_consistent */
@@ -37,6 +49,7 @@ PG_FUNCTION_INFO_V1(gtrgm_union);
PG_FUNCTION_INFO_V1(gtrgm_same);
PG_FUNCTION_INFO_V1(gtrgm_penalty);
PG_FUNCTION_INFO_V1(gtrgm_picksplit);
+PG_FUNCTION_INFO_V1(gtrgm_options);
Datum
@@ -53,20 +66,41 @@ gtrgm_out(PG_FUNCTION_ARGS)
PG_RETURN_DATUM(0);
}
+static TRGM *
+gtrgm_alloc(bool isalltrue, int siglen, BITVECP sign)
+{
+ int flag = SIGNKEY | (isalltrue ? ALLISTRUE : 0);
+ int size = CALCGTSIZE(flag, siglen);
+ TRGM *res = palloc(size);
+
+ SET_VARSIZE(res, size);
+ res->flag = flag;
+
+ if (!isalltrue)
+ {
+ if (sign)
+ memcpy(GETSIGN(res), sign, siglen);
+ else
+ memset(GETSIGN(res), 0, siglen);
+ }
+
+ return res;
+}
+
static void
-makesign(BITVECP sign, TRGM *a)
+makesign(BITVECP sign, TRGM *a, int siglen)
{
int32 k,
len = ARRNELEM(a);
trgm *ptr = GETARR(a);
int32 tmp = 0;
- MemSet((void *) sign, 0, sizeof(BITVEC));
- SETBIT(sign, SIGLENBIT); /* set last unused bit */
+ MemSet((void *) sign, 0, siglen);
+ SETBIT(sign, SIGLENBIT(siglen)); /* set last unused bit */
for (k = 0; k < len; k++)
{
CPTRGM(((char *) &tmp), ptr + k);
- HASH(sign, tmp);
+ HASH(sign, tmp, siglen);
}
}
@@ -74,6 +108,7 @@ Datum
gtrgm_compress(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ int siglen = LTREE_GET_ASIGLEN();
GISTENTRY *retval = entry;
if (entry->leafkey)
@@ -90,22 +125,17 @@ gtrgm_compress(PG_FUNCTION_ARGS)
else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
!ISALLTRUE(DatumGetPointer(entry->key)))
{
- int32 i,
- len;
+ int32 i;
TRGM *res;
BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
- LOOPBYTE
+ LOOPBYTE(siglen)
{
if ((sign[i] & 0xff) != 0xff)
PG_RETURN_POINTER(retval);
}
- len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
- res = (TRGM *) palloc(len);
- SET_VARSIZE(res, len);
- res->flag = SIGNKEY | ALLISTRUE;
-
+ res = gtrgm_alloc(true, siglen, sign);
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(res),
entry->rel, entry->page,
@@ -139,7 +169,7 @@ gtrgm_decompress(PG_FUNCTION_ARGS)
}
static int32
-cnt_sml_sign_common(TRGM *qtrg, BITVECP sign)
+cnt_sml_sign_common(TRGM *qtrg, BITVECP sign, int siglen)
{
int32 count = 0;
int32 k,
@@ -150,7 +180,7 @@ cnt_sml_sign_common(TRGM *qtrg, BITVECP sign)
for (k = 0; k < len; k++)
{
CPTRGM(((char *) &tmp), ptr + k);
- count += GETBIT(sign, HASHVAL(tmp));
+ count += GETBIT(sign, HASHVAL(tmp, siglen));
}
return count;
@@ -165,6 +195,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
/* Oid subtype = PG_GETARG_OID(3); */
bool *recheck = (bool *) PG_GETARG_POINTER(4);
+ int siglen = LTREE_GET_ASIGLEN();
TRGM *key = (TRGM *) DatumGetPointer(entry->key);
TRGM *qtrg;
bool res;
@@ -292,7 +323,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
}
else
{ /* non-leaf contains signature */
- int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key));
+ int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key), siglen);
int32 len = ARRNELEM(qtrg);
if (len == 0)
@@ -334,7 +365,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
for (k = 0; k < len; k++)
{
CPTRGM(((char *) &tmp), ptr + k);
- if (!GETBIT(sign, HASHVAL(tmp)))
+ if (!GETBIT(sign, HASHVAL(tmp, siglen)))
{
res = false;
break;
@@ -387,7 +418,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
for (k = 0; k < len; k++)
{
CPTRGM(((char *) &tmp), ptr + k);
- check[k] = GETBIT(sign, HASHVAL(tmp));
+ check[k] = GETBIT(sign, HASHVAL(tmp, siglen));
}
res = trigramsMatchGraph(cache->graph, check);
pfree(check);
@@ -417,6 +448,7 @@ gtrgm_distance(PG_FUNCTION_ARGS)
/* Oid subtype = PG_GETARG_OID(3); */
bool *recheck = (bool *) PG_GETARG_POINTER(4);
+ int siglen = LTREE_GET_ASIGLEN();
TRGM *key = (TRGM *) DatumGetPointer(entry->key);
TRGM *qtrg;
float8 res;
@@ -474,7 +506,7 @@ gtrgm_distance(PG_FUNCTION_ARGS)
}
else
{ /* non-leaf contains signature */
- int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key));
+ int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key), siglen);
int32 len = ARRNELEM(qtrg);
res = (len == 0) ? -1.0 : 1.0 - ((float8) count) / ((float8) len);
@@ -490,7 +522,7 @@ gtrgm_distance(PG_FUNCTION_ARGS)
}
static int32
-unionkey(BITVECP sbase, TRGM *add)
+unionkey(BITVECP sbase, TRGM *add, int siglen)
{
int32 i;
@@ -501,7 +533,7 @@ unionkey(BITVECP sbase, TRGM *add)
if (ISALLTRUE(add))
return 1;
- LOOPBYTE
+ LOOPBYTE(siglen)
sbase[i] |= sadd[i];
}
else
@@ -512,7 +544,7 @@ unionkey(BITVECP sbase, TRGM *add)
for (i = 0; i < ARRNELEM(add); i++)
{
CPTRGM(((char *) &tmp), ptr + i);
- HASH(sbase, tmp);
+ HASH(sbase, tmp, siglen);
}
}
return 0;
@@ -525,29 +557,22 @@ gtrgm_union(PG_FUNCTION_ARGS)
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
int32 len = entryvec->n;
int *size = (int *) PG_GETARG_POINTER(1);
- BITVEC base;
+ int siglen = LTREE_GET_ASIGLEN();
int32 i;
- int32 flag = 0;
- TRGM *result;
+ TRGM *result = gtrgm_alloc(false, siglen, NULL);
+ BITVECP base = GETSIGN(result);
- MemSet((void *) base, 0, sizeof(BITVEC));
for (i = 0; i < len; i++)
{
- if (unionkey(base, GETENTRY(entryvec, i)))
+ if (unionkey(base, GETENTRY(entryvec, i), siglen))
{
- flag = ALLISTRUE;
+ result->flag = ALLISTRUE;
+ SET_VARSIZE(result, CALCGTSIZE(ALLISTRUE, siglen));
break;
}
}
- flag |= SIGNKEY;
- len = CALCGTSIZE(flag, 0);
- result = (TRGM *) palloc(len);
- SET_VARSIZE(result, len);
- result->flag = flag;
- if (!ISALLTRUE(result))
- memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
- *size = len;
+ *size = VARSIZE(result);
PG_RETURN_POINTER(result);
}
@@ -558,6 +583,7 @@ gtrgm_same(PG_FUNCTION_ARGS)
TRGM *a = (TRGM *) PG_GETARG_POINTER(0);
TRGM *b = (TRGM *) PG_GETARG_POINTER(1);
bool *result = (bool *) PG_GETARG_POINTER(2);
+ int siglen = LTREE_GET_ASIGLEN();
if (ISSIGNKEY(a))
{ /* then b also ISSIGNKEY */
@@ -574,7 +600,7 @@ gtrgm_same(PG_FUNCTION_ARGS)
sb = GETSIGN(b);
*result = true;
- LOOPBYTE
+ LOOPBYTE(siglen)
{
if (sa[i] != sb[i])
{
@@ -611,19 +637,19 @@ gtrgm_same(PG_FUNCTION_ARGS)
}
static int32
-sizebitvec(BITVECP sign)
+sizebitvec(BITVECP sign, int siglen)
{
- return pg_popcount(sign, SIGLEN);
+ return pg_popcount(sign, siglen);
}
static int
-hemdistsign(BITVECP a, BITVECP b)
+hemdistsign(BITVECP a, BITVECP b, int siglen)
{
int i,
diff,
dist = 0;
- LOOPBYTE
+ LOOPBYTE(siglen)
{
diff = (unsigned char) (a[i] ^ b[i]);
/* Using the popcount functions here isn't likely to win */
@@ -633,19 +659,19 @@ hemdistsign(BITVECP a, BITVECP b)
}
static int
-hemdist(TRGM *a, TRGM *b)
+hemdist(TRGM *a, TRGM *b, int siglen)
{
if (ISALLTRUE(a))
{
if (ISALLTRUE(b))
return 0;
else
- return SIGLENBIT - sizebitvec(GETSIGN(b));
+ return SIGLENBIT(siglen) - sizebitvec(GETSIGN(b), siglen);
}
else if (ISALLTRUE(b))
- return SIGLENBIT - sizebitvec(GETSIGN(a));
+ return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen);
- return hemdistsign(GETSIGN(a), GETSIGN(b));
+ return hemdistsign(GETSIGN(a), GETSIGN(b), siglen);
}
Datum
@@ -654,6 +680,7 @@ gtrgm_penalty(PG_FUNCTION_ARGS)
GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
float *penalty = (float *) PG_GETARG_POINTER(2);
+ int siglen = LTREE_GET_ASIGLEN();
TRGM *origval = (TRGM *) DatumGetPointer(origentry->key);
TRGM *newval = (TRGM *) DatumGetPointer(newentry->key);
BITVECP orig = GETSIGN(origval);
@@ -663,7 +690,7 @@ gtrgm_penalty(PG_FUNCTION_ARGS)
if (ISARRKEY(newval))
{
char *cache = (char *) fcinfo->flinfo->fn_extra;
- TRGM *cachedVal = (TRGM *) (cache + MAXALIGN(sizeof(BITVEC)));
+ TRGM *cachedVal = (TRGM *) (cache + MAXALIGN(siglen));
Size newvalsize = VARSIZE(newval);
BITVECP sign;
@@ -677,12 +704,12 @@ gtrgm_penalty(PG_FUNCTION_ARGS)
char *newcache;
newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
- MAXALIGN(sizeof(BITVEC)) +
+ MAXALIGN(siglen) +
newvalsize);
- makesign((BITVECP) newcache, newval);
+ makesign((BITVECP) newcache, newval, siglen);
- cachedVal = (TRGM *) (newcache + MAXALIGN(sizeof(BITVEC)));
+ cachedVal = (TRGM *) (newcache + MAXALIGN(siglen));
memcpy(cachedVal, newval, newvalsize);
if (cache)
@@ -694,31 +721,32 @@ gtrgm_penalty(PG_FUNCTION_ARGS)
sign = (BITVECP) cache;
if (ISALLTRUE(origval))
- *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
+ *penalty = ((float) (SIGLENBIT(siglen) - sizebitvec(sign, siglen))) / (float) (SIGLENBIT(siglen) + 1);
else
- *penalty = hemdistsign(sign, orig);
+ *penalty = hemdistsign(sign, orig, siglen);
}
else
- *penalty = hemdist(origval, newval);
+ *penalty = hemdist(origval, newval, siglen);
PG_RETURN_POINTER(penalty);
}
typedef struct
{
bool allistrue;
- BITVEC sign;
+ BITVECP sign;
} CACHESIGN;
static void
-fillcache(CACHESIGN *item, TRGM *key)
+fillcache(CACHESIGN *item, TRGM *key, BITVECP sign, int siglen)
{
item->allistrue = false;
+ item->sign = sign;
if (ISARRKEY(key))
- makesign(item->sign, key);
+ makesign(item->sign, key, siglen);
else if (ISALLTRUE(key))
item->allistrue = true;
else
- memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+ memcpy((void *) item->sign, (void *) GETSIGN(key), siglen);
}
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
@@ -739,19 +767,19 @@ comparecost(const void *a, const void *b)
static int
-hemdistcache(CACHESIGN *a, CACHESIGN *b)
+hemdistcache(CACHESIGN *a, CACHESIGN *b, int siglen)
{
if (a->allistrue)
{
if (b->allistrue)
return 0;
else
- return SIGLENBIT - sizebitvec(b->sign);
+ return SIGLENBIT(siglen) - sizebitvec(b->sign, siglen);
}
else if (b->allistrue)
- return SIGLENBIT - sizebitvec(a->sign);
+ return SIGLENBIT(siglen) - sizebitvec(a->sign, siglen);
- return hemdistsign(a->sign, b->sign);
+ return hemdistsign(a->sign, b->sign, siglen);
}
Datum
@@ -760,6 +788,7 @@ gtrgm_picksplit(PG_FUNCTION_ARGS)
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
OffsetNumber maxoff = entryvec->n - 2;
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+ int siglen = LTREE_GET_ASIGLEN();
OffsetNumber k,
j;
TRGM *datum_l,
@@ -778,19 +807,23 @@ gtrgm_picksplit(PG_FUNCTION_ARGS)
BITVECP ptr;
int i;
CACHESIGN *cache;
+ char *cache_sign;
SPLITCOST *costvector;
/* cache the sign data for each existing item */
cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+ cache_sign = palloc(siglen * (maxoff + 2));
+
for (k = FirstOffsetNumber; k <= maxoff; k = OffsetNumberNext(k))
- fillcache(&cache[k], GETENTRY(entryvec, k));
+ fillcache(&cache[k], GETENTRY(entryvec, k), &cache_sign[siglen * k],
+ siglen);
/* now find the two furthest-apart items */
for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
{
for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
{
- size_waste = hemdistcache(&(cache[j]), &(cache[k]));
+ size_waste = hemdistcache(&(cache[j]), &(cache[k]), siglen);
if (size_waste > waste)
{
waste = size_waste;
@@ -815,44 +848,22 @@ gtrgm_picksplit(PG_FUNCTION_ARGS)
v->spl_nright = 0;
/* form initial .. */
- if (cache[seed_1].allistrue)
- {
- datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
- SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
- datum_l->flag = SIGNKEY | ALLISTRUE;
- }
- else
- {
- datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0));
- SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
- datum_l->flag = SIGNKEY;
- memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
- }
- if (cache[seed_2].allistrue)
- {
- datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
- SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
- datum_r->flag = SIGNKEY | ALLISTRUE;
- }
- else
- {
- datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0));
- SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
- datum_r->flag = SIGNKEY;
- memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
- }
+ datum_l = gtrgm_alloc(cache[seed_1].allistrue, siglen, cache[seed_1].sign);
+ datum_r = gtrgm_alloc(cache[seed_2].allistrue, siglen, cache[seed_2].sign);
union_l = GETSIGN(datum_l);
union_r = GETSIGN(datum_r);
maxoff = OffsetNumberNext(maxoff);
- fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
+ fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff),
+ &cache_sign[siglen * maxoff], siglen);
+
/* sort before ... */
costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
{
costvector[j - 1].pos = j;
- size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
- size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
+ size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]), siglen);
+ size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]), siglen);
costvector[j - 1].cost = abs(size_alpha - size_beta);
}
qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
@@ -878,36 +889,38 @@ gtrgm_picksplit(PG_FUNCTION_ARGS)
if (ISALLTRUE(datum_l) && cache[j].allistrue)
size_alpha = 0;
else
- size_alpha = SIGLENBIT -
+ size_alpha = SIGLENBIT(siglen) -
sizebitvec((cache[j].allistrue) ? GETSIGN(datum_l) :
- GETSIGN(cache[j].sign));
+ GETSIGN(cache[j].sign),
+ siglen);
}
else
- size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));
+ size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l), siglen);
if (ISALLTRUE(datum_r) || cache[j].allistrue)
{
if (ISALLTRUE(datum_r) && cache[j].allistrue)
size_beta = 0;
else
- size_beta = SIGLENBIT -
+ size_beta = SIGLENBIT(siglen) -
sizebitvec((cache[j].allistrue) ? GETSIGN(datum_r) :
- GETSIGN(cache[j].sign));
+ GETSIGN(cache[j].sign),
+ siglen);
}
else
- size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));
+ size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r), siglen);
if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
{
if (ISALLTRUE(datum_l) || cache[j].allistrue)
{
if (!ISALLTRUE(datum_l))
- MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+ MemSet((void *) GETSIGN(datum_l), 0xff, siglen);
}
else
{
ptr = cache[j].sign;
- LOOPBYTE
+ LOOPBYTE(siglen)
union_l[i] |= ptr[i];
}
*left++ = j;
@@ -918,12 +931,12 @@ gtrgm_picksplit(PG_FUNCTION_ARGS)
if (ISALLTRUE(datum_r) || cache[j].allistrue)
{
if (!ISALLTRUE(datum_r))
- MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+ MemSet((void *) GETSIGN(datum_r), 0xff, siglen);
}
else
{
ptr = cache[j].sign;
- LOOPBYTE
+ LOOPBYTE(siglen)
union_r[i] |= ptr[i];
}
*right++ = j;
@@ -937,3 +950,17 @@ gtrgm_picksplit(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(v);
}
+
+Datum
+gtrgm_options(PG_FUNCTION_ARGS)
+{
+ local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
+
+ init_local_reloptions(relopts, sizeof(TrgmGistOptions));
+ add_local_int_reloption(relopts, "siglen",
+ "signature length in bytes",
+ SIGLEN_DEFAULT, 1, SIGLEN_MAX,
+ offsetof(TrgmGistOptions, siglen));
+
+ PG_RETURN_VOID();
+}