diff options
Diffstat (limited to 'contrib/pg_trgm/trgm_gist.c')
-rw-r--r-- | contrib/pg_trgm/trgm_gist.c | 227 |
1 files changed, 127 insertions, 100 deletions
diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c index 35a75c60668..9937ef92531 100644 --- a/contrib/pg_trgm/trgm_gist.c +++ b/contrib/pg_trgm/trgm_gist.c @@ -3,11 +3,23 @@ */ #include "postgres.h" +#include "access/reloptions.h" #include "access/stratnum.h" #include "fmgr.h" #include "port/pg_bitutils.h" #include "trgm.h" +/* gist_trgm_ops opclass options */ +typedef struct +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + int siglen; /* signature length in bytes */ +} TrgmGistOptions; + +#define LTREE_GET_ASIGLEN() (PG_HAS_OPCLASS_OPTIONS() ? \ + ((TrgmGistOptions *) PG_GET_OPCLASS_OPTIONS())->siglen : \ + SIGLEN_DEFAULT) + typedef struct { /* most recent inputs to gtrgm_consistent */ @@ -37,6 +49,7 @@ PG_FUNCTION_INFO_V1(gtrgm_union); PG_FUNCTION_INFO_V1(gtrgm_same); PG_FUNCTION_INFO_V1(gtrgm_penalty); PG_FUNCTION_INFO_V1(gtrgm_picksplit); +PG_FUNCTION_INFO_V1(gtrgm_options); Datum @@ -53,20 +66,41 @@ gtrgm_out(PG_FUNCTION_ARGS) PG_RETURN_DATUM(0); } +static TRGM * +gtrgm_alloc(bool isalltrue, int siglen, BITVECP sign) +{ + int flag = SIGNKEY | (isalltrue ? ALLISTRUE : 0); + int size = CALCGTSIZE(flag, siglen); + TRGM *res = palloc(size); + + SET_VARSIZE(res, size); + res->flag = flag; + + if (!isalltrue) + { + if (sign) + memcpy(GETSIGN(res), sign, siglen); + else + memset(GETSIGN(res), 0, siglen); + } + + return res; +} + static void -makesign(BITVECP sign, TRGM *a) +makesign(BITVECP sign, TRGM *a, int siglen) { int32 k, len = ARRNELEM(a); trgm *ptr = GETARR(a); int32 tmp = 0; - MemSet((void *) sign, 0, sizeof(BITVEC)); - SETBIT(sign, SIGLENBIT); /* set last unused bit */ + MemSet((void *) sign, 0, siglen); + SETBIT(sign, SIGLENBIT(siglen)); /* set last unused bit */ for (k = 0; k < len; k++) { CPTRGM(((char *) &tmp), ptr + k); - HASH(sign, tmp); + HASH(sign, tmp, siglen); } } @@ -74,6 +108,7 @@ Datum gtrgm_compress(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + int siglen = LTREE_GET_ASIGLEN(); GISTENTRY *retval = entry; if (entry->leafkey) @@ -90,22 +125,17 @@ gtrgm_compress(PG_FUNCTION_ARGS) else if (ISSIGNKEY(DatumGetPointer(entry->key)) && !ISALLTRUE(DatumGetPointer(entry->key))) { - int32 i, - len; + int32 i; TRGM *res; BITVECP sign = GETSIGN(DatumGetPointer(entry->key)); - LOOPBYTE + LOOPBYTE(siglen) { if ((sign[i] & 0xff) != 0xff) PG_RETURN_POINTER(retval); } - len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0); - res = (TRGM *) palloc(len); - SET_VARSIZE(res, len); - res->flag = SIGNKEY | ALLISTRUE; - + res = gtrgm_alloc(true, siglen, sign); retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, @@ -139,7 +169,7 @@ gtrgm_decompress(PG_FUNCTION_ARGS) } static int32 -cnt_sml_sign_common(TRGM *qtrg, BITVECP sign) +cnt_sml_sign_common(TRGM *qtrg, BITVECP sign, int siglen) { int32 count = 0; int32 k, @@ -150,7 +180,7 @@ cnt_sml_sign_common(TRGM *qtrg, BITVECP sign) for (k = 0; k < len; k++) { CPTRGM(((char *) &tmp), ptr + k); - count += GETBIT(sign, HASHVAL(tmp)); + count += GETBIT(sign, HASHVAL(tmp, siglen)); } return count; @@ -165,6 +195,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS) /* Oid subtype = PG_GETARG_OID(3); */ bool *recheck = (bool *) PG_GETARG_POINTER(4); + int siglen = LTREE_GET_ASIGLEN(); TRGM *key = (TRGM *) DatumGetPointer(entry->key); TRGM *qtrg; bool res; @@ -292,7 +323,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS) } else { /* non-leaf contains signature */ - int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key)); + int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key), siglen); int32 len = ARRNELEM(qtrg); if (len == 0) @@ -334,7 +365,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS) for (k = 0; k < len; k++) { CPTRGM(((char *) &tmp), ptr + k); - if (!GETBIT(sign, HASHVAL(tmp))) + if (!GETBIT(sign, HASHVAL(tmp, siglen))) { res = false; break; @@ -387,7 +418,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS) for (k = 0; k < len; k++) { CPTRGM(((char *) &tmp), ptr + k); - check[k] = GETBIT(sign, HASHVAL(tmp)); + check[k] = GETBIT(sign, HASHVAL(tmp, siglen)); } res = trigramsMatchGraph(cache->graph, check); pfree(check); @@ -417,6 +448,7 @@ gtrgm_distance(PG_FUNCTION_ARGS) /* Oid subtype = PG_GETARG_OID(3); */ bool *recheck = (bool *) PG_GETARG_POINTER(4); + int siglen = LTREE_GET_ASIGLEN(); TRGM *key = (TRGM *) DatumGetPointer(entry->key); TRGM *qtrg; float8 res; @@ -474,7 +506,7 @@ gtrgm_distance(PG_FUNCTION_ARGS) } else { /* non-leaf contains signature */ - int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key)); + int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key), siglen); int32 len = ARRNELEM(qtrg); res = (len == 0) ? -1.0 : 1.0 - ((float8) count) / ((float8) len); @@ -490,7 +522,7 @@ gtrgm_distance(PG_FUNCTION_ARGS) } static int32 -unionkey(BITVECP sbase, TRGM *add) +unionkey(BITVECP sbase, TRGM *add, int siglen) { int32 i; @@ -501,7 +533,7 @@ unionkey(BITVECP sbase, TRGM *add) if (ISALLTRUE(add)) return 1; - LOOPBYTE + LOOPBYTE(siglen) sbase[i] |= sadd[i]; } else @@ -512,7 +544,7 @@ unionkey(BITVECP sbase, TRGM *add) for (i = 0; i < ARRNELEM(add); i++) { CPTRGM(((char *) &tmp), ptr + i); - HASH(sbase, tmp); + HASH(sbase, tmp, siglen); } } return 0; @@ -525,29 +557,22 @@ gtrgm_union(PG_FUNCTION_ARGS) GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); int32 len = entryvec->n; int *size = (int *) PG_GETARG_POINTER(1); - BITVEC base; + int siglen = LTREE_GET_ASIGLEN(); int32 i; - int32 flag = 0; - TRGM *result; + TRGM *result = gtrgm_alloc(false, siglen, NULL); + BITVECP base = GETSIGN(result); - MemSet((void *) base, 0, sizeof(BITVEC)); for (i = 0; i < len; i++) { - if (unionkey(base, GETENTRY(entryvec, i))) + if (unionkey(base, GETENTRY(entryvec, i), siglen)) { - flag = ALLISTRUE; + result->flag = ALLISTRUE; + SET_VARSIZE(result, CALCGTSIZE(ALLISTRUE, siglen)); break; } } - flag |= SIGNKEY; - len = CALCGTSIZE(flag, 0); - result = (TRGM *) palloc(len); - SET_VARSIZE(result, len); - result->flag = flag; - if (!ISALLTRUE(result)) - memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC)); - *size = len; + *size = VARSIZE(result); PG_RETURN_POINTER(result); } @@ -558,6 +583,7 @@ gtrgm_same(PG_FUNCTION_ARGS) TRGM *a = (TRGM *) PG_GETARG_POINTER(0); TRGM *b = (TRGM *) PG_GETARG_POINTER(1); bool *result = (bool *) PG_GETARG_POINTER(2); + int siglen = LTREE_GET_ASIGLEN(); if (ISSIGNKEY(a)) { /* then b also ISSIGNKEY */ @@ -574,7 +600,7 @@ gtrgm_same(PG_FUNCTION_ARGS) sb = GETSIGN(b); *result = true; - LOOPBYTE + LOOPBYTE(siglen) { if (sa[i] != sb[i]) { @@ -611,19 +637,19 @@ gtrgm_same(PG_FUNCTION_ARGS) } static int32 -sizebitvec(BITVECP sign) +sizebitvec(BITVECP sign, int siglen) { - return pg_popcount(sign, SIGLEN); + return pg_popcount(sign, siglen); } static int -hemdistsign(BITVECP a, BITVECP b) +hemdistsign(BITVECP a, BITVECP b, int siglen) { int i, diff, dist = 0; - LOOPBYTE + LOOPBYTE(siglen) { diff = (unsigned char) (a[i] ^ b[i]); /* Using the popcount functions here isn't likely to win */ @@ -633,19 +659,19 @@ hemdistsign(BITVECP a, BITVECP b) } static int -hemdist(TRGM *a, TRGM *b) +hemdist(TRGM *a, TRGM *b, int siglen) { if (ISALLTRUE(a)) { if (ISALLTRUE(b)) return 0; else - return SIGLENBIT - sizebitvec(GETSIGN(b)); + return SIGLENBIT(siglen) - sizebitvec(GETSIGN(b), siglen); } else if (ISALLTRUE(b)) - return SIGLENBIT - sizebitvec(GETSIGN(a)); + return SIGLENBIT(siglen) - sizebitvec(GETSIGN(a), siglen); - return hemdistsign(GETSIGN(a), GETSIGN(b)); + return hemdistsign(GETSIGN(a), GETSIGN(b), siglen); } Datum @@ -654,6 +680,7 @@ gtrgm_penalty(PG_FUNCTION_ARGS) GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */ GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1); float *penalty = (float *) PG_GETARG_POINTER(2); + int siglen = LTREE_GET_ASIGLEN(); TRGM *origval = (TRGM *) DatumGetPointer(origentry->key); TRGM *newval = (TRGM *) DatumGetPointer(newentry->key); BITVECP orig = GETSIGN(origval); @@ -663,7 +690,7 @@ gtrgm_penalty(PG_FUNCTION_ARGS) if (ISARRKEY(newval)) { char *cache = (char *) fcinfo->flinfo->fn_extra; - TRGM *cachedVal = (TRGM *) (cache + MAXALIGN(sizeof(BITVEC))); + TRGM *cachedVal = (TRGM *) (cache + MAXALIGN(siglen)); Size newvalsize = VARSIZE(newval); BITVECP sign; @@ -677,12 +704,12 @@ gtrgm_penalty(PG_FUNCTION_ARGS) char *newcache; newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, - MAXALIGN(sizeof(BITVEC)) + + MAXALIGN(siglen) + newvalsize); - makesign((BITVECP) newcache, newval); + makesign((BITVECP) newcache, newval, siglen); - cachedVal = (TRGM *) (newcache + MAXALIGN(sizeof(BITVEC))); + cachedVal = (TRGM *) (newcache + MAXALIGN(siglen)); memcpy(cachedVal, newval, newvalsize); if (cache) @@ -694,31 +721,32 @@ gtrgm_penalty(PG_FUNCTION_ARGS) sign = (BITVECP) cache; if (ISALLTRUE(origval)) - *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1); + *penalty = ((float) (SIGLENBIT(siglen) - sizebitvec(sign, siglen))) / (float) (SIGLENBIT(siglen) + 1); else - *penalty = hemdistsign(sign, orig); + *penalty = hemdistsign(sign, orig, siglen); } else - *penalty = hemdist(origval, newval); + *penalty = hemdist(origval, newval, siglen); PG_RETURN_POINTER(penalty); } typedef struct { bool allistrue; - BITVEC sign; + BITVECP sign; } CACHESIGN; static void -fillcache(CACHESIGN *item, TRGM *key) +fillcache(CACHESIGN *item, TRGM *key, BITVECP sign, int siglen) { item->allistrue = false; + item->sign = sign; if (ISARRKEY(key)) - makesign(item->sign, key); + makesign(item->sign, key, siglen); else if (ISALLTRUE(key)) item->allistrue = true; else - memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC)); + memcpy((void *) item->sign, (void *) GETSIGN(key), siglen); } #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) ) @@ -739,19 +767,19 @@ comparecost(const void *a, const void *b) static int -hemdistcache(CACHESIGN *a, CACHESIGN *b) +hemdistcache(CACHESIGN *a, CACHESIGN *b, int siglen) { if (a->allistrue) { if (b->allistrue) return 0; else - return SIGLENBIT - sizebitvec(b->sign); + return SIGLENBIT(siglen) - sizebitvec(b->sign, siglen); } else if (b->allistrue) - return SIGLENBIT - sizebitvec(a->sign); + return SIGLENBIT(siglen) - sizebitvec(a->sign, siglen); - return hemdistsign(a->sign, b->sign); + return hemdistsign(a->sign, b->sign, siglen); } Datum @@ -760,6 +788,7 @@ gtrgm_picksplit(PG_FUNCTION_ARGS) GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); OffsetNumber maxoff = entryvec->n - 2; GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); + int siglen = LTREE_GET_ASIGLEN(); OffsetNumber k, j; TRGM *datum_l, @@ -778,19 +807,23 @@ gtrgm_picksplit(PG_FUNCTION_ARGS) BITVECP ptr; int i; CACHESIGN *cache; + char *cache_sign; SPLITCOST *costvector; /* cache the sign data for each existing item */ cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2)); + cache_sign = palloc(siglen * (maxoff + 2)); + for (k = FirstOffsetNumber; k <= maxoff; k = OffsetNumberNext(k)) - fillcache(&cache[k], GETENTRY(entryvec, k)); + fillcache(&cache[k], GETENTRY(entryvec, k), &cache_sign[siglen * k], + siglen); /* now find the two furthest-apart items */ for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) { for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) { - size_waste = hemdistcache(&(cache[j]), &(cache[k])); + size_waste = hemdistcache(&(cache[j]), &(cache[k]), siglen); if (size_waste > waste) { waste = size_waste; @@ -815,44 +848,22 @@ gtrgm_picksplit(PG_FUNCTION_ARGS) v->spl_nright = 0; /* form initial .. */ - if (cache[seed_1].allistrue) - { - datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); - SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); - datum_l->flag = SIGNKEY | ALLISTRUE; - } - else - { - datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0)); - SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0)); - datum_l->flag = SIGNKEY; - memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC)); - } - if (cache[seed_2].allistrue) - { - datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); - SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); - datum_r->flag = SIGNKEY | ALLISTRUE; - } - else - { - datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0)); - SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0)); - datum_r->flag = SIGNKEY; - memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC)); - } + datum_l = gtrgm_alloc(cache[seed_1].allistrue, siglen, cache[seed_1].sign); + datum_r = gtrgm_alloc(cache[seed_2].allistrue, siglen, cache[seed_2].sign); union_l = GETSIGN(datum_l); union_r = GETSIGN(datum_r); maxoff = OffsetNumberNext(maxoff); - fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff)); + fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff), + &cache_sign[siglen * maxoff], siglen); + /* sort before ... */ costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) { costvector[j - 1].pos = j; - size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j])); - size_beta = hemdistcache(&(cache[seed_2]), &(cache[j])); + size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]), siglen); + size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]), siglen); costvector[j - 1].cost = abs(size_alpha - size_beta); } qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost); @@ -878,36 +889,38 @@ gtrgm_picksplit(PG_FUNCTION_ARGS) if (ISALLTRUE(datum_l) && cache[j].allistrue) size_alpha = 0; else - size_alpha = SIGLENBIT - + size_alpha = SIGLENBIT(siglen) - sizebitvec((cache[j].allistrue) ? GETSIGN(datum_l) : - GETSIGN(cache[j].sign)); + GETSIGN(cache[j].sign), + siglen); } else - size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l)); + size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l), siglen); if (ISALLTRUE(datum_r) || cache[j].allistrue) { if (ISALLTRUE(datum_r) && cache[j].allistrue) size_beta = 0; else - size_beta = SIGLENBIT - + size_beta = SIGLENBIT(siglen) - sizebitvec((cache[j].allistrue) ? GETSIGN(datum_r) : - GETSIGN(cache[j].sign)); + GETSIGN(cache[j].sign), + siglen); } else - size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r)); + size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r), siglen); if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) { if (ISALLTRUE(datum_l) || cache[j].allistrue) { if (!ISALLTRUE(datum_l)) - MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC)); + MemSet((void *) GETSIGN(datum_l), 0xff, siglen); } else { ptr = cache[j].sign; - LOOPBYTE + LOOPBYTE(siglen) union_l[i] |= ptr[i]; } *left++ = j; @@ -918,12 +931,12 @@ gtrgm_picksplit(PG_FUNCTION_ARGS) if (ISALLTRUE(datum_r) || cache[j].allistrue) { if (!ISALLTRUE(datum_r)) - MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC)); + MemSet((void *) GETSIGN(datum_r), 0xff, siglen); } else { ptr = cache[j].sign; - LOOPBYTE + LOOPBYTE(siglen) union_r[i] |= ptr[i]; } *right++ = j; @@ -937,3 +950,17 @@ gtrgm_picksplit(PG_FUNCTION_ARGS) PG_RETURN_POINTER(v); } + +Datum +gtrgm_options(PG_FUNCTION_ARGS) +{ + local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0); + + init_local_reloptions(relopts, sizeof(TrgmGistOptions)); + add_local_int_reloption(relopts, "siglen", + "signature length in bytes", + SIGLEN_DEFAULT, 1, SIGLEN_MAX, + offsetof(TrgmGistOptions, siglen)); + + PG_RETURN_VOID(); +} |