summary | refs | log | tree | commit | diff
path: root/contrib
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2012-08-20 13:24:58 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2012-08-20 13:24:58 -0400
commit: e0badf67e9cd409ea35f2c2d5e3ca36ffecb47d7 (patch)
tree: 98e16ba2f65c60b349289d02f6f4e91e6625c2c3 /contrib
parent: 33f40976a716287ecddfb01f446797d8df215125 (diff)
Fix bugs in contrib/pg_trgm's LIKE pattern analysis code.
Extraction of trigrams did not process LIKE escape sequences properly, leading to possible misidentification of trigrams near escapes, resulting in incorrect index search results.

Fujii Masao
Diffstat (limited to 'contrib')
-rw-r--r--  contrib/pg_trgm/expected/pg_trgm.out | 12
-rw-r--r--  contrib/pg_trgm/sql/pg_trgm.sql      |  2
-rw-r--r--  contrib/pg_trgm/trgm_op.c            | 44
3 files changed, 39 insertions, 19 deletions
diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out
index e7af7d48902..81d0ca80b20 100644
--- a/contrib/pg_trgm/expected/pg_trgm.out
+++ b/contrib/pg_trgm/expected/pg_trgm.out
@@ -3497,6 +3497,12 @@ select * from test2 where t like '%bcd%';
abcdef
(1 row)
+select * from test2 where t like E'%\\bcd%';
+ t
+--------
+ abcdef
+(1 row)
+
select * from test2 where t ilike '%BCD%';
t
--------
@@ -3539,6 +3545,12 @@ select * from test2 where t like '%bcd%';
abcdef
(1 row)
+select * from test2 where t like E'%\\bcd%';
+ t
+--------
+ abcdef
+(1 row)
+
select * from test2 where t ilike '%BCD%';
t
--------
diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql
index ea902f602f9..81ab1e79b17 100644
--- a/contrib/pg_trgm/sql/pg_trgm.sql
+++ b/contrib/pg_trgm/sql/pg_trgm.sql
@@ -49,6 +49,7 @@ explain (costs off)
select * from test2 where t ilike '%BCD%';
select * from test2 where t like '%BCD%';
select * from test2 where t like '%bcd%';
+select * from test2 where t like E'%\\bcd%';
select * from test2 where t ilike '%BCD%';
select * from test2 where t ilike 'qua%';
drop index test2_idx_gin;
@@ -60,5 +61,6 @@ explain (costs off)
select * from test2 where t ilike '%BCD%';
select * from test2 where t like '%BCD%';
select * from test2 where t like '%bcd%';
+select * from test2 where t like E'%\\bcd%';
select * from test2 where t ilike '%BCD%';
select * from test2 where t ilike 'qua%';
diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c
index 4e32c6f654c..87dffd1dd2c 100644
--- a/contrib/pg_trgm/trgm_op.c
+++ b/contrib/pg_trgm/trgm_op.c
@@ -272,33 +272,36 @@ get_wildcard_part(const char *str, int lenstr,
const char *beginword = str;
const char *endword;
char *s = buf;
- bool in_wildcard_meta = false;
+ bool in_leading_wildcard_meta = false;
+ bool in_trailing_wildcard_meta = false;
bool in_escape = false;
int clen;
/*
- * Find the first word character remembering whether last character was
- * wildcard meta-character.
+ * Find the first word character, remembering whether preceding character
+ * was wildcard meta-character. Note that the in_escape state persists
+ * from this loop to the next one, since we may exit at a word character
+ * that is in_escape.
*/
while (beginword - str < lenstr)
{
if (in_escape)
{
- in_escape = false;
- in_wildcard_meta = false;
if (iswordchr(beginword))
break;
+ in_escape = false;
+ in_leading_wildcard_meta = false;
}
else
{
if (ISESCAPECHAR(beginword))
in_escape = true;
else if (ISWILDCARDCHAR(beginword))
- in_wildcard_meta = true;
+ in_leading_wildcard_meta = true;
else if (iswordchr(beginword))
break;
else
- in_wildcard_meta = false;
+ in_leading_wildcard_meta = false;
}
beginword += pg_mblen(beginword);
}
@@ -310,11 +313,11 @@ get_wildcard_part(const char *str, int lenstr,
return NULL;
/*
- * Add left padding spaces if last character wasn't wildcard
+ * Add left padding spaces if preceding character wasn't wildcard
* meta-character.
*/
*charlen = 0;
- if (!in_wildcard_meta)
+ if (!in_leading_wildcard_meta)
{
if (LPADDING > 0)
{
@@ -333,15 +336,11 @@ get_wildcard_part(const char *str, int lenstr,
* string boundary. Strip escapes during copy.
*/
endword = beginword;
- in_wildcard_meta = false;
- in_escape = false;
while (endword - str < lenstr)
{
clen = pg_mblen(endword);
if (in_escape)
{
- in_escape = false;
- in_wildcard_meta = false;
if (iswordchr(endword))
{
memcpy(s, endword, clen);
@@ -349,7 +348,17 @@ get_wildcard_part(const char *str, int lenstr,
s += clen;
}
else
+ {
+ /*
+ * Back up endword to the escape character when stopping at
+ * an escaped char, so that subsequent get_wildcard_part will
+ * restart from the escape character. We assume here that
+ * escape chars are single-byte.
+ */
+ endword--;
break;
+ }
+ in_escape = false;
}
else
{
@@ -357,7 +366,7 @@ get_wildcard_part(const char *str, int lenstr,
in_escape = true;
else if (ISWILDCARDCHAR(endword))
{
- in_wildcard_meta = true;
+ in_trailing_wildcard_meta = true;
break;
}
else if (iswordchr(endword))
@@ -367,19 +376,16 @@ get_wildcard_part(const char *str, int lenstr,
s += clen;
}
else
- {
- in_wildcard_meta = false;
break;
- }
}
endword += clen;
}
/*
- * Add right padding spaces if last character wasn't wildcard
+ * Add right padding spaces if next character isn't wildcard
* meta-character.
*/
- if (!in_wildcard_meta)
+ if (!in_trailing_wildcard_meta)
{
if (RPADDING > 0)
{