summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2014-07-01 11:22:53 -0400
committer Tom Lane <tgl@sss.pgh.pa.us> 2014-07-01 11:22:53 -0400
commit c66256b9bd34a4af477eb3dc558ac8f46727f2f3 (patch)
tree fdc1f0d48ca596866c2c8a3dbadbaa74ea3201c2
parent f6d6b7b1e7eac2aa049bbb1e41c468fbbf5b7fef (diff)
Fix inadequately-sized output buffer in contrib/unaccent.
The output buffer size in unaccent_lexize() was calculated as input string length times pg_database_encoding_max_length(), which effectively assumes that replacement strings aren't more than one character. While that was all that we previously documented it to support, the code actually has always allowed replacement strings of arbitrary length; so if you tried to make use of longer strings, you were at risk of buffer overrun. To fix, use an expansible StringInfo buffer instead of trying to determine the maximum space needed a priori. This would be a security issue if unaccent rules files could be installed by unprivileged users; but fortunately they can't, so in the back branches the problem can be labeled as improper configuration by a superuser. Nonetheless, a memory stomp isn't a nice way of reacting to improper configuration, so let's back-patch the fix.
-rw-r--r-- contrib/unaccent/unaccent.c 51
1 files changed, 27 insertions, 24 deletions
diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c
index 262d5ec15f8..eabf01baf82 100644
--- a/contrib/unaccent/unaccent.c
+++ b/contrib/unaccent/unaccent.c
@@ -15,6 +15,7 @@
#include "catalog/namespace.h"
#include "commands/defrem.h"
+#include "lib/stringinfo.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
@@ -265,46 +266,48 @@ unaccent_lexize(PG_FUNCTION_ARGS)
SuffixChar *rootSuffixTree = (SuffixChar *) PG_GETARG_POINTER(0);
char *srcchar = (char *) PG_GETARG_POINTER(1);
int32 len = PG_GETARG_INT32(2);
- char *srcstart,
- *trgchar = NULL;
- int charlen;
- TSLexeme *res = NULL;
- SuffixChar *node;
+ char *srcstart = srcchar;
+ TSLexeme *res;
+ StringInfoData buf;
+
+ /* we allocate storage for the buffer only if needed */
+ buf.data = NULL;
- srcstart = srcchar;
while (srcchar - srcstart < len)
{
+ SuffixChar *node;
+ int charlen;
+
charlen = pg_mblen(srcchar);
node = findReplaceTo(rootSuffixTree, (unsigned char *) srcchar, charlen);
if (node && node->replaceTo)
{
- if (!res)
+ if (buf.data == NULL)
{
- /* allocate res only if it's needed */
- res = palloc0(sizeof(TSLexeme) * 2);
- res->lexeme = trgchar = palloc(len * pg_database_encoding_max_length() + 1 /* \0 */ );
- res->flags = TSL_FILTER;
+ /* initialize buffer */
+ initStringInfo(&buf);
+ /* insert any data we already skipped over */
if (srcchar != srcstart)
- {
- memcpy(trgchar, srcstart, srcchar - srcstart);
- trgchar += (srcchar - srcstart);
- }
+ appendBinaryStringInfo(&buf, srcstart, srcchar - srcstart);
}
- memcpy(trgchar, node->replaceTo, node->replacelen);
- trgchar += node->replacelen;
- }
- else if (res)
- {
- memcpy(trgchar, srcchar, charlen);
- trgchar += charlen;
+ appendBinaryStringInfo(&buf, node->replaceTo, node->replacelen);
}
+ else if (buf.data != NULL)
+ appendBinaryStringInfo(&buf, srcchar, charlen);
srcchar += charlen;
}
- if (res)
- *trgchar = '\0';
+ /* return a result only if we made at least one substitution */
+ if (buf.data != NULL)
+ {
+ res = (TSLexeme *) palloc0(sizeof(TSLexeme) * 2);
+ res->lexeme = buf.data;
+ res->flags = TSL_FILTER;
+ }
+ else
+ res = NULL;
PG_RETURN_POINTER(res);
}