summary | refs | log | tree | commit | diff
path: root/src/backend/utils/adt/varlena.c
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2021-08-03 13:08:49 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2021-08-03 13:08:49 -0400
commit: 6424337073589476303b10f6d7cc74f501b8d9d7 (patch)
tree: 12aaf35032d9e80c4564160a0937bb40ea25a5f1 /src/backend/utils/adt/varlena.c
parent: 9e51cc87fd0ac46b183cb7302a6751d52d3f159a (diff)
Add assorted new regexp_xxx SQL functions.
This patch adds new functions regexp_count(), regexp_instr(), regexp_like(), and regexp_substr(), and extends regexp_replace() with some new optional arguments. All these functions follow the definitions used in Oracle, although there are small differences in the regexp language due to using our own regexp engine -- most notably, that the default newline-matching behavior is different. Similar functions appear in DB2 and elsewhere, too. Aside from easing portability, these functions are easier to use for certain tasks than our existing regexp_match[es] functions.

Gilles Darold, heavily revised by me

Discussion: https://postgr.es/m/fc160ee0-c843-b024-29bb-97b5da61971f@darold.net
Diffstat (limited to 'src/backend/utils/adt/varlena.c')
-rw-r--r-- src/backend/utils/adt/varlena.c | 33
1 file changed, 27 insertions, 6 deletions
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d2a11b1b5dd..348b5566de4 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -4496,23 +4496,28 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
/*
* replace_text_regexp
*
- * replace text that matches to regexp in src_text to replace_text.
+ * replace substring(s) in src_text that match regexp with replace_text.
+ *
+ * search_start: the character (not byte) offset in src_text at which to
+ * begin searching.
+ * n: if 0, replace all matches; if > 0, replace only the N'th match.
*
* Note: to avoid having to include regex.h in builtins.h, we declare
* the regexp argument as void *, but really it's regex_t *.
*/
text *
replace_text_regexp(text *src_text, void *regexp,
- text *replace_text, bool glob)
+ text *replace_text,
+ int search_start, int n)
{
text *ret_text;
regex_t *re = (regex_t *) regexp;
int src_text_len = VARSIZE_ANY_EXHDR(src_text);
+ int nmatches = 0;
StringInfoData buf;
regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
pg_wchar *data;
size_t data_len;
- int search_start;
int data_pos;
char *start_ptr;
bool have_escape;
@@ -4530,7 +4535,6 @@ replace_text_regexp(text *src_text, void *regexp,
start_ptr = (char *) VARDATA_ANY(src_text);
data_pos = 0;
- search_start = 0;
while (search_start <= data_len)
{
int regexec_result;
@@ -4561,6 +4565,23 @@ replace_text_regexp(text *src_text, void *regexp,
}
/*
+ * Count matches, and decide whether to replace this match.
+ */
+ nmatches++;
+ if (n > 0 && nmatches != n)
+ {
+ /*
+ * No, so advance search_start, but not start_ptr/data_pos. (Thus,
+ * we treat the matched text as if it weren't matched, and copy it
+ * to the output later.)
+ */
+ search_start = pmatch[0].rm_eo;
+ if (pmatch[0].rm_so == pmatch[0].rm_eo)
+ search_start++;
+ continue;
+ }
+
+ /*
* Copy the text to the left of the match position. Note we are given
* character not byte indexes.
*/
@@ -4596,9 +4617,9 @@ replace_text_regexp(text *src_text, void *regexp,
data_pos = pmatch[0].rm_eo;
/*
- * When global option is off, replace the first instance only.
+ * If we only want to replace one occurrence, we're done.
*/
- if (!glob)
+ if (n > 0)
break;
/*