diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2021-08-03 13:08:49 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2021-08-03 13:08:49 -0400 |
commit | 6424337073589476303b10f6d7cc74f501b8d9d7 (patch) | |
tree | 12aaf35032d9e80c4564160a0937bb40ea25a5f1 /src/backend/utils/adt/varlena.c | |
parent | 9e51cc87fd0ac46b183cb7302a6751d52d3f159a (diff) |
Add assorted new regexp_xxx SQL functions.
This patch adds new functions regexp_count(), regexp_instr(),
regexp_like(), and regexp_substr(), and extends regexp_replace()
with some new optional arguments. All these functions follow
the definitions used in Oracle, although there are small differences
in the regexp language due to using our own regexp engine -- most
notably, that the default newline-matching behavior is different.
Similar functions appear in DB2 and elsewhere, too. Aside from
easing portability, these functions are easier to use for certain
tasks than our existing regexp_match[es] functions.
Gilles Darold, heavily revised by me
Discussion: https://postgr.es/m/fc160ee0-c843-b024-29bb-97b5da61971f@darold.net
Diffstat (limited to 'src/backend/utils/adt/varlena.c')
-rw-r--r-- | src/backend/utils/adt/varlena.c | 33 |
1 file changed, 27 insertions, 6 deletions
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d2a11b1b5dd..348b5566de4 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -4496,23 +4496,28 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
 /*
  * replace_text_regexp
  *
- * replace text that matches to regexp in src_text to replace_text.
+ * replace substring(s) in src_text that match regexp with replace_text.
+ *
+ * search_start: the character (not byte) offset in src_text at which to
+ * begin searching.
+ * n: if 0, replace all matches; if > 0, replace only the N'th match.
  *
  * Note: to avoid having to include regex.h in builtins.h, we declare
  * the regexp argument as void *, but really it's regex_t *.
  */
 text *
 replace_text_regexp(text *src_text, void *regexp,
-					text *replace_text, bool glob)
+					text *replace_text,
+					int search_start, int n)
 {
 	text	   *ret_text;
 	regex_t    *re = (regex_t *) regexp;
 	int			src_text_len = VARSIZE_ANY_EXHDR(src_text);
+	int			nmatches = 0;
 	StringInfoData buf;
 	regmatch_t	pmatch[REGEXP_REPLACE_BACKREF_CNT];
 	pg_wchar   *data;
 	size_t		data_len;
-	int			search_start;
 	int			data_pos;
 	char	   *start_ptr;
 	bool		have_escape;
@@ -4530,7 +4535,6 @@ replace_text_regexp(text *src_text, void *regexp,
 	start_ptr = (char *) VARDATA_ANY(src_text);
 	data_pos = 0;
 
-	search_start = 0;
 	while (search_start <= data_len)
 	{
 		int			regexec_result;
@@ -4561,6 +4565,23 @@ replace_text_regexp(text *src_text, void *regexp,
 		}
 
 		/*
+		 * Count matches, and decide whether to replace this match.
+		 */
+		nmatches++;
+		if (n > 0 && nmatches != n)
+		{
+			/*
+			 * No, so advance search_start, but not start_ptr/data_pos. (Thus,
+			 * we treat the matched text as if it weren't matched, and copy it
+			 * to the output later.)
+			 */
+			search_start = pmatch[0].rm_eo;
+			if (pmatch[0].rm_so == pmatch[0].rm_eo)
+				search_start++;
+			continue;
+		}
+
+		/*
 		 * Copy the text to the left of the match position. Note we are given
 		 * character not byte indexes.
 		 */
@@ -4596,9 +4617,9 @@ replace_text_regexp(text *src_text, void *regexp,
 		data_pos = pmatch[0].rm_eo;
 
 		/*
-		 * When global option is off, replace the first instance only.
+		 * If we only want to replace one occurrence, we're done.
 		 */
-		if (!glob)
+		if (n > 0)
 			break;
 
 		/*