From 1b70619311f74ccbab52dfad7e0c96c36b6718a5 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 11 Aug 2007 03:56:24 +0000 Subject: Code review for regexp_matches/regexp_split patch. Refactor to avoid assuming that cached compiled patterns will still be there when the function is next called. Clean up looping logic, thereby fixing bug identified by Pavel Stehule. Share setup code between the two functions, add some comments, and avoid risky mixing of int and size_t variables. Clean up the documentation a tad, and accept all the flag characters mentioned in table 9-19 rather than just a subset. --- doc/src/sgml/func.sgml | 65 +++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 32 deletions(-) (limited to 'doc/src') diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 5d5a86a0c7f..4e5f8f1148a 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1,4 +1,4 @@ - + Functions and Operators @@ -1499,7 +1499,7 @@ regexp_matches(string text, pattern text [, flags text]) setof text[] - Return all capture groups resulting from matching POSIX regular + Return all captured substrings resulting from matching a POSIX regular expression against the string. See for more information. @@ -1511,7 +1511,7 @@ regexp_replace(string text, pattern text, replacement text [, flags text]) text - Replace substring matching POSIX regular expression. See + Replace substring(s) matching a POSIX regular expression. See for more information. regexp_replace('Thomas', '.[mN]a.', 'M') @@ -1522,7 +1522,7 @@ regexp_split_to_array(string text, pattern text [, flags text ]) text[] - Split string using POSIX regular expression as + Split string using a POSIX regular expression as the delimiter. See for more information. @@ -1534,7 +1534,7 @@ regexp_split_to_table(string text, pattern text [, flags text]) setof text - Split string using POSIX regular expression as + Split string using a POSIX regular expression as the delimiter. 
See for more information. @@ -2856,11 +2856,9 @@ cast(-44 as bit(12)) 111111010100 SQL LIKE operator, the more recent SIMILAR TO operator (added in SQL:1999), and POSIX-style regular - expressions. - Additionally, a pattern matching function, - substring, is available, using either - SIMILAR TO-style or POSIX-style regular - expressions. + expressions. Aside from the basic "does this string match + this pattern?" operators, functions are available to extract + or replace matching substrings and to split a string at the matches. @@ -3186,15 +3184,20 @@ substring('foobar' from '#"o_b#"%' for '#') NULL - - Some examples: + + Some examples: 'abc' ~ 'abc' true 'abc' ~ '^a' true 'abc' ~ '(b|d)' true 'abc' ~ '^(b|c)' false - + + + + The POSIX pattern language is described in much + greater detail below. + The substring function with two parameters, @@ -3246,9 +3249,7 @@ substring('foobar' from 'o(.)b') o function's behavior. Flag i specifies case-insensitive matching, while flag g specifies replacement of each matching substring rather than only the first one. Other supported flags are - m, n, p, w and - x, whose meanings correspond to those shown in - Table 9-19. + described in Table 9-19. @@ -3264,23 +3265,25 @@ regexp_replace('foobarbaz', 'b(..)', E'X\\1Y', 'g') - The regexp_matches function returns all of the capture - groups resulting from matching a POSIX regular expression pattern. + The regexp_matches function returns all of the captured + substrings resulting from matching a POSIX regular expression pattern. It has the syntax regexp_matches(string, pattern , flags ). - If there is no match to the pattern, the function returns no rows. - If there is a match, the function returns the contents of all of the capture groups - in a text array, or if there were no capture groups in the pattern, it returns the - contents of the entire match as a single-element text array. + If there is no match to the pattern, the function returns + no rows. 
If there is a match, the function returns a text array whose + n'th element is the substring matching the + n'th parenthesized subexpression of the pattern + (not counting non-capturing parentheses; see below for + details). If the pattern does not contain any parenthesized + subexpressions, then the result is a single-element text array containing + the substring matching the whole pattern. The flags parameter is an optional text string containing zero or more single-letter flags that change the - function's behavior. Flag i specifies case-insensitive - matching, while flag g causes the return of each matching - substring rather than only the first one. Other supported - flags are m, n, p, w and - x, whose meanings are described in - Table 9-19. + function's behavior. Flag g causes the function to find + each match in the string, not only the first one, and return a row for + each such match. Other supported + flags are described in Table 9-19. @@ -3319,16 +3322,14 @@ SELECT regexp_matches('foobarbequebaz', 'barbeque'); returns the text from the end of the last match to the end of the string. The flags parameter is an optional text string containing zero or more single-letter flags that change the function's behavior. - regexp_split_to_table supports the flags i, - m, n, p, w and - x, whose meanings are described in + regexp_split_to_table supports the flags described in Table 9-19. The regexp_split_to_array function behaves the same as regexp_split_to_table, except that regexp_split_to_array - returns its results as a text[]. It has the syntax + returns its result as an array of text. It has the syntax regexp_split_to_array(string, pattern , flags ). The parameters are the same as for regexp_split_to_table. -- cgit v1.2.3