diff options
| author | Jeff Davis <jdavis@postgresql.org> | 2025-12-01 11:06:17 -0800 |
|---|---|---|
| committer | Jeff Davis <jdavis@postgresql.org> | 2025-12-01 11:06:17 -0800 |
| commit | 19b966243c38196a33b033fb0c259dcf760c0d69 (patch) | |
| tree | 5588be32dc7672158eec3b82947586539681b18d /src | |
| parent | 99cd8890becacf9d7059297c3d75cd388ad83ac0 (diff) | |
Make regex "max_chr" depend on encoding, not provider.
The regex mechanism scans through the first "max_chr" character values
to cache character property ranges (isalpha, etc.). For single-byte
encodings, there's no sense in scanning beyond UCHAR_MAX; but for
UTF-8 it makes sense to cache higher code point values (though not all
of them; only up to MAX_SIMPLE_CHR).
Prior to 5a38104b36, the logic about how many character values to scan
was based on the pg_regex_strategy, which was dependent on the
provider. Commit 5a38104b36 preserved that logic exactly, allowing
different providers to define the "max_chr".
Now, change it to depend only on the database encoding and on whether
ctype_is_c is set. For this specific calculation, distinguishing between
providers creates more complexity than it's worth.
Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Diffstat (limited to 'src')
| -rw-r--r-- | src/backend/regex/regc_pg_locale.c | 18 | ||||
| -rw-r--r-- | src/backend/utils/adt/pg_locale_libc.c | 2 | ||||
| -rw-r--r-- | src/include/utils/pg_locale.h | 6 |
3 files changed, 10 insertions, 16 deletions
diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index 4698f110a0c..bb0e3f1d139 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -320,16 +320,18 @@ regc_ctype_get_cache(regc_wc_probefunc probefunc, int cclasscode) max_chr = (pg_wchar) MAX_SIMPLE_CHR; #endif } + else if (GetDatabaseEncoding() == PG_UTF8) + { + max_chr = (pg_wchar) MAX_SIMPLE_CHR; + } else { - if (pg_regex_locale->ctype->max_chr != 0 && - pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR) - { - max_chr = pg_regex_locale->ctype->max_chr; - pcc->cv.cclasscode = -1; - } - else - max_chr = (pg_wchar) MAX_SIMPLE_CHR; +#if MAX_SIMPLE_CHR >= UCHAR_MAX + max_chr = (pg_wchar) UCHAR_MAX; + pcc->cv.cclasscode = -1; +#else + max_chr = (pg_wchar) MAX_SIMPLE_CHR; +#endif } /* diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c index e2beee44335..6ad3f93b543 100644 --- a/src/backend/utils/adt/pg_locale_libc.c +++ b/src/backend/utils/adt/pg_locale_libc.c @@ -342,7 +342,6 @@ static const struct ctype_methods ctype_methods_libc_sb = { .char_tolower = char_tolower_libc, .wc_toupper = toupper_libc_sb, .wc_tolower = tolower_libc_sb, - .max_chr = UCHAR_MAX, }; /* @@ -369,7 +368,6 @@ static const struct ctype_methods ctype_methods_libc_other_mb = { .char_tolower = char_tolower_libc, .wc_toupper = toupper_libc_sb, .wc_tolower = tolower_libc_sb, - .max_chr = UCHAR_MAX, }; static const struct ctype_methods ctype_methods_libc_utf8 = { diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 54193a17a90..42e21e7fb8a 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -134,12 +134,6 @@ struct ctype_methods * pg_strlower(). */ char (*char_tolower) (unsigned char ch, pg_locale_t locale); - - /* - * For regex and pattern matching efficiency, the maximum char value - * supported by the above methods. If zero, limit is set by regex code. 
- */ - pg_wchar max_chr; }; /* |
