From 19b966243c38196a33b033fb0c259dcf760c0d69 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jdavis@postgresql.org>
Date: Mon, 1 Dec 2025 11:06:17 -0800
Subject: Make regex "max_chr" depend on encoding, not provider.

The regex mechanism scans through the first "max_chr" character values
to cache character property ranges (isalpha, etc.). For single-byte
encodings, there's no sense in scanning beyond UCHAR_MAX; but for
UTF-8 it makes sense to cache higher code point values (though not all
of them; only up to MAX_SIMPLE_CHR).

Prior to 5a38104b36, the logic about how many character values to scan
was based on the pg_regex_strategy, which was dependent on the
provider. Commit 5a38104b36 preserved that logic exactly, allowing
different providers to define the "max_chr".

Now, change it to depend only on the database encoding and on whether
ctype_is_c is set. For this specific calculation, distinguishing
between providers creates more complexity than it's worth.

Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
---
 src/backend/regex/regc_pg_locale.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'src/backend/regex')

diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index 4698f110a0c..bb0e3f1d139 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -320,16 +320,18 @@ regc_ctype_get_cache(regc_wc_probefunc probefunc, int cclasscode)
 		max_chr = (pg_wchar) MAX_SIMPLE_CHR;
 #endif
 	}
+	else if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		max_chr = (pg_wchar) MAX_SIMPLE_CHR;
+	}
 	else
 	{
-		if (pg_regex_locale->ctype->max_chr != 0 &&
-			pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR)
-		{
-			max_chr = pg_regex_locale->ctype->max_chr;
-			pcc->cv.cclasscode = -1;
-		}
-		else
-			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
+#if MAX_SIMPLE_CHR >= UCHAR_MAX
+		max_chr = (pg_wchar) UCHAR_MAX;
+		pcc->cv.cclasscode = -1;
+#else
+		max_chr = (pg_wchar) MAX_SIMPLE_CHR;
+#endif
 	}
 
 	/*
-- 
cgit v1.2.3