v9-0005-Make-regex-max_chr-depend-on-encoding-not-provide.patch

text/x-patch

Filename: v9-0005-Make-regex-max_chr-depend-on-encoding-not-provide.patch
Type: text/x-patch
Part: 4
Message: Re: Remaining dependency on setlocale()
From b1add0b2b4c9785b56e4dc222a89ec8f43b9c586 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Fri, 21 Nov 2025 12:41:47 -0800
Subject: [PATCH v9 05/11] Make regex "max_chr" depend on encoding, not
 provider.

The previous per-provider "max_chr" field was there as a hack to
preserve the exact prior behavior, which depended on the
provider. Change to depend on the encoding, which makes more sense,
and remove the per-provider logic.

The only difference is for ICU: previously it always used
MAX_SIMPLE_CHR (0x7FF) regardless of the encoding; whereas now it will
match libc and use MAX_SIMPLE_CHR for UTF-8, and UCHAR_MAX for other
encodings. That's possibly a loss for non-UTF8 multibyte encodings,
but a win for single-byte encodings. Regardless, this distinction was
not worth the complexity.
---
 src/backend/regex/regc_pg_locale.c     | 18 ++++++++++--------
 src/backend/utils/adt/pg_locale_libc.c |  2 --
 src/include/utils/pg_locale.h          |  6 ------
 3 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index 4698f110a0c..bb0e3f1d139 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -320,16 +320,18 @@ regc_ctype_get_cache(regc_wc_probefunc probefunc, int cclasscode)
 		max_chr = (pg_wchar) MAX_SIMPLE_CHR;
 #endif
 	}
+	else if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		max_chr = (pg_wchar) MAX_SIMPLE_CHR;
+	}
 	else
 	{
-		if (pg_regex_locale->ctype->max_chr != 0 &&
-			pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR)
-		{
-			max_chr = pg_regex_locale->ctype->max_chr;
-			pcc->cv.cclasscode = -1;
-		}
-		else
-			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
+#if MAX_SIMPLE_CHR >= UCHAR_MAX
+		max_chr = (pg_wchar) UCHAR_MAX;
+		pcc->cv.cclasscode = -1;
+#else
+		max_chr = (pg_wchar) MAX_SIMPLE_CHR;
+#endif
 	}
 
 	/*
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 942454de4ed..a55167b0697 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -341,7 +341,6 @@ static const struct ctype_methods ctype_methods_libc_sb = {
 	.char_tolower = char_tolower_libc,
 	.wc_toupper = toupper_libc_sb,
 	.wc_tolower = tolower_libc_sb,
-	.max_chr = UCHAR_MAX,
 };
 
 /*
@@ -367,7 +366,6 @@ static const struct ctype_methods ctype_methods_libc_other_mb = {
 	.char_tolower = char_tolower_libc,
 	.wc_toupper = toupper_libc_sb,
 	.wc_tolower = tolower_libc_sb,
-	.max_chr = UCHAR_MAX,
 };
 
 static const struct ctype_methods ctype_methods_libc_utf8 = {
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 49fd22bf8eb..40e58cc52b8 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -131,12 +131,6 @@ struct ctype_methods
 	 * pg_strlower().
 	 */
 	char		(*char_tolower) (unsigned char ch, pg_locale_t locale);
-
-	/*
-	 * For regex and pattern matching efficiency, the maximum char value
-	 * supported by the above methods. If zero, limit is set by regex code.
-	 */
-	pg_wchar	max_chr;
 };
 
 /*
-- 
2.43.0