v6-0002-Define-char_tolower-char_toupper-for-all-locale-p.patch

text/x-patch

Filename: v6-0002-Define-char_tolower-char_toupper-for-all-locale-p.patch
Type: text/x-patch
Part: 1
Message: Re: Remaining dependency on setlocale()
From 631daededebd9649169951764c72d8a372897b5c Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Sun, 26 Oct 2025 14:51:47 -0700
Subject: [PATCH v6 2/9] Define char_tolower()/char_toupper() for all locale
 providers.

The behavior is defined by each locale provider rather than
unconditionally depending on the global LC_CTYPE setting. Needed as an
alternative to tolower()/toupper() for some callers.
---
 src/backend/utils/adt/like.c              |  4 +--
 src/backend/utils/adt/pg_locale.c         | 32 ++++++++++++++++-------
 src/backend/utils/adt/pg_locale_builtin.c | 18 +++++++++++++
 src/backend/utils/adt/pg_locale_icu.c     | 23 ++++++++++++++++
 src/backend/utils/adt/pg_locale_libc.c    | 21 +++++++++++++--
 src/include/utils/pg_locale.h             | 10 +++----
 6 files changed, 89 insertions(+), 19 deletions(-)

diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 4216ac17f43..37c1c86aee8 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -209,9 +209,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
 	 * way.
 	 */
 
-	if (locale->ctype_is_c ||
-		(char_tolower_enabled(locale) &&
-		 pg_database_encoding_max_length() == 1))
+	if (locale->ctype_is_c || locale->ctype->pattern_casefold_char)
 	{
 		p = VARDATA_ANY(pat);
 		plen = VARSIZE_ANY_EXHDR(pat);
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 67299c55ed8..26a7244c3db 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1551,25 +1551,39 @@ char_is_cased(char ch, pg_locale_t locale)
 }
 
 /*
- * char_tolower_enabled()
+ * char_tolower()
  *
- * Does the provider support char_tolower()?
+ * Convert single-byte char to lowercase. Not correct for multibyte encodings,
+ * but needed for historical compatibility purposes.
  */
-bool
-char_tolower_enabled(pg_locale_t locale)
+char
+char_tolower(unsigned char ch, pg_locale_t locale)
 {
-	return (locale->ctype->char_tolower != NULL);
+	if (locale->ctype == NULL)	/* no ctype method table: fold ASCII A-Z only */
+	{
+		if (ch >= 'A' && ch <= 'Z')
+			return ch + ('a' - 'A');
+		return ch;				/* any other byte is returned unchanged */
+	}
+	return locale->ctype->char_tolower(ch, locale);	/* provider's method */
 }
 
 /*
- * char_tolower()
+ * char_toupper()
  *
- * Convert char (single-byte encoding) to lowercase.
+ * Convert single-byte char to uppercase. Not correct for multibyte encodings,
+ * but needed for historical compatibility purposes.
  */
 char
-char_tolower(unsigned char ch, pg_locale_t locale)
+char_toupper(unsigned char ch, pg_locale_t locale)
 {
-	return locale->ctype->char_tolower(ch, locale);
+	if (locale->ctype == NULL)	/* no ctype method table: fold ASCII a-z only */
+	{
+		if (ch >= 'a' && ch <= 'z')
+			return ch - ('a' - 'A');
+		return ch;				/* any other byte is returned unchanged */
+	}
+	return locale->ctype->char_toupper(ch, locale);	/* provider's method */
 }
 
 /*
diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c
index 3dc611b50e1..cfef6a86377 100644
--- a/src/backend/utils/adt/pg_locale_builtin.c
+++ b/src/backend/utils/adt/pg_locale_builtin.c
@@ -169,6 +169,22 @@ wc_isxdigit_builtin(pg_wchar wc, pg_locale_t locale)
 	return pg_u_isxdigit(wc, !locale->builtin.casemap_full);
 }
 
+static char
+char_tolower_builtin(unsigned char ch, pg_locale_t locale)
+{
+	if (ch >= 'A' && ch <= 'Z')	/* ASCII letters only; "locale" is unused */
+		return ch + ('a' - 'A');
+	return ch;					/* any other byte is returned unchanged */
+}
+
+static char
+char_toupper_builtin(unsigned char ch, pg_locale_t locale)
+{
+	if (ch >= 'a' && ch <= 'z')	/* ASCII letters only; "locale" is unused */
+		return ch - ('a' - 'A');
+	return ch;					/* any other byte is returned unchanged */
+}
+
 static bool
 char_is_cased_builtin(char ch, pg_locale_t locale)
 {
@@ -203,6 +219,8 @@ static const struct ctype_methods ctype_methods_builtin = {
 	.wc_ispunct = wc_ispunct_builtin,
 	.wc_isspace = wc_isspace_builtin,
 	.wc_isxdigit = wc_isxdigit_builtin,
+	.char_tolower = char_tolower_builtin,
+	.char_toupper = char_toupper_builtin,
 	.char_is_cased = char_is_cased_builtin,
 	.wc_tolower = wc_tolower_builtin,
 	.wc_toupper = wc_toupper_builtin,
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index f5a0cc8fe41..449e3bbb7a6 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -121,6 +121,27 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
 									 const char *locale,
 									 UErrorCode *pErrorCode);
 
+/*
+ * ICU still relies on libc's isupper()/tolower() here, for compatibility with
+ * historical behavior for single-byte encodings.  XXX: consider fixing by
+ * decoding the single byte into a code point, and using u_tolower().
+ */
+static char
+char_tolower_icu(unsigned char ch, pg_locale_t locale)
+{
+	if (isupper(ch))			/* plain libc call; "locale" is unused */
+		return tolower(ch);
+	return ch;					/* bytes not classified as uppercase pass through */
+}
+
+static char
+char_toupper_icu(unsigned char ch, pg_locale_t locale)
+{
+	if (islower(ch))			/* plain libc call; "locale" is unused */
+		return toupper(ch);
+	return ch;					/* bytes not classified as lowercase pass through */
+}
+
 static bool
 char_is_cased_icu(char ch, pg_locale_t locale)
 {
@@ -238,6 +259,8 @@ static const struct ctype_methods ctype_methods_icu = {
 	.wc_ispunct = wc_ispunct_icu,
 	.wc_isspace = wc_isspace_icu,
 	.wc_isxdigit = wc_isxdigit_icu,
+	.char_tolower = char_tolower_icu,
+	.char_toupper = char_toupper_icu,
 	.char_is_cased = char_is_cased_icu,
 	.wc_toupper = toupper_icu,
 	.wc_tolower = tolower_icu,
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 716f005066a..b0428ad288e 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -251,8 +251,21 @@ wc_isxdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
 static char
 char_tolower_libc(unsigned char ch, pg_locale_t locale)
 {
-	Assert(pg_database_encoding_max_length() == 1);
-	return tolower_l(ch, locale->lt);
+	locale_t	loc = locale->lt;	/* libc locale_t stored in this pg_locale_t */
+
+	if (isupper_l(ch, loc))		/* map only bytes "loc" classifies as uppercase */
+		return tolower_l(ch, loc);
+	return ch;					/* everything else is returned unchanged */
+}
+
+static char
+char_toupper_libc(unsigned char ch, pg_locale_t locale)
+{
+	locale_t	loc = locale->lt;	/* libc locale_t stored in this pg_locale_t */
+
+	if (islower_l(ch, loc))		/* map only bytes "loc" classifies as lowercase */
+		return toupper_l(ch, loc);
+	return ch;					/* everything else is returned unchanged */
 }
 
 static bool
@@ -338,9 +351,11 @@ static const struct ctype_methods ctype_methods_libc_sb = {
 	.wc_isxdigit = wc_isxdigit_libc_sb,
 	.char_is_cased = char_is_cased_libc,
 	.char_tolower = char_tolower_libc,
+	.char_toupper = char_toupper_libc,
 	.wc_toupper = toupper_libc_sb,
 	.wc_tolower = tolower_libc_sb,
 	.max_chr = UCHAR_MAX,
+	.pattern_casefold_char = true,
 };
 
 /*
@@ -363,6 +378,7 @@ static const struct ctype_methods ctype_methods_libc_other_mb = {
 	.wc_isxdigit = wc_isxdigit_libc_sb,
 	.char_is_cased = char_is_cased_libc,
 	.char_tolower = char_tolower_libc,
+	.char_toupper = char_toupper_libc,
 	.wc_toupper = toupper_libc_sb,
 	.wc_tolower = tolower_libc_sb,
 	.max_chr = UCHAR_MAX,
@@ -384,6 +400,7 @@ static const struct ctype_methods ctype_methods_libc_utf8 = {
 	.wc_isxdigit = wc_isxdigit_libc_mb,
 	.char_is_cased = char_is_cased_libc,
 	.char_tolower = char_tolower_libc,
+	.char_toupper = char_toupper_libc,
 	.wc_toupper = toupper_libc_mb,
 	.wc_tolower = tolower_libc_mb,
 };
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 683e1a0eef8..790db566e91 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -113,13 +113,13 @@ struct ctype_methods
 
 	/* required */
 	bool		(*char_is_cased) (char ch, pg_locale_t locale);
+	char		(*char_tolower) (unsigned char ch, pg_locale_t locale);
+	char		(*char_toupper) (unsigned char ch, pg_locale_t locale);
 
 	/*
-	 * Optional. If defined, will only be called for single-byte encodings. If
-	 * not defined, or if the encoding is multibyte, will fall back to
-	 * pg_strlower().
+	 * If true, use byte-at-a-time case folding for case-insensitive patterns.
 	 */
-	char		(*char_tolower) (unsigned char ch, pg_locale_t locale);
+	bool		pattern_casefold_char;
 
 	/*
 	 * For regex and pattern matching efficiency, the maximum char value
@@ -177,8 +177,8 @@ extern pg_locale_t pg_newlocale_from_collation(Oid collid);
 extern char *get_collation_actual_version(char collprovider, const char *collcollate);
 
 extern bool char_is_cased(char ch, pg_locale_t locale);
-extern bool char_tolower_enabled(pg_locale_t locale);
 extern char char_tolower(unsigned char ch, pg_locale_t locale);
+extern char char_toupper(unsigned char ch, pg_locale_t locale);
 extern size_t pg_strlower(char *dst, size_t dstsize,
 						  const char *src, ssize_t srclen,
 						  pg_locale_t locale);
-- 
2.43.0