v9-0004-Allow-pg_locale_t-APIs-to-work-when-ctype_is_c.patch

text/x-patch

Filename: v9-0004-Allow-pg_locale_t-APIs-to-work-when-ctype_is_c.patch
Type: text/x-patch
Part: 3
Message: Re: Remaining dependency on setlocale()
From 22419241e495c163d40e893d818741db7b1f3c78 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Fri, 7 Nov 2025 12:11:34 -0800
Subject: [PATCH v9 04/11] Allow pg_locale_t APIs to work when ctype_is_c.

Previously, callers had to check ctype_is_c before calling some of
these routines but not others. Now all of the APIs handle the ctype_is_c
case themselves, so callers need to check it only as an optimization.
---
 src/backend/utils/adt/like_support.c   | 34 ++++----------
 src/backend/utils/adt/pg_locale.c      | 63 ++++++++++++++++++++++++--
 src/backend/utils/adt/pg_locale_libc.c |  3 ++
 3 files changed, 72 insertions(+), 28 deletions(-)

diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index 999f23f86d5..0debccfa67b 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -99,8 +99,6 @@ static Selectivity like_selectivity(const char *patt, int pattlen,
 static Selectivity regex_selectivity(const char *patt, int pattlen,
 									 bool case_insensitive,
 									 int fixed_prefix_len);
-static int	pattern_char_isalpha(char c, bool is_multibyte,
-								 pg_locale_t locale);
 static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
 								  Oid collation);
 static Datum string_to_datum(const char *str, Oid datatype);
@@ -995,7 +993,6 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
 	Oid			typeid = patt_const->consttype;
 	int			pos,
 				match_pos;
-	bool		is_multibyte = (pg_database_encoding_max_length() > 1);
 	pg_locale_t locale = 0;
 
 	/* the right-hand const is type text or bytea */
@@ -1055,9 +1052,16 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
 				break;
 		}
 
-		/* Stop if case-varying character (it's sort of a wildcard) */
-		if (case_insensitive &&
-			pattern_char_isalpha(patt[pos], is_multibyte, locale))
+		/*
+		 * Stop if case-varying character (it's sort of a wildcard).
+		 *
+		 * In multibyte character sets or with non-libc providers, we can't
+		 * use isalpha, and it does not seem worth trying to convert to
+		 * wchar_t or char32_t.  Instead, just pass the single byte to the
+		 * provider, which will assume any non-ASCII char is potentially
+		 * case-varying.
+		 */
+		if (case_insensitive && char_is_cased(patt[pos], locale))
 			break;
 
 		match[match_pos++] = patt[pos];
@@ -1481,24 +1485,6 @@ regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
 	return sel;
 }
 
-/*
- * Check whether char is a letter (and, hence, subject to case-folding)
- *
- * In multibyte character sets or with ICU, we can't use isalpha, and it does
- * not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha.
- * Instead, just assume any non-ASCII char is potentially case-varying, and
- * hard-wire knowledge of which ASCII chars are letters.
- */
-static int
-pattern_char_isalpha(char c, bool is_multibyte,
-					 pg_locale_t locale)
-{
-	if (locale->ctype_is_c)
-		return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
-	else
-		return char_is_cased(c, locale);
-}
-
 
 /*
  * For bytea, the increment function need only increment the current byte
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index b14c7837938..9319fb633b6 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1261,6 +1261,17 @@ size_t
 pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 			pg_locale_t locale)
 {
+	if (locale->ctype == NULL)
+	{
+		int			i;
+
+		srclen = (srclen >= 0) ? srclen : strlen(src);
+		for (i = 0; i < srclen && i < dstsize; i++)
+			dst[i] = pg_ascii_tolower(src[i]);
+		if (i < dstsize)
+			dst[i] = '\0';
+		return srclen;
+	}
 	return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
 }
 
@@ -1268,6 +1279,29 @@ size_t
 pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 			pg_locale_t locale)
 {
+	if (locale->ctype == NULL)
+	{
+		bool		wasalnum = false;
+		int			i;
+
+		srclen = (srclen >= 0) ? srclen : strlen(src);
+		for (i = 0; i < Min(srclen, dstsize); i++)
+		{
+			char		c = src[i];
+
+			if (wasalnum)
+				dst[i] = pg_ascii_tolower(c);
+			else
+				dst[i] = pg_ascii_toupper(c);
+
+			wasalnum = ((c >= '0' && c <= '9') ||
+						(c >= 'A' && c <= 'Z') ||
+						(c >= 'a' && c <= 'z'));
+		}
+		if (i < dstsize)
+			dst[i] = '\0';
+		return srclen;
+	}
 	return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
 }
 
@@ -1275,6 +1309,17 @@ size_t
 pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 			pg_locale_t locale)
 {
+	if (locale->ctype == NULL)
+	{
+		int			i;
+
+		srclen = (srclen >= 0) ? srclen : strlen(src);
+		for (i = 0; i < srclen && i < dstsize; i++)
+			dst[i] = pg_ascii_toupper(src[i]);
+		if (i < dstsize)
+			dst[i] = '\0';
+		return srclen;
+	}
 	return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
 }
 
@@ -1282,10 +1327,18 @@ size_t
 pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 		   pg_locale_t locale)
 {
-	if (locale->ctype->strfold)
-		return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
-	else
-		return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
+	if (locale->ctype == NULL)
+	{
+		int			i;
+
+		srclen = (srclen >= 0) ? srclen : strlen(src);
+		for (i = 0; i < srclen && i < dstsize; i++)
+			dst[i] = pg_ascii_tolower(src[i]);
+		if (i < dstsize)
+			dst[i] = '\0';
+		return srclen;
+	}
+	return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
 }
 
 /*
@@ -1560,6 +1613,8 @@ pg_towlower(pg_wchar wc, pg_locale_t locale)
 bool
 char_is_cased(char ch, pg_locale_t locale)
 {
+	if (locale->ctype == NULL)
+		return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
 	return locale->ctype->char_is_cased(ch, locale);
 }
 
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 716f005066a..942454de4ed 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -326,6 +326,7 @@ static const struct ctype_methods ctype_methods_libc_sb = {
 	.strlower = strlower_libc_sb,
 	.strtitle = strtitle_libc_sb,
 	.strupper = strupper_libc_sb,
+	.strfold = strlower_libc_sb,
 	.wc_isdigit = wc_isdigit_libc_sb,
 	.wc_isalpha = wc_isalpha_libc_sb,
 	.wc_isalnum = wc_isalnum_libc_sb,
@@ -351,6 +352,7 @@ static const struct ctype_methods ctype_methods_libc_other_mb = {
 	.strlower = strlower_libc_mb,
 	.strtitle = strtitle_libc_mb,
 	.strupper = strupper_libc_mb,
+	.strfold = strlower_libc_mb,
 	.wc_isdigit = wc_isdigit_libc_sb,
 	.wc_isalpha = wc_isalpha_libc_sb,
 	.wc_isalnum = wc_isalnum_libc_sb,
@@ -372,6 +374,7 @@ static const struct ctype_methods ctype_methods_libc_utf8 = {
 	.strlower = strlower_libc_mb,
 	.strtitle = strtitle_libc_mb,
 	.strupper = strupper_libc_mb,
+	.strfold = strlower_libc_mb,
 	.wc_isdigit = wc_isdigit_libc_mb,
 	.wc_isalpha = wc_isalpha_libc_mb,
 	.wc_isalnum = wc_isalnum_libc_mb,
-- 
2.43.0