From 9d1c50e406099ebe82de8a8e03d9cb2f564d76eb Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Fri, 7 Nov 2025 12:11:34 -0800
Subject: [PATCH v10 04/11] Allow pg_locale_t APIs to work when ctype_is_c.

Previously, the caller needed to check ctype_is_c first for some
routines and not others. Now, the APIs consistently work, and the
caller can just check ctype_is_c for optimization purposes.

Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
---
 src/backend/utils/adt/like_support.c   | 34 ++++-------
 src/backend/utils/adt/pg_locale.c      | 78 ++++++++++++++++++++++++--
 src/backend/utils/adt/pg_locale_libc.c |  6 ++
 3 files changed, 88 insertions(+), 30 deletions(-)

diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index 999f23f86d5..0debccfa67b 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -99,8 +99,6 @@ static Selectivity like_selectivity(const char *patt, int pattlen,
 static Selectivity regex_selectivity(const char *patt, int pattlen,
 									 bool case_insensitive,
 									 int fixed_prefix_len);
-static int	pattern_char_isalpha(char c, bool is_multibyte,
-								 pg_locale_t locale);
 static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
 								  Oid collation);
 static Datum string_to_datum(const char *str, Oid datatype);
@@ -995,7 +993,6 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
 	Oid			typeid = patt_const->consttype;
 	int			pos,
 				match_pos;
-	bool		is_multibyte = (pg_database_encoding_max_length() > 1);
 	pg_locale_t locale = 0;
 
 	/* the right-hand const is type text or bytea */
@@ -1055,9 +1052,16 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
 				break;
 		}
 
-		/* Stop if case-varying character (it's sort of a wildcard) */
-		if (case_insensitive &&
-			pattern_char_isalpha(patt[pos], is_multibyte, locale))
+		/*
+		 * Stop if case-varying character (it's sort of a wildcard).
+		 *
+		 * In multibyte character sets or with non-libc providers, we can't
+		 * use isalpha, and it does not seem worth trying to convert to
+		 * wchar_t or char32_t.  Instead, just pass the single byte to the
+		 * provider, which will assume any non-ASCII char is potentially
+		 * case-varying.
+		 */
+		if (case_insensitive && char_is_cased(patt[pos], locale))
 			break;
 
 		match[match_pos++] = patt[pos];
@@ -1481,24 +1485,6 @@ regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
 	return sel;
 }
 
-/*
- * Check whether char is a letter (and, hence, subject to case-folding)
- *
- * In multibyte character sets or with ICU, we can't use isalpha, and it does
- * not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha.
- * Instead, just assume any non-ASCII char is potentially case-varying, and
- * hard-wire knowledge of which ASCII chars are letters.
- */
-static int
-pattern_char_isalpha(char c, bool is_multibyte,
-					 pg_locale_t locale)
-{
-	if (locale->ctype_is_c)
-		return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
-	else
-		return char_is_cased(c, locale);
-}
-
 
 /*
  * For bytea, the increment function need only increment the current byte
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index b14c7837938..48f9d44a5f7 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1257,35 +1257,99 @@ get_collation_actual_version(char collprovider, const char *collcollate)
 	return collversion;
 }
 
+/* lowercasing/casefolding in C locale */
+static size_t
+strlower_c(char *dst, size_t dstsize, const char *src, ssize_t srclen)
+{
+	int			i;
+
+	srclen = (srclen >= 0) ? srclen : strlen(src);
+	for (i = 0; i < srclen && i < dstsize; i++)
+		dst[i] = pg_ascii_tolower(src[i]);
+	if (i < dstsize)
+		dst[i] = '\0';
+	return srclen;
+}
+
+/* titlecasing in C locale */
+static size_t
+strtitle_c(char *dst, size_t dstsize, const char *src, ssize_t srclen)
+{
+	bool		wasalnum = false;
+	int			i;
+
+	srclen = (srclen >= 0) ? srclen : strlen(src);
+	for (i = 0; i < srclen && i < dstsize; i++)
+	{
+		char		c = src[i];
+
+		if (wasalnum)
+			dst[i] = pg_ascii_tolower(c);
+		else
+			dst[i] = pg_ascii_toupper(c);
+
+		wasalnum = ((c >= '0' && c <= '9') ||
+					(c >= 'A' && c <= 'Z') ||
+					(c >= 'a' && c <= 'z'));
+	}
+	if (i < dstsize)
+		dst[i] = '\0';
+	return srclen;
+}
+
+/* uppercasing in C locale */
+static size_t
+strupper_c(char *dst, size_t dstsize, const char *src, ssize_t srclen)
+{
+	int			i;
+
+	srclen = (srclen >= 0) ? srclen : strlen(src);
+	for (i = 0; i < srclen && i < dstsize; i++)
+		dst[i] = pg_ascii_toupper(src[i]);
+	if (i < dstsize)
+		dst[i] = '\0';
+	return srclen;
+}
+
 size_t
 pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 			pg_locale_t locale)
 {
-	return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
+	if (locale->ctype == NULL)
+		return strlower_c(dst, dstsize, src, srclen);
+	else
+		return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
 }
 
 size_t
 pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 			pg_locale_t locale)
 {
-	return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
+	if (locale->ctype == NULL)
+		return strtitle_c(dst, dstsize, src, srclen);
+	else
+		return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
 }
 
 size_t
 pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 			pg_locale_t locale)
 {
-	return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
+	if (locale->ctype == NULL)
+		return strupper_c(dst, dstsize, src, srclen);
+	else
+		return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
 }
 
 size_t
 pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 		   pg_locale_t locale)
 {
-	if (locale->ctype->strfold)
-		return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
+	/* in the C locale, casefolding is the same as lowercasing */
+	if (locale->ctype == NULL)
+		return strlower_c(dst, dstsize, src, srclen);
 	else
-		return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
+		return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
 }
 
 /*
@@ -1560,6 +1624,8 @@ pg_towlower(pg_wchar wc, pg_locale_t locale)
 bool
 char_is_cased(char ch, pg_locale_t locale)
 {
+	if (locale->ctype == NULL)
+		return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
 	return locale->ctype->char_is_cased(ch, locale);
 }
 
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 716f005066a..26ba1be73f1 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -326,6 +326,8 @@ static const struct ctype_methods ctype_methods_libc_sb = {
 	.strlower = strlower_libc_sb,
 	.strtitle = strtitle_libc_sb,
 	.strupper = strupper_libc_sb,
+	/* in libc, casefolding is the same as lowercasing */
+	.strfold = strlower_libc_sb,
 	.wc_isdigit = wc_isdigit_libc_sb,
 	.wc_isalpha = wc_isalpha_libc_sb,
 	.wc_isalnum = wc_isalnum_libc_sb,
@@ -351,6 +353,8 @@ static const struct ctype_methods ctype_methods_libc_other_mb = {
 	.strlower = strlower_libc_mb,
 	.strtitle = strtitle_libc_mb,
 	.strupper = strupper_libc_mb,
+	/* in libc, casefolding is the same as lowercasing */
+	.strfold = strlower_libc_mb,
 	.wc_isdigit = wc_isdigit_libc_sb,
 	.wc_isalpha = wc_isalpha_libc_sb,
 	.wc_isalnum = wc_isalnum_libc_sb,
@@ -372,6 +376,8 @@ static const struct ctype_methods ctype_methods_libc_utf8 = {
 	.strlower = strlower_libc_mb,
 	.strtitle = strtitle_libc_mb,
 	.strupper = strupper_libc_mb,
+	/* in libc, casefolding is the same as lowercasing */
+	.strfold = strlower_libc_mb,
 	.wc_isdigit = wc_isdigit_libc_mb,
 	.wc_isalpha = wc_isalpha_libc_mb,
 	.wc_isalnum = wc_isalnum_libc_mb,
-- 
2.43.0

