v7-0002-Define-char_tolower-char_toupper-for-all-locale-p.patch
text/x-patch
Filename: v7-0002-Define-char_tolower-char_toupper-for-all-locale-p.patch
Type: text/x-patch
Part: 1
From c7405ecc07e552fa9bdf4cf535b4757c2de7e9e4 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Sun, 26 Oct 2025 14:51:47 -0700
Subject: [PATCH v7 2/9] Define char_tolower()/char_toupper() for all locale
providers.
The behavior is defined for each locale provider rather than
unconditionally depending on the global LC_CTYPE setting. Needed as an
alternative to tolower()/toupper() for some callers.
---
src/backend/utils/adt/like.c | 4 +--
src/backend/utils/adt/pg_locale.c | 32 ++++++++++++++++-------
src/backend/utils/adt/pg_locale_builtin.c | 18 +++++++++++++
src/backend/utils/adt/pg_locale_icu.c | 23 ++++++++++++++++
src/backend/utils/adt/pg_locale_libc.c | 21 +++++++++++++--
src/include/utils/pg_locale.h | 10 +++----
6 files changed, 89 insertions(+), 19 deletions(-)
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 4216ac17f43..37c1c86aee8 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -209,9 +209,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
* way.
*/
- if (locale->ctype_is_c ||
- (char_tolower_enabled(locale) &&
- pg_database_encoding_max_length() == 1))
+ if (locale->ctype_is_c || locale->ctype->pattern_casefold_char)
{
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index b14c7837938..9631d274611 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1564,25 +1564,39 @@ char_is_cased(char ch, pg_locale_t locale)
}
/*
- * char_tolower_enabled()
+ * char_tolower()
*
- * Does the provider support char_tolower()?
+ * Convert single-byte char to lowercase. Not correct for multibyte encodings,
+ * but needed for historical compatibility purposes.
*/
-bool
-char_tolower_enabled(pg_locale_t locale)
+char
+char_tolower(unsigned char ch, pg_locale_t locale)
{
- return (locale->ctype->char_tolower != NULL);
+ if (locale->ctype == NULL)
+ {
+ if (ch >= 'A' && ch <= 'Z')
+ return ch + ('a' - 'A');
+ return ch;
+ }
+ return locale->ctype->char_tolower(ch, locale);
}
/*
- * char_tolower()
+ * char_toupper()
*
- * Convert char (single-byte encoding) to lowercase.
+ * Convert single-byte char to uppercase. Not correct for multibyte encodings,
+ * but needed for historical compatibility purposes.
*/
char
-char_tolower(unsigned char ch, pg_locale_t locale)
+char_toupper(unsigned char ch, pg_locale_t locale)
{
- return locale->ctype->char_tolower(ch, locale);
+ if (locale->ctype == NULL)
+ {
+ if (ch >= 'a' && ch <= 'z')
+ return ch - ('a' - 'A');
+ return ch;
+ }
+ return locale->ctype->char_toupper(ch, locale);
}
/*
diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c
index 1021e0d129b..5059b2bb59a 100644
--- a/src/backend/utils/adt/pg_locale_builtin.c
+++ b/src/backend/utils/adt/pg_locale_builtin.c
@@ -185,6 +185,22 @@ wc_isxdigit_builtin(pg_wchar wc, pg_locale_t locale)
return pg_u_isxdigit(to_char32(wc), !locale->builtin.casemap_full);
}
+static char
+char_tolower_builtin(unsigned char ch, pg_locale_t locale)
+{
+ if (ch >= 'A' && ch <= 'Z')
+ return ch + ('a' - 'A');
+ return ch;
+}
+
+static char
+char_toupper_builtin(unsigned char ch, pg_locale_t locale)
+{
+ if (ch >= 'a' && ch <= 'z')
+ return ch - ('a' - 'A');
+ return ch;
+}
+
static bool
char_is_cased_builtin(char ch, pg_locale_t locale)
{
@@ -219,6 +235,8 @@ static const struct ctype_methods ctype_methods_builtin = {
.wc_ispunct = wc_ispunct_builtin,
.wc_isspace = wc_isspace_builtin,
.wc_isxdigit = wc_isxdigit_builtin,
+ .char_tolower = char_tolower_builtin,
+ .char_toupper = char_toupper_builtin,
.char_is_cased = char_is_cased_builtin,
.wc_tolower = wc_tolower_builtin,
.wc_toupper = wc_toupper_builtin,
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index f5a0cc8fe41..449e3bbb7a6 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -121,6 +121,27 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
const char *locale,
UErrorCode *pErrorCode);
+/*
+ * ICU still depends on libc's global-locale tolower()/toupper() to preserve
+ * historical behavior for single-byte encodings. XXX: consider decoding the
+ * single byte into a code point and using u_tolower()/u_toupper() instead.
+ */
+static char
+char_tolower_icu(unsigned char ch, pg_locale_t locale)
+{
+ if (isupper(ch))
+ return tolower(ch);
+ return ch;
+}
+
+static char
+char_toupper_icu(unsigned char ch, pg_locale_t locale)
+{
+ if (islower(ch))
+ return toupper(ch);
+ return ch;
+}
+
static bool
char_is_cased_icu(char ch, pg_locale_t locale)
{
@@ -238,6 +259,8 @@ static const struct ctype_methods ctype_methods_icu = {
.wc_ispunct = wc_ispunct_icu,
.wc_isspace = wc_isspace_icu,
.wc_isxdigit = wc_isxdigit_icu,
+ .char_tolower = char_tolower_icu,
+ .char_toupper = char_toupper_icu,
.char_is_cased = char_is_cased_icu,
.wc_toupper = toupper_icu,
.wc_tolower = tolower_icu,
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 716f005066a..b0428ad288e 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -251,8 +251,21 @@ wc_isxdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
static char
char_tolower_libc(unsigned char ch, pg_locale_t locale)
{
- Assert(pg_database_encoding_max_length() == 1);
- return tolower_l(ch, locale->lt);
+ locale_t loc = locale->lt;
+
+ if (isupper_l(ch, loc))
+ return tolower_l(ch, loc);
+ return ch;
+}
+
+static char
+char_toupper_libc(unsigned char ch, pg_locale_t locale)
+{
+ locale_t loc = locale->lt;
+
+ if (islower_l(ch, loc))
+ return toupper_l(ch, loc);
+ return ch;
}
static bool
@@ -338,9 +351,11 @@ static const struct ctype_methods ctype_methods_libc_sb = {
.wc_isxdigit = wc_isxdigit_libc_sb,
.char_is_cased = char_is_cased_libc,
.char_tolower = char_tolower_libc,
+ .char_toupper = char_toupper_libc,
.wc_toupper = toupper_libc_sb,
.wc_tolower = tolower_libc_sb,
.max_chr = UCHAR_MAX,
+ .pattern_casefold_char = true,
};
/*
@@ -363,6 +378,7 @@ static const struct ctype_methods ctype_methods_libc_other_mb = {
.wc_isxdigit = wc_isxdigit_libc_sb,
.char_is_cased = char_is_cased_libc,
.char_tolower = char_tolower_libc,
+ .char_toupper = char_toupper_libc,
.wc_toupper = toupper_libc_sb,
.wc_tolower = tolower_libc_sb,
.max_chr = UCHAR_MAX,
@@ -384,6 +400,7 @@ static const struct ctype_methods ctype_methods_libc_utf8 = {
.wc_isxdigit = wc_isxdigit_libc_mb,
.char_is_cased = char_is_cased_libc,
.char_tolower = char_tolower_libc,
+ .char_toupper = char_toupper_libc,
.wc_toupper = toupper_libc_mb,
.wc_tolower = tolower_libc_mb,
};
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 683e1a0eef8..790db566e91 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -113,13 +113,13 @@ struct ctype_methods
/* required */
bool (*char_is_cased) (char ch, pg_locale_t locale);
+ char (*char_tolower) (unsigned char ch, pg_locale_t locale);
+ char (*char_toupper) (unsigned char ch, pg_locale_t locale);
/*
- * Optional. If defined, will only be called for single-byte encodings. If
- * not defined, or if the encoding is multibyte, will fall back to
- * pg_strlower().
+ * Use byte-at-a-time case folding for case-insensitive patterns.
*/
- char (*char_tolower) (unsigned char ch, pg_locale_t locale);
+ bool pattern_casefold_char;
/*
* For regex and pattern matching efficiency, the maximum char value
@@ -177,8 +177,8 @@ extern pg_locale_t pg_newlocale_from_collation(Oid collid);
extern char *get_collation_actual_version(char collprovider, const char *collcollate);
extern bool char_is_cased(char ch, pg_locale_t locale);
-extern bool char_tolower_enabled(pg_locale_t locale);
extern char char_tolower(unsigned char ch, pg_locale_t locale);
+extern char char_toupper(unsigned char ch, pg_locale_t locale);
extern size_t pg_strlower(char *dst, size_t dstsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
--
2.43.0