v8-0004-Allow-pg_locale_t-APIs-to-work-when-ctype_is_c.patch
text/x-patch
Filename: v8-0004-Allow-pg_locale_t-APIs-to-work-when-ctype_is_c.patch
Type: text/x-patch
Part: 3
From 735ee6342c2365f879c47c3aa0867c58174402aa Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Fri, 7 Nov 2025 12:11:34 -0800
Subject: [PATCH v8 4/7] Allow pg_locale_t APIs to work when ctype_is_c.
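Previously, some routines required the caller to check ctype_is_c first,
while others did not. Now every pg_locale_t API works even when the
locale has no ctype methods (locale->ctype == NULL), by falling back to
ASCII-only behavior, so callers need to check ctype_is_c only as an
optimization.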
---
src/backend/utils/adt/like_support.c | 34 ++++----------
src/backend/utils/adt/pg_locale.c | 63 ++++++++++++++++++++++++--
src/backend/utils/adt/pg_locale_libc.c | 3 ++
3 files changed, 72 insertions(+), 28 deletions(-)
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index 999f23f86d5..0debccfa67b 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -99,8 +99,6 @@ static Selectivity like_selectivity(const char *patt, int pattlen,
static Selectivity regex_selectivity(const char *patt, int pattlen,
bool case_insensitive,
int fixed_prefix_len);
-static int pattern_char_isalpha(char c, bool is_multibyte,
- pg_locale_t locale);
static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
Oid collation);
static Datum string_to_datum(const char *str, Oid datatype);
@@ -995,7 +993,6 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
Oid typeid = patt_const->consttype;
int pos,
match_pos;
- bool is_multibyte = (pg_database_encoding_max_length() > 1);
pg_locale_t locale = 0;
/* the right-hand const is type text or bytea */
@@ -1055,9 +1052,16 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
break;
}
- /* Stop if case-varying character (it's sort of a wildcard) */
- if (case_insensitive &&
- pattern_char_isalpha(patt[pos], is_multibyte, locale))
+ /*
+ * Stop if case-varying character (it's sort of a wildcard).
+ *
+ * In multibyte character sets or with non-libc providers, we can't
+ * use isalpha, and it does not seem worth trying to convert to
+ * wchar_t or char32_t. Instead, just pass the single byte to
+ * char_is_cased(): in C ctype only ASCII letters qualify; otherwise
+ * the provider should treat any non-ASCII byte as potentially
+ */
+ if (case_insensitive && char_is_cased(patt[pos], locale))
break;
match[match_pos++] = patt[pos];
@@ -1481,24 +1485,6 @@ regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
return sel;
}
-/*
- * Check whether char is a letter (and, hence, subject to case-folding)
- *
- * In multibyte character sets or with ICU, we can't use isalpha, and it does
- * not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha.
- * Instead, just assume any non-ASCII char is potentially case-varying, and
- * hard-wire knowledge of which ASCII chars are letters.
- */
-static int
-pattern_char_isalpha(char c, bool is_multibyte,
- pg_locale_t locale)
-{
- if (locale->ctype_is_c)
- return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
- else
- return char_is_cased(c, locale);
-}
-
/*
* For bytea, the increment function need only increment the current byte
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index b14c7837938..9319fb633b6 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1261,6 +1261,17 @@ size_t
pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
+ if (locale->ctype == NULL)
+ {
+ int i;
+
+ srclen = (srclen >= 0) ? srclen : strlen(src);
+ for (i = 0; i < srclen && i < dstsize; i++)
+ dst[i] = pg_ascii_tolower(src[i]);
+ if (i < dstsize)
+ dst[i] = '\0';
+ return srclen;
+ }
return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
}
@@ -1268,6 +1279,29 @@ size_t
pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
+ if (locale->ctype == NULL)
+ {
+ bool wasalnum = false;
+ int i;
+
+ srclen = (srclen >= 0) ? srclen : strlen(src);
+ for (i = 0; i < Min(srclen, dstsize); i++)
+ {
+ char c = src[i];
+
+ if (wasalnum)
+ dst[i] = pg_ascii_tolower(c);
+ else
+ dst[i] = pg_ascii_toupper(c);
+
+ wasalnum = ((c >= '0' && c <= '9') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= 'a' && c <= 'z'));
+ }
+ if (i < dstsize)
+ dst[i] = '\0';
+ return srclen;
+ }
return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
}
@@ -1275,6 +1309,17 @@ size_t
pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
+ if (locale->ctype == NULL)
+ {
+ int i;
+
+ srclen = (srclen >= 0) ? srclen : strlen(src);
+ for (i = 0; i < srclen && i < dstsize; i++)
+ dst[i] = pg_ascii_toupper(src[i]);
+ if (i < dstsize)
+ dst[i] = '\0';
+ return srclen;
+ }
return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
}
@@ -1282,10 +1327,18 @@ size_t
pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- if (locale->ctype->strfold)
- return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
- else
- return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
+ if (locale->ctype == NULL)
+ {
+ int i;
+
+ srclen = (srclen >= 0) ? srclen : strlen(src);
+ for (i = 0; i < srclen && i < dstsize; i++)
+ dst[i] = pg_ascii_tolower(src[i]);
+ if (i < dstsize)
+ dst[i] = '\0';
+ return srclen;
+ }
+ return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
}
/*
@@ -1560,6 +1613,8 @@ pg_towlower(pg_wchar wc, pg_locale_t locale)
bool
char_is_cased(char ch, pg_locale_t locale)
{
+ if (locale->ctype == NULL)
+ return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
return locale->ctype->char_is_cased(ch, locale);
}
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 716f005066a..942454de4ed 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -326,6 +326,7 @@ static const struct ctype_methods ctype_methods_libc_sb = {
.strlower = strlower_libc_sb,
.strtitle = strtitle_libc_sb,
.strupper = strupper_libc_sb,
+ .strfold = strlower_libc_sb,
.wc_isdigit = wc_isdigit_libc_sb,
.wc_isalpha = wc_isalpha_libc_sb,
.wc_isalnum = wc_isalnum_libc_sb,
@@ -351,6 +352,7 @@ static const struct ctype_methods ctype_methods_libc_other_mb = {
.strlower = strlower_libc_mb,
.strtitle = strtitle_libc_mb,
.strupper = strupper_libc_mb,
+ .strfold = strlower_libc_mb,
.wc_isdigit = wc_isdigit_libc_sb,
.wc_isalpha = wc_isalpha_libc_sb,
.wc_isalnum = wc_isalnum_libc_sb,
@@ -372,6 +374,7 @@ static const struct ctype_methods ctype_methods_libc_utf8 = {
.strlower = strlower_libc_mb,
.strtitle = strtitle_libc_mb,
.strupper = strupper_libc_mb,
+ .strfold = strlower_libc_mb,
.wc_isdigit = wc_isdigit_libc_mb,
.wc_isalpha = wc_isalpha_libc_mb,
.wc_isalnum = wc_isalnum_libc_mb,
--
2.43.0