v10-0004-Allow-pg_locale_t-APIs-to-work-when-ctype_is_c.patch
text/x-patch
Filename: v10-0004-Allow-pg_locale_t-APIs-to-work-when-ctype_is_c.patch
Type: text/x-patch
Part: 3
From 9d1c50e406099ebe82de8a8e03d9cb2f564d76eb Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Fri, 7 Nov 2025 12:11:34 -0800
Subject: [PATCH v10 04/11] Allow pg_locale_t APIs to work when ctype_is_c.
Previously, callers had to check ctype_is_c before calling some of the
pg_locale_t routines, but not others. Now the APIs work consistently
whether or not ctype_is_c is set, and callers need to check ctype_is_c
only as an optimization.
Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
---
src/backend/utils/adt/like_support.c | 34 ++++-------
src/backend/utils/adt/pg_locale.c | 78 ++++++++++++++++++++++++--
src/backend/utils/adt/pg_locale_libc.c | 6 ++
3 files changed, 88 insertions(+), 30 deletions(-)
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index 999f23f86d5..0debccfa67b 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -99,8 +99,6 @@ static Selectivity like_selectivity(const char *patt, int pattlen,
static Selectivity regex_selectivity(const char *patt, int pattlen,
bool case_insensitive,
int fixed_prefix_len);
-static int pattern_char_isalpha(char c, bool is_multibyte,
- pg_locale_t locale);
static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
Oid collation);
static Datum string_to_datum(const char *str, Oid datatype);
@@ -995,7 +993,6 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
Oid typeid = patt_const->consttype;
int pos,
match_pos;
- bool is_multibyte = (pg_database_encoding_max_length() > 1);
pg_locale_t locale = 0;
/* the right-hand const is type text or bytea */
@@ -1055,9 +1052,16 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
break;
}
- /* Stop if case-varying character (it's sort of a wildcard) */
- if (case_insensitive &&
- pattern_char_isalpha(patt[pos], is_multibyte, locale))
+ /*
+ * Stop if case-varying character (it's sort of a wildcard).
+ *
+ * In multibyte character sets or with non-libc providers, we can't
+ * use isalpha, and it does not seem worth trying to convert to
+ * wchar_t or char32_t. Instead, just pass the single byte to the
+ * provider, which will assume any non-ASCII char is potentially
+ * case-varying.
+ */
+ if (case_insensitive && char_is_cased(patt[pos], locale))
break;
match[match_pos++] = patt[pos];
@@ -1481,24 +1485,6 @@ regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
return sel;
}
-/*
- * Check whether char is a letter (and, hence, subject to case-folding)
- *
- * In multibyte character sets or with ICU, we can't use isalpha, and it does
- * not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha.
- * Instead, just assume any non-ASCII char is potentially case-varying, and
- * hard-wire knowledge of which ASCII chars are letters.
- */
-static int
-pattern_char_isalpha(char c, bool is_multibyte,
- pg_locale_t locale)
-{
- if (locale->ctype_is_c)
- return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
- else
- return char_is_cased(c, locale);
-}
-
/*
* For bytea, the increment function need only increment the current byte
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index b14c7837938..48f9d44a5f7 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1257,35 +1257,99 @@ get_collation_actual_version(char collprovider, const char *collcollate)
return collversion;
}
+/* lowercasing/casefolding in C locale */
+static size_t
+strlower_c(char *dst, size_t dstsize, const char *src, ssize_t srclen)
+{
+ int i;
+
+ srclen = (srclen >= 0) ? srclen : strlen(src);
+ for (i = 0; i < srclen && i < dstsize; i++)
+ dst[i] = pg_ascii_tolower(src[i]);
+ if (i < dstsize)
+ dst[i] = '\0';
+ return srclen;
+}
+
+/* titlecasing in C locale */
+static size_t
+strtitle_c(char *dst, size_t dstsize, const char *src, ssize_t srclen)
+{
+ bool wasalnum = false;
+ int i;
+
+ srclen = (srclen >= 0) ? srclen : strlen(src);
+ for (i = 0; i < srclen && i < dstsize; i++)
+ {
+ char c = src[i];
+
+ if (wasalnum)
+ dst[i] = pg_ascii_tolower(c);
+ else
+ dst[i] = pg_ascii_toupper(c);
+
+ wasalnum = ((c >= '0' && c <= '9') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= 'a' && c <= 'z'));
+ }
+ if (i < dstsize)
+ dst[i] = '\0';
+ return srclen;
+}
+
+/* uppercasing in C locale */
+static size_t
+strupper_c(char *dst, size_t dstsize, const char *src, ssize_t srclen)
+{
+ int i;
+
+ srclen = (srclen >= 0) ? srclen : strlen(src);
+ for (i = 0; i < srclen && i < dstsize; i++)
+ dst[i] = pg_ascii_toupper(src[i]);
+ if (i < dstsize)
+ dst[i] = '\0';
+ return srclen;
+}
+
size_t
pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
+ if (locale->ctype == NULL)
+ return strlower_c(dst, dstsize, src, srclen);
+ else
+ return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
}
size_t
pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
+ if (locale->ctype == NULL)
+ return strtitle_c(dst, dstsize, src, srclen);
+ else
+ return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
}
size_t
pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
+ if (locale->ctype == NULL)
+ return strupper_c(dst, dstsize, src, srclen);
+ else
+ return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
}
size_t
pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- if (locale->ctype->strfold)
- return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
+ /* in the C locale, casefolding is the same as lowercasing */
+ if (locale->ctype == NULL)
+ return strlower_c(dst, dstsize, src, srclen);
else
- return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
+ return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
}
/*
@@ -1560,6 +1624,8 @@ pg_towlower(pg_wchar wc, pg_locale_t locale)
bool
char_is_cased(char ch, pg_locale_t locale)
{
+ if (locale->ctype == NULL)
+ return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
return locale->ctype->char_is_cased(ch, locale);
}
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 716f005066a..26ba1be73f1 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -326,6 +326,8 @@ static const struct ctype_methods ctype_methods_libc_sb = {
.strlower = strlower_libc_sb,
.strtitle = strtitle_libc_sb,
.strupper = strupper_libc_sb,
+ /* in libc, casefolding is the same as lowercasing */
+ .strfold = strlower_libc_sb,
.wc_isdigit = wc_isdigit_libc_sb,
.wc_isalpha = wc_isalpha_libc_sb,
.wc_isalnum = wc_isalnum_libc_sb,
@@ -351,6 +353,8 @@ static const struct ctype_methods ctype_methods_libc_other_mb = {
.strlower = strlower_libc_mb,
.strtitle = strtitle_libc_mb,
.strupper = strupper_libc_mb,
+ /* in libc, casefolding is the same as lowercasing */
+ .strfold = strlower_libc_mb,
.wc_isdigit = wc_isdigit_libc_sb,
.wc_isalpha = wc_isalpha_libc_sb,
.wc_isalnum = wc_isalnum_libc_sb,
@@ -372,6 +376,8 @@ static const struct ctype_methods ctype_methods_libc_utf8 = {
.strlower = strlower_libc_mb,
.strtitle = strtitle_libc_mb,
.strupper = strupper_libc_mb,
+ /* in libc, casefolding is the same as lowercasing */
+ .strfold = strlower_libc_mb,
.wc_isdigit = wc_isdigit_libc_mb,
.wc_isalpha = wc_isalpha_libc_mb,
.wc_isalnum = wc_isalnum_libc_mb,
--
2.43.0