v7-0009-Avoid-global-LC_CTYPE-dependency-in-strcasecmp.c-.patch
text/x-patch
Filename: v7-0009-Avoid-global-LC_CTYPE-dependency-in-strcasecmp.c-.patch
Type: text/x-patch
Part: 8
From 9ae6c6f9a0994fb694041d587acb81df45156984 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Mon, 27 Oct 2025 16:08:54 -0700
Subject: [PATCH v7 9/9] Avoid global LC_CTYPE dependency in pgstrcasecmp.c for
server.
In the server (but not the frontend), use char_tolower()/char_toupper()
instead of tolower()/toupper(), so that case conversion in pgstrcasecmp.c
no longer depends on the global LC_CTYPE setting.
---
src/port/pgstrcasecmp.c | 73 ++++++++++++++++++++++++++++++-----------
1 file changed, 53 insertions(+), 20 deletions(-)
diff --git a/src/port/pgstrcasecmp.c b/src/port/pgstrcasecmp.c
index ec2b3a75c3d..2184f132f3a 100644
--- a/src/port/pgstrcasecmp.c
+++ b/src/port/pgstrcasecmp.c
@@ -3,15 +3,31 @@
* pgstrcasecmp.c
* Portable SQL-like case-independent comparisons and conversions.
*
- * SQL99 specifies Unicode-aware case normalization, which we don't yet
- * have the infrastructure for. Instead we use tolower() to provide a
- * locale-aware translation. However, there are some locales where this
- * is not right either (eg, Turkish may do strange things with 'i' and
- * 'I'). Our current compromise is to use tolower() for characters with
- * the high bit set, and use an ASCII-only downcasing for 7-bit
- * characters.
+ * These functions are for case-insensitive identifier matching and related
+ * functionality, and may be called either from the client or from the
+ * server. These functions are not intended for text data stored in the
+ * database; see pg_locale.h.
*
- * NB: this code should match downcase_truncate_identifier() in scansup.c.
+ * In the server, the casing behavior is determined by the database default
+ * collation, which may be different depending on the provider and locale.
+ * In the client, casing behavior is determined by libc's tolower() and
+ * toupper(), which depend on the locale settings on the client (and
+ * therefore may not match the server's semantics). In any case, the ASCII
+ * range is guaranteed to use plain ASCII casing semantics.
+ *
+ * SQL99 specifies Unicode-aware case normalization, but for historical
+ * compatibility reasons, we don't do so. Instead we do char-at-a-time
+ * lowercasing to provide a locale-aware translation for single-byte
+ * encodings. However, there are some locales where this is not right either
+ * (eg, Turkish may do strange things with 'i' and 'I'). Our current
+ * compromise is to use tolower()/char_tolower() for characters with the high
+ * bit set, and use an ASCII-only downcasing for 7-bit characters.
+ *
+ * NB: these functions are not multibyte-aware. For UTF8, the behavior
+ * degenerates to plain ASCII casing semantics.
+ *
+ * NB: this code should match downcase_truncate_identifier() in scansup.c,
+ * except that we don't check for multibyte encodings.
*
* We also provide strict ASCII-only case conversion functions, which can
* be used to implement C/POSIX case folding semantics no matter what the
@@ -28,6 +44,23 @@
#include <ctype.h>
+/*
+ * In the server, use char_tolower()/char_toupper() with the database default
+ * locale; in the client, use tolower()/toupper() (x is evaluated twice).
+ */
+#ifndef FRONTEND
+
+#include "utils/pg_locale.h"
+/* char_tolower()/char_toupper() don't need an isupper()/islower() test */
+#define TOLOWER(x) char_tolower(x, NULL)
+#define TOUPPER(x) char_toupper(x, NULL)
+
+#else
+
+#define TOLOWER(x) (isupper(x) ? tolower(x) : (x))
+#define TOUPPER(x) (islower(x) ? toupper(x) : (x))
+
+#endif
/*
* Case-independent comparison of two null-terminated strings.
@@ -44,13 +77,13 @@ pg_strcasecmp(const char *s1, const char *s2)
{
if (ch1 >= 'A' && ch1 <= 'Z')
ch1 += 'a' - 'A';
- else if (IS_HIGHBIT_SET(ch1) && isupper(ch1))
- ch1 = tolower(ch1);
+ else if (IS_HIGHBIT_SET(ch1))
+ ch1 = TOLOWER(ch1);
if (ch2 >= 'A' && ch2 <= 'Z')
ch2 += 'a' - 'A';
- else if (IS_HIGHBIT_SET(ch2) && isupper(ch2))
- ch2 = tolower(ch2);
+ else if (IS_HIGHBIT_SET(ch2))
+ ch2 = TOLOWER(ch2);
if (ch1 != ch2)
return (int) ch1 - (int) ch2;
@@ -77,13 +110,13 @@ pg_strncasecmp(const char *s1, const char *s2, size_t n)
{
if (ch1 >= 'A' && ch1 <= 'Z')
ch1 += 'a' - 'A';
- else if (IS_HIGHBIT_SET(ch1) && isupper(ch1))
- ch1 = tolower(ch1);
+ else if (IS_HIGHBIT_SET(ch1))
+ ch1 = TOLOWER(ch1);
if (ch2 >= 'A' && ch2 <= 'Z')
ch2 += 'a' - 'A';
- else if (IS_HIGHBIT_SET(ch2) && isupper(ch2))
- ch2 = tolower(ch2);
+ else if (IS_HIGHBIT_SET(ch2))
+ ch2 = TOLOWER(ch2);
if (ch1 != ch2)
return (int) ch1 - (int) ch2;
@@ -106,8 +139,8 @@ pg_toupper(unsigned char ch)
{
if (ch >= 'a' && ch <= 'z')
ch += 'A' - 'a';
- else if (IS_HIGHBIT_SET(ch) && islower(ch))
- ch = toupper(ch);
+ else if (IS_HIGHBIT_SET(ch))
+ ch = TOUPPER(ch);
return ch;
}
@@ -123,8 +156,8 @@ pg_tolower(unsigned char ch)
{
if (ch >= 'A' && ch <= 'Z')
ch += 'a' - 'A';
- else if (IS_HIGHBIT_SET(ch) && isupper(ch))
- ch = tolower(ch);
+ else if (IS_HIGHBIT_SET(ch))
+ ch = TOLOWER(ch);
return ch;
}
--
2.43.0