v7-0009-Avoid-global-LC_CTYPE-dependency-in-strcasecmp.c-.patch

text/x-patch

Filename: v7-0009-Avoid-global-LC_CTYPE-dependency-in-strcasecmp.c-.patch
Type: text/x-patch
Part: 8
Message: Re: Remaining dependency on setlocale()
From 9ae6c6f9a0994fb694041d587acb81df45156984 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Mon, 27 Oct 2025 16:08:54 -0700
Subject: [PATCH v7 9/9] Avoid global LC_CTYPE dependency in pgstrcasecmp.c
 for server.

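In the server (but not the frontend), use char_tolower()/char_toupper()
instead of tolower()/toupper() for characters with the high bit set, so
that casing follows the database default locale rather than the global
LC_CTYPE setting.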
---
 src/port/pgstrcasecmp.c | 73 ++++++++++++++++++++++++++++++-----------
 1 file changed, 53 insertions(+), 20 deletions(-)

diff --git a/src/port/pgstrcasecmp.c b/src/port/pgstrcasecmp.c
index ec2b3a75c3d..2184f132f3a 100644
--- a/src/port/pgstrcasecmp.c
+++ b/src/port/pgstrcasecmp.c
@@ -3,15 +3,31 @@
  * pgstrcasecmp.c
  *	   Portable SQL-like case-independent comparisons and conversions.
  *
- * SQL99 specifies Unicode-aware case normalization, which we don't yet
- * have the infrastructure for.  Instead we use tolower() to provide a
- * locale-aware translation.  However, there are some locales where this
- * is not right either (eg, Turkish may do strange things with 'i' and
- * 'I').  Our current compromise is to use tolower() for characters with
- * the high bit set, and use an ASCII-only downcasing for 7-bit
- * characters.
+ * These functions are for case-insensitive identifier matching and related
+ * functionality, and may be called either from the client or from the
+ * server. These functions are not intended for text data stored in the
+ * database; see pg_locale.h.
  *
- * NB: this code should match downcase_truncate_identifier() in scansup.c.
+ * In the server, the casing behavior is determined by the database default
+ * collation, which may be different depending on the provider and locale.
+ * In the client, casing behavior is determined by libc's tolower() and
+ * toupper(), which depend on the locale settings on the client (and
+ * therefore may not match the server's semantics).  In any case, the ASCII
+ * range is guaranteed to use plain ASCII casing semantics.
+ *
+ * SQL99 specifies Unicode-aware case normalization, but for historical
+ * compatibility reasons, we don't do so.  Instead we do char-at-a-time
+ * lowercasing to provide a locale-aware translation for single-byte
+ * encodings.  However, there are some locales where this is not right either
+ * (eg, Turkish may do strange things with 'i' and 'I').  Our current
+ * compromise is to use tolower()/char_tolower() for characters with the high
+ * bit set, and use an ASCII-only downcasing for 7-bit characters.
+ *
+ * NB: these functions are not multibyte-aware. For UTF8, the behavior
+ * degenerates to plain ASCII casing semantics.
+ *
+ * NB: this code should match downcase_truncate_identifier() in scansup.c,
+ * except that we don't check for multibyte encodings.
  *
  * We also provide strict ASCII-only case conversion functions, which can
  * be used to implement C/POSIX case folding semantics no matter what the
@@ -28,6 +44,23 @@
 
 #include <ctype.h>
 
+/*
+ * In the server, use char_tolower()/char_toupper() with the database default
+ * locale; in the client, use tolower()/toupper() (may evaluate x twice).
+ */
+#ifndef FRONTEND
+
+#include "utils/pg_locale.h"
+/* char_tolower()/char_toupper() don't need isupper()/islower() test */
+#define TOLOWER(x) char_tolower(x, NULL)
+#define TOUPPER(x) char_toupper(x, NULL)
+
+#else
+
+#define TOLOWER(x) (isupper(x) ? tolower(x) : (x))
+#define TOUPPER(x) (islower(x) ? toupper(x) : (x))
+
+#endif
 
 /*
  * Case-independent comparison of two null-terminated strings.
@@ -44,13 +77,13 @@ pg_strcasecmp(const char *s1, const char *s2)
 		{
 			if (ch1 >= 'A' && ch1 <= 'Z')
 				ch1 += 'a' - 'A';
-			else if (IS_HIGHBIT_SET(ch1) && isupper(ch1))
-				ch1 = tolower(ch1);
+			else if (IS_HIGHBIT_SET(ch1))
+				ch1 = TOLOWER(ch1);
 
 			if (ch2 >= 'A' && ch2 <= 'Z')
 				ch2 += 'a' - 'A';
-			else if (IS_HIGHBIT_SET(ch2) && isupper(ch2))
-				ch2 = tolower(ch2);
+			else if (IS_HIGHBIT_SET(ch2))
+				ch2 = TOLOWER(ch2);
 
 			if (ch1 != ch2)
 				return (int) ch1 - (int) ch2;
@@ -77,13 +110,13 @@ pg_strncasecmp(const char *s1, const char *s2, size_t n)
 		{
 			if (ch1 >= 'A' && ch1 <= 'Z')
 				ch1 += 'a' - 'A';
-			else if (IS_HIGHBIT_SET(ch1) && isupper(ch1))
-				ch1 = tolower(ch1);
+			else if (IS_HIGHBIT_SET(ch1))
+				ch1 = TOLOWER(ch1);
 
 			if (ch2 >= 'A' && ch2 <= 'Z')
 				ch2 += 'a' - 'A';
-			else if (IS_HIGHBIT_SET(ch2) && isupper(ch2))
-				ch2 = tolower(ch2);
+			else if (IS_HIGHBIT_SET(ch2))
+				ch2 = TOLOWER(ch2);
 
 			if (ch1 != ch2)
 				return (int) ch1 - (int) ch2;
@@ -106,8 +139,8 @@ pg_toupper(unsigned char ch)
 {
 	if (ch >= 'a' && ch <= 'z')
 		ch += 'A' - 'a';
-	else if (IS_HIGHBIT_SET(ch) && islower(ch))
-		ch = toupper(ch);
+	else if (IS_HIGHBIT_SET(ch))
+		ch = TOUPPER(ch);
 	return ch;
 }
 
@@ -123,8 +156,8 @@ pg_tolower(unsigned char ch)
 {
 	if (ch >= 'A' && ch <= 'Z')
 		ch += 'a' - 'A';
-	else if (IS_HIGHBIT_SET(ch) && isupper(ch))
-		ch = tolower(ch);
+	else if (IS_HIGHBIT_SET(ch))
+		ch = TOLOWER(ch);
 	return ch;
 }
 
-- 
2.43.0