From 68b05c20a68098613fbe6657ddb2b07c5ffd3d0b Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Mon, 24 Nov 2025 14:00:52 -0800
Subject: [PATCH v12 8/8] Control LC_COLLATE with GUC.

Now that the global LC_COLLATE setting is not used for any in-core
purpose at all (see commit 5e6e42e44f), allow it to be set with a
GUC. This may be useful for extensions or procedural languages that
still depend on the global LC_COLLATE setting.

TODO: needs discussion

Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
---
 src/backend/utils/adt/pg_locale.c             | 59 +++++++++++++++++++
 src/backend/utils/init/postinit.c             |  2 +
 src/backend/utils/misc/guc_parameters.dat     |  9 +++
 src/backend/utils/misc/postgresql.conf.sample |  2 +
 src/bin/initdb/initdb.c                       |  3 +
 src/include/utils/guc_hooks.h                 |  2 +
 src/include/utils/pg_locale.h                 |  1 +
 7 files changed, 78 insertions(+)

diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index ee08ac045b7..6dfbe8af47b 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -81,6 +81,7 @@ extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
 extern char *get_collation_actual_version_libc(const char *collcollate);
 
 /* GUC settings */
+char	   *locale_collate;
 char	   *locale_messages;
 char	   *locale_monetary;
 char	   *locale_numeric;
@@ -369,6 +370,64 @@ assign_locale_time(const char *newval, void *extra)
 	CurrentLCTimeValid = false;
 }
 
+/*
+ * GUC check hook for lc_collate.  We allow LC_COLLATE to actually be set
+ * globally.
+ *
+ * Note: we normally disallow value = "" because it wouldn't have consistent
+ * semantics (it'd effectively just use the previous value).  However, this
+ * is the value passed for PGC_S_DEFAULT, so don't complain in that case.
+ * The idea there is just to accept the environment setting *if possible*
+ * during startup, until we can read the proper value from postgresql.conf.
+ *
+ * If the locale's encoding conflicts with the database encoding, the value
+ * is replaced by "C" for PGC_S_FILE, passed on to check_locale() unchanged
+ * for PGC_S_TEST, and rejected for every other source.
+ */
+bool
+check_locale_collate(char **newval, void **extra, GucSource source)
+{
+	int			locale_enc;
+	int			db_enc;
+
+	/* An empty string is acceptable only as the PGC_S_DEFAULT value. */
+	if (**newval == '\0')
+		return (source == PGC_S_DEFAULT);
+
+	locale_enc = pg_get_encoding_from_locale(*newval, true);
+	db_enc = GetDatabaseEncoding();
+
+	/* SQL_ASCII on either side, or locale_enc == -1, counts as compatible. */
+	if (!(locale_enc == db_enc ||
+		  locale_enc == PG_SQL_ASCII ||
+		  db_enc == PG_SQL_ASCII ||
+		  locale_enc == -1))
+	{
+		if (source == PGC_S_FILE)
+		{
+			guc_free(*newval);
+			*newval = guc_strdup(LOG, "C");
+			if (!*newval)
+				return false;
+		}
+		else if (source != PGC_S_TEST)
+		{
+			/* Attach the detail to the GUC error; no separate WARNING. */
+			GUC_check_errdetail("Locale \"%s\" uses encoding \"%s\", which does not match database encoding \"%s\".",
+								*newval, pg_encoding_to_char(locale_enc), pg_encoding_to_char(db_enc));
+			return false;
+		}
+	}
+
+	return check_locale(LC_COLLATE, *newval, NULL);
+}
+
+void
+assign_locale_collate(const char *newval, void *extra)
+{								/* GUC assign hook for lc_collate; extra is unused */
+	(void) pg_perm_setlocale(LC_COLLATE, newval);	/* result deliberately discarded */
+}
+
 /*
  * We allow LC_MESSAGES to actually be set globally.
  *
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 4ed69ac7ba2..8586832acaa 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -404,6 +404,8 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
 	 * the pg_database tuple.
 	 */
 	SetDatabaseEncoding(dbform->encoding);
+	/* Reset lc_collate to check encoding, and fall back to C if necessary */
+	SetConfigOption("lc_collate", locale_collate, PGC_POSTMASTER, PGC_S_FILE);
 	/* Record it as a GUC internal option, too */
 	SetConfigOption("server_encoding", GetDatabaseEncodingName(),
 					PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT);
diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat
index 3b9d8349078..a36c680719f 100644
--- a/src/backend/utils/misc/guc_parameters.dat
+++ b/src/backend/utils/misc/guc_parameters.dat
@@ -1457,6 +1457,15 @@
   boot_val => 'PG_KRB_SRVTAB',
 },
 
+{ name => 'lc_collate', type => 'string', context => 'PGC_SUSET', group => 'CLIENT_CONN_LOCALE',
+  short_desc => 'Sets the locale for text ordering in extensions.',
+  long_desc => 'An empty string means use the operating system setting.',
+  variable => 'locale_collate',
+  boot_val => '""',
+  check_hook => 'check_locale_collate',
+  assign_hook => 'assign_locale_collate',
+},
+
 { name => 'lc_messages', type => 'string', context => 'PGC_SUSET', group => 'CLIENT_CONN_LOCALE',
   short_desc => 'Sets the language in which messages are displayed.',
   long_desc => 'An empty string means use the operating system setting.',
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index dc9e2255f8a..19332e39e82 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -798,6 +798,8 @@
                                         # encoding
 
 # These settings are initialized by initdb, but they can be changed.
+#lc_collate = ''                        # locale for text ordering (only affects
+                                        # extensions)
 #lc_messages = ''                       # locale for system error message
                                         # strings
 #lc_monetary = 'C'                      # locale for monetary formatting
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 92fe2f531f7..8b2e7bfab6f 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -1312,6 +1312,9 @@ setup_config(void)
 	conflines = replace_guc_value(conflines, "shared_buffers",
 								  repltok, false);
 
+	conflines = replace_guc_value(conflines, "lc_collate",
+								  lc_collate, false);
+
 	conflines = replace_guc_value(conflines, "lc_messages",
 								  lc_messages, false);
 
diff --git a/src/include/utils/guc_hooks.h b/src/include/utils/guc_hooks.h
index 82ac8646a8d..8a20f76eec8 100644
--- a/src/include/utils/guc_hooks.h
+++ b/src/include/utils/guc_hooks.h
@@ -65,6 +65,8 @@ extern bool check_huge_page_size(int *newval, void **extra, GucSource source);
 extern void assign_io_method(int newval, void *extra);
 extern bool check_io_max_concurrency(int *newval, void **extra, GucSource source);
 extern const char *show_in_hot_standby(void);
+extern bool check_locale_collate(char **newval, void **extra, GucSource source);
+extern void assign_locale_collate(const char *newval, void *extra);
 extern bool check_locale_messages(char **newval, void **extra, GucSource source);
 extern void assign_locale_messages(const char *newval, void *extra);
 extern bool check_locale_monetary(char **newval, void **extra, GucSource source);
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 8ad8900cf93..e29497dc7d2 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -41,6 +41,7 @@
 #define UNICODE_CASEMAP_BUFSZ	(UNICODE_CASEMAP_LEN * sizeof(char32_t))
 
 /* GUC settings */
+extern PGDLLIMPORT char *locale_collate;
 extern PGDLLIMPORT char *locale_messages;
 extern PGDLLIMPORT char *locale_monetary;
 extern PGDLLIMPORT char *locale_numeric;
-- 
2.43.0

