v2-0003-pg_stat_statements-add-DSA-based-query-text-stora.patch

application/octet-stream

Filename: v2-0003-pg_stat_statements-add-DSA-based-query-text-stora.patch
Type: application/octet-stream
Part: 2
Message: Re: Improve pg_stat_statements scalability
From 52e18de566596815daba73eea5b8224dd686769b Mon Sep 17 00:00:00 2001
From: Sami Imseih <samimseih@gmail.com>
Date: Thu, 14 May 2026 12:38:29 -0500
Subject: [PATCH v2 3/4] pg_stat_statements: add DSA-based query text storage

Add optional DSA (dynamic shared area) storage for query texts.
When enabled via pg_stat_statements.query_text_memory (default 64MB),
query texts are allocated from a shared DSA area, avoiding the overhead
of file I/O for text storage.  When the DSA limit is reached, new texts
fall back to the existing external file (PGSS_TEXT_FILE).

Setting query_text_memory to 0 disables DSA storage entirely,
reverting to the file-only behavior.
---
 .../pg_stat_statements/pg_stat_statements.c   | 285 +++++++++++++++---
 doc/src/sgml/pgstatstatements.sgml            |  33 +-
 2 files changed, 278 insertions(+), 40 deletions(-)

diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 0e6e65e3e51..af7637b25b9 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -18,8 +18,9 @@
  * strings in which constants are replaced with parameter symbols ($n), to
  * make it clearer what a normalized entry can represent.
  *
- * Each shared pgstat entry carries its own query text, stored in an
- * external file (PGSS_TEXT_FILE).
+ * Each shared pgstat entry carries its own query text.  When DSA storage is
+ * enabled, text is allocated from a shared DSA area.  If DSA is disabled or
+ * exhausted, texts fall back to an external file (PGSS_TEXT_FILE).
  *
  * Eviction of least-used entries is throttled to run at most once every
  * EVICTION_INTERVAL_MS milliseconds.  When eviction is needed, a single
@@ -60,6 +61,8 @@
 #include "storage/lwlock.h"
 #include "storage/shmem.h"
 #include "storage/spin.h"
+#include "storage/dsm_registry.h"
+#include "utils/dsa.h"
 #include "tcop/utility.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
@@ -234,6 +237,7 @@ struct pgssSharedState
 	Size		extent;			/* current extent of query file */
 	int			n_writers;		/* number of active writers to query file */
 	int			gc_count;		/* query file garbage collection cycle count */
+	bool		dsa_created;	/* true once query text DSA has been created */
 	TimestampTz last_eviction_time; /* throttle: last time entry_dealloc ran */
 	pgssGlobalStats stats;		/* global statistics for pgss */
 };
@@ -252,7 +256,12 @@ struct PgStatShared_Pgss
 	int			query_len;		/* length of query text, or -1 if invalid */
 	int			encoding;		/* encoding of query text */
 
-	Size		query_offset;	/* offset in external query text file */
+	/*
+	 * Query text is stored in exactly one location: DSA memory if
+	 * query_text_dp is valid, otherwise the overflow file at query_offset.
+	 */
+	dsa_pointer query_text_dp;	/* DSA pointer to text, or InvalidDsaPointer */
+	Size		query_offset;	/* offset in overflow file (when dp invalid) */
 };
 
 /*
@@ -274,6 +283,8 @@ struct PendingDrop
 
 /* Links to shared memory state */
 static pgssSharedState *pgss;
+static dsa_area *pgss_qtext_dsa = NULL; /* backend-local handle to query text
+										 * DSA */
 
 /* Buffer used during serialization to avoid reloading text file per entry */
 static char *pgss_qtext_write_buffer = NULL;
@@ -354,6 +365,7 @@ static bool pgss_track_utility = true;	/* whether to track utility commands */
 static bool pgss_track_planning = false;	/* whether to track planning
 											 * duration */
 static bool pgss_save = true;	/* whether to save stats across shutdown */
+static int	pgss_query_text_memory = 64;	/* MB of DSA for query texts */
 #define pgss_enabled(level) \
 	(!IsParallelWorker() && \
 	(pgss_track == PGSS_TRACK_ALL || \
@@ -416,7 +428,8 @@ static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
 										bool showtext);
 static void entry_dealloc(void);
 static bool qtext_store(const char *query, int query_len,
-						Size *query_offset, int *gc_count);
+						Size *query_offset, dsa_pointer *query_dp,
+						int *gc_count);
 static char *qtext_load_file(Size *buffer_size);
 static char *qtext_fetch(Size query_offset, int query_len,
 						 char *buffer, Size buffer_size);
@@ -426,6 +439,8 @@ static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_
 static char *generate_normalized_query(const JumbleState *jstate,
 									   const char *query,
 									   int query_loc, int *query_len_p);
+static void pgss_qtext_dsa_attach(void);
+static void pgss_query_text_memory_assign(int newval, void *extra);
 static void pgss_entry_init(PgStatShared_Pgss *shared_entry,
 							const pgssHashKey *key, int encoding);
 static void pgss_store_query_text(PgStatShared_Pgss *shared_entry,
@@ -540,6 +555,19 @@ _PG_init(void)
 							 NULL,
 							 NULL);
 
+	DefineCustomIntVariable("pg_stat_statements.query_text_memory",
+							"Amount of shared memory (MB) for storing query texts.",
+							NULL,
+							&pgss_query_text_memory,
+							64,
+							0,
+							MaxAllocSize / (1024 * 1024),
+							PGC_SIGHUP,
+							GUC_UNIT_MB,
+							NULL,
+							pgss_query_text_memory_assign,
+							NULL);
+
 	MarkGUCPrefixReserved("pg_stat_statements");
 
 	/*
@@ -639,6 +667,44 @@ pgss_shmem_init(void *arg)
 	FreeFile(qfile);
 }
 
+/*
+ * Ensure the backend-local DSA pointer for query texts is set up.
+ * Creates the DSA on first use (under exclusive lock) or attaches to it.
+ */
+static void
+pgss_qtext_dsa_attach(void)
+{
+	bool		found;
+
+	if (pgss_qtext_dsa != NULL)
+		return;
+
+	/* DSA never created and memory disabled, nothing to do */
+	if (!pgss->dsa_created && pgss_query_text_memory == 0)
+		return;
+
+	pgss_qtext_dsa = GetNamedDSA("pg_stat_statements_qtext", &found);
+	if (!found)
+	{
+		dsa_set_size_limit(pgss_qtext_dsa,
+						   (size_t) pgss_query_text_memory * 1024 * 1024);
+		pgss->dsa_created = true;
+	}
+}
+
+/*
+ * GUC assign_hook for pg_stat_statements.query_text_memory.
+ * Update the DSA size limit when the setting changes at runtime.
+ */
+static void
+pgss_query_text_memory_assign(int newval, void *extra)
+{
+	if (pgss_qtext_dsa)
+		dsa_set_size_limit(pgss_qtext_dsa,
+						   (size_t) newval * 1024 * 1024);
+}
+
+
 /*
  * Post-parse-analysis hook: mark query with a queryId
  */
@@ -1086,11 +1152,13 @@ pgss_entry_init(PgStatShared_Pgss *shared_entry,
 	shared_entry->minmax_stats_since = shared_entry->stats_since;
 	shared_entry->query_len = -1;
 	shared_entry->encoding = encoding;
+	shared_entry->query_text_dp = InvalidDsaPointer;
 	shared_entry->query_offset = 0;
 }
 
 /*
- * Store query text into a shared entry via the external text file.
+ * Store query text into a shared entry, trying DSA first and falling back
+ * to the external text file.
  *
  * Caller must hold the entry lock.  Does nothing if text is already present.
  */
@@ -1098,20 +1166,43 @@ static void
 pgss_store_query_text(PgStatShared_Pgss *shared_entry,
 					  const char *query, int query_len, int encoding)
 {
-	Size		query_offset;
-	int			gc_count;
+	dsa_pointer dp;
 
 	if (shared_entry->query_len >= 0)
 		return;
 
-	LWLockAcquire(&pgss->lock.lock, LW_SHARED);
-	if (qtext_store(query, query_len, &query_offset, &gc_count))
+	pgss_qtext_dsa_attach();
+	dp = InvalidDsaPointer;
+	if (pgss_qtext_dsa && pgss_query_text_memory > 0)
+		dp = dsa_allocate_extended(pgss_qtext_dsa, query_len + 1,
+								   DSA_ALLOC_NO_OOM);
+
+	if (DsaPointerIsValid(dp))
 	{
-		shared_entry->query_offset = query_offset;
+		char	   *dst = dsa_get_address(pgss_qtext_dsa, dp);
+
+		memcpy(dst, query, query_len);
+		dst[query_len] = '\0';
+		shared_entry->query_text_dp = dp;
 		shared_entry->query_len = query_len;
 		shared_entry->encoding = encoding;
 	}
-	LWLockRelease(&pgss->lock.lock);
+	else
+	{
+		Size		query_offset;
+		dsa_pointer query_dp;
+		int			gc_count;
+
+		LWLockAcquire(&pgss->lock.lock, LW_SHARED);
+		if (qtext_store(query, query_len, &query_offset,
+						&query_dp, &gc_count))
+		{
+			shared_entry->query_offset = query_offset;
+			shared_entry->query_len = query_len;
+			shared_entry->encoding = encoding;
+		}
+		LWLockRelease(&pgss->lock.lock);
+	}
 }
 
 /*
@@ -1692,10 +1783,23 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo,
 
 				if (shared_entry->query_len >= 0)
 				{
-					qstr = qtext_fetch(shared_entry->query_offset,
-									   shared_entry->query_len,
-									   qbuffer,
-									   qbuffer_size);
+					if (DsaPointerIsValid(shared_entry->query_text_dp))
+					{
+						pgss_qtext_dsa_attach();
+						/*
+						 * XXX: should we add a warning if we have a valid DSA
+						 * pointer but can't attach to the DSA? It should be
+						 * rare, if ever.
+						 */
+						if (pgss_qtext_dsa)
+							qstr = (char *) dsa_get_address(pgss_qtext_dsa,
+															shared_entry->query_text_dp);
+					}
+					else
+						qstr = qtext_fetch(shared_entry->query_offset,
+										   shared_entry->query_len,
+										   qbuffer,
+										   qbuffer_size);
 				}
 
 				if (qstr)
@@ -1924,14 +2028,43 @@ pg_stat_statements_info(PG_FUNCTION_ARGS)
  */
 static bool
 qtext_store(const char *query, int query_len,
-			Size *query_offset, int *gc_count)
+			Size *query_offset, dsa_pointer *query_dp,
+			int *gc_count)
 {
 	Size		off;
 	int			fd;
 
 	*query_offset = 0;
+	*query_dp = InvalidDsaPointer;
 
 	/*
+	 * Try to store the query text in DSA memory first, otherwise write to
+	 * disk.
+	 *
+	 * Caller must have called pgss_qtext_dsa_attach() before acquiring
+	 * pgss->lock to avoid deadlock on first-time DSA creation.
+	 */
+	if (pgss_qtext_dsa && pgss_query_text_memory > 0)
+	{
+		dsa_pointer dp;
+
+		dp = dsa_allocate_extended(pgss_qtext_dsa, query_len + 1,
+								   DSA_ALLOC_NO_OOM);
+		if (DsaPointerIsValid(dp))
+		{
+			char	   *dst = dsa_get_address(pgss_qtext_dsa, dp);
+
+			memcpy(dst, query, query_len);
+			dst[query_len] = '\0';
+			*query_dp = dp;
+			return true;
+		}
+	}
+
+	/*
+	 * DSA allocation failed (memory limit reached).  Fall back to the
+	 * external query text file.
+	 *
 	 * We use a spinlock to protect extent/n_writers/gc_count, so that
 	 * multiple processes may execute this function concurrently.
 	 */
@@ -2221,6 +2354,10 @@ gc_qtexts(void)
 
 		shared_entry = (PgStatShared_Pgss *) dsa_get_address(pgStatLocal.dsa, p->body);
 
+		/* Skip entries whose text lives in DSA, not in the file */
+		if (DsaPointerIsValid(shared_entry->query_text_dp))
+			continue;
+
 		query_len = shared_entry->query_len;
 		if (query_len < 0)
 			continue;
@@ -2292,7 +2429,8 @@ gc_fail:
 
 	/*
 	 * Since the contents of the external file are now uncertain, mark all
-	 * entries as having invalid texts.
+	 * file-based text entries as having invalid texts.  DSA entries are
+	 * unaffected.
 	 */
 	dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
 	while ((p = dshash_seq_next(&hstat)) != NULL)
@@ -2305,6 +2443,8 @@ gc_fail:
 			continue;
 
 		shared_entry = (PgStatShared_Pgss *) dsa_get_address(pgStatLocal.dsa, p->body);
+		if (DsaPointerIsValid(shared_entry->query_text_dp))
+			continue;
 		shared_entry->query_offset = 0;
 		shared_entry->query_len = -1;
 	}
@@ -2338,6 +2478,13 @@ if (shared) { \
 	} \
 	else \
 	{ \
+		if (DsaPointerIsValid((shared)->query_text_dp)) \
+		{ \
+			pgss_qtext_dsa_attach(); \
+			if (pgss_qtext_dsa) \
+				dsa_free(pgss_qtext_dsa, (shared)->query_text_dp); \
+			(shared)->query_text_dp = InvalidDsaPointer; \
+		} \
 		(shared)->query_len = -1; \
 		pgstat_drop_entry(PGSTAT_KIND_PGSS, (key_ptr)->dbid, \
 						  pgss_objid(key_ptr)); \
@@ -2438,6 +2585,13 @@ entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
 				}
 				else
 				{
+					if (DsaPointerIsValid(shared_entry->query_text_dp))
+					{
+						pgss_qtext_dsa_attach();
+						if (pgss_qtext_dsa)
+							dsa_free(pgss_qtext_dsa, shared_entry->query_text_dp);
+						shared_entry->query_text_dp = InvalidDsaPointer;
+					}
 					shared_entry->query_len = -1;
 
 					/* Collect for deferred drop */
@@ -2668,13 +2822,23 @@ pgss_to_serialized_data(const PgStat_HashKey *key,
 	{
 		char	   *qstr = NULL;
 
-		if (!pgss_qtext_write_buffer && pgss)
-			pgss_qtext_write_buffer = qtext_load_file(&pgss_qtext_write_buffer_size);
+		if (DsaPointerIsValid(entry->query_text_dp))
+		{
+			pgss_qtext_dsa_attach();
+			if (pgss_qtext_dsa)
+				qstr = (char *) dsa_get_address(pgss_qtext_dsa,
+												entry->query_text_dp);
+		}
+		else
+		{
+			if (!pgss_qtext_write_buffer && pgss)
+				pgss_qtext_write_buffer = qtext_load_file(&pgss_qtext_write_buffer_size);
 
-		if (pgss_qtext_write_buffer)
-			qstr = qtext_fetch(entry->query_offset, query_len,
-							   pgss_qtext_write_buffer,
-							   pgss_qtext_write_buffer_size);
+			if (pgss_qtext_write_buffer)
+				qstr = qtext_fetch(entry->query_offset, query_len,
+								   pgss_qtext_write_buffer,
+								   pgss_qtext_write_buffer_size);
+		}
 
 		if (qstr)
 			pgstat_write_chunk(statfile, qstr, query_len + 1);
@@ -2723,14 +2887,16 @@ pgss_from_serialized_data(const PgStat_HashKey *key,
 	/* Initialize text fields */
 	entry->query_len = -1;
 	entry->encoding = encoding;
+	entry->query_text_dp = InvalidDsaPointer;
 	entry->query_offset = 0;
 
 	/*
-	 * Read the query text and store it in the external file.
+	 * Read the query text and store it directly in the entry.
 	 */
 	if (query_len >= 0)
 	{
 		char	   *buf = palloc(query_len + 1);
+		dsa_pointer dp = InvalidDsaPointer;
 
 		if (!pgstat_read_chunk(statfile, buf, query_len + 1))
 		{
@@ -2738,25 +2904,49 @@ pgss_from_serialized_data(const PgStat_HashKey *key,
 			return false;
 		}
 
-		if (!pgss_qtext_rebuild_file)
+		/* Try DSA allocation first */
+		pgss_qtext_dsa_attach();
+		if (pgss_qtext_dsa && pgss_query_text_memory > 0)
 		{
-			pgss_qtext_rebuild_file = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
-			if (!pgss_qtext_rebuild_file)
+			dp = dsa_allocate_extended(pgss_qtext_dsa, query_len + 1,
+									   DSA_ALLOC_NO_OOM);
+			if (DsaPointerIsValid(dp))
 			{
-				pfree(buf);
-				return false;
+				char	   *dst = dsa_get_address(pgss_qtext_dsa, dp);
+
+				memcpy(dst, buf, query_len + 1);
 			}
-			pgss_qtext_rebuild_extent = 0;
 		}
 
-		entry->query_offset = pgss_qtext_rebuild_extent;
-
-		if (fwrite(buf, 1, query_len + 1, pgss_qtext_rebuild_file) != (size_t) (query_len + 1))
+		if (DsaPointerIsValid(dp))
 		{
-			pfree(buf);
-			return false;
+			entry->query_text_dp = dp;
+			entry->query_offset = 0;
+		}
+		else
+		{
+			/* DSA unavailable or full; fall back to file */
+			if (!pgss_qtext_rebuild_file)
+			{
+				pgss_qtext_rebuild_file = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
+				if (!pgss_qtext_rebuild_file)
+				{
+					pfree(buf);
+					return false;
+				}
+				pgss_qtext_rebuild_extent = 0;
+			}
+
+			entry->query_text_dp = InvalidDsaPointer;
+			entry->query_offset = pgss_qtext_rebuild_extent;
+
+			if (fwrite(buf, 1, query_len + 1, pgss_qtext_rebuild_file) != (size_t) (query_len + 1))
+			{
+				pfree(buf);
+				return false;
+			}
+			pgss_qtext_rebuild_extent += query_len + 1;
 		}
-		pgss_qtext_rebuild_extent += query_len + 1;
 
 		entry->query_len = query_len;
 		entry->encoding = encoding;
@@ -2885,12 +3075,33 @@ entry_dealloc(void)
 	else
 		pgss->mean_query_len = ASSUMED_LENGTH_INIT;
 
-	/* Drop the bottom fraction */
+	/* Drop the bottom fraction, freeing DSA text if applicable */
 	nvictims = Max(10, nentries * USAGE_DEALLOC_PERCENT / 100);
 	nvictims = Min(nvictims, nentries);
 
+	pgss_qtext_dsa_attach();
 	for (i = 0; i < nvictims; i++)
 	{
+		PgStat_EntryRef *victim_ref;
+
+		/* Free DSA text from the entry before dropping it */
+		victim_ref = pgstat_get_entry_ref(PGSTAT_KIND_PGSS,
+										  entries[i].key.dbid,
+										  pgss_objid(&entries[i].key),
+										  false, NULL);
+		if (victim_ref)
+		{
+			PgStatShared_Pgss *victim = (PgStatShared_Pgss *) victim_ref->shared_stats;
+
+			if (DsaPointerIsValid(victim->query_text_dp))
+			{
+				if (pgss_qtext_dsa)
+					dsa_free(pgss_qtext_dsa, victim->query_text_dp);
+				victim->query_text_dp = InvalidDsaPointer;
+			}
+			victim->query_len = -1;
+		}
+
 		pgstat_drop_entry(PGSTAT_KIND_PGSS,
 						  entries[i].key.dbid,
 						  pgss_objid(&entries[i].key));
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 19b1dab74c7..0f4ab34965b 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -924,6 +924,30 @@ calls | 2
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term>
+     <varname>pg_stat_statements.query_text_memory</varname> (<type>integer</type>)
+     <indexterm>
+      <primary><varname>pg_stat_statements.query_text_memory</varname> configuration parameter</primary>
+     </indexterm>
+    </term>
+
+    <listitem>
+     <para>
+      <varname>pg_stat_statements.query_text_memory</varname> specifies the
+      amount of shared memory used for storing query texts in
+      DSA memory.  When this limit is reached, new query texts overflow to an
+      on-disk file.  Setting this to <literal>0</literal> disables DSA storage
+      entirely, causing all query texts to be written to disk.
+      If this value is specified without units, it is taken as megabytes.
+      The default value is <literal>64MB</literal>.
+      This parameter can be changed at any time by reloading the server
+      configuration.  Lowering the value does not immediately free existing
+      DSA allocations; they are released gradually as entries are evicted.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term>
      <varname>pg_stat_statements.track</varname> (<type>enum</type>)
@@ -1012,14 +1036,16 @@ calls | 2
 
   <para>
    The module requires additional shared memory proportional to
-   <varname>pg_stat_statements.max</varname>.  Note that this memory is
-   consumed whenever the module is loaded, even if
+   <varname>pg_stat_statements.max</varname> and
+   <varname>pg_stat_statements.query_text_memory</varname>.  Note that this
+   memory is consumed whenever the module is loaded, even if
    <varname>pg_stat_statements.track</varname> is set to <literal>none</literal>.
   </para>
 
   <para>
    These parameters are typically set in <filename>postgresql.conf</filename>.
-   Note that <varname>pg_stat_statements.max</varname> can be changed
+   Note that <varname>pg_stat_statements.max</varname> and
+   <varname>pg_stat_statements.query_text_memory</varname> can be changed
    without a server restart by reloading the configuration.
    Typical usage might be:
 
@@ -1030,6 +1056,7 @@ shared_preload_libraries = 'pg_stat_statements'
 compute_query_id = on
 pg_stat_statements.max = 10000
 pg_stat_statements.track = all
+pg_stat_statements.query_text_memory = 64
 </programlisting>
   </para>
  </sect2>
-- 
2.50.1 (Apple Git-155)