v2-0003-pg_stat_statements-add-DSA-based-query-text-stora.patch
application/octet-stream
Filename: v2-0003-pg_stat_statements-add-DSA-based-query-text-stora.patch
Type: application/octet-stream
Part: 2
From 52e18de566596815daba73eea5b8224dd686769b Mon Sep 17 00:00:00 2001
From: Sami Imseih <samimseih@gmail.com>
Date: Thu, 14 May 2026 12:38:29 -0500
Subject: [PATCH v2 3/4] pg_stat_statements: add DSA-based query text storage
Add optional DSA (dynamic shared area) storage for query texts.
When enabled via pg_stat_statements.query_text_memory (default 64MB),
query texts are allocated from a shared DSA area, avoiding the overhead
of file I/O for text storage. When the DSA limit is reached, new texts
fall back to the existing external file (PGSS_TEXT_FILE).
Setting query_text_memory to 0 disables DSA storage entirely,
reverting to the file-only behavior.
---
.../pg_stat_statements/pg_stat_statements.c | 285 +++++++++++++++---
doc/src/sgml/pgstatstatements.sgml | 33 +-
2 files changed, 278 insertions(+), 40 deletions(-)
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 0e6e65e3e51..af7637b25b9 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -18,8 +18,9 @@
* strings in which constants are replaced with parameter symbols ($n), to
* make it clearer what a normalized entry can represent.
*
- * Each shared pgstat entry carries its own query text, stored in an
- * external file (PGSS_TEXT_FILE).
+ * Each shared pgstat entry carries its own query text. When DSA storage is
+ * enabled, text is allocated from a shared DSA area. If DSA is disabled or
+ * exhausted, texts fall back to an external file (PGSS_TEXT_FILE).
*
* Eviction of least-used entries is throttled to run at most once every
* EVICTION_INTERVAL_MS milliseconds. When eviction is needed, a single
@@ -60,6 +61,8 @@
#include "storage/lwlock.h"
#include "storage/shmem.h"
#include "storage/spin.h"
+#include "storage/dsm_registry.h"
+#include "utils/dsa.h"
#include "tcop/utility.h"
#include "utils/acl.h"
#include "utils/builtins.h"
@@ -234,6 +237,7 @@ struct pgssSharedState
Size extent; /* current extent of query file */
int n_writers; /* number of active writers to query file */
int gc_count; /* query file garbage collection cycle count */
+ bool dsa_created; /* true once query text DSA has been created */
TimestampTz last_eviction_time; /* throttle: last time entry_dealloc ran */
pgssGlobalStats stats; /* global statistics for pgss */
};
@@ -252,7 +256,12 @@ struct PgStatShared_Pgss
int query_len; /* length of query text, or -1 if invalid */
int encoding; /* encoding of query text */
- Size query_offset; /* offset in external query text file */
+ /*
+ * Query text is stored in exactly one location: DSA memory if
+ * query_text_dp is valid, otherwise the overflow file at query_offset.
+ */
+ dsa_pointer query_text_dp; /* DSA pointer to text, or InvalidDsaPointer */
+ Size query_offset; /* offset in overflow file (when dp invalid) */
};
/*
@@ -274,6 +283,8 @@ struct PendingDrop
/* Links to shared memory state */
static pgssSharedState *pgss;
+static dsa_area *pgss_qtext_dsa = NULL; /* backend-local handle to query text
+ * DSA */
/* Buffer used during serialization to avoid reloading text file per entry */
static char *pgss_qtext_write_buffer = NULL;
@@ -354,6 +365,7 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
+static int pgss_query_text_memory = 64; /* MB of DSA for query texts */
#define pgss_enabled(level) \
(!IsParallelWorker() && \
(pgss_track == PGSS_TRACK_ALL || \
@@ -416,7 +428,8 @@ static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
bool showtext);
static void entry_dealloc(void);
static bool qtext_store(const char *query, int query_len,
- Size *query_offset, int *gc_count);
+ Size *query_offset, dsa_pointer *query_dp,
+ int *gc_count);
static char *qtext_load_file(Size *buffer_size);
static char *qtext_fetch(Size query_offset, int query_len,
char *buffer, Size buffer_size);
@@ -426,6 +439,8 @@ static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_
static char *generate_normalized_query(const JumbleState *jstate,
const char *query,
int query_loc, int *query_len_p);
+static void pgss_qtext_dsa_attach(void);
+static void pgss_query_text_memory_assign(int newval, void *extra);
static void pgss_entry_init(PgStatShared_Pgss *shared_entry,
const pgssHashKey *key, int encoding);
static void pgss_store_query_text(PgStatShared_Pgss *shared_entry,
@@ -540,6 +555,19 @@ _PG_init(void)
NULL,
NULL);
+ DefineCustomIntVariable("pg_stat_statements.query_text_memory",
+ "Amount of shared memory (MB) for storing query texts.",
+ NULL,
+ &pgss_query_text_memory,
+ 64,
+ 0,
+ MaxAllocSize / (1024 * 1024),
+ PGC_SIGHUP,
+ GUC_UNIT_MB,
+ NULL,
+ pgss_query_text_memory_assign,
+ NULL);
+
MarkGUCPrefixReserved("pg_stat_statements");
/*
@@ -639,6 +667,44 @@ pgss_shmem_init(void *arg)
FreeFile(qfile);
}
+/*
+ * Ensure the backend-local DSA pointer for query texts is set up.
+ * Creates the DSA on first use (under exclusive lock) or attaches to it.
+ */
+static void
+pgss_qtext_dsa_attach(void)
+{
+ bool found;
+
+ if (pgss_qtext_dsa != NULL)
+ return;
+
+ /* DSA never created and memory disabled, nothing to do */
+ if (!pgss->dsa_created && pgss_query_text_memory == 0)
+ return;
+
+ pgss_qtext_dsa = GetNamedDSA("pg_stat_statements_qtext", &found);
+ if (!found)
+ {
+ dsa_set_size_limit(pgss_qtext_dsa,
+ (size_t) pgss_query_text_memory * 1024 * 1024);
+ pgss->dsa_created = true;
+ }
+}
+
+/*
+ * GUC assign_hook for pg_stat_statements.query_text_memory.
+ * Update the DSA size limit when the setting changes at runtime.
+ */
+static void
+pgss_query_text_memory_assign(int newval, void *extra)
+{
+ if (pgss_qtext_dsa)
+ dsa_set_size_limit(pgss_qtext_dsa,
+ (size_t) newval * 1024 * 1024);
+}
+
+
/*
* Post-parse-analysis hook: mark query with a queryId
*/
@@ -1086,11 +1152,13 @@ pgss_entry_init(PgStatShared_Pgss *shared_entry,
shared_entry->minmax_stats_since = shared_entry->stats_since;
shared_entry->query_len = -1;
shared_entry->encoding = encoding;
+ shared_entry->query_text_dp = InvalidDsaPointer;
shared_entry->query_offset = 0;
}
/*
- * Store query text into a shared entry via the external text file.
+ * Store query text into a shared entry, trying DSA first and falling back
+ * to the external text file.
*
* Caller must hold the entry lock. Does nothing if text is already present.
*/
@@ -1098,20 +1166,43 @@ static void
pgss_store_query_text(PgStatShared_Pgss *shared_entry,
const char *query, int query_len, int encoding)
{
- Size query_offset;
- int gc_count;
+ dsa_pointer dp;
if (shared_entry->query_len >= 0)
return;
- LWLockAcquire(&pgss->lock.lock, LW_SHARED);
- if (qtext_store(query, query_len, &query_offset, &gc_count))
+ pgss_qtext_dsa_attach();
+ dp = InvalidDsaPointer;
+ if (pgss_qtext_dsa && pgss_query_text_memory > 0)
+ dp = dsa_allocate_extended(pgss_qtext_dsa, query_len + 1,
+ DSA_ALLOC_NO_OOM);
+
+ if (DsaPointerIsValid(dp))
{
- shared_entry->query_offset = query_offset;
+ char *dst = dsa_get_address(pgss_qtext_dsa, dp);
+
+ memcpy(dst, query, query_len);
+ dst[query_len] = '\0';
+ shared_entry->query_text_dp = dp;
shared_entry->query_len = query_len;
shared_entry->encoding = encoding;
}
- LWLockRelease(&pgss->lock.lock);
+ else
+ {
+ Size query_offset;
+ dsa_pointer query_dp;
+ int gc_count;
+
+ LWLockAcquire(&pgss->lock.lock, LW_SHARED);
+ if (qtext_store(query, query_len, &query_offset,
+ &query_dp, &gc_count))
+ {
+ shared_entry->query_offset = query_offset;
+ shared_entry->query_len = query_len;
+ shared_entry->encoding = encoding;
+ }
+ LWLockRelease(&pgss->lock.lock);
+ }
}
/*
@@ -1692,10 +1783,23 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo,
if (shared_entry->query_len >= 0)
{
- qstr = qtext_fetch(shared_entry->query_offset,
- shared_entry->query_len,
- qbuffer,
- qbuffer_size);
+ if (DsaPointerIsValid(shared_entry->query_text_dp))
+ {
+ pgss_qtext_dsa_attach();
+ /*
+ * XXX: should we add a warning if we have a valid DSA
+ * pointer but can't attach to the DSA? It should be
+ * rare, if ever.
+ */
+ if (pgss_qtext_dsa)
+ qstr = (char *) dsa_get_address(pgss_qtext_dsa,
+ shared_entry->query_text_dp);
+ }
+ else
+ qstr = qtext_fetch(shared_entry->query_offset,
+ shared_entry->query_len,
+ qbuffer,
+ qbuffer_size);
}
if (qstr)
@@ -1924,14 +2028,43 @@ pg_stat_statements_info(PG_FUNCTION_ARGS)
*/
static bool
qtext_store(const char *query, int query_len,
- Size *query_offset, int *gc_count)
+ Size *query_offset, dsa_pointer *query_dp,
+ int *gc_count)
{
Size off;
int fd;
*query_offset = 0;
+ *query_dp = InvalidDsaPointer;
/*
+ * Try to store the query text in DSA memory first, otherwise write to
+ * disk.
+ *
+ * Caller must have called pgss_qtext_dsa_attach() before acquiring
+ * pgss->lock to avoid deadlock on first-time DSA creation.
+ */
+ if (pgss_qtext_dsa && pgss_query_text_memory > 0)
+ {
+ dsa_pointer dp;
+
+ dp = dsa_allocate_extended(pgss_qtext_dsa, query_len + 1,
+ DSA_ALLOC_NO_OOM);
+ if (DsaPointerIsValid(dp))
+ {
+ char *dst = dsa_get_address(pgss_qtext_dsa, dp);
+
+ memcpy(dst, query, query_len);
+ dst[query_len] = '\0';
+ *query_dp = dp;
+ return true;
+ }
+ }
+
+ /*
+ * DSA allocation failed (memory limit reached). Fall back to the
+ * external query text file.
+ *
* We use a spinlock to protect extent/n_writers/gc_count, so that
* multiple processes may execute this function concurrently.
*/
@@ -2221,6 +2354,10 @@ gc_qtexts(void)
shared_entry = (PgStatShared_Pgss *) dsa_get_address(pgStatLocal.dsa, p->body);
+ /* Skip entries whose text lives in DSA, not in the file */
+ if (DsaPointerIsValid(shared_entry->query_text_dp))
+ continue;
+
query_len = shared_entry->query_len;
if (query_len < 0)
continue;
@@ -2292,7 +2429,8 @@ gc_fail:
/*
* Since the contents of the external file are now uncertain, mark all
- * entries as having invalid texts.
+ * file-based text entries as having invalid texts. DSA entries are
+ * unaffected.
*/
dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
while ((p = dshash_seq_next(&hstat)) != NULL)
@@ -2305,6 +2443,8 @@ gc_fail:
continue;
shared_entry = (PgStatShared_Pgss *) dsa_get_address(pgStatLocal.dsa, p->body);
+ if (DsaPointerIsValid(shared_entry->query_text_dp))
+ continue;
shared_entry->query_offset = 0;
shared_entry->query_len = -1;
}
@@ -2338,6 +2478,13 @@ if (shared) { \
} \
else \
{ \
+ if (DsaPointerIsValid((shared)->query_text_dp)) \
+ { \
+ pgss_qtext_dsa_attach(); \
+ if (pgss_qtext_dsa) \
+ dsa_free(pgss_qtext_dsa, (shared)->query_text_dp); \
+ (shared)->query_text_dp = InvalidDsaPointer; \
+ } \
(shared)->query_len = -1; \
pgstat_drop_entry(PGSTAT_KIND_PGSS, (key_ptr)->dbid, \
pgss_objid(key_ptr)); \
@@ -2438,6 +2585,13 @@ entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
}
else
{
+ if (DsaPointerIsValid(shared_entry->query_text_dp))
+ {
+ pgss_qtext_dsa_attach();
+ if (pgss_qtext_dsa)
+ dsa_free(pgss_qtext_dsa, shared_entry->query_text_dp);
+ shared_entry->query_text_dp = InvalidDsaPointer;
+ }
shared_entry->query_len = -1;
/* Collect for deferred drop */
@@ -2668,13 +2822,23 @@ pgss_to_serialized_data(const PgStat_HashKey *key,
{
char *qstr = NULL;
- if (!pgss_qtext_write_buffer && pgss)
- pgss_qtext_write_buffer = qtext_load_file(&pgss_qtext_write_buffer_size);
+ if (DsaPointerIsValid(entry->query_text_dp))
+ {
+ pgss_qtext_dsa_attach();
+ if (pgss_qtext_dsa)
+ qstr = (char *) dsa_get_address(pgss_qtext_dsa,
+ entry->query_text_dp);
+ }
+ else
+ {
+ if (!pgss_qtext_write_buffer && pgss)
+ pgss_qtext_write_buffer = qtext_load_file(&pgss_qtext_write_buffer_size);
- if (pgss_qtext_write_buffer)
- qstr = qtext_fetch(entry->query_offset, query_len,
- pgss_qtext_write_buffer,
- pgss_qtext_write_buffer_size);
+ if (pgss_qtext_write_buffer)
+ qstr = qtext_fetch(entry->query_offset, query_len,
+ pgss_qtext_write_buffer,
+ pgss_qtext_write_buffer_size);
+ }
if (qstr)
pgstat_write_chunk(statfile, qstr, query_len + 1);
@@ -2723,14 +2887,16 @@ pgss_from_serialized_data(const PgStat_HashKey *key,
/* Initialize text fields */
entry->query_len = -1;
entry->encoding = encoding;
+ entry->query_text_dp = InvalidDsaPointer;
entry->query_offset = 0;
/*
- * Read the query text and store it in the external file.
+ * Read the query text and store it directly in the entry.
*/
if (query_len >= 0)
{
char *buf = palloc(query_len + 1);
+ dsa_pointer dp = InvalidDsaPointer;
if (!pgstat_read_chunk(statfile, buf, query_len + 1))
{
@@ -2738,25 +2904,49 @@ pgss_from_serialized_data(const PgStat_HashKey *key,
return false;
}
- if (!pgss_qtext_rebuild_file)
+ /* Try DSA allocation first */
+ pgss_qtext_dsa_attach();
+ if (pgss_qtext_dsa && pgss_query_text_memory > 0)
{
- pgss_qtext_rebuild_file = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
- if (!pgss_qtext_rebuild_file)
+ dp = dsa_allocate_extended(pgss_qtext_dsa, query_len + 1,
+ DSA_ALLOC_NO_OOM);
+ if (DsaPointerIsValid(dp))
{
- pfree(buf);
- return false;
+ char *dst = dsa_get_address(pgss_qtext_dsa, dp);
+
+ memcpy(dst, buf, query_len + 1);
}
- pgss_qtext_rebuild_extent = 0;
}
- entry->query_offset = pgss_qtext_rebuild_extent;
-
- if (fwrite(buf, 1, query_len + 1, pgss_qtext_rebuild_file) != (size_t) (query_len + 1))
+ if (DsaPointerIsValid(dp))
{
- pfree(buf);
- return false;
+ entry->query_text_dp = dp;
+ entry->query_offset = 0;
+ }
+ else
+ {
+ /* DSA unavailable or full; fall back to file */
+ if (!pgss_qtext_rebuild_file)
+ {
+ pgss_qtext_rebuild_file = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
+ if (!pgss_qtext_rebuild_file)
+ {
+ pfree(buf);
+ return false;
+ }
+ pgss_qtext_rebuild_extent = 0;
+ }
+
+ entry->query_text_dp = InvalidDsaPointer;
+ entry->query_offset = pgss_qtext_rebuild_extent;
+
+ if (fwrite(buf, 1, query_len + 1, pgss_qtext_rebuild_file) != (size_t) (query_len + 1))
+ {
+ pfree(buf);
+ return false;
+ }
+ pgss_qtext_rebuild_extent += query_len + 1;
}
- pgss_qtext_rebuild_extent += query_len + 1;
entry->query_len = query_len;
entry->encoding = encoding;
@@ -2885,12 +3075,33 @@ entry_dealloc(void)
else
pgss->mean_query_len = ASSUMED_LENGTH_INIT;
- /* Drop the bottom fraction */
+ /* Drop the bottom fraction, freeing DSA text if applicable */
nvictims = Max(10, nentries * USAGE_DEALLOC_PERCENT / 100);
nvictims = Min(nvictims, nentries);
+ pgss_qtext_dsa_attach();
for (i = 0; i < nvictims; i++)
{
+ PgStat_EntryRef *victim_ref;
+
+ /* Free DSA text from the entry before dropping it */
+ victim_ref = pgstat_get_entry_ref(PGSTAT_KIND_PGSS,
+ entries[i].key.dbid,
+ pgss_objid(&entries[i].key),
+ false, NULL);
+ if (victim_ref)
+ {
+ PgStatShared_Pgss *victim = (PgStatShared_Pgss *) victim_ref->shared_stats;
+
+ if (DsaPointerIsValid(victim->query_text_dp))
+ {
+ if (pgss_qtext_dsa)
+ dsa_free(pgss_qtext_dsa, victim->query_text_dp);
+ victim->query_text_dp = InvalidDsaPointer;
+ }
+ victim->query_len = -1;
+ }
+
pgstat_drop_entry(PGSTAT_KIND_PGSS,
entries[i].key.dbid,
pgss_objid(&entries[i].key));
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 19b1dab74c7..0f4ab34965b 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -924,6 +924,30 @@ calls | 2
</listitem>
</varlistentry>
+ <varlistentry>
+ <term>
+ <varname>pg_stat_statements.query_text_memory</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>pg_stat_statements.query_text_memory</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ <varname>pg_stat_statements.query_text_memory</varname> specifies the
+ amount of shared memory used for storing query texts in
+ DSA memory. When this limit is reached, new query texts overflow to an
+ on-disk file. Setting this to <literal>0</literal> disables DSA storage
+ entirely, causing all query texts to be written to disk.
+ If this value is specified without units, it is taken as megabytes.
+ The default value is <literal>64MB</literal>.
+ This parameter can be changed at any time by reloading the server
+ configuration. Lowering the value does not immediately free existing
+ DSA allocations; they are released gradually as entries are evicted.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term>
<varname>pg_stat_statements.track</varname> (<type>enum</type>)
@@ -1012,14 +1036,16 @@ calls | 2
<para>
The module requires additional shared memory proportional to
- <varname>pg_stat_statements.max</varname>. Note that this memory is
- consumed whenever the module is loaded, even if
+ <varname>pg_stat_statements.max</varname> and
+ <varname>pg_stat_statements.query_text_memory</varname>. Note that this
+ memory is consumed whenever the module is loaded, even if
<varname>pg_stat_statements.track</varname> is set to <literal>none</literal>.
</para>
<para>
These parameters are typically set in <filename>postgresql.conf</filename>.
- Note that <varname>pg_stat_statements.max</varname> can be changed
+ Note that <varname>pg_stat_statements.max</varname> and
+ <varname>pg_stat_statements.query_text_memory</varname> can be changed
without a server restart by reloading the configuration.
Typical usage might be:
@@ -1030,6 +1056,7 @@ shared_preload_libraries = 'pg_stat_statements'
compute_query_id = on
pg_stat_statements.max = 10000
pg_stat_statements.track = all
+pg_stat_statements.query_text_memory = 64
</programlisting>
</para>
</sect2>
--
2.50.1 (Apple Git-155)