v02-0001-Fix-location-and-length-tracking-of-select-state.patch
application/octet-stream
Filename: v02-0001-Fix-location-and-length-tracking-of-select-state.patch
Type: application/octet-stream
Part: 0
From 40c3ea62fc7ffc31439f42d524b2768dd0120cc0 Mon Sep 17 00:00:00 2001
From: Anthonin Bonnefoy <anthonin.bonnefoy@datadoghq.com>
Date: Tue, 10 Jun 2025 11:04:58 +0200
Subject: Fix location and length tracking of select statement
Previously, we tracked the start of a select statement using SELECT's
position. However, this doesn't work for statements like '(SELECT 1)
limit 1;' as the 'limit 1' needs to be part of the statement and thus,
the parenthesis needs to be included.
This patch instead tracks the start of the select_stmt to the outermost
'(' if present. The statement's length is only filled where we can
reliably get the information:
- COPY: We use the position of the '()' surrounding the PreparableStmt
- CTAS: Use the location of the opt_with_data to update the length.
Outside of those cases, the statement length is unset by default and it
will fallback to the RawStmt's length to compute the remaining length.
---
.../expected/level_tracking.out | 84 +++++++++++--------
.../pg_stat_statements/expected/select.out | 10 ++-
.../pg_stat_statements/sql/level_tracking.sql | 12 ++-
contrib/pg_stat_statements/sql/select.sql | 3 +
src/backend/parser/gram.y | 54 ++++++++----
5 files changed, 109 insertions(+), 54 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/level_tracking.out b/contrib/pg_stat_statements/expected/level_tracking.out
index 75e785e1719..a078822a89b 100644
--- a/contrib/pg_stat_statements/expected/level_tracking.out
+++ b/contrib/pg_stat_statements/expected/level_tracking.out
@@ -208,6 +208,9 @@ SELECT toplevel, calls, query FROM pg_stat_statements
ORDER BY query COLLATE "C";
toplevel | calls | query
----------+-------+--------------------------------------------------------------------
+ f | 1 | (SELECT $1, $2)
+ f | 1 | (TABLE test_table)
+ f | 1 | (VALUES ($1, $2))
f | 1 | DELETE FROM stats_track_tab
t | 1 | EXPLAIN (COSTS OFF) (SELECT $1, $2)
t | 1 | EXPLAIN (COSTS OFF) (TABLE test_table)
@@ -230,13 +233,10 @@ SELECT toplevel, calls, query FROM pg_stat_statements
| | WHEN NOT MATCHED THEN INSERT (x) VALUES (id)
f | 1 | SELECT $1
f | 1 | SELECT $1 UNION SELECT $2
- f | 1 | SELECT $1, $2
t | 1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
f | 1 | TABLE stats_track_tab
- f | 1 | TABLE test_table
f | 1 | UPDATE stats_track_tab SET x = $1 WHERE x = $2
f | 1 | VALUES ($1)
- f | 1 | VALUES ($1, $2)
(23 rows)
-- EXPLAIN - top-level tracking.
@@ -407,7 +407,9 @@ SELECT toplevel, calls, query FROM pg_stat_statements
ORDER BY query COLLATE "C";
toplevel | calls | query
----------+-------+-----------------------------------------------------------------
+ f | 1 | (SELECT $1, $2, $3)
f | 1 | (SELECT $1, $2, $3) UNION SELECT $4, $5, $6
+ f | 1 | (SELECT $1, $2, $3, $4)
t | 1 | EXPLAIN (COSTS OFF) (SELECT $1, $2, $3)
t | 1 | EXPLAIN (COSTS OFF) (SELECT $1, $2, $3) UNION SELECT $4, $5, $6
t | 1 | EXPLAIN (COSTS OFF) (SELECT $1, $2, $3, $4)
@@ -417,8 +419,6 @@ SELECT toplevel, calls, query FROM pg_stat_statements
f | 1 | SELECT $1
f | 1 | SELECT $1, $2
f | 1 | SELECT $1, $2 UNION SELECT $3, $4
- f | 1 | SELECT $1, $2, $3
- f | 1 | SELECT $1, $2, $3, $4
t | 1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
(13 rows)
@@ -496,6 +496,8 @@ SELECT toplevel, calls, query FROM pg_stat_statements
ORDER BY query COLLATE "C";
toplevel | calls | query
----------+-------+--------------------------------------------------------------------
+ f | 1 | (TABLE test_table)
+ f | 1 | (VALUES ($1, $2))
f | 1 | DELETE FROM stats_track_tab
f | 1 | DELETE FROM stats_track_tab WHERE x = $1
t | 1 | EXPLAIN (COSTS OFF) (TABLE test_table)
@@ -512,11 +514,9 @@ SELECT toplevel, calls, query FROM pg_stat_statements
f | 1 | INSERT INTO stats_track_tab VALUES (($1))
t | 1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
f | 1 | TABLE stats_track_tab
- f | 1 | TABLE test_table
f | 1 | UPDATE stats_track_tab SET x = $1
f | 1 | UPDATE stats_track_tab SET x = $1 WHERE x = $2
f | 1 | VALUES ($1)
- f | 1 | VALUES ($1, $2)
(21 rows)
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
@@ -788,6 +788,7 @@ SELECT toplevel, calls, query FROM pg_stat_statements
ORDER BY query COLLATE "C";
toplevel | calls | query
----------+-------+------------------------------------------------------------------------------------------
+ f | 1 | (WITH a AS (SELECT $1) (SELECT $2, $3))
t | 1 | EXPLAIN (COSTS OFF) (WITH a AS (SELECT $1) (SELECT $2, $3))
t | 1 | EXPLAIN (COSTS OFF) WITH a AS (SELECT $1) DELETE FROM stats_track_tab
t | 1 | EXPLAIN (COSTS OFF) WITH a AS (SELECT $1) INSERT INTO stats_track_tab VALUES (($2))
@@ -799,7 +800,6 @@ SELECT toplevel, calls, query FROM pg_stat_statements
t | 1 | EXPLAIN (COSTS OFF) WITH a AS (SELECT $1) UPDATE stats_track_tab SET x = $2 WHERE x = $3
t | 1 | EXPLAIN (COSTS OFF) WITH a AS (select $1) SELECT $2 UNION SELECT $3
t | 1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
- f | 1 | WITH a AS (SELECT $1) (SELECT $2, $3)
f | 1 | WITH a AS (SELECT $1) DELETE FROM stats_track_tab
f | 1 | WITH a AS (SELECT $1) INSERT INTO stats_track_tab VALUES (($2))
f | 1 | WITH a AS (SELECT $1) MERGE INTO stats_track_tab +
@@ -1041,17 +1041,33 @@ SELECT pg_stat_statements_reset() IS NOT NULL AS t;
(1 row)
CREATE TEMPORARY TABLE pgss_ctas_1 AS SELECT 1;
-CREATE TEMPORARY TABLE pgss_ctas_2 AS EXECUTE test_prepare_pgss;
+CREATE TEMPORARY TABLE pgss_ctas_2 AS SELECT 1 as a, 2 as b WITH DATA;
+CREATE TEMPORARY TABLE pgss_ctas_3 AS EXECUTE test_prepare_pgss;
+CREATE TEMPORARY TABLE pgss_ctas_5 AS (SELECT (SELECT 2)) WITH DATA;
+CREATE TEMPORARY TABLE pgss_ctas_6 AS (SELECT (SELECT ((SELECT 3)) limit 1) order by 1) WITH DATA;
+CREATE TEMPORARY TABLE pgss_ctas_7 AS (SELECT (SELECT (SELECT 3))) WITH NO DATA;
+CREATE TEMPORARY TABLE IF NOT EXISTS pgss_ctas_8 AS SELECT 1 as a;
+CREATE TEMPORARY TABLE IF NOT EXISTS pgss_ctas_9 AS ((SELECT 1 as b) limit 1) WITH DATA;
SELECT toplevel, calls, query FROM pg_stat_statements
ORDER BY query COLLATE "C";
- toplevel | calls | query
-----------+-------+-----------------------------------------------------------------
+ toplevel | calls | query
+----------+-------+-----------------------------------------------------------------------------------------------------
+ f | 1 | ((SELECT $1 as b) limit $2)
+ f | 1 | (SELECT (SELECT $1))
+ f | 1 | (SELECT (SELECT ((SELECT $1)) limit $2) order by 1)
+ t | 1 | CREATE TEMPORARY TABLE IF NOT EXISTS pgss_ctas_8 AS SELECT $1 as a
+ t | 1 | CREATE TEMPORARY TABLE IF NOT EXISTS pgss_ctas_9 AS ((SELECT $1 as b) limit $2) WITH DATA
t | 1 | CREATE TEMPORARY TABLE pgss_ctas_1 AS SELECT $1
- t | 1 | CREATE TEMPORARY TABLE pgss_ctas_2 AS EXECUTE test_prepare_pgss
- f | 1 | SELECT $1
+ t | 1 | CREATE TEMPORARY TABLE pgss_ctas_2 AS SELECT $1 as a, $2 as b WITH DATA
+ t | 1 | CREATE TEMPORARY TABLE pgss_ctas_3 AS EXECUTE test_prepare_pgss
+ t | 1 | CREATE TEMPORARY TABLE pgss_ctas_5 AS (SELECT (SELECT $1)) WITH DATA
+ t | 1 | CREATE TEMPORARY TABLE pgss_ctas_6 AS (SELECT (SELECT ((SELECT $1)) limit $2) order by 1) WITH DATA
+ t | 1 | CREATE TEMPORARY TABLE pgss_ctas_7 AS (SELECT (SELECT (SELECT $1))) WITH NO DATA
+ f | 2 | SELECT $1
+ f | 1 | SELECT $1 as a, $2 as b
t | 1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
f | 1 | select generate_series($1, $2)
-(5 rows)
+(15 rows)
-- CREATE TABLE AS, top-level tracking.
SET pg_stat_statements.track = 'top';
@@ -1061,14 +1077,14 @@ SELECT pg_stat_statements_reset() IS NOT NULL AS t;
t
(1 row)
-CREATE TEMPORARY TABLE pgss_ctas_3 AS SELECT 1;
-CREATE TEMPORARY TABLE pgss_ctas_4 AS EXECUTE test_prepare_pgss;
+CREATE TEMPORARY TABLE pgss_ctas_top_1 AS SELECT 1;
+CREATE TEMPORARY TABLE pgss_ctas_top_2 AS EXECUTE test_prepare_pgss;
SELECT toplevel, calls, query FROM pg_stat_statements
ORDER BY query COLLATE "C";
- toplevel | calls | query
-----------+-------+-----------------------------------------------------------------
- t | 1 | CREATE TEMPORARY TABLE pgss_ctas_3 AS SELECT $1
- t | 1 | CREATE TEMPORARY TABLE pgss_ctas_4 AS EXECUTE test_prepare_pgss
+ toplevel | calls | query
+----------+-------+---------------------------------------------------------------------
+ t | 1 | CREATE TEMPORARY TABLE pgss_ctas_top_1 AS SELECT $1
+ t | 1 | CREATE TEMPORARY TABLE pgss_ctas_top_2 AS EXECUTE test_prepare_pgss
t | 1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
(3 rows)
@@ -1205,23 +1221,23 @@ SELECT toplevel, calls, query FROM pg_stat_statements
ORDER BY query COLLATE "C";
toplevel | calls | query
----------+-------+---------------------------------------------------------------------------
- t | 1 | COPY (DELETE FROM stats_track_tab WHERE x = 2 RETURNING x) TO stdout
- t | 1 | COPY (INSERT INTO stats_track_tab (x) VALUES (1) RETURNING x) TO stdout
- t | 1 | COPY (MERGE INTO stats_track_tab USING (SELECT 1 id) ON x = id +
- | | WHEN MATCHED THEN UPDATE SET x = id +
- | | WHEN NOT MATCHED THEN INSERT (x) VALUES (id) RETURNING x) TO stdout
- t | 1 | COPY (SELECT 1 UNION SELECT 2) TO stdout
- t | 1 | COPY (SELECT 1) TO stdout
- t | 1 | COPY (UPDATE stats_track_tab SET x = 2 WHERE x = 1 RETURNING x) TO stdout
- f | 1 | DELETE FROM stats_track_tab WHERE x = $1 RETURNING x
- f | 1 | INSERT INTO stats_track_tab (x) VALUES ($1) RETURNING x
- f | 1 | MERGE INTO stats_track_tab USING (SELECT $1 id) ON x = id +
+ f | 1 | (DELETE FROM stats_track_tab WHERE x = $1 RETURNING x
+ f | 1 | (INSERT INTO stats_track_tab (x) VALUES ($1) RETURNING x
+ f | 1 | (MERGE INTO stats_track_tab USING (SELECT $1 id) ON x = id +
| | WHEN MATCHED THEN UPDATE SET x = id +
| | WHEN NOT MATCHED THEN INSERT (x) VALUES (id) RETURNING x
- f | 1 | SELECT $1
- f | 1 | SELECT $1 UNION SELECT $2
+ f | 1 | (SELECT $1
+ f | 1 | (SELECT $1 UNION SELECT $2
+ f | 1 | (UPDATE stats_track_tab SET x = $1 WHERE x = $2 RETURNING x
+ t | 1 | COPY (DELETE FROM stats_track_tab WHERE x = 2 RETURNING x) TO stdout
+ t | 1 | COPY (INSERT INTO stats_track_tab (x) VALUES (1) RETURNING x) TO stdout
+ t | 1 | COPY (MERGE INTO stats_track_tab USING (SELECT 1 id) ON x = id +
+ | | WHEN MATCHED THEN UPDATE SET x = id +
+ | | WHEN NOT MATCHED THEN INSERT (x) VALUES (id) RETURNING x) TO stdout
+ t | 1 | COPY (SELECT 1 UNION SELECT 2) TO stdout
+ t | 1 | COPY (SELECT 1) TO stdout
+ t | 1 | COPY (UPDATE stats_track_tab SET x = 2 WHERE x = 1 RETURNING x) TO stdout
t | 1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
- f | 1 | UPDATE stats_track_tab SET x = $1 WHERE x = $2 RETURNING x
(13 rows)
-- COPY - top-level tracking.
diff --git a/contrib/pg_stat_statements/expected/select.out b/contrib/pg_stat_statements/expected/select.out
index 038ae110364..beb21ff9002 100644
--- a/contrib/pg_stat_statements/expected/select.out
+++ b/contrib/pg_stat_statements/expected/select.out
@@ -185,6 +185,13 @@ select '{"a":1, "b":2}'::jsonb ? 'b';
t
(1 row)
+-- values with limit
+(values(1)) limit 1;
+ column1
+---------
+ 1
+(1 row)
+
-- cte
WITH t(f) AS (
VALUES (1.0), (2.0)
@@ -208,6 +215,7 @@ DEALLOCATE pgss_test;
SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
calls | rows | query
-------+------+------------------------------------------------------------------------------
+ 1 | 1 | (values($1)) limit $2
4 | 4 | SELECT $1 +
| | -- but this one will appear +
| | AS "text"
@@ -230,7 +238,7 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
| | ) +
| | SELECT f FROM t ORDER BY f
1 | 1 | select $1::jsonb ? $2
-(17 rows)
+(18 rows)
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
t
diff --git a/contrib/pg_stat_statements/sql/level_tracking.sql b/contrib/pg_stat_statements/sql/level_tracking.sql
index 86f007e8552..7a961252b05 100644
--- a/contrib/pg_stat_statements/sql/level_tracking.sql
+++ b/contrib/pg_stat_statements/sql/level_tracking.sql
@@ -225,15 +225,21 @@ SET pg_stat_statements.track = 'all';
PREPARE test_prepare_pgss AS select generate_series(1, 10);
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
CREATE TEMPORARY TABLE pgss_ctas_1 AS SELECT 1;
-CREATE TEMPORARY TABLE pgss_ctas_2 AS EXECUTE test_prepare_pgss;
+CREATE TEMPORARY TABLE pgss_ctas_2 AS SELECT 1 as a, 2 as b WITH DATA;
+CREATE TEMPORARY TABLE pgss_ctas_3 AS EXECUTE test_prepare_pgss;
+CREATE TEMPORARY TABLE pgss_ctas_5 AS (SELECT (SELECT 2)) WITH DATA;
+CREATE TEMPORARY TABLE pgss_ctas_6 AS (SELECT (SELECT ((SELECT 3)) limit 1) order by 1) WITH DATA;
+CREATE TEMPORARY TABLE pgss_ctas_7 AS (SELECT (SELECT (SELECT 3))) WITH NO DATA;
+CREATE TEMPORARY TABLE IF NOT EXISTS pgss_ctas_8 AS SELECT 1 as a;
+CREATE TEMPORARY TABLE IF NOT EXISTS pgss_ctas_9 AS ((SELECT 1 as b) limit 1) WITH DATA;
SELECT toplevel, calls, query FROM pg_stat_statements
ORDER BY query COLLATE "C";
-- CREATE TABLE AS, top-level tracking.
SET pg_stat_statements.track = 'top';
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
-CREATE TEMPORARY TABLE pgss_ctas_3 AS SELECT 1;
-CREATE TEMPORARY TABLE pgss_ctas_4 AS EXECUTE test_prepare_pgss;
+CREATE TEMPORARY TABLE pgss_ctas_top_1 AS SELECT 1;
+CREATE TEMPORARY TABLE pgss_ctas_top_2 AS EXECUTE test_prepare_pgss;
SELECT toplevel, calls, query FROM pg_stat_statements
ORDER BY query COLLATE "C";
diff --git a/contrib/pg_stat_statements/sql/select.sql b/contrib/pg_stat_statements/sql/select.sql
index 189d405512f..ff9328fbe02 100644
--- a/contrib/pg_stat_statements/sql/select.sql
+++ b/contrib/pg_stat_statements/sql/select.sql
@@ -65,6 +65,9 @@ SELECT 1 AS i UNION SELECT 2 ORDER BY i;
-- ? operator
select '{"a":1, "b":2}'::jsonb ? 'b';
+-- values with limit
+(values(1)) limit 1;
+
-- cte
WITH t(f) AS (
VALUES (1.0), (2.0)
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 0b5652071d1..eb69162866e 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -154,7 +154,8 @@ static void base_yyerror(YYLTYPE *yylloc, core_yyscan_t yyscanner,
const char *msg);
static RawStmt *makeRawStmt(Node *stmt, int stmt_location);
static void updateRawStmtEnd(RawStmt *rs, int end_location);
-static void updatePreparableStmtEnd(Node *n, int end_location);
+static void updatePreparableStmtEnd(Node *n, int location, int len);
+static void updateSelectStmtEnd(Node *n, int end_location);
static Node *makeColumnRef(char *colname, List *indirection,
int location, core_yyscan_t yyscanner);
static Node *makeTypeCast(Node *arg, TypeName *typename, int location);
@@ -3417,9 +3418,9 @@ CopyStmt: COPY opt_binary qualified_name opt_column_list
{
CopyStmt *n = makeNode(CopyStmt);
- updatePreparableStmtEnd($3, @4);
n->relation = NULL;
n->query = $3;
+ updatePreparableStmtEnd($3, @2, @4 - @2);
n->attlist = NIL;
n->is_from = false;
n->is_program = $6;
@@ -4805,6 +4806,7 @@ CreateAsStmt:
CreateTableAsStmt *ctas = makeNode(CreateTableAsStmt);
ctas->query = $6;
+ updateSelectStmtEnd($6, @7);
ctas->into = $4;
ctas->objtype = OBJECT_TABLE;
ctas->is_select_into = false;
@@ -4819,6 +4821,7 @@ CreateAsStmt:
CreateTableAsStmt *ctas = makeNode(CreateTableAsStmt);
ctas->query = $9;
+ updateSelectStmtEnd($9, @10);
ctas->into = $7;
ctas->objtype = OBJECT_TABLE;
ctas->is_select_into = false;
@@ -12828,18 +12831,20 @@ select_with_parens:
'(' select_no_parens ')'
{
SelectStmt *n = (SelectStmt *) $2;
-
/*
- * As SelectStmt's location starts at the SELECT keyword,
- * we need to track the length of the SelectStmt within
- * parentheses to be able to extract the relevant part
- * of the query. Without this, the RawStmt's length would
- * be used and would include the closing parenthesis.
+ * With queries like '(SELECT 1) limit 1', parentheses are part of the
+ * query so we need to move the location of the select stmt to the '('
*/
- n->stmt_len = @3 - @2;
+ n->stmt_location = @1;
+ $$ = $2;
+ }
+ | '(' select_with_parens ')'
+ {
+ SelectStmt *n = (SelectStmt *) $2;
+ /* Track the outermost '(' as the select statement start */
+ n->stmt_location = @1;
$$ = $2;
}
- | '(' select_with_parens ')' { $$ = $2; }
;
/*
@@ -18753,42 +18758,59 @@ updateRawStmtEnd(RawStmt *rs, int end_location)
* string.
*/
static void
-updatePreparableStmtEnd(Node *n, int end_location)
+updatePreparableStmtEnd(Node *n, int location, int len)
{
if (IsA(n, SelectStmt))
{
SelectStmt *stmt = (SelectStmt *) n;
- stmt->stmt_len = end_location - stmt->stmt_location;
+ stmt->stmt_location = location;
+ stmt->stmt_len = len;
}
else if (IsA(n, InsertStmt))
{
InsertStmt *stmt = (InsertStmt *) n;
- stmt->stmt_len = end_location - stmt->stmt_location;
+ stmt->stmt_location = location;
+ stmt->stmt_len = len;
}
else if (IsA(n, UpdateStmt))
{
UpdateStmt *stmt = (UpdateStmt *) n;
- stmt->stmt_len = end_location - stmt->stmt_location;
+ stmt->stmt_location = location;
+ stmt->stmt_len = len;
}
else if (IsA(n, DeleteStmt))
{
DeleteStmt *stmt = (DeleteStmt *) n;
- stmt->stmt_len = end_location - stmt->stmt_location;
+ stmt->stmt_location = location;
+ stmt->stmt_len = len;
}
else if (IsA(n, MergeStmt))
{
MergeStmt *stmt = (MergeStmt *) n;
- stmt->stmt_len = end_location - stmt->stmt_location;
+ stmt->stmt_location = location;
+ stmt->stmt_len = len;
}
else
elog(ERROR, "unexpected node type %d", (int) n->type);
}
+/*
+ * Adjust SelectStmt's length if we have a known end location
+ */
+static void
+updateSelectStmtEnd(Node *n, int end_location)
+{
+ SelectStmt *stmt = (SelectStmt *) n;
+ if (end_location > -1)
+ stmt->stmt_len = end_location - stmt->stmt_location;
+}
+
+
static Node *
makeColumnRef(char *colname, List *indirection,
int location, core_yyscan_t yyscanner)
--
2.49.0