v2-0001-Propagate-pathkeys-from-CTEs-up-to-the-outer-query.patch
application/octet-stream
Filename: v2-0001-Propagate-pathkeys-from-CTEs-up-to-the-outer-query.patch
Type: application/octet-stream
Part: 0
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v2-0001
Subject: Propagate pathkeys from CTEs up to the outer query
| File | + | − |
|---|---|---|
| src/backend/optimizer/path/allpaths.c | 16 | 1 |
| src/backend/optimizer/plan/planner.c | 1 | 0 |
| src/backend/optimizer/plan/subselect.c | 10 | 6 |
| src/backend/optimizer/prep/prepjointree.c | 1 | 0 |
| src/backend/optimizer/util/pathnode.c | 3 | 2 |
| src/include/nodes/pathnodes.h | 5 | 0 |
| src/include/optimizer/pathnode.h | 1 | 1 |
| src/test/regress/expected/with.out | 17 | 0 |
| src/test/regress/sql/with.sql | 7 | 0 |
From 48c79d4fcba599ce17bbd66d09b9a05e92db54c8 Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Mon, 20 Nov 2023 10:08:28 +0800
Subject: [PATCH v2] Propagate pathkeys from CTEs up to the outer query
---
src/backend/optimizer/path/allpaths.c | 17 ++++++++++++++++-
src/backend/optimizer/plan/planner.c | 1 +
src/backend/optimizer/plan/subselect.c | 16 ++++++++++------
src/backend/optimizer/prep/prepjointree.c | 1 +
src/backend/optimizer/util/pathnode.c | 5 +++--
src/include/nodes/pathnodes.h | 5 +++++
src/include/optimizer/pathnode.h | 2 +-
src/test/regress/expected/with.out | 17 +++++++++++++++++
src/test/regress/sql/with.sql | 7 +++++++
9 files changed, 61 insertions(+), 10 deletions(-)
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 67921a0826..749a69ff51 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -2882,6 +2882,8 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
ListCell *lc;
int plan_id;
Relids required_outer;
+ Path *ctepath;
+ List *pathkeys;
/*
* Find the referenced CTE, and locate the plan previously made for it.
@@ -2921,6 +2923,19 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
/* Mark rel with estimated output rows, width, etc */
set_cte_size_estimates(root, rel, cteplan->plan_rows);
+ /*
+ * Locate the best path previously made for the referenced CTE. We need to
+ * know its pathkeys.
+ */
+ Assert(list_length(cteroot->cte_plan_ids) == list_length(cteroot->cte_paths));
+ ctepath = (Path *) list_nth(cteroot->cte_paths, ndx);
+
+ /* Convert the pathkeys to outer representation */
+ pathkeys = convert_subquery_pathkeys(root,
+ rel,
+ ctepath->pathkeys,
+ cteplan->targetlist);
+
/*
* We don't support pushing join clauses into the quals of a CTE scan, but
* it could still have required parameterization due to LATERAL refs in
@@ -2929,7 +2944,7 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
required_outer = rel->lateral_relids;
/* Generate appropriate path */
- add_path(rel, create_ctescan_path(root, rel, required_outer));
+ add_path(rel, create_ctescan_path(root, rel, pathkeys, required_outer));
}
/*
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index a8cea5efe1..7ac5d7b159 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -643,6 +643,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
root->planner_cxt = CurrentMemoryContext;
root->init_plans = NIL;
root->cte_plan_ids = NIL;
+ root->cte_paths = NIL;
root->multiexpr_params = NIL;
root->join_domains = NIL;
root->eq_classes = NIL;
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 7a9fe88fec..5a112b1562 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -885,16 +885,17 @@ hash_ok_operator(OpExpr *expr)
* unreferenced SELECT), "inline" it to create a regular sub-SELECT-in-FROM,
* or convert it to an initplan.
*
- * A side effect is to fill in root->cte_plan_ids with a list that
- * parallels root->parse->cteList and provides the subplan ID for
- * each CTE's initplan, or a dummy ID (-1) if we didn't make an initplan.
+ * A side effect is to fill in root->cte_plan_ids and root->cte_paths with
+ * lists that parallel root->parse->cteList and provide the subplan ID and
+ * best path for each CTE, or a dummy ID (-1) and a dummy Path (NULL) if we
+ * didn't make a subplan.
*/
void
SS_process_ctes(PlannerInfo *root)
{
ListCell *lc;
- Assert(root->cte_plan_ids == NIL);
+ Assert(root->cte_plan_ids == NIL && root->cte_paths == NIL);
foreach(lc, root->parse->cteList)
{
@@ -913,8 +914,9 @@ SS_process_ctes(PlannerInfo *root)
*/
if (cte->cterefcount == 0 && cmdType == CMD_SELECT)
{
- /* Make a dummy entry in cte_plan_ids */
+ /* Make a dummy entry in cte_plan_ids and cte_paths */
root->cte_plan_ids = lappend_int(root->cte_plan_ids, -1);
+ root->cte_paths = lappend(root->cte_paths, NULL);
continue;
}
@@ -960,8 +962,9 @@ SS_process_ctes(PlannerInfo *root)
!contain_volatile_functions(cte->ctequery))
{
inline_cte(root, cte);
- /* Make a dummy entry in cte_plan_ids */
+ /* Make a dummy entry in cte_plan_ids and cte_paths */
root->cte_plan_ids = lappend_int(root->cte_plan_ids, -1);
+ root->cte_paths = lappend(root->cte_paths, NULL);
continue;
}
@@ -1051,6 +1054,7 @@ SS_process_ctes(PlannerInfo *root)
root->init_plans = lappend(root->init_plans, splan);
root->cte_plan_ids = lappend_int(root->cte_plan_ids, splan->plan_id);
+ root->cte_paths = lappend(root->cte_paths, best_path);
/* Label the subplan for EXPLAIN purposes */
splan->plan_name = psprintf("CTE %s", cte->ctename);
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 73ff40721c..4164ada823 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -990,6 +990,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
subroot->planner_cxt = CurrentMemoryContext;
subroot->init_plans = NIL;
subroot->cte_plan_ids = NIL;
+ subroot->cte_paths = NIL;
subroot->multiexpr_params = NIL;
subroot->join_domains = NIL;
subroot->eq_classes = NIL;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 0b1d17b9d3..05b85305fe 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2113,7 +2113,8 @@ create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
* returning the pathnode.
*/
Path *
-create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
+create_ctescan_path(PlannerInfo *root, RelOptInfo *rel,
+ List *pathkeys, Relids required_outer)
{
Path *pathnode = makeNode(Path);
@@ -2125,7 +2126,7 @@ create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
pathnode->parallel_aware = false;
pathnode->parallel_safe = rel->consider_parallel;
pathnode->parallel_workers = 0;
- pathnode->pathkeys = NIL; /* XXX for now, result is always unordered */
+ pathnode->pathkeys = pathkeys;
cost_ctescan(pathnode, root, rel, pathnode->param_info);
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index ed85dc7414..cacd566777 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -301,6 +301,11 @@ struct PlannerInfo
*/
List *cte_plan_ids;
+ /*
+ * per-CTE-item list of Paths (or NULL if no Path was made for that CTE)
+ */
+ List *cte_paths;
+
/* List of Lists of Params for MULTIEXPR subquery outputs */
List *multiexpr_params;
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 6e557bebc4..2d76471814 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -115,7 +115,7 @@ extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
extern Path *create_tablefuncscan_path(PlannerInfo *root, RelOptInfo *rel,
Relids required_outer);
extern Path *create_ctescan_path(PlannerInfo *root, RelOptInfo *rel,
- Relids required_outer);
+ List *pathkeys, Relids required_outer);
extern Path *create_namedtuplestorescan_path(PlannerInfo *root, RelOptInfo *rel,
Relids required_outer);
extern Path *create_resultscan_path(PlannerInfo *root, RelOptInfo *rel,
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out
index 69c56ce207..9549cdc3d1 100644
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -654,6 +654,23 @@ select count(*) from tenk1 a
-> CTE Scan on x
(8 rows)
+-- test that pathkeys from a materialized CTE are propagated up to the
+-- outer query
+explain (costs off)
+with x as materialized (select unique1 from tenk1 b order by unique1)
+select count(*) from tenk1 a
+ where unique1 in (select * from x);
+ QUERY PLAN
+------------------------------------------------------------
+ Aggregate
+ CTE x
+ -> Index Only Scan using tenk1_unique1 on tenk1 b
+ -> Merge Semi Join
+ Merge Cond: (a.unique1 = x.unique1)
+ -> Index Only Scan using tenk1_unique1 on tenk1 a
+ -> CTE Scan on x
+(7 rows)
+
-- SEARCH clause
create temp table graph0( f int, t int, label text );
insert into graph0 values
diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql
index 3ef9898866..54da8b2d21 100644
--- a/src/test/regress/sql/with.sql
+++ b/src/test/regress/sql/with.sql
@@ -354,6 +354,13 @@ with x as materialized (select unique1 from tenk1 b)
select count(*) from tenk1 a
where unique1 in (select * from x);
+-- test that pathkeys from a materialized CTE are propagated up to the
+-- outer query
+explain (costs off)
+with x as materialized (select unique1 from tenk1 b order by unique1)
+select count(*) from tenk1 a
+ where unique1 in (select * from x);
+
-- SEARCH clause
create temp table graph0( f int, t int, label text );
--
2.31.0